稀疏矩阵 part 5

时间:2023-03-09 07:37:57
稀疏矩阵 part 5

▶ 目前为止能跑的所有代码及其结果(2019年2月24日),之后添加:DIA 乘法 GPU 版;其他维度的乘法(矩阵乘矩阵);其他稀疏矩阵格式之间的相互转化

 #include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <malloc.h>
#include <math.h>
#include <time.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h" #define ROW (8192)
#define COL (8192)
#define RATIO 0.1 // 系数矩阵非零元素占比
#define EPSILON 0.0001 // 浮点数比较
#define THREAD_SIZE 256
#define SEED 1 // (unsigned int)time(NULL)
#define MAX(x,y) (((x)>(y))?(x):(y))
#define CEIL(x,y) (int)(( x - 1 ) / y + 1)
#define INT // 数字格式用 int 还是 float #ifdef INT
typedef int format;
#else
typedef float format;
#endif // 矩阵存储格式
typedef struct // 顺序格式
{
int row; // 行数
int col; // 列数
int count; // 非零元个数(用于转换,不用于计算)
format *data; // 元素的值
}
MAT; typedef struct // Compressed Sparse Row 格式
{
int row; // 行数
int col; // 列数
format *data; // 元素的值
int *index; // 元素的列号
int *ptr; // 每行首元在 index 中的下标,最后一个元素的值等于矩阵非零元个数
}
CSR; typedef struct // Compressed Sparse Row 格式
{
int row; // 行数
int col; // 列数
format *data; // 元素的值
int *index; // 元素的行号
int *ptr; // 每列首元在 index 中的下标,最后一个元素的值等于矩阵非零元个数
}
CSC; typedef struct // ELLPACK 格式
{
int row; // 行数
int col; // 列数,相当于MAT格式的行数
int colOrigin; // 原列数,相当于MAT格式的列数
format *data; // 元素的值
int *index; // 元素在MAT格式中的列号
}
ELL; typedef struct // Coordinate 格式
{
int row; // 行数
int col; // 列数
int count; // 非零元个数
int *rowIndex; // 行向量
int *colIndex; // 列向量
format *data; // 元素的值
}
COO; typedef struct // Diagonal 格式
{
int row; // 行数
int col; // 列数
int colOrigin; // 原列数
format *data; // 元素的值
int *index; // 原矩阵各对角线是否非零
}
DIA; // 全局指针
__managed__ MAT *aMAT, *xMAT, *yMATRef, *yMATCal;
__managed__ CSR *aCSR;
__managed__ ELL *aELL;
__managed__ COO *aCOO; // 通用函数
__host__ __device__ inline void checkNULL(const void *input) // 有点问题,设备函数无法使用 exit(1) 来推出
{
if (input == NULL)
printf("\n\tfind a NULL!");
return;
} __host__ inline void checkCudaError(cudaError input)
{
if (input != cudaSuccess)
{
printf("\n\tfind a cudaError!");
exit();
}
return;
} int checkEqual(const format * in1, const format * in2, const int length)// 注意两向量相等时返 0,否则返回“值不等的元素下标加一”
{
int i;
for (i = ; i < length; i++)
{
#ifdef INT
if (in1[i] != in2[i])
#else
if (fabs(in1[i] - in2[i]) > EPSILON)
#endif
{
printf("\n\tError at i = %d\n\tin1[i] = %10f, in2[i] = %10f\n", i, (float)in1[i], (float)in2[i]);
return i + ;
}
}
printf("\n\tCheck output, passed.\n");
return ;
} // 打印矩阵
void print(const char* info, const MAT *in)// 打印MAT格式的矩阵
{
printf("%s\n\tMAT,\n\trow = %d, col = %d, count = %d", info, in->row, in->col, in->count);
printf("\n\tdata =\n\t");
for (int i = ; i < in->row * in->col; i++)
{
#ifdef INT
printf("%d ", in->data[i]);
#else
printf("%.2f ", in->data[i]);
#endif
if ((i + ) % in->col == )
printf("\n\t");
}
printf("\n");
return;
} void print(const char* info, const CSR *in)// 打印CSR格式的矩阵
{
printf("%s\n\tCSR,\n\trow = %d, col = %d", info, in->row, in->col);
printf("\n\tdata =\n\t");
for (int i = ; i < in->ptr[in->row]; i++)
#ifdef INT
printf("%d ", in->data[i]);
#else
printf("%.2f ", in->data[i]);
#endif
printf("\n\tindex =\n\t");
for (int i = ; i < in->ptr[in->row]; i++)
printf("%d ", in->index[i]);
printf("\n\tptr =\n\t");
for (int i = ; i < in->row + ; i++)
printf("%d ", in->ptr[i]);
printf("\n\n");
return;
} void print(const char* info, const ELL *in)// 打印ELL格式的矩阵
{
int i;
printf("%s\n\tELL,\n\trow = %d, col = %d, colOrigin = %d", info, in->row, in->col, in->colOrigin);
printf("\n\tdata =\n\t");
for (i = ; i < in->row * in->col; i++)
{
#ifdef INT
printf("%d ", in->data[i]);
#else
printf("%.2f ", in->data[i]);
#endif
if ((i + ) % in->col == )
printf("\n\t");
}
printf("index =\n\t");
for (i = ; i < in->row * in->col; i++)
{
printf("%d ", in->index[i]);
if ((i + ) % in->col == )
printf("\n\t");
}
printf("\n\n");
return;
} void print(const char* info, const COO *in)// 打印COO格式的矩阵
{
int i;
printf("%s\n\tCOO,\n\trow = %d, col = %d, count = %d", info, in->row, in->col, in->count);
printf("\n\t(rowIndex, colIndex, data) =\n\t");
for (i = ; i < in->count; i++)
{
#ifdef INT
printf("(%d,%d,%d)\n\t", in->rowIndex[i], in->colIndex[i], in->data[i]);
#else
printf("(%d,%d,%.2f)\n\t", in->rowIndex[i], in->colIndex[i], in->data[i]);
#endif
}
printf("\n\n");
return;
} void print(const char* info, const DIA *in)// 打印DIA格式的矩阵
{
int i;
printf("%s\n\tDIA,\n\trow = %d, col = %d, colOrigin = %d", info, in->row, in->col, in->colOrigin,in->colOrigin);
printf("\n\tdata =\n\t");
int * inverseIndex = (int *)malloc(sizeof(int) * in->col);
for (int i = , j = ; i < in->row + in->col - ; i++)
{
if (in->index[i] == )
{
inverseIndex[j] = i;
j++;
}
}
for (i = ; i < in->row * in->col; i++)
{
if (i / in->col < in->row - - inverseIndex[i % in->col] || i / in->col > inverseIndex[in->col - ] - inverseIndex[i % in->col])
printf("* ");
else
#ifdef INT
printf("%d ", in->data[i]);
#else
printf("%.2f ", in->data[i]);
#endif
if ((i + ) % in->col == )
printf("\n\t");
}
printf("index =\n\t");
for (i = ; i < in->row + in->col - ; i++)
printf("%d ", in->index[i]);
printf("\n\n");
free(inverseIndex);
return;
} // 矩阵初始化与清理
MAT *initializeMAT(const int row, const int col, const int count = )// 初始化MAT,注意非零元默认为 0
{
MAT *temp;
checkCudaError(cudaMallocManaged((void **)&temp, sizeof(MAT)));
temp->row = row;
temp->col = col;
temp->count = count;
checkCudaError(cudaMallocManaged((void **)&temp->data, sizeof(format) * row * col));
return temp;
} // 统计MAT形式的矩阵中的非零元素个数
#define COUNT_MAT(in) \
{ \
checkNULL(in); \
int i, zero; \
for (i = zero = ; i < in->row * in->col; i++) \
zero += !in->data[i]; \
in->count = in->row * in->col - zero; \
} int freeMatrix(MAT *in)// 释放MAT
{
checkNULL(in);
cudaFree(in->data);
cudaFree(in);
return ;
} CSR * initializeCSR(const int row, const int col, const int count)// 初始化CSR,要求给出 count
{
CSR *temp;
checkCudaError(cudaMallocManaged((void **)&temp, sizeof(CSR)));
temp->row = row;
temp->col = col;
checkCudaError(cudaMallocManaged((void **)&temp->data, sizeof(format) * count));
checkCudaError(cudaMallocManaged((void **)&temp->index, sizeof(int) * count));
checkCudaError(cudaMallocManaged((void **)&temp->ptr, sizeof(int) * (row + )));
return temp;
} int freeMatrix(CSR *in)// 释放CSR
{
checkNULL(in);
cudaFree(in->data);
cudaFree(in->index);
cudaFree(in->ptr);
cudaFree(in);
return ;
} ELL * initializeELL(const int row, const int col, const int colOrigin)// 初始化ELL,要求给出原列数
{
ELL *temp;
checkCudaError(cudaMallocManaged((void **)&temp, sizeof(ELL)));
temp->row = row;
temp->col = col;
temp->colOrigin = colOrigin;
checkCudaError(cudaMallocManaged((void **)&temp->data, sizeof(format) * row * col));
checkCudaError(cudaMallocManaged((void **)&temp->index, sizeof(int) * row * col));
return temp;
} int freeMatrix(ELL *in)// 释放ELL
{
cudaFree(in->data);
cudaFree(in->index);
cudaFree(in);
return ;
} COO * initializeCOO(const int row, const int col, const int count)// 初始化COO,要求给出 count
{
COO * temp;
checkCudaError(cudaMallocManaged((void **)&temp, sizeof(COO)));
temp->row = row;
temp->col = col;
temp->count = count;
checkCudaError(cudaMallocManaged((void **)&temp->data, sizeof(format) * count));
checkCudaError(cudaMallocManaged((void **)&temp->rowIndex, sizeof(int) * count));
checkCudaError(cudaMallocManaged((void **)&temp->colIndex, sizeof(int) * count));
return temp;
} int freeMatrix(COO *in)// 释放COO
{
checkNULL(in);
cudaFree(in->data);
cudaFree(in->rowIndex);
cudaFree(in->colIndex);
cudaFree(in);
return ;
} DIA * initializeDIA(const int row, const int col, const int colOrigin)// 初始化DIA,要求给出原行数、原列数
{
DIA * temp;
checkCudaError(cudaMallocManaged((void **)&temp, sizeof(DIA)));
temp->row = row;
temp->col = col;
temp->colOrigin = colOrigin;
checkCudaError(cudaMallocManaged((void **)&temp->data, sizeof(format) * row * col));
checkCudaError(cudaMallocManaged((void **)&temp->index, sizeof(format) * (row + col - )));
return temp;
} int freeMatrix(DIA *in)// 释放DIA
{
checkNULL(in);
cudaFree(in->data);
cudaFree(in->index);
cudaFree(in);
return ;
} // 矩阵格式的相互转化
CSR * MATToCSR(const MAT *in) // MAT 转 CSR
{
checkNULL(in);
CSR * out = initializeCSR(in->row, in->col, in->count);
checkNULL(out); out->ptr[] = ;
for (int i = , j = , k = ; i < in->row * in->col; i++) // i 遍历 in->data
{
if (in->data[i] != ) // 找到非零元
{
if (j == in->count) // 在 out->data 已经填满了的基础上又发现了非零元,错误
return NULL;
out->data[j] = in->data[i]; // 填充非零元素
out->index[j] = i % in->col; // 填充列号
j++;
}
if ((i + ) % in->col == ) // 到了最后一列,写入行指针号
out->ptr[k++] = j;
}
return out;
} MAT * CSRToMAT(const CSR *in) // CSR转MAT
{
checkNULL(in);
MAT *out = initializeMAT(in->row, in->col, in->ptr[in->row]);
checkNULL(out); memset(out->data, , sizeof(format) * in->row * in->col);
for (int i = ; i < in->row; i++) // i 遍历行
{
for (int j = in->ptr[i]; j < in->ptr[i + ]; j++) // j 遍历列
out->data[i * in->col + in->index[j]] = in->data[j];
}
return out;
} ELL * MATToELL(const MAT *in)// MAT转ELL
{
checkNULL(in); int i, j, maxElement;
for (i = j = maxElement = ; i < in->row * in->col; i++) // i 遍历 in->data,j 记录该行非零元素数,maxElement 记录一行非零元素最大值
{
if (in->data[i] != ) // 找到非零元
j++;
if ((i + ) % in->col == ) // 行末,更新 maxElement
{
maxElement = MAX(j, maxElement);
j = ; // 开始下一行之前清空 j
}
}
format* temp_data=(format *)malloc(sizeof(format) * in->row * maxElement); // 临时数组,将列数压缩到 maxElement
checkNULL(temp_data);
int* temp_index = (int *)malloc(sizeof(int) * in->row * maxElement);
checkNULL(temp_index);
memset(temp_data, , sizeof(format) * in->row * maxElement);
memset(temp_index, , sizeof(int) * in->row * maxElement);
for (i = j = ; i < in->row * in->col; i++) // i 遍历 in->data,j 记录该行非零元素数,把 in 中每行的元素往左边推
{
if (in->data[i] != ) // 找到非零元
{
temp_data[i / in->col * maxElement + j] = in->data[i]; // 存放元素
temp_index[i / in->col * maxElement + j] = i % in->col; // 记录所在的列号
j++;
}
if ((i + ) % in->col == ) // 行末,将剩余位置的下标记作 -1,即无效元素
{
for (j += i / in->col * in->col; j < maxElement * (i / in->col + ); j++) // 使得 j 指向本行最后一个非零元素之后的元素,再开始填充
temp_index[j] = -;
j = ; // 开始下一行之前清空 j
}
}
ELL *out = initializeELL(maxElement, in->row, in->col); // 最终输出,如果不转置的话不要这部分
checkNULL(out);
for (i = ; i < out->row * out->col; i++) // 将 temp_data 和 temp_index 转置以提高缓存利用
{
out->data[i] = temp_data[i % out->col * out->row + i / out->col];
out->index[i] = temp_index[i % out->col * out->row + i / out->col];
}
free(temp_data);
free(temp_index);
return out;
} MAT * ELLToMAT(const ELL *in) // ELL转MAT
{
checkNULL(in);
MAT *out = initializeMAT(in->col, in->colOrigin);
checkNULL(out); for (int i = ; i < in->row * in->col; i++) // i 遍历 out->data
{
if (in->index[i] < ) // 注意跳过无效元素
continue;
out->data[i % in->col * in->colOrigin + in->index[i]] = in->data[i];
}
COUNT_MAT(out);
return out;
} COO * MATToCOO(const MAT *in) // MAT转COO
{
checkNULL(in);
COO *out = initializeCOO(in->row, in->col, in->count); for (int i=, j = ; i < in->row * in->col; i++)
{
#ifdef INT
if (in->data[i] != )
#else
if (fabs(in->data[i]) > EPSILON)
#endif
{
out->data[j] = in->data[i];
out->rowIndex[j] = i / in->col;
out->colIndex[j] = i % in->col;
j++;
}
}
return out;
} MAT * COOToMAT(const COO *in) // COO转MAT
{
checkNULL(in);
MAT *out = initializeMAT(in->row, in->col, in->count);
checkNULL(out); for (int i = ; i < in->row * in->col; i++)
out->data[i] = ;
for (int i = ; i < in->count; i++)
out->data[in->rowIndex[i] * in->col + in->colIndex[i]] = in->data[i];
return out;
} DIA * MATToDIA(const MAT *in) // MAT转DIA
{
checkNULL(in); int *index = (int *)malloc(sizeof(int)*(in->row + in->col - ));
for (int diff = in->row - ; diff > ; diff--) // 左侧零对角线情况
{
int flagNonZero = ;
for (int i = ; i < in->col && i + diff < in->row; i++) // i 沿着对角线方向遍历 in->data,flagNonZero 记录该对角线是否全部为零元
{
#ifdef INT
if (in->data[(i + diff) * in->col + i] != )
#else
if (fabs(in->data[(i + diff) * in->col + i]) > EPSILON)
#endif
flagNonZero = ;
}
index[in->row - - diff] = flagNonZero; // 标记该对角线上有非零元
}
for (int diff = in->col - ; diff >= ; diff--) // 右侧零对角线情况
{
int flagNonZero = ;
for (int j = ; j < in->row && j + diff < in->col; j++)
{
#ifdef INT
if (in->data[j * in->col + j + diff] != )
#else
if (fabs(in->data[j * in->col + j + diff]) > EPSILON)
#endif
flagNonZero = ;
}
index[in->row - + diff] = flagNonZero; // 标记该对角线上有非零元
}
int *prefixSumIndex = (int *)malloc(sizeof(int)*(in->row + in->col - ));
prefixSumIndex[] = index[];
for (int i = ; i < in->row + in->col - ; i++) // 闭前缀和,prefixSumIndex[i] 表示原矩阵第 0 ~ i 条对角线*有多少条非零对角线(含)
prefixSumIndex[i] = prefixSumIndex[i-] + index[i]; // index[in->row + in->col -2] 表示原矩阵非零对角线条数,等于 DIA 矩阵列数
DIA *out = initializeDIA(in->row, prefixSumIndex[in->row + in->col - ], in->col);
checkNULL(out); memset(out->data, , sizeof(int)*out->row * out->col);
for (int i = ; i < in->row + in->col - ; i++)
out->index[i] = index[i]; // index 搬进 out
for (int i = ; i < in->row; i++) // i,j 遍历原矩阵,将元素搬进 out
{
for (int j = ; j < in->col; j++)
{
int temp = j - i + in->row - ;
if (index[temp] == )
continue;
out->data[i * out->col + (temp > ? prefixSumIndex[temp - ] : )] = in->data[i * in->col + j]; // 第 row - 1 行第 0 列元素 temp == 0,单独处理
}
}
free(index);
free(prefixSumIndex);
return out;
} MAT * DIAToMAT(const DIA *in) // DIA转MAT
{
checkNULL(in);
MAT *out = initializeMAT(in->row, in->colOrigin);
checkNULL(out); int * inverseIndex = (int *)malloc(sizeof(int) * in->col);
for (int i = , j = ; i < in->row + in->col - ; i++) // 求一个 index 的逆,即 DIA 中第 i 列对应原矩阵第 inverseIndex[i] 对角线
{ // 原矩阵对角线编号 (row-1, 0) 为第 0 条,(0, 0) 为第 row - 1 条,(col-1, 0) 为第 row + col - 2 条
if (in->index[i] == )
{
inverseIndex[j] = i;
j++;
}
}
for (int i = ; i < in->row; i++) // i 遍历 in->data 行,j 遍历 in->data 列
{
for (int j = ; j < in->col; j++)
{
if (i < in->row - - inverseIndex[j] || i > inverseIndex[in->col - ] - inverseIndex[j]) // 跳过两边呈三角形的无效元素
continue;
out->data[i * in->col + inverseIndex[j] - in->row + ] = in->data[i * in->col + j]; // 利用 inverseIndex 来找钙元素在原距震中的位置
}
}
free(inverseIndex);
return out;
} // 各种形式的矩阵和一个向量的乘法
int dotCPU(const MAT *a, const MAT *x, MAT *y) // CPU MAT 乘法
{
checkNULL(a); checkNULL(x); checkNULL(y);
if (a->col != x->row)
{
printf("dotMATCPU dimension mismatch!\n");
return ;
} y->row = a->row;
y->col = x->col;
for (int i = ; i < a->row; i++)
{
format sum = ;
for (int j = ; j < a->col; j++)
sum += a->data[i * a->col + j] * x->data[j];
y->data[i] = sum;
}
COUNT_MAT(y);
return ;
} int dotCPU(const CSR *a, const MAT *x, MAT *y) // CPU CSR 乘法
{
checkNULL(a); checkNULL(x); checkNULL(y);
if (a->col != x->row)
{
printf("dotCSRCPU dimension mismatch!\n");
return ;
} y->row = a->row;
y->col = x->col;
for (int i = ; i < a->row; i++) // i 遍历 ptr,j 遍历行内数据,A 中为 0 的元素不参加乘法
{
format sum = ;
for (int j = a->ptr[i]; j < a->ptr[i + ]; j++)
sum += a->data[j] * x->data[a->index[j]];
y->data[i] = sum;
}
COUNT_MAT(y);
return ;
} int dotCPU(const ELL *a, const MAT *x, MAT *y) // CPU ELL乘法
{
checkNULL(a); checkNULL(x); checkNULL(y);
if (a->colOrigin != x->row)
{
printf("dotELLCPU dimension mismatch!\n");
return ;
} y->row = a->col;
y->col = x->col;
for (int i = ; i<a->col; i++)
{
format sum = ;
for (int j = ; j < a->row; j++)
{
int temp = a->index[j * a->col + i];
if (temp < ) // 跳过无效元素
continue;
sum += a->data[j * a->col + i] * x->data[temp];
}
y->data[i] = sum;
}
COUNT_MAT(y);
return ;
} int dotCPU(const COO *a, const MAT *x, MAT *y)// CPU COO乘法
{
checkNULL(a); checkNULL(x); checkNULL(y);
if (a->col != x->row)
{
printf("dotCOOCPU null!\n");
return ;
} y->row = a->row;
y->col = x->col;
for (int i = ; i<a->count; i++)
y->data[a->rowIndex[i]] += a->data[i] * x->data[a->colIndex[i]];
COUNT_MAT(y);
return ;
} int dotCPU(const DIA *a, const MAT *x, MAT *y)// CPU DIA乘法
{
checkNULL(a); checkNULL(x); checkNULL(y);
if (a->colOrigin != x->row)
{
printf("dotDIACPU null!\n");
return ;
}
y->row = a->row;
y->col = x->col;
int * inverseIndex = (int *)malloc(sizeof(int) * a->col);
for (int i = , j = ; i < a->row + a->col - ; i++)
{
if (a->index[i] == )
{
inverseIndex[j] = i;
j++;
}
}
for (int i = ; i < a->row; i++)
{
format sum = ;
for (int j = ; j < a->col; j++)
{
if (i < a->row - - inverseIndex[j] || i > inverseIndex[a->col - ] - inverseIndex[j])
continue;
sum += a->data[i * a->col + j] * x->data[i + inverseIndex[j] - a->row + ];
}
y->data[i] = sum;
}
COUNT_MAT(y);
free(inverseIndex);
return ;
} __global__ void dotGPU(const MAT *a, const MAT *x, MAT *y)// GPU MAT乘法
{
int id = blockIdx.x * blockDim.x + threadIdx.x;
if (id < a->row)
{
format sum = ;
for (int i = ; i < a->col; i++)
sum += a->data[id * a->col + i] * x->data[i];
y->data[id] = sum;
}
if (id == )
{
y->row = a->row;
y->col = x->col;
COUNT_MAT(y);
}
return;
} __global__ void dotGPU(const CSR *a, const MAT *x, MAT *y)// GPU CSR乘法
{
int id = blockIdx.x * blockDim.x + threadIdx.x;
if (id < a->row)
{
format sum = ;
for (int j = a->ptr[id]; j < a->ptr[id + ]; j++)
sum += a->data[j] * x->data[a->index[j]];
y->data[id] = sum;
}
if (id == )
{
y->row = a->row;
y->col = x->col;
COUNT_MAT(y);
}
return;
} __global__ void dotGPU(const ELL *a, const MAT *x, MAT *y)// GPU ELL乘法
{
int id = blockIdx.x * blockDim.x + threadIdx.x;
if (id < a->col)
{
format sum = ;
for (int j = ; j < a->row; j++)
sum += a->data[id + j * a->col] * (a->index[id + j * a->col] < ? : x->data[a->index[id + j * a->col]]);
y->data[id] = sum;
}
if (id == )
{
y->row = a->col;
y->col = x->col;
COUNT_MAT(y);
}
return;
} __global__ void dotGPU(const COO *a, const MAT *x, MAT *y)// GPU COO乘法
{
int id = blockIdx.x * blockDim.x + threadIdx.x;
if (id < a->count)
atomicAdd(&(y->data[a->rowIndex[id]]), a->data[id] * x->data[a->colIndex[id]]);
if (id == )
{
y->row = a->row;
y->col = x->col;
COUNT_MAT(y);
}
return;
} int test()// 测试矩阵打印和CPU计算的函数
{
const int row = , col = ; MAT* a = initializeMAT(row, col);
a->data[] = , a->data[] = , a->data[] = , a->data[] = ;
a->data[] = , a->data[] = , a->data[] = , a->data[] = ;
COUNT_MAT(a); MAT* x = initializeMAT(col, );
for (int i = ; i < x->row; i++)
x->data[i] = i + ;
COUNT_MAT(x); MAT* y = initializeMAT(row, );
COUNT_MAT(y); print("MAT a =", a);
print("MAT x =", x);
dotCPU(a, x, y);
print("MAT y = a * x = ", y); CSR* b = MATToCSR(a);
print("CSR b = a = ", b);
memset(y->data, , sizeof(format) * y->row * y->col);
dotCPU(b, x, y);
print("MAT y = b * x = ", y);
MAT* c = CSRToMAT(b);
print("MAT c = b =", c);
freeMatrix(b); ELL* d = MATToELL(a);
print("ELL d = a =", d);
memset(y->data, , sizeof(format) * y->row * y->col);
dotCPU(d, x, y);
print("MAT y = d * x =", y);
c = ELLToMAT(d);
print("MAT c = d =", c);
freeMatrix(d); COO* e = MATToCOO(a);
print("ELL e = a = ", e);
memset(y->data, , sizeof(format) * y->row * y->col);
dotCPU(e, x, y);
print("MAT y = e * x = ", y);
c = COOToMAT(e);
print("MAT c = e =", c);
freeMatrix(e); DIA* f = MATToDIA(a);
print("DIA f = a = ", f);
memset(y->data, , sizeof(format) * y->row * y->col);
dotCPU(f, x, y);
print("MAT y = f * x = ", y);
c = DIAToMAT(f);
print("MAT c = f =", c);
freeMatrix(f); freeMatrix(a);
freeMatrix(x);
freeMatrix(y);
freeMatrix(c);
return ;
} int main()
{
test(); clock_t timeCPU;
cudaEvent_t startGPU, stopGPU;
float timeGPU;
cudaEventCreate(&startGPU);
cudaEventCreate(&stopGPU); printf("\n\tStart. Matrix dimension:\t%d × %d", ROW, COL); // 初始化
timeCPU = clock();
aMAT = initializeMAT(ROW, COL);
xMAT = initializeMAT(COL, );
yMATRef = initializeMAT(ROW, );
yMATCal = initializeMAT(ROW, ); srand(SEED);
#ifdef INT
for (int i = ; i < COL * ROW; i++)
aMAT->data[i] = ((float)rand() < RAND_MAX * RATIO) ? (rand() - RAND_MAX / ) % : ;
COUNT_MAT(aMAT);
int count = aMAT->count;
for (int i = ; i < COL; i++)
xMAT->data[i] = i % ;
COUNT_MAT(xMAT);
#else
for (int i = ; i < COL * ROW; i++)
aMAT->data[i] = ((float)rand() < RAND_MAX * RATIO) ? ((float)rand() / RAND_MAX) : ;
aMAT->count = countMAT(aMAT);
for (int i = ; i < COL; i++)
xMAT->data[i] = ((float)rand() / RAND_MAX);
xMAT->count = countMAT(xMAT);
#endif
printf("\n\tInitialized. Time:\t\t%d ms\n", clock() - timeCPU); //CPU计算部分
//MAT 格式
timeCPU = clock();
dotCPU(aMAT, xMAT, yMATRef);
timeCPU = clock() - timeCPU;
printf("\n\tdotMATCPU time:\t%8.3f ms\n", (float)timeCPU); // CSR格式
aCSR = MATToCSR(aMAT);
timeCPU = clock();
dotCPU(aCSR, xMAT, yMATCal);
timeCPU = clock() - timeCPU;
printf("\n\tdotCSRCPU time:\t%8.3f ms\n", (float)timeCPU);
checkEqual(yMATRef->data, yMATCal->data, ROW); // ELL格式
aELL = MATToELL(aMAT);
timeCPU = clock();
dotCPU(aELL, xMAT, yMATCal);
timeCPU = clock() - timeCPU;
printf("\n\tdotELLCPU time:\t%8.3f ms\n", (float)timeCPU);
checkEqual(yMATRef->data, yMATCal->data, ROW); // COO格式
aCOO = MATToCOO(aMAT);
timeCPU = clock();
dotCPU(aCOO, xMAT, yMATCal);
timeCPU = clock() - timeCPU;
printf("\n\tdotCOOCPU time:\t%8.3f ms\n", (float)timeCPU);
checkEqual(yMATRef->data, yMATCal->data, ROW); // GPU计算部分
// MAT格式
cudaMemset(yMATCal->data, , sizeof(format) * yMATCal->row * yMATCal->col);
yMATCal->count = ;
cudaEventRecord(startGPU, );
dotGPU << <CEIL(ROW, THREAD_SIZE), THREAD_SIZE >> > (aMAT, xMAT, yMATCal);
cudaDeviceSynchronize();
cudaEventRecord(stopGPU, );
cudaEventSynchronize(stopGPU);
cudaEventElapsedTime(&timeGPU, startGPU, stopGPU);
printf("\n\tdotMATGPU time:\t%8.3f ms\n", timeGPU);
checkEqual(yMATRef->data, yMATCal->data, ROW);
freeMatrix(aMAT); // CSR格式
cudaMemset(yMATCal->data, , sizeof(format) * yMATCal->row * yMATCal->col);
yMATCal->count = ;
cudaEventRecord(startGPU, );
dotGPU << <CEIL(ROW, THREAD_SIZE), THREAD_SIZE >> > (aCSR, xMAT, yMATCal);
cudaDeviceSynchronize();
cudaEventRecord(stopGPU, );
cudaEventSynchronize(stopGPU);
cudaEventElapsedTime(&timeGPU, startGPU, stopGPU);
printf("\n\tdotCSRGPU time:\t%8.3f ms\n", timeGPU);
checkEqual(yMATRef->data, yMATCal->data, ROW);
freeMatrix(aCSR); // ELL格式
cudaMemset(yMATCal->data, , sizeof(format) * yMATCal->row * yMATCal->col);
yMATCal->count = ;
cudaEventRecord(startGPU, );
dotGPU << <CEIL(ROW, THREAD_SIZE), THREAD_SIZE >> > (aELL, xMAT, yMATCal);
cudaDeviceSynchronize();
cudaEventRecord(stopGPU, );
cudaEventSynchronize(stopGPU);
cudaEventElapsedTime(&timeGPU, startGPU, stopGPU);
printf("\n\tdotELLGPU time:\t%8.3f ms\n", timeGPU);
checkEqual(yMATRef->data, yMATCal->data, ROW);
freeMatrix(aELL); // COO格式
cudaMemset(yMATCal->data, , sizeof(format) * yMATCal->row * yMATCal->col);
yMATCal->count = ;
cudaEventRecord(startGPU, );
dotGPU << <CEIL(count, THREAD_SIZE), THREAD_SIZE >> > (aCOO, xMAT, yMATCal);
cudaDeviceSynchronize();
cudaEventRecord(stopGPU, );
cudaEventSynchronize(stopGPU);
cudaEventElapsedTime(&timeGPU, startGPU, stopGPU);
printf("\n\tdotCOOGPU time:\t%8.3f ms\n", timeGPU);
checkEqual(yMATRef->data, yMATCal->data, ROW);
freeMatrix(aCOO); // 清理内存
freeMatrix(xMAT);
freeMatrix(yMATRef);
freeMatrix(yMATCal); getchar();
return ;
}

● 输出结果

MAT a =
MAT,
row = , col = , count = MAT x =
MAT,
row = , col = , count = MAT y = a * x =
MAT,
row = , col = , count = CSR b = a =
CSR,
row = , col = MAT y = b * x =
MAT,
row = , col = , count = MAT c = b =
MAT,
row = , col = , count = ELL d = a =
ELL,
row = , col = , colOrigin = - MAT y = d * x =
MAT,
row = , col = , count = MAT c = d =
MAT,
row = , col = , count = ELL e = a =
COO,
row = , col = , count =
(,,)
(,,)
(,,)
(,,)
(,,)
(,,)
(,,)
(,,) MAT y = e * x
MAT,
row = , col = , count = MAT c = e =
MAT,
row = , col = , count = Start. Matrix dimension: ×
Initialized. Time: 0.000 ms dotMATCPU time: 304.000 ms dotCSRCPU time: 3.000 ms Check output, passed. dotCELLPU time: 103.000 ms Check output, passed. dotCOOCPU time: 11.000 ms Check output, passed. dotMATGPU time: 5.133 ms Check output, passed. dotCSRGPU time: 2.263 ms Check output, passed. dotELLGPU time: 1.253 ms Check output, passed. dotCOOGPU time: 4.754 ms Check output, passed.