C语言处理CSV数据

时间:2022-09-09 19:48:20

以下代码为博客

《Python的并行求和例子》:

http://www.cnblogs.com/instant7/p/4312786.html

中并行python代码的C语言重写版。

用C来跑一遍单线程也只需要50秒,比python 开4进程的实现要快6倍多,CPU占用率也只用python的1/4。

看来计算密集型应用还是需要用这些不顺手的老古董来弄的:)

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <fstream>
#include <iostream>

using namespace std;

/**
 * Strips leading and trailing blanks (space, tab, CR, LF) from a C string,
 * in place.
 *
 * The buffer is modified: a terminator is written right after the last
 * non-blank character. No memory is allocated or moved.
 *
 * @param str  NUL-terminated, writable string; may be NULL.
 * @return     Pointer to the first non-blank character inside the same
 *             buffer (an empty string if the input was empty or all blanks),
 *             or NULL when str is NULL.
 */
char *trim(char *str)
{
    static const char kBlanks[] = " \t\r\n";

    // strtok-style tokenizers return NULL when there is no token; pass it through.
    if (str == NULL)
        return NULL;

    // Skip the leading run of blanks.
    char *begin = str + strspn(str, kBlanks);

    // Walk back from the terminator, never crossing `begin`, so an empty or
    // all-blank string cannot make us read or write before the buffer.
    char *end = begin + strlen(begin);
    while (end > begin && strchr(kBlanks, end[-1]) != NULL)
        --end;
    *end = '\0';

    return begin;
}

int main(){
    cout<<"start time: ";
    system("Echo %Date% %Time%");
    FILE *fexp, *fpred;
    fexp = fopen("D:\\kaggle\\rain\\train_exp.csv", "r");
    fpred = fopen("D:\\kaggle\\rain\\trainChangeTimePeriod.csv","r");

    char expLine[10240];
    char predLine[10240];

    fgets(expLine, sizeof(expLine), fexp);
    fgets(predLine, sizeof(predLine), fpred);


    double squareErrorSum = 0;
    int rowCnt = 0;
    while(fgets(expLine, sizeof(expLine), fexp)) {
        //printf("%s", expLine);
        fgets(predLine, sizeof(predLine), fpred);
        //printf("%s", predLine);

        char *save_ptr;
        char *expId = trim(strtok_s(expLine, ",", &save_ptr));
        if (expId == NULL) {
            return -1;
        } 
        char *exp = trim(strtok_s(NULL, ",", &save_ptr));
        double expVal = atof(exp);
        //printf("%s\t%s\n", expId, exp);

        char *predId = trim(strtok_s(predLine, ",", &save_ptr));
        //printf("%s\n", predId);
        double prob[100];
        rowCnt += 1;
        if (rowCnt % 20000 == 0)
            cout << rowCnt << " finished"<<endl;
        for (int i = 0; i < 70; i++){
            char *temp = trim(strtok_s(NULL, ",", &save_ptr));
            prob[i] = atof(temp);
            squareErrorSum += pow((prob[i]-(i>=expVal)),2);
            //cout<<squareErrorSum<<endl;
        }
    }
    double score = squareErrorSum / (70 * rowCnt);
    cout<<"Score: "<<score<<endl;
    cout<<"end time: ";
    system("Echo %Date% %Time%");
    system("pause");
    return 0;
}

在 Linux 下,分割字符串的函数 strtok_s 要换成参数相同的:

strtok_r

C/C++对CSV的处理参考自:

《c语言读取csv文件和c++读取csv文件示例分享》

http://www.jb51.net/article/47962.htm