參考文章:http://www.cnblogs.com/yicoder/
因為最近有幾個實驗需要處理大型數據,因為需要讀取的是一千萬個double型的數據,雖然不要求快速讀取文件數據,但是實在是無法忍受那幾十秒鐘的停頓。所以上網搜了下關於大數據的處理。
雖然可以利用scanf()提高讀取的速度,但還是有幾十秒鐘的停頓。所以在這裡選擇使用fread()讀取出所有的字符。
為了方便實驗,先寫了個生成 0< n <10,的double型數據n的文件數據。這個沒做優化,時間較久。
源碼:
void product_data ()
{
uniform_int_distribution<unsigned> u(0, 9999);
default_random_engine e (time(0));
freopen ("data.txt", "w", stdout);
for (int i = 0; i < 10000000; i++){
cout << (double)u(e)/1000 << endl;
}
fclose(stdout);
}
各種方法的結果對比
可見使用fread讀取整個文件要比其他兩種方法更快。
cin 比 scand 慢的原因很清楚,流數據處理比標准化處理要慢。而用fread更快的原因是吧所有數據當作一個字符串來讀取,一次讀入整個文件,這種方法的主要時間開銷是把字符轉換成要求的數值。
雖然用fread讀取要快得多,但是這種方法卻有很大的缺陷,buffer數組的容量要比文件所包含的所有字符都要大,要不然不能讀取出所有的數據,或許可以利用分塊處理解決這個問題。如果各位大蝦有更好的方法請指點一二。
源碼:
#include <iostream> #include <fstream> #include <ctime> #include <random> #include <cstdio> #define N 10000000 #define M 70000000 using namespace std; void cin_read() { time_t start_time = clock (); double *nums = new double [N]; freopen ("data.txt", "r", stdin); for (int i = 0; i < N; i++){ cin >> nums[i]; } time_t end_time = clock (); fclose (stdin); cout << "cin_read time: " << end_time - start_time << "ms" << endl; } void scanf_read() { time_t start_time = clock (); double *nums = new double [N]; freopen ("data.txt", "r", stdin); for (int i = 0; i < N; i++){ scanf ("%lf", &nums[N]); } time_t end_time = clock (); fclose (stdin); cout << "scanf_read time: " << end_time - start_time << "ms" << endl; } double* transform_num(char* buffer, int lenght) { bool is_dec = false; double *nums = new double [M]; int k = 0; int nt = 10; for (int i = 0; i < lenght; i++){ if (buffer [i] == '\n'){ k++; nt = 10; continue; } else if (buffer [i] == '.')is_dec = true; if (!is_dec)nums[k] = buffer[i] - '0'; else { nums[k] += (double)(buffer[i] - '0')/nt; nt = nt*nt; } } } void fread_read() { time_t start_time = clock (); freopen ("data.txt", "r", stdin); char *buffer = new char [M]; //把所有字符一次全部讀取到buf裡面,包括'\n' int lenght = fread (buffer, 1, M, stdin); double *nums = transform_num (buffer, lenght); time_t end_time = clock (); fclose (stdin); cout << "fread_read time: " << end_time - start_time << "ms" << endl; } void product_data () { uniform_int_distribution<unsigned> u(0, 9999); default_random_engine e (time(0)); freopen ("data.txt", "w", stdout); for (int i = 0; i < 10000000; i++){ cout << (double)u(e)/1000 << endl; } fclose(stdout); } int main() { cout << "*******************************" << endl; cin_read(); cout << "*******************************" << endl; scanf_read(); cout << "*******************************" << endl; fread_read(); cout << "*******************************" << endl; return 0; }
完。