【 聲明:版權所有,歡迎轉載,請勿用於商業用途。 聯系信箱:feixiaoxing @163.com】
假設我們有一個1億個數據,其中數據的范圍是0~1億,也就是100M的數據。但是這個數組中丟了一些數據,比如說少了5啊,少了10啊,那麼有什麼辦法可以把這些丟失的數據找回來呢?這個題目不難,但是它可以幫助我們拓展思路,不斷提高算法的運行效率。
對於這個問題,我們一個最簡單的思路就是對各個數據進行flag判斷,然後依次輸出數據。
copy to clipboardprint?void get_lost_number(int data[], int length)
{
int index;
assert(NULL != data && 0 != length);
unsigned char* pFlag = (unsigned char*)malloc(length * sizeof(unsigned char));
memset(pFlag, 0, length * sizeof(unsigned char));
for(index = 0; index < length; index ++){
if(0 == pFlag[data[index]])
pFlag[data[index]] = 1;
}
for(index = 0; index < length; index++){
if(0 == pFlag[index])
printf("%d\n", index);
}
free(pFlag);
return;
}
void get_lost_number(int data[], int length)
{
int index;
assert(NULL != data && 0 != length);
unsigned char* pFlag = (unsigned char*)malloc(length * sizeof(unsigned char));
memset(pFlag, 0, length * sizeof(unsigned char));
for(index = 0; index < length; index ++){
if(0 == pFlag[data[index]])
pFlag[data[index]] = 1;
}
for(index = 0; index < length; index++){
if(0 == pFlag[index])
printf("%d\n", index);
}
free(pFlag);
return;
} 可能朋友也看到了,上面的代碼需要分配和原來數據一樣length的空間。其實我們可以用bit進行訪問標志的設定,所以我們申請的空間還可以減少。
copy to clipboardprint?void get_lost_number(int data[], int length)
{
int index;
assert(NULL != data && 0 != length);
unsigned char* pFlag = (unsigned char*)malloc((length + 7) >> 3);
memset(pFlag, 0, length * sizeof(unsigned char));
for(index = 0; index < length; index ++){
if(0 == (pFlag[data[index] >> 3] & (1 << (data[index] % 8))))
pFlag[data[index] >> 3] |= 1 << (data[index] % 8);
}
for(index = 0; index < length; index++){
if(0 == (pFlag[data[index] >> 3] & (1 << (data[index] % 8))))
printf("%d\n", index);
}
free(pFlag);
return;
}
void get_lost_number(int data[], int length)
{
int index;
assert(NULL != data && 0 != length);
unsigned char* pFlag = (unsigned char*)malloc((length + 7) >> 3);
memset(pFlag, 0, length * sizeof(unsigned char));
for(index = 0; index < length; index ++){
if(0 == (pFlag[data[index] >> 3] & (1 << (data[index] % 8))))
pFlag[data[index] >> 3] |= 1 << (data[index] % 8);
}
for(index = 0; index < length; index++){
if(0 == (pFlag[data[index] >> 3] & (1 << (data[index] % 8))))
printf("%d\n", index);
}
free(pFlag);
return;
} 上面的代碼已經在空間上面有所減小,那麼有什麼辦法並行運算這些數據呢?
copy to clipboardprint?void get_lost_number(int data[], int length)
{
int index;
RANGE range[4] = {0};
assert(NULL != data && 0 != length);
unsigned char* pFlag = (unsigned char*)malloc((length + 7) >> 3);
memset(pFlag, 0, length * sizeof(unsigned char));
range[0].start = 0, range[0].end = length >> 2;
range[1].start = length >> 2 , range[1].end = length >> 1;
range[2].start = length >> 1 , range[2].end = length >> 2 * 3;
range[3].start = length >> 2 * 3, range[3].end = length;
#pragma omp parallel for
for(index = 0; index < 4; index ++){
_get_lost_number(data, range[index].start, range[index].end, pFlag);
}
for(index = 0; index < length; index++){
if(0 == (pFlag[data[index] >> 3] & (1 << (data[index] % 8))))
printf("%d\n", index);
}
free(pFlag);
return;
}
void get_lost_number(int data[], int length)
{
int index;
RANGE range[4] = {0};
assert(NULL != data && 0 != length);
unsigned char* pFlag = (unsigned char*)malloc((length + 7) >> 3);
memset(pFlag, 0, length * sizeof(unsigned char));
range[0].start = 0, range[0].end = length >> 2;
range[1].start = length >> 2 , range[1].end = length >> 1;
range[2].start = length >> 1 , range[2].end = length >> 2 * 3;
range[3].start = length >> 2 * 3, range[3].end = length;
#pragma omp parallel for
for(index = 0; index < 4; index ++){
_get_lost_number(data, range[index].start, range[index].end, pFlag);
}
for(index = 0; index < length; index++){
if(0 == (pFlag[data[index] >> 3] & (1 << (data[index] % 8))))
printf("%d\n", index);
}
free(pFlag);
return;
} 為了多核的並行計算,我們添加了子函數_get_lost,我們進一步補充完整。
copy to clipboardprint?typedef struct _RANGE
{
int start;
int end;
}RANGE;
void _get_lost_number(int data[], int start, int end, unsigned char pFlag[])
{
int index;
for(index = start; index < end; index++){
if(0 == (pFlag[data[index] >> 3] & (1 << (data[index] % 8))))
pFlag[data[index] >> 3] |= 1 << (data[index] % 8);
}
}
typedef struct _RANGE
{
int start;
int end;
}RANGE;
void _get_lost_number(int data[], int start, int end, unsigned char pFlag[])
{
int index;
for(index = start; index < end; index++){
if(0 == (pFlag[data[index] >> 3] & (1 << (data[index] % 8))))
pFlag[data[index] >> 3] |= 1 << (data[index] % 8);
}
}
工作總結:
(1)代碼的優化是可以不斷進行得,但是不見得適用於所有的場景
(2)目前的cpu已經開始從2核->4核->8核轉變,朋友們在可能的情況下盡量多掌握一些多核編程的知識。