今天心血來潮,想為傳統的卷積算法實現一個不採用各種加速方式、僅優化算法邏輯的純淨版本。
寫完發現性能還可以,特發出來分享之,若有博友在此基礎上,進行了再次優化,那就更贊了。
算法很簡單:
/* Headers required by this listing (allocation, memcpy, size limits). */
#include <limits.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

/*
 * Saturate n to the range [0, 255].
 *
 * Written with plain comparisons instead of sign-bit shifts so the result
 * does not depend on how the platform right-shifts negative integers.
 * `static` gives the helper internal linkage, so the listing links in C
 * even when the compiler decides not to inline the call.
 */
static inline unsigned char Clamp2Byte(int n)
{
    if (n < 0) {
        return 0;
    }
    if (n > 255) {
        return 255;
    }
    return (unsigned char)n;
}

/* Integer division rounding toward negative infinity; den must be > 0. */
static int FloorDiv(int num, int den)
{
    int q = num / den;

    if (num % den < 0) {
        q--;
    }
    return q;
}

/*
 * Fill out[0..taps-1] with the byte offsets of `taps` consecutive
 * coordinates starting at (center - half), wrapped into [0, extent) and
 * multiplied by stride. Requires center < extent and extent > 0.
 */
static void FillWrappedOffsets(size_t *out, size_t taps, size_t center,
                               size_t half, size_t extent, size_t stride)
{
    /* `half % extent` keeps the subtraction non-negative for any kernel size. */
    size_t idx = (center + extent - half % extent) % extent;

    for (size_t t = 0; t < taps; t++) {
        out[t] = idx * stride;
        if (++idx == extent) {
            idx = 0;
        }
    }
}

/*
 * Apply a square integer kernel to an interleaved 8-bit image, in place.
 *
 * data     - width * height * channels bytes, row-major, overwritten with
 *            the result
 * width    - image width in pixels
 * height   - image height in pixels
 * channels - 1 (gray), 3 or 4; with 4 channels only the first three are
 *            filtered and the fourth byte of every pixel is copied through
 * filter   - filterW * filterW weights; the tap at horizontal offset fx and
 *            vertical offset fy is filter[fx * filterW + fy], applied
 *            without flipping
 * filterW  - kernel side length
 * cfactor  - divisor applied to each weighted sum (must be non-zero)
 * bias     - value added after the division, before saturating to [0, 255]
 *
 * Pixels outside the image are taken from the opposite edge (wrap-around).
 * The quotient is rounded toward negative infinity.
 *
 * The function has no way to report errors: on NULL pointers, an empty
 * image, cfactor == 0, an unsupported channel count, a size overflow or an
 * allocation failure it returns and leaves `data` untouched.
 *
 * The weighted sums are accumulated in `int`; very large kernels combined
 * with very large weights can overflow it.
 */
void Convolution2D(unsigned char *data, unsigned int width, unsigned int height,
                   unsigned int channels, int *filter, unsigned char filterW,
                   unsigned char cfactor, unsigned char bias)
{
    if (data == NULL || filter == NULL || width == 0 || height == 0 ||
        cfactor == 0) {
        return;
    }
    if (channels != 1 && channels != 3 && channels != 4) {
        return;
    }

    const size_t w = width;
    const size_t h = height;
    const size_t ch = channels;

    /* Reject sizes whose byte count does not fit in size_t. */
    if (w > SIZE_MAX / ch || w * ch > SIZE_MAX / h) {
        return;
    }
    const size_t row_bytes = w * ch;
    const size_t total = row_bytes * h;

    unsigned char *tmp = malloc(total);
    if (tmp == NULL) {
        return;
    }

    const size_t taps = filterW;
    const size_t half = taps / 2;
    const size_t colors = (ch == 4) ? 3 : ch;
    const int divisor = cfactor;

    /* filterW is an unsigned char, so UCHAR_MAX entries always suffice. */
    size_t row_off[UCHAR_MAX];
    size_t col_off[UCHAR_MAX];

    for (size_t y = 0; y < h; y++) {
        /* Wrapped source rows depend only on y: compute them once per row. */
        FillWrappedOffsets(row_off, taps, y, half, h, row_bytes);

        for (size_t x = 0; x < w; x++) {
            FillWrappedOffsets(col_off, taps, x, half, w, ch);

            int acc[3] = { 0, 0, 0 };

            for (size_t fx = 0; fx < taps; fx++) {
                const int *weights = filter + fx * taps;
                const unsigned char *column = data + col_off[fx];

                for (size_t fy = 0; fy < taps; fy++) {
                    const unsigned char *px = column + row_off[fy];
                    const int weight = weights[fy];

                    for (size_t c = 0; c < colors; c++) {
                        acc[c] += px[c] * weight;
                    }
                }
            }

            const size_t p = y * row_bytes + x * ch;

            for (size_t c = 0; c < colors; c++) {
                tmp[p + c] = Clamp2Byte(FloorDiv(acc[c], divisor) + bias);
            }
            if (ch == 4) {
                /* Keep the fourth channel instead of leaving it unset. */
                tmp[p + 3] = data[p + 3];
            }
        }
    }

    memcpy(data, tmp, total);
    free(tmp);
}
調用例子:
/*
 * Example calls. Each one passes the kernel, its side length, the divisor
 * (cfactor) and the bias, in that order, after the image arguments.
 */

/* Blur */
int Blurfilter[25] = {
    0, 0, 1, 0, 0,
    0, 1, 1, 1, 0,
    1, 1, 1, 1, 1,
    0, 1, 1, 1, 0,
    0, 0, 1, 0, 0
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, Blurfilter, 5, 13, 0);

/* Motion blur */
int MotionBlurfilter[81] = {
    1, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 1, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 1, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 1, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 1, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 1, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 1, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 1
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, MotionBlurfilter, 9, 9, 0);

/* Edge detection 1 */
int edges1filter[25] = {
    -1,  0, 0,  0,  0,
     0, -2, 0,  0,  0,
     0,  0, 6,  0,  0,
     0,  0, 0, -2,  0,
     0,  0, 0,  0, -1
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, edges1filter, 5, 1, 0);

/* Edge detection 2 */
int edges2filter[9] = {
    -1, -1, -1,
    -1,  8, -1,
    -1, -1, -1
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, edges2filter, 3, 1, 0);

/* Sharpen 1 */
int sharpen1filter[9] = {
    -1, -1, -1,
    -1,  9, -1,
    -1, -1, -1
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, sharpen1filter, 3, 1, 0);

/* Sharpen 2 */
int sharpen2filter[25] = {
    -1, -1, -1, -1, -1,
    -1,  2,  2,  2, -1,
    -1,  2,  8,  2, -1,
    -1,  2,  2,  2, -1,
    -1, -1, -1, -1, -1
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, sharpen2filter, 5, 8, 0);

/* Sharpen 3 */
int sharpen3filter[9] = {
    1,  1, 1,
    1, -7, 1,
    1,  1, 1
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, sharpen3filter, 3, 1, 0);

/* Emboss 1 */
int Embossfilter[9] = {
    -1, -1, 0,
    -1,  0, 1,
     0,  1, 1
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, Embossfilter, 3, 1, 128);

/* Emboss 2 */
int emboss2filter[25] = {
    -1, -1, -1, -1, 0,
    -1, -1, -1,  0, 1,
    -1, -1,  0,  1, 1,
    -1,  0,  1,  1, 1,
     0,  1,  1,  1, 1
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, emboss2filter, 5, 1, 128);

/* Mean blur 1 */
int meanfilter[9] = {
    1, 1, 1,
    1, 1, 1,
    1, 1, 1
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, meanfilter, 3, 9, 0);

/* Mean blur 2 */
int mean2filter[81] = {
    1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1
};
Convolution2D(imgData, imgWidth, imgHeight, imgChannels, mean2filter, 9, 81, 0);
博主對一張大小為 960x1280 的圖片進行了邊緣探測卷積核的處理,在博主的機器上耗時約 100 毫秒。
//邊緣探測1 int edges1filter[25] = { -1, 0, 0, 0, 0, 0, -2, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, -2, 0, 0, 0, 0, 0, -1, }; Convolution2D(imgData, imgWidth, imgHeight, imgChannels, edges1filter, 5, 1, 0);
效果圖:
其他相關資料,見各種百科網站。
關鍵詞:卷積(英語:Convolution)