程式師世界 >> 編程語言 >> C語言 >> C++ >> C++入門知識 >> C++AMP介紹（一）

C++AMP介紹（一）

編輯：C++入門知識

C++AMP介紹（一）

最後更新日期：2014-05-02

閱讀前提：

環境：Windows 8.1 64bit英文版，Visual Studio 2013 Professional Update 1英文版，Nvidia Quadro K600顯卡

內容簡介

介紹C++ AMP如何使用加速器(GPU)的並發執行能力。通過兩個盡可能簡潔的程序，讓用戶了解到如何把AMP應用到自己的程序開發當中。

正文

C++ AMP（C++ Accelerated Massive Parallelism）利用數據並行硬件（例如獨立顯卡上的GPU）的性能，加速C++程序的執行。C++ AMP編程模型包括多維數組、索引、內存傳輸、平鋪（tiling）以及數學函數庫。你可以使用C++ AMP的語言擴展來控制數據在CPU與GPU之間的傳遞，從而提高性能。

C++ AMP要求你的顯卡完整支持DirectX11硬件特性。

在Visual Studio中建立Win32控制台項目，下面是我的第一個C++ AMP應用程序的源代碼：

#include "stdafx.h"

#include <amp.h>
#include <iostream>
using namespace concurrency;

const int size = 5;

void CppAmpMethod() {
	int aCPP[] = { 1, 2, 3, 4, 5 };
	int bCPP[] = { 6, 7, 8, 9, 10 };
	int sumCPP[size];

	//concurrency::array_view是AMP的數據包裝器，可作為智能指針使用，代表了一維或多維數組。
	//第一個模板參數是數據類型，第二個模板參數是維度。
	//第一個構造參數是數組中元素的數量，第二個構造參數是數組
	array_view a(size, aCPP);
	array_view b(size, bCPP);
	array_view sum(size, sumCPP);

	//調用dsicard_data方法，是為了避免sum包裝器中的數據復制到GPU
	//此方法的調用不能出現在有restrict(amp)約束的上下文(代碼段)中
	sum.discard_data();

	parallel_for_each(
		//sum.extent代表計算域，在這上面將會建立線程集合
		//因為數組中有5個元素，所以會建立5根線程分別運行
		sum.extent,
		//Lambda表達式定義在加速器上各個線程將會運行的代碼
		//restrict(amp)是Microsoft AMP引入的約束符號，要求Lambda運行在GPU上
		//默認值是restrict(cpu)約束在CPU上運行，所以不加約束可以在任何標准C++編譯器中正確編譯
		//約束還可以是restrict(cpu,amp)，沒有其它。
		//index類用來索引array_view中的元素,index模板參數表示idx的維度
		[=](index<1> idx) restrict(amp)
	{
		//restrict(amp)約束使lambda表達式無法捕獲到外面的引用型和指針型變量
		//只能使用concurrency::array_view容器，輸入輸出數據
		sum[idx] = a[idx] + b[idx];
	}
	);

	// 打印輸出結果. 正確的輸出應該是 "7, 9, 11, 13, 15".
	for (int i = 0; i < size; i++) {
		std::cout << sum[i] << "\n";
	}

	//更新sum包裝器指向的數據源，即sumCPP中的數據（元素）
	sum.synchronize();

	// 打印輸出結果. 正確的輸出應該是 "7, 9, 11, 13, 15".
	for (int i = 0; i < size; i++) {
		std::cout << sumCPP[i] << "\n";
	}
}
// Entry point of the first sample: runs the array-addition demo, then
// invokes the shell's "pause" command before exiting with status 0.
// The command-line arguments are not used.
int _tmain(int argc, _TCHAR* argv[]) {
	CppAmpMethod();

	// Hand control to the shell's "pause" command before returning.
	system("pause");
	return 0;
}

第二個C++ AMP程序演示如何自己編寫帶restrict(amp)修飾的函數，以及如何調用它。

#include "stdafx.h"

#include <amp.h>
#include <amp_math.h>
#include <iostream>
using namespace concurrency;

const int size = 5;

//帶restrict(amp)約束的函數只能使用C++標准的子集，稱為kernel函數，
//在GPU上運行，只能被帶有restrict(amp)約束的上下文(代碼段)調用
void AddElementsWithRestrictedFunction(
	index<1> idx, array_view sum, array_view a, array_view b) restrict(amp)
{
	sum[idx] = a[idx] + b[idx];
}


void AddArraysWithFunction() {

	int aCPP[] = { 1, 2, 3, 4, 5 };
	int bCPP[] = { 6, 7, 8, 9, 10 };
	int sumCPP[5];

	array_view a(5, aCPP);
	array_view b(5, bCPP);
	array_view sum(5, sumCPP);
	sum.discard_data();

	parallel_for_each(
		sum.extent,
		[=](index<1> idx) restrict(amp)
	{
		//調用restrict(amp)約束的函數
		AddElementsWithRestrictedFunction(idx, sum, a, b);
	}
	);

	for (int i = 0; i < 5; i++) {
		std::cout << sum[i] << "\n";
	}
}

/*
C++ AMP 帶了兩個數學庫， 在名字空間Concurrency::precise_math的雙精度庫，也提供單精度數學函數。
在Concurrency::fast_math名字空間的單精度庫，只提供單精度數學函數。
可以使用accelerator::supports_double_precision屬性判斷GPU是否支持雙精度庫。
這些帶restrict(amp)約束的數學函數在頭文件中聲明。
標准C++庫頭文件中聲明的數學函數在fast_math和precise_math空間中都能找到。
*/
void MathExample() {

	double numbers[] = { 1.0, 10.0, 60.0, 100.0, 600.0, 1000.0 };
	array_view logs(6, numbers);

	parallel_for_each(
		logs.extent,
		[=](index<1> idx) restrict(amp) {
		logs[idx] = concurrency::fast_math::log10(logs[idx]);
	}
	);

	for (int i = 0; i < 6; i++) {
		std::cout << logs[i] << "\n";
	}
}

// Entry point of the second sample: runs both demos in order, then invokes
// the shell's "pause" command before exiting with status 0.
// The command-line arguments are not used.
int _tmain(int argc, _TCHAR* argv[]) {
	// Exercise the restrict(amp) function written in this file.
	AddArraysWithFunction();

	// Exercise the restrict(amp) math functions that C++ AMP provides.
	MathExample();

	// Hand control to the shell's "pause" command before returning.
	system("pause");
	return 0;
}

現在你應該已經學會了C++AMP的編程方式，下一篇介紹C++ AMP關於性能優化方面的基本知識。