配置VS2013 OpenCL環境

1. 安裝CUDA安裝包

==================

由于目前的CUDA安裝包自帶顯卡驅動、CUDA工具和OpenCL的SDK,因此只需安裝CUDA安裝包即可;其中OpenCL相關內容的默認目錄有:

  • CL文件夾的目錄:C:\Program Files\NVIDIA GPU Computing
    Toolkit\CUDA\v7.0\include

  • OpenCL.lib文件目錄:C:\Program Files\NVIDIA GPU Computing
    Toolkit\CUDA\v7.0\lib

  • OpenCL.dll文件目錄:C:\Program Files\NVIDIA Corporation\OpenCL

2. 新建空項目

==============

可以通過VS2013的VC++模板新建一個空項目;


圖 1.png

3. 添加文件

============

為了驗證配置的正確性,需要為項目添加兩個文件:cl_kernel.cl和main.cpp。

  1. 添加cl_kernel.cl文件
    在項目所在的目錄下新建一個cl_kernel.cl文件,其內容如附錄1所示,目錄結構如圖1所示。同時在VS2013的項目中將cl_kernel.cl文件添加到項目的“源文件”篩選器中,如圖2所示。
圖 2.png
圖 3.png
  2. 添加main.cpp文件

類似cl_kernel.cl文件的操作,同樣將main.cpp文件添加到項目中,其內容如附錄2所示。

4. 配置CL目錄

==============

需要將OpenCL的SDK的頭文件包含到項目中,具體操作方法為:

在項目->屬性->配置屬性->C/C++->常規->附加包含目錄->配置,然后添加CL文件夾的目錄:C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.0\include。如圖3所示。

圖 4.png

5. 配置預處理器

================

項目->屬性->配置屬性->C/C++->預處理器->預處理器定義->編輯,然后添加“_CRT_SECURE_NO_WARNINGS”,否則編譯main.cpp中的fopen時會報錯(error C4996)。

圖 5.png

6. 配置外部依賴OpenCL.lib目錄

==============================

具體操作:項目->屬性->配置屬性->鏈接器->常規->附加庫目錄。然后將OpenCL.lib文件所在的目錄添加進去,其中需要注意的是將程序Debug成32位和64位平臺時添加的OpenCL.lib目錄是不同的,如圖4所示,是Debug成Win32平臺,所以只加“C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.0\lib\Win32”路徑;若是Debug成X64,則添加的路徑為“C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.0\lib\x64”。同時需要在“啟用增量鏈接”選項中選否。

圖 6.png

圖 7.png

7. 配置OpenCL.lib文件

==================
項目->屬性->配置屬性->鏈接器->輸入->附加依賴項->編輯,接著添加OpenCL.lib


圖 8.png

8. 運行結果圖

==============


圖 9.png

附錄

附錄1:cl_kernel.cl文件

/*
 * MyCLAdd - element-wise integer addition kernel.
 *
 * Each work-item takes its global id along dimension 0 as an index,
 * reads one element from src1 and one from src2 at that index, and
 * stores their sum into dst at the same index.
 */
__kernel void MyCLAdd(__global int *dst, __global int *src1, __global int *src2)
{
    const int gid = get_global_id(0);
    const int lhs = src1[gid];
    const int rhs = src2[gid];

    dst[gid] = lhs + rhs;
}

附錄2:main.cpp文件

#include <CL/cl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iostream>
using namespace std;

int main(void){
    cl_uint numPlatforms = 0;           //the NO. of platforms
    cl_platform_id platform = nullptr;  //the chosen platform
    cl_context context = nullptr;       // OpenCL context
    cl_command_queue commandQueue = nullptr;
    cl_program program = nullptr;       // OpenCL kernel program object that'll be running on the compute device
    cl_mem input1MemObj = nullptr;      // input1 memory object for input argument 1
    cl_mem input2MemObj = nullptr;      // input2 memory object for input argument 2
    cl_mem outputMemObj = nullptr;      // output memory object for output
    cl_kernel kernel = nullptr;         // kernel object

    cl_int    status = clGetPlatformIDs(0, NULL, &numPlatforms);
    if (status != CL_SUCCESS)
    {
        cout << "Error: Getting platforms!" << endl;
        return 0;
    }

    /*For clarity, choose the first available platform. */
    if (numPlatforms > 0)
    {
        cl_platform_id* platforms = (cl_platform_id*)malloc(numPlatforms* sizeof(cl_platform_id));
        status = clGetPlatformIDs(numPlatforms, platforms, NULL);
        platform = platforms[0];
        free(platforms);
    }
    else
    {
        puts("Your system does not have any OpenCL platform!");
        return 0;
    }

    /*Step 2:Query the platform and choose the first GPU device if has one.Otherwise use the CPU as device.*/
    cl_uint                numDevices = 0;
    cl_device_id        *devices;
    status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
    if (numDevices == 0) //no GPU available.
    {
        cout << "No GPU device available." << endl;
        cout << "Choose CPU as default device." << endl;
        status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 0, NULL, &numDevices);
        devices = (cl_device_id*)malloc(numDevices * sizeof(cl_device_id));

        status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, numDevices, devices, NULL);
    }
    else
    {
        devices = (cl_device_id*)malloc(numDevices * sizeof(cl_device_id));
        status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevices, devices, NULL);
        cout << "The number of devices: " << numDevices << endl;
    }

    /*Step 3: Create context.*/
    context = clCreateContext(NULL, 1, devices, NULL, NULL, NULL);

    /*Step 4: Creating command queue associate with the context.*/
    commandQueue = clCreateCommandQueue(context, devices[0], 0, NULL);

    /*Step 5: Create program object */
    // Read the kernel code to the buffer
    FILE *fp = fopen("cl_kernel.cl", "rb");

    //錯誤    1   error C4996 : 'fopen' : This function or variable may be unsafe.Consider using fopen_s instead.To disable deprecation, use _CRT_SECURE_NO_WARNINGS.See online help for details.c : \users\zyj\documents\visual studio 2013\projects\project3\project3\main.cpp  67  1   Project3


    if (fp == nullptr)
    {
        puts("The kernel file not found!");
        goto RELEASE_RESOURCES;
    }
    fseek(fp, 0, SEEK_END);
    size_t kernelLength = ftell(fp);
    fseek(fp, 0, SEEK_SET);
    char *kernelCodeBuffer = (char*)malloc(kernelLength + 1);
    fread(kernelCodeBuffer, 1, kernelLength, fp);
    kernelCodeBuffer[kernelLength] = '\0';
    fclose(fp);

    const char *aSource = kernelCodeBuffer;
    program = clCreateProgramWithSource(context, 1, &aSource, &kernelLength, NULL);

    /*Step 6: Build program. */
    status = clBuildProgram(program, 1, devices, NULL, NULL, NULL);

    /*Step 7: Initial inputs and output for the host and create memory objects for the kernel*/
    int __declspec(align(32)) input1Buffer[128];    // 32 bytes alignment to improve data copy
    int __declspec(align(32)) input2Buffer[128];
    int __declspec(align(32)) outputBuffer[128];

    // Do initialization
    int i;
    for (i = 0; i < 128; i++)
        input1Buffer[i] = input2Buffer[i] = i + 1;
    memset(outputBuffer, 0, sizeof(outputBuffer));

    // Create mmory object
    input1MemObj = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, 128 * sizeof(int), input1Buffer, nullptr);
    input2MemObj = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, 128 * sizeof(int), input2Buffer, nullptr);
    outputMemObj = clCreateBuffer(context, CL_MEM_WRITE_ONLY, 128 * sizeof(int), NULL, NULL);

    /*Step 8: Create kernel object */
    kernel = clCreateKernel(program, "MyCLAdd", NULL);

    /*Step 9: Sets Kernel arguments.*/
    status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&outputMemObj);
    status = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&input1MemObj);
    status = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&input2MemObj);

    /*Step 10: Running the kernel.*/
    size_t global_work_size[1] = { 128 };
    status = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
    clFinish(commandQueue);     // Force wait until the OpenCL kernel is completed

    /*Step 11: Read the cout put back to host memory.*/
    status = clEnqueueReadBuffer(commandQueue, outputMemObj, CL_TRUE, 0, global_work_size[0] * sizeof(int), outputBuffer, 0, NULL, NULL);

    printf("Veryfy the rsults... ");
    for (i = 0; i < 128; i++)
    {
        if (outputBuffer[i] != (i + 1) * 2)
        {
            puts("Results not correct!");
            break;
        }
    }
    if (i == 128)
        puts("Correct!");
RELEASE_RESOURCES:
    /*Step 12: Clean the resources.*/
    status = clReleaseKernel(kernel);//*Release kernel.
    status = clReleaseProgram(program);    //Release the program object.
    status = clReleaseMemObject(input1MemObj);//Release mem object.
    status = clReleaseMemObject(input2MemObj);
    status = clReleaseMemObject(outputMemObj);
    status = clReleaseCommandQueue(commandQueue);//Release  Command queue.
    status = clReleaseContext(context);//Release context.

    free(devices);
}
最后編輯于
©著作權歸作者所有,轉載或內容合作請聯系作者
【社區內容提示】社區部分內容疑似由AI輔助生成,瀏覽時請結合常識與多方信息審慎甄別。
平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳并發布,文章內容僅代表作者本人觀點,簡書系信息發布平臺,僅提供信息存儲服務。

相關(guān)閱讀更多精彩內(nèi)容

友情鏈接更多精彩內(nèi)容