define 16 define define BLOCKS 10 define THREADS 64 define OUT_OF_ARRA

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
// Number of elements in each result vector; also the bounds limit checked in kernel_task1.
#define N 16
// NOTE(review): not referenced in the visible part of this file — presumably a second
// dimension (e.g. a column count) used by other tasks; confirm before changing.
#define M 8
// NOTE(review): not referenced in the visible part of this file — presumably a block
// count for other kernels; confirm.
#define BLOCKS 10
// Threads per block used when launching kernel_task1.
#define THREADS 64
// NOTE(review): not referenced in the visible part of this file — looks like a sentinel
// value. The negative literal is not parenthesized, so an expression such as
// `x OUT_OF_ARRAY` or `x - OUT_OF_ARRAY` may not expand as intended.
#define OUT_OF_ARRAY -18
using namespace std;
// NOTE(review): `Console` (used by task1) is presumably declared in this namespace — confirm.
using namespace utils;
// Records, for every thread whose flat global index is below N, which block and
// which thread-within-block executed it.
//
// Launch layout: 1D grid of 1D blocks. Threads whose global index is >= N do nothing,
// so the grid may be larger than N.
//
// vecBlockIdx  - device array of at least N ints; element i receives the block index.
// vecThreadIdx - device array of at least N ints; element i receives the thread index
//                within its block.
__global__ void kernel_task1(int *vecBlockIdx, int *vecThreadIdx) {
	const int globalIdx = threadIdx.x + blockDim.x * blockIdx.x;

	// Surplus threads in the grid tail have no slot to write to.
	if (globalIdx >= N) {
		return;
	}

	vecThreadIdx[globalIdx] = threadIdx.x;
	vecBlockIdx[globalIdx] = blockIdx.x;
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool task1CudaOk(cudaError_t status, const char *what) {
	if (status == cudaSuccess) {
		return true;
	}
	fprintf(stderr, "\nCUDA error in task1 (%s): %s\n", what, cudaGetErrorString(status));
	return false;
}

// Runs kernel_task1 over N elements, copies the per-element block and thread
// indices back to the host, prints them, and waits for a key press.
//
// Every CUDA runtime call and the kernel launch are checked. On any failure the
// error is reported on stderr and the results are not printed, because the host
// buffers would hold uninitialized data. Device and host buffers are released on
// every path.
void task1() {
	const size_t bytes = N * sizeof(int);

	int *dev_vecBlockIdx = 0;
	int *dev_vecThreadIdx = 0;
	int *vecBlockIdx = new int[N];
	int *vecThreadIdx = new int[N];

	bool ok = task1CudaOk(cudaMalloc((void**)&dev_vecBlockIdx, bytes), "cudaMalloc dev_vecBlockIdx")
		&& task1CudaOk(cudaMalloc((void**)&dev_vecThreadIdx, bytes), "cudaMalloc dev_vecThreadIdx");

	if (ok) {
		// Ceiling division: just enough blocks to cover N elements, without the
		// spare block that `N / THREADS + 1` adds when N is a multiple of THREADS.
		const int blocks = (N + THREADS - 1) / THREADS;
		kernel_task1<<<blocks, THREADS>>>(dev_vecBlockIdx, dev_vecThreadIdx);
		// A launch returns no status itself; configuration errors surface here.
		ok = task1CudaOk(cudaGetLastError(), "kernel_task1 launch");
	}

	// The blocking copies on the default stream also wait for the kernel, so
	// errors raised while it executes are reported by these calls.
	ok = ok
		&& task1CudaOk(cudaMemcpy(vecBlockIdx, dev_vecBlockIdx, bytes, cudaMemcpyDeviceToHost),
			"cudaMemcpy vecBlockIdx")
		&& task1CudaOk(cudaMemcpy(vecThreadIdx, dev_vecThreadIdx, bytes, cudaMemcpyDeviceToHost),
			"cudaMemcpy vecThreadIdx");

	// Freeing a null device pointer is a no-op, so this is safe after a failed cudaMalloc.
	task1CudaOk(cudaFree(dev_vecBlockIdx), "cudaFree dev_vecBlockIdx");
	task1CudaOk(cudaFree(dev_vecThreadIdx), "cudaFree dev_vecThreadIdx");

	if (ok) {
		printf("\n\nResults on host.");
		printf("\n\nvecBlockIdx: \n");
		// NOTE(review): the buffers hold N ints; the literal 16 and 8 happen to equal
		// N and M. Confirm printMatrix's parameter meaning before replacing them, and
		// that it never reads more than N elements.
		Console::printMatrix(vecBlockIdx, 16, 8);
		printf("\n\nvecThreadIdx: \n");
		Console::printMatrix(vecThreadIdx, 16, 8);
	}

	// Keep the console open until the user presses a key (also after an error).
	getchar();

	delete[] vecBlockIdx;
	delete[] vecThreadIdx;
}