vector_addition_gpu_thread_only.cu

// Copyright (c) 2021 NVIDIA Corporation. All rights reserved.
#include <stdio.h>
#include <stdlib.h>

#define N 512

// Reference CPU version: adds the two input vectors element by element.
void host_add(int *a, int *b, int *c) {
    for (int idx = 0; idx < N; idx++)
        c[idx] = a[idx] + b[idx];
}

// GPU version: launched with a single block of N threads, so each thread
// adds exactly one element, selected by its thread index within the block.
__global__ void device_add(int *a, int *b, int *c) {
    c[threadIdx.x] = a[threadIdx.x] + b[threadIdx.x];
}
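// Illustrative sketch, not part of the original example: device_add above
// assumes the whole vector fits in one block of threads. If N exceeded the
// per-block thread limit, the usual CUDA pattern is to launch several blocks
// and build a global index from blockIdx.x, blockDim.x and threadIdx.x. The
// kernel name device_add_multi_block is hypothetical and is not called by
// main() below.
__global__ void device_add_multi_block(int *a, int *b, int *c) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;  // global element index
    if (idx < N)                                      // guard against surplus threads
        c[idx] = a[idx] + b[idx];
}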
// Fills the array so that each element holds its own index.
void fill_array(int *data) {
    for (int idx = 0; idx < N; idx++)
        data[idx] = idx;
}

// Prints every addition so the result can be checked by eye.
void print_output(int *a, int *b, int *c) {
    for (int idx = 0; idx < N; idx++)
        printf("\n %d + %d = %d", a[idx], b[idx], c[idx]);
}
int main(void) {
    int *a, *b, *c;        // host copies of a, b, c
    int *d_a, *d_b, *d_c;  // device copies of a, b, c
    int size = N * sizeof(int);

    // Allocate space for host copies of a, b, c and set up input values
    a = (int *)malloc(size); fill_array(a);
    b = (int *)malloc(size); fill_array(b);
    c = (int *)malloc(size);

    // Allocate space for device copies of a, b, c
    cudaMalloc((void **)&d_a, size);
    cudaMalloc((void **)&d_b, size);
    cudaMalloc((void **)&d_c, size);

    // Copy inputs to the device
    cudaMemcpy(d_a, a, size, cudaMemcpyHostToDevice);
    cudaMemcpy(d_b, b, size, cudaMemcpyHostToDevice);

    // Launch one block of N threads: one thread per vector element
    device_add<<<1, N>>>(d_a, d_b, d_c);

    // Copy the result back to the host
    cudaMemcpy(c, d_c, size, cudaMemcpyDeviceToHost);

    print_output(a, b, c);

    // Release host and device memory
    free(a); free(b); free(c);
    cudaFree(d_a); cudaFree(d_b); cudaFree(d_c);
    return 0;
}
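
Usage note (not part of the original file): the example is typically compiled and run with nvcc, for instance

    nvcc vector_addition_gpu_thread_only.cu -o vector_addition_gpu_thread_only
    ./vector_addition_gpu_thread_only

A minimal error-checking sketch, assuming the standard CUDA runtime API (cudaGetLastError, cudaDeviceSynchronize, cudaGetErrorString); the example as written ignores launch and execution failures, and wrapping the kernel call like this would surface them:

    device_add<<<1, N>>>(d_a, d_b, d_c);
    cudaError_t err = cudaGetLastError();   // reports invalid launch configurations
    if (err != cudaSuccess)
        printf("kernel launch failed: %s\n", cudaGetErrorString(err));
    err = cudaDeviceSynchronize();          // waits for the kernel; reports execution errors
    if (err != cudaSuccess)
        printf("kernel execution failed: %s\n", cudaGetErrorString(err));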