#include "stdio.h"
#include <stdlib.h>

/*
 * Evaluates a CUDA runtime call and terminates the program with a
 * file/line-tagged message on stderr if it did not return cudaSuccess.
 */
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t cuda_status_ = (call);                                 \
        if (cuda_status_ != cudaSuccess) {                                 \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,  \
                    cudaGetErrorString(cuda_status_));                     \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while (0)

/*
 * Kernel: stores a + b into *dev_result.
 *
 * a, b        operands, passed by value as kernel arguments.
 * dev_result  pointer the sum is written through; the kernel dereferences
 *             it, so it must be a device-accessible address.
 *
 * The kernel does no thread indexing: every launched thread performs the
 * same store of the same value, so a <<<1, 1>>> launch is sufficient.
 */
__global__ void add(int a, int b, int *dev_result) {
    *dev_result = a + b;
}

/*
 * Prints `prompt`, then reads one decimal integer from stdin into *out.
 * Returns 1 on success, 0 if no integer could be parsed (bad input or EOF).
 */
static int readInt(const char *prompt, int *out) {
    printf("%s", prompt);
    fflush(stdout);  /* make sure the prompt is emitted before blocking on input */
    return scanf("%d", out) == 1;
}

/*
 * Reads two integers from stdin, adds them with the `add` kernel, and
 * prints the sum. Returns 0 on success and EXIT_FAILURE on invalid input;
 * a failing CUDA call terminates the process via CUDA_CHECK.
 */
int main() {
    int a = 0, b = 0, c = 0;
    int *dev_result = NULL;

    /* Validate input before touching the device so nothing has to be released. */
    if (!readInt("a = ", &a) || !readInt("b = ", &b)) {
        fprintf(stderr, "error: expected an integer\n");
        return EXIT_FAILURE;
    }

    CUDA_CHECK(cudaMalloc((void **) &dev_result, sizeof(int)));

    /* One thread is enough: the kernel writes a single int. */
    add<<<1, 1>>>(a, b, dev_result);
    /* A launch returns no status itself; launch errors are read back here. */
    CUDA_CHECK(cudaGetLastError());

    /* Copies from GPU memory back to main memory: */
    CUDA_CHECK(cudaMemcpy(&c, dev_result, sizeof(int), cudaMemcpyDeviceToHost));

    printf("%d + %d is %d\n", a, b, c);

    CUDA_CHECK(cudaFree(dev_result));
    return 0;
}