mirror of
https://github.com/compiler-explorer/compiler-explorer.git
synced 2025-12-27 09:23:52 -05:00
- Add examples for CUDA - Update CONTRIBUTORS.md
This commit is contained in:
@@ -113,3 +113,4 @@ From oldest to newest contributor, we would like to thank:
|
||||
- [Marcus Geelnard](https://github.com/mbitsnbites)
|
||||
- [Haneef Mubarak](https://github.com/haneefmubarak)
|
||||
- [Jeremy Rifkin](https://github.com/jeremy-rifkin)
|
||||
- [Ankur Saini](https://github.com/Arsenic-ATG)
|
||||
|
||||
9
examples/cuda/Add_two_arrays.cu
Normal file
9
examples/cuda/Add_two_arrays.cu
Normal file
@@ -0,0 +1,9 @@
|
||||
/**
 * Element-wise integer addition: result[i] = array1[i] + array2[i]
 * for every i in [0, size).
 *
 * Each thread starts at its flat global index (computed from threadIdx.x,
 * blockIdx.x and blockDim.x) and advances by the total number of threads in
 * the grid (gridDim.x * blockDim.x), so any 1-D launch configuration covers
 * the whole range.
 *
 * @param array1  first input array, at least `size` elements
 * @param array2  second input array, at least `size` elements
 * @param result  output array, at least `size` elements
 * @param size    number of elements to process; values <= 0 are a no-op
 */
__global__ void elementwise_add(const int * array1,
                                const int * array2, int * result, int size) {
    // A negative size would be converted to a huge unsigned bound by the loop
    // comparison below and send every thread past the end of the arrays.
    if (size <= 0) {
        return;
    }

    // Do the index arithmetic in size_t so that neither the grid-wide thread
    // count nor the running index can wrap around in 32 bits.
    const size_t count = static_cast<size_t>(size);
    const size_t stride = static_cast<size_t>(gridDim.x) * blockDim.x;
    const size_t first = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;

    for (size_t idx = first; idx < count; idx += stride) {
        result[idx] = array1[idx] + array2[idx];
    }
}
|
||||
36
examples/cuda/Max_array.cu
Normal file
36
examples/cuda/Max_array.cu
Normal file
@@ -0,0 +1,36 @@
|
||||
// Evaluates to the larger of x and y. Every use of an argument is wrapped in
// parentheses so that expressions containing lower-precedence operators
// (e.g. MAX(a, b & c)) group as written. Each argument may be evaluated twice,
// so do not pass expressions with side effects.
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
|
||||
|
||||
/**
 * Folds the maximum of array[0..n) into *max.
 *
 * Phases:
 *   1. Every thread scans the input starting at its flat global index and
 *      stepping by gridDim.x * blockDim.x, keeping a private running maximum.
 *   2. The block combines the per-thread maxima in shared memory.
 *   3. Thread 0 of each block merges the block maximum into *max while
 *      holding the spin lock stored at *mutex.
 *
 * Launch requirements:
 *   - Only the .x components of threadIdx/blockIdx/blockDim/gridDim are used.
 *   - blockDim.x must not exceed 256, the number of slots in the shared
 *     scratch array. Any block size up to that limit works, including sizes
 *     that are not a power of two.
 *   - *mutex must be 0 before the launch: the lock is taken by swapping
 *     0 -> 1, so a non-zero initial value would spin forever.
 *   - *max must be initialised by the caller; the kernel only ever raises it.
 *
 * @param array  input values, at least n elements
 * @param max    in/out: running maximum, updated under the lock
 * @param mutex  spin-lock word (0 = free, 1 = held)
 * @param n      number of input elements; n == 0 leaves *max untouched
 */
__global__ void find_max(const int * array, int * max, int * mutex, unsigned int n) {
    // Empty input: nothing to fold in, and array[0] must not be read. n is the
    // same for every thread, so the whole block returns together and no thread
    // is left waiting at a barrier below.
    if (n == 0) {
        return;
    }

    __shared__ int partial_res[256];

    // size_t arithmetic keeps the grid-wide thread count and the running index
    // from wrapping around in 32 bits when n is close to UINT_MAX.
    const size_t stride = static_cast<size_t>(gridDim.x) * blockDim.x;
    const size_t first = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;

    // array[0] exists (n > 0) and is a member of the input, so it is a safe
    // seed even for threads whose index range is empty.
    int local_max = array[0];
    for (size_t input_idx = first; input_idx < n; input_idx += stride) {
        local_max = MAX(local_max, array[input_idx]);
    }

    partial_res[threadIdx.x] = local_max;

    __syncthreads();

    // Reduction. `active` is the number of live slots; each round folds the
    // upper part onto the lower part. Rounding the split point up keeps the
    // middle slot alive when `active` is odd, so no element is dropped for
    // block sizes that are not a power of two.
    unsigned int active = blockDim.x;
    while (active > 1) {
        const unsigned int half = (active + 1) / 2;
        if (threadIdx.x < active / 2) {
            partial_res[threadIdx.x] = MAX(partial_res[threadIdx.x], partial_res[threadIdx.x + half]);
        }

        // Outside the branch: every thread of the block must reach the barrier.
        __syncthreads();
        active = half;
    }

    if (threadIdx.x == 0) {
        // Acquire the lock (0 -> 1). Only one thread per block spins here.
        while (atomicCAS(mutex, 0, 1) != 0) {
        }
        // Atomics do not order the surrounding plain accesses; fence so the
        // read of *max below is not satisfied by a value from before the lock.
        __threadfence();

        // volatile forces real memory accesses instead of a cached/register copy.
        volatile int * shared_max = max;
        const int current = *shared_max;
        *shared_max = MAX(current, partial_res[0]);

        // Publish the new maximum before the lock is visibly released.
        __threadfence();
        atomicExch(mutex, 0);
    }
}
|
||||
31
examples/cuda/Sum_over_array.cu
Normal file
31
examples/cuda/Sum_over_array.cu
Normal file
@@ -0,0 +1,31 @@
|
||||
/**
 * Adds the sum of array[0..n) to *total.
 *
 * Phases:
 *   1. Every thread accumulates a private partial sum, starting at its flat
 *      global index and stepping by gridDim.x * blockDim.x.
 *   2. The block combines the per-thread sums in shared memory.
 *   3. Thread 0 of each block adds the block sum to *total with atomicAdd.
 *
 * Launch requirements:
 *   - Only the .x components of threadIdx/blockIdx/blockDim/gridDim are used.
 *   - blockDim.x must not exceed 256, the number of slots in the shared
 *     scratch array. Any block size up to that limit works, including sizes
 *     that are not a power of two.
 *   - *total must be initialised by the caller (e.g. to 0); the kernel only
 *     adds to it.
 *
 * @param array  input values, at least n elements
 * @param total  in/out: accumulator that receives every block's sum
 * @param n      number of input elements; n == 0 adds 0
 */
__global__ void sum_array(const int * array, int * total, unsigned int n) {
    __shared__ int partial_res[256];

    // size_t arithmetic keeps the grid-wide thread count and the running index
    // from wrapping around in 32 bits when n is close to UINT_MAX.
    const size_t stride = static_cast<size_t>(gridDim.x) * blockDim.x;
    const size_t first = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;

    int partial_sum = 0;
    for (size_t input_idx = first; input_idx < n; input_idx += stride) {
        partial_sum += array[input_idx];
    }

    partial_res[threadIdx.x] = partial_sum;

    __syncthreads();

    // Reduction. `active` is the number of live slots; each round folds the
    // upper part onto the lower part. Rounding the split point up keeps the
    // middle slot alive when `active` is odd, so no partial sum is dropped for
    // block sizes that are not a power of two.
    unsigned int active = blockDim.x;
    while (active > 1) {
        const unsigned int half = (active + 1) / 2;
        if (threadIdx.x < active / 2) {
            partial_res[threadIdx.x] += partial_res[threadIdx.x + half];
        }

        // Outside the branch: every thread of the block must reach the barrier.
        __syncthreads();
        active = half;
    }

    // One atomic per block merges the block result into the global total.
    if (threadIdx.x == 0) {
        atomicAdd(total, partial_res[0]);
    }
}
|
||||
Reference in New Issue
Block a user