blob: 0ddbf9c5386098ca2631663ee4d74f4a7a53ed38 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
|
#include <inttypes.h>
#include <stdio.h>
#include <cuda_runtime_api.h>
// Returns the current value of the GPU's 64-bit %globaltimer special register.
// Callers in this file treat the value as a nanosecond count.
__device__ inline uint64_t GlobalTimer64(void) {
  // The asm statement is volatile so that every call performs a fresh read of
  // the register instead of reusing an earlier value.
  volatile uint64_t timer_reading;
  asm volatile("mov.u64 %0, %%globaltimer;" : "=l"(timer_reading));
  return timer_reading;
}
// Busy-waits on the GPU until at least `cs_length` milliseconds have elapsed
// according to the global timer. Every thread that executes the kernel spins
// independently; the host wrapper in this file launches it as <<<1, 1>>>.
//
// cs_length: spin duration in milliseconds. Zero or negative values return
//            immediately.
__global__ void cuda_loop(int cs_length) {
  const uint64_t start_time = GlobalTimer64();
  const uint64_t kNsPerMs = 1000000ULL;
  // Compute the bound in 64 bits: an int * int multiply overflows once
  // cs_length reaches 2148 ms. Clamp negatives to zero, since converting a
  // negative bound to unsigned would make the loop spin (effectively) forever.
  const uint64_t duration_ns =
      (cs_length > 0) ? static_cast<uint64_t>(cs_length) * kNsPerMs : 0ULL;
  // Unsigned subtraction keeps the elapsed-time computation correct even if
  // the timer value wraps around.
  while (GlobalTimer64() - start_time < duration_ns) {
    // Intentionally empty: pure spin-wait.
  }
}
// Asynchronously launches a single-thread GPU spin kernel that runs for 95% of
// `cs_length` milliseconds. Returns as soon as the launch has been issued; use
// wait_for_gpu_loop_end() to block until the kernel finishes.
//
// cs_length: requested duration in milliseconds.
extern "C" void gpu_loop_start(int cs_length) {
  // Explicit cast: the scaled value is truncated toward zero, exactly as the
  // implicit double -> int conversion at the launch site used to do.
  const int spin_ms = static_cast<int>(0.95 * cs_length);
  cuda_loop<<<1, 1>>>(spin_ms);
  // Kernel launches do not return a status. Peek (rather than get) so the
  // error state stays visible to any caller that inspects it afterwards.
  const cudaError_t launch_status = cudaPeekAtLastError();
  if (launch_status != cudaSuccess) {
    fprintf(stderr, "gpu_loop_start: kernel launch failed: %s\n",
            cudaGetErrorString(launch_status));
  }
}
// Blocks the calling host thread until all previously issued work on the
// current device has completed. Errors raised by that work (for example a
// fault inside a kernel) surface here and are reported on stderr.
extern "C" void wait_for_gpu_loop_end(void) {
  const cudaError_t sync_status = cudaDeviceSynchronize();
  if (sync_status != cudaSuccess) {
    fprintf(stderr, "wait_for_gpu_loop_end: device synchronize failed: %s\n",
            cudaGetErrorString(sync_status));
  }
}
// Configures the device scheduling flags and performs a small device
// allocation so that CUDA runtime initialisation happens here rather than on
// the first kernel launch.
//
// dev_mem: kept for interface compatibility. It is passed by value, so an
//          address stored into it could never reach the caller; it is ignored.
// NOTE(review): presumably the 8-byte allocation only exists to force context
// creation -- confirm no caller expects device memory to stay allocated.
extern "C" void init_gpu_context(int* dev_mem) {
  (void)dev_mem;

  // cudaDeviceScheduleYield has the value 2 that was previously hard-coded.
  cudaError_t status = cudaSetDeviceFlags(cudaDeviceScheduleYield);
  if (status != cudaSuccess) {
    fprintf(stderr, "init_gpu_context: cudaSetDeviceFlags failed: %s\n",
            cudaGetErrorString(status));
  }

  // Allocate into a local pointer and release it again. The old code wrote
  // the address into its by-value parameter, so the block could never be
  // freed by anyone.
  void* scratch = 0;
  status = cudaMalloc(&scratch, 8);
  if (status != cudaSuccess) {
    fprintf(stderr, "init_gpu_context: cudaMalloc failed: %s\n",
            cudaGetErrorString(status));
    return;
  }
  status = cudaFree(scratch);
  if (status != cudaSuccess) {
    fprintf(stderr, "init_gpu_context: cudaFree failed: %s\n",
            cudaGetErrorString(status));
  }
}
|