summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/os/linux/ioctl_as.c
diff options
context:
space:
mode:
authorJoshua Bakita <jbakita@cs.unc.edu>2022-05-30 12:20:48 -0400
committerJoshua Bakita <jbakita@cs.unc.edu>2022-05-31 11:32:12 -0400
commit745b3ef2ac4d7afa99202e6afc441e3f0b97f5b4 (patch)
treea3bf20ee2975666318a7275231d15fec451c1b10 /drivers/gpu/nvgpu/os/linux/ioctl_as.c
parentff66847a00ac27d8d94b3664ec156a195dbf3676 (diff)
gpu-paging: Support asynchronous paging
- Fully enables *_ASYNC API
- Allows page mapping to be overlapped with I/O, resulting in an 11% speedup to synchronous reads

Benchmarks, 1,000 iters, before:
  gpu_paging_speed, write: 185.5ms +/- 3.58
  gpu_paging_speed, read: 180.5ms +/- 1.42
  gpu_paging_overhead_speed, write start: 183.3ms +/- 3.89
  gpu_paging_overhead_speed, write finish: 3.4ms +/- 2.61
  gpu_paging_overhead_speed, read start: 181.6ms +/- 3.34
  gpu_paging_overhead_speed, read finish: 41.1ms +/- 2.69

Benchmarks, 1,000 iters, after:
  gpu_paging_speed, write: 185.8ms +/- 3.70
  gpu_paging_speed, read: 161.3ms +/- 0.97
  gpu_paging_overhead_speed, write start: 38.9ms +/- 5.47
  gpu_paging_overhead_speed, write finish: 3.1ms +/- 2.42
  gpu_paging_overhead_speed, read start: 79.4ms +/- 6.42
  gpu_paging_overhead_speed, read finish: 44.3ms +/- 1.53
Diffstat (limited to 'drivers/gpu/nvgpu/os/linux/ioctl_as.c')
-rw-r--r--drivers/gpu/nvgpu/os/linux/ioctl_as.c13
1 files changed, 6 insertions, 7 deletions
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_as.c b/drivers/gpu/nvgpu/os/linux/ioctl_as.c
index af6cdb5b..6348bb2a 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_as.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_as.c
@@ -365,9 +365,9 @@ static int nvgpu_as_ioctl_write_swap_buffer(
365 // (Assuming that NVMe DRAM accesses are uncached) 365 // (Assuming that NVMe DRAM accesses are uncached)
366 gk20a_mm_l2_flush(g, false); 366 gk20a_mm_l2_flush(g, false);
367 367
368 // Copy out (blocking) TODO: non-blocking 368 // Copy out (non-blocking)
369 // Could fail on inaccessible swap device, etc 369 // Could fail on inaccessible swap device, etc
370 err = copy_out(m->os_priv.sgt); 370 err = copy_out(m->os_priv.sgt, m);
371 371
372out: 372out:
373 return err; 373 return err;
@@ -393,7 +393,7 @@ static int nvgpu_as_ioctl_write_swap_buffer_finish(
393 nvgpu_log_fn(g, " "); 393 nvgpu_log_fn(g, " ");
394 394
395 // Wait for the pages to get written out 395 // Wait for the pages to get written out
396 //wait_for_completion_io(m->os_priv.swap_completion); 396 wait_for_completion_io(&m->os_priv.swap_io_done);
397 397
398 // Unpin needs to happen after copy out is done 398 // Unpin needs to happen after copy out is done
399 // (No return value check as it's a void function) 399 // (No return value check as it's a void function)
@@ -448,9 +448,8 @@ static int nvgpu_as_ioctl_read_swap_buffer(
448 // Do any bookkeeping not done by gk20a_mm_pin() 448 // Do any bookkeeping not done by gk20a_mm_pin()
449 m->os_priv.sgt = sgt; 449 m->os_priv.sgt = sgt;
450 450
451 // Reload page contents from disk (blocking) 451 // Reload page contents from disk (non-blocking)
452 // TODO: non-blocking 452 err = copy_in(sgt, m);
453 err = copy_in(sgt);
454 if (err) { 453 if (err) {
455 int err2; 454 int err2;
456 // Rollback pinning and allocation 455 // Rollback pinning and allocation
@@ -487,7 +486,7 @@ static int nvgpu_as_ioctl_read_swap_buffer_finish(
487 // Invalidate L2 so that TLB refill does not load stale PT 486 // Invalidate L2 so that TLB refill does not load stale PT
488 gk20a_mm_l2_flush(g, true); 487 gk20a_mm_l2_flush(g, true);
489 // Wait for read to complete if it hasn't yet 488 // Wait for read to complete if it hasn't yet
490 //wait_for_completion_io(m->os_priv.swap_completion); 489 wait_for_completion_io(&m->os_priv.swap_io_done);
491 490
492 return err; 491 return err;
493} 492}