diff options
author | Joshua Bakita <jbakita@cs.unc.edu> | 2022-05-30 12:20:48 -0400 |
---|---|---|
committer | Joshua Bakita <jbakita@cs.unc.edu> | 2022-05-31 11:32:12 -0400 |
commit | 745b3ef2ac4d7afa99202e6afc441e3f0b97f5b4 (patch) | |
tree | a3bf20ee2975666318a7275231d15fec451c1b10 /drivers/gpu/nvgpu/os/linux/ioctl_as.c | |
parent | ff66847a00ac27d8d94b3664ec156a195dbf3676 (diff) |
gpu-paging: Support asynchronous paging
- Fully enables *_ASYNC API
- Allows page mapping to be overlapped with I/O, resulting in an 11% speedup
for synchronous reads
Benchmarks, 1,000 iters, before:
gpu_paging_speed, write: 185.5ms +/- 3.58
gpu_paging_speed, read: 180.5ms +/- 1.42
gpu_paging_overhead_speed, write start: 183.3ms +/- 3.89
gpu_paging_overhead_speed, write finish: 3.4ms +/- 2.61
gpu_paging_overhead_speed, read start: 181.6ms +/- 3.34
gpu_paging_overhead_speed, read finish: 41.1ms +/- 2.69
Benchmarks, 1,000 iters, after:
gpu_paging_speed, write: 185.8ms +/- 3.70
gpu_paging_speed, read: 161.3ms +/- 0.97
gpu_paging_overhead_speed, write start: 38.9ms +/- 5.47
gpu_paging_overhead_speed, write finish: 3.1ms +/- 2.42
gpu_paging_overhead_speed, read start: 79.4ms +/- 6.42
gpu_paging_overhead_speed, read finish: 44.3ms +/- 1.53
Diffstat (limited to 'drivers/gpu/nvgpu/os/linux/ioctl_as.c')
-rw-r--r-- | drivers/gpu/nvgpu/os/linux/ioctl_as.c | 13 |
1 files changed, 6 insertions, 7 deletions
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_as.c b/drivers/gpu/nvgpu/os/linux/ioctl_as.c index af6cdb5b..6348bb2a 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_as.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_as.c | |||
@@ -365,9 +365,9 @@ static int nvgpu_as_ioctl_write_swap_buffer( | |||
365 | // (Assuming that NVMe DRAM acceses are uncached) | 365 | // (Assuming that NVMe DRAM acceses are uncached) |
366 | gk20a_mm_l2_flush(g, false); | 366 | gk20a_mm_l2_flush(g, false); |
367 | 367 | ||
368 | // Copy out (blocking) TODO: non-blocking | 368 | // Copy out (non-blocking) |
369 | // Could fail on inaccessible swap device, etc | 369 | // Could fail on inaccessible swap device, etc |
370 | err = copy_out(m->os_priv.sgt); | 370 | err = copy_out(m->os_priv.sgt, m); |
371 | 371 | ||
372 | out: | 372 | out: |
373 | return err; | 373 | return err; |
@@ -393,7 +393,7 @@ static int nvgpu_as_ioctl_write_swap_buffer_finish( | |||
393 | nvgpu_log_fn(g, " "); | 393 | nvgpu_log_fn(g, " "); |
394 | 394 | ||
395 | // Wait for the pages to get written out | 395 | // Wait for the pages to get written out |
396 | //wait_for_completion_io(m->os_priv.swap_completion); | 396 | wait_for_completion_io(&m->os_priv.swap_io_done); |
397 | 397 | ||
398 | // Unpin needs to happen after copy out is done | 398 | // Unpin needs to happen after copy out is done |
399 | // (No return value check as it's a void function) | 399 | // (No return value check as it's a void function) |
@@ -448,9 +448,8 @@ static int nvgpu_as_ioctl_read_swap_buffer( | |||
448 | // Do any bookeeping not done by gk20a_mm_pin() | 448 | // Do any bookeeping not done by gk20a_mm_pin() |
449 | m->os_priv.sgt = sgt; | 449 | m->os_priv.sgt = sgt; |
450 | 450 | ||
451 | // Reload page contents from disk (blocking) | 451 | // Reload page contents from disk (non-blocking) |
452 | // TODO: non-blocking | 452 | err = copy_in(sgt, m); |
453 | err = copy_in(sgt); | ||
454 | if (err) { | 453 | if (err) { |
455 | int err2; | 454 | int err2; |
456 | // Rollback pinning and allocation | 455 | // Rollback pinning and allocation |
@@ -487,7 +486,7 @@ static int nvgpu_as_ioctl_read_swap_buffer_finish( | |||
487 | // Invalidate L2 so that TLB refill does not load stale PT | 486 | // Invalidate L2 so that TLB refill does not load stale PT |
488 | gk20a_mm_l2_flush(g, true); | 487 | gk20a_mm_l2_flush(g, true); |
489 | // Wait for read to complete if it hasn't yet | 488 | // Wait for read to complete if it hasn't yet |
490 | //wait_for_completion_io(m->os_priv.swap_completion); | 489 | wait_for_completion_io(&m->os_priv.swap_io_done); |
491 | 490 | ||
492 | return err; | 491 | return err; |
493 | } | 492 | } |