From e5000d299f84432f63b30c0bd43841046c704f04 Mon Sep 17 00:00:00 2001 From: Deepak Nibade Date: Sat, 28 Apr 2018 07:34:46 -0700 Subject: gpu: nvgpu: implement new host semaphore methods for Volta Semaphore methods currently being used in Volta are deprecated for future chips. And on Volta we support both old and new methods. So replace old methods by new methods on Volta itself so that new methods get tested on silicon. Implement below HALs for Volta with new semaphore methods: gops.fifo.add_sema_cmd() to insert HOST semaphore acquire/release methods; gops.fifo.get_sema_wait_cmd_size() to get size of acquire command buffer; gops.fifo.get_sema_incr_cmd_size() to get size of release command buffer. Also use new methods in these APIs: gv11b_fifo_add_syncpt_wait_cmd(), gv11b_fifo_add_syncpt_incr_cmd(). And change corresponding APIs to reflect correct size of command buffer: gv11b_fifo_get_syncpt_wait_cmd_size(), gv11b_fifo_get_syncpt_incr_cmd_size(). Jira NVGPUT-16 Change-Id: Ia3a37cd0560ddb54761dfea9bd28c4384cd8a11c Signed-off-by: Deepak Nibade Reviewed-on: https://git-master.nvidia.com/r/1704518 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv100/hal_gv100.c | 6 +- drivers/gpu/nvgpu/gv11b/fifo_gv11b.c | 114 +++++++++++++++++++------- drivers/gpu/nvgpu/gv11b/fifo_gv11b.h | 6 ++ drivers/gpu/nvgpu/gv11b/hal_gv11b.c | 6 +- drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c | 6 +- 5 files changed, 98 insertions(+), 40 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index c64a06ca..4a710689 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -597,9 +597,9 @@ static const struct gpu_ops gv100_ops = { .runlist_hw_submit = gk20a_fifo_runlist_hw_submit, .runlist_wait_pending = gk20a_fifo_runlist_wait_pending, .ring_channel_doorbell = gv11b_ring_channel_doorbell, - .get_sema_wait_cmd_size = gk20a_fifo_get_sema_wait_cmd_size, - 
.get_sema_incr_cmd_size = gk20a_fifo_get_sema_incr_cmd_size, - .add_sema_cmd = gk20a_fifo_add_sema_cmd, + .get_sema_wait_cmd_size = gv11b_fifo_get_sema_wait_cmd_size, + .get_sema_incr_cmd_size = gv11b_fifo_get_sema_incr_cmd_size, + .add_sema_cmd = gv11b_fifo_add_sema_cmd, }, .gr_ctx = { .get_netlist_name = gr_gv100_get_netlist_name, diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c index b374e517..9843c7de 100644 --- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c @@ -1659,6 +1659,55 @@ void gv11b_fifo_deinit_eng_method_buffers(struct gk20a *g, nvgpu_log_info(g, "eng method buffers de-allocated"); } +u32 gv11b_fifo_get_sema_wait_cmd_size(void) +{ + return 10; +} + +u32 gv11b_fifo_get_sema_incr_cmd_size(void) +{ + return 12; +} + +void gv11b_fifo_add_sema_cmd(struct gk20a *g, + struct nvgpu_semaphore *s, u64 sema_va, + struct priv_cmd_entry *cmd, + u32 off, bool acquire, bool wfi) +{ + nvgpu_log_fn(g, " "); + + /* sema_addr_lo */ + nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010017); + nvgpu_mem_wr32(g, cmd->mem, off++, sema_va & 0xffffffff); + + /* sema_addr_hi */ + nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010018); + nvgpu_mem_wr32(g, cmd->mem, off++, (sema_va >> 32) & 0xff); + + /* payload_lo */ + nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010019); + nvgpu_mem_wr32(g, cmd->mem, off++, nvgpu_semaphore_get_value(s)); + + /* payload_hi : ignored */ + nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001a); + nvgpu_mem_wr32(g, cmd->mem, off++, 0); + + if (acquire) { + /* sema_execute : acq_strict_geq | switch_en | 32bit */ + nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001b); + nvgpu_mem_wr32(g, cmd->mem, off++, 0x2 | (1 << 12)); + } else { + /* sema_execute : release | wfi | 32bit */ + nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001b); + nvgpu_mem_wr32(g, cmd->mem, off++, + 0x1 | ((wfi ? 
0x1 : 0x0) << 20)); + + /* non_stall_int : payload is ignored */ + nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010008); + nvgpu_mem_wr32(g, cmd->mem, off++, 0); + } +} + #ifdef CONFIG_TEGRA_GK20A_NVHOST static int set_syncpt_ro_map_gpu_va_locked(struct vm_gk20a *vm) { @@ -1751,28 +1800,30 @@ void gv11b_fifo_add_syncpt_wait_cmd(struct gk20a *g, off = cmd->off + off; - /* semaphore_a */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010004); - nvgpu_mem_wr32(g, cmd->mem, off++, - (gpu_va >> 32) & 0xff); - /* semaphore_b */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010005); - /* offset */ + /* sema_addr_lo */ + nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010017); nvgpu_mem_wr32(g, cmd->mem, off++, gpu_va & 0xffffffff); - /* semaphore_c */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010006); - /* payload */ + /* sema_addr_hi */ + nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010018); + nvgpu_mem_wr32(g, cmd->mem, off++, (gpu_va >> 32) & 0xff); + + /* payload_lo */ + nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010019); nvgpu_mem_wr32(g, cmd->mem, off++, thresh); - /* semaphore_d */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010007); - /* operation: acq_geq, switch_en */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x4 | (0x1 << 12)); + + /* payload_hi : ignored */ + nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001a); + nvgpu_mem_wr32(g, cmd->mem, off++, 0); + + /* sema_execute : acq_strict_geq | switch_en | 32bit */ + nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001b); + nvgpu_mem_wr32(g, cmd->mem, off++, 0x2 | (1 << 12)); } u32 gv11b_fifo_get_syncpt_wait_cmd_size(void) { - return 8; + return 10; } u32 gv11b_fifo_get_syncpt_incr_per_release(void) @@ -1788,30 +1839,31 @@ void gv11b_fifo_add_syncpt_incr_cmd(struct gk20a *g, nvgpu_log_fn(g, " "); - /* semaphore_a */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010004); - nvgpu_mem_wr32(g, cmd->mem, off++, - (gpu_va >> 32) & 0xff); - /* semaphore_b */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010005); - /* offset */ + /* sema_addr_lo */ + nvgpu_mem_wr32(g, cmd->mem, off++, 
0x20010017); nvgpu_mem_wr32(g, cmd->mem, off++, gpu_va & 0xffffffff); - /* semaphore_c */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010006); - /* payload */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x0); - /* semaphore_d */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010007); + /* sema_addr_hi */ + nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010018); + nvgpu_mem_wr32(g, cmd->mem, off++, (gpu_va >> 32) & 0xff); + + /* payload_lo */ + nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010019); + nvgpu_mem_wr32(g, cmd->mem, off++, 0); + + /* payload_hi : ignored */ + nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001a); + nvgpu_mem_wr32(g, cmd->mem, off++, 0); - /* operation: 4 byte payload, release, wfi */ + /* sema_execute : release | wfi | 32bit */ + nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001b); nvgpu_mem_wr32(g, cmd->mem, off++, - (0x1 << 24) | 0x2 | ((wfi_cmd ? 0x0 : 0x1) << 20)); + 0x1 | ((wfi_cmd ? 0x1 : 0x0) << 20)); } u32 gv11b_fifo_get_syncpt_incr_cmd_size(bool wfi_cmd) { - return 8; + return 10; } #endif /* CONFIG_TEGRA_GK20A_NVHOST */ diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.h b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.h index 9157c300..3f58f927 100644 --- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.h +++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.h @@ -106,6 +106,12 @@ void gv11b_fifo_free_syncpt_buf(struct channel_gk20a *c, struct nvgpu_mem *syncpt_buf); int gv11b_fifo_get_sync_ro_map(struct vm_gk20a *vm, u64 *base_gpuva, u32 *sync_size); +u32 gv11b_fifo_get_sema_wait_cmd_size(void); +u32 gv11b_fifo_get_sema_incr_cmd_size(void); +void gv11b_fifo_add_sema_cmd(struct gk20a *g, + struct nvgpu_semaphore *s, u64 sema_va, + struct priv_cmd_entry *cmd, + u32 off, bool acquire, bool wfi); void gv11b_fifo_add_syncpt_wait_cmd(struct gk20a *g, struct priv_cmd_entry *cmd, u32 off, u32 id, u32 thresh, u64 gpu_va_base); diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index 7bfcc1dd..290a9452 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ 
b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -566,9 +566,9 @@ static const struct gpu_ops gv11b_ops = { .runlist_hw_submit = gk20a_fifo_runlist_hw_submit, .runlist_wait_pending = gk20a_fifo_runlist_wait_pending, .ring_channel_doorbell = gv11b_ring_channel_doorbell, - .get_sema_wait_cmd_size = gk20a_fifo_get_sema_wait_cmd_size, - .get_sema_incr_cmd_size = gk20a_fifo_get_sema_incr_cmd_size, - .add_sema_cmd = gk20a_fifo_add_sema_cmd, + .get_sema_wait_cmd_size = gv11b_fifo_get_sema_wait_cmd_size, + .get_sema_incr_cmd_size = gv11b_fifo_get_sema_incr_cmd_size, + .add_sema_cmd = gv11b_fifo_add_sema_cmd, }, .gr_ctx = { .get_netlist_name = gr_gv11b_get_netlist_name, diff --git a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c index 7bc053e8..9fe76573 100644 --- a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c @@ -425,9 +425,9 @@ static const struct gpu_ops vgpu_gv11b_ops = { .runlist_hw_submit = gk20a_fifo_runlist_hw_submit, .runlist_wait_pending = gk20a_fifo_runlist_wait_pending, .ring_channel_doorbell = gv11b_ring_channel_doorbell, - .get_sema_wait_cmd_size = gk20a_fifo_get_sema_wait_cmd_size, - .get_sema_incr_cmd_size = gk20a_fifo_get_sema_incr_cmd_size, - .add_sema_cmd = gk20a_fifo_add_sema_cmd, + .get_sema_wait_cmd_size = gv11b_fifo_get_sema_wait_cmd_size, + .get_sema_incr_cmd_size = gv11b_fifo_get_sema_incr_cmd_size, + .add_sema_cmd = gv11b_fifo_add_sema_cmd, }, .gr_ctx = { .get_netlist_name = gr_gv11b_get_netlist_name, -- cgit v1.2.2