From 0301cc01f6cbfb752290bc63a2ed4eb19129c7c1 Mon Sep 17 00:00:00 2001 From: Deepak Nibade Date: Mon, 23 Apr 2018 04:18:33 -0700 Subject: gpu: nvgpu: add HAL to insert semaphore commands Add below new HALs gops.fifo.add_sema_cmd() to insert HOST semaphore acquire/release methods gops.fifo.get_sema_wait_cmd_size() to get size of acquire command buffer gops.fifo.get_sema_incr_cmd_size() to get size of release command buffer Separate out new API gk20a_fifo_add_sema_cmd() to implement semaphore acquire/ release sequence and set it to gops.fifo.add_sema_cmd() Add gk20a_fifo_get_sema_wait_cmd_size() and gk20a_fifo_get_sema_incr_cmd_size() to return respective command buffer sizes Jira NVGPUT-16 Change-Id: Ia81a50921a6a56ebc237f2f90b137268aaa2d749 Signed-off-by: Deepak Nibade Reviewed-on: https://git-master.nvidia.com/r/1704490 Reviewed-by: mobile promotions Tested-by: mobile promotions --- .../gpu/nvgpu/common/linux/os_fence_android_sema.c | 4 +- drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 38 +-------------- drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 54 ++++++++++++++++++++++ drivers/gpu/nvgpu/gk20a/fifo_gk20a.h | 6 +++ drivers/gpu/nvgpu/gk20a/gk20a.h | 6 +++ drivers/gpu/nvgpu/gm20b/hal_gm20b.c | 3 ++ drivers/gpu/nvgpu/gp106/hal_gp106.c | 3 ++ drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 3 ++ drivers/gpu/nvgpu/gv100/hal_gv100.c | 3 ++ drivers/gpu/nvgpu/gv11b/hal_gv11b.c | 3 ++ drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c | 3 ++ drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c | 3 ++ 12 files changed, 92 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/nvgpu/common/linux/os_fence_android_sema.c b/drivers/gpu/nvgpu/common/linux/os_fence_android_sema.c index 2bb71c99..25832417 100644 --- a/drivers/gpu/nvgpu/common/linux/os_fence_android_sema.c +++ b/drivers/gpu/nvgpu/common/linux/os_fence_android_sema.c @@ -34,12 +34,14 @@ int nvgpu_os_fence_sema_wait_gen_cmd(struct nvgpu_os_fence *s, int max_wait_cmds) { int err; - const int wait_cmd_size = 8; + int wait_cmd_size; int num_wait_cmds; int i; struct nvgpu_semaphore *sema; struct sync_fence *sync_fence = nvgpu_get_sync_fence(s); + wait_cmd_size = c->g->ops.fifo.get_sema_wait_cmd_size(); + num_wait_cmds = sync_fence->num_fences; if (num_wait_cmds == 0) return 0; diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index 68fbb738..7a664bf8 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c @@ -372,41 +372,7 @@ static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c, if (!acquire) nvgpu_semaphore_prepare(s, c->hw_sema); - /* semaphore_a */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010004); - /* offset_upper */ - nvgpu_mem_wr32(g, cmd->mem, off++, (va >> 32) & 0xff); - /* semaphore_b */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010005); - /* offset */ - nvgpu_mem_wr32(g, cmd->mem, off++, va & 0xffffffff); - - if (acquire) { - /* semaphore_c */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010006); - /* payload */ - nvgpu_mem_wr32(g, cmd->mem, off++, - nvgpu_semaphore_get_value(s)); - /* semaphore_d */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010007); - /* operation: acq_geq, switch_en */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x4 | (0x1 << 12)); - } else { - /* semaphore_c */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010006); - /* payload */ - nvgpu_mem_wr32(g, cmd->mem, off++, - nvgpu_semaphore_get_value(s)); - /* semaphore_d */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010007); - /* operation: release, wfi */ - nvgpu_mem_wr32(g, cmd->mem, off++, - 0x2 | ((wfi ? 0x0 : 0x1) << 20)); - /* non_stall_int */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010008); - /* ignored */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0); - } + g->ops.fifo.add_sema_cmd(g, s, va, cmd, off, acquire, wfi); if (acquire) gpu_sema_verbose_dbg(g, "(A) c=%d ACQ_GE %-4u pool=%-3d" @@ -495,7 +461,7 @@ static int __gk20a_channel_semaphore_incr( return -ENOMEM; } - incr_cmd_size = 10; + incr_cmd_size = c->g->ops.fifo.get_sema_incr_cmd_size(); err = gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, incr_cmd); if (err) { nvgpu_err(c->g, diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 0c3e8039..aada3065 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c @@ -4077,6 +4077,60 @@ const char *gk20a_fifo_interleave_level_name(u32 interleave_level) } } +u32 gk20a_fifo_get_sema_wait_cmd_size(void) +{ + return 8; +} + +u32 gk20a_fifo_get_sema_incr_cmd_size(void) +{ + return 10; +} + +void gk20a_fifo_add_sema_cmd(struct gk20a *g, + struct nvgpu_semaphore *s, u64 sema_va, + struct priv_cmd_entry *cmd, + u32 off, bool acquire, bool wfi) +{ + nvgpu_log_fn(g, " "); + + /* semaphore_a */ + nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010004); + /* offset_upper */ + nvgpu_mem_wr32(g, cmd->mem, off++, (sema_va >> 32) & 0xff); + /* semaphore_b */ + nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010005); + /* offset */ + nvgpu_mem_wr32(g, cmd->mem, off++, sema_va & 0xffffffff); + + if (acquire) { + /* semaphore_c */ + nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010006); + /* payload */ + nvgpu_mem_wr32(g, cmd->mem, off++, + nvgpu_semaphore_get_value(s)); + /* semaphore_d */ + nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010007); + /* operation: acq_geq, switch_en */ + nvgpu_mem_wr32(g, cmd->mem, off++, 0x4 | (0x1 << 12)); + } else { + /* semaphore_c */ + nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010006); + /* payload */ + nvgpu_mem_wr32(g, cmd->mem, off++, + nvgpu_semaphore_get_value(s)); + /* semaphore_d */ + nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010007); + /* operation: release, wfi */ + nvgpu_mem_wr32(g, cmd->mem, off++, + 0x2 | ((wfi ? 0x0 : 0x1) << 20)); + /* non_stall_int */ + nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010008); + /* ignored */ + nvgpu_mem_wr32(g, cmd->mem, off++, 0); + } +} + #ifdef CONFIG_TEGRA_GK20A_NVHOST void gk20a_fifo_add_syncpt_wait_cmd(struct gk20a *g, struct priv_cmd_entry *cmd, u32 off, diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h index 20533f5d..7216302c 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h @@ -444,4 +444,10 @@ void gk20a_fifo_get_mmu_fault_info(struct gk20a *g, u32 mmu_fault_id, void gk20a_fifo_get_mmu_fault_desc(struct mmu_fault_info *mmfault); void gk20a_fifo_get_mmu_fault_client_desc(struct mmu_fault_info *mmfault); void gk20a_fifo_get_mmu_fault_gpc_desc(struct mmu_fault_info *mmfault); +u32 gk20a_fifo_get_sema_wait_cmd_size(void); +u32 gk20a_fifo_get_sema_incr_cmd_size(void); +void gk20a_fifo_add_sema_cmd(struct gk20a *g, + struct nvgpu_semaphore *s, u64 sema_va, + struct priv_cmd_entry *cmd, + u32 off, bool acquire, bool wfi); #endif /*__GR_GK20A_H__*/ diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 23e85ee9..17f662df 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -678,6 +678,12 @@ struct gpu_ops { u32 count, u32 buffer_index); int (*runlist_wait_pending)(struct gk20a *g, u32 runlist_id); void (*ring_channel_doorbell)(struct channel_gk20a *c); + u32 (*get_sema_wait_cmd_size)(void); + u32 (*get_sema_incr_cmd_size)(void); + void (*add_sema_cmd)(struct gk20a *g, + struct nvgpu_semaphore *s, u64 sema_va, + struct priv_cmd_entry *cmd, + u32 off, bool acquire, bool wfi); } fifo; struct pmu_v { u32 (*get_pmu_cmdline_args_size)(struct nvgpu_pmu *pmu); diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index 26b3f61a..bbeaa40f 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c @@ -467,6 +467,9 @@ static const struct gpu_ops gm20b_ops = { #endif .runlist_hw_submit = gk20a_fifo_runlist_hw_submit, .runlist_wait_pending = gk20a_fifo_runlist_wait_pending, + .get_sema_wait_cmd_size = gk20a_fifo_get_sema_wait_cmd_size, + .get_sema_incr_cmd_size = gk20a_fifo_get_sema_incr_cmd_size, + .add_sema_cmd = gk20a_fifo_add_sema_cmd, }, .gr_ctx = { .get_netlist_name = gr_gm20b_get_netlist_name, diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c index 6d3154e3..4111ac7d 100644 --- a/drivers/gpu/nvgpu/gp106/hal_gp106.c +++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c @@ -531,6 +531,9 @@ static const struct gpu_ops gp106_ops = { .device_info_fault_id = top_device_info_data_fault_id_enum_v, .runlist_hw_submit = gk20a_fifo_runlist_hw_submit, .runlist_wait_pending = gk20a_fifo_runlist_wait_pending, + .get_sema_wait_cmd_size = gk20a_fifo_get_sema_wait_cmd_size, + .get_sema_incr_cmd_size = gk20a_fifo_get_sema_incr_cmd_size, + .add_sema_cmd = gk20a_fifo_add_sema_cmd, }, .gr_ctx = { .get_netlist_name = gr_gp106_get_netlist_name, diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index 79eeb25a..e4bf0fd7 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -504,6 +504,9 @@ static const struct gpu_ops gp10b_ops = { .device_info_fault_id = top_device_info_data_fault_id_enum_v, .runlist_hw_submit = gk20a_fifo_runlist_hw_submit, .runlist_wait_pending = gk20a_fifo_runlist_wait_pending, + .get_sema_wait_cmd_size = gk20a_fifo_get_sema_wait_cmd_size, + .get_sema_incr_cmd_size = gk20a_fifo_get_sema_incr_cmd_size, + .add_sema_cmd = gk20a_fifo_add_sema_cmd, }, .gr_ctx = { .get_netlist_name = gr_gp10b_get_netlist_name, diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index dc404b7a..c64a06ca 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -597,6 +597,9 @@ static const struct gpu_ops gv100_ops = { .runlist_hw_submit = gk20a_fifo_runlist_hw_submit, .runlist_wait_pending = gk20a_fifo_runlist_wait_pending, .ring_channel_doorbell = gv11b_ring_channel_doorbell, + .get_sema_wait_cmd_size = gk20a_fifo_get_sema_wait_cmd_size, + .get_sema_incr_cmd_size = gk20a_fifo_get_sema_incr_cmd_size, + .add_sema_cmd = gk20a_fifo_add_sema_cmd, }, .gr_ctx = { .get_netlist_name = gr_gv100_get_netlist_name, diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index c62dee26..7bfcc1dd 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -566,6 +566,9 @@ static const struct gpu_ops gv11b_ops = { .runlist_hw_submit = gk20a_fifo_runlist_hw_submit, .runlist_wait_pending = gk20a_fifo_runlist_wait_pending, .ring_channel_doorbell = gv11b_ring_channel_doorbell, + .get_sema_wait_cmd_size = gk20a_fifo_get_sema_wait_cmd_size, + .get_sema_incr_cmd_size = gk20a_fifo_get_sema_incr_cmd_size, + .add_sema_cmd = gk20a_fifo_add_sema_cmd, }, .gr_ctx = { .get_netlist_name = gr_gv11b_get_netlist_name, diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c index 5630e406..708c52a1 100644 --- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c +++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c @@ -377,6 +377,9 @@ static const struct gpu_ops vgpu_gp10b_ops = { .device_info_fault_id = top_device_info_data_fault_id_enum_v, .runlist_hw_submit = gk20a_fifo_runlist_hw_submit, .runlist_wait_pending = gk20a_fifo_runlist_wait_pending, + .get_sema_wait_cmd_size = gk20a_fifo_get_sema_wait_cmd_size, + .get_sema_incr_cmd_size = gk20a_fifo_get_sema_incr_cmd_size, + .add_sema_cmd = gk20a_fifo_add_sema_cmd, }, .gr_ctx = { .get_netlist_name = gr_gp10b_get_netlist_name, diff --git a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c index 7b536329..7bc053e8 100644 --- a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_hal_gv11b.c @@ -425,6 +425,9 @@ static const struct gpu_ops vgpu_gv11b_ops = { .runlist_hw_submit = gk20a_fifo_runlist_hw_submit, .runlist_wait_pending = gk20a_fifo_runlist_wait_pending, .ring_channel_doorbell = gv11b_ring_channel_doorbell, + .get_sema_wait_cmd_size = gk20a_fifo_get_sema_wait_cmd_size, + .get_sema_incr_cmd_size = gk20a_fifo_get_sema_incr_cmd_size, + .add_sema_cmd = gk20a_fifo_add_sema_cmd, }, .gr_ctx = { .get_netlist_name = gr_gv11b_get_netlist_name, -- cgit v1.2.2