From c0822cb22e13204e06b145ae950a33d45e95918e Mon Sep 17 00:00:00 2001 From: seshendra Gadagottu Date: Fri, 17 Mar 2017 11:30:57 -0700 Subject: gpu: nvgpu: add chip specific sync point support Added support for chip specific sync point implementation. Relevant fifo hal functions are added and updated for legacy chips. JIRA GPUT19X-2 Change-Id: I9a9c36d71e15c384b5e5af460cd52012f94e0b04 Signed-off-by: seshendra Gadagottu Reviewed-on: http://git-master/r/1258232 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/Makefile.nvgpu | 1 + drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 90 +++++++++++----------------- drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 83 +++++++++++++++++++++++++ drivers/gpu/nvgpu/gk20a/fifo_gk20a.h | 16 +++++ drivers/gpu/nvgpu/gk20a/gk20a.h | 21 +++++++ drivers/gpu/nvgpu/gm20b/fifo_gm20b.c | 10 ++++ 6 files changed, 166 insertions(+), 55 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu index 109d1e4d..e21a9426 100644 --- a/drivers/gpu/nvgpu/Makefile.nvgpu +++ b/drivers/gpu/nvgpu/Makefile.nvgpu @@ -18,6 +18,7 @@ ccflags-y += -I$(srctree)/../nvgpu-t19x/drivers/gpu/nvgpu ccflags-y += -I$(srctree)/../nvgpu-t19x/drivers/gpu/nvgpu/include ccflags-y += -I$(srctree)/../nvgpu-t19x/include ccflags-y += -I$(srctree)/../nvgpu-t19x/include/uapi +ccflags-y += -I$(srctree)/../nvhost-t19x/include endif obj-$(CONFIG_GK20A) := nvgpu.o diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index eac7dbc3..07157dc9 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c @@ -42,22 +42,9 @@ struct gk20a_channel_syncpt { struct channel_gk20a *c; struct platform_device *host1x_pdev; u32 id; + struct nvgpu_mem syncpt_buf; }; -static void add_wait_cmd(struct gk20a *g, struct priv_cmd_entry *cmd, u32 off, - u32 id, u32 thresh) -{ - off = cmd->off + off; - /* syncpoint_a */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001C); - /* payload */ - nvgpu_mem_wr32(g, cmd->mem, off++, thresh); - /* syncpoint_b */ - nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001D); - /* syncpt_id, switch_en, wait */ - nvgpu_mem_wr32(g, cmd->mem, off++, (id << 8) | 0x10); -} - static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s, u32 id, u32 thresh, struct priv_cmd_entry *wait_cmd, struct gk20a_fence *fence) @@ -75,14 +62,18 @@ static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s, if (nvhost_syncpt_is_expired_ext(sp->host1x_pdev, id, thresh)) return 0; - err = gk20a_channel_alloc_priv_cmdbuf(c, 4, wait_cmd); + err = gk20a_channel_alloc_priv_cmdbuf(c, + c->g->ops.fifo.get_syncpt_wait_cmd_size(), wait_cmd); if (err) { nvgpu_err(c->g, "not enough priv cmd buffer space"); return err; } - add_wait_cmd(c->g, wait_cmd, 0, id, thresh); + nvgpu_log(c->g, gpu_dbg_info, "sp->id %d gpu va %llx", + id, sp->syncpt_buf.gpu_va); + c->g->ops.fifo.add_syncpt_wait_cmd(c->g, wait_cmd, 0, id, + thresh, sp->syncpt_buf.gpu_va); return 0; } @@ -101,6 +92,7 @@ static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd, struct channel_gk20a *c = sp->c; u32 wait_id; int err = 0; + u32 wait_cmd_size = 0; sync_fence = nvhost_sync_fdget(fd); if (!sync_fence) @@ -130,8 +122,10 @@ static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd, sync_fence_put(sync_fence); return 0; } - - err = gk20a_channel_alloc_priv_cmdbuf(c, 4 * num_wait_cmds, wait_cmd); + wait_cmd_size = c->g->ops.fifo.get_syncpt_wait_cmd_size(); + err = gk20a_channel_alloc_priv_cmdbuf(c, + wait_cmd_size * num_wait_cmds, + wait_cmd); if (err) { nvgpu_err(c->g, "not enough priv cmd buffer space"); @@ -152,13 +146,16 @@ static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd, if (nvhost_syncpt_is_expired_ext(sp->host1x_pdev, wait_id, wait_value)) { - /* each wait_cmd is 4 u32s */ nvgpu_memset(c->g, wait_cmd->mem, - (wait_cmd->off + i * 4) * sizeof(u32), - 0, 4 * sizeof(u32)); - } else - add_wait_cmd(c->g, wait_cmd, i * 4, wait_id, - wait_value); + (wait_cmd->off + i * wait_cmd_size) * sizeof(u32), + 0, wait_cmd_size * sizeof(u32)); + } else { + nvgpu_log(c->g, gpu_dbg_info, "sp->id %d gpu va %llx", + wait_id, sp->syncpt_buf.gpu_va); + c->g->ops.fifo.add_syncpt_wait_cmd(c->g, wait_cmd, + i * wait_cmd_size, wait_id, wait_value, + sp->syncpt_buf.gpu_va); + } #if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0) i++; } @@ -193,45 +190,21 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s, bool need_sync_fence) { u32 thresh; - size_t incr_cmd_size; - int off; int err; struct gk20a_channel_syncpt *sp = container_of(s, struct gk20a_channel_syncpt, ops); struct channel_gk20a *c = sp->c; - incr_cmd_size = 6; - if (wfi_cmd) - incr_cmd_size += 2; - - err = gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, incr_cmd); + err = gk20a_channel_alloc_priv_cmdbuf(c, + c->g->ops.fifo.get_syncpt_incr_cmd_size(wfi_cmd), + incr_cmd); if (err) return err; - off = incr_cmd->off; - - /* WAR for hw bug 1491360: syncpt needs to be incremented twice */ - - if (wfi_cmd) { - /* wfi */ - nvgpu_mem_wr32(c->g, incr_cmd->mem, off++, 0x2001001E); - /* handle, ignored */ - nvgpu_mem_wr32(c->g, incr_cmd->mem, off++, 0x00000000); - } - /* syncpoint_a */ - nvgpu_mem_wr32(c->g, incr_cmd->mem, off++, 0x2001001C); - /* payload, ignored */ - nvgpu_mem_wr32(c->g, incr_cmd->mem, off++, 0); - /* syncpoint_b */ - nvgpu_mem_wr32(c->g, incr_cmd->mem, off++, 0x2001001D); - /* syncpt_id, incr */ - nvgpu_mem_wr32(c->g, incr_cmd->mem, off++, (sp->id << 8) | 0x1); - /* syncpoint_b */ - nvgpu_mem_wr32(c->g, incr_cmd->mem, off++, 0x2001001D); - /* syncpt_id, incr */ - nvgpu_mem_wr32(c->g, incr_cmd->mem, off++, (sp->id << 8) | 0x1); - - WARN_ON(off - incr_cmd->off != incr_cmd_size); + nvgpu_log(c->g, gpu_dbg_info, "sp->id %d gpu va %llx", + sp->id, sp->syncpt_buf.gpu_va); + c->g->ops.fifo.add_syncpt_incr_cmd(c->g, wfi_cmd, + incr_cmd, sp->id, sp->syncpt_buf.gpu_va); thresh = nvhost_syncpt_incr_max_ext(sp->host1x_pdev, sp->id, 2); @@ -337,6 +310,10 @@ static void gk20a_channel_syncpt_destroy(struct gk20a_channel_sync *s) { struct gk20a_channel_syncpt *sp = container_of(s, struct gk20a_channel_syncpt, ops); + + + sp->c->g->ops.fifo.free_syncpt_buf(sp->c, &sp->syncpt_buf); + nvhost_syncpt_set_min_eq_max_ext(sp->host1x_pdev, sp->id); nvhost_syncpt_put_ref_ext(sp->host1x_pdev, sp->id); nvgpu_kfree(sp->c->g, sp); @@ -366,6 +343,9 @@ gk20a_channel_syncpt_create(struct channel_gk20a *c) return NULL; } + sp->c->g->ops.fifo.alloc_syncpt_buf(sp->c, sp->id, + &sp->syncpt_buf); + nvhost_syncpt_set_min_eq_max_ext(sp->host1x_pdev, sp->id); atomic_set(&sp->ops.refcount, 0); diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 05c13374..b8ff84df 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c @@ -4265,6 +4265,79 @@ u32 gk20a_fifo_pbdma_acquire_val(u64 timeout) return val; } +#ifdef CONFIG_TEGRA_GK20A_NVHOST +void gk20a_fifo_add_syncpt_wait_cmd(struct gk20a *g, + struct priv_cmd_entry *cmd, u32 off, + u32 id, u32 thresh, u64 gpu_va) +{ + gk20a_dbg_fn(""); + + off = cmd->off + off; + /* syncpoint_a */ + nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001C); + /* payload */ + nvgpu_mem_wr32(g, cmd->mem, off++, thresh); + /* syncpoint_b */ + nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001D); + /* syncpt_id, switch_en, wait */ + nvgpu_mem_wr32(g, cmd->mem, off++, (id << 8) | 0x10); +} + +u32 gk20a_fifo_get_syncpt_wait_cmd_size(void) +{ + return 4; +} + +void gk20a_fifo_add_syncpt_incr_cmd(struct gk20a *g, + bool wfi_cmd, struct priv_cmd_entry *cmd, + u32 id, u64 gpu_va) +{ + u32 off = cmd->off; + + gk20a_dbg_fn(""); + if (wfi_cmd) { + /* wfi */ + nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001E); + /* handle, ignored */ + nvgpu_mem_wr32(g, cmd->mem, off++, 0x00000000); + } + /* syncpoint_a */ + nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001C); + /* payload, ignored */ + nvgpu_mem_wr32(g, cmd->mem, off++, 0); + /* syncpoint_b */ + nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001D); + /* syncpt_id, incr */ + nvgpu_mem_wr32(g, cmd->mem, off++, (id << 8) | 0x1); + /* syncpoint_b */ + nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001D); + /* syncpt_id, incr */ + nvgpu_mem_wr32(g, cmd->mem, off++, (id << 8) | 0x1); + +} + +u32 gk20a_fifo_get_syncpt_incr_cmd_size(bool wfi_cmd) +{ + if (wfi_cmd) + return 8; + else + return 6; +} + +void gk20a_fifo_free_syncpt_buf(struct channel_gk20a *c, + struct nvgpu_mem *syncpt_buf) +{ + +} + +int gk20a_fifo_alloc_syncpt_buf(struct channel_gk20a *c, + u32 syncpt_id, struct nvgpu_mem *syncpt_buf) +{ + return 0; +} +#endif + + void gk20a_init_fifo(struct gpu_ops *gops) { gops->fifo.disable_channel = gk20a_fifo_disable_channel; @@ -4312,4 +4385,14 @@ void gk20a_init_fifo(struct gpu_ops *gops) gops->fifo.teardown_ch_tsg = gk20a_fifo_teardown_ch_tsg; gops->fifo.handle_sched_error = gk20a_fifo_handle_sched_error; gops->fifo.handle_pbdma_intr_0 = gk20a_fifo_handle_pbdma_intr_0; +#ifdef CONFIG_TEGRA_GK20A_NVHOST + gops->fifo.alloc_syncpt_buf = gk20a_fifo_alloc_syncpt_buf; + gops->fifo.free_syncpt_buf = gk20a_fifo_free_syncpt_buf; + gops->fifo.add_syncpt_wait_cmd = gk20a_fifo_add_syncpt_wait_cmd; + gops->fifo.get_syncpt_wait_cmd_size = + gk20a_fifo_get_syncpt_wait_cmd_size; + gops->fifo.add_syncpt_incr_cmd = gk20a_fifo_add_syncpt_incr_cmd; + gops->fifo.get_syncpt_incr_cmd_size = + gk20a_fifo_get_syncpt_incr_cmd_size; +#endif } diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h index 7351478a..80f1853c 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h @@ -405,4 +405,20 @@ unsigned int gk20a_fifo_handle_pbdma_intr_0(struct gk20a *g, u32 pbdma_id, u32 pbdma_intr_0, u32 *handled, u32 *error_notifier); u32 gk20a_fifo_default_timeslice_us(struct gk20a *g); + +#ifdef CONFIG_TEGRA_GK20A_NVHOST +void gk20a_fifo_add_syncpt_wait_cmd(struct gk20a *g, + struct priv_cmd_entry *cmd, u32 off, + u32 id, u32 thresh, u64 gpu_va); +u32 gk20a_fifo_get_syncpt_wait_cmd_size(void); +void gk20a_fifo_add_syncpt_incr_cmd(struct gk20a *g, + bool wfi_cmd, struct priv_cmd_entry *cmd, + u32 id, u64 gpu_va); +u32 gk20a_fifo_get_syncpt_incr_cmd_size(bool wfi_cmd); +void gk20a_fifo_free_syncpt_buf(struct channel_gk20a *c, + struct nvgpu_mem *syncpt_buf); +int gk20a_fifo_alloc_syncpt_buf(struct channel_gk20a *c, + u32 syncpt_id, struct nvgpu_mem *syncpt_buf); +#endif + #endif /*__GR_GK20A_H__*/ diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 29ac4763..f246b857 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -499,6 +499,20 @@ struct gpu_ops { unsigned int (*handle_pbdma_intr_0)(struct gk20a *g, u32 pbdma_id, u32 pbdma_intr_0, u32 *handled, u32 *error_notifier); +#ifdef CONFIG_TEGRA_GK20A_NVHOST + int (*alloc_syncpt_buf)(struct channel_gk20a *c, + u32 syncpt_id, struct nvgpu_mem *syncpt_buf); + void (*free_syncpt_buf)(struct channel_gk20a *c, + struct nvgpu_mem *syncpt_buf); + void (*add_syncpt_wait_cmd)(struct gk20a *g, + struct priv_cmd_entry *cmd, u32 off, + u32 id, u32 thresh, u64 gpu_va); + u32 (*get_syncpt_wait_cmd_size)(void); + void (*add_syncpt_incr_cmd)(struct gk20a *g, + bool wfi_cmd, struct priv_cmd_entry *cmd, + u32 id, u64 gpu_va); + u32 (*get_syncpt_incr_cmd_size)(bool wfi_cmd); +#endif } fifo; struct pmu_v { /*used for change of enum zbc update cmd id from ver 0 to ver1*/ @@ -1243,6 +1257,13 @@ struct gk20a { /* Some boards might be missing power sensor, preventing * from monitoring power, current and voltage */ bool power_sensor_missing; + +#if defined(CONFIG_TEGRA_GK20A_NVHOST) && defined(CONFIG_TEGRA_19x_GPU) + phys_addr_t syncpt_unit_base; + size_t syncpt_unit_size; + u32 syncpt_size; +#endif + }; static inline unsigned long gk20a_get_gr_idle_timeout(struct gk20a *g) diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c index 0931d226..6fb5802b 100644 --- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c @@ -230,4 +230,14 @@ void gm20b_init_fifo(struct gpu_ops *gops) gops->fifo.teardown_ch_tsg = gk20a_fifo_teardown_ch_tsg; gops->fifo.handle_sched_error = gk20a_fifo_handle_sched_error; gops->fifo.handle_pbdma_intr_0 = gk20a_fifo_handle_pbdma_intr_0; +#ifdef CONFIG_TEGRA_GK20A_NVHOST + gops->fifo.alloc_syncpt_buf = gk20a_fifo_alloc_syncpt_buf; + gops->fifo.free_syncpt_buf = gk20a_fifo_free_syncpt_buf; + gops->fifo.add_syncpt_wait_cmd = gk20a_fifo_add_syncpt_wait_cmd; + gops->fifo.get_syncpt_wait_cmd_size = + gk20a_fifo_get_syncpt_wait_cmd_size; + gops->fifo.add_syncpt_incr_cmd = gk20a_fifo_add_syncpt_incr_cmd; + gops->fifo.get_syncpt_incr_cmd_size = + gk20a_fifo_get_syncpt_incr_cmd_size; +#endif } -- cgit v1.2.2