From dc45473eeb39d93100290a0f09bd787b3a5ce3f2 Mon Sep 17 00:00:00 2001 From: Konsta Holtta Date: Wed, 11 May 2016 15:04:44 +0300 Subject: gpu: nvgpu: use mem_desc in priv_cmd_entry Replace the plain cpu pointer accesses with gk20a_mem_wr32(), and use a reference to the underlying mem_desc (within priv_cmd_queue) paired with an offset, for buffer aperture flexibility. JIRA DNVGPU-21 JIRA DNVGPU-23 Change-Id: I317672c94bb682bb895f9ed3e8116729c8bb7f4b Signed-off-by: Konsta Holtta Reviewed-on: http://git-master/r/1145922 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Alex Waterman Reviewed-by: Terje Bergstrom --- drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 19 +++--- drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 91 +++++++++++++++------------- drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 3 +- 3 files changed, 62 insertions(+), 51 deletions(-) diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 065e8ab1..31a3ceeb 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -1355,15 +1355,16 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size, e->gp_get = c->gpfifo.get; e->gp_put = c->gpfifo.put; e->gp_wrap = c->gpfifo.wrap; + e->mem = &q->mem; /* if we have increased size to skip free space in the end, set put to beginning of cmd buffer (0) + size */ if (size != orig_size) { - e->ptr = (u32 *)q->mem.cpu_va; + e->off = 0; e->gva = q->mem.gpu_va; q->put = orig_size; } else { - e->ptr = (u32 *)q->mem.cpu_va + q->put; + e->off = q->put; e->gva = q->mem.gpu_va + q->put * sizeof(u32); q->put = (q->put + orig_size) & (q->size - 1); } @@ -1755,17 +1756,15 @@ static int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e) { struct priv_cmd_queue *q = &c->priv_cmd_q; - u32 cmd_entry_start; struct device *d = dev_from_gk20a(c->g); if (!e) return 0; - cmd_entry_start = (u32)(e->ptr - (u32 *)q->mem.cpu_va); - if ((q->get != cmd_entry_start) && cmd_entry_start != 0) + if ((q->get != e->off) && e->off != 0) gk20a_err(d, "requests out-of-order, ch=%d\n", c->hw_chid); - q->get = (e->ptr - (u32 *)q->mem.cpu_va) + e->size; + q->get = e->off + e->size; free_priv_cmdbuf(c, e); return 0; @@ -2150,7 +2149,9 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, gpfifo_mem[c->gpfifo.put].entry1 = u64_hi32(wait_cmd->gva) | pbdma_gp_entry1_length_f(wait_cmd->size); trace_gk20a_push_cmdbuf(dev_name(c->g->dev), - 0, wait_cmd->size, 0, wait_cmd->ptr); + 0, wait_cmd->size, 0, + wait_cmd->mem->cpu_va + wait_cmd->off * + sizeof(u32)); c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1); @@ -2235,7 +2236,9 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, gpfifo_mem[c->gpfifo.put].entry1 = u64_hi32(incr_cmd->gva) | pbdma_gp_entry1_length_f(incr_cmd->size); trace_gk20a_push_cmdbuf(dev_name(c->g->dev), - 0, incr_cmd->size, 0, incr_cmd->ptr); + 0, incr_cmd->size, 0, + incr_cmd->mem->cpu_va + incr_cmd->off * + sizeof(u32)); c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1); diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index b47c1010..0a769b56 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c @@ -42,16 +42,18 @@ struct gk20a_channel_syncpt { u32 id; }; -static void add_wait_cmd(u32 *ptr, u32 id, u32 thresh) +static void add_wait_cmd(struct gk20a *g, struct priv_cmd_entry *cmd, u32 off, + u32 id, u32 thresh) { + off = cmd->off + off; /* syncpoint_a */ - ptr[0] = 0x2001001C; + gk20a_mem_wr32(g, cmd->mem, off++, 0x2001001C); /* payload */ - ptr[1] = thresh; + gk20a_mem_wr32(g, cmd->mem, off++, thresh); /* syncpoint_b */ - ptr[2] = 0x2001001D; + gk20a_mem_wr32(g, cmd->mem, off++, 0x2001001D); /* syncpt_id, switch_en, wait */ - ptr[3] = (id << 8) | 0x10; + gk20a_mem_wr32(g, cmd->mem, off++, (id << 8) | 0x10); } static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s, @@ -61,10 +63,11 @@ static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s, struct gk20a_channel_syncpt *sp = container_of(s, struct gk20a_channel_syncpt, ops); struct priv_cmd_entry *wait_cmd = NULL; + struct channel_gk20a *c = sp->c; int err = 0; if (!nvhost_syncpt_is_valid_pt_ext(sp->host1x_pdev, id)) { - dev_warn(dev_from_gk20a(sp->c->g), + dev_warn(dev_from_gk20a(c->g), "invalid wait id in gpfifo submit, elided"); return 0; } @@ -72,14 +75,14 @@ static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s, if (nvhost_syncpt_is_expired_ext(sp->host1x_pdev, id, thresh)) return 0; - err = gk20a_channel_alloc_priv_cmdbuf(sp->c, 4, &wait_cmd); + err = gk20a_channel_alloc_priv_cmdbuf(c, 4, &wait_cmd); if (err) { - gk20a_err(dev_from_gk20a(sp->c->g), + gk20a_err(dev_from_gk20a(c->g), "not enough priv cmd buffer space"); return err; } - add_wait_cmd(&wait_cmd->ptr[0], id, thresh); + add_wait_cmd(c->g, wait_cmd, 0, id, thresh); *entry = wait_cmd; *fence = NULL; @@ -148,12 +151,12 @@ static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd, if (nvhost_syncpt_is_expired_ext(sp->host1x_pdev, wait_id, wait_value)) { - wait_cmd->ptr[i * 4 + 0] = 0; - wait_cmd->ptr[i * 4 + 1] = 0; - wait_cmd->ptr[i * 4 + 2] = 0; - wait_cmd->ptr[i * 4 + 3] = 0; + /* each wait_cmd is 4 u32s */ + gk20a_memset(c->g, wait_cmd->mem, + (wait_cmd->off + i * 4) * sizeof(u32), + 0, 4 * sizeof(u32)); } else - add_wait_cmd(&wait_cmd->ptr[i * 4], wait_id, + add_wait_cmd(c->g, wait_cmd, i * 4, wait_id, wait_value); #if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0) i++; @@ -189,7 +192,7 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s, { u32 thresh; int incr_cmd_size; - int j = 0; + int off; int err; struct priv_cmd_entry *incr_cmd = NULL; struct gk20a_channel_syncpt *sp = @@ -207,27 +210,30 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s, return err; } + off = incr_cmd->off; + /* WAR for hw bug 1491360: syncpt needs to be incremented twice */ if (wfi_cmd) { /* wfi */ - incr_cmd->ptr[j++] = 0x2001001E; + gk20a_mem_wr32(c->g, incr_cmd->mem, off++, 0x2001001E); /* handle, ignored */ - incr_cmd->ptr[j++] = 0x00000000; + gk20a_mem_wr32(c->g, incr_cmd->mem, off++, 0x00000000); } /* syncpoint_a */ - incr_cmd->ptr[j++] = 0x2001001C; + gk20a_mem_wr32(c->g, incr_cmd->mem, off++, 0x2001001C); /* payload, ignored */ - incr_cmd->ptr[j++] = 0; + gk20a_mem_wr32(c->g, incr_cmd->mem, off++, 0); /* syncpoint_b */ - incr_cmd->ptr[j++] = 0x2001001D; + gk20a_mem_wr32(c->g, incr_cmd->mem, off++, 0x2001001D); /* syncpt_id, incr */ - incr_cmd->ptr[j++] = (sp->id << 8) | 0x1; + gk20a_mem_wr32(c->g, incr_cmd->mem, off++, (sp->id << 8) | 0x1); /* syncpoint_b */ - incr_cmd->ptr[j++] = 0x2001001D; + gk20a_mem_wr32(c->g, incr_cmd->mem, off++, 0x2001001D); /* syncpt_id, incr */ - incr_cmd->ptr[j++] = (sp->id << 8) | 0x1; - WARN_ON(j != incr_cmd_size); + gk20a_mem_wr32(c->g, incr_cmd->mem, off++, (sp->id << 8) | 0x1); + + WARN_ON(off - incr_cmd->off != incr_cmd_size); thresh = nvhost_syncpt_incr_max_ext(sp->host1x_pdev, sp->id, 2); @@ -414,38 +420,39 @@ static void gk20a_channel_semaphore_launcher( } #endif -static int add_sema_cmd(u32 *ptr, u64 sema, u32 payload, - bool acquire, bool wfi) +static int add_sema_cmd(struct gk20a *g, struct priv_cmd_entry *cmd, + u64 sema, u32 payload, bool acquire, bool wfi) { - int i = 0; + u32 off = cmd->off; /* semaphore_a */ - ptr[i++] = 0x20010004; + gk20a_mem_wr32(g, cmd->mem, off++, 0x20010004); /* offset_upper */ - ptr[i++] = (sema >> 32) & 0xff; + gk20a_mem_wr32(g, cmd->mem, off++, (sema >> 32) & 0xff); /* semaphore_b */ - ptr[i++] = 0x20010005; + gk20a_mem_wr32(g, cmd->mem, off++, 0x20010005); /* offset */ - ptr[i++] = sema & 0xffffffff; + gk20a_mem_wr32(g, cmd->mem, off++, sema & 0xffffffff); /* semaphore_c */ - ptr[i++] = 0x20010006; + gk20a_mem_wr32(g, cmd->mem, off++, 0x20010006); /* payload */ - ptr[i++] = payload; + gk20a_mem_wr32(g, cmd->mem, off++, payload); if (acquire) { /* semaphore_d */ - ptr[i++] = 0x20010007; + gk20a_mem_wr32(g, cmd->mem, off++, 0x20010007); /* operation: acq_geq, switch_en */ - ptr[i++] = 0x4 | (0x1 << 12); + gk20a_mem_wr32(g, cmd->mem, off++, 0x4 | (0x1 << 12)); } else { /* semaphore_d */ - ptr[i++] = 0x20010007; + gk20a_mem_wr32(g, cmd->mem, off++, 0x20010007); /* operation: release, wfi */ - ptr[i++] = 0x2 | ((wfi ? 0x0 : 0x1) << 20); + gk20a_mem_wr32(g, cmd->mem, off++, + 0x2 | ((wfi ? 0x0 : 0x1) << 20)); /* non_stall_int */ - ptr[i++] = 0x20010008; + gk20a_mem_wr32(g, cmd->mem, off++, 0x20010008); /* ignored */ - ptr[i++] = 0; + gk20a_mem_wr32(g, cmd->mem, off++, 0); } - return i; + return off - cmd->off; } static int gk20a_channel_semaphore_wait_syncpt( @@ -506,7 +513,7 @@ static int gk20a_channel_semaphore_wait_fd( va = gk20a_semaphore_gpu_va(w->sema, c->vm); /* GPU unblocked when when the semaphore value becomes 1. */ - written = add_sema_cmd(wait_cmd->ptr, va, 1, true, false); + written = add_sema_cmd(c->g, wait_cmd, va, 1, true, false); WARN_ON(written != wait_cmd->size); ret = sync_fence_wait_async(sync_fence, &w->waiter); @@ -575,7 +582,7 @@ static int __gk20a_channel_semaphore_incr( /* Release the completion semaphore. */ va = gk20a_semaphore_gpu_va(semaphore, c->vm); - written = add_sema_cmd(incr_cmd->ptr, va, 1, false, wfi_cmd); + written = add_sema_cmd(c->g, incr_cmd, va, 1, false, wfi_cmd); WARN_ON(written != incr_cmd_size); *fence = gk20a_fence_from_semaphore(sp->timeline, semaphore, diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index e9ac8f18..d943b231 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h @@ -164,7 +164,8 @@ struct priv_cmd_queue { }; struct priv_cmd_entry { - u32 *ptr; + struct mem_desc *mem; + u32 off; /* offset in mem, in u32 entries */ u64 gva; u32 get; /* start of entry in queue */ u32 size; /* in words */ -- cgit v1.2.2