gpu: nvgpu: use mem_desc in priv_cmd_entry

Replace the plain cpu pointer accesses with gk20a_mem_wr32(), and use a reference to the underlying mem_desc (within priv_cmd_queue) paired with an offset, for buffer aperture flexibility.

JIRA DNVGPU-21
JIRA DNVGPU-23

Change-Id: I317672c94bb682bb895f9ed3e8116729c8bb7f4b
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1145922
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
author: Konsta Holtta <kholtta@nvidia.com> 2016-05-11 08:04:44 -0400
committer: Terje Bergstrom <tbergstrom@nvidia.com> 2016-05-18 14:54:34 -0400
commit: dc45473eeb39d93100290a0f09bd787b3a5ce3f2 (patch)
tree: 90c56a0fc8ed2019bd4e1bfdb9fbddf18c20bd0c /drivers/gpu/nvgpu/gk20a
parent: 67a41e46a230cde7353e4cd46040f1e71d7cd289 (diff)
3 files changed, 62 insertions, 51 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 065e8ab1..31a3ceeb 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -1355,15 +1355,16 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
        e->gp_get = c->gpfifo.get;
        e->gp_put = c->gpfifo.put;
        e->gp_wrap = c->gpfifo.wrap;
+        e->mem = &q->mem;
        /* if we have increased size to skip free space in the end, set put
           to beginning of cmd buffer (0) + size */
        if (size != orig_size) {
-                e->ptr = (u32 *)q->mem.cpu_va;
+                e->off = 0;
                e->gva = q->mem.gpu_va;
                q->put = orig_size;
        } else {
-                e->ptr = (u32 *)q->mem.cpu_va + q->put;
+                e->off = q->put;
                e->gva = q->mem.gpu_va + q->put * sizeof(u32);
                q->put = (q->put + orig_size) & (q->size - 1);
        }
@@ -1755,17 +1756,15 @@ static int gk20a_free_priv_cmdbuf(struct channel_gk20a *c,
                                        struct priv_cmd_entry *e)
 {
        struct priv_cmd_queue *q = &c->priv_cmd_q;
-        u32 cmd_entry_start;
        struct device *d = dev_from_gk20a(c->g);
        if (!e)
                return 0;
-        cmd_entry_start = (u32)(e->ptr - (u32 *)q->mem.cpu_va);
-        if ((q->get != cmd_entry_start) && cmd_entry_start != 0)
+        if ((q->get != e->off) && e->off != 0)
                gk20a_err(d, "requests out-of-order, ch=%d\n", c->hw_chid);
-        q->get = (e->ptr - (u32 *)q->mem.cpu_va) + e->size;
+        q->get = e->off + e->size;
        free_priv_cmdbuf(c, e);
        return 0;
@@ -2150,7 +2149,9 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
                gpfifo_mem[c->gpfifo.put].entry1 = u64_hi32(wait_cmd->gva) |
                        pbdma_gp_entry1_length_f(wait_cmd->size);
                trace_gk20a_push_cmdbuf(dev_name(c->g->dev),
-                        0, wait_cmd->size, 0, wait_cmd->ptr);
+                                0, wait_cmd->size, 0,
+                                wait_cmd->mem->cpu_va + wait_cmd->off *
+                                sizeof(u32));
                c->gpfifo.put = (c->gpfifo.put + 1) &
                        (c->gpfifo.entry_num - 1);
@@ -2235,7 +2236,9 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
                gpfifo_mem[c->gpfifo.put].entry1 = u64_hi32(incr_cmd->gva) |
                        pbdma_gp_entry1_length_f(incr_cmd->size);
                trace_gk20a_push_cmdbuf(dev_name(c->g->dev),
-                        0, incr_cmd->size, 0, incr_cmd->ptr);
+                                0, incr_cmd->size, 0,
+                                incr_cmd->mem->cpu_va + incr_cmd->off *
+                                sizeof(u32));
                c->gpfifo.put = (c->gpfifo.put + 1) &
                        (c->gpfifo.entry_num - 1);
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index b47c1010..0a769b56 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -42,16 +42,18 @@ struct gk20a_channel_syncpt {
        u32 id;
 };
-static void add_wait_cmd(u32 *ptr, u32 id, u32 thresh)
+static void add_wait_cmd(struct gk20a *g, struct priv_cmd_entry *cmd, u32 off,
+                u32 id, u32 thresh)
 {
+        off = cmd->off + off;
        /* syncpoint_a */
-        ptr[0] = 0x2001001C;
+        gk20a_mem_wr32(g, cmd->mem, off++, 0x2001001C);
        /* payload */
-        ptr[1] = thresh;
+        gk20a_mem_wr32(g, cmd->mem, off++, thresh);
        /* syncpoint_b */
-        ptr[2] = 0x2001001D;
+        gk20a_mem_wr32(g, cmd->mem, off++, 0x2001001D);
        /* syncpt_id, switch_en, wait */
-        ptr[3] = (id << 8) | 0x10;
+        gk20a_mem_wr32(g, cmd->mem, off++, (id << 8) | 0x10);
 }
 static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s,
@@ -61,10 +63,11 @@ static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s,
        struct gk20a_channel_syncpt *sp =
                container_of(s, struct gk20a_channel_syncpt, ops);
        struct priv_cmd_entry *wait_cmd = NULL;
+        struct channel_gk20a *c = sp->c;
        int err = 0;
        if (!nvhost_syncpt_is_valid_pt_ext(sp->host1x_pdev, id)) {
-                dev_warn(dev_from_gk20a(sp->c->g),
+                dev_warn(dev_from_gk20a(c->g),
                                "invalid wait id in gpfifo submit, elided");
                return 0;
        }
@@ -72,14 +75,14 @@ static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s,
        if (nvhost_syncpt_is_expired_ext(sp->host1x_pdev, id, thresh))
                return 0;
-        err = gk20a_channel_alloc_priv_cmdbuf(sp->c, 4, &wait_cmd);
+        err = gk20a_channel_alloc_priv_cmdbuf(c, 4, &wait_cmd);
        if (err) {
-                gk20a_err(dev_from_gk20a(sp->c->g),
+                gk20a_err(dev_from_gk20a(c->g),
                                "not enough priv cmd buffer space");
                return err;
        }
-        add_wait_cmd(&wait_cmd->ptr[0], id, thresh);
+        add_wait_cmd(c->g, wait_cmd, 0, id, thresh);
        *entry = wait_cmd;
        *fence = NULL;
@@ -148,12 +151,12 @@ static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
                if (nvhost_syncpt_is_expired_ext(sp->host1x_pdev,
                                wait_id, wait_value)) {
-                        wait_cmd->ptr[i * 4 + 0] = 0;
-                        wait_cmd->ptr[i * 4 + 1] = 0;
-                        wait_cmd->ptr[i * 4 + 2] = 0;
-                        wait_cmd->ptr[i * 4 + 3] = 0;
+                        /* each wait_cmd is 4 u32s */
+                        gk20a_memset(c->g, wait_cmd->mem,
+                                        (wait_cmd->off + i * 4) * sizeof(u32),
+                                        0, 4 * sizeof(u32));
                } else
-                        add_wait_cmd(&wait_cmd->ptr[i * 4], wait_id,
+                        add_wait_cmd(c->g, wait_cmd, i * 4, wait_id,
                                        wait_value);
 #if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
                i++;
@@ -189,7 +192,7 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
 {
        u32 thresh;
        int incr_cmd_size;
-        int j = 0;
+        int off;
        int err;
        struct priv_cmd_entry *incr_cmd = NULL;
        struct gk20a_channel_syncpt *sp =
@@ -207,27 +210,30 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
                return err;
        }
+        off = incr_cmd->off;
        /* WAR for hw bug 1491360: syncpt needs to be incremented twice */
        if (wfi_cmd) {
                /* wfi */
-                incr_cmd->ptr[j++] = 0x2001001E;
+                gk20a_mem_wr32(c->g, incr_cmd->mem, off++, 0x2001001E);
                /* handle, ignored */
-                incr_cmd->ptr[j++] = 0x00000000;
+                gk20a_mem_wr32(c->g, incr_cmd->mem, off++, 0x00000000);
        }
        /* syncpoint_a */
-        incr_cmd->ptr[j++] = 0x2001001C;
+        gk20a_mem_wr32(c->g, incr_cmd->mem, off++, 0x2001001C);
        /* payload, ignored */
-        incr_cmd->ptr[j++] = 0;
+        gk20a_mem_wr32(c->g, incr_cmd->mem, off++, 0);
        /* syncpoint_b */
-        incr_cmd->ptr[j++] = 0x2001001D;
+        gk20a_mem_wr32(c->g, incr_cmd->mem, off++, 0x2001001D);
        /* syncpt_id, incr */
-        incr_cmd->ptr[j++] = (sp->id << 8) | 0x1;
+        gk20a_mem_wr32(c->g, incr_cmd->mem, off++, (sp->id << 8) | 0x1);
        /* syncpoint_b */
-        incr_cmd->ptr[j++] = 0x2001001D;
+        gk20a_mem_wr32(c->g, incr_cmd->mem, off++, 0x2001001D);
        /* syncpt_id, incr */
-        incr_cmd->ptr[j++] = (sp->id << 8) | 0x1;
+        gk20a_mem_wr32(c->g, incr_cmd->mem, off++, (sp->id << 8) | 0x1);
-        WARN_ON(j != incr_cmd_size);
+        WARN_ON(off - incr_cmd->off != incr_cmd_size);
        thresh = nvhost_syncpt_incr_max_ext(sp->host1x_pdev, sp->id, 2);
@@ -414,38 +420,39 @@ static void gk20a_channel_semaphore_launcher(
 }
 #endif
-static int add_sema_cmd(u32 *ptr, u64 sema, u32 payload,
-                        bool acquire, bool wfi)
+static int add_sema_cmd(struct gk20a *g, struct priv_cmd_entry *cmd,
+                u64 sema, u32 payload, bool acquire, bool wfi)
 {
-        int i = 0;
+        u32 off = cmd->off;
        /* semaphore_a */
-        ptr[i++] = 0x20010004;
+        gk20a_mem_wr32(g, cmd->mem, off++, 0x20010004);
        /* offset_upper */
-        ptr[i++] = (sema >> 32) & 0xff;
+        gk20a_mem_wr32(g, cmd->mem, off++, (sema >> 32) & 0xff);
        /* semaphore_b */
-        ptr[i++] = 0x20010005;
+        gk20a_mem_wr32(g, cmd->mem, off++, 0x20010005);
        /* offset */
-        ptr[i++] = sema & 0xffffffff;
+        gk20a_mem_wr32(g, cmd->mem, off++, sema & 0xffffffff);
        /* semaphore_c */
-        ptr[i++] = 0x20010006;
+        gk20a_mem_wr32(g, cmd->mem, off++, 0x20010006);
        /* payload */
-        ptr[i++] = payload;
+        gk20a_mem_wr32(g, cmd->mem, off++, payload);
        if (acquire) {
                /* semaphore_d */
-                ptr[i++] = 0x20010007;
+                gk20a_mem_wr32(g, cmd->mem, off++, 0x20010007);
                /* operation: acq_geq, switch_en */
-                ptr[i++] = 0x4 | (0x1 << 12);
+                gk20a_mem_wr32(g, cmd->mem, off++, 0x4 | (0x1 << 12));
        } else {
                /* semaphore_d */
-                ptr[i++] = 0x20010007;
+                gk20a_mem_wr32(g, cmd->mem, off++, 0x20010007);
                /* operation: release, wfi */
-                ptr[i++] = 0x2 | ((wfi ? 0x0 : 0x1) << 20);
+                gk20a_mem_wr32(g, cmd->mem, off++,
+                                0x2 | ((wfi ? 0x0 : 0x1) << 20));
                /* non_stall_int */
-                ptr[i++] = 0x20010008;
+                gk20a_mem_wr32(g, cmd->mem, off++, 0x20010008);
                /* ignored */
-                ptr[i++] = 0;
+                gk20a_mem_wr32(g, cmd->mem, off++, 0);
        }
-        return i;
+        return off - cmd->off;
 }
 static int gk20a_channel_semaphore_wait_syncpt(
@@ -506,7 +513,7 @@ static int gk20a_channel_semaphore_wait_fd(
        va = gk20a_semaphore_gpu_va(w->sema, c->vm);
        /* GPU unblocked when when the semaphore value becomes 1. */
-        written = add_sema_cmd(wait_cmd->ptr, va, 1, true, false);
+        written = add_sema_cmd(c->g, wait_cmd, va, 1, true, false);
        WARN_ON(written != wait_cmd->size);
        ret = sync_fence_wait_async(sync_fence, &w->waiter);
@@ -575,7 +582,7 @@ static int __gk20a_channel_semaphore_incr(
        /* Release the completion semaphore. */
        va = gk20a_semaphore_gpu_va(semaphore, c->vm);
-        written = add_sema_cmd(incr_cmd->ptr, va, 1, false, wfi_cmd);
+        written = add_sema_cmd(c->g, incr_cmd, va, 1, false, wfi_cmd);
        WARN_ON(written != incr_cmd_size);
        *fence = gk20a_fence_from_semaphore(sp->timeline, semaphore,
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index e9ac8f18..d943b231 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -164,7 +164,8 @@ struct priv_cmd_queue {
 };
 struct priv_cmd_entry {
-        u32 *ptr;
+        struct mem_desc *mem;
+        u32 off;        /* offset in mem, in u32 entries */
        u64 gva;
        u32 get;        /* start of entry in queue */
        u32 size;       /* in words */
author	Konsta Holtta <kholtta@nvidia.com>	2016-05-11 08:04:44 -0400
committer	Terje Bergstrom <tbergstrom@nvidia.com>	2016-05-18 14:54:34 -0400
commit	dc45473eeb39d93100290a0f09bd787b3a5ce3f2 (patch)
tree	90c56a0fc8ed2019bd4e1bfdb9fbddf18c20bd0c /drivers/gpu/nvgpu/gk20a
parent	67a41e46a230cde7353e4cd46040f1e71d7cd289 (diff)

diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 065e8ab1..31a3ceeb 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -1355,15 +1355,16 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
1355	e->gp_get = c->gpfifo.get;	1355	e->gp_get = c->gpfifo.get;
1356	e->gp_put = c->gpfifo.put;	1356	e->gp_put = c->gpfifo.put;
1357	e->gp_wrap = c->gpfifo.wrap;	1357	e->gp_wrap = c->gpfifo.wrap;
		1358	e->mem = &q->mem;
1358		1359
1359	/* if we have increased size to skip free space in the end, set put	1360	/* if we have increased size to skip free space in the end, set put
1360	to beginning of cmd buffer (0) + size */	1361	to beginning of cmd buffer (0) + size */
1361	if (size != orig_size) {	1362	if (size != orig_size) {
1362	e->ptr = (u32 *)q->mem.cpu_va;	1363	e->off = 0;
1363	e->gva = q->mem.gpu_va;	1364	e->gva = q->mem.gpu_va;
1364	q->put = orig_size;	1365	q->put = orig_size;
1365	} else {	1366	} else {
1366	e->ptr = (u32 *)q->mem.cpu_va + q->put;	1367	e->off = q->put;
1367	e->gva = q->mem.gpu_va + q->put * sizeof(u32);	1368	e->gva = q->mem.gpu_va + q->put * sizeof(u32);
1368	q->put = (q->put + orig_size) & (q->size - 1);	1369	q->put = (q->put + orig_size) & (q->size - 1);
1369	}	1370	}
@@ -1755,17 +1756,15 @@ static int gk20a_free_priv_cmdbuf(struct channel_gk20a *c,
1755	struct priv_cmd_entry *e)	1756	struct priv_cmd_entry *e)
1756	{	1757	{
1757	struct priv_cmd_queue *q = &c->priv_cmd_q;	1758	struct priv_cmd_queue *q = &c->priv_cmd_q;
1758	u32 cmd_entry_start;
1759	struct device *d = dev_from_gk20a(c->g);	1759	struct device *d = dev_from_gk20a(c->g);
1760		1760
1761	if (!e)	1761	if (!e)
1762	return 0;	1762	return 0;
1763		1763
1764	cmd_entry_start = (u32)(e->ptr - (u32 *)q->mem.cpu_va);	1764	if ((q->get != e->off) && e->off != 0)
1765	if ((q->get != cmd_entry_start) && cmd_entry_start != 0)
1766	gk20a_err(d, "requests out-of-order, ch=%d\n", c->hw_chid);	1765	gk20a_err(d, "requests out-of-order, ch=%d\n", c->hw_chid);
1767		1766
1768	q->get = (e->ptr - (u32 *)q->mem.cpu_va) + e->size;	1767	q->get = e->off + e->size;
1769	free_priv_cmdbuf(c, e);	1768	free_priv_cmdbuf(c, e);
1770		1769
1771	return 0;	1770	return 0;
@@ -2150,7 +2149,9 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
2150	gpfifo_mem[c->gpfifo.put].entry1 = u64_hi32(wait_cmd->gva) |	2149	gpfifo_mem[c->gpfifo.put].entry1 = u64_hi32(wait_cmd->gva) |
2151	pbdma_gp_entry1_length_f(wait_cmd->size);	2150	pbdma_gp_entry1_length_f(wait_cmd->size);
2152	trace_gk20a_push_cmdbuf(dev_name(c->g->dev),	2151	trace_gk20a_push_cmdbuf(dev_name(c->g->dev),
2153	0, wait_cmd->size, 0, wait_cmd->ptr);	2152	0, wait_cmd->size, 0,
		2153	wait_cmd->mem->cpu_va + wait_cmd->off *
		2154	sizeof(u32));
2154		2155
2155	c->gpfifo.put = (c->gpfifo.put + 1) &	2156	c->gpfifo.put = (c->gpfifo.put + 1) &
2156	(c->gpfifo.entry_num - 1);	2157	(c->gpfifo.entry_num - 1);
@@ -2235,7 +2236,9 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
2235	gpfifo_mem[c->gpfifo.put].entry1 = u64_hi32(incr_cmd->gva) |	2236	gpfifo_mem[c->gpfifo.put].entry1 = u64_hi32(incr_cmd->gva) |
2236	pbdma_gp_entry1_length_f(incr_cmd->size);	2237	pbdma_gp_entry1_length_f(incr_cmd->size);
2237	trace_gk20a_push_cmdbuf(dev_name(c->g->dev),	2238	trace_gk20a_push_cmdbuf(dev_name(c->g->dev),
2238	0, incr_cmd->size, 0, incr_cmd->ptr);	2239	0, incr_cmd->size, 0,
		2240	incr_cmd->mem->cpu_va + incr_cmd->off *
		2241	sizeof(u32));
2239		2242
2240	c->gpfifo.put = (c->gpfifo.put + 1) &	2243	c->gpfifo.put = (c->gpfifo.put + 1) &
2241	(c->gpfifo.entry_num - 1);	2244	(c->gpfifo.entry_num - 1);


diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index b47c1010..0a769b56 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -42,16 +42,18 @@ struct gk20a_channel_syncpt {
42	u32 id;	42	u32 id;
43	};	43	};
44		44
45	static void add_wait_cmd(u32 *ptr, u32 id, u32 thresh)	45	static void add_wait_cmd(struct gk20a *g, struct priv_cmd_entry *cmd, u32 off,
		46	u32 id, u32 thresh)
46	{	47	{
		48	off = cmd->off + off;
47	/* syncpoint_a */	49	/* syncpoint_a */
48	ptr[0] = 0x2001001C;	50	gk20a_mem_wr32(g, cmd->mem, off++, 0x2001001C);
49	/* payload */	51	/* payload */
50	ptr[1] = thresh;	52	gk20a_mem_wr32(g, cmd->mem, off++, thresh);
51	/* syncpoint_b */	53	/* syncpoint_b */
52	ptr[2] = 0x2001001D;	54	gk20a_mem_wr32(g, cmd->mem, off++, 0x2001001D);
53	/* syncpt_id, switch_en, wait */	55	/* syncpt_id, switch_en, wait */
54	ptr[3] = (id << 8) | 0x10;	56	gk20a_mem_wr32(g, cmd->mem, off++, (id << 8) | 0x10);
55	}	57	}
56		58
57	static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s,	59	static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s,
@@ -61,10 +63,11 @@ static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s,
61	struct gk20a_channel_syncpt *sp =	63	struct gk20a_channel_syncpt *sp =
62	container_of(s, struct gk20a_channel_syncpt, ops);	64	container_of(s, struct gk20a_channel_syncpt, ops);
63	struct priv_cmd_entry *wait_cmd = NULL;	65	struct priv_cmd_entry *wait_cmd = NULL;
		66	struct channel_gk20a *c = sp->c;
64	int err = 0;	67	int err = 0;
65		68
66	if (!nvhost_syncpt_is_valid_pt_ext(sp->host1x_pdev, id)) {	69	if (!nvhost_syncpt_is_valid_pt_ext(sp->host1x_pdev, id)) {
67	dev_warn(dev_from_gk20a(sp->c->g),	70	dev_warn(dev_from_gk20a(c->g),
68	"invalid wait id in gpfifo submit, elided");	71	"invalid wait id in gpfifo submit, elided");
69	return 0;	72	return 0;
70	}	73	}
@@ -72,14 +75,14 @@ static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s,
72	if (nvhost_syncpt_is_expired_ext(sp->host1x_pdev, id, thresh))	75	if (nvhost_syncpt_is_expired_ext(sp->host1x_pdev, id, thresh))
73	return 0;	76	return 0;
74		77
75	err = gk20a_channel_alloc_priv_cmdbuf(sp->c, 4, &wait_cmd);	78	err = gk20a_channel_alloc_priv_cmdbuf(c, 4, &wait_cmd);
76	if (err) {	79	if (err) {
77	gk20a_err(dev_from_gk20a(sp->c->g),	80	gk20a_err(dev_from_gk20a(c->g),
78	"not enough priv cmd buffer space");	81	"not enough priv cmd buffer space");
79	return err;	82	return err;
80	}	83	}
81		84
82	add_wait_cmd(&wait_cmd->ptr[0], id, thresh);	85	add_wait_cmd(c->g, wait_cmd, 0, id, thresh);
83		86
84	*entry = wait_cmd;	87	*entry = wait_cmd;
85	*fence = NULL;	88	*fence = NULL;
@@ -148,12 +151,12 @@ static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
148		151
149	if (nvhost_syncpt_is_expired_ext(sp->host1x_pdev,	152	if (nvhost_syncpt_is_expired_ext(sp->host1x_pdev,
150	wait_id, wait_value)) {	153	wait_id, wait_value)) {
151	wait_cmd->ptr[i * 4 + 0] = 0;	154	/* each wait_cmd is 4 u32s */
152	wait_cmd->ptr[i * 4 + 1] = 0;	155	gk20a_memset(c->g, wait_cmd->mem,
153	wait_cmd->ptr[i * 4 + 2] = 0;	156	(wait_cmd->off + i * 4) * sizeof(u32),
154	wait_cmd->ptr[i * 4 + 3] = 0;	157	0, 4 * sizeof(u32));
155	} else	158	} else
156	add_wait_cmd(&wait_cmd->ptr[i * 4], wait_id,	159	add_wait_cmd(c->g, wait_cmd, i * 4, wait_id,
157	wait_value);	160	wait_value);
158	#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)	161	#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
159	i++;	162	i++;
@@ -189,7 +192,7 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
189	{	192	{
190	u32 thresh;	193	u32 thresh;
191	int incr_cmd_size;	194	int incr_cmd_size;
192	int j = 0;	195	int off;
193	int err;	196	int err;
194	struct priv_cmd_entry *incr_cmd = NULL;	197	struct priv_cmd_entry *incr_cmd = NULL;
195	struct gk20a_channel_syncpt *sp =	198	struct gk20a_channel_syncpt *sp =
@@ -207,27 +210,30 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
207	return err;	210	return err;
208	}	211	}
209		212
		213	off = incr_cmd->off;
		214
210	/* WAR for hw bug 1491360: syncpt needs to be incremented twice */	215	/* WAR for hw bug 1491360: syncpt needs to be incremented twice */
211		216
212	if (wfi_cmd) {	217	if (wfi_cmd) {
213	/* wfi */	218	/* wfi */
214	incr_cmd->ptr[j++] = 0x2001001E;	219	gk20a_mem_wr32(c->g, incr_cmd->mem, off++, 0x2001001E);
215	/* handle, ignored */	220	/* handle, ignored */
216	incr_cmd->ptr[j++] = 0x00000000;	221	gk20a_mem_wr32(c->g, incr_cmd->mem, off++, 0x00000000);
217	}	222	}
218	/* syncpoint_a */	223	/* syncpoint_a */
219	incr_cmd->ptr[j++] = 0x2001001C;	224	gk20a_mem_wr32(c->g, incr_cmd->mem, off++, 0x2001001C);
220	/* payload, ignored */	225	/* payload, ignored */
221	incr_cmd->ptr[j++] = 0;	226	gk20a_mem_wr32(c->g, incr_cmd->mem, off++, 0);
222	/* syncpoint_b */	227	/* syncpoint_b */
223	incr_cmd->ptr[j++] = 0x2001001D;	228	gk20a_mem_wr32(c->g, incr_cmd->mem, off++, 0x2001001D);
224	/* syncpt_id, incr */	229	/* syncpt_id, incr */
225	incr_cmd->ptr[j++] = (sp->id << 8) | 0x1;	230	gk20a_mem_wr32(c->g, incr_cmd->mem, off++, (sp->id << 8) | 0x1);
226	/* syncpoint_b */	231	/* syncpoint_b */
227	incr_cmd->ptr[j++] = 0x2001001D;	232	gk20a_mem_wr32(c->g, incr_cmd->mem, off++, 0x2001001D);
228	/* syncpt_id, incr */	233	/* syncpt_id, incr */
229	incr_cmd->ptr[j++] = (sp->id << 8) | 0x1;	234	gk20a_mem_wr32(c->g, incr_cmd->mem, off++, (sp->id << 8) | 0x1);
230	WARN_ON(j != incr_cmd_size);	235
		236	WARN_ON(off - incr_cmd->off != incr_cmd_size);
231		237
232	thresh = nvhost_syncpt_incr_max_ext(sp->host1x_pdev, sp->id, 2);	238	thresh = nvhost_syncpt_incr_max_ext(sp->host1x_pdev, sp->id, 2);
233		239
@@ -414,38 +420,39 @@ static void gk20a_channel_semaphore_launcher(
414	}	420	}
415	#endif	421	#endif
416		422
417	static int add_sema_cmd(u32 *ptr, u64 sema, u32 payload,	423	static int add_sema_cmd(struct gk20a *g, struct priv_cmd_entry *cmd,
418	bool acquire, bool wfi)	424	u64 sema, u32 payload, bool acquire, bool wfi)
419	{	425	{
420	int i = 0;	426	u32 off = cmd->off;
421	/* semaphore_a */	427	/* semaphore_a */
422	ptr[i++] = 0x20010004;	428	gk20a_mem_wr32(g, cmd->mem, off++, 0x20010004);
423	/* offset_upper */	429	/* offset_upper */
424	ptr[i++] = (sema >> 32) & 0xff;	430	gk20a_mem_wr32(g, cmd->mem, off++, (sema >> 32) & 0xff);
425	/* semaphore_b */	431	/* semaphore_b */
426	ptr[i++] = 0x20010005;	432	gk20a_mem_wr32(g, cmd->mem, off++, 0x20010005);
427	/* offset */	433	/* offset */
428	ptr[i++] = sema & 0xffffffff;	434	gk20a_mem_wr32(g, cmd->mem, off++, sema & 0xffffffff);
429	/* semaphore_c */	435	/* semaphore_c */
430	ptr[i++] = 0x20010006;	436	gk20a_mem_wr32(g, cmd->mem, off++, 0x20010006);
431	/* payload */	437	/* payload */
432	ptr[i++] = payload;	438	gk20a_mem_wr32(g, cmd->mem, off++, payload);
433	if (acquire) {	439	if (acquire) {
434	/* semaphore_d */	440	/* semaphore_d */
435	ptr[i++] = 0x20010007;	441	gk20a_mem_wr32(g, cmd->mem, off++, 0x20010007);
436	/* operation: acq_geq, switch_en */	442	/* operation: acq_geq, switch_en */
437	ptr[i++] = 0x4 | (0x1 << 12);	443	gk20a_mem_wr32(g, cmd->mem, off++, 0x4 | (0x1 << 12));
438	} else {	444	} else {
439	/* semaphore_d */	445	/* semaphore_d */
440	ptr[i++] = 0x20010007;	446	gk20a_mem_wr32(g, cmd->mem, off++, 0x20010007);
441	/* operation: release, wfi */	447	/* operation: release, wfi */
442	ptr[i++] = 0x2 | ((wfi ? 0x0 : 0x1) << 20);	448	gk20a_mem_wr32(g, cmd->mem, off++,
		449	0x2 | ((wfi ? 0x0 : 0x1) << 20));
443	/* non_stall_int */	450	/* non_stall_int */
444	ptr[i++] = 0x20010008;	451	gk20a_mem_wr32(g, cmd->mem, off++, 0x20010008);
445	/* ignored */	452	/* ignored */
446	ptr[i++] = 0;	453	gk20a_mem_wr32(g, cmd->mem, off++, 0);
447	}	454	}
448	return i;	455	return off - cmd->off;
449	}	456	}
450		457
451	static int gk20a_channel_semaphore_wait_syncpt(	458	static int gk20a_channel_semaphore_wait_syncpt(
@@ -506,7 +513,7 @@ static int gk20a_channel_semaphore_wait_fd(
506		513
507	va = gk20a_semaphore_gpu_va(w->sema, c->vm);	514	va = gk20a_semaphore_gpu_va(w->sema, c->vm);
508	/* GPU unblocked when when the semaphore value becomes 1. */	515	/* GPU unblocked when when the semaphore value becomes 1. */
509	written = add_sema_cmd(wait_cmd->ptr, va, 1, true, false);	516	written = add_sema_cmd(c->g, wait_cmd, va, 1, true, false);
510		517
511	WARN_ON(written != wait_cmd->size);	518	WARN_ON(written != wait_cmd->size);
512	ret = sync_fence_wait_async(sync_fence, &w->waiter);	519	ret = sync_fence_wait_async(sync_fence, &w->waiter);
@@ -575,7 +582,7 @@ static int __gk20a_channel_semaphore_incr(
575		582
576	/* Release the completion semaphore. */	583	/* Release the completion semaphore. */
577	va = gk20a_semaphore_gpu_va(semaphore, c->vm);	584	va = gk20a_semaphore_gpu_va(semaphore, c->vm);
578	written = add_sema_cmd(incr_cmd->ptr, va, 1, false, wfi_cmd);	585	written = add_sema_cmd(c->g, incr_cmd, va, 1, false, wfi_cmd);
579	WARN_ON(written != incr_cmd_size);	586	WARN_ON(written != incr_cmd_size);
580		587
581	*fence = gk20a_fence_from_semaphore(sp->timeline, semaphore,	588	*fence = gk20a_fence_from_semaphore(sp->timeline, semaphore,


diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index e9ac8f18..d943b231 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -164,7 +164,8 @@ struct priv_cmd_queue {
164	};	164	};
165		165
166	struct priv_cmd_entry {	166	struct priv_cmd_entry {
167	u32 *ptr;	167	struct mem_desc *mem;
		168	u32 off; /* offset in mem, in u32 entries */
168	u64 gva;	169	u64 gva;
169	u32 get; /* start of entry in queue */	170	u32 get; /* start of entry in queue */
170	u32 size; /* in words */	171	u32 size; /* in words */