diff options
author | Konsta Holtta <kholtta@nvidia.com> | 2016-05-11 08:04:44 -0400 |
---|---|---|
committer | Terje Bergstrom <tbergstrom@nvidia.com> | 2016-05-18 14:54:34 -0400 |
commit | dc45473eeb39d93100290a0f09bd787b3a5ce3f2 (patch) | |
tree | 90c56a0fc8ed2019bd4e1bfdb9fbddf18c20bd0c /drivers/gpu/nvgpu/gk20a | |
parent | 67a41e46a230cde7353e4cd46040f1e71d7cd289 (diff) |
gpu: nvgpu: use mem_desc in priv_cmd_entry
Replace the plain cpu pointer accesses with gk20a_mem_wr32(), and use a
reference to the underlying mem_desc (within priv_cmd_queue) paired with
an offset, for buffer aperture flexibility.
JIRA DNVGPU-21
JIRA DNVGPU-23
Change-Id: I317672c94bb682bb895f9ed3e8116729c8bb7f4b
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1145922
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 19 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 91 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 3 |
3 files changed, 62 insertions, 51 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 065e8ab1..31a3ceeb 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -1355,15 +1355,16 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size, | |||
1355 | e->gp_get = c->gpfifo.get; | 1355 | e->gp_get = c->gpfifo.get; |
1356 | e->gp_put = c->gpfifo.put; | 1356 | e->gp_put = c->gpfifo.put; |
1357 | e->gp_wrap = c->gpfifo.wrap; | 1357 | e->gp_wrap = c->gpfifo.wrap; |
1358 | e->mem = &q->mem; | ||
1358 | 1359 | ||
1359 | /* if we have increased size to skip free space in the end, set put | 1360 | /* if we have increased size to skip free space in the end, set put |
1360 | to beginning of cmd buffer (0) + size */ | 1361 | to beginning of cmd buffer (0) + size */ |
1361 | if (size != orig_size) { | 1362 | if (size != orig_size) { |
1362 | e->ptr = (u32 *)q->mem.cpu_va; | 1363 | e->off = 0; |
1363 | e->gva = q->mem.gpu_va; | 1364 | e->gva = q->mem.gpu_va; |
1364 | q->put = orig_size; | 1365 | q->put = orig_size; |
1365 | } else { | 1366 | } else { |
1366 | e->ptr = (u32 *)q->mem.cpu_va + q->put; | 1367 | e->off = q->put; |
1367 | e->gva = q->mem.gpu_va + q->put * sizeof(u32); | 1368 | e->gva = q->mem.gpu_va + q->put * sizeof(u32); |
1368 | q->put = (q->put + orig_size) & (q->size - 1); | 1369 | q->put = (q->put + orig_size) & (q->size - 1); |
1369 | } | 1370 | } |
@@ -1755,17 +1756,15 @@ static int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, | |||
1755 | struct priv_cmd_entry *e) | 1756 | struct priv_cmd_entry *e) |
1756 | { | 1757 | { |
1757 | struct priv_cmd_queue *q = &c->priv_cmd_q; | 1758 | struct priv_cmd_queue *q = &c->priv_cmd_q; |
1758 | u32 cmd_entry_start; | ||
1759 | struct device *d = dev_from_gk20a(c->g); | 1759 | struct device *d = dev_from_gk20a(c->g); |
1760 | 1760 | ||
1761 | if (!e) | 1761 | if (!e) |
1762 | return 0; | 1762 | return 0; |
1763 | 1763 | ||
1764 | cmd_entry_start = (u32)(e->ptr - (u32 *)q->mem.cpu_va); | 1764 | if ((q->get != e->off) && e->off != 0) |
1765 | if ((q->get != cmd_entry_start) && cmd_entry_start != 0) | ||
1766 | gk20a_err(d, "requests out-of-order, ch=%d\n", c->hw_chid); | 1765 | gk20a_err(d, "requests out-of-order, ch=%d\n", c->hw_chid); |
1767 | 1766 | ||
1768 | q->get = (e->ptr - (u32 *)q->mem.cpu_va) + e->size; | 1767 | q->get = e->off + e->size; |
1769 | free_priv_cmdbuf(c, e); | 1768 | free_priv_cmdbuf(c, e); |
1770 | 1769 | ||
1771 | return 0; | 1770 | return 0; |
@@ -2150,7 +2149,9 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
2150 | gpfifo_mem[c->gpfifo.put].entry1 = u64_hi32(wait_cmd->gva) | | 2149 | gpfifo_mem[c->gpfifo.put].entry1 = u64_hi32(wait_cmd->gva) | |
2151 | pbdma_gp_entry1_length_f(wait_cmd->size); | 2150 | pbdma_gp_entry1_length_f(wait_cmd->size); |
2152 | trace_gk20a_push_cmdbuf(dev_name(c->g->dev), | 2151 | trace_gk20a_push_cmdbuf(dev_name(c->g->dev), |
2153 | 0, wait_cmd->size, 0, wait_cmd->ptr); | 2152 | 0, wait_cmd->size, 0, |
2153 | wait_cmd->mem->cpu_va + wait_cmd->off * | ||
2154 | sizeof(u32)); | ||
2154 | 2155 | ||
2155 | c->gpfifo.put = (c->gpfifo.put + 1) & | 2156 | c->gpfifo.put = (c->gpfifo.put + 1) & |
2156 | (c->gpfifo.entry_num - 1); | 2157 | (c->gpfifo.entry_num - 1); |
@@ -2235,7 +2236,9 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
2235 | gpfifo_mem[c->gpfifo.put].entry1 = u64_hi32(incr_cmd->gva) | | 2236 | gpfifo_mem[c->gpfifo.put].entry1 = u64_hi32(incr_cmd->gva) | |
2236 | pbdma_gp_entry1_length_f(incr_cmd->size); | 2237 | pbdma_gp_entry1_length_f(incr_cmd->size); |
2237 | trace_gk20a_push_cmdbuf(dev_name(c->g->dev), | 2238 | trace_gk20a_push_cmdbuf(dev_name(c->g->dev), |
2238 | 0, incr_cmd->size, 0, incr_cmd->ptr); | 2239 | 0, incr_cmd->size, 0, |
2240 | incr_cmd->mem->cpu_va + incr_cmd->off * | ||
2241 | sizeof(u32)); | ||
2239 | 2242 | ||
2240 | c->gpfifo.put = (c->gpfifo.put + 1) & | 2243 | c->gpfifo.put = (c->gpfifo.put + 1) & |
2241 | (c->gpfifo.entry_num - 1); | 2244 | (c->gpfifo.entry_num - 1); |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index b47c1010..0a769b56 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | |||
@@ -42,16 +42,18 @@ struct gk20a_channel_syncpt { | |||
42 | u32 id; | 42 | u32 id; |
43 | }; | 43 | }; |
44 | 44 | ||
45 | static void add_wait_cmd(u32 *ptr, u32 id, u32 thresh) | 45 | static void add_wait_cmd(struct gk20a *g, struct priv_cmd_entry *cmd, u32 off, |
46 | u32 id, u32 thresh) | ||
46 | { | 47 | { |
48 | off = cmd->off + off; | ||
47 | /* syncpoint_a */ | 49 | /* syncpoint_a */ |
48 | ptr[0] = 0x2001001C; | 50 | gk20a_mem_wr32(g, cmd->mem, off++, 0x2001001C); |
49 | /* payload */ | 51 | /* payload */ |
50 | ptr[1] = thresh; | 52 | gk20a_mem_wr32(g, cmd->mem, off++, thresh); |
51 | /* syncpoint_b */ | 53 | /* syncpoint_b */ |
52 | ptr[2] = 0x2001001D; | 54 | gk20a_mem_wr32(g, cmd->mem, off++, 0x2001001D); |
53 | /* syncpt_id, switch_en, wait */ | 55 | /* syncpt_id, switch_en, wait */ |
54 | ptr[3] = (id << 8) | 0x10; | 56 | gk20a_mem_wr32(g, cmd->mem, off++, (id << 8) | 0x10); |
55 | } | 57 | } |
56 | 58 | ||
57 | static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s, | 59 | static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s, |
@@ -61,10 +63,11 @@ static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s, | |||
61 | struct gk20a_channel_syncpt *sp = | 63 | struct gk20a_channel_syncpt *sp = |
62 | container_of(s, struct gk20a_channel_syncpt, ops); | 64 | container_of(s, struct gk20a_channel_syncpt, ops); |
63 | struct priv_cmd_entry *wait_cmd = NULL; | 65 | struct priv_cmd_entry *wait_cmd = NULL; |
66 | struct channel_gk20a *c = sp->c; | ||
64 | int err = 0; | 67 | int err = 0; |
65 | 68 | ||
66 | if (!nvhost_syncpt_is_valid_pt_ext(sp->host1x_pdev, id)) { | 69 | if (!nvhost_syncpt_is_valid_pt_ext(sp->host1x_pdev, id)) { |
67 | dev_warn(dev_from_gk20a(sp->c->g), | 70 | dev_warn(dev_from_gk20a(c->g), |
68 | "invalid wait id in gpfifo submit, elided"); | 71 | "invalid wait id in gpfifo submit, elided"); |
69 | return 0; | 72 | return 0; |
70 | } | 73 | } |
@@ -72,14 +75,14 @@ static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s, | |||
72 | if (nvhost_syncpt_is_expired_ext(sp->host1x_pdev, id, thresh)) | 75 | if (nvhost_syncpt_is_expired_ext(sp->host1x_pdev, id, thresh)) |
73 | return 0; | 76 | return 0; |
74 | 77 | ||
75 | err = gk20a_channel_alloc_priv_cmdbuf(sp->c, 4, &wait_cmd); | 78 | err = gk20a_channel_alloc_priv_cmdbuf(c, 4, &wait_cmd); |
76 | if (err) { | 79 | if (err) { |
77 | gk20a_err(dev_from_gk20a(sp->c->g), | 80 | gk20a_err(dev_from_gk20a(c->g), |
78 | "not enough priv cmd buffer space"); | 81 | "not enough priv cmd buffer space"); |
79 | return err; | 82 | return err; |
80 | } | 83 | } |
81 | 84 | ||
82 | add_wait_cmd(&wait_cmd->ptr[0], id, thresh); | 85 | add_wait_cmd(c->g, wait_cmd, 0, id, thresh); |
83 | 86 | ||
84 | *entry = wait_cmd; | 87 | *entry = wait_cmd; |
85 | *fence = NULL; | 88 | *fence = NULL; |
@@ -148,12 +151,12 @@ static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd, | |||
148 | 151 | ||
149 | if (nvhost_syncpt_is_expired_ext(sp->host1x_pdev, | 152 | if (nvhost_syncpt_is_expired_ext(sp->host1x_pdev, |
150 | wait_id, wait_value)) { | 153 | wait_id, wait_value)) { |
151 | wait_cmd->ptr[i * 4 + 0] = 0; | 154 | /* each wait_cmd is 4 u32s */ |
152 | wait_cmd->ptr[i * 4 + 1] = 0; | 155 | gk20a_memset(c->g, wait_cmd->mem, |
153 | wait_cmd->ptr[i * 4 + 2] = 0; | 156 | (wait_cmd->off + i * 4) * sizeof(u32), |
154 | wait_cmd->ptr[i * 4 + 3] = 0; | 157 | 0, 4 * sizeof(u32)); |
155 | } else | 158 | } else |
156 | add_wait_cmd(&wait_cmd->ptr[i * 4], wait_id, | 159 | add_wait_cmd(c->g, wait_cmd, i * 4, wait_id, |
157 | wait_value); | 160 | wait_value); |
158 | #if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0) | 161 | #if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0) |
159 | i++; | 162 | i++; |
@@ -189,7 +192,7 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s, | |||
189 | { | 192 | { |
190 | u32 thresh; | 193 | u32 thresh; |
191 | int incr_cmd_size; | 194 | int incr_cmd_size; |
192 | int j = 0; | 195 | int off; |
193 | int err; | 196 | int err; |
194 | struct priv_cmd_entry *incr_cmd = NULL; | 197 | struct priv_cmd_entry *incr_cmd = NULL; |
195 | struct gk20a_channel_syncpt *sp = | 198 | struct gk20a_channel_syncpt *sp = |
@@ -207,27 +210,30 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s, | |||
207 | return err; | 210 | return err; |
208 | } | 211 | } |
209 | 212 | ||
213 | off = incr_cmd->off; | ||
214 | |||
210 | /* WAR for hw bug 1491360: syncpt needs to be incremented twice */ | 215 | /* WAR for hw bug 1491360: syncpt needs to be incremented twice */ |
211 | 216 | ||
212 | if (wfi_cmd) { | 217 | if (wfi_cmd) { |
213 | /* wfi */ | 218 | /* wfi */ |
214 | incr_cmd->ptr[j++] = 0x2001001E; | 219 | gk20a_mem_wr32(c->g, incr_cmd->mem, off++, 0x2001001E); |
215 | /* handle, ignored */ | 220 | /* handle, ignored */ |
216 | incr_cmd->ptr[j++] = 0x00000000; | 221 | gk20a_mem_wr32(c->g, incr_cmd->mem, off++, 0x00000000); |
217 | } | 222 | } |
218 | /* syncpoint_a */ | 223 | /* syncpoint_a */ |
219 | incr_cmd->ptr[j++] = 0x2001001C; | 224 | gk20a_mem_wr32(c->g, incr_cmd->mem, off++, 0x2001001C); |
220 | /* payload, ignored */ | 225 | /* payload, ignored */ |
221 | incr_cmd->ptr[j++] = 0; | 226 | gk20a_mem_wr32(c->g, incr_cmd->mem, off++, 0); |
222 | /* syncpoint_b */ | 227 | /* syncpoint_b */ |
223 | incr_cmd->ptr[j++] = 0x2001001D; | 228 | gk20a_mem_wr32(c->g, incr_cmd->mem, off++, 0x2001001D); |
224 | /* syncpt_id, incr */ | 229 | /* syncpt_id, incr */ |
225 | incr_cmd->ptr[j++] = (sp->id << 8) | 0x1; | 230 | gk20a_mem_wr32(c->g, incr_cmd->mem, off++, (sp->id << 8) | 0x1); |
226 | /* syncpoint_b */ | 231 | /* syncpoint_b */ |
227 | incr_cmd->ptr[j++] = 0x2001001D; | 232 | gk20a_mem_wr32(c->g, incr_cmd->mem, off++, 0x2001001D); |
228 | /* syncpt_id, incr */ | 233 | /* syncpt_id, incr */ |
229 | incr_cmd->ptr[j++] = (sp->id << 8) | 0x1; | 234 | gk20a_mem_wr32(c->g, incr_cmd->mem, off++, (sp->id << 8) | 0x1); |
230 | WARN_ON(j != incr_cmd_size); | 235 | |
236 | WARN_ON(off - incr_cmd->off != incr_cmd_size); | ||
231 | 237 | ||
232 | thresh = nvhost_syncpt_incr_max_ext(sp->host1x_pdev, sp->id, 2); | 238 | thresh = nvhost_syncpt_incr_max_ext(sp->host1x_pdev, sp->id, 2); |
233 | 239 | ||
@@ -414,38 +420,39 @@ static void gk20a_channel_semaphore_launcher( | |||
414 | } | 420 | } |
415 | #endif | 421 | #endif |
416 | 422 | ||
417 | static int add_sema_cmd(u32 *ptr, u64 sema, u32 payload, | 423 | static int add_sema_cmd(struct gk20a *g, struct priv_cmd_entry *cmd, |
418 | bool acquire, bool wfi) | 424 | u64 sema, u32 payload, bool acquire, bool wfi) |
419 | { | 425 | { |
420 | int i = 0; | 426 | u32 off = cmd->off; |
421 | /* semaphore_a */ | 427 | /* semaphore_a */ |
422 | ptr[i++] = 0x20010004; | 428 | gk20a_mem_wr32(g, cmd->mem, off++, 0x20010004); |
423 | /* offset_upper */ | 429 | /* offset_upper */ |
424 | ptr[i++] = (sema >> 32) & 0xff; | 430 | gk20a_mem_wr32(g, cmd->mem, off++, (sema >> 32) & 0xff); |
425 | /* semaphore_b */ | 431 | /* semaphore_b */ |
426 | ptr[i++] = 0x20010005; | 432 | gk20a_mem_wr32(g, cmd->mem, off++, 0x20010005); |
427 | /* offset */ | 433 | /* offset */ |
428 | ptr[i++] = sema & 0xffffffff; | 434 | gk20a_mem_wr32(g, cmd->mem, off++, sema & 0xffffffff); |
429 | /* semaphore_c */ | 435 | /* semaphore_c */ |
430 | ptr[i++] = 0x20010006; | 436 | gk20a_mem_wr32(g, cmd->mem, off++, 0x20010006); |
431 | /* payload */ | 437 | /* payload */ |
432 | ptr[i++] = payload; | 438 | gk20a_mem_wr32(g, cmd->mem, off++, payload); |
433 | if (acquire) { | 439 | if (acquire) { |
434 | /* semaphore_d */ | 440 | /* semaphore_d */ |
435 | ptr[i++] = 0x20010007; | 441 | gk20a_mem_wr32(g, cmd->mem, off++, 0x20010007); |
436 | /* operation: acq_geq, switch_en */ | 442 | /* operation: acq_geq, switch_en */ |
437 | ptr[i++] = 0x4 | (0x1 << 12); | 443 | gk20a_mem_wr32(g, cmd->mem, off++, 0x4 | (0x1 << 12)); |
438 | } else { | 444 | } else { |
439 | /* semaphore_d */ | 445 | /* semaphore_d */ |
440 | ptr[i++] = 0x20010007; | 446 | gk20a_mem_wr32(g, cmd->mem, off++, 0x20010007); |
441 | /* operation: release, wfi */ | 447 | /* operation: release, wfi */ |
442 | ptr[i++] = 0x2 | ((wfi ? 0x0 : 0x1) << 20); | 448 | gk20a_mem_wr32(g, cmd->mem, off++, |
449 | 0x2 | ((wfi ? 0x0 : 0x1) << 20)); | ||
443 | /* non_stall_int */ | 450 | /* non_stall_int */ |
444 | ptr[i++] = 0x20010008; | 451 | gk20a_mem_wr32(g, cmd->mem, off++, 0x20010008); |
445 | /* ignored */ | 452 | /* ignored */ |
446 | ptr[i++] = 0; | 453 | gk20a_mem_wr32(g, cmd->mem, off++, 0); |
447 | } | 454 | } |
448 | return i; | 455 | return off - cmd->off; |
449 | } | 456 | } |
450 | 457 | ||
451 | static int gk20a_channel_semaphore_wait_syncpt( | 458 | static int gk20a_channel_semaphore_wait_syncpt( |
@@ -506,7 +513,7 @@ static int gk20a_channel_semaphore_wait_fd( | |||
506 | 513 | ||
507 | va = gk20a_semaphore_gpu_va(w->sema, c->vm); | 514 | va = gk20a_semaphore_gpu_va(w->sema, c->vm); |
508 | /* GPU unblocked when the semaphore value becomes 1. */ | 515 | /* GPU unblocked when the semaphore value becomes 1. */ |
509 | written = add_sema_cmd(wait_cmd->ptr, va, 1, true, false); | 516 | written = add_sema_cmd(c->g, wait_cmd, va, 1, true, false); |
510 | 517 | ||
511 | WARN_ON(written != wait_cmd->size); | 518 | WARN_ON(written != wait_cmd->size); |
512 | ret = sync_fence_wait_async(sync_fence, &w->waiter); | 519 | ret = sync_fence_wait_async(sync_fence, &w->waiter); |
@@ -575,7 +582,7 @@ static int __gk20a_channel_semaphore_incr( | |||
575 | 582 | ||
576 | /* Release the completion semaphore. */ | 583 | /* Release the completion semaphore. */ |
577 | va = gk20a_semaphore_gpu_va(semaphore, c->vm); | 584 | va = gk20a_semaphore_gpu_va(semaphore, c->vm); |
578 | written = add_sema_cmd(incr_cmd->ptr, va, 1, false, wfi_cmd); | 585 | written = add_sema_cmd(c->g, incr_cmd, va, 1, false, wfi_cmd); |
579 | WARN_ON(written != incr_cmd_size); | 586 | WARN_ON(written != incr_cmd_size); |
580 | 587 | ||
581 | *fence = gk20a_fence_from_semaphore(sp->timeline, semaphore, | 588 | *fence = gk20a_fence_from_semaphore(sp->timeline, semaphore, |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index e9ac8f18..d943b231 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -164,7 +164,8 @@ struct priv_cmd_queue { | |||
164 | }; | 164 | }; |
165 | 165 | ||
166 | struct priv_cmd_entry { | 166 | struct priv_cmd_entry { |
167 | u32 *ptr; | 167 | struct mem_desc *mem; |
168 | u32 off; /* offset in mem, in u32 entries */ | ||
168 | u64 gva; | 169 | u64 gva; |
169 | u32 get; /* start of entry in queue */ | 170 | u32 get; /* start of entry in queue */ |
170 | u32 size; /* in words */ | 171 | u32 size; /* in words */ |