diff options
author | Alex Waterman <alexw@nvidia.com> | 2016-04-29 20:03:03 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2016-08-30 13:04:30 -0400 |
commit | 9eac0fd84921359ded2acdf920de5592322ad93c (patch) | |
tree | 5d250d475a2dc4629df39cee285a4429660c87f1 /drivers/gpu/nvgpu | |
parent | 0e69c6707b974726459759464bc7876afe894740 (diff) |
gpu: nvgpu: Add debugging to the semaphore code
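Add GPU debugging to the semaphore code. This introduces two new debug categories, gpu_dbg_sema and gpu_dbg_sema_v (verbose), together with the gpu_sema_dbg() and gpu_sema_verbose_dbg() wrapper macros, and uses them to trace semaphore sea creation, semaphore pool allocation/mapping/unmapping/freeing, semaphore allocation, sea lock acquire/release, semaphore increment/release, and the acquire/release commands emitted by add_sema_cmd().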
Bug 1732449
JIRA DNVGPU-12
Change-Id: I98466570cf8d234b49a7f85d88c834648ddaaaee
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1198594
(cherry picked from commit 420809cc31fcdddde32b8e59721676c67b45f592)
Reviewed-on: http://git-master/r/1153671
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 18 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c | 28 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h | 13 |
4 files changed, 59 insertions, 2 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index 9c8911e9..e4972610 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | |||
@@ -428,9 +428,12 @@ static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c, | |||
428 | struct gk20a_semaphore *s, struct priv_cmd_entry *cmd, | 428 | struct gk20a_semaphore *s, struct priv_cmd_entry *cmd, |
429 | int cmd_size, bool acquire, bool wfi) | 429 | int cmd_size, bool acquire, bool wfi) |
430 | { | 430 | { |
431 | u32 off = cmd->off; | 431 | int ch = c->hw_chid; |
432 | u32 ob, off = cmd->off; | ||
432 | u64 va; | 433 | u64 va; |
433 | 434 | ||
435 | ob = off; | ||
436 | |||
434 | /* | 437 | /* |
435 | * RO for acquire (since we just need to read the mem) and RW for | 438 | * RO for acquire (since we just need to read the mem) and RW for |
436 | * release since we will need to write back to the semaphore memory. | 439 | * release since we will need to write back to the semaphore memory. |
@@ -480,6 +483,19 @@ static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c, | |||
480 | /* ignored */ | 483 | /* ignored */ |
481 | gk20a_mem_wr32(g, cmd->mem, off++, 0); | 484 | gk20a_mem_wr32(g, cmd->mem, off++, 0); |
482 | } | 485 | } |
486 | |||
487 | if (acquire) | ||
488 | gpu_sema_verbose_dbg("(A) c=%d ACQ_GE %-4u owner=%-3d" | ||
489 | "va=0x%llx cmd_mem=0x%llx b=0x%llx off=%u", | ||
490 | ch, gk20a_semaphore_get_value(s), | ||
491 | s->hw_sema->ch->hw_chid, va, cmd->gva, | ||
492 | cmd->mem->gpu_va, ob); | ||
493 | else | ||
494 | gpu_sema_verbose_dbg("(R) c=%d INCR %u (%u) va=0x%llx " | ||
495 | "cmd_mem=0x%llx b=0x%llx off=%u", | ||
496 | ch, gk20a_semaphore_get_value(s), | ||
497 | readl(s->hw_sema->value), va, cmd->gva, | ||
498 | cmd->mem->gpu_va, ob); | ||
483 | } | 499 | } |
484 | 500 | ||
485 | static int gk20a_channel_semaphore_wait_syncpt( | 501 | static int gk20a_channel_semaphore_wait_syncpt( |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 77a84e59..c255f8f8 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -953,6 +953,8 @@ enum gk20a_dbg_categories { | |||
953 | gpu_dbg_ctxsw = BIT(12), /* ctxsw tracing */ | 953 | gpu_dbg_ctxsw = BIT(12), /* ctxsw tracing */ |
954 | gpu_dbg_sched = BIT(13), /* sched control tracing */ | 954 | gpu_dbg_sched = BIT(13), /* sched control tracing */ |
955 | gpu_dbg_map_v = BIT(14), /* verbose mem mappings */ | 955 | gpu_dbg_map_v = BIT(14), /* verbose mem mappings */ |
956 | gpu_dbg_sema = BIT(15), /* semaphore debugging */ | ||
957 | gpu_dbg_sema_v = BIT(16), /* verbose semaphore debugging */ | ||
956 | gpu_dbg_mem = BIT(31), /* memory accesses, very verbose */ | 958 | gpu_dbg_mem = BIT(31), /* memory accesses, very verbose */ |
957 | }; | 959 | }; |
958 | 960 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c index 396e7419..0fa31468 100644 --- a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c | |||
@@ -29,12 +29,15 @@ | |||
29 | 29 | ||
30 | #define __lock_sema_sea(s) \ | 30 | #define __lock_sema_sea(s) \ |
31 | do { \ | 31 | do { \ |
32 | gpu_sema_verbose_dbg("Acquiring sema lock..."); \ | ||
32 | mutex_lock(&s->sea_lock); \ | 33 | mutex_lock(&s->sea_lock); \ |
34 | gpu_sema_verbose_dbg("Sema lock aquried!"); \ | ||
33 | } while (0) | 35 | } while (0) |
34 | 36 | ||
35 | #define __unlock_sema_sea(s) \ | 37 | #define __unlock_sema_sea(s) \ |
36 | do { \ | 38 | do { \ |
37 | mutex_unlock(&s->sea_lock); \ | 39 | mutex_unlock(&s->sea_lock); \ |
40 | gpu_sema_verbose_dbg("Released sema lock"); \ | ||
38 | } while (0) | 41 | } while (0) |
39 | 42 | ||
40 | /* | 43 | /* |
@@ -89,11 +92,13 @@ struct gk20a_semaphore_sea *gk20a_semaphore_sea_create(struct gk20a *g) | |||
89 | if (__gk20a_semaphore_sea_grow(g->sema_sea)) | 92 | if (__gk20a_semaphore_sea_grow(g->sema_sea)) |
90 | goto cleanup; | 93 | goto cleanup; |
91 | 94 | ||
95 | gpu_sema_dbg("Created semaphore sea!"); | ||
92 | return g->sema_sea; | 96 | return g->sema_sea; |
93 | 97 | ||
94 | cleanup: | 98 | cleanup: |
95 | kfree(g->sema_sea); | 99 | kfree(g->sema_sea); |
96 | g->sema_sea = NULL; | 100 | g->sema_sea = NULL; |
101 | gpu_sema_dbg("Failed to creat semaphore sea!"); | ||
97 | return NULL; | 102 | return NULL; |
98 | } | 103 | } |
99 | 104 | ||
@@ -144,11 +149,14 @@ struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc( | |||
144 | list_add(&p->pool_list_entry, &sea->pool_list); | 149 | list_add(&p->pool_list_entry, &sea->pool_list); |
145 | __unlock_sema_sea(sea); | 150 | __unlock_sema_sea(sea); |
146 | 151 | ||
152 | gpu_sema_dbg("Allocated semaphore pool: page-idx=%d", p->page_idx); | ||
153 | |||
147 | return p; | 154 | return p; |
148 | 155 | ||
149 | fail: | 156 | fail: |
150 | __unlock_sema_sea(sea); | 157 | __unlock_sema_sea(sea); |
151 | kfree(p); | 158 | kfree(p); |
159 | gpu_sema_dbg("Failed to allocate semaphore pool!"); | ||
152 | return ERR_PTR(err); | 160 | return ERR_PTR(err); |
153 | } | 161 | } |
154 | 162 | ||
@@ -162,9 +170,13 @@ int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *p, | |||
162 | int ents, err = 0; | 170 | int ents, err = 0; |
163 | u64 addr; | 171 | u64 addr; |
164 | 172 | ||
173 | gpu_sema_dbg("Mapping sempahore pool! (idx=%d)", p->page_idx); | ||
174 | |||
165 | p->cpu_va = vmap(&p->page, 1, 0, | 175 | p->cpu_va = vmap(&p->page, 1, 0, |
166 | pgprot_writecombine(PAGE_KERNEL)); | 176 | pgprot_writecombine(PAGE_KERNEL)); |
167 | 177 | ||
178 | gpu_sema_dbg(" %d: CPU VA = 0x%p!", p->page_idx, p->cpu_va); | ||
179 | |||
168 | /* First do the RW mapping. */ | 180 | /* First do the RW mapping. */ |
169 | p->rw_sg_table = kzalloc(sizeof(*p->rw_sg_table), GFP_KERNEL); | 181 | p->rw_sg_table = kzalloc(sizeof(*p->rw_sg_table), GFP_KERNEL); |
170 | if (!p->rw_sg_table) | 182 | if (!p->rw_sg_table) |
@@ -185,6 +197,9 @@ int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *p, | |||
185 | goto fail_free_sgt; | 197 | goto fail_free_sgt; |
186 | } | 198 | } |
187 | 199 | ||
200 | gpu_sema_dbg(" %d: DMA addr = 0x%pad", p->page_idx, | ||
201 | &sg_dma_address(p->rw_sg_table->sgl)); | ||
202 | |||
188 | /* Map into the GPU... Doesn't need to be fixed. */ | 203 | /* Map into the GPU... Doesn't need to be fixed. */ |
189 | p->gpu_va = gk20a_gmmu_map(vm, &p->rw_sg_table, PAGE_SIZE, | 204 | p->gpu_va = gk20a_gmmu_map(vm, &p->rw_sg_table, PAGE_SIZE, |
190 | 0, gk20a_mem_flag_none, false, | 205 | 0, gk20a_mem_flag_none, false, |
@@ -194,6 +209,9 @@ int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *p, | |||
194 | goto fail_unmap_sgt; | 209 | goto fail_unmap_sgt; |
195 | } | 210 | } |
196 | 211 | ||
212 | gpu_sema_dbg(" %d: GPU read-write VA = 0x%llx", p->page_idx, | ||
213 | p->gpu_va); | ||
214 | |||
197 | /* | 215 | /* |
198 | * And now the global mapping. Take the sea lock so that we don't race | 216 | * And now the global mapping. Take the sea lock so that we don't race |
199 | * with a concurrent remap. | 217 | * with a concurrent remap. |
@@ -215,6 +233,9 @@ int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *p, | |||
215 | p->gpu_va_ro = addr; | 233 | p->gpu_va_ro = addr; |
216 | p->mapped = 1; | 234 | p->mapped = 1; |
217 | 235 | ||
236 | gpu_sema_dbg(" %d: GPU read-only VA = 0x%llx", p->page_idx, | ||
237 | p->gpu_va_ro); | ||
238 | |||
218 | __unlock_sema_sea(p->sema_sea); | 239 | __unlock_sema_sea(p->sema_sea); |
219 | 240 | ||
220 | return 0; | 241 | return 0; |
@@ -229,6 +250,7 @@ fail_free_sgt: | |||
229 | fail: | 250 | fail: |
230 | kfree(p->rw_sg_table); | 251 | kfree(p->rw_sg_table); |
231 | p->rw_sg_table = NULL; | 252 | p->rw_sg_table = NULL; |
253 | gpu_sema_dbg(" %d: Failed to map semaphore pool!", p->page_idx); | ||
232 | return err; | 254 | return err; |
233 | } | 255 | } |
234 | 256 | ||
@@ -260,13 +282,14 @@ void gk20a_semaphore_pool_unmap(struct gk20a_semaphore_pool *p, | |||
260 | kfree(p->rw_sg_table); | 282 | kfree(p->rw_sg_table); |
261 | p->rw_sg_table = NULL; | 283 | p->rw_sg_table = NULL; |
262 | 284 | ||
263 | gk20a_dbg_info("Unmapped sema-pool: idx = %d", p->page_idx); | ||
264 | list_for_each_entry(hw_sema, &p->hw_semas, hw_sema_list) | 285 | list_for_each_entry(hw_sema, &p->hw_semas, hw_sema_list) |
265 | /* | 286 | /* |
266 | * Make sure the mem addresses are all NULL so if this gets | 287 | * Make sure the mem addresses are all NULL so if this gets |
267 | * reused we will fault. | 288 | * reused we will fault. |
268 | */ | 289 | */ |
269 | hw_sema->value = NULL; | 290 | hw_sema->value = NULL; |
291 | |||
292 | gpu_sema_dbg("Unmapped semaphore pool! (idx=%d)", p->page_idx); | ||
270 | } | 293 | } |
271 | 294 | ||
272 | /* | 295 | /* |
@@ -291,6 +314,7 @@ static void gk20a_semaphore_pool_free(struct kref *ref) | |||
291 | list_for_each_entry_safe(hw_sema, tmp, &p->hw_semas, hw_sema_list) | 314 | list_for_each_entry_safe(hw_sema, tmp, &p->hw_semas, hw_sema_list) |
292 | kfree(hw_sema); | 315 | kfree(hw_sema); |
293 | 316 | ||
317 | gpu_sema_dbg("Freed semaphore pool! (idx=%d)", p->page_idx); | ||
294 | kfree(p); | 318 | kfree(p); |
295 | } | 319 | } |
296 | 320 | ||
@@ -415,6 +439,8 @@ struct gk20a_semaphore *gk20a_semaphore_alloc(struct channel_gk20a *ch) | |||
415 | */ | 439 | */ |
416 | gk20a_semaphore_pool_get(s->hw_sema->p); | 440 | gk20a_semaphore_pool_get(s->hw_sema->p); |
417 | 441 | ||
442 | gpu_sema_dbg("Allocated semaphore (c=%d)", ch->hw_chid); | ||
443 | |||
418 | return s; | 444 | return s; |
419 | } | 445 | } |
420 | 446 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h index 29a01ab8..d96037ce 100644 --- a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h | |||
@@ -21,6 +21,12 @@ | |||
21 | #include "gk20a.h" | 21 | #include "gk20a.h" |
22 | #include "mm_gk20a.h" | 22 | #include "mm_gk20a.h" |
23 | #include "channel_gk20a.h" | 23 | #include "channel_gk20a.h" |
24 | #include "gk20a_allocator.h" | ||
25 | |||
26 | #define gpu_sema_dbg(fmt, args...) \ | ||
27 | gk20a_dbg(gpu_dbg_sema, fmt, ##args) | ||
28 | #define gpu_sema_verbose_dbg(fmt, args...) \ | ||
29 | gk20a_dbg(gpu_dbg_sema_v, fmt, ##args) | ||
24 | 30 | ||
25 | /* | 31 | /* |
26 | * Max number of channels that can be used is 512. This of course needs to be | 32 | * Max number of channels that can be used is 512. This of course needs to be |
@@ -274,6 +280,9 @@ static inline void gk20a_semaphore_release(struct gk20a_semaphore *s) | |||
274 | return; | 280 | return; |
275 | 281 | ||
276 | writel(val, s->hw_sema->value); | 282 | writel(val, s->hw_sema->value); |
283 | |||
284 | gpu_sema_verbose_dbg("(c=%d) WRITE %u", | ||
285 | s->hw_sema->ch->hw_chid, val); | ||
277 | } | 286 | } |
278 | 287 | ||
279 | /* | 288 | /* |
@@ -291,5 +300,9 @@ static inline void gk20a_semaphore_incr(struct gk20a_semaphore *s) | |||
291 | 300 | ||
292 | atomic_set(&s->value, atomic_add_return(1, &s->hw_sema->next_value)); | 301 | atomic_set(&s->value, atomic_add_return(1, &s->hw_sema->next_value)); |
293 | s->incremented = 1; | 302 | s->incremented = 1; |
303 | |||
304 | gpu_sema_verbose_dbg("INCR sema for c=%d (%u)", | ||
305 | s->hw_sema->ch->hw_chid, | ||
306 | gk20a_semaphore_next_value(s)); | ||
294 | } | 307 | } |
295 | #endif | 308 | #endif |