diff options
-rw-r--r-- | drivers/gpu/nvgpu/common/semaphore.c | 24 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 6 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 21 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/sync_gk20a.c | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/semaphore.h | 62 |
5 files changed, 62 insertions, 55 deletions
diff --git a/drivers/gpu/nvgpu/common/semaphore.c b/drivers/gpu/nvgpu/common/semaphore.c index dfed3588..b2238bb7 100644 --- a/drivers/gpu/nvgpu/common/semaphore.c +++ b/drivers/gpu/nvgpu/common/semaphore.c | |||
@@ -376,10 +376,10 @@ static int __nvgpu_init_hw_sema(struct channel_gk20a *ch) | |||
376 | 376 | ||
377 | ch->hw_sema = hw_sema; | 377 | ch->hw_sema = hw_sema; |
378 | hw_sema->ch = ch; | 378 | hw_sema->ch = ch; |
379 | hw_sema->p = p; | 379 | hw_sema->location.pool = p; |
380 | hw_sema->idx = hw_sema_idx; | 380 | hw_sema->location.offset = SEMAPHORE_SIZE * hw_sema_idx; |
381 | hw_sema->offset = SEMAPHORE_SIZE * hw_sema_idx; | 381 | current_value = nvgpu_mem_rd(ch->g, &p->rw_mem, |
382 | current_value = nvgpu_mem_rd(ch->g, &p->rw_mem, hw_sema->offset); | 382 | hw_sema->location.offset); |
383 | nvgpu_atomic_set(&hw_sema->next_value, current_value); | 383 | nvgpu_atomic_set(&hw_sema->next_value, current_value); |
384 | 384 | ||
385 | nvgpu_mutex_release(&p->pool_lock); | 385 | nvgpu_mutex_release(&p->pool_lock); |
@@ -399,15 +399,16 @@ fail: | |||
399 | void nvgpu_semaphore_free_hw_sema(struct channel_gk20a *ch) | 399 | void nvgpu_semaphore_free_hw_sema(struct channel_gk20a *ch) |
400 | { | 400 | { |
401 | struct nvgpu_semaphore_pool *p = ch->vm->sema_pool; | 401 | struct nvgpu_semaphore_pool *p = ch->vm->sema_pool; |
402 | struct nvgpu_semaphore_int *hw_sema = ch->hw_sema; | ||
403 | int idx = hw_sema->location.offset / SEMAPHORE_SIZE; | ||
402 | 404 | ||
403 | BUG_ON(!p); | 405 | BUG_ON(!p); |
404 | 406 | ||
405 | nvgpu_mutex_acquire(&p->pool_lock); | 407 | nvgpu_mutex_acquire(&p->pool_lock); |
406 | 408 | ||
407 | clear_bit(ch->hw_sema->idx, p->semas_alloced); | 409 | clear_bit(idx, p->semas_alloced); |
408 | 410 | ||
409 | /* Make sure that when the ch is re-opened it will get a new HW sema. */ | 411 | nvgpu_kfree(ch->g, hw_sema); |
410 | nvgpu_kfree(ch->g, ch->hw_sema); | ||
411 | ch->hw_sema = NULL; | 412 | ch->hw_sema = NULL; |
412 | 413 | ||
413 | nvgpu_mutex_release(&p->pool_lock); | 414 | nvgpu_mutex_release(&p->pool_lock); |
@@ -435,14 +436,15 @@ struct nvgpu_semaphore *nvgpu_semaphore_alloc(struct channel_gk20a *ch) | |||
435 | return NULL; | 436 | return NULL; |
436 | 437 | ||
437 | nvgpu_ref_init(&s->ref); | 438 | nvgpu_ref_init(&s->ref); |
438 | s->hw_sema = ch->hw_sema; | 439 | s->g = ch->g; |
440 | s->location = ch->hw_sema->location; | ||
439 | nvgpu_atomic_set(&s->value, 0); | 441 | nvgpu_atomic_set(&s->value, 0); |
440 | 442 | ||
441 | /* | 443 | /* |
442 | * Take a ref on the pool so that we can keep this pool alive for | 444 | * Take a ref on the pool so that we can keep this pool alive for |
443 | * as long as this semaphore is alive. | 445 | * as long as this semaphore is alive. |
444 | */ | 446 | */ |
445 | nvgpu_semaphore_pool_get(s->hw_sema->p); | 447 | nvgpu_semaphore_pool_get(s->location.pool); |
446 | 448 | ||
447 | gpu_sema_dbg(ch->g, "Allocated semaphore (c=%d)", ch->chid); | 449 | gpu_sema_dbg(ch->g, "Allocated semaphore (c=%d)", ch->chid); |
448 | 450 | ||
@@ -454,9 +456,9 @@ static void nvgpu_semaphore_free(struct nvgpu_ref *ref) | |||
454 | struct nvgpu_semaphore *s = | 456 | struct nvgpu_semaphore *s = |
455 | container_of(ref, struct nvgpu_semaphore, ref); | 457 | container_of(ref, struct nvgpu_semaphore, ref); |
456 | 458 | ||
457 | nvgpu_semaphore_pool_put(s->hw_sema->p); | 459 | nvgpu_semaphore_pool_put(s->location.pool); |
458 | 460 | ||
459 | nvgpu_kfree(s->hw_sema->ch->g, s); | 461 | nvgpu_kfree(s->g, s); |
460 | } | 462 | } |
461 | 463 | ||
462 | void nvgpu_semaphore_put(struct nvgpu_semaphore *s) | 464 | void nvgpu_semaphore_put(struct nvgpu_semaphore *s) |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 5cd7223f..fb7406da 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -215,7 +215,8 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch) | |||
215 | job = &ch->joblist.pre_alloc.jobs[tmp_get]; | 215 | job = &ch->joblist.pre_alloc.jobs[tmp_get]; |
216 | if (job->post_fence->semaphore) { | 216 | if (job->post_fence->semaphore) { |
217 | __nvgpu_semaphore_release( | 217 | __nvgpu_semaphore_release( |
218 | job->post_fence->semaphore, true); | 218 | job->post_fence->semaphore, true, |
219 | ch->hw_sema); | ||
219 | released_job_semaphore = true; | 220 | released_job_semaphore = true; |
220 | } | 221 | } |
221 | tmp_get = (tmp_get + 1) % ch->joblist.pre_alloc.length; | 222 | tmp_get = (tmp_get + 1) % ch->joblist.pre_alloc.length; |
@@ -226,7 +227,8 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch) | |||
226 | channel_gk20a_job, list) { | 227 | channel_gk20a_job, list) { |
227 | if (job->post_fence->semaphore) { | 228 | if (job->post_fence->semaphore) { |
228 | __nvgpu_semaphore_release( | 229 | __nvgpu_semaphore_release( |
229 | job->post_fence->semaphore, true); | 230 | job->post_fence->semaphore, true, |
231 | ch->hw_sema); | ||
230 | released_job_semaphore = true; | 232 | released_job_semaphore = true; |
231 | } | 233 | } |
232 | } | 234 | } |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index 45d9ae9c..4b1be8b9 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | |||
@@ -494,7 +494,7 @@ static void gk20a_channel_semaphore_launcher( | |||
494 | "wait completed (%d) for fence %p '%s', triggering gpu work", | 494 | "wait completed (%d) for fence %p '%s', triggering gpu work", |
495 | err, fence, fence->name); | 495 | err, fence, fence->name); |
496 | sync_fence_put(fence); | 496 | sync_fence_put(fence); |
497 | nvgpu_semaphore_release(w->sema); | 497 | nvgpu_semaphore_release(w->sema, w->ch->hw_sema); |
498 | nvgpu_semaphore_put(w->sema); | 498 | nvgpu_semaphore_put(w->sema); |
499 | nvgpu_kfree(g, w); | 499 | nvgpu_kfree(g, w); |
500 | } | 500 | } |
@@ -522,7 +522,7 @@ static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c, | |||
522 | * incr the underlying sema next_value. | 522 | * incr the underlying sema next_value. |
523 | */ | 523 | */ |
524 | if (!acquire) | 524 | if (!acquire) |
525 | nvgpu_semaphore_incr(s); | 525 | nvgpu_semaphore_incr(s, c->hw_sema); |
526 | 526 | ||
527 | /* semaphore_a */ | 527 | /* semaphore_a */ |
528 | nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010004); | 528 | nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010004); |
@@ -561,17 +561,18 @@ static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c, | |||
561 | } | 561 | } |
562 | 562 | ||
563 | if (acquire) | 563 | if (acquire) |
564 | gpu_sema_verbose_dbg(g, "(A) c=%d ACQ_GE %-4u owner=%-3d" | 564 | gpu_sema_verbose_dbg(g, "(A) c=%d ACQ_GE %-4u pool=%-3d" |
565 | "va=0x%llx cmd_mem=0x%llx b=0x%llx off=%u", | 565 | "va=0x%llx cmd_mem=0x%llx b=0x%llx off=%u", |
566 | ch, nvgpu_semaphore_get_value(s), | 566 | ch, nvgpu_semaphore_get_value(s), |
567 | s->hw_sema->ch->chid, va, cmd->gva, | 567 | s->location.pool->page_idx, va, cmd->gva, |
568 | cmd->mem->gpu_va, ob); | 568 | cmd->mem->gpu_va, ob); |
569 | else | 569 | else |
570 | gpu_sema_verbose_dbg(g, "(R) c=%d INCR %u (%u) va=0x%llx " | 570 | gpu_sema_verbose_dbg(g, "(R) c=%d INCR %u (%u) pool=%-3d" |
571 | "cmd_mem=0x%llx b=0x%llx off=%u", | 571 | "va=0x%llx cmd_mem=0x%llx b=0x%llx off=%u", |
572 | ch, nvgpu_semaphore_get_value(s), | 572 | ch, nvgpu_semaphore_get_value(s), |
573 | nvgpu_semaphore_read(s), va, cmd->gva, | 573 | nvgpu_semaphore_read(s), |
574 | cmd->mem->gpu_va, ob); | 574 | s->location.pool->page_idx, |
575 | va, cmd->gva, cmd->mem->gpu_va, ob); | ||
575 | } | 576 | } |
576 | 577 | ||
577 | static int gk20a_channel_semaphore_wait_syncpt( | 578 | static int gk20a_channel_semaphore_wait_syncpt( |
@@ -714,7 +715,7 @@ static int gk20a_channel_semaphore_wait_fd( | |||
714 | 715 | ||
715 | /* worker takes one reference */ | 716 | /* worker takes one reference */ |
716 | nvgpu_semaphore_get(w->sema); | 717 | nvgpu_semaphore_get(w->sema); |
717 | nvgpu_semaphore_incr(w->sema); | 718 | nvgpu_semaphore_incr(w->sema, c->hw_sema); |
718 | 719 | ||
719 | /* GPU unblocked when the semaphore value increments. */ | 720 | /* GPU unblocked when the semaphore value increments. */ |
720 | add_sema_cmd(c->g, c, w->sema, wait_cmd, 8, true, false); | 721 | add_sema_cmd(c->g, c, w->sema, wait_cmd, 8, true, false); |
@@ -740,7 +741,7 @@ static int gk20a_channel_semaphore_wait_fd( | |||
740 | */ | 741 | */ |
741 | if (ret == 1) { | 742 | if (ret == 1) { |
742 | sync_fence_put(sync_fence); | 743 | sync_fence_put(sync_fence); |
743 | nvgpu_semaphore_release(w->sema); | 744 | nvgpu_semaphore_release(w->sema, c->hw_sema); |
744 | nvgpu_semaphore_put(w->sema); | 745 | nvgpu_semaphore_put(w->sema); |
745 | } | 746 | } |
746 | 747 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c index cd0c094f..f6d16b90 100644 --- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c | |||
@@ -338,8 +338,8 @@ static void gk20a_sync_pt_value_str_for_sema(struct gk20a_sync_pt *pt, | |||
338 | { | 338 | { |
339 | struct nvgpu_semaphore *s = pt->sema; | 339 | struct nvgpu_semaphore *s = pt->sema; |
340 | 340 | ||
341 | snprintf(str, size, "S: c=%d [v=%u,r_v=%u]", | 341 | snprintf(str, size, "S: pool=%d [v=%u,r_v=%u]", |
342 | s->hw_sema->ch->chid, | 342 | s->location.pool->page_idx, |
343 | nvgpu_semaphore_get_value(s), | 343 | nvgpu_semaphore_get_value(s), |
344 | nvgpu_semaphore_read(s)); | 344 | nvgpu_semaphore_read(s)); |
345 | } | 345 | } |
diff --git a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h index 9c74d300..e66b2188 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h +++ b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h | |||
@@ -50,15 +50,18 @@ | |||
50 | 50 | ||
51 | struct nvgpu_semaphore_sea; | 51 | struct nvgpu_semaphore_sea; |
52 | 52 | ||
53 | struct nvgpu_semaphore_loc { | ||
54 | struct nvgpu_semaphore_pool *pool; /* Pool that owns this sema. */ | ||
55 | u32 offset; /* Byte offset into the pool. */ | ||
56 | }; | ||
57 | |||
53 | /* | 58 | /* |
54 | * Underlying semaphore data structure. This semaphore can be shared amongst | 59 | * Underlying semaphore data structure. This semaphore can be shared amongst |
55 | * other semaphore instances. | 60 | * other semaphore instances. |
56 | */ | 61 | */ |
57 | struct nvgpu_semaphore_int { | 62 | struct nvgpu_semaphore_int { |
58 | int idx; /* Semaphore index. */ | 63 | struct nvgpu_semaphore_loc location; |
59 | u32 offset; /* Offset into the pool. */ | ||
60 | nvgpu_atomic_t next_value; /* Next available value. */ | 64 | nvgpu_atomic_t next_value; /* Next available value. */ |
61 | struct nvgpu_semaphore_pool *p; /* Pool that owns this sema. */ | ||
62 | struct channel_gk20a *ch; /* Channel that owns this sema. */ | 65 | struct channel_gk20a *ch; /* Channel that owns this sema. */ |
63 | }; | 66 | }; |
64 | 67 | ||
@@ -68,7 +71,8 @@ struct nvgpu_semaphore_int { | |||
68 | * semaphore to be shared among an essentially infinite number of submits. | 71 | * semaphore to be shared among an essentially infinite number of submits. |
69 | */ | 72 | */ |
70 | struct nvgpu_semaphore { | 73 | struct nvgpu_semaphore { |
71 | struct nvgpu_semaphore_int *hw_sema; | 74 | struct gk20a *g; |
75 | struct nvgpu_semaphore_loc location; | ||
72 | 76 | ||
73 | nvgpu_atomic_t value; | 77 | nvgpu_atomic_t value; |
74 | int incremented; | 78 | int incremented; |
@@ -195,8 +199,8 @@ void nvgpu_semaphore_free_hw_sema(struct channel_gk20a *ch); | |||
195 | */ | 199 | */ |
196 | static inline u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s) | 200 | static inline u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s) |
197 | { | 201 | { |
198 | return __nvgpu_semaphore_pool_gpu_va(s->hw_sema->p, false) + | 202 | return __nvgpu_semaphore_pool_gpu_va(s->location.pool, false) + |
199 | s->hw_sema->offset; | 203 | s->location.offset; |
200 | } | 204 | } |
201 | 205 | ||
202 | /* | 206 | /* |
@@ -205,20 +209,20 @@ static inline u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s) | |||
205 | */ | 209 | */ |
206 | static inline u64 nvgpu_semaphore_gpu_ro_va(struct nvgpu_semaphore *s) | 210 | static inline u64 nvgpu_semaphore_gpu_ro_va(struct nvgpu_semaphore *s) |
207 | { | 211 | { |
208 | return __nvgpu_semaphore_pool_gpu_va(s->hw_sema->p, true) + | 212 | return __nvgpu_semaphore_pool_gpu_va(s->location.pool, true) + |
209 | s->hw_sema->offset; | 213 | s->location.offset; |
210 | } | 214 | } |
211 | 215 | ||
212 | static inline u64 nvgpu_hw_sema_addr(struct nvgpu_semaphore_int *hw_sema) | 216 | static inline u64 nvgpu_hw_sema_addr(struct nvgpu_semaphore_int *hw_sema) |
213 | { | 217 | { |
214 | return __nvgpu_semaphore_pool_gpu_va(hw_sema->p, true) + | 218 | return __nvgpu_semaphore_pool_gpu_va(hw_sema->location.pool, true) + |
215 | hw_sema->offset; | 219 | hw_sema->location.offset; |
216 | } | 220 | } |
217 | 221 | ||
218 | static inline u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema) | 222 | static inline u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema) |
219 | { | 223 | { |
220 | return nvgpu_mem_rd(hw_sema->ch->g, | 224 | return nvgpu_mem_rd(hw_sema->ch->g, &hw_sema->location.pool->rw_mem, |
221 | &hw_sema->p->rw_mem, hw_sema->offset); | 225 | hw_sema->location.offset); |
222 | } | 226 | } |
223 | 227 | ||
224 | /* | 228 | /* |
@@ -226,7 +230,8 @@ static inline u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema) | |||
226 | */ | 230 | */ |
227 | static inline u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s) | 231 | static inline u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s) |
228 | { | 232 | { |
229 | return __nvgpu_semaphore_read(s->hw_sema); | 233 | return nvgpu_mem_rd(s->g, &s->location.pool->rw_mem, |
234 | s->location.offset); | ||
230 | } | 235 | } |
231 | 236 | ||
232 | /* | 237 | /* |
@@ -270,19 +275,14 @@ static inline bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s) | |||
270 | return !nvgpu_semaphore_is_released(s); | 275 | return !nvgpu_semaphore_is_released(s); |
271 | } | 276 | } |
272 | 277 | ||
273 | static inline u32 nvgpu_semaphore_next_value(struct nvgpu_semaphore *s) | ||
274 | { | ||
275 | return (u32)nvgpu_atomic_read(&s->hw_sema->next_value); | ||
276 | } | ||
277 | |||
278 | /* | 278 | /* |
279 | * If @force is set then this will not wait for the underlying semaphore to | 279 | * If @force is set then this will not wait for the underlying semaphore to |
280 | * catch up to the passed semaphore threshold. | 280 | * catch up to the passed semaphore threshold. |
281 | */ | 281 | */ |
282 | static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s, | 282 | static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s, |
283 | bool force) | 283 | bool force, |
284 | struct nvgpu_semaphore_int *hw_sema) | ||
284 | { | 285 | { |
285 | struct nvgpu_semaphore_int *hw_sema = s->hw_sema; | ||
286 | u32 current_val; | 286 | u32 current_val; |
287 | u32 threshold = nvgpu_semaphore_get_value(s); | 287 | u32 threshold = nvgpu_semaphore_get_value(s); |
288 | int attempts = 0; | 288 | int attempts = 0; |
@@ -312,16 +312,17 @@ static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s, | |||
312 | if (__nvgpu_semaphore_value_released(threshold, current_val)) | 312 | if (__nvgpu_semaphore_value_released(threshold, current_val)) |
313 | return; | 313 | return; |
314 | 314 | ||
315 | nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->p->rw_mem, hw_sema->offset, | 315 | nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->location.pool->rw_mem, |
316 | threshold); | 316 | hw_sema->location.offset, threshold); |
317 | 317 | ||
318 | gpu_sema_verbose_dbg(hw_sema->p->sema_sea->gk20a, | 318 | gpu_sema_verbose_dbg(hw_sema->location.pool->sema_sea->gk20a, |
319 | "(c=%d) WRITE %u", hw_sema->ch->chid, threshold); | 319 | "(c=%d) WRITE %u", hw_sema->ch->chid, threshold); |
320 | } | 320 | } |
321 | 321 | ||
322 | static inline void nvgpu_semaphore_release(struct nvgpu_semaphore *s) | 322 | static inline void nvgpu_semaphore_release(struct nvgpu_semaphore *s, |
323 | struct nvgpu_semaphore_int *hw_sema) | ||
323 | { | 324 | { |
324 | __nvgpu_semaphore_release(s, false); | 325 | __nvgpu_semaphore_release(s, false, hw_sema); |
325 | } | 326 | } |
326 | 327 | ||
327 | /* | 328 | /* |
@@ -333,16 +334,17 @@ static inline void nvgpu_semaphore_release(struct nvgpu_semaphore *s) | |||
333 | * | 334 | * |
334 | * Also used to prep a semaphore for an INCR by the GPU. | 335 | * Also used to prep a semaphore for an INCR by the GPU. |
335 | */ | 336 | */ |
336 | static inline void nvgpu_semaphore_incr(struct nvgpu_semaphore *s) | 337 | static inline void nvgpu_semaphore_incr(struct nvgpu_semaphore *s, |
338 | struct nvgpu_semaphore_int *hw_sema) | ||
337 | { | 339 | { |
338 | BUG_ON(s->incremented); | 340 | BUG_ON(s->incremented); |
339 | 341 | ||
340 | nvgpu_atomic_set(&s->value, nvgpu_atomic_add_return(1, &s->hw_sema->next_value)); | 342 | nvgpu_atomic_set(&s->value, nvgpu_atomic_add_return(1, &hw_sema->next_value)); |
341 | s->incremented = 1; | 343 | s->incremented = 1; |
342 | 344 | ||
343 | gpu_sema_verbose_dbg(s->hw_sema->p->sema_sea->gk20a, | 345 | gpu_sema_verbose_dbg(s->g, |
344 | "INCR sema for c=%d (%u)", | 346 | "INCR sema for c=%d (%u)", |
345 | s->hw_sema->ch->chid, | 347 | hw_sema->ch->chid, |
346 | nvgpu_semaphore_next_value(s)); | 348 | nvgpu_atomic_read(&hw_sema->next_value)); |
347 | } | 349 | } |
348 | #endif | 350 | #endif |