-rw-r--r--  drivers/gpu/nvgpu/common/semaphore.c          | 24
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.c       |  6
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c  | 21
-rw-r--r--  drivers/gpu/nvgpu/gk20a/sync_gk20a.c          |  4
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/semaphore.h   | 62
5 files changed, 62 insertions, 55 deletions
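
The hunks below move a semaphore's location out of struct nvgpu_semaphore_int: struct nvgpu_semaphore now carries its own gk20a pointer plus a by-value nvgpu_semaphore_loc (pool + byte offset) instead of a pointer to the channel's hw_sema, and the operations that still touch the HW sema (incr, release) take it as an explicit argument. A minimal before/after sketch of the call-site change, using only names that appear in the diff:

	/* Before: the semaphore dereferenced its embedded hw_sema pointer. */
	nvgpu_semaphore_incr(s);
	nvgpu_semaphore_release(s);

	/* After: the caller passes the channel's HW sema explicitly. */
	nvgpu_semaphore_incr(s, c->hw_sema);
	nvgpu_semaphore_release(s, c->hw_sema);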
diff --git a/drivers/gpu/nvgpu/common/semaphore.c b/drivers/gpu/nvgpu/common/semaphore.c
index dfed3588..b2238bb7 100644
--- a/drivers/gpu/nvgpu/common/semaphore.c
+++ b/drivers/gpu/nvgpu/common/semaphore.c
@@ -376,10 +376,10 @@ static int __nvgpu_init_hw_sema(struct channel_gk20a *ch)
 
 	ch->hw_sema = hw_sema;
 	hw_sema->ch = ch;
-	hw_sema->p = p;
-	hw_sema->idx = hw_sema_idx;
-	hw_sema->offset = SEMAPHORE_SIZE * hw_sema_idx;
-	current_value = nvgpu_mem_rd(ch->g, &p->rw_mem, hw_sema->offset);
+	hw_sema->location.pool = p;
+	hw_sema->location.offset = SEMAPHORE_SIZE * hw_sema_idx;
+	current_value = nvgpu_mem_rd(ch->g, &p->rw_mem,
+				     hw_sema->location.offset);
 	nvgpu_atomic_set(&hw_sema->next_value, current_value);
 
 	nvgpu_mutex_release(&p->pool_lock);
@@ -399,15 +399,16 @@ fail:
 void nvgpu_semaphore_free_hw_sema(struct channel_gk20a *ch)
 {
 	struct nvgpu_semaphore_pool *p = ch->vm->sema_pool;
+	struct nvgpu_semaphore_int *hw_sema = ch->hw_sema;
+	int idx = hw_sema->location.offset / SEMAPHORE_SIZE;
 
 	BUG_ON(!p);
 
 	nvgpu_mutex_acquire(&p->pool_lock);
 
-	clear_bit(ch->hw_sema->idx, p->semas_alloced);
+	clear_bit(idx, p->semas_alloced);
 
-	/* Make sure that when the ch is re-opened it will get a new HW sema. */
-	nvgpu_kfree(ch->g, ch->hw_sema);
+	nvgpu_kfree(ch->g, hw_sema);
 	ch->hw_sema = NULL;
 
 	nvgpu_mutex_release(&p->pool_lock);
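
With the per-sema idx field gone, the free path recovers the allocation index from the byte offset instead. A one-line sketch of the arithmetic; SEMAPHORE_SIZE is the per-sema stride defined elsewhere in semaphore.h, and the concrete numbers below are only an assumed illustration:

	int idx = hw_sema->location.offset / SEMAPHORE_SIZE;
	/* e.g. if the stride were 16 bytes, offset 48 would map back to
	 * bit 3 in p->semas_alloced */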
@@ -435,14 +436,15 @@ struct nvgpu_semaphore *nvgpu_semaphore_alloc(struct channel_gk20a *ch)
 		return NULL;
 
 	nvgpu_ref_init(&s->ref);
-	s->hw_sema = ch->hw_sema;
+	s->g = ch->g;
+	s->location = ch->hw_sema->location;
 	nvgpu_atomic_set(&s->value, 0);
 
 	/*
 	 * Take a ref on the pool so that we can keep this pool alive for
 	 * as long as this semaphore is alive.
 	 */
-	nvgpu_semaphore_pool_get(s->hw_sema->p);
+	nvgpu_semaphore_pool_get(s->location.pool);
 
 	gpu_sema_dbg(ch->g, "Allocated semaphore (c=%d)", ch->chid);
 
@@ -454,9 +456,9 @@ static void nvgpu_semaphore_free(struct nvgpu_ref *ref)
 	struct nvgpu_semaphore *s =
 		container_of(ref, struct nvgpu_semaphore, ref);
 
-	nvgpu_semaphore_pool_put(s->hw_sema->p);
+	nvgpu_semaphore_pool_put(s->location.pool);
 
-	nvgpu_kfree(s->hw_sema->ch->g, s);
+	nvgpu_kfree(s->g, s);
 }
 
 void nvgpu_semaphore_put(struct nvgpu_semaphore *s)
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 5cd7223f..fb7406da 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -215,7 +215,8 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
 			job = &ch->joblist.pre_alloc.jobs[tmp_get];
 			if (job->post_fence->semaphore) {
 				__nvgpu_semaphore_release(
-					job->post_fence->semaphore, true);
+					job->post_fence->semaphore, true,
+					ch->hw_sema);
 				released_job_semaphore = true;
 			}
 			tmp_get = (tmp_get + 1) % ch->joblist.pre_alloc.length;
@@ -226,7 +227,8 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
 				channel_gk20a_job, list) {
 			if (job->post_fence->semaphore) {
 				__nvgpu_semaphore_release(
-					job->post_fence->semaphore, true);
+					job->post_fence->semaphore, true,
+					ch->hw_sema);
 				released_job_semaphore = true;
 			}
 		}
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index 45d9ae9c..4b1be8b9 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -494,7 +494,7 @@ static void gk20a_channel_semaphore_launcher(
 		  "wait completed (%d) for fence %p '%s', triggering gpu work",
 		  err, fence, fence->name);
 	sync_fence_put(fence);
-	nvgpu_semaphore_release(w->sema);
+	nvgpu_semaphore_release(w->sema, w->ch->hw_sema);
 	nvgpu_semaphore_put(w->sema);
 	nvgpu_kfree(g, w);
 }
@@ -522,7 +522,7 @@ static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c,
 	 * incr the underlying sema next_value.
 	 */
 	if (!acquire)
-		nvgpu_semaphore_incr(s);
+		nvgpu_semaphore_incr(s, c->hw_sema);
 
 	/* semaphore_a */
 	nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010004);
@@ -561,17 +561,18 @@
 	}
 
 	if (acquire)
-		gpu_sema_verbose_dbg(g, "(A) c=%d ACQ_GE %-4u owner=%-3d"
+		gpu_sema_verbose_dbg(g, "(A) c=%d ACQ_GE %-4u pool=%-3d"
 				     "va=0x%llx cmd_mem=0x%llx b=0x%llx off=%u",
 				     ch, nvgpu_semaphore_get_value(s),
-				     s->hw_sema->ch->chid, va, cmd->gva,
+				     s->location.pool->page_idx, va, cmd->gva,
 				     cmd->mem->gpu_va, ob);
 	else
-		gpu_sema_verbose_dbg(g, "(R) c=%d INCR %u (%u) va=0x%llx "
-				     "cmd_mem=0x%llx b=0x%llx off=%u",
+		gpu_sema_verbose_dbg(g, "(R) c=%d INCR %u (%u) pool=%-3d"
+				     "va=0x%llx cmd_mem=0x%llx b=0x%llx off=%u",
 				     ch, nvgpu_semaphore_get_value(s),
-				     nvgpu_semaphore_read(s), va, cmd->gva,
-				     cmd->mem->gpu_va, ob);
+				     nvgpu_semaphore_read(s),
+				     s->location.pool->page_idx,
+				     va, cmd->gva, cmd->mem->gpu_va, ob);
 }
 
 static int gk20a_channel_semaphore_wait_syncpt(
@@ -714,7 +715,7 @@ static int gk20a_channel_semaphore_wait_fd(
 
 	/* worker takes one reference */
 	nvgpu_semaphore_get(w->sema);
-	nvgpu_semaphore_incr(w->sema);
+	nvgpu_semaphore_incr(w->sema, c->hw_sema);
 
 	/* GPU unblocked when the semaphore value increments. */
 	add_sema_cmd(c->g, c, w->sema, wait_cmd, 8, true, false);
@@ -740,7 +741,7 @@
 	 */
 	if (ret == 1) {
 		sync_fence_put(sync_fence);
-		nvgpu_semaphore_release(w->sema);
+		nvgpu_semaphore_release(w->sema, c->hw_sema);
 		nvgpu_semaphore_put(w->sema);
 	}
 
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
index cd0c094f..f6d16b90 100644
--- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
@@ -338,8 +338,8 @@ static void gk20a_sync_pt_value_str_for_sema(struct gk20a_sync_pt *pt,
 {
 	struct nvgpu_semaphore *s = pt->sema;
 
-	snprintf(str, size, "S: c=%d [v=%u,r_v=%u]",
-		 s->hw_sema->ch->chid,
+	snprintf(str, size, "S: pool=%d [v=%u,r_v=%u]",
+		 s->location.pool->page_idx,
 		 nvgpu_semaphore_get_value(s),
 		 nvgpu_semaphore_read(s));
 }
diff --git a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
index 9c74d300..e66b2188 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
@@ -50,15 +50,18 @@
 
 struct nvgpu_semaphore_sea;
 
+struct nvgpu_semaphore_loc {
+	struct nvgpu_semaphore_pool *pool; /* Pool that owns this sema. */
+	u32 offset; /* Byte offset into the pool. */
+};
+
 /*
  * Underlying semaphore data structure. This semaphore can be shared amongst
  * other semaphore instances.
  */
 struct nvgpu_semaphore_int {
-	int idx; /* Semaphore index. */
-	u32 offset; /* Offset into the pool. */
+	struct nvgpu_semaphore_loc location;
 	nvgpu_atomic_t next_value; /* Next available value. */
-	struct nvgpu_semaphore_pool *p; /* Pool that owns this sema. */
 	struct channel_gk20a *ch; /* Channel that owns this sema. */
 };
 
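
The point of the new struct is that a location can be copied by value: a nvgpu_semaphore snapshots it once at allocation time and never dereferences the channel's hw_sema again, so the channel can free its hw_sema while user-visible semaphores are still in flight. A sketch of that snapshot, matching the nvgpu_semaphore_alloc() hunk above:

	s->g = ch->g;
	s->location = ch->hw_sema->location;	/* struct copy, not a pointer */
	nvgpu_semaphore_pool_get(s->location.pool);	/* keeps the backing pool alive */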
@@ -68,7 +71,8 @@ struct nvgpu_semaphore_int {
  * semaphore to be shared among an essentially infinite number of submits.
  */
 struct nvgpu_semaphore {
-	struct nvgpu_semaphore_int *hw_sema;
+	struct gk20a *g;
+	struct nvgpu_semaphore_loc location;
 
 	nvgpu_atomic_t value;
 	int incremented;
@@ -195,8 +199,8 @@ void nvgpu_semaphore_free_hw_sema(struct channel_gk20a *ch);
  */
 static inline u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s)
 {
-	return __nvgpu_semaphore_pool_gpu_va(s->hw_sema->p, false) +
-		s->hw_sema->offset;
+	return __nvgpu_semaphore_pool_gpu_va(s->location.pool, false) +
+		s->location.offset;
 }
 
 /*
@@ -205,20 +209,20 @@ static inline u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s)
  */
 static inline u64 nvgpu_semaphore_gpu_ro_va(struct nvgpu_semaphore *s)
 {
-	return __nvgpu_semaphore_pool_gpu_va(s->hw_sema->p, true) +
-		s->hw_sema->offset;
+	return __nvgpu_semaphore_pool_gpu_va(s->location.pool, true) +
+		s->location.offset;
 }
 
 static inline u64 nvgpu_hw_sema_addr(struct nvgpu_semaphore_int *hw_sema)
 {
-	return __nvgpu_semaphore_pool_gpu_va(hw_sema->p, true) +
-		hw_sema->offset;
+	return __nvgpu_semaphore_pool_gpu_va(hw_sema->location.pool, true) +
+		hw_sema->location.offset;
 }
 
 static inline u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema)
 {
-	return nvgpu_mem_rd(hw_sema->ch->g,
-			    &hw_sema->p->rw_mem, hw_sema->offset);
+	return nvgpu_mem_rd(hw_sema->ch->g, &hw_sema->location.pool->rw_mem,
+			    hw_sema->location.offset);
 }
 
 /*
@@ -226,7 +230,8 @@ static inline u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema)
  */
 static inline u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s)
 {
-	return __nvgpu_semaphore_read(s->hw_sema);
+	return nvgpu_mem_rd(s->g, &s->location.pool->rw_mem,
+			    s->location.offset);
 }
 
 /*
@@ -270,19 +275,14 @@ static inline bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s)
 	return !nvgpu_semaphore_is_released(s);
 }
 
-static inline u32 nvgpu_semaphore_next_value(struct nvgpu_semaphore *s)
-{
-	return (u32)nvgpu_atomic_read(&s->hw_sema->next_value);
-}
-
 /*
  * If @force is set then this will not wait for the underlying semaphore to
  * catch up to the passed semaphore threshold.
  */
 static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s,
-					     bool force)
+					     bool force,
+					     struct nvgpu_semaphore_int *hw_sema)
 {
-	struct nvgpu_semaphore_int *hw_sema = s->hw_sema;
 	u32 current_val;
 	u32 threshold = nvgpu_semaphore_get_value(s);
 	int attempts = 0;
@@ -312,16 +312,17 @@ static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s,
 	if (__nvgpu_semaphore_value_released(threshold, current_val))
 		return;
 
-	nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->p->rw_mem, hw_sema->offset,
-		     threshold);
+	nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->location.pool->rw_mem,
+		     hw_sema->location.offset, threshold);
 
-	gpu_sema_verbose_dbg(hw_sema->p->sema_sea->gk20a,
+	gpu_sema_verbose_dbg(hw_sema->location.pool->sema_sea->gk20a,
 			     "(c=%d) WRITE %u", hw_sema->ch->chid, threshold);
 }
 
-static inline void nvgpu_semaphore_release(struct nvgpu_semaphore *s)
+static inline void nvgpu_semaphore_release(struct nvgpu_semaphore *s,
+					   struct nvgpu_semaphore_int *hw_sema)
 {
-	__nvgpu_semaphore_release(s, false);
+	__nvgpu_semaphore_release(s, false, hw_sema);
 }
 
 /*
@@ -333,16 +334,17 @@ static inline void nvgpu_semaphore_release(struct nvgpu_semaphore *s)
  *
  * Also used to prep a semaphore for an INCR by the GPU.
  */
-static inline void nvgpu_semaphore_incr(struct nvgpu_semaphore *s)
+static inline void nvgpu_semaphore_incr(struct nvgpu_semaphore *s,
+					struct nvgpu_semaphore_int *hw_sema)
 {
 	BUG_ON(s->incremented);
 
-	nvgpu_atomic_set(&s->value, nvgpu_atomic_add_return(1, &s->hw_sema->next_value));
+	nvgpu_atomic_set(&s->value, nvgpu_atomic_add_return(1, &hw_sema->next_value));
 	s->incremented = 1;
 
-	gpu_sema_verbose_dbg(s->hw_sema->p->sema_sea->gk20a,
+	gpu_sema_verbose_dbg(s->g,
 			     "INCR sema for c=%d (%u)",
-			     s->hw_sema->ch->chid,
-			     nvgpu_semaphore_next_value(s));
+			     hw_sema->ch->chid,
+			     nvgpu_atomic_read(&hw_sema->next_value));
 }
 #endif
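
Taken together, the new calling convention reads as in this sketch; it assumes a live channel ch whose hw_sema has been initialized, and it elides error handling and the actual pushbuffer submission:

	struct nvgpu_semaphore *s = nvgpu_semaphore_alloc(ch);

	/* Reserve the next threshold on the channel's HW sema. */
	nvgpu_semaphore_incr(s, ch->hw_sema);

	/* ... GPU methods are emitted against the semaphore's GPU VA,
	 * nvgpu_semaphore_gpu_rw_va(s) / nvgpu_semaphore_gpu_ro_va(s) ... */

	/* CPU-side release: write the threshold through the HW sema. */
	nvgpu_semaphore_release(s, ch->hw_sema);
	nvgpu_semaphore_put(s);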