summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Konsta Holtta <kholtta@nvidia.com> 2018-03-12 04:37:38 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2018-03-13 05:43:37 -0400
commit: 86943d3d03953cc88b3e8a85aa232493b729137a (patch)
tree: f70caf4e93c13c541a33ad0a8f63122715ec8cc6
parent: c6e8257c445ad7cd3924673ac2e36f9dde796f0f (diff)
gpu: nvgpu: decouple sema and hw sema
struct nvgpu_semaphore represents (mainly) a threshold value that a sema at some index will get and struct nvgpu_semaphore_int (aka "hw_sema") represents the allocation (and write access) of a semaphore index and the next value that the sema at that index can have.

The threshold object doesn't need a pointer to the sema allocation that is not even guaranteed to exist for the whole threshold lifetime, so replace the pointer by the position of the sema in the sema pool. This requires some modifications to pass a hw sema around explicitly because it now represents write access more explicitly.

Delete also the index field of semaphore_int because it can be directly derived from the offset in the sema location and is thus unnecessary.

Jira NVGPU-512

Change-Id: I40be523fd68327e2f9928f10de4f771fe24d49ee
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1658102
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r-- drivers/gpu/nvgpu/common/semaphore.c 24
-rw-r--r-- drivers/gpu/nvgpu/gk20a/channel_gk20a.c 6
-rw-r--r-- drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c 21
-rw-r--r-- drivers/gpu/nvgpu/gk20a/sync_gk20a.c 4
-rw-r--r-- drivers/gpu/nvgpu/include/nvgpu/semaphore.h 62
5 files changed, 62 insertions, 55 deletions
diff --git a/drivers/gpu/nvgpu/common/semaphore.c b/drivers/gpu/nvgpu/common/semaphore.c
index dfed3588..b2238bb7 100644
--- a/drivers/gpu/nvgpu/common/semaphore.c
+++ b/drivers/gpu/nvgpu/common/semaphore.c
@@ -376,10 +376,10 @@ static int __nvgpu_init_hw_sema(struct channel_gk20a *ch)
376 376
377 ch->hw_sema = hw_sema; 377 ch->hw_sema = hw_sema;
378 hw_sema->ch = ch; 378 hw_sema->ch = ch;
379 hw_sema->p = p; 379 hw_sema->location.pool = p;
380 hw_sema->idx = hw_sema_idx; 380 hw_sema->location.offset = SEMAPHORE_SIZE * hw_sema_idx;
381 hw_sema->offset = SEMAPHORE_SIZE * hw_sema_idx; 381 current_value = nvgpu_mem_rd(ch->g, &p->rw_mem,
382 current_value = nvgpu_mem_rd(ch->g, &p->rw_mem, hw_sema->offset); 382 hw_sema->location.offset);
383 nvgpu_atomic_set(&hw_sema->next_value, current_value); 383 nvgpu_atomic_set(&hw_sema->next_value, current_value);
384 384
385 nvgpu_mutex_release(&p->pool_lock); 385 nvgpu_mutex_release(&p->pool_lock);
@@ -399,15 +399,16 @@ fail:
399void nvgpu_semaphore_free_hw_sema(struct channel_gk20a *ch) 399void nvgpu_semaphore_free_hw_sema(struct channel_gk20a *ch)
400{ 400{
401 struct nvgpu_semaphore_pool *p = ch->vm->sema_pool; 401 struct nvgpu_semaphore_pool *p = ch->vm->sema_pool;
402 struct nvgpu_semaphore_int *hw_sema = ch->hw_sema;
403 int idx = hw_sema->location.offset / SEMAPHORE_SIZE;
402 404
403 BUG_ON(!p); 405 BUG_ON(!p);
404 406
405 nvgpu_mutex_acquire(&p->pool_lock); 407 nvgpu_mutex_acquire(&p->pool_lock);
406 408
407 clear_bit(ch->hw_sema->idx, p->semas_alloced); 409 clear_bit(idx, p->semas_alloced);
408 410
409 /* Make sure that when the ch is re-opened it will get a new HW sema. */ 411 nvgpu_kfree(ch->g, hw_sema);
410 nvgpu_kfree(ch->g, ch->hw_sema);
411 ch->hw_sema = NULL; 412 ch->hw_sema = NULL;
412 413
413 nvgpu_mutex_release(&p->pool_lock); 414 nvgpu_mutex_release(&p->pool_lock);
@@ -435,14 +436,15 @@ struct nvgpu_semaphore *nvgpu_semaphore_alloc(struct channel_gk20a *ch)
435 return NULL; 436 return NULL;
436 437
437 nvgpu_ref_init(&s->ref); 438 nvgpu_ref_init(&s->ref);
438 s->hw_sema = ch->hw_sema; 439 s->g = ch->g;
440 s->location = ch->hw_sema->location;
439 nvgpu_atomic_set(&s->value, 0); 441 nvgpu_atomic_set(&s->value, 0);
440 442
441 /* 443 /*
442 * Take a ref on the pool so that we can keep this pool alive for 444 * Take a ref on the pool so that we can keep this pool alive for
443 * as long as this semaphore is alive. 445 * as long as this semaphore is alive.
444 */ 446 */
445 nvgpu_semaphore_pool_get(s->hw_sema->p); 447 nvgpu_semaphore_pool_get(s->location.pool);
446 448
447 gpu_sema_dbg(ch->g, "Allocated semaphore (c=%d)", ch->chid); 449 gpu_sema_dbg(ch->g, "Allocated semaphore (c=%d)", ch->chid);
448 450
@@ -454,9 +456,9 @@ static void nvgpu_semaphore_free(struct nvgpu_ref *ref)
454 struct nvgpu_semaphore *s = 456 struct nvgpu_semaphore *s =
455 container_of(ref, struct nvgpu_semaphore, ref); 457 container_of(ref, struct nvgpu_semaphore, ref);
456 458
457 nvgpu_semaphore_pool_put(s->hw_sema->p); 459 nvgpu_semaphore_pool_put(s->location.pool);
458 460
459 nvgpu_kfree(s->hw_sema->ch->g, s); 461 nvgpu_kfree(s->g, s);
460} 462}
461 463
462void nvgpu_semaphore_put(struct nvgpu_semaphore *s) 464void nvgpu_semaphore_put(struct nvgpu_semaphore *s)
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 5cd7223f..fb7406da 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -215,7 +215,8 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
215 job = &ch->joblist.pre_alloc.jobs[tmp_get]; 215 job = &ch->joblist.pre_alloc.jobs[tmp_get];
216 if (job->post_fence->semaphore) { 216 if (job->post_fence->semaphore) {
217 __nvgpu_semaphore_release( 217 __nvgpu_semaphore_release(
218 job->post_fence->semaphore, true); 218 job->post_fence->semaphore, true,
219 ch->hw_sema);
219 released_job_semaphore = true; 220 released_job_semaphore = true;
220 } 221 }
221 tmp_get = (tmp_get + 1) % ch->joblist.pre_alloc.length; 222 tmp_get = (tmp_get + 1) % ch->joblist.pre_alloc.length;
@@ -226,7 +227,8 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
226 channel_gk20a_job, list) { 227 channel_gk20a_job, list) {
227 if (job->post_fence->semaphore) { 228 if (job->post_fence->semaphore) {
228 __nvgpu_semaphore_release( 229 __nvgpu_semaphore_release(
229 job->post_fence->semaphore, true); 230 job->post_fence->semaphore, true,
231 ch->hw_sema);
230 released_job_semaphore = true; 232 released_job_semaphore = true;
231 } 233 }
232 } 234 }
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index 45d9ae9c..4b1be8b9 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -494,7 +494,7 @@ static void gk20a_channel_semaphore_launcher(
494 "wait completed (%d) for fence %p '%s', triggering gpu work", 494 "wait completed (%d) for fence %p '%s', triggering gpu work",
495 err, fence, fence->name); 495 err, fence, fence->name);
496 sync_fence_put(fence); 496 sync_fence_put(fence);
497 nvgpu_semaphore_release(w->sema); 497 nvgpu_semaphore_release(w->sema, w->ch->hw_sema);
498 nvgpu_semaphore_put(w->sema); 498 nvgpu_semaphore_put(w->sema);
499 nvgpu_kfree(g, w); 499 nvgpu_kfree(g, w);
500} 500}
@@ -522,7 +522,7 @@ static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c,
522 * incr the underlying sema next_value. 522 * incr the underlying sema next_value.
523 */ 523 */
524 if (!acquire) 524 if (!acquire)
525 nvgpu_semaphore_incr(s); 525 nvgpu_semaphore_incr(s, c->hw_sema);
526 526
527 /* semaphore_a */ 527 /* semaphore_a */
528 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010004); 528 nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010004);
@@ -561,17 +561,18 @@ static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c,
561 } 561 }
562 562
563 if (acquire) 563 if (acquire)
564 gpu_sema_verbose_dbg(g, "(A) c=%d ACQ_GE %-4u owner=%-3d" 564 gpu_sema_verbose_dbg(g, "(A) c=%d ACQ_GE %-4u pool=%-3d"
565 "va=0x%llx cmd_mem=0x%llx b=0x%llx off=%u", 565 "va=0x%llx cmd_mem=0x%llx b=0x%llx off=%u",
566 ch, nvgpu_semaphore_get_value(s), 566 ch, nvgpu_semaphore_get_value(s),
567 s->hw_sema->ch->chid, va, cmd->gva, 567 s->location.pool->page_idx, va, cmd->gva,
568 cmd->mem->gpu_va, ob); 568 cmd->mem->gpu_va, ob);
569 else 569 else
570 gpu_sema_verbose_dbg(g, "(R) c=%d INCR %u (%u) va=0x%llx " 570 gpu_sema_verbose_dbg(g, "(R) c=%d INCR %u (%u) pool=%-3d"
571 "cmd_mem=0x%llx b=0x%llx off=%u", 571 "va=0x%llx cmd_mem=0x%llx b=0x%llx off=%u",
572 ch, nvgpu_semaphore_get_value(s), 572 ch, nvgpu_semaphore_get_value(s),
573 nvgpu_semaphore_read(s), va, cmd->gva, 573 nvgpu_semaphore_read(s),
574 cmd->mem->gpu_va, ob); 574 s->location.pool->page_idx,
575 va, cmd->gva, cmd->mem->gpu_va, ob);
575} 576}
576 577
577static int gk20a_channel_semaphore_wait_syncpt( 578static int gk20a_channel_semaphore_wait_syncpt(
@@ -714,7 +715,7 @@ static int gk20a_channel_semaphore_wait_fd(
714 715
715 /* worker takes one reference */ 716 /* worker takes one reference */
716 nvgpu_semaphore_get(w->sema); 717 nvgpu_semaphore_get(w->sema);
717 nvgpu_semaphore_incr(w->sema); 718 nvgpu_semaphore_incr(w->sema, c->hw_sema);
718 719
719 /* GPU unblocked when the semaphore value increments. */ 720 /* GPU unblocked when the semaphore value increments. */
720 add_sema_cmd(c->g, c, w->sema, wait_cmd, 8, true, false); 721 add_sema_cmd(c->g, c, w->sema, wait_cmd, 8, true, false);
@@ -740,7 +741,7 @@ static int gk20a_channel_semaphore_wait_fd(
740 */ 741 */
741 if (ret == 1) { 742 if (ret == 1) {
742 sync_fence_put(sync_fence); 743 sync_fence_put(sync_fence);
743 nvgpu_semaphore_release(w->sema); 744 nvgpu_semaphore_release(w->sema, c->hw_sema);
744 nvgpu_semaphore_put(w->sema); 745 nvgpu_semaphore_put(w->sema);
745 } 746 }
746 747
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
index cd0c094f..f6d16b90 100644
--- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
@@ -338,8 +338,8 @@ static void gk20a_sync_pt_value_str_for_sema(struct gk20a_sync_pt *pt,
338{ 338{
339 struct nvgpu_semaphore *s = pt->sema; 339 struct nvgpu_semaphore *s = pt->sema;
340 340
341 snprintf(str, size, "S: c=%d [v=%u,r_v=%u]", 341 snprintf(str, size, "S: pool=%d [v=%u,r_v=%u]",
342 s->hw_sema->ch->chid, 342 s->location.pool->page_idx,
343 nvgpu_semaphore_get_value(s), 343 nvgpu_semaphore_get_value(s),
344 nvgpu_semaphore_read(s)); 344 nvgpu_semaphore_read(s));
345} 345}
diff --git a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
index 9c74d300..e66b2188 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
@@ -50,15 +50,18 @@
50 50
51struct nvgpu_semaphore_sea; 51struct nvgpu_semaphore_sea;
52 52
53struct nvgpu_semaphore_loc {
54 struct nvgpu_semaphore_pool *pool; /* Pool that owns this sema. */
55 u32 offset; /* Byte offset into the pool. */
56};
57
53/* 58/*
54 * Underlying semaphore data structure. This semaphore can be shared amongst 59 * Underlying semaphore data structure. This semaphore can be shared amongst
55 * other semaphore instances. 60 * other semaphore instances.
56 */ 61 */
57struct nvgpu_semaphore_int { 62struct nvgpu_semaphore_int {
58 int idx; /* Semaphore index. */ 63 struct nvgpu_semaphore_loc location;
59 u32 offset; /* Offset into the pool. */
60 nvgpu_atomic_t next_value; /* Next available value. */ 64 nvgpu_atomic_t next_value; /* Next available value. */
61 struct nvgpu_semaphore_pool *p; /* Pool that owns this sema. */
62 struct channel_gk20a *ch; /* Channel that owns this sema. */ 65 struct channel_gk20a *ch; /* Channel that owns this sema. */
63}; 66};
64 67
@@ -68,7 +71,8 @@ struct nvgpu_semaphore_int {
68 * semaphore to be shared among an essentially infinite number of submits. 71 * semaphore to be shared among an essentially infinite number of submits.
69 */ 72 */
70struct nvgpu_semaphore { 73struct nvgpu_semaphore {
71 struct nvgpu_semaphore_int *hw_sema; 74 struct gk20a *g;
75 struct nvgpu_semaphore_loc location;
72 76
73 nvgpu_atomic_t value; 77 nvgpu_atomic_t value;
74 int incremented; 78 int incremented;
@@ -195,8 +199,8 @@ void nvgpu_semaphore_free_hw_sema(struct channel_gk20a *ch);
195 */ 199 */
196static inline u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s) 200static inline u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s)
197{ 201{
198 return __nvgpu_semaphore_pool_gpu_va(s->hw_sema->p, false) + 202 return __nvgpu_semaphore_pool_gpu_va(s->location.pool, false) +
199 s->hw_sema->offset; 203 s->location.offset;
200} 204}
201 205
202/* 206/*
@@ -205,20 +209,20 @@ static inline u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s)
205 */ 209 */
206static inline u64 nvgpu_semaphore_gpu_ro_va(struct nvgpu_semaphore *s) 210static inline u64 nvgpu_semaphore_gpu_ro_va(struct nvgpu_semaphore *s)
207{ 211{
208 return __nvgpu_semaphore_pool_gpu_va(s->hw_sema->p, true) + 212 return __nvgpu_semaphore_pool_gpu_va(s->location.pool, true) +
209 s->hw_sema->offset; 213 s->location.offset;
210} 214}
211 215
212static inline u64 nvgpu_hw_sema_addr(struct nvgpu_semaphore_int *hw_sema) 216static inline u64 nvgpu_hw_sema_addr(struct nvgpu_semaphore_int *hw_sema)
213{ 217{
214 return __nvgpu_semaphore_pool_gpu_va(hw_sema->p, true) + 218 return __nvgpu_semaphore_pool_gpu_va(hw_sema->location.pool, true) +
215 hw_sema->offset; 219 hw_sema->location.offset;
216} 220}
217 221
218static inline u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema) 222static inline u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema)
219{ 223{
220 return nvgpu_mem_rd(hw_sema->ch->g, 224 return nvgpu_mem_rd(hw_sema->ch->g, &hw_sema->location.pool->rw_mem,
221 &hw_sema->p->rw_mem, hw_sema->offset); 225 hw_sema->location.offset);
222} 226}
223 227
224/* 228/*
@@ -226,7 +230,8 @@ static inline u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema)
226 */ 230 */
227static inline u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s) 231static inline u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s)
228{ 232{
229 return __nvgpu_semaphore_read(s->hw_sema); 233 return nvgpu_mem_rd(s->g, &s->location.pool->rw_mem,
234 s->location.offset);
230} 235}
231 236
232/* 237/*
@@ -270,19 +275,14 @@ static inline bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s)
270 return !nvgpu_semaphore_is_released(s); 275 return !nvgpu_semaphore_is_released(s);
271} 276}
272 277
273static inline u32 nvgpu_semaphore_next_value(struct nvgpu_semaphore *s)
274{
275 return (u32)nvgpu_atomic_read(&s->hw_sema->next_value);
276}
277
278/* 278/*
279 * If @force is set then this will not wait for the underlying semaphore to 279 * If @force is set then this will not wait for the underlying semaphore to
280 * catch up to the passed semaphore threshold. 280 * catch up to the passed semaphore threshold.
281 */ 281 */
282static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s, 282static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s,
283 bool force) 283 bool force,
284 struct nvgpu_semaphore_int *hw_sema)
284{ 285{
285 struct nvgpu_semaphore_int *hw_sema = s->hw_sema;
286 u32 current_val; 286 u32 current_val;
287 u32 threshold = nvgpu_semaphore_get_value(s); 287 u32 threshold = nvgpu_semaphore_get_value(s);
288 int attempts = 0; 288 int attempts = 0;
@@ -312,16 +312,17 @@ static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s,
312 if (__nvgpu_semaphore_value_released(threshold, current_val)) 312 if (__nvgpu_semaphore_value_released(threshold, current_val))
313 return; 313 return;
314 314
315 nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->p->rw_mem, hw_sema->offset, 315 nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->location.pool->rw_mem,
316 threshold); 316 hw_sema->location.offset, threshold);
317 317
318 gpu_sema_verbose_dbg(hw_sema->p->sema_sea->gk20a, 318 gpu_sema_verbose_dbg(hw_sema->location.pool->sema_sea->gk20a,
319 "(c=%d) WRITE %u", hw_sema->ch->chid, threshold); 319 "(c=%d) WRITE %u", hw_sema->ch->chid, threshold);
320} 320}
321 321
322static inline void nvgpu_semaphore_release(struct nvgpu_semaphore *s) 322static inline void nvgpu_semaphore_release(struct nvgpu_semaphore *s,
323 struct nvgpu_semaphore_int *hw_sema)
323{ 324{
324 __nvgpu_semaphore_release(s, false); 325 __nvgpu_semaphore_release(s, false, hw_sema);
325} 326}
326 327
327/* 328/*
@@ -333,16 +334,17 @@ static inline void nvgpu_semaphore_release(struct nvgpu_semaphore *s)
333 * 334 *
334 * Also used to prep a semaphore for an INCR by the GPU. 335 * Also used to prep a semaphore for an INCR by the GPU.
335 */ 336 */
336static inline void nvgpu_semaphore_incr(struct nvgpu_semaphore *s) 337static inline void nvgpu_semaphore_incr(struct nvgpu_semaphore *s,
338 struct nvgpu_semaphore_int *hw_sema)
337{ 339{
338 BUG_ON(s->incremented); 340 BUG_ON(s->incremented);
339 341
340 nvgpu_atomic_set(&s->value, nvgpu_atomic_add_return(1, &s->hw_sema->next_value)); 342 nvgpu_atomic_set(&s->value, nvgpu_atomic_add_return(1, &hw_sema->next_value));
341 s->incremented = 1; 343 s->incremented = 1;
342 344
343 gpu_sema_verbose_dbg(s->hw_sema->p->sema_sea->gk20a, 345 gpu_sema_verbose_dbg(s->g,
344 "INCR sema for c=%d (%u)", 346 "INCR sema for c=%d (%u)",
345 s->hw_sema->ch->chid, 347 hw_sema->ch->chid,
346 nvgpu_semaphore_next_value(s)); 348 nvgpu_atomic_read(&hw_sema->next_value));
347} 349}
348#endif 350#endif