5 files changed, 62 insertions, 55 deletions
diff --git a/drivers/gpu/nvgpu/common/semaphore.c b/drivers/gpu/nvgpu/common/semaphore.c
index dfed3588..b2238bb7 100644
--- a/drivers/gpu/nvgpu/common/semaphore.c
+++ b/drivers/gpu/nvgpu/common/semaphore.c
@@ -376,10 +376,10 @@ static int __nvgpu_init_hw_sema(struct channel_gk20a *ch)
        ch->hw_sema = hw_sema;
        hw_sema->ch = ch;
-        hw_sema->p = p;
+        hw_sema->location.pool = p;
-        hw_sema->idx = hw_sema_idx;
+        hw_sema->location.offset = SEMAPHORE_SIZE * hw_sema_idx;
-        hw_sema->offset = SEMAPHORE_SIZE * hw_sema_idx;
+        current_value = nvgpu_mem_rd(ch->g, &p->rw_mem,
-        current_value = nvgpu_mem_rd(ch->g, &p->rw_mem, hw_sema->offset);
+                        hw_sema->location.offset);
        nvgpu_atomic_set(&hw_sema->next_value, current_value);
        nvgpu_mutex_release(&p->pool_lock);
@@ -399,15 +399,16 @@ fail:
 void nvgpu_semaphore_free_hw_sema(struct channel_gk20a *ch)
 {
        struct nvgpu_semaphore_pool *p = ch->vm->sema_pool;
+        struct nvgpu_semaphore_int *hw_sema = ch->hw_sema;
+        int idx = hw_sema->location.offset / SEMAPHORE_SIZE;
        BUG_ON(!p);
        nvgpu_mutex_acquire(&p->pool_lock);
-        clear_bit(ch->hw_sema->idx, p->semas_alloced);
+        clear_bit(idx, p->semas_alloced);
-        /* Make sure that when the ch is re-opened it will get a new HW sema. */
+        nvgpu_kfree(ch->g, hw_sema);
-        nvgpu_kfree(ch->g, ch->hw_sema);
        ch->hw_sema = NULL;
        nvgpu_mutex_release(&p->pool_lock);
@@ -435,14 +436,15 @@ struct nvgpu_semaphore *nvgpu_semaphore_alloc(struct channel_gk20a *ch)
                return NULL;
        nvgpu_ref_init(&s->ref);
-        s->hw_sema = ch->hw_sema;
+        s->g = ch->g;
+        s->location = ch->hw_sema->location;
        nvgpu_atomic_set(&s->value, 0);
        /*
         * Take a ref on the pool so that we can keep this pool alive for
         * as long as this semaphore is alive.
         */
-        nvgpu_semaphore_pool_get(s->hw_sema->p);
+        nvgpu_semaphore_pool_get(s->location.pool);
        gpu_sema_dbg(ch->g, "Allocated semaphore (c=%d)", ch->chid);
@@ -454,9 +456,9 @@ static void nvgpu_semaphore_free(struct nvgpu_ref *ref)
        struct nvgpu_semaphore *s =
                container_of(ref, struct nvgpu_semaphore, ref);
-        nvgpu_semaphore_pool_put(s->hw_sema->p);
+        nvgpu_semaphore_pool_put(s->location.pool);
-        nvgpu_kfree(s->hw_sema->ch->g, s);
+        nvgpu_kfree(s->g, s);
 }
 void nvgpu_semaphore_put(struct nvgpu_semaphore *s)
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 5cd7223f..fb7406da 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -215,7 +215,8 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
                        job = &ch->joblist.pre_alloc.jobs[tmp_get];
                        if (job->post_fence->semaphore) {
                                __nvgpu_semaphore_release(
-                                        job->post_fence->semaphore, true);
+                                        job->post_fence->semaphore, true,
+                                        ch->hw_sema);
                                released_job_semaphore = true;
                        }
                        tmp_get = (tmp_get + 1) % ch->joblist.pre_alloc.length;
@@ -226,7 +227,8 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
                                channel_gk20a_job, list) {
                        if (job->post_fence->semaphore) {
                                __nvgpu_semaphore_release(
-                                        job->post_fence->semaphore, true);
+                                        job->post_fence->semaphore, true,
+                                        ch->hw_sema);
                                released_job_semaphore = true;
                        }
                }
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index 45d9ae9c..4b1be8b9 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -494,7 +494,7 @@ static void gk20a_channel_semaphore_launcher(
                  "wait completed (%d) for fence %p '%s', triggering gpu work",
                  err, fence, fence->name);
        sync_fence_put(fence);
-        nvgpu_semaphore_release(w->sema);
+        nvgpu_semaphore_release(w->sema, w->ch->hw_sema);
        nvgpu_semaphore_put(w->sema);
        nvgpu_kfree(g, w);
 }
@@ -522,7 +522,7 @@ static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c,
         * incr the underlying sema next_value.
         */
        if (!acquire)
-                nvgpu_semaphore_incr(s);
+                nvgpu_semaphore_incr(s, c->hw_sema);
        /* semaphore_a */
        nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010004);
@@ -561,17 +561,18 @@ static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c,
        }
        if (acquire)
-                gpu_sema_verbose_dbg(g, "(A) c=%d ACQ_GE %-4u owner=%-3d"
+                gpu_sema_verbose_dbg(g, "(A) c=%d ACQ_GE %-4u pool=%-3d"
                                     "va=0x%llx cmd_mem=0x%llx b=0x%llx off=%u",
                                     ch, nvgpu_semaphore_get_value(s),
-                                     s->hw_sema->ch->chid, va, cmd->gva,
+                                     s->location.pool->page_idx, va, cmd->gva,
                                     cmd->mem->gpu_va, ob);
        else
-                gpu_sema_verbose_dbg(g, "(R) c=%d INCR %u (%u) va=0x%llx "
+                gpu_sema_verbose_dbg(g, "(R) c=%d INCR %u (%u) pool=%-3d"
-                                     "cmd_mem=0x%llx b=0x%llx off=%u",
+                                     "va=0x%llx cmd_mem=0x%llx b=0x%llx off=%u",
                                     ch, nvgpu_semaphore_get_value(s),
-                                     nvgpu_semaphore_read(s), va, cmd->gva,
+                                     nvgpu_semaphore_read(s),
-                                     cmd->mem->gpu_va, ob);
+                                     s->location.pool->page_idx,
+                                     va, cmd->gva, cmd->mem->gpu_va, ob);
 }
 static int gk20a_channel_semaphore_wait_syncpt(
@@ -714,7 +715,7 @@ static int gk20a_channel_semaphore_wait_fd(
        /* worker takes one reference */
        nvgpu_semaphore_get(w->sema);
-        nvgpu_semaphore_incr(w->sema);
+        nvgpu_semaphore_incr(w->sema, c->hw_sema);
        /* GPU unblocked when the semaphore value increments. */
        add_sema_cmd(c->g, c, w->sema, wait_cmd, 8, true, false);
@@ -740,7 +741,7 @@ static int gk20a_channel_semaphore_wait_fd(
         */
        if (ret == 1) {
                sync_fence_put(sync_fence);
-                nvgpu_semaphore_release(w->sema);
+                nvgpu_semaphore_release(w->sema, c->hw_sema);
                nvgpu_semaphore_put(w->sema);
        }
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
index cd0c094f..f6d16b90 100644
--- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
@@ -338,8 +338,8 @@ static void gk20a_sync_pt_value_str_for_sema(struct gk20a_sync_pt *pt,
 {
        struct nvgpu_semaphore *s = pt->sema;
-        snprintf(str, size, "S: c=%d [v=%u,r_v=%u]",
+        snprintf(str, size, "S: pool=%d [v=%u,r_v=%u]",
-                 s->hw_sema->ch->chid,
+                 s->location.pool->page_idx,
                 nvgpu_semaphore_get_value(s),
                 nvgpu_semaphore_read(s));
 }
diff --git a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
index 9c74d300..e66b2188 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
@@ -50,15 +50,18 @@
 struct nvgpu_semaphore_sea;
+struct nvgpu_semaphore_loc {
+        struct nvgpu_semaphore_pool *pool; /* Pool that owns this sema. */
+        u32 offset;                        /* Byte offset into the pool. */
+};
 /*
 * Underlying semaphore data structure. This semaphore can be shared amongst
 * other semaphore instances.
 */
 struct nvgpu_semaphore_int {
-        int idx;                        /* Semaphore index. */
+        struct nvgpu_semaphore_loc location;
-        u32 offset;                     /* Offset into the pool. */
        nvgpu_atomic_t next_value;      /* Next available value. */
-        struct nvgpu_semaphore_pool *p; /* Pool that owns this sema. */
        struct channel_gk20a *ch;       /* Channel that owns this sema. */
 };
@@ -68,7 +71,8 @@ struct nvgpu_semaphore_int {
 * semaphore to be shared among an essentially infinite number of submits.
 */
 struct nvgpu_semaphore {
-        struct nvgpu_semaphore_int *hw_sema;
+        struct gk20a *g;
+        struct nvgpu_semaphore_loc location;
        nvgpu_atomic_t value;
        int incremented;
@@ -195,8 +199,8 @@ void nvgpu_semaphore_free_hw_sema(struct channel_gk20a *ch);
 */
 static inline u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s)
 {
-        return __nvgpu_semaphore_pool_gpu_va(s->hw_sema->p, false) +
+        return __nvgpu_semaphore_pool_gpu_va(s->location.pool, false) +
-                s->hw_sema->offset;
+                s->location.offset;
 }
 /*
@@ -205,20 +209,20 @@ static inline u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s)
 */
 static inline u64 nvgpu_semaphore_gpu_ro_va(struct nvgpu_semaphore *s)
 {
-        return __nvgpu_semaphore_pool_gpu_va(s->hw_sema->p, true) +
+        return __nvgpu_semaphore_pool_gpu_va(s->location.pool, true) +
-                s->hw_sema->offset;
+                s->location.offset;
 }
 static inline u64 nvgpu_hw_sema_addr(struct nvgpu_semaphore_int *hw_sema)
 {
-        return __nvgpu_semaphore_pool_gpu_va(hw_sema->p, true) +
+        return __nvgpu_semaphore_pool_gpu_va(hw_sema->location.pool, true) +
-                hw_sema->offset;
+                hw_sema->location.offset;
 }
 static inline u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema)
 {
-        return nvgpu_mem_rd(hw_sema->ch->g,
+        return nvgpu_mem_rd(hw_sema->ch->g, &hw_sema->location.pool->rw_mem,
-                            &hw_sema->p->rw_mem, hw_sema->offset);
+                        hw_sema->location.offset);
 }
 /*
@@ -226,7 +230,8 @@ static inline u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema)
 */
 static inline u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s)
 {
-        return __nvgpu_semaphore_read(s->hw_sema);
+        return nvgpu_mem_rd(s->g, &s->location.pool->rw_mem,
+                        s->location.offset);
 }
 /*
@@ -270,19 +275,14 @@ static inline bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s)
        return !nvgpu_semaphore_is_released(s);
 }
-static inline u32 nvgpu_semaphore_next_value(struct nvgpu_semaphore *s)
-{
-        return (u32)nvgpu_atomic_read(&s->hw_sema->next_value);
-}
 /*
 * If @force is set then this will not wait for the underlying semaphore to
 * catch up to the passed semaphore threshold.
 */
 static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s,
-                                             bool force)
+                                             bool force,
+                                             struct nvgpu_semaphore_int *hw_sema)
 {
-        struct nvgpu_semaphore_int *hw_sema = s->hw_sema;
        u32 current_val;
        u32 threshold = nvgpu_semaphore_get_value(s);
        int attempts = 0;
@@ -312,16 +312,17 @@ static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s,
        if (__nvgpu_semaphore_value_released(threshold, current_val))
                return;
-        nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->p->rw_mem, hw_sema->offset,
+        nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->location.pool->rw_mem,
-                        threshold);
+                        hw_sema->location.offset, threshold);
-        gpu_sema_verbose_dbg(hw_sema->p->sema_sea->gk20a,
+        gpu_sema_verbose_dbg(hw_sema->location.pool->sema_sea->gk20a,
                             "(c=%d) WRITE %u", hw_sema->ch->chid, threshold);
 }
-static inline void nvgpu_semaphore_release(struct nvgpu_semaphore *s)
+static inline void nvgpu_semaphore_release(struct nvgpu_semaphore *s,
+                                           struct nvgpu_semaphore_int *hw_sema)
 {
-        __nvgpu_semaphore_release(s, false);
+        __nvgpu_semaphore_release(s, false, hw_sema);
 }
 /*
@@ -333,16 +334,17 @@ static inline void nvgpu_semaphore_release(struct nvgpu_semaphore *s)
 *
 * Also used to prep a semaphore for an INCR by the GPU.
 */
-static inline void nvgpu_semaphore_incr(struct nvgpu_semaphore *s)
+static inline void nvgpu_semaphore_incr(struct nvgpu_semaphore *s,
+                struct nvgpu_semaphore_int *hw_sema)
 {
        BUG_ON(s->incremented);
-        nvgpu_atomic_set(&s->value, nvgpu_atomic_add_return(1, &s->hw_sema->next_value));
+        nvgpu_atomic_set(&s->value, nvgpu_atomic_add_return(1, &hw_sema->next_value));
        s->incremented = 1;
-        gpu_sema_verbose_dbg(s->hw_sema->p->sema_sea->gk20a,
+        gpu_sema_verbose_dbg(s->g,
                             "INCR sema for c=%d (%u)",
-                             s->hw_sema->ch->chid,
+                             hw_sema->ch->chid,
-                             nvgpu_semaphore_next_value(s));
+                             nvgpu_atomic_read(&hw_sema->next_value));
 }
 #endif