gpu: nvgpu: remove fence dependency tracking

In preparation for better abstraction in job synchronization, drop support for the dependency fences tracked via submit pre-fences in semaphore-based syncs. This has only worked for semaphores, not nvhost syncpoints, and hasn't really been used. The dependency was printed in the sync framework's sync pt value string.

Also remove the userspace-visible gk20a_sync_pt_info, which is not used and depends on this feature (providing a duration since the dependency fence's timestamp).

Jira NVGPU-43

Change-Id: Ia2b26502a9dc8f5bef5470f94b1475001f621da1
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1456880
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
author: Konsta Holtta <kholtta@nvidia.com> 2017-04-06 03:48:35 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2017-04-11 12:57:21 -0400
commit: 1a4647272f4fe50137c79583b698c1ef6f5def12 (patch)
tree: 0eee9698405e4467a7644c74c3d41d4bca933a8f /drivers/gpu
parent: 6de456f8407ba7230410175f1aff90541137d19b (diff)
5 files changed, 12 insertions, 114 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index fbeb1e4a..6d0f0854 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -678,7 +678,7 @@ static int gk20a_channel_semaphore_wait_fd(
                                        sema->timeline,
                                        fp_sema,
                                        &c->semaphore_wq,
-                                        NULL, false, false);
+                                        false, false);
                        if (err) {
                                nvgpu_semaphore_put(fp_sema);
                                goto clean_up_priv_cmd;
@@ -742,7 +742,7 @@ static int gk20a_channel_semaphore_wait_fd(
         *  already signaled
         */
        err = gk20a_fence_from_semaphore(c->g, fence, sema->timeline, w->sema,
-                        &c->semaphore_wq, NULL, false, false);
+                        &c->semaphore_wq, false, false);
        if (err)
                goto clean_up_sema;
@@ -787,7 +787,6 @@ clean_up_sync_fence:
 static int __gk20a_channel_semaphore_incr(
                struct gk20a_channel_sync *s, bool wfi_cmd,
-                struct sync_fence *dependency,
                struct priv_cmd_entry *incr_cmd,
                struct gk20a_fence *fence,
                bool need_sync_fence)
@@ -820,7 +819,7 @@ static int __gk20a_channel_semaphore_incr(
        err = gk20a_fence_from_semaphore(c->g, fence,
                        sp->timeline, semaphore,
                        &c->semaphore_wq,
-                        dependency, wfi_cmd,
+                        wfi_cmd,
                        need_sync_fence);
        if (err)
                goto clean_up_sema;
@@ -839,7 +838,6 @@ static int gk20a_channel_semaphore_incr_wfi(
 {
        return __gk20a_channel_semaphore_incr(s,
                        true /* wfi */,
-                        NULL,
                        entry, fence, true);
 }
@@ -854,7 +852,6 @@ static int gk20a_channel_semaphore_incr(
         * a fence to user space. */
        return __gk20a_channel_semaphore_incr(s,
                        false /* no wfi */,
-                        NULL,
                        entry, fence, need_sync_fence);
 }
@@ -868,22 +865,12 @@ static int gk20a_channel_semaphore_incr_user(
                bool register_irq)
 {
 #ifdef CONFIG_SYNC
-        struct sync_fence *dependency = NULL;
        int err;
-        if (wait_fence_fd >= 0) {
+        err = __gk20a_channel_semaphore_incr(s, wfi, entry, fence,
-                dependency = gk20a_sync_fence_fdget(wait_fence_fd);
+                        need_sync_fence);
-                if (!dependency)
+        if (err)
-                        return -EINVAL;
-        }
-        err = __gk20a_channel_semaphore_incr(s, wfi, dependency,
-                                             entry, fence, need_sync_fence);
-        if (err) {
-                if (dependency)
-                        sync_fence_put(dependency);
                return err;
-        }
        return 0;
 #else
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
index 63da0959..a5aeae08 100644
--- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
@@ -233,7 +233,6 @@ int gk20a_fence_from_semaphore(
                struct sync_timeline *timeline,
                struct nvgpu_semaphore *semaphore,
                wait_queue_head_t *semaphore_wq,
-                struct sync_fence *dependency,
                bool wfi, bool need_sync_fence)
 {
        struct gk20a_fence *f = fence_out;
@@ -242,7 +241,7 @@ int gk20a_fence_from_semaphore(
 #ifdef CONFIG_SYNC
        if (need_sync_fence) {
                sync_fence = gk20a_sync_fence_create(g, timeline, semaphore,
-                                        dependency, "f-gk20a-0x%04x",
+                                        "f-gk20a-0x%04x",
                                        nvgpu_semaphore_gpu_ro_va(semaphore));
                if (!sync_fence)
                        return -1;
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.h b/drivers/gpu/nvgpu/gk20a/fence_gk20a.h
index fccf0c27..426556cc 100644
--- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.h
@@ -58,7 +58,6 @@ int gk20a_fence_from_semaphore(
                struct sync_timeline *timeline,
                struct nvgpu_semaphore *semaphore,
                wait_queue_head_t *semaphore_wq,
-                struct sync_fence *dependency,
                bool wfi, bool need_sync_fence);
 int gk20a_fence_from_syncpt(
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
index 5b888299..ecc96a7b 100644
--- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
@@ -47,8 +47,6 @@ struct gk20a_sync_pt {
        u32                             thresh;
        struct nvgpu_semaphore          *sema;
        struct gk20a_sync_timeline      *obj;
-        struct sync_fence               *dep;
-        ktime_t                         dep_timestamp;
        /*
         * Use a spin lock here since it will have better performance
@@ -206,8 +204,6 @@ static void gk20a_sync_pt_free_shared(struct kref *ref)
                container_of(ref, struct gk20a_sync_pt, refcount);
        struct gk20a *g = pt->g;
-        if (pt->dep)
-                sync_fence_put(pt->dep);
        if (pt->sema)
                nvgpu_semaphore_put(pt->sema);
        nvgpu_kfree(g, pt);
@@ -216,8 +212,7 @@ static void gk20a_sync_pt_free_shared(struct kref *ref)
 static struct gk20a_sync_pt *gk20a_sync_pt_create_shared(
                struct gk20a *g,
                struct gk20a_sync_timeline *obj,
-                struct nvgpu_semaphore *sema,
+                struct nvgpu_semaphore *sema)
-                struct sync_fence *dependency)
 {
        struct gk20a_sync_pt *shared;
@@ -231,20 +226,6 @@ static struct gk20a_sync_pt *gk20a_sync_pt_create_shared(
        shared->sema = sema;
        shared->thresh = ++obj->max; /* sync framework has a lock */
-        /* Store the dependency fence for this pt. */
-        if (dependency) {
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
-                if (dependency->status == 0)
-#else
-                if (!atomic_read(&dependency->status))
-#endif
-                        shared->dep = dependency;
-                else {
-                        shared->dep_timestamp = ktime_get();
-                        sync_fence_put(dependency);
-                }
-        }
        nvgpu_spinlock_init(&shared->lock);
        nvgpu_semaphore_get(sema);
@@ -255,8 +236,7 @@ static struct gk20a_sync_pt *gk20a_sync_pt_create_shared(
 static struct sync_pt *gk20a_sync_pt_create_inst(
                struct gk20a *g,
                struct gk20a_sync_timeline *obj,
-                struct nvgpu_semaphore *sema,
+                struct nvgpu_semaphore *sema)
-                struct sync_fence *dependency)
 {
        struct gk20a_sync_pt_inst *pti;
@@ -265,7 +245,7 @@ static struct sync_pt *gk20a_sync_pt_create_inst(
        if (!pti)
                return NULL;
-        pti->shared = gk20a_sync_pt_create_shared(g, obj, sema, dependency);
+        pti->shared = gk20a_sync_pt_create_shared(g, obj, sema);
        if (!pti->shared) {
                sync_pt_free(&pti->pt);
                return NULL;
@@ -303,9 +283,6 @@ static int gk20a_sync_pt_has_signaled(struct sync_pt *sync_pt)
 {
        struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
        struct gk20a_sync_timeline *obj = pt->obj;
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
-        struct sync_pt *pos;
-#endif
        bool signaled = true;
        nvgpu_spinlock_acquire(&pt->lock);
@@ -321,29 +298,6 @@ static int gk20a_sync_pt_has_signaled(struct sync_pt *sync_pt)
                                                obj->min) == 1)
                        obj->min = pt->thresh;
-                /* Release the dependency fence, but get its timestamp
-                 * first.*/
-                if (pt->dep) {
-                        s64 ns = 0;
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
-                        struct list_head *dep_pts = &pt->dep->pt_list_head;
-                        list_for_each_entry(pos, dep_pts, pt_list) {
-                                ns = max(ns, ktime_to_ns(pos->timestamp));
-                        }
-#else
-                        struct fence *fence;
-                        int i;
-                        for (i = 0; i < pt->dep->num_fences; i++) {
-                                fence = pt->dep->cbs[i].sync_pt;
-                                ns = max(ns, ktime_to_ns(fence->timestamp));
-                        }
-#endif
-                        pt->dep_timestamp = ns_to_ktime(ns);
-                        sync_fence_put(pt->dep);
-                        pt->dep = NULL;
-                }
                /* Release the semaphore to the pool. */
                nvgpu_semaphore_put(pt->sema);
                pt->sema = NULL;
@@ -354,18 +308,6 @@ done:
        return signaled;
 }
-static inline ktime_t gk20a_sync_pt_duration(struct sync_pt *sync_pt)
-{
-        struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
-        if (!gk20a_sync_pt_has_signaled(sync_pt) || !pt->dep_timestamp.tv64)
-                return ns_to_ktime(0);
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
-        return ktime_sub(sync_pt->timestamp, pt->dep_timestamp);
-#else
-        return ktime_sub(sync_pt->base.timestamp, pt->dep_timestamp);
-#endif
-}
 static int gk20a_sync_pt_compare(struct sync_pt *a, struct sync_pt *b)
 {
        bool a_expired;
@@ -426,39 +368,13 @@ static void gk20a_sync_pt_value_str(struct sync_pt *sync_pt, char *str,
                int size)
 {
        struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
-        ktime_t dur = gk20a_sync_pt_duration(sync_pt);
        if (pt->sema) {
                gk20a_sync_pt_value_str_for_sema(pt, str, size);
                return;
        }
-        if (pt->dep) {
+        snprintf(str, size, "%d", pt->thresh);
-                snprintf(str, size, "(dep: [%p] %s) %d",
-                         pt->dep, pt->dep->name, pt->thresh);
-        } else if (dur.tv64) {
-                struct timeval tv = ktime_to_timeval(dur);
-                snprintf(str, size, "(took %ld.%03ld ms) %d",
-                         tv.tv_sec * 1000 + tv.tv_usec / 1000,
-                         tv.tv_usec % 1000,
-                         pt->thresh);
-        } else {
-                snprintf(str, size, "%d", pt->thresh);
-        }
-}
-static int gk20a_sync_fill_driver_data(struct sync_pt *sync_pt,
-                void *data, int size)
-{
-        struct gk20a_sync_pt_info info;
-        if (size < (int)sizeof(info))
-                return -ENOMEM;
-        info.hw_op_ns = ktime_to_ns(gk20a_sync_pt_duration(sync_pt));
-        memcpy(data, &info, sizeof(info));
-        return sizeof(info);
 }
 static const struct sync_timeline_ops gk20a_sync_timeline_ops = {
@@ -467,7 +383,6 @@ static const struct sync_timeline_ops gk20a_sync_timeline_ops = {
        .has_signaled = gk20a_sync_pt_has_signaled,
        .compare = gk20a_sync_pt_compare,
        .free_pt = gk20a_sync_pt_free_inst,
-        .fill_driver_data = gk20a_sync_fill_driver_data,
        .timeline_value_str = gk20a_sync_timeline_value_str,
        .pt_value_str = gk20a_sync_pt_value_str,
 };
@@ -515,7 +430,6 @@ struct sync_fence *gk20a_sync_fence_create(
                struct gk20a *g,
                struct sync_timeline *obj,
                struct nvgpu_semaphore *sema,
-                struct sync_fence *dependency,
                const char *fmt, ...)
 {
        char name[30];
@@ -524,7 +438,7 @@ struct sync_fence *gk20a_sync_fence_create(
        struct sync_fence *fence;
        struct gk20a_sync_timeline *timeline = to_gk20a_timeline(obj);
-        pt = gk20a_sync_pt_create_inst(g, timeline, sema, dependency);
+        pt = gk20a_sync_pt_create_inst(g, timeline, sema);
        if (pt == NULL)
                return NULL;
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/sync_gk20a.h
index 33112bb3..a422377c 100644
--- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.h
@@ -37,7 +37,6 @@ struct sync_fence *gk20a_sync_fence_create(
                struct gk20a *g,
                struct sync_timeline *,
                struct nvgpu_semaphore *,
-                struct sync_fence *dependency,
                const char *fmt, ...);
 struct sync_fence *gk20a_sync_fence_fdget(int fd);
 #else
author	Konsta Holtta <kholtta@nvidia.com>	2017-04-06 03:48:35 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2017-04-11 12:57:21 -0400
commit	1a4647272f4fe50137c79583b698c1ef6f5def12 (patch)
tree	0eee9698405e4467a7644c74c3d41d4bca933a8f /drivers/gpu
parent	6de456f8407ba7230410175f1aff90541137d19b (diff)