From 1a4647272f4fe50137c79583b698c1ef6f5def12 Mon Sep 17 00:00:00 2001
From: Konsta Holtta <kholtta@nvidia.com>
Date: Thu, 6 Apr 2017 10:48:35 +0300
Subject: gpu: nvgpu: remove fence dependency tracking

In preparation for better abstraction in job synchronization, drop
support for the dependency fences tracked via submit pre-fences in
semaphore-based syncs. The tracking has only worked for semaphores, not
nvhost syncpoints, and hasn't really been used. The dependency, or the
duration since its timestamp, was printed in the sync framework's sync
pt value string.

Also remove the userspace-visible struct gk20a_sync_pt_info, which is
not used and depends on this feature (it provided the duration since the
dependency fence's timestamp), along with the fill_driver_data timeline
op that filled it in.

Jira NVGPU-43

Change-Id: Ia2b26502a9dc8f5bef5470f94b1475001f621da1
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1456880
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 25 ++------
 drivers/gpu/nvgpu/gk20a/fence_gk20a.c        |  3 +-
 drivers/gpu/nvgpu/gk20a/fence_gk20a.h        |  1 -
 drivers/gpu/nvgpu/gk20a/sync_gk20a.c         | 96 ++--------------------------
 drivers/gpu/nvgpu/gk20a/sync_gk20a.h         |  1 -
 include/uapi/linux/nvgpu.h                   |  4 --
 6 files changed, 12 insertions(+), 118 deletions(-)

diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index fbeb1e4a..6d0f0854 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -678,7 +678,7 @@ static int gk20a_channel_semaphore_wait_fd(
 					sema->timeline,
 					fp_sema,
 					&c->semaphore_wq,
-					NULL, false, false);
+					false, false);
 			if (err) {
 				nvgpu_semaphore_put(fp_sema);
 				goto clean_up_priv_cmd;
@@ -742,7 +742,7 @@ static int gk20a_channel_semaphore_wait_fd(
 	 *  already signaled
 	 */
 	err = gk20a_fence_from_semaphore(c->g, fence, sema->timeline, w->sema,
-			&c->semaphore_wq, NULL, false, false);
+			&c->semaphore_wq, false, false);
 	if (err)
 		goto clean_up_sema;
 
@@ -787,7 +787,6 @@ clean_up_sync_fence:
 
 static int __gk20a_channel_semaphore_incr(
 		struct gk20a_channel_sync *s, bool wfi_cmd,
-		struct sync_fence *dependency,
 		struct priv_cmd_entry *incr_cmd,
 		struct gk20a_fence *fence,
 		bool need_sync_fence)
@@ -820,7 +819,7 @@ static int __gk20a_channel_semaphore_incr(
 	err = gk20a_fence_from_semaphore(c->g, fence,
 			sp->timeline, semaphore,
 			&c->semaphore_wq,
-			dependency, wfi_cmd,
+			wfi_cmd,
 			need_sync_fence);
 	if (err)
 		goto clean_up_sema;
@@ -839,7 +838,6 @@ static int gk20a_channel_semaphore_incr_wfi(
 {
 	return __gk20a_channel_semaphore_incr(s,
 			true /* wfi */,
-			NULL,
 			entry, fence, true);
 }
 
@@ -854,7 +852,6 @@ static int gk20a_channel_semaphore_incr(
 	 * a fence to user space. */
 	return __gk20a_channel_semaphore_incr(s,
 			false /* no wfi */,
-			NULL,
 			entry, fence, need_sync_fence);
 }
 
@@ -868,22 +865,12 @@ static int gk20a_channel_semaphore_incr_user(
 		bool register_irq)
 {
 #ifdef CONFIG_SYNC
-	struct sync_fence *dependency = NULL;
 	int err;
 
-	if (wait_fence_fd >= 0) {
-		dependency = gk20a_sync_fence_fdget(wait_fence_fd);
-		if (!dependency)
-			return -EINVAL;
-	}
-
-	err = __gk20a_channel_semaphore_incr(s, wfi, dependency,
-					     entry, fence, need_sync_fence);
-	if (err) {
-		if (dependency)
-			sync_fence_put(dependency);
+	err = __gk20a_channel_semaphore_incr(s, wfi, entry, fence,
+			need_sync_fence);
+	if (err)
 		return err;
-	}
 
 	return 0;
 #else
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
index 63da0959..a5aeae08 100644
--- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
@@ -233,7 +233,6 @@ int gk20a_fence_from_semaphore(
 		struct sync_timeline *timeline,
 		struct nvgpu_semaphore *semaphore,
 		wait_queue_head_t *semaphore_wq,
-		struct sync_fence *dependency,
 		bool wfi, bool need_sync_fence)
 {
 	struct gk20a_fence *f = fence_out;
@@ -242,7 +241,7 @@ int gk20a_fence_from_semaphore(
 #ifdef CONFIG_SYNC
 	if (need_sync_fence) {
 		sync_fence = gk20a_sync_fence_create(g, timeline, semaphore,
-					dependency, "f-gk20a-0x%04x",
+					"f-gk20a-0x%04x",
 					nvgpu_semaphore_gpu_ro_va(semaphore));
 		if (!sync_fence)
 			return -1;
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.h b/drivers/gpu/nvgpu/gk20a/fence_gk20a.h
index fccf0c27..426556cc 100644
--- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.h
@@ -58,7 +58,6 @@ int gk20a_fence_from_semaphore(
 		struct sync_timeline *timeline,
 		struct nvgpu_semaphore *semaphore,
 		wait_queue_head_t *semaphore_wq,
-		struct sync_fence *dependency,
 		bool wfi, bool need_sync_fence);
 
 int gk20a_fence_from_syncpt(
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
index 5b888299..ecc96a7b 100644
--- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
@@ -47,8 +47,6 @@ struct gk20a_sync_pt {
 	u32				thresh;
 	struct nvgpu_semaphore		*sema;
 	struct gk20a_sync_timeline	*obj;
-	struct sync_fence		*dep;
-	ktime_t				dep_timestamp;
 
 	/*
 	 * Use a spin lock here since it will have better performance
@@ -206,8 +204,6 @@ static void gk20a_sync_pt_free_shared(struct kref *ref)
 		container_of(ref, struct gk20a_sync_pt, refcount);
 	struct gk20a *g = pt->g;
 
-	if (pt->dep)
-		sync_fence_put(pt->dep);
 	if (pt->sema)
 		nvgpu_semaphore_put(pt->sema);
 	nvgpu_kfree(g, pt);
@@ -216,8 +212,7 @@ static void gk20a_sync_pt_free_shared(struct kref *ref)
 static struct gk20a_sync_pt *gk20a_sync_pt_create_shared(
 		struct gk20a *g,
 		struct gk20a_sync_timeline *obj,
-		struct nvgpu_semaphore *sema,
-		struct sync_fence *dependency)
+		struct nvgpu_semaphore *sema)
 {
 	struct gk20a_sync_pt *shared;
 
@@ -231,20 +226,6 @@ static struct gk20a_sync_pt *gk20a_sync_pt_create_shared(
 	shared->sema = sema;
 	shared->thresh = ++obj->max; /* sync framework has a lock */
 
-	/* Store the dependency fence for this pt. */
-	if (dependency) {
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
-		if (dependency->status == 0)
-#else
-		if (!atomic_read(&dependency->status))
-#endif
-			shared->dep = dependency;
-		else {
-			shared->dep_timestamp = ktime_get();
-			sync_fence_put(dependency);
-		}
-	}
-
 	nvgpu_spinlock_init(&shared->lock);
 
 	nvgpu_semaphore_get(sema);
@@ -255,8 +236,7 @@ static struct gk20a_sync_pt *gk20a_sync_pt_create_shared(
 static struct sync_pt *gk20a_sync_pt_create_inst(
 		struct gk20a *g,
 		struct gk20a_sync_timeline *obj,
-		struct nvgpu_semaphore *sema,
-		struct sync_fence *dependency)
+		struct nvgpu_semaphore *sema)
 {
 	struct gk20a_sync_pt_inst *pti;
 
@@ -265,7 +245,7 @@ static struct sync_pt *gk20a_sync_pt_create_inst(
 	if (!pti)
 		return NULL;
 
-	pti->shared = gk20a_sync_pt_create_shared(g, obj, sema, dependency);
+	pti->shared = gk20a_sync_pt_create_shared(g, obj, sema);
 	if (!pti->shared) {
 		sync_pt_free(&pti->pt);
 		return NULL;
@@ -303,9 +283,6 @@ static int gk20a_sync_pt_has_signaled(struct sync_pt *sync_pt)
 {
 	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
 	struct gk20a_sync_timeline *obj = pt->obj;
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
-	struct sync_pt *pos;
-#endif
 	bool signaled = true;
 
 	nvgpu_spinlock_acquire(&pt->lock);
@@ -321,29 +298,6 @@ static int gk20a_sync_pt_has_signaled(struct sync_pt *sync_pt)
 						obj->min) == 1)
 			obj->min = pt->thresh;
 
-		/* Release the dependency fence, but get its timestamp
-		 * first.*/
-		if (pt->dep) {
-			s64 ns = 0;
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
-			struct list_head *dep_pts = &pt->dep->pt_list_head;
-			list_for_each_entry(pos, dep_pts, pt_list) {
-				ns = max(ns, ktime_to_ns(pos->timestamp));
-			}
-#else
-			struct fence *fence;
-			int i;
-
-			for (i = 0; i < pt->dep->num_fences; i++) {
-				fence = pt->dep->cbs[i].sync_pt;
-				ns = max(ns, ktime_to_ns(fence->timestamp));
-			}
-#endif
-			pt->dep_timestamp = ns_to_ktime(ns);
-			sync_fence_put(pt->dep);
-			pt->dep = NULL;
-		}
-
 		/* Release the semaphore to the pool. */
 		nvgpu_semaphore_put(pt->sema);
 		pt->sema = NULL;
@@ -354,18 +308,6 @@ done:
 	return signaled;
 }
 
-static inline ktime_t gk20a_sync_pt_duration(struct sync_pt *sync_pt)
-{
-	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
-	if (!gk20a_sync_pt_has_signaled(sync_pt) || !pt->dep_timestamp.tv64)
-		return ns_to_ktime(0);
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
-	return ktime_sub(sync_pt->timestamp, pt->dep_timestamp);
-#else
-	return ktime_sub(sync_pt->base.timestamp, pt->dep_timestamp);
-#endif
-}
-
 static int gk20a_sync_pt_compare(struct sync_pt *a, struct sync_pt *b)
 {
 	bool a_expired;
@@ -426,39 +368,13 @@ static void gk20a_sync_pt_value_str(struct sync_pt *sync_pt, char *str,
 		int size)
 {
 	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
-	ktime_t dur = gk20a_sync_pt_duration(sync_pt);
 
 	if (pt->sema) {
 		gk20a_sync_pt_value_str_for_sema(pt, str, size);
 		return;
 	}
 
-	if (pt->dep) {
-		snprintf(str, size, "(dep: [%p] %s) %d",
-			 pt->dep, pt->dep->name, pt->thresh);
-	} else if (dur.tv64) {
-		struct timeval tv = ktime_to_timeval(dur);
-		snprintf(str, size, "(took %ld.%03ld ms) %d",
-			 tv.tv_sec * 1000 + tv.tv_usec / 1000,
-			 tv.tv_usec % 1000,
-			 pt->thresh);
-	} else {
-		snprintf(str, size, "%d", pt->thresh);
-	}
-}
-
-static int gk20a_sync_fill_driver_data(struct sync_pt *sync_pt,
-		void *data, int size)
-{
-	struct gk20a_sync_pt_info info;
-
-	if (size < (int)sizeof(info))
-		return -ENOMEM;
-
-	info.hw_op_ns = ktime_to_ns(gk20a_sync_pt_duration(sync_pt));
-	memcpy(data, &info, sizeof(info));
-
-	return sizeof(info);
+	snprintf(str, size, "%d", pt->thresh);
 }
 
 static const struct sync_timeline_ops gk20a_sync_timeline_ops = {
@@ -467,7 +383,6 @@ static const struct sync_timeline_ops gk20a_sync_timeline_ops = {
 	.has_signaled = gk20a_sync_pt_has_signaled,
 	.compare = gk20a_sync_pt_compare,
 	.free_pt = gk20a_sync_pt_free_inst,
-	.fill_driver_data = gk20a_sync_fill_driver_data,
 	.timeline_value_str = gk20a_sync_timeline_value_str,
 	.pt_value_str = gk20a_sync_pt_value_str,
 };
@@ -515,7 +430,6 @@ struct sync_fence *gk20a_sync_fence_create(
 		struct gk20a *g,
 		struct sync_timeline *obj,
 		struct nvgpu_semaphore *sema,
-		struct sync_fence *dependency,
 		const char *fmt, ...)
 {
 	char name[30];
@@ -524,7 +438,7 @@ struct sync_fence *gk20a_sync_fence_create(
 	struct sync_fence *fence;
 	struct gk20a_sync_timeline *timeline = to_gk20a_timeline(obj);
 
-	pt = gk20a_sync_pt_create_inst(g, timeline, sema, dependency);
+	pt = gk20a_sync_pt_create_inst(g, timeline, sema);
 	if (pt == NULL)
 		return NULL;
 
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/sync_gk20a.h
index 33112bb3..a422377c 100644
--- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.h
@@ -37,7 +37,6 @@ struct sync_fence *gk20a_sync_fence_create(
 		struct gk20a *g,
 		struct sync_timeline *,
 		struct nvgpu_semaphore *,
-		struct sync_fence *dependency,
 		const char *fmt, ...);
 struct sync_fence *gk20a_sync_fence_fdget(int fd);
 #else
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index ca9b49e6..e25667cb 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -1354,10 +1354,6 @@ struct nvgpu_alloc_gpfifo_ex_args {
 	__u32 reserved[5];
 };
 
-struct gk20a_sync_pt_info {
-	__u64 hw_op_ns;
-};
-
 struct nvgpu_fence {
 	__u32 id;        /* syncpoint id or sync fence fd */
 	__u32 value;     /* syncpoint value (discarded when using sync fence) */
-- 
cgit v1.2.2