From 63e8592e06939e20c7b9e56b430353ebbee31ad6 Mon Sep 17 00:00:00 2001
From: Sachit Kadle
Date: Mon, 22 Aug 2016 18:06:30 -0700
Subject: gpu: nvgpu: use inplace allocation in sync framework

This change is the first of a series of changes to support the usage of
pre-allocated job tracking resources in the submit path. With this change,
we still maintain a dynamically-allocated joblist, but make the necessary
changes in the channel_sync & fence framework to use in-place allocations.

Specifically, we:
1) Update channel sync framework routines to take in pre-allocated
   priv_cmd_entry(s) & gk20a_fence(s) rather than dynamically allocating
   themselves
2) Move allocation of priv_cmd_entry(s) & gk20a_fence(s) to
   gk20a_submit_prepare_syncs
3) Modify fence framework to have separate allocation and init APIs. We
   expose allocation as a separate API, so the client can allocate the
   object before passing it into the channel sync framework.
4) Fix clean_up logic in channel sync framework

Bug 1795076

Change-Id: I96db457683cd207fd029c31c45f548f98055e844
Signed-off-by: Sachit Kadle
Reviewed-on: http://git-master/r/1206725
(cherry picked from commit 9d196fd10db6c2f934c2a53b1fc0500eb4626624)
Reviewed-on: http://git-master/r/1223933
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/gk20a/fence_gk20a.c | 88 ++++++++++++++++++++++++-----------
 1 file changed, 61 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
index 596dc549..f788829f 100644
--- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
@@ -63,16 +63,27 @@ struct gk20a_fence *gk20a_fence_get(struct gk20a_fence *f)
 	return f;
 }
 
+static inline bool gk20a_fence_is_valid(struct gk20a_fence *f)
+{
+	bool valid = f->valid;
+
+	rmb();
+	return valid;
+}
+
 int gk20a_fence_wait(struct gk20a_fence *f, int timeout)
 {
-	if (!tegra_platform_is_silicon())
-		timeout = (u32)MAX_SCHEDULE_TIMEOUT;
-	return f->ops->wait(f, timeout);
+	if (f && gk20a_fence_is_valid(f)) {
+		if (!tegra_platform_is_silicon())
+			timeout = (u32)MAX_SCHEDULE_TIMEOUT;
+		return f->ops->wait(f, timeout);
+	}
+	return 0;
 }
 
 bool gk20a_fence_is_expired(struct gk20a_fence *f)
 {
-	if (f && f->ops)
+	if (f && gk20a_fence_is_valid(f) && f->ops)
 		return f->ops->is_expired(f);
 	else
 		return true;
@@ -83,7 +94,7 @@ int gk20a_fence_install_fd(struct gk20a_fence *f)
 #ifdef CONFIG_SYNC
 	int fd;
 
-	if (!f->sync_fence)
+	if (!f || !gk20a_fence_is_valid(f) || !f->sync_fence)
 		return -EINVAL;
 
 	fd = get_unused_fd_flags(O_RDWR);
@@ -98,18 +109,28 @@ int gk20a_fence_install_fd(struct gk20a_fence *f)
 #endif
 }
 
-struct gk20a_fence *gk20a_alloc_fence(const struct gk20a_fence_ops *ops,
-				      struct sync_fence *sync_fence, bool wfi)
+struct gk20a_fence *gk20a_alloc_fence(struct channel_gk20a *c)
 {
-	struct gk20a_fence *f = kzalloc(sizeof(*f), GFP_KERNEL);
-	if (!f)
+	struct gk20a_fence *fence;
+
+	fence = kzalloc(sizeof(struct gk20a_fence), GFP_KERNEL);
+	if (!fence)
 		return NULL;
-	kref_init(&f->ref);
+
+	kref_init(&fence->ref);
+	return fence;
+}
+
+void gk20a_init_fence(struct gk20a_fence *f,
+		const struct gk20a_fence_ops *ops,
+		struct sync_fence *sync_fence, bool wfi)
+{
+	if (!f)
+		return;
 	f->ops = ops;
 	f->sync_fence = sync_fence;
 	f->wfi = wfi;
 	f->syncpt_id = -1;
 }
 
 /* Fences that are backed by GPU semaphores: */
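
[Editor's illustration, not part of the patch: the alloc/init split above lets the
submit path obtain the gk20a_fence object before entering the sync framework. A
minimal sketch of the resulting call pattern, assuming a caller that already holds
a channel `c` plus syncpoint parameters (`host1x_pdev`, `id`, `value`, `wfi`,
`need_sync_fence`); the local variables and the -ENOMEM policy are illustrative.]

	struct gk20a_fence *f;
	int err;

	/* Step 1: pre-allocate the fence; only the kref is initialized. */
	f = gk20a_alloc_fence(c);
	if (!f)
		return -ENOMEM;

	/* Step 2: hand the caller-owned object to the fence framework,
	 * which now initializes it in place instead of allocating. */
	err = gk20a_fence_from_syncpt(f, host1x_pdev, id, value,
				      wfi, need_sync_fence);
	if (err) {
		gk20a_fence_put(f);	/* drop the reference taken by alloc */
		return err;
	}
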
@@ -143,14 +164,15 @@ static const struct gk20a_fence_ops gk20a_semaphore_fence_ops = {
 };
 
 /* This function takes ownership of the semaphore */
-struct gk20a_fence *gk20a_fence_from_semaphore(
+int gk20a_fence_from_semaphore(
+		struct gk20a_fence *fence_out,
 		struct sync_timeline *timeline,
 		struct gk20a_semaphore *semaphore,
 		wait_queue_head_t *semaphore_wq,
 		struct sync_fence *dependency, bool wfi,
 		bool need_sync_fence)
 {
-	struct gk20a_fence *f;
+	struct gk20a_fence *f = fence_out;
 	struct sync_fence *sync_fence = NULL;
 
 #ifdef CONFIG_SYNC
@@ -159,21 +181,26 @@ struct gk20a_fence *gk20a_fence_from_semaphore(
 			dependency, "f-gk20a-0x%04x",
 			gk20a_semaphore_gpu_ro_va(semaphore));
 		if (!sync_fence)
-			return NULL;
+			return -1;
 	}
 #endif
 
-	f = gk20a_alloc_fence(&gk20a_semaphore_fence_ops, sync_fence, wfi);
+	gk20a_init_fence(f, &gk20a_semaphore_fence_ops, sync_fence, wfi);
 	if (!f) {
 #ifdef CONFIG_SYNC
 		sync_fence_put(sync_fence);
 #endif
-		return NULL;
+		return -EINVAL;
 	}
 
 	f->semaphore = semaphore;
 	f->semaphore_wq = semaphore_wq;
-	return f;
+
+	/* commit previous writes before setting the valid flag */
+	wmb();
+	f->valid = true;
+
+	return 0;
 }
 
 #ifdef CONFIG_TEGRA_GK20A
@@ -197,11 +224,13 @@ static const struct gk20a_fence_ops gk20a_syncpt_fence_ops = {
 	.is_expired = &gk20a_syncpt_fence_is_expired,
 };
 
-struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev,
-					    u32 id, u32 value, bool wfi,
-					    bool need_sync_fence)
+int gk20a_fence_from_syncpt(
+		struct gk20a_fence *fence_out,
+		struct platform_device *host1x_pdev,
+		u32 id, u32 value, bool wfi,
+		bool need_sync_fence)
 {
-	struct gk20a_fence *f;
+	struct gk20a_fence *f = fence_out;
 	struct sync_fence *sync_fence = NULL;
 
 #ifdef CONFIG_SYNC
@@ -214,27 +243,32 @@ struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev,
 		sync_fence = nvhost_sync_create_fence(host1x_pdev, &pt, 1,
 						      "fence");
 		if (IS_ERR(sync_fence))
-			return NULL;
+			return -1;
 	}
 #endif
 
-	f = gk20a_alloc_fence(&gk20a_syncpt_fence_ops, sync_fence, wfi);
+	gk20a_init_fence(f, &gk20a_syncpt_fence_ops, sync_fence, wfi);
 	if (!f) {
 #ifdef CONFIG_SYNC
 		if (sync_fence)
			sync_fence_put(sync_fence);
 #endif
-		return NULL;
+		return -EINVAL;
 	}
 
 	f->host1x_pdev = host1x_pdev;
 	f->syncpt_id = id;
 	f->syncpt_value = value;
-	return f;
+
+	/* commit previous writes before setting the valid flag */
+	wmb();
+	f->valid = true;
+
+	return 0;
 }
 
 #else
-struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev,
+int gk20a_fence_from_syncpt(struct platform_device *host1x_pdev,
			 u32 id, u32 value, bool wfi)
 {
-	return NULL;
+	return -EINVAL;
 }
 #endif
--
cgit v1.2.2
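
[Editor's illustration, not part of the patch: the new `valid` flag turns fence
initialization into a two-phase publish. The `wmb()` in the creation paths above
orders the field stores before `f->valid = true`, and the paired `rmb()` in
`gk20a_fence_is_valid()` orders the flag load before any dependent field reads.
Condensed from the code in this patch; writer and reader would run in different
contexts.]

	/* Writer side, as in gk20a_fence_from_semaphore(): */
	f->semaphore = semaphore;
	f->semaphore_wq = semaphore_wq;
	wmb();			/* commit the fields before the flag */
	f->valid = true;	/* publish the fence */

	/* Reader side, as in gk20a_fence_wait() via gk20a_fence_is_valid(): */
	bool valid = f->valid;
	rmb();			/* flag load ordered before field reads */
	if (valid)
		return f->ops->wait(f, timeout);
	return 0;		/* not yet published: treat the wait as a no-op */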