From 84536b929be7e56dd8221ba8d541fd4cbcd149ed Mon Sep 17 00:00:00 2001
From: Konsta Holtta <kholtta@nvidia.com>
Date: Mon, 21 May 2018 17:31:10 +0300
Subject: gpu: nvgpu: split gk20a_submit_append_gpfifo

In gk20a_submit_channel_gpfifo the gpfifo entries can come from a kernel
buffer or from userspace. To simplify the logic in
gk20a_submit_append_gpfifo, extract out a function that copies the
entries directly from userspace to the gpu memory for performance, and
another function that copies from a kernel buffer to the gpu memory. The
latter is used for kernel submits and when the gpfifo pipe exists which
would mean that the gpfifo memory is in vidmem and is thus not directly
accessible with a kernel virtual pointer.

While this function is being changed a lot, also rename it to start with
nvgpu_ instead of gk20a_.

Additionally, simplify pushbuffer debug tracing by always using the
kernel memory for the prints. Tracing when the gpfifo memory has been
allocated in vidmem is no longer supported; sysmem is almost always used
in practice anyway.

Jira NVGPU-705

Change-Id: Icab843a379a75fb46054dee157a0a54ff9fbba59
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1730481
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/os/linux/channel.c | 182 ++++++++++++++++-------------------
 1 file changed, 85 insertions(+), 97 deletions(-)

(limited to 'drivers')

diff --git a/drivers/gpu/nvgpu/os/linux/channel.c b/drivers/gpu/nvgpu/os/linux/channel.c
index dd2b17ee..37e36cb9 100644
--- a/drivers/gpu/nvgpu/os/linux/channel.c
+++ b/drivers/gpu/nvgpu/os/linux/channel.c
@@ -491,45 +491,22 @@ static void trace_write_pushbuffer(struct channel_gk20a *c,
 }
 #endif
 
-static void trace_write_pushbuffer_range(struct channel_gk20a *c,
-					 struct nvgpu_gpfifo_entry *g,
-					 struct nvgpu_gpfifo_entry __user *user_gpfifo,
-					 int offset,
-					 int count)
+static void trace_write_pushbuffers(struct channel_gk20a *c, u32 count)
 {
 #ifdef CONFIG_DEBUG_FS
-	u32 size;
-	int i;
-	struct nvgpu_gpfifo_entry *gp;
-	bool gpfifo_allocated = false;
+	struct nvgpu_gpfifo_entry *gp = c->gpfifo.mem.cpu_va;
+	u32 n = c->gpfifo.entry_num;
+	u32 start = c->gpfifo.put;
+	u32 i;
 
 	if (!gk20a_debug_trace_cmdbuf)
 		return;
 
-	if (!g && !user_gpfifo)
+	if (!gp)
 		return;
 
-	if (!g) {
-		size = count * sizeof(struct nvgpu_gpfifo_entry);
-		if (size) {
-			g = nvgpu_big_malloc(c->g, size);
-			if (!g)
-				return;
-
-			if (copy_from_user(g, user_gpfifo, size)) {
-				nvgpu_big_free(c->g, g);
-				return;
-			}
-		}
-		gpfifo_allocated = true;
-	}
-
-	gp = g + offset;
-	for (i = 0; i < count; i++, gp++)
-		trace_write_pushbuffer(c, gp);
-
-	if (gpfifo_allocated)
-		nvgpu_big_free(c->g, g);
+	for (i = 0; i < count; i++)
+		trace_write_pushbuffer(c, &gp[(start + i) % n]);
 #endif
 }
 
@@ -682,98 +659,109 @@ static void gk20a_submit_append_priv_cmdbuf(struct channel_gk20a *c,
 	c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1);
 }
 
-/*
- * Copy source gpfifo entries into the gpfifo ring buffer, potentially
- * splitting into two memcpys to handle wrap-around.
- */
-static int gk20a_submit_append_gpfifo(struct channel_gk20a *c,
-		struct nvgpu_gpfifo_entry *kern_gpfifo,
+static int nvgpu_submit_append_gpfifo_user_direct(struct channel_gk20a *c,
 		struct nvgpu_gpfifo_userdata userdata,
 		u32 num_entries)
 {
 	struct gk20a *g = c->g;
+	struct nvgpu_gpfifo_entry *gpfifo_cpu = c->gpfifo.mem.cpu_va;
 	u32 gpfifo_size = c->gpfifo.entry_num;
 	u32 len = num_entries;
 	u32 start = c->gpfifo.put;
 	u32 end = start + len; /* exclusive */
+	int err;
+
+	if (end > gpfifo_size) {
+		/* wrap-around */
+		int length0 = gpfifo_size - start;
+		int length1 = len - length0;
+
+		err = g->os_channel.copy_user_gpfifo(
+				gpfifo_cpu + start, userdata,
+				0, length0);
+		if (err)
+			return err;
+
+		err = g->os_channel.copy_user_gpfifo(
+				gpfifo_cpu, userdata,
+				length0, length1);
+		if (err)
+			return err;
+	} else {
+		err = g->os_channel.copy_user_gpfifo(
+				gpfifo_cpu + start, userdata,
+				0, len);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static void nvgpu_submit_append_gpfifo_common(struct channel_gk20a *c,
+		struct nvgpu_gpfifo_entry *src, u32 num_entries)
+{
+	struct gk20a *g = c->g;
 	struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem;
-	struct nvgpu_gpfifo_entry *cpu_src;
+	/* in bytes */
+	u32 gpfifo_size =
+		c->gpfifo.entry_num * sizeof(struct nvgpu_gpfifo_entry);
+	u32 len = num_entries * sizeof(struct nvgpu_gpfifo_entry);
+	u32 start = c->gpfifo.put * sizeof(struct nvgpu_gpfifo_entry);
+	u32 end = start + len; /* exclusive */
+
+	if (end > gpfifo_size) {
+		/* wrap-around */
+		int length0 = gpfifo_size - start;
+		int length1 = len - length0;
+		struct nvgpu_gpfifo_entry *src2 = src + length0;
+
+		nvgpu_mem_wr_n(g, gpfifo_mem, start, src, length0);
+		nvgpu_mem_wr_n(g, gpfifo_mem, 0, src2, length1);
+	} else {
+		nvgpu_mem_wr_n(g, gpfifo_mem, start, src, len);
+	}
+}
+
+/*
+ * Copy source gpfifo entries into the gpfifo ring buffer, potentially
+ * splitting into two memcpys to handle wrap-around.
+ */
+static int nvgpu_submit_append_gpfifo(struct channel_gk20a *c,
+		struct nvgpu_gpfifo_entry *kern_gpfifo,
+		struct nvgpu_gpfifo_userdata userdata,
+		u32 num_entries)
+{
+	struct gk20a *g = c->g;
 	int err;
 
 	if (!kern_gpfifo && !c->gpfifo.pipe) {
-		struct nvgpu_gpfifo_entry *gpfifo_cpu = gpfifo_mem->cpu_va;
 		/*
 		 * This path (from userspace to sysmem) is special in order to
 		 * avoid two copies unnecessarily (from user to pipe, then from
 		 * pipe to gpu sysmem buffer).
 		 */
-		if (end > gpfifo_size) {
-			/* wrap-around */
-			int length0 = gpfifo_size - start;
-			int length1 = len - length0;
-
-			err = g->os_channel.copy_user_gpfifo(
-					gpfifo_cpu + start, userdata,
-					0, length0);
-			if (err)
-				return err;
-
-			err = g->os_channel.copy_user_gpfifo(
-					gpfifo_cpu, userdata,
-					length0, length1);
-			if (err)
-				return err;
-
-			trace_write_pushbuffer_range(c, gpfifo_cpu, NULL,
-					start, length0);
-			trace_write_pushbuffer_range(c, gpfifo_cpu, NULL,
-					0, length1);
-		} else {
-			err = g->os_channel.copy_user_gpfifo(
-					gpfifo_cpu + start, userdata,
-					0, len);
-			if (err)
-				return err;
-
-			trace_write_pushbuffer_range(c, gpfifo_cpu, NULL,
-					start, len);
-		}
-		goto out;
+		err = nvgpu_submit_append_gpfifo_user_direct(c, userdata,
+				num_entries);
+		if (err)
+			return err;
 	} else if (!kern_gpfifo) {
-		/* from userspace to vidmem, use the common copy path below */
+		/* from userspace to vidmem, use the common path */
 		err = g->os_channel.copy_user_gpfifo(c->gpfifo.pipe, userdata,
-				0, len);
+				0, num_entries);
 		if (err)
 			return err;
 
-		cpu_src = c->gpfifo.pipe;
+		nvgpu_submit_append_gpfifo_common(c, c->gpfifo.pipe,
+				num_entries);
 	} else {
 		/* from kernel to either sysmem or vidmem, don't need
-		 * copy_user_gpfifo so use the common path below */
-		cpu_src = kern_gpfifo;
-	}
-
-	if (end > gpfifo_size) {
-		/* wrap-around */
-		int length0 = gpfifo_size - start;
-		int length1 = len - length0;
-		struct nvgpu_gpfifo_entry *src2 = cpu_src + length0;
-		int s_bytes = start * sizeof(struct nvgpu_gpfifo_entry);
-		int l0_bytes = length0 * sizeof(struct nvgpu_gpfifo_entry);
-		int l1_bytes = length1 * sizeof(struct nvgpu_gpfifo_entry);
-
-		nvgpu_mem_wr_n(c->g, gpfifo_mem, s_bytes, cpu_src, l0_bytes);
-		nvgpu_mem_wr_n(c->g, gpfifo_mem, 0, src2, l1_bytes);
-	} else {
-		nvgpu_mem_wr_n(c->g, gpfifo_mem,
-				start * sizeof(struct nvgpu_gpfifo_entry),
-				cpu_src,
-				len * sizeof(struct nvgpu_gpfifo_entry));
+		 * copy_user_gpfifo so use the common path */
+		nvgpu_submit_append_gpfifo_common(c, kern_gpfifo, num_entries);
 	}
 
-	trace_write_pushbuffer_range(c, cpu_src, NULL, 0, num_entries);
+	trace_write_pushbuffers(c, num_entries);
 
-out:
 	c->gpfifo.put = (c->gpfifo.put + num_entries) &
 		(c->gpfifo.entry_num - 1);
 
@@ -987,7 +975,7 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 	if (wait_cmd)
 		gk20a_submit_append_priv_cmdbuf(c, wait_cmd);
 
-	err = gk20a_submit_append_gpfifo(c, gpfifo, userdata,
+	err = nvgpu_submit_append_gpfifo(c, gpfifo, userdata,
 			num_entries);
 	if (err)
 		goto clean_up_job;
-- 
cgit v1.2.2