gpu: nvgpu: split gk20a_submit_append_gpfifo

In gk20a_submit_channel_gpfifo the gpfifo entries can come from a kernel buffer or from userspace. To simplify the logic in gk20a_submit_append_gpfifo, extract out a function that copies the entries directly from userspace to the gpu memory for performance, and another function that copies from a kernel buffer to the gpu memory. The latter is used for kernel submits and when the gpfifo pipe exists which would mean that the gpfifo memory is in vidmem and is thus not directly accessible with a kernel virtual pointer. While this function is being changed a lot, also rename it to start with nvgpu_ instead of gk20a_. Additionally, simplify pushbuffer debug tracing by always using the kernel memory for the prints. Tracing when the gpfifo memory has been allocated in vidmem is no longer supported; sysmem is almost always used in practice anyway. Jira NVGPU-705 Change-Id: Icab843a379a75fb46054dee157a0a54ff9fbba59 Signed-off-by: Konsta Holtta <kholtta@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1730481 Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com> Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
author: Konsta Holtta <kholtta@nvidia.com> 2018-05-21 10:31:10 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2018-06-20 15:25:49 -0400
commit: 84536b929be7e56dd8221ba8d541fd4cbcd149ed (patch)
tree: dd52bafa84fcf297861a8f3356f2f7651ea35792 /drivers
parent: 819f32bdf119ebf4d758a901c9a22c2c4a7d167a (diff)
1 files changed, 85 insertions, 97 deletions
diff --git a/drivers/gpu/nvgpu/os/linux/channel.c b/drivers/gpu/nvgpu/os/linux/channel.c
index dd2b17ee..37e36cb9 100644
--- a/drivers/gpu/nvgpu/os/linux/channel.c
+++ b/drivers/gpu/nvgpu/os/linux/channel.c
@@ -491,45 +491,22 @@ static void trace_write_pushbuffer(struct channel_gk20a *c,
 }
 #endif
-static void trace_write_pushbuffer_range(struct channel_gk20a *c,
+static void trace_write_pushbuffers(struct channel_gk20a *c, u32 count)
-                                         struct nvgpu_gpfifo_entry *g,
-                                         struct nvgpu_gpfifo_entry __user *user_gpfifo,
-                                         int offset,
-                                         int count)
 {
 #ifdef CONFIG_DEBUG_FS
-        u32 size;
+        struct nvgpu_gpfifo_entry *gp = c->gpfifo.mem.cpu_va;
-        int i;
+        u32 n = c->gpfifo.entry_num;
-        struct nvgpu_gpfifo_entry *gp;
+        u32 start = c->gpfifo.put;
-        bool gpfifo_allocated = false;
+        u32 i;
        if (!gk20a_debug_trace_cmdbuf)
                return;
-        if (!g && !user_gpfifo)
+        if (!gp)
                return;
-        if (!g) {
+        for (i = 0; i < count; i++)
-                size = count * sizeof(struct nvgpu_gpfifo_entry);
+                trace_write_pushbuffer(c, &gp[(start + i) % n]);
-                if (size) {
-                        g = nvgpu_big_malloc(c->g, size);
-                        if (!g)
-                                return;
-                        if (copy_from_user(g, user_gpfifo, size)) {
-                                nvgpu_big_free(c->g, g);
-                                return;
-                        }
-                }
-                gpfifo_allocated = true;
-        }
-        gp = g + offset;
-        for (i = 0; i < count; i++, gp++)
-                trace_write_pushbuffer(c, gp);
-        if (gpfifo_allocated)
-                nvgpu_big_free(c->g, g);
 #endif
 }
@@ -682,98 +659,109 @@ static void gk20a_submit_append_priv_cmdbuf(struct channel_gk20a *c,
        c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1);
 }
-/*
+static int nvgpu_submit_append_gpfifo_user_direct(struct channel_gk20a *c,
- * Copy source gpfifo entries into the gpfifo ring buffer, potentially
- * splitting into two memcpys to handle wrap-around.
- */
-static int gk20a_submit_append_gpfifo(struct channel_gk20a *c,
-                struct nvgpu_gpfifo_entry *kern_gpfifo,
                struct nvgpu_gpfifo_userdata userdata,
                u32 num_entries)
 {
        struct gk20a *g = c->g;
+        struct nvgpu_gpfifo_entry *gpfifo_cpu = c->gpfifo.mem.cpu_va;
        u32 gpfifo_size = c->gpfifo.entry_num;
        u32 len = num_entries;
        u32 start = c->gpfifo.put;
        u32 end = start + len; /* exclusive */
+        int err;
+        if (end > gpfifo_size) {
+                /* wrap-around */
+                int length0 = gpfifo_size - start;
+                int length1 = len - length0;
+                err = g->os_channel.copy_user_gpfifo(
+                                gpfifo_cpu + start, userdata,
+                                0, length0);
+                if (err)
+                        return err;
+                err = g->os_channel.copy_user_gpfifo(
+                                gpfifo_cpu, userdata,
+                                length0, length1);
+                if (err)
+                        return err;
+        } else {
+                err = g->os_channel.copy_user_gpfifo(
+                                gpfifo_cpu + start, userdata,
+                                0, len);
+                if (err)
+                        return err;
+        }
+        return 0;
+}
+/*
+ * Copy num_entries gpfifo entries from the kernel buffer src into the gpfifo
+ * ring starting at c->gpfifo.put, using nvgpu_mem_wr_n(). The copy is split
+ * in two when it would run past the end of the ring. c->gpfifo.put is not
+ * advanced here.
+ */
+static void nvgpu_submit_append_gpfifo_common(struct channel_gk20a *c,
+                struct nvgpu_gpfifo_entry *src, u32 num_entries)
+{
+        struct gk20a *g = c->g;
+        struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem;
+        /* all sizes and offsets below are in bytes, not entries */
+        u32 gpfifo_size =
+                c->gpfifo.entry_num * sizeof(struct nvgpu_gpfifo_entry);
+        u32 len = num_entries * sizeof(struct nvgpu_gpfifo_entry);
+        u32 start = c->gpfifo.put * sizeof(struct nvgpu_gpfifo_entry);
+        u32 end = start + len; /* exclusive */
+        if (end > gpfifo_size) {
+                /* wrap-around: fill the tail of the ring, then its head */
+                u32 length0 = gpfifo_size - start;
+                u32 length1 = len - length0;
+                /*
+                 * length0 is a byte count, so advance src bytewise;
+                 * src + length0 would skip length0 whole entries instead.
+                 */
+                void *src2 = (u8 *)src + length0;
+                nvgpu_mem_wr_n(g, gpfifo_mem, start, src, length0);
+                nvgpu_mem_wr_n(g, gpfifo_mem, 0, src2, length1);
+        } else {
+                nvgpu_mem_wr_n(g, gpfifo_mem, start, src, len);
+        }
+}
+/*
+ * Copy source gpfifo entries into the gpfifo ring buffer, potentially
+ * splitting into two memcpys to handle wrap-around.
+ */
+static int nvgpu_submit_append_gpfifo(struct channel_gk20a *c,
+                struct nvgpu_gpfifo_entry *kern_gpfifo,
+                struct nvgpu_gpfifo_userdata userdata,
+                u32 num_entries)
+{
+        struct gk20a *g = c->g;
        int err;
        if (!kern_gpfifo && !c->gpfifo.pipe) {
-                struct nvgpu_gpfifo_entry *gpfifo_cpu = gpfifo_mem->cpu_va;
                /*
                 * This path (from userspace to sysmem) is special in order to
                 * avoid two copies unnecessarily (from user to pipe, then from
                 * pipe to gpu sysmem buffer).
                 */
-                if (end > gpfifo_size) {
+                err = nvgpu_submit_append_gpfifo_user_direct(c, userdata,
-                        /* wrap-around */
+                                num_entries);
-                        int length0 = gpfifo_size - start;
+                if (err)
-                        int length1 = len - length0;
+                        return err;
-                        err = g->os_channel.copy_user_gpfifo(
-                                        gpfifo_cpu + start, userdata,
-                                        0, length0);
-                        if (err)
-                                return err;
-                        err = g->os_channel.copy_user_gpfifo(
-                                        gpfifo_cpu, userdata,
-                                        length0, length1);
-                        if (err)
-                                return err;
-                        trace_write_pushbuffer_range(c, gpfifo_cpu, NULL,
-                                        start, length0);
-                        trace_write_pushbuffer_range(c, gpfifo_cpu, NULL,
-                                        0, length1);
-                } else {
-                        err = g->os_channel.copy_user_gpfifo(
-                                        gpfifo_cpu + start, userdata,
-                                        0, len);
-                        if (err)
-                                return err;
-                        trace_write_pushbuffer_range(c, gpfifo_cpu, NULL,
-                                        start, len);
-                }
-                goto out;
        } else if (!kern_gpfifo) {
-                /* from userspace to vidmem, use the common copy path below */
+                /* from userspace to vidmem, use the common path */
                err = g->os_channel.copy_user_gpfifo(c->gpfifo.pipe, userdata,
-                                0, len);
+                                0, num_entries);
                if (err)
                        return err;
-                cpu_src = c->gpfifo.pipe;
+                nvgpu_submit_append_gpfifo_common(c, c->gpfifo.pipe,
+                                num_entries);
        } else {
                /* from kernel to either sysmem or vidmem, don't need
-                 * copy_user_gpfifo so use the common path below */
+                 * copy_user_gpfifo so use the common path */
-                cpu_src = kern_gpfifo;
+                nvgpu_submit_append_gpfifo_common(c, kern_gpfifo, num_entries);
-        }
-        if (end > gpfifo_size) {
-                /* wrap-around */
-                int length0 = gpfifo_size - start;
-                int length1 = len - length0;
-                struct nvgpu_gpfifo_entry *src2 = cpu_src + length0;
-                int s_bytes = start * sizeof(struct nvgpu_gpfifo_entry);
-                int l0_bytes = length0 * sizeof(struct nvgpu_gpfifo_entry);
-                int l1_bytes = length1 * sizeof(struct nvgpu_gpfifo_entry);
-                nvgpu_mem_wr_n(c->g, gpfifo_mem, s_bytes, cpu_src, l0_bytes);
-                nvgpu_mem_wr_n(c->g, gpfifo_mem, 0, src2, l1_bytes);
-        } else {
-                nvgpu_mem_wr_n(c->g, gpfifo_mem,
-                                start * sizeof(struct nvgpu_gpfifo_entry),
-                                cpu_src,
-                                len * sizeof(struct nvgpu_gpfifo_entry));
        }
-        trace_write_pushbuffer_range(c, cpu_src, NULL, 0, num_entries);
+        trace_write_pushbuffers(c, num_entries);
-out:
        c->gpfifo.put = (c->gpfifo.put + num_entries) &
                (c->gpfifo.entry_num - 1);
@@ -987,7 +975,7 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
        if (wait_cmd)
                gk20a_submit_append_priv_cmdbuf(c, wait_cmd);
-        err = gk20a_submit_append_gpfifo(c, gpfifo, userdata,
+        err = nvgpu_submit_append_gpfifo(c, gpfifo, userdata,
                        num_entries);
        if (err)
                goto clean_up_job;
author	Konsta Holtta <kholtta@nvidia.com>	2018-05-21 10:31:10 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2018-06-20 15:25:49 -0400
commit	84536b929be7e56dd8221ba8d541fd4cbcd149ed (patch)
tree	dd52bafa84fcf297861a8f3356f2f7651ea35792 /drivers
parent	819f32bdf119ebf4d758a901c9a22c2c4a7d167a (diff)