path: root/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
author	Deepak Nibade <dnibade@nvidia.com>	2015-10-26 09:17:55 -0400
committer	Terje Bergstrom <tbergstrom@nvidia.com>	2015-11-23 11:30:13 -0500
commit	67fe5f6d738a9b1ba2d8f5dd0726790d965c8883 (patch)
tree	07cc9865536b4fbb3c6737434d551c6d03548dfa /drivers/gpu/nvgpu/gk20a/channel_gk20a.c
parent	938b16909f85231a8c62d563b603ed93862c8ed8 (diff)
gpu: nvgpu: remove temporary gpfifo allocation in submit path
In the GPU job submit path, gk20a_ioctl_channel_submit_gpfifo() currently allocates a temporary gpfifo, copies the user space gpfifo contents into this temporary buffer, and then copies the temporary buffer contents into the channel's gpfifo. The allocation/copy/free of the temporary buffer adds extra overhead.

Rewrite this sequence so that gk20a_submit_channel_gpfifo() can receive either a pre-filled gpfifo or a pointer to the user-provided args, and can then copy the user-provided gpfifo directly into the channel's gpfifo.

Also, if command buffer tracing is enabled, we still need to copy the user-provided gpfifo into a temporary buffer for reading, but that should not cause overhead in real-world use cases.

Bug 200141116

Change-Id: I7166c9271da2694059da9853ab8839e98457b941
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/823386
(cherry picked from commit 3e0702db006c262dd8737a567b8e06f7ff005e2c)
Reviewed-on: http://git-master/r/835799
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
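The essential change is that the wraparound copy into the channel's gpfifo ring now reads from the user pointer directly instead of going through a kernel bounce buffer. The helper below is a minimal sketch of that copy pattern, not code from this patch: the function name copy_user_entries_into_ring() and its standalone form are hypothetical, the field names gpfifo_mem, entry_num and put mirror the channel fields used in the diff below, and struct nvgpu_gpfifo is assumed to come from the driver's uapi header (include path may differ by tree).

#include <linux/uaccess.h>
#include <linux/errno.h>
#include <linux/nvgpu.h>	/* struct nvgpu_gpfifo (uapi; assumed path) */

/*
 * Illustrative sketch only: copy user gpfifo entries straight into the
 * channel's ring with copy_from_user(), splitting the copy in two when
 * the write wraps past the end of the ring (entry_num is a power of two).
 */
static int copy_user_entries_into_ring(struct nvgpu_gpfifo *gpfifo_mem,
				u32 entry_num, u32 put,
				struct nvgpu_gpfifo __user *user_gpfifo,
				u32 num_entries)
{
	u32 end = put + num_entries;

	if (end > entry_num) {
		/* wraparound: fill the tail of the ring, then the head */
		u32 length0 = entry_num - put;
		u32 length1 = num_entries - length0;

		if (copy_from_user(gpfifo_mem + put, user_gpfifo,
				   length0 * sizeof(*user_gpfifo)))
			return -EFAULT;
		if (copy_from_user(gpfifo_mem, user_gpfifo + length0,
				   length1 * sizeof(*user_gpfifo)))
			return -EFAULT;
	} else {
		if (copy_from_user(gpfifo_mem + put, user_gpfifo,
				   num_entries * sizeof(*user_gpfifo)))
			return -EFAULT;
	}

	return 0;
}

In the actual patch this logic lives inline in the else-branch of gk20a_submit_channel_gpfifo() (taken when no pre-filled gpfifo is passed), with errors unwinding through the function's existing clean_up path.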
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 132
1 file changed, 95 insertions(+), 37 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 4cf3beec..0b84b7da 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -1551,14 +1551,42 @@ static void trace_write_pushbuffer(struct channel_gk20a *c,
 
 static void trace_write_pushbuffer_range(struct channel_gk20a *c,
 					 struct nvgpu_gpfifo *g,
+					 struct nvgpu_submit_gpfifo_args *args,
+					 int offset,
 					 int count)
 {
-	if (gk20a_debug_trace_cmdbuf) {
-		int i;
-		struct nvgpu_gpfifo *gp = g;
-		for (i = 0; i < count; i++, gp++)
-			trace_write_pushbuffer(c, gp);
+	u32 size;
+	int i;
+	struct nvgpu_gpfifo *gp;
+	bool gpfifo_allocated = false;
+
+	if (!gk20a_debug_trace_cmdbuf)
+		return;
+
+	if (!g && !args)
+		return;
+
+	if (!g) {
+		size = args->num_entries * sizeof(struct nvgpu_gpfifo);
+		if (size) {
+			g = nvgpu_alloc(size, false);
+			if (!g)
+				return;
+
+			if (copy_from_user(g,
+				(void __user *)(uintptr_t)args->gpfifo, size)) {
+				return;
+			}
+		}
+		gpfifo_allocated = true;
 	}
+
+	gp = g + offset;
+	for (i = 0; i < count; i++, gp++)
+		trace_write_pushbuffer(c, gp);
+
+	if (gpfifo_allocated)
+		nvgpu_free(g);
 }
 
 static void gk20a_channel_timeout_start(struct channel_gk20a *ch,
@@ -1810,6 +1838,7 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
 
 int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 				struct nvgpu_gpfifo *gpfifo,
+				struct nvgpu_submit_gpfifo_args *args,
 				u32 num_entries,
 				u32 flags,
 				struct nvgpu_fence *fence,
@@ -1842,6 +1871,9 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 		return -ENOMEM;
 	}
 
+	if (!gpfifo && !args)
+		return -EINVAL;
+
 	if ((flags & (NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT |
 			NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) &&
 	    !fence)
@@ -1986,24 +2018,69 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 	start = c->gpfifo.put;
 	end = start + num_entries;
 
-	if (end > c->gpfifo.entry_num) {
-		int length0 = c->gpfifo.entry_num - start;
-		int length1 = num_entries - length0;
-
-		memcpy(gpfifo_mem + start, gpfifo,
-		       length0 * sizeof(*gpfifo));
-
-		memcpy(gpfifo_mem, gpfifo + length0,
-		       length1 * sizeof(*gpfifo));
-
-		trace_write_pushbuffer_range(c, gpfifo, length0);
-		trace_write_pushbuffer_range(c, gpfifo + length0, length1);
-	} else {
-		memcpy(gpfifo_mem + start, gpfifo,
-		       num_entries * sizeof(*gpfifo));
-
-		trace_write_pushbuffer_range(c, gpfifo, num_entries);
+	if (gpfifo) {
+		if (end > c->gpfifo.entry_num) {
+			int length0 = c->gpfifo.entry_num - start;
+			int length1 = num_entries - length0;
+
+			memcpy(gpfifo_mem + start, gpfifo,
+			       length0 * sizeof(*gpfifo));
+
+			memcpy(gpfifo_mem, gpfifo + length0,
+			       length1 * sizeof(*gpfifo));
+
+			trace_write_pushbuffer_range(c, gpfifo, NULL,
+					0, length0);
+			trace_write_pushbuffer_range(c, gpfifo, NULL,
+					length0, length1);
+		} else {
+			memcpy(gpfifo_mem + start, gpfifo,
+			       num_entries * sizeof(*gpfifo));
+
+			trace_write_pushbuffer_range(c, gpfifo, NULL,
+					0, num_entries);
+		}
+	} else {
+		struct nvgpu_gpfifo __user *user_gpfifo =
+			(struct nvgpu_gpfifo __user *)(uintptr_t)args->gpfifo;
+		if (end > c->gpfifo.entry_num) {
+			int length0 = c->gpfifo.entry_num - start;
+			int length1 = num_entries - length0;
+
+			err = copy_from_user(gpfifo_mem + start,
+					user_gpfifo,
+					length0 * sizeof(*user_gpfifo));
+			if (err) {
+				mutex_unlock(&c->submit_lock);
+				goto clean_up;
+			}
+
+			err = copy_from_user(gpfifo_mem,
+					user_gpfifo + length0,
+					length1 * sizeof(*user_gpfifo));
+			if (err) {
+				mutex_unlock(&c->submit_lock);
+				goto clean_up;
+			}
+
+			trace_write_pushbuffer_range(c, NULL, args,
+					0, length0);
+			trace_write_pushbuffer_range(c, NULL, args,
+					length0, length1);
+		} else {
+			err = copy_from_user(gpfifo_mem + start,
+					user_gpfifo,
+					num_entries * sizeof(*user_gpfifo));
+			if (err) {
+				mutex_unlock(&c->submit_lock);
+				goto clean_up;
+			}
+
+			trace_write_pushbuffer_range(c, NULL, args,
+					0, num_entries);
+		}
 	}
+
 	c->gpfifo.put = (c->gpfifo.put + num_entries) &
 		(c->gpfifo.entry_num - 1);
 
@@ -2501,8 +2578,6 @@ static int gk20a_ioctl_channel_submit_gpfifo(
 		struct nvgpu_submit_gpfifo_args *args)
 {
 	struct gk20a_fence *fence_out;
-	void *gpfifo = NULL;
-	u32 size;
 	int ret = 0;
 
 	gk20a_dbg_fn("");
@@ -2510,23 +2585,7 @@ static int gk20a_ioctl_channel_submit_gpfifo(
 	if (ch->has_timedout)
 		return -ETIMEDOUT;
 
-	/* zero-sized submits are allowed, since they can be used for
-	 * synchronization; we might still wait and do an increment */
-	size = args->num_entries * sizeof(struct nvgpu_gpfifo);
-	if (size) {
-		gpfifo = nvgpu_alloc(size, false);
-		if (!gpfifo)
-			return -ENOMEM;
-
-		if (copy_from_user(gpfifo,
-			   (void __user *)(uintptr_t)args->gpfifo,
-			   size)) {
-			ret = -EINVAL;
-			goto clean_up;
-		}
-	}
-
-	ret = gk20a_submit_channel_gpfifo(ch, gpfifo, args->num_entries,
+	ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries,
 				args->flags, &args->fence,
 				&fence_out);
 
@@ -2549,7 +2608,6 @@ static int gk20a_ioctl_channel_submit_gpfifo(
 	gk20a_fence_put(fence_out);
 
 clean_up:
-	nvgpu_free(gpfifo);
 	return ret;
 }
 