author     Konsta Holtta <kholtta@nvidia.com>        2016-06-15 07:06:28 -0400
committer  Terje Bergstrom <tbergstrom@nvidia.com>   2016-06-20 10:45:33 -0400
commit     27baafaad1a5c999642939faef63cacab17c9ed6 (patch)
tree       9ad2ce8dbf044f120c8959f4c69d825dee3183f8 /drivers/gpu/nvgpu/gk20a/channel_gk20a.c
parent     efb6113b65c4976cf718787b2adc64d495e8fd94 (diff)
gpu: nvgpu: use gpfifo_mem via gk20a_mem_{rd,wr}
Use gk20a_mem_*() accessors for gpfifo memory in work submission instead of
direct CPU accesses, in order to support apertures other than sysmem. The
gpfifo memory is still allocated from sysmem for dGPUs too. Split the copying
of the priv_cmd entries and of the main gpfifo submitted in
gk20a_submit_channel_gpfifo() into separate functions.

JIRA DNVGPU-21

Change-Id: If271ca8e7e34235f00d31855dbccf77c0008e10b
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1145923
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_gk20a.c')
-rw-r--r--   drivers/gpu/nvgpu/gk20a/channel_gk20a.c   242
1 file changed, 138 insertions(+), 104 deletions(-)
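The essence of the change, before reading the hunks: gpfifo ring writes no longer go through the CPU mapping c->gpfifo.mem.cpu_va but through the gk20a_mem_wr_n() accessor, which handles both sysmem and vidmem (reached via PRAMIN) backed buffers. Below is a minimal sketch of that difference for a single ring entry; it reuses the driver types and the gk20a_mem_wr_n() call exactly as they appear in the diff, but the two helper names are invented for illustration and are not part of the patch.

/* Sketch only: two ways to write one gpfifo entry at the current put index. */
static void put_entry_cpu_va(struct channel_gk20a *c, struct nvgpu_gpfifo e)
{
        /* Old approach: only valid when the ring is CPU-mapped sysmem. */
        struct nvgpu_gpfifo *ring = c->gpfifo.mem.cpu_va;

        ring[c->gpfifo.put] = e;
        c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1);
}

static void put_entry_mem_accessor(struct channel_gk20a *c, struct nvgpu_gpfifo e)
{
        /* New approach: gk20a_mem_wr_n() picks the right aperture for the write. */
        gk20a_mem_wr_n(c->g, &c->gpfifo.mem, c->gpfifo.put * sizeof(e),
                        &e, sizeof(e));
        c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1);
}

gk20a_submit_append_priv_cmdbuf() in the diff is essentially the accessor variant plus the pushbuffer tracepoint, and gk20a_submit_append_gpfifo() applies the same idea to a whole batch of entries.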
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index fc1edd99..cc097ae4 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -918,7 +918,7 @@ static void gk20a_free_channel(struct channel_gk20a *ch)
 	memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub));
 
 	gk20a_gmmu_unmap_free(ch_vm, &ch->gpfifo.mem);
-
+	nvgpu_free(ch->gpfifo.pipe);
 	memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));
 
 #if defined(CONFIG_GK20A_CYCLE_STATS)
@@ -1430,7 +1430,7 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
 	c->ramfc.offset = 0;
 	c->ramfc.size = ram_in_ramfc_s() / 8;
 
-	if (c->gpfifo.mem.cpu_va) {
+	if (c->gpfifo.mem.size) {
 		gk20a_err(d, "channel %d :"
 			"gpfifo already allocated", c->hw_chid);
 		return -EEXIST;
@@ -1444,6 +1444,16 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
 		goto clean_up;
 	}
 
+	if (c->gpfifo.mem.aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
+		c->gpfifo.pipe = nvgpu_alloc(
+				gpfifo_size * sizeof(struct nvgpu_gpfifo),
+				false);
+		if (!c->gpfifo.pipe) {
+			err = -ENOMEM;
+			goto clean_up_unmap;
+		}
+	}
+
 	c->gpfifo.entry_num = gpfifo_size;
 	c->gpfifo.get = c->gpfifo.put = 0;
 
@@ -1473,6 +1483,7 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
 	return 0;
 
 clean_up_unmap:
+	kfree(c->gpfifo.pipe);
 	gk20a_gmmu_unmap_free(ch_vm, &c->gpfifo.mem);
 clean_up:
 	memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
@@ -1568,7 +1579,7 @@ static void trace_write_pushbuffer(struct channel_gk20a *c,
 
 static void trace_write_pushbuffer_range(struct channel_gk20a *c,
 		struct nvgpu_gpfifo *g,
-		struct nvgpu_submit_gpfifo_args *args,
+		struct nvgpu_gpfifo __user *user_gpfifo,
 		int offset,
 		int count)
 {
@@ -1580,18 +1591,17 @@ static void trace_write_pushbuffer_range(struct channel_gk20a *c,
 	if (!gk20a_debug_trace_cmdbuf)
 		return;
 
-	if (!g && !args)
+	if (!g && !user_gpfifo)
 		return;
 
 	if (!g) {
-		size = args->num_entries * sizeof(struct nvgpu_gpfifo);
+		size = count * sizeof(struct nvgpu_gpfifo);
 		if (size) {
 			g = nvgpu_alloc(size, false);
 			if (!g)
 				return;
 
-			if (copy_from_user(g,
-				(void __user *)(uintptr_t)args->gpfifo, size)) {
+			if (copy_from_user(g, user_gpfifo, size)) {
 				nvgpu_free(g);
 				return;
 			}
@@ -1984,6 +1994,116 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
 	gk20a_channel_put(c);
 }
 
+static void gk20a_submit_append_priv_cmdbuf(struct channel_gk20a *c,
+		struct priv_cmd_entry *cmd)
+{
+	struct gk20a *g = c->g;
+	struct mem_desc *gpfifo_mem = &c->gpfifo.mem;
+	struct nvgpu_gpfifo x = {
+		.entry0 = u64_lo32(cmd->gva),
+		.entry1 = u64_hi32(cmd->gva) |
+			pbdma_gp_entry1_length_f(cmd->size)
+	};
+
+	gk20a_mem_wr_n(g, gpfifo_mem, c->gpfifo.put * sizeof(x),
+			&x, sizeof(x));
+
+	if (cmd->mem->aperture == APERTURE_SYSMEM)
+		trace_gk20a_push_cmdbuf(dev_name(g->dev), 0, cmd->size, 0,
+				cmd->mem->cpu_va + cmd->off * sizeof(u32));
+
+	c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1);
+}
+
+/*
+ * Copy source gpfifo entries into the gpfifo ring buffer, potentially
+ * splitting into two memcpys to handle wrap-around.
+ */
+static int gk20a_submit_append_gpfifo(struct channel_gk20a *c,
+		struct nvgpu_gpfifo *kern_gpfifo,
+		struct nvgpu_gpfifo __user *user_gpfifo,
+		u32 num_entries)
+{
+	/* byte offsets */
+	u32 gpfifo_size = c->gpfifo.entry_num * sizeof(struct nvgpu_gpfifo);
+	u32 len = num_entries * sizeof(struct nvgpu_gpfifo);
+	u32 start = c->gpfifo.put * sizeof(struct nvgpu_gpfifo);
+	u32 end = start + len; /* exclusive */
+	struct mem_desc *gpfifo_mem = &c->gpfifo.mem;
+	struct nvgpu_gpfifo *cpu_src;
+	int err;
+
+	if (user_gpfifo && !c->gpfifo.pipe) {
+		/*
+		 * This path (from userspace to sysmem) is special in order to
+		 * avoid two copies unnecessarily (from user to pipe, then from
+		 * pipe to gpu sysmem buffer).
+		 *
+		 * As a special case, the pipe buffer exists if PRAMIN writes
+		 * are forced, although the buffers may not be in vidmem in
+		 * that case.
+		 */
+		if (end > gpfifo_size) {
+			/* wrap-around */
+			int length0 = gpfifo_size - start;
+			int length1 = len - length0;
+			void *user2 = (u8 *)user_gpfifo + length0;
+
+			err = copy_from_user(gpfifo_mem->cpu_va + start,
+					user_gpfifo, length0);
+			if (err)
+				return err;
+
+			err = copy_from_user(gpfifo_mem->cpu_va,
+					user2, length1);
+			if (err)
+				return err;
+		} else {
+			err = copy_from_user(gpfifo_mem->cpu_va + start,
+					user_gpfifo, len);
+			if (err)
+				return err;
+		}
+
+		trace_write_pushbuffer_range(c, NULL, user_gpfifo,
+				0, num_entries);
+		goto out;
+	} else if (user_gpfifo) {
+		/* from userspace to vidmem or sysmem when pramin forced, use
+		 * the common copy path below */
+		err = copy_from_user(c->gpfifo.pipe, user_gpfifo, len);
+		if (err)
+			return err;
+
+		cpu_src = c->gpfifo.pipe;
+	} else {
+		/* from kernel to either sysmem or vidmem, don't need
+		 * copy_from_user so use the common path below */
+		cpu_src = kern_gpfifo;
+	}
+
+	if (end > gpfifo_size) {
+		/* wrap-around */
+		int length0 = gpfifo_size - start;
+		int length1 = len - length0;
+		void *src2 = (u8 *)cpu_src + length0;
+
+		gk20a_mem_wr_n(c->g, gpfifo_mem, start, cpu_src, length0);
+		gk20a_mem_wr_n(c->g, gpfifo_mem, 0, src2, length1);
+	} else {
+		gk20a_mem_wr_n(c->g, gpfifo_mem, start, cpu_src, len);
+
+	}
+
+	trace_write_pushbuffer_range(c, cpu_src, NULL, 0, num_entries);
+
+out:
+	c->gpfifo.put = (c->gpfifo.put + num_entries) &
+		(c->gpfifo.entry_num - 1);
+
+	return 0;
+}
+
 int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 		struct nvgpu_gpfifo *gpfifo,
 		struct nvgpu_submit_gpfifo_args *args,
@@ -1996,7 +2116,6 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 	struct gk20a *g = c->g;
 	struct device *d = dev_from_gk20a(g);
 	int err = 0;
-	int start, end;
 	int wait_fence_fd = -1;
 	struct priv_cmd_entry *wait_cmd = NULL;
 	struct priv_cmd_entry *incr_cmd = NULL;
@@ -2006,11 +2125,12 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 	 * and one for post fence. */
 	const int extra_entries = 2;
 	bool need_wfi = !(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI);
-	struct nvgpu_gpfifo *gpfifo_mem = c->gpfifo.mem.cpu_va;
 	bool skip_buffer_refcounting = (flags &
 			NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING);
 	bool need_sync_fence = false;
 	bool new_sync_created = false;
+	struct nvgpu_gpfifo __user *user_gpfifo = args ?
+		(struct nvgpu_gpfifo __user *)(uintptr_t)args->gpfifo : 0;
 
 	/*
 	 * If user wants to allocate sync_fence_fd always, then respect that;
@@ -2157,102 +2277,17 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 		goto clean_up;
 	}
 
-	if (wait_cmd) {
-		gpfifo_mem[c->gpfifo.put].entry0 = u64_lo32(wait_cmd->gva);
-		gpfifo_mem[c->gpfifo.put].entry1 = u64_hi32(wait_cmd->gva) |
-			pbdma_gp_entry1_length_f(wait_cmd->size);
-		trace_gk20a_push_cmdbuf(dev_name(c->g->dev),
-			0, wait_cmd->size, 0,
-			wait_cmd->mem->cpu_va + wait_cmd->off *
-			sizeof(u32));
-
-		c->gpfifo.put = (c->gpfifo.put + 1) &
-			(c->gpfifo.entry_num - 1);
-	}
-
-	/*
-	 * Copy source gpfifo entries into the gpfifo ring buffer,
-	 * potentially splitting into two memcpies to handle the
-	 * ring buffer wrap-around case.
-	 */
-	start = c->gpfifo.put;
-	end = start + num_entries;
-
-	if (gpfifo) {
-		if (end > c->gpfifo.entry_num) {
-			int length0 = c->gpfifo.entry_num - start;
-			int length1 = num_entries - length0;
-
-			memcpy(gpfifo_mem + start, gpfifo,
-				length0 * sizeof(*gpfifo));
-
-			memcpy(gpfifo_mem, gpfifo + length0,
-				length1 * sizeof(*gpfifo));
-
-			trace_write_pushbuffer_range(c, gpfifo, NULL,
-				0, length0);
-			trace_write_pushbuffer_range(c, gpfifo, NULL,
-				length0, length1);
-		} else {
-			memcpy(gpfifo_mem + start, gpfifo,
-				num_entries * sizeof(*gpfifo));
-
-			trace_write_pushbuffer_range(c, gpfifo, NULL,
-				0, num_entries);
-		}
-	} else {
-		struct nvgpu_gpfifo __user *user_gpfifo =
-			(struct nvgpu_gpfifo __user *)(uintptr_t)args->gpfifo;
-		if (end > c->gpfifo.entry_num) {
-			int length0 = c->gpfifo.entry_num - start;
-			int length1 = num_entries - length0;
-
-			err = copy_from_user(gpfifo_mem + start,
-				user_gpfifo,
-				length0 * sizeof(*user_gpfifo));
-			if (err) {
-				goto clean_up;
-			}
-
-			err = copy_from_user(gpfifo_mem,
-				user_gpfifo + length0,
-				length1 * sizeof(*user_gpfifo));
-			if (err) {
-				goto clean_up;
-			}
-
-			trace_write_pushbuffer_range(c, NULL, args,
-				0, length0);
-			trace_write_pushbuffer_range(c, NULL, args,
-				length0, length1);
-		} else {
-			err = copy_from_user(gpfifo_mem + start,
-				user_gpfifo,
-				num_entries * sizeof(*user_gpfifo));
-			if (err) {
-				goto clean_up;
-			}
-
-			trace_write_pushbuffer_range(c, NULL, args,
-				0, num_entries);
-		}
-	}
-
-	c->gpfifo.put = (c->gpfifo.put + num_entries) &
-		(c->gpfifo.entry_num - 1);
-
-	if (incr_cmd) {
-		gpfifo_mem[c->gpfifo.put].entry0 = u64_lo32(incr_cmd->gva);
-		gpfifo_mem[c->gpfifo.put].entry1 = u64_hi32(incr_cmd->gva) |
-			pbdma_gp_entry1_length_f(incr_cmd->size);
-		trace_gk20a_push_cmdbuf(dev_name(c->g->dev),
-			0, incr_cmd->size, 0,
-			incr_cmd->mem->cpu_va + incr_cmd->off *
-			sizeof(u32));
-
-		c->gpfifo.put = (c->gpfifo.put + 1) &
-			(c->gpfifo.entry_num - 1);
-	}
-
+	if (wait_cmd)
+		gk20a_submit_append_priv_cmdbuf(c, wait_cmd);
+
+	if (gpfifo || user_gpfifo)
+		err = gk20a_submit_append_gpfifo(c, gpfifo, user_gpfifo,
+				num_entries);
+	if (err)
+		goto clean_up;
+
+	if (incr_cmd)
+		gk20a_submit_append_priv_cmdbuf(c, incr_cmd);
 
 	mutex_lock(&c->last_submit.fence_lock);
 	gk20a_fence_put(c->last_submit.pre_fence);
@@ -2892,7 +2927,6 @@ static int gk20a_ioctl_channel_submit_gpfifo(
 {
 	struct gk20a_fence *fence_out;
 	int ret = 0;
-
 	gk20a_dbg_fn("");
 
 	if (ch->has_timedout)
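A note on the wrap-around handling in gk20a_submit_append_gpfifo() above: unlike the old code, which counted in entries, the new helper works in byte offsets, and when the appended entries would run past the end of the ring it splits the write into a tail piece and a head piece. The following standalone sketch walks through just that arithmetic with illustrative values (the 8-byte entry size matches the two u32 words of a gpfifo entry, but all numbers here are examples, not taken from the patch).

#include <stdio.h>

/* Sketch of the byte-offset split used when a gpfifo append wraps the ring. */
int main(void)
{
        unsigned int entry_size = 8;                        /* bytes per gpfifo entry (entry0 + entry1) */
        unsigned int entry_num = 512;                       /* ring entries, power of two */
        unsigned int gpfifo_size = entry_num * entry_size;  /* 4096 bytes */

        unsigned int put = 500;                             /* current put index */
        unsigned int num_entries = 20;                      /* entries to append */

        unsigned int start = put * entry_size;              /* 4000 */
        unsigned int len = num_entries * entry_size;        /* 160 */
        unsigned int end = start + len;                     /* 4160, exclusive */

        if (end > gpfifo_size) {
                unsigned int length0 = gpfifo_size - start; /* 96 bytes written at offset 4000 */
                unsigned int length1 = len - length0;       /* 64 bytes wrap around to offset 0 */
                printf("split copy: %u bytes at %u, %u bytes at 0\n",
                                length0, start, length1);
        } else {
                printf("single copy: %u bytes at %u\n", len, start);
        }

        /* put advances modulo the ring size; entry_num being a power of two makes the mask work */
        put = (put + num_entries) & (entry_num - 1);        /* 8 */
        printf("new put index: %u\n", put);
        return 0;
}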