-rw-r--r--	drivers/gpu/nvgpu/gk20a/channel_gk20a.c	242
-rw-r--r--	drivers/gpu/nvgpu/gk20a/mm_gk20a.h	4
2 files changed, 142 insertions, 104 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index fc1edd99..cc097ae4 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -918,7 +918,7 @@ static void gk20a_free_channel(struct channel_gk20a *ch)
 	memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub));
 
 	gk20a_gmmu_unmap_free(ch_vm, &ch->gpfifo.mem);
-
+	nvgpu_free(ch->gpfifo.pipe);
 	memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));
 
 #if defined(CONFIG_GK20A_CYCLE_STATS)
@@ -1430,7 +1430,7 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
 	c->ramfc.offset = 0;
 	c->ramfc.size = ram_in_ramfc_s() / 8;
 
-	if (c->gpfifo.mem.cpu_va) {
+	if (c->gpfifo.mem.size) {
 		gk20a_err(d, "channel %d :"
 			   "gpfifo already allocated", c->hw_chid);
 		return -EEXIST;
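
Editor note on the changed check above: a gpfifo ring placed in vidmem has no kernel CPU mapping, so mem.cpu_va stays NULL even though the buffer exists; mem.size is nonzero for any successful allocation regardless of aperture, which makes it the right "already allocated" test. A minimal illustrative helper, not part of this patch (name hypothetical):

/* Illustrative only: cpu_va == NULL no longer implies "not allocated"
 * once buffers may live in vidmem; test the size instead. */
static inline bool gpfifo_is_allocated(const struct gpfifo_desc *gpfifo)
{
	return gpfifo->mem.size != 0;
}
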
@@ -1444,6 +1444,16 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
 		goto clean_up;
 	}
 
+	if (c->gpfifo.mem.aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
+		c->gpfifo.pipe = nvgpu_alloc(
+				gpfifo_size * sizeof(struct nvgpu_gpfifo),
+				false);
+		if (!c->gpfifo.pipe) {
+			err = -ENOMEM;
+			goto clean_up_unmap;
+		}
+	}
+
 	c->gpfifo.entry_num = gpfifo_size;
 	c->gpfifo.get = c->gpfifo.put = 0;
 
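
Editor note: the "pipe" staging buffer allocated above mirrors the whole ring in regular kernel memory — gpfifo_size entries of sizeof(struct nvgpu_gpfifo) (two u32 words, 8 bytes each), e.g. 8 KiB for a 1024-entry ring. A hedged sketch of the same decision as a standalone predicate (hypothetical helper, same condition as the hunk above):

/* Sketch: a sysmem staging copy is needed whenever the CPU cannot
 * simply memcpy into the ring -- the ring lives in vidmem, or all
 * accesses are being forced through the PRAMIN window. */
static bool gpfifo_needs_pipe(struct gk20a *g, struct mem_desc *mem)
{
	return mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin;
}
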
@@ -1473,6 +1483,7 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
 	return 0;
 
 clean_up_unmap:
+	nvgpu_free(c->gpfifo.pipe);
 	gk20a_gmmu_unmap_free(ch_vm, &c->gpfifo.mem);
 clean_up:
 	memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
@@ -1568,7 +1579,7 @@ static void trace_write_pushbuffer(struct channel_gk20a *c,
 
 static void trace_write_pushbuffer_range(struct channel_gk20a *c,
 				 struct nvgpu_gpfifo *g,
-				 struct nvgpu_submit_gpfifo_args *args,
+				 struct nvgpu_gpfifo __user *user_gpfifo,
 				 int offset,
 				 int count)
 {
@@ -1580,18 +1591,17 @@ static void trace_write_pushbuffer_range(struct channel_gk20a *c,
 	if (!gk20a_debug_trace_cmdbuf)
 		return;
 
-	if (!g && !args)
+	if (!g && !user_gpfifo)
 		return;
 
 	if (!g) {
-		size = args->num_entries * sizeof(struct nvgpu_gpfifo);
+		size = count * sizeof(struct nvgpu_gpfifo);
 		if (size) {
 			g = nvgpu_alloc(size, false);
 			if (!g)
 				return;
 
-			if (copy_from_user(g,
-				(void __user *)(uintptr_t)args->gpfifo, size)) {
+			if (copy_from_user(g, user_gpfifo, size)) {
 				nvgpu_free(g);
 				return;
 			}
@@ -1984,6 +1994,116 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
 	gk20a_channel_put(c);
 }
 
+static void gk20a_submit_append_priv_cmdbuf(struct channel_gk20a *c,
+		struct priv_cmd_entry *cmd)
+{
+	struct gk20a *g = c->g;
+	struct mem_desc *gpfifo_mem = &c->gpfifo.mem;
+	struct nvgpu_gpfifo x = {
+		.entry0 = u64_lo32(cmd->gva),
+		.entry1 = u64_hi32(cmd->gva) |
+			pbdma_gp_entry1_length_f(cmd->size)
+	};
+
+	gk20a_mem_wr_n(g, gpfifo_mem, c->gpfifo.put * sizeof(x),
+			&x, sizeof(x));
+
+	if (cmd->mem->aperture == APERTURE_SYSMEM)
+		trace_gk20a_push_cmdbuf(dev_name(g->dev), 0, cmd->size, 0,
+				cmd->mem->cpu_va + cmd->off * sizeof(u32));
+
+	c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1);
+}
+
+/*
+ * Copy source gpfifo entries into the gpfifo ring buffer, potentially
+ * splitting into two memcpys to handle wrap-around.
+ */
+static int gk20a_submit_append_gpfifo(struct channel_gk20a *c,
+		struct nvgpu_gpfifo *kern_gpfifo,
+		struct nvgpu_gpfifo __user *user_gpfifo,
+		u32 num_entries)
+{
+	/* byte offsets */
+	u32 gpfifo_size = c->gpfifo.entry_num * sizeof(struct nvgpu_gpfifo);
+	u32 len = num_entries * sizeof(struct nvgpu_gpfifo);
+	u32 start = c->gpfifo.put * sizeof(struct nvgpu_gpfifo);
+	u32 end = start + len; /* exclusive */
+	struct mem_desc *gpfifo_mem = &c->gpfifo.mem;
+	struct nvgpu_gpfifo *cpu_src;
+	int err;
+
+	if (user_gpfifo && !c->gpfifo.pipe) {
+		/*
+		 * This path (from userspace to sysmem) is special in order to
+		 * avoid two copies unnecessarily (from user to pipe, then from
+		 * pipe to gpu sysmem buffer).
+		 *
+		 * As a special case, the pipe buffer exists if PRAMIN writes
+		 * are forced, although the buffers may not be in vidmem in
+		 * that case.
+		 */
+		if (end > gpfifo_size) {
+			/* wrap-around */
+			int length0 = gpfifo_size - start;
+			int length1 = len - length0;
+			void __user *user2 = (u8 __user *)user_gpfifo + length0;
+
+			err = copy_from_user(gpfifo_mem->cpu_va + start,
+					user_gpfifo, length0);
+			if (err)
+				return err;
+
+			err = copy_from_user(gpfifo_mem->cpu_va,
+					user2, length1);
+			if (err)
+				return err;
+		} else {
+			err = copy_from_user(gpfifo_mem->cpu_va + start,
+					user_gpfifo, len);
+			if (err)
+				return err;
+		}
+
+		trace_write_pushbuffer_range(c, NULL, user_gpfifo,
+				0, num_entries);
+		goto out;
+	} else if (user_gpfifo) {
+		/* from userspace to vidmem or sysmem when pramin forced, use
+		 * the common copy path below */
+		err = copy_from_user(c->gpfifo.pipe, user_gpfifo, len);
+		if (err)
+			return err;
+
+		cpu_src = c->gpfifo.pipe;
+	} else {
+		/* from kernel to either sysmem or vidmem, don't need
+		 * copy_from_user so use the common path below */
+		cpu_src = kern_gpfifo;
+	}
+
+	if (end > gpfifo_size) {
+		/* wrap-around */
+		int length0 = gpfifo_size - start;
+		int length1 = len - length0;
+		void *src2 = (u8 *)cpu_src + length0;
+
+		gk20a_mem_wr_n(c->g, gpfifo_mem, start, cpu_src, length0);
+		gk20a_mem_wr_n(c->g, gpfifo_mem, 0, src2, length1);
+	} else {
+		gk20a_mem_wr_n(c->g, gpfifo_mem, start, cpu_src, len);
+
+	}
+
+	trace_write_pushbuffer_range(c, cpu_src, NULL, 0, num_entries);
+
+out:
+	c->gpfifo.put = (c->gpfifo.put + num_entries) &
+		(c->gpfifo.entry_num - 1);
+
+	return 0;
+}
+
 int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 		struct nvgpu_gpfifo *gpfifo,
 		struct nvgpu_submit_gpfifo_args *args,
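
Editor note: two details of the helpers added above are worth unpacking. gk20a_submit_append_priv_cmdbuf() packs a gpfifo entry from the 64-bit GPU VA (low word in entry0, high bits plus the pbdma length field in entry1), and both helpers advance put with a mask, which assumes entry_num is a power of two. For the split copy in gk20a_submit_append_gpfifo(): with a 512-entry ring (4096 bytes), put = 510 and num_entries = 5 gives start = 4080 and end = 4120 > 4096, so length0 = 16 bytes (2 entries) fill the tail and length1 = 24 bytes (3 entries) wrap to offset 0, leaving put = (510 + 5) & 511 = 3. A self-contained model of that arithmetic (plain C, names hypothetical):

#include <stdint.h>
#include <string.h>

struct gp_entry { uint32_t entry0, entry1; };

/* Standalone model of the ring append: entry_num must be a power of
 * two so the put pointer wraps with a mask instead of a modulo. */
static void ring_append(struct gp_entry *ring, uint32_t entry_num,
			uint32_t *put, const struct gp_entry *src,
			uint32_t n)
{
	uint32_t start = *put;

	if (start + n > entry_num) {
		/* wrap-around: split into tail and head copies */
		uint32_t len0 = entry_num - start;

		memcpy(ring + start, src, len0 * sizeof(*src));
		memcpy(ring, src + len0, (n - len0) * sizeof(*src));
	} else {
		memcpy(ring + start, src, n * sizeof(*src));
	}

	*put = (*put + n) & (entry_num - 1);
}
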
@@ -1996,7 +2116,6 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 	struct gk20a *g = c->g;
 	struct device *d = dev_from_gk20a(g);
 	int err = 0;
-	int start, end;
 	int wait_fence_fd = -1;
 	struct priv_cmd_entry *wait_cmd = NULL;
 	struct priv_cmd_entry *incr_cmd = NULL;
@@ -2006,11 +2125,12 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 	 * and one for post fence. */
 	const int extra_entries = 2;
 	bool need_wfi = !(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI);
-	struct nvgpu_gpfifo *gpfifo_mem = c->gpfifo.mem.cpu_va;
 	bool skip_buffer_refcounting = (flags &
 			NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING);
 	bool need_sync_fence = false;
 	bool new_sync_created = false;
+	struct nvgpu_gpfifo __user *user_gpfifo = args ?
+		(struct nvgpu_gpfifo __user *)(uintptr_t)args->gpfifo : NULL;
 
 	/*
 	 * If user wants to allocate sync_fence_fd always, then respect that;
@@ -2157,102 +2277,17 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 		goto clean_up;
 	}
 
-	if (wait_cmd) {
-		gpfifo_mem[c->gpfifo.put].entry0 = u64_lo32(wait_cmd->gva);
-		gpfifo_mem[c->gpfifo.put].entry1 = u64_hi32(wait_cmd->gva) |
-			pbdma_gp_entry1_length_f(wait_cmd->size);
-		trace_gk20a_push_cmdbuf(dev_name(c->g->dev),
-			0, wait_cmd->size, 0,
-			wait_cmd->mem->cpu_va + wait_cmd->off *
-			sizeof(u32));
-
-		c->gpfifo.put = (c->gpfifo.put + 1) &
-			(c->gpfifo.entry_num - 1);
-	}
-
-	/*
-	 * Copy source gpfifo entries into the gpfifo ring buffer,
-	 * potentially splitting into two memcpies to handle the
-	 * ring buffer wrap-around case.
-	 */
-	start = c->gpfifo.put;
-	end = start + num_entries;
-
-	if (gpfifo) {
-		if (end > c->gpfifo.entry_num) {
-			int length0 = c->gpfifo.entry_num - start;
-			int length1 = num_entries - length0;
-
-			memcpy(gpfifo_mem + start, gpfifo,
-			       length0 * sizeof(*gpfifo));
-
-			memcpy(gpfifo_mem, gpfifo + length0,
-			       length1 * sizeof(*gpfifo));
-
-			trace_write_pushbuffer_range(c, gpfifo, NULL,
-					0, length0);
-			trace_write_pushbuffer_range(c, gpfifo, NULL,
-					length0, length1);
-		} else {
-			memcpy(gpfifo_mem + start, gpfifo,
-			       num_entries * sizeof(*gpfifo));
-
-			trace_write_pushbuffer_range(c, gpfifo, NULL,
-					0, num_entries);
-		}
-	} else {
-		struct nvgpu_gpfifo __user *user_gpfifo =
-			(struct nvgpu_gpfifo __user *)(uintptr_t)args->gpfifo;
-		if (end > c->gpfifo.entry_num) {
-			int length0 = c->gpfifo.entry_num - start;
-			int length1 = num_entries - length0;
-
-			err = copy_from_user(gpfifo_mem + start,
-					     user_gpfifo,
-					     length0 * sizeof(*user_gpfifo));
-			if (err) {
-				goto clean_up;
-			}
-
-			err = copy_from_user(gpfifo_mem,
-					     user_gpfifo + length0,
-					     length1 * sizeof(*user_gpfifo));
-			if (err) {
-				goto clean_up;
-			}
-
-			trace_write_pushbuffer_range(c, NULL, args,
-					0, length0);
-			trace_write_pushbuffer_range(c, NULL, args,
-					length0, length1);
-		} else {
-			err = copy_from_user(gpfifo_mem + start,
-					     user_gpfifo,
-					     num_entries * sizeof(*user_gpfifo));
-			if (err) {
-				goto clean_up;
-			}
-
-			trace_write_pushbuffer_range(c, NULL, args,
-					0, num_entries);
-		}
-	}
-
-	c->gpfifo.put = (c->gpfifo.put + num_entries) &
-		(c->gpfifo.entry_num - 1);
-
-	if (incr_cmd) {
-		gpfifo_mem[c->gpfifo.put].entry0 = u64_lo32(incr_cmd->gva);
-		gpfifo_mem[c->gpfifo.put].entry1 = u64_hi32(incr_cmd->gva) |
-			pbdma_gp_entry1_length_f(incr_cmd->size);
-		trace_gk20a_push_cmdbuf(dev_name(c->g->dev),
-			0, incr_cmd->size, 0,
-			incr_cmd->mem->cpu_va + incr_cmd->off *
-			sizeof(u32));
-
-		c->gpfifo.put = (c->gpfifo.put + 1) &
-			(c->gpfifo.entry_num - 1);
-	}
+	if (wait_cmd)
+		gk20a_submit_append_priv_cmdbuf(c, wait_cmd);
+
+	if (gpfifo || user_gpfifo)
+		err = gk20a_submit_append_gpfifo(c, gpfifo, user_gpfifo,
+				num_entries);
+	if (err)
+		goto clean_up;
+
+	if (incr_cmd)
+		gk20a_submit_append_priv_cmdbuf(c, incr_cmd);
 
 	mutex_lock(&c->last_submit.fence_lock);
 	gk20a_fence_put(c->last_submit.pre_fence);
@@ -2892,7 +2927,6 @@ static int gk20a_ioctl_channel_submit_gpfifo(
 {
 	struct gk20a_fence *fence_out;
 	int ret = 0;
-
 	gk20a_dbg_fn("");
 
 	if (ch->has_timedout)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index f9c5477e..db74a5ca 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -69,6 +69,10 @@ struct gpfifo_desc {
 	u32 put;
 
 	bool wrap;
+
+	/* if gpfifo lives in vidmem or is forced to go via PRAMIN, first copy
+	 * from userspace to pipe and then from pipe to gpu buffer */
+	void *pipe;
 };
 
 struct patch_desc {
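
Editor note: the pipe field above completes the data path this patch establishes. For a sysmem ring, user entries are copied once, straight into the CPU-mapped ring; for a vidmem (or forced-PRAMIN) ring they are staged in pipe and then written out with gk20a_mem_wr_n(), which can reach memory the CPU has no direct mapping for. A simplified restatement of the dispatch in gk20a_submit_append_gpfifo() (error handling, wrap-around, and tracing omitted):

if (user_gpfifo && !c->gpfifo.pipe) {
	/* sysmem ring: single copy, user -> CPU-mapped GPU buffer */
	err = copy_from_user(gpfifo_mem->cpu_va + start, user_gpfifo, len);
} else if (user_gpfifo) {
	/* vidmem/PRAMIN ring: stage in sysmem, then write through */
	err = copy_from_user(c->gpfifo.pipe, user_gpfifo, len);
	if (!err)
		gk20a_mem_wr_n(c->g, gpfifo_mem, start, c->gpfifo.pipe, len);
} else {
	/* in-kernel source: no user copy, write directly */
	gk20a_mem_wr_n(c->g, gpfifo_mem, start, kern_gpfifo, len);
}
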