diff options
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 242 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 4 |
2 files changed, 142 insertions, 104 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index fc1edd99..cc097ae4 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -918,7 +918,7 @@ static void gk20a_free_channel(struct channel_gk20a *ch) | |||
918 | memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub)); | 918 | memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub)); |
919 | 919 | ||
920 | gk20a_gmmu_unmap_free(ch_vm, &ch->gpfifo.mem); | 920 | gk20a_gmmu_unmap_free(ch_vm, &ch->gpfifo.mem); |
921 | 921 | nvgpu_free(ch->gpfifo.pipe); | |
922 | memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc)); | 922 | memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc)); |
923 | 923 | ||
924 | #if defined(CONFIG_GK20A_CYCLE_STATS) | 924 | #if defined(CONFIG_GK20A_CYCLE_STATS) |
@@ -1430,7 +1430,7 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, | |||
1430 | c->ramfc.offset = 0; | 1430 | c->ramfc.offset = 0; |
1431 | c->ramfc.size = ram_in_ramfc_s() / 8; | 1431 | c->ramfc.size = ram_in_ramfc_s() / 8; |
1432 | 1432 | ||
1433 | if (c->gpfifo.mem.cpu_va) { | 1433 | if (c->gpfifo.mem.size) { |
1434 | gk20a_err(d, "channel %d :" | 1434 | gk20a_err(d, "channel %d :" |
1435 | "gpfifo already allocated", c->hw_chid); | 1435 | "gpfifo already allocated", c->hw_chid); |
1436 | return -EEXIST; | 1436 | return -EEXIST; |
@@ -1444,6 +1444,16 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, | |||
1444 | goto clean_up; | 1444 | goto clean_up; |
1445 | } | 1445 | } |
1446 | 1446 | ||
1447 | if (c->gpfifo.mem.aperture == APERTURE_VIDMEM || g->mm.force_pramin) { | ||
1448 | c->gpfifo.pipe = nvgpu_alloc( | ||
1449 | gpfifo_size * sizeof(struct nvgpu_gpfifo), | ||
1450 | false); | ||
1451 | if (!c->gpfifo.pipe) { | ||
1452 | err = -ENOMEM; | ||
1453 | goto clean_up_unmap; | ||
1454 | } | ||
1455 | } | ||
1456 | |||
1447 | c->gpfifo.entry_num = gpfifo_size; | 1457 | c->gpfifo.entry_num = gpfifo_size; |
1448 | c->gpfifo.get = c->gpfifo.put = 0; | 1458 | c->gpfifo.get = c->gpfifo.put = 0; |
1449 | 1459 | ||
@@ -1473,6 +1483,7 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, | |||
1473 | return 0; | 1483 | return 0; |
1474 | 1484 | ||
1475 | clean_up_unmap: | 1485 | clean_up_unmap: |
1486 | kfree(c->gpfifo.pipe); | ||
1476 | gk20a_gmmu_unmap_free(ch_vm, &c->gpfifo.mem); | 1487 | gk20a_gmmu_unmap_free(ch_vm, &c->gpfifo.mem); |
1477 | clean_up: | 1488 | clean_up: |
1478 | memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc)); | 1489 | memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc)); |
@@ -1568,7 +1579,7 @@ static void trace_write_pushbuffer(struct channel_gk20a *c, | |||
1568 | 1579 | ||
1569 | static void trace_write_pushbuffer_range(struct channel_gk20a *c, | 1580 | static void trace_write_pushbuffer_range(struct channel_gk20a *c, |
1570 | struct nvgpu_gpfifo *g, | 1581 | struct nvgpu_gpfifo *g, |
1571 | struct nvgpu_submit_gpfifo_args *args, | 1582 | struct nvgpu_gpfifo __user *user_gpfifo, |
1572 | int offset, | 1583 | int offset, |
1573 | int count) | 1584 | int count) |
1574 | { | 1585 | { |
@@ -1580,18 +1591,17 @@ static void trace_write_pushbuffer_range(struct channel_gk20a *c, | |||
1580 | if (!gk20a_debug_trace_cmdbuf) | 1591 | if (!gk20a_debug_trace_cmdbuf) |
1581 | return; | 1592 | return; |
1582 | 1593 | ||
1583 | if (!g && !args) | 1594 | if (!g && !user_gpfifo) |
1584 | return; | 1595 | return; |
1585 | 1596 | ||
1586 | if (!g) { | 1597 | if (!g) { |
1587 | size = args->num_entries * sizeof(struct nvgpu_gpfifo); | 1598 | size = count * sizeof(struct nvgpu_gpfifo); |
1588 | if (size) { | 1599 | if (size) { |
1589 | g = nvgpu_alloc(size, false); | 1600 | g = nvgpu_alloc(size, false); |
1590 | if (!g) | 1601 | if (!g) |
1591 | return; | 1602 | return; |
1592 | 1603 | ||
1593 | if (copy_from_user(g, | 1604 | if (copy_from_user(g, user_gpfifo, size)) { |
1594 | (void __user *)(uintptr_t)args->gpfifo, size)) { | ||
1595 | nvgpu_free(g); | 1605 | nvgpu_free(g); |
1596 | return; | 1606 | return; |
1597 | } | 1607 | } |
@@ -1984,6 +1994,116 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed) | |||
1984 | gk20a_channel_put(c); | 1994 | gk20a_channel_put(c); |
1985 | } | 1995 | } |
1986 | 1996 | ||
1997 | static void gk20a_submit_append_priv_cmdbuf(struct channel_gk20a *c, | ||
1998 | struct priv_cmd_entry *cmd) | ||
1999 | { | ||
2000 | struct gk20a *g = c->g; | ||
2001 | struct mem_desc *gpfifo_mem = &c->gpfifo.mem; | ||
2002 | struct nvgpu_gpfifo x = { | ||
2003 | .entry0 = u64_lo32(cmd->gva), | ||
2004 | .entry1 = u64_hi32(cmd->gva) | | ||
2005 | pbdma_gp_entry1_length_f(cmd->size) | ||
2006 | }; | ||
2007 | |||
2008 | gk20a_mem_wr_n(g, gpfifo_mem, c->gpfifo.put * sizeof(x), | ||
2009 | &x, sizeof(x)); | ||
2010 | |||
2011 | if (cmd->mem->aperture == APERTURE_SYSMEM) | ||
2012 | trace_gk20a_push_cmdbuf(dev_name(g->dev), 0, cmd->size, 0, | ||
2013 | cmd->mem->cpu_va + cmd->off * sizeof(u32)); | ||
2014 | |||
2015 | c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1); | ||
2016 | } | ||
2017 | |||
2018 | /* | ||
2019 | * Copy source gpfifo entries into the gpfifo ring buffer, potentially | ||
2020 | * splitting into two memcpys to handle wrap-around. | ||
2021 | */ | ||
2022 | static int gk20a_submit_append_gpfifo(struct channel_gk20a *c, | ||
2023 | struct nvgpu_gpfifo *kern_gpfifo, | ||
2024 | struct nvgpu_gpfifo __user *user_gpfifo, | ||
2025 | u32 num_entries) | ||
2026 | { | ||
2027 | /* byte offsets */ | ||
2028 | u32 gpfifo_size = c->gpfifo.entry_num * sizeof(struct nvgpu_gpfifo); | ||
2029 | u32 len = num_entries * sizeof(struct nvgpu_gpfifo); | ||
2030 | u32 start = c->gpfifo.put * sizeof(struct nvgpu_gpfifo); | ||
2031 | u32 end = start + len; /* exclusive */ | ||
2032 | struct mem_desc *gpfifo_mem = &c->gpfifo.mem; | ||
2033 | struct nvgpu_gpfifo *cpu_src; | ||
2034 | int err; | ||
2035 | |||
2036 | if (user_gpfifo && !c->gpfifo.pipe) { | ||
2037 | /* | ||
2038 | * This path (from userspace to sysmem) is special in order to | ||
2039 | * avoid two copies unnecessarily (from user to pipe, then from | ||
2040 | * pipe to gpu sysmem buffer). | ||
2041 | * | ||
2042 | * As a special case, the pipe buffer exists if PRAMIN writes | ||
2043 | * are forced, although the buffers may not be in vidmem in | ||
2044 | * that case. | ||
2045 | */ | ||
2046 | if (end > gpfifo_size) { | ||
2047 | /* wrap-around */ | ||
2048 | int length0 = gpfifo_size - start; | ||
2049 | int length1 = len - length0; | ||
2050 | void *user2 = (u8*)user_gpfifo + length0; | ||
2051 | |||
2052 | err = copy_from_user(gpfifo_mem->cpu_va + start, | ||
2053 | user_gpfifo, length0); | ||
2054 | if (err) | ||
2055 | return err; | ||
2056 | |||
2057 | err = copy_from_user(gpfifo_mem->cpu_va, | ||
2058 | user2, length1); | ||
2059 | if (err) | ||
2060 | return err; | ||
2061 | } else { | ||
2062 | err = copy_from_user(gpfifo_mem->cpu_va + start, | ||
2063 | user_gpfifo, len); | ||
2064 | if (err) | ||
2065 | return err; | ||
2066 | } | ||
2067 | |||
2068 | trace_write_pushbuffer_range(c, NULL, user_gpfifo, | ||
2069 | 0, num_entries); | ||
2070 | goto out; | ||
2071 | } else if (user_gpfifo) { | ||
2072 | /* from userspace to vidmem or sysmem when pramin forced, use | ||
2073 | * the common copy path below */ | ||
2074 | err = copy_from_user(c->gpfifo.pipe, user_gpfifo, len); | ||
2075 | if (err) | ||
2076 | return err; | ||
2077 | |||
2078 | cpu_src = c->gpfifo.pipe; | ||
2079 | } else { | ||
2080 | /* from kernel to either sysmem or vidmem, don't need | ||
2081 | * copy_from_user so use the common path below */ | ||
2082 | cpu_src = kern_gpfifo; | ||
2083 | } | ||
2084 | |||
2085 | if (end > gpfifo_size) { | ||
2086 | /* wrap-around */ | ||
2087 | int length0 = gpfifo_size - start; | ||
2088 | int length1 = len - length0; | ||
2089 | void *src2 = (u8 *)cpu_src + length0; | ||
2090 | |||
2091 | gk20a_mem_wr_n(c->g, gpfifo_mem, start, cpu_src, length0); | ||
2092 | gk20a_mem_wr_n(c->g, gpfifo_mem, 0, src2, length1); | ||
2093 | } else { | ||
2094 | gk20a_mem_wr_n(c->g, gpfifo_mem, start, cpu_src, len); | ||
2095 | |||
2096 | } | ||
2097 | |||
2098 | trace_write_pushbuffer_range(c, cpu_src, NULL, 0, num_entries); | ||
2099 | |||
2100 | out: | ||
2101 | c->gpfifo.put = (c->gpfifo.put + num_entries) & | ||
2102 | (c->gpfifo.entry_num - 1); | ||
2103 | |||
2104 | return 0; | ||
2105 | } | ||
2106 | |||
1987 | int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | 2107 | int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, |
1988 | struct nvgpu_gpfifo *gpfifo, | 2108 | struct nvgpu_gpfifo *gpfifo, |
1989 | struct nvgpu_submit_gpfifo_args *args, | 2109 | struct nvgpu_submit_gpfifo_args *args, |
@@ -1996,7 +2116,6 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
1996 | struct gk20a *g = c->g; | 2116 | struct gk20a *g = c->g; |
1997 | struct device *d = dev_from_gk20a(g); | 2117 | struct device *d = dev_from_gk20a(g); |
1998 | int err = 0; | 2118 | int err = 0; |
1999 | int start, end; | ||
2000 | int wait_fence_fd = -1; | 2119 | int wait_fence_fd = -1; |
2001 | struct priv_cmd_entry *wait_cmd = NULL; | 2120 | struct priv_cmd_entry *wait_cmd = NULL; |
2002 | struct priv_cmd_entry *incr_cmd = NULL; | 2121 | struct priv_cmd_entry *incr_cmd = NULL; |
@@ -2006,11 +2125,12 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
2006 | * and one for post fence. */ | 2125 | * and one for post fence. */ |
2007 | const int extra_entries = 2; | 2126 | const int extra_entries = 2; |
2008 | bool need_wfi = !(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI); | 2127 | bool need_wfi = !(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI); |
2009 | struct nvgpu_gpfifo *gpfifo_mem = c->gpfifo.mem.cpu_va; | ||
2010 | bool skip_buffer_refcounting = (flags & | 2128 | bool skip_buffer_refcounting = (flags & |
2011 | NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING); | 2129 | NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING); |
2012 | bool need_sync_fence = false; | 2130 | bool need_sync_fence = false; |
2013 | bool new_sync_created = false; | 2131 | bool new_sync_created = false; |
2132 | struct nvgpu_gpfifo __user *user_gpfifo = args ? | ||
2133 | (struct nvgpu_gpfifo __user *)(uintptr_t)args->gpfifo : 0; | ||
2014 | 2134 | ||
2015 | /* | 2135 | /* |
2016 | * If user wants to allocate sync_fence_fd always, then respect that; | 2136 | * If user wants to allocate sync_fence_fd always, then respect that; |
@@ -2157,102 +2277,17 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
2157 | goto clean_up; | 2277 | goto clean_up; |
2158 | } | 2278 | } |
2159 | 2279 | ||
2160 | if (wait_cmd) { | 2280 | if (wait_cmd) |
2161 | gpfifo_mem[c->gpfifo.put].entry0 = u64_lo32(wait_cmd->gva); | 2281 | gk20a_submit_append_priv_cmdbuf(c, wait_cmd); |
2162 | gpfifo_mem[c->gpfifo.put].entry1 = u64_hi32(wait_cmd->gva) | | ||
2163 | pbdma_gp_entry1_length_f(wait_cmd->size); | ||
2164 | trace_gk20a_push_cmdbuf(dev_name(c->g->dev), | ||
2165 | 0, wait_cmd->size, 0, | ||
2166 | wait_cmd->mem->cpu_va + wait_cmd->off * | ||
2167 | sizeof(u32)); | ||
2168 | |||
2169 | c->gpfifo.put = (c->gpfifo.put + 1) & | ||
2170 | (c->gpfifo.entry_num - 1); | ||
2171 | } | ||
2172 | |||
2173 | /* | ||
2174 | * Copy source gpfifo entries into the gpfifo ring buffer, | ||
2175 | * potentially splitting into two memcpies to handle the | ||
2176 | * ring buffer wrap-around case. | ||
2177 | */ | ||
2178 | start = c->gpfifo.put; | ||
2179 | end = start + num_entries; | ||
2180 | |||
2181 | if (gpfifo) { | ||
2182 | if (end > c->gpfifo.entry_num) { | ||
2183 | int length0 = c->gpfifo.entry_num - start; | ||
2184 | int length1 = num_entries - length0; | ||
2185 | |||
2186 | memcpy(gpfifo_mem + start, gpfifo, | ||
2187 | length0 * sizeof(*gpfifo)); | ||
2188 | |||
2189 | memcpy(gpfifo_mem, gpfifo + length0, | ||
2190 | length1 * sizeof(*gpfifo)); | ||
2191 | |||
2192 | trace_write_pushbuffer_range(c, gpfifo, NULL, | ||
2193 | 0, length0); | ||
2194 | trace_write_pushbuffer_range(c, gpfifo, NULL, | ||
2195 | length0, length1); | ||
2196 | } else { | ||
2197 | memcpy(gpfifo_mem + start, gpfifo, | ||
2198 | num_entries * sizeof(*gpfifo)); | ||
2199 | |||
2200 | trace_write_pushbuffer_range(c, gpfifo, NULL, | ||
2201 | 0, num_entries); | ||
2202 | } | ||
2203 | } else { | ||
2204 | struct nvgpu_gpfifo __user *user_gpfifo = | ||
2205 | (struct nvgpu_gpfifo __user *)(uintptr_t)args->gpfifo; | ||
2206 | if (end > c->gpfifo.entry_num) { | ||
2207 | int length0 = c->gpfifo.entry_num - start; | ||
2208 | int length1 = num_entries - length0; | ||
2209 | |||
2210 | err = copy_from_user(gpfifo_mem + start, | ||
2211 | user_gpfifo, | ||
2212 | length0 * sizeof(*user_gpfifo)); | ||
2213 | if (err) { | ||
2214 | goto clean_up; | ||
2215 | } | ||
2216 | 2282 | ||
2217 | err = copy_from_user(gpfifo_mem, | 2283 | if (gpfifo || user_gpfifo) |
2218 | user_gpfifo + length0, | 2284 | err = gk20a_submit_append_gpfifo(c, gpfifo, user_gpfifo, |
2219 | length1 * sizeof(*user_gpfifo)); | 2285 | num_entries); |
2220 | if (err) { | 2286 | if (err) |
2221 | goto clean_up; | 2287 | goto clean_up; |
2222 | } | ||
2223 | |||
2224 | trace_write_pushbuffer_range(c, NULL, args, | ||
2225 | 0, length0); | ||
2226 | trace_write_pushbuffer_range(c, NULL, args, | ||
2227 | length0, length1); | ||
2228 | } else { | ||
2229 | err = copy_from_user(gpfifo_mem + start, | ||
2230 | user_gpfifo, | ||
2231 | num_entries * sizeof(*user_gpfifo)); | ||
2232 | if (err) { | ||
2233 | goto clean_up; | ||
2234 | } | ||
2235 | |||
2236 | trace_write_pushbuffer_range(c, NULL, args, | ||
2237 | 0, num_entries); | ||
2238 | } | ||
2239 | } | ||
2240 | |||
2241 | c->gpfifo.put = (c->gpfifo.put + num_entries) & | ||
2242 | (c->gpfifo.entry_num - 1); | ||
2243 | |||
2244 | if (incr_cmd) { | ||
2245 | gpfifo_mem[c->gpfifo.put].entry0 = u64_lo32(incr_cmd->gva); | ||
2246 | gpfifo_mem[c->gpfifo.put].entry1 = u64_hi32(incr_cmd->gva) | | ||
2247 | pbdma_gp_entry1_length_f(incr_cmd->size); | ||
2248 | trace_gk20a_push_cmdbuf(dev_name(c->g->dev), | ||
2249 | 0, incr_cmd->size, 0, | ||
2250 | incr_cmd->mem->cpu_va + incr_cmd->off * | ||
2251 | sizeof(u32)); | ||
2252 | 2288 | ||
2253 | c->gpfifo.put = (c->gpfifo.put + 1) & | 2289 | if (incr_cmd) |
2254 | (c->gpfifo.entry_num - 1); | 2290 | gk20a_submit_append_priv_cmdbuf(c, incr_cmd); |
2255 | } | ||
2256 | 2291 | ||
2257 | mutex_lock(&c->last_submit.fence_lock); | 2292 | mutex_lock(&c->last_submit.fence_lock); |
2258 | gk20a_fence_put(c->last_submit.pre_fence); | 2293 | gk20a_fence_put(c->last_submit.pre_fence); |
@@ -2892,7 +2927,6 @@ static int gk20a_ioctl_channel_submit_gpfifo( | |||
2892 | { | 2927 | { |
2893 | struct gk20a_fence *fence_out; | 2928 | struct gk20a_fence *fence_out; |
2894 | int ret = 0; | 2929 | int ret = 0; |
2895 | |||
2896 | gk20a_dbg_fn(""); | 2930 | gk20a_dbg_fn(""); |
2897 | 2931 | ||
2898 | if (ch->has_timedout) | 2932 | if (ch->has_timedout) |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index f9c5477e..db74a5ca 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -69,6 +69,10 @@ struct gpfifo_desc { | |||
69 | u32 put; | 69 | u32 put; |
70 | 70 | ||
71 | bool wrap; | 71 | bool wrap; |
72 | |||
73 | /* if gpfifo lives in vidmem or is forced to go via PRAMIN, first copy | ||
74 | * from userspace to pipe and then from pipe to gpu buffer */ | ||
75 | void *pipe; | ||
72 | }; | 76 | }; |
73 | 77 | ||
74 | struct patch_desc { | 78 | struct patch_desc { |