diff options
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 156 |
1 files changed, 99 insertions, 57 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index f60a92b4..4019721a 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -1376,16 +1376,20 @@ static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c) | |||
1376 | 1376 | ||
1377 | /* allocate a cmd buffer with given size. size is number of u32 entries */ | 1377 | /* allocate a cmd buffer with given size. size is number of u32 entries */ |
1378 | int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size, | 1378 | int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size, |
1379 | struct priv_cmd_entry **entry) | 1379 | struct priv_cmd_entry *e) |
1380 | { | 1380 | { |
1381 | struct priv_cmd_queue *q = &c->priv_cmd_q; | 1381 | struct priv_cmd_queue *q = &c->priv_cmd_q; |
1382 | struct priv_cmd_entry *e; | ||
1383 | u32 free_count; | 1382 | u32 free_count; |
1384 | u32 size = orig_size; | 1383 | u32 size = orig_size; |
1385 | 1384 | ||
1386 | gk20a_dbg_fn("size %d", orig_size); | 1385 | gk20a_dbg_fn("size %d", orig_size); |
1387 | 1386 | ||
1388 | *entry = NULL; | 1387 | if (!e) { |
1388 | gk20a_err(dev_from_gk20a(c->g), | ||
1389 | "ch %d: priv cmd entry is null", | ||
1390 | c->hw_chid); | ||
1391 | return -EINVAL; | ||
1392 | } | ||
1389 | 1393 | ||
1390 | /* if free space in the end is less than requested, increase the size | 1394 | /* if free space in the end is less than requested, increase the size |
1391 | * to make the real allocated space start from beginning. */ | 1395 | * to make the real allocated space start from beginning. */ |
@@ -1400,14 +1404,6 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size, | |||
1400 | if (size > free_count) | 1404 | if (size > free_count) |
1401 | return -EAGAIN; | 1405 | return -EAGAIN; |
1402 | 1406 | ||
1403 | e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL); | ||
1404 | if (!e) { | ||
1405 | gk20a_err(dev_from_gk20a(c->g), | ||
1406 | "ch %d: fail to allocate priv cmd entry", | ||
1407 | c->hw_chid); | ||
1408 | return -ENOMEM; | ||
1409 | } | ||
1410 | |||
1411 | e->size = orig_size; | 1407 | e->size = orig_size; |
1412 | e->mem = &q->mem; | 1408 | e->mem = &q->mem; |
1413 | 1409 | ||
@@ -1426,8 +1422,10 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size, | |||
1426 | /* we already handled q->put + size > q->size so BUG_ON this */ | 1422 | /* we already handled q->put + size > q->size so BUG_ON this */ |
1427 | BUG_ON(q->put > q->size); | 1423 | BUG_ON(q->put > q->size); |
1428 | 1424 | ||
1429 | *entry = e; | 1425 | /* commit the previous writes before making the entry valid */ |
1426 | wmb(); | ||
1430 | 1427 | ||
1428 | e->valid = true; | ||
1431 | gk20a_dbg_fn("done"); | 1429 | gk20a_dbg_fn("done"); |
1432 | 1430 | ||
1433 | return 0; | 1431 | return 0; |
@@ -1441,6 +1439,21 @@ static void free_priv_cmdbuf(struct channel_gk20a *c, | |||
1441 | kfree(e); | 1439 | kfree(e); |
1442 | } | 1440 | } |
1443 | 1441 | ||
1442 | static struct channel_gk20a_job *channel_gk20a_alloc_job( | ||
1443 | struct channel_gk20a *c) | ||
1444 | { | ||
1445 | struct channel_gk20a_job *job = NULL; | ||
1446 | |||
1447 | job = kzalloc(sizeof(*job), GFP_KERNEL); | ||
1448 | return job; | ||
1449 | } | ||
1450 | |||
1451 | static void channel_gk20a_free_job(struct channel_gk20a *c, | ||
1452 | struct channel_gk20a_job *job) | ||
1453 | { | ||
1454 | kfree(job); | ||
1455 | } | ||
1456 | |||
1444 | int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, | 1457 | int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, |
1445 | struct nvgpu_alloc_gpfifo_args *args) | 1458 | struct nvgpu_alloc_gpfifo_args *args) |
1446 | { | 1459 | { |
@@ -1818,10 +1831,15 @@ int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e) | |||
1818 | if (!e) | 1831 | if (!e) |
1819 | return 0; | 1832 | return 0; |
1820 | 1833 | ||
1821 | if ((q->get != e->off) && e->off != 0) | 1834 | if (e->valid) { |
1822 | gk20a_err(d, "requests out-of-order, ch=%d\n", c->hw_chid); | 1835 | /* read the entry's valid flag before reading its contents */ |
1836 | rmb(); | ||
1837 | if ((q->get != e->off) && e->off != 0) | ||
1838 | gk20a_err(d, "requests out-of-order, ch=%d\n", | ||
1839 | c->hw_chid); | ||
1840 | q->get = e->off + e->size; | ||
1841 | } | ||
1823 | 1842 | ||
1824 | q->get = e->off + e->size; | ||
1825 | free_priv_cmdbuf(c, e); | 1843 | free_priv_cmdbuf(c, e); |
1826 | 1844 | ||
1827 | return 0; | 1845 | return 0; |
@@ -1854,14 +1872,10 @@ static void gk20a_channel_cancel_job_clean_up(struct channel_gk20a *c, | |||
1854 | } | 1872 | } |
1855 | 1873 | ||
1856 | static int gk20a_channel_add_job(struct channel_gk20a *c, | 1874 | static int gk20a_channel_add_job(struct channel_gk20a *c, |
1857 | struct gk20a_fence *pre_fence, | 1875 | struct channel_gk20a_job *job, |
1858 | struct gk20a_fence *post_fence, | ||
1859 | struct priv_cmd_entry *wait_cmd, | ||
1860 | struct priv_cmd_entry *incr_cmd, | ||
1861 | bool skip_buffer_refcounting) | 1876 | bool skip_buffer_refcounting) |
1862 | { | 1877 | { |
1863 | struct vm_gk20a *vm = c->vm; | 1878 | struct vm_gk20a *vm = c->vm; |
1864 | struct channel_gk20a_job *job = NULL; | ||
1865 | struct mapped_buffer_node **mapped_buffers = NULL; | 1879 | struct mapped_buffer_node **mapped_buffers = NULL; |
1866 | int err = 0, num_mapped_buffers = 0; | 1880 | int err = 0, num_mapped_buffers = 0; |
1867 | 1881 | ||
@@ -1875,22 +1889,12 @@ static int gk20a_channel_add_job(struct channel_gk20a *c, | |||
1875 | goto err_put_vm; | 1889 | goto err_put_vm; |
1876 | } | 1890 | } |
1877 | 1891 | ||
1878 | job = kzalloc(sizeof(*job), GFP_KERNEL); | ||
1879 | if (!job) { | ||
1880 | err = -ENOMEM; | ||
1881 | goto err_put_buffers; | ||
1882 | } | ||
1883 | |||
1884 | /* put() is done in gk20a_channel_update() when the job is done */ | 1892 | /* put() is done in gk20a_channel_update() when the job is done */ |
1885 | c = gk20a_channel_get(c); | 1893 | c = gk20a_channel_get(c); |
1886 | 1894 | ||
1887 | if (c) { | 1895 | if (c) { |
1888 | job->num_mapped_buffers = num_mapped_buffers; | 1896 | job->num_mapped_buffers = num_mapped_buffers; |
1889 | job->mapped_buffers = mapped_buffers; | 1897 | job->mapped_buffers = mapped_buffers; |
1890 | job->pre_fence = pre_fence; | ||
1891 | job->post_fence = post_fence; | ||
1892 | job->wait_cmd = wait_cmd; | ||
1893 | job->incr_cmd = incr_cmd; | ||
1894 | 1898 | ||
1895 | gk20a_channel_timeout_start(c, job); | 1899 | gk20a_channel_timeout_start(c, job); |
1896 | 1900 | ||
@@ -1899,13 +1903,11 @@ static int gk20a_channel_add_job(struct channel_gk20a *c, | |||
1899 | spin_unlock(&c->jobs_lock); | 1903 | spin_unlock(&c->jobs_lock); |
1900 | } else { | 1904 | } else { |
1901 | err = -ETIMEDOUT; | 1905 | err = -ETIMEDOUT; |
1902 | goto err_free_job; | 1906 | goto err_put_buffers; |
1903 | } | 1907 | } |
1904 | 1908 | ||
1905 | return 0; | 1909 | return 0; |
1906 | 1910 | ||
1907 | err_free_job: | ||
1908 | kfree(job); | ||
1909 | err_put_buffers: | 1911 | err_put_buffers: |
1910 | gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers); | 1912 | gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers); |
1911 | err_put_vm: | 1913 | err_put_vm: |
@@ -2000,7 +2002,7 @@ static void gk20a_channel_clean_up_jobs(struct work_struct *work) | |||
2000 | list_del_init(&job->list); | 2002 | list_del_init(&job->list); |
2001 | spin_unlock(&c->jobs_lock); | 2003 | spin_unlock(&c->jobs_lock); |
2002 | 2004 | ||
2003 | kfree(job); | 2005 | channel_gk20a_free_job(c, job); |
2004 | job_finished = 1; | 2006 | job_finished = 1; |
2005 | gk20a_idle(g->dev); | 2007 | gk20a_idle(g->dev); |
2006 | } | 2008 | } |
@@ -2143,6 +2145,7 @@ out: | |||
2143 | */ | 2145 | */ |
2144 | static int gk20a_submit_prepare_syncs(struct channel_gk20a *c, | 2146 | static int gk20a_submit_prepare_syncs(struct channel_gk20a *c, |
2145 | struct nvgpu_fence *fence, | 2147 | struct nvgpu_fence *fence, |
2148 | struct channel_gk20a_job *job, | ||
2146 | struct priv_cmd_entry **wait_cmd, | 2149 | struct priv_cmd_entry **wait_cmd, |
2147 | struct priv_cmd_entry **incr_cmd, | 2150 | struct priv_cmd_entry **incr_cmd, |
2148 | struct gk20a_fence **pre_fence, | 2151 | struct gk20a_fence **pre_fence, |
@@ -2194,18 +2197,32 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c, | |||
2194 | * this condition. | 2197 | * this condition. |
2195 | */ | 2198 | */ |
2196 | if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) { | 2199 | if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) { |
2200 | job->wait_cmd = kzalloc(sizeof(struct priv_cmd_entry), | ||
2201 | GFP_KERNEL); | ||
2202 | job->pre_fence = gk20a_alloc_fence(c); | ||
2203 | |||
2204 | if (!job->wait_cmd || !job->pre_fence) { | ||
2205 | err = -ENOMEM; | ||
2206 | goto clean_up_pre_fence; | ||
2207 | } | ||
2208 | |||
2197 | if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) { | 2209 | if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) { |
2198 | wait_fence_fd = fence->id; | 2210 | wait_fence_fd = fence->id; |
2199 | err = c->sync->wait_fd(c->sync, wait_fence_fd, | 2211 | err = c->sync->wait_fd(c->sync, wait_fence_fd, |
2200 | wait_cmd, pre_fence); | 2212 | job->wait_cmd, job->pre_fence); |
2201 | } else { | 2213 | } else { |
2202 | err = c->sync->wait_syncpt(c->sync, fence->id, | 2214 | err = c->sync->wait_syncpt(c->sync, fence->id, |
2203 | fence->value, wait_cmd, | 2215 | fence->value, job->wait_cmd, |
2204 | pre_fence); | 2216 | job->pre_fence); |
2205 | } | 2217 | } |
2218 | |||
2219 | if (!err) { | ||
2220 | if (job->wait_cmd->valid) | ||
2221 | *wait_cmd = job->wait_cmd; | ||
2222 | *pre_fence = job->pre_fence; | ||
2223 | } else | ||
2224 | goto clean_up_pre_fence; | ||
2206 | } | 2225 | } |
2207 | if (err) | ||
2208 | goto fail; | ||
2209 | 2226 | ||
2210 | if ((flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) && | 2227 | if ((flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) && |
2211 | (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)) | 2228 | (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)) |
@@ -2216,22 +2233,41 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c, | |||
2216 | * is used to keep track of method completion for idle railgating. The | 2233 | * is used to keep track of method completion for idle railgating. The |
2217 | * sync_pt/semaphore PB is added to the GPFIFO later on in submit. | 2234 | * sync_pt/semaphore PB is added to the GPFIFO later on in submit. |
2218 | */ | 2235 | */ |
2236 | job->incr_cmd = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL); | ||
2237 | job->post_fence = gk20a_alloc_fence(c); | ||
2238 | |||
2239 | if (!job->incr_cmd || !job->post_fence) { | ||
2240 | err = -ENOMEM; | ||
2241 | goto clean_up_post_fence; | ||
2242 | } | ||
2243 | |||
2219 | if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) | 2244 | if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) |
2220 | err = c->sync->incr_user(c->sync, wait_fence_fd, incr_cmd, | 2245 | err = c->sync->incr_user(c->sync, wait_fence_fd, job->incr_cmd, |
2221 | post_fence, need_wfi, need_sync_fence); | 2246 | job->post_fence, need_wfi, need_sync_fence); |
2222 | else | 2247 | else |
2223 | err = c->sync->incr(c->sync, incr_cmd, | 2248 | err = c->sync->incr(c->sync, job->incr_cmd, |
2224 | post_fence, need_sync_fence); | 2249 | job->post_fence, need_sync_fence); |
2225 | if (err) | 2250 | if (!err) { |
2226 | goto fail; | 2251 | *incr_cmd = job->incr_cmd; |
2252 | *post_fence = job->post_fence; | ||
2253 | } else | ||
2254 | goto clean_up_post_fence; | ||
2227 | 2255 | ||
2228 | return 0; | 2256 | return 0; |
2229 | 2257 | ||
2258 | clean_up_post_fence: | ||
2259 | gk20a_free_priv_cmdbuf(c, job->incr_cmd); | ||
2260 | gk20a_fence_put(job->post_fence); | ||
2261 | job->incr_cmd = NULL; | ||
2262 | job->post_fence = NULL; | ||
2263 | clean_up_pre_fence: | ||
2264 | gk20a_free_priv_cmdbuf(c, job->wait_cmd); | ||
2265 | gk20a_fence_put(job->pre_fence); | ||
2266 | job->wait_cmd = NULL; | ||
2267 | job->pre_fence = NULL; | ||
2268 | *wait_cmd = NULL; | ||
2269 | *pre_fence = NULL; | ||
2230 | fail: | 2270 | fail: |
2231 | /* | ||
2232 | * Cleanup is handled by gk20a_submit_channel_gpfifo() since it is the | ||
2233 | * real owner of the objects we make here. | ||
2234 | */ | ||
2235 | return err; | 2271 | return err; |
2236 | } | 2272 | } |
2237 | 2273 | ||
@@ -2250,6 +2286,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
2250 | struct priv_cmd_entry *incr_cmd = NULL; | 2286 | struct priv_cmd_entry *incr_cmd = NULL; |
2251 | struct gk20a_fence *pre_fence = NULL; | 2287 | struct gk20a_fence *pre_fence = NULL; |
2252 | struct gk20a_fence *post_fence = NULL; | 2288 | struct gk20a_fence *post_fence = NULL; |
2289 | struct channel_gk20a_job *job = NULL; | ||
2253 | /* we might need two extra gpfifo entries - one for pre fence | 2290 | /* we might need two extra gpfifo entries - one for pre fence |
2254 | * and one for post fence. */ | 2291 | * and one for post fence. */ |
2255 | const int extra_entries = 2; | 2292 | const int extra_entries = 2; |
@@ -2351,11 +2388,18 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
2351 | } | 2388 | } |
2352 | 2389 | ||
2353 | if (need_job_tracking) { | 2390 | if (need_job_tracking) { |
2354 | err = gk20a_submit_prepare_syncs(c, fence, &wait_cmd, &incr_cmd, | 2391 | job = channel_gk20a_alloc_job(c); |
2392 | if (!job) { | ||
2393 | err = -ENOMEM; | ||
2394 | goto clean_up; | ||
2395 | } | ||
2396 | |||
2397 | err = gk20a_submit_prepare_syncs(c, fence, job, | ||
2398 | &wait_cmd, &incr_cmd, | ||
2355 | &pre_fence, &post_fence, | 2399 | &pre_fence, &post_fence, |
2356 | force_need_sync_fence, flags); | 2400 | force_need_sync_fence, flags); |
2357 | if (err) | 2401 | if (err) |
2358 | goto clean_up; | 2402 | goto clean_up_job; |
2359 | } | 2403 | } |
2360 | 2404 | ||
2361 | if (wait_cmd) | 2405 | if (wait_cmd) |
@@ -2365,7 +2409,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
2365 | err = gk20a_submit_append_gpfifo(c, gpfifo, user_gpfifo, | 2409 | err = gk20a_submit_append_gpfifo(c, gpfifo, user_gpfifo, |
2366 | num_entries); | 2410 | num_entries); |
2367 | if (err) | 2411 | if (err) |
2368 | goto clean_up; | 2412 | goto clean_up_job; |
2369 | 2413 | ||
2370 | /* | 2414 | /* |
2371 | * And here's where we add the incr_cmd we generated earlier. It should | 2415 | * And here's where we add the incr_cmd we generated earlier. It should |
@@ -2379,9 +2423,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
2379 | 2423 | ||
2380 | if (need_job_tracking) | 2424 | if (need_job_tracking) |
2381 | /* TODO! Check for errors... */ | 2425 | /* TODO! Check for errors... */ |
2382 | gk20a_channel_add_job(c, pre_fence, post_fence, | 2426 | gk20a_channel_add_job(c, job, skip_buffer_refcounting); |
2383 | wait_cmd, incr_cmd, | ||
2384 | skip_buffer_refcounting); | ||
2385 | 2427 | ||
2386 | g->ops.fifo.userd_gp_put(g, c); | 2428 | g->ops.fifo.userd_gp_put(g, c); |
2387 | 2429 | ||
@@ -2398,10 +2440,10 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
2398 | gk20a_dbg_fn("done"); | 2440 | gk20a_dbg_fn("done"); |
2399 | return err; | 2441 | return err; |
2400 | 2442 | ||
2443 | clean_up_job: | ||
2444 | channel_gk20a_free_job(c, job); | ||
2401 | clean_up: | 2445 | clean_up: |
2402 | gk20a_dbg_fn("fail"); | 2446 | gk20a_dbg_fn("fail"); |
2403 | free_priv_cmdbuf(c, wait_cmd); | ||
2404 | free_priv_cmdbuf(c, incr_cmd); | ||
2405 | gk20a_fence_put(pre_fence); | 2447 | gk20a_fence_put(pre_fence); |
2406 | gk20a_fence_put(post_fence); | 2448 | gk20a_fence_put(post_fence); |
2407 | if (need_job_tracking) | 2449 | if (need_job_tracking) |