diff options
author | Sachit Kadle <skadle@nvidia.com> | 2016-08-22 21:06:30 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2016-10-20 11:14:04 -0400 |
commit | 63e8592e06939e20c7b9e56b430353ebbee31ad6 (patch) | |
tree | b91247eebf886f4e987d38eb4069aceace284ecf /drivers/gpu/nvgpu/gk20a | |
parent | 3c2656c8c6ebf7cef7376d3a28451249643121c4 (diff) |
gpu: nvgpu: use inplace allocation in sync framework
This change is the first of a series of changes to
support the usage of pre-allocated job tracking resources
in the submit path. With this change, we still maintain a
dynamically-allocated joblist, but make the necessary changes
in the channel_sync & fence framework to use in-place
allocations. Specifically, we:
1) Update channel sync framework routines to take in
pre-allocated priv_cmd_entry(s) & gk20a_fence(s) rather
than dynamically allocating themselves
2) Move allocation of priv_cmd_entry(s) & gk20a_fence(s)
to gk20a_submit_prepare_syncs
3) Modify fence framework to have separate allocation
and init APIs. We expose allocation as a separate API, so
the client can allocate the object before passing it into
the channel sync framework.
4) Fix clean_up logic in channel sync framework
Bug 1795076
Change-Id: I96db457683cd207fd029c31c45f548f98055e844
Signed-off-by: Sachit Kadle <skadle@nvidia.com>
Reviewed-on: http://git-master/r/1206725
(cherry picked from commit 9d196fd10db6c2f934c2a53b1fc0500eb4626624)
Reviewed-on: http://git-master/r/1223933
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 156 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 168 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h | 20 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fence_gk20a.c | 88 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fence_gk20a.h | 15 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 1 |
7 files changed, 275 insertions, 175 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index f60a92b4..4019721a 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -1376,16 +1376,20 @@ static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c) | |||
1376 | 1376 | ||
1377 | /* allocate a cmd buffer with given size. size is number of u32 entries */ | 1377 | /* allocate a cmd buffer with given size. size is number of u32 entries */ |
1378 | int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size, | 1378 | int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size, |
1379 | struct priv_cmd_entry **entry) | 1379 | struct priv_cmd_entry *e) |
1380 | { | 1380 | { |
1381 | struct priv_cmd_queue *q = &c->priv_cmd_q; | 1381 | struct priv_cmd_queue *q = &c->priv_cmd_q; |
1382 | struct priv_cmd_entry *e; | ||
1383 | u32 free_count; | 1382 | u32 free_count; |
1384 | u32 size = orig_size; | 1383 | u32 size = orig_size; |
1385 | 1384 | ||
1386 | gk20a_dbg_fn("size %d", orig_size); | 1385 | gk20a_dbg_fn("size %d", orig_size); |
1387 | 1386 | ||
1388 | *entry = NULL; | 1387 | if (!e) { |
1388 | gk20a_err(dev_from_gk20a(c->g), | ||
1389 | "ch %d: priv cmd entry is null", | ||
1390 | c->hw_chid); | ||
1391 | return -EINVAL; | ||
1392 | } | ||
1389 | 1393 | ||
1390 | /* if free space in the end is less than requested, increase the size | 1394 | /* if free space in the end is less than requested, increase the size |
1391 | * to make the real allocated space start from beginning. */ | 1395 | * to make the real allocated space start from beginning. */ |
@@ -1400,14 +1404,6 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size, | |||
1400 | if (size > free_count) | 1404 | if (size > free_count) |
1401 | return -EAGAIN; | 1405 | return -EAGAIN; |
1402 | 1406 | ||
1403 | e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL); | ||
1404 | if (!e) { | ||
1405 | gk20a_err(dev_from_gk20a(c->g), | ||
1406 | "ch %d: fail to allocate priv cmd entry", | ||
1407 | c->hw_chid); | ||
1408 | return -ENOMEM; | ||
1409 | } | ||
1410 | |||
1411 | e->size = orig_size; | 1407 | e->size = orig_size; |
1412 | e->mem = &q->mem; | 1408 | e->mem = &q->mem; |
1413 | 1409 | ||
@@ -1426,8 +1422,10 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size, | |||
1426 | /* we already handled q->put + size > q->size so BUG_ON this */ | 1422 | /* we already handled q->put + size > q->size so BUG_ON this */ |
1427 | BUG_ON(q->put > q->size); | 1423 | BUG_ON(q->put > q->size); |
1428 | 1424 | ||
1429 | *entry = e; | 1425 | /* commit the previous writes before making the entry valid */ |
1426 | wmb(); | ||
1430 | 1427 | ||
1428 | e->valid = true; | ||
1431 | gk20a_dbg_fn("done"); | 1429 | gk20a_dbg_fn("done"); |
1432 | 1430 | ||
1433 | return 0; | 1431 | return 0; |
@@ -1441,6 +1439,21 @@ static void free_priv_cmdbuf(struct channel_gk20a *c, | |||
1441 | kfree(e); | 1439 | kfree(e); |
1442 | } | 1440 | } |
1443 | 1441 | ||
1442 | static struct channel_gk20a_job *channel_gk20a_alloc_job( | ||
1443 | struct channel_gk20a *c) | ||
1444 | { | ||
1445 | struct channel_gk20a_job *job = NULL; | ||
1446 | |||
1447 | job = kzalloc(sizeof(*job), GFP_KERNEL); | ||
1448 | return job; | ||
1449 | } | ||
1450 | |||
1451 | static void channel_gk20a_free_job(struct channel_gk20a *c, | ||
1452 | struct channel_gk20a_job *job) | ||
1453 | { | ||
1454 | kfree(job); | ||
1455 | } | ||
1456 | |||
1444 | int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, | 1457 | int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, |
1445 | struct nvgpu_alloc_gpfifo_args *args) | 1458 | struct nvgpu_alloc_gpfifo_args *args) |
1446 | { | 1459 | { |
@@ -1818,10 +1831,15 @@ int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e) | |||
1818 | if (!e) | 1831 | if (!e) |
1819 | return 0; | 1832 | return 0; |
1820 | 1833 | ||
1821 | if ((q->get != e->off) && e->off != 0) | 1834 | if (e->valid) { |
1822 | gk20a_err(d, "requests out-of-order, ch=%d\n", c->hw_chid); | 1835 | /* read the entry's valid flag before reading its contents */ |
1836 | rmb(); | ||
1837 | if ((q->get != e->off) && e->off != 0) | ||
1838 | gk20a_err(d, "requests out-of-order, ch=%d\n", | ||
1839 | c->hw_chid); | ||
1840 | q->get = e->off + e->size; | ||
1841 | } | ||
1823 | 1842 | ||
1824 | q->get = e->off + e->size; | ||
1825 | free_priv_cmdbuf(c, e); | 1843 | free_priv_cmdbuf(c, e); |
1826 | 1844 | ||
1827 | return 0; | 1845 | return 0; |
@@ -1854,14 +1872,10 @@ static void gk20a_channel_cancel_job_clean_up(struct channel_gk20a *c, | |||
1854 | } | 1872 | } |
1855 | 1873 | ||
1856 | static int gk20a_channel_add_job(struct channel_gk20a *c, | 1874 | static int gk20a_channel_add_job(struct channel_gk20a *c, |
1857 | struct gk20a_fence *pre_fence, | 1875 | struct channel_gk20a_job *job, |
1858 | struct gk20a_fence *post_fence, | ||
1859 | struct priv_cmd_entry *wait_cmd, | ||
1860 | struct priv_cmd_entry *incr_cmd, | ||
1861 | bool skip_buffer_refcounting) | 1876 | bool skip_buffer_refcounting) |
1862 | { | 1877 | { |
1863 | struct vm_gk20a *vm = c->vm; | 1878 | struct vm_gk20a *vm = c->vm; |
1864 | struct channel_gk20a_job *job = NULL; | ||
1865 | struct mapped_buffer_node **mapped_buffers = NULL; | 1879 | struct mapped_buffer_node **mapped_buffers = NULL; |
1866 | int err = 0, num_mapped_buffers = 0; | 1880 | int err = 0, num_mapped_buffers = 0; |
1867 | 1881 | ||
@@ -1875,22 +1889,12 @@ static int gk20a_channel_add_job(struct channel_gk20a *c, | |||
1875 | goto err_put_vm; | 1889 | goto err_put_vm; |
1876 | } | 1890 | } |
1877 | 1891 | ||
1878 | job = kzalloc(sizeof(*job), GFP_KERNEL); | ||
1879 | if (!job) { | ||
1880 | err = -ENOMEM; | ||
1881 | goto err_put_buffers; | ||
1882 | } | ||
1883 | |||
1884 | /* put() is done in gk20a_channel_update() when the job is done */ | 1892 | /* put() is done in gk20a_channel_update() when the job is done */ |
1885 | c = gk20a_channel_get(c); | 1893 | c = gk20a_channel_get(c); |
1886 | 1894 | ||
1887 | if (c) { | 1895 | if (c) { |
1888 | job->num_mapped_buffers = num_mapped_buffers; | 1896 | job->num_mapped_buffers = num_mapped_buffers; |
1889 | job->mapped_buffers = mapped_buffers; | 1897 | job->mapped_buffers = mapped_buffers; |
1890 | job->pre_fence = pre_fence; | ||
1891 | job->post_fence = post_fence; | ||
1892 | job->wait_cmd = wait_cmd; | ||
1893 | job->incr_cmd = incr_cmd; | ||
1894 | 1898 | ||
1895 | gk20a_channel_timeout_start(c, job); | 1899 | gk20a_channel_timeout_start(c, job); |
1896 | 1900 | ||
@@ -1899,13 +1903,11 @@ static int gk20a_channel_add_job(struct channel_gk20a *c, | |||
1899 | spin_unlock(&c->jobs_lock); | 1903 | spin_unlock(&c->jobs_lock); |
1900 | } else { | 1904 | } else { |
1901 | err = -ETIMEDOUT; | 1905 | err = -ETIMEDOUT; |
1902 | goto err_free_job; | 1906 | goto err_put_buffers; |
1903 | } | 1907 | } |
1904 | 1908 | ||
1905 | return 0; | 1909 | return 0; |
1906 | 1910 | ||
1907 | err_free_job: | ||
1908 | kfree(job); | ||
1909 | err_put_buffers: | 1911 | err_put_buffers: |
1910 | gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers); | 1912 | gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers); |
1911 | err_put_vm: | 1913 | err_put_vm: |
@@ -2000,7 +2002,7 @@ static void gk20a_channel_clean_up_jobs(struct work_struct *work) | |||
2000 | list_del_init(&job->list); | 2002 | list_del_init(&job->list); |
2001 | spin_unlock(&c->jobs_lock); | 2003 | spin_unlock(&c->jobs_lock); |
2002 | 2004 | ||
2003 | kfree(job); | 2005 | channel_gk20a_free_job(c, job); |
2004 | job_finished = 1; | 2006 | job_finished = 1; |
2005 | gk20a_idle(g->dev); | 2007 | gk20a_idle(g->dev); |
2006 | } | 2008 | } |
@@ -2143,6 +2145,7 @@ out: | |||
2143 | */ | 2145 | */ |
2144 | static int gk20a_submit_prepare_syncs(struct channel_gk20a *c, | 2146 | static int gk20a_submit_prepare_syncs(struct channel_gk20a *c, |
2145 | struct nvgpu_fence *fence, | 2147 | struct nvgpu_fence *fence, |
2148 | struct channel_gk20a_job *job, | ||
2146 | struct priv_cmd_entry **wait_cmd, | 2149 | struct priv_cmd_entry **wait_cmd, |
2147 | struct priv_cmd_entry **incr_cmd, | 2150 | struct priv_cmd_entry **incr_cmd, |
2148 | struct gk20a_fence **pre_fence, | 2151 | struct gk20a_fence **pre_fence, |
@@ -2194,18 +2197,32 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c, | |||
2194 | * this condition. | 2197 | * this condition. |
2195 | */ | 2198 | */ |
2196 | if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) { | 2199 | if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) { |
2200 | job->wait_cmd = kzalloc(sizeof(struct priv_cmd_entry), | ||
2201 | GFP_KERNEL); | ||
2202 | job->pre_fence = gk20a_alloc_fence(c); | ||
2203 | |||
2204 | if (!job->wait_cmd || !job->pre_fence) { | ||
2205 | err = -ENOMEM; | ||
2206 | goto clean_up_pre_fence; | ||
2207 | } | ||
2208 | |||
2197 | if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) { | 2209 | if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) { |
2198 | wait_fence_fd = fence->id; | 2210 | wait_fence_fd = fence->id; |
2199 | err = c->sync->wait_fd(c->sync, wait_fence_fd, | 2211 | err = c->sync->wait_fd(c->sync, wait_fence_fd, |
2200 | wait_cmd, pre_fence); | 2212 | job->wait_cmd, job->pre_fence); |
2201 | } else { | 2213 | } else { |
2202 | err = c->sync->wait_syncpt(c->sync, fence->id, | 2214 | err = c->sync->wait_syncpt(c->sync, fence->id, |
2203 | fence->value, wait_cmd, | 2215 | fence->value, job->wait_cmd, |
2204 | pre_fence); | 2216 | job->pre_fence); |
2205 | } | 2217 | } |
2218 | |||
2219 | if (!err) { | ||
2220 | if (job->wait_cmd->valid) | ||
2221 | *wait_cmd = job->wait_cmd; | ||
2222 | *pre_fence = job->pre_fence; | ||
2223 | } else | ||
2224 | goto clean_up_pre_fence; | ||
2206 | } | 2225 | } |
2207 | if (err) | ||
2208 | goto fail; | ||
2209 | 2226 | ||
2210 | if ((flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) && | 2227 | if ((flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) && |
2211 | (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)) | 2228 | (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)) |
@@ -2216,22 +2233,41 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c, | |||
2216 | * is used to keep track of method completion for idle railgating. The | 2233 | * is used to keep track of method completion for idle railgating. The |
2217 | * sync_pt/semaphore PB is added to the GPFIFO later on in submit. | 2234 | * sync_pt/semaphore PB is added to the GPFIFO later on in submit. |
2218 | */ | 2235 | */ |
2236 | job->incr_cmd = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL); | ||
2237 | job->post_fence = gk20a_alloc_fence(c); | ||
2238 | |||
2239 | if (!job->incr_cmd || !job->post_fence) { | ||
2240 | err = -ENOMEM; | ||
2241 | goto clean_up_post_fence; | ||
2242 | } | ||
2243 | |||
2219 | if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) | 2244 | if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) |
2220 | err = c->sync->incr_user(c->sync, wait_fence_fd, incr_cmd, | 2245 | err = c->sync->incr_user(c->sync, wait_fence_fd, job->incr_cmd, |
2221 | post_fence, need_wfi, need_sync_fence); | 2246 | job->post_fence, need_wfi, need_sync_fence); |
2222 | else | 2247 | else |
2223 | err = c->sync->incr(c->sync, incr_cmd, | 2248 | err = c->sync->incr(c->sync, job->incr_cmd, |
2224 | post_fence, need_sync_fence); | 2249 | job->post_fence, need_sync_fence); |
2225 | if (err) | 2250 | if (!err) { |
2226 | goto fail; | 2251 | *incr_cmd = job->incr_cmd; |
2252 | *post_fence = job->post_fence; | ||
2253 | } else | ||
2254 | goto clean_up_post_fence; | ||
2227 | 2255 | ||
2228 | return 0; | 2256 | return 0; |
2229 | 2257 | ||
2258 | clean_up_post_fence: | ||
2259 | gk20a_free_priv_cmdbuf(c, job->incr_cmd); | ||
2260 | gk20a_fence_put(job->post_fence); | ||
2261 | job->incr_cmd = NULL; | ||
2262 | job->post_fence = NULL; | ||
2263 | clean_up_pre_fence: | ||
2264 | gk20a_free_priv_cmdbuf(c, job->wait_cmd); | ||
2265 | gk20a_fence_put(job->pre_fence); | ||
2266 | job->wait_cmd = NULL; | ||
2267 | job->pre_fence = NULL; | ||
2268 | *wait_cmd = NULL; | ||
2269 | *pre_fence = NULL; | ||
2230 | fail: | 2270 | fail: |
2231 | /* | ||
2232 | * Cleanup is handled by gk20a_submit_channel_gpfifo() since it is the | ||
2233 | * real owner of the objects we make here. | ||
2234 | */ | ||
2235 | return err; | 2271 | return err; |
2236 | } | 2272 | } |
2237 | 2273 | ||
@@ -2250,6 +2286,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
2250 | struct priv_cmd_entry *incr_cmd = NULL; | 2286 | struct priv_cmd_entry *incr_cmd = NULL; |
2251 | struct gk20a_fence *pre_fence = NULL; | 2287 | struct gk20a_fence *pre_fence = NULL; |
2252 | struct gk20a_fence *post_fence = NULL; | 2288 | struct gk20a_fence *post_fence = NULL; |
2289 | struct channel_gk20a_job *job = NULL; | ||
2253 | /* we might need two extra gpfifo entries - one for pre fence | 2290 | /* we might need two extra gpfifo entries - one for pre fence |
2254 | * and one for post fence. */ | 2291 | * and one for post fence. */ |
2255 | const int extra_entries = 2; | 2292 | const int extra_entries = 2; |
@@ -2351,11 +2388,18 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
2351 | } | 2388 | } |
2352 | 2389 | ||
2353 | if (need_job_tracking) { | 2390 | if (need_job_tracking) { |
2354 | err = gk20a_submit_prepare_syncs(c, fence, &wait_cmd, &incr_cmd, | 2391 | job = channel_gk20a_alloc_job(c); |
2392 | if (!job) { | ||
2393 | err = -ENOMEM; | ||
2394 | goto clean_up; | ||
2395 | } | ||
2396 | |||
2397 | err = gk20a_submit_prepare_syncs(c, fence, job, | ||
2398 | &wait_cmd, &incr_cmd, | ||
2355 | &pre_fence, &post_fence, | 2399 | &pre_fence, &post_fence, |
2356 | force_need_sync_fence, flags); | 2400 | force_need_sync_fence, flags); |
2357 | if (err) | 2401 | if (err) |
2358 | goto clean_up; | 2402 | goto clean_up_job; |
2359 | } | 2403 | } |
2360 | 2404 | ||
2361 | if (wait_cmd) | 2405 | if (wait_cmd) |
@@ -2365,7 +2409,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
2365 | err = gk20a_submit_append_gpfifo(c, gpfifo, user_gpfifo, | 2409 | err = gk20a_submit_append_gpfifo(c, gpfifo, user_gpfifo, |
2366 | num_entries); | 2410 | num_entries); |
2367 | if (err) | 2411 | if (err) |
2368 | goto clean_up; | 2412 | goto clean_up_job; |
2369 | 2413 | ||
2370 | /* | 2414 | /* |
2371 | * And here's where we add the incr_cmd we generated earlier. It should | 2415 | * And here's where we add the incr_cmd we generated earlier. It should |
@@ -2379,9 +2423,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
2379 | 2423 | ||
2380 | if (need_job_tracking) | 2424 | if (need_job_tracking) |
2381 | /* TODO! Check for errors... */ | 2425 | /* TODO! Check for errors... */ |
2382 | gk20a_channel_add_job(c, pre_fence, post_fence, | 2426 | gk20a_channel_add_job(c, job, skip_buffer_refcounting); |
2383 | wait_cmd, incr_cmd, | ||
2384 | skip_buffer_refcounting); | ||
2385 | 2427 | ||
2386 | g->ops.fifo.userd_gp_put(g, c); | 2428 | g->ops.fifo.userd_gp_put(g, c); |
2387 | 2429 | ||
@@ -2398,10 +2440,10 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
2398 | gk20a_dbg_fn("done"); | 2440 | gk20a_dbg_fn("done"); |
2399 | return err; | 2441 | return err; |
2400 | 2442 | ||
2443 | clean_up_job: | ||
2444 | channel_gk20a_free_job(c, job); | ||
2401 | clean_up: | 2445 | clean_up: |
2402 | gk20a_dbg_fn("fail"); | 2446 | gk20a_dbg_fn("fail"); |
2403 | free_priv_cmdbuf(c, wait_cmd); | ||
2404 | free_priv_cmdbuf(c, incr_cmd); | ||
2405 | gk20a_fence_put(pre_fence); | 2447 | gk20a_fence_put(pre_fence); |
2406 | gk20a_fence_put(post_fence); | 2448 | gk20a_fence_put(post_fence); |
2407 | if (need_job_tracking) | 2449 | if (need_job_tracking) |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index f6571b6f..0d8746b8 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h | |||
@@ -218,7 +218,7 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch); | |||
218 | void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error); | 218 | void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error); |
219 | void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events); | 219 | void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events); |
220 | int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 size, | 220 | int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 size, |
221 | struct priv_cmd_entry **entry); | 221 | struct priv_cmd_entry *entry); |
222 | int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e); | 222 | int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e); |
223 | 223 | ||
224 | int gk20a_enable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch); | 224 | int gk20a_enable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch); |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index 7a71c4eb..767738ea 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | |||
@@ -57,12 +57,11 @@ static void add_wait_cmd(struct gk20a *g, struct priv_cmd_entry *cmd, u32 off, | |||
57 | } | 57 | } |
58 | 58 | ||
59 | static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s, | 59 | static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s, |
60 | u32 id, u32 thresh, struct priv_cmd_entry **entry, | 60 | u32 id, u32 thresh, struct priv_cmd_entry *wait_cmd, |
61 | struct gk20a_fence **fence) | 61 | struct gk20a_fence *fence) |
62 | { | 62 | { |
63 | struct gk20a_channel_syncpt *sp = | 63 | struct gk20a_channel_syncpt *sp = |
64 | container_of(s, struct gk20a_channel_syncpt, ops); | 64 | container_of(s, struct gk20a_channel_syncpt, ops); |
65 | struct priv_cmd_entry *wait_cmd = NULL; | ||
66 | struct channel_gk20a *c = sp->c; | 65 | struct channel_gk20a *c = sp->c; |
67 | int err = 0; | 66 | int err = 0; |
68 | 67 | ||
@@ -75,7 +74,7 @@ static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s, | |||
75 | if (nvhost_syncpt_is_expired_ext(sp->host1x_pdev, id, thresh)) | 74 | if (nvhost_syncpt_is_expired_ext(sp->host1x_pdev, id, thresh)) |
76 | return 0; | 75 | return 0; |
77 | 76 | ||
78 | err = gk20a_channel_alloc_priv_cmdbuf(c, 4, &wait_cmd); | 77 | err = gk20a_channel_alloc_priv_cmdbuf(c, 4, wait_cmd); |
79 | if (err) { | 78 | if (err) { |
80 | gk20a_err(dev_from_gk20a(c->g), | 79 | gk20a_err(dev_from_gk20a(c->g), |
81 | "not enough priv cmd buffer space"); | 80 | "not enough priv cmd buffer space"); |
@@ -84,21 +83,18 @@ static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s, | |||
84 | 83 | ||
85 | add_wait_cmd(c->g, wait_cmd, 0, id, thresh); | 84 | add_wait_cmd(c->g, wait_cmd, 0, id, thresh); |
86 | 85 | ||
87 | *entry = wait_cmd; | ||
88 | *fence = NULL; | ||
89 | return 0; | 86 | return 0; |
90 | } | 87 | } |
91 | 88 | ||
92 | static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd, | 89 | static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd, |
93 | struct priv_cmd_entry **entry, | 90 | struct priv_cmd_entry *wait_cmd, |
94 | struct gk20a_fence **fence) | 91 | struct gk20a_fence *fence) |
95 | { | 92 | { |
96 | #ifdef CONFIG_SYNC | 93 | #ifdef CONFIG_SYNC |
97 | int i; | 94 | int i; |
98 | int num_wait_cmds; | 95 | int num_wait_cmds; |
99 | struct sync_fence *sync_fence; | 96 | struct sync_fence *sync_fence; |
100 | struct sync_pt *pt; | 97 | struct sync_pt *pt; |
101 | struct priv_cmd_entry *wait_cmd = NULL; | ||
102 | struct gk20a_channel_syncpt *sp = | 98 | struct gk20a_channel_syncpt *sp = |
103 | container_of(s, struct gk20a_channel_syncpt, ops); | 99 | container_of(s, struct gk20a_channel_syncpt, ops); |
104 | struct channel_gk20a *c = sp->c; | 100 | struct channel_gk20a *c = sp->c; |
@@ -134,7 +130,7 @@ static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd, | |||
134 | return 0; | 130 | return 0; |
135 | } | 131 | } |
136 | 132 | ||
137 | err = gk20a_channel_alloc_priv_cmdbuf(c, 4 * num_wait_cmds, &wait_cmd); | 133 | err = gk20a_channel_alloc_priv_cmdbuf(c, 4 * num_wait_cmds, wait_cmd); |
138 | if (err) { | 134 | if (err) { |
139 | gk20a_err(dev_from_gk20a(c->g), | 135 | gk20a_err(dev_from_gk20a(c->g), |
140 | "not enough priv cmd buffer space"); | 136 | "not enough priv cmd buffer space"); |
@@ -172,8 +168,6 @@ static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd, | |||
172 | WARN_ON(i != num_wait_cmds); | 168 | WARN_ON(i != num_wait_cmds); |
173 | sync_fence_put(sync_fence); | 169 | sync_fence_put(sync_fence); |
174 | 170 | ||
175 | *entry = wait_cmd; | ||
176 | *fence = NULL; | ||
177 | return 0; | 171 | return 0; |
178 | #else | 172 | #else |
179 | return -ENODEV; | 173 | return -ENODEV; |
@@ -193,15 +187,14 @@ static void gk20a_channel_syncpt_update(void *priv, int nr_completed) | |||
193 | static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s, | 187 | static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s, |
194 | bool wfi_cmd, | 188 | bool wfi_cmd, |
195 | bool register_irq, | 189 | bool register_irq, |
196 | struct priv_cmd_entry **entry, | 190 | struct priv_cmd_entry *incr_cmd, |
197 | struct gk20a_fence **fence, | 191 | struct gk20a_fence *fence, |
198 | bool need_sync_fence) | 192 | bool need_sync_fence) |
199 | { | 193 | { |
200 | u32 thresh; | 194 | u32 thresh; |
201 | int incr_cmd_size; | 195 | int incr_cmd_size; |
202 | int off; | 196 | int off; |
203 | int err; | 197 | int err; |
204 | struct priv_cmd_entry *incr_cmd = NULL; | ||
205 | struct gk20a_channel_syncpt *sp = | 198 | struct gk20a_channel_syncpt *sp = |
206 | container_of(s, struct gk20a_channel_syncpt, ops); | 199 | container_of(s, struct gk20a_channel_syncpt, ops); |
207 | struct channel_gk20a *c = sp->c; | 200 | struct channel_gk20a *c = sp->c; |
@@ -210,7 +203,7 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s, | |||
210 | if (wfi_cmd) | 203 | if (wfi_cmd) |
211 | incr_cmd_size += 2; | 204 | incr_cmd_size += 2; |
212 | 205 | ||
213 | err = gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, &incr_cmd); | 206 | err = gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, incr_cmd); |
214 | if (err) | 207 | if (err) |
215 | return err; | 208 | return err; |
216 | 209 | ||
@@ -267,15 +260,21 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s, | |||
267 | } | 260 | } |
268 | } | 261 | } |
269 | 262 | ||
270 | *fence = gk20a_fence_from_syncpt(sp->host1x_pdev, sp->id, thresh, | 263 | err = gk20a_fence_from_syncpt(fence, sp->host1x_pdev, sp->id, thresh, |
271 | wfi_cmd, need_sync_fence); | 264 | wfi_cmd, need_sync_fence); |
272 | *entry = incr_cmd; | 265 | if (err) |
266 | goto clean_up_priv_cmd; | ||
267 | |||
273 | return 0; | 268 | return 0; |
269 | |||
270 | clean_up_priv_cmd: | ||
271 | gk20a_free_priv_cmdbuf(c, incr_cmd); | ||
272 | return err; | ||
274 | } | 273 | } |
275 | 274 | ||
276 | static int gk20a_channel_syncpt_incr_wfi(struct gk20a_channel_sync *s, | 275 | static int gk20a_channel_syncpt_incr_wfi(struct gk20a_channel_sync *s, |
277 | struct priv_cmd_entry **entry, | 276 | struct priv_cmd_entry *entry, |
278 | struct gk20a_fence **fence) | 277 | struct gk20a_fence *fence) |
279 | { | 278 | { |
280 | return __gk20a_channel_syncpt_incr(s, | 279 | return __gk20a_channel_syncpt_incr(s, |
281 | true /* wfi */, | 280 | true /* wfi */, |
@@ -284,8 +283,8 @@ static int gk20a_channel_syncpt_incr_wfi(struct gk20a_channel_sync *s, | |||
284 | } | 283 | } |
285 | 284 | ||
286 | static int gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s, | 285 | static int gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s, |
287 | struct priv_cmd_entry **entry, | 286 | struct priv_cmd_entry *entry, |
288 | struct gk20a_fence **fence, | 287 | struct gk20a_fence *fence, |
289 | bool need_sync_fence) | 288 | bool need_sync_fence) |
290 | { | 289 | { |
291 | /* Don't put wfi cmd to this one since we're not returning | 290 | /* Don't put wfi cmd to this one since we're not returning |
@@ -298,8 +297,8 @@ static int gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s, | |||
298 | 297 | ||
299 | static int gk20a_channel_syncpt_incr_user(struct gk20a_channel_sync *s, | 298 | static int gk20a_channel_syncpt_incr_user(struct gk20a_channel_sync *s, |
300 | int wait_fence_fd, | 299 | int wait_fence_fd, |
301 | struct priv_cmd_entry **entry, | 300 | struct priv_cmd_entry *entry, |
302 | struct gk20a_fence **fence, | 301 | struct gk20a_fence *fence, |
303 | bool wfi, | 302 | bool wfi, |
304 | bool need_sync_fence) | 303 | bool need_sync_fence) |
305 | { | 304 | { |
@@ -500,8 +499,8 @@ static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c, | |||
500 | 499 | ||
501 | static int gk20a_channel_semaphore_wait_syncpt( | 500 | static int gk20a_channel_semaphore_wait_syncpt( |
502 | struct gk20a_channel_sync *s, u32 id, | 501 | struct gk20a_channel_sync *s, u32 id, |
503 | u32 thresh, struct priv_cmd_entry **entry, | 502 | u32 thresh, struct priv_cmd_entry *entry, |
504 | struct gk20a_fence **fence) | 503 | struct gk20a_fence *fence) |
505 | { | 504 | { |
506 | struct gk20a_channel_semaphore *sema = | 505 | struct gk20a_channel_semaphore *sema = |
507 | container_of(s, struct gk20a_channel_semaphore, ops); | 506 | container_of(s, struct gk20a_channel_semaphore, ops); |
@@ -525,7 +524,7 @@ static int gk20a_channel_semaphore_wait_syncpt( | |||
525 | */ | 524 | */ |
526 | static int __semaphore_wait_fd_fast_path(struct channel_gk20a *c, | 525 | static int __semaphore_wait_fd_fast_path(struct channel_gk20a *c, |
527 | struct sync_fence *fence, | 526 | struct sync_fence *fence, |
528 | struct priv_cmd_entry **wait_cmd, | 527 | struct priv_cmd_entry *wait_cmd, |
529 | struct gk20a_semaphore **fp_sema) | 528 | struct gk20a_semaphore **fp_sema) |
530 | { | 529 | { |
531 | struct gk20a_semaphore *sema; | 530 | struct gk20a_semaphore *sema; |
@@ -551,7 +550,7 @@ static int __semaphore_wait_fd_fast_path(struct channel_gk20a *c, | |||
551 | 550 | ||
552 | gk20a_semaphore_get(sema); | 551 | gk20a_semaphore_get(sema); |
553 | BUG_ON(!atomic_read(&sema->value)); | 552 | BUG_ON(!atomic_read(&sema->value)); |
554 | add_sema_cmd(c->g, c, sema, *wait_cmd, 8, true, false); | 553 | add_sema_cmd(c->g, c, sema, wait_cmd, 8, true, false); |
555 | 554 | ||
556 | /* | 555 | /* |
557 | * Make sure that gk20a_channel_semaphore_wait_fd() can create another | 556 | * Make sure that gk20a_channel_semaphore_wait_fd() can create another |
@@ -565,8 +564,8 @@ static int __semaphore_wait_fd_fast_path(struct channel_gk20a *c, | |||
565 | 564 | ||
566 | static int gk20a_channel_semaphore_wait_fd( | 565 | static int gk20a_channel_semaphore_wait_fd( |
567 | struct gk20a_channel_sync *s, int fd, | 566 | struct gk20a_channel_sync *s, int fd, |
568 | struct priv_cmd_entry **entry, | 567 | struct priv_cmd_entry *entry, |
569 | struct gk20a_fence **fence) | 568 | struct gk20a_fence *fence) |
570 | { | 569 | { |
571 | struct gk20a_channel_semaphore *sema = | 570 | struct gk20a_channel_semaphore *sema = |
572 | container_of(s, struct gk20a_channel_semaphore, ops); | 571 | container_of(s, struct gk20a_channel_semaphore, ops); |
@@ -574,7 +573,7 @@ static int gk20a_channel_semaphore_wait_fd( | |||
574 | #ifdef CONFIG_SYNC | 573 | #ifdef CONFIG_SYNC |
575 | struct gk20a_semaphore *fp_sema; | 574 | struct gk20a_semaphore *fp_sema; |
576 | struct sync_fence *sync_fence; | 575 | struct sync_fence *sync_fence; |
577 | struct priv_cmd_entry *wait_cmd = NULL; | 576 | struct priv_cmd_entry *wait_cmd = entry; |
578 | struct wait_fence_work *w = NULL; | 577 | struct wait_fence_work *w = NULL; |
579 | int err, ret, status; | 578 | int err, ret, status; |
580 | 579 | ||
@@ -582,19 +581,24 @@ static int gk20a_channel_semaphore_wait_fd( | |||
582 | if (!sync_fence) | 581 | if (!sync_fence) |
583 | return -EINVAL; | 582 | return -EINVAL; |
584 | 583 | ||
585 | ret = __semaphore_wait_fd_fast_path(c, sync_fence, &wait_cmd, &fp_sema); | 584 | ret = __semaphore_wait_fd_fast_path(c, sync_fence, wait_cmd, &fp_sema); |
586 | if (ret == 0) { | 585 | if (ret == 0) { |
587 | if (fp_sema) | 586 | if (fp_sema) { |
588 | *fence = gk20a_fence_from_semaphore(sema->timeline, | 587 | err = gk20a_fence_from_semaphore(fence, |
589 | fp_sema, | 588 | sema->timeline, |
590 | &c->semaphore_wq, | 589 | fp_sema, |
591 | NULL, false, false); | 590 | &c->semaphore_wq, |
592 | else | 591 | NULL, false, false); |
592 | if (err) { | ||
593 | gk20a_semaphore_put(fp_sema); | ||
594 | goto clean_up_priv_cmd; | ||
595 | } | ||
596 | } else | ||
593 | /* | 597 | /* |
594 | * Allocate an empty fence. It will instantly return | 598 | * Init an empty fence. It will instantly return |
595 | * from gk20a_fence_wait(). | 599 | * from gk20a_fence_wait(). |
596 | */ | 600 | */ |
597 | *fence = gk20a_alloc_fence(NULL, NULL, false); | 601 | gk20a_init_fence(fence, NULL, NULL, false); |
598 | 602 | ||
599 | sync_fence_put(sync_fence); | 603 | sync_fence_put(sync_fence); |
600 | goto skip_slow_path; | 604 | goto skip_slow_path; |
@@ -611,18 +615,17 @@ static int gk20a_channel_semaphore_wait_fd( | |||
611 | goto skip_slow_path; | 615 | goto skip_slow_path; |
612 | } | 616 | } |
613 | 617 | ||
614 | err = gk20a_channel_alloc_priv_cmdbuf(c, 8, &wait_cmd); | 618 | err = gk20a_channel_alloc_priv_cmdbuf(c, 8, wait_cmd); |
615 | if (err) { | 619 | if (err) { |
616 | gk20a_err(dev_from_gk20a(c->g), | 620 | gk20a_err(dev_from_gk20a(c->g), |
617 | "not enough priv cmd buffer space"); | 621 | "not enough priv cmd buffer space"); |
618 | sync_fence_put(sync_fence); | 622 | goto clean_up_sync_fence; |
619 | return -ENOMEM; | ||
620 | } | 623 | } |
621 | 624 | ||
622 | w = kzalloc(sizeof(*w), GFP_KERNEL); | 625 | w = kzalloc(sizeof(*w), GFP_KERNEL); |
623 | if (!w) { | 626 | if (!w) { |
624 | err = -ENOMEM; | 627 | err = -ENOMEM; |
625 | goto fail_free_cmdbuf; | 628 | goto clean_up_priv_cmd; |
626 | } | 629 | } |
627 | 630 | ||
628 | sync_fence_waiter_init(&w->waiter, gk20a_channel_semaphore_launcher); | 631 | sync_fence_waiter_init(&w->waiter, gk20a_channel_semaphore_launcher); |
@@ -631,7 +634,7 @@ static int gk20a_channel_semaphore_wait_fd( | |||
631 | if (!w->sema) { | 634 | if (!w->sema) { |
632 | gk20a_err(dev_from_gk20a(c->g), "ran out of semaphores"); | 635 | gk20a_err(dev_from_gk20a(c->g), "ran out of semaphores"); |
633 | err = -ENOMEM; | 636 | err = -ENOMEM; |
634 | goto fail_free_worker; | 637 | goto clean_up_worker; |
635 | } | 638 | } |
636 | 639 | ||
637 | /* worker takes one reference */ | 640 | /* worker takes one reference */ |
@@ -641,6 +644,16 @@ static int gk20a_channel_semaphore_wait_fd( | |||
641 | /* GPU unblocked when the semaphore value increments. */ | 644 | /* GPU unblocked when the semaphore value increments. */ |
642 | add_sema_cmd(c->g, c, w->sema, wait_cmd, 8, true, false); | 645 | add_sema_cmd(c->g, c, w->sema, wait_cmd, 8, true, false); |
643 | 646 | ||
647 | /* | ||
648 | * We need to create the fence before adding the waiter to ensure | ||
649 | * that we properly clean up in the event the sync_fence has | ||
650 | * already signaled | ||
651 | */ | ||
652 | err = gk20a_fence_from_semaphore(fence, sema->timeline, w->sema, | ||
653 | &c->semaphore_wq, NULL, false, false); | ||
654 | if (err) | ||
655 | goto clean_up_sema; | ||
656 | |||
644 | ret = sync_fence_wait_async(sync_fence, &w->waiter); | 657 | ret = sync_fence_wait_async(sync_fence, &w->waiter); |
645 | 658 | ||
646 | /* | 659 | /* |
@@ -655,24 +668,22 @@ static int gk20a_channel_semaphore_wait_fd( | |||
655 | gk20a_semaphore_put(w->sema); | 668 | gk20a_semaphore_put(w->sema); |
656 | } | 669 | } |
657 | 670 | ||
658 | /* XXX - this fixes an actual bug, we need to hold a ref to this | ||
659 | semaphore while the job is in flight. */ | ||
660 | *fence = gk20a_fence_from_semaphore(sema->timeline, w->sema, | ||
661 | &c->semaphore_wq, | ||
662 | NULL, false, false); | ||
663 | |||
664 | skip_slow_path: | 671 | skip_slow_path: |
665 | *entry = wait_cmd; | ||
666 | return 0; | 672 | return 0; |
667 | 673 | ||
668 | fail_free_worker: | 674 | clean_up_sema: |
669 | if (w && w->sema) | 675 | /* |
670 | gk20a_semaphore_put(w->sema); | 676 | * Release the refs to the semaphore, including |
677 | * the one for the worker since it will never run. | ||
678 | */ | ||
679 | gk20a_semaphore_put(w->sema); | ||
680 | gk20a_semaphore_put(w->sema); | ||
681 | clean_up_worker: | ||
671 | kfree(w); | 682 | kfree(w); |
683 | clean_up_priv_cmd: | ||
684 | gk20a_free_priv_cmdbuf(c, entry); | ||
685 | clean_up_sync_fence: | ||
672 | sync_fence_put(sync_fence); | 686 | sync_fence_put(sync_fence); |
673 | fail_free_cmdbuf: | ||
674 | if (wait_cmd) | ||
675 | gk20a_free_priv_cmdbuf(c, wait_cmd); | ||
676 | return err; | 687 | return err; |
677 | #else | 688 | #else |
678 | gk20a_err(dev_from_gk20a(c->g), | 689 | gk20a_err(dev_from_gk20a(c->g), |
@@ -684,12 +695,11 @@ fail_free_cmdbuf: | |||
684 | static int __gk20a_channel_semaphore_incr( | 695 | static int __gk20a_channel_semaphore_incr( |
685 | struct gk20a_channel_sync *s, bool wfi_cmd, | 696 | struct gk20a_channel_sync *s, bool wfi_cmd, |
686 | struct sync_fence *dependency, | 697 | struct sync_fence *dependency, |
687 | struct priv_cmd_entry **entry, | 698 | struct priv_cmd_entry *incr_cmd, |
688 | struct gk20a_fence **fence, | 699 | struct gk20a_fence *fence, |
689 | bool need_sync_fence) | 700 | bool need_sync_fence) |
690 | { | 701 | { |
691 | int incr_cmd_size; | 702 | int incr_cmd_size; |
692 | struct priv_cmd_entry *incr_cmd = NULL; | ||
693 | struct gk20a_channel_semaphore *sp = | 703 | struct gk20a_channel_semaphore *sp = |
694 | container_of(s, struct gk20a_channel_semaphore, ops); | 704 | container_of(s, struct gk20a_channel_semaphore, ops); |
695 | struct channel_gk20a *c = sp->c; | 705 | struct channel_gk20a *c = sp->c; |
@@ -704,29 +714,37 @@ static int __gk20a_channel_semaphore_incr( | |||
704 | } | 714 | } |
705 | 715 | ||
706 | incr_cmd_size = 10; | 716 | incr_cmd_size = 10; |
707 | err = gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, &incr_cmd); | 717 | err = gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, incr_cmd); |
708 | if (err) { | 718 | if (err) { |
709 | gk20a_err(dev_from_gk20a(c->g), | 719 | gk20a_err(dev_from_gk20a(c->g), |
710 | "not enough priv cmd buffer space"); | 720 | "not enough priv cmd buffer space"); |
711 | gk20a_semaphore_put(semaphore); | 721 | goto clean_up_sema; |
712 | return err; | ||
713 | } | 722 | } |
714 | 723 | ||
715 | /* Release the completion semaphore. */ | 724 | /* Release the completion semaphore. */ |
716 | add_sema_cmd(c->g, c, semaphore, incr_cmd, 14, false, wfi_cmd); | 725 | add_sema_cmd(c->g, c, semaphore, incr_cmd, 14, false, wfi_cmd); |
717 | 726 | ||
718 | *fence = gk20a_fence_from_semaphore(sp->timeline, semaphore, | 727 | err = gk20a_fence_from_semaphore(fence, |
719 | &c->semaphore_wq, | 728 | sp->timeline, semaphore, |
720 | dependency, wfi_cmd, | 729 | &c->semaphore_wq, |
721 | need_sync_fence); | 730 | dependency, wfi_cmd, |
722 | *entry = incr_cmd; | 731 | need_sync_fence); |
732 | if (err) | ||
733 | goto clean_up_priv_cmd; | ||
734 | |||
723 | return 0; | 735 | return 0; |
736 | |||
737 | clean_up_priv_cmd: | ||
738 | gk20a_free_priv_cmdbuf(c, incr_cmd); | ||
739 | clean_up_sema: | ||
740 | gk20a_semaphore_put(semaphore); | ||
741 | return err; | ||
724 | } | 742 | } |
725 | 743 | ||
726 | static int gk20a_channel_semaphore_incr_wfi( | 744 | static int gk20a_channel_semaphore_incr_wfi( |
727 | struct gk20a_channel_sync *s, | 745 | struct gk20a_channel_sync *s, |
728 | struct priv_cmd_entry **entry, | 746 | struct priv_cmd_entry *entry, |
729 | struct gk20a_fence **fence) | 747 | struct gk20a_fence *fence) |
730 | { | 748 | { |
731 | return __gk20a_channel_semaphore_incr(s, | 749 | return __gk20a_channel_semaphore_incr(s, |
732 | true /* wfi */, | 750 | true /* wfi */, |
@@ -736,8 +754,8 @@ static int gk20a_channel_semaphore_incr_wfi( | |||
736 | 754 | ||
737 | static int gk20a_channel_semaphore_incr( | 755 | static int gk20a_channel_semaphore_incr( |
738 | struct gk20a_channel_sync *s, | 756 | struct gk20a_channel_sync *s, |
739 | struct priv_cmd_entry **entry, | 757 | struct priv_cmd_entry *entry, |
740 | struct gk20a_fence **fence, | 758 | struct gk20a_fence *fence, |
741 | bool need_sync_fence) | 759 | bool need_sync_fence) |
742 | { | 760 | { |
743 | /* Don't put wfi cmd to this one since we're not returning | 761 | /* Don't put wfi cmd to this one since we're not returning |
@@ -751,8 +769,8 @@ static int gk20a_channel_semaphore_incr( | |||
751 | static int gk20a_channel_semaphore_incr_user( | 769 | static int gk20a_channel_semaphore_incr_user( |
752 | struct gk20a_channel_sync *s, | 770 | struct gk20a_channel_sync *s, |
753 | int wait_fence_fd, | 771 | int wait_fence_fd, |
754 | struct priv_cmd_entry **entry, | 772 | struct priv_cmd_entry *entry, |
755 | struct gk20a_fence **fence, | 773 | struct gk20a_fence *fence, |
756 | bool wfi, | 774 | bool wfi, |
757 | bool need_sync_fence) | 775 | bool need_sync_fence) |
758 | { | 776 | { |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h index 4b0918de..c3a92ad2 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h | |||
@@ -36,8 +36,8 @@ struct gk20a_channel_sync { | |||
36 | * cmdbuf is executed. | 36 | * cmdbuf is executed. |
37 | */ | 37 | */ |
38 | int (*wait_syncpt)(struct gk20a_channel_sync *s, u32 id, u32 thresh, | 38 | int (*wait_syncpt)(struct gk20a_channel_sync *s, u32 id, u32 thresh, |
39 | struct priv_cmd_entry **entry, | 39 | struct priv_cmd_entry *entry, |
40 | struct gk20a_fence **fence); | 40 | struct gk20a_fence *fence); |
41 | 41 | ||
42 | /* Generate a gpu wait cmdbuf from sync fd. | 42 | /* Generate a gpu wait cmdbuf from sync fd. |
43 | * Returns | 43 | * Returns |
@@ -46,8 +46,8 @@ struct gk20a_channel_sync { | |||
46 | * cmdbuf is executed. | 46 | * cmdbuf is executed. |
47 | */ | 47 | */ |
48 | int (*wait_fd)(struct gk20a_channel_sync *s, int fd, | 48 | int (*wait_fd)(struct gk20a_channel_sync *s, int fd, |
49 | struct priv_cmd_entry **entry, | 49 | struct priv_cmd_entry *entry, |
50 | struct gk20a_fence **fence); | 50 | struct gk20a_fence *fence); |
51 | 51 | ||
52 | /* Increment syncpoint/semaphore. | 52 | /* Increment syncpoint/semaphore. |
53 | * Returns | 53 | * Returns |
@@ -55,8 +55,8 @@ struct gk20a_channel_sync { | |||
55 | * - a fence that can be passed to wait_cpu() and is_expired(). | 55 | * - a fence that can be passed to wait_cpu() and is_expired(). |
56 | */ | 56 | */ |
57 | int (*incr)(struct gk20a_channel_sync *s, | 57 | int (*incr)(struct gk20a_channel_sync *s, |
58 | struct priv_cmd_entry **entry, | 58 | struct priv_cmd_entry *entry, |
59 | struct gk20a_fence **fence, | 59 | struct gk20a_fence *fence, |
60 | bool need_sync_fence); | 60 | bool need_sync_fence); |
61 | 61 | ||
62 | /* Increment syncpoint/semaphore, preceded by a wfi. | 62 | /* Increment syncpoint/semaphore, preceded by a wfi. |
@@ -65,8 +65,8 @@ struct gk20a_channel_sync { | |||
65 | * - a fence that can be passed to wait_cpu() and is_expired(). | 65 | * - a fence that can be passed to wait_cpu() and is_expired(). |
66 | */ | 66 | */ |
67 | int (*incr_wfi)(struct gk20a_channel_sync *s, | 67 | int (*incr_wfi)(struct gk20a_channel_sync *s, |
68 | struct priv_cmd_entry **entry, | 68 | struct priv_cmd_entry *entry, |
69 | struct gk20a_fence **fence); | 69 | struct gk20a_fence *fence); |
70 | 70 | ||
71 | /* Increment syncpoint/semaphore, so that the returned fence represents | 71 | /* Increment syncpoint/semaphore, so that the returned fence represents |
72 | * work completion (may need wfi) and can be returned to user space. | 72 | * work completion (may need wfi) and can be returned to user space. |
@@ -77,8 +77,8 @@ struct gk20a_channel_sync { | |||
77 | */ | 77 | */ |
78 | int (*incr_user)(struct gk20a_channel_sync *s, | 78 | int (*incr_user)(struct gk20a_channel_sync *s, |
79 | int wait_fence_fd, | 79 | int wait_fence_fd, |
80 | struct priv_cmd_entry **entry, | 80 | struct priv_cmd_entry *entry, |
81 | struct gk20a_fence **fence, | 81 | struct gk20a_fence *fence, |
82 | bool wfi, | 82 | bool wfi, |
83 | bool need_sync_fence); | 83 | bool need_sync_fence); |
84 | 84 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c index 596dc549..f788829f 100644 --- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c | |||
@@ -63,16 +63,27 @@ struct gk20a_fence *gk20a_fence_get(struct gk20a_fence *f) | |||
63 | return f; | 63 | return f; |
64 | } | 64 | } |
65 | 65 | ||
66 | static inline bool gk20a_fence_is_valid(struct gk20a_fence *f) | ||
67 | { | ||
68 | bool valid = f->valid; | ||
69 | |||
70 | rmb(); | ||
71 | return valid; | ||
72 | } | ||
73 | |||
66 | int gk20a_fence_wait(struct gk20a_fence *f, int timeout) | 74 | int gk20a_fence_wait(struct gk20a_fence *f, int timeout) |
67 | { | 75 | { |
68 | if (!tegra_platform_is_silicon()) | 76 | if (f && gk20a_fence_is_valid(f)) { |
69 | timeout = (u32)MAX_SCHEDULE_TIMEOUT; | 77 | if (!tegra_platform_is_silicon()) |
70 | return f->ops->wait(f, timeout); | 78 | timeout = (u32)MAX_SCHEDULE_TIMEOUT; |
79 | return f->ops->wait(f, timeout); | ||
80 | } | ||
81 | return 0; | ||
71 | } | 82 | } |
72 | 83 | ||
73 | bool gk20a_fence_is_expired(struct gk20a_fence *f) | 84 | bool gk20a_fence_is_expired(struct gk20a_fence *f) |
74 | { | 85 | { |
75 | if (f && f->ops) | 86 | if (f && gk20a_fence_is_valid(f) && f->ops) |
76 | return f->ops->is_expired(f); | 87 | return f->ops->is_expired(f); |
77 | else | 88 | else |
78 | return true; | 89 | return true; |
@@ -83,7 +94,7 @@ int gk20a_fence_install_fd(struct gk20a_fence *f) | |||
83 | #ifdef CONFIG_SYNC | 94 | #ifdef CONFIG_SYNC |
84 | int fd; | 95 | int fd; |
85 | 96 | ||
86 | if (!f->sync_fence) | 97 | if (!f || !gk20a_fence_is_valid(f) || !f->sync_fence) |
87 | return -EINVAL; | 98 | return -EINVAL; |
88 | 99 | ||
89 | fd = get_unused_fd_flags(O_RDWR); | 100 | fd = get_unused_fd_flags(O_RDWR); |
@@ -98,18 +109,28 @@ int gk20a_fence_install_fd(struct gk20a_fence *f) | |||
98 | #endif | 109 | #endif |
99 | } | 110 | } |
100 | 111 | ||
101 | struct gk20a_fence *gk20a_alloc_fence(const struct gk20a_fence_ops *ops, | 112 | struct gk20a_fence *gk20a_alloc_fence(struct channel_gk20a *c) |
102 | struct sync_fence *sync_fence, bool wfi) | ||
103 | { | 113 | { |
104 | struct gk20a_fence *f = kzalloc(sizeof(*f), GFP_KERNEL); | 114 | struct gk20a_fence *fence; |
105 | if (!f) | 115 | |
116 | fence = kzalloc(sizeof(struct gk20a_fence), GFP_KERNEL); | ||
117 | if (!fence) | ||
106 | return NULL; | 118 | return NULL; |
107 | kref_init(&f->ref); | 119 | |
120 | kref_init(&fence->ref); | ||
121 | return fence; | ||
122 | } | ||
123 | |||
124 | void gk20a_init_fence(struct gk20a_fence *f, | ||
125 | const struct gk20a_fence_ops *ops, | ||
126 | struct sync_fence *sync_fence, bool wfi) | ||
127 | { | ||
128 | if (!f) | ||
129 | return; | ||
108 | f->ops = ops; | 130 | f->ops = ops; |
109 | f->sync_fence = sync_fence; | 131 | f->sync_fence = sync_fence; |
110 | f->wfi = wfi; | 132 | f->wfi = wfi; |
111 | f->syncpt_id = -1; | 133 | f->syncpt_id = -1; |
112 | return f; | ||
113 | } | 134 | } |
114 | 135 | ||
115 | /* Fences that are backed by GPU semaphores: */ | 136 | /* Fences that are backed by GPU semaphores: */ |
@@ -143,14 +164,15 @@ static const struct gk20a_fence_ops gk20a_semaphore_fence_ops = { | |||
143 | }; | 164 | }; |
144 | 165 | ||
145 | /* This function takes ownership of the semaphore */ | 166 | /* This function takes ownership of the semaphore */ |
146 | struct gk20a_fence *gk20a_fence_from_semaphore( | 167 | int gk20a_fence_from_semaphore( |
168 | struct gk20a_fence *fence_out, | ||
147 | struct sync_timeline *timeline, | 169 | struct sync_timeline *timeline, |
148 | struct gk20a_semaphore *semaphore, | 170 | struct gk20a_semaphore *semaphore, |
149 | wait_queue_head_t *semaphore_wq, | 171 | wait_queue_head_t *semaphore_wq, |
150 | struct sync_fence *dependency, | 172 | struct sync_fence *dependency, |
151 | bool wfi, bool need_sync_fence) | 173 | bool wfi, bool need_sync_fence) |
152 | { | 174 | { |
153 | struct gk20a_fence *f; | 175 | struct gk20a_fence *f = fence_out; |
154 | struct sync_fence *sync_fence = NULL; | 176 | struct sync_fence *sync_fence = NULL; |
155 | 177 | ||
156 | #ifdef CONFIG_SYNC | 178 | #ifdef CONFIG_SYNC |
@@ -159,21 +181,26 @@ struct gk20a_fence *gk20a_fence_from_semaphore( | |||
159 | dependency, "f-gk20a-0x%04x", | 181 | dependency, "f-gk20a-0x%04x", |
160 | gk20a_semaphore_gpu_ro_va(semaphore)); | 182 | gk20a_semaphore_gpu_ro_va(semaphore)); |
161 | if (!sync_fence) | 183 | if (!sync_fence) |
162 | return NULL; | 184 | return -1; |
163 | } | 185 | } |
164 | #endif | 186 | #endif |
165 | 187 | ||
166 | f = gk20a_alloc_fence(&gk20a_semaphore_fence_ops, sync_fence, wfi); | 188 | gk20a_init_fence(f, &gk20a_semaphore_fence_ops, sync_fence, wfi); |
167 | if (!f) { | 189 | if (!f) { |
168 | #ifdef CONFIG_SYNC | 190 | #ifdef CONFIG_SYNC |
169 | sync_fence_put(sync_fence); | 191 | sync_fence_put(sync_fence); |
170 | #endif | 192 | #endif |
171 | return NULL; | 193 | return -EINVAL; |
172 | } | 194 | } |
173 | 195 | ||
174 | f->semaphore = semaphore; | 196 | f->semaphore = semaphore; |
175 | f->semaphore_wq = semaphore_wq; | 197 | f->semaphore_wq = semaphore_wq; |
176 | return f; | 198 | |
199 | /* commit previous writes before setting the valid flag */ | ||
200 | wmb(); | ||
201 | f->valid = true; | ||
202 | |||
203 | return 0; | ||
177 | } | 204 | } |
178 | 205 | ||
179 | #ifdef CONFIG_TEGRA_GK20A | 206 | #ifdef CONFIG_TEGRA_GK20A |
@@ -197,11 +224,13 @@ static const struct gk20a_fence_ops gk20a_syncpt_fence_ops = { | |||
197 | .is_expired = &gk20a_syncpt_fence_is_expired, | 224 | .is_expired = &gk20a_syncpt_fence_is_expired, |
198 | }; | 225 | }; |
199 | 226 | ||
200 | struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev, | 227 | int gk20a_fence_from_syncpt( |
201 | u32 id, u32 value, bool wfi, | 228 | struct gk20a_fence *fence_out, |
202 | bool need_sync_fence) | 229 | struct platform_device *host1x_pdev, |
230 | u32 id, u32 value, bool wfi, | ||
231 | bool need_sync_fence) | ||
203 | { | 232 | { |
204 | struct gk20a_fence *f; | 233 | struct gk20a_fence *f = fence_out; |
205 | struct sync_fence *sync_fence = NULL; | 234 | struct sync_fence *sync_fence = NULL; |
206 | 235 | ||
207 | #ifdef CONFIG_SYNC | 236 | #ifdef CONFIG_SYNC |
@@ -214,27 +243,32 @@ struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev, | |||
214 | sync_fence = nvhost_sync_create_fence(host1x_pdev, &pt, 1, | 243 | sync_fence = nvhost_sync_create_fence(host1x_pdev, &pt, 1, |
215 | "fence"); | 244 | "fence"); |
216 | if (IS_ERR(sync_fence)) | 245 | if (IS_ERR(sync_fence)) |
217 | return NULL; | 246 | return -1; |
218 | } | 247 | } |
219 | #endif | 248 | #endif |
220 | 249 | ||
221 | f = gk20a_alloc_fence(&gk20a_syncpt_fence_ops, sync_fence, wfi); | 250 | gk20a_init_fence(f, &gk20a_syncpt_fence_ops, sync_fence, wfi); |
222 | if (!f) { | 251 | if (!f) { |
223 | #ifdef CONFIG_SYNC | 252 | #ifdef CONFIG_SYNC |
224 | if (sync_fence) | 253 | if (sync_fence) |
225 | sync_fence_put(sync_fence); | 254 | sync_fence_put(sync_fence); |
226 | #endif | 255 | #endif |
227 | return NULL; | 256 | return -EINVAL; |
228 | } | 257 | } |
229 | f->host1x_pdev = host1x_pdev; | 258 | f->host1x_pdev = host1x_pdev; |
230 | f->syncpt_id = id; | 259 | f->syncpt_id = id; |
231 | f->syncpt_value = value; | 260 | f->syncpt_value = value; |
232 | return f; | 261 | |
262 | /* commit previous writes before setting the valid flag */ | ||
263 | wmb(); | ||
264 | f->valid = true; | ||
265 | |||
266 | return 0; | ||
233 | } | 267 | } |
234 | #else | 268 | #else |
235 | struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev, | 269 | int gk20a_fence_from_syncpt(struct platform_device *host1x_pdev, |
236 | u32 id, u32 value, bool wfi) | 270 | u32 id, u32 value, bool wfi) |
237 | { | 271 | { |
238 | return NULL; | 272 | return -EINVAL; |
239 | } | 273 | } |
240 | #endif | 274 | #endif |
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.h b/drivers/gpu/nvgpu/gk20a/fence_gk20a.h index 35488ea3..3fe2d8b2 100644 --- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.h | |||
@@ -31,6 +31,7 @@ struct gk20a_fence_ops; | |||
31 | 31 | ||
32 | struct gk20a_fence { | 32 | struct gk20a_fence { |
33 | /* Valid for all fence types: */ | 33 | /* Valid for all fence types: */ |
34 | bool valid; | ||
34 | struct kref ref; | 35 | struct kref ref; |
35 | bool wfi; | 36 | bool wfi; |
36 | struct sync_fence *sync_fence; | 37 | struct sync_fence *sync_fence; |
@@ -47,21 +48,25 @@ struct gk20a_fence { | |||
47 | }; | 48 | }; |
48 | 49 | ||
49 | /* Fences can be created from semaphores or syncpoint (id, value) pairs */ | 50 | /* Fences can be created from semaphores or syncpoint (id, value) pairs */ |
50 | struct gk20a_fence *gk20a_fence_from_semaphore( | 51 | int gk20a_fence_from_semaphore( |
52 | struct gk20a_fence *fence_out, | ||
51 | struct sync_timeline *timeline, | 53 | struct sync_timeline *timeline, |
52 | struct gk20a_semaphore *semaphore, | 54 | struct gk20a_semaphore *semaphore, |
53 | wait_queue_head_t *semaphore_wq, | 55 | wait_queue_head_t *semaphore_wq, |
54 | struct sync_fence *dependency, | 56 | struct sync_fence *dependency, |
55 | bool wfi, bool need_sync_fence); | 57 | bool wfi, bool need_sync_fence); |
56 | 58 | ||
57 | struct gk20a_fence *gk20a_fence_from_syncpt( | 59 | int gk20a_fence_from_syncpt( |
60 | struct gk20a_fence *fence_out, | ||
58 | struct platform_device *host1x_pdev, | 61 | struct platform_device *host1x_pdev, |
59 | u32 id, u32 value, bool wfi, | 62 | u32 id, u32 value, bool wfi, |
60 | bool need_sync_fence); | 63 | bool need_sync_fence); |
61 | 64 | ||
62 | struct gk20a_fence *gk20a_alloc_fence(const struct gk20a_fence_ops *ops, | 65 | struct gk20a_fence *gk20a_alloc_fence(struct channel_gk20a *c); |
63 | struct sync_fence *sync_fence, | 66 | |
64 | bool wfi); | 67 | void gk20a_init_fence(struct gk20a_fence *f, |
68 | const struct gk20a_fence_ops *ops, | ||
69 | struct sync_fence *sync_fence, bool wfi); | ||
65 | 70 | ||
66 | /* Fence operations */ | 71 | /* Fence operations */ |
67 | void gk20a_fence_put(struct gk20a_fence *f); | 72 | void gk20a_fence_put(struct gk20a_fence *f); |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index b34ff4a7..b2cca072 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -198,6 +198,7 @@ struct priv_cmd_queue { | |||
198 | }; | 198 | }; |
199 | 199 | ||
200 | struct priv_cmd_entry { | 200 | struct priv_cmd_entry { |
201 | bool valid; | ||
201 | struct mem_desc *mem; | 202 | struct mem_desc *mem; |
202 | u32 off; /* offset in mem, in u32 entries */ | 203 | u32 off; /* offset in mem, in u32 entries */ |
203 | u64 gva; | 204 | u64 gva; |