summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.c  156
1 file changed, 99 insertions, 57 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index f60a92b4..4019721a 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -1376,16 +1376,20 @@ static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c)
1376 1376
1377/* allocate a cmd buffer with given size. size is number of u32 entries */ 1377/* allocate a cmd buffer with given size. size is number of u32 entries */
1378int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size, 1378int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
1379 struct priv_cmd_entry **entry) 1379 struct priv_cmd_entry *e)
1380{ 1380{
1381 struct priv_cmd_queue *q = &c->priv_cmd_q; 1381 struct priv_cmd_queue *q = &c->priv_cmd_q;
1382 struct priv_cmd_entry *e;
1383 u32 free_count; 1382 u32 free_count;
1384 u32 size = orig_size; 1383 u32 size = orig_size;
1385 1384
1386 gk20a_dbg_fn("size %d", orig_size); 1385 gk20a_dbg_fn("size %d", orig_size);
1387 1386
1388 *entry = NULL; 1387 if (!e) {
1388 gk20a_err(dev_from_gk20a(c->g),
1389 "ch %d: priv cmd entry is null",
1390 c->hw_chid);
1391 return -EINVAL;
1392 }
1389 1393
1390 /* if free space in the end is less than requested, increase the size 1394 /* if free space in the end is less than requested, increase the size
1391 * to make the real allocated space start from beginning. */ 1395 * to make the real allocated space start from beginning. */
@@ -1400,14 +1404,6 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
1400 if (size > free_count) 1404 if (size > free_count)
1401 return -EAGAIN; 1405 return -EAGAIN;
1402 1406
1403 e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
1404 if (!e) {
1405 gk20a_err(dev_from_gk20a(c->g),
1406 "ch %d: fail to allocate priv cmd entry",
1407 c->hw_chid);
1408 return -ENOMEM;
1409 }
1410
1411 e->size = orig_size; 1407 e->size = orig_size;
1412 e->mem = &q->mem; 1408 e->mem = &q->mem;
1413 1409
@@ -1426,8 +1422,10 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
1426 /* we already handled q->put + size > q->size so BUG_ON this */ 1422 /* we already handled q->put + size > q->size so BUG_ON this */
1427 BUG_ON(q->put > q->size); 1423 BUG_ON(q->put > q->size);
1428 1424
1429 *entry = e; 1425 /* commit the previous writes before making the entry valid */
1426 wmb();
1430 1427
1428 e->valid = true;
1431 gk20a_dbg_fn("done"); 1429 gk20a_dbg_fn("done");
1432 1430
1433 return 0; 1431 return 0;
@@ -1441,6 +1439,21 @@ static void free_priv_cmdbuf(struct channel_gk20a *c,
1441 kfree(e); 1439 kfree(e);
1442} 1440}
1443 1441
1442static struct channel_gk20a_job *channel_gk20a_alloc_job(
1443 struct channel_gk20a *c)
1444{
1445 struct channel_gk20a_job *job = NULL;
1446
1447 job = kzalloc(sizeof(*job), GFP_KERNEL);
1448 return job;
1449}
1450
1451static void channel_gk20a_free_job(struct channel_gk20a *c,
1452 struct channel_gk20a_job *job)
1453{
1454 kfree(job);
1455}
1456
1444int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, 1457int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
1445 struct nvgpu_alloc_gpfifo_args *args) 1458 struct nvgpu_alloc_gpfifo_args *args)
1446{ 1459{
@@ -1818,10 +1831,15 @@ int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e)
1818 if (!e) 1831 if (!e)
1819 return 0; 1832 return 0;
1820 1833
1821 if ((q->get != e->off) && e->off != 0) 1834 if (e->valid) {
1822 gk20a_err(d, "requests out-of-order, ch=%d\n", c->hw_chid); 1835 /* read the entry's valid flag before reading its contents */
1836 rmb();
1837 if ((q->get != e->off) && e->off != 0)
1838 gk20a_err(d, "requests out-of-order, ch=%d\n",
1839 c->hw_chid);
1840 q->get = e->off + e->size;
1841 }
1823 1842
1824 q->get = e->off + e->size;
1825 free_priv_cmdbuf(c, e); 1843 free_priv_cmdbuf(c, e);
1826 1844
1827 return 0; 1845 return 0;
@@ -1854,14 +1872,10 @@ static void gk20a_channel_cancel_job_clean_up(struct channel_gk20a *c,
1854} 1872}
1855 1873
1856static int gk20a_channel_add_job(struct channel_gk20a *c, 1874static int gk20a_channel_add_job(struct channel_gk20a *c,
1857 struct gk20a_fence *pre_fence, 1875 struct channel_gk20a_job *job,
1858 struct gk20a_fence *post_fence,
1859 struct priv_cmd_entry *wait_cmd,
1860 struct priv_cmd_entry *incr_cmd,
1861 bool skip_buffer_refcounting) 1876 bool skip_buffer_refcounting)
1862{ 1877{
1863 struct vm_gk20a *vm = c->vm; 1878 struct vm_gk20a *vm = c->vm;
1864 struct channel_gk20a_job *job = NULL;
1865 struct mapped_buffer_node **mapped_buffers = NULL; 1879 struct mapped_buffer_node **mapped_buffers = NULL;
1866 int err = 0, num_mapped_buffers = 0; 1880 int err = 0, num_mapped_buffers = 0;
1867 1881
@@ -1875,22 +1889,12 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
1875 goto err_put_vm; 1889 goto err_put_vm;
1876 } 1890 }
1877 1891
1878 job = kzalloc(sizeof(*job), GFP_KERNEL);
1879 if (!job) {
1880 err = -ENOMEM;
1881 goto err_put_buffers;
1882 }
1883
1884 /* put() is done in gk20a_channel_update() when the job is done */ 1892 /* put() is done in gk20a_channel_update() when the job is done */
1885 c = gk20a_channel_get(c); 1893 c = gk20a_channel_get(c);
1886 1894
1887 if (c) { 1895 if (c) {
1888 job->num_mapped_buffers = num_mapped_buffers; 1896 job->num_mapped_buffers = num_mapped_buffers;
1889 job->mapped_buffers = mapped_buffers; 1897 job->mapped_buffers = mapped_buffers;
1890 job->pre_fence = pre_fence;
1891 job->post_fence = post_fence;
1892 job->wait_cmd = wait_cmd;
1893 job->incr_cmd = incr_cmd;
1894 1898
1895 gk20a_channel_timeout_start(c, job); 1899 gk20a_channel_timeout_start(c, job);
1896 1900
@@ -1899,13 +1903,11 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
1899 spin_unlock(&c->jobs_lock); 1903 spin_unlock(&c->jobs_lock);
1900 } else { 1904 } else {
1901 err = -ETIMEDOUT; 1905 err = -ETIMEDOUT;
1902 goto err_free_job; 1906 goto err_put_buffers;
1903 } 1907 }
1904 1908
1905 return 0; 1909 return 0;
1906 1910
1907err_free_job:
1908 kfree(job);
1909err_put_buffers: 1911err_put_buffers:
1910 gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers); 1912 gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
1911err_put_vm: 1913err_put_vm:
@@ -2000,7 +2002,7 @@ static void gk20a_channel_clean_up_jobs(struct work_struct *work)
2000 list_del_init(&job->list); 2002 list_del_init(&job->list);
2001 spin_unlock(&c->jobs_lock); 2003 spin_unlock(&c->jobs_lock);
2002 2004
2003 kfree(job); 2005 channel_gk20a_free_job(c, job);
2004 job_finished = 1; 2006 job_finished = 1;
2005 gk20a_idle(g->dev); 2007 gk20a_idle(g->dev);
2006 } 2008 }
@@ -2143,6 +2145,7 @@ out:
2143 */ 2145 */
2144static int gk20a_submit_prepare_syncs(struct channel_gk20a *c, 2146static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
2145 struct nvgpu_fence *fence, 2147 struct nvgpu_fence *fence,
2148 struct channel_gk20a_job *job,
2146 struct priv_cmd_entry **wait_cmd, 2149 struct priv_cmd_entry **wait_cmd,
2147 struct priv_cmd_entry **incr_cmd, 2150 struct priv_cmd_entry **incr_cmd,
2148 struct gk20a_fence **pre_fence, 2151 struct gk20a_fence **pre_fence,
@@ -2194,18 +2197,32 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
2194 * this condition. 2197 * this condition.
2195 */ 2198 */
2196 if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) { 2199 if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
2200 job->wait_cmd = kzalloc(sizeof(struct priv_cmd_entry),
2201 GFP_KERNEL);
2202 job->pre_fence = gk20a_alloc_fence(c);
2203
2204 if (!job->wait_cmd || !job->pre_fence) {
2205 err = -ENOMEM;
2206 goto clean_up_pre_fence;
2207 }
2208
2197 if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) { 2209 if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
2198 wait_fence_fd = fence->id; 2210 wait_fence_fd = fence->id;
2199 err = c->sync->wait_fd(c->sync, wait_fence_fd, 2211 err = c->sync->wait_fd(c->sync, wait_fence_fd,
2200 wait_cmd, pre_fence); 2212 job->wait_cmd, job->pre_fence);
2201 } else { 2213 } else {
2202 err = c->sync->wait_syncpt(c->sync, fence->id, 2214 err = c->sync->wait_syncpt(c->sync, fence->id,
2203 fence->value, wait_cmd, 2215 fence->value, job->wait_cmd,
2204 pre_fence); 2216 job->pre_fence);
2205 } 2217 }
2218
2219 if (!err) {
2220 if (job->wait_cmd->valid)
2221 *wait_cmd = job->wait_cmd;
2222 *pre_fence = job->pre_fence;
2223 } else
2224 goto clean_up_pre_fence;
2206 } 2225 }
2207 if (err)
2208 goto fail;
2209 2226
2210 if ((flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) && 2227 if ((flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) &&
2211 (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)) 2228 (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE))
@@ -2216,22 +2233,41 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
2216 * is used to keep track of method completion for idle railgating. The 2233 * is used to keep track of method completion for idle railgating. The
2217 * sync_pt/semaphore PB is added to the GPFIFO later on in submit. 2234 * sync_pt/semaphore PB is added to the GPFIFO later on in submit.
2218 */ 2235 */
2236 job->incr_cmd = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
2237 job->post_fence = gk20a_alloc_fence(c);
2238
2239 if (!job->incr_cmd || !job->post_fence) {
2240 err = -ENOMEM;
2241 goto clean_up_post_fence;
2242 }
2243
2219 if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) 2244 if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
2220 err = c->sync->incr_user(c->sync, wait_fence_fd, incr_cmd, 2245 err = c->sync->incr_user(c->sync, wait_fence_fd, job->incr_cmd,
2221 post_fence, need_wfi, need_sync_fence); 2246 job->post_fence, need_wfi, need_sync_fence);
2222 else 2247 else
2223 err = c->sync->incr(c->sync, incr_cmd, 2248 err = c->sync->incr(c->sync, job->incr_cmd,
2224 post_fence, need_sync_fence); 2249 job->post_fence, need_sync_fence);
2225 if (err) 2250 if (!err) {
2226 goto fail; 2251 *incr_cmd = job->incr_cmd;
2252 *post_fence = job->post_fence;
2253 } else
2254 goto clean_up_post_fence;
2227 2255
2228 return 0; 2256 return 0;
2229 2257
2258clean_up_post_fence:
2259 gk20a_free_priv_cmdbuf(c, job->incr_cmd);
2260 gk20a_fence_put(job->post_fence);
2261 job->incr_cmd = NULL;
2262 job->post_fence = NULL;
2263clean_up_pre_fence:
2264 gk20a_free_priv_cmdbuf(c, job->wait_cmd);
2265 gk20a_fence_put(job->pre_fence);
2266 job->wait_cmd = NULL;
2267 job->pre_fence = NULL;
2268 *wait_cmd = NULL;
2269 *pre_fence = NULL;
2230fail: 2270fail:
2231 /*
2232 * Cleanup is handled by gk20a_submit_channel_gpfifo() since it is the
2233 * real owner of the objects we make here.
2234 */
2235 return err; 2271 return err;
2236} 2272}
2237 2273
@@ -2250,6 +2286,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
2250 struct priv_cmd_entry *incr_cmd = NULL; 2286 struct priv_cmd_entry *incr_cmd = NULL;
2251 struct gk20a_fence *pre_fence = NULL; 2287 struct gk20a_fence *pre_fence = NULL;
2252 struct gk20a_fence *post_fence = NULL; 2288 struct gk20a_fence *post_fence = NULL;
2289 struct channel_gk20a_job *job = NULL;
2253 /* we might need two extra gpfifo entries - one for pre fence 2290 /* we might need two extra gpfifo entries - one for pre fence
2254 * and one for post fence. */ 2291 * and one for post fence. */
2255 const int extra_entries = 2; 2292 const int extra_entries = 2;
@@ -2351,11 +2388,18 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
2351 } 2388 }
2352 2389
2353 if (need_job_tracking) { 2390 if (need_job_tracking) {
2354 err = gk20a_submit_prepare_syncs(c, fence, &wait_cmd, &incr_cmd, 2391 job = channel_gk20a_alloc_job(c);
2392 if (!job) {
2393 err = -ENOMEM;
2394 goto clean_up;
2395 }
2396
2397 err = gk20a_submit_prepare_syncs(c, fence, job,
2398 &wait_cmd, &incr_cmd,
2355 &pre_fence, &post_fence, 2399 &pre_fence, &post_fence,
2356 force_need_sync_fence, flags); 2400 force_need_sync_fence, flags);
2357 if (err) 2401 if (err)
2358 goto clean_up; 2402 goto clean_up_job;
2359 } 2403 }
2360 2404
2361 if (wait_cmd) 2405 if (wait_cmd)
@@ -2365,7 +2409,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
2365 err = gk20a_submit_append_gpfifo(c, gpfifo, user_gpfifo, 2409 err = gk20a_submit_append_gpfifo(c, gpfifo, user_gpfifo,
2366 num_entries); 2410 num_entries);
2367 if (err) 2411 if (err)
2368 goto clean_up; 2412 goto clean_up_job;
2369 2413
2370 /* 2414 /*
2371 * And here's where we add the incr_cmd we generated earlier. It should 2415 * And here's where we add the incr_cmd we generated earlier. It should
@@ -2379,9 +2423,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
2379 2423
2380 if (need_job_tracking) 2424 if (need_job_tracking)
2381 /* TODO! Check for errors... */ 2425 /* TODO! Check for errors... */
2382 gk20a_channel_add_job(c, pre_fence, post_fence, 2426 gk20a_channel_add_job(c, job, skip_buffer_refcounting);
2383 wait_cmd, incr_cmd,
2384 skip_buffer_refcounting);
2385 2427
2386 g->ops.fifo.userd_gp_put(g, c); 2428 g->ops.fifo.userd_gp_put(g, c);
2387 2429
@@ -2398,10 +2440,10 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
2398 gk20a_dbg_fn("done"); 2440 gk20a_dbg_fn("done");
2399 return err; 2441 return err;
2400 2442
2443clean_up_job:
2444 channel_gk20a_free_job(c, job);
2401clean_up: 2445clean_up:
2402 gk20a_dbg_fn("fail"); 2446 gk20a_dbg_fn("fail");
2403 free_priv_cmdbuf(c, wait_cmd);
2404 free_priv_cmdbuf(c, incr_cmd);
2405 gk20a_fence_put(pre_fence); 2447 gk20a_fence_put(pre_fence);
2406 gk20a_fence_put(post_fence); 2448 gk20a_fence_put(post_fence);
2407 if (need_job_tracking) 2449 if (need_job_tracking)