author	Deepak Nibade <dnibade@nvidia.com>	2015-11-04 03:36:37 -0500
committer	Terje Bergstrom <tbergstrom@nvidia.com>	2015-11-23 11:33:01 -0500
commit	2d40ebb1caa313d5d12a13f15b2623faa3ad914b (patch)
tree	ac9463d1255e896ec06fb7b03870a5323ff9ceb7 /drivers/gpu
parent	f50d0ffb15aef2cbf419b81cdbc3031097767bff (diff)
gpu: nvgpu: rework private command buffer free path
We currently allocate private command buffers (wait_cmd and incr_cmd) before submitting a job, but we never free them explicitly. When the channel's private command queue is full, we instead try to recycle/remove free command buffers. Since this recycling happens on the submit path, that particular submit takes much longer.

Rework this as below:
- add references to the command buffers to the job structure
- when the job completes, free the command buffers explicitly
- remove the code to recycle buffers, since it should no longer be needed

Note that command buffers need to be freed in the order of their allocation. Ensure this with an error print before freeing the command buffer entry.

Bug 200141116
Bug 1698667

Change-Id: Id4b69429d7ad966307e0d122a71ad55076684307
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/827638
(cherry picked from commit c6cefd69b71c9b70d6df5343b13dfcfb3fa99598)
Reviewed-on: http://git-master/r/835802
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
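To make the reworked flow easier to follow, here is a minimal, self-contained sketch in plain C (not driver code) of the idea behind the patch: free space in the circular private command queue is derived from get/put, each job carries its wait/incr entries, and the entries are released in allocation order when the job completes, with an error print if they come back out of order. The types and helpers below (priv_cmd_queue, alloc_entry, free_entry) are simplified stand-ins for the nvgpu structures, and wrap-around of an entry across the end of the queue is ignored for brevity.

/* Minimal sketch (not driver code) of the reworked private command
 * buffer free path: allocate at 'put', free in allocation order at 'get'. */
#include <stdio.h>
#include <stdlib.h>

struct priv_cmd_entry {
	unsigned int start;	/* offset of the entry in the queue, in words */
	unsigned int size;	/* entry size in words */
};

struct priv_cmd_queue {
	unsigned int size;	/* queue size in words */
	unsigned int put;
	unsigned int get;
};

/* Space left in the circular queue, mirroring the driver's arithmetic. */
static unsigned int free_count(const struct priv_cmd_queue *q)
{
	return (q->size - (q->put - q->get) - 1) % q->size;
}

/* Allocate an entry at 'put'; with the recycle path gone, a full queue
 * simply fails (the driver returns -EAGAIN here). */
static struct priv_cmd_entry *alloc_entry(struct priv_cmd_queue *q,
					  unsigned int size)
{
	struct priv_cmd_entry *e;

	if (size > free_count(q))
		return NULL;
	e = calloc(1, sizeof(*e));
	if (!e)
		return NULL;
	e->start = q->put;
	e->size = size;
	q->put = (q->put + size) % q->size;
	return e;
}

/* Free an entry when its job completes. Entries must come back in
 * allocation order; warn (like the patch's gk20a_err) if they do not. */
static void free_entry(struct priv_cmd_queue *q, struct priv_cmd_entry *e)
{
	if (!e)
		return;
	if (q->get != e->start && e->start != 0)
		fprintf(stderr, "requests out-of-order\n");
	q->get = e->start + e->size;
	free(e);
}

int main(void)
{
	struct priv_cmd_queue q = { .size = 128 };
	/* one job: wait_cmd is allocated first, incr_cmd second ... */
	struct priv_cmd_entry *wait_cmd = alloc_entry(&q, 8);
	struct priv_cmd_entry *incr_cmd = alloc_entry(&q, 10);

	/* ... so on job completion they are released in the same order */
	free_entry(&q, wait_cmd);
	free_entry(&q, incr_cmd);
	printf("free words: %u\n", free_count(&q));
	return 0;
}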
Diffstat (limited to 'drivers/gpu')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/channel_gk20a.c	121
-rw-r--r--	drivers/gpu/nvgpu/gk20a/channel_gk20a.h	2
-rw-r--r--	drivers/gpu/nvgpu/gk20a/mm_gk20a.h	2
3 files changed, 34 insertions, 91 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 9bacb5c9..6015ab5e 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -47,7 +47,6 @@ static void free_channel(struct fifo_gk20a *f, struct channel_gk20a *c);
 
 static void free_priv_cmdbuf(struct channel_gk20a *c,
 			     struct priv_cmd_entry *e);
-static void recycle_priv_cmdbuf(struct channel_gk20a *c);
 
 static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c);
 static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c);
@@ -1179,9 +1178,6 @@ static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c)
 
 	q->size = q->mem.size / sizeof (u32);
 
-	INIT_LIST_HEAD(&q->head);
-	INIT_LIST_HEAD(&q->free);
-
 	return 0;
 
 clean_up:
@@ -1193,28 +1189,12 @@ static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c)
 {
 	struct vm_gk20a *ch_vm = c->vm;
 	struct priv_cmd_queue *q = &c->priv_cmd_q;
-	struct priv_cmd_entry *e;
-	struct list_head *pos, *tmp, *head;
 
 	if (q->size == 0)
 		return;
 
 	gk20a_gmmu_unmap_free(ch_vm, &q->mem);
 
-	/* free used list */
-	head = &q->head;
-	list_for_each_safe(pos, tmp, head) {
-		e = container_of(pos, struct priv_cmd_entry, list);
-		free_priv_cmdbuf(c, e);
-	}
-
-	/* free free list */
-	head = &q->free;
-	list_for_each_safe(pos, tmp, head) {
-		e = container_of(pos, struct priv_cmd_entry, list);
-		kfree(e);
-	}
-
 	memset(q, 0, sizeof(struct priv_cmd_queue));
 }
 
@@ -1226,7 +1206,6 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
 	struct priv_cmd_entry *e;
 	u32 free_count;
 	u32 size = orig_size;
-	bool no_retry = false;
 
 	gk20a_dbg_fn("size %d", orig_size);
 
@@ -1240,17 +1219,10 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
 	gk20a_dbg_info("ch %d: priv cmd queue get:put %d:%d",
 			c->hw_chid, q->get, q->put);
 
-TRY_AGAIN:
 	free_count = (q->size - (q->put - q->get) - 1) % q->size;
 
-	if (size > free_count) {
-		if (!no_retry) {
-			recycle_priv_cmdbuf(c);
-			no_retry = true;
-			goto TRY_AGAIN;
-		} else
-			return -EAGAIN;
-	}
+	if (size > free_count)
+		return -EAGAIN;
 
 	e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
 	if (!e) {
@@ -1280,9 +1252,6 @@ TRY_AGAIN:
 	/* we already handled q->put + size > q->size so BUG_ON this */
 	BUG_ON(q->put > q->size);
 
-	/* add new entry to head since we free from head */
-	list_add(&e->list, &q->head);
-
 	*entry = e;
 
 	gk20a_dbg_fn("done");
@@ -1295,65 +1264,9 @@ TRY_AGAIN:
 static void free_priv_cmdbuf(struct channel_gk20a *c,
 	struct priv_cmd_entry *e)
 {
-	if (!e)
-		return;
-
-	list_del(&e->list);
-
 	kfree(e);
 }
 
-/* free entries if they're no longer being used */
-static void recycle_priv_cmdbuf(struct channel_gk20a *c)
-{
-	struct priv_cmd_queue *q = &c->priv_cmd_q;
-	struct priv_cmd_entry *e, *tmp;
-	struct list_head *head = &q->head;
-	bool wrap_around, found = false;
-
-	gk20a_dbg_fn("");
-
-	/* Find the most recent free entry. Free it and everything before it */
-	list_for_each_entry(e, head, list) {
-
-		gk20a_dbg_info("ch %d: cmd entry get:put:wrap %d:%d:%d "
-			"curr get:put:wrap %d:%d:%d",
-			c->hw_chid, e->gp_get, e->gp_put, e->gp_wrap,
-			c->gpfifo.get, c->gpfifo.put, c->gpfifo.wrap);
-
-		wrap_around = (c->gpfifo.wrap != e->gp_wrap);
-		if (e->gp_get < e->gp_put) {
-			if (c->gpfifo.get >= e->gp_put ||
-			    wrap_around) {
-				found = true;
-				break;
-			} else
-				e->gp_get = c->gpfifo.get;
-		} else if (e->gp_get > e->gp_put) {
-			if (wrap_around &&
-			    c->gpfifo.get >= e->gp_put) {
-				found = true;
-				break;
-			} else
-				e->gp_get = c->gpfifo.get;
-		}
-	}
-
-	if (found)
-		q->get = (e->ptr - (u32 *)q->mem.cpu_va) + e->size;
-	else {
-		gk20a_dbg_info("no free entry recycled");
-		return;
-	}
-
-	list_for_each_entry_safe_continue(e, tmp, head, list) {
-		free_priv_cmdbuf(c, e);
-	}
-
-	gk20a_dbg_fn("done");
-}
-
-
 int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
 		struct nvgpu_alloc_gpfifo_args *args)
 {
@@ -1724,9 +1637,31 @@ fail_unlock:
 	gk20a_channel_put(ch);
 }
 
+static int gk20a_free_priv_cmdbuf(struct channel_gk20a *c,
+			struct priv_cmd_entry *e)
+{
+	struct priv_cmd_queue *q = &c->priv_cmd_q;
+	u32 cmd_entry_start;
+	struct device *d = dev_from_gk20a(c->g);
+
+	if (!e)
+		return 0;
+
+	cmd_entry_start = (u32)(e->ptr - (u32 *)q->mem.cpu_va);
+	if ((q->get != cmd_entry_start) && cmd_entry_start != 0)
+		gk20a_err(d, "requests out-of-order, ch=%d\n", c->hw_chid);
+
+	q->get = (e->ptr - (u32 *)q->mem.cpu_va) + e->size;
+	free_priv_cmdbuf(c, e);
+
+	return 0;
+}
+
 static int gk20a_channel_add_job(struct channel_gk20a *c,
 				 struct gk20a_fence *pre_fence,
 				 struct gk20a_fence *post_fence,
+				 struct priv_cmd_entry *wait_cmd,
+				 struct priv_cmd_entry *incr_cmd,
 				 bool skip_buffer_refcounting)
 {
 	struct vm_gk20a *vm = c->vm;
@@ -1761,6 +1696,8 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
 	job->mapped_buffers = mapped_buffers;
 	job->pre_fence = gk20a_fence_get(pre_fence);
 	job->post_fence = gk20a_fence_get(post_fence);
+	job->wait_cmd = wait_cmd;
+	job->incr_cmd = incr_cmd;
 
 	gk20a_channel_timeout_start(c, job);
 
@@ -1808,6 +1745,11 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
 	gk20a_fence_put(job->pre_fence);
 	gk20a_fence_put(job->post_fence);
 
+	/* Free the private command buffers (wait_cmd first and
+	 * then incr_cmd i.e. order of allocation) */
+	gk20a_free_priv_cmdbuf(c, job->wait_cmd);
+	gk20a_free_priv_cmdbuf(c, job->incr_cmd);
+
 	/* job is done. release its vm reference (taken in add_job) */
 	gk20a_vm_put(vm);
 	/* another bookkeeping taken in add_job. caller must hold a ref
@@ -2114,6 +2056,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 
 	/* TODO! Check for errors... */
 	gk20a_channel_add_job(c, pre_fence, post_fence,
+				wait_cmd, incr_cmd,
 				skip_buffer_refcounting);
 
 	c->cmds_pending = true;
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index d5f5e6a2..245db56a 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -62,6 +62,8 @@ struct channel_gk20a_job {
 	int num_mapped_buffers;
 	struct gk20a_fence *pre_fence;
 	struct gk20a_fence *post_fence;
+	struct priv_cmd_entry *wait_cmd;
+	struct priv_cmd_entry *incr_cmd;
 	struct list_head list;
 };
 
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 2dd4ccf5..ac55e988 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -160,8 +160,6 @@ struct priv_cmd_queue {
 	u32 size;	/* num of entries in words */
 	u32 put;	/* put for priv cmd queue */
 	u32 get;	/* get for priv cmd queue */
-	struct list_head free;	/* list of pre-allocated free entries */
-	struct list_head head;	/* list of used entries */
 };
 
 struct priv_cmd_entry {