summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
diff options
context:
space:
mode:
author Deepak Nibade <dnibade@nvidia.com> 2015-10-29 05:50:50 -0400
committer Terje Bergstrom <tbergstrom@nvidia.com> 2015-11-23 11:32:39 -0500
commit f50d0ffb15aef2cbf419b81cdbc3031097767bff (patch)
tree e1a6b25699c735864c6048dd72568d2435e50f51 /drivers/gpu/nvgpu/gk20a/channel_gk20a.c
parent 67fe5f6d738a9b1ba2d8f5dd0726790d965c8883 (diff)
gpu: nvgpu: support skipping buffer refcounting in submit
In the job submission path, we always take a refcount on all the mapped buffers to safeguard against the case where user space releases a buffer early. But when user space itself is doing proper buffer management, the kernel need not take refcounts on all the buffers - which is also an overhead in the submit path. Hence, provide a new submit flag, NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING, to optionally skip taking refcounts on all the buffers. Also, if we do not take refcounts, then there is no need to drop any refcounts in gk20a_channel_update() either. Bug 1698667 Bug 200141116 Change-Id: I81bb7a03240300b691c70bcec04ea1badd5934f4 Signed-off-by: Deepak Nibade <dnibade@nvidia.com> Reviewed-on: http://git-master/r/824718 (cherry picked from commit 8c8978fa303ec4e6db0233becdbdcbad4a248173) Reviewed-on: http://git-master/r/835801 Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_gk20a.c')
-rw-r--r--drivers/gpu/nvgpu/gk20a/channel_gk20a.c24
1 files changed, 16 insertions, 8 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 0b84b7da..9bacb5c9 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -1726,20 +1726,24 @@ fail_unlock:
1726 1726
1727static int gk20a_channel_add_job(struct channel_gk20a *c, 1727static int gk20a_channel_add_job(struct channel_gk20a *c,
1728 struct gk20a_fence *pre_fence, 1728 struct gk20a_fence *pre_fence,
1729 struct gk20a_fence *post_fence) 1729 struct gk20a_fence *post_fence,
1730 bool skip_buffer_refcounting)
1730{ 1731{
1731 struct vm_gk20a *vm = c->vm; 1732 struct vm_gk20a *vm = c->vm;
1732 struct channel_gk20a_job *job = NULL; 1733 struct channel_gk20a_job *job = NULL;
1733 struct mapped_buffer_node **mapped_buffers = NULL; 1734 struct mapped_buffer_node **mapped_buffers = NULL;
1734 int err = 0, num_mapped_buffers; 1735 int err = 0, num_mapped_buffers = 0;
1735 1736
1736 /* job needs reference to this vm (released in channel_update) */ 1737 /* job needs reference to this vm (released in channel_update) */
1737 gk20a_vm_get(vm); 1738 gk20a_vm_get(vm);
1738 1739
1739 err = gk20a_vm_get_buffers(vm, &mapped_buffers, &num_mapped_buffers); 1740 if (!skip_buffer_refcounting) {
1740 if (err) { 1741 err = gk20a_vm_get_buffers(vm, &mapped_buffers,
1741 gk20a_vm_put(vm); 1742 &num_mapped_buffers);
1742 return err; 1743 if (err) {
1744 gk20a_vm_put(vm);
1745 return err;
1746 }
1743 } 1747 }
1744 1748
1745 job = kzalloc(sizeof(*job), GFP_KERNEL); 1749 job = kzalloc(sizeof(*job), GFP_KERNEL);
@@ -1795,7 +1799,8 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
1795 if (c->sync) 1799 if (c->sync)
1796 c->sync->signal_timeline(c->sync); 1800 c->sync->signal_timeline(c->sync);
1797 1801
1798 gk20a_vm_put_buffers(vm, job->mapped_buffers, 1802 if (job->num_mapped_buffers)
1803 gk20a_vm_put_buffers(vm, job->mapped_buffers,
1799 job->num_mapped_buffers); 1804 job->num_mapped_buffers);
1800 1805
1801 /* Close the fences (this will unref the semaphores and release 1806 /* Close the fences (this will unref the semaphores and release
@@ -1858,6 +1863,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
1858 const int extra_entries = 2; 1863 const int extra_entries = 2;
1859 bool need_wfi = !(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI); 1864 bool need_wfi = !(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI);
1860 struct nvgpu_gpfifo *gpfifo_mem = c->gpfifo.mem.cpu_va; 1865 struct nvgpu_gpfifo *gpfifo_mem = c->gpfifo.mem.cpu_va;
1866 bool skip_buffer_refcounting = (flags &
1867 NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING);
1861 1868
1862 if (c->has_timedout) 1869 if (c->has_timedout)
1863 return -ETIMEDOUT; 1870 return -ETIMEDOUT;
@@ -2106,7 +2113,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
2106 *fence_out = gk20a_fence_get(post_fence); 2113 *fence_out = gk20a_fence_get(post_fence);
2107 2114
2108 /* TODO! Check for errors... */ 2115 /* TODO! Check for errors... */
2109 gk20a_channel_add_job(c, pre_fence, post_fence); 2116 gk20a_channel_add_job(c, pre_fence, post_fence,
2117 skip_buffer_refcounting);
2110 2118
2111 c->cmds_pending = true; 2119 c->cmds_pending = true;
2112 gk20a_bar1_writel(g, 2120 gk20a_bar1_writel(g,