author	Deepak Nibade <dnibade@nvidia.com>	2016-04-18 06:16:10 -0400
committer	Terje Bergstrom <tbergstrom@nvidia.com>	2016-04-19 11:16:13 -0400
commit	e0c9da1fe9d8862fc89773208aa170b7c73d093b (patch)
tree	f9f0f9edbe7ae1c2f44285b0ce89385d18dc826a
parent	1c96bc6942cdae7f4e90563687da7d068aea90bc (diff)
gpu: nvgpu: implement sync refcounting
We currently free the sync object when we find the job list empty.
If aggressive_sync_destroy is set to true, we also try to free the
sync during the channel_unbind() call. But we rarely free it from
channel_unbind(), since freeing it when the job list is empty is
aggressive enough. Hence remove the sync-free code from
channel_unbind().

Implement refcounting for the sync object:
- get a refcount while submitting a job (and allocate the sync
  if it is not already allocated)
- put a refcount while freeing the job
- if refcount == 0 and aggressive_sync_destroy is set, free the
  sync
- if aggressive_sync_destroy is not set, free the sync at channel
  close time

Bug 200187553

Change-Id: I74e24adb15dc26a375ebca1fdd017b3ad6d57b61
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1120410
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
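In short, the patch reduces sync lifetime management to a get/put pair
around c->sync. Below is a minimal sketch of that protocol in kernel C;
the helper names channel_sync_get()/channel_sync_put() are hypothetical
(the patch inlines this logic in gk20a_submit_channel_gpfifo() and
gk20a_channel_clean_up_jobs()), and the "aggressive" parameter stands
in for platform->aggressive_sync_destroy.

/* Submit path: take a reference, creating the sync object on demand.
 * Hypothetical helper; mirrors the gk20a_submit_channel_gpfifo() hunk. */
static int channel_sync_get(struct channel_gk20a *c)
{
	int err = 0;

	mutex_lock(&c->sync_lock);
	if (!c->sync) {
		c->sync = gk20a_channel_sync_create(c);
		if (!c->sync)
			err = -ENOMEM;
	}
	if (!err)
		atomic_inc(&c->sync->refcount);
	mutex_unlock(&c->sync_lock);
	return err;
}

/* Job-cleanup path: drop the reference. The last put destroys the sync
 * immediately only in aggressive mode; otherwise it lives until channel
 * close. Hypothetical helper; mirrors gk20a_channel_clean_up_jobs(). */
static void channel_sync_put(struct channel_gk20a *c, bool aggressive)
{
	mutex_lock(&c->sync_lock);
	if (c->sync && atomic_dec_and_test(&c->sync->refcount) &&
	    aggressive) {
		gk20a_channel_sync_destroy(c->sync);
		c->sync = NULL;
	}
	mutex_unlock(&c->sync_lock);
}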
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.c       | 45
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c  |  7
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h  |  6
-rw-r--r--  drivers/gpu/nvgpu/vgpu/fifo_vgpu.c            | 10
4 files changed, 23 insertions, 45 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index aeb115ef..b282db89 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -370,7 +370,6 @@ static void channel_gk20a_bind(struct channel_gk20a *c)
 void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
 {
 	struct gk20a *g = ch_gk20a->g;
-	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
 
 	gk20a_dbg_fn("");
 
@@ -380,18 +379,6 @@ void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
 		ccsr_channel_inst_bind_false_f());
 
 	ch_gk20a->bound = false;
-
-	/*
-	 * if we are agrressive then we can destroy the syncpt
-	 * resource at this point
-	 * if not, then it will be destroyed at channel_free()
-	 */
-	mutex_lock(&ch_gk20a->sync_lock);
-	if (ch_gk20a->sync && platform->aggressive_sync_destroy) {
-		ch_gk20a->sync->destroy(ch_gk20a->sync);
-		ch_gk20a->sync = NULL;
-	}
-	mutex_unlock(&ch_gk20a->sync_lock);
 }
 
 int channel_gk20a_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
@@ -954,7 +941,7 @@ static void gk20a_free_channel(struct channel_gk20a *ch)
 	/* sync must be destroyed before releasing channel vm */
 	mutex_lock(&ch->sync_lock);
 	if (ch->sync) {
-		ch->sync->destroy(ch->sync);
+		gk20a_channel_sync_destroy(ch->sync);
 		ch->sync = NULL;
 	}
 	mutex_unlock(&ch->sync_lock);
@@ -1922,8 +1909,18 @@ static void gk20a_channel_clean_up_jobs(struct work_struct *work)
 
 	gk20a_channel_timeout_stop(c);
 
-	if (c->sync)
+	mutex_lock(&c->sync_lock);
+	if (c->sync) {
 		c->sync->signal_timeline(c->sync);
+		if (atomic_dec_and_test(&c->sync->refcount) &&
+				platform->aggressive_sync_destroy) {
+			gk20a_channel_sync_destroy(c->sync);
+			c->sync = NULL;
+		}
+	} else {
+		WARN_ON(1);
+	}
+	mutex_unlock(&c->sync_lock);
 
 	if (job->num_mapped_buffers)
 		gk20a_vm_put_buffers(vm, job->mapped_buffers,
@@ -1950,23 +1947,6 @@ static void gk20a_channel_clean_up_jobs(struct work_struct *work)
 		gk20a_idle(g->dev);
 	}
 
-	/*
-	 * If job list is empty then channel is idle and we can free
-	 * the syncpt here (given aggressive_destroy flag is set)
-	 * Note: check if last submit is complete before destroying
-	 * the sync resource
-	 */
-	if (list_empty(&c->jobs)) {
-		mutex_lock(&c->sync_lock);
-		mutex_lock(&c->last_submit.fence_lock);
-		if (c->sync && platform->aggressive_sync_destroy &&
-			gk20a_fence_is_expired(c->last_submit.post_fence)) {
-			c->sync->destroy(c->sync);
-			c->sync = NULL;
-		}
-		mutex_unlock(&c->last_submit.fence_lock);
-		mutex_unlock(&c->sync_lock);
-	}
 	mutex_unlock(&c->jobs_lock);
 	mutex_unlock(&c->submit_lock);
 
@@ -2121,6 +2101,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 		}
 		new_sync_created = true;
 	}
+	atomic_inc(&c->sync->refcount);
 	mutex_unlock(&c->sync_lock);
 
 	if (g->ops.fifo.resetup_ramfc && new_sync_created) {
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index 025b000e..b47c1010 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -356,6 +356,7 @@ gk20a_channel_syncpt_create(struct channel_gk20a *c)
 
 	nvhost_syncpt_set_min_eq_max_ext(sp->host1x_pdev, sp->id);
 
+	atomic_set(&sp->ops.refcount, 0);
 	sp->ops.wait_syncpt = gk20a_channel_syncpt_wait_syncpt;
 	sp->ops.wait_fd = gk20a_channel_syncpt_wait_fd;
 	sp->ops.incr = gk20a_channel_syncpt_incr;
@@ -711,6 +712,7 @@ gk20a_channel_semaphore_create(struct channel_gk20a *c)
 	if (!sema->timeline)
 		goto clean_up;
 #endif
+	atomic_set(&sema->ops.refcount, 0);
 	sema->ops.wait_syncpt = gk20a_channel_semaphore_wait_syncpt;
 	sema->ops.wait_fd = gk20a_channel_semaphore_wait_fd;
 	sema->ops.incr = gk20a_channel_semaphore_incr;
@@ -727,6 +729,11 @@ clean_up:
 	return NULL;
 }
 
+void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync)
+{
+	sync->destroy(sync);
+}
+
 struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c)
 {
 #ifdef CONFIG_TEGRA_GK20A
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
index 618e1b26..4b0918de 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
@@ -27,6 +27,8 @@ struct gk20a_semaphore;
 struct gk20a_fence;
 
 struct gk20a_channel_sync {
+	atomic_t refcount;
+
 	/* Generate a gpu wait cmdbuf from syncpoint.
 	 * Returns
 	 *  - a gpu cmdbuf that performs the wait when executed,
@@ -91,13 +93,11 @@ struct gk20a_channel_sync {
 	/* Returns the sync point id or negative number if no syncpt*/
 	int (*syncpt_id)(struct gk20a_channel_sync *s);
 
-	/* flag to set sync destroy aggressiveness */
-	bool aggressive_destroy;
-
 	/* Free the resources allocated by gk20a_channel_sync_create. */
 	void (*destroy)(struct gk20a_channel_sync *s);
 };
 
+void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync);
 struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c);
 
 #endif
diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
index 535de851..294581b9 100644
--- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
@@ -59,16 +59,6 @@ static void vgpu_channel_unbind(struct channel_gk20a *ch)
 	}
 
 	ch->bound = false;
-
-	/*
-	 * if we are agrressive then we can destroy the syncpt
-	 * resource at this point
-	 * if not, then it will be destroyed at channel_free()
-	 */
-	if (ch->sync && platform->aggressive_sync_destroy) {
-		ch->sync->destroy(ch->sync);
-		ch->sync = NULL;
-	}
 }
 
 static int vgpu_channel_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)