diff options
author | Deepak Nibade <dnibade@nvidia.com> | 2015-10-23 06:11:21 -0400 |
---|---|---|
committer | Terje Bergstrom <tbergstrom@nvidia.com> | 2015-11-23 11:43:49 -0500 |
commit | 7f79d647d6f8beffc5d98d3b703f9408b5a05d14 (patch) | |
tree | 87c91574f99c562780ba793ef0e5b68088604a42 /drivers/gpu/nvgpu/gk20a | |
parent | b8b6df791b407a6b9cf568ecd34744c347968246 (diff) |
gpu: nvgpu: set aggressive_sync_destroy at runtime
We currently set the "aggressive_destroy" flag, which controls sync
object destruction, statically and for each sync object individually.
Move this flag to the per-platform structure so that it
can be set per-platform for all sync objects at once.
Also, set the default value of this flag to "false",
and switch it to "true" once more than 64
channels are in use.
Bug 200141116
Change-Id: I1bc271df4f468a4087a06a27c7289ee0ec3ef29c
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/822041
(cherry picked from commit 98741e7e88066648f4f14490c76b61dbff745103)
Reviewed-on: http://git-master/r/835800
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 20 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 6 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fifo_gk20a.h | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/platform_gk20a.h | 3 |
4 files changed, 21 insertions, 9 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 6015ab5e..8db885e3 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -42,6 +42,8 @@ | |||
42 | 42 | ||
43 | #define NVMAP_HANDLE_PARAM_SIZE 1 | 43 | #define NVMAP_HANDLE_PARAM_SIZE 1 |
44 | 44 | ||
45 | #define NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT 64 /* channels */ | ||
46 | |||
45 | static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f); | 47 | static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f); |
46 | static void free_channel(struct fifo_gk20a *f, struct channel_gk20a *c); | 48 | static void free_channel(struct fifo_gk20a *f, struct channel_gk20a *c); |
47 | 49 | ||
@@ -64,6 +66,7 @@ static void gk20a_free_error_notifiers(struct channel_gk20a *ch); | |||
64 | static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f) | 66 | static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f) |
65 | { | 67 | { |
66 | struct channel_gk20a *ch = NULL; | 68 | struct channel_gk20a *ch = NULL; |
69 | struct gk20a_platform *platform = gk20a_get_platform(f->g->dev); | ||
67 | 70 | ||
68 | mutex_lock(&f->free_chs_mutex); | 71 | mutex_lock(&f->free_chs_mutex); |
69 | if (!list_empty(&f->free_chs)) { | 72 | if (!list_empty(&f->free_chs)) { |
@@ -72,21 +75,31 @@ static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f) | |||
72 | list_del(&ch->free_chs); | 75 | list_del(&ch->free_chs); |
73 | WARN_ON(atomic_read(&ch->ref_count)); | 76 | WARN_ON(atomic_read(&ch->ref_count)); |
74 | WARN_ON(ch->referenceable); | 77 | WARN_ON(ch->referenceable); |
78 | f->used_channels++; | ||
75 | } | 79 | } |
76 | mutex_unlock(&f->free_chs_mutex); | 80 | mutex_unlock(&f->free_chs_mutex); |
77 | 81 | ||
82 | if (f->used_channels > NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT) | ||
83 | platform->aggressive_sync_destroy = true; | ||
84 | |||
78 | return ch; | 85 | return ch; |
79 | } | 86 | } |
80 | 87 | ||
81 | static void free_channel(struct fifo_gk20a *f, | 88 | static void free_channel(struct fifo_gk20a *f, |
82 | struct channel_gk20a *ch) | 89 | struct channel_gk20a *ch) |
83 | { | 90 | { |
91 | struct gk20a_platform *platform = gk20a_get_platform(f->g->dev); | ||
92 | |||
84 | trace_gk20a_release_used_channel(ch->hw_chid); | 93 | trace_gk20a_release_used_channel(ch->hw_chid); |
85 | /* refcount is zero here and channel is in a freed/dead state */ | 94 | /* refcount is zero here and channel is in a freed/dead state */ |
86 | mutex_lock(&f->free_chs_mutex); | 95 | mutex_lock(&f->free_chs_mutex); |
87 | /* add to head to increase visibility of timing-related bugs */ | 96 | /* add to head to increase visibility of timing-related bugs */ |
88 | list_add(&ch->free_chs, &f->free_chs); | 97 | list_add(&ch->free_chs, &f->free_chs); |
98 | f->used_channels--; | ||
89 | mutex_unlock(&f->free_chs_mutex); | 99 | mutex_unlock(&f->free_chs_mutex); |
100 | |||
101 | if (f->used_channels < NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT) | ||
102 | platform->aggressive_sync_destroy = false; | ||
90 | } | 103 | } |
91 | 104 | ||
92 | int channel_gk20a_commit_va(struct channel_gk20a *c) | 105 | int channel_gk20a_commit_va(struct channel_gk20a *c) |
@@ -311,6 +324,7 @@ static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a) | |||
311 | void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a) | 324 | void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a) |
312 | { | 325 | { |
313 | struct gk20a *g = ch_gk20a->g; | 326 | struct gk20a *g = ch_gk20a->g; |
327 | struct gk20a_platform *platform = gk20a_get_platform(g->dev); | ||
314 | 328 | ||
315 | gk20a_dbg_fn(""); | 329 | gk20a_dbg_fn(""); |
316 | 330 | ||
@@ -327,8 +341,7 @@ void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a) | |||
327 | * if not, then it will be destroyed at channel_free() | 341 | * if not, then it will be destroyed at channel_free() |
328 | */ | 342 | */ |
329 | mutex_lock(&ch_gk20a->sync_lock); | 343 | mutex_lock(&ch_gk20a->sync_lock); |
330 | if (ch_gk20a->sync && ch_gk20a->sync->aggressive_destroy) { | 344 | if (ch_gk20a->sync && platform->aggressive_sync_destroy) { |
331 | |||
332 | ch_gk20a->sync->destroy(ch_gk20a->sync); | 345 | ch_gk20a->sync->destroy(ch_gk20a->sync); |
333 | ch_gk20a->sync = NULL; | 346 | ch_gk20a->sync = NULL; |
334 | } | 347 | } |
@@ -1715,6 +1728,7 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed) | |||
1715 | { | 1728 | { |
1716 | struct vm_gk20a *vm = c->vm; | 1729 | struct vm_gk20a *vm = c->vm; |
1717 | struct channel_gk20a_job *job, *n; | 1730 | struct channel_gk20a_job *job, *n; |
1731 | struct gk20a_platform *platform = gk20a_get_platform(c->g->dev); | ||
1718 | 1732 | ||
1719 | trace_gk20a_channel_update(c->hw_chid); | 1733 | trace_gk20a_channel_update(c->hw_chid); |
1720 | 1734 | ||
@@ -1769,7 +1783,7 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed) | |||
1769 | */ | 1783 | */ |
1770 | if (list_empty(&c->jobs)) { | 1784 | if (list_empty(&c->jobs)) { |
1771 | mutex_lock(&c->sync_lock); | 1785 | mutex_lock(&c->sync_lock); |
1772 | if (c->sync && c->sync->aggressive_destroy && | 1786 | if (c->sync && platform->aggressive_sync_destroy && |
1773 | gk20a_fence_is_expired(c->last_submit.post_fence)) { | 1787 | gk20a_fence_is_expired(c->last_submit.post_fence)) { |
1774 | c->sync->destroy(c->sync); | 1788 | c->sync->destroy(c->sync); |
1775 | c->sync = NULL; | 1789 | c->sync = NULL; |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index 7f70d347..95647774 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | |||
@@ -345,8 +345,6 @@ gk20a_channel_syncpt_create(struct channel_gk20a *c) | |||
345 | sp->ops.syncpt_id = gk20a_channel_syncpt_id; | 345 | sp->ops.syncpt_id = gk20a_channel_syncpt_id; |
346 | sp->ops.destroy = gk20a_channel_syncpt_destroy; | 346 | sp->ops.destroy = gk20a_channel_syncpt_destroy; |
347 | 347 | ||
348 | sp->ops.aggressive_destroy = true; | ||
349 | |||
350 | return &sp->ops; | 348 | return &sp->ops; |
351 | } | 349 | } |
352 | #endif /* CONFIG_TEGRA_GK20A */ | 350 | #endif /* CONFIG_TEGRA_GK20A */ |
@@ -690,10 +688,6 @@ gk20a_channel_semaphore_create(struct channel_gk20a *c) | |||
690 | sema->ops.syncpt_id = gk20a_channel_semaphore_syncpt_id; | 688 | sema->ops.syncpt_id = gk20a_channel_semaphore_syncpt_id; |
691 | sema->ops.destroy = gk20a_channel_semaphore_destroy; | 689 | sema->ops.destroy = gk20a_channel_semaphore_destroy; |
692 | 690 | ||
693 | /* Aggressively destroying the semaphore sync would cause overhead | ||
694 | * since the pool needs to be mapped to GMMU. */ | ||
695 | sema->ops.aggressive_destroy = false; | ||
696 | |||
697 | return &sema->ops; | 691 | return &sema->ops; |
698 | clean_up: | 692 | clean_up: |
699 | gk20a_channel_semaphore_destroy(&sema->ops); | 693 | gk20a_channel_semaphore_destroy(&sema->ops); |
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h index 3f9fac54..1b47677b 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h | |||
@@ -105,6 +105,7 @@ struct fifo_gk20a { | |||
105 | struct mem_desc userd; | 105 | struct mem_desc userd; |
106 | u32 userd_entry_size; | 106 | u32 userd_entry_size; |
107 | 107 | ||
108 | int used_channels; | ||
108 | struct channel_gk20a *channel; | 109 | struct channel_gk20a *channel; |
109 | /* zero-kref'd channels here */ | 110 | /* zero-kref'd channels here */ |
110 | struct list_head free_chs; | 111 | struct list_head free_chs; |
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h index b7a28ea5..84b3fcaf 100644 --- a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h | |||
@@ -47,6 +47,9 @@ struct gk20a_platform { | |||
47 | /* Should be populated at probe. */ | 47 | /* Should be populated at probe. */ |
48 | bool has_syncpoints; | 48 | bool has_syncpoints; |
49 | 49 | ||
50 | /* flag to set sync destroy aggressiveness */ | ||
51 | bool aggressive_sync_destroy; | ||
52 | |||
50 | /* Should be populated by probe. */ | 53 | /* Should be populated by probe. */ |
51 | struct dentry *debugfs; | 54 | struct dentry *debugfs; |
52 | struct dentry *debugfs_alias; | 55 | struct dentry *debugfs_alias; |