diff options
author | Konsta Holtta <kholtta@nvidia.com> | 2016-07-20 03:00:50 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2016-10-07 02:34:57 -0400 |
commit | 0bb1f4595885714057fa5399e3ee7a6ce5422de1 (patch) | |
tree | b0bcbf0d1d1eb5ab010ee3e9bc9e897f38cbcb83 /drivers/gpu/nvgpu/gk20a | |
parent | 03a6e36c2ff7afc0379f2e501ef62fdfeb82639d (diff) |
gpu: nvgpu: optimize barrier in batch pramin writes
Move the wmb() in front of the loop in the PRAMIN batch write helpers and
call writel_relaxed() directly, instead of going through gk20a_writel(),
which would issue a separate wmb() on every iteration.
Jira DNVGPU-24
Change-Id: I4c1375a819266727f97e2f109d3132b5b0974ac6
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1213600
(cherry picked from commit 79e3e38e0c5384ababfd55b8e6cd9723eb8f7b66)
Reviewed-on: http://git-master/r/1184343
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 16 |
1 files changed, 14 insertions, 2 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 1551dd16..a84d8ff0 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -229,8 +229,14 @@ static inline void pramin_access_batch_wr_n(struct gk20a *g, u32 start, | |||
229 | { | 229 | { |
230 | u32 r = start, *src_u32 = *arg; | 230 | u32 r = start, *src_u32 = *arg; |
231 | 231 | ||
232 | /* | ||
233 | * Barrier moved here from gk20a_writel in the loop. The writes don't | ||
234 | * have to be ordered. | ||
235 | */ | ||
236 | wmb(); | ||
237 | |||
232 | while (words--) { | 238 | while (words--) { |
233 | gk20a_writel(g, r, *src_u32++); | 239 | writel_relaxed(*src_u32++, g->regs + r); |
234 | r += sizeof(u32); | 240 | r += sizeof(u32); |
235 | } | 241 | } |
236 | 242 | ||
@@ -242,8 +248,14 @@ static inline void pramin_access_batch_set(struct gk20a *g, u32 start, | |||
242 | { | 248 | { |
243 | u32 r = start, repeat = **arg; | 249 | u32 r = start, repeat = **arg; |
244 | 250 | ||
251 | /* | ||
252 | * Barrier moved here from gk20a_writel in the loop. The writes don't | ||
253 | * have to be ordered. | ||
254 | */ | ||
255 | wmb(); | ||
256 | |||
245 | while (words--) { | 257 | while (words--) { |
246 | gk20a_writel(g, r, repeat); | 258 | writel_relaxed(repeat, g->regs + r); |
247 | r += sizeof(u32); | 259 | r += sizeof(u32); |
248 | } | 260 | } |
249 | } | 261 | } |