diff options
author | Konsta Holtta <kholtta@nvidia.com> | 2016-06-06 09:23:06 -0400 |
---|---|---|
committer | Terje Bergstrom <tbergstrom@nvidia.com> | 2016-06-13 10:42:26 -0400 |
commit | 987de665838f6b4aceadf52f076b91da4cc633ca (patch) | |
tree | b265cf18cbd3cba69202674b0b5033ee28948234 /drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |
parent | 15d241a8cb1d6cf25752f4c0f1e858bbcd34db3f (diff) |
gpu: nvgpu: optimize mem_desc accessor loops
Instead of calling gk20a_mem_{wr,rd}32() on each iteration, use direct
memcpy/memset for sysmem, and minimize the enter/exit overhead for
vidmem.
JIRA DNVGPU-23
Change-Id: I5437e35f8393a746777a40636c1e9b5d93ced1f6
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1159524
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.h')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 4 |
1 file changed, 2 insertions, 2 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index d1628b07..23420fef 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -458,9 +458,9 @@ void gk20a_mem_wr(struct gk20a *g, struct mem_desc *mem, u32 offset, u32 data); | |||
458 | /* memcpy from cpu, offset and size in bytes (32b-aligned) */ | 458 | /* memcpy from cpu, offset and size in bytes (32b-aligned) */ |
459 | void gk20a_mem_wr_n(struct gk20a *g, struct mem_desc *mem, u32 offset, | 459 | void gk20a_mem_wr_n(struct gk20a *g, struct mem_desc *mem, u32 offset, |
460 | void *src, u32 size); | 460 | void *src, u32 size); |
461 | /* size and offset in bytes (32b-aligned), filled with u32s */ | 461 | /* size and offset in bytes (32b-aligned), filled with the constant byte c */ |
462 | void gk20a_memset(struct gk20a *g, struct mem_desc *mem, u32 offset, | 462 | void gk20a_memset(struct gk20a *g, struct mem_desc *mem, u32 offset, |
463 | u32 value, u32 size); | 463 | u32 c, u32 size); |
464 | 464 | ||
465 | #if 0 /*related to addr bits above, concern below TBD on which is accurate */ | 465 | #if 0 /*related to addr bits above, concern below TBD on which is accurate */ |
466 | #define bar1_instance_block_shift_gk20a() (max_physaddr_bits_gk20a() -\ | 466 | #define bar1_instance_block_shift_gk20a() (max_physaddr_bits_gk20a() -\ |