summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
diff options
context:
space:
mode:
author: Konsta Holtta <kholtta@nvidia.com> 2016-06-06 09:23:06 -0400
committer: Terje Bergstrom <tbergstrom@nvidia.com> 2016-06-13 10:42:26 -0400
commit: 987de665838f6b4aceadf52f076b91da4cc633ca (patch)
tree: b265cf18cbd3cba69202674b0b5033ee28948234 /drivers/gpu/nvgpu/gk20a/mm_gk20a.h
parent: 15d241a8cb1d6cf25752f4c0f1e858bbcd34db3f (diff)
gpu: nvgpu: optimize mem_desc accessor loops
Instead of going via gk20a_mem_{wr,rd}32() on each iteration, do direct
memcpy/memset with sysmem, and minimize the enter/exit overhead with vidmem.

JIRA DNVGPU-23

Change-Id: I5437e35f8393a746777a40636c1e9b5d93ced1f6
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1159524
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.h')
-rw-r--r-- drivers/gpu/nvgpu/gk20a/mm_gk20a.h 4
1 file changed, 2 insertions, 2 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index d1628b07..23420fef 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -458,9 +458,9 @@ void gk20a_mem_wr(struct gk20a *g, struct mem_desc *mem, u32 offset, u32 data);
458/* memcpy from cpu, offset and size in bytes (32b-aligned) */ 458/* memcpy from cpu, offset and size in bytes (32b-aligned) */
459void gk20a_mem_wr_n(struct gk20a *g, struct mem_desc *mem, u32 offset, 459void gk20a_mem_wr_n(struct gk20a *g, struct mem_desc *mem, u32 offset,
460 void *src, u32 size); 460 void *src, u32 size);
461/* size and offset in bytes (32b-aligned), filled with u32s */ 461/* size and offset in bytes (32b-aligned), filled with the constant byte c */
462void gk20a_memset(struct gk20a *g, struct mem_desc *mem, u32 offset, 462void gk20a_memset(struct gk20a *g, struct mem_desc *mem, u32 offset,
463 u32 value, u32 size); 463 u32 c, u32 size);
464 464
465#if 0 /*related to addr bits above, concern below TBD on which is accurate */ 465#if 0 /*related to addr bits above, concern below TBD on which is accurate */
466#define bar1_instance_block_shift_gk20a() (max_physaddr_bits_gk20a() -\ 466#define bar1_instance_block_shift_gk20a() (max_physaddr_bits_gk20a() -\