summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gk20a
diff options
context:
space:
mode:
author: Alex Waterman <alexw@nvidia.com> 2017-01-11 18:00:54 -0500
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2017-03-03 13:34:48 -0500
commit: 707ea45e0f1d7a07885597777496b186dd5fb6f0 (patch)
tree: 9b48640703ccdf0108d731e66574370179a44b23 /drivers/gpu/nvgpu/gk20a
parent: 3966efc2e58f1802411f44fd00967dde448f278d (diff)
gpu: nvgpu: kmem abstraction and tracking
Implement kmem abstraction and tracking in nvgpu. The abstraction helps move nvgpu's core code away from being Linux dependent and allows kmem allocation tracking to be done for Linux and any other OS supported by nvgpu.

Bug 1799159
Bug 1823380

Change-Id: Ieaae4ca1bbd1d4db4a1546616ab8b9fc53a4079d
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1283828
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r-- drivers/gpu/nvgpu/gk20a/channel_gk20a.c 12
-rw-r--r-- drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c 4
-rw-r--r-- drivers/gpu/nvgpu/gk20a/debug_gk20a.c 4
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gk20a.c 3
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gk20a.h 7
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gr_gk20a.c 6
-rw-r--r-- drivers/gpu/nvgpu/gk20a/mm_gk20a.c 6
7 files changed, 28 insertions, 14 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index f228110e..68e43259 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -986,7 +986,7 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
986 memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub)); 986 memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub));
987 987
988 gk20a_gmmu_unmap_free(ch_vm, &ch->gpfifo.mem); 988 gk20a_gmmu_unmap_free(ch_vm, &ch->gpfifo.mem);
989 nvgpu_big_free(ch->gpfifo.pipe); 989 nvgpu_big_free(g, ch->gpfifo.pipe);
990 memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc)); 990 memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));
991 991
992#if defined(CONFIG_GK20A_CYCLE_STATS) 992#if defined(CONFIG_GK20A_CYCLE_STATS)
@@ -1856,7 +1856,7 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
1856 } 1856 }
1857 1857
1858 if (c->gpfifo.mem.aperture == APERTURE_VIDMEM || g->mm.force_pramin) { 1858 if (c->gpfifo.mem.aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
1859 c->gpfifo.pipe = nvgpu_big_malloc( 1859 c->gpfifo.pipe = nvgpu_big_malloc(g,
1860 gpfifo_size * sizeof(struct nvgpu_gpfifo)); 1860 gpfifo_size * sizeof(struct nvgpu_gpfifo));
1861 if (!c->gpfifo.pipe) { 1861 if (!c->gpfifo.pipe) {
1862 err = -ENOMEM; 1862 err = -ENOMEM;
@@ -1927,7 +1927,7 @@ clean_up_sync:
1927 c->sync = NULL; 1927 c->sync = NULL;
1928 } 1928 }
1929clean_up_unmap: 1929clean_up_unmap:
1930 nvgpu_big_free(c->gpfifo.pipe); 1930 nvgpu_big_free(g, c->gpfifo.pipe);
1931 gk20a_gmmu_unmap_free(ch_vm, &c->gpfifo.mem); 1931 gk20a_gmmu_unmap_free(ch_vm, &c->gpfifo.mem);
1932clean_up: 1932clean_up:
1933 memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc)); 1933 memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
@@ -2057,12 +2057,12 @@ static void trace_write_pushbuffer_range(struct channel_gk20a *c,
2057 if (!g) { 2057 if (!g) {
2058 size = count * sizeof(struct nvgpu_gpfifo); 2058 size = count * sizeof(struct nvgpu_gpfifo);
2059 if (size) { 2059 if (size) {
2060 g = nvgpu_big_malloc(size); 2060 g = nvgpu_big_malloc(c->g, size);
2061 if (!g) 2061 if (!g)
2062 return; 2062 return;
2063 2063
2064 if (copy_from_user(g, user_gpfifo, size)) { 2064 if (copy_from_user(g, user_gpfifo, size)) {
2065 nvgpu_big_free(g); 2065 nvgpu_big_free(c->g, g);
2066 return; 2066 return;
2067 } 2067 }
2068 } 2068 }
@@ -2074,7 +2074,7 @@ static void trace_write_pushbuffer_range(struct channel_gk20a *c,
2074 trace_write_pushbuffer(c, gp); 2074 trace_write_pushbuffer(c, gp);
2075 2075
2076 if (gpfifo_allocated) 2076 if (gpfifo_allocated)
2077 nvgpu_big_free(g); 2077 nvgpu_big_free(c->g, g);
2078} 2078}
2079 2079
2080static void __gk20a_channel_timeout_start(struct channel_gk20a *ch) 2080static void __gk20a_channel_timeout_start(struct channel_gk20a *ch)
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index 4a42e03f..0a0aada7 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -819,7 +819,7 @@ static int nvgpu_dbg_gpu_ioctl_access_fb_memory(struct dbg_session_gk20a *dbg_s,
819 goto fail_dmabuf_put; 819 goto fail_dmabuf_put;
820 } 820 }
821 821
822 buffer = nvgpu_big_zalloc(access_limit_size); 822 buffer = nvgpu_big_zalloc(g, access_limit_size);
823 if (!buffer) { 823 if (!buffer) {
824 err = -ENOMEM; 824 err = -ENOMEM;
825 goto fail_dmabuf_put; 825 goto fail_dmabuf_put;
@@ -865,7 +865,7 @@ static int nvgpu_dbg_gpu_ioctl_access_fb_memory(struct dbg_session_gk20a *dbg_s,
865fail_idle: 865fail_idle:
866 gk20a_idle(g->dev); 866 gk20a_idle(g->dev);
867fail_free_buffer: 867fail_free_buffer:
868 nvgpu_big_free(buffer); 868 nvgpu_big_free(g, buffer);
869fail_dmabuf_put: 869fail_dmabuf_put:
870 dma_buf_put(dmabuf); 870 dma_buf_put(dmabuf);
871 871
diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
index 67f9b532..6341a962 100644
--- a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c
@@ -21,6 +21,7 @@
21#include <linux/io.h> 21#include <linux/io.h>
22 22
23#include <nvgpu/semaphore.h> 23#include <nvgpu/semaphore.h>
24#include <nvgpu/kmem.h>
24 25
25#include "gk20a.h" 26#include "gk20a.h"
26#include "debug_gk20a.h" 27#include "debug_gk20a.h"
@@ -485,6 +486,9 @@ void gk20a_debug_init(struct device *dev, const char *debugfs_symlink)
485 gk20a_mm_debugfs_init(g->dev); 486 gk20a_mm_debugfs_init(g->dev);
486 gk20a_fifo_debugfs_init(g->dev); 487 gk20a_fifo_debugfs_init(g->dev);
487 gk20a_sched_debugfs_init(g->dev); 488 gk20a_sched_debugfs_init(g->dev);
489#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
490 nvgpu_kmem_debugfs_init(g->dev);
491#endif
488#endif 492#endif
489 493
490} 494}
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 3504a32f..6b026ee2 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -43,6 +43,7 @@
43#include <linux/version.h> 43#include <linux/version.h>
44 44
45#include <nvgpu/nvgpu_common.h> 45#include <nvgpu/nvgpu_common.h>
46#include <nvgpu/kmem.h>
46#include <nvgpu/allocator.h> 47#include <nvgpu/allocator.h>
47#include <nvgpu/timers.h> 48#include <nvgpu/timers.h>
48 49
@@ -1598,6 +1599,8 @@ static int gk20a_probe(struct platform_device *dev)
1598 set_gk20a(dev, gk20a); 1599 set_gk20a(dev, gk20a);
1599 gk20a->dev = &dev->dev; 1600 gk20a->dev = &dev->dev;
1600 1601
1602 nvgpu_kmem_init(gk20a);
1603
1601 gk20a->irq_stall = platform_get_irq(dev, 0); 1604 gk20a->irq_stall = platform_get_irq(dev, 0);
1602 gk20a->irq_nonstall = platform_get_irq(dev, 1); 1605 gk20a->irq_nonstall = platform_get_irq(dev, 1);
1603 if (gk20a->irq_stall < 0 || gk20a->irq_nonstall < 0) 1606 if (gk20a->irq_stall < 0 || gk20a->irq_nonstall < 0)
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 8006a4fe..69528c1f 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -27,6 +27,7 @@ struct gk20a_ctxsw_ucode_segments;
27struct gk20a_fecs_trace; 27struct gk20a_fecs_trace;
28struct gk20a_ctxsw_trace; 28struct gk20a_ctxsw_trace;
29struct acr_desc; 29struct acr_desc;
30struct nvgpu_mem_alloc_tracker;
30 31
31#include <linux/sched.h> 32#include <linux/sched.h>
32#include <nvgpu/lock.h> 33#include <nvgpu/lock.h>
@@ -915,6 +916,7 @@ struct gk20a {
915 struct dentry *debugfs_runlist_interleave; 916 struct dentry *debugfs_runlist_interleave;
916 struct dentry *debugfs_allocators; 917 struct dentry *debugfs_allocators;
917 struct dentry *debugfs_xve; 918 struct dentry *debugfs_xve;
919 struct dentry *debugfs_kmem;
918#endif 920#endif
919 struct gk20a_ctxsw_ucode_info ctxsw_ucode_info; 921 struct gk20a_ctxsw_ucode_info ctxsw_ucode_info;
920 922
@@ -1055,6 +1057,10 @@ struct gk20a {
1055 /* Check if msi is enabled */ 1057 /* Check if msi is enabled */
1056 bool msi_enabled; 1058 bool msi_enabled;
1057#endif 1059#endif
1060#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
1061 struct nvgpu_mem_alloc_tracker *vmallocs;
1062 struct nvgpu_mem_alloc_tracker *kmallocs;
1063#endif
1058}; 1064};
1059 1065
1060static inline unsigned long gk20a_get_gr_idle_timeout(struct gk20a *g) 1066static inline unsigned long gk20a_get_gr_idle_timeout(struct gk20a *g)
@@ -1131,6 +1137,7 @@ enum gk20a_dbg_categories {
1131 gpu_dbg_pmu_pstate = BIT(17), /* p state controlled by pmu */ 1137 gpu_dbg_pmu_pstate = BIT(17), /* p state controlled by pmu */
1132 gpu_dbg_xv = BIT(18), /* XVE debugging */ 1138 gpu_dbg_xv = BIT(18), /* XVE debugging */
1133 gpu_dbg_shutdown = BIT(19), /* GPU shutdown tracing */ 1139 gpu_dbg_shutdown = BIT(19), /* GPU shutdown tracing */
1140 gpu_dbg_kmem = BIT(20), /* Kmem tracking debugging */
1134 gpu_dbg_mem = BIT(31), /* memory accesses, very verbose */ 1141 gpu_dbg_mem = BIT(31), /* memory accesses, very verbose */
1135}; 1142};
1136 1143
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 36b85f3b..e695f02e 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -3424,7 +3424,7 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr)
3424 gr->ctx_vars.local_golden_image = NULL; 3424 gr->ctx_vars.local_golden_image = NULL;
3425 3425
3426 if (gr->ctx_vars.hwpm_ctxsw_buffer_offset_map) 3426 if (gr->ctx_vars.hwpm_ctxsw_buffer_offset_map)
3427 nvgpu_big_free(gr->ctx_vars.hwpm_ctxsw_buffer_offset_map); 3427 nvgpu_big_free(g, gr->ctx_vars.hwpm_ctxsw_buffer_offset_map);
3428 gr->ctx_vars.hwpm_ctxsw_buffer_offset_map = NULL; 3428 gr->ctx_vars.hwpm_ctxsw_buffer_offset_map = NULL;
3429 3429
3430 gk20a_comptag_allocator_destroy(&gr->comp_tags); 3430 gk20a_comptag_allocator_destroy(&gr->comp_tags);
@@ -8055,7 +8055,7 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g)
8055 hwpm_ctxsw_reg_count_max = hwpm_ctxsw_buffer_size >> 2; 8055 hwpm_ctxsw_reg_count_max = hwpm_ctxsw_buffer_size >> 2;
8056 map_size = hwpm_ctxsw_reg_count_max * sizeof(*map); 8056 map_size = hwpm_ctxsw_reg_count_max * sizeof(*map);
8057 8057
8058 map = nvgpu_big_zalloc(map_size); 8058 map = nvgpu_big_zalloc(g, map_size);
8059 if (!map) 8059 if (!map)
8060 return -ENOMEM; 8060 return -ENOMEM;
8061 8061
@@ -8145,7 +8145,7 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g)
8145 return 0; 8145 return 0;
8146cleanup: 8146cleanup:
8147 gk20a_err(dev_from_gk20a(g), "Failed to create HWPM buffer offset map"); 8147 gk20a_err(dev_from_gk20a(g), "Failed to create HWPM buffer offset map");
8148 nvgpu_big_free(map); 8148 nvgpu_big_free(g, map);
8149 return -EINVAL; 8149 return -EINVAL;
8150} 8150}
8151 8151
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 7a64f79b..2ff54653 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1487,8 +1487,8 @@ int gk20a_vm_get_buffers(struct vm_gk20a *vm,
1487 1487
1488 nvgpu_mutex_acquire(&vm->update_gmmu_lock); 1488 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
1489 1489
1490 buffer_list = nvgpu_big_zalloc(sizeof(*buffer_list) * 1490 buffer_list = nvgpu_big_zalloc(vm->mm->g, sizeof(*buffer_list) *
1491 vm->num_user_mapped_buffers); 1491 vm->num_user_mapped_buffers);
1492 if (!buffer_list) { 1492 if (!buffer_list) {
1493 nvgpu_mutex_release(&vm->update_gmmu_lock); 1493 nvgpu_mutex_release(&vm->update_gmmu_lock);
1494 return -ENOMEM; 1494 return -ENOMEM;
@@ -1572,7 +1572,7 @@ void gk20a_vm_put_buffers(struct vm_gk20a *vm,
1572 gk20a_vm_mapping_batch_finish_locked(vm, &batch); 1572 gk20a_vm_mapping_batch_finish_locked(vm, &batch);
1573 nvgpu_mutex_release(&vm->update_gmmu_lock); 1573 nvgpu_mutex_release(&vm->update_gmmu_lock);
1574 1574
1575 nvgpu_big_free(mapped_buffers); 1575 nvgpu_big_free(vm->mm->g, mapped_buffers);
1576} 1576}
1577 1577
1578static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset, 1578static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset,