summaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorAlex Waterman <alexw@nvidia.com>2017-01-11 19:58:14 -0500
committermobile promotions <svcmobile_promotions@nvidia.com>2017-03-03 13:34:43 -0500
commit3966efc2e58f1802411f44fd00967dde448f278d (patch)
treeb6cf822abc638b79acbd12b749a97ab5507a6fe9 /drivers
parent76b78b6fdcb0bbed72645aaa85de6013e2b135c3 (diff)
gpu: nvgpu: Give nvgpu_kalloc a less generic name
Change nvgpu_kalloc() to nvgpu_big_[mz]alloc(). This is necessary since the natural free function name for this is nvgpu_kfree() but that conflicts with nvgpu_k[mz]alloc() (implemented in a subsequent patch). This API exists because not all allocation sizes can be determined at compile time and in some cases sizes may vary across the system page size. Thus always using kmalloc() could lead to OOM errors due to fragmentation. But always using vmalloc() is wasteful of memory for small allocations. This API tries to alleviate those problems. Bug 1799159 Bug 1823380 Change-Id: I49ec5292ce13bcdecf112afbb4a0cfffeeb5ecfc Signed-off-by: Alex Waterman <alexw@nvidia.com> Reviewed-on: http://git-master/r/1283827 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/gpu/nvgpu/gk20a/channel_gk20a.c19
-rw-r--r--drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c6
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.c7
-rw-r--r--drivers/gpu/nvgpu/gk20a/mm_gk20a.c7
-rw-r--r--drivers/gpu/nvgpu/gk20a/mm_gk20a.h27
-rw-r--r--drivers/gpu/nvgpu/include/nvgpu/kmem.h82
6 files changed, 103 insertions, 45 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 6eb1cb06..f228110e 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -30,6 +30,8 @@
30#include <linux/circ_buf.h> 30#include <linux/circ_buf.h>
31 31
32#include <nvgpu/semaphore.h> 32#include <nvgpu/semaphore.h>
33#include <nvgpu/timers.h>
34#include <nvgpu/kmem.h>
33 35
34#include "gk20a.h" 36#include "gk20a.h"
35#include "debug_gk20a.h" 37#include "debug_gk20a.h"
@@ -37,8 +39,6 @@
37#include "dbg_gpu_gk20a.h" 39#include "dbg_gpu_gk20a.h"
38#include "fence_gk20a.h" 40#include "fence_gk20a.h"
39 41
40#include <nvgpu/timers.h>
41
42#include <nvgpu/hw/gk20a/hw_ram_gk20a.h> 42#include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
43#include <nvgpu/hw/gk20a/hw_fifo_gk20a.h> 43#include <nvgpu/hw/gk20a/hw_fifo_gk20a.h>
44#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h> 44#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
@@ -986,7 +986,7 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
986 memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub)); 986 memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub));
987 987
988 gk20a_gmmu_unmap_free(ch_vm, &ch->gpfifo.mem); 988 gk20a_gmmu_unmap_free(ch_vm, &ch->gpfifo.mem);
989 nvgpu_kfree(ch->gpfifo.pipe); 989 nvgpu_big_free(ch->gpfifo.pipe);
990 memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc)); 990 memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));
991 991
992#if defined(CONFIG_GK20A_CYCLE_STATS) 992#if defined(CONFIG_GK20A_CYCLE_STATS)
@@ -1856,9 +1856,8 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
1856 } 1856 }
1857 1857
1858 if (c->gpfifo.mem.aperture == APERTURE_VIDMEM || g->mm.force_pramin) { 1858 if (c->gpfifo.mem.aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
1859 c->gpfifo.pipe = nvgpu_kalloc( 1859 c->gpfifo.pipe = nvgpu_big_malloc(
1860 gpfifo_size * sizeof(struct nvgpu_gpfifo), 1860 gpfifo_size * sizeof(struct nvgpu_gpfifo));
1861 false);
1862 if (!c->gpfifo.pipe) { 1861 if (!c->gpfifo.pipe) {
1863 err = -ENOMEM; 1862 err = -ENOMEM;
1864 goto clean_up_unmap; 1863 goto clean_up_unmap;
@@ -1928,7 +1927,7 @@ clean_up_sync:
1928 c->sync = NULL; 1927 c->sync = NULL;
1929 } 1928 }
1930clean_up_unmap: 1929clean_up_unmap:
1931 nvgpu_kfree(c->gpfifo.pipe); 1930 nvgpu_big_free(c->gpfifo.pipe);
1932 gk20a_gmmu_unmap_free(ch_vm, &c->gpfifo.mem); 1931 gk20a_gmmu_unmap_free(ch_vm, &c->gpfifo.mem);
1933clean_up: 1932clean_up:
1934 memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc)); 1933 memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
@@ -2058,12 +2057,12 @@ static void trace_write_pushbuffer_range(struct channel_gk20a *c,
2058 if (!g) { 2057 if (!g) {
2059 size = count * sizeof(struct nvgpu_gpfifo); 2058 size = count * sizeof(struct nvgpu_gpfifo);
2060 if (size) { 2059 if (size) {
2061 g = nvgpu_kalloc(size, false); 2060 g = nvgpu_big_malloc(size);
2062 if (!g) 2061 if (!g)
2063 return; 2062 return;
2064 2063
2065 if (copy_from_user(g, user_gpfifo, size)) { 2064 if (copy_from_user(g, user_gpfifo, size)) {
2066 nvgpu_kfree(g); 2065 nvgpu_big_free(g);
2067 return; 2066 return;
2068 } 2067 }
2069 } 2068 }
@@ -2075,7 +2074,7 @@ static void trace_write_pushbuffer_range(struct channel_gk20a *c,
2075 trace_write_pushbuffer(c, gp); 2074 trace_write_pushbuffer(c, gp);
2076 2075
2077 if (gpfifo_allocated) 2076 if (gpfifo_allocated)
2078 nvgpu_kfree(g); 2077 nvgpu_big_free(g);
2079} 2078}
2080 2079
2081static void __gk20a_channel_timeout_start(struct channel_gk20a *ch) 2080static void __gk20a_channel_timeout_start(struct channel_gk20a *ch)
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index f6290e1d..4a42e03f 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -24,6 +24,8 @@
24#include <linux/dma-buf.h> 24#include <linux/dma-buf.h>
25#include <uapi/linux/nvgpu.h> 25#include <uapi/linux/nvgpu.h>
26 26
27#include <nvgpu/kmem.h>
28
27#include "gk20a.h" 29#include "gk20a.h"
28#include "gr_gk20a.h" 30#include "gr_gk20a.h"
29#include "dbg_gpu_gk20a.h" 31#include "dbg_gpu_gk20a.h"
@@ -817,7 +819,7 @@ static int nvgpu_dbg_gpu_ioctl_access_fb_memory(struct dbg_session_gk20a *dbg_s,
817 goto fail_dmabuf_put; 819 goto fail_dmabuf_put;
818 } 820 }
819 821
820 buffer = nvgpu_kalloc(access_limit_size, true); 822 buffer = nvgpu_big_zalloc(access_limit_size);
821 if (!buffer) { 823 if (!buffer) {
822 err = -ENOMEM; 824 err = -ENOMEM;
823 goto fail_dmabuf_put; 825 goto fail_dmabuf_put;
@@ -863,7 +865,7 @@ static int nvgpu_dbg_gpu_ioctl_access_fb_memory(struct dbg_session_gk20a *dbg_s,
863fail_idle: 865fail_idle:
864 gk20a_idle(g->dev); 866 gk20a_idle(g->dev);
865fail_free_buffer: 867fail_free_buffer:
866 nvgpu_kfree(buffer); 868 nvgpu_big_free(buffer);
867fail_dmabuf_put: 869fail_dmabuf_put:
868 dma_buf_put(dmabuf); 870 dma_buf_put(dmabuf);
869 871
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index cb4f8007..36b85f3b 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -31,6 +31,7 @@
31#include <linux/bsearch.h> 31#include <linux/bsearch.h>
32#include <trace/events/gk20a.h> 32#include <trace/events/gk20a.h>
33 33
34#include <nvgpu/kmem.h>
34#include <nvgpu/timers.h> 35#include <nvgpu/timers.h>
35#include <nvgpu/nvgpu_common.h> 36#include <nvgpu/nvgpu_common.h>
36 37
@@ -3423,7 +3424,7 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr)
3423 gr->ctx_vars.local_golden_image = NULL; 3424 gr->ctx_vars.local_golden_image = NULL;
3424 3425
3425 if (gr->ctx_vars.hwpm_ctxsw_buffer_offset_map) 3426 if (gr->ctx_vars.hwpm_ctxsw_buffer_offset_map)
3426 nvgpu_kfree(gr->ctx_vars.hwpm_ctxsw_buffer_offset_map); 3427 nvgpu_big_free(gr->ctx_vars.hwpm_ctxsw_buffer_offset_map);
3427 gr->ctx_vars.hwpm_ctxsw_buffer_offset_map = NULL; 3428 gr->ctx_vars.hwpm_ctxsw_buffer_offset_map = NULL;
3428 3429
3429 gk20a_comptag_allocator_destroy(&gr->comp_tags); 3430 gk20a_comptag_allocator_destroy(&gr->comp_tags);
@@ -8054,7 +8055,7 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g)
8054 hwpm_ctxsw_reg_count_max = hwpm_ctxsw_buffer_size >> 2; 8055 hwpm_ctxsw_reg_count_max = hwpm_ctxsw_buffer_size >> 2;
8055 map_size = hwpm_ctxsw_reg_count_max * sizeof(*map); 8056 map_size = hwpm_ctxsw_reg_count_max * sizeof(*map);
8056 8057
8057 map = nvgpu_kalloc(map_size, true); 8058 map = nvgpu_big_zalloc(map_size);
8058 if (!map) 8059 if (!map)
8059 return -ENOMEM; 8060 return -ENOMEM;
8060 8061
@@ -8144,7 +8145,7 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g)
8144 return 0; 8145 return 0;
8145cleanup: 8146cleanup:
8146 gk20a_err(dev_from_gk20a(g), "Failed to create HWPM buffer offset map"); 8147 gk20a_err(dev_from_gk20a(g), "Failed to create HWPM buffer offset map");
8147 nvgpu_kfree(map); 8148 nvgpu_big_free(map);
8148 return -EINVAL; 8149 return -EINVAL;
8149} 8150}
8150 8151
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index c95e744e..7a64f79b 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -31,6 +31,7 @@
31#include <uapi/linux/nvgpu.h> 31#include <uapi/linux/nvgpu.h>
32#include <trace/events/gk20a.h> 32#include <trace/events/gk20a.h>
33 33
34#include <nvgpu/kmem.h>
34#include <nvgpu/timers.h> 35#include <nvgpu/timers.h>
35#include <nvgpu/allocator.h> 36#include <nvgpu/allocator.h>
36#include <nvgpu/semaphore.h> 37#include <nvgpu/semaphore.h>
@@ -1486,8 +1487,8 @@ int gk20a_vm_get_buffers(struct vm_gk20a *vm,
1486 1487
1487 nvgpu_mutex_acquire(&vm->update_gmmu_lock); 1488 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
1488 1489
1489 buffer_list = nvgpu_kalloc(sizeof(*buffer_list) * 1490 buffer_list = nvgpu_big_zalloc(sizeof(*buffer_list) *
1490 vm->num_user_mapped_buffers, true); 1491 vm->num_user_mapped_buffers);
1491 if (!buffer_list) { 1492 if (!buffer_list) {
1492 nvgpu_mutex_release(&vm->update_gmmu_lock); 1493 nvgpu_mutex_release(&vm->update_gmmu_lock);
1493 return -ENOMEM; 1494 return -ENOMEM;
@@ -1571,7 +1572,7 @@ void gk20a_vm_put_buffers(struct vm_gk20a *vm,
1571 gk20a_vm_mapping_batch_finish_locked(vm, &batch); 1572 gk20a_vm_mapping_batch_finish_locked(vm, &batch);
1572 nvgpu_mutex_release(&vm->update_gmmu_lock); 1573 nvgpu_mutex_release(&vm->update_gmmu_lock);
1573 1574
1574 nvgpu_kfree(mapped_buffers); 1575 nvgpu_big_free(mapped_buffers);
1575} 1576}
1576 1577
1577static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset, 1578static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset,
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index d7f6cb9a..5b96726f 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -791,33 +791,6 @@ int gk20a_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size);
791extern const struct gk20a_mmu_level gk20a_mm_levels_64k[]; 791extern const struct gk20a_mmu_level gk20a_mm_levels_64k[];
792extern const struct gk20a_mmu_level gk20a_mm_levels_128k[]; 792extern const struct gk20a_mmu_level gk20a_mm_levels_128k[];
793 793
794static inline void *nvgpu_kalloc(size_t size, bool clear)
795{
796 void *p;
797
798 if (size > PAGE_SIZE) {
799 if (clear)
800 p = vzalloc(size);
801 else
802 p = vmalloc(size);
803 } else {
804 if (clear)
805 p = kzalloc(size, GFP_KERNEL);
806 else
807 p = kmalloc(size, GFP_KERNEL);
808 }
809
810 return p;
811}
812
813static inline void nvgpu_kfree(void *p)
814{
815 if (virt_addr_valid(p))
816 kfree(p);
817 else
818 vfree(p);
819}
820
821int gk20a_mm_get_buffer_info(struct device *dev, int dmabuf_fd, 794int gk20a_mm_get_buffer_info(struct device *dev, int dmabuf_fd,
822 u64 *buffer_id, u64 *buffer_len); 795 u64 *buffer_id, u64 *buffer_len);
823 796
diff --git a/drivers/gpu/nvgpu/include/nvgpu/kmem.h b/drivers/gpu/nvgpu/include/nvgpu/kmem.h
index 3d983e77..c08e40a6 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/kmem.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/kmem.h
@@ -17,6 +17,12 @@
17#ifndef NVGPU_KMEM_H 17#ifndef NVGPU_KMEM_H
18#define NVGPU_KMEM_H 18#define NVGPU_KMEM_H
19 19
20#include <linux/mm.h>
21#include <linux/slab.h>
22#include <linux/vmalloc.h>
23
24#include <asm/page.h>
25
20struct gk20a; 26struct gk20a;
21 27
22/* 28/*
@@ -37,4 +43,80 @@ void nvgpu_kmem_cache_destroy(struct nvgpu_kmem_cache *cache);
37void *nvgpu_kmem_cache_alloc(struct nvgpu_kmem_cache *cache); 43void *nvgpu_kmem_cache_alloc(struct nvgpu_kmem_cache *cache);
38void nvgpu_kmem_cache_free(struct nvgpu_kmem_cache *cache, void *ptr); 44void nvgpu_kmem_cache_free(struct nvgpu_kmem_cache *cache, void *ptr);
39 45
46static inline void *__nvgpu_big_alloc(size_t size, bool clear)
47{
48 void *p;
49
50 if (size > PAGE_SIZE) {
51 if (clear)
52 p = vzalloc(size);
53 else
54 p = vmalloc(size);
55 } else {
56 if (clear)
57 p = kzalloc(size, GFP_KERNEL);
58 else
59 p = kmalloc(size, GFP_KERNEL);
60 }
61
62 return p;
63}
64
65/**
66 * nvgpu_big_malloc - Pick virtual or physical alloc based on @size
67 *
68 * @size - Size of the allocation.
69 *
70 * On some platforms (e.g. Linux) it is possible to allocate memory directly
71 * mapped into the kernel's address space (kmalloc) or allocate discontiguous
72 * pages which are then mapped into a special kernel address range. Each type
73 * of allocation has pros and cons. kmalloc() for instance lets you allocate
74 * small buffers more space efficiently but vmalloc() allows you to successfully
75 * allocate much larger buffers without worrying about fragmentation as much
76 * (but will allocate in multiples of page size).
77 *
78 * This function aims to provide the right allocation for when buffers are of
79 * variable size. In some cases the code doesn't know ahead of time if the
80 * buffer is going to be big or small so this does the check for you and
81 * provides the right type of memory allocation.
82 *
83 * Returns a pointer to a virtual address range that the kernel can access or
84 * %NULL on failure.
85 */
86static inline void *nvgpu_big_malloc(size_t size)
87{
88 return __nvgpu_big_alloc(size, false);
89}
90
91/**
92 * nvgpu_big_zalloc - Pick virtual or physical alloc based on @size
93 *
94 * @size - Size of the allocation.
95 *
96 * Zeroed memory version of nvgpu_big_malloc().
97 */
98static inline void *nvgpu_big_zalloc(size_t size)
99{
100 return __nvgpu_big_alloc(size, true);
101}
102
103/**
104 * nvgpu_big_free - Free an alloc from nvgpu_big_zalloc() or
105 * nvgpu_big_malloc().
106 *
107 * @p - A pointer allocated by nvgpu_big_zalloc() or nvgpu_big_malloc().
108 */
109static inline void nvgpu_big_free(void *p)
110{
111 /*
112 * This will have to be fixed eventually. Allocs that use
113 * nvgpu_big_[mz]alloc() will need to remember the size of the alloc
114 * when freeing.
115 */
116 if (virt_addr_valid(p))
117 kfree(p);
118 else
119 vfree(p);
120}
121
40#endif 122#endif