gpu: nvgpu: Give nvgpu_kalloc a less generic name

Change nvgpu_kalloc() to nvgpu_big_[mz]alloc(). This is necessary since the natural free function name for this is nvgpu_kfree() but that conflicts with nvgpu_k[mz]alloc() (implemented in a subsequent patch). This API exists because not all allocation sizes can be determined at compile time and in some cases sizes may vary across the system page size. Thus always using kmalloc() could lead to OOM errors due to fragmentation. But always using vmalloc() is wasteful of memory for small allocations. This API tries to alleviate those problems. Bug 1799159 Bug 1823380 Change-Id: I49ec5292ce13bcdecf112afbb4a0cfffeeb5ecfc Signed-off-by: Alex Waterman <alexw@nvidia.com> Reviewed-on: http://git-master/r/1283827 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
author: Alex Waterman <alexw@nvidia.com> 2017-01-11 19:58:14 -0500
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2017-03-03 13:34:43 -0500
commit: 3966efc2e58f1802411f44fd00967dde448f278d (patch)
tree: b6cf822abc638b79acbd12b749a97ab5507a6fe9 /drivers
parent: 76b78b6fdcb0bbed72645aaa85de6013e2b135c3 (diff)
6 files changed, 103 insertions, 45 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 6eb1cb06..f228110e 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -30,6 +30,8 @@
 #include <linux/circ_buf.h>
 #include <nvgpu/semaphore.h>
+#include <nvgpu/timers.h>
+#include <nvgpu/kmem.h>
 #include "gk20a.h"
 #include "debug_gk20a.h"
@@ -37,8 +39,6 @@
 #include "dbg_gpu_gk20a.h"
 #include "fence_gk20a.h"
-#include <nvgpu/timers.h>
 #include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_fifo_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
@@ -986,7 +986,7 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
        memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub));
        gk20a_gmmu_unmap_free(ch_vm, &ch->gpfifo.mem);
-        nvgpu_kfree(ch->gpfifo.pipe);
+        nvgpu_big_free(ch->gpfifo.pipe);
        memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));
 #if defined(CONFIG_GK20A_CYCLE_STATS)
@@ -1856,9 +1856,8 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
        }
        if (c->gpfifo.mem.aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
-                c->gpfifo.pipe = nvgpu_kalloc(
+                c->gpfifo.pipe = nvgpu_big_malloc(
-                                gpfifo_size * sizeof(struct nvgpu_gpfifo),
+                                gpfifo_size * sizeof(struct nvgpu_gpfifo));
-                                false);
                if (!c->gpfifo.pipe) {
                        err = -ENOMEM;
                        goto clean_up_unmap;
@@ -1928,7 +1927,7 @@ clean_up_sync:
                c->sync = NULL;
        }
 clean_up_unmap:
-        nvgpu_kfree(c->gpfifo.pipe);
+        nvgpu_big_free(c->gpfifo.pipe);
        gk20a_gmmu_unmap_free(ch_vm, &c->gpfifo.mem);
 clean_up:
        memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
@@ -2058,12 +2057,12 @@ static void trace_write_pushbuffer_range(struct channel_gk20a *c,
        if (!g) {
                size = count * sizeof(struct nvgpu_gpfifo);
                if (size) {
-                        g = nvgpu_kalloc(size, false);
+                        g = nvgpu_big_malloc(size);
                        if (!g)
                                return;
                        if (copy_from_user(g, user_gpfifo, size)) {
-                                nvgpu_kfree(g);
+                                nvgpu_big_free(g);
                                return;
                        }
                }
@@ -2075,7 +2074,7 @@ static void trace_write_pushbuffer_range(struct channel_gk20a *c,
                trace_write_pushbuffer(c, gp);
        if (gpfifo_allocated)
-                nvgpu_kfree(g);
+                nvgpu_big_free(g);
 }
 static void __gk20a_channel_timeout_start(struct channel_gk20a *ch)
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index f6290e1d..4a42e03f 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -24,6 +24,8 @@
 #include <linux/dma-buf.h>
 #include <uapi/linux/nvgpu.h>
+#include <nvgpu/kmem.h>
 #include "gk20a.h"
 #include "gr_gk20a.h"
 #include "dbg_gpu_gk20a.h"
@@ -817,7 +819,7 @@ static int nvgpu_dbg_gpu_ioctl_access_fb_memory(struct dbg_session_gk20a *dbg_s,
                goto fail_dmabuf_put;
        }
-        buffer = nvgpu_kalloc(access_limit_size, true);
+        buffer = nvgpu_big_zalloc(access_limit_size);
        if (!buffer) {
                err = -ENOMEM;
                goto fail_dmabuf_put;
@@ -863,7 +865,7 @@ static int nvgpu_dbg_gpu_ioctl_access_fb_memory(struct dbg_session_gk20a *dbg_s,
 fail_idle:
        gk20a_idle(g->dev);
 fail_free_buffer:
-        nvgpu_kfree(buffer);
+        nvgpu_big_free(buffer);
 fail_dmabuf_put:
        dma_buf_put(dmabuf);
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index cb4f8007..36b85f3b 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -31,6 +31,7 @@
 #include <linux/bsearch.h>
 #include <trace/events/gk20a.h>
+#include <nvgpu/kmem.h>
 #include <nvgpu/timers.h>
 #include <nvgpu/nvgpu_common.h>
@@ -3423,7 +3424,7 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr)
        gr->ctx_vars.local_golden_image = NULL;
        if (gr->ctx_vars.hwpm_ctxsw_buffer_offset_map)
-                nvgpu_kfree(gr->ctx_vars.hwpm_ctxsw_buffer_offset_map);
+                nvgpu_big_free(gr->ctx_vars.hwpm_ctxsw_buffer_offset_map);
        gr->ctx_vars.hwpm_ctxsw_buffer_offset_map = NULL;
        gk20a_comptag_allocator_destroy(&gr->comp_tags);
@@ -8054,7 +8055,7 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g)
        hwpm_ctxsw_reg_count_max = hwpm_ctxsw_buffer_size >> 2;
        map_size = hwpm_ctxsw_reg_count_max * sizeof(*map);
-        map = nvgpu_kalloc(map_size, true);
+        map = nvgpu_big_zalloc(map_size);
        if (!map)
                return -ENOMEM;
@@ -8144,7 +8145,7 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g)
        return 0;
 cleanup:
        gk20a_err(dev_from_gk20a(g), "Failed to create HWPM buffer offset map");
-        nvgpu_kfree(map);
+        nvgpu_big_free(map);
        return -EINVAL;
 }
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index c95e744e..7a64f79b 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -31,6 +31,7 @@
 #include <uapi/linux/nvgpu.h>
 #include <trace/events/gk20a.h>
+#include <nvgpu/kmem.h>
 #include <nvgpu/timers.h>
 #include <nvgpu/allocator.h>
 #include <nvgpu/semaphore.h>
@@ -1486,8 +1487,8 @@ int gk20a_vm_get_buffers(struct vm_gk20a *vm,
        nvgpu_mutex_acquire(&vm->update_gmmu_lock);
-        buffer_list = nvgpu_kalloc(sizeof(*buffer_list) *
+        buffer_list = nvgpu_big_zalloc(sizeof(*buffer_list) *
-                              vm->num_user_mapped_buffers, true);
+                                          vm->num_user_mapped_buffers);
        if (!buffer_list) {
                nvgpu_mutex_release(&vm->update_gmmu_lock);
                return -ENOMEM;
@@ -1571,7 +1572,7 @@ void gk20a_vm_put_buffers(struct vm_gk20a *vm,
        gk20a_vm_mapping_batch_finish_locked(vm, &batch);
        nvgpu_mutex_release(&vm->update_gmmu_lock);
-        nvgpu_kfree(mapped_buffers);
+        nvgpu_big_free(mapped_buffers);
 }
 static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset,
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index d7f6cb9a..5b96726f 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -791,33 +791,6 @@ int gk20a_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size);
 extern const struct gk20a_mmu_level gk20a_mm_levels_64k[];
 extern const struct gk20a_mmu_level gk20a_mm_levels_128k[];
-static inline void *nvgpu_kalloc(size_t size, bool clear)
-{
-        void *p;
-        if (size > PAGE_SIZE) {
-                if (clear)
-                        p = vzalloc(size);
-                else
-                        p = vmalloc(size);
-        } else {
-                if (clear)
-                        p = kzalloc(size, GFP_KERNEL);
-                else
-                        p = kmalloc(size, GFP_KERNEL);
-        }
-        return p;
-}
-static inline void nvgpu_kfree(void *p)
-{
-        if (virt_addr_valid(p))
-                kfree(p);
-        else
-                vfree(p);
-}
 int gk20a_mm_get_buffer_info(struct device *dev, int dmabuf_fd,
                             u64 *buffer_id, u64 *buffer_len);
diff --git a/drivers/gpu/nvgpu/include/nvgpu/kmem.h b/drivers/gpu/nvgpu/include/nvgpu/kmem.h
index 3d983e77..c08e40a6 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/kmem.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/kmem.h
@@ -17,6 +17,12 @@
 #ifndef NVGPU_KMEM_H
 #define NVGPU_KMEM_H
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <asm/page.h>
 struct gk20a;
 /*
@@ -37,4 +43,80 @@ void nvgpu_kmem_cache_destroy(struct nvgpu_kmem_cache *cache);
 void *nvgpu_kmem_cache_alloc(struct nvgpu_kmem_cache *cache);
 void nvgpu_kmem_cache_free(struct nvgpu_kmem_cache *cache, void *ptr);
+static inline void *__nvgpu_big_alloc(size_t size, bool clear)
+{ /* Shared backend of nvgpu_big_malloc()/nvgpu_big_zalloc(); @clear picks the zeroing allocator. */
+        void *p;
+        if (size > PAGE_SIZE) { /* more than one page: vmalloc()/vzalloc() */
+                if (clear)
+                        p = vzalloc(size);
+                else
+                        p = vmalloc(size);
+        } else { /* at most one page: kmalloc()/kzalloc() with GFP_KERNEL */
+                if (clear)
+                        p = kzalloc(size, GFP_KERNEL);
+                else
+                        p = kmalloc(size, GFP_KERNEL);
+        }
+        return p; /* whatever the chosen allocator returned; not checked here */
+}
+/**
+ * nvgpu_big_malloc - Pick virtual or physical alloc based on @size
+ *
+ * @size - Size of the allocation.
+ *
+ * On some platforms (e.g. Linux) it is possible to allocate memory directly
+ * mapped into the kernel's address space (kmalloc) or allocate discontiguous
+ * pages which are then mapped into a special kernel address range. Each type
+ * of allocation has pros and cons. kmalloc() for instance lets you allocate
+ * small buffers more space efficiently but vmalloc() allows you to successfully
+ * allocate much larger buffers without worrying about fragmentation as much
+ * (but will allocate in multiples of page size).
+ *
+ * This function aims to provide the right allocation for when buffers are of
+ * variable size. In some cases the code doesn't know ahead of time if the
+ * buffer is going to be big or small so this does the check for you and
+ * provides the right type of memory allocation.
+ *
+ * Sizes greater than PAGE_SIZE are served by vmalloc(); anything else is
+ * served by kmalloc() with GFP_KERNEL. The memory is not zeroed; use
+ * nvgpu_big_zalloc() for that. Release the buffer with nvgpu_big_free().
+ *
+ * Returns a pointer to a virtual address range that the kernel can access or
+ * %NULL on failure.
+ */
+static inline void *nvgpu_big_malloc(size_t size)
+{
+        return __nvgpu_big_alloc(size, false);
+}
+/**
+ * nvgpu_big_zalloc - Pick virtual or physical alloc based on @size
+ *
+ * @size - Size of the allocation.
+ *
+ * Zeroed memory version of nvgpu_big_malloc(): requests the zeroing
+ * allocator variant (vzalloc()/kzalloc()) instead of vmalloc()/kmalloc().
+ */
+static inline void *nvgpu_big_zalloc(size_t size)
+{
+        return __nvgpu_big_alloc(size, true);
+}
+/**
+ * nvgpu_big_free - Free an alloc from nvgpu_big_zalloc() or
+ *                  nvgpu_big_malloc().
+ *
+ * @p - A pointer allocated by nvgpu_big_zalloc() or nvgpu_big_malloc().
+ */
+static inline void nvgpu_big_free(void *p)
+{
+        /*
+         * This will have to be fixed eventually. Allocs that use
+         * nvgpu_big_[mz]alloc() will need to remember the size of the alloc
+         * when freeing.
+         *
+         * Until then the allocator is guessed from the address itself:
+         * pointers for which virt_addr_valid() is true go to kfree(), all
+         * others go to vfree().
+         * NOTE(review): is_vmalloc_addr() may be a more direct test for
+         * this - confirm before changing.
+         */
+        if (virt_addr_valid(p))
+                kfree(p);
+        else
+                vfree(p);
+}
 #endif
author	Alex Waterman <alexw@nvidia.com>	2017-01-11 19:58:14 -0500
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2017-03-03 13:34:43 -0500
commit	3966efc2e58f1802411f44fd00967dde448f278d (patch)
tree	b6cf822abc638b79acbd12b749a97ab5507a6fe9 /drivers
parent	76b78b6fdcb0bbed72645aaa85de6013e2b135c3 (diff)