Diffstat (limited to 'drivers/gpu/nvgpu/common/linux/kmem.c')
-rw-r--r--  drivers/gpu/nvgpu/common/linux/kmem.c  654

1 file changed, 654 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/kmem.c b/drivers/gpu/nvgpu/common/linux/kmem.c
new file mode 100644
index 00000000..a492fb35
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/kmem.c
@@ -0,0 +1,654 @@
/*
 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/vmalloc.h>
#include <linux/stacktrace.h>

#include <nvgpu/lock.h>
#include <nvgpu/kmem.h>
#include <nvgpu/atomic.h>
#include <nvgpu/bug.h>

#include "gk20a/gk20a.h"

#include "kmem_priv.h"

/*
 * Statically declared because this needs to be shared across all nvgpu driver
 * instances. This makes sure that all kmem caches are _definitely_ uniquely
 * named.
 */
static atomic_t kmem_cache_id;

void *__nvgpu_big_alloc(struct gk20a *g, size_t size, bool clear)
{
	void *p;

	if (size > PAGE_SIZE) {
		if (clear)
			p = nvgpu_vzalloc(g, size);
		else
			p = nvgpu_vmalloc(g, size);
	} else {
		if (clear)
			p = nvgpu_kzalloc(g, size);
		else
			p = nvgpu_kmalloc(g, size);
	}

	return p;
}

void nvgpu_big_free(struct gk20a *g, void *p)
{
	/*
	 * This will have to be fixed eventually. Allocs that use
	 * nvgpu_big_[mz]alloc() will need to remember the size of the alloc
	 * when freeing.
	 */
	if (virt_addr_valid(p))
		nvgpu_kfree(g, p);
	else
		nvgpu_vfree(g, p);
}
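
/*
 * Editor's illustrative sketch, not part of this change: one plausible way a
 * caller might pair the big-alloc helper with nvgpu_big_free(). The function
 * and buffer size are hypothetical; real callers would presumably go through
 * the nvgpu_big_[mz]alloc() wrappers referenced in the comment above rather
 * than calling __nvgpu_big_alloc() directly.
 */
static inline int example_big_alloc_usage(struct gk20a *g)
{
	/* Larger than PAGE_SIZE, so the vmalloc path above is taken. */
	void *buf = __nvgpu_big_alloc(g, 4 * PAGE_SIZE, true);

	if (!buf)
		return -ENOMEM;

	/* ... use buf ... */

	/* virt_addr_valid() routes this to either nvgpu_kfree() or nvgpu_vfree(). */
	nvgpu_big_free(g, buf);

	return 0;
}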

void *__nvgpu_kmalloc(struct gk20a *g, size_t size, unsigned long ip)
{
	void *alloc;

#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
	alloc = __nvgpu_track_kmalloc(g, size, ip);
#else
	alloc = kmalloc(size, GFP_KERNEL);
#endif

	kmem_dbg(g, "kmalloc: size=%-6ld addr=0x%p gfp=0x%08x",
		 size, alloc, GFP_KERNEL);

	return alloc;
}

void *__nvgpu_kzalloc(struct gk20a *g, size_t size, unsigned long ip)
{
	void *alloc;

#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
	alloc = __nvgpu_track_kzalloc(g, size, ip);
#else
	alloc = kzalloc(size, GFP_KERNEL);
#endif

	kmem_dbg(g, "kzalloc: size=%-6ld addr=0x%p gfp=0x%08x",
		 size, alloc, GFP_KERNEL);

	return alloc;
}

void *__nvgpu_kcalloc(struct gk20a *g, size_t n, size_t size, unsigned long ip)
{
	void *alloc;

#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
	alloc = __nvgpu_track_kcalloc(g, n, size, ip);
#else
	alloc = kcalloc(n, size, GFP_KERNEL);
#endif

	kmem_dbg(g, "kcalloc: size=%-6ld addr=0x%p gfp=0x%08x",
		 n * size, alloc, GFP_KERNEL);

	return alloc;
}

void *__nvgpu_vmalloc(struct gk20a *g, unsigned long size, unsigned long ip)
{
	void *alloc;

#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
	alloc = __nvgpu_track_vmalloc(g, size, ip);
#else
	alloc = vmalloc(size);
#endif

	kmem_dbg(g, "vmalloc: size=%-6ld addr=0x%p", size, alloc);

	return alloc;
}

void *__nvgpu_vzalloc(struct gk20a *g, unsigned long size, unsigned long ip)
{
	void *alloc;

#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
	alloc = __nvgpu_track_vzalloc(g, size, ip);
#else
	alloc = vzalloc(size);
#endif

	kmem_dbg(g, "vzalloc: size=%-6ld addr=0x%p", size, alloc);

	return alloc;
}

void __nvgpu_kfree(struct gk20a *g, void *addr)
{
	kmem_dbg(g, "kfree: addr=0x%p", addr);
#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
	__nvgpu_track_kfree(g, addr);
#else
	kfree(addr);
#endif
}

void __nvgpu_vfree(struct gk20a *g, void *addr)
{
	kmem_dbg(g, "vfree: addr=0x%p", addr);
#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
	__nvgpu_track_vfree(g, addr);
#else
	vfree(addr);
#endif
}
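
/*
 * Editor's illustrative sketch, not part of this change: the __nvgpu_*()
 * helpers above are normally reached through the nvgpu_kmalloc()/
 * nvgpu_kzalloc()/nvgpu_kfree() style wrappers used elsewhere in this file,
 * which supply the caller address (ip) consumed by the tracking code below.
 * The function and table size here are hypothetical.
 */
static inline int example_kmem_usage(struct gk20a *g)
{
	u32 *table;

	/* Small (sub-page) allocation, expected to land in __nvgpu_kzalloc(). */
	table = nvgpu_kzalloc(g, 64 * sizeof(*table));
	if (!table)
		return -ENOMEM;

	/* ... use table ... */

	/* Freed through __nvgpu_kfree(), which also logs the address. */
	nvgpu_kfree(g, table);

	return 0;
}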

#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE

void nvgpu_lock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
{
	nvgpu_mutex_acquire(&tracker->lock);
}

void nvgpu_unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
{
	nvgpu_mutex_release(&tracker->lock);
}

void kmem_print_mem_alloc(struct gk20a *g,
			  struct nvgpu_mem_alloc *alloc,
			  struct seq_file *s)
{
#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
	int i;

	__pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld\n",
		alloc->addr, alloc->size);
	for (i = 0; i < alloc->stack_length; i++)
		__pstat(s, " %3d [<%p>] %pS\n", i,
			(void *)alloc->stack[i],
			(void *)alloc->stack[i]);
	__pstat(s, "\n");
#else
	__pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld src=%pF\n",
		alloc->addr, alloc->size, alloc->ip);
#endif
}

static int nvgpu_add_alloc(struct nvgpu_mem_alloc_tracker *tracker,
			   struct nvgpu_mem_alloc *alloc)
{
	alloc->allocs_entry.key_start = alloc->addr;
	alloc->allocs_entry.key_end = alloc->addr + alloc->size;

	nvgpu_rbtree_insert(&alloc->allocs_entry, &tracker->allocs);
	return 0;
}

static struct nvgpu_mem_alloc *nvgpu_rem_alloc(
	struct nvgpu_mem_alloc_tracker *tracker, u64 alloc_addr)
{
	struct nvgpu_mem_alloc *alloc;
	struct nvgpu_rbtree_node *node = NULL;

	nvgpu_rbtree_search(alloc_addr, &node, tracker->allocs);
	if (!node)
		return NULL;

	alloc = nvgpu_mem_alloc_from_rbtree_node(node);

	nvgpu_rbtree_unlink(node, &tracker->allocs);

	return alloc;
}

static int __nvgpu_save_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
				   unsigned long size, unsigned long real_size,
				   u64 addr, unsigned long ip)
{
	int ret;
	struct nvgpu_mem_alloc *alloc;
#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
	struct stack_trace stack_trace;
#endif

	alloc = kzalloc(sizeof(*alloc), GFP_KERNEL);
	if (!alloc)
		return -ENOMEM;

	alloc->owner = tracker;
	alloc->size = size;
	alloc->real_size = real_size;
	alloc->addr = addr;
	alloc->ip = (void *)(uintptr_t)ip;

#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
	stack_trace.max_entries = MAX_STACK_TRACE;
	stack_trace.nr_entries = 0;
	stack_trace.entries = alloc->stack;
	/*
	 * This 4 here skips the 2 function calls that happen for all traced
	 * allocs due to nvgpu:
	 *
	 *   __nvgpu_save_kmem_alloc+0x7c/0x128
	 *   __nvgpu_track_kzalloc+0xcc/0xf8
	 *
	 * And the function calls that get made by the stack trace code itself.
	 * If the trace savings code changes this will likely have to change
	 * as well.
	 */
	stack_trace.skip = 4;
	save_stack_trace(&stack_trace);
	alloc->stack_length = stack_trace.nr_entries;
#endif

	nvgpu_lock_tracker(tracker);
	tracker->bytes_alloced += size;
	tracker->bytes_alloced_real += real_size;
	tracker->nr_allocs++;

	/* Keep track of this for building a histogram later on. */
	if (tracker->max_alloc < size)
		tracker->max_alloc = size;
	if (tracker->min_alloc > size)
		tracker->min_alloc = size;

	ret = nvgpu_add_alloc(tracker, alloc);
	if (ret) {
		WARN(1, "Duplicate alloc??? 0x%llx\n", addr);
		kfree(alloc);
		nvgpu_unlock_tracker(tracker);
		return ret;
	}
	nvgpu_unlock_tracker(tracker);

	return 0;
}

static int __nvgpu_free_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
				   u64 addr)
{
	struct nvgpu_mem_alloc *alloc;

	nvgpu_lock_tracker(tracker);
	alloc = nvgpu_rem_alloc(tracker, addr);
	if (WARN(!alloc, "Possible double-free detected: 0x%llx!", addr)) {
		nvgpu_unlock_tracker(tracker);
		return -EINVAL;
	}

	memset((void *)alloc->addr, 0, alloc->size);

	tracker->nr_frees++;
	tracker->bytes_freed += alloc->size;
	tracker->bytes_freed_real += alloc->real_size;
	nvgpu_unlock_tracker(tracker);

	return 0;
}

static void __nvgpu_check_valloc_size(unsigned long size)
{
	WARN(size < PAGE_SIZE, "Alloc smaller than page size! (%lu)!\n", size);
}

static void __nvgpu_check_kalloc_size(size_t size)
{
	WARN(size > PAGE_SIZE, "Alloc larger than page size! (%zu)!\n", size);
}

void *__nvgpu_track_vmalloc(struct gk20a *g, unsigned long size,
			    unsigned long ip)
{
	void *alloc = vmalloc(size);

	if (!alloc)
		return NULL;

	__nvgpu_check_valloc_size(size);

	/*
	 * Ignore the return message. If this fails let's not cause any issues
	 * for the rest of the driver.
	 */
	__nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size),
				(u64)(uintptr_t)alloc, ip);

	return alloc;
}

void *__nvgpu_track_vzalloc(struct gk20a *g, unsigned long size,
			    unsigned long ip)
{
	void *alloc = vzalloc(size);

	if (!alloc)
		return NULL;

	__nvgpu_check_valloc_size(size);

	/*
	 * Ignore the return message. If this fails let's not cause any issues
	 * for the rest of the driver.
	 */
	__nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size),
				(u64)(uintptr_t)alloc, ip);

	return alloc;
}

void *__nvgpu_track_kmalloc(struct gk20a *g, size_t size, unsigned long ip)
{
	void *alloc = kmalloc(size, GFP_KERNEL);

	if (!alloc)
		return NULL;

	__nvgpu_check_kalloc_size(size);

	__nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size),
				(u64)(uintptr_t)alloc, ip);

	return alloc;
}

void *__nvgpu_track_kzalloc(struct gk20a *g, size_t size, unsigned long ip)
{
	void *alloc = kzalloc(size, GFP_KERNEL);

	if (!alloc)
		return NULL;

	__nvgpu_check_kalloc_size(size);

	__nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size),
				(u64)(uintptr_t)alloc, ip);

	return alloc;
}

void *__nvgpu_track_kcalloc(struct gk20a *g, size_t n, size_t size,
			    unsigned long ip)
{
	void *alloc = kcalloc(n, size, GFP_KERNEL);

	if (!alloc)
		return NULL;

	__nvgpu_check_kalloc_size(n * size);

	__nvgpu_save_kmem_alloc(g->kmallocs, n * size,
				roundup_pow_of_two(n * size),
				(u64)(uintptr_t)alloc, ip);

	return alloc;
}

void __nvgpu_track_vfree(struct gk20a *g, void *addr)
{
	/*
	 * Often it is accepted practice to pass NULL pointers into free
	 * functions to save code.
	 */
	if (!addr)
		return;

	__nvgpu_free_kmem_alloc(g->vmallocs, (u64)(uintptr_t)addr);

	vfree(addr);
}

void __nvgpu_track_kfree(struct gk20a *g, void *addr)
{
	if (!addr)
		return;

	__nvgpu_free_kmem_alloc(g->kmallocs, (u64)(uintptr_t)addr);

	kfree(addr);
}

static int __do_check_for_outstanding_allocs(
		struct gk20a *g,
		struct nvgpu_mem_alloc_tracker *tracker,
		const char *type, bool silent)
{
	struct nvgpu_rbtree_node *node;
	int count = 0;

	nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
	while (node) {
		struct nvgpu_mem_alloc *alloc =
			nvgpu_mem_alloc_from_rbtree_node(node);

		if (!silent)
			kmem_print_mem_alloc(g, alloc, NULL);

		count++;
		nvgpu_rbtree_enum_next(&node, node);
	}

	return count;
}

/**
 * check_for_outstanding_allocs - Count and display outstanding allocs
 *
 * @g - The GPU.
 * @silent - If set don't print anything about the allocs.
 *
 * Dump (or just count) the number of allocations left outstanding.
 */
static int check_for_outstanding_allocs(struct gk20a *g, bool silent)
{
	int count = 0;

	count += __do_check_for_outstanding_allocs(g, g->kmallocs, "kmalloc",
						   silent);
	count += __do_check_for_outstanding_allocs(g, g->vmallocs, "vmalloc",
						   silent);

	return count;
}

static void do_nvgpu_kmem_cleanup(struct nvgpu_mem_alloc_tracker *tracker,
				  void (*force_free_func)(const void *))
{
	struct nvgpu_rbtree_node *node;

	nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
	while (node) {
		struct nvgpu_mem_alloc *alloc =
			nvgpu_mem_alloc_from_rbtree_node(node);

		if (force_free_func)
			force_free_func((void *)alloc->addr);

		nvgpu_rbtree_unlink(node, &tracker->allocs);
		kfree(alloc);

		nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
	}
}

/**
 * nvgpu_kmem_cleanup - Cleanup the kmem tracking
 *
 * @g - The GPU.
 * @force_free - If set will also free leaked objects if possible.
 *
 * Cleanup all of the allocs made by nvgpu_kmem tracking code. If @force_free
 * is non-zero then the allocation made by nvgpu is also freed. This is risky,
 * though, as it is possible that the memory is still in use by other parts of
 * the GPU driver not aware that this has happened.
 *
 * In theory it should be fine if the GPU driver has been deinitialized and
 * there are no bugs in that code. However, if there are any bugs in that code
 * then they could likely manifest as odd crashes indeterminate amounts of time
 * in the future. So use @force_free at your own risk.
 */
static void nvgpu_kmem_cleanup(struct gk20a *g, bool force_free)
{
	do_nvgpu_kmem_cleanup(g->kmallocs, force_free ? kfree : NULL);
	do_nvgpu_kmem_cleanup(g->vmallocs, force_free ? vfree : NULL);
}

void nvgpu_kmem_fini(struct gk20a *g, int flags)
{
	int count;
	bool silent, force_free;

	if (!flags)
		return;

	silent = !(flags & NVGPU_KMEM_FINI_DUMP_ALLOCS);
	force_free = !!(flags & NVGPU_KMEM_FINI_FORCE_CLEANUP);

	count = check_for_outstanding_allocs(g, silent);
	nvgpu_kmem_cleanup(g, force_free);

	/*
	 * If we leak objects we can either BUG() out or just WARN(). In general
	 * it doesn't make sense to BUG() on here since leaking a few objects
	 * won't crash the kernel but it can be helpful for development.
	 *
	 * If neither flag is set then we just silently do nothing.
	 */
	if (count > 0) {
		if (flags & NVGPU_KMEM_FINI_WARN) {
			WARN(1, "Letting %d allocs leak!!\n", count);
		} else if (flags & NVGPU_KMEM_FINI_BUG) {
			nvgpu_err(g, "Letting %d allocs leak!!", count);
			BUG();
		}
	}
}

int nvgpu_kmem_init(struct gk20a *g)
{
	int err;

	g->vmallocs = kzalloc(sizeof(*g->vmallocs), GFP_KERNEL);
	g->kmallocs = kzalloc(sizeof(*g->kmallocs), GFP_KERNEL);

	if (!g->vmallocs || !g->kmallocs) {
		err = -ENOMEM;
		goto fail;
	}

	g->vmallocs->name = "vmalloc";
	g->kmallocs->name = "kmalloc";

	g->vmallocs->allocs = NULL;
	g->kmallocs->allocs = NULL;

	nvgpu_mutex_init(&g->vmallocs->lock);
	nvgpu_mutex_init(&g->kmallocs->lock);

	g->vmallocs->min_alloc = PAGE_SIZE;
	g->kmallocs->min_alloc = KMALLOC_MIN_SIZE;

	/*
	 * This needs to go after all the other initialization since they use
	 * the nvgpu_kzalloc() API.
	 */
	g->vmallocs->allocs_cache = nvgpu_kmem_cache_create(g,
					sizeof(struct nvgpu_mem_alloc));
	g->kmallocs->allocs_cache = nvgpu_kmem_cache_create(g,
					sizeof(struct nvgpu_mem_alloc));

	if (!g->vmallocs->allocs_cache || !g->kmallocs->allocs_cache) {
		err = -ENOMEM;
		if (g->vmallocs->allocs_cache)
			nvgpu_kmem_cache_destroy(g->vmallocs->allocs_cache);
		if (g->kmallocs->allocs_cache)
			nvgpu_kmem_cache_destroy(g->kmallocs->allocs_cache);
		goto fail;
	}

	return 0;

fail:
	if (g->vmallocs)
		kfree(g->vmallocs);
	if (g->kmallocs)
		kfree(g->kmallocs);
	return err;
}

#else /* !CONFIG_NVGPU_TRACK_MEM_USAGE */

int nvgpu_kmem_init(struct gk20a *g)
{
	return 0;
}

void nvgpu_kmem_fini(struct gk20a *g, int flags)
{
}
#endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */
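
/*
 * Editor's illustrative sketch, not part of this change: a plausible
 * init/fini pairing using the flags handled above. The wrapper function is
 * hypothetical; when CONFIG_NVGPU_TRACK_MEM_USAGE is disabled both calls are
 * no-ops, as the stubs above show.
 */
static inline void example_kmem_lifetime(struct gk20a *g)
{
	if (nvgpu_kmem_init(g))
		return;

	/* ... driver runs, allocating through the wrappers defined above ... */

	/* On teardown, dump anything still outstanding and WARN() rather than BUG(). */
	nvgpu_kmem_fini(g, NVGPU_KMEM_FINI_DUMP_ALLOCS | NVGPU_KMEM_FINI_WARN);
}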

struct nvgpu_kmem_cache *nvgpu_kmem_cache_create(struct gk20a *g, size_t size)
{
	struct nvgpu_kmem_cache *cache =
		nvgpu_kzalloc(g, sizeof(struct nvgpu_kmem_cache));

	if (!cache)
		return NULL;

	cache->g = g;

	snprintf(cache->name, sizeof(cache->name),
		 "nvgpu-cache-0x%p-%d-%d", g, (int)size,
		 atomic_inc_return(&kmem_cache_id));
	cache->cache = kmem_cache_create(cache->name,
					 size, size, 0, NULL);
	if (!cache->cache) {
		nvgpu_kfree(g, cache);
		return NULL;
	}

	return cache;
}

void nvgpu_kmem_cache_destroy(struct nvgpu_kmem_cache *cache)
{
	struct gk20a *g = cache->g;

	kmem_cache_destroy(cache->cache);
	nvgpu_kfree(g, cache);
}

void *nvgpu_kmem_cache_alloc(struct nvgpu_kmem_cache *cache)
{
	return kmem_cache_alloc(cache->cache, GFP_KERNEL);
}

void nvgpu_kmem_cache_free(struct nvgpu_kmem_cache *cache, void *ptr)
{
	kmem_cache_free(cache->cache, ptr);
}
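
/*
 * Editor's illustrative sketch, not part of this change: a typical
 * create/alloc/free/destroy flow for the cache wrapper above. The object
 * type and helper function are hypothetical.
 */
struct example_obj {
	u64 handle;
};

static inline void example_cache_usage(struct gk20a *g)
{
	struct nvgpu_kmem_cache *cache;
	struct example_obj *obj;

	/* Backing slab gets a unique "nvgpu-cache-..." name via kmem_cache_id. */
	cache = nvgpu_kmem_cache_create(g, sizeof(struct example_obj));
	if (!cache)
		return;

	obj = nvgpu_kmem_cache_alloc(cache);
	if (obj) {
		obj->handle = 0;
		nvgpu_kmem_cache_free(cache, obj);
	}

	nvgpu_kmem_cache_destroy(cache);
}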