path: root/include/os/linux/kmem.c
Diffstat (limited to 'include/os/linux/kmem.c')
-rw-r--r--	include/os/linux/kmem.c	653
1 files changed, 653 insertions, 0 deletions
diff --git a/include/os/linux/kmem.c b/include/os/linux/kmem.c
new file mode 100644
index 0000000..395cc45
--- /dev/null
+++ b/include/os/linux/kmem.c
@@ -0,0 +1,653 @@
/*
 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/vmalloc.h>
#include <linux/stacktrace.h>

#include <nvgpu/lock.h>
#include <nvgpu/kmem.h>
#include <nvgpu/atomic.h>
#include <nvgpu/bug.h>
#include <nvgpu/gk20a.h>

#include "kmem_priv.h"

/*
 * Statically declared because this needs to be shared across all nvgpu driver
 * instances. This makes sure that all kmem caches are _definitely_ uniquely
 * named.
 */
static atomic_t kmem_cache_id;

void *__nvgpu_big_alloc(struct gk20a *g, size_t size, bool clear)
{
	void *p;

	if (size > PAGE_SIZE) {
		if (clear)
			p = nvgpu_vzalloc(g, size);
		else
			p = nvgpu_vmalloc(g, size);
	} else {
		if (clear)
			p = nvgpu_kzalloc(g, size);
		else
			p = nvgpu_kmalloc(g, size);
	}

	return p;
}

void nvgpu_big_free(struct gk20a *g, void *p)
{
	/*
	 * This will have to be fixed eventually. Allocs that use
	 * nvgpu_big_[mz]alloc() will need to remember the size of the alloc
	 * when freeing.
	 */
	if (is_vmalloc_addr(p))
		nvgpu_vfree(g, p);
	else
		nvgpu_kfree(g, p);
}
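
/*
 * Usage sketch (illustrative only; assumes a valid struct gk20a pointer and
 * the nvgpu_big_malloc()/nvgpu_big_zalloc() wrappers referred to above,
 * declared in <nvgpu/kmem.h>):
 *
 *	u32 *map = nvgpu_big_zalloc(g, nr_entries * sizeof(*map));
 *
 *	if (!map)
 *		return -ENOMEM;
 *	...
 *	nvgpu_big_free(g, map);
 *
 * The PAGE_SIZE threshold in __nvgpu_big_alloc() means small buffers come
 * from kmalloc() and large ones from vmalloc(), without the caller having to
 * choose.
 */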

void *__nvgpu_kmalloc(struct gk20a *g, size_t size, void *ip)
{
	void *alloc;

#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
	alloc = __nvgpu_track_kmalloc(g, size, ip);
#else
	alloc = kmalloc(size, GFP_KERNEL);
#endif

	kmem_dbg(g, "kmalloc: size=%-6ld addr=0x%p gfp=0x%08x",
		 size, alloc, GFP_KERNEL);

	return alloc;
}

void *__nvgpu_kzalloc(struct gk20a *g, size_t size, void *ip)
{
	void *alloc;

#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
	alloc = __nvgpu_track_kzalloc(g, size, ip);
#else
	alloc = kzalloc(size, GFP_KERNEL);
#endif

	kmem_dbg(g, "kzalloc: size=%-6ld addr=0x%p gfp=0x%08x",
		 size, alloc, GFP_KERNEL);

	return alloc;
}

void *__nvgpu_kcalloc(struct gk20a *g, size_t n, size_t size, void *ip)
{
	void *alloc;

#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
	alloc = __nvgpu_track_kcalloc(g, n, size, ip);
#else
	alloc = kcalloc(n, size, GFP_KERNEL);
#endif

	kmem_dbg(g, "kcalloc: size=%-6ld addr=0x%p gfp=0x%08x",
		 n * size, alloc, GFP_KERNEL);

	return alloc;
}

void *__nvgpu_vmalloc(struct gk20a *g, unsigned long size, void *ip)
{
	void *alloc;

#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
	alloc = __nvgpu_track_vmalloc(g, size, ip);
#else
	alloc = vmalloc(size);
#endif

	kmem_dbg(g, "vmalloc: size=%-6ld addr=0x%p", size, alloc);

	return alloc;
}

void *__nvgpu_vzalloc(struct gk20a *g, unsigned long size, void *ip)
{
	void *alloc;

#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
	alloc = __nvgpu_track_vzalloc(g, size, ip);
#else
	alloc = vzalloc(size);
#endif

	kmem_dbg(g, "vzalloc: size=%-6ld addr=0x%p", size, alloc);

	return alloc;
}

void __nvgpu_kfree(struct gk20a *g, void *addr)
{
	kmem_dbg(g, "kfree: addr=0x%p", addr);
#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
	__nvgpu_track_kfree(g, addr);
#else
	kfree(addr);
#endif
}

void __nvgpu_vfree(struct gk20a *g, void *addr)
{
	kmem_dbg(g, "vfree: addr=0x%p", addr);
#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
	__nvgpu_track_vfree(g, addr);
#else
	vfree(addr);
#endif
}
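
/*
 * Usage sketch (illustrative only; assumes the nvgpu_kzalloc()/nvgpu_kfree()
 * wrappers declared in <nvgpu/kmem.h>, which are assumed to supply the
 * caller's address for the @ip argument used by the tracking code below;
 * "struct my_state" is a hypothetical caller-side type):
 *
 *	struct my_state *s = nvgpu_kzalloc(g, sizeof(*s));
 *
 *	if (!s)
 *		return -ENOMEM;
 *	...
 *	nvgpu_kfree(g, s);
 *
 * When CONFIG_NVGPU_TRACK_MEM_USAGE is enabled, each such call is recorded
 * by the tracking code below; otherwise it falls straight through to the
 * kernel allocators.
 */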

#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE

void nvgpu_lock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
{
	nvgpu_mutex_acquire(&tracker->lock);
}

void nvgpu_unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
{
	nvgpu_mutex_release(&tracker->lock);
}

void kmem_print_mem_alloc(struct gk20a *g,
			  struct nvgpu_mem_alloc *alloc,
			  struct seq_file *s)
{
#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
	int i;

	__pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld\n",
		alloc->addr, alloc->size);
	for (i = 0; i < alloc->stack_length; i++)
		__pstat(s, " %3d [<%p>] %pS\n", i,
			(void *)alloc->stack[i],
			(void *)alloc->stack[i]);
	__pstat(s, "\n");
#else
	__pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld src=%pF\n",
		alloc->addr, alloc->size, alloc->ip);
#endif
}

static int nvgpu_add_alloc(struct nvgpu_mem_alloc_tracker *tracker,
			   struct nvgpu_mem_alloc *alloc)
{
	alloc->allocs_entry.key_start = alloc->addr;
	alloc->allocs_entry.key_end = alloc->addr + alloc->size;

	nvgpu_rbtree_insert(&alloc->allocs_entry, &tracker->allocs);
	return 0;
}

static struct nvgpu_mem_alloc *nvgpu_rem_alloc(
	struct nvgpu_mem_alloc_tracker *tracker, u64 alloc_addr)
{
	struct nvgpu_mem_alloc *alloc;
	struct nvgpu_rbtree_node *node = NULL;

	nvgpu_rbtree_search(alloc_addr, &node, tracker->allocs);
	if (!node)
		return NULL;

	alloc = nvgpu_mem_alloc_from_rbtree_node(node);

	nvgpu_rbtree_unlink(node, &tracker->allocs);

	return alloc;
}
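
/*
 * The tracker's rbtree is keyed by the [addr, addr + size) interval of each
 * allocation, so a tracked allocation can be looked up by its base address.
 * A minimal lookup sketch, mirroring the calls used above (illustrative
 * only; assumes the tracker lock is taken as in the save/free paths):
 *
 *	struct nvgpu_rbtree_node *node = NULL;
 *	struct nvgpu_mem_alloc *a;
 *
 *	nvgpu_lock_tracker(tracker);
 *	nvgpu_rbtree_search(addr, &node, tracker->allocs);
 *	a = node ? nvgpu_mem_alloc_from_rbtree_node(node) : NULL;
 *	if (a)
 *		kmem_print_mem_alloc(g, a, NULL);
 *	nvgpu_unlock_tracker(tracker);
 */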

static int __nvgpu_save_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
				   unsigned long size, unsigned long real_size,
				   u64 addr, void *ip)
{
	int ret;
	struct nvgpu_mem_alloc *alloc;
#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
	struct stack_trace stack_trace;
#endif

	alloc = kzalloc(sizeof(*alloc), GFP_KERNEL);
	if (!alloc)
		return -ENOMEM;

	alloc->owner = tracker;
	alloc->size = size;
	alloc->real_size = real_size;
	alloc->addr = addr;
	alloc->ip = ip;

#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
	stack_trace.max_entries = MAX_STACK_TRACE;
	stack_trace.nr_entries = 0;
	stack_trace.entries = alloc->stack;
	/*
	 * This 4 here skips the 2 function calls that happen for all traced
	 * allocs due to nvgpu:
	 *
	 *   __nvgpu_save_kmem_alloc+0x7c/0x128
	 *   __nvgpu_track_kzalloc+0xcc/0xf8
	 *
	 * And the function calls that get made by the stack trace code itself.
	 * If the trace saving code changes this will likely have to change
	 * as well.
	 */
	stack_trace.skip = 4;
	save_stack_trace(&stack_trace);
	alloc->stack_length = stack_trace.nr_entries;
#endif

	nvgpu_lock_tracker(tracker);
	tracker->bytes_alloced += size;
	tracker->bytes_alloced_real += real_size;
	tracker->nr_allocs++;

	/* Keep track of this for building a histogram later on. */
	if (tracker->max_alloc < size)
		tracker->max_alloc = size;
	if (tracker->min_alloc > size)
		tracker->min_alloc = size;

	ret = nvgpu_add_alloc(tracker, alloc);
	if (ret) {
		WARN(1, "Duplicate alloc??? 0x%llx\n", addr);
		kfree(alloc);
		nvgpu_unlock_tracker(tracker);
		return ret;
	}
	nvgpu_unlock_tracker(tracker);

	return 0;
}

static int __nvgpu_free_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
				   u64 addr)
{
	struct nvgpu_mem_alloc *alloc;

	nvgpu_lock_tracker(tracker);
	alloc = nvgpu_rem_alloc(tracker, addr);
	if (WARN(!alloc, "Possible double-free detected: 0x%llx!", addr)) {
		nvgpu_unlock_tracker(tracker);
		return -EINVAL;
	}

	memset((void *)alloc->addr, 0, alloc->size);

	tracker->nr_frees++;
	tracker->bytes_freed += alloc->size;
	tracker->bytes_freed_real += alloc->real_size;
	nvgpu_unlock_tracker(tracker);

	return 0;
}

static void __nvgpu_check_valloc_size(unsigned long size)
{
	WARN(size < PAGE_SIZE, "Alloc smaller than page size! (%lu)!\n", size);
}

static void __nvgpu_check_kalloc_size(size_t size)
{
	WARN(size > PAGE_SIZE, "Alloc larger than page size! (%zu)!\n", size);
}

void *__nvgpu_track_vmalloc(struct gk20a *g, unsigned long size,
			    void *ip)
{
	void *alloc = vmalloc(size);

	if (!alloc)
		return NULL;

	__nvgpu_check_valloc_size(size);

	/*
	 * Ignore the return value. If this fails let's not cause any issues
	 * for the rest of the driver.
	 */
	__nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size),
				(u64)(uintptr_t)alloc, ip);

	return alloc;
}

void *__nvgpu_track_vzalloc(struct gk20a *g, unsigned long size,
			    void *ip)
{
	void *alloc = vzalloc(size);

	if (!alloc)
		return NULL;

	__nvgpu_check_valloc_size(size);

	/*
	 * Ignore the return value. If this fails let's not cause any issues
	 * for the rest of the driver.
	 */
	__nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size),
				(u64)(uintptr_t)alloc, ip);

	return alloc;
}

void *__nvgpu_track_kmalloc(struct gk20a *g, size_t size, void *ip)
{
	void *alloc = kmalloc(size, GFP_KERNEL);

	if (!alloc)
		return NULL;

	__nvgpu_check_kalloc_size(size);

	__nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size),
				(u64)(uintptr_t)alloc, ip);

	return alloc;
}

void *__nvgpu_track_kzalloc(struct gk20a *g, size_t size, void *ip)
{
	void *alloc = kzalloc(size, GFP_KERNEL);

	if (!alloc)
		return NULL;

	__nvgpu_check_kalloc_size(size);

	__nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size),
				(u64)(uintptr_t)alloc, ip);

	return alloc;
}

void *__nvgpu_track_kcalloc(struct gk20a *g, size_t n, size_t size,
			    void *ip)
{
	void *alloc = kcalloc(n, size, GFP_KERNEL);

	if (!alloc)
		return NULL;

	__nvgpu_check_kalloc_size(n * size);

	__nvgpu_save_kmem_alloc(g->kmallocs, n * size,
				roundup_pow_of_two(n * size),
				(u64)(uintptr_t)alloc, ip);

	return alloc;
}

void __nvgpu_track_vfree(struct gk20a *g, void *addr)
{
	/*
	 * Often it is accepted practice to pass NULL pointers into free
	 * functions to save code.
	 */
	if (!addr)
		return;

	__nvgpu_free_kmem_alloc(g->vmallocs, (u64)(uintptr_t)addr);

	vfree(addr);
}

void __nvgpu_track_kfree(struct gk20a *g, void *addr)
{
	if (!addr)
		return;

	__nvgpu_free_kmem_alloc(g->kmallocs, (u64)(uintptr_t)addr);

	kfree(addr);
}

static int __do_check_for_outstanding_allocs(
	struct gk20a *g,
	struct nvgpu_mem_alloc_tracker *tracker,
	const char *type, bool silent)
{
	struct nvgpu_rbtree_node *node;
	int count = 0;

	nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
	while (node) {
		struct nvgpu_mem_alloc *alloc =
			nvgpu_mem_alloc_from_rbtree_node(node);

		if (!silent)
			kmem_print_mem_alloc(g, alloc, NULL);

		count++;
		nvgpu_rbtree_enum_next(&node, node);
	}

	return count;
}

/**
 * check_for_outstanding_allocs - Count and display outstanding allocs
 *
 * @g - The GPU.
 * @silent - If set don't print anything about the allocs.
 *
 * Dump (or just count) the number of allocations left outstanding.
 */
static int check_for_outstanding_allocs(struct gk20a *g, bool silent)
{
	int count = 0;

	count += __do_check_for_outstanding_allocs(g, g->kmallocs, "kmalloc",
						   silent);
	count += __do_check_for_outstanding_allocs(g, g->vmallocs, "vmalloc",
						   silent);

	return count;
}

static void do_nvgpu_kmem_cleanup(struct nvgpu_mem_alloc_tracker *tracker,
				  void (*force_free_func)(const void *))
{
	struct nvgpu_rbtree_node *node;

	nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
	while (node) {
		struct nvgpu_mem_alloc *alloc =
			nvgpu_mem_alloc_from_rbtree_node(node);

		if (force_free_func)
			force_free_func((void *)alloc->addr);

		nvgpu_rbtree_unlink(node, &tracker->allocs);
		kfree(alloc);

		nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
	}
}

/**
 * nvgpu_kmem_cleanup - Clean up the kmem tracking
 *
 * @g - The GPU.
 * @force_free - If set will also free leaked objects if possible.
 *
 * Clean up all of the allocs made by the nvgpu_kmem tracking code. If
 * @force_free is non-zero then the allocations made by nvgpu are also freed.
 * This is risky, though, as it is possible that the memory is still in use
 * by other parts of the GPU driver not aware that this has happened.
 *
 * In theory it should be fine if the GPU driver has been deinitialized and
 * there are no bugs in that code. However, if there are any bugs in that code
 * then they could manifest as odd crashes an indeterminate amount of time in
 * the future. So use @force_free at your own risk.
 */
static void nvgpu_kmem_cleanup(struct gk20a *g, bool force_free)
{
	do_nvgpu_kmem_cleanup(g->kmallocs, force_free ? kfree : NULL);
	do_nvgpu_kmem_cleanup(g->vmallocs, force_free ? vfree : NULL);
}

void nvgpu_kmem_fini(struct gk20a *g, int flags)
{
	int count;
	bool silent, force_free;

	if (!flags)
		return;

	silent = !(flags & NVGPU_KMEM_FINI_DUMP_ALLOCS);
	force_free = !!(flags & NVGPU_KMEM_FINI_FORCE_CLEANUP);

	count = check_for_outstanding_allocs(g, silent);
	nvgpu_kmem_cleanup(g, force_free);

	/*
	 * If we leak objects we can either BUG() out or just WARN(). In
	 * general it doesn't make sense to BUG() here since leaking a few
	 * objects won't crash the kernel, but it can be helpful for
	 * development.
	 *
	 * If neither flag is set then we just silently do nothing.
	 */
	if (count > 0) {
		if (flags & NVGPU_KMEM_FINI_WARN) {
			WARN(1, "Letting %d allocs leak!!\n", count);
		} else if (flags & NVGPU_KMEM_FINI_BUG) {
			nvgpu_err(g, "Letting %d allocs leak!!", count);
			BUG();
		}
	}
}
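
/*
 * Teardown usage sketch (illustrative only; a typical driver-remove path
 * might dump and warn about leaks without force-freeing them):
 *
 *	nvgpu_kmem_fini(g, NVGPU_KMEM_FINI_DUMP_ALLOCS |
 *			   NVGPU_KMEM_FINI_WARN);
 *
 * Adding NVGPU_KMEM_FINI_FORCE_CLEANUP additionally frees the leaked
 * allocations themselves, with the caveats described above
 * nvgpu_kmem_cleanup().
 */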

int nvgpu_kmem_init(struct gk20a *g)
{
	int err;

	g->vmallocs = kzalloc(sizeof(*g->vmallocs), GFP_KERNEL);
	g->kmallocs = kzalloc(sizeof(*g->kmallocs), GFP_KERNEL);

	if (!g->vmallocs || !g->kmallocs) {
		err = -ENOMEM;
		goto fail;
	}

	g->vmallocs->name = "vmalloc";
	g->kmallocs->name = "kmalloc";

	g->vmallocs->allocs = NULL;
	g->kmallocs->allocs = NULL;

	nvgpu_mutex_init(&g->vmallocs->lock);
	nvgpu_mutex_init(&g->kmallocs->lock);

	g->vmallocs->min_alloc = PAGE_SIZE;
	g->kmallocs->min_alloc = KMALLOC_MIN_SIZE;

	/*
	 * This needs to go after all the other initialization since the cache
	 * creation uses the nvgpu_kzalloc() API.
	 */
	g->vmallocs->allocs_cache = nvgpu_kmem_cache_create(g,
						sizeof(struct nvgpu_mem_alloc));
	g->kmallocs->allocs_cache = nvgpu_kmem_cache_create(g,
						sizeof(struct nvgpu_mem_alloc));

	if (!g->vmallocs->allocs_cache || !g->kmallocs->allocs_cache) {
		err = -ENOMEM;
		if (g->vmallocs->allocs_cache)
			nvgpu_kmem_cache_destroy(g->vmallocs->allocs_cache);
		if (g->kmallocs->allocs_cache)
			nvgpu_kmem_cache_destroy(g->kmallocs->allocs_cache);
		goto fail;
	}

	return 0;

fail:
	if (g->vmallocs)
		kfree(g->vmallocs);
	if (g->kmallocs)
		kfree(g->kmallocs);
	return err;
}

#else /* !CONFIG_NVGPU_TRACK_MEM_USAGE */

int nvgpu_kmem_init(struct gk20a *g)
{
	return 0;
}

void nvgpu_kmem_fini(struct gk20a *g, int flags)
{
}
#endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */

struct nvgpu_kmem_cache *nvgpu_kmem_cache_create(struct gk20a *g, size_t size)
{
	struct nvgpu_kmem_cache *cache =
		nvgpu_kzalloc(g, sizeof(struct nvgpu_kmem_cache));

	if (!cache)
		return NULL;

	cache->g = g;

	snprintf(cache->name, sizeof(cache->name),
		 "nvgpu-cache-0x%p-%d-%d", g, (int)size,
		 atomic_inc_return(&kmem_cache_id));
	cache->cache = kmem_cache_create(cache->name,
					 size, size, 0, NULL);
	if (!cache->cache) {
		nvgpu_kfree(g, cache);
		return NULL;
	}

	return cache;
}

void nvgpu_kmem_cache_destroy(struct nvgpu_kmem_cache *cache)
{
	struct gk20a *g = cache->g;

	kmem_cache_destroy(cache->cache);
	nvgpu_kfree(g, cache);
}

void *nvgpu_kmem_cache_alloc(struct nvgpu_kmem_cache *cache)
{
	return kmem_cache_alloc(cache->cache, GFP_KERNEL);
}

void nvgpu_kmem_cache_free(struct nvgpu_kmem_cache *cache, void *ptr)
{
	kmem_cache_free(cache->cache, ptr);
}
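
/*
 * Cache lifecycle sketch (illustrative only; "struct my_obj" is a
 * hypothetical fixed-size object used for the example):
 *
 *	struct nvgpu_kmem_cache *cache;
 *	struct my_obj *obj;
 *
 *	cache = nvgpu_kmem_cache_create(g, sizeof(struct my_obj));
 *	if (!cache)
 *		return -ENOMEM;
 *
 *	obj = nvgpu_kmem_cache_alloc(cache);
 *	if (obj) {
 *		...
 *		nvgpu_kmem_cache_free(cache, obj);
 *	}
 *
 *	nvgpu_kmem_cache_destroy(cache);
 *
 * The kmem_cache_id counter declared at the top of this file keeps the
 * underlying kmem_cache names unique even when several nvgpu instances
 * create caches of the same size.
 */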