gpu: nvgpu: implement lockless allocator

Implement a lockless allocator for fixed-size data structures. Bug 1795076 Change-Id: I70a5f52cbdb4452cc0fd9a8edf26735be29ede57 Signed-off-by: Sachit Kadle <skadle@nvidia.com> Reviewed-on: http://git-master/r/1213211 (cherry picked from commit e4bff7da0f39c8f4b5691169c02e482bc9d4166e) Reviewed-on: http://git-master/r/1223246 GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
author: Sachit Kadle <skadle@nvidia.com> 2016-09-01 23:50:06 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2016-09-20 13:43:40 -0400
commit: 35b2507fe3ec6c28c27dd7fb289c003c7a0baf33 (patch)
tree: a691805580ded71038d7fcca07891c8ef210d420 /drivers/gpu
parent: 101689dd8b536afa3ee7e265dc4ea846fa053767 (diff)
4 files changed, 335 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu
index b8e38919..33d158bb 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu
@@ -57,6 +57,7 @@ nvgpu-y := \
        gk20a/gk20a_allocator_bitmap.o \
        gk20a/gk20a_allocator_buddy.o \
        gk20a/gk20a_allocator_page.o \
+        gk20a/gk20a_allocator_lockless.o \
        gk20a/cde_gk20a.o \
        gk20a/platform_gk20a_generic.o \
        gk20a/tsg_gk20a.o \
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h
index 93865190..0d611ba3 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h
@@ -165,6 +165,15 @@ int gk20a_page_allocator_init(struct gk20a_allocator *__a,
                              const char *name, u64 base, u64 length,
                              u64 blk_size, u64 flags);
+/*
+ * Lockless allocatior initializers.
+ * Note: This allocator can only allocate fixed-size structures of a
+ * pre-defined size.
+ */
+int gk20a_lockless_allocator_init(struct gk20a_allocator *__a,
+                                  const char *name, u64 base, u64 length,
+                                  u64 struct_size, u64 flags);
 #define GPU_BALLOC_MAX_ORDER            31
 /*
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_lockless.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_lockless.c
new file mode 100644
index 00000000..32455c98
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_lockless.c
@@ -0,0 +1,204 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/atomic.h>
+#include "gk20a_allocator.h"
+#include "lockless_allocator_priv.h"
+static u64 gk20a_lockless_alloc_length(struct gk20a_allocator *a)
+{
+        struct gk20a_lockless_allocator *pa = a->priv;
+        return pa->length;
+}
+static u64 gk20a_lockless_alloc_base(struct gk20a_allocator *a)
+{
+        struct gk20a_lockless_allocator *pa = a->priv;
+        return pa->base;
+}
+static int gk20a_lockless_alloc_inited(struct gk20a_allocator *a)
+{
+        struct gk20a_lockless_allocator *pa = a->priv;
+        int inited = pa->inited;
+        rmb();
+        return inited;
+}
+static u64 gk20a_lockless_alloc_end(struct gk20a_allocator *a)
+{
+        struct gk20a_lockless_allocator *pa = a->priv;
+        return pa->base + pa->length;
+}
+static u64 gk20a_lockless_alloc(struct gk20a_allocator *a, u64 len)
+{
+        struct gk20a_lockless_allocator *pa = a->priv;
+        int head, new_head, ret;
+        u64 addr = 0;
+        if (len != pa->blk_size)
+                return 0;
+        head = ACCESS_ONCE(pa->head);
+        while (head >= 0) {
+                new_head = ACCESS_ONCE(pa->next[head]);
+                ret = cmpxchg(&pa->head, head, new_head);
+                if (ret == head) {
+                        addr = pa->base + head * pa->blk_size;
+                        atomic_inc(&pa->nr_allocs);
+                        alloc_dbg(a, "Alloc node # %d @ addr 0x%llx\n", head,
+                                  addr);
+                        break;
+                }
+                head = ACCESS_ONCE(pa->head);
+        }
+        return addr;
+}
+static void gk20a_lockless_free(struct gk20a_allocator *a, u64 addr)
+{
+        struct gk20a_lockless_allocator *pa = a->priv;
+        int head, ret;
+        u64 cur_idx, rem;
+        cur_idx = addr - pa->base;
+        rem = do_div(cur_idx, pa->blk_size);
+        while (1) {
+                head = ACCESS_ONCE(pa->head);
+                ACCESS_ONCE(pa->next[cur_idx]) = head;
+                ret = cmpxchg(&pa->head, head, cur_idx);
+                if (ret == head) {
+                        atomic_dec(&pa->nr_allocs);
+                        alloc_dbg(a, "Free node # %llu\n", cur_idx);
+                        break;
+                }
+        }
+}
+static void gk20a_lockless_alloc_destroy(struct gk20a_allocator *a)
+{
+        struct gk20a_lockless_allocator *pa = a->priv;
+        vfree(pa->next);
+        kfree(pa);
+}
+static void gk20a_lockless_print_stats(struct gk20a_allocator *a,
+                                   struct seq_file *s, int lock)
+{
+        struct gk20a_lockless_allocator *pa = a->priv;
+        __alloc_pstat(s, a, "Lockless allocator params:\n");
+        __alloc_pstat(s, a, "  start = 0x%llx\n", pa->base);
+        __alloc_pstat(s, a, "  end   = 0x%llx\n", pa->base + pa->length);
+        /* Actual stats. */
+        __alloc_pstat(s, a, "Stats:\n");
+        __alloc_pstat(s, a, "  Number allocs = %d\n",
+                      atomic_read(&pa->nr_allocs));
+        __alloc_pstat(s, a, "  Number free   = %d\n",
+                      pa->nr_nodes - atomic_read(&pa->nr_allocs));
+}
+static const struct gk20a_allocator_ops pool_ops = {
+        .alloc          = gk20a_lockless_alloc,
+        .free           = gk20a_lockless_free,
+        .base           = gk20a_lockless_alloc_base,
+        .length         = gk20a_lockless_alloc_length,
+        .end            = gk20a_lockless_alloc_end,
+        .inited         = gk20a_lockless_alloc_inited,
+        .fini           = gk20a_lockless_alloc_destroy,
+        .print_stats    = gk20a_lockless_print_stats,
+};
+int gk20a_lockless_allocator_init(struct gk20a_allocator *__a,
+                              const char *name, u64 base, u64 length,
+                              u64 blk_size, u64 flags)
+{
+        int i;
+        int err;
+        int nr_nodes;
+        u64 count, rem;
+        struct gk20a_lockless_allocator *a;
+        if (!blk_size)
+                return -EINVAL;
+        /*
+         * Ensure we have space for atleast one node & there's no overflow.
+         * In order to control memory footprint, we require count < INT_MAX
+         */
+        count = length;
+        rem = do_div(count, blk_size);
+        if (!base || !count || count > INT_MAX)
+                return -EINVAL;
+        a = kzalloc(sizeof(struct gk20a_lockless_allocator), GFP_KERNEL);
+        if (!a)
+                return -ENOMEM;
+        err = __gk20a_alloc_common_init(__a, name, a, false, &pool_ops);
+        if (err)
+                goto fail;
+        a->next = vzalloc(sizeof(*a->next) * count);
+        if (!a->next) {
+                err = -ENOMEM;
+                goto fail;
+        }
+        /* chain the elements together to form the initial free list  */
+        nr_nodes = (int)count;
+        for (i = 0; i < nr_nodes; i++)
+                a->next[i] = i + 1;
+        a->next[nr_nodes - 1] = -1;
+        a->base = base;
+        a->length = length;
+        a->blk_size = blk_size;
+        a->nr_nodes = nr_nodes;
+        a->flags = flags;
+        atomic_set(&a->nr_allocs, 0);
+        wmb();
+        a->inited = true;
+        gk20a_init_alloc_debug(__a);
+        alloc_dbg(__a, "New allocator: type          lockless\n");
+        alloc_dbg(__a, "               base          0x%llx\n", a->base);
+        alloc_dbg(__a, "               nodes         %d\n", a->nr_nodes);
+        alloc_dbg(__a, "               blk_size      0x%llx\n", a->blk_size);
+        alloc_dbg(__a, "               flags         0x%llx\n", a->flags);
+        return 0;
+fail:
+        kfree(a);
+        return err;
+}
diff --git a/drivers/gpu/nvgpu/gk20a/lockless_allocator_priv.h b/drivers/gpu/nvgpu/gk20a/lockless_allocator_priv.h
new file mode 100644
index 00000000..f9b03e0e
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/lockless_allocator_priv.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ * Basics:
+ *
+ *    - Lockless memory allocator for fixed-size structures, whose
+ *      size is defined up front at init time.
+ *    - Memory footprint scales linearly w/ the number of structures in
+ *      the pool. It is ~= sizeof(int) * N.
+ *    - Memory is pre-allocated by the client. The allocator itself
+ *      only computes the addresses for allocations.
+ *    - Limit of MAX_INT nodes that the allocator can be responsible for.
+ *
+ * Implementation details:
+ *
+ *    The allocator maintains a single list of free nodes. We allocate &
+ *    free nodes from the head of the list. We rely on the cmpxchg() operator
+ *    to maintain atomicity on the head.
+ *
+ *    So, both allocs & frees are O(1)!!
+ *
+ *    -- Definitions --
+ *    Block Size - size of a single structure that this allocator will
+ *                 allocate.
+ *    Node       - one of the elements of size blk_size in the
+ *                 client-allocated buffer.
+ *    Node Index - zero-based index of a node in the client-allocated
+ *                 contiguous buffer.
+ *
+ *    -- Initial State --
+ *    We maintain the following to track the state of the free list:
+ *
+ *    1) A "head" index to track the index of the first free node in the list
+ *    2) A "next" array to track the index of the next free node in the list
+ *       for every node. So next[head], will give the index to the 2nd free
+ *       element in the list.
+ *
+ *    So, to begin with, the free list consists of all node indices, and each
+ *    position in the next array contains index N + 1:
+ *
+ *    head = 0
+ *    next = [1, 2, 3, 4, -1] : Example for a user-allocated buffer of 5 nodes
+ *    free_list = 0->1->2->3->4->-1
+ *
+ *    -- Allocations --
+ *    1) Read the current head (aka acq_head)
+ *    2) Read next[acq_head], to get the 2nd free element (aka new_head)
+ *    3) cmp_xchg(&head, acq_head, new_head)
+ *    4) If it succeeds, compute the address of the node, based on
+ *       base address, blk_size, & acq_head.
+ *
+ *    head = 1;
+ *    next = [1, 2, 3, 4, -1] : Example after allocating Node #0
+ *    free_list = 1->2->3->4->-1
+ *
+ *    head = 2;
+ *    next = [1, 2, 3, 4, -1] : Example after allocating Node #1
+ *    free_list = 2->3->4->-1
+ *
+ *    -- Frees --
+ *    1) Based on the address to be freed, calculate the index of the node
+ *       being freed (cur_idx)
+ *    2) Read the current head (old_head)
+ *    3) So the freed node is going to go at the head of the list, and we
+ *       want to put the old_head after it. So next[cur_idx] = old_head
+ *    4) cmpxchg(head, old_head, cur_idx)
+ *
+ *    head = 0
+ *    next = [2, 2, 3, 4, -1]
+ *    free_list = 0->2->3->4->-1 : Example after freeing Node #0
+ *
+ *    head = 1
+ *    next = [2, 0, 3, 4, -1]
+ *    free_list = 1->0->2->3->4->-1 : Example after freeing Node #1
+ */
+#ifndef LOCKLESS_ALLOCATOR_PRIV_H
+#define LOCKLESS_ALLOCATOR_PRIV_H
+struct gk20a_allocator;
+struct gk20a_lockless_allocator {
+        struct gk20a_allocator *owner;
+        u64 base;               /* Base address of the space. */
+        u64 length;             /* Length of the space. */
+        u64 blk_size;           /* Size of the structure being allocated */
+        int nr_nodes;           /* Number of nodes available for allocation */
+        int *next;              /* An array holding the next indices per node */
+        int head;               /* Current node at the top of the stack */
+        u64 flags;
+        bool inited;
+        /* Statistics */
+        atomic_t nr_allocs;
+};
+static inline struct gk20a_lockless_allocator *lockless_allocator(
+        struct gk20a_allocator *a)
+{
+        return (struct gk20a_lockless_allocator *)(a)->priv;
+}
+#endif
author	Sachit Kadle <skadle@nvidia.com>	2016-09-01 23:50:06 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2016-09-20 13:43:40 -0400
commit	35b2507fe3ec6c28c27dd7fb289c003c7a0baf33 (patch)
tree	a691805580ded71038d7fcca07891c8ef210d420 /drivers/gpu
parent	101689dd8b536afa3ee7e265dc4ea846fa053767 (diff)

diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu index b8e38919..33d158bb 100644 --- a/drivers/gpu/nvgpu/Makefile.nvgpu +++ b/drivers/gpu/nvgpu/Makefile.nvgpu
@@ -57,6 +57,7 @@ nvgpu-y := \
57	gk20a/gk20a_allocator_bitmap.o \	57	gk20a/gk20a_allocator_bitmap.o \
58	gk20a/gk20a_allocator_buddy.o \	58	gk20a/gk20a_allocator_buddy.o \
59	gk20a/gk20a_allocator_page.o \	59	gk20a/gk20a_allocator_page.o \
		60	gk20a/gk20a_allocator_lockless.o \
60	gk20a/cde_gk20a.o \	61	gk20a/cde_gk20a.o \
61	gk20a/platform_gk20a_generic.o \	62	gk20a/platform_gk20a_generic.o \
62	gk20a/tsg_gk20a.o \	63	gk20a/tsg_gk20a.o \


diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h index 93865190..0d611ba3 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h
@@ -165,6 +165,15 @@ int gk20a_page_allocator_init(struct gk20a_allocator *__a,
165	const char *name, u64 base, u64 length,	165	const char *name, u64 base, u64 length,
166	u64 blk_size, u64 flags);	166	u64 blk_size, u64 flags);
167		167
		168	/*
		169	* Lockless allocatior initializers.
		170	* Note: This allocator can only allocate fixed-size structures of a
		171	* pre-defined size.
		172	*/
		173	int gk20a_lockless_allocator_init(struct gk20a_allocator *__a,
		174	const char *name, u64 base, u64 length,
		175	u64 struct_size, u64 flags);
		176
168	#define GPU_BALLOC_MAX_ORDER 31	177	#define GPU_BALLOC_MAX_ORDER 31
169		178
170	/*	179	/*


diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_lockless.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_lockless.c new file mode 100644 index 00000000..32455c98 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_lockless.c
@@ -0,0 +1,204 @@
		1	/*
		2	* Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
		3	*
		4	* This program is free software; you can redistribute it and/or modify it
		5	* under the terms and conditions of the GNU General Public License,
		6	* version 2, as published by the Free Software Foundation.
		7	*
		8	* This program is distributed in the hope it will be useful, but WITHOUT
		9	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
		10	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
		11	* more details.
		12	*
		13	* You should have received a copy of the GNU General Public License
		14	* along with this program. If not, see <http://www.gnu.org/licenses/>.
		15	*/
		16
		17	#include <linux/kernel.h>
		18	#include <linux/slab.h>
		19	#include <linux/vmalloc.h>
		20	#include <linux/atomic.h>
		21
		22	#include "gk20a_allocator.h"
		23	#include "lockless_allocator_priv.h"
		24
		25	static u64 gk20a_lockless_alloc_length(struct gk20a_allocator *a)
		26	{
		27	struct gk20a_lockless_allocator *pa = a->priv;
		28
		29	return pa->length;
		30	}
		31
		32	static u64 gk20a_lockless_alloc_base(struct gk20a_allocator *a)
		33	{
		34	struct gk20a_lockless_allocator *pa = a->priv;
		35
		36	return pa->base;
		37	}
		38
		39	static int gk20a_lockless_alloc_inited(struct gk20a_allocator *a)
		40	{
		41	struct gk20a_lockless_allocator *pa = a->priv;
		42	int inited = pa->inited;
		43
		44	rmb();
		45	return inited;
		46	}
		47
		48	static u64 gk20a_lockless_alloc_end(struct gk20a_allocator *a)
		49	{
		50	struct gk20a_lockless_allocator *pa = a->priv;
		51
		52	return pa->base + pa->length;
		53	}
		54
		55	static u64 gk20a_lockless_alloc(struct gk20a_allocator *a, u64 len)
		56	{
		57	struct gk20a_lockless_allocator *pa = a->priv;
		58	int head, new_head, ret;
		59	u64 addr = 0;
		60
		61	if (len != pa->blk_size)
		62	return 0;
		63
		64	head = ACCESS_ONCE(pa->head);
		65	while (head >= 0) {
		66	new_head = ACCESS_ONCE(pa->next[head]);
		67	ret = cmpxchg(&pa->head, head, new_head);
		68	if (ret == head) {
		69	addr = pa->base + head * pa->blk_size;
		70	atomic_inc(&pa->nr_allocs);
		71	alloc_dbg(a, "Alloc node # %d @ addr 0x%llx\n", head,
		72	addr);
		73	break;
		74	}
		75	head = ACCESS_ONCE(pa->head);
		76	}
		77	return addr;
		78	}
		79
		80	static void gk20a_lockless_free(struct gk20a_allocator *a, u64 addr)
		81	{
		82	struct gk20a_lockless_allocator *pa = a->priv;
		83	int head, ret;
		84	u64 cur_idx, rem;
		85
		86	cur_idx = addr - pa->base;
		87	rem = do_div(cur_idx, pa->blk_size);
		88
		89	while (1) {
		90	head = ACCESS_ONCE(pa->head);
		91	ACCESS_ONCE(pa->next[cur_idx]) = head;
		92	ret = cmpxchg(&pa->head, head, cur_idx);
		93	if (ret == head) {
		94	atomic_dec(&pa->nr_allocs);
		95	alloc_dbg(a, "Free node # %llu\n", cur_idx);
		96	break;
		97	}
		98	}
		99	}
		100
		101	static void gk20a_lockless_alloc_destroy(struct gk20a_allocator *a)
		102	{
		103	struct gk20a_lockless_allocator *pa = a->priv;
		104
		105	vfree(pa->next);
		106	kfree(pa);
		107	}
		108
		109	static void gk20a_lockless_print_stats(struct gk20a_allocator *a,
		110	struct seq_file *s, int lock)
		111	{
		112	struct gk20a_lockless_allocator *pa = a->priv;
		113
		114	__alloc_pstat(s, a, "Lockless allocator params:\n");
		115	__alloc_pstat(s, a, " start = 0x%llx\n", pa->base);
		116	__alloc_pstat(s, a, " end = 0x%llx\n", pa->base + pa->length);
		117
		118	/* Actual stats. */
		119	__alloc_pstat(s, a, "Stats:\n");
		120	__alloc_pstat(s, a, " Number allocs = %d\n",
		121	atomic_read(&pa->nr_allocs));
		122	__alloc_pstat(s, a, " Number free = %d\n",
		123	pa->nr_nodes - atomic_read(&pa->nr_allocs));
		124	}
		125
		126	static const struct gk20a_allocator_ops pool_ops = {
		127	.alloc = gk20a_lockless_alloc,
		128	.free = gk20a_lockless_free,
		129
		130	.base = gk20a_lockless_alloc_base,
		131	.length = gk20a_lockless_alloc_length,
		132	.end = gk20a_lockless_alloc_end,
		133	.inited = gk20a_lockless_alloc_inited,
		134
		135	.fini = gk20a_lockless_alloc_destroy,
		136
		137	.print_stats = gk20a_lockless_print_stats,
		138	};
		139
		140	int gk20a_lockless_allocator_init(struct gk20a_allocator *__a,
		141	const char *name, u64 base, u64 length,
		142	u64 blk_size, u64 flags)
		143	{
		144	int i;
		145	int err;
		146	int nr_nodes;
		147	u64 count, rem;
		148	struct gk20a_lockless_allocator *a;
		149
		150	if (!blk_size)
		151	return -EINVAL;
		152
		153	/*
		154	* Ensure we have space for atleast one node & there's no overflow.
		155	* In order to control memory footprint, we require count < INT_MAX
		156	*/
		157	count = length;
		158	rem = do_div(count, blk_size);
		159	if (!base \|\| !count \|\| count > INT_MAX)
		160	return -EINVAL;
		161
		162	a = kzalloc(sizeof(struct gk20a_lockless_allocator), GFP_KERNEL);
		163	if (!a)
		164	return -ENOMEM;
		165
		166	err = __gk20a_alloc_common_init(__a, name, a, false, &pool_ops);
		167	if (err)
		168	goto fail;
		169
		170	a->next = vzalloc(sizeof(a->next) count);
		171	if (!a->next) {
		172	err = -ENOMEM;
		173	goto fail;
		174	}
		175
		176	/* chain the elements together to form the initial free list */
		177	nr_nodes = (int)count;
		178	for (i = 0; i < nr_nodes; i++)
		179	a->next[i] = i + 1;
		180	a->next[nr_nodes - 1] = -1;
		181
		182	a->base = base;
		183	a->length = length;
		184	a->blk_size = blk_size;
		185	a->nr_nodes = nr_nodes;
		186	a->flags = flags;
		187	atomic_set(&a->nr_allocs, 0);
		188
		189	wmb();
		190	a->inited = true;
		191
		192	gk20a_init_alloc_debug(__a);
		193	alloc_dbg(__a, "New allocator: type lockless\n");
		194	alloc_dbg(__a, " base 0x%llx\n", a->base);
		195	alloc_dbg(__a, " nodes %d\n", a->nr_nodes);
		196	alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size);
		197	alloc_dbg(__a, " flags 0x%llx\n", a->flags);
		198
		199	return 0;
		200
		201	fail:
		202	kfree(a);
		203	return err;
		204	}


diff --git a/drivers/gpu/nvgpu/gk20a/lockless_allocator_priv.h b/drivers/gpu/nvgpu/gk20a/lockless_allocator_priv.h new file mode 100644 index 00000000..f9b03e0e --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/lockless_allocator_priv.h
@@ -0,0 +1,121 @@
		1	/*
		2	* Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
		3	*
		4	* This program is free software; you can redistribute it and/or modify it
		5	* under the terms and conditions of the GNU General Public License,
		6	* version 2, as published by the Free Software Foundation.
		7	*
		8	* This program is distributed in the hope it will be useful, but WITHOUT
		9	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
		10	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
		11	* more details.
		12	*
		13	* You should have received a copy of the GNU General Public License
		14	* along with this program. If not, see <http://www.gnu.org/licenses/>.
		15	*/
		16
		17	/*
		18	* Basics:
		19	*
		20	* - Lockless memory allocator for fixed-size structures, whose
		21	* size is defined up front at init time.
		22	* - Memory footprint scales linearly w/ the number of structures in
		23	* the pool. It is ~= sizeof(int) * N.
		24	* - Memory is pre-allocated by the client. The allocator itself
		25	* only computes the addresses for allocations.
		26	* - Limit of MAX_INT nodes that the allocator can be responsible for.
		27	*
		28	* Implementation details:
		29	*
		30	* The allocator maintains a single list of free nodes. We allocate &
		31	* free nodes from the head of the list. We rely on the cmpxchg() operator
		32	* to maintain atomicity on the head.
		33	*
		34	* So, both allocs & frees are O(1)!!
		35	*
		36	* -- Definitions --
		37	* Block Size - size of a single structure that this allocator will
		38	* allocate.
		39	* Node - one of the elements of size blk_size in the
		40	* client-allocated buffer.
		41	* Node Index - zero-based index of a node in the client-allocated
		42	* contiguous buffer.
		43	*
		44	* -- Initial State --
		45	* We maintain the following to track the state of the free list:
		46	*
		47	* 1) A "head" index to track the index of the first free node in the list
		48	* 2) A "next" array to track the index of the next free node in the list
		49	* for every node. So next[head], will give the index to the 2nd free
		50	* element in the list.
		51	*
		52	* So, to begin with, the free list consists of all node indices, and each
		53	* position in the next array contains index N + 1:
		54	*
		55	* head = 0
		56	* next = [1, 2, 3, 4, -1] : Example for a user-allocated buffer of 5 nodes
		57	* free_list = 0->1->2->3->4->-1
		58	*
		59	* -- Allocations --
		60	* 1) Read the current head (aka acq_head)
		61	* 2) Read next[acq_head], to get the 2nd free element (aka new_head)
		62	* 3) cmp_xchg(&head, acq_head, new_head)
		63	* 4) If it succeeds, compute the address of the node, based on
		64	* base address, blk_size, & acq_head.
		65	*
		66	* head = 1;
		67	* next = [1, 2, 3, 4, -1] : Example after allocating Node #0
		68	* free_list = 1->2->3->4->-1
		69	*
		70	* head = 2;
		71	* next = [1, 2, 3, 4, -1] : Example after allocating Node #1
		72	* free_list = 2->3->4->-1
		73	*
		74	* -- Frees --
		75	* 1) Based on the address to be freed, calculate the index of the node
		76	* being freed (cur_idx)
		77	* 2) Read the current head (old_head)
		78	* 3) So the freed node is going to go at the head of the list, and we
		79	* want to put the old_head after it. So next[cur_idx] = old_head
		80	* 4) cmpxchg(head, old_head, cur_idx)
		81	*
		82	* head = 0
		83	* next = [2, 2, 3, 4, -1]
		84	* free_list = 0->2->3->4->-1 : Example after freeing Node #0
		85	*
		86	* head = 1
		87	* next = [2, 0, 3, 4, -1]
		88	* free_list = 1->0->2->3->4->-1 : Example after freeing Node #1
		89	*/
		90
		91	#ifndef LOCKLESS_ALLOCATOR_PRIV_H
		92	#define LOCKLESS_ALLOCATOR_PRIV_H
		93
		94	struct gk20a_allocator;
		95
		96	struct gk20a_lockless_allocator {
		97	struct gk20a_allocator *owner;
		98
		99	u64 base; /* Base address of the space. */
		100	u64 length; /* Length of the space. */
		101	u64 blk_size; /* Size of the structure being allocated */
		102	int nr_nodes; /* Number of nodes available for allocation */
		103
		104	int next; / An array holding the next indices per node */
		105	int head; /* Current node at the top of the stack */
		106
		107	u64 flags;
		108
		109	bool inited;
		110
		111	/* Statistics */
		112	atomic_t nr_allocs;
		113	};
		114
		115	static inline struct gk20a_lockless_allocator *lockless_allocator(
		116	struct gk20a_allocator *a)
		117	{
		118	return (struct gk20a_lockless_allocator *)(a)->priv;
		119	}
		120
		121	#endif