summaryrefslogtreecommitdiffstats
path: root/drivers/gpu
diff options
context:
space:
mode:
authorSachit Kadle <skadle@nvidia.com>2016-09-01 23:50:06 -0400
committermobile promotions <svcmobile_promotions@nvidia.com>2016-09-20 13:43:40 -0400
commit35b2507fe3ec6c28c27dd7fb289c003c7a0baf33 (patch)
treea691805580ded71038d7fcca07891c8ef210d420 /drivers/gpu
parent101689dd8b536afa3ee7e265dc4ea846fa053767 (diff)
gpu: nvgpu: implement lockless allocator
Implement a lockless allocator for fixed-size data structures. Bug 1795076 Change-Id: I70a5f52cbdb4452cc0fd9a8edf26735be29ede57 Signed-off-by: Sachit Kadle <skadle@nvidia.com> Reviewed-on: http://git-master/r/1213211 (cherry picked from commit e4bff7da0f39c8f4b5691169c02e482bc9d4166e) Reviewed-on: http://git-master/r/1223246 GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r--drivers/gpu/nvgpu/Makefile.nvgpu1
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a_allocator.h9
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a_allocator_lockless.c204
-rw-r--r--drivers/gpu/nvgpu/gk20a/lockless_allocator_priv.h121
4 files changed, 335 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu
index b8e38919..33d158bb 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu
@@ -57,6 +57,7 @@ nvgpu-y := \
57 gk20a/gk20a_allocator_bitmap.o \ 57 gk20a/gk20a_allocator_bitmap.o \
58 gk20a/gk20a_allocator_buddy.o \ 58 gk20a/gk20a_allocator_buddy.o \
59 gk20a/gk20a_allocator_page.o \ 59 gk20a/gk20a_allocator_page.o \
60 gk20a/gk20a_allocator_lockless.o \
60 gk20a/cde_gk20a.o \ 61 gk20a/cde_gk20a.o \
61 gk20a/platform_gk20a_generic.o \ 62 gk20a/platform_gk20a_generic.o \
62 gk20a/tsg_gk20a.o \ 63 gk20a/tsg_gk20a.o \
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h
index 93865190..0d611ba3 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h
@@ -165,6 +165,15 @@ int gk20a_page_allocator_init(struct gk20a_allocator *__a,
165 const char *name, u64 base, u64 length, 165 const char *name, u64 base, u64 length,
166 u64 blk_size, u64 flags); 166 u64 blk_size, u64 flags);
167 167
168/*
169 * Lockless allocatior initializers.
170 * Note: This allocator can only allocate fixed-size structures of a
171 * pre-defined size.
172 */
173int gk20a_lockless_allocator_init(struct gk20a_allocator *__a,
174 const char *name, u64 base, u64 length,
175 u64 struct_size, u64 flags);
176
168#define GPU_BALLOC_MAX_ORDER 31 177#define GPU_BALLOC_MAX_ORDER 31
169 178
170/* 179/*
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_lockless.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_lockless.c
new file mode 100644
index 00000000..32455c98
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_lockless.c
@@ -0,0 +1,204 @@
1/*
2 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <linux/kernel.h>
18#include <linux/slab.h>
19#include <linux/vmalloc.h>
20#include <linux/atomic.h>
21
22#include "gk20a_allocator.h"
23#include "lockless_allocator_priv.h"
24
25static u64 gk20a_lockless_alloc_length(struct gk20a_allocator *a)
26{
27 struct gk20a_lockless_allocator *pa = a->priv;
28
29 return pa->length;
30}
31
32static u64 gk20a_lockless_alloc_base(struct gk20a_allocator *a)
33{
34 struct gk20a_lockless_allocator *pa = a->priv;
35
36 return pa->base;
37}
38
39static int gk20a_lockless_alloc_inited(struct gk20a_allocator *a)
40{
41 struct gk20a_lockless_allocator *pa = a->priv;
42 int inited = pa->inited;
43
44 rmb();
45 return inited;
46}
47
48static u64 gk20a_lockless_alloc_end(struct gk20a_allocator *a)
49{
50 struct gk20a_lockless_allocator *pa = a->priv;
51
52 return pa->base + pa->length;
53}
54
55static u64 gk20a_lockless_alloc(struct gk20a_allocator *a, u64 len)
56{
57 struct gk20a_lockless_allocator *pa = a->priv;
58 int head, new_head, ret;
59 u64 addr = 0;
60
61 if (len != pa->blk_size)
62 return 0;
63
64 head = ACCESS_ONCE(pa->head);
65 while (head >= 0) {
66 new_head = ACCESS_ONCE(pa->next[head]);
67 ret = cmpxchg(&pa->head, head, new_head);
68 if (ret == head) {
69 addr = pa->base + head * pa->blk_size;
70 atomic_inc(&pa->nr_allocs);
71 alloc_dbg(a, "Alloc node # %d @ addr 0x%llx\n", head,
72 addr);
73 break;
74 }
75 head = ACCESS_ONCE(pa->head);
76 }
77 return addr;
78}
79
80static void gk20a_lockless_free(struct gk20a_allocator *a, u64 addr)
81{
82 struct gk20a_lockless_allocator *pa = a->priv;
83 int head, ret;
84 u64 cur_idx, rem;
85
86 cur_idx = addr - pa->base;
87 rem = do_div(cur_idx, pa->blk_size);
88
89 while (1) {
90 head = ACCESS_ONCE(pa->head);
91 ACCESS_ONCE(pa->next[cur_idx]) = head;
92 ret = cmpxchg(&pa->head, head, cur_idx);
93 if (ret == head) {
94 atomic_dec(&pa->nr_allocs);
95 alloc_dbg(a, "Free node # %llu\n", cur_idx);
96 break;
97 }
98 }
99}
100
101static void gk20a_lockless_alloc_destroy(struct gk20a_allocator *a)
102{
103 struct gk20a_lockless_allocator *pa = a->priv;
104
105 vfree(pa->next);
106 kfree(pa);
107}
108
109static void gk20a_lockless_print_stats(struct gk20a_allocator *a,
110 struct seq_file *s, int lock)
111{
112 struct gk20a_lockless_allocator *pa = a->priv;
113
114 __alloc_pstat(s, a, "Lockless allocator params:\n");
115 __alloc_pstat(s, a, " start = 0x%llx\n", pa->base);
116 __alloc_pstat(s, a, " end = 0x%llx\n", pa->base + pa->length);
117
118 /* Actual stats. */
119 __alloc_pstat(s, a, "Stats:\n");
120 __alloc_pstat(s, a, " Number allocs = %d\n",
121 atomic_read(&pa->nr_allocs));
122 __alloc_pstat(s, a, " Number free = %d\n",
123 pa->nr_nodes - atomic_read(&pa->nr_allocs));
124}
125
126static const struct gk20a_allocator_ops pool_ops = {
127 .alloc = gk20a_lockless_alloc,
128 .free = gk20a_lockless_free,
129
130 .base = gk20a_lockless_alloc_base,
131 .length = gk20a_lockless_alloc_length,
132 .end = gk20a_lockless_alloc_end,
133 .inited = gk20a_lockless_alloc_inited,
134
135 .fini = gk20a_lockless_alloc_destroy,
136
137 .print_stats = gk20a_lockless_print_stats,
138};
139
140int gk20a_lockless_allocator_init(struct gk20a_allocator *__a,
141 const char *name, u64 base, u64 length,
142 u64 blk_size, u64 flags)
143{
144 int i;
145 int err;
146 int nr_nodes;
147 u64 count, rem;
148 struct gk20a_lockless_allocator *a;
149
150 if (!blk_size)
151 return -EINVAL;
152
153 /*
154 * Ensure we have space for atleast one node & there's no overflow.
155 * In order to control memory footprint, we require count < INT_MAX
156 */
157 count = length;
158 rem = do_div(count, blk_size);
159 if (!base || !count || count > INT_MAX)
160 return -EINVAL;
161
162 a = kzalloc(sizeof(struct gk20a_lockless_allocator), GFP_KERNEL);
163 if (!a)
164 return -ENOMEM;
165
166 err = __gk20a_alloc_common_init(__a, name, a, false, &pool_ops);
167 if (err)
168 goto fail;
169
170 a->next = vzalloc(sizeof(*a->next) * count);
171 if (!a->next) {
172 err = -ENOMEM;
173 goto fail;
174 }
175
176 /* chain the elements together to form the initial free list */
177 nr_nodes = (int)count;
178 for (i = 0; i < nr_nodes; i++)
179 a->next[i] = i + 1;
180 a->next[nr_nodes - 1] = -1;
181
182 a->base = base;
183 a->length = length;
184 a->blk_size = blk_size;
185 a->nr_nodes = nr_nodes;
186 a->flags = flags;
187 atomic_set(&a->nr_allocs, 0);
188
189 wmb();
190 a->inited = true;
191
192 gk20a_init_alloc_debug(__a);
193 alloc_dbg(__a, "New allocator: type lockless\n");
194 alloc_dbg(__a, " base 0x%llx\n", a->base);
195 alloc_dbg(__a, " nodes %d\n", a->nr_nodes);
196 alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size);
197 alloc_dbg(__a, " flags 0x%llx\n", a->flags);
198
199 return 0;
200
201fail:
202 kfree(a);
203 return err;
204}
diff --git a/drivers/gpu/nvgpu/gk20a/lockless_allocator_priv.h b/drivers/gpu/nvgpu/gk20a/lockless_allocator_priv.h
new file mode 100644
index 00000000..f9b03e0e
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/lockless_allocator_priv.h
@@ -0,0 +1,121 @@
1/*
2 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17/*
18 * Basics:
19 *
20 * - Lockless memory allocator for fixed-size structures, whose
21 * size is defined up front at init time.
22 * - Memory footprint scales linearly w/ the number of structures in
23 * the pool. It is ~= sizeof(int) * N.
24 * - Memory is pre-allocated by the client. The allocator itself
25 * only computes the addresses for allocations.
26 * - Limit of MAX_INT nodes that the allocator can be responsible for.
27 *
28 * Implementation details:
29 *
30 * The allocator maintains a single list of free nodes. We allocate &
31 * free nodes from the head of the list. We rely on the cmpxchg() operator
32 * to maintain atomicity on the head.
33 *
34 * So, both allocs & frees are O(1)!!
35 *
36 * -- Definitions --
37 * Block Size - size of a single structure that this allocator will
38 * allocate.
39 * Node - one of the elements of size blk_size in the
40 * client-allocated buffer.
41 * Node Index - zero-based index of a node in the client-allocated
42 * contiguous buffer.
43 *
44 * -- Initial State --
45 * We maintain the following to track the state of the free list:
46 *
47 * 1) A "head" index to track the index of the first free node in the list
48 * 2) A "next" array to track the index of the next free node in the list
49 * for every node. So next[head], will give the index to the 2nd free
50 * element in the list.
51 *
52 * So, to begin with, the free list consists of all node indices, and each
53 * position in the next array contains index N + 1:
54 *
55 * head = 0
56 * next = [1, 2, 3, 4, -1] : Example for a user-allocated buffer of 5 nodes
57 * free_list = 0->1->2->3->4->-1
58 *
59 * -- Allocations --
60 * 1) Read the current head (aka acq_head)
61 * 2) Read next[acq_head], to get the 2nd free element (aka new_head)
62 * 3) cmp_xchg(&head, acq_head, new_head)
63 * 4) If it succeeds, compute the address of the node, based on
64 * base address, blk_size, & acq_head.
65 *
66 * head = 1;
67 * next = [1, 2, 3, 4, -1] : Example after allocating Node #0
68 * free_list = 1->2->3->4->-1
69 *
70 * head = 2;
71 * next = [1, 2, 3, 4, -1] : Example after allocating Node #1
72 * free_list = 2->3->4->-1
73 *
74 * -- Frees --
75 * 1) Based on the address to be freed, calculate the index of the node
76 * being freed (cur_idx)
77 * 2) Read the current head (old_head)
78 * 3) So the freed node is going to go at the head of the list, and we
79 * want to put the old_head after it. So next[cur_idx] = old_head
80 * 4) cmpxchg(head, old_head, cur_idx)
81 *
82 * head = 0
83 * next = [2, 2, 3, 4, -1]
84 * free_list = 0->2->3->4->-1 : Example after freeing Node #0
85 *
86 * head = 1
87 * next = [2, 0, 3, 4, -1]
88 * free_list = 1->0->2->3->4->-1 : Example after freeing Node #1
89 */
90
91#ifndef LOCKLESS_ALLOCATOR_PRIV_H
92#define LOCKLESS_ALLOCATOR_PRIV_H
93
94struct gk20a_allocator;
95
96struct gk20a_lockless_allocator {
97 struct gk20a_allocator *owner;
98
99 u64 base; /* Base address of the space. */
100 u64 length; /* Length of the space. */
101 u64 blk_size; /* Size of the structure being allocated */
102 int nr_nodes; /* Number of nodes available for allocation */
103
104 int *next; /* An array holding the next indices per node */
105 int head; /* Current node at the top of the stack */
106
107 u64 flags;
108
109 bool inited;
110
111 /* Statistics */
112 atomic_t nr_allocs;
113};
114
115static inline struct gk20a_lockless_allocator *lockless_allocator(
116 struct gk20a_allocator *a)
117{
118 return (struct gk20a_lockless_allocator *)(a)->priv;
119}
120
121#endif