diff options
author | Sachit Kadle <skadle@nvidia.com> | 2016-09-01 23:50:06 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2016-09-20 13:43:40 -0400 |
commit | 35b2507fe3ec6c28c27dd7fb289c003c7a0baf33 (patch) | |
tree | a691805580ded71038d7fcca07891c8ef210d420 | |
parent | 101689dd8b536afa3ee7e265dc4ea846fa053767 (diff) |
gpu: nvgpu: implement lockless allocator
Implement a lockless allocator for fixed-size data
structures.
Bug 1795076
Change-Id: I70a5f52cbdb4452cc0fd9a8edf26735be29ede57
Signed-off-by: Sachit Kadle <skadle@nvidia.com>
Reviewed-on: http://git-master/r/1213211
(cherry picked from commit e4bff7da0f39c8f4b5691169c02e482bc9d4166e)
Reviewed-on: http://git-master/r/1223246
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
-rw-r--r-- | drivers/gpu/nvgpu/Makefile.nvgpu | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a_allocator.h | 9 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a_allocator_lockless.c | 204 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/lockless_allocator_priv.h | 121 |
4 files changed, 335 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu index b8e38919..33d158bb 100644 --- a/drivers/gpu/nvgpu/Makefile.nvgpu +++ b/drivers/gpu/nvgpu/Makefile.nvgpu | |||
@@ -57,6 +57,7 @@ nvgpu-y := \ | |||
57 | gk20a/gk20a_allocator_bitmap.o \ | 57 | gk20a/gk20a_allocator_bitmap.o \ |
58 | gk20a/gk20a_allocator_buddy.o \ | 58 | gk20a/gk20a_allocator_buddy.o \ |
59 | gk20a/gk20a_allocator_page.o \ | 59 | gk20a/gk20a_allocator_page.o \ |
60 | gk20a/gk20a_allocator_lockless.o \ | ||
60 | gk20a/cde_gk20a.o \ | 61 | gk20a/cde_gk20a.o \ |
61 | gk20a/platform_gk20a_generic.o \ | 62 | gk20a/platform_gk20a_generic.o \ |
62 | gk20a/tsg_gk20a.o \ | 63 | gk20a/tsg_gk20a.o \ |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h index 93865190..0d611ba3 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h | |||
@@ -165,6 +165,15 @@ int gk20a_page_allocator_init(struct gk20a_allocator *__a, | |||
165 | const char *name, u64 base, u64 length, | 165 | const char *name, u64 base, u64 length, |
166 | u64 blk_size, u64 flags); | 166 | u64 blk_size, u64 flags); |
167 | 167 | ||
168 | /* | ||
169 | * Lockless allocatior initializers. | ||
170 | * Note: This allocator can only allocate fixed-size structures of a | ||
171 | * pre-defined size. | ||
172 | */ | ||
173 | int gk20a_lockless_allocator_init(struct gk20a_allocator *__a, | ||
174 | const char *name, u64 base, u64 length, | ||
175 | u64 struct_size, u64 flags); | ||
176 | |||
168 | #define GPU_BALLOC_MAX_ORDER 31 | 177 | #define GPU_BALLOC_MAX_ORDER 31 |
169 | 178 | ||
170 | /* | 179 | /* |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_lockless.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_lockless.c new file mode 100644 index 00000000..32455c98 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_lockless.c | |||
@@ -0,0 +1,204 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/kernel.h> | ||
18 | #include <linux/slab.h> | ||
19 | #include <linux/vmalloc.h> | ||
20 | #include <linux/atomic.h> | ||
21 | |||
22 | #include "gk20a_allocator.h" | ||
23 | #include "lockless_allocator_priv.h" | ||
24 | |||
25 | static u64 gk20a_lockless_alloc_length(struct gk20a_allocator *a) | ||
26 | { | ||
27 | struct gk20a_lockless_allocator *pa = a->priv; | ||
28 | |||
29 | return pa->length; | ||
30 | } | ||
31 | |||
32 | static u64 gk20a_lockless_alloc_base(struct gk20a_allocator *a) | ||
33 | { | ||
34 | struct gk20a_lockless_allocator *pa = a->priv; | ||
35 | |||
36 | return pa->base; | ||
37 | } | ||
38 | |||
39 | static int gk20a_lockless_alloc_inited(struct gk20a_allocator *a) | ||
40 | { | ||
41 | struct gk20a_lockless_allocator *pa = a->priv; | ||
42 | int inited = pa->inited; | ||
43 | |||
44 | rmb(); | ||
45 | return inited; | ||
46 | } | ||
47 | |||
48 | static u64 gk20a_lockless_alloc_end(struct gk20a_allocator *a) | ||
49 | { | ||
50 | struct gk20a_lockless_allocator *pa = a->priv; | ||
51 | |||
52 | return pa->base + pa->length; | ||
53 | } | ||
54 | |||
55 | static u64 gk20a_lockless_alloc(struct gk20a_allocator *a, u64 len) | ||
56 | { | ||
57 | struct gk20a_lockless_allocator *pa = a->priv; | ||
58 | int head, new_head, ret; | ||
59 | u64 addr = 0; | ||
60 | |||
61 | if (len != pa->blk_size) | ||
62 | return 0; | ||
63 | |||
64 | head = ACCESS_ONCE(pa->head); | ||
65 | while (head >= 0) { | ||
66 | new_head = ACCESS_ONCE(pa->next[head]); | ||
67 | ret = cmpxchg(&pa->head, head, new_head); | ||
68 | if (ret == head) { | ||
69 | addr = pa->base + head * pa->blk_size; | ||
70 | atomic_inc(&pa->nr_allocs); | ||
71 | alloc_dbg(a, "Alloc node # %d @ addr 0x%llx\n", head, | ||
72 | addr); | ||
73 | break; | ||
74 | } | ||
75 | head = ACCESS_ONCE(pa->head); | ||
76 | } | ||
77 | return addr; | ||
78 | } | ||
79 | |||
80 | static void gk20a_lockless_free(struct gk20a_allocator *a, u64 addr) | ||
81 | { | ||
82 | struct gk20a_lockless_allocator *pa = a->priv; | ||
83 | int head, ret; | ||
84 | u64 cur_idx, rem; | ||
85 | |||
86 | cur_idx = addr - pa->base; | ||
87 | rem = do_div(cur_idx, pa->blk_size); | ||
88 | |||
89 | while (1) { | ||
90 | head = ACCESS_ONCE(pa->head); | ||
91 | ACCESS_ONCE(pa->next[cur_idx]) = head; | ||
92 | ret = cmpxchg(&pa->head, head, cur_idx); | ||
93 | if (ret == head) { | ||
94 | atomic_dec(&pa->nr_allocs); | ||
95 | alloc_dbg(a, "Free node # %llu\n", cur_idx); | ||
96 | break; | ||
97 | } | ||
98 | } | ||
99 | } | ||
100 | |||
101 | static void gk20a_lockless_alloc_destroy(struct gk20a_allocator *a) | ||
102 | { | ||
103 | struct gk20a_lockless_allocator *pa = a->priv; | ||
104 | |||
105 | vfree(pa->next); | ||
106 | kfree(pa); | ||
107 | } | ||
108 | |||
109 | static void gk20a_lockless_print_stats(struct gk20a_allocator *a, | ||
110 | struct seq_file *s, int lock) | ||
111 | { | ||
112 | struct gk20a_lockless_allocator *pa = a->priv; | ||
113 | |||
114 | __alloc_pstat(s, a, "Lockless allocator params:\n"); | ||
115 | __alloc_pstat(s, a, " start = 0x%llx\n", pa->base); | ||
116 | __alloc_pstat(s, a, " end = 0x%llx\n", pa->base + pa->length); | ||
117 | |||
118 | /* Actual stats. */ | ||
119 | __alloc_pstat(s, a, "Stats:\n"); | ||
120 | __alloc_pstat(s, a, " Number allocs = %d\n", | ||
121 | atomic_read(&pa->nr_allocs)); | ||
122 | __alloc_pstat(s, a, " Number free = %d\n", | ||
123 | pa->nr_nodes - atomic_read(&pa->nr_allocs)); | ||
124 | } | ||
125 | |||
126 | static const struct gk20a_allocator_ops pool_ops = { | ||
127 | .alloc = gk20a_lockless_alloc, | ||
128 | .free = gk20a_lockless_free, | ||
129 | |||
130 | .base = gk20a_lockless_alloc_base, | ||
131 | .length = gk20a_lockless_alloc_length, | ||
132 | .end = gk20a_lockless_alloc_end, | ||
133 | .inited = gk20a_lockless_alloc_inited, | ||
134 | |||
135 | .fini = gk20a_lockless_alloc_destroy, | ||
136 | |||
137 | .print_stats = gk20a_lockless_print_stats, | ||
138 | }; | ||
139 | |||
140 | int gk20a_lockless_allocator_init(struct gk20a_allocator *__a, | ||
141 | const char *name, u64 base, u64 length, | ||
142 | u64 blk_size, u64 flags) | ||
143 | { | ||
144 | int i; | ||
145 | int err; | ||
146 | int nr_nodes; | ||
147 | u64 count, rem; | ||
148 | struct gk20a_lockless_allocator *a; | ||
149 | |||
150 | if (!blk_size) | ||
151 | return -EINVAL; | ||
152 | |||
153 | /* | ||
154 | * Ensure we have space for atleast one node & there's no overflow. | ||
155 | * In order to control memory footprint, we require count < INT_MAX | ||
156 | */ | ||
157 | count = length; | ||
158 | rem = do_div(count, blk_size); | ||
159 | if (!base || !count || count > INT_MAX) | ||
160 | return -EINVAL; | ||
161 | |||
162 | a = kzalloc(sizeof(struct gk20a_lockless_allocator), GFP_KERNEL); | ||
163 | if (!a) | ||
164 | return -ENOMEM; | ||
165 | |||
166 | err = __gk20a_alloc_common_init(__a, name, a, false, &pool_ops); | ||
167 | if (err) | ||
168 | goto fail; | ||
169 | |||
170 | a->next = vzalloc(sizeof(*a->next) * count); | ||
171 | if (!a->next) { | ||
172 | err = -ENOMEM; | ||
173 | goto fail; | ||
174 | } | ||
175 | |||
176 | /* chain the elements together to form the initial free list */ | ||
177 | nr_nodes = (int)count; | ||
178 | for (i = 0; i < nr_nodes; i++) | ||
179 | a->next[i] = i + 1; | ||
180 | a->next[nr_nodes - 1] = -1; | ||
181 | |||
182 | a->base = base; | ||
183 | a->length = length; | ||
184 | a->blk_size = blk_size; | ||
185 | a->nr_nodes = nr_nodes; | ||
186 | a->flags = flags; | ||
187 | atomic_set(&a->nr_allocs, 0); | ||
188 | |||
189 | wmb(); | ||
190 | a->inited = true; | ||
191 | |||
192 | gk20a_init_alloc_debug(__a); | ||
193 | alloc_dbg(__a, "New allocator: type lockless\n"); | ||
194 | alloc_dbg(__a, " base 0x%llx\n", a->base); | ||
195 | alloc_dbg(__a, " nodes %d\n", a->nr_nodes); | ||
196 | alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size); | ||
197 | alloc_dbg(__a, " flags 0x%llx\n", a->flags); | ||
198 | |||
199 | return 0; | ||
200 | |||
201 | fail: | ||
202 | kfree(a); | ||
203 | return err; | ||
204 | } | ||
diff --git a/drivers/gpu/nvgpu/gk20a/lockless_allocator_priv.h b/drivers/gpu/nvgpu/gk20a/lockless_allocator_priv.h new file mode 100644 index 00000000..f9b03e0e --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/lockless_allocator_priv.h | |||
@@ -0,0 +1,121 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | /* | ||
18 | * Basics: | ||
19 | * | ||
20 | * - Lockless memory allocator for fixed-size structures, whose | ||
21 | * size is defined up front at init time. | ||
22 | * - Memory footprint scales linearly w/ the number of structures in | ||
23 | * the pool. It is ~= sizeof(int) * N. | ||
24 | * - Memory is pre-allocated by the client. The allocator itself | ||
25 | * only computes the addresses for allocations. | ||
26 | * - Limit of MAX_INT nodes that the allocator can be responsible for. | ||
27 | * | ||
28 | * Implementation details: | ||
29 | * | ||
30 | * The allocator maintains a single list of free nodes. We allocate & | ||
31 | * free nodes from the head of the list. We rely on the cmpxchg() operator | ||
32 | * to maintain atomicity on the head. | ||
33 | * | ||
34 | * So, both allocs & frees are O(1)!! | ||
35 | * | ||
36 | * -- Definitions -- | ||
37 | * Block Size - size of a single structure that this allocator will | ||
38 | * allocate. | ||
39 | * Node - one of the elements of size blk_size in the | ||
40 | * client-allocated buffer. | ||
41 | * Node Index - zero-based index of a node in the client-allocated | ||
42 | * contiguous buffer. | ||
43 | * | ||
44 | * -- Initial State -- | ||
45 | * We maintain the following to track the state of the free list: | ||
46 | * | ||
47 | * 1) A "head" index to track the index of the first free node in the list | ||
48 | * 2) A "next" array to track the index of the next free node in the list | ||
49 | * for every node. So next[head], will give the index to the 2nd free | ||
50 | * element in the list. | ||
51 | * | ||
52 | * So, to begin with, the free list consists of all node indices, and each | ||
53 | * position in the next array contains index N + 1: | ||
54 | * | ||
55 | * head = 0 | ||
56 | * next = [1, 2, 3, 4, -1] : Example for a user-allocated buffer of 5 nodes | ||
57 | * free_list = 0->1->2->3->4->-1 | ||
58 | * | ||
59 | * -- Allocations -- | ||
60 | * 1) Read the current head (aka acq_head) | ||
61 | * 2) Read next[acq_head], to get the 2nd free element (aka new_head) | ||
62 | * 3) cmp_xchg(&head, acq_head, new_head) | ||
63 | * 4) If it succeeds, compute the address of the node, based on | ||
64 | * base address, blk_size, & acq_head. | ||
65 | * | ||
66 | * head = 1; | ||
67 | * next = [1, 2, 3, 4, -1] : Example after allocating Node #0 | ||
68 | * free_list = 1->2->3->4->-1 | ||
69 | * | ||
70 | * head = 2; | ||
71 | * next = [1, 2, 3, 4, -1] : Example after allocating Node #1 | ||
72 | * free_list = 2->3->4->-1 | ||
73 | * | ||
74 | * -- Frees -- | ||
75 | * 1) Based on the address to be freed, calculate the index of the node | ||
76 | * being freed (cur_idx) | ||
77 | * 2) Read the current head (old_head) | ||
78 | * 3) So the freed node is going to go at the head of the list, and we | ||
79 | * want to put the old_head after it. So next[cur_idx] = old_head | ||
80 | * 4) cmpxchg(head, old_head, cur_idx) | ||
81 | * | ||
82 | * head = 0 | ||
83 | * next = [2, 2, 3, 4, -1] | ||
84 | * free_list = 0->2->3->4->-1 : Example after freeing Node #0 | ||
85 | * | ||
86 | * head = 1 | ||
87 | * next = [2, 0, 3, 4, -1] | ||
88 | * free_list = 1->0->2->3->4->-1 : Example after freeing Node #1 | ||
89 | */ | ||
90 | |||
91 | #ifndef LOCKLESS_ALLOCATOR_PRIV_H | ||
92 | #define LOCKLESS_ALLOCATOR_PRIV_H | ||
93 | |||
94 | struct gk20a_allocator; | ||
95 | |||
96 | struct gk20a_lockless_allocator { | ||
97 | struct gk20a_allocator *owner; | ||
98 | |||
99 | u64 base; /* Base address of the space. */ | ||
100 | u64 length; /* Length of the space. */ | ||
101 | u64 blk_size; /* Size of the structure being allocated */ | ||
102 | int nr_nodes; /* Number of nodes available for allocation */ | ||
103 | |||
104 | int *next; /* An array holding the next indices per node */ | ||
105 | int head; /* Current node at the top of the stack */ | ||
106 | |||
107 | u64 flags; | ||
108 | |||
109 | bool inited; | ||
110 | |||
111 | /* Statistics */ | ||
112 | atomic_t nr_allocs; | ||
113 | }; | ||
114 | |||
115 | static inline struct gk20a_lockless_allocator *lockless_allocator( | ||
116 | struct gk20a_allocator *a) | ||
117 | { | ||
118 | return (struct gk20a_lockless_allocator *)(a)->priv; | ||
119 | } | ||
120 | |||
121 | #endif | ||