diff options
Diffstat (limited to 'drivers/gpu/nvgpu/common/posix/bitmap.c')
-rw-r--r-- | drivers/gpu/nvgpu/common/posix/bitmap.c | 268 |
1 files changed, 268 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/common/posix/bitmap.c b/drivers/gpu/nvgpu/common/posix/bitmap.c new file mode 100644 index 00000000..51361777 --- /dev/null +++ b/drivers/gpu/nvgpu/common/posix/bitmap.c | |||
@@ -0,0 +1,268 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
5 | * copy of this software and associated documentation files (the "Software"), | ||
6 | * to deal in the Software without restriction, including without limitation | ||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
9 | * Software is furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice shall be included in | ||
12 | * all copies or substantial portions of the Software. | ||
13 | * | ||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
20 | * DEALINGS IN THE SOFTWARE. | ||
21 | */ | ||
22 | |||
23 | #include <stdio.h> | ||
24 | #include <stdlib.h> | ||
25 | |||
26 | #include <nvgpu/posix/bitops.h> | ||
27 | #include <nvgpu/posix/atomic.h> | ||
28 | |||
/* Mask with only bit @nr (modulo word size) set, within its word. */
#define BIT_MASK(nr)		(1UL << ((nr) % BITS_PER_LONG))
/* Index of the word that contains bit @nr. */
#define BIT_WORD(nr)		((nr) / BITS_PER_LONG)
31 | |||
32 | unsigned long __nvgpu_posix_fls(unsigned long word) | ||
33 | { | ||
34 | int num = BITS_PER_LONG - 1; | ||
35 | |||
36 | #if BITS_PER_LONG == 64 | ||
37 | if (!(word & (~0ul << 32))) { | ||
38 | num -= 32; | ||
39 | word <<= 32; | ||
40 | } | ||
41 | #endif | ||
42 | if (!(word & (~0ul << (BITS_PER_LONG-16)))) { | ||
43 | num -= 16; | ||
44 | word <<= 16; | ||
45 | } | ||
46 | if (!(word & (~0ul << (BITS_PER_LONG-8)))) { | ||
47 | num -= 8; | ||
48 | word <<= 8; | ||
49 | } | ||
50 | if (!(word & (~0ul << (BITS_PER_LONG-4)))) { | ||
51 | num -= 4; | ||
52 | word <<= 4; | ||
53 | } | ||
54 | if (!(word & (~0ul << (BITS_PER_LONG-2)))) { | ||
55 | num -= 2; | ||
56 | word <<= 2; | ||
57 | } | ||
58 | if (!(word & (~0ul << (BITS_PER_LONG-1)))) | ||
59 | num -= 1; | ||
60 | return num; | ||
61 | } | ||
62 | |||
63 | unsigned long __nvgpu_posix_ffs(unsigned long word) | ||
64 | { | ||
65 | int num = 0; | ||
66 | |||
67 | #if BITS_PER_LONG == 64 | ||
68 | if ((word & 0xffffffff) == 0) { | ||
69 | num += 32; | ||
70 | word >>= 32; | ||
71 | } | ||
72 | #endif | ||
73 | if ((word & 0xffff) == 0) { | ||
74 | num += 16; | ||
75 | word >>= 16; | ||
76 | } | ||
77 | if ((word & 0xff) == 0) { | ||
78 | num += 8; | ||
79 | word >>= 8; | ||
80 | } | ||
81 | if ((word & 0xf) == 0) { | ||
82 | num += 4; | ||
83 | word >>= 4; | ||
84 | } | ||
85 | if ((word & 0x3) == 0) { | ||
86 | num += 2; | ||
87 | word >>= 2; | ||
88 | } | ||
89 | if ((word & 0x1) == 0) | ||
90 | num += 1; | ||
91 | |||
92 | return num; | ||
93 | } | ||
94 | |||
95 | static unsigned long __find_next_bit(const unsigned long *addr, | ||
96 | unsigned long n, | ||
97 | unsigned long start, | ||
98 | bool invert) | ||
99 | { | ||
100 | unsigned long idx; | ||
101 | unsigned long w; | ||
102 | unsigned long start_mask; | ||
103 | |||
104 | /* | ||
105 | * We make a mask we can XOR into the word so that we can invert the | ||
106 | * word without requiring a branch. I.e instead of doing: | ||
107 | * | ||
108 | * w = invert ? ~addr[idx] : addr[idx] | ||
109 | * | ||
110 | * We can do: | ||
111 | * | ||
112 | * w = addr[idx] ^= invert_mask | ||
113 | * | ||
114 | * This saves us a branch every iteration through the loop. Now we can | ||
115 | * always just look for 1s. | ||
116 | */ | ||
117 | unsigned long invert_mask = invert ? ~0UL : 0UL; | ||
118 | |||
119 | if (start >= n) | ||
120 | return n; | ||
121 | |||
122 | start_mask = ~0UL << (start & (BITS_PER_LONG - 1)); | ||
123 | |||
124 | idx = start / BITS_PER_LONG; | ||
125 | w = (addr[idx] ^ invert_mask) & start_mask; | ||
126 | |||
127 | start = round_up(start, BITS_PER_LONG); | ||
128 | |||
129 | /* | ||
130 | * Find the first non-zero word taking into account start and | ||
131 | * invert. | ||
132 | */ | ||
133 | while (!w) { | ||
134 | idx++; | ||
135 | start += BITS_PER_LONG; | ||
136 | |||
137 | w = addr[idx] ^ invert_mask; | ||
138 | } | ||
139 | |||
140 | return min(n, ffs(w) + idx * BITS_PER_LONG); | ||
141 | } | ||
142 | |||
/*
 * Return the index of the first set bit in @addr, or @size when none of
 * the first @size bits are set.
 */
unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
{
	return __find_next_bit(addr, size, 0, false);
}
147 | |||
/*
 * Return the index of the first clear bit in @addr, or @size when all of
 * the first @size bits are set.
 */
unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
{
	return __find_next_bit(addr, size, 0, true);
}
152 | |||
/*
 * Return the index of the first set bit at or after bit @offset in @addr,
 * or @size when no set bit exists in [offset, size).
 */
unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
			    unsigned long offset)
{
	return __find_next_bit(addr, size, offset, false);
}
158 | |||
/*
 * Return the index of the first clear bit at or after bit @offset in
 * @addr, or @size when no clear bit exists in [offset, size).
 */
static unsigned long find_next_zero_bit(const unsigned long *addr,
					unsigned long size,
					unsigned long offset)
{
	return __find_next_bit(addr, size, offset, true);
}
165 | |||
/*
 * Set @len consecutive bits in @map, starting at bit @start.
 *
 * Super slow naive implementation. But speed isn't what matters here.
 */
void bitmap_set(unsigned long *map, unsigned int start, int len)
{
	unsigned int last = start + (unsigned int)len;
	unsigned int bit;

	for (bit = start; bit < last; bit++)
		set_bit((int)bit, map);
}
176 | |||
/*
 * Clear @len consecutive bits in @map, starting at bit @start.
 * Deliberately naive one-bit-at-a-time loop; speed is not a concern here.
 */
void bitmap_clear(unsigned long *map, unsigned int start, int len)
{
	unsigned int last = start + (unsigned int)len;
	unsigned int bit;

	for (bit = start; bit < last; bit++)
		clear_bit((int)bit, map);
}
184 | |||
/*
 * This is essentially a find-first-fit allocator: this searches a bitmap for
 * the first space that is large enough to satisfy the requested size of bits.
 * That means that this is not a very smart allocator. But it is fast relative
 * to an allocator that goes looking for an optimal location.
 *
 * @map:          bitmap to search
 * @size:         total number of bits in @map considered
 * @start:        bit index to begin searching from
 * @nr:           number of consecutive zero bits required
 * @align_mask:   alignment mask applied to each candidate offset
 * @align_offset: added before and subtracted after alignment, so that
 *                (result + align_offset) satisfies @align_mask
 *
 * Returns the index of the first fitting run of zero bits, or @size when
 * no such run exists. NOTE(review): ALIGN_MASK comes from a project
 * header; this assumes it rounds upwards — confirm against its definition.
 */
unsigned long bitmap_find_next_zero_area_off(unsigned long *map,
					     unsigned long size,
					     unsigned long start,
					     unsigned int nr,
					     unsigned long align_mask,
					     unsigned long align_offset)
{
	unsigned long offs;

	while (start + nr <= size) {
		/* Candidate: first zero bit at or after @start. */
		start = find_next_zero_bit(map, size, start);

		/* Align the candidate in "offset" space. */
		start = ALIGN_MASK(start + align_offset, align_mask) -
			align_offset;

		/*
		 * Not enough space left to satisfy the requested area.
		 */
		if ((start + nr) > size)
			return size;

		/* The zero run ends at the next set bit. */
		offs = find_next_bit(map, size, start);

		if ((offs - start) >= nr)
			return start;

		/* Run too short; resume the search past the set bit. */
		start = offs + 1;
	}

	return size;
}
222 | |||
/*
 * Convenience wrapper around bitmap_find_next_zero_area_off() with a zero
 * alignment offset: find the first run of @nr zero bits at or after
 * @start whose index satisfies @align_mask, or return @size on failure.
 */
unsigned long bitmap_find_next_zero_area(unsigned long *map,
					 unsigned long size,
					 unsigned long start,
					 unsigned int nr,
					 unsigned long align_mask)
{
	return bitmap_find_next_zero_area_off(map, size, start, nr,
					      align_mask, 0);
}
232 | |||
233 | bool test_bit(int nr, const volatile unsigned long *addr) | ||
234 | { | ||
235 | return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); | ||
236 | } | ||
237 | |||
238 | bool test_and_set_bit(int nr, volatile unsigned long *addr) | ||
239 | { | ||
240 | unsigned long mask = BIT_MASK(nr); | ||
241 | volatile unsigned long *p = addr + BIT_WORD(nr); | ||
242 | |||
243 | return !!(__sync_fetch_and_or(p, mask) & mask); | ||
244 | } | ||
245 | |||
246 | bool test_and_clear_bit(int nr, volatile unsigned long *addr) | ||
247 | { | ||
248 | unsigned long mask = BIT_MASK(nr); | ||
249 | unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); | ||
250 | |||
251 | return !!(__sync_fetch_and_and(p, ~mask) & mask); | ||
252 | } | ||
253 | |||
/*
 * Atomically set bit @nr in @addr; the old value is not returned.
 */
void set_bit(int nr, volatile unsigned long *addr)
{
	unsigned long *word = ((unsigned long *)addr) + BIT_WORD(nr);

	__atomic_or(word, BIT_MASK(nr));
}
261 | |||
/*
 * Atomically clear bit @nr in @addr; the old value is not returned.
 */
void clear_bit(int nr, volatile unsigned long *addr)
{
	unsigned long *word = ((unsigned long *)addr) + BIT_WORD(nr);

	__atomic_and(word, ~BIT_MASK(nr));
}