diff options
Diffstat (limited to 'drivers/gpu/nvgpu/common/posix/bitmap.c')
-rw-r--r-- | drivers/gpu/nvgpu/common/posix/bitmap.c | 268 |
1 files changed, 268 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/common/posix/bitmap.c b/drivers/gpu/nvgpu/common/posix/bitmap.c new file mode 100644 index 00000000..51361777 --- /dev/null +++ b/drivers/gpu/nvgpu/common/posix/bitmap.c | |||
@@ -0,0 +1,268 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
5 | * copy of this software and associated documentation files (the "Software"), | ||
6 | * to deal in the Software without restriction, including without limitation | ||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
9 | * Software is furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice shall be included in | ||
12 | * all copies or substantial portions of the Software. | ||
13 | * | ||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
20 | * DEALINGS IN THE SOFTWARE. | ||
21 | */ | ||
22 | |||
23 | #include <stdio.h> | ||
24 | #include <stdlib.h> | ||
25 | |||
26 | #include <nvgpu/posix/bitops.h> | ||
27 | #include <nvgpu/posix/atomic.h> | ||
28 | |||
/* Mask with only bit @nr (modulo word size) set, within its word. */
#define BIT_MASK(nr)		(1UL << ((nr) % BITS_PER_LONG))
/* Index of the word that contains bit @nr. */
#define BIT_WORD(nr)		((nr) / BITS_PER_LONG)
31 | |||
32 | unsigned long __nvgpu_posix_fls(unsigned long word) | ||
33 | { | ||
34 | int num = BITS_PER_LONG - 1; | ||
35 | |||
36 | #if BITS_PER_LONG == 64 | ||
37 | if (!(word & (~0ul << 32))) { | ||
38 | num -= 32; | ||
39 | word <<= 32; | ||
40 | } | ||
41 | #endif | ||
42 | if (!(word & (~0ul << (BITS_PER_LONG-16)))) { | ||
43 | num -= 16; | ||
44 | word <<= 16; | ||
45 | } | ||
46 | if (!(word & (~0ul << (BITS_PER_LONG-8)))) { | ||
47 | num -= 8; | ||
48 | word <<= 8; | ||
49 | } | ||
50 | if (!(word & (~0ul << (BITS_PER_LONG-4)))) { | ||
51 | num -= 4; | ||
52 | word <<= 4; | ||
53 | } | ||
54 | if (!(word & (~0ul << (BITS_PER_LONG-2)))) { | ||
55 | num -= 2; | ||
56 | word <<= 2; | ||
57 | } | ||
58 | if (!(word & (~0ul << (BITS_PER_LONG-1)))) | ||
59 | num -= 1; | ||
60 | return num; | ||
61 | } | ||
62 | |||
63 | unsigned long __nvgpu_posix_ffs(unsigned long word) | ||
64 | { | ||
65 | int num = 0; | ||
66 | |||
67 | #if BITS_PER_LONG == 64 | ||
68 | if ((word & 0xffffffff) == 0) { | ||
69 | num += 32; | ||
70 | word >>= 32; | ||
71 | } | ||
72 | #endif | ||
73 | if ((word & 0xffff) == 0) { | ||
74 | num += 16; | ||
75 | word >>= 16; | ||
76 | } | ||
77 | if ((word & 0xff) == 0) { | ||
78 | num += 8; | ||
79 | word >>= 8; | ||
80 | } | ||
81 | if ((word & 0xf) == 0) { | ||
82 | num += 4; | ||
83 | word >>= 4; | ||
84 | } | ||
85 | if ((word & 0x3) == 0) { | ||
86 | num += 2; | ||
87 | word >>= 2; | ||
88 | } | ||
89 | if ((word & 0x1) == 0) | ||
90 | num += 1; | ||
91 | |||
92 | return num; | ||
93 | } | ||
94 | |||
95 | static unsigned long __find_next_bit(const unsigned long *addr, | ||
96 | unsigned long n, | ||
97 | unsigned long start, | ||
98 | bool invert) | ||
99 | { | ||
100 | unsigned long idx; | ||
101 | unsigned long w; | ||
102 | unsigned long start_mask; | ||
103 | |||
104 | /* | ||
105 | * We make a mask we can XOR into the word so that we can invert the | ||
106 | * word without requiring a branch. I.e instead of doing: | ||
107 | * | ||
108 | * w = invert ? ~addr[idx] : addr[idx] | ||
109 | * | ||
110 | * We can do: | ||
111 | * | ||
112 | * w = addr[idx] ^= invert_mask | ||
113 | * | ||
114 | * This saves us a branch every iteration through the loop. Now we can | ||
115 | * always just look for 1s. | ||
116 | */ | ||
117 | unsigned long invert_mask = invert ? ~0UL : 0UL; | ||
118 | |||
119 | if (start >= n) | ||
120 | return n; | ||
121 | |||
122 | start_mask = ~0UL << (start & (BITS_PER_LONG - 1)); | ||
123 | |||
124 | idx = start / BITS_PER_LONG; | ||
125 | w = (addr[idx] ^ invert_mask) & start_mask; | ||
126 | |||
127 | start = round_up(start, BITS_PER_LONG); | ||
128 | |||
129 | /* | ||
130 | * Find the first non-zero word taking into account start and | ||
131 | * invert. | ||
132 | */ | ||
133 | while (!w) { | ||
134 | idx++; | ||
135 | start += BITS_PER_LONG; | ||
136 | |||
137 | w = addr[idx] ^ invert_mask; | ||
138 | } | ||
139 | |||
140 | return min(n, ffs(w) + idx * BITS_PER_LONG); | ||
141 | } | ||
142 | |||
/*
 * Return the index of the first set bit in @addr, or @size when none of
 * the first @size bits are set.
 */
unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
{
	return __find_next_bit(addr, size, 0, false);
}
147 | |||
/*
 * Return the index of the first clear bit in @addr, or @size when all of
 * the first @size bits are set.
 */
unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
{
	return __find_next_bit(addr, size, 0, true);
}
152 | |||
/*
 * Return the index of the first set bit at or after bit @offset in @addr,
 * or @size when no set bit exists in [offset, size).
 */
unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
			    unsigned long offset)
{
	return __find_next_bit(addr, size, offset, false);
}
158 | |||
/*
 * Return the index of the first clear bit at or after bit @offset in
 * @addr, or @size when no clear bit exists in [offset, size).
 */
static unsigned long find_next_zero_bit(const unsigned long *addr,
					unsigned long size,
					unsigned long offset)
{
	return __find_next_bit(addr, size, offset, true);
}
165 | |||
/*
 * Set @len consecutive bits in @map, starting at bit @start.
 *
 * Super slow naive implementation. But speed isn't what matters here.
 */
void bitmap_set(unsigned long *map, unsigned int start, int len)
{
	unsigned int last = start + (unsigned int)len;
	unsigned int bit;

	for (bit = start; bit < last; bit++)
		set_bit((int)bit, map);
}
176 | |||
/*
 * Clear @len consecutive bits in @map, starting at bit @start.
 * Deliberately naive one-bit-at-a-time loop; speed is not a concern here.
 */
void bitmap_clear(unsigned long *map, unsigned int start, int len)
{
	unsigned int last = start + (unsigned int)len;
	unsigned int bit;

	for (bit = start; bit < last; bit++)
		clear_bit((int)bit, map);
}
184 | |||
/*
 * This is essentially a find-first-fit allocator: this searches a bitmap for
 * the first space that is large enough to satisfy the requested size of bits.
 * That means that this is not a very smart allocator. But it is fast relative
 * to an allocator that goes looking for an optimal location.
 *
 * @map:          bitmap to search
 * @size:         total number of bits in @map considered
 * @start:        bit index to begin searching from
 * @nr:           number of consecutive zero bits required
 * @align_mask:   alignment mask applied to each candidate offset
 * @align_offset: added before and subtracted after alignment, so that
 *                (result + align_offset) satisfies @align_mask
 *
 * Returns the index of the first fitting run of zero bits, or @size when
 * no such run exists. NOTE(review): ALIGN_MASK comes from a project
 * header; this assumes it rounds upwards — confirm against its definition.
 */
unsigned long bitmap_find_next_zero_area_off(unsigned long *map,
					     unsigned long size,
					     unsigned long start,
					     unsigned int nr,
					     unsigned long align_mask,
					     unsigned long align_offset)
{
	unsigned long offs;

	while (start + nr <= size) {
		/* Candidate: first zero bit at or after @start. */
		start = find_next_zero_bit(map, size, start);

		/* Align the candidate in "offset" space. */
		start = ALIGN_MASK(start + align_offset, align_mask) -
			align_offset;

		/*
		 * Not enough space left to satisfy the requested area.
		 */
		if ((start + nr) > size)
			return size;

		/* The zero run ends at the next set bit. */
		offs = find_next_bit(map, size, start);

		if ((offs - start) >= nr)
			return start;

		/* Run too short; resume the search past the set bit. */
		start = offs + 1;
	}

	return size;
}
222 | |||
/*
 * Convenience wrapper around bitmap_find_next_zero_area_off() with a zero
 * alignment offset: find the first run of @nr zero bits at or after
 * @start whose index satisfies @align_mask, or return @size on failure.
 */
unsigned long bitmap_find_next_zero_area(unsigned long *map,
					 unsigned long size,
					 unsigned long start,
					 unsigned int nr,
					 unsigned long align_mask)
{
	return bitmap_find_next_zero_area_off(map, size, start, nr,
					      align_mask, 0);
}
232 | |||
233 | bool test_bit(int nr, const volatile unsigned long *addr) | ||
234 | { | ||
235 | return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); | ||
236 | } | ||
237 | |||
238 | bool test_and_set_bit(int nr, volatile unsigned long *addr) | ||
239 | { | ||
240 | unsigned long mask = BIT_MASK(nr); | ||
241 | volatile unsigned long *p = addr + BIT_WORD(nr); | ||
242 | |||
243 | return !!(__sync_fetch_and_or(p, mask) & mask); | ||
244 | } | ||
245 | |||
246 | bool test_and_clear_bit(int nr, volatile unsigned long *addr) | ||
247 | { | ||
248 | unsigned long mask = BIT_MASK(nr); | ||
249 | unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); | ||
250 | |||
251 | return !!(__sync_fetch_and_and(p, ~mask) & mask); | ||
252 | } | ||
253 | |||
/*
 * Atomically set bit @nr in @addr; the old value is not returned.
 */
void set_bit(int nr, volatile unsigned long *addr)
{
	unsigned long *word = ((unsigned long *)addr) + BIT_WORD(nr);

	__atomic_or(word, BIT_MASK(nr));
}
261 | |||
/*
 * Atomically clear bit @nr in @addr; the old value is not returned.
 */
void clear_bit(int nr, volatile unsigned long *addr)
{
	unsigned long *word = ((unsigned long *)addr) + BIT_WORD(nr);

	__atomic_and(word, ~BIT_MASK(nr));
}