aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorHuang Ying <ying.huang@intel.com>2011-07-13 01:14:24 -0400
committerLen Brown <len.brown@intel.com>2011-08-03 11:15:57 -0400
commit7f184275aa306046fe7edcbef3229754f0d97402 (patch)
treee2f82957072dd1ada3a4bc6c0281a6296123d8a0 /lib
parentf49f23abf3dd786ddcac1c1e7db3c2013b07413f (diff)
lib, Make gen_pool memory allocator lockless
This version of the gen_pool memory allocator supports lockless operation. This makes it safe to use in NMI handlers and other special unblockable contexts that could otherwise deadlock on locks. This is implemented by using atomic operations and retries on any conflicts. The disadvantage is that there may be livelocks in extreme cases. For better scalability, one gen_pool allocator can be used for each CPU. The lockless operation only works if there is enough memory available. If new memory is added to the pool a lock has to be still taken. So any user relying on locklessness has to ensure that sufficient memory is preallocated. The basic atomic operation of this allocator is cmpxchg on long. On architectures that don't have NMI-safe cmpxchg implementation, the allocator can NOT be used in NMI handler. So code uses the allocator in NMI handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. Signed-off-by: Huang Ying <ying.huang@intel.com> Reviewed-by: Andi Kleen <ak@linux.intel.com> Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> Cc: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Len Brown <len.brown@intel.com>
Diffstat (limited to 'lib')
-rw-r--r--lib/bitmap.c2
-rw-r--r--lib/genalloc.c300
2 files changed, 243 insertions, 59 deletions
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 3f3b68199d74..e3c9e999501e 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -271,8 +271,6 @@ int __bitmap_weight(const unsigned long *bitmap, int bits)
271} 271}
272EXPORT_SYMBOL(__bitmap_weight); 272EXPORT_SYMBOL(__bitmap_weight);
273 273
274#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG))
275
276void bitmap_set(unsigned long *map, int start, int nr) 274void bitmap_set(unsigned long *map, int start, int nr)
277{ 275{
278 unsigned long *p = map + BIT_WORD(start); 276 unsigned long *p = map + BIT_WORD(start);
diff --git a/lib/genalloc.c b/lib/genalloc.c
index 577ddf805975..f352cc42f4f8 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -1,8 +1,26 @@
1/* 1/*
2 * Basic general purpose allocator for managing special purpose memory 2 * Basic general purpose allocator for managing special purpose
3 * not managed by the regular kmalloc/kfree interface. 3 * memory, for example, memory that is not managed by the regular
4 * Uses for this includes on-device special memory, uncached memory 4 * kmalloc/kfree interface. Uses for this includes on-device special
5 * etc. 5 * memory, uncached memory etc.
6 *
7 * It is safe to use the allocator in NMI handlers and other special
8 * unblockable contexts that could otherwise deadlock on locks. This
9 * is implemented by using atomic operations and retries on any
10 * conflicts. The disadvantage is that there may be livelocks in
11 * extreme cases. For better scalability, one allocator can be used
12 * for each CPU.
13 *
14 * The lockless operation only works if there is enough memory
15 * available. If new memory is added to the pool a lock has to be
16 * still taken. So any user relying on locklessness has to ensure
17 * that sufficient memory is preallocated.
18 *
19 * The basic atomic operation of this allocator is cmpxchg on long.
20 * On architectures that don't have NMI-safe cmpxchg implementation,
21 * the allocator can NOT be used in NMI handler. So code uses the
22 * allocator in NMI handler should depend on
23 * CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG.
6 * 24 *
7 * Copyright 2005 (C) Jes Sorensen <jes@trained-monkey.org> 25 * Copyright 2005 (C) Jes Sorensen <jes@trained-monkey.org>
8 * 26 *
@@ -13,8 +31,109 @@
13#include <linux/slab.h> 31#include <linux/slab.h>
14#include <linux/module.h> 32#include <linux/module.h>
15#include <linux/bitmap.h> 33#include <linux/bitmap.h>
34#include <linux/rculist.h>
35#include <linux/interrupt.h>
16#include <linux/genalloc.h> 36#include <linux/genalloc.h>
17 37
38static int set_bits_ll(unsigned long *addr, unsigned long mask_to_set)
39{
40 unsigned long val, nval;
41
42 nval = *addr;
43 do {
44 val = nval;
45 if (val & mask_to_set)
46 return -EBUSY;
47 cpu_relax();
48 } while ((nval = cmpxchg(addr, val, val | mask_to_set)) != val);
49
50 return 0;
51}
52
53static int clear_bits_ll(unsigned long *addr, unsigned long mask_to_clear)
54{
55 unsigned long val, nval;
56
57 nval = *addr;
58 do {
59 val = nval;
60 if ((val & mask_to_clear) != mask_to_clear)
61 return -EBUSY;
62 cpu_relax();
63 } while ((nval = cmpxchg(addr, val, val & ~mask_to_clear)) != val);
64
65 return 0;
66}
67
68/*
69 * bitmap_set_ll - set the specified number of bits at the specified position
70 * @map: pointer to a bitmap
71 * @start: a bit position in @map
72 * @nr: number of bits to set
73 *
74 * Set @nr bits start from @start in @map lock-lessly. Several users
75 * can set/clear the same bitmap simultaneously without lock. If two
76 * users set the same bit, one user will return remain bits, otherwise
77 * return 0.
78 */
79static int bitmap_set_ll(unsigned long *map, int start, int nr)
80{
81 unsigned long *p = map + BIT_WORD(start);
82 const int size = start + nr;
83 int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG);
84 unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start);
85
86 while (nr - bits_to_set >= 0) {
87 if (set_bits_ll(p, mask_to_set))
88 return nr;
89 nr -= bits_to_set;
90 bits_to_set = BITS_PER_LONG;
91 mask_to_set = ~0UL;
92 p++;
93 }
94 if (nr) {
95 mask_to_set &= BITMAP_LAST_WORD_MASK(size);
96 if (set_bits_ll(p, mask_to_set))
97 return nr;
98 }
99
100 return 0;
101}
102
103/*
104 * bitmap_clear_ll - clear the specified number of bits at the specified position
105 * @map: pointer to a bitmap
106 * @start: a bit position in @map
107 * @nr: number of bits to set
108 *
109 * Clear @nr bits start from @start in @map lock-lessly. Several users
110 * can set/clear the same bitmap simultaneously without lock. If two
111 * users clear the same bit, one user will return remain bits,
112 * otherwise return 0.
113 */
114static int bitmap_clear_ll(unsigned long *map, int start, int nr)
115{
116 unsigned long *p = map + BIT_WORD(start);
117 const int size = start + nr;
118 int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
119 unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
120
121 while (nr - bits_to_clear >= 0) {
122 if (clear_bits_ll(p, mask_to_clear))
123 return nr;
124 nr -= bits_to_clear;
125 bits_to_clear = BITS_PER_LONG;
126 mask_to_clear = ~0UL;
127 p++;
128 }
129 if (nr) {
130 mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
131 if (clear_bits_ll(p, mask_to_clear))
132 return nr;
133 }
134
135 return 0;
136}
18 137
19/** 138/**
20 * gen_pool_create - create a new special memory pool 139 * gen_pool_create - create a new special memory pool
@@ -30,7 +149,7 @@ struct gen_pool *gen_pool_create(int min_alloc_order, int nid)
30 149
31 pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid); 150 pool = kmalloc_node(sizeof(struct gen_pool), GFP_KERNEL, nid);
32 if (pool != NULL) { 151 if (pool != NULL) {
33 rwlock_init(&pool->lock); 152 spin_lock_init(&pool->lock);
34 INIT_LIST_HEAD(&pool->chunks); 153 INIT_LIST_HEAD(&pool->chunks);
35 pool->min_alloc_order = min_alloc_order; 154 pool->min_alloc_order = min_alloc_order;
36 } 155 }
@@ -63,14 +182,14 @@ int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phy
63 if (unlikely(chunk == NULL)) 182 if (unlikely(chunk == NULL))
64 return -ENOMEM; 183 return -ENOMEM;
65 184
66 spin_lock_init(&chunk->lock);
67 chunk->phys_addr = phys; 185 chunk->phys_addr = phys;
68 chunk->start_addr = virt; 186 chunk->start_addr = virt;
69 chunk->end_addr = virt + size; 187 chunk->end_addr = virt + size;
188 atomic_set(&chunk->avail, size);
70 189
71 write_lock(&pool->lock); 190 spin_lock(&pool->lock);
72 list_add(&chunk->next_chunk, &pool->chunks); 191 list_add_rcu(&chunk->next_chunk, &pool->chunks);
73 write_unlock(&pool->lock); 192 spin_unlock(&pool->lock);
74 193
75 return 0; 194 return 0;
76} 195}
@@ -85,19 +204,19 @@ EXPORT_SYMBOL(gen_pool_add_virt);
85 */ 204 */
86phys_addr_t gen_pool_virt_to_phys(struct gen_pool *pool, unsigned long addr) 205phys_addr_t gen_pool_virt_to_phys(struct gen_pool *pool, unsigned long addr)
87{ 206{
88 struct list_head *_chunk;
89 struct gen_pool_chunk *chunk; 207 struct gen_pool_chunk *chunk;
208 phys_addr_t paddr = -1;
90 209
91 read_lock(&pool->lock); 210 rcu_read_lock();
92 list_for_each(_chunk, &pool->chunks) { 211 list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) {
93 chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); 212 if (addr >= chunk->start_addr && addr < chunk->end_addr) {
94 213 paddr = chunk->phys_addr + (addr - chunk->start_addr);
95 if (addr >= chunk->start_addr && addr < chunk->end_addr) 214 break;
96 return chunk->phys_addr + addr - chunk->start_addr; 215 }
97 } 216 }
98 read_unlock(&pool->lock); 217 rcu_read_unlock();
99 218
100 return -1; 219 return paddr;
101} 220}
102EXPORT_SYMBOL(gen_pool_virt_to_phys); 221EXPORT_SYMBOL(gen_pool_virt_to_phys);
103 222
@@ -115,7 +234,6 @@ void gen_pool_destroy(struct gen_pool *pool)
115 int order = pool->min_alloc_order; 234 int order = pool->min_alloc_order;
116 int bit, end_bit; 235 int bit, end_bit;
117 236
118
119 list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { 237 list_for_each_safe(_chunk, _next_chunk, &pool->chunks) {
120 chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); 238 chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk);
121 list_del(&chunk->next_chunk); 239 list_del(&chunk->next_chunk);
@@ -137,44 +255,50 @@ EXPORT_SYMBOL(gen_pool_destroy);
137 * @size: number of bytes to allocate from the pool 255 * @size: number of bytes to allocate from the pool
138 * 256 *
139 * Allocate the requested number of bytes from the specified pool. 257 * Allocate the requested number of bytes from the specified pool.
140 * Uses a first-fit algorithm. 258 * Uses a first-fit algorithm. Can not be used in NMI handler on
259 * architectures without NMI-safe cmpxchg implementation.
141 */ 260 */
142unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) 261unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size)
143{ 262{
144 struct list_head *_chunk;
145 struct gen_pool_chunk *chunk; 263 struct gen_pool_chunk *chunk;
146 unsigned long addr, flags; 264 unsigned long addr = 0;
147 int order = pool->min_alloc_order; 265 int order = pool->min_alloc_order;
148 int nbits, start_bit, end_bit; 266 int nbits, start_bit = 0, end_bit, remain;
267
268#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
269 BUG_ON(in_nmi());
270#endif
149 271
150 if (size == 0) 272 if (size == 0)
151 return 0; 273 return 0;
152 274
153 nbits = (size + (1UL << order) - 1) >> order; 275 nbits = (size + (1UL << order) - 1) >> order;
154 276 rcu_read_lock();
155 read_lock(&pool->lock); 277 list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) {
156 list_for_each(_chunk, &pool->chunks) { 278 if (size > atomic_read(&chunk->avail))
157 chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); 279 continue;
158 280
159 end_bit = (chunk->end_addr - chunk->start_addr) >> order; 281 end_bit = (chunk->end_addr - chunk->start_addr) >> order;
160 282retry:
161 spin_lock_irqsave(&chunk->lock, flags); 283 start_bit = bitmap_find_next_zero_area(chunk->bits, end_bit,
162 start_bit = bitmap_find_next_zero_area(chunk->bits, end_bit, 0, 284 start_bit, nbits, 0);
163 nbits, 0); 285 if (start_bit >= end_bit)
164 if (start_bit >= end_bit) {
165 spin_unlock_irqrestore(&chunk->lock, flags);
166 continue; 286 continue;
287 remain = bitmap_set_ll(chunk->bits, start_bit, nbits);
288 if (remain) {
289 remain = bitmap_clear_ll(chunk->bits, start_bit,
290 nbits - remain);
291 BUG_ON(remain);
292 goto retry;
167 } 293 }
168 294
169 addr = chunk->start_addr + ((unsigned long)start_bit << order); 295 addr = chunk->start_addr + ((unsigned long)start_bit << order);
170 296 size = nbits << order;
171 bitmap_set(chunk->bits, start_bit, nbits); 297 atomic_sub(size, &chunk->avail);
172 spin_unlock_irqrestore(&chunk->lock, flags); 298 break;
173 read_unlock(&pool->lock);
174 return addr;
175 } 299 }
176 read_unlock(&pool->lock); 300 rcu_read_unlock();
177 return 0; 301 return addr;
178} 302}
179EXPORT_SYMBOL(gen_pool_alloc); 303EXPORT_SYMBOL(gen_pool_alloc);
180 304
@@ -184,33 +308,95 @@ EXPORT_SYMBOL(gen_pool_alloc);
184 * @addr: starting address of memory to free back to pool 308 * @addr: starting address of memory to free back to pool
185 * @size: size in bytes of memory to free 309 * @size: size in bytes of memory to free
186 * 310 *
187 * Free previously allocated special memory back to the specified pool. 311 * Free previously allocated special memory back to the specified
312 * pool. Can not be used in NMI handler on architectures without
313 * NMI-safe cmpxchg implementation.
188 */ 314 */
189void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) 315void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size)
190{ 316{
191 struct list_head *_chunk;
192 struct gen_pool_chunk *chunk; 317 struct gen_pool_chunk *chunk;
193 unsigned long flags;
194 int order = pool->min_alloc_order; 318 int order = pool->min_alloc_order;
195 int bit, nbits; 319 int start_bit, nbits, remain;
196 320
197 nbits = (size + (1UL << order) - 1) >> order; 321#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
198 322 BUG_ON(in_nmi());
199 read_lock(&pool->lock); 323#endif
200 list_for_each(_chunk, &pool->chunks) {
201 chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk);
202 324
325 nbits = (size + (1UL << order) - 1) >> order;
326 rcu_read_lock();
327 list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) {
203 if (addr >= chunk->start_addr && addr < chunk->end_addr) { 328 if (addr >= chunk->start_addr && addr < chunk->end_addr) {
204 BUG_ON(addr + size > chunk->end_addr); 329 BUG_ON(addr + size > chunk->end_addr);
205 spin_lock_irqsave(&chunk->lock, flags); 330 start_bit = (addr - chunk->start_addr) >> order;
206 bit = (addr - chunk->start_addr) >> order; 331 remain = bitmap_clear_ll(chunk->bits, start_bit, nbits);
207 while (nbits--) 332 BUG_ON(remain);
208 __clear_bit(bit++, chunk->bits); 333 size = nbits << order;
209 spin_unlock_irqrestore(&chunk->lock, flags); 334 atomic_add(size, &chunk->avail);
210 break; 335 rcu_read_unlock();
336 return;
211 } 337 }
212 } 338 }
213 BUG_ON(nbits > 0); 339 rcu_read_unlock();
214 read_unlock(&pool->lock); 340 BUG();
215} 341}
216EXPORT_SYMBOL(gen_pool_free); 342EXPORT_SYMBOL(gen_pool_free);
343
344/**
345 * gen_pool_for_each_chunk - call func for every chunk of generic memory pool
346 * @pool: the generic memory pool
347 * @func: func to call
348 * @data: additional data used by @func
349 *
350 * Call @func for every chunk of generic memory pool. The @func is
351 * called with rcu_read_lock held.
352 */
353void gen_pool_for_each_chunk(struct gen_pool *pool,
354 void (*func)(struct gen_pool *pool, struct gen_pool_chunk *chunk, void *data),
355 void *data)
356{
357 struct gen_pool_chunk *chunk;
358
359 rcu_read_lock();
360 list_for_each_entry_rcu(chunk, &(pool)->chunks, next_chunk)
361 func(pool, chunk, data);
362 rcu_read_unlock();
363}
364EXPORT_SYMBOL(gen_pool_for_each_chunk);
365
366/**
367 * gen_pool_avail - get available free space of the pool
368 * @pool: pool to get available free space
369 *
370 * Return available free space of the specified pool.
371 */
372size_t gen_pool_avail(struct gen_pool *pool)
373{
374 struct gen_pool_chunk *chunk;
375 size_t avail = 0;
376
377 rcu_read_lock();
378 list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk)
379 avail += atomic_read(&chunk->avail);
380 rcu_read_unlock();
381 return avail;
382}
383EXPORT_SYMBOL_GPL(gen_pool_avail);
384
385/**
386 * gen_pool_size - get size in bytes of memory managed by the pool
387 * @pool: pool to get size
388 *
389 * Return size in bytes of memory managed by the pool.
390 */
391size_t gen_pool_size(struct gen_pool *pool)
392{
393 struct gen_pool_chunk *chunk;
394 size_t size = 0;
395
396 rcu_read_lock();
397 list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk)
398 size += chunk->end_addr - chunk->start_addr;
399 rcu_read_unlock();
400 return size;
401}
402EXPORT_SYMBOL_GPL(gen_pool_size);