Diffstat (limited to 'mm')

-rw-r--r--  mm/slob.c  72
1 files changed, 55 insertions, 17 deletions
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -3,6 +3,8 @@
  *
  * Matt Mackall <mpm@selenic.com> 12/30/03
  *
+ * NUMA support by Paul Mundt, 2007.
+ *
  * How SLOB works:
  *
  * The core of SLOB is a traditional K&R style heap allocator, with
@@ -10,7 +12,7 @@
  * allocator is as little as 2 bytes, however typically most architectures
  * will require 4 bytes on 32-bit and 8 bytes on 64-bit.
  *
- * The slob heap is a linked list of pages from __get_free_page, and
+ * The slob heap is a linked list of pages from alloc_pages(), and
  * within each page, there is a singly-linked list of free blocks (slob_t).
  * The heap is grown on demand and allocation from the heap is currently
  * first-fit.
@@ -18,7 +20,7 @@
  * Above this is an implementation of kmalloc/kfree. Blocks returned
  * from kmalloc are prepended with a 4-byte header with the kmalloc size.
  * If kmalloc is asked for objects of PAGE_SIZE or larger, it calls
- * __get_free_pages directly, allocating compound pages so the page order
+ * alloc_pages() directly, allocating compound pages so the page order
  * does not have to be separately tracked, and also stores the exact
  * allocation size in page->private so that it can be used to accurately
  * provide ksize(). These objects are detected in kfree() because slob_page()
@@ -29,10 +31,23 @@
  * 4-byte alignment unless the SLAB_HWCACHE_ALIGN flag is set, in which
  * case the low-level allocator will fragment blocks to create the proper
  * alignment. Again, objects of page-size or greater are allocated by
- * calling __get_free_pages. As SLAB objects know their size, no separate
+ * calling alloc_pages(). As SLAB objects know their size, no separate
  * size bookkeeping is necessary and there is essentially no allocation
  * space overhead, and compound pages aren't needed for multi-page
  * allocations.
+ *
+ * NUMA support in SLOB is fairly simplistic, pushing most of the real
+ * logic down to the page allocator, and simply doing the node accounting
+ * on the upper levels. In the event that a node id is explicitly
+ * provided, alloc_pages_node() with the specified node id is used
+ * instead. The common case (or when the node id isn't explicitly provided)
+ * will default to the current node, as per numa_node_id().
+ *
+ * Node aware pages are still inserted in to the global freelist, and
+ * these are scanned for by matching against the node id encoded in the
+ * page flags. As a result, block allocations that can be satisfied from
+ * the freelist will only be done so on pages residing on the same node,
+ * in order to prevent random node placement.
  */

 #include <linux/kernel.h>
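To make the node-selection policy described in the comment block above concrete, here is a small caller-side sketch. It is not part of the patch: the buffer structure, function names, and the assumption that kmalloc_node() from <linux/slab.h> is wired up to the node-aware SLOB path (the header side of this change is outside the mm/ diffstat shown here) are all illustrative. A request with an explicit node id should be steered to alloc_pages_node(); a node id of -1 expresses "no preference" and defaults to the current node.

/* Hypothetical usage sketch, not part of this commit. */
#include <linux/slab.h>
#include <linux/errno.h>

struct per_node_buf {
        void *data;
};

/* Ask for memory on NUMA node 'nid'; passing -1 would mean "no preference". */
static int setup_buf(struct per_node_buf *buf, int nid, size_t len)
{
        buf->data = kmalloc_node(len, GFP_KERNEL, nid);
        if (!buf->data)
                return -ENOMEM;
        return 0;
}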
@@ -204,6 +219,23 @@ static int slob_last(slob_t *s)
 	return !((unsigned long)slob_next(s) & ~PAGE_MASK);
 }
 
+static void *slob_new_page(gfp_t gfp, int order, int node)
+{
+	void *page;
+
+#ifdef CONFIG_NUMA
+	if (node != -1)
+		page = alloc_pages_node(node, gfp, order);
+	else
+#endif
+		page = alloc_pages(gfp, order);
+
+	if (!page)
+		return NULL;
+
+	return page_address(page);
+}
+
 /*
  * Allocate a slob block within a given slob_page sp.
  */
@@ -258,7 +290,7 @@ static void *slob_page_alloc(struct slob_page *sp, size_t size, int align)
 /*
  * slob_alloc: entry point into the slob allocator.
  */
-static void *slob_alloc(size_t size, gfp_t gfp, int align)
+static void *slob_alloc(size_t size, gfp_t gfp, int align, int node)
 {
 	struct slob_page *sp;
 	slob_t *b = NULL;
@@ -267,6 +299,15 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align)
 	spin_lock_irqsave(&slob_lock, flags);
 	/* Iterate through each partially free page, try to find room */
 	list_for_each_entry(sp, &free_slob_pages, list) {
+#ifdef CONFIG_NUMA
+		/*
+		 * If there's a node specification, search for a partial
+		 * page with a matching node id in the freelist.
+		 */
+		if (node != -1 && page_to_nid(&sp->page) != node)
+			continue;
+#endif
+
 		if (sp->units >= SLOB_UNITS(size)) {
 			b = slob_page_alloc(sp, size, align);
 			if (b)
@@ -277,7 +318,7 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align)
 
 	/* Not enough space: must allocate a new page */
 	if (!b) {
-		b = (slob_t *)__get_free_page(gfp);
+		b = slob_new_page(gfp, 0, node);
 		if (!b)
 			return 0;
 		sp = (struct slob_page *)virt_to_page(b);
@@ -381,22 +422,20 @@ out:
 #define ARCH_SLAB_MINALIGN __alignof__(unsigned long)
 #endif
 
-
-void *__kmalloc(size_t size, gfp_t gfp)
+void *__kmalloc_node(size_t size, gfp_t gfp, int node)
 {
 	int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
 
 	if (size < PAGE_SIZE - align) {
 		unsigned int *m;
-		m = slob_alloc(size + align, gfp, align);
+		m = slob_alloc(size + align, gfp, align, node);
 		if (m)
 			*m = size;
 		return (void *)m + align;
 	} else {
 		void *ret;
 
-		ret = (void *) __get_free_pages(gfp | __GFP_COMP,
-						get_order(size));
+		ret = slob_new_page(gfp | __GFP_COMP, get_order(size), node);
 		if (ret) {
 			struct page *page;
 			page = virt_to_page(ret);
@@ -405,7 +444,7 @@ void *__kmalloc(size_t size, gfp_t gfp)
 		return ret;
 	}
 }
-EXPORT_SYMBOL(__kmalloc);
+EXPORT_SYMBOL(__kmalloc_node);
 
 /**
  * krealloc - reallocate memory. The contents will remain unchanged.
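The two hunks above turn __kmalloc() into the node-aware __kmalloc_node(). The plain __kmalloc() entry point presumably survives as a thin wrapper in the slab headers, which this mm/-only view does not show. A minimal sketch of such a wrapper, assuming -1 means "no node preference" exactly as kmem_cache_create() uses it later in this patch:

/* Sketch only -- the real definition would live in the slab headers. */
static inline void *__kmalloc(size_t size, gfp_t gfp)
{
	return __kmalloc_node(size, gfp, -1);
}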
@@ -455,7 +494,6 @@ void kfree(const void *block)
 	} else
 		put_page(&sp->page);
 }
-
 EXPORT_SYMBOL(kfree);
 
 /* can't use ksize for kmem_cache_alloc memory, only kmalloc */
@@ -487,7 +525,7 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
 {
 	struct kmem_cache *c;
 
-	c = slob_alloc(sizeof(struct kmem_cache), flags, 0);
+	c = slob_alloc(sizeof(struct kmem_cache), flags, 0, -1);
 
 	if (c) {
 		c->name = name;
@@ -517,21 +555,21 @@ void kmem_cache_destroy(struct kmem_cache *c)
 }
 EXPORT_SYMBOL(kmem_cache_destroy);
 
-void *kmem_cache_alloc(struct kmem_cache *c, gfp_t flags)
+void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
 {
 	void *b;
 
 	if (c->size < PAGE_SIZE)
-		b = slob_alloc(c->size, flags, c->align);
+		b = slob_alloc(c->size, flags, c->align, node);
 	else
-		b = slob_new_page(flags, get_order(c->size), node);
+		b = slob_new_page(flags, get_order(c->size), node);
 
 	if (c->ctor)
 		c->ctor(b, c, 0);
 
 	return b;
 }
-EXPORT_SYMBOL(kmem_cache_alloc);
+EXPORT_SYMBOL(kmem_cache_alloc_node);
 
 void *kmem_cache_zalloc(struct kmem_cache *c, gfp_t flags)
 {
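For completeness, a hypothetical caller of the new kmem_cache_alloc_node() exported above. Nothing here is part of the commit: the cache is assumed to have been created elsewhere with kmem_cache_create(), and the per-node loop is just one plausible way to exploit node-local placement.

/* Hypothetical usage sketch, not part of this commit. */
#include <linux/slab.h>
#include <linux/nodemask.h>

static void *objs[MAX_NUMNODES];

/* Allocate one object from 'cache' on each online NUMA node. */
static void alloc_per_node_objects(struct kmem_cache *cache)
{
	int nid;

	for_each_online_node(nid)
		objs[nid] = kmem_cache_alloc_node(cache, GFP_KERNEL, nid);
}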