path: root/mm
author		Paul Mundt <lethal@linux-sh.org>	2007-07-16 02:38:22 -0400
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2007-07-16 12:05:36 -0400
commit		6193a2ff180920f84ee06977165ebf32431fc2d2 (patch)
tree		d3c6423c50463ea741080a58a2e654cf103431f3 /mm
parent		f7977793240d836e60ff413e94e6914f08e10941 (diff)
slob: initial NUMA support
This adds preliminary NUMA support to SLOB, primarily aimed at systems with
small nodes (tested all the way down to a 128kB SRAM block), whether
asymmetric or otherwise.

We follow the same conventions as SLAB/SLUB, preferring current node
placement for new pages, or with explicit placement, if a node has been
specified. Presently on UP NUMA this has the side-effect of preferring
node#0 allocations (since numa_node_id() == 0, though this could be
reworked if we could hand off a pfn to determine node placement), so
single-CPU NUMA systems will want to place smaller nodes further out in
terms of node id. Once a page has been bound to a node (via explicit node
id typing), we only do block allocations from partial free pages that have
a matching node id in the page flags.

The current implementation does have some scalability problems, in that
all partial free pages are tracked in the global freelist (with contention
due to the single spinlock). However, these are things that are being
reworked for SMP scalability first, while things like per-node freelists
can easily be built on top of this sort of functionality once it's been
added.

More background can be found in:

	http://marc.info/?l=linux-mm&m=118117916022379&w=2
	http://marc.info/?l=linux-mm&m=118170446306199&w=2
	http://marc.info/?l=linux-mm&m=118187859420048&w=2

and subsequent threads.

Acked-by: Christoph Lameter <clameter@sgi.com>
Acked-by: Matt Mackall <mpm@selenic.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Acked-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
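For a concrete picture of the placement policy described above, here is a
small, hypothetical example (not part of this patch; the module and function
names are made up) that asks for node-local memory through kmalloc_node(),
which on SLOB now ends up in slob_alloc()/slob_new_page() with the requested
node id, and then reads the placement back out of the page flags:

/* slob_numa_demo.c: hypothetical illustration only, not part of the patch */
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/topology.h>

static int __init slob_numa_demo_init(void)
{
	int nid = numa_node_id();	/* the node this CPU currently sits on */
	void *obj;

	/* Explicit placement: prefer partial pages / new pages on 'nid'. */
	obj = kmalloc_node(64, GFP_KERNEL, nid);
	if (!obj)
		return -ENOMEM;

	/* The node a block landed on is encoded in its page's flags. */
	printk(KERN_INFO "slob_numa_demo: object on node %d (requested %d)\n",
	       page_to_nid(virt_to_page(obj)), nid);

	kfree(obj);
	return 0;
}

static void __exit slob_numa_demo_exit(void)
{
}

module_init(slob_numa_demo_init);
module_exit(slob_numa_demo_exit);
MODULE_LICENSE("GPL");

Passing -1 as the node id keeps the old behaviour: slob_alloc() skips the
node filter and slob_new_page() falls back to plain alloc_pages().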
Diffstat (limited to 'mm')
-rw-r--r--	mm/slob.c	72
1 files changed, 55 insertions, 17 deletions
diff --git a/mm/slob.c b/mm/slob.c
index 06e5e725fab3..b99b0ef2347e 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -3,6 +3,8 @@
  *
  * Matt Mackall <mpm@selenic.com> 12/30/03
  *
+ * NUMA support by Paul Mundt, 2007.
+ *
  * How SLOB works:
  *
  * The core of SLOB is a traditional K&R style heap allocator, with
@@ -10,7 +12,7 @@
  * allocator is as little as 2 bytes, however typically most architectures
  * will require 4 bytes on 32-bit and 8 bytes on 64-bit.
  *
- * The slob heap is a linked list of pages from __get_free_page, and
+ * The slob heap is a linked list of pages from alloc_pages(), and
  * within each page, there is a singly-linked list of free blocks (slob_t).
  * The heap is grown on demand and allocation from the heap is currently
  * first-fit.
@@ -18,7 +20,7 @@
  * Above this is an implementation of kmalloc/kfree. Blocks returned
  * from kmalloc are prepended with a 4-byte header with the kmalloc size.
  * If kmalloc is asked for objects of PAGE_SIZE or larger, it calls
- * __get_free_pages directly, allocating compound pages so the page order
+ * alloc_pages() directly, allocating compound pages so the page order
  * does not have to be separately tracked, and also stores the exact
  * allocation size in page->private so that it can be used to accurately
  * provide ksize(). These objects are detected in kfree() because slob_page()
@@ -29,10 +31,23 @@
  * 4-byte alignment unless the SLAB_HWCACHE_ALIGN flag is set, in which
  * case the low-level allocator will fragment blocks to create the proper
  * alignment. Again, objects of page-size or greater are allocated by
- * calling __get_free_pages. As SLAB objects know their size, no separate
+ * calling alloc_pages(). As SLAB objects know their size, no separate
  * size bookkeeping is necessary and there is essentially no allocation
  * space overhead, and compound pages aren't needed for multi-page
  * allocations.
+ *
+ * NUMA support in SLOB is fairly simplistic, pushing most of the real
+ * logic down to the page allocator, and simply doing the node accounting
+ * on the upper levels. In the event that a node id is explicitly
+ * provided, alloc_pages_node() with the specified node id is used
+ * instead. The common case (or when the node id isn't explicitly provided)
+ * will default to the current node, as per numa_node_id().
+ *
+ * Node aware pages are still inserted in to the global freelist, and
+ * these are scanned for by matching against the node id encoded in the
+ * page flags. As a result, block allocations that can be satisfied from
+ * the freelist will only be done so on pages residing on the same node,
+ * in order to prevent random node placement.
  */
 
 #include <linux/kernel.h>
@@ -204,6 +219,23 @@ static int slob_last(slob_t *s)
 	return !((unsigned long)slob_next(s) & ~PAGE_MASK);
 }
 
+static void *slob_new_page(gfp_t gfp, int order, int node)
+{
+	void *page;
+
+#ifdef CONFIG_NUMA
+	if (node != -1)
+		page = alloc_pages_node(node, gfp, order);
+	else
+#endif
+		page = alloc_pages(gfp, order);
+
+	if (!page)
+		return NULL;
+
+	return page_address(page);
+}
+
 /*
  * Allocate a slob block within a given slob_page sp.
  */
@@ -258,7 +290,7 @@ static void *slob_page_alloc(struct slob_page *sp, size_t size, int align)
 /*
  * slob_alloc: entry point into the slob allocator.
  */
-static void *slob_alloc(size_t size, gfp_t gfp, int align)
+static void *slob_alloc(size_t size, gfp_t gfp, int align, int node)
 {
 	struct slob_page *sp;
 	slob_t *b = NULL;
@@ -267,6 +299,15 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align)
 	spin_lock_irqsave(&slob_lock, flags);
 	/* Iterate through each partially free page, try to find room */
 	list_for_each_entry(sp, &free_slob_pages, list) {
+#ifdef CONFIG_NUMA
+		/*
+		 * If there's a node specification, search for a partial
+		 * page with a matching node id in the freelist.
+		 */
+		if (node != -1 && page_to_nid(&sp->page) != node)
+			continue;
+#endif
+
 		if (sp->units >= SLOB_UNITS(size)) {
 			b = slob_page_alloc(sp, size, align);
 			if (b)
@@ -277,7 +318,7 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align)
 
 	/* Not enough space: must allocate a new page */
 	if (!b) {
-		b = (slob_t *)__get_free_page(gfp);
+		b = slob_new_page(gfp, 0, node);
 		if (!b)
 			return 0;
 		sp = (struct slob_page *)virt_to_page(b);
@@ -381,22 +422,20 @@ out:
 #define ARCH_SLAB_MINALIGN __alignof__(unsigned long)
 #endif
 
-
-void *__kmalloc(size_t size, gfp_t gfp)
+void *__kmalloc_node(size_t size, gfp_t gfp, int node)
 {
 	int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
 
 	if (size < PAGE_SIZE - align) {
 		unsigned int *m;
-		m = slob_alloc(size + align, gfp, align);
+		m = slob_alloc(size + align, gfp, align, node);
 		if (m)
 			*m = size;
 		return (void *)m + align;
 	} else {
 		void *ret;
 
-		ret = (void *) __get_free_pages(gfp | __GFP_COMP,
-						get_order(size));
+		ret = slob_new_page(gfp | __GFP_COMP, get_order(size), node);
 		if (ret) {
 			struct page *page;
 			page = virt_to_page(ret);
@@ -405,7 +444,7 @@ void *__kmalloc(size_t size, gfp_t gfp)
 		return ret;
 	}
 }
-EXPORT_SYMBOL(__kmalloc);
+EXPORT_SYMBOL(__kmalloc_node);
 
 /**
  * krealloc - reallocate memory. The contents will remain unchanged.
@@ -455,7 +494,6 @@ void kfree(const void *block)
 	} else
 		put_page(&sp->page);
 }
-
 EXPORT_SYMBOL(kfree);
 
 /* can't use ksize for kmem_cache_alloc memory, only kmalloc */
@@ -487,7 +525,7 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
 {
 	struct kmem_cache *c;
 
-	c = slob_alloc(sizeof(struct kmem_cache), flags, 0);
+	c = slob_alloc(sizeof(struct kmem_cache), flags, 0, -1);
 
 	if (c) {
 		c->name = name;
@@ -517,21 +555,21 @@ void kmem_cache_destroy(struct kmem_cache *c)
 }
 EXPORT_SYMBOL(kmem_cache_destroy);
 
-void *kmem_cache_alloc(struct kmem_cache *c, gfp_t flags)
+void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
 {
 	void *b;
 
 	if (c->size < PAGE_SIZE)
-		b = slob_alloc(c->size, flags, c->align);
+		b = slob_alloc(c->size, flags, c->align, node);
 	else
-		b = (void *)__get_free_pages(flags, get_order(c->size));
+		b = slob_new_page(flags, get_order(c->size), node);
 
 	if (c->ctor)
 		c->ctor(b, c, 0);
 
 	return b;
 }
-EXPORT_SYMBOL(kmem_cache_alloc);
+EXPORT_SYMBOL(kmem_cache_alloc_node);
 
 void *kmem_cache_zalloc(struct kmem_cache *c, gfp_t flags)
 {