1 files changed, 55 insertions, 17 deletions
diff --git a/mm/slob.c b/mm/slob.c
index 06e5e725fab..b99b0ef2347 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -3,6 +3,8 @@
 *
 * Matt Mackall <mpm@selenic.com> 12/30/03
 *
+ * NUMA support by Paul Mundt, 2007.
+ *
 * How SLOB works:
 *
 * The core of SLOB is a traditional K&R style heap allocator, with
@@ -10,7 +12,7 @@
 * allocator is as little as 2 bytes, however typically most architectures
 * will require 4 bytes on 32-bit and 8 bytes on 64-bit.
 *
- * The slob heap is a linked list of pages from __get_free_page, and
+ * The slob heap is a linked list of pages from alloc_pages(), and
 * within each page, there is a singly-linked list of free blocks (slob_t).
 * The heap is grown on demand and allocation from the heap is currently
 * first-fit.
@@ -18,7 +20,7 @@
 * Above this is an implementation of kmalloc/kfree. Blocks returned
 * from kmalloc are prepended with a 4-byte header with the kmalloc size.
 * If kmalloc is asked for objects of PAGE_SIZE or larger, it calls
- * __get_free_pages directly, allocating compound pages so the page order
+ * alloc_pages() directly, allocating compound pages so the page order
 * does not have to be separately tracked, and also stores the exact
 * allocation size in page->private so that it can be used to accurately
 * provide ksize(). These objects are detected in kfree() because slob_page()
@@ -29,10 +31,23 @@
 * 4-byte alignment unless the SLAB_HWCACHE_ALIGN flag is set, in which
 * case the low-level allocator will fragment blocks to create the proper
 * alignment. Again, objects of page-size or greater are allocated by
- * calling __get_free_pages. As SLAB objects know their size, no separate
+ * calling alloc_pages(). As SLAB objects know their size, no separate
 * size bookkeeping is necessary and there is essentially no allocation
 * space overhead, and compound pages aren't needed for multi-page
 * allocations.
+ *
+ * NUMA support in SLOB is fairly simplistic, pushing most of the real
+ * logic down to the page allocator, and simply doing the node accounting
+ * on the upper levels. In the event that a node id is explicitly
+ * provided, alloc_pages_node() with the specified node id is used
+ * instead. The common case (or when the node id isn't explicitly provided)
+ * will default to the current node, as per numa_node_id().
+ *
+ * Node aware pages are still inserted in to the global freelist, and
+ * these are scanned for by matching against the node id encoded in the
+ * page flags. As a result, block allocations that can be satisfied from
+ * the freelist will only be done so on pages residing on the same node,
+ * in order to prevent random node placement.
 */
 #include <linux/kernel.h>
@@ -204,6 +219,23 @@ static int slob_last(slob_t *s)
        return !((unsigned long)slob_next(s) & ~PAGE_MASK);
 }
+static void *slob_new_page(gfp_t gfp, int order, int node)
+{
+        void *page;
+#ifdef CONFIG_NUMA
+        if (node != -1)
+                page = alloc_pages_node(node, gfp, order);
+        else
+#endif
+                page = alloc_pages(gfp, order);
+        if (!page)
+                return NULL;
+        return page_address(page);
+}
 /*
 * Allocate a slob block within a given slob_page sp.
 */
@@ -258,7 +290,7 @@ static void *slob_page_alloc(struct slob_page *sp, size_t size, int align)
 /*
 * slob_alloc: entry point into the slob allocator.
 */
-static void *slob_alloc(size_t size, gfp_t gfp, int align)
+static void *slob_alloc(size_t size, gfp_t gfp, int align, int node)
 {
        struct slob_page *sp;
        slob_t *b = NULL;
@@ -267,6 +299,15 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align)
        spin_lock_irqsave(&slob_lock, flags);
        /* Iterate through each partially free page, try to find room */
        list_for_each_entry(sp, &free_slob_pages, list) {
+#ifdef CONFIG_NUMA
+                /*
+                 * If there's a node specification, search for a partial
+                 * page with a matching node id in the freelist.
+                 */
+                if (node != -1 && page_to_nid(&sp->page) != node)
+                        continue;
+#endif
                if (sp->units >= SLOB_UNITS(size)) {
                        b = slob_page_alloc(sp, size, align);
                        if (b)
@@ -277,7 +318,7 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align)
        /* Not enough space: must allocate a new page */
        if (!b) {
-                b = (slob_t *)__get_free_page(gfp);
+                b = slob_new_page(gfp, 0, node);
                if (!b)
                        return 0;
                sp = (struct slob_page *)virt_to_page(b);
@@ -381,22 +422,20 @@ out:
 #define ARCH_SLAB_MINALIGN __alignof__(unsigned long)
 #endif
+void *__kmalloc_node(size_t size, gfp_t gfp, int node)
-void *__kmalloc(size_t size, gfp_t gfp)
 {
        int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
        if (size < PAGE_SIZE - align) {
                unsigned int *m;
-                m = slob_alloc(size + align, gfp, align);
+                m = slob_alloc(size + align, gfp, align, node);
                if (m)
                        *m = size;
                return (void *)m + align;
        } else {
                void *ret;
-                ret = (void *) __get_free_pages(gfp | __GFP_COMP,
+                ret = slob_new_page(gfp | __GFP_COMP, get_order(size), node);
-                                                get_order(size));
                if (ret) {
                        struct page *page;
                        page = virt_to_page(ret);
@@ -405,7 +444,7 @@ void *__kmalloc(size_t size, gfp_t gfp)
                return ret;
        }
 }
-EXPORT_SYMBOL(__kmalloc);
+EXPORT_SYMBOL(__kmalloc_node);
 /**
 * krealloc - reallocate memory. The contents will remain unchanged.
@@ -455,7 +494,6 @@ void kfree(const void *block)
        } else
                put_page(&sp->page);
 }
 EXPORT_SYMBOL(kfree);
 /* can't use ksize for kmem_cache_alloc memory, only kmalloc */
@@ -487,7 +525,7 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
 {
        struct kmem_cache *c;
-        c = slob_alloc(sizeof(struct kmem_cache), flags, 0);
+        c = slob_alloc(sizeof(struct kmem_cache), flags, 0, -1);
        if (c) {
                c->name = name;
@@ -517,21 +555,21 @@ void kmem_cache_destroy(struct kmem_cache *c)
 }
 EXPORT_SYMBOL(kmem_cache_destroy);
-void *kmem_cache_alloc(struct kmem_cache *c, gfp_t flags)
+void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
 {
        void *b;
        if (c->size < PAGE_SIZE)
-                b = slob_alloc(c->size, flags, c->align);
+                b = slob_alloc(c->size, flags, c->align, node);
        else
-                b = (void *)__get_free_pages(flags, get_order(c->size));
+                b = slob_new_page(flags, get_order(c->size), node);
        if (c->ctor)
                c->ctor(b, c, 0);
        return b;
 }
-EXPORT_SYMBOL(kmem_cache_alloc);
+EXPORT_SYMBOL(kmem_cache_alloc_node);
 void *kmem_cache_zalloc(struct kmem_cache *c, gfp_t flags)
 {

diff --git a/mm/slob.c b/mm/slob.c
index 06e5e725fab..b99b0ef2347 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -3,6 +3,8 @@
3	*	3	*
4	* Matt Mackall <mpm@selenic.com> 12/30/03	4	* Matt Mackall <mpm@selenic.com> 12/30/03
5	*	5	*
		6	* NUMA support by Paul Mundt, 2007.
		7	*
6	* How SLOB works:	8	* How SLOB works:
7	*	9	*
8	* The core of SLOB is a traditional K&R style heap allocator, with	10	* The core of SLOB is a traditional K&R style heap allocator, with
@@ -10,7 +12,7 @@
10	* allocator is as little as 2 bytes, however typically most architectures	12	* allocator is as little as 2 bytes, however typically most architectures
11	* will require 4 bytes on 32-bit and 8 bytes on 64-bit.	13	* will require 4 bytes on 32-bit and 8 bytes on 64-bit.
12	*	14	*
13	* The slob heap is a linked list of pages from __get_free_page, and	15	* The slob heap is a linked list of pages from alloc_pages(), and
14	* within each page, there is a singly-linked list of free blocks (slob_t).	16	* within each page, there is a singly-linked list of free blocks (slob_t).
15	* The heap is grown on demand and allocation from the heap is currently	17	* The heap is grown on demand and allocation from the heap is currently
16	* first-fit.	18	* first-fit.
@@ -18,7 +20,7 @@
18	* Above this is an implementation of kmalloc/kfree. Blocks returned	20	* Above this is an implementation of kmalloc/kfree. Blocks returned
19	* from kmalloc are prepended with a 4-byte header with the kmalloc size.	21	* from kmalloc are prepended with a 4-byte header with the kmalloc size.
20	* If kmalloc is asked for objects of PAGE_SIZE or larger, it calls	22	* If kmalloc is asked for objects of PAGE_SIZE or larger, it calls
21	* __get_free_pages directly, allocating compound pages so the page order	23	* alloc_pages() directly, allocating compound pages so the page order
22	* does not have to be separately tracked, and also stores the exact	24	* does not have to be separately tracked, and also stores the exact
23	* allocation size in page->private so that it can be used to accurately	25	* allocation size in page->private so that it can be used to accurately
24	* provide ksize(). These objects are detected in kfree() because slob_page()	26	* provide ksize(). These objects are detected in kfree() because slob_page()
@@ -29,10 +31,23 @@
29	* 4-byte alignment unless the SLAB_HWCACHE_ALIGN flag is set, in which	31	* 4-byte alignment unless the SLAB_HWCACHE_ALIGN flag is set, in which
30	* case the low-level allocator will fragment blocks to create the proper	32	* case the low-level allocator will fragment blocks to create the proper
31	* alignment. Again, objects of page-size or greater are allocated by	33	* alignment. Again, objects of page-size or greater are allocated by
32	* calling __get_free_pages. As SLAB objects know their size, no separate	34	* calling alloc_pages(). As SLAB objects know their size, no separate
33	* size bookkeeping is necessary and there is essentially no allocation	35	* size bookkeeping is necessary and there is essentially no allocation
34	* space overhead, and compound pages aren't needed for multi-page	36	* space overhead, and compound pages aren't needed for multi-page
35	* allocations.	37	* allocations.
		38	*
		39	* NUMA support in SLOB is fairly simplistic, pushing most of the real
		40	* logic down to the page allocator, and simply doing the node accounting
		41	* on the upper levels. In the event that a node id is explicitly
		42	* provided, alloc_pages_node() with the specified node id is used
		43	* instead. The common case (or when the node id isn't explicitly provided)
		44	* will default to the current node, as per numa_node_id().
		45	*
		46	* Node aware pages are still inserted in to the global freelist, and
		47	* these are scanned for by matching against the node id encoded in the
		48	* page flags. As a result, block allocations that can be satisfied from
		49	* the freelist will only be done so on pages residing on the same node,
		50	* in order to prevent random node placement.
36	*/	51	*/
37		52
38	#include <linux/kernel.h>	53	#include <linux/kernel.h>
@@ -204,6 +219,23 @@ static int slob_last(slob_t *s)
204	return !((unsigned long)slob_next(s) & ~PAGE_MASK);	219	return !((unsigned long)slob_next(s) & ~PAGE_MASK);
205	}	220	}
206		221
		222	static void *slob_new_page(gfp_t gfp, int order, int node)
		223	{
		224	void *page;
		225
		226	#ifdef CONFIG_NUMA
		227	if (node != -1)
		228	page = alloc_pages_node(node, gfp, order);
		229	else
		230	#endif
		231	page = alloc_pages(gfp, order);
		232
		233	if (!page)
		234	return NULL;
		235
		236	return page_address(page);
		237	}
		238
207	/*	239	/*
208	* Allocate a slob block within a given slob_page sp.	240	* Allocate a slob block within a given slob_page sp.
209	*/	241	*/
@@ -258,7 +290,7 @@ static void *slob_page_alloc(struct slob_page *sp, size_t size, int align)
258	/*	290	/*
259	* slob_alloc: entry point into the slob allocator.	291	* slob_alloc: entry point into the slob allocator.
260	*/	292	*/
261	static void *slob_alloc(size_t size, gfp_t gfp, int align)	293	static void *slob_alloc(size_t size, gfp_t gfp, int align, int node)
262	{	294	{
263	struct slob_page *sp;	295	struct slob_page *sp;
264	slob_t *b = NULL;	296	slob_t *b = NULL;
@@ -267,6 +299,15 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align)
267	spin_lock_irqsave(&slob_lock, flags);	299	spin_lock_irqsave(&slob_lock, flags);
268	/* Iterate through each partially free page, try to find room */	300	/* Iterate through each partially free page, try to find room */
269	list_for_each_entry(sp, &free_slob_pages, list) {	301	list_for_each_entry(sp, &free_slob_pages, list) {
		302	#ifdef CONFIG_NUMA
		303	/*
		304	* If there's a node specification, search for a partial
		305	* page with a matching node id in the freelist.
		306	*/
		307	if (node != -1 && page_to_nid(&sp->page) != node)
		308	continue;
		309	#endif
		310
270	if (sp->units >= SLOB_UNITS(size)) {	311	if (sp->units >= SLOB_UNITS(size)) {
271	b = slob_page_alloc(sp, size, align);	312	b = slob_page_alloc(sp, size, align);
272	if (b)	313	if (b)
@@ -277,7 +318,7 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align)
277		318
278	/* Not enough space: must allocate a new page */	319	/* Not enough space: must allocate a new page */
279	if (!b) {	320	if (!b) {
280	b = (slob_t *)__get_free_page(gfp);	321	b = slob_new_page(gfp, 0, node);
281	if (!b)	322	if (!b)
282	return 0;	323	return 0;
283	sp = (struct slob_page *)virt_to_page(b);	324	sp = (struct slob_page *)virt_to_page(b);
@@ -381,22 +422,20 @@ out:
381	#define ARCH_SLAB_MINALIGN __alignof__(unsigned long)	422	#define ARCH_SLAB_MINALIGN __alignof__(unsigned long)
382	#endif	423	#endif
383		424
384		425	void *__kmalloc_node(size_t size, gfp_t gfp, int node)
385	void *__kmalloc(size_t size, gfp_t gfp)
386	{	426	{
387	int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);	427	int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
388		428
389	if (size < PAGE_SIZE - align) {	429	if (size < PAGE_SIZE - align) {
390	unsigned int *m;	430	unsigned int *m;
391	m = slob_alloc(size + align, gfp, align);	431	m = slob_alloc(size + align, gfp, align, node);
392	if (m)	432	if (m)
393	*m = size;	433	*m = size;
394	return (void *)m + align;	434	return (void *)m + align;
395	} else {	435	} else {
396	void *ret;	436	void *ret;
397		437
398	ret = (void *) __get_free_pages(gfp | __GFP_COMP,	438	ret = slob_new_page(gfp | __GFP_COMP, get_order(size), node);
399	get_order(size));
400	if (ret) {	439	if (ret) {
401	struct page *page;	440	struct page *page;
402	page = virt_to_page(ret);	441	page = virt_to_page(ret);
@@ -405,7 +444,7 @@ void *__kmalloc(size_t size, gfp_t gfp)
405	return ret;	444	return ret;
406	}	445	}
407	}	446	}
408	EXPORT_SYMBOL(__kmalloc);	447	EXPORT_SYMBOL(__kmalloc_node);
409		448
410	/**	449	/**
411	* krealloc - reallocate memory. The contents will remain unchanged.	450	* krealloc - reallocate memory. The contents will remain unchanged.
@@ -455,7 +494,6 @@ void kfree(const void *block)
455	} else	494	} else
456	put_page(&sp->page);	495	put_page(&sp->page);
457	}	496	}
458
459	EXPORT_SYMBOL(kfree);	497	EXPORT_SYMBOL(kfree);
460		498
461	/* can't use ksize for kmem_cache_alloc memory, only kmalloc */	499	/* can't use ksize for kmem_cache_alloc memory, only kmalloc */
@@ -487,7 +525,7 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
487	{	525	{
488	struct kmem_cache *c;	526	struct kmem_cache *c;
489		527
490	c = slob_alloc(sizeof(struct kmem_cache), flags, 0);	528	c = slob_alloc(sizeof(struct kmem_cache), flags, 0, -1);
491		529
492	if (c) {	530	if (c) {
493	c->name = name;	531	c->name = name;
@@ -517,21 +555,21 @@ void kmem_cache_destroy(struct kmem_cache *c)
517	}	555	}
518	EXPORT_SYMBOL(kmem_cache_destroy);	556	EXPORT_SYMBOL(kmem_cache_destroy);
519		557
520	void *kmem_cache_alloc(struct kmem_cache *c, gfp_t flags)	558	void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
521	{	559	{
522	void *b;	560	void *b;
523		561
524	if (c->size < PAGE_SIZE)	562	if (c->size < PAGE_SIZE)
525	b = slob_alloc(c->size, flags, c->align);	563	b = slob_alloc(c->size, flags, c->align, node);
526	else	564	else
527	b = (void *)__get_free_pages(flags, get_order(c->size));	565	b = slob_new_page(flags, get_order(c->size), node);
528		566
529	if (c->ctor)	567	if (c->ctor)
530	c->ctor(b, c, 0);	568	c->ctor(b, c, 0);
531		569
532	return b;	570	return b;
533	}	571	}
534	EXPORT_SYMBOL(kmem_cache_alloc);	572	EXPORT_SYMBOL(kmem_cache_alloc_node);
535		573
536	void *kmem_cache_zalloc(struct kmem_cache *c, gfp_t flags)	574	void *kmem_cache_zalloc(struct kmem_cache *c, gfp_t flags)
537	{	575	{