percpu: implement kernel memory based chunk allocation

Implement alternate percpu chunk management based on kernel memory for nommu SMP architectures. Instead of mapping into the vmalloc area, chunks are allocated as contiguous kernel memory using alloc_pages(). As such, the percpu allocator on nommu will have the following restrictions: * It can't fill chunks on-demand page-by-page. It has to allocate each chunk fully upfront. * It can't support sparse chunks for NUMA configurations. SMP w/o mmu is crazy enough. Let's hope no one does NUMA w/o mmu. :-P * If the chunk size isn't a power-of-two multiple of PAGE_SIZE, the unaligned amount will be wasted on each chunk. So, archs which use this had better align the chunk size. For instructions on how to use this, read the comment on top of mm/percpu-km.c. Signed-off-by: Tejun Heo <tj@kernel.org> Reviewed-by: David Howells <dhowells@redhat.com> Cc: Graff Yang <graff.yang@gmail.com> Cc: Sonic Zhang <sonic.adi@gmail.com>
author: Tejun Heo <tj@kernel.org> 2010-04-09 05:57:01 -0400
committer: Tejun Heo <tj@kernel.org> 2010-05-01 02:30:50 -0400
commit: b0c9778b1d07ed3aa7e411db201275553527b1b1 (patch)
tree: 8649c1b27edebc6addef5087eb1830fc8bccdb74
parent: 9f6455325618821dcf6775d7972881fde32e77c5 (diff)
2 files changed, 108 insertions, 0 deletions
diff --git a/mm/percpu-km.c b/mm/percpu-km.c
new file mode 100644
index 000000000000..df680855540a
--- /dev/null
+++ b/mm/percpu-km.c
@@ -0,0 +1,104 @@
+/*
+ * mm/percpu-km.c - kernel memory based chunk allocation
+ *
+ * Copyright (C) 2010           SUSE Linux Products GmbH
+ * Copyright (C) 2010           Tejun Heo <tj@kernel.org>
+ *
+ * This file is released under the GPLv2.
+ *
+ * Chunks are allocated as a contiguous kernel memory using gfp
+ * allocation.  This is to be used on nommu architectures.
+ *
+ * To use percpu-km,
+ *
+ * - define CONFIG_NEED_PER_CPU_KM from the arch Kconfig.
+ *
+ * - CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK must not be defined.  It's
+ *   not compatible with PER_CPU_KM.  EMBED_FIRST_CHUNK should work
+ *   fine.
+ *
+ * - NUMA is not supported.  When setting up the first chunk,
+ *   @cpu_distance_fn should be NULL or report all CPUs to be nearer
+ *   than or at LOCAL_DISTANCE.
+ *
+ * - It's best if the chunk size is power of two multiple of
+ *   PAGE_SIZE.  Because each chunk is allocated as a contiguous
+ *   kernel memory block using alloc_pages(), memory will be wasted if
+ *   chunk size is not aligned.  percpu-km code will whine about it.
+ */
+#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
+#error "contiguous percpu allocation is incompatible with paged first chunk"
+#endif
+#include <linux/log2.h>
+/**
+ * pcpu_populate_chunk - populate part of a chunk (no-op for percpu-km)
+ * @chunk: unused
+ * @off: unused
+ * @size: unused
+ *
+ * percpu-km allocates every page of a chunk in pcpu_create_chunk(), so
+ * this function has no work to do.
+ *
+ * RETURNS:
+ * Always 0.
+ */
+static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size)
+{
+        return 0;       /* nothing to populate */
+}
+/**
+ * pcpu_depopulate_chunk - depopulate part of a chunk (no-op for percpu-km)
+ * @chunk: unused
+ * @off: unused
+ * @size: unused
+ *
+ * The function body is intentionally empty; pages of a percpu-km chunk
+ * are only released as a whole in pcpu_destroy_chunk().
+ */
+static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size)
+{
+        /* intentionally empty */
+}
+/**
+ * pcpu_create_chunk - create a chunk backed by contiguous kernel pages
+ *
+ * Obtains a chunk from pcpu_alloc_chunk() and then a single block of
+ * pages from alloc_pages(GFP_KERNEL, ...) whose order is
+ * order_base_2() of the page count derived from pcpu_group_sizes[0].
+ * Each of those pages is tagged with the chunk through
+ * pcpu_set_page_chunk().  The first page is recorded in chunk->data and
+ * chunk->base_addr is set to the block's address minus
+ * pcpu_group_offsets[0].
+ *
+ * RETURNS:
+ * The new chunk, or NULL if either allocation fails.
+ */
+static struct pcpu_chunk *pcpu_create_chunk(void)
+{
+        const int page_cnt = pcpu_group_sizes[0] >> PAGE_SHIFT;
+        struct pcpu_chunk *new_chunk;
+        struct page *first_page;
+        int idx = 0;
+
+        new_chunk = pcpu_alloc_chunk();
+        if (new_chunk == NULL)
+                return NULL;
+
+        first_page = alloc_pages(GFP_KERNEL, order_base_2(page_cnt));
+        if (first_page == NULL)
+                goto err_free_chunk;
+
+        /* tag every page that belongs to the chunk proper */
+        while (idx < page_cnt) {
+                pcpu_set_page_chunk(nth_page(first_page, idx), new_chunk);
+                idx++;
+        }
+
+        new_chunk->data = first_page;
+        new_chunk->base_addr = page_address(first_page) - pcpu_group_offsets[0];
+        return new_chunk;
+
+err_free_chunk:
+        pcpu_free_chunk(new_chunk);
+        return NULL;
+}
+/**
+ * pcpu_destroy_chunk - release a chunk created by pcpu_create_chunk()
+ * @chunk: chunk to destroy, may be NULL
+ *
+ * If @chunk is non-NULL and has pages recorded in ->data, they are
+ * returned with __free_pages() using the same order computation as
+ * pcpu_create_chunk().  @chunk is then handed to pcpu_free_chunk()
+ * unconditionally.
+ */
+static void pcpu_destroy_chunk(struct pcpu_chunk *chunk)
+{
+        const int page_cnt = pcpu_group_sizes[0] >> PAGE_SHIFT;
+
+        if (chunk != NULL && chunk->data != NULL) {
+                struct page *first_page = chunk->data;
+
+                __free_pages(first_page, order_base_2(page_cnt));
+        }
+
+        pcpu_free_chunk(chunk);
+}
+/**
+ * pcpu_addr_to_page - translate an address to its struct page
+ * @addr: address to translate
+ *
+ * RETURNS:
+ * The result of virt_to_page() on @addr.
+ */
+static struct page *pcpu_addr_to_page(void *addr)
+{
+        struct page *pg = virt_to_page(addr);
+
+        return pg;
+}
+/**
+ * pcpu_verify_alloc_info - check that @ai is usable with percpu-km
+ * @ai: allocation info to verify
+ *
+ * percpu-km requires all units to be in a single group.  The number of
+ * pages covered by that group is also compared against its power-of-two
+ * round-up, and a warning is printed when the two differ, reporting
+ * the difference as pages wasted per chunk.
+ *
+ * RETURNS:
+ * 0 on success, -EINVAL if @ai does not have exactly one group.
+ */
+static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai)
+{
+        /* not named alloc_pages: that would shadow the alloc_pages() function */
+        size_t nr_pages, nr_alloc_pages;
+
+        /* all units must be in a single group */
+        if (ai->nr_groups != 1) {
+                printk(KERN_CRIT "percpu: can't handle more than one group\n");
+                return -EINVAL;
+        }
+
+        nr_pages = (ai->groups[0].nr_units * ai->unit_size) >> PAGE_SHIFT;
+        nr_alloc_pages = roundup_pow_of_two(nr_pages);
+
+        if (nr_alloc_pages > nr_pages)
+                printk(KERN_WARNING "percpu: wasting %zu pages per chunk\n",
+                       nr_alloc_pages - nr_pages);
+        return 0;
+}
diff --git a/mm/percpu.c b/mm/percpu.c
index 15f680430671..39f7dfd59585 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -654,7 +654,11 @@ static void pcpu_destroy_chunk(struct pcpu_chunk *chunk);
 static struct page *pcpu_addr_to_page(void *addr);
 static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai);
+#ifdef CONFIG_NEED_PER_CPU_KM
+#include "percpu-km.c"
+#else
 #include "percpu-vm.c"
+#endif
 /**
 * pcpu_chunk_addr_search - determine chunk containing specified address
author	Tejun Heo <tj@kernel.org>	2010-04-09 05:57:01 -0400
committer	Tejun Heo <tj@kernel.org>	2010-05-01 02:30:50 -0400
commit	b0c9778b1d07ed3aa7e411db201275553527b1b1 (patch)
tree	8649c1b27edebc6addef5087eb1830fc8bccdb74
parent	9f6455325618821dcf6775d7972881fde32e77c5 (diff)

diff --git a/mm/percpu-km.c b/mm/percpu-km.c new file mode 100644 index 000000000000..df680855540a --- /dev/null +++ b/mm/percpu-km.c
@@ -0,0 +1,104 @@
		1	/*
		2	* mm/percpu-km.c - kernel memory based chunk allocation
		3	*
		4	* Copyright (C) 2010 SUSE Linux Products GmbH
		5	* Copyright (C) 2010 Tejun Heo <tj@kernel.org>
		6	*
		7	* This file is released under the GPLv2.
		8	*
		9	* Chunks are allocated as a contiguous kernel memory using gfp
		10	* allocation. This is to be used on nommu architectures.
		11	*
		12	* To use percpu-km,
		13	*
		14	* - define CONFIG_NEED_PER_CPU_KM from the arch Kconfig.
		15	*
		16	* - CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK must not be defined. It's
		17	* not compatible with PER_CPU_KM. EMBED_FIRST_CHUNK should work
		18	* fine.
		19	*
		20	* - NUMA is not supported. When setting up the first chunk,
		21	* @cpu_distance_fn should be NULL or report all CPUs to be nearer
		22	* than or at LOCAL_DISTANCE.
		23	*
		24	* - It's best if the chunk size is power of two multiple of
		25	* PAGE_SIZE. Because each chunk is allocated as a contiguous
		26	* kernel memory block using alloc_pages(), memory will be wasted if
		27	* chunk size is not aligned. percpu-km code will whine about it.
		28	*/
		29
		30	#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
		31	#error "contiguous percpu allocation is incompatible with paged first chunk"
		32	#endif
		33
		34	#include <linux/log2.h>
		35
		36	static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size)
		37	{
		38	/* noop */
		39	return 0;
		40	}
		41
		42	static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size)
		43	{
		44	/* nada */
		45	}
		46
		47	static struct pcpu_chunk *pcpu_create_chunk(void)
		48	{
		49	const int nr_pages = pcpu_group_sizes[0] >> PAGE_SHIFT;
		50	struct pcpu_chunk *chunk;
		51	struct page *pages;
		52	int i;
		53
		54	chunk = pcpu_alloc_chunk();
		55	if (!chunk)
		56	return NULL;
		57
		58	pages = alloc_pages(GFP_KERNEL, order_base_2(nr_pages));
		59	if (!pages) {
		60	pcpu_free_chunk(chunk);
		61	return NULL;
		62	}
		63
		64	for (i = 0; i < nr_pages; i++)
		65	pcpu_set_page_chunk(nth_page(pages, i), chunk);
		66
		67	chunk->data = pages;
		68	chunk->base_addr = page_address(pages) - pcpu_group_offsets[0];
		69	return chunk;
		70	}
		71
		72	static void pcpu_destroy_chunk(struct pcpu_chunk *chunk)
		73	{
		74	const int nr_pages = pcpu_group_sizes[0] >> PAGE_SHIFT;
		75
		76	if (chunk && chunk->data)
		77	__free_pages(chunk->data, order_base_2(nr_pages));
		78	pcpu_free_chunk(chunk);
		79	}
		80
		81	static struct page *pcpu_addr_to_page(void *addr)
		82	{
		83	return virt_to_page(addr);
		84	}
		85
		86	static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai)
		87	{
		88	size_t nr_pages, alloc_pages;
		89
		90	/* all units must be in a single group */
		91	if (ai->nr_groups != 1) {
		92	printk(KERN_CRIT "percpu: can't handle more than one groups\n");
		93	return -EINVAL;
		94	}
		95
		96	nr_pages = (ai->groups[0].nr_units * ai->unit_size) >> PAGE_SHIFT;
		97	alloc_pages = roundup_pow_of_two(nr_pages);
		98
		99	if (alloc_pages > nr_pages)
		100	printk(KERN_WARNING "percpu: wasting %zu pages per chunk\n",
		101	alloc_pages - nr_pages);
		102
		103	return 0;
		104	}


diff --git a/mm/percpu.c b/mm/percpu.c index 15f680430671..39f7dfd59585 100644 --- a/mm/percpu.c +++ b/mm/percpu.c
@@ -654,7 +654,11 @@ static void pcpu_destroy_chunk(struct pcpu_chunk *chunk);
654	static struct page *pcpu_addr_to_page(void *addr);	654	static struct page *pcpu_addr_to_page(void *addr);
655	static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai);	655	static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai);
656		656
		657	#ifdef CONFIG_NEED_PER_CPU_KM
		658	#include "percpu-km.c"
		659	#else
657	#include "percpu-vm.c"	660	#include "percpu-vm.c"
		661	#endif
658		662
659	/**	663	/**
660	* pcpu_chunk_addr_search - determine chunk containing specified address	664	* pcpu_chunk_addr_search - determine chunk containing specified address