diff options
author | Tejun Heo <tj@kernel.org> | 2010-09-03 12:22:48 -0400 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2010-09-08 05:11:23 -0400 |
commit | bbddff0545878a8649c091a9dd7c43ce91516734 (patch) | |
tree | 667714de4398d1589605555650cf7431a27e1a13 /mm | |
parent | 6abad5acac09921f4944af77d3860f82d49f528d (diff) |
percpu: use percpu allocator on UP too
On UP, percpu allocations were redirected to kmalloc. This has the
following problems.
* For a certain amount of allocations (determined by
PERCPU_DYNAMIC_EARLY_SLOTS and PERCPU_DYNAMIC_EARLY_SIZE), the percpu
allocator can be used before the usual kernel memory allocator is
brought online. On SMP, this is used to initialize the kernel
memory allocator.
* The percpu allocator honors alignment up to PAGE_SIZE but kmalloc()
doesn't. For example, workqueue makes use of larger alignments for
cpu_workqueues.
Currently, users of the percpu allocator need to handle UP differently,
which is somewhat fragile and ugly. Other than a small amount of
memory, there isn't much to lose by enabling the percpu allocator on UP.
It can simply use kernel memory based chunk allocation which was added
for SMP archs w/o MMUs.
This patch removes mm/percpu_up.c, builds mm/percpu.c on UP too and
makes UP build use percpu-km. As percpu addresses and kernel
addresses are always identity mapped and static percpu variables don't
need any special treatment, nothing is arch dependent and mm/percpu.c
implements generic setup_per_cpu_areas() for UP.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Christoph Lameter <cl@linux-foundation.org>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Kconfig | 8 | ||||
-rw-r--r-- | mm/Makefile | 7 | ||||
-rw-r--r-- | mm/percpu-km.c | 2 | ||||
-rw-r--r-- | mm/percpu.c | 60 | ||||
-rw-r--r-- | mm/percpu_up.c | 30 |
5 files changed, 66 insertions, 41 deletions
diff --git a/mm/Kconfig b/mm/Kconfig index f4e516e9c37c..01a57447a410 100644 --- a/mm/Kconfig +++ b/mm/Kconfig | |||
@@ -301,3 +301,11 @@ config NOMMU_INITIAL_TRIM_EXCESS | |||
301 | of 1 says that all excess pages should be trimmed. | 301 | of 1 says that all excess pages should be trimmed. |
302 | 302 | ||
303 | See Documentation/nommu-mmap.txt for more information. | 303 | See Documentation/nommu-mmap.txt for more information. |
304 | |||
305 | # | ||
306 | # UP and nommu archs use km based percpu allocator | ||
307 | # | ||
308 | config NEED_PER_CPU_KM | ||
309 | depends on !SMP | ||
310 | bool | ||
311 | default y | ||
diff --git a/mm/Makefile b/mm/Makefile index 34b2546a9e37..f73f75a29f82 100644 --- a/mm/Makefile +++ b/mm/Makefile | |||
@@ -11,7 +11,7 @@ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ | |||
11 | maccess.o page_alloc.o page-writeback.o \ | 11 | maccess.o page_alloc.o page-writeback.o \ |
12 | readahead.o swap.o truncate.o vmscan.o shmem.o \ | 12 | readahead.o swap.o truncate.o vmscan.o shmem.o \ |
13 | prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ | 13 | prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ |
14 | page_isolation.o mm_init.o mmu_context.o \ | 14 | page_isolation.o mm_init.o mmu_context.o percpu.o \ |
15 | $(mmu-y) | 15 | $(mmu-y) |
16 | obj-y += init-mm.o | 16 | obj-y += init-mm.o |
17 | 17 | ||
@@ -36,11 +36,6 @@ obj-$(CONFIG_FAILSLAB) += failslab.o | |||
36 | obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o | 36 | obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o |
37 | obj-$(CONFIG_FS_XIP) += filemap_xip.o | 37 | obj-$(CONFIG_FS_XIP) += filemap_xip.o |
38 | obj-$(CONFIG_MIGRATION) += migrate.o | 38 | obj-$(CONFIG_MIGRATION) += migrate.o |
39 | ifdef CONFIG_SMP | ||
40 | obj-y += percpu.o | ||
41 | else | ||
42 | obj-y += percpu_up.o | ||
43 | endif | ||
44 | obj-$(CONFIG_QUICKLIST) += quicklist.o | 39 | obj-$(CONFIG_QUICKLIST) += quicklist.o |
45 | obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o | 40 | obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o |
46 | obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o | 41 | obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o |
diff --git a/mm/percpu-km.c b/mm/percpu-km.c index df680855540a..7037bc73bfa4 100644 --- a/mm/percpu-km.c +++ b/mm/percpu-km.c | |||
@@ -27,7 +27,7 @@ | |||
27 | * chunk size is not aligned. percpu-km code will whine about it. | 27 | * chunk size is not aligned. percpu-km code will whine about it. |
28 | */ | 28 | */ |
29 | 29 | ||
30 | #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK | 30 | #if defined(CONFIG_SMP) && defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK) |
31 | #error "contiguous percpu allocation is incompatible with paged first chunk" | 31 | #error "contiguous percpu allocation is incompatible with paged first chunk" |
32 | #endif | 32 | #endif |
33 | 33 | ||
diff --git a/mm/percpu.c b/mm/percpu.c index 58c572b18b07..fa70122dfdd0 100644 --- a/mm/percpu.c +++ b/mm/percpu.c | |||
@@ -76,6 +76,7 @@ | |||
76 | #define PCPU_SLOT_BASE_SHIFT 5 /* 1-31 shares the same slot */ | 76 | #define PCPU_SLOT_BASE_SHIFT 5 /* 1-31 shares the same slot */ |
77 | #define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */ | 77 | #define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */ |
78 | 78 | ||
79 | #ifdef CONFIG_SMP | ||
79 | /* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */ | 80 | /* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */ |
80 | #ifndef __addr_to_pcpu_ptr | 81 | #ifndef __addr_to_pcpu_ptr |
81 | #define __addr_to_pcpu_ptr(addr) \ | 82 | #define __addr_to_pcpu_ptr(addr) \ |
@@ -89,6 +90,11 @@ | |||
89 | (unsigned long)pcpu_base_addr - \ | 90 | (unsigned long)pcpu_base_addr - \ |
90 | (unsigned long)__per_cpu_start) | 91 | (unsigned long)__per_cpu_start) |
91 | #endif | 92 | #endif |
93 | #else /* CONFIG_SMP */ | ||
94 | /* on UP, it's always identity mapped */ | ||
95 | #define __addr_to_pcpu_ptr(addr) (void __percpu *)(addr) | ||
96 | #define __pcpu_ptr_to_addr(ptr) (void __force *)(ptr) | ||
97 | #endif /* CONFIG_SMP */ | ||
92 | 98 | ||
93 | struct pcpu_chunk { | 99 | struct pcpu_chunk { |
94 | struct list_head list; /* linked to pcpu_slot lists */ | 100 | struct list_head list; /* linked to pcpu_slot lists */ |
@@ -949,6 +955,7 @@ EXPORT_SYMBOL_GPL(free_percpu); | |||
949 | */ | 955 | */ |
950 | bool is_kernel_percpu_address(unsigned long addr) | 956 | bool is_kernel_percpu_address(unsigned long addr) |
951 | { | 957 | { |
958 | #ifdef CONFIG_SMP | ||
952 | const size_t static_size = __per_cpu_end - __per_cpu_start; | 959 | const size_t static_size = __per_cpu_end - __per_cpu_start; |
953 | void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr); | 960 | void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr); |
954 | unsigned int cpu; | 961 | unsigned int cpu; |
@@ -959,6 +966,8 @@ bool is_kernel_percpu_address(unsigned long addr) | |||
959 | if ((void *)addr >= start && (void *)addr < start + static_size) | 966 | if ((void *)addr >= start && (void *)addr < start + static_size) |
960 | return true; | 967 | return true; |
961 | } | 968 | } |
969 | #endif | ||
970 | /* on UP, can't distinguish from other static vars, always false */ | ||
962 | return false; | 971 | return false; |
963 | } | 972 | } |
964 | 973 | ||
@@ -1066,6 +1075,8 @@ void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai) | |||
1066 | free_bootmem(__pa(ai), ai->__ai_size); | 1075 | free_bootmem(__pa(ai), ai->__ai_size); |
1067 | } | 1076 | } |
1068 | 1077 | ||
1078 | #if defined(CONFIG_SMP) && (defined(CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK) || \ | ||
1079 | defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK)) | ||
1069 | /** | 1080 | /** |
1070 | * pcpu_build_alloc_info - build alloc_info considering distances between CPUs | 1081 | * pcpu_build_alloc_info - build alloc_info considering distances between CPUs |
1071 | * @reserved_size: the size of reserved percpu area in bytes | 1082 | * @reserved_size: the size of reserved percpu area in bytes |
@@ -1220,6 +1231,8 @@ static struct pcpu_alloc_info * __init pcpu_build_alloc_info( | |||
1220 | 1231 | ||
1221 | return ai; | 1232 | return ai; |
1222 | } | 1233 | } |
1234 | #endif /* CONFIG_SMP && (CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK || | ||
1235 | CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK) */ | ||
1223 | 1236 | ||
1224 | /** | 1237 | /** |
1225 | * pcpu_dump_alloc_info - print out information about pcpu_alloc_info | 1238 | * pcpu_dump_alloc_info - print out information about pcpu_alloc_info |
@@ -1363,7 +1376,9 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, | |||
1363 | 1376 | ||
1364 | /* sanity checks */ | 1377 | /* sanity checks */ |
1365 | PCPU_SETUP_BUG_ON(ai->nr_groups <= 0); | 1378 | PCPU_SETUP_BUG_ON(ai->nr_groups <= 0); |
1379 | #ifdef CONFIG_SMP | ||
1366 | PCPU_SETUP_BUG_ON(!ai->static_size); | 1380 | PCPU_SETUP_BUG_ON(!ai->static_size); |
1381 | #endif | ||
1367 | PCPU_SETUP_BUG_ON(!base_addr); | 1382 | PCPU_SETUP_BUG_ON(!base_addr); |
1368 | PCPU_SETUP_BUG_ON(ai->unit_size < size_sum); | 1383 | PCPU_SETUP_BUG_ON(ai->unit_size < size_sum); |
1369 | PCPU_SETUP_BUG_ON(ai->unit_size & ~PAGE_MASK); | 1384 | PCPU_SETUP_BUG_ON(ai->unit_size & ~PAGE_MASK); |
@@ -1488,6 +1503,8 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, | |||
1488 | return 0; | 1503 | return 0; |
1489 | } | 1504 | } |
1490 | 1505 | ||
1506 | #ifdef CONFIG_SMP | ||
1507 | |||
1491 | const char *pcpu_fc_names[PCPU_FC_NR] __initdata = { | 1508 | const char *pcpu_fc_names[PCPU_FC_NR] __initdata = { |
1492 | [PCPU_FC_AUTO] = "auto", | 1509 | [PCPU_FC_AUTO] = "auto", |
1493 | [PCPU_FC_EMBED] = "embed", | 1510 | [PCPU_FC_EMBED] = "embed", |
@@ -1758,8 +1775,9 @@ out_free_ar: | |||
1758 | } | 1775 | } |
1759 | #endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */ | 1776 | #endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */ |
1760 | 1777 | ||
1778 | #ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA | ||
1761 | /* | 1779 | /* |
1762 | * Generic percpu area setup. | 1780 | * Generic SMP percpu area setup. |
1763 | * | 1781 | * |
1764 | * The embedding helper is used because its behavior closely resembles | 1782 | * The embedding helper is used because its behavior closely resembles |
1765 | * the original non-dynamic generic percpu area setup. This is | 1783 | * the original non-dynamic generic percpu area setup. This is |
@@ -1770,7 +1788,6 @@ out_free_ar: | |||
1770 | * on the physical linear memory mapping which uses large page | 1788 | * on the physical linear memory mapping which uses large page |
1771 | * mappings on applicable archs. | 1789 | * mappings on applicable archs. |
1772 | */ | 1790 | */ |
1773 | #ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA | ||
1774 | unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; | 1791 | unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; |
1775 | EXPORT_SYMBOL(__per_cpu_offset); | 1792 | EXPORT_SYMBOL(__per_cpu_offset); |
1776 | 1793 | ||
@@ -1799,13 +1816,48 @@ void __init setup_per_cpu_areas(void) | |||
1799 | PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL, | 1816 | PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL, |
1800 | pcpu_dfl_fc_alloc, pcpu_dfl_fc_free); | 1817 | pcpu_dfl_fc_alloc, pcpu_dfl_fc_free); |
1801 | if (rc < 0) | 1818 | if (rc < 0) |
1802 | panic("Failed to initialized percpu areas."); | 1819 | panic("Failed to initialize percpu areas."); |
1803 | 1820 | ||
1804 | delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; | 1821 | delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; |
1805 | for_each_possible_cpu(cpu) | 1822 | for_each_possible_cpu(cpu) |
1806 | __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; | 1823 | __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; |
1807 | } | 1824 | } |
1808 | #endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */ | 1825 | #endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */ |
1826 | |||
1827 | #else /* CONFIG_SMP */ | ||
1828 | |||
1829 | /* | ||
1830 | * UP percpu area setup. | ||
1831 | * | ||
1832 | * UP always uses km-based percpu allocator with identity mapping. | ||
1833 | * Static percpu variables are indistinguishable from the usual static | ||
1834 | * variables and don't require any special preparation. | ||
1835 | */ | ||
1836 | void __init setup_per_cpu_areas(void) | ||
1837 | { | ||
1838 | const size_t unit_size = | ||
1839 | roundup_pow_of_two(max_t(size_t, PCPU_MIN_UNIT_SIZE, | ||
1840 | PERCPU_DYNAMIC_RESERVE)); | ||
1841 | struct pcpu_alloc_info *ai; | ||
1842 | void *fc; | ||
1843 | |||
1844 | ai = pcpu_alloc_alloc_info(1, 1); | ||
1845 | fc = __alloc_bootmem(unit_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); | ||
1846 | if (!ai || !fc) | ||
1847 | panic("Failed to allocate memory for percpu areas."); | ||
1848 | |||
1849 | ai->dyn_size = unit_size; | ||
1850 | ai->unit_size = unit_size; | ||
1851 | ai->atom_size = unit_size; | ||
1852 | ai->alloc_size = unit_size; | ||
1853 | ai->groups[0].nr_units = 1; | ||
1854 | ai->groups[0].cpu_map[0] = 0; | ||
1855 | |||
1856 | if (pcpu_setup_first_chunk(ai, fc) < 0) | ||
1857 | panic("Failed to initialize percpu areas."); | ||
1858 | } | ||
1859 | |||
1860 | #endif /* CONFIG_SMP */ | ||
1809 | 1861 | ||
1810 | /* | 1862 | /* |
1811 | * First and reserved chunks are initialized with temporary allocation | 1863 | * First and reserved chunks are initialized with temporary allocation |
diff --git a/mm/percpu_up.c b/mm/percpu_up.c deleted file mode 100644 index db884fae5721..000000000000 --- a/mm/percpu_up.c +++ /dev/null | |||
@@ -1,30 +0,0 @@ | |||
1 | /* | ||
2 | * mm/percpu_up.c - dummy percpu memory allocator implementation for UP | ||
3 | */ | ||
4 | |||
5 | #include <linux/module.h> | ||
6 | #include <linux/percpu.h> | ||
7 | #include <linux/slab.h> | ||
8 | |||
9 | void __percpu *__alloc_percpu(size_t size, size_t align) | ||
10 | { | ||
11 | /* | ||
12 | * Can't easily make larger alignment work with kmalloc. WARN | ||
13 | * on it. Larger alignment should only be used for module | ||
14 | * percpu sections on SMP for which this path isn't used. | ||
15 | */ | ||
16 | WARN_ON_ONCE(align > SMP_CACHE_BYTES); | ||
17 | return (void __percpu __force *)kzalloc(size, GFP_KERNEL); | ||
18 | } | ||
19 | EXPORT_SYMBOL_GPL(__alloc_percpu); | ||
20 | |||
21 | void free_percpu(void __percpu *p) | ||
22 | { | ||
23 | kfree(this_cpu_ptr(p)); | ||
24 | } | ||
25 | EXPORT_SYMBOL_GPL(free_percpu); | ||
26 | |||
27 | phys_addr_t per_cpu_ptr_to_phys(void *addr) | ||
28 | { | ||
29 | return __pa(addr); | ||
30 | } | ||