author		Linus Torvalds <torvalds@linux-foundation.org>	2010-10-22 20:31:36 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2010-10-22 20:31:36 -0400
commit		0fc0531e0a2174377a86fd6953ecaa00287d8f70 (patch)
tree		afe56978729300df96b002a064c9de927fadcfab
parent		91b745016c12d440386c40fb76ab69c8e08cbc06 (diff)
parent		9329ba9704f6bd51a735982e0d4a3eed72c3294f (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu:
  percpu: update comments to reflect that percpu allocations are always zero-filled
  percpu: Optimize __get_cpu_var()
  x86, percpu: Optimize this_cpu_ptr
  percpu: clear memory allocated with the km allocator
  percpu: fix build breakage on s390 and cleanup build configuration tests
  percpu: use percpu allocator on UP too
  percpu: reduce PCPU_MIN_UNIT_SIZE to 32k
  vmalloc: pcpu_get/free_vm_areas() aren't needed on UP

Fixed up trivial conflicts in include/linux/percpu.h
-rw-r--r--	arch/x86/include/asm/percpu.h	14
-rw-r--r--	include/asm-generic/percpu.h	14
-rw-r--r--	include/linux/percpu.h	31
-rw-r--r--	include/linux/vmalloc.h	2
-rw-r--r--	mm/Kconfig	8
-rw-r--r--	mm/Makefile	7
-rw-r--r--	mm/percpu-km.c	8
-rw-r--r--	mm/percpu.c	401
-rw-r--r--	mm/percpu_up.c	30
-rw-r--r--	mm/vmalloc.c	2
10 files changed, 281 insertions, 236 deletions
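The user-visible effect of this series is that dynamically allocated percpu memory is zero-filled on every path (the vmalloc-backed SMP chunks and the km-backed UP allocator alike) and that the regular percpu allocator is now built on UP as well, replacing the kzalloc()-based stub in mm/percpu_up.c that is removed further down. A minimal kernel-style sketch of the caller-side API this touches follows; the struct and function names are hypothetical, and only alloc_percpu(), get_cpu_ptr()/put_cpu_ptr() and free_percpu() are real interfaces.

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/types.h>

/* Hypothetical per-CPU counters, for illustration only. */
struct hypo_stats {
	u64	packets;
	u64	bytes;
};

static struct hypo_stats __percpu *hypo_stats;

static int __init hypo_stats_init(void)
{
	/* Returned memory is zero-filled, so no per-CPU memset loop is needed. */
	hypo_stats = alloc_percpu(struct hypo_stats);
	return hypo_stats ? 0 : -ENOMEM;
}

static void hypo_stats_account(unsigned int len)
{
	/* get_cpu_ptr() disables preemption while this CPU's copy is updated. */
	struct hypo_stats *s = get_cpu_ptr(hypo_stats);

	s->packets++;
	s->bytes += len;
	put_cpu_ptr(hypo_stats);
}

static void hypo_stats_exit(void)
{
	free_percpu(hypo_stats);
}
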
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index cd28f9ad910d..f899e01a8ac9 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -47,6 +47,20 @@
 #ifdef CONFIG_SMP
 #define __percpu_arg(x)		"%%"__stringify(__percpu_seg)":%P" #x
 #define __my_cpu_offset		percpu_read(this_cpu_off)
+
+/*
+ * Compared to the generic __my_cpu_offset version, the following
+ * saves one instruction and avoids clobbering a temp register.
+ */
+#define __this_cpu_ptr(ptr)				\
+({							\
+	unsigned long tcp_ptr__;			\
+	__verify_pcpu_ptr(ptr);				\
+	asm volatile("add " __percpu_arg(1) ", %0"	\
+		     : "=r" (tcp_ptr__)			\
+		     : "m" (this_cpu_off), "0" (ptr));	\
+	(typeof(*(ptr)) __kernel __force *)tcp_ptr__;	\
+})
 #else
 #define __percpu_arg(x)		"%P" #x
 #endif
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index 08923b684768..d17784ea37ff 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -55,14 +55,18 @@ extern unsigned long __per_cpu_offset[NR_CPUS];
  */
 #define per_cpu(var, cpu) \
 	(*SHIFT_PERCPU_PTR(&(var), per_cpu_offset(cpu)))
-#define __get_cpu_var(var) \
-	(*SHIFT_PERCPU_PTR(&(var), my_cpu_offset))
-#define __raw_get_cpu_var(var) \
-	(*SHIFT_PERCPU_PTR(&(var), __my_cpu_offset))
 
-#define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset)
+#ifndef __this_cpu_ptr
 #define __this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset)
+#endif
+#ifdef CONFIG_DEBUG_PREEMPT
+#define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset)
+#else
+#define this_cpu_ptr(ptr) __this_cpu_ptr(ptr)
+#endif
 
+#define __get_cpu_var(var) (*this_cpu_ptr(&(var)))
+#define __raw_get_cpu_var(var) (*__this_cpu_ptr(&(var)))
 
 #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
 extern void setup_per_cpu_areas(void);
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 0eb50832aa00..5095b834a6fb 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -48,10 +48,8 @@
 	preempt_enable();				\
 } while (0)
 
-#ifdef CONFIG_SMP
-
 /* minimum unit size, also is the maximum supported allocation size */
-#define PCPU_MIN_UNIT_SIZE		PFN_ALIGN(64 << 10)
+#define PCPU_MIN_UNIT_SIZE		PFN_ALIGN(32 << 10)
 
 /*
  * Percpu allocator can serve percpu allocations before slab is
@@ -146,37 +144,20 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size,
  * dynamically allocated.  Non-atomic access to the current CPU's
  * version should probably be combined with get_cpu()/put_cpu().
  */
+#ifdef CONFIG_SMP
 #define per_cpu_ptr(ptr, cpu)	SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu)))
+#else
+#define per_cpu_ptr(ptr, cpu)	({ (void)(cpu); VERIFY_PERCPU_PTR((ptr)); })
+#endif
 
 extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align);
 extern bool is_kernel_percpu_address(unsigned long addr);
 
-#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
+#if !defined(CONFIG_SMP) || !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA)
 extern void __init setup_per_cpu_areas(void);
 #endif
 extern void __init percpu_init_late(void);
 
-#else /* CONFIG_SMP */
-
-#define per_cpu_ptr(ptr, cpu)	({ (void)(cpu); VERIFY_PERCPU_PTR((ptr)); })
-
-/* can't distinguish from other static vars, always false */
-static inline bool is_kernel_percpu_address(unsigned long addr)
-{
-	return false;
-}
-
-static inline void __init setup_per_cpu_areas(void) { }
-
-static inline void __init percpu_init_late(void) { }
-
-static inline void *pcpu_lpage_remapped(void *kaddr)
-{
-	return NULL;
-}
-
-#endif /* CONFIG_SMP */
-
 extern void __percpu *__alloc_percpu(size_t size, size_t align);
 extern void free_percpu(void __percpu *__pdata);
 extern phys_addr_t per_cpu_ptr_to_phys(void *addr);
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 01c2145118dc..63a4fe6d51bd 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -117,10 +117,12 @@ extern rwlock_t vmlist_lock;
 extern struct vm_struct *vmlist;
 extern __init void vm_area_register_early(struct vm_struct *vm, size_t align);
 
+#ifdef CONFIG_SMP
 struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
 				     const size_t *sizes, int nr_vms,
 				     size_t align, gfp_t gfp_mask);
 
 void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms);
+#endif
 
 #endif	/* _LINUX_VMALLOC_H */
diff --git a/mm/Kconfig b/mm/Kconfig
index f0fb9124e410..c2c8a4a11898 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -301,3 +301,11 @@ config NOMMU_INITIAL_TRIM_EXCESS
 	  of 1 says that all excess pages should be trimmed.
 
 	  See Documentation/nommu-mmap.txt for more information.
+
+#
+# UP and nommu archs use km based percpu allocator
+#
+config NEED_PER_CPU_KM
+	depends on !SMP
+	bool
+	default y
diff --git a/mm/Makefile b/mm/Makefile
index 34b2546a9e37..f73f75a29f82 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -11,7 +11,7 @@ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
 			   maccess.o page_alloc.o page-writeback.o \
 			   readahead.o swap.o truncate.o vmscan.o shmem.o \
 			   prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
-			   page_isolation.o mm_init.o mmu_context.o \
+			   page_isolation.o mm_init.o mmu_context.o percpu.o \
 			   $(mmu-y)
 obj-y += init-mm.o
 
@@ -36,11 +36,6 @@ obj-$(CONFIG_FAILSLAB) += failslab.o
 obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
 obj-$(CONFIG_FS_XIP) += filemap_xip.o
 obj-$(CONFIG_MIGRATION) += migrate.o
-ifdef CONFIG_SMP
-obj-y += percpu.o
-else
-obj-y += percpu_up.o
-endif
 obj-$(CONFIG_QUICKLIST) += quicklist.o
 obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o
 obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o
diff --git a/mm/percpu-km.c b/mm/percpu-km.c
index df680855540a..89633fefc6a2 100644
--- a/mm/percpu-km.c
+++ b/mm/percpu-km.c
@@ -27,7 +27,7 @@
  * chunk size is not aligned.  percpu-km code will whine about it.
  */
 
-#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
+#if defined(CONFIG_SMP) && defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK)
 #error "contiguous percpu allocation is incompatible with paged first chunk"
 #endif
 
@@ -35,7 +35,11 @@
 
 static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size)
 {
-	/* noop */
+	unsigned int cpu;
+
+	for_each_possible_cpu(cpu)
+		memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size);
+
 	return 0;
 }
 
diff --git a/mm/percpu.c b/mm/percpu.c
index c76ef3891e0d..6fc9015534f8 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -76,6 +76,7 @@
 #define PCPU_SLOT_BASE_SHIFT		5	/* 1-31 shares the same slot */
 #define PCPU_DFL_MAP_ALLOC		16	/* start a map with 16 ents */
 
+#ifdef CONFIG_SMP
 /* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */
 #ifndef __addr_to_pcpu_ptr
 #define __addr_to_pcpu_ptr(addr)					\
@@ -89,6 +90,11 @@
 					 (unsigned long)pcpu_base_addr -	\
 					 (unsigned long)__per_cpu_start)
 #endif
+#else	/* CONFIG_SMP */
+/* on UP, it's always identity mapped */
+#define __addr_to_pcpu_ptr(addr)	(void __percpu *)(addr)
+#define __pcpu_ptr_to_addr(ptr)		(void __force *)(ptr)
+#endif	/* CONFIG_SMP */
 
 struct pcpu_chunk {
 	struct list_head	list;		/* linked to pcpu_slot lists */
@@ -820,8 +826,8 @@ fail_unlock_mutex:
  * @size: size of area to allocate in bytes
  * @align: alignment of area (max PAGE_SIZE)
  *
- * Allocate percpu area of @size bytes aligned at @align.  Might
- * sleep.  Might trigger writeouts.
+ * Allocate zero-filled percpu area of @size bytes aligned at @align.
+ * Might sleep.  Might trigger writeouts.
  *
  * CONTEXT:
  * Does GFP_KERNEL allocation.
@@ -840,9 +846,10 @@ EXPORT_SYMBOL_GPL(__alloc_percpu);
  * @size: size of area to allocate in bytes
  * @align: alignment of area (max PAGE_SIZE)
  *
- * Allocate percpu area of @size bytes aligned at @align from reserved
- * percpu area if arch has set it up; otherwise, allocation is served
- * from the same dynamic area.  Might sleep.  Might trigger writeouts.
+ * Allocate zero-filled percpu area of @size bytes aligned at @align
+ * from reserved percpu area if arch has set it up; otherwise,
+ * allocation is served from the same dynamic area.  Might sleep.
+ * Might trigger writeouts.
  *
  * CONTEXT:
  * Does GFP_KERNEL allocation.
@@ -949,6 +956,7 @@ EXPORT_SYMBOL_GPL(free_percpu);
  */
 bool is_kernel_percpu_address(unsigned long addr)
 {
+#ifdef CONFIG_SMP
 	const size_t static_size = __per_cpu_end - __per_cpu_start;
 	void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr);
 	unsigned int cpu;
@@ -959,6 +967,8 @@ bool is_kernel_percpu_address(unsigned long addr)
 		if ((void *)addr >= start && (void *)addr < start + static_size)
 			return true;
 	}
+#endif
+	/* on UP, can't distinguish from other static vars, always false */
 	return false;
 }
 
@@ -1067,161 +1077,6 @@ void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai)
 }
 
 /**
- * pcpu_build_alloc_info - build alloc_info considering distances between CPUs
- * @reserved_size: the size of reserved percpu area in bytes
- * @dyn_size: minimum free size for dynamic allocation in bytes
- * @atom_size: allocation atom size
- * @cpu_distance_fn: callback to determine distance between cpus, optional
- *
- * This function determines grouping of units, their mappings to cpus
- * and other parameters considering needed percpu size, allocation
- * atom size and distances between CPUs.
- *
- * Groups are always mutliples of atom size and CPUs which are of
- * LOCAL_DISTANCE both ways are grouped together and share space for
- * units in the same group.  The returned configuration is guaranteed
- * to have CPUs on different nodes on different groups and >=75% usage
- * of allocated virtual address space.
- *
- * RETURNS:
- * On success, pointer to the new allocation_info is returned.  On
- * failure, ERR_PTR value is returned.
- */
-static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
-				size_t reserved_size, size_t dyn_size,
-				size_t atom_size,
-				pcpu_fc_cpu_distance_fn_t cpu_distance_fn)
-{
-	static int group_map[NR_CPUS] __initdata;
-	static int group_cnt[NR_CPUS] __initdata;
-	const size_t static_size = __per_cpu_end - __per_cpu_start;
-	int nr_groups = 1, nr_units = 0;
-	size_t size_sum, min_unit_size, alloc_size;
-	int upa, max_upa, uninitialized_var(best_upa);	/* units_per_alloc */
-	int last_allocs, group, unit;
-	unsigned int cpu, tcpu;
-	struct pcpu_alloc_info *ai;
-	unsigned int *cpu_map;
-
-	/* this function may be called multiple times */
-	memset(group_map, 0, sizeof(group_map));
-	memset(group_cnt, 0, sizeof(group_cnt));
-
-	/* calculate size_sum and ensure dyn_size is enough for early alloc */
-	size_sum = PFN_ALIGN(static_size + reserved_size +
-			    max_t(size_t, dyn_size, PERCPU_DYNAMIC_EARLY_SIZE));
-	dyn_size = size_sum - static_size - reserved_size;
-
-	/*
-	 * Determine min_unit_size, alloc_size and max_upa such that
-	 * alloc_size is multiple of atom_size and is the smallest
-	 * which can accomodate 4k aligned segments which are equal to
-	 * or larger than min_unit_size.
-	 */
-	min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);
-
-	alloc_size = roundup(min_unit_size, atom_size);
-	upa = alloc_size / min_unit_size;
-	while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
-		upa--;
-	max_upa = upa;
-
-	/* group cpus according to their proximity */
-	for_each_possible_cpu(cpu) {
-		group = 0;
-	next_group:
-		for_each_possible_cpu(tcpu) {
-			if (cpu == tcpu)
-				break;
-			if (group_map[tcpu] == group && cpu_distance_fn &&
-			    (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE ||
-			     cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) {
-				group++;
-				nr_groups = max(nr_groups, group + 1);
-				goto next_group;
-			}
-		}
-		group_map[cpu] = group;
-		group_cnt[group]++;
-	}
-
-	/*
-	 * Expand unit size until address space usage goes over 75%
-	 * and then as much as possible without using more address
-	 * space.
-	 */
-	last_allocs = INT_MAX;
-	for (upa = max_upa; upa; upa--) {
-		int allocs = 0, wasted = 0;
-
-		if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
-			continue;
-
-		for (group = 0; group < nr_groups; group++) {
-			int this_allocs = DIV_ROUND_UP(group_cnt[group], upa);
-			allocs += this_allocs;
-			wasted += this_allocs * upa - group_cnt[group];
-		}
-
-		/*
-		 * Don't accept if wastage is over 1/3.  The
-		 * greater-than comparison ensures upa==1 always
-		 * passes the following check.
-		 */
-		if (wasted > num_possible_cpus() / 3)
-			continue;
-
-		/* and then don't consume more memory */
-		if (allocs > last_allocs)
-			break;
-		last_allocs = allocs;
-		best_upa = upa;
-	}
-	upa = best_upa;
-
-	/* allocate and fill alloc_info */
-	for (group = 0; group < nr_groups; group++)
-		nr_units += roundup(group_cnt[group], upa);
-
-	ai = pcpu_alloc_alloc_info(nr_groups, nr_units);
-	if (!ai)
-		return ERR_PTR(-ENOMEM);
-	cpu_map = ai->groups[0].cpu_map;
-
-	for (group = 0; group < nr_groups; group++) {
-		ai->groups[group].cpu_map = cpu_map;
-		cpu_map += roundup(group_cnt[group], upa);
-	}
-
-	ai->static_size = static_size;
-	ai->reserved_size = reserved_size;
-	ai->dyn_size = dyn_size;
-	ai->unit_size = alloc_size / upa;
-	ai->atom_size = atom_size;
-	ai->alloc_size = alloc_size;
-
-	for (group = 0, unit = 0; group_cnt[group]; group++) {
-		struct pcpu_group_info *gi = &ai->groups[group];
-
-		/*
-		 * Initialize base_offset as if all groups are located
-		 * back-to-back.  The caller should update this to
-		 * reflect actual allocation.
-		 */
-		gi->base_offset = unit * ai->unit_size;
-
-		for_each_possible_cpu(cpu)
-			if (group_map[cpu] == group)
-				gi->cpu_map[gi->nr_units++] = cpu;
-		gi->nr_units = roundup(gi->nr_units, upa);
-		unit += gi->nr_units;
-	}
-	BUG_ON(unit != nr_units);
-
-	return ai;
-}
-
-/**
  * pcpu_dump_alloc_info - print out information about pcpu_alloc_info
  * @lvl: loglevel
  * @ai: allocation info to dump
@@ -1363,7 +1218,9 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 
 	/* sanity checks */
 	PCPU_SETUP_BUG_ON(ai->nr_groups <= 0);
+#ifdef CONFIG_SMP
 	PCPU_SETUP_BUG_ON(!ai->static_size);
+#endif
 	PCPU_SETUP_BUG_ON(!base_addr);
 	PCPU_SETUP_BUG_ON(ai->unit_size < size_sum);
 	PCPU_SETUP_BUG_ON(ai->unit_size & ~PAGE_MASK);
@@ -1488,6 +1345,8 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 	return 0;
 }
 
+#ifdef CONFIG_SMP
+
 const char *pcpu_fc_names[PCPU_FC_NR] __initdata = {
 	[PCPU_FC_AUTO]	= "auto",
 	[PCPU_FC_EMBED]	= "embed",
@@ -1515,8 +1374,180 @@ static int __init percpu_alloc_setup(char *str)
 }
 early_param("percpu_alloc", percpu_alloc_setup);
 
+/*
+ * pcpu_embed_first_chunk() is used by the generic percpu setup.
+ * Build it if needed by the arch config or the generic setup is going
+ * to be used.
+ */
 #if defined(CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK) || \
 	!defined(CONFIG_HAVE_SETUP_PER_CPU_AREA)
+#define BUILD_EMBED_FIRST_CHUNK
+#endif
+
+/* build pcpu_page_first_chunk() iff needed by the arch config */
+#if defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK)
+#define BUILD_PAGE_FIRST_CHUNK
+#endif
+
+/* pcpu_build_alloc_info() is used by both embed and page first chunk */
+#if defined(BUILD_EMBED_FIRST_CHUNK) || defined(BUILD_PAGE_FIRST_CHUNK)
+/**
+ * pcpu_build_alloc_info - build alloc_info considering distances between CPUs
+ * @reserved_size: the size of reserved percpu area in bytes
+ * @dyn_size: minimum free size for dynamic allocation in bytes
+ * @atom_size: allocation atom size
+ * @cpu_distance_fn: callback to determine distance between cpus, optional
+ *
+ * This function determines grouping of units, their mappings to cpus
+ * and other parameters considering needed percpu size, allocation
+ * atom size and distances between CPUs.
+ *
+ * Groups are always mutliples of atom size and CPUs which are of
+ * LOCAL_DISTANCE both ways are grouped together and share space for
+ * units in the same group.  The returned configuration is guaranteed
+ * to have CPUs on different nodes on different groups and >=75% usage
+ * of allocated virtual address space.
+ *
+ * RETURNS:
+ * On success, pointer to the new allocation_info is returned.  On
+ * failure, ERR_PTR value is returned.
+ */
+static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
+				size_t reserved_size, size_t dyn_size,
+				size_t atom_size,
+				pcpu_fc_cpu_distance_fn_t cpu_distance_fn)
+{
+	static int group_map[NR_CPUS] __initdata;
+	static int group_cnt[NR_CPUS] __initdata;
+	const size_t static_size = __per_cpu_end - __per_cpu_start;
+	int nr_groups = 1, nr_units = 0;
+	size_t size_sum, min_unit_size, alloc_size;
+	int upa, max_upa, uninitialized_var(best_upa);	/* units_per_alloc */
+	int last_allocs, group, unit;
+	unsigned int cpu, tcpu;
+	struct pcpu_alloc_info *ai;
+	unsigned int *cpu_map;
+
+	/* this function may be called multiple times */
+	memset(group_map, 0, sizeof(group_map));
+	memset(group_cnt, 0, sizeof(group_cnt));
+
+	/* calculate size_sum and ensure dyn_size is enough for early alloc */
+	size_sum = PFN_ALIGN(static_size + reserved_size +
+			    max_t(size_t, dyn_size, PERCPU_DYNAMIC_EARLY_SIZE));
+	dyn_size = size_sum - static_size - reserved_size;
+
+	/*
+	 * Determine min_unit_size, alloc_size and max_upa such that
+	 * alloc_size is multiple of atom_size and is the smallest
+	 * which can accomodate 4k aligned segments which are equal to
+	 * or larger than min_unit_size.
+	 */
+	min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);
+
+	alloc_size = roundup(min_unit_size, atom_size);
+	upa = alloc_size / min_unit_size;
+	while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
+		upa--;
+	max_upa = upa;
+
+	/* group cpus according to their proximity */
+	for_each_possible_cpu(cpu) {
+		group = 0;
+	next_group:
+		for_each_possible_cpu(tcpu) {
+			if (cpu == tcpu)
+				break;
+			if (group_map[tcpu] == group && cpu_distance_fn &&
+			    (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE ||
+			     cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) {
+				group++;
+				nr_groups = max(nr_groups, group + 1);
+				goto next_group;
+			}
+		}
+		group_map[cpu] = group;
+		group_cnt[group]++;
+	}
+
+	/*
+	 * Expand unit size until address space usage goes over 75%
+	 * and then as much as possible without using more address
+	 * space.
+	 */
+	last_allocs = INT_MAX;
+	for (upa = max_upa; upa; upa--) {
+		int allocs = 0, wasted = 0;
+
+		if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
+			continue;
+
+		for (group = 0; group < nr_groups; group++) {
+			int this_allocs = DIV_ROUND_UP(group_cnt[group], upa);
+			allocs += this_allocs;
+			wasted += this_allocs * upa - group_cnt[group];
+		}
+
+		/*
+		 * Don't accept if wastage is over 1/3.  The
+		 * greater-than comparison ensures upa==1 always
+		 * passes the following check.
+		 */
+		if (wasted > num_possible_cpus() / 3)
+			continue;
+
+		/* and then don't consume more memory */
+		if (allocs > last_allocs)
+			break;
+		last_allocs = allocs;
+		best_upa = upa;
+	}
+	upa = best_upa;
+
+	/* allocate and fill alloc_info */
+	for (group = 0; group < nr_groups; group++)
+		nr_units += roundup(group_cnt[group], upa);
+
+	ai = pcpu_alloc_alloc_info(nr_groups, nr_units);
+	if (!ai)
+		return ERR_PTR(-ENOMEM);
+	cpu_map = ai->groups[0].cpu_map;
+
+	for (group = 0; group < nr_groups; group++) {
+		ai->groups[group].cpu_map = cpu_map;
+		cpu_map += roundup(group_cnt[group], upa);
+	}
+
+	ai->static_size = static_size;
+	ai->reserved_size = reserved_size;
+	ai->dyn_size = dyn_size;
+	ai->unit_size = alloc_size / upa;
+	ai->atom_size = atom_size;
+	ai->alloc_size = alloc_size;
+
+	for (group = 0, unit = 0; group_cnt[group]; group++) {
+		struct pcpu_group_info *gi = &ai->groups[group];
+
+		/*
+		 * Initialize base_offset as if all groups are located
+		 * back-to-back.  The caller should update this to
+		 * reflect actual allocation.
+		 */
+		gi->base_offset = unit * ai->unit_size;
+
+		for_each_possible_cpu(cpu)
+			if (group_map[cpu] == group)
+				gi->cpu_map[gi->nr_units++] = cpu;
+		gi->nr_units = roundup(gi->nr_units, upa);
+		unit += gi->nr_units;
+	}
+	BUG_ON(unit != nr_units);
+
+	return ai;
+}
+#endif /* BUILD_EMBED_FIRST_CHUNK || BUILD_PAGE_FIRST_CHUNK */
+
+#if defined(BUILD_EMBED_FIRST_CHUNK)
 /**
  * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem
  * @reserved_size: the size of reserved percpu area in bytes
@@ -1645,10 +1676,9 @@ out_free:
 	free_bootmem(__pa(areas), areas_size);
 	return rc;
 }
-#endif /* CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK ||
-	  !CONFIG_HAVE_SETUP_PER_CPU_AREA */
+#endif /* BUILD_EMBED_FIRST_CHUNK */
 
-#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
+#ifdef BUILD_PAGE_FIRST_CHUNK
 /**
  * pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages
  * @reserved_size: the size of reserved percpu area in bytes
@@ -1756,10 +1786,11 @@ out_free_ar:
 	pcpu_free_alloc_info(ai);
 	return rc;
 }
-#endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */
+#endif /* BUILD_PAGE_FIRST_CHUNK */
 
+#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
 /*
- * Generic percpu area setup.
+ * Generic SMP percpu area setup.
  *
  * The embedding helper is used because its behavior closely resembles
  * the original non-dynamic generic percpu area setup.  This is
@@ -1770,7 +1801,6 @@ out_free_ar:
  * on the physical linear memory mapping which uses large page
  * mappings on applicable archs.
  */
-#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
 unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(__per_cpu_offset);
 
@@ -1799,13 +1829,48 @@ void __init setup_per_cpu_areas(void)
 				    PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL,
 				    pcpu_dfl_fc_alloc, pcpu_dfl_fc_free);
 	if (rc < 0)
-		panic("Failed to initialized percpu areas.");
+		panic("Failed to initialize percpu areas.");
 
 	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
 	for_each_possible_cpu(cpu)
 		__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
 }
 #endif	/* CONFIG_HAVE_SETUP_PER_CPU_AREA */
+
+#else	/* CONFIG_SMP */
+
+/*
+ * UP percpu area setup.
+ *
+ * UP always uses km-based percpu allocator with identity mapping.
+ * Static percpu variables are indistinguishable from the usual static
+ * variables and don't require any special preparation.
+ */
+void __init setup_per_cpu_areas(void)
+{
+	const size_t unit_size =
+		roundup_pow_of_two(max_t(size_t, PCPU_MIN_UNIT_SIZE,
+					 PERCPU_DYNAMIC_RESERVE));
+	struct pcpu_alloc_info *ai;
+	void *fc;
+
+	ai = pcpu_alloc_alloc_info(1, 1);
+	fc = __alloc_bootmem(unit_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
+	if (!ai || !fc)
+		panic("Failed to allocate memory for percpu areas.");
+
+	ai->dyn_size = unit_size;
+	ai->unit_size = unit_size;
+	ai->atom_size = unit_size;
+	ai->alloc_size = unit_size;
+	ai->groups[0].nr_units = 1;
+	ai->groups[0].cpu_map[0] = 0;
+
+	if (pcpu_setup_first_chunk(ai, fc) < 0)
+		panic("Failed to initialize percpu areas.");
+}
+
+#endif	/* CONFIG_SMP */
 
 /*
  * First and reserved chunks are initialized with temporary allocation
diff --git a/mm/percpu_up.c b/mm/percpu_up.c
deleted file mode 100644
index db884fae5721..000000000000
--- a/mm/percpu_up.c
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * mm/percpu_up.c - dummy percpu memory allocator implementation for UP
- */
-
-#include <linux/module.h>
-#include <linux/percpu.h>
-#include <linux/slab.h>
-
-void __percpu *__alloc_percpu(size_t size, size_t align)
-{
-	/*
-	 * Can't easily make larger alignment work with kmalloc.  WARN
-	 * on it.  Larger alignment should only be used for module
-	 * percpu sections on SMP for which this path isn't used.
-	 */
-	WARN_ON_ONCE(align > SMP_CACHE_BYTES);
-	return (void __percpu __force *)kzalloc(size, GFP_KERNEL);
-}
-EXPORT_SYMBOL_GPL(__alloc_percpu);
-
-void free_percpu(void __percpu *p)
-{
-	kfree(this_cpu_ptr(p));
-}
-EXPORT_SYMBOL_GPL(free_percpu);
-
-phys_addr_t per_cpu_ptr_to_phys(void *addr)
-{
-	return __pa(addr);
-}
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index d8087f0db507..9f909622a25e 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2065,6 +2065,7 @@ void free_vm_area(struct vm_struct *area)
 }
 EXPORT_SYMBOL_GPL(free_vm_area);
 
+#ifdef CONFIG_SMP
 static struct vmap_area *node_to_va(struct rb_node *n)
 {
 	return n ? rb_entry(n, struct vmap_area, rb_node) : NULL;
@@ -2345,6 +2346,7 @@ void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
 		free_vm_area(vms[i]);
 	kfree(vms);
 }
+#endif	/* CONFIG_SMP */
 
 #ifdef CONFIG_PROC_FS
 static void *s_start(struct seq_file *m, loff_t *pos)