author     Tejun Heo <tj@kernel.org>    2009-03-06 00:33:59 -0500
committer  Tejun Heo <tj@kernel.org>    2009-03-06 00:33:59 -0500
commit     6b19b0c2400437a3c10059ede0e59b517092e1bd (patch)
tree       4fc1868fc8fde37315b54c6d416b48000621af9d
parent     edcb463997ed7b2ffa3bac76e3e75957318f2e01 (diff)
x86, percpu: setup reserved percpu area for x86_64
Impact: fix relocation overflow during module load

x86_64 uses 32bit relocations for symbol access, so static percpu symbols, whether in core or in modules, must be within 2GB of the percpu segment base, which the dynamic percpu allocator doesn't guarantee. This patch makes x86_64 reserve PERCPU_MODULE_RESERVE bytes in the first chunk so that module percpu areas are always allocated from the first chunk, which is always inside the relocatable range.

This problem exists for any percpu allocator but is easily triggered when using the embedding allocator because the second chunk is located beyond 2GB on it.

This patch also changes the meaning of PERCPU_DYNAMIC_RESERVE so that it indicates only the size of the area to reserve for dynamic allocation, as the static and dynamic areas can now be separate. The new PERCPU_DYNAMIC_RESERVE is increased by 4k on both 32 and 64 bits, as the reserved-area separation eats away some allocatable space and having slightly more headroom (currently between 4 and 8k after a minimal boot, sans module area) makes sense for common-case performance.

x86_32 can address anywhere from anywhere and doesn't need the reservation.

Mike Galbraith first reported the problem and bisected it to the embedding percpu allocator commit.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Mike Galbraith <efault@gmx.de>
Reported-by: Jaswinder Singh Rajput <jaswinder@kernel.org>
-rw-r--r--  arch/x86/kernel/setup_percpu.c  37
-rw-r--r--  include/linux/percpu.h          35
2 files changed, 40 insertions, 32 deletions
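To make the new sizing concrete, here is a small user-space sketch (not part of the patch) that mirrors how setup_pcpu_embed() partitions the first chunk after this change. PAGE_SIZE, PFN_ALIGN and the static_size value are stand-ins chosen for illustration; the reserve values are the 8k module reserve (with CONFIG_MODULES) and the new 64-bit dynamic reserve introduced below.

/*
 * Illustrative user-space sketch only -- not kernel code.  It mirrors
 * the embed allocator's first-chunk sizing after this patch:
 * static area + reserved module area + dynamic reserve, page aligned.
 * The dynamic part absorbs the alignment padding.
 */
#include <stdio.h>
#include <stddef.h>

#define PAGE_SIZE                       4096UL
#define PFN_ALIGN(x)                    (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
#define PERCPU_MODULE_RESERVE           (8 << 10)   /* value with CONFIG_MODULES */
#define PERCPU_DYNAMIC_RESERVE          (20 << 10)  /* new 64-bit value */
#define PERCPU_FIRST_CHUNK_RESERVE      PERCPU_MODULE_RESERVE   /* x86_64 */

int main(void)
{
        size_t static_size = 45000;     /* hypothetical .data.percpu size */
        size_t pcpue_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE +
                                      PERCPU_DYNAMIC_RESERVE);
        size_t dyn_size = pcpue_size - static_size - PERCPU_FIRST_CHUNK_RESERVE;

        printf("unit %zu = static %zu + reserved %u + dynamic %zu\n",
               pcpue_size, static_size,
               (unsigned)PERCPU_FIRST_CHUNK_RESERVE, dyn_size);
        return 0;
}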
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index dd4eabc747c8..efa615f2bf43 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -42,6 +42,19 @@ unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = {
 };
 EXPORT_SYMBOL(__per_cpu_offset);

+/*
+ * On x86_64 symbols referenced from code should be reachable using
+ * 32bit relocations.  Reserve space for static percpu variables in
+ * modules so that they are always served from the first chunk which
+ * is located at the percpu segment base.  On x86_32, anything can
+ * address anywhere.  No need to reserve space in the first chunk.
+ */
+#ifdef CONFIG_X86_64
+#define PERCPU_FIRST_CHUNK_RESERVE	PERCPU_MODULE_RESERVE
+#else
+#define PERCPU_FIRST_CHUNK_RESERVE	0
+#endif
+
 /**
  * pcpu_need_numa - determine percpu allocation needs to consider NUMA
  *
@@ -141,7 +154,7 @@ static ssize_t __init setup_pcpu_remap(size_t static_size)
 {
 	static struct vm_struct vm;
 	pg_data_t *last;
-	size_t ptrs_size;
+	size_t ptrs_size, dyn_size;
 	unsigned int cpu;
 	ssize_t ret;

@@ -169,12 +182,14 @@ proceed:
 	 * Currently supports only single page.  Supporting multiple
 	 * pages won't be too difficult if it ever becomes necessary.
 	 */
-	pcpur_size = PFN_ALIGN(static_size + PERCPU_DYNAMIC_RESERVE);
+	pcpur_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE +
+			       PERCPU_DYNAMIC_RESERVE);
 	if (pcpur_size > PMD_SIZE) {
 		pr_warning("PERCPU: static data is larger than large page, "
 			   "can't use large page\n");
 		return -EINVAL;
 	}
+	dyn_size = pcpur_size - static_size - PERCPU_FIRST_CHUNK_RESERVE;

 	/* allocate pointer array and alloc large pages */
 	ptrs_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpur_ptrs[0]));
@@ -217,8 +232,9 @@ proceed:
 	pr_info("PERCPU: Remapped at %p with large pages, static data "
 		"%zu bytes\n", vm.addr, static_size);

-	ret = pcpu_setup_first_chunk(pcpur_get_page, static_size, 0, PMD_SIZE,
-				     pcpur_size - static_size, vm.addr, NULL);
+	ret = pcpu_setup_first_chunk(pcpur_get_page, static_size,
+				     PERCPU_FIRST_CHUNK_RESERVE,
+				     PMD_SIZE, dyn_size, vm.addr, NULL);
 	goto out_free_ar;

 enomem:
@@ -276,9 +292,10 @@ static ssize_t __init setup_pcpu_embed(size_t static_size)
 		return -EINVAL;

 	/* allocate and copy */
-	pcpue_size = PFN_ALIGN(static_size + PERCPU_DYNAMIC_RESERVE);
+	pcpue_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE +
+			       PERCPU_DYNAMIC_RESERVE);
 	pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE);
-	dyn_size = pcpue_size - static_size;
+	dyn_size = pcpue_size - static_size - PERCPU_FIRST_CHUNK_RESERVE;

 	pcpue_ptr = pcpu_alloc_bootmem(0, num_possible_cpus() * pcpue_unit_size,
 				       PAGE_SIZE);
@@ -297,7 +314,8 @@ static ssize_t __init setup_pcpu_embed(size_t static_size)
 	pr_info("PERCPU: Embedded %zu pages at %p, static data %zu bytes\n",
 		pcpue_size >> PAGE_SHIFT, pcpue_ptr, static_size);

-	return pcpu_setup_first_chunk(pcpue_get_page, static_size, 0,
+	return pcpu_setup_first_chunk(pcpue_get_page, static_size,
+				      PERCPU_FIRST_CHUNK_RESERVE,
 				      pcpue_unit_size, dyn_size,
 				      pcpue_ptr, NULL);
 }
@@ -356,8 +374,9 @@ static ssize_t __init setup_pcpu_4k(size_t static_size)
 	pr_info("PERCPU: Allocated %d 4k pages, static data %zu bytes\n",
 		pcpu4k_nr_static_pages, static_size);

-	ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, 0, -1, -1,
-				     NULL, pcpu4k_populate_pte);
+	ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size,
+				     PERCPU_FIRST_CHUNK_RESERVE, -1, -1, NULL,
+				     pcpu4k_populate_pte);
 	goto out_free_ar;

 enomem:
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 8ff15153ae20..54a968b4b924 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -85,31 +85,20 @@

 /*
  * PERCPU_DYNAMIC_RESERVE indicates the amount of free area to piggy
- * back on the first chunk if arch is manually allocating and mapping
- * it for faster access (as a part of large page mapping for example).
- * Note that dynamic percpu allocator covers both static and dynamic
- * areas, so these values are bigger than PERCPU_MODULE_RESERVE.
+ * back on the first chunk for dynamic percpu allocation if arch is
+ * manually allocating and mapping it for faster access (as a part of
+ * large page mapping for example).
  *
- * On typical configuration with modules, the following values leave
- * about 8k of free space on the first chunk after boot on both x86_32
- * and 64 when module support is enabled.  When module support is
- * disabled, it's much tighter.
+ * The following values give between one and two pages of free space
+ * after typical minimal boot (2-way SMP, single disk and NIC) with
+ * both defconfig and a distro config on x86_64 and 32.  More
+ * intelligent way to determine this would be nice.
  */
-#ifndef PERCPU_DYNAMIC_RESERVE
-#  if BITS_PER_LONG > 32
-#    ifdef CONFIG_MODULES
-#      define PERCPU_DYNAMIC_RESERVE	(24 << 10)
-#    else
-#      define PERCPU_DYNAMIC_RESERVE	(16 << 10)
-#    endif
-#  else
-#    ifdef CONFIG_MODULES
-#      define PERCPU_DYNAMIC_RESERVE	(16 << 10)
-#    else
-#      define PERCPU_DYNAMIC_RESERVE	(8 << 10)
-#    endif
-#  endif
-#endif	/* PERCPU_DYNAMIC_RESERVE */
+#if BITS_PER_LONG > 32
+#define PERCPU_DYNAMIC_RESERVE		(20 << 10)
+#else
+#define PERCPU_DYNAMIC_RESERVE		(12 << 10)
+#endif

 extern void *pcpu_base_addr;

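For context, the reserved region exists to serve static percpu variables defined by modules. A minimal, hypothetical module sketch of that usage follows; the module name and variable are invented, but DEFINE_PER_CPU, get_cpu_var()/put_cpu_var(), per_cpu() and for_each_possible_cpu() are the standard interfaces of this era.

/*
 * Hypothetical module sketch (not part of this patch).  A module-local
 * static percpu variable like this is what PERCPU_FIRST_CHUNK_RESERVE
 * exists to hold: it is carved out of the first chunk so its address
 * stays within 32bit relocation range of the percpu base on x86_64.
 */
#include <linux/module.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(unsigned long, demo_hits);	/* served from the reserved area */

static int __init demo_init(void)
{
	get_cpu_var(demo_hits)++;	/* bump this CPU's copy */
	put_cpu_var(demo_hits);
	return 0;
}

static void __exit demo_exit(void)
{
	unsigned long total = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		total += per_cpu(demo_hits, cpu);
	pr_info("demo: %lu total hits\n", total);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");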