| author | Ingo Molnar <mingo@elte.hu> | 2009-09-07 02:19:51 -0400 |
|---|---|---|
| committer | Ingo Molnar <mingo@elte.hu> | 2009-09-07 02:19:51 -0400 |
| commit | a1922ed661ab2c1637d0b10cde933bd9cd33d965 (patch) | |
| tree | 0f1777542b385ebefd30b3586d830fd8ed6fda5b /arch/x86/kernel/setup_percpu.c | |
| parent | 75e33751ca8bbb72dd6f1a74d2810ddc8cbe4bdf (diff) | |
| parent | d28daf923ac5e4a0d7cecebae56f3e339189366b (diff) | |
Merge branch 'tracing/core' into tracing/hw-breakpoints
Conflicts:
	arch/Kconfig
	kernel/trace/trace.h

Merge reason: resolve the conflicts, plus adapt to the new
ring-buffer APIs.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel/setup_percpu.c')
| mode | file | changes |
|---|---|---|
| -rw-r--r-- | arch/x86/kernel/setup_percpu.c | 221 |

1 file changed, 163 insertions(+), 58 deletions(-)
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 9c3f0823e6aa..07d81916f212 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -124,7 +124,7 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
 }
 
 /*
- * Remap allocator
+ * Large page remap allocator
  *
  * This allocator uses PMD page as unit.  A PMD page is allocated for
  * each cpu and each is remapped into vmalloc area using PMD mapping.
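The sizing rule behind this comment shows up in setup_pcpu_lpage() below: round the static area plus the module and dynamic reserves up to a page boundary, require the result to fit in one PMD page, and recycle the tail of the 2MB page back to bootmem. A minimal userspace sketch of that arithmetic; every byte count here is an assumption for illustration (the kernel's PERCPU_* reserves are config-dependent, and the kernel subtracts PERCPU_FIRST_CHUNK_RESERVE where the module reserve stands in below):

```c
/* Sketch of the lpage chunk sizing done by setup_pcpu_lpage().
 * All sizes are assumed values, not the kernel's actual constants. */
#include <stdio.h>
#include <stddef.h>

#define PAGE_SIZE	4096UL
#define PMD_SIZE	(2UL << 20)			/* one 2MB large page */
#define PFN_ALIGN(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int main(void)
{
	size_t static_size = 300 * 1024;	/* assumed .data..percpu size */
	size_t module_reserve = 8 * 1024;	/* assumed PERCPU_MODULE_RESERVE */
	size_t dynamic_reserve = 20 * 1024;	/* assumed PERCPU_DYNAMIC_RESERVE */

	size_t pcpul_size = PFN_ALIGN(static_size + module_reserve +
				      dynamic_reserve);
	/* stand-in for PERCPU_FIRST_CHUNK_RESERVE */
	size_t dyn_size = pcpul_size - static_size - module_reserve;

	if (pcpul_size > PMD_SIZE)
		return 1;	/* static data too large for one large page */

	printf("chunk %zu bytes, %zu dynamic, %zu recycled per cpu\n",
	       pcpul_size, dyn_size, PMD_SIZE - pcpul_size);
	return 0;
}
```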
@@ -137,105 +137,185 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
  * better than only using 4k mappings while still being NUMA friendly.
  */
 #ifdef CONFIG_NEED_MULTIPLE_NODES
-static size_t pcpur_size __initdata;
-static void **pcpur_ptrs __initdata;
+struct pcpul_ent {
+	unsigned int	cpu;
+	void		*ptr;
+};
+
+static size_t pcpul_size;
+static struct pcpul_ent *pcpul_map;
+static struct vm_struct pcpul_vm;
 
-static struct page * __init pcpur_get_page(unsigned int cpu, int pageno)
+static struct page * __init pcpul_get_page(unsigned int cpu, int pageno)
 {
 	size_t off = (size_t)pageno << PAGE_SHIFT;
 
-	if (off >= pcpur_size)
+	if (off >= pcpul_size)
 		return NULL;
 
-	return virt_to_page(pcpur_ptrs[cpu] + off);
+	return virt_to_page(pcpul_map[cpu].ptr + off);
 }
 
-static ssize_t __init setup_pcpu_remap(size_t static_size)
+static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)
 {
-	static struct vm_struct vm;
-	size_t ptrs_size, dyn_size;
+	size_t map_size, dyn_size;
 	unsigned int cpu;
+	int i, j;
 	ssize_t ret;
 
-	/*
-	 * If large page isn't supported, there's no benefit in doing
-	 * this.  Also, on non-NUMA, embedding is better.
-	 *
-	 * NOTE: disabled for now.
-	 */
-	if (true || !cpu_has_pse || !pcpu_need_numa())
+	if (!chosen) {
+		size_t vm_size = VMALLOC_END - VMALLOC_START;
+		size_t tot_size = nr_cpu_ids * PMD_SIZE;
+
+		/* on non-NUMA, embedding is better */
+		if (!pcpu_need_numa())
+			return -EINVAL;
+
+		/* don't consume more than 20% of vmalloc area */
+		if (tot_size > vm_size / 5) {
+			pr_info("PERCPU: too large chunk size %zuMB for "
+				"large page remap\n", tot_size >> 20);
+			return -EINVAL;
+		}
+	}
+
+	/* need PSE */
+	if (!cpu_has_pse) {
+		pr_warning("PERCPU: lpage allocator requires PSE\n");
 		return -EINVAL;
+	}
 
 	/*
 	 * Currently supports only single page.  Supporting multiple
 	 * pages won't be too difficult if it ever becomes necessary.
 	 */
-	pcpur_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE +
+	pcpul_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE +
 			       PERCPU_DYNAMIC_RESERVE);
-	if (pcpur_size > PMD_SIZE) {
+	if (pcpul_size > PMD_SIZE) {
 		pr_warning("PERCPU: static data is larger than large page, "
 			   "can't use large page\n");
 		return -EINVAL;
 	}
-	dyn_size = pcpur_size - static_size - PERCPU_FIRST_CHUNK_RESERVE;
+	dyn_size = pcpul_size - static_size - PERCPU_FIRST_CHUNK_RESERVE;
 
 	/* allocate pointer array and alloc large pages */
-	ptrs_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpur_ptrs[0]));
-	pcpur_ptrs = alloc_bootmem(ptrs_size);
+	map_size = PFN_ALIGN(nr_cpu_ids * sizeof(pcpul_map[0]));
+	pcpul_map = alloc_bootmem(map_size);
 
 	for_each_possible_cpu(cpu) {
-		pcpur_ptrs[cpu] = pcpu_alloc_bootmem(cpu, PMD_SIZE, PMD_SIZE);
-		if (!pcpur_ptrs[cpu])
+		pcpul_map[cpu].cpu = cpu;
+		pcpul_map[cpu].ptr = pcpu_alloc_bootmem(cpu, PMD_SIZE,
+							PMD_SIZE);
+		if (!pcpul_map[cpu].ptr) {
+			pr_warning("PERCPU: failed to allocate large page "
+				   "for cpu%u\n", cpu);
 			goto enomem;
+		}
 
 		/*
-		 * Only use pcpur_size bytes and give back the rest.
+		 * Only use pcpul_size bytes and give back the rest.
 		 *
 		 * Ingo: The 2MB up-rounding bootmem is needed to make
 		 * sure the partial 2MB page is still fully RAM - it's
 		 * not well-specified to have a PAT-incompatible area
 		 * (unmapped RAM, device memory, etc.) in that hole.
 		 */
-		free_bootmem(__pa(pcpur_ptrs[cpu] + pcpur_size),
-			     PMD_SIZE - pcpur_size);
+		free_bootmem(__pa(pcpul_map[cpu].ptr + pcpul_size),
+			     PMD_SIZE - pcpul_size);
 
-		memcpy(pcpur_ptrs[cpu], __per_cpu_load, static_size);
+		memcpy(pcpul_map[cpu].ptr, __per_cpu_load, static_size);
 	}
 
 	/* allocate address and map */
-	vm.flags = VM_ALLOC;
-	vm.size = num_possible_cpus() * PMD_SIZE;
-	vm_area_register_early(&vm, PMD_SIZE);
+	pcpul_vm.flags = VM_ALLOC;
+	pcpul_vm.size = nr_cpu_ids * PMD_SIZE;
+	vm_area_register_early(&pcpul_vm, PMD_SIZE);
 
 	for_each_possible_cpu(cpu) {
-		pmd_t *pmd;
+		pmd_t *pmd, pmd_v;
 
-		pmd = populate_extra_pmd((unsigned long)vm.addr
-					 + cpu * PMD_SIZE);
-		set_pmd(pmd, pfn_pmd(page_to_pfn(virt_to_page(pcpur_ptrs[cpu])),
-				     PAGE_KERNEL_LARGE));
+		pmd = populate_extra_pmd((unsigned long)pcpul_vm.addr +
+					 cpu * PMD_SIZE);
+		pmd_v = pfn_pmd(page_to_pfn(virt_to_page(pcpul_map[cpu].ptr)),
+				PAGE_KERNEL_LARGE);
+		set_pmd(pmd, pmd_v);
 	}
 
 	/* we're ready, commit */
 	pr_info("PERCPU: Remapped at %p with large pages, static data "
-		"%zu bytes\n", vm.addr, static_size);
+		"%zu bytes\n", pcpul_vm.addr, static_size);
 
-	ret = pcpu_setup_first_chunk(pcpur_get_page, static_size,
+	ret = pcpu_setup_first_chunk(pcpul_get_page, static_size,
 				     PERCPU_FIRST_CHUNK_RESERVE, dyn_size,
-				     PMD_SIZE, vm.addr, NULL);
-	goto out_free_ar;
+				     PMD_SIZE, pcpul_vm.addr, NULL);
+
+	/* sort pcpul_map array for pcpu_lpage_remapped() */
+	for (i = 0; i < nr_cpu_ids - 1; i++)
+		for (j = i + 1; j < nr_cpu_ids; j++)
+			if (pcpul_map[i].ptr > pcpul_map[j].ptr) {
+				struct pcpul_ent tmp = pcpul_map[i];
+				pcpul_map[i] = pcpul_map[j];
+				pcpul_map[j] = tmp;
+			}
+
+	return ret;
 
 enomem:
 	for_each_possible_cpu(cpu)
-		if (pcpur_ptrs[cpu])
-			free_bootmem(__pa(pcpur_ptrs[cpu]), PMD_SIZE);
-	ret = -ENOMEM;
-out_free_ar:
-	free_bootmem(__pa(pcpur_ptrs), ptrs_size);
-	return ret;
+		if (pcpul_map[cpu].ptr)
+			free_bootmem(__pa(pcpul_map[cpu].ptr), pcpul_size);
+	free_bootmem(__pa(pcpul_map), map_size);
+	return -ENOMEM;
+}
+
+/**
+ * pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area
+ * @kaddr: the kernel address in question
+ *
+ * Determine whether @kaddr falls in the pcpul recycled area.  This is
+ * used by pageattr to detect VM aliases and break up the pcpu PMD
+ * mapping such that the same physical page is not mapped under
+ * different attributes.
+ *
+ * The recycled area is always at the tail of a partially used PMD
+ * page.
+ *
+ * RETURNS:
+ * Address of corresponding remapped pcpu address if match is found;
+ * otherwise, NULL.
+ */
+void *pcpu_lpage_remapped(void *kaddr)
+{
+	void *pmd_addr = (void *)((unsigned long)kaddr & PMD_MASK);
+	unsigned long offset = (unsigned long)kaddr & ~PMD_MASK;
+	int left = 0, right = nr_cpu_ids - 1;
+	int pos;
+
+	/* pcpul in use at all? */
+	if (!pcpul_map)
+		return NULL;
+
+	/* okay, perform binary search */
+	while (left <= right) {
+		pos = (left + right) / 2;
+
+		if (pcpul_map[pos].ptr < pmd_addr)
+			left = pos + 1;
+		else if (pcpul_map[pos].ptr > pmd_addr)
+			right = pos - 1;
+		else {
+			/* it shouldn't be in the area for the first chunk */
+			WARN_ON(offset < pcpul_size);
+
+			return pcpul_vm.addr +
+				pcpul_map[pos].cpu * PMD_SIZE + offset;
+		}
+	}
+
+	return NULL;
 }
 #else
-static ssize_t __init setup_pcpu_remap(size_t static_size)
+static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)
 {
 	return -EINVAL;
 }
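The new pcpul_map bookkeeping exists so that pcpu_lpage_remapped() can answer alias queries from pageattr in O(log n): the map is sorted by PMD-page pointer once, after the first chunk is set up, and then binary-searched. The open-coded O(n²) exchange sort is harmless since it runs once at boot over nr_cpu_ids entries; the lookup is the hot side. A self-contained userspace sketch of the same sort-then-search scheme, with hypothetical addresses and cpu count (the kernel entries point at bootmem-allocated PMD pages):

```c
/* Userspace sketch of the pcpul_map sort + lookup scheme above. */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

#define PMD_SIZE	(2UL << 20)
#define PMD_MASK	(~(PMD_SIZE - 1))

struct pcpul_ent {
	unsigned int cpu;
	void *ptr;			/* start of this cpu's PMD page */
};

static int ent_cmp(const void *a, const void *b)
{
	uintptr_t pa = (uintptr_t)((const struct pcpul_ent *)a)->ptr;
	uintptr_t pb = (uintptr_t)((const struct pcpul_ent *)b)->ptr;

	return (pa > pb) - (pa < pb);
}

/* mirrors pcpu_lpage_remapped(): which entry owns kaddr's PMD page? */
static struct pcpul_ent *lookup(struct pcpul_ent *map, int n, void *kaddr)
{
	void *pmd_addr = (void *)((uintptr_t)kaddr & PMD_MASK);
	int left = 0, right = n - 1;

	while (left <= right) {
		int pos = (left + right) / 2;

		if (map[pos].ptr < pmd_addr)
			left = pos + 1;
		else if (map[pos].ptr > pmd_addr)
			right = pos - 1;
		else
			return &map[pos];
	}
	return NULL;
}

int main(void)
{
	/* deliberately unsorted, as after per-node NUMA allocation */
	struct pcpul_ent map[] = {
		{ 1, (void *)(3 * PMD_SIZE) },
		{ 0, (void *)(1 * PMD_SIZE) },
	};
	int n = sizeof(map) / sizeof(map[0]);
	struct pcpul_ent *e;

	/* the kernel open-codes an exchange sort; qsort() does here */
	qsort(map, n, sizeof(map[0]), ent_cmp);

	e = lookup(map, n, (void *)(1 * PMD_SIZE + 12345));
	if (e)
		printf("kaddr belongs to cpu%u's PMD page\n", e->cpu);
	return 0;
}
```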
@@ -249,7 +329,7 @@ static ssize_t __init setup_pcpu_remap(size_t static_size)
  * mapping so that it can use PMD mapping without additional TLB
  * pressure.
  */
-static ssize_t __init setup_pcpu_embed(size_t static_size)
+static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen)
 {
 	size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
 
@@ -258,7 +338,7 @@ static ssize_t __init setup_pcpu_embed(size_t static_size)
 	 * this.  Also, embedding allocation doesn't play well with
 	 * NUMA.
 	 */
-	if (!cpu_has_pse || pcpu_need_numa())
+	if (!chosen && (!cpu_has_pse || pcpu_need_numa()))
 		return -EINVAL;
 
 	return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE,
@@ -297,7 +377,7 @@ static ssize_t __init setup_pcpu_4k(size_t static_size)
 	pcpu4k_nr_static_pages = PFN_UP(static_size);
 
 	/* unaligned allocations can't be freed, round up to page size */
-	pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * num_possible_cpus()
+	pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * nr_cpu_ids
 			       * sizeof(pcpu4k_pages[0]));
 	pcpu4k_pages = alloc_bootmem(pages_size);
 
@@ -308,8 +388,11 @@ static ssize_t __init setup_pcpu_4k(size_t static_size)
 		void *ptr;
 
 		ptr = pcpu_alloc_bootmem(cpu, PAGE_SIZE, PAGE_SIZE);
-		if (!ptr)
+		if (!ptr) {
+			pr_warning("PERCPU: failed to allocate "
+				   "4k page for cpu%u\n", cpu);
 			goto enomem;
+		}
 
 		memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE);
 		pcpu4k_pages[j++] = virt_to_page(ptr);
@@ -333,6 +416,16 @@ out_free_ar:
 	return ret;
 }
 
+/* for explicit first chunk allocator selection */
+static char pcpu_chosen_alloc[16] __initdata;
+
+static int __init percpu_alloc_setup(char *str)
+{
+	strncpy(pcpu_chosen_alloc, str, sizeof(pcpu_chosen_alloc) - 1);
+	return 0;
+}
+early_param("percpu_alloc", percpu_alloc_setup);
+
 static inline void setup_percpu_segment(int cpu)
 {
 #ifdef CONFIG_X86_32
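This hunk is what makes the allocator user-selectable: booting with `percpu_alloc=lpage`, `percpu_alloc=embed`, or `percpu_alloc=4k` forces the corresponding first-chunk allocator (the strings are matched in the setup_per_cpu_areas() hunk below). Because pcpu_chosen_alloc is a zero-initialized static buffer and strncpy() is given `sizeof() - 1`, the stored value always stays NUL-terminated, even for an oversized parameter.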
@@ -346,11 +439,6 @@ static inline void setup_percpu_segment(int cpu)
 #endif
 }
 
-/*
- * Great future plan:
- * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
- * Always point %gs to its beginning
- */
 void __init setup_per_cpu_areas(void)
 {
 	size_t static_size = __per_cpu_end - __per_cpu_start;
@@ -367,9 +455,26 @@ void __init setup_per_cpu_areas(void)
 	 * of large page mappings.  Please read comments on top of
 	 * each allocator for details.
 	 */
-	ret = setup_pcpu_remap(static_size);
-	if (ret < 0)
-		ret = setup_pcpu_embed(static_size);
+	ret = -EINVAL;
+	if (strlen(pcpu_chosen_alloc)) {
+		if (strcmp(pcpu_chosen_alloc, "4k")) {
+			if (!strcmp(pcpu_chosen_alloc, "lpage"))
+				ret = setup_pcpu_lpage(static_size, true);
+			else if (!strcmp(pcpu_chosen_alloc, "embed"))
+				ret = setup_pcpu_embed(static_size, true);
+			else
+				pr_warning("PERCPU: unknown allocator %s "
+					   "specified\n", pcpu_chosen_alloc);
+			if (ret < 0)
+				pr_warning("PERCPU: %s allocator failed (%zd), "
+					   "falling back to 4k\n",
+					   pcpu_chosen_alloc, ret);
+		}
+	} else {
+		ret = setup_pcpu_lpage(static_size, false);
+		if (ret < 0)
+			ret = setup_pcpu_embed(static_size, false);
+	}
 	if (ret < 0)
 		ret = setup_pcpu_4k(static_size);
 	if (ret < 0)
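The resulting selection order: an explicitly chosen allocator is tried first (with `4k` skipping straight ahead), auto mode tries lpage then embed, and setup_pcpu_4k() is the final fallback either way. A compressed userspace sketch of that control flow, with stub allocators standing in for the real setup_pcpu_*() functions and hypothetical return values:

```c
/* Userspace sketch of the allocator selection added above. */
#include <stdio.h>
#include <string.h>

static long setup_lpage(int chosen) { return chosen ? 0 : -22; /* -EINVAL */ }
static long setup_embed(int chosen) { (void)chosen; return 0; }
static long setup_4k(void) { return 0; }

static void setup_per_cpu(const char *chosen_alloc)
{
	long ret = -22;				/* -EINVAL */

	if (strlen(chosen_alloc)) {
		if (strcmp(chosen_alloc, "4k")) {	/* "4k" skips ahead */
			if (!strcmp(chosen_alloc, "lpage"))
				ret = setup_lpage(1);
			else if (!strcmp(chosen_alloc, "embed"))
				ret = setup_embed(1);
			else
				fprintf(stderr, "unknown allocator %s\n",
					chosen_alloc);
			if (ret < 0)
				fprintf(stderr, "%s failed, using 4k\n",
					chosen_alloc);
		}
	} else {
		/* auto mode: lpage, then embed, then 4k */
		ret = setup_lpage(0);
		if (ret < 0)
			ret = setup_embed(0);
	}
	if (ret < 0)
		ret = setup_4k();
	printf("ret=%ld\n", ret);
}

int main(void)
{
	setup_per_cpu("");		/* default boot */
	setup_per_cpu("embed");		/* percpu_alloc=embed */
	setup_per_cpu("4k");		/* percpu_alloc=4k */
	return 0;
}
```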
