Diffstat (limited to 'arch/x86/mm')
-rw-r--r--  arch/x86/mm/init.c                  7
-rw-r--r--  arch/x86/mm/init_32.c               8
-rw-r--r--  arch/x86/mm/ioremap.c              41
-rw-r--r--  arch/x86/mm/kmemcheck/kmemcheck.c   2
-rw-r--r--  arch/x86/mm/kmemcheck/shadow.c     16
-rw-r--r--  arch/x86/mm/kmemcheck/shadow.h      2
-rw-r--r--  arch/x86/mm/mmap.c                  4
-rw-r--r--  arch/x86/mm/numa_64.c             235
-rw-r--r--  arch/x86/mm/pgtable.c              31
-rw-r--r--  arch/x86/mm/tlb.c                   8

10 files changed, 159 insertions(+), 195 deletions(-)
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index d406c5239019..e71c5cbc8f35 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -266,16 +266,9 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
 	if (!after_bootmem)
 		find_early_table_space(end, use_pse, use_gbpages);
 
-#ifdef CONFIG_X86_32
-	for (i = 0; i < nr_range; i++)
-		kernel_physical_mapping_init(mr[i].start, mr[i].end,
-					     mr[i].page_size_mask);
-	ret = end;
-#else /* CONFIG_X86_64 */
 	for (i = 0; i < nr_range; i++)
 		ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
 						   mr[i].page_size_mask);
-#endif
 
 #ifdef CONFIG_X86_32
 	early_ioremap_page_table_range_init();
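
The #ifdef can be dropped here because the 32-bit kernel_physical_mapping_init() now also returns the last mapped address (see the init_32.c hunk below), so both configurations share one prototype. A minimal sketch of the now-shared signature, using the names from this diff:

	/* Shared by the 32- and 64-bit implementations after this change: */
	unsigned long kernel_physical_mapping_init(unsigned long start,
						   unsigned long end,
						   unsigned long page_size_mask);
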
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 9a0c258a86be..2226f2c70ea3 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -241,6 +241,7 @@ kernel_physical_mapping_init(unsigned long start,
 					    unsigned long page_size_mask)
 {
 	int use_pse = page_size_mask == (1<<PG_LEVEL_2M);
+	unsigned long last_map_addr = end;
 	unsigned long start_pfn, end_pfn;
 	pgd_t *pgd_base = swapper_pg_dir;
 	int pgd_idx, pmd_idx, pte_ofs;
@@ -341,9 +342,10 @@ repeat:
 				prot = PAGE_KERNEL_EXEC;
 
 			pages_4k++;
-			if (mapping_iter == 1)
+			if (mapping_iter == 1) {
 				set_pte(pte, pfn_pte(pfn, init_prot));
-			else
+				last_map_addr = (pfn << PAGE_SHIFT) + PAGE_SIZE;
+			} else
 				set_pte(pte, pfn_pte(pfn, prot));
 		}
 	}
@@ -368,7 +370,7 @@ repeat:
 		mapping_iter = 2;
 		goto repeat;
 	}
-	return 0;
+	return last_map_addr;
 }
 
 pte_t *kmap_pte;
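
On 32-bit the direct mapping is built in two iterations, and last_map_addr advances only while the first pass actually sets PTEs, so the function can report the highest address it mapped instead of a hard-coded 0. For a 4 KiB page the expression is just the end of the page most recently mapped; a worked example under that reading:

	/* end of the page at pfn: (pfn << PAGE_SHIFT) + PAGE_SIZE
	 * e.g. pfn 0x1ff -> (0x1ff << 12) + 0x1000 = 0x200000 (the 2 MiB mark)
	 */
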
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index c246d259822d..5eb1ba74a3a9 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -24,43 +24,6 @@
 
 #include "physaddr.h"
 
-int page_is_ram(unsigned long pagenr)
-{
-	resource_size_t addr, end;
-	int i;
-
-	/*
-	 * A special case is the first 4Kb of memory;
-	 * This is a BIOS owned area, not kernel ram, but generally
-	 * not listed as such in the E820 table.
-	 */
-	if (pagenr == 0)
-		return 0;
-
-	/*
-	 * Second special case: Some BIOSen report the PC BIOS
-	 * area (640->1Mb) as ram even though it is not.
-	 */
-	if (pagenr >= (BIOS_BEGIN >> PAGE_SHIFT) &&
-		    pagenr < (BIOS_END >> PAGE_SHIFT))
-		return 0;
-
-	for (i = 0; i < e820.nr_map; i++) {
-		/*
-		 * Not usable memory:
-		 */
-		if (e820.map[i].type != E820_RAM)
-			continue;
-		addr = (e820.map[i].addr + PAGE_SIZE-1) >> PAGE_SHIFT;
-		end = (e820.map[i].addr + e820.map[i].size) >> PAGE_SHIFT;
-
-
-		if ((pagenr >= addr) && (pagenr < end))
-			return 1;
-	}
-	return 0;
-}
-
 /*
  * Fix up the linear direct mapping of the kernel to avoid cache attribute
  * conflicts.
@@ -422,6 +385,10 @@ void __init early_ioremap_init(void)
 	 * The boot-ioremap range spans multiple pmds, for which
 	 * we are not prepared:
 	 */
+#define __FIXADDR_TOP (-PAGE_SIZE)
+	BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
+		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
+#undef __FIXADDR_TOP
 	if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
 		WARN_ON(1);
 		printk(KERN_WARNING "pmd %p != %p\n",
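
Two notes on this file. The deleted page_is_ram() walked the e820 map directly; the deletion only makes sense if a generic implementation takes over elsewhere in the tree, which this view (limited to arch/x86/mm) would not show. The added BUILD_BUG_ON needs an integer constant expression, but on 32-bit the fixmap top can move at runtime (it is lowered to grow the vmalloc area), so the hunk temporarily pins __FIXADDR_TOP to its default, -PAGE_SIZE, for the duration of the check. A sketch of why that makes the expression fold, assuming the usual fixmap arithmetic rather than the kernel's exact headers:

	/* Once __FIXADDR_TOP is a literal, the shift-and-compare in the
	 * BUILD_BUG_ON reduces entirely at compile time: */
	#define FIXADDR_TOP		((unsigned long)__FIXADDR_TOP)
	#define __fix_to_virt(x)	(FIXADDR_TOP - ((x) << PAGE_SHIFT))
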
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c
index 8cc183344140..b3b531a4f8e5 100644
--- a/arch/x86/mm/kmemcheck/kmemcheck.c
+++ b/arch/x86/mm/kmemcheck/kmemcheck.c
@@ -337,7 +337,7 @@ bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size)
 	if (!shadow)
 		return true;
 
-	status = kmemcheck_shadow_test(shadow, size);
+	status = kmemcheck_shadow_test_all(shadow, size);
 
 	return status == KMEMCHECK_SHADOW_INITIALIZED;
 }
diff --git a/arch/x86/mm/kmemcheck/shadow.c b/arch/x86/mm/kmemcheck/shadow.c
index 3f66b82076a3..aec124214d97 100644
--- a/arch/x86/mm/kmemcheck/shadow.c
+++ b/arch/x86/mm/kmemcheck/shadow.c
@@ -125,12 +125,12 @@ void kmemcheck_mark_initialized_pages(struct page *p, unsigned int n)
 
 enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size)
 {
+#ifdef CONFIG_KMEMCHECK_PARTIAL_OK
 	uint8_t *x;
 	unsigned int i;
 
 	x = shadow;
 
-#ifdef CONFIG_KMEMCHECK_PARTIAL_OK
 	/*
	 * Make sure _some_ bytes are initialized. Gcc frequently generates
	 * code to access neighboring bytes.
@@ -139,13 +139,25 @@ enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size)
 		if (x[i] == KMEMCHECK_SHADOW_INITIALIZED)
 			return x[i];
 	}
+
+	return x[0];
 #else
+	return kmemcheck_shadow_test_all(shadow, size);
+#endif
+}
+
+enum kmemcheck_shadow kmemcheck_shadow_test_all(void *shadow, unsigned int size)
+{
+	uint8_t *x;
+	unsigned int i;
+
+	x = shadow;
+
 	/* All bytes must be initialized. */
 	for (i = 0; i < size; ++i) {
 		if (x[i] != KMEMCHECK_SHADOW_INITIALIZED)
 			return x[i];
 	}
-#endif
 
 	return x[0];
 }
diff --git a/arch/x86/mm/kmemcheck/shadow.h b/arch/x86/mm/kmemcheck/shadow.h
index af46d9ab9d86..ff0b2f70fbcb 100644
--- a/arch/x86/mm/kmemcheck/shadow.h
+++ b/arch/x86/mm/kmemcheck/shadow.h
@@ -11,6 +11,8 @@ enum kmemcheck_shadow {
 void *kmemcheck_shadow_lookup(unsigned long address);
 
 enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size);
+enum kmemcheck_shadow kmemcheck_shadow_test_all(void *shadow,
+						unsigned int size);
 void kmemcheck_shadow_set(void *shadow, unsigned int size);
 
 #endif
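
The split gives kmemcheck two checking strengths: kmemcheck_shadow_test() keeps the CONFIG_KMEMCHECK_PARTIAL_OK fast path, where an access passes if any byte of the range is initialized (tolerating gcc's habit of touching neighboring bytes), while the new kmemcheck_shadow_test_all() always requires every byte to be initialized. kmemcheck_is_obj_initialized() switches to the strict variant; a sketch of the intended call pattern:

	/* Strict: report initialized only if every shadow byte says so. */
	status = kmemcheck_shadow_test_all(shadow, size);
	if (status == KMEMCHECK_SHADOW_INITIALIZED)
		/* the whole object is initialized */;
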
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index c8191defc38a..1dab5194fd9d 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -71,7 +71,7 @@ static int mmap_is_legacy(void)
 	if (current->personality & ADDR_COMPAT_LAYOUT)
 		return 1;
 
-	if (current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY)
+	if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
 		return 1;
 
 	return sysctl_legacy_va_layout;
@@ -96,7 +96,7 @@ static unsigned long mmap_rnd(void)
 
 static unsigned long mmap_base(void)
 {
-	unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur;
+	unsigned long gap = rlimit(RLIMIT_STACK);
 
 	if (gap < MIN_GAP)
 		gap = MIN_GAP;
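
rlimit() replaces the open-coded rlim_cur access in both hunks. Assuming the helper is the thin accessor its call sites suggest (its definition lives outside this diff), it reads the current task's soft limit roughly like this sketch:

	/* Sketch of the accessor these hunks switch to: */
	static inline unsigned long rlimit(unsigned int limit)
	{
		return ACCESS_ONCE(current->signal->rlim[limit].rlim_cur);
	}
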
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 83bbc70d11bb..3307ea8bd43a 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -427,7 +427,7 @@ static int __init split_nodes_interleave(u64 addr, u64 max_addr,
 	 * Calculate the number of big nodes that can be allocated as a result
 	 * of consolidating the remainder.
 	 */
-	big = ((size & ~FAKE_NODE_MIN_HASH_MASK) & nr_nodes) /
+	big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * nr_nodes) /
 		FAKE_NODE_MIN_SIZE;
 
 	size &= FAKE_NODE_MIN_HASH_MASK;
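
The one-character fix above corrects the count of "big" nodes: the per-node remainder below FAKE_NODE_MIN_SIZE must be multiplied by the node count to find how much leftover memory can be consolidated, not bitwise-ANDed with it. A worked example, assuming the usual 64 MB FAKE_NODE_MIN_SIZE:

	/* remainder per node = size & ~FAKE_NODE_MIN_HASH_MASK
	 * big = remainder * nr_nodes / FAKE_NODE_MIN_SIZE
	 * e.g. a 40 MB remainder across 8 nodes: 40 * 8 / 64 = 5 nodes
	 * get one extra FAKE_NODE_MIN_SIZE chunk each.
	 */
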
@@ -502,77 +502,99 @@ static int __init split_nodes_interleave(u64 addr, u64 max_addr,
 }
 
 /*
- * Splits num_nodes nodes up equally starting at node_start. The return value
- * is the number of nodes split up and addr is adjusted to be at the end of the
- * last node allocated.
+ * Returns the end address of a node so that there is at least `size' amount of
+ * non-reserved memory or `max_addr' is reached.
  */
-static int __init split_nodes_equally(u64 *addr, u64 max_addr, int node_start,
-				      int num_nodes)
+static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size)
 {
-	unsigned int big;
-	u64 size;
-	int i;
-
-	if (num_nodes <= 0)
-		return -1;
-	if (num_nodes > MAX_NUMNODES)
-		num_nodes = MAX_NUMNODES;
-	size = (max_addr - *addr - e820_hole_size(*addr, max_addr)) /
-		num_nodes;
-	/*
-	 * Calculate the number of big nodes that can be allocated as a result
-	 * of consolidating the leftovers.
-	 */
-	big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * num_nodes) /
-		FAKE_NODE_MIN_SIZE;
-
-	/* Round down to nearest FAKE_NODE_MIN_SIZE. */
-	size &= FAKE_NODE_MIN_HASH_MASK;
-	if (!size) {
-		printk(KERN_ERR "Not enough memory for each node. "
-			"NUMA emulation disabled.\n");
-		return -1;
-	}
-
-	for (i = node_start; i < num_nodes + node_start; i++) {
-		u64 end = *addr + size;
+	u64 end = start + size;
 
-		if (i < big)
-			end += FAKE_NODE_MIN_SIZE;
-		/*
-		 * The final node can have the remaining system RAM. Other
-		 * nodes receive roughly the same amount of available pages.
-		 */
-		if (i == num_nodes + node_start - 1)
+	while (end - start - e820_hole_size(start, end) < size) {
+		end += FAKE_NODE_MIN_SIZE;
+		if (end > max_addr) {
 			end = max_addr;
-		else
-			while (end - *addr - e820_hole_size(*addr, end) <
-				size) {
-				end += FAKE_NODE_MIN_SIZE;
-				if (end > max_addr) {
-					end = max_addr;
-					break;
-				}
-			}
-		if (setup_node_range(i, addr, end - *addr, max_addr) < 0)
 			break;
+		}
 	}
-	return i - node_start + 1;
+	return end;
 }
 
 /*
- * Splits the remaining system RAM into chunks of size. The remaining memory is
- * always assigned to a final node and can be asymmetric. Returns the number of
- * nodes split.
+ * Sets up fake nodes of `size' interleaved over physical nodes ranging from
+ * `addr' to `max_addr'. The return value is the number of nodes allocated.
  */
-static int __init split_nodes_by_size(u64 *addr, u64 max_addr, int node_start,
-				      u64 size)
+static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size)
 {
-	int i = node_start;
-	size = (size << 20) & FAKE_NODE_MIN_HASH_MASK;
-	while (!setup_node_range(i++, addr, size, max_addr))
-		;
-	return i - node_start;
+	nodemask_t physnode_mask = NODE_MASK_NONE;
+	u64 min_size;
+	int ret = 0;
+	int i;
+
+	if (!size)
+		return -1;
+	/*
+	 * The limit on emulated nodes is MAX_NUMNODES, so the size per node is
+	 * increased accordingly if the requested size is too small. This
+	 * creates a uniform distribution of node sizes across the entire
+	 * machine (but not necessarily over physical nodes).
+	 */
+	min_size = (max_addr - addr - e820_hole_size(addr, max_addr)) /
+						MAX_NUMNODES;
+	min_size = max(min_size, FAKE_NODE_MIN_SIZE);
+	if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size)
+		min_size = (min_size + FAKE_NODE_MIN_SIZE) &
+						FAKE_NODE_MIN_HASH_MASK;
+	if (size < min_size) {
+		pr_err("Fake node size %LuMB too small, increasing to %LuMB\n",
+			size >> 20, min_size >> 20);
+		size = min_size;
+	}
+	size &= FAKE_NODE_MIN_HASH_MASK;
+
+	for (i = 0; i < MAX_NUMNODES; i++)
+		if (physnodes[i].start != physnodes[i].end)
+			node_set(i, physnode_mask);
+	/*
+	 * Fill physical nodes with fake nodes of size until there is no memory
+	 * left on any of them.
+	 */
+	while (nodes_weight(physnode_mask)) {
+		for_each_node_mask(i, physnode_mask) {
+			u64 dma32_end = MAX_DMA32_PFN << PAGE_SHIFT;
+			u64 end;
+
+			end = find_end_of_node(physnodes[i].start,
+						physnodes[i].end, size);
+			/*
+			 * If there won't be at least FAKE_NODE_MIN_SIZE of
+			 * non-reserved memory in ZONE_DMA32 for the next node,
+			 * this one must extend to the boundary.
+			 */
+			if (end < dma32_end && dma32_end - end -
+			    e820_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
+				end = dma32_end;
+
+			/*
+			 * If there won't be enough non-reserved memory for the
+			 * next node, this one must extend to the end of the
+			 * physical node.
+			 */
+			if (physnodes[i].end - end -
+			    e820_hole_size(end, physnodes[i].end) < size)
+				end = physnodes[i].end;
+
+			/*
+			 * Setup the fake node that will be allocated as bootmem
+			 * later. If setup_node_range() returns non-zero, there
+			 * is no more memory available on this physical node.
+			 */
+			if (setup_node_range(ret++, &physnodes[i].start,
+					end - physnodes[i].start,
+					physnodes[i].end) < 0)
+				node_clear(i, physnode_mask);
+		}
+	}
+	return ret;
 }
 
 /*
@@ -582,87 +604,32 @@ static int __init split_nodes_by_size(u64 *addr, u64 max_addr, int node_start,
 static int __init numa_emulation(unsigned long start_pfn,
 			unsigned long last_pfn, int acpi, int k8)
 {
-	u64 size, addr = start_pfn << PAGE_SHIFT;
+	u64 addr = start_pfn << PAGE_SHIFT;
 	u64 max_addr = last_pfn << PAGE_SHIFT;
-	int num_nodes = 0, num = 0, coeff_flag, coeff = -1, i;
 	int num_phys_nodes;
+	int num_nodes;
+	int i;
 
 	num_phys_nodes = setup_physnodes(addr, max_addr, acpi, k8);
 	/*
-	 * If the numa=fake command-line is just a single number N, split the
-	 * system RAM into N fake nodes.
+	 * If the numa=fake command-line contains a 'M' or 'G', it represents
+	 * the fixed node size. Otherwise, if it is just a single number N,
+	 * split the system RAM into N fake nodes.
 	 */
-	if (!strchr(cmdline, '*') && !strchr(cmdline, ',')) {
-		long n = simple_strtol(cmdline, NULL, 0);
-
-		num_nodes = split_nodes_interleave(addr, max_addr,
-						num_phys_nodes, n);
-		if (num_nodes < 0)
-			return num_nodes;
-		goto out;
-	}
+	if (strchr(cmdline, 'M') || strchr(cmdline, 'G')) {
+		u64 size;
 
-	/* Parse the command line. */
-	for (coeff_flag = 0; ; cmdline++) {
-		if (*cmdline && isdigit(*cmdline)) {
-			num = num * 10 + *cmdline - '0';
-			continue;
-		}
-		if (*cmdline == '*') {
-			if (num > 0)
-				coeff = num;
-			coeff_flag = 1;
-		}
-		if (!*cmdline || *cmdline == ',') {
-			if (!coeff_flag)
-				coeff = 1;
-			/*
-			 * Round down to the nearest FAKE_NODE_MIN_SIZE.
-			 * Command-line coefficients are in megabytes.
-			 */
-			size = ((u64)num << 20) & FAKE_NODE_MIN_HASH_MASK;
-			if (size)
-				for (i = 0; i < coeff; i++, num_nodes++)
-					if (setup_node_range(num_nodes, &addr,
-						size, max_addr) < 0)
-						goto done;
-			if (!*cmdline)
-				break;
-			coeff_flag = 0;
-			coeff = -1;
-		}
-		num = 0;
-	}
-done:
-	if (!num_nodes)
-		return -1;
-	/* Fill remainder of system RAM, if appropriate. */
-	if (addr < max_addr) {
-		if (coeff_flag && coeff < 0) {
-			/* Split remaining nodes into num-sized chunks */
-			num_nodes += split_nodes_by_size(&addr, max_addr,
-							 num_nodes, num);
-			goto out;
-		}
-		switch (*(cmdline - 1)) {
-		case '*':
-			/* Split remaining nodes into coeff chunks */
-			if (coeff <= 0)
-				break;
-			num_nodes += split_nodes_equally(&addr, max_addr,
-							 num_nodes, coeff);
-			break;
-		case ',':
-			/* Do not allocate remaining system RAM */
-			break;
-		default:
-			/* Give one final node */
-			setup_node_range(num_nodes, &addr, max_addr - addr,
-					 max_addr);
-			num_nodes++;
-		}
-	}
-out:
+		size = memparse(cmdline, &cmdline);
+		num_nodes = split_nodes_size_interleave(addr, max_addr, size);
+	} else {
+		unsigned long n;
+
+		n = simple_strtoul(cmdline, NULL, 0);
+		num_nodes = split_nodes_interleave(addr, max_addr, num_phys_nodes, n);
+	}
+
+	if (num_nodes < 0)
+		return num_nodes;
 	memnode_shift = compute_hash_shift(nodes, num_nodes, NULL);
 	if (memnode_shift < 0) {
 		memnode_shift = 0;
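
The rewritten numa_emulation() distinguishes the two accepted forms of numa=fake by looking for a size suffix, with memparse() handling the unit scaling. Hypothetical boot-line examples under the new parsing:

	numa=fake=8	# eight interleaved fake nodes (existing behavior)
	numa=fake=512M	# fake nodes of a fixed 512 MB size, interleaved
			# over the physical nodes (new behavior)

Sizes below the computed minimum (total memory spread over MAX_NUMNODES, rounded up to FAKE_NODE_MIN_SIZE) are raised with the pr_err() message shown in the hunk.
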
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index ed34f5e35999..c9ba9deafe83 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -6,6 +6,14 @@
 
 #define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO
 
+#ifdef CONFIG_HIGHPTE
+#define PGALLOC_USER_GFP __GFP_HIGHMEM
+#else
+#define PGALLOC_USER_GFP 0
+#endif
+
+gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
+
 pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 {
 	return (pte_t *)__get_free_page(PGALLOC_GFP);
@@ -15,16 +23,29 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
 	struct page *pte;
 
-#ifdef CONFIG_HIGHPTE
-	pte = alloc_pages(PGALLOC_GFP | __GFP_HIGHMEM, 0);
-#else
-	pte = alloc_pages(PGALLOC_GFP, 0);
-#endif
+	pte = alloc_pages(__userpte_alloc_gfp, 0);
 	if (pte)
 		pgtable_page_ctor(pte);
 	return pte;
 }
 
+static int __init setup_userpte(char *arg)
+{
+	if (!arg)
+		return -EINVAL;
+
+	/*
+	 * "userpte=nohigh" disables allocation of user pagetables in
+	 * high memory.
+	 */
+	if (strcmp(arg, "nohigh") == 0)
+		__userpte_alloc_gfp &= ~__GFP_HIGHMEM;
+	else
+		return -EINVAL;
+	return 0;
+}
+early_param("userpte", setup_userpte);
+
 void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
 {
 	pgtable_page_dtor(pte);
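
The new early_param gives a boot-time override for where user page-table pages live. With CONFIG_HIGHPTE they default to highmem, where each access goes through an atomic kmap; a usage example:

	userpte=nohigh	# keep user page tables in lowmem even with
			# CONFIG_HIGHPTE, e.g. to avoid kmap overhead

Any other value is rejected with -EINVAL, so the parameter is effectively a single switch.
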
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 65b58e4b0b8b..426f3a1a64d3 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -41,7 +41,7 @@ union smp_flush_state {
 	struct {
 		struct mm_struct *flush_mm;
 		unsigned long flush_va;
-		spinlock_t tlbstate_lock;
+		raw_spinlock_t tlbstate_lock;
 		DECLARE_BITMAP(flush_cpumask, NR_CPUS);
 	};
 	char pad[INTERNODE_CACHE_BYTES];
@@ -181,7 +181,7 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
 	 * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
 	 * probably not worth checking this for a cache-hot lock.
 	 */
-	spin_lock(&f->tlbstate_lock);
+	raw_spin_lock(&f->tlbstate_lock);
 
 	f->flush_mm = mm;
 	f->flush_va = va;
@@ -199,7 +199,7 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
 
 	f->flush_mm = NULL;
 	f->flush_va = 0;
-	spin_unlock(&f->tlbstate_lock);
+	raw_spin_unlock(&f->tlbstate_lock);
 }
 
 void native_flush_tlb_others(const struct cpumask *cpumask,
@@ -223,7 +223,7 @@ static int __cpuinit init_smp_flush(void)
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(flush_state); i++)
-		spin_lock_init(&flush_state[i].tlbstate_lock);
+		raw_spin_lock_init(&flush_state[i].tlbstate_lock);
 
 	return 0;
 }
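
Converting tlbstate_lock to raw_spinlock_t matters for realtime configurations, where plain spinlock_t can become a sleeping lock: the TLB-shootdown path runs with interrupts disabled and must busy-wait instead. A minimal sketch of the declaration and use pattern, with my_lock as a made-up name:

	/* A raw spinlock always spins, even where spinlock_t may sleep: */
	static DEFINE_RAW_SPINLOCK(my_lock);

	raw_spin_lock(&my_lock);
	/* critical section that must never sleep */
	raw_spin_unlock(&my_lock);
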