author	Ingo Molnar <mingo@elte.hu>	2009-04-21 04:46:39 -0400
committer	Ingo Molnar <mingo@elte.hu>	2009-04-21 04:46:54 -0400
commit	8ecee4620e76aae418bfa0e8cc830e92cb559bbb (patch)
tree	49a36784c0a26c8494a37087e37502101013b35d /arch/x86/mm
parent	6424fb38667fffbbb1b90be0ffd9a0c540db6a4b (diff)
parent	a939b96cccdb65df80a52447ec8e4a6d79c56dbb (diff)
Merge branch 'linus' into x86/mm
Merge reason: refresh the topic: there have been 290 non-merge commits upstream to arch/x86 alone.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--	arch/x86/mm/Makefile	|   2
-rw-r--r--	arch/x86/mm/gup.c	|  16
-rw-r--r--	arch/x86/mm/highmem_32.c	|  45
-rw-r--r--	arch/x86/mm/init_32.c	|  35
-rw-r--r--	arch/x86/mm/init_64.c	|  37
-rw-r--r--	arch/x86/mm/iomap_32.c	|   4
-rw-r--r--	arch/x86/mm/ioremap.c	|  33
-rw-r--r--	arch/x86/mm/mmio-mod.c	|  19
-rw-r--r--	arch/x86/mm/numa.c	|  67
-rw-r--r--	arch/x86/mm/numa_64.c	| 111
-rw-r--r--	arch/x86/mm/pageattr.c	| 127
-rw-r--r--	arch/x86/mm/pat.c	| 191
-rw-r--r--	arch/x86/mm/pgtable.c	|   3
-rw-r--r--	arch/x86/mm/srat_64.c	|  30
14 files changed, 334 insertions, 386 deletions
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 08537747cb58..fdd30d08ab52 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -14,7 +14,7 @@ obj-$(CONFIG_MMIOTRACE) += mmiotrace.o
 mmiotrace-y			:= kmmio.o pf_in.o mmio-mod.o
 obj-$(CONFIG_MMIOTRACE_TEST)	+= testmmiotrace.o
 
-obj-$(CONFIG_NUMA)		+= numa_$(BITS).o
+obj-$(CONFIG_NUMA)		+= numa.o numa_$(BITS).o
 obj-$(CONFIG_K8_NUMA)		+= k8topology_64.o
 obj-$(CONFIG_ACPI_NUMA)	+= srat_$(BITS).o
 
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index be54176e9eb2..6340cef6798a 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -219,6 +219,22 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
 		return 1;
 }
 
+/**
+ * get_user_pages_fast() - pin user pages in memory
+ * @start:	starting user address
+ * @nr_pages:	number of pages from start to pin
+ * @write:	whether pages will be written to
+ * @pages:	array that receives pointers to the pages pinned.
+ *		Should be at least nr_pages long.
+ *
+ * Attempt to pin user pages in memory without taking mm->mmap_sem.
+ * If not successful, it will fall back to taking the lock and
+ * calling get_user_pages().
+ *
+ * Returns number of pages pinned. This may be fewer than the number
+ * requested. If nr_pages is 0 or negative, returns 0. If no pages
+ * were pinned, returns -errno.
+ */
 int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			struct page **pages)
 {
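For context, a minimal sketch of how callers typically use this interface; the helper name and buffer handling here are illustrative, not part of this patch:

/* Pin a user buffer via the lockless fast path, then drop each pin. */
static int pin_user_buffer(unsigned long uaddr, int nr_pages,
			   struct page **pages)
{
	int pinned, i;

	pinned = get_user_pages_fast(uaddr, nr_pages, 1 /* write */, pages);
	if (pinned < 0)
		return pinned;		/* nothing was pinned: -errno */

	/* ... use pages[0..pinned-1]; pinned may be less than nr_pages ... */

	for (i = 0; i < pinned; i++)
		put_page(pages[i]);	/* release each page reference */
	return pinned;
}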
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 522db5e3d0bf..8126e8d1a2a4 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -19,49 +19,6 @@ void kunmap(struct page *page)
 		kunmap_high(page);
 }
 
-static void debug_kmap_atomic_prot(enum km_type type)
-{
-#ifdef CONFIG_DEBUG_HIGHMEM
-	static unsigned warn_count = 10;
-
-	if (unlikely(warn_count == 0))
-		return;
-
-	if (unlikely(in_interrupt())) {
-		if (in_irq()) {
-			if (type != KM_IRQ0 && type != KM_IRQ1 &&
-			    type != KM_BIO_SRC_IRQ && type != KM_BIO_DST_IRQ &&
-			    type != KM_BOUNCE_READ) {
-				WARN_ON(1);
-				warn_count--;
-			}
-		} else if (!irqs_disabled()) {	/* softirq */
-			if (type != KM_IRQ0 && type != KM_IRQ1 &&
-			    type != KM_SOFTIRQ0 && type != KM_SOFTIRQ1 &&
-			    type != KM_SKB_SUNRPC_DATA &&
-			    type != KM_SKB_DATA_SOFTIRQ &&
-			    type != KM_BOUNCE_READ) {
-				WARN_ON(1);
-				warn_count--;
-			}
-		}
-	}
-
-	if (type == KM_IRQ0 || type == KM_IRQ1 || type == KM_BOUNCE_READ ||
-	    type == KM_BIO_SRC_IRQ || type == KM_BIO_DST_IRQ) {
-		if (!irqs_disabled()) {
-			WARN_ON(1);
-			warn_count--;
-		}
-	} else if (type == KM_SOFTIRQ0 || type == KM_SOFTIRQ1) {
-		if (irq_count() == 0 && !irqs_disabled()) {
-			WARN_ON(1);
-			warn_count--;
-		}
-	}
-#endif
-}
-
 /*
  * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because
  * no global lock is needed and because the kmap code must perform a global TLB
@@ -81,7 +38,7 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
 	if (!PageHighMem(page))
 		return page_address(page);
 
-	debug_kmap_atomic_prot(type);
+	debug_kmap_atomic(type);
 
 	idx = type + KM_TYPE_NR*smp_processor_id();
 	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
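As background for the hunk above, the usual atomic-kmap pattern the comment describes looks like this; a sketch only, assuming the 2.6.30-era two-argument kmap_atomic() and the KM_USER0 slot:

/* Read one byte from a possibly-highmem page without sleeping. */
static unsigned char peek_byte(struct page *page, size_t offset)
{
	unsigned char *vaddr, c;

	vaddr = kmap_atomic(page, KM_USER0);	/* per-CPU fixmap slot */
	c = vaddr[offset];
	kunmap_atomic(vaddr, KM_USER0);		/* must unmap on the same CPU */
	return c;
}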
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index db81e9a8556b..749559ed80f5 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -1054,17 +1054,47 @@ static noinline int do_test_wp_bit(void)
 const int rodata_test_data = 0xC3;
 EXPORT_SYMBOL_GPL(rodata_test_data);
 
+static int kernel_set_to_readonly;
+
+void set_kernel_text_rw(void)
+{
+	unsigned long start = PFN_ALIGN(_text);
+	unsigned long size = PFN_ALIGN(_etext) - start;
+
+	if (!kernel_set_to_readonly)
+		return;
+
+	pr_debug("Set kernel text: %lx - %lx for read write\n",
+		 start, start+size);
+
+	set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT);
+}
+
+void set_kernel_text_ro(void)
+{
+	unsigned long start = PFN_ALIGN(_text);
+	unsigned long size = PFN_ALIGN(_etext) - start;
+
+	if (!kernel_set_to_readonly)
+		return;
+
+	pr_debug("Set kernel text: %lx - %lx for read only\n",
+		 start, start+size);
+
+	set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
+}
+
 void mark_rodata_ro(void)
 {
 	unsigned long start = PFN_ALIGN(_text);
 	unsigned long size = PFN_ALIGN(_etext) - start;
 
-#ifndef CONFIG_DYNAMIC_FTRACE
-	/* Dynamic tracing modifies the kernel text section */
 	set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
 	printk(KERN_INFO "Write protecting the kernel text: %luk\n",
 		size >> 10);
 
+	kernel_set_to_readonly = 1;
+
 #ifdef CONFIG_CPA_DEBUG
 	printk(KERN_INFO "Testing CPA: Reverting %lx-%lx\n",
 		start, start+size);
@@ -1073,7 +1103,6 @@ void mark_rodata_ro(void)
1073 printk(KERN_INFO "Testing CPA: write protecting again\n"); 1103 printk(KERN_INFO "Testing CPA: write protecting again\n");
1074 set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT); 1104 set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
1075#endif 1105#endif
1076#endif /* CONFIG_DYNAMIC_FTRACE */
1077 1106
1078 start += size; 1107 start += size;
1079 size = (unsigned long)__end_rodata - start; 1108 size = (unsigned long)__end_rodata - start;
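The new helpers are meant to bracket legitimate text modification once mark_rodata_ro() has run; a hedged sketch of the intended call pattern (the patching function itself is illustrative):

/* Illustrative text patcher, e.g. something like dynamic ftrace. */
static void patch_kernel_text(void *addr, const void *opcode, size_t len)
{
	set_kernel_text_rw();		/* no-op until kernel_set_to_readonly */
	memcpy(addr, opcode, len);	/* modify text while it is writable */
	set_kernel_text_ro();		/* restore the read-only protection */
}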
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 54efa57d1c03..1753e8020df6 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -734,21 +734,48 @@ void __init mem_init(void)
 const int rodata_test_data = 0xC3;
 EXPORT_SYMBOL_GPL(rodata_test_data);
 
+static int kernel_set_to_readonly;
+
+void set_kernel_text_rw(void)
+{
+	unsigned long start = PFN_ALIGN(_stext);
+	unsigned long end = PFN_ALIGN(__start_rodata);
+
+	if (!kernel_set_to_readonly)
+		return;
+
+	pr_debug("Set kernel text: %lx - %lx for read write\n",
+		 start, end);
+
+	set_memory_rw(start, (end - start) >> PAGE_SHIFT);
+}
+
+void set_kernel_text_ro(void)
+{
+	unsigned long start = PFN_ALIGN(_stext);
+	unsigned long end = PFN_ALIGN(__start_rodata);
+
+	if (!kernel_set_to_readonly)
+		return;
+
+	pr_debug("Set kernel text: %lx - %lx for read only\n",
+		 start, end);
+
+	set_memory_ro(start, (end - start) >> PAGE_SHIFT);
+}
+
 void mark_rodata_ro(void)
 {
 	unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata);
 	unsigned long rodata_start =
 		((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
 
-#ifdef CONFIG_DYNAMIC_FTRACE
-	/* Dynamic tracing modifies the kernel text section */
-	start = rodata_start;
-#endif
-
 	printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
 	       (end - start) >> 10);
 	set_memory_ro(start, (end - start) >> PAGE_SHIFT);
 
+	kernel_set_to_readonly = 1;
+
 	/*
 	 * The rodata section (but not the kernel text!) should also be
 	 * not-executable.
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index 699c9b2895ae..8056545e2d39 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -19,10 +19,11 @@
 #include <asm/iomap.h>
 #include <asm/pat.h>
 #include <linux/module.h>
+#include <linux/highmem.h>
 
 int is_io_mapping_possible(resource_size_t base, unsigned long size)
 {
-#ifndef CONFIG_X86_PAE
+#if !defined(CONFIG_X86_PAE) && defined(CONFIG_PHYS_ADDR_T_64BIT)
 	/* There is no way to map greater than 1 << 32 address without PAE */
 	if (base + size > 0x100000000ULL)
 		return 0;
@@ -38,6 +39,7 @@ void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
 
 	pagefault_disable();
 
+	debug_kmap_atomic(type);
 	idx = type + KM_TYPE_NR * smp_processor_id();
 	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
 	set_pte(kmap_pte - idx, pfn_pte(pfn, prot));
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 0dfa09d69e80..8a450930834f 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -280,15 +280,16 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
 		return NULL;
 	area->phys_addr = phys_addr;
 	vaddr = (unsigned long) area->addr;
-	if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) {
+
+	if (kernel_map_sync_memtype(phys_addr, size, prot_val)) {
 		free_memtype(phys_addr, phys_addr + size);
 		free_vm_area(area);
 		return NULL;
 	}
 
-	if (ioremap_change_attr(vaddr, size, prot_val) < 0) {
+	if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) {
 		free_memtype(phys_addr, phys_addr + size);
-		vunmap(area->addr);
+		free_vm_area(area);
 		return NULL;
 	}
 
@@ -374,7 +375,8 @@ static void __iomem *ioremap_default(resource_size_t phys_addr,
 	 * - UC_MINUS for non-WB-able memory with no other conflicting mappings
 	 * - Inherit from confliting mappings otherwise
 	 */
-	err = reserve_memtype(phys_addr, phys_addr + size, -1, &flags);
+	err = reserve_memtype(phys_addr, phys_addr + size,
+				_PAGE_CACHE_WB, &flags);
 	if (err < 0)
 		return NULL;
 
@@ -547,7 +549,7 @@ void __init early_ioremap_reset(void)
 }
 
 static void __init __early_set_fixmap(enum fixed_addresses idx,
-				      unsigned long phys, pgprot_t flags)
+				      phys_addr_t phys, pgprot_t flags)
 {
 	unsigned long addr = __fix_to_virt(idx);
 	pte_t *pte;
@@ -566,7 +568,7 @@ static void __init __early_set_fixmap(enum fixed_addresses idx,
 }
 
 static inline void __init early_set_fixmap(enum fixed_addresses idx,
-					   unsigned long phys, pgprot_t prot)
+					   phys_addr_t phys, pgprot_t prot)
 {
 	if (after_paging_init)
 		__set_fixmap(idx, phys, prot);
@@ -607,9 +609,10 @@ static int __init check_early_ioremap_leak(void)
 late_initcall(check_early_ioremap_leak);
 
 static void __init __iomem *
-__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot)
+__early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot)
 {
-	unsigned long offset, last_addr;
+	unsigned long offset;
+	resource_size_t last_addr;
 	unsigned int nrpages;
 	enum fixed_addresses idx0, idx;
 	int i, slot;
@@ -625,15 +628,15 @@ __early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot)
 	}
 
 	if (slot < 0) {
-		printk(KERN_INFO "early_iomap(%08lx, %08lx) not found slot\n",
-			 phys_addr, size);
+		printk(KERN_INFO "early_iomap(%08llx, %08lx) not found slot\n",
+			 (u64)phys_addr, size);
 		WARN_ON(1);
 		return NULL;
 	}
 
 	if (early_ioremap_debug) {
-		printk(KERN_INFO "early_ioremap(%08lx, %08lx) [%d] => ",
-		       phys_addr, size, slot);
+		printk(KERN_INFO "early_ioremap(%08llx, %08lx) [%d] => ",
+		       (u64)phys_addr, size, slot);
 		dump_stack();
 	}
 
@@ -680,13 +683,15 @@ __early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot)
 }
 
 /* Remap an IO device */
-void __init __iomem *early_ioremap(unsigned long phys_addr, unsigned long size)
+void __init __iomem *
+early_ioremap(resource_size_t phys_addr, unsigned long size)
 {
 	return __early_ioremap(phys_addr, size, PAGE_KERNEL_IO);
 }
 
 /* Remap memory */
-void __init __iomem *early_memremap(unsigned long phys_addr, unsigned long size)
+void __init __iomem *
+early_memremap(resource_size_t phys_addr, unsigned long size)
 {
 	return __early_ioremap(phys_addr, size, PAGE_KERNEL);
 }
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index 2c4baa88f2cb..c9342ed8b402 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -378,27 +378,34 @@ static void clear_trace_list(void)
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-static cpumask_t downed_cpus;
+static cpumask_var_t downed_cpus;
 
 static void enter_uniprocessor(void)
 {
 	int cpu;
 	int err;
 
+	if (downed_cpus == NULL &&
+	    !alloc_cpumask_var(&downed_cpus, GFP_KERNEL)) {
+		pr_notice(NAME "Failed to allocate mask\n");
+		goto out;
+	}
+
 	get_online_cpus();
-	downed_cpus = cpu_online_map;
-	cpu_clear(first_cpu(cpu_online_map), downed_cpus);
+	cpumask_copy(downed_cpus, cpu_online_mask);
+	cpumask_clear_cpu(cpumask_first(cpu_online_mask), downed_cpus);
 	if (num_online_cpus() > 1)
 		pr_notice(NAME "Disabling non-boot CPUs...\n");
 	put_online_cpus();
 
-	for_each_cpu_mask(cpu, downed_cpus) {
+	for_each_cpu(cpu, downed_cpus) {
 		err = cpu_down(cpu);
 		if (!err)
 			pr_info(NAME "CPU%d is down.\n", cpu);
 		else
 			pr_err(NAME "Error taking CPU%d down: %d\n", cpu, err);
 	}
+out:
 	if (num_online_cpus() > 1)
 		pr_warning(NAME "multiple CPUs still online, "
 				"may miss events.\n");
@@ -411,10 +418,10 @@ static void __ref leave_uniprocessor(void)
 	int cpu;
 	int err;
 
-	if (cpus_weight(downed_cpus) == 0)
+	if (downed_cpus == NULL || cpumask_weight(downed_cpus) == 0)
 		return;
 	pr_notice(NAME "Re-enabling CPUs...\n");
-	for_each_cpu_mask(cpu, downed_cpus) {
+	for_each_cpu(cpu, downed_cpus) {
 		err = cpu_up(cpu);
 		if (!err)
 			pr_info(NAME "enabled CPU%d.\n", cpu);
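For background, the cpumask_var_t API this hunk converts to follows an allocate/copy/free discipline so masks can live off-stack when CONFIG_CPUMASK_OFFSTACK=y; a minimal sketch (the function itself is illustrative):

static int walk_online_cpus(void)
{
	cpumask_var_t snap;
	int cpu;

	if (!alloc_cpumask_var(&snap, GFP_KERNEL))
		return -ENOMEM;			/* may be heap-backed */

	cpumask_copy(snap, cpu_online_mask);	/* snapshot current state */
	for_each_cpu(cpu, snap)
		pr_debug("cpu %d was online\n", cpu);

	free_cpumask_var(snap);
	return 0;
}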
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
new file mode 100644
index 000000000000..550df481accd
--- /dev/null
+++ b/arch/x86/mm/numa.c
@@ -0,0 +1,67 @@
+/* Common code for 32 and 64-bit NUMA */
+#include <linux/topology.h>
+#include <linux/module.h>
+#include <linux/bootmem.h>
+
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+# define DBG(x...) printk(KERN_DEBUG x)
+#else
+# define DBG(x...)
+#endif
+
+/*
+ * Which logical CPUs are on which nodes
+ */
+cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
+EXPORT_SYMBOL(node_to_cpumask_map);
+
+/*
+ * Allocate node_to_cpumask_map based on number of available nodes
+ * Requires node_possible_map to be valid.
+ *
+ * Note: node_to_cpumask() is not valid until after this is done.
+ * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.)
+ */
+void __init setup_node_to_cpumask_map(void)
+{
+	unsigned int node, num = 0;
+
+	/* setup nr_node_ids if not done yet */
+	if (nr_node_ids == MAX_NUMNODES) {
+		for_each_node_mask(node, node_possible_map)
+			num = node;
+		nr_node_ids = num + 1;
+	}
+
+	/* allocate the map */
+	for (node = 0; node < nr_node_ids; node++)
+		alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]);
+
+	/* cpumask_of_node() will now work */
+	pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids);
+}
+
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+/*
+ * Returns a pointer to the bitmask of CPUs on Node 'node'.
+ */
+const struct cpumask *cpumask_of_node(int node)
+{
+	if (node >= nr_node_ids) {
+		printk(KERN_WARNING
+			"cpumask_of_node(%d): node > nr_node_ids(%d)\n",
+			node, nr_node_ids);
+		dump_stack();
+		return cpu_none_mask;
+	}
+	if (node_to_cpumask_map[node] == NULL) {
+		printk(KERN_WARNING
+			"cpumask_of_node(%d): no node_to_cpumask_map!\n",
+			node);
+		dump_stack();
+		return cpu_online_mask;
+	}
+	return node_to_cpumask_map[node];
+}
+EXPORT_SYMBOL(cpumask_of_node);
+#endif
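For orientation, a hedged sketch of how callers consume the per-node masks that setup_node_to_cpumask_map() allocates (the reporting function is illustrative):

/* Print how many CPUs sit on each possible node. */
static void report_cpus_per_node(void)
{
	int node;

	for_each_node_mask(node, node_possible_map) {
		const struct cpumask *mask = cpumask_of_node(node);

		pr_info("node %d: %d cpus\n", node, cpumask_weight(mask));
	}
}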
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 64c9cf043cdd..d73aaa892371 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -20,12 +20,6 @@
 #include <asm/acpi.h>
 #include <asm/k8.h>
 
-#ifdef CONFIG_DEBUG_PER_CPU_MAPS
-# define DBG(x...) printk(KERN_DEBUG x)
-#else
-# define DBG(x...)
-#endif
-
 struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
 EXPORT_SYMBOL(node_data);
 
@@ -49,12 +43,6 @@ DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
 EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
 
 /*
- * Which logical CPUs are on which nodes
- */
-cpumask_t *node_to_cpumask_map;
-EXPORT_SYMBOL(node_to_cpumask_map);
-
-/*
  * Given a shift value, try to populate memnodemap[]
  * Returns :
  * 1 if OK
@@ -661,36 +649,6 @@ void __init init_cpu_to_node(void)
 #endif
 
 
-/*
- * Allocate node_to_cpumask_map based on number of available nodes
- * Requires node_possible_map to be valid.
- *
- * Note: node_to_cpumask() is not valid until after this is done.
- * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.)
- */
-void __init setup_node_to_cpumask_map(void)
-{
-	unsigned int node, num = 0;
-	cpumask_t *map;
-
-	/* setup nr_node_ids if not done yet */
-	if (nr_node_ids == MAX_NUMNODES) {
-		for_each_node_mask(node, node_possible_map)
-			num = node;
-		nr_node_ids = num + 1;
-	}
-
-	/* allocate the map */
-	map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t));
-	DBG("node_to_cpumask_map at %p for %d nodes\n", map, nr_node_ids);
-
-	pr_debug("Node to cpumask map at %p for %d nodes\n",
-		 map, nr_node_ids);
-
-	/* node_to_cpumask() will now work */
-	node_to_cpumask_map = map;
-}
-
 void __cpuinit numa_set_node(int cpu, int node)
 {
 	int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
@@ -723,12 +681,12 @@ void __cpuinit numa_clear_node(int cpu)
 
 void __cpuinit numa_add_cpu(int cpu)
 {
-	cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
+	cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
 }
 
 void __cpuinit numa_remove_cpu(int cpu)
 {
-	cpu_clear(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
+	cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
 }
 
 #else /* CONFIG_DEBUG_PER_CPU_MAPS */
@@ -739,20 +697,20 @@ void __cpuinit numa_remove_cpu(int cpu)
 static void __cpuinit numa_set_cpumask(int cpu, int enable)
 {
 	int node = early_cpu_to_node(cpu);
-	cpumask_t *mask;
+	struct cpumask *mask;
 	char buf[64];
 
-	if (node_to_cpumask_map == NULL) {
-		printk(KERN_ERR "node_to_cpumask_map NULL\n");
+	mask = node_to_cpumask_map[node];
+	if (mask == NULL) {
+		printk(KERN_ERR "node_to_cpumask_map[%i] NULL\n", node);
 		dump_stack();
 		return;
 	}
 
-	mask = &node_to_cpumask_map[node];
 	if (enable)
-		cpu_set(cpu, *mask);
+		cpumask_set_cpu(cpu, mask);
 	else
-		cpu_clear(cpu, *mask);
+		cpumask_clear_cpu(cpu, mask);
 
 	cpulist_scnprintf(buf, sizeof(buf), mask);
 	printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
@@ -799,59 +757,6 @@ int early_cpu_to_node(int cpu)
 	return per_cpu(x86_cpu_to_node_map, cpu);
 }
 
-
-/* empty cpumask */
-static const cpumask_t cpu_mask_none;
-
-/*
- * Returns a pointer to the bitmask of CPUs on Node 'node'.
- */
-const cpumask_t *cpumask_of_node(int node)
-{
-	if (node_to_cpumask_map == NULL) {
-		printk(KERN_WARNING
-			"cpumask_of_node(%d): no node_to_cpumask_map!\n",
-			node);
-		dump_stack();
-		return (const cpumask_t *)&cpu_online_map;
-	}
-	if (node >= nr_node_ids) {
-		printk(KERN_WARNING
-			"cpumask_of_node(%d): node > nr_node_ids(%d)\n",
-			node, nr_node_ids);
-		dump_stack();
-		return &cpu_mask_none;
-	}
-	return &node_to_cpumask_map[node];
-}
-EXPORT_SYMBOL(cpumask_of_node);
-
-/*
- * Returns a bitmask of CPUs on Node 'node'.
- *
- * Side note: this function creates the returned cpumask on the stack
- * so with a high NR_CPUS count, excessive stack space is used. The
- * node_to_cpumask_ptr function should be used whenever possible.
- */
-cpumask_t node_to_cpumask(int node)
-{
-	if (node_to_cpumask_map == NULL) {
-		printk(KERN_WARNING
-			"node_to_cpumask(%d): no node_to_cpumask_map!\n", node);
-		dump_stack();
-		return cpu_online_map;
-	}
-	if (node >= nr_node_ids) {
-		printk(KERN_WARNING
-			"node_to_cpumask(%d): node > nr_node_ids(%d)\n",
-			node, nr_node_ids);
-		dump_stack();
-		return cpu_mask_none;
-	}
-	return node_to_cpumask_map[node];
-}
-EXPORT_SYMBOL(node_to_cpumask);
-
 /*
  * --------- end of debug versions of the numa functions ---------
  */
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index d71e1b636ce6..797f9f107cb6 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -945,71 +945,94 @@ int _set_memory_uc(unsigned long addr, int numpages)
 
 int set_memory_uc(unsigned long addr, int numpages)
 {
+	int ret;
+
 	/*
 	 * for now UC MINUS. see comments in ioremap_nocache()
 	 */
-	if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
-			    _PAGE_CACHE_UC_MINUS, NULL))
-		return -EINVAL;
+	ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
+			    _PAGE_CACHE_UC_MINUS, NULL);
+	if (ret)
+		goto out_err;
+
+	ret = _set_memory_uc(addr, numpages);
+	if (ret)
+		goto out_free;
 
-	return _set_memory_uc(addr, numpages);
+	return 0;
+
+out_free:
+	free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
+out_err:
+	return ret;
 }
 EXPORT_SYMBOL(set_memory_uc);
 
 int set_memory_array_uc(unsigned long *addr, int addrinarray)
 {
-	unsigned long start;
-	unsigned long end;
-	int i;
+	int i, j;
+	int ret;
+
 	/*
 	 * for now UC MINUS. see comments in ioremap_nocache()
 	 */
 	for (i = 0; i < addrinarray; i++) {
-		start = __pa(addr[i]);
-		for (end = start + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) {
-			if (end != __pa(addr[i + 1]))
-				break;
-			i++;
-		}
-		if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL))
-			goto out;
+		ret = reserve_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE,
+					_PAGE_CACHE_UC_MINUS, NULL);
+		if (ret)
+			goto out_free;
 	}
 
-	return change_page_attr_set(addr, addrinarray,
+	ret = change_page_attr_set(addr, addrinarray,
 				    __pgprot(_PAGE_CACHE_UC_MINUS), 1);
-out:
-	for (i = 0; i < addrinarray; i++) {
-		unsigned long tmp = __pa(addr[i]);
-
-		if (tmp == start)
-			break;
-		for (end = tmp + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) {
-			if (end != __pa(addr[i + 1]))
-				break;
-			i++;
-		}
-		free_memtype(tmp, end);
-	}
-	return -EINVAL;
+	if (ret)
+		goto out_free;
+
+	return 0;
+
+out_free:
+	for (j = 0; j < i; j++)
+		free_memtype(__pa(addr[j]), __pa(addr[j]) + PAGE_SIZE);
+
+	return ret;
 }
 EXPORT_SYMBOL(set_memory_array_uc);
 
 int _set_memory_wc(unsigned long addr, int numpages)
 {
-	return change_page_attr_set(&addr, numpages,
+	int ret;
+	ret = change_page_attr_set(&addr, numpages,
+				    __pgprot(_PAGE_CACHE_UC_MINUS), 0);
+
+	if (!ret) {
+		ret = change_page_attr_set(&addr, numpages,
 				    __pgprot(_PAGE_CACHE_WC), 0);
+	}
+	return ret;
 }
 
 int set_memory_wc(unsigned long addr, int numpages)
 {
+	int ret;
+
 	if (!pat_enabled)
 		return set_memory_uc(addr, numpages);
 
-	if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
-		_PAGE_CACHE_WC, NULL))
-		return -EINVAL;
+	ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
+		_PAGE_CACHE_WC, NULL);
+	if (ret)
+		goto out_err;
+
+	ret = _set_memory_wc(addr, numpages);
+	if (ret)
+		goto out_free;
+
+	return 0;
 
-	return _set_memory_wc(addr, numpages);
+out_free:
+	free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
+out_err:
+	return ret;
 }
 EXPORT_SYMBOL(set_memory_wc);
 
@@ -1021,29 +1044,31 @@ int _set_memory_wb(unsigned long addr, int numpages)
 
 int set_memory_wb(unsigned long addr, int numpages)
 {
-	free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
+	int ret;
+
+	ret = _set_memory_wb(addr, numpages);
+	if (ret)
+		return ret;
 
-	return _set_memory_wb(addr, numpages);
+	free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
+	return 0;
 }
 EXPORT_SYMBOL(set_memory_wb);
 
 int set_memory_array_wb(unsigned long *addr, int addrinarray)
 {
 	int i;
+	int ret;
 
-	for (i = 0; i < addrinarray; i++) {
-		unsigned long start = __pa(addr[i]);
-		unsigned long end;
-
-		for (end = start + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) {
-			if (end != __pa(addr[i + 1]))
-				break;
-			i++;
-		}
-		free_memtype(start, end);
-	}
-	return change_page_attr_clear(addr, addrinarray,
+	ret = change_page_attr_clear(addr, addrinarray,
 					__pgprot(_PAGE_CACHE_MASK), 1);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < addrinarray; i++)
+		free_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE);
+
+	return 0;
 }
 EXPORT_SYMBOL(set_memory_array_wb);
 
@@ -1136,6 +1161,8 @@ int set_pages_array_wb(struct page **pages, int addrinarray)
 
 	retval = cpa_clear_pages_array(pages, addrinarray,
 			__pgprot(_PAGE_CACHE_MASK));
+	if (retval)
+		return retval;
 
 	for (i = 0; i < addrinarray; i++) {
 		start = (unsigned long)page_address(pages[i]);
@@ -1143,7 +1170,7 @@ int set_pages_array_wb(struct page **pages, int addrinarray)
 		free_memtype(start, end);
 	}
 
-	return retval;
+	return 0;
 }
 EXPORT_SYMBOL(set_pages_array_wb);
 
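The reworked helpers above all share one shape: reserve the memtype, attempt the page-attribute change, and unwind the reservation on failure. A hedged sketch of how a caller relies on that pairing (names illustrative):

/* Make a kernel buffer uncached for a non-coherent device, then restore. */
static int with_uncached(unsigned long addr, int numpages)
{
	int ret;

	ret = set_memory_uc(addr, numpages);	/* reserve memtype, then CPA */
	if (ret)
		return ret;			/* reservation already undone */

	/* ... share the buffer with the device ... */

	return set_memory_wb(addr, numpages);	/* CPA back, then free memtype */
}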
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 640339ee4fb2..e6718bb28065 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -31,7 +31,7 @@
 #ifdef CONFIG_X86_PAT
 int __read_mostly pat_enabled = 1;
 
-void __cpuinit pat_disable(const char *reason)
+static inline void pat_disable(const char *reason)
 {
 	pat_enabled = 0;
 	printk(KERN_INFO "%s\n", reason);
@@ -182,10 +182,10 @@ static unsigned long pat_x_mtrr_type(u64 start, u64 end, unsigned long req_type)
 		u8 mtrr_type;
 
 		mtrr_type = mtrr_type_lookup(start, end);
-		if (mtrr_type == MTRR_TYPE_UNCACHABLE)
-			return _PAGE_CACHE_UC;
-		if (mtrr_type == MTRR_TYPE_WRCOMB)
-			return _PAGE_CACHE_WC;
+		if (mtrr_type != MTRR_TYPE_WRBACK)
+			return _PAGE_CACHE_UC_MINUS;
+
+		return _PAGE_CACHE_WB;
 	}
 
 	return req_type;
@@ -352,23 +352,13 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 		return 0;
 	}
 
-	if (req_type == -1) {
-		/*
-		 * Call mtrr_lookup to get the type hint. This is an
-		 * optimization for /dev/mem mmap'ers into WB memory (BIOS
-		 * tools and ACPI tools). Use WB request for WB memory and use
-		 * UC_MINUS otherwise.
-		 */
-		u8 mtrr_type = mtrr_type_lookup(start, end);
-
-		if (mtrr_type == MTRR_TYPE_WRBACK)
-			actual_type = _PAGE_CACHE_WB;
-		else
-			actual_type = _PAGE_CACHE_UC_MINUS;
-	} else {
-		actual_type = pat_x_mtrr_type(start, end,
-					      req_type & _PAGE_CACHE_MASK);
-	}
+	/*
+	 * Call mtrr_lookup to get the type hint. This is an
+	 * optimization for /dev/mem mmap'ers into WB memory (BIOS
+	 * tools and ACPI tools). Use WB request for WB memory and use
+	 * UC_MINUS otherwise.
+	 */
+	actual_type = pat_x_mtrr_type(start, end, req_type & _PAGE_CACHE_MASK);
 
 	if (new_type)
 		*new_type = actual_type;
@@ -546,9 +536,7 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
 int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
 				unsigned long size, pgprot_t *vma_prot)
 {
-	u64 offset = ((u64) pfn) << PAGE_SHIFT;
-	unsigned long flags = -1;
-	int retval;
+	unsigned long flags = _PAGE_CACHE_WB;
 
 	if (!range_is_allowed(pfn, size))
 		return 0;
@@ -576,64 +564,11 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
 	}
 #endif
 
-	/*
-	 * With O_SYNC, we can only take UC_MINUS mapping. Fail if we cannot.
-	 *
-	 * Without O_SYNC, we want to get
-	 * - WB for WB-able memory and no other conflicting mappings
-	 * - UC_MINUS for non-WB-able memory with no other conflicting mappings
-	 * - Inherit from confliting mappings otherwise
-	 */
-	if (flags != -1) {
-		retval = reserve_memtype(offset, offset + size, flags, NULL);
-	} else {
-		retval = reserve_memtype(offset, offset + size, -1, &flags);
-	}
-
-	if (retval < 0)
-		return 0;
-
-	if (((pfn < max_low_pfn_mapped) ||
-	     (pfn >= (1UL<<(32 - PAGE_SHIFT)) && pfn < max_pfn_mapped)) &&
-	    ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) {
-		free_memtype(offset, offset + size);
-		printk(KERN_INFO
-		"%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n",
-			current->comm, current->pid,
-			cattr_name(flags),
-			offset, (unsigned long long)(offset + size));
-		return 0;
-	}
-
 	*vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) |
 			     flags);
 	return 1;
 }
 
-void map_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot)
-{
-	unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK);
-	u64 addr = (u64)pfn << PAGE_SHIFT;
-	unsigned long flags;
-
-	reserve_memtype(addr, addr + size, want_flags, &flags);
-	if (flags != want_flags) {
-		printk(KERN_INFO
-		"%s:%d /dev/mem expected mapping type %s for %Lx-%Lx, got %s\n",
-			current->comm, current->pid,
-			cattr_name(want_flags),
-			addr, (unsigned long long)(addr + size),
-			cattr_name(flags));
-	}
-}
-
-void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot)
-{
-	u64 addr = (u64)pfn << PAGE_SHIFT;
-
-	free_memtype(addr, addr + size);
-}
-
 /*
  * Change the memory type for the physial address range in kernel identity
  * mapping space if that range is a part of identity map.
@@ -671,8 +606,8 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
 {
 	int is_ram = 0;
 	int ret;
-	unsigned long flags;
 	unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK);
+	unsigned long flags = want_flags;
 
 	is_ram = pat_pagerange_is_ram(paddr, paddr + size);
 
@@ -734,29 +669,28 @@ static void free_pfn_range(u64 paddr, unsigned long size)
  *
  * If the vma has a linear pfn mapping for the entire range, we get the prot
  * from pte and reserve the entire vma range with single reserve_pfn_range call.
- * Otherwise, we reserve the entire vma range, my ging through the PTEs page
- * by page to get physical address and protection.
  */
 int track_pfn_vma_copy(struct vm_area_struct *vma)
 {
-	int retval = 0;
-	unsigned long i, j;
 	resource_size_t paddr;
 	unsigned long prot;
-	unsigned long vma_start = vma->vm_start;
-	unsigned long vma_end = vma->vm_end;
-	unsigned long vma_size = vma_end - vma_start;
+	unsigned long vma_size = vma->vm_end - vma->vm_start;
 	pgprot_t pgprot;
 
 	if (!pat_enabled)
 		return 0;
 
+	/*
+	 * For now, only handle remap_pfn_range() vmas where
+	 * is_linear_pfn_mapping() == TRUE. Handling of
+	 * vm_insert_pfn() is TBD.
+	 */
 	if (is_linear_pfn_mapping(vma)) {
 		/*
 		 * reserve the whole chunk covered by vma. We need the
 		 * starting address and protection from pte.
 		 */
-		if (follow_phys(vma, vma_start, 0, &prot, &paddr)) {
+		if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
 			WARN_ON_ONCE(1);
 			return -EINVAL;
 		}
@@ -764,28 +698,7 @@ int track_pfn_vma_copy(struct vm_area_struct *vma)
 		return reserve_pfn_range(paddr, vma_size, &pgprot, 1);
 	}
 
-	/* reserve entire vma page by page, using pfn and prot from pte */
-	for (i = 0; i < vma_size; i += PAGE_SIZE) {
-		if (follow_phys(vma, vma_start + i, 0, &prot, &paddr))
-			continue;
-
-		pgprot = __pgprot(prot);
-		retval = reserve_pfn_range(paddr, PAGE_SIZE, &pgprot, 1);
-		if (retval)
-			goto cleanup_ret;
-	}
 	return 0;
-
-cleanup_ret:
-	/* Reserve error: Cleanup partial reservation and return error */
-	for (j = 0; j < i; j += PAGE_SIZE) {
-		if (follow_phys(vma, vma_start + j, 0, &prot, &paddr))
-			continue;
-
-		free_pfn_range(paddr, PAGE_SIZE);
-	}
-
-	return retval;
 }
 
 /*
@@ -795,50 +708,28 @@ cleanup_ret:
  * prot is passed in as a parameter for the new mapping. If the vma has a
  * linear pfn mapping for the entire range reserve the entire vma range with
  * single reserve_pfn_range call.
- * Otherwise, we look t the pfn and size and reserve only the specified range
- * page by page.
- *
- * Note that this function can be called with caller trying to map only a
- * subrange/page inside the vma.
  */
 int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
 			unsigned long pfn, unsigned long size)
 {
-	int retval = 0;
-	unsigned long i, j;
-	resource_size_t base_paddr;
 	resource_size_t paddr;
-	unsigned long vma_start = vma->vm_start;
-	unsigned long vma_end = vma->vm_end;
-	unsigned long vma_size = vma_end - vma_start;
+	unsigned long vma_size = vma->vm_end - vma->vm_start;
 
 	if (!pat_enabled)
 		return 0;
 
+	/*
+	 * For now, only handle remap_pfn_range() vmas where
+	 * is_linear_pfn_mapping() == TRUE. Handling of
+	 * vm_insert_pfn() is TBD.
+	 */
 	if (is_linear_pfn_mapping(vma)) {
 		/* reserve the whole chunk starting from vm_pgoff */
 		paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
 		return reserve_pfn_range(paddr, vma_size, prot, 0);
 	}
 
-	/* reserve page by page using pfn and size */
-	base_paddr = (resource_size_t)pfn << PAGE_SHIFT;
-	for (i = 0; i < size; i += PAGE_SIZE) {
-		paddr = base_paddr + i;
-		retval = reserve_pfn_range(paddr, PAGE_SIZE, prot, 0);
-		if (retval)
-			goto cleanup_ret;
-	}
 	return 0;
-
-cleanup_ret:
-	/* Reserve error: Cleanup partial reservation and return error */
-	for (j = 0; j < i; j += PAGE_SIZE) {
-		paddr = base_paddr + j;
-		free_pfn_range(paddr, PAGE_SIZE);
-	}
-
-	return retval;
 }
 
 /*
@@ -849,39 +740,23 @@ cleanup_ret:
 void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
 			unsigned long size)
 {
-	unsigned long i;
 	resource_size_t paddr;
-	unsigned long prot;
-	unsigned long vma_start = vma->vm_start;
-	unsigned long vma_end = vma->vm_end;
-	unsigned long vma_size = vma_end - vma_start;
+	unsigned long vma_size = vma->vm_end - vma->vm_start;
 
 	if (!pat_enabled)
 		return;
 
+	/*
+	 * For now, only handle remap_pfn_range() vmas where
+	 * is_linear_pfn_mapping() == TRUE. Handling of
+	 * vm_insert_pfn() is TBD.
+	 */
 	if (is_linear_pfn_mapping(vma)) {
 		/* free the whole chunk starting from vm_pgoff */
 		paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
 		free_pfn_range(paddr, vma_size);
 		return;
 	}
-
-	if (size != 0 && size != vma_size) {
-		/* free page by page, using pfn and size */
-		paddr = (resource_size_t)pfn << PAGE_SHIFT;
-		for (i = 0; i < size; i += PAGE_SIZE) {
-			paddr = paddr + i;
-			free_pfn_range(paddr, PAGE_SIZE);
-		}
-	} else {
-		/* free entire vma, page by page, using the pfn from pte */
-		for (i = 0; i < vma_size; i += PAGE_SIZE) {
-			if (follow_phys(vma, vma_start + i, 0, &prot, &paddr))
-				continue;
-
-			free_pfn_range(paddr, PAGE_SIZE);
-		}
-	}
 }
 
 pgprot_t pgprot_writecombine(pgprot_t prot)
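For orientation, the only path the track/untrack hooks still handle is a driver mapping its whole vma with one remap_pfn_range() call, which is what makes is_linear_pfn_mapping() true. A hedged sketch of such an mmap handler (the driver name and base address are hypothetical):

static resource_size_t mydrv_phys_base;	/* hypothetical device base */

static int mydrv_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long pfn = (mydrv_phys_base >> PAGE_SHIFT) + vma->vm_pgoff;

	/* One call covering the whole vma; PAT reserves it as one chunk. */
	return remap_pfn_range(vma, vma->vm_start, pfn,
			       vma->vm_end - vma->vm_start,
			       vma->vm_page_prot);
}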
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 5b7c7c8464fe..7aa03a5389f5 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -345,7 +345,8 @@ void __native_set_fixmap(enum fixed_addresses idx, pte_t pte)
 	fixmaps_set++;
 }
 
-void native_set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
+void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys,
+		       pgprot_t flags)
 {
 	__native_set_fixmap(idx, pfn_pte(phys >> PAGE_SHIFT, flags));
 }
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 574c8bc95ef0..c7d272b8574c 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -116,6 +116,36 @@ void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
 	reserve_early(phys, phys + length, "ACPI SLIT");
 }
 
+/* Callback for Proximity Domain -> x2APIC mapping */
+void __init
+acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
+{
+	int pxm, node;
+	int apic_id;
+
+	if (srat_disabled())
+		return;
+	if (pa->header.length < sizeof(struct acpi_srat_x2apic_cpu_affinity)) {
+		bad_srat();
+		return;
+	}
+	if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
+		return;
+	pxm = pa->proximity_domain;
+	node = setup_node(pxm);
+	if (node < 0) {
+		printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
+		bad_srat();
+		return;
+	}
+
+	apic_id = pa->apic_id;
+	apicid_to_node[apic_id] = node;
+	acpi_numa = 1;
+	printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n",
+	       pxm, apic_id, node);
+}
+
 /* Callback for Proximity Domain -> LAPIC mapping */
 void __init
 acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)