aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2009-09-15 12:19:38 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2009-09-15 12:19:38 -0400
commit227423904c709a8e60245c97081bbeb4fb500655 (patch)
tree97db1b8df1e4518334aea2fdf60363e0a691eb1e /arch
parent1aaf2e59135fd67321f47c11c64a54aac27014e9 (diff)
parentfa526d0d641b5365676a1fb821ce359e217c9b85 (diff)
Merge branch 'x86-pat-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'x86-pat-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: x86, pat: Fix cacheflush address in change_page_attr_set_clr() mm: remove !NUMA condition from PAGEFLAGS_EXTENDED condition set x86: Fix earlyprintk=dbgp for machines without NX x86, pat: Sanity check remap_pfn_range for RAM region x86, pat: Lookup the protection from memtype list on vm_insert_pfn() x86, pat: Add lookup_memtype to get the current memtype of a paddr x86, pat: Use page flags to track memtypes of RAM pages x86, pat: Generalize the use of page flag PG_uncached x86, pat: Add rbtree to do quick lookup in memtype tracking x86, pat: Add PAT reserve free to io_mapping* APIs x86, pat: New i/f for driver to request memtype for IO regions x86, pat: ioremap to follow same PAT restrictions as other PAT users x86, pat: Keep identity maps consistent with mmaps even when pat_disabled x86, mtrr: make mtrr_aps_delayed_init static bool x86, pat/mtrr: Rendezvous all the cpus for MTRR/PAT init generic-ipi: Allow cpus not yet online to call smp_call_function with irqs disabled x86: Fix an incorrect argument of reserve_bootmem() x86: Fix system crash when loading with "reservetop" parameter
Diffstat (limited to 'arch')
-rw-r--r--arch/ia64/Kconfig4
-rw-r--r--arch/x86/Kconfig4
-rw-r--r--arch/x86/include/asm/cacheflush.h54
-rw-r--r--arch/x86/include/asm/iomap.h9
-rw-r--r--arch/x86/include/asm/mtrr.h6
-rw-r--r--arch/x86/include/asm/pat.h5
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c46
-rw-r--r--arch/x86/kernel/setup.c20
-rw-r--r--arch/x86/kernel/smpboot.c14
-rw-r--r--arch/x86/mm/iomap_32.c27
-rw-r--r--arch/x86/mm/ioremap.c18
-rw-r--r--arch/x86/mm/pageattr.c8
-rw-r--r--arch/x86/mm/pat.c353
-rw-r--r--arch/x86/power/cpu.c2
14 files changed, 444 insertions, 126 deletions
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 170042b420d4..e6246119932a 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -112,6 +112,10 @@ config IA64_UNCACHED_ALLOCATOR
112 bool 112 bool
113 select GENERIC_ALLOCATOR 113 select GENERIC_ALLOCATOR
114 114
115config ARCH_USES_PG_UNCACHED
116 def_bool y
117 depends on IA64_UNCACHED_ALLOCATOR
118
115config AUDIT_ARCH 119config AUDIT_ARCH
116 bool 120 bool
117 default y 121 default y
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 74bf3e30ce75..a800b0faaad6 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1417,6 +1417,10 @@ config X86_PAT
1417 1417
1418 If unsure, say Y. 1418 If unsure, say Y.
1419 1419
1420config ARCH_USES_PG_UNCACHED
1421 def_bool y
1422 depends on X86_PAT
1423
1420config EFI 1424config EFI
1421 bool "EFI runtime service support" 1425 bool "EFI runtime service support"
1422 depends on ACPI 1426 depends on ACPI
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index e55dfc1ad453..b54f6afe7ec4 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@ -43,8 +43,58 @@ static inline void copy_from_user_page(struct vm_area_struct *vma,
43 memcpy(dst, src, len); 43 memcpy(dst, src, len);
44} 44}
45 45
46#define PG_non_WB PG_arch_1 46#define PG_WC PG_arch_1
47PAGEFLAG(NonWB, non_WB) 47PAGEFLAG(WC, WC)
48
49#ifdef CONFIG_X86_PAT
50/*
51 * X86 PAT uses page flags WC and Uncached together to keep track of
52 * memory type of pages that have backing page struct. X86 PAT supports 3
53 * different memory types, _PAGE_CACHE_WB, _PAGE_CACHE_WC and
54 * _PAGE_CACHE_UC_MINUS and fourth state where page's memory type has not
55 * been changed from its default (value of -1 used to denote this).
56 * Note we do not support _PAGE_CACHE_UC here.
57 *
58 * Caller must hold memtype_lock for atomicity.
59 */
60static inline unsigned long get_page_memtype(struct page *pg)
61{
62 if (!PageUncached(pg) && !PageWC(pg))
63 return -1;
64 else if (!PageUncached(pg) && PageWC(pg))
65 return _PAGE_CACHE_WC;
66 else if (PageUncached(pg) && !PageWC(pg))
67 return _PAGE_CACHE_UC_MINUS;
68 else
69 return _PAGE_CACHE_WB;
70}
71
72static inline void set_page_memtype(struct page *pg, unsigned long memtype)
73{
74 switch (memtype) {
75 case _PAGE_CACHE_WC:
76 ClearPageUncached(pg);
77 SetPageWC(pg);
78 break;
79 case _PAGE_CACHE_UC_MINUS:
80 SetPageUncached(pg);
81 ClearPageWC(pg);
82 break;
83 case _PAGE_CACHE_WB:
84 SetPageUncached(pg);
85 SetPageWC(pg);
86 break;
87 default:
88 case -1:
89 ClearPageUncached(pg);
90 ClearPageWC(pg);
91 break;
92 }
93}
94#else
95static inline unsigned long get_page_memtype(struct page *pg) { return -1; }
96static inline void set_page_memtype(struct page *pg, unsigned long memtype) { }
97#endif
48 98
49/* 99/*
50 * The set_memory_* API can be used to change various attributes of a virtual 100 * The set_memory_* API can be used to change various attributes of a virtual
diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h
index 0e9fe1d9d971..f35eb45d6576 100644
--- a/arch/x86/include/asm/iomap.h
+++ b/arch/x86/include/asm/iomap.h
@@ -26,13 +26,16 @@
26#include <asm/pgtable.h> 26#include <asm/pgtable.h>
27#include <asm/tlbflush.h> 27#include <asm/tlbflush.h>
28 28
29int
30is_io_mapping_possible(resource_size_t base, unsigned long size);
31
32void * 29void *
33iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot); 30iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
34 31
35void 32void
36iounmap_atomic(void *kvaddr, enum km_type type); 33iounmap_atomic(void *kvaddr, enum km_type type);
37 34
35int
36iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot);
37
38void
39iomap_free(resource_size_t base, unsigned long size);
40
38#endif /* _ASM_X86_IOMAP_H */ 41#endif /* _ASM_X86_IOMAP_H */
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
index a51ada8467de..4365ffdb461f 100644
--- a/arch/x86/include/asm/mtrr.h
+++ b/arch/x86/include/asm/mtrr.h
@@ -121,6 +121,9 @@ extern int mtrr_del_page(int reg, unsigned long base, unsigned long size);
121extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi); 121extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi);
122extern void mtrr_ap_init(void); 122extern void mtrr_ap_init(void);
123extern void mtrr_bp_init(void); 123extern void mtrr_bp_init(void);
124extern void set_mtrr_aps_delayed_init(void);
125extern void mtrr_aps_init(void);
126extern void mtrr_bp_restore(void);
124extern int mtrr_trim_uncached_memory(unsigned long end_pfn); 127extern int mtrr_trim_uncached_memory(unsigned long end_pfn);
125extern int amd_special_default_mtrr(void); 128extern int amd_special_default_mtrr(void);
126# else 129# else
@@ -161,6 +164,9 @@ static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
161 164
162#define mtrr_ap_init() do {} while (0) 165#define mtrr_ap_init() do {} while (0)
163#define mtrr_bp_init() do {} while (0) 166#define mtrr_bp_init() do {} while (0)
167#define set_mtrr_aps_delayed_init() do {} while (0)
168#define mtrr_aps_init() do {} while (0)
169#define mtrr_bp_restore() do {} while (0)
164# endif 170# endif
165 171
166#ifdef CONFIG_COMPAT 172#ifdef CONFIG_COMPAT
diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h
index 7af14e512f97..e2c1668dde7a 100644
--- a/arch/x86/include/asm/pat.h
+++ b/arch/x86/include/asm/pat.h
@@ -19,4 +19,9 @@ extern int free_memtype(u64 start, u64 end);
19extern int kernel_map_sync_memtype(u64 base, unsigned long size, 19extern int kernel_map_sync_memtype(u64 base, unsigned long size,
20 unsigned long flag); 20 unsigned long flag);
21 21
22int io_reserve_memtype(resource_size_t start, resource_size_t end,
23 unsigned long *type);
24
25void io_free_memtype(resource_size_t start, resource_size_t end);
26
22#endif /* _ASM_X86_PAT_H */ 27#endif /* _ASM_X86_PAT_H */
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 7af0f88a4163..84e83de54575 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -58,6 +58,7 @@ unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
58static DEFINE_MUTEX(mtrr_mutex); 58static DEFINE_MUTEX(mtrr_mutex);
59 59
60u64 size_or_mask, size_and_mask; 60u64 size_or_mask, size_and_mask;
61static bool mtrr_aps_delayed_init;
61 62
62static struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM]; 63static struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM];
63 64
@@ -163,7 +164,10 @@ static void ipi_handler(void *info)
163 if (data->smp_reg != ~0U) { 164 if (data->smp_reg != ~0U) {
164 mtrr_if->set(data->smp_reg, data->smp_base, 165 mtrr_if->set(data->smp_reg, data->smp_base,
165 data->smp_size, data->smp_type); 166 data->smp_size, data->smp_type);
166 } else { 167 } else if (mtrr_aps_delayed_init) {
168 /*
169 * Initialize the MTRRs inaddition to the synchronisation.
170 */
167 mtrr_if->set_all(); 171 mtrr_if->set_all();
168 } 172 }
169 173
@@ -265,6 +269,8 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ
265 */ 269 */
266 if (reg != ~0U) 270 if (reg != ~0U)
267 mtrr_if->set(reg, base, size, type); 271 mtrr_if->set(reg, base, size, type);
272 else if (!mtrr_aps_delayed_init)
273 mtrr_if->set_all();
268 274
269 /* Wait for the others */ 275 /* Wait for the others */
270 while (atomic_read(&data.count)) 276 while (atomic_read(&data.count))
@@ -721,9 +727,7 @@ void __init mtrr_bp_init(void)
721 727
722void mtrr_ap_init(void) 728void mtrr_ap_init(void)
723{ 729{
724 unsigned long flags; 730 if (!use_intel() || mtrr_aps_delayed_init)
725
726 if (!mtrr_if || !use_intel())
727 return; 731 return;
728 /* 732 /*
729 * Ideally we should hold mtrr_mutex here to avoid mtrr entries 733 * Ideally we should hold mtrr_mutex here to avoid mtrr entries
@@ -738,11 +742,7 @@ void mtrr_ap_init(void)
738 * 2. cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug 742 * 2. cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug
739 * lock to prevent mtrr entry changes 743 * lock to prevent mtrr entry changes
740 */ 744 */
741 local_irq_save(flags); 745 set_mtrr(~0U, 0, 0, 0);
742
743 mtrr_if->set_all();
744
745 local_irq_restore(flags);
746} 746}
747 747
748/** 748/**
@@ -753,6 +753,34 @@ void mtrr_save_state(void)
753 smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1); 753 smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1);
754} 754}
755 755
756void set_mtrr_aps_delayed_init(void)
757{
758 if (!use_intel())
759 return;
760
761 mtrr_aps_delayed_init = true;
762}
763
764/*
765 * MTRR initialization for all AP's
766 */
767void mtrr_aps_init(void)
768{
769 if (!use_intel())
770 return;
771
772 set_mtrr(~0U, 0, 0, 0);
773 mtrr_aps_delayed_init = false;
774}
775
776void mtrr_bp_restore(void)
777{
778 if (!use_intel())
779 return;
780
781 mtrr_if->set_all();
782}
783
756static int __init mtrr_init_finialize(void) 784static int __init mtrr_init_finialize(void)
757{ 785{
758 if (!mtrr_if) 786 if (!mtrr_if)
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 61f86f241420..19f15c4076fb 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -712,6 +712,21 @@ void __init setup_arch(char **cmdline_p)
712 printk(KERN_INFO "Command line: %s\n", boot_command_line); 712 printk(KERN_INFO "Command line: %s\n", boot_command_line);
713#endif 713#endif
714 714
715 strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
716 *cmdline_p = command_line;
717
718#ifdef CONFIG_X86_64
719 /*
720 * Must call this twice: Once just to detect whether hardware doesn't
721 * support NX (so that the early EHCI debug console setup can safely
722 * call set_fixmap(), and then again after parsing early parameters to
723 * honor the respective command line option.
724 */
725 check_efer();
726#endif
727
728 parse_early_param();
729
715 /* VMI may relocate the fixmap; do this before touching ioremap area */ 730 /* VMI may relocate the fixmap; do this before touching ioremap area */
716 vmi_init(); 731 vmi_init();
717 732
@@ -794,11 +809,6 @@ void __init setup_arch(char **cmdline_p)
794#endif 809#endif
795#endif 810#endif
796 811
797 strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
798 *cmdline_p = command_line;
799
800 parse_early_param();
801
802#ifdef CONFIG_X86_64 812#ifdef CONFIG_X86_64
803 check_efer(); 813 check_efer();
804#endif 814#endif
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 9d8319183aae..a25eeec00080 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1118,9 +1118,22 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1118 1118
1119 if (is_uv_system()) 1119 if (is_uv_system())
1120 uv_system_init(); 1120 uv_system_init();
1121
1122 set_mtrr_aps_delayed_init();
1121out: 1123out:
1122 preempt_enable(); 1124 preempt_enable();
1123} 1125}
1126
1127void arch_enable_nonboot_cpus_begin(void)
1128{
1129 set_mtrr_aps_delayed_init();
1130}
1131
1132void arch_enable_nonboot_cpus_end(void)
1133{
1134 mtrr_aps_init();
1135}
1136
1124/* 1137/*
1125 * Early setup to make printk work. 1138 * Early setup to make printk work.
1126 */ 1139 */
@@ -1142,6 +1155,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
1142 setup_ioapic_dest(); 1155 setup_ioapic_dest();
1143#endif 1156#endif
1144 check_nmi_watchdog(); 1157 check_nmi_watchdog();
1158 mtrr_aps_init();
1145} 1159}
1146 1160
1147static int __initdata setup_possible_cpus = -1; 1161static int __initdata setup_possible_cpus = -1;
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index fe6f84ca121e..84e236ce76ba 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -21,7 +21,7 @@
21#include <linux/module.h> 21#include <linux/module.h>
22#include <linux/highmem.h> 22#include <linux/highmem.h>
23 23
24int is_io_mapping_possible(resource_size_t base, unsigned long size) 24static int is_io_mapping_possible(resource_size_t base, unsigned long size)
25{ 25{
26#if !defined(CONFIG_X86_PAE) && defined(CONFIG_PHYS_ADDR_T_64BIT) 26#if !defined(CONFIG_X86_PAE) && defined(CONFIG_PHYS_ADDR_T_64BIT)
27 /* There is no way to map greater than 1 << 32 address without PAE */ 27 /* There is no way to map greater than 1 << 32 address without PAE */
@@ -30,7 +30,30 @@ int is_io_mapping_possible(resource_size_t base, unsigned long size)
30#endif 30#endif
31 return 1; 31 return 1;
32} 32}
33EXPORT_SYMBOL_GPL(is_io_mapping_possible); 33
34int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot)
35{
36 unsigned long flag = _PAGE_CACHE_WC;
37 int ret;
38
39 if (!is_io_mapping_possible(base, size))
40 return -EINVAL;
41
42 ret = io_reserve_memtype(base, base + size, &flag);
43 if (ret)
44 return ret;
45
46 *prot = __pgprot(__PAGE_KERNEL | flag);
47 return 0;
48}
49EXPORT_SYMBOL_GPL(iomap_create_wc);
50
51void
52iomap_free(resource_size_t base, unsigned long size)
53{
54 io_free_memtype(base, base + size);
55}
56EXPORT_SYMBOL_GPL(iomap_free);
34 57
35void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) 58void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
36{ 59{
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 04e1ad60c63a..334e63ca7b2b 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -158,24 +158,14 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
158 retval = reserve_memtype(phys_addr, (u64)phys_addr + size, 158 retval = reserve_memtype(phys_addr, (u64)phys_addr + size,
159 prot_val, &new_prot_val); 159 prot_val, &new_prot_val);
160 if (retval) { 160 if (retval) {
161 pr_debug("Warning: reserve_memtype returned %d\n", retval); 161 printk(KERN_ERR "ioremap reserve_memtype failed %d\n", retval);
162 return NULL; 162 return NULL;
163 } 163 }
164 164
165 if (prot_val != new_prot_val) { 165 if (prot_val != new_prot_val) {
166 /* 166 if (!is_new_memtype_allowed(phys_addr, size,
167 * Do not fallback to certain memory types with certain 167 prot_val, new_prot_val)) {
168 * requested type: 168 printk(KERN_ERR
169 * - request is uc-, return cannot be write-back
170 * - request is uc-, return cannot be write-combine
171 * - request is write-combine, return cannot be write-back
172 */
173 if ((prot_val == _PAGE_CACHE_UC_MINUS &&
174 (new_prot_val == _PAGE_CACHE_WB ||
175 new_prot_val == _PAGE_CACHE_WC)) ||
176 (prot_val == _PAGE_CACHE_WC &&
177 new_prot_val == _PAGE_CACHE_WB)) {
178 pr_debug(
179 "ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n", 169 "ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n",
180 (unsigned long long)phys_addr, 170 (unsigned long long)phys_addr,
181 (unsigned long long)(phys_addr + size), 171 (unsigned long long)(phys_addr + size),
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 7e600c1962db..e245775ec856 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -822,6 +822,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
822{ 822{
823 struct cpa_data cpa; 823 struct cpa_data cpa;
824 int ret, cache, checkalias; 824 int ret, cache, checkalias;
825 unsigned long baddr = 0;
825 826
826 /* 827 /*
827 * Check, if we are requested to change a not supported 828 * Check, if we are requested to change a not supported
@@ -853,6 +854,11 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
853 */ 854 */
854 WARN_ON_ONCE(1); 855 WARN_ON_ONCE(1);
855 } 856 }
857 /*
858 * Save address for cache flush. *addr is modified in the call
859 * to __change_page_attr_set_clr() below.
860 */
861 baddr = *addr;
856 } 862 }
857 863
858 /* Must avoid aliasing mappings in the highmem code */ 864 /* Must avoid aliasing mappings in the highmem code */
@@ -900,7 +906,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
900 cpa_flush_array(addr, numpages, cache, 906 cpa_flush_array(addr, numpages, cache,
901 cpa.flags, pages); 907 cpa.flags, pages);
902 } else 908 } else
903 cpa_flush_range(*addr, numpages, cache); 909 cpa_flush_range(baddr, numpages, cache);
904 } else 910 } else
905 cpa_flush_all(cache); 911 cpa_flush_all(cache);
906 912
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index b2f7d3e59b86..d7ebc3a10f2f 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -15,6 +15,7 @@
15#include <linux/gfp.h> 15#include <linux/gfp.h>
16#include <linux/mm.h> 16#include <linux/mm.h>
17#include <linux/fs.h> 17#include <linux/fs.h>
18#include <linux/rbtree.h>
18 19
19#include <asm/cacheflush.h> 20#include <asm/cacheflush.h>
20#include <asm/processor.h> 21#include <asm/processor.h>
@@ -148,11 +149,10 @@ static char *cattr_name(unsigned long flags)
148 * areas). All the aliases have the same cache attributes of course. 149 * areas). All the aliases have the same cache attributes of course.
149 * Zero attributes are represented as holes. 150 * Zero attributes are represented as holes.
150 * 151 *
151 * Currently the data structure is a list because the number of mappings 152 * The data structure is a list that is also organized as an rbtree
152 * are expected to be relatively small. If this should be a problem 153 * sorted on the start address of memtype range.
153 * it could be changed to a rbtree or similar.
154 * 154 *
155 * memtype_lock protects the whole list. 155 * memtype_lock protects both the linear list and rbtree.
156 */ 156 */
157 157
158struct memtype { 158struct memtype {
@@ -160,11 +160,53 @@ struct memtype {
160 u64 end; 160 u64 end;
161 unsigned long type; 161 unsigned long type;
162 struct list_head nd; 162 struct list_head nd;
163 struct rb_node rb;
163}; 164};
164 165
166static struct rb_root memtype_rbroot = RB_ROOT;
165static LIST_HEAD(memtype_list); 167static LIST_HEAD(memtype_list);
166static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */ 168static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */
167 169
170static struct memtype *memtype_rb_search(struct rb_root *root, u64 start)
171{
172 struct rb_node *node = root->rb_node;
173 struct memtype *last_lower = NULL;
174
175 while (node) {
176 struct memtype *data = container_of(node, struct memtype, rb);
177
178 if (data->start < start) {
179 last_lower = data;
180 node = node->rb_right;
181 } else if (data->start > start) {
182 node = node->rb_left;
183 } else
184 return data;
185 }
186
187 /* Will return NULL if there is no entry with its start <= start */
188 return last_lower;
189}
190
191static void memtype_rb_insert(struct rb_root *root, struct memtype *data)
192{
193 struct rb_node **new = &(root->rb_node);
194 struct rb_node *parent = NULL;
195
196 while (*new) {
197 struct memtype *this = container_of(*new, struct memtype, rb);
198
199 parent = *new;
200 if (data->start <= this->start)
201 new = &((*new)->rb_left);
202 else if (data->start > this->start)
203 new = &((*new)->rb_right);
204 }
205
206 rb_link_node(&data->rb, parent, new);
207 rb_insert_color(&data->rb, root);
208}
209
168/* 210/*
169 * Does intersection of PAT memory type and MTRR memory type and returns 211 * Does intersection of PAT memory type and MTRR memory type and returns
170 * the resulting memory type as PAT understands it. 212 * the resulting memory type as PAT understands it.
@@ -218,9 +260,6 @@ chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type)
218 return -EBUSY; 260 return -EBUSY;
219} 261}
220 262
221static struct memtype *cached_entry;
222static u64 cached_start;
223
224static int pat_pagerange_is_ram(unsigned long start, unsigned long end) 263static int pat_pagerange_is_ram(unsigned long start, unsigned long end)
225{ 264{
226 int ram_page = 0, not_rampage = 0; 265 int ram_page = 0, not_rampage = 0;
@@ -249,63 +288,61 @@ static int pat_pagerange_is_ram(unsigned long start, unsigned long end)
249} 288}
250 289
251/* 290/*
252 * For RAM pages, mark the pages as non WB memory type using 291 * For RAM pages, we use page flags to mark the pages with appropriate type.
253 * PageNonWB (PG_arch_1). We allow only one set_memory_uc() or 292 * Here we do two pass:
254 * set_memory_wc() on a RAM page at a time before marking it as WB again. 293 * - Find the memtype of all the pages in the range, look for any conflicts
255 * This is ok, because only one driver will be owning the page and 294 * - In case of no conflicts, set the new memtype for pages in the range
256 * doing set_memory_*() calls.
257 * 295 *
258 * For now, we use PageNonWB to track that the RAM page is being mapped 296 * Caller must hold memtype_lock for atomicity.
259 * as non WB. In future, we will have to use one more flag
260 * (or some other mechanism in page_struct) to distinguish between
261 * UC and WC mapping.
262 */ 297 */
263static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type, 298static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type,
264 unsigned long *new_type) 299 unsigned long *new_type)
265{ 300{
266 struct page *page; 301 struct page *page;
267 u64 pfn, end_pfn; 302 u64 pfn;
303
304 if (req_type == _PAGE_CACHE_UC) {
305 /* We do not support strong UC */
306 WARN_ON_ONCE(1);
307 req_type = _PAGE_CACHE_UC_MINUS;
308 }
268 309
269 for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { 310 for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
270 page = pfn_to_page(pfn); 311 unsigned long type;
271 if (page_mapped(page) || PageNonWB(page))
272 goto out;
273 312
274 SetPageNonWB(page); 313 page = pfn_to_page(pfn);
314 type = get_page_memtype(page);
315 if (type != -1) {
316 printk(KERN_INFO "reserve_ram_pages_type failed "
317 "0x%Lx-0x%Lx, track 0x%lx, req 0x%lx\n",
318 start, end, type, req_type);
319 if (new_type)
320 *new_type = type;
321
322 return -EBUSY;
323 }
275 } 324 }
276 return 0;
277 325
278out: 326 if (new_type)
279 end_pfn = pfn; 327 *new_type = req_type;
280 for (pfn = (start >> PAGE_SHIFT); pfn < end_pfn; ++pfn) { 328
329 for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
281 page = pfn_to_page(pfn); 330 page = pfn_to_page(pfn);
282 ClearPageNonWB(page); 331 set_page_memtype(page, req_type);
283 } 332 }
284 333 return 0;
285 return -EINVAL;
286} 334}
287 335
288static int free_ram_pages_type(u64 start, u64 end) 336static int free_ram_pages_type(u64 start, u64 end)
289{ 337{
290 struct page *page; 338 struct page *page;
291 u64 pfn, end_pfn; 339 u64 pfn;
292 340
293 for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { 341 for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
294 page = pfn_to_page(pfn); 342 page = pfn_to_page(pfn);
295 if (page_mapped(page) || !PageNonWB(page)) 343 set_page_memtype(page, -1);
296 goto out;
297
298 ClearPageNonWB(page);
299 } 344 }
300 return 0; 345 return 0;
301
302out:
303 end_pfn = pfn;
304 for (pfn = (start >> PAGE_SHIFT); pfn < end_pfn; ++pfn) {
305 page = pfn_to_page(pfn);
306 SetPageNonWB(page);
307 }
308 return -EINVAL;
309} 346}
310 347
311/* 348/*
@@ -339,6 +376,8 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
339 if (new_type) { 376 if (new_type) {
340 if (req_type == -1) 377 if (req_type == -1)
341 *new_type = _PAGE_CACHE_WB; 378 *new_type = _PAGE_CACHE_WB;
379 else if (req_type == _PAGE_CACHE_WC)
380 *new_type = _PAGE_CACHE_UC_MINUS;
342 else 381 else
343 *new_type = req_type & _PAGE_CACHE_MASK; 382 *new_type = req_type & _PAGE_CACHE_MASK;
344 } 383 }
@@ -364,11 +403,16 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
364 *new_type = actual_type; 403 *new_type = actual_type;
365 404
366 is_range_ram = pat_pagerange_is_ram(start, end); 405 is_range_ram = pat_pagerange_is_ram(start, end);
367 if (is_range_ram == 1) 406 if (is_range_ram == 1) {
368 return reserve_ram_pages_type(start, end, req_type, 407
369 new_type); 408 spin_lock(&memtype_lock);
370 else if (is_range_ram < 0) 409 err = reserve_ram_pages_type(start, end, req_type, new_type);
410 spin_unlock(&memtype_lock);
411
412 return err;
413 } else if (is_range_ram < 0) {
371 return -EINVAL; 414 return -EINVAL;
415 }
372 416
373 new = kmalloc(sizeof(struct memtype), GFP_KERNEL); 417 new = kmalloc(sizeof(struct memtype), GFP_KERNEL);
374 if (!new) 418 if (!new)
@@ -380,17 +424,19 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
380 424
381 spin_lock(&memtype_lock); 425 spin_lock(&memtype_lock);
382 426
383 if (cached_entry && start >= cached_start) 427 entry = memtype_rb_search(&memtype_rbroot, new->start);
384 entry = cached_entry; 428 if (likely(entry != NULL)) {
385 else 429 /* To work correctly with list_for_each_entry_continue */
430 entry = list_entry(entry->nd.prev, struct memtype, nd);
431 } else {
386 entry = list_entry(&memtype_list, struct memtype, nd); 432 entry = list_entry(&memtype_list, struct memtype, nd);
433 }
387 434
388 /* Search for existing mapping that overlaps the current range */ 435 /* Search for existing mapping that overlaps the current range */
389 where = NULL; 436 where = NULL;
390 list_for_each_entry_continue(entry, &memtype_list, nd) { 437 list_for_each_entry_continue(entry, &memtype_list, nd) {
391 if (end <= entry->start) { 438 if (end <= entry->start) {
392 where = entry->nd.prev; 439 where = entry->nd.prev;
393 cached_entry = list_entry(where, struct memtype, nd);
394 break; 440 break;
395 } else if (start <= entry->start) { /* end > entry->start */ 441 } else if (start <= entry->start) { /* end > entry->start */
396 err = chk_conflict(new, entry, new_type); 442 err = chk_conflict(new, entry, new_type);
@@ -398,8 +444,6 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
398 dprintk("Overlap at 0x%Lx-0x%Lx\n", 444 dprintk("Overlap at 0x%Lx-0x%Lx\n",
399 entry->start, entry->end); 445 entry->start, entry->end);
400 where = entry->nd.prev; 446 where = entry->nd.prev;
401 cached_entry = list_entry(where,
402 struct memtype, nd);
403 } 447 }
404 break; 448 break;
405 } else if (start < entry->end) { /* start > entry->start */ 449 } else if (start < entry->end) { /* start > entry->start */
@@ -407,8 +451,6 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
407 if (!err) { 451 if (!err) {
408 dprintk("Overlap at 0x%Lx-0x%Lx\n", 452 dprintk("Overlap at 0x%Lx-0x%Lx\n",
409 entry->start, entry->end); 453 entry->start, entry->end);
410 cached_entry = list_entry(entry->nd.prev,
411 struct memtype, nd);
412 454
413 /* 455 /*
414 * Move to right position in the linked 456 * Move to right position in the linked
@@ -436,13 +478,13 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
436 return err; 478 return err;
437 } 479 }
438 480
439 cached_start = start;
440
441 if (where) 481 if (where)
442 list_add(&new->nd, where); 482 list_add(&new->nd, where);
443 else 483 else
444 list_add_tail(&new->nd, &memtype_list); 484 list_add_tail(&new->nd, &memtype_list);
445 485
486 memtype_rb_insert(&memtype_rbroot, new);
487
446 spin_unlock(&memtype_lock); 488 spin_unlock(&memtype_lock);
447 489
448 dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", 490 dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n",
@@ -454,7 +496,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
454 496
455int free_memtype(u64 start, u64 end) 497int free_memtype(u64 start, u64 end)
456{ 498{
457 struct memtype *entry; 499 struct memtype *entry, *saved_entry;
458 int err = -EINVAL; 500 int err = -EINVAL;
459 int is_range_ram; 501 int is_range_ram;
460 502
@@ -466,23 +508,58 @@ int free_memtype(u64 start, u64 end)
466 return 0; 508 return 0;
467 509
468 is_range_ram = pat_pagerange_is_ram(start, end); 510 is_range_ram = pat_pagerange_is_ram(start, end);
469 if (is_range_ram == 1) 511 if (is_range_ram == 1) {
470 return free_ram_pages_type(start, end); 512
471 else if (is_range_ram < 0) 513 spin_lock(&memtype_lock);
514 err = free_ram_pages_type(start, end);
515 spin_unlock(&memtype_lock);
516
517 return err;
518 } else if (is_range_ram < 0) {
472 return -EINVAL; 519 return -EINVAL;
520 }
473 521
474 spin_lock(&memtype_lock); 522 spin_lock(&memtype_lock);
523
524 entry = memtype_rb_search(&memtype_rbroot, start);
525 if (unlikely(entry == NULL))
526 goto unlock_ret;
527
528 /*
529 * Saved entry points to an entry with start same or less than what
530 * we searched for. Now go through the list in both directions to look
531 * for the entry that matches with both start and end, with list stored
532 * in sorted start address
533 */
534 saved_entry = entry;
475 list_for_each_entry(entry, &memtype_list, nd) { 535 list_for_each_entry(entry, &memtype_list, nd) {
476 if (entry->start == start && entry->end == end) { 536 if (entry->start == start && entry->end == end) {
477 if (cached_entry == entry || cached_start == start) 537 rb_erase(&entry->rb, &memtype_rbroot);
478 cached_entry = NULL; 538 list_del(&entry->nd);
539 kfree(entry);
540 err = 0;
541 break;
542 } else if (entry->start > start) {
543 break;
544 }
545 }
546
547 if (!err)
548 goto unlock_ret;
479 549
550 entry = saved_entry;
551 list_for_each_entry_reverse(entry, &memtype_list, nd) {
552 if (entry->start == start && entry->end == end) {
553 rb_erase(&entry->rb, &memtype_rbroot);
480 list_del(&entry->nd); 554 list_del(&entry->nd);
481 kfree(entry); 555 kfree(entry);
482 err = 0; 556 err = 0;
483 break; 557 break;
558 } else if (entry->start < start) {
559 break;
484 } 560 }
485 } 561 }
562unlock_ret:
486 spin_unlock(&memtype_lock); 563 spin_unlock(&memtype_lock);
487 564
488 if (err) { 565 if (err) {
@@ -496,6 +573,101 @@ int free_memtype(u64 start, u64 end)
496} 573}
497 574
498 575
576/**
577 * lookup_memtype - Looksup the memory type for a physical address
578 * @paddr: physical address of which memory type needs to be looked up
579 *
580 * Only to be called when PAT is enabled
581 *
582 * Returns _PAGE_CACHE_WB, _PAGE_CACHE_WC, _PAGE_CACHE_UC_MINUS or
583 * _PAGE_CACHE_UC
584 */
585static unsigned long lookup_memtype(u64 paddr)
586{
587 int rettype = _PAGE_CACHE_WB;
588 struct memtype *entry;
589
590 if (is_ISA_range(paddr, paddr + PAGE_SIZE - 1))
591 return rettype;
592
593 if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) {
594 struct page *page;
595 spin_lock(&memtype_lock);
596 page = pfn_to_page(paddr >> PAGE_SHIFT);
597 rettype = get_page_memtype(page);
598 spin_unlock(&memtype_lock);
599 /*
600 * -1 from get_page_memtype() implies RAM page is in its
601 * default state and not reserved, and hence of type WB
602 */
603 if (rettype == -1)
604 rettype = _PAGE_CACHE_WB;
605
606 return rettype;
607 }
608
609 spin_lock(&memtype_lock);
610
611 entry = memtype_rb_search(&memtype_rbroot, paddr);
612 if (entry != NULL)
613 rettype = entry->type;
614 else
615 rettype = _PAGE_CACHE_UC_MINUS;
616
617 spin_unlock(&memtype_lock);
618 return rettype;
619}
620
621/**
622 * io_reserve_memtype - Request a memory type mapping for a region of memory
623 * @start: start (physical address) of the region
624 * @end: end (physical address) of the region
625 * @type: A pointer to memtype, with requested type. On success, requested
626 * or any other compatible type that was available for the region is returned
627 *
628 * On success, returns 0
629 * On failure, returns non-zero
630 */
631int io_reserve_memtype(resource_size_t start, resource_size_t end,
632 unsigned long *type)
633{
634 resource_size_t size = end - start;
635 unsigned long req_type = *type;
636 unsigned long new_type;
637 int ret;
638
639 WARN_ON_ONCE(iomem_map_sanity_check(start, size));
640
641 ret = reserve_memtype(start, end, req_type, &new_type);
642 if (ret)
643 goto out_err;
644
645 if (!is_new_memtype_allowed(start, size, req_type, new_type))
646 goto out_free;
647
648 if (kernel_map_sync_memtype(start, size, new_type) < 0)
649 goto out_free;
650
651 *type = new_type;
652 return 0;
653
654out_free:
655 free_memtype(start, end);
656 ret = -EBUSY;
657out_err:
658 return ret;
659}
660
661/**
662 * io_free_memtype - Release a memory type mapping for a region of memory
663 * @start: start (physical address) of the region
664 * @end: end (physical address) of the region
665 */
666void io_free_memtype(resource_size_t start, resource_size_t end)
667{
668 free_memtype(start, end);
669}
670
499pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, 671pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
500 unsigned long size, pgprot_t vma_prot) 672 unsigned long size, pgprot_t vma_prot)
501{ 673{
@@ -577,7 +749,7 @@ int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags)
577{ 749{
578 unsigned long id_sz; 750 unsigned long id_sz;
579 751
580 if (!pat_enabled || base >= __pa(high_memory)) 752 if (base >= __pa(high_memory))
581 return 0; 753 return 0;
582 754
583 id_sz = (__pa(high_memory) < base + size) ? 755 id_sz = (__pa(high_memory) < base + size) ?
@@ -612,11 +784,29 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
612 is_ram = pat_pagerange_is_ram(paddr, paddr + size); 784 is_ram = pat_pagerange_is_ram(paddr, paddr + size);
613 785
614 /* 786 /*
615 * reserve_pfn_range() doesn't support RAM pages. Maintain the current 787 * reserve_pfn_range() for RAM pages. We do not refcount to keep
616 * behavior with RAM pages by returning success. 788 * track of number of mappings of RAM pages. We can assert that
789 * the type requested matches the type of first page in the range.
617 */ 790 */
618 if (is_ram != 0) 791 if (is_ram) {
792 if (!pat_enabled)
793 return 0;
794
795 flags = lookup_memtype(paddr);
796 if (want_flags != flags) {
797 printk(KERN_WARNING
798 "%s:%d map pfn RAM range req %s for %Lx-%Lx, got %s\n",
799 current->comm, current->pid,
800 cattr_name(want_flags),
801 (unsigned long long)paddr,
802 (unsigned long long)(paddr + size),
803 cattr_name(flags));
804 *vma_prot = __pgprot((pgprot_val(*vma_prot) &
805 (~_PAGE_CACHE_MASK)) |
806 flags);
807 }
619 return 0; 808 return 0;
809 }
620 810
621 ret = reserve_memtype(paddr, paddr + size, want_flags, &flags); 811 ret = reserve_memtype(paddr, paddr + size, want_flags, &flags);
622 if (ret) 812 if (ret)
@@ -678,14 +868,6 @@ int track_pfn_vma_copy(struct vm_area_struct *vma)
678 unsigned long vma_size = vma->vm_end - vma->vm_start; 868 unsigned long vma_size = vma->vm_end - vma->vm_start;
679 pgprot_t pgprot; 869 pgprot_t pgprot;
680 870
681 if (!pat_enabled)
682 return 0;
683
684 /*
685 * For now, only handle remap_pfn_range() vmas where
686 * is_linear_pfn_mapping() == TRUE. Handling of
687 * vm_insert_pfn() is TBD.
688 */
689 if (is_linear_pfn_mapping(vma)) { 871 if (is_linear_pfn_mapping(vma)) {
690 /* 872 /*
691 * reserve the whole chunk covered by vma. We need the 873 * reserve the whole chunk covered by vma. We need the
@@ -713,23 +895,24 @@ int track_pfn_vma_copy(struct vm_area_struct *vma)
713int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot, 895int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
714 unsigned long pfn, unsigned long size) 896 unsigned long pfn, unsigned long size)
715{ 897{
898 unsigned long flags;
716 resource_size_t paddr; 899 resource_size_t paddr;
717 unsigned long vma_size = vma->vm_end - vma->vm_start; 900 unsigned long vma_size = vma->vm_end - vma->vm_start;
718 901
719 if (!pat_enabled)
720 return 0;
721
722 /*
723 * For now, only handle remap_pfn_range() vmas where
724 * is_linear_pfn_mapping() == TRUE. Handling of
725 * vm_insert_pfn() is TBD.
726 */
727 if (is_linear_pfn_mapping(vma)) { 902 if (is_linear_pfn_mapping(vma)) {
728 /* reserve the whole chunk starting from vm_pgoff */ 903 /* reserve the whole chunk starting from vm_pgoff */
729 paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; 904 paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
730 return reserve_pfn_range(paddr, vma_size, prot, 0); 905 return reserve_pfn_range(paddr, vma_size, prot, 0);
731 } 906 }
732 907
908 if (!pat_enabled)
909 return 0;
910
911 /* for vm_insert_pfn and friends, we set prot based on lookup */
912 flags = lookup_memtype(pfn << PAGE_SHIFT);
913 *prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
914 flags);
915
733 return 0; 916 return 0;
734} 917}
735 918
@@ -744,14 +927,6 @@ void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
744 resource_size_t paddr; 927 resource_size_t paddr;
745 unsigned long vma_size = vma->vm_end - vma->vm_start; 928 unsigned long vma_size = vma->vm_end - vma->vm_start;
746 929
747 if (!pat_enabled)
748 return;
749
750 /*
751 * For now, only handle remap_pfn_range() vmas where
752 * is_linear_pfn_mapping() == TRUE. Handling of
753 * vm_insert_pfn() is TBD.
754 */
755 if (is_linear_pfn_mapping(vma)) { 930 if (is_linear_pfn_mapping(vma)) {
756 /* free the whole chunk starting from vm_pgoff */ 931 /* free the whole chunk starting from vm_pgoff */
757 paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; 932 paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index b3d20b9cac63..417c9f5b4afa 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -242,7 +242,7 @@ static void __restore_processor_state(struct saved_context *ctxt)
242 fix_processor_context(); 242 fix_processor_context();
243 243
244 do_fpu_end(); 244 do_fpu_end();
245 mtrr_ap_init(); 245 mtrr_bp_restore();
246 246
247#ifdef CONFIG_X86_OLD_MCE 247#ifdef CONFIG_X86_OLD_MCE
248 mcheck_init(&boot_cpu_data); 248 mcheck_init(&boot_cpu_data);