diff options
-rw-r--r-- | arch/ia64/Kconfig | 4 | ||||
-rw-r--r-- | arch/x86/Kconfig | 4 | ||||
-rw-r--r-- | arch/x86/include/asm/cacheflush.h | 54 | ||||
-rw-r--r-- | arch/x86/include/asm/iomap.h | 9 | ||||
-rw-r--r-- | arch/x86/include/asm/mtrr.h | 6 | ||||
-rw-r--r-- | arch/x86/include/asm/pat.h | 5 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mtrr/main.c | 46 | ||||
-rw-r--r-- | arch/x86/kernel/setup.c | 20 | ||||
-rw-r--r-- | arch/x86/kernel/smpboot.c | 14 | ||||
-rw-r--r-- | arch/x86/mm/iomap_32.c | 27 | ||||
-rw-r--r-- | arch/x86/mm/ioremap.c | 18 | ||||
-rw-r--r-- | arch/x86/mm/pageattr.c | 8 | ||||
-rw-r--r-- | arch/x86/mm/pat.c | 353 | ||||
-rw-r--r-- | arch/x86/power/cpu.c | 2 | ||||
-rw-r--r-- | include/linux/io-mapping.h | 17 | ||||
-rw-r--r-- | include/linux/page-flags.h | 4 | ||||
-rw-r--r-- | kernel/cpu.c | 14 | ||||
-rw-r--r-- | kernel/smp.c | 40 | ||||
-rw-r--r-- | mm/Kconfig | 2 |
19 files changed, 507 insertions, 140 deletions
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 170042b420d4..e6246119932a 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig | |||
@@ -112,6 +112,10 @@ config IA64_UNCACHED_ALLOCATOR | |||
112 | bool | 112 | bool |
113 | select GENERIC_ALLOCATOR | 113 | select GENERIC_ALLOCATOR |
114 | 114 | ||
115 | config ARCH_USES_PG_UNCACHED | ||
116 | def_bool y | ||
117 | depends on IA64_UNCACHED_ALLOCATOR | ||
118 | |||
115 | config AUDIT_ARCH | 119 | config AUDIT_ARCH |
116 | bool | 120 | bool |
117 | default y | 121 | default y |
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 74bf3e30ce75..a800b0faaad6 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -1417,6 +1417,10 @@ config X86_PAT | |||
1417 | 1417 | ||
1418 | If unsure, say Y. | 1418 | If unsure, say Y. |
1419 | 1419 | ||
1420 | config ARCH_USES_PG_UNCACHED | ||
1421 | def_bool y | ||
1422 | depends on X86_PAT | ||
1423 | |||
1420 | config EFI | 1424 | config EFI |
1421 | bool "EFI runtime service support" | 1425 | bool "EFI runtime service support" |
1422 | depends on ACPI | 1426 | depends on ACPI |
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index e55dfc1ad453..b54f6afe7ec4 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h | |||
@@ -43,8 +43,58 @@ static inline void copy_from_user_page(struct vm_area_struct *vma, | |||
43 | memcpy(dst, src, len); | 43 | memcpy(dst, src, len); |
44 | } | 44 | } |
45 | 45 | ||
46 | #define PG_non_WB PG_arch_1 | 46 | #define PG_WC PG_arch_1 |
47 | PAGEFLAG(NonWB, non_WB) | 47 | PAGEFLAG(WC, WC) |
48 | |||
49 | #ifdef CONFIG_X86_PAT | ||
50 | /* | ||
51 | * X86 PAT uses page flags WC and Uncached together to keep track of | ||
52 | * memory type of pages that have backing page struct. X86 PAT supports 3 | ||
53 | * different memory types, _PAGE_CACHE_WB, _PAGE_CACHE_WC and | ||
54 | * _PAGE_CACHE_UC_MINUS and a fourth state where the page's memory type has not | ||
55 | * been changed from its default (value of -1 used to denote this). | ||
56 | * Note we do not support _PAGE_CACHE_UC here. | ||
57 | * | ||
58 | * Caller must hold memtype_lock for atomicity. | ||
59 | */ | ||
60 | static inline unsigned long get_page_memtype(struct page *pg) | ||
61 | { | ||
62 | if (!PageUncached(pg) && !PageWC(pg)) | ||
63 | return -1; | ||
64 | else if (!PageUncached(pg) && PageWC(pg)) | ||
65 | return _PAGE_CACHE_WC; | ||
66 | else if (PageUncached(pg) && !PageWC(pg)) | ||
67 | return _PAGE_CACHE_UC_MINUS; | ||
68 | else | ||
69 | return _PAGE_CACHE_WB; | ||
70 | } | ||
71 | |||
72 | static inline void set_page_memtype(struct page *pg, unsigned long memtype) | ||
73 | { | ||
74 | switch (memtype) { | ||
75 | case _PAGE_CACHE_WC: | ||
76 | ClearPageUncached(pg); | ||
77 | SetPageWC(pg); | ||
78 | break; | ||
79 | case _PAGE_CACHE_UC_MINUS: | ||
80 | SetPageUncached(pg); | ||
81 | ClearPageWC(pg); | ||
82 | break; | ||
83 | case _PAGE_CACHE_WB: | ||
84 | SetPageUncached(pg); | ||
85 | SetPageWC(pg); | ||
86 | break; | ||
87 | default: | ||
88 | case -1: | ||
89 | ClearPageUncached(pg); | ||
90 | ClearPageWC(pg); | ||
91 | break; | ||
92 | } | ||
93 | } | ||
94 | #else | ||
95 | static inline unsigned long get_page_memtype(struct page *pg) { return -1; } | ||
96 | static inline void set_page_memtype(struct page *pg, unsigned long memtype) { } | ||
97 | #endif | ||
48 | 98 | ||
49 | /* | 99 | /* |
50 | * The set_memory_* API can be used to change various attributes of a virtual | 100 | * The set_memory_* API can be used to change various attributes of a virtual |
diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h index 0e9fe1d9d971..f35eb45d6576 100644 --- a/arch/x86/include/asm/iomap.h +++ b/arch/x86/include/asm/iomap.h | |||
@@ -26,13 +26,16 @@ | |||
26 | #include <asm/pgtable.h> | 26 | #include <asm/pgtable.h> |
27 | #include <asm/tlbflush.h> | 27 | #include <asm/tlbflush.h> |
28 | 28 | ||
29 | int | ||
30 | is_io_mapping_possible(resource_size_t base, unsigned long size); | ||
31 | |||
32 | void * | 29 | void * |
33 | iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot); | 30 | iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot); |
34 | 31 | ||
35 | void | 32 | void |
36 | iounmap_atomic(void *kvaddr, enum km_type type); | 33 | iounmap_atomic(void *kvaddr, enum km_type type); |
37 | 34 | ||
35 | int | ||
36 | iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot); | ||
37 | |||
38 | void | ||
39 | iomap_free(resource_size_t base, unsigned long size); | ||
40 | |||
38 | #endif /* _ASM_X86_IOMAP_H */ | 41 | #endif /* _ASM_X86_IOMAP_H */ |
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index a51ada8467de..4365ffdb461f 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h | |||
@@ -121,6 +121,9 @@ extern int mtrr_del_page(int reg, unsigned long base, unsigned long size); | |||
121 | extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi); | 121 | extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi); |
122 | extern void mtrr_ap_init(void); | 122 | extern void mtrr_ap_init(void); |
123 | extern void mtrr_bp_init(void); | 123 | extern void mtrr_bp_init(void); |
124 | extern void set_mtrr_aps_delayed_init(void); | ||
125 | extern void mtrr_aps_init(void); | ||
126 | extern void mtrr_bp_restore(void); | ||
124 | extern int mtrr_trim_uncached_memory(unsigned long end_pfn); | 127 | extern int mtrr_trim_uncached_memory(unsigned long end_pfn); |
125 | extern int amd_special_default_mtrr(void); | 128 | extern int amd_special_default_mtrr(void); |
126 | # else | 129 | # else |
@@ -161,6 +164,9 @@ static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) | |||
161 | 164 | ||
162 | #define mtrr_ap_init() do {} while (0) | 165 | #define mtrr_ap_init() do {} while (0) |
163 | #define mtrr_bp_init() do {} while (0) | 166 | #define mtrr_bp_init() do {} while (0) |
167 | #define set_mtrr_aps_delayed_init() do {} while (0) | ||
168 | #define mtrr_aps_init() do {} while (0) | ||
169 | #define mtrr_bp_restore() do {} while (0) | ||
164 | # endif | 170 | # endif |
165 | 171 | ||
166 | #ifdef CONFIG_COMPAT | 172 | #ifdef CONFIG_COMPAT |
diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h index 7af14e512f97..e2c1668dde7a 100644 --- a/arch/x86/include/asm/pat.h +++ b/arch/x86/include/asm/pat.h | |||
@@ -19,4 +19,9 @@ extern int free_memtype(u64 start, u64 end); | |||
19 | extern int kernel_map_sync_memtype(u64 base, unsigned long size, | 19 | extern int kernel_map_sync_memtype(u64 base, unsigned long size, |
20 | unsigned long flag); | 20 | unsigned long flag); |
21 | 21 | ||
22 | int io_reserve_memtype(resource_size_t start, resource_size_t end, | ||
23 | unsigned long *type); | ||
24 | |||
25 | void io_free_memtype(resource_size_t start, resource_size_t end); | ||
26 | |||
22 | #endif /* _ASM_X86_PAT_H */ | 27 | #endif /* _ASM_X86_PAT_H */ |
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 7af0f88a4163..84e83de54575 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c | |||
@@ -58,6 +58,7 @@ unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; | |||
58 | static DEFINE_MUTEX(mtrr_mutex); | 58 | static DEFINE_MUTEX(mtrr_mutex); |
59 | 59 | ||
60 | u64 size_or_mask, size_and_mask; | 60 | u64 size_or_mask, size_and_mask; |
61 | static bool mtrr_aps_delayed_init; | ||
61 | 62 | ||
62 | static struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM]; | 63 | static struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM]; |
63 | 64 | ||
@@ -163,7 +164,10 @@ static void ipi_handler(void *info) | |||
163 | if (data->smp_reg != ~0U) { | 164 | if (data->smp_reg != ~0U) { |
164 | mtrr_if->set(data->smp_reg, data->smp_base, | 165 | mtrr_if->set(data->smp_reg, data->smp_base, |
165 | data->smp_size, data->smp_type); | 166 | data->smp_size, data->smp_type); |
166 | } else { | 167 | } else if (mtrr_aps_delayed_init) { |
168 | /* | ||
169 | * Initialize the MTRRs in addition to the synchronisation. | ||
170 | */ | ||
167 | mtrr_if->set_all(); | 171 | mtrr_if->set_all(); |
168 | } | 172 | } |
169 | 173 | ||
@@ -265,6 +269,8 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ | |||
265 | */ | 269 | */ |
266 | if (reg != ~0U) | 270 | if (reg != ~0U) |
267 | mtrr_if->set(reg, base, size, type); | 271 | mtrr_if->set(reg, base, size, type); |
272 | else if (!mtrr_aps_delayed_init) | ||
273 | mtrr_if->set_all(); | ||
268 | 274 | ||
269 | /* Wait for the others */ | 275 | /* Wait for the others */ |
270 | while (atomic_read(&data.count)) | 276 | while (atomic_read(&data.count)) |
@@ -721,9 +727,7 @@ void __init mtrr_bp_init(void) | |||
721 | 727 | ||
722 | void mtrr_ap_init(void) | 728 | void mtrr_ap_init(void) |
723 | { | 729 | { |
724 | unsigned long flags; | 730 | if (!use_intel() || mtrr_aps_delayed_init) |
725 | |||
726 | if (!mtrr_if || !use_intel()) | ||
727 | return; | 731 | return; |
728 | /* | 732 | /* |
729 | * Ideally we should hold mtrr_mutex here to avoid mtrr entries | 733 | * Ideally we should hold mtrr_mutex here to avoid mtrr entries |
@@ -738,11 +742,7 @@ void mtrr_ap_init(void) | |||
738 | * 2. cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug | 742 | * 2. cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug |
739 | * lock to prevent mtrr entry changes | 743 | * lock to prevent mtrr entry changes |
740 | */ | 744 | */ |
741 | local_irq_save(flags); | 745 | set_mtrr(~0U, 0, 0, 0); |
742 | |||
743 | mtrr_if->set_all(); | ||
744 | |||
745 | local_irq_restore(flags); | ||
746 | } | 746 | } |
747 | 747 | ||
748 | /** | 748 | /** |
@@ -753,6 +753,34 @@ void mtrr_save_state(void) | |||
753 | smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1); | 753 | smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1); |
754 | } | 754 | } |
755 | 755 | ||
756 | void set_mtrr_aps_delayed_init(void) | ||
757 | { | ||
758 | if (!use_intel()) | ||
759 | return; | ||
760 | |||
761 | mtrr_aps_delayed_init = true; | ||
762 | } | ||
763 | |||
764 | /* | ||
765 | * MTRR initialization for all APs | ||
766 | */ | ||
767 | void mtrr_aps_init(void) | ||
768 | { | ||
769 | if (!use_intel()) | ||
770 | return; | ||
771 | |||
772 | set_mtrr(~0U, 0, 0, 0); | ||
773 | mtrr_aps_delayed_init = false; | ||
774 | } | ||
775 | |||
776 | void mtrr_bp_restore(void) | ||
777 | { | ||
778 | if (!use_intel()) | ||
779 | return; | ||
780 | |||
781 | mtrr_if->set_all(); | ||
782 | } | ||
783 | |||
756 | static int __init mtrr_init_finialize(void) | 784 | static int __init mtrr_init_finialize(void) |
757 | { | 785 | { |
758 | if (!mtrr_if) | 786 | if (!mtrr_if) |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 61f86f241420..19f15c4076fb 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -712,6 +712,21 @@ void __init setup_arch(char **cmdline_p) | |||
712 | printk(KERN_INFO "Command line: %s\n", boot_command_line); | 712 | printk(KERN_INFO "Command line: %s\n", boot_command_line); |
713 | #endif | 713 | #endif |
714 | 714 | ||
715 | strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); | ||
716 | *cmdline_p = command_line; | ||
717 | |||
718 | #ifdef CONFIG_X86_64 | ||
719 | /* | ||
720 | * Must call this twice: Once just to detect whether hardware doesn't | ||
721 | * support NX (so that the early EHCI debug console setup can safely | ||
722 | * call set_fixmap(), and then again after parsing early parameters to | ||
723 | * honor the respective command line option. | ||
724 | */ | ||
725 | check_efer(); | ||
726 | #endif | ||
727 | |||
728 | parse_early_param(); | ||
729 | |||
715 | /* VMI may relocate the fixmap; do this before touching ioremap area */ | 730 | /* VMI may relocate the fixmap; do this before touching ioremap area */ |
716 | vmi_init(); | 731 | vmi_init(); |
717 | 732 | ||
@@ -794,11 +809,6 @@ void __init setup_arch(char **cmdline_p) | |||
794 | #endif | 809 | #endif |
795 | #endif | 810 | #endif |
796 | 811 | ||
797 | strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); | ||
798 | *cmdline_p = command_line; | ||
799 | |||
800 | parse_early_param(); | ||
801 | |||
802 | #ifdef CONFIG_X86_64 | 812 | #ifdef CONFIG_X86_64 |
803 | check_efer(); | 813 | check_efer(); |
804 | #endif | 814 | #endif |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 9d8319183aae..a25eeec00080 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -1118,9 +1118,22 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
1118 | 1118 | ||
1119 | if (is_uv_system()) | 1119 | if (is_uv_system()) |
1120 | uv_system_init(); | 1120 | uv_system_init(); |
1121 | |||
1122 | set_mtrr_aps_delayed_init(); | ||
1121 | out: | 1123 | out: |
1122 | preempt_enable(); | 1124 | preempt_enable(); |
1123 | } | 1125 | } |
1126 | |||
1127 | void arch_enable_nonboot_cpus_begin(void) | ||
1128 | { | ||
1129 | set_mtrr_aps_delayed_init(); | ||
1130 | } | ||
1131 | |||
1132 | void arch_enable_nonboot_cpus_end(void) | ||
1133 | { | ||
1134 | mtrr_aps_init(); | ||
1135 | } | ||
1136 | |||
1124 | /* | 1137 | /* |
1125 | * Early setup to make printk work. | 1138 | * Early setup to make printk work. |
1126 | */ | 1139 | */ |
@@ -1142,6 +1155,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus) | |||
1142 | setup_ioapic_dest(); | 1155 | setup_ioapic_dest(); |
1143 | #endif | 1156 | #endif |
1144 | check_nmi_watchdog(); | 1157 | check_nmi_watchdog(); |
1158 | mtrr_aps_init(); | ||
1145 | } | 1159 | } |
1146 | 1160 | ||
1147 | static int __initdata setup_possible_cpus = -1; | 1161 | static int __initdata setup_possible_cpus = -1; |
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index fe6f84ca121e..84e236ce76ba 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c | |||
@@ -21,7 +21,7 @@ | |||
21 | #include <linux/module.h> | 21 | #include <linux/module.h> |
22 | #include <linux/highmem.h> | 22 | #include <linux/highmem.h> |
23 | 23 | ||
24 | int is_io_mapping_possible(resource_size_t base, unsigned long size) | 24 | static int is_io_mapping_possible(resource_size_t base, unsigned long size) |
25 | { | 25 | { |
26 | #if !defined(CONFIG_X86_PAE) && defined(CONFIG_PHYS_ADDR_T_64BIT) | 26 | #if !defined(CONFIG_X86_PAE) && defined(CONFIG_PHYS_ADDR_T_64BIT) |
27 | /* There is no way to map greater than 1 << 32 address without PAE */ | 27 | /* There is no way to map greater than 1 << 32 address without PAE */ |
@@ -30,7 +30,30 @@ int is_io_mapping_possible(resource_size_t base, unsigned long size) | |||
30 | #endif | 30 | #endif |
31 | return 1; | 31 | return 1; |
32 | } | 32 | } |
33 | EXPORT_SYMBOL_GPL(is_io_mapping_possible); | 33 | |
34 | int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot) | ||
35 | { | ||
36 | unsigned long flag = _PAGE_CACHE_WC; | ||
37 | int ret; | ||
38 | |||
39 | if (!is_io_mapping_possible(base, size)) | ||
40 | return -EINVAL; | ||
41 | |||
42 | ret = io_reserve_memtype(base, base + size, &flag); | ||
43 | if (ret) | ||
44 | return ret; | ||
45 | |||
46 | *prot = __pgprot(__PAGE_KERNEL | flag); | ||
47 | return 0; | ||
48 | } | ||
49 | EXPORT_SYMBOL_GPL(iomap_create_wc); | ||
50 | |||
51 | void | ||
52 | iomap_free(resource_size_t base, unsigned long size) | ||
53 | { | ||
54 | io_free_memtype(base, base + size); | ||
55 | } | ||
56 | EXPORT_SYMBOL_GPL(iomap_free); | ||
34 | 57 | ||
35 | void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) | 58 | void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) |
36 | { | 59 | { |
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 04e1ad60c63a..334e63ca7b2b 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c | |||
@@ -158,24 +158,14 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, | |||
158 | retval = reserve_memtype(phys_addr, (u64)phys_addr + size, | 158 | retval = reserve_memtype(phys_addr, (u64)phys_addr + size, |
159 | prot_val, &new_prot_val); | 159 | prot_val, &new_prot_val); |
160 | if (retval) { | 160 | if (retval) { |
161 | pr_debug("Warning: reserve_memtype returned %d\n", retval); | 161 | printk(KERN_ERR "ioremap reserve_memtype failed %d\n", retval); |
162 | return NULL; | 162 | return NULL; |
163 | } | 163 | } |
164 | 164 | ||
165 | if (prot_val != new_prot_val) { | 165 | if (prot_val != new_prot_val) { |
166 | /* | 166 | if (!is_new_memtype_allowed(phys_addr, size, |
167 | * Do not fallback to certain memory types with certain | 167 | prot_val, new_prot_val)) { |
168 | * requested type: | 168 | printk(KERN_ERR |
169 | * - request is uc-, return cannot be write-back | ||
170 | * - request is uc-, return cannot be write-combine | ||
171 | * - request is write-combine, return cannot be write-back | ||
172 | */ | ||
173 | if ((prot_val == _PAGE_CACHE_UC_MINUS && | ||
174 | (new_prot_val == _PAGE_CACHE_WB || | ||
175 | new_prot_val == _PAGE_CACHE_WC)) || | ||
176 | (prot_val == _PAGE_CACHE_WC && | ||
177 | new_prot_val == _PAGE_CACHE_WB)) { | ||
178 | pr_debug( | ||
179 | "ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n", | 169 | "ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n", |
180 | (unsigned long long)phys_addr, | 170 | (unsigned long long)phys_addr, |
181 | (unsigned long long)(phys_addr + size), | 171 | (unsigned long long)(phys_addr + size), |
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 7e600c1962db..e245775ec856 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -822,6 +822,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, | |||
822 | { | 822 | { |
823 | struct cpa_data cpa; | 823 | struct cpa_data cpa; |
824 | int ret, cache, checkalias; | 824 | int ret, cache, checkalias; |
825 | unsigned long baddr = 0; | ||
825 | 826 | ||
826 | /* | 827 | /* |
827 | * Check, if we are requested to change a not supported | 828 | * Check, if we are requested to change a not supported |
@@ -853,6 +854,11 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, | |||
853 | */ | 854 | */ |
854 | WARN_ON_ONCE(1); | 855 | WARN_ON_ONCE(1); |
855 | } | 856 | } |
857 | /* | ||
858 | * Save address for cache flush. *addr is modified in the call | ||
859 | * to __change_page_attr_set_clr() below. | ||
860 | */ | ||
861 | baddr = *addr; | ||
856 | } | 862 | } |
857 | 863 | ||
858 | /* Must avoid aliasing mappings in the highmem code */ | 864 | /* Must avoid aliasing mappings in the highmem code */ |
@@ -900,7 +906,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, | |||
900 | cpa_flush_array(addr, numpages, cache, | 906 | cpa_flush_array(addr, numpages, cache, |
901 | cpa.flags, pages); | 907 | cpa.flags, pages); |
902 | } else | 908 | } else |
903 | cpa_flush_range(*addr, numpages, cache); | 909 | cpa_flush_range(baddr, numpages, cache); |
904 | } else | 910 | } else |
905 | cpa_flush_all(cache); | 911 | cpa_flush_all(cache); |
906 | 912 | ||
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index b2f7d3e59b86..d7ebc3a10f2f 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/gfp.h> | 15 | #include <linux/gfp.h> |
16 | #include <linux/mm.h> | 16 | #include <linux/mm.h> |
17 | #include <linux/fs.h> | 17 | #include <linux/fs.h> |
18 | #include <linux/rbtree.h> | ||
18 | 19 | ||
19 | #include <asm/cacheflush.h> | 20 | #include <asm/cacheflush.h> |
20 | #include <asm/processor.h> | 21 | #include <asm/processor.h> |
@@ -148,11 +149,10 @@ static char *cattr_name(unsigned long flags) | |||
148 | * areas). All the aliases have the same cache attributes of course. | 149 | * areas). All the aliases have the same cache attributes of course. |
149 | * Zero attributes are represented as holes. | 150 | * Zero attributes are represented as holes. |
150 | * | 151 | * |
151 | * Currently the data structure is a list because the number of mappings | 152 | * The data structure is a list that is also organized as an rbtree |
152 | * are expected to be relatively small. If this should be a problem | 153 | * sorted on the start address of memtype range. |
153 | * it could be changed to a rbtree or similar. | ||
154 | * | 154 | * |
155 | * memtype_lock protects the whole list. | 155 | * memtype_lock protects both the linear list and rbtree. |
156 | */ | 156 | */ |
157 | 157 | ||
158 | struct memtype { | 158 | struct memtype { |
@@ -160,11 +160,53 @@ struct memtype { | |||
160 | u64 end; | 160 | u64 end; |
161 | unsigned long type; | 161 | unsigned long type; |
162 | struct list_head nd; | 162 | struct list_head nd; |
163 | struct rb_node rb; | ||
163 | }; | 164 | }; |
164 | 165 | ||
166 | static struct rb_root memtype_rbroot = RB_ROOT; | ||
165 | static LIST_HEAD(memtype_list); | 167 | static LIST_HEAD(memtype_list); |
166 | static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */ | 168 | static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */ |
167 | 169 | ||
170 | static struct memtype *memtype_rb_search(struct rb_root *root, u64 start) | ||
171 | { | ||
172 | struct rb_node *node = root->rb_node; | ||
173 | struct memtype *last_lower = NULL; | ||
174 | |||
175 | while (node) { | ||
176 | struct memtype *data = container_of(node, struct memtype, rb); | ||
177 | |||
178 | if (data->start < start) { | ||
179 | last_lower = data; | ||
180 | node = node->rb_right; | ||
181 | } else if (data->start > start) { | ||
182 | node = node->rb_left; | ||
183 | } else | ||
184 | return data; | ||
185 | } | ||
186 | |||
187 | /* Will return NULL if there is no entry with its start <= start */ | ||
188 | return last_lower; | ||
189 | } | ||
190 | |||
191 | static void memtype_rb_insert(struct rb_root *root, struct memtype *data) | ||
192 | { | ||
193 | struct rb_node **new = &(root->rb_node); | ||
194 | struct rb_node *parent = NULL; | ||
195 | |||
196 | while (*new) { | ||
197 | struct memtype *this = container_of(*new, struct memtype, rb); | ||
198 | |||
199 | parent = *new; | ||
200 | if (data->start <= this->start) | ||
201 | new = &((*new)->rb_left); | ||
202 | else if (data->start > this->start) | ||
203 | new = &((*new)->rb_right); | ||
204 | } | ||
205 | |||
206 | rb_link_node(&data->rb, parent, new); | ||
207 | rb_insert_color(&data->rb, root); | ||
208 | } | ||
209 | |||
168 | /* | 210 | /* |
169 | * Does intersection of PAT memory type and MTRR memory type and returns | 211 | * Does intersection of PAT memory type and MTRR memory type and returns |
170 | * the resulting memory type as PAT understands it. | 212 | * the resulting memory type as PAT understands it. |
@@ -218,9 +260,6 @@ chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type) | |||
218 | return -EBUSY; | 260 | return -EBUSY; |
219 | } | 261 | } |
220 | 262 | ||
221 | static struct memtype *cached_entry; | ||
222 | static u64 cached_start; | ||
223 | |||
224 | static int pat_pagerange_is_ram(unsigned long start, unsigned long end) | 263 | static int pat_pagerange_is_ram(unsigned long start, unsigned long end) |
225 | { | 264 | { |
226 | int ram_page = 0, not_rampage = 0; | 265 | int ram_page = 0, not_rampage = 0; |
@@ -249,63 +288,61 @@ static int pat_pagerange_is_ram(unsigned long start, unsigned long end) | |||
249 | } | 288 | } |
250 | 289 | ||
251 | /* | 290 | /* |
252 | * For RAM pages, mark the pages as non WB memory type using | 291 | * For RAM pages, we use page flags to mark the pages with appropriate type. |
253 | * PageNonWB (PG_arch_1). We allow only one set_memory_uc() or | 292 | * Here we do two passes: |
254 | * set_memory_wc() on a RAM page at a time before marking it as WB again. | 293 | * - Find the memtype of all the pages in the range, look for any conflicts |
255 | * This is ok, because only one driver will be owning the page and | 294 | * - In case of no conflicts, set the new memtype for pages in the range |
256 | * doing set_memory_*() calls. | ||
257 | * | 295 | * |
258 | * For now, we use PageNonWB to track that the RAM page is being mapped | 296 | * Caller must hold memtype_lock for atomicity. |
259 | * as non WB. In future, we will have to use one more flag | ||
260 | * (or some other mechanism in page_struct) to distinguish between | ||
261 | * UC and WC mapping. | ||
262 | */ | 297 | */ |
263 | static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type, | 298 | static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type, |
264 | unsigned long *new_type) | 299 | unsigned long *new_type) |
265 | { | 300 | { |
266 | struct page *page; | 301 | struct page *page; |
267 | u64 pfn, end_pfn; | 302 | u64 pfn; |
303 | |||
304 | if (req_type == _PAGE_CACHE_UC) { | ||
305 | /* We do not support strong UC */ | ||
306 | WARN_ON_ONCE(1); | ||
307 | req_type = _PAGE_CACHE_UC_MINUS; | ||
308 | } | ||
268 | 309 | ||
269 | for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { | 310 | for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { |
270 | page = pfn_to_page(pfn); | 311 | unsigned long type; |
271 | if (page_mapped(page) || PageNonWB(page)) | ||
272 | goto out; | ||
273 | 312 | ||
274 | SetPageNonWB(page); | 313 | page = pfn_to_page(pfn); |
314 | type = get_page_memtype(page); | ||
315 | if (type != -1) { | ||
316 | printk(KERN_INFO "reserve_ram_pages_type failed " | ||
317 | "0x%Lx-0x%Lx, track 0x%lx, req 0x%lx\n", | ||
318 | start, end, type, req_type); | ||
319 | if (new_type) | ||
320 | *new_type = type; | ||
321 | |||
322 | return -EBUSY; | ||
323 | } | ||
275 | } | 324 | } |
276 | return 0; | ||
277 | 325 | ||
278 | out: | 326 | if (new_type) |
279 | end_pfn = pfn; | 327 | *new_type = req_type; |
280 | for (pfn = (start >> PAGE_SHIFT); pfn < end_pfn; ++pfn) { | 328 | |
329 | for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { | ||
281 | page = pfn_to_page(pfn); | 330 | page = pfn_to_page(pfn); |
282 | ClearPageNonWB(page); | 331 | set_page_memtype(page, req_type); |
283 | } | 332 | } |
284 | 333 | return 0; | |
285 | return -EINVAL; | ||
286 | } | 334 | } |
287 | 335 | ||
288 | static int free_ram_pages_type(u64 start, u64 end) | 336 | static int free_ram_pages_type(u64 start, u64 end) |
289 | { | 337 | { |
290 | struct page *page; | 338 | struct page *page; |
291 | u64 pfn, end_pfn; | 339 | u64 pfn; |
292 | 340 | ||
293 | for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { | 341 | for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { |
294 | page = pfn_to_page(pfn); | 342 | page = pfn_to_page(pfn); |
295 | if (page_mapped(page) || !PageNonWB(page)) | 343 | set_page_memtype(page, -1); |
296 | goto out; | ||
297 | |||
298 | ClearPageNonWB(page); | ||
299 | } | 344 | } |
300 | return 0; | 345 | return 0; |
301 | |||
302 | out: | ||
303 | end_pfn = pfn; | ||
304 | for (pfn = (start >> PAGE_SHIFT); pfn < end_pfn; ++pfn) { | ||
305 | page = pfn_to_page(pfn); | ||
306 | SetPageNonWB(page); | ||
307 | } | ||
308 | return -EINVAL; | ||
309 | } | 346 | } |
310 | 347 | ||
311 | /* | 348 | /* |
@@ -339,6 +376,8 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
339 | if (new_type) { | 376 | if (new_type) { |
340 | if (req_type == -1) | 377 | if (req_type == -1) |
341 | *new_type = _PAGE_CACHE_WB; | 378 | *new_type = _PAGE_CACHE_WB; |
379 | else if (req_type == _PAGE_CACHE_WC) | ||
380 | *new_type = _PAGE_CACHE_UC_MINUS; | ||
342 | else | 381 | else |
343 | *new_type = req_type & _PAGE_CACHE_MASK; | 382 | *new_type = req_type & _PAGE_CACHE_MASK; |
344 | } | 383 | } |
@@ -364,11 +403,16 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
364 | *new_type = actual_type; | 403 | *new_type = actual_type; |
365 | 404 | ||
366 | is_range_ram = pat_pagerange_is_ram(start, end); | 405 | is_range_ram = pat_pagerange_is_ram(start, end); |
367 | if (is_range_ram == 1) | 406 | if (is_range_ram == 1) { |
368 | return reserve_ram_pages_type(start, end, req_type, | 407 | |
369 | new_type); | 408 | spin_lock(&memtype_lock); |
370 | else if (is_range_ram < 0) | 409 | err = reserve_ram_pages_type(start, end, req_type, new_type); |
410 | spin_unlock(&memtype_lock); | ||
411 | |||
412 | return err; | ||
413 | } else if (is_range_ram < 0) { | ||
371 | return -EINVAL; | 414 | return -EINVAL; |
415 | } | ||
372 | 416 | ||
373 | new = kmalloc(sizeof(struct memtype), GFP_KERNEL); | 417 | new = kmalloc(sizeof(struct memtype), GFP_KERNEL); |
374 | if (!new) | 418 | if (!new) |
@@ -380,17 +424,19 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
380 | 424 | ||
381 | spin_lock(&memtype_lock); | 425 | spin_lock(&memtype_lock); |
382 | 426 | ||
383 | if (cached_entry && start >= cached_start) | 427 | entry = memtype_rb_search(&memtype_rbroot, new->start); |
384 | entry = cached_entry; | 428 | if (likely(entry != NULL)) { |
385 | else | 429 | /* To work correctly with list_for_each_entry_continue */ |
430 | entry = list_entry(entry->nd.prev, struct memtype, nd); | ||
431 | } else { | ||
386 | entry = list_entry(&memtype_list, struct memtype, nd); | 432 | entry = list_entry(&memtype_list, struct memtype, nd); |
433 | } | ||
387 | 434 | ||
388 | /* Search for existing mapping that overlaps the current range */ | 435 | /* Search for existing mapping that overlaps the current range */ |
389 | where = NULL; | 436 | where = NULL; |
390 | list_for_each_entry_continue(entry, &memtype_list, nd) { | 437 | list_for_each_entry_continue(entry, &memtype_list, nd) { |
391 | if (end <= entry->start) { | 438 | if (end <= entry->start) { |
392 | where = entry->nd.prev; | 439 | where = entry->nd.prev; |
393 | cached_entry = list_entry(where, struct memtype, nd); | ||
394 | break; | 440 | break; |
395 | } else if (start <= entry->start) { /* end > entry->start */ | 441 | } else if (start <= entry->start) { /* end > entry->start */ |
396 | err = chk_conflict(new, entry, new_type); | 442 | err = chk_conflict(new, entry, new_type); |
@@ -398,8 +444,6 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
398 | dprintk("Overlap at 0x%Lx-0x%Lx\n", | 444 | dprintk("Overlap at 0x%Lx-0x%Lx\n", |
399 | entry->start, entry->end); | 445 | entry->start, entry->end); |
400 | where = entry->nd.prev; | 446 | where = entry->nd.prev; |
401 | cached_entry = list_entry(where, | ||
402 | struct memtype, nd); | ||
403 | } | 447 | } |
404 | break; | 448 | break; |
405 | } else if (start < entry->end) { /* start > entry->start */ | 449 | } else if (start < entry->end) { /* start > entry->start */ |
@@ -407,8 +451,6 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
407 | if (!err) { | 451 | if (!err) { |
408 | dprintk("Overlap at 0x%Lx-0x%Lx\n", | 452 | dprintk("Overlap at 0x%Lx-0x%Lx\n", |
409 | entry->start, entry->end); | 453 | entry->start, entry->end); |
410 | cached_entry = list_entry(entry->nd.prev, | ||
411 | struct memtype, nd); | ||
412 | 454 | ||
413 | /* | 455 | /* |
414 | * Move to right position in the linked | 456 | * Move to right position in the linked |
@@ -436,13 +478,13 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
436 | return err; | 478 | return err; |
437 | } | 479 | } |
438 | 480 | ||
439 | cached_start = start; | ||
440 | |||
441 | if (where) | 481 | if (where) |
442 | list_add(&new->nd, where); | 482 | list_add(&new->nd, where); |
443 | else | 483 | else |
444 | list_add_tail(&new->nd, &memtype_list); | 484 | list_add_tail(&new->nd, &memtype_list); |
445 | 485 | ||
486 | memtype_rb_insert(&memtype_rbroot, new); | ||
487 | |||
446 | spin_unlock(&memtype_lock); | 488 | spin_unlock(&memtype_lock); |
447 | 489 | ||
448 | dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", | 490 | dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", |
@@ -454,7 +496,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
454 | 496 | ||
455 | int free_memtype(u64 start, u64 end) | 497 | int free_memtype(u64 start, u64 end) |
456 | { | 498 | { |
457 | struct memtype *entry; | 499 | struct memtype *entry, *saved_entry; |
458 | int err = -EINVAL; | 500 | int err = -EINVAL; |
459 | int is_range_ram; | 501 | int is_range_ram; |
460 | 502 | ||
@@ -466,23 +508,58 @@ int free_memtype(u64 start, u64 end) | |||
466 | return 0; | 508 | return 0; |
467 | 509 | ||
468 | is_range_ram = pat_pagerange_is_ram(start, end); | 510 | is_range_ram = pat_pagerange_is_ram(start, end); |
469 | if (is_range_ram == 1) | 511 | if (is_range_ram == 1) { |
470 | return free_ram_pages_type(start, end); | 512 | |
471 | else if (is_range_ram < 0) | 513 | spin_lock(&memtype_lock); |
514 | err = free_ram_pages_type(start, end); | ||
515 | spin_unlock(&memtype_lock); | ||
516 | |||
517 | return err; | ||
518 | } else if (is_range_ram < 0) { | ||
472 | return -EINVAL; | 519 | return -EINVAL; |
520 | } | ||
473 | 521 | ||
474 | spin_lock(&memtype_lock); | 522 | spin_lock(&memtype_lock); |
523 | |||
524 | entry = memtype_rb_search(&memtype_rbroot, start); | ||
525 | if (unlikely(entry == NULL)) | ||
526 | goto unlock_ret; | ||
527 | |||
528 | /* | ||
529 | * Saved entry points to an entry with start same or less than what | ||
530 | * we searched for. Now go through the list in both directions to look | ||
531 | * for the entry that matches with both start and end, with list stored | ||
532 | * in sorted start address | ||
533 | */ | ||
534 | saved_entry = entry; | ||
475 | list_for_each_entry(entry, &memtype_list, nd) { | 535 | list_for_each_entry(entry, &memtype_list, nd) { |
476 | if (entry->start == start && entry->end == end) { | 536 | if (entry->start == start && entry->end == end) { |
477 | if (cached_entry == entry || cached_start == start) | 537 | rb_erase(&entry->rb, &memtype_rbroot); |
478 | cached_entry = NULL; | 538 | list_del(&entry->nd); |
539 | kfree(entry); | ||
540 | err = 0; | ||
541 | break; | ||
542 | } else if (entry->start > start) { | ||
543 | break; | ||
544 | } | ||
545 | } | ||
546 | |||
547 | if (!err) | ||
548 | goto unlock_ret; | ||
479 | 549 | ||
550 | entry = saved_entry; | ||
551 | list_for_each_entry_reverse(entry, &memtype_list, nd) { | ||
552 | if (entry->start == start && entry->end == end) { | ||
553 | rb_erase(&entry->rb, &memtype_rbroot); | ||
480 | list_del(&entry->nd); | 554 | list_del(&entry->nd); |
481 | kfree(entry); | 555 | kfree(entry); |
482 | err = 0; | 556 | err = 0; |
483 | break; | 557 | break; |
558 | } else if (entry->start < start) { | ||
559 | break; | ||
484 | } | 560 | } |
485 | } | 561 | } |
562 | unlock_ret: | ||
486 | spin_unlock(&memtype_lock); | 563 | spin_unlock(&memtype_lock); |
487 | 564 | ||
488 | if (err) { | 565 | if (err) { |
@@ -496,6 +573,101 @@ int free_memtype(u64 start, u64 end) | |||
496 | } | 573 | } |
497 | 574 | ||
498 | 575 | ||
576 | /** | ||
577 | * lookup_memtype - Looksup the memory type for a physical address | ||
578 | * @paddr: physical address of which memory type needs to be looked up | ||
579 | * | ||
580 | * Only to be called when PAT is enabled | ||
581 | * | ||
582 | * Returns _PAGE_CACHE_WB, _PAGE_CACHE_WC, _PAGE_CACHE_UC_MINUS or | ||
583 | * _PAGE_CACHE_UC | ||
584 | */ | ||
585 | static unsigned long lookup_memtype(u64 paddr) | ||
586 | { | ||
587 | int rettype = _PAGE_CACHE_WB; | ||
588 | struct memtype *entry; | ||
589 | |||
590 | if (is_ISA_range(paddr, paddr + PAGE_SIZE - 1)) | ||
591 | return rettype; | ||
592 | |||
593 | if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) { | ||
594 | struct page *page; | ||
595 | spin_lock(&memtype_lock); | ||
596 | page = pfn_to_page(paddr >> PAGE_SHIFT); | ||
597 | rettype = get_page_memtype(page); | ||
598 | spin_unlock(&memtype_lock); | ||
599 | /* | ||
600 | * -1 from get_page_memtype() implies RAM page is in its | ||
601 | * default state and not reserved, and hence of type WB | ||
602 | */ | ||
603 | if (rettype == -1) | ||
604 | rettype = _PAGE_CACHE_WB; | ||
605 | |||
606 | return rettype; | ||
607 | } | ||
608 | |||
609 | spin_lock(&memtype_lock); | ||
610 | |||
611 | entry = memtype_rb_search(&memtype_rbroot, paddr); | ||
612 | if (entry != NULL) | ||
613 | rettype = entry->type; | ||
614 | else | ||
615 | rettype = _PAGE_CACHE_UC_MINUS; | ||
616 | |||
617 | spin_unlock(&memtype_lock); | ||
618 | return rettype; | ||
619 | } | ||
620 | |||
621 | /** | ||
622 | * io_reserve_memtype - Request a memory type mapping for a region of memory | ||
623 | * @start: start (physical address) of the region | ||
624 | * @end: end (physical address) of the region | ||
625 | * @type: A pointer to memtype, with requested type. On success, requested | ||
626 | * or any other compatible type that was available for the region is returned | ||
627 | * | ||
628 | * On success, returns 0 | ||
629 | * On failure, returns non-zero | ||
630 | */ | ||
631 | int io_reserve_memtype(resource_size_t start, resource_size_t end, | ||
632 | unsigned long *type) | ||
633 | { | ||
634 | resource_size_t size = end - start; | ||
635 | unsigned long req_type = *type; | ||
636 | unsigned long new_type; | ||
637 | int ret; | ||
638 | |||
639 | WARN_ON_ONCE(iomem_map_sanity_check(start, size)); | ||
640 | |||
641 | ret = reserve_memtype(start, end, req_type, &new_type); | ||
642 | if (ret) | ||
643 | goto out_err; | ||
644 | |||
645 | if (!is_new_memtype_allowed(start, size, req_type, new_type)) | ||
646 | goto out_free; | ||
647 | |||
648 | if (kernel_map_sync_memtype(start, size, new_type) < 0) | ||
649 | goto out_free; | ||
650 | |||
651 | *type = new_type; | ||
652 | return 0; | ||
653 | |||
654 | out_free: | ||
655 | free_memtype(start, end); | ||
656 | ret = -EBUSY; | ||
657 | out_err: | ||
658 | return ret; | ||
659 | } | ||
660 | |||
661 | /** | ||
662 | * io_free_memtype - Release a memory type mapping for a region of memory | ||
663 | * @start: start (physical address) of the region | ||
664 | * @end: end (physical address) of the region | ||
665 | */ | ||
666 | void io_free_memtype(resource_size_t start, resource_size_t end) | ||
667 | { | ||
668 | free_memtype(start, end); | ||
669 | } | ||
670 | |||
499 | pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, | 671 | pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, |
500 | unsigned long size, pgprot_t vma_prot) | 672 | unsigned long size, pgprot_t vma_prot) |
501 | { | 673 | { |
@@ -577,7 +749,7 @@ int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags) | |||
577 | { | 749 | { |
578 | unsigned long id_sz; | 750 | unsigned long id_sz; |
579 | 751 | ||
580 | if (!pat_enabled || base >= __pa(high_memory)) | 752 | if (base >= __pa(high_memory)) |
581 | return 0; | 753 | return 0; |
582 | 754 | ||
583 | id_sz = (__pa(high_memory) < base + size) ? | 755 | id_sz = (__pa(high_memory) < base + size) ? |
@@ -612,11 +784,29 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, | |||
612 | is_ram = pat_pagerange_is_ram(paddr, paddr + size); | 784 | is_ram = pat_pagerange_is_ram(paddr, paddr + size); |
613 | 785 | ||
614 | /* | 786 | /* |
615 | * reserve_pfn_range() doesn't support RAM pages. Maintain the current | 787 | * reserve_pfn_range() for RAM pages. We do not refcount to keep |
616 | * behavior with RAM pages by returning success. | 788 | * track of number of mappings of RAM pages. We can assert that |
789 | * the type requested matches the type of first page in the range. | ||
617 | */ | 790 | */ |
618 | if (is_ram != 0) | 791 | if (is_ram) { |
792 | if (!pat_enabled) | ||
793 | return 0; | ||
794 | |||
795 | flags = lookup_memtype(paddr); | ||
796 | if (want_flags != flags) { | ||
797 | printk(KERN_WARNING | ||
798 | "%s:%d map pfn RAM range req %s for %Lx-%Lx, got %s\n", | ||
799 | current->comm, current->pid, | ||
800 | cattr_name(want_flags), | ||
801 | (unsigned long long)paddr, | ||
802 | (unsigned long long)(paddr + size), | ||
803 | cattr_name(flags)); | ||
804 | *vma_prot = __pgprot((pgprot_val(*vma_prot) & | ||
805 | (~_PAGE_CACHE_MASK)) | | ||
806 | flags); | ||
807 | } | ||
619 | return 0; | 808 | return 0; |
809 | } | ||
620 | 810 | ||
621 | ret = reserve_memtype(paddr, paddr + size, want_flags, &flags); | 811 | ret = reserve_memtype(paddr, paddr + size, want_flags, &flags); |
622 | if (ret) | 812 | if (ret) |
@@ -678,14 +868,6 @@ int track_pfn_vma_copy(struct vm_area_struct *vma) | |||
678 | unsigned long vma_size = vma->vm_end - vma->vm_start; | 868 | unsigned long vma_size = vma->vm_end - vma->vm_start; |
679 | pgprot_t pgprot; | 869 | pgprot_t pgprot; |
680 | 870 | ||
681 | if (!pat_enabled) | ||
682 | return 0; | ||
683 | |||
684 | /* | ||
685 | * For now, only handle remap_pfn_range() vmas where | ||
686 | * is_linear_pfn_mapping() == TRUE. Handling of | ||
687 | * vm_insert_pfn() is TBD. | ||
688 | */ | ||
689 | if (is_linear_pfn_mapping(vma)) { | 871 | if (is_linear_pfn_mapping(vma)) { |
690 | /* | 872 | /* |
691 | * reserve the whole chunk covered by vma. We need the | 873 | * reserve the whole chunk covered by vma. We need the |
@@ -713,23 +895,24 @@ int track_pfn_vma_copy(struct vm_area_struct *vma) | |||
713 | int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot, | 895 | int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot, |
714 | unsigned long pfn, unsigned long size) | 896 | unsigned long pfn, unsigned long size) |
715 | { | 897 | { |
898 | unsigned long flags; | ||
716 | resource_size_t paddr; | 899 | resource_size_t paddr; |
717 | unsigned long vma_size = vma->vm_end - vma->vm_start; | 900 | unsigned long vma_size = vma->vm_end - vma->vm_start; |
718 | 901 | ||
719 | if (!pat_enabled) | ||
720 | return 0; | ||
721 | |||
722 | /* | ||
723 | * For now, only handle remap_pfn_range() vmas where | ||
724 | * is_linear_pfn_mapping() == TRUE. Handling of | ||
725 | * vm_insert_pfn() is TBD. | ||
726 | */ | ||
727 | if (is_linear_pfn_mapping(vma)) { | 902 | if (is_linear_pfn_mapping(vma)) { |
728 | /* reserve the whole chunk starting from vm_pgoff */ | 903 | /* reserve the whole chunk starting from vm_pgoff */ |
729 | paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; | 904 | paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; |
730 | return reserve_pfn_range(paddr, vma_size, prot, 0); | 905 | return reserve_pfn_range(paddr, vma_size, prot, 0); |
731 | } | 906 | } |
732 | 907 | ||
908 | if (!pat_enabled) | ||
909 | return 0; | ||
910 | |||
911 | /* for vm_insert_pfn and friends, we set prot based on lookup */ | ||
912 | flags = lookup_memtype(pfn << PAGE_SHIFT); | ||
913 | *prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) | | ||
914 | flags); | ||
915 | |||
733 | return 0; | 916 | return 0; |
734 | } | 917 | } |
735 | 918 | ||
@@ -744,14 +927,6 @@ void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn, | |||
744 | resource_size_t paddr; | 927 | resource_size_t paddr; |
745 | unsigned long vma_size = vma->vm_end - vma->vm_start; | 928 | unsigned long vma_size = vma->vm_end - vma->vm_start; |
746 | 929 | ||
747 | if (!pat_enabled) | ||
748 | return; | ||
749 | |||
750 | /* | ||
751 | * For now, only handle remap_pfn_range() vmas where | ||
752 | * is_linear_pfn_mapping() == TRUE. Handling of | ||
753 | * vm_insert_pfn() is TBD. | ||
754 | */ | ||
755 | if (is_linear_pfn_mapping(vma)) { | 930 | if (is_linear_pfn_mapping(vma)) { |
756 | /* free the whole chunk starting from vm_pgoff */ | 931 | /* free the whole chunk starting from vm_pgoff */ |
757 | paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; | 932 | paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; |
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index b3d20b9cac63..417c9f5b4afa 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c | |||
@@ -242,7 +242,7 @@ static void __restore_processor_state(struct saved_context *ctxt) | |||
242 | fix_processor_context(); | 242 | fix_processor_context(); |
243 | 243 | ||
244 | do_fpu_end(); | 244 | do_fpu_end(); |
245 | mtrr_ap_init(); | 245 | mtrr_bp_restore(); |
246 | 246 | ||
247 | #ifdef CONFIG_X86_OLD_MCE | 247 | #ifdef CONFIG_X86_OLD_MCE |
248 | mcheck_init(&boot_cpu_data); | 248 | mcheck_init(&boot_cpu_data); |
diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index 0adb0f91568c..97eb928b4924 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h | |||
@@ -49,23 +49,30 @@ static inline struct io_mapping * | |||
49 | io_mapping_create_wc(resource_size_t base, unsigned long size) | 49 | io_mapping_create_wc(resource_size_t base, unsigned long size) |
50 | { | 50 | { |
51 | struct io_mapping *iomap; | 51 | struct io_mapping *iomap; |
52 | 52 | pgprot_t prot; | |
53 | if (!is_io_mapping_possible(base, size)) | ||
54 | return NULL; | ||
55 | 53 | ||
56 | iomap = kmalloc(sizeof(*iomap), GFP_KERNEL); | 54 | iomap = kmalloc(sizeof(*iomap), GFP_KERNEL); |
57 | if (!iomap) | 55 | if (!iomap) |
58 | return NULL; | 56 | goto out_err; |
57 | |||
58 | if (iomap_create_wc(base, size, &prot)) | ||
59 | goto out_free; | ||
59 | 60 | ||
60 | iomap->base = base; | 61 | iomap->base = base; |
61 | iomap->size = size; | 62 | iomap->size = size; |
62 | iomap->prot = pgprot_writecombine(__pgprot(__PAGE_KERNEL)); | 63 | iomap->prot = prot; |
63 | return iomap; | 64 | return iomap; |
65 | |||
66 | out_free: | ||
67 | kfree(iomap); | ||
68 | out_err: | ||
69 | return NULL; | ||
64 | } | 70 | } |
65 | 71 | ||
66 | static inline void | 72 | static inline void |
67 | io_mapping_free(struct io_mapping *mapping) | 73 | io_mapping_free(struct io_mapping *mapping) |
68 | { | 74 | { |
75 | iomap_free(mapping->base, mapping->size); | ||
69 | kfree(mapping); | 76 | kfree(mapping); |
70 | } | 77 | } |
71 | 78 | ||
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index e2e5ce543595..2b87acfc5f87 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h | |||
@@ -99,7 +99,7 @@ enum pageflags { | |||
99 | #ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT | 99 | #ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT |
100 | PG_mlocked, /* Page is vma mlocked */ | 100 | PG_mlocked, /* Page is vma mlocked */ |
101 | #endif | 101 | #endif |
102 | #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR | 102 | #ifdef CONFIG_ARCH_USES_PG_UNCACHED |
103 | PG_uncached, /* Page has been mapped as uncached */ | 103 | PG_uncached, /* Page has been mapped as uncached */ |
104 | #endif | 104 | #endif |
105 | __NR_PAGEFLAGS, | 105 | __NR_PAGEFLAGS, |
@@ -257,7 +257,7 @@ PAGEFLAG_FALSE(Mlocked) | |||
257 | SETPAGEFLAG_NOOP(Mlocked) TESTCLEARFLAG_FALSE(Mlocked) | 257 | SETPAGEFLAG_NOOP(Mlocked) TESTCLEARFLAG_FALSE(Mlocked) |
258 | #endif | 258 | #endif |
259 | 259 | ||
260 | #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR | 260 | #ifdef CONFIG_ARCH_USES_PG_UNCACHED |
261 | PAGEFLAG(Uncached, uncached) | 261 | PAGEFLAG(Uncached, uncached) |
262 | #else | 262 | #else |
263 | PAGEFLAG_FALSE(Uncached) | 263 | PAGEFLAG_FALSE(Uncached) |
diff --git a/kernel/cpu.c b/kernel/cpu.c index 67a60076dd7e..6ba0f1ecb212 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c | |||
@@ -414,6 +414,14 @@ int disable_nonboot_cpus(void) | |||
414 | return error; | 414 | return error; |
415 | } | 415 | } |
416 | 416 | ||
417 | void __weak arch_enable_nonboot_cpus_begin(void) | ||
418 | { | ||
419 | } | ||
420 | |||
421 | void __weak arch_enable_nonboot_cpus_end(void) | ||
422 | { | ||
423 | } | ||
424 | |||
417 | void __ref enable_nonboot_cpus(void) | 425 | void __ref enable_nonboot_cpus(void) |
418 | { | 426 | { |
419 | int cpu, error; | 427 | int cpu, error; |
@@ -425,6 +433,9 @@ void __ref enable_nonboot_cpus(void) | |||
425 | goto out; | 433 | goto out; |
426 | 434 | ||
427 | printk("Enabling non-boot CPUs ...\n"); | 435 | printk("Enabling non-boot CPUs ...\n"); |
436 | |||
437 | arch_enable_nonboot_cpus_begin(); | ||
438 | |||
428 | for_each_cpu(cpu, frozen_cpus) { | 439 | for_each_cpu(cpu, frozen_cpus) { |
429 | error = _cpu_up(cpu, 1); | 440 | error = _cpu_up(cpu, 1); |
430 | if (!error) { | 441 | if (!error) { |
@@ -433,6 +444,9 @@ void __ref enable_nonboot_cpus(void) | |||
433 | } | 444 | } |
434 | printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error); | 445 | printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error); |
435 | } | 446 | } |
447 | |||
448 | arch_enable_nonboot_cpus_end(); | ||
449 | |||
436 | cpumask_clear(frozen_cpus); | 450 | cpumask_clear(frozen_cpus); |
437 | out: | 451 | out: |
438 | cpu_maps_update_done(); | 452 | cpu_maps_update_done(); |
diff --git a/kernel/smp.c b/kernel/smp.c index 94188b8ecc33..8e218500ab14 100644 --- a/kernel/smp.c +++ b/kernel/smp.c | |||
@@ -177,6 +177,11 @@ void generic_smp_call_function_interrupt(void) | |||
177 | int cpu = get_cpu(); | 177 | int cpu = get_cpu(); |
178 | 178 | ||
179 | /* | 179 | /* |
180 | * Shouldn't receive this interrupt on a cpu that is not yet online. | ||
181 | */ | ||
182 | WARN_ON_ONCE(!cpu_online(cpu)); | ||
183 | |||
184 | /* | ||
180 | * Ensure entry is visible on call_function_queue after we have | 185 | * Ensure entry is visible on call_function_queue after we have |
181 | * entered the IPI. See comment in smp_call_function_many. | 186 | * entered the IPI. See comment in smp_call_function_many. |
182 | * If we don't have this, then we may miss an entry on the list | 187 | * If we don't have this, then we may miss an entry on the list |
@@ -230,6 +235,11 @@ void generic_smp_call_function_single_interrupt(void) | |||
230 | unsigned int data_flags; | 235 | unsigned int data_flags; |
231 | LIST_HEAD(list); | 236 | LIST_HEAD(list); |
232 | 237 | ||
238 | /* | ||
239 | * Shouldn't receive this interrupt on a cpu that is not yet online. | ||
240 | */ | ||
241 | WARN_ON_ONCE(!cpu_online(smp_processor_id())); | ||
242 | |||
233 | spin_lock(&q->lock); | 243 | spin_lock(&q->lock); |
234 | list_replace_init(&q->list, &list); | 244 | list_replace_init(&q->list, &list); |
235 | spin_unlock(&q->lock); | 245 | spin_unlock(&q->lock); |
@@ -285,8 +295,14 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info, | |||
285 | */ | 295 | */ |
286 | this_cpu = get_cpu(); | 296 | this_cpu = get_cpu(); |
287 | 297 | ||
288 | /* Can deadlock when called with interrupts disabled */ | 298 | /* |
289 | WARN_ON_ONCE(irqs_disabled() && !oops_in_progress); | 299 | * Can deadlock when called with interrupts disabled. |
300 | * We allow cpu's that are not yet online though, as no one else can | ||
301 | * send smp call function interrupt to this cpu and as such deadlocks | ||
302 | * can't happen. | ||
303 | */ | ||
304 | WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled() | ||
305 | && !oops_in_progress); | ||
290 | 306 | ||
291 | if (cpu == this_cpu) { | 307 | if (cpu == this_cpu) { |
292 | local_irq_save(flags); | 308 | local_irq_save(flags); |
@@ -329,8 +345,14 @@ void __smp_call_function_single(int cpu, struct call_single_data *data, | |||
329 | { | 345 | { |
330 | csd_lock(data); | 346 | csd_lock(data); |
331 | 347 | ||
332 | /* Can deadlock when called with interrupts disabled */ | 348 | /* |
333 | WARN_ON_ONCE(wait && irqs_disabled() && !oops_in_progress); | 349 | * Can deadlock when called with interrupts disabled. |
350 | * We allow cpu's that are not yet online though, as no one else can | ||
351 | * send smp call function interrupt to this cpu and as such deadlocks | ||
352 | * can't happen. | ||
353 | */ | ||
354 | WARN_ON_ONCE(cpu_online(smp_processor_id()) && wait && irqs_disabled() | ||
355 | && !oops_in_progress); | ||
334 | 356 | ||
335 | generic_exec_single(cpu, data, wait); | 357 | generic_exec_single(cpu, data, wait); |
336 | } | 358 | } |
@@ -365,8 +387,14 @@ void smp_call_function_many(const struct cpumask *mask, | |||
365 | unsigned long flags; | 387 | unsigned long flags; |
366 | int cpu, next_cpu, this_cpu = smp_processor_id(); | 388 | int cpu, next_cpu, this_cpu = smp_processor_id(); |
367 | 389 | ||
368 | /* Can deadlock when called with interrupts disabled */ | 390 | /* |
369 | WARN_ON_ONCE(irqs_disabled() && !oops_in_progress); | 391 | * Can deadlock when called with interrupts disabled. |
392 | * We allow cpu's that are not yet online though, as no one else can | ||
393 | * send smp call function interrupt to this cpu and as such deadlocks | ||
394 | * can't happen. | ||
395 | */ | ||
396 | WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled() | ||
397 | && !oops_in_progress); | ||
370 | 398 | ||
371 | /* So, what's a CPU they want? Ignoring this one. */ | 399 | /* So, what's a CPU they want? Ignoring this one. */ |
372 | cpu = cpumask_first_and(mask, cpu_online_mask); | 400 | cpu = cpumask_first_and(mask, cpu_online_mask); |
diff --git a/mm/Kconfig b/mm/Kconfig index fe5f674d7a7d..3aa519f52e18 100644 --- a/mm/Kconfig +++ b/mm/Kconfig | |||
@@ -153,7 +153,7 @@ config MEMORY_HOTREMOVE | |||
153 | # | 153 | # |
154 | config PAGEFLAGS_EXTENDED | 154 | config PAGEFLAGS_EXTENDED |
155 | def_bool y | 155 | def_bool y |
156 | depends on 64BIT || SPARSEMEM_VMEMMAP || !NUMA || !SPARSEMEM | 156 | depends on 64BIT || SPARSEMEM_VMEMMAP || !SPARSEMEM |
157 | 157 | ||
158 | # Heavily threaded applications may benefit from splitting the mm-wide | 158 | # Heavily threaded applications may benefit from splitting the mm-wide |
159 | # page_table_lock, so that faults on different parts of the user address | 159 | # page_table_lock, so that faults on different parts of the user address |