Diffstat (limited to 'arch/x86'):
 50 files changed, 1233 insertions(+), 1015 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0dc9d0144a27..5e28e2be3a41 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -138,6 +138,7 @@ config X86
 	select HAVE_ACPI_APEI_NMI if ACPI
 	select ACPI_LEGACY_TABLES_LOOKUP if ACPI
 	select X86_FEATURE_NAMES if PROC_FS
+	select SRCU
 
 config INSTRUCTION_DECODER
 	def_bool y
@@ -855,6 +856,10 @@ config SCHED_MC
 
 source "kernel/Kconfig.preempt"
 
+config UP_LATE_INIT
+	def_bool y
+	depends on !SMP && X86_LOCAL_APIC
+
 config X86_UP_APIC
 	bool "Local APIC support on uniprocessors"
 	depends on X86_32 && !SMP && !X86_32_NON_STANDARD
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index ad754b4411f7..8bd44e8ee6e2 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -49,6 +49,7 @@ $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
 
 vmlinux-objs-$(CONFIG_EFI_STUB) += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o \
 	$(objtree)/drivers/firmware/efi/libstub/lib.a
+vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o
 
 $(obj)/vmlinux: $(vmlinux-objs-y) FORCE
 	$(call if_changed,ld)
diff --git a/arch/x86/boot/compressed/efi_stub_64.S b/arch/x86/boot/compressed/efi_stub_64.S
index 7ff3632806b1..99494dff2113 100644
--- a/arch/x86/boot/compressed/efi_stub_64.S
+++ b/arch/x86/boot/compressed/efi_stub_64.S
@@ -3,28 +3,3 @@
 #include <asm/processor-flags.h>
 
 #include "../../platform/efi/efi_stub_64.S"
-
-#ifdef CONFIG_EFI_MIXED
-	.code64
-	.text
-ENTRY(efi64_thunk)
-	push	%rbp
-	push	%rbx
-
-	subq	$16, %rsp
-	leaq	efi_exit32(%rip), %rax
-	movl	%eax, 8(%rsp)
-	leaq	efi_gdt64(%rip), %rax
-	movl	%eax, 4(%rsp)
-	movl	%eax, 2(%rax)		/* Fixup the gdt base address */
-	leaq	efi32_boot_gdt(%rip), %rax
-	movl	%eax, (%rsp)
-
-	call	__efi64_thunk
-
-	addq	$16, %rsp
-	pop	%rbx
-	pop	%rbp
-	ret
-ENDPROC(efi64_thunk)
-#endif /* CONFIG_EFI_MIXED */
diff --git a/arch/x86/boot/compressed/efi_thunk_64.S b/arch/x86/boot/compressed/efi_thunk_64.S
new file mode 100644
index 000000000000..630384a4c14a
--- /dev/null
+++ b/arch/x86/boot/compressed/efi_thunk_64.S
@@ -0,0 +1,196 @@
+/*
+ * Copyright (C) 2014, 2015 Intel Corporation; author Matt Fleming
+ *
+ * Early support for invoking 32-bit EFI services from a 64-bit kernel.
+ *
+ * Because this thunking occurs before ExitBootServices() we have to
+ * restore the firmware's 32-bit GDT before we make EFI service calls,
+ * since the firmware's 32-bit IDT is still currently installed and it
+ * needs to be able to service interrupts.
+ *
+ * On the plus side, we don't have to worry about mangling 64-bit
+ * addresses into 32-bits because we're executing with an identity
+ * mapped pagetable and haven't transitioned to 64-bit virtual addresses
+ * yet.
+ */
+
+#include <linux/linkage.h>
+#include <asm/msr.h>
+#include <asm/page_types.h>
+#include <asm/processor-flags.h>
+#include <asm/segment.h>
+
+	.code64
+	.text
+ENTRY(efi64_thunk)
+	push	%rbp
+	push	%rbx
+
+	subq	$8, %rsp
+	leaq	efi_exit32(%rip), %rax
+	movl	%eax, 4(%rsp)
+	leaq	efi_gdt64(%rip), %rax
+	movl	%eax, (%rsp)
+	movl	%eax, 2(%rax)		/* Fixup the gdt base address */
+
+	movl	%ds, %eax
+	push	%rax
+	movl	%es, %eax
+	push	%rax
+	movl	%ss, %eax
+	push	%rax
+
+	/*
+	 * Convert x86-64 ABI params to i386 ABI
+	 */
+	subq	$32, %rsp
+	movl	%esi, 0x0(%rsp)
+	movl	%edx, 0x4(%rsp)
+	movl	%ecx, 0x8(%rsp)
+	movq	%r8, %rsi
+	movl	%esi, 0xc(%rsp)
+	movq	%r9, %rsi
+	movl	%esi, 0x10(%rsp)
+
+	sgdt	save_gdt(%rip)
+
+	leaq	1f(%rip), %rbx
+	movq	%rbx, func_rt_ptr(%rip)
+
+	/*
+	 * Switch to gdt with 32-bit segments. This is the firmware GDT
+	 * that was installed when the kernel started executing. This
+	 * pointer was saved at the EFI stub entry point in head_64.S.
+	 */
+	leaq	efi32_boot_gdt(%rip), %rax
+	lgdt	(%rax)
+
+	pushq	$__KERNEL_CS
+	leaq	efi_enter32(%rip), %rax
+	pushq	%rax
+	lretq
+
+1:	addq	$32, %rsp
+
+	lgdt	save_gdt(%rip)
+
+	pop	%rbx
+	movl	%ebx, %ss
+	pop	%rbx
+	movl	%ebx, %es
+	pop	%rbx
+	movl	%ebx, %ds
+
+	/*
+	 * Convert 32-bit status code into 64-bit.
+	 */
+	test	%rax, %rax
+	jz	1f
+	movl	%eax, %ecx
+	andl	$0x0fffffff, %ecx
+	andl	$0xf0000000, %eax
+	shl	$32, %rax
+	or	%rcx, %rax
+1:
+	addq	$8, %rsp
+	pop	%rbx
+	pop	%rbp
+	ret
+ENDPROC(efi64_thunk)
+
+ENTRY(efi_exit32)
+	movq	func_rt_ptr(%rip), %rax
+	push	%rax
+	mov	%rdi, %rax
+	ret
+ENDPROC(efi_exit32)
+
+	.code32
+/*
+ * EFI service pointer must be in %edi.
+ *
+ * The stack should represent the 32-bit calling convention.
+ */
+ENTRY(efi_enter32)
+	movl	$__KERNEL_DS, %eax
+	movl	%eax, %ds
+	movl	%eax, %es
+	movl	%eax, %ss
+
+	/* Reload pgtables */
+	movl	%cr3, %eax
+	movl	%eax, %cr3
+
+	/* Disable paging */
+	movl	%cr0, %eax
+	btrl	$X86_CR0_PG_BIT, %eax
+	movl	%eax, %cr0
+
+	/* Disable long mode via EFER */
+	movl	$MSR_EFER, %ecx
+	rdmsr
+	btrl	$_EFER_LME, %eax
+	wrmsr
+
+	call	*%edi
+
+	/* We must preserve return value */
+	movl	%eax, %edi
+
+	/*
+	 * Some firmware will return with interrupts enabled. Be sure to
+	 * disable them before we switch GDTs.
+	 */
+	cli
+
+	movl	56(%esp), %eax
+	movl	%eax, 2(%eax)
+	lgdtl	(%eax)
+
+	movl	%cr4, %eax
+	btsl	$(X86_CR4_PAE_BIT), %eax
+	movl	%eax, %cr4
+
+	movl	%cr3, %eax
+	movl	%eax, %cr3
+
+	movl	$MSR_EFER, %ecx
+	rdmsr
+	btsl	$_EFER_LME, %eax
+	wrmsr
+
+	xorl	%eax, %eax
+	lldt	%ax
+
+	movl	60(%esp), %eax
+	pushl	$__KERNEL_CS
+	pushl	%eax
+
+	/* Enable paging */
+	movl	%cr0, %eax
+	btsl	$X86_CR0_PG_BIT, %eax
+	movl	%eax, %cr0
+	lret
+ENDPROC(efi_enter32)
+
+	.data
+	.balign	8
+	.global	efi32_boot_gdt
+efi32_boot_gdt:	.word	0
+		.quad	0
+
+save_gdt:	.word	0
+		.quad	0
+func_rt_ptr:	.quad	0
+
+	.global efi_gdt64
+efi_gdt64:
+	.word	efi_gdt64_end - efi_gdt64
+	.long	0			/* Filled out by user */
+	.word	0
+	.quad	0x0000000000000000	/* NULL descriptor */
+	.quad	0x00af9a000000ffff	/* __KERNEL_CS */
+	.quad	0x00cf92000000ffff	/* __KERNEL_DS */
+	.quad	0x0080890000000000	/* TS descriptor */
+	.quad	0x0000000000000000	/* TS continued */
+efi_gdt64_end:
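The status-widening block in efi64_thunk above is easy to misread, so here is a minimal C sketch of the same transformation (illustrative only, not kernel code): EFI keeps the error/OEM flag bits in the top nibble of a status word, so a 32-bit status must move that nibble to the top of the 64-bit word while the payload stays in the low bits.

	#include <stdint.h>

	/* Sketch of the 32- to 64-bit EFI status conversion done in
	 * efi64_thunk: the top-nibble flags move to bits 63:60, the
	 * payload stays in bits 27:0; EFI_SUCCESS (0) is unchanged.
	 */
	static uint64_t efi32_status_to_64(uint32_t status)
	{
		if (status == 0)
			return 0;
		return ((uint64_t)(status & 0xf0000000) << 32) |
		       (status & 0x0fffffff);
	}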
diff --git a/arch/x86/boot/ctype.h b/arch/x86/boot/ctype.h
index 25e13403193c..020f137df7a2 100644
--- a/arch/x86/boot/ctype.h
+++ b/arch/x86/boot/ctype.h
@@ -1,6 +1,5 @@
-#ifndef BOOT_ISDIGIT_H
-
-#define BOOT_ISDIGIT_H
+#ifndef BOOT_CTYPE_H
+#define BOOT_CTYPE_H
 
 static inline int isdigit(int ch)
 {
diff --git a/arch/x86/boot/early_serial_console.c b/arch/x86/boot/early_serial_console.c
index 5df2869c874b..45a07684bbab 100644
--- a/arch/x86/boot/early_serial_console.c
+++ b/arch/x86/boot/early_serial_console.c
@@ -2,8 +2,6 @@
 
 #define DEFAULT_SERIAL_PORT 0x3f8 /* ttyS0 */
 
-#define XMTRDY	0x20
-
 #define DLAB	0x80
 
 #define TXR	0	/* Transmit register (WRITE) */
@@ -74,8 +72,8 @@ static void parse_earlyprintk(void)
 	static const int bases[] = { 0x3f8, 0x2f8 };
 	int idx = 0;
 
-	if (!strncmp(arg + pos, "ttyS", 4))
-		pos += 4;
+	/* += strlen("ttyS"); */
+	pos += 4;
 
 	if (arg[pos++] == '1')
 		idx = 1;
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 82e8a1d44658..156ebcab4ada 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -179,8 +179,8 @@ sysenter_dispatch:
 sysexit_from_sys_call:
 	andl	$~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	/* clear IF, that popfq doesn't enable interrupts early */
-	andl	$~0x200,EFLAGS-R11(%rsp)
-	movl	RIP-R11(%rsp),%edx		/* User %eip */
+	andl	$~0x200,EFLAGS-ARGOFFSET(%rsp)
+	movl	RIP-ARGOFFSET(%rsp),%edx	/* User %eip */
 	CFI_REGISTER rip,rdx
 	RESTORE_ARGS 0,24,0,0,0,0
 	xorq	%r8,%r8
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 465b309af254..efc3b22d896e 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -106,7 +106,14 @@ extern u32 native_safe_apic_wait_icr_idle(void);
 extern void native_apic_icr_write(u32 low, u32 id);
 extern u64 native_apic_icr_read(void);
 
-extern int x2apic_mode;
+static inline bool apic_is_x2apic_enabled(void)
+{
+	u64 msr;
+
+	if (rdmsrl_safe(MSR_IA32_APICBASE, &msr))
+		return false;
+	return msr & X2APIC_ENABLE;
+}
 
 #ifdef CONFIG_X86_X2APIC
 /*
@@ -169,48 +176,23 @@ static inline u64 native_x2apic_icr_read(void)
 	return val;
 }
 
+extern int x2apic_mode;
 extern int x2apic_phys;
-extern int x2apic_preenabled;
-extern void check_x2apic(void);
-extern void enable_x2apic(void);
+extern void __init check_x2apic(void);
+extern void x2apic_setup(void);
 static inline int x2apic_enabled(void)
 {
-	u64 msr;
-
-	if (!cpu_has_x2apic)
-		return 0;
-
-	rdmsrl(MSR_IA32_APICBASE, msr);
-	if (msr & X2APIC_ENABLE)
-		return 1;
-	return 0;
+	return cpu_has_x2apic && apic_is_x2apic_enabled();
 }
 
 #define x2apic_supported()	(cpu_has_x2apic)
-static inline void x2apic_force_phys(void)
-{
-	x2apic_phys = 1;
-}
 #else
-static inline void disable_x2apic(void)
-{
-}
-static inline void check_x2apic(void)
-{
-}
-static inline void enable_x2apic(void)
-{
-}
-static inline int x2apic_enabled(void)
-{
-	return 0;
-}
-static inline void x2apic_force_phys(void)
-{
-}
+static inline void check_x2apic(void) { }
+static inline void x2apic_setup(void) { }
+static inline int x2apic_enabled(void) { return 0; }
 
-#define x2apic_preenabled 0
-#define x2apic_supported()	0
+#define x2apic_mode		(0)
+#define x2apic_supported()	(0)
 #endif
 
 extern void enable_IR_x2apic(void);
@@ -219,7 +201,6 @@ extern int get_physical_broadcast(void);
 
 extern int lapic_get_maxlvt(void);
 extern void clear_local_APIC(void);
-extern void connect_bsp_APIC(void);
 extern void disconnect_bsp_APIC(int virt_wire_setup);
 extern void disable_local_APIC(void);
 extern void lapic_shutdown(void);
@@ -227,14 +208,23 @@ extern int verify_local_APIC(void);
 extern void sync_Arb_IDs(void);
 extern void init_bsp_APIC(void);
 extern void setup_local_APIC(void);
-extern void end_local_APIC_setup(void);
-extern void bsp_end_local_APIC_setup(void);
 extern void init_apic_mappings(void);
 void register_lapic_address(unsigned long address);
 extern void setup_boot_APIC_clock(void);
 extern void setup_secondary_APIC_clock(void);
 extern int APIC_init_uniprocessor(void);
+
+#ifdef CONFIG_X86_64
+static inline int apic_force_enable(unsigned long addr)
+{
+	return -1;
+}
+#else
 extern int apic_force_enable(unsigned long addr);
+#endif
+
+extern int apic_bsp_setup(bool upmode);
+extern void apic_ap_setup(void);
 
 /*
  * On 32bit this is mach-xxx local
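Note that apic_is_x2apic_enabled() uses rdmsrl_safe() rather than a plain rdmsrl(), so the probe reports "not enabled" instead of faulting where MSR_IA32_APICBASE cannot be read. A hedged sketch of the same defensive pattern (msr_bit_set() is hypothetical; rdmsrl_safe() is the real kernel helper):

	/* Hypothetical helper in the style of apic_is_x2apic_enabled():
	 * a faulting MSR read (non-zero return) is treated as "bit clear".
	 */
	static bool msr_bit_set(u32 msr_no, u64 bit)
	{
		u64 val;

		if (rdmsrl_safe(msr_no, &val))
			return false;
		return val & bit;
	}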
diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h
index 76659b67fd11..1f1297b46f83 100644
--- a/arch/x86/include/asm/calling.h
+++ b/arch/x86/include/asm/calling.h
@@ -83,7 +83,6 @@ For 32-bit we have the following conventions - kernel is built with
 #define SS		160
 
 #define ARGOFFSET	R11
-#define SWFRAME		ORIG_RAX
 
 .macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1, rax_enosys=0
 	subq  $9*8+\addskip, %rsp
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index aede2c347bde..90a54851aedc 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -174,6 +174,7 @@
 #define X86_FEATURE_TOPOEXT	( 6*32+22) /* topology extensions CPUID leafs */
 #define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
 #define X86_FEATURE_PERFCTR_NB	( 6*32+24) /* NB performance counter extensions */
+#define X86_FEATURE_BPEXT	( 6*32+26) /* data breakpoint extension */
 #define X86_FEATURE_PERFCTR_L2	( 6*32+28) /* L2 performance counter extensions */
 
 /*
@@ -388,6 +389,7 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
 #define cpu_has_cx16		boot_cpu_has(X86_FEATURE_CX16)
 #define cpu_has_eager_fpu	boot_cpu_has(X86_FEATURE_EAGER_FPU)
 #define cpu_has_topoext		boot_cpu_has(X86_FEATURE_TOPOEXT)
+#define cpu_has_bpext		boot_cpu_has(X86_FEATURE_BPEXT)
 
 #if __GNUC__ >= 4
 extern void warn_pre_alternatives(void);
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 61fd18b83b6c..12cb66f6d3a5 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -114,5 +114,10 @@ static inline void debug_stack_usage_inc(void) { }
 static inline void debug_stack_usage_dec(void) { }
 #endif /* X86_64 */
 
+#ifdef CONFIG_CPU_SUP_AMD
+extern void set_dr_addr_mask(unsigned long mask, int dr);
+#else
+static inline void set_dr_addr_mask(unsigned long mask, int dr) { }
+#endif
 
 #endif /* _ASM_X86_DEBUGREG_H */
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index e97622f57722..0dbc08282291 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -207,7 +207,7 @@ static inline void fpu_fxsave(struct fpu *fpu)
 	if (config_enabled(CONFIG_X86_32))
 		asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state->fxsave));
 	else if (config_enabled(CONFIG_AS_FXSAVEQ))
-		asm volatile("fxsaveq %0" : "=m" (fpu->state->fxsave));
+		asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state->fxsave));
 	else {
 		/* Using "rex64; fxsave %0" is broken because, if the memory
 		 * operand uses any extended registers for addressing, a second
@@ -290,9 +290,11 @@ static inline int fpu_restore_checking(struct fpu *fpu)
 
 static inline int restore_fpu_checking(struct task_struct *tsk)
 {
-	/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
-	   is pending. Clear the x87 state here by setting it to fixed
-	   values. "m" is a random variable that should be in L1 */
+	/*
+	 * AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is
+	 * pending. Clear the x87 state here by setting it to fixed values.
+	 * "m" is a random variable that should be in L1.
+	 */
 	if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) {
 		asm volatile(
 			"fnclex\n\t"
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
index ef1c4d2d41ec..6c98be864a75 100644
--- a/arch/x86/include/asm/hw_breakpoint.h
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -12,6 +12,7 @@
  */
 struct arch_hw_breakpoint {
 	unsigned long address;
+	unsigned long mask;
 	u8 len;
 	u8 type;
 };
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index ed8089d69094..6eb6fcb83f63 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -40,8 +40,8 @@ extern void __kernel_fpu_end(void);
 
 static inline void kernel_fpu_begin(void)
 {
-	WARN_ON_ONCE(!irq_fpu_usable());
 	preempt_disable();
+	WARN_ON_ONCE(!irq_fpu_usable());
 	__kernel_fpu_begin();
 }
 
@@ -51,6 +51,10 @@ static inline void kernel_fpu_end(void)
 	preempt_enable();
 }
 
+/* Must be called with preempt disabled */
+extern void kernel_fpu_disable(void);
+extern void kernel_fpu_enable(void);
+
 /*
  * Some instructions like VIA's padlock instructions generate a spurious
  * DNA fault but don't modify SSE registers. And these instructions
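The reordering in kernel_fpu_begin() matters: preemption must already be off when irq_fpu_usable() is sampled, or the task could migrate to another CPU between the check and __kernel_fpu_begin(). Typical usage is unchanged; a hedged sketch of a caller (example_simd_user() is hypothetical):

	/* Any kernel code touching FPU/SIMD registers brackets it like
	 * this; the section runs with preemption disabled.
	 */
	static void example_simd_user(void)
	{
		kernel_fpu_begin();
		/* ... SSE/AVX instructions may be used here ... */
		kernel_fpu_end();
	}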
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index bf006cce9418..2f91685fe1cd 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -279,6 +279,11 @@ static inline void disable_ioapic_support(void) { }
 #define native_ioapic_set_affinity	NULL
 #define native_setup_ioapic_entry	NULL
 #define native_eoi_ioapic_pin		NULL
+
+static inline void setup_IO_APIC(void) { }
+static inline void enable_IO_APIC(void) { }
+static inline void setup_ioapic_dest(void) { }
+
 #endif
 
 #endif /* _ASM_X86_IO_APIC_H */
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index b7747c4c2cf2..6224d316c405 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -33,8 +33,6 @@ struct irq_cfg;
 
 #ifdef CONFIG_IRQ_REMAP
 
-extern void setup_irq_remapping_ops(void);
-extern int irq_remapping_supported(void);
 extern void set_irq_remapping_broken(void);
 extern int irq_remapping_prepare(void);
 extern int irq_remapping_enable(void);
@@ -60,8 +58,6 @@ void irq_remap_modify_chip_defaults(struct irq_chip *chip);
 
 #else /* CONFIG_IRQ_REMAP */
 
-static inline void setup_irq_remapping_ops(void) { }
-static inline int irq_remapping_supported(void) { return 0; }
 static inline void set_irq_remapping_broken(void) { }
 static inline int irq_remapping_prepare(void) { return -ENODEV; }
 static inline int irq_remapping_enable(void) { return -ENODEV; }
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 51b26e895933..9b3de99dc004 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -190,7 +190,6 @@ enum mcp_flags {
 void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
 
 int mce_notify_irq(void);
-void mce_notify_process(void);
 
 DECLARE_PER_CPU(struct mce, injectm);
 
diff --git a/arch/x86/include/asm/pmc_atom.h b/arch/x86/include/asm/pmc_atom.h
index fc7a17c05d35..bc0fc0866553 100644
--- a/arch/x86/include/asm/pmc_atom.h
+++ b/arch/x86/include/asm/pmc_atom.h
@@ -53,6 +53,28 @@
 /* Sleep state counter is in units of of 32us */
 #define PMC_TMR_SHIFT	5
 
+/* Power status of power islands */
+#define PMC_PSS			0x98
+
+#define PMC_PSS_BIT_GBE			BIT(0)
+#define PMC_PSS_BIT_SATA		BIT(1)
+#define PMC_PSS_BIT_HDA			BIT(2)
+#define PMC_PSS_BIT_SEC			BIT(3)
+#define PMC_PSS_BIT_PCIE		BIT(4)
+#define PMC_PSS_BIT_LPSS		BIT(5)
+#define PMC_PSS_BIT_LPE			BIT(6)
+#define PMC_PSS_BIT_DFX			BIT(7)
+#define PMC_PSS_BIT_USH_CTRL		BIT(8)
+#define PMC_PSS_BIT_USH_SUS		BIT(9)
+#define PMC_PSS_BIT_USH_VCCS		BIT(10)
+#define PMC_PSS_BIT_USH_VCCA		BIT(11)
+#define PMC_PSS_BIT_OTG_CTRL		BIT(12)
+#define PMC_PSS_BIT_OTG_VCCS		BIT(13)
+#define PMC_PSS_BIT_OTG_VCCA_CLK	BIT(14)
+#define PMC_PSS_BIT_OTG_VCCA		BIT(15)
+#define PMC_PSS_BIT_USB			BIT(16)
+#define PMC_PSS_BIT_USB_SUS		BIT(17)
+
 /* These registers reflect D3 status of functions */
 #define PMC_D3_STS_0	0xA0
 
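A sketch of how the new power-island status bits might be consumed; the ioremap()'d pmc_base and the polarity of the bits are assumptions here, not something this patch defines:

	/* Illustrative only: sample the SATA island's power state from
	 * the memory-mapped PMC register block (bit polarity assumed,
	 * check the platform datasheet).
	 */
	static bool pmc_sata_island_gated(void __iomem *pmc_base)
	{
		u32 pss = readl(pmc_base + PMC_PSS);

		return pss & PMC_PSS_BIT_SATA;
	}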
diff --git a/arch/x86/include/asm/smpboot_hooks.h b/arch/x86/include/asm/smpboot_hooks.h
deleted file mode 100644
index 0da7409f0bec..000000000000
--- a/arch/x86/include/asm/smpboot_hooks.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/* two abstractions specific to kernel/smpboot.c, mainly to cater to visws
- * which needs to alter them. */
-
-static inline void smpboot_clear_io_apic_irqs(void)
-{
-#ifdef CONFIG_X86_IO_APIC
-	io_apic_irqs = 0;
-#endif
-}
-
-static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&rtc_lock, flags);
-	CMOS_WRITE(0xa, 0xf);
-	spin_unlock_irqrestore(&rtc_lock, flags);
-	local_flush_tlb();
-	pr_debug("1.\n");
-	*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
-							start_eip >> 4;
-	pr_debug("2.\n");
-	*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
-							start_eip & 0xf;
-	pr_debug("3.\n");
-}
-
-static inline void smpboot_restore_warm_reset_vector(void)
-{
-	unsigned long flags;
-
-	/*
-	 * Install writable page 0 entry to set BIOS data area.
-	 */
-	local_flush_tlb();
-
-	/*
-	 * Paranoid: Set warm reset code and vector here back
-	 * to default values.
-	 */
-	spin_lock_irqsave(&rtc_lock, flags);
-	CMOS_WRITE(0, 0xf);
-	spin_unlock_irqrestore(&rtc_lock, flags);
-
-	*((volatile u32 *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0;
-}
-
-static inline void __init smpboot_setup_io_apic(void)
-{
-#ifdef CONFIG_X86_IO_APIC
-	/*
-	 * Here we can be sure that there is an IO-APIC in the system. Let's
-	 * go and set it up:
-	 */
-	if (!skip_ioapic_setup && nr_ioapics)
-		setup_IO_APIC();
-	else {
-		nr_ioapics = 0;
-	}
-#endif
-}
-
-static inline void smpboot_clear_io_apic(void)
-{
-#ifdef CONFIG_X86_IO_APIC
-	nr_ioapics = 0;
-#endif
-}
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 547e344a6dc6..e82e95abc92b 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -75,7 +75,6 @@ struct thread_info {
 #define TIF_SYSCALL_EMU		6	/* syscall emulation active */
 #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
 #define TIF_SECCOMP		8	/* secure computing */
-#define TIF_MCE_NOTIFY		10	/* notify userspace of an MCE */
 #define TIF_USER_RETURN_NOTIFY	11	/* notify kernel of userspace return */
 #define TIF_UPROBE		12	/* breakpointed or singlestepping */
 #define TIF_NOTSC		16	/* TSC is not accessible in userland */
@@ -100,7 +99,6 @@ struct thread_info {
 #define _TIF_SYSCALL_EMU	(1 << TIF_SYSCALL_EMU)
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
-#define _TIF_MCE_NOTIFY		(1 << TIF_MCE_NOTIFY)
 #define _TIF_USER_RETURN_NOTIFY	(1 << TIF_USER_RETURN_NOTIFY)
 #define _TIF_UPROBE		(1 << TIF_UPROBE)
 #define _TIF_NOTSC		(1 << TIF_NOTSC)
@@ -140,7 +138,7 @@ struct thread_info {
 
 /* Only used for 64 bit */
 #define _TIF_DO_NOTIFY_MASK						\
-	(_TIF_SIGPENDING | _TIF_MCE_NOTIFY | _TIF_NOTIFY_RESUME |	\
+	(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME |				\
 	 _TIF_USER_RETURN_NOTIFY | _TIF_UPROBE)
 
 /* flags to check in __switch_to() */
@@ -170,6 +168,17 @@ static inline struct thread_info *current_thread_info(void)
 	return ti;
 }
 
+static inline unsigned long current_stack_pointer(void)
+{
+	unsigned long sp;
+#ifdef CONFIG_X86_64
+	asm("mov %%rsp,%0" : "=g" (sp));
+#else
+	asm("mov %%esp,%0" : "=g" (sp));
+#endif
+	return sp;
+}
+
 #else /* !__ASSEMBLY__ */
 
 /* how to get the thread information struct from ASM */
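current_stack_pointer() simply snapshots %rsp/%esp. One hedged example of what such a helper enables (stack_depth() is hypothetical; current_thread_info() and THREAD_SIZE are real kernel symbols):

	/* Illustrative only: how far the current kernel stack has grown.
	 * The stack grows down from the top of the THREAD_SIZE area whose
	 * base also holds struct thread_info.
	 */
	static unsigned long stack_depth(void)
	{
		unsigned long top = (unsigned long)current_thread_info() +
				    THREAD_SIZE;

		return top - current_stack_pointer();
	}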
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 707adc6549d8..4e49d7dff78e 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -1,6 +1,7 @@
 #ifndef _ASM_X86_TRAPS_H
 #define _ASM_X86_TRAPS_H
 
+#include <linux/context_tracking_state.h>
 #include <linux/kprobes.h>
 
 #include <asm/debugreg.h>
@@ -110,6 +111,11 @@ asmlinkage void smp_thermal_interrupt(void);
 asmlinkage void mce_threshold_interrupt(void);
 #endif
 
+extern enum ctx_state ist_enter(struct pt_regs *regs);
+extern void ist_exit(struct pt_regs *regs, enum ctx_state prev_state);
+extern void ist_begin_non_atomic(struct pt_regs *regs);
+extern void ist_end_non_atomic(void);
+
 /* Interrupts/Exceptions */
 enum {
 	X86_TRAP_DE = 0,	/* 0, Divide-by-zero */
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index c8aa65d56027..d979e5abae55 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -251,6 +251,10 @@
 /* Fam 16h MSRs */
 #define MSR_F16H_L2I_PERF_CTL		0xc0010230
 #define MSR_F16H_L2I_PERF_CTR		0xc0010231
+#define MSR_F16H_DR1_ADDR_MASK		0xc0011019
+#define MSR_F16H_DR2_ADDR_MASK		0xc001101a
+#define MSR_F16H_DR3_ADDR_MASK		0xc001101b
+#define MSR_F16H_DR0_ADDR_MASK		0xc0011027
 
 /* Fam 15h MSRs */
 #define MSR_F15H_PERF_CTL		0xc0010200
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index b9e30daa0881..a18fff361c7f 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -653,6 +653,7 @@ static int acpi_register_gsi_pic(struct device *dev, u32 gsi,
 	return gsi;
 }
 
+#ifdef CONFIG_X86_LOCAL_APIC
 static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
 				    int trigger, int polarity)
 {
@@ -675,6 +676,7 @@ static void acpi_unregister_gsi_ioapic(u32 gsi)
 	mutex_unlock(&acpi_ioapic_lock);
 #endif
 }
+#endif
 
 int (*__acpi_register_gsi)(struct device *dev, u32 gsi,
 			   int trigger, int polarity) = acpi_register_gsi_pic;
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index b708738d016e..6a7c23ff21d3 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -135,14 +135,6 @@ static inline void apbt_clear_mapping(void)
 	apbt_virt_address = NULL;
 }
 
-/*
- * APBT timer interrupt enable / disable
- */
-static inline int is_apbt_capable(void)
-{
-	return apbt_virt_address ? 1 : 0;
-}
-
 static int __init apbt_clockevent_register(void)
 {
 	struct sfi_timer_table_entry *mtmr;
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 29b5b18afa27..b665d241efad 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c | |||
@@ -134,9 +134,6 @@ static inline void imcr_apic_to_pic(void) | |||
134 | */ | 134 | */ |
135 | static int force_enable_local_apic __initdata; | 135 | static int force_enable_local_apic __initdata; |
136 | 136 | ||
137 | /* Control whether x2APIC mode is enabled or not */ | ||
138 | static bool nox2apic __initdata; | ||
139 | |||
140 | /* | 137 | /* |
141 | * APIC command line parameters | 138 | * APIC command line parameters |
142 | */ | 139 | */ |
@@ -161,33 +158,6 @@ static __init int setup_apicpmtimer(char *s) | |||
161 | __setup("apicpmtimer", setup_apicpmtimer); | 158 | __setup("apicpmtimer", setup_apicpmtimer); |
162 | #endif | 159 | #endif |
163 | 160 | ||
164 | int x2apic_mode; | ||
165 | #ifdef CONFIG_X86_X2APIC | ||
166 | /* x2apic enabled before OS handover */ | ||
167 | int x2apic_preenabled; | ||
168 | static int x2apic_disabled; | ||
169 | static int __init setup_nox2apic(char *str) | ||
170 | { | ||
171 | if (x2apic_enabled()) { | ||
172 | int apicid = native_apic_msr_read(APIC_ID); | ||
173 | |||
174 | if (apicid >= 255) { | ||
175 | pr_warning("Apicid: %08x, cannot enforce nox2apic\n", | ||
176 | apicid); | ||
177 | return 0; | ||
178 | } | ||
179 | |||
180 | pr_warning("x2apic already enabled. will disable it\n"); | ||
181 | } else | ||
182 | setup_clear_cpu_cap(X86_FEATURE_X2APIC); | ||
183 | |||
184 | nox2apic = true; | ||
185 | |||
186 | return 0; | ||
187 | } | ||
188 | early_param("nox2apic", setup_nox2apic); | ||
189 | #endif | ||
190 | |||
191 | unsigned long mp_lapic_addr; | 161 | unsigned long mp_lapic_addr; |
192 | int disable_apic; | 162 | int disable_apic; |
193 | /* Disable local APIC timer from the kernel commandline or via dmi quirk */ | 163 | /* Disable local APIC timer from the kernel commandline or via dmi quirk */ |
@@ -1475,7 +1445,7 @@ void setup_local_APIC(void) | |||
1475 | #endif | 1445 | #endif |
1476 | } | 1446 | } |
1477 | 1447 | ||
1478 | void end_local_APIC_setup(void) | 1448 | static void end_local_APIC_setup(void) |
1479 | { | 1449 | { |
1480 | lapic_setup_esr(); | 1450 | lapic_setup_esr(); |
1481 | 1451 | ||
@@ -1492,116 +1462,184 @@ void end_local_APIC_setup(void) | |||
1492 | apic_pm_activate(); | 1462 | apic_pm_activate(); |
1493 | } | 1463 | } |
1494 | 1464 | ||
1495 | void __init bsp_end_local_APIC_setup(void) | 1465 | /* |
1466 | * APIC setup function for application processors. Called from smpboot.c | ||
1467 | */ | ||
1468 | void apic_ap_setup(void) | ||
1496 | { | 1469 | { |
1470 | setup_local_APIC(); | ||
1497 | end_local_APIC_setup(); | 1471 | end_local_APIC_setup(); |
1498 | |||
1499 | /* | ||
1500 | * Now that local APIC setup is completed for BP, configure the fault | ||
1501 | * handling for interrupt remapping. | ||
1502 | */ | ||
1503 | irq_remap_enable_fault_handling(); | ||
1504 | |||
1505 | } | 1472 | } |
1506 | 1473 | ||
1507 | #ifdef CONFIG_X86_X2APIC | 1474 | #ifdef CONFIG_X86_X2APIC |
1508 | /* | 1475 | int x2apic_mode; |
1509 | * Need to disable xapic and x2apic at the same time and then enable xapic mode | ||
1510 | */ | ||
1511 | static inline void __disable_x2apic(u64 msr) | ||
1512 | { | ||
1513 | wrmsrl(MSR_IA32_APICBASE, | ||
1514 | msr & ~(X2APIC_ENABLE | XAPIC_ENABLE)); | ||
1515 | wrmsrl(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE); | ||
1516 | } | ||
1517 | 1476 | ||
1518 | static __init void disable_x2apic(void) | 1477 | enum { |
1478 | X2APIC_OFF, | ||
1479 | X2APIC_ON, | ||
1480 | X2APIC_DISABLED, | ||
1481 | }; | ||
1482 | static int x2apic_state; | ||
1483 | |||
1484 | static inline void __x2apic_disable(void) | ||
1519 | { | 1485 | { |
1520 | u64 msr; | 1486 | u64 msr; |
1521 | 1487 | ||
1522 | if (!cpu_has_x2apic) | 1488 | if (cpu_has_apic) |
1523 | return; | 1489 | return; |
1524 | 1490 | ||
1525 | rdmsrl(MSR_IA32_APICBASE, msr); | 1491 | rdmsrl(MSR_IA32_APICBASE, msr); |
1526 | if (msr & X2APIC_ENABLE) { | 1492 | if (!(msr & X2APIC_ENABLE)) |
1527 | u32 x2apic_id = read_apic_id(); | 1493 | return; |
1528 | 1494 | /* Disable xapic and x2apic first and then reenable xapic mode */ | |
1529 | if (x2apic_id >= 255) | 1495 | wrmsrl(MSR_IA32_APICBASE, msr & ~(X2APIC_ENABLE | XAPIC_ENABLE)); |
1530 | panic("Cannot disable x2apic, id: %08x\n", x2apic_id); | 1496 | wrmsrl(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE); |
1497 | printk_once(KERN_INFO "x2apic disabled\n"); | ||
1498 | } | ||
1531 | 1499 | ||
1532 | pr_info("Disabling x2apic\n"); | 1500 | static inline void __x2apic_enable(void) |
1533 | __disable_x2apic(msr); | 1501 | { |
1502 | u64 msr; | ||
1534 | 1503 | ||
1535 | if (nox2apic) { | 1504 | rdmsrl(MSR_IA32_APICBASE, msr); |
1536 | clear_cpu_cap(&cpu_data(0), X86_FEATURE_X2APIC); | 1505 | if (msr & X2APIC_ENABLE) |
1537 | setup_clear_cpu_cap(X86_FEATURE_X2APIC); | 1506 | return; |
1538 | } | 1507 | wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE); |
1508 | printk_once(KERN_INFO "x2apic enabled\n"); | ||
1509 | } | ||
1539 | 1510 | ||
1540 | x2apic_disabled = 1; | 1511 | static int __init setup_nox2apic(char *str) |
1541 | x2apic_mode = 0; | 1512 | { |
1513 | if (x2apic_enabled()) { | ||
1514 | int apicid = native_apic_msr_read(APIC_ID); | ||
1542 | 1515 | ||
1543 | register_lapic_address(mp_lapic_addr); | 1516 | if (apicid >= 255) { |
1517 | pr_warning("Apicid: %08x, cannot enforce nox2apic\n", | ||
1518 | apicid); | ||
1519 | return 0; | ||
1520 | } | ||
1521 | pr_warning("x2apic already enabled.\n"); | ||
1522 | __x2apic_disable(); | ||
1544 | } | 1523 | } |
1524 | setup_clear_cpu_cap(X86_FEATURE_X2APIC); | ||
1525 | x2apic_state = X2APIC_DISABLED; | ||
1526 | x2apic_mode = 0; | ||
1527 | return 0; | ||
1545 | } | 1528 | } |
1529 | early_param("nox2apic", setup_nox2apic); | ||
1546 | 1530 | ||
1547 | void check_x2apic(void) | 1531 | /* Called from cpu_init() to enable x2apic on (secondary) cpus */ |
1532 | void x2apic_setup(void) | ||
1548 | { | 1533 | { |
1549 | if (x2apic_enabled()) { | 1534 | /* |
1550 | pr_info("x2apic enabled by BIOS, switching to x2apic ops\n"); | 1535 | * If x2apic is not in ON state, disable it if already enabled |
1551 | x2apic_preenabled = x2apic_mode = 1; | 1536 | * from BIOS. |
1537 | */ | ||
1538 | if (x2apic_state != X2APIC_ON) { | ||
1539 | __x2apic_disable(); | ||
1540 | return; | ||
1552 | } | 1541 | } |
1542 | __x2apic_enable(); | ||
1553 | } | 1543 | } |
1554 | 1544 | ||
1555 | void enable_x2apic(void) | 1545 | static __init void x2apic_disable(void) |
1556 | { | 1546 | { |
1557 | u64 msr; | 1547 | u32 x2apic_id; |
1558 | 1548 | ||
1559 | rdmsrl(MSR_IA32_APICBASE, msr); | 1549 | if (x2apic_state != X2APIC_ON) |
1560 | if (x2apic_disabled) { | 1550 | goto out; |
1561 | __disable_x2apic(msr); | 1551 | |
1552 | x2apic_id = read_apic_id(); | ||
1553 | if (x2apic_id >= 255) | ||
1554 | panic("Cannot disable x2apic, id: %08x\n", x2apic_id); | ||
1555 | |||
1556 | __x2apic_disable(); | ||
1557 | register_lapic_address(mp_lapic_addr); | ||
1558 | out: | ||
1559 | x2apic_state = X2APIC_DISABLED; | ||
1560 | x2apic_mode = 0; | ||
1561 | } | ||
1562 | |||
1563 | static __init void x2apic_enable(void) | ||
1564 | { | ||
1565 | if (x2apic_state != X2APIC_OFF) | ||
1562 | return; | 1566 | return; |
1563 | } | ||
1564 | 1567 | ||
1565 | if (!x2apic_mode) | 1568 | x2apic_mode = 1; |
1569 | x2apic_state = X2APIC_ON; | ||
1570 | __x2apic_enable(); | ||
1571 | } | ||
1572 | |||
1573 | static __init void try_to_enable_x2apic(int remap_mode) | ||
1574 | { | ||
1575 | if (x2apic_state == X2APIC_DISABLED) | ||
1566 | return; | 1576 | return; |
1567 | 1577 | ||
1568 | if (!(msr & X2APIC_ENABLE)) { | 1578 | if (remap_mode != IRQ_REMAP_X2APIC_MODE) { |
1569 | printk_once(KERN_INFO "Enabling x2apic\n"); | 1579 | /* IR is required if there is APIC ID > 255 even when running |
1570 | wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE); | 1580 | * under KVM |
1581 | */ | ||
1582 | if (max_physical_apicid > 255 || | ||
1583 | (IS_ENABLED(CONFIG_HYPERVISOR_GUEST) && | ||
1584 | !hypervisor_x2apic_available())) { | ||
1585 | pr_info("x2apic: IRQ remapping doesn't support X2APIC mode\n"); | ||
1586 | x2apic_disable(); | ||
1587 | return; | ||
1588 | } | ||
1589 | |||
1590 | /* | ||
1591 | * without IR all CPUs can be addressed by IOAPIC/MSI | ||
1592 | * only in physical mode | ||
1593 | */ | ||
1594 | x2apic_phys = 1; | ||
1571 | } | 1595 | } |
1596 | x2apic_enable(); | ||
1572 | } | 1597 | } |
1573 | #endif /* CONFIG_X86_X2APIC */ | ||
1574 | 1598 | ||
1575 | int __init enable_IR(void) | 1599 | void __init check_x2apic(void) |
1576 | { | 1600 | { |
1577 | #ifdef CONFIG_IRQ_REMAP | 1601 | if (x2apic_enabled()) { |
1578 | if (!irq_remapping_supported()) { | 1602 | pr_info("x2apic: enabled by BIOS, switching to x2apic ops\n"); |
1579 | pr_debug("intr-remapping not supported\n"); | 1603 | x2apic_mode = 1; |
1580 | return -1; | 1604 | x2apic_state = X2APIC_ON; |
1605 | } else if (!cpu_has_x2apic) { | ||
1606 | x2apic_state = X2APIC_DISABLED; | ||
1581 | } | 1607 | } |
1608 | } | ||
1609 | #else /* CONFIG_X86_X2APIC */ | ||
1610 | static int __init validate_x2apic(void) | ||
1611 | { | ||
1612 | if (!apic_is_x2apic_enabled()) | ||
1613 | return 0; | ||
1614 | /* | ||
1615 | * Checkme: Can we simply turn off x2apic here instead of panic? | ||
1616 | */ | ||
1617 | panic("BIOS has enabled x2apic but kernel doesn't support x2apic, please disable x2apic in BIOS.\n"); | ||
1618 | } | ||
1619 | early_initcall(validate_x2apic); | ||
1582 | 1620 | ||
1583 | if (!x2apic_preenabled && skip_ioapic_setup) { | 1621 | static inline void try_to_enable_x2apic(int remap_mode) { } |
1584 | pr_info("Skipped enabling intr-remap because of skipping " | 1622 | static inline void __x2apic_enable(void) { } |
1585 | "io-apic setup\n"); | 1623 | #endif /* !CONFIG_X86_X2APIC */ |
1624 | |||
1625 | static int __init try_to_enable_IR(void) | ||
1626 | { | ||
1627 | #ifdef CONFIG_X86_IO_APIC | ||
1628 | if (!x2apic_enabled() && skip_ioapic_setup) { | ||
1629 | pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n"); | ||
1586 | return -1; | 1630 | return -1; |
1587 | } | 1631 | } |
1588 | |||
1589 | return irq_remapping_enable(); | ||
1590 | #endif | 1632 | #endif |
1591 | return -1; | 1633 | return irq_remapping_enable(); |
1592 | } | 1634 | } |
1593 | 1635 | ||
1594 | void __init enable_IR_x2apic(void) | 1636 | void __init enable_IR_x2apic(void) |
1595 | { | 1637 | { |
1596 | unsigned long flags; | 1638 | unsigned long flags; |
1597 | int ret, x2apic_enabled = 0; | 1639 | int ret, ir_stat; |
1598 | int hardware_init_ret; | ||
1599 | |||
1600 | /* Make sure irq_remap_ops are initialized */ | ||
1601 | setup_irq_remapping_ops(); | ||
1602 | 1640 | ||
1603 | hardware_init_ret = irq_remapping_prepare(); | 1641 | ir_stat = irq_remapping_prepare(); |
1604 | if (hardware_init_ret && !x2apic_supported()) | 1642 | if (ir_stat < 0 && !x2apic_supported()) |
1605 | return; | 1643 | return; |
1606 | 1644 | ||
1607 | ret = save_ioapic_entries(); | 1645 | ret = save_ioapic_entries(); |
@@ -1614,49 +1652,13 @@ void __init enable_IR_x2apic(void) | |||
1614 | legacy_pic->mask_all(); | 1652 | legacy_pic->mask_all(); |
1615 | mask_ioapic_entries(); | 1653 | mask_ioapic_entries(); |
1616 | 1654 | ||
1617 | if (x2apic_preenabled && nox2apic) | 1655 | /* If irq_remapping_prepare() succeded, try to enable it */ |
1618 | disable_x2apic(); | 1656 | if (ir_stat >= 0) |
1619 | 1657 | ir_stat = try_to_enable_IR(); | |
1620 | if (hardware_init_ret) | 1658 | /* ir_stat contains the remap mode or an error code */ |
1621 | ret = -1; | 1659 | try_to_enable_x2apic(ir_stat); |
1622 | else | ||
1623 | ret = enable_IR(); | ||
1624 | |||
1625 | if (!x2apic_supported()) | ||
1626 | goto skip_x2apic; | ||
1627 | 1660 | ||
1628 | if (ret < 0) { | 1661 | if (ir_stat < 0) |
1629 | /* IR is required if there is APIC ID > 255 even when running | ||
1630 | * under KVM | ||
1631 | */ | ||
1632 | if (max_physical_apicid > 255 || | ||
1633 | !hypervisor_x2apic_available()) { | ||
1634 | if (x2apic_preenabled) | ||
1635 | disable_x2apic(); | ||
1636 | goto skip_x2apic; | ||
1637 | } | ||
1638 | /* | ||
1639 | * without IR all CPUs can be addressed by IOAPIC/MSI | ||
1640 | * only in physical mode | ||
1641 | */ | ||
1642 | x2apic_force_phys(); | ||
1643 | } | ||
1644 | |||
1645 | if (ret == IRQ_REMAP_XAPIC_MODE) { | ||
1646 | pr_info("x2apic not enabled, IRQ remapping is in xapic mode\n"); | ||
1647 | goto skip_x2apic; | ||
1648 | } | ||
1649 | |||
1650 | x2apic_enabled = 1; | ||
1651 | |||
1652 | if (x2apic_supported() && !x2apic_mode) { | ||
1653 | x2apic_mode = 1; | ||
1654 | enable_x2apic(); | ||
1655 | pr_info("Enabled x2apic\n"); | ||
1656 | } | ||
1657 | |||
1658 | skip_x2apic: | ||
1659 | if (ret < 0) /* IR enabling failed */ | ||
1660 | restore_ioapic_entries(); | 1662 | restore_ioapic_entries(); |
1661 | legacy_pic->restore_mask(); | 1663 | legacy_pic->restore_mask(); |
1662 | local_irq_restore(flags); | 1664 | local_irq_restore(flags); |
@@ -1847,82 +1849,8 @@ void __init register_lapic_address(unsigned long address) | |||
1847 | } | 1849 | } |
1848 | } | 1850 | } |
1849 | 1851 | ||
1850 | /* | ||
1851 | * This initializes the IO-APIC and APIC hardware if this is | ||
1852 | * a UP kernel. | ||
1853 | */ | ||
1854 | int apic_version[MAX_LOCAL_APIC]; | 1852 | int apic_version[MAX_LOCAL_APIC]; |
1855 | 1853 | ||
1856 | int __init APIC_init_uniprocessor(void) | ||
1857 | { | ||
1858 | if (disable_apic) { | ||
1859 | pr_info("Apic disabled\n"); | ||
1860 | return -1; | ||
1861 | } | ||
1862 | #ifdef CONFIG_X86_64 | ||
1863 | if (!cpu_has_apic) { | ||
1864 | disable_apic = 1; | ||
1865 | pr_info("Apic disabled by BIOS\n"); | ||
1866 | return -1; | ||
1867 | } | ||
1868 | #else | ||
1869 | if (!smp_found_config && !cpu_has_apic) | ||
1870 | return -1; | ||
1871 | |||
1872 | /* | ||
1873 | * Complain if the BIOS pretends there is one. | ||
1874 | */ | ||
1875 | if (!cpu_has_apic && | ||
1876 | APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { | ||
1877 | pr_err("BIOS bug, local APIC 0x%x not detected!...\n", | ||
1878 | boot_cpu_physical_apicid); | ||
1879 | return -1; | ||
1880 | } | ||
1881 | #endif | ||
1882 | |||
1883 | default_setup_apic_routing(); | ||
1884 | |||
1885 | verify_local_APIC(); | ||
1886 | connect_bsp_APIC(); | ||
1887 | |||
1888 | #ifdef CONFIG_X86_64 | ||
1889 | apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid)); | ||
1890 | #else | ||
1891 | /* | ||
1892 | * Hack: In case of kdump, after a crash, kernel might be booting | ||
1893 | * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid | ||
1894 | * might be zero if read from MP tables. Get it from LAPIC. | ||
1895 | */ | ||
1896 | # ifdef CONFIG_CRASH_DUMP | ||
1897 | boot_cpu_physical_apicid = read_apic_id(); | ||
1898 | # endif | ||
1899 | #endif | ||
1900 | physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); | ||
1901 | setup_local_APIC(); | ||
1902 | |||
1903 | #ifdef CONFIG_X86_IO_APIC | ||
1904 | /* | ||
1905 | * Now enable IO-APICs, actually call clear_IO_APIC | ||
1906 | * We need clear_IO_APIC before enabling error vector | ||
1907 | */ | ||
1908 | if (!skip_ioapic_setup && nr_ioapics) | ||
1909 | enable_IO_APIC(); | ||
1910 | #endif | ||
1911 | |||
1912 | bsp_end_local_APIC_setup(); | ||
1913 | |||
1914 | #ifdef CONFIG_X86_IO_APIC | ||
1915 | if (smp_found_config && !skip_ioapic_setup && nr_ioapics) | ||
1916 | setup_IO_APIC(); | ||
1917 | else { | ||
1918 | nr_ioapics = 0; | ||
1919 | } | ||
1920 | #endif | ||
1921 | |||
1922 | x86_init.timers.setup_percpu_clockev(); | ||
1923 | return 0; | ||
1924 | } | ||
1925 | |||
1926 | /* | 1854 | /* |
1927 | * Local APIC interrupts | 1855 | * Local APIC interrupts |
1928 | */ | 1856 | */ |
@@ -2027,7 +1955,7 @@ __visible void smp_trace_error_interrupt(struct pt_regs *regs) | |||
2027 | /** | 1955 | /** |
2028 | * connect_bsp_APIC - attach the APIC to the interrupt system | 1956 | * connect_bsp_APIC - attach the APIC to the interrupt system |
2029 | */ | 1957 | */ |
2030 | void __init connect_bsp_APIC(void) | 1958 | static void __init connect_bsp_APIC(void) |
2031 | { | 1959 | { |
2032 | #ifdef CONFIG_X86_32 | 1960 | #ifdef CONFIG_X86_32 |
2033 | if (pic_mode) { | 1961 | if (pic_mode) { |
@@ -2274,6 +2202,100 @@ void __init apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v)) | |||
2274 | } | 2202 | } |
2275 | } | 2203 | } |
2276 | 2204 | ||
2205 | static void __init apic_bsp_up_setup(void) | ||
2206 | { | ||
2207 | #ifdef CONFIG_X86_64 | ||
2208 | apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid)); | ||
2209 | #else | ||
2210 | /* | ||
2211 | * Hack: In case of kdump, after a crash, kernel might be booting | ||
2212 | * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid | ||
2213 | * might be zero if read from MP tables. Get it from LAPIC. | ||
2214 | */ | ||
2215 | # ifdef CONFIG_CRASH_DUMP | ||
2216 | boot_cpu_physical_apicid = read_apic_id(); | ||
2217 | # endif | ||
2218 | #endif | ||
2219 | physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); | ||
2220 | } | ||
2221 | |||
2222 | /** | ||
2223 | * apic_bsp_setup - Setup function for local apic and io-apic | ||
2224 | * @upmode: Force UP mode (for APIC_init_uniprocessor) | ||
2225 | * | ||
2226 | * Returns: | ||
2227 | * apic_id of BSP APIC | ||
2228 | */ | ||
2229 | int __init apic_bsp_setup(bool upmode) | ||
2230 | { | ||
2231 | int id; | ||
2232 | |||
2233 | connect_bsp_APIC(); | ||
2234 | if (upmode) | ||
2235 | apic_bsp_up_setup(); | ||
2236 | setup_local_APIC(); | ||
2237 | |||
2238 | if (x2apic_mode) | ||
2239 | id = apic_read(APIC_LDR); | ||
2240 | else | ||
2241 | id = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); | ||
2242 | |||
2243 | enable_IO_APIC(); | ||
2244 | end_local_APIC_setup(); | ||
2245 | irq_remap_enable_fault_handling(); | ||
2246 | setup_IO_APIC(); | ||
2247 | /* Setup local timer */ | ||
2248 | x86_init.timers.setup_percpu_clockev(); | ||
2249 | return id; | ||
2250 | } | ||
2251 | |||
2252 | /* | ||
2253 | * This initializes the IO-APIC and APIC hardware if this is | ||
2254 | * a UP kernel. | ||
2255 | */ | ||
2256 | int __init APIC_init_uniprocessor(void) | ||
2257 | { | ||
2258 | if (disable_apic) { | ||
2259 | pr_info("Apic disabled\n"); | ||
2260 | return -1; | ||
2261 | } | ||
2262 | #ifdef CONFIG_X86_64 | ||
2263 | if (!cpu_has_apic) { | ||
2264 | disable_apic = 1; | ||
2265 | pr_info("Apic disabled by BIOS\n"); | ||
2266 | return -1; | ||
2267 | } | ||
2268 | #else | ||
2269 | if (!smp_found_config && !cpu_has_apic) | ||
2270 | return -1; | ||
2271 | |||
2272 | /* | ||
2273 | * Complain if the BIOS pretends there is one. | ||
2274 | */ | ||
2275 | if (!cpu_has_apic && | ||
2276 | APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { | ||
2277 | pr_err("BIOS bug, local APIC 0x%x not detected!...\n", | ||
2278 | boot_cpu_physical_apicid); | ||
2279 | return -1; | ||
2280 | } | ||
2281 | #endif | ||
2282 | |||
2283 | if (!smp_found_config) | ||
2284 | disable_ioapic_support(); | ||
2285 | |||
2286 | default_setup_apic_routing(); | ||
2287 | verify_local_APIC(); | ||
2288 | apic_bsp_setup(true); | ||
2289 | return 0; | ||
2290 | } | ||
2291 | |||
2292 | #ifdef CONFIG_UP_LATE_INIT | ||
2293 | void __init up_late_init(void) | ||
2294 | { | ||
2295 | APIC_init_uniprocessor(); | ||
2296 | } | ||
2297 | #endif | ||
2298 | |||
2277 | /* | 2299 | /* |
2278 | * Power management | 2300 | * Power management |
2279 | */ | 2301 | */ |
@@ -2359,9 +2381,9 @@ static void lapic_resume(void) | |||
2359 | mask_ioapic_entries(); | 2381 | mask_ioapic_entries(); |
2360 | legacy_pic->mask_all(); | 2382 | legacy_pic->mask_all(); |
2361 | 2383 | ||
2362 | if (x2apic_mode) | 2384 | if (x2apic_mode) { |
2363 | enable_x2apic(); | 2385 | __x2apic_enable(); |
2364 | else { | 2386 | } else { |
2365 | /* | 2387 | /* |
2366 | * Make sure the APICBASE points to the right address | 2388 | * Make sure the APICBASE points to the right address |
2367 | * | 2389 | * |
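Note on the apic.c hunks above: the new up_late_init() hook is driven by CONFIG_UP_LATE_INIT from the Kconfig hunk at the top of this series, giving uniprocessor APIC setup one well-defined late call site. The generic side is presumably a small stub in include/linux/smp.h along these lines (an assumption -- that header is not part of this diff):

    #ifdef CONFIG_UP_LATE_INIT
    void __init up_late_init(void);
    static inline void smp_init(void)
    {
    	up_late_init();
    }
    #else
    static inline void smp_init(void) { }
    #endif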
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 3f5f60406ab1..f4dc2462a1ac 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c | |||
@@ -1507,7 +1507,10 @@ void __init enable_IO_APIC(void) | |||
1507 | int i8259_apic, i8259_pin; | 1507 | int i8259_apic, i8259_pin; |
1508 | int apic, pin; | 1508 | int apic, pin; |
1509 | 1509 | ||
1510 | if (!nr_legacy_irqs()) | 1510 | if (skip_ioapic_setup) |
1511 | nr_ioapics = 0; | ||
1512 | |||
1513 | if (!nr_legacy_irqs() || !nr_ioapics) | ||
1511 | return; | 1514 | return; |
1512 | 1515 | ||
1513 | for_each_ioapic_pin(apic, pin) { | 1516 | for_each_ioapic_pin(apic, pin) { |
@@ -2295,7 +2298,7 @@ static inline void __init check_timer(void) | |||
2295 | } | 2298 | } |
2296 | local_irq_disable(); | 2299 | local_irq_disable(); |
2297 | apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); | 2300 | apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); |
2298 | if (x2apic_preenabled) | 2301 | if (apic_is_x2apic_enabled()) |
2299 | apic_printk(APIC_QUIET, KERN_INFO | 2302 | apic_printk(APIC_QUIET, KERN_INFO |
2300 | "Perhaps problem with the pre-enabled x2apic mode\n" | 2303 | "Perhaps problem with the pre-enabled x2apic mode\n" |
2301 | "Try booting with x2apic and interrupt-remapping disabled in the bios.\n"); | 2304 | "Try booting with x2apic and interrupt-remapping disabled in the bios.\n"); |
@@ -2373,9 +2376,9 @@ void __init setup_IO_APIC(void) | |||
2373 | { | 2376 | { |
2374 | int ioapic; | 2377 | int ioapic; |
2375 | 2378 | ||
2376 | /* | 2379 | if (skip_ioapic_setup || !nr_ioapics) |
2377 | * calling enable_IO_APIC() is moved to setup_local_APIC for BP | 2380 | return; |
2378 | */ | 2381 | |
2379 | io_apic_irqs = nr_legacy_irqs() ? ~PIC_IRQS : ~0UL; | 2382 | io_apic_irqs = nr_legacy_irqs() ? ~PIC_IRQS : ~0UL; |
2380 | 2383 | ||
2381 | apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); | 2384 | apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 15c5df92f74e..a220239cea65 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -869,3 +869,22 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum) | |||
869 | 869 | ||
870 | return false; | 870 | return false; |
871 | } | 871 | } |
872 | |||
873 | void set_dr_addr_mask(unsigned long mask, int dr) | ||
874 | { | ||
875 | if (!cpu_has_bpext) | ||
876 | return; | ||
877 | |||
878 | switch (dr) { | ||
879 | case 0: | ||
880 | wrmsr(MSR_F16H_DR0_ADDR_MASK, mask, 0); | ||
881 | break; | ||
882 | case 1: | ||
883 | case 2: | ||
884 | case 3: | ||
885 | wrmsr(MSR_F16H_DR1_ADDR_MASK - 1 + dr, mask, 0); | ||
886 | break; | ||
887 | default: | ||
888 | break; | ||
889 | } | ||
890 | } | ||
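The switch in set_dr_addr_mask() above encodes a simple layout: DR0 has a dedicated address-mask MSR, while DR1..DR3 sit in consecutive MSRs starting at MSR_F16H_DR1_ADDR_MASK. A one-line restatement of that mapping (illustrative helper, not in the patch):

    /* Hypothetical equivalent of the switch above: the address-mask
     * MSR for debug register dr (0..3). */
    static u32 dr_addr_mask_msr(int dr)
    {
    	return dr == 0 ? MSR_F16H_DR0_ADDR_MASK
    		       : MSR_F16H_DR1_ADDR_MASK - 1 + dr;
    }

This pairs with the hw_breakpoint.c changes later in the diff, which compute the mask from a power-of-2 bp_len.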
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c6049650c093..b15bffcaba6d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -491,17 +491,18 @@ u16 __read_mostly tlb_lld_2m[NR_INFO]; | |||
491 | u16 __read_mostly tlb_lld_4m[NR_INFO]; | 491 | u16 __read_mostly tlb_lld_4m[NR_INFO]; |
492 | u16 __read_mostly tlb_lld_1g[NR_INFO]; | 492 | u16 __read_mostly tlb_lld_1g[NR_INFO]; |
493 | 493 | ||
494 | void cpu_detect_tlb(struct cpuinfo_x86 *c) | 494 | static void cpu_detect_tlb(struct cpuinfo_x86 *c) |
495 | { | 495 | { |
496 | if (this_cpu->c_detect_tlb) | 496 | if (this_cpu->c_detect_tlb) |
497 | this_cpu->c_detect_tlb(c); | 497 | this_cpu->c_detect_tlb(c); |
498 | 498 | ||
499 | printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" | 499 | pr_info("Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n", |
500 | "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d, 1GB %d\n", | ||
501 | tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES], | 500 | tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES], |
502 | tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES], | 501 | tlb_lli_4m[ENTRIES]); |
503 | tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES], | 502 | |
504 | tlb_lld_1g[ENTRIES]); | 503 | pr_info("Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d, 1GB %d\n", |
504 | tlb_lld_4k[ENTRIES], tlb_lld_2m[ENTRIES], | ||
505 | tlb_lld_4m[ENTRIES], tlb_lld_1g[ENTRIES]); | ||
505 | } | 506 | } |
506 | 507 | ||
507 | void detect_ht(struct cpuinfo_x86 *c) | 508 | void detect_ht(struct cpuinfo_x86 *c) |
@@ -1332,7 +1333,7 @@ void cpu_init(void) | |||
1332 | barrier(); | 1333 | barrier(); |
1333 | 1334 | ||
1334 | x86_configure_nx(); | 1335 | x86_configure_nx(); |
1335 | enable_x2apic(); | 1336 | x2apic_setup(); |
1336 | 1337 | ||
1337 | /* | 1338 | /* |
1338 | * set up and load the per-CPU TSS | 1339 | * set up and load the per-CPU TSS |
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 9cc6b6f25f42..94d7dcb12145 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -487,10 +487,8 @@ static void init_intel(struct cpuinfo_x86 *c) | |||
487 | 487 | ||
488 | rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); | 488 | rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); |
489 | if ((epb & 0xF) == ENERGY_PERF_BIAS_PERFORMANCE) { | 489 | if ((epb & 0xF) == ENERGY_PERF_BIAS_PERFORMANCE) { |
490 | printk_once(KERN_WARNING "ENERGY_PERF_BIAS:" | 490 | pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n"); |
491 | " Set to 'normal', was 'performance'\n" | 491 | pr_warn_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n"); |
492 | "ENERGY_PERF_BIAS: View and update with" | ||
493 | " x86_energy_perf_policy(8)\n"); | ||
494 | epb = (epb & ~0xF) | ENERGY_PERF_BIAS_NORMAL; | 492 | epb = (epb & ~0xF) | ENERGY_PERF_BIAS_NORMAL; |
495 | wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); | 493 | wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb); |
496 | } | 494 | } |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index d2c611699cd9..cdfed7953963 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -43,6 +43,7 @@ | |||
43 | #include <linux/export.h> | 43 | #include <linux/export.h> |
44 | 44 | ||
45 | #include <asm/processor.h> | 45 | #include <asm/processor.h> |
46 | #include <asm/traps.h> | ||
46 | #include <asm/mce.h> | 47 | #include <asm/mce.h> |
47 | #include <asm/msr.h> | 48 | #include <asm/msr.h> |
48 | 49 | ||
@@ -115,7 +116,7 @@ static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs); | |||
115 | * CPU/chipset specific EDAC code can register a notifier call here to print | 116 | * CPU/chipset specific EDAC code can register a notifier call here to print |
116 | * MCE errors in a human-readable form. | 117 | * MCE errors in a human-readable form. |
117 | */ | 118 | */ |
118 | ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); | 119 | static ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); |
119 | 120 | ||
120 | /* Do initial initialization of a struct mce */ | 121 | /* Do initial initialization of a struct mce */ |
121 | void mce_setup(struct mce *m) | 122 | void mce_setup(struct mce *m) |
@@ -311,7 +312,7 @@ static void wait_for_panic(void) | |||
311 | panic("Panicing machine check CPU died"); | 312 | panic("Panicing machine check CPU died"); |
312 | } | 313 | } |
313 | 314 | ||
314 | static void mce_panic(char *msg, struct mce *final, char *exp) | 315 | static void mce_panic(const char *msg, struct mce *final, char *exp) |
315 | { | 316 | { |
316 | int i, apei_err = 0; | 317 | int i, apei_err = 0; |
317 | 318 | ||
@@ -529,7 +530,7 @@ static void mce_schedule_work(void) | |||
529 | schedule_work(this_cpu_ptr(&mce_work)); | 530 | schedule_work(this_cpu_ptr(&mce_work)); |
530 | } | 531 | } |
531 | 532 | ||
532 | DEFINE_PER_CPU(struct irq_work, mce_irq_work); | 533 | static DEFINE_PER_CPU(struct irq_work, mce_irq_work); |
533 | 534 | ||
534 | static void mce_irq_work_cb(struct irq_work *entry) | 535 | static void mce_irq_work_cb(struct irq_work *entry) |
535 | { | 536 | { |
@@ -735,7 +736,7 @@ static atomic_t mce_callin; | |||
735 | /* | 736 | /* |
736 | * Check if a timeout waiting for other CPUs happened. | 737 | * Check if a timeout waiting for other CPUs happened. |
737 | */ | 738 | */ |
738 | static int mce_timed_out(u64 *t) | 739 | static int mce_timed_out(u64 *t, const char *msg) |
739 | { | 740 | { |
740 | /* | 741 | /* |
741 | * The others already did panic for some reason. | 742 | * The others already did panic for some reason. |
@@ -750,8 +751,7 @@ static int mce_timed_out(u64 *t) | |||
750 | goto out; | 751 | goto out; |
751 | if ((s64)*t < SPINUNIT) { | 752 | if ((s64)*t < SPINUNIT) { |
752 | if (mca_cfg.tolerant <= 1) | 753 | if (mca_cfg.tolerant <= 1) |
753 | mce_panic("Timeout synchronizing machine check over CPUs", | 754 | mce_panic(msg, NULL, NULL); |
754 | NULL, NULL); | ||
755 | cpu_missing = 1; | 755 | cpu_missing = 1; |
756 | return 1; | 756 | return 1; |
757 | } | 757 | } |
@@ -867,7 +867,8 @@ static int mce_start(int *no_way_out) | |||
867 | * Wait for everyone. | 867 | * Wait for everyone. |
868 | */ | 868 | */ |
869 | while (atomic_read(&mce_callin) != cpus) { | 869 | while (atomic_read(&mce_callin) != cpus) { |
870 | if (mce_timed_out(&timeout)) { | 870 | if (mce_timed_out(&timeout, |
871 | "Timeout: Not all CPUs entered broadcast exception handler")) { | ||
871 | atomic_set(&global_nwo, 0); | 872 | atomic_set(&global_nwo, 0); |
872 | return -1; | 873 | return -1; |
873 | } | 874 | } |
@@ -892,7 +893,8 @@ static int mce_start(int *no_way_out) | |||
892 | * only seen by one CPU before cleared, avoiding duplicates. | 893 | * only seen by one CPU before cleared, avoiding duplicates. |
893 | */ | 894 | */ |
894 | while (atomic_read(&mce_executing) < order) { | 895 | while (atomic_read(&mce_executing) < order) { |
895 | if (mce_timed_out(&timeout)) { | 896 | if (mce_timed_out(&timeout, |
897 | "Timeout: Subject CPUs unable to finish machine check processing")) { | ||
896 | atomic_set(&global_nwo, 0); | 898 | atomic_set(&global_nwo, 0); |
897 | return -1; | 899 | return -1; |
898 | } | 900 | } |
@@ -936,7 +938,8 @@ static int mce_end(int order) | |||
936 | * loops. | 938 | * loops. |
937 | */ | 939 | */ |
938 | while (atomic_read(&mce_executing) <= cpus) { | 940 | while (atomic_read(&mce_executing) <= cpus) { |
939 | if (mce_timed_out(&timeout)) | 941 | if (mce_timed_out(&timeout, |
942 | "Timeout: Monarch CPU unable to finish machine check processing")) | ||
940 | goto reset; | 943 | goto reset; |
941 | ndelay(SPINUNIT); | 944 | ndelay(SPINUNIT); |
942 | } | 945 | } |
@@ -949,7 +952,8 @@ static int mce_end(int order) | |||
949 | * Subject: Wait for Monarch to finish. | 952 | * Subject: Wait for Monarch to finish. |
950 | */ | 953 | */ |
951 | while (atomic_read(&mce_executing) != 0) { | 954 | while (atomic_read(&mce_executing) != 0) { |
952 | if (mce_timed_out(&timeout)) | 955 | if (mce_timed_out(&timeout, |
956 | "Timeout: Monarch CPU did not finish machine check processing")) | ||
953 | goto reset; | 957 | goto reset; |
954 | ndelay(SPINUNIT); | 958 | ndelay(SPINUNIT); |
955 | } | 959 | } |
@@ -1003,51 +1007,6 @@ static void mce_clear_state(unsigned long *toclear) | |||
1003 | } | 1007 | } |
1004 | 1008 | ||
1005 | /* | 1009 | /* |
1006 | * Need to save faulting physical address associated with a process | ||
1007 | * in the machine check handler some place where we can grab it back | ||
1008 | * later in mce_notify_process() | ||
1009 | */ | ||
1010 | #define MCE_INFO_MAX 16 | ||
1011 | |||
1012 | struct mce_info { | ||
1013 | atomic_t inuse; | ||
1014 | struct task_struct *t; | ||
1015 | __u64 paddr; | ||
1016 | int restartable; | ||
1017 | } mce_info[MCE_INFO_MAX]; | ||
1018 | |||
1019 | static void mce_save_info(__u64 addr, int c) | ||
1020 | { | ||
1021 | struct mce_info *mi; | ||
1022 | |||
1023 | for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++) { | ||
1024 | if (atomic_cmpxchg(&mi->inuse, 0, 1) == 0) { | ||
1025 | mi->t = current; | ||
1026 | mi->paddr = addr; | ||
1027 | mi->restartable = c; | ||
1028 | return; | ||
1029 | } | ||
1030 | } | ||
1031 | |||
1032 | mce_panic("Too many concurrent recoverable errors", NULL, NULL); | ||
1033 | } | ||
1034 | |||
1035 | static struct mce_info *mce_find_info(void) | ||
1036 | { | ||
1037 | struct mce_info *mi; | ||
1038 | |||
1039 | for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++) | ||
1040 | if (atomic_read(&mi->inuse) && mi->t == current) | ||
1041 | return mi; | ||
1042 | return NULL; | ||
1043 | } | ||
1044 | |||
1045 | static void mce_clear_info(struct mce_info *mi) | ||
1046 | { | ||
1047 | atomic_set(&mi->inuse, 0); | ||
1048 | } | ||
1049 | |||
1050 | /* | ||
1051 | * The actual machine check handler. This only handles real | 1010 | * The actual machine check handler. This only handles real |
1052 | * exceptions when something got corrupted coming in through int 18. | 1011 | * exceptions when something got corrupted coming in through int 18. |
1053 | * | 1012 | * |
@@ -1063,6 +1022,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
1063 | { | 1022 | { |
1064 | struct mca_config *cfg = &mca_cfg; | 1023 | struct mca_config *cfg = &mca_cfg; |
1065 | struct mce m, *final; | 1024 | struct mce m, *final; |
1025 | enum ctx_state prev_state; | ||
1066 | int i; | 1026 | int i; |
1067 | int worst = 0; | 1027 | int worst = 0; |
1068 | int severity; | 1028 | int severity; |
@@ -1084,6 +1044,10 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
1084 | DECLARE_BITMAP(toclear, MAX_NR_BANKS); | 1044 | DECLARE_BITMAP(toclear, MAX_NR_BANKS); |
1085 | DECLARE_BITMAP(valid_banks, MAX_NR_BANKS); | 1045 | DECLARE_BITMAP(valid_banks, MAX_NR_BANKS); |
1086 | char *msg = "Unknown"; | 1046 | char *msg = "Unknown"; |
1047 | u64 recover_paddr = ~0ull; | ||
1048 | int flags = MF_ACTION_REQUIRED; | ||
1049 | |||
1050 | prev_state = ist_enter(regs); | ||
1087 | 1051 | ||
1088 | this_cpu_inc(mce_exception_count); | 1052 | this_cpu_inc(mce_exception_count); |
1089 | 1053 | ||
@@ -1203,9 +1167,9 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
1203 | if (no_way_out) | 1167 | if (no_way_out) |
1204 | mce_panic("Fatal machine check on current CPU", &m, msg); | 1168 | mce_panic("Fatal machine check on current CPU", &m, msg); |
1205 | if (worst == MCE_AR_SEVERITY) { | 1169 | if (worst == MCE_AR_SEVERITY) { |
1206 | /* schedule action before return to userland */ | 1170 | recover_paddr = m.addr; |
1207 | mce_save_info(m.addr, m.mcgstatus & MCG_STATUS_RIPV); | 1171 | if (!(m.mcgstatus & MCG_STATUS_RIPV)) |
1208 | set_thread_flag(TIF_MCE_NOTIFY); | 1172 | flags |= MF_MUST_KILL; |
1209 | } else if (kill_it) { | 1173 | } else if (kill_it) { |
1210 | force_sig(SIGBUS, current); | 1174 | force_sig(SIGBUS, current); |
1211 | } | 1175 | } |
@@ -1216,6 +1180,27 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
1216 | mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); | 1180 | mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); |
1217 | out: | 1181 | out: |
1218 | sync_core(); | 1182 | sync_core(); |
1183 | |||
1184 | if (recover_paddr == ~0ull) | ||
1185 | goto done; | ||
1186 | |||
1187 | pr_err("Uncorrected hardware memory error in user-access at %llx", | ||
1188 | recover_paddr); | ||
1189 | /* | ||
1190 | * We must call memory_failure() here even if the current process is | ||
1191 | * doomed. We still need to mark the page as poisoned and alert any | ||
1192 | * other users of the page. | ||
1193 | */ | ||
1194 | ist_begin_non_atomic(regs); | ||
1195 | local_irq_enable(); | ||
1196 | if (memory_failure(recover_paddr >> PAGE_SHIFT, MCE_VECTOR, flags) < 0) { | ||
1197 | pr_err("Memory error not recovered"); | ||
1198 | force_sig(SIGBUS, current); | ||
1199 | } | ||
1200 | local_irq_disable(); | ||
1201 | ist_end_non_atomic(); | ||
1202 | done: | ||
1203 | ist_exit(regs, prev_state); | ||
1219 | } | 1204 | } |
1220 | EXPORT_SYMBOL_GPL(do_machine_check); | 1205 | EXPORT_SYMBOL_GPL(do_machine_check); |
1221 | 1206 | ||
@@ -1233,42 +1218,6 @@ int memory_failure(unsigned long pfn, int vector, int flags) | |||
1233 | #endif | 1218 | #endif |
1234 | 1219 | ||
1235 | /* | 1220 | /* |
1236 | * Called in process context that interrupted by MCE and marked with | ||
1237 | * TIF_MCE_NOTIFY, just before returning to erroneous userland. | ||
1238 | * This code is allowed to sleep. | ||
1239 | * Attempt possible recovery such as calling the high level VM handler to | ||
1240 | * process any corrupted pages, and kill/signal current process if required. | ||
1241 | * Action required errors are handled here. | ||
1242 | */ | ||
1243 | void mce_notify_process(void) | ||
1244 | { | ||
1245 | unsigned long pfn; | ||
1246 | struct mce_info *mi = mce_find_info(); | ||
1247 | int flags = MF_ACTION_REQUIRED; | ||
1248 | |||
1249 | if (!mi) | ||
1250 | mce_panic("Lost physical address for unconsumed uncorrectable error", NULL, NULL); | ||
1251 | pfn = mi->paddr >> PAGE_SHIFT; | ||
1252 | |||
1253 | clear_thread_flag(TIF_MCE_NOTIFY); | ||
1254 | |||
1255 | pr_err("Uncorrected hardware memory error in user-access at %llx", | ||
1256 | mi->paddr); | ||
1257 | /* | ||
1258 | * We must call memory_failure() here even if the current process is | ||
1259 | * doomed. We still need to mark the page as poisoned and alert any | ||
1260 | * other users of the page. | ||
1261 | */ | ||
1262 | if (!mi->restartable) | ||
1263 | flags |= MF_MUST_KILL; | ||
1264 | if (memory_failure(pfn, MCE_VECTOR, flags) < 0) { | ||
1265 | pr_err("Memory error not recovered"); | ||
1266 | force_sig(SIGBUS, current); | ||
1267 | } | ||
1268 | mce_clear_info(mi); | ||
1269 | } | ||
1270 | |||
1271 | /* | ||
1272 | * Action optional processing happens here (picking up | 1221 | * Action optional processing happens here (picking up |
1273 | * from the list of faulting pages that do_machine_check() | 1222 | * from the list of faulting pages that do_machine_check() |
1274 | * placed into the "ring"). | 1223 | * placed into the "ring"). |
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c index a3042989398c..ec2663a708e4 100644 --- a/arch/x86/kernel/cpu/mcheck/p5.c +++ b/arch/x86/kernel/cpu/mcheck/p5.c | |||
@@ -8,6 +8,7 @@ | |||
8 | #include <linux/smp.h> | 8 | #include <linux/smp.h> |
9 | 9 | ||
10 | #include <asm/processor.h> | 10 | #include <asm/processor.h> |
11 | #include <asm/traps.h> | ||
11 | #include <asm/mce.h> | 12 | #include <asm/mce.h> |
12 | #include <asm/msr.h> | 13 | #include <asm/msr.h> |
13 | 14 | ||
@@ -17,8 +18,11 @@ int mce_p5_enabled __read_mostly; | |||
17 | /* Machine check handler for Pentium class Intel CPUs: */ | 18 | /* Machine check handler for Pentium class Intel CPUs: */ |
18 | static void pentium_machine_check(struct pt_regs *regs, long error_code) | 19 | static void pentium_machine_check(struct pt_regs *regs, long error_code) |
19 | { | 20 | { |
21 | enum ctx_state prev_state; | ||
20 | u32 loaddr, hi, lotype; | 22 | u32 loaddr, hi, lotype; |
21 | 23 | ||
24 | prev_state = ist_enter(regs); | ||
25 | |||
22 | rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi); | 26 | rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi); |
23 | rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi); | 27 | rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi); |
24 | 28 | ||
@@ -33,6 +37,8 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code) | |||
33 | } | 37 | } |
34 | 38 | ||
35 | add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); | 39 | add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); |
40 | |||
41 | ist_exit(regs, prev_state); | ||
36 | } | 42 | } |
37 | 43 | ||
38 | /* Set up machine check reporting for processors with Intel style MCE: */ | 44 | /* Set up machine check reporting for processors with Intel style MCE: */ |
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c index 7dc5564d0cdf..bd5d46a32210 100644 --- a/arch/x86/kernel/cpu/mcheck/winchip.c +++ b/arch/x86/kernel/cpu/mcheck/winchip.c | |||
@@ -7,14 +7,19 @@ | |||
7 | #include <linux/types.h> | 7 | #include <linux/types.h> |
8 | 8 | ||
9 | #include <asm/processor.h> | 9 | #include <asm/processor.h> |
10 | #include <asm/traps.h> | ||
10 | #include <asm/mce.h> | 11 | #include <asm/mce.h> |
11 | #include <asm/msr.h> | 12 | #include <asm/msr.h> |
12 | 13 | ||
13 | /* Machine check handler for WinChip C6: */ | 14 | /* Machine check handler for WinChip C6: */ |
14 | static void winchip_machine_check(struct pt_regs *regs, long error_code) | 15 | static void winchip_machine_check(struct pt_regs *regs, long error_code) |
15 | { | 16 | { |
17 | enum ctx_state prev_state = ist_enter(regs); | ||
18 | |||
16 | printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); | 19 | printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); |
17 | add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); | 20 | add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); |
21 | |||
22 | ist_exit(regs, prev_state); | ||
18 | } | 23 | } |
19 | 24 | ||
20 | /* Set up machine check reporting on the Winchip C6 series */ | 25 | /* Set up machine check reporting on the Winchip C6 series */ |
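All three machine-check handlers touched here (mce.c, p5.c, winchip.c) now share the same entry/exit discipline. Stripped to a skeleton, the pattern is (a sketch, not code from the tree):

    static void some_machine_check(struct pt_regs *regs, long error_code)
    {
    	enum ctx_state prev_state = ist_enter(regs);

    	/* ... handle the exception in atomic IST context ... */

    	ist_exit(regs, prev_state);
    }

do_machine_check() additionally brackets its sleeping recovery work (memory_failure() may sleep) with ist_begin_non_atomic()/ist_end_non_atomic(), as shown in the mce.c hunk above; that is what lets the old TIF_MCE_NOTIFY deferral machinery go away.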
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index dd2f07ae9d0c..46201deee923 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -184,9 +184,9 @@ void __init e820_print_map(char *who) | |||
184 | * overwritten in the same location, starting at biosmap. | 184 | * overwritten in the same location, starting at biosmap. |
185 | * | 185 | * |
186 | * The integer pointed to by pnr_map must be valid on entry (the | 186 | * The integer pointed to by pnr_map must be valid on entry (the |
187 | * current number of valid entries located at biosmap) and will | 187 | * current number of valid entries located at biosmap). If the |
188 | * be updated on return, with the new number of valid entries | 188 | * sanitizing succeeds the *pnr_map will be updated with the new |
189 | * (something no more than max_nr_map.) | 189 | * number of valid entries (something no more than max_nr_map). |
190 | * | 190 | * |
191 | * The return value from sanitize_e820_map() is zero if it | 191 | * The return value from sanitize_e820_map() is zero if it |
192 | * successfully 'sanitized' the map entries passed in, and is -1 | 192 | * successfully 'sanitized' the map entries passed in, and is -1 |
@@ -561,23 +561,15 @@ u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type, | |||
561 | 561 | ||
562 | void __init update_e820(void) | 562 | void __init update_e820(void) |
563 | { | 563 | { |
564 | u32 nr_map; | 564 | if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map)) |
565 | |||
566 | nr_map = e820.nr_map; | ||
567 | if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map)) | ||
568 | return; | 565 | return; |
569 | e820.nr_map = nr_map; | ||
570 | printk(KERN_INFO "e820: modified physical RAM map:\n"); | 566 | printk(KERN_INFO "e820: modified physical RAM map:\n"); |
571 | e820_print_map("modified"); | 567 | e820_print_map("modified"); |
572 | } | 568 | } |
573 | static void __init update_e820_saved(void) | 569 | static void __init update_e820_saved(void) |
574 | { | 570 | { |
575 | u32 nr_map; | 571 | sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map), |
576 | 572 | &e820_saved.nr_map); | |
577 | nr_map = e820_saved.nr_map; | ||
578 | if (sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map), &nr_map)) | ||
579 | return; | ||
580 | e820_saved.nr_map = nr_map; | ||
581 | } | 573 | } |
582 | #define MAX_GAP_END 0x100000000ull | 574 | #define MAX_GAP_END 0x100000000ull |
583 | /* | 575 | /* |
@@ -898,11 +890,9 @@ early_param("memmap", parse_memmap_opt); | |||
898 | void __init finish_e820_parsing(void) | 890 | void __init finish_e820_parsing(void) |
899 | { | 891 | { |
900 | if (userdef) { | 892 | if (userdef) { |
901 | u32 nr = e820.nr_map; | 893 | if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), |
902 | 894 | &e820.nr_map) < 0) | |
903 | if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0) | ||
904 | early_panic("Invalid user supplied memory map"); | 895 | early_panic("Invalid user supplied memory map"); |
905 | e820.nr_map = nr; | ||
906 | 896 | ||
907 | printk(KERN_INFO "e820: user-defined physical RAM map:\n"); | 897 | printk(KERN_INFO "e820: user-defined physical RAM map:\n"); |
908 | e820_print_map("user"); | 898 | e820_print_map("user"); |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 9ebaf63ba182..db13655c3a2a 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -143,7 +143,8 @@ ENDPROC(native_usergs_sysret64) | |||
143 | movq \tmp,RSP+\offset(%rsp) | 143 | movq \tmp,RSP+\offset(%rsp) |
144 | movq $__USER_DS,SS+\offset(%rsp) | 144 | movq $__USER_DS,SS+\offset(%rsp) |
145 | movq $__USER_CS,CS+\offset(%rsp) | 145 | movq $__USER_CS,CS+\offset(%rsp) |
146 | movq $-1,RCX+\offset(%rsp) | 146 | movq RIP+\offset(%rsp),\tmp /* get rip */ |
147 | movq \tmp,RCX+\offset(%rsp) /* copy it to rcx as sysret would do */ | ||
147 | movq R11+\offset(%rsp),\tmp /* get eflags */ | 148 | movq R11+\offset(%rsp),\tmp /* get eflags */ |
148 | movq \tmp,EFLAGS+\offset(%rsp) | 149 | movq \tmp,EFLAGS+\offset(%rsp) |
149 | .endm | 150 | .endm |
@@ -155,27 +156,6 @@ ENDPROC(native_usergs_sysret64) | |||
155 | movq \tmp,R11+\offset(%rsp) | 156 | movq \tmp,R11+\offset(%rsp) |
156 | .endm | 157 | .endm |
157 | 158 | ||
158 | .macro FAKE_STACK_FRAME child_rip | ||
159 | /* push in order ss, rsp, eflags, cs, rip */ | ||
160 | xorl %eax, %eax | ||
161 | pushq_cfi $__KERNEL_DS /* ss */ | ||
162 | /*CFI_REL_OFFSET ss,0*/ | ||
163 | pushq_cfi %rax /* rsp */ | ||
164 | CFI_REL_OFFSET rsp,0 | ||
165 | pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_FIXED) /* eflags - interrupts on */ | ||
166 | /*CFI_REL_OFFSET rflags,0*/ | ||
167 | pushq_cfi $__KERNEL_CS /* cs */ | ||
168 | /*CFI_REL_OFFSET cs,0*/ | ||
169 | pushq_cfi \child_rip /* rip */ | ||
170 | CFI_REL_OFFSET rip,0 | ||
171 | pushq_cfi %rax /* orig rax */ | ||
172 | .endm | ||
173 | |||
174 | .macro UNFAKE_STACK_FRAME | ||
175 | addq $8*6, %rsp | ||
176 | CFI_ADJUST_CFA_OFFSET -(6*8) | ||
177 | .endm | ||
178 | |||
179 | /* | 159 | /* |
180 | * initial frame state for interrupts (and exceptions without error code) | 160 | * initial frame state for interrupts (and exceptions without error code) |
181 | */ | 161 | */ |
@@ -238,51 +218,6 @@ ENDPROC(native_usergs_sysret64) | |||
238 | CFI_REL_OFFSET r15, R15+\offset | 218 | CFI_REL_OFFSET r15, R15+\offset |
239 | .endm | 219 | .endm |
240 | 220 | ||
241 | /* save partial stack frame */ | ||
242 | .macro SAVE_ARGS_IRQ | ||
243 | cld | ||
244 | /* start from rbp in pt_regs and jump over */ | ||
245 | movq_cfi rdi, (RDI-RBP) | ||
246 | movq_cfi rsi, (RSI-RBP) | ||
247 | movq_cfi rdx, (RDX-RBP) | ||
248 | movq_cfi rcx, (RCX-RBP) | ||
249 | movq_cfi rax, (RAX-RBP) | ||
250 | movq_cfi r8, (R8-RBP) | ||
251 | movq_cfi r9, (R9-RBP) | ||
252 | movq_cfi r10, (R10-RBP) | ||
253 | movq_cfi r11, (R11-RBP) | ||
254 | |||
255 | /* Save rbp so that we can unwind from get_irq_regs() */ | ||
256 | movq_cfi rbp, 0 | ||
257 | |||
258 | /* Save previous stack value */ | ||
259 | movq %rsp, %rsi | ||
260 | |||
261 | leaq -RBP(%rsp),%rdi /* arg1 for handler */ | ||
262 | testl $3, CS-RBP(%rsi) | ||
263 | je 1f | ||
264 | SWAPGS | ||
265 | /* | ||
266 | * irq_count is used to check if a CPU is already on an interrupt stack | ||
267 | * or not. While this is essentially redundant with preempt_count it is | ||
268 | * a little cheaper to use a separate counter in the PDA (short of | ||
269 | * moving irq_enter into assembly, which would be too much work) | ||
270 | */ | ||
271 | 1: incl PER_CPU_VAR(irq_count) | ||
272 | cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp | ||
273 | CFI_DEF_CFA_REGISTER rsi | ||
274 | |||
275 | /* Store previous stack value */ | ||
276 | pushq %rsi | ||
277 | CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \ | ||
278 | 0x77 /* DW_OP_breg7 */, 0, \ | ||
279 | 0x06 /* DW_OP_deref */, \ | ||
280 | 0x08 /* DW_OP_const1u */, SS+8-RBP, \ | ||
281 | 0x22 /* DW_OP_plus */ | ||
282 | /* We entered an interrupt context - irqs are off: */ | ||
283 | TRACE_IRQS_OFF | ||
284 | .endm | ||
285 | |||
286 | ENTRY(save_paranoid) | 221 | ENTRY(save_paranoid) |
287 | XCPT_FRAME 1 RDI+8 | 222 | XCPT_FRAME 1 RDI+8 |
288 | cld | 223 | cld |
@@ -426,15 +361,12 @@ system_call_fastpath: | |||
426 | * Has incomplete stack frame and undefined top of stack. | 361 | * Has incomplete stack frame and undefined top of stack. |
427 | */ | 362 | */ |
428 | ret_from_sys_call: | 363 | ret_from_sys_call: |
429 | movl $_TIF_ALLWORK_MASK,%edi | 364 | testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
430 | /* edi: flagmask */ | 365 | jnz int_ret_from_sys_call_fixup /* Go to the slow path */ |
431 | sysret_check: | 366 | |
432 | LOCKDEP_SYS_EXIT | 367 | LOCKDEP_SYS_EXIT |
433 | DISABLE_INTERRUPTS(CLBR_NONE) | 368 | DISABLE_INTERRUPTS(CLBR_NONE) |
434 | TRACE_IRQS_OFF | 369 | TRACE_IRQS_OFF |
435 | movl TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET),%edx | ||
436 | andl %edi,%edx | ||
437 | jnz sysret_careful | ||
438 | CFI_REMEMBER_STATE | 370 | CFI_REMEMBER_STATE |
439 | /* | 371 | /* |
440 | * sysretq will re-enable interrupts: | 372 | * sysretq will re-enable interrupts: |
@@ -448,49 +380,10 @@ sysret_check: | |||
448 | USERGS_SYSRET64 | 380 | USERGS_SYSRET64 |
449 | 381 | ||
450 | CFI_RESTORE_STATE | 382 | CFI_RESTORE_STATE |
451 | /* Handle reschedules */ | ||
452 | /* edx: work, edi: workmask */ | ||
453 | sysret_careful: | ||
454 | bt $TIF_NEED_RESCHED,%edx | ||
455 | jnc sysret_signal | ||
456 | TRACE_IRQS_ON | ||
457 | ENABLE_INTERRUPTS(CLBR_NONE) | ||
458 | pushq_cfi %rdi | ||
459 | SCHEDULE_USER | ||
460 | popq_cfi %rdi | ||
461 | jmp sysret_check | ||
462 | 383 | ||
463 | /* Handle a signal */ | 384 | int_ret_from_sys_call_fixup: |
464 | sysret_signal: | ||
465 | TRACE_IRQS_ON | ||
466 | ENABLE_INTERRUPTS(CLBR_NONE) | ||
467 | #ifdef CONFIG_AUDITSYSCALL | ||
468 | bt $TIF_SYSCALL_AUDIT,%edx | ||
469 | jc sysret_audit | ||
470 | #endif | ||
471 | /* | ||
472 | * We have a signal, or exit tracing or single-step. | ||
473 | * These all wind up with the iret return path anyway, | ||
474 | * so just join that path right now. | ||
475 | */ | ||
476 | FIXUP_TOP_OF_STACK %r11, -ARGOFFSET | 385 | FIXUP_TOP_OF_STACK %r11, -ARGOFFSET |
477 | jmp int_check_syscall_exit_work | 386 | jmp int_ret_from_sys_call |
478 | |||
479 | #ifdef CONFIG_AUDITSYSCALL | ||
480 | /* | ||
481 | * Return fast path for syscall audit. Call __audit_syscall_exit() | ||
482 | * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT | ||
483 | * masked off. | ||
484 | */ | ||
485 | sysret_audit: | ||
486 | movq RAX-ARGOFFSET(%rsp),%rsi /* second arg, syscall return value */ | ||
487 | cmpq $-MAX_ERRNO,%rsi /* is it < -MAX_ERRNO? */ | ||
488 | setbe %al /* 1 if so, 0 if not */ | ||
489 | movzbl %al,%edi /* zero-extend that into %edi */ | ||
490 | call __audit_syscall_exit | ||
491 | movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi | ||
492 | jmp sysret_check | ||
493 | #endif /* CONFIG_AUDITSYSCALL */ | ||
494 | 387 | ||
495 | /* Do syscall tracing */ | 388 | /* Do syscall tracing */ |
496 | tracesys: | 389 | tracesys: |
@@ -626,19 +519,6 @@ END(\label) | |||
626 | FORK_LIKE vfork | 519 | FORK_LIKE vfork |
627 | FIXED_FRAME stub_iopl, sys_iopl | 520 | FIXED_FRAME stub_iopl, sys_iopl |
628 | 521 | ||
629 | ENTRY(ptregscall_common) | ||
630 | DEFAULT_FRAME 1 8 /* offset 8: return address */ | ||
631 | RESTORE_TOP_OF_STACK %r11, 8 | ||
632 | movq_cfi_restore R15+8, r15 | ||
633 | movq_cfi_restore R14+8, r14 | ||
634 | movq_cfi_restore R13+8, r13 | ||
635 | movq_cfi_restore R12+8, r12 | ||
636 | movq_cfi_restore RBP+8, rbp | ||
637 | movq_cfi_restore RBX+8, rbx | ||
638 | ret $REST_SKIP /* pop extended registers */ | ||
639 | CFI_ENDPROC | ||
640 | END(ptregscall_common) | ||
641 | |||
642 | ENTRY(stub_execve) | 522 | ENTRY(stub_execve) |
643 | CFI_STARTPROC | 523 | CFI_STARTPROC |
644 | addq $8, %rsp | 524 | addq $8, %rsp |
@@ -779,7 +659,48 @@ END(interrupt) | |||
779 | /* reserve pt_regs for scratch regs and rbp */ | 659 | /* reserve pt_regs for scratch regs and rbp */ |
780 | subq $ORIG_RAX-RBP, %rsp | 660 | subq $ORIG_RAX-RBP, %rsp |
781 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP | 661 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP |
782 | SAVE_ARGS_IRQ | 662 | cld |
663 | /* start from rbp in pt_regs and jump over */ | ||
664 | movq_cfi rdi, (RDI-RBP) | ||
665 | movq_cfi rsi, (RSI-RBP) | ||
666 | movq_cfi rdx, (RDX-RBP) | ||
667 | movq_cfi rcx, (RCX-RBP) | ||
668 | movq_cfi rax, (RAX-RBP) | ||
669 | movq_cfi r8, (R8-RBP) | ||
670 | movq_cfi r9, (R9-RBP) | ||
671 | movq_cfi r10, (R10-RBP) | ||
672 | movq_cfi r11, (R11-RBP) | ||
673 | |||
674 | /* Save rbp so that we can unwind from get_irq_regs() */ | ||
675 | movq_cfi rbp, 0 | ||
676 | |||
677 | /* Save previous stack value */ | ||
678 | movq %rsp, %rsi | ||
679 | |||
680 | leaq -RBP(%rsp),%rdi /* arg1 for handler */ | ||
681 | testl $3, CS-RBP(%rsi) | ||
682 | je 1f | ||
683 | SWAPGS | ||
684 | /* | ||
685 | * irq_count is used to check if a CPU is already on an interrupt stack | ||
686 | * or not. While this is essentially redundant with preempt_count it is | ||
687 | * a little cheaper to use a separate counter in the PDA (short of | ||
688 | * moving irq_enter into assembly, which would be too much work) | ||
689 | */ | ||
690 | 1: incl PER_CPU_VAR(irq_count) | ||
691 | cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp | ||
692 | CFI_DEF_CFA_REGISTER rsi | ||
693 | |||
694 | /* Store previous stack value */ | ||
695 | pushq %rsi | ||
696 | CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \ | ||
697 | 0x77 /* DW_OP_breg7 */, 0, \ | ||
698 | 0x06 /* DW_OP_deref */, \ | ||
699 | 0x08 /* DW_OP_const1u */, SS+8-RBP, \ | ||
700 | 0x22 /* DW_OP_plus */ | ||
701 | /* We entered an interrupt context - irqs are off: */ | ||
702 | TRACE_IRQS_OFF | ||
703 | |||
783 | call \func | 704 | call \func |
784 | .endm | 705 | .endm |
785 | 706 | ||
@@ -831,6 +752,60 @@ retint_swapgs: /* return to user-space */ | |||
831 | */ | 752 | */ |
832 | DISABLE_INTERRUPTS(CLBR_ANY) | 753 | DISABLE_INTERRUPTS(CLBR_ANY) |
833 | TRACE_IRQS_IRETQ | 754 | TRACE_IRQS_IRETQ |
755 | |||
756 | /* | ||
757 | * Try to use SYSRET instead of IRET if we're returning to | ||
758 | * a completely clean 64-bit userspace context. | ||
759 | */ | ||
760 | movq (RCX-R11)(%rsp), %rcx | ||
761 | cmpq %rcx,(RIP-R11)(%rsp) /* RCX == RIP */ | ||
762 | jne opportunistic_sysret_failed | ||
763 | |||
764 | /* | ||
765 | * On Intel CPUs, sysret with non-canonical RCX/RIP will #GP | ||
766 | * in kernel space. This essentially lets the user take over | ||
767 | * the kernel, since userspace controls RSP. It's not worth | ||
768 | * testing for canonicalness exactly -- this check detects any | ||
769 | * of the 17 high bits set, which is true for non-canonical | ||
770 | * or kernel addresses. (This will pessimize vsyscall=native. | ||
771 | * Big deal.) | ||
772 | * | ||
773 | * If virtual addresses ever become wider, this will need | ||
774 | * to be updated to remain correct on both old and new CPUs. | ||
775 | */ | ||
776 | .ifne __VIRTUAL_MASK_SHIFT - 47 | ||
777 | .error "virtual address width changed -- sysret checks need update" | ||
778 | .endif | ||
779 | shr $__VIRTUAL_MASK_SHIFT, %rcx | ||
780 | jnz opportunistic_sysret_failed | ||
781 | |||
782 | cmpq $__USER_CS,(CS-R11)(%rsp) /* CS must match SYSRET */ | ||
783 | jne opportunistic_sysret_failed | ||
784 | |||
785 | movq (R11-ARGOFFSET)(%rsp), %r11 | ||
786 | cmpq %r11,(EFLAGS-ARGOFFSET)(%rsp) /* R11 == RFLAGS */ | ||
787 | jne opportunistic_sysret_failed | ||
788 | |||
789 | testq $X86_EFLAGS_RF,%r11 /* sysret can't restore RF */ | ||
790 | jnz opportunistic_sysret_failed | ||
791 | |||
792 | /* nothing to check for RSP */ | ||
793 | |||
794 | cmpq $__USER_DS,(SS-ARGOFFSET)(%rsp) /* SS must match SYSRET */ | ||
795 | jne opportunistic_sysret_failed | ||
796 | |||
797 | /* | ||
798 | * We win! This label is here just for ease of understanding | ||
799 | * perf profiles. Nothing jumps here. | ||
800 | */ | ||
801 | irq_return_via_sysret: | ||
802 | CFI_REMEMBER_STATE | ||
803 | RESTORE_ARGS 1,8,1 | ||
804 | movq (RSP-RIP)(%rsp),%rsp | ||
805 | USERGS_SYSRET64 | ||
806 | CFI_RESTORE_STATE | ||
807 | |||
808 | opportunistic_sysret_failed: | ||
834 | SWAPGS | 809 | SWAPGS |
835 | jmp restore_args | 810 | jmp restore_args |
836 | 811 | ||
@@ -1048,6 +1023,11 @@ ENTRY(\sym) | |||
1048 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 | 1023 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 |
1049 | 1024 | ||
1050 | .if \paranoid | 1025 | .if \paranoid |
1026 | .if \paranoid == 1 | ||
1027 | CFI_REMEMBER_STATE | ||
1028 | testl $3, CS(%rsp) /* If coming from userspace, switch */ | ||
1029 | jnz 1f /* stacks. */ | ||
1030 | .endif | ||
1051 | call save_paranoid | 1031 | call save_paranoid |
1052 | .else | 1032 | .else |
1053 | call error_entry | 1033 | call error_entry |
@@ -1088,6 +1068,36 @@ ENTRY(\sym) | |||
1088 | jmp error_exit /* %ebx: no swapgs flag */ | 1068 | jmp error_exit /* %ebx: no swapgs flag */ |
1089 | .endif | 1069 | .endif |
1090 | 1070 | ||
1071 | .if \paranoid == 1 | ||
1072 | CFI_RESTORE_STATE | ||
1073 | /* | ||
1074 | * Paranoid entry from userspace. Switch stacks and treat it | ||
1075 | * as a normal entry. This means that paranoid handlers | ||
1076 | * run in real process context if user_mode(regs). | ||
1077 | */ | ||
1078 | 1: | ||
1079 | call error_entry | ||
1080 | |||
1081 | DEFAULT_FRAME 0 | ||
1082 | |||
1083 | movq %rsp,%rdi /* pt_regs pointer */ | ||
1084 | call sync_regs | ||
1085 | movq %rax,%rsp /* switch stack */ | ||
1086 | |||
1087 | movq %rsp,%rdi /* pt_regs pointer */ | ||
1088 | |||
1089 | .if \has_error_code | ||
1090 | movq ORIG_RAX(%rsp),%rsi /* get error code */ | ||
1091 | movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ | ||
1092 | .else | ||
1093 | xorl %esi,%esi /* no error code */ | ||
1094 | .endif | ||
1095 | |||
1096 | call \do_sym | ||
1097 | |||
1098 | jmp error_exit /* %ebx: no swapgs flag */ | ||
1099 | .endif | ||
1100 | |||
1091 | CFI_ENDPROC | 1101 | CFI_ENDPROC |
1092 | END(\sym) | 1102 | END(\sym) |
1093 | .endm | 1103 | .endm |
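For readers of the idtentry macro: \paranoid now takes three values instead of two. In summary (restated from the hunks above and the double_fault line below; not code from the tree):

    /*
     * paranoid=0: plain error_entry (ordinary traps).
     * paranoid=1: save_paranoid, but if CS shows we came from
     *             userspace, switch stacks and run as a normal
     *             error_entry-style trap instead.
     * paranoid=2: unconditionally paranoid; used by double_fault,
     *             which must stay on its IST stack even for faults
     *             arriving from userspace.
     */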
@@ -1108,7 +1118,7 @@ idtentry overflow do_overflow has_error_code=0 | |||
1108 | idtentry bounds do_bounds has_error_code=0 | 1118 | idtentry bounds do_bounds has_error_code=0 |
1109 | idtentry invalid_op do_invalid_op has_error_code=0 | 1119 | idtentry invalid_op do_invalid_op has_error_code=0 |
1110 | idtentry device_not_available do_device_not_available has_error_code=0 | 1120 | idtentry device_not_available do_device_not_available has_error_code=0 |
1111 | idtentry double_fault do_double_fault has_error_code=1 paranoid=1 | 1121 | idtentry double_fault do_double_fault has_error_code=1 paranoid=2 |
1112 | idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0 | 1122 | idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0 |
1113 | idtentry invalid_TSS do_invalid_TSS has_error_code=1 | 1123 | idtentry invalid_TSS do_invalid_TSS has_error_code=1 |
1114 | idtentry segment_not_present do_segment_not_present has_error_code=1 | 1124 | idtentry segment_not_present do_segment_not_present has_error_code=1 |
@@ -1289,16 +1299,14 @@ idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector( | |||
1289 | #endif | 1299 | #endif |
1290 | 1300 | ||
1291 | /* | 1301 | /* |
1292 | * "Paranoid" exit path from exception stack. | 1302 | * "Paranoid" exit path from exception stack. This is invoked |
1293 | * Paranoid because this is used by NMIs and cannot take | 1303 | * only on return from non-NMI IST interrupts that came |
1294 | * any kernel state for granted. | 1304 | * from kernel space. |
1295 | * We don't do kernel preemption checks here, because only | ||
1296 | * NMI should be common and it does not enable IRQs and | ||
1297 | * cannot get reschedule ticks. | ||
1298 | * | 1305 | * |
1299 | * "trace" is 0 for the NMI handler only, because irq-tracing | 1306 | * We may be returning to very strange contexts (e.g. very early |
1300 | * is fundamentally NMI-unsafe. (we cannot change the soft and | 1307 | * in syscall entry), so checking for preemption here would |
1301 | * hard flags at once, atomically) | 1308 | * be complicated. Fortunately, there's no good reason |
1309 | * to try to handle preemption here. | ||
1302 | */ | 1310 | */ |
1303 | 1311 | ||
1304 | /* ebx: no swapgs flag */ | 1312 | /* ebx: no swapgs flag */ |
@@ -1308,43 +1316,14 @@ ENTRY(paranoid_exit) | |||
1308 | TRACE_IRQS_OFF_DEBUG | 1316 | TRACE_IRQS_OFF_DEBUG |
1309 | testl %ebx,%ebx /* swapgs needed? */ | 1317 | testl %ebx,%ebx /* swapgs needed? */ |
1310 | jnz paranoid_restore | 1318 | jnz paranoid_restore |
1311 | testl $3,CS(%rsp) | ||
1312 | jnz paranoid_userspace | ||
1313 | paranoid_swapgs: | ||
1314 | TRACE_IRQS_IRETQ 0 | 1319 | TRACE_IRQS_IRETQ 0 |
1315 | SWAPGS_UNSAFE_STACK | 1320 | SWAPGS_UNSAFE_STACK |
1316 | RESTORE_ALL 8 | 1321 | RESTORE_ALL 8 |
1317 | jmp irq_return | 1322 | INTERRUPT_RETURN |
1318 | paranoid_restore: | 1323 | paranoid_restore: |
1319 | TRACE_IRQS_IRETQ_DEBUG 0 | 1324 | TRACE_IRQS_IRETQ_DEBUG 0 |
1320 | RESTORE_ALL 8 | 1325 | RESTORE_ALL 8 |
1321 | jmp irq_return | 1326 | INTERRUPT_RETURN |
1322 | paranoid_userspace: | ||
1323 | GET_THREAD_INFO(%rcx) | ||
1324 | movl TI_flags(%rcx),%ebx | ||
1325 | andl $_TIF_WORK_MASK,%ebx | ||
1326 | jz paranoid_swapgs | ||
1327 | movq %rsp,%rdi /* &pt_regs */ | ||
1328 | call sync_regs | ||
1329 | movq %rax,%rsp /* switch stack for scheduling */ | ||
1330 | testl $_TIF_NEED_RESCHED,%ebx | ||
1331 | jnz paranoid_schedule | ||
1332 | movl %ebx,%edx /* arg3: thread flags */ | ||
1333 | TRACE_IRQS_ON | ||
1334 | ENABLE_INTERRUPTS(CLBR_NONE) | ||
1335 | xorl %esi,%esi /* arg2: oldset */ | ||
1336 | movq %rsp,%rdi /* arg1: &pt_regs */ | ||
1337 | call do_notify_resume | ||
1338 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
1339 | TRACE_IRQS_OFF | ||
1340 | jmp paranoid_userspace | ||
1341 | paranoid_schedule: | ||
1342 | TRACE_IRQS_ON | ||
1343 | ENABLE_INTERRUPTS(CLBR_ANY) | ||
1344 | SCHEDULE_USER | ||
1345 | DISABLE_INTERRUPTS(CLBR_ANY) | ||
1346 | TRACE_IRQS_OFF | ||
1347 | jmp paranoid_userspace | ||
1348 | CFI_ENDPROC | 1327 | CFI_ENDPROC |
1349 | END(paranoid_exit) | 1328 | END(paranoid_exit) |
1350 | 1329 | ||
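The non-canonical check in the opportunistic-SYSRET path above is easier to read in C. With __VIRTUAL_MASK_SHIFT == 47 (48-bit virtual addresses), the shr/jnz pair is equivalent to (a sketch):

    /* Non-zero iff any of the 17 high bits of RIP are set, i.e. RIP
     * is non-canonical or a kernel address; either way SYSRET is
     * unsafe and the code falls back to IRET. */
    static inline bool sysret_rip_unsafe(unsigned long rip)
    {
    	return rip >> __VIRTUAL_MASK_SHIFT;
    }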
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 3d5fb509bdeb..7114ba220fd4 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c | |||
@@ -126,6 +126,8 @@ int arch_install_hw_breakpoint(struct perf_event *bp) | |||
126 | *dr7 |= encode_dr7(i, info->len, info->type); | 126 | *dr7 |= encode_dr7(i, info->len, info->type); |
127 | 127 | ||
128 | set_debugreg(*dr7, 7); | 128 | set_debugreg(*dr7, 7); |
129 | if (info->mask) | ||
130 | set_dr_addr_mask(info->mask, i); | ||
129 | 131 | ||
130 | return 0; | 132 | return 0; |
131 | } | 133 | } |
@@ -161,29 +163,8 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) | |||
161 | *dr7 &= ~__encode_dr7(i, info->len, info->type); | 163 | *dr7 &= ~__encode_dr7(i, info->len, info->type); |
162 | 164 | ||
163 | set_debugreg(*dr7, 7); | 165 | set_debugreg(*dr7, 7); |
164 | } | 166 | if (info->mask) |
165 | 167 | set_dr_addr_mask(0, i); | |
166 | static int get_hbp_len(u8 hbp_len) | ||
167 | { | ||
168 | unsigned int len_in_bytes = 0; | ||
169 | |||
170 | switch (hbp_len) { | ||
171 | case X86_BREAKPOINT_LEN_1: | ||
172 | len_in_bytes = 1; | ||
173 | break; | ||
174 | case X86_BREAKPOINT_LEN_2: | ||
175 | len_in_bytes = 2; | ||
176 | break; | ||
177 | case X86_BREAKPOINT_LEN_4: | ||
178 | len_in_bytes = 4; | ||
179 | break; | ||
180 | #ifdef CONFIG_X86_64 | ||
181 | case X86_BREAKPOINT_LEN_8: | ||
182 | len_in_bytes = 8; | ||
183 | break; | ||
184 | #endif | ||
185 | } | ||
186 | return len_in_bytes; | ||
187 | } | 168 | } |
188 | 169 | ||
189 | /* | 170 | /* |
@@ -196,7 +177,7 @@ int arch_check_bp_in_kernelspace(struct perf_event *bp) | |||
196 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); | 177 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); |
197 | 178 | ||
198 | va = info->address; | 179 | va = info->address; |
199 | len = get_hbp_len(info->len); | 180 | len = bp->attr.bp_len; |
200 | 181 | ||
201 | return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); | 182 | return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); |
202 | } | 183 | } |
@@ -277,6 +258,8 @@ static int arch_build_bp_info(struct perf_event *bp) | |||
277 | } | 258 | } |
278 | 259 | ||
279 | /* Len */ | 260 | /* Len */ |
261 | info->mask = 0; | ||
262 | |||
280 | switch (bp->attr.bp_len) { | 263 | switch (bp->attr.bp_len) { |
281 | case HW_BREAKPOINT_LEN_1: | 264 | case HW_BREAKPOINT_LEN_1: |
282 | info->len = X86_BREAKPOINT_LEN_1; | 265 | info->len = X86_BREAKPOINT_LEN_1; |
@@ -293,11 +276,17 @@ static int arch_build_bp_info(struct perf_event *bp) | |||
293 | break; | 276 | break; |
294 | #endif | 277 | #endif |
295 | default: | 278 | default: |
296 | return -EINVAL; | 279 | if (!is_power_of_2(bp->attr.bp_len)) |
280 | return -EINVAL; | ||
281 | if (!cpu_has_bpext) | ||
282 | return -EOPNOTSUPP; | ||
283 | info->mask = bp->attr.bp_len - 1; | ||
284 | info->len = X86_BREAKPOINT_LEN_1; | ||
297 | } | 285 | } |
298 | 286 | ||
299 | return 0; | 287 | return 0; |
300 | } | 288 | } |
289 | |||
301 | /* | 290 | /* |
302 | * Validate the arch-specific HW Breakpoint register settings | 291 | * Validate the arch-specific HW Breakpoint register settings |
303 | */ | 292 | */ |
@@ -312,11 +301,11 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) | |||
312 | if (ret) | 301 | if (ret) |
313 | return ret; | 302 | return ret; |
314 | 303 | ||
315 | ret = -EINVAL; | ||
316 | |||
317 | switch (info->len) { | 304 | switch (info->len) { |
318 | case X86_BREAKPOINT_LEN_1: | 305 | case X86_BREAKPOINT_LEN_1: |
319 | align = 0; | 306 | align = 0; |
307 | if (info->mask) | ||
308 | align = info->mask; | ||
320 | break; | 309 | break; |
321 | case X86_BREAKPOINT_LEN_2: | 310 | case X86_BREAKPOINT_LEN_2: |
322 | align = 1; | 311 | align = 1; |
@@ -330,7 +319,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) | |||
330 | break; | 319 | break; |
331 | #endif | 320 | #endif |
332 | default: | 321 | default: |
333 | return ret; | 322 | WARN_ON_ONCE(1); |
334 | } | 323 | } |
335 | 324 | ||
336 | /* | 325 | /* |
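With the hw_breakpoint.c changes above, a bp_len larger than 8 bytes is accepted when it is a power of two and the CPU has the AMD address-mask extension (bpext). A hypothetical userspace request for such a ranged watchpoint could look like this (fields from linux/perf_event.h; error handling and the perf_event_open() call omitted):

    struct perf_event_attr attr = {
    	.type          = PERF_TYPE_BREAKPOINT,
    	.size          = sizeof(attr),
    	.bp_type       = HW_BREAKPOINT_W,
    	.bp_addr       = (unsigned long)buf, /* assumed 32-byte aligned */
    	.bp_len        = 32,  /* power of 2 > 8 -> info->mask = 31 */
    	.sample_period = 1,
    };

The kernel then programs the debug register with bp_addr and the new address-mask MSR with 31, so any write within the 32-byte window triggers the breakpoint.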
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index a9a4229f6161..81049ffab2d6 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c | |||
@@ -19,6 +19,19 @@ | |||
19 | #include <asm/fpu-internal.h> | 19 | #include <asm/fpu-internal.h> |
20 | #include <asm/user.h> | 20 | #include <asm/user.h> |
21 | 21 | ||
22 | static DEFINE_PER_CPU(bool, in_kernel_fpu); | ||
23 | |||
24 | void kernel_fpu_disable(void) | ||
25 | { | ||
26 | WARN_ON(this_cpu_read(in_kernel_fpu)); | ||
27 | this_cpu_write(in_kernel_fpu, true); | ||
28 | } | ||
29 | |||
30 | void kernel_fpu_enable(void) | ||
31 | { | ||
32 | this_cpu_write(in_kernel_fpu, false); | ||
33 | } | ||
34 | |||
22 | /* | 35 | /* |
23 | * Were we in an interrupt that interrupted kernel mode? | 36 | * Were we in an interrupt that interrupted kernel mode? |
24 | * | 37 | * |
@@ -33,6 +46,9 @@ | |||
33 | */ | 46 | */ |
34 | static inline bool interrupted_kernel_fpu_idle(void) | 47 | static inline bool interrupted_kernel_fpu_idle(void) |
35 | { | 48 | { |
49 | if (this_cpu_read(in_kernel_fpu)) | ||
50 | return false; | ||
51 | |||
36 | if (use_eager_fpu()) | 52 | if (use_eager_fpu()) |
37 | return __thread_has_fpu(current); | 53 | return __thread_has_fpu(current); |
38 | 54 | ||
@@ -73,10 +89,10 @@ void __kernel_fpu_begin(void) | |||
73 | { | 89 | { |
74 | struct task_struct *me = current; | 90 | struct task_struct *me = current; |
75 | 91 | ||
92 | this_cpu_write(in_kernel_fpu, true); | ||
93 | |||
76 | if (__thread_has_fpu(me)) { | 94 | if (__thread_has_fpu(me)) { |
77 | __thread_clear_has_fpu(me); | ||
78 | __save_init_fpu(me); | 95 | __save_init_fpu(me); |
79 | /* We do 'stts()' in __kernel_fpu_end() */ | ||
80 | } else if (!use_eager_fpu()) { | 96 | } else if (!use_eager_fpu()) { |
81 | this_cpu_write(fpu_owner_task, NULL); | 97 | this_cpu_write(fpu_owner_task, NULL); |
82 | clts(); | 98 | clts(); |
@@ -86,19 +102,16 @@ EXPORT_SYMBOL(__kernel_fpu_begin); | |||
86 | 102 | ||
87 | void __kernel_fpu_end(void) | 103 | void __kernel_fpu_end(void) |
88 | { | 104 | { |
89 | if (use_eager_fpu()) { | 105 | struct task_struct *me = current; |
90 | /* | 106 | |
91 | * For eager fpu, most the time, tsk_used_math() is true. | 107 | if (__thread_has_fpu(me)) { |
92 | * Restore the user math as we are done with the kernel usage. | 108 | if (WARN_ON(restore_fpu_checking(me))) |
93 | * At few instances during thread exit, signal handling etc, | 109 | drop_init_fpu(me); |
94 | * tsk_used_math() is false. Those few places will take proper | 110 | } else if (!use_eager_fpu()) { |
95 | * actions, so we don't need to restore the math here. | ||
96 | */ | ||
97 | if (likely(tsk_used_math(current))) | ||
98 | math_state_restore(); | ||
99 | } else { | ||
100 | stts(); | 111 | stts(); |
101 | } | 112 | } |
113 | |||
114 | this_cpu_write(in_kernel_fpu, false); | ||
102 | } | 115 | } |
103 | EXPORT_SYMBOL(__kernel_fpu_end); | 116 | EXPORT_SYMBOL(__kernel_fpu_end); |
104 | 117 | ||
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 63ce838e5a54..28d28f5eb8f4 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c | |||
@@ -69,16 +69,9 @@ static void call_on_stack(void *func, void *stack) | |||
69 | : "memory", "cc", "edx", "ecx", "eax"); | 69 | : "memory", "cc", "edx", "ecx", "eax"); |
70 | } | 70 | } |
71 | 71 | ||
72 | /* how to get the current stack pointer from C */ | ||
73 | #define current_stack_pointer ({ \ | ||
74 | unsigned long sp; \ | ||
75 | asm("mov %%esp,%0" : "=g" (sp)); \ | ||
76 | sp; \ | ||
77 | }) | ||
78 | |||
79 | static inline void *current_stack(void) | 72 | static inline void *current_stack(void) |
80 | { | 73 | { |
81 | return (void *)(current_stack_pointer & ~(THREAD_SIZE - 1)); | 74 | return (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1)); |
82 | } | 75 | } |
83 | 76 | ||
84 | static inline int | 77 | static inline int |
@@ -103,7 +96,7 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) | |||
103 | 96 | ||
104 | /* Save the next esp at the bottom of the stack */ | 97 | /* Save the next esp at the bottom of the stack */ |
105 | prev_esp = (u32 *)irqstk; | 98 | prev_esp = (u32 *)irqstk; |
106 | *prev_esp = current_stack_pointer; | 99 | *prev_esp = current_stack_pointer(); |
107 | 100 | ||
108 | if (unlikely(overflow)) | 101 | if (unlikely(overflow)) |
109 | call_on_stack(print_stack_overflow, isp); | 102 | call_on_stack(print_stack_overflow, isp); |
@@ -156,7 +149,7 @@ void do_softirq_own_stack(void) | |||
156 | 149 | ||
157 | /* Push the previous esp onto the stack */ | 150 | /* Push the previous esp onto the stack */ |
158 | prev_esp = (u32 *)irqstk; | 151 | prev_esp = (u32 *)irqstk; |
159 | *prev_esp = current_stack_pointer; | 152 | *prev_esp = current_stack_pointer(); |
160 | 153 | ||
161 | call_on_stack(__do_softirq, isp); | 154 | call_on_stack(__do_softirq, isp); |
162 | } | 155 | } |
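irq_32.c now calls a current_stack_pointer() helper instead of carrying its own macro; presumably the definition moved to a shared header (not shown in this diff), something like:

    /* Assumed shape of the shared helper replacing the old local
     * macro; a 64-bit variant would read %rsp instead. */
    static inline unsigned long current_stack_pointer(void)
    {
    	unsigned long sp;
    	asm("mov %%esp,%0" : "=g" (sp));
    	return sp;
    }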
diff --git a/arch/x86/kernel/pmc_atom.c b/arch/x86/kernel/pmc_atom.c index 0ee5025e0fa4..d66a4fe6caee 100644 --- a/arch/x86/kernel/pmc_atom.c +++ b/arch/x86/kernel/pmc_atom.c | |||
@@ -25,8 +25,6 @@ | |||
25 | 25 | ||
26 | #include <asm/pmc_atom.h> | 26 | #include <asm/pmc_atom.h> |
27 | 27 | ||
28 | #define DRIVER_NAME KBUILD_MODNAME | ||
29 | |||
30 | struct pmc_dev { | 28 | struct pmc_dev { |
31 | u32 base_addr; | 29 | u32 base_addr; |
32 | void __iomem *regmap; | 30 | void __iomem *regmap; |
@@ -38,12 +36,12 @@ struct pmc_dev { | |||
38 | static struct pmc_dev pmc_device; | 36 | static struct pmc_dev pmc_device; |
39 | static u32 acpi_base_addr; | 37 | static u32 acpi_base_addr; |
40 | 38 | ||
41 | struct pmc_dev_map { | 39 | struct pmc_bit_map { |
42 | const char *name; | 40 | const char *name; |
43 | u32 bit_mask; | 41 | u32 bit_mask; |
44 | }; | 42 | }; |
45 | 43 | ||
46 | static const struct pmc_dev_map dev_map[] = { | 44 | static const struct pmc_bit_map dev_map[] = { |
47 | {"0 - LPSS1_F0_DMA", BIT_LPSS1_F0_DMA}, | 45 | {"0 - LPSS1_F0_DMA", BIT_LPSS1_F0_DMA}, |
48 | {"1 - LPSS1_F1_PWM1", BIT_LPSS1_F1_PWM1}, | 46 | {"1 - LPSS1_F1_PWM1", BIT_LPSS1_F1_PWM1}, |
49 | {"2 - LPSS1_F2_PWM2", BIT_LPSS1_F2_PWM2}, | 47 | {"2 - LPSS1_F2_PWM2", BIT_LPSS1_F2_PWM2}, |
@@ -82,6 +80,27 @@ static const struct pmc_dev_map dev_map[] = { | |||
82 | {"35 - DFX", BIT_DFX}, | 80 | {"35 - DFX", BIT_DFX}, |
83 | }; | 81 | }; |
84 | 82 | ||
83 | static const struct pmc_bit_map pss_map[] = { | ||
84 | {"0 - GBE", PMC_PSS_BIT_GBE}, | ||
85 | {"1 - SATA", PMC_PSS_BIT_SATA}, | ||
86 | {"2 - HDA", PMC_PSS_BIT_HDA}, | ||
87 | {"3 - SEC", PMC_PSS_BIT_SEC}, | ||
88 | {"4 - PCIE", PMC_PSS_BIT_PCIE}, | ||
89 | {"5 - LPSS", PMC_PSS_BIT_LPSS}, | ||
90 | {"6 - LPE", PMC_PSS_BIT_LPE}, | ||
91 | {"7 - DFX", PMC_PSS_BIT_DFX}, | ||
92 | {"8 - USH_CTRL", PMC_PSS_BIT_USH_CTRL}, | ||
93 | {"9 - USH_SUS", PMC_PSS_BIT_USH_SUS}, | ||
94 | {"10 - USH_VCCS", PMC_PSS_BIT_USH_VCCS}, | ||
95 | {"11 - USH_VCCA", PMC_PSS_BIT_USH_VCCA}, | ||
96 | {"12 - OTG_CTRL", PMC_PSS_BIT_OTG_CTRL}, | ||
97 | {"13 - OTG_VCCS", PMC_PSS_BIT_OTG_VCCS}, | ||
98 | {"14 - OTG_VCCA_CLK", PMC_PSS_BIT_OTG_VCCA_CLK}, | ||
99 | {"15 - OTG_VCCA", PMC_PSS_BIT_OTG_VCCA}, | ||
100 | {"16 - USB", PMC_PSS_BIT_USB}, | ||
101 | {"17 - USB_SUS", PMC_PSS_BIT_USB_SUS}, | ||
102 | }; | ||
103 | |||
85 | static inline u32 pmc_reg_read(struct pmc_dev *pmc, int reg_offset) | 104 | static inline u32 pmc_reg_read(struct pmc_dev *pmc, int reg_offset) |
86 | { | 105 | { |
87 | return readl(pmc->regmap + reg_offset); | 106 | return readl(pmc->regmap + reg_offset); |
@@ -169,6 +188,32 @@ static const struct file_operations pmc_dev_state_ops = { | |||
169 | .release = single_release, | 188 | .release = single_release, |
170 | }; | 189 | }; |
171 | 190 | ||
191 | static int pmc_pss_state_show(struct seq_file *s, void *unused) | ||
192 | { | ||
193 | struct pmc_dev *pmc = s->private; | ||
194 | u32 pss = pmc_reg_read(pmc, PMC_PSS); | ||
195 | int pss_index; | ||
196 | |||
197 | for (pss_index = 0; pss_index < ARRAY_SIZE(pss_map); pss_index++) { | ||
198 | seq_printf(s, "Island: %-32s\tState: %s\n", | ||
199 | pss_map[pss_index].name, | ||
200 | pss_map[pss_index].bit_mask & pss ? "Off" : "On"); | ||
201 | } | ||
202 | return 0; | ||
203 | } | ||
204 | |||
205 | static int pmc_pss_state_open(struct inode *inode, struct file *file) | ||
206 | { | ||
207 | return single_open(file, pmc_pss_state_show, inode->i_private); | ||
208 | } | ||
209 | |||
210 | static const struct file_operations pmc_pss_state_ops = { | ||
211 | .open = pmc_pss_state_open, | ||
212 | .read = seq_read, | ||
213 | .llseek = seq_lseek, | ||
214 | .release = single_release, | ||
215 | }; | ||
216 | |||
172 | static int pmc_sleep_tmr_show(struct seq_file *s, void *unused) | 217 | static int pmc_sleep_tmr_show(struct seq_file *s, void *unused) |
173 | { | 218 | { |
174 | struct pmc_dev *pmc = s->private; | 219 | struct pmc_dev *pmc = s->private; |
@@ -202,11 +247,7 @@ static const struct file_operations pmc_sleep_tmr_ops = { | |||
202 | 247 | ||
203 | static void pmc_dbgfs_unregister(struct pmc_dev *pmc) | 248 | static void pmc_dbgfs_unregister(struct pmc_dev *pmc) |
204 | { | 249 | { |
205 | if (!pmc->dbgfs_dir) | ||
206 | return; | ||
207 | |||
208 | debugfs_remove_recursive(pmc->dbgfs_dir); | 250 | debugfs_remove_recursive(pmc->dbgfs_dir); |
209 | pmc->dbgfs_dir = NULL; | ||
210 | } | 251 | } |
211 | 252 | ||
212 | static int pmc_dbgfs_register(struct pmc_dev *pmc, struct pci_dev *pdev) | 253 | static int pmc_dbgfs_register(struct pmc_dev *pmc, struct pci_dev *pdev) |
@@ -217,19 +258,29 @@ static int pmc_dbgfs_register(struct pmc_dev *pmc, struct pci_dev *pdev) | |||
217 | if (!dir) | 258 | if (!dir) |
218 | return -ENOMEM; | 259 | return -ENOMEM; |
219 | 260 | ||
261 | pmc->dbgfs_dir = dir; | ||
262 | |||
220 | f = debugfs_create_file("dev_state", S_IFREG | S_IRUGO, | 263 | f = debugfs_create_file("dev_state", S_IFREG | S_IRUGO, |
221 | dir, pmc, &pmc_dev_state_ops); | 264 | dir, pmc, &pmc_dev_state_ops); |
222 | if (!f) { | 265 | if (!f) { |
223 | dev_err(&pdev->dev, "dev_states register failed\n"); | 266 | dev_err(&pdev->dev, "dev_state register failed\n"); |
224 | goto err; | 267 | goto err; |
225 | } | 268 | } |
269 | |||
270 | f = debugfs_create_file("pss_state", S_IFREG | S_IRUGO, | ||
271 | dir, pmc, &pmc_pss_state_ops); | ||
272 | if (!f) { | ||
273 | dev_err(&pdev->dev, "pss_state register failed\n"); | ||
274 | goto err; | ||
275 | } | ||
276 | |||
226 | f = debugfs_create_file("sleep_state", S_IFREG | S_IRUGO, | 277 | f = debugfs_create_file("sleep_state", S_IFREG | S_IRUGO, |
227 | dir, pmc, &pmc_sleep_tmr_ops); | 278 | dir, pmc, &pmc_sleep_tmr_ops); |
228 | if (!f) { | 279 | if (!f) { |
229 | dev_err(&pdev->dev, "sleep_state register failed\n"); | 280 | dev_err(&pdev->dev, "sleep_state register failed\n"); |
230 | goto err; | 281 | goto err; |
231 | } | 282 | } |
232 | pmc->dbgfs_dir = dir; | 283 | |
233 | return 0; | 284 | return 0; |
234 | err: | 285 | err: |
235 | pmc_dbgfs_unregister(pmc); | 286 | pmc_dbgfs_unregister(pmc); |
@@ -292,7 +343,6 @@ MODULE_DEVICE_TABLE(pci, pmc_pci_ids); | |||
292 | 343 | ||
293 | static int __init pmc_atom_init(void) | 344 | static int __init pmc_atom_init(void) |
294 | { | 345 | { |
295 | int err = -ENODEV; | ||
296 | struct pci_dev *pdev = NULL; | 346 | struct pci_dev *pdev = NULL; |
297 | const struct pci_device_id *ent; | 347 | const struct pci_device_id *ent; |
298 | 348 | ||
@@ -306,14 +356,11 @@ static int __init pmc_atom_init(void) | |||
306 | */ | 356 | */ |
307 | for_each_pci_dev(pdev) { | 357 | for_each_pci_dev(pdev) { |
308 | ent = pci_match_id(pmc_pci_ids, pdev); | 358 | ent = pci_match_id(pmc_pci_ids, pdev); |
309 | if (ent) { | 359 | if (ent) |
310 | err = pmc_setup_dev(pdev); | 360 | return pmc_setup_dev(pdev); |
311 | goto out; | ||
312 | } | ||
313 | } | 361 | } |
314 | /* Device not found. */ | 362 | /* Device not found. */ |
315 | out: | 363 | return -ENODEV; |
316 | return err; | ||
317 | } | 364 | } |
318 | 365 | ||
319 | module_init(pmc_atom_init); | 366 | module_init(pmc_atom_init); |
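
The new pss_state file reuses the same single_open() seq_file pattern as dev_state, so it reads like any other debugfs file. A minimal userspace sketch, assuming a standard debugfs mount and assuming the driver's debugfs directory (created outside this hunk) is named "pmc_atom":

	#include <stdio.h>

	int main(void)
	{
		char line[128];
		/* Path is an assumption: debugfs at /sys/kernel/debug,
		 * driver directory named "pmc_atom". */
		FILE *f = fopen("/sys/kernel/debug/pmc_atom/pss_state", "r");

		if (!f) {
			perror("pss_state");
			return 1;
		}
		while (fgets(line, sizeof(line), f))
			fputs(line, stdout);	/* "Island: <name>  State: On/Off" */
		fclose(f);
		return 0;
	}
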
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index ca9622a25e95..fe3dbfe0c4a5 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c | |||
@@ -170,7 +170,7 @@ static struct platform_device rtc_device = { | |||
170 | static __init int add_rtc_cmos(void) | 170 | static __init int add_rtc_cmos(void) |
171 | { | 171 | { |
172 | #ifdef CONFIG_PNP | 172 | #ifdef CONFIG_PNP |
173 | static const char * const const ids[] __initconst = | 173 | static const char * const ids[] __initconst = |
174 | { "PNP0b00", "PNP0b01", "PNP0b02", }; | 174 | { "PNP0b00", "PNP0b01", "PNP0b02", }; |
175 | struct pnp_dev *dev; | 175 | struct pnp_dev *dev; |
176 | struct pnp_id *id; | 176 | struct pnp_id *id; |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 16b6043cb073..0d8071d7addb 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -438,15 +438,13 @@ static void __init parse_setup_data(void) | |||
438 | 438 | ||
439 | pa_data = boot_params.hdr.setup_data; | 439 | pa_data = boot_params.hdr.setup_data; |
440 | while (pa_data) { | 440 | while (pa_data) { |
441 | u32 data_len, map_len, data_type; | 441 | u32 data_len, data_type; |
442 | 442 | ||
443 | map_len = max(PAGE_SIZE - (pa_data & ~PAGE_MASK), | 443 | data = early_memremap(pa_data, sizeof(*data)); |
444 | (u64)sizeof(struct setup_data)); | ||
445 | data = early_memremap(pa_data, map_len); | ||
446 | data_len = data->len + sizeof(struct setup_data); | 444 | data_len = data->len + sizeof(struct setup_data); |
447 | data_type = data->type; | 445 | data_type = data->type; |
448 | pa_next = data->next; | 446 | pa_next = data->next; |
449 | early_iounmap(data, map_len); | 447 | early_iounmap(data, sizeof(*data)); |
450 | 448 | ||
451 | switch (data_type) { | 449 | switch (data_type) { |
452 | case SETUP_E820_EXT: | 450 | case SETUP_E820_EXT: |
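
The removed max() juggling only ensured the temporary mapping covered at least sizeof(struct setup_data) when pa_data sat near a page boundary; passing the exact header size to early_memremap() achieves the same thing directly. Stripped of the type dispatch, the new walk is a plain linked-list traversal, roughly (a sketch, not the full function):

	u64 pa_data = boot_params.hdr.setup_data;

	while (pa_data) {
		struct setup_data *data;
		u64 pa_next;

		/* Map only the fixed-size header to read type/len/next. */
		data = early_memremap(pa_data, sizeof(*data));
		pa_next = data->next;
		/* ... dispatch on data->type; handlers that need the
		 * payload remap data->len bytes themselves ... */
		early_iounmap(data, sizeof(*data));
		pa_data = pa_next;
	}
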
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index ed37a768d0fc..2a33c8f68319 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -740,12 +740,6 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | |||
740 | { | 740 | { |
741 | user_exit(); | 741 | user_exit(); |
742 | 742 | ||
743 | #ifdef CONFIG_X86_MCE | ||
744 | /* notify userspace of pending MCEs */ | ||
745 | if (thread_info_flags & _TIF_MCE_NOTIFY) | ||
746 | mce_notify_process(); | ||
747 | #endif /* CONFIG_X86_64 && CONFIG_X86_MCE */ | ||
748 | |||
749 | if (thread_info_flags & _TIF_UPROBE) | 743 | if (thread_info_flags & _TIF_UPROBE) |
750 | uprobe_notify_resume(regs); | 744 | uprobe_notify_resume(regs); |
751 | 745 | ||
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 6d7022c683e3..febc6aabc72e 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -73,7 +73,6 @@ | |||
73 | #include <asm/setup.h> | 73 | #include <asm/setup.h> |
74 | #include <asm/uv/uv.h> | 74 | #include <asm/uv/uv.h> |
75 | #include <linux/mc146818rtc.h> | 75 | #include <linux/mc146818rtc.h> |
76 | #include <asm/smpboot_hooks.h> | ||
77 | #include <asm/i8259.h> | 76 | #include <asm/i8259.h> |
78 | #include <asm/realmode.h> | 77 | #include <asm/realmode.h> |
79 | #include <asm/misc.h> | 78 | #include <asm/misc.h> |
@@ -104,6 +103,43 @@ EXPORT_PER_CPU_SYMBOL(cpu_info); | |||
104 | 103 | ||
105 | atomic_t init_deasserted; | 104 | atomic_t init_deasserted; |
106 | 105 | ||
106 | static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) | ||
107 | { | ||
108 | unsigned long flags; | ||
109 | |||
110 | spin_lock_irqsave(&rtc_lock, flags); | ||
111 | CMOS_WRITE(0xa, 0xf); | ||
112 | spin_unlock_irqrestore(&rtc_lock, flags); | ||
113 | local_flush_tlb(); | ||
114 | pr_debug("1.\n"); | ||
115 | *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) = | ||
116 | start_eip >> 4; | ||
117 | pr_debug("2.\n"); | ||
118 | *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = | ||
119 | start_eip & 0xf; | ||
120 | pr_debug("3.\n"); | ||
121 | } | ||
122 | |||
123 | static inline void smpboot_restore_warm_reset_vector(void) | ||
124 | { | ||
125 | unsigned long flags; | ||
126 | |||
127 | /* | ||
128 | * Install writable page 0 entry to set BIOS data area. | ||
129 | */ | ||
130 | local_flush_tlb(); | ||
131 | |||
132 | /* | ||
133 | * Paranoid: Set warm reset code and vector here back | ||
134 | * to default values. | ||
135 | */ | ||
136 | spin_lock_irqsave(&rtc_lock, flags); | ||
137 | CMOS_WRITE(0, 0xf); | ||
138 | spin_unlock_irqrestore(&rtc_lock, flags); | ||
139 | |||
140 | *((volatile u32 *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0; | ||
141 | } | ||
142 | |||
107 | /* | 143 | /* |
108 | * Report back to the Boot Processor during boot time or to the caller processor | 144 | * Report back to the Boot Processor during boot time or to the caller processor |
109 | * during CPU online. | 145 | * during CPU online. |
@@ -136,8 +172,7 @@ static void smp_callin(void) | |||
136 | * CPU, first the APIC. (this is probably redundant on most | 172 | * CPU, first the APIC. (this is probably redundant on most |
137 | * boards) | 173 | * boards) |
138 | */ | 174 | */ |
139 | setup_local_APIC(); | 175 | apic_ap_setup(); |
140 | end_local_APIC_setup(); | ||
141 | 176 | ||
142 | /* | 177 | /* |
143 | * Need to setup vector mappings before we enable interrupts. | 178 | * Need to setup vector mappings before we enable interrupts. |
@@ -955,9 +990,12 @@ void arch_disable_smp_support(void) | |||
955 | */ | 990 | */ |
956 | static __init void disable_smp(void) | 991 | static __init void disable_smp(void) |
957 | { | 992 | { |
993 | pr_info("SMP disabled\n"); | ||
994 | |||
995 | disable_ioapic_support(); | ||
996 | |||
958 | init_cpu_present(cpumask_of(0)); | 997 | init_cpu_present(cpumask_of(0)); |
959 | init_cpu_possible(cpumask_of(0)); | 998 | init_cpu_possible(cpumask_of(0)); |
960 | smpboot_clear_io_apic_irqs(); | ||
961 | 999 | ||
962 | if (smp_found_config) | 1000 | if (smp_found_config) |
963 | physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); | 1001 | physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); |
@@ -967,6 +1005,13 @@ static __init void disable_smp(void) | |||
967 | cpumask_set_cpu(0, cpu_core_mask(0)); | 1005 | cpumask_set_cpu(0, cpu_core_mask(0)); |
968 | } | 1006 | } |
969 | 1007 | ||
1008 | enum { | ||
1009 | SMP_OK, | ||
1010 | SMP_NO_CONFIG, | ||
1011 | SMP_NO_APIC, | ||
1012 | SMP_FORCE_UP, | ||
1013 | }; | ||
1014 | |||
970 | /* | 1015 | /* |
971 | * Various sanity checks. | 1016 | * Various sanity checks. |
972 | */ | 1017 | */ |
@@ -1014,10 +1059,7 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
1014 | if (!smp_found_config && !acpi_lapic) { | 1059 | if (!smp_found_config && !acpi_lapic) { |
1015 | preempt_enable(); | 1060 | preempt_enable(); |
1016 | pr_notice("SMP motherboard not detected\n"); | 1061 | pr_notice("SMP motherboard not detected\n"); |
1017 | disable_smp(); | 1062 | return SMP_NO_CONFIG; |
1018 | if (APIC_init_uniprocessor()) | ||
1019 | pr_notice("Local APIC not detected. Using dummy APIC emulation.\n"); | ||
1020 | return -1; | ||
1021 | } | 1063 | } |
1022 | 1064 | ||
1023 | /* | 1065 | /* |
@@ -1041,9 +1083,7 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
1041 | boot_cpu_physical_apicid); | 1083 | boot_cpu_physical_apicid); |
1042 | pr_err("... forcing use of dummy APIC emulation (tell your hw vendor)\n"); | 1084 | pr_err("... forcing use of dummy APIC emulation (tell your hw vendor)\n"); |
1043 | } | 1085 | } |
1044 | smpboot_clear_io_apic(); | 1086 | return SMP_NO_APIC; |
1045 | disable_ioapic_support(); | ||
1046 | return -1; | ||
1047 | } | 1087 | } |
1048 | 1088 | ||
1049 | verify_local_APIC(); | 1089 | verify_local_APIC(); |
@@ -1053,15 +1093,10 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
1053 | */ | 1093 | */ |
1054 | if (!max_cpus) { | 1094 | if (!max_cpus) { |
1055 | pr_info("SMP mode deactivated\n"); | 1095 | pr_info("SMP mode deactivated\n"); |
1056 | smpboot_clear_io_apic(); | 1096 | return SMP_FORCE_UP; |
1057 | |||
1058 | connect_bsp_APIC(); | ||
1059 | setup_local_APIC(); | ||
1060 | bsp_end_local_APIC_setup(); | ||
1061 | return -1; | ||
1062 | } | 1097 | } |
1063 | 1098 | ||
1064 | return 0; | 1099 | return SMP_OK; |
1065 | } | 1100 | } |
1066 | 1101 | ||
1067 | static void __init smp_cpu_index_default(void) | 1102 | static void __init smp_cpu_index_default(void) |
@@ -1101,10 +1136,21 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
1101 | } | 1136 | } |
1102 | set_cpu_sibling_map(0); | 1137 | set_cpu_sibling_map(0); |
1103 | 1138 | ||
1104 | if (smp_sanity_check(max_cpus) < 0) { | 1139 | switch (smp_sanity_check(max_cpus)) { |
1105 | pr_info("SMP disabled\n"); | 1140 | case SMP_NO_CONFIG: |
1106 | disable_smp(); | 1141 | disable_smp(); |
1142 | if (APIC_init_uniprocessor()) | ||
1143 | pr_notice("Local APIC not detected. Using dummy APIC emulation.\n"); | ||
1107 | return; | 1144 | return; |
1145 | case SMP_NO_APIC: | ||
1146 | disable_smp(); | ||
1147 | return; | ||
1148 | case SMP_FORCE_UP: | ||
1149 | disable_smp(); | ||
1150 | apic_bsp_setup(false); | ||
1151 | return; | ||
1152 | case SMP_OK: | ||
1153 | break; | ||
1108 | } | 1154 | } |
1109 | 1155 | ||
1110 | default_setup_apic_routing(); | 1156 | default_setup_apic_routing(); |
@@ -1115,33 +1161,10 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
1115 | /* Or can we switch back to PIC here? */ | 1161 | /* Or can we switch back to PIC here? */ |
1116 | } | 1162 | } |
1117 | 1163 | ||
1118 | connect_bsp_APIC(); | 1164 | cpu0_logical_apicid = apic_bsp_setup(false); |
1119 | |||
1120 | /* | ||
1121 | * Switch from PIC to APIC mode. | ||
1122 | */ | ||
1123 | setup_local_APIC(); | ||
1124 | |||
1125 | if (x2apic_mode) | ||
1126 | cpu0_logical_apicid = apic_read(APIC_LDR); | ||
1127 | else | ||
1128 | cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); | ||
1129 | |||
1130 | /* | ||
1131 | * Enable IO APIC before setting up error vector | ||
1132 | */ | ||
1133 | if (!skip_ioapic_setup && nr_ioapics) | ||
1134 | enable_IO_APIC(); | ||
1135 | |||
1136 | bsp_end_local_APIC_setup(); | ||
1137 | smpboot_setup_io_apic(); | ||
1138 | /* | ||
1139 | * Set up local APIC timer on boot CPU. | ||
1140 | */ | ||
1141 | 1165 | ||
1142 | pr_info("CPU%d: ", 0); | 1166 | pr_info("CPU%d: ", 0); |
1143 | print_cpu_info(&cpu_data(0)); | 1167 | print_cpu_info(&cpu_data(0)); |
1144 | x86_init.timers.setup_percpu_clockev(); | ||
1145 | 1168 | ||
1146 | if (is_uv_system()) | 1169 | if (is_uv_system()) |
1147 | uv_system_init(); | 1170 | uv_system_init(); |
@@ -1177,9 +1200,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus) | |||
1177 | 1200 | ||
1178 | nmi_selftest(); | 1201 | nmi_selftest(); |
1179 | impress_friends(); | 1202 | impress_friends(); |
1180 | #ifdef CONFIG_X86_IO_APIC | ||
1181 | setup_ioapic_dest(); | 1203 | setup_ioapic_dest(); |
1182 | #endif | ||
1183 | mtrr_aps_init(); | 1204 | mtrr_aps_init(); |
1184 | } | 1205 | } |
1185 | 1206 | ||
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 88900e288021..9d2073e2ecc9 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -108,6 +108,88 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) | |||
108 | preempt_count_dec(); | 108 | preempt_count_dec(); |
109 | } | 109 | } |
110 | 110 | ||
111 | enum ctx_state ist_enter(struct pt_regs *regs) | ||
112 | { | ||
113 | enum ctx_state prev_state; | ||
114 | |||
115 | if (user_mode_vm(regs)) { | ||
116 | /* Other than that, we're just an exception. */ | ||
117 | prev_state = exception_enter(); | ||
118 | } else { | ||
119 | /* | ||
120 | * We might have interrupted pretty much anything. In | ||
121 | * fact, if we're a machine check, we can even interrupt | ||
122 | * NMI processing. We don't want in_nmi() to return true, | ||
123 | * but we need to notify RCU. | ||
124 | */ | ||
125 | rcu_nmi_enter(); | ||
126 | prev_state = IN_KERNEL; /* the value is irrelevant. */ | ||
127 | } | ||
128 | |||
129 | /* | ||
130 | * We are atomic because we're on the IST stack (or we're on x86_32, | ||
131 | * in which case we still shouldn't schedule). | ||
132 | * | ||
133 | * This must be after exception_enter(), because exception_enter() | ||
134 | * won't do anything if in_interrupt() returns true. | ||
135 | */ | ||
136 | preempt_count_add(HARDIRQ_OFFSET); | ||
137 | |||
138 | /* This code is a bit fragile. Test it. */ | ||
139 | rcu_lockdep_assert(rcu_is_watching(), "ist_enter didn't work"); | ||
140 | |||
141 | return prev_state; | ||
142 | } | ||
143 | |||
144 | void ist_exit(struct pt_regs *regs, enum ctx_state prev_state) | ||
145 | { | ||
146 | /* Must be before exception_exit. */ | ||
147 | preempt_count_sub(HARDIRQ_OFFSET); | ||
148 | |||
149 | if (user_mode_vm(regs)) | ||
150 | return exception_exit(prev_state); | ||
151 | else | ||
152 | rcu_nmi_exit(); | ||
153 | } | ||
154 | |||
155 | /** | ||
156 | * ist_begin_non_atomic() - begin a non-atomic section in an IST exception | ||
157 | * @regs: regs passed to the IST exception handler | ||
158 | * | ||
159 | * IST exception handlers normally cannot schedule. As a special | ||
160 | * exception, if the exception interrupted userspace code (i.e. | ||
161 | * user_mode_vm(regs) would return true) and the exception was not | ||
162 | * a double fault, it can be safe to schedule. ist_begin_non_atomic() | ||
163 | * begins a non-atomic section within an ist_enter()/ist_exit() region. | ||
164 | * Callers are responsible for enabling interrupts themselves inside | ||
165 | * the non-atomic section, and callers must call ist_end_non_atomic() | ||
166 | * before ist_exit(). | ||
167 | */ | ||
168 | void ist_begin_non_atomic(struct pt_regs *regs) | ||
169 | { | ||
170 | BUG_ON(!user_mode_vm(regs)); | ||
171 | |||
172 | /* | ||
173 | * Sanity check: we need to be on the normal thread stack. This | ||
174 | * will catch asm bugs and any attempt to use ist_begin_non_atomic() | ||
175 | * from double_fault. | ||
176 | */ | ||
177 | BUG_ON(((current_stack_pointer() ^ this_cpu_read_stable(kernel_stack)) | ||
178 | & ~(THREAD_SIZE - 1)) != 0); | ||
179 | |||
180 | preempt_count_sub(HARDIRQ_OFFSET); | ||
181 | } | ||
182 | |||
183 | /** | ||
184 | * ist_end_non_atomic() - end a non-atomic section in an IST exception | ||
185 | * | ||
186 | * Ends a non-atomic section started with ist_begin_non_atomic(). | ||
187 | */ | ||
188 | void ist_end_non_atomic(void) | ||
189 | { | ||
190 | preempt_count_add(HARDIRQ_OFFSET); | ||
191 | } | ||
192 | |||
111 | static nokprobe_inline int | 193 | static nokprobe_inline int |
112 | do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, | 194 | do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, |
113 | struct pt_regs *regs, long error_code) | 195 | struct pt_regs *regs, long error_code) |
@@ -251,6 +333,8 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) | |||
251 | * end up promoting it to a doublefault. In that case, modify | 333 | * end up promoting it to a doublefault. In that case, modify |
252 | * the stack to make it look like we just entered the #GP | 334 | * the stack to make it look like we just entered the #GP |
253 | * handler from user space, similar to bad_iret. | 335 | * handler from user space, similar to bad_iret. |
336 | * | ||
337 | * No need for ist_enter here because we don't use RCU. | ||
254 | */ | 338 | */ |
255 | if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY && | 339 | if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY && |
256 | regs->cs == __KERNEL_CS && | 340 | regs->cs == __KERNEL_CS && |
@@ -263,12 +347,12 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) | |||
263 | normal_regs->orig_ax = 0; /* Missing (lost) #GP error code */ | 347 | normal_regs->orig_ax = 0; /* Missing (lost) #GP error code */ |
264 | regs->ip = (unsigned long)general_protection; | 348 | regs->ip = (unsigned long)general_protection; |
265 | regs->sp = (unsigned long)&normal_regs->orig_ax; | 349 | regs->sp = (unsigned long)&normal_regs->orig_ax; |
350 | |||
266 | return; | 351 | return; |
267 | } | 352 | } |
268 | #endif | 353 | #endif |
269 | 354 | ||
270 | exception_enter(); | 355 | ist_enter(regs); /* Discard prev_state because we won't return. */ |
271 | /* Return not checked because double check cannot be ignored */ | ||
272 | notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); | 356 | notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); |
273 | 357 | ||
274 | tsk->thread.error_code = error_code; | 358 | tsk->thread.error_code = error_code; |
@@ -434,7 +518,7 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) | |||
434 | if (poke_int3_handler(regs)) | 518 | if (poke_int3_handler(regs)) |
435 | return; | 519 | return; |
436 | 520 | ||
437 | prev_state = exception_enter(); | 521 | prev_state = ist_enter(regs); |
438 | #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP | 522 | #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP |
439 | if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, | 523 | if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, |
440 | SIGTRAP) == NOTIFY_STOP) | 524 | SIGTRAP) == NOTIFY_STOP) |
@@ -460,33 +544,20 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) | |||
460 | preempt_conditional_cli(regs); | 544 | preempt_conditional_cli(regs); |
461 | debug_stack_usage_dec(); | 545 | debug_stack_usage_dec(); |
462 | exit: | 546 | exit: |
463 | exception_exit(prev_state); | 547 | ist_exit(regs, prev_state); |
464 | } | 548 | } |
465 | NOKPROBE_SYMBOL(do_int3); | 549 | NOKPROBE_SYMBOL(do_int3); |
466 | 550 | ||
467 | #ifdef CONFIG_X86_64 | 551 | #ifdef CONFIG_X86_64 |
468 | /* | 552 | /* |
469 | * Help handler running on IST stack to switch back to user stack | 553 | * Help handler running on IST stack to switch off the IST stack if the |
470 | * for scheduling or signal handling. The actual stack switch is done in | 554 | * interrupted code was in user mode. The actual stack switch is done in |
471 | * entry.S | 555 | * entry_64.S |
472 | */ | 556 | */ |
473 | asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs) | 557 | asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs) |
474 | { | 558 | { |
475 | struct pt_regs *regs = eregs; | 559 | struct pt_regs *regs = task_pt_regs(current); |
476 | /* Did already sync */ | 560 | *regs = *eregs; |
477 | if (eregs == (struct pt_regs *)eregs->sp) | ||
478 | ; | ||
479 | /* Exception from user space */ | ||
480 | else if (user_mode(eregs)) | ||
481 | regs = task_pt_regs(current); | ||
482 | /* | ||
483 | * Exception from kernel and interrupts are enabled. Move to | ||
484 | * kernel process stack. | ||
485 | */ | ||
486 | else if (eregs->flags & X86_EFLAGS_IF) | ||
487 | regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); | ||
488 | if (eregs != regs) | ||
489 | *regs = *eregs; | ||
490 | return regs; | 561 | return regs; |
491 | } | 562 | } |
492 | NOKPROBE_SYMBOL(sync_regs); | 563 | NOKPROBE_SYMBOL(sync_regs); |
@@ -554,7 +625,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) | |||
554 | unsigned long dr6; | 625 | unsigned long dr6; |
555 | int si_code; | 626 | int si_code; |
556 | 627 | ||
557 | prev_state = exception_enter(); | 628 | prev_state = ist_enter(regs); |
558 | 629 | ||
559 | get_debugreg(dr6, 6); | 630 | get_debugreg(dr6, 6); |
560 | 631 | ||
@@ -629,7 +700,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) | |||
629 | debug_stack_usage_dec(); | 700 | debug_stack_usage_dec(); |
630 | 701 | ||
631 | exit: | 702 | exit: |
632 | exception_exit(prev_state); | 703 | ist_exit(regs, prev_state); |
633 | } | 704 | } |
634 | NOKPROBE_SYMBOL(do_debug); | 705 | NOKPROBE_SYMBOL(do_debug); |
635 | 706 | ||
@@ -788,18 +859,16 @@ void math_state_restore(void) | |||
788 | local_irq_disable(); | 859 | local_irq_disable(); |
789 | } | 860 | } |
790 | 861 | ||
862 | /* Avoid __kernel_fpu_begin() right after __thread_fpu_begin() */ | ||
863 | kernel_fpu_disable(); | ||
791 | __thread_fpu_begin(tsk); | 864 | __thread_fpu_begin(tsk); |
792 | |||
793 | /* | ||
794 | * Paranoid restore. send a SIGSEGV if we fail to restore the state. | ||
795 | */ | ||
796 | if (unlikely(restore_fpu_checking(tsk))) { | 865 | if (unlikely(restore_fpu_checking(tsk))) { |
797 | drop_init_fpu(tsk); | 866 | drop_init_fpu(tsk); |
798 | force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk); | 867 | force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk); |
799 | return; | 868 | } else { |
869 | tsk->thread.fpu_counter++; | ||
800 | } | 870 | } |
801 | 871 | kernel_fpu_enable(); | |
802 | tsk->thread.fpu_counter++; | ||
803 | } | 872 | } |
804 | EXPORT_SYMBOL_GPL(math_state_restore); | 873 | EXPORT_SYMBOL_GPL(math_state_restore); |
805 | 874 | ||
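
ist_enter()/ist_exit() give every IST handler one canonical shape: RCU is told about the context on entry, HARDIRQ_OFFSET keeps the section atomic, and the begin/end helpers carve out the one case where sleeping is legal. A sketch of the intended usage, with a hypothetical handler name:

	dotraplinkage void do_example_ist(struct pt_regs *regs, long error_code)
	{
		enum ctx_state prev_state = ist_enter(regs);

		/* ... atomic work; we may have interrupted almost anything ... */

		if (user_mode_vm(regs)) {
			/* Fault came from userspace: scheduling is allowed. */
			ist_begin_non_atomic(regs);
			local_irq_enable();
			/* ... e.g. deliver a signal to current ... */
			local_irq_disable();
			ist_end_non_atomic();
		}

		ist_exit(regs, prev_state);
	}
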
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index f9d16ff56c6b..7dc7ba577ecd 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig | |||
@@ -40,6 +40,7 @@ config KVM | |||
40 | select HAVE_KVM_MSI | 40 | select HAVE_KVM_MSI |
41 | select HAVE_KVM_CPU_RELAX_INTERCEPT | 41 | select HAVE_KVM_CPU_RELAX_INTERCEPT |
42 | select KVM_VFIO | 42 | select KVM_VFIO |
43 | select SRCU | ||
43 | ---help--- | 44 | ---help--- |
44 | Support hosting fully virtualized guest machines using hardware | 45 | Support hosting fully virtualized guest machines using hardware |
45 | virtualization extensions. You will need a fairly recent | 46 | virtualization extensions. You will need a fairly recent |
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 079c3b6a3ff1..7ff24240d863 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c | |||
@@ -238,6 +238,31 @@ static void __init_refok adjust_range_page_size_mask(struct map_range *mr, | |||
238 | } | 238 | } |
239 | } | 239 | } |
240 | 240 | ||
241 | static const char *page_size_string(struct map_range *mr) | ||
242 | { | ||
243 | static const char str_1g[] = "1G"; | ||
244 | static const char str_2m[] = "2M"; | ||
245 | static const char str_4m[] = "4M"; | ||
246 | static const char str_4k[] = "4k"; | ||
247 | |||
248 | if (mr->page_size_mask & (1<<PG_LEVEL_1G)) | ||
249 | return str_1g; | ||
250 | /* | ||
251 | * 32-bit without PAE has a 4M large page size. | ||
252 | * PG_LEVEL_2M is misnamed, but we can at least | ||
253 | * print out the right size in the string. | ||
254 | */ | ||
255 | if (IS_ENABLED(CONFIG_X86_32) && | ||
256 | !IS_ENABLED(CONFIG_X86_PAE) && | ||
257 | mr->page_size_mask & (1<<PG_LEVEL_2M)) | ||
258 | return str_4m; | ||
259 | |||
260 | if (mr->page_size_mask & (1<<PG_LEVEL_2M)) | ||
261 | return str_2m; | ||
262 | |||
263 | return str_4k; | ||
264 | } | ||
265 | |||
241 | static int __meminit split_mem_range(struct map_range *mr, int nr_range, | 266 | static int __meminit split_mem_range(struct map_range *mr, int nr_range, |
242 | unsigned long start, | 267 | unsigned long start, |
243 | unsigned long end) | 268 | unsigned long end) |
@@ -333,8 +358,7 @@ static int __meminit split_mem_range(struct map_range *mr, int nr_range, | |||
333 | for (i = 0; i < nr_range; i++) | 358 | for (i = 0; i < nr_range; i++) |
334 | printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n", | 359 | printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n", |
335 | mr[i].start, mr[i].end - 1, | 360 | mr[i].start, mr[i].end - 1, |
336 | (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":( | 361 | page_size_string(&mr[i])); |
337 | (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k")); | ||
338 | 362 | ||
339 | return nr_range; | 363 | return nr_range; |
340 | } | 364 | } |
diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c index 44b9271580b5..852aa4c92da0 100644 --- a/arch/x86/pci/intel_mid_pci.c +++ b/arch/x86/pci/intel_mid_pci.c | |||
@@ -293,7 +293,6 @@ static void mrst_power_off_unused_dev(struct pci_dev *dev) | |||
293 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0801, mrst_power_off_unused_dev); | 293 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0801, mrst_power_off_unused_dev); |
294 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0809, mrst_power_off_unused_dev); | 294 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0809, mrst_power_off_unused_dev); |
295 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x080C, mrst_power_off_unused_dev); | 295 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x080C, mrst_power_off_unused_dev); |
296 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0812, mrst_power_off_unused_dev); | ||
297 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0815, mrst_power_off_unused_dev); | 296 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0815, mrst_power_off_unused_dev); |
298 | 297 | ||
299 | /* | 298 | /* |
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 326198a4434e..676e5e04e4d4 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c | |||
@@ -610,6 +610,32 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header) | |||
610 | return 0; | 610 | return 0; |
611 | } | 611 | } |
612 | 612 | ||
613 | #ifdef CONFIG_ACPI_APEI | ||
614 | extern int (*arch_apei_filter_addr)(int (*func)(__u64 start, __u64 size, | ||
615 | void *data), void *data); | ||
616 | |||
617 | static int pci_mmcfg_for_each_region(int (*func)(__u64 start, __u64 size, | ||
618 | void *data), void *data) | ||
619 | { | ||
620 | struct pci_mmcfg_region *cfg; | ||
621 | int rc; | ||
622 | |||
623 | if (list_empty(&pci_mmcfg_list)) | ||
624 | return 0; | ||
625 | |||
626 | list_for_each_entry(cfg, &pci_mmcfg_list, list) { | ||
627 | rc = func(cfg->res.start, resource_size(&cfg->res), data); | ||
628 | if (rc) | ||
629 | return rc; | ||
630 | } | ||
631 | |||
632 | return 0; | ||
633 | } | ||
634 | #define set_apei_filter() (arch_apei_filter_addr = pci_mmcfg_for_each_region) | ||
635 | #else | ||
636 | #define set_apei_filter() | ||
637 | #endif | ||
638 | |||
613 | static void __init __pci_mmcfg_init(int early) | 639 | static void __init __pci_mmcfg_init(int early) |
614 | { | 640 | { |
615 | pci_mmcfg_reject_broken(early); | 641 | pci_mmcfg_reject_broken(early); |
@@ -644,6 +670,8 @@ void __init pci_mmcfg_early_init(void) | |||
644 | else | 670 | else |
645 | acpi_sfi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg); | 671 | acpi_sfi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg); |
646 | __pci_mmcfg_init(1); | 672 | __pci_mmcfg_init(1); |
673 | |||
674 | set_apei_filter(); | ||
647 | } | 675 | } |
648 | } | 676 | } |
649 | 677 | ||
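
arch_apei_filter_addr is a plain function pointer: APEI hands it a callback plus a cookie, and pci_mmcfg_for_each_region() invokes the callback once per MMCONFIG region, stopping on the first nonzero return. A hedged consumer-side sketch (the callback and helper names here are illustrative, not the actual APEI code):

	struct addr_range {
		u64 start;
		u64 size;
	};

	/* Nonzero return stops the walk: report a hit when the queried
	 * range lies entirely inside one MMCONFIG region. */
	static int range_hit(__u64 start, __u64 size, void *data)
	{
		struct addr_range *r = data;

		return r->start >= start &&
		       r->start + r->size <= start + size;
	}

	static bool addr_in_mmcfg(u64 addr, u64 size)
	{
		struct addr_range r = { .start = addr, .size = size };

		return arch_apei_filter_addr &&
		       arch_apei_filter_addr(range_hit, &r);
	}
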
diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S index 5fcda7272550..86d0f9e08dd9 100644 --- a/arch/x86/platform/efi/efi_stub_64.S +++ b/arch/x86/platform/efi/efi_stub_64.S | |||
@@ -91,167 +91,6 @@ ENTRY(efi_call) | |||
91 | ret | 91 | ret |
92 | ENDPROC(efi_call) | 92 | ENDPROC(efi_call) |
93 | 93 | ||
94 | #ifdef CONFIG_EFI_MIXED | ||
95 | |||
96 | /* | ||
97 | * We run this function from the 1:1 mapping. | ||
98 | * | ||
99 | * This function must be invoked with a 1:1 mapped stack. | ||
100 | */ | ||
101 | ENTRY(__efi64_thunk) | ||
102 | movl %ds, %eax | ||
103 | push %rax | ||
104 | movl %es, %eax | ||
105 | push %rax | ||
106 | movl %ss, %eax | ||
107 | push %rax | ||
108 | |||
109 | subq $32, %rsp | ||
110 | movl %esi, 0x0(%rsp) | ||
111 | movl %edx, 0x4(%rsp) | ||
112 | movl %ecx, 0x8(%rsp) | ||
113 | movq %r8, %rsi | ||
114 | movl %esi, 0xc(%rsp) | ||
115 | movq %r9, %rsi | ||
116 | movl %esi, 0x10(%rsp) | ||
117 | |||
118 | sgdt save_gdt(%rip) | ||
119 | |||
120 | leaq 1f(%rip), %rbx | ||
121 | movq %rbx, func_rt_ptr(%rip) | ||
122 | |||
123 | /* Switch to gdt with 32-bit segments */ | ||
124 | movl 64(%rsp), %eax | ||
125 | lgdt (%rax) | ||
126 | |||
127 | leaq efi_enter32(%rip), %rax | ||
128 | pushq $__KERNEL_CS | ||
129 | pushq %rax | ||
130 | lretq | ||
131 | |||
132 | 1: addq $32, %rsp | ||
133 | |||
134 | lgdt save_gdt(%rip) | ||
135 | |||
136 | pop %rbx | ||
137 | movl %ebx, %ss | ||
138 | pop %rbx | ||
139 | movl %ebx, %es | ||
140 | pop %rbx | ||
141 | movl %ebx, %ds | ||
142 | |||
143 | /* | ||
144 | * Convert 32-bit status code into 64-bit. | ||
145 | */ | ||
146 | test %rax, %rax | ||
147 | jz 1f | ||
148 | movl %eax, %ecx | ||
149 | andl $0x0fffffff, %ecx | ||
150 | andl $0xf0000000, %eax | ||
151 | shl $32, %rax | ||
152 | or %rcx, %rax | ||
153 | 1: | ||
154 | ret | ||
155 | ENDPROC(__efi64_thunk) | ||
156 | |||
157 | ENTRY(efi_exit32) | ||
158 | movq func_rt_ptr(%rip), %rax | ||
159 | push %rax | ||
160 | mov %rdi, %rax | ||
161 | ret | ||
162 | ENDPROC(efi_exit32) | ||
163 | |||
164 | .code32 | ||
165 | /* | ||
166 | * EFI service pointer must be in %edi. | ||
167 | * | ||
168 | * The stack should represent the 32-bit calling convention. | ||
169 | */ | ||
170 | ENTRY(efi_enter32) | ||
171 | movl $__KERNEL_DS, %eax | ||
172 | movl %eax, %ds | ||
173 | movl %eax, %es | ||
174 | movl %eax, %ss | ||
175 | |||
176 | /* Reload pgtables */ | ||
177 | movl %cr3, %eax | ||
178 | movl %eax, %cr3 | ||
179 | |||
180 | /* Disable paging */ | ||
181 | movl %cr0, %eax | ||
182 | btrl $X86_CR0_PG_BIT, %eax | ||
183 | movl %eax, %cr0 | ||
184 | |||
185 | /* Disable long mode via EFER */ | ||
186 | movl $MSR_EFER, %ecx | ||
187 | rdmsr | ||
188 | btrl $_EFER_LME, %eax | ||
189 | wrmsr | ||
190 | |||
191 | call *%edi | ||
192 | |||
193 | /* We must preserve return value */ | ||
194 | movl %eax, %edi | ||
195 | |||
196 | /* | ||
197 | * Some firmware will return with interrupts enabled. Be sure to | ||
198 | * disable them before we switch GDTs. | ||
199 | */ | ||
200 | cli | ||
201 | |||
202 | movl 68(%esp), %eax | ||
203 | movl %eax, 2(%eax) | ||
204 | lgdtl (%eax) | ||
205 | |||
206 | movl %cr4, %eax | ||
207 | btsl $(X86_CR4_PAE_BIT), %eax | ||
208 | movl %eax, %cr4 | ||
209 | |||
210 | movl %cr3, %eax | ||
211 | movl %eax, %cr3 | ||
212 | |||
213 | movl $MSR_EFER, %ecx | ||
214 | rdmsr | ||
215 | btsl $_EFER_LME, %eax | ||
216 | wrmsr | ||
217 | |||
218 | xorl %eax, %eax | ||
219 | lldt %ax | ||
220 | |||
221 | movl 72(%esp), %eax | ||
222 | pushl $__KERNEL_CS | ||
223 | pushl %eax | ||
224 | |||
225 | /* Enable paging */ | ||
226 | movl %cr0, %eax | ||
227 | btsl $X86_CR0_PG_BIT, %eax | ||
228 | movl %eax, %cr0 | ||
229 | lret | ||
230 | ENDPROC(efi_enter32) | ||
231 | |||
232 | .data | ||
233 | .balign 8 | ||
234 | .global efi32_boot_gdt | ||
235 | efi32_boot_gdt: .word 0 | ||
236 | .quad 0 | ||
237 | |||
238 | save_gdt: .word 0 | ||
239 | .quad 0 | ||
240 | func_rt_ptr: .quad 0 | ||
241 | |||
242 | .global efi_gdt64 | ||
243 | efi_gdt64: | ||
244 | .word efi_gdt64_end - efi_gdt64 | ||
245 | .long 0 /* Filled out by user */ | ||
246 | .word 0 | ||
247 | .quad 0x0000000000000000 /* NULL descriptor */ | ||
248 | .quad 0x00af9a000000ffff /* __KERNEL_CS */ | ||
249 | .quad 0x00cf92000000ffff /* __KERNEL_DS */ | ||
250 | .quad 0x0080890000000000 /* TS descriptor */ | ||
251 | .quad 0x0000000000000000 /* TS continued */ | ||
252 | efi_gdt64_end: | ||
253 | #endif /* CONFIG_EFI_MIXED */ | ||
254 | |||
255 | .data | 94 | .data |
256 | ENTRY(efi_scratch) | 95 | ENTRY(efi_scratch) |
257 | .fill 3,8,0 | 96 | .fill 3,8,0 |
diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S index 8806fa73e6e6..ff85d28c50f2 100644 --- a/arch/x86/platform/efi/efi_thunk_64.S +++ b/arch/x86/platform/efi/efi_thunk_64.S | |||
@@ -1,9 +1,26 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2014 Intel Corporation; author Matt Fleming | 2 | * Copyright (C) 2014 Intel Corporation; author Matt Fleming |
3 | * | ||
4 | * Support for invoking 32-bit EFI runtime services from a 64-bit | ||
5 | * kernel. | ||
6 | * | ||
7 | * The below thunking functions are only used after ExitBootServices() | ||
8 | * has been called. This simplifies things considerably as compared with | ||
9 | * the early EFI thunking because we can leave all the kernel state | ||
10 | * intact (GDT, IDT, etc) and simply invoke the 32-bit EFI runtime | ||
11 | * services from __KERNEL32_CS. This means we can continue to service | ||
12 | * interrupts across an EFI mixed mode call. | ||
13 | * | ||
14 | * We do, however, need to handle the fact that we're running in a full | ||
15 | * 64-bit virtual address space. Things like the stack and instruction | ||
16 | * addresses need to be accessible by the 32-bit firmware, so we rely on | ||
17 | * using the identity mappings in the EFI page table to access the stack | ||
18 | * and kernel text (see efi_setup_page_tables()). | ||
3 | */ | 19 | */ |
4 | 20 | ||
5 | #include <linux/linkage.h> | 21 | #include <linux/linkage.h> |
6 | #include <asm/page_types.h> | 22 | #include <asm/page_types.h> |
23 | #include <asm/segment.h> | ||
7 | 24 | ||
8 | .text | 25 | .text |
9 | .code64 | 26 | .code64 |
@@ -33,14 +50,6 @@ ENTRY(efi64_thunk) | |||
33 | leaq efi_exit32(%rip), %rbx | 50 | leaq efi_exit32(%rip), %rbx |
34 | subq %rax, %rbx | 51 | subq %rax, %rbx |
35 | movl %ebx, 8(%rsp) | 52 | movl %ebx, 8(%rsp) |
36 | leaq efi_gdt64(%rip), %rbx | ||
37 | subq %rax, %rbx | ||
38 | movl %ebx, 2(%ebx) | ||
39 | movl %ebx, 4(%rsp) | ||
40 | leaq efi_gdt32(%rip), %rbx | ||
41 | subq %rax, %rbx | ||
42 | movl %ebx, 2(%ebx) | ||
43 | movl %ebx, (%rsp) | ||
44 | 53 | ||
45 | leaq __efi64_thunk(%rip), %rbx | 54 | leaq __efi64_thunk(%rip), %rbx |
46 | subq %rax, %rbx | 55 | subq %rax, %rbx |
@@ -52,14 +61,92 @@ ENTRY(efi64_thunk) | |||
52 | retq | 61 | retq |
53 | ENDPROC(efi64_thunk) | 62 | ENDPROC(efi64_thunk) |
54 | 63 | ||
55 | .data | 64 | /* |
56 | efi_gdt32: | 65 | * We run this function from the 1:1 mapping. |
57 | .word efi_gdt32_end - efi_gdt32 | 66 | * |
58 | .long 0 /* Filled out above */ | 67 | * This function must be invoked with a 1:1 mapped stack. |
59 | .word 0 | 68 | */ |
60 | .quad 0x0000000000000000 /* NULL descriptor */ | 69 | ENTRY(__efi64_thunk) |
61 | .quad 0x00cf9a000000ffff /* __KERNEL_CS */ | 70 | movl %ds, %eax |
62 | .quad 0x00cf93000000ffff /* __KERNEL_DS */ | 71 | push %rax |
63 | efi_gdt32_end: | 72 | movl %es, %eax |
73 | push %rax | ||
74 | movl %ss, %eax | ||
75 | push %rax | ||
76 | |||
77 | subq $32, %rsp | ||
78 | movl %esi, 0x0(%rsp) | ||
79 | movl %edx, 0x4(%rsp) | ||
80 | movl %ecx, 0x8(%rsp) | ||
81 | movq %r8, %rsi | ||
82 | movl %esi, 0xc(%rsp) | ||
83 | movq %r9, %rsi | ||
84 | movl %esi, 0x10(%rsp) | ||
85 | |||
86 | leaq 1f(%rip), %rbx | ||
87 | movq %rbx, func_rt_ptr(%rip) | ||
88 | |||
89 | /* Switch to 32-bit descriptor */ | ||
90 | pushq $__KERNEL32_CS | ||
91 | leaq efi_enter32(%rip), %rax | ||
92 | pushq %rax | ||
93 | lretq | ||
94 | |||
95 | 1: addq $32, %rsp | ||
96 | |||
97 | pop %rbx | ||
98 | movl %ebx, %ss | ||
99 | pop %rbx | ||
100 | movl %ebx, %es | ||
101 | pop %rbx | ||
102 | movl %ebx, %ds | ||
64 | 103 | ||
104 | /* | ||
105 | * Convert 32-bit status code into 64-bit. | ||
106 | */ | ||
107 | test %rax, %rax | ||
108 | jz 1f | ||
109 | movl %eax, %ecx | ||
110 | andl $0x0fffffff, %ecx | ||
111 | andl $0xf0000000, %eax | ||
112 | shl $32, %rax | ||
113 | or %rcx, %rax | ||
114 | 1: | ||
115 | ret | ||
116 | ENDPROC(__efi64_thunk) | ||
117 | |||
118 | ENTRY(efi_exit32) | ||
119 | movq func_rt_ptr(%rip), %rax | ||
120 | push %rax | ||
121 | mov %rdi, %rax | ||
122 | ret | ||
123 | ENDPROC(efi_exit32) | ||
124 | |||
125 | .code32 | ||
126 | /* | ||
127 | * EFI service pointer must be in %edi. | ||
128 | * | ||
129 | * The stack should represent the 32-bit calling convention. | ||
130 | */ | ||
131 | ENTRY(efi_enter32) | ||
132 | movl $__KERNEL_DS, %eax | ||
133 | movl %eax, %ds | ||
134 | movl %eax, %es | ||
135 | movl %eax, %ss | ||
136 | |||
137 | call *%edi | ||
138 | |||
139 | /* We must preserve return value */ | ||
140 | movl %eax, %edi | ||
141 | |||
142 | movl 72(%esp), %eax | ||
143 | pushl $__KERNEL_CS | ||
144 | pushl %eax | ||
145 | |||
146 | lret | ||
147 | ENDPROC(efi_enter32) | ||
148 | |||
149 | .data | ||
150 | .balign 8 | ||
151 | func_rt_ptr: .quad 0 | ||
65 | efi_saved_sp: .quad 0 | 152 | efi_saved_sp: .quad 0 |
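
The status-widening sequence after the 1: label in __efi64_thunk is easier to follow in C: a 32-bit EFI_STATUS keeps its error/warning class in the top nibble, which has to move to the top of the 64-bit value. Roughly (a C restatement of the assembly, for illustration only):

	/* Equivalent of the test/andl/shl/or sequence in __efi64_thunk. */
	static inline u64 efi32_status_to_64(u32 status)
	{
		if (!status)
			return 0;
		return ((u64)(status & 0xf0000000) << 32) |
		       (status & 0x0fffffff);
	}
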
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index 5a4affe025e8..09297c8e1fcd 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile | |||
@@ -205,4 +205,4 @@ $(vdso_img_insttargets): install_%: $(obj)/%.dbg $(MODLIB)/vdso FORCE | |||
205 | PHONY += vdso_install $(vdso_img_insttargets) | 205 | PHONY += vdso_install $(vdso_img_insttargets) |
206 | vdso_install: $(vdso_img_insttargets) FORCE | 206 | vdso_install: $(vdso_img_insttargets) FORCE |
207 | 207 | ||
208 | clean-files := vdso32-syscall* vdso32-sysenter* vdso32-int80* | 208 | clean-files := vdso32-syscall* vdso32-sysenter* vdso32-int80* vdso64* |