Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig                               |   5
-rw-r--r--  arch/x86/boot/ctype.h                          |   5
-rw-r--r--  arch/x86/boot/early_serial_console.c           |   6
-rw-r--r--  arch/x86/ia32/ia32entry.S                      |   4
-rw-r--r--  arch/x86/include/asm/apic.h                    |  58
-rw-r--r--  arch/x86/include/asm/calling.h                 |   1
-rw-r--r--  arch/x86/include/asm/cpufeature.h              |   2
-rw-r--r--  arch/x86/include/asm/debugreg.h                |   5
-rw-r--r--  arch/x86/include/asm/fpu-internal.h            |  10
-rw-r--r--  arch/x86/include/asm/hw_breakpoint.h           |   1
-rw-r--r--  arch/x86/include/asm/i387.h                    |   6
-rw-r--r--  arch/x86/include/asm/io_apic.h                 |   5
-rw-r--r--  arch/x86/include/asm/irq_remapping.h           |   4
-rw-r--r--  arch/x86/include/asm/mce.h                     |   1
-rw-r--r--  arch/x86/include/asm/pmc_atom.h                |  22
-rw-r--r--  arch/x86/include/asm/smpboot_hooks.h           |  68
-rw-r--r--  arch/x86/include/asm/thread_info.h             |  15
-rw-r--r--  arch/x86/include/asm/traps.h                   |   6
-rw-r--r--  arch/x86/include/uapi/asm/msr-index.h          |   4
-rw-r--r--  arch/x86/kernel/acpi/boot.c                    |   2
-rw-r--r--  arch/x86/kernel/apb_timer.c                    |   8
-rw-r--r--  arch/x86/kernel/apic/apic.c                    | 456
-rw-r--r--  arch/x86/kernel/apic/io_apic.c                 |  13
-rw-r--r--  arch/x86/kernel/cpu/amd.c                      |  19
-rw-r--r--  arch/x86/kernel/cpu/common.c                   |  15
-rw-r--r--  arch/x86/kernel/cpu/intel.c                    |   6
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c               | 137
-rw-r--r--  arch/x86/kernel/cpu/mcheck/p5.c                |   6
-rw-r--r--  arch/x86/kernel/cpu/mcheck/winchip.c           |   5
-rw-r--r--  arch/x86/kernel/cpu/microcode/core.c           |   2
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c         |   1
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_rapl.c    |   2
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore.c  |   9
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore.h  |  18
-rw-r--r--  arch/x86/kernel/e820.c                         |  26
-rw-r--r--  arch/x86/kernel/entry_64.S                     | 317
-rw-r--r--  arch/x86/kernel/hw_breakpoint.c                |  45
-rw-r--r--  arch/x86/kernel/i387.c                         |  39
-rw-r--r--  arch/x86/kernel/irq_32.c                       |  13
-rw-r--r--  arch/x86/kernel/pmc_atom.c                     |  81
-rw-r--r--  arch/x86/kernel/rtc.c                          |   2
-rw-r--r--  arch/x86/kernel/setup.c                        |   8
-rw-r--r--  arch/x86/kernel/signal.c                       |   6
-rw-r--r--  arch/x86/kernel/smpboot.c                      | 113
-rw-r--r--  arch/x86/kernel/traps.c                        | 131
-rw-r--r--  arch/x86/kvm/Kconfig                           |   1
-rw-r--r--  arch/x86/kvm/lapic.c                           |   3
-rw-r--r--  arch/x86/mm/fault.c                            |   2
-rw-r--r--  arch/x86/pci/common.c                          |  16
-rw-r--r--  arch/x86/pci/intel_mid_pci.c                   |   1
-rw-r--r--  arch/x86/pci/mmconfig-shared.c                 |  28
-rw-r--r--  arch/x86/vdso/Makefile                         |   2
52 files changed, 934 insertions(+), 827 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0dc9d0144a27..5e28e2be3a41 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -138,6 +138,7 @@ config X86
 	select HAVE_ACPI_APEI_NMI if ACPI
 	select ACPI_LEGACY_TABLES_LOOKUP if ACPI
 	select X86_FEATURE_NAMES if PROC_FS
+	select SRCU
 
 config INSTRUCTION_DECODER
 	def_bool y
@@ -855,6 +856,10 @@ config SCHED_MC
 
 source "kernel/Kconfig.preempt"
 
+config UP_LATE_INIT
+	def_bool y
+	depends on !SMP && X86_LOCAL_APIC
+
 config X86_UP_APIC
 	bool "Local APIC support on uniprocessors"
 	depends on X86_32 && !SMP && !X86_32_NON_STANDARD
diff --git a/arch/x86/boot/ctype.h b/arch/x86/boot/ctype.h
index 25e13403193c..020f137df7a2 100644
--- a/arch/x86/boot/ctype.h
+++ b/arch/x86/boot/ctype.h
@@ -1,6 +1,5 @@
-#ifndef BOOT_ISDIGIT_H
-
-#define BOOT_ISDIGIT_H
+#ifndef BOOT_CTYPE_H
+#define BOOT_CTYPE_H
 
 static inline int isdigit(int ch)
 {
diff --git a/arch/x86/boot/early_serial_console.c b/arch/x86/boot/early_serial_console.c
index 5df2869c874b..45a07684bbab 100644
--- a/arch/x86/boot/early_serial_console.c
+++ b/arch/x86/boot/early_serial_console.c
@@ -2,8 +2,6 @@
 
 #define DEFAULT_SERIAL_PORT 0x3f8 /* ttyS0 */
 
-#define XMTRDY 0x20
-
 #define DLAB 0x80
 
 #define TXR 0 /* Transmit register (WRITE) */
@@ -74,8 +72,8 @@ static void parse_earlyprintk(void)
 	static const int bases[] = { 0x3f8, 0x2f8 };
 	int idx = 0;
 
-	if (!strncmp(arg + pos, "ttyS", 4))
-		pos += 4;
+	/* += strlen("ttyS"); */
+	pos += 4;
 
 	if (arg[pos++] == '1')
 		idx = 1;
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 82e8a1d44658..156ebcab4ada 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -179,8 +179,8 @@ sysenter_dispatch:
 sysexit_from_sys_call:
 	andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
 	/* clear IF, that popfq doesn't enable interrupts early */
-	andl $~0x200,EFLAGS-R11(%rsp)
-	movl RIP-R11(%rsp),%edx /* User %eip */
+	andl $~0x200,EFLAGS-ARGOFFSET(%rsp)
+	movl RIP-ARGOFFSET(%rsp),%edx /* User %eip */
 	CFI_REGISTER rip,rdx
 	RESTORE_ARGS 0,24,0,0,0,0
 	xorq %r8,%r8
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 465b309af254..92003f3c8a42 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -106,7 +106,14 @@ extern u32 native_safe_apic_wait_icr_idle(void);
 extern void native_apic_icr_write(u32 low, u32 id);
 extern u64 native_apic_icr_read(void);
 
-extern int x2apic_mode;
+static inline bool apic_is_x2apic_enabled(void)
+{
+	u64 msr;
+
+	if (rdmsrl_safe(MSR_IA32_APICBASE, &msr))
+		return false;
+	return msr & X2APIC_ENABLE;
+}
 
 #ifdef CONFIG_X86_X2APIC
 /*
@@ -169,48 +176,23 @@ static inline u64 native_x2apic_icr_read(void)
 	return val;
 }
 
+extern int x2apic_mode;
 extern int x2apic_phys;
-extern int x2apic_preenabled;
-extern void check_x2apic(void);
-extern void enable_x2apic(void);
+extern void __init check_x2apic(void);
+extern void x2apic_setup(void);
 static inline int x2apic_enabled(void)
 {
-	u64 msr;
-
-	if (!cpu_has_x2apic)
-		return 0;
-
-	rdmsrl(MSR_IA32_APICBASE, msr);
-	if (msr & X2APIC_ENABLE)
-		return 1;
-	return 0;
+	return cpu_has_x2apic && apic_is_x2apic_enabled();
 }
 
 #define x2apic_supported() (cpu_has_x2apic)
-static inline void x2apic_force_phys(void)
-{
-	x2apic_phys = 1;
-}
 #else
-static inline void disable_x2apic(void)
-{
-}
-static inline void check_x2apic(void)
-{
-}
-static inline void enable_x2apic(void)
-{
-}
-static inline int x2apic_enabled(void)
-{
-	return 0;
-}
-static inline void x2apic_force_phys(void)
-{
-}
+static inline void check_x2apic(void) { }
+static inline void x2apic_setup(void) { }
+static inline int x2apic_enabled(void) { return 0; }
 
-#define x2apic_preenabled 0
-#define x2apic_supported() 0
+#define x2apic_mode (0)
+#define x2apic_supported() (0)
 #endif
 
 extern void enable_IR_x2apic(void);
@@ -219,7 +201,6 @@ extern int get_physical_broadcast(void);
 
 extern int lapic_get_maxlvt(void);
 extern void clear_local_APIC(void);
-extern void connect_bsp_APIC(void);
 extern void disconnect_bsp_APIC(int virt_wire_setup);
 extern void disable_local_APIC(void);
 extern void lapic_shutdown(void);
@@ -227,8 +208,6 @@ extern int verify_local_APIC(void);
 extern void sync_Arb_IDs(void);
 extern void init_bsp_APIC(void);
 extern void setup_local_APIC(void);
-extern void end_local_APIC_setup(void);
-extern void bsp_end_local_APIC_setup(void);
 extern void init_apic_mappings(void);
 void register_lapic_address(unsigned long address);
 extern void setup_boot_APIC_clock(void);
@@ -236,6 +215,9 @@ extern void setup_secondary_APIC_clock(void);
 extern int APIC_init_uniprocessor(void);
 extern int apic_force_enable(unsigned long addr);
 
+extern int apic_bsp_setup(bool upmode);
+extern void apic_ap_setup(void);
+
 /*
  * On 32bit this is mach-xxx local
  */
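
For orientation (an aside, not part of this patch): apic_is_x2apic_enabled() above boils down to a safe probe of a single MSR. A minimal userspace sketch of the same check, assuming root and the msr driver exposing /dev/cpu/0/msr — IA32_APICBASE is MSR 0x1b and bit 10 is the x2APIC-enable bit:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	uint64_t msr;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);	/* needs msr.ko + root */

	if (fd < 0 || pread(fd, &msr, sizeof(msr), 0x1b) != sizeof(msr))
		return 1;
	printf("x2apic %s\n", (msr & (1ULL << 10)) ? "enabled" : "disabled");
	close(fd);
	return 0;
}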
diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h
index 76659b67fd11..1f1297b46f83 100644
--- a/arch/x86/include/asm/calling.h
+++ b/arch/x86/include/asm/calling.h
@@ -83,7 +83,6 @@ For 32-bit we have the following conventions - kernel is built with
 #define SS 160
 
 #define ARGOFFSET R11
-#define SWFRAME ORIG_RAX
 
 	.macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1, rax_enosys=0
 	subq $9*8+\addskip, %rsp
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index aede2c347bde..90a54851aedc 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -174,6 +174,7 @@
 #define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */
 #define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
 #define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */
+#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */
 #define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */
 
 /*
@@ -388,6 +389,7 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
 #define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16)
 #define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU)
 #define cpu_has_topoext boot_cpu_has(X86_FEATURE_TOPOEXT)
+#define cpu_has_bpext boot_cpu_has(X86_FEATURE_BPEXT)
 
 #if __GNUC__ >= 4
 extern void warn_pre_alternatives(void);
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 61fd18b83b6c..12cb66f6d3a5 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -114,5 +114,10 @@ static inline void debug_stack_usage_inc(void) { }
 static inline void debug_stack_usage_dec(void) { }
 #endif /* X86_64 */
 
+#ifdef CONFIG_CPU_SUP_AMD
+extern void set_dr_addr_mask(unsigned long mask, int dr);
+#else
+static inline void set_dr_addr_mask(unsigned long mask, int dr) { }
+#endif
 
 #endif /* _ASM_X86_DEBUGREG_H */
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index e97622f57722..0dbc08282291 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -207,7 +207,7 @@ static inline void fpu_fxsave(struct fpu *fpu)
 	if (config_enabled(CONFIG_X86_32))
 		asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state->fxsave));
 	else if (config_enabled(CONFIG_AS_FXSAVEQ))
-		asm volatile("fxsaveq %0" : "=m" (fpu->state->fxsave));
+		asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state->fxsave));
 	else {
 		/* Using "rex64; fxsave %0" is broken because, if the memory
 		 * operand uses any extended registers for addressing, a second
@@ -290,9 +290,11 @@ static inline int fpu_restore_checking(struct fpu *fpu)
 
 static inline int restore_fpu_checking(struct task_struct *tsk)
 {
-	/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
-	   is pending. Clear the x87 state here by setting it to fixed
-	   values. "m" is a random variable that should be in L1 */
+	/*
+	 * AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is
+	 * pending. Clear the x87 state here by setting it to fixed values.
+	 * "m" is a random variable that should be in L1.
+	 */
 	if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) {
 		asm volatile(
 			"fnclex\n\t"
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
index ef1c4d2d41ec..6c98be864a75 100644
--- a/arch/x86/include/asm/hw_breakpoint.h
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -12,6 +12,7 @@
  */
 struct arch_hw_breakpoint {
 	unsigned long address;
+	unsigned long mask;
 	u8 len;
 	u8 type;
 };
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index ed8089d69094..6eb6fcb83f63 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -40,8 +40,8 @@ extern void __kernel_fpu_end(void);
 
 static inline void kernel_fpu_begin(void)
 {
-	WARN_ON_ONCE(!irq_fpu_usable());
 	preempt_disable();
+	WARN_ON_ONCE(!irq_fpu_usable());
 	__kernel_fpu_begin();
 }
 
@@ -51,6 +51,10 @@ static inline void kernel_fpu_end(void)
 	preempt_enable();
 }
 
+/* Must be called with preempt disabled */
+extern void kernel_fpu_disable(void);
+extern void kernel_fpu_enable(void);
+
 /*
  * Some instructions like VIA's padlock instructions generate a spurious
  * DNA fault but don't modify SSE registers. And these instructions
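
For context (not part of the patch): kernel code is expected to bracket any FPU/SIMD use with this pair, and reordering kernel_fpu_begin() so that preempt_disable() comes first means the irq_fpu_usable() sanity check can no longer be invalidated by a preemption between the check and the use. A hedged sketch of the calling pattern, with a stand-in body:

static void example_zero_buffer(void *dst, size_t len)
{
	kernel_fpu_begin();
	/* SSE/AVX instructions would be safe here; memset() is a stand-in. */
	memset(dst, 0, len);
	kernel_fpu_end();
}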
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index bf006cce9418..2f91685fe1cd 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -279,6 +279,11 @@ static inline void disable_ioapic_support(void) { }
 #define native_ioapic_set_affinity NULL
 #define native_setup_ioapic_entry NULL
 #define native_eoi_ioapic_pin NULL
+
+static inline void setup_IO_APIC(void) { }
+static inline void enable_IO_APIC(void) { }
+static inline void setup_ioapic_dest(void) { }
+
 #endif
 
 #endif /* _ASM_X86_IO_APIC_H */
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index b7747c4c2cf2..6224d316c405 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -33,8 +33,6 @@ struct irq_cfg;
 
 #ifdef CONFIG_IRQ_REMAP
 
-extern void setup_irq_remapping_ops(void);
-extern int irq_remapping_supported(void);
 extern void set_irq_remapping_broken(void);
 extern int irq_remapping_prepare(void);
 extern int irq_remapping_enable(void);
@@ -60,8 +58,6 @@ void irq_remap_modify_chip_defaults(struct irq_chip *chip);
 
 #else /* CONFIG_IRQ_REMAP */
 
-static inline void setup_irq_remapping_ops(void) { }
-static inline int irq_remapping_supported(void) { return 0; }
 static inline void set_irq_remapping_broken(void) { }
 static inline int irq_remapping_prepare(void) { return -ENODEV; }
 static inline int irq_remapping_enable(void) { return -ENODEV; }
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 51b26e895933..9b3de99dc004 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -190,7 +190,6 @@ enum mcp_flags {
 void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
 
 int mce_notify_irq(void);
-void mce_notify_process(void);
 
 DECLARE_PER_CPU(struct mce, injectm);
 
diff --git a/arch/x86/include/asm/pmc_atom.h b/arch/x86/include/asm/pmc_atom.h
index fc7a17c05d35..bc0fc0866553 100644
--- a/arch/x86/include/asm/pmc_atom.h
+++ b/arch/x86/include/asm/pmc_atom.h
@@ -53,6 +53,28 @@
 /* Sleep state counter is in units of of 32us */
 #define PMC_TMR_SHIFT 5
 
+/* Power status of power islands */
+#define PMC_PSS 0x98
+
+#define PMC_PSS_BIT_GBE BIT(0)
+#define PMC_PSS_BIT_SATA BIT(1)
+#define PMC_PSS_BIT_HDA BIT(2)
+#define PMC_PSS_BIT_SEC BIT(3)
+#define PMC_PSS_BIT_PCIE BIT(4)
+#define PMC_PSS_BIT_LPSS BIT(5)
+#define PMC_PSS_BIT_LPE BIT(6)
+#define PMC_PSS_BIT_DFX BIT(7)
+#define PMC_PSS_BIT_USH_CTRL BIT(8)
+#define PMC_PSS_BIT_USH_SUS BIT(9)
+#define PMC_PSS_BIT_USH_VCCS BIT(10)
+#define PMC_PSS_BIT_USH_VCCA BIT(11)
+#define PMC_PSS_BIT_OTG_CTRL BIT(12)
+#define PMC_PSS_BIT_OTG_VCCS BIT(13)
+#define PMC_PSS_BIT_OTG_VCCA_CLK BIT(14)
+#define PMC_PSS_BIT_OTG_VCCA BIT(15)
+#define PMC_PSS_BIT_USB BIT(16)
+#define PMC_PSS_BIT_USB_SUS BIT(17)
+
 /* These registers reflect D3 status of functions */
 #define PMC_D3_STS_0 0xA0
 
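
Illustration only (not part of the patch): a driver would test one of the new PSS bits against a read of the PMC_PSS register. The sketch below assumes an ioremap()ed PMC MMIO base, as the pmc_atom driver sets up, and follows that driver's convention that a set PSS bit means the island is power-gated; the helper name is hypothetical.

static bool example_island_gated(void __iomem *pmc_base, u32 island_bit)
{
	/* e.g. example_island_gated(base, PMC_PSS_BIT_SATA) */
	return readl(pmc_base + PMC_PSS) & island_bit;
}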
diff --git a/arch/x86/include/asm/smpboot_hooks.h b/arch/x86/include/asm/smpboot_hooks.h
deleted file mode 100644
index 0da7409f0bec..000000000000
--- a/arch/x86/include/asm/smpboot_hooks.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/* two abstractions specific to kernel/smpboot.c, mainly to cater to visws
- * which needs to alter them. */
-
-static inline void smpboot_clear_io_apic_irqs(void)
-{
-#ifdef CONFIG_X86_IO_APIC
-	io_apic_irqs = 0;
-#endif
-}
-
-static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&rtc_lock, flags);
-	CMOS_WRITE(0xa, 0xf);
-	spin_unlock_irqrestore(&rtc_lock, flags);
-	local_flush_tlb();
-	pr_debug("1.\n");
-	*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
-		start_eip >> 4;
-	pr_debug("2.\n");
-	*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
-		start_eip & 0xf;
-	pr_debug("3.\n");
-}
-
-static inline void smpboot_restore_warm_reset_vector(void)
-{
-	unsigned long flags;
-
-	/*
-	 * Install writable page 0 entry to set BIOS data area.
-	 */
-	local_flush_tlb();
-
-	/*
-	 * Paranoid: Set warm reset code and vector here back
-	 * to default values.
-	 */
-	spin_lock_irqsave(&rtc_lock, flags);
-	CMOS_WRITE(0, 0xf);
-	spin_unlock_irqrestore(&rtc_lock, flags);
-
-	*((volatile u32 *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0;
-}
-
-static inline void __init smpboot_setup_io_apic(void)
-{
-#ifdef CONFIG_X86_IO_APIC
-	/*
-	 * Here we can be sure that there is an IO-APIC in the system. Let's
-	 * go and set it up:
-	 */
-	if (!skip_ioapic_setup && nr_ioapics)
-		setup_IO_APIC();
-	else {
-		nr_ioapics = 0;
-	}
-#endif
-}
-
-static inline void smpboot_clear_io_apic(void)
-{
-#ifdef CONFIG_X86_IO_APIC
-	nr_ioapics = 0;
-#endif
-}
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 547e344a6dc6..e82e95abc92b 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -75,7 +75,6 @@ struct thread_info {
 #define TIF_SYSCALL_EMU 6 /* syscall emulation active */
 #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
 #define TIF_SECCOMP 8 /* secure computing */
-#define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */
 #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
 #define TIF_UPROBE 12 /* breakpointed or singlestepping */
 #define TIF_NOTSC 16 /* TSC is not accessible in userland */
@@ -100,7 +99,6 @@ struct thread_info {
 #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
 #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP (1 << TIF_SECCOMP)
-#define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY)
 #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
 #define _TIF_UPROBE (1 << TIF_UPROBE)
 #define _TIF_NOTSC (1 << TIF_NOTSC)
@@ -140,7 +138,7 @@ struct thread_info {
 
 /* Only used for 64 bit */
 #define _TIF_DO_NOTIFY_MASK \
-	(_TIF_SIGPENDING | _TIF_MCE_NOTIFY | _TIF_NOTIFY_RESUME | \
+	(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | \
 	 _TIF_USER_RETURN_NOTIFY | _TIF_UPROBE)
 
 /* flags to check in __switch_to() */
@@ -170,6 +168,17 @@ static inline struct thread_info *current_thread_info(void)
 	return ti;
 }
 
+static inline unsigned long current_stack_pointer(void)
+{
+	unsigned long sp;
+#ifdef CONFIG_X86_64
+	asm("mov %%rsp,%0" : "=g" (sp));
+#else
+	asm("mov %%esp,%0" : "=g" (sp));
+#endif
+	return sp;
+}
+
 #else /* !__ASSEMBLY__ */
 
 /* how to get the thread information struct from ASM */
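
As an illustration of the helper added above (not part of the patch): kernel stacks are THREAD_SIZE-aligned and grow downwards, so masking the live stack pointer yields how many bytes remain free — the kind of check the irq_32.c changes in this series perform. The function name here is hypothetical:

static inline unsigned long example_stack_bytes_free(void)
{
	return current_stack_pointer() & (THREAD_SIZE - 1);
}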
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 707adc6549d8..4e49d7dff78e 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -1,6 +1,7 @@
 #ifndef _ASM_X86_TRAPS_H
 #define _ASM_X86_TRAPS_H
 
+#include <linux/context_tracking_state.h>
 #include <linux/kprobes.h>
 
 #include <asm/debugreg.h>
@@ -110,6 +111,11 @@ asmlinkage void smp_thermal_interrupt(void);
 asmlinkage void mce_threshold_interrupt(void);
 #endif
 
+extern enum ctx_state ist_enter(struct pt_regs *regs);
+extern void ist_exit(struct pt_regs *regs, enum ctx_state prev_state);
+extern void ist_begin_non_atomic(struct pt_regs *regs);
+extern void ist_end_non_atomic(void);
+
 /* Interrupts/Exceptions */
 enum {
 	X86_TRAP_DE = 0, /* 0, Divide-by-zero */
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index c8aa65d56027..d979e5abae55 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -251,6 +251,10 @@
 /* Fam 16h MSRs */
 #define MSR_F16H_L2I_PERF_CTL 0xc0010230
 #define MSR_F16H_L2I_PERF_CTR 0xc0010231
+#define MSR_F16H_DR1_ADDR_MASK 0xc0011019
+#define MSR_F16H_DR2_ADDR_MASK 0xc001101a
+#define MSR_F16H_DR3_ADDR_MASK 0xc001101b
+#define MSR_F16H_DR0_ADDR_MASK 0xc0011027
 
 /* Fam 15h MSRs */
 #define MSR_F15H_PERF_CTL 0xc0010200
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index b9e30daa0881..a18fff361c7f 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -653,6 +653,7 @@ static int acpi_register_gsi_pic(struct device *dev, u32 gsi,
 	return gsi;
 }
 
+#ifdef CONFIG_X86_LOCAL_APIC
 static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
 				    int trigger, int polarity)
 {
@@ -675,6 +676,7 @@ static void acpi_unregister_gsi_ioapic(u32 gsi)
 	mutex_unlock(&acpi_ioapic_lock);
 #endif
 }
+#endif
 
 int (*__acpi_register_gsi)(struct device *dev, u32 gsi,
 			   int trigger, int polarity) = acpi_register_gsi_pic;
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index b708738d016e..6a7c23ff21d3 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -135,14 +135,6 @@ static inline void apbt_clear_mapping(void)
 	apbt_virt_address = NULL;
 }
 
-/*
- * APBT timer interrupt enable / disable
- */
-static inline int is_apbt_capable(void)
-{
-	return apbt_virt_address ? 1 : 0;
-}
-
 static int __init apbt_clockevent_register(void)
 {
 	struct sfi_timer_table_entry *mtmr;
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 29b5b18afa27..b665d241efad 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -134,9 +134,6 @@ static inline void imcr_apic_to_pic(void)
  */
 static int force_enable_local_apic __initdata;
 
-/* Control whether x2APIC mode is enabled or not */
-static bool nox2apic __initdata;
-
 /*
  * APIC command line parameters
  */
@@ -161,33 +158,6 @@ static __init int setup_apicpmtimer(char *s)
 __setup("apicpmtimer", setup_apicpmtimer);
 #endif
 
-int x2apic_mode;
-#ifdef CONFIG_X86_X2APIC
-/* x2apic enabled before OS handover */
-int x2apic_preenabled;
-static int x2apic_disabled;
-static int __init setup_nox2apic(char *str)
-{
-	if (x2apic_enabled()) {
-		int apicid = native_apic_msr_read(APIC_ID);
-
-		if (apicid >= 255) {
-			pr_warning("Apicid: %08x, cannot enforce nox2apic\n",
-				   apicid);
-			return 0;
-		}
-
-		pr_warning("x2apic already enabled. will disable it\n");
-	} else
-		setup_clear_cpu_cap(X86_FEATURE_X2APIC);
-
-	nox2apic = true;
-
-	return 0;
-}
-early_param("nox2apic", setup_nox2apic);
-#endif
-
 unsigned long mp_lapic_addr;
 int disable_apic;
 /* Disable local APIC timer from the kernel commandline or via dmi quirk */
@@ -1475,7 +1445,7 @@ void setup_local_APIC(void)
 #endif
 }
 
-void end_local_APIC_setup(void)
+static void end_local_APIC_setup(void)
 {
 	lapic_setup_esr();
 
@@ -1492,116 +1462,184 @@ void end_local_APIC_setup(void)
 	apic_pm_activate();
 }
 
-void __init bsp_end_local_APIC_setup(void)
+/*
+ * APIC setup function for application processors. Called from smpboot.c
+ */
+void apic_ap_setup(void)
 {
+	setup_local_APIC();
 	end_local_APIC_setup();
-
-	/*
-	 * Now that local APIC setup is completed for BP, configure the fault
-	 * handling for interrupt remapping.
-	 */
-	irq_remap_enable_fault_handling();
-
 }
 
 #ifdef CONFIG_X86_X2APIC
-/*
- * Need to disable xapic and x2apic at the same time and then enable xapic mode
- */
-static inline void __disable_x2apic(u64 msr)
-{
-	wrmsrl(MSR_IA32_APICBASE,
-	       msr & ~(X2APIC_ENABLE | XAPIC_ENABLE));
-	wrmsrl(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE);
-}
+int x2apic_mode;
 
-static __init void disable_x2apic(void)
+enum {
+	X2APIC_OFF,
+	X2APIC_ON,
+	X2APIC_DISABLED,
+};
+static int x2apic_state;
+
+static inline void __x2apic_disable(void)
 {
 	u64 msr;
 
-	if (!cpu_has_x2apic)
+	if (cpu_has_apic)
 		return;
 
 	rdmsrl(MSR_IA32_APICBASE, msr);
-	if (msr & X2APIC_ENABLE) {
-		u32 x2apic_id = read_apic_id();
-
-		if (x2apic_id >= 255)
-			panic("Cannot disable x2apic, id: %08x\n", x2apic_id);
+	if (!(msr & X2APIC_ENABLE))
+		return;
+	/* Disable xapic and x2apic first and then reenable xapic mode */
+	wrmsrl(MSR_IA32_APICBASE, msr & ~(X2APIC_ENABLE | XAPIC_ENABLE));
+	wrmsrl(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE);
+	printk_once(KERN_INFO "x2apic disabled\n");
+}
 
-		pr_info("Disabling x2apic\n");
-		__disable_x2apic(msr);
+static inline void __x2apic_enable(void)
+{
+	u64 msr;
 
-		if (nox2apic) {
-			clear_cpu_cap(&cpu_data(0), X86_FEATURE_X2APIC);
-			setup_clear_cpu_cap(X86_FEATURE_X2APIC);
-		}
+	rdmsrl(MSR_IA32_APICBASE, msr);
+	if (msr & X2APIC_ENABLE)
+		return;
+	wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE);
+	printk_once(KERN_INFO "x2apic enabled\n");
+}
 
-		x2apic_disabled = 1;
-		x2apic_mode = 0;
+static int __init setup_nox2apic(char *str)
+{
+	if (x2apic_enabled()) {
+		int apicid = native_apic_msr_read(APIC_ID);
 
-		register_lapic_address(mp_lapic_addr);
+		if (apicid >= 255) {
+			pr_warning("Apicid: %08x, cannot enforce nox2apic\n",
+				   apicid);
+			return 0;
+		}
+		pr_warning("x2apic already enabled.\n");
+		__x2apic_disable();
 	}
+	setup_clear_cpu_cap(X86_FEATURE_X2APIC);
+	x2apic_state = X2APIC_DISABLED;
+	x2apic_mode = 0;
+	return 0;
 }
+early_param("nox2apic", setup_nox2apic);
 
-void check_x2apic(void)
+/* Called from cpu_init() to enable x2apic on (secondary) cpus */
+void x2apic_setup(void)
 {
-	if (x2apic_enabled()) {
-		pr_info("x2apic enabled by BIOS, switching to x2apic ops\n");
-		x2apic_preenabled = x2apic_mode = 1;
+	/*
+	 * If x2apic is not in ON state, disable it if already enabled
+	 * from BIOS.
+	 */
+	if (x2apic_state != X2APIC_ON) {
+		__x2apic_disable();
+		return;
 	}
+	__x2apic_enable();
 }
 
-void enable_x2apic(void)
+static __init void x2apic_disable(void)
 {
-	u64 msr;
+	u32 x2apic_id;
 
-	rdmsrl(MSR_IA32_APICBASE, msr);
-	if (x2apic_disabled) {
-		__disable_x2apic(msr);
+	if (x2apic_state != X2APIC_ON)
+		goto out;
+
+	x2apic_id = read_apic_id();
+	if (x2apic_id >= 255)
+		panic("Cannot disable x2apic, id: %08x\n", x2apic_id);
+
+	__x2apic_disable();
+	register_lapic_address(mp_lapic_addr);
+out:
+	x2apic_state = X2APIC_DISABLED;
+	x2apic_mode = 0;
+}
+
+static __init void x2apic_enable(void)
+{
+	if (x2apic_state != X2APIC_OFF)
 		return;
-	}
 
-	if (!x2apic_mode)
+	x2apic_mode = 1;
+	x2apic_state = X2APIC_ON;
+	__x2apic_enable();
+}
+
+static __init void try_to_enable_x2apic(int remap_mode)
+{
+	if (x2apic_state == X2APIC_DISABLED)
 		return;
 
-	if (!(msr & X2APIC_ENABLE)) {
-		printk_once(KERN_INFO "Enabling x2apic\n");
-		wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE);
+	if (remap_mode != IRQ_REMAP_X2APIC_MODE) {
+		/* IR is required if there is APIC ID > 255 even when running
+		 * under KVM
+		 */
+		if (max_physical_apicid > 255 ||
+		    (IS_ENABLED(CONFIG_HYPERVISOR_GUEST) &&
+		     !hypervisor_x2apic_available())) {
+			pr_info("x2apic: IRQ remapping doesn't support X2APIC mode\n");
+			x2apic_disable();
+			return;
+		}
+
+		/*
+		 * without IR all CPUs can be addressed by IOAPIC/MSI
+		 * only in physical mode
+		 */
+		x2apic_phys = 1;
 	}
+	x2apic_enable();
 }
-#endif /* CONFIG_X86_X2APIC */
 
-int __init enable_IR(void)
+void __init check_x2apic(void)
 {
-#ifdef CONFIG_IRQ_REMAP
-	if (!irq_remapping_supported()) {
-		pr_debug("intr-remapping not supported\n");
-		return -1;
+	if (x2apic_enabled()) {
+		pr_info("x2apic: enabled by BIOS, switching to x2apic ops\n");
+		x2apic_mode = 1;
+		x2apic_state = X2APIC_ON;
+	} else if (!cpu_has_x2apic) {
+		x2apic_state = X2APIC_DISABLED;
 	}
+}
+#else /* CONFIG_X86_X2APIC */
+static int __init validate_x2apic(void)
+{
+	if (!apic_is_x2apic_enabled())
+		return 0;
+	/*
+	 * Checkme: Can we simply turn off x2apic here instead of panic?
+	 */
+	panic("BIOS has enabled x2apic but kernel doesn't support x2apic, please disable x2apic in BIOS.\n");
+}
+early_initcall(validate_x2apic);
 
-	if (!x2apic_preenabled && skip_ioapic_setup) {
-		pr_info("Skipped enabling intr-remap because of skipping "
-			"io-apic setup\n");
+static inline void try_to_enable_x2apic(int remap_mode) { }
+static inline void __x2apic_enable(void) { }
+#endif /* !CONFIG_X86_X2APIC */
+
+static int __init try_to_enable_IR(void)
+{
+#ifdef CONFIG_X86_IO_APIC
+	if (!x2apic_enabled() && skip_ioapic_setup) {
+		pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n");
 		return -1;
 	}
-
-	return irq_remapping_enable();
 #endif
-	return -1;
+	return irq_remapping_enable();
 }
 
 void __init enable_IR_x2apic(void)
 {
 	unsigned long flags;
-	int ret, x2apic_enabled = 0;
-	int hardware_init_ret;
-
-	/* Make sure irq_remap_ops are initialized */
-	setup_irq_remapping_ops();
+	int ret, ir_stat;
 
-	hardware_init_ret = irq_remapping_prepare();
-	if (hardware_init_ret && !x2apic_supported())
+	ir_stat = irq_remapping_prepare();
+	if (ir_stat < 0 && !x2apic_supported())
 		return;
 
 	ret = save_ioapic_entries();
@@ -1614,49 +1652,13 @@ void __init enable_IR_x2apic(void)
 	legacy_pic->mask_all();
 	mask_ioapic_entries();
 
-	if (x2apic_preenabled && nox2apic)
-		disable_x2apic();
-
-	if (hardware_init_ret)
-		ret = -1;
-	else
-		ret = enable_IR();
-
-	if (!x2apic_supported())
-		goto skip_x2apic;
+	/* If irq_remapping_prepare() succeded, try to enable it */
+	if (ir_stat >= 0)
+		ir_stat = try_to_enable_IR();
+	/* ir_stat contains the remap mode or an error code */
+	try_to_enable_x2apic(ir_stat);
 
-	if (ret < 0) {
-		/* IR is required if there is APIC ID > 255 even when running
-		 * under KVM
-		 */
-		if (max_physical_apicid > 255 ||
-		    !hypervisor_x2apic_available()) {
-			if (x2apic_preenabled)
-				disable_x2apic();
-			goto skip_x2apic;
-		}
-		/*
-		 * without IR all CPUs can be addressed by IOAPIC/MSI
-		 * only in physical mode
-		 */
-		x2apic_force_phys();
-	}
-
-	if (ret == IRQ_REMAP_XAPIC_MODE) {
-		pr_info("x2apic not enabled, IRQ remapping is in xapic mode\n");
-		goto skip_x2apic;
-	}
-
-	x2apic_enabled = 1;
-
-	if (x2apic_supported() && !x2apic_mode) {
-		x2apic_mode = 1;
-		enable_x2apic();
-		pr_info("Enabled x2apic\n");
-	}
-
-skip_x2apic:
-	if (ret < 0) /* IR enabling failed */
+	if (ir_stat < 0)
 		restore_ioapic_entries();
 	legacy_pic->restore_mask();
 	local_irq_restore(flags);
@@ -1847,82 +1849,8 @@ void __init register_lapic_address(unsigned long address)
 	}
 }
 
-/*
- * This initializes the IO-APIC and APIC hardware if this is
- * a UP kernel.
- */
 int apic_version[MAX_LOCAL_APIC];
 
-int __init APIC_init_uniprocessor(void)
-{
-	if (disable_apic) {
-		pr_info("Apic disabled\n");
-		return -1;
-	}
-#ifdef CONFIG_X86_64
-	if (!cpu_has_apic) {
-		disable_apic = 1;
-		pr_info("Apic disabled by BIOS\n");
-		return -1;
-	}
-#else
-	if (!smp_found_config && !cpu_has_apic)
-		return -1;
-
-	/*
-	 * Complain if the BIOS pretends there is one.
-	 */
-	if (!cpu_has_apic &&
-	    APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
-		pr_err("BIOS bug, local APIC 0x%x not detected!...\n",
-		       boot_cpu_physical_apicid);
-		return -1;
-	}
-#endif
-
-	default_setup_apic_routing();
-
-	verify_local_APIC();
-	connect_bsp_APIC();
-
-#ifdef CONFIG_X86_64
-	apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid));
-#else
-	/*
-	 * Hack: In case of kdump, after a crash, kernel might be booting
-	 * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid
-	 * might be zero if read from MP tables. Get it from LAPIC.
-	 */
-# ifdef CONFIG_CRASH_DUMP
-	boot_cpu_physical_apicid = read_apic_id();
-# endif
-#endif
-	physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
-	setup_local_APIC();
-
-#ifdef CONFIG_X86_IO_APIC
-	/*
-	 * Now enable IO-APICs, actually call clear_IO_APIC
-	 * We need clear_IO_APIC before enabling error vector
-	 */
-	if (!skip_ioapic_setup && nr_ioapics)
-		enable_IO_APIC();
-#endif
-
-	bsp_end_local_APIC_setup();
-
-#ifdef CONFIG_X86_IO_APIC
-	if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
-		setup_IO_APIC();
-	else {
-		nr_ioapics = 0;
-	}
-#endif
-
-	x86_init.timers.setup_percpu_clockev();
-	return 0;
-}
-
 /*
  * Local APIC interrupts
  */
@@ -2027,7 +1955,7 @@ __visible void smp_trace_error_interrupt(struct pt_regs *regs)
 /**
  * connect_bsp_APIC - attach the APIC to the interrupt system
  */
-void __init connect_bsp_APIC(void)
+static void __init connect_bsp_APIC(void)
 {
 #ifdef CONFIG_X86_32
 	if (pic_mode) {
@@ -2274,6 +2202,100 @@ void __init apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v))
 	}
 }
 
+static void __init apic_bsp_up_setup(void)
+{
+#ifdef CONFIG_X86_64
+	apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid));
+#else
+	/*
+	 * Hack: In case of kdump, after a crash, kernel might be booting
+	 * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid
+	 * might be zero if read from MP tables. Get it from LAPIC.
+	 */
+# ifdef CONFIG_CRASH_DUMP
+	boot_cpu_physical_apicid = read_apic_id();
+# endif
+#endif
+	physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
+}
+
+/**
+ * apic_bsp_setup - Setup function for local apic and io-apic
+ * @upmode:	Force UP mode (for APIC_init_uniprocessor)
+ *
+ * Returns:
+ * apic_id of BSP APIC
+ */
+int __init apic_bsp_setup(bool upmode)
+{
+	int id;
+
+	connect_bsp_APIC();
+	if (upmode)
+		apic_bsp_up_setup();
+	setup_local_APIC();
+
+	if (x2apic_mode)
+		id = apic_read(APIC_LDR);
+	else
+		id = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
+
+	enable_IO_APIC();
+	end_local_APIC_setup();
+	irq_remap_enable_fault_handling();
+	setup_IO_APIC();
+	/* Setup local timer */
+	x86_init.timers.setup_percpu_clockev();
+	return id;
+}
+
+/*
+ * This initializes the IO-APIC and APIC hardware if this is
+ * a UP kernel.
+ */
+int __init APIC_init_uniprocessor(void)
+{
+	if (disable_apic) {
+		pr_info("Apic disabled\n");
+		return -1;
+	}
+#ifdef CONFIG_X86_64
+	if (!cpu_has_apic) {
+		disable_apic = 1;
+		pr_info("Apic disabled by BIOS\n");
+		return -1;
+	}
+#else
+	if (!smp_found_config && !cpu_has_apic)
+		return -1;
+
+	/*
+	 * Complain if the BIOS pretends there is one.
+	 */
+	if (!cpu_has_apic &&
+	    APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
+		pr_err("BIOS bug, local APIC 0x%x not detected!...\n",
+		       boot_cpu_physical_apicid);
+		return -1;
+	}
+#endif
+
+	if (!smp_found_config)
+		disable_ioapic_support();
+
+	default_setup_apic_routing();
+	verify_local_APIC();
+	apic_bsp_setup(true);
+	return 0;
+}
+
+#ifdef CONFIG_UP_LATE_INIT
+void __init up_late_init(void)
+{
+	APIC_init_uniprocessor();
+}
+#endif
+
 /*
  * Power management
  */
@@ -2359,9 +2381,9 @@ static void lapic_resume(void)
 	mask_ioapic_entries();
 	legacy_pic->mask_all();
 
-	if (x2apic_mode)
-		enable_x2apic();
-	else {
+	if (x2apic_mode) {
+		__x2apic_enable();
+	} else {
 		/*
 		 * Make sure the APICBASE points to the right address
 		 *
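
To summarize the rework above (annotation, not patch content): x2apic handling now runs a small state machine. x2apic_state starts at X2APIC_OFF, or at X2APIC_ON when check_x2apic() finds the BIOS handed over with x2apic already live; x2apic_enable() only moves OFF to ON; "nox2apic" or an IRQ-remapping mode that cannot carry x2apic parks it at X2APIC_DISABLED, which is terminal. A toy checker restating those transitions:

enum { X2APIC_OFF, X2APIC_ON, X2APIC_DISABLED };

static int x2apic_transition_ok(int from, int to)
{
	if (from == X2APIC_DISABLED)
		return 0;			/* DISABLED is terminal */
	if (to == X2APIC_ON)
		return from == X2APIC_OFF;	/* enable only from OFF */
	return 1;				/* any live state may be disabled */
}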
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 3f5f60406ab1..f4dc2462a1ac 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1507,7 +1507,10 @@ void __init enable_IO_APIC(void)
 	int i8259_apic, i8259_pin;
 	int apic, pin;
 
-	if (!nr_legacy_irqs())
+	if (skip_ioapic_setup)
+		nr_ioapics = 0;
+
+	if (!nr_legacy_irqs() || !nr_ioapics)
 		return;
 
 	for_each_ioapic_pin(apic, pin) {
@@ -2295,7 +2298,7 @@ static inline void __init check_timer(void)
 	}
 	local_irq_disable();
 	apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
-	if (x2apic_preenabled)
+	if (apic_is_x2apic_enabled())
 		apic_printk(APIC_QUIET, KERN_INFO
 			"Perhaps problem with the pre-enabled x2apic mode\n"
 			"Try booting with x2apic and interrupt-remapping disabled in the bios.\n");
@@ -2373,9 +2376,9 @@ void __init setup_IO_APIC(void)
 {
 	int ioapic;
 
-	/*
-	 * calling enable_IO_APIC() is moved to setup_local_APIC for BP
-	 */
+	if (skip_ioapic_setup || !nr_ioapics)
+		return;
+
 	io_apic_irqs = nr_legacy_irqs() ? ~PIC_IRQS : ~0UL;
 
 	apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 15c5df92f74e..a220239cea65 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -869,3 +869,22 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
 
 	return false;
 }
+
+void set_dr_addr_mask(unsigned long mask, int dr)
+{
+	if (!cpu_has_bpext)
+		return;
+
+	switch (dr) {
+	case 0:
+		wrmsr(MSR_F16H_DR0_ADDR_MASK, mask, 0);
+		break;
+	case 1:
+	case 2:
+	case 3:
+		wrmsr(MSR_F16H_DR1_ADDR_MASK - 1 + dr, mask, 0);
+		break;
+	default:
+		break;
+	}
+}
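
A note on the MSR arithmetic above (annotation, not patch content): the DR1-DR3 address-mask MSRs are contiguous (0xc0011019-0xc001101b), which is why MSR_F16H_DR1_ADDR_MASK - 1 + dr resolves correctly for dr = 1..3, while DR0's mask register sits apart at 0xc0011027 and needs its own case. A table-driven equivalent, for illustration only:

static const u32 dr_addr_mask_msr[4] = {
	MSR_F16H_DR0_ADDR_MASK,	/* 0xc0011027 */
	MSR_F16H_DR1_ADDR_MASK,	/* 0xc0011019 */
	MSR_F16H_DR2_ADDR_MASK,	/* 0xc001101a */
	MSR_F16H_DR3_ADDR_MASK,	/* 0xc001101b */
};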
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c6049650c093..b15bffcaba6d 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -491,17 +491,18 @@ u16 __read_mostly tlb_lld_2m[NR_INFO];
 u16 __read_mostly tlb_lld_4m[NR_INFO];
 u16 __read_mostly tlb_lld_1g[NR_INFO];
 
-void cpu_detect_tlb(struct cpuinfo_x86 *c)
+static void cpu_detect_tlb(struct cpuinfo_x86 *c)
 {
 	if (this_cpu->c_detect_tlb)
 		this_cpu->c_detect_tlb(c);
 
-	printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n"
-		"Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d, 1GB %d\n",
+	pr_info("Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n",
 		tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES],
-		tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES],
-		tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES],
-		tlb_lld_1g[ENTRIES]);
+		tlb_lli_4m[ENTRIES]);
+
+	pr_info("Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d, 1GB %d\n",
+		tlb_lld_4k[ENTRIES], tlb_lld_2m[ENTRIES],
+		tlb_lld_4m[ENTRIES], tlb_lld_1g[ENTRIES]);
 }
 
 void detect_ht(struct cpuinfo_x86 *c)
@@ -1332,7 +1333,7 @@ void cpu_init(void)
 	barrier();
 
 	x86_configure_nx();
-	enable_x2apic();
+	x2apic_setup();
 
 	/*
 	 * set up and load the per-CPU TSS
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 9cc6b6f25f42..94d7dcb12145 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -487,10 +487,8 @@ static void init_intel(struct cpuinfo_x86 *c)
 
 		rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
 		if ((epb & 0xF) == ENERGY_PERF_BIAS_PERFORMANCE) {
-			printk_once(KERN_WARNING "ENERGY_PERF_BIAS:"
-				" Set to 'normal', was 'performance'\n"
-				"ENERGY_PERF_BIAS: View and update with"
-				" x86_energy_perf_policy(8)\n");
+			pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n");
+			pr_warn_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n");
 			epb = (epb & ~0xF) | ENERGY_PERF_BIAS_NORMAL;
 			wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
 		}
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index d2c611699cd9..cdfed7953963 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -43,6 +43,7 @@
 #include <linux/export.h>
 
 #include <asm/processor.h>
+#include <asm/traps.h>
 #include <asm/mce.h>
 #include <asm/msr.h>
 
@@ -115,7 +116,7 @@ static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
  * CPU/chipset specific EDAC code can register a notifier call here to print
  * MCE errors in a human-readable form.
  */
-ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
+static ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
 
 /* Do initial initialization of a struct mce */
 void mce_setup(struct mce *m)
@@ -311,7 +312,7 @@ static void wait_for_panic(void)
 		panic("Panicing machine check CPU died");
 }
 
-static void mce_panic(char *msg, struct mce *final, char *exp)
+static void mce_panic(const char *msg, struct mce *final, char *exp)
 {
 	int i, apei_err = 0;
 
@@ -529,7 +530,7 @@ static void mce_schedule_work(void)
 	schedule_work(this_cpu_ptr(&mce_work));
 }
 
-DEFINE_PER_CPU(struct irq_work, mce_irq_work);
+static DEFINE_PER_CPU(struct irq_work, mce_irq_work);
 
 static void mce_irq_work_cb(struct irq_work *entry)
 {
@@ -735,7 +736,7 @@ static atomic_t mce_callin;
 /*
  * Check if a timeout waiting for other CPUs happened.
  */
-static int mce_timed_out(u64 *t)
+static int mce_timed_out(u64 *t, const char *msg)
 {
 	/*
 	 * The others already did panic for some reason.
@@ -750,8 +751,7 @@ static int mce_timed_out(u64 *t)
 		goto out;
 	if ((s64)*t < SPINUNIT) {
 		if (mca_cfg.tolerant <= 1)
-			mce_panic("Timeout synchronizing machine check over CPUs",
-				  NULL, NULL);
+			mce_panic(msg, NULL, NULL);
 		cpu_missing = 1;
 		return 1;
 	}
@@ -867,7 +867,8 @@ static int mce_start(int *no_way_out)
 	 * Wait for everyone.
 	 */
 	while (atomic_read(&mce_callin) != cpus) {
-		if (mce_timed_out(&timeout)) {
+		if (mce_timed_out(&timeout,
+				  "Timeout: Not all CPUs entered broadcast exception handler")) {
 			atomic_set(&global_nwo, 0);
 			return -1;
 		}
@@ -892,7 +893,8 @@ static int mce_start(int *no_way_out)
 	 * only seen by one CPU before cleared, avoiding duplicates.
 	 */
 	while (atomic_read(&mce_executing) < order) {
-		if (mce_timed_out(&timeout)) {
+		if (mce_timed_out(&timeout,
+				  "Timeout: Subject CPUs unable to finish machine check processing")) {
 			atomic_set(&global_nwo, 0);
 			return -1;
 		}
@@ -936,7 +938,8 @@ static int mce_end(int order)
 	 * loops.
 	 */
 	while (atomic_read(&mce_executing) <= cpus) {
-		if (mce_timed_out(&timeout))
+		if (mce_timed_out(&timeout,
+				  "Timeout: Monarch CPU unable to finish machine check processing"))
 			goto reset;
 		ndelay(SPINUNIT);
 	}
@@ -949,7 +952,8 @@ static int mce_end(int order)
 	 * Subject: Wait for Monarch to finish.
 	 */
 	while (atomic_read(&mce_executing) != 0) {
-		if (mce_timed_out(&timeout))
+		if (mce_timed_out(&timeout,
+				  "Timeout: Monarch CPU did not finish machine check processing"))
 			goto reset;
 		ndelay(SPINUNIT);
 	}
@@ -1003,51 +1007,6 @@ static void mce_clear_state(unsigned long *toclear)
 }
 
 /*
- * Need to save faulting physical address associated with a process
- * in the machine check handler some place where we can grab it back
- * later in mce_notify_process()
- */
-#define MCE_INFO_MAX 16
-
-struct mce_info {
-	atomic_t inuse;
-	struct task_struct *t;
-	__u64 paddr;
-	int restartable;
-} mce_info[MCE_INFO_MAX];
-
-static void mce_save_info(__u64 addr, int c)
-{
-	struct mce_info *mi;
-
-	for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++) {
-		if (atomic_cmpxchg(&mi->inuse, 0, 1) == 0) {
-			mi->t = current;
-			mi->paddr = addr;
-			mi->restartable = c;
-			return;
-		}
-	}
-
-	mce_panic("Too many concurrent recoverable errors", NULL, NULL);
-}
-
-static struct mce_info *mce_find_info(void)
-{
-	struct mce_info *mi;
-
-	for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++)
-		if (atomic_read(&mi->inuse) && mi->t == current)
-			return mi;
-	return NULL;
-}
-
-static void mce_clear_info(struct mce_info *mi)
-{
-	atomic_set(&mi->inuse, 0);
-}
-
-/*
  * The actual machine check handler. This only handles real
  * exceptions when something got corrupted coming in through int 18.
  *
@@ -1063,6 +1022,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 {
 	struct mca_config *cfg = &mca_cfg;
 	struct mce m, *final;
+	enum ctx_state prev_state;
 	int i;
 	int worst = 0;
 	int severity;
@@ -1084,6 +1044,10 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	DECLARE_BITMAP(toclear, MAX_NR_BANKS);
 	DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
 	char *msg = "Unknown";
+	u64 recover_paddr = ~0ull;
+	int flags = MF_ACTION_REQUIRED;
+
+	prev_state = ist_enter(regs);
 
 	this_cpu_inc(mce_exception_count);
 
@@ -1203,9 +1167,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	if (no_way_out)
 		mce_panic("Fatal machine check on current CPU", &m, msg);
 	if (worst == MCE_AR_SEVERITY) {
-		/* schedule action before return to userland */
-		mce_save_info(m.addr, m.mcgstatus & MCG_STATUS_RIPV);
-		set_thread_flag(TIF_MCE_NOTIFY);
+		recover_paddr = m.addr;
+		if (!(m.mcgstatus & MCG_STATUS_RIPV))
+			flags |= MF_MUST_KILL;
 	} else if (kill_it) {
 		force_sig(SIGBUS, current);
 	}
@@ -1216,6 +1180,27 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
 out:
 	sync_core();
+
+	if (recover_paddr == ~0ull)
+		goto done;
+
+	pr_err("Uncorrected hardware memory error in user-access at %llx",
+	       recover_paddr);
+	/*
+	 * We must call memory_failure() here even if the current process is
+	 * doomed. We still need to mark the page as poisoned and alert any
+	 * other users of the page.
+	 */
+	ist_begin_non_atomic(regs);
+	local_irq_enable();
+	if (memory_failure(recover_paddr >> PAGE_SHIFT, MCE_VECTOR, flags) < 0) {
+		pr_err("Memory error not recovered");
+		force_sig(SIGBUS, current);
+	}
+	local_irq_disable();
+	ist_end_non_atomic();
+done:
+	ist_exit(regs, prev_state);
 }
 EXPORT_SYMBOL_GPL(do_machine_check);
 
@@ -1233,42 +1218,6 @@ int memory_failure(unsigned long pfn, int vector, int flags)
1233#endif 1218#endif
1234 1219
1235/* 1220/*
1236 * Called in process context that interrupted by MCE and marked with
1237 * TIF_MCE_NOTIFY, just before returning to erroneous userland.
1238 * This code is allowed to sleep.
1239 * Attempt possible recovery such as calling the high level VM handler to
1240 * process any corrupted pages, and kill/signal current process if required.
1241 * Action required errors are handled here.
1242 */
1243void mce_notify_process(void)
1244{
1245 unsigned long pfn;
1246 struct mce_info *mi = mce_find_info();
1247 int flags = MF_ACTION_REQUIRED;
1248
1249 if (!mi)
1250 mce_panic("Lost physical address for unconsumed uncorrectable error", NULL, NULL);
1251 pfn = mi->paddr >> PAGE_SHIFT;
1252
1253 clear_thread_flag(TIF_MCE_NOTIFY);
1254
1255 pr_err("Uncorrected hardware memory error in user-access at %llx",
1256 mi->paddr);
1257 /*
1258 * We must call memory_failure() here even if the current process is
1259 * doomed. We still need to mark the page as poisoned and alert any
1260 * other users of the page.
1261 */
1262 if (!mi->restartable)
1263 flags |= MF_MUST_KILL;
1264 if (memory_failure(pfn, MCE_VECTOR, flags) < 0) {
1265 pr_err("Memory error not recovered");
1266 force_sig(SIGBUS, current);
1267 }
1268 mce_clear_info(mi);
1269}
1270
1271/*
1272 * Action optional processing happens here (picking up 1221 * Action optional processing happens here (picking up
1273 * from the list of faulting pages that do_machine_check() 1222 * from the list of faulting pages that do_machine_check()
1274 * placed into the "ring"). 1223 * placed into the "ring").
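Editor's note: the hunks above replace the deferred recovery path (stash the
physical address in mce_info[], set TIF_MCE_NOTIFY, finish in
mce_notify_process() on the way back to userspace) with an inline tail in
do_machine_check() itself, made legal by the new ist_begin_non_atomic()
bracket. A condensed sketch of the resulting flow (a paraphrase of the hunks
above, not new API):

    u64 recover_paddr = ~0ull;      /* ~0ull means "nothing to recover" */
    int flags = MF_ACTION_REQUIRED;

    if (worst == MCE_AR_SEVERITY) {
            recover_paddr = m.addr;
            if (!(m.mcgstatus & MCG_STATUS_RIPV))
                    flags |= MF_MUST_KILL;  /* not restartable */
    }
    /* ... bank clearing, sync_core() ... */
    if (recover_paddr != ~0ull) {
            ist_begin_non_atomic(regs); /* came from user mode, may sleep */
            local_irq_enable();
            if (memory_failure(recover_paddr >> PAGE_SHIFT,
                               MCE_VECTOR, flags) < 0)
                    force_sig(SIGBUS, current);
            local_irq_disable();
            ist_end_non_atomic();
    }

This removes the mce_info[] bookkeeping and the window between taking the
exception and the TIF_MCE_NOTIFY work actually running.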
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
index a3042989398c..ec2663a708e4 100644
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -8,6 +8,7 @@
8#include <linux/smp.h> 8#include <linux/smp.h>
9 9
10#include <asm/processor.h> 10#include <asm/processor.h>
11#include <asm/traps.h>
11#include <asm/mce.h> 12#include <asm/mce.h>
12#include <asm/msr.h> 13#include <asm/msr.h>
13 14
@@ -17,8 +18,11 @@ int mce_p5_enabled __read_mostly;
17/* Machine check handler for Pentium class Intel CPUs: */ 18/* Machine check handler for Pentium class Intel CPUs: */
18static void pentium_machine_check(struct pt_regs *regs, long error_code) 19static void pentium_machine_check(struct pt_regs *regs, long error_code)
19{ 20{
21 enum ctx_state prev_state;
20 u32 loaddr, hi, lotype; 22 u32 loaddr, hi, lotype;
21 23
24 prev_state = ist_enter(regs);
25
22 rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi); 26 rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
23 rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi); 27 rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);
24 28
@@ -33,6 +37,8 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code)
33 } 37 }
34 38
35 add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); 39 add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
40
41 ist_exit(regs, prev_state);
36} 42}
37 43
38/* Set up machine check reporting for processors with Intel style MCE: */ 44/* Set up machine check reporting for processors with Intel style MCE: */
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c
index 7dc5564d0cdf..bd5d46a32210 100644
--- a/arch/x86/kernel/cpu/mcheck/winchip.c
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@@ -7,14 +7,19 @@
7#include <linux/types.h> 7#include <linux/types.h>
8 8
9#include <asm/processor.h> 9#include <asm/processor.h>
10#include <asm/traps.h>
10#include <asm/mce.h> 11#include <asm/mce.h>
11#include <asm/msr.h> 12#include <asm/msr.h>
12 13
13/* Machine check handler for WinChip C6: */ 14/* Machine check handler for WinChip C6: */
14static void winchip_machine_check(struct pt_regs *regs, long error_code) 15static void winchip_machine_check(struct pt_regs *regs, long error_code)
15{ 16{
17 enum ctx_state prev_state = ist_enter(regs);
18
16 printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); 19 printk(KERN_EMERG "CPU0: Machine Check Exception.\n");
17 add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); 20 add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
21
22 ist_exit(regs, prev_state);
18} 23}
19 24
20/* Set up machine check reporting on the Winchip C6 series */ 25/* Set up machine check reporting on the Winchip C6 series */
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 15c29096136b..36a83617eb21 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -552,7 +552,7 @@ static int __init microcode_init(void)
552 int error; 552 int error;
553 553
554 if (paravirt_enabled() || dis_ucode_ldr) 554 if (paravirt_enabled() || dis_ucode_ldr)
555 return 0; 555 return -EINVAL;
556 556
557 if (c->x86_vendor == X86_VENDOR_INTEL) 557 if (c->x86_vendor == X86_VENDOR_INTEL)
558 microcode_ops = init_intel_microcode(); 558 microcode_ops = init_intel_microcode();
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 944bf019b74f..498b6d967138 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2431,6 +2431,7 @@ __init int intel_pmu_init(void)
2431 break; 2431 break;
2432 2432
2433 case 55: /* 22nm Atom "Silvermont" */ 2433 case 55: /* 22nm Atom "Silvermont" */
2434 case 76: /* 14nm Atom "Airmont" */
2434 case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */ 2435 case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
2435 memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, 2436 memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
2436 sizeof(hw_cache_event_ids)); 2437 sizeof(hw_cache_event_ids));
diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
index 6e434f8e5fc8..c4bb8b8e5017 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
@@ -142,7 +142,7 @@ static inline u64 rapl_scale(u64 v)
142 * or use ldexp(count, -32). 142 * or use ldexp(count, -32).
143 * Watts = Joules/Time delta 143 * Watts = Joules/Time delta
144 */ 144 */
145 return v << (32 - __this_cpu_read(rapl_pmu->hw_unit)); 145 return v << (32 - __this_cpu_read(rapl_pmu)->hw_unit);
146} 146}
147 147
148static u64 rapl_event_update(struct perf_event *event) 148static u64 rapl_event_update(struct perf_event *event)
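Editor's note: the one-character change above is a real bug fix.
__this_cpu_read() takes a per-cpu lvalue, so __this_cpu_read(rapl_pmu->hw_unit)
applied the per-cpu offset to the hw_unit member expression instead of first
fetching this CPU's rapl_pmu pointer; the corrected form reads the pointer,
then dereferences it. As a worked example of the fixed-point scaling itself
(hw_unit == 16 is assumed here purely for illustration):

    u64 raw = 3;                   /* 3 counter ticks of 2^-16 J each */
    u64 scaled = raw << (32 - 16); /* 3 * 2^16 units of 2^-32 J       */
    /* scaled * 2^-32 J == 3 * 2^-16 J ~= 45.8 uJ: same energy, now in
       the uniform 2^-32 J unit the comment above describes.          */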
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 10b8d3eaaf15..c635b8b49e93 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -840,7 +840,6 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
840 box->phys_id = phys_id; 840 box->phys_id = phys_id;
841 box->pci_dev = pdev; 841 box->pci_dev = pdev;
842 box->pmu = pmu; 842 box->pmu = pmu;
843 uncore_box_init(box);
844 pci_set_drvdata(pdev, box); 843 pci_set_drvdata(pdev, box);
845 844
846 raw_spin_lock(&uncore_box_lock); 845 raw_spin_lock(&uncore_box_lock);
@@ -1004,10 +1003,8 @@ static int uncore_cpu_starting(int cpu)
1004 pmu = &type->pmus[j]; 1003 pmu = &type->pmus[j];
1005 box = *per_cpu_ptr(pmu->box, cpu); 1004 box = *per_cpu_ptr(pmu->box, cpu);
1006 /* called by uncore_cpu_init? */ 1005 /* called by uncore_cpu_init? */
1007 if (box && box->phys_id >= 0) { 1006 if (box && box->phys_id >= 0)
1008 uncore_box_init(box);
1009 continue; 1007 continue;
1010 }
1011 1008
1012 for_each_online_cpu(k) { 1009 for_each_online_cpu(k) {
1013 exist = *per_cpu_ptr(pmu->box, k); 1010 exist = *per_cpu_ptr(pmu->box, k);
@@ -1023,10 +1020,8 @@ static int uncore_cpu_starting(int cpu)
1023 } 1020 }
1024 } 1021 }
1025 1022
1026 if (box) { 1023 if (box)
1027 box->phys_id = phys_id; 1024 box->phys_id = phys_id;
1028 uncore_box_init(box);
1029 }
1030 } 1025 }
1031 } 1026 }
1032 return 0; 1027 return 0;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index 863d9b02563e..6c8c1e7e69d8 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -257,6 +257,14 @@ static inline int uncore_num_counters(struct intel_uncore_box *box)
257 return box->pmu->type->num_counters; 257 return box->pmu->type->num_counters;
258} 258}
259 259
260static inline void uncore_box_init(struct intel_uncore_box *box)
261{
262 if (!test_and_set_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) {
263 if (box->pmu->type->ops->init_box)
264 box->pmu->type->ops->init_box(box);
265 }
266}
267
260static inline void uncore_disable_box(struct intel_uncore_box *box) 268static inline void uncore_disable_box(struct intel_uncore_box *box)
261{ 269{
262 if (box->pmu->type->ops->disable_box) 270 if (box->pmu->type->ops->disable_box)
@@ -265,6 +273,8 @@ static inline void uncore_disable_box(struct intel_uncore_box *box)
265 273
266static inline void uncore_enable_box(struct intel_uncore_box *box) 274static inline void uncore_enable_box(struct intel_uncore_box *box)
267{ 275{
276 uncore_box_init(box);
277
268 if (box->pmu->type->ops->enable_box) 278 if (box->pmu->type->ops->enable_box)
269 box->pmu->type->ops->enable_box(box); 279 box->pmu->type->ops->enable_box(box);
270} 280}
@@ -287,14 +297,6 @@ static inline u64 uncore_read_counter(struct intel_uncore_box *box,
287 return box->pmu->type->ops->read_counter(box, event); 297 return box->pmu->type->ops->read_counter(box, event);
288} 298}
289 299
290static inline void uncore_box_init(struct intel_uncore_box *box)
291{
292 if (!test_and_set_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) {
293 if (box->pmu->type->ops->init_box)
294 box->pmu->type->ops->init_box(box);
295 }
296}
297
298static inline bool uncore_box_is_fake(struct intel_uncore_box *box) 300static inline bool uncore_box_is_fake(struct intel_uncore_box *box)
299{ 301{
300 return (box->phys_id < 0); 302 return (box->phys_id < 0);
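Editor's note: with this series a box is no longer initialized from the PCI
probe and CPU-starting paths; uncore_enable_box() initializes it lazily on
first enable, and UNCORE_BOX_FLAG_INITIATED guarantees the init_box callback
runs at most once even when enable is reached from several paths. A minimal
sketch of the test_and_set_bit() once-only idiom (names here are hypothetical,
not the kernel's):

    #define FLAG_INITIATED 0

    struct box {
            unsigned long flags;
    };

    static void box_lazy_init(struct box *b)
    {
            /* test_and_set_bit() atomically sets the bit and returns
               its old value, so exactly one caller ever sees 0. */
            if (!test_and_set_bit(FLAG_INITIATED, &b->flags))
                    expensive_hw_init(b);   /* hypothetical helper */
    }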
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index dd2f07ae9d0c..46201deee923 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -184,9 +184,9 @@ void __init e820_print_map(char *who)
184 * overwritten in the same location, starting at biosmap. 184 * overwritten in the same location, starting at biosmap.
185 * 185 *
186 * The integer pointed to by pnr_map must be valid on entry (the 186 * The integer pointed to by pnr_map must be valid on entry (the
187 * current number of valid entries located at biosmap) and will 187 * current number of valid entries located at biosmap). If the
188 * be updated on return, with the new number of valid entries 188 * sanitizing succeeds the *pnr_map will be updated with the new
189 * (something no more than max_nr_map.) 189 * number of valid entries (something no more than max_nr_map).
190 * 190 *
191 * The return value from sanitize_e820_map() is zero if it 191 * The return value from sanitize_e820_map() is zero if it
192 * successfully 'sanitized' the map entries passed in, and is -1 192 * successfully 'sanitized' the map entries passed in, and is -1
@@ -561,23 +561,15 @@ u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type,
561 561
562void __init update_e820(void) 562void __init update_e820(void)
563{ 563{
564 u32 nr_map; 564 if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map))
565
566 nr_map = e820.nr_map;
567 if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map))
568 return; 565 return;
569 e820.nr_map = nr_map;
570 printk(KERN_INFO "e820: modified physical RAM map:\n"); 566 printk(KERN_INFO "e820: modified physical RAM map:\n");
571 e820_print_map("modified"); 567 e820_print_map("modified");
572} 568}
573static void __init update_e820_saved(void) 569static void __init update_e820_saved(void)
574{ 570{
575 u32 nr_map; 571 sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map),
576 572 &e820_saved.nr_map);
577 nr_map = e820_saved.nr_map;
578 if (sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map), &nr_map))
579 return;
580 e820_saved.nr_map = nr_map;
581} 573}
582#define MAX_GAP_END 0x100000000ull 574#define MAX_GAP_END 0x100000000ull
583/* 575/*
@@ -898,11 +890,9 @@ early_param("memmap", parse_memmap_opt);
898void __init finish_e820_parsing(void) 890void __init finish_e820_parsing(void)
899{ 891{
900 if (userdef) { 892 if (userdef) {
901 u32 nr = e820.nr_map; 893 if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map),
902 894 &e820.nr_map) < 0)
903 if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0)
904 early_panic("Invalid user supplied memory map"); 895 early_panic("Invalid user supplied memory map");
905 e820.nr_map = nr;
906 896
907 printk(KERN_INFO "e820: user-defined physical RAM map:\n"); 897 printk(KERN_INFO "e820: user-defined physical RAM map:\n");
908 e820_print_map("user"); 898 e820_print_map("user");
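Editor's note: all three callers above shrink the same way. Since
sanitize_e820_map() leaves *pnr_map untouched on failure and updates it in
place on success, the copy-in/copy-out through a local nr_map is unnecessary.
The resulting calling convention, in brief:

    if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map))
            return;         /* failed: e820.nr_map is unchanged */
    /* success: e820.nr_map already holds the new entry count */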
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 9ebaf63ba182..db13655c3a2a 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -143,7 +143,8 @@ ENDPROC(native_usergs_sysret64)
143 movq \tmp,RSP+\offset(%rsp) 143 movq \tmp,RSP+\offset(%rsp)
144 movq $__USER_DS,SS+\offset(%rsp) 144 movq $__USER_DS,SS+\offset(%rsp)
145 movq $__USER_CS,CS+\offset(%rsp) 145 movq $__USER_CS,CS+\offset(%rsp)
146 movq $-1,RCX+\offset(%rsp) 146 movq RIP+\offset(%rsp),\tmp /* get rip */
147 movq \tmp,RCX+\offset(%rsp) /* copy it to rcx as sysret would do */
147 movq R11+\offset(%rsp),\tmp /* get eflags */ 148 movq R11+\offset(%rsp),\tmp /* get eflags */
148 movq \tmp,EFLAGS+\offset(%rsp) 149 movq \tmp,EFLAGS+\offset(%rsp)
149 .endm 150 .endm
@@ -155,27 +156,6 @@ ENDPROC(native_usergs_sysret64)
155 movq \tmp,R11+\offset(%rsp) 156 movq \tmp,R11+\offset(%rsp)
156 .endm 157 .endm
157 158
158 .macro FAKE_STACK_FRAME child_rip
159 /* push in order ss, rsp, eflags, cs, rip */
160 xorl %eax, %eax
161 pushq_cfi $__KERNEL_DS /* ss */
162 /*CFI_REL_OFFSET ss,0*/
163 pushq_cfi %rax /* rsp */
164 CFI_REL_OFFSET rsp,0
165 pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_FIXED) /* eflags - interrupts on */
166 /*CFI_REL_OFFSET rflags,0*/
167 pushq_cfi $__KERNEL_CS /* cs */
168 /*CFI_REL_OFFSET cs,0*/
169 pushq_cfi \child_rip /* rip */
170 CFI_REL_OFFSET rip,0
171 pushq_cfi %rax /* orig rax */
172 .endm
173
174 .macro UNFAKE_STACK_FRAME
175 addq $8*6, %rsp
176 CFI_ADJUST_CFA_OFFSET -(6*8)
177 .endm
178
179/* 159/*
180 * initial frame state for interrupts (and exceptions without error code) 160 * initial frame state for interrupts (and exceptions without error code)
181 */ 161 */
@@ -238,51 +218,6 @@ ENDPROC(native_usergs_sysret64)
238 CFI_REL_OFFSET r15, R15+\offset 218 CFI_REL_OFFSET r15, R15+\offset
239 .endm 219 .endm
240 220
241/* save partial stack frame */
242 .macro SAVE_ARGS_IRQ
243 cld
244 /* start from rbp in pt_regs and jump over */
245 movq_cfi rdi, (RDI-RBP)
246 movq_cfi rsi, (RSI-RBP)
247 movq_cfi rdx, (RDX-RBP)
248 movq_cfi rcx, (RCX-RBP)
249 movq_cfi rax, (RAX-RBP)
250 movq_cfi r8, (R8-RBP)
251 movq_cfi r9, (R9-RBP)
252 movq_cfi r10, (R10-RBP)
253 movq_cfi r11, (R11-RBP)
254
255 /* Save rbp so that we can unwind from get_irq_regs() */
256 movq_cfi rbp, 0
257
258 /* Save previous stack value */
259 movq %rsp, %rsi
260
261 leaq -RBP(%rsp),%rdi /* arg1 for handler */
262 testl $3, CS-RBP(%rsi)
263 je 1f
264 SWAPGS
265 /*
266 * irq_count is used to check if a CPU is already on an interrupt stack
267 * or not. While this is essentially redundant with preempt_count it is
268 * a little cheaper to use a separate counter in the PDA (short of
269 * moving irq_enter into assembly, which would be too much work)
270 */
2711: incl PER_CPU_VAR(irq_count)
272 cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
273 CFI_DEF_CFA_REGISTER rsi
274
275 /* Store previous stack value */
276 pushq %rsi
277 CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \
278 0x77 /* DW_OP_breg7 */, 0, \
279 0x06 /* DW_OP_deref */, \
280 0x08 /* DW_OP_const1u */, SS+8-RBP, \
281 0x22 /* DW_OP_plus */
282 /* We entered an interrupt context - irqs are off: */
283 TRACE_IRQS_OFF
284 .endm
285
286ENTRY(save_paranoid) 221ENTRY(save_paranoid)
287 XCPT_FRAME 1 RDI+8 222 XCPT_FRAME 1 RDI+8
288 cld 223 cld
@@ -426,15 +361,12 @@ system_call_fastpath:
426 * Has incomplete stack frame and undefined top of stack. 361 * Has incomplete stack frame and undefined top of stack.
427 */ 362 */
428ret_from_sys_call: 363ret_from_sys_call:
429 movl $_TIF_ALLWORK_MASK,%edi 364 testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
430 /* edi: flagmask */ 365 jnz int_ret_from_sys_call_fixup /* Go to the slow path */
431sysret_check: 366
432 LOCKDEP_SYS_EXIT 367 LOCKDEP_SYS_EXIT
433 DISABLE_INTERRUPTS(CLBR_NONE) 368 DISABLE_INTERRUPTS(CLBR_NONE)
434 TRACE_IRQS_OFF 369 TRACE_IRQS_OFF
435 movl TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET),%edx
436 andl %edi,%edx
437 jnz sysret_careful
438 CFI_REMEMBER_STATE 370 CFI_REMEMBER_STATE
439 /* 371 /*
440 * sysretq will re-enable interrupts: 372 * sysretq will re-enable interrupts:
@@ -448,49 +380,10 @@ sysret_check:
448 USERGS_SYSRET64 380 USERGS_SYSRET64
449 381
450 CFI_RESTORE_STATE 382 CFI_RESTORE_STATE
451 /* Handle reschedules */
452 /* edx: work, edi: workmask */
453sysret_careful:
454 bt $TIF_NEED_RESCHED,%edx
455 jnc sysret_signal
456 TRACE_IRQS_ON
457 ENABLE_INTERRUPTS(CLBR_NONE)
458 pushq_cfi %rdi
459 SCHEDULE_USER
460 popq_cfi %rdi
461 jmp sysret_check
462 383
463 /* Handle a signal */ 384int_ret_from_sys_call_fixup:
464sysret_signal:
465 TRACE_IRQS_ON
466 ENABLE_INTERRUPTS(CLBR_NONE)
467#ifdef CONFIG_AUDITSYSCALL
468 bt $TIF_SYSCALL_AUDIT,%edx
469 jc sysret_audit
470#endif
471 /*
472 * We have a signal, or exit tracing or single-step.
473 * These all wind up with the iret return path anyway,
474 * so just join that path right now.
475 */
476 FIXUP_TOP_OF_STACK %r11, -ARGOFFSET 385 FIXUP_TOP_OF_STACK %r11, -ARGOFFSET
477 jmp int_check_syscall_exit_work 386 jmp int_ret_from_sys_call
478
479#ifdef CONFIG_AUDITSYSCALL
480 /*
481 * Return fast path for syscall audit. Call __audit_syscall_exit()
482 * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
483 * masked off.
484 */
485sysret_audit:
486 movq RAX-ARGOFFSET(%rsp),%rsi /* second arg, syscall return value */
487 cmpq $-MAX_ERRNO,%rsi /* is it < -MAX_ERRNO? */
488 setbe %al /* 1 if so, 0 if not */
489 movzbl %al,%edi /* zero-extend that into %edi */
490 call __audit_syscall_exit
491 movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
492 jmp sysret_check
493#endif /* CONFIG_AUDITSYSCALL */
494 387
495 /* Do syscall tracing */ 388 /* Do syscall tracing */
496tracesys: 389tracesys:
@@ -626,19 +519,6 @@ END(\label)
626 FORK_LIKE vfork 519 FORK_LIKE vfork
627 FIXED_FRAME stub_iopl, sys_iopl 520 FIXED_FRAME stub_iopl, sys_iopl
628 521
629ENTRY(ptregscall_common)
630 DEFAULT_FRAME 1 8 /* offset 8: return address */
631 RESTORE_TOP_OF_STACK %r11, 8
632 movq_cfi_restore R15+8, r15
633 movq_cfi_restore R14+8, r14
634 movq_cfi_restore R13+8, r13
635 movq_cfi_restore R12+8, r12
636 movq_cfi_restore RBP+8, rbp
637 movq_cfi_restore RBX+8, rbx
638 ret $REST_SKIP /* pop extended registers */
639 CFI_ENDPROC
640END(ptregscall_common)
641
642ENTRY(stub_execve) 522ENTRY(stub_execve)
643 CFI_STARTPROC 523 CFI_STARTPROC
644 addq $8, %rsp 524 addq $8, %rsp
@@ -779,7 +659,48 @@ END(interrupt)
779 /* reserve pt_regs for scratch regs and rbp */ 659 /* reserve pt_regs for scratch regs and rbp */
780 subq $ORIG_RAX-RBP, %rsp 660 subq $ORIG_RAX-RBP, %rsp
781 CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP 661 CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP
782 SAVE_ARGS_IRQ 662 cld
663 /* start from rbp in pt_regs and jump over */
664 movq_cfi rdi, (RDI-RBP)
665 movq_cfi rsi, (RSI-RBP)
666 movq_cfi rdx, (RDX-RBP)
667 movq_cfi rcx, (RCX-RBP)
668 movq_cfi rax, (RAX-RBP)
669 movq_cfi r8, (R8-RBP)
670 movq_cfi r9, (R9-RBP)
671 movq_cfi r10, (R10-RBP)
672 movq_cfi r11, (R11-RBP)
673
674 /* Save rbp so that we can unwind from get_irq_regs() */
675 movq_cfi rbp, 0
676
677 /* Save previous stack value */
678 movq %rsp, %rsi
679
680 leaq -RBP(%rsp),%rdi /* arg1 for handler */
681 testl $3, CS-RBP(%rsi)
682 je 1f
683 SWAPGS
684 /*
685 * irq_count is used to check if a CPU is already on an interrupt stack
686 * or not. While this is essentially redundant with preempt_count it is
687 * a little cheaper to use a separate counter in the PDA (short of
688 * moving irq_enter into assembly, which would be too much work)
689 */
6901: incl PER_CPU_VAR(irq_count)
691 cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
692 CFI_DEF_CFA_REGISTER rsi
693
694 /* Store previous stack value */
695 pushq %rsi
696 CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \
697 0x77 /* DW_OP_breg7 */, 0, \
698 0x06 /* DW_OP_deref */, \
699 0x08 /* DW_OP_const1u */, SS+8-RBP, \
700 0x22 /* DW_OP_plus */
701 /* We entered an interrupt context - irqs are off: */
702 TRACE_IRQS_OFF
703
783 call \func 704 call \func
784 .endm 705 .endm
785 706
@@ -831,6 +752,60 @@ retint_swapgs: /* return to user-space */
831 */ 752 */
832 DISABLE_INTERRUPTS(CLBR_ANY) 753 DISABLE_INTERRUPTS(CLBR_ANY)
833 TRACE_IRQS_IRETQ 754 TRACE_IRQS_IRETQ
755
756 /*
757 * Try to use SYSRET instead of IRET if we're returning to
758 * a completely clean 64-bit userspace context.
759 */
760 movq (RCX-R11)(%rsp), %rcx
761 cmpq %rcx,(RIP-R11)(%rsp) /* RCX == RIP */
762 jne opportunistic_sysret_failed
763
764 /*
765 * On Intel CPUs, sysret with non-canonical RCX/RIP will #GP
766 * in kernel space. This essentially lets the user take over
767 * the kernel, since userspace controls RSP. It's not worth
768 * testing for canonicalness exactly -- this check detects any
769 * of the 17 high bits set, which is true for non-canonical
770 * or kernel addresses. (This will pessimize vsyscall=native.
771 * Big deal.)
772 *
773 * If virtual addresses ever become wider, this will need
774 * to be updated to remain correct on both old and new CPUs.
775 */
776 .ifne __VIRTUAL_MASK_SHIFT - 47
777 .error "virtual address width changed -- sysret checks need update"
778 .endif
779 shr $__VIRTUAL_MASK_SHIFT, %rcx
780 jnz opportunistic_sysret_failed
781
782 cmpq $__USER_CS,(CS-R11)(%rsp) /* CS must match SYSRET */
783 jne opportunistic_sysret_failed
784
785 movq (R11-ARGOFFSET)(%rsp), %r11
786 cmpq %r11,(EFLAGS-ARGOFFSET)(%rsp) /* R11 == RFLAGS */
787 jne opportunistic_sysret_failed
788
789 testq $X86_EFLAGS_RF,%r11 /* sysret can't restore RF */
790 jnz opportunistic_sysret_failed
791
792 /* nothing to check for RSP */
793
794 cmpq $__USER_DS,(SS-ARGOFFSET)(%rsp) /* SS must match SYSRET */
795 jne opportunistic_sysret_failed
796
797 /*
798 * We win! This label is here just for ease of understanding
799 * perf profiles. Nothing jumps here.
800 */
801irq_return_via_sysret:
802 CFI_REMEMBER_STATE
803 RESTORE_ARGS 1,8,1
804 movq (RSP-RIP)(%rsp),%rsp
805 USERGS_SYSRET64
806 CFI_RESTORE_STATE
807
808opportunistic_sysret_failed:
834 SWAPGS 809 SWAPGS
835 jmp restore_args 810 jmp restore_args
836 811
@@ -1048,6 +1023,11 @@ ENTRY(\sym)
1048 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 1023 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1049 1024
1050 .if \paranoid 1025 .if \paranoid
1026 .if \paranoid == 1
1027 CFI_REMEMBER_STATE
1028 testl $3, CS(%rsp) /* If coming from userspace, switch */
1029 jnz 1f /* stacks. */
1030 .endif
1051 call save_paranoid 1031 call save_paranoid
1052 .else 1032 .else
1053 call error_entry 1033 call error_entry
@@ -1088,6 +1068,36 @@ ENTRY(\sym)
1088 jmp error_exit /* %ebx: no swapgs flag */ 1068 jmp error_exit /* %ebx: no swapgs flag */
1089 .endif 1069 .endif
1090 1070
1071 .if \paranoid == 1
1072 CFI_RESTORE_STATE
1073 /*
1074 * Paranoid entry from userspace. Switch stacks and treat it
1075 * as a normal entry. This means that paranoid handlers
1076 * run in real process context if user_mode(regs).
1077 */
10781:
1079 call error_entry
1080
1081 DEFAULT_FRAME 0
1082
1083 movq %rsp,%rdi /* pt_regs pointer */
1084 call sync_regs
1085 movq %rax,%rsp /* switch stack */
1086
1087 movq %rsp,%rdi /* pt_regs pointer */
1088
1089 .if \has_error_code
1090 movq ORIG_RAX(%rsp),%rsi /* get error code */
1091 movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */
1092 .else
1093 xorl %esi,%esi /* no error code */
1094 .endif
1095
1096 call \do_sym
1097
1098 jmp error_exit /* %ebx: no swapgs flag */
1099 .endif
1100
1091 CFI_ENDPROC 1101 CFI_ENDPROC
1092END(\sym) 1102END(\sym)
1093.endm 1103.endm
@@ -1108,7 +1118,7 @@ idtentry overflow do_overflow has_error_code=0
1108idtentry bounds do_bounds has_error_code=0 1118idtentry bounds do_bounds has_error_code=0
1109idtentry invalid_op do_invalid_op has_error_code=0 1119idtentry invalid_op do_invalid_op has_error_code=0
1110idtentry device_not_available do_device_not_available has_error_code=0 1120idtentry device_not_available do_device_not_available has_error_code=0
1111idtentry double_fault do_double_fault has_error_code=1 paranoid=1 1121idtentry double_fault do_double_fault has_error_code=1 paranoid=2
1112idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0 1122idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0
1113idtentry invalid_TSS do_invalid_TSS has_error_code=1 1123idtentry invalid_TSS do_invalid_TSS has_error_code=1
1114idtentry segment_not_present do_segment_not_present has_error_code=1 1124idtentry segment_not_present do_segment_not_present has_error_code=1
@@ -1289,16 +1299,14 @@ idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(
1289#endif 1299#endif
1290 1300
1291 /* 1301 /*
1292 * "Paranoid" exit path from exception stack. 1302 * "Paranoid" exit path from exception stack. This is invoked
1293 * Paranoid because this is used by NMIs and cannot take 1303 * only on return from non-NMI IST interrupts that came
1294 * any kernel state for granted. 1304 * from kernel space.
1295 * We don't do kernel preemption checks here, because only
1296 * NMI should be common and it does not enable IRQs and
1297 * cannot get reschedule ticks.
1298 * 1305 *
1299 * "trace" is 0 for the NMI handler only, because irq-tracing 1306 * We may be returning to very strange contexts (e.g. very early
1300 * is fundamentally NMI-unsafe. (we cannot change the soft and 1307 * in syscall entry), so checking for preemption here would
1301 * hard flags at once, atomically) 1308 * be complicated. Fortunately, there's no good reason
1309 * to try to handle preemption here.
1302 */ 1310 */
1303 1311
1304 /* ebx: no swapgs flag */ 1312 /* ebx: no swapgs flag */
@@ -1308,43 +1316,14 @@ ENTRY(paranoid_exit)
1308 TRACE_IRQS_OFF_DEBUG 1316 TRACE_IRQS_OFF_DEBUG
1309 testl %ebx,%ebx /* swapgs needed? */ 1317 testl %ebx,%ebx /* swapgs needed? */
1310 jnz paranoid_restore 1318 jnz paranoid_restore
1311 testl $3,CS(%rsp)
1312 jnz paranoid_userspace
1313paranoid_swapgs:
1314 TRACE_IRQS_IRETQ 0 1319 TRACE_IRQS_IRETQ 0
1315 SWAPGS_UNSAFE_STACK 1320 SWAPGS_UNSAFE_STACK
1316 RESTORE_ALL 8 1321 RESTORE_ALL 8
1317 jmp irq_return 1322 INTERRUPT_RETURN
1318paranoid_restore: 1323paranoid_restore:
1319 TRACE_IRQS_IRETQ_DEBUG 0 1324 TRACE_IRQS_IRETQ_DEBUG 0
1320 RESTORE_ALL 8 1325 RESTORE_ALL 8
1321 jmp irq_return 1326 INTERRUPT_RETURN
1322paranoid_userspace:
1323 GET_THREAD_INFO(%rcx)
1324 movl TI_flags(%rcx),%ebx
1325 andl $_TIF_WORK_MASK,%ebx
1326 jz paranoid_swapgs
1327 movq %rsp,%rdi /* &pt_regs */
1328 call sync_regs
1329 movq %rax,%rsp /* switch stack for scheduling */
1330 testl $_TIF_NEED_RESCHED,%ebx
1331 jnz paranoid_schedule
1332 movl %ebx,%edx /* arg3: thread flags */
1333 TRACE_IRQS_ON
1334 ENABLE_INTERRUPTS(CLBR_NONE)
1335 xorl %esi,%esi /* arg2: oldset */
1336 movq %rsp,%rdi /* arg1: &pt_regs */
1337 call do_notify_resume
1338 DISABLE_INTERRUPTS(CLBR_NONE)
1339 TRACE_IRQS_OFF
1340 jmp paranoid_userspace
1341paranoid_schedule:
1342 TRACE_IRQS_ON
1343 ENABLE_INTERRUPTS(CLBR_ANY)
1344 SCHEDULE_USER
1345 DISABLE_INTERRUPTS(CLBR_ANY)
1346 TRACE_IRQS_OFF
1347 jmp paranoid_userspace
1348 CFI_ENDPROC 1327 CFI_ENDPROC
1349END(paranoid_exit) 1328END(paranoid_exit)
1350 1329
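Editor's note: the opportunistic-SYSRET block above packs its conditions into
assembly; in C-style pseudocode the test reads roughly as follows (an
illustrative sketch only, using pt_regs field names; the authoritative logic
is the assembly above):

    static bool can_return_via_sysret(struct pt_regs *r)
    {
            if (r->cx != r->ip)       /* SYSRET reloads RIP from RCX */
                    return false;
            /* any of the 17 high bits set: non-canonical or kernel
               address, which would #GP on Intel in kernel mode */
            if (r->cx >> __VIRTUAL_MASK_SHIFT)
                    return false;
            if (r->cs != __USER_CS || r->ss != __USER_DS)
                    return false;     /* SYSRET forces these selectors */
            if (r->r11 != r->flags)   /* SYSRET reloads RFLAGS from R11 */
                    return false;
            if (r->r11 & X86_EFLAGS_RF)
                    return false;     /* SYSRET cannot restore RF */
            return true;              /* USERGS_SYSRET64 is safe */
    }

The FIXUP_TOP_OF_STACK change at the top of this file (copying RIP into the
saved RCX slot) keeps frames that went through the slow path eligible for
this fast return.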
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index 3d5fb509bdeb..7114ba220fd4 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -126,6 +126,8 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
126 *dr7 |= encode_dr7(i, info->len, info->type); 126 *dr7 |= encode_dr7(i, info->len, info->type);
127 127
128 set_debugreg(*dr7, 7); 128 set_debugreg(*dr7, 7);
129 if (info->mask)
130 set_dr_addr_mask(info->mask, i);
129 131
130 return 0; 132 return 0;
131} 133}
@@ -161,29 +163,8 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp)
161 *dr7 &= ~__encode_dr7(i, info->len, info->type); 163 *dr7 &= ~__encode_dr7(i, info->len, info->type);
162 164
163 set_debugreg(*dr7, 7); 165 set_debugreg(*dr7, 7);
164} 166 if (info->mask)
165 167 set_dr_addr_mask(0, i);
166static int get_hbp_len(u8 hbp_len)
167{
168 unsigned int len_in_bytes = 0;
169
170 switch (hbp_len) {
171 case X86_BREAKPOINT_LEN_1:
172 len_in_bytes = 1;
173 break;
174 case X86_BREAKPOINT_LEN_2:
175 len_in_bytes = 2;
176 break;
177 case X86_BREAKPOINT_LEN_4:
178 len_in_bytes = 4;
179 break;
180#ifdef CONFIG_X86_64
181 case X86_BREAKPOINT_LEN_8:
182 len_in_bytes = 8;
183 break;
184#endif
185 }
186 return len_in_bytes;
187} 168}
188 169
189/* 170/*
@@ -196,7 +177,7 @@ int arch_check_bp_in_kernelspace(struct perf_event *bp)
196 struct arch_hw_breakpoint *info = counter_arch_bp(bp); 177 struct arch_hw_breakpoint *info = counter_arch_bp(bp);
197 178
198 va = info->address; 179 va = info->address;
199 len = get_hbp_len(info->len); 180 len = bp->attr.bp_len;
200 181
201 return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); 182 return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
202} 183}
@@ -277,6 +258,8 @@ static int arch_build_bp_info(struct perf_event *bp)
277 } 258 }
278 259
279 /* Len */ 260 /* Len */
261 info->mask = 0;
262
280 switch (bp->attr.bp_len) { 263 switch (bp->attr.bp_len) {
281 case HW_BREAKPOINT_LEN_1: 264 case HW_BREAKPOINT_LEN_1:
282 info->len = X86_BREAKPOINT_LEN_1; 265 info->len = X86_BREAKPOINT_LEN_1;
@@ -293,11 +276,17 @@ static int arch_build_bp_info(struct perf_event *bp)
293 break; 276 break;
294#endif 277#endif
295 default: 278 default:
296 return -EINVAL; 279 if (!is_power_of_2(bp->attr.bp_len))
280 return -EINVAL;
281 if (!cpu_has_bpext)
282 return -EOPNOTSUPP;
283 info->mask = bp->attr.bp_len - 1;
284 info->len = X86_BREAKPOINT_LEN_1;
297 } 285 }
298 286
299 return 0; 287 return 0;
300} 288}
289
301/* 290/*
302 * Validate the arch-specific HW Breakpoint register settings 291 * Validate the arch-specific HW Breakpoint register settings
303 */ 292 */
@@ -312,11 +301,11 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
312 if (ret) 301 if (ret)
313 return ret; 302 return ret;
314 303
315 ret = -EINVAL;
316
317 switch (info->len) { 304 switch (info->len) {
318 case X86_BREAKPOINT_LEN_1: 305 case X86_BREAKPOINT_LEN_1:
319 align = 0; 306 align = 0;
307 if (info->mask)
308 align = info->mask;
320 break; 309 break;
321 case X86_BREAKPOINT_LEN_2: 310 case X86_BREAKPOINT_LEN_2:
322 align = 1; 311 align = 1;
@@ -330,7 +319,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
330 break; 319 break;
331#endif 320#endif
332 default: 321 default:
333 return ret; 322 WARN_ON_ONCE(1);
334 } 323 }
335 324
336 /* 325 /*
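Editor's note: the new default: branch accepts any power-of-2 length on CPUs
with cpu_has_bpext (AMD's breakpoint address-mask extension): the range is
expressed as a 1-byte breakpoint plus an address mask, programmed via the new
set_dr_addr_mask() call in arch_install_hw_breakpoint(). A worked example of
the encoding (a 64-byte watch range, assumed only for illustration):

    attr.bp_len = 64;                  /* power of 2, larger than LEN_8 */
    /* arch_build_bp_info() then produces:                              */
    info->mask = 64 - 1;               /* 0x3f: low 6 address bits are
                                          ignored when matching         */
    info->len  = X86_BREAKPOINT_LEN_1; /* base compare is one byte      */
    /* install programs the mask alongside DR7:
       set_dr_addr_mask(info->mask, i);                                 */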
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index a9a4229f6161..81049ffab2d6 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -19,6 +19,19 @@
19#include <asm/fpu-internal.h> 19#include <asm/fpu-internal.h>
20#include <asm/user.h> 20#include <asm/user.h>
21 21
22static DEFINE_PER_CPU(bool, in_kernel_fpu);
23
24void kernel_fpu_disable(void)
25{
26 WARN_ON(this_cpu_read(in_kernel_fpu));
27 this_cpu_write(in_kernel_fpu, true);
28}
29
30void kernel_fpu_enable(void)
31{
32 this_cpu_write(in_kernel_fpu, false);
33}
34
22/* 35/*
23 * Were we in an interrupt that interrupted kernel mode? 36 * Were we in an interrupt that interrupted kernel mode?
24 * 37 *
@@ -33,6 +46,9 @@
33 */ 46 */
34static inline bool interrupted_kernel_fpu_idle(void) 47static inline bool interrupted_kernel_fpu_idle(void)
35{ 48{
49 if (this_cpu_read(in_kernel_fpu))
50 return false;
51
36 if (use_eager_fpu()) 52 if (use_eager_fpu())
37 return __thread_has_fpu(current); 53 return __thread_has_fpu(current);
38 54
@@ -73,10 +89,10 @@ void __kernel_fpu_begin(void)
73{ 89{
74 struct task_struct *me = current; 90 struct task_struct *me = current;
75 91
92 this_cpu_write(in_kernel_fpu, true);
93
76 if (__thread_has_fpu(me)) { 94 if (__thread_has_fpu(me)) {
77 __thread_clear_has_fpu(me);
78 __save_init_fpu(me); 95 __save_init_fpu(me);
79 /* We do 'stts()' in __kernel_fpu_end() */
80 } else if (!use_eager_fpu()) { 96 } else if (!use_eager_fpu()) {
81 this_cpu_write(fpu_owner_task, NULL); 97 this_cpu_write(fpu_owner_task, NULL);
82 clts(); 98 clts();
@@ -86,19 +102,16 @@ EXPORT_SYMBOL(__kernel_fpu_begin);
86 102
87void __kernel_fpu_end(void) 103void __kernel_fpu_end(void)
88{ 104{
89 if (use_eager_fpu()) { 105 struct task_struct *me = current;
90 /* 106
91 * For eager fpu, most the time, tsk_used_math() is true. 107 if (__thread_has_fpu(me)) {
92 * Restore the user math as we are done with the kernel usage. 108 if (WARN_ON(restore_fpu_checking(me)))
93 * At few instances during thread exit, signal handling etc, 109 drop_init_fpu(me);
94 * tsk_used_math() is false. Those few places will take proper 110 } else if (!use_eager_fpu()) {
95 * actions, so we don't need to restore the math here.
96 */
97 if (likely(tsk_used_math(current)))
98 math_state_restore();
99 } else {
100 stts(); 111 stts();
101 } 112 }
113
114 this_cpu_write(in_kernel_fpu, false);
102} 115}
103EXPORT_SYMBOL(__kernel_fpu_end); 116EXPORT_SYMBOL(__kernel_fpu_end);
104 117
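Editor's note: in_kernel_fpu closes a window where an interrupt arriving while
a task's FPU state is only half loaded could see irq_fpu_usable() return true,
run kernel_fpu_begin(), and corrupt the restore in progress. With the flag
set, interrupted_kernel_fpu_idle() refuses immediately. The companion bracket
in math_state_restore() (see the traps.c hunk below) uses it like this,
condensed from that hunk:

    kernel_fpu_disable();        /* irq_fpu_usable() now returns false */
    __thread_fpu_begin(tsk);     /* registers are in flux...           */
    if (unlikely(restore_fpu_checking(tsk)))
            drop_init_fpu(tsk);  /* ...until the restore completes     */
    kernel_fpu_enable();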
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 63ce838e5a54..28d28f5eb8f4 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -69,16 +69,9 @@ static void call_on_stack(void *func, void *stack)
69 : "memory", "cc", "edx", "ecx", "eax"); 69 : "memory", "cc", "edx", "ecx", "eax");
70} 70}
71 71
72/* how to get the current stack pointer from C */
73#define current_stack_pointer ({ \
74 unsigned long sp; \
75 asm("mov %%esp,%0" : "=g" (sp)); \
76 sp; \
77})
78
79static inline void *current_stack(void) 72static inline void *current_stack(void)
80{ 73{
81 return (void *)(current_stack_pointer & ~(THREAD_SIZE - 1)); 74 return (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1));
82} 75}
83 76
84static inline int 77static inline int
@@ -103,7 +96,7 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
103 96
104 /* Save the next esp at the bottom of the stack */ 97 /* Save the next esp at the bottom of the stack */
105 prev_esp = (u32 *)irqstk; 98 prev_esp = (u32 *)irqstk;
106 *prev_esp = current_stack_pointer; 99 *prev_esp = current_stack_pointer();
107 100
108 if (unlikely(overflow)) 101 if (unlikely(overflow))
109 call_on_stack(print_stack_overflow, isp); 102 call_on_stack(print_stack_overflow, isp);
@@ -156,7 +149,7 @@ void do_softirq_own_stack(void)
156 149
157 /* Push the previous esp onto the stack */ 150 /* Push the previous esp onto the stack */
158 prev_esp = (u32 *)irqstk; 151 prev_esp = (u32 *)irqstk;
159 *prev_esp = current_stack_pointer; 152 *prev_esp = current_stack_pointer();
160 153
161 call_on_stack(__do_softirq, isp); 154 call_on_stack(__do_softirq, isp);
162} 155}
diff --git a/arch/x86/kernel/pmc_atom.c b/arch/x86/kernel/pmc_atom.c
index 0ee5025e0fa4..d66a4fe6caee 100644
--- a/arch/x86/kernel/pmc_atom.c
+++ b/arch/x86/kernel/pmc_atom.c
@@ -25,8 +25,6 @@
25 25
26#include <asm/pmc_atom.h> 26#include <asm/pmc_atom.h>
27 27
28#define DRIVER_NAME KBUILD_MODNAME
29
30struct pmc_dev { 28struct pmc_dev {
31 u32 base_addr; 29 u32 base_addr;
32 void __iomem *regmap; 30 void __iomem *regmap;
@@ -38,12 +36,12 @@ struct pmc_dev {
38static struct pmc_dev pmc_device; 36static struct pmc_dev pmc_device;
39static u32 acpi_base_addr; 37static u32 acpi_base_addr;
40 38
41struct pmc_dev_map { 39struct pmc_bit_map {
42 const char *name; 40 const char *name;
43 u32 bit_mask; 41 u32 bit_mask;
44}; 42};
45 43
46static const struct pmc_dev_map dev_map[] = { 44static const struct pmc_bit_map dev_map[] = {
47 {"0 - LPSS1_F0_DMA", BIT_LPSS1_F0_DMA}, 45 {"0 - LPSS1_F0_DMA", BIT_LPSS1_F0_DMA},
48 {"1 - LPSS1_F1_PWM1", BIT_LPSS1_F1_PWM1}, 46 {"1 - LPSS1_F1_PWM1", BIT_LPSS1_F1_PWM1},
49 {"2 - LPSS1_F2_PWM2", BIT_LPSS1_F2_PWM2}, 47 {"2 - LPSS1_F2_PWM2", BIT_LPSS1_F2_PWM2},
@@ -82,6 +80,27 @@ static const struct pmc_dev_map dev_map[] = {
82 {"35 - DFX", BIT_DFX}, 80 {"35 - DFX", BIT_DFX},
83}; 81};
84 82
83static const struct pmc_bit_map pss_map[] = {
84 {"0 - GBE", PMC_PSS_BIT_GBE},
85 {"1 - SATA", PMC_PSS_BIT_SATA},
86 {"2 - HDA", PMC_PSS_BIT_HDA},
87 {"3 - SEC", PMC_PSS_BIT_SEC},
88 {"4 - PCIE", PMC_PSS_BIT_PCIE},
89 {"5 - LPSS", PMC_PSS_BIT_LPSS},
90 {"6 - LPE", PMC_PSS_BIT_LPE},
91 {"7 - DFX", PMC_PSS_BIT_DFX},
92 {"8 - USH_CTRL", PMC_PSS_BIT_USH_CTRL},
93 {"9 - USH_SUS", PMC_PSS_BIT_USH_SUS},
94 {"10 - USH_VCCS", PMC_PSS_BIT_USH_VCCS},
95 {"11 - USH_VCCA", PMC_PSS_BIT_USH_VCCA},
96 {"12 - OTG_CTRL", PMC_PSS_BIT_OTG_CTRL},
97 {"13 - OTG_VCCS", PMC_PSS_BIT_OTG_VCCS},
98 {"14 - OTG_VCCA_CLK", PMC_PSS_BIT_OTG_VCCA_CLK},
99 {"15 - OTG_VCCA", PMC_PSS_BIT_OTG_VCCA},
100 {"16 - USB", PMC_PSS_BIT_USB},
101 {"17 - USB_SUS", PMC_PSS_BIT_USB_SUS},
102};
103
85static inline u32 pmc_reg_read(struct pmc_dev *pmc, int reg_offset) 104static inline u32 pmc_reg_read(struct pmc_dev *pmc, int reg_offset)
86{ 105{
87 return readl(pmc->regmap + reg_offset); 106 return readl(pmc->regmap + reg_offset);
@@ -169,6 +188,32 @@ static const struct file_operations pmc_dev_state_ops = {
169 .release = single_release, 188 .release = single_release,
170}; 189};
171 190
191static int pmc_pss_state_show(struct seq_file *s, void *unused)
192{
193 struct pmc_dev *pmc = s->private;
194 u32 pss = pmc_reg_read(pmc, PMC_PSS);
195 int pss_index;
196
197 for (pss_index = 0; pss_index < ARRAY_SIZE(pss_map); pss_index++) {
198 seq_printf(s, "Island: %-32s\tState: %s\n",
199 pss_map[pss_index].name,
200 pss_map[pss_index].bit_mask & pss ? "Off" : "On");
201 }
202 return 0;
203}
204
205static int pmc_pss_state_open(struct inode *inode, struct file *file)
206{
207 return single_open(file, pmc_pss_state_show, inode->i_private);
208}
209
210static const struct file_operations pmc_pss_state_ops = {
211 .open = pmc_pss_state_open,
212 .read = seq_read,
213 .llseek = seq_lseek,
214 .release = single_release,
215};
216
172static int pmc_sleep_tmr_show(struct seq_file *s, void *unused) 217static int pmc_sleep_tmr_show(struct seq_file *s, void *unused)
173{ 218{
174 struct pmc_dev *pmc = s->private; 219 struct pmc_dev *pmc = s->private;
@@ -202,11 +247,7 @@ static const struct file_operations pmc_sleep_tmr_ops = {
202 247
203static void pmc_dbgfs_unregister(struct pmc_dev *pmc) 248static void pmc_dbgfs_unregister(struct pmc_dev *pmc)
204{ 249{
205 if (!pmc->dbgfs_dir)
206 return;
207
208 debugfs_remove_recursive(pmc->dbgfs_dir); 250 debugfs_remove_recursive(pmc->dbgfs_dir);
209 pmc->dbgfs_dir = NULL;
210} 251}
211 252
212static int pmc_dbgfs_register(struct pmc_dev *pmc, struct pci_dev *pdev) 253static int pmc_dbgfs_register(struct pmc_dev *pmc, struct pci_dev *pdev)
@@ -217,19 +258,29 @@ static int pmc_dbgfs_register(struct pmc_dev *pmc, struct pci_dev *pdev)
217 if (!dir) 258 if (!dir)
218 return -ENOMEM; 259 return -ENOMEM;
219 260
261 pmc->dbgfs_dir = dir;
262
220 f = debugfs_create_file("dev_state", S_IFREG | S_IRUGO, 263 f = debugfs_create_file("dev_state", S_IFREG | S_IRUGO,
221 dir, pmc, &pmc_dev_state_ops); 264 dir, pmc, &pmc_dev_state_ops);
222 if (!f) { 265 if (!f) {
223 dev_err(&pdev->dev, "dev_states register failed\n"); 266 dev_err(&pdev->dev, "dev_state register failed\n");
224 goto err; 267 goto err;
225 } 268 }
269
270 f = debugfs_create_file("pss_state", S_IFREG | S_IRUGO,
271 dir, pmc, &pmc_pss_state_ops);
272 if (!f) {
273 dev_err(&pdev->dev, "pss_state register failed\n");
274 goto err;
275 }
276
226 f = debugfs_create_file("sleep_state", S_IFREG | S_IRUGO, 277 f = debugfs_create_file("sleep_state", S_IFREG | S_IRUGO,
227 dir, pmc, &pmc_sleep_tmr_ops); 278 dir, pmc, &pmc_sleep_tmr_ops);
228 if (!f) { 279 if (!f) {
229 dev_err(&pdev->dev, "sleep_state register failed\n"); 280 dev_err(&pdev->dev, "sleep_state register failed\n");
230 goto err; 281 goto err;
231 } 282 }
232 pmc->dbgfs_dir = dir; 283
233 return 0; 284 return 0;
234err: 285err:
235 pmc_dbgfs_unregister(pmc); 286 pmc_dbgfs_unregister(pmc);
@@ -292,7 +343,6 @@ MODULE_DEVICE_TABLE(pci, pmc_pci_ids);
292 343
293static int __init pmc_atom_init(void) 344static int __init pmc_atom_init(void)
294{ 345{
295 int err = -ENODEV;
296 struct pci_dev *pdev = NULL; 346 struct pci_dev *pdev = NULL;
297 const struct pci_device_id *ent; 347 const struct pci_device_id *ent;
298 348
@@ -306,14 +356,11 @@ static int __init pmc_atom_init(void)
306 */ 356 */
307 for_each_pci_dev(pdev) { 357 for_each_pci_dev(pdev) {
308 ent = pci_match_id(pmc_pci_ids, pdev); 358 ent = pci_match_id(pmc_pci_ids, pdev);
309 if (ent) { 359 if (ent)
310 err = pmc_setup_dev(pdev); 360 return pmc_setup_dev(pdev);
311 goto out;
312 }
313 } 361 }
314 /* Device not found. */ 362 /* Device not found. */
315out: 363 return -ENODEV;
316 return err;
317} 364}
318 365
319module_init(pmc_atom_init); 366module_init(pmc_atom_init);
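Editor's note: pss_state reuses the standard debugfs/seq_file recipe already
used for dev_state: a show() callback that prints the whole file body, wrapped
by single_open(). Stripped to its skeleton (names hypothetical), the pattern
is:

    #include <linux/debugfs.h>
    #include <linux/seq_file.h>

    static int foo_show(struct seq_file *s, void *unused)
    {
            seq_printf(s, "state: %s\n", "On"); /* whole file body */
            return 0;
    }

    static int foo_open(struct inode *inode, struct file *file)
    {
            /* inode->i_private is the data pointer handed to
               debugfs_create_file(), surfaced as s->private */
            return single_open(file, foo_show, inode->i_private);
    }

    static const struct file_operations foo_ops = {
            .open    = foo_open,
            .read    = seq_read,
            .llseek  = seq_lseek,
            .release = single_release,
    };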
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index ca9622a25e95..fe3dbfe0c4a5 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -170,7 +170,7 @@ static struct platform_device rtc_device = {
170static __init int add_rtc_cmos(void) 170static __init int add_rtc_cmos(void)
171{ 171{
172#ifdef CONFIG_PNP 172#ifdef CONFIG_PNP
173 static const char * const const ids[] __initconst = 173 static const char * const ids[] __initconst =
174 { "PNP0b00", "PNP0b01", "PNP0b02", }; 174 { "PNP0b00", "PNP0b01", "PNP0b02", };
175 struct pnp_dev *dev; 175 struct pnp_dev *dev;
176 struct pnp_id *id; 176 struct pnp_id *id;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index ab4734e5411d..c4648adadd7d 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -431,15 +431,13 @@ static void __init parse_setup_data(void)
431 431
432 pa_data = boot_params.hdr.setup_data; 432 pa_data = boot_params.hdr.setup_data;
433 while (pa_data) { 433 while (pa_data) {
434 u32 data_len, map_len, data_type; 434 u32 data_len, data_type;
435 435
436 map_len = max(PAGE_SIZE - (pa_data & ~PAGE_MASK), 436 data = early_memremap(pa_data, sizeof(*data));
437 (u64)sizeof(struct setup_data));
438 data = early_memremap(pa_data, map_len);
439 data_len = data->len + sizeof(struct setup_data); 437 data_len = data->len + sizeof(struct setup_data);
440 data_type = data->type; 438 data_type = data->type;
441 pa_next = data->next; 439 pa_next = data->next;
442 early_iounmap(data, map_len); 440 early_iounmap(data, sizeof(*data));
443 441
444 switch (data_type) { 442 switch (data_type) {
445 case SETUP_E820_EXT: 443 case SETUP_E820_EXT:
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index ed37a768d0fc..2a33c8f68319 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -740,12 +740,6 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
740{ 740{
741 user_exit(); 741 user_exit();
742 742
743#ifdef CONFIG_X86_MCE
744 /* notify userspace of pending MCEs */
745 if (thread_info_flags & _TIF_MCE_NOTIFY)
746 mce_notify_process();
747#endif /* CONFIG_X86_64 && CONFIG_X86_MCE */
748
749 if (thread_info_flags & _TIF_UPROBE) 743 if (thread_info_flags & _TIF_UPROBE)
750 uprobe_notify_resume(regs); 744 uprobe_notify_resume(regs);
751 745
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 6d7022c683e3..febc6aabc72e 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -73,7 +73,6 @@
73#include <asm/setup.h> 73#include <asm/setup.h>
74#include <asm/uv/uv.h> 74#include <asm/uv/uv.h>
75#include <linux/mc146818rtc.h> 75#include <linux/mc146818rtc.h>
76#include <asm/smpboot_hooks.h>
77#include <asm/i8259.h> 76#include <asm/i8259.h>
78#include <asm/realmode.h> 77#include <asm/realmode.h>
79#include <asm/misc.h> 78#include <asm/misc.h>
@@ -104,6 +103,43 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
104 103
105atomic_t init_deasserted; 104atomic_t init_deasserted;
106 105
106static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
107{
108 unsigned long flags;
109
110 spin_lock_irqsave(&rtc_lock, flags);
111 CMOS_WRITE(0xa, 0xf);
112 spin_unlock_irqrestore(&rtc_lock, flags);
113 local_flush_tlb();
114 pr_debug("1.\n");
115 *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
116 start_eip >> 4;
117 pr_debug("2.\n");
118 *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
119 start_eip & 0xf;
120 pr_debug("3.\n");
121}
122
123static inline void smpboot_restore_warm_reset_vector(void)
124{
125 unsigned long flags;
126
127 /*
128 * Install writable page 0 entry to set BIOS data area.
129 */
130 local_flush_tlb();
131
132 /*
133 * Paranoid: Set warm reset code and vector here back
134 * to default values.
135 */
136 spin_lock_irqsave(&rtc_lock, flags);
137 CMOS_WRITE(0, 0xf);
138 spin_unlock_irqrestore(&rtc_lock, flags);
139
140 *((volatile u32 *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0;
141}
142
107/* 143/*
108 * Report back to the Boot Processor during boot time or to the caller processor 144 * Report back to the Boot Processor during boot time or to the caller processor
109 * during CPU online. 145 * during CPU online.
@@ -136,8 +172,7 @@ static void smp_callin(void)
136 * CPU, first the APIC. (this is probably redundant on most 172 * CPU, first the APIC. (this is probably redundant on most
137 * boards) 173 * boards)
138 */ 174 */
139 setup_local_APIC(); 175 apic_ap_setup();
140 end_local_APIC_setup();
141 176
142 /* 177 /*
143 * Need to setup vector mappings before we enable interrupts. 178 * Need to setup vector mappings before we enable interrupts.
@@ -955,9 +990,12 @@ void arch_disable_smp_support(void)
955 */ 990 */
956static __init void disable_smp(void) 991static __init void disable_smp(void)
957{ 992{
993 pr_info("SMP disabled\n");
994
995 disable_ioapic_support();
996
958 init_cpu_present(cpumask_of(0)); 997 init_cpu_present(cpumask_of(0));
959 init_cpu_possible(cpumask_of(0)); 998 init_cpu_possible(cpumask_of(0));
960 smpboot_clear_io_apic_irqs();
961 999
962 if (smp_found_config) 1000 if (smp_found_config)
963 physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); 1001 physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
@@ -967,6 +1005,13 @@ static __init void disable_smp(void)
967 cpumask_set_cpu(0, cpu_core_mask(0)); 1005 cpumask_set_cpu(0, cpu_core_mask(0));
968} 1006}
969 1007
1008enum {
1009 SMP_OK,
1010 SMP_NO_CONFIG,
1011 SMP_NO_APIC,
1012 SMP_FORCE_UP,
1013};
1014
970/* 1015/*
971 * Various sanity checks. 1016 * Various sanity checks.
972 */ 1017 */
@@ -1014,10 +1059,7 @@ static int __init smp_sanity_check(unsigned max_cpus)
1014 if (!smp_found_config && !acpi_lapic) { 1059 if (!smp_found_config && !acpi_lapic) {
1015 preempt_enable(); 1060 preempt_enable();
1016 pr_notice("SMP motherboard not detected\n"); 1061 pr_notice("SMP motherboard not detected\n");
1017 disable_smp(); 1062 return SMP_NO_CONFIG;
1018 if (APIC_init_uniprocessor())
1019 pr_notice("Local APIC not detected. Using dummy APIC emulation.\n");
1020 return -1;
1021 } 1063 }
1022 1064
1023 /* 1065 /*
@@ -1041,9 +1083,7 @@ static int __init smp_sanity_check(unsigned max_cpus)
1041 boot_cpu_physical_apicid); 1083 boot_cpu_physical_apicid);
1042 pr_err("... forcing use of dummy APIC emulation (tell your hw vendor)\n"); 1084 pr_err("... forcing use of dummy APIC emulation (tell your hw vendor)\n");
1043 } 1085 }
1044 smpboot_clear_io_apic(); 1086 return SMP_NO_APIC;
1045 disable_ioapic_support();
1046 return -1;
1047 } 1087 }
1048 1088
1049 verify_local_APIC(); 1089 verify_local_APIC();
@@ -1053,15 +1093,10 @@ static int __init smp_sanity_check(unsigned max_cpus)
1053 */ 1093 */
1054 if (!max_cpus) { 1094 if (!max_cpus) {
1055 pr_info("SMP mode deactivated\n"); 1095 pr_info("SMP mode deactivated\n");
1056 smpboot_clear_io_apic(); 1096 return SMP_FORCE_UP;
1057
1058 connect_bsp_APIC();
1059 setup_local_APIC();
1060 bsp_end_local_APIC_setup();
1061 return -1;
1062 } 1097 }
1063 1098
1064 return 0; 1099 return SMP_OK;
1065} 1100}
1066 1101
1067static void __init smp_cpu_index_default(void) 1102static void __init smp_cpu_index_default(void)
@@ -1101,10 +1136,21 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1101 } 1136 }
1102 set_cpu_sibling_map(0); 1137 set_cpu_sibling_map(0);
1103 1138
1104 if (smp_sanity_check(max_cpus) < 0) { 1139 switch (smp_sanity_check(max_cpus)) {
1105 pr_info("SMP disabled\n"); 1140 case SMP_NO_CONFIG:
1106 disable_smp(); 1141 disable_smp();
1142 if (APIC_init_uniprocessor())
1143 pr_notice("Local APIC not detected. Using dummy APIC emulation.\n");
1107 return; 1144 return;
1145 case SMP_NO_APIC:
1146 disable_smp();
1147 return;
1148 case SMP_FORCE_UP:
1149 disable_smp();
1150 apic_bsp_setup(false);
1151 return;
1152 case SMP_OK:
1153 break;
1108 } 1154 }
1109 1155
1110 default_setup_apic_routing(); 1156 default_setup_apic_routing();
@@ -1115,33 +1161,10 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1115 /* Or can we switch back to PIC here? */ 1161 /* Or can we switch back to PIC here? */
1116 } 1162 }
1117 1163
1118 connect_bsp_APIC(); 1164 cpu0_logical_apicid = apic_bsp_setup(false);
1119
1120 /*
1121 * Switch from PIC to APIC mode.
1122 */
1123 setup_local_APIC();
1124
1125 if (x2apic_mode)
1126 cpu0_logical_apicid = apic_read(APIC_LDR);
1127 else
1128 cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
1129
1130 /*
1131 * Enable IO APIC before setting up error vector
1132 */
1133 if (!skip_ioapic_setup && nr_ioapics)
1134 enable_IO_APIC();
1135
1136 bsp_end_local_APIC_setup();
1137 smpboot_setup_io_apic();
1138 /*
1139 * Set up local APIC timer on boot CPU.
1140 */
1141 1165
1142 pr_info("CPU%d: ", 0); 1166 pr_info("CPU%d: ", 0);
1143 print_cpu_info(&cpu_data(0)); 1167 print_cpu_info(&cpu_data(0));
1144 x86_init.timers.setup_percpu_clockev();
1145 1168
1146 if (is_uv_system()) 1169 if (is_uv_system())
1147 uv_system_init(); 1170 uv_system_init();
@@ -1177,9 +1200,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
1177 1200
1178 nmi_selftest(); 1201 nmi_selftest();
1179 impress_friends(); 1202 impress_friends();
1180#ifdef CONFIG_X86_IO_APIC
1181 setup_ioapic_dest(); 1203 setup_ioapic_dest();
1182#endif
1183 mtrr_aps_init(); 1204 mtrr_aps_init();
1184} 1205}
1185 1206
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 88900e288021..9d2073e2ecc9 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -108,6 +108,88 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
108 preempt_count_dec(); 108 preempt_count_dec();
109} 109}
110 110
111enum ctx_state ist_enter(struct pt_regs *regs)
112{
113 enum ctx_state prev_state;
114
115 if (user_mode_vm(regs)) {
116 /* Other than that, we're just an exception. */
117 prev_state = exception_enter();
118 } else {
119 /*
120 * We might have interrupted pretty much anything. In
121 * fact, if we're a machine check, we can even interrupt
122 * NMI processing. We don't want in_nmi() to return true,
123 * but we need to notify RCU.
124 */
125 rcu_nmi_enter();
126 prev_state = IN_KERNEL; /* the value is irrelevant. */
127 }
128
129 /*
130 * We are atomic because we're on the IST stack (or we're on x86_32,
131 * in which case we still shouldn't schedule).
132 *
133 * This must be after exception_enter(), because exception_enter()
134 * won't do anything if in_interrupt() returns true.
135 */
136 preempt_count_add(HARDIRQ_OFFSET);
137
138 /* This code is a bit fragile. Test it. */
139 rcu_lockdep_assert(rcu_is_watching(), "ist_enter didn't work");
140
141 return prev_state;
142}
143
144void ist_exit(struct pt_regs *regs, enum ctx_state prev_state)
145{
146 /* Must be before exception_exit. */
147 preempt_count_sub(HARDIRQ_OFFSET);
148
149 if (user_mode_vm(regs))
150 return exception_exit(prev_state);
151 else
152 rcu_nmi_exit();
153}
154
155/**
156 * ist_begin_non_atomic() - begin a non-atomic section in an IST exception
157 * @regs: regs passed to the IST exception handler
158 *
159 * IST exception handlers normally cannot schedule. As a special
160 * exception, if the exception interrupted userspace code (i.e.
161 * user_mode_vm(regs) would return true) and the exception was not
162 * a double fault, it can be safe to schedule. ist_begin_non_atomic()
163 * begins a non-atomic section within an ist_enter()/ist_exit() region.
164 * Callers are responsible for enabling interrupts themselves inside
 165 * the non-atomic section, and callers must call ist_end_non_atomic()
166 * before ist_exit().
167 */
168void ist_begin_non_atomic(struct pt_regs *regs)
169{
170 BUG_ON(!user_mode_vm(regs));
171
172 /*
173 * Sanity check: we need to be on the normal thread stack. This
 174 * will catch asm bugs and any attempt to use ist_begin_non_atomic()
175 * from double_fault.
176 */
177 BUG_ON(((current_stack_pointer() ^ this_cpu_read_stable(kernel_stack))
178 & ~(THREAD_SIZE - 1)) != 0);
179
180 preempt_count_sub(HARDIRQ_OFFSET);
181}
182
183/**
 184 * ist_end_non_atomic() - end a non-atomic section in an IST exception
185 *
186 * Ends a non-atomic section started with ist_begin_non_atomic().
187 */
188void ist_end_non_atomic(void)
189{
190 preempt_count_add(HARDIRQ_OFFSET);
191}
192
111static nokprobe_inline int 193static nokprobe_inline int
112do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, 194do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
113 struct pt_regs *regs, long error_code) 195 struct pt_regs *regs, long error_code)
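Editor's note: taken together, these helpers give every IST handler one
uniform bracket: ist_enter()/ist_exit() around the whole handler, and
ist_begin_non_atomic()/ist_end_non_atomic() around any stretch that must
sleep. A usage sketch (this is the shape do_machine_check() takes in the
mce.c hunks earlier in this series; the handler name is hypothetical):

    dotraplinkage void do_example_ist(struct pt_regs *regs, long error_code)
    {
            enum ctx_state prev_state = ist_enter(regs);

            /* ... atomic handling, always safe here ... */

            if (user_mode_vm(regs)) {
                    /* interrupted userspace: scheduling is allowed */
                    ist_begin_non_atomic(regs);
                    local_irq_enable();
                    /* ... sleeping work, e.g. memory_failure() ... */
                    local_irq_disable();
                    ist_end_non_atomic();
            }

            ist_exit(regs, prev_state);
    }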
@@ -251,6 +333,8 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
251 * end up promoting it to a doublefault. In that case, modify 333 * end up promoting it to a doublefault. In that case, modify
252 * the stack to make it look like we just entered the #GP 334 * the stack to make it look like we just entered the #GP
253 * handler from user space, similar to bad_iret. 335 * handler from user space, similar to bad_iret.
336 *
337 * No need for ist_enter here because we don't use RCU.
254 */ 338 */
255 if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY && 339 if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY &&
256 regs->cs == __KERNEL_CS && 340 regs->cs == __KERNEL_CS &&
@@ -263,12 +347,12 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
263 normal_regs->orig_ax = 0; /* Missing (lost) #GP error code */ 347 normal_regs->orig_ax = 0; /* Missing (lost) #GP error code */
264 regs->ip = (unsigned long)general_protection; 348 regs->ip = (unsigned long)general_protection;
265 regs->sp = (unsigned long)&normal_regs->orig_ax; 349 regs->sp = (unsigned long)&normal_regs->orig_ax;
350
266 return; 351 return;
267 } 352 }
268#endif 353#endif
269 354
270 exception_enter(); 355 ist_enter(regs); /* Discard prev_state because we won't return. */
271 /* Return not checked because double check cannot be ignored */
272 notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); 356 notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
273 357
274 tsk->thread.error_code = error_code; 358 tsk->thread.error_code = error_code;
@@ -434,7 +518,7 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
434 if (poke_int3_handler(regs)) 518 if (poke_int3_handler(regs))
435 return; 519 return;
436 520
437 prev_state = exception_enter(); 521 prev_state = ist_enter(regs);
438#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP 522#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
439 if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, 523 if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
440 SIGTRAP) == NOTIFY_STOP) 524 SIGTRAP) == NOTIFY_STOP)
@@ -460,33 +544,20 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
460 preempt_conditional_cli(regs); 544 preempt_conditional_cli(regs);
461 debug_stack_usage_dec(); 545 debug_stack_usage_dec();
462exit: 546exit:
463 exception_exit(prev_state); 547 ist_exit(regs, prev_state);
464} 548}
465NOKPROBE_SYMBOL(do_int3); 549NOKPROBE_SYMBOL(do_int3);
466 550
467#ifdef CONFIG_X86_64 551#ifdef CONFIG_X86_64
468/* 552/*
469 * Help handler running on IST stack to switch back to user stack 553 * Help handler running on IST stack to switch off the IST stack if the
470 * for scheduling or signal handling. The actual stack switch is done in 554 * interrupted code was in user mode. The actual stack switch is done in
471 * entry.S 555 * entry_64.S
472 */ 556 */
473asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs) 557asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs)
474{ 558{
475 struct pt_regs *regs = eregs; 559 struct pt_regs *regs = task_pt_regs(current);
476 /* Did already sync */ 560 *regs = *eregs;
477 if (eregs == (struct pt_regs *)eregs->sp)
478 ;
479 /* Exception from user space */
480 else if (user_mode(eregs))
481 regs = task_pt_regs(current);
482 /*
483 * Exception from kernel and interrupts are enabled. Move to
484 * kernel process stack.
485 */
486 else if (eregs->flags & X86_EFLAGS_IF)
487 regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs));
488 if (eregs != regs)
489 *regs = *eregs;
490 return regs; 561 return regs;
491} 562}
492NOKPROBE_SYMBOL(sync_regs); 563NOKPROBE_SYMBOL(sync_regs);
@@ -554,7 +625,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
554 unsigned long dr6; 625 unsigned long dr6;
555 int si_code; 626 int si_code;
556 627
557 prev_state = exception_enter(); 628 prev_state = ist_enter(regs);
558 629
559 get_debugreg(dr6, 6); 630 get_debugreg(dr6, 6);
560 631
@@ -629,7 +700,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
629 debug_stack_usage_dec(); 700 debug_stack_usage_dec();
630 701
631exit: 702exit:
632 exception_exit(prev_state); 703 ist_exit(regs, prev_state);
633} 704}
634NOKPROBE_SYMBOL(do_debug); 705NOKPROBE_SYMBOL(do_debug);
635 706
@@ -788,18 +859,16 @@ void math_state_restore(void)
788 local_irq_disable(); 859 local_irq_disable();
789 } 860 }
790 861
862 /* Avoid __kernel_fpu_begin() right after __thread_fpu_begin() */
863 kernel_fpu_disable();
791 __thread_fpu_begin(tsk); 864 __thread_fpu_begin(tsk);
792
793 /*
794 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
795 */
796 if (unlikely(restore_fpu_checking(tsk))) { 865 if (unlikely(restore_fpu_checking(tsk))) {
797 drop_init_fpu(tsk); 866 drop_init_fpu(tsk);
798 force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk); 867 force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
799 return; 868 } else {
869 tsk->thread.fpu_counter++;
800 } 870 }
801 871 kernel_fpu_enable();
802 tsk->thread.fpu_counter++;
803} 872}
804EXPORT_SYMBOL_GPL(math_state_restore); 873EXPORT_SYMBOL_GPL(math_state_restore);
805 874
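The kernel_fpu_disable()/kernel_fpu_enable() pair closes a window where an interrupt arriving between __thread_fpu_begin() and the register restore could run __kernel_fpu_begin() and clobber the state being loaded. A sketch of such helpers, assuming a per-cpu in_kernel_fpu flag consulted by the irq_fpu_usable() path (the flag's actual name and home are in the companion i387.c change, not this hunk):

	static DEFINE_PER_CPU(bool, in_kernel_fpu);

	void kernel_fpu_disable(void)
	{
		/* Catch nested or unbalanced disables. */
		WARN_ON(this_cpu_read(in_kernel_fpu));
		this_cpu_write(in_kernel_fpu, true);
	}

	void kernel_fpu_enable(void)
	{
		this_cpu_write(in_kernel_fpu, false);
	}

	/* interrupted_kernel_fpu_idle() would then bail out early: */
	/*	if (this_cpu_read(in_kernel_fpu))	*/
	/*		return false;			*/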
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index f9d16ff56c6b..7dc7ba577ecd 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -40,6 +40,7 @@ config KVM
40 select HAVE_KVM_MSI 40 select HAVE_KVM_MSI
41 select HAVE_KVM_CPU_RELAX_INTERCEPT 41 select HAVE_KVM_CPU_RELAX_INTERCEPT
42 select KVM_VFIO 42 select KVM_VFIO
43 select SRCU
43 ---help--- 44 ---help---
44 Support hosting fully virtualized guest machines using hardware 45 Support hosting fully virtualized guest machines using hardware
45 virtualization extensions. You will need a fairly recent 46 virtualization extensions. You will need a fairly recent
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 4f0c0b954686..d52dcf0776ea 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -192,6 +192,9 @@ static void recalculate_apic_map(struct kvm *kvm)
192 u16 cid, lid; 192 u16 cid, lid;
193 u32 ldr, aid; 193 u32 ldr, aid;
194 194
195 if (!kvm_apic_present(vcpu))
196 continue;
197
195 aid = kvm_apic_id(apic); 198 aid = kvm_apic_id(apic);
196 ldr = kvm_apic_get_reg(apic, APIC_LDR); 199 ldr = kvm_apic_get_reg(apic, APIC_LDR);
197 cid = apic_cluster_id(new, ldr); 200 cid = apic_cluster_id(new, ldr);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 38dcec403b46..e3ff27a5b634 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -898,6 +898,8 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
898 if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON| 898 if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
899 VM_FAULT_HWPOISON_LARGE)) 899 VM_FAULT_HWPOISON_LARGE))
900 do_sigbus(regs, error_code, address, fault); 900 do_sigbus(regs, error_code, address, fault);
901 else if (fault & VM_FAULT_SIGSEGV)
902 bad_area_nosemaphore(regs, error_code, address);
901 else 903 else
902 BUG(); 904 BUG();
903 } 905 }
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 7b20bccf3648..2fb384724ebb 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -448,6 +448,22 @@ static const struct dmi_system_id pciprobe_dmi_table[] __initconst = {
448 DMI_MATCH(DMI_PRODUCT_NAME, "ftServer"), 448 DMI_MATCH(DMI_PRODUCT_NAME, "ftServer"),
449 }, 449 },
450 }, 450 },
451 {
452 .callback = set_scan_all,
453 .ident = "Stratus/NEC ftServer",
454 .matches = {
455 DMI_MATCH(DMI_SYS_VENDOR, "NEC"),
456 DMI_MATCH(DMI_PRODUCT_NAME, "Express5800/R32"),
457 },
458 },
459 {
460 .callback = set_scan_all,
461 .ident = "Stratus/NEC ftServer",
462 .matches = {
463 DMI_MATCH(DMI_SYS_VENDOR, "NEC"),
464 DMI_MATCH(DMI_PRODUCT_NAME, "Express5800/R31"),
465 },
466 },
451 {} 467 {}
452}; 468};
453 469
diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c
index 44b9271580b5..852aa4c92da0 100644
--- a/arch/x86/pci/intel_mid_pci.c
+++ b/arch/x86/pci/intel_mid_pci.c
@@ -293,7 +293,6 @@ static void mrst_power_off_unused_dev(struct pci_dev *dev)
293DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0801, mrst_power_off_unused_dev); 293DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0801, mrst_power_off_unused_dev);
294DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0809, mrst_power_off_unused_dev); 294DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0809, mrst_power_off_unused_dev);
295DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x080C, mrst_power_off_unused_dev); 295DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x080C, mrst_power_off_unused_dev);
296DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0812, mrst_power_off_unused_dev);
297DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0815, mrst_power_off_unused_dev); 296DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0815, mrst_power_off_unused_dev);
298 297
299/* 298/*
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 326198a4434e..676e5e04e4d4 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -610,6 +610,32 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header)
610 return 0; 610 return 0;
611} 611}
612 612
613#ifdef CONFIG_ACPI_APEI
614extern int (*arch_apei_filter_addr)(int (*func)(__u64 start, __u64 size,
615 void *data), void *data);
616
617static int pci_mmcfg_for_each_region(int (*func)(__u64 start, __u64 size,
618 void *data), void *data)
619{
620 struct pci_mmcfg_region *cfg;
621 int rc;
622
623 if (list_empty(&pci_mmcfg_list))
624 return 0;
625
626 list_for_each_entry(cfg, &pci_mmcfg_list, list) {
627 rc = func(cfg->res.start, resource_size(&cfg->res), data);
628 if (rc)
629 return rc;
630 }
631
632 return 0;
633}
634#define set_apei_filter() (arch_apei_filter_addr = pci_mmcfg_for_each_region)
635#else
636#define set_apei_filter()
637#endif
638
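With this hook installed, the APEI core can walk every MMCONFIG window and reject addresses that collide with one; a callback returning nonzero stops the walk and propagates its result, per the loop above. A hedged sketch of such a callback (the real consumer lives in drivers/acpi/apei, outside this diff; struct probe_range and range_overlaps are illustrative names only):

	struct probe_range {
		__u64 start;
		__u64 size;
	};

	/* Nonzero if the MMCONFIG region [start, start+size) overlaps the probe. */
	static int range_overlaps(__u64 start, __u64 size, void *data)
	{
		struct probe_range *p = data;

		return start < p->start + p->size && p->start < start + size;
	}

	/*
	 * struct probe_range p = { .start = addr, .size = len };
	 * if (arch_apei_filter_addr(range_overlaps, &p))
	 *	... address falls inside an ECAM window, refuse it ...
	 */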
613static void __init __pci_mmcfg_init(int early) 639static void __init __pci_mmcfg_init(int early)
614{ 640{
615 pci_mmcfg_reject_broken(early); 641 pci_mmcfg_reject_broken(early);
@@ -644,6 +670,8 @@ void __init pci_mmcfg_early_init(void)
644 else 670 else
645 acpi_sfi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg); 671 acpi_sfi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg);
646 __pci_mmcfg_init(1); 672 __pci_mmcfg_init(1);
673
674 set_apei_filter();
647 } 675 }
648} 676}
649 677
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 5a4affe025e8..09297c8e1fcd 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -205,4 +205,4 @@ $(vdso_img_insttargets): install_%: $(obj)/%.dbg $(MODLIB)/vdso FORCE
205PHONY += vdso_install $(vdso_img_insttargets) 205PHONY += vdso_install $(vdso_img_insttargets)
206vdso_install: $(vdso_img_insttargets) FORCE 206vdso_install: $(vdso_img_insttargets) FORCE
207 207
208clean-files := vdso32-syscall* vdso32-sysenter* vdso32-int80* 208clean-files := vdso32-syscall* vdso32-sysenter* vdso32-int80* vdso64*