aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/acpi/realmode/wakeup.S2
-rw-r--r--arch/x86/kernel/acpi/sleep.c2
-rw-r--r--arch/x86/kernel/alternative.c1
-rw-r--r--arch/x86/kernel/amd_iommu.c8
-rw-r--r--arch/x86/kernel/apb_timer.c37
-rw-r--r--arch/x86/kernel/aperture_64.c4
-rw-r--r--arch/x86/kernel/apic/Makefile7
-rw-r--r--arch/x86/kernel/apic/apic.c2
-rw-r--r--arch/x86/kernel/apic/es7000_32.c1
-rw-r--r--arch/x86/kernel/apic/hw_nmi.c107
-rw-r--r--arch/x86/kernel/apic/io_apic.c2
-rw-r--r--arch/x86/kernel/apic/nmi.c7
-rw-r--r--arch/x86/kernel/apm_32.c2
-rw-r--r--arch/x86/kernel/cpu/Makefile4
-rw-r--r--arch/x86/kernel/cpu/amd.c77
-rw-r--r--arch/x86/kernel/cpu/cmpxchg.c72
-rw-r--r--arch/x86/kernel/cpu/common.c28
-rw-r--r--arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c7
-rw-r--r--arch/x86/kernel/cpu/cpufreq/gx-suspmod.c11
-rw-r--r--arch/x86/kernel/cpu/cpufreq/longhaul.c6
-rw-r--r--arch/x86/kernel/cpu/cpufreq/longhaul.h26
-rw-r--r--arch/x86/kernel/cpu/cpufreq/longrun.c6
-rw-r--r--arch/x86/kernel/cpu/cpufreq/p4-clockmod.c7
-rw-r--r--arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k7.c8
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c8
-rw-r--r--arch/x86/kernel/cpu/hypervisor.c3
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c108
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c35
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel.c9
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c206
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c1
-rw-r--r--arch/x86/kernel/cpu/mtrr/cleanup.c6
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c3
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c56
-rw-r--r--arch/x86/kernel/cpu/perf_event.c62
-rw-r--r--arch/x86/kernel/cpu/perf_event_p4.c156
-rw-r--r--arch/x86/kernel/cpu/scattered.c63
-rw-r--r--arch/x86/kernel/cpu/topology.c (renamed from arch/x86/kernel/cpu/addon_cpuid_features.c)58
-rw-r--r--arch/x86/kernel/cpu/vmware.c9
-rw-r--r--arch/x86/kernel/dumpstack.c1
-rw-r--r--arch/x86/kernel/dumpstack.h56
-rw-r--r--arch/x86/kernel/dumpstack_32.c2
-rw-r--r--arch/x86/kernel/dumpstack_64.c1
-rw-r--r--arch/x86/kernel/entry_32.S16
-rw-r--r--arch/x86/kernel/entry_64.S9
-rw-r--r--arch/x86/kernel/head32.c2
-rw-r--r--arch/x86/kernel/head_32.S6
-rw-r--r--arch/x86/kernel/head_64.S5
-rw-r--r--arch/x86/kernel/hpet.c15
-rw-r--r--arch/x86/kernel/hw_breakpoint.c51
-rw-r--r--arch/x86/kernel/i387.c42
-rw-r--r--arch/x86/kernel/kgdb.c189
-rw-r--r--arch/x86/kernel/kprobes.c33
-rw-r--r--arch/x86/kernel/mrst.c105
-rw-r--r--arch/x86/kernel/olpc.c20
-rw-r--r--arch/x86/kernel/olpc_ofw.c106
-rw-r--r--arch/x86/kernel/pci-dma.c7
-rw-r--r--arch/x86/kernel/process.c54
-rw-r--r--arch/x86/kernel/process_32.c4
-rw-r--r--arch/x86/kernel/process_64.c5
-rw-r--r--arch/x86/kernel/setup.c6
-rw-r--r--arch/x86/kernel/smpboot.c15
-rw-r--r--arch/x86/kernel/stacktrace.c31
-rw-r--r--arch/x86/kernel/syscall_table_32.S3
-rw-r--r--arch/x86/kernel/traps.c7
-rw-r--r--arch/x86/kernel/tsc.c5
-rw-r--r--arch/x86/kernel/verify_cpu_64.S3
-rw-r--r--arch/x86/kernel/vsyscall_64.c17
-rw-r--r--arch/x86/kernel/xsave.c195
71 files changed, 1453 insertions, 778 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index e77b2208372..0925676266b 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -104,6 +104,7 @@ obj-$(CONFIG_SCx200) += scx200.o
104scx200-y += scx200_32.o 104scx200-y += scx200_32.o
105 105
106obj-$(CONFIG_OLPC) += olpc.o 106obj-$(CONFIG_OLPC) += olpc.o
107obj-$(CONFIG_OLPC_OPENFIRMWARE) += olpc_ofw.o
107obj-$(CONFIG_X86_MRST) += mrst.o 108obj-$(CONFIG_X86_MRST) += mrst.o
108 109
109microcode-y := microcode_core.o 110microcode-y := microcode_core.o
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.S b/arch/x86/kernel/acpi/realmode/wakeup.S
index 580b4e29601..28595d6df47 100644
--- a/arch/x86/kernel/acpi/realmode/wakeup.S
+++ b/arch/x86/kernel/acpi/realmode/wakeup.S
@@ -104,7 +104,7 @@ _start:
104 movl %eax, %ecx 104 movl %eax, %ecx
105 orl %edx, %ecx 105 orl %edx, %ecx
106 jz 1f 106 jz 1f
107 movl $0xc0000080, %ecx 107 movl $MSR_EFER, %ecx
108 wrmsr 108 wrmsr
1091: 1091:
110 110
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index fcc3c61fdec..33cec152070 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -2,7 +2,7 @@
2 * sleep.c - x86-specific ACPI sleep support. 2 * sleep.c - x86-specific ACPI sleep support.
3 * 3 *
4 * Copyright (C) 2001-2003 Patrick Mochel 4 * Copyright (C) 2001-2003 Patrick Mochel
5 * Copyright (C) 2001-2003 Pavel Machek <pavel@suse.cz> 5 * Copyright (C) 2001-2003 Pavel Machek <pavel@ucw.cz>
6 */ 6 */
7 7
8#include <linux/acpi.h> 8#include <linux/acpi.h>
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 70237732a6c..f65ab8b014c 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -214,6 +214,7 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
214 u8 *instr = a->instr; 214 u8 *instr = a->instr;
215 BUG_ON(a->replacementlen > a->instrlen); 215 BUG_ON(a->replacementlen > a->instrlen);
216 BUG_ON(a->instrlen > sizeof(insnbuf)); 216 BUG_ON(a->instrlen > sizeof(insnbuf));
217 BUG_ON(a->cpuid >= NCAPINTS*32);
217 if (!boot_cpu_has(a->cpuid)) 218 if (!boot_cpu_has(a->cpuid))
218 continue; 219 continue;
219#ifdef CONFIG_X86_64 220#ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 0d20286d78c..fa044e1e30a 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -2572,6 +2572,11 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
2572static int amd_iommu_domain_has_cap(struct iommu_domain *domain, 2572static int amd_iommu_domain_has_cap(struct iommu_domain *domain,
2573 unsigned long cap) 2573 unsigned long cap)
2574{ 2574{
2575 switch (cap) {
2576 case IOMMU_CAP_CACHE_COHERENCY:
2577 return 1;
2578 }
2579
2575 return 0; 2580 return 0;
2576} 2581}
2577 2582
@@ -2609,8 +2614,7 @@ int __init amd_iommu_init_passthrough(void)
2609 2614
2610 pt_domain->mode |= PAGE_MODE_NONE; 2615 pt_domain->mode |= PAGE_MODE_NONE;
2611 2616
2612 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { 2617 for_each_pci_dev(dev) {
2613
2614 if (!check_device(&dev->dev)) 2618 if (!check_device(&dev->dev))
2615 continue; 2619 continue;
2616 2620
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index a35347501d3..8dd77800ff5 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -43,10 +43,11 @@
43 43
44#include <asm/fixmap.h> 44#include <asm/fixmap.h>
45#include <asm/apb_timer.h> 45#include <asm/apb_timer.h>
46#include <asm/mrst.h>
46 47
47#define APBT_MASK CLOCKSOURCE_MASK(32) 48#define APBT_MASK CLOCKSOURCE_MASK(32)
48#define APBT_SHIFT 22 49#define APBT_SHIFT 22
49#define APBT_CLOCKEVENT_RATING 150 50#define APBT_CLOCKEVENT_RATING 110
50#define APBT_CLOCKSOURCE_RATING 250 51#define APBT_CLOCKSOURCE_RATING 250
51#define APBT_MIN_DELTA_USEC 200 52#define APBT_MIN_DELTA_USEC 200
52 53
@@ -83,8 +84,6 @@ struct apbt_dev {
83 char name[10]; 84 char name[10];
84}; 85};
85 86
86int disable_apbt_percpu __cpuinitdata;
87
88static DEFINE_PER_CPU(struct apbt_dev, cpu_apbt_dev); 87static DEFINE_PER_CPU(struct apbt_dev, cpu_apbt_dev);
89 88
90#ifdef CONFIG_SMP 89#ifdef CONFIG_SMP
@@ -195,29 +194,6 @@ static struct clock_event_device apbt_clockevent = {
195}; 194};
196 195
197/* 196/*
198 * if user does not want to use per CPU apb timer, just give it a lower rating
199 * than local apic timer and skip the late per cpu timer init.
200 */
201static inline int __init setup_x86_mrst_timer(char *arg)
202{
203 if (!arg)
204 return -EINVAL;
205
206 if (strcmp("apbt_only", arg) == 0)
207 disable_apbt_percpu = 0;
208 else if (strcmp("lapic_and_apbt", arg) == 0)
209 disable_apbt_percpu = 1;
210 else {
211 pr_warning("X86 MRST timer option %s not recognised"
212 " use x86_mrst_timer=apbt_only or lapic_and_apbt\n",
213 arg);
214 return -EINVAL;
215 }
216 return 0;
217}
218__setup("x86_mrst_timer=", setup_x86_mrst_timer);
219
220/*
221 * start count down from 0xffff_ffff. this is done by toggling the enable bit 197 * start count down from 0xffff_ffff. this is done by toggling the enable bit
222 * then load initial load count to ~0. 198 * then load initial load count to ~0.
223 */ 199 */
@@ -335,7 +311,7 @@ static int __init apbt_clockevent_register(void)
335 adev->num = smp_processor_id(); 311 adev->num = smp_processor_id();
336 memcpy(&adev->evt, &apbt_clockevent, sizeof(struct clock_event_device)); 312 memcpy(&adev->evt, &apbt_clockevent, sizeof(struct clock_event_device));
337 313
338 if (disable_apbt_percpu) { 314 if (mrst_timer_options == MRST_TIMER_LAPIC_APBT) {
339 apbt_clockevent.rating = APBT_CLOCKEVENT_RATING - 100; 315 apbt_clockevent.rating = APBT_CLOCKEVENT_RATING - 100;
340 global_clock_event = &adev->evt; 316 global_clock_event = &adev->evt;
341 printk(KERN_DEBUG "%s clockevent registered as global\n", 317 printk(KERN_DEBUG "%s clockevent registered as global\n",
@@ -429,7 +405,8 @@ static int apbt_cpuhp_notify(struct notifier_block *n,
429 405
430static __init int apbt_late_init(void) 406static __init int apbt_late_init(void)
431{ 407{
432 if (disable_apbt_percpu || !apb_timer_block_enabled) 408 if (mrst_timer_options == MRST_TIMER_LAPIC_APBT ||
409 !apb_timer_block_enabled)
433 return 0; 410 return 0;
434 /* This notifier should be called after workqueue is ready */ 411 /* This notifier should be called after workqueue is ready */
435 hotcpu_notifier(apbt_cpuhp_notify, -20); 412 hotcpu_notifier(apbt_cpuhp_notify, -20);
@@ -450,6 +427,8 @@ static void apbt_set_mode(enum clock_event_mode mode,
450 int timer_num; 427 int timer_num;
451 struct apbt_dev *adev = EVT_TO_APBT_DEV(evt); 428 struct apbt_dev *adev = EVT_TO_APBT_DEV(evt);
452 429
430 BUG_ON(!apbt_virt_address);
431
453 timer_num = adev->num; 432 timer_num = adev->num;
454 pr_debug("%s CPU %d timer %d mode=%d\n", 433 pr_debug("%s CPU %d timer %d mode=%d\n",
455 __func__, first_cpu(*evt->cpumask), timer_num, mode); 434 __func__, first_cpu(*evt->cpumask), timer_num, mode);
@@ -676,7 +655,7 @@ void __init apbt_time_init(void)
676 } 655 }
677#ifdef CONFIG_SMP 656#ifdef CONFIG_SMP
678 /* kernel cmdline disable apb timer, so we will use lapic timers */ 657 /* kernel cmdline disable apb timer, so we will use lapic timers */
679 if (disable_apbt_percpu) { 658 if (mrst_timer_options == MRST_TIMER_LAPIC_APBT) {
680 printk(KERN_INFO "apbt: disabled per cpu timer\n"); 659 printk(KERN_INFO "apbt: disabled per cpu timer\n");
681 return; 660 return;
682 } 661 }
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index b5d8b0bcf23..a2e0caf26e1 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -280,7 +280,7 @@ void __init early_gart_iommu_check(void)
280 * or BIOS forget to put that in reserved. 280 * or BIOS forget to put that in reserved.
281 * try to update e820 to make that region as reserved. 281 * try to update e820 to make that region as reserved.
282 */ 282 */
283 u32 agp_aper_base = 0, agp_aper_order = 0; 283 u32 agp_aper_order = 0;
284 int i, fix, slot, valid_agp = 0; 284 int i, fix, slot, valid_agp = 0;
285 u32 ctl; 285 u32 ctl;
286 u32 aper_size = 0, aper_order = 0, last_aper_order = 0; 286 u32 aper_size = 0, aper_order = 0, last_aper_order = 0;
@@ -291,7 +291,7 @@ void __init early_gart_iommu_check(void)
291 return; 291 return;
292 292
293 /* This is mostly duplicate of iommu_hole_init */ 293 /* This is mostly duplicate of iommu_hole_init */
294 agp_aper_base = search_agp_bridge(&agp_aper_order, &valid_agp); 294 search_agp_bridge(&agp_aper_order, &valid_agp);
295 295
296 fix = 0; 296 fix = 0;
297 for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { 297 for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 565c1bfc507..910f20b457c 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -2,7 +2,12 @@
2# Makefile for local APIC drivers and for the IO-APIC code 2# Makefile for local APIC drivers and for the IO-APIC code
3# 3#
4 4
5obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o nmi.o 5obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o
6ifneq ($(CONFIG_HARDLOCKUP_DETECTOR),y)
7obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o
8endif
9obj-$(CONFIG_HARDLOCKUP_DETECTOR) += hw_nmi.o
10
6obj-$(CONFIG_X86_IO_APIC) += io_apic.o 11obj-$(CONFIG_X86_IO_APIC) += io_apic.o
7obj-$(CONFIG_SMP) += ipi.o 12obj-$(CONFIG_SMP) += ipi.o
8 13
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index a96489ee6ca..980508c7908 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -460,7 +460,7 @@ static void lapic_timer_broadcast(const struct cpumask *mask)
460} 460}
461 461
462/* 462/*
463 * Setup the local APIC timer for this CPU. Copy the initilized values 463 * Setup the local APIC timer for this CPU. Copy the initialized values
464 * of the boot CPU and register the clock event in the framework. 464 * of the boot CPU and register the clock event in the framework.
465 */ 465 */
466static void __cpuinit setup_APIC_timer(void) 466static void __cpuinit setup_APIC_timer(void)
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 425e53a87fe..8593582d802 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -129,7 +129,6 @@ int es7000_plat;
129 * GSI override for ES7000 platforms. 129 * GSI override for ES7000 platforms.
130 */ 130 */
131 131
132static unsigned int base;
133 132
134static int __cpuinit wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) 133static int __cpuinit wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip)
135{ 134{
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
new file mode 100644
index 00000000000..cefd6942f0e
--- /dev/null
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -0,0 +1,107 @@
1/*
2 * HW NMI watchdog support
3 *
4 * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
5 *
6 * Arch specific calls to support NMI watchdog
7 *
8 * Bits copied from original nmi.c file
9 *
10 */
11#include <asm/apic.h>
12
13#include <linux/cpumask.h>
14#include <linux/kdebug.h>
15#include <linux/notifier.h>
16#include <linux/kprobes.h>
17#include <linux/nmi.h>
18#include <linux/module.h>
19
20/* For reliability, we're prepared to waste bits here. */
21static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
22
23u64 hw_nmi_get_sample_period(void)
24{
25 return (u64)(cpu_khz) * 1000 * 60;
26}
27
28#ifdef ARCH_HAS_NMI_WATCHDOG
29void arch_trigger_all_cpu_backtrace(void)
30{
31 int i;
32
33 cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
34
35 printk(KERN_INFO "sending NMI to all CPUs:\n");
36 apic->send_IPI_all(NMI_VECTOR);
37
38 /* Wait for up to 10 seconds for all CPUs to do the backtrace */
39 for (i = 0; i < 10 * 1000; i++) {
40 if (cpumask_empty(to_cpumask(backtrace_mask)))
41 break;
42 mdelay(1);
43 }
44}
45
46static int __kprobes
47arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
48 unsigned long cmd, void *__args)
49{
50 struct die_args *args = __args;
51 struct pt_regs *regs;
52 int cpu = smp_processor_id();
53
54 switch (cmd) {
55 case DIE_NMI:
56 case DIE_NMI_IPI:
57 break;
58
59 default:
60 return NOTIFY_DONE;
61 }
62
63 regs = args->regs;
64
65 if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
66 static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED;
67
68 arch_spin_lock(&lock);
69 printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
70 show_regs(regs);
71 dump_stack();
72 arch_spin_unlock(&lock);
73 cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
74 return NOTIFY_STOP;
75 }
76
77 return NOTIFY_DONE;
78}
79
80static __read_mostly struct notifier_block backtrace_notifier = {
81 .notifier_call = arch_trigger_all_cpu_backtrace_handler,
82 .next = NULL,
83 .priority = 1
84};
85
86static int __init register_trigger_all_cpu_backtrace(void)
87{
88 register_die_notifier(&backtrace_notifier);
89 return 0;
90}
91early_initcall(register_trigger_all_cpu_backtrace);
92#endif
93
94/* STUB calls to mimic old nmi_watchdog behaviour */
95#if defined(CONFIG_X86_LOCAL_APIC)
96unsigned int nmi_watchdog = NMI_NONE;
97EXPORT_SYMBOL(nmi_watchdog);
98void acpi_nmi_enable(void) { return; }
99void acpi_nmi_disable(void) { return; }
100#endif
101atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
102EXPORT_SYMBOL(nmi_active);
103int unknown_nmi_panic;
104void cpu_nmi_set_wd_enabled(void) { return; }
105void stop_apic_nmi_watchdog(void *unused) { return; }
106void setup_apic_nmi_watchdog(void *unused) { return; }
107int __init check_nmi_watchdog(void) { return 0; }
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index e41ed24ab26..4dc0084ec1b 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3397,7 +3397,7 @@ static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
3397 3397
3398 cfg = desc->chip_data; 3398 cfg = desc->chip_data;
3399 3399
3400 read_msi_msg_desc(desc, &msg); 3400 get_cached_msi_msg_desc(desc, &msg);
3401 3401
3402 msg.data &= ~MSI_DATA_VECTOR_MASK; 3402 msg.data &= ~MSI_DATA_VECTOR_MASK;
3403 msg.data |= MSI_DATA_VECTOR(cfg->vector); 3403 msg.data |= MSI_DATA_VECTOR(cfg->vector);
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
index 1edaf15c0b8..a43f71cb30f 100644
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -401,13 +401,6 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
401 int cpu = smp_processor_id(); 401 int cpu = smp_processor_id();
402 int rc = 0; 402 int rc = 0;
403 403
404 /* check for other users first */
405 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
406 == NOTIFY_STOP) {
407 rc = 1;
408 touched = 1;
409 }
410
411 sum = get_timer_irqs(cpu); 404 sum = get_timer_irqs(cpu);
412 405
413 if (__get_cpu_var(nmi_touch)) { 406 if (__get_cpu_var(nmi_touch)) {
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index c4f9182ca3a..4c9c67bf09b 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -140,7 +140,7 @@
140 * is now the way life works). 140 * is now the way life works).
141 * Fix thinko in suspend() (wrong return). 141 * Fix thinko in suspend() (wrong return).
142 * Notify drivers on critical suspend. 142 * Notify drivers on critical suspend.
143 * Make kapmd absorb more idle time (Pavel Machek <pavel@suse.cz> 143 * Make kapmd absorb more idle time (Pavel Machek <pavel@ucw.cz>
144 * modified by sfr). 144 * modified by sfr).
145 * Disable interrupts while we are suspended (Andy Henroid 145 * Disable interrupts while we are suspended (Andy Henroid
146 * <andy_henroid@yahoo.com> fixed by sfr). 146 * <andy_henroid@yahoo.com> fixed by sfr).
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 3a785da34b6..3f0ebe429a0 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -12,11 +12,11 @@ endif
12nostackp := $(call cc-option, -fno-stack-protector) 12nostackp := $(call cc-option, -fno-stack-protector)
13CFLAGS_common.o := $(nostackp) 13CFLAGS_common.o := $(nostackp)
14 14
15obj-y := intel_cacheinfo.o addon_cpuid_features.o 15obj-y := intel_cacheinfo.o scattered.o topology.o
16obj-y += proc.o capflags.o powerflags.o common.o 16obj-y += proc.o capflags.o powerflags.o common.o
17obj-y += vmware.o hypervisor.o sched.o mshyperv.o 17obj-y += vmware.o hypervisor.o sched.o mshyperv.o
18 18
19obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o 19obj-$(CONFIG_X86_32) += bugs.o
20obj-$(CONFIG_X86_64) += bugs_64.o 20obj-$(CONFIG_X86_64) += bugs_64.o
21 21
22obj-$(CONFIG_CPU_SUP_INTEL) += intel.o 22obj-$(CONFIG_CPU_SUP_INTEL) += intel.o
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index e485825130d..60a57b13082 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -466,7 +466,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
466 } 466 }
467 467
468 } 468 }
469 if (c->x86 == 0x10 || c->x86 == 0x11) 469 if (c->x86 >= 0x10)
470 set_cpu_cap(c, X86_FEATURE_REP_GOOD); 470 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
471 471
472 /* get apicid instead of initial apic id from cpuid */ 472 /* get apicid instead of initial apic id from cpuid */
@@ -529,7 +529,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
529 num_cache_leaves = 3; 529 num_cache_leaves = 3;
530 } 530 }
531 531
532 if (c->x86 >= 0xf && c->x86 <= 0x11) 532 if (c->x86 >= 0xf)
533 set_cpu_cap(c, X86_FEATURE_K8); 533 set_cpu_cap(c, X86_FEATURE_K8);
534 534
535 if (cpu_has_xmm2) { 535 if (cpu_has_xmm2) {
@@ -546,7 +546,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
546 fam10h_check_enable_mmcfg(); 546 fam10h_check_enable_mmcfg();
547 } 547 }
548 548
549 if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) { 549 if (c == &boot_cpu_data && c->x86 >= 0xf) {
550 unsigned long long tseg; 550 unsigned long long tseg;
551 551
552 /* 552 /*
@@ -609,3 +609,74 @@ static const struct cpu_dev __cpuinitconst amd_cpu_dev = {
609}; 609};
610 610
611cpu_dev_register(amd_cpu_dev); 611cpu_dev_register(amd_cpu_dev);
612
613/*
614 * AMD errata checking
615 *
616 * Errata are defined as arrays of ints using the AMD_LEGACY_ERRATUM() or
617 * AMD_OSVW_ERRATUM() macros. The latter is intended for newer errata that
618 * have an OSVW id assigned, which it takes as first argument. Both take a
619 * variable number of family-specific model-stepping ranges created by
620 * AMD_MODEL_RANGE(). Each erratum also has to be declared as extern const
621 * int[] in arch/x86/include/asm/processor.h.
622 *
623 * Example:
624 *
625 * const int amd_erratum_319[] =
626 * AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0x4, 0x2),
627 * AMD_MODEL_RANGE(0x10, 0x8, 0x0, 0x8, 0x0),
628 * AMD_MODEL_RANGE(0x10, 0x9, 0x0, 0x9, 0x0));
629 */
630
631const int amd_erratum_400[] =
632 AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf),
633 AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf));
634EXPORT_SYMBOL_GPL(amd_erratum_400);
635
636const int amd_erratum_383[] =
637 AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf));
638EXPORT_SYMBOL_GPL(amd_erratum_383);
639
640bool cpu_has_amd_erratum(const int *erratum)
641{
642 struct cpuinfo_x86 *cpu = &current_cpu_data;
643 int osvw_id = *erratum++;
644 u32 range;
645 u32 ms;
646
647 /*
648 * If called early enough that current_cpu_data hasn't been initialized
649 * yet, fall back to boot_cpu_data.
650 */
651 if (cpu->x86 == 0)
652 cpu = &boot_cpu_data;
653
654 if (cpu->x86_vendor != X86_VENDOR_AMD)
655 return false;
656
657 if (osvw_id >= 0 && osvw_id < 65536 &&
658 cpu_has(cpu, X86_FEATURE_OSVW)) {
659 u64 osvw_len;
660
661 rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, osvw_len);
662 if (osvw_id < osvw_len) {
663 u64 osvw_bits;
664
665 rdmsrl(MSR_AMD64_OSVW_STATUS + (osvw_id >> 6),
666 osvw_bits);
667 return osvw_bits & (1ULL << (osvw_id & 0x3f));
668 }
669 }
670
671 /* OSVW unavailable or ID unknown, match family-model-stepping range */
672 ms = (cpu->x86_model << 8) | cpu->x86_mask;
673 while ((range = *erratum++))
674 if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) &&
675 (ms >= AMD_MODEL_RANGE_START(range)) &&
676 (ms <= AMD_MODEL_RANGE_END(range)))
677 return true;
678
679 return false;
680}
681
682EXPORT_SYMBOL_GPL(cpu_has_amd_erratum);
diff --git a/arch/x86/kernel/cpu/cmpxchg.c b/arch/x86/kernel/cpu/cmpxchg.c
deleted file mode 100644
index 2056ccf572c..00000000000
--- a/arch/x86/kernel/cpu/cmpxchg.c
+++ /dev/null
@@ -1,72 +0,0 @@
1/*
2 * cmpxchg*() fallbacks for CPU not supporting these instructions
3 */
4
5#include <linux/kernel.h>
6#include <linux/smp.h>
7#include <linux/module.h>
8
9#ifndef CONFIG_X86_CMPXCHG
10unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new)
11{
12 u8 prev;
13 unsigned long flags;
14
15 /* Poor man's cmpxchg for 386. Unsuitable for SMP */
16 local_irq_save(flags);
17 prev = *(u8 *)ptr;
18 if (prev == old)
19 *(u8 *)ptr = new;
20 local_irq_restore(flags);
21 return prev;
22}
23EXPORT_SYMBOL(cmpxchg_386_u8);
24
25unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new)
26{
27 u16 prev;
28 unsigned long flags;
29
30 /* Poor man's cmpxchg for 386. Unsuitable for SMP */
31 local_irq_save(flags);
32 prev = *(u16 *)ptr;
33 if (prev == old)
34 *(u16 *)ptr = new;
35 local_irq_restore(flags);
36 return prev;
37}
38EXPORT_SYMBOL(cmpxchg_386_u16);
39
40unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new)
41{
42 u32 prev;
43 unsigned long flags;
44
45 /* Poor man's cmpxchg for 386. Unsuitable for SMP */
46 local_irq_save(flags);
47 prev = *(u32 *)ptr;
48 if (prev == old)
49 *(u32 *)ptr = new;
50 local_irq_restore(flags);
51 return prev;
52}
53EXPORT_SYMBOL(cmpxchg_386_u32);
54#endif
55
56#ifndef CONFIG_X86_CMPXCHG64
57unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new)
58{
59 u64 prev;
60 unsigned long flags;
61
62 /* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */
63 local_irq_save(flags);
64 prev = *(u64 *)ptr;
65 if (prev == old)
66 *(u64 *)ptr = new;
67 local_irq_restore(flags);
68 return prev;
69}
70EXPORT_SYMBOL(cmpxchg_486_u64);
71#endif
72
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 68e4a6f2211..490dac63c2d 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -140,10 +140,18 @@ EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
140static int __init x86_xsave_setup(char *s) 140static int __init x86_xsave_setup(char *s)
141{ 141{
142 setup_clear_cpu_cap(X86_FEATURE_XSAVE); 142 setup_clear_cpu_cap(X86_FEATURE_XSAVE);
143 setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
143 return 1; 144 return 1;
144} 145}
145__setup("noxsave", x86_xsave_setup); 146__setup("noxsave", x86_xsave_setup);
146 147
148static int __init x86_xsaveopt_setup(char *s)
149{
150 setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
151 return 1;
152}
153__setup("noxsaveopt", x86_xsaveopt_setup);
154
147#ifdef CONFIG_X86_32 155#ifdef CONFIG_X86_32
148static int cachesize_override __cpuinitdata = -1; 156static int cachesize_override __cpuinitdata = -1;
149static int disable_x86_serial_nr __cpuinitdata = 1; 157static int disable_x86_serial_nr __cpuinitdata = 1;
@@ -551,6 +559,16 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
551 c->x86_capability[4] = excap; 559 c->x86_capability[4] = excap;
552 } 560 }
553 561
562 /* Additional Intel-defined flags: level 0x00000007 */
563 if (c->cpuid_level >= 0x00000007) {
564 u32 eax, ebx, ecx, edx;
565
566 cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx);
567
568 if (eax > 0)
569 c->x86_capability[9] = ebx;
570 }
571
554 /* AMD-defined flags: level 0x80000001 */ 572 /* AMD-defined flags: level 0x80000001 */
555 xlvl = cpuid_eax(0x80000000); 573 xlvl = cpuid_eax(0x80000000);
556 c->extended_cpuid_level = xlvl; 574 c->extended_cpuid_level = xlvl;
@@ -576,6 +594,7 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
576 if (c->extended_cpuid_level >= 0x80000007) 594 if (c->extended_cpuid_level >= 0x80000007)
577 c->x86_power = cpuid_edx(0x80000007); 595 c->x86_power = cpuid_edx(0x80000007);
578 596
597 init_scattered_cpuid_features(c);
579} 598}
580 599
581static void __cpuinit identify_cpu_without_cpuid(struct cpuinfo_x86 *c) 600static void __cpuinit identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
@@ -731,7 +750,6 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
731 750
732 get_model_name(c); /* Default name */ 751 get_model_name(c); /* Default name */
733 752
734 init_scattered_cpuid_features(c);
735 detect_nopl(c); 753 detect_nopl(c);
736} 754}
737 755
@@ -1192,6 +1210,7 @@ void __cpuinit cpu_init(void)
1192 dbg_restore_debug_regs(); 1210 dbg_restore_debug_regs();
1193 1211
1194 fpu_init(); 1212 fpu_init();
1213 xsave_init();
1195 1214
1196 raw_local_save_flags(kernel_eflags); 1215 raw_local_save_flags(kernel_eflags);
1197 1216
@@ -1252,12 +1271,7 @@ void __cpuinit cpu_init(void)
1252 clear_used_math(); 1271 clear_used_math();
1253 mxcsr_feature_mask_init(); 1272 mxcsr_feature_mask_init();
1254 1273
1255 /* 1274 fpu_init();
1256 * Boot processor to setup the FP and extended state context info.
1257 */
1258 if (smp_processor_id() == boot_cpu_id)
1259 init_thread_xstate();
1260
1261 xsave_init(); 1275 xsave_init();
1262} 1276}
1263#endif 1277#endif
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 1d3cddaa40e..246cd3afbb5 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -34,7 +34,6 @@
34#include <linux/compiler.h> 34#include <linux/compiler.h>
35#include <linux/dmi.h> 35#include <linux/dmi.h>
36#include <linux/slab.h> 36#include <linux/slab.h>
37#include <trace/events/power.h>
38 37
39#include <linux/acpi.h> 38#include <linux/acpi.h>
40#include <linux/io.h> 39#include <linux/io.h>
@@ -324,8 +323,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
324 } 323 }
325 } 324 }
326 325
327 trace_power_frequency(POWER_PSTATE, data->freq_table[next_state].frequency);
328
329 switch (data->cpu_feature) { 326 switch (data->cpu_feature) {
330 case SYSTEM_INTEL_MSR_CAPABLE: 327 case SYSTEM_INTEL_MSR_CAPABLE:
331 cmd.type = SYSTEM_INTEL_MSR_CAPABLE; 328 cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
@@ -351,7 +348,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
351 348
352 freqs.old = perf->states[perf->state].core_frequency * 1000; 349 freqs.old = perf->states[perf->state].core_frequency * 1000;
353 freqs.new = data->freq_table[next_state].frequency; 350 freqs.new = data->freq_table[next_state].frequency;
354 for_each_cpu(i, cmd.mask) { 351 for_each_cpu(i, policy->cpus) {
355 freqs.cpu = i; 352 freqs.cpu = i;
356 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); 353 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
357 } 354 }
@@ -367,7 +364,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
367 } 364 }
368 } 365 }
369 366
370 for_each_cpu(i, cmd.mask) { 367 for_each_cpu(i, policy->cpus) {
371 freqs.cpu = i; 368 freqs.cpu = i;
372 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); 369 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
373 } 370 }
diff --git a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
index 16e3483be9e..32974cf8423 100644
--- a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
@@ -169,12 +169,9 @@ static int gx_freq_mult[16] = {
169 * Low Level chipset interface * 169 * Low Level chipset interface *
170 ****************************************************************/ 170 ****************************************************************/
171static struct pci_device_id gx_chipset_tbl[] __initdata = { 171static struct pci_device_id gx_chipset_tbl[] __initdata = {
172 { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, 172 { PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY), },
173 PCI_ANY_ID, PCI_ANY_ID }, 173 { PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5520), },
174 { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520, 174 { PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5510), },
175 PCI_ANY_ID, PCI_ANY_ID },
176 { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510,
177 PCI_ANY_ID, PCI_ANY_ID },
178 { 0, }, 175 { 0, },
179}; 176};
180 177
@@ -199,7 +196,7 @@ static __init struct pci_dev *gx_detect_chipset(void)
199 } 196 }
200 197
201 /* detect which companion chip is used */ 198 /* detect which companion chip is used */
202 while ((gx_pci = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, gx_pci)) != NULL) { 199 for_each_pci_dev(gx_pci) {
203 if ((pci_match_id(gx_chipset_tbl, gx_pci)) != NULL) 200 if ((pci_match_id(gx_chipset_tbl, gx_pci)) != NULL)
204 return gx_pci; 201 return gx_pci;
205 } 202 }
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c
index 7e7eea4f826..03162dac627 100644
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c
@@ -426,7 +426,7 @@ static int guess_fsb(int mult)
426} 426}
427 427
428 428
429static int __init longhaul_get_ranges(void) 429static int __cpuinit longhaul_get_ranges(void)
430{ 430{
431 unsigned int i, j, k = 0; 431 unsigned int i, j, k = 0;
432 unsigned int ratio; 432 unsigned int ratio;
@@ -530,7 +530,7 @@ static int __init longhaul_get_ranges(void)
530} 530}
531 531
532 532
533static void __init longhaul_setup_voltagescaling(void) 533static void __cpuinit longhaul_setup_voltagescaling(void)
534{ 534{
535 union msr_longhaul longhaul; 535 union msr_longhaul longhaul;
536 struct mV_pos minvid, maxvid, vid; 536 struct mV_pos minvid, maxvid, vid;
@@ -784,7 +784,7 @@ static int longhaul_setup_southbridge(void)
784 return 0; 784 return 0;
785} 785}
786 786
787static int __init longhaul_cpu_init(struct cpufreq_policy *policy) 787static int __cpuinit longhaul_cpu_init(struct cpufreq_policy *policy)
788{ 788{
789 struct cpuinfo_x86 *c = &cpu_data(0); 789 struct cpuinfo_x86 *c = &cpu_data(0);
790 char *cpuname = NULL; 790 char *cpuname = NULL;
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.h b/arch/x86/kernel/cpu/cpufreq/longhaul.h
index e2360a469f7..cbf48fbca88 100644
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.h
+++ b/arch/x86/kernel/cpu/cpufreq/longhaul.h
@@ -56,7 +56,7 @@ union msr_longhaul {
56/* 56/*
57 * VIA C3 Samuel 1 & Samuel 2 (stepping 0) 57 * VIA C3 Samuel 1 & Samuel 2 (stepping 0)
58 */ 58 */
59static const int __initdata samuel1_mults[16] = { 59static const int __cpuinitdata samuel1_mults[16] = {
60 -1, /* 0000 -> RESERVED */ 60 -1, /* 0000 -> RESERVED */
61 30, /* 0001 -> 3.0x */ 61 30, /* 0001 -> 3.0x */
62 40, /* 0010 -> 4.0x */ 62 40, /* 0010 -> 4.0x */
@@ -75,7 +75,7 @@ static const int __initdata samuel1_mults[16] = {
75 -1, /* 1111 -> RESERVED */ 75 -1, /* 1111 -> RESERVED */
76}; 76};
77 77
78static const int __initdata samuel1_eblcr[16] = { 78static const int __cpuinitdata samuel1_eblcr[16] = {
79 50, /* 0000 -> RESERVED */ 79 50, /* 0000 -> RESERVED */
80 30, /* 0001 -> 3.0x */ 80 30, /* 0001 -> 3.0x */
81 40, /* 0010 -> 4.0x */ 81 40, /* 0010 -> 4.0x */
@@ -97,7 +97,7 @@ static const int __initdata samuel1_eblcr[16] = {
97/* 97/*
98 * VIA C3 Samuel2 Stepping 1->15 98 * VIA C3 Samuel2 Stepping 1->15
99 */ 99 */
100static const int __initdata samuel2_eblcr[16] = { 100static const int __cpuinitdata samuel2_eblcr[16] = {
101 50, /* 0000 -> 5.0x */ 101 50, /* 0000 -> 5.0x */
102 30, /* 0001 -> 3.0x */ 102 30, /* 0001 -> 3.0x */
103 40, /* 0010 -> 4.0x */ 103 40, /* 0010 -> 4.0x */
@@ -119,7 +119,7 @@ static const int __initdata samuel2_eblcr[16] = {
119/* 119/*
120 * VIA C3 Ezra 120 * VIA C3 Ezra
121 */ 121 */
122static const int __initdata ezra_mults[16] = { 122static const int __cpuinitdata ezra_mults[16] = {
123 100, /* 0000 -> 10.0x */ 123 100, /* 0000 -> 10.0x */
124 30, /* 0001 -> 3.0x */ 124 30, /* 0001 -> 3.0x */
125 40, /* 0010 -> 4.0x */ 125 40, /* 0010 -> 4.0x */
@@ -138,7 +138,7 @@ static const int __initdata ezra_mults[16] = {
138 120, /* 1111 -> 12.0x */ 138 120, /* 1111 -> 12.0x */
139}; 139};
140 140
141static const int __initdata ezra_eblcr[16] = { 141static const int __cpuinitdata ezra_eblcr[16] = {
142 50, /* 0000 -> 5.0x */ 142 50, /* 0000 -> 5.0x */
143 30, /* 0001 -> 3.0x */ 143 30, /* 0001 -> 3.0x */
144 40, /* 0010 -> 4.0x */ 144 40, /* 0010 -> 4.0x */
@@ -160,7 +160,7 @@ static const int __initdata ezra_eblcr[16] = {
160/* 160/*
161 * VIA C3 (Ezra-T) [C5M]. 161 * VIA C3 (Ezra-T) [C5M].
162 */ 162 */
163static const int __initdata ezrat_mults[32] = { 163static const int __cpuinitdata ezrat_mults[32] = {
164 100, /* 0000 -> 10.0x */ 164 100, /* 0000 -> 10.0x */
165 30, /* 0001 -> 3.0x */ 165 30, /* 0001 -> 3.0x */
166 40, /* 0010 -> 4.0x */ 166 40, /* 0010 -> 4.0x */
@@ -196,7 +196,7 @@ static const int __initdata ezrat_mults[32] = {
196 -1, /* 1111 -> RESERVED (12.0x) */ 196 -1, /* 1111 -> RESERVED (12.0x) */
197}; 197};
198 198
199static const int __initdata ezrat_eblcr[32] = { 199static const int __cpuinitdata ezrat_eblcr[32] = {
200 50, /* 0000 -> 5.0x */ 200 50, /* 0000 -> 5.0x */
201 30, /* 0001 -> 3.0x */ 201 30, /* 0001 -> 3.0x */
202 40, /* 0010 -> 4.0x */ 202 40, /* 0010 -> 4.0x */
@@ -235,7 +235,7 @@ static const int __initdata ezrat_eblcr[32] = {
235/* 235/*
236 * VIA C3 Nehemiah */ 236 * VIA C3 Nehemiah */
237 237
238static const int __initdata nehemiah_mults[32] = { 238static const int __cpuinitdata nehemiah_mults[32] = {
239 100, /* 0000 -> 10.0x */ 239 100, /* 0000 -> 10.0x */
240 -1, /* 0001 -> 16.0x */ 240 -1, /* 0001 -> 16.0x */
241 40, /* 0010 -> 4.0x */ 241 40, /* 0010 -> 4.0x */
@@ -270,7 +270,7 @@ static const int __initdata nehemiah_mults[32] = {
270 -1, /* 1111 -> 12.0x */ 270 -1, /* 1111 -> 12.0x */
271}; 271};
272 272
273static const int __initdata nehemiah_eblcr[32] = { 273static const int __cpuinitdata nehemiah_eblcr[32] = {
274 50, /* 0000 -> 5.0x */ 274 50, /* 0000 -> 5.0x */
275 160, /* 0001 -> 16.0x */ 275 160, /* 0001 -> 16.0x */
276 40, /* 0010 -> 4.0x */ 276 40, /* 0010 -> 4.0x */
@@ -315,7 +315,7 @@ struct mV_pos {
315 unsigned short pos; 315 unsigned short pos;
316}; 316};
317 317
318static const struct mV_pos __initdata vrm85_mV[32] = { 318static const struct mV_pos __cpuinitdata vrm85_mV[32] = {
319 {1250, 8}, {1200, 6}, {1150, 4}, {1100, 2}, 319 {1250, 8}, {1200, 6}, {1150, 4}, {1100, 2},
320 {1050, 0}, {1800, 30}, {1750, 28}, {1700, 26}, 320 {1050, 0}, {1800, 30}, {1750, 28}, {1700, 26},
321 {1650, 24}, {1600, 22}, {1550, 20}, {1500, 18}, 321 {1650, 24}, {1600, 22}, {1550, 20}, {1500, 18},
@@ -326,14 +326,14 @@ static const struct mV_pos __initdata vrm85_mV[32] = {
326 {1475, 17}, {1425, 15}, {1375, 13}, {1325, 11} 326 {1475, 17}, {1425, 15}, {1375, 13}, {1325, 11}
327}; 327};
328 328
329static const unsigned char __initdata mV_vrm85[32] = { 329static const unsigned char __cpuinitdata mV_vrm85[32] = {
330 0x04, 0x14, 0x03, 0x13, 0x02, 0x12, 0x01, 0x11, 330 0x04, 0x14, 0x03, 0x13, 0x02, 0x12, 0x01, 0x11,
331 0x00, 0x10, 0x0f, 0x1f, 0x0e, 0x1e, 0x0d, 0x1d, 331 0x00, 0x10, 0x0f, 0x1f, 0x0e, 0x1e, 0x0d, 0x1d,
332 0x0c, 0x1c, 0x0b, 0x1b, 0x0a, 0x1a, 0x09, 0x19, 332 0x0c, 0x1c, 0x0b, 0x1b, 0x0a, 0x1a, 0x09, 0x19,
333 0x08, 0x18, 0x07, 0x17, 0x06, 0x16, 0x05, 0x15 333 0x08, 0x18, 0x07, 0x17, 0x06, 0x16, 0x05, 0x15
334}; 334};
335 335
336static const struct mV_pos __initdata mobilevrm_mV[32] = { 336static const struct mV_pos __cpuinitdata mobilevrm_mV[32] = {
337 {1750, 31}, {1700, 30}, {1650, 29}, {1600, 28}, 337 {1750, 31}, {1700, 30}, {1650, 29}, {1600, 28},
338 {1550, 27}, {1500, 26}, {1450, 25}, {1400, 24}, 338 {1550, 27}, {1500, 26}, {1450, 25}, {1400, 24},
339 {1350, 23}, {1300, 22}, {1250, 21}, {1200, 20}, 339 {1350, 23}, {1300, 22}, {1250, 21}, {1200, 20},
@@ -344,7 +344,7 @@ static const struct mV_pos __initdata mobilevrm_mV[32] = {
344 {675, 3}, {650, 2}, {625, 1}, {600, 0} 344 {675, 3}, {650, 2}, {625, 1}, {600, 0}
345}; 345};
346 346
347static const unsigned char __initdata mV_mobilevrm[32] = { 347static const unsigned char __cpuinitdata mV_mobilevrm[32] = {
348 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 348 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18,
349 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 349 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10,
350 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 350 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08,
diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c
index e7b559d74c5..fc09f142d94 100644
--- a/arch/x86/kernel/cpu/cpufreq/longrun.c
+++ b/arch/x86/kernel/cpu/cpufreq/longrun.c
@@ -165,8 +165,8 @@ static unsigned int longrun_get(unsigned int cpu)
165 * TMTA rules: 165 * TMTA rules:
166 * performance_pctg = (target_freq - low_freq)/(high_freq - low_freq) 166 * performance_pctg = (target_freq - low_freq)/(high_freq - low_freq)
167 */ 167 */
168static unsigned int __init longrun_determine_freqs(unsigned int *low_freq, 168static unsigned int __cpuinit longrun_determine_freqs(unsigned int *low_freq,
169 unsigned int *high_freq) 169 unsigned int *high_freq)
170{ 170{
171 u32 msr_lo, msr_hi; 171 u32 msr_lo, msr_hi;
172 u32 save_lo, save_hi; 172 u32 save_lo, save_hi;
@@ -258,7 +258,7 @@ static unsigned int __init longrun_determine_freqs(unsigned int *low_freq,
258} 258}
259 259
260 260
261static int __init longrun_cpu_init(struct cpufreq_policy *policy) 261static int __cpuinit longrun_cpu_init(struct cpufreq_policy *policy)
262{ 262{
263 int result = 0; 263 int result = 0;
264 264
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index 7b8a8ba67b0..bd1cac747f6 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -178,13 +178,8 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
178 } 178 }
179 } 179 }
180 180
181 if (c->x86 != 0xF) { 181 if (c->x86 != 0xF)
182 if (!cpu_has(c, X86_FEATURE_EST))
183 printk(KERN_WARNING PFX "Unknown CPU. "
184 "Please send an e-mail to "
185 "<cpufreq@vger.kernel.org>\n");
186 return 0; 182 return 0;
187 }
188 183
189 /* on P-4s, the TSC runs with constant frequency independent whether 184 /* on P-4s, the TSC runs with constant frequency independent whether
190 * throttling is active or not. */ 185 * throttling is active or not. */
diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
index a36de5bbb62..994230d4dc4 100644
--- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
@@ -110,7 +110,7 @@ struct pcc_cpu {
110 u32 output_offset; 110 u32 output_offset;
111}; 111};
112 112
113static struct pcc_cpu *pcc_cpu_info; 113static struct pcc_cpu __percpu *pcc_cpu_info;
114 114
115static int pcc_cpufreq_verify(struct cpufreq_policy *policy) 115static int pcc_cpufreq_verify(struct cpufreq_policy *policy)
116{ 116{
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
index 9a97116f89e..4a45fd6e41b 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
@@ -569,7 +569,7 @@ static int powernow_verify(struct cpufreq_policy *policy)
569 * We will then get the same kind of behaviour already tested under 569 * We will then get the same kind of behaviour already tested under
570 * the "well-known" other OS. 570 * the "well-known" other OS.
571 */ 571 */
572static int __init fixup_sgtc(void) 572static int __cpuinit fixup_sgtc(void)
573{ 573{
574 unsigned int sgtc; 574 unsigned int sgtc;
575 unsigned int m; 575 unsigned int m;
@@ -603,7 +603,7 @@ static unsigned int powernow_get(unsigned int cpu)
603} 603}
604 604
605 605
606static int __init acer_cpufreq_pst(const struct dmi_system_id *d) 606static int __cpuinit acer_cpufreq_pst(const struct dmi_system_id *d)
607{ 607{
608 printk(KERN_WARNING PFX 608 printk(KERN_WARNING PFX
609 "%s laptop with broken PST tables in BIOS detected.\n", 609 "%s laptop with broken PST tables in BIOS detected.\n",
@@ -621,7 +621,7 @@ static int __init acer_cpufreq_pst(const struct dmi_system_id *d)
621 * A BIOS update is all that can save them. 621 * A BIOS update is all that can save them.
622 * Mention this, and disable cpufreq. 622 * Mention this, and disable cpufreq.
623 */ 623 */
624static struct dmi_system_id __initdata powernow_dmi_table[] = { 624static struct dmi_system_id __cpuinitdata powernow_dmi_table[] = {
625 { 625 {
626 .callback = acer_cpufreq_pst, 626 .callback = acer_cpufreq_pst,
627 .ident = "Acer Aspire", 627 .ident = "Acer Aspire",
@@ -633,7 +633,7 @@ static struct dmi_system_id __initdata powernow_dmi_table[] = {
633 { } 633 { }
634}; 634};
635 635
636static int __init powernow_cpu_init(struct cpufreq_policy *policy) 636static int __cpuinit powernow_cpu_init(struct cpufreq_policy *policy)
637{ 637{
638 union msr_fidvidstatus fidvidstatus; 638 union msr_fidvidstatus fidvidstatus;
639 int result; 639 int result;
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 3e90cce3dc8..491977baf6c 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -9,7 +9,7 @@
9 * Based on the powernow-k7.c module written by Dave Jones. 9 * Based on the powernow-k7.c module written by Dave Jones.
10 * (C) 2003 Dave Jones on behalf of SuSE Labs 10 * (C) 2003 Dave Jones on behalf of SuSE Labs
11 * (C) 2004 Dominik Brodowski <linux@brodo.de> 11 * (C) 2004 Dominik Brodowski <linux@brodo.de>
12 * (C) 2004 Pavel Machek <pavel@suse.cz> 12 * (C) 2004 Pavel Machek <pavel@ucw.cz>
13 * Licensed under the terms of the GNU GPL License version 2. 13 * Licensed under the terms of the GNU GPL License version 2.
14 * Based upon datasheets & sample CPUs kindly provided by AMD. 14 * Based upon datasheets & sample CPUs kindly provided by AMD.
15 * 15 *
@@ -806,6 +806,8 @@ static int find_psb_table(struct powernow_k8_data *data)
806 * www.amd.com 806 * www.amd.com
807 */ 807 */
808 printk(KERN_ERR FW_BUG PFX "No PSB or ACPI _PSS objects\n"); 808 printk(KERN_ERR FW_BUG PFX "No PSB or ACPI _PSS objects\n");
809 printk(KERN_ERR PFX "Make sure that your BIOS is up to date"
810 " and Cool'N'Quiet support is enabled in BIOS setup\n");
809 return -ENODEV; 811 return -ENODEV;
810} 812}
811 813
@@ -910,8 +912,8 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data,
910{ 912{
911 int i; 913 int i;
912 u32 hi = 0, lo = 0; 914 u32 hi = 0, lo = 0;
913 rdmsr(MSR_PSTATE_CUR_LIMIT, hi, lo); 915 rdmsr(MSR_PSTATE_CUR_LIMIT, lo, hi);
914 data->max_hw_pstate = (hi & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT; 916 data->max_hw_pstate = (lo & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT;
915 917
916 for (i = 0; i < data->acpi_data.state_count; i++) { 918 for (i = 0; i < data->acpi_data.state_count; i++) {
917 u32 index; 919 u32 index;
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index dd531cc56a8..8095f8611f8 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -34,6 +34,9 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] =
34{ 34{
35 &x86_hyper_vmware, 35 &x86_hyper_vmware,
36 &x86_hyper_ms_hyperv, 36 &x86_hyper_ms_hyperv,
37#ifdef CONFIG_XEN_PVHVM
38 &x86_hyper_xen_hvm,
39#endif
37}; 40};
38 41
39const struct hypervisor_x86 *x86_hyper; 42const struct hypervisor_x86 *x86_hyper;
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 33eae2062cf..898c2f4eab8 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -347,8 +347,8 @@ static struct amd_l3_cache * __cpuinit amd_init_l3_cache(int node)
347 return l3; 347 return l3;
348} 348}
349 349
350static void __cpuinit 350static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
351amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) 351 int index)
352{ 352{
353 int node; 353 int node;
354 354
@@ -396,20 +396,39 @@ amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
396 this_leaf->l3 = l3_caches[node]; 396 this_leaf->l3 = l3_caches[node];
397} 397}
398 398
399/*
400 * check whether a slot used for disabling an L3 index is occupied.
401 * @l3: L3 cache descriptor
402 * @slot: slot number (0..1)
403 *
404 * @returns: the disabled index if used or negative value if slot free.
405 */
406int amd_get_l3_disable_slot(struct amd_l3_cache *l3, unsigned slot)
407{
408 unsigned int reg = 0;
409
410 pci_read_config_dword(l3->dev, 0x1BC + slot * 4, &reg);
411
412 /* check whether this slot is activated already */
413 if (reg & (3UL << 30))
414 return reg & 0xfff;
415
416 return -1;
417}
418
399static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf, 419static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
400 unsigned int slot) 420 unsigned int slot)
401{ 421{
402 struct pci_dev *dev = this_leaf->l3->dev; 422 int index;
403 unsigned int reg = 0;
404 423
405 if (!this_leaf->l3 || !this_leaf->l3->can_disable) 424 if (!this_leaf->l3 || !this_leaf->l3->can_disable)
406 return -EINVAL; 425 return -EINVAL;
407 426
408 if (!dev) 427 index = amd_get_l3_disable_slot(this_leaf->l3, slot);
409 return -EINVAL; 428 if (index >= 0)
429 return sprintf(buf, "%d\n", index);
410 430
411 pci_read_config_dword(dev, 0x1BC + slot * 4, &reg); 431 return sprintf(buf, "FREE\n");
412 return sprintf(buf, "0x%08x\n", reg);
413} 432}
414 433
415#define SHOW_CACHE_DISABLE(slot) \ 434#define SHOW_CACHE_DISABLE(slot) \
@@ -451,37 +470,74 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
451 } 470 }
452} 471}
453 472
454 473/*
455static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, 474 * disable a L3 cache index by using a disable-slot
456 const char *buf, size_t count, 475 *
457 unsigned int slot) 476 * @l3: L3 cache descriptor
477 * @cpu: A CPU on the node containing the L3 cache
478 * @slot: slot number (0..1)
479 * @index: index to disable
480 *
481 * @return: 0 on success, error status on failure
482 */
483int amd_set_l3_disable_slot(struct amd_l3_cache *l3, int cpu, unsigned slot,
484 unsigned long index)
458{ 485{
459 struct pci_dev *dev = this_leaf->l3->dev; 486 int ret = 0;
460 int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
461 unsigned long val = 0;
462 487
463#define SUBCACHE_MASK (3UL << 20) 488#define SUBCACHE_MASK (3UL << 20)
464#define SUBCACHE_INDEX 0xfff 489#define SUBCACHE_INDEX 0xfff
465 490
466 if (!this_leaf->l3 || !this_leaf->l3->can_disable) 491 /*
492 * check whether this slot is already used or
493 * the index is already disabled
494 */
495 ret = amd_get_l3_disable_slot(l3, slot);
496 if (ret >= 0)
467 return -EINVAL; 497 return -EINVAL;
468 498
499 /*
500 * check whether the other slot has disabled the
501 * same index already
502 */
503 if (index == amd_get_l3_disable_slot(l3, !slot))
504 return -EINVAL;
505
506 /* do not allow writes outside of allowed bits */
507 if ((index & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) ||
508 ((index & SUBCACHE_INDEX) > l3->indices))
509 return -EINVAL;
510
511 amd_l3_disable_index(l3, cpu, slot, index);
512
513 return 0;
514}
515
516static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
517 const char *buf, size_t count,
518 unsigned int slot)
519{
520 unsigned long val = 0;
521 int cpu, err = 0;
522
469 if (!capable(CAP_SYS_ADMIN)) 523 if (!capable(CAP_SYS_ADMIN))
470 return -EPERM; 524 return -EPERM;
471 525
472 if (!dev) 526 if (!this_leaf->l3 || !this_leaf->l3->can_disable)
473 return -EINVAL; 527 return -EINVAL;
474 528
475 if (strict_strtoul(buf, 10, &val) < 0) 529 cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
476 return -EINVAL;
477 530
478 /* do not allow writes outside of allowed bits */ 531 if (strict_strtoul(buf, 10, &val) < 0)
479 if ((val & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) ||
480 ((val & SUBCACHE_INDEX) > this_leaf->l3->indices))
481 return -EINVAL; 532 return -EINVAL;
482 533
483 amd_l3_disable_index(this_leaf->l3, cpu, slot, val); 534 err = amd_set_l3_disable_slot(this_leaf->l3, cpu, slot, val);
484 535 if (err) {
536 if (err == -EEXIST)
537 printk(KERN_WARNING "L3 disable slot %d in use!\n",
538 slot);
539 return err;
540 }
485 return count; 541 return count;
486} 542}
487 543
@@ -502,7 +558,7 @@ static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
502 558
503#else /* CONFIG_CPU_SUP_AMD */ 559#else /* CONFIG_CPU_SUP_AMD */
504static void __cpuinit 560static void __cpuinit
505amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) 561amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, int index)
506{ 562{
507}; 563};
508#endif /* CONFIG_CPU_SUP_AMD */ 564#endif /* CONFIG_CPU_SUP_AMD */
@@ -518,7 +574,7 @@ __cpuinit cpuid4_cache_lookup_regs(int index,
518 574
519 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { 575 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
520 amd_cpuid4(index, &eax, &ebx, &ecx); 576 amd_cpuid4(index, &eax, &ebx, &ecx);
521 amd_check_l3_disable(index, this_leaf); 577 amd_check_l3_disable(this_leaf, index);
522 } else { 578 } else {
523 cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); 579 cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
524 } 580 }
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 18cc4256225..ed41562909f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -51,7 +51,7 @@
51static DEFINE_MUTEX(mce_read_mutex); 51static DEFINE_MUTEX(mce_read_mutex);
52 52
53#define rcu_dereference_check_mce(p) \ 53#define rcu_dereference_check_mce(p) \
54 rcu_dereference_check((p), \ 54 rcu_dereference_index_check((p), \
55 rcu_read_lock_sched_held() || \ 55 rcu_read_lock_sched_held() || \
56 lockdep_is_held(&mce_read_mutex)) 56 lockdep_is_held(&mce_read_mutex))
57 57
@@ -107,8 +107,8 @@ EXPORT_SYMBOL_GPL(x86_mce_decoder_chain);
107static int default_decode_mce(struct notifier_block *nb, unsigned long val, 107static int default_decode_mce(struct notifier_block *nb, unsigned long val,
108 void *data) 108 void *data)
109{ 109{
110 pr_emerg("No human readable MCE decoding support on this CPU type.\n"); 110 pr_emerg(HW_ERR "No human readable MCE decoding support on this CPU type.\n");
111 pr_emerg("Run the message through 'mcelog --ascii' to decode.\n"); 111 pr_emerg(HW_ERR "Run the message through 'mcelog --ascii' to decode.\n");
112 112
113 return NOTIFY_STOP; 113 return NOTIFY_STOP;
114} 114}
@@ -211,11 +211,11 @@ void mce_log(struct mce *mce)
211 211
212static void print_mce(struct mce *m) 212static void print_mce(struct mce *m)
213{ 213{
214 pr_emerg("CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", 214 pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n",
215 m->extcpu, m->mcgstatus, m->bank, m->status); 215 m->extcpu, m->mcgstatus, m->bank, m->status);
216 216
217 if (m->ip) { 217 if (m->ip) {
218 pr_emerg("RIP%s %02x:<%016Lx> ", 218 pr_emerg(HW_ERR "RIP%s %02x:<%016Lx> ",
219 !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", 219 !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
220 m->cs, m->ip); 220 m->cs, m->ip);
221 221
@@ -224,14 +224,14 @@ static void print_mce(struct mce *m)
224 pr_cont("\n"); 224 pr_cont("\n");
225 } 225 }
226 226
227 pr_emerg("TSC %llx ", m->tsc); 227 pr_emerg(HW_ERR "TSC %llx ", m->tsc);
228 if (m->addr) 228 if (m->addr)
229 pr_cont("ADDR %llx ", m->addr); 229 pr_cont("ADDR %llx ", m->addr);
230 if (m->misc) 230 if (m->misc)
231 pr_cont("MISC %llx ", m->misc); 231 pr_cont("MISC %llx ", m->misc);
232 232
233 pr_cont("\n"); 233 pr_cont("\n");
234 pr_emerg("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", 234 pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
235 m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid); 235 m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid);
236 236
237 /* 237 /*
@@ -241,16 +241,6 @@ static void print_mce(struct mce *m)
241 atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); 241 atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
242} 242}
243 243
244static void print_mce_head(void)
245{
246 pr_emerg("\nHARDWARE ERROR\n");
247}
248
249static void print_mce_tail(void)
250{
251 pr_emerg("This is not a software problem!\n");
252}
253
254#define PANIC_TIMEOUT 5 /* 5 seconds */ 244#define PANIC_TIMEOUT 5 /* 5 seconds */
255 245
256static atomic_t mce_paniced; 246static atomic_t mce_paniced;
@@ -291,7 +281,6 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
291 if (atomic_inc_return(&mce_fake_paniced) > 1) 281 if (atomic_inc_return(&mce_fake_paniced) > 1)
292 return; 282 return;
293 } 283 }
294 print_mce_head();
295 /* First print corrected ones that are still unlogged */ 284 /* First print corrected ones that are still unlogged */
296 for (i = 0; i < MCE_LOG_LEN; i++) { 285 for (i = 0; i < MCE_LOG_LEN; i++) {
297 struct mce *m = &mcelog.entry[i]; 286 struct mce *m = &mcelog.entry[i];
@@ -322,16 +311,15 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
322 apei_err = apei_write_mce(final); 311 apei_err = apei_write_mce(final);
323 } 312 }
324 if (cpu_missing) 313 if (cpu_missing)
325 printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n"); 314 pr_emerg(HW_ERR "Some CPUs didn't answer in synchronization\n");
326 print_mce_tail();
327 if (exp) 315 if (exp)
328 printk(KERN_EMERG "Machine check: %s\n", exp); 316 pr_emerg(HW_ERR "Machine check: %s\n", exp);
329 if (!fake_panic) { 317 if (!fake_panic) {
330 if (panic_timeout == 0) 318 if (panic_timeout == 0)
331 panic_timeout = mce_panic_timeout; 319 panic_timeout = mce_panic_timeout;
332 panic(msg); 320 panic(msg);
333 } else 321 } else
334 printk(KERN_EMERG "Fake kernel panic: %s\n", msg); 322 pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg);
335} 323}
336 324
337/* Support code for software error injection */ 325/* Support code for software error injection */
@@ -600,6 +588,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
600 */ 588 */
601 if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) { 589 if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) {
602 mce_log(&m); 590 mce_log(&m);
591 atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, &m);
603 add_taint(TAINT_MACHINE_CHECK); 592 add_taint(TAINT_MACHINE_CHECK);
604 } 593 }
605 594
@@ -1220,7 +1209,7 @@ int mce_notify_irq(void)
1220 schedule_work(&mce_trigger_work); 1209 schedule_work(&mce_trigger_work);
1221 1210
1222 if (__ratelimit(&ratelimit)) 1211 if (__ratelimit(&ratelimit))
1223 printk(KERN_INFO "Machine check events logged\n"); 1212 pr_info(HW_ERR "Machine check events logged\n");
1224 1213
1225 return 1; 1214 return 1;
1226 } 1215 }
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 62b48e40920..6fcd0936194 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -95,19 +95,20 @@ static void cmci_discover(int banks, int boot)
95 rdmsrl(MSR_IA32_MCx_CTL2(i), val); 95 rdmsrl(MSR_IA32_MCx_CTL2(i), val);
96 96
97 /* Already owned by someone else? */ 97 /* Already owned by someone else? */
98 if (val & CMCI_EN) { 98 if (val & MCI_CTL2_CMCI_EN) {
99 if (test_and_clear_bit(i, owned) && !boot) 99 if (test_and_clear_bit(i, owned) && !boot)
100 print_update("SHD", &hdr, i); 100 print_update("SHD", &hdr, i);
101 __clear_bit(i, __get_cpu_var(mce_poll_banks)); 101 __clear_bit(i, __get_cpu_var(mce_poll_banks));
102 continue; 102 continue;
103 } 103 }
104 104
105 val |= CMCI_EN | CMCI_THRESHOLD; 105 val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
106 val |= MCI_CTL2_CMCI_EN | CMCI_THRESHOLD;
106 wrmsrl(MSR_IA32_MCx_CTL2(i), val); 107 wrmsrl(MSR_IA32_MCx_CTL2(i), val);
107 rdmsrl(MSR_IA32_MCx_CTL2(i), val); 108 rdmsrl(MSR_IA32_MCx_CTL2(i), val);
108 109
109 /* Did the enable bit stick? -- the bank supports CMCI */ 110 /* Did the enable bit stick? -- the bank supports CMCI */
110 if (val & CMCI_EN) { 111 if (val & MCI_CTL2_CMCI_EN) {
111 if (!test_and_set_bit(i, owned) && !boot) 112 if (!test_and_set_bit(i, owned) && !boot)
112 print_update("CMCI", &hdr, i); 113 print_update("CMCI", &hdr, i);
113 __clear_bit(i, __get_cpu_var(mce_poll_banks)); 114 __clear_bit(i, __get_cpu_var(mce_poll_banks));
@@ -155,7 +156,7 @@ void cmci_clear(void)
155 continue; 156 continue;
156 /* Disable CMCI */ 157 /* Disable CMCI */
157 rdmsrl(MSR_IA32_MCx_CTL2(i), val); 158 rdmsrl(MSR_IA32_MCx_CTL2(i), val);
158 val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK); 159 val &= ~(MCI_CTL2_CMCI_EN|MCI_CTL2_CMCI_THRESHOLD_MASK);
159 wrmsrl(MSR_IA32_MCx_CTL2(i), val); 160 wrmsrl(MSR_IA32_MCx_CTL2(i), val);
160 __clear_bit(i, __get_cpu_var(mce_banks_owned)); 161 __clear_bit(i, __get_cpu_var(mce_banks_owned));
161 } 162 }
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index e1a0a3bf971..c2a8b26d4fe 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -34,15 +34,25 @@
34/* How long to wait between reporting thermal events */ 34/* How long to wait between reporting thermal events */
35#define CHECK_INTERVAL (300 * HZ) 35#define CHECK_INTERVAL (300 * HZ)
36 36
37#define THERMAL_THROTTLING_EVENT 0
38#define POWER_LIMIT_EVENT 1
39
37/* 40/*
38 * Current thermal throttling state: 41 * Current thermal event state:
39 */ 42 */
40struct thermal_state { 43struct _thermal_state {
41 bool is_throttled; 44 bool new_event;
42 45 int event;
43 u64 next_check; 46 u64 next_check;
44 unsigned long throttle_count; 47 unsigned long count;
45 unsigned long last_throttle_count; 48 unsigned long last_count;
49};
50
51struct thermal_state {
52 struct _thermal_state core_throttle;
53 struct _thermal_state core_power_limit;
54 struct _thermal_state package_throttle;
55 struct _thermal_state package_power_limit;
46}; 56};
47 57
48static DEFINE_PER_CPU(struct thermal_state, thermal_state); 58static DEFINE_PER_CPU(struct thermal_state, thermal_state);
@@ -53,11 +63,13 @@ static u32 lvtthmr_init __read_mostly;
53 63
54#ifdef CONFIG_SYSFS 64#ifdef CONFIG_SYSFS
55#define define_therm_throt_sysdev_one_ro(_name) \ 65#define define_therm_throt_sysdev_one_ro(_name) \
56 static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) 66 static SYSDEV_ATTR(_name, 0444, \
67 therm_throt_sysdev_show_##_name, \
68 NULL) \
57 69
58#define define_therm_throt_sysdev_show_func(name) \ 70#define define_therm_throt_sysdev_show_func(event, name) \
59 \ 71 \
60static ssize_t therm_throt_sysdev_show_##name( \ 72static ssize_t therm_throt_sysdev_show_##event##_##name( \
61 struct sys_device *dev, \ 73 struct sys_device *dev, \
62 struct sysdev_attribute *attr, \ 74 struct sysdev_attribute *attr, \
63 char *buf) \ 75 char *buf) \
@@ -66,30 +78,42 @@ static ssize_t therm_throt_sysdev_show_##name( \
66 ssize_t ret; \ 78 ssize_t ret; \
67 \ 79 \
68 preempt_disable(); /* CPU hotplug */ \ 80 preempt_disable(); /* CPU hotplug */ \
69 if (cpu_online(cpu)) \ 81 if (cpu_online(cpu)) { \
70 ret = sprintf(buf, "%lu\n", \ 82 ret = sprintf(buf, "%lu\n", \
71 per_cpu(thermal_state, cpu).name); \ 83 per_cpu(thermal_state, cpu).event.name); \
72 else \ 84 } else \
73 ret = 0; \ 85 ret = 0; \
74 preempt_enable(); \ 86 preempt_enable(); \
75 \ 87 \
76 return ret; \ 88 return ret; \
77} 89}
78 90
79define_therm_throt_sysdev_show_func(throttle_count); 91define_therm_throt_sysdev_show_func(core_throttle, count);
80define_therm_throt_sysdev_one_ro(throttle_count); 92define_therm_throt_sysdev_one_ro(core_throttle_count);
93
94define_therm_throt_sysdev_show_func(core_power_limit, count);
95define_therm_throt_sysdev_one_ro(core_power_limit_count);
96
97define_therm_throt_sysdev_show_func(package_throttle, count);
98define_therm_throt_sysdev_one_ro(package_throttle_count);
99
100define_therm_throt_sysdev_show_func(package_power_limit, count);
101define_therm_throt_sysdev_one_ro(package_power_limit_count);
81 102
82static struct attribute *thermal_throttle_attrs[] = { 103static struct attribute *thermal_throttle_attrs[] = {
83 &attr_throttle_count.attr, 104 &attr_core_throttle_count.attr,
84 NULL 105 NULL
85}; 106};
86 107
87static struct attribute_group thermal_throttle_attr_group = { 108static struct attribute_group thermal_attr_group = {
88 .attrs = thermal_throttle_attrs, 109 .attrs = thermal_throttle_attrs,
89 .name = "thermal_throttle" 110 .name = "thermal_throttle"
90}; 111};
91#endif /* CONFIG_SYSFS */ 112#endif /* CONFIG_SYSFS */
92 113
114#define CORE_LEVEL 0
115#define PACKAGE_LEVEL 1
116
93/*** 117/***
94 * therm_throt_process - Process thermal throttling event from interrupt 118 * therm_throt_process - Process thermal throttling event from interrupt
95 * @curr: Whether the condition is current or not (boolean), since the 119 * @curr: Whether the condition is current or not (boolean), since the
@@ -106,39 +130,70 @@ static struct attribute_group thermal_throttle_attr_group = {
106 * 1 : Event should be logged further, and a message has been 130 * 1 : Event should be logged further, and a message has been
107 * printed to the syslog. 131 * printed to the syslog.
108 */ 132 */
109static int therm_throt_process(bool is_throttled) 133static int therm_throt_process(bool new_event, int event, int level)
110{ 134{
111 struct thermal_state *state; 135 struct _thermal_state *state;
112 unsigned int this_cpu; 136 unsigned int this_cpu = smp_processor_id();
113 bool was_throttled; 137 bool old_event;
114 u64 now; 138 u64 now;
139 struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
115 140
116 this_cpu = smp_processor_id();
117 now = get_jiffies_64(); 141 now = get_jiffies_64();
118 state = &per_cpu(thermal_state, this_cpu); 142 if (level == CORE_LEVEL) {
143 if (event == THERMAL_THROTTLING_EVENT)
144 state = &pstate->core_throttle;
145 else if (event == POWER_LIMIT_EVENT)
146 state = &pstate->core_power_limit;
147 else
148 return 0;
149 } else if (level == PACKAGE_LEVEL) {
150 if (event == THERMAL_THROTTLING_EVENT)
151 state = &pstate->package_throttle;
152 else if (event == POWER_LIMIT_EVENT)
153 state = &pstate->package_power_limit;
154 else
155 return 0;
156 } else
157 return 0;
119 158
120 was_throttled = state->is_throttled; 159 old_event = state->new_event;
121 state->is_throttled = is_throttled; 160 state->new_event = new_event;
122 161
123 if (is_throttled) 162 if (new_event)
124 state->throttle_count++; 163 state->count++;
125 164
126 if (time_before64(now, state->next_check) && 165 if (time_before64(now, state->next_check) &&
127 state->throttle_count != state->last_throttle_count) 166 state->count != state->last_count)
128 return 0; 167 return 0;
129 168
130 state->next_check = now + CHECK_INTERVAL; 169 state->next_check = now + CHECK_INTERVAL;
131 state->last_throttle_count = state->throttle_count; 170 state->last_count = state->count;
132 171
133 /* if we just entered the thermal event */ 172 /* if we just entered the thermal event */
134 if (is_throttled) { 173 if (new_event) {
135 printk(KERN_CRIT "CPU%d: Temperature above threshold, cpu clock throttled (total events = %lu)\n", this_cpu, state->throttle_count); 174 if (event == THERMAL_THROTTLING_EVENT)
175 printk(KERN_CRIT "CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n",
176 this_cpu,
177 level == CORE_LEVEL ? "Core" : "Package",
178 state->count);
179 else
180 printk(KERN_CRIT "CPU%d: %s power limit notification (total events = %lu)\n",
181 this_cpu,
182 level == CORE_LEVEL ? "Core" : "Package",
183 state->count);
136 184
137 add_taint(TAINT_MACHINE_CHECK); 185 add_taint(TAINT_MACHINE_CHECK);
138 return 1; 186 return 1;
139 } 187 }
140 if (was_throttled) { 188 if (old_event) {
141 printk(KERN_INFO "CPU%d: Temperature/speed normal\n", this_cpu); 189 if (event == THERMAL_THROTTLING_EVENT)
190 printk(KERN_INFO "CPU%d: %s temperature/speed normal\n",
191 this_cpu,
192 level == CORE_LEVEL ? "Core" : "Package");
193 else
194 printk(KERN_INFO "CPU%d: %s power limit normal\n",
195 this_cpu,
196 level == CORE_LEVEL ? "Core" : "Package");
142 return 1; 197 return 1;
143 } 198 }
144 199
@@ -149,13 +204,32 @@ static int therm_throt_process(bool is_throttled)
149/* Add/Remove thermal_throttle interface for CPU device: */ 204/* Add/Remove thermal_throttle interface for CPU device: */
150static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev) 205static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev)
151{ 206{
152 return sysfs_create_group(&sys_dev->kobj, 207 int err;
153 &thermal_throttle_attr_group); 208 struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
209
210 err = sysfs_create_group(&sys_dev->kobj, &thermal_attr_group);
211 if (err)
212 return err;
213
214 if (cpu_has(c, X86_FEATURE_PLN))
215 err = sysfs_add_file_to_group(&sys_dev->kobj,
216 &attr_core_power_limit_count.attr,
217 thermal_attr_group.name);
218 if (cpu_has(c, X86_FEATURE_PTS))
219 err = sysfs_add_file_to_group(&sys_dev->kobj,
220 &attr_package_throttle_count.attr,
221 thermal_attr_group.name);
222 if (cpu_has(c, X86_FEATURE_PLN))
223 err = sysfs_add_file_to_group(&sys_dev->kobj,
224 &attr_package_power_limit_count.attr,
225 thermal_attr_group.name);
226
227 return err;
154} 228}
155 229
156static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) 230static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev)
157{ 231{
158 sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group); 232 sysfs_remove_group(&sys_dev->kobj, &thermal_attr_group);
159} 233}
160 234
161/* Mutex protecting device creation against CPU hotplug: */ 235/* Mutex protecting device creation against CPU hotplug: */
@@ -226,14 +300,50 @@ device_initcall(thermal_throttle_init_device);
226 300
227#endif /* CONFIG_SYSFS */ 301#endif /* CONFIG_SYSFS */
228 302
303/*
304 * Set up the two most significant bits to notify the mce log of this thermal
305 * event type.
306 * This is a temporary solution. It may be changed in the future with the mce
307 * log infrastructure.
308 */
309#define CORE_THROTTLED (0)
310#define CORE_POWER_LIMIT ((__u64)1 << 62)
311#define PACKAGE_THROTTLED ((__u64)2 << 62)
312#define PACKAGE_POWER_LIMIT ((__u64)3 << 62)
313
229/* Thermal transition interrupt handler */ 314/* Thermal transition interrupt handler */
230static void intel_thermal_interrupt(void) 315static void intel_thermal_interrupt(void)
231{ 316{
232 __u64 msr_val; 317 __u64 msr_val;
318 struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
233 319
234 rdmsrl(MSR_IA32_THERM_STATUS, msr_val); 320 rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
235 if (therm_throt_process((msr_val & THERM_STATUS_PROCHOT) != 0)) 321
236 mce_log_therm_throt_event(msr_val); 322 if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
323 THERMAL_THROTTLING_EVENT,
324 CORE_LEVEL) != 0)
325 mce_log_therm_throt_event(CORE_THROTTLED | msr_val);
326
327 if (cpu_has(c, X86_FEATURE_PLN))
328 if (therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
329 POWER_LIMIT_EVENT,
330 CORE_LEVEL) != 0)
331 mce_log_therm_throt_event(CORE_POWER_LIMIT | msr_val);
332
333 if (cpu_has(c, X86_FEATURE_PTS)) {
334 rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
335 if (therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
336 THERMAL_THROTTLING_EVENT,
337 PACKAGE_LEVEL) != 0)
338 mce_log_therm_throt_event(PACKAGE_THROTTLED | msr_val);
339 if (cpu_has(c, X86_FEATURE_PLN))
340 if (therm_throt_process(msr_val &
341 PACKAGE_THERM_STATUS_POWER_LIMIT,
342 POWER_LIMIT_EVENT,
343 PACKAGE_LEVEL) != 0)
344 mce_log_therm_throt_event(PACKAGE_POWER_LIMIT
345 | msr_val);
346 }
237} 347}
238 348
239static void unexpected_thermal_interrupt(void) 349static void unexpected_thermal_interrupt(void)
@@ -335,8 +445,26 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
335 apic_write(APIC_LVTTHMR, h); 445 apic_write(APIC_LVTTHMR, h);
336 446
337 rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); 447 rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
338 wrmsr(MSR_IA32_THERM_INTERRUPT, 448 if (cpu_has(c, X86_FEATURE_PLN))
339 l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h); 449 wrmsr(MSR_IA32_THERM_INTERRUPT,
450 l | (THERM_INT_LOW_ENABLE
451 | THERM_INT_HIGH_ENABLE | THERM_INT_PLN_ENABLE), h);
452 else
453 wrmsr(MSR_IA32_THERM_INTERRUPT,
454 l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);
455
456 if (cpu_has(c, X86_FEATURE_PTS)) {
457 rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
458 if (cpu_has(c, X86_FEATURE_PLN))
459 wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
460 l | (PACKAGE_THERM_INT_LOW_ENABLE
461 | PACKAGE_THERM_INT_HIGH_ENABLE
462 | PACKAGE_THERM_INT_PLN_ENABLE), h);
463 else
464 wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
465 l | (PACKAGE_THERM_INT_LOW_ENABLE
466 | PACKAGE_THERM_INT_HIGH_ENABLE), h);
467 }
340 468
341 smp_thermal_vector = intel_thermal_interrupt; 469 smp_thermal_vector = intel_thermal_interrupt;
342 470
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 16f41bbe46b..d944bf6c50e 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -18,6 +18,7 @@
18#include <asm/mshyperv.h> 18#include <asm/mshyperv.h>
19 19
20struct ms_hyperv_info ms_hyperv; 20struct ms_hyperv_info ms_hyperv;
21EXPORT_SYMBOL_GPL(ms_hyperv);
21 22
22static bool __init ms_hyperv_platform(void) 23static bool __init ms_hyperv_platform(void)
23{ 24{
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 06130b52f01..c5f59d07142 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -632,9 +632,9 @@ static void __init mtrr_print_out_one_result(int i)
632 unsigned long gran_base, chunk_base, lose_base; 632 unsigned long gran_base, chunk_base, lose_base;
633 char gran_factor, chunk_factor, lose_factor; 633 char gran_factor, chunk_factor, lose_factor;
634 634
635 gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), 635 gran_base = to_size_factor(result[i].gran_sizek, &gran_factor);
636 chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), 636 chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor);
637 lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), 637 lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor);
638 638
639 pr_info("%sgran_size: %ld%c \tchunk_size: %ld%c \t", 639 pr_info("%sgran_size: %ld%c \tchunk_size: %ld%c \t",
640 result[i].bad ? "*BAD*" : " ", 640 result[i].bad ? "*BAD*" : " ",
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index fd31a441c61..7d28d7d0388 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -433,13 +433,12 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
433{ 433{
434 unsigned int mask_lo, mask_hi, base_lo, base_hi; 434 unsigned int mask_lo, mask_hi, base_lo, base_hi;
435 unsigned int tmp, hi; 435 unsigned int tmp, hi;
436 int cpu;
437 436
438 /* 437 /*
439 * get_mtrr doesn't need to update mtrr_state, also it could be called 438 * get_mtrr doesn't need to update mtrr_state, also it could be called
440 * from any cpu, so try to print it out directly. 439 * from any cpu, so try to print it out directly.
441 */ 440 */
442 cpu = get_cpu(); 441 get_cpu();
443 442
444 rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); 443 rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi);
445 444
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 79556bd9b60..01c0f3ee6cc 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -35,6 +35,7 @@
35 35
36#include <linux/types.h> /* FIXME: kvm_para.h needs this */ 36#include <linux/types.h> /* FIXME: kvm_para.h needs this */
37 37
38#include <linux/stop_machine.h>
38#include <linux/kvm_para.h> 39#include <linux/kvm_para.h>
39#include <linux/uaccess.h> 40#include <linux/uaccess.h>
40#include <linux/module.h> 41#include <linux/module.h>
@@ -143,22 +144,28 @@ struct set_mtrr_data {
143 mtrr_type smp_type; 144 mtrr_type smp_type;
144}; 145};
145 146
147static DEFINE_PER_CPU(struct cpu_stop_work, mtrr_work);
148
146/** 149/**
147 * ipi_handler - Synchronisation handler. Executed by "other" CPUs. 150 * mtrr_work_handler - Synchronisation handler. Executed by "other" CPUs.
148 * @info: pointer to mtrr configuration data 151 * @info: pointer to mtrr configuration data
149 * 152 *
150 * Returns nothing. 153 * Returns nothing.
151 */ 154 */
152static void ipi_handler(void *info) 155static int mtrr_work_handler(void *info)
153{ 156{
154#ifdef CONFIG_SMP 157#ifdef CONFIG_SMP
155 struct set_mtrr_data *data = info; 158 struct set_mtrr_data *data = info;
156 unsigned long flags; 159 unsigned long flags;
157 160
161 atomic_dec(&data->count);
162 while (!atomic_read(&data->gate))
163 cpu_relax();
164
158 local_irq_save(flags); 165 local_irq_save(flags);
159 166
160 atomic_dec(&data->count); 167 atomic_dec(&data->count);
161 while (!atomic_read(&data->gate)) 168 while (atomic_read(&data->gate))
162 cpu_relax(); 169 cpu_relax();
163 170
164 /* The master has cleared me to execute */ 171 /* The master has cleared me to execute */
@@ -173,12 +180,13 @@ static void ipi_handler(void *info)
173 } 180 }
174 181
175 atomic_dec(&data->count); 182 atomic_dec(&data->count);
176 while (atomic_read(&data->gate)) 183 while (!atomic_read(&data->gate))
177 cpu_relax(); 184 cpu_relax();
178 185
179 atomic_dec(&data->count); 186 atomic_dec(&data->count);
180 local_irq_restore(flags); 187 local_irq_restore(flags);
181#endif 188#endif
189 return 0;
182} 190}
183 191
184static inline int types_compatible(mtrr_type type1, mtrr_type type2) 192static inline int types_compatible(mtrr_type type1, mtrr_type type2)
@@ -198,7 +206,7 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2)
198 * 206 *
199 * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly: 207 * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly:
200 * 208 *
201 * 1. Send IPI to do the following: 209 * 1. Queue work to do the following on all processors:
202 * 2. Disable Interrupts 210 * 2. Disable Interrupts
203 * 3. Wait for all procs to do so 211 * 3. Wait for all procs to do so
204 * 4. Enter no-fill cache mode 212 * 4. Enter no-fill cache mode
@@ -215,14 +223,17 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2)
215 * 15. Enable interrupts. 223 * 15. Enable interrupts.
216 * 224 *
217 * What does that mean for us? Well, first we set data.count to the number 225 * What does that mean for us? Well, first we set data.count to the number
218 * of CPUs. As each CPU disables interrupts, it'll decrement it once. We wait 226 * of CPUs. As each CPU announces that it started the rendezvous handler by
219 * until it hits 0 and proceed. We set the data.gate flag and reset data.count. 227 * decrementing the count, We reset data.count and set the data.gate flag
220 * Meanwhile, they are waiting for that flag to be set. Once it's set, each 228 * allowing all the cpu's to proceed with the work. As each cpu disables
229 * interrupts, it'll decrement data.count once. We wait until it hits 0 and
230 * proceed. We clear the data.gate flag and reset data.count. Meanwhile, they
231 * are waiting for that flag to be cleared. Once it's cleared, each
221 * CPU goes through the transition of updating MTRRs. 232 * CPU goes through the transition of updating MTRRs.
222 * The CPU vendors may each do it differently, 233 * The CPU vendors may each do it differently,
223 * so we call mtrr_if->set() callback and let them take care of it. 234 * so we call mtrr_if->set() callback and let them take care of it.
224 * When they're done, they again decrement data->count and wait for data.gate 235 * When they're done, they again decrement data->count and wait for data.gate
225 * to be reset. 236 * to be set.
226 * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag 237 * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag
227 * Everyone then enables interrupts and we all continue on. 238 * Everyone then enables interrupts and we all continue on.
228 * 239 *
@@ -234,6 +245,9 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ
234{ 245{
235 struct set_mtrr_data data; 246 struct set_mtrr_data data;
236 unsigned long flags; 247 unsigned long flags;
248 int cpu;
249
250 preempt_disable();
237 251
238 data.smp_reg = reg; 252 data.smp_reg = reg;
239 data.smp_base = base; 253 data.smp_base = base;
@@ -246,10 +260,15 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ
246 atomic_set(&data.gate, 0); 260 atomic_set(&data.gate, 0);
247 261
248 /* Start the ball rolling on other CPUs */ 262 /* Start the ball rolling on other CPUs */
249 if (smp_call_function(ipi_handler, &data, 0) != 0) 263 for_each_online_cpu(cpu) {
250 panic("mtrr: timed out waiting for other CPUs\n"); 264 struct cpu_stop_work *work = &per_cpu(mtrr_work, cpu);
265
266 if (cpu == smp_processor_id())
267 continue;
268
269 stop_one_cpu_nowait(cpu, mtrr_work_handler, &data, work);
270 }
251 271
252 local_irq_save(flags);
253 272
254 while (atomic_read(&data.count)) 273 while (atomic_read(&data.count))
255 cpu_relax(); 274 cpu_relax();
@@ -259,6 +278,16 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ
259 smp_wmb(); 278 smp_wmb();
260 atomic_set(&data.gate, 1); 279 atomic_set(&data.gate, 1);
261 280
281 local_irq_save(flags);
282
283 while (atomic_read(&data.count))
284 cpu_relax();
285
286 /* Ok, reset count and toggle gate */
287 atomic_set(&data.count, num_booting_cpus() - 1);
288 smp_wmb();
289 atomic_set(&data.gate, 0);
290
262 /* Do our MTRR business */ 291 /* Do our MTRR business */
263 292
264 /* 293 /*
@@ -279,7 +308,7 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ
279 308
280 atomic_set(&data.count, num_booting_cpus() - 1); 309 atomic_set(&data.count, num_booting_cpus() - 1);
281 smp_wmb(); 310 smp_wmb();
282 atomic_set(&data.gate, 0); 311 atomic_set(&data.gate, 1);
283 312
284 /* 313 /*
285 * Wait here for everyone to have seen the gate change 314 * Wait here for everyone to have seen the gate change
@@ -289,6 +318,7 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ
289 cpu_relax(); 318 cpu_relax();
290 319
291 local_irq_restore(flags); 320 local_irq_restore(flags);
321 preempt_enable();
292} 322}
293 323
294/** 324/**
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 5db5b7d65a1..f2da20fda02 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -220,6 +220,7 @@ struct x86_pmu {
220 struct perf_event *event); 220 struct perf_event *event);
221 struct event_constraint *event_constraints; 221 struct event_constraint *event_constraints;
222 void (*quirks)(void); 222 void (*quirks)(void);
223 int perfctr_second_write;
223 224
224 int (*cpu_prepare)(int cpu); 225 int (*cpu_prepare)(int cpu);
225 void (*cpu_starting)(int cpu); 226 void (*cpu_starting)(int cpu);
@@ -295,10 +296,10 @@ x86_perf_event_update(struct perf_event *event)
295 * count to the generic event atomically: 296 * count to the generic event atomically:
296 */ 297 */
297again: 298again:
298 prev_raw_count = atomic64_read(&hwc->prev_count); 299 prev_raw_count = local64_read(&hwc->prev_count);
299 rdmsrl(hwc->event_base + idx, new_raw_count); 300 rdmsrl(hwc->event_base + idx, new_raw_count);
300 301
301 if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, 302 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
302 new_raw_count) != prev_raw_count) 303 new_raw_count) != prev_raw_count)
303 goto again; 304 goto again;
304 305
@@ -313,8 +314,8 @@ again:
313 delta = (new_raw_count << shift) - (prev_raw_count << shift); 314 delta = (new_raw_count << shift) - (prev_raw_count << shift);
314 delta >>= shift; 315 delta >>= shift;
315 316
316 atomic64_add(delta, &event->count); 317 local64_add(delta, &event->count);
317 atomic64_sub(delta, &hwc->period_left); 318 local64_sub(delta, &hwc->period_left);
318 319
319 return new_raw_count; 320 return new_raw_count;
320} 321}
@@ -438,7 +439,7 @@ static int x86_setup_perfctr(struct perf_event *event)
438 if (!hwc->sample_period) { 439 if (!hwc->sample_period) {
439 hwc->sample_period = x86_pmu.max_period; 440 hwc->sample_period = x86_pmu.max_period;
440 hwc->last_period = hwc->sample_period; 441 hwc->last_period = hwc->sample_period;
441 atomic64_set(&hwc->period_left, hwc->sample_period); 442 local64_set(&hwc->period_left, hwc->sample_period);
442 } else { 443 } else {
443 /* 444 /*
444 * If we have a PMU initialized but no APIC 445 * If we have a PMU initialized but no APIC
@@ -885,7 +886,7 @@ static int
885x86_perf_event_set_period(struct perf_event *event) 886x86_perf_event_set_period(struct perf_event *event)
886{ 887{
887 struct hw_perf_event *hwc = &event->hw; 888 struct hw_perf_event *hwc = &event->hw;
888 s64 left = atomic64_read(&hwc->period_left); 889 s64 left = local64_read(&hwc->period_left);
889 s64 period = hwc->sample_period; 890 s64 period = hwc->sample_period;
890 int ret = 0, idx = hwc->idx; 891 int ret = 0, idx = hwc->idx;
891 892
@@ -897,14 +898,14 @@ x86_perf_event_set_period(struct perf_event *event)
897 */ 898 */
898 if (unlikely(left <= -period)) { 899 if (unlikely(left <= -period)) {
899 left = period; 900 left = period;
900 atomic64_set(&hwc->period_left, left); 901 local64_set(&hwc->period_left, left);
901 hwc->last_period = period; 902 hwc->last_period = period;
902 ret = 1; 903 ret = 1;
903 } 904 }
904 905
905 if (unlikely(left <= 0)) { 906 if (unlikely(left <= 0)) {
906 left += period; 907 left += period;
907 atomic64_set(&hwc->period_left, left); 908 local64_set(&hwc->period_left, left);
908 hwc->last_period = period; 909 hwc->last_period = period;
909 ret = 1; 910 ret = 1;
910 } 911 }
@@ -923,10 +924,19 @@ x86_perf_event_set_period(struct perf_event *event)
923 * The hw event starts counting from this event offset, 924 * The hw event starts counting from this event offset,
924 * mark it to be able to extra future deltas: 925 * mark it to be able to extra future deltas:
925 */ 926 */
926 atomic64_set(&hwc->prev_count, (u64)-left); 927 local64_set(&hwc->prev_count, (u64)-left);
927 928
928 wrmsrl(hwc->event_base + idx, 929 wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask);
930
931 /*
932 * Due to an erratum on certain CPUs we need
933 * a second write to be sure the register
934 * is updated properly
935 */
936 if (x86_pmu.perfctr_second_write) {
937 wrmsrl(hwc->event_base + idx,
929 (u64)(-left) & x86_pmu.cntval_mask); 938 (u64)(-left) & x86_pmu.cntval_mask);
939 }
930 940
931 perf_event_update_userpage(event); 941 perf_event_update_userpage(event);
932 942
@@ -969,7 +979,7 @@ static int x86_pmu_enable(struct perf_event *event)
969 * skip the schedulability test here, it will be peformed 979 * skip the schedulability test here, it will be peformed
970 * at commit time(->commit_txn) as a whole 980 * at commit time(->commit_txn) as a whole
971 */ 981 */
972 if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) 982 if (cpuc->group_flag & PERF_EVENT_TXN)
973 goto out; 983 goto out;
974 984
975 ret = x86_pmu.schedule_events(cpuc, n, assign); 985 ret = x86_pmu.schedule_events(cpuc, n, assign);
@@ -1096,7 +1106,7 @@ static void x86_pmu_disable(struct perf_event *event)
1096 * The events never got scheduled and ->cancel_txn will truncate 1106 * The events never got scheduled and ->cancel_txn will truncate
1097 * the event_list. 1107 * the event_list.
1098 */ 1108 */
1099 if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) 1109 if (cpuc->group_flag & PERF_EVENT_TXN)
1100 return; 1110 return;
1101 1111
1102 x86_pmu_stop(event); 1112 x86_pmu_stop(event);
@@ -1388,7 +1398,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu)
1388{ 1398{
1389 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1399 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1390 1400
1391 cpuc->group_flag |= PERF_EVENT_TXN_STARTED; 1401 cpuc->group_flag |= PERF_EVENT_TXN;
1392 cpuc->n_txn = 0; 1402 cpuc->n_txn = 0;
1393} 1403}
1394 1404
@@ -1401,7 +1411,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
1401{ 1411{
1402 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1412 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1403 1413
1404 cpuc->group_flag &= ~PERF_EVENT_TXN_STARTED; 1414 cpuc->group_flag &= ~PERF_EVENT_TXN;
1405 /* 1415 /*
1406 * Truncate the collected events. 1416 * Truncate the collected events.
1407 */ 1417 */
@@ -1435,11 +1445,7 @@ static int x86_pmu_commit_txn(const struct pmu *pmu)
1435 */ 1445 */
1436 memcpy(cpuc->assign, assign, n*sizeof(int)); 1446 memcpy(cpuc->assign, assign, n*sizeof(int));
1437 1447
1438 /* 1448 cpuc->group_flag &= ~PERF_EVENT_TXN;
1439 * Clear out the txn count so that ->cancel_txn() which gets
1440 * run after ->commit_txn() doesn't undo things.
1441 */
1442 cpuc->n_txn = 0;
1443 1449
1444 return 0; 1450 return 0;
1445} 1451}
@@ -1607,8 +1613,6 @@ static const struct stacktrace_ops backtrace_ops = {
1607 .walk_stack = print_context_stack_bp, 1613 .walk_stack = print_context_stack_bp,
1608}; 1614};
1609 1615
1610#include "../dumpstack.h"
1611
1612static void 1616static void
1613perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) 1617perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
1614{ 1618{
@@ -1730,22 +1734,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
1730 return entry; 1734 return entry;
1731} 1735}
1732 1736
1733void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
1734{
1735 regs->ip = ip;
1736 /*
1737 * perf_arch_fetch_caller_regs adds another call, we need to increment
1738 * the skip level
1739 */
1740 regs->bp = rewind_frame_pointer(skip + 1);
1741 regs->cs = __KERNEL_CS;
1742 /*
1743 * We abuse bit 3 to pass exact information, see perf_misc_flags
1744 * and the comment with PERF_EFLAGS_EXACT.
1745 */
1746 regs->flags = 0;
1747}
1748
1749unsigned long perf_instruction_pointer(struct pt_regs *regs) 1737unsigned long perf_instruction_pointer(struct pt_regs *regs)
1750{ 1738{
1751 unsigned long ip; 1739 unsigned long ip;
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index ae85d69644d..107711bf0ee 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -21,22 +21,36 @@ struct p4_event_bind {
21 char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on abscence */ 21 char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on abscence */
22}; 22};
23 23
24struct p4_cache_event_bind { 24struct p4_pebs_bind {
25 unsigned int metric_pebs; 25 unsigned int metric_pebs;
26 unsigned int metric_vert; 26 unsigned int metric_vert;
27}; 27};
28 28
29#define P4_GEN_CACHE_EVENT_BIND(name) \ 29/* it sets P4_PEBS_ENABLE_UOP_TAG as well */
30 [P4_CACHE__##name] = { \ 30#define P4_GEN_PEBS_BIND(name, pebs, vert) \
31 .metric_pebs = P4_PEBS__##name, \ 31 [P4_PEBS_METRIC__##name] = { \
32 .metric_vert = P4_VERT__##name, \ 32 .metric_pebs = pebs | P4_PEBS_ENABLE_UOP_TAG, \
33 .metric_vert = vert, \
33 } 34 }
34 35
35static struct p4_cache_event_bind p4_cache_event_bind_map[] = { 36/*
36 P4_GEN_CACHE_EVENT_BIND(1stl_cache_load_miss_retired), 37 * note we have P4_PEBS_ENABLE_UOP_TAG always set here
37 P4_GEN_CACHE_EVENT_BIND(2ndl_cache_load_miss_retired), 38 *
38 P4_GEN_CACHE_EVENT_BIND(dtlb_load_miss_retired), 39 * it's needed for mapping P4_PEBS_CONFIG_METRIC_MASK bits of
39 P4_GEN_CACHE_EVENT_BIND(dtlb_store_miss_retired), 40 * event configuration to find out which values are to be
41 * written into MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT
42 * registers
43 */
44static struct p4_pebs_bind p4_pebs_bind_map[] = {
45 P4_GEN_PEBS_BIND(1stl_cache_load_miss_retired, 0x0000001, 0x0000001),
46 P4_GEN_PEBS_BIND(2ndl_cache_load_miss_retired, 0x0000002, 0x0000001),
47 P4_GEN_PEBS_BIND(dtlb_load_miss_retired, 0x0000004, 0x0000001),
48 P4_GEN_PEBS_BIND(dtlb_store_miss_retired, 0x0000004, 0x0000002),
49 P4_GEN_PEBS_BIND(dtlb_all_miss_retired, 0x0000004, 0x0000003),
50 P4_GEN_PEBS_BIND(tagged_mispred_branch, 0x0018000, 0x0000010),
51 P4_GEN_PEBS_BIND(mob_load_replay_retired, 0x0000200, 0x0000001),
52 P4_GEN_PEBS_BIND(split_load_retired, 0x0000400, 0x0000001),
53 P4_GEN_PEBS_BIND(split_store_retired, 0x0000400, 0x0000002),
40}; 54};
41 55
42/* 56/*
@@ -281,10 +295,10 @@ static struct p4_event_bind p4_event_bind_map[] = {
281 }, 295 },
282}; 296};
283 297
284#define P4_GEN_CACHE_EVENT(event, bit, cache_event) \ 298#define P4_GEN_CACHE_EVENT(event, bit, metric) \
285 p4_config_pack_escr(P4_ESCR_EVENT(event) | \ 299 p4_config_pack_escr(P4_ESCR_EVENT(event) | \
286 P4_ESCR_EMASK_BIT(event, bit)) | \ 300 P4_ESCR_EMASK_BIT(event, bit)) | \
287 p4_config_pack_cccr(cache_event | \ 301 p4_config_pack_cccr(metric | \
288 P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event)))) 302 P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event))))
289 303
290static __initconst const u64 p4_hw_cache_event_ids 304static __initconst const u64 p4_hw_cache_event_ids
@@ -296,34 +310,34 @@ static __initconst const u64 p4_hw_cache_event_ids
296 [ C(OP_READ) ] = { 310 [ C(OP_READ) ] = {
297 [ C(RESULT_ACCESS) ] = 0x0, 311 [ C(RESULT_ACCESS) ] = 0x0,
298 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, 312 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
299 P4_CACHE__1stl_cache_load_miss_retired), 313 P4_PEBS_METRIC__1stl_cache_load_miss_retired),
300 }, 314 },
301 }, 315 },
302 [ C(LL ) ] = { 316 [ C(LL ) ] = {
303 [ C(OP_READ) ] = { 317 [ C(OP_READ) ] = {
304 [ C(RESULT_ACCESS) ] = 0x0, 318 [ C(RESULT_ACCESS) ] = 0x0,
305 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, 319 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
306 P4_CACHE__2ndl_cache_load_miss_retired), 320 P4_PEBS_METRIC__2ndl_cache_load_miss_retired),
307 }, 321 },
308}, 322},
309 [ C(DTLB) ] = { 323 [ C(DTLB) ] = {
310 [ C(OP_READ) ] = { 324 [ C(OP_READ) ] = {
311 [ C(RESULT_ACCESS) ] = 0x0, 325 [ C(RESULT_ACCESS) ] = 0x0,
312 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, 326 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
313 P4_CACHE__dtlb_load_miss_retired), 327 P4_PEBS_METRIC__dtlb_load_miss_retired),
314 }, 328 },
315 [ C(OP_WRITE) ] = { 329 [ C(OP_WRITE) ] = {
316 [ C(RESULT_ACCESS) ] = 0x0, 330 [ C(RESULT_ACCESS) ] = 0x0,
317 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, 331 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
318 P4_CACHE__dtlb_store_miss_retired), 332 P4_PEBS_METRIC__dtlb_store_miss_retired),
319 }, 333 },
320 }, 334 },
321 [ C(ITLB) ] = { 335 [ C(ITLB) ] = {
322 [ C(OP_READ) ] = { 336 [ C(OP_READ) ] = {
323 [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT, 337 [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT,
324 P4_CACHE__itlb_reference_hit), 338 P4_PEBS_METRIC__none),
325 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS, 339 [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS,
326 P4_CACHE__itlb_reference_miss), 340 P4_PEBS_METRIC__none),
327 }, 341 },
328 [ C(OP_WRITE) ] = { 342 [ C(OP_WRITE) ] = {
329 [ C(RESULT_ACCESS) ] = -1, 343 [ C(RESULT_ACCESS) ] = -1,
@@ -414,11 +428,37 @@ static u64 p4_pmu_event_map(int hw_event)
414 return config; 428 return config;
415} 429}
416 430
431static int p4_validate_raw_event(struct perf_event *event)
432{
433 unsigned int v;
434
435 /* user data may have out-of-bound event index */
436 v = p4_config_unpack_event(event->attr.config);
437 if (v >= ARRAY_SIZE(p4_event_bind_map)) {
438 pr_warning("P4 PMU: Unknown event code: %d\n", v);
439 return -EINVAL;
440 }
441
442 /*
443 * it may have some screwed PEBS bits
444 */
445 if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) {
446 pr_warning("P4 PMU: PEBS are not supported yet\n");
447 return -EINVAL;
448 }
449 v = p4_config_unpack_metric(event->attr.config);
450 if (v >= ARRAY_SIZE(p4_pebs_bind_map)) {
451 pr_warning("P4 PMU: Unknown metric code: %d\n", v);
452 return -EINVAL;
453 }
454
455 return 0;
456}
457
417static int p4_hw_config(struct perf_event *event) 458static int p4_hw_config(struct perf_event *event)
418{ 459{
419 int cpu = get_cpu(); 460 int cpu = get_cpu();
420 int rc = 0; 461 int rc = 0;
421 unsigned int evnt;
422 u32 escr, cccr; 462 u32 escr, cccr;
423 463
424 /* 464 /*
@@ -438,12 +478,9 @@ static int p4_hw_config(struct perf_event *event)
438 478
439 if (event->attr.type == PERF_TYPE_RAW) { 479 if (event->attr.type == PERF_TYPE_RAW) {
440 480
441 /* user data may have out-of-bound event index */ 481 rc = p4_validate_raw_event(event);
442 evnt = p4_config_unpack_event(event->attr.config); 482 if (rc)
443 if (evnt >= ARRAY_SIZE(p4_event_bind_map)) {
444 rc = -EINVAL;
445 goto out; 483 goto out;
446 }
447 484
448 /* 485 /*
449 * We don't control raw events so it's up to the caller 486 * We don't control raw events so it's up to the caller
@@ -451,12 +488,15 @@ static int p4_hw_config(struct perf_event *event)
451 * on HT machine but allow HT-compatible specifics to be 488 * on HT machine but allow HT-compatible specifics to be
452 * passed on) 489 * passed on)
453 * 490 *
491 * Note that for RAW events we allow user to use P4_CCCR_RESERVED
492 * bits since we keep additional info here (for cache events and etc)
493 *
454 * XXX: HT wide things should check perf_paranoid_cpu() && 494 * XXX: HT wide things should check perf_paranoid_cpu() &&
455 * CAP_SYS_ADMIN 495 * CAP_SYS_ADMIN
456 */ 496 */
457 event->hw.config |= event->attr.config & 497 event->hw.config |= event->attr.config &
458 (p4_config_pack_escr(P4_ESCR_MASK_HT) | 498 (p4_config_pack_escr(P4_ESCR_MASK_HT) |
459 p4_config_pack_cccr(P4_CCCR_MASK_HT)); 499 p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED));
460 } 500 }
461 501
462 rc = x86_setup_perfctr(event); 502 rc = x86_setup_perfctr(event);
@@ -482,6 +522,29 @@ static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
482 return overflow; 522 return overflow;
483} 523}
484 524
525static void p4_pmu_disable_pebs(void)
526{
527 /*
528 * FIXME
529 *
530 * It's still allowed that two threads setup same cache
531 * events so we can't simply clear metrics until we know
532 * no one is depending on us, so we need kind of counter
533 * for "ReplayEvent" users.
534 *
535 * What is more complex -- RAW events, if user (for some
536 * reason) will pass some cache event metric with improper
537 * event opcode -- it's fine from hardware point of view
538 * but completely nonsense from "meaning" of such action.
539 *
540 * So at the moment let's leave metrics turned on forever -- it's
541 * ok for now but needs to be revisited!
542 *
543 * (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)0);
544 * (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)0);
545 */
546}
547
485static inline void p4_pmu_disable_event(struct perf_event *event) 548static inline void p4_pmu_disable_event(struct perf_event *event)
486{ 549{
487 struct hw_perf_event *hwc = &event->hw; 550 struct hw_perf_event *hwc = &event->hw;
@@ -507,6 +570,26 @@ static void p4_pmu_disable_all(void)
507 continue; 570 continue;
508 p4_pmu_disable_event(event); 571 p4_pmu_disable_event(event);
509 } 572 }
573
574 p4_pmu_disable_pebs();
575}
576
577/* configuration must be valid */
578static void p4_pmu_enable_pebs(u64 config)
579{
580 struct p4_pebs_bind *bind;
581 unsigned int idx;
582
583 BUILD_BUG_ON(P4_PEBS_METRIC__max > P4_PEBS_CONFIG_METRIC_MASK);
584
585 idx = p4_config_unpack_metric(config);
586 if (idx == P4_PEBS_METRIC__none)
587 return;
588
589 bind = &p4_pebs_bind_map[idx];
590
591 (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs);
592 (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind->metric_vert);
510} 593}
511 594
512static void p4_pmu_enable_event(struct perf_event *event) 595static void p4_pmu_enable_event(struct perf_event *event)
@@ -515,9 +598,7 @@ static void p4_pmu_enable_event(struct perf_event *event)
515 int thread = p4_ht_config_thread(hwc->config); 598 int thread = p4_ht_config_thread(hwc->config);
516 u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config)); 599 u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config));
517 unsigned int idx = p4_config_unpack_event(hwc->config); 600 unsigned int idx = p4_config_unpack_event(hwc->config);
518 unsigned int idx_cache = p4_config_unpack_cache_event(hwc->config);
519 struct p4_event_bind *bind; 601 struct p4_event_bind *bind;
520 struct p4_cache_event_bind *bind_cache;
521 u64 escr_addr, cccr; 602 u64 escr_addr, cccr;
522 603
523 bind = &p4_event_bind_map[idx]; 604 bind = &p4_event_bind_map[idx];
@@ -537,16 +618,10 @@ static void p4_pmu_enable_event(struct perf_event *event)
537 cccr = p4_config_unpack_cccr(hwc->config); 618 cccr = p4_config_unpack_cccr(hwc->config);
538 619
539 /* 620 /*
540 * it could be Cache event so that we need to 621 * it could be Cache event so we need to write metrics
541 * set metrics into additional MSRs 622 * into additional MSRs
542 */ 623 */
543 BUILD_BUG_ON(P4_CACHE__MAX > P4_CCCR_CACHE_OPS_MASK); 624 p4_pmu_enable_pebs(hwc->config);
544 if (idx_cache > P4_CACHE__NONE &&
545 idx_cache < ARRAY_SIZE(p4_cache_event_bind_map)) {
546 bind_cache = &p4_cache_event_bind_map[idx_cache];
547 (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind_cache->metric_pebs);
548 (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind_cache->metric_vert);
549 }
550 625
551 (void)checking_wrmsrl(escr_addr, escr_conf); 626 (void)checking_wrmsrl(escr_addr, escr_conf);
552 (void)checking_wrmsrl(hwc->config_base + hwc->idx, 627 (void)checking_wrmsrl(hwc->config_base + hwc->idx,
@@ -829,6 +904,15 @@ static __initconst const struct x86_pmu p4_pmu = {
829 .max_period = (1ULL << 39) - 1, 904 .max_period = (1ULL << 39) - 1,
830 .hw_config = p4_hw_config, 905 .hw_config = p4_hw_config,
831 .schedule_events = p4_pmu_schedule_events, 906 .schedule_events = p4_pmu_schedule_events,
907 /*
908 * This handles erratum N15 in intel doc 249199-029,
909 * the counter may not be updated correctly on write
910 * so we need a second write operation to do the trick
911 * (the official workaround didn't work)
912 *
913 * the former idea is taken from OProfile code
914 */
915 .perfctr_second_write = 1,
832}; 916};
833 917
834static __init int p4_pmu_init(void) 918static __init int p4_pmu_init(void)
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
new file mode 100644
index 00000000000..34b4dad6f0b
--- /dev/null
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -0,0 +1,63 @@
1/*
2 * Routines to identify additional cpu features that are scattered in
3 * cpuid space.
4 */
5#include <linux/cpu.h>
6
7#include <asm/pat.h>
8#include <asm/processor.h>
9
10#include <asm/apic.h>
11
12struct cpuid_bit {
13 u16 feature;
14 u8 reg;
15 u8 bit;
16 u32 level;
17 u32 sub_leaf;
18};
19
20enum cpuid_regs {
21 CR_EAX = 0,
22 CR_ECX,
23 CR_EDX,
24 CR_EBX
25};
26
27void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
28{
29 u32 max_level;
30 u32 regs[4];
31 const struct cpuid_bit *cb;
32
33 static const struct cpuid_bit __cpuinitconst cpuid_bits[] = {
34 { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006, 0 },
35 { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006, 0 },
36 { X86_FEATURE_PLN, CR_EAX, 4, 0x00000006, 0 },
37 { X86_FEATURE_PTS, CR_EAX, 6, 0x00000006, 0 },
38 { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 },
39 { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 },
40 { X86_FEATURE_XSAVEOPT, CR_EAX, 0, 0x0000000d, 1 },
41 { X86_FEATURE_CPB, CR_EDX, 9, 0x80000007, 0 },
42 { X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a, 0 },
43 { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a, 0 },
44 { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a, 0 },
45 { X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a, 0 },
46 { 0, 0, 0, 0, 0 }
47 };
48
49 for (cb = cpuid_bits; cb->feature; cb++) {
50
51 /* Verify that the level is valid */
52 max_level = cpuid_eax(cb->level & 0xffff0000);
53 if (max_level < cb->level ||
54 max_level > (cb->level | 0xffff))
55 continue;
56
57 cpuid_count(cb->level, cb->sub_leaf, &regs[CR_EAX],
58 &regs[CR_EBX], &regs[CR_ECX], &regs[CR_EDX]);
59
60 if (regs[cb->reg] & (1 << cb->bit))
61 set_cpu_cap(c, cb->feature);
62 }
63}
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/topology.c
index 10fa5684a66..4397e987a1c 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -1,62 +1,14 @@
1/* 1/*
2 * Routines to indentify additional cpu features that are scattered in 2 * Check for extended topology enumeration cpuid leaf 0xb and if it
3 * cpuid space. 3 * exists, use it for populating initial_apicid and cpu topology
4 * detection.
4 */ 5 */
5#include <linux/cpu.h>
6 6
7#include <linux/cpu.h>
8#include <asm/apic.h>
7#include <asm/pat.h> 9#include <asm/pat.h>
8#include <asm/processor.h> 10#include <asm/processor.h>
9 11
10#include <asm/apic.h>
11
12struct cpuid_bit {
13 u16 feature;
14 u8 reg;
15 u8 bit;
16 u32 level;
17};
18
19enum cpuid_regs {
20 CR_EAX = 0,
21 CR_ECX,
22 CR_EDX,
23 CR_EBX
24};
25
26void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
27{
28 u32 max_level;
29 u32 regs[4];
30 const struct cpuid_bit *cb;
31
32 static const struct cpuid_bit __cpuinitconst cpuid_bits[] = {
33 { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 },
34 { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006 },
35 { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006 },
36 { X86_FEATURE_CPB, CR_EDX, 9, 0x80000007 },
37 { X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a },
38 { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a },
39 { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a },
40 { X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a },
41 { 0, 0, 0, 0 }
42 };
43
44 for (cb = cpuid_bits; cb->feature; cb++) {
45
46 /* Verify that the level is valid */
47 max_level = cpuid_eax(cb->level & 0xffff0000);
48 if (max_level < cb->level ||
49 max_level > (cb->level | 0xffff))
50 continue;
51
52 cpuid(cb->level, &regs[CR_EAX], &regs[CR_EBX],
53 &regs[CR_ECX], &regs[CR_EDX]);
54
55 if (regs[cb->reg] & (1 << cb->bit))
56 set_cpu_cap(c, cb->feature);
57 }
58}
59
60/* leaf 0xb SMT level */ 12/* leaf 0xb SMT level */
61#define SMT_LEVEL 0 13#define SMT_LEVEL 0
62 14
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index b9d1ff58844..227b0448960 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -51,7 +51,7 @@ static inline int __vmware_platform(void)
51 51
52static unsigned long vmware_get_tsc_khz(void) 52static unsigned long vmware_get_tsc_khz(void)
53{ 53{
54 uint64_t tsc_hz; 54 uint64_t tsc_hz, lpj;
55 uint32_t eax, ebx, ecx, edx; 55 uint32_t eax, ebx, ecx, edx;
56 56
57 VMWARE_PORT(GETHZ, eax, ebx, ecx, edx); 57 VMWARE_PORT(GETHZ, eax, ebx, ecx, edx);
@@ -62,6 +62,13 @@ static unsigned long vmware_get_tsc_khz(void)
62 printk(KERN_INFO "TSC freq read from hypervisor : %lu.%03lu MHz\n", 62 printk(KERN_INFO "TSC freq read from hypervisor : %lu.%03lu MHz\n",
63 (unsigned long) tsc_hz / 1000, 63 (unsigned long) tsc_hz / 1000,
64 (unsigned long) tsc_hz % 1000); 64 (unsigned long) tsc_hz % 1000);
65
66 if (!preset_lpj) {
67 lpj = ((u64)tsc_hz * 1000);
68 do_div(lpj, HZ);
69 preset_lpj = lpj;
70 }
71
65 return tsc_hz; 72 return tsc_hz;
66} 73}
67 74
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index c89a386930b..6e8752c1bd5 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -18,7 +18,6 @@
18 18
19#include <asm/stacktrace.h> 19#include <asm/stacktrace.h>
20 20
21#include "dumpstack.h"
22 21
23int panic_on_unrecovered_nmi; 22int panic_on_unrecovered_nmi;
24int panic_on_io_nmi; 23int panic_on_io_nmi;
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h
deleted file mode 100644
index e1a93be4fd4..00000000000
--- a/arch/x86/kernel/dumpstack.h
+++ /dev/null
@@ -1,56 +0,0 @@
1/*
2 * Copyright (C) 1991, 1992 Linus Torvalds
3 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
4 */
5
6#ifndef DUMPSTACK_H
7#define DUMPSTACK_H
8
9#ifdef CONFIG_X86_32
10#define STACKSLOTS_PER_LINE 8
11#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
12#else
13#define STACKSLOTS_PER_LINE 4
14#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
15#endif
16
17#include <linux/uaccess.h>
18
19extern void
20show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
21 unsigned long *stack, unsigned long bp, char *log_lvl);
22
23extern void
24show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
25 unsigned long *sp, unsigned long bp, char *log_lvl);
26
27extern unsigned int code_bytes;
28
29/* The form of the top of the frame on the stack */
30struct stack_frame {
31 struct stack_frame *next_frame;
32 unsigned long return_address;
33};
34
35struct stack_frame_ia32 {
36 u32 next_frame;
37 u32 return_address;
38};
39
40static inline unsigned long rewind_frame_pointer(int n)
41{
42 struct stack_frame *frame;
43
44 get_bp(frame);
45
46#ifdef CONFIG_FRAME_POINTER
47 while (n--) {
48 if (probe_kernel_address(&frame->next_frame, frame))
49 break;
50 }
51#endif
52
53 return (unsigned long)frame;
54}
55
56#endif /* DUMPSTACK_H */
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 11540a189d9..0f6376ffa2d 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -16,8 +16,6 @@
16 16
17#include <asm/stacktrace.h> 17#include <asm/stacktrace.h>
18 18
19#include "dumpstack.h"
20
21 19
22void dump_trace(struct task_struct *task, struct pt_regs *regs, 20void dump_trace(struct task_struct *task, struct pt_regs *regs,
23 unsigned long *stack, unsigned long bp, 21 unsigned long *stack, unsigned long bp,
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 272c9f1f05f..57a21f11c79 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -16,7 +16,6 @@
16 16
17#include <asm/stacktrace.h> 17#include <asm/stacktrace.h>
18 18
19#include "dumpstack.h"
20 19
21#define N_EXCEPTION_STACKS_END \ 20#define N_EXCEPTION_STACKS_END \
22 (N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2) 21 (N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2)
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index cd49141cf15..227d00920d2 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -611,14 +611,14 @@ ldt_ss:
611 * compensating for the offset by changing to the ESPFIX segment with 611 * compensating for the offset by changing to the ESPFIX segment with
612 * a base address that matches for the difference. 612 * a base address that matches for the difference.
613 */ 613 */
614#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)
614 mov %esp, %edx /* load kernel esp */ 615 mov %esp, %edx /* load kernel esp */
615 mov PT_OLDESP(%esp), %eax /* load userspace esp */ 616 mov PT_OLDESP(%esp), %eax /* load userspace esp */
616 mov %dx, %ax /* eax: new kernel esp */ 617 mov %dx, %ax /* eax: new kernel esp */
617 sub %eax, %edx /* offset (low word is 0) */ 618 sub %eax, %edx /* offset (low word is 0) */
618 PER_CPU(gdt_page, %ebx)
619 shr $16, %edx 619 shr $16, %edx
620 mov %dl, GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx) /* bits 16..23 */ 620 mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */
621 mov %dh, GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx) /* bits 24..31 */ 621 mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */
622 pushl $__ESPFIX_SS 622 pushl $__ESPFIX_SS
623 CFI_ADJUST_CFA_OFFSET 4 623 CFI_ADJUST_CFA_OFFSET 4
624 push %eax /* new kernel esp */ 624 push %eax /* new kernel esp */
@@ -791,9 +791,8 @@ ptregs_clone:
791 * normal stack and adjusts ESP with the matching offset. 791 * normal stack and adjusts ESP with the matching offset.
792 */ 792 */
793 /* fixup the stack */ 793 /* fixup the stack */
794 PER_CPU(gdt_page, %ebx) 794 mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */
795 mov GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx), %al /* bits 16..23 */ 795 mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */
796 mov GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx), %ah /* bits 24..31 */
797 shl $16, %eax 796 shl $16, %eax
798 addl %esp, %eax /* the adjusted stack pointer */ 797 addl %esp, %eax /* the adjusted stack pointer */
799 pushl $__KERNEL_DS 798 pushl $__KERNEL_DS
@@ -914,7 +913,7 @@ ENTRY(simd_coprocessor_error)
914 .balign 4 913 .balign 4
915 .long 661b 914 .long 661b
916 .long 663f 915 .long 663f
917 .byte X86_FEATURE_XMM 916 .word X86_FEATURE_XMM
918 .byte 662b-661b 917 .byte 662b-661b
919 .byte 664f-663f 918 .byte 664f-663f
920.previous 919.previous
@@ -1166,6 +1165,9 @@ ENTRY(xen_failsafe_callback)
1166.previous 1165.previous
1167ENDPROC(xen_failsafe_callback) 1166ENDPROC(xen_failsafe_callback)
1168 1167
1168BUILD_INTERRUPT3(xen_hvm_callback_vector, XEN_HVM_EVTCHN_CALLBACK,
1169 xen_evtchn_do_upcall)
1170
1169#endif /* CONFIG_XEN */ 1171#endif /* CONFIG_XEN */
1170 1172
1171#ifdef CONFIG_FUNCTION_TRACER 1173#ifdef CONFIG_FUNCTION_TRACER
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 4db7c4d12ff..c5ea5cdbe7b 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1065,6 +1065,7 @@ ENTRY(\sym)
1065END(\sym) 1065END(\sym)
1066.endm 1066.endm
1067 1067
1068#define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8)
1068.macro paranoidzeroentry_ist sym do_sym ist 1069.macro paranoidzeroentry_ist sym do_sym ist
1069ENTRY(\sym) 1070ENTRY(\sym)
1070 INTR_FRAME 1071 INTR_FRAME
@@ -1076,10 +1077,9 @@ ENTRY(\sym)
1076 TRACE_IRQS_OFF 1077 TRACE_IRQS_OFF
1077 movq %rsp,%rdi /* pt_regs pointer */ 1078 movq %rsp,%rdi /* pt_regs pointer */
1078 xorl %esi,%esi /* no error code */ 1079 xorl %esi,%esi /* no error code */
1079 PER_CPU(init_tss, %r12) 1080 subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist)
1080 subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%r12)
1081 call \do_sym 1081 call \do_sym
1082 addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%r12) 1082 addq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist)
1083 jmp paranoid_exit /* %ebx: no swapgs flag */ 1083 jmp paranoid_exit /* %ebx: no swapgs flag */
1084 CFI_ENDPROC 1084 CFI_ENDPROC
1085END(\sym) 1085END(\sym)
@@ -1329,6 +1329,9 @@ ENTRY(xen_failsafe_callback)
1329 CFI_ENDPROC 1329 CFI_ENDPROC
1330END(xen_failsafe_callback) 1330END(xen_failsafe_callback)
1331 1331
1332apicinterrupt XEN_HVM_EVTCHN_CALLBACK \
1333 xen_hvm_callback_vector xen_evtchn_do_upcall
1334
1332#endif /* CONFIG_XEN */ 1335#endif /* CONFIG_XEN */
1333 1336
1334/* 1337/*
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index b2e24603739..784360c0625 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -20,7 +20,7 @@
20 20
21static void __init i386_default_early_setup(void) 21static void __init i386_default_early_setup(void)
22{ 22{
23 /* Initilize 32bit specific setup functions */ 23 /* Initialize 32bit specific setup functions */
24 x86_init.resources.probe_roms = probe_roms; 24 x86_init.resources.probe_roms = probe_roms;
25 x86_init.resources.reserve_resources = i386_reserve_resources; 25 x86_init.resources.reserve_resources = i386_reserve_resources;
26 x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc; 26 x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc;
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 37c3d4b17d8..ff4c453e13f 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -131,6 +131,12 @@ ENTRY(startup_32)
131 movsl 131 movsl
1321: 1321:
133 133
134#ifdef CONFIG_OLPC_OPENFIRMWARE
135 /* save OFW's pgdir table for later use when calling into OFW */
136 movl %cr3, %eax
137 movl %eax, pa(olpc_ofw_pgd)
138#endif
139
134#ifdef CONFIG_PARAVIRT 140#ifdef CONFIG_PARAVIRT
135 /* This is can only trip for a broken bootloader... */ 141 /* This is can only trip for a broken bootloader... */
136 cmpw $0x207, pa(boot_params + BP_version) 142 cmpw $0x207, pa(boot_params + BP_version)
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 3d1e6f16b7a..239046bd447 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -234,9 +234,8 @@ ENTRY(secondary_startup_64)
234 * init data section till per cpu areas are set up. 234 * init data section till per cpu areas are set up.
235 */ 235 */
236 movl $MSR_GS_BASE,%ecx 236 movl $MSR_GS_BASE,%ecx
237 movq initial_gs(%rip),%rax 237 movl initial_gs(%rip),%eax
238 movq %rax,%rdx 238 movl initial_gs+4(%rip),%edx
239 shrq $32,%rdx
240 wrmsr 239 wrmsr
241 240
242 /* esi is pointer to real mode structure with interesting info. 241 /* esi is pointer to real mode structure with interesting info.
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index ba390d73117..351f9c0fea1 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -16,7 +16,6 @@
16#include <asm/hpet.h> 16#include <asm/hpet.h>
17 17
18#define HPET_MASK CLOCKSOURCE_MASK(32) 18#define HPET_MASK CLOCKSOURCE_MASK(32)
19#define HPET_SHIFT 22
20 19
21/* FSEC = 10^-15 20/* FSEC = 10^-15
22 NSEC = 10^-9 */ 21 NSEC = 10^-9 */
@@ -583,7 +582,7 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
583 * scaled math multiplication factor for nanosecond to hpet tick 582 * scaled math multiplication factor for nanosecond to hpet tick
584 * conversion. 583 * conversion.
585 */ 584 */
586 hpet_freq = 1000000000000000ULL; 585 hpet_freq = FSEC_PER_SEC;
587 do_div(hpet_freq, hpet_period); 586 do_div(hpet_freq, hpet_period);
588 evt->mult = div_sc((unsigned long) hpet_freq, 587 evt->mult = div_sc((unsigned long) hpet_freq,
589 NSEC_PER_SEC, evt->shift); 588 NSEC_PER_SEC, evt->shift);
@@ -787,7 +786,6 @@ static struct clocksource clocksource_hpet = {
787 .rating = 250, 786 .rating = 250,
788 .read = read_hpet, 787 .read = read_hpet,
789 .mask = HPET_MASK, 788 .mask = HPET_MASK,
790 .shift = HPET_SHIFT,
791 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 789 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
792 .resume = hpet_resume_counter, 790 .resume = hpet_resume_counter,
793#ifdef CONFIG_X86_64 791#ifdef CONFIG_X86_64
@@ -798,6 +796,7 @@ static struct clocksource clocksource_hpet = {
798static int hpet_clocksource_register(void) 796static int hpet_clocksource_register(void)
799{ 797{
800 u64 start, now; 798 u64 start, now;
799 u64 hpet_freq;
801 cycle_t t1; 800 cycle_t t1;
802 801
803 /* Start the counter */ 802 /* Start the counter */
@@ -832,9 +831,15 @@ static int hpet_clocksource_register(void)
832 * mult = (hpet_period * 2^shift)/10^6 831 * mult = (hpet_period * 2^shift)/10^6
833 * mult = (hpet_period << shift)/FSEC_PER_NSEC 832 * mult = (hpet_period << shift)/FSEC_PER_NSEC
834 */ 833 */
835 clocksource_hpet.mult = div_sc(hpet_period, FSEC_PER_NSEC, HPET_SHIFT);
836 834
837 clocksource_register(&clocksource_hpet); 835 /* Need to convert hpet_period (fsec/cyc) to cyc/sec:
836 *
837 * cyc/sec = FSEC_PER_SEC/hpet_period(fsec/cyc)
838 * cyc/sec = (FSEC_PER_NSEC * NSEC_PER_SEC)/hpet_period
839 */
840 hpet_freq = FSEC_PER_SEC;
841 do_div(hpet_freq, hpet_period);
842 clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq);
838 843
839 return 0; 844 return 0;
840} 845}
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index a8f1b803d2f..a474ec37c32 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -208,6 +208,9 @@ int arch_bp_generic_fields(int x86_len, int x86_type,
208{ 208{
209 /* Len */ 209 /* Len */
210 switch (x86_len) { 210 switch (x86_len) {
211 case X86_BREAKPOINT_LEN_X:
212 *gen_len = sizeof(long);
213 break;
211 case X86_BREAKPOINT_LEN_1: 214 case X86_BREAKPOINT_LEN_1:
212 *gen_len = HW_BREAKPOINT_LEN_1; 215 *gen_len = HW_BREAKPOINT_LEN_1;
213 break; 216 break;
@@ -251,6 +254,29 @@ static int arch_build_bp_info(struct perf_event *bp)
251 254
252 info->address = bp->attr.bp_addr; 255 info->address = bp->attr.bp_addr;
253 256
257 /* Type */
258 switch (bp->attr.bp_type) {
259 case HW_BREAKPOINT_W:
260 info->type = X86_BREAKPOINT_WRITE;
261 break;
262 case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
263 info->type = X86_BREAKPOINT_RW;
264 break;
265 case HW_BREAKPOINT_X:
266 info->type = X86_BREAKPOINT_EXECUTE;
267 /*
268 * x86 inst breakpoints need to have a specific undefined len.
269 * But we still need to check userspace is not trying to setup
270 * an unsupported length, to get a range breakpoint for example.
271 */
272 if (bp->attr.bp_len == sizeof(long)) {
273 info->len = X86_BREAKPOINT_LEN_X;
274 return 0;
275 }
276 default:
277 return -EINVAL;
278 }
279
254 /* Len */ 280 /* Len */
255 switch (bp->attr.bp_len) { 281 switch (bp->attr.bp_len) {
256 case HW_BREAKPOINT_LEN_1: 282 case HW_BREAKPOINT_LEN_1:
@@ -271,21 +297,6 @@ static int arch_build_bp_info(struct perf_event *bp)
271 return -EINVAL; 297 return -EINVAL;
272 } 298 }
273 299
274 /* Type */
275 switch (bp->attr.bp_type) {
276 case HW_BREAKPOINT_W:
277 info->type = X86_BREAKPOINT_WRITE;
278 break;
279 case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
280 info->type = X86_BREAKPOINT_RW;
281 break;
282 case HW_BREAKPOINT_X:
283 info->type = X86_BREAKPOINT_EXECUTE;
284 break;
285 default:
286 return -EINVAL;
287 }
288
289 return 0; 300 return 0;
290} 301}
291/* 302/*
@@ -305,6 +316,9 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
305 ret = -EINVAL; 316 ret = -EINVAL;
306 317
307 switch (info->len) { 318 switch (info->len) {
319 case X86_BREAKPOINT_LEN_X:
320 align = sizeof(long) -1;
321 break;
308 case X86_BREAKPOINT_LEN_1: 322 case X86_BREAKPOINT_LEN_1:
309 align = 0; 323 align = 0;
310 break; 324 break;
@@ -466,6 +480,13 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
466 480
467 perf_bp_event(bp, args->regs); 481 perf_bp_event(bp, args->regs);
468 482
483 /*
484 * Set up resume flag to avoid breakpoint recursion when
485 * returning back to origin.
486 */
487 if (bp->hw.info.type == X86_BREAKPOINT_EXECUTE)
488 args->regs->flags |= X86_EFLAGS_RF;
489
469 rcu_read_unlock(); 490 rcu_read_unlock();
470 } 491 }
471 /* 492 /*
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 86cef6b3225..1f11f5ce668 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -59,18 +59,18 @@ void __cpuinit mxcsr_feature_mask_init(void)
59 stts(); 59 stts();
60} 60}
61 61
62void __cpuinit init_thread_xstate(void) 62static void __cpuinit init_thread_xstate(void)
63{ 63{
64 /*
65 * Note that xstate_size might be overwritten later during
66 * xsave_init().
67 */
68
64 if (!HAVE_HWFP) { 69 if (!HAVE_HWFP) {
65 xstate_size = sizeof(struct i387_soft_struct); 70 xstate_size = sizeof(struct i387_soft_struct);
66 return; 71 return;
67 } 72 }
68 73
69 if (cpu_has_xsave) {
70 xsave_cntxt_init();
71 return;
72 }
73
74 if (cpu_has_fxsr) 74 if (cpu_has_fxsr)
75 xstate_size = sizeof(struct i387_fxsave_struct); 75 xstate_size = sizeof(struct i387_fxsave_struct);
76#ifdef CONFIG_X86_32 76#ifdef CONFIG_X86_32
@@ -84,6 +84,7 @@ void __cpuinit init_thread_xstate(void)
84 * Called at bootup to set up the initial FPU state that is later cloned 84 * Called at bootup to set up the initial FPU state that is later cloned
85 * into all processes. 85 * into all processes.
86 */ 86 */
87
87void __cpuinit fpu_init(void) 88void __cpuinit fpu_init(void)
88{ 89{
89 unsigned long oldcr0 = read_cr0(); 90 unsigned long oldcr0 = read_cr0();
@@ -93,21 +94,26 @@ void __cpuinit fpu_init(void)
93 94
94 write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */ 95 write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */
95 96
96 /*
97 * Boot processor to setup the FP and extended state context info.
98 */
99 if (!smp_processor_id()) 97 if (!smp_processor_id())
100 init_thread_xstate(); 98 init_thread_xstate();
101 xsave_init();
102 99
103 mxcsr_feature_mask_init(); 100 mxcsr_feature_mask_init();
104 /* clean state in init */ 101 /* clean state in init */
105 current_thread_info()->status = 0; 102 current_thread_info()->status = 0;
106 clear_used_math(); 103 clear_used_math();
107} 104}
108#endif /* CONFIG_X86_64 */
109 105
110static void fpu_finit(struct fpu *fpu) 106#else /* CONFIG_X86_64 */
107
108void __cpuinit fpu_init(void)
109{
110 if (!smp_processor_id())
111 init_thread_xstate();
112}
113
114#endif /* CONFIG_X86_32 */
115
116void fpu_finit(struct fpu *fpu)
111{ 117{
112#ifdef CONFIG_X86_32 118#ifdef CONFIG_X86_32
113 if (!HAVE_HWFP) { 119 if (!HAVE_HWFP) {
@@ -132,6 +138,7 @@ static void fpu_finit(struct fpu *fpu)
132 fp->fos = 0xffff0000u; 138 fp->fos = 0xffff0000u;
133 } 139 }
134} 140}
141EXPORT_SYMBOL_GPL(fpu_finit);
135 142
136/* 143/*
137 * The _current_ task is using the FPU for the first time 144 * The _current_ task is using the FPU for the first time
@@ -190,6 +197,8 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
190 if (ret) 197 if (ret)
191 return ret; 198 return ret;
192 199
200 sanitize_i387_state(target);
201
193 return user_regset_copyout(&pos, &count, &kbuf, &ubuf, 202 return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
194 &target->thread.fpu.state->fxsave, 0, -1); 203 &target->thread.fpu.state->fxsave, 0, -1);
195} 204}
@@ -207,6 +216,8 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
207 if (ret) 216 if (ret)
208 return ret; 217 return ret;
209 218
219 sanitize_i387_state(target);
220
210 ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, 221 ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
211 &target->thread.fpu.state->fxsave, 0, -1); 222 &target->thread.fpu.state->fxsave, 0, -1);
212 223
@@ -446,6 +457,8 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
446 -1); 457 -1);
447 } 458 }
448 459
460 sanitize_i387_state(target);
461
449 if (kbuf && pos == 0 && count == sizeof(env)) { 462 if (kbuf && pos == 0 && count == sizeof(env)) {
450 convert_from_fxsr(kbuf, target); 463 convert_from_fxsr(kbuf, target);
451 return 0; 464 return 0;
@@ -467,6 +480,8 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
467 if (ret) 480 if (ret)
468 return ret; 481 return ret;
469 482
483 sanitize_i387_state(target);
484
470 if (!HAVE_HWFP) 485 if (!HAVE_HWFP)
471 return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); 486 return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);
472 487
@@ -533,6 +548,9 @@ static int save_i387_xsave(void __user *buf)
533 struct _fpstate_ia32 __user *fx = buf; 548 struct _fpstate_ia32 __user *fx = buf;
534 int err = 0; 549 int err = 0;
535 550
551
552 sanitize_i387_state(tsk);
553
536 /* 554 /*
537 * For legacy compatible, we always set FP/SSE bits in the bit 555 * For legacy compatible, we always set FP/SSE bits in the bit
538 * vector while saving the state to the user context. 556 * vector while saving the state to the user context.
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 01ab17ae2ae..ef10940e1af 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -49,55 +49,94 @@
49#include <asm/system.h> 49#include <asm/system.h>
50#include <asm/apic.h> 50#include <asm/apic.h>
51 51
52/** 52struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] =
53 * pt_regs_to_gdb_regs - Convert ptrace regs to GDB regs
54 * @gdb_regs: A pointer to hold the registers in the order GDB wants.
55 * @regs: The &struct pt_regs of the current process.
56 *
57 * Convert the pt_regs in @regs into the format for registers that
58 * GDB expects, stored in @gdb_regs.
59 */
60void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
61{ 53{
62#ifndef CONFIG_X86_32 54#ifdef CONFIG_X86_32
63 u32 *gdb_regs32 = (u32 *)gdb_regs; 55 { "ax", 4, offsetof(struct pt_regs, ax) },
56 { "cx", 4, offsetof(struct pt_regs, cx) },
57 { "dx", 4, offsetof(struct pt_regs, dx) },
58 { "bx", 4, offsetof(struct pt_regs, bx) },
59 { "sp", 4, offsetof(struct pt_regs, sp) },
60 { "bp", 4, offsetof(struct pt_regs, bp) },
61 { "si", 4, offsetof(struct pt_regs, si) },
62 { "di", 4, offsetof(struct pt_regs, di) },
63 { "ip", 4, offsetof(struct pt_regs, ip) },
64 { "flags", 4, offsetof(struct pt_regs, flags) },
65 { "cs", 4, offsetof(struct pt_regs, cs) },
66 { "ss", 4, offsetof(struct pt_regs, ss) },
67 { "ds", 4, offsetof(struct pt_regs, ds) },
68 { "es", 4, offsetof(struct pt_regs, es) },
69 { "fs", 4, -1 },
70 { "gs", 4, -1 },
71#else
72 { "ax", 8, offsetof(struct pt_regs, ax) },
73 { "bx", 8, offsetof(struct pt_regs, bx) },
74 { "cx", 8, offsetof(struct pt_regs, cx) },
75 { "dx", 8, offsetof(struct pt_regs, dx) },
76 { "si", 8, offsetof(struct pt_regs, dx) },
77 { "di", 8, offsetof(struct pt_regs, di) },
78 { "bp", 8, offsetof(struct pt_regs, bp) },
79 { "sp", 8, offsetof(struct pt_regs, sp) },
80 { "r8", 8, offsetof(struct pt_regs, r8) },
81 { "r9", 8, offsetof(struct pt_regs, r9) },
82 { "r10", 8, offsetof(struct pt_regs, r10) },
83 { "r11", 8, offsetof(struct pt_regs, r11) },
84 { "r12", 8, offsetof(struct pt_regs, r12) },
85 { "r13", 8, offsetof(struct pt_regs, r13) },
86 { "r14", 8, offsetof(struct pt_regs, r14) },
87 { "r15", 8, offsetof(struct pt_regs, r15) },
88 { "ip", 8, offsetof(struct pt_regs, ip) },
89 { "flags", 4, offsetof(struct pt_regs, flags) },
90 { "cs", 4, offsetof(struct pt_regs, cs) },
91 { "ss", 4, offsetof(struct pt_regs, ss) },
64#endif 92#endif
65 gdb_regs[GDB_AX] = regs->ax; 93};
66 gdb_regs[GDB_BX] = regs->bx; 94
67 gdb_regs[GDB_CX] = regs->cx; 95int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
68 gdb_regs[GDB_DX] = regs->dx; 96{
69 gdb_regs[GDB_SI] = regs->si; 97 if (
70 gdb_regs[GDB_DI] = regs->di;
71 gdb_regs[GDB_BP] = regs->bp;
72 gdb_regs[GDB_PC] = regs->ip;
73#ifdef CONFIG_X86_32 98#ifdef CONFIG_X86_32
74 gdb_regs[GDB_PS] = regs->flags; 99 regno == GDB_SS || regno == GDB_FS || regno == GDB_GS ||
75 gdb_regs[GDB_DS] = regs->ds; 100#endif
76 gdb_regs[GDB_ES] = regs->es; 101 regno == GDB_SP || regno == GDB_ORIG_AX)
77 gdb_regs[GDB_CS] = regs->cs; 102 return 0;
78 gdb_regs[GDB_FS] = 0xFFFF; 103
79 gdb_regs[GDB_GS] = 0xFFFF; 104 if (dbg_reg_def[regno].offset != -1)
80 if (user_mode_vm(regs)) { 105 memcpy((void *)regs + dbg_reg_def[regno].offset, mem,
81 gdb_regs[GDB_SS] = regs->ss; 106 dbg_reg_def[regno].size);
82 gdb_regs[GDB_SP] = regs->sp; 107 return 0;
83 } else { 108}
84 gdb_regs[GDB_SS] = __KERNEL_DS; 109
85 gdb_regs[GDB_SP] = kernel_stack_pointer(regs); 110char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
111{
112 if (regno == GDB_ORIG_AX) {
113 memcpy(mem, &regs->orig_ax, sizeof(regs->orig_ax));
114 return "orig_ax";
86 } 115 }
87#else 116 if (regno >= DBG_MAX_REG_NUM || regno < 0)
88 gdb_regs[GDB_R8] = regs->r8; 117 return NULL;
89 gdb_regs[GDB_R9] = regs->r9; 118
90 gdb_regs[GDB_R10] = regs->r10; 119 if (dbg_reg_def[regno].offset != -1)
91 gdb_regs[GDB_R11] = regs->r11; 120 memcpy(mem, (void *)regs + dbg_reg_def[regno].offset,
92 gdb_regs[GDB_R12] = regs->r12; 121 dbg_reg_def[regno].size);
93 gdb_regs[GDB_R13] = regs->r13; 122
94 gdb_regs[GDB_R14] = regs->r14; 123 switch (regno) {
95 gdb_regs[GDB_R15] = regs->r15; 124#ifdef CONFIG_X86_32
96 gdb_regs32[GDB_PS] = regs->flags; 125 case GDB_SS:
97 gdb_regs32[GDB_CS] = regs->cs; 126 if (!user_mode_vm(regs))
98 gdb_regs32[GDB_SS] = regs->ss; 127 *(unsigned long *)mem = __KERNEL_DS;
99 gdb_regs[GDB_SP] = kernel_stack_pointer(regs); 128 break;
129 case GDB_SP:
130 if (!user_mode_vm(regs))
131 *(unsigned long *)mem = kernel_stack_pointer(regs);
132 break;
133 case GDB_GS:
134 case GDB_FS:
135 *(unsigned long *)mem = 0xFFFF;
136 break;
100#endif 137#endif
138 }
139 return dbg_reg_def[regno].name;
101} 140}
102 141
103/** 142/**
@@ -150,54 +189,13 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
150 gdb_regs[GDB_SP] = p->thread.sp; 189 gdb_regs[GDB_SP] = p->thread.sp;
151} 190}
152 191
153/**
154 * gdb_regs_to_pt_regs - Convert GDB regs to ptrace regs.
155 * @gdb_regs: A pointer to hold the registers we've received from GDB.
156 * @regs: A pointer to a &struct pt_regs to hold these values in.
157 *
158 * Convert the GDB regs in @gdb_regs into the pt_regs, and store them
159 * in @regs.
160 */
161void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs)
162{
163#ifndef CONFIG_X86_32
164 u32 *gdb_regs32 = (u32 *)gdb_regs;
165#endif
166 regs->ax = gdb_regs[GDB_AX];
167 regs->bx = gdb_regs[GDB_BX];
168 regs->cx = gdb_regs[GDB_CX];
169 regs->dx = gdb_regs[GDB_DX];
170 regs->si = gdb_regs[GDB_SI];
171 regs->di = gdb_regs[GDB_DI];
172 regs->bp = gdb_regs[GDB_BP];
173 regs->ip = gdb_regs[GDB_PC];
174#ifdef CONFIG_X86_32
175 regs->flags = gdb_regs[GDB_PS];
176 regs->ds = gdb_regs[GDB_DS];
177 regs->es = gdb_regs[GDB_ES];
178 regs->cs = gdb_regs[GDB_CS];
179#else
180 regs->r8 = gdb_regs[GDB_R8];
181 regs->r9 = gdb_regs[GDB_R9];
182 regs->r10 = gdb_regs[GDB_R10];
183 regs->r11 = gdb_regs[GDB_R11];
184 regs->r12 = gdb_regs[GDB_R12];
185 regs->r13 = gdb_regs[GDB_R13];
186 regs->r14 = gdb_regs[GDB_R14];
187 regs->r15 = gdb_regs[GDB_R15];
188 regs->flags = gdb_regs32[GDB_PS];
189 regs->cs = gdb_regs32[GDB_CS];
190 regs->ss = gdb_regs32[GDB_SS];
191#endif
192}
193
194static struct hw_breakpoint { 192static struct hw_breakpoint {
195 unsigned enabled; 193 unsigned enabled;
196 unsigned long addr; 194 unsigned long addr;
197 int len; 195 int len;
198 int type; 196 int type;
199 struct perf_event **pev; 197 struct perf_event **pev;
200} breakinfo[4]; 198} breakinfo[HBP_NUM];
201 199
202static unsigned long early_dr7; 200static unsigned long early_dr7;
203 201
@@ -205,7 +203,7 @@ static void kgdb_correct_hw_break(void)
205{ 203{
206 int breakno; 204 int breakno;
207 205
208 for (breakno = 0; breakno < 4; breakno++) { 206 for (breakno = 0; breakno < HBP_NUM; breakno++) {
209 struct perf_event *bp; 207 struct perf_event *bp;
210 struct arch_hw_breakpoint *info; 208 struct arch_hw_breakpoint *info;
211 int val; 209 int val;
@@ -292,10 +290,10 @@ kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
292{ 290{
293 int i; 291 int i;
294 292
295 for (i = 0; i < 4; i++) 293 for (i = 0; i < HBP_NUM; i++)
296 if (breakinfo[i].addr == addr && breakinfo[i].enabled) 294 if (breakinfo[i].addr == addr && breakinfo[i].enabled)
297 break; 295 break;
298 if (i == 4) 296 if (i == HBP_NUM)
299 return -1; 297 return -1;
300 298
301 if (hw_break_release_slot(i)) { 299 if (hw_break_release_slot(i)) {
@@ -313,7 +311,7 @@ static void kgdb_remove_all_hw_break(void)
313 int cpu = raw_smp_processor_id(); 311 int cpu = raw_smp_processor_id();
314 struct perf_event *bp; 312 struct perf_event *bp;
315 313
316 for (i = 0; i < 4; i++) { 314 for (i = 0; i < HBP_NUM; i++) {
317 if (!breakinfo[i].enabled) 315 if (!breakinfo[i].enabled)
318 continue; 316 continue;
319 bp = *per_cpu_ptr(breakinfo[i].pev, cpu); 317 bp = *per_cpu_ptr(breakinfo[i].pev, cpu);
@@ -333,10 +331,10 @@ kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
333{ 331{
334 int i; 332 int i;
335 333
336 for (i = 0; i < 4; i++) 334 for (i = 0; i < HBP_NUM; i++)
337 if (!breakinfo[i].enabled) 335 if (!breakinfo[i].enabled)
338 break; 336 break;
339 if (i == 4) 337 if (i == HBP_NUM)
340 return -1; 338 return -1;
341 339
342 switch (bptype) { 340 switch (bptype) {
@@ -397,7 +395,7 @@ void kgdb_disable_hw_debug(struct pt_regs *regs)
397 395
398 /* Disable hardware debugging while we are in kgdb: */ 396 /* Disable hardware debugging while we are in kgdb: */
399 set_debugreg(0UL, 7); 397 set_debugreg(0UL, 7);
400 for (i = 0; i < 4; i++) { 398 for (i = 0; i < HBP_NUM; i++) {
401 if (!breakinfo[i].enabled) 399 if (!breakinfo[i].enabled)
402 continue; 400 continue;
403 if (dbg_is_early) { 401 if (dbg_is_early) {
@@ -458,7 +456,6 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
458{ 456{
459 unsigned long addr; 457 unsigned long addr;
460 char *ptr; 458 char *ptr;
461 int newPC;
462 459
463 switch (remcomInBuffer[0]) { 460 switch (remcomInBuffer[0]) {
464 case 'c': 461 case 'c':
@@ -469,8 +466,6 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
469 linux_regs->ip = addr; 466 linux_regs->ip = addr;
470 case 'D': 467 case 'D':
471 case 'k': 468 case 'k':
472 newPC = linux_regs->ip;
473
474 /* clear the trace bit */ 469 /* clear the trace bit */
475 linux_regs->flags &= ~X86_EFLAGS_TF; 470 linux_regs->flags &= ~X86_EFLAGS_TF;
476 atomic_set(&kgdb_cpu_doing_single_step, -1); 471 atomic_set(&kgdb_cpu_doing_single_step, -1);
@@ -645,7 +640,7 @@ void kgdb_arch_late(void)
645 attr.bp_len = HW_BREAKPOINT_LEN_1; 640 attr.bp_len = HW_BREAKPOINT_LEN_1;
646 attr.bp_type = HW_BREAKPOINT_W; 641 attr.bp_type = HW_BREAKPOINT_W;
647 attr.disabled = 1; 642 attr.disabled = 1;
648 for (i = 0; i < 4; i++) { 643 for (i = 0; i < HBP_NUM; i++) {
649 if (breakinfo[i].pev) 644 if (breakinfo[i].pev)
650 continue; 645 continue;
651 breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL); 646 breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL);
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 675879b65ce..1bfb6cf4dd5 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -126,16 +126,22 @@ static void __kprobes synthesize_reljump(void *from, void *to)
126} 126}
127 127
128/* 128/*
129 * Check for the REX prefix which can only exist on X86_64 129 * Skip the prefixes of the instruction.
130 * X86_32 always returns 0
131 */ 130 */
132static int __kprobes is_REX_prefix(kprobe_opcode_t *insn) 131static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn)
133{ 132{
133 insn_attr_t attr;
134
135 attr = inat_get_opcode_attribute((insn_byte_t)*insn);
136 while (inat_is_legacy_prefix(attr)) {
137 insn++;
138 attr = inat_get_opcode_attribute((insn_byte_t)*insn);
139 }
134#ifdef CONFIG_X86_64 140#ifdef CONFIG_X86_64
135 if ((*insn & 0xf0) == 0x40) 141 if (inat_is_rex_prefix(attr))
136 return 1; 142 insn++;
137#endif 143#endif
138 return 0; 144 return insn;
139} 145}
140 146
141/* 147/*
@@ -272,6 +278,9 @@ static int __kprobes can_probe(unsigned long paddr)
272 */ 278 */
273static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) 279static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
274{ 280{
281 /* Skip prefixes */
282 insn = skip_prefixes(insn);
283
275 switch (*insn) { 284 switch (*insn) {
276 case 0xfa: /* cli */ 285 case 0xfa: /* cli */
277 case 0xfb: /* sti */ 286 case 0xfb: /* sti */
@@ -280,13 +289,6 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
280 return 1; 289 return 1;
281 } 290 }
282 291
283 /*
284 * on X86_64, 0x40-0x4f are REX prefixes so we need to look
285 * at the next byte instead.. but of course not recurse infinitely
286 */
287 if (is_REX_prefix(insn))
288 return is_IF_modifier(++insn);
289
290 return 0; 292 return 0;
291} 293}
292 294
@@ -803,9 +805,8 @@ static void __kprobes resume_execution(struct kprobe *p,
803 unsigned long orig_ip = (unsigned long)p->addr; 805 unsigned long orig_ip = (unsigned long)p->addr;
804 kprobe_opcode_t *insn = p->ainsn.insn; 806 kprobe_opcode_t *insn = p->ainsn.insn;
805 807
806 /*skip the REX prefix*/ 808 /* Skip prefixes */
807 if (is_REX_prefix(insn)) 809 insn = skip_prefixes(insn);
808 insn++;
809 810
810 regs->flags &= ~X86_EFLAGS_TF; 811 regs->flags &= ~X86_EFLAGS_TF;
811 switch (*insn) { 812 switch (*insn) {
diff --git a/arch/x86/kernel/mrst.c b/arch/x86/kernel/mrst.c
index 5915e0b3330..79ae68154e8 100644
--- a/arch/x86/kernel/mrst.c
+++ b/arch/x86/kernel/mrst.c
@@ -25,8 +25,34 @@
25#include <asm/i8259.h> 25#include <asm/i8259.h>
26#include <asm/apb_timer.h> 26#include <asm/apb_timer.h>
27 27
28/*
29 * the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock,
30 * cmdline option x86_mrst_timer can be used to override the configuration
31 * to prefer one or the other.
32 * at runtime, there are basically three timer configurations:
33 * 1. per cpu apbt clock only
34 * 2. per cpu always-on lapic clocks only, this is Penwell/Medfield only
35 * 3. per cpu lapic clock (C3STOP) and one apbt clock, with broadcast.
36 *
37 * by default (without cmdline option), platform code first detects cpu type
38 * to see if we are on lincroft or penwell, then set up both lapic or apbt
39 * clocks accordingly.
40 * i.e. by default, medfield uses configuration #2, moorestown uses #1.
41 * config #3 is supported but not recommended on medfield.
42 *
43 * rating and feature summary:
44 * lapic (with C3STOP) --------- 100
45 * apbt (always-on) ------------ 110
46 * lapic (always-on,ARAT) ------ 150
47 */
48
49__cpuinitdata enum mrst_timer_options mrst_timer_options;
50
28static u32 sfi_mtimer_usage[SFI_MTMR_MAX_NUM]; 51static u32 sfi_mtimer_usage[SFI_MTMR_MAX_NUM];
29static struct sfi_timer_table_entry sfi_mtimer_array[SFI_MTMR_MAX_NUM]; 52static struct sfi_timer_table_entry sfi_mtimer_array[SFI_MTMR_MAX_NUM];
53enum mrst_cpu_type __mrst_cpu_chip;
54EXPORT_SYMBOL_GPL(__mrst_cpu_chip);
55
30int sfi_mtimer_num; 56int sfi_mtimer_num;
31 57
32struct sfi_rtc_table_entry sfi_mrtc_array[SFI_MRTC_MAX]; 58struct sfi_rtc_table_entry sfi_mrtc_array[SFI_MRTC_MAX];
@@ -167,18 +193,6 @@ int __init sfi_parse_mrtc(struct sfi_table_header *table)
167 return 0; 193 return 0;
168} 194}
169 195
170/*
171 * the secondary clock in Moorestown can be APBT or LAPIC clock, default to
172 * APBT but cmdline option can also override it.
173 */
174static void __cpuinit mrst_setup_secondary_clock(void)
175{
176 /* restore default lapic clock if disabled by cmdline */
177 if (disable_apbt_percpu)
178 return setup_secondary_APIC_clock();
179 apbt_setup_secondary_clock();
180}
181
182static unsigned long __init mrst_calibrate_tsc(void) 196static unsigned long __init mrst_calibrate_tsc(void)
183{ 197{
184 unsigned long flags, fast_calibrate; 198 unsigned long flags, fast_calibrate;
@@ -195,6 +209,21 @@ static unsigned long __init mrst_calibrate_tsc(void)
195 209
196void __init mrst_time_init(void) 210void __init mrst_time_init(void)
197{ 211{
212 switch (mrst_timer_options) {
213 case MRST_TIMER_APBT_ONLY:
214 break;
215 case MRST_TIMER_LAPIC_APBT:
216 x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock;
217 x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock;
218 break;
219 default:
220 if (!boot_cpu_has(X86_FEATURE_ARAT))
221 break;
222 x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock;
223 x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock;
224 return;
225 }
226 /* we need at least one APB timer */
198 sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr); 227 sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr);
199 pre_init_apic_IRQ0(); 228 pre_init_apic_IRQ0();
200 apbt_time_init(); 229 apbt_time_init();
@@ -205,16 +234,21 @@ void __init mrst_rtc_init(void)
205 sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc); 234 sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc);
206} 235}
207 236
208/* 237void __cpuinit mrst_arch_setup(void)
209 * if we use per cpu apb timer, the bootclock already setup. if we use lapic
210 * timer and one apbt timer for broadcast, we need to set up lapic boot clock.
211 */
212static void __init mrst_setup_boot_clock(void)
213{ 238{
214 pr_info("%s: per cpu apbt flag %d \n", __func__, disable_apbt_percpu); 239 if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27)
215 if (disable_apbt_percpu) 240 __mrst_cpu_chip = MRST_CPU_CHIP_PENWELL;
216 setup_boot_APIC_clock(); 241 else if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x26)
217}; 242 __mrst_cpu_chip = MRST_CPU_CHIP_LINCROFT;
243 else {
244 pr_err("Unknown Moorestown CPU (%d:%d), default to Lincroft\n",
245 boot_cpu_data.x86, boot_cpu_data.x86_model);
246 __mrst_cpu_chip = MRST_CPU_CHIP_LINCROFT;
247 }
248 pr_debug("Moorestown CPU %s identified\n",
249 (__mrst_cpu_chip == MRST_CPU_CHIP_LINCROFT) ?
250 "Lincroft" : "Penwell");
251}
218 252
219/* MID systems don't have i8042 controller */ 253/* MID systems don't have i8042 controller */
220static int mrst_i8042_detect(void) 254static int mrst_i8042_detect(void)
@@ -232,11 +266,13 @@ void __init x86_mrst_early_setup(void)
232 x86_init.resources.reserve_resources = x86_init_noop; 266 x86_init.resources.reserve_resources = x86_init_noop;
233 267
234 x86_init.timers.timer_init = mrst_time_init; 268 x86_init.timers.timer_init = mrst_time_init;
235 x86_init.timers.setup_percpu_clockev = mrst_setup_boot_clock; 269 x86_init.timers.setup_percpu_clockev = x86_init_noop;
236 270
237 x86_init.irqs.pre_vector_init = x86_init_noop; 271 x86_init.irqs.pre_vector_init = x86_init_noop;
238 272
239 x86_cpuinit.setup_percpu_clockev = mrst_setup_secondary_clock; 273 x86_init.oem.arch_setup = mrst_arch_setup;
274
275 x86_cpuinit.setup_percpu_clockev = apbt_setup_secondary_clock;
240 276
241 x86_platform.calibrate_tsc = mrst_calibrate_tsc; 277 x86_platform.calibrate_tsc = mrst_calibrate_tsc;
242 x86_platform.i8042_detect = mrst_i8042_detect; 278 x86_platform.i8042_detect = mrst_i8042_detect;
@@ -250,3 +286,26 @@ void __init x86_mrst_early_setup(void)
250 x86_init.mpparse.get_smp_config = x86_init_uint_noop; 286 x86_init.mpparse.get_smp_config = x86_init_uint_noop;
251 287
252} 288}
289
290/*
291 * if user does not want to use per CPU apb timer, just give it a lower rating
292 * than local apic timer and skip the late per cpu timer init.
293 */
294static inline int __init setup_x86_mrst_timer(char *arg)
295{
296 if (!arg)
297 return -EINVAL;
298
299 if (strcmp("apbt_only", arg) == 0)
300 mrst_timer_options = MRST_TIMER_APBT_ONLY;
301 else if (strcmp("lapic_and_apbt", arg) == 0)
302 mrst_timer_options = MRST_TIMER_LAPIC_APBT;
303 else {
304 pr_warning("X86 MRST timer option %s not recognised"
305 " use x86_mrst_timer=apbt_only or lapic_and_apbt\n",
306 arg);
307 return -EINVAL;
308 }
309 return 0;
310}
311__setup("x86_mrst_timer=", setup_x86_mrst_timer);
diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c
index 8297160c41b..0e0cdde519b 100644
--- a/arch/x86/kernel/olpc.c
+++ b/arch/x86/kernel/olpc.c
@@ -21,10 +21,7 @@
21#include <asm/geode.h> 21#include <asm/geode.h>
22#include <asm/setup.h> 22#include <asm/setup.h>
23#include <asm/olpc.h> 23#include <asm/olpc.h>
24 24#include <asm/olpc_ofw.h>
25#ifdef CONFIG_OPEN_FIRMWARE
26#include <asm/ofw.h>
27#endif
28 25
29struct olpc_platform_t olpc_platform_info; 26struct olpc_platform_t olpc_platform_info;
30EXPORT_SYMBOL_GPL(olpc_platform_info); 27EXPORT_SYMBOL_GPL(olpc_platform_info);
@@ -145,7 +142,7 @@ restart:
145 * The OBF flag will sometimes misbehave due to what we believe 142 * The OBF flag will sometimes misbehave due to what we believe
146 * is a hardware quirk.. 143 * is a hardware quirk..
147 */ 144 */
148 printk(KERN_DEBUG "olpc-ec: running cmd 0x%x\n", cmd); 145 pr_devel("olpc-ec: running cmd 0x%x\n", cmd);
149 outb(cmd, 0x6c); 146 outb(cmd, 0x6c);
150 147
151 if (wait_on_ibf(0x6c, 0)) { 148 if (wait_on_ibf(0x6c, 0)) {
@@ -162,8 +159,7 @@ restart:
162 " EC accept data!\n"); 159 " EC accept data!\n");
163 goto err; 160 goto err;
164 } 161 }
165 printk(KERN_DEBUG "olpc-ec: sending cmd arg 0x%x\n", 162 pr_devel("olpc-ec: sending cmd arg 0x%x\n", inbuf[i]);
166 inbuf[i]);
167 outb(inbuf[i], 0x68); 163 outb(inbuf[i], 0x68);
168 } 164 }
169 } 165 }
@@ -176,8 +172,7 @@ restart:
176 goto restart; 172 goto restart;
177 } 173 }
178 outbuf[i] = inb(0x68); 174 outbuf[i] = inb(0x68);
179 printk(KERN_DEBUG "olpc-ec: received 0x%x\n", 175 pr_devel("olpc-ec: received 0x%x\n", outbuf[i]);
180 outbuf[i]);
181 } 176 }
182 } 177 }
183 178
@@ -188,14 +183,15 @@ err:
188} 183}
189EXPORT_SYMBOL_GPL(olpc_ec_cmd); 184EXPORT_SYMBOL_GPL(olpc_ec_cmd);
190 185
191#ifdef CONFIG_OPEN_FIRMWARE 186#ifdef CONFIG_OLPC_OPENFIRMWARE
192static void __init platform_detect(void) 187static void __init platform_detect(void)
193{ 188{
194 size_t propsize; 189 size_t propsize;
195 __be32 rev; 190 __be32 rev;
191 const void *args[] = { NULL, "board-revision-int", &rev, (void *)4 };
192 void *res[] = { &propsize };
196 193
197 if (ofw("getprop", 4, 1, NULL, "board-revision-int", &rev, 4, 194 if (olpc_ofw("getprop", args, res) || propsize != 4) {
198 &propsize) || propsize != 4) {
199 printk(KERN_ERR "ofw: getprop call failed!\n"); 195 printk(KERN_ERR "ofw: getprop call failed!\n");
200 rev = cpu_to_be32(0); 196 rev = cpu_to_be32(0);
201 } 197 }
diff --git a/arch/x86/kernel/olpc_ofw.c b/arch/x86/kernel/olpc_ofw.c
new file mode 100644
index 00000000000..3218aa71ab5
--- /dev/null
+++ b/arch/x86/kernel/olpc_ofw.c
@@ -0,0 +1,106 @@
1#include <linux/kernel.h>
2#include <linux/module.h>
3#include <linux/init.h>
4#include <asm/page.h>
5#include <asm/setup.h>
6#include <asm/io.h>
7#include <asm/pgtable.h>
8#include <asm/olpc_ofw.h>
9
10/* address of OFW callback interface; will be NULL if OFW isn't found */
11static int (*olpc_ofw_cif)(int *);
12
13/* page dir entry containing OFW's pgdir table; filled in by head_32.S */
14u32 olpc_ofw_pgd __initdata;
15
16static DEFINE_SPINLOCK(ofw_lock);
17
18#define MAXARGS 10
19
20void __init setup_olpc_ofw_pgd(void)
21{
22 pgd_t *base, *ofw_pde;
23
24 if (!olpc_ofw_cif)
25 return;
26
27 /* fetch OFW's PDE */
28 base = early_ioremap(olpc_ofw_pgd, sizeof(olpc_ofw_pgd) * PTRS_PER_PGD);
29 if (!base) {
30 printk(KERN_ERR "failed to remap OFW's pgd - disabling OFW!\n");
31 olpc_ofw_cif = NULL;
32 return;
33 }
34 ofw_pde = &base[OLPC_OFW_PDE_NR];
35
36 /* install OFW's PDE permanently into the kernel's pgtable */
37 set_pgd(&swapper_pg_dir[OLPC_OFW_PDE_NR], *ofw_pde);
38 /* implicit optimization barrier here due to uninline function return */
39
40 early_iounmap(base, sizeof(olpc_ofw_pgd) * PTRS_PER_PGD);
41}
42
43int __olpc_ofw(const char *name, int nr_args, const void **args, int nr_res,
44 void **res)
45{
46 int ofw_args[MAXARGS + 3];
47 unsigned long flags;
48 int ret, i, *p;
49
50 BUG_ON(nr_args + nr_res > MAXARGS);
51
52 if (!olpc_ofw_cif)
53 return -EIO;
54
55 ofw_args[0] = (int)name;
56 ofw_args[1] = nr_args;
57 ofw_args[2] = nr_res;
58
59 p = &ofw_args[3];
60 for (i = 0; i < nr_args; i++, p++)
61 *p = (int)args[i];
62
63 /* call into ofw */
64 spin_lock_irqsave(&ofw_lock, flags);
65 ret = olpc_ofw_cif(ofw_args);
66 spin_unlock_irqrestore(&ofw_lock, flags);
67
68 if (!ret) {
69 for (i = 0; i < nr_res; i++, p++)
70 *((int *)res[i]) = *p;
71 }
72
73 return ret;
74}
75EXPORT_SYMBOL_GPL(__olpc_ofw);
76
77/* OFW cif _should_ be above this address */
78#define OFW_MIN 0xff000000
79
80/* OFW starts on a 1MB boundary */
81#define OFW_BOUND (1<<20)
82
83void __init olpc_ofw_detect(void)
84{
85 struct olpc_ofw_header *hdr = &boot_params.olpc_ofw_header;
86 unsigned long start;
87
88 /* ensure OFW booted us by checking for "OFW " string */
89 if (hdr->ofw_magic != OLPC_OFW_SIG)
90 return;
91
92 olpc_ofw_cif = (int (*)(int *))hdr->cif_handler;
93
94 if ((unsigned long)olpc_ofw_cif < OFW_MIN) {
95 printk(KERN_ERR "OFW detected, but cif has invalid address 0x%lx - disabling.\n",
96 (unsigned long)olpc_ofw_cif);
97 olpc_ofw_cif = NULL;
98 return;
99 }
100
101 /* determine where OFW starts in memory */
102 start = round_down((unsigned long)olpc_ofw_cif, OFW_BOUND);
103 printk(KERN_INFO "OFW detected in memory, cif @ 0x%lx (reserving top %ldMB)\n",
104 (unsigned long)olpc_ofw_cif, (-start) >> 20);
105 reserve_top_address(-start);
106}
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 4b7e3d8b01d..9f07cfcbd3a 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -13,6 +13,7 @@
13#include <asm/calgary.h> 13#include <asm/calgary.h>
14#include <asm/amd_iommu.h> 14#include <asm/amd_iommu.h>
15#include <asm/x86_init.h> 15#include <asm/x86_init.h>
16#include <asm/xen/swiotlb-xen.h>
16 17
17static int forbid_dac __read_mostly; 18static int forbid_dac __read_mostly;
18 19
@@ -132,7 +133,7 @@ void __init pci_iommu_alloc(void)
132 /* free the range so iommu could get some range less than 4G */ 133 /* free the range so iommu could get some range less than 4G */
133 dma32_free_bootmem(); 134 dma32_free_bootmem();
134 135
135 if (pci_swiotlb_detect()) 136 if (pci_xen_swiotlb_detect() || pci_swiotlb_detect())
136 goto out; 137 goto out;
137 138
138 gart_iommu_hole_init(); 139 gart_iommu_hole_init();
@@ -144,6 +145,8 @@ void __init pci_iommu_alloc(void)
144 /* needs to be called after gart_iommu_hole_init */ 145 /* needs to be called after gart_iommu_hole_init */
145 amd_iommu_detect(); 146 amd_iommu_detect();
146out: 147out:
148 pci_xen_swiotlb_init();
149
147 pci_swiotlb_init(); 150 pci_swiotlb_init();
148} 151}
149 152
@@ -296,7 +299,7 @@ static int __init pci_iommu_init(void)
296#endif 299#endif
297 x86_init.iommu.iommu_init(); 300 x86_init.iommu.iommu_init();
298 301
299 if (swiotlb) { 302 if (swiotlb || xen_swiotlb) {
300 printk(KERN_INFO "PCI-DMA: " 303 printk(KERN_INFO "PCI-DMA: "
301 "Using software bounce buffering for IO (SWIOTLB)\n"); 304 "Using software bounce buffering for IO (SWIOTLB)\n");
302 swiotlb_print_info(); 305 swiotlb_print_info();
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index e7e35219b32..d401f1d2d06 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -28,6 +28,7 @@ unsigned long idle_nomwait;
28EXPORT_SYMBOL(idle_nomwait); 28EXPORT_SYMBOL(idle_nomwait);
29 29
30struct kmem_cache *task_xstate_cachep; 30struct kmem_cache *task_xstate_cachep;
31EXPORT_SYMBOL_GPL(task_xstate_cachep);
31 32
32int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) 33int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
33{ 34{
@@ -371,7 +372,7 @@ static inline int hlt_use_halt(void)
371void default_idle(void) 372void default_idle(void)
372{ 373{
373 if (hlt_use_halt()) { 374 if (hlt_use_halt()) {
374 trace_power_start(POWER_CSTATE, 1); 375 trace_power_start(POWER_CSTATE, 1, smp_processor_id());
375 current_thread_info()->status &= ~TS_POLLING; 376 current_thread_info()->status &= ~TS_POLLING;
376 /* 377 /*
377 * TS_POLLING-cleared state must be visible before we 378 * TS_POLLING-cleared state must be visible before we
@@ -441,7 +442,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
441 */ 442 */
442void mwait_idle_with_hints(unsigned long ax, unsigned long cx) 443void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
443{ 444{
444 trace_power_start(POWER_CSTATE, (ax>>4)+1); 445 trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id());
445 if (!need_resched()) { 446 if (!need_resched()) {
446 if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) 447 if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
447 clflush((void *)&current_thread_info()->flags); 448 clflush((void *)&current_thread_info()->flags);
@@ -457,7 +458,7 @@ void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
457static void mwait_idle(void) 458static void mwait_idle(void)
458{ 459{
459 if (!need_resched()) { 460 if (!need_resched()) {
460 trace_power_start(POWER_CSTATE, 1); 461 trace_power_start(POWER_CSTATE, 1, smp_processor_id());
461 if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) 462 if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
462 clflush((void *)&current_thread_info()->flags); 463 clflush((void *)&current_thread_info()->flags);
463 464
@@ -478,7 +479,7 @@ static void mwait_idle(void)
478 */ 479 */
479static void poll_idle(void) 480static void poll_idle(void)
480{ 481{
481 trace_power_start(POWER_CSTATE, 0); 482 trace_power_start(POWER_CSTATE, 0, smp_processor_id());
482 local_irq_enable(); 483 local_irq_enable();
483 while (!need_resched()) 484 while (!need_resched())
484 cpu_relax(); 485 cpu_relax();
@@ -525,44 +526,10 @@ static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
525 return (edx & MWAIT_EDX_C1); 526 return (edx & MWAIT_EDX_C1);
526} 527}
527 528
528/* 529bool c1e_detected;
529 * Check for AMD CPUs, where APIC timer interrupt does not wake up CPU from C1e. 530EXPORT_SYMBOL(c1e_detected);
530 * For more information see
531 * - Erratum #400 for NPT family 0xf and family 0x10 CPUs
532 * - Erratum #365 for family 0x11 (not affected because C1e not in use)
533 */
534static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c)
535{
536 u64 val;
537 if (c->x86_vendor != X86_VENDOR_AMD)
538 goto no_c1e_idle;
539
540 /* Family 0x0f models < rev F do not have C1E */
541 if (c->x86 == 0x0F && c->x86_model >= 0x40)
542 return 1;
543
544 if (c->x86 == 0x10) {
545 /*
546 * check OSVW bit for CPUs that are not affected
547 * by erratum #400
548 */
549 if (cpu_has(c, X86_FEATURE_OSVW)) {
550 rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, val);
551 if (val >= 2) {
552 rdmsrl(MSR_AMD64_OSVW_STATUS, val);
553 if (!(val & BIT(1)))
554 goto no_c1e_idle;
555 }
556 }
557 return 1;
558 }
559
560no_c1e_idle:
561 return 0;
562}
563 531
564static cpumask_var_t c1e_mask; 532static cpumask_var_t c1e_mask;
565static int c1e_detected;
566 533
567void c1e_remove_cpu(int cpu) 534void c1e_remove_cpu(int cpu)
568{ 535{
@@ -584,12 +551,12 @@ static void c1e_idle(void)
584 u32 lo, hi; 551 u32 lo, hi;
585 552
586 rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); 553 rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);
554
587 if (lo & K8_INTP_C1E_ACTIVE_MASK) { 555 if (lo & K8_INTP_C1E_ACTIVE_MASK) {
588 c1e_detected = 1; 556 c1e_detected = true;
589 if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) 557 if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
590 mark_tsc_unstable("TSC halt in AMD C1E"); 558 mark_tsc_unstable("TSC halt in AMD C1E");
591 printk(KERN_INFO "System has AMD C1E enabled\n"); 559 printk(KERN_INFO "System has AMD C1E enabled\n");
592 set_cpu_cap(&boot_cpu_data, X86_FEATURE_AMDC1E);
593 } 560 }
594 } 561 }
595 562
@@ -638,7 +605,8 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
638 */ 605 */
639 printk(KERN_INFO "using mwait in idle threads.\n"); 606 printk(KERN_INFO "using mwait in idle threads.\n");
640 pm_idle = mwait_idle; 607 pm_idle = mwait_idle;
641 } else if (check_c1e_idle(c)) { 608 } else if (cpu_has_amd_erratum(amd_erratum_400)) {
609 /* E400: APIC timer interrupt does not wake up CPU from C1e */
642 printk(KERN_INFO "using C1E aware idle routine\n"); 610 printk(KERN_INFO "using C1E aware idle routine\n");
643 pm_idle = c1e_idle; 611 pm_idle = c1e_idle;
644 } else 612 } else
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 8d128783af4..96586c3cbbb 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -57,6 +57,8 @@
57#include <asm/syscalls.h> 57#include <asm/syscalls.h>
58#include <asm/debugreg.h> 58#include <asm/debugreg.h>
59 59
60#include <trace/events/power.h>
61
60asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); 62asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
61 63
62/* 64/*
@@ -111,6 +113,8 @@ void cpu_idle(void)
111 stop_critical_timings(); 113 stop_critical_timings();
112 pm_idle(); 114 pm_idle();
113 start_critical_timings(); 115 start_critical_timings();
116
117 trace_power_end(smp_processor_id());
114 } 118 }
115 tick_nohz_restart_sched_tick(); 119 tick_nohz_restart_sched_tick();
116 preempt_enable_no_resched(); 120 preempt_enable_no_resched();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3c2422a99f1..3d9ea531ddd 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -51,6 +51,8 @@
51#include <asm/syscalls.h> 51#include <asm/syscalls.h>
52#include <asm/debugreg.h> 52#include <asm/debugreg.h>
53 53
54#include <trace/events/power.h>
55
54asmlinkage extern void ret_from_fork(void); 56asmlinkage extern void ret_from_fork(void);
55 57
56DEFINE_PER_CPU(unsigned long, old_rsp); 58DEFINE_PER_CPU(unsigned long, old_rsp);
@@ -138,6 +140,9 @@ void cpu_idle(void)
138 stop_critical_timings(); 140 stop_critical_timings();
139 pm_idle(); 141 pm_idle();
140 start_critical_timings(); 142 start_critical_timings();
143
144 trace_power_end(smp_processor_id());
145
141 /* In many cases the interrupt that ended idle 146 /* In many cases the interrupt that ended idle
142 has already called exit_idle. But some idle 147 has already called exit_idle. But some idle
143 loops can be woken up without interrupt. */ 148 loops can be woken up without interrupt. */
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index b4ae4acbd03..b008e788320 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -102,6 +102,7 @@
102 102
103#include <asm/paravirt.h> 103#include <asm/paravirt.h>
104#include <asm/hypervisor.h> 104#include <asm/hypervisor.h>
105#include <asm/olpc_ofw.h>
105 106
106#include <asm/percpu.h> 107#include <asm/percpu.h>
107#include <asm/topology.h> 108#include <asm/topology.h>
@@ -736,10 +737,15 @@ void __init setup_arch(char **cmdline_p)
736 /* VMI may relocate the fixmap; do this before touching ioremap area */ 737 /* VMI may relocate the fixmap; do this before touching ioremap area */
737 vmi_init(); 738 vmi_init();
738 739
740 /* OFW also may relocate the fixmap */
741 olpc_ofw_detect();
742
739 early_trap_init(); 743 early_trap_init();
740 early_cpu_init(); 744 early_cpu_init();
741 early_ioremap_init(); 745 early_ioremap_init();
742 746
747 setup_olpc_ofw_pgd();
748
743 ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); 749 ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
744 screen_info = boot_params.screen_info; 750 screen_info = boot_params.screen_info;
745 edid_info = boot_params.edid_info; 751 edid_info = boot_params.edid_info;
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index c4f33b2e77d..a5e928b0cb5 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -735,12 +735,8 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
735 goto do_rest; 735 goto do_rest;
736 } 736 }
737 737
738 if (!keventd_up() || current_is_keventd()) 738 schedule_work(&c_idle.work);
739 c_idle.work.func(&c_idle.work); 739 wait_for_completion(&c_idle.done);
740 else {
741 schedule_work(&c_idle.work);
742 wait_for_completion(&c_idle.done);
743 }
744 740
745 if (IS_ERR(c_idle.idle)) { 741 if (IS_ERR(c_idle.idle)) {
746 printk("failed fork for CPU %d\n", cpu); 742 printk("failed fork for CPU %d\n", cpu);
@@ -816,6 +812,13 @@ do_rest:
816 if (cpumask_test_cpu(cpu, cpu_callin_mask)) 812 if (cpumask_test_cpu(cpu, cpu_callin_mask))
817 break; /* It has booted */ 813 break; /* It has booted */
818 udelay(100); 814 udelay(100);
815 /*
816 * Allow other tasks to run while we wait for the
817 * AP to come online. This also gives a chance
818 * for the MTRR work(triggered by the AP coming online)
819 * to be completed in the stop machine context.
820 */
821 schedule();
819 } 822 }
820 823
821 if (cpumask_test_cpu(cpu, cpu_callin_mask)) 824 if (cpumask_test_cpu(cpu, cpu_callin_mask))
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 922eefbb3f6..b53c525368a 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -23,11 +23,16 @@ static int save_stack_stack(void *data, char *name)
23 return 0; 23 return 0;
24} 24}
25 25
26static void save_stack_address(void *data, unsigned long addr, int reliable) 26static void
27__save_stack_address(void *data, unsigned long addr, bool reliable, bool nosched)
27{ 28{
28 struct stack_trace *trace = data; 29 struct stack_trace *trace = data;
30#ifdef CONFIG_FRAME_POINTER
29 if (!reliable) 31 if (!reliable)
30 return; 32 return;
33#endif
34 if (nosched && in_sched_functions(addr))
35 return;
31 if (trace->skip > 0) { 36 if (trace->skip > 0) {
32 trace->skip--; 37 trace->skip--;
33 return; 38 return;
@@ -36,20 +41,15 @@ static void save_stack_address(void *data, unsigned long addr, int reliable)
36 trace->entries[trace->nr_entries++] = addr; 41 trace->entries[trace->nr_entries++] = addr;
37} 42}
38 43
44static void save_stack_address(void *data, unsigned long addr, int reliable)
45{
46 return __save_stack_address(data, addr, reliable, false);
47}
48
39static void 49static void
40save_stack_address_nosched(void *data, unsigned long addr, int reliable) 50save_stack_address_nosched(void *data, unsigned long addr, int reliable)
41{ 51{
42 struct stack_trace *trace = (struct stack_trace *)data; 52 return __save_stack_address(data, addr, reliable, true);
43 if (!reliable)
44 return;
45 if (in_sched_functions(addr))
46 return;
47 if (trace->skip > 0) {
48 trace->skip--;
49 return;
50 }
51 if (trace->nr_entries < trace->max_entries)
52 trace->entries[trace->nr_entries++] = addr;
53} 53}
54 54
55static const struct stacktrace_ops save_stack_ops = { 55static const struct stacktrace_ops save_stack_ops = {
@@ -96,12 +96,13 @@ EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
96 96
97/* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */ 97/* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */
98 98
99struct stack_frame { 99struct stack_frame_user {
100 const void __user *next_fp; 100 const void __user *next_fp;
101 unsigned long ret_addr; 101 unsigned long ret_addr;
102}; 102};
103 103
104static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) 104static int
105copy_stack_frame(const void __user *fp, struct stack_frame_user *frame)
105{ 106{
106 int ret; 107 int ret;
107 108
@@ -126,7 +127,7 @@ static inline void __save_stack_trace_user(struct stack_trace *trace)
126 trace->entries[trace->nr_entries++] = regs->ip; 127 trace->entries[trace->nr_entries++] = regs->ip;
127 128
128 while (trace->nr_entries < trace->max_entries) { 129 while (trace->nr_entries < trace->max_entries) {
129 struct stack_frame frame; 130 struct stack_frame_user frame;
130 131
131 frame.next_fp = NULL; 132 frame.next_fp = NULL;
132 frame.ret_addr = 0; 133 frame.ret_addr = 0;
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index 8b372934121..b35786dc9b8 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -337,3 +337,6 @@ ENTRY(sys_call_table)
337 .long sys_rt_tgsigqueueinfo /* 335 */ 337 .long sys_rt_tgsigqueueinfo /* 335 */
338 .long sys_perf_event_open 338 .long sys_perf_event_open
339 .long sys_recvmmsg 339 .long sys_recvmmsg
340 .long sys_fanotify_init
341 .long sys_fanotify_mark
342 .long sys_prlimit64 /* 340 */
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 725ef4d17cd..60788dee0f8 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -392,7 +392,13 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
392 if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) 392 if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
393 == NOTIFY_STOP) 393 == NOTIFY_STOP)
394 return; 394 return;
395
395#ifdef CONFIG_X86_LOCAL_APIC 396#ifdef CONFIG_X86_LOCAL_APIC
397 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
398 == NOTIFY_STOP)
399 return;
400
401#ifndef CONFIG_LOCKUP_DETECTOR
396 /* 402 /*
397 * Ok, so this is none of the documented NMI sources, 403 * Ok, so this is none of the documented NMI sources,
398 * so it must be the NMI watchdog. 404 * so it must be the NMI watchdog.
@@ -400,6 +406,7 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
400 if (nmi_watchdog_tick(regs, reason)) 406 if (nmi_watchdog_tick(regs, reason))
401 return; 407 return;
402 if (!do_nmi_callback(regs, cpu)) 408 if (!do_nmi_callback(regs, cpu))
409#endif /* !CONFIG_LOCKUP_DETECTOR */
403 unknown_nmi_error(reason, regs); 410 unknown_nmi_error(reason, regs);
404#else 411#else
405 unknown_nmi_error(reason, regs); 412 unknown_nmi_error(reason, regs);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 9faf91ae184..ce8e5023933 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -751,7 +751,6 @@ static struct clocksource clocksource_tsc = {
751 .read = read_tsc, 751 .read = read_tsc,
752 .resume = resume_tsc, 752 .resume = resume_tsc,
753 .mask = CLOCKSOURCE_MASK(64), 753 .mask = CLOCKSOURCE_MASK(64),
754 .shift = 22,
755 .flags = CLOCK_SOURCE_IS_CONTINUOUS | 754 .flags = CLOCK_SOURCE_IS_CONTINUOUS |
756 CLOCK_SOURCE_MUST_VERIFY, 755 CLOCK_SOURCE_MUST_VERIFY,
757#ifdef CONFIG_X86_64 756#ifdef CONFIG_X86_64
@@ -845,8 +844,6 @@ __cpuinit int unsynchronized_tsc(void)
845 844
846static void __init init_tsc_clocksource(void) 845static void __init init_tsc_clocksource(void)
847{ 846{
848 clocksource_tsc.mult = clocksource_khz2mult(tsc_khz,
849 clocksource_tsc.shift);
850 if (tsc_clocksource_reliable) 847 if (tsc_clocksource_reliable)
851 clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; 848 clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
852 /* lower the rating if we already know its unstable: */ 849 /* lower the rating if we already know its unstable: */
@@ -854,7 +851,7 @@ static void __init init_tsc_clocksource(void)
854 clocksource_tsc.rating = 0; 851 clocksource_tsc.rating = 0;
855 clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; 852 clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
856 } 853 }
857 clocksource_register(&clocksource_tsc); 854 clocksource_register_khz(&clocksource_tsc, tsc_khz);
858} 855}
859 856
860#ifdef CONFIG_X86_64 857#ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/verify_cpu_64.S b/arch/x86/kernel/verify_cpu_64.S
index 45b6f8a975a..56a8c2a867d 100644
--- a/arch/x86/kernel/verify_cpu_64.S
+++ b/arch/x86/kernel/verify_cpu_64.S
@@ -31,6 +31,7 @@
31 */ 31 */
32 32
33#include <asm/cpufeature.h> 33#include <asm/cpufeature.h>
34#include <asm/msr-index.h>
34 35
35verify_cpu: 36verify_cpu:
36 pushfl # Save caller passed flags 37 pushfl # Save caller passed flags
@@ -88,7 +89,7 @@ verify_cpu_sse_test:
88 je verify_cpu_sse_ok 89 je verify_cpu_sse_ok
89 test %di,%di 90 test %di,%di
90 jz verify_cpu_no_longmode # only try to force SSE on AMD 91 jz verify_cpu_no_longmode # only try to force SSE on AMD
91 movl $0xc0010015,%ecx # HWCR 92 movl $MSR_K7_HWCR,%ecx
92 rdmsr 93 rdmsr
93 btr $15,%eax # enable SSE 94 btr $15,%eax # enable SSE
94 wrmsr 95 wrmsr
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 1c0c6ab9c60..dcbb28c4b69 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -73,8 +73,8 @@ void update_vsyscall_tz(void)
73 write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); 73 write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
74} 74}
75 75
76void update_vsyscall(struct timespec *wall_time, struct clocksource *clock, 76void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
77 u32 mult) 77 struct clocksource *clock, u32 mult)
78{ 78{
79 unsigned long flags; 79 unsigned long flags;
80 80
@@ -87,7 +87,7 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock,
87 vsyscall_gtod_data.clock.shift = clock->shift; 87 vsyscall_gtod_data.clock.shift = clock->shift;
88 vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; 88 vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
89 vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; 89 vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
90 vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic; 90 vsyscall_gtod_data.wall_to_monotonic = *wtm;
91 vsyscall_gtod_data.wall_time_coarse = __current_kernel_time(); 91 vsyscall_gtod_data.wall_time_coarse = __current_kernel_time();
92 write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); 92 write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
93} 93}
@@ -169,13 +169,18 @@ int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
169 * unlikely */ 169 * unlikely */
170time_t __vsyscall(1) vtime(time_t *t) 170time_t __vsyscall(1) vtime(time_t *t)
171{ 171{
172 struct timeval tv; 172 unsigned seq;
173 time_t result; 173 time_t result;
174 if (unlikely(!__vsyscall_gtod_data.sysctl_enabled)) 174 if (unlikely(!__vsyscall_gtod_data.sysctl_enabled))
175 return time_syscall(t); 175 return time_syscall(t);
176 176
177 vgettimeofday(&tv, NULL); 177 do {
178 result = tv.tv_sec; 178 seq = read_seqbegin(&__vsyscall_gtod_data.lock);
179
180 result = __vsyscall_gtod_data.wall_time_sec;
181
182 } while (read_seqretry(&__vsyscall_gtod_data.lock, seq));
183
179 if (t) 184 if (t)
180 *t = result; 185 *t = result;
181 return result; 186 return result;
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 37e68fc5e24..9c253bd65e2 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -16,11 +16,88 @@
16 */ 16 */
17u64 pcntxt_mask; 17u64 pcntxt_mask;
18 18
19/*
20 * Represents init state for the supported extended state.
21 */
22static struct xsave_struct *init_xstate_buf;
23
19struct _fpx_sw_bytes fx_sw_reserved; 24struct _fpx_sw_bytes fx_sw_reserved;
20#ifdef CONFIG_IA32_EMULATION 25#ifdef CONFIG_IA32_EMULATION
21struct _fpx_sw_bytes fx_sw_reserved_ia32; 26struct _fpx_sw_bytes fx_sw_reserved_ia32;
22#endif 27#endif
23 28
29static unsigned int *xstate_offsets, *xstate_sizes, xstate_features;
30
31/*
32 * If a processor implementation discern that a processor state component is
33 * in its initialized state it may modify the corresponding bit in the
34 * xsave_hdr.xstate_bv as '0', with out modifying the corresponding memory
35 * layout in the case of xsaveopt. While presenting the xstate information to
36 * the user, we always ensure that the memory layout of a feature will be in
37 * the init state if the corresponding header bit is zero. This is to ensure
38 * that the user doesn't see some stale state in the memory layout during
39 * signal handling, debugging etc.
40 */
41void __sanitize_i387_state(struct task_struct *tsk)
42{
43 u64 xstate_bv;
44 int feature_bit = 0x2;
45 struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave;
46
47 if (!fx)
48 return;
49
50 BUG_ON(task_thread_info(tsk)->status & TS_USEDFPU);
51
52 xstate_bv = tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv;
53
54 /*
55 * None of the feature bits are in init state. So nothing else
56 * to do for us, as the memory layout is upto date.
57 */
58 if ((xstate_bv & pcntxt_mask) == pcntxt_mask)
59 return;
60
61 /*
62 * FP is in init state
63 */
64 if (!(xstate_bv & XSTATE_FP)) {
65 fx->cwd = 0x37f;
66 fx->swd = 0;
67 fx->twd = 0;
68 fx->fop = 0;
69 fx->rip = 0;
70 fx->rdp = 0;
71 memset(&fx->st_space[0], 0, 128);
72 }
73
74 /*
75 * SSE is in init state
76 */
77 if (!(xstate_bv & XSTATE_SSE))
78 memset(&fx->xmm_space[0], 0, 256);
79
80 xstate_bv = (pcntxt_mask & ~xstate_bv) >> 2;
81
82 /*
83 * Update all the other memory layouts for which the corresponding
84 * header bit is in the init state.
85 */
86 while (xstate_bv) {
87 if (xstate_bv & 0x1) {
88 int offset = xstate_offsets[feature_bit];
89 int size = xstate_sizes[feature_bit];
90
91 memcpy(((void *) fx) + offset,
92 ((void *) init_xstate_buf) + offset,
93 size);
94 }
95
96 xstate_bv >>= 1;
97 feature_bit++;
98 }
99}
100
24/* 101/*
25 * Check for the presence of extended state information in the 102 * Check for the presence of extended state information in the
26 * user fpstate pointer in the sigcontext. 103 * user fpstate pointer in the sigcontext.
@@ -36,15 +113,14 @@ int check_for_xstate(struct i387_fxsave_struct __user *buf,
36 113
37 err = __copy_from_user(fx_sw_user, &buf->sw_reserved[0], 114 err = __copy_from_user(fx_sw_user, &buf->sw_reserved[0],
38 sizeof(struct _fpx_sw_bytes)); 115 sizeof(struct _fpx_sw_bytes));
39
40 if (err) 116 if (err)
41 return err; 117 return -EFAULT;
42 118
43 /* 119 /*
44 * First Magic check failed. 120 * First Magic check failed.
45 */ 121 */
46 if (fx_sw_user->magic1 != FP_XSTATE_MAGIC1) 122 if (fx_sw_user->magic1 != FP_XSTATE_MAGIC1)
47 return -1; 123 return -EINVAL;
48 124
49 /* 125 /*
50 * Check for error scenarios. 126 * Check for error scenarios.
@@ -52,19 +128,21 @@ int check_for_xstate(struct i387_fxsave_struct __user *buf,
52 if (fx_sw_user->xstate_size < min_xstate_size || 128 if (fx_sw_user->xstate_size < min_xstate_size ||
53 fx_sw_user->xstate_size > xstate_size || 129 fx_sw_user->xstate_size > xstate_size ||
54 fx_sw_user->xstate_size > fx_sw_user->extended_size) 130 fx_sw_user->xstate_size > fx_sw_user->extended_size)
55 return -1; 131 return -EINVAL;
56 132
57 err = __get_user(magic2, (__u32 *) (((void *)fpstate) + 133 err = __get_user(magic2, (__u32 *) (((void *)fpstate) +
58 fx_sw_user->extended_size - 134 fx_sw_user->extended_size -
59 FP_XSTATE_MAGIC2_SIZE)); 135 FP_XSTATE_MAGIC2_SIZE));
136 if (err)
137 return err;
60 /* 138 /*
61 * Check for the presence of second magic word at the end of memory 139 * Check for the presence of second magic word at the end of memory
62 * layout. This detects the case where the user just copied the legacy 140 * layout. This detects the case where the user just copied the legacy
63 * fpstate layout with out copying the extended state information 141 * fpstate layout with out copying the extended state information
64 * in the memory layout. 142 * in the memory layout.
65 */ 143 */
66 if (err || magic2 != FP_XSTATE_MAGIC2) 144 if (magic2 != FP_XSTATE_MAGIC2)
67 return -1; 145 return -EFAULT;
68 146
69 return 0; 147 return 0;
70} 148}
@@ -91,14 +169,6 @@ int save_i387_xstate(void __user *buf)
91 return 0; 169 return 0;
92 170
93 if (task_thread_info(tsk)->status & TS_USEDFPU) { 171 if (task_thread_info(tsk)->status & TS_USEDFPU) {
94 /*
95 * Start with clearing the user buffer. This will present a
96 * clean context for the bytes not touched by the fxsave/xsave.
97 */
98 err = __clear_user(buf, sig_xstate_size);
99 if (err)
100 return err;
101
102 if (use_xsave()) 172 if (use_xsave())
103 err = xsave_user(buf); 173 err = xsave_user(buf);
104 else 174 else
@@ -109,6 +179,7 @@ int save_i387_xstate(void __user *buf)
109 task_thread_info(tsk)->status &= ~TS_USEDFPU; 179 task_thread_info(tsk)->status &= ~TS_USEDFPU;
110 stts(); 180 stts();
111 } else { 181 } else {
182 sanitize_i387_state(tsk);
112 if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave, 183 if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave,
113 xstate_size)) 184 xstate_size))
114 return -1; 185 return -1;
@@ -184,8 +255,8 @@ static int restore_user_xstate(void __user *buf)
184 * init the state skipped by the user. 255 * init the state skipped by the user.
185 */ 256 */
186 mask = pcntxt_mask & ~mask; 257 mask = pcntxt_mask & ~mask;
187 258 if (unlikely(mask))
188 xrstor_state(init_xstate_buf, mask); 259 xrstor_state(init_xstate_buf, mask);
189 260
190 return 0; 261 return 0;
191 262
@@ -274,11 +345,6 @@ static void prepare_fx_sw_frame(void)
274#endif 345#endif
275} 346}
276 347
277/*
278 * Represents init state for the supported extended state.
279 */
280struct xsave_struct *init_xstate_buf;
281
282#ifdef CONFIG_X86_64 348#ifdef CONFIG_X86_64
283unsigned int sig_xstate_size = sizeof(struct _fpstate); 349unsigned int sig_xstate_size = sizeof(struct _fpstate);
284#endif 350#endif
@@ -286,37 +352,77 @@ unsigned int sig_xstate_size = sizeof(struct _fpstate);
286/* 352/*
287 * Enable the extended processor state save/restore feature 353 * Enable the extended processor state save/restore feature
288 */ 354 */
289void __cpuinit xsave_init(void) 355static inline void xstate_enable(void)
290{ 356{
291 if (!cpu_has_xsave)
292 return;
293
294 set_in_cr4(X86_CR4_OSXSAVE); 357 set_in_cr4(X86_CR4_OSXSAVE);
295
296 /*
297 * Enable all the features that the HW is capable of
298 * and the Linux kernel is aware of.
299 */
300 xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); 358 xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask);
301} 359}
302 360
303/* 361/*
362 * Record the offsets and sizes of different state managed by the xsave
363 * memory layout.
364 */
365static void __init setup_xstate_features(void)
366{
367 int eax, ebx, ecx, edx, leaf = 0x2;
368
369 xstate_features = fls64(pcntxt_mask);
370 xstate_offsets = alloc_bootmem(xstate_features * sizeof(int));
371 xstate_sizes = alloc_bootmem(xstate_features * sizeof(int));
372
373 do {
374 cpuid_count(XSTATE_CPUID, leaf, &eax, &ebx, &ecx, &edx);
375
376 if (eax == 0)
377 break;
378
379 xstate_offsets[leaf] = ebx;
380 xstate_sizes[leaf] = eax;
381
382 leaf++;
383 } while (1);
384}
385
386/*
304 * setup the xstate image representing the init state 387 * setup the xstate image representing the init state
305 */ 388 */
306static void __init setup_xstate_init(void) 389static void __init setup_xstate_init(void)
307{ 390{
391 setup_xstate_features();
392
393 /*
394 * Setup init_xstate_buf to represent the init state of
395 * all the features managed by the xsave
396 */
308 init_xstate_buf = alloc_bootmem(xstate_size); 397 init_xstate_buf = alloc_bootmem(xstate_size);
309 init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT; 398 init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT;
399
400 clts();
401 /*
402 * Init all the features state with header_bv being 0x0
403 */
404 xrstor_state(init_xstate_buf, -1);
405 /*
406 * Dump the init state again. This is to identify the init state
407 * of any feature which is not represented by all zero's.
408 */
409 xsave_state(init_xstate_buf, -1);
410 stts();
310} 411}
311 412
312/* 413/*
313 * Enable and initialize the xsave feature. 414 * Enable and initialize the xsave feature.
314 */ 415 */
315void __ref xsave_cntxt_init(void) 416static void __init xstate_enable_boot_cpu(void)
316{ 417{
317 unsigned int eax, ebx, ecx, edx; 418 unsigned int eax, ebx, ecx, edx;
318 419
319 cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); 420 if (boot_cpu_data.cpuid_level < XSTATE_CPUID) {
421 WARN(1, KERN_ERR "XSTATE_CPUID missing\n");
422 return;
423 }
424
425 cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
320 pcntxt_mask = eax + ((u64)edx << 32); 426 pcntxt_mask = eax + ((u64)edx << 32);
321 427
322 if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) { 428 if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) {
@@ -329,12 +435,13 @@ void __ref xsave_cntxt_init(void)
329 * Support only the state known to OS. 435 * Support only the state known to OS.
330 */ 436 */
331 pcntxt_mask = pcntxt_mask & XCNTXT_MASK; 437 pcntxt_mask = pcntxt_mask & XCNTXT_MASK;
332 xsave_init(); 438
439 xstate_enable();
333 440
334 /* 441 /*
335 * Recompute the context size for enabled features 442 * Recompute the context size for enabled features
336 */ 443 */
337 cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); 444 cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
338 xstate_size = ebx; 445 xstate_size = ebx;
339 446
340 update_regset_xstate_info(xstate_size, pcntxt_mask); 447 update_regset_xstate_info(xstate_size, pcntxt_mask);
@@ -346,3 +453,23 @@ void __ref xsave_cntxt_init(void)
346 "cntxt size 0x%x\n", 453 "cntxt size 0x%x\n",
347 pcntxt_mask, xstate_size); 454 pcntxt_mask, xstate_size);
348} 455}
456
457/*
458 * For the very first instance, this calls xstate_enable_boot_cpu();
459 * for all subsequent instances, this calls xstate_enable().
460 *
461 * This is somewhat obfuscated due to the lack of powerful enough
462 * overrides for the section checks.
463 */
464void __cpuinit xsave_init(void)
465{
466 static __refdata void (*next_func)(void) = xstate_enable_boot_cpu;
467 void (*this_func)(void);
468
469 if (!cpu_has_xsave)
470 return;
471
472 this_func = next_func;
473 next_func = xstate_enable;
474 this_func();
475}