Diffstat (limited to 'arch/x86/kernel')
 arch/x86/kernel/Makefile | 15
 arch/x86/kernel/acpi/cstate.c | 23
 arch/x86/kernel/alternative.c | 2
 arch/x86/kernel/amd_gart_64.c | 4
 arch/x86/kernel/apic/apic.c | 68
 arch/x86/kernel/apic/bigsmp_32.c | 20
 arch/x86/kernel/apic/es7000_32.c | 2
 arch/x86/kernel/apic/hw_nmi.c | 27
 arch/x86/kernel/apic/io_apic.c | 687
 arch/x86/kernel/apic/probe_32.c | 10
 arch/x86/kernel/apic/x2apic_uv_x.c | 28
 arch/x86/kernel/apm_32.c | 2
 arch/x86/kernel/cpu/Makefile | 8
 arch/x86/kernel/cpu/amd.c | 50
 arch/x86/kernel/cpu/common.c | 7
 arch/x86/kernel/cpu/cpu.h | 1
 arch/x86/kernel/cpu/intel.c | 24
 arch/x86/kernel/cpu/intel_cacheinfo.c | 127
 arch/x86/kernel/cpu/mcheck/mce-apei.c | 1
 arch/x86/kernel/cpu/mcheck/mce-inject.c | 22
 arch/x86/kernel/cpu/mcheck/mce.c | 76
 arch/x86/kernel/cpu/mcheck/mce_intel.c | 10
 arch/x86/kernel/cpu/mcheck/therm_throt.c | 1
 arch/x86/kernel/cpu/mshyperv.c | 23
 arch/x86/kernel/cpu/mtrr/main.c | 2
 arch/x86/kernel/cpu/perf_event.c | 445
 arch/x86/kernel/cpu/perf_event.h | 505
 arch/x86/kernel/cpu/perf_event_amd.c | 38
 arch/x86/kernel/cpu/perf_event_amd_ibs.c | 294
 arch/x86/kernel/cpu/perf_event_intel.c | 148
 arch/x86/kernel/cpu/perf_event_intel_ds.c | 79
 arch/x86/kernel/cpu/perf_event_intel_lbr.c | 28
 arch/x86/kernel/cpu/perf_event_p4.c | 10
 arch/x86/kernel/cpu/perf_event_p6.c | 9
 arch/x86/kernel/cpu/proc.c | 2
 arch/x86/kernel/cpu/rdrand.c | 73
 arch/x86/kernel/crash.c | 5
 arch/x86/kernel/devicetree.c | 1
 arch/x86/kernel/e820.c | 1
 arch/x86/kernel/entry_32.S | 8
 arch/x86/kernel/entry_64.S | 15
 arch/x86/kernel/hpet.c | 1
 arch/x86/kernel/i387.c | 2
 arch/x86/kernel/i8259.c | 2
 arch/x86/kernel/irq.c | 1
 arch/x86/kernel/irqinit.c | 2
 arch/x86/kernel/jump_label.c | 20
 arch/x86/kernel/kgdb.c | 60
 arch/x86/kernel/kprobes.c | 5
 arch/x86/kernel/kvmclock.c | 5
 arch/x86/kernel/microcode_amd.c | 5
 arch/x86/kernel/microcode_core.c | 8
 arch/x86/kernel/microcode_intel.c | 14
 arch/x86/kernel/nmi.c | 435
 arch/x86/kernel/paravirt.c | 4
 arch/x86/kernel/pci-calgary_64.c | 2
 arch/x86/kernel/pci-dma.c | 5
 arch/x86/kernel/probe_roms.c | 6
 arch/x86/kernel/process.c | 25
 arch/x86/kernel/process_32.c | 8
 arch/x86/kernel/process_64.c | 6
 arch/x86/kernel/reboot.c | 23
 arch/x86/kernel/rtc.c | 24
 arch/x86/kernel/setup.c | 2
 arch/x86/kernel/smp.c | 1
 arch/x86/kernel/step.c | 2
 arch/x86/kernel/sys_x86_64.c | 78
 arch/x86/kernel/syscall_table_32.S | 4
 arch/x86/kernel/tboot.c | 1
 arch/x86/kernel/time.c | 1
 arch/x86/kernel/topology.c | 1
 arch/x86/kernel/traps.c | 163
 arch/x86/kernel/vmlinux.lds.S | 41
 arch/x86/kernel/vsyscall_64.c | 91
 arch/x86/kernel/vsyscall_emu_64.S | 36
 arch/x86/kernel/vsyscall_trace.h | 29
 arch/x86/kernel/x86_init.c | 4
 77 files changed, 2522 insertions(+), 1496 deletions(-)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 04105574c8e9..8baca3c4871c 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -17,22 +17,9 @@ CFLAGS_REMOVE_ftrace.o = -pg
 CFLAGS_REMOVE_early_printk.o = -pg
 endif
 
-#
-# vsyscalls (which work on the user stack) should have
-# no stack-protector checks:
-#
-nostackp := $(call cc-option, -fno-stack-protector)
-CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
-CFLAGS_hpet.o := $(nostackp)
-CFLAGS_paravirt.o := $(nostackp)
-GCOV_PROFILE_vsyscall_64.o := n
-GCOV_PROFILE_hpet.o := n
-GCOV_PROFILE_tsc.o := n
-GCOV_PROFILE_paravirt.o := n
-
 obj-y := process_$(BITS).o signal.o entry_$(BITS).o
 obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
-obj-y += time.o ioport.o ldt.o dumpstack.o
+obj-y += time.o ioport.o ldt.o dumpstack.o nmi.o
 obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o
 obj-$(CONFIG_IRQ_WORK) += irq_work.o
 obj-y += probe_roms.o
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index 5812404a0d4c..f50e7fb2a201 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -149,6 +149,29 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
 }
 EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe);
 
+/*
+ * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
+ * which can obviate IPI to trigger checking of need_resched.
+ * We execute MONITOR against need_resched and enter optimized wait state
+ * through MWAIT. Whenever someone changes need_resched, we would be woken
+ * up from MWAIT (without an IPI).
+ *
+ * New with Core Duo processors, MWAIT can take some hints based on CPU
+ * capability.
+ */
+void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
+{
+	if (!need_resched()) {
+		if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
+			clflush((void *)&current_thread_info()->flags);
+
+		__monitor((void *)&current_thread_info()->flags, 0, 0);
+		smp_mb();
+		if (!need_resched())
+			__mwait(ax, cx);
+	}
+}
+
 void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx)
 {
 	unsigned int cpu = smp_processor_id();
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index c63822816249..1f84794f0759 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -738,5 +738,5 @@ void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)
 
 	atomic_set(&stop_machine_first, 1);
 	wrote_text = 0;
-	__stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
+	__stop_machine(stop_machine_text_poke, (void *)&tpp, cpu_online_mask);
 }
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index b117efd24f71..b1e7c7f7a0af 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -5,7 +5,7 @@
  * This allows to use PCI devices that only support 32bit addresses on systems
  * with more than 4GB.
  *
- * See Documentation/PCI/PCI-DMA-mapping.txt for the interface specification.
+ * See Documentation/DMA-API-HOWTO.txt for the interface specification.
  *
  * Copyright 2002 Andi Kleen, SuSE Labs.
  * Subject to the GNU General Public License v2 only.
@@ -30,7 +30,7 @@
 #include <linux/syscore_ops.h>
 #include <linux/io.h>
 #include <linux/gfp.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
 #include <asm/mtrr.h>
 #include <asm/pgtable.h>
 #include <asm/proto.h>
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index b24be38c8cf8..f98d84caf94c 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -38,7 +38,7 @@
 #include <asm/perf_event.h>
 #include <asm/x86_init.h>
 #include <asm/pgalloc.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
 #include <asm/mpspec.h>
 #include <asm/i8259.h>
 #include <asm/proto.h>
@@ -186,7 +186,7 @@ static struct resource lapic_resource = {
 	.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
 };
 
-static unsigned int calibration_result;
+unsigned int lapic_timer_frequency = 0;
 
 static void apic_pm_activate(void);
 
@@ -454,7 +454,7 @@ static void lapic_timer_setup(enum clock_event_mode mode,
 	switch (mode) {
 	case CLOCK_EVT_MODE_PERIODIC:
 	case CLOCK_EVT_MODE_ONESHOT:
-		__setup_APIC_LVTT(calibration_result,
+		__setup_APIC_LVTT(lapic_timer_frequency,
 				  mode != CLOCK_EVT_MODE_PERIODIC, 1);
 		break;
 	case CLOCK_EVT_MODE_UNUSED:
@@ -638,6 +638,25 @@ static int __init calibrate_APIC_clock(void)
 	long delta, deltatsc;
 	int pm_referenced = 0;
 
+	/**
+	 * check if lapic timer has already been calibrated by platform
+	 * specific routine, such as tsc calibration code. if so, we just fill
+	 * in the clockevent structure and return.
+	 */
+
+	if (lapic_timer_frequency) {
+		apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n",
+				lapic_timer_frequency);
+		lapic_clockevent.mult = div_sc(lapic_timer_frequency/APIC_DIVISOR,
+					TICK_NSEC, lapic_clockevent.shift);
+		lapic_clockevent.max_delta_ns =
+			clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
+		lapic_clockevent.min_delta_ns =
+			clockevent_delta2ns(0xF, &lapic_clockevent);
+		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
+		return 0;
+	}
+
 	local_irq_disable();
 
 	/* Replace the global interrupt handler */
@@ -679,12 +698,12 @@ static int __init calibrate_APIC_clock(void)
 	lapic_clockevent.min_delta_ns =
 		clockevent_delta2ns(0xF, &lapic_clockevent);
 
-	calibration_result = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
+	lapic_timer_frequency = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
 
 	apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
 	apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult);
 	apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
-		    calibration_result);
+		    lapic_timer_frequency);
 
 	if (cpu_has_tsc) {
 		apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
@@ -695,13 +714,13 @@ static int __init calibrate_APIC_clock(void)
 
 	apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
 		    "%u.%04u MHz.\n",
-		    calibration_result / (1000000 / HZ),
-		    calibration_result % (1000000 / HZ));
+		    lapic_timer_frequency / (1000000 / HZ),
+		    lapic_timer_frequency % (1000000 / HZ));
 
 	/*
 	 * Do a sanity check on the APIC calibration result
 	 */
-	if (calibration_result < (1000000 / HZ)) {
+	if (lapic_timer_frequency < (1000000 / HZ)) {
 		local_irq_enable();
 		pr_warning("APIC frequency too slow, disabling apic timer\n");
 		return -1;
@@ -1437,27 +1456,21 @@ void enable_x2apic(void)
 
 int __init enable_IR(void)
 {
-#ifdef CONFIG_INTR_REMAP
+#ifdef CONFIG_IRQ_REMAP
 	if (!intr_remapping_supported()) {
 		pr_debug("intr-remapping not supported\n");
-		return 0;
+		return -1;
 	}
 
 	if (!x2apic_preenabled && skip_ioapic_setup) {
 		pr_info("Skipped enabling intr-remap because of skipping "
 			"io-apic setup\n");
-		return 0;
+		return -1;
 	}
 
-	if (enable_intr_remapping(x2apic_supported()))
-		return 0;
-
-	pr_info("Enabled Interrupt-remapping\n");
-
-	return 1;
-
+	return enable_intr_remapping();
 #endif
-	return 0;
+	return -1;
 }
 
 void __init enable_IR_x2apic(void)
@@ -1481,11 +1494,11 @@ void __init enable_IR_x2apic(void)
 	mask_ioapic_entries();
 
 	if (dmar_table_init_ret)
-		ret = 0;
+		ret = -1;
 	else
 		ret = enable_IR();
 
-	if (!ret) {
+	if (ret < 0) {
 		/* IR is required if there is APIC ID > 255 even when running
 		 * under KVM
 		 */
@@ -1499,6 +1512,9 @@ void __init enable_IR_x2apic(void)
 		x2apic_force_phys();
 	}
 
+	if (ret == IRQ_REMAP_XAPIC_MODE)
+		goto nox2apic;
+
 	x2apic_enabled = 1;
 
 	if (x2apic_supported() && !x2apic_mode) {
@@ -1508,19 +1524,21 @@ void __init enable_IR_x2apic(void)
 	}
 
 nox2apic:
-	if (!ret) /* IR enabling failed */
+	if (ret < 0) /* IR enabling failed */
 		restore_ioapic_entries();
 	legacy_pic->restore_mask();
 	local_irq_restore(flags);
 
 out:
-	if (x2apic_enabled)
+	if (x2apic_enabled || !x2apic_supported())
 		return;
 
 	if (x2apic_preenabled)
 		panic("x2apic: enabled by BIOS but kernel init failed.");
-	else if (cpu_has_x2apic)
-		pr_info("Not enabling x2apic, Intr-remapping init failed.\n");
+	else if (ret == IRQ_REMAP_XAPIC_MODE)
+		pr_info("x2apic not enabled, IRQ remapping is in xapic mode\n");
+	else if (ret < 0)
+		pr_info("x2apic not enabled, IRQ remapping init failed\n");
 }
 
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index efd737e827f4..521bead01137 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -255,12 +255,24 @@ static struct apic apic_bigsmp = {
 	.x86_32_early_logical_apicid	= bigsmp_early_logical_apicid,
 };
 
-struct apic * __init generic_bigsmp_probe(void)
+void __init generic_bigsmp_probe(void)
 {
-	if (probe_bigsmp())
-		return &apic_bigsmp;
+	unsigned int cpu;
 
-	return NULL;
+	if (!probe_bigsmp())
+		return;
+
+	apic = &apic_bigsmp;
+
+	for_each_possible_cpu(cpu) {
+		if (early_per_cpu(x86_cpu_to_logical_apicid,
+				  cpu) == BAD_APICID)
+			continue;
+		early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
+			bigsmp_early_logical_apicid(cpu);
+	}
+
+	pr_info("Overriding APIC driver with %s\n", apic_bigsmp.name);
 }
 
 apic_driver(apic_bigsmp);
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 9536b3fe43f8..5d513bc47b6b 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -48,7 +48,7 @@
 #include <linux/io.h>
 
 #include <asm/apicdef.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
 #include <asm/fixmap.h>
 #include <asm/mpspec.h>
 #include <asm/setup.h>
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index d5e57db0f7be..31cb9ae992b7 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -60,22 +60,10 @@ void arch_trigger_all_cpu_backtrace(void)
 }
 
 static int __kprobes
-arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
-			 unsigned long cmd, void *__args)
+arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs)
 {
-	struct die_args *args = __args;
-	struct pt_regs *regs;
 	int cpu;
 
-	switch (cmd) {
-	case DIE_NMI:
-		break;
-
-	default:
-		return NOTIFY_DONE;
-	}
-
-	regs = args->regs;
 	cpu = smp_processor_id();
 
 	if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
@@ -86,21 +74,16 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
 		show_regs(regs);
 		arch_spin_unlock(&lock);
 		cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
-		return NOTIFY_STOP;
+		return NMI_HANDLED;
 	}
 
-	return NOTIFY_DONE;
+	return NMI_DONE;
 }
 
-static __read_mostly struct notifier_block backtrace_notifier = {
-	.notifier_call = arch_trigger_all_cpu_backtrace_handler,
-	.next = NULL,
-	.priority = NMI_LOCAL_LOW_PRIOR,
-};
-
 static int __init register_trigger_all_cpu_backtrace(void)
 {
-	register_die_notifier(&backtrace_notifier);
+	register_nmi_handler(NMI_LOCAL, arch_trigger_all_cpu_backtrace_handler,
+				0, "arch_bt");
 	return 0;
 }
 early_initcall(register_trigger_all_cpu_backtrace);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 8eb863e27ea6..6d939d7847e2 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -92,21 +92,21 @@ static struct ioapic {
 	DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
 } ioapics[MAX_IO_APICS];
 
-#define mpc_ioapic_ver(id) ioapics[id].mp_config.apicver
+#define mpc_ioapic_ver(ioapic_idx) ioapics[ioapic_idx].mp_config.apicver
 
-int mpc_ioapic_id(int id)
+int mpc_ioapic_id(int ioapic_idx)
 {
-	return ioapics[id].mp_config.apicid;
+	return ioapics[ioapic_idx].mp_config.apicid;
 }
 
-unsigned int mpc_ioapic_addr(int id)
+unsigned int mpc_ioapic_addr(int ioapic_idx)
 {
-	return ioapics[id].mp_config.apicaddr;
+	return ioapics[ioapic_idx].mp_config.apicaddr;
 }
 
-struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int id)
+struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int ioapic_idx)
 {
-	return &ioapics[id].gsi_config;
+	return &ioapics[ioapic_idx].gsi_config;
 }
 
 int nr_ioapics;
@@ -186,21 +186,15 @@ static struct irq_pin_list *alloc_irq_pin_list(int node)
 
 
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
-#ifdef CONFIG_SPARSE_IRQ
 static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY];
-#else
-static struct irq_cfg irq_cfgx[NR_IRQS];
-#endif
 
 int __init arch_early_irq_init(void)
 {
 	struct irq_cfg *cfg;
 	int count, node, i;
 
-	if (!legacy_pic->nr_legacy_irqs) {
-		nr_irqs_gsi = 0;
+	if (!legacy_pic->nr_legacy_irqs)
 		io_apic_irqs = ~0UL;
-	}
 
 	for (i = 0; i < nr_ioapics; i++) {
 		ioapics[i].saved_registers =
@@ -234,7 +228,6 @@ int __init arch_early_irq_init(void)
 	return 0;
 }
 
-#ifdef CONFIG_SPARSE_IRQ
 static struct irq_cfg *irq_cfg(unsigned int irq)
 {
 	return irq_get_chip_data(irq);
@@ -269,22 +262,6 @@ static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg)
 	kfree(cfg);
 }
 
-#else
-
-struct irq_cfg *irq_cfg(unsigned int irq)
-{
-	return irq < nr_irqs ? irq_cfgx + irq : NULL;
-}
-
-static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node)
-{
-	return irq_cfgx + irq;
-}
-
-static inline void free_irq_cfg(unsigned int at, struct irq_cfg *cfg) { }
-
-#endif
-
 static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
 {
 	int res = irq_alloc_desc_at(at, node);
@@ -394,13 +371,21 @@ union entry_union {
 	struct IO_APIC_route_entry entry;
 };
 
+static struct IO_APIC_route_entry __ioapic_read_entry(int apic, int pin)
+{
+	union entry_union eu;
+
+	eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
+	eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
+	return eu.entry;
+}
+
 static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
 {
 	union entry_union eu;
 	unsigned long flags;
 	raw_spin_lock_irqsave(&ioapic_lock, flags);
-	eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
-	eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
+	eu.entry = __ioapic_read_entry(apic, pin);
 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 	return eu.entry;
 }
@@ -529,18 +514,6 @@ static void io_apic_modify_irq(struct irq_cfg *cfg,
 	__io_apic_modify_irq(entry, mask_and, mask_or, final);
 }
 
-static void __mask_and_edge_IO_APIC_irq(struct irq_pin_list *entry)
-{
-	__io_apic_modify_irq(entry, ~IO_APIC_REDIR_LEVEL_TRIGGER,
-			     IO_APIC_REDIR_MASKED, NULL);
-}
-
-static void __unmask_and_level_IO_APIC_irq(struct irq_pin_list *entry)
-{
-	__io_apic_modify_irq(entry, ~IO_APIC_REDIR_MASKED,
-			     IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
-}
-
 static void io_apic_sync(struct irq_pin_list *entry)
 {
 	/*
@@ -585,6 +558,66 @@ static void unmask_ioapic_irq(struct irq_data *data)
 	unmask_ioapic(data->chip_data);
 }
 
+/*
+ * IO-APIC versions below 0x20 don't support EOI register.
+ * For the record, here is the information about various versions:
+ *     0Xh     82489DX
+ *     1Xh     I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant
+ *     2Xh     I/O(x)APIC which is PCI 2.2 Compliant
+ *     30h-FFh Reserved
+ *
+ * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic
+ * version as 0x2. This is an error with documentation and these ICH chips
+ * use io-apic's of version 0x20.
+ *
+ * For IO-APIC's with EOI register, we use that to do an explicit EOI.
+ * Otherwise, we simulate the EOI message manually by changing the trigger
+ * mode to edge and then back to level, with RTE being masked during this.
+ */
+static void __eoi_ioapic_pin(int apic, int pin, int vector, struct irq_cfg *cfg)
+{
+	if (mpc_ioapic_ver(apic) >= 0x20) {
+		/*
+		 * Intr-remapping uses pin number as the virtual vector
+		 * in the RTE. Actual vector is programmed in
+		 * intr-remapping table entry. Hence for the io-apic
+		 * EOI we use the pin number.
+		 */
+		if (cfg && irq_remapped(cfg))
+			io_apic_eoi(apic, pin);
+		else
+			io_apic_eoi(apic, vector);
+	} else {
+		struct IO_APIC_route_entry entry, entry1;
+
+		entry = entry1 = __ioapic_read_entry(apic, pin);
+
+		/*
+		 * Mask the entry and change the trigger mode to edge.
+		 */
+		entry1.mask = 1;
+		entry1.trigger = IOAPIC_EDGE;
+
+		__ioapic_write_entry(apic, pin, entry1);
+
+		/*
+		 * Restore the previous level triggered entry.
+		 */
+		__ioapic_write_entry(apic, pin, entry);
+	}
+}
+
+static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
+{
+	struct irq_pin_list *entry;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	for_each_irq_pin(entry, cfg->irq_2_pin)
+		__eoi_ioapic_pin(entry->apic, entry->pin, cfg->vector, cfg);
+	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
 static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
 {
 	struct IO_APIC_route_entry entry;
@@ -593,10 +626,44 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
 	entry = ioapic_read_entry(apic, pin);
 	if (entry.delivery_mode == dest_SMI)
 		return;
+
+	/*
+	 * Make sure the entry is masked and re-read the contents to check
+	 * if it is a level triggered pin and if the remote-IRR is set.
+	 */
+	if (!entry.mask) {
+		entry.mask = 1;
+		ioapic_write_entry(apic, pin, entry);
+		entry = ioapic_read_entry(apic, pin);
+	}
+
+	if (entry.irr) {
+		unsigned long flags;
+
+		/*
+		 * Make sure the trigger mode is set to level. Explicit EOI
+		 * doesn't clear the remote-IRR if the trigger mode is not
+		 * set to level.
+		 */
+		if (!entry.trigger) {
+			entry.trigger = IOAPIC_LEVEL;
+			ioapic_write_entry(apic, pin, entry);
+		}
+
+		raw_spin_lock_irqsave(&ioapic_lock, flags);
+		__eoi_ioapic_pin(apic, pin, entry.vector, NULL);
+		raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+	}
+
 	/*
-	 * Disable it in the IO-APIC irq-routing table:
+	 * Clear the rest of the bits in the IO-APIC RTE except for the mask
+	 * bit.
 	 */
 	ioapic_mask_entry(apic, pin);
+	entry = ioapic_read_entry(apic, pin);
+	if (entry.irr)
+		printk(KERN_ERR "Unable to reset IRR for apic: %d, pin :%d\n",
+		       mpc_ioapic_id(apic), pin);
 }
 
 static void clear_IO_APIC (void)
@@ -712,13 +779,13 @@ int restore_ioapic_entries(void)
 /*
  * Find the IRQ entry number of a certain pin.
  */
-static int find_irq_entry(int apic, int pin, int type)
+static int find_irq_entry(int ioapic_idx, int pin, int type)
 {
 	int i;
 
 	for (i = 0; i < mp_irq_entries; i++)
 		if (mp_irqs[i].irqtype == type &&
-		    (mp_irqs[i].dstapic == mpc_ioapic_id(apic) ||
+		    (mp_irqs[i].dstapic == mpc_ioapic_id(ioapic_idx) ||
 		     mp_irqs[i].dstapic == MP_APIC_ALL) &&
 		    mp_irqs[i].dstirq == pin)
 			return i;
@@ -757,12 +824,13 @@ static int __init find_isa_irq_apic(int irq, int type)
 		    (mp_irqs[i].srcbusirq == irq))
 			break;
 	}
+
 	if (i < mp_irq_entries) {
-		int apic;
-		for(apic = 0; apic < nr_ioapics; apic++) {
-			if (mpc_ioapic_id(apic) == mp_irqs[i].dstapic)
-				return apic;
-		}
+		int ioapic_idx;
+
+		for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
+			if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic)
+				return ioapic_idx;
 	}
 
 	return -1;
@@ -977,7 +1045,7 @@ static int pin_2_irq(int idx, int apic, int pin)
 int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
 				struct io_apic_irq_attr *irq_attr)
 {
-	int apic, i, best_guess = -1;
+	int ioapic_idx, i, best_guess = -1;
 
 	apic_printk(APIC_DEBUG,
 		    "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
@@ -990,8 +1058,8 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
 	for (i = 0; i < mp_irq_entries; i++) {
 		int lbus = mp_irqs[i].srcbus;
 
-		for (apic = 0; apic < nr_ioapics; apic++)
-			if (mpc_ioapic_id(apic) == mp_irqs[i].dstapic ||
+		for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
+			if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic ||
 			    mp_irqs[i].dstapic == MP_APIC_ALL)
 				break;
 
@@ -999,13 +1067,13 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
 		    !mp_irqs[i].irqtype &&
 		    (bus == lbus) &&
 		    (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
-			int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);
+			int irq = pin_2_irq(i, ioapic_idx, mp_irqs[i].dstirq);
 
-			if (!(apic || IO_APIC_IRQ(irq)))
+			if (!(ioapic_idx || IO_APIC_IRQ(irq)))
 				continue;
 
 			if (pin == (mp_irqs[i].srcbusirq & 3)) {
-				set_io_apic_irq_attr(irq_attr, apic,
+				set_io_apic_irq_attr(irq_attr, ioapic_idx,
 						     mp_irqs[i].dstirq,
 						     irq_trigger(i),
 						     irq_polarity(i));
@@ -1016,7 +1084,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
 			 * best-guess fuzzy result for broken mptables.
 			 */
 			if (best_guess < 0) {
-				set_io_apic_irq_attr(irq_attr, apic,
+				set_io_apic_irq_attr(irq_attr, ioapic_idx,
 						     mp_irqs[i].dstirq,
 						     irq_trigger(i),
 						     irq_polarity(i));
@@ -1202,7 +1270,6 @@ void __setup_vector_irq(int cpu)
 }
 
 static struct irq_chip ioapic_chip;
-static struct irq_chip ir_ioapic_chip;
 
 #ifdef CONFIG_X86_32
 static inline int IO_APIC_irq_trigger(int irq)
@@ -1246,7 +1313,7 @@ static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg,
 
 	if (irq_remapped(cfg)) {
 		irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
-		chip = &ir_ioapic_chip;
+		irq_remap_modify_chip_defaults(chip);
 		fasteoi = trigger != 0;
 	}
 
@@ -1255,77 +1322,100 @@ static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg,
 		fasteoi ? "fasteoi" : "edge");
 }
 
-static int setup_ioapic_entry(int apic_id, int irq,
-			      struct IO_APIC_route_entry *entry,
-			      unsigned int destination, int trigger,
-			      int polarity, int vector, int pin)
+
+static int setup_ir_ioapic_entry(int irq,
+				 struct IR_IO_APIC_route_entry *entry,
+				 unsigned int destination, int vector,
+				 struct io_apic_irq_attr *attr)
 {
-	/*
-	 * add it to the IO-APIC irq-routing table:
-	 */
-	memset(entry,0,sizeof(*entry));
+	int index;
+	struct irte irte;
+	int ioapic_id = mpc_ioapic_id(attr->ioapic);
+	struct intel_iommu *iommu = map_ioapic_to_ir(ioapic_id);
 
-	if (intr_remapping_enabled) {
-		struct intel_iommu *iommu = map_ioapic_to_ir(apic_id);
-		struct irte irte;
-		struct IR_IO_APIC_route_entry *ir_entry =
-			(struct IR_IO_APIC_route_entry *) entry;
-		int index;
+	if (!iommu) {
+		pr_warn("No mapping iommu for ioapic %d\n", ioapic_id);
+		return -ENODEV;
+	}
 
-		if (!iommu)
-			panic("No mapping iommu for ioapic %d\n", apic_id);
+	index = alloc_irte(iommu, irq, 1);
+	if (index < 0) {
+		pr_warn("Failed to allocate IRTE for ioapic %d\n", ioapic_id);
+		return -ENOMEM;
+	}
 
-		index = alloc_irte(iommu, irq, 1);
-		if (index < 0)
-			panic("Failed to allocate IRTE for ioapic %d\n", apic_id);
+	prepare_irte(&irte, vector, destination);
 
-		prepare_irte(&irte, vector, destination);
+	/* Set source-id of interrupt request */
+	set_ioapic_sid(&irte, ioapic_id);
 
-		/* Set source-id of interrupt request */
-		set_ioapic_sid(&irte, apic_id);
+	modify_irte(irq, &irte);
 
-		modify_irte(irq, &irte);
+	apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: "
+		"Set IRTE entry (P:%d FPD:%d Dst_Mode:%d "
+		"Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X "
+		"Avail:%X Vector:%02X Dest:%08X "
+		"SID:%04X SQ:%X SVT:%X)\n",
+		attr->ioapic, irte.present, irte.fpd, irte.dst_mode,
+		irte.redir_hint, irte.trigger_mode, irte.dlvry_mode,
+		irte.avail, irte.vector, irte.dest_id,
+		irte.sid, irte.sq, irte.svt);
+
+	memset(entry, 0, sizeof(*entry));
+
+	entry->index2 = (index >> 15) & 0x1;
+	entry->zero = 0;
+	entry->format = 1;
+	entry->index = (index & 0x7fff);
+	/*
+	 * IO-APIC RTE will be configured with virtual vector.
+	 * irq handler will do the explicit EOI to the io-apic.
+	 */
+	entry->vector = attr->ioapic_pin;
+	entry->mask = 0;	/* enable IRQ */
+	entry->trigger = attr->trigger;
+	entry->polarity = attr->polarity;
 
-		ir_entry->index2 = (index >> 15) & 0x1;
-		ir_entry->zero = 0;
-		ir_entry->format = 1;
-		ir_entry->index = (index & 0x7fff);
-		/*
-		 * IO-APIC RTE will be configured with virtual vector.
-		 * irq handler will do the explicit EOI to the io-apic.
-		 */
-		ir_entry->vector = pin;
-
-		apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: "
-			"Set IRTE entry (P:%d FPD:%d Dst_Mode:%d "
-			"Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X "
-			"Avail:%X Vector:%02X Dest:%08X "
-			"SID:%04X SQ:%X SVT:%X)\n",
-			apic_id, irte.present, irte.fpd, irte.dst_mode,
-			irte.redir_hint, irte.trigger_mode, irte.dlvry_mode,
-			irte.avail, irte.vector, irte.dest_id,
-			irte.sid, irte.sq, irte.svt);
-	} else {
-		entry->delivery_mode = apic->irq_delivery_mode;
-		entry->dest_mode = apic->irq_dest_mode;
-		entry->dest = destination;
-		entry->vector = vector;
-	}
+	/* Mask level triggered irqs.
+	 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
+	 */
+	if (attr->trigger)
+		entry->mask = 1;
 
-	entry->mask = 0;	/* enable IRQ */
-	entry->trigger = trigger;
-	entry->polarity = polarity;
+	return 0;
+}
 
-	/* Mask level triggered irqs.
+static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry,
+			      unsigned int destination, int vector,
+			      struct io_apic_irq_attr *attr)
+{
+	if (intr_remapping_enabled)
+		return setup_ir_ioapic_entry(irq,
+			(struct IR_IO_APIC_route_entry *)entry,
+			destination, vector, attr);
+
+	memset(entry, 0, sizeof(*entry));
+
+	entry->delivery_mode = apic->irq_delivery_mode;
+	entry->dest_mode = apic->irq_dest_mode;
+	entry->dest = destination;
+	entry->vector = vector;
+	entry->mask = 0;	/* enable IRQ */
+	entry->trigger = attr->trigger;
+	entry->polarity = attr->polarity;
+
+	/*
+	 * Mask level triggered irqs.
 	 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
 	 */
-	if (trigger)
+	if (attr->trigger)
 		entry->mask = 1;
+
 	return 0;
 }
 
-static void setup_ioapic_irq(int apic_id, int pin, unsigned int irq,
-			     struct irq_cfg *cfg, int trigger, int polarity)
+static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,
+			     struct io_apic_irq_attr *attr)
 {
 	struct IO_APIC_route_entry entry;
 	unsigned int dest;
@@ -1348,49 +1438,48 @@ static void setup_ioapic_irq(int apic_id, int pin, unsigned int irq,
 	apic_printk(APIC_VERBOSE,KERN_DEBUG
 		    "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
 		    "IRQ %d Mode:%i Active:%i Dest:%d)\n",
-		    apic_id, mpc_ioapic_id(apic_id), pin, cfg->vector,
-		    irq, trigger, polarity, dest);
-
+		    attr->ioapic, mpc_ioapic_id(attr->ioapic), attr->ioapic_pin,
+		    cfg->vector, irq, attr->trigger, attr->polarity, dest);
 
-	if (setup_ioapic_entry(mpc_ioapic_id(apic_id), irq, &entry,
-			       dest, trigger, polarity, cfg->vector, pin)) {
-		printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
-		       mpc_ioapic_id(apic_id), pin);
+	if (setup_ioapic_entry(irq, &entry, dest, cfg->vector, attr)) {
+		pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n",
+			mpc_ioapic_id(attr->ioapic), attr->ioapic_pin);
 		__clear_irq_vector(irq, cfg);
+
 		return;
 	}
 
-	ioapic_register_intr(irq, cfg, trigger);
+	ioapic_register_intr(irq, cfg, attr->trigger);
 	if (irq < legacy_pic->nr_legacy_irqs)
 		legacy_pic->mask(irq);
 
-	ioapic_write_entry(apic_id, pin, entry);
+	ioapic_write_entry(attr->ioapic, attr->ioapic_pin, entry);
 }
 
-static bool __init io_apic_pin_not_connected(int idx, int apic_id, int pin)
+static bool __init io_apic_pin_not_connected(int idx, int ioapic_idx, int pin)
 {
 	if (idx != -1)
 		return false;
 
 	apic_printk(APIC_VERBOSE, KERN_DEBUG " apic %d pin %d not connected\n",
-		    mpc_ioapic_id(apic_id), pin);
+		    mpc_ioapic_id(ioapic_idx), pin);
 	return true;
 }
 
-static void __init __io_apic_setup_irqs(unsigned int apic_id)
+static void __init __io_apic_setup_irqs(unsigned int ioapic_idx)
 {
 	int idx, node = cpu_to_node(0);
 	struct io_apic_irq_attr attr;
 	unsigned int pin, irq;
 
-	for (pin = 0; pin < ioapics[apic_id].nr_registers; pin++) {
-		idx = find_irq_entry(apic_id, pin, mp_INT);
-		if (io_apic_pin_not_connected(idx, apic_id, pin))
+	for (pin = 0; pin < ioapics[ioapic_idx].nr_registers; pin++) {
+		idx = find_irq_entry(ioapic_idx, pin, mp_INT);
+		if (io_apic_pin_not_connected(idx, ioapic_idx, pin))
 			continue;
 
-		irq = pin_2_irq(idx, apic_id, pin);
+		irq = pin_2_irq(idx, ioapic_idx, pin);
 
-		if ((apic_id > 0) && (irq > 16))
+		if ((ioapic_idx > 0) && (irq > 16))
 			continue;
 
 		/*
@@ -1398,10 +1487,10 @@ static void __init __io_apic_setup_irqs(unsigned int apic_id)
 		 * installed and if it returns 1:
 		 */
 		if (apic->multi_timer_check &&
-		    apic->multi_timer_check(apic_id, irq))
+		    apic->multi_timer_check(ioapic_idx, irq))
 			continue;
 
-		set_io_apic_irq_attr(&attr, apic_id, pin, irq_trigger(idx),
+		set_io_apic_irq_attr(&attr, ioapic_idx, pin, irq_trigger(idx),
 				     irq_polarity(idx));
 
 		io_apic_setup_irq_pin(irq, node, &attr);
@@ -1410,12 +1499,12 @@ static void __init __io_apic_setup_irqs(unsigned int apic_id)
 
 static void __init setup_IO_APIC_irqs(void)
 {
-	unsigned int apic_id;
+	unsigned int ioapic_idx;
 
 	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
 
-	for (apic_id = 0; apic_id < nr_ioapics; apic_id++)
-		__io_apic_setup_irqs(apic_id);
+	for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
+		__io_apic_setup_irqs(ioapic_idx);
 }
 
 /*
@@ -1425,28 +1514,28 @@ static void __init setup_IO_APIC_irqs(void)
  */
 void setup_IO_APIC_irq_extra(u32 gsi)
 {
-	int apic_id = 0, pin, idx, irq, node = cpu_to_node(0);
+	int ioapic_idx = 0, pin, idx, irq, node = cpu_to_node(0);
 	struct io_apic_irq_attr attr;
 
 	/*
 	 * Convert 'gsi' to 'ioapic.pin'.
 	 */
-	apic_id = mp_find_ioapic(gsi);
-	if (apic_id < 0)
+	ioapic_idx = mp_find_ioapic(gsi);
+	if (ioapic_idx < 0)
 		return;
 
-	pin = mp_find_ioapic_pin(apic_id, gsi);
-	idx = find_irq_entry(apic_id, pin, mp_INT);
+	pin = mp_find_ioapic_pin(ioapic_idx, gsi);
+	idx = find_irq_entry(ioapic_idx, pin, mp_INT);
 	if (idx == -1)
 		return;
 
-	irq = pin_2_irq(idx, apic_id, pin);
+	irq = pin_2_irq(idx, ioapic_idx, pin);
 
 	/* Only handle the non legacy irqs on secondary ioapics */
-	if (apic_id == 0 || irq < NR_IRQS_LEGACY)
+	if (ioapic_idx == 0 || irq < NR_IRQS_LEGACY)
 		return;
 
-	set_io_apic_irq_attr(&attr, apic_id, pin, irq_trigger(idx),
+	set_io_apic_irq_attr(&attr, ioapic_idx, pin, irq_trigger(idx),
 			     irq_polarity(idx));
 
 	io_apic_setup_irq_pin_once(irq, node, &attr);
@@ -1455,8 +1544,8 @@ void setup_IO_APIC_irq_extra(u32 gsi)
 /*
  * Set up the timer pin, possibly with the 8259A-master behind.
  */
-static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin,
-					int vector)
+static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx,
+					unsigned int pin, int vector)
 {
 	struct IO_APIC_route_entry entry;
 
@@ -1487,45 +1576,29 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin,
 	/*
 	 * Add it to the IO-APIC irq-routing table:
 	 */
-	ioapic_write_entry(apic_id, pin, entry);
+	ioapic_write_entry(ioapic_idx, pin, entry);
 }
 
-
-__apicdebuginit(void) print_IO_APIC(void)
+__apicdebuginit(void) print_IO_APIC(int ioapic_idx)
 {
-	int apic, i;
+	int i;
 	union IO_APIC_reg_00 reg_00;
 	union IO_APIC_reg_01 reg_01;
 	union IO_APIC_reg_02 reg_02;
 	union IO_APIC_reg_03 reg_03;
 	unsigned long flags;
-	struct irq_cfg *cfg;
-	unsigned int irq;
-
-	printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
-	for (i = 0; i < nr_ioapics; i++)
-		printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
-		       mpc_ioapic_id(i), ioapics[i].nr_registers);
-
-	/*
-	 * We are a bit conservative about what we expect. We have to
-	 * know about every hardware change ASAP.
-	 */
-	printk(KERN_INFO "testing the IO APIC.......................\n");
-
-	for (apic = 0; apic < nr_ioapics; apic++) {
 
 	raw_spin_lock_irqsave(&ioapic_lock, flags);
-	reg_00.raw = io_apic_read(apic, 0);
-	reg_01.raw = io_apic_read(apic, 1);
+	reg_00.raw = io_apic_read(ioapic_idx, 0);
+	reg_01.raw = io_apic_read(ioapic_idx, 1);
 	if (reg_01.bits.version >= 0x10)
-		reg_02.raw = io_apic_read(apic, 2);
+		reg_02.raw = io_apic_read(ioapic_idx, 2);
 	if (reg_01.bits.version >= 0x20)
-		reg_03.raw = io_apic_read(apic, 3);
+		reg_03.raw = io_apic_read(ioapic_idx, 3);
 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
 	printk("\n");
-	printk(KERN_DEBUG "IO APIC #%d......\n", mpc_ioapic_id(apic));
+	printk(KERN_DEBUG "IO APIC #%d......\n", mpc_ioapic_id(ioapic_idx));
 	printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
 	printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
 	printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
@@ -1575,7 +1648,7 @@ __apicdebuginit(void) print_IO_APIC(void)
 		struct IO_APIC_route_entry entry;
 		struct IR_IO_APIC_route_entry *ir_entry;
 
-		entry = ioapic_read_entry(apic, i);
+		entry = ioapic_read_entry(ioapic_idx, i);
 		ir_entry = (struct IR_IO_APIC_route_entry *) &entry;
 		printk(KERN_DEBUG " %02x %04X ",
 			i,
@@ -1596,7 +1669,7 @@ __apicdebuginit(void) print_IO_APIC(void)
 	} else {
 		struct IO_APIC_route_entry entry;
 
-		entry = ioapic_read_entry(apic, i);
+		entry = ioapic_read_entry(ioapic_idx, i);
 		printk(KERN_DEBUG " %02x %02X ",
 			i,
 			entry.dest
@@ -1614,12 +1687,38 @@ __apicdebuginit(void) print_IO_APIC(void)
 			);
 		}
 	}
-	}
+}
+
+__apicdebuginit(void) print_IO_APICs(void)
+{
+	int ioapic_idx;
+	struct irq_cfg *cfg;
+	unsigned int irq;
+	struct irq_chip *chip;
+
+	printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
+	for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
+		printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
+		       mpc_ioapic_id(ioapic_idx),
+		       ioapics[ioapic_idx].nr_registers);
+
+	/*
+	 * We are a bit conservative about what we expect. We have to
+	 * know about every hardware change ASAP.
+	 */
+	printk(KERN_INFO "testing the IO APIC.......................\n");
+
+	for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
+		print_IO_APIC(ioapic_idx);
 
 	printk(KERN_DEBUG "IRQ to pin mappings:\n");
 	for_each_active_irq(irq) {
 		struct irq_pin_list *entry;
 
+		chip = irq_get_chip(irq);
+		if (chip != &ioapic_chip)
+			continue;
+
 		cfg = irq_get_chip_data(irq);
 		if (!cfg)
 			continue;
@@ -1633,8 +1732,6 @@ __apicdebuginit(void) print_IO_APIC(void)
 	}
 
 	printk(KERN_INFO ".................................... done.\n");
-
-	return;
 }
 
 __apicdebuginit(void) print_APIC_field(int base)
@@ -1828,7 +1925,7 @@ __apicdebuginit(int) print_ICs(void)
 		return 0;
 
 	print_local_APICs(show_lapic);
-	print_IO_APIC();
+	print_IO_APICs();
 
 	return 0;
 }
@@ -1953,7 +2050,7 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void)
 {
 	union IO_APIC_reg_00 reg_00;
 	physid_mask_t phys_id_present_map;
-	int apic_id;
+	int ioapic_idx;
 	int i;
 	unsigned char old_id;
 	unsigned long flags;
@@ -1967,21 +2064,20 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void)
 	/*
 	 * Set the IOAPIC ID to the value stored in the MPC table.
 	 */
-	for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {
-
+	for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) {
 		/* Read the register 0 value */
 		raw_spin_lock_irqsave(&ioapic_lock, flags);
-		reg_00.raw = io_apic_read(apic_id, 0);
+		reg_00.raw = io_apic_read(ioapic_idx, 0);
 		raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
-		old_id = mpc_ioapic_id(apic_id);
+		old_id = mpc_ioapic_id(ioapic_idx);
 
-		if (mpc_ioapic_id(apic_id) >= get_physical_broadcast()) {
+		if (mpc_ioapic_id(ioapic_idx) >= get_physical_broadcast()) {
 			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
-				apic_id, mpc_ioapic_id(apic_id));
+				ioapic_idx, mpc_ioapic_id(ioapic_idx));
 			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
 				reg_00.bits.ID);
-			ioapics[apic_id].mp_config.apicid = reg_00.bits.ID;
+			ioapics[ioapic_idx].mp_config.apicid = reg_00.bits.ID;
 		}
 
 		/*
@@ -1990,9 +2086,9 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void)
 		 * 'stuck on smp_invalidate_needed IPI wait' messages.
 		 */
 		if (apic->check_apicid_used(&phys_id_present_map,
-					    mpc_ioapic_id(apic_id))) {
+					    mpc_ioapic_id(ioapic_idx))) {
 			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
-				apic_id, mpc_ioapic_id(apic_id));
+				ioapic_idx, mpc_ioapic_id(ioapic_idx));
 			for (i = 0; i < get_physical_broadcast(); i++)
 				if (!physid_isset(i, phys_id_present_map))
 					break;
@@ -2001,14 +2097,14 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void)
 			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
 				i);
 			physid_set(i, phys_id_present_map);
-			ioapics[apic_id].mp_config.apicid = i;
+			ioapics[ioapic_idx].mp_config.apicid = i;
 		} else {
 			physid_mask_t tmp;
-			apic->apicid_to_cpu_present(mpc_ioapic_id(apic_id),
+			apic->apicid_to_cpu_present(mpc_ioapic_id(ioapic_idx),
 						    &tmp);
 			apic_printk(APIC_VERBOSE, "Setting %d in the "
 					"phys_id_present_map\n",
-					mpc_ioapic_id(apic_id));
+					mpc_ioapic_id(ioapic_idx));
 			physids_or(phys_id_present_map, phys_id_present_map, tmp);
 		}
 
@@ -2016,35 +2112,35 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void)
 		 * We need to adjust the IRQ routing table
 		 * if the ID changed.
 		 */
-		if (old_id != mpc_ioapic_id(apic_id))
+		if (old_id != mpc_ioapic_id(ioapic_idx))
 			for (i = 0; i < mp_irq_entries; i++)
 				if (mp_irqs[i].dstapic == old_id)
 					mp_irqs[i].dstapic
-						= mpc_ioapic_id(apic_id);
+						= mpc_ioapic_id(ioapic_idx);
 
 		/*
 		 * Update the ID register according to the right value
 		 * from the MPC table if they are different.
 		 */
-		if (mpc_ioapic_id(apic_id) == reg_00.bits.ID)
+		if (mpc_ioapic_id(ioapic_idx) == reg_00.bits.ID)
 			continue;
 
 		apic_printk(APIC_VERBOSE, KERN_INFO
 			"...changing IO-APIC physical APIC ID to %d ...",
-			mpc_ioapic_id(apic_id));
+			mpc_ioapic_id(ioapic_idx));
 
-		reg_00.bits.ID = mpc_ioapic_id(apic_id);
+		reg_00.bits.ID = mpc_ioapic_id(ioapic_idx);
 		raw_spin_lock_irqsave(&ioapic_lock, flags);
-		io_apic_write(apic_id, 0, reg_00.raw);
+		io_apic_write(ioapic_idx, 0, reg_00.raw);
 		raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
 		/*
 		 * Sanity check
 		 */
 		raw_spin_lock_irqsave(&ioapic_lock, flags);
-		reg_00.raw = io_apic_read(apic_id, 0);
+		reg_00.raw = io_apic_read(ioapic_idx, 0);
 		raw_spin_unlock_irqrestore(&ioapic_lock, flags);
-		if (reg_00.bits.ID != mpc_ioapic_id(apic_id))
+		if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx))
 			printk("could not set ID!\n");
 		else
 			apic_printk(APIC_VERBOSE, " ok.\n");
@@ -2255,7 +2351,7 @@ ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
 	return ret;
 }
 
-#ifdef CONFIG_INTR_REMAP
+#ifdef CONFIG_IRQ_REMAP
 
 /*
  * Migrate the IO-APIC irq in the presence of intr-remapping.
@@ -2267,6 +2363,9 @@ ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
  * updated vector information), by using a virtual vector (io-apic pin number).
  * Real vector that is used for interrupting cpu will be coming from
  * the interrupt-remapping table entry.
+ *
+ * As the migration is a simple atomic update of IRTE, the same mechanism
+ * is used to migrate MSI irq's in the presence of interrupt-remapping.
  */
 static int
 ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
@@ -2291,10 +2390,16 @@ ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
 	irte.dest_id = IRTE_DEST(dest);
 
 	/*
-	 * Modified the IRTE and flushes the Interrupt entry cache.
+	 * Atomically updates the IRTE with the new destination, vector
+	 * and flushes the interrupt entry cache.
 	 */
 	modify_irte(irq, &irte);
 
+	/*
+	 * After this point, all the interrupts will start arriving
+	 * at the new destination. So, time to cleanup the previous
+	 * vector allocation.
+	 */
 	if (cfg->move_in_progress)
 		send_cleanup_vector(cfg);
 
@@ -2407,48 +2512,6 @@ static void ack_apic_edge(struct irq_data *data)
2407 2512
2408atomic_t irq_mis_count; 2513atomic_t irq_mis_count;
2409 2514
2410/*
2411 * IO-APIC versions below 0x20 don't support EOI register.
2412 * For the record, here is the information about various versions:
2413 * 0Xh 82489DX
2414 * 1Xh I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant
2415 * 2Xh I/O(x)APIC which is PCI 2.2 Compliant
2416 * 30h-FFh Reserved
2417 *
2418 * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic
2419 * version as 0x2. This is an error with documentation and these ICH chips
2420 * use io-apic's of version 0x20.
2421 *
2422 * For IO-APIC's with EOI register, we use that to do an explicit EOI.
2423 * Otherwise, we simulate the EOI message manually by changing the trigger
2424 * mode to edge and then back to level, with RTE being masked during this.
2425*/
2426static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
2427{
2428 struct irq_pin_list *entry;
2429 unsigned long flags;
2430
2431 raw_spin_lock_irqsave(&ioapic_lock, flags);
2432 for_each_irq_pin(entry, cfg->irq_2_pin) {
2433 if (mpc_ioapic_ver(entry->apic) >= 0x20) {
2434 /*
2435 * Intr-remapping uses pin number as the virtual vector
2436 * in the RTE. Actual vector is programmed in
2437 * intr-remapping table entry. Hence for the io-apic
2438 * EOI we use the pin number.
2439 */
2440 if (irq_remapped(cfg))
2441 io_apic_eoi(entry->apic, entry->pin);
2442 else
2443 io_apic_eoi(entry->apic, cfg->vector);
2444 } else {
2445 __mask_and_edge_IO_APIC_irq(entry);
2446 __unmask_and_level_IO_APIC_irq(entry);
2447 }
2448 }
2449 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2450}
2451
2452static void ack_apic_level(struct irq_data *data) 2515static void ack_apic_level(struct irq_data *data)
2453{ 2516{
2454 struct irq_cfg *cfg = data->chip_data; 2517 struct irq_cfg *cfg = data->chip_data;
@@ -2552,7 +2615,7 @@ static void ack_apic_level(struct irq_data *data)
2552 } 2615 }
2553} 2616}
2554 2617
2555#ifdef CONFIG_INTR_REMAP 2618#ifdef CONFIG_IRQ_REMAP
2556static void ir_ack_apic_edge(struct irq_data *data) 2619static void ir_ack_apic_edge(struct irq_data *data)
2557{ 2620{
2558 ack_APIC_irq(); 2621 ack_APIC_irq();
@@ -2563,7 +2626,23 @@ static void ir_ack_apic_level(struct irq_data *data)
2563 ack_APIC_irq(); 2626 ack_APIC_irq();
2564 eoi_ioapic_irq(data->irq, data->chip_data); 2627 eoi_ioapic_irq(data->irq, data->chip_data);
2565} 2628}
2566#endif /* CONFIG_INTR_REMAP */ 2629
2630static void ir_print_prefix(struct irq_data *data, struct seq_file *p)
2631{
2632 seq_printf(p, " IR-%s", data->chip->name);
2633}
2634
2635static void irq_remap_modify_chip_defaults(struct irq_chip *chip)
2636{
2637 chip->irq_print_chip = ir_print_prefix;
2638 chip->irq_ack = ir_ack_apic_edge;
2639 chip->irq_eoi = ir_ack_apic_level;
2640
2641#ifdef CONFIG_SMP
2642 chip->irq_set_affinity = ir_ioapic_set_affinity;
2643#endif
2644}
2645#endif /* CONFIG_IRQ_REMAP */
2567 2646
2568static struct irq_chip ioapic_chip __read_mostly = { 2647static struct irq_chip ioapic_chip __read_mostly = {
2569 .name = "IO-APIC", 2648 .name = "IO-APIC",
@@ -2578,21 +2657,6 @@ static struct irq_chip ioapic_chip __read_mostly = {
2578 .irq_retrigger = ioapic_retrigger_irq, 2657 .irq_retrigger = ioapic_retrigger_irq,
2579}; 2658};
2580 2659
2581static struct irq_chip ir_ioapic_chip __read_mostly = {
2582 .name = "IR-IO-APIC",
2583 .irq_startup = startup_ioapic_irq,
2584 .irq_mask = mask_ioapic_irq,
2585 .irq_unmask = unmask_ioapic_irq,
2586#ifdef CONFIG_INTR_REMAP
2587 .irq_ack = ir_ack_apic_edge,
2588 .irq_eoi = ir_ack_apic_level,
2589#ifdef CONFIG_SMP
2590 .irq_set_affinity = ir_ioapic_set_affinity,
2591#endif
2592#endif
2593 .irq_retrigger = ioapic_retrigger_irq,
2594};
2595
2596static inline void init_IO_APIC_traps(void) 2660static inline void init_IO_APIC_traps(void)
2597{ 2661{
2598 struct irq_cfg *cfg; 2662 struct irq_cfg *cfg;
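
With ir_ioapic_chip removed here (and msi_ir_chip / ir_hpet_msi_type removed further down), remapped interrupts reuse the stock irq_chip: the irq_remap_modify_chip_defaults() helper added above patches the chip's ack/eoi/affinity callbacks in place and prefixes its name via ->irq_print_chip. A minimal sketch of the resulting call-site pattern, using only helpers that appear elsewhere in this patch:

	struct irq_chip *chip = &msi_chip;	/* or &ioapic_chip, &hpet_msi_type */

	if (irq_remapped(irq_get_chip_data(irq))) {
		irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
		irq_remap_modify_chip_defaults(chip);	/* install IR ack/eoi/set_affinity */
	}
	irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
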
@@ -2944,27 +3008,26 @@ static int __init io_apic_bug_finalize(void)
 
 late_initcall(io_apic_bug_finalize);
 
-static void resume_ioapic_id(int ioapic_id)
+static void resume_ioapic_id(int ioapic_idx)
 {
 	unsigned long flags;
 	union IO_APIC_reg_00 reg_00;
 
-
 	raw_spin_lock_irqsave(&ioapic_lock, flags);
-	reg_00.raw = io_apic_read(ioapic_id, 0);
-	if (reg_00.bits.ID != mpc_ioapic_id(ioapic_id)) {
-		reg_00.bits.ID = mpc_ioapic_id(ioapic_id);
-		io_apic_write(ioapic_id, 0, reg_00.raw);
+	reg_00.raw = io_apic_read(ioapic_idx, 0);
+	if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx)) {
+		reg_00.bits.ID = mpc_ioapic_id(ioapic_idx);
+		io_apic_write(ioapic_idx, 0, reg_00.raw);
 	}
 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
 static void ioapic_resume(void)
 {
-	int ioapic_id;
+	int ioapic_idx;
 
-	for (ioapic_id = nr_ioapics - 1; ioapic_id >= 0; ioapic_id--)
-		resume_ioapic_id(ioapic_id);
+	for (ioapic_idx = nr_ioapics - 1; ioapic_idx >= 0; ioapic_idx--)
+		resume_ioapic_id(ioapic_idx);
 
 	restore_ioapic_entries();
 }
@@ -3144,45 +3207,6 @@ msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
3144 3207
3145 return 0; 3208 return 0;
3146} 3209}
3147#ifdef CONFIG_INTR_REMAP
3148/*
3149 * Migrate the MSI irq to another cpumask. This migration is
3150 * done in the process context using interrupt-remapping hardware.
3151 */
3152static int
3153ir_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
3154 bool force)
3155{
3156 struct irq_cfg *cfg = data->chip_data;
3157 unsigned int dest, irq = data->irq;
3158 struct irte irte;
3159
3160 if (get_irte(irq, &irte))
3161 return -1;
3162
3163 if (__ioapic_set_affinity(data, mask, &dest))
3164 return -1;
3165
3166 irte.vector = cfg->vector;
3167 irte.dest_id = IRTE_DEST(dest);
3168
3169 /*
3170 * atomically update the IRTE with the new destination and vector.
3171 */
3172 modify_irte(irq, &irte);
3173
3174 /*
3175 * After this point, all the interrupts will start arriving
3176 * at the new destination. So, time to cleanup the previous
3177 * vector allocation.
3178 */
3179 if (cfg->move_in_progress)
3180 send_cleanup_vector(cfg);
3181
3182 return 0;
3183}
3184
3185#endif
3186#endif /* CONFIG_SMP */ 3210#endif /* CONFIG_SMP */
3187 3211
3188/* 3212/*
@@ -3200,19 +3224,6 @@ static struct irq_chip msi_chip = {
3200 .irq_retrigger = ioapic_retrigger_irq, 3224 .irq_retrigger = ioapic_retrigger_irq,
3201}; 3225};
3202 3226
3203static struct irq_chip msi_ir_chip = {
3204 .name = "IR-PCI-MSI",
3205 .irq_unmask = unmask_msi_irq,
3206 .irq_mask = mask_msi_irq,
3207#ifdef CONFIG_INTR_REMAP
3208 .irq_ack = ir_ack_apic_edge,
3209#ifdef CONFIG_SMP
3210 .irq_set_affinity = ir_msi_set_affinity,
3211#endif
3212#endif
3213 .irq_retrigger = ioapic_retrigger_irq,
3214};
3215
3216/* 3227/*
3217 * Map the PCI dev to the corresponding remapping hardware unit 3228 * Map the PCI dev to the corresponding remapping hardware unit
3218 * and allocate 'nvec' consecutive interrupt-remapping table entries 3229 * and allocate 'nvec' consecutive interrupt-remapping table entries
@@ -3255,7 +3266,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
3255 3266
3256 if (irq_remapped(irq_get_chip_data(irq))) { 3267 if (irq_remapped(irq_get_chip_data(irq))) {
3257 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); 3268 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
3258 chip = &msi_ir_chip; 3269 irq_remap_modify_chip_defaults(chip);
3259 } 3270 }
3260 3271
3261 irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); 3272 irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
@@ -3328,7 +3339,7 @@ void native_teardown_msi_irq(unsigned int irq)
3328 destroy_irq(irq); 3339 destroy_irq(irq);
3329} 3340}
3330 3341
3331#if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP) 3342#ifdef CONFIG_DMAR_TABLE
3332#ifdef CONFIG_SMP 3343#ifdef CONFIG_SMP
3333static int 3344static int
3334dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, 3345dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
@@ -3409,19 +3420,6 @@ static int hpet_msi_set_affinity(struct irq_data *data,
3409 3420
3410#endif /* CONFIG_SMP */ 3421#endif /* CONFIG_SMP */
3411 3422
3412static struct irq_chip ir_hpet_msi_type = {
3413 .name = "IR-HPET_MSI",
3414 .irq_unmask = hpet_msi_unmask,
3415 .irq_mask = hpet_msi_mask,
3416#ifdef CONFIG_INTR_REMAP
3417 .irq_ack = ir_ack_apic_edge,
3418#ifdef CONFIG_SMP
3419 .irq_set_affinity = ir_msi_set_affinity,
3420#endif
3421#endif
3422 .irq_retrigger = ioapic_retrigger_irq,
3423};
3424
3425static struct irq_chip hpet_msi_type = { 3423static struct irq_chip hpet_msi_type = {
3426 .name = "HPET_MSI", 3424 .name = "HPET_MSI",
3427 .irq_unmask = hpet_msi_unmask, 3425 .irq_unmask = hpet_msi_unmask,
@@ -3458,7 +3456,7 @@ int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
3458 hpet_msi_write(irq_get_handler_data(irq), &msg); 3456 hpet_msi_write(irq_get_handler_data(irq), &msg);
3459 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); 3457 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
3460 if (irq_remapped(irq_get_chip_data(irq))) 3458 if (irq_remapped(irq_get_chip_data(irq)))
3461 chip = &ir_hpet_msi_type; 3459 irq_remap_modify_chip_defaults(chip);
3462 3460
3463 irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); 3461 irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
3464 return 0; 3462 return 0;
@@ -3566,26 +3564,25 @@ io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr)
3566 return -EINVAL; 3564 return -EINVAL;
3567 ret = __add_pin_to_irq_node(cfg, node, attr->ioapic, attr->ioapic_pin); 3565 ret = __add_pin_to_irq_node(cfg, node, attr->ioapic, attr->ioapic_pin);
3568 if (!ret) 3566 if (!ret)
3569 setup_ioapic_irq(attr->ioapic, attr->ioapic_pin, irq, cfg, 3567 setup_ioapic_irq(irq, cfg, attr);
3570 attr->trigger, attr->polarity);
3571 return ret; 3568 return ret;
3572} 3569}
3573 3570
3574int io_apic_setup_irq_pin_once(unsigned int irq, int node, 3571int io_apic_setup_irq_pin_once(unsigned int irq, int node,
3575 struct io_apic_irq_attr *attr) 3572 struct io_apic_irq_attr *attr)
3576{ 3573{
3577 unsigned int id = attr->ioapic, pin = attr->ioapic_pin; 3574 unsigned int ioapic_idx = attr->ioapic, pin = attr->ioapic_pin;
3578 int ret; 3575 int ret;
3579 3576
3580 /* Avoid redundant programming */ 3577 /* Avoid redundant programming */
3581 if (test_bit(pin, ioapics[id].pin_programmed)) { 3578 if (test_bit(pin, ioapics[ioapic_idx].pin_programmed)) {
3582 pr_debug("Pin %d-%d already programmed\n", 3579 pr_debug("Pin %d-%d already programmed\n",
3583 mpc_ioapic_id(id), pin); 3580 mpc_ioapic_id(ioapic_idx), pin);
3584 return 0; 3581 return 0;
3585 } 3582 }
3586 ret = io_apic_setup_irq_pin(irq, node, attr); 3583 ret = io_apic_setup_irq_pin(irq, node, attr);
3587 if (!ret) 3584 if (!ret)
3588 set_bit(pin, ioapics[id].pin_programmed); 3585 set_bit(pin, ioapics[ioapic_idx].pin_programmed);
3589 return ret; 3586 return ret;
3590} 3587}
3591 3588
@@ -3621,7 +3618,6 @@ int get_nr_irqs_gsi(void)
3621 return nr_irqs_gsi; 3618 return nr_irqs_gsi;
3622} 3619}
3623 3620
3624#ifdef CONFIG_SPARSE_IRQ
3625int __init arch_probe_nr_irqs(void) 3621int __init arch_probe_nr_irqs(void)
3626{ 3622{
3627 int nr; 3623 int nr;
@@ -3641,7 +3637,6 @@ int __init arch_probe_nr_irqs(void)
3641 3637
3642 return NR_IRQS_LEGACY; 3638 return NR_IRQS_LEGACY;
3643} 3639}
3644#endif
3645 3640
3646int io_apic_set_pci_routing(struct device *dev, int irq, 3641int io_apic_set_pci_routing(struct device *dev, int irq,
3647 struct io_apic_irq_attr *irq_attr) 3642 struct io_apic_irq_attr *irq_attr)
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index b5254ad044ab..0787bb3412f4 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -200,14 +200,8 @@ void __init default_setup_apic_routing(void)
 	 * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support
 	 */
 
-	if (!cmdline_apic && apic == &apic_default) {
-		struct apic *bigsmp = generic_bigsmp_probe();
-		if (bigsmp) {
-			apic = bigsmp;
-			printk(KERN_INFO "Overriding APIC driver with %s\n",
-			       apic->name);
-		}
-	}
+	if (!cmdline_apic && apic == &apic_default)
+		generic_bigsmp_probe();
 #endif
 
 	if (apic->setup_apic_routing)
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index adc66c3a1fef..62ae3001ae02 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -207,7 +207,6 @@ static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_ri
207 ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | 207 ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
208 APIC_DM_INIT; 208 APIC_DM_INIT;
209 uv_write_global_mmr64(pnode, UVH_IPI_INT, val); 209 uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
210 mdelay(10);
211 210
212 val = (1UL << UVH_IPI_INT_SEND_SHFT) | 211 val = (1UL << UVH_IPI_INT_SEND_SHFT) |
213 (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | 212 (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
@@ -673,18 +672,11 @@ void __cpuinit uv_cpu_init(void)
673/* 672/*
674 * When NMI is received, print a stack trace. 673 * When NMI is received, print a stack trace.
675 */ 674 */
676int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) 675int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
677{ 676{
678 unsigned long real_uv_nmi; 677 unsigned long real_uv_nmi;
679 int bid; 678 int bid;
680 679
681 if (reason != DIE_NMIUNKNOWN)
682 return NOTIFY_OK;
683
684 if (in_crash_kexec)
685 /* do nothing if entering the crash kernel */
686 return NOTIFY_OK;
687
688 /* 680 /*
689 * Each blade has an MMR that indicates when an NMI has been sent 681 * Each blade has an MMR that indicates when an NMI has been sent
690 * to cpus on the blade. If an NMI is detected, atomically 682 * to cpus on the blade. If an NMI is detected, atomically
@@ -705,7 +697,7 @@ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
705 } 697 }
706 698
707 if (likely(__get_cpu_var(cpu_last_nmi_count) == uv_blade_info[bid].nmi_count)) 699 if (likely(__get_cpu_var(cpu_last_nmi_count) == uv_blade_info[bid].nmi_count))
708 return NOTIFY_DONE; 700 return NMI_DONE;
709 701
710 __get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count; 702 __get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count;
711 703
@@ -718,17 +710,12 @@ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
718 dump_stack(); 710 dump_stack();
719 spin_unlock(&uv_nmi_lock); 711 spin_unlock(&uv_nmi_lock);
720 712
721 return NOTIFY_STOP; 713 return NMI_HANDLED;
722} 714}
723 715
724static struct notifier_block uv_dump_stack_nmi_nb = {
725 .notifier_call = uv_handle_nmi,
726 .priority = NMI_LOCAL_LOW_PRIOR - 1,
727};
728
729void uv_register_nmi_notifier(void) 716void uv_register_nmi_notifier(void)
730{ 717{
731 if (register_die_notifier(&uv_dump_stack_nmi_nb)) 718 if (register_nmi_handler(NMI_UNKNOWN, uv_handle_nmi, 0, "uv"))
732 printk(KERN_WARNING "UV NMI handler failed to register\n"); 719 printk(KERN_WARNING "UV NMI handler failed to register\n");
733} 720}
734 721
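
uv_handle_nmi() moves from the die-notifier chain (NOTIFY_OK/NOTIFY_STOP keyed on DIE_NMIUNKNOWN) to the new NMI handler interface introduced elsewhere in this series: handlers take (reason, regs), return NMI_HANDLED or NMI_DONE, and are attached with register_nmi_handler(). A minimal sketch of that pattern, with a hypothetical predicate standing in for the real decode logic (the header exporting these symbols, presumably <asm/nmi.h>, is an assumption here):

	static int example_nmi_handler(unsigned int reason, struct pt_regs *regs)
	{
		if (!nmi_belongs_to_us())	/* hypothetical check */
			return NMI_DONE;	/* not ours; let other handlers run */
		/* ... handle the event ... */
		return NMI_HANDLED;		/* consumed; stop unknown-NMI processing */
	}

	/* type is NMI_LOCAL or NMI_UNKNOWN; the string identifies the handler */
	register_nmi_handler(NMI_UNKNOWN, example_nmi_handler, 0, "example");
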
@@ -833,6 +820,10 @@ void __init uv_system_init(void)
833 uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift; 820 uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift;
834 uv_cpu_hub_info(cpu)->hub_revision = uv_hub_info->hub_revision; 821 uv_cpu_hub_info(cpu)->hub_revision = uv_hub_info->hub_revision;
835 822
823 uv_cpu_hub_info(cpu)->m_shift = 64 - m_val;
824 uv_cpu_hub_info(cpu)->n_lshift = is_uv2_1_hub() ?
825 (m_val == 40 ? 40 : 39) : m_val;
826
836 pnode = uv_apicid_to_pnode(apicid); 827 pnode = uv_apicid_to_pnode(apicid);
837 blade = boot_pnode_to_blade(pnode); 828 blade = boot_pnode_to_blade(pnode);
838 lcpu = uv_blade_info[blade].nr_possible_cpus; 829 lcpu = uv_blade_info[blade].nr_possible_cpus;
@@ -863,8 +854,7 @@ void __init uv_system_init(void)
863 if (uv_node_to_blade[nid] >= 0) 854 if (uv_node_to_blade[nid] >= 0)
864 continue; 855 continue;
865 paddr = node_start_pfn(nid) << PAGE_SHIFT; 856 paddr = node_start_pfn(nid) << PAGE_SHIFT;
866 paddr = uv_soc_phys_ram_to_gpa(paddr); 857 pnode = uv_gpa_to_pnode(uv_soc_phys_ram_to_gpa(paddr));
867 pnode = (paddr >> m_val) & pnode_mask;
868 blade = boot_pnode_to_blade(pnode); 858 blade = boot_pnode_to_blade(pnode);
869 uv_node_to_blade[nid] = blade; 859 uv_node_to_blade[nid] = blade;
870 } 860 }
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 0371c484bb8a..a46bd383953c 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -249,8 +249,6 @@ extern int (*console_blank_hook)(int);
249#define APM_MINOR_DEV 134 249#define APM_MINOR_DEV 134
250 250
251/* 251/*
252 * See Documentation/Config.help for the configuration options.
253 *
254 * Various options can be changed at boot time as follows: 252 * Various options can be changed at boot time as follows:
255 * (We allow underscores for compatibility with the modules code) 253 * (We allow underscores for compatibility with the modules code)
256 * apm=on/off enable/disable APM 254 * apm=on/off enable/disable APM
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 6042981d0309..25f24dccdcfa 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -15,6 +15,7 @@ CFLAGS_common.o := $(nostackp)
15obj-y := intel_cacheinfo.o scattered.o topology.o 15obj-y := intel_cacheinfo.o scattered.o topology.o
16obj-y += proc.o capflags.o powerflags.o common.o 16obj-y += proc.o capflags.o powerflags.o common.o
17obj-y += vmware.o hypervisor.o sched.o mshyperv.o 17obj-y += vmware.o hypervisor.o sched.o mshyperv.o
18obj-y += rdrand.o
18 19
19obj-$(CONFIG_X86_32) += bugs.o 20obj-$(CONFIG_X86_32) += bugs.o
20obj-$(CONFIG_X86_64) += bugs_64.o 21obj-$(CONFIG_X86_64) += bugs_64.o
@@ -28,10 +29,15 @@ obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o
28 29
29obj-$(CONFIG_PERF_EVENTS) += perf_event.o 30obj-$(CONFIG_PERF_EVENTS) += perf_event.o
30 31
32ifdef CONFIG_PERF_EVENTS
33obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o
34obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_p4.o perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
35endif
36
31obj-$(CONFIG_X86_MCE) += mcheck/ 37obj-$(CONFIG_X86_MCE) += mcheck/
32obj-$(CONFIG_MTRR) += mtrr/ 38obj-$(CONFIG_MTRR) += mtrr/
33 39
34obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o 40obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o perf_event_amd_ibs.o
35 41
36quiet_cmd_mkcapflags = MKCAP $@ 42quiet_cmd_mkcapflags = MKCAP $@
37 cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@ 43 cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index b13ed393dfce..c7e46cb35327 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -1,5 +1,7 @@
1#include <linux/export.h>
1#include <linux/init.h> 2#include <linux/init.h>
2#include <linux/bitops.h> 3#include <linux/bitops.h>
4#include <linux/elf.h>
3#include <linux/mm.h> 5#include <linux/mm.h>
4 6
5#include <linux/io.h> 7#include <linux/io.h>
@@ -410,8 +412,38 @@ static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c)
410#endif 412#endif
411} 413}
412 414
415static void __cpuinit bsp_init_amd(struct cpuinfo_x86 *c)
416{
417 if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
418
419 if (c->x86 > 0x10 ||
420 (c->x86 == 0x10 && c->x86_model >= 0x2)) {
421 u64 val;
422
423 rdmsrl(MSR_K7_HWCR, val);
424 if (!(val & BIT(24)))
425 printk(KERN_WARNING FW_BUG "TSC doesn't count "
426 "with P0 frequency!\n");
427 }
428 }
429
430 if (c->x86 == 0x15) {
431 unsigned long upperbit;
432 u32 cpuid, assoc;
433
434 cpuid = cpuid_edx(0x80000005);
435 assoc = cpuid >> 16 & 0xff;
436 upperbit = ((cpuid >> 24) << 10) / assoc;
437
438 va_align.mask = (upperbit - 1) & PAGE_MASK;
439 va_align.flags = ALIGN_VA_32 | ALIGN_VA_64;
440 }
441}
442
413static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) 443static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
414{ 444{
445 u32 dummy;
446
415 early_init_amd_mc(c); 447 early_init_amd_mc(c);
416 448
417 /* 449 /*
@@ -442,22 +474,7 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
442 } 474 }
443#endif 475#endif
444 476
445 /* We need to do the following only once */ 477 rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
446 if (c != &boot_cpu_data)
447 return;
448
449 if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
450
451 if (c->x86 > 0x10 ||
452 (c->x86 == 0x10 && c->x86_model >= 0x2)) {
453 u64 val;
454
455 rdmsrl(MSR_K7_HWCR, val);
456 if (!(val & BIT(24)))
457 printk(KERN_WARNING FW_BUG "TSC doesn't count "
458 "with P0 frequency!\n");
459 }
460 }
461} 478}
462 479
463static void __cpuinit init_amd(struct cpuinfo_x86 *c) 480static void __cpuinit init_amd(struct cpuinfo_x86 *c)
@@ -679,6 +696,7 @@ static const struct cpu_dev __cpuinitconst amd_cpu_dev = {
679 .c_size_cache = amd_size_cache, 696 .c_size_cache = amd_size_cache,
680#endif 697#endif
681 .c_early_init = early_init_amd, 698 .c_early_init = early_init_amd,
699 .c_bsp_init = bsp_init_amd,
682 .c_init = init_amd, 700 .c_init = init_amd,
683 .c_x86_vendor = X86_VENDOR_AMD, 701 .c_x86_vendor = X86_VENDOR_AMD,
684}; 702};
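
The new bsp_init_amd() above sizes va_align.mask from CPUID leaf 0x80000005: by the usual AMD layout, EDX bits 31:24 give the L1 instruction cache size in KB and bits 23:16 its associativity, so upperbit is the per-way size in bytes and the mask covers the cache-index bits that lie above the 4 KB page offset. A worked example with assumed numbers (64 KB, 2-way), just to make the arithmetic concrete:

	u32 cpuid = (64 << 24) | (2 << 16);			/* pretend: 64 KB, 2-way L1I */
	u32 assoc = (cpuid >> 16) & 0xff;			/* 2 */
	unsigned long upperbit = ((cpuid >> 24) << 10) / assoc;	/* 65536 / 2 = 0x8000 */
	unsigned long mask = (upperbit - 1) & PAGE_MASK;	/* 0x7fff & ~0xfff = 0x7000 */
	/* i.e. address bits 12-14 still select a cache index and get aligned consistently */
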
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 22a073d7fbff..aa003b13a831 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -15,13 +15,14 @@
15#include <asm/stackprotector.h> 15#include <asm/stackprotector.h>
16#include <asm/perf_event.h> 16#include <asm/perf_event.h>
17#include <asm/mmu_context.h> 17#include <asm/mmu_context.h>
18#include <asm/archrandom.h>
18#include <asm/hypervisor.h> 19#include <asm/hypervisor.h>
19#include <asm/processor.h> 20#include <asm/processor.h>
20#include <asm/sections.h> 21#include <asm/sections.h>
21#include <linux/topology.h> 22#include <linux/topology.h>
22#include <linux/cpumask.h> 23#include <linux/cpumask.h>
23#include <asm/pgtable.h> 24#include <asm/pgtable.h>
24#include <asm/atomic.h> 25#include <linux/atomic.h>
25#include <asm/proto.h> 26#include <asm/proto.h>
26#include <asm/setup.h> 27#include <asm/setup.h>
27#include <asm/apic.h> 28#include <asm/apic.h>
@@ -681,6 +682,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
681 filter_cpuid_features(c, false); 682 filter_cpuid_features(c, false);
682 683
683 setup_smep(c); 684 setup_smep(c);
685
686 if (this_cpu->c_bsp_init)
687 this_cpu->c_bsp_init(c);
684} 688}
685 689
686void __init early_cpu_init(void) 690void __init early_cpu_init(void)
@@ -857,6 +861,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
857#endif 861#endif
858 862
859 init_hypervisor(c); 863 init_hypervisor(c);
864 x86_init_rdrand(c);
860 865
861 /* 866 /*
862 * Clear/Set all flags overriden by options, need do it 867 * Clear/Set all flags overriden by options, need do it
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index e765633f210e..1b22dcc51af4 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -18,6 +18,7 @@ struct cpu_dev {
18 struct cpu_model_info c_models[4]; 18 struct cpu_model_info c_models[4];
19 19
20 void (*c_early_init)(struct cpuinfo_x86 *); 20 void (*c_early_init)(struct cpuinfo_x86 *);
21 void (*c_bsp_init)(struct cpuinfo_x86 *);
21 void (*c_init)(struct cpuinfo_x86 *); 22 void (*c_init)(struct cpuinfo_x86 *);
22 void (*c_identify)(struct cpuinfo_x86 *); 23 void (*c_identify)(struct cpuinfo_x86 *);
23 unsigned int (*c_size_cache)(struct cpuinfo_x86 *, unsigned int); 24 unsigned int (*c_size_cache)(struct cpuinfo_x86 *, unsigned int);
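
c_bsp_init is a new hook run exactly once, on the boot CPU, from early_identify_cpu() (see the common.c hunk above); per-vendor setup that must not repeat on secondary CPUs moves there, as amd.c does with bsp_init_amd(). An abridged, hypothetical wiring sketch:

	static const struct cpu_dev example_cpu_dev = {
		/* ... c_vendor, c_ident, etc. omitted ... */
		.c_early_init	= example_early_init,	/* every CPU, early */
		.c_bsp_init	= example_bsp_init,	/* boot CPU only */
		.c_init		= example_init,		/* every CPU, later */
	};
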
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index ed6086eedf1d..523131213f08 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -47,6 +47,15 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
47 (c->x86 == 0x6 && c->x86_model >= 0x0e)) 47 (c->x86 == 0x6 && c->x86_model >= 0x0e))
48 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); 48 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
49 49
50 if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64)) {
51 unsigned lower_word;
52
53 wrmsr(MSR_IA32_UCODE_REV, 0, 0);
54 /* Required by the SDM */
55 sync_core();
56 rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode);
57 }
58
50 /* 59 /*
51 * Atom erratum AAE44/AAF40/AAG38/AAH41: 60 * Atom erratum AAE44/AAF40/AAG38/AAH41:
52 * 61 *
@@ -55,17 +64,10 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
 	 * need the microcode to have already been loaded... so if it is
 	 * not, recommend a BIOS update and disable large pages.
 	 */
-	if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2) {
-		u32 ucode, junk;
-
-		wrmsr(MSR_IA32_UCODE_REV, 0, 0);
-		sync_core();
-		rdmsr(MSR_IA32_UCODE_REV, junk, ucode);
-
-		if (ucode < 0x20e) {
-			printk(KERN_WARNING "Atom PSE erratum detected, BIOS microcode update recommended\n");
-			clear_cpu_cap(c, X86_FEATURE_PSE);
-		}
+	if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2 &&
+	    c->microcode < 0x20e) {
+		printk(KERN_WARNING "Atom PSE erratum detected, BIOS microcode update recommended\n");
+		clear_cpu_cap(c, X86_FEATURE_PSE);
 	}
 
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index c105c533ed94..a3b0811693c9 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -151,28 +151,17 @@ union _cpuid4_leaf_ecx {
151 u32 full; 151 u32 full;
152}; 152};
153 153
154struct amd_l3_cache { 154struct _cpuid4_info_regs {
155 struct amd_northbridge *nb;
156 unsigned indices;
157 u8 subcaches[4];
158};
159
160struct _cpuid4_info {
161 union _cpuid4_leaf_eax eax; 155 union _cpuid4_leaf_eax eax;
162 union _cpuid4_leaf_ebx ebx; 156 union _cpuid4_leaf_ebx ebx;
163 union _cpuid4_leaf_ecx ecx; 157 union _cpuid4_leaf_ecx ecx;
164 unsigned long size; 158 unsigned long size;
165 struct amd_l3_cache *l3; 159 struct amd_northbridge *nb;
166 DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
167}; 160};
168 161
169/* subset of above _cpuid4_info w/o shared_cpu_map */ 162struct _cpuid4_info {
170struct _cpuid4_info_regs { 163 struct _cpuid4_info_regs base;
171 union _cpuid4_leaf_eax eax; 164 DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
172 union _cpuid4_leaf_ebx ebx;
173 union _cpuid4_leaf_ecx ecx;
174 unsigned long size;
175 struct amd_l3_cache *l3;
176}; 165};
177 166
178unsigned short num_cache_leaves; 167unsigned short num_cache_leaves;
@@ -314,16 +303,23 @@ struct _cache_attr {
314/* 303/*
315 * L3 cache descriptors 304 * L3 cache descriptors
316 */ 305 */
317static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3) 306static void __cpuinit amd_calc_l3_indices(struct amd_northbridge *nb)
318{ 307{
308 struct amd_l3_cache *l3 = &nb->l3_cache;
319 unsigned int sc0, sc1, sc2, sc3; 309 unsigned int sc0, sc1, sc2, sc3;
320 u32 val = 0; 310 u32 val = 0;
321 311
322 pci_read_config_dword(l3->nb->misc, 0x1C4, &val); 312 pci_read_config_dword(nb->misc, 0x1C4, &val);
323 313
324 /* calculate subcache sizes */ 314 /* calculate subcache sizes */
325 l3->subcaches[0] = sc0 = !(val & BIT(0)); 315 l3->subcaches[0] = sc0 = !(val & BIT(0));
326 l3->subcaches[1] = sc1 = !(val & BIT(4)); 316 l3->subcaches[1] = sc1 = !(val & BIT(4));
317
318 if (boot_cpu_data.x86 == 0x15) {
319 l3->subcaches[0] = sc0 += !(val & BIT(1));
320 l3->subcaches[1] = sc1 += !(val & BIT(5));
321 }
322
327 l3->subcaches[2] = sc2 = !(val & BIT(8)) + !(val & BIT(9)); 323 l3->subcaches[2] = sc2 = !(val & BIT(8)) + !(val & BIT(9));
328 l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13)); 324 l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
329 325
@@ -333,33 +329,16 @@ static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
333static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, 329static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf,
334 int index) 330 int index)
335{ 331{
336 static struct amd_l3_cache *__cpuinitdata l3_caches;
337 int node; 332 int node;
338 333
339 /* only for L3, and not in virtualized environments */ 334 /* only for L3, and not in virtualized environments */
340 if (index < 3 || amd_nb_num() == 0) 335 if (index < 3)
341 return; 336 return;
342 337
343 /*
344 * Strictly speaking, the amount in @size below is leaked since it is
345 * never freed but this is done only on shutdown so it doesn't matter.
346 */
347 if (!l3_caches) {
348 int size = amd_nb_num() * sizeof(struct amd_l3_cache);
349
350 l3_caches = kzalloc(size, GFP_ATOMIC);
351 if (!l3_caches)
352 return;
353 }
354
355 node = amd_get_nb_id(smp_processor_id()); 338 node = amd_get_nb_id(smp_processor_id());
356 339 this_leaf->nb = node_to_amd_nb(node);
357 if (!l3_caches[node].nb) { 340 if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
358 l3_caches[node].nb = node_to_amd_nb(node); 341 amd_calc_l3_indices(this_leaf->nb);
359 amd_calc_l3_indices(&l3_caches[node]);
360 }
361
362 this_leaf->l3 = &l3_caches[node];
363} 342}
364 343
365/* 344/*
@@ -369,11 +348,11 @@ static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf,
369 * 348 *
370 * @returns: the disabled index if used or negative value if slot free. 349 * @returns: the disabled index if used or negative value if slot free.
371 */ 350 */
372int amd_get_l3_disable_slot(struct amd_l3_cache *l3, unsigned slot) 351int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
373{ 352{
374 unsigned int reg = 0; 353 unsigned int reg = 0;
375 354
376 pci_read_config_dword(l3->nb->misc, 0x1BC + slot * 4, &reg); 355 pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);
377 356
378 /* check whether this slot is activated already */ 357 /* check whether this slot is activated already */
379 if (reg & (3UL << 30)) 358 if (reg & (3UL << 30))
@@ -387,11 +366,10 @@ static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
387{ 366{
388 int index; 367 int index;
389 368
390 if (!this_leaf->l3 || 369 if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
391 !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
392 return -EINVAL; 370 return -EINVAL;
393 371
394 index = amd_get_l3_disable_slot(this_leaf->l3, slot); 372 index = amd_get_l3_disable_slot(this_leaf->base.nb, slot);
395 if (index >= 0) 373 if (index >= 0)
396 return sprintf(buf, "%d\n", index); 374 return sprintf(buf, "%d\n", index);
397 375
@@ -408,7 +386,7 @@ show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf, \
408SHOW_CACHE_DISABLE(0) 386SHOW_CACHE_DISABLE(0)
409SHOW_CACHE_DISABLE(1) 387SHOW_CACHE_DISABLE(1)
410 388
411static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu, 389static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
412 unsigned slot, unsigned long idx) 390 unsigned slot, unsigned long idx)
413{ 391{
414 int i; 392 int i;
@@ -421,10 +399,10 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
421 for (i = 0; i < 4; i++) { 399 for (i = 0; i < 4; i++) {
422 u32 reg = idx | (i << 20); 400 u32 reg = idx | (i << 20);
423 401
424 if (!l3->subcaches[i]) 402 if (!nb->l3_cache.subcaches[i])
425 continue; 403 continue;
426 404
427 pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg); 405 pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
428 406
429 /* 407 /*
430 * We need to WBINVD on a core on the node containing the L3 408 * We need to WBINVD on a core on the node containing the L3
@@ -434,7 +412,7 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
434 wbinvd_on_cpu(cpu); 412 wbinvd_on_cpu(cpu);
435 413
436 reg |= BIT(31); 414 reg |= BIT(31);
437 pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg); 415 pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
438 } 416 }
439} 417}
440 418
@@ -448,24 +426,24 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
448 * 426 *
449 * @return: 0 on success, error status on failure 427 * @return: 0 on success, error status on failure
450 */ 428 */
451int amd_set_l3_disable_slot(struct amd_l3_cache *l3, int cpu, unsigned slot, 429int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu, unsigned slot,
452 unsigned long index) 430 unsigned long index)
453{ 431{
454 int ret = 0; 432 int ret = 0;
455 433
456 /* check if @slot is already used or the index is already disabled */ 434 /* check if @slot is already used or the index is already disabled */
457 ret = amd_get_l3_disable_slot(l3, slot); 435 ret = amd_get_l3_disable_slot(nb, slot);
458 if (ret >= 0) 436 if (ret >= 0)
459 return -EINVAL; 437 return -EINVAL;
460 438
461 if (index > l3->indices) 439 if (index > nb->l3_cache.indices)
462 return -EINVAL; 440 return -EINVAL;
463 441
464 /* check whether the other slot has disabled the same index already */ 442 /* check whether the other slot has disabled the same index already */
465 if (index == amd_get_l3_disable_slot(l3, !slot)) 443 if (index == amd_get_l3_disable_slot(nb, !slot))
466 return -EINVAL; 444 return -EINVAL;
467 445
468 amd_l3_disable_index(l3, cpu, slot, index); 446 amd_l3_disable_index(nb, cpu, slot, index);
469 447
470 return 0; 448 return 0;
471} 449}
@@ -480,8 +458,7 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
480 if (!capable(CAP_SYS_ADMIN)) 458 if (!capable(CAP_SYS_ADMIN))
481 return -EPERM; 459 return -EPERM;
482 460
483 if (!this_leaf->l3 || 461 if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
484 !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
485 return -EINVAL; 462 return -EINVAL;
486 463
487 cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); 464 cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
@@ -489,7 +466,7 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
489 if (strict_strtoul(buf, 10, &val) < 0) 466 if (strict_strtoul(buf, 10, &val) < 0)
490 return -EINVAL; 467 return -EINVAL;
491 468
492 err = amd_set_l3_disable_slot(this_leaf->l3, cpu, slot, val); 469 err = amd_set_l3_disable_slot(this_leaf->base.nb, cpu, slot, val);
493 if (err) { 470 if (err) {
494 if (err == -EEXIST) 471 if (err == -EEXIST)
495 printk(KERN_WARNING "L3 disable slot %d in use!\n", 472 printk(KERN_WARNING "L3 disable slot %d in use!\n",
@@ -518,7 +495,7 @@ static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
518static ssize_t 495static ssize_t
519show_subcaches(struct _cpuid4_info *this_leaf, char *buf, unsigned int cpu) 496show_subcaches(struct _cpuid4_info *this_leaf, char *buf, unsigned int cpu)
520{ 497{
521 if (!this_leaf->l3 || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) 498 if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
522 return -EINVAL; 499 return -EINVAL;
523 500
524 return sprintf(buf, "%x\n", amd_get_subcaches(cpu)); 501 return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
@@ -533,7 +510,7 @@ store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count,
533 if (!capable(CAP_SYS_ADMIN)) 510 if (!capable(CAP_SYS_ADMIN))
534 return -EPERM; 511 return -EPERM;
535 512
536 if (!this_leaf->l3 || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) 513 if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
537 return -EINVAL; 514 return -EINVAL;
538 515
539 if (strict_strtoul(buf, 16, &val) < 0) 516 if (strict_strtoul(buf, 16, &val) < 0)
@@ -769,7 +746,7 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
769 return; 746 return;
770 } 747 }
771 this_leaf = CPUID4_INFO_IDX(cpu, index); 748 this_leaf = CPUID4_INFO_IDX(cpu, index);
772 num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; 749 num_threads_sharing = 1 + this_leaf->base.eax.split.num_threads_sharing;
773 750
774 if (num_threads_sharing == 1) 751 if (num_threads_sharing == 1)
775 cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map)); 752 cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map));
@@ -820,29 +797,19 @@ static void __cpuinit free_cache_attributes(unsigned int cpu)
820 for (i = 0; i < num_cache_leaves; i++) 797 for (i = 0; i < num_cache_leaves; i++)
821 cache_remove_shared_cpu_map(cpu, i); 798 cache_remove_shared_cpu_map(cpu, i);
822 799
823 kfree(per_cpu(ici_cpuid4_info, cpu)->l3);
824 kfree(per_cpu(ici_cpuid4_info, cpu)); 800 kfree(per_cpu(ici_cpuid4_info, cpu));
825 per_cpu(ici_cpuid4_info, cpu) = NULL; 801 per_cpu(ici_cpuid4_info, cpu) = NULL;
826} 802}
827 803
828static int
829__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
830{
831 struct _cpuid4_info_regs *leaf_regs =
832 (struct _cpuid4_info_regs *)this_leaf;
833
834 return cpuid4_cache_lookup_regs(index, leaf_regs);
835}
836
837static void __cpuinit get_cpu_leaves(void *_retval) 804static void __cpuinit get_cpu_leaves(void *_retval)
838{ 805{
839 int j, *retval = _retval, cpu = smp_processor_id(); 806 int j, *retval = _retval, cpu = smp_processor_id();
840 807
841 /* Do cpuid and store the results */ 808 /* Do cpuid and store the results */
842 for (j = 0; j < num_cache_leaves; j++) { 809 for (j = 0; j < num_cache_leaves; j++) {
843 struct _cpuid4_info *this_leaf; 810 struct _cpuid4_info *this_leaf = CPUID4_INFO_IDX(cpu, j);
844 this_leaf = CPUID4_INFO_IDX(cpu, j); 811
845 *retval = cpuid4_cache_lookup(j, this_leaf); 812 *retval = cpuid4_cache_lookup_regs(j, &this_leaf->base);
846 if (unlikely(*retval < 0)) { 813 if (unlikely(*retval < 0)) {
847 int i; 814 int i;
848 815
@@ -900,16 +867,16 @@ static ssize_t show_##file_name(struct _cpuid4_info *this_leaf, char *buf, \
900 return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \ 867 return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \
901} 868}
902 869
903show_one_plus(level, eax.split.level, 0); 870show_one_plus(level, base.eax.split.level, 0);
904show_one_plus(coherency_line_size, ebx.split.coherency_line_size, 1); 871show_one_plus(coherency_line_size, base.ebx.split.coherency_line_size, 1);
905show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1); 872show_one_plus(physical_line_partition, base.ebx.split.physical_line_partition, 1);
906show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1); 873show_one_plus(ways_of_associativity, base.ebx.split.ways_of_associativity, 1);
907show_one_plus(number_of_sets, ecx.split.number_of_sets, 1); 874show_one_plus(number_of_sets, base.ecx.split.number_of_sets, 1);
908 875
909static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf, 876static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf,
910 unsigned int cpu) 877 unsigned int cpu)
911{ 878{
912 return sprintf(buf, "%luK\n", this_leaf->size / 1024); 879 return sprintf(buf, "%luK\n", this_leaf->base.size / 1024);
913} 880}
914 881
915static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf, 882static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
@@ -946,7 +913,7 @@ static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf,
946static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf, 913static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf,
947 unsigned int cpu) 914 unsigned int cpu)
948{ 915{
949 switch (this_leaf->eax.split.type) { 916 switch (this_leaf->base.eax.split.type) {
950 case CACHE_TYPE_DATA: 917 case CACHE_TYPE_DATA:
951 return sprintf(buf, "Data\n"); 918 return sprintf(buf, "Data\n");
952 case CACHE_TYPE_INST: 919 case CACHE_TYPE_INST:
@@ -1135,7 +1102,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
1135 1102
1136 ktype_cache.default_attrs = default_attrs; 1103 ktype_cache.default_attrs = default_attrs;
1137#ifdef CONFIG_AMD_NB 1104#ifdef CONFIG_AMD_NB
1138 if (this_leaf->l3) 1105 if (this_leaf->base.nb)
1139 ktype_cache.default_attrs = amd_l3_attrs(); 1106 ktype_cache.default_attrs = amd_l3_attrs();
1140#endif 1107#endif
1141 retval = kobject_init_and_add(&(this_object->kobj), 1108 retval = kobject_init_and_add(&(this_object->kobj),
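
After this rework the AMD L3 state is no longer a separately allocated amd_l3_cache array: struct _cpuid4_info embeds the register block as ->base, and ->base.nb points at the node's amd_northbridge, whose l3_cache member (added on the amd_nb side, outside this excerpt) carries the index/subcache bookkeeping. A short sketch of navigating the new layout, using only names visible in this diff:

	struct _cpuid4_info *leaf = CPUID4_INFO_IDX(cpu, index);
	unsigned int level = leaf->base.eax.split.level;
	struct amd_northbridge *nb = leaf->base.nb;	/* set only for AMD L3 leaves */

	if (nb && !nb->l3_cache.indices)
		amd_calc_l3_indices(nb);	/* fills nb->l3_cache.indices/subcaches */
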
diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c
index 83930deec3c6..507ea58688e2 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-apei.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c
@@ -28,6 +28,7 @@
28 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 28 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29 */ 29 */
30 30
31#include <linux/export.h>
31#include <linux/kernel.h> 32#include <linux/kernel.h>
32#include <linux/acpi.h> 33#include <linux/acpi.h>
33#include <linux/cper.h> 34#include <linux/cper.h>
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 0ed633c5048b..319882ef848d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -78,27 +78,20 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs)
 
 static cpumask_var_t mce_inject_cpumask;
 
-static int mce_raise_notify(struct notifier_block *self,
-			    unsigned long val, void *data)
+static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
 {
-	struct die_args *args = (struct die_args *)data;
 	int cpu = smp_processor_id();
 	struct mce *m = &__get_cpu_var(injectm);
-	if (val != DIE_NMI || !cpumask_test_cpu(cpu, mce_inject_cpumask))
-		return NOTIFY_DONE;
+	if (!cpumask_test_cpu(cpu, mce_inject_cpumask))
+		return NMI_DONE;
 	cpumask_clear_cpu(cpu, mce_inject_cpumask);
 	if (m->inject_flags & MCJ_EXCEPTION)
-		raise_exception(m, args->regs);
+		raise_exception(m, regs);
 	else if (m->status)
 		raise_poll(m);
-	return NOTIFY_STOP;
+	return NMI_HANDLED;
 }
 
-static struct notifier_block mce_raise_nb = {
-	.notifier_call = mce_raise_notify,
-	.priority = NMI_LOCAL_NORMAL_PRIOR,
-};
-
 /* Inject mce on current CPU */
 static int raise_local(void)
 {
@@ -215,8 +208,9 @@ static int inject_init(void)
 	if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))
 		return -ENOMEM;
 	printk(KERN_INFO "Machine check injector initialized\n");
-	mce_chrdev_ops.write = mce_write;
-	register_die_notifier(&mce_raise_nb);
+	register_mce_write_callback(mce_write);
+	register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0,
+			     "mce_notify");
 	return 0;
 }
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 08363b042122..2af127d4c3d1 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -36,8 +36,8 @@
36#include <linux/fs.h> 36#include <linux/fs.h>
37#include <linux/mm.h> 37#include <linux/mm.h>
38#include <linux/debugfs.h> 38#include <linux/debugfs.h>
39#include <linux/edac_mce.h>
40#include <linux/irq_work.h> 39#include <linux/irq_work.h>
40#include <linux/export.h>
41 41
42#include <asm/processor.h> 42#include <asm/processor.h>
43#include <asm/mce.h> 43#include <asm/mce.h>
@@ -144,23 +144,20 @@ static struct mce_log mcelog = {
144void mce_log(struct mce *mce) 144void mce_log(struct mce *mce)
145{ 145{
146 unsigned next, entry; 146 unsigned next, entry;
147 int ret = 0;
147 148
148 /* Emit the trace record: */ 149 /* Emit the trace record: */
149 trace_mce_record(mce); 150 trace_mce_record(mce);
150 151
152 ret = atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
153 if (ret == NOTIFY_STOP)
154 return;
155
151 mce->finished = 0; 156 mce->finished = 0;
152 wmb(); 157 wmb();
153 for (;;) { 158 for (;;) {
154 entry = rcu_dereference_check_mce(mcelog.next); 159 entry = rcu_dereference_check_mce(mcelog.next);
155 for (;;) { 160 for (;;) {
156 /*
157 * If edac_mce is enabled, it will check the error type
158 * and will process it, if it is a known error.
159 * Otherwise, the error will be sent through mcelog
160 * interface
161 */
162 if (edac_mce_parse(mce))
163 return;
164 161
165 /* 162 /*
166 * When the buffer fills up discard new entries. 163 * When the buffer fills up discard new entries.
@@ -217,8 +214,13 @@ static void print_mce(struct mce *m)
217 pr_cont("MISC %llx ", m->misc); 214 pr_cont("MISC %llx ", m->misc);
218 215
219 pr_cont("\n"); 216 pr_cont("\n");
220 pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", 217 /*
221 m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid); 218 * Note this output is parsed by external tools and old fields
219 * should not be changed.
220 */
221 pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n",
222 m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid,
223 cpu_data(m->extcpu).microcode);
222 224
223 /* 225 /*
224 * Print out human-readable details about the MCE error, 226 * Print out human-readable details about the MCE error,
@@ -551,10 +553,8 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
551 * Don't get the IP here because it's unlikely to 553 * Don't get the IP here because it's unlikely to
552 * have anything to do with the actual error location. 554 * have anything to do with the actual error location.
553 */ 555 */
554 if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) { 556 if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce)
555 mce_log(&m); 557 mce_log(&m);
556 atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, &m);
557 }
558 558
559 /* 559 /*
560 * Clear state for this bank. 560 * Clear state for this bank.
@@ -908,9 +908,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
908 908
909 percpu_inc(mce_exception_count); 909 percpu_inc(mce_exception_count);
910 910
911 if (notify_die(DIE_NMI, "machine check", regs, error_code,
912 18, SIGKILL) == NOTIFY_STOP)
913 goto out;
914 if (!banks) 911 if (!banks)
915 goto out; 912 goto out;
916 913
@@ -1140,6 +1137,15 @@ static void mce_start_timer(unsigned long data)
1140 add_timer_on(t, smp_processor_id()); 1137 add_timer_on(t, smp_processor_id());
1141} 1138}
1142 1139
1140/* Must not be called in IRQ context where del_timer_sync() can deadlock */
1141static void mce_timer_delete_all(void)
1142{
1143 int cpu;
1144
1145 for_each_online_cpu(cpu)
1146 del_timer_sync(&per_cpu(mce_timer, cpu));
1147}
1148
1143static void mce_do_trigger(struct work_struct *work) 1149static void mce_do_trigger(struct work_struct *work)
1144{ 1150{
1145 call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT); 1151 call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT);
@@ -1628,16 +1634,35 @@ static long mce_chrdev_ioctl(struct file *f, unsigned int cmd,
1628 } 1634 }
1629} 1635}
1630 1636
1631/* Modified in mce-inject.c, so not static or const */ 1637static ssize_t (*mce_write)(struct file *filp, const char __user *ubuf,
1632struct file_operations mce_chrdev_ops = { 1638 size_t usize, loff_t *off);
1639
1640void register_mce_write_callback(ssize_t (*fn)(struct file *filp,
1641 const char __user *ubuf,
1642 size_t usize, loff_t *off))
1643{
1644 mce_write = fn;
1645}
1646EXPORT_SYMBOL_GPL(register_mce_write_callback);
1647
1648ssize_t mce_chrdev_write(struct file *filp, const char __user *ubuf,
1649 size_t usize, loff_t *off)
1650{
1651 if (mce_write)
1652 return mce_write(filp, ubuf, usize, off);
1653 else
1654 return -EINVAL;
1655}
1656
1657static const struct file_operations mce_chrdev_ops = {
1633 .open = mce_chrdev_open, 1658 .open = mce_chrdev_open,
1634 .release = mce_chrdev_release, 1659 .release = mce_chrdev_release,
1635 .read = mce_chrdev_read, 1660 .read = mce_chrdev_read,
1661 .write = mce_chrdev_write,
1636 .poll = mce_chrdev_poll, 1662 .poll = mce_chrdev_poll,
1637 .unlocked_ioctl = mce_chrdev_ioctl, 1663 .unlocked_ioctl = mce_chrdev_ioctl,
1638 .llseek = no_llseek, 1664 .llseek = no_llseek,
1639}; 1665};
1640EXPORT_SYMBOL_GPL(mce_chrdev_ops);
1641 1666
1642static struct miscdevice mce_chrdev_device = { 1667static struct miscdevice mce_chrdev_device = {
1643 MISC_MCELOG_MINOR, 1668 MISC_MCELOG_MINOR,
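
With mce_chrdev_ops now static and const, mce-inject can no longer patch .write directly; the character device instead forwards writes to whatever was installed via register_mce_write_callback() (as inject_init() does above). A minimal sketch of the injector side, with a simplified parser standing in for the real one:

	static ssize_t my_mce_write(struct file *filp, const char __user *ubuf,
				    size_t usize, loff_t *off)
	{
		struct mce m;

		if (usize < sizeof(m))
			return -EINVAL;
		if (copy_from_user(&m, ubuf, sizeof(m)))
			return -EFAULT;
		/* ... validate and queue the record for injection ... */
		return usize;
	}

	register_mce_write_callback(my_mce_write);	/* typically from module init */
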
@@ -1750,7 +1775,6 @@ static struct syscore_ops mce_syscore_ops = {
1750 1775
1751static void mce_cpu_restart(void *data) 1776static void mce_cpu_restart(void *data)
1752{ 1777{
1753 del_timer_sync(&__get_cpu_var(mce_timer));
1754 if (!mce_available(__this_cpu_ptr(&cpu_info))) 1778 if (!mce_available(__this_cpu_ptr(&cpu_info)))
1755 return; 1779 return;
1756 __mcheck_cpu_init_generic(); 1780 __mcheck_cpu_init_generic();
@@ -1760,16 +1784,15 @@ static void mce_cpu_restart(void *data)
1760/* Reinit MCEs after user configuration changes */ 1784/* Reinit MCEs after user configuration changes */
1761static void mce_restart(void) 1785static void mce_restart(void)
1762{ 1786{
1787 mce_timer_delete_all();
1763 on_each_cpu(mce_cpu_restart, NULL, 1); 1788 on_each_cpu(mce_cpu_restart, NULL, 1);
1764} 1789}
1765 1790
1766/* Toggle features for corrected errors */ 1791/* Toggle features for corrected errors */
1767static void mce_disable_ce(void *all) 1792static void mce_disable_cmci(void *data)
1768{ 1793{
1769 if (!mce_available(__this_cpu_ptr(&cpu_info))) 1794 if (!mce_available(__this_cpu_ptr(&cpu_info)))
1770 return; 1795 return;
1771 if (all)
1772 del_timer_sync(&__get_cpu_var(mce_timer));
1773 cmci_clear(); 1796 cmci_clear();
1774} 1797}
1775 1798
@@ -1852,7 +1875,8 @@ static ssize_t set_ignore_ce(struct sys_device *s,
1852 if (mce_ignore_ce ^ !!new) { 1875 if (mce_ignore_ce ^ !!new) {
1853 if (new) { 1876 if (new) {
1854 /* disable ce features */ 1877 /* disable ce features */
1855 on_each_cpu(mce_disable_ce, (void *)1, 1); 1878 mce_timer_delete_all();
1879 on_each_cpu(mce_disable_cmci, NULL, 1);
1856 mce_ignore_ce = 1; 1880 mce_ignore_ce = 1;
1857 } else { 1881 } else {
1858 /* enable ce features */ 1882 /* enable ce features */
@@ -1875,7 +1899,7 @@ static ssize_t set_cmci_disabled(struct sys_device *s,
1875 if (mce_cmci_disabled ^ !!new) { 1899 if (mce_cmci_disabled ^ !!new) {
1876 if (new) { 1900 if (new) {
1877 /* disable cmci */ 1901 /* disable cmci */
1878 on_each_cpu(mce_disable_ce, NULL, 1); 1902 on_each_cpu(mce_disable_cmci, NULL, 1);
1879 mce_cmci_disabled = 1; 1903 mce_cmci_disabled = 1;
1880 } else { 1904 } else {
1881 /* enable cmci */ 1905 /* enable cmci */
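
These hunks split the old mce_disable_ce(all) helper: timer teardown moves into mce_timer_delete_all(), which calls del_timer_sync() from process context, while the cross-CPU part (cmci_clear() via mce_disable_cmci()) stays in the on_each_cpu() path, since del_timer_sync() can deadlock if run from the IRQ/IPI context that might itself be running the timer (as the comment above mce_timer_delete_all() notes). The ordering established in mce_restart() and set_ignore_ce() is therefore:

	mce_timer_delete_all();			/* process context: safe to wait for timers */
	on_each_cpu(mce_disable_cmci, NULL, 1);	/* IPI: no del_timer_sync() in here */
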
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 8694ef56459d..38e49bc95ffc 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -28,7 +28,7 @@ static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
28 * cmci_discover_lock protects against parallel discovery attempts 28 * cmci_discover_lock protects against parallel discovery attempts
29 * which could race against each other. 29 * which could race against each other.
30 */ 30 */
31static DEFINE_SPINLOCK(cmci_discover_lock); 31static DEFINE_RAW_SPINLOCK(cmci_discover_lock);
32 32
33#define CMCI_THRESHOLD 1 33#define CMCI_THRESHOLD 1
34 34
@@ -85,7 +85,7 @@ static void cmci_discover(int banks, int boot)
85 int hdr = 0; 85 int hdr = 0;
86 int i; 86 int i;
87 87
88 spin_lock_irqsave(&cmci_discover_lock, flags); 88 raw_spin_lock_irqsave(&cmci_discover_lock, flags);
89 for (i = 0; i < banks; i++) { 89 for (i = 0; i < banks; i++) {
90 u64 val; 90 u64 val;
91 91
@@ -116,7 +116,7 @@ static void cmci_discover(int banks, int boot)
116 WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); 116 WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
117 } 117 }
118 } 118 }
119 spin_unlock_irqrestore(&cmci_discover_lock, flags); 119 raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
120 if (hdr) 120 if (hdr)
121 printk(KERN_CONT "\n"); 121 printk(KERN_CONT "\n");
122} 122}
@@ -150,7 +150,7 @@ void cmci_clear(void)
150 150
151 if (!cmci_supported(&banks)) 151 if (!cmci_supported(&banks))
152 return; 152 return;
153 spin_lock_irqsave(&cmci_discover_lock, flags); 153 raw_spin_lock_irqsave(&cmci_discover_lock, flags);
154 for (i = 0; i < banks; i++) { 154 for (i = 0; i < banks; i++) {
155 if (!test_bit(i, __get_cpu_var(mce_banks_owned))) 155 if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
156 continue; 156 continue;
@@ -160,7 +160,7 @@ void cmci_clear(void)
160 wrmsrl(MSR_IA32_MCx_CTL2(i), val); 160 wrmsrl(MSR_IA32_MCx_CTL2(i), val);
161 __clear_bit(i, __get_cpu_var(mce_banks_owned)); 161 __clear_bit(i, __get_cpu_var(mce_banks_owned));
162 } 162 }
163 spin_unlock_irqrestore(&cmci_discover_lock, flags); 163 raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
164} 164}
165 165
166/* 166/*
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 27c625178bf1..787e06c84ea6 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -18,6 +18,7 @@
18#include <linux/jiffies.h> 18#include <linux/jiffies.h>
19#include <linux/kernel.h> 19#include <linux/kernel.h>
20#include <linux/percpu.h> 20#include <linux/percpu.h>
21#include <linux/export.h>
21#include <linux/sysdev.h> 22#include <linux/sysdev.h>
22#include <linux/types.h> 23#include <linux/types.h>
23#include <linux/init.h> 24#include <linux/init.h>
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index d944bf6c50e9..0a630dd4b620 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -11,6 +11,8 @@
11 */ 11 */
12 12
13#include <linux/types.h> 13#include <linux/types.h>
14#include <linux/time.h>
15#include <linux/clocksource.h>
14#include <linux/module.h> 16#include <linux/module.h>
15#include <asm/processor.h> 17#include <asm/processor.h>
16#include <asm/hypervisor.h> 18#include <asm/hypervisor.h>
@@ -36,6 +38,25 @@ static bool __init ms_hyperv_platform(void)
36 !memcmp("Microsoft Hv", hyp_signature, 12); 38 !memcmp("Microsoft Hv", hyp_signature, 12);
37} 39}
38 40
41static cycle_t read_hv_clock(struct clocksource *arg)
42{
43 cycle_t current_tick;
44 /*
45 * Read the partition counter to get the current tick count. This count
46 * is set to 0 when the partition is created and is incremented in
47 * 100 nanosecond units.
48 */
49 rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
50 return current_tick;
51}
52
53static struct clocksource hyperv_cs = {
54 .name = "hyperv_clocksource",
55 .rating = 400, /* use this when running on Hyperv*/
56 .read = read_hv_clock,
57 .mask = CLOCKSOURCE_MASK(64),
58};
59
39static void __init ms_hyperv_init_platform(void) 60static void __init ms_hyperv_init_platform(void)
40{ 61{
41 /* 62 /*
@@ -46,6 +67,8 @@ static void __init ms_hyperv_init_platform(void)
46 67
47 printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n", 68 printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n",
48 ms_hyperv.features, ms_hyperv.hints); 69 ms_hyperv.features, ms_hyperv.hints);
70
71 clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100);
49} 72}
50 73
51const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { 74const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
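
The partition reference counter read by read_hv_clock() advances once every 100 ns, so registering with NSEC_PER_SEC/100 declares a 10 MHz clocksource and lets the core derive mult/shift from that frequency. A small illustrative conversion, not part of the patch:

	/* 100 ns per tick  <=>  10,000,000 ticks per second */
	static inline u64 hv_ticks_to_ns(u64 ticks)
	{
		return ticks * 100;
	}
	/* hence: clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC / 100) == 10 MHz */
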
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 08119a37e53c..6b96110bb0c3 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -149,7 +149,6 @@ struct set_mtrr_data {
149 */ 149 */
150static int mtrr_rendezvous_handler(void *info) 150static int mtrr_rendezvous_handler(void *info)
151{ 151{
152#ifdef CONFIG_SMP
153 struct set_mtrr_data *data = info; 152 struct set_mtrr_data *data = info;
154 153
155 /* 154 /*
@@ -171,7 +170,6 @@ static int mtrr_rendezvous_handler(void *info)
171 } else if (mtrr_aps_delayed_init || !cpu_online(smp_processor_id())) { 170 } else if (mtrr_aps_delayed_init || !cpu_online(smp_processor_id())) {
172 mtrr_if->set_all(); 171 mtrr_if->set_all();
173 } 172 }
174#endif
175 return 0; 173 return 0;
176} 174}
177 175
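The #ifdef CONFIG_SMP / #endif pair can go because mtrr_rendezvous_handler() is invoked through the stop_machine() rendezvous machinery, which on UP kernels simply calls the handler locally with interrupts disabled; the same body is therefore correct in both configurations. A hedged sketch of that calling pattern, with illustrative names:

#include <linux/stop_machine.h>
#include <linux/cpumask.h>

/* Runs on every online CPU in lockstep, with interrupts disabled. */
static int example_rendezvous_handler(void *info)
{
        unsigned long *new_setting = info;

        (void)*new_setting;     /* apply the per-CPU register update here */
        return 0;
}

static int example_update_everywhere(unsigned long setting)
{
        /* On !SMP kernels this degenerates to a local call, hence no #ifdef. */
        return stop_machine(example_rendezvous_handler, &setting, cpu_online_mask);
}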
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 4ee3abf20ed6..640891014b2a 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -32,6 +32,8 @@
32#include <asm/smp.h> 32#include <asm/smp.h>
33#include <asm/alternative.h> 33#include <asm/alternative.h>
34 34
35#include "perf_event.h"
36
35#if 0 37#if 0
36#undef wrmsrl 38#undef wrmsrl
37#define wrmsrl(msr, val) \ 39#define wrmsrl(msr, val) \
@@ -43,283 +45,17 @@ do { \
43} while (0) 45} while (0)
44#endif 46#endif
45 47
46/* 48struct x86_pmu x86_pmu __read_mostly;
47 * | NHM/WSM | SNB |
48 * register -------------------------------
49 * | HT | no HT | HT | no HT |
50 *-----------------------------------------
51 * offcore | core | core | cpu | core |
52 * lbr_sel | core | core | cpu | core |
53 * ld_lat | cpu | core | cpu | core |
54 *-----------------------------------------
55 *
56 * Given that there is a small number of shared regs,
57 * we can pre-allocate their slot in the per-cpu
58 * per-core reg tables.
59 */
60enum extra_reg_type {
61 EXTRA_REG_NONE = -1, /* not used */
62
63 EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */
64 EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */
65
66 EXTRA_REG_MAX /* number of entries needed */
67};
68
69struct event_constraint {
70 union {
71 unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
72 u64 idxmsk64;
73 };
74 u64 code;
75 u64 cmask;
76 int weight;
77};
78
79struct amd_nb {
80 int nb_id; /* NorthBridge id */
81 int refcnt; /* reference count */
82 struct perf_event *owners[X86_PMC_IDX_MAX];
83 struct event_constraint event_constraints[X86_PMC_IDX_MAX];
84};
85
86struct intel_percore;
87
88#define MAX_LBR_ENTRIES 16
89
90struct cpu_hw_events {
91 /*
92 * Generic x86 PMC bits
93 */
94 struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */
95 unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
96 unsigned long running[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
97 int enabled;
98
99 int n_events;
100 int n_added;
101 int n_txn;
102 int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
103 u64 tags[X86_PMC_IDX_MAX];
104 struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
105
106 unsigned int group_flag;
107
108 /*
109 * Intel DebugStore bits
110 */
111 struct debug_store *ds;
112 u64 pebs_enabled;
113
114 /*
115 * Intel LBR bits
116 */
117 int lbr_users;
118 void *lbr_context;
119 struct perf_branch_stack lbr_stack;
120 struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
121
122 /*
123 * manage shared (per-core, per-cpu) registers
124 * used on Intel NHM/WSM/SNB
125 */
126 struct intel_shared_regs *shared_regs;
127
128 /*
129 * AMD specific bits
130 */
131 struct amd_nb *amd_nb;
132};
133
134#define __EVENT_CONSTRAINT(c, n, m, w) {\
135 { .idxmsk64 = (n) }, \
136 .code = (c), \
137 .cmask = (m), \
138 .weight = (w), \
139}
140
141#define EVENT_CONSTRAINT(c, n, m) \
142 __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
143
144/*
145 * Constraint on the Event code.
146 */
147#define INTEL_EVENT_CONSTRAINT(c, n) \
148 EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
149
150/*
151 * Constraint on the Event code + UMask + fixed-mask
152 *
153 * filter mask to validate fixed counter events.
154 * the following filters disqualify for fixed counters:
155 * - inv
156 * - edge
157 * - cnt-mask
158 * The other filters are supported by fixed counters.
159 * The any-thread option is supported starting with v3.
160 */
161#define FIXED_EVENT_CONSTRAINT(c, n) \
162 EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK)
163
164/*
165 * Constraint on the Event code + UMask
166 */
167#define INTEL_UEVENT_CONSTRAINT(c, n) \
168 EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
169 49
170#define EVENT_CONSTRAINT_END \ 50DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
171 EVENT_CONSTRAINT(0, 0, 0)
172
173#define for_each_event_constraint(e, c) \
174 for ((e) = (c); (e)->weight; (e)++)
175
176/*
177 * Per register state.
178 */
179struct er_account {
180 raw_spinlock_t lock; /* per-core: protect structure */
181 u64 config; /* extra MSR config */
182 u64 reg; /* extra MSR number */
183 atomic_t ref; /* reference count */
184};
185
186/*
187 * Extra registers for specific events.
188 *
189 * Some events need large masks and require external MSRs.
190 * Those extra MSRs end up being shared for all events on
191 * a PMU and sometimes between PMU of sibling HT threads.
192 * In either case, the kernel needs to handle conflicting
193 * accesses to those extra, shared, regs. The data structure
194 * to manage those registers is stored in cpu_hw_event.
195 */
196struct extra_reg {
197 unsigned int event;
198 unsigned int msr;
199 u64 config_mask;
200 u64 valid_mask;
201 int idx; /* per_xxx->regs[] reg index */
202};
203
204#define EVENT_EXTRA_REG(e, ms, m, vm, i) { \
205 .event = (e), \
206 .msr = (ms), \
207 .config_mask = (m), \
208 .valid_mask = (vm), \
209 .idx = EXTRA_REG_##i \
210 }
211
212#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \
213 EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)
214
215#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)
216
217union perf_capabilities {
218 struct {
219 u64 lbr_format : 6;
220 u64 pebs_trap : 1;
221 u64 pebs_arch_reg : 1;
222 u64 pebs_format : 4;
223 u64 smm_freeze : 1;
224 };
225 u64 capabilities;
226};
227
228/*
229 * struct x86_pmu - generic x86 pmu
230 */
231struct x86_pmu {
232 /*
233 * Generic x86 PMC bits
234 */
235 const char *name;
236 int version;
237 int (*handle_irq)(struct pt_regs *);
238 void (*disable_all)(void);
239 void (*enable_all)(int added);
240 void (*enable)(struct perf_event *);
241 void (*disable)(struct perf_event *);
242 int (*hw_config)(struct perf_event *event);
243 int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
244 unsigned eventsel;
245 unsigned perfctr;
246 u64 (*event_map)(int);
247 int max_events;
248 int num_counters;
249 int num_counters_fixed;
250 int cntval_bits;
251 u64 cntval_mask;
252 int apic;
253 u64 max_period;
254 struct event_constraint *
255 (*get_event_constraints)(struct cpu_hw_events *cpuc,
256 struct perf_event *event);
257
258 void (*put_event_constraints)(struct cpu_hw_events *cpuc,
259 struct perf_event *event);
260 struct event_constraint *event_constraints;
261 void (*quirks)(void);
262 int perfctr_second_write;
263
264 int (*cpu_prepare)(int cpu);
265 void (*cpu_starting)(int cpu);
266 void (*cpu_dying)(int cpu);
267 void (*cpu_dead)(int cpu);
268
269 /*
270 * Intel Arch Perfmon v2+
271 */
272 u64 intel_ctrl;
273 union perf_capabilities intel_cap;
274
275 /*
276 * Intel DebugStore bits
277 */
278 int bts, pebs;
279 int bts_active, pebs_active;
280 int pebs_record_size;
281 void (*drain_pebs)(struct pt_regs *regs);
282 struct event_constraint *pebs_constraints;
283
284 /*
285 * Intel LBR
286 */
287 unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */
288 int lbr_nr; /* hardware stack size */
289
290 /*
291 * Extra registers for events
292 */
293 struct extra_reg *extra_regs;
294 unsigned int er_flags;
295};
296
297#define ERF_NO_HT_SHARING 1
298#define ERF_HAS_RSP_1 2
299
300static struct x86_pmu x86_pmu __read_mostly;
301
302static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
303 .enabled = 1, 51 .enabled = 1,
304}; 52};
305 53
306static int x86_perf_event_set_period(struct perf_event *event); 54u64 __read_mostly hw_cache_event_ids
307
308/*
309 * Generalized hw caching related hw_event table, filled
310 * in on a per model basis. A value of 0 means
311 * 'not supported', -1 means 'hw_event makes no sense on
312 * this CPU', any other value means the raw hw_event
313 * ID.
314 */
315
316#define C(x) PERF_COUNT_HW_CACHE_##x
317
318static u64 __read_mostly hw_cache_event_ids
319 [PERF_COUNT_HW_CACHE_MAX] 55 [PERF_COUNT_HW_CACHE_MAX]
320 [PERF_COUNT_HW_CACHE_OP_MAX] 56 [PERF_COUNT_HW_CACHE_OP_MAX]
321 [PERF_COUNT_HW_CACHE_RESULT_MAX]; 57 [PERF_COUNT_HW_CACHE_RESULT_MAX];
322static u64 __read_mostly hw_cache_extra_regs 58u64 __read_mostly hw_cache_extra_regs
323 [PERF_COUNT_HW_CACHE_MAX] 59 [PERF_COUNT_HW_CACHE_MAX]
324 [PERF_COUNT_HW_CACHE_OP_MAX] 60 [PERF_COUNT_HW_CACHE_OP_MAX]
325 [PERF_COUNT_HW_CACHE_RESULT_MAX]; 61 [PERF_COUNT_HW_CACHE_RESULT_MAX];
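Most of this hunk is mechanical relocation: the shared data structures and constraint macros move into the new perf_event.h, and x86_pmu, cpu_hw_events and the cache tables lose their static qualifiers so that the vendor files, which used to be #include'd at the bottom of this file, can be built as separate objects. A condensed, purely illustrative sketch of the before/after:

/*
 * Before the split (one big translation unit):
 *
 *      static struct x86_pmu x86_pmu __read_mostly;
 *      ...
 *      #include "perf_event_intel.c"
 *      #include "perf_event_amd.c"
 *
 * After the split, each vendor file is compiled on its own and reaches
 * the shared state through the header:
 */
#include <linux/errno.h>
#include "perf_event.h"         /* declares: extern struct x86_pmu x86_pmu; */

static int example_vendor_check(void)
{
        /* Hypothetical; a real vendor init fills in x86_pmu instead. */
        return x86_pmu.num_counters ? 0 : -ENODEV;
}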
@@ -329,8 +65,7 @@ static u64 __read_mostly hw_cache_extra_regs
329 * Can only be executed on the CPU where the event is active. 65 * Can only be executed on the CPU where the event is active.
330 * Returns the delta events processed. 66 * Returns the delta events processed.
331 */ 67 */
332static u64 68u64 x86_perf_event_update(struct perf_event *event)
333x86_perf_event_update(struct perf_event *event)
334{ 69{
335 struct hw_perf_event *hwc = &event->hw; 70 struct hw_perf_event *hwc = &event->hw;
336 int shift = 64 - x86_pmu.cntval_bits; 71 int shift = 64 - x86_pmu.cntval_bits;
@@ -373,30 +108,6 @@ again:
373 return new_raw_count; 108 return new_raw_count;
374} 109}
375 110
376static inline int x86_pmu_addr_offset(int index)
377{
378 int offset;
379
380 /* offset = X86_FEATURE_PERFCTR_CORE ? index << 1 : index */
381 alternative_io(ASM_NOP2,
382 "shll $1, %%eax",
383 X86_FEATURE_PERFCTR_CORE,
384 "=a" (offset),
385 "a" (index));
386
387 return offset;
388}
389
390static inline unsigned int x86_pmu_config_addr(int index)
391{
392 return x86_pmu.eventsel + x86_pmu_addr_offset(index);
393}
394
395static inline unsigned int x86_pmu_event_addr(int index)
396{
397 return x86_pmu.perfctr + x86_pmu_addr_offset(index);
398}
399
400/* 111/*
401 * Find and validate any extra registers to set up. 112 * Find and validate any extra registers to set up.
402 */ 113 */
@@ -532,9 +243,6 @@ msr_fail:
532 return false; 243 return false;
533} 244}
534 245
535static void reserve_ds_buffers(void);
536static void release_ds_buffers(void);
537
538static void hw_perf_event_destroy(struct perf_event *event) 246static void hw_perf_event_destroy(struct perf_event *event)
539{ 247{
540 if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { 248 if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
@@ -583,7 +291,7 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
583 return x86_pmu_extra_regs(val, event); 291 return x86_pmu_extra_regs(val, event);
584} 292}
585 293
586static int x86_setup_perfctr(struct perf_event *event) 294int x86_setup_perfctr(struct perf_event *event)
587{ 295{
588 struct perf_event_attr *attr = &event->attr; 296 struct perf_event_attr *attr = &event->attr;
589 struct hw_perf_event *hwc = &event->hw; 297 struct hw_perf_event *hwc = &event->hw;
@@ -647,7 +355,7 @@ static int x86_setup_perfctr(struct perf_event *event)
647 return 0; 355 return 0;
648} 356}
649 357
650static int x86_pmu_hw_config(struct perf_event *event) 358int x86_pmu_hw_config(struct perf_event *event)
651{ 359{
652 if (event->attr.precise_ip) { 360 if (event->attr.precise_ip) {
653 int precise = 0; 361 int precise = 0;
@@ -723,7 +431,7 @@ static int __x86_pmu_event_init(struct perf_event *event)
723 return x86_pmu.hw_config(event); 431 return x86_pmu.hw_config(event);
724} 432}
725 433
726static void x86_pmu_disable_all(void) 434void x86_pmu_disable_all(void)
727{ 435{
728 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 436 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
729 int idx; 437 int idx;
@@ -758,15 +466,7 @@ static void x86_pmu_disable(struct pmu *pmu)
758 x86_pmu.disable_all(); 466 x86_pmu.disable_all();
759} 467}
760 468
761static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, 469void x86_pmu_enable_all(int added)
762 u64 enable_mask)
763{
764 if (hwc->extra_reg.reg)
765 wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config);
766 wrmsrl(hwc->config_base, hwc->config | enable_mask);
767}
768
769static void x86_pmu_enable_all(int added)
770{ 470{
771 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 471 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
772 int idx; 472 int idx;
@@ -788,7 +488,7 @@ static inline int is_x86_event(struct perf_event *event)
788 return event->pmu == &pmu; 488 return event->pmu == &pmu;
789} 489}
790 490
791static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) 491int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
792{ 492{
793 struct event_constraint *c, *constraints[X86_PMC_IDX_MAX]; 493 struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
794 unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; 494 unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
@@ -959,7 +659,6 @@ static inline int match_prev_assignment(struct hw_perf_event *hwc,
959} 659}
960 660
961static void x86_pmu_start(struct perf_event *event, int flags); 661static void x86_pmu_start(struct perf_event *event, int flags);
962static void x86_pmu_stop(struct perf_event *event, int flags);
963 662
964static void x86_pmu_enable(struct pmu *pmu) 663static void x86_pmu_enable(struct pmu *pmu)
965{ 664{
@@ -1031,21 +730,13 @@ static void x86_pmu_enable(struct pmu *pmu)
1031 x86_pmu.enable_all(added); 730 x86_pmu.enable_all(added);
1032} 731}
1033 732
1034static inline void x86_pmu_disable_event(struct perf_event *event)
1035{
1036 struct hw_perf_event *hwc = &event->hw;
1037
1038 wrmsrl(hwc->config_base, hwc->config);
1039}
1040
1041static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); 733static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
1042 734
1043/* 735/*
1044 * Set the next IRQ period, based on the hwc->period_left value. 736 * Set the next IRQ period, based on the hwc->period_left value.
1045 * To be called with the event disabled in hw: 737 * To be called with the event disabled in hw:
1046 */ 738 */
1047static int 739int x86_perf_event_set_period(struct perf_event *event)
1048x86_perf_event_set_period(struct perf_event *event)
1049{ 740{
1050 struct hw_perf_event *hwc = &event->hw; 741 struct hw_perf_event *hwc = &event->hw;
1051 s64 left = local64_read(&hwc->period_left); 742 s64 left = local64_read(&hwc->period_left);
@@ -1105,7 +796,7 @@ x86_perf_event_set_period(struct perf_event *event)
1105 return ret; 796 return ret;
1106} 797}
1107 798
1108static void x86_pmu_enable_event(struct perf_event *event) 799void x86_pmu_enable_event(struct perf_event *event)
1109{ 800{
1110 if (__this_cpu_read(cpu_hw_events.enabled)) 801 if (__this_cpu_read(cpu_hw_events.enabled))
1111 __x86_pmu_enable_event(&event->hw, 802 __x86_pmu_enable_event(&event->hw,
@@ -1244,7 +935,7 @@ void perf_event_print_debug(void)
1244 local_irq_restore(flags); 935 local_irq_restore(flags);
1245} 936}
1246 937
1247static void x86_pmu_stop(struct perf_event *event, int flags) 938void x86_pmu_stop(struct perf_event *event, int flags)
1248{ 939{
1249 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 940 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1250 struct hw_perf_event *hwc = &event->hw; 941 struct hw_perf_event *hwc = &event->hw;
@@ -1297,7 +988,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
1297 perf_event_update_userpage(event); 988 perf_event_update_userpage(event);
1298} 989}
1299 990
1300static int x86_pmu_handle_irq(struct pt_regs *regs) 991int x86_pmu_handle_irq(struct pt_regs *regs)
1301{ 992{
1302 struct perf_sample_data data; 993 struct perf_sample_data data;
1303 struct cpu_hw_events *cpuc; 994 struct cpu_hw_events *cpuc;
@@ -1367,109 +1058,28 @@ void perf_events_lapic_init(void)
1367 apic_write(APIC_LVTPC, APIC_DM_NMI); 1058 apic_write(APIC_LVTPC, APIC_DM_NMI);
1368} 1059}
1369 1060
1370struct pmu_nmi_state {
1371 unsigned int marked;
1372 int handled;
1373};
1374
1375static DEFINE_PER_CPU(struct pmu_nmi_state, pmu_nmi);
1376
1377static int __kprobes 1061static int __kprobes
1378perf_event_nmi_handler(struct notifier_block *self, 1062perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
1379 unsigned long cmd, void *__args)
1380{ 1063{
1381 struct die_args *args = __args;
1382 unsigned int this_nmi;
1383 int handled;
1384
1385 if (!atomic_read(&active_events)) 1064 if (!atomic_read(&active_events))
1386 return NOTIFY_DONE; 1065 return NMI_DONE;
1387
1388 switch (cmd) {
1389 case DIE_NMI:
1390 break;
1391 case DIE_NMIUNKNOWN:
1392 this_nmi = percpu_read(irq_stat.__nmi_count);
1393 if (this_nmi != __this_cpu_read(pmu_nmi.marked))
1394 /* let the kernel handle the unknown nmi */
1395 return NOTIFY_DONE;
1396 /*
1397 * This one is a PMU back-to-back nmi. Two events
1398 * trigger 'simultaneously' raising two back-to-back
1399 * NMIs. If the first NMI handles both, the latter
1400 * will be empty and daze the CPU. So, we drop it to
1401 * avoid false-positive 'unknown nmi' messages.
1402 */
1403 return NOTIFY_STOP;
1404 default:
1405 return NOTIFY_DONE;
1406 }
1407
1408 handled = x86_pmu.handle_irq(args->regs);
1409 if (!handled)
1410 return NOTIFY_DONE;
1411
1412 this_nmi = percpu_read(irq_stat.__nmi_count);
1413 if ((handled > 1) ||
1414 /* the next nmi could be a back-to-back nmi */
1415 ((__this_cpu_read(pmu_nmi.marked) == this_nmi) &&
1416 (__this_cpu_read(pmu_nmi.handled) > 1))) {
1417 /*
1418 * We could have two subsequent back-to-back nmis: The
1419 * first handles more than one counter, the 2nd
1420 * handles only one counter and the 3rd handles no
1421 * counter.
1422 *
1423 * This is the 2nd nmi because the previous was
1424 * handling more than one counter. We will mark the
1425 * next (3rd) and then drop it if unhandled.
1426 */
1427 __this_cpu_write(pmu_nmi.marked, this_nmi + 1);
1428 __this_cpu_write(pmu_nmi.handled, handled);
1429 }
1430 1066
1431 return NOTIFY_STOP; 1067 return x86_pmu.handle_irq(regs);
1432} 1068}
1433 1069
1434static __read_mostly struct notifier_block perf_event_nmi_notifier = { 1070struct event_constraint emptyconstraint;
1435 .notifier_call = perf_event_nmi_handler, 1071struct event_constraint unconstrained;
1436 .next = NULL,
1437 .priority = NMI_LOCAL_LOW_PRIOR,
1438};
1439
1440static struct event_constraint unconstrained;
1441static struct event_constraint emptyconstraint;
1442
1443static struct event_constraint *
1444x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
1445{
1446 struct event_constraint *c;
1447
1448 if (x86_pmu.event_constraints) {
1449 for_each_event_constraint(c, x86_pmu.event_constraints) {
1450 if ((event->hw.config & c->cmask) == c->code)
1451 return c;
1452 }
1453 }
1454
1455 return &unconstrained;
1456}
1457
1458#include "perf_event_amd.c"
1459#include "perf_event_p6.c"
1460#include "perf_event_p4.c"
1461#include "perf_event_intel_lbr.c"
1462#include "perf_event_intel_ds.c"
1463#include "perf_event_intel.c"
1464 1072
1465static int __cpuinit 1073static int __cpuinit
1466x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) 1074x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
1467{ 1075{
1468 unsigned int cpu = (long)hcpu; 1076 unsigned int cpu = (long)hcpu;
1077 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
1469 int ret = NOTIFY_OK; 1078 int ret = NOTIFY_OK;
1470 1079
1471 switch (action & ~CPU_TASKS_FROZEN) { 1080 switch (action & ~CPU_TASKS_FROZEN) {
1472 case CPU_UP_PREPARE: 1081 case CPU_UP_PREPARE:
1082 cpuc->kfree_on_online = NULL;
1473 if (x86_pmu.cpu_prepare) 1083 if (x86_pmu.cpu_prepare)
1474 ret = x86_pmu.cpu_prepare(cpu); 1084 ret = x86_pmu.cpu_prepare(cpu);
1475 break; 1085 break;
@@ -1479,6 +1089,10 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
1479 x86_pmu.cpu_starting(cpu); 1089 x86_pmu.cpu_starting(cpu);
1480 break; 1090 break;
1481 1091
1092 case CPU_ONLINE:
1093 kfree(cpuc->kfree_on_online);
1094 break;
1095
1482 case CPU_DYING: 1096 case CPU_DYING:
1483 if (x86_pmu.cpu_dying) 1097 if (x86_pmu.cpu_dying)
1484 x86_pmu.cpu_dying(cpu); 1098 x86_pmu.cpu_dying(cpu);
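The new kfree_on_online field exists because CPU_STARTING runs on the incoming CPU with interrupts disabled, where freeing a buffer that is being replaced by a sibling's copy is undesirable; the stale pointer is parked and only released from CPU_ONLINE, which runs in process context. A minimal sketch of the deferral, with a hypothetical per-CPU structure:

#include <linux/slab.h>

struct example_cpu_state {
        void *shared;           /* buffer shared with an HT sibling */
        void *kfree_on_online;  /* parked here during CPU_STARTING  */
};

static void example_cpu_starting(struct example_cpu_state *st, void *sibling_copy)
{
        st->kfree_on_online = st->shared;       /* do not kfree() here */
        st->shared = sibling_copy;
}

static void example_cpu_online(struct example_cpu_state *st)
{
        kfree(st->kfree_on_online);             /* safe: process context */
        st->kfree_on_online = NULL;
}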
@@ -1557,7 +1171,7 @@ static int __init init_hw_perf_events(void)
1557 ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; 1171 ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
1558 1172
1559 perf_events_lapic_init(); 1173 perf_events_lapic_init();
1560 register_die_notifier(&perf_event_nmi_notifier); 1174 register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI");
1561 1175
1562 unconstrained = (struct event_constraint) 1176 unconstrained = (struct event_constraint)
1563 __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, 1177 __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
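The NMI plumbing changes from the generic die-notifier chain to the dedicated NMI handler list introduced by the nmi.c rework in this same series: perf registers with register_nmi_handler(NMI_LOCAL, ...), returns NMI_DONE for NMIs it does not own, and otherwise returns how many events it handled, which lets the core NMI code do the back-to-back-NMI accounting that the deleted pmu_nmi bookkeeping used to do here. A hedged sketch of another consumer of the same API, with illustrative names:

#include <linux/init.h>
#include <asm/nmi.h>

static int example_pending;     /* set elsewhere when our device raises an NMI */

static int example_nmi_handler(unsigned int cmd, struct pt_regs *regs)
{
        if (!example_pending)
                return NMI_DONE;        /* not ours, let the next handler look */

        example_pending = 0;
        return 1;                       /* number of NMIs we consumed */
}

static int __init example_nmi_init(void)
{
        /* type, handler, flags, name -- same shape as the "PMI" registration */
        return register_nmi_handler(NMI_LOCAL, example_nmi_handler, 0, "example");
}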
@@ -1900,6 +1514,9 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
1900 1514
1901 perf_callchain_store(entry, regs->ip); 1515 perf_callchain_store(entry, regs->ip);
1902 1516
1517 if (!current->mm)
1518 return;
1519
1903 if (perf_callchain_user32(regs, entry)) 1520 if (perf_callchain_user32(regs, entry))
1904 return; 1521 return;
1905 1522
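The added !current->mm check makes perf_callchain_user() give up when there is no user address space to walk -- true for kernel threads and for a task that has already passed exit_mm() -- instead of attempting a user stack unwind against a NULL mm. The guard reduces to the usual idiom:

#include <linux/sched.h>

/* Illustrative helper: kernel threads (and exiting tasks) have current->mm == NULL. */
static inline bool example_has_user_stack(void)
{
        return current->mm != NULL;
}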
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
new file mode 100644
index 000000000000..b9698d40ac4b
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -0,0 +1,505 @@
1/*
2 * Performance events x86 architecture header
3 *
4 * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
5 * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
6 * Copyright (C) 2009 Jaswinder Singh Rajput
7 * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
8 * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
9 * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
10 * Copyright (C) 2009 Google, Inc., Stephane Eranian
11 *
12 * For licencing details see kernel-base/COPYING
13 */
14
15#include <linux/perf_event.h>
16
17/*
18 * | NHM/WSM | SNB |
19 * register -------------------------------
20 * | HT | no HT | HT | no HT |
21 *-----------------------------------------
22 * offcore | core | core | cpu | core |
23 * lbr_sel | core | core | cpu | core |
24 * ld_lat | cpu | core | cpu | core |
25 *-----------------------------------------
26 *
27 * Given that there is a small number of shared regs,
28 * we can pre-allocate their slot in the per-cpu
29 * per-core reg tables.
30 */
31enum extra_reg_type {
32 EXTRA_REG_NONE = -1, /* not used */
33
34 EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */
35 EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */
36
37 EXTRA_REG_MAX /* number of entries needed */
38};
39
40struct event_constraint {
41 union {
42 unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
43 u64 idxmsk64;
44 };
45 u64 code;
46 u64 cmask;
47 int weight;
48};
49
50struct amd_nb {
51 int nb_id; /* NorthBridge id */
52 int refcnt; /* reference count */
53 struct perf_event *owners[X86_PMC_IDX_MAX];
54 struct event_constraint event_constraints[X86_PMC_IDX_MAX];
55};
56
57/* The maximal number of PEBS events: */
58#define MAX_PEBS_EVENTS 4
59
60/*
61 * A debug store configuration.
62 *
63 * We only support architectures that use 64bit fields.
64 */
65struct debug_store {
66 u64 bts_buffer_base;
67 u64 bts_index;
68 u64 bts_absolute_maximum;
69 u64 bts_interrupt_threshold;
70 u64 pebs_buffer_base;
71 u64 pebs_index;
72 u64 pebs_absolute_maximum;
73 u64 pebs_interrupt_threshold;
74 u64 pebs_event_reset[MAX_PEBS_EVENTS];
75};
76
77/*
78 * Per register state.
79 */
80struct er_account {
81 raw_spinlock_t lock; /* per-core: protect structure */
82 u64 config; /* extra MSR config */
83 u64 reg; /* extra MSR number */
84 atomic_t ref; /* reference count */
85};
86
87/*
88 * Per core/cpu state
89 *
90 * Used to coordinate shared registers between HT threads or
91 * among events on a single PMU.
92 */
93struct intel_shared_regs {
94 struct er_account regs[EXTRA_REG_MAX];
95 int refcnt; /* per-core: #HT threads */
96 unsigned core_id; /* per-core: core id */
97};
98
99#define MAX_LBR_ENTRIES 16
100
101struct cpu_hw_events {
102 /*
103 * Generic x86 PMC bits
104 */
105 struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */
106 unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
107 unsigned long running[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
108 int enabled;
109
110 int n_events;
111 int n_added;
112 int n_txn;
113 int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
114 u64 tags[X86_PMC_IDX_MAX];
115 struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
116
117 unsigned int group_flag;
118
119 /*
120 * Intel DebugStore bits
121 */
122 struct debug_store *ds;
123 u64 pebs_enabled;
124
125 /*
126 * Intel LBR bits
127 */
128 int lbr_users;
129 void *lbr_context;
130 struct perf_branch_stack lbr_stack;
131 struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
132
133 /*
134 * Intel host/guest exclude bits
135 */
136 u64 intel_ctrl_guest_mask;
137 u64 intel_ctrl_host_mask;
138 struct perf_guest_switch_msr guest_switch_msrs[X86_PMC_IDX_MAX];
139
140 /*
141 * manage shared (per-core, per-cpu) registers
142 * used on Intel NHM/WSM/SNB
143 */
144 struct intel_shared_regs *shared_regs;
145
146 /*
147 * AMD specific bits
148 */
149 struct amd_nb *amd_nb;
150
151 void *kfree_on_online;
152};
153
154#define __EVENT_CONSTRAINT(c, n, m, w) {\
155 { .idxmsk64 = (n) }, \
156 .code = (c), \
157 .cmask = (m), \
158 .weight = (w), \
159}
160
161#define EVENT_CONSTRAINT(c, n, m) \
162 __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
163
164/*
165 * Constraint on the Event code.
166 */
167#define INTEL_EVENT_CONSTRAINT(c, n) \
168 EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
169
170/*
171 * Constraint on the Event code + UMask + fixed-mask
172 *
173 * filter mask to validate fixed counter events.
174 * the following filters disqualify for fixed counters:
175 * - inv
176 * - edge
177 * - cnt-mask
178 * The other filters are supported by fixed counters.
179 * The any-thread option is supported starting with v3.
180 */
181#define FIXED_EVENT_CONSTRAINT(c, n) \
182 EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK)
183
184/*
185 * Constraint on the Event code + UMask
186 */
187#define INTEL_UEVENT_CONSTRAINT(c, n) \
188 EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
189
190#define EVENT_CONSTRAINT_END \
191 EVENT_CONSTRAINT(0, 0, 0)
192
193#define for_each_event_constraint(e, c) \
194 for ((e) = (c); (e)->weight; (e)++)
195
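These macros are used to build the per-model constraint tables: each entry pairs an event code (optionally including its unit mask) with the bitmask of counters the event may occupy, and EVENT_CONSTRAINT_END terminates the table with a zero-weight entry so for_each_event_constraint() knows where to stop. A usage sketch with made-up event codes, mirroring the lookup that x86_get_event_constraints() performs:

#include "perf_event.h"

static struct event_constraint example_event_constraints[] = {
        INTEL_EVENT_CONSTRAINT(0x48, 0x4),      /* hypothetical: counter 2 only */
        INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2),   /* hypothetical: event+umask, counter 1 */
        EVENT_CONSTRAINT_END
};

static struct event_constraint *
example_lookup(struct perf_event *event)
{
        struct event_constraint *c;

        for_each_event_constraint(c, example_event_constraints) {
                if ((event->hw.config & c->cmask) == c->code)
                        return c;
        }
        return NULL;
}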
196/*
197 * Extra registers for specific events.
198 *
199 * Some events need large masks and require external MSRs.
200 * Those extra MSRs end up being shared for all events on
201 * a PMU and sometimes between PMU of sibling HT threads.
202 * In either case, the kernel needs to handle conflicting
203 * accesses to those extra, shared, regs. The data structure
204 * to manage those registers is stored in cpu_hw_event.
205 */
206struct extra_reg {
207 unsigned int event;
208 unsigned int msr;
209 u64 config_mask;
210 u64 valid_mask;
211 int idx; /* per_xxx->regs[] reg index */
212};
213
214#define EVENT_EXTRA_REG(e, ms, m, vm, i) { \
215 .event = (e), \
216 .msr = (ms), \
217 .config_mask = (m), \
218 .valid_mask = (vm), \
219 .idx = EXTRA_REG_##i \
220 }
221
222#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \
223 EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)
224
225#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)
226
227union perf_capabilities {
228 struct {
229 u64 lbr_format:6;
230 u64 pebs_trap:1;
231 u64 pebs_arch_reg:1;
232 u64 pebs_format:4;
233 u64 smm_freeze:1;
234 };
235 u64 capabilities;
236};
237
238/*
239 * struct x86_pmu - generic x86 pmu
240 */
241struct x86_pmu {
242 /*
243 * Generic x86 PMC bits
244 */
245 const char *name;
246 int version;
247 int (*handle_irq)(struct pt_regs *);
248 void (*disable_all)(void);
249 void (*enable_all)(int added);
250 void (*enable)(struct perf_event *);
251 void (*disable)(struct perf_event *);
252 int (*hw_config)(struct perf_event *event);
253 int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
254 unsigned eventsel;
255 unsigned perfctr;
256 u64 (*event_map)(int);
257 int max_events;
258 int num_counters;
259 int num_counters_fixed;
260 int cntval_bits;
261 u64 cntval_mask;
262 int apic;
263 u64 max_period;
264 struct event_constraint *
265 (*get_event_constraints)(struct cpu_hw_events *cpuc,
266 struct perf_event *event);
267
268 void (*put_event_constraints)(struct cpu_hw_events *cpuc,
269 struct perf_event *event);
270 struct event_constraint *event_constraints;
271 void (*quirks)(void);
272 int perfctr_second_write;
273
274 int (*cpu_prepare)(int cpu);
275 void (*cpu_starting)(int cpu);
276 void (*cpu_dying)(int cpu);
277 void (*cpu_dead)(int cpu);
278
279 /*
280 * Intel Arch Perfmon v2+
281 */
282 u64 intel_ctrl;
283 union perf_capabilities intel_cap;
284
285 /*
286 * Intel DebugStore bits
287 */
288 int bts, pebs;
289 int bts_active, pebs_active;
290 int pebs_record_size;
291 void (*drain_pebs)(struct pt_regs *regs);
292 struct event_constraint *pebs_constraints;
293
294 /*
295 * Intel LBR
296 */
297 unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */
298 int lbr_nr; /* hardware stack size */
299
300 /*
301 * Extra registers for events
302 */
303 struct extra_reg *extra_regs;
304 unsigned int er_flags;
305
306 /*
307 * Intel host/guest support (KVM)
308 */
309 struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);
310};
311
312#define ERF_NO_HT_SHARING 1
313#define ERF_HAS_RSP_1 2
314
315extern struct x86_pmu x86_pmu __read_mostly;
316
317DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
318
319int x86_perf_event_set_period(struct perf_event *event);
320
321/*
322 * Generalized hw caching related hw_event table, filled
323 * in on a per model basis. A value of 0 means
324 * 'not supported', -1 means 'hw_event makes no sense on
325 * this CPU', any other value means the raw hw_event
326 * ID.
327 */
328
329#define C(x) PERF_COUNT_HW_CACHE_##x
330
331extern u64 __read_mostly hw_cache_event_ids
332 [PERF_COUNT_HW_CACHE_MAX]
333 [PERF_COUNT_HW_CACHE_OP_MAX]
334 [PERF_COUNT_HW_CACHE_RESULT_MAX];
335extern u64 __read_mostly hw_cache_extra_regs
336 [PERF_COUNT_HW_CACHE_MAX]
337 [PERF_COUNT_HW_CACHE_OP_MAX]
338 [PERF_COUNT_HW_CACHE_RESULT_MAX];
339
340u64 x86_perf_event_update(struct perf_event *event);
341
342static inline int x86_pmu_addr_offset(int index)
343{
344 int offset;
345
346 /* offset = X86_FEATURE_PERFCTR_CORE ? index << 1 : index */
347 alternative_io(ASM_NOP2,
348 "shll $1, %%eax",
349 X86_FEATURE_PERFCTR_CORE,
350 "=a" (offset),
351 "a" (index));
352
353 return offset;
354}
355
356static inline unsigned int x86_pmu_config_addr(int index)
357{
358 return x86_pmu.eventsel + x86_pmu_addr_offset(index);
359}
360
361static inline unsigned int x86_pmu_event_addr(int index)
362{
363 return x86_pmu.perfctr + x86_pmu_addr_offset(index);
364}
365
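x86_pmu_config_addr() and x86_pmu_event_addr() turn a counter index into the actual MSR numbers. The alternative_io() above is patched at boot: when X86_FEATURE_PERFCTR_CORE is set (AMD family 15h), event-select and counter MSRs are interleaved, so the per-index offset doubles. The plain-C equivalent of that patched sequence is simply:

#include <linux/types.h>

/* Illustrative, non-self-patching version of x86_pmu_addr_offset(). */
static inline int example_addr_offset(int index, bool has_perfctr_core)
{
        return has_perfctr_core ? index << 1 : index;
}

/* e.g. counter 3's event select sits at x86_pmu.eventsel + 6 on a
 * PERFCTR_CORE machine, and at x86_pmu.eventsel + 3 otherwise. */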
366int x86_setup_perfctr(struct perf_event *event);
367
368int x86_pmu_hw_config(struct perf_event *event);
369
370void x86_pmu_disable_all(void);
371
372static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
373 u64 enable_mask)
374{
375 if (hwc->extra_reg.reg)
376 wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config);
377 wrmsrl(hwc->config_base, hwc->config | enable_mask);
378}
379
380void x86_pmu_enable_all(int added);
381
382int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);
383
384void x86_pmu_stop(struct perf_event *event, int flags);
385
386static inline void x86_pmu_disable_event(struct perf_event *event)
387{
388 struct hw_perf_event *hwc = &event->hw;
389
390 wrmsrl(hwc->config_base, hwc->config);
391}
392
393void x86_pmu_enable_event(struct perf_event *event);
394
395int x86_pmu_handle_irq(struct pt_regs *regs);
396
397extern struct event_constraint emptyconstraint;
398
399extern struct event_constraint unconstrained;
400
401#ifdef CONFIG_CPU_SUP_AMD
402
403int amd_pmu_init(void);
404
405#else /* CONFIG_CPU_SUP_AMD */
406
407static inline int amd_pmu_init(void)
408{
409 return 0;
410}
411
412#endif /* CONFIG_CPU_SUP_AMD */
413
414#ifdef CONFIG_CPU_SUP_INTEL
415
416int intel_pmu_save_and_restart(struct perf_event *event);
417
418struct event_constraint *
419x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event);
420
421struct intel_shared_regs *allocate_shared_regs(int cpu);
422
423int intel_pmu_init(void);
424
425void init_debug_store_on_cpu(int cpu);
426
427void fini_debug_store_on_cpu(int cpu);
428
429void release_ds_buffers(void);
430
431void reserve_ds_buffers(void);
432
433extern struct event_constraint bts_constraint;
434
435void intel_pmu_enable_bts(u64 config);
436
437void intel_pmu_disable_bts(void);
438
439int intel_pmu_drain_bts_buffer(void);
440
441extern struct event_constraint intel_core2_pebs_event_constraints[];
442
443extern struct event_constraint intel_atom_pebs_event_constraints[];
444
445extern struct event_constraint intel_nehalem_pebs_event_constraints[];
446
447extern struct event_constraint intel_westmere_pebs_event_constraints[];
448
449extern struct event_constraint intel_snb_pebs_event_constraints[];
450
451struct event_constraint *intel_pebs_constraints(struct perf_event *event);
452
453void intel_pmu_pebs_enable(struct perf_event *event);
454
455void intel_pmu_pebs_disable(struct perf_event *event);
456
457void intel_pmu_pebs_enable_all(void);
458
459void intel_pmu_pebs_disable_all(void);
460
461void intel_ds_init(void);
462
463void intel_pmu_lbr_reset(void);
464
465void intel_pmu_lbr_enable(struct perf_event *event);
466
467void intel_pmu_lbr_disable(struct perf_event *event);
468
469void intel_pmu_lbr_enable_all(void);
470
471void intel_pmu_lbr_disable_all(void);
472
473void intel_pmu_lbr_read(void);
474
475void intel_pmu_lbr_init_core(void);
476
477void intel_pmu_lbr_init_nhm(void);
478
479void intel_pmu_lbr_init_atom(void);
480
481int p4_pmu_init(void);
482
483int p6_pmu_init(void);
484
485#else /* CONFIG_CPU_SUP_INTEL */
486
487static inline void reserve_ds_buffers(void)
488{
489}
490
491static inline void release_ds_buffers(void)
492{
493}
494
495static inline int intel_pmu_init(void)
496{
497 return 0;
498}
499
500static inline struct intel_shared_regs *allocate_shared_regs(int cpu)
501{
502 return NULL;
503}
504
505#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 941caa2e449b..aeefd45697a2 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -1,4 +1,10 @@
1#ifdef CONFIG_CPU_SUP_AMD 1#include <linux/perf_event.h>
2#include <linux/types.h>
3#include <linux/init.h>
4#include <linux/slab.h>
5#include <asm/apicdef.h>
6
7#include "perf_event.h"
2 8
3static __initconst const u64 amd_hw_cache_event_ids 9static __initconst const u64 amd_hw_cache_event_ids
4 [PERF_COUNT_HW_CACHE_MAX] 10 [PERF_COUNT_HW_CACHE_MAX]
@@ -132,6 +138,19 @@ static int amd_pmu_hw_config(struct perf_event *event)
132 if (ret) 138 if (ret)
133 return ret; 139 return ret;
134 140
141 if (event->attr.exclude_host && event->attr.exclude_guest)
142 /*
143 * When HO == GO == 1 the hardware treats that as GO == HO == 0
144 * and will count in both modes. We don't want to count in that
145 * case so we emulate no-counting by setting US = OS = 0.
146 */
147 event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
148 ARCH_PERFMON_EVENTSEL_OS);
149 else if (event->attr.exclude_host)
150 event->hw.config |= AMD_PERFMON_EVENTSEL_GUESTONLY;
151 else if (event->attr.exclude_guest)
152 event->hw.config |= AMD_PERFMON_EVENTSEL_HOSTONLY;
153
135 if (event->attr.type != PERF_TYPE_RAW) 154 if (event->attr.type != PERF_TYPE_RAW)
136 return 0; 155 return 0;
137 156
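The added block maps perf_event_attr's exclude_host/exclude_guest bits onto AMD's GuestOnly/HostOnly EVENTSEL bits; since the hardware treats HO == GO == 1 as "count in both modes", that combination is instead emulated by clearing the USR and OS bits so that nothing is counted. From userspace the feature is requested like any other attribute bit; a minimal runnable sketch (error handling trimmed, and it assumes kernel headers that already carry the new bits):

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        struct perf_event_attr attr;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.type = PERF_TYPE_HARDWARE;
        attr.size = sizeof(attr);
        attr.config = PERF_COUNT_HW_CPU_CYCLES;
        attr.exclude_guest = 1;         /* on AMD: sets the HostOnly EVENTSEL bit */

        fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
        return fd < 0 ? 1 : 0;
}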
@@ -350,7 +369,7 @@ static void amd_pmu_cpu_starting(int cpu)
350 continue; 369 continue;
351 370
352 if (nb->nb_id == nb_id) { 371 if (nb->nb_id == nb_id) {
353 kfree(cpuc->amd_nb); 372 cpuc->kfree_on_online = cpuc->amd_nb;
354 cpuc->amd_nb = nb; 373 cpuc->amd_nb = nb;
355 break; 374 break;
356 } 375 }
@@ -392,7 +411,7 @@ static __initconst const struct x86_pmu amd_pmu = {
392 .perfctr = MSR_K7_PERFCTR0, 411 .perfctr = MSR_K7_PERFCTR0,
393 .event_map = amd_pmu_event_map, 412 .event_map = amd_pmu_event_map,
394 .max_events = ARRAY_SIZE(amd_perfmon_event_map), 413 .max_events = ARRAY_SIZE(amd_perfmon_event_map),
395 .num_counters = 4, 414 .num_counters = AMD64_NUM_COUNTERS,
396 .cntval_bits = 48, 415 .cntval_bits = 48,
397 .cntval_mask = (1ULL << 48) - 1, 416 .cntval_mask = (1ULL << 48) - 1,
398 .apic = 1, 417 .apic = 1,
@@ -556,7 +575,7 @@ static __initconst const struct x86_pmu amd_pmu_f15h = {
556 .perfctr = MSR_F15H_PERF_CTR, 575 .perfctr = MSR_F15H_PERF_CTR,
557 .event_map = amd_pmu_event_map, 576 .event_map = amd_pmu_event_map,
558 .max_events = ARRAY_SIZE(amd_perfmon_event_map), 577 .max_events = ARRAY_SIZE(amd_perfmon_event_map),
559 .num_counters = 6, 578 .num_counters = AMD64_NUM_COUNTERS_F15H,
560 .cntval_bits = 48, 579 .cntval_bits = 48,
561 .cntval_mask = (1ULL << 48) - 1, 580 .cntval_mask = (1ULL << 48) - 1,
562 .apic = 1, 581 .apic = 1,
@@ -573,7 +592,7 @@ static __initconst const struct x86_pmu amd_pmu_f15h = {
573#endif 592#endif
574}; 593};
575 594
576static __init int amd_pmu_init(void) 595__init int amd_pmu_init(void)
577{ 596{
578 /* Performance-monitoring supported from K7 and later: */ 597 /* Performance-monitoring supported from K7 and later: */
579 if (boot_cpu_data.x86 < 6) 598 if (boot_cpu_data.x86 < 6)
@@ -602,12 +621,3 @@ static __init int amd_pmu_init(void)
602 621
603 return 0; 622 return 0;
604} 623}
605
606#else /* CONFIG_CPU_SUP_AMD */
607
608static int amd_pmu_init(void)
609{
610 return 0;
611}
612
613#endif
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
new file mode 100644
index 000000000000..ab6343d21825
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -0,0 +1,294 @@
1/*
2 * Performance events - AMD IBS
3 *
4 * Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter
5 *
6 * For licencing details see kernel-base/COPYING
7 */
8
9#include <linux/perf_event.h>
10#include <linux/module.h>
11#include <linux/pci.h>
12
13#include <asm/apic.h>
14
15static u32 ibs_caps;
16
17#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
18
19static struct pmu perf_ibs;
20
21static int perf_ibs_init(struct perf_event *event)
22{
23 if (perf_ibs.type != event->attr.type)
24 return -ENOENT;
25 return 0;
26}
27
28static int perf_ibs_add(struct perf_event *event, int flags)
29{
30 return 0;
31}
32
33static void perf_ibs_del(struct perf_event *event, int flags)
34{
35}
36
37static struct pmu perf_ibs = {
 38	.event_init	= perf_ibs_init,
 39	.add		= perf_ibs_add,
 40	.del		= perf_ibs_del,
41};
42
43static __init int perf_event_ibs_init(void)
44{
45 if (!ibs_caps)
46 return -ENODEV; /* ibs not supported by the cpu */
47
48 perf_pmu_register(&perf_ibs, "ibs", -1);
49 printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);
50
51 return 0;
52}
53
54#else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */
55
56static __init int perf_event_ibs_init(void) { return 0; }
57
58#endif
59
60/* IBS - apic initialization, for perf and oprofile */
61
62static __init u32 __get_ibs_caps(void)
63{
64 u32 caps;
65 unsigned int max_level;
66
67 if (!boot_cpu_has(X86_FEATURE_IBS))
68 return 0;
69
70 /* check IBS cpuid feature flags */
71 max_level = cpuid_eax(0x80000000);
72 if (max_level < IBS_CPUID_FEATURES)
73 return IBS_CAPS_DEFAULT;
74
75 caps = cpuid_eax(IBS_CPUID_FEATURES);
76 if (!(caps & IBS_CAPS_AVAIL))
77 /* cpuid flags not valid */
78 return IBS_CAPS_DEFAULT;
79
80 return caps;
81}
82
83u32 get_ibs_caps(void)
84{
85 return ibs_caps;
86}
87
88EXPORT_SYMBOL(get_ibs_caps);
89
90static inline int get_eilvt(int offset)
91{
92 return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1);
93}
94
95static inline int put_eilvt(int offset)
96{
97 return !setup_APIC_eilvt(offset, 0, 0, 1);
98}
99
100/*
101 * Check and reserve APIC extended interrupt LVT offset for IBS if available.
102 */
103static inline int ibs_eilvt_valid(void)
104{
105 int offset;
106 u64 val;
107 int valid = 0;
108
109 preempt_disable();
110
111 rdmsrl(MSR_AMD64_IBSCTL, val);
112 offset = val & IBSCTL_LVT_OFFSET_MASK;
113
114 if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
115 pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n",
116 smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
117 goto out;
118 }
119
120 if (!get_eilvt(offset)) {
121 pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n",
122 smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
123 goto out;
124 }
125
126 valid = 1;
127out:
128 preempt_enable();
129
130 return valid;
131}
132
133static int setup_ibs_ctl(int ibs_eilvt_off)
134{
135 struct pci_dev *cpu_cfg;
136 int nodes;
137 u32 value = 0;
138
139 nodes = 0;
140 cpu_cfg = NULL;
141 do {
142 cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD,
143 PCI_DEVICE_ID_AMD_10H_NB_MISC,
144 cpu_cfg);
145 if (!cpu_cfg)
146 break;
147 ++nodes;
148 pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off
149 | IBSCTL_LVT_OFFSET_VALID);
150 pci_read_config_dword(cpu_cfg, IBSCTL, &value);
151 if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) {
152 pci_dev_put(cpu_cfg);
153 printk(KERN_DEBUG "Failed to setup IBS LVT offset, "
154 "IBSCTL = 0x%08x\n", value);
155 return -EINVAL;
156 }
157 } while (1);
158
159 if (!nodes) {
160 printk(KERN_DEBUG "No CPU node configured for IBS\n");
161 return -ENODEV;
162 }
163
164 return 0;
165}
166
167/*
168 * This runs only on the current cpu. We try to find an LVT offset and
 169 * set up the local APIC. For this we must disable preemption. On
 170 * success we initialize all nodes with this offset; this then updates
 171 * the offset in the per-node IBS_CTL msr. The per-core APIC setup of
 172 * the IBS interrupt vector is handled by perf_ibs_cpu_notifier, which
 173 * uses the new offset.
174 */
175static int force_ibs_eilvt_setup(void)
176{
177 int offset;
178 int ret;
179
180 preempt_disable();
181 /* find the next free available EILVT entry, skip offset 0 */
182 for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) {
183 if (get_eilvt(offset))
184 break;
185 }
186 preempt_enable();
187
188 if (offset == APIC_EILVT_NR_MAX) {
189 printk(KERN_DEBUG "No EILVT entry available\n");
190 return -EBUSY;
191 }
192
193 ret = setup_ibs_ctl(offset);
194 if (ret)
195 goto out;
196
197 if (!ibs_eilvt_valid()) {
198 ret = -EFAULT;
199 goto out;
200 }
201
202 pr_err(FW_BUG "using offset %d for IBS interrupts\n", offset);
203 pr_err(FW_BUG "workaround enabled for IBS LVT offset\n");
204
205 return 0;
206out:
207 preempt_disable();
208 put_eilvt(offset);
209 preempt_enable();
210 return ret;
211}
212
213static inline int get_ibs_lvt_offset(void)
214{
215 u64 val;
216
217 rdmsrl(MSR_AMD64_IBSCTL, val);
218 if (!(val & IBSCTL_LVT_OFFSET_VALID))
219 return -EINVAL;
220
221 return val & IBSCTL_LVT_OFFSET_MASK;
222}
223
224static void setup_APIC_ibs(void *dummy)
225{
226 int offset;
227
228 offset = get_ibs_lvt_offset();
229 if (offset < 0)
230 goto failed;
231
232 if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0))
233 return;
234failed:
235 pr_warn("perf: IBS APIC setup failed on cpu #%d\n",
236 smp_processor_id());
237}
238
239static void clear_APIC_ibs(void *dummy)
240{
241 int offset;
242
243 offset = get_ibs_lvt_offset();
244 if (offset >= 0)
245 setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
246}
247
248static int __cpuinit
249perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
250{
251 switch (action & ~CPU_TASKS_FROZEN) {
252 case CPU_STARTING:
253 setup_APIC_ibs(NULL);
254 break;
255 case CPU_DYING:
256 clear_APIC_ibs(NULL);
257 break;
258 default:
259 break;
260 }
261
262 return NOTIFY_OK;
263}
264
265static __init int amd_ibs_init(void)
266{
267 u32 caps;
268 int ret;
269
270 caps = __get_ibs_caps();
271 if (!caps)
272 return -ENODEV; /* ibs not supported by the cpu */
273
274 if (!ibs_eilvt_valid()) {
275 ret = force_ibs_eilvt_setup();
276 if (ret) {
277 pr_err("Failed to setup IBS, %d\n", ret);
278 return ret;
279 }
280 }
281
282 get_online_cpus();
283 ibs_caps = caps;
284 /* make ibs_caps visible to other cpus: */
285 smp_mb();
286 perf_cpu_notifier(perf_ibs_cpu_notifier);
287 smp_call_function(setup_APIC_ibs, NULL, 1);
288 put_online_cpus();
289
290 return perf_event_ibs_init();
291}
292
293/* Since we need the pci subsystem to init ibs we can't do this earlier: */
294device_initcall(amd_ibs_init);
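At this point the IBS PMU is only a skeleton: perf_pmu_register(&perf_ibs, "ibs", -1) asks the core to allocate a dynamic type number, perf_ibs_init() rejects events whose attr.type does not match it, and the add/del callbacks are still stubs; the rest of the file deals with finding and programming an extended-LVT slot for the IBS interrupt. Assuming the dynamic PMU is exported under /sys/bus/event_source/devices/ibs (path and availability depend on the running kernel), userspace would look up the type before opening an event:

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        struct perf_event_attr attr;
        FILE *f = fopen("/sys/bus/event_source/devices/ibs/type", "r");
        int type, fd;

        if (!f || fscanf(f, "%d", &type) != 1)
                return 1;
        fclose(f);

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = type;       /* matched against perf_ibs.type in perf_ibs_init() */

        fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
        return fd < 0 ? 1 : 0;
}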
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 45fbb8f7f549..2be5ebe99872 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1,16 +1,20 @@
1#ifdef CONFIG_CPU_SUP_INTEL
2
3/* 1/*
4 * Per core/cpu state 2 * Per core/cpu state
5 * 3 *
6 * Used to coordinate shared registers between HT threads or 4 * Used to coordinate shared registers between HT threads or
7 * among events on a single PMU. 5 * among events on a single PMU.
8 */ 6 */
9struct intel_shared_regs { 7
10 struct er_account regs[EXTRA_REG_MAX]; 8#include <linux/stddef.h>
11 int refcnt; /* per-core: #HT threads */ 9#include <linux/types.h>
12 unsigned core_id; /* per-core: core id */ 10#include <linux/init.h>
13}; 11#include <linux/slab.h>
12#include <linux/export.h>
13
14#include <asm/hardirq.h>
15#include <asm/apic.h>
16
17#include "perf_event.h"
14 18
15/* 19/*
16 * Intel PerfMon, used on Core and later. 20 * Intel PerfMon, used on Core and later.
@@ -746,7 +750,8 @@ static void intel_pmu_enable_all(int added)
746 750
747 intel_pmu_pebs_enable_all(); 751 intel_pmu_pebs_enable_all();
748 intel_pmu_lbr_enable_all(); 752 intel_pmu_lbr_enable_all();
749 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); 753 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
754 x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask);
750 755
751 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { 756 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
752 struct perf_event *event = 757 struct perf_event *event =
@@ -869,6 +874,7 @@ static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
869static void intel_pmu_disable_event(struct perf_event *event) 874static void intel_pmu_disable_event(struct perf_event *event)
870{ 875{
871 struct hw_perf_event *hwc = &event->hw; 876 struct hw_perf_event *hwc = &event->hw;
877 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
872 878
873 if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { 879 if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
874 intel_pmu_disable_bts(); 880 intel_pmu_disable_bts();
@@ -876,6 +882,9 @@ static void intel_pmu_disable_event(struct perf_event *event)
876 return; 882 return;
877 } 883 }
878 884
885 cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
886 cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
887
879 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { 888 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
880 intel_pmu_disable_fixed(hwc); 889 intel_pmu_disable_fixed(hwc);
881 return; 890 return;
@@ -921,6 +930,7 @@ static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
921static void intel_pmu_enable_event(struct perf_event *event) 930static void intel_pmu_enable_event(struct perf_event *event)
922{ 931{
923 struct hw_perf_event *hwc = &event->hw; 932 struct hw_perf_event *hwc = &event->hw;
933 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
924 934
925 if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { 935 if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
926 if (!__this_cpu_read(cpu_hw_events.enabled)) 936 if (!__this_cpu_read(cpu_hw_events.enabled))
@@ -930,6 +940,11 @@ static void intel_pmu_enable_event(struct perf_event *event)
930 return; 940 return;
931 } 941 }
932 942
943 if (event->attr.exclude_host)
944 cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
945 if (event->attr.exclude_guest)
946 cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx);
947
933 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { 948 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
934 intel_pmu_enable_fixed(hwc); 949 intel_pmu_enable_fixed(hwc);
935 return; 950 return;
@@ -945,7 +960,7 @@ static void intel_pmu_enable_event(struct perf_event *event)
945 * Save and restart an expired event. Called by NMI contexts, 960 * Save and restart an expired event. Called by NMI contexts,
946 * so it has to be careful about preempting normal event ops: 961 * so it has to be careful about preempting normal event ops:
947 */ 962 */
948static int intel_pmu_save_and_restart(struct perf_event *event) 963int intel_pmu_save_and_restart(struct perf_event *event)
949{ 964{
950 x86_perf_event_update(event); 965 x86_perf_event_update(event);
951 return x86_perf_event_set_period(event); 966 return x86_perf_event_set_period(event);
@@ -1197,6 +1212,21 @@ intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
1197 return c; 1212 return c;
1198} 1213}
1199 1214
1215struct event_constraint *
1216x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
1217{
1218 struct event_constraint *c;
1219
1220 if (x86_pmu.event_constraints) {
1221 for_each_event_constraint(c, x86_pmu.event_constraints) {
1222 if ((event->hw.config & c->cmask) == c->code)
1223 return c;
1224 }
1225 }
1226
1227 return &unconstrained;
1228}
1229
1200static struct event_constraint * 1230static struct event_constraint *
1201intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) 1231intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
1202{ 1232{
@@ -1284,12 +1314,84 @@ static int intel_pmu_hw_config(struct perf_event *event)
1284 return 0; 1314 return 0;
1285} 1315}
1286 1316
1317struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
1318{
1319 if (x86_pmu.guest_get_msrs)
1320 return x86_pmu.guest_get_msrs(nr);
1321 *nr = 0;
1322 return NULL;
1323}
1324EXPORT_SYMBOL_GPL(perf_guest_get_msrs);
1325
1326static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
1327{
1328 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1329 struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
1330
1331 arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
1332 arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
1333 arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask;
1334
1335 *nr = 1;
1336 return arr;
1337}
1338
1339static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr)
1340{
1341 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1342 struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
1343 int idx;
1344
1345 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1346 struct perf_event *event = cpuc->events[idx];
1347
1348 arr[idx].msr = x86_pmu_config_addr(idx);
1349 arr[idx].host = arr[idx].guest = 0;
1350
1351 if (!test_bit(idx, cpuc->active_mask))
1352 continue;
1353
1354 arr[idx].host = arr[idx].guest =
1355 event->hw.config | ARCH_PERFMON_EVENTSEL_ENABLE;
1356
1357 if (event->attr.exclude_host)
1358 arr[idx].host &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
1359 else if (event->attr.exclude_guest)
1360 arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
1361 }
1362
1363 *nr = x86_pmu.num_counters;
1364 return arr;
1365}
1366
1367static void core_pmu_enable_event(struct perf_event *event)
1368{
1369 if (!event->attr.exclude_host)
1370 x86_pmu_enable_event(event);
1371}
1372
1373static void core_pmu_enable_all(int added)
1374{
1375 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1376 int idx;
1377
1378 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1379 struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
1380
1381 if (!test_bit(idx, cpuc->active_mask) ||
1382 cpuc->events[idx]->attr.exclude_host)
1383 continue;
1384
1385 __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
1386 }
1387}
1388
1287static __initconst const struct x86_pmu core_pmu = { 1389static __initconst const struct x86_pmu core_pmu = {
1288 .name = "core", 1390 .name = "core",
1289 .handle_irq = x86_pmu_handle_irq, 1391 .handle_irq = x86_pmu_handle_irq,
1290 .disable_all = x86_pmu_disable_all, 1392 .disable_all = x86_pmu_disable_all,
1291 .enable_all = x86_pmu_enable_all, 1393 .enable_all = core_pmu_enable_all,
1292 .enable = x86_pmu_enable_event, 1394 .enable = core_pmu_enable_event,
1293 .disable = x86_pmu_disable_event, 1395 .disable = x86_pmu_disable_event,
1294 .hw_config = x86_pmu_hw_config, 1396 .hw_config = x86_pmu_hw_config,
1295 .schedule_events = x86_schedule_events, 1397 .schedule_events = x86_schedule_events,
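perf_guest_get_msrs() is the hook a hypervisor uses so that host-only counters stop counting inside a guest: it returns an array of {msr, host value, guest value} triples derived from intel_ctrl_host_mask/intel_ctrl_guest_mask (or from the per-counter event-select values on core_pmu), and the caller loads the guest values around VM entry and restores the host values after VM exit. A hedged sketch of such a consumer; the declaration is assumed to live in <asm/perf_event.h>, and the write helper is a placeholder, not a real API:

#include <linux/types.h>
#include <asm/perf_event.h>

/* Placeholder: a real hypervisor queues these into its VM-entry/exit
 * MSR-switch lists rather than writing them synchronously. */
static void example_switch_msr(unsigned int msr, u64 val)
{
        (void)msr;
        (void)val;
}

static void example_before_vmentry(void)
{
        struct perf_guest_switch_msr *msrs;
        int i, nr;

        msrs = perf_guest_get_msrs(&nr);
        for (i = 0; i < nr; i++)
                example_switch_msr(msrs[i].msr, msrs[i].guest);
}

static void example_after_vmexit(void)
{
        struct perf_guest_switch_msr *msrs;
        int i, nr;

        msrs = perf_guest_get_msrs(&nr);
        for (i = 0; i < nr; i++)
                example_switch_msr(msrs[i].msr, msrs[i].host);
}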
@@ -1307,9 +1409,10 @@ static __initconst const struct x86_pmu core_pmu = {
1307 .get_event_constraints = intel_get_event_constraints, 1409 .get_event_constraints = intel_get_event_constraints,
1308 .put_event_constraints = intel_put_event_constraints, 1410 .put_event_constraints = intel_put_event_constraints,
1309 .event_constraints = intel_core_event_constraints, 1411 .event_constraints = intel_core_event_constraints,
1412 .guest_get_msrs = core_guest_get_msrs,
1310}; 1413};
1311 1414
1312static struct intel_shared_regs *allocate_shared_regs(int cpu) 1415struct intel_shared_regs *allocate_shared_regs(int cpu)
1313{ 1416{
1314 struct intel_shared_regs *regs; 1417 struct intel_shared_regs *regs;
1315 int i; 1418 int i;
@@ -1362,7 +1465,7 @@ static void intel_pmu_cpu_starting(int cpu)
1362 1465
1363 pc = per_cpu(cpu_hw_events, i).shared_regs; 1466 pc = per_cpu(cpu_hw_events, i).shared_regs;
1364 if (pc && pc->core_id == core_id) { 1467 if (pc && pc->core_id == core_id) {
1365 kfree(cpuc->shared_regs); 1468 cpuc->kfree_on_online = cpuc->shared_regs;
1366 cpuc->shared_regs = pc; 1469 cpuc->shared_regs = pc;
1367 break; 1470 break;
1368 } 1471 }
@@ -1413,6 +1516,7 @@ static __initconst const struct x86_pmu intel_pmu = {
1413 .cpu_prepare = intel_pmu_cpu_prepare, 1516 .cpu_prepare = intel_pmu_cpu_prepare,
1414 .cpu_starting = intel_pmu_cpu_starting, 1517 .cpu_starting = intel_pmu_cpu_starting,
1415 .cpu_dying = intel_pmu_cpu_dying, 1518 .cpu_dying = intel_pmu_cpu_dying,
1519 .guest_get_msrs = intel_guest_get_msrs,
1416}; 1520};
1417 1521
1418static void intel_clovertown_quirks(void) 1522static void intel_clovertown_quirks(void)
@@ -1441,7 +1545,7 @@ static void intel_clovertown_quirks(void)
1441 x86_pmu.pebs_constraints = NULL; 1545 x86_pmu.pebs_constraints = NULL;
1442} 1546}
1443 1547
1444static __init int intel_pmu_init(void) 1548__init int intel_pmu_init(void)
1445{ 1549{
1446 union cpuid10_edx edx; 1550 union cpuid10_edx edx;
1447 union cpuid10_eax eax; 1551 union cpuid10_eax eax;
@@ -1590,13 +1694,14 @@ static __init int intel_pmu_init(void)
1590 break; 1694 break;
1591 1695
1592 case 42: /* SandyBridge */ 1696 case 42: /* SandyBridge */
 1697	case 45: /* SandyBridge, "Romley-EP" */
1593 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, 1698 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
1594 sizeof(hw_cache_event_ids)); 1699 sizeof(hw_cache_event_ids));
1595 1700
1596 intel_pmu_lbr_init_nhm(); 1701 intel_pmu_lbr_init_nhm();
1597 1702
1598 x86_pmu.event_constraints = intel_snb_event_constraints; 1703 x86_pmu.event_constraints = intel_snb_event_constraints;
1599 x86_pmu.pebs_constraints = intel_snb_pebs_events; 1704 x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
1600 x86_pmu.extra_regs = intel_snb_extra_regs; 1705 x86_pmu.extra_regs = intel_snb_extra_regs;
1601 /* all extra regs are per-cpu when HT is on */ 1706 /* all extra regs are per-cpu when HT is on */
1602 x86_pmu.er_flags |= ERF_HAS_RSP_1; 1707 x86_pmu.er_flags |= ERF_HAS_RSP_1;
@@ -1627,16 +1732,3 @@ static __init int intel_pmu_init(void)
1627 } 1732 }
1628 return 0; 1733 return 0;
1629} 1734}
1630
1631#else /* CONFIG_CPU_SUP_INTEL */
1632
1633static int intel_pmu_init(void)
1634{
1635 return 0;
1636}
1637
1638static struct intel_shared_regs *allocate_shared_regs(int cpu)
1639{
1640 return NULL;
1641}
1642#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 1b1ef3addcfd..c0d238f49db8 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -1,7 +1,10 @@
1#ifdef CONFIG_CPU_SUP_INTEL 1#include <linux/bitops.h>
2#include <linux/types.h>
3#include <linux/slab.h>
2 4
3/* The maximal number of PEBS events: */ 5#include <asm/perf_event.h>
4#define MAX_PEBS_EVENTS 4 6
7#include "perf_event.h"
5 8
6/* The size of a BTS record in bytes: */ 9/* The size of a BTS record in bytes: */
7#define BTS_RECORD_SIZE 24 10#define BTS_RECORD_SIZE 24
@@ -37,24 +40,7 @@ struct pebs_record_nhm {
37 u64 status, dla, dse, lat; 40 u64 status, dla, dse, lat;
38}; 41};
39 42
40/* 43void init_debug_store_on_cpu(int cpu)
41 * A debug store configuration.
42 *
43 * We only support architectures that use 64bit fields.
44 */
45struct debug_store {
46 u64 bts_buffer_base;
47 u64 bts_index;
48 u64 bts_absolute_maximum;
49 u64 bts_interrupt_threshold;
50 u64 pebs_buffer_base;
51 u64 pebs_index;
52 u64 pebs_absolute_maximum;
53 u64 pebs_interrupt_threshold;
54 u64 pebs_event_reset[MAX_PEBS_EVENTS];
55};
56
57static void init_debug_store_on_cpu(int cpu)
58{ 44{
59 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; 45 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
60 46
@@ -66,7 +52,7 @@ static void init_debug_store_on_cpu(int cpu)
66 (u32)((u64)(unsigned long)ds >> 32)); 52 (u32)((u64)(unsigned long)ds >> 32));
67} 53}
68 54
69static void fini_debug_store_on_cpu(int cpu) 55void fini_debug_store_on_cpu(int cpu)
70{ 56{
71 if (!per_cpu(cpu_hw_events, cpu).ds) 57 if (!per_cpu(cpu_hw_events, cpu).ds)
72 return; 58 return;
@@ -175,7 +161,7 @@ static void release_ds_buffer(int cpu)
175 kfree(ds); 161 kfree(ds);
176} 162}
177 163
178static void release_ds_buffers(void) 164void release_ds_buffers(void)
179{ 165{
180 int cpu; 166 int cpu;
181 167
@@ -194,7 +180,7 @@ static void release_ds_buffers(void)
194 put_online_cpus(); 180 put_online_cpus();
195} 181}
196 182
197static void reserve_ds_buffers(void) 183void reserve_ds_buffers(void)
198{ 184{
199 int bts_err = 0, pebs_err = 0; 185 int bts_err = 0, pebs_err = 0;
200 int cpu; 186 int cpu;
@@ -260,10 +246,10 @@ static void reserve_ds_buffers(void)
260 * BTS 246 * BTS
261 */ 247 */
262 248
263static struct event_constraint bts_constraint = 249struct event_constraint bts_constraint =
264 EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); 250 EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
265 251
266static void intel_pmu_enable_bts(u64 config) 252void intel_pmu_enable_bts(u64 config)
267{ 253{
268 unsigned long debugctlmsr; 254 unsigned long debugctlmsr;
269 255
@@ -282,7 +268,7 @@ static void intel_pmu_enable_bts(u64 config)
282 update_debugctlmsr(debugctlmsr); 268 update_debugctlmsr(debugctlmsr);
283} 269}
284 270
285static void intel_pmu_disable_bts(void) 271void intel_pmu_disable_bts(void)
286{ 272{
287 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 273 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
288 unsigned long debugctlmsr; 274 unsigned long debugctlmsr;
@@ -299,7 +285,7 @@ static void intel_pmu_disable_bts(void)
299 update_debugctlmsr(debugctlmsr); 285 update_debugctlmsr(debugctlmsr);
300} 286}
301 287
302static int intel_pmu_drain_bts_buffer(void) 288int intel_pmu_drain_bts_buffer(void)
303{ 289{
304 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 290 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
305 struct debug_store *ds = cpuc->ds; 291 struct debug_store *ds = cpuc->ds;
@@ -361,7 +347,7 @@ static int intel_pmu_drain_bts_buffer(void)
361/* 347/*
362 * PEBS 348 * PEBS
363 */ 349 */
364static struct event_constraint intel_core2_pebs_event_constraints[] = { 350struct event_constraint intel_core2_pebs_event_constraints[] = {
365 INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */ 351 INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
366 INTEL_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */ 352 INTEL_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
367 INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */ 353 INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
@@ -370,14 +356,14 @@ static struct event_constraint intel_core2_pebs_event_constraints[] = {
370 EVENT_CONSTRAINT_END 356 EVENT_CONSTRAINT_END
371}; 357};
372 358
373static struct event_constraint intel_atom_pebs_event_constraints[] = { 359struct event_constraint intel_atom_pebs_event_constraints[] = {
374 INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */ 360 INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
375 INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */ 361 INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
376 INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */ 362 INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
377 EVENT_CONSTRAINT_END 363 EVENT_CONSTRAINT_END
378}; 364};
379 365
380static struct event_constraint intel_nehalem_pebs_event_constraints[] = { 366struct event_constraint intel_nehalem_pebs_event_constraints[] = {
381 INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */ 367 INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */
382 INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ 368 INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
383 INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */ 369 INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
@@ -392,7 +378,7 @@ static struct event_constraint intel_nehalem_pebs_event_constraints[] = {
392 EVENT_CONSTRAINT_END 378 EVENT_CONSTRAINT_END
393}; 379};
394 380
395static struct event_constraint intel_westmere_pebs_event_constraints[] = { 381struct event_constraint intel_westmere_pebs_event_constraints[] = {
396 INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */ 382 INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */
397 INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ 383 INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
398 INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */ 384 INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
@@ -407,7 +393,7 @@ static struct event_constraint intel_westmere_pebs_event_constraints[] = {
407 EVENT_CONSTRAINT_END 393 EVENT_CONSTRAINT_END
408}; 394};
409 395
410static struct event_constraint intel_snb_pebs_events[] = { 396struct event_constraint intel_snb_pebs_event_constraints[] = {
411 INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ 397 INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
412 INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ 398 INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
413 INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */ 399 INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
@@ -428,8 +414,7 @@ static struct event_constraint intel_snb_pebs_events[] = {
428 EVENT_CONSTRAINT_END 414 EVENT_CONSTRAINT_END
429}; 415};
430 416
431static struct event_constraint * 417struct event_constraint *intel_pebs_constraints(struct perf_event *event)
432intel_pebs_constraints(struct perf_event *event)
433{ 418{
434 struct event_constraint *c; 419 struct event_constraint *c;
435 420
@@ -446,7 +431,7 @@ intel_pebs_constraints(struct perf_event *event)
446 return &emptyconstraint; 431 return &emptyconstraint;
447} 432}
448 433
449static void intel_pmu_pebs_enable(struct perf_event *event) 434void intel_pmu_pebs_enable(struct perf_event *event)
450{ 435{
451 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 436 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
452 struct hw_perf_event *hwc = &event->hw; 437 struct hw_perf_event *hwc = &event->hw;
@@ -460,7 +445,7 @@ static void intel_pmu_pebs_enable(struct perf_event *event)
460 intel_pmu_lbr_enable(event); 445 intel_pmu_lbr_enable(event);
461} 446}
462 447
463static void intel_pmu_pebs_disable(struct perf_event *event) 448void intel_pmu_pebs_disable(struct perf_event *event)
464{ 449{
465 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 450 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
466 struct hw_perf_event *hwc = &event->hw; 451 struct hw_perf_event *hwc = &event->hw;
@@ -475,7 +460,7 @@ static void intel_pmu_pebs_disable(struct perf_event *event)
475 intel_pmu_lbr_disable(event); 460 intel_pmu_lbr_disable(event);
476} 461}
477 462
478static void intel_pmu_pebs_enable_all(void) 463void intel_pmu_pebs_enable_all(void)
479{ 464{
480 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 465 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
481 466
@@ -483,7 +468,7 @@ static void intel_pmu_pebs_enable_all(void)
483 wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); 468 wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
484} 469}
485 470
486static void intel_pmu_pebs_disable_all(void) 471void intel_pmu_pebs_disable_all(void)
487{ 472{
488 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 473 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
489 474
@@ -576,8 +561,6 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
576 return 0; 561 return 0;
577} 562}
578 563
579static int intel_pmu_save_and_restart(struct perf_event *event);
580
581static void __intel_pmu_pebs_event(struct perf_event *event, 564static void __intel_pmu_pebs_event(struct perf_event *event,
582 struct pt_regs *iregs, void *__pebs) 565 struct pt_regs *iregs, void *__pebs)
583{ 566{
@@ -716,7 +699,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
716 * BTS, PEBS probe and setup 699 * BTS, PEBS probe and setup
717 */ 700 */
718 701
719static void intel_ds_init(void) 702void intel_ds_init(void)
720{ 703{
721 /* 704 /*
722 * No support for 32bit formats 705 * No support for 32bit formats
@@ -749,15 +732,3 @@ static void intel_ds_init(void)
749 } 732 }
750 } 733 }
751} 734}
752
753#else /* CONFIG_CPU_SUP_INTEL */
754
755static void reserve_ds_buffers(void)
756{
757}
758
759static void release_ds_buffers(void)
760{
761}
762
763#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index d202c1bece1a..3fab3de3ce96 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -1,4 +1,10 @@
1#ifdef CONFIG_CPU_SUP_INTEL 1#include <linux/perf_event.h>
2#include <linux/types.h>
3
4#include <asm/perf_event.h>
5#include <asm/msr.h>
6
7#include "perf_event.h"
2 8
3enum { 9enum {
4 LBR_FORMAT_32 = 0x00, 10 LBR_FORMAT_32 = 0x00,
@@ -48,7 +54,7 @@ static void intel_pmu_lbr_reset_64(void)
48 } 54 }
49} 55}
50 56
51static void intel_pmu_lbr_reset(void) 57void intel_pmu_lbr_reset(void)
52{ 58{
53 if (!x86_pmu.lbr_nr) 59 if (!x86_pmu.lbr_nr)
54 return; 60 return;
@@ -59,7 +65,7 @@ static void intel_pmu_lbr_reset(void)
59 intel_pmu_lbr_reset_64(); 65 intel_pmu_lbr_reset_64();
60} 66}
61 67
62static void intel_pmu_lbr_enable(struct perf_event *event) 68void intel_pmu_lbr_enable(struct perf_event *event)
63{ 69{
64 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 70 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
65 71
@@ -81,7 +87,7 @@ static void intel_pmu_lbr_enable(struct perf_event *event)
81 cpuc->lbr_users++; 87 cpuc->lbr_users++;
82} 88}
83 89
84static void intel_pmu_lbr_disable(struct perf_event *event) 90void intel_pmu_lbr_disable(struct perf_event *event)
85{ 91{
86 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 92 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
87 93
@@ -95,7 +101,7 @@ static void intel_pmu_lbr_disable(struct perf_event *event)
95 __intel_pmu_lbr_disable(); 101 __intel_pmu_lbr_disable();
96} 102}
97 103
98static void intel_pmu_lbr_enable_all(void) 104void intel_pmu_lbr_enable_all(void)
99{ 105{
100 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 106 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
101 107
@@ -103,7 +109,7 @@ static void intel_pmu_lbr_enable_all(void)
103 __intel_pmu_lbr_enable(); 109 __intel_pmu_lbr_enable();
104} 110}
105 111
106static void intel_pmu_lbr_disable_all(void) 112void intel_pmu_lbr_disable_all(void)
107{ 113{
108 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 114 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
109 115
@@ -178,7 +184,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
178 cpuc->lbr_stack.nr = i; 184 cpuc->lbr_stack.nr = i;
179} 185}
180 186
181static void intel_pmu_lbr_read(void) 187void intel_pmu_lbr_read(void)
182{ 188{
183 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 189 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
184 190
@@ -191,7 +197,7 @@ static void intel_pmu_lbr_read(void)
191 intel_pmu_lbr_read_64(cpuc); 197 intel_pmu_lbr_read_64(cpuc);
192} 198}
193 199
194static void intel_pmu_lbr_init_core(void) 200void intel_pmu_lbr_init_core(void)
195{ 201{
196 x86_pmu.lbr_nr = 4; 202 x86_pmu.lbr_nr = 4;
197 x86_pmu.lbr_tos = 0x01c9; 203 x86_pmu.lbr_tos = 0x01c9;
@@ -199,7 +205,7 @@ static void intel_pmu_lbr_init_core(void)
199 x86_pmu.lbr_to = 0x60; 205 x86_pmu.lbr_to = 0x60;
200} 206}
201 207
202static void intel_pmu_lbr_init_nhm(void) 208void intel_pmu_lbr_init_nhm(void)
203{ 209{
204 x86_pmu.lbr_nr = 16; 210 x86_pmu.lbr_nr = 16;
205 x86_pmu.lbr_tos = 0x01c9; 211 x86_pmu.lbr_tos = 0x01c9;
@@ -207,12 +213,10 @@ static void intel_pmu_lbr_init_nhm(void)
207 x86_pmu.lbr_to = 0x6c0; 213 x86_pmu.lbr_to = 0x6c0;
208} 214}
209 215
210static void intel_pmu_lbr_init_atom(void) 216void intel_pmu_lbr_init_atom(void)
211{ 217{
212 x86_pmu.lbr_nr = 8; 218 x86_pmu.lbr_nr = 8;
213 x86_pmu.lbr_tos = 0x01c9; 219 x86_pmu.lbr_tos = 0x01c9;
214 x86_pmu.lbr_from = 0x40; 220 x86_pmu.lbr_from = 0x40;
215 x86_pmu.lbr_to = 0x60; 221 x86_pmu.lbr_to = 0x60;
216} 222}
217
218#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index 7809d2bcb209..492bf1358a7c 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -7,9 +7,13 @@
7 * For licencing details see kernel-base/COPYING 7 * For licencing details see kernel-base/COPYING
8 */ 8 */
9 9
10#ifdef CONFIG_CPU_SUP_INTEL 10#include <linux/perf_event.h>
11 11
12#include <asm/perf_event_p4.h> 12#include <asm/perf_event_p4.h>
13#include <asm/hardirq.h>
14#include <asm/apic.h>
15
16#include "perf_event.h"
13 17
14#define P4_CNTR_LIMIT 3 18#define P4_CNTR_LIMIT 3
15/* 19/*
@@ -1303,7 +1307,7 @@ static __initconst const struct x86_pmu p4_pmu = {
1303 .perfctr_second_write = 1, 1307 .perfctr_second_write = 1,
1304}; 1308};
1305 1309
1306static __init int p4_pmu_init(void) 1310__init int p4_pmu_init(void)
1307{ 1311{
1308 unsigned int low, high; 1312 unsigned int low, high;
1309 1313
@@ -1326,5 +1330,3 @@ static __init int p4_pmu_init(void)
1326 1330
1327 return 0; 1331 return 0;
1328} 1332}
1329
1330#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index 20c097e33860..c7181befecde 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -1,4 +1,7 @@
1#ifdef CONFIG_CPU_SUP_INTEL 1#include <linux/perf_event.h>
2#include <linux/types.h>
3
4#include "perf_event.h"
2 5
3/* 6/*
4 * Not sure about some of these 7 * Not sure about some of these
@@ -114,7 +117,7 @@ static __initconst const struct x86_pmu p6_pmu = {
114 .event_constraints = p6_event_constraints, 117 .event_constraints = p6_event_constraints,
115}; 118};
116 119
117static __init int p6_pmu_init(void) 120__init int p6_pmu_init(void)
118{ 121{
119 switch (boot_cpu_data.x86_model) { 122 switch (boot_cpu_data.x86_model) {
120 case 1: 123 case 1:
@@ -138,5 +141,3 @@ static __init int p6_pmu_init(void)
138 141
139 return 0; 142 return 0;
140} 143}
141
142#endif /* CONFIG_CPU_SUP_INTEL */
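
The perf_event_intel_ds.c, _lbr.c, _p4.c and _p6.c hunks above all apply the same refactor: the files are no longer #include'd into a single translation unit under #ifdef CONFIG_CPU_SUP_INTEL, the static qualifiers are dropped, and shared definitions (struct debug_store, MAX_PEBS_EVENTS, the constraint tables) move into a new local header, perf_event.h, which presumably also absorbs the !CONFIG_CPU_SUP_INTEL stubs deleted at the end of each file. A rough sketch of how such a header is typically laid out; this is not the actual header, which does not appear in this diff:

/*
 * perf_event.h -- illustrative layout only. The field list for
 * struct debug_store is taken from the block removed above.
 */
#define MAX_PEBS_EVENTS	4

struct debug_store {
	u64	bts_buffer_base;
	u64	bts_index;
	u64	bts_absolute_maximum;
	u64	bts_interrupt_threshold;
	u64	pebs_buffer_base;
	u64	pebs_index;
	u64	pebs_absolute_maximum;
	u64	pebs_interrupt_threshold;
	u64	pebs_event_reset[MAX_PEBS_EVENTS];
};

#ifdef CONFIG_CPU_SUP_INTEL
int intel_pmu_init(void);
struct intel_shared_regs *allocate_shared_regs(int cpu);
void reserve_ds_buffers(void);
void release_ds_buffers(void);
#else /* !CONFIG_CPU_SUP_INTEL: the stubs formerly at the end of each .c file */
static inline int intel_pmu_init(void)
{
	return 0;
}
static inline struct intel_shared_regs *allocate_shared_regs(int cpu)
{
	return NULL;
}
static inline void reserve_ds_buffers(void) { }
static inline void release_ds_buffers(void) { }
#endif
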
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 62ac8cb6ba27..14b23140e81f 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -85,6 +85,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)
85 seq_printf(m, "stepping\t: %d\n", c->x86_mask); 85 seq_printf(m, "stepping\t: %d\n", c->x86_mask);
86 else 86 else
87 seq_printf(m, "stepping\t: unknown\n"); 87 seq_printf(m, "stepping\t: unknown\n");
88 if (c->microcode)
89 seq_printf(m, "microcode\t: 0x%x\n", c->microcode);
88 90
89 if (cpu_has(c, X86_FEATURE_TSC)) { 91 if (cpu_has(c, X86_FEATURE_TSC)) {
90 unsigned int freq = cpufreq_quick_get(cpu); 92 unsigned int freq = cpufreq_quick_get(cpu);
diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c
new file mode 100644
index 000000000000..feca286c2bb4
--- /dev/null
+++ b/arch/x86/kernel/cpu/rdrand.c
@@ -0,0 +1,73 @@
1/*
2 * This file is part of the Linux kernel.
3 *
4 * Copyright (c) 2011, Intel Corporation
5 * Authors: Fenghua Yu <fenghua.yu@intel.com>,
6 * H. Peter Anvin <hpa@linux.intel.com>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 */
22
23#include <asm/processor.h>
24#include <asm/archrandom.h>
25#include <asm/sections.h>
26
27static int __init x86_rdrand_setup(char *s)
28{
29 setup_clear_cpu_cap(X86_FEATURE_RDRAND);
30 return 1;
31}
32__setup("nordrand", x86_rdrand_setup);
33
34/* We can't use arch_get_random_long() here since alternatives haven't run */
35static inline int rdrand_long(unsigned long *v)
36{
37 int ok;
38 asm volatile("1: " RDRAND_LONG "\n\t"
39 "jc 2f\n\t"
40 "decl %0\n\t"
41 "jnz 1b\n\t"
42 "2:"
43 : "=r" (ok), "=a" (*v)
44 : "0" (RDRAND_RETRY_LOOPS));
45 return ok;
46}
47
48/*
49 * Force a reseed cycle; we are architecturally guaranteed a reseed
50 * after no more than 512 128-bit chunks of random data. This also
51 * acts as a test of the CPU capability.
52 */
53#define RESEED_LOOP ((512*128)/sizeof(unsigned long))
54
55void __cpuinit x86_init_rdrand(struct cpuinfo_x86 *c)
56{
57#ifdef CONFIG_ARCH_RANDOM
58 unsigned long tmp;
59 int i, count, ok;
60
61 if (!cpu_has(c, X86_FEATURE_RDRAND))
62 return; /* Nothing to do */
63
64 for (count = i = 0; i < RESEED_LOOP; i++) {
65 ok = rdrand_long(&tmp);
66 if (ok)
67 count++;
68 }
69
70 if (count != RESEED_LOOP)
71 clear_cpu_cap(c, X86_FEATURE_RDRAND);
72#endif
73}
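
The new rdrand.c above retries RDRAND up to RDRAND_RETRY_LOOPS times because the instruction can transiently fail (carry flag clear) while the DRNG reseeds, and x86_init_rdrand() then pulls enough data to force at least one reseed, clearing X86_FEATURE_RDRAND if the CPU never delivers. Consumers normally go through the arch_get_random_long() helper from <asm/archrandom.h> rather than issuing the instruction directly; a hedged usage sketch, with a purely illustrative fallback:

#include <linux/jiffies.h>
#include <asm/archrandom.h>

/*
 * Illustrative consumer only: arch_get_random_long() returns non-zero
 * when RDRAND is present and produced a value within the retry limit.
 */
static unsigned long pick_seed(void)
{
	unsigned long v;

	if (arch_get_random_long(&v))
		return v;

	return jiffies;	/* hypothetical fallback for this sketch */
}
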
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 764c7c2b1811..13ad89971d47 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -32,15 +32,12 @@ int in_crash_kexec;
32 32
33#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) 33#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
34 34
35static void kdump_nmi_callback(int cpu, struct die_args *args) 35static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
36{ 36{
37 struct pt_regs *regs;
38#ifdef CONFIG_X86_32 37#ifdef CONFIG_X86_32
39 struct pt_regs fixed_regs; 38 struct pt_regs fixed_regs;
40#endif 39#endif
41 40
42 regs = args->regs;
43
44#ifdef CONFIG_X86_32 41#ifdef CONFIG_X86_32
45 if (!user_mode_vm(regs)) { 42 if (!user_mode_vm(regs)) {
46 crash_fixup_ss_esp(&fixed_regs, regs); 43 crash_fixup_ss_esp(&fixed_regs, regs);
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index a621f3427685..52821799a702 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -2,6 +2,7 @@
2 * Architecture specific OF callbacks. 2 * Architecture specific OF callbacks.
3 */ 3 */
4#include <linux/bootmem.h> 4#include <linux/bootmem.h>
5#include <linux/export.h>
5#include <linux/io.h> 6#include <linux/io.h>
6#include <linux/interrupt.h> 7#include <linux/interrupt.h>
7#include <linux/list.h> 8#include <linux/list.h>
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 3e2ef8425316..303a0e48f076 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -12,6 +12,7 @@
12#include <linux/types.h> 12#include <linux/types.h>
13#include <linux/init.h> 13#include <linux/init.h>
14#include <linux/crash_dump.h> 14#include <linux/crash_dump.h>
15#include <linux/export.h>
15#include <linux/bootmem.h> 16#include <linux/bootmem.h>
16#include <linux/pfn.h> 17#include <linux/pfn.h>
17#include <linux/suspend.h> 18#include <linux/suspend.h>
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 5c1a91974918..f3f6f5344001 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -54,6 +54,7 @@
54#include <asm/ftrace.h> 54#include <asm/ftrace.h>
55#include <asm/irq_vectors.h> 55#include <asm/irq_vectors.h>
56#include <asm/cpufeature.h> 56#include <asm/cpufeature.h>
57#include <asm/alternative-asm.h>
57 58
58/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ 59/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
59#include <linux/elf-em.h> 60#include <linux/elf-em.h>
@@ -873,12 +874,7 @@ ENTRY(simd_coprocessor_error)
873661: pushl_cfi $do_general_protection 874661: pushl_cfi $do_general_protection
874662: 875662:
875.section .altinstructions,"a" 876.section .altinstructions,"a"
876 .balign 4 877 altinstruction_entry 661b, 663f, X86_FEATURE_XMM, 662b-661b, 664f-663f
877 .long 661b
878 .long 663f
879 .word X86_FEATURE_XMM
880 .byte 662b-661b
881 .byte 664f-663f
882.previous 878.previous
883.section .altinstr_replacement,"ax" 879.section .altinstr_replacement,"ax"
884663: pushl $do_simd_coprocessor_error 880663: pushl $do_simd_coprocessor_error
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index e13329d800c8..faf8d5e74b0b 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -331,10 +331,15 @@ ENDPROC(native_usergs_sysret64)
3311: incl PER_CPU_VAR(irq_count) 3311: incl PER_CPU_VAR(irq_count)
332 jne 2f 332 jne 2f
333 mov PER_CPU_VAR(irq_stack_ptr),%rsp 333 mov PER_CPU_VAR(irq_stack_ptr),%rsp
334 EMPTY_FRAME 0 334 CFI_DEF_CFA_REGISTER rsi
335 335
3362: /* Store previous stack value */ 3362: /* Store previous stack value */
337 pushq %rsi 337 pushq %rsi
338 CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \
339 0x77 /* DW_OP_breg7 */, 0, \
340 0x06 /* DW_OP_deref */, \
341 0x08 /* DW_OP_const1u */, SS+8-RBP, \
342 0x22 /* DW_OP_plus */
338 /* We entered an interrupt context - irqs are off: */ 343 /* We entered an interrupt context - irqs are off: */
339 TRACE_IRQS_OFF 344 TRACE_IRQS_OFF
340 .endm 345 .endm
@@ -788,7 +793,6 @@ END(interrupt)
788 subq $ORIG_RAX-RBP, %rsp 793 subq $ORIG_RAX-RBP, %rsp
789 CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP 794 CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP
790 SAVE_ARGS_IRQ 795 SAVE_ARGS_IRQ
791 PARTIAL_FRAME 0
792 call \func 796 call \func
793 .endm 797 .endm
794 798
@@ -813,10 +817,10 @@ ret_from_intr:
813 817
814 /* Restore saved previous stack */ 818 /* Restore saved previous stack */
815 popq %rsi 819 popq %rsi
816 leaq 16(%rsi), %rsp 820 CFI_DEF_CFA_REGISTER rsi
817 821 leaq ARGOFFSET-RBP(%rsi), %rsp
818 CFI_DEF_CFA_REGISTER rsp 822 CFI_DEF_CFA_REGISTER rsp
819 CFI_ADJUST_CFA_OFFSET -16 823 CFI_ADJUST_CFA_OFFSET RBP-ARGOFFSET
820 824
821exit_intr: 825exit_intr:
822 GET_THREAD_INFO(%rcx) 826 GET_THREAD_INFO(%rcx)
@@ -1111,7 +1115,6 @@ zeroentry spurious_interrupt_bug do_spurious_interrupt_bug
1111zeroentry coprocessor_error do_coprocessor_error 1115zeroentry coprocessor_error do_coprocessor_error
1112errorentry alignment_check do_alignment_check 1116errorentry alignment_check do_alignment_check
1113zeroentry simd_coprocessor_error do_simd_coprocessor_error 1117zeroentry simd_coprocessor_error do_simd_coprocessor_error
1114zeroentry emulate_vsyscall do_emulate_vsyscall
1115 1118
1116 1119
1117 /* Reload gs selector with exception handling */ 1120 /* Reload gs selector with exception handling */
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 4aecc54236a9..b946a9eac7d9 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -1,6 +1,7 @@
1#include <linux/clocksource.h> 1#include <linux/clocksource.h>
2#include <linux/clockchips.h> 2#include <linux/clockchips.h>
3#include <linux/interrupt.h> 3#include <linux/interrupt.h>
4#include <linux/export.h>
4#include <linux/sysdev.h> 5#include <linux/sysdev.h>
5#include <linux/delay.h> 6#include <linux/delay.h>
6#include <linux/errno.h> 7#include <linux/errno.h>
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 12aff2537682..739d8598f789 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -321,7 +321,7 @@ static inline unsigned short twd_i387_to_fxsr(unsigned short twd)
321 return tmp; 321 return tmp;
322} 322}
323 323
324#define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16); 324#define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16)
325#define FP_EXP_TAG_VALID 0 325#define FP_EXP_TAG_VALID 0
326#define FP_EXP_TAG_ZERO 1 326#define FP_EXP_TAG_ZERO 1
327#define FP_EXP_TAG_SPECIAL 2 327#define FP_EXP_TAG_SPECIAL 2
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 65b8f5c2eebf..610485223bdb 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -14,7 +14,7 @@
14#include <linux/io.h> 14#include <linux/io.h>
15#include <linux/delay.h> 15#include <linux/delay.h>
16 16
17#include <asm/atomic.h> 17#include <linux/atomic.h>
18#include <asm/system.h> 18#include <asm/system.h>
19#include <asm/timer.h> 19#include <asm/timer.h>
20#include <asm/hw_irq.h> 20#include <asm/hw_irq.h>
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 6c0802eb2f7f..429e0c92924e 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -9,6 +9,7 @@
9#include <linux/smp.h> 9#include <linux/smp.h>
10#include <linux/ftrace.h> 10#include <linux/ftrace.h>
11#include <linux/delay.h> 11#include <linux/delay.h>
12#include <linux/export.h>
12 13
13#include <asm/apic.h> 14#include <asm/apic.h>
14#include <asm/io_apic.h> 15#include <asm/io_apic.h>
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index f09d4bbe2d2d..b3300e6bacef 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -15,7 +15,7 @@
15#include <linux/io.h> 15#include <linux/io.h>
16#include <linux/delay.h> 16#include <linux/delay.h>
17 17
18#include <asm/atomic.h> 18#include <linux/atomic.h>
19#include <asm/system.h> 19#include <asm/system.h>
20#include <asm/timer.h> 20#include <asm/timer.h>
21#include <asm/hw_irq.h> 21#include <asm/hw_irq.h>
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index 3fee346ef545..ea9d5f2f13ef 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -24,8 +24,9 @@ union jump_code_union {
24 } __attribute__((packed)); 24 } __attribute__((packed));
25}; 25};
26 26
27void arch_jump_label_transform(struct jump_entry *entry, 27static void __jump_label_transform(struct jump_entry *entry,
28 enum jump_label_type type) 28 enum jump_label_type type,
29 void *(*poker)(void *, const void *, size_t))
29{ 30{
30 union jump_code_union code; 31 union jump_code_union code;
31 32
@@ -35,17 +36,24 @@ void arch_jump_label_transform(struct jump_entry *entry,
35 (entry->code + JUMP_LABEL_NOP_SIZE); 36 (entry->code + JUMP_LABEL_NOP_SIZE);
36 } else 37 } else
37 memcpy(&code, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE); 38 memcpy(&code, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE);
39
40 (*poker)((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE);
41}
42
43void arch_jump_label_transform(struct jump_entry *entry,
44 enum jump_label_type type)
45{
38 get_online_cpus(); 46 get_online_cpus();
39 mutex_lock(&text_mutex); 47 mutex_lock(&text_mutex);
40 text_poke_smp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE); 48 __jump_label_transform(entry, type, text_poke_smp);
41 mutex_unlock(&text_mutex); 49 mutex_unlock(&text_mutex);
42 put_online_cpus(); 50 put_online_cpus();
43} 51}
44 52
45void arch_jump_label_text_poke_early(jump_label_t addr) 53void arch_jump_label_transform_static(struct jump_entry *entry,
54 enum jump_label_type type)
46{ 55{
47 text_poke_early((void *)addr, ideal_nops[NOP_ATOMIC5], 56 __jump_label_transform(entry, type, text_poke_early);
48 JUMP_LABEL_NOP_SIZE);
49} 57}
50 58
51#endif 59#endif
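
The jump_label change above is a pure factoring: the instruction-selection logic moves into __jump_label_transform(), which takes the text "poker" as a callback, so the runtime path (text_poke_smp() under text_mutex and get_online_cpus()) and the early-boot path (text_poke_early(), reached via the new arch_jump_label_transform_static()) share one body. The same shape in a stand-alone sketch, with purely illustrative names:

#include <stddef.h>
#include <string.h>

/* Sketch only: one worker, parameterized by how the bytes get written. */
typedef void *(*poker_fn)(void *dst, const void *src, size_t len);

static void patch_site(unsigned char *site, const unsigned char *insn,
		       size_t len, poker_fn poker)
{
	/* shared body: decide what to write, then hand off to the poker */
	poker(site, insn, len);
}

/* early boot: nothing else runs yet, so a plain memcpy-style poker suffices */
static void patch_site_early(unsigned char *site, const unsigned char *insn,
			     size_t len)
{
	patch_site(site, insn, len, memcpy);
}
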
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 00354d4919a9..faba5771acad 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -511,28 +511,37 @@ single_step_cont(struct pt_regs *regs, struct die_args *args)
511 511
512static int was_in_debug_nmi[NR_CPUS]; 512static int was_in_debug_nmi[NR_CPUS];
513 513
514static int __kgdb_notify(struct die_args *args, unsigned long cmd) 514static int kgdb_nmi_handler(unsigned int cmd, struct pt_regs *regs)
515{ 515{
516 struct pt_regs *regs = args->regs;
517
518 switch (cmd) { 516 switch (cmd) {
519 case DIE_NMI: 517 case NMI_LOCAL:
520 if (atomic_read(&kgdb_active) != -1) { 518 if (atomic_read(&kgdb_active) != -1) {
521 /* KGDB CPU roundup */ 519 /* KGDB CPU roundup */
522 kgdb_nmicallback(raw_smp_processor_id(), regs); 520 kgdb_nmicallback(raw_smp_processor_id(), regs);
523 was_in_debug_nmi[raw_smp_processor_id()] = 1; 521 was_in_debug_nmi[raw_smp_processor_id()] = 1;
524 touch_nmi_watchdog(); 522 touch_nmi_watchdog();
525 return NOTIFY_STOP; 523 return NMI_HANDLED;
526 } 524 }
527 return NOTIFY_DONE; 525 break;
528 526
529 case DIE_NMIUNKNOWN: 527 case NMI_UNKNOWN:
530 if (was_in_debug_nmi[raw_smp_processor_id()]) { 528 if (was_in_debug_nmi[raw_smp_processor_id()]) {
531 was_in_debug_nmi[raw_smp_processor_id()] = 0; 529 was_in_debug_nmi[raw_smp_processor_id()] = 0;
532 return NOTIFY_STOP; 530 return NMI_HANDLED;
533 } 531 }
534 return NOTIFY_DONE; 532 break;
533 default:
534 /* do nothing */
535 break;
536 }
537 return NMI_DONE;
538}
539
540static int __kgdb_notify(struct die_args *args, unsigned long cmd)
541{
542 struct pt_regs *regs = args->regs;
535 543
544 switch (cmd) {
536 case DIE_DEBUG: 545 case DIE_DEBUG:
537 if (atomic_read(&kgdb_cpu_doing_single_step) != -1) { 546 if (atomic_read(&kgdb_cpu_doing_single_step) != -1) {
538 if (user_mode(regs)) 547 if (user_mode(regs))
@@ -590,11 +599,6 @@ kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr)
590 599
591static struct notifier_block kgdb_notifier = { 600static struct notifier_block kgdb_notifier = {
592 .notifier_call = kgdb_notify, 601 .notifier_call = kgdb_notify,
593
594 /*
595 * Lowest-prio notifier priority, we want to be notified last:
596 */
597 .priority = NMI_LOCAL_LOW_PRIOR,
598}; 602};
599 603
600/** 604/**
@@ -605,7 +609,31 @@ static struct notifier_block kgdb_notifier = {
605 */ 609 */
606int kgdb_arch_init(void) 610int kgdb_arch_init(void)
607{ 611{
608 return register_die_notifier(&kgdb_notifier); 612 int retval;
613
614 retval = register_die_notifier(&kgdb_notifier);
615 if (retval)
616 goto out;
617
618 retval = register_nmi_handler(NMI_LOCAL, kgdb_nmi_handler,
619 0, "kgdb");
620 if (retval)
621 goto out1;
622
623 retval = register_nmi_handler(NMI_UNKNOWN, kgdb_nmi_handler,
624 0, "kgdb");
625
626 if (retval)
627 goto out2;
628
629 return retval;
630
631out2:
632 unregister_nmi_handler(NMI_LOCAL, "kgdb");
633out1:
634 unregister_die_notifier(&kgdb_notifier);
635out:
636 return retval;
609} 637}
610 638
611static void kgdb_hw_overflow_handler(struct perf_event *event, 639static void kgdb_hw_overflow_handler(struct perf_event *event,
@@ -673,6 +701,8 @@ void kgdb_arch_exit(void)
673 breakinfo[i].pev = NULL; 701 breakinfo[i].pev = NULL;
674 } 702 }
675 } 703 }
704 unregister_nmi_handler(NMI_UNKNOWN, "kgdb");
705 unregister_nmi_handler(NMI_LOCAL, "kgdb");
676 unregister_die_notifier(&kgdb_notifier); 706 unregister_die_notifier(&kgdb_notifier);
677} 707}
678 708
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index f1a6244d7d93..7da647d8b64c 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -75,8 +75,11 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
75 /* 75 /*
76 * Undefined/reserved opcodes, conditional jump, Opcode Extension 76 * Undefined/reserved opcodes, conditional jump, Opcode Extension
77 * Groups, and some special opcodes can not boost. 77 * Groups, and some special opcodes can not boost.
78 * This is non-const and volatile to keep gcc from statically
79 * optimizing it out, as variable_test_bit makes gcc think only
80 * *(unsigned long*) is used.
78 */ 81 */
79static const u32 twobyte_is_boostable[256 / 32] = { 82static volatile u32 twobyte_is_boostable[256 / 32] = {
80 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 83 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
81 /* ---------------------------------------------- */ 84 /* ---------------------------------------------- */
82 W(0x00, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0) | /* 00 */ 85 W(0x00, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0) | /* 00 */
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index c1a0188e29ae..44842d756b29 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -74,9 +74,10 @@ static cycle_t kvm_clock_read(void)
74 struct pvclock_vcpu_time_info *src; 74 struct pvclock_vcpu_time_info *src;
75 cycle_t ret; 75 cycle_t ret;
76 76
77 src = &get_cpu_var(hv_clock); 77 preempt_disable_notrace();
78 src = &__get_cpu_var(hv_clock);
78 ret = pvclock_clocksource_read(src); 79 ret = pvclock_clocksource_read(src);
79 put_cpu_var(hv_clock); 80 preempt_enable_notrace();
80 return ret; 81 return ret;
81} 82}
82 83
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 591be0ee1934..d494799aafcd 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -74,14 +74,13 @@ static struct equiv_cpu_entry *equiv_cpu_table;
74static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) 74static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
75{ 75{
76 struct cpuinfo_x86 *c = &cpu_data(cpu); 76 struct cpuinfo_x86 *c = &cpu_data(cpu);
77 u32 dummy;
78 77
79 if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { 78 if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) {
80 pr_warning("CPU%d: family %d not supported\n", cpu, c->x86); 79 pr_warning("CPU%d: family %d not supported\n", cpu, c->x86);
81 return -1; 80 return -1;
82 } 81 }
83 82
84 rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy); 83 csig->rev = c->microcode;
85 pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev); 84 pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev);
86 85
87 return 0; 86 return 0;
@@ -130,6 +129,7 @@ static int apply_microcode_amd(int cpu)
130 int cpu_num = raw_smp_processor_id(); 129 int cpu_num = raw_smp_processor_id();
131 struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; 130 struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
132 struct microcode_amd *mc_amd = uci->mc; 131 struct microcode_amd *mc_amd = uci->mc;
132 struct cpuinfo_x86 *c = &cpu_data(cpu);
133 133
134 /* We should bind the task to the CPU */ 134 /* We should bind the task to the CPU */
135 BUG_ON(cpu_num != cpu); 135 BUG_ON(cpu_num != cpu);
@@ -150,6 +150,7 @@ static int apply_microcode_amd(int cpu)
150 150
151 pr_info("CPU%d: new patch_level=0x%08x\n", cpu, rev); 151 pr_info("CPU%d: new patch_level=0x%08x\n", cpu, rev);
152 uci->cpu_sig.rev = rev; 152 uci->cpu_sig.rev = rev;
153 c->microcode = rev;
153 154
154 return 0; 155 return 0;
155} 156}
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index f9242800bc84..f2d2a664e797 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -483,7 +483,13 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
483 sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); 483 sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
484 pr_debug("CPU%d removed\n", cpu); 484 pr_debug("CPU%d removed\n", cpu);
485 break; 485 break;
486 case CPU_DEAD: 486
487 /*
488 * When a CPU goes offline, don't free up or invalidate the copy of
489 * the microcode in kernel memory, so that we can reuse it when the
490 * CPU comes back online without unnecessarily requesting the userspace
491 * for it again.
492 */
487 case CPU_UP_CANCELED_FROZEN: 493 case CPU_UP_CANCELED_FROZEN:
488 /* The CPU refused to come up during a system resume */ 494 /* The CPU refused to come up during a system resume */
489 microcode_fini_cpu(cpu); 495 microcode_fini_cpu(cpu);
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index 1a1b606d3e92..3ca42d0e43a2 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -161,12 +161,7 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
161 csig->pf = 1 << ((val[1] >> 18) & 7); 161 csig->pf = 1 << ((val[1] >> 18) & 7);
162 } 162 }
163 163
164 wrmsr(MSR_IA32_UCODE_REV, 0, 0); 164 csig->rev = c->microcode;
165 /* see notes above for revision 1.07. Apparent chip bug */
166 sync_core();
167 /* get the current revision from MSR 0x8B */
168 rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev);
169
170 pr_info("CPU%d sig=0x%x, pf=0x%x, revision=0x%x\n", 165 pr_info("CPU%d sig=0x%x, pf=0x%x, revision=0x%x\n",
171 cpu_num, csig->sig, csig->pf, csig->rev); 166 cpu_num, csig->sig, csig->pf, csig->rev);
172 167
@@ -299,9 +294,9 @@ static int apply_microcode(int cpu)
299 struct microcode_intel *mc_intel; 294 struct microcode_intel *mc_intel;
300 struct ucode_cpu_info *uci; 295 struct ucode_cpu_info *uci;
301 unsigned int val[2]; 296 unsigned int val[2];
302 int cpu_num; 297 int cpu_num = raw_smp_processor_id();
298 struct cpuinfo_x86 *c = &cpu_data(cpu_num);
303 299
304 cpu_num = raw_smp_processor_id();
305 uci = ucode_cpu_info + cpu; 300 uci = ucode_cpu_info + cpu;
306 mc_intel = uci->mc; 301 mc_intel = uci->mc;
307 302
@@ -317,7 +312,7 @@ static int apply_microcode(int cpu)
317 (unsigned long) mc_intel->bits >> 16 >> 16); 312 (unsigned long) mc_intel->bits >> 16 >> 16);
318 wrmsr(MSR_IA32_UCODE_REV, 0, 0); 313 wrmsr(MSR_IA32_UCODE_REV, 0, 0);
319 314
320 /* see notes above for revision 1.07. Apparent chip bug */ 315 /* As documented in the SDM: Do a CPUID 1 here */
321 sync_core(); 316 sync_core();
322 317
323 /* get the current revision from MSR 0x8B */ 318 /* get the current revision from MSR 0x8B */
@@ -335,6 +330,7 @@ static int apply_microcode(int cpu)
335 (mc_intel->hdr.date >> 16) & 0xff); 330 (mc_intel->hdr.date >> 16) & 0xff);
336 331
337 uci->cpu_sig.rev = val[1]; 332 uci->cpu_sig.rev = val[1];
333 c->microcode = val[1];
338 334
339 return 0; 335 return 0;
340} 336}
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
new file mode 100644
index 000000000000..e88f37b58ddd
--- /dev/null
+++ b/arch/x86/kernel/nmi.c
@@ -0,0 +1,435 @@
1/*
2 * Copyright (C) 1991, 1992 Linus Torvalds
3 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
4 * Copyright (C) 2011 Don Zickus Red Hat, Inc.
5 *
6 * Pentium III FXSR, SSE support
7 * Gareth Hughes <gareth@valinux.com>, May 2000
8 */
9
10/*
11 * Handle hardware traps and faults.
12 */
13#include <linux/spinlock.h>
14#include <linux/kprobes.h>
15#include <linux/kdebug.h>
16#include <linux/nmi.h>
17#include <linux/delay.h>
18#include <linux/hardirq.h>
19#include <linux/slab.h>
20#include <linux/export.h>
21
22#include <linux/mca.h>
23
24#if defined(CONFIG_EDAC)
25#include <linux/edac.h>
26#endif
27
28#include <linux/atomic.h>
29#include <asm/traps.h>
30#include <asm/mach_traps.h>
31#include <asm/nmi.h>
32#include <asm/x86_init.h>
33
34#define NMI_MAX_NAMELEN 16
35struct nmiaction {
36 struct list_head list;
37 nmi_handler_t handler;
38 unsigned int flags;
39 char *name;
40};
41
42struct nmi_desc {
43 spinlock_t lock;
44 struct list_head head;
45};
46
47static struct nmi_desc nmi_desc[NMI_MAX] =
48{
49 {
50 .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[0].lock),
51 .head = LIST_HEAD_INIT(nmi_desc[0].head),
52 },
53 {
54 .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[1].lock),
55 .head = LIST_HEAD_INIT(nmi_desc[1].head),
56 },
57
58};
59
60struct nmi_stats {
61 unsigned int normal;
62 unsigned int unknown;
63 unsigned int external;
64 unsigned int swallow;
65};
66
67static DEFINE_PER_CPU(struct nmi_stats, nmi_stats);
68
69static int ignore_nmis;
70
71int unknown_nmi_panic;
72/*
73 * Prevent NMI reason port (0x61) being accessed simultaneously, can
74 * only be used in NMI handler.
75 */
76static DEFINE_RAW_SPINLOCK(nmi_reason_lock);
77
78static int __init setup_unknown_nmi_panic(char *str)
79{
80 unknown_nmi_panic = 1;
81 return 1;
82}
83__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
84
85#define nmi_to_desc(type) (&nmi_desc[type])
86
87static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
88{
89 struct nmi_desc *desc = nmi_to_desc(type);
90 struct nmiaction *a;
 91 int handled = 0;
92
93 rcu_read_lock();
94
95 /*
96 * NMIs are edge-triggered, which means if you have enough
97 * of them concurrently, you can lose some because only one
98 * can be latched at any given time. Walk the whole list
99 * to handle those situations.
100 */
101 list_for_each_entry_rcu(a, &desc->head, list)
102 handled += a->handler(type, regs);
103
104 rcu_read_unlock();
105
106 /* return total number of NMI events handled */
107 return handled;
108}
109
110static int __setup_nmi(unsigned int type, struct nmiaction *action)
111{
112 struct nmi_desc *desc = nmi_to_desc(type);
113 unsigned long flags;
114
115 spin_lock_irqsave(&desc->lock, flags);
116
117 /*
118 * most handlers of type NMI_UNKNOWN never return because
119 * they just assume the NMI is theirs. Just a sanity check
120 * to manage expectations
121 */
122 WARN_ON_ONCE(type == NMI_UNKNOWN && !list_empty(&desc->head));
123
124 /*
125 * some handlers need to be executed first otherwise a fake
126 * event confuses some handlers (kdump uses this flag)
127 */
128 if (action->flags & NMI_FLAG_FIRST)
129 list_add_rcu(&action->list, &desc->head);
130 else
131 list_add_tail_rcu(&action->list, &desc->head);
132
133 spin_unlock_irqrestore(&desc->lock, flags);
134 return 0;
135}
136
137static struct nmiaction *__free_nmi(unsigned int type, const char *name)
138{
139 struct nmi_desc *desc = nmi_to_desc(type);
140 struct nmiaction *n;
141 unsigned long flags;
142
143 spin_lock_irqsave(&desc->lock, flags);
144
145 list_for_each_entry_rcu(n, &desc->head, list) {
146 /*
147 * the name passed in to describe the nmi handler
148 * is used as the lookup key
149 */
150 if (!strcmp(n->name, name)) {
151 WARN(in_nmi(),
152 "Trying to free NMI (%s) from NMI context!\n", n->name);
153 list_del_rcu(&n->list);
154 break;
155 }
156 }
157
158 spin_unlock_irqrestore(&desc->lock, flags);
159 synchronize_rcu();
160 return (n);
161}
162
163int register_nmi_handler(unsigned int type, nmi_handler_t handler,
164 unsigned long nmiflags, const char *devname)
165{
166 struct nmiaction *action;
167 int retval = -ENOMEM;
168
169 if (!handler)
170 return -EINVAL;
171
172 action = kzalloc(sizeof(struct nmiaction), GFP_KERNEL);
173 if (!action)
174 goto fail_action;
175
176 action->handler = handler;
177 action->flags = nmiflags;
178 action->name = kstrndup(devname, NMI_MAX_NAMELEN, GFP_KERNEL);
179 if (!action->name)
180 goto fail_action_name;
181
182 retval = __setup_nmi(type, action);
183
184 if (retval)
185 goto fail_setup_nmi;
186
187 return retval;
188
189fail_setup_nmi:
190 kfree(action->name);
191fail_action_name:
192 kfree(action);
193fail_action:
194
195 return retval;
196}
197EXPORT_SYMBOL_GPL(register_nmi_handler);
198
199void unregister_nmi_handler(unsigned int type, const char *name)
200{
201 struct nmiaction *a;
202
203 a = __free_nmi(type, name);
204 if (a) {
205 kfree(a->name);
206 kfree(a);
207 }
208}
209
210EXPORT_SYMBOL_GPL(unregister_nmi_handler);
211
212static notrace __kprobes void
213pci_serr_error(unsigned char reason, struct pt_regs *regs)
214{
215 pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
216 reason, smp_processor_id());
217
218 /*
219 * On some machines, PCI SERR line is used to report memory
220 * errors. EDAC makes use of it.
221 */
222#if defined(CONFIG_EDAC)
223 if (edac_handler_set()) {
224 edac_atomic_assert_error();
225 return;
226 }
227#endif
228
229 if (panic_on_unrecovered_nmi)
230 panic("NMI: Not continuing");
231
232 pr_emerg("Dazed and confused, but trying to continue\n");
233
234 /* Clear and disable the PCI SERR error line. */
235 reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR;
236 outb(reason, NMI_REASON_PORT);
237}
238
239static notrace __kprobes void
240io_check_error(unsigned char reason, struct pt_regs *regs)
241{
242 unsigned long i;
243
244 pr_emerg(
245 "NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n",
246 reason, smp_processor_id());
247 show_registers(regs);
248
249 if (panic_on_io_nmi)
250 panic("NMI IOCK error: Not continuing");
251
252 /* Re-enable the IOCK line, wait for a few seconds */
253 reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
254 outb(reason, NMI_REASON_PORT);
255
256 i = 20000;
257 while (--i) {
258 touch_nmi_watchdog();
259 udelay(100);
260 }
261
262 reason &= ~NMI_REASON_CLEAR_IOCHK;
263 outb(reason, NMI_REASON_PORT);
264}
265
266static notrace __kprobes void
267unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
268{
269 int handled;
270
271 /*
272 * Use 'false' as back-to-back NMIs are dealt with one level up.
273 * Of course this makes having multiple 'unknown' handlers useless
274 * as only the first one is ever run (unless it can actually determine
275 * if it caused the NMI)
276 */
277 handled = nmi_handle(NMI_UNKNOWN, regs, false);
278 if (handled) {
279 __this_cpu_add(nmi_stats.unknown, handled);
280 return;
281 }
282
283 __this_cpu_add(nmi_stats.unknown, 1);
284
285#ifdef CONFIG_MCA
286 /*
287 * Might actually be able to figure out what the guilty party
288 * is:
289 */
290 if (MCA_bus) {
291 mca_handle_nmi();
292 return;
293 }
294#endif
295 pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
296 reason, smp_processor_id());
297
298 pr_emerg("Do you have a strange power saving mode enabled?\n");
299 if (unknown_nmi_panic || panic_on_unrecovered_nmi)
300 panic("NMI: Not continuing");
301
302 pr_emerg("Dazed and confused, but trying to continue\n");
303}
304
305static DEFINE_PER_CPU(bool, swallow_nmi);
306static DEFINE_PER_CPU(unsigned long, last_nmi_rip);
307
308static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
309{
310 unsigned char reason = 0;
311 int handled;
312 bool b2b = false;
313
314 /*
315 * CPU-specific NMI must be processed before non-CPU-specific
316 * NMI, otherwise we may lose it, because the CPU-specific
317 * NMI can not be detected/processed on other CPUs.
318 */
319
320 /*
321 * Back-to-back NMIs are interesting because they can either
 322 * be two NMIs or more than two NMIs (anything over two is dropped
323 * due to NMI being edge-triggered). If this is the second half
324 * of the back-to-back NMI, assume we dropped things and process
325 * more handlers. Otherwise reset the 'swallow' NMI behaviour
326 */
327 if (regs->ip == __this_cpu_read(last_nmi_rip))
328 b2b = true;
329 else
330 __this_cpu_write(swallow_nmi, false);
331
332 __this_cpu_write(last_nmi_rip, regs->ip);
333
334 handled = nmi_handle(NMI_LOCAL, regs, b2b);
335 __this_cpu_add(nmi_stats.normal, handled);
336 if (handled) {
337 /*
 338 * There are cases when an NMI handler handles multiple
 339 * events in the current NMI. One of these events may
 340 * be queued for the next NMI. Because the event is
 341 * already handled, the next NMI will result in an unknown
 342 * NMI. Instead let's flag this for a potential NMI to
343 * swallow.
344 */
345 if (handled > 1)
346 __this_cpu_write(swallow_nmi, true);
347 return;
348 }
349
350 /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
351 raw_spin_lock(&nmi_reason_lock);
352 reason = x86_platform.get_nmi_reason();
353
354 if (reason & NMI_REASON_MASK) {
355 if (reason & NMI_REASON_SERR)
356 pci_serr_error(reason, regs);
357 else if (reason & NMI_REASON_IOCHK)
358 io_check_error(reason, regs);
359#ifdef CONFIG_X86_32
360 /*
361 * Reassert NMI in case it became active
362 * meanwhile as it's edge-triggered:
363 */
364 reassert_nmi();
365#endif
366 __this_cpu_add(nmi_stats.external, 1);
367 raw_spin_unlock(&nmi_reason_lock);
368 return;
369 }
370 raw_spin_unlock(&nmi_reason_lock);
371
372 /*
373 * Only one NMI can be latched at a time. To handle
374 * this we may process multiple nmi handlers at once to
375 * cover the case where an NMI is dropped. The downside
376 * to this approach is we may process an NMI prematurely,
377 * while its real NMI is sitting latched. This will cause
378 * an unknown NMI on the next run of the NMI processing.
379 *
380 * We tried to flag that condition above, by setting the
381 * swallow_nmi flag when we process more than one event.
382 * This condition is also only present on the second half
383 * of a back-to-back NMI, so we flag that condition too.
384 *
385 * If both are true, we assume we already processed this
386 * NMI previously and we swallow it. Otherwise we reset
387 * the logic.
388 *
389 * There are scenarios where we may accidentally swallow
390 * a 'real' unknown NMI. For example, while processing
391 * a perf NMI another perf NMI comes in along with a
392 * 'real' unknown NMI. These two NMIs get combined into
 393 * one (as described above). When the next NMI gets
 394 * processed, it will be flagged by perf as handled, but
 395 * no one will know that there was a 'real' unknown NMI sent
396 * also. As a result it gets swallowed. Or if the first
397 * perf NMI returns two events handled then the second
398 * NMI will get eaten by the logic below, again losing a
399 * 'real' unknown NMI. But this is the best we can do
400 * for now.
401 */
402 if (b2b && __this_cpu_read(swallow_nmi))
403 __this_cpu_add(nmi_stats.swallow, 1);
404 else
405 unknown_nmi_error(reason, regs);
406}
407
408dotraplinkage notrace __kprobes void
409do_nmi(struct pt_regs *regs, long error_code)
410{
411 nmi_enter();
412
413 inc_irq_stat(__nmi_count);
414
415 if (!ignore_nmis)
416 default_do_nmi(regs);
417
418 nmi_exit();
419}
420
421void stop_nmi(void)
422{
423 ignore_nmis++;
424}
425
426void restart_nmi(void)
427{
428 ignore_nmis--;
429}
430
431/* reset the back-to-back NMI logic */
432void local_touch_nmi(void)
433{
434 __this_cpu_write(last_nmi_rip, 0);
435}
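
The new nmi.c above is the list-based NMI dispatch that the kgdb, crash and reboot hunks in this series convert to: handlers register per type (NMI_LOCAL, NMI_UNKNOWN), return the number of events they handled (NMI_HANDLED) or NMI_DONE to pass the NMI on, and are removed again by the name they registered under. A minimal, hypothetical consumer of that API might look like the following; the handler body is a stub, since a real one would test its own hardware before claiming the NMI:

#include <linux/module.h>
#include <asm/nmi.h>

static int demo_nmi_handler(unsigned int type, struct pt_regs *regs)
{
	/*
	 * A real handler would check whether its device raised this NMI.
	 * The stub claims nothing, so the remaining handlers still run.
	 */
	return NMI_DONE;
}

static int __init demo_init(void)
{
	/* flags 0: no NMI_FLAG_FIRST; "demo" is the key used for removal */
	return register_nmi_handler(NMI_LOCAL, demo_nmi_handler, 0, "demo");
}

static void __exit demo_exit(void)
{
	unregister_nmi_handler(NMI_LOCAL, "demo");
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
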
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 613a7931ecc1..d90272e6bc40 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -307,6 +307,10 @@ struct pv_info pv_info = {
307 .paravirt_enabled = 0, 307 .paravirt_enabled = 0,
308 .kernel_rpl = 0, 308 .kernel_rpl = 0,
309 .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */ 309 .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */
310
311#ifdef CONFIG_X86_64
312 .extra_user_64bit_cs = __USER_CS,
313#endif
310}; 314};
311 315
312struct pv_init_ops pv_init_ops = { 316struct pv_init_ops pv_init_ops = {
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index e8c33a302006..726494b58345 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -1553,7 +1553,7 @@ static void __init calgary_fixup_one_tce_space(struct pci_dev *dev)
1553 continue; 1553 continue;
1554 1554
1555 /* cover the whole region */ 1555 /* cover the whole region */
1556 npages = (r->end - r->start) >> PAGE_SHIFT; 1556 npages = resource_size(r) >> PAGE_SHIFT;
1557 npages++; 1557 npages++;
1558 1558
1559 iommu_range_reserve(tbl, r->start, npages); 1559 iommu_range_reserve(tbl, r->start, npages);
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index b49d00da2aed..80dc793b3f63 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -1,6 +1,7 @@
1#include <linux/dma-mapping.h> 1#include <linux/dma-mapping.h>
2#include <linux/dma-debug.h> 2#include <linux/dma-debug.h>
3#include <linux/dmar.h> 3#include <linux/dmar.h>
4#include <linux/export.h>
4#include <linux/bootmem.h> 5#include <linux/bootmem.h>
5#include <linux/gfp.h> 6#include <linux/gfp.h>
6#include <linux/pci.h> 7#include <linux/pci.h>
@@ -117,8 +118,8 @@ again:
117} 118}
118 119
119/* 120/*
120 * See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter 121 * See <Documentation/x86/x86_64/boot-options.txt> for the iommu kernel
121 * documentation. 122 * parameter documentation.
122 */ 123 */
123static __init int iommu_setup(char *p) 124static __init int iommu_setup(char *p)
124{ 125{
diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c
index ba0a4cce53be..34e06e84ce31 100644
--- a/arch/x86/kernel/probe_roms.c
+++ b/arch/x86/kernel/probe_roms.c
@@ -10,9 +10,9 @@
10#include <linux/dmi.h> 10#include <linux/dmi.h>
11#include <linux/pfn.h> 11#include <linux/pfn.h>
12#include <linux/pci.h> 12#include <linux/pci.h>
13#include <asm/pci-direct.h> 13#include <linux/export.h>
14
15 14
15#include <asm/pci-direct.h>
16#include <asm/e820.h> 16#include <asm/e820.h>
17#include <asm/mmzone.h> 17#include <asm/mmzone.h>
18#include <asm/setup.h> 18#include <asm/setup.h>
@@ -234,7 +234,7 @@ void __init probe_roms(void)
234 /* check for extension rom (ignore length byte!) */ 234 /* check for extension rom (ignore length byte!) */
235 rom = isa_bus_to_virt(extension_rom_resource.start); 235 rom = isa_bus_to_virt(extension_rom_resource.start);
236 if (romsignature(rom)) { 236 if (romsignature(rom)) {
237 length = extension_rom_resource.end - extension_rom_resource.start + 1; 237 length = resource_size(&extension_rom_resource);
238 if (romchecksum(rom, length)) { 238 if (romchecksum(rom, length)) {
239 request_resource(&iomem_resource, &extension_rom_resource); 239 request_resource(&iomem_resource, &extension_rom_resource);
240 upper = extension_rom_resource.start; 240 upper = extension_rom_resource.start;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index e1ba8cb24e4e..b9b3b1a51643 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -49,7 +49,7 @@ void free_thread_xstate(struct task_struct *tsk)
49void free_thread_info(struct thread_info *ti) 49void free_thread_info(struct thread_info *ti)
50{ 50{
51 free_thread_xstate(ti->task); 51 free_thread_xstate(ti->task);
52 free_pages((unsigned long)ti, get_order(THREAD_SIZE)); 52 free_pages((unsigned long)ti, THREAD_ORDER);
53} 53}
54 54
55void arch_task_cache_init(void) 55void arch_task_cache_init(void)
@@ -438,29 +438,6 @@ void cpu_idle_wait(void)
438} 438}
439EXPORT_SYMBOL_GPL(cpu_idle_wait); 439EXPORT_SYMBOL_GPL(cpu_idle_wait);
440 440
441/*
442 * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
443 * which can obviate IPI to trigger checking of need_resched.
444 * We execute MONITOR against need_resched and enter optimized wait state
445 * through MWAIT. Whenever someone changes need_resched, we would be woken
446 * up from MWAIT (without an IPI).
447 *
448 * New with Core Duo processors, MWAIT can take some hints based on CPU
449 * capability.
450 */
451void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
452{
453 if (!need_resched()) {
454 if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
455 clflush((void *)&current_thread_info()->flags);
456
457 __monitor((void *)&current_thread_info()->flags, 0, 0);
458 smp_mb();
459 if (!need_resched())
460 __mwait(ax, cx);
461 }
462}
463
464/* Default MONITOR/MWAIT with no hints, used for default C1 state */ 441/* Default MONITOR/MWAIT with no hints, used for default C1 state */
465static void mwait_idle(void) 442static void mwait_idle(void)
466{ 443{
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index a3d0dc59067b..795b79f984c2 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -38,6 +38,7 @@
38#include <linux/uaccess.h> 38#include <linux/uaccess.h>
39#include <linux/io.h> 39#include <linux/io.h>
40#include <linux/kdebug.h> 40#include <linux/kdebug.h>
41#include <linux/cpuidle.h>
41 42
42#include <asm/pgtable.h> 43#include <asm/pgtable.h>
43#include <asm/system.h> 44#include <asm/system.h>
@@ -56,6 +57,7 @@
56#include <asm/idle.h> 57#include <asm/idle.h>
57#include <asm/syscalls.h> 58#include <asm/syscalls.h>
58#include <asm/debugreg.h> 59#include <asm/debugreg.h>
60#include <asm/nmi.h>
59 61
60asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); 62asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
61 63
@@ -106,10 +108,12 @@ void cpu_idle(void)
106 if (cpu_is_offline(cpu)) 108 if (cpu_is_offline(cpu))
107 play_dead(); 109 play_dead();
108 110
111 local_touch_nmi();
109 local_irq_disable(); 112 local_irq_disable();
110 /* Don't trace irqs off for idle */ 113 /* Don't trace irqs off for idle */
111 stop_critical_timings(); 114 stop_critical_timings();
112 pm_idle(); 115 if (cpuidle_idle_call())
116 pm_idle();
113 start_critical_timings(); 117 start_critical_timings();
114 } 118 }
115 tick_nohz_restart_sched_tick(); 119 tick_nohz_restart_sched_tick();
@@ -260,7 +264,7 @@ EXPORT_SYMBOL_GPL(start_thread);
260 264
261 265
262/* 266/*
263 * switch_to(x,yn) should switch tasks from x to y. 267 * switch_to(x,y) should switch tasks from x to y.
264 * 268 *
265 * We fsave/fwait so that an exception goes off at the right time 269 * We fsave/fwait so that an exception goes off at the right time
266 * (as a call from the fsave or fwait in effect) rather than to 270 * (as a call from the fsave or fwait in effect) rather than to
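
The interesting change in this idle loop (and its 64-bit twin below) is the cpuidle hook: cpuidle_idle_call() returns non-zero when no cpuidle driver is registered, and only then does the old pm_idle() pointer run; local_touch_nmi() keeps the NMI watchdog from firing over a long idle stretch. The fallback decision in isolation, as a minimal sketch (example_do_idle is not a real kernel function):

#include <linux/cpuidle.h>

extern void (*pm_idle)(void);	/* legacy x86 idle hook */

/*
 * Sketch: prefer a registered cpuidle driver; fall back to the legacy
 * pm_idle hook only when cpuidle_idle_call() reports failure (non-zero).
 */
static void example_do_idle(void)
{
	if (cpuidle_idle_call())
		pm_idle();
}
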
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ca6f7ab8df33..3bd7e6eebf31 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -37,6 +37,7 @@
37#include <linux/uaccess.h> 37#include <linux/uaccess.h>
38#include <linux/io.h> 38#include <linux/io.h>
39#include <linux/ftrace.h> 39#include <linux/ftrace.h>
40#include <linux/cpuidle.h>
40 41
41#include <asm/pgtable.h> 42#include <asm/pgtable.h>
42#include <asm/system.h> 43#include <asm/system.h>
@@ -50,6 +51,7 @@
50#include <asm/idle.h> 51#include <asm/idle.h>
51#include <asm/syscalls.h> 52#include <asm/syscalls.h>
52#include <asm/debugreg.h> 53#include <asm/debugreg.h>
54#include <asm/nmi.h>
53 55
54asmlinkage extern void ret_from_fork(void); 56asmlinkage extern void ret_from_fork(void);
55 57
@@ -132,11 +134,13 @@ void cpu_idle(void)
132 * from here on, until they go to idle. 134 * from here on, until they go to idle.
133 * Otherwise, idle callbacks can misfire. 135 * Otherwise, idle callbacks can misfire.
134 */ 136 */
137 local_touch_nmi();
135 local_irq_disable(); 138 local_irq_disable();
136 enter_idle(); 139 enter_idle();
137 /* Don't trace irqs off for idle */ 140 /* Don't trace irqs off for idle */
138 stop_critical_timings(); 141 stop_critical_timings();
139 pm_idle(); 142 if (cpuidle_idle_call())
143 pm_idle();
140 start_critical_timings(); 144 start_critical_timings();
141 145
142 /* In many cases the interrupt that ended idle 146 /* In many cases the interrupt that ended idle
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 9242436e9937..e334be1182b9 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -464,7 +464,7 @@ static inline void kb_wait(void)
464 } 464 }
465} 465}
466 466
467static void vmxoff_nmi(int cpu, struct die_args *args) 467static void vmxoff_nmi(int cpu, struct pt_regs *regs)
468{ 468{
469 cpu_emergency_vmxoff(); 469 cpu_emergency_vmxoff();
470} 470}
@@ -736,14 +736,10 @@ static nmi_shootdown_cb shootdown_callback;
736 736
737static atomic_t waiting_for_crash_ipi; 737static atomic_t waiting_for_crash_ipi;
738 738
739static int crash_nmi_callback(struct notifier_block *self, 739static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
740 unsigned long val, void *data)
741{ 740{
742 int cpu; 741 int cpu;
743 742
744 if (val != DIE_NMI)
745 return NOTIFY_OK;
746
747 cpu = raw_smp_processor_id(); 743 cpu = raw_smp_processor_id();
748 744
749 /* Don't do anything if this handler is invoked on crashing cpu. 745 /* Don't do anything if this handler is invoked on crashing cpu.
@@ -751,10 +747,10 @@ static int crash_nmi_callback(struct notifier_block *self,
751 * an NMI if system was initially booted with nmi_watchdog parameter. 747 * an NMI if system was initially booted with nmi_watchdog parameter.
752 */ 748 */
753 if (cpu == crashing_cpu) 749 if (cpu == crashing_cpu)
754 return NOTIFY_STOP; 750 return NMI_HANDLED;
755 local_irq_disable(); 751 local_irq_disable();
756 752
757 shootdown_callback(cpu, (struct die_args *)data); 753 shootdown_callback(cpu, regs);
758 754
759 atomic_dec(&waiting_for_crash_ipi); 755 atomic_dec(&waiting_for_crash_ipi);
760 /* Assume hlt works */ 756 /* Assume hlt works */
@@ -762,7 +758,7 @@ static int crash_nmi_callback(struct notifier_block *self,
762 for (;;) 758 for (;;)
763 cpu_relax(); 759 cpu_relax();
764 760
765 return 1; 761 return NMI_HANDLED;
766} 762}
767 763
768static void smp_send_nmi_allbutself(void) 764static void smp_send_nmi_allbutself(void)
@@ -770,12 +766,6 @@ static void smp_send_nmi_allbutself(void)
770 apic->send_IPI_allbutself(NMI_VECTOR); 766 apic->send_IPI_allbutself(NMI_VECTOR);
771} 767}
772 768
773static struct notifier_block crash_nmi_nb = {
774 .notifier_call = crash_nmi_callback,
775 /* we want to be the first one called */
776 .priority = NMI_LOCAL_HIGH_PRIOR+1,
777};
778
779/* Halt all other CPUs, calling the specified function on each of them 769/* Halt all other CPUs, calling the specified function on each of them
780 * 770 *
781 * This function can be used to halt all other CPUs on crash 771 * This function can be used to halt all other CPUs on crash
@@ -794,7 +784,8 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
794 784
795 atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); 785 atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
796 /* Would it be better to replace the trap vector here? */ 786 /* Would it be better to replace the trap vector here? */
797 if (register_die_notifier(&crash_nmi_nb)) 787 if (register_nmi_handler(NMI_LOCAL, crash_nmi_callback,
788 NMI_FLAG_FIRST, "crash"))
798 return; /* return what? */ 789 return; /* return what? */
799 /* Ensure the new callback function is set before sending 790 /* Ensure the new callback function is set before sending
800 * out the NMI 791 * out the NMI
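
The crash shootdown path above is converted from the DIE_NMI notifier chain to the new NMI handler registry: handlers now take (type, regs) and return NMI_HANDLED or NMI_DONE, and registration names the handler and can request first-in-line placement. A hedged sketch of registering a handler under this API (the handler and the "example" name are illustrative, not part of the patch):

#include <linux/init.h>
#include <asm/nmi.h>
#include <asm/ptrace.h>

/*
 * Sketch: a do-nothing local-NMI handler.  Returning NMI_DONE passes the
 * NMI on to other handlers; NMI_HANDLED would claim it, as the crash
 * callback above does.
 */
static int example_nmi_handler(unsigned int type, struct pt_regs *regs)
{
	return NMI_DONE;
}

static int __init example_nmi_setup(void)
{
	/* NMI_FLAG_FIRST puts the handler at the head of the chain. */
	return register_nmi_handler(NMI_LOCAL, example_nmi_handler,
				    NMI_FLAG_FIRST, "example");
}
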
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 3f2ad2640d85..348ce016a835 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -5,6 +5,7 @@
5#include <linux/mc146818rtc.h> 5#include <linux/mc146818rtc.h>
6#include <linux/acpi.h> 6#include <linux/acpi.h>
7#include <linux/bcd.h> 7#include <linux/bcd.h>
8#include <linux/export.h>
8#include <linux/pnp.h> 9#include <linux/pnp.h>
9#include <linux/of.h> 10#include <linux/of.h>
10 11
@@ -42,8 +43,11 @@ int mach_set_rtc_mmss(unsigned long nowtime)
42{ 43{
43 int real_seconds, real_minutes, cmos_minutes; 44 int real_seconds, real_minutes, cmos_minutes;
44 unsigned char save_control, save_freq_select; 45 unsigned char save_control, save_freq_select;
46 unsigned long flags;
45 int retval = 0; 47 int retval = 0;
46 48
49 spin_lock_irqsave(&rtc_lock, flags);
50
47 /* tell the clock it's being set */ 51 /* tell the clock it's being set */
48 save_control = CMOS_READ(RTC_CONTROL); 52 save_control = CMOS_READ(RTC_CONTROL);
49 CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); 53 CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL);
@@ -93,12 +97,17 @@ int mach_set_rtc_mmss(unsigned long nowtime)
93 CMOS_WRITE(save_control, RTC_CONTROL); 97 CMOS_WRITE(save_control, RTC_CONTROL);
94 CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); 98 CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
95 99
100 spin_unlock_irqrestore(&rtc_lock, flags);
101
96 return retval; 102 return retval;
97} 103}
98 104
99unsigned long mach_get_cmos_time(void) 105unsigned long mach_get_cmos_time(void)
100{ 106{
101 unsigned int status, year, mon, day, hour, min, sec, century = 0; 107 unsigned int status, year, mon, day, hour, min, sec, century = 0;
108 unsigned long flags;
109
110 spin_lock_irqsave(&rtc_lock, flags);
102 111
103 /* 112 /*
104 * If UIP is clear, then we have >= 244 microseconds before 113 * If UIP is clear, then we have >= 244 microseconds before
@@ -125,6 +134,8 @@ unsigned long mach_get_cmos_time(void)
125 status = CMOS_READ(RTC_CONTROL); 134 status = CMOS_READ(RTC_CONTROL);
126 WARN_ON_ONCE(RTC_ALWAYS_BCD && (status & RTC_DM_BINARY)); 135 WARN_ON_ONCE(RTC_ALWAYS_BCD && (status & RTC_DM_BINARY));
127 136
137 spin_unlock_irqrestore(&rtc_lock, flags);
138
128 if (RTC_ALWAYS_BCD || !(status & RTC_DM_BINARY)) { 139 if (RTC_ALWAYS_BCD || !(status & RTC_DM_BINARY)) {
129 sec = bcd2bin(sec); 140 sec = bcd2bin(sec);
130 min = bcd2bin(min); 141 min = bcd2bin(min);
@@ -169,24 +180,15 @@ EXPORT_SYMBOL(rtc_cmos_write);
169 180
170int update_persistent_clock(struct timespec now) 181int update_persistent_clock(struct timespec now)
171{ 182{
172 unsigned long flags; 183 return x86_platform.set_wallclock(now.tv_sec);
173 int retval;
174
175 spin_lock_irqsave(&rtc_lock, flags);
176 retval = x86_platform.set_wallclock(now.tv_sec);
177 spin_unlock_irqrestore(&rtc_lock, flags);
178
179 return retval;
180} 184}
181 185
182/* not static: needed by APM */ 186/* not static: needed by APM */
183void read_persistent_clock(struct timespec *ts) 187void read_persistent_clock(struct timespec *ts)
184{ 188{
185 unsigned long retval, flags; 189 unsigned long retval;
186 190
187 spin_lock_irqsave(&rtc_lock, flags);
188 retval = x86_platform.get_wallclock(); 191 retval = x86_platform.get_wallclock();
189 spin_unlock_irqrestore(&rtc_lock, flags);
190 192
191 ts->tv_sec = retval; 193 ts->tv_sec = retval;
192 ts->tv_nsec = 0; 194 ts->tv_nsec = 0;
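
The rtc_lock handling moves down a level here: the generic update_persistent_clock()/read_persistent_clock() wrappers no longer take the lock, and the CMOS accessors serialize themselves instead, so every x86_platform.get_wallclock()/set_wallclock() implementation is responsible for its own locking. The resulting pattern inside a CMOS accessor, in miniature (example_read_rtc_seconds is illustrative, not a function from this file):

#include <linux/mc146818rtc.h>	/* rtc_lock, CMOS_READ(), RTC_SECONDS */
#include <linux/spinlock.h>

/* Sketch: a wallclock-style accessor that serializes CMOS access itself. */
static unsigned char example_read_rtc_seconds(void)
{
	unsigned long flags;
	unsigned char sec;

	spin_lock_irqsave(&rtc_lock, flags);
	sec = CMOS_READ(RTC_SECONDS);
	spin_unlock_irqrestore(&rtc_lock, flags);

	return sec;
}
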
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index afaf38447ef5..cf0ef986cb6d 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1045,6 +1045,8 @@ void __init setup_arch(char **cmdline_p)
1045 1045
1046 x86_init.timers.wallclock_init(); 1046 x86_init.timers.wallclock_init();
1047 1047
1048 x86_platform.wallclock_init();
1049
1048 mcheck_init(); 1050 mcheck_init();
1049 1051
1050 arch_init_ideal_nops(); 1052 arch_init_ideal_nops();
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 013e7eba83bb..16204dc15484 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -16,6 +16,7 @@
16#include <linux/mm.h> 16#include <linux/mm.h>
17#include <linux/delay.h> 17#include <linux/delay.h>
18#include <linux/spinlock.h> 18#include <linux/spinlock.h>
19#include <linux/export.h>
19#include <linux/kernel_stat.h> 20#include <linux/kernel_stat.h>
20#include <linux/mc146818rtc.h> 21#include <linux/mc146818rtc.h>
21#include <linux/cache.h> 22#include <linux/cache.h>
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index 7977f0cfe339..c346d1161488 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -74,7 +74,7 @@ static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs)
74 74
75#ifdef CONFIG_X86_64 75#ifdef CONFIG_X86_64
76 case 0x40 ... 0x4f: 76 case 0x40 ... 0x4f:
77 if (regs->cs != __USER_CS) 77 if (!user_64bit_mode(regs))
78 /* 32-bit mode: register increment */ 78 /* 32-bit mode: register increment */
79 return 0; 79 return 0;
80 /* 64-bit mode: REX prefix */ 80 /* 64-bit mode: REX prefix */
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index ff14a5044ce6..051489082d59 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -14,10 +14,73 @@
14#include <linux/personality.h> 14#include <linux/personality.h>
15#include <linux/random.h> 15#include <linux/random.h>
16#include <linux/uaccess.h> 16#include <linux/uaccess.h>
17#include <linux/elf.h>
17 18
18#include <asm/ia32.h> 19#include <asm/ia32.h>
19#include <asm/syscalls.h> 20#include <asm/syscalls.h>
20 21
22/*
23 * Align a virtual address to avoid aliasing in the I$ on AMD F15h.
24 *
25 * @flags denotes the allocation direction - bottomup or topdown -
26 * or vDSO; see call sites below.
27 */
28unsigned long align_addr(unsigned long addr, struct file *filp,
29 enum align_flags flags)
30{
31 unsigned long tmp_addr;
32
33 /* handle 32- and 64-bit case with a single conditional */
34 if (va_align.flags < 0 || !(va_align.flags & (2 - mmap_is_ia32())))
35 return addr;
36
37 if (!(current->flags & PF_RANDOMIZE))
38 return addr;
39
40 if (!((flags & ALIGN_VDSO) || filp))
41 return addr;
42
43 tmp_addr = addr;
44
45 /*
46 * We need an address which is <= than the original
47 * one only when in topdown direction.
48 */
49 if (!(flags & ALIGN_TOPDOWN))
50 tmp_addr += va_align.mask;
51
52 tmp_addr &= ~va_align.mask;
53
54 return tmp_addr;
55}
56
57static int __init control_va_addr_alignment(char *str)
58{
59 /* guard against enabling this on other CPU families */
60 if (va_align.flags < 0)
61 return 1;
62
63 if (*str == 0)
64 return 1;
65
66 if (*str == '=')
67 str++;
68
69 if (!strcmp(str, "32"))
70 va_align.flags = ALIGN_VA_32;
71 else if (!strcmp(str, "64"))
72 va_align.flags = ALIGN_VA_64;
73 else if (!strcmp(str, "off"))
74 va_align.flags = 0;
75 else if (!strcmp(str, "on"))
76 va_align.flags = ALIGN_VA_32 | ALIGN_VA_64;
77 else
78 return 0;
79
80 return 1;
81}
82__setup("align_va_addr", control_va_addr_alignment);
83
21SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, 84SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
22 unsigned long, prot, unsigned long, flags, 85 unsigned long, prot, unsigned long, flags,
23 unsigned long, fd, unsigned long, off) 86 unsigned long, fd, unsigned long, off)
@@ -92,6 +155,9 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
92 start_addr = addr; 155 start_addr = addr;
93 156
94full_search: 157full_search:
158
159 addr = align_addr(addr, filp, 0);
160
95 for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { 161 for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
96 /* At this point: (!vma || addr < vma->vm_end). */ 162 /* At this point: (!vma || addr < vma->vm_end). */
97 if (end - len < addr) { 163 if (end - len < addr) {
@@ -117,6 +183,7 @@ full_search:
117 mm->cached_hole_size = vma->vm_start - addr; 183 mm->cached_hole_size = vma->vm_start - addr;
118 184
119 addr = vma->vm_end; 185 addr = vma->vm_end;
186 addr = align_addr(addr, filp, 0);
120 } 187 }
121} 188}
122 189
@@ -161,10 +228,13 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
161 228
162 /* make sure it can fit in the remaining address space */ 229 /* make sure it can fit in the remaining address space */
163 if (addr > len) { 230 if (addr > len) {
164 vma = find_vma(mm, addr-len); 231 unsigned long tmp_addr = align_addr(addr - len, filp,
165 if (!vma || addr <= vma->vm_start) 232 ALIGN_TOPDOWN);
233
234 vma = find_vma(mm, tmp_addr);
235 if (!vma || tmp_addr + len <= vma->vm_start)
166 /* remember the address as a hint for next time */ 236 /* remember the address as a hint for next time */
167 return mm->free_area_cache = addr-len; 237 return mm->free_area_cache = tmp_addr;
168 } 238 }
169 239
170 if (mm->mmap_base < len) 240 if (mm->mmap_base < len)
@@ -173,6 +243,8 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
173 addr = mm->mmap_base-len; 243 addr = mm->mmap_base-len;
174 244
175 do { 245 do {
246 addr = align_addr(addr, filp, ALIGN_TOPDOWN);
247
176 /* 248 /*
177 * Lookup failure means no vma is above this address, 249 * Lookup failure means no vma is above this address,
178 * else if new region fits below vma->vm_start, 250 * else if new region fits below vma->vm_start,
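
align_addr() exists to keep mmap()ed text out of aliasing sets in the AMD family 15h instruction cache: when the feature applies to the task's personality, candidate addresses are rounded so the bits in va_align.mask are clear, downwards for top-down searches (the result must not exceed the hint) and upwards otherwise. The new align_va_addr= parameter accepts 32, 64, on, or off to choose which personalities get the treatment. The rounding arithmetic in isolation, with an illustrative 32 KiB mask rather than the value the CPU setup code actually computes:

/* EXAMPLE_ALIGN_MASK is illustrative; the real value is va_align.mask. */
#define EXAMPLE_ALIGN_MASK	0x7fffUL	/* 32 KiB - 1 */

static unsigned long example_align_up(unsigned long addr)
{
	/* bottom-up search: round up to the next aligned address */
	return (addr + EXAMPLE_ALIGN_MASK) & ~EXAMPLE_ALIGN_MASK;
}

static unsigned long example_align_down(unsigned long addr)
{
	/* top-down search (ALIGN_TOPDOWN): round down, never above addr */
	return addr & ~EXAMPLE_ALIGN_MASK;
}
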
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index fbb0a045a1a2..9a0e31293920 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -168,7 +168,7 @@ ENTRY(sys_call_table)
168 .long ptregs_vm86 168 .long ptregs_vm86
169 .long sys_ni_syscall /* Old sys_query_module */ 169 .long sys_ni_syscall /* Old sys_query_module */
170 .long sys_poll 170 .long sys_poll
171 .long sys_nfsservctl 171 .long sys_ni_syscall /* Old nfsservctl */
172 .long sys_setresgid16 /* 170 */ 172 .long sys_setresgid16 /* 170 */
173 .long sys_getresgid16 173 .long sys_getresgid16
174 .long sys_prctl 174 .long sys_prctl
@@ -346,3 +346,5 @@ ENTRY(sys_call_table)
346 .long sys_syncfs 346 .long sys_syncfs
347 .long sys_sendmmsg /* 345 */ 347 .long sys_sendmmsg /* 345 */
348 .long sys_setns 348 .long sys_setns
349 .long sys_process_vm_readv
350 .long sys_process_vm_writev
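
Besides retiring the long-dead nfsservctl slot, the table gains the two cross-memory-attach syscalls. A hedged userspace sketch of calling process_vm_readv() through syscall(2), useful while libc wrappers are still catching up; the pid and remote address are placeholders, and __NR_process_vm_readv must already be present in the installed headers:

#include <sys/syscall.h>
#include <sys/uio.h>
#include <unistd.h>

/* Sketch: copy nbytes from remote_addr in process pid into buf. */
static ssize_t example_peek(pid_t pid, void *remote_addr,
			    void *buf, size_t nbytes)
{
	struct iovec local  = { .iov_base = buf,         .iov_len = nbytes };
	struct iovec remote = { .iov_base = remote_addr, .iov_len = nbytes };

	return syscall(__NR_process_vm_readv, pid,
		       &local, 1UL, &remote, 1UL, 0UL);
}
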
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index e07a2fc876b9..e2410e27f97e 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -22,6 +22,7 @@
22#include <linux/dma_remapping.h> 22#include <linux/dma_remapping.h>
23#include <linux/init_task.h> 23#include <linux/init_task.h>
24#include <linux/spinlock.h> 24#include <linux/spinlock.h>
25#include <linux/export.h>
25#include <linux/delay.h> 26#include <linux/delay.h>
26#include <linux/sched.h> 27#include <linux/sched.h>
27#include <linux/init.h> 28#include <linux/init.h>
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index 5a64d057be57..dd5fbf4101fc 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -13,6 +13,7 @@
13#include <linux/interrupt.h> 13#include <linux/interrupt.h>
14#include <linux/i8253.h> 14#include <linux/i8253.h>
15#include <linux/time.h> 15#include <linux/time.h>
16#include <linux/export.h>
16#include <linux/mca.h> 17#include <linux/mca.h>
17 18
18#include <asm/vsyscall.h> 19#include <asm/vsyscall.h>
diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c
index 8927486a4649..76ee97709a00 100644
--- a/arch/x86/kernel/topology.c
+++ b/arch/x86/kernel/topology.c
@@ -26,6 +26,7 @@
26 * Send feedback to <colpatch@us.ibm.com> 26 * Send feedback to <colpatch@us.ibm.com>
27 */ 27 */
28#include <linux/nodemask.h> 28#include <linux/nodemask.h>
29#include <linux/export.h>
29#include <linux/mmzone.h> 30#include <linux/mmzone.h>
30#include <linux/init.h> 31#include <linux/init.h>
31#include <linux/smp.h> 32#include <linux/smp.h>
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index fbc097a085ca..a8e3eb83466c 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -49,7 +49,7 @@
49#include <asm/stacktrace.h> 49#include <asm/stacktrace.h>
50#include <asm/processor.h> 50#include <asm/processor.h>
51#include <asm/debugreg.h> 51#include <asm/debugreg.h>
52#include <asm/atomic.h> 52#include <linux/atomic.h>
53#include <asm/system.h> 53#include <asm/system.h>
54#include <asm/traps.h> 54#include <asm/traps.h>
55#include <asm/desc.h> 55#include <asm/desc.h>
@@ -81,15 +81,6 @@ gate_desc idt_table[NR_VECTORS] __page_aligned_data = { { { { 0, 0 } } }, };
81DECLARE_BITMAP(used_vectors, NR_VECTORS); 81DECLARE_BITMAP(used_vectors, NR_VECTORS);
82EXPORT_SYMBOL_GPL(used_vectors); 82EXPORT_SYMBOL_GPL(used_vectors);
83 83
84static int ignore_nmis;
85
86int unknown_nmi_panic;
87/*
88 * Prevent NMI reason port (0x61) being accessed simultaneously, can
89 * only be used in NMI handler.
90 */
91static DEFINE_RAW_SPINLOCK(nmi_reason_lock);
92
93static inline void conditional_sti(struct pt_regs *regs) 84static inline void conditional_sti(struct pt_regs *regs)
94{ 85{
95 if (regs->flags & X86_EFLAGS_IF) 86 if (regs->flags & X86_EFLAGS_IF)
@@ -307,152 +298,6 @@ gp_in_kernel:
307 die("general protection fault", regs, error_code); 298 die("general protection fault", regs, error_code);
308} 299}
309 300
310static int __init setup_unknown_nmi_panic(char *str)
311{
312 unknown_nmi_panic = 1;
313 return 1;
314}
315__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
316
317static notrace __kprobes void
318pci_serr_error(unsigned char reason, struct pt_regs *regs)
319{
320 pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
321 reason, smp_processor_id());
322
323 /*
324 * On some machines, PCI SERR line is used to report memory
325 * errors. EDAC makes use of it.
326 */
327#if defined(CONFIG_EDAC)
328 if (edac_handler_set()) {
329 edac_atomic_assert_error();
330 return;
331 }
332#endif
333
334 if (panic_on_unrecovered_nmi)
335 panic("NMI: Not continuing");
336
337 pr_emerg("Dazed and confused, but trying to continue\n");
338
339 /* Clear and disable the PCI SERR error line. */
340 reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR;
341 outb(reason, NMI_REASON_PORT);
342}
343
344static notrace __kprobes void
345io_check_error(unsigned char reason, struct pt_regs *regs)
346{
347 unsigned long i;
348
349 pr_emerg(
350 "NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n",
351 reason, smp_processor_id());
352 show_registers(regs);
353
354 if (panic_on_io_nmi)
355 panic("NMI IOCK error: Not continuing");
356
357 /* Re-enable the IOCK line, wait for a few seconds */
358 reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
359 outb(reason, NMI_REASON_PORT);
360
361 i = 20000;
362 while (--i) {
363 touch_nmi_watchdog();
364 udelay(100);
365 }
366
367 reason &= ~NMI_REASON_CLEAR_IOCHK;
368 outb(reason, NMI_REASON_PORT);
369}
370
371static notrace __kprobes void
372unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
373{
374 if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
375 NOTIFY_STOP)
376 return;
377#ifdef CONFIG_MCA
378 /*
379 * Might actually be able to figure out what the guilty party
380 * is:
381 */
382 if (MCA_bus) {
383 mca_handle_nmi();
384 return;
385 }
386#endif
387 pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
388 reason, smp_processor_id());
389
390 pr_emerg("Do you have a strange power saving mode enabled?\n");
391 if (unknown_nmi_panic || panic_on_unrecovered_nmi)
392 panic("NMI: Not continuing");
393
394 pr_emerg("Dazed and confused, but trying to continue\n");
395}
396
397static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
398{
399 unsigned char reason = 0;
400
401 /*
402 * CPU-specific NMI must be processed before non-CPU-specific
403 * NMI, otherwise we may lose it, because the CPU-specific
404 * NMI can not be detected/processed on other CPUs.
405 */
406 if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
407 return;
408
409 /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
410 raw_spin_lock(&nmi_reason_lock);
411 reason = get_nmi_reason();
412
413 if (reason & NMI_REASON_MASK) {
414 if (reason & NMI_REASON_SERR)
415 pci_serr_error(reason, regs);
416 else if (reason & NMI_REASON_IOCHK)
417 io_check_error(reason, regs);
418#ifdef CONFIG_X86_32
419 /*
420 * Reassert NMI in case it became active
421 * meanwhile as it's edge-triggered:
422 */
423 reassert_nmi();
424#endif
425 raw_spin_unlock(&nmi_reason_lock);
426 return;
427 }
428 raw_spin_unlock(&nmi_reason_lock);
429
430 unknown_nmi_error(reason, regs);
431}
432
433dotraplinkage notrace __kprobes void
434do_nmi(struct pt_regs *regs, long error_code)
435{
436 nmi_enter();
437
438 inc_irq_stat(__nmi_count);
439
440 if (!ignore_nmis)
441 default_do_nmi(regs);
442
443 nmi_exit();
444}
445
446void stop_nmi(void)
447{
448 ignore_nmis++;
449}
450
451void restart_nmi(void)
452{
453 ignore_nmis--;
454}
455
456/* May run on IST stack. */ 301/* May run on IST stack. */
457dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) 302dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
458{ 303{
@@ -872,12 +717,6 @@ void __init trap_init(void)
872 set_bit(SYSCALL_VECTOR, used_vectors); 717 set_bit(SYSCALL_VECTOR, used_vectors);
873#endif 718#endif
874 719
875#ifdef CONFIG_X86_64
876 BUG_ON(test_bit(VSYSCALL_EMU_VECTOR, used_vectors));
877 set_system_intr_gate(VSYSCALL_EMU_VECTOR, &emulate_vsyscall);
878 set_bit(VSYSCALL_EMU_VECTOR, used_vectors);
879#endif
880
881 /* 720 /*
882 * Should be a barrier for any external CPU state: 721 * Should be a barrier for any external CPU state:
883 */ 722 */
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 4aa9c54a9b76..0f703f10901a 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -71,7 +71,6 @@ PHDRS {
71 text PT_LOAD FLAGS(5); /* R_E */ 71 text PT_LOAD FLAGS(5); /* R_E */
72 data PT_LOAD FLAGS(6); /* RW_ */ 72 data PT_LOAD FLAGS(6); /* RW_ */
73#ifdef CONFIG_X86_64 73#ifdef CONFIG_X86_64
74 user PT_LOAD FLAGS(5); /* R_E */
75#ifdef CONFIG_SMP 74#ifdef CONFIG_SMP
76 percpu PT_LOAD FLAGS(6); /* RW_ */ 75 percpu PT_LOAD FLAGS(6); /* RW_ */
77#endif 76#endif
@@ -154,44 +153,16 @@ SECTIONS
154 153
155#ifdef CONFIG_X86_64 154#ifdef CONFIG_X86_64
156 155
157#define VSYSCALL_ADDR (-10*1024*1024) 156 . = ALIGN(PAGE_SIZE);
158
159#define VLOAD_OFFSET (VSYSCALL_ADDR - __vsyscall_0 + LOAD_OFFSET)
160#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET)
161
162#define VVIRT_OFFSET (VSYSCALL_ADDR - __vsyscall_0)
163#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
164
165 . = ALIGN(4096);
166 __vsyscall_0 = .;
167
168 . = VSYSCALL_ADDR;
169 .vsyscall : AT(VLOAD(.vsyscall)) {
170 *(.vsyscall_0)
171
172 . = 1024;
173 *(.vsyscall_1)
174
175 . = 2048;
176 *(.vsyscall_2)
177
178 . = 4096; /* Pad the whole page. */
179 } :user =0xcc
180 . = ALIGN(__vsyscall_0 + PAGE_SIZE, PAGE_SIZE);
181
182#undef VSYSCALL_ADDR
183#undef VLOAD_OFFSET
184#undef VLOAD
185#undef VVIRT_OFFSET
186#undef VVIRT
187
188 __vvar_page = .; 157 __vvar_page = .;
189 158
190 .vvar : AT(ADDR(.vvar) - LOAD_OFFSET) { 159 .vvar : AT(ADDR(.vvar) - LOAD_OFFSET) {
160 /* work around gold bug 13023 */
161 __vvar_beginning_hack = .;
191 162
192 /* Place all vvars at the offsets in asm/vvar.h. */ 163 /* Place all vvars at the offsets in asm/vvar.h. */
193#define EMIT_VVAR(name, offset) \ 164#define EMIT_VVAR(name, offset) \
194 . = offset; \ 165 . = __vvar_beginning_hack + offset; \
195 *(.vvar_ ## name) 166 *(.vvar_ ## name)
196#define __VVAR_KERNEL_LDS 167#define __VVAR_KERNEL_LDS
197#include <asm/vvar.h> 168#include <asm/vvar.h>
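
With the fixed-address .vsyscall_* output sections and their user PT_LOAD segment gone, the vvar page is the only special data page left here, and its offsets are now expressed relative to __vvar_beginning_hack purely to sidestep gold bug 13023. The EMIT_VVAR() hook defined above is consumed by asm/vvar.h; a hypothetical entry of that shape (illustrative, not a quote of the real header, and the 128 offset is a stand-in):

/*
 * Hypothetical vvar table entry: with __VVAR_KERNEL_LDS defined, a
 * declaration macro of this form reduces to EMIT_VVAR(name, offset),
 * which the script turns into ". = __vvar_beginning_hack + offset".
 */
#define EXAMPLE_DECLARE_VVAR(offset, type, name) \
	EMIT_VVAR(name, offset)

EXAMPLE_DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data)
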
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index dda7dff9cef7..e4d4a22e8b94 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -18,9 +18,6 @@
18 * use the vDSO. 18 * use the vDSO.
19 */ 19 */
20 20
21/* Disable profiling for userspace code: */
22#define DISABLE_BRANCH_PROFILING
23
24#include <linux/time.h> 21#include <linux/time.h>
25#include <linux/init.h> 22#include <linux/init.h>
26#include <linux/kernel.h> 23#include <linux/kernel.h>
@@ -28,6 +25,7 @@
28#include <linux/seqlock.h> 25#include <linux/seqlock.h>
29#include <linux/jiffies.h> 26#include <linux/jiffies.h>
30#include <linux/sysctl.h> 27#include <linux/sysctl.h>
28#include <linux/topology.h>
31#include <linux/clocksource.h> 29#include <linux/clocksource.h>
32#include <linux/getcpu.h> 30#include <linux/getcpu.h>
33#include <linux/cpu.h> 31#include <linux/cpu.h>
@@ -50,12 +48,36 @@
50#include <asm/vgtod.h> 48#include <asm/vgtod.h>
51#include <asm/traps.h> 49#include <asm/traps.h>
52 50
51#define CREATE_TRACE_POINTS
52#include "vsyscall_trace.h"
53
53DEFINE_VVAR(int, vgetcpu_mode); 54DEFINE_VVAR(int, vgetcpu_mode);
54DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) = 55DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) =
55{ 56{
56 .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock), 57 .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock),
57}; 58};
58 59
60static enum { EMULATE, NATIVE, NONE } vsyscall_mode = NATIVE;
61
62static int __init vsyscall_setup(char *str)
63{
64 if (str) {
65 if (!strcmp("emulate", str))
66 vsyscall_mode = EMULATE;
67 else if (!strcmp("native", str))
68 vsyscall_mode = NATIVE;
69 else if (!strcmp("none", str))
70 vsyscall_mode = NONE;
71 else
72 return -EINVAL;
73
74 return 0;
75 }
76
77 return -EINVAL;
78}
79early_param("vsyscall", vsyscall_setup);
80
59void update_vsyscall_tz(void) 81void update_vsyscall_tz(void)
60{ 82{
61 unsigned long flags; 83 unsigned long flags;
@@ -100,7 +122,7 @@ static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
100 122
101 printk("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n", 123 printk("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n",
102 level, tsk->comm, task_pid_nr(tsk), 124 level, tsk->comm, task_pid_nr(tsk),
103 message, regs->ip - 2, regs->cs, 125 message, regs->ip, regs->cs,
104 regs->sp, regs->ax, regs->si, regs->di); 126 regs->sp, regs->ax, regs->si, regs->di);
105} 127}
106 128
@@ -118,46 +140,39 @@ static int addr_to_vsyscall_nr(unsigned long addr)
118 return nr; 140 return nr;
119} 141}
120 142
121void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code) 143bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
122{ 144{
123 struct task_struct *tsk; 145 struct task_struct *tsk;
124 unsigned long caller; 146 unsigned long caller;
125 int vsyscall_nr; 147 int vsyscall_nr;
126 long ret; 148 long ret;
127 149
128 local_irq_enable();
129
130 /* 150 /*
131 * Real 64-bit user mode code has cs == __USER_CS. Anything else 151 * No point in checking CS -- the only way to get here is a user mode
132 * is bogus. 152 * trap to a high address, which means that we're in 64-bit user code.
133 */ 153 */
134 if (regs->cs != __USER_CS) {
135 /*
136 * If we trapped from kernel mode, we might as well OOPS now
137 * instead of returning to some random address and OOPSing
138 * then.
139 */
140 BUG_ON(!user_mode(regs));
141 154
142 /* Compat mode and non-compat 32-bit CS should both segfault. */ 155 WARN_ON_ONCE(address != regs->ip);
143 warn_bad_vsyscall(KERN_WARNING, regs, 156
144 "illegal int 0xcc from 32-bit mode"); 157 if (vsyscall_mode == NONE) {
145 goto sigsegv; 158 warn_bad_vsyscall(KERN_INFO, regs,
159 "vsyscall attempted with vsyscall=none");
160 return false;
146 } 161 }
147 162
148 /* 163 vsyscall_nr = addr_to_vsyscall_nr(address);
149 * x86-ism here: regs->ip points to the instruction after the int 0xcc, 164
150 * and int 0xcc is two bytes long. 165 trace_emulate_vsyscall(vsyscall_nr);
151 */ 166
152 vsyscall_nr = addr_to_vsyscall_nr(regs->ip - 2);
153 if (vsyscall_nr < 0) { 167 if (vsyscall_nr < 0) {
154 warn_bad_vsyscall(KERN_WARNING, regs, 168 warn_bad_vsyscall(KERN_WARNING, regs,
155 "illegal int 0xcc (exploit attempt?)"); 169 "misaligned vsyscall (exploit attempt or buggy program) -- look up the vsyscall kernel parameter if you need a workaround");
156 goto sigsegv; 170 goto sigsegv;
157 } 171 }
158 172
159 if (get_user(caller, (unsigned long __user *)regs->sp) != 0) { 173 if (get_user(caller, (unsigned long __user *)regs->sp) != 0) {
160 warn_bad_vsyscall(KERN_WARNING, regs, "int 0xcc with bad stack (exploit attempt?)"); 174 warn_bad_vsyscall(KERN_WARNING, regs,
175 "vsyscall with bad stack (exploit attempt?)");
161 goto sigsegv; 176 goto sigsegv;
162 } 177 }
163 178
@@ -202,13 +217,11 @@ void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code)
202 regs->ip = caller; 217 regs->ip = caller;
203 regs->sp += 8; 218 regs->sp += 8;
204 219
205 local_irq_disable(); 220 return true;
206 return;
207 221
208sigsegv: 222sigsegv:
209 regs->ip -= 2; /* The faulting instruction should be the int 0xcc. */
210 force_sig(SIGSEGV, current); 223 force_sig(SIGSEGV, current);
211 local_irq_disable(); 224 return true;
212} 225}
213 226
214/* 227/*
@@ -256,15 +269,21 @@ cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
256 269
257void __init map_vsyscall(void) 270void __init map_vsyscall(void)
258{ 271{
259 extern char __vsyscall_0; 272 extern char __vsyscall_page;
260 unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0); 273 unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
261 extern char __vvar_page; 274 extern char __vvar_page;
262 unsigned long physaddr_vvar_page = __pa_symbol(&__vvar_page); 275 unsigned long physaddr_vvar_page = __pa_symbol(&__vvar_page);
263 276
264 /* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */ 277 __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_vsyscall,
265 __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL); 278 vsyscall_mode == NATIVE
279 ? PAGE_KERNEL_VSYSCALL
280 : PAGE_KERNEL_VVAR);
281 BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_FIRST_PAGE) !=
282 (unsigned long)VSYSCALL_START);
283
266 __set_fixmap(VVAR_PAGE, physaddr_vvar_page, PAGE_KERNEL_VVAR); 284 __set_fixmap(VVAR_PAGE, physaddr_vvar_page, PAGE_KERNEL_VVAR);
267 BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) != (unsigned long)VVAR_ADDRESS); 285 BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) !=
286 (unsigned long)VVAR_ADDRESS);
268} 287}
269 288
270static int __init vsyscall_init(void) 289static int __init vsyscall_init(void)
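
emulate_vsyscall() is no longer an int 0xcc trap handler: in EMULATE mode the vsyscall page is mapped without execute permission, user instruction fetches into it fault, and the page-fault path hands those faults to this function, which performs the call and rewrites regs. A hedged sketch of what such a fault-path hook looks like (the function name is illustrative and the header placement of the declarations is assumed, not quoted from fault.c):

#include <linux/types.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/vsyscall.h>	/* VSYSCALL_START; emulate_vsyscall() assumed declared here */

/*
 * Sketch: called for a user-mode instruction-fetch fault.  If the fault
 * address lies in the legacy vsyscall page, let emulate_vsyscall() handle
 * it; it returns true once the registers have been fixed up.
 */
static bool example_handle_vsyscall_fault(struct pt_regs *regs,
					  unsigned long address)
{
	if ((address & PAGE_MASK) == VSYSCALL_START)
		return emulate_vsyscall(regs, address);

	return false;
}
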
diff --git a/arch/x86/kernel/vsyscall_emu_64.S b/arch/x86/kernel/vsyscall_emu_64.S
index ffa845eae5ca..c9596a9af159 100644
--- a/arch/x86/kernel/vsyscall_emu_64.S
+++ b/arch/x86/kernel/vsyscall_emu_64.S
@@ -7,21 +7,31 @@
7 */ 7 */
8 8
9#include <linux/linkage.h> 9#include <linux/linkage.h>
10
10#include <asm/irq_vectors.h> 11#include <asm/irq_vectors.h>
12#include <asm/page_types.h>
13#include <asm/unistd_64.h>
14
15__PAGE_ALIGNED_DATA
16 .globl __vsyscall_page
17 .balign PAGE_SIZE, 0xcc
18 .type __vsyscall_page, @object
19__vsyscall_page:
20
21 mov $__NR_gettimeofday, %rax
22 syscall
23 ret
11 24
12/* The unused parts of the page are filled with 0xcc by the linker script. */ 25 .balign 1024, 0xcc
26 mov $__NR_time, %rax
27 syscall
28 ret
13 29
14.section .vsyscall_0, "a" 30 .balign 1024, 0xcc
15ENTRY(vsyscall_0) 31 mov $__NR_getcpu, %rax
16 int $VSYSCALL_EMU_VECTOR 32 syscall
17END(vsyscall_0) 33 ret
18 34
19.section .vsyscall_1, "a" 35 .balign 4096, 0xcc
20ENTRY(vsyscall_1)
21 int $VSYSCALL_EMU_VECTOR
22END(vsyscall_1)
23 36
24.section .vsyscall_2, "a" 37 .size __vsyscall_page, 4096
25ENTRY(vsyscall_2)
26 int $VSYSCALL_EMU_VECTOR
27END(vsyscall_2)
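
The vsyscall page now holds three real, 1024-byte-spaced syscall stubs instead of trapping int $VSYSCALL_EMU_VECTOR, so legacy binaries that jump to the fixed addresses keep working whether the page is mapped natively or emulated. A sketch of such a legacy caller; 0xffffffffff600000 is the historical vsyscall base, with the gettimeofday stub at offset 0, and new code should use the vDSO instead:

#include <stddef.h>
#include <sys/time.h>

typedef int (*vgettimeofday_t)(struct timeval *tv, struct timezone *tz);

/* Sketch: how a pre-vDSO binary calls the legacy vsyscall gettimeofday. */
static int example_legacy_gettimeofday(struct timeval *tv)
{
	vgettimeofday_t vgtod = (vgettimeofday_t)0xffffffffff600000UL;

	return vgtod(tv, NULL);
}
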
diff --git a/arch/x86/kernel/vsyscall_trace.h b/arch/x86/kernel/vsyscall_trace.h
new file mode 100644
index 000000000000..a8b2edec54fe
--- /dev/null
+++ b/arch/x86/kernel/vsyscall_trace.h
@@ -0,0 +1,29 @@
1#undef TRACE_SYSTEM
2#define TRACE_SYSTEM vsyscall
3
4#if !defined(__VSYSCALL_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
5#define __VSYSCALL_TRACE_H
6
7#include <linux/tracepoint.h>
8
9TRACE_EVENT(emulate_vsyscall,
10
11 TP_PROTO(int nr),
12
13 TP_ARGS(nr),
14
15 TP_STRUCT__entry(__field(int, nr)),
16
17 TP_fast_assign(
18 __entry->nr = nr;
19 ),
20
21 TP_printk("nr = %d", __entry->nr)
22);
23
24#endif
25
26#undef TRACE_INCLUDE_PATH
27#define TRACE_INCLUDE_PATH ../../arch/x86/kernel
28#define TRACE_INCLUDE_FILE vsyscall_trace
29#include <trace/define_trace.h>
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 6f164bd5e14d..c1d6cd549397 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -21,12 +21,14 @@
21#include <asm/pat.h> 21#include <asm/pat.h>
22#include <asm/tsc.h> 22#include <asm/tsc.h>
23#include <asm/iommu.h> 23#include <asm/iommu.h>
24#include <asm/mach_traps.h>
24 25
25void __cpuinit x86_init_noop(void) { } 26void __cpuinit x86_init_noop(void) { }
26void __init x86_init_uint_noop(unsigned int unused) { } 27void __init x86_init_uint_noop(unsigned int unused) { }
27void __init x86_init_pgd_noop(pgd_t *unused) { } 28void __init x86_init_pgd_noop(pgd_t *unused) { }
28int __init iommu_init_noop(void) { return 0; } 29int __init iommu_init_noop(void) { return 0; }
29void iommu_shutdown_noop(void) { } 30void iommu_shutdown_noop(void) { }
31void wallclock_init_noop(void) { }
30 32
31/* 33/*
32 * The platform setup functions are preset with the default functions 34 * The platform setup functions are preset with the default functions
@@ -97,11 +99,13 @@ static int default_i8042_detect(void) { return 1; };
97 99
98struct x86_platform_ops x86_platform = { 100struct x86_platform_ops x86_platform = {
99 .calibrate_tsc = native_calibrate_tsc, 101 .calibrate_tsc = native_calibrate_tsc,
102 .wallclock_init = wallclock_init_noop,
100 .get_wallclock = mach_get_cmos_time, 103 .get_wallclock = mach_get_cmos_time,
101 .set_wallclock = mach_set_rtc_mmss, 104 .set_wallclock = mach_set_rtc_mmss,
102 .iommu_shutdown = iommu_shutdown_noop, 105 .iommu_shutdown = iommu_shutdown_noop,
103 .is_untracked_pat_range = is_ISA_range, 106 .is_untracked_pat_range = is_ISA_range,
104 .nmi_init = default_nmi_init, 107 .nmi_init = default_nmi_init,
108 .get_nmi_reason = default_get_nmi_reason,
105 .i8042_detect = default_i8042_detect 109 .i8042_detect = default_i8042_detect
106}; 110};
107 111
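
x86_platform grows two hooks in this hunk: wallclock_init (a no-op by default, invoked from setup_arch() via the new x86_platform.wallclock_init() call) and get_nmi_reason (defaulting to the legacy port 0x61 reader that the relocated NMI code consumes). A hedged sketch of a platform overriding both; the platform, its functions, and the "no reason" behaviour are hypothetical:

#include <linux/init.h>
#include <asm/x86_init.h>

/* Hypothetical platform glue overriding the new x86_platform hooks. */
static void __init example_wallclock_init(void)
{
	/* e.g. map and probe a platform RTC here */
}

static unsigned char example_get_nmi_reason(void)
{
	/* a platform without the legacy 0x61 port reports "no reason" */
	return 0;
}

static void __init example_platform_setup(void)
{
	x86_platform.wallclock_init = example_wallclock_init;
	x86_platform.get_nmi_reason = example_get_nmi_reason;
}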