Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/acpi/boot.c | 10
-rw-r--r--  arch/x86/kernel/alternative.c | 2
-rw-r--r--  arch/x86/kernel/amd_nb.c | 8
-rw-r--r--  arch/x86/kernel/aperture_64.c | 4
-rw-r--r--  arch/x86/kernel/apic/Makefile | 1
-rw-r--r--  arch/x86/kernel/apic/apic.c | 146
-rw-r--r--  arch/x86/kernel/apic/apic_flat_64.c | 9
-rw-r--r--  arch/x86/kernel/apic/apic_numachip.c | 294
-rw-r--r--  arch/x86/kernel/apic/io_apic.c | 15
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c | 2
-rw-r--r--  arch/x86/kernel/check.c | 34
-rw-r--r--  arch/x86/kernel/cpu/amd.c | 17
-rw-r--r--  arch/x86/kernel/cpu/centaur.c | 2
-rw-r--r--  arch/x86/kernel/cpu/common.c | 14
-rw-r--r--  arch/x86/kernel/cpu/cpu.h | 5
-rw-r--r--  arch/x86/kernel/cpu/intel.c | 2
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c | 25
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-inject.c | 36
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-internal.h | 4
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c | 219
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_amd.c | 18
-rw-r--r--  arch/x86/kernel/cpu/mcheck/therm_throt.c | 94
-rw-r--r--  arch/x86/kernel/cpu/mcheck/threshold.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mtrr/generic.c | 2
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c | 278
-rw-r--r--  arch/x86/kernel/cpu/perf_event.h | 51
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd.c | 2
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd_ibs.c | 29
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c | 94
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c | 6
-rw-r--r--  arch/x86/kernel/cpu/perf_event_p4.c | 2
-rw-r--r--  arch/x86/kernel/cpu/powerflags.c | 3
-rw-r--r--  arch/x86/kernel/cpu/proc.c | 4
-rw-r--r--  arch/x86/kernel/cpuid.c | 2
-rw-r--r--  arch/x86/kernel/dumpstack_32.c | 8
-rw-r--r--  arch/x86/kernel/dumpstack_64.c | 8
-rw-r--r--  arch/x86/kernel/e820.c | 58
-rw-r--r--  arch/x86/kernel/entry_32.S | 4
-rw-r--r--  arch/x86/kernel/entry_64.S | 31
-rw-r--r--  arch/x86/kernel/head.c | 2
-rw-r--r--  arch/x86/kernel/head32.c | 7
-rw-r--r--  arch/x86/kernel/head64.c | 7
-rw-r--r--  arch/x86/kernel/hpet.c | 29
-rw-r--r--  arch/x86/kernel/irq.c | 11
-rw-r--r--  arch/x86/kernel/irq_64.c | 3
-rw-r--r--  arch/x86/kernel/irqinit.c | 2
-rw-r--r--  arch/x86/kernel/jump_label.c | 2
-rw-r--r--  arch/x86/kernel/kvmclock.c | 5
-rw-r--r--  arch/x86/kernel/microcode_amd.c | 209
-rw-r--r--  arch/x86/kernel/microcode_core.c | 91
-rw-r--r--  arch/x86/kernel/mpparse.c | 14
-rw-r--r--  arch/x86/kernel/msr.c | 2
-rw-r--r--  arch/x86/kernel/nmi.c | 3
-rw-r--r--  arch/x86/kernel/process.c | 10
-rw-r--r--  arch/x86/kernel/process_32.c | 6
-rw-r--r--  arch/x86/kernel/process_64.c | 15
-rw-r--r--  arch/x86/kernel/ptrace.c | 3
-rw-r--r--  arch/x86/kernel/quirks.c | 13
-rw-r--r--  arch/x86/kernel/reboot.c | 21
-rw-r--r--  arch/x86/kernel/rtc.c | 5
-rw-r--r--  arch/x86/kernel/setup.c | 23
-rw-r--r--  arch/x86/kernel/smpboot.c | 3
-rw-r--r--  arch/x86/kernel/trampoline.c | 4
-rw-r--r--  arch/x86/kernel/traps.c | 7
-rw-r--r--  arch/x86/kernel/tsc.c | 6
-rw-r--r--  arch/x86/kernel/tsc_sync.c | 4
-rw-r--r--  arch/x86/kernel/vsyscall_64.c | 77
-rw-r--r--  arch/x86/kernel/x86_init.c | 5
68 files changed, 1499 insertions(+), 635 deletions(-)
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 4558f0d0822d..ce664f33ea8e 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -219,6 +219,8 @@ static int __init
219acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end) 219acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
220{ 220{
221 struct acpi_madt_local_x2apic *processor = NULL; 221 struct acpi_madt_local_x2apic *processor = NULL;
222 int apic_id;
223 u8 enabled;
222 224
223 processor = (struct acpi_madt_local_x2apic *)header; 225 processor = (struct acpi_madt_local_x2apic *)header;
224 226
@@ -227,6 +229,8 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
227 229
228 acpi_table_print_madt_entry(header); 230 acpi_table_print_madt_entry(header);
229 231
232 apic_id = processor->local_apic_id;
233 enabled = processor->lapic_flags & ACPI_MADT_ENABLED;
230#ifdef CONFIG_X86_X2APIC 234#ifdef CONFIG_X86_X2APIC
231 /* 235 /*
232 * We need to register disabled CPU as well to permit 236 * We need to register disabled CPU as well to permit
@@ -235,8 +239,10 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
235 * to not preallocating memory for all NR_CPUS 239 * to not preallocating memory for all NR_CPUS
236 * when we use CPU hotplug. 240 * when we use CPU hotplug.
237 */ 241 */
238 acpi_register_lapic(processor->local_apic_id, /* APIC ID */ 242 if (!cpu_has_x2apic && (apic_id >= 0xff) && enabled)
239 processor->lapic_flags & ACPI_MADT_ENABLED); 243 printk(KERN_WARNING PREFIX "x2apic entry ignored\n");
244 else
245 acpi_register_lapic(apic_id, enabled);
240#else 246#else
241 printk(KERN_WARNING PREFIX "x2apic entry ignored\n"); 247 printk(KERN_WARNING PREFIX "x2apic entry ignored\n");
242#endif 248#endif
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index c63822816249..1f84794f0759 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -738,5 +738,5 @@ void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)
738 738
739 atomic_set(&stop_machine_first, 1); 739 atomic_set(&stop_machine_first, 1);
740 wrote_text = 0; 740 wrote_text = 0;
741 __stop_machine(stop_machine_text_poke, (void *)&tpp, NULL); 741 __stop_machine(stop_machine_text_poke, (void *)&tpp, cpu_online_mask);
742} 742}
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 4c39baa8facc..013c1810ce72 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -123,16 +123,14 @@ int amd_get_subcaches(int cpu)
123{ 123{
124 struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link; 124 struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link;
125 unsigned int mask; 125 unsigned int mask;
126 int cuid = 0; 126 int cuid;
127 127
128 if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) 128 if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
129 return 0; 129 return 0;
130 130
131 pci_read_config_dword(link, 0x1d4, &mask); 131 pci_read_config_dword(link, 0x1d4, &mask);
132 132
133#ifdef CONFIG_SMP
134 cuid = cpu_data(cpu).compute_unit_id; 133 cuid = cpu_data(cpu).compute_unit_id;
135#endif
136 return (mask >> (4 * cuid)) & 0xf; 134 return (mask >> (4 * cuid)) & 0xf;
137} 135}
138 136
@@ -141,7 +139,7 @@ int amd_set_subcaches(int cpu, int mask)
141 static unsigned int reset, ban; 139 static unsigned int reset, ban;
142 struct amd_northbridge *nb = node_to_amd_nb(amd_get_nb_id(cpu)); 140 struct amd_northbridge *nb = node_to_amd_nb(amd_get_nb_id(cpu));
143 unsigned int reg; 141 unsigned int reg;
144 int cuid = 0; 142 int cuid;
145 143
146 if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING) || mask > 0xf) 144 if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING) || mask > 0xf)
147 return -EINVAL; 145 return -EINVAL;
@@ -159,9 +157,7 @@ int amd_set_subcaches(int cpu, int mask)
159 pci_write_config_dword(nb->misc, 0x1b8, reg & ~0x180000); 157 pci_write_config_dword(nb->misc, 0x1b8, reg & ~0x180000);
160 } 158 }
161 159
162#ifdef CONFIG_SMP
163 cuid = cpu_data(cpu).compute_unit_id; 160 cuid = cpu_data(cpu).compute_unit_id;
164#endif
165 mask <<= 4 * cuid; 161 mask <<= 4 * cuid;
166 mask |= (0xf ^ (1 << cuid)) << 26; 162 mask |= (0xf ^ (1 << cuid)) << 26;
167 163
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 3d2661ca6542..6e76c191a835 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -88,13 +88,13 @@ static u32 __init allocate_aperture(void)
88 */ 88 */
89 addr = memblock_find_in_range(GART_MIN_ADDR, GART_MAX_ADDR, 89 addr = memblock_find_in_range(GART_MIN_ADDR, GART_MAX_ADDR,
90 aper_size, aper_size); 90 aper_size, aper_size);
91 if (addr == MEMBLOCK_ERROR || addr + aper_size > GART_MAX_ADDR) { 91 if (!addr || addr + aper_size > GART_MAX_ADDR) {
92 printk(KERN_ERR 92 printk(KERN_ERR
93 "Cannot allocate aperture memory hole (%lx,%uK)\n", 93 "Cannot allocate aperture memory hole (%lx,%uK)\n",
94 addr, aper_size>>10); 94 addr, aper_size>>10);
95 return 0; 95 return 0;
96 } 96 }
97 memblock_x86_reserve_range(addr, addr + aper_size, "aperture64"); 97 memblock_reserve(addr, aper_size);
98 /* 98 /*
99 * Kmemleak should not scan this block as it may not be mapped via the 99 * Kmemleak should not scan this block as it may not be mapped via the
100 * kernel direct mapping. 100 * kernel direct mapping.
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 767fd04f2843..0ae0323b1f9c 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_SMP) += ipi.o
10 10
11ifeq ($(CONFIG_X86_64),y) 11ifeq ($(CONFIG_X86_64),y)
12# APIC probe will depend on the listing order here 12# APIC probe will depend on the listing order here
13obj-$(CONFIG_X86_NUMACHIP) += apic_numachip.o
13obj-$(CONFIG_X86_UV) += x2apic_uv_x.o 14obj-$(CONFIG_X86_UV) += x2apic_uv_x.o
14obj-$(CONFIG_X86_X2APIC) += x2apic_phys.o 15obj-$(CONFIG_X86_X2APIC) += x2apic_phys.o
15obj-$(CONFIG_X86_X2APIC) += x2apic_cluster.o 16obj-$(CONFIG_X86_X2APIC) += x2apic_cluster.o
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index a2fd72e0ab35..2eec05b6d1b8 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -146,16 +146,26 @@ __setup("apicpmtimer", setup_apicpmtimer);
146int x2apic_mode; 146int x2apic_mode;
147#ifdef CONFIG_X86_X2APIC 147#ifdef CONFIG_X86_X2APIC
148/* x2apic enabled before OS handover */ 148/* x2apic enabled before OS handover */
149static int x2apic_preenabled; 149int x2apic_preenabled;
150static int x2apic_disabled;
151static int nox2apic;
150static __init int setup_nox2apic(char *str) 152static __init int setup_nox2apic(char *str)
151{ 153{
152 if (x2apic_enabled()) { 154 if (x2apic_enabled()) {
153 pr_warning("Bios already enabled x2apic, " 155 int apicid = native_apic_msr_read(APIC_ID);
154 "can't enforce nox2apic"); 156
155 return 0; 157 if (apicid >= 255) {
156 } 158 pr_warning("Apicid: %08x, cannot enforce nox2apic\n",
159 apicid);
160 return 0;
161 }
162
163 pr_warning("x2apic already enabled. will disable it\n");
164 } else
165 setup_clear_cpu_cap(X86_FEATURE_X2APIC);
166
167 nox2apic = 1;
157 168
158 setup_clear_cpu_cap(X86_FEATURE_X2APIC);
159 return 0; 169 return 0;
160} 170}
161early_param("nox2apic", setup_nox2apic); 171early_param("nox2apic", setup_nox2apic);
@@ -186,7 +196,7 @@ static struct resource lapic_resource = {
186 .flags = IORESOURCE_MEM | IORESOURCE_BUSY, 196 .flags = IORESOURCE_MEM | IORESOURCE_BUSY,
187}; 197};
188 198
189static unsigned int calibration_result; 199unsigned int lapic_timer_frequency = 0;
190 200
191static void apic_pm_activate(void); 201static void apic_pm_activate(void);
192 202
@@ -250,6 +260,7 @@ u32 native_safe_apic_wait_icr_idle(void)
250 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; 260 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
251 if (!send_status) 261 if (!send_status)
252 break; 262 break;
263 inc_irq_stat(icr_read_retry_count);
253 udelay(100); 264 udelay(100);
254 } while (timeout++ < 1000); 265 } while (timeout++ < 1000);
255 266
@@ -454,7 +465,7 @@ static void lapic_timer_setup(enum clock_event_mode mode,
454 switch (mode) { 465 switch (mode) {
455 case CLOCK_EVT_MODE_PERIODIC: 466 case CLOCK_EVT_MODE_PERIODIC:
456 case CLOCK_EVT_MODE_ONESHOT: 467 case CLOCK_EVT_MODE_ONESHOT:
457 __setup_APIC_LVTT(calibration_result, 468 __setup_APIC_LVTT(lapic_timer_frequency,
458 mode != CLOCK_EVT_MODE_PERIODIC, 1); 469 mode != CLOCK_EVT_MODE_PERIODIC, 1);
459 break; 470 break;
460 case CLOCK_EVT_MODE_UNUSED: 471 case CLOCK_EVT_MODE_UNUSED:
@@ -638,6 +649,25 @@ static int __init calibrate_APIC_clock(void)
638 long delta, deltatsc; 649 long delta, deltatsc;
639 int pm_referenced = 0; 650 int pm_referenced = 0;
640 651
652 /**
653 * check if lapic timer has already been calibrated by platform
654 * specific routine, such as tsc calibration code. if so, we just fill
655 * in the clockevent structure and return.
656 */
657
658 if (lapic_timer_frequency) {
659 apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n",
660 lapic_timer_frequency);
661 lapic_clockevent.mult = div_sc(lapic_timer_frequency/APIC_DIVISOR,
662 TICK_NSEC, lapic_clockevent.shift);
663 lapic_clockevent.max_delta_ns =
664 clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
665 lapic_clockevent.min_delta_ns =
666 clockevent_delta2ns(0xF, &lapic_clockevent);
667 lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
668 return 0;
669 }
670
641 local_irq_disable(); 671 local_irq_disable();
642 672
643 /* Replace the global interrupt handler */ 673 /* Replace the global interrupt handler */
@@ -679,12 +709,12 @@ static int __init calibrate_APIC_clock(void)
679 lapic_clockevent.min_delta_ns = 709 lapic_clockevent.min_delta_ns =
680 clockevent_delta2ns(0xF, &lapic_clockevent); 710 clockevent_delta2ns(0xF, &lapic_clockevent);
681 711
682 calibration_result = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; 712 lapic_timer_frequency = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
683 713
684 apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta); 714 apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
685 apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult); 715 apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult);
686 apic_printk(APIC_VERBOSE, "..... calibration result: %u\n", 716 apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
687 calibration_result); 717 lapic_timer_frequency);
688 718
689 if (cpu_has_tsc) { 719 if (cpu_has_tsc) {
690 apic_printk(APIC_VERBOSE, "..... CPU clock speed is " 720 apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
@@ -695,13 +725,13 @@ static int __init calibrate_APIC_clock(void)
695 725
696 apic_printk(APIC_VERBOSE, "..... host bus clock speed is " 726 apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
697 "%u.%04u MHz.\n", 727 "%u.%04u MHz.\n",
698 calibration_result / (1000000 / HZ), 728 lapic_timer_frequency / (1000000 / HZ),
699 calibration_result % (1000000 / HZ)); 729 lapic_timer_frequency % (1000000 / HZ));
700 730
701 /* 731 /*
702 * Do a sanity check on the APIC calibration result 732 * Do a sanity check on the APIC calibration result
703 */ 733 */
704 if (calibration_result < (1000000 / HZ)) { 734 if (lapic_timer_frequency < (1000000 / HZ)) {
705 local_irq_enable(); 735 local_irq_enable();
706 pr_warning("APIC frequency too slow, disabling apic timer\n"); 736 pr_warning("APIC frequency too slow, disabling apic timer\n");
707 return -1; 737 return -1;
@@ -857,8 +887,8 @@ void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
857 * Besides, if we don't timer interrupts ignore the global 887 * Besides, if we don't timer interrupts ignore the global
858 * interrupt lock, which is the WrongThing (tm) to do. 888 * interrupt lock, which is the WrongThing (tm) to do.
859 */ 889 */
860 exit_idle();
861 irq_enter(); 890 irq_enter();
891 exit_idle();
862 local_apic_timer_interrupt(); 892 local_apic_timer_interrupt();
863 irq_exit(); 893 irq_exit();
864 894
@@ -1412,6 +1442,45 @@ void __init bsp_end_local_APIC_setup(void)
1412} 1442}
1413 1443
1414#ifdef CONFIG_X86_X2APIC 1444#ifdef CONFIG_X86_X2APIC
1445/*
1446 * Need to disable xapic and x2apic at the same time and then enable xapic mode
1447 */
1448static inline void __disable_x2apic(u64 msr)
1449{
1450 wrmsrl(MSR_IA32_APICBASE,
1451 msr & ~(X2APIC_ENABLE | XAPIC_ENABLE));
1452 wrmsrl(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE);
1453}
1454
1455static __init void disable_x2apic(void)
1456{
1457 u64 msr;
1458
1459 if (!cpu_has_x2apic)
1460 return;
1461
1462 rdmsrl(MSR_IA32_APICBASE, msr);
1463 if (msr & X2APIC_ENABLE) {
1464 u32 x2apic_id = read_apic_id();
1465
1466 if (x2apic_id >= 255)
1467 panic("Cannot disable x2apic, id: %08x\n", x2apic_id);
1468
1469 pr_info("Disabling x2apic\n");
1470 __disable_x2apic(msr);
1471
1472 if (nox2apic) {
1473 clear_cpu_cap(&cpu_data(0), X86_FEATURE_X2APIC);
1474 setup_clear_cpu_cap(X86_FEATURE_X2APIC);
1475 }
1476
1477 x2apic_disabled = 1;
1478 x2apic_mode = 0;
1479
1480 register_lapic_address(mp_lapic_addr);
1481 }
1482}
1483
1415void check_x2apic(void) 1484void check_x2apic(void)
1416{ 1485{
1417 if (x2apic_enabled()) { 1486 if (x2apic_enabled()) {
@@ -1422,15 +1491,20 @@ void check_x2apic(void)
1422 1491
1423void enable_x2apic(void) 1492void enable_x2apic(void)
1424{ 1493{
1425 int msr, msr2; 1494 u64 msr;
1495
1496 rdmsrl(MSR_IA32_APICBASE, msr);
1497 if (x2apic_disabled) {
1498 __disable_x2apic(msr);
1499 return;
1500 }
1426 1501
1427 if (!x2apic_mode) 1502 if (!x2apic_mode)
1428 return; 1503 return;
1429 1504
1430 rdmsr(MSR_IA32_APICBASE, msr, msr2);
1431 if (!(msr & X2APIC_ENABLE)) { 1505 if (!(msr & X2APIC_ENABLE)) {
1432 printk_once(KERN_INFO "Enabling x2apic\n"); 1506 printk_once(KERN_INFO "Enabling x2apic\n");
1433 wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, msr2); 1507 wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE);
1434 } 1508 }
1435} 1509}
1436#endif /* CONFIG_X86_X2APIC */ 1510#endif /* CONFIG_X86_X2APIC */
@@ -1467,25 +1541,34 @@ void __init enable_IR_x2apic(void)
1467 ret = save_ioapic_entries(); 1541 ret = save_ioapic_entries();
1468 if (ret) { 1542 if (ret) {
1469 pr_info("Saving IO-APIC state failed: %d\n", ret); 1543 pr_info("Saving IO-APIC state failed: %d\n", ret);
1470 goto out; 1544 return;
1471 } 1545 }
1472 1546
1473 local_irq_save(flags); 1547 local_irq_save(flags);
1474 legacy_pic->mask_all(); 1548 legacy_pic->mask_all();
1475 mask_ioapic_entries(); 1549 mask_ioapic_entries();
1476 1550
1551 if (x2apic_preenabled && nox2apic)
1552 disable_x2apic();
1553
1477 if (dmar_table_init_ret) 1554 if (dmar_table_init_ret)
1478 ret = -1; 1555 ret = -1;
1479 else 1556 else
1480 ret = enable_IR(); 1557 ret = enable_IR();
1481 1558
1559 if (!x2apic_supported())
1560 goto skip_x2apic;
1561
1482 if (ret < 0) { 1562 if (ret < 0) {
1483 /* IR is required if there is APIC ID > 255 even when running 1563 /* IR is required if there is APIC ID > 255 even when running
1484 * under KVM 1564 * under KVM
1485 */ 1565 */
1486 if (max_physical_apicid > 255 || 1566 if (max_physical_apicid > 255 ||
1487 !hypervisor_x2apic_available()) 1567 !hypervisor_x2apic_available()) {
1488 goto nox2apic; 1568 if (x2apic_preenabled)
1569 disable_x2apic();
1570 goto skip_x2apic;
1571 }
1489 /* 1572 /*
1490 * without IR all CPUs can be addressed by IOAPIC/MSI 1573 * without IR all CPUs can be addressed by IOAPIC/MSI
1491 * only in physical mode 1574 * only in physical mode
@@ -1493,8 +1576,10 @@ void __init enable_IR_x2apic(void)
1493 x2apic_force_phys(); 1576 x2apic_force_phys();
1494 } 1577 }
1495 1578
1496 if (ret == IRQ_REMAP_XAPIC_MODE) 1579 if (ret == IRQ_REMAP_XAPIC_MODE) {
1497 goto nox2apic; 1580 pr_info("x2apic not enabled, IRQ remapping is in xapic mode\n");
1581 goto skip_x2apic;
1582 }
1498 1583
1499 x2apic_enabled = 1; 1584 x2apic_enabled = 1;
1500 1585
@@ -1504,22 +1589,11 @@ void __init enable_IR_x2apic(void)
1504 pr_info("Enabled x2apic\n"); 1589 pr_info("Enabled x2apic\n");
1505 } 1590 }
1506 1591
1507nox2apic: 1592skip_x2apic:
1508 if (ret < 0) /* IR enabling failed */ 1593 if (ret < 0) /* IR enabling failed */
1509 restore_ioapic_entries(); 1594 restore_ioapic_entries();
1510 legacy_pic->restore_mask(); 1595 legacy_pic->restore_mask();
1511 local_irq_restore(flags); 1596 local_irq_restore(flags);
1512
1513out:
1514 if (x2apic_enabled || !x2apic_supported())
1515 return;
1516
1517 if (x2apic_preenabled)
1518 panic("x2apic: enabled by BIOS but kernel init failed.");
1519 else if (ret == IRQ_REMAP_XAPIC_MODE)
1520 pr_info("x2apic not enabled, IRQ remapping is in xapic mode\n");
1521 else if (ret < 0)
1522 pr_info("x2apic not enabled, IRQ remapping init failed\n");
1523} 1597}
1524 1598
1525#ifdef CONFIG_X86_64 1599#ifdef CONFIG_X86_64
@@ -1790,8 +1864,8 @@ void smp_spurious_interrupt(struct pt_regs *regs)
1790{ 1864{
1791 u32 v; 1865 u32 v;
1792 1866
1793 exit_idle();
1794 irq_enter(); 1867 irq_enter();
1868 exit_idle();
1795 /* 1869 /*
1796 * Check if this really is a spurious interrupt and ACK it 1870 * Check if this really is a spurious interrupt and ACK it
1797 * if it is a vectored one. Just in case... 1871 * if it is a vectored one. Just in case...
@@ -1827,8 +1901,8 @@ void smp_error_interrupt(struct pt_regs *regs)
1827 "Illegal register address", /* APIC Error Bit 7 */ 1901 "Illegal register address", /* APIC Error Bit 7 */
1828 }; 1902 };
1829 1903
1830 exit_idle();
1831 irq_enter(); 1904 irq_enter();
1905 exit_idle();
1832 /* First tickle the hardware, only then report what went on. -- REW */ 1906 /* First tickle the hardware, only then report what went on. -- REW */
1833 v0 = apic_read(APIC_ESR); 1907 v0 = apic_read(APIC_ESR);
1834 apic_write(APIC_ESR, 0); 1908 apic_write(APIC_ESR, 0);
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index f7a41e4cae47..8c3cdded6f2b 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -62,7 +62,7 @@ static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask)
62 * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel 62 * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
63 * document number 292116). So here it goes... 63 * document number 292116). So here it goes...
64 */ 64 */
65static void flat_init_apic_ldr(void) 65void flat_init_apic_ldr(void)
66{ 66{
67 unsigned long val; 67 unsigned long val;
68 unsigned long num, id; 68 unsigned long num, id;
@@ -171,9 +171,14 @@ static int flat_phys_pkg_id(int initial_apic_id, int index_msb)
171 return initial_apic_id >> index_msb; 171 return initial_apic_id >> index_msb;
172} 172}
173 173
174static int flat_probe(void)
175{
176 return 1;
177}
178
174static struct apic apic_flat = { 179static struct apic apic_flat = {
175 .name = "flat", 180 .name = "flat",
176 .probe = NULL, 181 .probe = flat_probe,
177 .acpi_madt_oem_check = flat_acpi_madt_oem_check, 182 .acpi_madt_oem_check = flat_acpi_madt_oem_check,
178 .apic_id_registered = flat_apic_id_registered, 183 .apic_id_registered = flat_apic_id_registered,
179 184
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
new file mode 100644
index 000000000000..09d3d8c1cd99
--- /dev/null
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -0,0 +1,294 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Numascale NumaConnect-Specific APIC Code
7 *
8 * Copyright (C) 2011 Numascale AS. All rights reserved.
9 *
10 * Send feedback to <support@numascale.com>
11 *
12 */
13
14#include <linux/errno.h>
15#include <linux/threads.h>
16#include <linux/cpumask.h>
17#include <linux/string.h>
18#include <linux/kernel.h>
19#include <linux/module.h>
20#include <linux/ctype.h>
21#include <linux/init.h>
22#include <linux/hardirq.h>
23#include <linux/delay.h>
24
25#include <asm/numachip/numachip_csr.h>
26#include <asm/smp.h>
27#include <asm/apic.h>
28#include <asm/ipi.h>
29#include <asm/apic_flat_64.h>
30
31static int numachip_system __read_mostly;
32
33static struct apic apic_numachip __read_mostly;
34
35static unsigned int get_apic_id(unsigned long x)
36{
37 unsigned long value;
38 unsigned int id;
39
40 rdmsrl(MSR_FAM10H_NODE_ID, value);
41 id = ((x >> 24) & 0xffU) | ((value << 2) & 0x3f00U);
42
43 return id;
44}
45
46static unsigned long set_apic_id(unsigned int id)
47{
48 unsigned long x;
49
50 x = ((id & 0xffU) << 24);
51 return x;
52}
53
54static unsigned int read_xapic_id(void)
55{
56 return get_apic_id(apic_read(APIC_ID));
57}
58
59static int numachip_apic_id_registered(void)
60{
61 return physid_isset(read_xapic_id(), phys_cpu_present_map);
62}
63
64static int numachip_phys_pkg_id(int initial_apic_id, int index_msb)
65{
66 return initial_apic_id >> index_msb;
67}
68
69static const struct cpumask *numachip_target_cpus(void)
70{
71 return cpu_online_mask;
72}
73
74static void numachip_vector_allocation_domain(int cpu, struct cpumask *retmask)
75{
76 cpumask_clear(retmask);
77 cpumask_set_cpu(cpu, retmask);
78}
79
80static int __cpuinit numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip)
81{
82 union numachip_csr_g3_ext_irq_gen int_gen;
83
84 int_gen.s._destination_apic_id = phys_apicid;
85 int_gen.s._vector = 0;
86 int_gen.s._msgtype = APIC_DM_INIT >> 8;
87 int_gen.s._index = 0;
88
89 write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);
90
91 int_gen.s._msgtype = APIC_DM_STARTUP >> 8;
92 int_gen.s._vector = start_rip >> 12;
93
94 write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);
95
96 atomic_set(&init_deasserted, 1);
97 return 0;
98}
99
100static void numachip_send_IPI_one(int cpu, int vector)
101{
102 union numachip_csr_g3_ext_irq_gen int_gen;
103 int apicid = per_cpu(x86_cpu_to_apicid, cpu);
104
105 int_gen.s._destination_apic_id = apicid;
106 int_gen.s._vector = vector;
107 int_gen.s._msgtype = (vector == NMI_VECTOR ? APIC_DM_NMI : APIC_DM_FIXED) >> 8;
108 int_gen.s._index = 0;
109
110 write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);
111}
112
113static void numachip_send_IPI_mask(const struct cpumask *mask, int vector)
114{
115 unsigned int cpu;
116
117 for_each_cpu(cpu, mask)
118 numachip_send_IPI_one(cpu, vector);
119}
120
121static void numachip_send_IPI_mask_allbutself(const struct cpumask *mask,
122 int vector)
123{
124 unsigned int this_cpu = smp_processor_id();
125 unsigned int cpu;
126
127 for_each_cpu(cpu, mask) {
128 if (cpu != this_cpu)
129 numachip_send_IPI_one(cpu, vector);
130 }
131}
132
133static void numachip_send_IPI_allbutself(int vector)
134{
135 unsigned int this_cpu = smp_processor_id();
136 unsigned int cpu;
137
138 for_each_online_cpu(cpu) {
139 if (cpu != this_cpu)
140 numachip_send_IPI_one(cpu, vector);
141 }
142}
143
144static void numachip_send_IPI_all(int vector)
145{
146 numachip_send_IPI_mask(cpu_online_mask, vector);
147}
148
149static void numachip_send_IPI_self(int vector)
150{
151 __default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
152}
153
154static unsigned int numachip_cpu_mask_to_apicid(const struct cpumask *cpumask)
155{
156 int cpu;
157
158 /*
159 * We're using fixed IRQ delivery, can only return one phys APIC ID.
160 * May as well be the first.
161 */
162 cpu = cpumask_first(cpumask);
163 if (likely((unsigned)cpu < nr_cpu_ids))
164 return per_cpu(x86_cpu_to_apicid, cpu);
165
166 return BAD_APICID;
167}
168
169static unsigned int
170numachip_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
171 const struct cpumask *andmask)
172{
173 int cpu;
174
175 /*
176 * We're using fixed IRQ delivery, can only return one phys APIC ID.
177 * May as well be the first.
178 */
179 for_each_cpu_and(cpu, cpumask, andmask) {
180 if (cpumask_test_cpu(cpu, cpu_online_mask))
181 break;
182 }
183 return per_cpu(x86_cpu_to_apicid, cpu);
184}
185
186static int __init numachip_probe(void)
187{
188 return apic == &apic_numachip;
189}
190
191static void __init map_csrs(void)
192{
193 printk(KERN_INFO "NumaChip: Mapping local CSR space (%016llx - %016llx)\n",
194 NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_BASE + NUMACHIP_LCSR_SIZE - 1);
195 init_extra_mapping_uc(NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_SIZE);
196
197 printk(KERN_INFO "NumaChip: Mapping global CSR space (%016llx - %016llx)\n",
198 NUMACHIP_GCSR_BASE, NUMACHIP_GCSR_BASE + NUMACHIP_GCSR_SIZE - 1);
199 init_extra_mapping_uc(NUMACHIP_GCSR_BASE, NUMACHIP_GCSR_SIZE);
200}
201
202static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
203{
204 c->phys_proc_id = node;
205 per_cpu(cpu_llc_id, smp_processor_id()) = node;
206}
207
208static int __init numachip_system_init(void)
209{
210 unsigned int val;
211
212 if (!numachip_system)
213 return 0;
214
215 x86_cpuinit.fixup_cpu_id = fixup_cpu_id;
216
217 map_csrs();
218
219 val = read_lcsr(CSR_G0_NODE_IDS);
220 printk(KERN_INFO "NumaChip: Local NodeID = %08x\n", val);
221
222 return 0;
223}
224early_initcall(numachip_system_init);
225
226static int numachip_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
227{
228 if (!strncmp(oem_id, "NUMASC", 6)) {
229 numachip_system = 1;
230 return 1;
231 }
232
233 return 0;
234}
235
236static struct apic apic_numachip __refconst = {
237
238 .name = "NumaConnect system",
239 .probe = numachip_probe,
240 .acpi_madt_oem_check = numachip_acpi_madt_oem_check,
241 .apic_id_registered = numachip_apic_id_registered,
242
243 .irq_delivery_mode = dest_Fixed,
244 .irq_dest_mode = 0, /* physical */
245
246 .target_cpus = numachip_target_cpus,
247 .disable_esr = 0,
248 .dest_logical = 0,
249 .check_apicid_used = NULL,
250 .check_apicid_present = NULL,
251
252 .vector_allocation_domain = numachip_vector_allocation_domain,
253 .init_apic_ldr = flat_init_apic_ldr,
254
255 .ioapic_phys_id_map = NULL,
256 .setup_apic_routing = NULL,
257 .multi_timer_check = NULL,
258 .cpu_present_to_apicid = default_cpu_present_to_apicid,
259 .apicid_to_cpu_present = NULL,
260 .setup_portio_remap = NULL,
261 .check_phys_apicid_present = default_check_phys_apicid_present,
262 .enable_apic_mode = NULL,
263 .phys_pkg_id = numachip_phys_pkg_id,
264 .mps_oem_check = NULL,
265
266 .get_apic_id = get_apic_id,
267 .set_apic_id = set_apic_id,
268 .apic_id_mask = 0xffU << 24,
269
270 .cpu_mask_to_apicid = numachip_cpu_mask_to_apicid,
271 .cpu_mask_to_apicid_and = numachip_cpu_mask_to_apicid_and,
272
273 .send_IPI_mask = numachip_send_IPI_mask,
274 .send_IPI_mask_allbutself = numachip_send_IPI_mask_allbutself,
275 .send_IPI_allbutself = numachip_send_IPI_allbutself,
276 .send_IPI_all = numachip_send_IPI_all,
277 .send_IPI_self = numachip_send_IPI_self,
278
279 .wakeup_secondary_cpu = numachip_wakeup_secondary,
280 .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
281 .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
282 .wait_for_init_deassert = NULL,
283 .smp_callin_clear_local_apic = NULL,
284 .inquire_remote_apic = NULL, /* REMRD not supported */
285
286 .read = native_apic_mem_read,
287 .write = native_apic_mem_write,
288 .icr_read = native_apic_icr_read,
289 .icr_write = native_apic_icr_write,
290 .wait_icr_idle = native_apic_wait_icr_idle,
291 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
292};
293apic_driver(apic_numachip);
294
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 3c31fa98af6d..fb072754bc1d 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -193,10 +193,8 @@ int __init arch_early_irq_init(void)
193 struct irq_cfg *cfg; 193 struct irq_cfg *cfg;
194 int count, node, i; 194 int count, node, i;
195 195
196 if (!legacy_pic->nr_legacy_irqs) { 196 if (!legacy_pic->nr_legacy_irqs)
197 nr_irqs_gsi = 0;
198 io_apic_irqs = ~0UL; 197 io_apic_irqs = ~0UL;
199 }
200 198
201 for (i = 0; i < nr_ioapics; i++) { 199 for (i = 0; i < nr_ioapics; i++) {
202 ioapics[i].saved_registers = 200 ioapics[i].saved_registers =
@@ -1696,6 +1694,7 @@ __apicdebuginit(void) print_IO_APICs(void)
1696 int ioapic_idx; 1694 int ioapic_idx;
1697 struct irq_cfg *cfg; 1695 struct irq_cfg *cfg;
1698 unsigned int irq; 1696 unsigned int irq;
1697 struct irq_chip *chip;
1699 1698
1700 printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); 1699 printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
1701 for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) 1700 for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++)
@@ -1716,6 +1715,10 @@ __apicdebuginit(void) print_IO_APICs(void)
1716 for_each_active_irq(irq) { 1715 for_each_active_irq(irq) {
1717 struct irq_pin_list *entry; 1716 struct irq_pin_list *entry;
1718 1717
1718 chip = irq_get_chip(irq);
1719 if (chip != &ioapic_chip)
1720 continue;
1721
1719 cfg = irq_get_chip_data(irq); 1722 cfg = irq_get_chip_data(irq);
1720 if (!cfg) 1723 if (!cfg)
1721 continue; 1724 continue;
@@ -2418,8 +2421,8 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
2418 unsigned vector, me; 2421 unsigned vector, me;
2419 2422
2420 ack_APIC_irq(); 2423 ack_APIC_irq();
2421 exit_idle();
2422 irq_enter(); 2424 irq_enter();
2425 exit_idle();
2423 2426
2424 me = smp_processor_id(); 2427 me = smp_processor_id();
2425 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { 2428 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
@@ -2945,6 +2948,10 @@ static inline void __init check_timer(void)
2945 } 2948 }
2946 local_irq_disable(); 2949 local_irq_disable();
2947 apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); 2950 apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
2951 if (x2apic_preenabled)
2952 apic_printk(APIC_QUIET, KERN_INFO
2953 "Perhaps problem with the pre-enabled x2apic mode\n"
2954 "Try booting with x2apic and interrupt-remapping disabled in the bios.\n");
2948 panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " 2955 panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
2949 "report. Then try booting with the 'noapic' option.\n"); 2956 "report. Then try booting with the 'noapic' option.\n");
2950out: 2957out:
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 62ae3001ae02..9d59bbacd4e3 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -93,6 +93,8 @@ static int __init early_get_pnodeid(void)
93 93
94 if (node_id.s.part_number == UV2_HUB_PART_NUMBER) 94 if (node_id.s.part_number == UV2_HUB_PART_NUMBER)
95 uv_min_hub_revision_id += UV2_HUB_REVISION_BASE - 1; 95 uv_min_hub_revision_id += UV2_HUB_REVISION_BASE - 1;
96 if (node_id.s.part_number == UV2_HUB_PART_NUMBER_X)
97 uv_min_hub_revision_id += UV2_HUB_REVISION_BASE - 1;
96 98
97 uv_hub_info->hub_revision = uv_min_hub_revision_id; 99 uv_hub_info->hub_revision = uv_min_hub_revision_id;
98 pnode = (node_id.s.node_id >> 1) & ((1 << m_n_config.s.n_skt) - 1); 100 pnode = (node_id.s.node_id >> 1) & ((1 << m_n_config.s.n_skt) - 1);
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c
index 452932d34730..5da1269e8ddc 100644
--- a/arch/x86/kernel/check.c
+++ b/arch/x86/kernel/check.c
@@ -62,7 +62,8 @@ early_param("memory_corruption_check_size", set_corruption_check_size);
62 62
63void __init setup_bios_corruption_check(void) 63void __init setup_bios_corruption_check(void)
64{ 64{
65 u64 addr = PAGE_SIZE; /* assume first page is reserved anyway */ 65 phys_addr_t start, end;
66 u64 i;
66 67
67 if (memory_corruption_check == -1) { 68 if (memory_corruption_check == -1) {
68 memory_corruption_check = 69 memory_corruption_check =
@@ -82,28 +83,23 @@ void __init setup_bios_corruption_check(void)
82 83
83 corruption_check_size = round_up(corruption_check_size, PAGE_SIZE); 84 corruption_check_size = round_up(corruption_check_size, PAGE_SIZE);
84 85
85 while (addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) { 86 for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL) {
86 u64 size; 87 start = clamp_t(phys_addr_t, round_up(start, PAGE_SIZE),
87 addr = memblock_x86_find_in_range_size(addr, &size, PAGE_SIZE); 88 PAGE_SIZE, corruption_check_size);
89 end = clamp_t(phys_addr_t, round_down(end, PAGE_SIZE),
90 PAGE_SIZE, corruption_check_size);
91 if (start >= end)
92 continue;
88 93
89 if (addr == MEMBLOCK_ERROR) 94 memblock_reserve(start, end - start);
90 break; 95 scan_areas[num_scan_areas].addr = start;
91 96 scan_areas[num_scan_areas].size = end - start;
92 if (addr >= corruption_check_size)
93 break;
94
95 if ((addr + size) > corruption_check_size)
96 size = corruption_check_size - addr;
97
98 memblock_x86_reserve_range(addr, addr + size, "SCAN RAM");
99 scan_areas[num_scan_areas].addr = addr;
100 scan_areas[num_scan_areas].size = size;
101 num_scan_areas++;
102 97
103 /* Assume we've already mapped this early memory */ 98 /* Assume we've already mapped this early memory */
104 memset(__va(addr), 0, size); 99 memset(__va(start), 0, end - start);
105 100
106 addr += size; 101 if (++num_scan_areas >= MAX_SCAN_AREAS)
102 break;
107 } 103 }
108 104
109 if (num_scan_areas) 105 if (num_scan_areas)
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index c7e46cb35327..f4773f4aae35 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -148,7 +148,6 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c)
148 148
149static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c) 149static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c)
150{ 150{
151#ifdef CONFIG_SMP
152 /* calling is from identify_secondary_cpu() ? */ 151 /* calling is from identify_secondary_cpu() ? */
153 if (!c->cpu_index) 152 if (!c->cpu_index)
154 return; 153 return;
@@ -192,7 +191,6 @@ static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c)
192 191
193valid_k7: 192valid_k7:
194 ; 193 ;
195#endif
196} 194}
197 195
198static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) 196static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
@@ -353,6 +351,13 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
353 if (node == NUMA_NO_NODE) 351 if (node == NUMA_NO_NODE)
354 node = per_cpu(cpu_llc_id, cpu); 352 node = per_cpu(cpu_llc_id, cpu);
355 353
354 /*
355 * If core numbers are inconsistent, it's likely a multi-fabric platform,
356 * so invoke platform-specific handler
357 */
358 if (c->phys_proc_id != node)
359 x86_cpuinit.fixup_cpu_id(c, node);
360
356 if (!node_online(node)) { 361 if (!node_online(node)) {
357 /* 362 /*
358 * Two possibilities here: 363 * Two possibilities here:
@@ -442,8 +447,6 @@ static void __cpuinit bsp_init_amd(struct cpuinfo_x86 *c)
442 447
443static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) 448static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
444{ 449{
445 u32 dummy;
446
447 early_init_amd_mc(c); 450 early_init_amd_mc(c);
448 451
449 /* 452 /*
@@ -473,12 +476,12 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
473 set_cpu_cap(c, X86_FEATURE_EXTD_APICID); 476 set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
474 } 477 }
475#endif 478#endif
476
477 rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
478} 479}
479 480
480static void __cpuinit init_amd(struct cpuinfo_x86 *c) 481static void __cpuinit init_amd(struct cpuinfo_x86 *c)
481{ 482{
483 u32 dummy;
484
482#ifdef CONFIG_SMP 485#ifdef CONFIG_SMP
483 unsigned long long value; 486 unsigned long long value;
484 487
@@ -657,6 +660,8 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
657 checking_wrmsrl(MSR_AMD64_MCx_MASK(4), mask); 660 checking_wrmsrl(MSR_AMD64_MCx_MASK(4), mask);
658 } 661 }
659 } 662 }
663
664 rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
660} 665}
661 666
662#ifdef CONFIG_X86_32 667#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index e58d978e0758..159103c0b1f4 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -278,7 +278,7 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c)
278 } 278 }
279#ifdef CONFIG_X86_32 279#ifdef CONFIG_X86_32
280 /* Cyrix III family needs CX8 & PGE explicitly enabled. */ 280 /* Cyrix III family needs CX8 & PGE explicitly enabled. */
281 if (c->x86_model >= 6 && c->x86_model <= 9) { 281 if (c->x86_model >= 6 && c->x86_model <= 13) {
282 rdmsr(MSR_VIA_FCR, lo, hi); 282 rdmsr(MSR_VIA_FCR, lo, hi);
283 lo |= (1<<1 | 1<<7); 283 lo |= (1<<1 | 1<<7);
284 wrmsr(MSR_VIA_FCR, lo, hi); 284 wrmsr(MSR_VIA_FCR, lo, hi);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index aa003b13a831..850f2963a420 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -676,9 +676,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
676 if (this_cpu->c_early_init) 676 if (this_cpu->c_early_init)
677 this_cpu->c_early_init(c); 677 this_cpu->c_early_init(c);
678 678
679#ifdef CONFIG_SMP
680 c->cpu_index = 0; 679 c->cpu_index = 0;
681#endif
682 filter_cpuid_features(c, false); 680 filter_cpuid_features(c, false);
683 681
684 setup_smep(c); 682 setup_smep(c);
@@ -764,10 +762,7 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
764 c->apicid = c->initial_apicid; 762 c->apicid = c->initial_apicid;
765# endif 763# endif
766#endif 764#endif
767
768#ifdef CONFIG_X86_HT
769 c->phys_proc_id = c->initial_apicid; 765 c->phys_proc_id = c->initial_apicid;
770#endif
771 } 766 }
772 767
773 setup_smep(c); 768 setup_smep(c);
@@ -1141,6 +1136,15 @@ static void dbg_restore_debug_regs(void)
1141#endif /* ! CONFIG_KGDB */ 1136#endif /* ! CONFIG_KGDB */
1142 1137
1143/* 1138/*
1139 * Prints an error where the NUMA and configured core-number mismatch and the
1140 * platform didn't override this to fix it up
1141 */
1142void __cpuinit x86_default_fixup_cpu_id(struct cpuinfo_x86 *c, int node)
1143{
1144 pr_err("NUMA core number %d differs from configured core number %d\n", node, c->phys_proc_id);
1145}
1146
1147/*
1144 * cpu_init() initializes state that is per-CPU. Some data is already 1148 * cpu_init() initializes state that is per-CPU. Some data is already
1145 * initialized (naturally) in the bootstrap process, such as the GDT 1149 * initialized (naturally) in the bootstrap process, such as the GDT
1146 * and IDT. We reload them nevertheless, this function acts as a 1150 * and IDT. We reload them nevertheless, this function acts as a
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 1b22dcc51af4..8bacc7826fb3 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -1,5 +1,4 @@
1#ifndef ARCH_X86_CPU_H 1#ifndef ARCH_X86_CPU_H
2
3#define ARCH_X86_CPU_H 2#define ARCH_X86_CPU_H
4 3
5struct cpu_model_info { 4struct cpu_model_info {
@@ -35,6 +34,4 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[],
35 34
36extern void get_cpu_cap(struct cpuinfo_x86 *c); 35extern void get_cpu_cap(struct cpuinfo_x86 *c);
37extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); 36extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
38extern void get_cpu_cap(struct cpuinfo_x86 *c); 37#endif /* ARCH_X86_CPU_H */
39
40#endif
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 523131213f08..3e6ff6cbf42a 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -181,7 +181,6 @@ static void __cpuinit trap_init_f00f_bug(void)
181 181
182static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c) 182static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c)
183{ 183{
184#ifdef CONFIG_SMP
185 /* calling is from identify_secondary_cpu() ? */ 184 /* calling is from identify_secondary_cpu() ? */
186 if (!c->cpu_index) 185 if (!c->cpu_index)
187 return; 186 return;
@@ -198,7 +197,6 @@ static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c)
198 WARN_ONCE(1, "WARNING: SMP operation may be unreliable" 197 WARN_ONCE(1, "WARNING: SMP operation may be unreliable"
199 "with B stepping processors.\n"); 198 "with B stepping processors.\n");
200 } 199 }
201#endif
202} 200}
203 201
204static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) 202static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index a3b0811693c9..6b45e5e7a901 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -844,8 +844,7 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
844 844
845#include <linux/kobject.h> 845#include <linux/kobject.h>
846#include <linux/sysfs.h> 846#include <linux/sysfs.h>
847 847#include <linux/cpu.h>
848extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */
849 848
850/* pointer to kobject for cpuX/cache */ 849/* pointer to kobject for cpuX/cache */
851static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject); 850static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject);
@@ -1073,9 +1072,9 @@ err_out:
1073static DECLARE_BITMAP(cache_dev_map, NR_CPUS); 1072static DECLARE_BITMAP(cache_dev_map, NR_CPUS);
1074 1073
1075/* Add/Remove cache interface for CPU device */ 1074/* Add/Remove cache interface for CPU device */
1076static int __cpuinit cache_add_dev(struct sys_device * sys_dev) 1075static int __cpuinit cache_add_dev(struct device *dev)
1077{ 1076{
1078 unsigned int cpu = sys_dev->id; 1077 unsigned int cpu = dev->id;
1079 unsigned long i, j; 1078 unsigned long i, j;
1080 struct _index_kobject *this_object; 1079 struct _index_kobject *this_object;
1081 struct _cpuid4_info *this_leaf; 1080 struct _cpuid4_info *this_leaf;
@@ -1087,7 +1086,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
1087 1086
1088 retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu), 1087 retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu),
1089 &ktype_percpu_entry, 1088 &ktype_percpu_entry,
1090 &sys_dev->kobj, "%s", "cache"); 1089 &dev->kobj, "%s", "cache");
1091 if (retval < 0) { 1090 if (retval < 0) {
1092 cpuid4_cache_sysfs_exit(cpu); 1091 cpuid4_cache_sysfs_exit(cpu);
1093 return retval; 1092 return retval;
@@ -1124,9 +1123,9 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
1124 return 0; 1123 return 0;
1125} 1124}
1126 1125
1127static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) 1126static void __cpuinit cache_remove_dev(struct device *dev)
1128{ 1127{
1129 unsigned int cpu = sys_dev->id; 1128 unsigned int cpu = dev->id;
1130 unsigned long i; 1129 unsigned long i;
1131 1130
1132 if (per_cpu(ici_cpuid4_info, cpu) == NULL) 1131 if (per_cpu(ici_cpuid4_info, cpu) == NULL)
@@ -1145,17 +1144,17 @@ static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
1145 unsigned long action, void *hcpu) 1144 unsigned long action, void *hcpu)
1146{ 1145{
1147 unsigned int cpu = (unsigned long)hcpu; 1146 unsigned int cpu = (unsigned long)hcpu;
1148 struct sys_device *sys_dev; 1147 struct device *dev;
1149 1148
1150 sys_dev = get_cpu_sysdev(cpu); 1149 dev = get_cpu_device(cpu);
1151 switch (action) { 1150 switch (action) {
1152 case CPU_ONLINE: 1151 case CPU_ONLINE:
1153 case CPU_ONLINE_FROZEN: 1152 case CPU_ONLINE_FROZEN:
1154 cache_add_dev(sys_dev); 1153 cache_add_dev(dev);
1155 break; 1154 break;
1156 case CPU_DEAD: 1155 case CPU_DEAD:
1157 case CPU_DEAD_FROZEN: 1156 case CPU_DEAD_FROZEN:
1158 cache_remove_dev(sys_dev); 1157 cache_remove_dev(dev);
1159 break; 1158 break;
1160 } 1159 }
1161 return NOTIFY_OK; 1160 return NOTIFY_OK;
@@ -1174,9 +1173,9 @@ static int __cpuinit cache_sysfs_init(void)
1174 1173
1175 for_each_online_cpu(i) { 1174 for_each_online_cpu(i) {
1176 int err; 1175 int err;
1177 struct sys_device *sys_dev = get_cpu_sysdev(i); 1176 struct device *dev = get_cpu_device(i);
1178 1177
1179 err = cache_add_dev(sys_dev); 1178 err = cache_add_dev(dev);
1180 if (err) 1179 if (err)
1181 return err; 1180 return err;
1182 } 1181 }
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 6199232161cf..fc4beb393577 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -17,6 +17,7 @@
17#include <linux/kernel.h> 17#include <linux/kernel.h>
18#include <linux/string.h> 18#include <linux/string.h>
19#include <linux/fs.h> 19#include <linux/fs.h>
20#include <linux/preempt.h>
20#include <linux/smp.h> 21#include <linux/smp.h>
21#include <linux/notifier.h> 22#include <linux/notifier.h>
22#include <linux/kdebug.h> 23#include <linux/kdebug.h>
@@ -92,6 +93,18 @@ static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
92 return NMI_HANDLED; 93 return NMI_HANDLED;
93} 94}
94 95
96static void mce_irq_ipi(void *info)
97{
98 int cpu = smp_processor_id();
99 struct mce *m = &__get_cpu_var(injectm);
100
101 if (cpumask_test_cpu(cpu, mce_inject_cpumask) &&
102 m->inject_flags & MCJ_EXCEPTION) {
103 cpumask_clear_cpu(cpu, mce_inject_cpumask);
104 raise_exception(m, NULL);
105 }
106}
107
95/* Inject mce on current CPU */ 108/* Inject mce on current CPU */
96static int raise_local(void) 109static int raise_local(void)
97{ 110{
@@ -139,9 +152,10 @@ static void raise_mce(struct mce *m)
139 return; 152 return;
140 153
141#ifdef CONFIG_X86_LOCAL_APIC 154#ifdef CONFIG_X86_LOCAL_APIC
142 if (m->inject_flags & MCJ_NMI_BROADCAST) { 155 if (m->inject_flags & (MCJ_IRQ_BRAODCAST | MCJ_NMI_BROADCAST)) {
143 unsigned long start; 156 unsigned long start;
144 int cpu; 157 int cpu;
158
145 get_online_cpus(); 159 get_online_cpus();
146 cpumask_copy(mce_inject_cpumask, cpu_online_mask); 160 cpumask_copy(mce_inject_cpumask, cpu_online_mask);
147 cpumask_clear_cpu(get_cpu(), mce_inject_cpumask); 161 cpumask_clear_cpu(get_cpu(), mce_inject_cpumask);
@@ -151,13 +165,25 @@ static void raise_mce(struct mce *m)
151 MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM) 165 MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM)
152 cpumask_clear_cpu(cpu, mce_inject_cpumask); 166 cpumask_clear_cpu(cpu, mce_inject_cpumask);
153 } 167 }
154 if (!cpumask_empty(mce_inject_cpumask)) 168 if (!cpumask_empty(mce_inject_cpumask)) {
155 apic->send_IPI_mask(mce_inject_cpumask, NMI_VECTOR); 169 if (m->inject_flags & MCJ_IRQ_BRAODCAST) {
170 /*
171 * don't wait because mce_irq_ipi is necessary
172 * to be sync with following raise_local
173 */
174 preempt_disable();
175 smp_call_function_many(mce_inject_cpumask,
176 mce_irq_ipi, NULL, 0);
177 preempt_enable();
178 } else if (m->inject_flags & MCJ_NMI_BROADCAST)
179 apic->send_IPI_mask(mce_inject_cpumask,
180 NMI_VECTOR);
181 }
156 start = jiffies; 182 start = jiffies;
157 while (!cpumask_empty(mce_inject_cpumask)) { 183 while (!cpumask_empty(mce_inject_cpumask)) {
158 if (!time_before(jiffies, start + 2*HZ)) { 184 if (!time_before(jiffies, start + 2*HZ)) {
159 printk(KERN_ERR 185 printk(KERN_ERR
160 "Timeout waiting for mce inject NMI %lx\n", 186 "Timeout waiting for mce inject %lx\n",
161 *cpumask_bits(mce_inject_cpumask)); 187 *cpumask_bits(mce_inject_cpumask));
162 break; 188 break;
163 } 189 }
@@ -208,7 +234,7 @@ static int inject_init(void)
208 if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL)) 234 if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))
209 return -ENOMEM; 235 return -ENOMEM;
210 printk(KERN_INFO "Machine check injector initialized\n"); 236 printk(KERN_INFO "Machine check injector initialized\n");
211 mce_chrdev_ops.write = mce_write; 237 register_mce_write_callback(mce_write);
212 register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, 238 register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0,
213 "mce_notify"); 239 "mce_notify");
214 return 0; 240 return 0;
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index fefcc69ee8b5..ed44c8a65858 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -1,4 +1,4 @@
1#include <linux/sysdev.h> 1#include <linux/device.h>
2#include <asm/mce.h> 2#include <asm/mce.h>
3 3
4enum severity_level { 4enum severity_level {
@@ -17,7 +17,7 @@ enum severity_level {
17struct mce_bank { 17struct mce_bank {
18 u64 ctl; /* subevents to enable */ 18 u64 ctl; /* subevents to enable */
19 unsigned char init; /* initialise bank? */ 19 unsigned char init; /* initialise bank? */
20 struct sysdev_attribute attr; /* sysdev attribute */ 20 struct device_attribute attr; /* device attribute */
21 char attrname[ATTR_LEN]; /* attribute name */ 21 char attrname[ATTR_LEN]; /* attribute name */
22}; 22};
23 23
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 362056aefeb4..f22a9f7f6390 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -19,7 +19,7 @@
19#include <linux/kernel.h> 19#include <linux/kernel.h>
20#include <linux/percpu.h> 20#include <linux/percpu.h>
21#include <linux/string.h> 21#include <linux/string.h>
22#include <linux/sysdev.h> 22#include <linux/device.h>
23#include <linux/syscore_ops.h> 23#include <linux/syscore_ops.h>
24#include <linux/delay.h> 24#include <linux/delay.h>
25#include <linux/ctype.h> 25#include <linux/ctype.h>
@@ -95,13 +95,6 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
95static DEFINE_PER_CPU(struct mce, mces_seen); 95static DEFINE_PER_CPU(struct mce, mces_seen);
96static int cpu_missing; 96static int cpu_missing;
97 97
98/*
99 * CPU/chipset specific EDAC code can register a notifier call here to print
100 * MCE errors in a human-readable form.
101 */
102ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
103EXPORT_SYMBOL_GPL(x86_mce_decoder_chain);
104
105/* MCA banks polled by the period polling timer for corrected events */ 98/* MCA banks polled by the period polling timer for corrected events */
106DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { 99DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
107 [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL 100 [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
@@ -109,6 +102,12 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
109 102
110static DEFINE_PER_CPU(struct work_struct, mce_work); 103static DEFINE_PER_CPU(struct work_struct, mce_work);
111 104
105/*
106 * CPU/chipset specific EDAC code can register a notifier call here to print
107 * MCE errors in a human-readable form.
108 */
109ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
110
112/* Do initial initialization of a struct mce */ 111/* Do initial initialization of a struct mce */
113void mce_setup(struct mce *m) 112void mce_setup(struct mce *m)
114{ 113{
@@ -119,9 +118,7 @@ void mce_setup(struct mce *m)
119 m->time = get_seconds(); 118 m->time = get_seconds();
120 m->cpuvendor = boot_cpu_data.x86_vendor; 119 m->cpuvendor = boot_cpu_data.x86_vendor;
121 m->cpuid = cpuid_eax(1); 120 m->cpuid = cpuid_eax(1);
122#ifdef CONFIG_SMP
123 m->socketid = cpu_data(m->extcpu).phys_proc_id; 121 m->socketid = cpu_data(m->extcpu).phys_proc_id;
124#endif
125 m->apicid = cpu_data(m->extcpu).initial_apicid; 122 m->apicid = cpu_data(m->extcpu).initial_apicid;
126 rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap); 123 rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap);
127} 124}
@@ -190,6 +187,57 @@ void mce_log(struct mce *mce)
190 set_bit(0, &mce_need_notify); 187 set_bit(0, &mce_need_notify);
191} 188}
192 189
190static void drain_mcelog_buffer(void)
191{
192 unsigned int next, i, prev = 0;
193
194 next = rcu_dereference_check_mce(mcelog.next);
195
196 do {
197 struct mce *m;
198
199 /* drain what was logged during boot */
200 for (i = prev; i < next; i++) {
201 unsigned long start = jiffies;
202 unsigned retries = 1;
203
204 m = &mcelog.entry[i];
205
206 while (!m->finished) {
207 if (time_after_eq(jiffies, start + 2*retries))
208 retries++;
209
210 cpu_relax();
211
212 if (!m->finished && retries >= 4) {
213 pr_err("MCE: skipping error being logged currently!\n");
214 break;
215 }
216 }
217 smp_rmb();
218 atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
219 }
220
221 memset(mcelog.entry + prev, 0, (next - prev) * sizeof(*m));
222 prev = next;
223 next = cmpxchg(&mcelog.next, prev, 0);
224 } while (next != prev);
225}
226
227
228void mce_register_decode_chain(struct notifier_block *nb)
229{
230 atomic_notifier_chain_register(&x86_mce_decoder_chain, nb);
231 drain_mcelog_buffer();
232}
233EXPORT_SYMBOL_GPL(mce_register_decode_chain);
234
235void mce_unregister_decode_chain(struct notifier_block *nb)
236{
237 atomic_notifier_chain_unregister(&x86_mce_decoder_chain, nb);
238}
239EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
240
193static void print_mce(struct mce *m) 241static void print_mce(struct mce *m)
194{ 242{
195 int ret = 0; 243 int ret = 0;
@@ -1634,16 +1682,35 @@ static long mce_chrdev_ioctl(struct file *f, unsigned int cmd,
1634 } 1682 }
1635} 1683}
1636 1684
1637/* Modified in mce-inject.c, so not static or const */ 1685static ssize_t (*mce_write)(struct file *filp, const char __user *ubuf,
1638struct file_operations mce_chrdev_ops = { 1686 size_t usize, loff_t *off);
1687
1688void register_mce_write_callback(ssize_t (*fn)(struct file *filp,
1689 const char __user *ubuf,
1690 size_t usize, loff_t *off))
1691{
1692 mce_write = fn;
1693}
1694EXPORT_SYMBOL_GPL(register_mce_write_callback);
1695
1696ssize_t mce_chrdev_write(struct file *filp, const char __user *ubuf,
1697 size_t usize, loff_t *off)
1698{
1699 if (mce_write)
1700 return mce_write(filp, ubuf, usize, off);
1701 else
1702 return -EINVAL;
1703}
1704
1705static const struct file_operations mce_chrdev_ops = {
1639 .open = mce_chrdev_open, 1706 .open = mce_chrdev_open,
1640 .release = mce_chrdev_release, 1707 .release = mce_chrdev_release,
1641 .read = mce_chrdev_read, 1708 .read = mce_chrdev_read,
1709 .write = mce_chrdev_write,
1642 .poll = mce_chrdev_poll, 1710 .poll = mce_chrdev_poll,
1643 .unlocked_ioctl = mce_chrdev_ioctl, 1711 .unlocked_ioctl = mce_chrdev_ioctl,
1644 .llseek = no_llseek, 1712 .llseek = no_llseek,
1645}; 1713};
1646EXPORT_SYMBOL_GPL(mce_chrdev_ops);
1647 1714
1648static struct miscdevice mce_chrdev_device = { 1715static struct miscdevice mce_chrdev_device = {
1649 MISC_MCELOG_MINOR, 1716 MISC_MCELOG_MINOR,
@@ -1751,7 +1818,7 @@ static struct syscore_ops mce_syscore_ops = {
1751}; 1818};
1752 1819
1753/* 1820/*
1754 * mce_sysdev: Sysfs support 1821 * mce_device: Sysfs support
1755 */ 1822 */
1756 1823
1757static void mce_cpu_restart(void *data) 1824static void mce_cpu_restart(void *data)
@@ -1787,27 +1854,28 @@ static void mce_enable_ce(void *all)
1787 __mcheck_cpu_init_timer(); 1854 __mcheck_cpu_init_timer();
1788} 1855}
1789 1856
1790static struct sysdev_class mce_sysdev_class = { 1857static struct bus_type mce_subsys = {
1791 .name = "machinecheck", 1858 .name = "machinecheck",
1859 .dev_name = "machinecheck",
1792}; 1860};
1793 1861
1794DEFINE_PER_CPU(struct sys_device, mce_sysdev); 1862DEFINE_PER_CPU(struct device, mce_device);
1795 1863
1796__cpuinitdata 1864__cpuinitdata
1797void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); 1865void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
1798 1866
1799static inline struct mce_bank *attr_to_bank(struct sysdev_attribute *attr) 1867static inline struct mce_bank *attr_to_bank(struct device_attribute *attr)
1800{ 1868{
1801 return container_of(attr, struct mce_bank, attr); 1869 return container_of(attr, struct mce_bank, attr);
1802} 1870}
1803 1871
1804static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr, 1872static ssize_t show_bank(struct device *s, struct device_attribute *attr,
1805 char *buf) 1873 char *buf)
1806{ 1874{
1807 return sprintf(buf, "%llx\n", attr_to_bank(attr)->ctl); 1875 return sprintf(buf, "%llx\n", attr_to_bank(attr)->ctl);
1808} 1876}
1809 1877
1810static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, 1878static ssize_t set_bank(struct device *s, struct device_attribute *attr,
1811 const char *buf, size_t size) 1879 const char *buf, size_t size)
1812{ 1880{
1813 u64 new; 1881 u64 new;
@@ -1822,14 +1890,14 @@ static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
1822} 1890}
1823 1891
1824static ssize_t 1892static ssize_t
1825show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf) 1893show_trigger(struct device *s, struct device_attribute *attr, char *buf)
1826{ 1894{
1827 strcpy(buf, mce_helper); 1895 strcpy(buf, mce_helper);
1828 strcat(buf, "\n"); 1896 strcat(buf, "\n");
1829 return strlen(mce_helper) + 1; 1897 return strlen(mce_helper) + 1;
1830} 1898}
1831 1899
1832static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, 1900static ssize_t set_trigger(struct device *s, struct device_attribute *attr,
1833 const char *buf, size_t siz) 1901 const char *buf, size_t siz)
1834{ 1902{
1835 char *p; 1903 char *p;
@@ -1844,8 +1912,8 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
1844 return strlen(mce_helper) + !!p; 1912 return strlen(mce_helper) + !!p;
1845} 1913}
1846 1914
1847static ssize_t set_ignore_ce(struct sys_device *s, 1915static ssize_t set_ignore_ce(struct device *s,
1848 struct sysdev_attribute *attr, 1916 struct device_attribute *attr,
1849 const char *buf, size_t size) 1917 const char *buf, size_t size)
1850{ 1918{
1851 u64 new; 1919 u64 new;
@@ -1868,8 +1936,8 @@ static ssize_t set_ignore_ce(struct sys_device *s,
1868 return size; 1936 return size;
1869} 1937}
1870 1938
1871static ssize_t set_cmci_disabled(struct sys_device *s, 1939static ssize_t set_cmci_disabled(struct device *s,
1872 struct sysdev_attribute *attr, 1940 struct device_attribute *attr,
1873 const char *buf, size_t size) 1941 const char *buf, size_t size)
1874{ 1942{
1875 u64 new; 1943 u64 new;
@@ -1891,108 +1959,107 @@ static ssize_t set_cmci_disabled(struct sys_device *s,
1891 return size; 1959 return size;
1892} 1960}
1893 1961
1894static ssize_t store_int_with_restart(struct sys_device *s, 1962static ssize_t store_int_with_restart(struct device *s,
1895 struct sysdev_attribute *attr, 1963 struct device_attribute *attr,
1896 const char *buf, size_t size) 1964 const char *buf, size_t size)
1897{ 1965{
1898 ssize_t ret = sysdev_store_int(s, attr, buf, size); 1966 ssize_t ret = device_store_int(s, attr, buf, size);
1899 mce_restart(); 1967 mce_restart();
1900 return ret; 1968 return ret;
1901} 1969}
1902 1970
1903static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); 1971static DEVICE_ATTR(trigger, 0644, show_trigger, set_trigger);
1904static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); 1972static DEVICE_INT_ATTR(tolerant, 0644, tolerant);
1905static SYSDEV_INT_ATTR(monarch_timeout, 0644, monarch_timeout); 1973static DEVICE_INT_ATTR(monarch_timeout, 0644, monarch_timeout);
1906static SYSDEV_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce); 1974static DEVICE_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce);
1907 1975
1908static struct sysdev_ext_attribute attr_check_interval = { 1976static struct dev_ext_attribute dev_attr_check_interval = {
1909 _SYSDEV_ATTR(check_interval, 0644, sysdev_show_int, 1977 __ATTR(check_interval, 0644, device_show_int, store_int_with_restart),
1910 store_int_with_restart),
1911 &check_interval 1978 &check_interval
1912}; 1979};
1913 1980
1914static struct sysdev_ext_attribute attr_ignore_ce = { 1981static struct dev_ext_attribute dev_attr_ignore_ce = {
1915 _SYSDEV_ATTR(ignore_ce, 0644, sysdev_show_int, set_ignore_ce), 1982 __ATTR(ignore_ce, 0644, device_show_int, set_ignore_ce),
1916 &mce_ignore_ce 1983 &mce_ignore_ce
1917}; 1984};
1918 1985
1919static struct sysdev_ext_attribute attr_cmci_disabled = { 1986static struct dev_ext_attribute dev_attr_cmci_disabled = {
1920 _SYSDEV_ATTR(cmci_disabled, 0644, sysdev_show_int, set_cmci_disabled), 1987 __ATTR(cmci_disabled, 0644, device_show_int, set_cmci_disabled),
1921 &mce_cmci_disabled 1988 &mce_cmci_disabled
1922}; 1989};
1923 1990
1924static struct sysdev_attribute *mce_sysdev_attrs[] = { 1991static struct device_attribute *mce_device_attrs[] = {
1925 &attr_tolerant.attr, 1992 &dev_attr_tolerant.attr,
1926 &attr_check_interval.attr, 1993 &dev_attr_check_interval.attr,
1927 &attr_trigger, 1994 &dev_attr_trigger,
1928 &attr_monarch_timeout.attr, 1995 &dev_attr_monarch_timeout.attr,
1929 &attr_dont_log_ce.attr, 1996 &dev_attr_dont_log_ce.attr,
1930 &attr_ignore_ce.attr, 1997 &dev_attr_ignore_ce.attr,
1931 &attr_cmci_disabled.attr, 1998 &dev_attr_cmci_disabled.attr,
1932 NULL 1999 NULL
1933}; 2000};
1934 2001
1935static cpumask_var_t mce_sysdev_initialized; 2002static cpumask_var_t mce_device_initialized;
1936 2003
1937/* Per cpu sysdev init. All of the cpus still share the same ctrl bank: */ 2004/* Per cpu device init. All of the cpus still share the same ctrl bank: */
1938static __cpuinit int mce_sysdev_create(unsigned int cpu) 2005static __cpuinit int mce_device_create(unsigned int cpu)
1939{ 2006{
1940 struct sys_device *sysdev = &per_cpu(mce_sysdev, cpu); 2007 struct device *dev = &per_cpu(mce_device, cpu);
1941 int err; 2008 int err;
1942 int i, j; 2009 int i, j;
1943 2010
1944 if (!mce_available(&boot_cpu_data)) 2011 if (!mce_available(&boot_cpu_data))
1945 return -EIO; 2012 return -EIO;
1946 2013
1947 memset(&sysdev->kobj, 0, sizeof(struct kobject)); 2014 memset(&dev->kobj, 0, sizeof(struct kobject));
1948 sysdev->id = cpu; 2015 dev->id = cpu;
1949 sysdev->cls = &mce_sysdev_class; 2016 dev->bus = &mce_subsys;
1950 2017
1951 err = sysdev_register(sysdev); 2018 err = device_register(dev);
1952 if (err) 2019 if (err)
1953 return err; 2020 return err;
1954 2021
1955 for (i = 0; mce_sysdev_attrs[i]; i++) { 2022 for (i = 0; mce_device_attrs[i]; i++) {
1956 err = sysdev_create_file(sysdev, mce_sysdev_attrs[i]); 2023 err = device_create_file(dev, mce_device_attrs[i]);
1957 if (err) 2024 if (err)
1958 goto error; 2025 goto error;
1959 } 2026 }
1960 for (j = 0; j < banks; j++) { 2027 for (j = 0; j < banks; j++) {
1961 err = sysdev_create_file(sysdev, &mce_banks[j].attr); 2028 err = device_create_file(dev, &mce_banks[j].attr);
1962 if (err) 2029 if (err)
1963 goto error2; 2030 goto error2;
1964 } 2031 }
1965 cpumask_set_cpu(cpu, mce_sysdev_initialized); 2032 cpumask_set_cpu(cpu, mce_device_initialized);
1966 2033
1967 return 0; 2034 return 0;
1968error2: 2035error2:
1969 while (--j >= 0) 2036 while (--j >= 0)
1970 sysdev_remove_file(sysdev, &mce_banks[j].attr); 2037 device_remove_file(dev, &mce_banks[j].attr);
1971error: 2038error:
1972 while (--i >= 0) 2039 while (--i >= 0)
1973 sysdev_remove_file(sysdev, mce_sysdev_attrs[i]); 2040 device_remove_file(dev, mce_device_attrs[i]);
1974 2041
1975 sysdev_unregister(sysdev); 2042 device_unregister(dev);
1976 2043
1977 return err; 2044 return err;
1978} 2045}
1979 2046
1980static __cpuinit void mce_sysdev_remove(unsigned int cpu) 2047static __cpuinit void mce_device_remove(unsigned int cpu)
1981{ 2048{
1982 struct sys_device *sysdev = &per_cpu(mce_sysdev, cpu); 2049 struct device *dev = &per_cpu(mce_device, cpu);
1983 int i; 2050 int i;
1984 2051
1985 if (!cpumask_test_cpu(cpu, mce_sysdev_initialized)) 2052 if (!cpumask_test_cpu(cpu, mce_device_initialized))
1986 return; 2053 return;
1987 2054
1988 for (i = 0; mce_sysdev_attrs[i]; i++) 2055 for (i = 0; mce_device_attrs[i]; i++)
1989 sysdev_remove_file(sysdev, mce_sysdev_attrs[i]); 2056 device_remove_file(dev, mce_device_attrs[i]);
1990 2057
1991 for (i = 0; i < banks; i++) 2058 for (i = 0; i < banks; i++)
1992 sysdev_remove_file(sysdev, &mce_banks[i].attr); 2059 device_remove_file(dev, &mce_banks[i].attr);
1993 2060
1994 sysdev_unregister(sysdev); 2061 device_unregister(dev);
1995 cpumask_clear_cpu(cpu, mce_sysdev_initialized); 2062 cpumask_clear_cpu(cpu, mce_device_initialized);
1996} 2063}
1997 2064
1998/* Make sure there are no machine checks on offlined CPUs. */ 2065/* Make sure there are no machine checks on offlined CPUs. */
@@ -2042,7 +2109,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
2042 switch (action) { 2109 switch (action) {
2043 case CPU_ONLINE: 2110 case CPU_ONLINE:
2044 case CPU_ONLINE_FROZEN: 2111 case CPU_ONLINE_FROZEN:
2045 mce_sysdev_create(cpu); 2112 mce_device_create(cpu);
2046 if (threshold_cpu_callback) 2113 if (threshold_cpu_callback)
2047 threshold_cpu_callback(action, cpu); 2114 threshold_cpu_callback(action, cpu);
2048 break; 2115 break;
@@ -2050,7 +2117,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
2050 case CPU_DEAD_FROZEN: 2117 case CPU_DEAD_FROZEN:
2051 if (threshold_cpu_callback) 2118 if (threshold_cpu_callback)
2052 threshold_cpu_callback(action, cpu); 2119 threshold_cpu_callback(action, cpu);
2053 mce_sysdev_remove(cpu); 2120 mce_device_remove(cpu);
2054 break; 2121 break;
2055 case CPU_DOWN_PREPARE: 2122 case CPU_DOWN_PREPARE:
2056 case CPU_DOWN_PREPARE_FROZEN: 2123 case CPU_DOWN_PREPARE_FROZEN:
@@ -2084,7 +2151,7 @@ static __init void mce_init_banks(void)
2084 2151
2085 for (i = 0; i < banks; i++) { 2152 for (i = 0; i < banks; i++) {
2086 struct mce_bank *b = &mce_banks[i]; 2153 struct mce_bank *b = &mce_banks[i];
2087 struct sysdev_attribute *a = &b->attr; 2154 struct device_attribute *a = &b->attr;
2088 2155
2089 sysfs_attr_init(&a->attr); 2156 sysfs_attr_init(&a->attr);
2090 a->attr.name = b->attrname; 2157 a->attr.name = b->attrname;
@@ -2104,16 +2171,16 @@ static __init int mcheck_init_device(void)
2104 if (!mce_available(&boot_cpu_data)) 2171 if (!mce_available(&boot_cpu_data))
2105 return -EIO; 2172 return -EIO;
2106 2173
2107 zalloc_cpumask_var(&mce_sysdev_initialized, GFP_KERNEL); 2174 zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL);
2108 2175
2109 mce_init_banks(); 2176 mce_init_banks();
2110 2177
2111 err = sysdev_class_register(&mce_sysdev_class); 2178 err = subsys_system_register(&mce_subsys, NULL);
2112 if (err) 2179 if (err)
2113 return err; 2180 return err;
2114 2181
2115 for_each_online_cpu(i) { 2182 for_each_online_cpu(i) {
2116 err = mce_sysdev_create(i); 2183 err = mce_device_create(i);
2117 if (err) 2184 if (err)
2118 return err; 2185 return err;
2119 } 2186 }
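Everything above is the mechanical sysdev-to-struct-device conversion: a struct bus_type registered through subsys_system_register() replaces the sysdev class, per-CPU struct device instances replace struct sys_device, and DEVICE_ATTR()/device_create_file() replace the SYSDEV_* helpers. Condensed into a sketch (names and attributes shortened, error unwinding omitted, so not the literal kernel code):

    #include <linux/device.h>
    #include <linux/cpu.h>
    #include <linux/percpu.h>

    static struct bus_type demo_subsys = {
            .name     = "machinecheck",
            .dev_name = "machinecheck",
    };

    static DEFINE_PER_CPU(struct device, demo_device);

    static ssize_t show_trigger(struct device *d, struct device_attribute *a,
                                char *buf)
    {
            return sprintf(buf, "demo\n");
    }
    static DEVICE_ATTR(trigger, 0444, show_trigger, NULL);

    static int demo_device_create(unsigned int cpu)
    {
            struct device *dev = &per_cpu(demo_device, cpu);
            int err;

            dev->id  = cpu;
            dev->bus = &demo_subsys;

            err = device_register(dev);
            if (err)
                    return err;

            return device_create_file(dev, &dev_attr_trigger);
    }

    static int __init demo_init(void)
    {
            int cpu, err;

            err = subsys_system_register(&demo_subsys, NULL);
            if (err)
                    return err;

            for_each_online_cpu(cpu) {
                    err = demo_device_create(cpu);
                    if (err)
                            return err;
            }
            return 0;
    }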
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index f5474218cffe..ba0b94a7e204 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -17,7 +17,6 @@
17#include <linux/notifier.h> 17#include <linux/notifier.h>
18#include <linux/kobject.h> 18#include <linux/kobject.h>
19#include <linux/percpu.h> 19#include <linux/percpu.h>
20#include <linux/sysdev.h>
21#include <linux/errno.h> 20#include <linux/errno.h>
22#include <linux/sched.h> 21#include <linux/sched.h>
23#include <linux/sysfs.h> 22#include <linux/sysfs.h>
@@ -64,11 +63,9 @@ struct threshold_bank {
64}; 63};
65static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks); 64static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks);
66 65
67#ifdef CONFIG_SMP
68static unsigned char shared_bank[NR_BANKS] = { 66static unsigned char shared_bank[NR_BANKS] = {
69 0, 0, 0, 0, 1 67 0, 0, 0, 0, 1
70}; 68};
71#endif
72 69
73static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ 70static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */
74 71
@@ -202,10 +199,9 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
202 199
203 if (!block) 200 if (!block)
204 per_cpu(bank_map, cpu) |= (1 << bank); 201 per_cpu(bank_map, cpu) |= (1 << bank);
205#ifdef CONFIG_SMP
206 if (shared_bank[bank] && c->cpu_core_id) 202 if (shared_bank[bank] && c->cpu_core_id)
207 break; 203 break;
208#endif 204
209 offset = setup_APIC_mce(offset, 205 offset = setup_APIC_mce(offset,
210 (high & MASK_LVTOFF_HI) >> 20); 206 (high & MASK_LVTOFF_HI) >> 20);
211 207
@@ -531,7 +527,6 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
531 527
532 sprintf(name, "threshold_bank%i", bank); 528 sprintf(name, "threshold_bank%i", bank);
533 529
534#ifdef CONFIG_SMP
535 if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ 530 if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */
536 i = cpumask_first(cpu_llc_shared_mask(cpu)); 531 i = cpumask_first(cpu_llc_shared_mask(cpu));
537 532
@@ -548,7 +543,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
548 if (!b) 543 if (!b)
549 goto out; 544 goto out;
550 545
551 err = sysfs_create_link(&per_cpu(mce_sysdev, cpu).kobj, 546 err = sysfs_create_link(&per_cpu(mce_device, cpu).kobj,
552 b->kobj, name); 547 b->kobj, name);
553 if (err) 548 if (err)
554 goto out; 549 goto out;
@@ -558,7 +553,6 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
558 553
559 goto out; 554 goto out;
560 } 555 }
561#endif
562 556
563 b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL); 557 b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL);
564 if (!b) { 558 if (!b) {
@@ -571,7 +565,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
571 goto out; 565 goto out;
572 } 566 }
573 567
574 b->kobj = kobject_create_and_add(name, &per_cpu(mce_sysdev, cpu).kobj); 568 b->kobj = kobject_create_and_add(name, &per_cpu(mce_device, cpu).kobj);
575 if (!b->kobj) 569 if (!b->kobj)
576 goto out_free; 570 goto out_free;
577 571
@@ -591,7 +585,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
591 if (i == cpu) 585 if (i == cpu)
592 continue; 586 continue;
593 587
594 err = sysfs_create_link(&per_cpu(mce_sysdev, i).kobj, 588 err = sysfs_create_link(&per_cpu(mce_device, i).kobj,
595 b->kobj, name); 589 b->kobj, name);
596 if (err) 590 if (err)
597 goto out; 591 goto out;
@@ -669,7 +663,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
669#ifdef CONFIG_SMP 663#ifdef CONFIG_SMP
670 /* sibling symlink */ 664 /* sibling symlink */
671 if (shared_bank[bank] && b->blocks->cpu != cpu) { 665 if (shared_bank[bank] && b->blocks->cpu != cpu) {
672 sysfs_remove_link(&per_cpu(mce_sysdev, cpu).kobj, name); 666 sysfs_remove_link(&per_cpu(mce_device, cpu).kobj, name);
673 per_cpu(threshold_banks, cpu)[bank] = NULL; 667 per_cpu(threshold_banks, cpu)[bank] = NULL;
674 668
675 return; 669 return;
@@ -681,7 +675,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
681 if (i == cpu) 675 if (i == cpu)
682 continue; 676 continue;
683 677
684 sysfs_remove_link(&per_cpu(mce_sysdev, i).kobj, name); 678 sysfs_remove_link(&per_cpu(mce_device, i).kobj, name);
685 per_cpu(threshold_banks, i)[bank] = NULL; 679 per_cpu(threshold_banks, i)[bank] = NULL;
686 } 680 }
687 681
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 787e06c84ea6..67bb17a37a0a 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -19,7 +19,6 @@
19#include <linux/kernel.h> 19#include <linux/kernel.h>
20#include <linux/percpu.h> 20#include <linux/percpu.h>
21#include <linux/export.h> 21#include <linux/export.h>
22#include <linux/sysdev.h>
23#include <linux/types.h> 22#include <linux/types.h>
24#include <linux/init.h> 23#include <linux/init.h>
25#include <linux/smp.h> 24#include <linux/smp.h>
@@ -69,16 +68,16 @@ static atomic_t therm_throt_en = ATOMIC_INIT(0);
69static u32 lvtthmr_init __read_mostly; 68static u32 lvtthmr_init __read_mostly;
70 69
71#ifdef CONFIG_SYSFS 70#ifdef CONFIG_SYSFS
72#define define_therm_throt_sysdev_one_ro(_name) \ 71#define define_therm_throt_device_one_ro(_name) \
73 static SYSDEV_ATTR(_name, 0444, \ 72 static DEVICE_ATTR(_name, 0444, \
74 therm_throt_sysdev_show_##_name, \ 73 therm_throt_device_show_##_name, \
75 NULL) \ 74 NULL) \
76 75
77#define define_therm_throt_sysdev_show_func(event, name) \ 76#define define_therm_throt_device_show_func(event, name) \
78 \ 77 \
79static ssize_t therm_throt_sysdev_show_##event##_##name( \ 78static ssize_t therm_throt_device_show_##event##_##name( \
80 struct sys_device *dev, \ 79 struct device *dev, \
81 struct sysdev_attribute *attr, \ 80 struct device_attribute *attr, \
82 char *buf) \ 81 char *buf) \
83{ \ 82{ \
84 unsigned int cpu = dev->id; \ 83 unsigned int cpu = dev->id; \
@@ -95,20 +94,20 @@ static ssize_t therm_throt_sysdev_show_##event##_##name( \
95 return ret; \ 94 return ret; \
96} 95}
97 96
98define_therm_throt_sysdev_show_func(core_throttle, count); 97define_therm_throt_device_show_func(core_throttle, count);
99define_therm_throt_sysdev_one_ro(core_throttle_count); 98define_therm_throt_device_one_ro(core_throttle_count);
100 99
101define_therm_throt_sysdev_show_func(core_power_limit, count); 100define_therm_throt_device_show_func(core_power_limit, count);
102define_therm_throt_sysdev_one_ro(core_power_limit_count); 101define_therm_throt_device_one_ro(core_power_limit_count);
103 102
104define_therm_throt_sysdev_show_func(package_throttle, count); 103define_therm_throt_device_show_func(package_throttle, count);
105define_therm_throt_sysdev_one_ro(package_throttle_count); 104define_therm_throt_device_one_ro(package_throttle_count);
106 105
107define_therm_throt_sysdev_show_func(package_power_limit, count); 106define_therm_throt_device_show_func(package_power_limit, count);
108define_therm_throt_sysdev_one_ro(package_power_limit_count); 107define_therm_throt_device_one_ro(package_power_limit_count);
109 108
110static struct attribute *thermal_throttle_attrs[] = { 109static struct attribute *thermal_throttle_attrs[] = {
111 &attr_core_throttle_count.attr, 110 &dev_attr_core_throttle_count.attr,
112 NULL 111 NULL
113}; 112};
114 113
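For readers following the macro rename: each define_therm_throt_device_show_func()/define_therm_throt_device_one_ro() pair still expands to a per-CPU show routine keyed off dev->id plus a read-only attribute, only now in struct device terms. An approximate expansion for the core_throttle/count case (the real generated body, elided here to a comment, reads the per-CPU thermal_state counters):

    static ssize_t therm_throt_device_show_core_throttle_count(
            struct device *dev, struct device_attribute *attr, char *buf)
    {
            unsigned int cpu = dev->id;
            ssize_t ret;

            /* generated body: report the core throttle count for this cpu */
            ret = sprintf(buf, "%lu\n", 0UL);
            return ret;
    }

    static DEVICE_ATTR(core_throttle_count, 0444,
                       therm_throt_device_show_core_throttle_count, NULL);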
@@ -223,36 +222,36 @@ static int thresh_event_valid(int event)
223 222
224#ifdef CONFIG_SYSFS 223#ifdef CONFIG_SYSFS
225/* Add/Remove thermal_throttle interface for CPU device: */ 224/* Add/Remove thermal_throttle interface for CPU device: */
226static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev, 225static __cpuinit int thermal_throttle_add_dev(struct device *dev,
227 unsigned int cpu) 226 unsigned int cpu)
228{ 227{
229 int err; 228 int err;
230 struct cpuinfo_x86 *c = &cpu_data(cpu); 229 struct cpuinfo_x86 *c = &cpu_data(cpu);
231 230
232 err = sysfs_create_group(&sys_dev->kobj, &thermal_attr_group); 231 err = sysfs_create_group(&dev->kobj, &thermal_attr_group);
233 if (err) 232 if (err)
234 return err; 233 return err;
235 234
236 if (cpu_has(c, X86_FEATURE_PLN)) 235 if (cpu_has(c, X86_FEATURE_PLN))
237 err = sysfs_add_file_to_group(&sys_dev->kobj, 236 err = sysfs_add_file_to_group(&dev->kobj,
238 &attr_core_power_limit_count.attr, 237 &dev_attr_core_power_limit_count.attr,
239 thermal_attr_group.name); 238 thermal_attr_group.name);
240 if (cpu_has(c, X86_FEATURE_PTS)) { 239 if (cpu_has(c, X86_FEATURE_PTS)) {
241 err = sysfs_add_file_to_group(&sys_dev->kobj, 240 err = sysfs_add_file_to_group(&dev->kobj,
242 &attr_package_throttle_count.attr, 241 &dev_attr_package_throttle_count.attr,
243 thermal_attr_group.name); 242 thermal_attr_group.name);
244 if (cpu_has(c, X86_FEATURE_PLN)) 243 if (cpu_has(c, X86_FEATURE_PLN))
245 err = sysfs_add_file_to_group(&sys_dev->kobj, 244 err = sysfs_add_file_to_group(&dev->kobj,
246 &attr_package_power_limit_count.attr, 245 &dev_attr_package_power_limit_count.attr,
247 thermal_attr_group.name); 246 thermal_attr_group.name);
248 } 247 }
249 248
250 return err; 249 return err;
251} 250}
252 251
253static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) 252static __cpuinit void thermal_throttle_remove_dev(struct device *dev)
254{ 253{
255 sysfs_remove_group(&sys_dev->kobj, &thermal_attr_group); 254 sysfs_remove_group(&dev->kobj, &thermal_attr_group);
256} 255}
257 256
258/* Mutex protecting device creation against CPU hotplug: */ 257/* Mutex protecting device creation against CPU hotplug: */
@@ -265,16 +264,16 @@ thermal_throttle_cpu_callback(struct notifier_block *nfb,
265 void *hcpu) 264 void *hcpu)
266{ 265{
267 unsigned int cpu = (unsigned long)hcpu; 266 unsigned int cpu = (unsigned long)hcpu;
268 struct sys_device *sys_dev; 267 struct device *dev;
269 int err = 0; 268 int err = 0;
270 269
271 sys_dev = get_cpu_sysdev(cpu); 270 dev = get_cpu_device(cpu);
272 271
273 switch (action) { 272 switch (action) {
274 case CPU_UP_PREPARE: 273 case CPU_UP_PREPARE:
275 case CPU_UP_PREPARE_FROZEN: 274 case CPU_UP_PREPARE_FROZEN:
276 mutex_lock(&therm_cpu_lock); 275 mutex_lock(&therm_cpu_lock);
277 err = thermal_throttle_add_dev(sys_dev, cpu); 276 err = thermal_throttle_add_dev(dev, cpu);
278 mutex_unlock(&therm_cpu_lock); 277 mutex_unlock(&therm_cpu_lock);
279 WARN_ON(err); 278 WARN_ON(err);
280 break; 279 break;
@@ -283,7 +282,7 @@ thermal_throttle_cpu_callback(struct notifier_block *nfb,
283 case CPU_DEAD: 282 case CPU_DEAD:
284 case CPU_DEAD_FROZEN: 283 case CPU_DEAD_FROZEN:
285 mutex_lock(&therm_cpu_lock); 284 mutex_lock(&therm_cpu_lock);
286 thermal_throttle_remove_dev(sys_dev); 285 thermal_throttle_remove_dev(dev);
287 mutex_unlock(&therm_cpu_lock); 286 mutex_unlock(&therm_cpu_lock);
288 break; 287 break;
289 } 288 }
@@ -310,7 +309,7 @@ static __init int thermal_throttle_init_device(void)
310#endif 309#endif
311 /* connect live CPUs to sysfs */ 310 /* connect live CPUs to sysfs */
312 for_each_online_cpu(cpu) { 311 for_each_online_cpu(cpu) {
313 err = thermal_throttle_add_dev(get_cpu_sysdev(cpu), cpu); 312 err = thermal_throttle_add_dev(get_cpu_device(cpu), cpu);
314 WARN_ON(err); 313 WARN_ON(err);
315 } 314 }
316#ifdef CONFIG_HOTPLUG_CPU 315#ifdef CONFIG_HOTPLUG_CPU
@@ -323,17 +322,6 @@ device_initcall(thermal_throttle_init_device);
323 322
324#endif /* CONFIG_SYSFS */ 323#endif /* CONFIG_SYSFS */
325 324
326/*
327 * Set up the most two significant bit to notify mce log that this thermal
328 * event type.
329 * This is a temp solution. May be changed in the future with mce log
330 * infrasture.
331 */
332#define CORE_THROTTLED (0)
333#define CORE_POWER_LIMIT ((__u64)1 << 62)
334#define PACKAGE_THROTTLED ((__u64)2 << 62)
335#define PACKAGE_POWER_LIMIT ((__u64)3 << 62)
336
337static void notify_thresholds(__u64 msr_val) 325static void notify_thresholds(__u64 msr_val)
338{ 326{
339 /* check whether the interrupt handler is defined; 327 /* check whether the interrupt handler is defined;
@@ -363,27 +351,23 @@ static void intel_thermal_interrupt(void)
363 if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT, 351 if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
364 THERMAL_THROTTLING_EVENT, 352 THERMAL_THROTTLING_EVENT,
365 CORE_LEVEL) != 0) 353 CORE_LEVEL) != 0)
366 mce_log_therm_throt_event(CORE_THROTTLED | msr_val); 354 mce_log_therm_throt_event(msr_val);
367 355
368 if (this_cpu_has(X86_FEATURE_PLN)) 356 if (this_cpu_has(X86_FEATURE_PLN))
369 if (therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT, 357 therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
370 POWER_LIMIT_EVENT, 358 POWER_LIMIT_EVENT,
371 CORE_LEVEL) != 0) 359 CORE_LEVEL);
372 mce_log_therm_throt_event(CORE_POWER_LIMIT | msr_val);
373 360
374 if (this_cpu_has(X86_FEATURE_PTS)) { 361 if (this_cpu_has(X86_FEATURE_PTS)) {
375 rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); 362 rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
376 if (therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT, 363 therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
377 THERMAL_THROTTLING_EVENT, 364 THERMAL_THROTTLING_EVENT,
378 PACKAGE_LEVEL) != 0) 365 PACKAGE_LEVEL);
379 mce_log_therm_throt_event(PACKAGE_THROTTLED | msr_val);
380 if (this_cpu_has(X86_FEATURE_PLN)) 366 if (this_cpu_has(X86_FEATURE_PLN))
381 if (therm_throt_process(msr_val & 367 therm_throt_process(msr_val &
382 PACKAGE_THERM_STATUS_POWER_LIMIT, 368 PACKAGE_THERM_STATUS_POWER_LIMIT,
383 POWER_LIMIT_EVENT, 369 POWER_LIMIT_EVENT,
384 PACKAGE_LEVEL) != 0) 370 PACKAGE_LEVEL);
385 mce_log_therm_throt_event(PACKAGE_POWER_LIMIT
386 | msr_val);
387 } 371 }
388} 372}
389 373
@@ -397,8 +381,8 @@ static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
397 381
398asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) 382asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
399{ 383{
400 exit_idle();
401 irq_enter(); 384 irq_enter();
385 exit_idle();
402 inc_irq_stat(irq_thermal_count); 386 inc_irq_stat(irq_thermal_count);
403 smp_thermal_vector(); 387 smp_thermal_vector();
404 irq_exit(); 388 irq_exit();
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c
index d746df2909c9..aa578cadb940 100644
--- a/arch/x86/kernel/cpu/mcheck/threshold.c
+++ b/arch/x86/kernel/cpu/mcheck/threshold.c
@@ -19,8 +19,8 @@ void (*mce_threshold_vector)(void) = default_threshold_interrupt;
19 19
20asmlinkage void smp_threshold_interrupt(void) 20asmlinkage void smp_threshold_interrupt(void)
21{ 21{
22 exit_idle();
23 irq_enter(); 22 irq_enter();
23 exit_idle();
24 inc_irq_stat(irq_threshold_count); 24 inc_irq_stat(irq_threshold_count);
25 mce_threshold_vector(); 25 mce_threshold_vector();
26 irq_exit(); 26 irq_exit();
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index a71efcdbb092..97b26356e9ee 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -547,6 +547,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
547 547
548 if (tmp != mask_lo) { 548 if (tmp != mask_lo) {
549 printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n"); 549 printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n");
550 add_taint(TAINT_FIRMWARE_WORKAROUND);
550 mask_lo = tmp; 551 mask_lo = tmp;
551 } 552 }
552 } 553 }
@@ -693,6 +694,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
693 694
694 /* Disable MTRRs, and set the default type to uncached */ 695 /* Disable MTRRs, and set the default type to uncached */
695 mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi); 696 mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi);
697 wbinvd();
696} 698}
697 699
698static void post_set(void) __releases(set_atomicity_lock) 700static void post_set(void) __releases(set_atomicity_lock)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 640891014b2a..5adce1040b11 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -312,12 +312,8 @@ int x86_setup_perfctr(struct perf_event *event)
312 return -EOPNOTSUPP; 312 return -EOPNOTSUPP;
313 } 313 }
314 314
315 /*
316 * Do not allow config1 (extended registers) to propagate,
317 * there's no sane user-space generalization yet:
318 */
319 if (attr->type == PERF_TYPE_RAW) 315 if (attr->type == PERF_TYPE_RAW)
320 return 0; 316 return x86_pmu_extra_regs(event->attr.config, event);
321 317
322 if (attr->type == PERF_TYPE_HW_CACHE) 318 if (attr->type == PERF_TYPE_HW_CACHE)
323 return set_ext_hw_attr(hwc, event); 319 return set_ext_hw_attr(hwc, event);
@@ -488,18 +484,195 @@ static inline int is_x86_event(struct perf_event *event)
488 return event->pmu == &pmu; 484 return event->pmu == &pmu;
489} 485}
490 486
487/*
488 * Event scheduler state:
489 *
490 * Assign events by iterating over all events and counters, beginning
491 * with the events of least weight. Keep the current iterator
492 * state in struct sched_state.
493 */
494struct sched_state {
495 int weight;
496 int event; /* event index */
497 int counter; /* counter index */
498 int unassigned; /* number of events to be assigned left */
499 unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
500};
501
502/* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */
503#define SCHED_STATES_MAX 2
504
505struct perf_sched {
506 int max_weight;
507 int max_events;
508 struct event_constraint **constraints;
509 struct sched_state state;
510 int saved_states;
511 struct sched_state saved[SCHED_STATES_MAX];
512};
513
514/*
515 * Initialize the iterator that runs through all events and counters.
516 */
517static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c,
518 int num, int wmin, int wmax)
519{
520 int idx;
521
522 memset(sched, 0, sizeof(*sched));
523 sched->max_events = num;
524 sched->max_weight = wmax;
525 sched->constraints = c;
526
527 for (idx = 0; idx < num; idx++) {
528 if (c[idx]->weight == wmin)
529 break;
530 }
531
532 sched->state.event = idx; /* start with min weight */
533 sched->state.weight = wmin;
534 sched->state.unassigned = num;
535}
536
537static void perf_sched_save_state(struct perf_sched *sched)
538{
539 if (WARN_ON_ONCE(sched->saved_states >= SCHED_STATES_MAX))
540 return;
541
542 sched->saved[sched->saved_states] = sched->state;
543 sched->saved_states++;
544}
545
546static bool perf_sched_restore_state(struct perf_sched *sched)
547{
548 if (!sched->saved_states)
549 return false;
550
551 sched->saved_states--;
552 sched->state = sched->saved[sched->saved_states];
553
554 /* continue with next counter: */
555 clear_bit(sched->state.counter++, sched->state.used);
556
557 return true;
558}
559
560/*
561 * Select a counter for the current event to schedule. Return true on
562 * success.
563 */
564static bool __perf_sched_find_counter(struct perf_sched *sched)
565{
566 struct event_constraint *c;
567 int idx;
568
569 if (!sched->state.unassigned)
570 return false;
571
572 if (sched->state.event >= sched->max_events)
573 return false;
574
575 c = sched->constraints[sched->state.event];
576
577 /* Prefer fixed purpose counters */
578 if (x86_pmu.num_counters_fixed) {
579 idx = X86_PMC_IDX_FIXED;
580 for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_MAX) {
581 if (!__test_and_set_bit(idx, sched->state.used))
582 goto done;
583 }
584 }
585 /* Grab the first unused counter starting with idx */
586 idx = sched->state.counter;
587 for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_FIXED) {
588 if (!__test_and_set_bit(idx, sched->state.used))
589 goto done;
590 }
591
592 return false;
593
594done:
595 sched->state.counter = idx;
596
597 if (c->overlap)
598 perf_sched_save_state(sched);
599
600 return true;
601}
602
603static bool perf_sched_find_counter(struct perf_sched *sched)
604{
605 while (!__perf_sched_find_counter(sched)) {
606 if (!perf_sched_restore_state(sched))
607 return false;
608 }
609
610 return true;
611}
612
613/*
614 * Go through all unassigned events and find the next one to schedule.
615 * Take events with the least weight first. Return true on success.
616 */
617static bool perf_sched_next_event(struct perf_sched *sched)
618{
619 struct event_constraint *c;
620
621 if (!sched->state.unassigned || !--sched->state.unassigned)
622 return false;
623
624 do {
625 /* next event */
626 sched->state.event++;
627 if (sched->state.event >= sched->max_events) {
628 /* next weight */
629 sched->state.event = 0;
630 sched->state.weight++;
631 if (sched->state.weight > sched->max_weight)
632 return false;
633 }
634 c = sched->constraints[sched->state.event];
635 } while (c->weight != sched->state.weight);
636
637 sched->state.counter = 0; /* start with first counter */
638
639 return true;
640}
641
642/*
643 * Assign a counter for each event.
644 */
645static int perf_assign_events(struct event_constraint **constraints, int n,
646 int wmin, int wmax, int *assign)
647{
648 struct perf_sched sched;
649
650 perf_sched_init(&sched, constraints, n, wmin, wmax);
651
652 do {
653 if (!perf_sched_find_counter(&sched))
654 break; /* failed */
655 if (assign)
656 assign[sched.state.event] = sched.state.counter;
657 } while (perf_sched_next_event(&sched));
658
659 return sched.state.unassigned;
660}
661
491int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) 662int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
492{ 663{
493 struct event_constraint *c, *constraints[X86_PMC_IDX_MAX]; 664 struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
494 unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; 665 unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
495 int i, j, w, wmax, num = 0; 666 int i, wmin, wmax, num = 0;
496 struct hw_perf_event *hwc; 667 struct hw_perf_event *hwc;
497 668
498 bitmap_zero(used_mask, X86_PMC_IDX_MAX); 669 bitmap_zero(used_mask, X86_PMC_IDX_MAX);
499 670
500 for (i = 0; i < n; i++) { 671 for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
501 c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]); 672 c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
502 constraints[i] = c; 673 constraints[i] = c;
674 wmin = min(wmin, c->weight);
675 wmax = max(wmax, c->weight);
503 } 676 }
504 677
505 /* 678 /*
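The fast path above now only records wmin/wmax; the real work moves into perf_assign_events(), which visits events in order of increasing constraint weight and gives each the first free counter its mask allows, with the saved/restored states providing backtracking across constraints marked overlap. A user-space toy of just the greedy placement step (illustrative only; the kernel version additionally prefers fixed-purpose counters and can back out of earlier choices):

    /* toy_sched.c: user-space illustration only, not kernel code */
    #include <stdio.h>

    #define NCOUNTERS 4

    struct toy_event {
            const char *name;
            unsigned int mask;      /* bit i set => counter i is allowed */
    };

    int main(void)
    {
            /* Sorted by weight (number of allowed counters), most
             * constrained first, mirroring the wmin..wmax sweep. */
            struct toy_event ev[] = {
                    { "cycles-on-ctr0", 0x1 },      /* weight 1 */
                    { "l1d-event",      0x3 },      /* weight 2 */
                    { "generic-event",  0xf },      /* weight 4 */
            };
            unsigned int used = 0;
            int i, c;

            for (i = 0; i < 3; i++) {
                    int idx = -1;

                    /* take the first free counter the mask permits */
                    for (c = 0; c < NCOUNTERS; c++) {
                            if ((ev[i].mask & (1u << c)) && !(used & (1u << c))) {
                                    idx = c;
                                    break;
                            }
                    }
                    if (idx < 0) {
                            printf("%s: no counter available\n", ev[i].name);
                            continue;
                    }
                    used |= 1u << idx;
                    printf("%s -> counter %d\n", ev[i].name, idx);
            }
            return 0;
    }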
@@ -525,59 +698,11 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
525 if (assign) 698 if (assign)
526 assign[i] = hwc->idx; 699 assign[i] = hwc->idx;
527 } 700 }
528 if (i == n)
529 goto done;
530
531 /*
532 * begin slow path
533 */
534
535 bitmap_zero(used_mask, X86_PMC_IDX_MAX);
536
537 /*
538 * weight = number of possible counters
539 *
540 * 1 = most constrained, only works on one counter
541 * wmax = least constrained, works on any counter
542 *
543 * assign events to counters starting with most
544 * constrained events.
545 */
546 wmax = x86_pmu.num_counters;
547
548 /*
549 * when fixed event counters are present,
550 * wmax is incremented by 1 to account
551 * for one more choice
552 */
553 if (x86_pmu.num_counters_fixed)
554 wmax++;
555
556 for (w = 1, num = n; num && w <= wmax; w++) {
557 /* for each event */
558 for (i = 0; num && i < n; i++) {
559 c = constraints[i];
560 hwc = &cpuc->event_list[i]->hw;
561
562 if (c->weight != w)
563 continue;
564 701
565 for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) { 702 /* slow path */
566 if (!test_bit(j, used_mask)) 703 if (i != n)
567 break; 704 num = perf_assign_events(constraints, n, wmin, wmax, assign);
568 }
569
570 if (j == X86_PMC_IDX_MAX)
571 break;
572 705
573 __set_bit(j, used_mask);
574
575 if (assign)
576 assign[i] = j;
577 num--;
578 }
579 }
580done:
581 /* 706 /*
582 * scheduling failed or is just a simulation, 707 * scheduling failed or is just a simulation,
583 * free resources if necessary 708 * free resources if necessary
@@ -588,7 +713,7 @@ done:
588 x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]); 713 x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]);
589 } 714 }
590 } 715 }
591 return num ? -ENOSPC : 0; 716 return num ? -EINVAL : 0;
592} 717}
593 718
594/* 719/*
@@ -607,7 +732,7 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader,
607 732
608 if (is_x86_event(leader)) { 733 if (is_x86_event(leader)) {
609 if (n >= max_count) 734 if (n >= max_count)
610 return -ENOSPC; 735 return -EINVAL;
611 cpuc->event_list[n] = leader; 736 cpuc->event_list[n] = leader;
612 n++; 737 n++;
613 } 738 }
@@ -620,7 +745,7 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader,
620 continue; 745 continue;
621 746
622 if (n >= max_count) 747 if (n >= max_count)
623 return -ENOSPC; 748 return -EINVAL;
624 749
625 cpuc->event_list[n] = event; 750 cpuc->event_list[n] = event;
626 n++; 751 n++;
@@ -1123,6 +1248,7 @@ static void __init pmu_check_apic(void)
1123 1248
1124static int __init init_hw_perf_events(void) 1249static int __init init_hw_perf_events(void)
1125{ 1250{
1251 struct x86_pmu_quirk *quirk;
1126 struct event_constraint *c; 1252 struct event_constraint *c;
1127 int err; 1253 int err;
1128 1254
@@ -1151,8 +1277,8 @@ static int __init init_hw_perf_events(void)
1151 1277
1152 pr_cont("%s PMU driver.\n", x86_pmu.name); 1278 pr_cont("%s PMU driver.\n", x86_pmu.name);
1153 1279
1154 if (x86_pmu.quirks) 1280 for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next)
1155 x86_pmu.quirks(); 1281 quirk->func();
1156 1282
1157 if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { 1283 if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
1158 WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", 1284 WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
@@ -1175,12 +1301,18 @@ static int __init init_hw_perf_events(void)
1175 1301
1176 unconstrained = (struct event_constraint) 1302 unconstrained = (struct event_constraint)
1177 __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, 1303 __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
1178 0, x86_pmu.num_counters); 1304 0, x86_pmu.num_counters, 0);
1179 1305
1180 if (x86_pmu.event_constraints) { 1306 if (x86_pmu.event_constraints) {
1307 /*
1308 * the event on fixed counter 2 (REF_CYCLES) only works on this
1309 * counter, so do not extend the mask to generic counters
1310 */
1181 for_each_event_constraint(c, x86_pmu.event_constraints) { 1311 for_each_event_constraint(c, x86_pmu.event_constraints) {
1182 if (c->cmask != X86_RAW_EVENT_MASK) 1312 if (c->cmask != X86_RAW_EVENT_MASK
1313 || c->idxmsk64 == X86_PMC_MSK_FIXED_REF_CYCLES) {
1183 continue; 1314 continue;
1315 }
1184 1316
1185 c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; 1317 c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
1186 c->weight += x86_pmu.num_counters; 1318 c->weight += x86_pmu.num_counters;
@@ -1316,7 +1448,7 @@ static int validate_event(struct perf_event *event)
1316 c = x86_pmu.get_event_constraints(fake_cpuc, event); 1448 c = x86_pmu.get_event_constraints(fake_cpuc, event);
1317 1449
1318 if (!c || !c->weight) 1450 if (!c || !c->weight)
1319 ret = -ENOSPC; 1451 ret = -EINVAL;
1320 1452
1321 if (x86_pmu.put_event_constraints) 1453 if (x86_pmu.put_event_constraints)
1322 x86_pmu.put_event_constraints(fake_cpuc, event); 1454 x86_pmu.put_event_constraints(fake_cpuc, event);
@@ -1341,7 +1473,7 @@ static int validate_group(struct perf_event *event)
1341{ 1473{
1342 struct perf_event *leader = event->group_leader; 1474 struct perf_event *leader = event->group_leader;
1343 struct cpu_hw_events *fake_cpuc; 1475 struct cpu_hw_events *fake_cpuc;
1344 int ret = -ENOSPC, n; 1476 int ret = -EINVAL, n;
1345 1477
1346 fake_cpuc = allocate_fake_cpuc(); 1478 fake_cpuc = allocate_fake_cpuc();
1347 if (IS_ERR(fake_cpuc)) 1479 if (IS_ERR(fake_cpuc))
@@ -1570,3 +1702,15 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
1570 1702
1571 return misc; 1703 return misc;
1572} 1704}
1705
1706void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
1707{
1708 cap->version = x86_pmu.version;
1709 cap->num_counters_gp = x86_pmu.num_counters;
1710 cap->num_counters_fixed = x86_pmu.num_counters_fixed;
1711 cap->bit_width_gp = x86_pmu.cntval_bits;
1712 cap->bit_width_fixed = x86_pmu.cntval_bits;
1713 cap->events_mask = (unsigned int)x86_pmu.events_maskl;
1714 cap->events_mask_len = x86_pmu.events_mask_len;
1715}
1716EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability);
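perf_get_x86_pmu_capability() simply snapshots the detected PMU geometry for other kernel code (the export suggests in-kernel consumers such as KVM's PMU emulation). A caller would use it roughly like this (sketch; the struct and prototype are assumed to be exposed via asm/perf_event.h):

    #include <linux/kernel.h>
    #include <asm/perf_event.h>

    static void demo_dump_pmu_caps(void)
    {
            struct x86_pmu_capability cap;

            perf_get_x86_pmu_capability(&cap);
            pr_info("PMU v%d: %d GP counters (%d bits), %d fixed (%d bits)\n",
                    cap.version, cap.num_counters_gp, cap.bit_width_gp,
                    cap.num_counters_fixed, cap.bit_width_fixed);
    }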
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index b9698d40ac4b..8944062f46e2 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -45,6 +45,7 @@ struct event_constraint {
45 u64 code; 45 u64 code;
46 u64 cmask; 46 u64 cmask;
47 int weight; 47 int weight;
48 int overlap;
48}; 49};
49 50
50struct amd_nb { 51struct amd_nb {
@@ -151,15 +152,40 @@ struct cpu_hw_events {
151 void *kfree_on_online; 152 void *kfree_on_online;
152}; 153};
153 154
154#define __EVENT_CONSTRAINT(c, n, m, w) {\ 155#define __EVENT_CONSTRAINT(c, n, m, w, o) {\
155 { .idxmsk64 = (n) }, \ 156 { .idxmsk64 = (n) }, \
156 .code = (c), \ 157 .code = (c), \
157 .cmask = (m), \ 158 .cmask = (m), \
158 .weight = (w), \ 159 .weight = (w), \
160 .overlap = (o), \
159} 161}
160 162
161#define EVENT_CONSTRAINT(c, n, m) \ 163#define EVENT_CONSTRAINT(c, n, m) \
162 __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) 164 __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0)
165
166/*
167 * The overlap flag marks event constraints with overlapping counter
168 * masks. This is the case if the counter mask of such an event is not
169 * a subset of any other counter mask of a constraint with an equal or
170 * higher weight, e.g.:
171 *
172 * c_overlaps = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
173 * c_another1 = EVENT_CONSTRAINT(0, 0x07, 0);
174 * c_another2 = EVENT_CONSTRAINT(0, 0x38, 0);
175 *
176 * The event scheduler may not select the correct counter in the first
177 * cycle because it needs to know which subsequent events will be
178 * scheduled. It may fail to schedule the events then. So we set the
179 * overlap flag for such constraints to give the scheduler a hint which
180 * events to select for counter rescheduling.
181 *
182 * Care must be taken as the rescheduling algorithm is O(n!) which
183 * will increase scheduling cycles for an over-committed system
184 * dramatically. The number of such EVENT_CONSTRAINT_OVERLAP() macros
185 * and their counter masks must be kept to a minimum.
186 */
187#define EVENT_CONSTRAINT_OVERLAP(c, n, m) \
188 __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1)
163 189
164/* 190/*
165 * Constraint on the Event code. 191 * Constraint on the Event code.
@@ -235,6 +261,11 @@ union perf_capabilities {
235 u64 capabilities; 261 u64 capabilities;
236}; 262};
237 263
264struct x86_pmu_quirk {
265 struct x86_pmu_quirk *next;
266 void (*func)(void);
267};
268
238/* 269/*
239 * struct x86_pmu - generic x86 pmu 270 * struct x86_pmu - generic x86 pmu
240 */ 271 */
@@ -259,6 +290,11 @@ struct x86_pmu {
259 int num_counters_fixed; 290 int num_counters_fixed;
260 int cntval_bits; 291 int cntval_bits;
261 u64 cntval_mask; 292 u64 cntval_mask;
293 union {
294 unsigned long events_maskl;
295 unsigned long events_mask[BITS_TO_LONGS(ARCH_PERFMON_EVENTS_COUNT)];
296 };
297 int events_mask_len;
262 int apic; 298 int apic;
263 u64 max_period; 299 u64 max_period;
264 struct event_constraint * 300 struct event_constraint *
@@ -268,7 +304,7 @@ struct x86_pmu {
268 void (*put_event_constraints)(struct cpu_hw_events *cpuc, 304 void (*put_event_constraints)(struct cpu_hw_events *cpuc,
269 struct perf_event *event); 305 struct perf_event *event);
270 struct event_constraint *event_constraints; 306 struct event_constraint *event_constraints;
271 void (*quirks)(void); 307 struct x86_pmu_quirk *quirks;
272 int perfctr_second_write; 308 int perfctr_second_write;
273 309
274 int (*cpu_prepare)(int cpu); 310 int (*cpu_prepare)(int cpu);
@@ -309,6 +345,15 @@ struct x86_pmu {
309 struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr); 345 struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);
310}; 346};
311 347
348#define x86_add_quirk(func_) \
349do { \
350 static struct x86_pmu_quirk __quirk __initdata = { \
351 .func = func_, \
352 }; \
353 __quirk.next = x86_pmu.quirks; \
354 x86_pmu.quirks = &__quirk; \
355} while (0)
356
312#define ERF_NO_HT_SHARING 1 357#define ERF_NO_HT_SHARING 1
313#define ERF_HAS_RSP_1 2 358#define ERF_HAS_RSP_1 2
314 359
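One detail worth spelling out: x86_add_quirk() pushes each quirk onto the head of x86_pmu.quirks, so init_hw_perf_events() runs quirks in reverse registration order. A small illustration with hypothetical quirk functions:

    static __init void quirk_a(void) { pr_info("quirk A\n"); }
    static __init void quirk_b(void) { pr_info("quirk B\n"); }

    static __init void demo_register_quirks(void)
    {
            x86_add_quirk(quirk_a);     /* registered first */
            x86_add_quirk(quirk_b);     /* registered second */
    }

    /* init_hw_perf_events() walks the list head first:
     *
     *      for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next)
     *              quirk->func();
     *
     * so quirk_b runs before quirk_a; that is why intel_arch_events_quirk
     * is installed first in intel_pmu_init(), "so it runs last".
     */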
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index aeefd45697a2..0397b23be8e9 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -492,7 +492,7 @@ static __initconst const struct x86_pmu amd_pmu = {
492static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0); 492static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0);
493static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0); 493static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
494static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0); 494static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0);
495static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT(0, 0x09, 0); 495static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
496static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0); 496static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
497static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0); 497static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
498 498
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index ab6343d21825..3b8a2d30d14e 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -199,8 +199,7 @@ static int force_ibs_eilvt_setup(void)
199 goto out; 199 goto out;
200 } 200 }
201 201
202 pr_err(FW_BUG "using offset %d for IBS interrupts\n", offset); 202 pr_info("IBS: LVT offset %d assigned\n", offset);
203 pr_err(FW_BUG "workaround enabled for IBS LVT offset\n");
204 203
205 return 0; 204 return 0;
206out: 205out:
@@ -265,19 +264,23 @@ perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *h
265static __init int amd_ibs_init(void) 264static __init int amd_ibs_init(void)
266{ 265{
267 u32 caps; 266 u32 caps;
268 int ret; 267 int ret = -EINVAL;
269 268
270 caps = __get_ibs_caps(); 269 caps = __get_ibs_caps();
271 if (!caps) 270 if (!caps)
272 return -ENODEV; /* ibs not supported by the cpu */ 271 return -ENODEV; /* ibs not supported by the cpu */
273 272
274 if (!ibs_eilvt_valid()) { 273 /*
275 ret = force_ibs_eilvt_setup(); 274 * Force LVT offset assignment for family 10h: The offsets are
276 if (ret) { 275 * not assigned by the BIOS for this family, so the OS is
277 pr_err("Failed to setup IBS, %d\n", ret); 276 * responsible for doing it. If the OS assignment fails, fall
278 return ret; 277 * back to BIOS settings and try to setup this.
279 } 278 */
280 } 279 if (boot_cpu_data.x86 == 0x10)
280 force_ibs_eilvt_setup();
281
282 if (!ibs_eilvt_valid())
283 goto out;
281 284
282 get_online_cpus(); 285 get_online_cpus();
283 ibs_caps = caps; 286 ibs_caps = caps;
@@ -287,7 +290,11 @@ static __init int amd_ibs_init(void)
287 smp_call_function(setup_APIC_ibs, NULL, 1); 290 smp_call_function(setup_APIC_ibs, NULL, 1);
288 put_online_cpus(); 291 put_online_cpus();
289 292
290 return perf_event_ibs_init(); 293 ret = perf_event_ibs_init();
294out:
295 if (ret)
296 pr_err("Failed to setup IBS, %d\n", ret);
297 return ret;
291} 298}
292 299
293/* Since we need the pci subsystem to init ibs we can't do this earlier: */ 300/* Since we need the pci subsystem to init ibs we can't do this earlier: */
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 2be5ebe99872..3bd37bdf1b8e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -28,6 +28,7 @@ static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
28 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, 28 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
29 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, 29 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
30 [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, 30 [PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
31 [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x0300, /* pseudo-encoding */
31}; 32};
32 33
33static struct event_constraint intel_core_event_constraints[] __read_mostly = 34static struct event_constraint intel_core_event_constraints[] __read_mostly =
@@ -45,12 +46,7 @@ static struct event_constraint intel_core2_event_constraints[] __read_mostly =
45{ 46{
46 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 47 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
47 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 48 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
48 /* 49 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
49 * Core2 has Fixed Counter 2 listed as CPU_CLK_UNHALTED.REF and event
50 * 0x013c as CPU_CLK_UNHALTED.BUS and specifies there is a fixed
51 * ratio between these counters.
52 */
53 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
54 INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ 50 INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
55 INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ 51 INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
56 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ 52 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
@@ -68,7 +64,7 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
68{ 64{
69 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 65 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
70 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 66 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
71 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ 67 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
72 INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ 68 INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
73 INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ 69 INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
74 INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ 70 INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
@@ -90,7 +86,7 @@ static struct event_constraint intel_westmere_event_constraints[] __read_mostly
90{ 86{
91 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 87 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
92 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 88 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
93 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ 89 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
94 INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ 90 INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
95 INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */ 91 INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
96 INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ 92 INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
@@ -102,7 +98,7 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly =
102{ 98{
103 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 99 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
104 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 100 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
105 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ 101 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
106 INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */ 102 INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
107 INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ 103 INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
108 INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ 104 INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
@@ -125,7 +121,7 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
125{ 121{
126 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ 122 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
127 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ 123 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
128 /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ 124 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
129 EVENT_CONSTRAINT_END 125 EVENT_CONSTRAINT_END
130}; 126};
131 127
@@ -1169,7 +1165,7 @@ again:
1169 */ 1165 */
1170 c = &unconstrained; 1166 c = &unconstrained;
1171 } else if (intel_try_alt_er(event, orig_idx)) { 1167 } else if (intel_try_alt_er(event, orig_idx)) {
1172 raw_spin_unlock(&era->lock); 1168 raw_spin_unlock_irqrestore(&era->lock, flags);
1173 goto again; 1169 goto again;
1174 } 1170 }
1175 raw_spin_unlock_irqrestore(&era->lock, flags); 1171 raw_spin_unlock_irqrestore(&era->lock, flags);
@@ -1519,7 +1515,7 @@ static __initconst const struct x86_pmu intel_pmu = {
1519 .guest_get_msrs = intel_guest_get_msrs, 1515 .guest_get_msrs = intel_guest_get_msrs,
1520}; 1516};
1521 1517
1522static void intel_clovertown_quirks(void) 1518static __init void intel_clovertown_quirk(void)
1523{ 1519{
1524 /* 1520 /*
1525 * PEBS is unreliable due to: 1521 * PEBS is unreliable due to:
@@ -1545,12 +1541,60 @@ static void intel_clovertown_quirks(void)
1545 x86_pmu.pebs_constraints = NULL; 1541 x86_pmu.pebs_constraints = NULL;
1546} 1542}
1547 1543
1544static __init void intel_sandybridge_quirk(void)
1545{
1546 printk(KERN_WARNING "PEBS disabled due to CPU errata.\n");
1547 x86_pmu.pebs = 0;
1548 x86_pmu.pebs_constraints = NULL;
1549}
1550
1551static const struct { int id; char *name; } intel_arch_events_map[] __initconst = {
1552 { PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" },
1553 { PERF_COUNT_HW_INSTRUCTIONS, "instructions" },
1554 { PERF_COUNT_HW_BUS_CYCLES, "bus cycles" },
1555 { PERF_COUNT_HW_CACHE_REFERENCES, "cache references" },
1556 { PERF_COUNT_HW_CACHE_MISSES, "cache misses" },
1557 { PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" },
1558 { PERF_COUNT_HW_BRANCH_MISSES, "branch misses" },
1559};
1560
1561static __init void intel_arch_events_quirk(void)
1562{
1563 int bit;
1564
1565 /* disable events that cpuid reports as not present */
1566 for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) {
1567 intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0;
1568 printk(KERN_WARNING "CPUID marked event: \'%s\' unavailable\n",
1569 intel_arch_events_map[bit].name);
1570 }
1571}
1572
1573static __init void intel_nehalem_quirk(void)
1574{
1575 union cpuid10_ebx ebx;
1576
1577 ebx.full = x86_pmu.events_maskl;
1578 if (ebx.split.no_branch_misses_retired) {
1579 /*
1580 * Erratum AAJ80 detected; we work around it by using
1581 * the BR_MISP_EXEC.ANY event. This will over-count
1582 * branch-misses, but it's still much better than the
1583 * architectural event which is often completely bogus:
1584 */
1585 intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
1586 ebx.split.no_branch_misses_retired = 0;
1587 x86_pmu.events_maskl = ebx.full;
1588 printk(KERN_INFO "CPU erratum AAJ80 worked around\n");
1589 }
1590}
1591
1548__init int intel_pmu_init(void) 1592__init int intel_pmu_init(void)
1549{ 1593{
1550 union cpuid10_edx edx; 1594 union cpuid10_edx edx;
1551 union cpuid10_eax eax; 1595 union cpuid10_eax eax;
1596 union cpuid10_ebx ebx;
1552 unsigned int unused; 1597 unsigned int unused;
1553 unsigned int ebx;
1554 int version; 1598 int version;
1555 1599
1556 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { 1600 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
@@ -1567,8 +1611,8 @@ __init int intel_pmu_init(void)
1567 * Check whether the Architectural PerfMon supports 1611 * Check whether the Architectural PerfMon supports
1568 * Branch Misses Retired hw_event or not. 1612 * Branch Misses Retired hw_event or not.
1569 */ 1613 */
1570 cpuid(10, &eax.full, &ebx, &unused, &edx.full); 1614 cpuid(10, &eax.full, &ebx.full, &unused, &edx.full);
1571 if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) 1615 if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT)
1572 return -ENODEV; 1616 return -ENODEV;
1573 1617
1574 version = eax.split.version_id; 1618 version = eax.split.version_id;
@@ -1582,6 +1626,9 @@ __init int intel_pmu_init(void)
1582 x86_pmu.cntval_bits = eax.split.bit_width; 1626 x86_pmu.cntval_bits = eax.split.bit_width;
1583 x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1; 1627 x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1;
1584 1628
1629 x86_pmu.events_maskl = ebx.full;
1630 x86_pmu.events_mask_len = eax.split.mask_length;
1631
1585 /* 1632 /*
1586 * Quirk: v2 perfmon does not report fixed-purpose events, so 1633 * Quirk: v2 perfmon does not report fixed-purpose events, so
1587 * assume at least 3 events: 1634 * assume at least 3 events:
@@ -1601,6 +1648,8 @@ __init int intel_pmu_init(void)
1601 1648
1602 intel_ds_init(); 1649 intel_ds_init();
1603 1650
1651 x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */
1652
1604 /* 1653 /*
1605 * Install the hw-cache-events table: 1654 * Install the hw-cache-events table:
1606 */ 1655 */
@@ -1610,7 +1659,7 @@ __init int intel_pmu_init(void)
1610 break; 1659 break;
1611 1660
1612 case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ 1661 case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
1613 x86_pmu.quirks = intel_clovertown_quirks; 1662 x86_add_quirk(intel_clovertown_quirk);
1614 case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ 1663 case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
1615 case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ 1664 case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
1616 case 29: /* six-core 45 nm xeon "Dunnington" */ 1665 case 29: /* six-core 45 nm xeon "Dunnington" */
@@ -1644,17 +1693,8 @@ __init int intel_pmu_init(void)
1644 /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ 1693 /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
1645 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; 1694 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;
1646 1695
1647 if (ebx & 0x40) { 1696 x86_add_quirk(intel_nehalem_quirk);
1648 /*
1649 * Erratum AAJ80 detected, we work it around by using
1650 * the BR_MISP_EXEC.ANY event. This will over-count
1651 * branch-misses, but it's still much better than the
1652 * architectural event which is often completely bogus:
1653 */
1654 intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
1655 1697
1656 pr_cont("erratum AAJ80 worked around, ");
1657 }
1658 pr_cont("Nehalem events, "); 1698 pr_cont("Nehalem events, ");
1659 break; 1699 break;
1660 1700
@@ -1694,6 +1734,7 @@ __init int intel_pmu_init(void)
1694 break; 1734 break;
1695 1735
1696 case 42: /* SandyBridge */ 1736 case 42: /* SandyBridge */
1737 x86_add_quirk(intel_sandybridge_quirk);
1697 case 45: /* SandyBridge, "Romley-EP" */ 1738 case 45: /* SandyBridge, "Romley-EP" */
1698 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, 1739 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
1699 sizeof(hw_cache_event_ids)); 1740 sizeof(hw_cache_event_ids));
@@ -1730,5 +1771,6 @@ __init int intel_pmu_init(void)
1730 break; 1771 break;
1731 } 1772 }
1732 } 1773 }
1774
1733 return 0; 1775 return 0;
1734} 1776}
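
The quirk handling above moves from a single x86_pmu.quirks function pointer to x86_add_quirk(), so several model-specific fixups (Clovertown, SandyBridge, Nehalem, the CPUID arch-events check) can be queued independently and all run once at the end of PMU setup. Below is a minimal user-space sketch of such a chained-quirk list; the struct and function names are illustrative, not the kernel's actual data structures.

#include <stdio.h>
#include <stdlib.h>

struct quirk {
	void (*func)(void);
	struct quirk *next;
};

static struct quirk *quirk_list;

static void add_quirk(void (*func)(void))
{
	struct quirk *q = malloc(sizeof(*q));

	if (!q)
		return;
	q->func = func;
	q->next = quirk_list;	/* prepend: the quirk installed first ends up last */
	quirk_list = q;
}

static void clovertown_quirk(void)  { puts("clovertown quirk"); }
static void arch_events_quirk(void) { puts("arch events quirk"); }

int main(void)
{
	add_quirk(arch_events_quirk);	/* install first, so it runs last */
	add_quirk(clovertown_quirk);

	for (struct quirk *q = quirk_list; q; q = q->next)
		q->func();
	return 0;
}
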
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index c0d238f49db8..73da6b64f5b7 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -493,6 +493,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
493 unsigned long from = cpuc->lbr_entries[0].from; 493 unsigned long from = cpuc->lbr_entries[0].from;
494 unsigned long old_to, to = cpuc->lbr_entries[0].to; 494 unsigned long old_to, to = cpuc->lbr_entries[0].to;
495 unsigned long ip = regs->ip; 495 unsigned long ip = regs->ip;
496 int is_64bit = 0;
496 497
497 /* 498 /*
498 * We don't need to fixup if the PEBS assist is fault like 499 * We don't need to fixup if the PEBS assist is fault like
@@ -544,7 +545,10 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
544 } else 545 } else
545 kaddr = (void *)to; 546 kaddr = (void *)to;
546 547
547 kernel_insn_init(&insn, kaddr); 548#ifdef CONFIG_X86_64
549 is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32);
550#endif
551 insn_init(&insn, kaddr, is_64bit);
548 insn_get_length(&insn); 552 insn_get_length(&insn);
549 to += insn.length; 553 to += insn.length;
550 } while (to < ip); 554 } while (to < ip);
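
The PEBS fixup loop now tells the instruction decoder explicitly whether it is walking 32-bit or 64-bit code: kernel addresses are always decoded as 64-bit, user addresses follow the task's compat flag. A small standalone sketch of that decision; the address split and the compat test here are stubbed-in assumptions, not the kernel's definitions.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative user/kernel split, not the exact kernel constant. */
#define KERNEL_SPLIT (1ULL << 47)

static bool kernel_ip(unsigned long long ip)
{
	return ip >= KERNEL_SPLIT;
}

static int insn_bits(unsigned long long to, bool task_is_ia32)
{
	bool is_64bit = kernel_ip(to) || !task_is_ia32;

	return is_64bit ? 64 : 32;
}

int main(void)
{
	printf("kernel address      -> decode as %d-bit\n",
	       insn_bits(0xffffffff81000000ULL, true));
	printf("compat user address -> decode as %d-bit\n",
	       insn_bits(0x08048000ULL, true));
	return 0;
}
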
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index 492bf1358a7c..ef484d9d0a25 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -1268,7 +1268,7 @@ reserve:
1268 } 1268 }
1269 1269
1270done: 1270done:
1271 return num ? -ENOSPC : 0; 1271 return num ? -EINVAL : 0;
1272} 1272}
1273 1273
1274static __initconst const struct x86_pmu p4_pmu = { 1274static __initconst const struct x86_pmu p4_pmu = {
diff --git a/arch/x86/kernel/cpu/powerflags.c b/arch/x86/kernel/cpu/powerflags.c
index 5abbea297e0c..7b3fe56b1c21 100644
--- a/arch/x86/kernel/cpu/powerflags.c
+++ b/arch/x86/kernel/cpu/powerflags.c
@@ -16,5 +16,6 @@ const char *const x86_power_flags[32] = {
16 "100mhzsteps", 16 "100mhzsteps",
17 "hwpstate", 17 "hwpstate",
18 "", /* tsc invariant mapped to constant_tsc */ 18 "", /* tsc invariant mapped to constant_tsc */
19 /* nothing */ 19 "cpb", /* core performance boost */
20 "eff_freq_ro", /* Readonly aperf/mperf */
20}; 21};
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 14b23140e81f..8022c6681485 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -64,12 +64,10 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
64static int show_cpuinfo(struct seq_file *m, void *v) 64static int show_cpuinfo(struct seq_file *m, void *v)
65{ 65{
66 struct cpuinfo_x86 *c = v; 66 struct cpuinfo_x86 *c = v;
67 unsigned int cpu = 0; 67 unsigned int cpu;
68 int i; 68 int i;
69 69
70#ifdef CONFIG_SMP
71 cpu = c->cpu_index; 70 cpu = c->cpu_index;
72#endif
73 seq_printf(m, "processor\t: %u\n" 71 seq_printf(m, "processor\t: %u\n"
74 "vendor_id\t: %s\n" 72 "vendor_id\t: %s\n"
75 "cpu family\t: %d\n" 73 "cpu family\t: %d\n"
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 212a6a42527c..a524353d93f2 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -177,7 +177,7 @@ static struct notifier_block __refdata cpuid_class_cpu_notifier =
177 .notifier_call = cpuid_class_cpu_callback, 177 .notifier_call = cpuid_class_cpu_callback,
178}; 178};
179 179
180static char *cpuid_devnode(struct device *dev, mode_t *mode) 180static char *cpuid_devnode(struct device *dev, umode_t *mode)
181{ 181{
182 return kasprintf(GFP_KERNEL, "cpu/%u/cpuid", MINOR(dev->devt)); 182 return kasprintf(GFP_KERNEL, "cpu/%u/cpuid", MINOR(dev->devt));
183} 183}
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 3b97a80ce329..c99f9ed013d5 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -116,16 +116,16 @@ void show_registers(struct pt_regs *regs)
116 for (i = 0; i < code_len; i++, ip++) { 116 for (i = 0; i < code_len; i++, ip++) {
117 if (ip < (u8 *)PAGE_OFFSET || 117 if (ip < (u8 *)PAGE_OFFSET ||
118 probe_kernel_address(ip, c)) { 118 probe_kernel_address(ip, c)) {
119 printk(" Bad EIP value."); 119 printk(KERN_CONT " Bad EIP value.");
120 break; 120 break;
121 } 121 }
122 if (ip == (u8 *)regs->ip) 122 if (ip == (u8 *)regs->ip)
123 printk("<%02x> ", c); 123 printk(KERN_CONT "<%02x> ", c);
124 else 124 else
125 printk("%02x ", c); 125 printk(KERN_CONT "%02x ", c);
126 } 126 }
127 } 127 }
128 printk("\n"); 128 printk(KERN_CONT "\n");
129} 129}
130 130
131int is_valid_bugaddr(unsigned long ip) 131int is_valid_bugaddr(unsigned long ip)
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 19853ad8afc5..6d728d9284bd 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -284,16 +284,16 @@ void show_registers(struct pt_regs *regs)
284 for (i = 0; i < code_len; i++, ip++) { 284 for (i = 0; i < code_len; i++, ip++) {
285 if (ip < (u8 *)PAGE_OFFSET || 285 if (ip < (u8 *)PAGE_OFFSET ||
286 probe_kernel_address(ip, c)) { 286 probe_kernel_address(ip, c)) {
287 printk(" Bad RIP value."); 287 printk(KERN_CONT " Bad RIP value.");
288 break; 288 break;
289 } 289 }
290 if (ip == (u8 *)regs->ip) 290 if (ip == (u8 *)regs->ip)
291 printk("<%02x> ", c); 291 printk(KERN_CONT "<%02x> ", c);
292 else 292 else
293 printk("%02x ", c); 293 printk(KERN_CONT "%02x ", c);
294 } 294 }
295 } 295 }
296 printk("\n"); 296 printk(KERN_CONT "\n");
297} 297}
298 298
299int is_valid_bugaddr(unsigned long ip) 299int is_valid_bugaddr(unsigned long ip)
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 303a0e48f076..8071e2f3d6eb 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -738,35 +738,17 @@ core_initcall(e820_mark_nvs_memory);
738/* 738/*
739 * pre-allocate 4k and reserve it in memblock and e820_saved 739 * pre-allocate 4k and reserve it in memblock and e820_saved
740 */ 740 */
741u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align) 741u64 __init early_reserve_e820(u64 size, u64 align)
742{ 742{
743 u64 size = 0;
744 u64 addr; 743 u64 addr;
745 u64 start;
746 744
747 for (start = startt; ; start += size) { 745 addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
748 start = memblock_x86_find_in_range_size(start, &size, align); 746 if (addr) {
749 if (start == MEMBLOCK_ERROR) 747 e820_update_range_saved(addr, size, E820_RAM, E820_RESERVED);
750 return 0; 748 printk(KERN_INFO "update e820_saved for early_reserve_e820\n");
751 if (size >= sizet) 749 update_e820_saved();
752 break;
753 } 750 }
754 751
755#ifdef CONFIG_X86_32
756 if (start >= MAXMEM)
757 return 0;
758 if (start + size > MAXMEM)
759 size = MAXMEM - start;
760#endif
761
762 addr = round_down(start + size - sizet, align);
763 if (addr < start)
764 return 0;
765 memblock_x86_reserve_range(addr, addr + sizet, "new next");
766 e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED);
767 printk(KERN_INFO "update e820_saved for early_reserve_e820\n");
768 update_e820_saved();
769
770 return addr; 752 return addr;
771} 753}
772 754
@@ -1090,7 +1072,7 @@ void __init memblock_x86_fill(void)
1090 * We are safe to enable resizing, because memblock_x86_fill() 1072 * We are safe to enable resizing, because memblock_x86_fill()
1091 * is rather later for x86 1073 * is rather later for x86
1092 */ 1074 */
1093 memblock_can_resize = 1; 1075 memblock_allow_resize();
1094 1076
1095 for (i = 0; i < e820.nr_map; i++) { 1077 for (i = 0; i < e820.nr_map; i++) {
1096 struct e820entry *ei = &e820.map[i]; 1078 struct e820entry *ei = &e820.map[i];
@@ -1105,22 +1087,36 @@ void __init memblock_x86_fill(void)
1105 memblock_add(ei->addr, ei->size); 1087 memblock_add(ei->addr, ei->size);
1106 } 1088 }
1107 1089
1108 memblock_analyze();
1109 memblock_dump_all(); 1090 memblock_dump_all();
1110} 1091}
1111 1092
1112void __init memblock_find_dma_reserve(void) 1093void __init memblock_find_dma_reserve(void)
1113{ 1094{
1114#ifdef CONFIG_X86_64 1095#ifdef CONFIG_X86_64
1115 u64 free_size_pfn; 1096 u64 nr_pages = 0, nr_free_pages = 0;
1116 u64 mem_size_pfn; 1097 unsigned long start_pfn, end_pfn;
1098 phys_addr_t start, end;
1099 int i;
1100 u64 u;
1101
1117 /* 1102 /*
1118 * need to find out used area below MAX_DMA_PFN 1103 * need to find out used area below MAX_DMA_PFN
1119 * need to use memblock to get free size in [0, MAX_DMA_PFN] 1104 * need to use memblock to get free size in [0, MAX_DMA_PFN]
1120 * at first, and assume boot_mem will not take below MAX_DMA_PFN 1105 * at first, and assume boot_mem will not take below MAX_DMA_PFN
1121 */ 1106 */
1122 mem_size_pfn = memblock_x86_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT; 1107 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) {
1123 free_size_pfn = memblock_x86_free_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT; 1108 start_pfn = min_t(unsigned long, start_pfn, MAX_DMA_PFN);
1124 set_dma_reserve(mem_size_pfn - free_size_pfn); 1109 end_pfn = min_t(unsigned long, end_pfn, MAX_DMA_PFN);
1110 nr_pages += end_pfn - start_pfn;
1111 }
1112
1113 for_each_free_mem_range(u, MAX_NUMNODES, &start, &end, NULL) {
1114 start_pfn = min_t(unsigned long, PFN_UP(start), MAX_DMA_PFN);
1115 end_pfn = min_t(unsigned long, PFN_DOWN(end), MAX_DMA_PFN);
1116 if (start_pfn < end_pfn)
1117 nr_free_pages += end_pfn - start_pfn;
1118 }
1119
1120 set_dma_reserve(nr_pages - nr_free_pages);
1125#endif 1121#endif
1126} 1122}
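
The rewritten memblock_find_dma_reserve() clamps every memory range and every free range to MAX_DMA_PFN and takes the difference as the number of DMA pages already in use. A user-space sketch of that arithmetic, using made-up sample ranges and a 16 MB limit:

#include <stdio.h>

#define MAX_DMA_PFN 0x1000UL	/* 16 MB in 4 KB pages */

struct pfn_range {
	unsigned long start_pfn, end_pfn;
};

static unsigned long pages_below_dma(const struct pfn_range *r, int n)
{
	unsigned long pages = 0;
	int i;

	for (i = 0; i < n; i++) {
		unsigned long s = r[i].start_pfn < MAX_DMA_PFN ? r[i].start_pfn : MAX_DMA_PFN;
		unsigned long e = r[i].end_pfn   < MAX_DMA_PFN ? r[i].end_pfn   : MAX_DMA_PFN;

		if (s < e)
			pages += e - s;
	}
	return pages;
}

int main(void)
{
	/* made-up sample layout */
	struct pfn_range mem[]      = { { 0x10, 0x9f }, { 0x100, 0x2000 } };
	struct pfn_range free_mem[] = { { 0x40, 0x9f }, { 0x200, 0x1800 } };

	unsigned long nr_pages      = pages_below_dma(mem, 2);
	unsigned long nr_free_pages = pages_below_dma(free_mem, 2);

	printf("set_dma_reserve(%lu pages)\n", nr_pages - nr_free_pages);
	return 0;
}
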
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index f3f6f5344001..22d0e21b4dd7 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -625,6 +625,8 @@ work_notifysig: # deal with pending signals and
625 movl %esp, %eax 625 movl %esp, %eax
626 jne work_notifysig_v86 # returning to kernel-space or 626 jne work_notifysig_v86 # returning to kernel-space or
627 # vm86-space 627 # vm86-space
628 TRACE_IRQS_ON
629 ENABLE_INTERRUPTS(CLBR_NONE)
628 xorl %edx, %edx 630 xorl %edx, %edx
629 call do_notify_resume 631 call do_notify_resume
630 jmp resume_userspace_sig 632 jmp resume_userspace_sig
@@ -638,6 +640,8 @@ work_notifysig_v86:
638#else 640#else
639 movl %esp, %eax 641 movl %esp, %eax
640#endif 642#endif
643 TRACE_IRQS_ON
644 ENABLE_INTERRUPTS(CLBR_NONE)
641 xorl %edx, %edx 645 xorl %edx, %edx
642 call do_notify_resume 646 call do_notify_resume
643 jmp resume_userspace_sig 647 jmp resume_userspace_sig
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index faf8d5e74b0b..a20e1cb9dc87 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -221,7 +221,7 @@ ENDPROC(native_usergs_sysret64)
221 /*CFI_REL_OFFSET ss,0*/ 221 /*CFI_REL_OFFSET ss,0*/
222 pushq_cfi %rax /* rsp */ 222 pushq_cfi %rax /* rsp */
223 CFI_REL_OFFSET rsp,0 223 CFI_REL_OFFSET rsp,0
224 pushq_cfi $X86_EFLAGS_IF /* eflags - interrupts on */ 224 pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_BIT1) /* eflags - interrupts on */
225 /*CFI_REL_OFFSET rflags,0*/ 225 /*CFI_REL_OFFSET rflags,0*/
226 pushq_cfi $__KERNEL_CS /* cs */ 226 pushq_cfi $__KERNEL_CS /* cs */
227 /*CFI_REL_OFFSET cs,0*/ 227 /*CFI_REL_OFFSET cs,0*/
@@ -411,7 +411,7 @@ ENTRY(ret_from_fork)
411 RESTORE_REST 411 RESTORE_REST
412 412
413 testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? 413 testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread?
414 je int_ret_from_sys_call 414 jz retint_restore_args
415 415
416 testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET 416 testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET
417 jnz int_ret_from_sys_call 417 jnz int_ret_from_sys_call
@@ -465,7 +465,7 @@ ENTRY(system_call)
465 * after the swapgs, so that it can do the swapgs 465 * after the swapgs, so that it can do the swapgs
466 * for the guest and jump here on syscall. 466 * for the guest and jump here on syscall.
467 */ 467 */
468ENTRY(system_call_after_swapgs) 468GLOBAL(system_call_after_swapgs)
469 469
470 movq %rsp,PER_CPU_VAR(old_rsp) 470 movq %rsp,PER_CPU_VAR(old_rsp)
471 movq PER_CPU_VAR(kernel_stack),%rsp 471 movq PER_CPU_VAR(kernel_stack),%rsp
@@ -478,8 +478,7 @@ ENTRY(system_call_after_swapgs)
478 movq %rax,ORIG_RAX-ARGOFFSET(%rsp) 478 movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
479 movq %rcx,RIP-ARGOFFSET(%rsp) 479 movq %rcx,RIP-ARGOFFSET(%rsp)
480 CFI_REL_OFFSET rip,RIP-ARGOFFSET 480 CFI_REL_OFFSET rip,RIP-ARGOFFSET
481 GET_THREAD_INFO(%rcx) 481 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
482 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
483 jnz tracesys 482 jnz tracesys
484system_call_fastpath: 483system_call_fastpath:
485 cmpq $__NR_syscall_max,%rax 484 cmpq $__NR_syscall_max,%rax
@@ -496,10 +495,9 @@ ret_from_sys_call:
496 /* edi: flagmask */ 495 /* edi: flagmask */
497sysret_check: 496sysret_check:
498 LOCKDEP_SYS_EXIT 497 LOCKDEP_SYS_EXIT
499 GET_THREAD_INFO(%rcx)
500 DISABLE_INTERRUPTS(CLBR_NONE) 498 DISABLE_INTERRUPTS(CLBR_NONE)
501 TRACE_IRQS_OFF 499 TRACE_IRQS_OFF
502 movl TI_flags(%rcx),%edx 500 movl TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET),%edx
503 andl %edi,%edx 501 andl %edi,%edx
504 jnz sysret_careful 502 jnz sysret_careful
505 CFI_REMEMBER_STATE 503 CFI_REMEMBER_STATE
@@ -583,7 +581,7 @@ sysret_audit:
583 /* Do syscall tracing */ 581 /* Do syscall tracing */
584tracesys: 582tracesys:
585#ifdef CONFIG_AUDITSYSCALL 583#ifdef CONFIG_AUDITSYSCALL
586 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx) 584 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
587 jz auditsys 585 jz auditsys
588#endif 586#endif
589 SAVE_REST 587 SAVE_REST
@@ -612,8 +610,6 @@ tracesys:
612GLOBAL(int_ret_from_sys_call) 610GLOBAL(int_ret_from_sys_call)
613 DISABLE_INTERRUPTS(CLBR_NONE) 611 DISABLE_INTERRUPTS(CLBR_NONE)
614 TRACE_IRQS_OFF 612 TRACE_IRQS_OFF
615 testl $3,CS-ARGOFFSET(%rsp)
616 je retint_restore_args
617 movl $_TIF_ALLWORK_MASK,%edi 613 movl $_TIF_ALLWORK_MASK,%edi
618 /* edi: mask to check */ 614 /* edi: mask to check */
619GLOBAL(int_with_check) 615GLOBAL(int_with_check)
@@ -953,6 +949,7 @@ END(common_interrupt)
953ENTRY(\sym) 949ENTRY(\sym)
954 INTR_FRAME 950 INTR_FRAME
955 pushq_cfi $~(\num) 951 pushq_cfi $~(\num)
952.Lcommon_\sym:
956 interrupt \do_sym 953 interrupt \do_sym
957 jmp ret_from_intr 954 jmp ret_from_intr
958 CFI_ENDPROC 955 CFI_ENDPROC
@@ -976,13 +973,21 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \
976 x86_platform_ipi smp_x86_platform_ipi 973 x86_platform_ipi smp_x86_platform_ipi
977 974
978#ifdef CONFIG_SMP 975#ifdef CONFIG_SMP
979.irp idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \ 976 ALIGN
977 INTR_FRAME
978.irp idx,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
980 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 979 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
981.if NUM_INVALIDATE_TLB_VECTORS > \idx 980.if NUM_INVALIDATE_TLB_VECTORS > \idx
982apicinterrupt (INVALIDATE_TLB_VECTOR_START)+\idx \ 981ENTRY(invalidate_interrupt\idx)
983 invalidate_interrupt\idx smp_invalidate_interrupt 982 pushq_cfi $~(INVALIDATE_TLB_VECTOR_START+\idx)
983 jmp .Lcommon_invalidate_interrupt0
984 CFI_ADJUST_CFA_OFFSET -8
985END(invalidate_interrupt\idx)
984.endif 986.endif
985.endr 987.endr
988 CFI_ENDPROC
989apicinterrupt INVALIDATE_TLB_VECTOR_START, \
990 invalidate_interrupt0, smp_invalidate_interrupt
986#endif 991#endif
987 992
988apicinterrupt THRESHOLD_APIC_VECTOR \ 993apicinterrupt THRESHOLD_APIC_VECTOR \
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c
index af0699ba48cf..48d9d4ea1020 100644
--- a/arch/x86/kernel/head.c
+++ b/arch/x86/kernel/head.c
@@ -52,5 +52,5 @@ void __init reserve_ebda_region(void)
52 lowmem = 0x9f000; 52 lowmem = 0x9f000;
53 53
54 /* reserve all memory between lowmem and the 1MB mark */ 54 /* reserve all memory between lowmem and the 1MB mark */
55 memblock_x86_reserve_range(lowmem, 0x100000, "* BIOS reserved"); 55 memblock_reserve(lowmem, 0x100000 - lowmem);
56} 56}
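
memblock_reserve() takes a (base, size) pair where the removed memblock_x86_reserve_range() took (start, end) plus a label, so each converted call site computes size as end - start. A tiny sketch of that translation with a stubbed-out memblock_reserve():

#include <stdio.h>

/* stub standing in for the kernel's memblock_reserve(base, size) */
static void memblock_reserve(unsigned long long base, unsigned long long size)
{
	printf("reserve [%#llx - %#llx)\n", base, base + size);
}

/* what the removed (start, end) interface looked like at the call sites */
static void reserve_range(unsigned long long start, unsigned long long end)
{
	memblock_reserve(start, end - start);	/* size = end - start */
}

int main(void)
{
	reserve_range(0x9f000, 0x100000);	/* EBDA .. 1 MB, as in reserve_ebda_region() */
	return 0;
}
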
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 3bb08509a7a1..51ff18616d50 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -31,9 +31,8 @@ static void __init i386_default_early_setup(void)
31 31
32void __init i386_start_kernel(void) 32void __init i386_start_kernel(void)
33{ 33{
34 memblock_init(); 34 memblock_reserve(__pa_symbol(&_text),
35 35 __pa_symbol(&__bss_stop) - __pa_symbol(&_text));
36 memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
37 36
38#ifdef CONFIG_BLK_DEV_INITRD 37#ifdef CONFIG_BLK_DEV_INITRD
39 /* Reserve INITRD */ 38 /* Reserve INITRD */
@@ -42,7 +41,7 @@ void __init i386_start_kernel(void)
42 u64 ramdisk_image = boot_params.hdr.ramdisk_image; 41 u64 ramdisk_image = boot_params.hdr.ramdisk_image;
43 u64 ramdisk_size = boot_params.hdr.ramdisk_size; 42 u64 ramdisk_size = boot_params.hdr.ramdisk_size;
44 u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); 43 u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
45 memblock_x86_reserve_range(ramdisk_image, ramdisk_end, "RAMDISK"); 44 memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
46 } 45 }
47#endif 46#endif
48 47
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 5655c2272adb..3a3b779f41d3 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -98,9 +98,8 @@ void __init x86_64_start_reservations(char *real_mode_data)
98{ 98{
99 copy_bootdata(__va(real_mode_data)); 99 copy_bootdata(__va(real_mode_data));
100 100
101 memblock_init(); 101 memblock_reserve(__pa_symbol(&_text),
102 102 __pa_symbol(&__bss_stop) - __pa_symbol(&_text));
103 memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
104 103
105#ifdef CONFIG_BLK_DEV_INITRD 104#ifdef CONFIG_BLK_DEV_INITRD
106 /* Reserve INITRD */ 105 /* Reserve INITRD */
@@ -109,7 +108,7 @@ void __init x86_64_start_reservations(char *real_mode_data)
109 unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; 108 unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
110 unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; 109 unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
111 unsigned long ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); 110 unsigned long ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
112 memblock_x86_reserve_range(ramdisk_image, ramdisk_end, "RAMDISK"); 111 memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
113 } 112 }
114#endif 113#endif
115 114
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index b946a9eac7d9..ad0de0c2714e 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -2,7 +2,6 @@
2#include <linux/clockchips.h> 2#include <linux/clockchips.h>
3#include <linux/interrupt.h> 3#include <linux/interrupt.h>
4#include <linux/export.h> 4#include <linux/export.h>
5#include <linux/sysdev.h>
6#include <linux/delay.h> 5#include <linux/delay.h>
7#include <linux/errno.h> 6#include <linux/errno.h>
8#include <linux/i8253.h> 7#include <linux/i8253.h>
@@ -32,8 +31,6 @@
32#define HPET_MIN_CYCLES 128 31#define HPET_MIN_CYCLES 128
33#define HPET_MIN_PROG_DELTA (HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1)) 32#define HPET_MIN_PROG_DELTA (HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1))
34 33
35#define EVT_TO_HPET_DEV(evt) container_of(evt, struct hpet_dev, evt)
36
37/* 34/*
38 * HPET address is set in acpi/boot.c, when an ACPI entry exists 35 * HPET address is set in acpi/boot.c, when an ACPI entry exists
39 */ 36 */
@@ -55,6 +52,11 @@ struct hpet_dev {
55 char name[10]; 52 char name[10];
56}; 53};
57 54
55inline struct hpet_dev *EVT_TO_HPET_DEV(struct clock_event_device *evtdev)
56{
57 return container_of(evtdev, struct hpet_dev, evt);
58}
59
58inline unsigned int hpet_readl(unsigned int a) 60inline unsigned int hpet_readl(unsigned int a)
59{ 61{
60 return readl(hpet_virt_address + a); 62 return readl(hpet_virt_address + a);
@@ -1049,6 +1051,14 @@ int hpet_rtc_timer_init(void)
1049} 1051}
1050EXPORT_SYMBOL_GPL(hpet_rtc_timer_init); 1052EXPORT_SYMBOL_GPL(hpet_rtc_timer_init);
1051 1053
1054static void hpet_disable_rtc_channel(void)
1055{
1056 unsigned long cfg;
1057 cfg = hpet_readl(HPET_T1_CFG);
1058 cfg &= ~HPET_TN_ENABLE;
1059 hpet_writel(cfg, HPET_T1_CFG);
1060}
1061
1052/* 1062/*
1053 * The functions below are called from rtc driver. 1063 * The functions below are called from rtc driver.
1054 * Return 0 if HPET is not being used. 1064 * Return 0 if HPET is not being used.
@@ -1060,6 +1070,9 @@ int hpet_mask_rtc_irq_bit(unsigned long bit_mask)
1060 return 0; 1070 return 0;
1061 1071
1062 hpet_rtc_flags &= ~bit_mask; 1072 hpet_rtc_flags &= ~bit_mask;
1073 if (unlikely(!hpet_rtc_flags))
1074 hpet_disable_rtc_channel();
1075
1063 return 1; 1076 return 1;
1064} 1077}
1065EXPORT_SYMBOL_GPL(hpet_mask_rtc_irq_bit); 1078EXPORT_SYMBOL_GPL(hpet_mask_rtc_irq_bit);
@@ -1125,15 +1138,11 @@ EXPORT_SYMBOL_GPL(hpet_rtc_dropped_irq);
1125 1138
1126static void hpet_rtc_timer_reinit(void) 1139static void hpet_rtc_timer_reinit(void)
1127{ 1140{
1128 unsigned int cfg, delta; 1141 unsigned int delta;
1129 int lost_ints = -1; 1142 int lost_ints = -1;
1130 1143
1131 if (unlikely(!hpet_rtc_flags)) { 1144 if (unlikely(!hpet_rtc_flags))
1132 cfg = hpet_readl(HPET_T1_CFG); 1145 hpet_disable_rtc_channel();
1133 cfg &= ~HPET_TN_ENABLE;
1134 hpet_writel(cfg, HPET_T1_CFG);
1135 return;
1136 }
1137 1146
1138 if (!(hpet_rtc_flags & RTC_PIE) || hpet_pie_limit) 1147 if (!(hpet_rtc_flags & RTC_PIE) || hpet_pie_limit)
1139 delta = hpet_default_delta; 1148 delta = hpet_default_delta;
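
EVT_TO_HPET_DEV() becomes a typed inline helper built on container_of(): given the embedded clock_event_device, it recovers the enclosing hpet_dev. A standalone sketch of the pattern with simplified stand-in structures:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/* simplified stand-ins for the kernel structures */
struct clock_event_device { const char *name; };
struct hpet_dev           { int num; struct clock_event_device evt; };

static struct hpet_dev *evt_to_hpet_dev(struct clock_event_device *evtdev)
{
	return container_of(evtdev, struct hpet_dev, evt);
}

int main(void)
{
	struct hpet_dev hd = { .num = 3, .evt = { .name = "hpet3" } };

	printf("event \"%s\" belongs to channel %d\n",
	       hd.evt.name, evt_to_hpet_dev(&hd.evt)->num);
	return 0;
}
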
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 429e0c92924e..7943e0c21bde 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -74,6 +74,10 @@ int arch_show_interrupts(struct seq_file *p, int prec)
74 for_each_online_cpu(j) 74 for_each_online_cpu(j)
75 seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs); 75 seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs);
76 seq_printf(p, " IRQ work interrupts\n"); 76 seq_printf(p, " IRQ work interrupts\n");
77 seq_printf(p, "%*s: ", prec, "RTR");
78 for_each_online_cpu(j)
79 seq_printf(p, "%10u ", irq_stats(j)->icr_read_retry_count);
80 seq_printf(p, " APIC ICR read retries\n");
77#endif 81#endif
78 if (x86_platform_ipi_callback) { 82 if (x86_platform_ipi_callback) {
79 seq_printf(p, "%*s: ", prec, "PLT"); 83 seq_printf(p, "%*s: ", prec, "PLT");
@@ -136,6 +140,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
136 sum += irq_stats(cpu)->irq_spurious_count; 140 sum += irq_stats(cpu)->irq_spurious_count;
137 sum += irq_stats(cpu)->apic_perf_irqs; 141 sum += irq_stats(cpu)->apic_perf_irqs;
138 sum += irq_stats(cpu)->apic_irq_work_irqs; 142 sum += irq_stats(cpu)->apic_irq_work_irqs;
143 sum += irq_stats(cpu)->icr_read_retry_count;
139#endif 144#endif
140 if (x86_platform_ipi_callback) 145 if (x86_platform_ipi_callback)
141 sum += irq_stats(cpu)->x86_platform_ipis; 146 sum += irq_stats(cpu)->x86_platform_ipis;
@@ -181,8 +186,8 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
181 unsigned vector = ~regs->orig_ax; 186 unsigned vector = ~regs->orig_ax;
182 unsigned irq; 187 unsigned irq;
183 188
184 exit_idle();
185 irq_enter(); 189 irq_enter();
190 exit_idle();
186 191
187 irq = __this_cpu_read(vector_irq[vector]); 192 irq = __this_cpu_read(vector_irq[vector]);
188 193
@@ -209,10 +214,10 @@ void smp_x86_platform_ipi(struct pt_regs *regs)
209 214
210 ack_APIC_irq(); 215 ack_APIC_irq();
211 216
212 exit_idle();
213
214 irq_enter(); 217 irq_enter();
215 218
219 exit_idle();
220
216 inc_irq_stat(x86_platform_ipis); 221 inc_irq_stat(x86_platform_ipis);
217 222
218 if (x86_platform_ipi_callback) 223 if (x86_platform_ipi_callback)
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index acf8fbf8fbda..69bca468c47a 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -38,6 +38,9 @@ static inline void stack_overflow_check(struct pt_regs *regs)
38#ifdef CONFIG_DEBUG_STACKOVERFLOW 38#ifdef CONFIG_DEBUG_STACKOVERFLOW
39 u64 curbase = (u64)task_stack_page(current); 39 u64 curbase = (u64)task_stack_page(current);
40 40
41 if (user_mode_vm(regs))
42 return;
43
41 WARN_ONCE(regs->sp >= curbase && 44 WARN_ONCE(regs->sp >= curbase &&
42 regs->sp <= curbase + THREAD_SIZE && 45 regs->sp <= curbase + THREAD_SIZE &&
43 regs->sp < curbase + sizeof(struct thread_info) + 46 regs->sp < curbase + sizeof(struct thread_info) +
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index b3300e6bacef..313fb5cddbce 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -9,7 +9,7 @@
9#include <linux/kprobes.h> 9#include <linux/kprobes.h>
10#include <linux/init.h> 10#include <linux/init.h>
11#include <linux/kernel_stat.h> 11#include <linux/kernel_stat.h>
12#include <linux/sysdev.h> 12#include <linux/device.h>
13#include <linux/bitops.h> 13#include <linux/bitops.h>
14#include <linux/acpi.h> 14#include <linux/acpi.h>
15#include <linux/io.h> 15#include <linux/io.h>
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index ea9d5f2f13ef..2889b3d43882 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -50,7 +50,7 @@ void arch_jump_label_transform(struct jump_entry *entry,
50 put_online_cpus(); 50 put_online_cpus();
51} 51}
52 52
53void arch_jump_label_transform_static(struct jump_entry *entry, 53__init_or_module void arch_jump_label_transform_static(struct jump_entry *entry,
54 enum jump_label_type type) 54 enum jump_label_type type)
55{ 55{
56 __jump_label_transform(entry, type, text_poke_early); 56 __jump_label_transform(entry, type, text_poke_early);
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index c1a0188e29ae..44842d756b29 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -74,9 +74,10 @@ static cycle_t kvm_clock_read(void)
74 struct pvclock_vcpu_time_info *src; 74 struct pvclock_vcpu_time_info *src;
75 cycle_t ret; 75 cycle_t ret;
76 76
77 src = &get_cpu_var(hv_clock); 77 preempt_disable_notrace();
78 src = &__get_cpu_var(hv_clock);
78 ret = pvclock_clocksource_read(src); 79 ret = pvclock_clocksource_read(src);
79 put_cpu_var(hv_clock); 80 preempt_enable_notrace();
80 return ret; 81 return ret;
81} 82}
82 83
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index d494799aafcd..fe86493f3ed1 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -1,14 +1,18 @@
1/* 1/*
2 * AMD CPU Microcode Update Driver for Linux 2 * AMD CPU Microcode Update Driver for Linux
3 * Copyright (C) 2008 Advanced Micro Devices Inc. 3 * Copyright (C) 2008-2011 Advanced Micro Devices Inc.
4 * 4 *
5 * Author: Peter Oruba <peter.oruba@amd.com> 5 * Author: Peter Oruba <peter.oruba@amd.com>
6 * 6 *
7 * Based on work by: 7 * Based on work by:
8 * Tigran Aivazian <tigran@aivazian.fsnet.co.uk> 8 * Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
9 * 9 *
10 * This driver allows to upgrade microcode on AMD 10 * Maintainers:
11 * family 0x10 and 0x11 processors. 11 * Andreas Herrmann <andreas.herrmann3@amd.com>
12 * Borislav Petkov <borislav.petkov@amd.com>
13 *
14 * This driver allows upgrading microcode on F10h AMD
15 * CPUs and later.
12 * 16 *
13 * Licensed under the terms of the GNU General Public 17 * Licensed under the terms of the GNU General Public
14 * License version 2. See file COPYING for details. 18 * License version 2. See file COPYING for details.
@@ -71,6 +75,9 @@ struct microcode_amd {
71 75
72static struct equiv_cpu_entry *equiv_cpu_table; 76static struct equiv_cpu_entry *equiv_cpu_table;
73 77
78/* page-sized ucode patch buffer */
79void *patch;
80
74static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) 81static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
75{ 82{
76 struct cpuinfo_x86 *c = &cpu_data(cpu); 83 struct cpuinfo_x86 *c = &cpu_data(cpu);
@@ -86,27 +93,76 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
86 return 0; 93 return 0;
87} 94}
88 95
89static int get_matching_microcode(int cpu, struct microcode_header_amd *mc_hdr, 96static unsigned int verify_ucode_size(int cpu, u32 patch_size,
90 int rev) 97 unsigned int size)
91{ 98{
92 unsigned int current_cpu_id; 99 struct cpuinfo_x86 *c = &cpu_data(cpu);
93 u16 equiv_cpu_id = 0; 100 u32 max_size;
94 unsigned int i = 0; 101
102#define F1XH_MPB_MAX_SIZE 2048
103#define F14H_MPB_MAX_SIZE 1824
104#define F15H_MPB_MAX_SIZE 4096
105
106 switch (c->x86) {
107 case 0x14:
108 max_size = F14H_MPB_MAX_SIZE;
109 break;
110 case 0x15:
111 max_size = F15H_MPB_MAX_SIZE;
112 break;
113 default:
114 max_size = F1XH_MPB_MAX_SIZE;
115 break;
116 }
117
118 if (patch_size > min_t(u32, size, max_size)) {
119 pr_err("patch size mismatch\n");
120 return 0;
121 }
122
123 return patch_size;
124}
125
126static u16 find_equiv_id(void)
127{
128 unsigned int current_cpu_id, i = 0;
95 129
96 BUG_ON(equiv_cpu_table == NULL); 130 BUG_ON(equiv_cpu_table == NULL);
131
97 current_cpu_id = cpuid_eax(0x00000001); 132 current_cpu_id = cpuid_eax(0x00000001);
98 133
99 while (equiv_cpu_table[i].installed_cpu != 0) { 134 while (equiv_cpu_table[i].installed_cpu != 0) {
100 if (current_cpu_id == equiv_cpu_table[i].installed_cpu) { 135 if (current_cpu_id == equiv_cpu_table[i].installed_cpu)
101 equiv_cpu_id = equiv_cpu_table[i].equiv_cpu; 136 return equiv_cpu_table[i].equiv_cpu;
102 break; 137
103 }
104 i++; 138 i++;
105 } 139 }
140 return 0;
141}
106 142
143/*
144 * we signal a good patch is found by returning its size > 0
145 */
146static int get_matching_microcode(int cpu, const u8 *ucode_ptr,
147 unsigned int leftover_size, int rev,
148 unsigned int *current_size)
149{
150 struct microcode_header_amd *mc_hdr;
151 unsigned int actual_size;
152 u16 equiv_cpu_id;
153
154 /* size of the current patch we're staring at */
155 *current_size = *(u32 *)(ucode_ptr + 4) + SECTION_HDR_SIZE;
156
157 equiv_cpu_id = find_equiv_id();
107 if (!equiv_cpu_id) 158 if (!equiv_cpu_id)
108 return 0; 159 return 0;
109 160
161 /*
162 * let's look at the patch header itself now
163 */
164 mc_hdr = (struct microcode_header_amd *)(ucode_ptr + SECTION_HDR_SIZE);
165
110 if (mc_hdr->processor_rev_id != equiv_cpu_id) 166 if (mc_hdr->processor_rev_id != equiv_cpu_id)
111 return 0; 167 return 0;
112 168
@@ -120,7 +176,20 @@ static int get_matching_microcode(int cpu, struct microcode_header_amd *mc_hdr,
120 if (mc_hdr->patch_id <= rev) 176 if (mc_hdr->patch_id <= rev)
121 return 0; 177 return 0;
122 178
123 return 1; 179 /*
180 * now that the header looks sane, verify its size
181 */
182 actual_size = verify_ucode_size(cpu, *current_size, leftover_size);
183 if (!actual_size)
184 return 0;
185
186 /* clear the patch buffer */
187 memset(patch, 0, PAGE_SIZE);
188
189 /* all looks ok, get the binary patch */
190 get_ucode_data(patch, ucode_ptr + SECTION_HDR_SIZE, actual_size);
191
192 return actual_size;
124} 193}
125 194
126static int apply_microcode_amd(int cpu) 195static int apply_microcode_amd(int cpu)
@@ -155,63 +224,6 @@ static int apply_microcode_amd(int cpu)
155 return 0; 224 return 0;
156} 225}
157 226
158static unsigned int verify_ucode_size(int cpu, const u8 *buf, unsigned int size)
159{
160 struct cpuinfo_x86 *c = &cpu_data(cpu);
161 u32 max_size, actual_size;
162
163#define F1XH_MPB_MAX_SIZE 2048
164#define F14H_MPB_MAX_SIZE 1824
165#define F15H_MPB_MAX_SIZE 4096
166
167 switch (c->x86) {
168 case 0x14:
169 max_size = F14H_MPB_MAX_SIZE;
170 break;
171 case 0x15:
172 max_size = F15H_MPB_MAX_SIZE;
173 break;
174 default:
175 max_size = F1XH_MPB_MAX_SIZE;
176 break;
177 }
178
179 actual_size = *(u32 *)(buf + 4);
180
181 if (actual_size + SECTION_HDR_SIZE > size || actual_size > max_size) {
182 pr_err("section size mismatch\n");
183 return 0;
184 }
185
186 return actual_size;
187}
188
189static struct microcode_header_amd *
190get_next_ucode(int cpu, const u8 *buf, unsigned int size, unsigned int *mc_size)
191{
192 struct microcode_header_amd *mc = NULL;
193 unsigned int actual_size = 0;
194
195 if (*(u32 *)buf != UCODE_UCODE_TYPE) {
196 pr_err("invalid type field in container file section header\n");
197 goto out;
198 }
199
200 actual_size = verify_ucode_size(cpu, buf, size);
201 if (!actual_size)
202 goto out;
203
204 mc = vzalloc(actual_size);
205 if (!mc)
206 goto out;
207
208 get_ucode_data(mc, buf + SECTION_HDR_SIZE, actual_size);
209 *mc_size = actual_size + SECTION_HDR_SIZE;
210
211out:
212 return mc;
213}
214
215static int install_equiv_cpu_table(const u8 *buf) 227static int install_equiv_cpu_table(const u8 *buf)
216{ 228{
217 unsigned int *ibuf = (unsigned int *)buf; 229 unsigned int *ibuf = (unsigned int *)buf;
@@ -247,36 +259,38 @@ generic_load_microcode(int cpu, const u8 *data, size_t size)
247{ 259{
248 struct ucode_cpu_info *uci = ucode_cpu_info + cpu; 260 struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
249 struct microcode_header_amd *mc_hdr = NULL; 261 struct microcode_header_amd *mc_hdr = NULL;
250 unsigned int mc_size, leftover; 262 unsigned int mc_size, leftover, current_size = 0;
251 int offset; 263 int offset;
252 const u8 *ucode_ptr = data; 264 const u8 *ucode_ptr = data;
253 void *new_mc = NULL; 265 void *new_mc = NULL;
254 unsigned int new_rev = uci->cpu_sig.rev; 266 unsigned int new_rev = uci->cpu_sig.rev;
255 enum ucode_state state = UCODE_OK; 267 enum ucode_state state = UCODE_ERROR;
256 268
257 offset = install_equiv_cpu_table(ucode_ptr); 269 offset = install_equiv_cpu_table(ucode_ptr);
258 if (offset < 0) { 270 if (offset < 0) {
259 pr_err("failed to create equivalent cpu table\n"); 271 pr_err("failed to create equivalent cpu table\n");
260 return UCODE_ERROR; 272 goto out;
261 } 273 }
262
263 ucode_ptr += offset; 274 ucode_ptr += offset;
264 leftover = size - offset; 275 leftover = size - offset;
265 276
266 while (leftover) { 277 if (*(u32 *)ucode_ptr != UCODE_UCODE_TYPE) {
267 mc_hdr = get_next_ucode(cpu, ucode_ptr, leftover, &mc_size); 278 pr_err("invalid type field in container file section header\n");
268 if (!mc_hdr) 279 goto free_table;
269 break; 280 }
270 281
271 if (get_matching_microcode(cpu, mc_hdr, new_rev)) { 282 while (leftover) {
272 vfree(new_mc); 283 mc_size = get_matching_microcode(cpu, ucode_ptr, leftover,
284 new_rev, &current_size);
285 if (mc_size) {
286 mc_hdr = patch;
287 new_mc = patch;
273 new_rev = mc_hdr->patch_id; 288 new_rev = mc_hdr->patch_id;
274 new_mc = mc_hdr; 289 goto out_ok;
275 } else 290 }
276 vfree(mc_hdr);
277 291
278 ucode_ptr += mc_size; 292 ucode_ptr += current_size;
279 leftover -= mc_size; 293 leftover -= current_size;
280 } 294 }
281 295
282 if (!new_mc) { 296 if (!new_mc) {
@@ -284,19 +298,16 @@ generic_load_microcode(int cpu, const u8 *data, size_t size)
284 goto free_table; 298 goto free_table;
285 } 299 }
286 300
287 if (!leftover) { 301out_ok:
288 vfree(uci->mc); 302 uci->mc = new_mc;
289 uci->mc = new_mc; 303 state = UCODE_OK;
290 pr_debug("CPU%d update ucode (0x%08x -> 0x%08x)\n", 304 pr_debug("CPU%d update ucode (0x%08x -> 0x%08x)\n",
291 cpu, uci->cpu_sig.rev, new_rev); 305 cpu, uci->cpu_sig.rev, new_rev);
292 } else {
293 vfree(new_mc);
294 state = UCODE_ERROR;
295 }
296 306
297free_table: 307free_table:
298 free_equiv_cpu_table(); 308 free_equiv_cpu_table();
299 309
310out:
300 return state; 311 return state;
301} 312}
302 313
@@ -337,7 +348,6 @@ static void microcode_fini_cpu_amd(int cpu)
337{ 348{
338 struct ucode_cpu_info *uci = ucode_cpu_info + cpu; 349 struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
339 350
340 vfree(uci->mc);
341 uci->mc = NULL; 351 uci->mc = NULL;
342} 352}
343 353
@@ -351,5 +361,14 @@ static struct microcode_ops microcode_amd_ops = {
351 361
352struct microcode_ops * __init init_amd_microcode(void) 362struct microcode_ops * __init init_amd_microcode(void)
353{ 363{
364 patch = (void *)get_zeroed_page(GFP_KERNEL);
365 if (!patch)
366 return NULL;
367
354 return &microcode_amd_ops; 368 return &microcode_amd_ops;
355} 369}
370
371void __exit exit_amd_microcode(void)
372{
373 free_page((unsigned long)patch);
374}
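
After the rework the AMD loader keeps one page-sized patch buffer, walks the container section by section (a type word, then the section size at offset 4), and copies an acceptable patch into that buffer instead of vmalloc'ing each candidate. A user-space sketch of the section walk; the acceptance test below is a placeholder for the driver's family, equivalence-id and revision checks:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define SECTION_HDR_SIZE 8
#define PATCH_BUF_SIZE 4096	/* one page, like the driver's patch buffer */

static unsigned char patch[PATCH_BUF_SIZE];

static size_t walk_container(const uint8_t *p, size_t leftover)
{
	size_t accepted = 0;

	while (leftover >= SECTION_HDR_SIZE) {
		uint32_t size;
		size_t cur;

		memcpy(&size, p + 4, sizeof(size));	/* section size lives at offset 4 */
		cur = (size_t)size + SECTION_HDR_SIZE;
		if (cur > leftover)
			break;

		/* stand-in acceptance test; the driver checks CPU family,
		   equivalence id and patch revision here */
		if (size && size <= PATCH_BUF_SIZE) {
			memset(patch, 0, PATCH_BUF_SIZE);
			memcpy(patch, p + SECTION_HDR_SIZE, size);
			accepted = size;
		}

		p += cur;
		leftover -= cur;
	}

	return accepted;
}

int main(void)
{
	uint8_t buf[SECTION_HDR_SIZE + 16] = { 0 };
	uint32_t size = 16;

	memcpy(buf + 4, &size, sizeof(size));
	printf("accepted %zu bytes\n", walk_container(buf, sizeof(buf)));
	return 0;
}
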
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index f2d2a664e797..fda91c307104 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -256,7 +256,7 @@ static int __init microcode_dev_init(void)
256 return 0; 256 return 0;
257} 257}
258 258
259static void microcode_dev_exit(void) 259static void __exit microcode_dev_exit(void)
260{ 260{
261 misc_deregister(&microcode_dev); 261 misc_deregister(&microcode_dev);
262} 262}
@@ -292,8 +292,8 @@ static int reload_for_cpu(int cpu)
292 return err; 292 return err;
293} 293}
294 294
295static ssize_t reload_store(struct sys_device *dev, 295static ssize_t reload_store(struct device *dev,
296 struct sysdev_attribute *attr, 296 struct device_attribute *attr,
297 const char *buf, size_t size) 297 const char *buf, size_t size)
298{ 298{
299 unsigned long val; 299 unsigned long val;
@@ -318,30 +318,30 @@ static ssize_t reload_store(struct sys_device *dev,
318 return ret; 318 return ret;
319} 319}
320 320
321static ssize_t version_show(struct sys_device *dev, 321static ssize_t version_show(struct device *dev,
322 struct sysdev_attribute *attr, char *buf) 322 struct device_attribute *attr, char *buf)
323{ 323{
324 struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; 324 struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
325 325
326 return sprintf(buf, "0x%x\n", uci->cpu_sig.rev); 326 return sprintf(buf, "0x%x\n", uci->cpu_sig.rev);
327} 327}
328 328
329static ssize_t pf_show(struct sys_device *dev, 329static ssize_t pf_show(struct device *dev,
330 struct sysdev_attribute *attr, char *buf) 330 struct device_attribute *attr, char *buf)
331{ 331{
332 struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; 332 struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
333 333
334 return sprintf(buf, "0x%x\n", uci->cpu_sig.pf); 334 return sprintf(buf, "0x%x\n", uci->cpu_sig.pf);
335} 335}
336 336
337static SYSDEV_ATTR(reload, 0200, NULL, reload_store); 337static DEVICE_ATTR(reload, 0200, NULL, reload_store);
338static SYSDEV_ATTR(version, 0400, version_show, NULL); 338static DEVICE_ATTR(version, 0400, version_show, NULL);
339static SYSDEV_ATTR(processor_flags, 0400, pf_show, NULL); 339static DEVICE_ATTR(processor_flags, 0400, pf_show, NULL);
340 340
341static struct attribute *mc_default_attrs[] = { 341static struct attribute *mc_default_attrs[] = {
342 &attr_reload.attr, 342 &dev_attr_reload.attr,
343 &attr_version.attr, 343 &dev_attr_version.attr,
344 &attr_processor_flags.attr, 344 &dev_attr_processor_flags.attr,
345 NULL 345 NULL
346}; 346};
347 347
@@ -405,43 +405,45 @@ static enum ucode_state microcode_update_cpu(int cpu)
405 return ustate; 405 return ustate;
406} 406}
407 407
408static int mc_sysdev_add(struct sys_device *sys_dev) 408static int mc_device_add(struct device *dev, struct subsys_interface *sif)
409{ 409{
410 int err, cpu = sys_dev->id; 410 int err, cpu = dev->id;
411 411
412 if (!cpu_online(cpu)) 412 if (!cpu_online(cpu))
413 return 0; 413 return 0;
414 414
415 pr_debug("CPU%d added\n", cpu); 415 pr_debug("CPU%d added\n", cpu);
416 416
417 err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); 417 err = sysfs_create_group(&dev->kobj, &mc_attr_group);
418 if (err) 418 if (err)
419 return err; 419 return err;
420 420
421 if (microcode_init_cpu(cpu) == UCODE_ERROR) { 421 if (microcode_init_cpu(cpu) == UCODE_ERROR) {
422 sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); 422 sysfs_remove_group(&dev->kobj, &mc_attr_group);
423 return -EINVAL; 423 return -EINVAL;
424 } 424 }
425 425
426 return err; 426 return err;
427} 427}
428 428
429static int mc_sysdev_remove(struct sys_device *sys_dev) 429static int mc_device_remove(struct device *dev, struct subsys_interface *sif)
430{ 430{
431 int cpu = sys_dev->id; 431 int cpu = dev->id;
432 432
433 if (!cpu_online(cpu)) 433 if (!cpu_online(cpu))
434 return 0; 434 return 0;
435 435
436 pr_debug("CPU%d removed\n", cpu); 436 pr_debug("CPU%d removed\n", cpu);
437 microcode_fini_cpu(cpu); 437 microcode_fini_cpu(cpu);
438 sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); 438 sysfs_remove_group(&dev->kobj, &mc_attr_group);
439 return 0; 439 return 0;
440} 440}
441 441
442static struct sysdev_driver mc_sysdev_driver = { 442static struct subsys_interface mc_cpu_interface = {
443 .add = mc_sysdev_add, 443 .name = "microcode",
444 .remove = mc_sysdev_remove, 444 .subsys = &cpu_subsys,
445 .add_dev = mc_device_add,
446 .remove_dev = mc_device_remove,
445}; 447};
446 448
447/** 449/**
@@ -464,9 +466,9 @@ static __cpuinit int
464mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) 466mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
465{ 467{
466 unsigned int cpu = (unsigned long)hcpu; 468 unsigned int cpu = (unsigned long)hcpu;
467 struct sys_device *sys_dev; 469 struct device *dev;
468 470
469 sys_dev = get_cpu_sysdev(cpu); 471 dev = get_cpu_device(cpu);
470 switch (action) { 472 switch (action) {
471 case CPU_ONLINE: 473 case CPU_ONLINE:
472 case CPU_ONLINE_FROZEN: 474 case CPU_ONLINE_FROZEN:
@@ -474,13 +476,13 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
474 case CPU_DOWN_FAILED: 476 case CPU_DOWN_FAILED:
475 case CPU_DOWN_FAILED_FROZEN: 477 case CPU_DOWN_FAILED_FROZEN:
476 pr_debug("CPU%d added\n", cpu); 478 pr_debug("CPU%d added\n", cpu);
477 if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) 479 if (sysfs_create_group(&dev->kobj, &mc_attr_group))
478 pr_err("Failed to create group for CPU%d\n", cpu); 480 pr_err("Failed to create group for CPU%d\n", cpu);
479 break; 481 break;
480 case CPU_DOWN_PREPARE: 482 case CPU_DOWN_PREPARE:
481 case CPU_DOWN_PREPARE_FROZEN: 483 case CPU_DOWN_PREPARE_FROZEN:
482 /* Suspend is in progress, only remove the interface */ 484 /* Suspend is in progress, only remove the interface */
483 sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); 485 sysfs_remove_group(&dev->kobj, &mc_attr_group);
484 pr_debug("CPU%d removed\n", cpu); 486 pr_debug("CPU%d removed\n", cpu);
485 break; 487 break;
486 488
@@ -519,27 +521,23 @@ static int __init microcode_init(void)
519 521
520 microcode_pdev = platform_device_register_simple("microcode", -1, 522 microcode_pdev = platform_device_register_simple("microcode", -1,
521 NULL, 0); 523 NULL, 0);
522 if (IS_ERR(microcode_pdev)) { 524 if (IS_ERR(microcode_pdev))
523 microcode_dev_exit();
524 return PTR_ERR(microcode_pdev); 525 return PTR_ERR(microcode_pdev);
525 }
526 526
527 get_online_cpus(); 527 get_online_cpus();
528 mutex_lock(&microcode_mutex); 528 mutex_lock(&microcode_mutex);
529 529
530 error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver); 530 error = subsys_interface_register(&mc_cpu_interface);
531 531
532 mutex_unlock(&microcode_mutex); 532 mutex_unlock(&microcode_mutex);
533 put_online_cpus(); 533 put_online_cpus();
534 534
535 if (error) { 535 if (error)
536 platform_device_unregister(microcode_pdev); 536 goto out_pdev;
537 return error;
538 }
539 537
540 error = microcode_dev_init(); 538 error = microcode_dev_init();
541 if (error) 539 if (error)
542 return error; 540 goto out_driver;
543 541
544 register_syscore_ops(&mc_syscore_ops); 542 register_syscore_ops(&mc_syscore_ops);
545 register_hotcpu_notifier(&mc_cpu_notifier); 543 register_hotcpu_notifier(&mc_cpu_notifier);
@@ -548,11 +546,27 @@ static int __init microcode_init(void)
548 " <tigran@aivazian.fsnet.co.uk>, Peter Oruba\n"); 546 " <tigran@aivazian.fsnet.co.uk>, Peter Oruba\n");
549 547
550 return 0; 548 return 0;
549
550out_driver:
551 get_online_cpus();
552 mutex_lock(&microcode_mutex);
553
554 subsys_interface_unregister(&mc_cpu_interface);
555
556 mutex_unlock(&microcode_mutex);
557 put_online_cpus();
558
559out_pdev:
560 platform_device_unregister(microcode_pdev);
561 return error;
562
551} 563}
552module_init(microcode_init); 564module_init(microcode_init);
553 565
554static void __exit microcode_exit(void) 566static void __exit microcode_exit(void)
555{ 567{
568 struct cpuinfo_x86 *c = &cpu_data(0);
569
556 microcode_dev_exit(); 570 microcode_dev_exit();
557 571
558 unregister_hotcpu_notifier(&mc_cpu_notifier); 572 unregister_hotcpu_notifier(&mc_cpu_notifier);
@@ -561,7 +575,7 @@ static void __exit microcode_exit(void)
561 get_online_cpus(); 575 get_online_cpus();
562 mutex_lock(&microcode_mutex); 576 mutex_lock(&microcode_mutex);
563 577
564 sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver); 578 subsys_interface_unregister(&mc_cpu_interface);
565 579
566 mutex_unlock(&microcode_mutex); 580 mutex_unlock(&microcode_mutex);
567 put_online_cpus(); 581 put_online_cpus();
@@ -570,6 +584,9 @@ static void __exit microcode_exit(void)
570 584
571 microcode_ops = NULL; 585 microcode_ops = NULL;
572 586
587 if (c->x86_vendor == X86_VENDOR_AMD)
588 exit_amd_microcode();
589
573 pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n"); 590 pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n");
574} 591}
575module_exit(microcode_exit); 592module_exit(microcode_exit);
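
Besides the sysdev-to-device conversion, microcode_init() gains proper unwinding: each failure point jumps to a label that undoes only what was already registered. A compact sketch of that goto-based error path with stubbed register/unregister functions (dev_init() is forced to fail to exercise the unwind):

#include <stdio.h>

static int  register_pdev(void)     { return 0; }
static void unregister_pdev(void)   { puts("pdev unregistered"); }
static int  register_iface(void)    { return 0; }
static void unregister_iface(void)  { puts("iface unregistered"); }
static int  dev_init(void)          { return -1; }	/* force the error path */

static int microcode_like_init(void)
{
	int error;

	error = register_pdev();
	if (error)
		return error;

	error = register_iface();
	if (error)
		goto out_pdev;

	error = dev_init();
	if (error)
		goto out_driver;

	return 0;

out_driver:
	unregister_iface();
out_pdev:
	unregister_pdev();
	return error;
}

int main(void)
{
	printf("init returned %d\n", microcode_like_init());
	return 0;
}
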
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 9103b89c145a..ca470e4c92dc 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -95,8 +95,8 @@ static void __init MP_bus_info(struct mpc_bus *m)
95 } 95 }
96#endif 96#endif
97 97
98 set_bit(m->busid, mp_bus_not_pci);
98 if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) { 99 if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) {
99 set_bit(m->busid, mp_bus_not_pci);
100#if defined(CONFIG_EISA) || defined(CONFIG_MCA) 100#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
101 mp_bus_id_to_type[m->busid] = MP_BUS_ISA; 101 mp_bus_id_to_type[m->busid] = MP_BUS_ISA;
102#endif 102#endif
@@ -564,9 +564,7 @@ void __init default_get_smp_config(unsigned int early)
564 564
565static void __init smp_reserve_memory(struct mpf_intel *mpf) 565static void __init smp_reserve_memory(struct mpf_intel *mpf)
566{ 566{
567 unsigned long size = get_mpc_size(mpf->physptr); 567 memblock_reserve(mpf->physptr, get_mpc_size(mpf->physptr));
568
569 memblock_x86_reserve_range(mpf->physptr, mpf->physptr+size, "* MP-table mpc");
570} 568}
571 569
572static int __init smp_scan_config(unsigned long base, unsigned long length) 570static int __init smp_scan_config(unsigned long base, unsigned long length)
@@ -595,7 +593,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length)
595 mpf, (u64)virt_to_phys(mpf)); 593 mpf, (u64)virt_to_phys(mpf));
596 594
597 mem = virt_to_phys(mpf); 595 mem = virt_to_phys(mpf);
598 memblock_x86_reserve_range(mem, mem + sizeof(*mpf), "* MP-table mpf"); 596 memblock_reserve(mem, sizeof(*mpf));
599 if (mpf->physptr) 597 if (mpf->physptr)
600 smp_reserve_memory(mpf); 598 smp_reserve_memory(mpf);
601 599
@@ -836,10 +834,8 @@ early_param("alloc_mptable", parse_alloc_mptable_opt);
836 834
837void __init early_reserve_e820_mpc_new(void) 835void __init early_reserve_e820_mpc_new(void)
838{ 836{
839 if (enable_update_mptable && alloc_mptable) { 837 if (enable_update_mptable && alloc_mptable)
840 u64 startt = 0; 838 mpc_new_phys = early_reserve_e820(mpc_new_length, 4);
841 mpc_new_phys = early_reserve_e820(startt, mpc_new_length, 4);
842 }
843} 839}
844 840
845static int __init update_mp_table(void) 841static int __init update_mp_table(void)
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 12fcbe2c143e..96356762a51d 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -236,7 +236,7 @@ static struct notifier_block __refdata msr_class_cpu_notifier = {
236 .notifier_call = msr_class_cpu_callback, 236 .notifier_call = msr_class_cpu_callback,
237}; 237};
238 238
239static char *msr_devnode(struct device *dev, mode_t *mode) 239static char *msr_devnode(struct device *dev, umode_t *mode)
240{ 240{
241 return kasprintf(GFP_KERNEL, "cpu/%u/msr", MINOR(dev->devt)); 241 return kasprintf(GFP_KERNEL, "cpu/%u/msr", MINOR(dev->devt));
242} 242}
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index b9c8628974af..e88f37b58ddd 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -29,6 +29,7 @@
29#include <asm/traps.h> 29#include <asm/traps.h>
30#include <asm/mach_traps.h> 30#include <asm/mach_traps.h>
31#include <asm/nmi.h> 31#include <asm/nmi.h>
32#include <asm/x86_init.h>
32 33
33#define NMI_MAX_NAMELEN 16 34#define NMI_MAX_NAMELEN 16
34struct nmiaction { 35struct nmiaction {
@@ -348,7 +349,7 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
348 349
349 /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */ 350 /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
350 raw_spin_lock(&nmi_reason_lock); 351 raw_spin_lock(&nmi_reason_lock);
351 reason = get_nmi_reason(); 352 reason = x86_platform.get_nmi_reason();
352 353
353 if (reason & NMI_REASON_MASK) { 354 if (reason & NMI_REASON_MASK) {
354 if (reason & NMI_REASON_SERR) 355 if (reason & NMI_REASON_SERR)
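
Routing the NMI reason read through x86_platform.get_nmi_reason() lets a platform or paravirt backend override the port-0x61 default. A minimal sketch of that ops-structure indirection; the structure and the stubbed default below are illustrative, not the kernel's definitions:

#include <stdio.h>

struct platform_ops_sketch {
	unsigned char (*get_nmi_reason)(void);
};

static unsigned char default_get_nmi_reason(void)
{
	return 0;	/* stands in for reading system control port B (0x61) */
}

static struct platform_ops_sketch platform_sketch = {
	.get_nmi_reason = default_get_nmi_reason,
};

int main(void)
{
	printf("nmi reason = %#x\n", platform_sketch.get_nmi_reason());
	return 0;
}
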
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index b9b3b1a51643..15763af7bfe3 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -293,7 +293,7 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
293 regs.orig_ax = -1; 293 regs.orig_ax = -1;
294 regs.ip = (unsigned long) kernel_thread_helper; 294 regs.ip = (unsigned long) kernel_thread_helper;
295 regs.cs = __KERNEL_CS | get_kernel_rpl(); 295 regs.cs = __KERNEL_CS | get_kernel_rpl();
296 regs.flags = X86_EFLAGS_IF | 0x2; 296 regs.flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
297 297
298 /* Ok, create the new process.. */ 298 /* Ok, create the new process.. */
299 return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL); 299 return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
@@ -403,6 +403,14 @@ void default_idle(void)
403EXPORT_SYMBOL(default_idle); 403EXPORT_SYMBOL(default_idle);
404#endif 404#endif
405 405
406bool set_pm_idle_to_default(void)
407{
408 bool ret = !!pm_idle;
409
410 pm_idle = default_idle;
411
412 return ret;
413}
406void stop_this_cpu(void *dummy) 414void stop_this_cpu(void *dummy)
407{ 415{
408 local_irq_disable(); 416 local_irq_disable();
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 795b79f984c2..485204f58cda 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -99,7 +99,8 @@ void cpu_idle(void)
99 99
100 /* endless idle loop with no priority at all */ 100 /* endless idle loop with no priority at all */
101 while (1) { 101 while (1) {
102 tick_nohz_stop_sched_tick(1); 102 tick_nohz_idle_enter();
103 rcu_idle_enter();
103 while (!need_resched()) { 104 while (!need_resched()) {
104 105
105 check_pgt_cache(); 106 check_pgt_cache();
@@ -116,7 +117,8 @@ void cpu_idle(void)
116 pm_idle(); 117 pm_idle();
117 start_critical_timings(); 118 start_critical_timings();
118 } 119 }
119 tick_nohz_restart_sched_tick(); 120 rcu_idle_exit();
121 tick_nohz_idle_exit();
120 preempt_enable_no_resched(); 122 preempt_enable_no_resched();
121 schedule(); 123 schedule();
122 preempt_disable(); 124 preempt_disable();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3bd7e6eebf31..9b9fe4a85c87 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -122,7 +122,7 @@ void cpu_idle(void)
122 122
123 /* endless idle loop with no priority at all */ 123 /* endless idle loop with no priority at all */
124 while (1) { 124 while (1) {
125 tick_nohz_stop_sched_tick(1); 125 tick_nohz_idle_enter();
126 while (!need_resched()) { 126 while (!need_resched()) {
127 127
128 rmb(); 128 rmb();
@@ -139,8 +139,14 @@ void cpu_idle(void)
139 enter_idle(); 139 enter_idle();
140 /* Don't trace irqs off for idle */ 140 /* Don't trace irqs off for idle */
141 stop_critical_timings(); 141 stop_critical_timings();
142
143 /* enter_idle() needs rcu for notifiers */
144 rcu_idle_enter();
145
142 if (cpuidle_idle_call()) 146 if (cpuidle_idle_call())
143 pm_idle(); 147 pm_idle();
148
149 rcu_idle_exit();
144 start_critical_timings(); 150 start_critical_timings();
145 151
146 /* In many cases the interrupt that ended idle 152 /* In many cases the interrupt that ended idle
@@ -149,7 +155,7 @@ void cpu_idle(void)
149 __exit_idle(); 155 __exit_idle();
150 } 156 }
151 157
152 tick_nohz_restart_sched_tick(); 158 tick_nohz_idle_exit();
153 preempt_enable_no_resched(); 159 preempt_enable_no_resched();
154 schedule(); 160 schedule();
155 preempt_disable(); 161 preempt_disable();
@@ -293,13 +299,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
293 memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); 299 memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
294 300
295 if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { 301 if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
296 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); 302 p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr,
303 IO_BITMAP_BYTES, GFP_KERNEL);
297 if (!p->thread.io_bitmap_ptr) { 304 if (!p->thread.io_bitmap_ptr) {
298 p->thread.io_bitmap_max = 0; 305 p->thread.io_bitmap_max = 0;
299 return -ENOMEM; 306 return -ENOMEM;
300 } 307 }
301 memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
302 IO_BITMAP_BYTES);
303 set_tsk_thread_flag(p, TIF_IO_BITMAP); 308 set_tsk_thread_flag(p, TIF_IO_BITMAP);
304 } 309 }
305 310
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 82528799c5de..89a04c7b5bb6 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -749,7 +749,8 @@ put:
749/* 749/*
750 * Handle PTRACE_POKEUSR calls for the debug register area. 750 * Handle PTRACE_POKEUSR calls for the debug register area.
751 */ 751 */
752int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val) 752static int ptrace_set_debugreg(struct task_struct *tsk, int n,
753 unsigned long val)
753{ 754{
754 struct thread_struct *thread = &(tsk->thread); 755 struct thread_struct *thread = &(tsk->thread);
755 int rc = 0; 756 int rc = 0;
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index b78643d0f9a5..03920a15a632 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -553,4 +553,17 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC,
553 quirk_amd_nb_node); 553 quirk_amd_nb_node);
554DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_LINK, 554DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_LINK,
555 quirk_amd_nb_node); 555 quirk_amd_nb_node);
556DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F0,
557 quirk_amd_nb_node);
558DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F1,
559 quirk_amd_nb_node);
560DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F2,
561 quirk_amd_nb_node);
562DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3,
563 quirk_amd_nb_node);
564DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4,
565 quirk_amd_nb_node);
566DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F5,
567 quirk_amd_nb_node);
568
556#endif 569#endif
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index e334be1182b9..37a458b521a6 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -124,7 +124,7 @@ __setup("reboot=", reboot_setup);
124 */ 124 */
125 125
126/* 126/*
127 * Some machines require the "reboot=b" commandline option, 127 * Some machines require the "reboot=b" or "reboot=k" commandline options,
128 * this quirk makes that automatic. 128 * this quirk makes that automatic.
129 */ 129 */
130static int __init set_bios_reboot(const struct dmi_system_id *d) 130static int __init set_bios_reboot(const struct dmi_system_id *d)
@@ -136,6 +136,15 @@ static int __init set_bios_reboot(const struct dmi_system_id *d)
136 return 0; 136 return 0;
137} 137}
138 138
139static int __init set_kbd_reboot(const struct dmi_system_id *d)
140{
141 if (reboot_type != BOOT_KBD) {
142 reboot_type = BOOT_KBD;
143 printk(KERN_INFO "%s series board detected. Selecting KBD-method for reboot.\n", d->ident);
144 }
145 return 0;
146}
147
139static struct dmi_system_id __initdata reboot_dmi_table[] = { 148static struct dmi_system_id __initdata reboot_dmi_table[] = {
140 { /* Handle problems with rebooting on Dell E520's */ 149 { /* Handle problems with rebooting on Dell E520's */
141 .callback = set_bios_reboot, 150 .callback = set_bios_reboot,
@@ -295,7 +304,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
295 }, 304 },
296 }, 305 },
297 { /* Handle reboot issue on Acer Aspire one */ 306 { /* Handle reboot issue on Acer Aspire one */
298 .callback = set_bios_reboot, 307 .callback = set_kbd_reboot,
299 .ident = "Acer Aspire One A110", 308 .ident = "Acer Aspire One A110",
300 .matches = { 309 .matches = {
301 DMI_MATCH(DMI_SYS_VENDOR, "Acer"), 310 DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
@@ -443,6 +452,14 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = {
443 DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6420"), 452 DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6420"),
444 }, 453 },
445 }, 454 },
455 { /* Handle problems with rebooting on the OptiPlex 990. */
456 .callback = set_pci_reboot,
457 .ident = "Dell OptiPlex 990",
458 .matches = {
459 DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
460 DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 990"),
461 },
462 },
446 { } 463 { }
447}; 464};
448 465
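[Annotation] The reboot.c changes add a set_kbd_reboot() callback (selecting BOOT_KBD, the keyboard-controller reset method) alongside the existing set_bios_reboot()/set_pci_reboot() callbacks, switch the Acer Aspire One A110 entry over to it, and add a Dell OptiPlex 990 entry to the PCI-reboot table. A hypothetical sketch of how a further board would be wired into reboot_dmi_table using the new callback; the vendor and product strings here are placeholders, not part of this patch:

	{ /* Hypothetical: force the KBD reboot method on "Example Board". */
		.callback = set_kbd_reboot,
		.ident = "Example Board",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Example Vendor"),
			DMI_MATCH(DMI_PRODUCT_NAME, "Example Board"),
		},
	},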
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 348ce016a835..af6db6ec5b2a 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -12,6 +12,7 @@
12#include <asm/vsyscall.h> 12#include <asm/vsyscall.h>
13#include <asm/x86_init.h> 13#include <asm/x86_init.h>
14#include <asm/time.h> 14#include <asm/time.h>
15#include <asm/mrst.h>
15 16
16#ifdef CONFIG_X86_32 17#ifdef CONFIG_X86_32
17/* 18/*
@@ -242,6 +243,10 @@ static __init int add_rtc_cmos(void)
242 if (of_have_populated_dt()) 243 if (of_have_populated_dt())
243 return 0; 244 return 0;
244 245
246 /* Intel MID platforms don't have ioport rtc */
247 if (mrst_identify_cpu())
248 return -ENODEV;
249
245 platform_device_register(&rtc_device); 250 platform_device_register(&rtc_device);
246 dev_info(&rtc_device.dev, 251 dev_info(&rtc_device.dev,
247 "registered platform RTC device (no PNP device found)\n"); 252 "registered platform RTC device (no PNP device found)\n");
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index afaf38447ef5..d05444ac2aea 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -306,7 +306,8 @@ static void __init cleanup_highmap(void)
306static void __init reserve_brk(void) 306static void __init reserve_brk(void)
307{ 307{
308 if (_brk_end > _brk_start) 308 if (_brk_end > _brk_start)
309 memblock_x86_reserve_range(__pa(_brk_start), __pa(_brk_end), "BRK"); 309 memblock_reserve(__pa(_brk_start),
310 __pa(_brk_end) - __pa(_brk_start));
310 311
311 /* Mark brk area as locked down and no longer taking any 312 /* Mark brk area as locked down and no longer taking any
312 new allocations */ 313 new allocations */
@@ -331,13 +332,13 @@ static void __init relocate_initrd(void)
331 ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size, 332 ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size,
332 PAGE_SIZE); 333 PAGE_SIZE);
333 334
334 if (ramdisk_here == MEMBLOCK_ERROR) 335 if (!ramdisk_here)
335 panic("Cannot find place for new RAMDISK of size %lld\n", 336 panic("Cannot find place for new RAMDISK of size %lld\n",
336 ramdisk_size); 337 ramdisk_size);
337 338
338 /* Note: this includes all the lowmem currently occupied by 339 /* Note: this includes all the lowmem currently occupied by
339 the initrd, we rely on that fact to keep the data intact. */ 340 the initrd, we rely on that fact to keep the data intact. */
340 memblock_x86_reserve_range(ramdisk_here, ramdisk_here + area_size, "NEW RAMDISK"); 341 memblock_reserve(ramdisk_here, area_size);
341 initrd_start = ramdisk_here + PAGE_OFFSET; 342 initrd_start = ramdisk_here + PAGE_OFFSET;
342 initrd_end = initrd_start + ramdisk_size; 343 initrd_end = initrd_start + ramdisk_size;
343 printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n", 344 printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n",
@@ -393,7 +394,7 @@ static void __init reserve_initrd(void)
393 initrd_start = 0; 394 initrd_start = 0;
394 395
395 if (ramdisk_size >= (end_of_lowmem>>1)) { 396 if (ramdisk_size >= (end_of_lowmem>>1)) {
396 memblock_x86_free_range(ramdisk_image, ramdisk_end); 397 memblock_free(ramdisk_image, ramdisk_end - ramdisk_image);
397 printk(KERN_ERR "initrd too large to handle, " 398 printk(KERN_ERR "initrd too large to handle, "
398 "disabling initrd\n"); 399 "disabling initrd\n");
399 return; 400 return;
@@ -416,7 +417,7 @@ static void __init reserve_initrd(void)
416 417
417 relocate_initrd(); 418 relocate_initrd();
418 419
419 memblock_x86_free_range(ramdisk_image, ramdisk_end); 420 memblock_free(ramdisk_image, ramdisk_end - ramdisk_image);
420} 421}
421#else 422#else
422static void __init reserve_initrd(void) 423static void __init reserve_initrd(void)
@@ -490,15 +491,13 @@ static void __init memblock_x86_reserve_range_setup_data(void)
490{ 491{
491 struct setup_data *data; 492 struct setup_data *data;
492 u64 pa_data; 493 u64 pa_data;
493 char buf[32];
494 494
495 if (boot_params.hdr.version < 0x0209) 495 if (boot_params.hdr.version < 0x0209)
496 return; 496 return;
497 pa_data = boot_params.hdr.setup_data; 497 pa_data = boot_params.hdr.setup_data;
498 while (pa_data) { 498 while (pa_data) {
499 data = early_memremap(pa_data, sizeof(*data)); 499 data = early_memremap(pa_data, sizeof(*data));
500 sprintf(buf, "setup data %x", data->type); 500 memblock_reserve(pa_data, sizeof(*data) + data->len);
501 memblock_x86_reserve_range(pa_data, pa_data+sizeof(*data)+data->len, buf);
502 pa_data = data->next; 501 pa_data = data->next;
503 early_iounmap(data, sizeof(*data)); 502 early_iounmap(data, sizeof(*data));
504 } 503 }
@@ -554,7 +553,7 @@ static void __init reserve_crashkernel(void)
554 crash_base = memblock_find_in_range(alignment, 553 crash_base = memblock_find_in_range(alignment,
555 CRASH_KERNEL_ADDR_MAX, crash_size, alignment); 554 CRASH_KERNEL_ADDR_MAX, crash_size, alignment);
556 555
557 if (crash_base == MEMBLOCK_ERROR) { 556 if (!crash_base) {
558 pr_info("crashkernel reservation failed - No suitable area found.\n"); 557 pr_info("crashkernel reservation failed - No suitable area found.\n");
559 return; 558 return;
560 } 559 }
@@ -568,7 +567,7 @@ static void __init reserve_crashkernel(void)
568 return; 567 return;
569 } 568 }
570 } 569 }
571 memblock_x86_reserve_range(crash_base, crash_base + crash_size, "CRASH KERNEL"); 570 memblock_reserve(crash_base, crash_size);
572 571
573 printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " 572 printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
574 "for crashkernel (System RAM: %ldMB)\n", 573 "for crashkernel (System RAM: %ldMB)\n",
@@ -626,7 +625,7 @@ static __init void reserve_ibft_region(void)
626 addr = find_ibft_region(&size); 625 addr = find_ibft_region(&size);
627 626
628 if (size) 627 if (size)
629 memblock_x86_reserve_range(addr, addr + size, "* ibft"); 628 memblock_reserve(addr, size);
630} 629}
631 630
632static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10; 631static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10;
@@ -1045,6 +1044,8 @@ void __init setup_arch(char **cmdline_p)
1045 1044
1046 x86_init.timers.wallclock_init(); 1045 x86_init.timers.wallclock_init();
1047 1046
1047 x86_platform.wallclock_init();
1048
1048 mcheck_init(); 1049 mcheck_init();
1049 1050
1050 arch_init_ideal_nops(); 1051 arch_init_ideal_nops();
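[Annotation] The setup.c conversions all follow one mechanical pattern: memblock_x86_reserve_range(start, end, "label") becomes memblock_reserve(base, size) with size = end - start and no label string, memblock_x86_free_range(start, end) becomes memblock_free(base, size), and memblock_find_in_range() now reports failure as 0 rather than MEMBLOCK_ERROR. A sketch of the conversion; addr, limit, size and align are illustrative local variables, not identifiers from this patch:

	/* Old style (removed by this patch):
	 *	addr = memblock_find_in_range(0, limit, size, align);
	 *	if (addr == MEMBLOCK_ERROR)
	 *		panic(...);
	 *	memblock_x86_reserve_range(addr, addr + size, "EXAMPLE");
	 */

	/* New style: (base, size) pairs, failure is 0. */
	addr = memblock_find_in_range(0, limit, size, align);
	if (!addr)
		panic("Cannot find place for example region\n");
	memblock_reserve(addr, size);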
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 9f548cb4a958..e38e21754eea 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -840,7 +840,8 @@ int __cpuinit native_cpu_up(unsigned int cpu)
840 pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu); 840 pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu);
841 841
842 if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid || 842 if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid ||
843 !physid_isset(apicid, phys_cpu_present_map)) { 843 !physid_isset(apicid, phys_cpu_present_map) ||
844 (!x2apic_mode && apicid >= 255)) {
844 printk(KERN_ERR "%s: bad cpu %d\n", __func__, cpu); 845 printk(KERN_ERR "%s: bad cpu %d\n", __func__, cpu);
845 return -EINVAL; 846 return -EINVAL;
846 } 847 }
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c
index a91ae7709b49..a73b61055ad6 100644
--- a/arch/x86/kernel/trampoline.c
+++ b/arch/x86/kernel/trampoline.c
@@ -14,11 +14,11 @@ void __init setup_trampolines(void)
14 14
15 /* Has to be in very low memory so we can execute real-mode AP code. */ 15 /* Has to be in very low memory so we can execute real-mode AP code. */
16 mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE); 16 mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
17 if (mem == MEMBLOCK_ERROR) 17 if (!mem)
18 panic("Cannot allocate trampoline\n"); 18 panic("Cannot allocate trampoline\n");
19 19
20 x86_trampoline_base = __va(mem); 20 x86_trampoline_base = __va(mem);
21 memblock_x86_reserve_range(mem, mem + size, "TRAMPOLINE"); 21 memblock_reserve(mem, size);
22 22
23 printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n", 23 printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n",
24 x86_trampoline_base, (unsigned long long)mem, size); 24 x86_trampoline_base, (unsigned long long)mem, size);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index a8e3eb83466c..fa1191fb679d 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -306,15 +306,10 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
306 == NOTIFY_STOP) 306 == NOTIFY_STOP)
307 return; 307 return;
308#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ 308#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */
309#ifdef CONFIG_KPROBES 309
310 if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) 310 if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
311 == NOTIFY_STOP) 311 == NOTIFY_STOP)
312 return; 312 return;
313#else
314 if (notify_die(DIE_TRAP, "int3", regs, error_code, 3, SIGTRAP)
315 == NOTIFY_STOP)
316 return;
317#endif
318 313
319 preempt_conditional_sti(regs); 314 preempt_conditional_sti(regs);
320 do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); 315 do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index db483369f10b..2c9cf0fd78f5 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -35,7 +35,7 @@ static int __read_mostly tsc_unstable;
35 erroneous rdtsc usage on !cpu_has_tsc processors */ 35 erroneous rdtsc usage on !cpu_has_tsc processors */
36static int __read_mostly tsc_disabled = -1; 36static int __read_mostly tsc_disabled = -1;
37 37
38static int tsc_clocksource_reliable; 38int tsc_clocksource_reliable;
39/* 39/*
40 * Scheduler clock - returns current time in nanosec units. 40 * Scheduler clock - returns current time in nanosec units.
41 */ 41 */
@@ -178,11 +178,11 @@ static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2)
178} 178}
179 179
180#define CAL_MS 10 180#define CAL_MS 10
181#define CAL_LATCH (CLOCK_TICK_RATE / (1000 / CAL_MS)) 181#define CAL_LATCH (PIT_TICK_RATE / (1000 / CAL_MS))
182#define CAL_PIT_LOOPS 1000 182#define CAL_PIT_LOOPS 1000
183 183
184#define CAL2_MS 50 184#define CAL2_MS 50
185#define CAL2_LATCH (CLOCK_TICK_RATE / (1000 / CAL2_MS)) 185#define CAL2_LATCH (PIT_TICK_RATE / (1000 / CAL2_MS))
186#define CAL2_PIT_LOOPS 5000 186#define CAL2_PIT_LOOPS 5000
187 187
188 188
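[Annotation] The CAL_LATCH/CAL2_LATCH change is a rename for clarity: the PIT calibration latches are derived from the PIT input clock, so PIT_TICK_RATE is the meaningful constant. Assuming the conventional 1193182 Hz PIT clock, the values work out to CAL_LATCH = 1193182 / (1000 / 10) = 11931 and CAL2_LATCH = 1193182 / (1000 / 50) = 59659; on x86 the old CLOCK_TICK_RATE resolved to the same PIT rate, so the numeric results should be unchanged.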
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 0aa5fed8b9e6..9eba29b46cb7 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -113,7 +113,7 @@ void __cpuinit check_tsc_sync_source(int cpu)
113 if (unsynchronized_tsc()) 113 if (unsynchronized_tsc())
114 return; 114 return;
115 115
116 if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) { 116 if (tsc_clocksource_reliable) {
117 if (cpu == (nr_cpu_ids-1) || system_state != SYSTEM_BOOTING) 117 if (cpu == (nr_cpu_ids-1) || system_state != SYSTEM_BOOTING)
118 pr_info( 118 pr_info(
119 "Skipped synchronization checks as TSC is reliable.\n"); 119 "Skipped synchronization checks as TSC is reliable.\n");
@@ -172,7 +172,7 @@ void __cpuinit check_tsc_sync_target(void)
172{ 172{
173 int cpus = 2; 173 int cpus = 2;
174 174
175 if (unsynchronized_tsc() || boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) 175 if (unsynchronized_tsc() || tsc_clocksource_reliable)
176 return; 176 return;
177 177
178 /* 178 /*
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index e4d4a22e8b94..b07ba9393564 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -57,7 +57,7 @@ DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) =
57 .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock), 57 .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock),
58}; 58};
59 59
60static enum { EMULATE, NATIVE, NONE } vsyscall_mode = NATIVE; 60static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;
61 61
62static int __init vsyscall_setup(char *str) 62static int __init vsyscall_setup(char *str)
63{ 63{
@@ -140,11 +140,40 @@ static int addr_to_vsyscall_nr(unsigned long addr)
140 return nr; 140 return nr;
141} 141}
142 142
143static bool write_ok_or_segv(unsigned long ptr, size_t size)
144{
145 /*
146 * XXX: if access_ok, get_user, and put_user handled
147 * sig_on_uaccess_error, this could go away.
148 */
149
150 if (!access_ok(VERIFY_WRITE, (void __user *)ptr, size)) {
151 siginfo_t info;
152 struct thread_struct *thread = &current->thread;
153
154 thread->error_code = 6; /* user fault, no page, write */
155 thread->cr2 = ptr;
156 thread->trap_no = 14;
157
158 memset(&info, 0, sizeof(info));
159 info.si_signo = SIGSEGV;
160 info.si_errno = 0;
161 info.si_code = SEGV_MAPERR;
162 info.si_addr = (void __user *)ptr;
163
164 force_sig_info(SIGSEGV, &info, current);
165 return false;
166 } else {
167 return true;
168 }
169}
170
143bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) 171bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
144{ 172{
145 struct task_struct *tsk; 173 struct task_struct *tsk;
146 unsigned long caller; 174 unsigned long caller;
147 int vsyscall_nr; 175 int vsyscall_nr;
176 int prev_sig_on_uaccess_error;
148 long ret; 177 long ret;
149 178
150 /* 179 /*
@@ -180,35 +209,65 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
180 if (seccomp_mode(&tsk->seccomp)) 209 if (seccomp_mode(&tsk->seccomp))
181 do_exit(SIGKILL); 210 do_exit(SIGKILL);
182 211
212 /*
213 * With a real vsyscall, page faults cause SIGSEGV. We want to
214 * preserve that behavior to make writing exploits harder.
215 */
216 prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error;
217 current_thread_info()->sig_on_uaccess_error = 1;
218
219 /*
220 * 0 is a valid user pointer (in the access_ok sense) on 32-bit and
221 * 64-bit, so we don't need to special-case it here. For all the
222 * vsyscalls, 0 means "don't write anything" not "write it at
223 * address 0".
224 */
225 ret = -EFAULT;
183 switch (vsyscall_nr) { 226 switch (vsyscall_nr) {
184 case 0: 227 case 0:
228 if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) ||
229 !write_ok_or_segv(regs->si, sizeof(struct timezone)))
230 break;
231
185 ret = sys_gettimeofday( 232 ret = sys_gettimeofday(
186 (struct timeval __user *)regs->di, 233 (struct timeval __user *)regs->di,
187 (struct timezone __user *)regs->si); 234 (struct timezone __user *)regs->si);
188 break; 235 break;
189 236
190 case 1: 237 case 1:
238 if (!write_ok_or_segv(regs->di, sizeof(time_t)))
239 break;
240
191 ret = sys_time((time_t __user *)regs->di); 241 ret = sys_time((time_t __user *)regs->di);
192 break; 242 break;
193 243
194 case 2: 244 case 2:
245 if (!write_ok_or_segv(regs->di, sizeof(unsigned)) ||
246 !write_ok_or_segv(regs->si, sizeof(unsigned)))
247 break;
248
195 ret = sys_getcpu((unsigned __user *)regs->di, 249 ret = sys_getcpu((unsigned __user *)regs->di,
196 (unsigned __user *)regs->si, 250 (unsigned __user *)regs->si,
197 0); 251 0);
198 break; 252 break;
199 } 253 }
200 254
255 current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error;
256
201 if (ret == -EFAULT) { 257 if (ret == -EFAULT) {
202 /* 258 /* Bad news -- userspace fed a bad pointer to a vsyscall. */
203 * Bad news -- userspace fed a bad pointer to a vsyscall.
204 *
205 * With a real vsyscall, that would have caused SIGSEGV.
206 * To make writing reliable exploits using the emulated
207 * vsyscalls harder, generate SIGSEGV here as well.
208 */
209 warn_bad_vsyscall(KERN_INFO, regs, 259 warn_bad_vsyscall(KERN_INFO, regs,
210 "vsyscall fault (exploit attempt?)"); 260 "vsyscall fault (exploit attempt?)");
211 goto sigsegv; 261
262 /*
263 * If we failed to generate a signal for any reason,
264 * generate one here. (This should be impossible.)
265 */
266 if (WARN_ON_ONCE(!sigismember(&tsk->pending.signal, SIGBUS) &&
267 !sigismember(&tsk->pending.signal, SIGSEGV)))
268 goto sigsegv;
269
270 return true; /* Don't emulate the ret. */
212 } 271 }
213 272
214 regs->ax = ret; 273 regs->ax = ret;
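[Annotation] The vsyscall_64.c hunk makes the emulation path validate every destination pointer up front with write_ok_or_segv(), which raises SIGSEGV itself on a bad pointer, and sets sig_on_uaccess_error around the real syscall so a fault inside the copy-to-user path also signals instead of silently returning -EFAULT. A condensed restatement of the per-call pattern (the sys_time case from the hunk above; all names are from the patch):

	prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error;
	current_thread_info()->sig_on_uaccess_error = 1;

	ret = -EFAULT;
	if (write_ok_or_segv(regs->di, sizeof(time_t)))
		ret = sys_time((time_t __user *)regs->di);

	current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error;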
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 6f164bd5e14d..91f83e21b989 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -21,12 +21,14 @@
21#include <asm/pat.h> 21#include <asm/pat.h>
22#include <asm/tsc.h> 22#include <asm/tsc.h>
23#include <asm/iommu.h> 23#include <asm/iommu.h>
24#include <asm/mach_traps.h>
24 25
25void __cpuinit x86_init_noop(void) { } 26void __cpuinit x86_init_noop(void) { }
26void __init x86_init_uint_noop(unsigned int unused) { } 27void __init x86_init_uint_noop(unsigned int unused) { }
27void __init x86_init_pgd_noop(pgd_t *unused) { } 28void __init x86_init_pgd_noop(pgd_t *unused) { }
28int __init iommu_init_noop(void) { return 0; } 29int __init iommu_init_noop(void) { return 0; }
29void iommu_shutdown_noop(void) { } 30void iommu_shutdown_noop(void) { }
31void wallclock_init_noop(void) { }
30 32
31/* 33/*
32 * The platform setup functions are preset with the default functions 34 * The platform setup functions are preset with the default functions
@@ -90,6 +92,7 @@ struct x86_init_ops x86_init __initdata = {
90 92
91struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { 93struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = {
92 .setup_percpu_clockev = setup_secondary_APIC_clock, 94 .setup_percpu_clockev = setup_secondary_APIC_clock,
95 .fixup_cpu_id = x86_default_fixup_cpu_id,
93}; 96};
94 97
95static void default_nmi_init(void) { }; 98static void default_nmi_init(void) { };
@@ -97,11 +100,13 @@ static int default_i8042_detect(void) { return 1; };
97 100
98struct x86_platform_ops x86_platform = { 101struct x86_platform_ops x86_platform = {
99 .calibrate_tsc = native_calibrate_tsc, 102 .calibrate_tsc = native_calibrate_tsc,
103 .wallclock_init = wallclock_init_noop,
100 .get_wallclock = mach_get_cmos_time, 104 .get_wallclock = mach_get_cmos_time,
101 .set_wallclock = mach_set_rtc_mmss, 105 .set_wallclock = mach_set_rtc_mmss,
102 .iommu_shutdown = iommu_shutdown_noop, 106 .iommu_shutdown = iommu_shutdown_noop,
103 .is_untracked_pat_range = is_ISA_range, 107 .is_untracked_pat_range = is_ISA_range,
104 .nmi_init = default_nmi_init, 108 .nmi_init = default_nmi_init,
109 .get_nmi_reason = default_get_nmi_reason,
105 .i8042_detect = default_i8042_detect 110 .i8042_detect = default_i8042_detect
106}; 111};
107 112
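[Annotation] x86_platform gains a wallclock_init hook, defaulting to the new wallclock_init_noop(), and setup_arch() now calls x86_platform.wallclock_init() (see the setup.c hunk above). A platform needing early wallclock/RTC bring-up can override the hook during its own setup; a hypothetical sketch, with the platform and function names invented for illustration only:

	/* Hypothetical platform code: install a custom wallclock_init hook. */
	static void __init example_wallclock_init(void)
	{
		/* platform-specific RTC/wallclock bring-up would go here */
	}

	static void __init example_platform_setup(void)
	{
		x86_platform.wallclock_init = example_wallclock_init;
	}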