author		Len Brown <len.brown@intel.com>		2011-01-12 18:06:06 -0500
committer	Len Brown <len.brown@intel.com>		2011-01-12 18:06:06 -0500
commit		56dbed129df3fdd4caf9018b6e7599ee258a5420 (patch)
tree		b902491aef3a99efe0d9d49edd0f6e414dba654f /arch/x86/kernel
parent		2a2d31c8dc6f1ebcf5eab1d93a0cb0fb4ed57c7c (diff)
parent		f878133bf022717b880d0e0995b8f91436fd605c (diff)
Merge branch 'linus' into idle-test
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/Makefile                   |    1
-rw-r--r--  arch/x86/kernel/acpi/boot.c                |   59
-rw-r--r--  arch/x86/kernel/alternative.c              |   52
-rw-r--r--  arch/x86/kernel/amd_nb.c                   |  142
-rw-r--r--  arch/x86/kernel/apb_timer.c                |    1
-rw-r--r--  arch/x86/kernel/aperture_64.c              |   54
-rw-r--r--  arch/x86/kernel/apic/Makefile              |    5
-rw-r--r--  arch/x86/kernel/apic/apic.c                |  192
-rw-r--r--  arch/x86/kernel/apic/hw_nmi.c              |   39
-rw-r--r--  arch/x86/kernel/apic/io_apic.c             |  115
-rw-r--r--  arch/x86/kernel/apic/nmi.c                 |  567
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c         |   69
-rw-r--r--  arch/x86/kernel/cpu/amd.c                  |    2
-rw-r--r--  arch/x86/kernel/cpu/common.c               |    1
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k8.c  |    4
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c      |  151
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-inject.c    |    5
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c           |   20
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_amd.c       |  135
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_intel.c     |    2
-rw-r--r--  arch/x86/kernel/cpu/mcheck/therm_throt.c   |   40
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c           |  104
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd.c       |   16
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c     |   30
-rw-r--r--  arch/x86/kernel/cpu/perf_event_p4.c        |   28
-rw-r--r--  arch/x86/kernel/cpu/perfctr-watchdog.c     |  644
-rw-r--r--  arch/x86/kernel/dumpstack.c                |   18
-rw-r--r--  arch/x86/kernel/dumpstack_32.c             |   25
-rw-r--r--  arch/x86/kernel/dumpstack_64.c             |   24
-rw-r--r--  arch/x86/kernel/early_printk.c             |    3
-rw-r--r--  arch/x86/kernel/early_printk_mrst.c        |  319
-rw-r--r--  arch/x86/kernel/entry_64.S                 |   36
-rw-r--r--  arch/x86/kernel/ftrace.c                   |    9
-rw-r--r--  arch/x86/kernel/head32.c                   |    3
-rw-r--r--  arch/x86/kernel/head_32.S                  |   83
-rw-r--r--  arch/x86/kernel/hw_breakpoint.c            |   12
-rw-r--r--  arch/x86/kernel/irq.c                      |    6
-rw-r--r--  arch/x86/kernel/irq_32.c                   |    4
-rw-r--r--  arch/x86/kernel/kgdb.c                     |    7
-rw-r--r--  arch/x86/kernel/kprobes.c                  |  127
-rw-r--r--  arch/x86/kernel/microcode_amd.c            |   34
-rw-r--r--  arch/x86/kernel/mpparse.c                  |  114
-rw-r--r--  arch/x86/kernel/pci-gart_64.c              |   34
-rw-r--r--  arch/x86/kernel/process.c                  |   14
-rw-r--r--  arch/x86/kernel/process_32.c               |    2
-rw-r--r--  arch/x86/kernel/process_64.c               |    2
-rw-r--r--  arch/x86/kernel/reboot.c                   |    5
-rw-r--r--  arch/x86/kernel/reboot_fixups_32.c         |   16
-rw-r--r--  arch/x86/kernel/setup.c                    |   13
-rw-r--r--  arch/x86/kernel/smpboot.c                  |   54
-rw-r--r--  arch/x86/kernel/stacktrace.c               |    8
-rw-r--r--  arch/x86/kernel/time.c                     |   18
-rw-r--r--  arch/x86/kernel/trampoline_64.S            |    2
-rw-r--r--  arch/x86/kernel/traps.c                    |  131
-rw-r--r--  arch/x86/kernel/tsc.c                      |   98
-rw-r--r--  arch/x86/kernel/verify_cpu.S (renamed from arch/x86/kernel/verify_cpu_64.S)  |   49
-rw-r--r--  arch/x86/kernel/vmlinux.lds.S              |    8
57 files changed, 1213 insertions(+), 2543 deletions(-)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 1e994754d323..34244b2cd880 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -85,7 +85,6 @@ obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o
 obj-$(CONFIG_KGDB)		+= kgdb.o
 obj-$(CONFIG_VM86)		+= vm86_32.o
 obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
-obj-$(CONFIG_EARLY_PRINTK_MRST)	+= early_printk_mrst.o
 
 obj-$(CONFIG_HPET_TIMER)	+= hpet.o
 obj-$(CONFIG_APB_TIMER)		+= apb_timer.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 71232b941b6c..ec881c6bfee0 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -198,6 +198,11 @@ static void __cpuinit acpi_register_lapic(int id, u8 enabled)
 {
 	unsigned int ver = 0;
 
+	if (id >= (MAX_LOCAL_APIC-1)) {
+		printk(KERN_INFO PREFIX "skipped apicid that is too big\n");
+		return;
+	}
+
 	if (!enabled) {
 		++disabled_cpus;
 		return;
@@ -847,18 +852,6 @@ static int __init acpi_parse_fadt(struct acpi_table_header *table)
  * returns 0 on success, < 0 on error
  */
 
-static void __init acpi_register_lapic_address(unsigned long address)
-{
-	mp_lapic_addr = address;
-
-	set_fixmap_nocache(FIX_APIC_BASE, address);
-	if (boot_cpu_physical_apicid == -1U) {
-		boot_cpu_physical_apicid = read_apic_id();
-		apic_version[boot_cpu_physical_apicid] =
-			GET_APIC_VERSION(apic_read(APIC_LVR));
-	}
-}
-
 static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
 {
 	int count;
@@ -880,7 +873,7 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
 		return count;
 	}
 
-	acpi_register_lapic_address(acpi_lapic_addr);
+	register_lapic_address(acpi_lapic_addr);
 
 	return count;
 }
@@ -907,16 +900,16 @@ static int __init acpi_parse_madt_lapic_entries(void)
 		return count;
 	}
 
-	acpi_register_lapic_address(acpi_lapic_addr);
+	register_lapic_address(acpi_lapic_addr);
 
 	count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC,
-				      acpi_parse_sapic, MAX_APICS);
+				      acpi_parse_sapic, MAX_LOCAL_APIC);
 
 	if (!count) {
 		x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC,
-						acpi_parse_x2apic, MAX_APICS);
+						acpi_parse_x2apic, MAX_LOCAL_APIC);
 		count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC,
-					      acpi_parse_lapic, MAX_APICS);
+					      acpi_parse_lapic, MAX_LOCAL_APIC);
 	}
 	if (!count && !x2count) {
 		printk(KERN_ERR PREFIX "No LAPIC entries present\n");
@@ -949,32 +942,6 @@ static int __init acpi_parse_madt_lapic_entries(void)
 extern int es7000_plat;
 #endif
 
-static void assign_to_mp_irq(struct mpc_intsrc *m,
-			     struct mpc_intsrc *mp_irq)
-{
-	memcpy(mp_irq, m, sizeof(struct mpc_intsrc));
-}
-
-static int mp_irq_cmp(struct mpc_intsrc *mp_irq,
-		      struct mpc_intsrc *m)
-{
-	return memcmp(mp_irq, m, sizeof(struct mpc_intsrc));
-}
-
-static void save_mp_irq(struct mpc_intsrc *m)
-{
-	int i;
-
-	for (i = 0; i < mp_irq_entries; i++) {
-		if (!mp_irq_cmp(&mp_irqs[i], m))
-			return;
-	}
-
-	assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]);
-	if (++mp_irq_entries == MAX_IRQ_SOURCES)
-		panic("Max # of irq sources exceeded!!\n");
-}
-
 void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
 {
 	int ioapic;
@@ -1005,7 +972,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
 	mp_irq.dstapic = mp_ioapics[ioapic].apicid; /* APIC ID */
 	mp_irq.dstirq = pin;	/* INTIN# */
 
-	save_mp_irq(&mp_irq);
+	mp_save_irq(&mp_irq);
 
 	isa_irq_to_gsi[bus_irq] = gsi;
 }
@@ -1080,7 +1047,7 @@ void __init mp_config_acpi_legacy_irqs(void)
 		mp_irq.srcbusirq = i; /* Identity mapped */
 		mp_irq.dstirq = pin;
 
-		save_mp_irq(&mp_irq);
+		mp_save_irq(&mp_irq);
 	}
 }
 
@@ -1117,7 +1084,7 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
 	mp_irq.dstapic = mp_ioapics[ioapic].apicid;
 	mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi);
 
-	save_mp_irq(&mp_irq);
+	mp_save_irq(&mp_irq);
 #endif
 	return 0;
 }
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 5079f24c955a..123608531c8f 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -353,6 +353,7 @@ void __init_or_module alternatives_smp_module_del(struct module *mod)
 	mutex_unlock(&smp_alt);
 }
 
+bool skip_smp_alternatives;
 void alternatives_smp_switch(int smp)
 {
 	struct smp_alt_module *mod;
@@ -368,7 +369,7 @@ void alternatives_smp_switch(int smp)
 	printk("lockdep: fixing up alternatives.\n");
 #endif
 
-	if (noreplace_smp || smp_alt_once)
+	if (noreplace_smp || smp_alt_once || skip_smp_alternatives)
 		return;
 	BUG_ON(!smp && (num_online_cpus() > 1));
 
@@ -591,17 +592,21 @@ static atomic_t stop_machine_first;
 static int wrote_text;
 
 struct text_poke_params {
-	void *addr;
-	const void *opcode;
-	size_t len;
+	struct text_poke_param *params;
+	int nparams;
 };
 
 static int __kprobes stop_machine_text_poke(void *data)
 {
 	struct text_poke_params *tpp = data;
+	struct text_poke_param *p;
+	int i;
 
 	if (atomic_dec_and_test(&stop_machine_first)) {
-		text_poke(tpp->addr, tpp->opcode, tpp->len);
+		for (i = 0; i < tpp->nparams; i++) {
+			p = &tpp->params[i];
+			text_poke(p->addr, p->opcode, p->len);
+		}
 		smp_wmb();	/* Make sure other cpus see that this has run */
 		wrote_text = 1;
 	} else {
@@ -610,8 +615,12 @@ static int __kprobes stop_machine_text_poke(void *data)
 		smp_mb();	/* Load wrote_text before following execution */
 	}
 
-	flush_icache_range((unsigned long)tpp->addr,
-			   (unsigned long)tpp->addr + tpp->len);
+	for (i = 0; i < tpp->nparams; i++) {
+		p = &tpp->params[i];
+		flush_icache_range((unsigned long)p->addr,
+				   (unsigned long)p->addr + p->len);
+	}
+
 	return 0;
 }
 
@@ -631,10 +640,13 @@ static int __kprobes stop_machine_text_poke(void *data)
 void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
 {
 	struct text_poke_params tpp;
+	struct text_poke_param p;
 
-	tpp.addr = addr;
-	tpp.opcode = opcode;
-	tpp.len = len;
+	p.addr = addr;
+	p.opcode = opcode;
+	p.len = len;
+	tpp.params = &p;
+	tpp.nparams = 1;
 	atomic_set(&stop_machine_first, 1);
 	wrote_text = 0;
 	/* Use __stop_machine() because the caller already got online_cpus. */
@@ -642,6 +654,26 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
 	return addr;
 }
 
+/**
+ * text_poke_smp_batch - Update instructions on a live kernel on SMP
+ * @params: an array of text_poke parameters
+ * @n: the number of elements in params.
+ *
+ * Modify multi-byte instruction by using stop_machine() on SMP. Since the
+ * stop_machine() is heavy task, it is better to aggregate text_poke requests
+ * and do it once if possible.
+ *
+ * Note: Must be called under get_online_cpus() and text_mutex.
+ */
+void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)
+{
+	struct text_poke_params tpp = {.params = params, .nparams = n};
+
+	atomic_set(&stop_machine_first, 1);
+	wrote_text = 0;
+	stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
+}
+
 #if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
 
 #ifdef CONFIG_X86_64
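
[annotation] The new text_poke_smp_batch() above exists so that callers with many patch sites (kprobe optimization, in particular) pay the stop_machine() cost once rather than per site. A hypothetical caller might look like the sketch below; the addresses, opcodes, and lengths are placeholders, and the locking requirements come from the kerneldoc in the hunk above:

	/* Sketch only: batch two pokes into one stop_machine() cycle. */
	struct text_poke_param reqs[2] = {
		{ .addr = site1, .opcode = insn1, .len = len1 },	/* placeholders */
		{ .addr = site2, .opcode = insn2, .len = len2 },
	};

	get_online_cpus();		/* required by text_poke_smp_batch() */
	mutex_lock(&text_mutex);
	text_poke_smp_batch(reqs, 2);	/* one stop_machine() for both sites */
	mutex_unlock(&text_mutex);
	put_online_cpus();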
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 8f6463d8ed0d..0a99f7198bc3 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -12,95 +12,123 @@
 
 static u32 *flush_words;
 
-struct pci_device_id k8_nb_ids[] = {
+struct pci_device_id amd_nb_misc_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) },
 	{}
 };
-EXPORT_SYMBOL(k8_nb_ids);
+EXPORT_SYMBOL(amd_nb_misc_ids);
 
-struct k8_northbridge_info k8_northbridges;
-EXPORT_SYMBOL(k8_northbridges);
+const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[] __initconst = {
+	{ 0x00, 0x18, 0x20 },
+	{ 0xff, 0x00, 0x20 },
+	{ 0xfe, 0x00, 0x20 },
+	{ }
+};
+
+struct amd_northbridge_info amd_northbridges;
+EXPORT_SYMBOL(amd_northbridges);
 
-static struct pci_dev *next_k8_northbridge(struct pci_dev *dev)
+static struct pci_dev *next_northbridge(struct pci_dev *dev,
+					struct pci_device_id *ids)
 {
 	do {
 		dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
 		if (!dev)
 			break;
-	} while (!pci_match_id(&k8_nb_ids[0], dev));
+	} while (!pci_match_id(ids, dev));
 	return dev;
 }
 
-int cache_k8_northbridges(void)
+int amd_cache_northbridges(void)
 {
-	int i;
-	struct pci_dev *dev;
+	int i = 0;
+	struct amd_northbridge *nb;
+	struct pci_dev *misc;
 
-	if (k8_northbridges.num)
+	if (amd_nb_num())
 		return 0;
 
-	dev = NULL;
-	while ((dev = next_k8_northbridge(dev)) != NULL)
-		k8_northbridges.num++;
+	misc = NULL;
+	while ((misc = next_northbridge(misc, amd_nb_misc_ids)) != NULL)
+		i++;
 
-	/* some CPU families (e.g. family 0x11) do not support GART */
-	if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 ||
-	    boot_cpu_data.x86 == 0x15)
-		k8_northbridges.gart_supported = 1;
+	if (i == 0)
+		return 0;
 
-	k8_northbridges.nb_misc = kmalloc((k8_northbridges.num + 1) *
-					  sizeof(void *), GFP_KERNEL);
-	if (!k8_northbridges.nb_misc)
+	nb = kzalloc(i * sizeof(struct amd_northbridge), GFP_KERNEL);
+	if (!nb)
 		return -ENOMEM;
 
-	if (!k8_northbridges.num) {
-		k8_northbridges.nb_misc[0] = NULL;
-		return 0;
-	}
+	amd_northbridges.nb = nb;
+	amd_northbridges.num = i;
 
-	if (k8_northbridges.gart_supported) {
-		flush_words = kmalloc(k8_northbridges.num * sizeof(u32),
-				      GFP_KERNEL);
-		if (!flush_words) {
-			kfree(k8_northbridges.nb_misc);
-			return -ENOMEM;
-		}
-	}
+	misc = NULL;
+	for (i = 0; i != amd_nb_num(); i++) {
+		node_to_amd_nb(i)->misc = misc =
+			next_northbridge(misc, amd_nb_misc_ids);
+	}
+
+	/* some CPU families (e.g. family 0x11) do not support GART */
+	if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 ||
+	    boot_cpu_data.x86 == 0x15)
+		amd_northbridges.flags |= AMD_NB_GART;
+
+	/*
+	 * Some CPU families support L3 Cache Index Disable. There are some
+	 * limitations because of E382 and E388 on family 0x10.
+	 */
+	if (boot_cpu_data.x86 == 0x10 &&
+	    boot_cpu_data.x86_model >= 0x8 &&
+	    (boot_cpu_data.x86_model > 0x9 ||
+	     boot_cpu_data.x86_mask >= 0x1))
+		amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE;
 
-	dev = NULL;
-	i = 0;
-	while ((dev = next_k8_northbridge(dev)) != NULL) {
-		k8_northbridges.nb_misc[i] = dev;
-		if (k8_northbridges.gart_supported)
-			pci_read_config_dword(dev, 0x9c, &flush_words[i++]);
-	}
-	k8_northbridges.nb_misc[i] = NULL;
 	return 0;
 }
-EXPORT_SYMBOL_GPL(cache_k8_northbridges);
+EXPORT_SYMBOL_GPL(amd_cache_northbridges);
 
 /* Ignores subdevice/subvendor but as far as I can figure out
    they're useless anyways */
-int __init early_is_k8_nb(u32 device)
+int __init early_is_amd_nb(u32 device)
 {
 	struct pci_device_id *id;
 	u32 vendor = device & 0xffff;
 	device >>= 16;
-	for (id = k8_nb_ids; id->vendor; id++)
+	for (id = amd_nb_misc_ids; id->vendor; id++)
 		if (vendor == id->vendor && device == id->device)
 			return 1;
 	return 0;
 }
 
-void k8_flush_garts(void)
+int amd_cache_gart(void)
+{
+	int i;
+
+	if (!amd_nb_has_feature(AMD_NB_GART))
+		return 0;
+
+	flush_words = kmalloc(amd_nb_num() * sizeof(u32), GFP_KERNEL);
+	if (!flush_words) {
+		amd_northbridges.flags &= ~AMD_NB_GART;
+		return -ENOMEM;
+	}
+
+	for (i = 0; i != amd_nb_num(); i++)
+		pci_read_config_dword(node_to_amd_nb(i)->misc, 0x9c,
+				      &flush_words[i]);
+
+	return 0;
+}
+
+void amd_flush_garts(void)
 {
 	int flushed, i;
 	unsigned long flags;
 	static DEFINE_SPINLOCK(gart_lock);
 
-	if (!k8_northbridges.gart_supported)
+	if (!amd_nb_has_feature(AMD_NB_GART))
 		return;
 
 	/* Avoid races between AGP and IOMMU. In theory it's not needed
@@ -109,16 +137,16 @@ void k8_flush_garts(void)
 	   that it doesn't matter to serialize more. -AK */
 	spin_lock_irqsave(&gart_lock, flags);
 	flushed = 0;
-	for (i = 0; i < k8_northbridges.num; i++) {
-		pci_write_config_dword(k8_northbridges.nb_misc[i], 0x9c,
-				       flush_words[i]|1);
+	for (i = 0; i < amd_nb_num(); i++) {
+		pci_write_config_dword(node_to_amd_nb(i)->misc, 0x9c,
+				       flush_words[i] | 1);
 		flushed++;
 	}
-	for (i = 0; i < k8_northbridges.num; i++) {
+	for (i = 0; i < amd_nb_num(); i++) {
 		u32 w;
 		/* Make sure the hardware actually executed the flush*/
 		for (;;) {
-			pci_read_config_dword(k8_northbridges.nb_misc[i],
+			pci_read_config_dword(node_to_amd_nb(i)->misc,
 					      0x9c, &w);
 			if (!(w & 1))
 				break;
@@ -129,19 +157,23 @@ void k8_flush_garts(void)
 	if (!flushed)
 		printk("nothing to flush?\n");
 }
-EXPORT_SYMBOL_GPL(k8_flush_garts);
+EXPORT_SYMBOL_GPL(amd_flush_garts);
 
-static __init int init_k8_nbs(void)
+static __init int init_amd_nbs(void)
 {
 	int err = 0;
 
-	err = cache_k8_northbridges();
+	err = amd_cache_northbridges();
 
 	if (err < 0)
-		printk(KERN_NOTICE "K8 NB: Cannot enumerate AMD northbridges.\n");
+		printk(KERN_NOTICE "AMD NB: Cannot enumerate AMD northbridges.\n");
+
+	if (amd_cache_gart() < 0)
+		printk(KERN_NOTICE "AMD NB: Cannot initialize GART flush words, "
+		       "GART support disabled.\n");
 
 	return err;
 }
 
 /* This has to go after the PCI subsystem */
-fs_initcall(init_k8_nbs);
+fs_initcall(init_amd_nbs);
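
[annotation] With the rename from k8_northbridges to amd_northbridges, per-node access goes through amd_nb_num() and node_to_amd_nb() instead of the old NULL-terminated nb_misc array, and feature bits (AMD_NB_GART, AMD_NB_L3_INDEX_DISABLE) replace the gart_supported field. A minimal sketch of the new iteration idiom, assuming the accessors this series declares in <asm/amd_nb.h>:

	/* Sketch: walk every cached northbridge's misc PCI function. */
	int i;
	u32 val;

	if (amd_nb_has_feature(AMD_NB_GART)) {		/* feature test first */
		for (i = 0; i < amd_nb_num(); i++) {
			struct pci_dev *misc = node_to_amd_nb(i)->misc;

			pci_read_config_dword(misc, 0x9c, &val);	/* GART flush word */
		}
	}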
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index 92543c73cf8e..7c9ab59653e8 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -315,6 +315,7 @@ static void apbt_setup_irq(struct apbt_dev *adev)
 
 	if (system_state == SYSTEM_BOOTING) {
 		irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT);
+		irq_set_affinity(adev->irq, cpumask_of(adev->cpu));
 		/* APB timer irqs are set up as mp_irqs, timer is edge type */
 		__set_irq_handler(adev->irq, handle_edge_irq, 0, "edge");
 		if (request_irq(adev->irq, apbt_interrupt_handler,
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index b3a16e8f0703..5955a7800a96 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -39,18 +39,6 @@ int fallback_aper_force __initdata;
 
 int fix_aperture __initdata = 1;
 
-struct bus_dev_range {
-	int bus;
-	int dev_base;
-	int dev_limit;
-};
-
-static struct bus_dev_range bus_dev_ranges[] __initdata = {
-	{ 0x00, 0x18, 0x20},
-	{ 0xff, 0x00, 0x20},
-	{ 0xfe, 0x00, 0x20}
-};
-
 static struct resource gart_resource = {
 	.name	= "GART",
 	.flags	= IORESOURCE_MEM,
@@ -206,7 +194,7 @@ static u32 __init read_agp(int bus, int slot, int func, int cap, u32 *order)
  * Do an PCI bus scan by hand because we're running before the PCI
  * subsystem.
  *
- * All K8 AGP bridges are AGPv3 compliant, so we can do this scan
+ * All AMD AGP bridges are AGPv3 compliant, so we can do this scan
  * generically. It's probably overkill to always scan all slots because
  * the AGP bridges should be always an own bus on the HT hierarchy,
  * but do it here for future safety.
@@ -294,16 +282,16 @@ void __init early_gart_iommu_check(void)
 		search_agp_bridge(&agp_aper_order, &valid_agp);
 
 	fix = 0;
-	for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
+	for (i = 0; amd_nb_bus_dev_ranges[i].dev_limit; i++) {
 		int bus;
 		int dev_base, dev_limit;
 
-		bus = bus_dev_ranges[i].bus;
-		dev_base = bus_dev_ranges[i].dev_base;
-		dev_limit = bus_dev_ranges[i].dev_limit;
+		bus = amd_nb_bus_dev_ranges[i].bus;
+		dev_base = amd_nb_bus_dev_ranges[i].dev_base;
+		dev_limit = amd_nb_bus_dev_ranges[i].dev_limit;
 
 		for (slot = dev_base; slot < dev_limit; slot++) {
-			if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
+			if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
 				continue;
 
 			ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
@@ -349,16 +337,16 @@ void __init early_gart_iommu_check(void)
 		return;
 
 	/* disable them all at first */
-	for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
+	for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) {
 		int bus;
 		int dev_base, dev_limit;
 
-		bus = bus_dev_ranges[i].bus;
-		dev_base = bus_dev_ranges[i].dev_base;
-		dev_limit = bus_dev_ranges[i].dev_limit;
+		bus = amd_nb_bus_dev_ranges[i].bus;
+		dev_base = amd_nb_bus_dev_ranges[i].dev_base;
+		dev_limit = amd_nb_bus_dev_ranges[i].dev_limit;
 
 		for (slot = dev_base; slot < dev_limit; slot++) {
-			if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
+			if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
 				continue;
 
 			ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
@@ -390,17 +378,17 @@ int __init gart_iommu_hole_init(void)
 
 	fix = 0;
 	node = 0;
-	for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
+	for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) {
 		int bus;
 		int dev_base, dev_limit;
 		u32 ctl;
 
-		bus = bus_dev_ranges[i].bus;
-		dev_base = bus_dev_ranges[i].dev_base;
-		dev_limit = bus_dev_ranges[i].dev_limit;
+		bus = amd_nb_bus_dev_ranges[i].bus;
+		dev_base = amd_nb_bus_dev_ranges[i].dev_base;
+		dev_limit = amd_nb_bus_dev_ranges[i].dev_limit;
 
 		for (slot = dev_base; slot < dev_limit; slot++) {
-			if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
+			if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
 				continue;
 
 			iommu_detected = 1;
@@ -505,7 +493,7 @@ out:
 	}
 
 	/* Fix up the north bridges */
-	for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
+	for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) {
 		int bus, dev_base, dev_limit;
 
 		/*
@@ -514,11 +502,11 @@ out:
 		 */
 		u32 ctl = DISTLBWALKPRB | aper_order << 1;
 
-		bus = bus_dev_ranges[i].bus;
-		dev_base = bus_dev_ranges[i].dev_base;
-		dev_limit = bus_dev_ranges[i].dev_limit;
+		bus = amd_nb_bus_dev_ranges[i].bus;
+		dev_base = amd_nb_bus_dev_ranges[i].dev_base;
+		dev_limit = amd_nb_bus_dev_ranges[i].dev_limit;
 		for (slot = dev_base; slot < dev_limit; slot++) {
-			if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
+			if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
 				continue;
 
 			write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl);
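
[annotation] Note the loop-bound change: the shared amd_nb_bus_dev_ranges[] table introduced in amd_nb.c ends with an all-zero sentinel entry, so the converted loops terminate when dev_limit reads back as zero rather than on ARRAY_SIZE() of a file-local table. A sketch of that idiom, with scan_slot() standing in as a hypothetical per-slot body:

	/* Sketch: sentinel-terminated walk of the shared range table. */
	const struct amd_nb_bus_dev_range *range;
	int slot;

	for (range = amd_nb_bus_dev_ranges; range->dev_limit; range++)
		for (slot = range->dev_base; slot < range->dev_limit; slot++)
			scan_slot(range->bus, slot);	/* hypothetical helper */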
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 910f20b457c4..3966b564ea47 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -3,10 +3,7 @@
 #
 
 obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o apic_noop.o probe_$(BITS).o ipi.o
-ifneq ($(CONFIG_HARDLOCKUP_DETECTOR),y)
-obj-$(CONFIG_X86_LOCAL_APIC)	+= nmi.o
-endif
-obj-$(CONFIG_HARDLOCKUP_DETECTOR)	+= hw_nmi.o
+obj-y				+= hw_nmi.o
 
 obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o
 obj-$(CONFIG_SMP)		+= ipi.o
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 78218135b48e..06c196d7e59c 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -31,7 +31,6 @@
 #include <linux/init.h>
 #include <linux/cpu.h>
 #include <linux/dmi.h>
-#include <linux/nmi.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
 
@@ -50,8 +49,8 @@
 #include <asm/mtrr.h>
 #include <asm/smp.h>
 #include <asm/mce.h>
-#include <asm/kvm_para.h>
 #include <asm/tsc.h>
+#include <asm/hypervisor.h>
 
 unsigned int num_processors;
 
@@ -432,17 +431,18 @@ int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask)
 	reserved = reserve_eilvt_offset(offset, new);
 
 	if (reserved != new) {
-		pr_err(FW_BUG "cpu %d, try to setup vector 0x%x, but "
-		       "vector 0x%x was already reserved by another core, "
-		       "APIC%lX=0x%x\n",
-		       smp_processor_id(), new, reserved, reg, old);
+		pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
+		       "vector 0x%x, but the register is already in use for "
+		       "vector 0x%x on another cpu\n",
+		       smp_processor_id(), reg, offset, new, reserved);
 		return -EINVAL;
 	}
 
 	if (!eilvt_entry_is_changeable(old, new)) {
-		pr_err(FW_BUG "cpu %d, try to setup vector 0x%x but "
-		       "register already in use, APIC%lX=0x%x\n",
-		       smp_processor_id(), new, reg, old);
+		pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
+		       "vector 0x%x, but the register is already in use for "
+		       "vector 0x%x on this cpu\n",
+		       smp_processor_id(), reg, offset, new, old);
 		return -EBUSY;
 	}
 
@@ -516,7 +516,7 @@ static void __cpuinit setup_APIC_timer(void)
 {
 	struct clock_event_device *levt = &__get_cpu_var(lapic_events);
 
-	if (cpu_has(&current_cpu_data, X86_FEATURE_ARAT)) {
+	if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_ARAT)) {
 		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;
 		/* Make LAPIC timer preferrable over percpu HPET */
 		lapic_clockevent.rating = 150;
@@ -684,7 +684,7 @@ static int __init calibrate_APIC_clock(void)
 	lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
 				       lapic_clockevent.shift);
 	lapic_clockevent.max_delta_ns =
-		clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
+		clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent);
 	lapic_clockevent.min_delta_ns =
 		clockevent_delta2ns(0xF, &lapic_clockevent);
 
@@ -799,11 +799,7 @@ void __init setup_boot_APIC_clock(void)
 	 * PIT/HPET going.  Otherwise register lapic as a dummy
 	 * device.
 	 */
-	if (nmi_watchdog != NMI_IO_APIC)
-		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
-	else
-		pr_warning("APIC timer registered as dummy,"
-			" due to nmi_watchdog=%d!\n", nmi_watchdog);
+	lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
 
 	/* Setup the lapic or request the broadcast */
 	setup_APIC_timer();
@@ -1195,12 +1191,15 @@ static void __cpuinit lapic_setup_esr(void)
 			oldvalue, value);
 }
 
-
 /**
  * setup_local_APIC - setup the local APIC
+ *
+ * Used to setup local APIC while initializing BSP or bringin up APs.
+ * Always called with preemption disabled.
  */
 void __cpuinit setup_local_APIC(void)
 {
+	int cpu = smp_processor_id();
 	unsigned int value, queued;
 	int i, j, acked = 0;
 	unsigned long long tsc = 0, ntsc;
@@ -1225,8 +1224,6 @@ void __cpuinit setup_local_APIC(void)
 #endif
 	perf_events_lapic_init();
 
-	preempt_disable();
-
 	/*
 	 * Double-check whether this APIC is really registered.
 	 * This is meaningless in clustered apic mode, so we skip it.
@@ -1342,21 +1339,19 @@ void __cpuinit setup_local_APIC(void)
 	 * TODO: set up through-local-APIC from through-I/O-APIC? --macro
 	 */
 	value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
-	if (!smp_processor_id() && (pic_mode || !value)) {
+	if (!cpu && (pic_mode || !value)) {
 		value = APIC_DM_EXTINT;
-		apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n",
-			    smp_processor_id());
+		apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", cpu);
 	} else {
 		value = APIC_DM_EXTINT | APIC_LVT_MASKED;
-		apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n",
-			    smp_processor_id());
+		apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", cpu);
 	}
 	apic_write(APIC_LVT0, value);
 
 	/*
 	 * only the BP should see the LINT1 NMI signal, obviously.
 	 */
-	if (!smp_processor_id())
+	if (!cpu)
 		value = APIC_DM_NMI;
 	else
 		value = APIC_DM_NMI | APIC_LVT_MASKED;
@@ -1364,11 +1359,9 @@ void __cpuinit setup_local_APIC(void)
 		value |= APIC_LVT_LEVEL_TRIGGER;
 	apic_write(APIC_LVT1, value);
 
-	preempt_enable();
-
 #ifdef CONFIG_X86_MCE_INTEL
 	/* Recheck CMCI information after local APIC is up on CPU #0 */
-	if (smp_processor_id() == 0)
+	if (!cpu)
 		cmci_recheck();
 #endif
 }
@@ -1387,7 +1380,6 @@ void __cpuinit end_local_APIC_setup(void)
 	}
 #endif
 
-	setup_apic_nmi_watchdog(NULL);
 	apic_pm_activate();
 
 	/*
@@ -1484,7 +1476,8 @@ void __init enable_IR_x2apic(void)
 	/* IR is required if there is APIC ID > 255 even when running
 	 * under KVM
 	 */
-	if (max_physical_apicid > 255 || !kvm_para_available())
+	if (max_physical_apicid > 255 ||
+	    !hypervisor_x2apic_available())
 		goto nox2apic;
 	/*
 	 * without IR all CPUs can be addressed by IOAPIC/MSI
@@ -1538,13 +1531,60 @@ static int __init detect_init_APIC(void)
 	return 0;
 }
 #else
+
+static int apic_verify(void)
+{
+	u32 features, h, l;
+
+	/*
+	 * The APIC feature bit should now be enabled
+	 * in `cpuid'
+	 */
+	features = cpuid_edx(1);
+	if (!(features & (1 << X86_FEATURE_APIC))) {
+		pr_warning("Could not enable APIC!\n");
+		return -1;
+	}
+	set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
+	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+
+	/* The BIOS may have set up the APIC at some other address */
+	rdmsr(MSR_IA32_APICBASE, l, h);
+	if (l & MSR_IA32_APICBASE_ENABLE)
+		mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
+
+	pr_info("Found and enabled local APIC!\n");
+	return 0;
+}
+
+int apic_force_enable(void)
+{
+	u32 h, l;
+
+	if (disable_apic)
+		return -1;
+
+	/*
+	 * Some BIOSes disable the local APIC in the APIC_BASE
+	 * MSR. This can only be done in software for Intel P6 or later
+	 * and AMD K7 (Model > 1) or later.
+	 */
+	rdmsr(MSR_IA32_APICBASE, l, h);
+	if (!(l & MSR_IA32_APICBASE_ENABLE)) {
+		pr_info("Local APIC disabled by BIOS -- reenabling.\n");
+		l &= ~MSR_IA32_APICBASE_BASE;
+		l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
+		wrmsr(MSR_IA32_APICBASE, l, h);
+		enabled_via_apicbase = 1;
+	}
+	return apic_verify();
+}
+
 /*
  * Detect and initialize APIC
  */
 static int __init detect_init_APIC(void)
 {
-	u32 h, l, features;
-
 	/* Disabled by kernel option? */
 	if (disable_apic)
 		return -1;
@@ -1574,38 +1614,12 @@ static int __init detect_init_APIC(void)
 			       "you can enable it with \"lapic\"\n");
 			return -1;
 		}
-		/*
-		 * Some BIOSes disable the local APIC in the APIC_BASE
-		 * MSR. This can only be done in software for Intel P6 or later
-		 * and AMD K7 (Model > 1) or later.
-		 */
-		rdmsr(MSR_IA32_APICBASE, l, h);
-		if (!(l & MSR_IA32_APICBASE_ENABLE)) {
-			pr_info("Local APIC disabled by BIOS -- reenabling.\n");
-			l &= ~MSR_IA32_APICBASE_BASE;
-			l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
-			wrmsr(MSR_IA32_APICBASE, l, h);
-			enabled_via_apicbase = 1;
-		}
-	}
-	/*
-	 * The APIC feature bit should now be enabled
-	 * in `cpuid'
-	 */
-	features = cpuid_edx(1);
-	if (!(features & (1 << X86_FEATURE_APIC))) {
-		pr_warning("Could not enable APIC!\n");
-		return -1;
+		if (apic_force_enable())
+			return -1;
+	} else {
+		if (apic_verify())
+			return -1;
 	}
-	set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
-	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-
-	/* The BIOS may have set up the APIC at some other address */
-	rdmsr(MSR_IA32_APICBASE, l, h);
-	if (l & MSR_IA32_APICBASE_ENABLE)
-		mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
-
-	pr_info("Found and enabled local APIC!\n");
 
 	apic_pm_activate();
 
@@ -1617,28 +1631,6 @@ no_apic:
 }
 #endif
 
-#ifdef CONFIG_X86_64
-void __init early_init_lapic_mapping(void)
-{
-	/*
-	 * If no local APIC can be found then go out
-	 * : it means there is no mpatable and MADT
-	 */
-	if (!smp_found_config)
-		return;
-
-	set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
-	apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
-		    APIC_BASE, mp_lapic_addr);
-
-	/*
-	 * Fetch the APIC ID of the BSP in case we have a
-	 * default configuration (or the MP table is broken).
-	 */
-	boot_cpu_physical_apicid = read_apic_id();
-}
-#endif
-
 /**
  * init_apic_mappings - initialize APIC mappings
  */
@@ -1664,10 +1656,7 @@ void __init init_apic_mappings(void)
 		 * acpi_register_lapic_address()
 		 */
 		if (!acpi_lapic && !smp_found_config)
-			set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
-
-		apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n",
-					APIC_BASE, apic_phys);
+			register_lapic_address(apic_phys);
 	}
 
 	/*
@@ -1689,11 +1678,27 @@ void __init init_apic_mappings(void)
 	}
 }
 
+void __init register_lapic_address(unsigned long address)
+{
+	mp_lapic_addr = address;
+
+	if (!x2apic_mode) {
+		set_fixmap_nocache(FIX_APIC_BASE, address);
+		apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
+			    APIC_BASE, mp_lapic_addr);
+	}
+	if (boot_cpu_physical_apicid == -1U) {
+		boot_cpu_physical_apicid = read_apic_id();
+		apic_version[boot_cpu_physical_apicid] =
+			GET_APIC_VERSION(apic_read(APIC_LVR));
+	}
+}
+
 /*
  * This initializes the IO-APIC and APIC hardware if this is
  * a UP kernel.
  */
-int apic_version[MAX_APICS];
+int apic_version[MAX_LOCAL_APIC];
 
 int __init APIC_init_uniprocessor(void)
 {
@@ -1758,17 +1763,10 @@ int __init APIC_init_uniprocessor(void)
 		setup_IO_APIC();
 	else {
 		nr_ioapics = 0;
-		localise_nmi_watchdog();
 	}
-#else
-	localise_nmi_watchdog();
 #endif
 
 	x86_init.timers.setup_percpu_clockev();
-#ifdef CONFIG_X86_64
-	check_nmi_watchdog();
-#endif
-
 	return 0;
 }
 
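
[annotation] The detect_init_APIC() refactor above pulls the IA32_APICBASE handling into apic_verify() and apic_force_enable(); the MSR logic itself is unchanged: the low word carries the global enable bit and the physical base address. A freestanding illustration of that bit layout follows. The constant values mirror the kernel's MSR_IA32_APICBASE_* definitions, but this snippet is a userspace mock for illustration, not kernel code:

	#include <stdint.h>
	#include <stdio.h>

	#define APICBASE_ENABLE	(1u << 11)	/* global APIC enable bit */
	#define APICBASE_BASE	0xfffff000u	/* physical base address field */

	int main(void)
	{
		/* sample value standing in for the low half of rdmsr() */
		uint32_t l = 0xfee00000u | APICBASE_ENABLE;

		if (l & APICBASE_ENABLE)
			printf("APIC enabled at %#x\n", l & APICBASE_BASE);
		else
			printf("APIC disabled by BIOS; apic_force_enable() would set the bit\n");
		return 0;
	}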
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 62f6e1e55b90..79fd43ca6f96 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -17,20 +17,31 @@
 #include <linux/nmi.h>
 #include <linux/module.h>
 
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
 u64 hw_nmi_get_sample_period(void)
 {
 	return (u64)(cpu_khz) * 1000 * 60;
 }
+#endif
 
-#ifdef ARCH_HAS_NMI_WATCHDOG
-
+#ifdef arch_trigger_all_cpu_backtrace
 /* For reliability, we're prepared to waste bits here. */
 static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
 
+/* "in progress" flag of arch_trigger_all_cpu_backtrace */
+static unsigned long backtrace_flag;
+
 void arch_trigger_all_cpu_backtrace(void)
 {
 	int i;
 
+	if (test_and_set_bit(0, &backtrace_flag))
+		/*
+		 * If there is already a trigger_all_cpu_backtrace() in progress
+		 * (backtrace_flag == 1), don't output double cpu dump infos.
+		 */
+		return;
+
 	cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
 
 	printk(KERN_INFO "sending NMI to all CPUs:\n");
@@ -42,6 +53,9 @@ void arch_trigger_all_cpu_backtrace(void)
 			break;
 		mdelay(1);
 	}
+
+	clear_bit(0, &backtrace_flag);
+	smp_mb__after_clear_bit();
 }
 
 static int __kprobes
@@ -50,11 +64,10 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
 {
 	struct die_args *args = __args;
 	struct pt_regs *regs;
-	int cpu = smp_processor_id();
+	int cpu;
 
 	switch (cmd) {
 	case DIE_NMI:
-	case DIE_NMI_IPI:
 		break;
 
 	default:
@@ -62,6 +75,7 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
 	}
 
 	regs = args->regs;
+	cpu = smp_processor_id();
 
 	if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
 		static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED;
@@ -81,7 +95,7 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
 static __read_mostly struct notifier_block backtrace_notifier = {
 	.notifier_call          = arch_trigger_all_cpu_backtrace_handler,
 	.next                   = NULL,
-	.priority               = 1
+	.priority		= NMI_LOCAL_LOW_PRIOR,
 };
 
 static int __init register_trigger_all_cpu_backtrace(void)
@@ -91,18 +105,3 @@ static int __init register_trigger_all_cpu_backtrace(void)
 }
 early_initcall(register_trigger_all_cpu_backtrace);
 #endif
-
-/* STUB calls to mimic old nmi_watchdog behaviour */
-#if defined(CONFIG_X86_LOCAL_APIC)
-unsigned int nmi_watchdog = NMI_NONE;
-EXPORT_SYMBOL(nmi_watchdog);
-void acpi_nmi_enable(void) { return; }
-void acpi_nmi_disable(void) { return; }
-#endif
-atomic_t nmi_active = ATOMIC_INIT(0);		/* oprofile uses this */
-EXPORT_SYMBOL(nmi_active);
-int unknown_nmi_panic;
-void cpu_nmi_set_wd_enabled(void) { return; }
-void stop_apic_nmi_watchdog(void *unused) { return; }
-void setup_apic_nmi_watchdog(void *unused) { return; }
-int __init check_nmi_watchdog(void) { return 0; }
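
[annotation] The added backtrace_flag turns arch_trigger_all_cpu_backtrace() into a one-at-a-time operation: a trigger that races with an in-flight dump returns instead of interleaving output. The same test_and_set_bit()/clear_bit() gate in isolation, as a sketch:

	/* Sketch of the "in progress" gate used above. */
	static unsigned long in_progress;

	if (test_and_set_bit(0, &in_progress))
		return;			/* a dump is already running */

	/* ... send NMIs and wait for the handlers ... */

	clear_bit(0, &in_progress);
	smp_mb__after_clear_bit();	/* order the flag release for later setters */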
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index fadcd743a74f..697dc34b7b87 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -54,7 +54,6 @@
 #include <asm/dma.h>
 #include <asm/timer.h>
 #include <asm/i8259.h>
-#include <asm/nmi.h>
 #include <asm/msidef.h>
 #include <asm/hypertransport.h>
 #include <asm/setup.h>
@@ -126,6 +125,26 @@ static int __init parse_noapic(char *str)
 }
 early_param("noapic", parse_noapic);
 
+/* Will be called in mpparse/acpi/sfi codes for saving IRQ info */
+void mp_save_irq(struct mpc_intsrc *m)
+{
+	int i;
+
+	apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
+		" IRQ %02x, APIC ID %x, APIC INT %02x\n",
+		m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbus,
+		m->srcbusirq, m->dstapic, m->dstirq);
+
+	for (i = 0; i < mp_irq_entries; i++) {
+		if (!memcmp(&mp_irqs[i], m, sizeof(*m)))
+			return;
+	}
+
+	memcpy(&mp_irqs[mp_irq_entries], m, sizeof(*m));
+	if (++mp_irq_entries == MAX_IRQ_SOURCES)
+		panic("Max # of irq sources exceeded!!\n");
+}
+
 struct irq_pin_list {
 	int apic, pin;
 	struct irq_pin_list *next;
@@ -136,6 +155,7 @@ static struct irq_pin_list *alloc_irq_pin_list(int node)
 	return kzalloc_node(sizeof(struct irq_pin_list), GFP_KERNEL, node);
 }
 
+
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
 #ifdef CONFIG_SPARSE_IRQ
 static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY];
@@ -1934,8 +1954,7 @@ void disable_IO_APIC(void)
  *
  * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
  */
-
-void __init setup_ioapic_ids_from_mpc(void)
+void __init setup_ioapic_ids_from_mpc_nocheck(void)
 {
 	union IO_APIC_reg_00 reg_00;
 	physid_mask_t phys_id_present_map;
@@ -1944,15 +1963,6 @@ void __init setup_ioapic_ids_from_mpc(void)
 	unsigned char old_id;
 	unsigned long flags;
 
-	if (acpi_ioapic)
-		return;
-	/*
-	 * Don't check I/O APIC IDs for xAPIC systems.  They have
-	 * no meaning without the serial APIC bus.
-	 */
-	if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
-		|| APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
-		return;
 	/*
 	 * This is broken; anything with a real cpu count has to
 	 * circumvent this idiocy regardless.
@@ -2006,7 +2016,6 @@ void __init setup_ioapic_ids_from_mpc(void)
 		physids_or(phys_id_present_map, phys_id_present_map, tmp);
 	}
 
-
 	/*
 	 * We need to adjust the IRQ routing table
 	 * if the ID changed.
@@ -2018,9 +2027,12 @@ void __init setup_ioapic_ids_from_mpc(void)
 			= mp_ioapics[apic_id].apicid;
 
 		/*
-		 * Read the right value from the MPC table and
-		 * write it into the ID register.
+		 * Update the ID register according to the right value
+		 * from the MPC table if they are different.
 		 */
+		if (mp_ioapics[apic_id].apicid == reg_00.bits.ID)
+			continue;
+
 		apic_printk(APIC_VERBOSE, KERN_INFO
 			"...changing IO-APIC physical APIC ID to %d ...",
 			mp_ioapics[apic_id].apicid);
@@ -2042,6 +2054,21 @@ void __init setup_ioapic_ids_from_mpc(void)
 			apic_printk(APIC_VERBOSE, " ok.\n");
 	}
 }
+
+void __init setup_ioapic_ids_from_mpc(void)
+{
+
+	if (acpi_ioapic)
+		return;
+	/*
+	 * Don't check I/O APIC IDs for xAPIC systems.  They have
+	 * no meaning without the serial APIC bus.
+	 */
+	if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+		|| APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+		return;
+	setup_ioapic_ids_from_mpc_nocheck();
+}
 #endif
 
 int no_timer_check __initdata;
@@ -2302,7 +2329,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
 		unsigned int irr;
 		struct irq_desc *desc;
 		struct irq_cfg *cfg;
-		irq = __get_cpu_var(vector_irq)[vector];
+		irq = __this_cpu_read(vector_irq[vector]);
 
 		if (irq == -1)
 			continue;
@@ -2336,7 +2363,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
 			apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
 			goto unlock;
 		}
-		__get_cpu_var(vector_irq)[vector] = -1;
+		__this_cpu_write(vector_irq[vector], -1);
 unlock:
 		raw_spin_unlock(&desc->lock);
 	}
@@ -2642,24 +2669,6 @@ static void lapic_register_intr(int irq)
 				      "edge");
 }
 
-static void __init setup_nmi(void)
-{
-	/*
-	 * Dirty trick to enable the NMI watchdog ...
-	 * We put the 8259A master into AEOI mode and
-	 * unmask on all local APICs LVT0 as NMI.
-	 *
-	 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
-	 * is from Maciej W. Rozycki - so we do not have to EOI from
-	 * the NMI handler or the timer interrupt.
-	 */
-	apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
-
-	enable_NMI_through_LVT0();
-
-	apic_printk(APIC_VERBOSE, " done.\n");
-}
-
 /*
  * This looks a bit hackish but it's about the only one way of sending
  * a few INTA cycles to 8259As and any associated glue logic.  ICR does
@@ -2765,15 +2774,6 @@ static inline void __init check_timer(void)
 	 */
 	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
 	legacy_pic->init(1);
-#ifdef CONFIG_X86_32
-	{
-		unsigned int ver;
-
-		ver = apic_read(APIC_LVR);
-		ver = GET_APIC_VERSION(ver);
-		timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
-	}
-#endif
 
 	pin1  = find_isa_irq_pin(0, mp_INT);
 	apic1 = find_isa_irq_apic(0, mp_INT);
@@ -2821,10 +2821,6 @@ static inline void __init check_timer(void)
 			unmask_ioapic(cfg);
 		}
 		if (timer_irq_works()) {
-			if (nmi_watchdog == NMI_IO_APIC) {
-				setup_nmi();
-				legacy_pic->unmask(0);
-			}
 			if (disable_timer_pin_1 > 0)
 				clear_IO_APIC_pin(0, pin1);
 			goto out;
@@ -2850,11 +2846,6 @@ static inline void __init check_timer(void)
 		if (timer_irq_works()) {
 			apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
 			timer_through_8259 = 1;
-			if (nmi_watchdog == NMI_IO_APIC) {
-				legacy_pic->mask(0);
-				setup_nmi();
-				legacy_pic->unmask(0);
-			}
 			goto out;
 		}
 		/*
@@ -2866,15 +2857,6 @@ static inline void __init check_timer(void)
 		apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
 	}
 
-	if (nmi_watchdog == NMI_IO_APIC) {
-		apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
-			    "through the IO-APIC - disabling NMI Watchdog!\n");
-		nmi_watchdog = NMI_NONE;
-	}
-#ifdef CONFIG_X86_32
-	timer_ack = 0;
-#endif
-
 	apic_printk(APIC_QUIET, KERN_INFO
 		    "...trying to set up timer as Virtual Wire IRQ...\n");
 
@@ -3639,7 +3621,7 @@ int __init io_apic_get_redir_entries (int ioapic)
 	return reg_01.bits.entries + 1;
 }
 
-void __init probe_nr_irqs_gsi(void)
+static void __init probe_nr_irqs_gsi(void)
 {
 	int nr;
 
@@ -3956,7 +3938,7 @@ static struct resource * __init ioapic_setup_resources(int nr_ioapics)
 	return res;
 }
 
-void __init ioapic_init_mappings(void)
+void __init ioapic_and_gsi_init(void)
 {
 	unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
 	struct resource *ioapic_res;
@@ -3994,6 +3976,8 @@ fake_ioapic_page:
 		ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1;
 		ioapic_res++;
 	}
+
+	probe_nr_irqs_gsi();
 }
 
 void __init ioapic_insert_resources(void)
@@ -4103,7 +4087,8 @@ void __init pre_init_apic_IRQ0(void)
 
 	printk(KERN_INFO "Early APIC setup for system timer0\n");
 #ifndef CONFIG_SMP
-	phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
+	physid_set_mask_of_physid(boot_cpu_physical_apicid,
+				  &phys_cpu_present_map);
 #endif
 	/* Make sure the irq descriptor is set up */
 	cfg = alloc_irq_and_cfg_at(0, 0);
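
[annotation] mp_save_irq() centralizes what acpi/boot.c previously did privately with save_mp_irq(): it deduplicates by memcmp() against the existing mp_irqs[] entries before appending, and panics once MAX_IRQ_SOURCES is exceeded. Callers in the MP-table, ACPI, and SFI paths now just fill a struct mpc_intsrc and hand it over; a sketch, with the field values as placeholders:

	/* Sketch: registering one legacy IRQ routing entry. */
	struct mpc_intsrc mp_irq = {
		.type		= MP_INTSRC,
		.irqtype	= mp_INT,
		.irqflag	= 0,	/* conforming polarity/trigger */
		.srcbus		= 0,	/* placeholder bus */
		.srcbusirq	= 4,	/* placeholder source IRQ */
		.dstapic	= 2,	/* placeholder I/O APIC ID */
		.dstirq		= 4,	/* placeholder INTIN pin */
	};

	mp_save_irq(&mp_irq);	/* a duplicate entry is silently ignored */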
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
deleted file mode 100644
index c90041ccb742..000000000000
--- a/arch/x86/kernel/apic/nmi.c
+++ /dev/null
@@ -1,567 +0,0 @@
-/*
- *  NMI watchdog support on APIC systems
- *
- *  Started by Ingo Molnar <mingo@redhat.com>
- *
- *  Fixes:
- *  Mikael Pettersson	: AMD K7 support for local APIC NMI watchdog.
- *  Mikael Pettersson	: Power Management for local APIC NMI watchdog.
- *  Mikael Pettersson	: Pentium 4 support for local APIC NMI watchdog.
- *  Pavel Machek and
- *  Mikael Pettersson	: PM converted to driver model. Disable/enable API.
- */
-
-#include <asm/apic.h>
-
-#include <linux/nmi.h>
-#include <linux/mm.h>
-#include <linux/delay.h>
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/sysdev.h>
-#include <linux/sysctl.h>
-#include <linux/percpu.h>
-#include <linux/kprobes.h>
-#include <linux/cpumask.h>
-#include <linux/kernel_stat.h>
-#include <linux/kdebug.h>
-#include <linux/smp.h>
-
-#include <asm/i8259.h>
-#include <asm/io_apic.h>
-#include <asm/proto.h>
-#include <asm/timer.h>
-
-#include <asm/mce.h>
-
-#include <asm/mach_traps.h>
-
-int unknown_nmi_panic;
-int nmi_watchdog_enabled;
-
-/* For reliability, we're prepared to waste bits here. */
-static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
-
-/* nmi_active:
- * >0: the lapic NMI watchdog is active, but can be disabled
- * <0: the lapic NMI watchdog has not been set up, and cannot
- *     be enabled
- *  0: the lapic NMI watchdog is disabled, but can be enabled
- */
-atomic_t nmi_active = ATOMIC_INIT(0);	/* oprofile uses this */
-EXPORT_SYMBOL(nmi_active);
-
-unsigned int nmi_watchdog = NMI_NONE;
-EXPORT_SYMBOL(nmi_watchdog);
-
-static int panic_on_timeout;
-
-static unsigned int nmi_hz = HZ;
-static DEFINE_PER_CPU(short, wd_enabled);
-static int endflag __initdata;
-
-static inline unsigned int get_nmi_count(int cpu)
-{
-	return per_cpu(irq_stat, cpu).__nmi_count;
-}
-
-static inline int mce_in_progress(void)
-{
-#if defined(CONFIG_X86_MCE)
-	return atomic_read(&mce_entry) > 0;
-#endif
-	return 0;
-}
-
-/*
- * Take the local apic timer and PIT/HPET into account. We don't
- * know which one is active, when we have highres/dyntick on
- */
-static inline unsigned int get_timer_irqs(int cpu)
-{
-	return per_cpu(irq_stat, cpu).apic_timer_irqs +
-		per_cpu(irq_stat, cpu).irq0_irqs;
-}
-
-#ifdef CONFIG_SMP
-/*
- * The performance counters used by NMI_LOCAL_APIC don't trigger when
- * the CPU is idle. To make sure the NMI watchdog really ticks on all
- * CPUs during the test make them busy.
- */
-static __init void nmi_cpu_busy(void *data)
-{
-	local_irq_enable_in_hardirq();
-	/*
-	 * Intentionally don't use cpu_relax here. This is
-	 * to make sure that the performance counter really ticks,
-	 * even if there is a simulator or similar that catches the
-	 * pause instruction. On a real HT machine this is fine because
-	 * all other CPUs are busy with "useless" delay loops and don't
-	 * care if they get somewhat less cycles.
-	 */
-	while (endflag == 0)
-		mb();
-}
-#endif
-
-static void report_broken_nmi(int cpu, unsigned int *prev_nmi_count)
-{
-	printk(KERN_CONT "\n");
-
-	printk(KERN_WARNING
-		"WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n",
-			cpu, prev_nmi_count[cpu], get_nmi_count(cpu));
-
-	printk(KERN_WARNING
-		"Please report this to bugzilla.kernel.org,\n");
-	printk(KERN_WARNING
-		"and attach the output of the 'dmesg' command.\n");
-
-	per_cpu(wd_enabled, cpu) = 0;
-	atomic_dec(&nmi_active);
-}
-
-static void __acpi_nmi_disable(void *__unused)
-{
-	apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
-}
-
-int __init check_nmi_watchdog(void)
-{
-	unsigned int *prev_nmi_count;
-	int cpu;
-
-	if (!nmi_watchdog_active() || !atomic_read(&nmi_active))
-		return 0;
-
-	prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
-	if (!prev_nmi_count)
-		goto error;
-
-	printk(KERN_INFO "Testing NMI watchdog ... ");
-
-#ifdef CONFIG_SMP
145#ifdef CONFIG_SMP
146 if (nmi_watchdog == NMI_LOCAL_APIC)
147 smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);
148#endif
149
150 for_each_possible_cpu(cpu)
151 prev_nmi_count[cpu] = get_nmi_count(cpu);
152 local_irq_enable();
153 mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */
154
155 for_each_online_cpu(cpu) {
156 if (!per_cpu(wd_enabled, cpu))
157 continue;
158 if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5)
159 report_broken_nmi(cpu, prev_nmi_count);
160 }
161 endflag = 1;
162 if (!atomic_read(&nmi_active)) {
163 kfree(prev_nmi_count);
164 atomic_set(&nmi_active, -1);
165 goto error;
166 }
167 printk("OK.\n");
168
169 /*
170 * now that we know it works we can reduce NMI frequency to
171 * something more reasonable; makes a difference in some configs
172 */
173 if (nmi_watchdog == NMI_LOCAL_APIC)
174 nmi_hz = lapic_adjust_nmi_hz(1);
175
176 kfree(prev_nmi_count);
177 return 0;
178error:
179 if (nmi_watchdog == NMI_IO_APIC) {
180 if (!timer_through_8259)
181 legacy_pic->mask(0);
182 on_each_cpu(__acpi_nmi_disable, NULL, 1);
183 }
184
185#ifdef CONFIG_X86_32
186 timer_ack = 0;
187#endif
188 return -1;
189}
190
191static int __init setup_nmi_watchdog(char *str)
192{
193 unsigned int nmi;
194
195 if (!strncmp(str, "panic", 5)) {
196 panic_on_timeout = 1;
197 str = strchr(str, ',');
198 if (!str)
199 return 1;
200 ++str;
201 }
202
203 if (!strncmp(str, "lapic", 5))
204 nmi_watchdog = NMI_LOCAL_APIC;
205 else if (!strncmp(str, "ioapic", 6))
206 nmi_watchdog = NMI_IO_APIC;
207 else {
208 get_option(&str, &nmi);
209 if (nmi >= NMI_INVALID)
210 return 0;
211 nmi_watchdog = nmi;
212 }
213
214 return 1;
215}
216__setup("nmi_watchdog=", setup_nmi_watchdog);
217
218/*
219 * Suspend/resume support
220 */
221#ifdef CONFIG_PM
222
223static int nmi_pm_active; /* nmi_active before suspend */
224
225static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
226{
227 /* only CPU0 goes here, other CPUs should be offline */
228 nmi_pm_active = atomic_read(&nmi_active);
229 stop_apic_nmi_watchdog(NULL);
230 BUG_ON(atomic_read(&nmi_active) != 0);
231 return 0;
232}
233
234static int lapic_nmi_resume(struct sys_device *dev)
235{
236 /* only CPU0 goes here, other CPUs should be offline */
237 if (nmi_pm_active > 0) {
238 setup_apic_nmi_watchdog(NULL);
239 touch_nmi_watchdog();
240 }
241 return 0;
242}
243
244static struct sysdev_class nmi_sysclass = {
245 .name = "lapic_nmi",
246 .resume = lapic_nmi_resume,
247 .suspend = lapic_nmi_suspend,
248};
249
250static struct sys_device device_lapic_nmi = {
251 .id = 0,
252 .cls = &nmi_sysclass,
253};
254
255static int __init init_lapic_nmi_sysfs(void)
256{
257 int error;
258
259 /*
260 * should really be a BUG_ON but b/c this is an
261 * init call, it just doesn't work. -dcz
262 */
263 if (nmi_watchdog != NMI_LOCAL_APIC)
264 return 0;
265
266 if (atomic_read(&nmi_active) < 0)
267 return 0;
268
269 error = sysdev_class_register(&nmi_sysclass);
270 if (!error)
271 error = sysdev_register(&device_lapic_nmi);
272 return error;
273}
274
275/* must come after the local APIC's device_initcall() */
276late_initcall(init_lapic_nmi_sysfs);
277
278#endif /* CONFIG_PM */
279
280static void __acpi_nmi_enable(void *__unused)
281{
282 apic_write(APIC_LVT0, APIC_DM_NMI);
283}
284
285/*
286 * Enable timer based NMIs on all CPUs:
287 */
288void acpi_nmi_enable(void)
289{
290 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
291 on_each_cpu(__acpi_nmi_enable, NULL, 1);
292}
293
294/*
295 * Disable timer based NMIs on all CPUs:
296 */
297void acpi_nmi_disable(void)
298{
299 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
300 on_each_cpu(__acpi_nmi_disable, NULL, 1);
301}
302
303/*
304 * This function is called as soon the LAPIC NMI watchdog driver has everything
305 * in place and it's ready to check if the NMIs belong to the NMI watchdog
306 */
307void cpu_nmi_set_wd_enabled(void)
308{
309 __get_cpu_var(wd_enabled) = 1;
310}
311
312void setup_apic_nmi_watchdog(void *unused)
313{
314 if (__get_cpu_var(wd_enabled))
315 return;
316
317 /* cheap hack to support suspend/resume */
 318 /* if cpu0 is not active, neither should the other cpus be */
319 if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0)
320 return;
321
322 switch (nmi_watchdog) {
323 case NMI_LOCAL_APIC:
324 if (lapic_watchdog_init(nmi_hz) < 0) {
325 __get_cpu_var(wd_enabled) = 0;
326 return;
327 }
328 /* FALL THROUGH */
329 case NMI_IO_APIC:
330 __get_cpu_var(wd_enabled) = 1;
331 atomic_inc(&nmi_active);
332 }
333}
334
335void stop_apic_nmi_watchdog(void *unused)
336{
337 /* only support LOCAL and IO APICs for now */
338 if (!nmi_watchdog_active())
339 return;
340 if (__get_cpu_var(wd_enabled) == 0)
341 return;
342 if (nmi_watchdog == NMI_LOCAL_APIC)
343 lapic_watchdog_stop();
344 else
345 __acpi_nmi_disable(NULL);
346 __get_cpu_var(wd_enabled) = 0;
347 atomic_dec(&nmi_active);
348}
349
350/*
351 * the best way to detect whether a CPU has a 'hard lockup' problem
 352 * is to check its local APIC timer IRQ counts. If they are not
353 * changing then that CPU has some problem.
354 *
355 * as these watchdog NMI IRQs are generated on every CPU, we only
356 * have to check the current processor.
357 *
358 * since NMIs don't listen to _any_ locks, we have to be extremely
359 * careful not to rely on unsafe variables. The printk might lock
360 * up though, so we have to break up any console locks first ...
361 * [when there will be more tty-related locks, break them up here too!]
362 */
363
364static DEFINE_PER_CPU(unsigned, last_irq_sum);
365static DEFINE_PER_CPU(long, alert_counter);
366static DEFINE_PER_CPU(int, nmi_touch);
367
368void touch_nmi_watchdog(void)
369{
370 if (nmi_watchdog_active()) {
371 unsigned cpu;
372
373 /*
374 * Tell other CPUs to reset their alert counters. We cannot
375 * do it ourselves because the alert count increase is not
376 * atomic.
377 */
378 for_each_present_cpu(cpu) {
379 if (per_cpu(nmi_touch, cpu) != 1)
380 per_cpu(nmi_touch, cpu) = 1;
381 }
382 }
383
384 /*
385 * Tickle the softlockup detector too:
386 */
387 touch_softlockup_watchdog();
388}
389EXPORT_SYMBOL(touch_nmi_watchdog);
390
391notrace __kprobes int
392nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
393{
394 /*
395 * Since current_thread_info()-> is always on the stack, and we
396 * always switch the stack NMI-atomically, it's safe to use
397 * smp_processor_id().
398 */
399 unsigned int sum;
400 int touched = 0;
401 int cpu = smp_processor_id();
402 int rc = 0;
403
404 sum = get_timer_irqs(cpu);
405
406 if (__get_cpu_var(nmi_touch)) {
407 __get_cpu_var(nmi_touch) = 0;
408 touched = 1;
409 }
410
411 /* We can be called before check_nmi_watchdog, hence NULL check. */
412 if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
413 static DEFINE_RAW_SPINLOCK(lock); /* Serialise the printks */
414
415 raw_spin_lock(&lock);
416 printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
417 show_regs(regs);
418 dump_stack();
419 raw_spin_unlock(&lock);
420 cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
421
422 rc = 1;
423 }
424
425 /* Could check oops_in_progress here too, but it's safer not to */
426 if (mce_in_progress())
427 touched = 1;
428
 429 /* if none of the timers is firing, this cpu isn't doing much */
430 if (!touched && __get_cpu_var(last_irq_sum) == sum) {
431 /*
432 * Ayiee, looks like this CPU is stuck ...
433 * wait a few IRQs (5 seconds) before doing the oops ...
434 */
435 __this_cpu_inc(alert_counter);
436 if (__this_cpu_read(alert_counter) == 5 * nmi_hz)
437 /*
438 * die_nmi will return ONLY if NOTIFY_STOP happens..
439 */
440 die_nmi("BUG: NMI Watchdog detected LOCKUP",
441 regs, panic_on_timeout);
442 } else {
443 __get_cpu_var(last_irq_sum) = sum;
444 __this_cpu_write(alert_counter, 0);
445 }
446
447 /* see if the nmi watchdog went off */
448 if (!__get_cpu_var(wd_enabled))
449 return rc;
450 switch (nmi_watchdog) {
451 case NMI_LOCAL_APIC:
452 rc |= lapic_wd_event(nmi_hz);
453 break;
454 case NMI_IO_APIC:
455 /*
456 * don't know how to accurately check for this.
 456 * just assume it was a watchdog timer interrupt.
458 * This matches the old behaviour.
459 */
460 rc = 1;
461 break;
462 }
463 return rc;
464}
465
466#ifdef CONFIG_SYSCTL
467
468static void enable_ioapic_nmi_watchdog_single(void *unused)
469{
470 __get_cpu_var(wd_enabled) = 1;
471 atomic_inc(&nmi_active);
472 __acpi_nmi_enable(NULL);
473}
474
475static void enable_ioapic_nmi_watchdog(void)
476{
477 on_each_cpu(enable_ioapic_nmi_watchdog_single, NULL, 1);
478 touch_nmi_watchdog();
479}
480
481static void disable_ioapic_nmi_watchdog(void)
482{
483 on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
484}
485
486static int __init setup_unknown_nmi_panic(char *str)
487{
488 unknown_nmi_panic = 1;
489 return 1;
490}
491__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
492
493static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
494{
495 unsigned char reason = get_nmi_reason();
496 char buf[64];
497
498 sprintf(buf, "NMI received for unknown reason %02x\n", reason);
499 die_nmi(buf, regs, 1); /* Always panic here */
500 return 0;
501}
502
503/*
504 * proc handler for /proc/sys/kernel/nmi
505 */
506int proc_nmi_enabled(struct ctl_table *table, int write,
507 void __user *buffer, size_t *length, loff_t *ppos)
508{
509 int old_state;
510
511 nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
512 old_state = nmi_watchdog_enabled;
513 proc_dointvec(table, write, buffer, length, ppos);
514 if (!!old_state == !!nmi_watchdog_enabled)
515 return 0;
516
517 if (atomic_read(&nmi_active) < 0 || !nmi_watchdog_active()) {
518 printk(KERN_WARNING
519 "NMI watchdog is permanently disabled\n");
520 return -EIO;
521 }
522
523 if (nmi_watchdog == NMI_LOCAL_APIC) {
524 if (nmi_watchdog_enabled)
525 enable_lapic_nmi_watchdog();
526 else
527 disable_lapic_nmi_watchdog();
528 } else if (nmi_watchdog == NMI_IO_APIC) {
529 if (nmi_watchdog_enabled)
530 enable_ioapic_nmi_watchdog();
531 else
532 disable_ioapic_nmi_watchdog();
533 } else {
534 printk(KERN_WARNING
535 "NMI watchdog doesn't know what hardware to touch\n");
536 return -EIO;
537 }
538 return 0;
539}
540
541#endif /* CONFIG_SYSCTL */
542
543int do_nmi_callback(struct pt_regs *regs, int cpu)
544{
545#ifdef CONFIG_SYSCTL
546 if (unknown_nmi_panic)
547 return unknown_nmi_panic_callback(regs, cpu);
548#endif
549 return 0;
550}
551
552void arch_trigger_all_cpu_backtrace(void)
553{
554 int i;
555
556 cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
557
558 printk(KERN_INFO "sending NMI to all CPUs:\n");
559 apic->send_IPI_all(NMI_VECTOR);
560
561 /* Wait for up to 10 seconds for all CPUs to do the backtrace */
562 for (i = 0; i < 10 * 1000; i++) {
563 if (cpumask_empty(to_cpumask(backtrace_mask)))
564 break;
565 mdelay(1);
566 }
567}
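That removes the old APIC NMI watchdog wholesale; the generic hard-lockup detector in kernel/watchdog.c, built on perf events, takes over the job. Its detection idea is the same one nmi_watchdog_tick() implemented above: sample the CPU's timer-interrupt count from each watchdog NMI and declare a lockup once it stops moving for about five seconds. A stripped-down sketch of that heuristic (single-CPU; the per-CPU variables, 'touched' handling and backtrace logic of the deleted code are elided):

/* 'sum' is this CPU's apic_timer_irqs + irq0_irqs, sampled from each
 * watchdog NMI; 'nmi_hz' is the watchdog NMI rate. */
static unsigned int last_irq_sum;	/* per-CPU in the real code */
static long alert_counter;		/* per-CPU in the real code */

static int lockup_suspected(unsigned int sum, unsigned int nmi_hz)
{
	if (sum == last_irq_sum) {
		/* no timer interrupts since the previous NMI: count it */
		if (++alert_counter == 5 * nmi_hz)
			return 1;	/* stuck for ~5 seconds */
	} else {
		last_irq_sum = sum;
		alert_counter = 0;
	}
	return 0;
}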
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index c1c52c341f40..bd16b58b8850 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -48,6 +48,16 @@ unsigned int uv_apicid_hibits;
48EXPORT_SYMBOL_GPL(uv_apicid_hibits); 48EXPORT_SYMBOL_GPL(uv_apicid_hibits);
49static DEFINE_SPINLOCK(uv_nmi_lock); 49static DEFINE_SPINLOCK(uv_nmi_lock);
50 50
51static unsigned long __init uv_early_read_mmr(unsigned long addr)
52{
53 unsigned long val, *mmr;
54
55 mmr = early_ioremap(UV_LOCAL_MMR_BASE | addr, sizeof(*mmr));
56 val = *mmr;
57 early_iounmap(mmr, sizeof(*mmr));
58 return val;
59}
60
51static inline bool is_GRU_range(u64 start, u64 end) 61static inline bool is_GRU_range(u64 start, u64 end)
52{ 62{
53 return start >= gru_start_paddr && end <= gru_end_paddr; 63 return start >= gru_start_paddr && end <= gru_end_paddr;
@@ -58,28 +68,24 @@ static bool uv_is_untracked_pat_range(u64 start, u64 end)
58 return is_ISA_range(start, end) || is_GRU_range(start, end); 68 return is_ISA_range(start, end) || is_GRU_range(start, end);
59} 69}
60 70
61static int early_get_nodeid(void) 71static int __init early_get_pnodeid(void)
62{ 72{
63 union uvh_node_id_u node_id; 73 union uvh_node_id_u node_id;
64 unsigned long *mmr; 74 union uvh_rh_gam_config_mmr_u m_n_config;
65 75 int pnode;
66 mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_NODE_ID, sizeof(*mmr));
67 node_id.v = *mmr;
68 early_iounmap(mmr, sizeof(*mmr));
69 76
70 /* Currently, all blades have same revision number */ 77 /* Currently, all blades have same revision number */
78 node_id.v = uv_early_read_mmr(UVH_NODE_ID);
79 m_n_config.v = uv_early_read_mmr(UVH_RH_GAM_CONFIG_MMR);
71 uv_min_hub_revision_id = node_id.s.revision; 80 uv_min_hub_revision_id = node_id.s.revision;
72 81
73 return node_id.s.node_id; 82 pnode = (node_id.s.node_id >> 1) & ((1 << m_n_config.s.n_skt) - 1);
83 return pnode;
74} 84}
75 85
76static void __init early_get_apic_pnode_shift(void) 86static void __init early_get_apic_pnode_shift(void)
77{ 87{
78 unsigned long *mmr; 88 uvh_apicid.v = uv_early_read_mmr(UVH_APICID);
79
80 mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_APICID, sizeof(*mmr));
81 uvh_apicid.v = *mmr;
82 early_iounmap(mmr, sizeof(*mmr));
83 if (!uvh_apicid.v) 89 if (!uvh_apicid.v)
84 /* 90 /*
85 * Old bios, use default value 91 * Old bios, use default value
@@ -95,21 +101,17 @@ static void __init early_get_apic_pnode_shift(void)
95static void __init uv_set_apicid_hibit(void) 101static void __init uv_set_apicid_hibit(void)
96{ 102{
97 union uvh_lb_target_physical_apic_id_mask_u apicid_mask; 103 union uvh_lb_target_physical_apic_id_mask_u apicid_mask;
98 unsigned long *mmr;
99 104
100 mmr = early_ioremap(UV_LOCAL_MMR_BASE | 105 apicid_mask.v = uv_early_read_mmr(UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK);
101 UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK, sizeof(*mmr));
102 apicid_mask.v = *mmr;
103 early_iounmap(mmr, sizeof(*mmr));
104 uv_apicid_hibits = apicid_mask.s.bit_enables & UV_APICID_HIBIT_MASK; 106 uv_apicid_hibits = apicid_mask.s.bit_enables & UV_APICID_HIBIT_MASK;
105} 107}
106 108
107static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) 109static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
108{ 110{
109 int nodeid; 111 int pnodeid;
110 112
111 if (!strcmp(oem_id, "SGI")) { 113 if (!strcmp(oem_id, "SGI")) {
112 nodeid = early_get_nodeid(); 114 pnodeid = early_get_pnodeid();
113 early_get_apic_pnode_shift(); 115 early_get_apic_pnode_shift();
114 x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; 116 x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range;
115 x86_platform.nmi_init = uv_nmi_init; 117 x86_platform.nmi_init = uv_nmi_init;
@@ -118,8 +120,8 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
118 else if (!strcmp(oem_table_id, "UVX")) 120 else if (!strcmp(oem_table_id, "UVX"))
119 uv_system_type = UV_X2APIC; 121 uv_system_type = UV_X2APIC;
120 else if (!strcmp(oem_table_id, "UVH")) { 122 else if (!strcmp(oem_table_id, "UVH")) {
121 __get_cpu_var(x2apic_extra_bits) = 123 __this_cpu_write(x2apic_extra_bits,
122 nodeid << (uvh_apicid.s.pnode_shift - 1); 124 pnodeid << uvh_apicid.s.pnode_shift);
123 uv_system_type = UV_NON_UNIQUE_APIC; 125 uv_system_type = UV_NON_UNIQUE_APIC;
124 uv_set_apicid_hibit(); 126 uv_set_apicid_hibit();
125 return 1; 127 return 1;
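The early_get_pnodeid() rework above stops treating the raw NODE_ID value as a pnode: the register carries the pnode in bits [n_skt:1], so the code now shifts out bit 0 and masks with the n_skt width read from the GAM config MMR, and the x2apic extra bits are then derived by shifting the pnode by the full pnode_shift. The decode step in isolation, as a sketch:

/* Sketch of the decode in early_get_pnodeid(): drop bit 0 of the raw
 * NODE_ID value and mask to the socket-id width (n_skt) reported by
 * UVH_RH_GAM_CONFIG_MMR. */
static inline int decode_pnode(unsigned long raw_node_id, int n_skt)
{
	return (raw_node_id >> 1) & ((1 << n_skt) - 1);
}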
@@ -284,7 +286,7 @@ static unsigned int x2apic_get_apic_id(unsigned long x)
284 unsigned int id; 286 unsigned int id;
285 287
286 WARN_ON(preemptible() && num_online_cpus() > 1); 288 WARN_ON(preemptible() && num_online_cpus() > 1);
287 id = x | __get_cpu_var(x2apic_extra_bits); 289 id = x | __this_cpu_read(x2apic_extra_bits);
288 290
289 return id; 291 return id;
290} 292}
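This hunk is one instance of a conversion that recurs through the rest of the merge (powernow-k8.c, mce.c, perf_event.c): __get_cpu_var() lvalue accesses become __this_cpu_read()/__this_cpu_write(), which on x86 can compile to a single %gs-relative instruction instead of first materializing this CPU's address. The pattern, sketched on a hypothetical per-CPU variable:

#include <linux/percpu.h>

static DEFINE_PER_CPU(unsigned int, demo_extra_bits);	/* hypothetical */

static void demo_set_extra_bits(unsigned int bits)
{
	/* old: __get_cpu_var(demo_extra_bits) = bits;
	 *      computes this CPU's address, then stores through it */
	__this_cpu_write(demo_extra_bits, bits);
	/* new: a single segment-relative mov on x86 */
}

static unsigned int demo_apply_extra_bits(unsigned int x)
{
	return x | __this_cpu_read(demo_extra_bits);
}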
@@ -376,7 +378,7 @@ struct apic __refdata apic_x2apic_uv_x = {
376 378
377static __cpuinit void set_x2apic_extra_bits(int pnode) 379static __cpuinit void set_x2apic_extra_bits(int pnode)
378{ 380{
379 __get_cpu_var(x2apic_extra_bits) = (pnode << 6); 381 __this_cpu_write(x2apic_extra_bits, pnode << uvh_apicid.s.pnode_shift);
380} 382}
381 383
382/* 384/*
@@ -639,7 +641,7 @@ void __cpuinit uv_cpu_init(void)
639 */ 641 */
640int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) 642int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
641{ 643{
642 if (reason != DIE_NMI_IPI) 644 if (reason != DIE_NMIUNKNOWN)
643 return NOTIFY_OK; 645 return NOTIFY_OK;
644 646
645 if (in_crash_kexec) 647 if (in_crash_kexec)
@@ -682,27 +684,32 @@ void uv_nmi_init(void)
682void __init uv_system_init(void) 684void __init uv_system_init(void)
683{ 685{
684 union uvh_rh_gam_config_mmr_u m_n_config; 686 union uvh_rh_gam_config_mmr_u m_n_config;
687 union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh;
685 union uvh_node_id_u node_id; 688 union uvh_node_id_u node_id;
686 unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size; 689 unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
687 int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val; 690 int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val, n_io;
688 int gnode_extra, max_pnode = 0; 691 int gnode_extra, max_pnode = 0;
689 unsigned long mmr_base, present, paddr; 692 unsigned long mmr_base, present, paddr;
690 unsigned short pnode_mask; 693 unsigned short pnode_mask, pnode_io_mask;
691 694
692 map_low_mmrs(); 695 map_low_mmrs();
693 696
694 m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR ); 697 m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR );
695 m_val = m_n_config.s.m_skt; 698 m_val = m_n_config.s.m_skt;
696 n_val = m_n_config.s.n_skt; 699 n_val = m_n_config.s.n_skt;
700 mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR);
701 n_io = mmioh.s.n_io;
697 mmr_base = 702 mmr_base =
698 uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) & 703 uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) &
699 ~UV_MMR_ENABLE; 704 ~UV_MMR_ENABLE;
700 pnode_mask = (1 << n_val) - 1; 705 pnode_mask = (1 << n_val) - 1;
706 pnode_io_mask = (1 << n_io) - 1;
707
701 node_id.v = uv_read_local_mmr(UVH_NODE_ID); 708 node_id.v = uv_read_local_mmr(UVH_NODE_ID);
702 gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1; 709 gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1;
703 gnode_upper = ((unsigned long)gnode_extra << m_val); 710 gnode_upper = ((unsigned long)gnode_extra << m_val);
704 printk(KERN_DEBUG "UV: N %d, M %d, gnode_upper 0x%lx, gnode_extra 0x%x\n", 711 printk(KERN_INFO "UV: N %d, M %d, N_IO: %d, gnode_upper 0x%lx, gnode_extra 0x%x, pnode_mask 0x%x, pnode_io_mask 0x%x\n",
705 n_val, m_val, gnode_upper, gnode_extra); 712 n_val, m_val, n_io, gnode_upper, gnode_extra, pnode_mask, pnode_io_mask);
706 713
707 printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base); 714 printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base);
708 715
@@ -735,7 +742,7 @@ void __init uv_system_init(void)
735 for (j = 0; j < 64; j++) { 742 for (j = 0; j < 64; j++) {
736 if (!test_bit(j, &present)) 743 if (!test_bit(j, &present))
737 continue; 744 continue;
738 pnode = (i * 64 + j); 745 pnode = (i * 64 + j) & pnode_mask;
739 uv_blade_info[blade].pnode = pnode; 746 uv_blade_info[blade].pnode = pnode;
740 uv_blade_info[blade].nr_possible_cpus = 0; 747 uv_blade_info[blade].nr_possible_cpus = 0;
741 uv_blade_info[blade].nr_online_cpus = 0; 748 uv_blade_info[blade].nr_online_cpus = 0;
@@ -756,6 +763,7 @@ void __init uv_system_init(void)
756 /* 763 /*
757 * apic_pnode_shift must be set before calling uv_apicid_to_pnode(); 764 * apic_pnode_shift must be set before calling uv_apicid_to_pnode();
758 */ 765 */
766 uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask;
759 uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift; 767 uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift;
760 pnode = uv_apicid_to_pnode(apicid); 768 pnode = uv_apicid_to_pnode(apicid);
761 blade = boot_pnode_to_blade(pnode); 769 blade = boot_pnode_to_blade(pnode);
@@ -772,7 +780,6 @@ void __init uv_system_init(void)
772 uv_cpu_hub_info(cpu)->numa_blade_id = blade; 780 uv_cpu_hub_info(cpu)->numa_blade_id = blade;
773 uv_cpu_hub_info(cpu)->blade_processor_id = lcpu; 781 uv_cpu_hub_info(cpu)->blade_processor_id = lcpu;
774 uv_cpu_hub_info(cpu)->pnode = pnode; 782 uv_cpu_hub_info(cpu)->pnode = pnode;
775 uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask;
776 uv_cpu_hub_info(cpu)->gpa_mask = (1UL << (m_val + n_val)) - 1; 783 uv_cpu_hub_info(cpu)->gpa_mask = (1UL << (m_val + n_val)) - 1;
777 uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; 784 uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
778 uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra; 785 uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra;
@@ -796,7 +803,7 @@ void __init uv_system_init(void)
796 803
797 map_gru_high(max_pnode); 804 map_gru_high(max_pnode);
798 map_mmr_high(max_pnode); 805 map_mmr_high(max_pnode);
799 map_mmioh_high(max_pnode); 806 map_mmioh_high(max_pnode & pnode_io_mask);
800 807
801 uv_cpu_init(); 808 uv_cpu_init();
802 uv_scir_register_cpu_notifier(); 809 uv_scir_register_cpu_notifier();
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 9e093f8fe78c..7c7bedb83c5a 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -668,7 +668,7 @@ EXPORT_SYMBOL_GPL(amd_erratum_383);
668 668
669bool cpu_has_amd_erratum(const int *erratum) 669bool cpu_has_amd_erratum(const int *erratum)
670{ 670{
671 struct cpuinfo_x86 *cpu = &current_cpu_data; 671 struct cpuinfo_x86 *cpu = __this_cpu_ptr(&cpu_info);
672 int osvw_id = *erratum++; 672 int osvw_id = *erratum++;
673 u32 range; 673 u32 range;
674 u32 ms; 674 u32 ms;
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4b68bda30938..1d59834396bd 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -894,7 +894,6 @@ void __init identify_boot_cpu(void)
894#else 894#else
895 vgetcpu_set_mode(); 895 vgetcpu_set_mode();
896#endif 896#endif
897 init_hw_perf_events();
898} 897}
899 898
900void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) 899void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 491977baf6c0..35c7e65e59be 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -521,7 +521,7 @@ static void check_supported_cpu(void *_rc)
521 521
522 *rc = -ENODEV; 522 *rc = -ENODEV;
523 523
524 if (current_cpu_data.x86_vendor != X86_VENDOR_AMD) 524 if (__this_cpu_read(cpu_info.x86_vendor) != X86_VENDOR_AMD)
525 return; 525 return;
526 526
527 eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); 527 eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
@@ -1377,7 +1377,7 @@ static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol)
1377static void query_values_on_cpu(void *_err) 1377static void query_values_on_cpu(void *_err)
1378{ 1378{
1379 int *err = _err; 1379 int *err = _err;
1380 struct powernow_k8_data *data = __get_cpu_var(powernow_data); 1380 struct powernow_k8_data *data = __this_cpu_read(powernow_data);
1381 1381
1382 *err = query_current_values_with_pending_wait(data); 1382 *err = query_current_values_with_pending_wait(data);
1383} 1383}
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 17ad03366211..7283e98deaae 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -149,8 +149,7 @@ union _cpuid4_leaf_ecx {
149}; 149};
150 150
151struct amd_l3_cache { 151struct amd_l3_cache {
152 struct pci_dev *dev; 152 struct amd_northbridge *nb;
153 bool can_disable;
154 unsigned indices; 153 unsigned indices;
155 u8 subcaches[4]; 154 u8 subcaches[4];
156}; 155};
@@ -266,7 +265,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
266 line_size = l2.line_size; 265 line_size = l2.line_size;
267 lines_per_tag = l2.lines_per_tag; 266 lines_per_tag = l2.lines_per_tag;
268 /* cpu_data has errata corrections for K7 applied */ 267 /* cpu_data has errata corrections for K7 applied */
269 size_in_kb = current_cpu_data.x86_cache_size; 268 size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
270 break; 269 break;
271 case 3: 270 case 3:
272 if (!l3.val) 271 if (!l3.val)
@@ -288,7 +287,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
288 eax->split.type = types[leaf]; 287 eax->split.type = types[leaf];
289 eax->split.level = levels[leaf]; 288 eax->split.level = levels[leaf];
290 eax->split.num_threads_sharing = 0; 289 eax->split.num_threads_sharing = 0;
291 eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1; 290 eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;
292 291
293 292
294 if (assoc == 0xffff) 293 if (assoc == 0xffff)
@@ -311,14 +310,12 @@ struct _cache_attr {
311/* 310/*
312 * L3 cache descriptors 311 * L3 cache descriptors
313 */ 312 */
314static struct amd_l3_cache **__cpuinitdata l3_caches;
315
316static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3) 313static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
317{ 314{
318 unsigned int sc0, sc1, sc2, sc3; 315 unsigned int sc0, sc1, sc2, sc3;
319 u32 val = 0; 316 u32 val = 0;
320 317
321 pci_read_config_dword(l3->dev, 0x1C4, &val); 318 pci_read_config_dword(l3->nb->misc, 0x1C4, &val);
322 319
323 /* calculate subcache sizes */ 320 /* calculate subcache sizes */
324 l3->subcaches[0] = sc0 = !(val & BIT(0)); 321 l3->subcaches[0] = sc0 = !(val & BIT(0));
@@ -330,47 +327,14 @@ static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
330 l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1; 327 l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
331} 328}
332 329
333static struct amd_l3_cache * __cpuinit amd_init_l3_cache(int node) 330static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf,
334{ 331 int index)
335 struct amd_l3_cache *l3;
336 struct pci_dev *dev = node_to_k8_nb_misc(node);
337
338 l3 = kzalloc(sizeof(struct amd_l3_cache), GFP_ATOMIC);
339 if (!l3) {
340 printk(KERN_WARNING "Error allocating L3 struct\n");
341 return NULL;
342 }
343
344 l3->dev = dev;
345
346 amd_calc_l3_indices(l3);
347
348 return l3;
349}
350
351static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
352 int index)
353{ 332{
333 static struct amd_l3_cache *__cpuinitdata l3_caches;
354 int node; 334 int node;
355 335
356 if (boot_cpu_data.x86 != 0x10) 336 /* only for L3, and not in virtualized environments */
357 return; 337 if (index < 3 || amd_nb_num() == 0)
358
359 if (index < 3)
360 return;
361
362 /* see errata #382 and #388 */
363 if (boot_cpu_data.x86_model < 0x8)
364 return;
365
366 if ((boot_cpu_data.x86_model == 0x8 ||
367 boot_cpu_data.x86_model == 0x9)
368 &&
369 boot_cpu_data.x86_mask < 0x1)
370 return;
371
372 /* not in virtualized environments */
373 if (k8_northbridges.num == 0)
374 return; 338 return;
375 339
376 /* 340 /*
@@ -378,7 +342,7 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
378 * never freed but this is done only on shutdown so it doesn't matter. 342 * never freed but this is done only on shutdown so it doesn't matter.
379 */ 343 */
380 if (!l3_caches) { 344 if (!l3_caches) {
381 int size = k8_northbridges.num * sizeof(struct amd_l3_cache *); 345 int size = amd_nb_num() * sizeof(struct amd_l3_cache);
382 346
383 l3_caches = kzalloc(size, GFP_ATOMIC); 347 l3_caches = kzalloc(size, GFP_ATOMIC);
384 if (!l3_caches) 348 if (!l3_caches)
@@ -387,14 +351,12 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
387 351
388 node = amd_get_nb_id(smp_processor_id()); 352 node = amd_get_nb_id(smp_processor_id());
389 353
390 if (!l3_caches[node]) { 354 if (!l3_caches[node].nb) {
391 l3_caches[node] = amd_init_l3_cache(node); 355 l3_caches[node].nb = node_to_amd_nb(node);
392 l3_caches[node]->can_disable = true; 356 amd_calc_l3_indices(&l3_caches[node]);
393 } 357 }
394 358
395 WARN_ON(!l3_caches[node]); 359 this_leaf->l3 = &l3_caches[node];
396
397 this_leaf->l3 = l3_caches[node];
398} 360}
399 361
400/* 362/*
@@ -408,7 +370,7 @@ int amd_get_l3_disable_slot(struct amd_l3_cache *l3, unsigned slot)
408{ 370{
409 unsigned int reg = 0; 371 unsigned int reg = 0;
410 372
411 pci_read_config_dword(l3->dev, 0x1BC + slot * 4, &reg); 373 pci_read_config_dword(l3->nb->misc, 0x1BC + slot * 4, &reg);
412 374
413 /* check whether this slot is activated already */ 375 /* check whether this slot is activated already */
414 if (reg & (3UL << 30)) 376 if (reg & (3UL << 30))
@@ -422,7 +384,8 @@ static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
422{ 384{
423 int index; 385 int index;
424 386
425 if (!this_leaf->l3 || !this_leaf->l3->can_disable) 387 if (!this_leaf->l3 ||
388 !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
426 return -EINVAL; 389 return -EINVAL;
427 390
428 index = amd_get_l3_disable_slot(this_leaf->l3, slot); 391 index = amd_get_l3_disable_slot(this_leaf->l3, slot);
@@ -457,7 +420,7 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
457 if (!l3->subcaches[i]) 420 if (!l3->subcaches[i])
458 continue; 421 continue;
459 422
460 pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg); 423 pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);
461 424
462 /* 425 /*
463 * We need to WBINVD on a core on the node containing the L3 426 * We need to WBINVD on a core on the node containing the L3
@@ -467,7 +430,7 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
467 wbinvd_on_cpu(cpu); 430 wbinvd_on_cpu(cpu);
468 431
469 reg |= BIT(31); 432 reg |= BIT(31);
470 pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg); 433 pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);
471 } 434 }
472} 435}
473 436
@@ -524,7 +487,8 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
524 if (!capable(CAP_SYS_ADMIN)) 487 if (!capable(CAP_SYS_ADMIN))
525 return -EPERM; 488 return -EPERM;
526 489
527 if (!this_leaf->l3 || !this_leaf->l3->can_disable) 490 if (!this_leaf->l3 ||
491 !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
528 return -EINVAL; 492 return -EINVAL;
529 493
530 cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); 494 cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
@@ -545,7 +509,7 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
545#define STORE_CACHE_DISABLE(slot) \ 509#define STORE_CACHE_DISABLE(slot) \
546static ssize_t \ 510static ssize_t \
547store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \ 511store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \
548 const char *buf, size_t count) \ 512 const char *buf, size_t count) \
549{ \ 513{ \
550 return store_cache_disable(this_leaf, buf, count, slot); \ 514 return store_cache_disable(this_leaf, buf, count, slot); \
551} 515}
@@ -558,10 +522,7 @@ static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
558 show_cache_disable_1, store_cache_disable_1); 522 show_cache_disable_1, store_cache_disable_1);
559 523
560#else /* CONFIG_AMD_NB */ 524#else /* CONFIG_AMD_NB */
561static void __cpuinit 525#define amd_init_l3_cache(x, y)
562amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, int index)
563{
564};
565#endif /* CONFIG_AMD_NB */ 526#endif /* CONFIG_AMD_NB */
566 527
567static int 528static int
@@ -575,7 +536,7 @@ __cpuinit cpuid4_cache_lookup_regs(int index,
575 536
576 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { 537 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
577 amd_cpuid4(index, &eax, &ebx, &ecx); 538 amd_cpuid4(index, &eax, &ebx, &ecx);
578 amd_check_l3_disable(this_leaf, index); 539 amd_init_l3_cache(this_leaf, index);
579 } else { 540 } else {
580 cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); 541 cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
581 } 542 }
@@ -983,30 +944,48 @@ define_one_ro(size);
983define_one_ro(shared_cpu_map); 944define_one_ro(shared_cpu_map);
984define_one_ro(shared_cpu_list); 945define_one_ro(shared_cpu_list);
985 946
986#define DEFAULT_SYSFS_CACHE_ATTRS \
987 &type.attr, \
988 &level.attr, \
989 &coherency_line_size.attr, \
990 &physical_line_partition.attr, \
991 &ways_of_associativity.attr, \
992 &number_of_sets.attr, \
993 &size.attr, \
994 &shared_cpu_map.attr, \
995 &shared_cpu_list.attr
996
997static struct attribute *default_attrs[] = { 947static struct attribute *default_attrs[] = {
998 DEFAULT_SYSFS_CACHE_ATTRS, 948 &type.attr,
949 &level.attr,
950 &coherency_line_size.attr,
951 &physical_line_partition.attr,
952 &ways_of_associativity.attr,
953 &number_of_sets.attr,
954 &size.attr,
955 &shared_cpu_map.attr,
956 &shared_cpu_list.attr,
999 NULL 957 NULL
1000}; 958};
1001 959
1002static struct attribute *default_l3_attrs[] = {
1003 DEFAULT_SYSFS_CACHE_ATTRS,
1004#ifdef CONFIG_AMD_NB 960#ifdef CONFIG_AMD_NB
1005 &cache_disable_0.attr, 961static struct attribute ** __cpuinit amd_l3_attrs(void)
1006 &cache_disable_1.attr, 962{
963 static struct attribute **attrs;
964 int n;
965
966 if (attrs)
967 return attrs;
968
969 n = sizeof (default_attrs) / sizeof (struct attribute *);
970
971 if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
972 n += 2;
973
974 attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL);
975 if (attrs == NULL)
976 return attrs = default_attrs;
977
978 for (n = 0; default_attrs[n]; n++)
979 attrs[n] = default_attrs[n];
980
981 if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
982 attrs[n++] = &cache_disable_0.attr;
983 attrs[n++] = &cache_disable_1.attr;
984 }
985
986 return attrs;
987}
1007#endif 988#endif
1008 NULL
1009};
1010 989
1011static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) 990static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
1012{ 991{
@@ -1117,11 +1096,11 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
1117 1096
1118 this_leaf = CPUID4_INFO_IDX(cpu, i); 1097 this_leaf = CPUID4_INFO_IDX(cpu, i);
1119 1098
1120 if (this_leaf->l3 && this_leaf->l3->can_disable) 1099 ktype_cache.default_attrs = default_attrs;
1121 ktype_cache.default_attrs = default_l3_attrs; 1100#ifdef CONFIG_AMD_NB
1122 else 1101 if (this_leaf->l3)
1123 ktype_cache.default_attrs = default_attrs; 1102 ktype_cache.default_attrs = amd_l3_attrs();
1124 1103#endif
1125 retval = kobject_init_and_add(&(this_object->kobj), 1104 retval = kobject_init_and_add(&(this_object->kobj),
1126 &ktype_cache, 1105 &ktype_cache,
1127 per_cpu(ici_cache_kobject, cpu), 1106 per_cpu(ici_cache_kobject, cpu),
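amd_l3_attrs() above replaces the two static sysfs attribute arrays with one built lazily at runtime: copy the common attributes, then append cache_disable_0/1 only when the northbridge advertises AMD_NB_L3_INDEX_DISABLE. A simplified sketch of that idiom, with generic names and without the static caching the kernel version keeps:

#include <linux/slab.h>
#include <linux/sysfs.h>

/* Sketch: append optional attributes to a NULL-terminated base array. */
static struct attribute **build_attrs(struct attribute **base,
				      struct attribute **optional,
				      int want_optional)
{
	struct attribute **attrs;
	int n, i, extra = 0;

	for (n = 0; base[n]; n++)
		;
	if (want_optional)
		for (extra = 0; optional[extra]; extra++)
			;

	/* +1 for the terminating NULL the sysfs core expects;
	 * kcalloc zeroes the array, so the terminator comes for free */
	attrs = kcalloc(n + extra + 1, sizeof(*attrs), GFP_KERNEL);
	if (!attrs)
		return base;	/* fall back to the common set */

	for (i = 0; i < n; i++)
		attrs[i] = base[i];
	for (i = 0; i < extra; i++)
		attrs[n + i] = optional[i];

	return attrs;
}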
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index e7dbde7bfedb..a77971979564 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -25,6 +25,7 @@
25#include <linux/gfp.h> 25#include <linux/gfp.h>
26#include <asm/mce.h> 26#include <asm/mce.h>
27#include <asm/apic.h> 27#include <asm/apic.h>
28#include <asm/nmi.h>
28 29
29/* Update fake mce registers on current CPU. */ 30/* Update fake mce registers on current CPU. */
30static void inject_mce(struct mce *m) 31static void inject_mce(struct mce *m)
@@ -83,7 +84,7 @@ static int mce_raise_notify(struct notifier_block *self,
83 struct die_args *args = (struct die_args *)data; 84 struct die_args *args = (struct die_args *)data;
84 int cpu = smp_processor_id(); 85 int cpu = smp_processor_id();
85 struct mce *m = &__get_cpu_var(injectm); 86 struct mce *m = &__get_cpu_var(injectm);
86 if (val != DIE_NMI_IPI || !cpumask_test_cpu(cpu, mce_inject_cpumask)) 87 if (val != DIE_NMI || !cpumask_test_cpu(cpu, mce_inject_cpumask))
87 return NOTIFY_DONE; 88 return NOTIFY_DONE;
88 cpumask_clear_cpu(cpu, mce_inject_cpumask); 89 cpumask_clear_cpu(cpu, mce_inject_cpumask);
89 if (m->inject_flags & MCJ_EXCEPTION) 90 if (m->inject_flags & MCJ_EXCEPTION)
@@ -95,7 +96,7 @@ static int mce_raise_notify(struct notifier_block *self,
95 96
96static struct notifier_block mce_raise_nb = { 97static struct notifier_block mce_raise_nb = {
97 .notifier_call = mce_raise_notify, 98 .notifier_call = mce_raise_notify,
98 .priority = 1000, 99 .priority = NMI_LOCAL_NORMAL_PRIOR,
99}; 100};
100 101
101/* Inject mce on current CPU */ 102/* Inject mce on current CPU */
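With DIE_NMI_IPI gone, mce-inject keys its notifier on DIE_NMI and expresses its ordering through the named NMI_LOCAL_NORMAL_PRIOR constant (from asm/nmi.h, hence the new include) rather than the magic priority 1000. A sketch of how such a die notifier is wired up, with a hypothetical handler and the registration shown commented out:

#include <linux/kdebug.h>
#include <linux/notifier.h>
#include <asm/nmi.h>

static int demo_nmi_notify(struct notifier_block *self,
			   unsigned long val, void *data)
{
	if (val != DIE_NMI)
		return NOTIFY_DONE;	/* not an NMI event, not ours */

	/* ... inspect the struct die_args passed via 'data' ... */
	return NOTIFY_STOP;	/* we consumed this NMI */
}

static struct notifier_block demo_nmi_nb = {
	.notifier_call	= demo_nmi_notify,
	.priority	= NMI_LOCAL_NORMAL_PRIOR,
};

/* registration, e.g. from an init function:
 *	register_die_notifier(&demo_nmi_nb);
 */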
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 7a35b72d7c03..d916183b7f9c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -326,7 +326,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
326 326
327static int msr_to_offset(u32 msr) 327static int msr_to_offset(u32 msr)
328{ 328{
329 unsigned bank = __get_cpu_var(injectm.bank); 329 unsigned bank = __this_cpu_read(injectm.bank);
330 330
331 if (msr == rip_msr) 331 if (msr == rip_msr)
332 return offsetof(struct mce, ip); 332 return offsetof(struct mce, ip);
@@ -346,7 +346,7 @@ static u64 mce_rdmsrl(u32 msr)
346{ 346{
347 u64 v; 347 u64 v;
348 348
349 if (__get_cpu_var(injectm).finished) { 349 if (__this_cpu_read(injectm.finished)) {
350 int offset = msr_to_offset(msr); 350 int offset = msr_to_offset(msr);
351 351
352 if (offset < 0) 352 if (offset < 0)
@@ -369,7 +369,7 @@ static u64 mce_rdmsrl(u32 msr)
369 369
370static void mce_wrmsrl(u32 msr, u64 v) 370static void mce_wrmsrl(u32 msr, u64 v)
371{ 371{
372 if (__get_cpu_var(injectm).finished) { 372 if (__this_cpu_read(injectm.finished)) {
373 int offset = msr_to_offset(msr); 373 int offset = msr_to_offset(msr);
374 374
375 if (offset >= 0) 375 if (offset >= 0)
@@ -1159,7 +1159,7 @@ static void mce_start_timer(unsigned long data)
1159 1159
1160 WARN_ON(smp_processor_id() != data); 1160 WARN_ON(smp_processor_id() != data);
1161 1161
1162 if (mce_available(&current_cpu_data)) { 1162 if (mce_available(__this_cpu_ptr(&cpu_info))) {
1163 machine_check_poll(MCP_TIMESTAMP, 1163 machine_check_poll(MCP_TIMESTAMP,
1164 &__get_cpu_var(mce_poll_banks)); 1164 &__get_cpu_var(mce_poll_banks));
1165 } 1165 }
@@ -1767,7 +1767,7 @@ static int mce_shutdown(struct sys_device *dev)
1767static int mce_resume(struct sys_device *dev) 1767static int mce_resume(struct sys_device *dev)
1768{ 1768{
1769 __mcheck_cpu_init_generic(); 1769 __mcheck_cpu_init_generic();
1770 __mcheck_cpu_init_vendor(&current_cpu_data); 1770 __mcheck_cpu_init_vendor(__this_cpu_ptr(&cpu_info));
1771 1771
1772 return 0; 1772 return 0;
1773} 1773}
@@ -1775,7 +1775,7 @@ static int mce_resume(struct sys_device *dev)
1775static void mce_cpu_restart(void *data) 1775static void mce_cpu_restart(void *data)
1776{ 1776{
1777 del_timer_sync(&__get_cpu_var(mce_timer)); 1777 del_timer_sync(&__get_cpu_var(mce_timer));
1778 if (!mce_available(&current_cpu_data)) 1778 if (!mce_available(__this_cpu_ptr(&cpu_info)))
1779 return; 1779 return;
1780 __mcheck_cpu_init_generic(); 1780 __mcheck_cpu_init_generic();
1781 __mcheck_cpu_init_timer(); 1781 __mcheck_cpu_init_timer();
@@ -1790,7 +1790,7 @@ static void mce_restart(void)
1790/* Toggle features for corrected errors */ 1790/* Toggle features for corrected errors */
1791static void mce_disable_ce(void *all) 1791static void mce_disable_ce(void *all)
1792{ 1792{
1793 if (!mce_available(&current_cpu_data)) 1793 if (!mce_available(__this_cpu_ptr(&cpu_info)))
1794 return; 1794 return;
1795 if (all) 1795 if (all)
1796 del_timer_sync(&__get_cpu_var(mce_timer)); 1796 del_timer_sync(&__get_cpu_var(mce_timer));
@@ -1799,7 +1799,7 @@ static void mce_disable_ce(void *all)
1799 1799
1800static void mce_enable_ce(void *all) 1800static void mce_enable_ce(void *all)
1801{ 1801{
1802 if (!mce_available(&current_cpu_data)) 1802 if (!mce_available(__this_cpu_ptr(&cpu_info)))
1803 return; 1803 return;
1804 cmci_reenable(); 1804 cmci_reenable();
1805 cmci_recheck(); 1805 cmci_recheck();
@@ -2022,7 +2022,7 @@ static void __cpuinit mce_disable_cpu(void *h)
2022 unsigned long action = *(unsigned long *)h; 2022 unsigned long action = *(unsigned long *)h;
2023 int i; 2023 int i;
2024 2024
2025 if (!mce_available(&current_cpu_data)) 2025 if (!mce_available(__this_cpu_ptr(&cpu_info)))
2026 return; 2026 return;
2027 2027
2028 if (!(action & CPU_TASKS_FROZEN)) 2028 if (!(action & CPU_TASKS_FROZEN))
@@ -2040,7 +2040,7 @@ static void __cpuinit mce_reenable_cpu(void *h)
2040 unsigned long action = *(unsigned long *)h; 2040 unsigned long action = *(unsigned long *)h;
2041 int i; 2041 int i;
2042 2042
2043 if (!mce_available(&current_cpu_data)) 2043 if (!mce_available(__this_cpu_ptr(&cpu_info)))
2044 return; 2044 return;
2045 2045
2046 if (!(action & CPU_TASKS_FROZEN)) 2046 if (!(action & CPU_TASKS_FROZEN))
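All the mce.c hunks above retire the current_cpu_data macro, which expanded to the __get_cpu_var(cpu_info) lvalue; callers that only need a pointer now take it once with __this_cpu_ptr(&cpu_info). The equivalent shapes on a stand-in struct (accessor names of this kernel vintage):

#include <linux/percpu.h>

struct demo_cpuinfo { int vendor; };	/* stand-in for cpuinfo_x86 */
static DEFINE_PER_CPU(struct demo_cpuinfo, demo_info);

static int demo_available(struct demo_cpuinfo *c)
{
	return c->vendor != 0;
}

static void demo_caller(void)
{
	/* old: demo_available(&current_cpu_data), i.e. taking the
	 *      address of the __get_cpu_var(demo_info) lvalue */
	if (!demo_available(__this_cpu_ptr(&demo_info)))
		return;
	/* ... */
}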
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 80c482382d5c..5bf2fac52aca 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -31,8 +31,6 @@
31#include <asm/mce.h> 31#include <asm/mce.h>
32#include <asm/msr.h> 32#include <asm/msr.h>
33 33
34#define PFX "mce_threshold: "
35#define VERSION "version 1.1.1"
36#define NR_BANKS 6 34#define NR_BANKS 6
37#define NR_BLOCKS 9 35#define NR_BLOCKS 9
38#define THRESHOLD_MAX 0xFFF 36#define THRESHOLD_MAX 0xFFF
@@ -59,12 +57,6 @@ struct threshold_block {
59 struct list_head miscj; 57 struct list_head miscj;
60}; 58};
61 59
62/* defaults used early on boot */
63static struct threshold_block threshold_defaults = {
64 .interrupt_enable = 0,
65 .threshold_limit = THRESHOLD_MAX,
66};
67
68struct threshold_bank { 60struct threshold_bank {
69 struct kobject *kobj; 61 struct kobject *kobj;
70 struct threshold_block *blocks; 62 struct threshold_block *blocks;
@@ -89,50 +81,101 @@ static void amd_threshold_interrupt(void);
89struct thresh_restart { 81struct thresh_restart {
90 struct threshold_block *b; 82 struct threshold_block *b;
91 int reset; 83 int reset;
84 int set_lvt_off;
85 int lvt_off;
92 u16 old_limit; 86 u16 old_limit;
93}; 87};
94 88
89static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
90{
91 int msr = (hi & MASK_LVTOFF_HI) >> 20;
92
93 if (apic < 0) {
94 pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt "
95 "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu,
96 b->bank, b->block, b->address, hi, lo);
97 return 0;
98 }
99
100 if (apic != msr) {
101 pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d "
102 "for bank %d, block %d (MSR%08X=0x%x%08x)\n",
103 b->cpu, apic, b->bank, b->block, b->address, hi, lo);
104 return 0;
105 }
106
107 return 1;
108};
109
95/* must be called with correct cpu affinity */ 110/* must be called with correct cpu affinity */
96/* Called via smp_call_function_single() */ 111/* Called via smp_call_function_single() */
97static void threshold_restart_bank(void *_tr) 112static void threshold_restart_bank(void *_tr)
98{ 113{
99 struct thresh_restart *tr = _tr; 114 struct thresh_restart *tr = _tr;
100 u32 mci_misc_hi, mci_misc_lo; 115 u32 hi, lo;
101 116
102 rdmsr(tr->b->address, mci_misc_lo, mci_misc_hi); 117 rdmsr(tr->b->address, lo, hi);
103 118
104 if (tr->b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) 119 if (tr->b->threshold_limit < (hi & THRESHOLD_MAX))
105 tr->reset = 1; /* limit cannot be lower than err count */ 120 tr->reset = 1; /* limit cannot be lower than err count */
106 121
107 if (tr->reset) { /* reset err count and overflow bit */ 122 if (tr->reset) { /* reset err count and overflow bit */
108 mci_misc_hi = 123 hi =
109 (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | 124 (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
110 (THRESHOLD_MAX - tr->b->threshold_limit); 125 (THRESHOLD_MAX - tr->b->threshold_limit);
111 } else if (tr->old_limit) { /* change limit w/o reset */ 126 } else if (tr->old_limit) { /* change limit w/o reset */
112 int new_count = (mci_misc_hi & THRESHOLD_MAX) + 127 int new_count = (hi & THRESHOLD_MAX) +
113 (tr->old_limit - tr->b->threshold_limit); 128 (tr->old_limit - tr->b->threshold_limit);
114 129
115 mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) | 130 hi = (hi & ~MASK_ERR_COUNT_HI) |
116 (new_count & THRESHOLD_MAX); 131 (new_count & THRESHOLD_MAX);
117 } 132 }
118 133
134 if (tr->set_lvt_off) {
135 if (lvt_off_valid(tr->b, tr->lvt_off, lo, hi)) {
136 /* set new lvt offset */
137 hi &= ~MASK_LVTOFF_HI;
138 hi |= tr->lvt_off << 20;
139 }
140 }
141
119 tr->b->interrupt_enable ? 142 tr->b->interrupt_enable ?
120 (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : 143 (hi = (hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) :
121 (mci_misc_hi &= ~MASK_INT_TYPE_HI); 144 (hi &= ~MASK_INT_TYPE_HI);
122 145
123 mci_misc_hi |= MASK_COUNT_EN_HI; 146 hi |= MASK_COUNT_EN_HI;
124 wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi); 147 wrmsr(tr->b->address, lo, hi);
148}
149
150static void mce_threshold_block_init(struct threshold_block *b, int offset)
151{
152 struct thresh_restart tr = {
153 .b = b,
154 .set_lvt_off = 1,
155 .lvt_off = offset,
156 };
157
158 b->threshold_limit = THRESHOLD_MAX;
159 threshold_restart_bank(&tr);
160};
161
162static int setup_APIC_mce(int reserved, int new)
163{
164 if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR,
165 APIC_EILVT_MSG_FIX, 0))
166 return new;
167
168 return reserved;
125} 169}
126 170
127/* cpu init entry point, called from mce.c with preempt off */ 171/* cpu init entry point, called from mce.c with preempt off */
128void mce_amd_feature_init(struct cpuinfo_x86 *c) 172void mce_amd_feature_init(struct cpuinfo_x86 *c)
129{ 173{
174 struct threshold_block b;
130 unsigned int cpu = smp_processor_id(); 175 unsigned int cpu = smp_processor_id();
131 u32 low = 0, high = 0, address = 0; 176 u32 low = 0, high = 0, address = 0;
132 unsigned int bank, block; 177 unsigned int bank, block;
133 struct thresh_restart tr; 178 int offset = -1;
134 int lvt_off = -1;
135 u8 offset;
136 179
137 for (bank = 0; bank < NR_BANKS; ++bank) { 180 for (bank = 0; bank < NR_BANKS; ++bank) {
138 for (block = 0; block < NR_BLOCKS; ++block) { 181 for (block = 0; block < NR_BLOCKS; ++block) {
@@ -163,39 +206,16 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
163 if (shared_bank[bank] && c->cpu_core_id) 206 if (shared_bank[bank] && c->cpu_core_id)
164 break; 207 break;
165#endif 208#endif
166 offset = (high & MASK_LVTOFF_HI) >> 20; 209 offset = setup_APIC_mce(offset,
167 if (lvt_off < 0) { 210 (high & MASK_LVTOFF_HI) >> 20);
168 if (setup_APIC_eilvt(offset,
169 THRESHOLD_APIC_VECTOR,
170 APIC_EILVT_MSG_FIX, 0)) {
171 pr_err(FW_BUG "cpu %d, failed to "
172 "setup threshold interrupt "
173 "for bank %d, block %d "
174 "(MSR%08X=0x%x%08x)",
175 smp_processor_id(), bank, block,
176 address, high, low);
177 continue;
178 }
179 lvt_off = offset;
180 } else if (lvt_off != offset) {
181 pr_err(FW_BUG "cpu %d, invalid threshold "
182 "interrupt offset %d for bank %d,"
183 "block %d (MSR%08X=0x%x%08x)",
184 smp_processor_id(), lvt_off, bank,
185 block, address, high, low);
186 continue;
187 }
188
189 high &= ~MASK_LVTOFF_HI;
190 high |= lvt_off << 20;
191 wrmsr(address, low, high);
192 211
193 threshold_defaults.address = address; 212 memset(&b, 0, sizeof(b));
194 tr.b = &threshold_defaults; 213 b.cpu = cpu;
195 tr.reset = 0; 214 b.bank = bank;
196 tr.old_limit = 0; 215 b.block = block;
197 threshold_restart_bank(&tr); 216 b.address = address;
198 217
218 mce_threshold_block_init(&b, offset);
199 mce_threshold_vector = amd_threshold_interrupt; 219 mce_threshold_vector = amd_threshold_interrupt;
200 } 220 }
201 } 221 }
@@ -298,9 +318,8 @@ store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
298 318
299 b->interrupt_enable = !!new; 319 b->interrupt_enable = !!new;
300 320
321 memset(&tr, 0, sizeof(tr));
301 tr.b = b; 322 tr.b = b;
302 tr.reset = 0;
303 tr.old_limit = 0;
304 323
305 smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); 324 smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
306 325
@@ -321,10 +340,10 @@ store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
321 if (new < 1) 340 if (new < 1)
322 new = 1; 341 new = 1;
323 342
343 memset(&tr, 0, sizeof(tr));
324 tr.old_limit = b->threshold_limit; 344 tr.old_limit = b->threshold_limit;
325 b->threshold_limit = new; 345 b->threshold_limit = new;
326 tr.b = b; 346 tr.b = b;
327 tr.reset = 0;
328 347
329 smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); 348 smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
330 349
@@ -603,9 +622,9 @@ static __cpuinit int threshold_create_device(unsigned int cpu)
603 continue; 622 continue;
604 err = threshold_create_bank(cpu, bank); 623 err = threshold_create_bank(cpu, bank);
605 if (err) 624 if (err)
606 goto out; 625 return err;
607 } 626 }
608out: 627
609 return err; 628 return err;
610} 629}
611 630
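The threshold-init rework funnels LVT offset handling through setup_APIC_mce(): the first bank/block that successfully registers the extended LVT entry fixes the offset, every later block simply reuses it, and lvt_off_valid() warns (FW_BUG) about BIOSes that programmed conflicting offsets. The claim-once shape in isolation, with a stand-in for setup_APIC_eilvt():

/* Sketch: claim a shared resource on first use, then reuse it.
 * 'reserved' starts at -1; register_fn() stands in for
 * setup_APIC_eilvt() and returns 0 on success. */
static int claim_once(int reserved, int wanted, int (*register_fn)(int))
{
	if (reserved < 0 && !register_fn(wanted))
		return wanted;	/* first successful claim wins */

	return reserved;	/* already claimed, or claim failed */
}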
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 6fcd0936194f..8694ef56459d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -130,7 +130,7 @@ void cmci_recheck(void)
130 unsigned long flags; 130 unsigned long flags;
131 int banks; 131 int banks;
132 132
133 if (!mce_available(&current_cpu_data) || !cmci_supported(&banks)) 133 if (!mce_available(__this_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
134 return; 134 return;
135 local_irq_save(flags); 135 local_irq_save(flags);
136 machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); 136 machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 4b683267eca5..e12246ff5aa6 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -53,8 +53,13 @@ struct thermal_state {
53 struct _thermal_state core_power_limit; 53 struct _thermal_state core_power_limit;
54 struct _thermal_state package_throttle; 54 struct _thermal_state package_throttle;
55 struct _thermal_state package_power_limit; 55 struct _thermal_state package_power_limit;
56 struct _thermal_state core_thresh0;
57 struct _thermal_state core_thresh1;
56}; 58};
57 59
60/* Callback to handle core threshold interrupts */
61int (*platform_thermal_notify)(__u64 msr_val);
62
58static DEFINE_PER_CPU(struct thermal_state, thermal_state); 63static DEFINE_PER_CPU(struct thermal_state, thermal_state);
59 64
60static atomic_t therm_throt_en = ATOMIC_INIT(0); 65static atomic_t therm_throt_en = ATOMIC_INIT(0);
@@ -200,6 +205,22 @@ static int therm_throt_process(bool new_event, int event, int level)
200 return 0; 205 return 0;
201} 206}
202 207
208static int thresh_event_valid(int event)
209{
210 struct _thermal_state *state;
211 unsigned int this_cpu = smp_processor_id();
212 struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
213 u64 now = get_jiffies_64();
214
215 state = (event == 0) ? &pstate->core_thresh0 : &pstate->core_thresh1;
216
217 if (time_before64(now, state->next_check))
218 return 0;
219
220 state->next_check = now + CHECK_INTERVAL;
221 return 1;
222}
223
203#ifdef CONFIG_SYSFS 224#ifdef CONFIG_SYSFS
204/* Add/Remove thermal_throttle interface for CPU device: */ 225/* Add/Remove thermal_throttle interface for CPU device: */
205static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev, 226static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev,
@@ -313,6 +334,22 @@ device_initcall(thermal_throttle_init_device);
313#define PACKAGE_THROTTLED ((__u64)2 << 62) 334#define PACKAGE_THROTTLED ((__u64)2 << 62)
314#define PACKAGE_POWER_LIMIT ((__u64)3 << 62) 335#define PACKAGE_POWER_LIMIT ((__u64)3 << 62)
315 336
337static void notify_thresholds(__u64 msr_val)
338{
339 /* check whether the interrupt handler is defined;
340 * otherwise simply return
341 */
342 if (!platform_thermal_notify)
343 return;
344
345 /* lower threshold reached */
346 if ((msr_val & THERM_LOG_THRESHOLD0) && thresh_event_valid(0))
347 platform_thermal_notify(msr_val);
348 /* higher threshold reached */
349 if ((msr_val & THERM_LOG_THRESHOLD1) && thresh_event_valid(1))
350 platform_thermal_notify(msr_val);
351}
352
316/* Thermal transition interrupt handler */ 353/* Thermal transition interrupt handler */
317static void intel_thermal_interrupt(void) 354static void intel_thermal_interrupt(void)
318{ 355{
@@ -321,6 +358,9 @@ static void intel_thermal_interrupt(void)
321 358
322 rdmsrl(MSR_IA32_THERM_STATUS, msr_val); 359 rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
323 360
 361 /* Check for violation of core thermal thresholds */
362 notify_thresholds(msr_val);
363
324 if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT, 364 if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
325 THERMAL_THROTTLING_EVENT, 365 THERMAL_THROTTLING_EVENT,
326 CORE_LEVEL) != 0) 366 CORE_LEVEL) != 0)
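thresh_event_valid() rate-limits the new core-threshold notifications with the same jiffies bookkeeping therm_throt_process() already uses for throttling events: keep a per-event next_check timestamp and drop anything earlier. The general pattern, sketched with an interval parameter in place of CHECK_INTERVAL:

#include <linux/jiffies.h>
#include <linux/types.h>

struct ratelimit_slot {
	u64 next_check;	/* jiffies_64 timestamp of next allowed event */
};

/* Sketch: return 1 at most once per 'interval' jiffies per slot. */
static int event_allowed(struct ratelimit_slot *slot, u64 interval)
{
	u64 now = get_jiffies_64();

	if (time_before64(now, slot->next_check))
		return 0;	/* too soon, drop this event */

	slot->next_check = now + interval;
	return 1;
}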
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 6d75b9145b13..9d977a2ea693 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -330,9 +330,6 @@ static bool reserve_pmc_hardware(void)
330{ 330{
331 int i; 331 int i;
332 332
333 if (nmi_watchdog == NMI_LOCAL_APIC)
334 disable_lapic_nmi_watchdog();
335
336 for (i = 0; i < x86_pmu.num_counters; i++) { 333 for (i = 0; i < x86_pmu.num_counters; i++) {
337 if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) 334 if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
338 goto perfctr_fail; 335 goto perfctr_fail;
@@ -355,9 +352,6 @@ perfctr_fail:
355 for (i--; i >= 0; i--) 352 for (i--; i >= 0; i--)
356 release_perfctr_nmi(x86_pmu.perfctr + i); 353 release_perfctr_nmi(x86_pmu.perfctr + i);
357 354
358 if (nmi_watchdog == NMI_LOCAL_APIC)
359 enable_lapic_nmi_watchdog();
360
361 return false; 355 return false;
362} 356}
363 357
@@ -369,9 +363,6 @@ static void release_pmc_hardware(void)
369 release_perfctr_nmi(x86_pmu.perfctr + i); 363 release_perfctr_nmi(x86_pmu.perfctr + i);
370 release_evntsel_nmi(x86_pmu.eventsel + i); 364 release_evntsel_nmi(x86_pmu.eventsel + i);
371 } 365 }
372
373 if (nmi_watchdog == NMI_LOCAL_APIC)
374 enable_lapic_nmi_watchdog();
375} 366}
376 367
377#else 368#else
@@ -384,15 +375,53 @@ static void release_pmc_hardware(void) {}
384static bool check_hw_exists(void) 375static bool check_hw_exists(void)
385{ 376{
386 u64 val, val_new = 0; 377 u64 val, val_new = 0;
387 int ret = 0; 378 int i, reg, ret = 0;
379
380 /*
 381 * Check to see if the BIOS enabled any of the counters; if so,
382 * complain and bail.
383 */
384 for (i = 0; i < x86_pmu.num_counters; i++) {
385 reg = x86_pmu.eventsel + i;
386 ret = rdmsrl_safe(reg, &val);
387 if (ret)
388 goto msr_fail;
389 if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
390 goto bios_fail;
391 }
388 392
393 if (x86_pmu.num_counters_fixed) {
394 reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
395 ret = rdmsrl_safe(reg, &val);
396 if (ret)
397 goto msr_fail;
398 for (i = 0; i < x86_pmu.num_counters_fixed; i++) {
399 if (val & (0x03 << i*4))
400 goto bios_fail;
401 }
402 }
403
404 /*
405 * Now write a value and read it back to see if it matches,
406 * this is needed to detect certain hardware emulators (qemu/kvm)
407 * that don't trap on the MSR access and always return 0s.
408 */
389 val = 0xabcdUL; 409 val = 0xabcdUL;
390 ret |= checking_wrmsrl(x86_pmu.perfctr, val); 410 ret = checking_wrmsrl(x86_pmu.perfctr, val);
391 ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new); 411 ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new);
392 if (ret || val != val_new) 412 if (ret || val != val_new)
393 return false; 413 goto msr_fail;
394 414
395 return true; 415 return true;
416
417bios_fail:
418 printk(KERN_CONT "Broken BIOS detected, using software events only.\n");
419 printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg, val);
420 return false;
421
422msr_fail:
423 printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n");
424 return false;
396} 425}
397 426
398static void reserve_ds_buffers(void); 427static void reserve_ds_buffers(void);
@@ -451,7 +480,7 @@ static int x86_setup_perfctr(struct perf_event *event)
451 struct hw_perf_event *hwc = &event->hw; 480 struct hw_perf_event *hwc = &event->hw;
452 u64 config; 481 u64 config;
453 482
454 if (!hwc->sample_period) { 483 if (!is_sampling_event(event)) {
455 hwc->sample_period = x86_pmu.max_period; 484 hwc->sample_period = x86_pmu.max_period;
456 hwc->last_period = hwc->sample_period; 485 hwc->last_period = hwc->sample_period;
457 local64_set(&hwc->period_left, hwc->sample_period); 486 local64_set(&hwc->period_left, hwc->sample_period);
@@ -968,8 +997,7 @@ x86_perf_event_set_period(struct perf_event *event)
968 997
969static void x86_pmu_enable_event(struct perf_event *event) 998static void x86_pmu_enable_event(struct perf_event *event)
970{ 999{
971 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1000 if (__this_cpu_read(cpu_hw_events.enabled))
972 if (cpuc->enabled)
973 __x86_pmu_enable_event(&event->hw, 1001 __x86_pmu_enable_event(&event->hw,
974 ARCH_PERFMON_EVENTSEL_ENABLE); 1002 ARCH_PERFMON_EVENTSEL_ENABLE);
975} 1003}
@@ -1239,11 +1267,10 @@ perf_event_nmi_handler(struct notifier_block *self,
1239 1267
1240 switch (cmd) { 1268 switch (cmd) {
1241 case DIE_NMI: 1269 case DIE_NMI:
1242 case DIE_NMI_IPI:
1243 break; 1270 break;
1244 case DIE_NMIUNKNOWN: 1271 case DIE_NMIUNKNOWN:
1245 this_nmi = percpu_read(irq_stat.__nmi_count); 1272 this_nmi = percpu_read(irq_stat.__nmi_count);
1246 if (this_nmi != __get_cpu_var(pmu_nmi).marked) 1273 if (this_nmi != __this_cpu_read(pmu_nmi.marked))
1247 /* let the kernel handle the unknown nmi */ 1274 /* let the kernel handle the unknown nmi */
1248 return NOTIFY_DONE; 1275 return NOTIFY_DONE;
1249 /* 1276 /*
@@ -1267,8 +1294,8 @@ perf_event_nmi_handler(struct notifier_block *self,
1267 this_nmi = percpu_read(irq_stat.__nmi_count); 1294 this_nmi = percpu_read(irq_stat.__nmi_count);
1268 if ((handled > 1) || 1295 if ((handled > 1) ||
1269 /* the next nmi could be a back-to-back nmi */ 1296 /* the next nmi could be a back-to-back nmi */
1270 ((__get_cpu_var(pmu_nmi).marked == this_nmi) && 1297 ((__this_cpu_read(pmu_nmi.marked) == this_nmi) &&
1271 (__get_cpu_var(pmu_nmi).handled > 1))) { 1298 (__this_cpu_read(pmu_nmi.handled) > 1))) {
1272 /* 1299 /*
1273 * We could have two subsequent back-to-back nmis: The 1300 * We could have two subsequent back-to-back nmis: The
1274 * first handles more than one counter, the 2nd 1301 * first handles more than one counter, the 2nd
@@ -1279,8 +1306,8 @@ perf_event_nmi_handler(struct notifier_block *self,
1279 * handling more than one counter. We will mark the 1306 * handling more than one counter. We will mark the
1280 * next (3rd) and then drop it if unhandled. 1307 * next (3rd) and then drop it if unhandled.
1281 */ 1308 */
1282 __get_cpu_var(pmu_nmi).marked = this_nmi + 1; 1309 __this_cpu_write(pmu_nmi.marked, this_nmi + 1);
1283 __get_cpu_var(pmu_nmi).handled = handled; 1310 __this_cpu_write(pmu_nmi.handled, handled);
1284 } 1311 }
1285 1312
1286 return NOTIFY_STOP; 1313 return NOTIFY_STOP;
@@ -1289,7 +1316,7 @@ perf_event_nmi_handler(struct notifier_block *self,
1289static __read_mostly struct notifier_block perf_event_nmi_notifier = { 1316static __read_mostly struct notifier_block perf_event_nmi_notifier = {
1290 .notifier_call = perf_event_nmi_handler, 1317 .notifier_call = perf_event_nmi_handler,
1291 .next = NULL, 1318 .next = NULL,
1292 .priority = 1 1319 .priority = NMI_LOCAL_LOW_PRIOR,
1293}; 1320};
1294 1321
1295static struct event_constraint unconstrained; 1322static struct event_constraint unconstrained;
@@ -1362,7 +1389,7 @@ static void __init pmu_check_apic(void)
1362 pr_info("no hardware sampling interrupt available.\n"); 1389 pr_info("no hardware sampling interrupt available.\n");
1363} 1390}
1364 1391
1365void __init init_hw_perf_events(void) 1392int __init init_hw_perf_events(void)
1366{ 1393{
1367 struct event_constraint *c; 1394 struct event_constraint *c;
1368 int err; 1395 int err;
@@ -1377,20 +1404,18 @@ void __init init_hw_perf_events(void)
1377 err = amd_pmu_init(); 1404 err = amd_pmu_init();
1378 break; 1405 break;
1379 default: 1406 default:
1380 return; 1407 return 0;
1381 } 1408 }
1382 if (err != 0) { 1409 if (err != 0) {
1383 pr_cont("no PMU driver, software events only.\n"); 1410 pr_cont("no PMU driver, software events only.\n");
1384 return; 1411 return 0;
1385 } 1412 }
1386 1413
1387 pmu_check_apic(); 1414 pmu_check_apic();
1388 1415
1389 /* sanity check that the hardware exists or is emulated */ 1416 /* sanity check that the hardware exists or is emulated */
1390 if (!check_hw_exists()) { 1417 if (!check_hw_exists())
1391 pr_cont("Broken PMU hardware detected, software events only.\n"); 1418 return 0;
1392 return;
1393 }
1394 1419
1395 pr_cont("%s PMU driver.\n", x86_pmu.name); 1420 pr_cont("%s PMU driver.\n", x86_pmu.name);
1396 1421
@@ -1438,9 +1463,12 @@ void __init init_hw_perf_events(void)
1438 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); 1463 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
1439 pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); 1464 pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
1440 1465
1441 perf_pmu_register(&pmu); 1466 perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
1442 perf_cpu_notifier(x86_pmu_notifier); 1467 perf_cpu_notifier(x86_pmu_notifier);
1468
1469 return 0;
1443} 1470}
1471early_initcall(init_hw_perf_events);
1444 1472
1445static inline void x86_pmu_read(struct perf_event *event) 1473static inline void x86_pmu_read(struct perf_event *event)
1446{ 1474{
@@ -1454,11 +1482,9 @@ static inline void x86_pmu_read(struct perf_event *event)
1454 */ 1482 */
1455static void x86_pmu_start_txn(struct pmu *pmu) 1483static void x86_pmu_start_txn(struct pmu *pmu)
1456{ 1484{
1457 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1458
1459 perf_pmu_disable(pmu); 1485 perf_pmu_disable(pmu);
1460 cpuc->group_flag |= PERF_EVENT_TXN; 1486 __this_cpu_or(cpu_hw_events.group_flag, PERF_EVENT_TXN);
1461 cpuc->n_txn = 0; 1487 __this_cpu_write(cpu_hw_events.n_txn, 0);
1462} 1488}
1463 1489
1464/* 1490/*
@@ -1468,14 +1494,12 @@ static void x86_pmu_start_txn(struct pmu *pmu)
1468 */ 1494 */
1469static void x86_pmu_cancel_txn(struct pmu *pmu) 1495static void x86_pmu_cancel_txn(struct pmu *pmu)
1470{ 1496{
1471 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1497 __this_cpu_and(cpu_hw_events.group_flag, ~PERF_EVENT_TXN);
1472
1473 cpuc->group_flag &= ~PERF_EVENT_TXN;
1474 /* 1498 /*
1475 * Truncate the collected events. 1499 * Truncate the collected events.
1476 */ 1500 */
1477 cpuc->n_added -= cpuc->n_txn; 1501 __this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn));
1478 cpuc->n_events -= cpuc->n_txn; 1502 __this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn));
1479 perf_pmu_enable(pmu); 1503 perf_pmu_enable(pmu);
1480} 1504}
1481 1505
@@ -1686,7 +1710,7 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
1686 1710
1687 perf_callchain_store(entry, regs->ip); 1711 perf_callchain_store(entry, regs->ip);
1688 1712
1689 dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); 1713 dump_trace(NULL, regs, NULL, &backtrace_ops, entry);
1690} 1714}
1691 1715
1692#ifdef CONFIG_COMPAT 1716#ifdef CONFIG_COMPAT
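
The reworked check_hw_exists() now fails in two distinguishable ways: a BIOS that left counters enabled (bios_fail) and MSRs that fault or don't round-trip (msr_fail). The write/read-back probe is the interesting part: emulators such as qemu/kvm may silently accept the wrmsr and read back zero, so a mismatch, not just a fault, means "no usable PMU". A condensed sketch of that probe (the helper name is illustrative; checking_wrmsrl()/rdmsrl_safe() are the fault-tolerant accessors the patch itself uses):

static bool pmu_counter_responds(unsigned int msr)
{
        u64 val = 0xabcdUL, val_new = 0;

        if (checking_wrmsrl(msr, val))
                return false;           /* write faulted: MSR absent */
        if (rdmsrl_safe(msr, &val_new))
                return false;           /* read faulted */
        return val == val_new;          /* emulators often read back 0 */
}
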
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index e421b8cd6944..67e2202a6039 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -1,7 +1,5 @@
1#ifdef CONFIG_CPU_SUP_AMD 1#ifdef CONFIG_CPU_SUP_AMD
2 2
3static DEFINE_RAW_SPINLOCK(amd_nb_lock);
4
5static __initconst const u64 amd_hw_cache_event_ids 3static __initconst const u64 amd_hw_cache_event_ids
6 [PERF_COUNT_HW_CACHE_MAX] 4 [PERF_COUNT_HW_CACHE_MAX]
7 [PERF_COUNT_HW_CACHE_OP_MAX] 5 [PERF_COUNT_HW_CACHE_OP_MAX]
@@ -275,7 +273,7 @@ done:
275 return &emptyconstraint; 273 return &emptyconstraint;
276} 274}
277 275
278static struct amd_nb *amd_alloc_nb(int cpu, int nb_id) 276static struct amd_nb *amd_alloc_nb(int cpu)
279{ 277{
280 struct amd_nb *nb; 278 struct amd_nb *nb;
281 int i; 279 int i;
@@ -285,7 +283,7 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
285 if (!nb) 283 if (!nb)
286 return NULL; 284 return NULL;
287 285
288 nb->nb_id = nb_id; 286 nb->nb_id = -1;
289 287
290 /* 288 /*
291 * initialize all possible NB constraints 289 * initialize all possible NB constraints
@@ -306,7 +304,7 @@ static int amd_pmu_cpu_prepare(int cpu)
306 if (boot_cpu_data.x86_max_cores < 2) 304 if (boot_cpu_data.x86_max_cores < 2)
307 return NOTIFY_OK; 305 return NOTIFY_OK;
308 306
309 cpuc->amd_nb = amd_alloc_nb(cpu, -1); 307 cpuc->amd_nb = amd_alloc_nb(cpu);
310 if (!cpuc->amd_nb) 308 if (!cpuc->amd_nb)
311 return NOTIFY_BAD; 309 return NOTIFY_BAD;
312 310
@@ -325,8 +323,6 @@ static void amd_pmu_cpu_starting(int cpu)
325 nb_id = amd_get_nb_id(cpu); 323 nb_id = amd_get_nb_id(cpu);
326 WARN_ON_ONCE(nb_id == BAD_APICID); 324 WARN_ON_ONCE(nb_id == BAD_APICID);
327 325
328 raw_spin_lock(&amd_nb_lock);
329
330 for_each_online_cpu(i) { 326 for_each_online_cpu(i) {
331 nb = per_cpu(cpu_hw_events, i).amd_nb; 327 nb = per_cpu(cpu_hw_events, i).amd_nb;
332 if (WARN_ON_ONCE(!nb)) 328 if (WARN_ON_ONCE(!nb))
@@ -341,8 +337,6 @@ static void amd_pmu_cpu_starting(int cpu)
341 337
342 cpuc->amd_nb->nb_id = nb_id; 338 cpuc->amd_nb->nb_id = nb_id;
343 cpuc->amd_nb->refcnt++; 339 cpuc->amd_nb->refcnt++;
344
345 raw_spin_unlock(&amd_nb_lock);
346} 340}
347 341
348static void amd_pmu_cpu_dead(int cpu) 342static void amd_pmu_cpu_dead(int cpu)
@@ -354,8 +348,6 @@ static void amd_pmu_cpu_dead(int cpu)
354 348
355 cpuhw = &per_cpu(cpu_hw_events, cpu); 349 cpuhw = &per_cpu(cpu_hw_events, cpu);
356 350
357 raw_spin_lock(&amd_nb_lock);
358
359 if (cpuhw->amd_nb) { 351 if (cpuhw->amd_nb) {
360 struct amd_nb *nb = cpuhw->amd_nb; 352 struct amd_nb *nb = cpuhw->amd_nb;
361 353
@@ -364,8 +356,6 @@ static void amd_pmu_cpu_dead(int cpu)
364 356
365 cpuhw->amd_nb = NULL; 357 cpuhw->amd_nb = NULL;
366 } 358 }
367
368 raw_spin_unlock(&amd_nb_lock);
369} 359}
370 360
371static __initconst const struct x86_pmu amd_pmu = { 361static __initconst const struct x86_pmu amd_pmu = {
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index c8f5c088cad1..008835c1d79c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -649,7 +649,7 @@ static void intel_pmu_enable_event(struct perf_event *event)
649 struct hw_perf_event *hwc = &event->hw; 649 struct hw_perf_event *hwc = &event->hw;
650 650
651 if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { 651 if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
652 if (!__get_cpu_var(cpu_hw_events).enabled) 652 if (!__this_cpu_read(cpu_hw_events.enabled))
653 return; 653 return;
654 654
655 intel_pmu_enable_bts(hwc->config); 655 intel_pmu_enable_bts(hwc->config);
@@ -679,7 +679,7 @@ static int intel_pmu_save_and_restart(struct perf_event *event)
679 679
680static void intel_pmu_reset(void) 680static void intel_pmu_reset(void)
681{ 681{
682 struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds; 682 struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
683 unsigned long flags; 683 unsigned long flags;
684 int idx; 684 int idx;
685 685
@@ -816,6 +816,32 @@ static int intel_pmu_hw_config(struct perf_event *event)
816 if (ret) 816 if (ret)
817 return ret; 817 return ret;
818 818
819 if (event->attr.precise_ip &&
820 (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
821 /*
822 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
823 * (0x003c) so that we can use it with PEBS.
824 *
825 * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
826 * PEBS capable. However we can use INST_RETIRED.ANY_P
827 * (0x00c0), which is a PEBS capable event, to get the same
828 * count.
829 *
 830 * INST_RETIRED.ANY_P counts the number of cycles that retire
831 * CNTMASK instructions. By setting CNTMASK to a value (16)
832 * larger than the maximum number of instructions that can be
833 * retired per cycle (4) and then inverting the condition, we
 834 * count all cycles that retire fewer than 16 instructions, which
835 * is every cycle.
836 *
837 * Thereby we gain a PEBS capable cycle counter.
838 */
839 u64 alt_config = 0x108000c0; /* INST_RETIRED.TOTAL_CYCLES */
840
841 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
842 event->hw.config = alt_config;
843 }
844
819 if (event->attr.type != PERF_TYPE_RAW) 845 if (event->attr.type != PERF_TYPE_RAW)
820 return 0; 846 return 0;
821 847
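
The magic alt_config constant above decomposes cleanly against the architectural EVNTSEL layout (event select in bits 0-7, INV at bit 23, CMASK in bits 24-31). A sketch of the arithmetic (macro names are illustrative):

#define EVTSEL_EVENT(e)   ((u64)(e) & 0xff)   /* 0xc0 = INST_RETIRED.ANY_P */
#define EVTSEL_INV        (1ULL << 23)        /* invert the CMASK compare  */
#define EVTSEL_CMASK(c)   ((u64)(c) << 24)    /* counter-mask threshold    */

/* "fewer than 16 instructions retired this cycle" == every cycle */
u64 alt = EVTSEL_EVENT(0xc0) | EVTSEL_INV | EVTSEL_CMASK(16);
/* alt == 0x108000c0, matching the constant in the hunk */
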
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index 81400b93e694..e56b9bfbabd1 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -753,19 +753,21 @@ out:
753 753
754static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) 754static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
755{ 755{
756 int overflow = 0; 756 u64 v;
757 u32 low, high;
758 757
 759 rdmsr(hwc->config_base + hwc->idx, low, high); 758 /* the official way to indicate an overflow */
760 759 rdmsrl(hwc->config_base + hwc->idx, v);
761 /* we need to check high bit for unflagged overflows */ 760 if (v & P4_CCCR_OVF) {
762 if ((low & P4_CCCR_OVF) || !(high & (1 << 31))) { 761 wrmsrl(hwc->config_base + hwc->idx, v & ~P4_CCCR_OVF);
763 overflow = 1; 762 return 1;
764 (void)checking_wrmsrl(hwc->config_base + hwc->idx,
765 ((u64)low) & ~P4_CCCR_OVF);
766 } 763 }
767 764
 768 return overflow; 765 /* it might be an unflagged overflow */
766 rdmsrl(hwc->event_base + hwc->idx, v);
767 if (!(v & ARCH_P4_CNTRVAL_MASK))
768 return 1;
769
770 return 0;
769} 771}
770 772
771static void p4_pmu_disable_pebs(void) 773static void p4_pmu_disable_pebs(void)
@@ -1152,9 +1154,9 @@ static __initconst const struct x86_pmu p4_pmu = {
1152 */ 1154 */
1153 .num_counters = ARCH_P4_MAX_CCCR, 1155 .num_counters = ARCH_P4_MAX_CCCR,
1154 .apic = 1, 1156 .apic = 1,
1155 .cntval_bits = 40, 1157 .cntval_bits = ARCH_P4_CNTRVAL_BITS,
1156 .cntval_mask = (1ULL << 40) - 1, 1158 .cntval_mask = ARCH_P4_CNTRVAL_MASK,
1157 .max_period = (1ULL << 39) - 1, 1159 .max_period = (1ULL << (ARCH_P4_CNTRVAL_BITS - 1)) - 1,
1158 .hw_config = p4_hw_config, 1160 .hw_config = p4_hw_config,
1159 .schedule_events = p4_pmu_schedule_events, 1161 .schedule_events = p4_pmu_schedule_events,
1160 /* 1162 /*
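
The cntval changes at the bottom follow the usual perf counting scheme: a counter is armed with the negated sampling period and counts up, so the period has to fit below the counter's top bit. A worked sketch under that assumption, with ARCH_P4_CNTRVAL_BITS = 40 as the hunk implies:

u64 cntval_mask = (1ULL << 40) - 1;               /* ARCH_P4_CNTRVAL_MASK  */
u64 max_period  = (1ULL << 39) - 1;               /* keep the top bit free */
u64 armed       = (0 - max_period) & cntval_mask; /* initial counter value */

While the counter value is still in the "negative" range it has not overflowed yet; that is the idea behind the unflagged-overflow fallback in p4_pmu_clear_cccr_ovf() for the case where P4_CCCR_OVF was never latched.
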
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index d9f4ff8fcd69..d5a236615501 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -16,32 +16,12 @@
16#include <linux/kernel.h> 16#include <linux/kernel.h>
17#include <linux/bitops.h> 17#include <linux/bitops.h>
18#include <linux/smp.h> 18#include <linux/smp.h>
19#include <linux/nmi.h> 19#include <asm/nmi.h>
20#include <linux/kprobes.h> 20#include <linux/kprobes.h>
21 21
22#include <asm/apic.h> 22#include <asm/apic.h>
23#include <asm/perf_event.h> 23#include <asm/perf_event.h>
24 24
25struct nmi_watchdog_ctlblk {
26 unsigned int cccr_msr;
27 unsigned int perfctr_msr; /* the MSR to reset in NMI handler */
28 unsigned int evntsel_msr; /* the MSR to select the events to handle */
29};
30
31/* Interface defining a CPU specific perfctr watchdog */
32struct wd_ops {
33 int (*reserve)(void);
34 void (*unreserve)(void);
35 int (*setup)(unsigned nmi_hz);
36 void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz);
37 void (*stop)(void);
38 unsigned perfctr;
39 unsigned evntsel;
40 u64 checkbit;
41};
42
43static const struct wd_ops *wd_ops;
44
45/* 25/*
46 * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its 26 * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its
47 * offset from MSR_P4_BSU_ESCR0. 27 * offset from MSR_P4_BSU_ESCR0.
@@ -60,8 +40,6 @@ static const struct wd_ops *wd_ops;
60static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS); 40static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS);
61static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS); 41static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS);
62 42
63static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
64
65/* converts an msr to an appropriate reservation bit */ 43/* converts an msr to an appropriate reservation bit */
66static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) 44static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
67{ 45{
@@ -172,623 +150,3 @@ void release_evntsel_nmi(unsigned int msr)
172 clear_bit(counter, evntsel_nmi_owner); 150 clear_bit(counter, evntsel_nmi_owner);
173} 151}
174EXPORT_SYMBOL(release_evntsel_nmi); 152EXPORT_SYMBOL(release_evntsel_nmi);
175
176void disable_lapic_nmi_watchdog(void)
177{
178 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
179
180 if (atomic_read(&nmi_active) <= 0)
181 return;
182
183 on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
184
185 if (wd_ops)
186 wd_ops->unreserve();
187
188 BUG_ON(atomic_read(&nmi_active) != 0);
189}
190
191void enable_lapic_nmi_watchdog(void)
192{
193 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
194
195 /* are we already enabled */
196 if (atomic_read(&nmi_active) != 0)
197 return;
198
199 /* are we lapic aware */
200 if (!wd_ops)
201 return;
202 if (!wd_ops->reserve()) {
203 printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n");
204 return;
205 }
206
207 on_each_cpu(setup_apic_nmi_watchdog, NULL, 1);
208 touch_nmi_watchdog();
209}
210
211/*
212 * Activate the NMI watchdog via the local APIC.
213 */
214
215static unsigned int adjust_for_32bit_ctr(unsigned int hz)
216{
217 u64 counter_val;
218 unsigned int retval = hz;
219
220 /*
221 * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter
222 * are writable, with higher bits sign extending from bit 31.
223 * So, we can only program the counter with 31-bit values, and
224 * the 32nd bit should be 1 so that bits 33 and above are 1 as well.
225 * Find the appropriate nmi_hz
226 */
227 counter_val = (u64)cpu_khz * 1000;
228 do_div(counter_val, retval);
229 if (counter_val > 0x7fffffffULL) {
230 u64 count = (u64)cpu_khz * 1000;
231 do_div(count, 0x7fffffffUL);
232 retval = count + 1;
233 }
234 return retval;
235}
236
237static void write_watchdog_counter(unsigned int perfctr_msr,
238 const char *descr, unsigned nmi_hz)
239{
240 u64 count = (u64)cpu_khz * 1000;
241
242 do_div(count, nmi_hz);
243 if (descr)
244 pr_debug("setting %s to -0x%08Lx\n", descr, count);
245 wrmsrl(perfctr_msr, 0 - count);
246}
247
248static void write_watchdog_counter32(unsigned int perfctr_msr,
249 const char *descr, unsigned nmi_hz)
250{
251 u64 count = (u64)cpu_khz * 1000;
252
253 do_div(count, nmi_hz);
254 if (descr)
255 pr_debug("setting %s to -0x%08Lx\n", descr, count);
256 wrmsr(perfctr_msr, (u32)(-count), 0);
257}
258
259/*
260 * AMD K7/K8/Family10h/Family11h support.
261 * AMD keeps this interface nicely stable so there is not much variety
262 */
263#define K7_EVNTSEL_ENABLE (1 << 22)
264#define K7_EVNTSEL_INT (1 << 20)
265#define K7_EVNTSEL_OS (1 << 17)
266#define K7_EVNTSEL_USR (1 << 16)
267#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
268#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
269
270static int setup_k7_watchdog(unsigned nmi_hz)
271{
272 unsigned int perfctr_msr, evntsel_msr;
273 unsigned int evntsel;
274 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
275
276 perfctr_msr = wd_ops->perfctr;
277 evntsel_msr = wd_ops->evntsel;
278
279 wrmsrl(perfctr_msr, 0UL);
280
281 evntsel = K7_EVNTSEL_INT
282 | K7_EVNTSEL_OS
283 | K7_EVNTSEL_USR
284 | K7_NMI_EVENT;
285
286 /* setup the timer */
287 wrmsr(evntsel_msr, evntsel, 0);
288 write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz);
289
290 /* initialize the wd struct before enabling */
291 wd->perfctr_msr = perfctr_msr;
292 wd->evntsel_msr = evntsel_msr;
293 wd->cccr_msr = 0; /* unused */
294
295 /* ok, everything is initialized, announce that we're set */
296 cpu_nmi_set_wd_enabled();
297
298 apic_write(APIC_LVTPC, APIC_DM_NMI);
299 evntsel |= K7_EVNTSEL_ENABLE;
300 wrmsr(evntsel_msr, evntsel, 0);
301
302 return 1;
303}
304
305static void single_msr_stop_watchdog(void)
306{
307 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
308
309 wrmsr(wd->evntsel_msr, 0, 0);
310}
311
312static int single_msr_reserve(void)
313{
314 if (!reserve_perfctr_nmi(wd_ops->perfctr))
315 return 0;
316
317 if (!reserve_evntsel_nmi(wd_ops->evntsel)) {
318 release_perfctr_nmi(wd_ops->perfctr);
319 return 0;
320 }
321 return 1;
322}
323
324static void single_msr_unreserve(void)
325{
326 release_evntsel_nmi(wd_ops->evntsel);
327 release_perfctr_nmi(wd_ops->perfctr);
328}
329
330static void __kprobes
331single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
332{
333 /* start the cycle over again */
334 write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
335}
336
337static const struct wd_ops k7_wd_ops = {
338 .reserve = single_msr_reserve,
339 .unreserve = single_msr_unreserve,
340 .setup = setup_k7_watchdog,
341 .rearm = single_msr_rearm,
342 .stop = single_msr_stop_watchdog,
343 .perfctr = MSR_K7_PERFCTR0,
344 .evntsel = MSR_K7_EVNTSEL0,
345 .checkbit = 1ULL << 47,
346};
347
348/*
349 * Intel Model 6 (PPro+,P2,P3,P-M,Core1)
350 */
351#define P6_EVNTSEL0_ENABLE (1 << 22)
352#define P6_EVNTSEL_INT (1 << 20)
353#define P6_EVNTSEL_OS (1 << 17)
354#define P6_EVNTSEL_USR (1 << 16)
355#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
356#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED
357
358static int setup_p6_watchdog(unsigned nmi_hz)
359{
360 unsigned int perfctr_msr, evntsel_msr;
361 unsigned int evntsel;
362 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
363
364 perfctr_msr = wd_ops->perfctr;
365 evntsel_msr = wd_ops->evntsel;
366
367 /* KVM doesn't implement this MSR */
368 if (wrmsr_safe(perfctr_msr, 0, 0) < 0)
369 return 0;
370
371 evntsel = P6_EVNTSEL_INT
372 | P6_EVNTSEL_OS
373 | P6_EVNTSEL_USR
374 | P6_NMI_EVENT;
375
376 /* setup the timer */
377 wrmsr(evntsel_msr, evntsel, 0);
378 nmi_hz = adjust_for_32bit_ctr(nmi_hz);
379 write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz);
380
381 /* initialize the wd struct before enabling */
382 wd->perfctr_msr = perfctr_msr;
383 wd->evntsel_msr = evntsel_msr;
384 wd->cccr_msr = 0; /* unused */
385
386 /* ok, everything is initialized, announce that we're set */
387 cpu_nmi_set_wd_enabled();
388
389 apic_write(APIC_LVTPC, APIC_DM_NMI);
390 evntsel |= P6_EVNTSEL0_ENABLE;
391 wrmsr(evntsel_msr, evntsel, 0);
392
393 return 1;
394}
395
396static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
397{
398 /*
399 * P6-based Pentium M needs to re-unmask
400 * the APIC vector, but it doesn't hurt
401 * other P6 variants.
402 * ArchPerfmon/Core Duo also needs this
403 */
404 apic_write(APIC_LVTPC, APIC_DM_NMI);
405
406 /* P6/ARCH_PERFMON has 32 bit counter write */
407 write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz);
408}
409
410static const struct wd_ops p6_wd_ops = {
411 .reserve = single_msr_reserve,
412 .unreserve = single_msr_unreserve,
413 .setup = setup_p6_watchdog,
414 .rearm = p6_rearm,
415 .stop = single_msr_stop_watchdog,
416 .perfctr = MSR_P6_PERFCTR0,
417 .evntsel = MSR_P6_EVNTSEL0,
418 .checkbit = 1ULL << 39,
419};
420
421/*
422 * Intel P4 performance counters.
423 * By far the most complicated of all.
424 */
425#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1 << 7)
426#define P4_ESCR_EVENT_SELECT(N) ((N) << 25)
427#define P4_ESCR_OS (1 << 3)
428#define P4_ESCR_USR (1 << 2)
429#define P4_CCCR_OVF_PMI0 (1 << 26)
430#define P4_CCCR_OVF_PMI1 (1 << 27)
431#define P4_CCCR_THRESHOLD(N) ((N) << 20)
432#define P4_CCCR_COMPLEMENT (1 << 19)
433#define P4_CCCR_COMPARE (1 << 18)
434#define P4_CCCR_REQUIRED (3 << 16)
435#define P4_CCCR_ESCR_SELECT(N) ((N) << 13)
436#define P4_CCCR_ENABLE (1 << 12)
437#define P4_CCCR_OVF (1 << 31)
438
439#define P4_CONTROLS 18
440static unsigned int p4_controls[18] = {
441 MSR_P4_BPU_CCCR0,
442 MSR_P4_BPU_CCCR1,
443 MSR_P4_BPU_CCCR2,
444 MSR_P4_BPU_CCCR3,
445 MSR_P4_MS_CCCR0,
446 MSR_P4_MS_CCCR1,
447 MSR_P4_MS_CCCR2,
448 MSR_P4_MS_CCCR3,
449 MSR_P4_FLAME_CCCR0,
450 MSR_P4_FLAME_CCCR1,
451 MSR_P4_FLAME_CCCR2,
452 MSR_P4_FLAME_CCCR3,
453 MSR_P4_IQ_CCCR0,
454 MSR_P4_IQ_CCCR1,
455 MSR_P4_IQ_CCCR2,
456 MSR_P4_IQ_CCCR3,
457 MSR_P4_IQ_CCCR4,
458 MSR_P4_IQ_CCCR5,
459};
460/*
461 * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
462 * CRU_ESCR0 (with any non-null event selector) through a complemented
463 * max threshold. [IA32-Vol3, Section 14.9.9]
464 */
465static int setup_p4_watchdog(unsigned nmi_hz)
466{
467 unsigned int perfctr_msr, evntsel_msr, cccr_msr;
468 unsigned int evntsel, cccr_val;
469 unsigned int misc_enable, dummy;
470 unsigned int ht_num;
471 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
472
473 rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
474 if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
475 return 0;
476
477#ifdef CONFIG_SMP
478 /* detect which hyperthread we are on */
479 if (smp_num_siblings == 2) {
480 unsigned int ebx, apicid;
481
482 ebx = cpuid_ebx(1);
483 apicid = (ebx >> 24) & 0xff;
484 ht_num = apicid & 1;
485 } else
486#endif
487 ht_num = 0;
488
489 /*
490 * performance counters are shared resources
491 * assign each hyperthread its own set
492 * (re-use the ESCR0 register, seems safe
493 * and keeps the cccr_val the same)
494 */
495 if (!ht_num) {
496 /* logical cpu 0 */
497 perfctr_msr = MSR_P4_IQ_PERFCTR0;
498 evntsel_msr = MSR_P4_CRU_ESCR0;
499 cccr_msr = MSR_P4_IQ_CCCR0;
500 cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
501
502 /*
503 * If we're on the kdump kernel or in some other situation, we may
504 * still have other performance counter registers set to
505 * interrupt and they'll keep interrupting forever because
506 * of the P4_CCCR_OVF quirk. So we need to ACK all the
507 * pending interrupts and disable all the registers here,
508 * before reenabling the NMI delivery. Refer to p4_rearm()
509 * about the P4_CCCR_OVF quirk.
510 */
511 if (reset_devices) {
512 unsigned int low, high;
513 int i;
514
515 for (i = 0; i < P4_CONTROLS; i++) {
516 rdmsr(p4_controls[i], low, high);
517 low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF);
518 wrmsr(p4_controls[i], low, high);
519 }
520 }
521 } else {
522 /* logical cpu 1 */
523 perfctr_msr = MSR_P4_IQ_PERFCTR1;
524 evntsel_msr = MSR_P4_CRU_ESCR0;
525 cccr_msr = MSR_P4_IQ_CCCR1;
526
527 /* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */
528 if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4)
529 cccr_val = P4_CCCR_OVF_PMI0;
530 else
531 cccr_val = P4_CCCR_OVF_PMI1;
532 cccr_val |= P4_CCCR_ESCR_SELECT(4);
533 }
534
535 evntsel = P4_ESCR_EVENT_SELECT(0x3F)
536 | P4_ESCR_OS
537 | P4_ESCR_USR;
538
539 cccr_val |= P4_CCCR_THRESHOLD(15)
540 | P4_CCCR_COMPLEMENT
541 | P4_CCCR_COMPARE
542 | P4_CCCR_REQUIRED;
543
544 wrmsr(evntsel_msr, evntsel, 0);
545 wrmsr(cccr_msr, cccr_val, 0);
546 write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz);
547
548 wd->perfctr_msr = perfctr_msr;
549 wd->evntsel_msr = evntsel_msr;
550 wd->cccr_msr = cccr_msr;
551
552 /* ok, everything is initialized, announce that we're set */
553 cpu_nmi_set_wd_enabled();
554
555 apic_write(APIC_LVTPC, APIC_DM_NMI);
556 cccr_val |= P4_CCCR_ENABLE;
557 wrmsr(cccr_msr, cccr_val, 0);
558 return 1;
559}
560
561static void stop_p4_watchdog(void)
562{
563 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
564 wrmsr(wd->cccr_msr, 0, 0);
565 wrmsr(wd->evntsel_msr, 0, 0);
566}
567
568static int p4_reserve(void)
569{
570 if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0))
571 return 0;
572#ifdef CONFIG_SMP
573 if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1))
574 goto fail1;
575#endif
576 if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0))
577 goto fail2;
578 /* RED-PEN why is ESCR1 not reserved here? */
579 return 1;
580 fail2:
581#ifdef CONFIG_SMP
582 if (smp_num_siblings > 1)
583 release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
584 fail1:
585#endif
586 release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
587 return 0;
588}
589
590static void p4_unreserve(void)
591{
592#ifdef CONFIG_SMP
593 if (smp_num_siblings > 1)
594 release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
595#endif
596 release_evntsel_nmi(MSR_P4_CRU_ESCR0);
597 release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
598}
599
600static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
601{
602 unsigned dummy;
603 /*
604 * P4 quirks:
605 * - An overflown perfctr will assert its interrupt
606 * until the OVF flag in its CCCR is cleared.
607 * - LVTPC is masked on interrupt and must be
608 * unmasked by the LVTPC handler.
609 */
610 rdmsrl(wd->cccr_msr, dummy);
611 dummy &= ~P4_CCCR_OVF;
612 wrmsrl(wd->cccr_msr, dummy);
613 apic_write(APIC_LVTPC, APIC_DM_NMI);
614 /* start the cycle over again */
615 write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
616}
617
618static const struct wd_ops p4_wd_ops = {
619 .reserve = p4_reserve,
620 .unreserve = p4_unreserve,
621 .setup = setup_p4_watchdog,
622 .rearm = p4_rearm,
623 .stop = stop_p4_watchdog,
624 /* RED-PEN this is wrong for the other sibling */
625 .perfctr = MSR_P4_BPU_PERFCTR0,
626 .evntsel = MSR_P4_BSU_ESCR0,
627 .checkbit = 1ULL << 39,
628};
629
630/*
631 * Watchdog using the Intel architected PerfMon.
632 * Used for Core2 and hopefully all future Intel CPUs.
633 */
634#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
635#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
636
637static struct wd_ops intel_arch_wd_ops;
638
639static int setup_intel_arch_watchdog(unsigned nmi_hz)
640{
641 unsigned int ebx;
642 union cpuid10_eax eax;
643 unsigned int unused;
644 unsigned int perfctr_msr, evntsel_msr;
645 unsigned int evntsel;
646 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
647
648 /*
649 * Check whether the Architectural PerfMon supports
650 * Unhalted Core Cycles Event or not.
651 * NOTE: Corresponding bit = 0 in ebx indicates event present.
652 */
653 cpuid(10, &(eax.full), &ebx, &unused, &unused);
654 if ((eax.split.mask_length <
655 (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
656 (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
657 return 0;
658
659 perfctr_msr = wd_ops->perfctr;
660 evntsel_msr = wd_ops->evntsel;
661
662 wrmsrl(perfctr_msr, 0UL);
663
664 evntsel = ARCH_PERFMON_EVENTSEL_INT
665 | ARCH_PERFMON_EVENTSEL_OS
666 | ARCH_PERFMON_EVENTSEL_USR
667 | ARCH_PERFMON_NMI_EVENT_SEL
668 | ARCH_PERFMON_NMI_EVENT_UMASK;
669
670 /* setup the timer */
671 wrmsr(evntsel_msr, evntsel, 0);
672 nmi_hz = adjust_for_32bit_ctr(nmi_hz);
673 write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz);
674
675 wd->perfctr_msr = perfctr_msr;
676 wd->evntsel_msr = evntsel_msr;
677 wd->cccr_msr = 0; /* unused */
678
679 /* ok, everything is initialized, announce that we're set */
680 cpu_nmi_set_wd_enabled();
681
682 apic_write(APIC_LVTPC, APIC_DM_NMI);
683 evntsel |= ARCH_PERFMON_EVENTSEL_ENABLE;
684 wrmsr(evntsel_msr, evntsel, 0);
685 intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
686 return 1;
687}
688
689static struct wd_ops intel_arch_wd_ops __read_mostly = {
690 .reserve = single_msr_reserve,
691 .unreserve = single_msr_unreserve,
692 .setup = setup_intel_arch_watchdog,
693 .rearm = p6_rearm,
694 .stop = single_msr_stop_watchdog,
695 .perfctr = MSR_ARCH_PERFMON_PERFCTR1,
696 .evntsel = MSR_ARCH_PERFMON_EVENTSEL1,
697};
698
699static void probe_nmi_watchdog(void)
700{
701 switch (boot_cpu_data.x86_vendor) {
702 case X86_VENDOR_AMD:
703 if (boot_cpu_data.x86 == 6 ||
704 (boot_cpu_data.x86 >= 0xf && boot_cpu_data.x86 <= 0x15))
705 wd_ops = &k7_wd_ops;
706 return;
707 case X86_VENDOR_INTEL:
708 /* Work around where perfctr1 doesn't have a working enable
709 * bit as described in the following errata:
710 * AE49 Core Duo and Intel Core Solo 65 nm
711 * AN49 Intel Pentium Dual-Core
712 * AF49 Dual-Core Intel Xeon Processor LV
713 */
714 if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) ||
715 ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 15 &&
716 boot_cpu_data.x86_mask == 4))) {
717 intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
718 intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
719 }
720 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
721 wd_ops = &intel_arch_wd_ops;
722 break;
723 }
724 switch (boot_cpu_data.x86) {
725 case 6:
726 if (boot_cpu_data.x86_model > 13)
727 return;
728
729 wd_ops = &p6_wd_ops;
730 break;
731 case 15:
732 wd_ops = &p4_wd_ops;
733 break;
734 default:
735 return;
736 }
737 break;
738 }
739}
740
741/* Interface to nmi.c */
742
743int lapic_watchdog_init(unsigned nmi_hz)
744{
745 if (!wd_ops) {
746 probe_nmi_watchdog();
747 if (!wd_ops) {
748 printk(KERN_INFO "NMI watchdog: CPU not supported\n");
749 return -1;
750 }
751
752 if (!wd_ops->reserve()) {
753 printk(KERN_ERR
754 "NMI watchdog: cannot reserve perfctrs\n");
755 return -1;
756 }
757 }
758
759 if (!(wd_ops->setup(nmi_hz))) {
760 printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n",
761 raw_smp_processor_id());
762 return -1;
763 }
764
765 return 0;
766}
767
768void lapic_watchdog_stop(void)
769{
770 if (wd_ops)
771 wd_ops->stop();
772}
773
774unsigned lapic_adjust_nmi_hz(unsigned hz)
775{
776 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
777 if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
778 wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1)
779 hz = adjust_for_32bit_ctr(hz);
780 return hz;
781}
782
783int __kprobes lapic_wd_event(unsigned nmi_hz)
784{
785 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
786 u64 ctr;
787
788 rdmsrl(wd->perfctr_msr, ctr);
789 if (ctr & wd_ops->checkbit) /* perfctr still running? */
790 return 0;
791
792 wd_ops->rearm(wd, nmi_hz);
793 return 1;
794}
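
Most of this file disappears along with the old LAPIC NMI watchdog. For the record, the counter programming it used was simple period math: arm the perfctr with the negative of cycles-per-tick so it overflows nmi_hz times a second, clamping the rate when the value won't fit a counter whose writable part is 31 bits. A worked example of the removed adjust_for_32bit_ctr() logic (values illustrative):

u64 cycles = 3000000ULL * 1000;          /* cpu_khz * 1000 on a 3 GHz CPU */
unsigned int hz = 1;

if (cycles / hz > 0x7fffffffULL)         /* won't fit in 31 bits          */
        hz = cycles / 0x7fffffffUL + 1;  /* -> 2 NMIs per second          */
/* the counter is then armed with -(cycles / hz), as in
 * write_watchdog_counter32() above */
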
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 6e8752c1bd52..d6fb146c0d8b 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -175,21 +175,21 @@ static const struct stacktrace_ops print_trace_ops = {
175 175
176void 176void
177show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, 177show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
178 unsigned long *stack, unsigned long bp, char *log_lvl) 178 unsigned long *stack, char *log_lvl)
179{ 179{
180 printk("%sCall Trace:\n", log_lvl); 180 printk("%sCall Trace:\n", log_lvl);
181 dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); 181 dump_trace(task, regs, stack, &print_trace_ops, log_lvl);
182} 182}
183 183
184void show_trace(struct task_struct *task, struct pt_regs *regs, 184void show_trace(struct task_struct *task, struct pt_regs *regs,
185 unsigned long *stack, unsigned long bp) 185 unsigned long *stack)
186{ 186{
187 show_trace_log_lvl(task, regs, stack, bp, ""); 187 show_trace_log_lvl(task, regs, stack, "");
188} 188}
189 189
190void show_stack(struct task_struct *task, unsigned long *sp) 190void show_stack(struct task_struct *task, unsigned long *sp)
191{ 191{
192 show_stack_log_lvl(task, NULL, sp, 0, ""); 192 show_stack_log_lvl(task, NULL, sp, "");
193} 193}
194 194
195/* 195/*
@@ -197,20 +197,14 @@ void show_stack(struct task_struct *task, unsigned long *sp)
197 */ 197 */
198void dump_stack(void) 198void dump_stack(void)
199{ 199{
200 unsigned long bp = 0;
201 unsigned long stack; 200 unsigned long stack;
202 201
203#ifdef CONFIG_FRAME_POINTER
204 if (!bp)
205 get_bp(bp);
206#endif
207
208 printk("Pid: %d, comm: %.20s %s %s %.*s\n", 202 printk("Pid: %d, comm: %.20s %s %s %.*s\n",
209 current->pid, current->comm, print_tainted(), 203 current->pid, current->comm, print_tainted(),
210 init_utsname()->release, 204 init_utsname()->release,
211 (int)strcspn(init_utsname()->version, " "), 205 (int)strcspn(init_utsname()->version, " "),
212 init_utsname()->version); 206 init_utsname()->version);
213 show_trace(NULL, NULL, &stack, bp); 207 show_trace(NULL, NULL, &stack);
214} 208}
215EXPORT_SYMBOL(dump_stack); 209EXPORT_SYMBOL(dump_stack);
216 210
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 1bc7f75a5bda..74cc1eda384b 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -17,11 +17,12 @@
17#include <asm/stacktrace.h> 17#include <asm/stacktrace.h>
18 18
19 19
20void dump_trace(struct task_struct *task, struct pt_regs *regs, 20void dump_trace(struct task_struct *task,
21 unsigned long *stack, unsigned long bp, 21 struct pt_regs *regs, unsigned long *stack,
22 const struct stacktrace_ops *ops, void *data) 22 const struct stacktrace_ops *ops, void *data)
23{ 23{
24 int graph = 0; 24 int graph = 0;
25 unsigned long bp;
25 26
26 if (!task) 27 if (!task)
27 task = current; 28 task = current;
@@ -34,18 +35,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
34 stack = (unsigned long *)task->thread.sp; 35 stack = (unsigned long *)task->thread.sp;
35 } 36 }
36 37
37#ifdef CONFIG_FRAME_POINTER 38 bp = stack_frame(task, regs);
38 if (!bp) {
39 if (task == current) {
40 /* Grab bp right from our regs */
41 get_bp(bp);
42 } else {
43 /* bp is the last reg pushed by switch_to */
44 bp = *(unsigned long *) task->thread.sp;
45 }
46 }
47#endif
48
49 for (;;) { 39 for (;;) {
50 struct thread_info *context; 40 struct thread_info *context;
51 41
@@ -65,7 +55,7 @@ EXPORT_SYMBOL(dump_trace);
65 55
66void 56void
67show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, 57show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
68 unsigned long *sp, unsigned long bp, char *log_lvl) 58 unsigned long *sp, char *log_lvl)
69{ 59{
70 unsigned long *stack; 60 unsigned long *stack;
71 int i; 61 int i;
@@ -87,7 +77,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
87 touch_nmi_watchdog(); 77 touch_nmi_watchdog();
88 } 78 }
89 printk(KERN_CONT "\n"); 79 printk(KERN_CONT "\n");
90 show_trace_log_lvl(task, regs, sp, bp, log_lvl); 80 show_trace_log_lvl(task, regs, sp, log_lvl);
91} 81}
92 82
93 83
@@ -112,8 +102,7 @@ void show_registers(struct pt_regs *regs)
112 u8 *ip; 102 u8 *ip;
113 103
114 printk(KERN_EMERG "Stack:\n"); 104 printk(KERN_EMERG "Stack:\n");
115 show_stack_log_lvl(NULL, regs, &regs->sp, 105 show_stack_log_lvl(NULL, regs, &regs->sp, KERN_EMERG);
116 0, KERN_EMERG);
117 106
118 printk(KERN_EMERG "Code: "); 107 printk(KERN_EMERG "Code: ");
119 108
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 6a340485249a..64101335de19 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -139,8 +139,8 @@ fixup_bp_irq_link(unsigned long bp, unsigned long *stack,
139 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack 139 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
140 */ 140 */
141 141
142void dump_trace(struct task_struct *task, struct pt_regs *regs, 142void dump_trace(struct task_struct *task,
143 unsigned long *stack, unsigned long bp, 143 struct pt_regs *regs, unsigned long *stack,
144 const struct stacktrace_ops *ops, void *data) 144 const struct stacktrace_ops *ops, void *data)
145{ 145{
146 const unsigned cpu = get_cpu(); 146 const unsigned cpu = get_cpu();
@@ -149,6 +149,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
149 unsigned used = 0; 149 unsigned used = 0;
150 struct thread_info *tinfo; 150 struct thread_info *tinfo;
151 int graph = 0; 151 int graph = 0;
152 unsigned long bp;
152 153
153 if (!task) 154 if (!task)
154 task = current; 155 task = current;
@@ -160,18 +161,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
160 stack = (unsigned long *)task->thread.sp; 161 stack = (unsigned long *)task->thread.sp;
161 } 162 }
162 163
163#ifdef CONFIG_FRAME_POINTER 164 bp = stack_frame(task, regs);
164 if (!bp) {
165 if (task == current) {
166 /* Grab bp right from our regs */
167 get_bp(bp);
168 } else {
169 /* bp is the last reg pushed by switch_to */
170 bp = *(unsigned long *) task->thread.sp;
171 }
172 }
173#endif
174
175 /* 165 /*
176 * Print function call entries in all stacks, starting at the 166 * Print function call entries in all stacks, starting at the
177 * current stack address. If the stacks consist of nested 167 * current stack address. If the stacks consist of nested
@@ -235,7 +225,7 @@ EXPORT_SYMBOL(dump_trace);
235 225
236void 226void
237show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, 227show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
238 unsigned long *sp, unsigned long bp, char *log_lvl) 228 unsigned long *sp, char *log_lvl)
239{ 229{
240 unsigned long *irq_stack_end; 230 unsigned long *irq_stack_end;
241 unsigned long *irq_stack; 231 unsigned long *irq_stack;
@@ -279,7 +269,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
279 preempt_enable(); 269 preempt_enable();
280 270
281 printk(KERN_CONT "\n"); 271 printk(KERN_CONT "\n");
282 show_trace_log_lvl(task, regs, sp, bp, log_lvl); 272 show_trace_log_lvl(task, regs, sp, log_lvl);
283} 273}
284 274
285void show_registers(struct pt_regs *regs) 275void show_registers(struct pt_regs *regs)
@@ -308,7 +298,7 @@ void show_registers(struct pt_regs *regs)
308 298
309 printk(KERN_EMERG "Stack:\n"); 299 printk(KERN_EMERG "Stack:\n");
310 show_stack_log_lvl(NULL, regs, (unsigned long *)sp, 300 show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
311 regs->bp, KERN_EMERG); 301 KERN_EMERG);
312 302
313 printk(KERN_EMERG "Code: "); 303 printk(KERN_EMERG "Code: ");
314 304
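
Both dump_trace() variants replace the open-coded frame-pointer recovery with stack_frame(task, regs). Presumably the new helper centralizes exactly the logic removed here; a sketch of that shape (not the verbatim kernel implementation):

static inline unsigned long stack_frame(struct task_struct *task,
                                        struct pt_regs *regs)
{
#ifdef CONFIG_FRAME_POINTER
        unsigned long bp;

        if (regs)
                return regs->bp;
        if (task == current) {
                get_bp(bp);     /* grab bp right from our regs */
                return bp;
        }
        /* bp is the last reg pushed by switch_to */
        return *(unsigned long *)task->thread.sp;
#else
        return 0;
#endif
}
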
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 4572f25f9325..cd28a350f7f9 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -240,7 +240,7 @@ static int __init setup_early_printk(char *buf)
240 if (!strncmp(buf, "xen", 3)) 240 if (!strncmp(buf, "xen", 3))
241 early_console_register(&xenboot_console, keep); 241 early_console_register(&xenboot_console, keep);
242#endif 242#endif
243#ifdef CONFIG_X86_MRST_EARLY_PRINTK 243#ifdef CONFIG_EARLY_PRINTK_MRST
244 if (!strncmp(buf, "mrst", 4)) { 244 if (!strncmp(buf, "mrst", 4)) {
245 mrst_early_console_init(); 245 mrst_early_console_init();
246 early_console_register(&early_mrst_console, keep); 246 early_console_register(&early_mrst_console, keep);
@@ -250,7 +250,6 @@ static int __init setup_early_printk(char *buf)
250 hsu_early_console_init(); 250 hsu_early_console_init();
251 early_console_register(&early_hsu_console, keep); 251 early_console_register(&early_hsu_console, keep);
252 } 252 }
253
254#endif 253#endif
255 buf++; 254 buf++;
256 } 255 }
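
For context, the consoles touched here are chosen on the kernel command line parsed by setup_early_printk(); with CONFIG_EARLY_PRINTK_MRST enabled, an invocation along these lines (illustrative) selects the Moorestown SPI-UART console and keeps it past real console registration:

        earlyprintk=mrst,keep
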
diff --git a/arch/x86/kernel/early_printk_mrst.c b/arch/x86/kernel/early_printk_mrst.c
deleted file mode 100644
index 65df603622b2..000000000000
--- a/arch/x86/kernel/early_printk_mrst.c
+++ /dev/null
@@ -1,319 +0,0 @@
1/*
2 * early_printk_mrst.c - early consoles for Intel MID platforms
3 *
4 * Copyright (c) 2008-2010, Intel Corporation
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; version 2
9 * of the License.
10 */
11
12/*
13 * This file implements two early consoles named mrst and hsu.
14 * mrst is based on the Maxim3110 SPI-UART device, which exists in both
15 * the Moorestown and Medfield platforms, while hsu is based on a High
16 * Speed UART device that only exists in the Medfield platform
17 */
18
19#include <linux/serial_reg.h>
20#include <linux/serial_mfd.h>
21#include <linux/kmsg_dump.h>
22#include <linux/console.h>
23#include <linux/kernel.h>
24#include <linux/delay.h>
25#include <linux/init.h>
26#include <linux/io.h>
27
28#include <asm/fixmap.h>
29#include <asm/pgtable.h>
30#include <asm/mrst.h>
31
32#define MRST_SPI_TIMEOUT 0x200000
33#define MRST_REGBASE_SPI0 0xff128000
34#define MRST_REGBASE_SPI1 0xff128400
35#define MRST_CLK_SPI0_REG 0xff11d86c
36
37/* Bit fields in CTRLR0 */
38#define SPI_DFS_OFFSET 0
39
40#define SPI_FRF_OFFSET 4
41#define SPI_FRF_SPI 0x0
42#define SPI_FRF_SSP 0x1
43#define SPI_FRF_MICROWIRE 0x2
44#define SPI_FRF_RESV 0x3
45
46#define SPI_MODE_OFFSET 6
47#define SPI_SCPH_OFFSET 6
48#define SPI_SCOL_OFFSET 7
49#define SPI_TMOD_OFFSET 8
50#define SPI_TMOD_TR 0x0 /* xmit & recv */
51#define SPI_TMOD_TO 0x1 /* xmit only */
52#define SPI_TMOD_RO 0x2 /* recv only */
53#define SPI_TMOD_EPROMREAD 0x3 /* eeprom read mode */
54
55#define SPI_SLVOE_OFFSET 10
56#define SPI_SRL_OFFSET 11
57#define SPI_CFS_OFFSET 12
58
59/* Bit fields in SR, 7 bits */
60#define SR_MASK 0x7f /* cover 7 bits */
61#define SR_BUSY (1 << 0)
62#define SR_TF_NOT_FULL (1 << 1)
63#define SR_TF_EMPT (1 << 2)
64#define SR_RF_NOT_EMPT (1 << 3)
65#define SR_RF_FULL (1 << 4)
66#define SR_TX_ERR (1 << 5)
67#define SR_DCOL (1 << 6)
68
69struct dw_spi_reg {
70 u32 ctrl0;
71 u32 ctrl1;
72 u32 ssienr;
73 u32 mwcr;
74 u32 ser;
75 u32 baudr;
76 u32 txfltr;
77 u32 rxfltr;
78 u32 txflr;
79 u32 rxflr;
80 u32 sr;
81 u32 imr;
82 u32 isr;
83 u32 risr;
84 u32 txoicr;
85 u32 rxoicr;
86 u32 rxuicr;
87 u32 msticr;
88 u32 icr;
89 u32 dmacr;
90 u32 dmatdlr;
91 u32 dmardlr;
92 u32 idr;
93 u32 version;
94
95 /* Currently operates as 32 bits, though only the low 16 bits matter */
96 u32 dr;
97} __packed;
98
99#define dw_readl(dw, name) __raw_readl(&(dw)->name)
100#define dw_writel(dw, name, val) __raw_writel((val), &(dw)->name)
101
102/* Default to the SPI0 registers for mrst; we will detect Penwell and use SPI1 */
103static unsigned long mrst_spi_paddr = MRST_REGBASE_SPI0;
104
105static u32 *pclk_spi0;
106/* Always contains an accessible address, start with 0 */
107static struct dw_spi_reg *pspi;
108
109static struct kmsg_dumper dw_dumper;
110static int dumper_registered;
111
112static void dw_kmsg_dump(struct kmsg_dumper *dumper,
113 enum kmsg_dump_reason reason,
114 const char *s1, unsigned long l1,
115 const char *s2, unsigned long l2)
116{
117 int i;
118
119 /* By the time we get here, we'd better re-init the HW */
120 mrst_early_console_init();
121
122 for (i = 0; i < l1; i++)
123 early_mrst_console.write(&early_mrst_console, s1 + i, 1);
124 for (i = 0; i < l2; i++)
125 early_mrst_console.write(&early_mrst_console, s2 + i, 1);
126}
127
128/* Set the baud rate to 115200, 8n1, IRQ disabled */
129static void max3110_write_config(void)
130{
131 u16 config;
132
133 config = 0xc001;
134 dw_writel(pspi, dr, config);
135}
136
137/* Translate a char to an eligible word and send it to the max3110 */
138static void max3110_write_data(char c)
139{
140 u16 data;
141
142 data = 0x8000 | c;
143 dw_writel(pspi, dr, data);
144}
145
146void mrst_early_console_init(void)
147{
148 u32 ctrlr0 = 0;
149 u32 spi0_cdiv;
150 u32 freq; /* Frequency info only needs to be read once */
151
152 /* Base clk is 100 MHz, the actual clk = 100M / (clk_divider + 1) */
153 pclk_spi0 = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE,
154 MRST_CLK_SPI0_REG);
155 spi0_cdiv = ((*pclk_spi0) & 0xe00) >> 9;
156 freq = 100000000 / (spi0_cdiv + 1);
157
158 if (mrst_identify_cpu() == MRST_CPU_CHIP_PENWELL)
159 mrst_spi_paddr = MRST_REGBASE_SPI1;
160
161 pspi = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE,
162 mrst_spi_paddr);
163
164 /* Disable SPI controller */
165 dw_writel(pspi, ssienr, 0);
166
167 /* Set control param, 8 bits, transmit only mode */
168 ctrlr0 = dw_readl(pspi, ctrl0);
169
170 ctrlr0 &= 0xfcc0;
171 ctrlr0 |= 0xf | (SPI_FRF_SPI << SPI_FRF_OFFSET)
172 | (SPI_TMOD_TO << SPI_TMOD_OFFSET);
173 dw_writel(pspi, ctrl0, ctrlr0);
174
175 /*
176 * Change the spi0 clk to comply with 115200 bps, use 100000 to
177 * calculate the clk divisor to make the clock a little slower
178 * than the real baud rate.
179 */
180 dw_writel(pspi, baudr, freq/100000);
181
182 /* Disable all INT for early phase */
183 dw_writel(pspi, imr, 0x0);
184
185 /* Set the cs to spi-uart */
186 dw_writel(pspi, ser, 0x2);
187
188 /* Enable the HW, the last step for HW init */
189 dw_writel(pspi, ssienr, 0x1);
190
191 /* Set the default configuration */
192 max3110_write_config();
193
194 /* Register the kmsg dumper */
195 if (!dumper_registered) {
196 dw_dumper.dump = dw_kmsg_dump;
197 kmsg_dump_register(&dw_dumper);
198 dumper_registered = 1;
199 }
200}
201
202/* Slave select is handled in the read/write functions */
203static void early_mrst_spi_putc(char c)
204{
205 unsigned int timeout;
206 u32 sr;
207
208 timeout = MRST_SPI_TIMEOUT;
209 /* Early putc needs to make sure the TX FIFO is not full */
210 while (--timeout) {
211 sr = dw_readl(pspi, sr);
212 if (!(sr & SR_TF_NOT_FULL))
213 cpu_relax();
214 else
215 break;
216 }
217
218 if (!timeout)
219 pr_warning("MRST earlycon: timed out\n");
220 else
221 max3110_write_data(c);
222}
223
224/* Early SPI only uses polling mode */
225static void early_mrst_spi_write(struct console *con, const char *str, unsigned n)
226{
227 int i;
228
229 for (i = 0; i < n && *str; i++) {
230 if (*str == '\n')
231 early_mrst_spi_putc('\r');
232 early_mrst_spi_putc(*str);
233 str++;
234 }
235}
236
237struct console early_mrst_console = {
238 .name = "earlymrst",
239 .write = early_mrst_spi_write,
240 .flags = CON_PRINTBUFFER,
241 .index = -1,
242};
243
244/*
245 * Following is the early console based on Medfield HSU (High
246 * Speed UART) device.
247 */
248#define HSU_PORT2_PADDR 0xffa28180
249
250static void __iomem *phsu;
251
252void hsu_early_console_init(void)
253{
254 u8 lcr;
255
256 phsu = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE,
257 HSU_PORT2_PADDR);
258
259 /* Disable FIFO */
260 writeb(0x0, phsu + UART_FCR);
261
262 /* Set to default 115200 bps, 8n1 */
263 lcr = readb(phsu + UART_LCR);
264 writeb((0x80 | lcr), phsu + UART_LCR);
265 writeb(0x18, phsu + UART_DLL);
266 writeb(lcr, phsu + UART_LCR);
267 writel(0x3600, phsu + UART_MUL*4);
268
269 writeb(0x8, phsu + UART_MCR);
270 writeb(0x7, phsu + UART_FCR);
271 writeb(0x3, phsu + UART_LCR);
272
273 /* Clear IRQ status */
274 readb(phsu + UART_LSR);
275 readb(phsu + UART_RX);
276 readb(phsu + UART_IIR);
277 readb(phsu + UART_MSR);
278
279 /* Enable FIFO */
280 writeb(0x7, phsu + UART_FCR);
281}
282
283#define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE)
284
285static void early_hsu_putc(char ch)
286{
287 unsigned int timeout = 10000; /* 10ms */
288 u8 status;
289
290 while (--timeout) {
291 status = readb(phsu + UART_LSR);
292 if (status & BOTH_EMPTY)
293 break;
294 udelay(1);
295 }
296
297 /* Only write the char when there was no timeout */
298 if (timeout)
299 writeb(ch, phsu + UART_TX);
300}
301
302static void early_hsu_write(struct console *con, const char *str, unsigned n)
303{
304 int i;
305
306 for (i = 0; i < n && *str; i++) {
307 if (*str == '\n')
308 early_hsu_putc('\r');
309 early_hsu_putc(*str);
310 str++;
311 }
312}
313
314struct console early_hsu_console = {
315 .name = "earlyhsu",
316 .write = early_hsu_write,
317 .flags = CON_PRINTBUFFER,
318 .index = -1,
319};
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index e3ba417e8697..d3b895f375d3 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -299,17 +299,21 @@ ENDPROC(native_usergs_sysret64)
299ENTRY(save_args) 299ENTRY(save_args)
300 XCPT_FRAME 300 XCPT_FRAME
301 cld 301 cld
302 movq_cfi rdi, RDI+16-ARGOFFSET 302 /*
303 movq_cfi rsi, RSI+16-ARGOFFSET 303 * start from rbp in pt_regs and jump over
 304 movq_cfi rdx, RDX+16-ARGOFFSET 304 * the return address.
305 movq_cfi rcx, RCX+16-ARGOFFSET 305 */
306 movq_cfi rax, RAX+16-ARGOFFSET 306 movq_cfi rdi, RDI+8-RBP
307 movq_cfi r8, R8+16-ARGOFFSET 307 movq_cfi rsi, RSI+8-RBP
308 movq_cfi r9, R9+16-ARGOFFSET 308 movq_cfi rdx, RDX+8-RBP
309 movq_cfi r10, R10+16-ARGOFFSET 309 movq_cfi rcx, RCX+8-RBP
310 movq_cfi r11, R11+16-ARGOFFSET 310 movq_cfi rax, RAX+8-RBP
311 311 movq_cfi r8, R8+8-RBP
312 leaq -ARGOFFSET+16(%rsp),%rdi /* arg1 for handler */ 312 movq_cfi r9, R9+8-RBP
313 movq_cfi r10, R10+8-RBP
314 movq_cfi r11, R11+8-RBP
315
316 leaq -RBP+8(%rsp),%rdi /* arg1 for handler */
313 movq_cfi rbp, 8 /* push %rbp */ 317 movq_cfi rbp, 8 /* push %rbp */
314 leaq 8(%rsp), %rbp /* mov %rsp, %ebp */ 318 leaq 8(%rsp), %rbp /* mov %rsp, %ebp */
315 testl $3, CS(%rdi) 319 testl $3, CS(%rdi)
@@ -782,8 +786,9 @@ END(interrupt)
782 786
783/* 0(%rsp): ~(interrupt number) */ 787/* 0(%rsp): ~(interrupt number) */
784 .macro interrupt func 788 .macro interrupt func
785 subq $ORIG_RAX-ARGOFFSET+8, %rsp 789 /* reserve pt_regs for scratch regs and rbp */
786 CFI_ADJUST_CFA_OFFSET ORIG_RAX-ARGOFFSET+8 790 subq $ORIG_RAX-RBP, %rsp
791 CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP
787 call save_args 792 call save_args
788 PARTIAL_FRAME 0 793 PARTIAL_FRAME 0
789 call \func 794 call \func
@@ -808,9 +813,14 @@ ret_from_intr:
808 TRACE_IRQS_OFF 813 TRACE_IRQS_OFF
809 decl PER_CPU_VAR(irq_count) 814 decl PER_CPU_VAR(irq_count)
810 leaveq 815 leaveq
816
811 CFI_RESTORE rbp 817 CFI_RESTORE rbp
812 CFI_DEF_CFA_REGISTER rsp 818 CFI_DEF_CFA_REGISTER rsp
813 CFI_ADJUST_CFA_OFFSET -8 819 CFI_ADJUST_CFA_OFFSET -8
820
 821 /* we did not save rbx, so restore only from ARGOFFSET */
822 addq $8, %rsp
823 CFI_ADJUST_CFA_OFFSET -8
814exit_intr: 824exit_intr:
815 GET_THREAD_INFO(%rcx) 825 GET_THREAD_INFO(%rcx)
816 testl $3,CS-ARGOFFSET(%rsp) 826 testl $3,CS-ARGOFFSET(%rsp)
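
A worked offset check for the save_args rewrite, using the pt_regs slot offsets from calling.h (assumed here: RBP = 32, ARGOFFSET = R11 = 48, RDI = 112, ORIG_RAX = 120, all in bytes):

/*
 * reserved by "subq $ORIG_RAX-RBP"    : 120 - 32     = 88 bytes
 * rdi slot after call pushes ret-addr : 112 + 8 - 32 = 88(%rsp)
 *
 * i.e. the frame now extends down to the rbp slot rather than stopping
 * at ARGOFFSET, and every movq_cfi offset grows by 8 compared with the
 * old RDI+16-ARGOFFSET = 80 form.
 */
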
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 3afb33f14d2d..382eb2936d4d 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -19,6 +19,7 @@
19#include <linux/sched.h> 19#include <linux/sched.h>
20#include <linux/init.h> 20#include <linux/init.h>
21#include <linux/list.h> 21#include <linux/list.h>
22#include <linux/module.h>
22 23
23#include <trace/syscall.h> 24#include <trace/syscall.h>
24 25
@@ -49,6 +50,7 @@ static DEFINE_PER_CPU(int, save_modifying_code);
49int ftrace_arch_code_modify_prepare(void) 50int ftrace_arch_code_modify_prepare(void)
50{ 51{
51 set_kernel_text_rw(); 52 set_kernel_text_rw();
53 set_all_modules_text_rw();
52 modifying_code = 1; 54 modifying_code = 1;
53 return 0; 55 return 0;
54} 56}
@@ -56,6 +58,7 @@ int ftrace_arch_code_modify_prepare(void)
56int ftrace_arch_code_modify_post_process(void) 58int ftrace_arch_code_modify_post_process(void)
57{ 59{
58 modifying_code = 0; 60 modifying_code = 0;
61 set_all_modules_text_ro();
59 set_kernel_text_ro(); 62 set_kernel_text_ro();
60 return 0; 63 return 0;
61} 64}
@@ -167,9 +170,9 @@ static void ftrace_mod_code(void)
167 170
168void ftrace_nmi_enter(void) 171void ftrace_nmi_enter(void)
169{ 172{
170 __get_cpu_var(save_modifying_code) = modifying_code; 173 __this_cpu_write(save_modifying_code, modifying_code);
171 174
172 if (!__get_cpu_var(save_modifying_code)) 175 if (!__this_cpu_read(save_modifying_code))
173 return; 176 return;
174 177
175 if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) { 178 if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
@@ -183,7 +186,7 @@ void ftrace_nmi_enter(void)
183 186
184void ftrace_nmi_exit(void) 187void ftrace_nmi_exit(void)
185{ 188{
186 if (!__get_cpu_var(save_modifying_code)) 189 if (!__this_cpu_read(save_modifying_code))
187 return; 190 return;
188 191
189 /* Finish all executions before clearing nmi_running */ 192 /* Finish all executions before clearing nmi_running */
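Two independent changes in this file: ftrace patching now also flips loaded-module text writable for the duration, and the NMI bookkeeping flag switches to the cheaper __this_cpu accessors. A rough userspace analogue of the rw/ro bracketing, with mprotect() standing in for set_kernel_text_rw()/set_all_modules_text_rw() and their _ro counterparts (illustrative only, not the kernel mechanism):

    #include <stdio.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
        long pagesz = sysconf(_SC_PAGESIZE);
        unsigned char *text = mmap(NULL, pagesz, PROT_READ | PROT_EXEC,
                                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (text == MAP_FAILED)
            return 1;

        mprotect(text, pagesz, PROT_READ | PROT_WRITE);  /* modify_prepare */
        text[0] = 0xc3;                                  /* patch in a ret */
        mprotect(text, pagesz, PROT_READ | PROT_EXEC);   /* modify_post_process */

        ((void (*)(void))text)();                        /* executes the ret */
        puts("patched text executed");
        return 0;
    }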
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 763310165fa0..7f138b3c3c52 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -61,6 +61,9 @@ void __init i386_start_kernel(void)
61 case X86_SUBARCH_MRST: 61 case X86_SUBARCH_MRST:
62 x86_mrst_early_setup(); 62 x86_mrst_early_setup();
63 break; 63 break;
64 case X86_SUBARCH_CE4100:
65 x86_ce4100_early_setup();
66 break;
64 default: 67 default:
65 i386_default_early_setup(); 68 i386_default_early_setup();
66 break; 69 break;
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index c0dbd9ac24f0..9f54b209c378 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -139,39 +139,6 @@ ENTRY(startup_32)
139 movl %eax, pa(olpc_ofw_pgd) 139 movl %eax, pa(olpc_ofw_pgd)
140#endif 140#endif
141 141
142#ifdef CONFIG_PARAVIRT
143	/* This can only trip for a broken bootloader... */
144 cmpw $0x207, pa(boot_params + BP_version)
145 jb default_entry
146
147 /* Paravirt-compatible boot parameters. Look to see what architecture
148 we're booting under. */
149 movl pa(boot_params + BP_hardware_subarch), %eax
150 cmpl $num_subarch_entries, %eax
151 jae bad_subarch
152
153 movl pa(subarch_entries)(,%eax,4), %eax
154 subl $__PAGE_OFFSET, %eax
155 jmp *%eax
156
157bad_subarch:
158WEAK(lguest_entry)
159WEAK(xen_entry)
160 /* Unknown implementation; there's really
161 nothing we can do at this point. */
162 ud2a
163
164 __INITDATA
165
166subarch_entries:
167 .long default_entry /* normal x86/PC */
168 .long lguest_entry /* lguest hypervisor */
169 .long xen_entry /* Xen hypervisor */
170 .long default_entry /* Moorestown MID */
171num_subarch_entries = (. - subarch_entries) / 4
172.previous
173#endif /* CONFIG_PARAVIRT */
174
175/* 142/*
176 * Initialize page tables. This creates a PDE and a set of page 143 * Initialize page tables. This creates a PDE and a set of page
177 * tables, which are located immediately beyond __brk_base. The variable 144 * tables, which are located immediately beyond __brk_base. The variable
@@ -181,7 +148,6 @@ num_subarch_entries = (. - subarch_entries) / 4
181 * 148 *
182 * Note that the stack is not yet set up! 149 * Note that the stack is not yet set up!
183 */ 150 */
184default_entry:
185#ifdef CONFIG_X86_PAE 151#ifdef CONFIG_X86_PAE
186 152
187 /* 153 /*
@@ -261,7 +227,42 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
261 movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax 227 movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
262 movl %eax,pa(initial_page_table+0xffc) 228 movl %eax,pa(initial_page_table+0xffc)
263#endif 229#endif
264 jmp 3f 230
231#ifdef CONFIG_PARAVIRT
232	/* This can only trip for a broken bootloader... */
233 cmpw $0x207, pa(boot_params + BP_version)
234 jb default_entry
235
236 /* Paravirt-compatible boot parameters. Look to see what architecture
237 we're booting under. */
238 movl pa(boot_params + BP_hardware_subarch), %eax
239 cmpl $num_subarch_entries, %eax
240 jae bad_subarch
241
242 movl pa(subarch_entries)(,%eax,4), %eax
243 subl $__PAGE_OFFSET, %eax
244 jmp *%eax
245
246bad_subarch:
247WEAK(lguest_entry)
248WEAK(xen_entry)
249 /* Unknown implementation; there's really
250 nothing we can do at this point. */
251 ud2a
252
253 __INITDATA
254
255subarch_entries:
256 .long default_entry /* normal x86/PC */
257 .long lguest_entry /* lguest hypervisor */
258 .long xen_entry /* Xen hypervisor */
259 .long default_entry /* Moorestown MID */
260num_subarch_entries = (. - subarch_entries) / 4
261.previous
262#else
263 jmp default_entry
264#endif /* CONFIG_PARAVIRT */
265
265/* 266/*
266 * Non-boot CPU entry point; entered from trampoline.S 267 * Non-boot CPU entry point; entered from trampoline.S
267 * We can't lgdt here, because lgdt itself uses a data segment, but 268 * We can't lgdt here, because lgdt itself uses a data segment, but
@@ -282,7 +283,7 @@ ENTRY(startup_32_smp)
282 movl %eax,%fs 283 movl %eax,%fs
283 movl %eax,%gs 284 movl %eax,%gs
284#endif /* CONFIG_SMP */ 285#endif /* CONFIG_SMP */
2853: 286default_entry:
286 287
287/* 288/*
288 * New page tables may be in 4Mbyte page mode and may 289 * New page tables may be in 4Mbyte page mode and may
@@ -316,6 +317,10 @@ ENTRY(startup_32_smp)
316 subl $0x80000001, %eax 317 subl $0x80000001, %eax
317 cmpl $(0x8000ffff-0x80000001), %eax 318 cmpl $(0x8000ffff-0x80000001), %eax
318 ja 6f 319 ja 6f
320
321 /* Clear bogus XD_DISABLE bits */
322 call verify_cpu
323
319 mov $0x80000001, %eax 324 mov $0x80000001, %eax
320 cpuid 325 cpuid
321 /* Execute Disable bit supported? */ 326 /* Execute Disable bit supported? */
@@ -611,6 +616,8 @@ ignore_int:
611#endif 616#endif
612 iret 617 iret
613 618
619#include "verify_cpu.S"
620
614 __REFDATA 621 __REFDATA
615.align 4 622.align 4
616ENTRY(initial_code) 623ENTRY(initial_code)
@@ -622,13 +629,13 @@ ENTRY(initial_code)
622__PAGE_ALIGNED_BSS 629__PAGE_ALIGNED_BSS
623 .align PAGE_SIZE_asm 630 .align PAGE_SIZE_asm
624#ifdef CONFIG_X86_PAE 631#ifdef CONFIG_X86_PAE
625ENTRY(initial_pg_pmd) 632initial_pg_pmd:
626 .fill 1024*KPMDS,4,0 633 .fill 1024*KPMDS,4,0
627#else 634#else
628ENTRY(initial_page_table) 635ENTRY(initial_page_table)
629 .fill 1024,4,0 636 .fill 1024,4,0
630#endif 637#endif
631ENTRY(initial_pg_fixmap) 638initial_pg_fixmap:
632 .fill 1024,4,0 639 .fill 1024,4,0
633ENTRY(empty_zero_page) 640ENTRY(empty_zero_page)
634 .fill 4096,1,0 641 .fill 4096,1,0
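The CONFIG_PARAVIRT subarch dispatch above is unchanged logic, only relocated: it now runs after the page tables are built, so the lguest/Xen entry points inherit them, and the !CONFIG_PARAVIRT case falls through with a plain jmp default_entry. The jump table reads roughly like this C sketch (the entry functions are hypothetical stand-ins):

    #include <stdio.h>

    static void default_entry(void) { puts("default_entry"); }
    static void lguest_entry(void)  { puts("lguest_entry"); }
    static void xen_entry(void)     { puts("xen_entry"); }

    static void (*const subarch_entries[])(void) = {
        default_entry,  /* normal x86/PC */
        lguest_entry,   /* lguest hypervisor */
        xen_entry,      /* Xen hypervisor */
        default_entry,  /* Moorestown MID */
    };
    #define NUM_SUBARCH (sizeof(subarch_entries) / sizeof(subarch_entries[0]))

    int main(void)
    {
        unsigned subarch = 2;           /* boot_params.hardware_subarch */
        if (subarch >= NUM_SUBARCH)     /* bad_subarch: ud2a in the real code */
            return 1;
        subarch_entries[subarch]();
        return 0;
    }

The verify_cpu call added to default_entry clears bogus XD_DISABLE bits before the Execute Disable probe, which is why verify_cpu.S is now #included on 32-bit as well.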
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index 42c594254507..02f07634d265 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -122,7 +122,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
122 return -EBUSY; 122 return -EBUSY;
123 123
124 set_debugreg(info->address, i); 124 set_debugreg(info->address, i);
125 __get_cpu_var(cpu_debugreg[i]) = info->address; 125 __this_cpu_write(cpu_debugreg[i], info->address);
126 126
127 dr7 = &__get_cpu_var(cpu_dr7); 127 dr7 = &__get_cpu_var(cpu_dr7);
128 *dr7 |= encode_dr7(i, info->len, info->type); 128 *dr7 |= encode_dr7(i, info->len, info->type);
@@ -397,12 +397,12 @@ void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
397 397
398void hw_breakpoint_restore(void) 398void hw_breakpoint_restore(void)
399{ 399{
400 set_debugreg(__get_cpu_var(cpu_debugreg[0]), 0); 400 set_debugreg(__this_cpu_read(cpu_debugreg[0]), 0);
401 set_debugreg(__get_cpu_var(cpu_debugreg[1]), 1); 401 set_debugreg(__this_cpu_read(cpu_debugreg[1]), 1);
402 set_debugreg(__get_cpu_var(cpu_debugreg[2]), 2); 402 set_debugreg(__this_cpu_read(cpu_debugreg[2]), 2);
403 set_debugreg(__get_cpu_var(cpu_debugreg[3]), 3); 403 set_debugreg(__this_cpu_read(cpu_debugreg[3]), 3);
404 set_debugreg(current->thread.debugreg6, 6); 404 set_debugreg(current->thread.debugreg6, 6);
405 set_debugreg(__get_cpu_var(cpu_dr7), 7); 405 set_debugreg(__this_cpu_read(cpu_dr7), 7);
406} 406}
407EXPORT_SYMBOL_GPL(hw_breakpoint_restore); 407EXPORT_SYMBOL_GPL(hw_breakpoint_restore);
408 408
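This hunk is one instance of a conversion repeated below in irq.c, irq_32.c, kprobes.c and smpboot.c: where __get_cpu_var() yields an lvalue (forcing the compiler to form a per-CPU address and then dereference it), __this_cpu_read()/__this_cpu_write() can compile down to a single %gs-relative move. A userspace analogue with thread-local storage standing in for per-CPU data (a sketch, not the kernel API):

    #include <stdio.h>

    static __thread unsigned long cpu_debugreg[4];  /* per-"CPU" state */
    static __thread unsigned long cpu_dr7;

    #define this_cpu_write(var, val) ((var) = (val))
    #define this_cpu_read(var)       (var)

    int main(void)
    {
        this_cpu_write(cpu_debugreg[0], 0x1000);
        this_cpu_write(cpu_dr7, 0x1);
        printf("dr0=%#lx dr7=%#lx\n",
               this_cpu_read(cpu_debugreg[0]), this_cpu_read(cpu_dr7));
        return 0;
    }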
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 83ec0175f986..3a43caa3beb7 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -234,7 +234,7 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
234 exit_idle(); 234 exit_idle();
235 irq_enter(); 235 irq_enter();
236 236
237 irq = __get_cpu_var(vector_irq)[vector]; 237 irq = __this_cpu_read(vector_irq[vector]);
238 238
239 if (!handle_irq(irq, regs)) { 239 if (!handle_irq(irq, regs)) {
240 ack_APIC_irq(); 240 ack_APIC_irq();
@@ -350,12 +350,12 @@ void fixup_irqs(void)
350 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { 350 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
351 unsigned int irr; 351 unsigned int irr;
352 352
353 if (__get_cpu_var(vector_irq)[vector] < 0) 353 if (__this_cpu_read(vector_irq[vector]) < 0)
354 continue; 354 continue;
355 355
356 irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); 356 irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
357 if (irr & (1 << (vector % 32))) { 357 if (irr & (1 << (vector % 32))) {
358 irq = __get_cpu_var(vector_irq)[vector]; 358 irq = __this_cpu_read(vector_irq[vector]);
359 359
360 data = irq_get_irq_data(irq); 360 data = irq_get_irq_data(irq);
361 raw_spin_lock(&desc->lock); 361 raw_spin_lock(&desc->lock);
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 96656f207751..48ff6dcffa02 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -79,7 +79,7 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
79 u32 *isp, arg1, arg2; 79 u32 *isp, arg1, arg2;
80 80
81 curctx = (union irq_ctx *) current_thread_info(); 81 curctx = (union irq_ctx *) current_thread_info();
82 irqctx = __get_cpu_var(hardirq_ctx); 82 irqctx = __this_cpu_read(hardirq_ctx);
83 83
84 /* 84 /*
85 * this is where we switch to the IRQ stack. However, if we are 85 * this is where we switch to the IRQ stack. However, if we are
@@ -166,7 +166,7 @@ asmlinkage void do_softirq(void)
166 166
167 if (local_softirq_pending()) { 167 if (local_softirq_pending()) {
168 curctx = current_thread_info(); 168 curctx = current_thread_info();
169 irqctx = __get_cpu_var(softirq_ctx); 169 irqctx = __this_cpu_read(softirq_ctx);
170 irqctx->tinfo.task = curctx->task; 170 irqctx->tinfo.task = curctx->task;
171 irqctx->tinfo.previous_esp = current_stack_pointer; 171 irqctx->tinfo.previous_esp = current_stack_pointer;
172 172
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index cd21b654dec6..a4130005028a 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -48,6 +48,7 @@
48#include <asm/apicdef.h> 48#include <asm/apicdef.h>
49#include <asm/system.h> 49#include <asm/system.h>
50#include <asm/apic.h> 50#include <asm/apic.h>
51#include <asm/nmi.h>
51 52
52struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = 53struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] =
53{ 54{
@@ -525,10 +526,6 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd)
525 } 526 }
526 return NOTIFY_DONE; 527 return NOTIFY_DONE;
527 528
528 case DIE_NMI_IPI:
529 /* Just ignore, we will handle the roundup on DIE_NMI. */
530 return NOTIFY_DONE;
531
532 case DIE_NMIUNKNOWN: 529 case DIE_NMIUNKNOWN:
533 if (was_in_debug_nmi[raw_smp_processor_id()]) { 530 if (was_in_debug_nmi[raw_smp_processor_id()]) {
534 was_in_debug_nmi[raw_smp_processor_id()] = 0; 531 was_in_debug_nmi[raw_smp_processor_id()] = 0;
@@ -606,7 +603,7 @@ static struct notifier_block kgdb_notifier = {
606 /* 603 /*
607 * Lowest-prio notifier priority, we want to be notified last: 604 * Lowest-prio notifier priority, we want to be notified last:
608 */ 605 */
609 .priority = -INT_MAX, 606 .priority = NMI_LOCAL_LOW_PRIOR,
610}; 607};
611 608
612/** 609/**
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 1cbd54c0df99..d91c477b3f62 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -403,7 +403,7 @@ static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
403 403
404static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) 404static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
405{ 405{
406 __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; 406 __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
407 kcb->kprobe_status = kcb->prev_kprobe.status; 407 kcb->kprobe_status = kcb->prev_kprobe.status;
408 kcb->kprobe_old_flags = kcb->prev_kprobe.old_flags; 408 kcb->kprobe_old_flags = kcb->prev_kprobe.old_flags;
409 kcb->kprobe_saved_flags = kcb->prev_kprobe.saved_flags; 409 kcb->kprobe_saved_flags = kcb->prev_kprobe.saved_flags;
@@ -412,7 +412,7 @@ static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
412static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, 412static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
413 struct kprobe_ctlblk *kcb) 413 struct kprobe_ctlblk *kcb)
414{ 414{
415 __get_cpu_var(current_kprobe) = p; 415 __this_cpu_write(current_kprobe, p);
416 kcb->kprobe_saved_flags = kcb->kprobe_old_flags 416 kcb->kprobe_saved_flags = kcb->kprobe_old_flags
417 = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF)); 417 = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
418 if (is_IF_modifier(p->ainsn.insn)) 418 if (is_IF_modifier(p->ainsn.insn))
@@ -586,7 +586,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
586 preempt_enable_no_resched(); 586 preempt_enable_no_resched();
587 return 1; 587 return 1;
588 } else if (kprobe_running()) { 588 } else if (kprobe_running()) {
589 p = __get_cpu_var(current_kprobe); 589 p = __this_cpu_read(current_kprobe);
590 if (p->break_handler && p->break_handler(p, regs)) { 590 if (p->break_handler && p->break_handler(p, regs)) {
591 setup_singlestep(p, regs, kcb, 0); 591 setup_singlestep(p, regs, kcb, 0);
592 return 1; 592 return 1;
@@ -759,11 +759,11 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
759 759
760 orig_ret_address = (unsigned long)ri->ret_addr; 760 orig_ret_address = (unsigned long)ri->ret_addr;
761 if (ri->rp && ri->rp->handler) { 761 if (ri->rp && ri->rp->handler) {
762 __get_cpu_var(current_kprobe) = &ri->rp->kp; 762 __this_cpu_write(current_kprobe, &ri->rp->kp);
763 get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; 763 get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
764 ri->ret_addr = correct_ret_addr; 764 ri->ret_addr = correct_ret_addr;
765 ri->rp->handler(ri, regs); 765 ri->rp->handler(ri, regs);
766 __get_cpu_var(current_kprobe) = NULL; 766 __this_cpu_write(current_kprobe, NULL);
767 } 767 }
768 768
769 recycle_rp_inst(ri, &empty_rp); 769 recycle_rp_inst(ri, &empty_rp);
@@ -1184,6 +1184,10 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op,
1184{ 1184{
1185 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 1185 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
1186 1186
1187	/* This can happen while op is queued for delayed unoptimization */
1188 if (kprobe_disabled(&op->kp))
1189 return;
1190
1187 preempt_disable(); 1191 preempt_disable();
1188 if (kprobe_running()) { 1192 if (kprobe_running()) {
1189 kprobes_inc_nmissed_count(&op->kp); 1193 kprobes_inc_nmissed_count(&op->kp);
@@ -1198,10 +1202,10 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op,
1198 regs->ip = (unsigned long)op->kp.addr + INT3_SIZE; 1202 regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
1199 regs->orig_ax = ~0UL; 1203 regs->orig_ax = ~0UL;
1200 1204
1201 __get_cpu_var(current_kprobe) = &op->kp; 1205 __this_cpu_write(current_kprobe, &op->kp);
1202 kcb->kprobe_status = KPROBE_HIT_ACTIVE; 1206 kcb->kprobe_status = KPROBE_HIT_ACTIVE;
1203 opt_pre_handler(&op->kp, regs); 1207 opt_pre_handler(&op->kp, regs);
1204 __get_cpu_var(current_kprobe) = NULL; 1208 __this_cpu_write(current_kprobe, NULL);
1205 } 1209 }
1206 preempt_enable_no_resched(); 1210 preempt_enable_no_resched();
1207} 1211}
@@ -1401,10 +1405,16 @@ int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
1401 return 0; 1405 return 0;
1402} 1406}
1403 1407
1404/* Replace a breakpoint (int3) with a relative jump. */ 1408#define MAX_OPTIMIZE_PROBES 256
1405int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op) 1409static struct text_poke_param *jump_poke_params;
1410static struct jump_poke_buffer {
1411 u8 buf[RELATIVEJUMP_SIZE];
1412} *jump_poke_bufs;
1413
1414static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm,
1415 u8 *insn_buf,
1416 struct optimized_kprobe *op)
1406{ 1417{
1407 unsigned char jmp_code[RELATIVEJUMP_SIZE];
1408 s32 rel = (s32)((long)op->optinsn.insn - 1418 s32 rel = (s32)((long)op->optinsn.insn -
1409 ((long)op->kp.addr + RELATIVEJUMP_SIZE)); 1419 ((long)op->kp.addr + RELATIVEJUMP_SIZE));
1410 1420
@@ -1412,16 +1422,79 @@ int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op)
1412 memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, 1422 memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
1413 RELATIVE_ADDR_SIZE); 1423 RELATIVE_ADDR_SIZE);
1414 1424
1415 jmp_code[0] = RELATIVEJUMP_OPCODE; 1425 insn_buf[0] = RELATIVEJUMP_OPCODE;
1416 *(s32 *)(&jmp_code[1]) = rel; 1426 *(s32 *)(&insn_buf[1]) = rel;
1427
1428 tprm->addr = op->kp.addr;
1429 tprm->opcode = insn_buf;
1430 tprm->len = RELATIVEJUMP_SIZE;
1431}
1432
1433/*
1434 * Replace breakpoints (int3) with relative jumps.
1435 * Caller must hold kprobe_mutex and text_mutex.
1436 */
1437void __kprobes arch_optimize_kprobes(struct list_head *oplist)
1438{
1439 struct optimized_kprobe *op, *tmp;
1440 int c = 0;
1441
1442 list_for_each_entry_safe(op, tmp, oplist, list) {
1443 WARN_ON(kprobe_disabled(&op->kp));
1444 /* Setup param */
1445 setup_optimize_kprobe(&jump_poke_params[c],
1446 jump_poke_bufs[c].buf, op);
1447 list_del_init(&op->list);
1448 if (++c >= MAX_OPTIMIZE_PROBES)
1449 break;
1450 }
1417 1451
1418 /* 1452 /*
1419 * text_poke_smp doesn't support NMI/MCE code modifying. 1453 * text_poke_smp doesn't support NMI/MCE code modifying.
1420 * However, since kprobes itself also doesn't support NMI/MCE 1454 * However, since kprobes itself also doesn't support NMI/MCE
1421 * code probing, it's not a problem. 1455 * code probing, it's not a problem.
1422 */ 1456 */
1423 text_poke_smp(op->kp.addr, jmp_code, RELATIVEJUMP_SIZE); 1457 text_poke_smp_batch(jump_poke_params, c);
1424 return 0; 1458}
1459
1460static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm,
1461 u8 *insn_buf,
1462 struct optimized_kprobe *op)
1463{
1464 /* Set int3 to first byte for kprobes */
1465 insn_buf[0] = BREAKPOINT_INSTRUCTION;
1466 memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
1467
1468 tprm->addr = op->kp.addr;
1469 tprm->opcode = insn_buf;
1470 tprm->len = RELATIVEJUMP_SIZE;
1471}
1472
1473/*
1474 * Recover original instructions and breakpoints from relative jumps.
1476 * Caller must hold kprobe_mutex.
1476 */
1477void __kprobes arch_unoptimize_kprobes(struct list_head *oplist,
1478 struct list_head *done_list)
1479{
1480 struct optimized_kprobe *op, *tmp;
1481 int c = 0;
1482
1483 list_for_each_entry_safe(op, tmp, oplist, list) {
1484 /* Setup param */
1485 setup_unoptimize_kprobe(&jump_poke_params[c],
1486 jump_poke_bufs[c].buf, op);
1487 list_move(&op->list, done_list);
1488 if (++c >= MAX_OPTIMIZE_PROBES)
1489 break;
1490 }
1491
1492 /*
1493 * text_poke_smp doesn't support NMI/MCE code modifying.
1494 * However, since kprobes itself also doesn't support NMI/MCE
1495 * code probing, it's not a problem.
1496 */
1497 text_poke_smp_batch(jump_poke_params, c);
1425} 1498}
1426 1499
1427/* Replace a relative jump with a breakpoint (int3). */ 1500/* Replace a relative jump with a breakpoint (int3). */
@@ -1453,11 +1526,35 @@ static int __kprobes setup_detour_execution(struct kprobe *p,
1453 } 1526 }
1454 return 0; 1527 return 0;
1455} 1528}
1529
1530static int __kprobes init_poke_params(void)
1531{
1532 /* Allocate code buffer and parameter array */
1533 jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) *
1534 MAX_OPTIMIZE_PROBES, GFP_KERNEL);
1535 if (!jump_poke_bufs)
1536 return -ENOMEM;
1537
1538 jump_poke_params = kmalloc(sizeof(struct text_poke_param) *
1539 MAX_OPTIMIZE_PROBES, GFP_KERNEL);
1540 if (!jump_poke_params) {
1541 kfree(jump_poke_bufs);
1542 jump_poke_bufs = NULL;
1543 return -ENOMEM;
1544 }
1545
1546 return 0;
1547}
1548#else /* !CONFIG_OPTPROBES */
1549static int __kprobes init_poke_params(void)
1550{
1551 return 0;
1552}
1456#endif 1553#endif
1457 1554
1458int __init arch_init_kprobes(void) 1555int __init arch_init_kprobes(void)
1459{ 1556{
1460 return 0; 1557 return init_poke_params();
1461} 1558}
1462 1559
1463int __kprobes arch_trampoline_kprobe(struct kprobe *p) 1560int __kprobes arch_trampoline_kprobe(struct kprobe *p)
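The shape of the kprobe-optimization change above: rather than one text_poke_smp() (and its stop_machine-style synchronization) per probe, up to MAX_OPTIMIZE_PROBES jump patches are gathered into jump_poke_params[] and applied by a single text_poke_smp_batch(). A standalone sketch of that collect-then-apply pattern, with stub types in place of struct text_poke_param and the real batch call:

    #include <stdio.h>

    #define MAX_OPTIMIZE_PROBES 256
    #define RELATIVEJUMP_SIZE 5

    struct poke_param { void *addr; unsigned char opcode[RELATIVEJUMP_SIZE]; };

    static void poke_batch(struct poke_param *p, int n)  /* stub */
    {
        for (int i = 0; i < n; i++)
            printf("patch %d bytes at %p\n", RELATIVEJUMP_SIZE, p[i].addr);
    }

    int main(void)
    {
        static struct poke_param params[MAX_OPTIMIZE_PROBES];
        unsigned char text[16];
        int c = 0;

        for (int i = 0; i < 3; i++) {        /* stands in for the list walk */
            params[c].addr = &text[i];
            params[c].opcode[0] = 0xe9;      /* RELATIVEJUMP_OPCODE */
            if (++c >= MAX_OPTIMIZE_PROBES)
                break;
        }
        poke_batch(params, c);               /* one synchronized pass, not c */
        return 0;
    }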
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index ce0cb4721c9a..0fe6d1a66c38 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -155,12 +155,6 @@ static int apply_microcode_amd(int cpu)
155 return 0; 155 return 0;
156} 156}
157 157
158static int get_ucode_data(void *to, const u8 *from, size_t n)
159{
160 memcpy(to, from, n);
161 return 0;
162}
163
164static void * 158static void *
165get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size) 159get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size)
166{ 160{
@@ -168,8 +162,7 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size)
168 u8 section_hdr[UCODE_CONTAINER_SECTION_HDR]; 162 u8 section_hdr[UCODE_CONTAINER_SECTION_HDR];
169 void *mc; 163 void *mc;
170 164
171 if (get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR)) 165 get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR);
172 return NULL;
173 166
174 if (section_hdr[0] != UCODE_UCODE_TYPE) { 167 if (section_hdr[0] != UCODE_UCODE_TYPE) {
175 pr_err("error: invalid type field in container file section header\n"); 168 pr_err("error: invalid type field in container file section header\n");
@@ -183,16 +176,13 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size)
183 return NULL; 176 return NULL;
184 } 177 }
185 178
186 mc = vmalloc(UCODE_MAX_SIZE); 179 mc = vzalloc(UCODE_MAX_SIZE);
187 if (mc) { 180 if (!mc)
188 memset(mc, 0, UCODE_MAX_SIZE); 181 return NULL;
189 if (get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, 182
190 total_size)) { 183 get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, total_size);
191 vfree(mc); 184 *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR;
192 mc = NULL; 185
193 } else
194 *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR;
195 }
196 return mc; 186 return mc;
197} 187}
198 188
@@ -202,8 +192,7 @@ static int install_equiv_cpu_table(const u8 *buf)
202 unsigned int *buf_pos = (unsigned int *)container_hdr; 192 unsigned int *buf_pos = (unsigned int *)container_hdr;
203 unsigned long size; 193 unsigned long size;
204 194
205 if (get_ucode_data(&container_hdr, buf, UCODE_CONTAINER_HEADER_SIZE)) 195 get_ucode_data(&container_hdr, buf, UCODE_CONTAINER_HEADER_SIZE);
206 return 0;
207 196
208 size = buf_pos[2]; 197 size = buf_pos[2];
209 198
@@ -219,10 +208,7 @@ static int install_equiv_cpu_table(const u8 *buf)
219 } 208 }
220 209
221 buf += UCODE_CONTAINER_HEADER_SIZE; 210 buf += UCODE_CONTAINER_HEADER_SIZE;
222 if (get_ucode_data(equiv_cpu_table, buf, size)) { 211 get_ucode_data(equiv_cpu_table, buf, size);
223 vfree(equiv_cpu_table);
224 return 0;
225 }
226 212
227 return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */ 213 return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */
228} 214}
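With get_ucode_data() reduced to an unconditional memcpy(), its callers lose the dead error paths, and the vmalloc()+memset() pair collapses into vzalloc(). The userspace equivalent of that last fold:

    #include <stdlib.h>
    #include <string.h>

    #define UCODE_MAX_SIZE 2048

    int main(void)
    {
        /* before: allocate, then zero by hand */
        void *a = malloc(UCODE_MAX_SIZE);
        if (a)
            memset(a, 0, UCODE_MAX_SIZE);

        /* after: one call yields the same zeroed buffer */
        void *b = calloc(1, UCODE_MAX_SIZE);

        free(a);
        free(b);
        return 0;
    }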
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 9af64d9c4b67..01b0f6d06451 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -118,21 +118,8 @@ static void __init MP_bus_info(struct mpc_bus *m)
118 118
119static void __init MP_ioapic_info(struct mpc_ioapic *m) 119static void __init MP_ioapic_info(struct mpc_ioapic *m)
120{ 120{
121 if (!(m->flags & MPC_APIC_USABLE)) 121 if (m->flags & MPC_APIC_USABLE)
122 return; 122 mp_register_ioapic(m->apicid, m->apicaddr, gsi_top);
123
124 printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n",
125 m->apicid, m->apicver, m->apicaddr);
126
127 mp_register_ioapic(m->apicid, m->apicaddr, gsi_top);
128}
129
130static void print_MP_intsrc_info(struct mpc_intsrc *m)
131{
132 apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
133 " IRQ %02x, APIC ID %x, APIC INT %02x\n",
134 m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbus,
135 m->srcbusirq, m->dstapic, m->dstirq);
136} 123}
137 124
138static void __init print_mp_irq_info(struct mpc_intsrc *mp_irq) 125static void __init print_mp_irq_info(struct mpc_intsrc *mp_irq)
@@ -144,73 +131,11 @@ static void __init print_mp_irq_info(struct mpc_intsrc *mp_irq)
144 mp_irq->srcbusirq, mp_irq->dstapic, mp_irq->dstirq); 131 mp_irq->srcbusirq, mp_irq->dstapic, mp_irq->dstirq);
145} 132}
146 133
147static void __init assign_to_mp_irq(struct mpc_intsrc *m,
148 struct mpc_intsrc *mp_irq)
149{
150 mp_irq->dstapic = m->dstapic;
151 mp_irq->type = m->type;
152 mp_irq->irqtype = m->irqtype;
153 mp_irq->irqflag = m->irqflag;
154 mp_irq->srcbus = m->srcbus;
155 mp_irq->srcbusirq = m->srcbusirq;
156 mp_irq->dstirq = m->dstirq;
157}
158
159static void __init assign_to_mpc_intsrc(struct mpc_intsrc *mp_irq,
160 struct mpc_intsrc *m)
161{
162 m->dstapic = mp_irq->dstapic;
163 m->type = mp_irq->type;
164 m->irqtype = mp_irq->irqtype;
165 m->irqflag = mp_irq->irqflag;
166 m->srcbus = mp_irq->srcbus;
167 m->srcbusirq = mp_irq->srcbusirq;
168 m->dstirq = mp_irq->dstirq;
169}
170
171static int __init mp_irq_mpc_intsrc_cmp(struct mpc_intsrc *mp_irq,
172 struct mpc_intsrc *m)
173{
174 if (mp_irq->dstapic != m->dstapic)
175 return 1;
176 if (mp_irq->type != m->type)
177 return 2;
178 if (mp_irq->irqtype != m->irqtype)
179 return 3;
180 if (mp_irq->irqflag != m->irqflag)
181 return 4;
182 if (mp_irq->srcbus != m->srcbus)
183 return 5;
184 if (mp_irq->srcbusirq != m->srcbusirq)
185 return 6;
186 if (mp_irq->dstirq != m->dstirq)
187 return 7;
188
189 return 0;
190}
191
192static void __init MP_intsrc_info(struct mpc_intsrc *m)
193{
194 int i;
195
196 print_MP_intsrc_info(m);
197
198 for (i = 0; i < mp_irq_entries; i++) {
199 if (!mp_irq_mpc_intsrc_cmp(&mp_irqs[i], m))
200 return;
201 }
202
203 assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]);
204 if (++mp_irq_entries == MAX_IRQ_SOURCES)
205 panic("Max # of irq sources exceeded!!\n");
206}
207#else /* CONFIG_X86_IO_APIC */ 134#else /* CONFIG_X86_IO_APIC */
208static inline void __init MP_bus_info(struct mpc_bus *m) {} 135static inline void __init MP_bus_info(struct mpc_bus *m) {}
209static inline void __init MP_ioapic_info(struct mpc_ioapic *m) {} 136static inline void __init MP_ioapic_info(struct mpc_ioapic *m) {}
210static inline void __init MP_intsrc_info(struct mpc_intsrc *m) {}
211#endif /* CONFIG_X86_IO_APIC */ 137#endif /* CONFIG_X86_IO_APIC */
212 138
213
214static void __init MP_lintsrc_info(struct mpc_lintsrc *m) 139static void __init MP_lintsrc_info(struct mpc_lintsrc *m)
215{ 140{
216 apic_printk(APIC_VERBOSE, "Lint: type %d, pol %d, trig %d, bus %02x," 141 apic_printk(APIC_VERBOSE, "Lint: type %d, pol %d, trig %d, bus %02x,"
@@ -222,7 +147,6 @@ static void __init MP_lintsrc_info(struct mpc_lintsrc *m)
222/* 147/*
223 * Read/parse the MPC 148 * Read/parse the MPC
224 */ 149 */
225
226static int __init smp_check_mpc(struct mpc_table *mpc, char *oem, char *str) 150static int __init smp_check_mpc(struct mpc_table *mpc, char *oem, char *str)
227{ 151{
228 152
@@ -275,18 +199,6 @@ static void __init smp_dump_mptable(struct mpc_table *mpc, unsigned char *mpt)
275 199
276void __init default_smp_read_mpc_oem(struct mpc_table *mpc) { } 200void __init default_smp_read_mpc_oem(struct mpc_table *mpc) { }
277 201
278static void __init smp_register_lapic_address(unsigned long address)
279{
280 mp_lapic_addr = address;
281
282 set_fixmap_nocache(FIX_APIC_BASE, address);
283 if (boot_cpu_physical_apicid == -1U) {
284 boot_cpu_physical_apicid = read_apic_id();
285 apic_version[boot_cpu_physical_apicid] =
286 GET_APIC_VERSION(apic_read(APIC_LVR));
287 }
288}
289
290static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) 202static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
291{ 203{
292 char str[16]; 204 char str[16];
@@ -301,17 +213,13 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
301#ifdef CONFIG_X86_32 213#ifdef CONFIG_X86_32
302 generic_mps_oem_check(mpc, oem, str); 214 generic_mps_oem_check(mpc, oem, str);
303#endif 215#endif
304 /* save the local APIC address, it might be non-default */ 216 /* Initialize the lapic mapping */
305 if (!acpi_lapic) 217 if (!acpi_lapic)
306 mp_lapic_addr = mpc->lapic; 218 register_lapic_address(mpc->lapic);
307 219
308 if (early) 220 if (early)
309 return 1; 221 return 1;
310 222
311 /* Initialize the lapic mapping */
312 if (!acpi_lapic)
313 smp_register_lapic_address(mpc->lapic);
314
315 if (mpc->oemptr) 223 if (mpc->oemptr)
316 x86_init.mpparse.smp_read_mpc_oem(mpc); 224 x86_init.mpparse.smp_read_mpc_oem(mpc);
317 225
@@ -337,7 +245,7 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
337 skip_entry(&mpt, &count, sizeof(struct mpc_ioapic)); 245 skip_entry(&mpt, &count, sizeof(struct mpc_ioapic));
338 break; 246 break;
339 case MP_INTSRC: 247 case MP_INTSRC:
340 MP_intsrc_info((struct mpc_intsrc *)mpt); 248 mp_save_irq((struct mpc_intsrc *)mpt);
341 skip_entry(&mpt, &count, sizeof(struct mpc_intsrc)); 249 skip_entry(&mpt, &count, sizeof(struct mpc_intsrc));
342 break; 250 break;
343 case MP_LINTSRC: 251 case MP_LINTSRC:
@@ -429,13 +337,13 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type)
429 337
430 intsrc.srcbusirq = i; 338 intsrc.srcbusirq = i;
431 intsrc.dstirq = i ? i : 2; /* IRQ0 to INTIN2 */ 339 intsrc.dstirq = i ? i : 2; /* IRQ0 to INTIN2 */
432 MP_intsrc_info(&intsrc); 340 mp_save_irq(&intsrc);
433 } 341 }
434 342
435 intsrc.irqtype = mp_ExtINT; 343 intsrc.irqtype = mp_ExtINT;
436 intsrc.srcbusirq = 0; 344 intsrc.srcbusirq = 0;
437 intsrc.dstirq = 0; /* 8259A to INTIN0 */ 345 intsrc.dstirq = 0; /* 8259A to INTIN0 */
438 MP_intsrc_info(&intsrc); 346 mp_save_irq(&intsrc);
439} 347}
440 348
441 349
@@ -784,11 +692,11 @@ static void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare)
784 int i; 692 int i;
785 693
786 apic_printk(APIC_VERBOSE, "OLD "); 694 apic_printk(APIC_VERBOSE, "OLD ");
787 print_MP_intsrc_info(m); 695 print_mp_irq_info(m);
788 696
789 i = get_MP_intsrc_index(m); 697 i = get_MP_intsrc_index(m);
790 if (i > 0) { 698 if (i > 0) {
791 assign_to_mpc_intsrc(&mp_irqs[i], m); 699 memcpy(m, &mp_irqs[i], sizeof(*m));
792 apic_printk(APIC_VERBOSE, "NEW "); 700 apic_printk(APIC_VERBOSE, "NEW ");
793 print_mp_irq_info(&mp_irqs[i]); 701 print_mp_irq_info(&mp_irqs[i]);
794 return; 702 return;
@@ -875,14 +783,14 @@ static int __init replace_intsrc_all(struct mpc_table *mpc,
875 if (nr_m_spare > 0) { 783 if (nr_m_spare > 0) {
876 apic_printk(APIC_VERBOSE, "*NEW* found\n"); 784 apic_printk(APIC_VERBOSE, "*NEW* found\n");
877 nr_m_spare--; 785 nr_m_spare--;
878 assign_to_mpc_intsrc(&mp_irqs[i], m_spare[nr_m_spare]); 786 memcpy(m_spare[nr_m_spare], &mp_irqs[i], sizeof(mp_irqs[i]));
879 m_spare[nr_m_spare] = NULL; 787 m_spare[nr_m_spare] = NULL;
880 } else { 788 } else {
881 struct mpc_intsrc *m = (struct mpc_intsrc *)mpt; 789 struct mpc_intsrc *m = (struct mpc_intsrc *)mpt;
882 count += sizeof(struct mpc_intsrc); 790 count += sizeof(struct mpc_intsrc);
883 if (check_slot(mpc_new_phys, mpc_new_length, count) < 0) 791 if (check_slot(mpc_new_phys, mpc_new_length, count) < 0)
884 goto out; 792 goto out;
885 assign_to_mpc_intsrc(&mp_irqs[i], m); 793 memcpy(m, &mp_irqs[i], sizeof(*m));
886 mpc->length = count; 794 mpc->length = count;
887 mpt += sizeof(struct mpc_intsrc); 795 mpt += sizeof(struct mpc_intsrc);
888 } 796 }
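assign_to_mp_irq() and assign_to_mpc_intsrc() copied identical structures field by field, so plain memcpy() calls are drop-in replacements, as seen in check_irq_src() and replace_intsrc_all() above. A minimal sketch (the struct is a stand-in with the same field names, not the authoritative <asm/mpspec_def.h> definition):

    #include <stdio.h>
    #include <string.h>

    struct mpc_intsrc {
        unsigned char  type, irqtype;
        unsigned short irqflag;
        unsigned char  srcbus, srcbusirq, dstapic, dstirq;
    };

    int main(void)
    {
        struct mpc_intsrc m = { .type = 3, .srcbus = 1, .dstirq = 2 };
        struct mpc_intsrc saved;

        memcpy(&saved, &m, sizeof(m));   /* replaces assign_to_mp_irq() */
        printf("dstirq=%u\n", saved.dstirq);
        return 0;
    }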
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index ba0f0ca9f280..c01ffa5b9b87 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -143,7 +143,7 @@ static void flush_gart(void)
143 143
144 spin_lock_irqsave(&iommu_bitmap_lock, flags); 144 spin_lock_irqsave(&iommu_bitmap_lock, flags);
145 if (need_flush) { 145 if (need_flush) {
146 k8_flush_garts(); 146 amd_flush_garts();
147 need_flush = false; 147 need_flush = false;
148 } 148 }
149 spin_unlock_irqrestore(&iommu_bitmap_lock, flags); 149 spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
@@ -561,17 +561,17 @@ static void enable_gart_translations(void)
561{ 561{
562 int i; 562 int i;
563 563
564 if (!k8_northbridges.gart_supported) 564 if (!amd_nb_has_feature(AMD_NB_GART))
565 return; 565 return;
566 566
567 for (i = 0; i < k8_northbridges.num; i++) { 567 for (i = 0; i < amd_nb_num(); i++) {
568 struct pci_dev *dev = k8_northbridges.nb_misc[i]; 568 struct pci_dev *dev = node_to_amd_nb(i)->misc;
569 569
570 enable_gart_translation(dev, __pa(agp_gatt_table)); 570 enable_gart_translation(dev, __pa(agp_gatt_table));
571 } 571 }
572 572
573 /* Flush the GART-TLB to remove stale entries */ 573 /* Flush the GART-TLB to remove stale entries */
574 k8_flush_garts(); 574 amd_flush_garts();
575} 575}
576 576
577/* 577/*
@@ -596,13 +596,13 @@ static void gart_fixup_northbridges(struct sys_device *dev)
596 if (!fix_up_north_bridges) 596 if (!fix_up_north_bridges)
597 return; 597 return;
598 598
599 if (!k8_northbridges.gart_supported) 599 if (!amd_nb_has_feature(AMD_NB_GART))
600 return; 600 return;
601 601
602 pr_info("PCI-DMA: Restoring GART aperture settings\n"); 602 pr_info("PCI-DMA: Restoring GART aperture settings\n");
603 603
604 for (i = 0; i < k8_northbridges.num; i++) { 604 for (i = 0; i < amd_nb_num(); i++) {
605 struct pci_dev *dev = k8_northbridges.nb_misc[i]; 605 struct pci_dev *dev = node_to_amd_nb(i)->misc;
606 606
607 /* 607 /*
608 * Don't enable translations just yet. That is the next 608 * Don't enable translations just yet. That is the next
@@ -644,7 +644,7 @@ static struct sys_device device_gart = {
644 * Private Northbridge GATT initialization in case we cannot use the 644 * Private Northbridge GATT initialization in case we cannot use the
645 * AGP driver for some reason. 645 * AGP driver for some reason.
646 */ 646 */
647static __init int init_k8_gatt(struct agp_kern_info *info) 647static __init int init_amd_gatt(struct agp_kern_info *info)
648{ 648{
649 unsigned aper_size, gatt_size, new_aper_size; 649 unsigned aper_size, gatt_size, new_aper_size;
650 unsigned aper_base, new_aper_base; 650 unsigned aper_base, new_aper_base;
@@ -656,8 +656,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
656 656
657 aper_size = aper_base = info->aper_size = 0; 657 aper_size = aper_base = info->aper_size = 0;
658 dev = NULL; 658 dev = NULL;
659 for (i = 0; i < k8_northbridges.num; i++) { 659 for (i = 0; i < amd_nb_num(); i++) {
660 dev = k8_northbridges.nb_misc[i]; 660 dev = node_to_amd_nb(i)->misc;
661 new_aper_base = read_aperture(dev, &new_aper_size); 661 new_aper_base = read_aperture(dev, &new_aper_size);
662 if (!new_aper_base) 662 if (!new_aper_base)
663 goto nommu; 663 goto nommu;
@@ -725,13 +725,13 @@ static void gart_iommu_shutdown(void)
725 if (!no_agp) 725 if (!no_agp)
726 return; 726 return;
727 727
728 if (!k8_northbridges.gart_supported) 728 if (!amd_nb_has_feature(AMD_NB_GART))
729 return; 729 return;
730 730
731 for (i = 0; i < k8_northbridges.num; i++) { 731 for (i = 0; i < amd_nb_num(); i++) {
732 u32 ctl; 732 u32 ctl;
733 733
734 dev = k8_northbridges.nb_misc[i]; 734 dev = node_to_amd_nb(i)->misc;
735 pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl); 735 pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl);
736 736
737 ctl &= ~GARTEN; 737 ctl &= ~GARTEN;
@@ -749,14 +749,14 @@ int __init gart_iommu_init(void)
749 unsigned long scratch; 749 unsigned long scratch;
750 long i; 750 long i;
751 751
752 if (!k8_northbridges.gart_supported) 752 if (!amd_nb_has_feature(AMD_NB_GART))
753 return 0; 753 return 0;
754 754
755#ifndef CONFIG_AGP_AMD64 755#ifndef CONFIG_AGP_AMD64
756 no_agp = 1; 756 no_agp = 1;
757#else 757#else
758 /* Makefile puts PCI initialization via subsys_initcall first. */ 758 /* Makefile puts PCI initialization via subsys_initcall first. */
759 /* Add other K8 AGP bridge drivers here */ 759 /* Add other AMD AGP bridge drivers here */
760 no_agp = no_agp || 760 no_agp = no_agp ||
761 (agp_amd64_init() < 0) || 761 (agp_amd64_init() < 0) ||
762 (agp_copy_info(agp_bridge, &info) < 0); 762 (agp_copy_info(agp_bridge, &info) < 0);
@@ -765,7 +765,7 @@ int __init gart_iommu_init(void)
765 if (no_iommu || 765 if (no_iommu ||
766 (!force_iommu && max_pfn <= MAX_DMA32_PFN) || 766 (!force_iommu && max_pfn <= MAX_DMA32_PFN) ||
767 !gart_iommu_aperture || 767 !gart_iommu_aperture ||
768 (no_agp && init_k8_gatt(&info) < 0)) { 768 (no_agp && init_amd_gatt(&info) < 0)) {
769 if (max_pfn > MAX_DMA32_PFN) { 769 if (max_pfn > MAX_DMA32_PFN) {
770 pr_warning("More than 4GB of memory but GART IOMMU not available.\n"); 770 pr_warning("More than 4GB of memory but GART IOMMU not available.\n");
771 pr_warning("falling back to iommu=soft.\n"); 771 pr_warning("falling back to iommu=soft.\n");
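Every GART path above is a mechanical move from the flat k8_northbridges descriptor to the amd_nb accessors: check the feature, count the nodes, fetch the per-node misc device. Stubbed sketch of the accessor shape (stand-in definitions; the real ones live behind <asm/amd_nb.h>):

    #include <stdio.h>

    #define AMD_NB_GART 0x1

    struct amd_northbridge { void *misc; };   /* stand-in */

    static struct amd_northbridge nbs[2];
    static unsigned nb_features = AMD_NB_GART;

    static int amd_nb_num(void) { return 2; }
    static struct amd_northbridge *node_to_amd_nb(int node) { return &nbs[node]; }
    static int amd_nb_has_feature(unsigned f) { return (nb_features & f) == f; }

    int main(void)
    {
        if (!amd_nb_has_feature(AMD_NB_GART))
            return 0;
        for (int i = 0; i < amd_nb_num(); i++)
            printf("node %d: misc dev at %p\n", i, node_to_amd_nb(i)->misc);
        return 0;
    }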
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index b6472153e45b..7c23a0cd3eb9 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -86,8 +86,7 @@ void exit_thread(void)
86void show_regs(struct pt_regs *regs) 86void show_regs(struct pt_regs *regs)
87{ 87{
88 show_registers(regs); 88 show_registers(regs);
89 show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs), 89 show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs));
90 regs->bp);
91} 90}
92 91
93void show_regs_common(void) 92void show_regs_common(void)
@@ -369,6 +368,7 @@ void default_idle(void)
369{ 368{
370 if (hlt_use_halt()) { 369 if (hlt_use_halt()) {
371 trace_power_start(POWER_CSTATE, 1, smp_processor_id()); 370 trace_power_start(POWER_CSTATE, 1, smp_processor_id());
371 trace_cpu_idle(1, smp_processor_id());
372 current_thread_info()->status &= ~TS_POLLING; 372 current_thread_info()->status &= ~TS_POLLING;
373 /* 373 /*
374 * TS_POLLING-cleared state must be visible before we 374 * TS_POLLING-cleared state must be visible before we
@@ -439,8 +439,9 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
439void mwait_idle_with_hints(unsigned long ax, unsigned long cx) 439void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
440{ 440{
441 trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id()); 441 trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id());
442 trace_cpu_idle((ax>>4)+1, smp_processor_id());
442 if (!need_resched()) { 443 if (!need_resched()) {
443 if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) 444 if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR))
444 clflush((void *)&current_thread_info()->flags); 445 clflush((void *)&current_thread_info()->flags);
445 446
446 __monitor((void *)&current_thread_info()->flags, 0, 0); 447 __monitor((void *)&current_thread_info()->flags, 0, 0);
@@ -455,7 +456,8 @@ static void mwait_idle(void)
455{ 456{
456 if (!need_resched()) { 457 if (!need_resched()) {
457 trace_power_start(POWER_CSTATE, 1, smp_processor_id()); 458 trace_power_start(POWER_CSTATE, 1, smp_processor_id());
458 if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) 459 trace_cpu_idle(1, smp_processor_id());
460 if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR))
459 clflush((void *)&current_thread_info()->flags); 461 clflush((void *)&current_thread_info()->flags);
460 462
461 __monitor((void *)&current_thread_info()->flags, 0, 0); 463 __monitor((void *)&current_thread_info()->flags, 0, 0);
@@ -476,10 +478,12 @@ static void mwait_idle(void)
476static void poll_idle(void) 478static void poll_idle(void)
477{ 479{
478 trace_power_start(POWER_CSTATE, 0, smp_processor_id()); 480 trace_power_start(POWER_CSTATE, 0, smp_processor_id());
481 trace_cpu_idle(0, smp_processor_id());
479 local_irq_enable(); 482 local_irq_enable();
480 while (!need_resched()) 483 while (!need_resched())
481 cpu_relax(); 484 cpu_relax();
482 trace_power_end(0); 485 trace_power_end(smp_processor_id());
486 trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
483} 487}
484 488
485/* 489/*
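The pattern running through process.c here and process_32.c/process_64.c below: each idle entry point now emits trace_cpu_idle() with a C-state on entry and PWR_EVENT_EXIT on wakeup, so a tracer can reconstruct idle residency per CPU. Toy model of the pairing (stub tracepoint; the PWR_EVENT_EXIT value is an assumption, not quoted from the power-trace headers):

    #include <stdio.h>

    #define PWR_EVENT_EXIT -1   /* assumed sentinel */

    static void trace_cpu_idle(int state, unsigned cpu)   /* stub */
    {
        if (state == PWR_EVENT_EXIT)
            printf("cpu %u: idle exit\n", cpu);
        else
            printf("cpu %u: enter C%d\n", cpu, state);
    }

    static void idle_once(unsigned cpu)
    {
        trace_cpu_idle(1, cpu);               /* before hlt/mwait */
        /* ... the CPU would sleep here ... */
        trace_cpu_idle(PWR_EVENT_EXIT, cpu);  /* after wakeup */
    }

    int main(void) { idle_once(0); return 0; }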
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 96586c3cbbbf..4b9befa0e347 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -113,8 +113,8 @@ void cpu_idle(void)
113 stop_critical_timings(); 113 stop_critical_timings();
114 pm_idle(); 114 pm_idle();
115 start_critical_timings(); 115 start_critical_timings();
116
117 trace_power_end(smp_processor_id()); 116 trace_power_end(smp_processor_id());
117 trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
118 } 118 }
119 tick_nohz_restart_sched_tick(); 119 tick_nohz_restart_sched_tick();
120 preempt_enable_no_resched(); 120 preempt_enable_no_resched();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index b3d7a3a04f38..4c818a738396 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -142,6 +142,8 @@ void cpu_idle(void)
142 start_critical_timings(); 142 start_critical_timings();
143 143
144 trace_power_end(smp_processor_id()); 144 trace_power_end(smp_processor_id());
145 trace_cpu_idle(PWR_EVENT_EXIT,
146 smp_processor_id());
145 147
146 /* In many cases the interrupt that ended idle 148 /* In many cases the interrupt that ended idle
147 has already called exit_idle. But some idle 149 has already called exit_idle. But some idle
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index c495aa8d4815..fc7aae1e2bc7 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -18,6 +18,7 @@
18#include <asm/pci_x86.h> 18#include <asm/pci_x86.h>
19#include <asm/virtext.h> 19#include <asm/virtext.h>
20#include <asm/cpu.h> 20#include <asm/cpu.h>
21#include <asm/nmi.h>
21 22
22#ifdef CONFIG_X86_32 23#ifdef CONFIG_X86_32
23# include <linux/ctype.h> 24# include <linux/ctype.h>
@@ -747,7 +748,7 @@ static int crash_nmi_callback(struct notifier_block *self,
747{ 748{
748 int cpu; 749 int cpu;
749 750
750 if (val != DIE_NMI_IPI) 751 if (val != DIE_NMI)
751 return NOTIFY_OK; 752 return NOTIFY_OK;
752 753
753 cpu = raw_smp_processor_id(); 754 cpu = raw_smp_processor_id();
@@ -778,6 +779,8 @@ static void smp_send_nmi_allbutself(void)
778 779
779static struct notifier_block crash_nmi_nb = { 780static struct notifier_block crash_nmi_nb = {
780 .notifier_call = crash_nmi_callback, 781 .notifier_call = crash_nmi_callback,
782 /* we want to be the first one called */
783 .priority = NMI_LOCAL_HIGH_PRIOR+1,
781}; 784};
782 785
783/* Halt all other CPUs, calling the specified function on each of them 786/* Halt all other CPUs, calling the specified function on each of them
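With DIE_NMI_IPI gone, crash_nmi_callback keys on DIE_NMI and registers at NMI_LOCAL_HIGH_PRIOR+1 so the crash path preempts every other NMI consumer; the kgdb hunk earlier is the mirror image, dropping to NMI_LOCAL_LOW_PRIOR to run last. Toy model of priority-ordered notifier delivery (the numeric values are illustrative, not the asm/nmi.h constants):

    #include <stdio.h>
    #include <stdlib.h>

    struct notifier { int priority; const char *name; };

    static int by_prio_desc(const void *a, const void *b)
    {
        return ((const struct notifier *)b)->priority -
               ((const struct notifier *)a)->priority;
    }

    int main(void)
    {
        struct notifier chain[] = {
            {  0, "nmi watchdog (NMI_LOCAL_NORMAL_PRIOR)" },
            {  2, "crash_nmi_callback (NMI_LOCAL_HIGH_PRIOR+1)" },
            { -1, "kgdb (NMI_LOCAL_LOW_PRIOR)" },
        };
        qsort(chain, 3, sizeof(chain[0]), by_prio_desc);
        for (int i = 0; i < 3; i++)
            printf("call %d: %s\n", i, chain[i].name);
        return 0;
    }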
diff --git a/arch/x86/kernel/reboot_fixups_32.c b/arch/x86/kernel/reboot_fixups_32.c
index fda313ebbb03..c8e41e90f59c 100644
--- a/arch/x86/kernel/reboot_fixups_32.c
+++ b/arch/x86/kernel/reboot_fixups_32.c
@@ -43,17 +43,33 @@ static void rdc321x_reset(struct pci_dev *dev)
43 outb(1, 0x92); 43 outb(1, 0x92);
44} 44}
45 45
46static void ce4100_reset(struct pci_dev *dev)
47{
48 int i;
49
50 for (i = 0; i < 10; i++) {
51 outb(0x2, 0xcf9);
52 udelay(50);
53 }
54}
55
46struct device_fixup { 56struct device_fixup {
47 unsigned int vendor; 57 unsigned int vendor;
48 unsigned int device; 58 unsigned int device;
49 void (*reboot_fixup)(struct pci_dev *); 59 void (*reboot_fixup)(struct pci_dev *);
50}; 60};
51 61
62/*
63 * PCI ids solely used for fixups_table go here
64 */
65#define PCI_DEVICE_ID_INTEL_CE4100 0x0708
66
52static const struct device_fixup fixups_table[] = { 67static const struct device_fixup fixups_table[] = {
53{ PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset }, 68{ PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset },
54{ PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, cs5536_warm_reset }, 69{ PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, cs5536_warm_reset },
55{ PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE, cs5530a_warm_reset }, 70{ PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE, cs5530a_warm_reset },
56{ PCI_VENDOR_ID_RDC, PCI_DEVICE_ID_RDC_R6030, rdc321x_reset }, 71{ PCI_VENDOR_ID_RDC, PCI_DEVICE_ID_RDC_R6030, rdc321x_reset },
72{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CE4100, ce4100_reset },
57}; 73};
58 74
59/* 75/*
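ce4100_reset() hits the conventional reset-control port: writing 0x2 to 0xcf9 requests a system reset on this SoC, retried ten times with a short settle delay in case the first write is ignored. Sketch with stubbed port I/O (outb()/udelay() replaced by prints; real port access needs ring 0 or ioperm()):

    #include <stdio.h>

    static void outb(unsigned char val, unsigned short port)
    {
        printf("outb(%#x, %#x)\n", val, port);   /* stub */
    }

    static void udelay(unsigned usecs) { (void)usecs; /* stub */ }

    int main(void)
    {
        for (int i = 0; i < 10; i++) {
            outb(0x2, 0xcf9);   /* reset request bit */
            udelay(50);
        }
        return 0;
    }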
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index a0f52af256a0..d3cfe26c0252 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -705,7 +705,7 @@ static u64 __init get_max_mapped(void)
705void __init setup_arch(char **cmdline_p) 705void __init setup_arch(char **cmdline_p)
706{ 706{
707 int acpi = 0; 707 int acpi = 0;
708 int k8 = 0; 708 int amd = 0;
709 unsigned long flags; 709 unsigned long flags;
710 710
711#ifdef CONFIG_X86_32 711#ifdef CONFIG_X86_32
@@ -991,12 +991,12 @@ void __init setup_arch(char **cmdline_p)
991 acpi = acpi_numa_init(); 991 acpi = acpi_numa_init();
992#endif 992#endif
993 993
994#ifdef CONFIG_K8_NUMA 994#ifdef CONFIG_AMD_NUMA
995 if (!acpi) 995 if (!acpi)
996 k8 = !k8_numa_init(0, max_pfn); 996 amd = !amd_numa_init(0, max_pfn);
997#endif 997#endif
998 998
999 initmem_init(0, max_pfn, acpi, k8); 999 initmem_init(0, max_pfn, acpi, amd);
1000 memblock_find_dma_reserve(); 1000 memblock_find_dma_reserve();
1001 dma32_reserve_bootmem(); 1001 dma32_reserve_bootmem();
1002 1002
@@ -1045,10 +1045,7 @@ void __init setup_arch(char **cmdline_p)
1045#endif 1045#endif
1046 1046
1047 init_apic_mappings(); 1047 init_apic_mappings();
1048 ioapic_init_mappings(); 1048 ioapic_and_gsi_init();
1049
1050 /* need to wait for io_apic is mapped */
1051 probe_nr_irqs_gsi();
1052 1049
1053 kvm_guest_init(); 1050 kvm_guest_init();
1054 1051
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 083e99d1b7df..763df77343dd 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -97,12 +97,12 @@ static DEFINE_PER_CPU(struct task_struct *, idle_thread_array);
97 */ 97 */
98static DEFINE_MUTEX(x86_cpu_hotplug_driver_mutex); 98static DEFINE_MUTEX(x86_cpu_hotplug_driver_mutex);
99 99
100void cpu_hotplug_driver_lock() 100void cpu_hotplug_driver_lock(void)
101{ 101{
102 mutex_lock(&x86_cpu_hotplug_driver_mutex); 102 mutex_lock(&x86_cpu_hotplug_driver_mutex);
103} 103}
104 104
105void cpu_hotplug_driver_unlock() 105void cpu_hotplug_driver_unlock(void)
106{ 106{
107 mutex_unlock(&x86_cpu_hotplug_driver_mutex); 107 mutex_unlock(&x86_cpu_hotplug_driver_mutex);
108} 108}
@@ -281,6 +281,13 @@ static void __cpuinit smp_callin(void)
281 */ 281 */
282 smp_store_cpu_info(cpuid); 282 smp_store_cpu_info(cpuid);
283 283
284 /*
285 * This must be done before setting cpu_online_mask
286 * or calling notify_cpu_starting.
287 */
288 set_cpu_sibling_map(raw_smp_processor_id());
289 wmb();
290
284 notify_cpu_starting(cpuid); 291 notify_cpu_starting(cpuid);
285 292
286 /* 293 /*
@@ -316,16 +323,6 @@ notrace static void __cpuinit start_secondary(void *unused)
316 */ 323 */
317 check_tsc_sync_target(); 324 check_tsc_sync_target();
318 325
319 if (nmi_watchdog == NMI_IO_APIC) {
320 legacy_pic->mask(0);
321 enable_NMI_through_LVT0();
322 legacy_pic->unmask(0);
323 }
324
325 /* This must be done before setting cpu_online_mask */
326 set_cpu_sibling_map(raw_smp_processor_id());
327 wmb();
328
329 /* 326 /*
330 * We need to hold call_lock, so there is no inconsistency 327 * We need to hold call_lock, so there is no inconsistency
331 * between the time smp_call_function() determines number of 328 * between the time smp_call_function() determines number of
@@ -430,7 +427,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
430 427
431 cpumask_set_cpu(cpu, c->llc_shared_map); 428 cpumask_set_cpu(cpu, c->llc_shared_map);
432 429
433 if (current_cpu_data.x86_max_cores == 1) { 430 if (__this_cpu_read(cpu_info.x86_max_cores) == 1) {
434 cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu)); 431 cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu));
435 c->booted_cores = 1; 432 c->booted_cores = 1;
436 return; 433 return;
@@ -1061,8 +1058,6 @@ static int __init smp_sanity_check(unsigned max_cpus)
1061 printk(KERN_INFO "SMP mode deactivated.\n"); 1058 printk(KERN_INFO "SMP mode deactivated.\n");
1062 smpboot_clear_io_apic(); 1059 smpboot_clear_io_apic();
1063 1060
1064 localise_nmi_watchdog();
1065
1066 connect_bsp_APIC(); 1061 connect_bsp_APIC();
1067 setup_local_APIC(); 1062 setup_local_APIC();
1068 end_local_APIC_setup(); 1063 end_local_APIC_setup();
@@ -1094,7 +1089,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1094 1089
1095 preempt_disable(); 1090 preempt_disable();
1096 smp_cpu_index_default(); 1091 smp_cpu_index_default();
1097 current_cpu_data = boot_cpu_data; 1092 memcpy(__this_cpu_ptr(&cpu_info), &boot_cpu_data, sizeof(cpu_info));
1098 cpumask_copy(cpu_callin_mask, cpumask_of(0)); 1093 cpumask_copy(cpu_callin_mask, cpumask_of(0));
1099 mb(); 1094 mb();
1100 /* 1095 /*
@@ -1166,6 +1161,20 @@ out:
1166 preempt_enable(); 1161 preempt_enable();
1167} 1162}
1168 1163
1164void arch_disable_nonboot_cpus_begin(void)
1165{
1166 /*
1167 * Avoid the SMP alternatives switch during disable_nonboot_cpus();
1168 * in the suspend path we will be back in SMP mode shortly anyway.
1169 */
1170 skip_smp_alternatives = true;
1171}
1172
1173void arch_disable_nonboot_cpus_end(void)
1174{
1175 skip_smp_alternatives = false;
1176}
1177
1169void arch_enable_nonboot_cpus_begin(void) 1178void arch_enable_nonboot_cpus_begin(void)
1170{ 1179{
1171 set_mtrr_aps_delayed_init(); 1180 set_mtrr_aps_delayed_init();
@@ -1196,7 +1205,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
1196#ifdef CONFIG_X86_IO_APIC 1205#ifdef CONFIG_X86_IO_APIC
1197 setup_ioapic_dest(); 1206 setup_ioapic_dest();
1198#endif 1207#endif
1199 check_nmi_watchdog();
1200 mtrr_aps_init(); 1208 mtrr_aps_init();
1201} 1209}
1202 1210
@@ -1341,8 +1349,6 @@ int native_cpu_disable(void)
1341 if (cpu == 0) 1349 if (cpu == 0)
1342 return -EBUSY; 1350 return -EBUSY;
1343 1351
1344 if (nmi_watchdog == NMI_LOCAL_APIC)
1345 stop_apic_nmi_watchdog(NULL);
1346 clear_local_APIC(); 1352 clear_local_APIC();
1347 1353
1348 cpu_disable_common(); 1354 cpu_disable_common();
@@ -1377,7 +1383,7 @@ void play_dead_common(void)
1377 1383
1378 mb(); 1384 mb();
1379 /* Ack it */ 1385 /* Ack it */
1380 __get_cpu_var(cpu_state) = CPU_DEAD; 1386 __this_cpu_write(cpu_state, CPU_DEAD);
1381 1387
1382 /* 1388 /*
1383 * With physical CPU hotplug, we should halt the cpu 1389 * With physical CPU hotplug, we should halt the cpu
@@ -1397,11 +1403,11 @@ static inline void mwait_play_dead(void)
1397 int i; 1403 int i;
1398 void *mwait_ptr; 1404 void *mwait_ptr;
1399 1405
1400 if (!cpu_has(&current_cpu_data, X86_FEATURE_MWAIT)) 1406 if (!cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_MWAIT))
1401 return; 1407 return;
1402 if (!cpu_has(&current_cpu_data, X86_FEATURE_CLFLSH)) 1408 if (!cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLSH))
1403 return; 1409 return;
1404 if (current_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) 1410 if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF)
1405 return; 1411 return;
1406 1412
1407 eax = CPUID_MWAIT_LEAF; 1413 eax = CPUID_MWAIT_LEAF;
@@ -1452,7 +1458,7 @@ static inline void mwait_play_dead(void)
1452 1458
1453static inline void hlt_play_dead(void) 1459static inline void hlt_play_dead(void)
1454{ 1460{
1455 if (current_cpu_data.x86 >= 4) 1461 if (__this_cpu_read(cpu_info.x86) >= 4)
1456 wbinvd(); 1462 wbinvd();
1457 1463
1458 while (1) { 1464 while (1) {
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index b53c525368a7..938c8e10a19a 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -73,22 +73,22 @@ static const struct stacktrace_ops save_stack_ops_nosched = {
73 */ 73 */
74void save_stack_trace(struct stack_trace *trace) 74void save_stack_trace(struct stack_trace *trace)
75{ 75{
76 dump_trace(current, NULL, NULL, 0, &save_stack_ops, trace); 76 dump_trace(current, NULL, NULL, &save_stack_ops, trace);
77 if (trace->nr_entries < trace->max_entries) 77 if (trace->nr_entries < trace->max_entries)
78 trace->entries[trace->nr_entries++] = ULONG_MAX; 78 trace->entries[trace->nr_entries++] = ULONG_MAX;
79} 79}
80EXPORT_SYMBOL_GPL(save_stack_trace); 80EXPORT_SYMBOL_GPL(save_stack_trace);
81 81
82void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp) 82void save_stack_trace_regs(struct stack_trace *trace, struct pt_regs *regs)
83{ 83{
84 dump_trace(current, NULL, NULL, bp, &save_stack_ops, trace); 84 dump_trace(current, regs, NULL, &save_stack_ops, trace);
85 if (trace->nr_entries < trace->max_entries) 85 if (trace->nr_entries < trace->max_entries)
86 trace->entries[trace->nr_entries++] = ULONG_MAX; 86 trace->entries[trace->nr_entries++] = ULONG_MAX;
87} 87}
88 88
89void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) 89void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
90{ 90{
91 dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace); 91 dump_trace(tsk, NULL, NULL, &save_stack_ops_nosched, trace);
92 if (trace->nr_entries < trace->max_entries) 92 if (trace->nr_entries < trace->max_entries)
93 trace->entries[trace->nr_entries++] = ULONG_MAX; 93 trace->entries[trace->nr_entries++] = ULONG_MAX;
94} 94}
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index fb5cc5e14cfa..25a28a245937 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -22,10 +22,6 @@
22#include <asm/hpet.h> 22#include <asm/hpet.h>
23#include <asm/time.h> 23#include <asm/time.h>
24 24
25#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC)
26int timer_ack;
27#endif
28
29#ifdef CONFIG_X86_64 25#ifdef CONFIG_X86_64
30volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; 26volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
31#endif 27#endif
@@ -63,20 +59,6 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
63 /* Keep nmi watchdog up to date */ 59 /* Keep nmi watchdog up to date */
64 inc_irq_stat(irq0_irqs); 60 inc_irq_stat(irq0_irqs);
65 61
66 /* Optimized out for !IO_APIC and x86_64 */
67 if (timer_ack) {
68 /*
69 * Subtle, when I/O APICs are used we have to ack timer IRQ
70 * manually to deassert NMI lines for the watchdog if run
71 * on an 82489DX-based system.
72 */
73 raw_spin_lock(&i8259A_lock);
74 outb(0x0c, PIC_MASTER_OCW3);
75 /* Ack the IRQ; AEOI will end it automatically. */
76 inb(PIC_MASTER_POLL);
77 raw_spin_unlock(&i8259A_lock);
78 }
79
80 global_clock_event->event_handler(global_clock_event); 62 global_clock_event->event_handler(global_clock_event);
81 63
82 /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */ 64 /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */
diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S
index 3af2dff58b21..075d130efcf9 100644
--- a/arch/x86/kernel/trampoline_64.S
+++ b/arch/x86/kernel/trampoline_64.S
@@ -127,7 +127,7 @@ startup_64:
127no_longmode: 127no_longmode:
128 hlt 128 hlt
129 jmp no_longmode 129 jmp no_longmode
130#include "verify_cpu_64.S" 130#include "verify_cpu.S"
131 131
132 # Careful these need to be in the same 64K segment as the above; 132 # Careful these need to be in the same 64K segment as the above;
133tidt: 133tidt:
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index cb838ca42c96..b9b67166f9de 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -83,6 +83,13 @@ EXPORT_SYMBOL_GPL(used_vectors);
83 83
84static int ignore_nmis; 84static int ignore_nmis;
85 85
86int unknown_nmi_panic;
87/*
88 * Prevent NMI reason port (0x61) being accessed simultaneously, can
89 * only be used in NMI handler.
90 */
91static DEFINE_RAW_SPINLOCK(nmi_reason_lock);
92
86static inline void conditional_sti(struct pt_regs *regs) 93static inline void conditional_sti(struct pt_regs *regs)
87{ 94{
88 if (regs->flags & X86_EFLAGS_IF) 95 if (regs->flags & X86_EFLAGS_IF)
@@ -300,16 +307,23 @@ gp_in_kernel:
 	die("general protection fault", regs, error_code);
 }
 
-static notrace __kprobes void
-mem_parity_error(unsigned char reason, struct pt_regs *regs)
+static int __init setup_unknown_nmi_panic(char *str)
 {
-	printk(KERN_EMERG
-		"Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
-			reason, smp_processor_id());
+	unknown_nmi_panic = 1;
+	return 1;
+}
+__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
 
-	printk(KERN_EMERG
-		"You have some hardware problem, likely on the PCI bus.\n");
+static notrace __kprobes void
+pci_serr_error(unsigned char reason, struct pt_regs *regs)
+{
+	pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
+		 reason, smp_processor_id());
 
+	/*
+	 * On some machines, PCI SERR line is used to report memory
+	 * errors. EDAC makes use of it.
+	 */
 #if defined(CONFIG_EDAC)
 	if (edac_handler_set()) {
 		edac_atomic_assert_error();
@@ -320,11 +334,11 @@ mem_parity_error(unsigned char reason, struct pt_regs *regs)
 	if (panic_on_unrecovered_nmi)
 		panic("NMI: Not continuing");
 
-	printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
+	pr_emerg("Dazed and confused, but trying to continue\n");
 
-	/* Clear and disable the memory parity error line. */
-	reason = (reason & 0xf) | 4;
-	outb(reason, 0x61);
+	/* Clear and disable the PCI SERR error line. */
+	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR;
+	outb(reason, NMI_REASON_PORT);
 }
 
 static notrace __kprobes void
@@ -332,22 +346,26 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
 {
 	unsigned long i;
 
-	printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
+	pr_emerg(
+	"NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n",
+		 reason, smp_processor_id());
 	show_registers(regs);
 
 	if (panic_on_io_nmi)
 		panic("NMI IOCK error: Not continuing");
 
 	/* Re-enable the IOCK line, wait for a few seconds */
-	reason = (reason & 0xf) | 8;
-	outb(reason, 0x61);
+	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
+	outb(reason, NMI_REASON_PORT);
 
-	i = 2000;
-	while (--i)
-		udelay(1000);
+	i = 20000;
+	while (--i) {
+		touch_nmi_watchdog();
+		udelay(100);
+	}
 
-	reason &= ~8;
-	outb(reason, 0x61);
+	reason &= ~NMI_REASON_CLEAR_IOCHK;
+	outb(reason, NMI_REASON_PORT);
 }
 
 static notrace __kprobes void
@@ -366,69 +384,50 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
 		return;
 	}
 #endif
-	printk(KERN_EMERG
-		"Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
-			reason, smp_processor_id());
+	pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
+		 reason, smp_processor_id());
 
-	printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
-	if (panic_on_unrecovered_nmi)
+	pr_emerg("Do you have a strange power saving mode enabled?\n");
+	if (unknown_nmi_panic || panic_on_unrecovered_nmi)
 		panic("NMI: Not continuing");
 
-	printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
+	pr_emerg("Dazed and confused, but trying to continue\n");
 }
 
 static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 {
 	unsigned char reason = 0;
-	int cpu;
 
-	cpu = smp_processor_id();
-
-	/* Only the BSP gets external NMIs from the system. */
-	if (!cpu)
-		reason = get_nmi_reason();
-
-	if (!(reason & 0xc0)) {
-		if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
-								== NOTIFY_STOP)
-			return;
+	/*
+	 * CPU-specific NMI must be processed before non-CPU-specific
+	 * NMI, otherwise we may lose it, because the CPU-specific
+	 * NMI can not be detected/processed on other CPUs.
+	 */
+	if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
+		return;
 
-#ifdef CONFIG_X86_LOCAL_APIC
-		if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
-							== NOTIFY_STOP)
-			return;
+	/* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
+	raw_spin_lock(&nmi_reason_lock);
+	reason = get_nmi_reason();
 
-#ifndef CONFIG_LOCKUP_DETECTOR
+	if (reason & NMI_REASON_MASK) {
+		if (reason & NMI_REASON_SERR)
+			pci_serr_error(reason, regs);
+		else if (reason & NMI_REASON_IOCHK)
+			io_check_error(reason, regs);
+#ifdef CONFIG_X86_32
 		/*
-		 * Ok, so this is none of the documented NMI sources,
-		 * so it must be the NMI watchdog.
+		 * Reassert NMI in case it became active
+		 * meanwhile as it's edge-triggered:
 		 */
-		if (nmi_watchdog_tick(regs, reason))
-			return;
-		if (!do_nmi_callback(regs, cpu))
-#endif /* !CONFIG_LOCKUP_DETECTOR */
-			unknown_nmi_error(reason, regs);
-#else
-		unknown_nmi_error(reason, regs);
+		reassert_nmi();
 #endif
-
+		raw_spin_unlock(&nmi_reason_lock);
 		return;
 	}
-	if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
-		return;
+	raw_spin_unlock(&nmi_reason_lock);
 
-	/* AK: following checks seem to be broken on modern chipsets. FIXME */
-	if (reason & 0x80)
-		mem_parity_error(reason, regs);
-	if (reason & 0x40)
-		io_check_error(reason, regs);
-#ifdef CONFIG_X86_32
-	/*
-	 * Reassert NMI in case it became active meanwhile
-	 * as it's edge-triggered:
-	 */
-	reassert_nmi();
-#endif
+	unknown_nmi_error(reason, regs);
 }
 
 dotraplinkage notrace __kprobes void
@@ -446,14 +445,12 @@ do_nmi(struct pt_regs *regs, long error_code)
 
 void stop_nmi(void)
 {
-	acpi_nmi_disable();
 	ignore_nmis++;
 }
 
 void restart_nmi(void)
 {
 	ignore_nmis--;
-	acpi_nmi_enable();
 }
 
 /* May run on IST stack. */
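
Two details in the traps.c rework are worth spelling out. First, the new
NMI_REASON_* symbols replace the magic numbers one for one; their values are
implied by the literals they replace above (the real definitions live in the
x86 asm headers, so treat this as a sketch):

	/* System control port B (0x61), read side: why did the NMI fire? */
	#define NMI_REASON_PORT		0x61
	#define NMI_REASON_SERR		0x80	/* was: reason & 0x80 */
	#define NMI_REASON_IOCHK	0x40	/* was: reason & 0x40 */
	#define NMI_REASON_MASK		(NMI_REASON_SERR | NMI_REASON_IOCHK)

	/* Write side: setting a bit clears and masks that NMI source */
	#define NMI_REASON_CLEAR_SERR	0x04	/* was: (reason & 0xf) | 4 */
	#define NMI_REASON_CLEAR_IOCHK	0x08	/* was: (reason & 0xf) | 8 */
	#define NMI_REASON_CLEAR_MASK	0x0f

Second, io_check_error() still waits roughly two seconds (20000 iterations of
100us instead of 2000 of 1000us), but now calls touch_nmi_watchdog() on each
iteration so the wait itself cannot trip the new lockup detector; and booting
with unknown_nmi_panic on the kernel command line turns unknown NMIs into
panics via the new __setup() hook.
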
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 0c40d8b72416..823f79a17ad1 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -659,7 +659,7 @@ void restore_sched_clock_state(void)
 
 	local_irq_save(flags);
 
-	__get_cpu_var(cyc2ns_offset) = 0;
+	__this_cpu_write(cyc2ns_offset, 0);
 	offset = cyc2ns_suspend - sched_clock();
 
 	for_each_possible_cpu(cpu)
@@ -872,6 +872,9 @@ __cpuinit int unsynchronized_tsc(void)
 
 	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
 		return 0;
+
+	if (tsc_clocksource_reliable)
+		return 0;
 	/*
 	 * Intel systems are normally all synchronized.
 	 * Exceptions must mark TSC as unstable:
@@ -879,14 +882,92 @@ __cpuinit int unsynchronized_tsc(void)
 	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
 		/* assume multi socket systems are not synchronized: */
 		if (num_possible_cpus() > 1)
-			tsc_unstable = 1;
+			return 1;
 	}
 
-	return tsc_unstable;
+	return 0;
+}
+
+
+static void tsc_refine_calibration_work(struct work_struct *work);
+static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work);
+/**
+ * tsc_refine_calibration_work - Further refine tsc freq calibration
+ * @work - ignored.
+ *
+ * This function uses delayed work over a period of a
+ * second to further refine the TSC freq value. Since this is
+ * timer based, instead of loop based, we don't block the boot
+ * process while this longer calibration is done.
+ *
+ * If there are any calibration anomalies (too many SMIs, etc),
+ * or the refined calibration is off by 1% of the fast early
+ * calibration, we throw out the new calibration and use the
+ * early calibration.
+ */
+static void tsc_refine_calibration_work(struct work_struct *work)
+{
+	static u64 tsc_start = -1, ref_start;
+	static int hpet;
+	u64 tsc_stop, ref_stop, delta;
+	unsigned long freq;
+
+	/* Don't bother refining TSC on unstable systems */
+	if (check_tsc_unstable())
+		goto out;
+
+	/*
+	 * Since the work is started early in boot, we may be
+	 * delayed the first time we expire. So set the workqueue
+	 * again once we know timers are working.
+	 */
+	if (tsc_start == -1) {
+		/*
+		 * Only set hpet once, to avoid mixing hardware
+		 * if the hpet becomes enabled later.
+		 */
+		hpet = is_hpet_enabled();
+		schedule_delayed_work(&tsc_irqwork, HZ);
+		tsc_start = tsc_read_refs(&ref_start, hpet);
+		return;
+	}
+
+	tsc_stop = tsc_read_refs(&ref_stop, hpet);
+
+	/* hpet or pmtimer available ? */
+	if (!hpet && !ref_start && !ref_stop)
+		goto out;
+
+	/* Check, whether the sampling was disturbed by an SMI */
+	if (tsc_start == ULLONG_MAX || tsc_stop == ULLONG_MAX)
+		goto out;
+
+	delta = tsc_stop - tsc_start;
+	delta *= 1000000LL;
+	if (hpet)
+		freq = calc_hpet_ref(delta, ref_start, ref_stop);
+	else
+		freq = calc_pmtimer_ref(delta, ref_start, ref_stop);
+
+	/* Make sure we're within 1% */
+	if (abs(tsc_khz - freq) > tsc_khz/100)
+		goto out;
+
+	tsc_khz = freq;
+	printk(KERN_INFO "Refined TSC clocksource calibration: "
+		"%lu.%03lu MHz.\n", (unsigned long)tsc_khz / 1000,
+		(unsigned long)tsc_khz % 1000);
+
+out:
+	clocksource_register_khz(&clocksource_tsc, tsc_khz);
 }
 
-static void __init init_tsc_clocksource(void)
+
+static int __init init_tsc_clocksource(void)
 {
+	if (!cpu_has_tsc || tsc_disabled > 0 || !tsc_khz)
+		return 0;
+
 	if (tsc_clocksource_reliable)
 		clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
 	/* lower the rating if we already know its unstable: */
@@ -894,8 +975,14 @@ static void __init init_tsc_clocksource(void)
 		clocksource_tsc.rating = 0;
 		clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
 	}
-	clocksource_register_khz(&clocksource_tsc, tsc_khz);
+	schedule_delayed_work(&tsc_irqwork, 0);
+	return 0;
 }
+/*
+ * We use device_initcall here, to ensure we run after the hpet
+ * is fully initialized, which may occur at fs_initcall time.
+ */
+device_initcall(init_tsc_clocksource);
 
 void __init tsc_init(void)
 {
@@ -949,6 +1036,5 @@ void __init tsc_init(void)
 		mark_tsc_unstable("TSCs unsynchronized");
 
 	check_system_tsc_reliable();
-	init_tsc_clocksource();
 }
 
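The refinement path only trusts the slow calibration if it lands within 1% of
the fast boot-time estimate. A worked example of that acceptance band,
compiled stand-alone with invented numbers for a nominal 2.8 GHz part
(tsc_khz is in kHz, as in the kernel):

	#include <stdio.h>
	#include <stdlib.h>

	int main(void)
	{
		unsigned long tsc_khz = 2793000; /* fast early calibration */
		unsigned long freq    = 2792841; /* refined over ~1s of HPET */

		/* band: tsc_khz/100 = 27930 kHz; delta here is only 159 kHz */
		if (labs((long)(tsc_khz - freq)) > tsc_khz / 100)
			printf("anomaly: keeping early %lu kHz\n", tsc_khz);
		else
			tsc_khz = freq;	/* accept the refined value */

		printf("Refined TSC calibration: %lu.%03lu MHz\n",
		       tsc_khz / 1000, tsc_khz % 1000);
		return 0;
	}

Moving from an __init call to device_initcall() is what makes this safe: by
initcall time the HPET, which may only come up at fs_initcall, has been probed
if present; otherwise the code falls back to the PM timer or simply keeps the
unrefined value.
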
diff --git a/arch/x86/kernel/verify_cpu_64.S b/arch/x86/kernel/verify_cpu.S
index 56a8c2a867d9..0edefc19a113 100644
--- a/arch/x86/kernel/verify_cpu_64.S
+++ b/arch/x86/kernel/verify_cpu.S
@@ -7,6 +7,7 @@
  * Copyright (c) 2007 Andi Kleen (ak@suse.de)
  * Copyright (c) 2007 Eric Biederman (ebiederm@xmission.com)
  * Copyright (c) 2007 Vivek Goyal (vgoyal@in.ibm.com)
+ * Copyright (c) 2010 Kees Cook (kees.cook@canonical.com)
  *
  * This source code is licensed under the GNU General Public License,
  * Version 2. See the file COPYING for more details.
@@ -14,18 +15,17 @@
  * This is a common code for verification whether CPU supports
  * long mode and SSE or not. It is not called directly instead this
  * file is included at various places and compiled in that context.
- * Following are the current usage.
+ * This file is expected to run in 32bit code. Currently:
  *
- * This file is included by both 16bit and 32bit code.
+ *	arch/x86/boot/compressed/head_64.S: Boot cpu verification
+ *	arch/x86/kernel/trampoline_64.S: secondary processor verification
+ *	arch/x86/kernel/head_32.S: processor startup
  *
- * arch/x86_64/boot/setup.S: Boot cpu verification (16bit)
- * arch/x86_64/boot/compressed/head.S: Boot cpu verification (32bit)
- * arch/x86_64/kernel/trampoline.S: secondary processor verification (16bit)
- * arch/x86_64/kernel/acpi/wakeup.S: Verification at resume (16bit)
- *
- * verify_cpu, returns the status of cpu check in register %eax.
+ * verify_cpu, returns the status of longmode and SSE in register %eax.
  *		0: Success    1: Failure
  *
+ * On Intel, the XD_DISABLE flag will be cleared as a side-effect.
+ *
  * The caller needs to check for the error code and take the action
  * appropriately. Either display a message or halt.
  */
@@ -62,8 +62,41 @@ verify_cpu:
 	cmpl $0x444d4163,%ecx
 	jnz verify_cpu_noamd
 	mov $1,%di			# cpu is from AMD
+	jmp verify_cpu_check
 
 verify_cpu_noamd:
+	cmpl $0x756e6547,%ebx		# GenuineIntel?
+	jnz verify_cpu_check
+	cmpl $0x49656e69,%edx
+	jnz verify_cpu_check
+	cmpl $0x6c65746e,%ecx
+	jnz verify_cpu_check
+
+	# only call IA32_MISC_ENABLE when:
+	# family > 6 || (family == 6 && model >= 0xd)
+	movl $0x1, %eax			# check CPU family and model
+	cpuid
+	movl %eax, %ecx
+
+	andl $0x0ff00f00, %eax		# mask family and extended family
+	shrl $8, %eax
+	cmpl $6, %eax
+	ja verify_cpu_clear_xd		# family > 6, ok
+	jb verify_cpu_check		# family < 6, skip
+
+	andl $0x000f00f0, %ecx		# mask model and extended model
+	shrl $4, %ecx
+	cmpl $0xd, %ecx
+	jb verify_cpu_check		# family == 6, model < 0xd, skip
+
+verify_cpu_clear_xd:
+	movl $MSR_IA32_MISC_ENABLE, %ecx
+	rdmsr
+	btrl $2, %edx			# clear MSR_IA32_MISC_ENABLE_XD_DISABLE
+	jnc verify_cpu_check		# only write MSR if bit was changed
+	wrmsr
+
+verify_cpu_check:
 	movl $0x1,%eax			# Does the cpu have what it takes
 	cpuid
 	andl $REQUIRED_MASK0,%edx
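
After the GenuineIntel vendor check, the new branch gates the MSR write on
CPUID leaf 1. A user-space C rendering of the same decode, using GCC's
<cpuid.h> helper purely for illustration (the kernel does this in assembly
because it runs before any C environment exists):

	#include <stdio.h>
	#include <cpuid.h>

	int main(void)
	{
		unsigned int eax, ebx, ecx, edx;

		if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
			return 1;

		/* family in bits 11:8, extended family in bits 27:20 */
		unsigned int fam = (eax & 0x0ff00f00) >> 8;
		/* model in bits 7:4, extended model in bits 19:16 */
		unsigned int mod = (eax & 0x000f00f0) >> 4;

		if (fam > 6 || (fam == 6 && mod >= 0xd))
			printf("would clear XD_DISABLE in IA32_MISC_ENABLE\n");
		else
			printf("older CPU: skip the MSR write\n");
		return 0;
	}

The btrl/jnc pairing in the assembly is a small optimization: since btrl sets
the carry flag to the bit's old value, the wrmsr is issued only when
XD_DISABLE was actually set, avoiding the MSR write on the common path.
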
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index e03530aebfd0..bf4700755184 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -69,7 +69,7 @@ jiffies_64 = jiffies;
 
 PHDRS {
 	text PT_LOAD FLAGS(5);          /* R_E */
-	data PT_LOAD FLAGS(7);          /* RWE */
+	data PT_LOAD FLAGS(6);          /* RW_ */
 #ifdef CONFIG_X86_64
 	user PT_LOAD FLAGS(5);          /* R_E */
 #ifdef CONFIG_SMP
@@ -116,6 +116,10 @@ SECTIONS
 
 	EXCEPTION_TABLE(16) :text = 0x9090
 
+#if defined(CONFIG_DEBUG_RODATA)
+	/* .text should occupy whole number of pages */
+	. = ALIGN(PAGE_SIZE);
+#endif
 	X64_ALIGN_DEBUG_RODATA_BEGIN
 	RO_DATA(PAGE_SIZE)
 	X64_ALIGN_DEBUG_RODATA_END
@@ -335,7 +339,7 @@ SECTIONS
 		__bss_start = .;
 		*(.bss..page_aligned)
 		*(.bss)
-		. = ALIGN(4);
+		. = ALIGN(PAGE_SIZE);
 		__bss_stop = .;
 	}
 
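The PHDRS change is the security-relevant part of this hunk: FLAGS() takes the
ELF p_flags bits (PF_X = 1, PF_W = 2, PF_R = 4), so dropping the data segment
from 7 to 6 removes execute permission from kernel data. The bss change pads
__bss_stop out to a page boundary so the data mappings stay page-granular.
Either can be eyeballed on a built image with, e.g.:

	readelf -l vmlinux

and checking that the data PT_LOAD entry now reports RW rather than RWE flags.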