Diffstat (limited to 'arch/x86/kernel')
 arch/x86/kernel/acpi/boot.c                |  19
 arch/x86/kernel/acpi/sleep.c               |   2
 arch/x86/kernel/acpi/wakeup_32.S           |   2
 arch/x86/kernel/amd_iommu.c                |  16
 arch/x86/kernel/amd_iommu_init.c           |  20
 arch/x86/kernel/apic/apic.c                |  41
 arch/x86/kernel/cpu/common.c               |   6
 arch/x86/kernel/cpu/cpufreq/powernow-k8.c  |   6
 arch/x86/kernel/cpu/mcheck/Makefile        |   2
 arch/x86/kernel/cpu/mcheck/mce-apei.c      | 138
 arch/x86/kernel/cpu/mcheck/mce-internal.h  |  23
 arch/x86/kernel/cpu/mcheck/mce.c           |  89
 arch/x86/kernel/cpu/mcheck/therm_throt.c   |   2
 arch/x86/kernel/cpu/perf_event.c           |  28
 arch/x86/kernel/cpu/perf_event_p4.c        |  41
 arch/x86/kernel/cpuid.c                    |   2
 arch/x86/kernel/init_task.c                |   2
 arch/x86/kernel/msr.c                      |   2
 arch/x86/kernel/pci-swiotlb.c              |   2
 arch/x86/kernel/setup.c                    |  11
 arch/x86/kernel/setup_percpu.c             |  12
 arch/x86/kernel/smpboot.c                  |  28
 arch/x86/kernel/vmlinux.lds.S              |   4
 23 files changed, 402 insertions(+), 96 deletions(-)
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 488be461a380..60cc4058ed5f 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -63,7 +63,6 @@ EXPORT_SYMBOL(acpi_disabled);
 int acpi_noirq;			/* skip ACPI IRQ initialization */
 int acpi_pci_disabled;		/* skip ACPI PCI scan and IRQ initialization */
 EXPORT_SYMBOL(acpi_pci_disabled);
-int acpi_ht __initdata = 1;	/* enable HT */
 
 int acpi_lapic;
 int acpi_ioapic;
@@ -1501,9 +1500,8 @@ void __init acpi_boot_table_init(void)
 
 	/*
 	 * If acpi_disabled, bail out
-	 * One exception: acpi=ht continues far enough to enumerate LAPICs
 	 */
-	if (acpi_disabled && !acpi_ht)
+	if (acpi_disabled)
 		return;
 
 	/*
@@ -1534,9 +1532,8 @@ int __init early_acpi_boot_init(void)
 {
 	/*
 	 * If acpi_disabled, bail out
-	 * One exception: acpi=ht continues far enough to enumerate LAPICs
 	 */
-	if (acpi_disabled && !acpi_ht)
+	if (acpi_disabled)
 		return 1;
 
 	/*
@@ -1554,9 +1551,8 @@ int __init acpi_boot_init(void)
 
 	/*
 	 * If acpi_disabled, bail out
-	 * One exception: acpi=ht continues far enough to enumerate LAPICs
 	 */
-	if (acpi_disabled && !acpi_ht)
+	if (acpi_disabled)
 		return 1;
 
 	acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf);
@@ -1591,21 +1587,12 @@ static int __init parse_acpi(char *arg)
 	/* acpi=force to over-ride black-list */
 	else if (strcmp(arg, "force") == 0) {
 		acpi_force = 1;
-		acpi_ht = 1;
 		acpi_disabled = 0;
 	}
 	/* acpi=strict disables out-of-spec workarounds */
 	else if (strcmp(arg, "strict") == 0) {
 		acpi_strict = 1;
 	}
-	/* Limit ACPI just to boot-time to enable HT */
-	else if (strcmp(arg, "ht") == 0) {
-		if (!acpi_force) {
-			printk(KERN_WARNING "acpi=ht will be removed in Linux-2.6.35\n");
-			disable_acpi();
-		}
-		acpi_ht = 1;
-	}
 	/* acpi=rsdt use RSDT instead of XSDT */
 	else if (strcmp(arg, "rsdt") == 0) {
 		acpi_rsdt_forced = 1;
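
With "acpi=ht" gone, the parse_acpi() dispatch covered by this hunk reduces to the options shown above. A minimal userspace sketch of that dispatch, useful for seeing that "ht" now falls through to the unknown-option path (the flag variables are illustrative stand-ins for the kernel globals, not kernel code, and only the options visible in this hunk are modeled):

    #include <stdio.h>
    #include <string.h>

    static int acpi_force, acpi_disabled = 1, acpi_strict, acpi_rsdt_forced;

    static int parse_acpi(const char *arg)
    {
        if (strcmp(arg, "force") == 0) {          /* override the blacklist */
            acpi_force = 1;
            acpi_disabled = 0;
        } else if (strcmp(arg, "strict") == 0) {  /* no out-of-spec workarounds */
            acpi_strict = 1;
        } else if (strcmp(arg, "rsdt") == 0) {    /* prefer RSDT over XSDT */
            acpi_rsdt_forced = 1;
        } else {
            return -1;                            /* "ht" is rejected here now */
        }
        return 0;
    }

    int main(void)
    {
        printf("acpi=ht accepted? %s\n", parse_acpi("ht") ? "no" : "yes");
        printf("force=%d disabled=%d strict=%d rsdt=%d\n",
               acpi_force, acpi_disabled, acpi_strict, acpi_rsdt_forced);
        return 0;
    }
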
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index f9961034e557..82e508677b91 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -162,8 +162,6 @@ static int __init acpi_sleep_setup(char *str)
 #endif
 	if (strncmp(str, "old_ordering", 12) == 0)
 		acpi_old_suspend_ordering();
-	if (strncmp(str, "sci_force_enable", 16) == 0)
-		acpi_set_sci_en_on_resume();
 	str = strchr(str, ',');
 	if (str != NULL)
 		str += strspn(str, ", \t");
diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S
index 8ded418b0593..13ab720573e3 100644
--- a/arch/x86/kernel/acpi/wakeup_32.S
+++ b/arch/x86/kernel/acpi/wakeup_32.S
@@ -1,4 +1,4 @@
-	.section .text.page_aligned
+	.section .text..page_aligned
 #include <linux/linkage.h>
 #include <asm/segment.h>
 #include <asm/page_types.h>
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index fa5a1474cd18..0d20286d78c6 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1487,6 +1487,7 @@ static int __attach_device(struct device *dev,
 			   struct protection_domain *domain)
 {
 	struct iommu_dev_data *dev_data, *alias_data;
+	int ret;
 
 	dev_data = get_dev_data(dev);
 	alias_data = get_dev_data(dev_data->alias);
@@ -1498,13 +1499,14 @@ static int __attach_device(struct device *dev,
 	spin_lock(&domain->lock);
 
 	/* Some sanity checks */
+	ret = -EBUSY;
 	if (alias_data->domain != NULL &&
 	    alias_data->domain != domain)
-		return -EBUSY;
+		goto out_unlock;
 
 	if (dev_data->domain != NULL &&
 	    dev_data->domain != domain)
-		return -EBUSY;
+		goto out_unlock;
 
 	/* Do real assignment */
 	if (dev_data->alias != dev) {
@@ -1520,10 +1522,14 @@ static int __attach_device(struct device *dev,
 
 	atomic_inc(&dev_data->bind);
 
+	ret = 0;
+
+out_unlock:
+
 	/* ready */
 	spin_unlock(&domain->lock);
 
-	return 0;
+	return ret;
 }
 
 /*
@@ -2324,10 +2330,6 @@ int __init amd_iommu_init_dma_ops(void)
 
 	iommu_detected = 1;
 	swiotlb = 0;
-#ifdef CONFIG_GART_IOMMU
-	gart_iommu_aperture_disabled = 1;
-	gart_iommu_aperture = 0;
-#endif
 
 	/* Make the driver finally visible to the drivers */
 	dma_ops = &amd_iommu_dma_ops;
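
The __attach_device() change above is the classic single-exit locking idiom: preload the error code, branch to one label on any failed check, and release the lock on every path, fixing the original early returns that leaked the lock. A userspace model with pthreads (the names and busy checks are illustrative, not kernel code):

    #include <errno.h>
    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

    static int attach(int alias_busy, int dev_busy)
    {
        int ret;

        pthread_mutex_lock(&lock);

        ret = -EBUSY;               /* assume failure until all checks pass */
        if (alias_busy)
            goto out_unlock;
        if (dev_busy)
            goto out_unlock;

        /* ... do the real assignment here ... */
        ret = 0;

    out_unlock:
        pthread_mutex_unlock(&lock); /* every path releases the lock */
        return ret;
    }

    int main(void)
    {
        printf("busy: %d, free: %d\n", attach(1, 0), attach(0, 0));
        return 0;
    }
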
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 3bacb4d0844c..3cc63e2b8dd4 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -287,8 +287,12 @@ static u8 * __init iommu_map_mmio_space(u64 address)
 {
 	u8 *ret;
 
-	if (!request_mem_region(address, MMIO_REGION_LENGTH, "amd_iommu"))
+	if (!request_mem_region(address, MMIO_REGION_LENGTH, "amd_iommu")) {
+		pr_err("AMD-Vi: Can not reserve memory region %llx for mmio\n",
+		       address);
+		pr_err("AMD-Vi: This is a BIOS bug. Please contact your hardware vendor\n");
 		return NULL;
+	}
 
 	ret = ioremap_nocache(address, MMIO_REGION_LENGTH);
 	if (ret != NULL)
@@ -1314,7 +1318,7 @@ static int __init amd_iommu_init(void)
 	ret = amd_iommu_init_dma_ops();
 
 	if (ret)
-		goto free;
+		goto free_disable;
 
 	amd_iommu_init_api();
 
@@ -1332,9 +1336,10 @@ static int __init amd_iommu_init(void)
 out:
 	return ret;
 
-free:
+free_disable:
 	disable_iommus();
 
+free:
 	amd_iommu_uninit_devices();
 
 	free_pages((unsigned long)amd_iommu_pd_alloc_bitmap,
@@ -1353,6 +1358,15 @@ free:
 
 	free_unity_maps();
 
+#ifdef CONFIG_GART_IOMMU
+	/*
+	 * We failed to initialize the AMD IOMMU - try fallback to GART
+	 * if possible.
+	 */
+	gart_iommu_init();
+
+#endif
+
 	goto out;
 }
 
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index e5a4a1e01618..c02cc692985c 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -51,6 +51,7 @@
 #include <asm/smp.h>
 #include <asm/mce.h>
 #include <asm/kvm_para.h>
+#include <asm/tsc.h>
 
 unsigned int num_processors;
 
@@ -1151,8 +1152,13 @@ static void __cpuinit lapic_setup_esr(void)
  */
 void __cpuinit setup_local_APIC(void)
 {
-	unsigned int value;
-	int i, j;
+	unsigned int value, queued;
+	int i, j, acked = 0;
+	unsigned long long tsc = 0, ntsc;
+	long long max_loops = cpu_khz;
+
+	if (cpu_has_tsc)
+		rdtscll(tsc);
 
 	if (disable_apic) {
 		arch_disable_smp_support();
@@ -1204,13 +1210,32 @@ void __cpuinit setup_local_APIC(void)
 	 * the interrupt. Hence a vector might get locked. It was noticed
 	 * for timer irq (vector 0x31). Issue an extra EOI to clear ISR.
 	 */
-	for (i = APIC_ISR_NR - 1; i >= 0; i--) {
-		value = apic_read(APIC_ISR + i*0x10);
-		for (j = 31; j >= 0; j--) {
-			if (value & (1<<j))
-				ack_APIC_irq();
+	do {
+		queued = 0;
+		for (i = APIC_ISR_NR - 1; i >= 0; i--)
+			queued |= apic_read(APIC_IRR + i*0x10);
+
+		for (i = APIC_ISR_NR - 1; i >= 0; i--) {
+			value = apic_read(APIC_ISR + i*0x10);
+			for (j = 31; j >= 0; j--) {
+				if (value & (1<<j)) {
+					ack_APIC_irq();
+					acked++;
+				}
+			}
 		}
-	}
+		if (acked > 256) {
+			printk(KERN_ERR "LAPIC pending interrupts after %d EOI\n",
+			       acked);
+			break;
+		}
+		if (cpu_has_tsc) {
+			rdtscll(ntsc);
+			max_loops = (cpu_khz << 10) - (ntsc - tsc);
+		} else
+			max_loops--;
+	} while (queued && max_loops > 0);
+	WARN_ON(max_loops <= 0);
 
 	/*
 	 * Now that we are all set up, enable the APIC
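
The setup_local_APIC() rework turns a single drain pass into a bounded retry loop: keep acking while the IRR shows pending work, but cap both the number of EOIs and the elapsed time (TSC ticks in the kernel). A userspace model of the same control flow, using wall-clock time in place of the TSC (all values are illustrative):

    #include <stdio.h>
    #include <time.h>

    static int pending = 1000;          /* stands in for set IRR bits */

    int main(void)
    {
        clock_t start = clock();
        clock_t budget = CLOCKS_PER_SEC;  /* like cpu_khz << 10 TSC ticks */
        int queued, acked = 0;

        do {
            queued = pending;           /* re-sample pending work */
            if (queued) {
                pending--;              /* "ack" one interrupt */
                acked++;
            }
            if (acked > 256) {          /* too many EOIs: give up loudly */
                fprintf(stderr, "still pending after %d acks\n", acked);
                break;
            }
        } while (queued && clock() - start < budget);

        return 0;
    }
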
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index cc83a002786e..68e4a6f2211e 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1121,9 +1121,9 @@ void __cpuinit cpu_init(void)
 	oist = &per_cpu(orig_ist, cpu);
 
 #ifdef CONFIG_NUMA
-	if (cpu != 0 && percpu_read(node_number) == 0 &&
-	    cpu_to_node(cpu) != NUMA_NO_NODE)
-		percpu_write(node_number, cpu_to_node(cpu));
+	if (cpu != 0 && percpu_read(numa_node) == 0 &&
+	    early_cpu_to_node(cpu) != NUMA_NO_NODE)
+		set_numa_node(early_cpu_to_node(cpu));
 #endif
 
 	me = current;
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 6f3dc8fbbfdc..7ec2123838e6 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1497,8 +1497,8 @@ static struct cpufreq_driver cpufreq_amd64_driver = {
  * simply keep the boost-disable flag in sync with the current global
  * state.
  */
-static int __cpuinit cpb_notify(struct notifier_block *nb, unsigned long action,
-				void *hcpu)
+static int cpb_notify(struct notifier_block *nb, unsigned long action,
+		      void *hcpu)
 {
 	unsigned cpu = (long)hcpu;
 	u32 lo, hi;
@@ -1528,7 +1528,7 @@ static int __cpuinit cpb_notify(struct notifier_block *nb, unsigned long action,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block __cpuinitdata cpb_nb = {
+static struct notifier_block cpb_nb = {
 	.notifier_call = cpb_notify,
 };
 
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile
index 4ac6d48fe11b..bb34b03af252 100644
--- a/arch/x86/kernel/cpu/mcheck/Makefile
+++ b/arch/x86/kernel/cpu/mcheck/Makefile
@@ -7,3 +7,5 @@ obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o
 obj-$(CONFIG_X86_MCE_INJECT)	+= mce-inject.o
 
 obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o
+
+obj-$(CONFIG_ACPI_APEI)		+= mce-apei.o
diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c
new file mode 100644
index 000000000000..745b54f9be89
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c
@@ -0,0 +1,138 @@
+/*
+ * Bridge between MCE and APEI
+ *
+ * On some machine, corrected memory errors are reported via APEI
+ * generic hardware error source (GHES) instead of corrected Machine
+ * Check. These corrected memory errors can be reported to user space
+ * through /dev/mcelog via faking a corrected Machine Check, so that
+ * the error memory page can be offlined by /sbin/mcelog if the error
+ * count for one page is beyond the threshold.
+ *
+ * For fatal MCE, save MCE record into persistent storage via ERST, so
+ * that the MCE record can be logged after reboot via ERST.
+ *
+ * Copyright 2010 Intel Corp.
+ *   Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/acpi.h>
+#include <linux/cper.h>
+#include <acpi/apei.h>
+#include <asm/mce.h>
+
+#include "mce-internal.h"
+
+void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err)
+{
+	struct mce m;
+
+	/* Only corrected MC is reported */
+	if (!corrected)
+		return;
+
+	mce_setup(&m);
+	m.bank = 1;
+	/* Fake a memory read corrected error with unknown channel */
+	m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | 0x9f;
+	m.addr = mem_err->physical_addr;
+	mce_log(&m);
+	mce_notify_irq();
+}
+EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);
+
+#define CPER_CREATOR_MCE						\
+	UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c,	\
+		0x64, 0x90, 0xb8, 0x9d)
+#define CPER_SECTION_TYPE_MCE						\
+	UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96,	\
+		0x04, 0x4a, 0x38, 0xfc)
+
+/*
+ * CPER specification (in UEFI specification 2.3 appendix N) requires
+ * byte-packed.
+ */
+struct cper_mce_record {
+	struct cper_record_header hdr;
+	struct cper_section_descriptor sec_hdr;
+	struct mce mce;
+} __packed;
+
+int apei_write_mce(struct mce *m)
+{
+	struct cper_mce_record rcd;
+
+	memset(&rcd, 0, sizeof(rcd));
+	memcpy(rcd.hdr.signature, CPER_SIG_RECORD, CPER_SIG_SIZE);
+	rcd.hdr.revision = CPER_RECORD_REV;
+	rcd.hdr.signature_end = CPER_SIG_END;
+	rcd.hdr.section_count = 1;
+	rcd.hdr.error_severity = CPER_SER_FATAL;
+	/* timestamp, platform_id, partition_id are all invalid */
+	rcd.hdr.validation_bits = 0;
+	rcd.hdr.record_length = sizeof(rcd);
+	rcd.hdr.creator_id = CPER_CREATOR_MCE;
+	rcd.hdr.notification_type = CPER_NOTIFY_MCE;
+	rcd.hdr.record_id = cper_next_record_id();
+	rcd.hdr.flags = CPER_HW_ERROR_FLAGS_PREVERR;
+
+	rcd.sec_hdr.section_offset = (void *)&rcd.mce - (void *)&rcd;
+	rcd.sec_hdr.section_length = sizeof(rcd.mce);
+	rcd.sec_hdr.revision = CPER_SEC_REV;
+	/* fru_id and fru_text is invalid */
+	rcd.sec_hdr.validation_bits = 0;
+	rcd.sec_hdr.flags = CPER_SEC_PRIMARY;
+	rcd.sec_hdr.section_type = CPER_SECTION_TYPE_MCE;
+	rcd.sec_hdr.section_severity = CPER_SER_FATAL;
+
+	memcpy(&rcd.mce, m, sizeof(*m));
+
+	return erst_write(&rcd.hdr);
+}
+
+ssize_t apei_read_mce(struct mce *m, u64 *record_id)
+{
+	struct cper_mce_record rcd;
+	ssize_t len;
+
+	len = erst_read_next(&rcd.hdr, sizeof(rcd));
+	if (len <= 0)
+		return len;
+	/* Can not skip other records in storage via ERST unless clear them */
+	else if (len != sizeof(rcd) ||
+		 uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE)) {
+		if (printk_ratelimit())
+			pr_warning(
+			"MCE-APEI: Can not skip the unknown record in ERST");
+		return -EIO;
+	}
+
+	memcpy(m, &rcd.mce, sizeof(*m));
+	*record_id = rcd.hdr.record_id;
+
+	return sizeof(*m);
+}
+
+/* Check whether there is record in ERST */
+int apei_check_mce(void)
+{
+	return erst_get_record_count();
+}
+
+int apei_clear_mce(u64 record_id)
+{
+	return erst_clear(record_id);
+}
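
The intended consumption cycle for these records: read one fixed-size record from ERST, hand it to user space, then clear it so the next read returns the next record. A small userspace model of that read-then-clear drain (the in-memory "store" is an illustrative stand-in for ERST):

    #include <stdio.h>
    #include <string.h>

    #define NRECS 3
    static char store[NRECS][8] = { "rec0", "rec1", "rec2" };
    static int present[NRECS] = { 1, 1, 1 };

    /* Like apei_read_mce(): return the next record, or 0 when empty. */
    static int read_next(char *out, int *id)
    {
        for (int i = 0; i < NRECS; i++)
            if (present[i]) {
                strcpy(out, store[i]);
                *id = i;
                return 1;
            }
        return 0;
    }

    int main(void)
    {
        char buf[8];
        int id;

        while (read_next(buf, &id)) {
            printf("consumed %s\n", buf);
            present[id] = 0;    /* clear only after a successful copy-out */
        }
        return 0;
    }
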
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 32996f9fab67..fefcc69ee8b5 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -28,3 +28,26 @@ extern int mce_ser;
 
 extern struct mce_bank *mce_banks;
 
+#ifdef CONFIG_ACPI_APEI
+int apei_write_mce(struct mce *m);
+ssize_t apei_read_mce(struct mce *m, u64 *record_id);
+int apei_check_mce(void);
+int apei_clear_mce(u64 record_id);
+#else
+static inline int apei_write_mce(struct mce *m)
+{
+	return -EINVAL;
+}
+static inline ssize_t apei_read_mce(struct mce *m, u64 *record_id)
+{
+	return 0;
+}
+static inline int apei_check_mce(void)
+{
+	return 0;
+}
+static inline int apei_clear_mce(u64 record_id)
+{
+	return -EINVAL;
+}
+#endif
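
The static-inline stubs exist so that callers in mce.c compile unconditionally; when CONFIG_ACPI_APEI is off, the compiler folds the dead branches away. A minimal model of the pattern (HAVE_APEI stands in for the kconfig symbol):

    #include <stdio.h>

    #ifdef HAVE_APEI
    int apei_check_mce(void);           /* real implementation elsewhere */
    #else
    static inline int apei_check_mce(void) { return 0; }   /* no records */
    #endif

    int main(void)
    {
        if (apei_check_mce())           /* no #ifdef at the call site */
            printf("persistent MCE records pending\n");
        else
            printf("nothing pending\n");
        return 0;
    }
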
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 7a355ddcc64b..18cc42562250 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -36,6 +36,7 @@
 #include <linux/fs.h>
 #include <linux/mm.h>
 #include <linux/debugfs.h>
+#include <linux/edac_mce.h>
 
 #include <asm/processor.h>
 #include <asm/hw_irq.h>
@@ -169,6 +170,15 @@ void mce_log(struct mce *mce)
 	entry = rcu_dereference_check_mce(mcelog.next);
 	for (;;) {
 		/*
+		 * If edac_mce is enabled, it will check the error type
+		 * and will process it, if it is a known error.
+		 * Otherwise, the error will be sent through mcelog
+		 * interface
+		 */
+		if (edac_mce_parse(mce))
+			return;
+
+		/*
 		 * When the buffer fills up discard new entries.
 		 * Assume that the earlier errors are the more
 		 * interesting ones:
@@ -264,7 +274,7 @@ static void wait_for_panic(void)
 
 static void mce_panic(char *msg, struct mce *final, char *exp)
 {
-	int i;
+	int i, apei_err = 0;
 
 	if (!fake_panic) {
 		/*
@@ -287,8 +297,11 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
 		struct mce *m = &mcelog.entry[i];
 		if (!(m->status & MCI_STATUS_VAL))
 			continue;
-		if (!(m->status & MCI_STATUS_UC))
+		if (!(m->status & MCI_STATUS_UC)) {
 			print_mce(m);
+			if (!apei_err)
+				apei_err = apei_write_mce(m);
+		}
 	}
 	/* Now print uncorrected but with the final one last */
 	for (i = 0; i < MCE_LOG_LEN; i++) {
@@ -297,11 +310,17 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
 			continue;
 		if (!(m->status & MCI_STATUS_UC))
 			continue;
-		if (!final || memcmp(m, final, sizeof(struct mce)))
+		if (!final || memcmp(m, final, sizeof(struct mce))) {
 			print_mce(m);
+			if (!apei_err)
+				apei_err = apei_write_mce(m);
+		}
 	}
-	if (final)
+	if (final) {
 		print_mce(final);
+		if (!apei_err)
+			apei_err = apei_write_mce(final);
+	}
 	if (cpu_missing)
 		printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n");
 	print_mce_tail();
@@ -1493,6 +1512,43 @@ static void collect_tscs(void *data)
 	rdtscll(cpu_tsc[smp_processor_id()]);
 }
 
+static int mce_apei_read_done;
+
+/* Collect MCE record of previous boot in persistent storage via APEI ERST. */
+static int __mce_read_apei(char __user **ubuf, size_t usize)
+{
+	int rc;
+	u64 record_id;
+	struct mce m;
+
+	if (usize < sizeof(struct mce))
+		return -EINVAL;
+
+	rc = apei_read_mce(&m, &record_id);
+	/* Error or no more MCE record */
+	if (rc <= 0) {
+		mce_apei_read_done = 1;
+		return rc;
+	}
+	rc = -EFAULT;
+	if (copy_to_user(*ubuf, &m, sizeof(struct mce)))
+		return rc;
+	/*
+	 * In fact, we should have cleared the record after that has
+	 * been flushed to the disk or sent to network in
+	 * /sbin/mcelog, but we have no interface to support that now,
+	 * so just clear it to avoid duplication.
+	 */
+	rc = apei_clear_mce(record_id);
+	if (rc) {
+		mce_apei_read_done = 1;
+		return rc;
+	}
+	*ubuf += sizeof(struct mce);
+
+	return 0;
+}
+
 static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
 			loff_t *off)
 {
@@ -1506,15 +1562,19 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
 		return -ENOMEM;
 
 	mutex_lock(&mce_read_mutex);
+
+	if (!mce_apei_read_done) {
+		err = __mce_read_apei(&buf, usize);
+		if (err || buf != ubuf)
+			goto out;
+	}
+
 	next = rcu_dereference_check_mce(mcelog.next);
 
 	/* Only supports full reads right now */
-	if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) {
-		mutex_unlock(&mce_read_mutex);
-		kfree(cpu_tsc);
-
-		return -EINVAL;
-	}
+	err = -EINVAL;
+	if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce))
+		goto out;
 
 	err = 0;
 	prev = 0;
@@ -1562,10 +1622,15 @@ timeout:
 			memset(&mcelog.entry[i], 0, sizeof(struct mce));
 		}
 	}
+
+	if (err)
+		err = -EFAULT;
+
+out:
 	mutex_unlock(&mce_read_mutex);
 	kfree(cpu_tsc);
 
-	return err ? -EFAULT : buf - ubuf;
+	return err ? err : buf - ubuf;
 }
 
 static unsigned int mce_poll(struct file *file, poll_table *wait)
@@ -1573,6 +1638,8 @@ static unsigned int mce_poll(struct file *file, poll_table *wait)
 	poll_wait(file, &mce_wait, wait);
 	if (rcu_dereference_check_mce(mcelog.next))
 		return POLLIN | POLLRDNORM;
+	if (!mce_apei_read_done && apei_check_mce())
+		return POLLIN | POLLRDNORM;
 	return 0;
 }
 
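
The reworked mce_read() now has one error variable, one "out" label that drops the lock, and a return of either the error or the bytes consumed; boot-time APEI records are drained before the live buffer. A userspace model of that shape (record sizes and contents are illustrative):

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>

    #define RECSZ 8
    static int apei_left = 2;       /* stand-in for records saved via ERST */

    static int read_records(char *ubuf, size_t usize)
    {
        char *buf = ubuf;
        int err = 0;

        /* persistent records first, like __mce_read_apei() */
        while (apei_left && usize - (size_t)(buf - ubuf) >= RECSZ) {
            memset(buf, 'A', RECSZ);
            buf += RECSZ;
            apei_left--;
        }
        if (buf != ubuf)
            goto out;               /* consumed boot-time records */

        err = -EINVAL;
        if (usize < RECSZ)
            goto out;               /* only full reads supported */

        err = 0;
        memset(buf, 'L', RECSZ);    /* one live record */
        buf += RECSZ;
    out:
        return err ? err : (int)(buf - ubuf);
    }

    int main(void)
    {
        char buf[64];
        int a = read_records(buf, sizeof(buf));   /* drains APEI: 16 */
        int b = read_records(buf, sizeof(buf));   /* live record:  8 */
        int c = read_records(buf, 4);             /* short read: -EINVAL */

        printf("%d %d %d\n", a, b, c);
        return 0;
    }
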
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 81c499eceb21..e1a0a3bf9716 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -190,7 +190,7 @@ thermal_throttle_cpu_callback(struct notifier_block *nfb,
190 mutex_unlock(&therm_cpu_lock); 190 mutex_unlock(&therm_cpu_lock);
191 break; 191 break;
192 } 192 }
193 return err ? NOTIFY_BAD : NOTIFY_OK; 193 return notifier_from_errno(err);
194} 194}
195 195
196static struct notifier_block thermal_throttle_cpu_notifier __cpuinitdata = 196static struct notifier_block thermal_throttle_cpu_notifier __cpuinitdata =
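
notifier_from_errno(), used here and in the cpuid.c and msr.c hunks below, improves on "err ? NOTIFY_BAD : NOTIFY_OK" by encoding the errno into the notifier return value, so the caller can recover it with notifier_to_errno(). A simplified, self-consistent model of the encoding (the constants match include/linux/notifier.h, but the helper bodies here are an illustration, not verbatim kernel code):

    #include <stdio.h>

    #define NOTIFY_OK           0x0001
    #define NOTIFY_STOP_MASK    0x8000

    /* encode: stop flag plus (2 + |err|) in the low bits */
    static int notifier_from_errno(int err)
    {
        return err ? (NOTIFY_STOP_MASK | (0x0002 - err)) : NOTIFY_OK;
    }

    /* decode: strip the stop flag, recover the negative errno */
    static int notifier_to_errno(int ret)
    {
        ret &= ~NOTIFY_STOP_MASK;
        return ret > NOTIFY_OK ? -(ret - 0x0002) : 0;
    }

    int main(void)
    {
        int ret = notifier_from_errno(-12);     /* -ENOMEM */
        printf("recovered errno: %d\n", notifier_to_errno(ret));
        return 0;
    }
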
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index fd4db0db3708..5db5b7d65a18 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -106,6 +106,7 @@ struct cpu_hw_events {
 
 	int			n_events;
 	int			n_added;
+	int			n_txn;
 	int			assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
 	u64			tags[X86_PMC_IDX_MAX];
 	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
@@ -983,6 +984,7 @@ static int x86_pmu_enable(struct perf_event *event)
 out:
 	cpuc->n_events = n;
 	cpuc->n_added += n - n0;
+	cpuc->n_txn += n - n0;
 
 	return 0;
 }
@@ -1089,6 +1091,14 @@ static void x86_pmu_disable(struct perf_event *event)
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	int i;
 
+	/*
+	 * If we're called during a txn, we don't need to do anything.
+	 * The events never got scheduled and ->cancel_txn will truncate
+	 * the event_list.
+	 */
+	if (cpuc->group_flag & PERF_EVENT_TXN_STARTED)
+		return;
+
 	x86_pmu_stop(event);
 
 	for (i = 0; i < cpuc->n_events; i++) {
@@ -1379,6 +1389,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu)
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
 	cpuc->group_flag |= PERF_EVENT_TXN_STARTED;
+	cpuc->n_txn = 0;
 }
 
 /*
@@ -1391,6 +1402,11 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
 	cpuc->group_flag &= ~PERF_EVENT_TXN_STARTED;
+	/*
+	 * Truncate the collected events.
+	 */
+	cpuc->n_added -= cpuc->n_txn;
+	cpuc->n_events -= cpuc->n_txn;
 }
 
 /*
@@ -1419,6 +1435,12 @@ static int x86_pmu_commit_txn(const struct pmu *pmu)
 	 */
 	memcpy(cpuc->assign, assign, n*sizeof(int));
 
+	/*
+	 * Clear out the txn count so that ->cancel_txn() which gets
+	 * run after ->commit_txn() doesn't undo things.
+	 */
+	cpuc->n_txn = 0;
+
 	return 0;
 }
 
@@ -1717,7 +1739,11 @@ void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int ski
 	 */
 	regs->bp = rewind_frame_pointer(skip + 1);
 	regs->cs = __KERNEL_CS;
-	local_save_flags(regs->flags);
+	/*
+	 * We abuse bit 3 to pass exact information, see perf_misc_flags
+	 * and the comment with PERF_EFLAGS_EXACT.
+	 */
+	regs->flags = 0;
 }
 
 unsigned long perf_instruction_pointer(struct pt_regs *regs)
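
The n_txn counter added above is the heart of the transaction fix: start_txn() zeroes it, each tentative add bumps it, cancel_txn() rolls back exactly those adds, and commit_txn() clears it so the cancel_txn() that perf core runs afterwards undoes nothing. A compact model of that protocol (field names follow the patch; everything else is illustrative):

    #include <assert.h>

    struct pmu_state {
        int n_events;
        int n_txn;      /* events added since start_txn() */
    };

    static void start_txn(struct pmu_state *s)  { s->n_txn = 0; }
    static void add_event(struct pmu_state *s)  { s->n_events++; s->n_txn++; }
    static void cancel_txn(struct pmu_state *s) { s->n_events -= s->n_txn; }
    static void commit_txn(struct pmu_state *s) { s->n_txn = 0; }

    int main(void)
    {
        struct pmu_state s = { 0, 0 };

        start_txn(&s);
        add_event(&s);
        add_event(&s);
        cancel_txn(&s);             /* group failed: both adds undone */
        assert(s.n_events == 0);

        start_txn(&s);
        add_event(&s);
        commit_txn(&s);
        cancel_txn(&s);             /* runs after commit; must be a no-op */
        assert(s.n_events == 1);
        return 0;
    }
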
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index 424fc8de68e4..ae85d69644d1 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -465,15 +465,21 @@ out:
 	return rc;
 }
 
-static inline void p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
+static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
 {
-	unsigned long dummy;
+	int overflow = 0;
+	u32 low, high;
 
-	rdmsrl(hwc->config_base + hwc->idx, dummy);
-	if (dummy & P4_CCCR_OVF) {
+	rdmsr(hwc->config_base + hwc->idx, low, high);
+
+	/* we need to check high bit for unflagged overflows */
+	if ((low & P4_CCCR_OVF) || !(high & (1 << 31))) {
+		overflow = 1;
 		(void)checking_wrmsrl(hwc->config_base + hwc->idx,
-			((u64)dummy) & ~P4_CCCR_OVF);
+			((u64)low) & ~P4_CCCR_OVF);
 	}
+
+	return overflow;
 }
 
 static inline void p4_pmu_disable_event(struct perf_event *event)
@@ -584,21 +590,15 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
 
 		WARN_ON_ONCE(hwc->idx != idx);
 
-		/*
-		 * FIXME: Redundant call, actually not needed
-		 * but just to check if we're screwed
-		 */
-		p4_pmu_clear_cccr_ovf(hwc);
+		/* it might be unflagged overflow */
+		handled = p4_pmu_clear_cccr_ovf(hwc);
 
 		val = x86_perf_event_update(event);
-		if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
+		if (!handled && (val & (1ULL << (x86_pmu.cntval_bits - 1))))
 			continue;
 
-		/*
-		 * event overflow
-		 */
-		handled = 1;
-		data.period = event->hw.last_period;
+		/* event overflow for sure */
+		data.period = event->hw.last_period;
 
 		if (!x86_perf_event_set_period(event))
 			continue;
@@ -670,7 +670,7 @@ static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu)
 
 /*
  * ESCR address hashing is tricky, ESCRs are not sequential
- * in memory but all starts from MSR_P4_BSU_ESCR0 (0x03e0) and
+ * in memory but all starts from MSR_P4_BSU_ESCR0 (0x03a0) and
  * the metric between any ESCRs is laid in range [0xa0,0xe1]
  *
  * so we make ~70% filled hashtable
@@ -735,8 +735,9 @@ static int p4_get_escr_idx(unsigned int addr)
 {
 	unsigned int idx = P4_ESCR_MSR_IDX(addr);
 
 	if (unlikely(idx >= P4_ESCR_MSR_TABLE_SIZE	||
-			!p4_escr_table[idx])) {
+			!p4_escr_table[idx]		||
+			p4_escr_table[idx] != addr)) {
 		WARN_ONCE(1, "P4 PMU: Wrong address passed: %x\n", addr);
 		return -1;
 	}
@@ -762,7 +763,7 @@ static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign
 {
 	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 	unsigned long escr_mask[BITS_TO_LONGS(P4_ESCR_MSR_TABLE_SIZE)];
-	int cpu = raw_smp_processor_id();
+	int cpu = smp_processor_id();
 	struct hw_perf_event *hwc;
 	struct p4_event_bind *bind;
 	unsigned int i, thread, num;
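
The "unflagged overflow" check above leans on how perf programs these counters: each is loaded with a negative value, -(sampling period), so it overflows (wraps past zero) after "period" increments, at which point its top bit reads back as clear even when no overflow flag was latched. A toy model of that sign-bit test, using a 64-bit counter purely for illustration (P4 counters are narrower):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t period = 100;
        uint64_t cnt = (uint64_t)-(int64_t)period;  /* start negative */

        for (int i = 0; i < 150; i++) {
            cnt++;
            if (!(cnt >> 63)) {     /* top bit clear: we overflowed */
                printf("overflow after %d increments\n", i + 1);
                break;
            }
        }
        return 0;
    }
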
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 8b862d5900fe..1b7b31ab7d86 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -170,7 +170,7 @@ static int __cpuinit cpuid_class_cpu_callback(struct notifier_block *nfb,
170 cpuid_device_destroy(cpu); 170 cpuid_device_destroy(cpu);
171 break; 171 break;
172 } 172 }
173 return err ? NOTIFY_BAD : NOTIFY_OK; 173 return notifier_from_errno(err);
174} 174}
175 175
176static struct notifier_block __refdata cpuid_class_cpu_notifier = 176static struct notifier_block __refdata cpuid_class_cpu_notifier =
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c
index 3a54dcb9cd0e..43e9ccf44947 100644
--- a/arch/x86/kernel/init_task.c
+++ b/arch/x86/kernel/init_task.c
@@ -34,7 +34,7 @@ EXPORT_SYMBOL(init_task);
 /*
  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
  * no more per-task TSS's. The TSS size is kept cacheline-aligned
- * so they are allowed to end up in the .data.cacheline_aligned
+ * so they are allowed to end up in the .data..cacheline_aligned
  * section. Since TSS's are completely CPU-local, we want them
  * on exact cacheline boundaries, to eliminate cacheline ping-pong.
  */
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 4d4468e9f47c..7bf2dc4c8f70 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -230,7 +230,7 @@ static int __cpuinit msr_class_cpu_callback(struct notifier_block *nfb,
230 msr_device_destroy(cpu); 230 msr_device_destroy(cpu);
231 break; 231 break;
232 } 232 }
233 return err ? NOTIFY_BAD : NOTIFY_OK; 233 return notifier_from_errno(err);
234} 234}
235 235
236static struct notifier_block __refdata msr_class_cpu_notifier = { 236static struct notifier_block __refdata msr_class_cpu_notifier = {
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 7d2829dde20e..a5bc528d4328 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -31,8 +31,6 @@ static struct dma_map_ops swiotlb_dma_ops = {
 	.free_coherent = swiotlb_free_coherent,
 	.sync_single_for_cpu = swiotlb_sync_single_for_cpu,
 	.sync_single_for_device = swiotlb_sync_single_for_device,
-	.sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu,
-	.sync_single_range_for_device = swiotlb_sync_single_range_for_device,
 	.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
 	.sync_sg_for_device = swiotlb_sync_sg_for_device,
 	.map_sg = swiotlb_map_sg_attrs,
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index e8029896309a..b4ae4acbd031 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -676,6 +676,17 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
 			DMI_MATCH(DMI_BOARD_NAME, "DG45FC"),
 		},
 	},
+	/*
+	 * The Dell Inspiron Mini 1012 has DMI_BIOS_VENDOR = "Dell Inc.", so
+	 * match on the product name.
+	 */
+	{
+		.callback = dmi_low_memory_corruption,
+		.ident = "Phoenix BIOS",
+		.matches = {
+			DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 1012"),
+		},
+	},
 #endif
 	{}
 };
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index ef6370b00e70..de3b63ae3da2 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -21,12 +21,6 @@
 #include <asm/cpu.h>
 #include <asm/stackprotector.h>
 
-#ifdef CONFIG_DEBUG_PER_CPU_MAPS
-# define DBG(fmt, ...) pr_dbg(fmt, ##__VA_ARGS__)
-#else
-# define DBG(fmt, ...) do { if (0) pr_dbg(fmt, ##__VA_ARGS__); } while (0)
-#endif
-
 DEFINE_PER_CPU(int, cpu_number);
 EXPORT_PER_CPU_SYMBOL(cpu_number);
 
@@ -247,7 +241,7 @@ void __init setup_per_cpu_areas(void)
 #endif
 #endif
 	/*
-	 * Up to this point, the boot CPU has been using .data.init
+	 * Up to this point, the boot CPU has been using .init.data
 	 * area.  Reload any changed state for the boot CPU.
 	 */
 	if (cpu == boot_cpu_id)
@@ -265,10 +259,10 @@ void __init setup_per_cpu_areas(void)
 
 #if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
 	/*
-	 * make sure boot cpu node_number is right, when boot cpu is on the
+	 * make sure boot cpu numa_node is right, when boot cpu is on the
 	 * node that doesn't have mem installed
 	 */
-	per_cpu(node_number, boot_cpu_id) = cpu_to_node(boot_cpu_id);
+	set_cpu_numa_node(boot_cpu_id, early_cpu_to_node(boot_cpu_id));
 #endif
 
 	/* Setup node to cpumask map */
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 763d815e27a0..c4f33b2e77d6 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -686,7 +686,7 @@ static void __cpuinit do_fork_idle(struct work_struct *work)
 static void __cpuinit announce_cpu(int cpu, int apicid)
 {
 	static int current_node = -1;
-	int node = cpu_to_node(cpu);
+	int node = early_cpu_to_node(cpu);
 
 	if (system_state == SYSTEM_BOOTING) {
 		if (node != current_node) {
@@ -1215,9 +1215,17 @@ __init void prefill_possible_map(void)
 	if (!num_processors)
 		num_processors = 1;
 
-	if (setup_possible_cpus == -1)
-		possible = num_processors + disabled_cpus;
-	else
+	i = setup_max_cpus ?: 1;
+	if (setup_possible_cpus == -1) {
+		possible = num_processors;
+#ifdef CONFIG_HOTPLUG_CPU
+		if (setup_max_cpus)
+			possible += disabled_cpus;
+#else
+		if (possible > i)
+			possible = i;
+#endif
+	} else
 		possible = setup_possible_cpus;
 
 	total_cpus = max_t(int, possible, num_processors + disabled_cpus);
@@ -1230,11 +1238,23 @@ __init void prefill_possible_map(void)
 		possible = nr_cpu_ids;
 	}
 
+#ifdef CONFIG_HOTPLUG_CPU
+	if (!setup_max_cpus)
+#endif
+	if (possible > i) {
+		printk(KERN_WARNING
+			"%d Processors exceeds max_cpus limit of %u\n",
+			possible, setup_max_cpus);
+		possible = i;
+	}
+
 	printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n",
 		possible, max_t(int, possible - num_processors, 0));
 
 	for (i = 0; i < possible; i++)
 		set_cpu_possible(i, true);
+	for (; i < NR_CPUS; i++)
+		set_cpu_possible(i, false);
 
 	nr_cpu_ids = possible;
 }
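
The prefill_possible_map() rework makes the possible-CPU sizing honor the boot-time maxcpus= limit: hotplug spares are only reserved when maxcpus allows them, and with maxcpus=0 everything is clamped down to the boot CPU. A model of the sizing decision (values are illustrative; "hotplug" stands in for CONFIG_HOTPLUG_CPU):

    #include <stdio.h>

    static int size_possible(int num_processors, int disabled_cpus,
                             int setup_max_cpus, int hotplug)
    {
        int limit = setup_max_cpus ? setup_max_cpus : 1;
        int possible = num_processors;

        if (hotplug && setup_max_cpus)
            possible += disabled_cpus;      /* room for hotplug CPUs */

        if ((!hotplug || !setup_max_cpus) && possible > limit) {
            printf("%d processors exceeds max_cpus limit of %d\n",
                   possible, setup_max_cpus);
            possible = limit;
        }
        return possible;
    }

    int main(void)
    {
        printf("maxcpus=8: %d possible\n", size_possible(4, 2, 8, 1));
        printf("maxcpus=0: %d possible\n", size_possible(4, 2, 0, 1));
        return 0;
    }
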
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 2cc249718c46..d0bb52296fa3 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -97,7 +97,7 @@ SECTIONS
 		HEAD_TEXT
 #ifdef CONFIG_X86_32
 		. = ALIGN(PAGE_SIZE);
-		*(.text.page_aligned)
+		*(.text..page_aligned)
 #endif
 		. = ALIGN(8);
 		_stext = .;
@@ -305,7 +305,7 @@ SECTIONS
 	. = ALIGN(PAGE_SIZE);
 	.bss : AT(ADDR(.bss) - LOAD_OFFSET) {
 		__bss_start = .;
-		*(.bss.page_aligned)
+		*(.bss..page_aligned)
 		*(.bss)
 		. = ALIGN(4);
 		__bss_stop = .;