author    Ingo Molnar <mingo@kernel.org>  2012-06-20 08:22:32 -0400
committer Ingo Molnar <mingo@kernel.org>  2012-06-20 08:22:34 -0400
commit    6a991acceedce3ca93caef8ba7af2468c9451614 (patch)
tree      1997d71fc57bdebd12fc70a73070281614b52f15 /arch/x86/kernel
parent    70fb74a5420f9caa3e001d65004e4b669124283e (diff)
parent    485802a6c524e62b5924849dd727ddbb1497cc71 (diff)
Merge commit 'v3.5-rc3' into x86/debug
Merge it in to pick up a fix that we are going to clean up in this branch.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/aperture_64.c                6
-rw-r--r--  arch/x86/kernel/apic/io_apic.c               4
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c             4
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c            11
-rw-r--r--  arch/x86/kernel/cpu/perf_event.h             2
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c     145
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c    9
-rw-r--r--  arch/x86/kernel/kvmclock.c                   5
-rw-r--r--  arch/x86/kernel/nmi_selftest.c               4
-rw-r--r--  arch/x86/kernel/pci-dma.c                    3
-rw-r--r--  arch/x86/kernel/reboot.c                     6
-rw-r--r--  arch/x86/kernel/smpboot.c                   16
12 files changed, 146 insertions, 69 deletions
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 6e76c191a83..d5fd66f0d4c 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -20,7 +20,6 @@
 #include <linux/bitops.h>
 #include <linux/ioport.h>
 #include <linux/suspend.h>
-#include <linux/kmemleak.h>
 #include <asm/e820.h>
 #include <asm/io.h>
 #include <asm/iommu.h>
@@ -95,11 +94,6 @@ static u32 __init allocate_aperture(void)
 		return 0;
 	}
 	memblock_reserve(addr, aper_size);
-	/*
-	 * Kmemleak should not scan this block as it may not be mapped via the
-	 * kernel direct mapping.
-	 */
-	kmemleak_ignore(phys_to_virt(addr));
 	printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n",
 			aper_size >> 10, addr);
 	insert_aperture_resource((u32)addr, aper_size);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 5155d6f806f..8704918514d 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1195,7 +1195,7 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
 	BUG_ON(!cfg->vector);
 
 	vector = cfg->vector;
-	for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
+	for_each_cpu(cpu, cfg->domain)
 		per_cpu(vector_irq, cpu)[vector] = -1;
 
 	cfg->vector = 0;
@@ -1203,7 +1203,7 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
 
 	if (likely(!cfg->move_in_progress))
 		return;
-	for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
+	for_each_cpu(cpu, cfg->old_domain) {
 		for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
 								vector++) {
 			if (per_cpu(vector_irq, cpu)[vector] != irq)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 5623b4b5d51..5a5a5dc1ff1 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1278,7 +1278,7 @@ static void mce_timer_fn(unsigned long data)
 	 */
 	iv = __this_cpu_read(mce_next_interval);
 	if (mce_notify_irq())
-		iv = max(iv, (unsigned long) HZ/100);
+		iv = max(iv / 2, (unsigned long) HZ/100);
 	else
 		iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
 	__this_cpu_write(mce_next_interval, iv);
@@ -1560,7 +1560,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 static void __mcheck_cpu_init_timer(void)
 {
 	struct timer_list *t = &__get_cpu_var(mce_timer);
-	unsigned long iv = __this_cpu_read(mce_next_interval);
+	unsigned long iv = check_interval * HZ;
 
 	setup_timer(t, mce_timer_fn, smp_processor_id());
 
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index e049d6da018..c4706cf9c01 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1496,6 +1496,7 @@ static struct cpu_hw_events *allocate_fake_cpuc(void)
 		if (!cpuc->shared_regs)
 			goto error;
 	}
+	cpuc->is_fake = 1;
 	return cpuc;
 error:
 	free_fake_cpuc(cpuc);
@@ -1756,6 +1757,12 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
 	dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
 }
 
+static inline int
+valid_user_frame(const void __user *fp, unsigned long size)
+{
+	return (__range_not_ok(fp, size, TASK_SIZE) == 0);
+}
+
 #ifdef CONFIG_COMPAT
 
 #include <asm/compat.h>
@@ -1780,7 +1787,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
 		if (bytes != sizeof(frame))
 			break;
 
-		if (fp < compat_ptr(regs->sp))
+		if (!valid_user_frame(fp, sizeof(frame)))
 			break;
 
 		perf_callchain_store(entry, frame.return_address);
@@ -1826,7 +1833,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 		if (bytes != sizeof(frame))
 			break;
 
-		if ((unsigned long)fp < regs->sp)
+		if (!valid_user_frame(fp, sizeof(frame)))
 			break;
 
 		perf_callchain_store(entry, frame.return_address);
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 6638aaf5449..7241e2fc3c1 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -117,6 +117,7 @@ struct cpu_hw_events {
 	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
 
 	unsigned int		group_flag;
+	int			is_fake;
 
 	/*
 	 * Intel DebugStore bits
@@ -364,6 +365,7 @@ struct x86_pmu {
 	int		pebs_record_size;
 	void		(*drain_pebs)(struct pt_regs *regs);
 	struct event_constraint *pebs_constraints;
+	void		(*pebs_aliases)(struct perf_event *event);
 
 	/*
 	 * Intel LBR
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 9e3f5d6e3d2..5073bf1c1d8 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1121,27 +1121,33 @@ intel_bts_constraints(struct perf_event *event)
 	return NULL;
 }
 
-static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
+static int intel_alt_er(int idx)
 {
 	if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
-		return false;
+		return idx;
 
-	if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
-		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
-		event->hw.config |= 0x01bb;
-		event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
-		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
-	} else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
+	if (idx == EXTRA_REG_RSP_0)
+		return EXTRA_REG_RSP_1;
+
+	if (idx == EXTRA_REG_RSP_1)
+		return EXTRA_REG_RSP_0;
+
+	return idx;
+}
+
+static void intel_fixup_er(struct perf_event *event, int idx)
+{
+	event->hw.extra_reg.idx = idx;
+
+	if (idx == EXTRA_REG_RSP_0) {
 		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
 		event->hw.config |= 0x01b7;
-		event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
 		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
+	} else if (idx == EXTRA_REG_RSP_1) {
+		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
+		event->hw.config |= 0x01bb;
+		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
 	}
-
-	if (event->hw.extra_reg.idx == orig_idx)
-		return false;
-
-	return true;
 }
 
 /*
@@ -1159,14 +1165,18 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
 	struct event_constraint *c = &emptyconstraint;
 	struct er_account *era;
 	unsigned long flags;
-	int orig_idx = reg->idx;
+	int idx = reg->idx;
 
-	/* already allocated shared msr */
-	if (reg->alloc)
+	/*
+	 * reg->alloc can be set due to existing state, so for fake cpuc we
+	 * need to ignore this, otherwise we might fail to allocate proper fake
+	 * state for this extra reg constraint. Also see the comment below.
+	 */
+	if (reg->alloc && !cpuc->is_fake)
 		return NULL; /* call x86_get_event_constraint() */
 
 again:
-	era = &cpuc->shared_regs->regs[reg->idx];
+	era = &cpuc->shared_regs->regs[idx];
 	/*
 	 * we use spin_lock_irqsave() to avoid lockdep issues when
 	 * passing a fake cpuc
@@ -1175,6 +1185,29 @@ again:
 
 	if (!atomic_read(&era->ref) || era->config == reg->config) {
 
+		/*
+		 * If its a fake cpuc -- as per validate_{group,event}() we
+		 * shouldn't touch event state and we can avoid doing so
+		 * since both will only call get_event_constraints() once
+		 * on each event, this avoids the need for reg->alloc.
+		 *
+		 * Not doing the ER fixup will only result in era->reg being
+		 * wrong, but since we won't actually try and program hardware
+		 * this isn't a problem either.
+		 */
+		if (!cpuc->is_fake) {
+			if (idx != reg->idx)
+				intel_fixup_er(event, idx);
+
+			/*
+			 * x86_schedule_events() can call get_event_constraints()
+			 * multiple times on events in the case of incremental
+			 * scheduling(). reg->alloc ensures we only do the ER
+			 * allocation once.
+			 */
+			reg->alloc = 1;
+		}
+
 		/* lock in msr value */
 		era->config = reg->config;
 		era->reg = reg->reg;
@@ -1182,17 +1215,17 @@ again:
 		/* one more user */
 		atomic_inc(&era->ref);
 
-		/* no need to reallocate during incremental event scheduling */
-		reg->alloc = 1;
-
 		/*
 		 * need to call x86_get_event_constraint()
 		 * to check if associated event has constraints
 		 */
 		c = NULL;
-	} else if (intel_try_alt_er(event, orig_idx)) {
-		raw_spin_unlock_irqrestore(&era->lock, flags);
-		goto again;
+	} else {
+		idx = intel_alt_er(idx);
+		if (idx != reg->idx) {
+			raw_spin_unlock_irqrestore(&era->lock, flags);
+			goto again;
+		}
 	}
 	raw_spin_unlock_irqrestore(&era->lock, flags);
 
@@ -1206,11 +1239,14 @@ __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
 	struct er_account *era;
 
 	/*
-	 * only put constraint if extra reg was actually
-	 * allocated. Also takes care of event which do
-	 * not use an extra shared reg
+	 * Only put constraint if extra reg was actually allocated. Also takes
+	 * care of event which do not use an extra shared reg.
+	 *
+	 * Also, if this is a fake cpuc we shouldn't touch any event state
+	 * (reg->alloc) and we don't care about leaving inconsistent cpuc state
+	 * either since it'll be thrown out.
 	 */
-	if (!reg->alloc)
+	if (!reg->alloc || cpuc->is_fake)
 		return;
 
 	era = &cpuc->shared_regs->regs[reg->idx];
@@ -1302,15 +1338,9 @@ static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
 	intel_put_shared_regs_event_constraints(cpuc, event);
 }
 
-static int intel_pmu_hw_config(struct perf_event *event)
+static void intel_pebs_aliases_core2(struct perf_event *event)
 {
-	int ret = x86_pmu_hw_config(event);
-
-	if (ret)
-		return ret;
-
-	if (event->attr.precise_ip &&
-	    (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
+	if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
 		/*
 		 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
 		 * (0x003c) so that we can use it with PEBS.
@@ -1331,10 +1361,48 @@ static int intel_pmu_hw_config(struct perf_event *event)
 		 */
 		u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16);
 
+		alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
+		event->hw.config = alt_config;
+	}
+}
+
+static void intel_pebs_aliases_snb(struct perf_event *event)
+{
+	if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
+		/*
+		 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
+		 * (0x003c) so that we can use it with PEBS.
+		 *
+		 * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
+		 * PEBS capable. However we can use UOPS_RETIRED.ALL
+		 * (0x01c2), which is a PEBS capable event, to get the same
+		 * count.
+		 *
+		 * UOPS_RETIRED.ALL counts the number of cycles that retires
+		 * CNTMASK micro-ops. By setting CNTMASK to a value (16)
+		 * larger than the maximum number of micro-ops that can be
+		 * retired per cycle (4) and then inverting the condition, we
+		 * count all cycles that retire 16 or less micro-ops, which
+		 * is every cycle.
+		 *
+		 * Thereby we gain a PEBS capable cycle counter.
+		 */
+		u64 alt_config = X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16);
 
 		alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
 		event->hw.config = alt_config;
 	}
+}
+
+static int intel_pmu_hw_config(struct perf_event *event)
+{
+	int ret = x86_pmu_hw_config(event);
+
+	if (ret)
+		return ret;
+
+	if (event->attr.precise_ip && x86_pmu.pebs_aliases)
+		x86_pmu.pebs_aliases(event);
 
 	if (intel_pmu_needs_lbr_smpl(event)) {
 		ret = intel_pmu_setup_lbr_filter(event);
@@ -1609,6 +1677,7 @@ static __initconst const struct x86_pmu intel_pmu = {
 	.max_period		= (1ULL << 31) - 1,
 	.get_event_constraints	= intel_get_event_constraints,
 	.put_event_constraints	= intel_put_event_constraints,
+	.pebs_aliases		= intel_pebs_aliases_core2,
 
 	.format_attrs		= intel_arch3_formats_attr,
 
@@ -1842,8 +1911,9 @@ __init int intel_pmu_init(void)
 		break;
 
 	case 42: /* SandyBridge */
-		x86_add_quirk(intel_sandybridge_quirk);
 	case 45: /* SandyBridge, "Romely-EP" */
+		x86_add_quirk(intel_sandybridge_quirk);
+	case 58: /* IvyBridge */
 		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
@@ -1851,6 +1921,7 @@ __init int intel_pmu_init(void)
 
 		x86_pmu.event_constraints = intel_snb_event_constraints;
 		x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
+		x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
 		x86_pmu.extra_regs = intel_snb_extra_regs;
 		/* all extra regs are per-cpu when HT is on */
 		x86_pmu.er_flags |= ERF_HAS_RSP_1;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 5a3edc27f6e..35e2192df9f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -400,14 +400,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
 	INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xcd, 0x8),    /* MEM_TRANS_RETIRED.* */
-	INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_LOADS */
-	INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_STORES */
-	INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOP_RETIRED.LOCK_LOADS */
-	INTEL_UEVENT_CONSTRAINT(0x22d0, 0xf), /* MEM_UOP_RETIRED.LOCK_STORES */
-	INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_LOADS */
-	INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_STORES */
-	INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOP_RETIRED.ANY_LOADS */
-	INTEL_UEVENT_CONSTRAINT(0x82d0, 0xf), /* MEM_UOP_RETIRED.ANY_STORES */
+	INTEL_EVENT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
 	INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 086eb58c6e8..f1b42b3a186 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -120,11 +120,6 @@ bool kvm_check_and_clear_guest_paused(void)
 	bool ret = false;
 	struct pvclock_vcpu_time_info *src;
 
-	/*
-	 * per_cpu() is safe here because this function is only called from
-	 * timer functions where preemption is already disabled.
-	 */
-	WARN_ON(!in_atomic());
 	src = &__get_cpu_var(hv_clock);
 	if ((src->flags & PVCLOCK_GUEST_STOPPED) != 0) {
 		__this_cpu_and(hv_clock.flags, ~PVCLOCK_GUEST_STOPPED);
diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c
index e31bf8d5c4d..149b8d9c6ad 100644
--- a/arch/x86/kernel/nmi_selftest.c
+++ b/arch/x86/kernel/nmi_selftest.c
@@ -42,7 +42,7 @@ static int __init nmi_unk_cb(unsigned int val, struct pt_regs *regs)
 static void __init init_nmi_testsuite(void)
 {
 	/* trap all the unknown NMIs we may generate */
-	register_nmi_handler(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk");
+	register_nmi_handler_initonly(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk");
 }
 
 static void __init cleanup_nmi_testsuite(void)
@@ -64,7 +64,7 @@ static void __init test_nmi_ipi(struct cpumask *mask)
 {
 	unsigned long timeout;
 
-	if (register_nmi_handler(NMI_LOCAL, test_nmi_ipi_callback,
+	if (register_nmi_handler_initonly(NMI_LOCAL, test_nmi_ipi_callback,
 				 NMI_FLAG_FIRST, "nmi_selftest")) {
 		nmi_fail = FAILURE;
 		return;
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 62c9457ccd2..c0f420f76cd 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -100,7 +100,7 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
 				  struct dma_attrs *attrs)
 {
 	unsigned long dma_mask;
-	struct page *page = NULL;
+	struct page *page;
 	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	dma_addr_t addr;
 
@@ -108,6 +108,7 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
 
 	flag |= __GFP_ZERO;
 again:
+	page = NULL;
 	if (!(flag & GFP_ATOMIC))
 		page = dma_alloc_from_contiguous(dev, count, get_order(size));
 	if (!page)
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index ab3f0626071..10ae9be07b4 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -643,9 +643,11 @@ void native_machine_shutdown(void)
 	set_cpus_allowed_ptr(current, cpumask_of(reboot_cpu_id));
 
 	/*
-	 * O.K Now that I'm on the appropriate processor,
-	 * stop all of the others.
+	 * O.K Now that I'm on the appropriate processor, stop all of the
+	 * others. Also disable the local irq to not receive the per-cpu
+	 * timer interrupt which may trigger scheduler's load balance.
 	 */
+	local_irq_disable();
 	stop_other_cpus();
 #endif
 
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 456d64806c8..54e938de713 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -351,9 +351,12 @@ static bool __cpuinit match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
 
 static bool __cpuinit match_mc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
 {
-	if (c->phys_proc_id == o->phys_proc_id)
-		return topology_sane(c, o, "mc");
+	if (c->phys_proc_id == o->phys_proc_id) {
+		if (cpu_has(c, X86_FEATURE_AMD_DCM))
+			return true;
 
+		return topology_sane(c, o, "mc");
+	}
 	return false;
 }
 
@@ -384,6 +387,15 @@ void __cpuinit set_cpu_sibling_map(int cpu)
 		if ((i == cpu) || (has_mc && match_llc(c, o)))
 			link_mask(llc_shared, cpu, i);
 
+	}
+
+	/*
+	 * This needs a separate iteration over the cpus because we rely on all
+	 * cpu_sibling_mask links to be set-up.
+	 */
+	for_each_cpu(i, cpu_sibling_setup_mask) {
+		o = &cpu_data(i);
+
 		if ((i == cpu) || (has_mc && match_mc(c, o))) {
 			link_mask(core, cpu, i);
 