Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/aperture_64.c              |   6
-rw-r--r--  arch/x86/kernel/apic/io_apic.c             |   4
-rw-r--r--  arch/x86/kernel/cpu/common.c               |   8
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c           |  37
-rw-r--r--  arch/x86/kernel/cpu/mtrr/cleanup.c         |   2
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c           |  11
-rw-r--r--  arch/x86/kernel/cpu/perf_event.h           |   2
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c     | 145
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c  |   9
-rw-r--r--  arch/x86/kernel/entry_32.S                 |  13
-rw-r--r--  arch/x86/kernel/entry_64.S                 |  44
-rw-r--r--  arch/x86/kernel/ftrace.c                   | 102
-rw-r--r--  arch/x86/kernel/hpet.c                     |   2
-rw-r--r--  arch/x86/kernel/kvmclock.c                 |   5
-rw-r--r--  arch/x86/kernel/nmi.c                      |   6
-rw-r--r--  arch/x86/kernel/nmi_selftest.c             |   4
-rw-r--r--  arch/x86/kernel/pci-dma.c                  |   3
-rw-r--r--  arch/x86/kernel/ptrace.c                   |   6
-rw-r--r--  arch/x86/kernel/reboot.c                   |   6
-rw-r--r--  arch/x86/kernel/signal.c                   |  62
-rw-r--r--  arch/x86/kernel/smpboot.c                  |  26
-rw-r--r--  arch/x86/kernel/traps.c                    |   8
22 files changed, 342 insertions(+), 169 deletions(-)
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 6e76c191a835..d5fd66f0d4cd 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -20,7 +20,6 @@
 #include <linux/bitops.h>
 #include <linux/ioport.h>
 #include <linux/suspend.h>
-#include <linux/kmemleak.h>
 #include <asm/e820.h>
 #include <asm/io.h>
 #include <asm/iommu.h>
@@ -95,11 +94,6 @@ static u32 __init allocate_aperture(void)
 		return 0;
 	}
 	memblock_reserve(addr, aper_size);
-	/*
-	 * Kmemleak should not scan this block as it may not be mapped via the
-	 * kernel direct mapping.
-	 */
-	kmemleak_ignore(phys_to_virt(addr));
 	printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n",
 			aper_size >> 10, addr);
 	insert_aperture_resource((u32)addr, aper_size);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index ac96561d1a99..5f0ff597437c 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1195,7 +1195,7 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
 	BUG_ON(!cfg->vector);

 	vector = cfg->vector;
-	for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
+	for_each_cpu(cpu, cfg->domain)
 		per_cpu(vector_irq, cpu)[vector] = -1;

 	cfg->vector = 0;
@@ -1203,7 +1203,7 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg)

 	if (likely(!cfg->move_in_progress))
 		return;
-	for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
+	for_each_cpu(cpu, cfg->old_domain) {
 		for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
 		     vector++) {
 			if (per_cpu(vector_irq, cpu)[vector] != irq)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 82f29e70d058..6b9333b429ba 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1101,14 +1101,20 @@ int is_debug_stack(unsigned long addr)
 		addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ));
 }

+static DEFINE_PER_CPU(u32, debug_stack_use_ctr);
+
 void debug_stack_set_zero(void)
 {
+	this_cpu_inc(debug_stack_use_ctr);
 	load_idt((const struct desc_ptr *)&nmi_idt_descr);
 }

 void debug_stack_reset(void)
 {
-	load_idt((const struct desc_ptr *)&idt_descr);
+	if (WARN_ON(!this_cpu_read(debug_stack_use_ctr)))
+		return;
+	if (this_cpu_dec_return(debug_stack_use_ctr) == 0)
+		load_idt((const struct desc_ptr *)&idt_descr);
 }

 #else /* CONFIG_X86_64 */
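Illustrative aside (not part of the patch): the cpu/common.c hunk above makes debug_stack_set_zero()/debug_stack_reset() nestable by counting users, so only the outermost reset reloads the normal IDT. A minimal stand-alone C sketch of that counting pattern follows; the names and the plain (non-per-CPU, non-atomic) counter are illustrative assumptions, not the kernel API.

#include <stdio.h>

/* Hypothetical stand-ins for the per-CPU counter and the two IDT loads. */
static unsigned int debug_stack_use_ctr;

static void load_special_idt(void) { puts("load zero-IST debug IDT"); }
static void load_normal_idt(void)  { puts("load normal IDT"); }

static void debug_stack_set_zero(void)
{
	debug_stack_use_ctr++;          /* every caller switches to the special IDT */
	load_special_idt();
}

static void debug_stack_reset(void)
{
	if (debug_stack_use_ctr == 0)   /* the real code WARN_ON()s here */
		return;
	if (--debug_stack_use_ctr == 0) /* only the outermost caller restores */
		load_normal_idt();
}

int main(void)
{
	debug_stack_set_zero();         /* outer user (e.g. TRACE_IRQS_OFF_DEBUG) */
	debug_stack_set_zero();         /* nested user */
	debug_stack_reset();            /* inner reset: special IDT stays loaded */
	debug_stack_reset();            /* outer reset: normal IDT restored */
	return 0;
}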
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index b772dd6ad450..da27c5d2168a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1251,15 +1251,15 @@ void mce_log_therm_throt_event(__u64 status)
  * poller finds an MCE, poll 2x faster. When the poller finds no more
  * errors, poll 2x slower (up to check_interval seconds).
  */
-static int check_interval = 5 * 60; /* 5 minutes */
+static unsigned long check_interval = 5 * 60; /* 5 minutes */

-static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */
+static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
 static DEFINE_PER_CPU(struct timer_list, mce_timer);

-static void mce_start_timer(unsigned long data)
+static void mce_timer_fn(unsigned long data)
 {
-	struct timer_list *t = &per_cpu(mce_timer, data);
-	int *n;
+	struct timer_list *t = &__get_cpu_var(mce_timer);
+	unsigned long iv;

 	WARN_ON(smp_processor_id() != data);

@@ -1272,13 +1272,14 @@ static void mce_start_timer(unsigned long data)
 	 * Alert userspace if needed. If we logged an MCE, reduce the
 	 * polling interval, otherwise increase the polling interval.
 	 */
-	n = &__get_cpu_var(mce_next_interval);
+	iv = __this_cpu_read(mce_next_interval);
 	if (mce_notify_irq())
-		*n = max(*n/2, HZ/100);
+		iv = max(iv / 2, (unsigned long) HZ/100);
 	else
-		*n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ));
+		iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
+	__this_cpu_write(mce_next_interval, iv);

-	t->expires = jiffies + *n;
+	t->expires = jiffies + iv;
 	add_timer_on(t, smp_processor_id());
 }

@@ -1472,9 +1473,9 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
 			rdmsrl(msrs[i], val);

 			/* CntP bit set? */
-			if (val & BIT(62)) {
-				val &= ~BIT(62);
+			if (val & BIT_64(62)) {
+				val &= ~BIT_64(62);
 				wrmsrl(msrs[i], val);
 			}
 		}

@@ -1556,17 +1557,17 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 static void __mcheck_cpu_init_timer(void)
 {
 	struct timer_list *t = &__get_cpu_var(mce_timer);
-	int *n = &__get_cpu_var(mce_next_interval);
+	unsigned long iv = check_interval * HZ;

-	setup_timer(t, mce_start_timer, smp_processor_id());
+	setup_timer(t, mce_timer_fn, smp_processor_id());

 	if (mce_ignore_ce)
 		return;

-	*n = check_interval * HZ;
-	if (!*n)
+	__this_cpu_write(mce_next_interval, iv);
+	if (!iv)
 		return;
-	t->expires = round_jiffies(jiffies + *n);
+	t->expires = round_jiffies(jiffies + iv);
 	add_timer_on(t, smp_processor_id());
 }

@@ -2276,7 +2277,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 	case CPU_DOWN_FAILED_FROZEN:
 		if (!mce_ignore_ce && check_interval) {
 			t->expires = round_jiffies(jiffies +
-				__get_cpu_var(mce_next_interval));
+				per_cpu(mce_next_interval, cpu));
 			add_timer_on(t, cpu);
 		}
 		smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
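Illustrative aside (not part of the patch): the mce.c hunks above keep the per-CPU poll interval in jiffies as an unsigned long and, as the comment says, halve it when an event was found and double it otherwise, clamped to check_interval. A stand-alone sketch of that back-off arithmetic; HZ, the limits and the helper name here are assumptions for the example, not the kernel code.

#include <stdio.h>

#define HZ 1000UL                                /* assumed tick rate */

static unsigned long check_interval = 5 * 60;    /* seconds, as in the patch */

/* Halve the interval after a hit, double it after a miss, with clamping. */
static unsigned long next_interval(unsigned long iv, int found_error)
{
	unsigned long min_iv = HZ / 100;             /* 10 ms floor */
	unsigned long max_iv = check_interval * HZ;  /* 5 minute ceiling */

	if (found_error)
		iv = (iv / 2 > min_iv) ? iv / 2 : min_iv;
	else
		iv = (iv * 2 < max_iv) ? iv * 2 : max_iv;
	return iv;
}

int main(void)
{
	unsigned long iv = check_interval * HZ;

	iv = next_interval(iv, 1);   /* error logged: poll twice as fast */
	printf("after hit:  %lu jiffies\n", iv);
	iv = next_interval(iv, 0);   /* quiet: back off towards the ceiling */
	printf("after miss: %lu jiffies\n", iv);
	return 0;
}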
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index ac140c7be396..bdda2e6c673b 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -266,7 +266,7 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk,
 	if (align > max_align)
 		align = max_align;

-	sizek = 1 << align;
+	sizek = 1UL << align;
 	if (debug_print) {
 		char start_factor = 'K', size_factor = 'K';
 		unsigned long start_base, size_base;
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index e049d6da0183..c4706cf9c011 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1496,6 +1496,7 @@ static struct cpu_hw_events *allocate_fake_cpuc(void)
 		if (!cpuc->shared_regs)
 			goto error;
 	}
+	cpuc->is_fake = 1;
 	return cpuc;
 error:
 	free_fake_cpuc(cpuc);
@@ -1756,6 +1757,12 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
 	dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
 }

+static inline int
+valid_user_frame(const void __user *fp, unsigned long size)
+{
+	return (__range_not_ok(fp, size, TASK_SIZE) == 0);
+}
+
 #ifdef CONFIG_COMPAT

 #include <asm/compat.h>
@@ -1780,7 +1787,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
 		if (bytes != sizeof(frame))
 			break;

-		if (fp < compat_ptr(regs->sp))
+		if (!valid_user_frame(fp, sizeof(frame)))
 			break;

 		perf_callchain_store(entry, frame.return_address);
@@ -1826,7 +1833,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 		if (bytes != sizeof(frame))
 			break;

-		if ((unsigned long)fp < regs->sp)
+		if (!valid_user_frame(fp, sizeof(frame)))
 			break;

 		perf_callchain_store(entry, frame.return_address);
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 6638aaf54493..7241e2fc3c17 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -117,6 +117,7 @@ struct cpu_hw_events {
 	struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */

 	unsigned int group_flag;
+	int is_fake;

 	/*
 	 * Intel DebugStore bits
@@ -364,6 +365,7 @@ struct x86_pmu {
 	int pebs_record_size;
 	void (*drain_pebs)(struct pt_regs *regs);
 	struct event_constraint *pebs_constraints;
+	void (*pebs_aliases)(struct perf_event *event);

 	/*
 	 * Intel LBR
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 166546ec6aef..187c294bc658 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1119,27 +1119,33 @@ intel_bts_constraints(struct perf_event *event)
 	return NULL;
 }

-static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
+static int intel_alt_er(int idx)
 {
 	if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
-		return false;
+		return idx;

-	if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
-		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
-		event->hw.config |= 0x01bb;
-		event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
-		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
-	} else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
+	if (idx == EXTRA_REG_RSP_0)
+		return EXTRA_REG_RSP_1;
+
+	if (idx == EXTRA_REG_RSP_1)
+		return EXTRA_REG_RSP_0;
+
+	return idx;
+}
+
+static void intel_fixup_er(struct perf_event *event, int idx)
+{
+	event->hw.extra_reg.idx = idx;
+
+	if (idx == EXTRA_REG_RSP_0) {
 		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
 		event->hw.config |= 0x01b7;
-		event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
 		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
+	} else if (idx == EXTRA_REG_RSP_1) {
+		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
+		event->hw.config |= 0x01bb;
+		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
 	}
-
-	if (event->hw.extra_reg.idx == orig_idx)
-		return false;
-
-	return true;
 }

 /*
@@ -1157,14 +1163,18 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
 	struct event_constraint *c = &emptyconstraint;
 	struct er_account *era;
 	unsigned long flags;
-	int orig_idx = reg->idx;
+	int idx = reg->idx;

-	/* already allocated shared msr */
-	if (reg->alloc)
+	/*
+	 * reg->alloc can be set due to existing state, so for fake cpuc we
+	 * need to ignore this, otherwise we might fail to allocate proper fake
+	 * state for this extra reg constraint. Also see the comment below.
+	 */
+	if (reg->alloc && !cpuc->is_fake)
 		return NULL; /* call x86_get_event_constraint() */

 again:
-	era = &cpuc->shared_regs->regs[reg->idx];
+	era = &cpuc->shared_regs->regs[idx];
 	/*
 	 * we use spin_lock_irqsave() to avoid lockdep issues when
 	 * passing a fake cpuc
@@ -1173,6 +1183,29 @@ again:

 	if (!atomic_read(&era->ref) || era->config == reg->config) {

+		/*
+		 * If its a fake cpuc -- as per validate_{group,event}() we
+		 * shouldn't touch event state and we can avoid doing so
+		 * since both will only call get_event_constraints() once
+		 * on each event, this avoids the need for reg->alloc.
+		 *
+		 * Not doing the ER fixup will only result in era->reg being
+		 * wrong, but since we won't actually try and program hardware
+		 * this isn't a problem either.
+		 */
+		if (!cpuc->is_fake) {
+			if (idx != reg->idx)
+				intel_fixup_er(event, idx);
+
+			/*
+			 * x86_schedule_events() can call get_event_constraints()
+			 * multiple times on events in the case of incremental
+			 * scheduling(). reg->alloc ensures we only do the ER
+			 * allocation once.
+			 */
+			reg->alloc = 1;
+		}
+
 		/* lock in msr value */
 		era->config = reg->config;
 		era->reg = reg->reg;
@@ -1180,17 +1213,17 @@ again:
 		/* one more user */
 		atomic_inc(&era->ref);

-		/* no need to reallocate during incremental event scheduling */
-		reg->alloc = 1;
-
 		/*
 		 * need to call x86_get_event_constraint()
 		 * to check if associated event has constraints
 		 */
 		c = NULL;
-	} else if (intel_try_alt_er(event, orig_idx)) {
-		raw_spin_unlock_irqrestore(&era->lock, flags);
-		goto again;
+	} else {
+		idx = intel_alt_er(idx);
+		if (idx != reg->idx) {
+			raw_spin_unlock_irqrestore(&era->lock, flags);
+			goto again;
+		}
 	}
 	raw_spin_unlock_irqrestore(&era->lock, flags);

@@ -1204,11 +1237,14 @@ __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
 	struct er_account *era;

 	/*
-	 * only put constraint if extra reg was actually
-	 * allocated. Also takes care of event which do
-	 * not use an extra shared reg
+	 * Only put constraint if extra reg was actually allocated. Also takes
+	 * care of event which do not use an extra shared reg.
+	 *
+	 * Also, if this is a fake cpuc we shouldn't touch any event state
+	 * (reg->alloc) and we don't care about leaving inconsistent cpuc state
+	 * either since it'll be thrown out.
 	 */
-	if (!reg->alloc)
+	if (!reg->alloc || cpuc->is_fake)
 		return;

 	era = &cpuc->shared_regs->regs[reg->idx];
@@ -1300,15 +1336,9 @@ static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
 	intel_put_shared_regs_event_constraints(cpuc, event);
 }

-static int intel_pmu_hw_config(struct perf_event *event)
+static void intel_pebs_aliases_core2(struct perf_event *event)
 {
-	int ret = x86_pmu_hw_config(event);
-
-	if (ret)
-		return ret;
-
-	if (event->attr.precise_ip &&
-	    (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
+	if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
 		/*
 		 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
 		 * (0x003c) so that we can use it with PEBS.
@@ -1329,10 +1359,48 @@ static int intel_pmu_hw_config(struct perf_event *event)
 		 */
 		u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16);

+		alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
+		event->hw.config = alt_config;
+	}
+}
+
+static void intel_pebs_aliases_snb(struct perf_event *event)
+{
+	if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
+		/*
+		 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
+		 * (0x003c) so that we can use it with PEBS.
+		 *
+		 * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
+		 * PEBS capable. However we can use UOPS_RETIRED.ALL
+		 * (0x01c2), which is a PEBS capable event, to get the same
+		 * count.
+		 *
+		 * UOPS_RETIRED.ALL counts the number of cycles that retires
+		 * CNTMASK micro-ops. By setting CNTMASK to a value (16)
+		 * larger than the maximum number of micro-ops that can be
+		 * retired per cycle (4) and then inverting the condition, we
+		 * count all cycles that retire 16 or less micro-ops, which
+		 * is every cycle.
+		 *
+		 * Thereby we gain a PEBS capable cycle counter.
+		 */
+		u64 alt_config = X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16);

 		alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
 		event->hw.config = alt_config;
 	}
+}
+
+static int intel_pmu_hw_config(struct perf_event *event)
+{
+	int ret = x86_pmu_hw_config(event);
+
+	if (ret)
+		return ret;
+
+	if (event->attr.precise_ip && x86_pmu.pebs_aliases)
+		x86_pmu.pebs_aliases(event);

 	if (intel_pmu_needs_lbr_smpl(event)) {
 		ret = intel_pmu_setup_lbr_filter(event);
@@ -1607,6 +1675,7 @@ static __initconst const struct x86_pmu intel_pmu = {
 	.max_period = (1ULL << 31) - 1,
 	.get_event_constraints = intel_get_event_constraints,
 	.put_event_constraints = intel_put_event_constraints,
+	.pebs_aliases = intel_pebs_aliases_core2,

 	.format_attrs = intel_arch3_formats_attr,

@@ -1840,8 +1909,9 @@ __init int intel_pmu_init(void)
 		break;

 	case 42: /* SandyBridge */
-		x86_add_quirk(intel_sandybridge_quirk);
 	case 45: /* SandyBridge, "Romely-EP" */
+		x86_add_quirk(intel_sandybridge_quirk);
+	case 58: /* IvyBridge */
 		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));

@@ -1849,6 +1919,7 @@ __init int intel_pmu_init(void)

 		x86_pmu.event_constraints = intel_snb_event_constraints;
 		x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
+		x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
 		x86_pmu.extra_regs = intel_snb_extra_regs;
 		/* all extra regs are per-cpu when HT is on */
 		x86_pmu.er_flags |= ERF_HAS_RSP_1;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 5a3edc27f6e5..35e2192df9f4 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -400,14 +400,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
 	INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */
-	INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_LOADS */
-	INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_STORES */
-	INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOP_RETIRED.LOCK_LOADS */
-	INTEL_UEVENT_CONSTRAINT(0x22d0, 0xf), /* MEM_UOP_RETIRED.LOCK_STORES */
-	INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_LOADS */
-	INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_STORES */
-	INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOP_RETIRED.ANY_LOADS */
-	INTEL_UEVENT_CONSTRAINT(0x82d0, 0xf), /* MEM_UOP_RETIRED.ANY_STORES */
+	INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
 	INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 01ccf9b71473..623f28837476 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -316,7 +316,6 @@ ret_from_exception:
 	preempt_stop(CLBR_ANY)
 ret_from_intr:
 	GET_THREAD_INFO(%ebp)
-resume_userspace_sig:
 #ifdef CONFIG_VM86
 	movl PT_EFLAGS(%esp), %eax	# mix EFLAGS and CS
 	movb PT_CS(%esp), %al
@@ -615,9 +614,13 @@ work_notifysig: # deal with pending signals and
 					# vm86-space
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
+	movb PT_CS(%esp), %bl
+	andb $SEGMENT_RPL_MASK, %bl
+	cmpb $USER_RPL, %bl
+	jb resume_kernel
 	xorl %edx, %edx
 	call do_notify_resume
-	jmp resume_userspace_sig
+	jmp resume_userspace

 	ALIGN
 work_notifysig_v86:
@@ -630,9 +633,13 @@ work_notifysig_v86:
 #endif
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
+	movb PT_CS(%esp), %bl
+	andb $SEGMENT_RPL_MASK, %bl
+	cmpb $USER_RPL, %bl
+	jb resume_kernel
 	xorl %edx, %edx
 	call do_notify_resume
-	jmp resume_userspace_sig
+	jmp resume_userspace
 END(work_pending)

 	# perform syscall exit tracing
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 320852d02026..7d65133b51be 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -191,6 +191,44 @@ ENDPROC(native_usergs_sysret64)
 .endm

 /*
+ * When dynamic function tracer is enabled it will add a breakpoint
+ * to all locations that it is about to modify, sync CPUs, update
+ * all the code, sync CPUs, then remove the breakpoints. In this time
+ * if lockdep is enabled, it might jump back into the debug handler
+ * outside the updating of the IST protection. (TRACE_IRQS_ON/OFF).
+ *
+ * We need to change the IDT table before calling TRACE_IRQS_ON/OFF to
+ * make sure the stack pointer does not get reset back to the top
+ * of the debug stack, and instead just reuses the current stack.
+ */
+#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS)
+
+.macro TRACE_IRQS_OFF_DEBUG
+	call debug_stack_set_zero
+	TRACE_IRQS_OFF
+	call debug_stack_reset
+.endm
+
+.macro TRACE_IRQS_ON_DEBUG
+	call debug_stack_set_zero
+	TRACE_IRQS_ON
+	call debug_stack_reset
+.endm
+
+.macro TRACE_IRQS_IRETQ_DEBUG offset=ARGOFFSET
+	bt $9,EFLAGS-\offset(%rsp)	/* interrupts off? */
+	jnc 1f
+	TRACE_IRQS_ON_DEBUG
+1:
+.endm
+
+#else
+# define TRACE_IRQS_OFF_DEBUG		TRACE_IRQS_OFF
+# define TRACE_IRQS_ON_DEBUG		TRACE_IRQS_ON
+# define TRACE_IRQS_IRETQ_DEBUG		TRACE_IRQS_IRETQ
+#endif
+
+/*
  * C code is not supposed to know about undefined top of stack. Every time
  * a C function with an pt_regs argument is called from the SYSCALL based
  * fast path FIXUP_TOP_OF_STACK is needed.
@@ -1098,7 +1136,7 @@ ENTRY(\sym)
 	subq $ORIG_RAX-R15, %rsp
 	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
 	call save_paranoid
-	TRACE_IRQS_OFF
+	TRACE_IRQS_OFF_DEBUG
 	movq %rsp,%rdi		/* pt_regs pointer */
 	xorl %esi,%esi		/* no error code */
 	subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist)
@@ -1393,7 +1431,7 @@ paranoidzeroentry machine_check *machine_check_vector(%rip)
 ENTRY(paranoid_exit)
 	DEFAULT_FRAME
 	DISABLE_INTERRUPTS(CLBR_NONE)
-	TRACE_IRQS_OFF
+	TRACE_IRQS_OFF_DEBUG
 	testl %ebx,%ebx		/* swapgs needed? */
 	jnz paranoid_restore
 	testl $3,CS(%rsp)
@@ -1404,7 +1442,7 @@ paranoid_swapgs:
 	RESTORE_ALL 8
 	jmp irq_return
 paranoid_restore:
-	TRACE_IRQS_IRETQ 0
+	TRACE_IRQS_IRETQ_DEBUG 0
 	RESTORE_ALL 8
 	jmp irq_return
 paranoid_userspace:
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 32ff36596ab1..c3a7cb4bf6e6 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -100,7 +100,7 @@ static const unsigned char *ftrace_nop_replace(void)
 }

 static int
-ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
+ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code,
 		   unsigned const char *new_code)
 {
 	unsigned char replaced[MCOUNT_INSN_SIZE];
@@ -141,7 +141,20 @@ int ftrace_make_nop(struct module *mod,
 	old = ftrace_call_replace(ip, addr);
 	new = ftrace_nop_replace();

-	return ftrace_modify_code(rec->ip, old, new);
+	/*
+	 * On boot up, and when modules are loaded, the MCOUNT_ADDR
+	 * is converted to a nop, and will never become MCOUNT_ADDR
+	 * again. This code is either running before SMP (on boot up)
+	 * or before the code will ever be executed (module load).
+	 * We do not want to use the breakpoint version in this case,
+	 * just modify the code directly.
+	 */
+	if (addr == MCOUNT_ADDR)
+		return ftrace_modify_code_direct(rec->ip, old, new);
+
+	/* Normal cases use add_brk_on_nop */
+	WARN_ONCE(1, "invalid use of ftrace_make_nop");
+	return -EINVAL;
 }

 int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
@@ -152,9 +165,47 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 	old = ftrace_nop_replace();
 	new = ftrace_call_replace(ip, addr);

-	return ftrace_modify_code(rec->ip, old, new);
+	/* Should only be called when module is loaded */
+	return ftrace_modify_code_direct(rec->ip, old, new);
 }

+/*
+ * The modifying_ftrace_code is used to tell the breakpoint
+ * handler to call ftrace_int3_handler(). If it fails to
+ * call this handler for a breakpoint added by ftrace, then
+ * the kernel may crash.
+ *
+ * As atomic_writes on x86 do not need a barrier, we do not
+ * need to add smp_mb()s for this to work. It is also considered
+ * that we can not read the modifying_ftrace_code before
+ * executing the breakpoint. That would be quite remarkable if
+ * it could do that. Here's the flow that is required:
+ *
+ *   CPU-0                          CPU-1
+ *
+ *   atomic_inc(mfc);
+ *   write int3s
+ *                                  <trap-int3> // implicit (r)mb
+ *                                  if (atomic_read(mfc))
+ *                                          call ftrace_int3_handler()
+ *
+ * Then when we are finished:
+ *
+ *   atomic_dec(mfc);
+ *
+ * If we hit a breakpoint that was not set by ftrace, it does not
+ * matter if ftrace_int3_handler() is called or not. It will
+ * simply be ignored. But it is crucial that a ftrace nop/caller
+ * breakpoint is handled. No other user should ever place a
+ * breakpoint on an ftrace nop/caller location. It must only
+ * be done by this code.
+ */
+atomic_t modifying_ftrace_code __read_mostly;
+
+static int
+ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
+		   unsigned const char *new_code);
+
 int ftrace_update_ftrace_func(ftrace_func_t func)
 {
 	unsigned long ip = (unsigned long)(&ftrace_call);
@@ -163,13 +214,17 @@ int ftrace_update_ftrace_func(ftrace_func_t func)

 	memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
 	new = ftrace_call_replace(ip, (unsigned long)func);
+
+	/* See comment above by declaration of modifying_ftrace_code */
+	atomic_inc(&modifying_ftrace_code);
+
 	ret = ftrace_modify_code(ip, old, new);

+	atomic_dec(&modifying_ftrace_code);
+
 	return ret;
 }

-int modifying_ftrace_code __read_mostly;
-
 /*
  * A breakpoint was added to the code address we are about to
  * modify, and this is the handle that will just skip over it.
@@ -489,13 +544,46 @@ void ftrace_replace_code(int enable)
 	}
 }

+static int
+ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
+		   unsigned const char *new_code)
+{
+	int ret;
+
+	ret = add_break(ip, old_code);
+	if (ret)
+		goto out;
+
+	run_sync();
+
+	ret = add_update_code(ip, new_code);
+	if (ret)
+		goto fail_update;
+
+	run_sync();
+
+	ret = ftrace_write(ip, new_code, 1);
+	if (ret) {
+		ret = -EPERM;
+		goto out;
+	}
+	run_sync();
+ out:
+	return ret;
+
+ fail_update:
+	probe_kernel_write((void *)ip, &old_code[0], 1);
+	goto out;
+}
+
 void arch_ftrace_update_code(int command)
 {
-	modifying_ftrace_code++;
+	/* See comment above by declaration of modifying_ftrace_code */
+	atomic_inc(&modifying_ftrace_code);

 	ftrace_modify_all_code(command);

-	modifying_ftrace_code--;
+	atomic_dec(&modifying_ftrace_code);
 }

 int __init ftrace_dyn_arch_init(void *data)
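Illustrative aside (not part of the patch): the comment above the new atomic_t describes the handshake between the code patcher and the int3 handler: raise the counter, plant breakpoints, and have every int3 consult ftrace only while the counter is non-zero. A single-threaded C11 sketch of that ordering follows; the function names are stand-ins and the real SMP synchronization (IPIs, run_sync()) is not modeled.

#include <stdatomic.h>
#include <stdio.h>

/* Stand-in for the kernel's modifying_ftrace_code counter. */
static atomic_int modifying_code;

static int ftrace_int3_handler(void)        /* hypothetical handler */
{
	printf("int3 at a patch site: skip over it\n");
	return 1;
}

/* Breakpoint-handler side: only consult the ftrace handler while a
 * modification is known to be in flight, as in do_int3() in traps.c. */
static void do_int3(void)
{
	if (atomic_load(&modifying_code) && ftrace_int3_handler())
		return;
	printf("int3 from someone else (kgdb, kprobes, ...)\n");
}

/* Modifier side: raise the counter before planting int3, drop it after. */
static void patch_call_sites(void)
{
	atomic_fetch_add(&modifying_code, 1);
	/* ... write int3 bytes, sync cores, patch, sync, remove int3 ... */
	do_int3();                  /* a core trips over our breakpoint */
	atomic_fetch_sub(&modifying_code, 1);
}

int main(void)
{
	patch_call_sites();
	do_int3();      /* no patching in flight: not treated as an ftrace trap */
	return 0;
}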
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 9cc7b4392f7c..1460a5df92f7 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -870,7 +870,7 @@ int __init hpet_enable(void)
 	else
 		pr_warn("HPET initial state will not be saved\n");
 	cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY);
-	hpet_writel(cfg, HPET_Tn_CFG(i));
+	hpet_writel(cfg, HPET_CFG);
 	if (cfg)
 		pr_warn("HPET: Unrecognized bits %#x set in global cfg\n",
 			cfg);
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 086eb58c6e80..f1b42b3a186c 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -120,11 +120,6 @@ bool kvm_check_and_clear_guest_paused(void)
 	bool ret = false;
 	struct pvclock_vcpu_time_info *src;

-	/*
-	 * per_cpu() is safe here because this function is only called from
-	 * timer functions where preemption is already disabled.
-	 */
-	WARN_ON(!in_atomic());
 	src = &__get_cpu_var(hv_clock);
 	if ((src->flags & PVCLOCK_GUEST_STOPPED) != 0) {
 		__this_cpu_and(hv_clock.flags, ~PVCLOCK_GUEST_STOPPED);
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 90875279ef3d..a0b2f84457be 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -444,14 +444,16 @@ static inline void nmi_nesting_preprocess(struct pt_regs *regs)
 	 */
 	if (unlikely(is_debug_stack(regs->sp))) {
 		debug_stack_set_zero();
-		__get_cpu_var(update_debug_stack) = 1;
+		this_cpu_write(update_debug_stack, 1);
 	}
 }

 static inline void nmi_nesting_postprocess(void)
 {
-	if (unlikely(__get_cpu_var(update_debug_stack)))
+	if (unlikely(this_cpu_read(update_debug_stack))) {
 		debug_stack_reset();
+		this_cpu_write(update_debug_stack, 0);
+	}
 }
 #endif

diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c
index e31bf8d5c4d2..149b8d9c6ad4 100644
--- a/arch/x86/kernel/nmi_selftest.c
+++ b/arch/x86/kernel/nmi_selftest.c
@@ -42,7 +42,7 @@ static int __init nmi_unk_cb(unsigned int val, struct pt_regs *regs)
 static void __init init_nmi_testsuite(void)
 {
 	/* trap all the unknown NMIs we may generate */
-	register_nmi_handler(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk");
+	register_nmi_handler_initonly(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk");
 }

 static void __init cleanup_nmi_testsuite(void)
@@ -64,7 +64,7 @@ static void __init test_nmi_ipi(struct cpumask *mask)
 {
 	unsigned long timeout;

-	if (register_nmi_handler(NMI_LOCAL, test_nmi_ipi_callback,
+	if (register_nmi_handler_initonly(NMI_LOCAL, test_nmi_ipi_callback,
 				 NMI_FLAG_FIRST, "nmi_selftest")) {
 		nmi_fail = FAILURE;
 		return;
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 62c9457ccd2f..c0f420f76cd3 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -100,7 +100,7 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
 				 struct dma_attrs *attrs)
 {
 	unsigned long dma_mask;
-	struct page *page = NULL;
+	struct page *page;
 	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	dma_addr_t addr;

@@ -108,6 +108,7 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,

 	flag |= __GFP_ZERO;
 again:
+	page = NULL;
 	if (!(flag & GFP_ATOMIC))
 		page = dma_alloc_from_contiguous(dev, count, get_order(size));
 	if (!page)
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 13b1990c7c58..c4c6a5c2bf0f 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1211,12 +1211,6 @@ static long x32_arch_ptrace(struct task_struct *child,
 					   0, sizeof(struct user_i387_struct),
 					   datap);

-	/* normal 64bit interface to access TLS data.
-	   Works just like arch_prctl, except that the arguments
-	   are reversed. */
-	case PTRACE_ARCH_PRCTL:
-		return do_arch_prctl(child, data, addr);
-
 	default:
 		return compat_ptrace_request(child, request, addr, data);
 	}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 79c45af81604..25b48edb847c 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -639,9 +639,11 @@ void native_machine_shutdown(void)
 	set_cpus_allowed_ptr(current, cpumask_of(reboot_cpu_id));

 	/*
-	 * O.K Now that I'm on the appropriate processor,
-	 * stop all of the others.
+	 * O.K Now that I'm on the appropriate processor, stop all of the
+	 * others. Also disable the local irq to not receive the per-cpu
+	 * timer interrupt which may trigger scheduler's load balance.
 	 */
+	local_irq_disable();
 	stop_other_cpus();
 #endif

diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 965dfda0fd5e..21af737053aa 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -555,7 +555,6 @@ unsigned long sys_sigreturn(struct pt_regs *regs)
 		sizeof(frame->extramask))))
 		goto badframe;

-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);

 	if (restore_sigcontext(regs, &frame->sc, &ax))
@@ -581,7 +580,6 @@ long sys_rt_sigreturn(struct pt_regs *regs)
 	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
 		goto badframe;

-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);

 	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
@@ -647,42 +645,28 @@ setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	       struct pt_regs *regs)
 {
 	int usig = signr_convert(sig);
-	sigset_t *set = &current->blocked;
-	int ret;
-
-	if (current_thread_info()->status & TS_RESTORE_SIGMASK)
-		set = &current->saved_sigmask;
+	sigset_t *set = sigmask_to_save();

 	/* Set up the stack frame */
 	if (is_ia32) {
 		if (ka->sa.sa_flags & SA_SIGINFO)
-			ret = ia32_setup_rt_frame(usig, ka, info, set, regs);
+			return ia32_setup_rt_frame(usig, ka, info, set, regs);
 		else
-			ret = ia32_setup_frame(usig, ka, set, regs);
+			return ia32_setup_frame(usig, ka, set, regs);
 #ifdef CONFIG_X86_X32_ABI
 	} else if (is_x32) {
-		ret = x32_setup_rt_frame(usig, ka, info,
+		return x32_setup_rt_frame(usig, ka, info,
 					 (compat_sigset_t *)set, regs);
 #endif
 	} else {
-		ret = __setup_rt_frame(sig, ka, info, set, regs);
-	}
-
-	if (ret) {
-		force_sigsegv(sig, current);
-		return -EFAULT;
+		return __setup_rt_frame(sig, ka, info, set, regs);
 	}
-
-	current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
-	return ret;
 }

-static int
+static void
 handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 		struct pt_regs *regs)
 {
-	int ret;
-
 	/* Are we from a system call? */
 	if (syscall_get_nr(current, regs) >= 0) {
 		/* If so, check system call restarting.. */
@@ -713,10 +697,10 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	    likely(test_and_clear_thread_flag(TIF_FORCED_TF)))
 		regs->flags &= ~X86_EFLAGS_TF;

-	ret = setup_rt_frame(sig, ka, info, regs);
-
-	if (ret)
-		return ret;
+	if (setup_rt_frame(sig, ka, info, regs) < 0) {
+		force_sigsegv(sig, current);
+		return;
+	}

 	/*
 	 * Clear the direction flag as per the ABI for function entry.
@@ -731,12 +715,8 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	 */
 	regs->flags &= ~X86_EFLAGS_TF;

-	block_sigmask(ka, sig);
-
-	tracehook_signal_handler(sig, info, ka, regs,
-			test_thread_flag(TIF_SINGLESTEP));
-
-	return 0;
+	signal_delivered(sig, info, ka, regs,
+			test_thread_flag(TIF_SINGLESTEP));
 }

 #ifdef CONFIG_X86_32
@@ -757,16 +737,6 @@ static void do_signal(struct pt_regs *regs)
 	siginfo_t info;
 	int signr;

-	/*
-	 * We want the common case to go fast, which is why we may in certain
-	 * cases get here from kernel mode. Just return without doing anything
-	 * if so.
-	 * X86_32: vm86 regs switched out by assembly code before reaching
-	 * here, so testing against kernel CS suffices.
-	 */
-	if (!user_mode(regs))
-		return;
-
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
 	if (signr > 0) {
 		/* Whee! Actually deliver the signal. */
@@ -796,10 +766,7 @@ static void do_signal(struct pt_regs *regs)
 	 * If there's no signal to deliver, we just put the saved sigmask
 	 * back.
 	 */
-	if (current_thread_info()->status & TS_RESTORE_SIGMASK) {
-		current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
-		set_current_blocked(&current->saved_sigmask);
-	}
+	restore_saved_sigmask();
 }

 /*
@@ -827,8 +794,6 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(regs);
-		if (current->replacement_session_keyring)
-			key_replace_session_keyring();
 	}
 	if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
 		fire_user_return_notifiers();
@@ -936,7 +901,6 @@ asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs)
 	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
 		goto badframe;

-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);

 	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index f56f96da77f5..7bd8a0823654 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -349,9 +349,12 @@ static bool __cpuinit match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)

 static bool __cpuinit match_mc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
 {
-	if (c->phys_proc_id == o->phys_proc_id)
-		return topology_sane(c, o, "mc");
+	if (c->phys_proc_id == o->phys_proc_id) {
+		if (cpu_has(c, X86_FEATURE_AMD_DCM))
+			return true;

+		return topology_sane(c, o, "mc");
+	}
 	return false;
 }

@@ -382,6 +385,15 @@ void __cpuinit set_cpu_sibling_map(int cpu)
 		if ((i == cpu) || (has_mc && match_llc(c, o)))
 			link_mask(llc_shared, cpu, i);

+	}
+
+	/*
+	 * This needs a separate iteration over the cpus because we rely on all
+	 * cpu_sibling_mask links to be set-up.
+	 */
+	for_each_cpu(i, cpu_sibling_setup_mask) {
+		o = &cpu_data(i);
+
 		if ((i == cpu) || (has_mc && match_mc(c, o))) {
 			link_mask(core, cpu, i);

@@ -410,15 +422,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
 /* maps the cpu to the sched domain representing multi-core */
 const struct cpumask *cpu_coregroup_mask(int cpu)
 {
-	struct cpuinfo_x86 *c = &cpu_data(cpu);
-	/*
-	 * For perf, we return last level cache shared map.
-	 * And for power savings, we return cpu_core_map
-	 */
-	if (!(cpu_has(c, X86_FEATURE_AMD_DCM)))
-		return cpu_core_mask(cpu);
-	else
-		return cpu_llc_shared_mask(cpu);
+	return cpu_llc_shared_mask(cpu);
 }

 static void impress_friends(void)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index ff08457a025d..05b31d92f69c 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -303,8 +303,12 @@ gp_in_kernel:
 dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code)
 {
 #ifdef CONFIG_DYNAMIC_FTRACE
-	/* ftrace must be first, everything else may cause a recursive crash */
-	if (unlikely(modifying_ftrace_code) && ftrace_int3_handler(regs))
+	/*
+	 * ftrace must be first, everything else may cause a recursive crash.
+	 * See note by declaration of modifying_ftrace_code in ftrace.c
+	 */
+	if (unlikely(atomic_read(&modifying_ftrace_code)) &&
+	    ftrace_int3_handler(regs))
 		return;
 #endif
 #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP