path: root/arch/x86
author	Linus Torvalds <torvalds@linux-foundation.org>	2012-06-08 12:14:46 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-06-08 12:14:46 -0400
commit	106544d81d88069c2df66ebdee42a4ba8fcd25e9 (patch)
tree	9a6233100699c28fafde9eaa1751de7ddc173f58 /arch/x86
parent	03d8f5408235bfd2781142458e0c0671530e74e7 (diff)
parent	db0dc75d6403b6663c0eab4c6ccb672eb9b2ed72 (diff)
Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf fixes from Ingo Molnar:
 "A bit larger than what I'd wish for - half of it is due to hw driver
  updates to Intel Ivy-Bridge which info got recently released,
  cycles:pp should work there now too, amongst other things. (but we
  are generally making exceptions for hardware enablement of this
  type.)

  There are also callchain fixes in it - responding to mostly
  theoretical (but valid) concerns. The tooling side sports perf.data
  endianness/portability fixes which did not make it for the merge
  window - and various other fixes as well."

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (26 commits)
  perf/x86: Check user address explicitly in copy_from_user_nmi()
  perf/x86: Check if user fp is valid
  perf: Limit callchains to 127
  perf/x86: Allow multiple stacks
  perf/x86: Update SNB PEBS constraints
  perf/x86: Enable/Add IvyBridge hardware support
  perf/x86: Implement cycles:p for SNB/IVB
  perf/x86: Fix Intel shared extra MSR allocation
  x86/decoder: Fix bsr/bsf/jmpe decoding with operand-size prefix
  perf: Remove duplicate invocation on perf_event_for_each
  perf uprobes: Remove unnecessary check before strlist__delete
  perf symbols: Check for valid dso before creating map
  perf evsel: Fix 32 bit values endianity swap for sample_id_all header
  perf session: Handle endianity swap on sample_id_all header data
  perf symbols: Handle different endians properly during symbol load
  perf evlist: Pass third argument to ioctl explicitly
  perf tools: Update ioctl documentation for PERF_IOC_FLAG_GROUP
  perf tools: Make --version show kernel version instead of pull req tag
  perf tools: Check if callchain is corrupted
  perf callchain: Make callchain cursors TLS
  ...
Diffstat (limited to 'arch/x86')
-rw-r--r--	arch/x86/include/asm/uaccess.h			|  12
-rw-r--r--	arch/x86/kernel/cpu/perf_event.c		|  11
-rw-r--r--	arch/x86/kernel/cpu/perf_event.h		|   2
-rw-r--r--	arch/x86/kernel/cpu/perf_event_intel.c		| 145
-rw-r--r--	arch/x86/kernel/cpu/perf_event_intel_ds.c	|   9
-rw-r--r--	arch/x86/lib/usercopy.c				|   4
-rw-r--r--	arch/x86/lib/x86-opcode-map.txt			|   8
-rw-r--r--	arch/x86/tools/gen-insn-attr-x86.awk		|  14

8 files changed, 143 insertions, 62 deletions
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 04cd6882308e..e1f3a17034fc 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -33,9 +33,8 @@
 #define segment_eq(a, b)	((a).seg == (b).seg)
 
 #define user_addr_max() (current_thread_info()->addr_limit.seg)
 #define __addr_ok(addr)		\
-	((unsigned long __force)(addr) <	\
-	 (current_thread_info()->addr_limit.seg))
+	((unsigned long __force)(addr) < user_addr_max())
 
 /*
  * Test whether a block of memory is a valid user space address.
@@ -47,14 +46,14 @@
  * This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry...
  */
 
-#define __range_not_ok(addr, size)					\
+#define __range_not_ok(addr, size, limit)				\
 ({									\
 	unsigned long flag, roksum;					\
 	__chk_user_ptr(addr);						\
 	asm("add %3,%1 ; sbb %0,%0 ; cmp %1,%4 ; sbb $0,%0"		\
 	    : "=&r" (flag), "=r" (roksum)				\
 	    : "1" (addr), "g" ((long)(size)),				\
-	      "rm" (current_thread_info()->addr_limit.seg));		\
+	      "rm" (limit));						\
 	flag;								\
 })
 
@@ -77,7 +76,8 @@
  * checks that the pointer is in the user space range - after calling
  * this function, memory access functions may still return -EFAULT.
  */
-#define access_ok(type, addr, size) (likely(__range_not_ok(addr, size) == 0))
+#define access_ok(type, addr, size)	\
+	(likely(__range_not_ok(addr, size, user_addr_max()) == 0))
 
 /*
  * The exception table consists of pairs of addresses relative to the
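The asm sequence in __range_not_ok() above sets its result whenever addr + size either wraps around or ends past the supplied limit; the new third parameter makes that limit an explicit argument instead of always reading current_thread_info()->addr_limit.seg. A rough C equivalent of the check, as a sketch only (the helper name below is illustrative and not kernel code):

static inline unsigned long range_not_ok_sketch(unsigned long addr,
						unsigned long size,
						unsigned long limit)
{
	unsigned long end = addr + size;

	/* non-zero when the sum carries (wraps) or reaches past the limit */
	return end < addr || end > limit;
}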
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index e049d6da0183..c4706cf9c011 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1496,6 +1496,7 @@ static struct cpu_hw_events *allocate_fake_cpuc(void)
 		if (!cpuc->shared_regs)
 			goto error;
 	}
+	cpuc->is_fake = 1;
 	return cpuc;
 error:
 	free_fake_cpuc(cpuc);
@@ -1756,6 +1757,12 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
 	dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
 }
 
+static inline int
+valid_user_frame(const void __user *fp, unsigned long size)
+{
+	return (__range_not_ok(fp, size, TASK_SIZE) == 0);
+}
+
 #ifdef CONFIG_COMPAT
 
 #include <asm/compat.h>
@@ -1780,7 +1787,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
 		if (bytes != sizeof(frame))
 			break;
 
-		if (fp < compat_ptr(regs->sp))
+		if (!valid_user_frame(fp, sizeof(frame)))
 			break;
 
 		perf_callchain_store(entry, frame.return_address);
@@ -1826,7 +1833,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 		if (bytes != sizeof(frame))
 			break;
 
-		if ((unsigned long)fp < regs->sp)
+		if (!valid_user_frame(fp, sizeof(frame)))
 			break;
 
 		perf_callchain_store(entry, frame.return_address);
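Both callchain hunks replace the old "frame pointer below the stack pointer" test with valid_user_frame(), which bounds the frame against TASK_SIZE; one plausible reason for using TASK_SIZE rather than the current addr_limit is that an NMI can interrupt a task that has temporarily widened its address limit, so only the hard user-space ceiling is trustworthy here. A simplified sketch of the walk being hardened (illustrative names, assumes the kernel context of perf_event.c; this is not the actual perf_callchain_user()):

struct frame_sketch {				/* illustrative, not the kernel's stack_frame */
	struct frame_sketch __user *next_fp;	/* caller's saved frame pointer */
	unsigned long return_address;		/* caller's return address      */
};

static void user_callchain_sketch(struct perf_callchain_entry *entry,
				  const struct frame_sketch __user *fp)
{
	struct frame_sketch frame;
	unsigned long bytes;

	while (entry->nr < PERF_MAX_STACK_DEPTH) {
		bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
		if (bytes != sizeof(frame))
			break;

		if (!valid_user_frame(fp, sizeof(frame)))	/* the new check */
			break;

		perf_callchain_store(entry, frame.return_address);
		fp = frame.next_fp;
	}
}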
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 6638aaf54493..7241e2fc3c17 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -117,6 +117,7 @@ struct cpu_hw_events {
 	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
 
 	unsigned int		group_flag;
+	int			is_fake;
 
 	/*
 	 * Intel DebugStore bits
@@ -364,6 +365,7 @@ struct x86_pmu {
 	int		pebs_record_size;
 	void		(*drain_pebs)(struct pt_regs *regs);
 	struct event_constraint *pebs_constraints;
+	void		(*pebs_aliases)(struct perf_event *event);
 
 	/*
 	 * Intel LBR
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 166546ec6aef..187c294bc658 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1119,27 +1119,33 @@ intel_bts_constraints(struct perf_event *event)
 	return NULL;
 }
 
-static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
+static int intel_alt_er(int idx)
 {
 	if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
-		return false;
+		return idx;
 
-	if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
-		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
-		event->hw.config |= 0x01bb;
-		event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
-		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
-	} else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
+	if (idx == EXTRA_REG_RSP_0)
+		return EXTRA_REG_RSP_1;
+
+	if (idx == EXTRA_REG_RSP_1)
+		return EXTRA_REG_RSP_0;
+
+	return idx;
+}
+
+static void intel_fixup_er(struct perf_event *event, int idx)
+{
+	event->hw.extra_reg.idx = idx;
+
+	if (idx == EXTRA_REG_RSP_0) {
 		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
 		event->hw.config |= 0x01b7;
-		event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
 		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
+	} else if (idx == EXTRA_REG_RSP_1) {
+		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
+		event->hw.config |= 0x01bb;
+		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
 	}
-
-	if (event->hw.extra_reg.idx == orig_idx)
-		return false;
-
-	return true;
 }
 
 /*
@@ -1157,14 +1163,18 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
 	struct event_constraint *c = &emptyconstraint;
 	struct er_account *era;
 	unsigned long flags;
-	int orig_idx = reg->idx;
+	int idx = reg->idx;
 
-	/* already allocated shared msr */
-	if (reg->alloc)
+	/*
+	 * reg->alloc can be set due to existing state, so for fake cpuc we
+	 * need to ignore this, otherwise we might fail to allocate proper fake
+	 * state for this extra reg constraint. Also see the comment below.
+	 */
+	if (reg->alloc && !cpuc->is_fake)
 		return NULL; /* call x86_get_event_constraint() */
 
 again:
-	era = &cpuc->shared_regs->regs[reg->idx];
+	era = &cpuc->shared_regs->regs[idx];
 	/*
 	 * we use spin_lock_irqsave() to avoid lockdep issues when
 	 * passing a fake cpuc
@@ -1173,6 +1183,29 @@ again:
 
 	if (!atomic_read(&era->ref) || era->config == reg->config) {
 
+		/*
+		 * If its a fake cpuc -- as per validate_{group,event}() we
+		 * shouldn't touch event state and we can avoid doing so
+		 * since both will only call get_event_constraints() once
+		 * on each event, this avoids the need for reg->alloc.
+		 *
+		 * Not doing the ER fixup will only result in era->reg being
+		 * wrong, but since we won't actually try and program hardware
+		 * this isn't a problem either.
+		 */
+		if (!cpuc->is_fake) {
+			if (idx != reg->idx)
+				intel_fixup_er(event, idx);
+
+			/*
+			 * x86_schedule_events() can call get_event_constraints()
+			 * multiple times on events in the case of incremental
+			 * scheduling(). reg->alloc ensures we only do the ER
+			 * allocation once.
+			 */
+			reg->alloc = 1;
+		}
+
 		/* lock in msr value */
 		era->config = reg->config;
 		era->reg = reg->reg;
@@ -1180,17 +1213,17 @@ again:
 		/* one more user */
 		atomic_inc(&era->ref);
 
-		/* no need to reallocate during incremental event scheduling */
-		reg->alloc = 1;
-
 		/*
 		 * need to call x86_get_event_constraint()
 		 * to check if associated event has constraints
 		 */
 		c = NULL;
-	} else if (intel_try_alt_er(event, orig_idx)) {
-		raw_spin_unlock_irqrestore(&era->lock, flags);
-		goto again;
+	} else {
+		idx = intel_alt_er(idx);
+		if (idx != reg->idx) {
+			raw_spin_unlock_irqrestore(&era->lock, flags);
+			goto again;
+		}
 	}
 	raw_spin_unlock_irqrestore(&era->lock, flags);
 
@@ -1204,11 +1237,14 @@ __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
 	struct er_account *era;
 
 	/*
-	 * only put constraint if extra reg was actually
-	 * allocated. Also takes care of event which do
-	 * not use an extra shared reg
+	 * Only put constraint if extra reg was actually allocated. Also takes
+	 * care of event which do not use an extra shared reg.
+	 *
+	 * Also, if this is a fake cpuc we shouldn't touch any event state
+	 * (reg->alloc) and we don't care about leaving inconsistent cpuc state
+	 * either since it'll be thrown out.
 	 */
-	if (!reg->alloc)
+	if (!reg->alloc || cpuc->is_fake)
 		return;
 
 	era = &cpuc->shared_regs->regs[reg->idx];
@@ -1300,15 +1336,9 @@ static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
 	intel_put_shared_regs_event_constraints(cpuc, event);
 }
 
-static int intel_pmu_hw_config(struct perf_event *event)
+static void intel_pebs_aliases_core2(struct perf_event *event)
 {
-	int ret = x86_pmu_hw_config(event);
-
-	if (ret)
-		return ret;
-
-	if (event->attr.precise_ip &&
-	    (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
+	if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
 		/*
 		 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
 		 * (0x003c) so that we can use it with PEBS.
@@ -1329,10 +1359,48 @@ static int intel_pmu_hw_config(struct perf_event *event)
 		 */
 		u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16);
 
+		alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
+		event->hw.config = alt_config;
+	}
+}
+
+static void intel_pebs_aliases_snb(struct perf_event *event)
+{
+	if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
+		/*
+		 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
+		 * (0x003c) so that we can use it with PEBS.
+		 *
+		 * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
+		 * PEBS capable. However we can use UOPS_RETIRED.ALL
+		 * (0x01c2), which is a PEBS capable event, to get the same
+		 * count.
+		 *
+		 * UOPS_RETIRED.ALL counts the number of cycles that retires
+		 * CNTMASK micro-ops. By setting CNTMASK to a value (16)
+		 * larger than the maximum number of micro-ops that can be
+		 * retired per cycle (4) and then inverting the condition, we
+		 * count all cycles that retire 16 or less micro-ops, which
+		 * is every cycle.
+		 *
+		 * Thereby we gain a PEBS capable cycle counter.
+		 */
+		u64 alt_config = X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16);
 
 		alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
 		event->hw.config = alt_config;
 	}
+}
+
+static int intel_pmu_hw_config(struct perf_event *event)
+{
+	int ret = x86_pmu_hw_config(event);
+
+	if (ret)
+		return ret;
+
+	if (event->attr.precise_ip && x86_pmu.pebs_aliases)
+		x86_pmu.pebs_aliases(event);
 
 	if (intel_pmu_needs_lbr_smpl(event)) {
 		ret = intel_pmu_setup_lbr_filter(event);
@@ -1607,6 +1675,7 @@ static __initconst const struct x86_pmu intel_pmu = {
 	.max_period		= (1ULL << 31) - 1,
 	.get_event_constraints	= intel_get_event_constraints,
 	.put_event_constraints	= intel_put_event_constraints,
+	.pebs_aliases		= intel_pebs_aliases_core2,
 
 	.format_attrs		= intel_arch3_formats_attr,
 
@@ -1840,8 +1909,9 @@ __init int intel_pmu_init(void)
 		break;
 
 	case 42: /* SandyBridge */
-		x86_add_quirk(intel_sandybridge_quirk);
 	case 45: /* SandyBridge, "Romely-EP" */
+		x86_add_quirk(intel_sandybridge_quirk);
+	case 58: /* IvyBridge */
 		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
@@ -1849,6 +1919,7 @@ __init int intel_pmu_init(void)
 
 		x86_pmu.event_constraints = intel_snb_event_constraints;
 		x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
+		x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
 		x86_pmu.extra_regs = intel_snb_extra_regs;
 		/* all extra regs are per-cpu when HT is on */
 		x86_pmu.er_flags |= ERF_HAS_RSP_1;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 5a3edc27f6e5..35e2192df9f4 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -400,14 +400,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
 	INTEL_EVENT_CONSTRAINT(0xc4, 0xf),	/* BR_INST_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xc5, 0xf),	/* BR_MISP_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xcd, 0x8),	/* MEM_TRANS_RETIRED.* */
-	INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf),	/* MEM_UOP_RETIRED.STLB_MISS_LOADS */
-	INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf),	/* MEM_UOP_RETIRED.STLB_MISS_STORES */
-	INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf),	/* MEM_UOP_RETIRED.LOCK_LOADS */
-	INTEL_UEVENT_CONSTRAINT(0x22d0, 0xf),	/* MEM_UOP_RETIRED.LOCK_STORES */
-	INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf),	/* MEM_UOP_RETIRED.SPLIT_LOADS */
-	INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf),	/* MEM_UOP_RETIRED.SPLIT_STORES */
-	INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf),	/* MEM_UOP_RETIRED.ANY_LOADS */
-	INTEL_UEVENT_CONSTRAINT(0x82d0, 0xf),	/* MEM_UOP_RETIRED.ANY_STORES */
+	INTEL_EVENT_CONSTRAINT(0xd0, 0xf),	/* MEM_UOP_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd1, 0xf),	/* MEM_LOAD_UOPS_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd2, 0xf),	/* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
 	INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf),	/* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */
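The SNB PEBS table update folds the eight umask-specific MEM_UOP_RETIRED entries into one event-wide entry: an event-code constraint keys only on the event-select byte, so every 0xd0 umask now shares the same counter mask (0xf), whereas a umask-level constraint compares event code and umask together. Purely as an illustration of that distinction in terms of the raw config layout (a sketch, not the kernel's constraint macros):

#include <stdbool.h>
#include <stdint.h>

/* event select lives in bits 0-7 of the config, the unit mask in bits 8-15 */
static bool matches_event(uint64_t config, uint8_t event)
{
	return (config & 0xff) == event;	/* e.g. any MEM_UOP_RETIRED.* umask of 0xd0 */
}

static bool matches_uevent(uint64_t config, uint16_t uevent)
{
	return (config & 0xffff) == uevent;	/* e.g. exactly 0x11d0, STLB_MISS_LOADS */
}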
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
index f61ee67ec00f..677b1ed184c9 100644
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -8,6 +8,7 @@
 #include <linux/module.h>
 
 #include <asm/word-at-a-time.h>
+#include <linux/sched.h>
 
 /*
  * best effort, GUP based copy_from_user() that is NMI-safe
@@ -21,6 +22,9 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
 	void *map;
 	int ret;
 
+	if (__range_not_ok(from, n, TASK_SIZE))
+		return len;
+
 	do {
 		ret = __get_user_pages_fast(addr, 1, 0, &page);
 		if (!ret)
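With the explicit TASK_SIZE check in place, copy_from_user_nmi() refuses an out-of-range source before it ever calls __get_user_pages_fast(); since len is still 0 at that point, the function reports that nothing was copied. A hypothetical caller (illustrative only, not kernel code) showing the resulting contract:

/* returns 0 on success, -EFAULT if the word could not be copied */
static int read_user_long_nmi(const unsigned long __user *addr, unsigned long *val)
{
	unsigned long bytes;

	bytes = copy_from_user_nmi(val, addr, sizeof(*val));

	return bytes == sizeof(*val) ? 0 : -EFAULT;
}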
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index 819137904428..5d7e51f3fd28 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -28,7 +28,7 @@
 # - (66): the last prefix is 0x66
 # - (F3): the last prefix is 0xF3
 # - (F2): the last prefix is 0xF2
-#
+# - (!F3) : the last prefix is not 0xF3 (including non-last prefix case)
 
 Table: one byte opcode
 Referrer:
@@ -515,12 +515,12 @@ b4: LFS Gv,Mp
 b5: LGS Gv,Mp
 b6: MOVZX Gv,Eb
 b7: MOVZX Gv,Ew
-b8: JMPE | POPCNT Gv,Ev (F3)
+b8: JMPE (!F3) | POPCNT Gv,Ev (F3)
 b9: Grp10 (1A)
 ba: Grp8 Ev,Ib (1A)
 bb: BTC Ev,Gv
-bc: BSF Gv,Ev | TZCNT Gv,Ev (F3)
-bd: BSR Gv,Ev | LZCNT Gv,Ev (F3)
+bc: BSF Gv,Ev (!F3) | TZCNT Gv,Ev (F3)
+bd: BSR Gv,Ev (!F3) | LZCNT Gv,Ev (F3)
 be: MOVSX Gv,Eb
 bf: MOVSX Gv,Ew
 # 0x0f 0xc0-0xcf
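The (!F3) markers record that a bare 0x66 operand-size prefix must not select the prefixed variant: 66 0f bd is still BSR (just with 16-bit operands), and only a last 0xF3 prefix turns 0f bd into LZCNT. A few byte patterns for the rows fixed above, purely as illustration:

/* 0f bd /r rows after the fix (ModRM 0xc1: reg=eax, rm=ecx) */
static const unsigned char bsr32[]   = { 0x0f, 0xbd, 0xc1 };		/* bsr   %ecx,%eax */
static const unsigned char bsr16[]   = { 0x66, 0x0f, 0xbd, 0xc1 };	/* bsr   %cx,%ax -- 0x66 alone no longer picks the (F3) variant */
static const unsigned char lzcnt32[] = { 0xf3, 0x0f, 0xbd, 0xc1 };	/* lzcnt %ecx,%eax */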
diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk
index 5f6a5b6c3a15..ddcf39b1a18d 100644
--- a/arch/x86/tools/gen-insn-attr-x86.awk
+++ b/arch/x86/tools/gen-insn-attr-x86.awk
@@ -66,9 +66,10 @@ BEGIN {
 	rex_expr = "^REX(\\.[XRWB]+)*"
 	fpu_expr = "^ESC" # TODO
 
-	lprefix1_expr = "\\(66\\)"
+	lprefix1_expr = "\\((66|!F3)\\)"
 	lprefix2_expr = "\\(F3\\)"
-	lprefix3_expr = "\\(F2\\)"
+	lprefix3_expr = "\\((F2|!F3)\\)"
+	lprefix_expr = "\\((66|F2|F3)\\)"
 	max_lprefix = 4
 
 	# All opcodes starting with lower-case 'v' or with (v1) superscript
@@ -333,13 +334,16 @@ function convert_operands(count,opnd, i,j,imm,mod)
 		if (match(ext, lprefix1_expr)) {
 			lptable1[idx] = add_flags(lptable1[idx],flags)
 			variant = "INAT_VARIANT"
-		} else if (match(ext, lprefix2_expr)) {
+		}
+		if (match(ext, lprefix2_expr)) {
 			lptable2[idx] = add_flags(lptable2[idx],flags)
 			variant = "INAT_VARIANT"
-		} else if (match(ext, lprefix3_expr)) {
+		}
+		if (match(ext, lprefix3_expr)) {
 			lptable3[idx] = add_flags(lptable3[idx],flags)
 			variant = "INAT_VARIANT"
-		} else {
+		}
+		if (!match(ext, lprefix_expr)){
 			table[idx] = add_flags(table[idx],flags)
 		}
 	}