author    Arnaldo Carvalho de Melo <acme@redhat.com>    2016-07-11 11:36:41 -0400
committer Arnaldo Carvalho de Melo <acme@redhat.com>    2016-07-12 14:20:32 -0400
commit    7d7d1bf1d1dabe435ef50efb051724b8664749cb (patch)
tree      25b24227fb2d78e03262785ba893c1fb21306d1c
parent    c4b6014e8bb0c8d47fe5c71ebc604f31091e5d3f (diff)
perf bench: Copy kernel files needed to build mem{cpy,set} x86_64 benchmarks

We can't access kernel files directly from tools/, so copy the required
bits and make sure that we detect when the original files in the kernel
get modified.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-z7e76274ch5j4nugv048qacb@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-rw-r--r--  tools/arch/x86/include/asm/cpufeatures.h                                          316
-rw-r--r--  tools/arch/x86/include/asm/disabled-features.h                                     60
-rw-r--r--  tools/arch/x86/include/asm/required-features.h                                    103
-rw-r--r--  tools/arch/x86/lib/memcpy_64.S                                                    297
-rw-r--r--  tools/arch/x86/lib/memset_64.S                                                    138
-rw-r--r--  tools/include/asm/alternative-asm.h (renamed from tools/perf/util/include/asm/alternative-asm.h)    4
-rw-r--r--  tools/perf/MANIFEST                                                                 9
-rw-r--r--  tools/perf/Makefile.perf                                                           15
-rw-r--r--  tools/perf/bench/mem-memcpy-x86-64-asm.S                                            2
-rw-r--r--  tools/perf/bench/mem-memset-x86-64-asm.S                                            2
10 files changed, 939 insertions(+), 7 deletions(-)
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
new file mode 100644
index 000000000000..4a413485f9eb
--- /dev/null
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -0,0 +1,316 @@
+#ifndef _ASM_X86_CPUFEATURES_H
+#define _ASM_X86_CPUFEATURES_H
+
+#ifndef _ASM_X86_REQUIRED_FEATURES_H
+#include <asm/required-features.h>
+#endif
+
+#ifndef _ASM_X86_DISABLED_FEATURES_H
+#include <asm/disabled-features.h>
+#endif
+
+/*
+ * Defines x86 CPU feature bits
+ */
+#define NCAPINTS 18 /* N 32-bit words worth of info */
+#define NBUGINTS 1 /* N 32-bit bug flags */
+
+/*
+ * Note: If the comment begins with a quoted string, that string is used
+ * in /proc/cpuinfo instead of the macro name. If the string is "",
+ * this feature bit is not displayed in /proc/cpuinfo at all.
+ */
+
+/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
+#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */
+#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */
+#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */
+#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */
+#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */
+#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */
+#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */
+#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */
+#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */
+#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */
+#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */
+#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */
+#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */
+#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */
+#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions */
+			/* (plus FCMOVcc, FCOMI with FPU) */
+#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */
+#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */
+#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */
+#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */
+#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */
+#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */
+#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */
+#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
+#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */
+#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */
+#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */
+#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */
+#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */
+#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */
+#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */
+
+/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
+/* Don't duplicate feature flags which are redundant with Intel! */
+#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */
+#define X86_FEATURE_MP ( 1*32+19) /* MP Capable. */
+#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */
+#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */
+#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
+#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */
+#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */
+#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64) */
+#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow! extensions */
+#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow! */
+
+/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
+#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */
+#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */
+#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */
+
+/* Other features, Linux-defined mapping, word 3 */
+/* This range is used for feature bits which conflict or are synthesized */
+#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */
+#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
+#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
+#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
+/* cpu types for specific tunings: */
+#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */
+#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */
+#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */
+#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */
+#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
+#define X86_FEATURE_UP ( 3*32+ 9) /* smp kernel running on up */
+#define X86_FEATURE_ART ( 3*32+10) /* Platform has always running timer (ART) */
+#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
+#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */
+#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */
+#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in ia32 userspace */
+#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in ia32 userspace */
+#define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */
+#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */
+#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */
+#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
+#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
+#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
+#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */
+#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
+#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */
+/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */
+#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */
+#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */
+#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
+#define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
+#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
+#define X86_FEATURE_MCE_RECOVERY ( 3*32+31) /* cpu has recoverable machine checks */
+
+/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
+#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
+#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */
+#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */
+#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" Monitor/Mwait support */
+#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
+#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */
+#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer mode */
+#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */
+#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */
+#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */
+#define X86_FEATURE_CID ( 4*32+10) /* Context ID */
+#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */
+#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */
+#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B */
+#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */
+#define X86_FEATURE_PDCM ( 4*32+15) /* Performance Capabilities */
+#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */
+#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */
+#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */
+#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */
+#define X86_FEATURE_X2APIC ( 4*32+21) /* x2APIC */
+#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */
+#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */
+#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* Tsc deadline timer */
+#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */
+#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
+#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE enabled in the OS */
+#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */
+#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit fp conversions */
+#define X86_FEATURE_RDRAND ( 4*32+30) /* The RDRAND instruction */
+#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */
+
+/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
+#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */
+#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */
+#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
+#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
+#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */
+#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */
+#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */
+#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */
+#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */
+#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */
+
+/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
+#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */
+#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */
+#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure virtual machine */
+#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */
+#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */
+#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */
+#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */
+#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
+#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
+#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */
+#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */
+#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */
+#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */
+#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */
+#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */
+#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */
+#define X86_FEATURE_TCE ( 6*32+17) /* translation cache extension */
+#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */
+#define X86_FEATURE_TBM ( 6*32+21) /* trailing bit manipulations */
+#define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */
+#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
+#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */
+#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */
+#define X86_FEATURE_PTSC ( 6*32+27) /* performance time-stamp counter */
+#define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */
+#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
+
+/*
+ * Auxiliary flags: Linux defined - For features scattered in various
+ * CPUID levels like 0x6, 0xA etc, word 7.
+ *
+ * Reuse free bits when adding new feature flags!
+ */
+
+#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
+#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
+
+#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
+#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+
+#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
+
+/* Virtualization flags: Linux defined, word 8 */
+#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
+#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
+#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
+#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */
+#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */
+
+#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */
+#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */
+
+
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
+#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
+#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3b */
+#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */
+#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */
+#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */
+#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */
+#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */
+#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
+#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */
+#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */
+#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */
+#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */
+#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */
+#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
+#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */
+#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */
+#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
+#define X86_FEATURE_PCOMMIT ( 9*32+22) /* PCOMMIT instruction */
+#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
+#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
+#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
+#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
+#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
+#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
+#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */
+#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */
+
+/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */
+#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */
+#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC */
+#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */
+#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */
+
+/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
+#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */
+
+/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
+#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
+#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */
+#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */
+
+/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
+#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */
+#define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */
+
+/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
+#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
+#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */
+#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */
+#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */
+#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */
+#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */
+#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */
+#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
+#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
+#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
+
+/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */
+#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
+#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */
+#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */
+#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */
+#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */
+#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
+#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
+#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
+#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
+#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
+#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
+
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
+#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
+#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
+
+/* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */
+#define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */
+#define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */
+#define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */
+
+/*
+ * BUG word(s)
+ */
+#define X86_BUG(x) (NCAPINTS*32 + (x))
+
+#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */
+#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */
+#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */
+#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
+#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
+#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */
+#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
+#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
+#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
+#define X86_BUG_NULL_SEG X86_BUG(9) /* Nulling a selector preserves the base */
+#define X86_BUG_SWAPGS_FENCE X86_BUG(10) /* SWAPGS without input dep on GS */
+
+
+#ifdef CONFIG_X86_32
+/*
+ * 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional
+ * to avoid confusion.
+ */
+#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
+#endif
+
+#endif /* _ASM_X86_CPUFEATURES_H */
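
The (word*32 + bit) encoding above flattens every capability word into a single feature number; consumers recover the word index with a division by 32 and the bit position with the "& 31" arithmetic that the disabled-features.h and required-features.h headers below rely on. A minimal standalone C sketch of that decomposition (illustrative only, not part of the patch; the macro value is copied from the header above):

#include <stdio.h>

#define X86_FEATURE_ERMS (9*32 + 9)	/* from cpufeatures.h above */

int main(void)
{
	unsigned int feature = X86_FEATURE_ERMS;
	unsigned int word = feature / 32;		/* which 32-bit capability word */
	unsigned int mask = 1u << (feature & 31);	/* bit inside that word */

	printf("word %u, mask 0x%08x\n", word, mask);	/* word 9, mask 0x00000200 */
	return 0;
}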
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
new file mode 100644
index 000000000000..911e9358ceb1
--- /dev/null
+++ b/tools/arch/x86/include/asm/disabled-features.h
@@ -0,0 +1,60 @@
+#ifndef _ASM_X86_DISABLED_FEATURES_H
+#define _ASM_X86_DISABLED_FEATURES_H
+
+/* These features, although they might be available in a CPU
+ * will not be used because the compile options to support
+ * them are not present.
+ *
+ * This code allows them to be checked and disabled at
+ * compile time without an explicit #ifdef. Use
+ * cpu_feature_enabled().
+ */
+
+#ifdef CONFIG_X86_INTEL_MPX
+# define DISABLE_MPX 0
+#else
+# define DISABLE_MPX (1<<(X86_FEATURE_MPX & 31))
+#endif
+
+#ifdef CONFIG_X86_64
+# define DISABLE_VME (1<<(X86_FEATURE_VME & 31))
+# define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31))
+# define DISABLE_CYRIX_ARR (1<<(X86_FEATURE_CYRIX_ARR & 31))
+# define DISABLE_CENTAUR_MCR (1<<(X86_FEATURE_CENTAUR_MCR & 31))
+#else
+# define DISABLE_VME 0
+# define DISABLE_K6_MTRR 0
+# define DISABLE_CYRIX_ARR 0
+# define DISABLE_CENTAUR_MCR 0
+#endif /* CONFIG_X86_64 */
+
+#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+# define DISABLE_PKU 0
+# define DISABLE_OSPKE 0
+#else
+# define DISABLE_PKU (1<<(X86_FEATURE_PKU & 31))
+# define DISABLE_OSPKE (1<<(X86_FEATURE_OSPKE & 31))
+#endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */
+
+/*
+ * Make sure to add features to the correct mask
+ */
+#define DISABLED_MASK0 (DISABLE_VME)
+#define DISABLED_MASK1 0
+#define DISABLED_MASK2 0
+#define DISABLED_MASK3 (DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR)
+#define DISABLED_MASK4 0
+#define DISABLED_MASK5 0
+#define DISABLED_MASK6 0
+#define DISABLED_MASK7 0
+#define DISABLED_MASK8 0
+#define DISABLED_MASK9 (DISABLE_MPX)
+#define DISABLED_MASK10 0
+#define DISABLED_MASK11 0
+#define DISABLED_MASK12 0
+#define DISABLED_MASK13 0
+#define DISABLED_MASK14 0
+#define DISABLED_MASK15 0
+#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE)
+
+#endif /* _ASM_X86_DISABLED_FEATURES_H */
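
The DISABLE_* constants above exist so that a feature test can fold to a compile-time zero when the supporting kernel option is off. A simplified model of that idea (hypothetical names and layout; the kernel's real cpu_feature_enabled() is generated from these masks differently):

#include <stdbool.h>

#define NCAPINTS 18

static unsigned int cpuid_caps[NCAPINTS];		/* would be filled from CPUID */
static const unsigned int disabled_mask[NCAPINTS];	/* would mirror DISABLED_MASK0..16 */

static bool feature_enabled(unsigned int feature)
{
	unsigned int word = feature / 32;
	unsigned int mask = 1u << (feature & 31);

	if (disabled_mask[word] & mask)
		return false;			/* compiled out: treated as absent */
	return cpuid_caps[word] & mask;		/* otherwise trust the hardware */
}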
diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h
new file mode 100644
index 000000000000..4916144e3c42
--- /dev/null
+++ b/tools/arch/x86/include/asm/required-features.h
@@ -0,0 +1,103 @@
+#ifndef _ASM_X86_REQUIRED_FEATURES_H
+#define _ASM_X86_REQUIRED_FEATURES_H
+
+/* Define minimum CPUID feature set for kernel These bits are checked
+   really early to actually display a visible error message before the
+   kernel dies. Make sure to assign features to the proper mask!
+
+   Some requirements that are not in CPUID yet are also in the
+   CONFIG_X86_MINIMUM_CPU_FAMILY which is checked too.
+
+   The real information is in arch/x86/Kconfig.cpu, this just converts
+   the CONFIGs into a bitmask */
+
+#ifndef CONFIG_MATH_EMULATION
+# define NEED_FPU (1<<(X86_FEATURE_FPU & 31))
+#else
+# define NEED_FPU 0
+#endif
+
+#if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
+# define NEED_PAE (1<<(X86_FEATURE_PAE & 31))
+#else
+# define NEED_PAE 0
+#endif
+
+#ifdef CONFIG_X86_CMPXCHG64
+# define NEED_CX8 (1<<(X86_FEATURE_CX8 & 31))
+#else
+# define NEED_CX8 0
+#endif
+
+#if defined(CONFIG_X86_CMOV) || defined(CONFIG_X86_64)
+# define NEED_CMOV (1<<(X86_FEATURE_CMOV & 31))
+#else
+# define NEED_CMOV 0
+#endif
+
+#ifdef CONFIG_X86_USE_3DNOW
+# define NEED_3DNOW (1<<(X86_FEATURE_3DNOW & 31))
+#else
+# define NEED_3DNOW 0
+#endif
+
+#if defined(CONFIG_X86_P6_NOP) || defined(CONFIG_X86_64)
+# define NEED_NOPL (1<<(X86_FEATURE_NOPL & 31))
+#else
+# define NEED_NOPL 0
+#endif
+
+#ifdef CONFIG_MATOM
+# define NEED_MOVBE (1<<(X86_FEATURE_MOVBE & 31))
+#else
+# define NEED_MOVBE 0
+#endif
+
+#ifdef CONFIG_X86_64
+#ifdef CONFIG_PARAVIRT
+/* Paravirtualized systems may not have PSE or PGE available */
+#define NEED_PSE 0
+#define NEED_PGE 0
+#else
+#define NEED_PSE (1<<(X86_FEATURE_PSE) & 31)
+#define NEED_PGE (1<<(X86_FEATURE_PGE) & 31)
+#endif
+#define NEED_MSR (1<<(X86_FEATURE_MSR & 31))
+#define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31))
+#define NEED_XMM (1<<(X86_FEATURE_XMM & 31))
+#define NEED_XMM2 (1<<(X86_FEATURE_XMM2 & 31))
+#define NEED_LM (1<<(X86_FEATURE_LM & 31))
+#else
+#define NEED_PSE 0
+#define NEED_MSR 0
+#define NEED_PGE 0
+#define NEED_FXSR 0
+#define NEED_XMM 0
+#define NEED_XMM2 0
+#define NEED_LM 0
+#endif
+
+#define REQUIRED_MASK0 (NEED_FPU|NEED_PSE|NEED_MSR|NEED_PAE|\
+			NEED_CX8|NEED_PGE|NEED_FXSR|NEED_CMOV|\
+			NEED_XMM|NEED_XMM2)
+#define SSE_MASK (NEED_XMM|NEED_XMM2)
+
+#define REQUIRED_MASK1 (NEED_LM|NEED_3DNOW)
+
+#define REQUIRED_MASK2 0
+#define REQUIRED_MASK3 (NEED_NOPL)
+#define REQUIRED_MASK4 (NEED_MOVBE)
+#define REQUIRED_MASK5 0
+#define REQUIRED_MASK6 0
+#define REQUIRED_MASK7 0
+#define REQUIRED_MASK8 0
+#define REQUIRED_MASK9 0
+#define REQUIRED_MASK10 0
+#define REQUIRED_MASK11 0
+#define REQUIRED_MASK12 0
+#define REQUIRED_MASK13 0
+#define REQUIRED_MASK14 0
+#define REQUIRED_MASK15 0
+#define REQUIRED_MASK16 0
+
+#endif /* _ASM_X86_REQUIRED_FEATURES_H */
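
The REQUIRED_MASKn words express the opposite contract: every bit set there must be reported by the CPU or the kernel cannot run. A sketch of the early-boot style check under that reading (function and array names are illustrative, not the kernel's):

#include <stdbool.h>

#define NCAPINTS 18

static bool cpu_meets_minimum(const unsigned int caps[NCAPINTS],
			      const unsigned int required[NCAPINTS])
{
	for (int w = 0; w < NCAPINTS; w++) {
		if (required[w] & ~caps[w])
			return false;	/* a mandatory feature is missing */
	}
	return true;
}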
diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S
new file mode 100644
index 000000000000..2ec0b0abbfaa
--- /dev/null
+++ b/tools/arch/x86/lib/memcpy_64.S
@@ -0,0 +1,297 @@
+/* Copyright 2002 Andi Kleen */
+
+#include <linux/linkage.h>
+#include <asm/errno.h>
+#include <asm/cpufeatures.h>
+#include <asm/alternative-asm.h>
+
+/*
+ * We build a jump to memcpy_orig by default which gets NOPped out on
+ * the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which
+ * have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs
+ * to a jmp to memcpy_erms which does the REP; MOVSB mem copy.
+ */
+
+.weak memcpy
+
+/*
+ * memcpy - Copy a memory block.
+ *
+ * Input:
+ *  rdi destination
+ *  rsi source
+ *  rdx count
+ *
+ * Output:
+ * rax original destination
+ */
+ENTRY(__memcpy)
+ENTRY(memcpy)
+	ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
+		      "jmp memcpy_erms", X86_FEATURE_ERMS
+
+	movq %rdi, %rax
+	movq %rdx, %rcx
+	shrq $3, %rcx
+	andl $7, %edx
+	rep movsq
+	movl %edx, %ecx
+	rep movsb
+	ret
+ENDPROC(memcpy)
+ENDPROC(__memcpy)
+
+/*
+ * memcpy_erms() - enhanced fast string memcpy. This is faster and
+ * simpler than memcpy. Use memcpy_erms when possible.
+ */
+ENTRY(memcpy_erms)
+	movq %rdi, %rax
+	movq %rdx, %rcx
+	rep movsb
+	ret
+ENDPROC(memcpy_erms)
+
+ENTRY(memcpy_orig)
+	movq %rdi, %rax
+
+	cmpq $0x20, %rdx
+	jb .Lhandle_tail
+
+	/*
+	 * We check whether memory false dependence could occur,
+	 * then jump to corresponding copy mode.
+	 */
+	cmp %dil, %sil
+	jl .Lcopy_backward
+	subq $0x20, %rdx
+.Lcopy_forward_loop:
+	subq $0x20, %rdx
+
+	/*
+	 * Move in blocks of 4x8 bytes:
+	 */
+	movq 0*8(%rsi), %r8
+	movq 1*8(%rsi), %r9
+	movq 2*8(%rsi), %r10
+	movq 3*8(%rsi), %r11
+	leaq 4*8(%rsi), %rsi
+
+	movq %r8, 0*8(%rdi)
+	movq %r9, 1*8(%rdi)
+	movq %r10, 2*8(%rdi)
+	movq %r11, 3*8(%rdi)
+	leaq 4*8(%rdi), %rdi
+	jae .Lcopy_forward_loop
+	addl $0x20, %edx
+	jmp .Lhandle_tail
+
+.Lcopy_backward:
+	/*
+	 * Calculate copy position to tail.
+	 */
+	addq %rdx, %rsi
+	addq %rdx, %rdi
+	subq $0x20, %rdx
+	/*
+	 * At most 3 ALU operations in one cycle,
+	 * so append NOPS in the same 16 bytes trunk.
+	 */
+	.p2align 4
+.Lcopy_backward_loop:
+	subq $0x20, %rdx
+	movq -1*8(%rsi), %r8
+	movq -2*8(%rsi), %r9
+	movq -3*8(%rsi), %r10
+	movq -4*8(%rsi), %r11
+	leaq -4*8(%rsi), %rsi
+	movq %r8, -1*8(%rdi)
+	movq %r9, -2*8(%rdi)
+	movq %r10, -3*8(%rdi)
+	movq %r11, -4*8(%rdi)
+	leaq -4*8(%rdi), %rdi
+	jae .Lcopy_backward_loop
+
+	/*
+	 * Calculate copy position to head.
+	 */
+	addl $0x20, %edx
+	subq %rdx, %rsi
+	subq %rdx, %rdi
+.Lhandle_tail:
+	cmpl $16, %edx
+	jb .Lless_16bytes
+
+	/*
+	 * Move data from 16 bytes to 31 bytes.
+	 */
+	movq 0*8(%rsi), %r8
+	movq 1*8(%rsi), %r9
+	movq -2*8(%rsi, %rdx), %r10
+	movq -1*8(%rsi, %rdx), %r11
+	movq %r8, 0*8(%rdi)
+	movq %r9, 1*8(%rdi)
+	movq %r10, -2*8(%rdi, %rdx)
+	movq %r11, -1*8(%rdi, %rdx)
+	retq
+	.p2align 4
+.Lless_16bytes:
+	cmpl $8, %edx
+	jb .Lless_8bytes
+	/*
+	 * Move data from 8 bytes to 15 bytes.
+	 */
+	movq 0*8(%rsi), %r8
+	movq -1*8(%rsi, %rdx), %r9
+	movq %r8, 0*8(%rdi)
+	movq %r9, -1*8(%rdi, %rdx)
+	retq
+	.p2align 4
+.Lless_8bytes:
+	cmpl $4, %edx
+	jb .Lless_3bytes
+
+	/*
+	 * Move data from 4 bytes to 7 bytes.
+	 */
+	movl (%rsi), %ecx
+	movl -4(%rsi, %rdx), %r8d
+	movl %ecx, (%rdi)
+	movl %r8d, -4(%rdi, %rdx)
+	retq
+	.p2align 4
+.Lless_3bytes:
+	subl $1, %edx
+	jb .Lend
+	/*
+	 * Move data from 1 bytes to 3 bytes.
+	 */
+	movzbl (%rsi), %ecx
+	jz .Lstore_1byte
+	movzbq 1(%rsi), %r8
+	movzbq (%rsi, %rdx), %r9
+	movb %r8b, 1(%rdi)
+	movb %r9b, (%rdi, %rdx)
+.Lstore_1byte:
+	movb %cl, (%rdi)
+
+.Lend:
+	retq
+ENDPROC(memcpy_orig)
+
+#ifndef CONFIG_UML
+/*
+ * memcpy_mcsafe - memory copy with machine check exception handling
+ * Note that we only catch machine checks when reading the source addresses.
+ * Writes to target are posted and don't generate machine checks.
+ */
+ENTRY(memcpy_mcsafe)
+	cmpl $8, %edx
+	/* Less than 8 bytes? Go to byte copy loop */
+	jb .L_no_whole_words
+
+	/* Check for bad alignment of source */
+	testl $7, %esi
+	/* Already aligned */
+	jz .L_8byte_aligned
+
+	/* Copy one byte at a time until source is 8-byte aligned */
+	movl %esi, %ecx
+	andl $7, %ecx
+	subl $8, %ecx
+	negl %ecx
+	subl %ecx, %edx
+.L_copy_leading_bytes:
+	movb (%rsi), %al
+	movb %al, (%rdi)
+	incq %rsi
+	incq %rdi
+	decl %ecx
+	jnz .L_copy_leading_bytes
+
+.L_8byte_aligned:
+	/* Figure out how many whole cache lines (64-bytes) to copy */
+	movl %edx, %ecx
+	andl $63, %edx
+	shrl $6, %ecx
+	jz .L_no_whole_cache_lines
+
+	/* Loop copying whole cache lines */
+.L_cache_w0: movq (%rsi), %r8
+.L_cache_w1: movq 1*8(%rsi), %r9
+.L_cache_w2: movq 2*8(%rsi), %r10
+.L_cache_w3: movq 3*8(%rsi), %r11
+	movq %r8, (%rdi)
+	movq %r9, 1*8(%rdi)
+	movq %r10, 2*8(%rdi)
+	movq %r11, 3*8(%rdi)
+.L_cache_w4: movq 4*8(%rsi), %r8
+.L_cache_w5: movq 5*8(%rsi), %r9
+.L_cache_w6: movq 6*8(%rsi), %r10
+.L_cache_w7: movq 7*8(%rsi), %r11
+	movq %r8, 4*8(%rdi)
+	movq %r9, 5*8(%rdi)
+	movq %r10, 6*8(%rdi)
+	movq %r11, 7*8(%rdi)
+	leaq 64(%rsi), %rsi
+	leaq 64(%rdi), %rdi
+	decl %ecx
+	jnz .L_cache_w0
+
+	/* Are there any trailing 8-byte words? */
+.L_no_whole_cache_lines:
+	movl %edx, %ecx
+	andl $7, %edx
+	shrl $3, %ecx
+	jz .L_no_whole_words
+
+	/* Copy trailing words */
+.L_copy_trailing_words:
+	movq (%rsi), %r8
+	mov %r8, (%rdi)
+	leaq 8(%rsi), %rsi
+	leaq 8(%rdi), %rdi
+	decl %ecx
+	jnz .L_copy_trailing_words
+
+	/* Any trailing bytes? */
+.L_no_whole_words:
+	andl %edx, %edx
+	jz .L_done_memcpy_trap
+
+	/* Copy trailing bytes */
+	movl %edx, %ecx
+.L_copy_trailing_bytes:
+	movb (%rsi), %al
+	movb %al, (%rdi)
+	incq %rsi
+	incq %rdi
+	decl %ecx
+	jnz .L_copy_trailing_bytes
+
+	/* Copy successful. Return zero */
+.L_done_memcpy_trap:
+	xorq %rax, %rax
+	ret
+ENDPROC(memcpy_mcsafe)
+
+	.section .fixup, "ax"
+	/* Return -EFAULT for any failure */
+.L_memcpy_mcsafe_fail:
+	mov $-EFAULT, %rax
+	ret
+
+	.previous
+
+	_ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail)
+	_ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail)
+	_ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail)
+	_ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail)
+	_ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
+	_ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail)
+	_ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail)
+	_ASM_EXTABLE_FAULT(.L_cache_w6, .L_memcpy_mcsafe_fail)
+	_ASM_EXTABLE_FAULT(.L_cache_w7, .L_memcpy_mcsafe_fail)
+	_ASM_EXTABLE_FAULT(.L_copy_trailing_words, .L_memcpy_mcsafe_fail)
+	_ASM_EXTABLE_FAULT(.L_copy_trailing_bytes, .L_memcpy_mcsafe_fail)
+#endif
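
The ALTERNATIVE_2 line at the top of memcpy amounts to a three-way choice made once, when the kernel patches itself at boot. A C analogue of that decision (illustrative only; the real mechanism rewrites the jump in place rather than branching per call):

#include <stdio.h>
#include <stdbool.h>

/* Feature flags as they would be discovered from CPUID at boot. */
struct cpu_features { bool rep_good; bool erms; };

static const char *pick_memcpy(const struct cpu_features *f)
{
	if (f->erms)
		return "memcpy_erms";		/* plain REP MOVSB, simplest path */
	if (f->rep_good)
		return "rep-movsq body";	/* the default body once the jump is NOPped out */
	return "memcpy_orig";			/* unrolled 4x8-byte copy loop */
}

int main(void)
{
	struct cpu_features f = { .rep_good = true, .erms = false };
	printf("selected: %s\n", pick_memcpy(&f));
	return 0;
}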
diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S
new file mode 100644
index 000000000000..e1229ecd2a82
--- /dev/null
+++ b/tools/arch/x86/lib/memset_64.S
@@ -0,0 +1,138 @@
+/* Copyright 2002 Andi Kleen, SuSE Labs */
+
+#include <linux/linkage.h>
+#include <asm/cpufeatures.h>
+#include <asm/alternative-asm.h>
+
+.weak memset
+
+/*
+ * ISO C memset - set a memory block to a byte value. This function uses fast
+ * string to get better performance than the original function. The code is
+ * simpler and shorter than the original function as well.
+ *
+ * rdi destination
+ * rsi value (char)
+ * rdx count (bytes)
+ *
+ * rax original destination
+ */
+ENTRY(memset)
+ENTRY(__memset)
+	/*
+	 * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
+	 * to use it when possible. If not available, use fast string instructions.
+	 *
+	 * Otherwise, use original memset function.
+	 */
+	ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
+		      "jmp memset_erms", X86_FEATURE_ERMS
+
+	movq %rdi,%r9
+	movq %rdx,%rcx
+	andl $7,%edx
+	shrq $3,%rcx
+	/* expand byte value */
+	movzbl %sil,%esi
+	movabs $0x0101010101010101,%rax
+	imulq %rsi,%rax
+	rep stosq
+	movl %edx,%ecx
+	rep stosb
+	movq %r9,%rax
+	ret
+ENDPROC(memset)
+ENDPROC(__memset)
+
+/*
+ * ISO C memset - set a memory block to a byte value. This function uses
+ * enhanced rep stosb to override the fast string function.
+ * The code is simpler and shorter than the fast string function as well.
+ *
+ * rdi destination
+ * rsi value (char)
+ * rdx count (bytes)
+ *
+ * rax original destination
+ */
+ENTRY(memset_erms)
+	movq %rdi,%r9
+	movb %sil,%al
+	movq %rdx,%rcx
+	rep stosb
+	movq %r9,%rax
+	ret
+ENDPROC(memset_erms)
+
+ENTRY(memset_orig)
+	movq %rdi,%r10
+
+	/* expand byte value */
+	movzbl %sil,%ecx
+	movabs $0x0101010101010101,%rax
+	imulq %rcx,%rax
+
+	/* align dst */
+	movl %edi,%r9d
+	andl $7,%r9d
+	jnz .Lbad_alignment
+.Lafter_bad_alignment:
+
+	movq %rdx,%rcx
+	shrq $6,%rcx
+	jz .Lhandle_tail
+
+	.p2align 4
+.Lloop_64:
+	decq %rcx
+	movq %rax,(%rdi)
+	movq %rax,8(%rdi)
+	movq %rax,16(%rdi)
+	movq %rax,24(%rdi)
+	movq %rax,32(%rdi)
+	movq %rax,40(%rdi)
+	movq %rax,48(%rdi)
+	movq %rax,56(%rdi)
+	leaq 64(%rdi),%rdi
+	jnz .Lloop_64
+
+	/* Handle tail in loops. The loops should be faster than hard
+	   to predict jump tables. */
+	.p2align 4
+.Lhandle_tail:
+	movl %edx,%ecx
+	andl $63&(~7),%ecx
+	jz .Lhandle_7
+	shrl $3,%ecx
+	.p2align 4
+.Lloop_8:
+	decl %ecx
+	movq %rax,(%rdi)
+	leaq 8(%rdi),%rdi
+	jnz .Lloop_8
+
+.Lhandle_7:
+	andl $7,%edx
+	jz .Lende
+	.p2align 4
+.Lloop_1:
+	decl %edx
+	movb %al,(%rdi)
+	leaq 1(%rdi),%rdi
+	jnz .Lloop_1
+
+.Lende:
+	movq %r10,%rax
+	ret
+
+.Lbad_alignment:
+	cmpq $7,%rdx
+	jbe .Lhandle_7
+	movq %rax,(%rdi) /* unaligned store */
+	movq $8,%r8
+	subq %r9,%r8
+	addq %r8,%rdi
+	subq %r8,%rdx
+	jmp .Lafter_bad_alignment
+.Lfinal:
+ENDPROC(memset_orig)
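
The "expand byte value" step (movzbl plus imulq with 0x0101010101010101) replicates the fill byte into all eight bytes of a 64-bit word before rep stosq. The same computation in C, runnable standalone:

#include <stdio.h>
#include <stdint.h>

static uint64_t expand_byte(uint8_t c)
{
	/* 0x01 repeated in every byte lane acts as a broadcast multiplier */
	return (uint64_t)c * 0x0101010101010101ULL;
}

int main(void)
{
	printf("0x%016llx\n", (unsigned long long)expand_byte(0xab));
	/* prints 0xabababababababab */
	return 0;
}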
diff --git a/tools/perf/util/include/asm/alternative-asm.h b/tools/include/asm/alternative-asm.h
index 3a3a0f16456a..2a4d1bfa2988 100644
--- a/tools/perf/util/include/asm/alternative-asm.h
+++ b/tools/include/asm/alternative-asm.h
@@ -1,5 +1,5 @@
-#ifndef _PERF_ASM_ALTERNATIVE_ASM_H
-#define _PERF_ASM_ALTERNATIVE_ASM_H
+#ifndef _TOOLS_ASM_ALTERNATIVE_ASM_H
+#define _TOOLS_ASM_ALTERNATIVE_ASM_H
 
 /* Just disable it so we can build arch/x86/lib/memcpy_64.S for perf bench: */
 
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 0b1ebf3c08f6..cf85d1cd1c91 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -12,6 +12,11 @@ tools/arch/sparc/include/asm/barrier_32.h
 tools/arch/sparc/include/asm/barrier_64.h
 tools/arch/tile/include/asm/barrier.h
 tools/arch/x86/include/asm/barrier.h
+tools/arch/x86/include/asm/cpufeatures.h
+tools/arch/x86/include/asm/disabled-features.h
+tools/arch/x86/include/asm/required-features.h
+tools/arch/x86/lib/memcpy_64.S
+tools/arch/x86/lib/memset_64.S
 tools/arch/xtensa/include/asm/barrier.h
 tools/scripts
 tools/build
@@ -31,6 +36,7 @@ tools/lib/find_bit.c
 tools/lib/bitmap.c
 tools/lib/str_error_r.c
 tools/lib/vsprintf.c
+tools/include/asm/alternative-asm.h
 tools/include/asm/atomic.h
 tools/include/asm/barrier.h
 tools/include/asm/bug.h
@@ -74,9 +80,6 @@ include/linux/swab.h
 arch/*/include/asm/unistd*.h
 arch/*/include/uapi/asm/unistd*.h
 arch/*/include/uapi/asm/perf_regs.h
-arch/*/lib/memcpy*.S
-arch/*/lib/memset*.S
-arch/*/include/asm/*features.h
 include/linux/poison.h
 include/linux/hw_breakpoint.h
 include/uapi/linux/bpf.h
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 5e5f8cb1dd83..809735c6cb26 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -348,6 +348,21 @@ $(PERF_IN): prepare FORCE
 	@(test -f ../../include/uapi/linux/perf_event.h && ( \
 	(diff -B ../include/uapi/linux/perf_event.h ../../include/uapi/linux/perf_event.h >/dev/null) \
 	|| echo "Warning: tools/include/uapi/linux/perf_event.h differs from kernel" >&2 )) || true
+	@(test -f ../../arch/x86/include/asm/disabled-features.h && ( \
+	(diff -B ../arch/x86/include/asm/disabled-features.h ../../arch/x86/include/asm/disabled-features.h >/dev/null) \
+	|| echo "Warning: tools/arch/x86/include/asm/disabled-features.h differs from kernel" >&2 )) || true
+	@(test -f ../../arch/x86/include/asm/required-features.h && ( \
+	(diff -B ../arch/x86/include/asm/required-features.h ../../arch/x86/include/asm/required-features.h >/dev/null) \
+	|| echo "Warning: tools/arch/x86/include/asm/required-features.h differs from kernel" >&2 )) || true
+	@(test -f ../../arch/x86/include/asm/cpufeatures.h && ( \
+	(diff -B ../arch/x86/include/asm/cpufeatures.h ../../arch/x86/include/asm/cpufeatures.h >/dev/null) \
+	|| echo "Warning: tools/arch/x86/include/asm/cpufeatures.h differs from kernel" >&2 )) || true
+	@(test -f ../../arch/x86/lib/memcpy_64.S && ( \
+	(diff -B ../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memcpy_64.S >/dev/null) \
+	|| echo "Warning: tools/arch/x86/lib/memcpy_64.S differs from kernel" >&2 )) || true
+	@(test -f ../../arch/x86/lib/memset_64.S && ( \
+	(diff -B ../arch/x86/lib/memset_64.S ../../arch/x86/lib/memset_64.S >/dev/null) \
+	|| echo "Warning: tools/arch/x86/lib/memset_64.S differs from kernel" >&2 )) || true
 	$(Q)$(MAKE) $(build)=perf
 
 $(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(LIBTRACEEVENT_DYNAMIC_LIST)
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S
index 5c3cce082cb8..f700369bb0f6 100644
--- a/tools/perf/bench/mem-memcpy-x86-64-asm.S
+++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S
@@ -6,7 +6,7 @@
 #define globl p2align 4; .globl
 #define _ASM_EXTABLE_FAULT(x, y)
 
-#include "../../../arch/x86/lib/memcpy_64.S"
+#include "../../arch/x86/lib/memcpy_64.S"
 /*
  * We need to provide note.GNU-stack section, saying that we want
  * NOT executable stack. Otherwise the final linking will assume that
diff --git a/tools/perf/bench/mem-memset-x86-64-asm.S b/tools/perf/bench/mem-memset-x86-64-asm.S
index de278784c866..58407aa24c1b 100644
--- a/tools/perf/bench/mem-memset-x86-64-asm.S
+++ b/tools/perf/bench/mem-memset-x86-64-asm.S
@@ -1,7 +1,7 @@
 #define memset MEMSET /* don't hide glibc's memset() */
 #define altinstr_replacement text
 #define globl p2align 4; .globl
-#include "../../../arch/x86/lib/memset_64.S"
+#include "../../arch/x86/lib/memset_64.S"
 
 /*
  * We need to provide note.GNU-stack section, saying that we want