aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2015-03-04 00:33:49 -0500
committerIngo Molnar <mingo@kernel.org>2015-03-04 00:36:15 -0500
commitf8e92fb4b0ffc4d62279ab39f34e798e37e90b0b (patch)
tree9caa8df664792e64ddcb4ea03fd418a8a529c82e /tools
parentd2c032e3dc58137a7261a7824d3acce435db1d66 (diff)
parentdfecb95cdfeaf7872d83a96bec3a606e9cd95c8d (diff)
Merge tag 'alternatives_padding' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp into x86/asm
Pull alternative instructions framework improvements from Borislav Petkov: "A more involved rework of the alternatives framework to be able to pad instructions and thus make using the alternatives macros more straightforward and without having to figure out old and new instruction sizes but have the toolchain figure that out for us. Furthermore, it optimizes JMPs used so that fetch and decode can be relieved with smaller versions of the JMPs, where possible. Some stats: x86_64 defconfig: Alternatives sites total: 2478 Total padding added (in Bytes): 6051 The padding is currently done for: X86_FEATURE_ALWAYS X86_FEATURE_ERMS X86_FEATURE_LFENCE_RDTSC X86_FEATURE_MFENCE_RDTSC X86_FEATURE_SMAP This is with the latest version of the patchset. Of course, on each machine the alternatives sites actually being patched are a proper subset of the total number." Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools')
-rw-r--r--tools/perf/bench/mem-memcpy-x86-64-asm-def.h6
-rw-r--r--tools/perf/bench/mem-memcpy-x86-64-asm.S2
-rw-r--r--tools/perf/bench/mem-memcpy.c128
-rw-r--r--tools/perf/bench/mem-memset-x86-64-asm-def.h6
-rw-r--r--tools/perf/bench/mem-memset-x86-64-asm.S2
-rw-r--r--tools/perf/util/include/asm/alternative-asm.h1
6 files changed, 73 insertions, 72 deletions
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
index d66ab799b35f..8c0c1a2770c8 100644
--- a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
+++ b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
@@ -1,12 +1,12 @@
1 1
2MEMCPY_FN(__memcpy, 2MEMCPY_FN(memcpy_orig,
3 "x86-64-unrolled", 3 "x86-64-unrolled",
4 "unrolled memcpy() in arch/x86/lib/memcpy_64.S") 4 "unrolled memcpy() in arch/x86/lib/memcpy_64.S")
5 5
6MEMCPY_FN(memcpy_c, 6MEMCPY_FN(__memcpy,
7 "x86-64-movsq", 7 "x86-64-movsq",
8 "movsq-based memcpy() in arch/x86/lib/memcpy_64.S") 8 "movsq-based memcpy() in arch/x86/lib/memcpy_64.S")
9 9
10MEMCPY_FN(memcpy_c_e, 10MEMCPY_FN(memcpy_erms,
11 "x86-64-movsb", 11 "x86-64-movsb",
12 "movsb-based memcpy() in arch/x86/lib/memcpy_64.S") 12 "movsb-based memcpy() in arch/x86/lib/memcpy_64.S")
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S
index fcd9cf00600a..e4c2c30143b9 100644
--- a/tools/perf/bench/mem-memcpy-x86-64-asm.S
+++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S
@@ -1,8 +1,6 @@
1#define memcpy MEMCPY /* don't hide glibc's memcpy() */ 1#define memcpy MEMCPY /* don't hide glibc's memcpy() */
2#define altinstr_replacement text 2#define altinstr_replacement text
3#define globl p2align 4; .globl 3#define globl p2align 4; .globl
4#define Lmemcpy_c globl memcpy_c; memcpy_c
5#define Lmemcpy_c_e globl memcpy_c_e; memcpy_c_e
6#include "../../../arch/x86/lib/memcpy_64.S" 4#include "../../../arch/x86/lib/memcpy_64.S"
7/* 5/*
8 * We need to provide note.GNU-stack section, saying that we want 6 * We need to provide note.GNU-stack section, saying that we want
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
index db1d3a29d97f..d3dfb7936dcd 100644
--- a/tools/perf/bench/mem-memcpy.c
+++ b/tools/perf/bench/mem-memcpy.c
@@ -36,7 +36,7 @@ static const struct option options[] = {
36 "Specify length of memory to copy. " 36 "Specify length of memory to copy. "
37 "Available units: B, KB, MB, GB and TB (upper and lower)"), 37 "Available units: B, KB, MB, GB and TB (upper and lower)"),
38 OPT_STRING('r', "routine", &routine, "default", 38 OPT_STRING('r', "routine", &routine, "default",
39 "Specify routine to copy"), 39 "Specify routine to copy, \"all\" runs all available routines"),
40 OPT_INTEGER('i', "iterations", &iterations, 40 OPT_INTEGER('i', "iterations", &iterations,
41 "repeat memcpy() invocation this number of times"), 41 "repeat memcpy() invocation this number of times"),
42 OPT_BOOLEAN('c', "cycle", &use_cycle, 42 OPT_BOOLEAN('c', "cycle", &use_cycle,
@@ -135,55 +135,16 @@ struct bench_mem_info {
135 const char *const *usage; 135 const char *const *usage;
136}; 136};
137 137
138static int bench_mem_common(int argc, const char **argv, 138static void __bench_mem_routine(struct bench_mem_info *info, int r_idx, size_t len, double totallen)
139 const char *prefix __maybe_unused,
140 struct bench_mem_info *info)
141{ 139{
142 int i; 140 const struct routine *r = &info->routines[r_idx];
143 size_t len;
144 double totallen;
145 double result_bps[2]; 141 double result_bps[2];
146 u64 result_cycle[2]; 142 u64 result_cycle[2];
147 143
148 argc = parse_options(argc, argv, options,
149 info->usage, 0);
150
151 if (no_prefault && only_prefault) {
152 fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n");
153 return 1;
154 }
155
156 if (use_cycle)
157 init_cycle();
158
159 len = (size_t)perf_atoll((char *)length_str);
160 totallen = (double)len * iterations;
161
162 result_cycle[0] = result_cycle[1] = 0ULL; 144 result_cycle[0] = result_cycle[1] = 0ULL;
163 result_bps[0] = result_bps[1] = 0.0; 145 result_bps[0] = result_bps[1] = 0.0;
164 146
165 if ((s64)len <= 0) { 147 printf("Routine %s (%s)\n", r->name, r->desc);
166 fprintf(stderr, "Invalid length:%s\n", length_str);
167 return 1;
168 }
169
170 /* same to without specifying either of prefault and no-prefault */
171 if (only_prefault && no_prefault)
172 only_prefault = no_prefault = false;
173
174 for (i = 0; info->routines[i].name; i++) {
175 if (!strcmp(info->routines[i].name, routine))
176 break;
177 }
178 if (!info->routines[i].name) {
179 printf("Unknown routine:%s\n", routine);
180 printf("Available routines...\n");
181 for (i = 0; info->routines[i].name; i++) {
182 printf("\t%s ... %s\n",
183 info->routines[i].name, info->routines[i].desc);
184 }
185 return 1;
186 }
187 148
188 if (bench_format == BENCH_FORMAT_DEFAULT) 149 if (bench_format == BENCH_FORMAT_DEFAULT)
189 printf("# Copying %s Bytes ...\n\n", length_str); 150 printf("# Copying %s Bytes ...\n\n", length_str);
@@ -191,28 +152,17 @@ static int bench_mem_common(int argc, const char **argv,
191 if (!only_prefault && !no_prefault) { 152 if (!only_prefault && !no_prefault) {
192 /* show both of results */ 153 /* show both of results */
193 if (use_cycle) { 154 if (use_cycle) {
194 result_cycle[0] = 155 result_cycle[0] = info->do_cycle(r, len, false);
195 info->do_cycle(&info->routines[i], len, false); 156 result_cycle[1] = info->do_cycle(r, len, true);
196 result_cycle[1] =
197 info->do_cycle(&info->routines[i], len, true);
198 } else { 157 } else {
199 result_bps[0] = 158 result_bps[0] = info->do_gettimeofday(r, len, false);
200 info->do_gettimeofday(&info->routines[i], 159 result_bps[1] = info->do_gettimeofday(r, len, true);
201 len, false);
202 result_bps[1] =
203 info->do_gettimeofday(&info->routines[i],
204 len, true);
205 } 160 }
206 } else { 161 } else {
207 if (use_cycle) { 162 if (use_cycle)
208 result_cycle[pf] = 163 result_cycle[pf] = info->do_cycle(r, len, only_prefault);
209 info->do_cycle(&info->routines[i], 164 else
210 len, only_prefault); 165 result_bps[pf] = info->do_gettimeofday(r, len, only_prefault);
211 } else {
212 result_bps[pf] =
213 info->do_gettimeofday(&info->routines[i],
214 len, only_prefault);
215 }
216 } 166 }
217 167
218 switch (bench_format) { 168 switch (bench_format) {
@@ -265,6 +215,60 @@ static int bench_mem_common(int argc, const char **argv,
265 die("unknown format: %d\n", bench_format); 215 die("unknown format: %d\n", bench_format);
266 break; 216 break;
267 } 217 }
218}
219
220static int bench_mem_common(int argc, const char **argv,
221 const char *prefix __maybe_unused,
222 struct bench_mem_info *info)
223{
224 int i;
225 size_t len;
226 double totallen;
227
228 argc = parse_options(argc, argv, options,
229 info->usage, 0);
230
231 if (no_prefault && only_prefault) {
232 fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n");
233 return 1;
234 }
235
236 if (use_cycle)
237 init_cycle();
238
239 len = (size_t)perf_atoll((char *)length_str);
240 totallen = (double)len * iterations;
241
242 if ((s64)len <= 0) {
243 fprintf(stderr, "Invalid length:%s\n", length_str);
244 return 1;
245 }
246
247 /* same to without specifying either of prefault and no-prefault */
248 if (only_prefault && no_prefault)
249 only_prefault = no_prefault = false;
250
251 if (!strncmp(routine, "all", 3)) {
252 for (i = 0; info->routines[i].name; i++)
253 __bench_mem_routine(info, i, len, totallen);
254 return 0;
255 }
256
257 for (i = 0; info->routines[i].name; i++) {
258 if (!strcmp(info->routines[i].name, routine))
259 break;
260 }
261 if (!info->routines[i].name) {
262 printf("Unknown routine:%s\n", routine);
263 printf("Available routines...\n");
264 for (i = 0; info->routines[i].name; i++) {
265 printf("\t%s ... %s\n",
266 info->routines[i].name, info->routines[i].desc);
267 }
268 return 1;
269 }
270
271 __bench_mem_routine(info, i, len, totallen);
268 272
269 return 0; 273 return 0;
270} 274}
diff --git a/tools/perf/bench/mem-memset-x86-64-asm-def.h b/tools/perf/bench/mem-memset-x86-64-asm-def.h
index a71dff97c1f5..f02d028771d9 100644
--- a/tools/perf/bench/mem-memset-x86-64-asm-def.h
+++ b/tools/perf/bench/mem-memset-x86-64-asm-def.h
@@ -1,12 +1,12 @@
1 1
2MEMSET_FN(__memset, 2MEMSET_FN(memset_orig,
3 "x86-64-unrolled", 3 "x86-64-unrolled",
4 "unrolled memset() in arch/x86/lib/memset_64.S") 4 "unrolled memset() in arch/x86/lib/memset_64.S")
5 5
6MEMSET_FN(memset_c, 6MEMSET_FN(__memset,
7 "x86-64-stosq", 7 "x86-64-stosq",
8 "movsq-based memset() in arch/x86/lib/memset_64.S") 8 "movsq-based memset() in arch/x86/lib/memset_64.S")
9 9
10MEMSET_FN(memset_c_e, 10MEMSET_FN(memset_erms,
11 "x86-64-stosb", 11 "x86-64-stosb",
12 "movsb-based memset() in arch/x86/lib/memset_64.S") 12 "movsb-based memset() in arch/x86/lib/memset_64.S")
diff --git a/tools/perf/bench/mem-memset-x86-64-asm.S b/tools/perf/bench/mem-memset-x86-64-asm.S
index 9e5af89ed13a..de278784c866 100644
--- a/tools/perf/bench/mem-memset-x86-64-asm.S
+++ b/tools/perf/bench/mem-memset-x86-64-asm.S
@@ -1,8 +1,6 @@
1#define memset MEMSET /* don't hide glibc's memset() */ 1#define memset MEMSET /* don't hide glibc's memset() */
2#define altinstr_replacement text 2#define altinstr_replacement text
3#define globl p2align 4; .globl 3#define globl p2align 4; .globl
4#define Lmemset_c globl memset_c; memset_c
5#define Lmemset_c_e globl memset_c_e; memset_c_e
6#include "../../../arch/x86/lib/memset_64.S" 4#include "../../../arch/x86/lib/memset_64.S"
7 5
8/* 6/*
diff --git a/tools/perf/util/include/asm/alternative-asm.h b/tools/perf/util/include/asm/alternative-asm.h
index 6789d788d494..3a3a0f16456a 100644
--- a/tools/perf/util/include/asm/alternative-asm.h
+++ b/tools/perf/util/include/asm/alternative-asm.h
@@ -4,5 +4,6 @@
4/* Just disable it so we can build arch/x86/lib/memcpy_64.S for perf bench: */ 4/* Just disable it so we can build arch/x86/lib/memcpy_64.S for perf bench: */
5 5
6#define altinstruction_entry # 6#define altinstruction_entry #
7#define ALTERNATIVE_2 #
7 8
8#endif 9#endif