aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf/bench
diff options
context:
space:
mode:
author Ingo Molnar <mingo@kernel.org> 2015-10-19 04:04:21 -0400
committer Arnaldo Carvalho de Melo <acme@redhat.com> 2015-10-19 15:03:31 -0400
commit 6db175c7333e22ee818373cbea067e3eaa0236f7 (patch)
tree fbbe89782cc7b6baa3e838060f5fed41827c6f25 /tools/perf/bench
parent 9b2fa7f3e7799a335fd839906ab4d45b7d595dc4 (diff)
perf bench: Remove the prefaulting complication from 'perf bench mem mem*'
So 'perf bench mem memcpy/memset' has elaborate code to measure memcpy()/memset() performance both with freshly allocated buffers (which includes initial page fault overhead) and with preallocated buffers.

But the thing is, the resulting bandwidth results are mostly meaningless, because page faults dominate so much of the cost.

It might make sense to measure cache cold vs. cache hot performance, but the code does not do this.

So remove this complication, and always prefault the ranges before using them.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1445241870-24854-6-git-send-email-mingo@kernel.org
[ Remove --no-prefault, --only-prefault from docs, noticed by David Ahern ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf/bench')
-rw-r--r-- tools/perf/bench/mem-functions.c 146
1 files changed, 50 insertions, 96 deletions
diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c
index 7acb9b83382c..9c18a4b976b6 100644
--- a/tools/perf/bench/mem-functions.c
+++ b/tools/perf/bench/mem-functions.c
@@ -28,8 +28,6 @@ static const char *routine = "all";
28static int iterations = 1; 28static int iterations = 1;
29static bool use_cycle; 29static bool use_cycle;
30static int cycle_fd; 30static int cycle_fd;
31static bool only_prefault;
32static bool no_prefault;
33 31
34static const struct option options[] = { 32static const struct option options[] = {
35 OPT_STRING('l', "length", &length_str, "1MB", 33 OPT_STRING('l', "length", &length_str, "1MB",
@@ -41,10 +39,6 @@ static const struct option options[] = {
41 "repeat memcpy() invocation this number of times"), 39 "repeat memcpy() invocation this number of times"),
42 OPT_BOOLEAN('c', "cycle", &use_cycle, 40 OPT_BOOLEAN('c', "cycle", &use_cycle,
43 "Use cycles event instead of gettimeofday() for measuring"), 41 "Use cycles event instead of gettimeofday() for measuring"),
44 OPT_BOOLEAN('o', "only-prefault", &only_prefault,
45 "Show only the result with page faults before memcpy()"),
46 OPT_BOOLEAN('n', "no-prefault", &no_prefault,
47 "Show only the result without page faults before memcpy()"),
48 OPT_END() 42 OPT_END()
49}; 43};
50 44
@@ -110,103 +104,60 @@ static double timeval2double(struct timeval *ts)
110 return (double)ts->tv_sec + (double)ts->tv_usec / (double)1000000; 104 return (double)ts->tv_sec + (double)ts->tv_usec / (double)1000000;
111} 105}
112 106
113#define print_bps(x) do { \ 107#define print_bps(x) do { \
114 if (x < K) \ 108 if (x < K) \
115 printf(" %14lf B/Sec", x); \ 109 printf(" %14lf B/Sec\n", x); \
116 else if (x < K * K) \ 110 else if (x < K * K) \
117 printf(" %14lfd KB/Sec", x / K); \ 111 printf(" %14lfd KB/Sec\n", x / K); \
118 else if (x < K * K * K) \ 112 else if (x < K * K * K) \
119 printf(" %14lf MB/Sec", x / K / K); \ 113 printf(" %14lf MB/Sec\n", x / K / K); \
120 else \ 114 else \
121 printf(" %14lf GB/Sec", x / K / K / K); \ 115 printf(" %14lf GB/Sec\n", x / K / K / K); \
122 } while (0) 116 } while (0)
123 117
124struct bench_mem_info { 118struct bench_mem_info {
125 const struct routine *routines; 119 const struct routine *routines;
126 u64 (*do_cycle)(const struct routine *r, size_t len, bool prefault); 120 u64 (*do_cycle)(const struct routine *r, size_t len);
127 double (*do_gettimeofday)(const struct routine *r, size_t len, bool prefault); 121 double (*do_gettimeofday)(const struct routine *r, size_t len);
128 const char *const *usage; 122 const char *const *usage;
129}; 123};
130 124
131static void __bench_mem_routine(struct bench_mem_info *info, int r_idx, size_t len, double totallen) 125static void __bench_mem_routine(struct bench_mem_info *info, int r_idx, size_t len, double totallen)
132{ 126{
133 const struct routine *r = &info->routines[r_idx]; 127 const struct routine *r = &info->routines[r_idx];
134 double result_bps[2]; 128 double result_bps = 0.0;
135 u64 result_cycle[2]; 129 u64 result_cycle = 0;
136 int prefault = no_prefault ? 0 : 1;
137
138 result_cycle[0] = result_cycle[1] = 0ULL;
139 result_bps[0] = result_bps[1] = 0.0;
140 130
141 printf("Routine %s (%s)\n", r->name, r->desc); 131 printf("Routine %s (%s)\n", r->name, r->desc);
142 132
143 if (bench_format == BENCH_FORMAT_DEFAULT) 133 if (bench_format == BENCH_FORMAT_DEFAULT)
144 printf("# Copying %s Bytes ...\n\n", length_str); 134 printf("# Copying %s Bytes ...\n\n", length_str);
145 135
146 if (!only_prefault && prefault) { 136 if (use_cycle) {
147 /* Show both results: */ 137 result_cycle = info->do_cycle(r, len);
148 if (use_cycle) {
149 result_cycle[0] = info->do_cycle(r, len, false);
150 result_cycle[1] = info->do_cycle(r, len, true);
151 } else {
152 result_bps[0] = info->do_gettimeofday(r, len, false);
153 result_bps[1] = info->do_gettimeofday(r, len, true);
154 }
155 } else { 138 } else {
156 if (use_cycle) 139 result_bps = info->do_gettimeofday(r, len);
157 result_cycle[prefault] = info->do_cycle(r, len, only_prefault);
158 else
159 result_bps[prefault] = info->do_gettimeofday(r, len, only_prefault);
160 } 140 }
161 141
162 switch (bench_format) { 142 switch (bench_format) {
163 case BENCH_FORMAT_DEFAULT: 143 case BENCH_FORMAT_DEFAULT:
164 if (!only_prefault && prefault) { 144 if (use_cycle) {
165 if (use_cycle) { 145 printf(" %14lf Cycle/Byte\n", (double)result_cycle/totallen);
166 printf(" %14lf Cycle/Byte\n",
167 (double)result_cycle[0]
168 / totallen);
169 printf(" %14lf Cycle/Byte (with prefault)\n",
170 (double)result_cycle[1]
171 / totallen);
172 } else {
173 print_bps(result_bps[0]);
174 printf("\n");
175 print_bps(result_bps[1]);
176 printf(" (with prefault)\n");
177 }
178 } else { 146 } else {
179 if (use_cycle) { 147 print_bps(result_bps);
180 printf(" %14lf Cycle/Byte",
181 (double)result_cycle[prefault]
182 / totallen);
183 } else
184 print_bps(result_bps[prefault]);
185
186 printf("%s\n", only_prefault ? " (with prefault)" : "");
187 } 148 }
188 break; 149 break;
150
189 case BENCH_FORMAT_SIMPLE: 151 case BENCH_FORMAT_SIMPLE:
190 if (!only_prefault && prefault) { 152 if (use_cycle) {
191 if (use_cycle) { 153 printf("%lf\n", (double)result_cycle/totallen);
192 printf("%lf %lf\n",
193 (double)result_cycle[0] / totallen,
194 (double)result_cycle[1] / totallen);
195 } else {
196 printf("%lf %lf\n",
197 result_bps[0], result_bps[1]);
198 }
199 } else { 154 } else {
200 if (use_cycle) { 155 printf("%lf\n", result_bps);
201 printf("%lf\n", (double)result_cycle[prefault]
202 / totallen);
203 } else
204 printf("%lf\n", result_bps[prefault]);
205 } 156 }
206 break; 157 break;
158
207 default: 159 default:
208 /* Reaching this means there's some disaster: */ 160 BUG_ON(1);
209 die("unknown format: %d\n", bench_format);
210 break; 161 break;
211 } 162 }
212} 163}
@@ -219,11 +170,6 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *
219 170
220 argc = parse_options(argc, argv, options, info->usage, 0); 171 argc = parse_options(argc, argv, options, info->usage, 0);
221 172
222 if (no_prefault && only_prefault) {
223 fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n");
224 return 1;
225 }
226
227 if (use_cycle) 173 if (use_cycle)
228 init_cycle(); 174 init_cycle();
229 175
@@ -235,10 +181,6 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *
235 return 1; 181 return 1;
236 } 182 }
237 183
238 /* Same as without specifying either of prefault and no-prefault: */
239 if (only_prefault && no_prefault)
240 only_prefault = no_prefault = false;
241
242 if (!strncmp(routine, "all", 3)) { 184 if (!strncmp(routine, "all", 3)) {
243 for (i = 0; info->routines[i].name; i++) 185 for (i = 0; info->routines[i].name; i++)
244 __bench_mem_routine(info, i, len, totallen); 186 __bench_mem_routine(info, i, len, totallen);
@@ -278,7 +220,7 @@ static void memcpy_alloc_mem(void **dst, void **src, size_t length)
278 memset(*src, 0, length); 220 memset(*src, 0, length);
279} 221}
280 222
281static u64 do_memcpy_cycle(const struct routine *r, size_t len, bool prefault) 223static u64 do_memcpy_cycle(const struct routine *r, size_t len)
282{ 224{
283 u64 cycle_start = 0ULL, cycle_end = 0ULL; 225 u64 cycle_start = 0ULL, cycle_end = 0ULL;
284 void *src = NULL, *dst = NULL; 226 void *src = NULL, *dst = NULL;
@@ -287,8 +229,11 @@ static u64 do_memcpy_cycle(const struct routine *r, size_t len, bool prefault)
287 229
288 memcpy_alloc_mem(&dst, &src, len); 230 memcpy_alloc_mem(&dst, &src, len);
289 231
290 if (prefault) 232 /*
291 fn(dst, src, len); 233 * We prefault the freshly allocated memory range here,
234 * to not measure page fault overhead:
235 */
236 fn(dst, src, len);
292 237
293 cycle_start = get_cycle(); 238 cycle_start = get_cycle();
294 for (i = 0; i < iterations; ++i) 239 for (i = 0; i < iterations; ++i)
@@ -300,7 +245,7 @@ static u64 do_memcpy_cycle(const struct routine *r, size_t len, bool prefault)
300 return cycle_end - cycle_start; 245 return cycle_end - cycle_start;
301} 246}
302 247
303static double do_memcpy_gettimeofday(const struct routine *r, size_t len, bool prefault) 248static double do_memcpy_gettimeofday(const struct routine *r, size_t len)
304{ 249{
305 struct timeval tv_start, tv_end, tv_diff; 250 struct timeval tv_start, tv_end, tv_diff;
306 memcpy_t fn = r->fn.memcpy; 251 memcpy_t fn = r->fn.memcpy;
@@ -309,8 +254,11 @@ static double do_memcpy_gettimeofday(const struct routine *r, size_t len, bool p
309 254
310 memcpy_alloc_mem(&dst, &src, len); 255 memcpy_alloc_mem(&dst, &src, len);
311 256
312 if (prefault) 257 /*
313 fn(dst, src, len); 258 * We prefault the freshly allocated memory range here,
259 * to not measure page fault overhead:
260 */
261 fn(dst, src, len);
314 262
315 BUG_ON(gettimeofday(&tv_start, NULL)); 263 BUG_ON(gettimeofday(&tv_start, NULL));
316 for (i = 0; i < iterations; ++i) 264 for (i = 0; i < iterations; ++i)
@@ -321,6 +269,7 @@ static double do_memcpy_gettimeofday(const struct routine *r, size_t len, bool p
321 269
322 free(src); 270 free(src);
323 free(dst); 271 free(dst);
272
324 return (double)(((double)len * iterations) / timeval2double(&tv_diff)); 273 return (double)(((double)len * iterations) / timeval2double(&tv_diff));
325} 274}
326 275
@@ -343,7 +292,7 @@ static void memset_alloc_mem(void **dst, size_t length)
343 die("memory allocation failed - maybe length is too large?\n"); 292 die("memory allocation failed - maybe length is too large?\n");
344} 293}
345 294
346static u64 do_memset_cycle(const struct routine *r, size_t len, bool prefault) 295static u64 do_memset_cycle(const struct routine *r, size_t len)
347{ 296{
348 u64 cycle_start = 0ULL, cycle_end = 0ULL; 297 u64 cycle_start = 0ULL, cycle_end = 0ULL;
349 memset_t fn = r->fn.memset; 298 memset_t fn = r->fn.memset;
@@ -352,8 +301,11 @@ static u64 do_memset_cycle(const struct routine *r, size_t len, bool prefault)
352 301
353 memset_alloc_mem(&dst, len); 302 memset_alloc_mem(&dst, len);
354 303
355 if (prefault) 304 /*
356 fn(dst, -1, len); 305 * We prefault the freshly allocated memory range here,
306 * to not measure page fault overhead:
307 */
308 fn(dst, -1, len);
357 309
358 cycle_start = get_cycle(); 310 cycle_start = get_cycle();
359 for (i = 0; i < iterations; ++i) 311 for (i = 0; i < iterations; ++i)
@@ -364,8 +316,7 @@ static u64 do_memset_cycle(const struct routine *r, size_t len, bool prefault)
364 return cycle_end - cycle_start; 316 return cycle_end - cycle_start;
365} 317}
366 318
367static double do_memset_gettimeofday(const struct routine *r, size_t len, 319static double do_memset_gettimeofday(const struct routine *r, size_t len)
368 bool prefault)
369{ 320{
370 struct timeval tv_start, tv_end, tv_diff; 321 struct timeval tv_start, tv_end, tv_diff;
371 memset_t fn = r->fn.memset; 322 memset_t fn = r->fn.memset;
@@ -374,8 +325,11 @@ static double do_memset_gettimeofday(const struct routine *r, size_t len,
374 325
375 memset_alloc_mem(&dst, len); 326 memset_alloc_mem(&dst, len);
376 327
377 if (prefault) 328 /*
378 fn(dst, -1, len); 329 * We prefault the freshly allocated memory range here,
330 * to not measure page fault overhead:
331 */
332 fn(dst, -1, len);
379 333
380 BUG_ON(gettimeofday(&tv_start, NULL)); 334 BUG_ON(gettimeofday(&tv_start, NULL));
381 for (i = 0; i < iterations; ++i) 335 for (i = 0; i < iterations; ++i)