aboutsummaryrefslogtreecommitdiffstats
path: root/arch/s390/oprofile/init.c
diff options
context:
space:
mode:
authorAndreas Krebbel <krebbel@linux.vnet.ibm.com>2011-11-25 14:03:05 -0500
committerRobert Richter <robert.richter@amd.com>2011-12-07 05:47:09 -0500
commitdd3c4670d7fafeb18bf7542fbcfd2606fb06a4a1 (patch)
tree14258ccf4645c774bd0f765c9ec957fd0c5742ce /arch/s390/oprofile/init.c
parentf8c852031a383ac260ae37df7ad063d42d0ed271 (diff)
oprofile, s390: Add event interface to the System z hardware sampling module
With this patch the OProfile Basic Mode Sampling support for System z is enhanced with a counter file system. That way hardware sampling can be configured using the user space tools with only little modifications. With the patch by default new cpu_types (s390/z10, s390/z196) are returned in order to indicate that we are running a CPU which provides the hardware sampling facility. Existing user space tools will complain about an unknown cpu type. In order to be compatible with existing user space tools the `cpu_type' module parameter has been added. Setting the parameter to `timer' will force the module to return `timer' as cpu_type. The module will still try to use hardware sampling if available and the hwsampling virtual filesystem will also be available for configuration. So this has a different effect than using the generic oprofile module parameter `timer=1'. If the basic mode sampling is enabled on the machine and the cpu_type=timer parameter is not used the kernel module will provide the following virtual filesystem: /dev/oprofile/0/enabled /dev/oprofile/0/event /dev/oprofile/0/count /dev/oprofile/0/unit_mask /dev/oprofile/0/kernel /dev/oprofile/0/user In the counter file system only the values of 'enabled', 'count', 'kernel', and 'user' are evaluated by the kernel module. Everything else must contain fixed values. The 'event' value only supports a single event - HWSAMPLING with value 0. The 'count' value specifies the hardware sampling rate as it is passed to the CPU measurement facility. The 'kernel' and 'user' flags can now be used to filter for samples when using hardware sampling. Additionally, the following file will be created: /dev/oprofile/timer/enabled This will always be the inverted value of /dev/oprofile/0/enabled. 0 is not accepted without hardware sampling. Signed-off-by: Andreas Krebbel <krebbel@linux.vnet.ibm.com> Signed-off-by: Robert Richter <robert.richter@amd.com>
Diffstat (limited to 'arch/s390/oprofile/init.c')
-rw-r--r--arch/s390/oprofile/init.c372
1 files changed, 343 insertions, 29 deletions
diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
index 113d7cbbc065..6cf2286d0405 100644
--- a/arch/s390/oprofile/init.c
+++ b/arch/s390/oprofile/init.c
@@ -2,10 +2,11 @@
2 * arch/s390/oprofile/init.c 2 * arch/s390/oprofile/init.c
3 * 3 *
4 * S390 Version 4 * S390 Version
5 * Copyright (C) 2003 IBM Deutschland Entwicklung GmbH, IBM Corporation 5 * Copyright (C) 2002-2011 IBM Deutschland Entwicklung GmbH, IBM Corporation
6 * Author(s): Thomas Spatzier (tspat@de.ibm.com) 6 * Author(s): Thomas Spatzier (tspat@de.ibm.com)
7 * Author(s): Mahesh Salgaonkar (mahesh@linux.vnet.ibm.com) 7 * Author(s): Mahesh Salgaonkar (mahesh@linux.vnet.ibm.com)
8 * Author(s): Heinz Graalfs (graalfs@linux.vnet.ibm.com) 8 * Author(s): Heinz Graalfs (graalfs@linux.vnet.ibm.com)
9 * Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com)
9 * 10 *
10 * @remark Copyright 2002-2011 OProfile authors 11 * @remark Copyright 2002-2011 OProfile authors
11 */ 12 */
@@ -14,6 +15,8 @@
14#include <linux/init.h> 15#include <linux/init.h>
15#include <linux/errno.h> 16#include <linux/errno.h>
16#include <linux/fs.h> 17#include <linux/fs.h>
18#include <linux/module.h>
19#include <asm/processor.h>
17 20
18#include "../../../drivers/oprofile/oprof.h" 21#include "../../../drivers/oprofile/oprof.h"
19 22
@@ -22,6 +25,7 @@ extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth);
22#ifdef CONFIG_64BIT 25#ifdef CONFIG_64BIT
23 26
24#include "hwsampler.h" 27#include "hwsampler.h"
28#include "op_counter.h"
25 29
26#define DEFAULT_INTERVAL 4127518 30#define DEFAULT_INTERVAL 4127518
27 31
@@ -35,16 +39,41 @@ static unsigned long oprofile_max_interval;
35static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS; 39static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS;
36static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS; 40static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS;
37 41
38static int hwsampler_file; 42static int hwsampler_enabled;
39static int hwsampler_running; /* start_mutex must be held to change */ 43static int hwsampler_running; /* start_mutex must be held to change */
44static int hwsampler_available;
40 45
41static struct oprofile_operations timer_ops; 46static struct oprofile_operations timer_ops;
42 47
48struct op_counter_config counter_config;
49
50enum __force_cpu_type {
51 reserved = 0, /* do not force */
52 timer,
53};
54static int force_cpu_type;
55
56static int set_cpu_type(const char *str, struct kernel_param *kp)
57{
58 if (!strcmp(str, "timer")) {
59 force_cpu_type = timer;
60 printk(KERN_INFO "oprofile: forcing timer to be returned "
61 "as cpu type\n");
62 } else {
63 force_cpu_type = 0;
64 }
65
66 return 0;
67}
68module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0);
69MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling"
70 "(report cpu_type \"timer\"");
71
43static int oprofile_hwsampler_start(void) 72static int oprofile_hwsampler_start(void)
44{ 73{
45 int retval; 74 int retval;
46 75
47 hwsampler_running = hwsampler_file; 76 hwsampler_running = hwsampler_enabled;
48 77
49 if (!hwsampler_running) 78 if (!hwsampler_running)
50 return timer_ops.start(); 79 return timer_ops.start();
@@ -72,10 +101,16 @@ static void oprofile_hwsampler_stop(void)
72 return; 101 return;
73} 102}
74 103
104/*
105 * File ops used for:
106 * /dev/oprofile/0/enabled
107 * /dev/oprofile/hwsampling/hwsampler (cpu_type = timer)
108 */
109
75static ssize_t hwsampler_read(struct file *file, char __user *buf, 110static ssize_t hwsampler_read(struct file *file, char __user *buf,
76 size_t count, loff_t *offset) 111 size_t count, loff_t *offset)
77{ 112{
78 return oprofilefs_ulong_to_user(hwsampler_file, buf, count, offset); 113 return oprofilefs_ulong_to_user(hwsampler_enabled, buf, count, offset);
79} 114}
80 115
81static ssize_t hwsampler_write(struct file *file, char const __user *buf, 116static ssize_t hwsampler_write(struct file *file, char const __user *buf,
@@ -91,6 +126,9 @@ static ssize_t hwsampler_write(struct file *file, char const __user *buf,
91 if (retval) 126 if (retval)
92 return retval; 127 return retval;
93 128
129 if (val != 0 && val != 1)
130 return -EINVAL;
131
94 if (oprofile_started) 132 if (oprofile_started)
95 /* 133 /*
96 * save to do without locking as we set 134 * save to do without locking as we set
@@ -99,7 +137,7 @@ static ssize_t hwsampler_write(struct file *file, char const __user *buf,
99 */ 137 */
100 return -EBUSY; 138 return -EBUSY;
101 139
102 hwsampler_file = val; 140 hwsampler_enabled = val;
103 141
104 return count; 142 return count;
105} 143}
@@ -109,38 +147,311 @@ static const struct file_operations hwsampler_fops = {
109 .write = hwsampler_write, 147 .write = hwsampler_write,
110}; 148};
111 149
150/*
151 * File ops used for:
152 * /dev/oprofile/0/count
153 * /dev/oprofile/hwsampling/hw_interval (cpu_type = timer)
154 *
155 * Make sure that the value is within the hardware range.
156 */
157
158static ssize_t hw_interval_read(struct file *file, char __user *buf,
159 size_t count, loff_t *offset)
160{
161 return oprofilefs_ulong_to_user(oprofile_hw_interval, buf,
162 count, offset);
163}
164
165static ssize_t hw_interval_write(struct file *file, char const __user *buf,
166 size_t count, loff_t *offset)
167{
168 unsigned long val;
169 int retval;
170
171 if (*offset)
172 return -EINVAL;
173 retval = oprofilefs_ulong_from_user(&val, buf, count);
174 if (retval)
175 return retval;
176 if (val < oprofile_min_interval)
177 oprofile_hw_interval = oprofile_min_interval;
178 else if (val > oprofile_max_interval)
179 oprofile_hw_interval = oprofile_max_interval;
180 else
181 oprofile_hw_interval = val;
182
183 return count;
184}
185
186static const struct file_operations hw_interval_fops = {
187 .read = hw_interval_read,
188 .write = hw_interval_write,
189};
190
191/*
192 * File ops used for:
193 * /dev/oprofile/0/event
194 * Only a single event with number 0 is supported with this counter.
195 *
196 * /dev/oprofile/0/unit_mask
197 * This is a dummy file needed by the user space tools.
198 * No value other than 0 is accepted or returned.
199 */
200
201static ssize_t hwsampler_zero_read(struct file *file, char __user *buf,
202 size_t count, loff_t *offset)
203{
204 return oprofilefs_ulong_to_user(0, buf, count, offset);
205}
206
207static ssize_t hwsampler_zero_write(struct file *file, char const __user *buf,
208 size_t count, loff_t *offset)
209{
210 unsigned long val;
211 int retval;
212
213 if (*offset)
214 return -EINVAL;
215
216 retval = oprofilefs_ulong_from_user(&val, buf, count);
217 if (retval)
218 return retval;
219 if (val != 0)
220 return -EINVAL;
221 return count;
222}
223
224static const struct file_operations zero_fops = {
225 .read = hwsampler_zero_read,
226 .write = hwsampler_zero_write,
227};
228
229/* /dev/oprofile/0/kernel file ops. */
230
231static ssize_t hwsampler_kernel_read(struct file *file, char __user *buf,
232 size_t count, loff_t *offset)
233{
234 return oprofilefs_ulong_to_user(counter_config.kernel,
235 buf, count, offset);
236}
237
238static ssize_t hwsampler_kernel_write(struct file *file, char const __user *buf,
239 size_t count, loff_t *offset)
240{
241 unsigned long val;
242 int retval;
243
244 if (*offset)
245 return -EINVAL;
246
247 retval = oprofilefs_ulong_from_user(&val, buf, count);
248 if (retval)
249 return retval;
250
251 if (val != 0 && val != 1)
252 return -EINVAL;
253
254 counter_config.kernel = val;
255
256 return count;
257}
258
259static const struct file_operations kernel_fops = {
260 .read = hwsampler_kernel_read,
261 .write = hwsampler_kernel_write,
262};
263
264/* /dev/oprofile/0/user file ops. */
265
266static ssize_t hwsampler_user_read(struct file *file, char __user *buf,
267 size_t count, loff_t *offset)
268{
269 return oprofilefs_ulong_to_user(counter_config.user,
270 buf, count, offset);
271}
272
273static ssize_t hwsampler_user_write(struct file *file, char const __user *buf,
274 size_t count, loff_t *offset)
275{
276 unsigned long val;
277 int retval;
278
279 if (*offset)
280 return -EINVAL;
281
282 retval = oprofilefs_ulong_from_user(&val, buf, count);
283 if (retval)
284 return retval;
285
286 if (val != 0 && val != 1)
287 return -EINVAL;
288
289 counter_config.user = val;
290
291 return count;
292}
293
294static const struct file_operations user_fops = {
295 .read = hwsampler_user_read,
296 .write = hwsampler_user_write,
297};
298
299
300/*
301 * File ops used for: /dev/oprofile/timer/enabled
302 * The value always has to be the inverted value of hwsampler_enabled. So
303 * no separate variable is created. That way we do not need locking.
304 */
305
306static ssize_t timer_enabled_read(struct file *file, char __user *buf,
307 size_t count, loff_t *offset)
308{
309 return oprofilefs_ulong_to_user(!hwsampler_enabled, buf, count, offset);
310}
311
312static ssize_t timer_enabled_write(struct file *file, char const __user *buf,
313 size_t count, loff_t *offset)
314{
315 unsigned long val;
316 int retval;
317
318 if (*offset)
319 return -EINVAL;
320
321 retval = oprofilefs_ulong_from_user(&val, buf, count);
322 if (retval)
323 return retval;
324
325 if (val != 0 && val != 1)
326 return -EINVAL;
327
328 /* Timer cannot be disabled without having hardware sampling. */
329 if (val == 0 && !hwsampler_available)
330 return -EINVAL;
331
332 if (oprofile_started)
333 /*
334 * save to do without locking as we set
335 * hwsampler_running in start() when start_mutex is
336 * held
337 */
338 return -EBUSY;
339
340 hwsampler_enabled = !val;
341
342 return count;
343}
344
345static const struct file_operations timer_enabled_fops = {
346 .read = timer_enabled_read,
347 .write = timer_enabled_write,
348};
349
350
112static int oprofile_create_hwsampling_files(struct super_block *sb, 351static int oprofile_create_hwsampling_files(struct super_block *sb,
113 struct dentry *root) 352 struct dentry *root)
114{ 353{
115 struct dentry *hw_dir; 354 struct dentry *dir;
355
356 dir = oprofilefs_mkdir(sb, root, "timer");
357 if (!dir)
358 return -EINVAL;
359
360 oprofilefs_create_file(sb, dir, "enabled", &timer_enabled_fops);
361
362 if (!hwsampler_available)
363 return 0;
116 364
117 /* reinitialize default values */ 365 /* reinitialize default values */
118 hwsampler_file = 1; 366 hwsampler_enabled = 1;
367 counter_config.kernel = 1;
368 counter_config.user = 1;
119 369
120 hw_dir = oprofilefs_mkdir(sb, root, "hwsampling"); 370 if (!force_cpu_type) {
121 if (!hw_dir) 371 /*
122 return -EINVAL; 372 * Create the counter file system. A single virtual
373 * counter is created which can be used to
374 * enable/disable hardware sampling dynamically from
375 * user space. The user space will configure a single
376 * counter with a single event. The value of 'event'
377 * and 'unit_mask' are not evaluated by the kernel code
378 * and can only be set to 0.
379 */
380
381 dir = oprofilefs_mkdir(sb, root, "0");
382 if (!dir)
383 return -EINVAL;
123 384
124 oprofilefs_create_file(sb, hw_dir, "hwsampler", &hwsampler_fops); 385 oprofilefs_create_file(sb, dir, "enabled", &hwsampler_fops);
125 oprofilefs_create_ulong(sb, hw_dir, "hw_interval", 386 oprofilefs_create_file(sb, dir, "event", &zero_fops);
126 &oprofile_hw_interval); 387 oprofilefs_create_file(sb, dir, "count", &hw_interval_fops);
127 oprofilefs_create_ro_ulong(sb, hw_dir, "hw_min_interval", 388 oprofilefs_create_file(sb, dir, "unit_mask", &zero_fops);
128 &oprofile_min_interval); 389 oprofilefs_create_file(sb, dir, "kernel", &kernel_fops);
129 oprofilefs_create_ro_ulong(sb, hw_dir, "hw_max_interval", 390 oprofilefs_create_file(sb, dir, "user", &user_fops);
130 &oprofile_max_interval); 391 oprofilefs_create_ulong(sb, dir, "hw_sdbt_blocks",
131 oprofilefs_create_ulong(sb, hw_dir, "hw_sdbt_blocks", 392 &oprofile_sdbt_blocks);
132 &oprofile_sdbt_blocks);
133 393
394 } else {
395 /*
396 * Hardware sampling can be used but the cpu_type is
397 * forced to timer in order to deal with legacy user
398 * space tools. The /dev/oprofile/hwsampling fs is
399 * provided in that case.
400 */
401 dir = oprofilefs_mkdir(sb, root, "hwsampling");
402 if (!dir)
403 return -EINVAL;
404
405 oprofilefs_create_file(sb, dir, "hwsampler",
406 &hwsampler_fops);
407 oprofilefs_create_file(sb, dir, "hw_interval",
408 &hw_interval_fops);
409 oprofilefs_create_ro_ulong(sb, dir, "hw_min_interval",
410 &oprofile_min_interval);
411 oprofilefs_create_ro_ulong(sb, dir, "hw_max_interval",
412 &oprofile_max_interval);
413 oprofilefs_create_ulong(sb, dir, "hw_sdbt_blocks",
414 &oprofile_sdbt_blocks);
415 }
134 return 0; 416 return 0;
135} 417}
136 418
137static int oprofile_hwsampler_init(struct oprofile_operations *ops) 419static int oprofile_hwsampler_init(struct oprofile_operations *ops)
138{ 420{
421 /*
422 * Initialize the timer mode infrastructure as well in order
423 * to be able to switch back dynamically. oprofile_timer_init
424 * is not supposed to fail.
425 */
426 if (oprofile_timer_init(ops))
427 BUG();
428
429 memcpy(&timer_ops, ops, sizeof(timer_ops));
430 ops->create_files = oprofile_create_hwsampling_files;
431
432 /*
433 * If the user space tools do not support newer cpu types,
434 * the force_cpu_type module parameter
435 * can be used to always return \"timer\" as cpu type.
436 */
437 if (force_cpu_type != timer) {
438 struct cpuid id;
439
440 get_cpu_id (&id);
441
442 switch (id.machine) {
443 case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break;
444 case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break;
445 default: return -ENODEV;
446 }
447 }
448
139 if (hwsampler_setup()) 449 if (hwsampler_setup())
140 return -ENODEV; 450 return -ENODEV;
141 451
142 /* 452 /*
143 * create hwsampler files only if hwsampler_setup() succeeds. 453 * Query the range for the sampling interval from the
454 * hardware.
144 */ 455 */
145 oprofile_min_interval = hwsampler_query_min_interval(); 456 oprofile_min_interval = hwsampler_query_min_interval();
146 if (oprofile_min_interval == 0) 457 if (oprofile_min_interval == 0)
@@ -155,16 +466,11 @@ static int oprofile_hwsampler_init(struct oprofile_operations *ops)
155 if (oprofile_hw_interval > oprofile_max_interval) 466 if (oprofile_hw_interval > oprofile_max_interval)
156 oprofile_hw_interval = oprofile_max_interval; 467 oprofile_hw_interval = oprofile_max_interval;
157 468
158 if (oprofile_timer_init(ops)) 469 printk(KERN_INFO "oprofile: System z hardware sampling "
159 return -ENODEV; 470 "facility found.\n");
160
161 printk(KERN_INFO "oprofile: using hardware sampling\n");
162
163 memcpy(&timer_ops, ops, sizeof(timer_ops));
164 471
165 ops->start = oprofile_hwsampler_start; 472 ops->start = oprofile_hwsampler_start;
166 ops->stop = oprofile_hwsampler_stop; 473 ops->stop = oprofile_hwsampler_stop;
167 ops->create_files = oprofile_create_hwsampling_files;
168 474
169 return 0; 475 return 0;
170} 476}
@@ -181,7 +487,15 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
181 ops->backtrace = s390_backtrace; 487 ops->backtrace = s390_backtrace;
182 488
183#ifdef CONFIG_64BIT 489#ifdef CONFIG_64BIT
184 return oprofile_hwsampler_init(ops); 490
491 /*
492 * -ENODEV is not reported to the caller. The module itself
493 * will use the timer mode sampling as fallback and this is
494 * always available.
495 */
496 hwsampler_available = oprofile_hwsampler_init(ops) == 0;
497
498 return 0;
185#else 499#else
186 return -ENODEV; 500 return -ENODEV;
187#endif 501#endif