diff options
author | Jason Yeh <jason.yeh@amd.com> | 2009-07-08 07:49:38 -0400 |
---|---|---|
committer | Robert Richter <robert.richter@amd.com> | 2009-07-20 10:33:53 -0400 |
commit | 4d4036e0e7299c6cbb2d2421b4b30b7a409ce61a (patch) | |
tree | c9003cd927ed878412e89a59db0138b6b701b629 /drivers | |
parent | 6e63ea4b0b14ff5fb8a3ca704fcda7d28b95f079 (diff) |
oprofile: Implement performance counter multiplexing
The number of hardware counters is limited. The multiplexing feature
enables OProfile to gather more events than the hardware provides
counters for. This is realized by switching between events at a
user-specified time interval.
A new file (/dev/oprofile/time_slice) is added for the user to specify
the timer interval in ms. If the number of events to profile is higher
than the number of hardware counters available, the patch schedules
delayed work that switches the event counters and rewrites the
different sets of values into them. The switching mechanism needs to
be implemented for each architecture to support multiplexing. This
patch only implements AMD CPU support, but multiplexing can easily be
extended to other models and architectures.
There are follow-on patches that rework parts of this patch.
Signed-off-by: Jason Yeh <jason.yeh@amd.com>
Signed-off-by: Robert Richter <robert.richter@amd.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/oprofile/oprof.c | 78 | ||||
-rw-r--r-- | drivers/oprofile/oprof.h | 2 | ||||
-rw-r--r-- | drivers/oprofile/oprofile_files.c | 43 | ||||
-rw-r--r-- | drivers/oprofile/oprofile_stats.c | 10 |
4 files changed, 133 insertions, 0 deletions
diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c index 3cffce90f82a..7bc64af7cf99 100644 --- a/drivers/oprofile/oprof.c +++ b/drivers/oprofile/oprof.c | |||
@@ -12,6 +12,8 @@ | |||
12 | #include <linux/init.h> | 12 | #include <linux/init.h> |
13 | #include <linux/oprofile.h> | 13 | #include <linux/oprofile.h> |
14 | #include <linux/moduleparam.h> | 14 | #include <linux/moduleparam.h> |
15 | #include <linux/workqueue.h> | ||
16 | #include <linux/time.h> | ||
15 | #include <asm/mutex.h> | 17 | #include <asm/mutex.h> |
16 | 18 | ||
17 | #include "oprof.h" | 19 | #include "oprof.h" |
@@ -27,6 +29,15 @@ unsigned long oprofile_backtrace_depth; | |||
27 | static unsigned long is_setup; | 29 | static unsigned long is_setup; |
28 | static DEFINE_MUTEX(start_mutex); | 30 | static DEFINE_MUTEX(start_mutex); |
29 | 31 | ||
32 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | ||
33 | |||
34 | static void switch_worker(struct work_struct *work); | ||
35 | static DECLARE_DELAYED_WORK(switch_work, switch_worker); | ||
36 | unsigned long timeout_jiffies; | ||
37 | #define MULTIPLEXING_TIMER_DEFAULT 1 | ||
38 | |||
39 | #endif | ||
40 | |||
30 | /* timer | 41 | /* timer |
31 | 0 - use performance monitoring hardware if available | 42 | 0 - use performance monitoring hardware if available |
32 | 1 - use the timer int mechanism regardless | 43 | 1 - use the timer int mechanism regardless |
@@ -87,6 +98,20 @@ out: | |||
87 | return err; | 98 | return err; |
88 | } | 99 | } |
89 | 100 | ||
101 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | ||
102 | |||
103 | static void start_switch_worker(void) | ||
104 | { | ||
105 | schedule_delayed_work(&switch_work, timeout_jiffies); | ||
106 | } | ||
107 | |||
108 | static void switch_worker(struct work_struct *work) | ||
109 | { | ||
110 | if (!oprofile_ops.switch_events()) | ||
111 | start_switch_worker(); | ||
112 | } | ||
113 | |||
114 | #endif | ||
90 | 115 | ||
91 | /* Actually start profiling (echo 1>/dev/oprofile/enable) */ | 116 | /* Actually start profiling (echo 1>/dev/oprofile/enable) */ |
92 | int oprofile_start(void) | 117 | int oprofile_start(void) |
@@ -108,6 +133,11 @@ int oprofile_start(void) | |||
108 | if ((err = oprofile_ops.start())) | 133 | if ((err = oprofile_ops.start())) |
109 | goto out; | 134 | goto out; |
110 | 135 | ||
136 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | ||
137 | if (oprofile_ops.switch_events) | ||
138 | start_switch_worker(); | ||
139 | #endif | ||
140 | |||
111 | oprofile_started = 1; | 141 | oprofile_started = 1; |
112 | out: | 142 | out: |
113 | mutex_unlock(&start_mutex); | 143 | mutex_unlock(&start_mutex); |
@@ -123,6 +153,11 @@ void oprofile_stop(void) | |||
123 | goto out; | 153 | goto out; |
124 | oprofile_ops.stop(); | 154 | oprofile_ops.stop(); |
125 | oprofile_started = 0; | 155 | oprofile_started = 0; |
156 | |||
157 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | ||
158 | cancel_delayed_work_sync(&switch_work); | ||
159 | #endif | ||
160 | |||
126 | /* wake up the daemon to read what remains */ | 161 | /* wake up the daemon to read what remains */ |
127 | wake_up_buffer_waiter(); | 162 | wake_up_buffer_waiter(); |
128 | out: | 163 | out: |
@@ -155,6 +190,36 @@ post_sync: | |||
155 | mutex_unlock(&start_mutex); | 190 | mutex_unlock(&start_mutex); |
156 | } | 191 | } |
157 | 192 | ||
193 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | ||
194 | |||
195 | /* User inputs in ms, converts to jiffies */ | ||
196 | int oprofile_set_timeout(unsigned long val_msec) | ||
197 | { | ||
198 | int err = 0; | ||
199 | |||
200 | mutex_lock(&start_mutex); | ||
201 | |||
202 | if (oprofile_started) { | ||
203 | err = -EBUSY; | ||
204 | goto out; | ||
205 | } | ||
206 | |||
207 | if (!oprofile_ops.switch_events) { | ||
208 | err = -EINVAL; | ||
209 | goto out; | ||
210 | } | ||
211 | |||
212 | timeout_jiffies = msecs_to_jiffies(val_msec); | ||
213 | if (timeout_jiffies == MAX_JIFFY_OFFSET) | ||
214 | timeout_jiffies = msecs_to_jiffies(MULTIPLEXING_TIMER_DEFAULT); | ||
215 | |||
216 | out: | ||
217 | mutex_unlock(&start_mutex); | ||
218 | return err; | ||
219 | |||
220 | } | ||
221 | |||
222 | #endif | ||
158 | 223 | ||
159 | int oprofile_set_backtrace(unsigned long val) | 224 | int oprofile_set_backtrace(unsigned long val) |
160 | { | 225 | { |
@@ -179,10 +244,23 @@ out: | |||
179 | return err; | 244 | return err; |
180 | } | 245 | } |
181 | 246 | ||
247 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | ||
248 | |||
249 | static void __init oprofile_multiplexing_init(void) | ||
250 | { | ||
251 | timeout_jiffies = msecs_to_jiffies(MULTIPLEXING_TIMER_DEFAULT); | ||
252 | } | ||
253 | |||
254 | #endif | ||
255 | |||
182 | static int __init oprofile_init(void) | 256 | static int __init oprofile_init(void) |
183 | { | 257 | { |
184 | int err; | 258 | int err; |
185 | 259 | ||
260 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | ||
261 | oprofile_multiplexing_init(); | ||
262 | #endif | ||
263 | |||
186 | err = oprofile_arch_init(&oprofile_ops); | 264 | err = oprofile_arch_init(&oprofile_ops); |
187 | 265 | ||
188 | if (err < 0 || timer) { | 266 | if (err < 0 || timer) { |
diff --git a/drivers/oprofile/oprof.h b/drivers/oprofile/oprof.h index c288d3c24b50..ee38abcc74f3 100644 --- a/drivers/oprofile/oprof.h +++ b/drivers/oprofile/oprof.h | |||
@@ -27,6 +27,7 @@ extern unsigned long oprofile_buffer_watershed; | |||
27 | extern struct oprofile_operations oprofile_ops; | 27 | extern struct oprofile_operations oprofile_ops; |
28 | extern unsigned long oprofile_started; | 28 | extern unsigned long oprofile_started; |
29 | extern unsigned long oprofile_backtrace_depth; | 29 | extern unsigned long oprofile_backtrace_depth; |
30 | extern unsigned long timeout_jiffies; | ||
30 | 31 | ||
31 | struct super_block; | 32 | struct super_block; |
32 | struct dentry; | 33 | struct dentry; |
@@ -35,5 +36,6 @@ void oprofile_create_files(struct super_block *sb, struct dentry *root); | |||
35 | void oprofile_timer_init(struct oprofile_operations *ops); | 36 | void oprofile_timer_init(struct oprofile_operations *ops); |
36 | 37 | ||
37 | int oprofile_set_backtrace(unsigned long depth); | 38 | int oprofile_set_backtrace(unsigned long depth); |
39 | int oprofile_set_timeout(unsigned long time); | ||
38 | 40 | ||
39 | #endif /* OPROF_H */ | 41 | #endif /* OPROF_H */ |
diff --git a/drivers/oprofile/oprofile_files.c b/drivers/oprofile/oprofile_files.c index 5d36ffc30dd5..468ec3e4f856 100644 --- a/drivers/oprofile/oprofile_files.c +++ b/drivers/oprofile/oprofile_files.c | |||
@@ -9,6 +9,7 @@ | |||
9 | 9 | ||
10 | #include <linux/fs.h> | 10 | #include <linux/fs.h> |
11 | #include <linux/oprofile.h> | 11 | #include <linux/oprofile.h> |
12 | #include <linux/jiffies.h> | ||
12 | 13 | ||
13 | #include "event_buffer.h" | 14 | #include "event_buffer.h" |
14 | #include "oprofile_stats.h" | 15 | #include "oprofile_stats.h" |
@@ -22,6 +23,45 @@ unsigned long oprofile_buffer_size; | |||
22 | unsigned long oprofile_cpu_buffer_size; | 23 | unsigned long oprofile_cpu_buffer_size; |
23 | unsigned long oprofile_buffer_watershed; | 24 | unsigned long oprofile_buffer_watershed; |
24 | 25 | ||
26 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | ||
27 | |||
28 | static ssize_t timeout_read(struct file *file, char __user *buf, | ||
29 | size_t count, loff_t *offset) | ||
30 | { | ||
31 | return oprofilefs_ulong_to_user(jiffies_to_msecs(timeout_jiffies), | ||
32 | buf, count, offset); | ||
33 | } | ||
34 | |||
35 | |||
36 | static ssize_t timeout_write(struct file *file, char const __user *buf, | ||
37 | size_t count, loff_t *offset) | ||
38 | { | ||
39 | unsigned long val; | ||
40 | int retval; | ||
41 | |||
42 | if (*offset) | ||
43 | return -EINVAL; | ||
44 | |||
45 | retval = oprofilefs_ulong_from_user(&val, buf, count); | ||
46 | if (retval) | ||
47 | return retval; | ||
48 | |||
49 | retval = oprofile_set_timeout(val); | ||
50 | |||
51 | if (retval) | ||
52 | return retval; | ||
53 | return count; | ||
54 | } | ||
55 | |||
56 | |||
57 | static const struct file_operations timeout_fops = { | ||
58 | .read = timeout_read, | ||
59 | .write = timeout_write, | ||
60 | }; | ||
61 | |||
62 | #endif | ||
63 | |||
64 | |||
25 | static ssize_t depth_read(struct file *file, char __user *buf, size_t count, loff_t *offset) | 65 | static ssize_t depth_read(struct file *file, char __user *buf, size_t count, loff_t *offset) |
26 | { | 66 | { |
27 | return oprofilefs_ulong_to_user(oprofile_backtrace_depth, buf, count, | 67 | return oprofilefs_ulong_to_user(oprofile_backtrace_depth, buf, count, |
@@ -139,6 +179,9 @@ void oprofile_create_files(struct super_block *sb, struct dentry *root) | |||
139 | oprofilefs_create_file(sb, root, "cpu_type", &cpu_type_fops); | 179 | oprofilefs_create_file(sb, root, "cpu_type", &cpu_type_fops); |
140 | oprofilefs_create_file(sb, root, "backtrace_depth", &depth_fops); | 180 | oprofilefs_create_file(sb, root, "backtrace_depth", &depth_fops); |
141 | oprofilefs_create_file(sb, root, "pointer_size", &pointer_size_fops); | 181 | oprofilefs_create_file(sb, root, "pointer_size", &pointer_size_fops); |
182 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | ||
183 | oprofilefs_create_file(sb, root, "time_slice", &timeout_fops); | ||
184 | #endif | ||
142 | oprofile_create_stats_files(sb, root); | 185 | oprofile_create_stats_files(sb, root); |
143 | if (oprofile_ops.create_files) | 186 | if (oprofile_ops.create_files) |
144 | oprofile_ops.create_files(sb, root); | 187 | oprofile_ops.create_files(sb, root); |
diff --git a/drivers/oprofile/oprofile_stats.c b/drivers/oprofile/oprofile_stats.c index 3c2270a8300c..77a57a6792f6 100644 --- a/drivers/oprofile/oprofile_stats.c +++ b/drivers/oprofile/oprofile_stats.c | |||
@@ -16,6 +16,9 @@ | |||
16 | #include "cpu_buffer.h" | 16 | #include "cpu_buffer.h" |
17 | 17 | ||
18 | struct oprofile_stat_struct oprofile_stats; | 18 | struct oprofile_stat_struct oprofile_stats; |
19 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | ||
20 | atomic_t multiplex_counter; | ||
21 | #endif | ||
19 | 22 | ||
20 | void oprofile_reset_stats(void) | 23 | void oprofile_reset_stats(void) |
21 | { | 24 | { |
@@ -34,6 +37,9 @@ void oprofile_reset_stats(void) | |||
34 | atomic_set(&oprofile_stats.sample_lost_no_mapping, 0); | 37 | atomic_set(&oprofile_stats.sample_lost_no_mapping, 0); |
35 | atomic_set(&oprofile_stats.event_lost_overflow, 0); | 38 | atomic_set(&oprofile_stats.event_lost_overflow, 0); |
36 | atomic_set(&oprofile_stats.bt_lost_no_mapping, 0); | 39 | atomic_set(&oprofile_stats.bt_lost_no_mapping, 0); |
40 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | ||
41 | atomic_set(&multiplex_counter, 0); | ||
42 | #endif | ||
37 | } | 43 | } |
38 | 44 | ||
39 | 45 | ||
@@ -76,4 +82,8 @@ void oprofile_create_stats_files(struct super_block *sb, struct dentry *root) | |||
76 | &oprofile_stats.event_lost_overflow); | 82 | &oprofile_stats.event_lost_overflow); |
77 | oprofilefs_create_ro_atomic(sb, dir, "bt_lost_no_mapping", | 83 | oprofilefs_create_ro_atomic(sb, dir, "bt_lost_no_mapping", |
78 | &oprofile_stats.bt_lost_no_mapping); | 84 | &oprofile_stats.bt_lost_no_mapping); |
85 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | ||
86 | oprofilefs_create_ro_atomic(sb, dir, "multiplex_counter", | ||
87 | &multiplex_counter); | ||
88 | #endif | ||
79 | } | 89 | } |