diff options
Diffstat (limited to 'kernel/marker.c')
-rw-r--r-- | kernel/marker.c | 677 |
1 files changed, 505 insertions, 172 deletions
diff --git a/kernel/marker.c b/kernel/marker.c index 5323cfaedbce..c4c2cd8b61f5 100644 --- a/kernel/marker.c +++ b/kernel/marker.c | |||
@@ -27,35 +27,42 @@ | |||
27 | extern struct marker __start___markers[]; | 27 | extern struct marker __start___markers[]; |
28 | extern struct marker __stop___markers[]; | 28 | extern struct marker __stop___markers[]; |
29 | 29 | ||
30 | /* Set to 1 to enable marker debug output */ | ||
31 | const int marker_debug; | ||
32 | |||
30 | /* | 33 | /* |
31 | * markers_mutex nests inside module_mutex. Markers mutex protects the builtin | 34 | * markers_mutex nests inside module_mutex. Markers mutex protects the builtin |
32 | * and module markers, the hash table and deferred_sync. | 35 | * and module markers and the hash table. |
33 | */ | 36 | */ |
34 | static DEFINE_MUTEX(markers_mutex); | 37 | static DEFINE_MUTEX(markers_mutex); |
35 | 38 | ||
36 | /* | 39 | /* |
37 | * Marker deferred synchronization. | ||
38 | * Upon marker probe_unregister, we delay call to synchronize_sched() to | ||
39 | * accelerate mass unregistration (only when there is no more reference to a | ||
40 | * given module do we call synchronize_sched()). However, we need to make sure | ||
41 | * every critical region has ended before we re-arm a marker that has been | ||
42 | * unregistered and then registered back with a different probe data. | ||
43 | */ | ||
44 | static int deferred_sync; | ||
45 | |||
46 | /* | ||
47 | * Marker hash table, containing the active markers. | 40 | * Marker hash table, containing the active markers. |
48 | * Protected by module_mutex. | 41 | * Protected by module_mutex. |
49 | */ | 42 | */ |
50 | #define MARKER_HASH_BITS 6 | 43 | #define MARKER_HASH_BITS 6 |
51 | #define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS) | 44 | #define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS) |
52 | 45 | ||
46 | /* | ||
47 | * Note about RCU : | ||
48 | * It is used to make sure every handler has finished using its private data | ||
49 | * between two consecutive operations (add or remove) on a given marker. It is | ||
50 | * also used to delay the free of multiple probes array until a quiescent state | ||
51 | * is reached. | ||
52 | * marker entries modifications are protected by the markers_mutex. | ||
53 | */ | ||
53 | struct marker_entry { | 54 | struct marker_entry { |
54 | struct hlist_node hlist; | 55 | struct hlist_node hlist; |
55 | char *format; | 56 | char *format; |
56 | marker_probe_func *probe; | 57 | void (*call)(const struct marker *mdata, /* Probe wrapper */ |
57 | void *private; | 58 | void *call_private, const char *fmt, ...); |
59 | struct marker_probe_closure single; | ||
60 | struct marker_probe_closure *multi; | ||
58 | int refcount; /* Number of times armed. 0 if disarmed. */ | 61 | int refcount; /* Number of times armed. 0 if disarmed. */ |
62 | struct rcu_head rcu; | ||
63 | void *oldptr; | ||
64 | char rcu_pending:1; | ||
65 | char ptype:1; | ||
59 | char name[0]; /* Contains name'\0'format'\0' */ | 66 | char name[0]; /* Contains name'\0'format'\0' */ |
60 | }; | 67 | }; |
61 | 68 | ||
@@ -63,7 +70,8 @@ static struct hlist_head marker_table[MARKER_TABLE_SIZE]; | |||
63 | 70 | ||
64 | /** | 71 | /** |
65 | * __mark_empty_function - Empty probe callback | 72 | * __mark_empty_function - Empty probe callback |
66 | * @mdata: pointer of type const struct marker | 73 | * @probe_private: probe private data |
74 | * @call_private: call site private data | ||
67 | * @fmt: format string | 75 | * @fmt: format string |
68 | * @...: variable argument list | 76 | * @...: variable argument list |
69 | * | 77 | * |
@@ -72,13 +80,267 @@ static struct hlist_head marker_table[MARKER_TABLE_SIZE]; | |||
72 | * though the function pointer change and the marker enabling are two distinct | 80 | * though the function pointer change and the marker enabling are two distinct |
73 | * operations that modifies the execution flow of preemptible code. | 81 | * operations that modifies the execution flow of preemptible code. |
74 | */ | 82 | */ |
75 | void __mark_empty_function(const struct marker *mdata, void *private, | 83 | void __mark_empty_function(void *probe_private, void *call_private, |
76 | const char *fmt, ...) | 84 | const char *fmt, va_list *args) |
77 | { | 85 | { |
78 | } | 86 | } |
79 | EXPORT_SYMBOL_GPL(__mark_empty_function); | 87 | EXPORT_SYMBOL_GPL(__mark_empty_function); |
80 | 88 | ||
81 | /* | 89 | /* |
90 | * marker_probe_cb Callback that prepares the variable argument list for probes. | ||
91 | * @mdata: pointer of type struct marker | ||
92 | * @call_private: caller site private data | ||
93 | * @fmt: format string | ||
94 | * @...: Variable argument list. | ||
95 | * | ||
96 | * Since we do not use "typical" pointer based RCU in the 1 argument case, we | ||
97 | * need to put a full smp_rmb() in this branch. This is why we do not use | ||
98 | * rcu_dereference() for the pointer read. | ||
99 | */ | ||
100 | void marker_probe_cb(const struct marker *mdata, void *call_private, | ||
101 | const char *fmt, ...) | ||
102 | { | ||
103 | va_list args; | ||
104 | char ptype; | ||
105 | |||
106 | /* | ||
107 | * disabling preemption to make sure the teardown of the callbacks can | ||
108 | * be done correctly when they are in modules and to ensure RCU read | ||
109 | * coherency. | ||
110 | */ | ||
111 | preempt_disable(); | ||
112 | ptype = ACCESS_ONCE(mdata->ptype); | ||
113 | if (likely(!ptype)) { | ||
114 | marker_probe_func *func; | ||
115 | /* Must read the ptype before ptr. They are not data dependent, | ||
116 | * so we put an explicit smp_rmb() here. */ | ||
117 | smp_rmb(); | ||
118 | func = ACCESS_ONCE(mdata->single.func); | ||
119 | /* Must read the ptr before private data. They are not data | ||
120 | * dependent, so we put an explicit smp_rmb() here. */ | ||
121 | smp_rmb(); | ||
122 | va_start(args, fmt); | ||
123 | func(mdata->single.probe_private, call_private, fmt, &args); | ||
124 | va_end(args); | ||
125 | } else { | ||
126 | struct marker_probe_closure *multi; | ||
127 | int i; | ||
128 | /* | ||
129 | * multi points to an array, therefore accessing the array | ||
130 | * depends on reading multi. However, even in this case, | ||
131 | * we must ensure that the pointer is read _before_ the array | ||
132 | * data. Same as rcu_dereference, but we need a full smp_rmb() | ||
133 | * in the fast path, so put the explicit barrier here. | ||
134 | */ | ||
135 | smp_read_barrier_depends(); | ||
136 | multi = ACCESS_ONCE(mdata->multi); | ||
137 | for (i = 0; multi[i].func; i++) { | ||
138 | va_start(args, fmt); | ||
139 | multi[i].func(multi[i].probe_private, call_private, fmt, | ||
140 | &args); | ||
141 | va_end(args); | ||
142 | } | ||
143 | } | ||
144 | preempt_enable(); | ||
145 | } | ||
146 | EXPORT_SYMBOL_GPL(marker_probe_cb); | ||
147 | |||
148 | /* | ||
149 | * marker_probe_cb_noarg Callback that does not prepare the variable argument list. | ||
150 | * @mdata: pointer of type struct marker | ||
151 | * @call_private: caller site private data | ||
152 | * @fmt: format string | ||
153 | * @...: Variable argument list. | ||
154 | * | ||
155 | * Should be connected to markers "MARK_NOARGS". | ||
156 | */ | ||
157 | void marker_probe_cb_noarg(const struct marker *mdata, | ||
158 | void *call_private, const char *fmt, ...) | ||
159 | { | ||
160 | va_list args; /* not initialized */ | ||
161 | char ptype; | ||
162 | |||
163 | preempt_disable(); | ||
164 | ptype = ACCESS_ONCE(mdata->ptype); | ||
165 | if (likely(!ptype)) { | ||
166 | marker_probe_func *func; | ||
167 | /* Must read the ptype before ptr. They are not data dependent, | ||
168 | * so we put an explicit smp_rmb() here. */ | ||
169 | smp_rmb(); | ||
170 | func = ACCESS_ONCE(mdata->single.func); | ||
171 | /* Must read the ptr before private data. They are not data | ||
172 | * dependent, so we put an explicit smp_rmb() here. */ | ||
173 | smp_rmb(); | ||
174 | func(mdata->single.probe_private, call_private, fmt, &args); | ||
175 | } else { | ||
176 | struct marker_probe_closure *multi; | ||
177 | int i; | ||
178 | /* | ||
179 | * multi points to an array, therefore accessing the array | ||
180 | * depends on reading multi. However, even in this case, | ||
181 | * we must ensure that the pointer is read _before_ the array | ||
182 | * data. Same as rcu_dereference, but we need a full smp_rmb() | ||
183 | * in the fast path, so put the explicit barrier here. | ||
184 | */ | ||
185 | smp_read_barrier_depends(); | ||
186 | multi = ACCESS_ONCE(mdata->multi); | ||
187 | for (i = 0; multi[i].func; i++) | ||
188 | multi[i].func(multi[i].probe_private, call_private, fmt, | ||
189 | &args); | ||
190 | } | ||
191 | preempt_enable(); | ||
192 | } | ||
193 | EXPORT_SYMBOL_GPL(marker_probe_cb_noarg); | ||
194 | |||
195 | static void free_old_closure(struct rcu_head *head) | ||
196 | { | ||
197 | struct marker_entry *entry = container_of(head, | ||
198 | struct marker_entry, rcu); | ||
199 | kfree(entry->oldptr); | ||
200 | /* Make sure we free the data before setting the pending flag to 0 */ | ||
201 | smp_wmb(); | ||
202 | entry->rcu_pending = 0; | ||
203 | } | ||
204 | |||
205 | static void debug_print_probes(struct marker_entry *entry) | ||
206 | { | ||
207 | int i; | ||
208 | |||
209 | if (!marker_debug) | ||
210 | return; | ||
211 | |||
212 | if (!entry->ptype) { | ||
213 | printk(KERN_DEBUG "Single probe : %p %p\n", | ||
214 | entry->single.func, | ||
215 | entry->single.probe_private); | ||
216 | } else { | ||
217 | for (i = 0; entry->multi[i].func; i++) | ||
218 | printk(KERN_DEBUG "Multi probe %d : %p %p\n", i, | ||
219 | entry->multi[i].func, | ||
220 | entry->multi[i].probe_private); | ||
221 | } | ||
222 | } | ||
223 | |||
224 | static struct marker_probe_closure * | ||
225 | marker_entry_add_probe(struct marker_entry *entry, | ||
226 | marker_probe_func *probe, void *probe_private) | ||
227 | { | ||
228 | int nr_probes = 0; | ||
229 | struct marker_probe_closure *old, *new; | ||
230 | |||
231 | WARN_ON(!probe); | ||
232 | |||
233 | debug_print_probes(entry); | ||
234 | old = entry->multi; | ||
235 | if (!entry->ptype) { | ||
236 | if (entry->single.func == probe && | ||
237 | entry->single.probe_private == probe_private) | ||
238 | return ERR_PTR(-EBUSY); | ||
239 | if (entry->single.func == __mark_empty_function) { | ||
240 | /* 0 -> 1 probes */ | ||
241 | entry->single.func = probe; | ||
242 | entry->single.probe_private = probe_private; | ||
243 | entry->refcount = 1; | ||
244 | entry->ptype = 0; | ||
245 | debug_print_probes(entry); | ||
246 | return NULL; | ||
247 | } else { | ||
248 | /* 1 -> 2 probes */ | ||
249 | nr_probes = 1; | ||
250 | old = NULL; | ||
251 | } | ||
252 | } else { | ||
253 | /* (N -> N+1), (N != 0, 1) probes */ | ||
254 | for (nr_probes = 0; old[nr_probes].func; nr_probes++) | ||
255 | if (old[nr_probes].func == probe | ||
256 | && old[nr_probes].probe_private | ||
257 | == probe_private) | ||
258 | return ERR_PTR(-EBUSY); | ||
259 | } | ||
260 | /* + 2 : one for new probe, one for NULL func */ | ||
261 | new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure), | ||
262 | GFP_KERNEL); | ||
263 | if (new == NULL) | ||
264 | return ERR_PTR(-ENOMEM); | ||
265 | if (!old) | ||
266 | new[0] = entry->single; | ||
267 | else | ||
268 | memcpy(new, old, | ||
269 | nr_probes * sizeof(struct marker_probe_closure)); | ||
270 | new[nr_probes].func = probe; | ||
271 | new[nr_probes].probe_private = probe_private; | ||
272 | entry->refcount = nr_probes + 1; | ||
273 | entry->multi = new; | ||
274 | entry->ptype = 1; | ||
275 | debug_print_probes(entry); | ||
276 | return old; | ||
277 | } | ||
278 | |||
279 | static struct marker_probe_closure * | ||
280 | marker_entry_remove_probe(struct marker_entry *entry, | ||
281 | marker_probe_func *probe, void *probe_private) | ||
282 | { | ||
283 | int nr_probes = 0, nr_del = 0, i; | ||
284 | struct marker_probe_closure *old, *new; | ||
285 | |||
286 | old = entry->multi; | ||
287 | |||
288 | debug_print_probes(entry); | ||
289 | if (!entry->ptype) { | ||
290 | /* 0 -> N is an error */ | ||
291 | WARN_ON(entry->single.func == __mark_empty_function); | ||
292 | /* 1 -> 0 probes */ | ||
293 | WARN_ON(probe && entry->single.func != probe); | ||
294 | WARN_ON(entry->single.probe_private != probe_private); | ||
295 | entry->single.func = __mark_empty_function; | ||
296 | entry->refcount = 0; | ||
297 | entry->ptype = 0; | ||
298 | debug_print_probes(entry); | ||
299 | return NULL; | ||
300 | } else { | ||
301 | /* (N -> M), (N > 1, M >= 0) probes */ | ||
302 | for (nr_probes = 0; old[nr_probes].func; nr_probes++) { | ||
303 | if ((!probe || old[nr_probes].func == probe) | ||
304 | && old[nr_probes].probe_private | ||
305 | == probe_private) | ||
306 | nr_del++; | ||
307 | } | ||
308 | } | ||
309 | |||
310 | if (nr_probes - nr_del == 0) { | ||
311 | /* N -> 0, (N > 1) */ | ||
312 | entry->single.func = __mark_empty_function; | ||
313 | entry->refcount = 0; | ||
314 | entry->ptype = 0; | ||
315 | } else if (nr_probes - nr_del == 1) { | ||
316 | /* N -> 1, (N > 1) */ | ||
317 | for (i = 0; old[i].func; i++) | ||
318 | if ((probe && old[i].func != probe) || | ||
319 | old[i].probe_private != probe_private) | ||
320 | entry->single = old[i]; | ||
321 | entry->refcount = 1; | ||
322 | entry->ptype = 0; | ||
323 | } else { | ||
324 | int j = 0; | ||
325 | /* N -> M, (N > 1, M > 1) */ | ||
326 | /* + 1 for NULL */ | ||
327 | new = kzalloc((nr_probes - nr_del + 1) | ||
328 | * sizeof(struct marker_probe_closure), GFP_KERNEL); | ||
329 | if (new == NULL) | ||
330 | return ERR_PTR(-ENOMEM); | ||
331 | for (i = 0; old[i].func; i++) | ||
332 | if ((probe && old[i].func != probe) || | ||
333 | old[i].probe_private != probe_private) | ||
334 | new[j++] = old[i]; | ||
335 | entry->refcount = nr_probes - nr_del; | ||
336 | entry->ptype = 1; | ||
337 | entry->multi = new; | ||
338 | } | ||
339 | debug_print_probes(entry); | ||
340 | return old; | ||
341 | } | ||
342 | |||
343 | /* | ||
82 | * Get marker if the marker is present in the marker hash table. | 344 | * Get marker if the marker is present in the marker hash table. |
83 | * Must be called with markers_mutex held. | 345 | * Must be called with markers_mutex held. |
84 | * Returns NULL if not present. | 346 | * Returns NULL if not present. |
@@ -102,8 +364,7 @@ static struct marker_entry *get_marker(const char *name) | |||
102 | * Add the marker to the marker hash table. Must be called with markers_mutex | 364 | * Add the marker to the marker hash table. Must be called with markers_mutex |
103 | * held. | 365 | * held. |
104 | */ | 366 | */ |
105 | static int add_marker(const char *name, const char *format, | 367 | static struct marker_entry *add_marker(const char *name, const char *format) |
106 | marker_probe_func *probe, void *private) | ||
107 | { | 368 | { |
108 | struct hlist_head *head; | 369 | struct hlist_head *head; |
109 | struct hlist_node *node; | 370 | struct hlist_node *node; |
@@ -118,9 +379,8 @@ static int add_marker(const char *name, const char *format, | |||
118 | hlist_for_each_entry(e, node, head, hlist) { | 379 | hlist_for_each_entry(e, node, head, hlist) { |
119 | if (!strcmp(name, e->name)) { | 380 | if (!strcmp(name, e->name)) { |
120 | printk(KERN_NOTICE | 381 | printk(KERN_NOTICE |
121 | "Marker %s busy, probe %p already installed\n", | 382 | "Marker %s busy\n", name); |
122 | name, e->probe); | 383 | return ERR_PTR(-EBUSY); /* Already there */ |
123 | return -EBUSY; /* Already there */ | ||
124 | } | 384 | } |
125 | } | 385 | } |
126 | /* | 386 | /* |
@@ -130,34 +390,42 @@ static int add_marker(const char *name, const char *format, | |||
130 | e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, | 390 | e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, |
131 | GFP_KERNEL); | 391 | GFP_KERNEL); |
132 | if (!e) | 392 | if (!e) |
133 | return -ENOMEM; | 393 | return ERR_PTR(-ENOMEM); |
134 | memcpy(&e->name[0], name, name_len); | 394 | memcpy(&e->name[0], name, name_len); |
135 | if (format) { | 395 | if (format) { |
136 | e->format = &e->name[name_len]; | 396 | e->format = &e->name[name_len]; |
137 | memcpy(e->format, format, format_len); | 397 | memcpy(e->format, format, format_len); |
398 | if (strcmp(e->format, MARK_NOARGS) == 0) | ||
399 | e->call = marker_probe_cb_noarg; | ||
400 | else | ||
401 | e->call = marker_probe_cb; | ||
138 | trace_mark(core_marker_format, "name %s format %s", | 402 | trace_mark(core_marker_format, "name %s format %s", |
139 | e->name, e->format); | 403 | e->name, e->format); |
140 | } else | 404 | } else { |
141 | e->format = NULL; | 405 | e->format = NULL; |
142 | e->probe = probe; | 406 | e->call = marker_probe_cb; |
143 | e->private = private; | 407 | } |
408 | e->single.func = __mark_empty_function; | ||
409 | e->single.probe_private = NULL; | ||
410 | e->multi = NULL; | ||
411 | e->ptype = 0; | ||
144 | e->refcount = 0; | 412 | e->refcount = 0; |
413 | e->rcu_pending = 0; | ||
145 | hlist_add_head(&e->hlist, head); | 414 | hlist_add_head(&e->hlist, head); |
146 | return 0; | 415 | return e; |
147 | } | 416 | } |
148 | 417 | ||
149 | /* | 418 | /* |
150 | * Remove the marker from the marker hash table. Must be called with mutex_lock | 419 | * Remove the marker from the marker hash table. Must be called with mutex_lock |
151 | * held. | 420 | * held. |
152 | */ | 421 | */ |
153 | static void *remove_marker(const char *name) | 422 | static int remove_marker(const char *name) |
154 | { | 423 | { |
155 | struct hlist_head *head; | 424 | struct hlist_head *head; |
156 | struct hlist_node *node; | 425 | struct hlist_node *node; |
157 | struct marker_entry *e; | 426 | struct marker_entry *e; |
158 | int found = 0; | 427 | int found = 0; |
159 | size_t len = strlen(name) + 1; | 428 | size_t len = strlen(name) + 1; |
160 | void *private = NULL; | ||
161 | u32 hash = jhash(name, len-1, 0); | 429 | u32 hash = jhash(name, len-1, 0); |
162 | 430 | ||
163 | head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; | 431 | head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; |
@@ -167,12 +435,16 @@ static void *remove_marker(const char *name) | |||
167 | break; | 435 | break; |
168 | } | 436 | } |
169 | } | 437 | } |
170 | if (found) { | 438 | if (!found) |
171 | private = e->private; | 439 | return -ENOENT; |
172 | hlist_del(&e->hlist); | 440 | if (e->single.func != __mark_empty_function) |
173 | kfree(e); | 441 | return -EBUSY; |
174 | } | 442 | hlist_del(&e->hlist); |
175 | return private; | 443 | /* Make sure the call_rcu has been executed */ |
444 | if (e->rcu_pending) | ||
445 | rcu_barrier(); | ||
446 | kfree(e); | ||
447 | return 0; | ||
176 | } | 448 | } |
177 | 449 | ||
178 | /* | 450 | /* |
@@ -184,6 +456,7 @@ static int marker_set_format(struct marker_entry **entry, const char *format) | |||
184 | size_t name_len = strlen((*entry)->name) + 1; | 456 | size_t name_len = strlen((*entry)->name) + 1; |
185 | size_t format_len = strlen(format) + 1; | 457 | size_t format_len = strlen(format) + 1; |
186 | 458 | ||
459 | |||
187 | e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, | 460 | e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, |
188 | GFP_KERNEL); | 461 | GFP_KERNEL); |
189 | if (!e) | 462 | if (!e) |
@@ -191,11 +464,20 @@ static int marker_set_format(struct marker_entry **entry, const char *format) | |||
191 | memcpy(&e->name[0], (*entry)->name, name_len); | 464 | memcpy(&e->name[0], (*entry)->name, name_len); |
192 | e->format = &e->name[name_len]; | 465 | e->format = &e->name[name_len]; |
193 | memcpy(e->format, format, format_len); | 466 | memcpy(e->format, format, format_len); |
194 | e->probe = (*entry)->probe; | 467 | if (strcmp(e->format, MARK_NOARGS) == 0) |
195 | e->private = (*entry)->private; | 468 | e->call = marker_probe_cb_noarg; |
469 | else | ||
470 | e->call = marker_probe_cb; | ||
471 | e->single = (*entry)->single; | ||
472 | e->multi = (*entry)->multi; | ||
473 | e->ptype = (*entry)->ptype; | ||
196 | e->refcount = (*entry)->refcount; | 474 | e->refcount = (*entry)->refcount; |
475 | e->rcu_pending = 0; | ||
197 | hlist_add_before(&e->hlist, &(*entry)->hlist); | 476 | hlist_add_before(&e->hlist, &(*entry)->hlist); |
198 | hlist_del(&(*entry)->hlist); | 477 | hlist_del(&(*entry)->hlist); |
478 | /* Make sure the call_rcu has been executed */ | ||
479 | if ((*entry)->rcu_pending) | ||
480 | rcu_barrier(); | ||
199 | kfree(*entry); | 481 | kfree(*entry); |
200 | *entry = e; | 482 | *entry = e; |
201 | trace_mark(core_marker_format, "name %s format %s", | 483 | trace_mark(core_marker_format, "name %s format %s", |
@@ -206,7 +488,8 @@ static int marker_set_format(struct marker_entry **entry, const char *format) | |||
206 | /* | 488 | /* |
207 | * Sets the probe callback corresponding to one marker. | 489 | * Sets the probe callback corresponding to one marker. |
208 | */ | 490 | */ |
209 | static int set_marker(struct marker_entry **entry, struct marker *elem) | 491 | static int set_marker(struct marker_entry **entry, struct marker *elem, |
492 | int active) | ||
210 | { | 493 | { |
211 | int ret; | 494 | int ret; |
212 | WARN_ON(strcmp((*entry)->name, elem->name) != 0); | 495 | WARN_ON(strcmp((*entry)->name, elem->name) != 0); |
@@ -226,9 +509,43 @@ static int set_marker(struct marker_entry **entry, struct marker *elem) | |||
226 | if (ret) | 509 | if (ret) |
227 | return ret; | 510 | return ret; |
228 | } | 511 | } |
229 | elem->call = (*entry)->probe; | 512 | |
230 | elem->private = (*entry)->private; | 513 | /* |
231 | elem->state = 1; | 514 | * probe_cb setup (statically known) is done here. It is |
515 | * asynchronous with the rest of execution, therefore we only | ||
516 | * pass from a "safe" callback (with argument) to an "unsafe" | ||
517 | * callback (does not set arguments). | ||
518 | */ | ||
519 | elem->call = (*entry)->call; | ||
520 | /* | ||
521 | * Sanity check : | ||
522 | * We only update the single probe private data when the ptr is | ||
523 | * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1) | ||
524 | */ | ||
525 | WARN_ON(elem->single.func != __mark_empty_function | ||
526 | && elem->single.probe_private | ||
527 | != (*entry)->single.probe_private && | ||
528 | !elem->ptype); | ||
529 | elem->single.probe_private = (*entry)->single.probe_private; | ||
530 | /* | ||
531 | * Make sure the private data is valid when we update the | ||
532 | * single probe ptr. | ||
533 | */ | ||
534 | smp_wmb(); | ||
535 | elem->single.func = (*entry)->single.func; | ||
536 | /* | ||
537 | * We also make sure that the new probe callbacks array is consistent | ||
538 | * before setting a pointer to it. | ||
539 | */ | ||
540 | rcu_assign_pointer(elem->multi, (*entry)->multi); | ||
541 | /* | ||
542 | * Update the function or multi probe array pointer before setting the | ||
543 | * ptype. | ||
544 | */ | ||
545 | smp_wmb(); | ||
546 | elem->ptype = (*entry)->ptype; | ||
547 | elem->state = active; | ||
548 | |||
232 | return 0; | 549 | return 0; |
233 | } | 550 | } |
234 | 551 | ||
@@ -240,8 +557,12 @@ static int set_marker(struct marker_entry **entry, struct marker *elem) | |||
240 | */ | 557 | */ |
241 | static void disable_marker(struct marker *elem) | 558 | static void disable_marker(struct marker *elem) |
242 | { | 559 | { |
560 | /* leave "call" as is. It is known statically. */ | ||
243 | elem->state = 0; | 561 | elem->state = 0; |
244 | elem->call = __mark_empty_function; | 562 | elem->single.func = __mark_empty_function; |
563 | /* Update the function before setting the ptype */ | ||
564 | smp_wmb(); | ||
565 | elem->ptype = 0; /* single probe */ | ||
245 | /* | 566 | /* |
246 | * Leave the private data and id there, because removal is racy and | 567 | * Leave the private data and id there, because removal is racy and |
247 | * should be done only after a synchronize_sched(). These are never used | 568 | * should be done only after a synchronize_sched(). These are never used |
@@ -253,14 +574,11 @@ static void disable_marker(struct marker *elem) | |||
253 | * marker_update_probe_range - Update a probe range | 574 | * marker_update_probe_range - Update a probe range |
254 | * @begin: beginning of the range | 575 | * @begin: beginning of the range |
255 | * @end: end of the range | 576 | * @end: end of the range |
256 | * @probe_module: module address of the probe being updated | ||
257 | * @refcount: number of references left to the given probe_module (out) | ||
258 | * | 577 | * |
259 | * Updates the probe callback corresponding to a range of markers. | 578 | * Updates the probe callback corresponding to a range of markers. |
260 | */ | 579 | */ |
261 | void marker_update_probe_range(struct marker *begin, | 580 | void marker_update_probe_range(struct marker *begin, |
262 | struct marker *end, struct module *probe_module, | 581 | struct marker *end) |
263 | int *refcount) | ||
264 | { | 582 | { |
265 | struct marker *iter; | 583 | struct marker *iter; |
266 | struct marker_entry *mark_entry; | 584 | struct marker_entry *mark_entry; |
@@ -268,15 +586,12 @@ void marker_update_probe_range(struct marker *begin, | |||
268 | mutex_lock(&markers_mutex); | 586 | mutex_lock(&markers_mutex); |
269 | for (iter = begin; iter < end; iter++) { | 587 | for (iter = begin; iter < end; iter++) { |
270 | mark_entry = get_marker(iter->name); | 588 | mark_entry = get_marker(iter->name); |
271 | if (mark_entry && mark_entry->refcount) { | 589 | if (mark_entry) { |
272 | set_marker(&mark_entry, iter); | 590 | set_marker(&mark_entry, iter, |
591 | !!mark_entry->refcount); | ||
273 | /* | 592 | /* |
274 | * ignore error, continue | 593 | * ignore error, continue |
275 | */ | 594 | */ |
276 | if (probe_module) | ||
277 | if (probe_module == | ||
278 | __module_text_address((unsigned long)mark_entry->probe)) | ||
279 | (*refcount)++; | ||
280 | } else { | 595 | } else { |
281 | disable_marker(iter); | 596 | disable_marker(iter); |
282 | } | 597 | } |
@@ -289,20 +604,27 @@ void marker_update_probe_range(struct marker *begin, | |||
289 | * Issues a synchronize_sched() when no reference to the module passed | 604 | * Issues a synchronize_sched() when no reference to the module passed |
290 | * as parameter is found in the probes so the probe module can be | 605 | * as parameter is found in the probes so the probe module can be |
291 | * safely unloaded from now on. | 606 | * safely unloaded from now on. |
607 | * | ||
608 | * Internal callback only changed before the first probe is connected to it. | ||
609 | * Single probe private data can only be changed on 0 -> 1 and 2 -> 1 | ||
610 | * transitions. All other transitions will leave the old private data valid. | ||
611 | * This makes the non-atomicity of the callback/private data updates valid. | ||
612 | * | ||
613 | * "special case" updates : | ||
614 | * 0 -> 1 callback | ||
615 | * 1 -> 0 callback | ||
616 | * 1 -> 2 callbacks | ||
617 | * 2 -> 1 callbacks | ||
618 | * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates. | ||
619 | * Side effect : marker_set_format may delete the marker entry (creating a | ||
620 | * replacement). | ||
292 | */ | 621 | */ |
293 | static void marker_update_probes(struct module *probe_module) | 622 | static void marker_update_probes(void) |
294 | { | 623 | { |
295 | int refcount = 0; | ||
296 | |||
297 | /* Core kernel markers */ | 624 | /* Core kernel markers */ |
298 | marker_update_probe_range(__start___markers, | 625 | marker_update_probe_range(__start___markers, __stop___markers); |
299 | __stop___markers, probe_module, &refcount); | ||
300 | /* Markers in modules. */ | 626 | /* Markers in modules. */ |
301 | module_update_markers(probe_module, &refcount); | 627 | module_update_markers(); |
302 | if (probe_module && refcount == 0) { | ||
303 | synchronize_sched(); | ||
304 | deferred_sync = 0; | ||
305 | } | ||
306 | } | 628 | } |
307 | 629 | ||
308 | /** | 630 | /** |
@@ -310,33 +632,49 @@ static void marker_update_probes(struct module *probe_module) | |||
310 | * @name: marker name | 632 | * @name: marker name |
311 | * @format: format string | 633 | * @format: format string |
312 | * @probe: probe handler | 634 | * @probe: probe handler |
313 | * @private: probe private data | 635 | * @probe_private: probe private data |
314 | * | 636 | * |
315 | * private data must be a valid allocated memory address, or NULL. | 637 | * private data must be a valid allocated memory address, or NULL. |
316 | * Returns 0 if ok, error value on error. | 638 | * Returns 0 if ok, error value on error. |
639 | * The probe address must at least be aligned on the architecture pointer size. | ||
317 | */ | 640 | */ |
318 | int marker_probe_register(const char *name, const char *format, | 641 | int marker_probe_register(const char *name, const char *format, |
319 | marker_probe_func *probe, void *private) | 642 | marker_probe_func *probe, void *probe_private) |
320 | { | 643 | { |
321 | struct marker_entry *entry; | 644 | struct marker_entry *entry; |
322 | int ret = 0; | 645 | int ret = 0; |
646 | struct marker_probe_closure *old; | ||
323 | 647 | ||
324 | mutex_lock(&markers_mutex); | 648 | mutex_lock(&markers_mutex); |
325 | entry = get_marker(name); | 649 | entry = get_marker(name); |
326 | if (entry && entry->refcount) { | 650 | if (!entry) { |
327 | ret = -EBUSY; | 651 | entry = add_marker(name, format); |
328 | goto end; | 652 | if (IS_ERR(entry)) { |
329 | } | 653 | ret = PTR_ERR(entry); |
330 | if (deferred_sync) { | 654 | goto end; |
331 | synchronize_sched(); | 655 | } |
332 | deferred_sync = 0; | ||
333 | } | 656 | } |
334 | ret = add_marker(name, format, probe, private); | 657 | /* |
335 | if (ret) | 658 | * If we detect that a call_rcu is pending for this marker, |
659 | * make sure it's executed now. | ||
660 | */ | ||
661 | if (entry->rcu_pending) | ||
662 | rcu_barrier(); | ||
663 | old = marker_entry_add_probe(entry, probe, probe_private); | ||
664 | if (IS_ERR(old)) { | ||
665 | ret = PTR_ERR(old); | ||
336 | goto end; | 666 | goto end; |
667 | } | ||
337 | mutex_unlock(&markers_mutex); | 668 | mutex_unlock(&markers_mutex); |
338 | marker_update_probes(NULL); | 669 | marker_update_probes(); /* may update entry */ |
339 | return ret; | 670 | mutex_lock(&markers_mutex); |
671 | entry = get_marker(name); | ||
672 | WARN_ON(!entry); | ||
673 | entry->oldptr = old; | ||
674 | entry->rcu_pending = 1; | ||
675 | /* write rcu_pending before calling the RCU callback */ | ||
676 | smp_wmb(); | ||
677 | call_rcu(&entry->rcu, free_old_closure); | ||
340 | end: | 678 | end: |
341 | mutex_unlock(&markers_mutex); | 679 | mutex_unlock(&markers_mutex); |
342 | return ret; | 680 | return ret; |
@@ -346,171 +684,166 @@ EXPORT_SYMBOL_GPL(marker_probe_register); | |||
346 | /** | 684 | /** |
347 | * marker_probe_unregister - Disconnect a probe from a marker | 685 | * marker_probe_unregister - Disconnect a probe from a marker |
348 | * @name: marker name | 686 | * @name: marker name |
687 | * @probe: probe function pointer | ||
688 | * @probe_private: probe private data | ||
349 | * | 689 | * |
350 | * Returns the private data given to marker_probe_register, or an ERR_PTR(). | 690 | * Returns the private data given to marker_probe_register, or an ERR_PTR(). |
691 | * We do not need to call a synchronize_sched to make sure the probes have | ||
692 | * finished running before doing a module unload, because the module unload | ||
693 | * itself uses stop_machine(), which insures that every preempt disabled section | ||
694 | * have finished. | ||
351 | */ | 695 | */ |
352 | void *marker_probe_unregister(const char *name) | 696 | int marker_probe_unregister(const char *name, |
697 | marker_probe_func *probe, void *probe_private) | ||
353 | { | 698 | { |
354 | struct module *probe_module; | ||
355 | struct marker_entry *entry; | 699 | struct marker_entry *entry; |
356 | void *private; | 700 | struct marker_probe_closure *old; |
701 | int ret = 0; | ||
357 | 702 | ||
358 | mutex_lock(&markers_mutex); | 703 | mutex_lock(&markers_mutex); |
359 | entry = get_marker(name); | 704 | entry = get_marker(name); |
360 | if (!entry) { | 705 | if (!entry) { |
361 | private = ERR_PTR(-ENOENT); | 706 | ret = -ENOENT; |
362 | goto end; | 707 | goto end; |
363 | } | 708 | } |
364 | entry->refcount = 0; | 709 | if (entry->rcu_pending) |
365 | /* In what module is the probe handler ? */ | 710 | rcu_barrier(); |
366 | probe_module = __module_text_address((unsigned long)entry->probe); | 711 | old = marker_entry_remove_probe(entry, probe, probe_private); |
367 | private = remove_marker(name); | ||
368 | deferred_sync = 1; | ||
369 | mutex_unlock(&markers_mutex); | 712 | mutex_unlock(&markers_mutex); |
370 | marker_update_probes(probe_module); | 713 | marker_update_probes(); /* may update entry */ |
371 | return private; | 714 | mutex_lock(&markers_mutex); |
715 | entry = get_marker(name); | ||
716 | entry->oldptr = old; | ||
717 | entry->rcu_pending = 1; | ||
718 | /* write rcu_pending before calling the RCU callback */ | ||
719 | smp_wmb(); | ||
720 | call_rcu(&entry->rcu, free_old_closure); | ||
721 | remove_marker(name); /* Ignore busy error message */ | ||
372 | end: | 722 | end: |
373 | mutex_unlock(&markers_mutex); | 723 | mutex_unlock(&markers_mutex); |
374 | return private; | 724 | return ret; |
375 | } | 725 | } |
376 | EXPORT_SYMBOL_GPL(marker_probe_unregister); | 726 | EXPORT_SYMBOL_GPL(marker_probe_unregister); |
377 | 727 | ||
378 | /** | 728 | static struct marker_entry * |
379 | * marker_probe_unregister_private_data - Disconnect a probe from a marker | 729 | get_marker_from_private_data(marker_probe_func *probe, void *probe_private) |
380 | * @private: probe private data | ||
381 | * | ||
382 | * Unregister a marker by providing the registered private data. | ||
383 | * Returns the private data given to marker_probe_register, or an ERR_PTR(). | ||
384 | */ | ||
385 | void *marker_probe_unregister_private_data(void *private) | ||
386 | { | 730 | { |
387 | struct module *probe_module; | ||
388 | struct hlist_head *head; | ||
389 | struct hlist_node *node; | ||
390 | struct marker_entry *entry; | 731 | struct marker_entry *entry; |
391 | int found = 0; | ||
392 | unsigned int i; | 732 | unsigned int i; |
733 | struct hlist_head *head; | ||
734 | struct hlist_node *node; | ||
393 | 735 | ||
394 | mutex_lock(&markers_mutex); | ||
395 | for (i = 0; i < MARKER_TABLE_SIZE; i++) { | 736 | for (i = 0; i < MARKER_TABLE_SIZE; i++) { |
396 | head = &marker_table[i]; | 737 | head = &marker_table[i]; |
397 | hlist_for_each_entry(entry, node, head, hlist) { | 738 | hlist_for_each_entry(entry, node, head, hlist) { |
398 | if (entry->private == private) { | 739 | if (!entry->ptype) { |
399 | found = 1; | 740 | if (entry->single.func == probe |
400 | goto iter_end; | 741 | && entry->single.probe_private |
742 | == probe_private) | ||
743 | return entry; | ||
744 | } else { | ||
745 | struct marker_probe_closure *closure; | ||
746 | closure = entry->multi; | ||
747 | for (i = 0; closure[i].func; i++) { | ||
748 | if (closure[i].func == probe && | ||
749 | closure[i].probe_private | ||
750 | == probe_private) | ||
751 | return entry; | ||
752 | } | ||
401 | } | 753 | } |
402 | } | 754 | } |
403 | } | 755 | } |
404 | iter_end: | 756 | return NULL; |
405 | if (!found) { | ||
406 | private = ERR_PTR(-ENOENT); | ||
407 | goto end; | ||
408 | } | ||
409 | entry->refcount = 0; | ||
410 | /* In what module is the probe handler ? */ | ||
411 | probe_module = __module_text_address((unsigned long)entry->probe); | ||
412 | private = remove_marker(entry->name); | ||
413 | deferred_sync = 1; | ||
414 | mutex_unlock(&markers_mutex); | ||
415 | marker_update_probes(probe_module); | ||
416 | return private; | ||
417 | end: | ||
418 | mutex_unlock(&markers_mutex); | ||
419 | return private; | ||
420 | } | 757 | } |
421 | EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data); | ||
422 | 758 | ||
423 | /** | 759 | /** |
424 | * marker_arm - Arm a marker | 760 | * marker_probe_unregister_private_data - Disconnect a probe from a marker |
425 | * @name: marker name | 761 | * @probe: probe function |
762 | * @probe_private: probe private data | ||
426 | * | 763 | * |
427 | * Activate a marker. It keeps a reference count of the number of | 764 | * Unregister a probe by providing the registered private data. |
428 | * arming/disarming done. | 765 | * Only removes the first marker found in hash table. |
429 | * Returns 0 if ok, error value on error. | 766 | * Return 0 on success or error value. |
767 | * We do not need to call a synchronize_sched to make sure the probes have | ||
768 | * finished running before doing a module unload, because the module unload | ||
769 | * itself uses stop_machine(), which insures that every preempt disabled section | ||
770 | * have finished. | ||
430 | */ | 771 | */ |
431 | int marker_arm(const char *name) | 772 | int marker_probe_unregister_private_data(marker_probe_func *probe, |
773 | void *probe_private) | ||
432 | { | 774 | { |
433 | struct marker_entry *entry; | 775 | struct marker_entry *entry; |
434 | int ret = 0; | 776 | int ret = 0; |
777 | struct marker_probe_closure *old; | ||
435 | 778 | ||
436 | mutex_lock(&markers_mutex); | 779 | mutex_lock(&markers_mutex); |
437 | entry = get_marker(name); | 780 | entry = get_marker_from_private_data(probe, probe_private); |
438 | if (!entry) { | 781 | if (!entry) { |
439 | ret = -ENOENT; | 782 | ret = -ENOENT; |
440 | goto end; | 783 | goto end; |
441 | } | 784 | } |
442 | /* | 785 | if (entry->rcu_pending) |
443 | * Only need to update probes when refcount passes from 0 to 1. | 786 | rcu_barrier(); |
444 | */ | 787 | old = marker_entry_remove_probe(entry, NULL, probe_private); |
445 | if (entry->refcount++) | ||
446 | goto end; | ||
447 | end: | ||
448 | mutex_unlock(&markers_mutex); | 788 | mutex_unlock(&markers_mutex); |
449 | marker_update_probes(NULL); | 789 | marker_update_probes(); /* may update entry */ |
450 | return ret; | ||
451 | } | ||
452 | EXPORT_SYMBOL_GPL(marker_arm); | ||
453 | |||
454 | /** | ||
455 | * marker_disarm - Disarm a marker | ||
456 | * @name: marker name | ||
457 | * | ||
458 | * Disarm a marker. It keeps a reference count of the number of arming/disarming | ||
459 | * done. | ||
460 | * Returns 0 if ok, error value on error. | ||
461 | */ | ||
462 | int marker_disarm(const char *name) | ||
463 | { | ||
464 | struct marker_entry *entry; | ||
465 | int ret = 0; | ||
466 | |||
467 | mutex_lock(&markers_mutex); | 790 | mutex_lock(&markers_mutex); |
468 | entry = get_marker(name); | 791 | entry = get_marker_from_private_data(probe, probe_private); |
469 | if (!entry) { | 792 | WARN_ON(!entry); |
470 | ret = -ENOENT; | 793 | entry->oldptr = old; |
471 | goto end; | 794 | entry->rcu_pending = 1; |
472 | } | 795 | /* write rcu_pending before calling the RCU callback */ |
473 | /* | 796 | smp_wmb(); |
474 | * Only permit decrement refcount if higher than 0. | 797 | call_rcu(&entry->rcu, free_old_closure); |
475 | * Do probe update only on 1 -> 0 transition. | 798 | remove_marker(entry->name); /* Ignore busy error message */ |
476 | */ | ||
477 | if (entry->refcount) { | ||
478 | if (--entry->refcount) | ||
479 | goto end; | ||
480 | } else { | ||
481 | ret = -EPERM; | ||
482 | goto end; | ||
483 | } | ||
484 | end: | 799 | end: |
485 | mutex_unlock(&markers_mutex); | 800 | mutex_unlock(&markers_mutex); |
486 | marker_update_probes(NULL); | ||
487 | return ret; | 801 | return ret; |
488 | } | 802 | } |
489 | EXPORT_SYMBOL_GPL(marker_disarm); | 803 | EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data); |
490 | 804 | ||
491 | /** | 805 | /** |
492 | * marker_get_private_data - Get a marker's probe private data | 806 | * marker_get_private_data - Get a marker's probe private data |
493 | * @name: marker name | 807 | * @name: marker name |
808 | * @probe: probe to match | ||
809 | * @num: get the nth matching probe's private data | ||
494 | * | 810 | * |
811 | * Returns the nth private data pointer (starting from 0) matching, or an | ||
812 | * ERR_PTR. | ||
495 | * Returns the private data pointer, or an ERR_PTR. | 813 | * Returns the private data pointer, or an ERR_PTR. |
496 | * The private data pointer should _only_ be dereferenced if the caller is the | 814 | * The private data pointer should _only_ be dereferenced if the caller is the |
497 | * owner of the data, or its content could vanish. This is mostly used to | 815 | * owner of the data, or its content could vanish. This is mostly used to |
498 | * confirm that a caller is the owner of a registered probe. | 816 | * confirm that a caller is the owner of a registered probe. |
499 | */ | 817 | */ |
500 | void *marker_get_private_data(const char *name) | 818 | void *marker_get_private_data(const char *name, marker_probe_func *probe, |
819 | int num) | ||
501 | { | 820 | { |
502 | struct hlist_head *head; | 821 | struct hlist_head *head; |
503 | struct hlist_node *node; | 822 | struct hlist_node *node; |
504 | struct marker_entry *e; | 823 | struct marker_entry *e; |
505 | size_t name_len = strlen(name) + 1; | 824 | size_t name_len = strlen(name) + 1; |
506 | u32 hash = jhash(name, name_len-1, 0); | 825 | u32 hash = jhash(name, name_len-1, 0); |
507 | int found = 0; | 826 | int i; |
508 | 827 | ||
509 | head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; | 828 | head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; |
510 | hlist_for_each_entry(e, node, head, hlist) { | 829 | hlist_for_each_entry(e, node, head, hlist) { |
511 | if (!strcmp(name, e->name)) { | 830 | if (!strcmp(name, e->name)) { |
512 | found = 1; | 831 | if (!e->ptype) { |
513 | return e->private; | 832 | if (num == 0 && e->single.func == probe) |
833 | return e->single.probe_private; | ||
834 | else | ||
835 | break; | ||
836 | } else { | ||
837 | struct marker_probe_closure *closure; | ||
838 | int match = 0; | ||
839 | closure = e->multi; | ||
840 | for (i = 0; closure[i].func; i++) { | ||
841 | if (closure[i].func != probe) | ||
842 | continue; | ||
843 | if (match++ == num) | ||
844 | return closure[i].probe_private; | ||
845 | } | ||
846 | } | ||
514 | } | 847 | } |
515 | } | 848 | } |
516 | return ERR_PTR(-ENOENT); | 849 | return ERR_PTR(-ENOENT); |