aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorMathieu Desnoyers <mathieu.desnoyers@polymtl.ca>2008-02-13 18:03:37 -0500
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2008-02-13 19:21:20 -0500
commitfb40bd78b0f91b274879cf5db8facd1e04b6052e (patch)
tree2347ccb5ad07f58ab5a4eb41174bb7b54d5f0c5b /kernel
parent9170d2f6e1dc4d79650fbf492d1cd45291c66504 (diff)
Linux Kernel Markers: support multiple probes
RCU style multiple probes support for the Linux Kernel Markers. Common case (one probe) is still fast and does not require dynamic allocation or a supplementary pointer dereference on the fast path. - Move preempt disable from the marker site to the callback. Since we now have an internal callback, move the preempt disable/enable to the callback instead of the marker site. Since the callback change is done asynchronously (passing from a handler that supports arguments to a handler that does not set up the arguments if no arguments are passed), we can safely update it even if it is outside the preempt disable section. - Move probe arm to probe connection. Now, a connected probe is automatically armed. Remove MARK_MAX_FORMAT_LEN, unused. This patch modifies the Linux Kernel Markers API : it removes the probe "arm/disarm" and changes the probe function prototype : it now expects a va_list * instead of a "...". If we want to have more than one probe connected to a marker at a given time (LTTng, or blktrace, systemtap) then we need this patch. Without it, connecting a second probe handler to a marker will fail. It allows us, for instance, to do interesting combinations : Do standard tracing with LTTng and, eventually, to compute statistics with SystemTAP, or to have a special trigger on an event that would call a systemtap script which would stop flight recorder tracing. Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> Cc: Christoph Hellwig <hch@infradead.org> Cc: Mike Mason <mmlnx@us.ibm.com> Cc: Dipankar Sarma <dipankar@in.ibm.com> Cc: David Smith <dsmith@redhat.com> Cc: "Paul E. McKenney" <paulmck@us.ibm.com> Cc: "Frank Ch. Eigler" <fche@redhat.com> Cc: Steven Rostedt <rostedt@goodmis.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/marker.c677
-rw-r--r--kernel/module.c7
2 files changed, 508 insertions, 176 deletions
diff --git a/kernel/marker.c b/kernel/marker.c
index 5323cfaedbce..c4c2cd8b61f5 100644
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -27,35 +27,42 @@
27extern struct marker __start___markers[]; 27extern struct marker __start___markers[];
28extern struct marker __stop___markers[]; 28extern struct marker __stop___markers[];
29 29
30/* Set to 1 to enable marker debug output */
31const int marker_debug;
32
30/* 33/*
31 * markers_mutex nests inside module_mutex. Markers mutex protects the builtin 34 * markers_mutex nests inside module_mutex. Markers mutex protects the builtin
32 * and module markers, the hash table and deferred_sync. 35 * and module markers and the hash table.
33 */ 36 */
34static DEFINE_MUTEX(markers_mutex); 37static DEFINE_MUTEX(markers_mutex);
35 38
36/* 39/*
37 * Marker deferred synchronization.
38 * Upon marker probe_unregister, we delay call to synchronize_sched() to
39 * accelerate mass unregistration (only when there is no more reference to a
40 * given module do we call synchronize_sched()). However, we need to make sure
41 * every critical region has ended before we re-arm a marker that has been
42 * unregistered and then registered back with a different probe data.
43 */
44static int deferred_sync;
45
46/*
47 * Marker hash table, containing the active markers. 40 * Marker hash table, containing the active markers.
48 * Protected by module_mutex. 41 * Protected by module_mutex.
49 */ 42 */
50#define MARKER_HASH_BITS 6 43#define MARKER_HASH_BITS 6
51#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS) 44#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS)
52 45
46/*
47 * Note about RCU :
48 * It is used to make sure every handler has finished using its private data
49 * between two consecutive operation (add or remove) on a given marker. It is
50 * also used to delay the free of multiple probes array until a quiescent state
51 * is reached.
52 * marker entries modifications are protected by the markers_mutex.
53 */
53struct marker_entry { 54struct marker_entry {
54 struct hlist_node hlist; 55 struct hlist_node hlist;
55 char *format; 56 char *format;
56 marker_probe_func *probe; 57 void (*call)(const struct marker *mdata, /* Probe wrapper */
57 void *private; 58 void *call_private, const char *fmt, ...);
59 struct marker_probe_closure single;
60 struct marker_probe_closure *multi;
58 int refcount; /* Number of times armed. 0 if disarmed. */ 61 int refcount; /* Number of times armed. 0 if disarmed. */
62 struct rcu_head rcu;
63 void *oldptr;
64 char rcu_pending:1;
65 char ptype:1;
59 char name[0]; /* Contains name'\0'format'\0' */ 66 char name[0]; /* Contains name'\0'format'\0' */
60}; 67};
61 68
@@ -63,7 +70,8 @@ static struct hlist_head marker_table[MARKER_TABLE_SIZE];
63 70
64/** 71/**
65 * __mark_empty_function - Empty probe callback 72 * __mark_empty_function - Empty probe callback
66 * @mdata: pointer of type const struct marker 73 * @probe_private: probe private data
74 * @call_private: call site private data
67 * @fmt: format string 75 * @fmt: format string
68 * @...: variable argument list 76 * @...: variable argument list
69 * 77 *
@@ -72,13 +80,267 @@ static struct hlist_head marker_table[MARKER_TABLE_SIZE];
72 * though the function pointer change and the marker enabling are two distinct 80 * though the function pointer change and the marker enabling are two distinct
73 * operations that modifies the execution flow of preemptible code. 81 * operations that modifies the execution flow of preemptible code.
74 */ 82 */
75void __mark_empty_function(const struct marker *mdata, void *private, 83void __mark_empty_function(void *probe_private, void *call_private,
76 const char *fmt, ...) 84 const char *fmt, va_list *args)
77{ 85{
78} 86}
79EXPORT_SYMBOL_GPL(__mark_empty_function); 87EXPORT_SYMBOL_GPL(__mark_empty_function);
80 88
81/* 89/*
90 * marker_probe_cb Callback that prepares the variable argument list for probes.
91 * @mdata: pointer of type struct marker
92 * @call_private: caller site private data
93 * @fmt: format string
94 * @...: Variable argument list.
95 *
96 * Since we do not use "typical" pointer based RCU in the 1 argument case, we
97 * need to put a full smp_rmb() in this branch. This is why we do not use
98 * rcu_dereference() for the pointer read.
99 */
100void marker_probe_cb(const struct marker *mdata, void *call_private,
101 const char *fmt, ...)
102{
103 va_list args;
104 char ptype;
105
106 /*
107 * disabling preemption to make sure the teardown of the callbacks can
108 * be done correctly when they are in modules and they insure RCU read
109 * coherency.
110 */
111 preempt_disable();
112 ptype = ACCESS_ONCE(mdata->ptype);
113 if (likely(!ptype)) {
114 marker_probe_func *func;
115 /* Must read the ptype before ptr. They are not data dependant,
116 * so we put an explicit smp_rmb() here. */
117 smp_rmb();
118 func = ACCESS_ONCE(mdata->single.func);
119 /* Must read the ptr before private data. They are not data
120 * dependant, so we put an explicit smp_rmb() here. */
121 smp_rmb();
122 va_start(args, fmt);
123 func(mdata->single.probe_private, call_private, fmt, &args);
124 va_end(args);
125 } else {
126 struct marker_probe_closure *multi;
127 int i;
128 /*
129 * multi points to an array, therefore accessing the array
130 * depends on reading multi. However, even in this case,
131 * we must insure that the pointer is read _before_ the array
132 * data. Same as rcu_dereference, but we need a full smp_rmb()
133 * in the fast path, so put the explicit barrier here.
134 */
135 smp_read_barrier_depends();
136 multi = ACCESS_ONCE(mdata->multi);
137 for (i = 0; multi[i].func; i++) {
138 va_start(args, fmt);
139 multi[i].func(multi[i].probe_private, call_private, fmt,
140 &args);
141 va_end(args);
142 }
143 }
144 preempt_enable();
145}
146EXPORT_SYMBOL_GPL(marker_probe_cb);
147
148/*
149 * marker_probe_cb Callback that does not prepare the variable argument list.
150 * @mdata: pointer of type struct marker
151 * @call_private: caller site private data
152 * @fmt: format string
153 * @...: Variable argument list.
154 *
155 * Should be connected to markers "MARK_NOARGS".
156 */
157void marker_probe_cb_noarg(const struct marker *mdata,
158 void *call_private, const char *fmt, ...)
159{
160 va_list args; /* not initialized */
161 char ptype;
162
163 preempt_disable();
164 ptype = ACCESS_ONCE(mdata->ptype);
165 if (likely(!ptype)) {
166 marker_probe_func *func;
167 /* Must read the ptype before ptr. They are not data dependant,
168 * so we put an explicit smp_rmb() here. */
169 smp_rmb();
170 func = ACCESS_ONCE(mdata->single.func);
171 /* Must read the ptr before private data. They are not data
172 * dependant, so we put an explicit smp_rmb() here. */
173 smp_rmb();
174 func(mdata->single.probe_private, call_private, fmt, &args);
175 } else {
176 struct marker_probe_closure *multi;
177 int i;
178 /*
179 * multi points to an array, therefore accessing the array
180 * depends on reading multi. However, even in this case,
181 * we must insure that the pointer is read _before_ the array
182 * data. Same as rcu_dereference, but we need a full smp_rmb()
183 * in the fast path, so put the explicit barrier here.
184 */
185 smp_read_barrier_depends();
186 multi = ACCESS_ONCE(mdata->multi);
187 for (i = 0; multi[i].func; i++)
188 multi[i].func(multi[i].probe_private, call_private, fmt,
189 &args);
190 }
191 preempt_enable();
192}
193EXPORT_SYMBOL_GPL(marker_probe_cb_noarg);
194
195static void free_old_closure(struct rcu_head *head)
196{
197 struct marker_entry *entry = container_of(head,
198 struct marker_entry, rcu);
199 kfree(entry->oldptr);
200 /* Make sure we free the data before setting the pending flag to 0 */
201 smp_wmb();
202 entry->rcu_pending = 0;
203}
204
205static void debug_print_probes(struct marker_entry *entry)
206{
207 int i;
208
209 if (!marker_debug)
210 return;
211
212 if (!entry->ptype) {
213 printk(KERN_DEBUG "Single probe : %p %p\n",
214 entry->single.func,
215 entry->single.probe_private);
216 } else {
217 for (i = 0; entry->multi[i].func; i++)
218 printk(KERN_DEBUG "Multi probe %d : %p %p\n", i,
219 entry->multi[i].func,
220 entry->multi[i].probe_private);
221 }
222}
223
224static struct marker_probe_closure *
225marker_entry_add_probe(struct marker_entry *entry,
226 marker_probe_func *probe, void *probe_private)
227{
228 int nr_probes = 0;
229 struct marker_probe_closure *old, *new;
230
231 WARN_ON(!probe);
232
233 debug_print_probes(entry);
234 old = entry->multi;
235 if (!entry->ptype) {
236 if (entry->single.func == probe &&
237 entry->single.probe_private == probe_private)
238 return ERR_PTR(-EBUSY);
239 if (entry->single.func == __mark_empty_function) {
240 /* 0 -> 1 probes */
241 entry->single.func = probe;
242 entry->single.probe_private = probe_private;
243 entry->refcount = 1;
244 entry->ptype = 0;
245 debug_print_probes(entry);
246 return NULL;
247 } else {
248 /* 1 -> 2 probes */
249 nr_probes = 1;
250 old = NULL;
251 }
252 } else {
253 /* (N -> N+1), (N != 0, 1) probes */
254 for (nr_probes = 0; old[nr_probes].func; nr_probes++)
255 if (old[nr_probes].func == probe
256 && old[nr_probes].probe_private
257 == probe_private)
258 return ERR_PTR(-EBUSY);
259 }
260 /* + 2 : one for new probe, one for NULL func */
261 new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure),
262 GFP_KERNEL);
263 if (new == NULL)
264 return ERR_PTR(-ENOMEM);
265 if (!old)
266 new[0] = entry->single;
267 else
268 memcpy(new, old,
269 nr_probes * sizeof(struct marker_probe_closure));
270 new[nr_probes].func = probe;
271 new[nr_probes].probe_private = probe_private;
272 entry->refcount = nr_probes + 1;
273 entry->multi = new;
274 entry->ptype = 1;
275 debug_print_probes(entry);
276 return old;
277}
278
279static struct marker_probe_closure *
280marker_entry_remove_probe(struct marker_entry *entry,
281 marker_probe_func *probe, void *probe_private)
282{
283 int nr_probes = 0, nr_del = 0, i;
284 struct marker_probe_closure *old, *new;
285
286 old = entry->multi;
287
288 debug_print_probes(entry);
289 if (!entry->ptype) {
290 /* 0 -> N is an error */
291 WARN_ON(entry->single.func == __mark_empty_function);
292 /* 1 -> 0 probes */
293 WARN_ON(probe && entry->single.func != probe);
294 WARN_ON(entry->single.probe_private != probe_private);
295 entry->single.func = __mark_empty_function;
296 entry->refcount = 0;
297 entry->ptype = 0;
298 debug_print_probes(entry);
299 return NULL;
300 } else {
301 /* (N -> M), (N > 1, M >= 0) probes */
302 for (nr_probes = 0; old[nr_probes].func; nr_probes++) {
303 if ((!probe || old[nr_probes].func == probe)
304 && old[nr_probes].probe_private
305 == probe_private)
306 nr_del++;
307 }
308 }
309
310 if (nr_probes - nr_del == 0) {
311 /* N -> 0, (N > 1) */
312 entry->single.func = __mark_empty_function;
313 entry->refcount = 0;
314 entry->ptype = 0;
315 } else if (nr_probes - nr_del == 1) {
316 /* N -> 1, (N > 1) */
317 for (i = 0; old[i].func; i++)
318 if ((probe && old[i].func != probe) ||
319 old[i].probe_private != probe_private)
320 entry->single = old[i];
321 entry->refcount = 1;
322 entry->ptype = 0;
323 } else {
324 int j = 0;
325 /* N -> M, (N > 1, M > 1) */
326 /* + 1 for NULL */
327 new = kzalloc((nr_probes - nr_del + 1)
328 * sizeof(struct marker_probe_closure), GFP_KERNEL);
329 if (new == NULL)
330 return ERR_PTR(-ENOMEM);
331 for (i = 0; old[i].func; i++)
332 if ((probe && old[i].func != probe) ||
333 old[i].probe_private != probe_private)
334 new[j++] = old[i];
335 entry->refcount = nr_probes - nr_del;
336 entry->ptype = 1;
337 entry->multi = new;
338 }
339 debug_print_probes(entry);
340 return old;
341}
342
343/*
82 * Get marker if the marker is present in the marker hash table. 344 * Get marker if the marker is present in the marker hash table.
83 * Must be called with markers_mutex held. 345 * Must be called with markers_mutex held.
84 * Returns NULL if not present. 346 * Returns NULL if not present.
@@ -102,8 +364,7 @@ static struct marker_entry *get_marker(const char *name)
102 * Add the marker to the marker hash table. Must be called with markers_mutex 364 * Add the marker to the marker hash table. Must be called with markers_mutex
103 * held. 365 * held.
104 */ 366 */
105static int add_marker(const char *name, const char *format, 367static struct marker_entry *add_marker(const char *name, const char *format)
106 marker_probe_func *probe, void *private)
107{ 368{
108 struct hlist_head *head; 369 struct hlist_head *head;
109 struct hlist_node *node; 370 struct hlist_node *node;
@@ -118,9 +379,8 @@ static int add_marker(const char *name, const char *format,
118 hlist_for_each_entry(e, node, head, hlist) { 379 hlist_for_each_entry(e, node, head, hlist) {
119 if (!strcmp(name, e->name)) { 380 if (!strcmp(name, e->name)) {
120 printk(KERN_NOTICE 381 printk(KERN_NOTICE
121 "Marker %s busy, probe %p already installed\n", 382 "Marker %s busy\n", name);
122 name, e->probe); 383 return ERR_PTR(-EBUSY); /* Already there */
123 return -EBUSY; /* Already there */
124 } 384 }
125 } 385 }
126 /* 386 /*
@@ -130,34 +390,42 @@ static int add_marker(const char *name, const char *format,
130 e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, 390 e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
131 GFP_KERNEL); 391 GFP_KERNEL);
132 if (!e) 392 if (!e)
133 return -ENOMEM; 393 return ERR_PTR(-ENOMEM);
134 memcpy(&e->name[0], name, name_len); 394 memcpy(&e->name[0], name, name_len);
135 if (format) { 395 if (format) {
136 e->format = &e->name[name_len]; 396 e->format = &e->name[name_len];
137 memcpy(e->format, format, format_len); 397 memcpy(e->format, format, format_len);
398 if (strcmp(e->format, MARK_NOARGS) == 0)
399 e->call = marker_probe_cb_noarg;
400 else
401 e->call = marker_probe_cb;
138 trace_mark(core_marker_format, "name %s format %s", 402 trace_mark(core_marker_format, "name %s format %s",
139 e->name, e->format); 403 e->name, e->format);
140 } else 404 } else {
141 e->format = NULL; 405 e->format = NULL;
142 e->probe = probe; 406 e->call = marker_probe_cb;
143 e->private = private; 407 }
408 e->single.func = __mark_empty_function;
409 e->single.probe_private = NULL;
410 e->multi = NULL;
411 e->ptype = 0;
144 e->refcount = 0; 412 e->refcount = 0;
413 e->rcu_pending = 0;
145 hlist_add_head(&e->hlist, head); 414 hlist_add_head(&e->hlist, head);
146 return 0; 415 return e;
147} 416}
148 417
149/* 418/*
150 * Remove the marker from the marker hash table. Must be called with mutex_lock 419 * Remove the marker from the marker hash table. Must be called with mutex_lock
151 * held. 420 * held.
152 */ 421 */
153static void *remove_marker(const char *name) 422static int remove_marker(const char *name)
154{ 423{
155 struct hlist_head *head; 424 struct hlist_head *head;
156 struct hlist_node *node; 425 struct hlist_node *node;
157 struct marker_entry *e; 426 struct marker_entry *e;
158 int found = 0; 427 int found = 0;
159 size_t len = strlen(name) + 1; 428 size_t len = strlen(name) + 1;
160 void *private = NULL;
161 u32 hash = jhash(name, len-1, 0); 429 u32 hash = jhash(name, len-1, 0);
162 430
163 head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; 431 head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
@@ -167,12 +435,16 @@ static void *remove_marker(const char *name)
167 break; 435 break;
168 } 436 }
169 } 437 }
170 if (found) { 438 if (!found)
171 private = e->private; 439 return -ENOENT;
172 hlist_del(&e->hlist); 440 if (e->single.func != __mark_empty_function)
173 kfree(e); 441 return -EBUSY;
174 } 442 hlist_del(&e->hlist);
175 return private; 443 /* Make sure the call_rcu has been executed */
444 if (e->rcu_pending)
445 rcu_barrier();
446 kfree(e);
447 return 0;
176} 448}
177 449
178/* 450/*
@@ -184,6 +456,7 @@ static int marker_set_format(struct marker_entry **entry, const char *format)
184 size_t name_len = strlen((*entry)->name) + 1; 456 size_t name_len = strlen((*entry)->name) + 1;
185 size_t format_len = strlen(format) + 1; 457 size_t format_len = strlen(format) + 1;
186 458
459
187 e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, 460 e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
188 GFP_KERNEL); 461 GFP_KERNEL);
189 if (!e) 462 if (!e)
@@ -191,11 +464,20 @@ static int marker_set_format(struct marker_entry **entry, const char *format)
191 memcpy(&e->name[0], (*entry)->name, name_len); 464 memcpy(&e->name[0], (*entry)->name, name_len);
192 e->format = &e->name[name_len]; 465 e->format = &e->name[name_len];
193 memcpy(e->format, format, format_len); 466 memcpy(e->format, format, format_len);
194 e->probe = (*entry)->probe; 467 if (strcmp(e->format, MARK_NOARGS) == 0)
195 e->private = (*entry)->private; 468 e->call = marker_probe_cb_noarg;
469 else
470 e->call = marker_probe_cb;
471 e->single = (*entry)->single;
472 e->multi = (*entry)->multi;
473 e->ptype = (*entry)->ptype;
196 e->refcount = (*entry)->refcount; 474 e->refcount = (*entry)->refcount;
475 e->rcu_pending = 0;
197 hlist_add_before(&e->hlist, &(*entry)->hlist); 476 hlist_add_before(&e->hlist, &(*entry)->hlist);
198 hlist_del(&(*entry)->hlist); 477 hlist_del(&(*entry)->hlist);
478 /* Make sure the call_rcu has been executed */
479 if ((*entry)->rcu_pending)
480 rcu_barrier();
199 kfree(*entry); 481 kfree(*entry);
200 *entry = e; 482 *entry = e;
201 trace_mark(core_marker_format, "name %s format %s", 483 trace_mark(core_marker_format, "name %s format %s",
@@ -206,7 +488,8 @@ static int marker_set_format(struct marker_entry **entry, const char *format)
206/* 488/*
207 * Sets the probe callback corresponding to one marker. 489 * Sets the probe callback corresponding to one marker.
208 */ 490 */
209static int set_marker(struct marker_entry **entry, struct marker *elem) 491static int set_marker(struct marker_entry **entry, struct marker *elem,
492 int active)
210{ 493{
211 int ret; 494 int ret;
212 WARN_ON(strcmp((*entry)->name, elem->name) != 0); 495 WARN_ON(strcmp((*entry)->name, elem->name) != 0);
@@ -226,9 +509,43 @@ static int set_marker(struct marker_entry **entry, struct marker *elem)
226 if (ret) 509 if (ret)
227 return ret; 510 return ret;
228 } 511 }
229 elem->call = (*entry)->probe; 512
230 elem->private = (*entry)->private; 513 /*
231 elem->state = 1; 514 * probe_cb setup (statically known) is done here. It is
515 * asynchronous with the rest of execution, therefore we only
516 * pass from a "safe" callback (with argument) to an "unsafe"
517 * callback (does not set arguments).
518 */
519 elem->call = (*entry)->call;
520 /*
521 * Sanity check :
522 * We only update the single probe private data when the ptr is
523 * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1)
524 */
525 WARN_ON(elem->single.func != __mark_empty_function
526 && elem->single.probe_private
527 != (*entry)->single.probe_private &&
528 !elem->ptype);
529 elem->single.probe_private = (*entry)->single.probe_private;
530 /*
531 * Make sure the private data is valid when we update the
532 * single probe ptr.
533 */
534 smp_wmb();
535 elem->single.func = (*entry)->single.func;
536 /*
537 * We also make sure that the new probe callbacks array is consistent
538 * before setting a pointer to it.
539 */
540 rcu_assign_pointer(elem->multi, (*entry)->multi);
541 /*
542 * Update the function or multi probe array pointer before setting the
543 * ptype.
544 */
545 smp_wmb();
546 elem->ptype = (*entry)->ptype;
547 elem->state = active;
548
232 return 0; 549 return 0;
233} 550}
234 551
@@ -240,8 +557,12 @@ static int set_marker(struct marker_entry **entry, struct marker *elem)
240 */ 557 */
241static void disable_marker(struct marker *elem) 558static void disable_marker(struct marker *elem)
242{ 559{
560 /* leave "call" as is. It is known statically. */
243 elem->state = 0; 561 elem->state = 0;
244 elem->call = __mark_empty_function; 562 elem->single.func = __mark_empty_function;
563 /* Update the function before setting the ptype */
564 smp_wmb();
565 elem->ptype = 0; /* single probe */
245 /* 566 /*
246 * Leave the private data and id there, because removal is racy and 567 * Leave the private data and id there, because removal is racy and
247 * should be done only after a synchronize_sched(). These are never used 568 * should be done only after a synchronize_sched(). These are never used
@@ -253,14 +574,11 @@ static void disable_marker(struct marker *elem)
253 * marker_update_probe_range - Update a probe range 574 * marker_update_probe_range - Update a probe range
254 * @begin: beginning of the range 575 * @begin: beginning of the range
255 * @end: end of the range 576 * @end: end of the range
256 * @probe_module: module address of the probe being updated
257 * @refcount: number of references left to the given probe_module (out)
258 * 577 *
259 * Updates the probe callback corresponding to a range of markers. 578 * Updates the probe callback corresponding to a range of markers.
260 */ 579 */
261void marker_update_probe_range(struct marker *begin, 580void marker_update_probe_range(struct marker *begin,
262 struct marker *end, struct module *probe_module, 581 struct marker *end)
263 int *refcount)
264{ 582{
265 struct marker *iter; 583 struct marker *iter;
266 struct marker_entry *mark_entry; 584 struct marker_entry *mark_entry;
@@ -268,15 +586,12 @@ void marker_update_probe_range(struct marker *begin,
268 mutex_lock(&markers_mutex); 586 mutex_lock(&markers_mutex);
269 for (iter = begin; iter < end; iter++) { 587 for (iter = begin; iter < end; iter++) {
270 mark_entry = get_marker(iter->name); 588 mark_entry = get_marker(iter->name);
271 if (mark_entry && mark_entry->refcount) { 589 if (mark_entry) {
272 set_marker(&mark_entry, iter); 590 set_marker(&mark_entry, iter,
591 !!mark_entry->refcount);
273 /* 592 /*
274 * ignore error, continue 593 * ignore error, continue
275 */ 594 */
276 if (probe_module)
277 if (probe_module ==
278 __module_text_address((unsigned long)mark_entry->probe))
279 (*refcount)++;
280 } else { 595 } else {
281 disable_marker(iter); 596 disable_marker(iter);
282 } 597 }
@@ -289,20 +604,27 @@ void marker_update_probe_range(struct marker *begin,
289 * Issues a synchronize_sched() when no reference to the module passed 604 * Issues a synchronize_sched() when no reference to the module passed
290 * as parameter is found in the probes so the probe module can be 605 * as parameter is found in the probes so the probe module can be
291 * safely unloaded from now on. 606 * safely unloaded from now on.
607 *
608 * Internal callback only changed before the first probe is connected to it.
609 * Single probe private data can only be changed on 0 -> 1 and 2 -> 1
610 * transitions. All other transitions will leave the old private data valid.
611 * This makes the non-atomicity of the callback/private data updates valid.
612 *
613 * "special case" updates :
614 * 0 -> 1 callback
615 * 1 -> 0 callback
616 * 1 -> 2 callbacks
617 * 2 -> 1 callbacks
618 * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates.
619 * Site effect : marker_set_format may delete the marker entry (creating a
620 * replacement).
292 */ 621 */
293static void marker_update_probes(struct module *probe_module) 622static void marker_update_probes(void)
294{ 623{
295 int refcount = 0;
296
297 /* Core kernel markers */ 624 /* Core kernel markers */
298 marker_update_probe_range(__start___markers, 625 marker_update_probe_range(__start___markers, __stop___markers);
299 __stop___markers, probe_module, &refcount);
300 /* Markers in modules. */ 626 /* Markers in modules. */
301 module_update_markers(probe_module, &refcount); 627 module_update_markers();
302 if (probe_module && refcount == 0) {
303 synchronize_sched();
304 deferred_sync = 0;
305 }
306} 628}
307 629
308/** 630/**
@@ -310,33 +632,49 @@ static void marker_update_probes(struct module *probe_module)
310 * @name: marker name 632 * @name: marker name
311 * @format: format string 633 * @format: format string
312 * @probe: probe handler 634 * @probe: probe handler
313 * @private: probe private data 635 * @probe_private: probe private data
314 * 636 *
315 * private data must be a valid allocated memory address, or NULL. 637 * private data must be a valid allocated memory address, or NULL.
316 * Returns 0 if ok, error value on error. 638 * Returns 0 if ok, error value on error.
639 * The probe address must at least be aligned on the architecture pointer size.
317 */ 640 */
318int marker_probe_register(const char *name, const char *format, 641int marker_probe_register(const char *name, const char *format,
319 marker_probe_func *probe, void *private) 642 marker_probe_func *probe, void *probe_private)
320{ 643{
321 struct marker_entry *entry; 644 struct marker_entry *entry;
322 int ret = 0; 645 int ret = 0;
646 struct marker_probe_closure *old;
323 647
324 mutex_lock(&markers_mutex); 648 mutex_lock(&markers_mutex);
325 entry = get_marker(name); 649 entry = get_marker(name);
326 if (entry && entry->refcount) { 650 if (!entry) {
327 ret = -EBUSY; 651 entry = add_marker(name, format);
328 goto end; 652 if (IS_ERR(entry)) {
329 } 653 ret = PTR_ERR(entry);
330 if (deferred_sync) { 654 goto end;
331 synchronize_sched(); 655 }
332 deferred_sync = 0;
333 } 656 }
334 ret = add_marker(name, format, probe, private); 657 /*
335 if (ret) 658 * If we detect that a call_rcu is pending for this marker,
659 * make sure it's executed now.
660 */
661 if (entry->rcu_pending)
662 rcu_barrier();
663 old = marker_entry_add_probe(entry, probe, probe_private);
664 if (IS_ERR(old)) {
665 ret = PTR_ERR(old);
336 goto end; 666 goto end;
667 }
337 mutex_unlock(&markers_mutex); 668 mutex_unlock(&markers_mutex);
338 marker_update_probes(NULL); 669 marker_update_probes(); /* may update entry */
339 return ret; 670 mutex_lock(&markers_mutex);
671 entry = get_marker(name);
672 WARN_ON(!entry);
673 entry->oldptr = old;
674 entry->rcu_pending = 1;
675 /* write rcu_pending before calling the RCU callback */
676 smp_wmb();
677 call_rcu(&entry->rcu, free_old_closure);
340end: 678end:
341 mutex_unlock(&markers_mutex); 679 mutex_unlock(&markers_mutex);
342 return ret; 680 return ret;
@@ -346,171 +684,166 @@ EXPORT_SYMBOL_GPL(marker_probe_register);
346/** 684/**
347 * marker_probe_unregister - Disconnect a probe from a marker 685 * marker_probe_unregister - Disconnect a probe from a marker
348 * @name: marker name 686 * @name: marker name
687 * @probe: probe function pointer
688 * @probe_private: probe private data
349 * 689 *
350 * Returns the private data given to marker_probe_register, or an ERR_PTR(). 690 * Returns 0 on success, or a negative error value (-ENOENT if no such marker).
691 * We do not need to call a synchronize_sched to make sure the probes have
692 * finished running before doing a module unload, because the module unload
693 * itself uses stop_machine(), which ensures that every preempt-disabled section
694 * has finished.
351 */ 695 */
352void *marker_probe_unregister(const char *name) 696int marker_probe_unregister(const char *name,
697 marker_probe_func *probe, void *probe_private)
353{ 698{
354 struct module *probe_module;
355 struct marker_entry *entry; 699 struct marker_entry *entry;
356 void *private; 700 struct marker_probe_closure *old;
701 int ret = 0;
357 702
358 mutex_lock(&markers_mutex); 703 mutex_lock(&markers_mutex);
359 entry = get_marker(name); 704 entry = get_marker(name);
360 if (!entry) { 705 if (!entry) {
361 private = ERR_PTR(-ENOENT); 706 ret = -ENOENT;
362 goto end; 707 goto end;
363 } 708 }
364 entry->refcount = 0; 709 if (entry->rcu_pending)
365 /* In what module is the probe handler ? */ 710 rcu_barrier();
366 probe_module = __module_text_address((unsigned long)entry->probe); 711 old = marker_entry_remove_probe(entry, probe, probe_private);
367 private = remove_marker(name);
368 deferred_sync = 1;
369 mutex_unlock(&markers_mutex); 712 mutex_unlock(&markers_mutex);
370 marker_update_probes(probe_module); 713 marker_update_probes(); /* may update entry */
371 return private; 714 mutex_lock(&markers_mutex);
715 entry = get_marker(name);
716 entry->oldptr = old;
717 entry->rcu_pending = 1;
718 /* write rcu_pending before calling the RCU callback */
719 smp_wmb();
720 call_rcu(&entry->rcu, free_old_closure);
721 remove_marker(name); /* Ignore busy error message */
372end: 722end:
373 mutex_unlock(&markers_mutex); 723 mutex_unlock(&markers_mutex);
374 return private; 724 return ret;
375} 725}
376EXPORT_SYMBOL_GPL(marker_probe_unregister); 726EXPORT_SYMBOL_GPL(marker_probe_unregister);
377 727
378/** 728static struct marker_entry *
379 * marker_probe_unregister_private_data - Disconnect a probe from a marker 729get_marker_from_private_data(marker_probe_func *probe, void *probe_private)
380 * @private: probe private data
381 *
382 * Unregister a marker by providing the registered private data.
383 * Returns the private data given to marker_probe_register, or an ERR_PTR().
384 */
385void *marker_probe_unregister_private_data(void *private)
386{ 730{
387 struct module *probe_module;
388 struct hlist_head *head;
389 struct hlist_node *node;
390 struct marker_entry *entry; 731 struct marker_entry *entry;
391 int found = 0;
392 unsigned int i; 732 unsigned int i;
733 struct hlist_head *head;
734 struct hlist_node *node;
393 735
394 mutex_lock(&markers_mutex);
395 for (i = 0; i < MARKER_TABLE_SIZE; i++) { 736 for (i = 0; i < MARKER_TABLE_SIZE; i++) {
396 head = &marker_table[i]; 737 head = &marker_table[i];
397 hlist_for_each_entry(entry, node, head, hlist) { 738 hlist_for_each_entry(entry, node, head, hlist) {
398 if (entry->private == private) { 739 if (!entry->ptype) {
399 found = 1; 740 if (entry->single.func == probe
400 goto iter_end; 741 && entry->single.probe_private
742 == probe_private)
743 return entry;
744 } else {
745 struct marker_probe_closure *closure;
746 closure = entry->multi;
747 for (i = 0; closure[i].func; i++) {
748 if (closure[i].func == probe &&
749 closure[i].probe_private
750 == probe_private)
751 return entry;
752 }
401 } 753 }
402 } 754 }
403 } 755 }
404iter_end: 756 return NULL;
405 if (!found) {
406 private = ERR_PTR(-ENOENT);
407 goto end;
408 }
409 entry->refcount = 0;
410 /* In what module is the probe handler ? */
411 probe_module = __module_text_address((unsigned long)entry->probe);
412 private = remove_marker(entry->name);
413 deferred_sync = 1;
414 mutex_unlock(&markers_mutex);
415 marker_update_probes(probe_module);
416 return private;
417end:
418 mutex_unlock(&markers_mutex);
419 return private;
420} 757}
421EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data);
422 758
423/** 759/**
424 * marker_arm - Arm a marker 760 * marker_probe_unregister_private_data - Disconnect a probe from a marker
425 * @name: marker name 761 * @probe: probe function
762 * @probe_private: probe private data
426 * 763 *
427 * Activate a marker. It keeps a reference count of the number of 764 * Unregister a probe by providing the registered private data.
428 * arming/disarming done. 765 * Only removes the first marker found in hash table.
429 * Returns 0 if ok, error value on error. 766 * Return 0 on success or error value.
767 * We do not need to call a synchronize_sched to make sure the probes have
768 * finished running before doing a module unload, because the module unload
769 * itself uses stop_machine(), which ensures that every preempt-disabled section
770 * has finished.
430 */ 771 */
431int marker_arm(const char *name) 772int marker_probe_unregister_private_data(marker_probe_func *probe,
773 void *probe_private)
432{ 774{
433 struct marker_entry *entry; 775 struct marker_entry *entry;
434 int ret = 0; 776 int ret = 0;
777 struct marker_probe_closure *old;
435 778
436 mutex_lock(&markers_mutex); 779 mutex_lock(&markers_mutex);
437 entry = get_marker(name); 780 entry = get_marker_from_private_data(probe, probe_private);
438 if (!entry) { 781 if (!entry) {
439 ret = -ENOENT; 782 ret = -ENOENT;
440 goto end; 783 goto end;
441 } 784 }
442 /* 785 if (entry->rcu_pending)
443 * Only need to update probes when refcount passes from 0 to 1. 786 rcu_barrier();
444 */ 787 old = marker_entry_remove_probe(entry, NULL, probe_private);
445 if (entry->refcount++)
446 goto end;
447end:
448 mutex_unlock(&markers_mutex); 788 mutex_unlock(&markers_mutex);
449 marker_update_probes(NULL); 789 marker_update_probes(); /* may update entry */
450 return ret;
451}
452EXPORT_SYMBOL_GPL(marker_arm);
453
454/**
455 * marker_disarm - Disarm a marker
456 * @name: marker name
457 *
458 * Disarm a marker. It keeps a reference count of the number of arming/disarming
459 * done.
460 * Returns 0 if ok, error value on error.
461 */
462int marker_disarm(const char *name)
463{
464 struct marker_entry *entry;
465 int ret = 0;
466
467 mutex_lock(&markers_mutex); 790 mutex_lock(&markers_mutex);
468 entry = get_marker(name); 791 entry = get_marker_from_private_data(probe, probe_private);
469 if (!entry) { 792 WARN_ON(!entry);
470 ret = -ENOENT; 793 entry->oldptr = old;
471 goto end; 794 entry->rcu_pending = 1;
472 } 795 /* write rcu_pending before calling the RCU callback */
473 /* 796 smp_wmb();
474 * Only permit decrement refcount if higher than 0. 797 call_rcu(&entry->rcu, free_old_closure);
475 * Do probe update only on 1 -> 0 transition. 798 remove_marker(entry->name); /* Ignore busy error message */
476 */
477 if (entry->refcount) {
478 if (--entry->refcount)
479 goto end;
480 } else {
481 ret = -EPERM;
482 goto end;
483 }
484end: 799end:
485 mutex_unlock(&markers_mutex); 800 mutex_unlock(&markers_mutex);
486 marker_update_probes(NULL);
487 return ret; 801 return ret;
488} 802}
489EXPORT_SYMBOL_GPL(marker_disarm); 803EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data);
490 804
491/** 805/**
492 * marker_get_private_data - Get a marker's probe private data 806 * marker_get_private_data - Get a marker's probe private data
493 * @name: marker name 807 * @name: marker name
808 * @probe: probe to match
809 * @num: get the nth matching probe's private data
494 * 810 *
811 * Returns the private data pointer of the nth probe (counting from 0) that
812 * matches @probe, or an ERR_PTR.
495 * Returns the private data pointer, or an ERR_PTR. 813 * Returns the private data pointer, or an ERR_PTR.
496 * The private data pointer should _only_ be dereferenced if the caller is the 814 * The private data pointer should _only_ be dereferenced if the caller is the
497 * owner of the data, or its content could vanish. This is mostly used to 815 * owner of the data, or its content could vanish. This is mostly used to
498 * confirm that a caller is the owner of a registered probe. 816 * confirm that a caller is the owner of a registered probe.
499 */ 817 */
500void *marker_get_private_data(const char *name) 818void *marker_get_private_data(const char *name, marker_probe_func *probe,
819 int num)
501{ 820{
502 struct hlist_head *head; 821 struct hlist_head *head;
503 struct hlist_node *node; 822 struct hlist_node *node;
504 struct marker_entry *e; 823 struct marker_entry *e;
505 size_t name_len = strlen(name) + 1; 824 size_t name_len = strlen(name) + 1;
506 u32 hash = jhash(name, name_len-1, 0); 825 u32 hash = jhash(name, name_len-1, 0);
507 int found = 0; 826 int i;
508 827
509 head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; 828 head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
510 hlist_for_each_entry(e, node, head, hlist) { 829 hlist_for_each_entry(e, node, head, hlist) {
511 if (!strcmp(name, e->name)) { 830 if (!strcmp(name, e->name)) {
512 found = 1; 831 if (!e->ptype) {
513 return e->private; 832 if (num == 0 && e->single.func == probe)
833 return e->single.probe_private;
834 else
835 break;
836 } else {
837 struct marker_probe_closure *closure;
838 int match = 0;
839 closure = e->multi;
840 for (i = 0; closure[i].func; i++) {
841 if (closure[i].func != probe)
842 continue;
843 if (match++ == num)
844 return closure[i].probe_private;
845 }
846 }
514 } 847 }
515 } 848 }
516 return ERR_PTR(-ENOENT); 849 return ERR_PTR(-ENOENT);
diff --git a/kernel/module.c b/kernel/module.c
index 4202da97a1da..92595bad3812 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2038,7 +2038,7 @@ static struct module *load_module(void __user *umod,
2038#ifdef CONFIG_MARKERS 2038#ifdef CONFIG_MARKERS
2039 if (!mod->taints) 2039 if (!mod->taints)
2040 marker_update_probe_range(mod->markers, 2040 marker_update_probe_range(mod->markers,
2041 mod->markers + mod->num_markers, NULL, NULL); 2041 mod->markers + mod->num_markers);
2042#endif 2042#endif
2043 err = module_finalize(hdr, sechdrs, mod); 2043 err = module_finalize(hdr, sechdrs, mod);
2044 if (err < 0) 2044 if (err < 0)
@@ -2564,7 +2564,7 @@ EXPORT_SYMBOL(struct_module);
2564#endif 2564#endif
2565 2565
2566#ifdef CONFIG_MARKERS 2566#ifdef CONFIG_MARKERS
2567void module_update_markers(struct module *probe_module, int *refcount) 2567void module_update_markers(void)
2568{ 2568{
2569 struct module *mod; 2569 struct module *mod;
2570 2570
@@ -2572,8 +2572,7 @@ void module_update_markers(struct module *probe_module, int *refcount)
2572 list_for_each_entry(mod, &modules, list) 2572 list_for_each_entry(mod, &modules, list)
2573 if (!mod->taints) 2573 if (!mod->taints)
2574 marker_update_probe_range(mod->markers, 2574 marker_update_probe_range(mod->markers,
2575 mod->markers + mod->num_markers, 2575 mod->markers + mod->num_markers);
2576 probe_module, refcount);
2577 mutex_unlock(&module_mutex); 2576 mutex_unlock(&module_mutex);
2578} 2577}
2579#endif 2578#endif