aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSrinivasa D S <srinivasa@in.ibm.com>2008-07-25 04:46:04 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2008-07-25 13:53:30 -0400
commitef53d9c5e4da147ecaa43c44c5e5945eb83970a2 (patch)
tree3b596445e5d0613fda4b33a4ae96e0e3fffdcf1e
parent53a9600c634e3bfd6230e0597aca159bf4d4d010 (diff)
kprobes: improve kretprobe scalability with hashed locking
Currently list of kretprobe instances are stored in kretprobe object (as used_instances,free_instances) and in kretprobe hash table. We have one global kretprobe lock to serialise the access to these lists. This causes only one kretprobe handler to execute at a time. Hence affects system performance, particularly on SMP systems and when return probe is set on lot of functions (like on all systemcalls). Solution proposed here gives fine-grain locks that performs better on SMP system compared to present kretprobe implementation. Solution: 1) Instead of having one global lock to protect kretprobe instances present in kretprobe object and kretprobe hash table. We will have two locks, one lock for protecting kretprobe hash table and another lock for kretporbe object. 2) We hold lock present in kretprobe object while we modify kretprobe instance in kretprobe object and we hold per-hash-list lock while modifying kretprobe instances present in that hash list. To prevent deadlock, we never grab a per-hash-list lock while holding a kretprobe lock. 3) We can remove used_instances from struct kretprobe, as we can track used instances of kretprobe instances using kretprobe hash table. Time duration for kernel compilation ("make -j 8") on a 8-way ppc64 system with return probes set on all systemcalls looks like this. cacheline non-cacheline Un-patched kernel aligned patch aligned patch =============================================================================== real 9m46.784s 9m54.412s 10m2.450s user 40m5.715s 40m7.142s 40m4.273s sys 2m57.754s 2m58.583s 3m17.430s =========================================================== Time duration for kernel compilation ("make -j 8) on the same system, when kernel is not probed. ========================= real 9m26.389s user 40m8.775s sys 2m7.283s ========================= Signed-off-by: Srinivasa DS <srinivasa@in.ibm.com> Signed-off-by: Jim Keniston <jkenisto@us.ibm.com> Acked-by: Ananth N Mavinakayanahalli <ananth@in.ibm.com> Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> Cc: David S. Miller <davem@davemloft.net> Cc: Masami Hiramatsu <mhiramat@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--arch/arm/kernel/kprobes.c6
-rw-r--r--arch/ia64/kernel/kprobes.c6
-rw-r--r--arch/powerpc/kernel/kprobes.c6
-rw-r--r--arch/s390/kernel/kprobes.c6
-rw-r--r--arch/sparc64/kernel/kprobes.c11
-rw-r--r--arch/x86/kernel/kprobes.c6
-rw-r--r--include/linux/kprobes.h7
-rw-r--r--kernel/kprobes.c127
8 files changed, 108 insertions, 67 deletions
diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/kernel/kprobes.c
index 5ee39e10c8d1..d28513f14d05 100644
--- a/arch/arm/kernel/kprobes.c
+++ b/arch/arm/kernel/kprobes.c
@@ -296,8 +296,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
296 unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; 296 unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
297 297
298 INIT_HLIST_HEAD(&empty_rp); 298 INIT_HLIST_HEAD(&empty_rp);
299 spin_lock_irqsave(&kretprobe_lock, flags); 299 kretprobe_hash_lock(current, &head, &flags);
300 head = kretprobe_inst_table_head(current);
301 300
302 /* 301 /*
303 * It is possible to have multiple instances associated with a given 302 * It is possible to have multiple instances associated with a given
@@ -337,7 +336,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
337 } 336 }
338 337
339 kretprobe_assert(ri, orig_ret_address, trampoline_address); 338 kretprobe_assert(ri, orig_ret_address, trampoline_address);
340 spin_unlock_irqrestore(&kretprobe_lock, flags); 339 kretprobe_hash_unlock(current, &flags);
341 340
342 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { 341 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
343 hlist_del(&ri->hlist); 342 hlist_del(&ri->hlist);
@@ -347,7 +346,6 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
347 return (void *)orig_ret_address; 346 return (void *)orig_ret_address;
348} 347}
349 348
350/* Called with kretprobe_lock held. */
351void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, 349void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
352 struct pt_regs *regs) 350 struct pt_regs *regs)
353{ 351{
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index 233434f4f88f..f07688da947c 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -429,8 +429,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
429 ((struct fnptr *)kretprobe_trampoline)->ip; 429 ((struct fnptr *)kretprobe_trampoline)->ip;
430 430
431 INIT_HLIST_HEAD(&empty_rp); 431 INIT_HLIST_HEAD(&empty_rp);
432 spin_lock_irqsave(&kretprobe_lock, flags); 432 kretprobe_hash_lock(current, &head, &flags);
433 head = kretprobe_inst_table_head(current);
434 433
435 /* 434 /*
436 * It is possible to have multiple instances associated with a given 435 * It is possible to have multiple instances associated with a given
@@ -485,7 +484,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
485 kretprobe_assert(ri, orig_ret_address, trampoline_address); 484 kretprobe_assert(ri, orig_ret_address, trampoline_address);
486 485
487 reset_current_kprobe(); 486 reset_current_kprobe();
488 spin_unlock_irqrestore(&kretprobe_lock, flags); 487 kretprobe_hash_unlock(current, &flags);
489 preempt_enable_no_resched(); 488 preempt_enable_no_resched();
490 489
491 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { 490 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
@@ -500,7 +499,6 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
500 return 1; 499 return 1;
501} 500}
502 501
503/* Called with kretprobe_lock held */
504void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, 502void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
505 struct pt_regs *regs) 503 struct pt_regs *regs)
506{ 504{
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 4ba2af125450..de79915452c8 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -144,7 +144,6 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
144 kcb->kprobe_saved_msr = regs->msr; 144 kcb->kprobe_saved_msr = regs->msr;
145} 145}
146 146
147/* Called with kretprobe_lock held */
148void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, 147void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
149 struct pt_regs *regs) 148 struct pt_regs *regs)
150{ 149{
@@ -312,8 +311,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
312 unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; 311 unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
313 312
314 INIT_HLIST_HEAD(&empty_rp); 313 INIT_HLIST_HEAD(&empty_rp);
315 spin_lock_irqsave(&kretprobe_lock, flags); 314 kretprobe_hash_lock(current, &head, &flags);
316 head = kretprobe_inst_table_head(current);
317 315
318 /* 316 /*
319 * It is possible to have multiple instances associated with a given 317 * It is possible to have multiple instances associated with a given
@@ -352,7 +350,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
352 regs->nip = orig_ret_address; 350 regs->nip = orig_ret_address;
353 351
354 reset_current_kprobe(); 352 reset_current_kprobe();
355 spin_unlock_irqrestore(&kretprobe_lock, flags); 353 kretprobe_hash_unlock(current, &flags);
356 preempt_enable_no_resched(); 354 preempt_enable_no_resched();
357 355
358 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { 356 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 288ad490a6dd..4f82e5b5f879 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -270,7 +270,6 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
270 __ctl_store(kcb->kprobe_saved_ctl, 9, 11); 270 __ctl_store(kcb->kprobe_saved_ctl, 9, 11);
271} 271}
272 272
273/* Called with kretprobe_lock held */
274void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, 273void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
275 struct pt_regs *regs) 274 struct pt_regs *regs)
276{ 275{
@@ -377,8 +376,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
377 unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; 376 unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
378 377
379 INIT_HLIST_HEAD(&empty_rp); 378 INIT_HLIST_HEAD(&empty_rp);
380 spin_lock_irqsave(&kretprobe_lock, flags); 379 kretprobe_hash_lock(current, &head, &flags);
381 head = kretprobe_inst_table_head(current);
382 380
383 /* 381 /*
384 * It is possible to have multiple instances associated with a given 382 * It is possible to have multiple instances associated with a given
@@ -417,7 +415,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
417 regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE; 415 regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE;
418 416
419 reset_current_kprobe(); 417 reset_current_kprobe();
420 spin_unlock_irqrestore(&kretprobe_lock, flags); 418 kretprobe_hash_unlock(current, &flags);
421 preempt_enable_no_resched(); 419 preempt_enable_no_resched();
422 420
423 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { 421 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
diff --git a/arch/sparc64/kernel/kprobes.c b/arch/sparc64/kernel/kprobes.c
index f43b5d755354..201a6e547e4a 100644
--- a/arch/sparc64/kernel/kprobes.c
+++ b/arch/sparc64/kernel/kprobes.c
@@ -478,9 +478,9 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
478 return 0; 478 return 0;
479} 479}
480 480
481/* Called with kretprobe_lock held. The value stored in the return 481/* The value stored in the return address register is actually 2
482 * address register is actually 2 instructions before where the 482 * instructions before where the callee will return to.
483 * callee will return to. Sequences usually look something like this 483 * Sequences usually look something like this
484 * 484 *
485 * call some_function <--- return register points here 485 * call some_function <--- return register points here
486 * nop <--- call delay slot 486 * nop <--- call delay slot
@@ -512,8 +512,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
512 unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; 512 unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
513 513
514 INIT_HLIST_HEAD(&empty_rp); 514 INIT_HLIST_HEAD(&empty_rp);
515 spin_lock_irqsave(&kretprobe_lock, flags); 515 kretprobe_hash_lock(current, &head, &flags);
516 head = kretprobe_inst_table_head(current);
517 516
518 /* 517 /*
519 * It is possible to have multiple instances associated with a given 518 * It is possible to have multiple instances associated with a given
@@ -553,7 +552,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
553 regs->tnpc = orig_ret_address + 4; 552 regs->tnpc = orig_ret_address + 4;
554 553
555 reset_current_kprobe(); 554 reset_current_kprobe();
556 spin_unlock_irqrestore(&kretprobe_lock, flags); 555 kretprobe_hash_unlock(current, &flags);
557 preempt_enable_no_resched(); 556 preempt_enable_no_resched();
558 557
559 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { 558 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 43c019f85f0d..6c27679ec6aa 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -431,7 +431,6 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
431 regs->ip = (unsigned long)p->ainsn.insn; 431 regs->ip = (unsigned long)p->ainsn.insn;
432} 432}
433 433
434/* Called with kretprobe_lock held */
435void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, 434void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
436 struct pt_regs *regs) 435 struct pt_regs *regs)
437{ 436{
@@ -682,8 +681,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
682 unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; 681 unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
683 682
684 INIT_HLIST_HEAD(&empty_rp); 683 INIT_HLIST_HEAD(&empty_rp);
685 spin_lock_irqsave(&kretprobe_lock, flags); 684 kretprobe_hash_lock(current, &head, &flags);
686 head = kretprobe_inst_table_head(current);
687 /* fixup registers */ 685 /* fixup registers */
688#ifdef CONFIG_X86_64 686#ifdef CONFIG_X86_64
689 regs->cs = __KERNEL_CS; 687 regs->cs = __KERNEL_CS;
@@ -732,7 +730,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
732 730
733 kretprobe_assert(ri, orig_ret_address, trampoline_address); 731 kretprobe_assert(ri, orig_ret_address, trampoline_address);
734 732
735 spin_unlock_irqrestore(&kretprobe_lock, flags); 733 kretprobe_hash_unlock(current, &flags);
736 734
737 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { 735 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
738 hlist_del(&ri->hlist); 736 hlist_del(&ri->hlist);
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 04a3556bdea6..0be7795655fa 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -157,11 +157,10 @@ struct kretprobe {
157 int nmissed; 157 int nmissed;
158 size_t data_size; 158 size_t data_size;
159 struct hlist_head free_instances; 159 struct hlist_head free_instances;
160 struct hlist_head used_instances; 160 spinlock_t lock;
161}; 161};
162 162
163struct kretprobe_instance { 163struct kretprobe_instance {
164 struct hlist_node uflist; /* either on free list or used list */
165 struct hlist_node hlist; 164 struct hlist_node hlist;
166 struct kretprobe *rp; 165 struct kretprobe *rp;
167 kprobe_opcode_t *ret_addr; 166 kprobe_opcode_t *ret_addr;
@@ -201,7 +200,6 @@ static inline int init_test_probes(void)
201} 200}
202#endif /* CONFIG_KPROBES_SANITY_TEST */ 201#endif /* CONFIG_KPROBES_SANITY_TEST */
203 202
204extern spinlock_t kretprobe_lock;
205extern struct mutex kprobe_mutex; 203extern struct mutex kprobe_mutex;
206extern int arch_prepare_kprobe(struct kprobe *p); 204extern int arch_prepare_kprobe(struct kprobe *p);
207extern void arch_arm_kprobe(struct kprobe *p); 205extern void arch_arm_kprobe(struct kprobe *p);
@@ -214,6 +212,9 @@ extern void kprobes_inc_nmissed_count(struct kprobe *p);
214 212
215/* Get the kprobe at this addr (if any) - called with preemption disabled */ 213/* Get the kprobe at this addr (if any) - called with preemption disabled */
216struct kprobe *get_kprobe(void *addr); 214struct kprobe *get_kprobe(void *addr);
215void kretprobe_hash_lock(struct task_struct *tsk,
216 struct hlist_head **head, unsigned long *flags);
217void kretprobe_hash_unlock(struct task_struct *tsk, unsigned long *flags);
217struct hlist_head * kretprobe_inst_table_head(struct task_struct *tsk); 218struct hlist_head * kretprobe_inst_table_head(struct task_struct *tsk);
218 219
219/* kprobe_running() will just return the current_kprobe on this CPU */ 220/* kprobe_running() will just return the current_kprobe on this CPU */
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 1485ca8d0e00..cb0b3bde3617 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -62,6 +62,7 @@
62 addr = ((kprobe_opcode_t *)(kallsyms_lookup_name(name))) 62 addr = ((kprobe_opcode_t *)(kallsyms_lookup_name(name)))
63#endif 63#endif
64 64
65static int kprobes_initialized;
65static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE]; 66static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
66static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; 67static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
67 68
@@ -69,8 +70,15 @@ static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
69static bool kprobe_enabled; 70static bool kprobe_enabled;
70 71
71DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ 72DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */
72DEFINE_SPINLOCK(kretprobe_lock); /* Protects kretprobe_inst_table */
73static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; 73static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
74static struct {
75 spinlock_t lock ____cacheline_aligned;
76} kretprobe_table_locks[KPROBE_TABLE_SIZE];
77
78static spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
79{
80 return &(kretprobe_table_locks[hash].lock);
81}
74 82
75/* 83/*
76 * Normally, functions that we'd want to prohibit kprobes in, are marked 84 * Normally, functions that we'd want to prohibit kprobes in, are marked
@@ -368,26 +376,53 @@ void __kprobes kprobes_inc_nmissed_count(struct kprobe *p)
368 return; 376 return;
369} 377}
370 378
371/* Called with kretprobe_lock held */
372void __kprobes recycle_rp_inst(struct kretprobe_instance *ri, 379void __kprobes recycle_rp_inst(struct kretprobe_instance *ri,
373 struct hlist_head *head) 380 struct hlist_head *head)
374{ 381{
382 struct kretprobe *rp = ri->rp;
383
375 /* remove rp inst off the rprobe_inst_table */ 384 /* remove rp inst off the rprobe_inst_table */
376 hlist_del(&ri->hlist); 385 hlist_del(&ri->hlist);
377 if (ri->rp) { 386 INIT_HLIST_NODE(&ri->hlist);
378 /* remove rp inst off the used list */ 387 if (likely(rp)) {
379 hlist_del(&ri->uflist); 388 spin_lock(&rp->lock);
380 /* put rp inst back onto the free list */ 389 hlist_add_head(&ri->hlist, &rp->free_instances);
381 INIT_HLIST_NODE(&ri->uflist); 390 spin_unlock(&rp->lock);
382 hlist_add_head(&ri->uflist, &ri->rp->free_instances);
383 } else 391 } else
384 /* Unregistering */ 392 /* Unregistering */
385 hlist_add_head(&ri->hlist, head); 393 hlist_add_head(&ri->hlist, head);
386} 394}
387 395
388struct hlist_head __kprobes *kretprobe_inst_table_head(struct task_struct *tsk) 396void kretprobe_hash_lock(struct task_struct *tsk,
397 struct hlist_head **head, unsigned long *flags)
398{
399 unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
400 spinlock_t *hlist_lock;
401
402 *head = &kretprobe_inst_table[hash];
403 hlist_lock = kretprobe_table_lock_ptr(hash);
404 spin_lock_irqsave(hlist_lock, *flags);
405}
406
407void kretprobe_table_lock(unsigned long hash, unsigned long *flags)
389{ 408{
390 return &kretprobe_inst_table[hash_ptr(tsk, KPROBE_HASH_BITS)]; 409 spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
410 spin_lock_irqsave(hlist_lock, *flags);
411}
412
413void kretprobe_hash_unlock(struct task_struct *tsk, unsigned long *flags)
414{
415 unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
416 spinlock_t *hlist_lock;
417
418 hlist_lock = kretprobe_table_lock_ptr(hash);
419 spin_unlock_irqrestore(hlist_lock, *flags);
420}
421
422void kretprobe_table_unlock(unsigned long hash, unsigned long *flags)
423{
424 spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
425 spin_unlock_irqrestore(hlist_lock, *flags);
391} 426}
392 427
393/* 428/*
@@ -401,17 +436,21 @@ void __kprobes kprobe_flush_task(struct task_struct *tk)
401 struct kretprobe_instance *ri; 436 struct kretprobe_instance *ri;
402 struct hlist_head *head, empty_rp; 437 struct hlist_head *head, empty_rp;
403 struct hlist_node *node, *tmp; 438 struct hlist_node *node, *tmp;
404 unsigned long flags = 0; 439 unsigned long hash, flags = 0;
405 440
406 INIT_HLIST_HEAD(&empty_rp); 441 if (unlikely(!kprobes_initialized))
407 spin_lock_irqsave(&kretprobe_lock, flags); 442 /* Early boot. kretprobe_table_locks not yet initialized. */
408 head = kretprobe_inst_table_head(tk); 443 return;
444
445 hash = hash_ptr(tk, KPROBE_HASH_BITS);
446 head = &kretprobe_inst_table[hash];
447 kretprobe_table_lock(hash, &flags);
409 hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { 448 hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
410 if (ri->task == tk) 449 if (ri->task == tk)
411 recycle_rp_inst(ri, &empty_rp); 450 recycle_rp_inst(ri, &empty_rp);
412 } 451 }
413 spin_unlock_irqrestore(&kretprobe_lock, flags); 452 kretprobe_table_unlock(hash, &flags);
414 453 INIT_HLIST_HEAD(&empty_rp);
415 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { 454 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
416 hlist_del(&ri->hlist); 455 hlist_del(&ri->hlist);
417 kfree(ri); 456 kfree(ri);
@@ -423,24 +462,29 @@ static inline void free_rp_inst(struct kretprobe *rp)
423 struct kretprobe_instance *ri; 462 struct kretprobe_instance *ri;
424 struct hlist_node *pos, *next; 463 struct hlist_node *pos, *next;
425 464
426 hlist_for_each_entry_safe(ri, pos, next, &rp->free_instances, uflist) { 465 hlist_for_each_entry_safe(ri, pos, next, &rp->free_instances, hlist) {
427 hlist_del(&ri->uflist); 466 hlist_del(&ri->hlist);
428 kfree(ri); 467 kfree(ri);
429 } 468 }
430} 469}
431 470
432static void __kprobes cleanup_rp_inst(struct kretprobe *rp) 471static void __kprobes cleanup_rp_inst(struct kretprobe *rp)
433{ 472{
434 unsigned long flags; 473 unsigned long flags, hash;
435 struct kretprobe_instance *ri; 474 struct kretprobe_instance *ri;
436 struct hlist_node *pos, *next; 475 struct hlist_node *pos, *next;
476 struct hlist_head *head;
477
437 /* No race here */ 478 /* No race here */
438 spin_lock_irqsave(&kretprobe_lock, flags); 479 for (hash = 0; hash < KPROBE_TABLE_SIZE; hash++) {
439 hlist_for_each_entry_safe(ri, pos, next, &rp->used_instances, uflist) { 480 kretprobe_table_lock(hash, &flags);
440 ri->rp = NULL; 481 head = &kretprobe_inst_table[hash];
441 hlist_del(&ri->uflist); 482 hlist_for_each_entry_safe(ri, pos, next, head, hlist) {
483 if (ri->rp == rp)
484 ri->rp = NULL;
485 }
486 kretprobe_table_unlock(hash, &flags);
442 } 487 }
443 spin_unlock_irqrestore(&kretprobe_lock, flags);
444 free_rp_inst(rp); 488 free_rp_inst(rp);
445} 489}
446 490
@@ -831,32 +875,37 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p,
831 struct pt_regs *regs) 875 struct pt_regs *regs)
832{ 876{
833 struct kretprobe *rp = container_of(p, struct kretprobe, kp); 877 struct kretprobe *rp = container_of(p, struct kretprobe, kp);
834 unsigned long flags = 0; 878 unsigned long hash, flags = 0;
879 struct kretprobe_instance *ri;
835 880
836 /*TODO: consider to only swap the RA after the last pre_handler fired */ 881 /*TODO: consider to only swap the RA after the last pre_handler fired */
837 spin_lock_irqsave(&kretprobe_lock, flags); 882 hash = hash_ptr(current, KPROBE_HASH_BITS);
883 spin_lock_irqsave(&rp->lock, flags);
838 if (!hlist_empty(&rp->free_instances)) { 884 if (!hlist_empty(&rp->free_instances)) {
839 struct kretprobe_instance *ri;
840
841 ri = hlist_entry(rp->free_instances.first, 885 ri = hlist_entry(rp->free_instances.first,
842 struct kretprobe_instance, uflist); 886 struct kretprobe_instance, hlist);
887 hlist_del(&ri->hlist);
888 spin_unlock_irqrestore(&rp->lock, flags);
889
843 ri->rp = rp; 890 ri->rp = rp;
844 ri->task = current; 891 ri->task = current;
845 892
846 if (rp->entry_handler && rp->entry_handler(ri, regs)) { 893 if (rp->entry_handler && rp->entry_handler(ri, regs)) {
847 spin_unlock_irqrestore(&kretprobe_lock, flags); 894 spin_unlock_irqrestore(&rp->lock, flags);
848 return 0; 895 return 0;
849 } 896 }
850 897
851 arch_prepare_kretprobe(ri, regs); 898 arch_prepare_kretprobe(ri, regs);
852 899
853 /* XXX(hch): why is there no hlist_move_head? */ 900 /* XXX(hch): why is there no hlist_move_head? */
854 hlist_del(&ri->uflist); 901 INIT_HLIST_NODE(&ri->hlist);
855 hlist_add_head(&ri->uflist, &ri->rp->used_instances); 902 kretprobe_table_lock(hash, &flags);
856 hlist_add_head(&ri->hlist, kretprobe_inst_table_head(ri->task)); 903 hlist_add_head(&ri->hlist, &kretprobe_inst_table[hash]);
857 } else 904 kretprobe_table_unlock(hash, &flags);
905 } else {
858 rp->nmissed++; 906 rp->nmissed++;
859 spin_unlock_irqrestore(&kretprobe_lock, flags); 907 spin_unlock_irqrestore(&rp->lock, flags);
908 }
860 return 0; 909 return 0;
861} 910}
862 911
@@ -892,7 +941,7 @@ static int __kprobes __register_kretprobe(struct kretprobe *rp,
892 rp->maxactive = NR_CPUS; 941 rp->maxactive = NR_CPUS;
893#endif 942#endif
894 } 943 }
895 INIT_HLIST_HEAD(&rp->used_instances); 944 spin_lock_init(&rp->lock);
896 INIT_HLIST_HEAD(&rp->free_instances); 945 INIT_HLIST_HEAD(&rp->free_instances);
897 for (i = 0; i < rp->maxactive; i++) { 946 for (i = 0; i < rp->maxactive; i++) {
898 inst = kmalloc(sizeof(struct kretprobe_instance) + 947 inst = kmalloc(sizeof(struct kretprobe_instance) +
@@ -901,8 +950,8 @@ static int __kprobes __register_kretprobe(struct kretprobe *rp,
901 free_rp_inst(rp); 950 free_rp_inst(rp);
902 return -ENOMEM; 951 return -ENOMEM;
903 } 952 }
904 INIT_HLIST_NODE(&inst->uflist); 953 INIT_HLIST_NODE(&inst->hlist);
905 hlist_add_head(&inst->uflist, &rp->free_instances); 954 hlist_add_head(&inst->hlist, &rp->free_instances);
906 } 955 }
907 956
908 rp->nmissed = 0; 957 rp->nmissed = 0;
@@ -1009,6 +1058,7 @@ static int __init init_kprobes(void)
1009 for (i = 0; i < KPROBE_TABLE_SIZE; i++) { 1058 for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
1010 INIT_HLIST_HEAD(&kprobe_table[i]); 1059 INIT_HLIST_HEAD(&kprobe_table[i]);
1011 INIT_HLIST_HEAD(&kretprobe_inst_table[i]); 1060 INIT_HLIST_HEAD(&kretprobe_inst_table[i]);
1061 spin_lock_init(&(kretprobe_table_locks[i].lock));
1012 } 1062 }
1013 1063
1014 /* 1064 /*
@@ -1050,6 +1100,7 @@ static int __init init_kprobes(void)
1050 err = arch_init_kprobes(); 1100 err = arch_init_kprobes();
1051 if (!err) 1101 if (!err)
1052 err = register_die_notifier(&kprobe_exceptions_nb); 1102 err = register_die_notifier(&kprobe_exceptions_nb);
1103 kprobes_initialized = (err == 0);
1053 1104
1054 if (!err) 1105 if (!err)
1055 init_test_probes(); 1106 init_test_probes();