aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/kprobes.c565
-rw-r--r--kernel/perf_event.c200
-rw-r--r--kernel/sysctl.c16
-rw-r--r--kernel/trace/trace_event_perf.c31
-rw-r--r--kernel/watchdog.c7
5 files changed, 533 insertions, 286 deletions
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 9737a76e106f..7663e5df0e6f 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -354,13 +354,20 @@ static inline int kprobe_aggrprobe(struct kprobe *p)
354 return p->pre_handler == aggr_pre_handler; 354 return p->pre_handler == aggr_pre_handler;
355} 355}
356 356
357/* Return true(!0) if the kprobe is unused */
358static inline int kprobe_unused(struct kprobe *p)
359{
360 return kprobe_aggrprobe(p) && kprobe_disabled(p) &&
361 list_empty(&p->list);
362}
363
357/* 364/*
358 * Keep all fields in the kprobe consistent 365 * Keep all fields in the kprobe consistent
359 */ 366 */
360static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p) 367static inline void copy_kprobe(struct kprobe *ap, struct kprobe *p)
361{ 368{
362 memcpy(&p->opcode, &old_p->opcode, sizeof(kprobe_opcode_t)); 369 memcpy(&p->opcode, &ap->opcode, sizeof(kprobe_opcode_t));
363 memcpy(&p->ainsn, &old_p->ainsn, sizeof(struct arch_specific_insn)); 370 memcpy(&p->ainsn, &ap->ainsn, sizeof(struct arch_specific_insn));
364} 371}
365 372
366#ifdef CONFIG_OPTPROBES 373#ifdef CONFIG_OPTPROBES
@@ -384,6 +391,17 @@ void __kprobes opt_pre_handler(struct kprobe *p, struct pt_regs *regs)
384 } 391 }
385} 392}
386 393
394/* Free optimized instructions and optimized_kprobe */
395static __kprobes void free_aggr_kprobe(struct kprobe *p)
396{
397 struct optimized_kprobe *op;
398
399 op = container_of(p, struct optimized_kprobe, kp);
400 arch_remove_optimized_kprobe(op);
401 arch_remove_kprobe(p);
402 kfree(op);
403}
404
387/* Return true(!0) if the kprobe is ready for optimization. */ 405/* Return true(!0) if the kprobe is ready for optimization. */
388static inline int kprobe_optready(struct kprobe *p) 406static inline int kprobe_optready(struct kprobe *p)
389{ 407{
@@ -397,6 +415,33 @@ static inline int kprobe_optready(struct kprobe *p)
397 return 0; 415 return 0;
398} 416}
399 417
418/* Return true(!0) if the kprobe is disarmed. Note: p must be on hash list */
419static inline int kprobe_disarmed(struct kprobe *p)
420{
421 struct optimized_kprobe *op;
422
423 /* If kprobe is not aggr/opt probe, just return kprobe is disabled */
424 if (!kprobe_aggrprobe(p))
425 return kprobe_disabled(p);
426
427 op = container_of(p, struct optimized_kprobe, kp);
428
429 return kprobe_disabled(p) && list_empty(&op->list);
430}
431
432/* Return true(!0) if the probe is queued on (un)optimizing lists */
433static int __kprobes kprobe_queued(struct kprobe *p)
434{
435 struct optimized_kprobe *op;
436
437 if (kprobe_aggrprobe(p)) {
438 op = container_of(p, struct optimized_kprobe, kp);
439 if (!list_empty(&op->list))
440 return 1;
441 }
442 return 0;
443}
444
400/* 445/*
401 * Return an optimized kprobe whose optimizing code replaces 446 * Return an optimized kprobe whose optimizing code replaces
402 * instructions including addr (exclude breakpoint). 447 * instructions including addr (exclude breakpoint).
@@ -422,30 +467,23 @@ static struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr)
422 467
423/* Optimization staging list, protected by kprobe_mutex */ 468/* Optimization staging list, protected by kprobe_mutex */
424static LIST_HEAD(optimizing_list); 469static LIST_HEAD(optimizing_list);
470static LIST_HEAD(unoptimizing_list);
425 471
426static void kprobe_optimizer(struct work_struct *work); 472static void kprobe_optimizer(struct work_struct *work);
427static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer); 473static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer);
474static DECLARE_COMPLETION(optimizer_comp);
428#define OPTIMIZE_DELAY 5 475#define OPTIMIZE_DELAY 5
429 476
430/* Kprobe jump optimizer */ 477/*
431static __kprobes void kprobe_optimizer(struct work_struct *work) 478 * Optimize (replace a breakpoint with a jump) kprobes listed on
479 * optimizing_list.
480 */
481static __kprobes void do_optimize_kprobes(void)
432{ 482{
433 struct optimized_kprobe *op, *tmp; 483 /* Optimization never be done when disarmed */
434 484 if (kprobes_all_disarmed || !kprobes_allow_optimization ||
435 /* Lock modules while optimizing kprobes */ 485 list_empty(&optimizing_list))
436 mutex_lock(&module_mutex); 486 return;
437 mutex_lock(&kprobe_mutex);
438 if (kprobes_all_disarmed || !kprobes_allow_optimization)
439 goto end;
440
441 /*
442 * Wait for quiesence period to ensure all running interrupts
443 * are done. Because optprobe may modify multiple instructions
444 * there is a chance that Nth instruction is interrupted. In that
445 * case, running interrupt can return to 2nd-Nth byte of jump
446 * instruction. This wait is for avoiding it.
447 */
448 synchronize_sched();
449 487
450 /* 488 /*
451 * The optimization/unoptimization refers online_cpus via 489 * The optimization/unoptimization refers online_cpus via
@@ -459,17 +497,111 @@ static __kprobes void kprobe_optimizer(struct work_struct *work)
459 */ 497 */
460 get_online_cpus(); 498 get_online_cpus();
461 mutex_lock(&text_mutex); 499 mutex_lock(&text_mutex);
462 list_for_each_entry_safe(op, tmp, &optimizing_list, list) { 500 arch_optimize_kprobes(&optimizing_list);
463 WARN_ON(kprobe_disabled(&op->kp)); 501 mutex_unlock(&text_mutex);
464 if (arch_optimize_kprobe(op) < 0) 502 put_online_cpus();
465 op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; 503}
466 list_del_init(&op->list); 504
505/*
506 * Unoptimize (replace a jump with a breakpoint and remove the breakpoint
507 * if need) kprobes listed on unoptimizing_list.
508 */
509static __kprobes void do_unoptimize_kprobes(struct list_head *free_list)
510{
511 struct optimized_kprobe *op, *tmp;
512
513 /* Unoptimization must be done anytime */
514 if (list_empty(&unoptimizing_list))
515 return;
516
517 /* Ditto to do_optimize_kprobes */
518 get_online_cpus();
519 mutex_lock(&text_mutex);
520 arch_unoptimize_kprobes(&unoptimizing_list, free_list);
521 /* Loop free_list for disarming */
522 list_for_each_entry_safe(op, tmp, free_list, list) {
523 /* Disarm probes if marked disabled */
524 if (kprobe_disabled(&op->kp))
525 arch_disarm_kprobe(&op->kp);
526 if (kprobe_unused(&op->kp)) {
527 /*
528 * Remove unused probes from hash list. After waiting
529 * for synchronization, these probes are reclaimed.
530 * (reclaiming is done by do_free_cleaned_kprobes.)
531 */
532 hlist_del_rcu(&op->kp.hlist);
533 } else
534 list_del_init(&op->list);
467 } 535 }
468 mutex_unlock(&text_mutex); 536 mutex_unlock(&text_mutex);
469 put_online_cpus(); 537 put_online_cpus();
470end: 538}
539
540/* Reclaim all kprobes on the free_list */
541static __kprobes void do_free_cleaned_kprobes(struct list_head *free_list)
542{
543 struct optimized_kprobe *op, *tmp;
544
545 list_for_each_entry_safe(op, tmp, free_list, list) {
546 BUG_ON(!kprobe_unused(&op->kp));
547 list_del_init(&op->list);
548 free_aggr_kprobe(&op->kp);
549 }
550}
551
552/* Start optimizer after OPTIMIZE_DELAY passed */
553static __kprobes void kick_kprobe_optimizer(void)
554{
555 if (!delayed_work_pending(&optimizing_work))
556 schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY);
557}
558
559/* Kprobe jump optimizer */
560static __kprobes void kprobe_optimizer(struct work_struct *work)
561{
562 LIST_HEAD(free_list);
563
564 /* Lock modules while optimizing kprobes */
565 mutex_lock(&module_mutex);
566 mutex_lock(&kprobe_mutex);
567
568 /*
569 * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
570 * kprobes before waiting for quiesence period.
571 */
572 do_unoptimize_kprobes(&free_list);
573
574 /*
575 * Step 2: Wait for quiesence period to ensure all running interrupts
576 * are done. Because optprobe may modify multiple instructions
577 * there is a chance that Nth instruction is interrupted. In that
578 * case, running interrupt can return to 2nd-Nth byte of jump
579 * instruction. This wait is for avoiding it.
580 */
581 synchronize_sched();
582
583 /* Step 3: Optimize kprobes after quiesence period */
584 do_optimize_kprobes();
585
586 /* Step 4: Free cleaned kprobes after quiesence period */
587 do_free_cleaned_kprobes(&free_list);
588
471 mutex_unlock(&kprobe_mutex); 589 mutex_unlock(&kprobe_mutex);
472 mutex_unlock(&module_mutex); 590 mutex_unlock(&module_mutex);
591
592 /* Step 5: Kick optimizer again if needed */
593 if (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list))
594 kick_kprobe_optimizer();
595 else
596 /* Wake up all waiters */
597 complete_all(&optimizer_comp);
598}
599
600/* Wait for completing optimization and unoptimization */
601static __kprobes void wait_for_kprobe_optimizer(void)
602{
603 if (delayed_work_pending(&optimizing_work))
604 wait_for_completion(&optimizer_comp);
473} 605}
474 606
475/* Optimize kprobe if p is ready to be optimized */ 607/* Optimize kprobe if p is ready to be optimized */
@@ -495,42 +627,99 @@ static __kprobes void optimize_kprobe(struct kprobe *p)
495 /* Check if it is already optimized. */ 627 /* Check if it is already optimized. */
496 if (op->kp.flags & KPROBE_FLAG_OPTIMIZED) 628 if (op->kp.flags & KPROBE_FLAG_OPTIMIZED)
497 return; 629 return;
498
499 op->kp.flags |= KPROBE_FLAG_OPTIMIZED; 630 op->kp.flags |= KPROBE_FLAG_OPTIMIZED;
500 list_add(&op->list, &optimizing_list); 631
501 if (!delayed_work_pending(&optimizing_work)) 632 if (!list_empty(&op->list))
502 schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY); 633 /* This is under unoptimizing. Just dequeue the probe */
634 list_del_init(&op->list);
635 else {
636 list_add(&op->list, &optimizing_list);
637 kick_kprobe_optimizer();
638 }
639}
640
641/* Short cut to direct unoptimizing */
642static __kprobes void force_unoptimize_kprobe(struct optimized_kprobe *op)
643{
644 get_online_cpus();
645 arch_unoptimize_kprobe(op);
646 put_online_cpus();
647 if (kprobe_disabled(&op->kp))
648 arch_disarm_kprobe(&op->kp);
503} 649}
504 650
505/* Unoptimize a kprobe if p is optimized */ 651/* Unoptimize a kprobe if p is optimized */
506static __kprobes void unoptimize_kprobe(struct kprobe *p) 652static __kprobes void unoptimize_kprobe(struct kprobe *p, bool force)
507{ 653{
508 struct optimized_kprobe *op; 654 struct optimized_kprobe *op;
509 655
510 if ((p->flags & KPROBE_FLAG_OPTIMIZED) && kprobe_aggrprobe(p)) { 656 if (!kprobe_aggrprobe(p) || kprobe_disarmed(p))
511 op = container_of(p, struct optimized_kprobe, kp); 657 return; /* This is not an optprobe nor optimized */
512 if (!list_empty(&op->list)) 658
513 /* Dequeue from the optimization queue */ 659 op = container_of(p, struct optimized_kprobe, kp);
660 if (!kprobe_optimized(p)) {
661 /* Unoptimized or unoptimizing case */
662 if (force && !list_empty(&op->list)) {
663 /*
664 * Only if this is unoptimizing kprobe and forced,
665 * forcibly unoptimize it. (No need to unoptimize
666 * unoptimized kprobe again :)
667 */
514 list_del_init(&op->list); 668 list_del_init(&op->list);
515 else 669 force_unoptimize_kprobe(op);
516 /* Replace jump with break */ 670 }
517 arch_unoptimize_kprobe(op); 671 return;
518 op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; 672 }
673
674 op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
675 if (!list_empty(&op->list)) {
676 /* Dequeue from the optimization queue */
677 list_del_init(&op->list);
678 return;
679 }
680 /* Optimized kprobe case */
681 if (force)
682 /* Forcibly update the code: this is a special case */
683 force_unoptimize_kprobe(op);
684 else {
685 list_add(&op->list, &unoptimizing_list);
686 kick_kprobe_optimizer();
519 } 687 }
520} 688}
521 689
690/* Cancel unoptimizing for reusing */
691static void reuse_unused_kprobe(struct kprobe *ap)
692{
693 struct optimized_kprobe *op;
694
695 BUG_ON(!kprobe_unused(ap));
696 /*
697 * Unused kprobe MUST be on the way of delayed unoptimizing (means
698 * there is still a relative jump) and disabled.
699 */
700 op = container_of(ap, struct optimized_kprobe, kp);
701 if (unlikely(list_empty(&op->list)))
702 printk(KERN_WARNING "Warning: found a stray unused "
703 "aggrprobe@%p\n", ap->addr);
704 /* Enable the probe again */
705 ap->flags &= ~KPROBE_FLAG_DISABLED;
706 /* Optimize it again (remove from op->list) */
707 BUG_ON(!kprobe_optready(ap));
708 optimize_kprobe(ap);
709}
710
522/* Remove optimized instructions */ 711/* Remove optimized instructions */
523static void __kprobes kill_optimized_kprobe(struct kprobe *p) 712static void __kprobes kill_optimized_kprobe(struct kprobe *p)
524{ 713{
525 struct optimized_kprobe *op; 714 struct optimized_kprobe *op;
526 715
527 op = container_of(p, struct optimized_kprobe, kp); 716 op = container_of(p, struct optimized_kprobe, kp);
528 if (!list_empty(&op->list)) { 717 if (!list_empty(&op->list))
529 /* Dequeue from the optimization queue */ 718 /* Dequeue from the (un)optimization queue */
530 list_del_init(&op->list); 719 list_del_init(&op->list);
531 op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; 720
532 } 721 op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
533 /* Don't unoptimize, because the target code will be freed. */ 722 /* Don't touch the code, because it is already freed. */
534 arch_remove_optimized_kprobe(op); 723 arch_remove_optimized_kprobe(op);
535} 724}
536 725
@@ -543,16 +732,6 @@ static __kprobes void prepare_optimized_kprobe(struct kprobe *p)
543 arch_prepare_optimized_kprobe(op); 732 arch_prepare_optimized_kprobe(op);
544} 733}
545 734
546/* Free optimized instructions and optimized_kprobe */
547static __kprobes void free_aggr_kprobe(struct kprobe *p)
548{
549 struct optimized_kprobe *op;
550
551 op = container_of(p, struct optimized_kprobe, kp);
552 arch_remove_optimized_kprobe(op);
553 kfree(op);
554}
555
556/* Allocate new optimized_kprobe and try to prepare optimized instructions */ 735/* Allocate new optimized_kprobe and try to prepare optimized instructions */
557static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p) 736static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
558{ 737{
@@ -587,7 +766,8 @@ static __kprobes void try_to_optimize_kprobe(struct kprobe *p)
587 op = container_of(ap, struct optimized_kprobe, kp); 766 op = container_of(ap, struct optimized_kprobe, kp);
588 if (!arch_prepared_optinsn(&op->optinsn)) { 767 if (!arch_prepared_optinsn(&op->optinsn)) {
589 /* If failed to setup optimizing, fallback to kprobe */ 768 /* If failed to setup optimizing, fallback to kprobe */
590 free_aggr_kprobe(ap); 769 arch_remove_optimized_kprobe(op);
770 kfree(op);
591 return; 771 return;
592 } 772 }
593 773
@@ -631,21 +811,16 @@ static void __kprobes unoptimize_all_kprobes(void)
631 return; 811 return;
632 812
633 kprobes_allow_optimization = false; 813 kprobes_allow_optimization = false;
634 printk(KERN_INFO "Kprobes globally unoptimized\n");
635 get_online_cpus(); /* For avoiding text_mutex deadlock */
636 mutex_lock(&text_mutex);
637 for (i = 0; i < KPROBE_TABLE_SIZE; i++) { 814 for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
638 head = &kprobe_table[i]; 815 head = &kprobe_table[i];
639 hlist_for_each_entry_rcu(p, node, head, hlist) { 816 hlist_for_each_entry_rcu(p, node, head, hlist) {
640 if (!kprobe_disabled(p)) 817 if (!kprobe_disabled(p))
641 unoptimize_kprobe(p); 818 unoptimize_kprobe(p, false);
642 } 819 }
643 } 820 }
644 821 /* Wait for unoptimizing completion */
645 mutex_unlock(&text_mutex); 822 wait_for_kprobe_optimizer();
646 put_online_cpus(); 823 printk(KERN_INFO "Kprobes globally unoptimized\n");
647 /* Allow all currently running kprobes to complete */
648 synchronize_sched();
649} 824}
650 825
651int sysctl_kprobes_optimization; 826int sysctl_kprobes_optimization;
@@ -669,44 +844,60 @@ int proc_kprobes_optimization_handler(struct ctl_table *table, int write,
669} 844}
670#endif /* CONFIG_SYSCTL */ 845#endif /* CONFIG_SYSCTL */
671 846
847/* Put a breakpoint for a probe. Must be called with text_mutex locked */
672static void __kprobes __arm_kprobe(struct kprobe *p) 848static void __kprobes __arm_kprobe(struct kprobe *p)
673{ 849{
674 struct kprobe *old_p; 850 struct kprobe *_p;
675 851
676 /* Check collision with other optimized kprobes */ 852 /* Check collision with other optimized kprobes */
677 old_p = get_optimized_kprobe((unsigned long)p->addr); 853 _p = get_optimized_kprobe((unsigned long)p->addr);
678 if (unlikely(old_p)) 854 if (unlikely(_p))
679 unoptimize_kprobe(old_p); /* Fallback to unoptimized kprobe */ 855 /* Fallback to unoptimized kprobe */
856 unoptimize_kprobe(_p, true);
680 857
681 arch_arm_kprobe(p); 858 arch_arm_kprobe(p);
682 optimize_kprobe(p); /* Try to optimize (add kprobe to a list) */ 859 optimize_kprobe(p); /* Try to optimize (add kprobe to a list) */
683} 860}
684 861
685static void __kprobes __disarm_kprobe(struct kprobe *p) 862/* Remove the breakpoint of a probe. Must be called with text_mutex locked */
863static void __kprobes __disarm_kprobe(struct kprobe *p, bool reopt)
686{ 864{
687 struct kprobe *old_p; 865 struct kprobe *_p;
688 866
689 unoptimize_kprobe(p); /* Try to unoptimize */ 867 unoptimize_kprobe(p, false); /* Try to unoptimize */
690 arch_disarm_kprobe(p);
691 868
692 /* If another kprobe was blocked, optimize it. */ 869 if (!kprobe_queued(p)) {
693 old_p = get_optimized_kprobe((unsigned long)p->addr); 870 arch_disarm_kprobe(p);
694 if (unlikely(old_p)) 871 /* If another kprobe was blocked, optimize it. */
695 optimize_kprobe(old_p); 872 _p = get_optimized_kprobe((unsigned long)p->addr);
873 if (unlikely(_p) && reopt)
874 optimize_kprobe(_p);
875 }
876 /* TODO: reoptimize others after unoptimized this probe */
696} 877}
697 878
698#else /* !CONFIG_OPTPROBES */ 879#else /* !CONFIG_OPTPROBES */
699 880
700#define optimize_kprobe(p) do {} while (0) 881#define optimize_kprobe(p) do {} while (0)
701#define unoptimize_kprobe(p) do {} while (0) 882#define unoptimize_kprobe(p, f) do {} while (0)
702#define kill_optimized_kprobe(p) do {} while (0) 883#define kill_optimized_kprobe(p) do {} while (0)
703#define prepare_optimized_kprobe(p) do {} while (0) 884#define prepare_optimized_kprobe(p) do {} while (0)
704#define try_to_optimize_kprobe(p) do {} while (0) 885#define try_to_optimize_kprobe(p) do {} while (0)
705#define __arm_kprobe(p) arch_arm_kprobe(p) 886#define __arm_kprobe(p) arch_arm_kprobe(p)
706#define __disarm_kprobe(p) arch_disarm_kprobe(p) 887#define __disarm_kprobe(p, o) arch_disarm_kprobe(p)
888#define kprobe_disarmed(p) kprobe_disabled(p)
889#define wait_for_kprobe_optimizer() do {} while (0)
890
891/* There should be no unused kprobes can be reused without optimization */
892static void reuse_unused_kprobe(struct kprobe *ap)
893{
894 printk(KERN_ERR "Error: There should be no unused kprobe here.\n");
895 BUG_ON(kprobe_unused(ap));
896}
707 897
708static __kprobes void free_aggr_kprobe(struct kprobe *p) 898static __kprobes void free_aggr_kprobe(struct kprobe *p)
709{ 899{
900 arch_remove_kprobe(p);
710 kfree(p); 901 kfree(p);
711} 902}
712 903
@@ -732,11 +923,10 @@ static void __kprobes arm_kprobe(struct kprobe *kp)
732/* Disarm a kprobe with text_mutex */ 923/* Disarm a kprobe with text_mutex */
733static void __kprobes disarm_kprobe(struct kprobe *kp) 924static void __kprobes disarm_kprobe(struct kprobe *kp)
734{ 925{
735 get_online_cpus(); /* For avoiding text_mutex deadlock */ 926 /* Ditto */
736 mutex_lock(&text_mutex); 927 mutex_lock(&text_mutex);
737 __disarm_kprobe(kp); 928 __disarm_kprobe(kp, true);
738 mutex_unlock(&text_mutex); 929 mutex_unlock(&text_mutex);
739 put_online_cpus();
740} 930}
741 931
742/* 932/*
@@ -942,7 +1132,7 @@ static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p)
942 BUG_ON(kprobe_gone(ap) || kprobe_gone(p)); 1132 BUG_ON(kprobe_gone(ap) || kprobe_gone(p));
943 1133
944 if (p->break_handler || p->post_handler) 1134 if (p->break_handler || p->post_handler)
945 unoptimize_kprobe(ap); /* Fall back to normal kprobe */ 1135 unoptimize_kprobe(ap, true); /* Fall back to normal kprobe */
946 1136
947 if (p->break_handler) { 1137 if (p->break_handler) {
948 if (ap->break_handler) 1138 if (ap->break_handler)
@@ -993,19 +1183,21 @@ static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
993 * This is the second or subsequent kprobe at the address - handle 1183 * This is the second or subsequent kprobe at the address - handle
994 * the intricacies 1184 * the intricacies
995 */ 1185 */
996static int __kprobes register_aggr_kprobe(struct kprobe *old_p, 1186static int __kprobes register_aggr_kprobe(struct kprobe *orig_p,
997 struct kprobe *p) 1187 struct kprobe *p)
998{ 1188{
999 int ret = 0; 1189 int ret = 0;
1000 struct kprobe *ap = old_p; 1190 struct kprobe *ap = orig_p;
1001 1191
1002 if (!kprobe_aggrprobe(old_p)) { 1192 if (!kprobe_aggrprobe(orig_p)) {
1003 /* If old_p is not an aggr_kprobe, create new aggr_kprobe. */ 1193 /* If orig_p is not an aggr_kprobe, create new aggr_kprobe. */
1004 ap = alloc_aggr_kprobe(old_p); 1194 ap = alloc_aggr_kprobe(orig_p);
1005 if (!ap) 1195 if (!ap)
1006 return -ENOMEM; 1196 return -ENOMEM;
1007 init_aggr_kprobe(ap, old_p); 1197 init_aggr_kprobe(ap, orig_p);
1008 } 1198 } else if (kprobe_unused(ap))
1199 /* This probe is going to die. Rescue it */
1200 reuse_unused_kprobe(ap);
1009 1201
1010 if (kprobe_gone(ap)) { 1202 if (kprobe_gone(ap)) {
1011 /* 1203 /*
@@ -1039,23 +1231,6 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
1039 return add_new_kprobe(ap, p); 1231 return add_new_kprobe(ap, p);
1040} 1232}
1041 1233
1042/* Try to disable aggr_kprobe, and return 1 if succeeded.*/
1043static int __kprobes try_to_disable_aggr_kprobe(struct kprobe *p)
1044{
1045 struct kprobe *kp;
1046
1047 list_for_each_entry_rcu(kp, &p->list, list) {
1048 if (!kprobe_disabled(kp))
1049 /*
1050 * There is an active probe on the list.
1051 * We can't disable aggr_kprobe.
1052 */
1053 return 0;
1054 }
1055 p->flags |= KPROBE_FLAG_DISABLED;
1056 return 1;
1057}
1058
1059static int __kprobes in_kprobes_functions(unsigned long addr) 1234static int __kprobes in_kprobes_functions(unsigned long addr)
1060{ 1235{
1061 struct kprobe_blackpoint *kb; 1236 struct kprobe_blackpoint *kb;
@@ -1098,34 +1273,33 @@ static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p)
1098/* Check passed kprobe is valid and return kprobe in kprobe_table. */ 1273/* Check passed kprobe is valid and return kprobe in kprobe_table. */
1099static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p) 1274static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
1100{ 1275{
1101 struct kprobe *old_p, *list_p; 1276 struct kprobe *ap, *list_p;
1102 1277
1103 old_p = get_kprobe(p->addr); 1278 ap = get_kprobe(p->addr);
1104 if (unlikely(!old_p)) 1279 if (unlikely(!ap))
1105 return NULL; 1280 return NULL;
1106 1281
1107 if (p != old_p) { 1282 if (p != ap) {
1108 list_for_each_entry_rcu(list_p, &old_p->list, list) 1283 list_for_each_entry_rcu(list_p, &ap->list, list)
1109 if (list_p == p) 1284 if (list_p == p)
1110 /* kprobe p is a valid probe */ 1285 /* kprobe p is a valid probe */
1111 goto valid; 1286 goto valid;
1112 return NULL; 1287 return NULL;
1113 } 1288 }
1114valid: 1289valid:
1115 return old_p; 1290 return ap;
1116} 1291}
1117 1292
1118/* Return error if the kprobe is being re-registered */ 1293/* Return error if the kprobe is being re-registered */
1119static inline int check_kprobe_rereg(struct kprobe *p) 1294static inline int check_kprobe_rereg(struct kprobe *p)
1120{ 1295{
1121 int ret = 0; 1296 int ret = 0;
1122 struct kprobe *old_p;
1123 1297
1124 mutex_lock(&kprobe_mutex); 1298 mutex_lock(&kprobe_mutex);
1125 old_p = __get_valid_kprobe(p); 1299 if (__get_valid_kprobe(p))
1126 if (old_p)
1127 ret = -EINVAL; 1300 ret = -EINVAL;
1128 mutex_unlock(&kprobe_mutex); 1301 mutex_unlock(&kprobe_mutex);
1302
1129 return ret; 1303 return ret;
1130} 1304}
1131 1305
@@ -1229,67 +1403,121 @@ fail_with_jump_label:
1229} 1403}
1230EXPORT_SYMBOL_GPL(register_kprobe); 1404EXPORT_SYMBOL_GPL(register_kprobe);
1231 1405
1406/* Check if all probes on the aggrprobe are disabled */
1407static int __kprobes aggr_kprobe_disabled(struct kprobe *ap)
1408{
1409 struct kprobe *kp;
1410
1411 list_for_each_entry_rcu(kp, &ap->list, list)
1412 if (!kprobe_disabled(kp))
1413 /*
1414 * There is an active probe on the list.
1415 * We can't disable this ap.
1416 */
1417 return 0;
1418
1419 return 1;
1420}
1421
1422/* Disable one kprobe: Make sure called under kprobe_mutex is locked */
1423static struct kprobe *__kprobes __disable_kprobe(struct kprobe *p)
1424{
1425 struct kprobe *orig_p;
1426
1427 /* Get an original kprobe for return */
1428 orig_p = __get_valid_kprobe(p);
1429 if (unlikely(orig_p == NULL))
1430 return NULL;
1431
1432 if (!kprobe_disabled(p)) {
1433 /* Disable probe if it is a child probe */
1434 if (p != orig_p)
1435 p->flags |= KPROBE_FLAG_DISABLED;
1436
1437 /* Try to disarm and disable this/parent probe */
1438 if (p == orig_p || aggr_kprobe_disabled(orig_p)) {
1439 disarm_kprobe(orig_p);
1440 orig_p->flags |= KPROBE_FLAG_DISABLED;
1441 }
1442 }
1443
1444 return orig_p;
1445}
1446
1232/* 1447/*
1233 * Unregister a kprobe without a scheduler synchronization. 1448 * Unregister a kprobe without a scheduler synchronization.
1234 */ 1449 */
1235static int __kprobes __unregister_kprobe_top(struct kprobe *p) 1450static int __kprobes __unregister_kprobe_top(struct kprobe *p)
1236{ 1451{
1237 struct kprobe *old_p, *list_p; 1452 struct kprobe *ap, *list_p;
1238 1453
1239 old_p = __get_valid_kprobe(p); 1454 /* Disable kprobe. This will disarm it if needed. */
1240 if (old_p == NULL) 1455 ap = __disable_kprobe(p);
1456 if (ap == NULL)
1241 return -EINVAL; 1457 return -EINVAL;
1242 1458
1243 if (old_p == p || 1459 if (ap == p)
1244 (kprobe_aggrprobe(old_p) &&
1245 list_is_singular(&old_p->list))) {
1246 /* 1460 /*
1247 * Only probe on the hash list. Disarm only if kprobes are 1461 * This probe is an independent(and non-optimized) kprobe
1248 * enabled and not gone - otherwise, the breakpoint would 1462 * (not an aggrprobe). Remove from the hash list.
1249 * already have been removed. We save on flushing icache.
1250 */ 1463 */
1251 if (!kprobes_all_disarmed && !kprobe_disabled(old_p)) 1464 goto disarmed;
1252 disarm_kprobe(old_p); 1465
1253 hlist_del_rcu(&old_p->hlist); 1466 /* Following process expects this probe is an aggrprobe */
1254 } else { 1467 WARN_ON(!kprobe_aggrprobe(ap));
1468
1469 if (list_is_singular(&ap->list) && kprobe_disarmed(ap))
1470 /*
1471 * !disarmed could be happen if the probe is under delayed
1472 * unoptimizing.
1473 */
1474 goto disarmed;
1475 else {
1476 /* If disabling probe has special handlers, update aggrprobe */
1255 if (p->break_handler && !kprobe_gone(p)) 1477 if (p->break_handler && !kprobe_gone(p))
1256 old_p->break_handler = NULL; 1478 ap->break_handler = NULL;
1257 if (p->post_handler && !kprobe_gone(p)) { 1479 if (p->post_handler && !kprobe_gone(p)) {
1258 list_for_each_entry_rcu(list_p, &old_p->list, list) { 1480 list_for_each_entry_rcu(list_p, &ap->list, list) {
1259 if ((list_p != p) && (list_p->post_handler)) 1481 if ((list_p != p) && (list_p->post_handler))
1260 goto noclean; 1482 goto noclean;
1261 } 1483 }
1262 old_p->post_handler = NULL; 1484 ap->post_handler = NULL;
1263 } 1485 }
1264noclean: 1486noclean:
1487 /*
1488 * Remove from the aggrprobe: this path will do nothing in
1489 * __unregister_kprobe_bottom().
1490 */
1265 list_del_rcu(&p->list); 1491 list_del_rcu(&p->list);
1266 if (!kprobe_disabled(old_p)) { 1492 if (!kprobe_disabled(ap) && !kprobes_all_disarmed)
1267 try_to_disable_aggr_kprobe(old_p); 1493 /*
1268 if (!kprobes_all_disarmed) { 1494 * Try to optimize this probe again, because post
1269 if (kprobe_disabled(old_p)) 1495 * handler may have been changed.
1270 disarm_kprobe(old_p); 1496 */
1271 else 1497 optimize_kprobe(ap);
1272 /* Try to optimize this probe again */
1273 optimize_kprobe(old_p);
1274 }
1275 }
1276 } 1498 }
1277 return 0; 1499 return 0;
1500
1501disarmed:
1502 BUG_ON(!kprobe_disarmed(ap));
1503 hlist_del_rcu(&ap->hlist);
1504 return 0;
1278} 1505}
1279 1506
1280static void __kprobes __unregister_kprobe_bottom(struct kprobe *p) 1507static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
1281{ 1508{
1282 struct kprobe *old_p; 1509 struct kprobe *ap;
1283 1510
1284 if (list_empty(&p->list)) 1511 if (list_empty(&p->list))
1512 /* This is an independent kprobe */
1285 arch_remove_kprobe(p); 1513 arch_remove_kprobe(p);
1286 else if (list_is_singular(&p->list)) { 1514 else if (list_is_singular(&p->list)) {
1287 /* "p" is the last child of an aggr_kprobe */ 1515 /* This is the last child of an aggrprobe */
1288 old_p = list_entry(p->list.next, struct kprobe, list); 1516 ap = list_entry(p->list.next, struct kprobe, list);
1289 list_del(&p->list); 1517 list_del(&p->list);
1290 arch_remove_kprobe(old_p); 1518 free_aggr_kprobe(ap);
1291 free_aggr_kprobe(old_p);
1292 } 1519 }
1520 /* Otherwise, do nothing. */
1293} 1521}
1294 1522
1295int __kprobes register_kprobes(struct kprobe **kps, int num) 1523int __kprobes register_kprobes(struct kprobe **kps, int num)
@@ -1607,29 +1835,13 @@ static void __kprobes kill_kprobe(struct kprobe *p)
1607int __kprobes disable_kprobe(struct kprobe *kp) 1835int __kprobes disable_kprobe(struct kprobe *kp)
1608{ 1836{
1609 int ret = 0; 1837 int ret = 0;
1610 struct kprobe *p;
1611 1838
1612 mutex_lock(&kprobe_mutex); 1839 mutex_lock(&kprobe_mutex);
1613 1840
1614 /* Check whether specified probe is valid. */ 1841 /* Disable this kprobe */
1615 p = __get_valid_kprobe(kp); 1842 if (__disable_kprobe(kp) == NULL)
1616 if (unlikely(p == NULL)) {
1617 ret = -EINVAL; 1843 ret = -EINVAL;
1618 goto out;
1619 }
1620 1844
1621 /* If the probe is already disabled (or gone), just return */
1622 if (kprobe_disabled(kp))
1623 goto out;
1624
1625 kp->flags |= KPROBE_FLAG_DISABLED;
1626 if (p != kp)
1627 /* When kp != p, p is always enabled. */
1628 try_to_disable_aggr_kprobe(p);
1629
1630 if (!kprobes_all_disarmed && kprobe_disabled(p))
1631 disarm_kprobe(p);
1632out:
1633 mutex_unlock(&kprobe_mutex); 1845 mutex_unlock(&kprobe_mutex);
1634 return ret; 1846 return ret;
1635} 1847}
@@ -1927,36 +2139,27 @@ static void __kprobes disarm_all_kprobes(void)
1927 mutex_lock(&kprobe_mutex); 2139 mutex_lock(&kprobe_mutex);
1928 2140
1929 /* If kprobes are already disarmed, just return */ 2141 /* If kprobes are already disarmed, just return */
1930 if (kprobes_all_disarmed) 2142 if (kprobes_all_disarmed) {
1931 goto already_disabled; 2143 mutex_unlock(&kprobe_mutex);
2144 return;
2145 }
1932 2146
1933 kprobes_all_disarmed = true; 2147 kprobes_all_disarmed = true;
1934 printk(KERN_INFO "Kprobes globally disabled\n"); 2148 printk(KERN_INFO "Kprobes globally disabled\n");
1935 2149
1936 /*
1937 * Here we call get_online_cpus() for avoiding text_mutex deadlock,
1938 * because disarming may also unoptimize kprobes.
1939 */
1940 get_online_cpus();
1941 mutex_lock(&text_mutex); 2150 mutex_lock(&text_mutex);
1942 for (i = 0; i < KPROBE_TABLE_SIZE; i++) { 2151 for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
1943 head = &kprobe_table[i]; 2152 head = &kprobe_table[i];
1944 hlist_for_each_entry_rcu(p, node, head, hlist) { 2153 hlist_for_each_entry_rcu(p, node, head, hlist) {
1945 if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) 2154 if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p))
1946 __disarm_kprobe(p); 2155 __disarm_kprobe(p, false);
1947 } 2156 }
1948 } 2157 }
1949
1950 mutex_unlock(&text_mutex); 2158 mutex_unlock(&text_mutex);
1951 put_online_cpus();
1952 mutex_unlock(&kprobe_mutex); 2159 mutex_unlock(&kprobe_mutex);
1953 /* Allow all currently running kprobes to complete */
1954 synchronize_sched();
1955 return;
1956 2160
1957already_disabled: 2161 /* Wait for disarming all kprobes by optimizer */
1958 mutex_unlock(&kprobe_mutex); 2162 wait_for_kprobe_optimizer();
1959 return;
1960} 2163}
1961 2164
1962/* 2165/*
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index eac7e3364335..aede71245e9f 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -312,9 +312,75 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
312 ctx->nr_stat++; 312 ctx->nr_stat++;
313} 313}
314 314
315/*
316 * Called at perf_event creation and when events are attached/detached from a
317 * group.
318 */
319static void perf_event__read_size(struct perf_event *event)
320{
321 int entry = sizeof(u64); /* value */
322 int size = 0;
323 int nr = 1;
324
325 if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
326 size += sizeof(u64);
327
328 if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
329 size += sizeof(u64);
330
331 if (event->attr.read_format & PERF_FORMAT_ID)
332 entry += sizeof(u64);
333
334 if (event->attr.read_format & PERF_FORMAT_GROUP) {
335 nr += event->group_leader->nr_siblings;
336 size += sizeof(u64);
337 }
338
339 size += entry * nr;
340 event->read_size = size;
341}
342
343static void perf_event__header_size(struct perf_event *event)
344{
345 struct perf_sample_data *data;
346 u64 sample_type = event->attr.sample_type;
347 u16 size = 0;
348
349 perf_event__read_size(event);
350
351 if (sample_type & PERF_SAMPLE_IP)
352 size += sizeof(data->ip);
353
354 if (sample_type & PERF_SAMPLE_TID)
355 size += sizeof(data->tid_entry);
356
357 if (sample_type & PERF_SAMPLE_TIME)
358 size += sizeof(data->time);
359
360 if (sample_type & PERF_SAMPLE_ADDR)
361 size += sizeof(data->addr);
362
363 if (sample_type & PERF_SAMPLE_ID)
364 size += sizeof(data->id);
365
366 if (sample_type & PERF_SAMPLE_STREAM_ID)
367 size += sizeof(data->stream_id);
368
369 if (sample_type & PERF_SAMPLE_CPU)
370 size += sizeof(data->cpu_entry);
371
372 if (sample_type & PERF_SAMPLE_PERIOD)
373 size += sizeof(data->period);
374
375 if (sample_type & PERF_SAMPLE_READ)
376 size += event->read_size;
377
378 event->header_size = size;
379}
380
315static void perf_group_attach(struct perf_event *event) 381static void perf_group_attach(struct perf_event *event)
316{ 382{
317 struct perf_event *group_leader = event->group_leader; 383 struct perf_event *group_leader = event->group_leader, *pos;
318 384
319 /* 385 /*
320 * We can have double attach due to group movement in perf_event_open. 386 * We can have double attach due to group movement in perf_event_open.
@@ -333,6 +399,11 @@ static void perf_group_attach(struct perf_event *event)
333 399
334 list_add_tail(&event->group_entry, &group_leader->sibling_list); 400 list_add_tail(&event->group_entry, &group_leader->sibling_list);
335 group_leader->nr_siblings++; 401 group_leader->nr_siblings++;
402
403 perf_event__header_size(group_leader);
404
405 list_for_each_entry(pos, &group_leader->sibling_list, group_entry)
406 perf_event__header_size(pos);
336} 407}
337 408
338/* 409/*
@@ -391,7 +462,7 @@ static void perf_group_detach(struct perf_event *event)
391 if (event->group_leader != event) { 462 if (event->group_leader != event) {
392 list_del_init(&event->group_entry); 463 list_del_init(&event->group_entry);
393 event->group_leader->nr_siblings--; 464 event->group_leader->nr_siblings--;
394 return; 465 goto out;
395 } 466 }
396 467
397 if (!list_empty(&event->group_entry)) 468 if (!list_empty(&event->group_entry))
@@ -410,6 +481,12 @@ static void perf_group_detach(struct perf_event *event)
410 /* Inherit group flags from the previous leader */ 481 /* Inherit group flags from the previous leader */
411 sibling->group_flags = event->group_flags; 482 sibling->group_flags = event->group_flags;
412 } 483 }
484
485out:
486 perf_event__header_size(event->group_leader);
487
488 list_for_each_entry(tmp, &event->group_leader->sibling_list, group_entry)
489 perf_event__header_size(tmp);
413} 490}
414 491
415static inline int 492static inline int
@@ -1073,7 +1150,7 @@ static int perf_event_refresh(struct perf_event *event, int refresh)
1073 /* 1150 /*
1074 * not supported on inherited events 1151 * not supported on inherited events
1075 */ 1152 */
1076 if (event->attr.inherit) 1153 if (event->attr.inherit || !is_sampling_event(event))
1077 return -EINVAL; 1154 return -EINVAL;
1078 1155
1079 atomic_add(refresh, &event->event_limit); 1156 atomic_add(refresh, &event->event_limit);
@@ -2289,31 +2366,6 @@ static int perf_release(struct inode *inode, struct file *file)
2289 return perf_event_release_kernel(event); 2366 return perf_event_release_kernel(event);
2290} 2367}
2291 2368
2292static int perf_event_read_size(struct perf_event *event)
2293{
2294 int entry = sizeof(u64); /* value */
2295 int size = 0;
2296 int nr = 1;
2297
2298 if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
2299 size += sizeof(u64);
2300
2301 if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
2302 size += sizeof(u64);
2303
2304 if (event->attr.read_format & PERF_FORMAT_ID)
2305 entry += sizeof(u64);
2306
2307 if (event->attr.read_format & PERF_FORMAT_GROUP) {
2308 nr += event->group_leader->nr_siblings;
2309 size += sizeof(u64);
2310 }
2311
2312 size += entry * nr;
2313
2314 return size;
2315}
2316
2317u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) 2369u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
2318{ 2370{
2319 struct perf_event *child; 2371 struct perf_event *child;
@@ -2428,7 +2480,7 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count)
2428 if (event->state == PERF_EVENT_STATE_ERROR) 2480 if (event->state == PERF_EVENT_STATE_ERROR)
2429 return 0; 2481 return 0;
2430 2482
2431 if (count < perf_event_read_size(event)) 2483 if (count < event->read_size)
2432 return -ENOSPC; 2484 return -ENOSPC;
2433 2485
2434 WARN_ON_ONCE(event->ctx->parent_ctx); 2486 WARN_ON_ONCE(event->ctx->parent_ctx);
@@ -2514,7 +2566,7 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
2514 int ret = 0; 2566 int ret = 0;
2515 u64 value; 2567 u64 value;
2516 2568
2517 if (!event->attr.sample_period) 2569 if (!is_sampling_event(event))
2518 return -EINVAL; 2570 return -EINVAL;
2519 2571
2520 if (copy_from_user(&value, arg, sizeof(value))) 2572 if (copy_from_user(&value, arg, sizeof(value)))
@@ -3606,59 +3658,34 @@ void perf_prepare_sample(struct perf_event_header *header,
3606 data->type = sample_type; 3658 data->type = sample_type;
3607 3659
3608 header->type = PERF_RECORD_SAMPLE; 3660 header->type = PERF_RECORD_SAMPLE;
3609 header->size = sizeof(*header); 3661 header->size = sizeof(*header) + event->header_size;
3610 3662
3611 header->misc = 0; 3663 header->misc = 0;
3612 header->misc |= perf_misc_flags(regs); 3664 header->misc |= perf_misc_flags(regs);
3613 3665
3614 if (sample_type & PERF_SAMPLE_IP) { 3666 if (sample_type & PERF_SAMPLE_IP)
3615 data->ip = perf_instruction_pointer(regs); 3667 data->ip = perf_instruction_pointer(regs);
3616 3668
3617 header->size += sizeof(data->ip);
3618 }
3619
3620 if (sample_type & PERF_SAMPLE_TID) { 3669 if (sample_type & PERF_SAMPLE_TID) {
3621 /* namespace issues */ 3670 /* namespace issues */
3622 data->tid_entry.pid = perf_event_pid(event, current); 3671 data->tid_entry.pid = perf_event_pid(event, current);
3623 data->tid_entry.tid = perf_event_tid(event, current); 3672 data->tid_entry.tid = perf_event_tid(event, current);
3624
3625 header->size += sizeof(data->tid_entry);
3626 } 3673 }
3627 3674
3628 if (sample_type & PERF_SAMPLE_TIME) { 3675 if (sample_type & PERF_SAMPLE_TIME)
3629 data->time = perf_clock(); 3676 data->time = perf_clock();
3630 3677
3631 header->size += sizeof(data->time); 3678 if (sample_type & PERF_SAMPLE_ID)
3632 }
3633
3634 if (sample_type & PERF_SAMPLE_ADDR)
3635 header->size += sizeof(data->addr);
3636
3637 if (sample_type & PERF_SAMPLE_ID) {
3638 data->id = primary_event_id(event); 3679 data->id = primary_event_id(event);
3639 3680
3640 header->size += sizeof(data->id); 3681 if (sample_type & PERF_SAMPLE_STREAM_ID)
3641 }
3642
3643 if (sample_type & PERF_SAMPLE_STREAM_ID) {
3644 data->stream_id = event->id; 3682 data->stream_id = event->id;
3645 3683
3646 header->size += sizeof(data->stream_id);
3647 }
3648
3649 if (sample_type & PERF_SAMPLE_CPU) { 3684 if (sample_type & PERF_SAMPLE_CPU) {
3650 data->cpu_entry.cpu = raw_smp_processor_id(); 3685 data->cpu_entry.cpu = raw_smp_processor_id();
3651 data->cpu_entry.reserved = 0; 3686 data->cpu_entry.reserved = 0;
3652
3653 header->size += sizeof(data->cpu_entry);
3654 } 3687 }
3655 3688
3656 if (sample_type & PERF_SAMPLE_PERIOD)
3657 header->size += sizeof(data->period);
3658
3659 if (sample_type & PERF_SAMPLE_READ)
3660 header->size += perf_event_read_size(event);
3661
3662 if (sample_type & PERF_SAMPLE_CALLCHAIN) { 3689 if (sample_type & PERF_SAMPLE_CALLCHAIN) {
3663 int size = 1; 3690 int size = 1;
3664 3691
@@ -3726,7 +3753,7 @@ perf_event_read_event(struct perf_event *event,
3726 .header = { 3753 .header = {
3727 .type = PERF_RECORD_READ, 3754 .type = PERF_RECORD_READ,
3728 .misc = 0, 3755 .misc = 0,
3729 .size = sizeof(read_event) + perf_event_read_size(event), 3756 .size = sizeof(read_event) + event->read_size,
3730 }, 3757 },
3731 .pid = perf_event_pid(event, task), 3758 .pid = perf_event_pid(event, task),
3732 .tid = perf_event_tid(event, task), 3759 .tid = perf_event_tid(event, task),
@@ -4240,6 +4267,13 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
4240 struct hw_perf_event *hwc = &event->hw; 4267 struct hw_perf_event *hwc = &event->hw;
4241 int ret = 0; 4268 int ret = 0;
4242 4269
4270 /*
4271 * Non-sampling counters might still use the PMI to fold short
4272 * hardware counters, ignore those.
4273 */
4274 if (unlikely(!is_sampling_event(event)))
4275 return 0;
4276
4243 if (!throttle) { 4277 if (!throttle) {
4244 hwc->interrupts++; 4278 hwc->interrupts++;
4245 } else { 4279 } else {
@@ -4385,7 +4419,7 @@ static void perf_swevent_event(struct perf_event *event, u64 nr,
4385 if (!regs) 4419 if (!regs)
4386 return; 4420 return;
4387 4421
4388 if (!hwc->sample_period) 4422 if (!is_sampling_event(event))
4389 return; 4423 return;
4390 4424
4391 if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq) 4425 if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq)
@@ -4548,7 +4582,7 @@ static int perf_swevent_add(struct perf_event *event, int flags)
4548 struct hw_perf_event *hwc = &event->hw; 4582 struct hw_perf_event *hwc = &event->hw;
4549 struct hlist_head *head; 4583 struct hlist_head *head;
4550 4584
4551 if (hwc->sample_period) { 4585 if (is_sampling_event(event)) {
4552 hwc->last_period = hwc->sample_period; 4586 hwc->last_period = hwc->sample_period;
4553 perf_swevent_set_period(event); 4587 perf_swevent_set_period(event);
4554 } 4588 }
@@ -4805,15 +4839,6 @@ static int perf_tp_event_init(struct perf_event *event)
4805 if (event->attr.type != PERF_TYPE_TRACEPOINT) 4839 if (event->attr.type != PERF_TYPE_TRACEPOINT)
4806 return -ENOENT; 4840 return -ENOENT;
4807 4841
4808 /*
4809 * Raw tracepoint data is a severe data leak, only allow root to
4810 * have these.
4811 */
4812 if ((event->attr.sample_type & PERF_SAMPLE_RAW) &&
4813 perf_paranoid_tracepoint_raw() &&
4814 !capable(CAP_SYS_ADMIN))
4815 return -EPERM;
4816
4817 err = perf_trace_init(event); 4842 err = perf_trace_init(event);
4818 if (err) 4843 if (err)
4819 return err; 4844 return err;
@@ -4926,31 +4951,33 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
4926static void perf_swevent_start_hrtimer(struct perf_event *event) 4951static void perf_swevent_start_hrtimer(struct perf_event *event)
4927{ 4952{
4928 struct hw_perf_event *hwc = &event->hw; 4953 struct hw_perf_event *hwc = &event->hw;
4954 s64 period;
4955
4956 if (!is_sampling_event(event))
4957 return;
4929 4958
4930 hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 4959 hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
4931 hwc->hrtimer.function = perf_swevent_hrtimer; 4960 hwc->hrtimer.function = perf_swevent_hrtimer;
4932 if (hwc->sample_period) {
4933 s64 period = local64_read(&hwc->period_left);
4934 4961
4935 if (period) { 4962 period = local64_read(&hwc->period_left);
4936 if (period < 0) 4963 if (period) {
4937 period = 10000; 4964 if (period < 0)
4965 period = 10000;
4938 4966
4939 local64_set(&hwc->period_left, 0); 4967 local64_set(&hwc->period_left, 0);
4940 } else { 4968 } else {
4941 period = max_t(u64, 10000, hwc->sample_period); 4969 period = max_t(u64, 10000, hwc->sample_period);
4942 } 4970 }
4943 __hrtimer_start_range_ns(&hwc->hrtimer, 4971 __hrtimer_start_range_ns(&hwc->hrtimer,
4944 ns_to_ktime(period), 0, 4972 ns_to_ktime(period), 0,
4945 HRTIMER_MODE_REL_PINNED, 0); 4973 HRTIMER_MODE_REL_PINNED, 0);
4946 }
4947} 4974}
4948 4975
4949static void perf_swevent_cancel_hrtimer(struct perf_event *event) 4976static void perf_swevent_cancel_hrtimer(struct perf_event *event)
4950{ 4977{
4951 struct hw_perf_event *hwc = &event->hw; 4978 struct hw_perf_event *hwc = &event->hw;
4952 4979
4953 if (hwc->sample_period) { 4980 if (is_sampling_event(event)) {
4954 ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); 4981 ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer);
4955 local64_set(&hwc->period_left, ktime_to_ns(remaining)); 4982 local64_set(&hwc->period_left, ktime_to_ns(remaining));
4956 4983
@@ -5715,6 +5742,11 @@ SYSCALL_DEFINE5(perf_event_open,
5715 mutex_unlock(&current->perf_event_mutex); 5742 mutex_unlock(&current->perf_event_mutex);
5716 5743
5717 /* 5744 /*
5745 * Precalculate sample_data sizes
5746 */
5747 perf_event__header_size(event);
5748
5749 /*
5718 * Drop the reference on the group_event after placing the 5750 * Drop the reference on the group_event after placing the
5719 * new event on the sibling_list. This ensures destruction 5751 * new event on the sibling_list. This ensures destruction
5720 * of the group leader will find the pointer to itself in 5752 * of the group leader will find the pointer to itself in
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 5abfa1518554..cbd97da7a613 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -746,22 +746,6 @@ static struct ctl_table kern_table[] = {
746 .extra2 = &one, 746 .extra2 = &one,
747 }, 747 },
748#endif 748#endif
749#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) && !defined(CONFIG_LOCKUP_DETECTOR)
750 {
751 .procname = "unknown_nmi_panic",
752 .data = &unknown_nmi_panic,
753 .maxlen = sizeof (int),
754 .mode = 0644,
755 .proc_handler = proc_dointvec,
756 },
757 {
758 .procname = "nmi_watchdog",
759 .data = &nmi_watchdog_enabled,
760 .maxlen = sizeof (int),
761 .mode = 0644,
762 .proc_handler = proc_nmi_enabled,
763 },
764#endif
765#if defined(CONFIG_X86) 749#if defined(CONFIG_X86)
766 { 750 {
767 .procname = "panic_on_unrecovered_nmi", 751 .procname = "panic_on_unrecovered_nmi",
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 39c059ca670e..19a359d5e6d5 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -21,17 +21,46 @@ typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
21/* Count the events in use (per event id, not per instance) */ 21/* Count the events in use (per event id, not per instance) */
22static int total_ref_count; 22static int total_ref_count;
23 23
24static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
25 struct perf_event *p_event)
26{
27 /* No tracing, just counting, so no obvious leak */
28 if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW))
29 return 0;
30
31 /* Some events are ok to be traced by non-root users... */
32 if (p_event->attach_state == PERF_ATTACH_TASK) {
33 if (tp_event->flags & TRACE_EVENT_FL_CAP_ANY)
34 return 0;
35 }
36
37 /*
38 * ...otherwise raw tracepoint data can be a severe data leak,
39 * only allow root to have these.
40 */
41 if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
42 return -EPERM;
43
44 return 0;
45}
46
24static int perf_trace_event_init(struct ftrace_event_call *tp_event, 47static int perf_trace_event_init(struct ftrace_event_call *tp_event,
25 struct perf_event *p_event) 48 struct perf_event *p_event)
26{ 49{
27 struct hlist_head __percpu *list; 50 struct hlist_head __percpu *list;
28 int ret = -ENOMEM; 51 int ret;
29 int cpu; 52 int cpu;
30 53
54 ret = perf_trace_event_perm(tp_event, p_event);
55 if (ret)
56 return ret;
57
31 p_event->tp_event = tp_event; 58 p_event->tp_event = tp_event;
32 if (tp_event->perf_refcount++ > 0) 59 if (tp_event->perf_refcount++ > 0)
33 return 0; 60 return 0;
34 61
62 ret = -ENOMEM;
63
35 list = alloc_percpu(struct hlist_head); 64 list = alloc_percpu(struct hlist_head);
36 if (!list) 65 if (!list)
37 goto fail; 66 goto fail;
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 6e3c41a4024c..cad4e42060a9 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -547,13 +547,13 @@ static struct notifier_block __cpuinitdata cpu_nfb = {
547 .notifier_call = cpu_callback 547 .notifier_call = cpu_callback
548}; 548};
549 549
550static int __init spawn_watchdog_task(void) 550void __init lockup_detector_init(void)
551{ 551{
552 void *cpu = (void *)(long)smp_processor_id(); 552 void *cpu = (void *)(long)smp_processor_id();
553 int err; 553 int err;
554 554
555 if (no_watchdog) 555 if (no_watchdog)
556 return 0; 556 return;
557 557
558 err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); 558 err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
559 WARN_ON(notifier_to_errno(err)); 559 WARN_ON(notifier_to_errno(err));
@@ -561,6 +561,5 @@ static int __init spawn_watchdog_task(void)
561 cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); 561 cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
562 register_cpu_notifier(&cpu_nfb); 562 register_cpu_notifier(&cpu_nfb);
563 563
564 return 0; 564 return;
565} 565}
566early_initcall(spawn_watchdog_task);