Diffstat (limited to 'kernel')
58 files changed, 1553 insertions, 619 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index bab1dffe37e9..42423665660a 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -74,6 +74,7 @@ obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
 obj-$(CONFIG_KPROBES) += kprobes.o
 obj-$(CONFIG_KGDB) += kgdb.o
 obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
+obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
 obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
 obj-$(CONFIG_SECCOMP) += seccomp.o
 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 917ab9525568..6e7351739a82 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -734,9 +734,6 @@ int audit_tag_tree(char *old, char *new)
 		dentry = dget(path.dentry);
 		path_put(&path);
 
-		if (dentry == tagged->mnt_root && dentry == mnt->mnt_root)
-			follow_up(&mnt, &dentry);
-
 		list_add_tail(&list, &tagged->mnt_list);
 
 		mutex_lock(&audit_filter_mutex);
diff --git a/kernel/exit.c b/kernel/exit.c
index 6686ed1e4aa3..abf9cf3b95c6 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -837,8 +837,7 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
 	 */
 	if (tsk->exit_signal != SIGCHLD && !task_detached(tsk) &&
 	    (tsk->parent_exec_id != tsk->real_parent->self_exec_id ||
-	     tsk->self_exec_id != tsk->parent_exec_id) &&
-	    !capable(CAP_KILL))
+	     tsk->self_exec_id != tsk->parent_exec_id))
 		tsk->exit_signal = SIGCHLD;
 
 	signal = tracehook_notify_death(tsk, &cookie, group_dead);
@@ -924,6 +923,8 @@ NORET_TYPE void do_exit(long code)
 		schedule();
 	}
 
+	exit_irq_thread();
+
 	exit_signals(tsk);  /* sets PF_EXITING */
 	/*
 	 * tsk->flags are checked in the futex code to protect against
diff --git a/kernel/fork.c b/kernel/fork.c
index 660c2b8765bc..b9e2edd00726 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -645,6 +645,9 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
 
 	tsk->min_flt = tsk->maj_flt = 0;
 	tsk->nvcsw = tsk->nivcsw = 0;
+#ifdef CONFIG_DETECT_HUNG_TASK
+	tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
+#endif
 
 	tsk->mm = NULL;
 	tsk->active_mm = NULL;
@@ -797,6 +800,12 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig)
 	sig->cputime_expires.virt_exp = cputime_zero;
 	sig->cputime_expires.sched_exp = 0;
 
+	if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
+		sig->cputime_expires.prof_exp =
+			secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
+		sig->cputimer.running = 1;
+	}
+
 	/* The timer lists. */
 	INIT_LIST_HEAD(&sig->cpu_timers[0]);
 	INIT_LIST_HEAD(&sig->cpu_timers[1]);
@@ -812,11 +821,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 		atomic_inc(&current->signal->live);
 		return 0;
 	}
-	sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
-
-	if (sig)
-		posix_cpu_timers_init_group(sig);
 
+	sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
 	tsk->signal = sig;
 	if (!sig)
 		return -ENOMEM;
@@ -856,6 +862,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
 	task_unlock(current->group_leader);
 
+	posix_cpu_timers_init_group(sig);
+
 	acct_init_pacct(&sig->pacct);
 
 	tty_audit_fork(sig);
@@ -1032,11 +1040,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
 	p->default_timer_slack_ns = current->timer_slack_ns;
 
-#ifdef CONFIG_DETECT_SOFTLOCKUP
-	p->last_switch_count = 0;
-	p->last_switch_timestamp = 0;
-#endif
-
 	task_io_accounting_init(&p->ioac);
 	acct_clear_integrals(p);
 
diff --git a/kernel/futex.c b/kernel/futex.c
index 6b50a024bca2..eef8cd26b5e5 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -883,7 +883,12 @@ retry_private:
 out_unlock:
 	double_unlock_hb(hb1, hb2);
 
-	/* drop_futex_key_refs() must be called outside the spinlocks. */
+	/*
+	 * drop_futex_key_refs() must be called outside the spinlocks. During
+	 * the requeue we moved futex_q's from the hash bucket at key1 to the
+	 * one at key2 and updated their key pointer. We no longer need to
+	 * hold the references to key1.
+	 */
 	while (--drop_count >= 0)
 		drop_futex_key_refs(&key1);
 
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index f394d2a42ca3..cb8a15c19583 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -651,14 +651,20 @@ static inline void hrtimer_init_timer_hres(struct hrtimer *timer)
  * and expiry check is done in the hrtimer_interrupt or in the softirq.
  */
 static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
-					    struct hrtimer_clock_base *base)
+					    struct hrtimer_clock_base *base,
+					    int wakeup)
 {
 	if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) {
-		spin_unlock(&base->cpu_base->lock);
-		raise_softirq_irqoff(HRTIMER_SOFTIRQ);
-		spin_lock(&base->cpu_base->lock);
+		if (wakeup) {
+			spin_unlock(&base->cpu_base->lock);
+			raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+			spin_lock(&base->cpu_base->lock);
+		} else
+			__raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+
 		return 1;
 	}
+
 	return 0;
 }
 
@@ -703,7 +709,8 @@ static inline int hrtimer_is_hres_enabled(void) { return 0; }
 static inline int hrtimer_switch_to_hres(void) { return 0; }
 static inline void hrtimer_force_reprogram(struct hrtimer_cpu_base *base) { }
 static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
-					    struct hrtimer_clock_base *base)
+					    struct hrtimer_clock_base *base,
+					    int wakeup)
 {
 	return 0;
 }
@@ -886,20 +893,9 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base)
 	return 0;
 }
 
-/**
- * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU
- * @timer:	the timer to be added
- * @tim:	expiry time
- * @delta_ns:	"slack" range for the timer
- * @mode:	expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL)
- *
- * Returns:
- *  0 on success
- *  1 when the timer was active
- */
-int
-hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_ns,
-			const enum hrtimer_mode mode)
+int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
+		unsigned long delta_ns, const enum hrtimer_mode mode,
+		int wakeup)
 {
 	struct hrtimer_clock_base *base, *new_base;
 	unsigned long flags;
@@ -940,12 +936,29 @@ hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_n
 	 * XXX send_remote_softirq() ?
 	 */
 	if (leftmost && new_base->cpu_base == &__get_cpu_var(hrtimer_bases))
-		hrtimer_enqueue_reprogram(timer, new_base);
+		hrtimer_enqueue_reprogram(timer, new_base, wakeup);
 
 	unlock_hrtimer_base(timer, &flags);
 
 	return ret;
 }
+
+/**
+ * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU
+ * @timer:	the timer to be added
+ * @tim:	expiry time
+ * @delta_ns:	"slack" range for the timer
+ * @mode:	expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL)
+ *
+ * Returns:
+ *  0 on success
+ *  1 when the timer was active
+ */
+int hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
+		unsigned long delta_ns, const enum hrtimer_mode mode)
+{
+	return __hrtimer_start_range_ns(timer, tim, delta_ns, mode, 1);
+}
 EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
 
 /**
@@ -961,7 +974,7 @@ EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
 int
 hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
 {
-	return hrtimer_start_range_ns(timer, tim, 0, mode);
+	return __hrtimer_start_range_ns(timer, tim, 0, mode, 1);
 }
 EXPORT_SYMBOL_GPL(hrtimer_start);
 
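For reference, a minimal usage sketch (not part of this commit) of the public entry point kept above: hrtimer_start_range_ns() now simply forwards to __hrtimer_start_range_ns() with wakeup=1, while callers that must avoid the wakeup path pass wakeup=0 so the softirq is only marked pending via __raise_softirq_irqoff() without dropping the base lock. The demo_* names below are illustrative assumptions:

#include <linux/hrtimer.h>
#include <linux/ktime.h>

static struct hrtimer demo_timer;		/* illustrative only */

static enum hrtimer_restart demo_timer_fn(struct hrtimer *t)
{
	/* expiry callback, runs in hard interrupt context */
	return HRTIMER_NORESTART;
}

static void demo_timer_arm(void)
{
	hrtimer_init(&demo_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	demo_timer.function = demo_timer_fn;

	/* expire in ~100ms with 1ms slack; ends up in __hrtimer_start_range_ns(..., wakeup=1) */
	hrtimer_start_range_ns(&demo_timer, ktime_set(0, 100 * NSEC_PER_MSEC),
			       NSEC_PER_MSEC, HRTIMER_MODE_REL);
}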
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
new file mode 100644
index 000000000000..022a4927b785
--- /dev/null
+++ b/kernel/hung_task.c
@@ -0,0 +1,217 @@
+/*
+ * Detect Hung Task
+ *
+ * kernel/hung_task.c - kernel thread for detecting tasks stuck in D state
+ *
+ */
+
+#include <linux/mm.h>
+#include <linux/cpu.h>
+#include <linux/nmi.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+#include <linux/lockdep.h>
+#include <linux/module.h>
+#include <linux/sysctl.h>
+
+/*
+ * The number of tasks checked:
+ */
+unsigned long __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
+
+/*
+ * Limit number of tasks checked in a batch.
+ *
+ * This value controls the preemptibility of khungtaskd since preemption
+ * is disabled during the critical section. It also controls the size of
+ * the RCU grace period. So it needs to be upper-bound.
+ */
+#define HUNG_TASK_BATCHING 1024
+
+/*
+ * Zero means infinite timeout - no checking done:
+ */
+unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120;
+
+unsigned long __read_mostly sysctl_hung_task_warnings = 10;
+
+static int __read_mostly did_panic;
+
+static struct task_struct *watchdog_task;
+
+/*
+ * Should we panic (and reboot, if panic_timeout= is set) when a
+ * hung task is detected:
+ */
+unsigned int __read_mostly sysctl_hung_task_panic =
+				CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE;
+
+static int __init hung_task_panic_setup(char *str)
+{
+	sysctl_hung_task_panic = simple_strtoul(str, NULL, 0);
+
+	return 1;
+}
+__setup("hung_task_panic=", hung_task_panic_setup);
+
+static int
+hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr)
+{
+	did_panic = 1;
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block panic_block = {
+	.notifier_call = hung_task_panic,
+};
+
+static void check_hung_task(struct task_struct *t, unsigned long timeout)
+{
+	unsigned long switch_count = t->nvcsw + t->nivcsw;
+
+	/*
+	 * Ensure the task is not frozen.
+	 * Also, a freshly created task that is scheduled once and changes
+	 * its state to TASK_UNINTERRUPTIBLE without ever having been
+	 * switched out mustn't be checked.
+	 */
+	if (unlikely(t->flags & PF_FROZEN || !switch_count))
+		return;
+
+	if (switch_count != t->last_switch_count) {
+		t->last_switch_count = switch_count;
+		return;
+	}
+	if (!sysctl_hung_task_warnings)
+		return;
+	sysctl_hung_task_warnings--;
+
+	/*
+	 * Ok, the task did not get scheduled for more than 2 minutes,
+	 * complain:
+	 */
+	printk(KERN_ERR "INFO: task %s:%d blocked for more than "
+			"%ld seconds.\n", t->comm, t->pid, timeout);
+	printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
+			" disables this message.\n");
+	sched_show_task(t);
+	__debug_show_held_locks(t);
+
+	touch_nmi_watchdog();
+
+	if (sysctl_hung_task_panic)
+		panic("hung_task: blocked tasks");
+}
+
+/*
+ * To avoid extending the RCU grace period for an unbounded amount of time,
+ * periodically exit the critical section and enter a new one.
+ *
+ * For preemptible RCU it is sufficient to call rcu_read_unlock in order
+ * to exit the grace period. For classic RCU, a reschedule is required.
+ */
+static void rcu_lock_break(struct task_struct *g, struct task_struct *t)
+{
+	get_task_struct(g);
+	get_task_struct(t);
+	rcu_read_unlock();
+	cond_resched();
+	rcu_read_lock();
+	put_task_struct(t);
+	put_task_struct(g);
+}
+
+/*
+ * Check whether a TASK_UNINTERRUPTIBLE task does not get woken up for
+ * a really long time (120 seconds). If that happens, print out
+ * a warning.
+ */
+static void check_hung_uninterruptible_tasks(unsigned long timeout)
+{
+	int max_count = sysctl_hung_task_check_count;
+	int batch_count = HUNG_TASK_BATCHING;
+	struct task_struct *g, *t;
+
+	/*
+	 * If the system crashed already then all bets are off,
+	 * do not report extra hung tasks:
+	 */
+	if (test_taint(TAINT_DIE) || did_panic)
+		return;
+
+	rcu_read_lock();
+	do_each_thread(g, t) {
+		if (!--max_count)
+			goto unlock;
+		if (!--batch_count) {
+			batch_count = HUNG_TASK_BATCHING;
+			rcu_lock_break(g, t);
+			/* Exit if t or g was unhashed during refresh. */
+			if (t->state == TASK_DEAD || g->state == TASK_DEAD)
+				goto unlock;
+		}
+		/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
+		if (t->state == TASK_UNINTERRUPTIBLE)
+			check_hung_task(t, timeout);
+	} while_each_thread(g, t);
+ unlock:
+	rcu_read_unlock();
+}
+
+static unsigned long timeout_jiffies(unsigned long timeout)
+{
+	/* timeout of 0 will disable the watchdog */
+	return timeout ? timeout * HZ : MAX_SCHEDULE_TIMEOUT;
+}
+
+/*
+ * Process updating of timeout sysctl
+ */
+int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
+				  struct file *filp, void __user *buffer,
+				  size_t *lenp, loff_t *ppos)
+{
+	int ret;
+
+	ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
+
+	if (ret || !write)
+		goto out;
+
+	wake_up_process(watchdog_task);
+
+ out:
+	return ret;
+}
+
+/*
+ * kthread which checks for tasks stuck in D state
+ */
+static int watchdog(void *dummy)
+{
+	set_user_nice(current, 0);
+
+	for ( ; ; ) {
+		unsigned long timeout = sysctl_hung_task_timeout_secs;
+
+		while (schedule_timeout_interruptible(timeout_jiffies(timeout)))
+			timeout = sysctl_hung_task_timeout_secs;
+
+		check_hung_uninterruptible_tasks(timeout);
+	}
+
+	return 0;
+}
+
+static int __init hung_task_init(void)
+{
+	atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
+	watchdog_task = kthread_run(watchdog, NULL, "khungtaskd");
+
+	return 0;
+}
+
+module_init(hung_task_init);
diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c
index 38a25b8d8bff..d06df9c41cba 100644
--- a/kernel/irq/devres.c
+++ b/kernel/irq/devres.c
@@ -26,10 +26,12 @@ static int devm_irq_match(struct device *dev, void *res, void *data)
 }
 
 /**
- * devm_request_irq - allocate an interrupt line for a managed device
+ * devm_request_threaded_irq - allocate an interrupt line for a managed device
  * @dev: device to request interrupt for
  * @irq: Interrupt line to allocate
  * @handler: Function to be called when the IRQ occurs
+ * @thread_fn: function to be called in a threaded interrupt context. NULL
+ *             for devices which handle everything in @handler
  * @irqflags: Interrupt type flags
  * @devname: An ascii name for the claiming device
  * @dev_id: A cookie passed back to the handler function
@@ -42,9 +44,10 @@ static int devm_irq_match(struct device *dev, void *res, void *data)
  * If an IRQ allocated with this function needs to be freed
  * separately, dev_free_irq() must be used.
  */
-int devm_request_irq(struct device *dev, unsigned int irq,
-		     irq_handler_t handler, unsigned long irqflags,
-		     const char *devname, void *dev_id)
+int devm_request_threaded_irq(struct device *dev, unsigned int irq,
+			      irq_handler_t handler, irq_handler_t thread_fn,
+			      unsigned long irqflags, const char *devname,
+			      void *dev_id)
 {
 	struct irq_devres *dr;
 	int rc;
@@ -54,7 +57,8 @@ int devm_request_irq(struct device *dev, unsigned int irq,
 	if (!dr)
 		return -ENOMEM;
 
-	rc = request_irq(irq, handler, irqflags, devname, dev_id);
+	rc = request_threaded_irq(irq, handler, thread_fn, irqflags, devname,
+				  dev_id);
 	if (rc) {
 		devres_free(dr);
 		return rc;
@@ -66,7 +70,7 @@ int devm_request_irq(struct device *dev, unsigned int irq,
 
 	return 0;
 }
-EXPORT_SYMBOL(devm_request_irq);
+EXPORT_SYMBOL(devm_request_threaded_irq);
 
 /**
  * devm_free_irq - free an interrupt
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 343acecae629..d82142be8dd2 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -339,6 +339,15 @@ irqreturn_t no_action(int cpl, void *dev_id)
 	return IRQ_NONE;
 }
 
+static void warn_no_thread(unsigned int irq, struct irqaction *action)
+{
+	if (test_and_set_bit(IRQTF_WARNED, &action->thread_flags))
+		return;
+
+	printk(KERN_WARNING "IRQ %d device %s returned IRQ_WAKE_THREAD "
+	       "but no thread function available.", irq, action->name);
+}
+
 DEFINE_TRACE(irq_handler_entry);
 DEFINE_TRACE(irq_handler_exit);
 
@@ -363,8 +372,47 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
 		trace_irq_handler_entry(irq, action);
 		ret = action->handler(irq, action->dev_id);
 		trace_irq_handler_exit(irq, action, ret);
-		if (ret == IRQ_HANDLED)
+
+		switch (ret) {
+		case IRQ_WAKE_THREAD:
+			/*
+			 * Set result to handled so the spurious check
+			 * does not trigger.
+			 */
+			ret = IRQ_HANDLED;
+
+			/*
+			 * Catch drivers which return WAKE_THREAD but
+			 * did not set up a thread function
+			 */
+			if (unlikely(!action->thread_fn)) {
+				warn_no_thread(irq, action);
+				break;
+			}
+
+			/*
+			 * Wake up the handler thread for this
+			 * action. In case the thread crashed and was
+			 * killed we just pretend that we handled the
+			 * interrupt. The hardirq handler above has
+			 * disabled the device interrupt, so no irq
+			 * storm is lurking.
+			 */
+			if (likely(!test_bit(IRQTF_DIED,
+					     &action->thread_flags))) {
+				set_bit(IRQTF_RUNTHREAD, &action->thread_flags);
+				wake_up_process(action->thread);
+			}
+
+			/* Fall through to add to randomness */
+		case IRQ_HANDLED:
 			status |= action->flags;
+			break;
+
+		default:
+			break;
+		}
+
 		retval |= ret;
 		action = action->next;
 	} while (action);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 1516ab77355c..2734eca59243 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -8,16 +8,15 @@
  */
 
 #include <linux/irq.h>
+#include <linux/kthread.h>
 #include <linux/module.h>
 #include <linux/random.h>
 #include <linux/interrupt.h>
 #include <linux/slab.h>
+#include <linux/sched.h>
 
 #include "internals.h"
 
-#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS)
-cpumask_var_t irq_default_affinity;
-
 /**
  * synchronize_irq - wait for pending IRQ handlers (on other CPUs)
  * @irq: interrupt number to wait for
@@ -53,9 +52,18 @@ void synchronize_irq(unsigned int irq)
 
 		/* Oops, that failed? */
 	} while (status & IRQ_INPROGRESS);
+
+	/*
+	 * We made sure that no hardirq handler is running. Now verify
+	 * that no threaded handlers are active.
+	 */
+	wait_event(desc->wait_for_threads, !atomic_read(&desc->threads_active));
 }
 EXPORT_SYMBOL(synchronize_irq);
 
+#ifdef CONFIG_SMP
+cpumask_var_t irq_default_affinity;
+
 /**
  * irq_can_set_affinity - Check if the affinity of a given irq can be set
  * @irq: Interrupt to check
@@ -72,6 +80,18 @@ int irq_can_set_affinity(unsigned int irq)
 	return 1;
 }
 
+static void
+irq_set_thread_affinity(struct irq_desc *desc, const struct cpumask *cpumask)
+{
+	struct irqaction *action = desc->action;
+
+	while (action) {
+		if (action->thread)
+			set_cpus_allowed_ptr(action->thread, cpumask);
+		action = action->next;
+	}
+}
+
 /**
  * irq_set_affinity - Set the irq affinity of a given irq
  * @irq: Interrupt to set affinity
@@ -89,10 +109,9 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 	spin_lock_irqsave(&desc->lock, flags);
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
-	if (desc->status & IRQ_MOVE_PCNTXT || desc->status & IRQ_DISABLED) {
-		cpumask_copy(desc->affinity, cpumask);
+	if (desc->status & IRQ_MOVE_PCNTXT)
 		desc->chip->set_affinity(irq, cpumask);
-	} else {
+	else {
 		desc->status |= IRQ_MOVE_PENDING;
 		cpumask_copy(desc->pending_mask, cpumask);
 	}
@@ -100,6 +119,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 	cpumask_copy(desc->affinity, cpumask);
 	desc->chip->set_affinity(irq, cpumask);
 #endif
+	irq_set_thread_affinity(desc, cpumask);
 	desc->status |= IRQ_AFFINITY_SET;
 	spin_unlock_irqrestore(&desc->lock, flags);
 	return 0;
@@ -150,6 +170,8 @@ int irq_select_affinity_usr(unsigned int irq)
 
 	spin_lock_irqsave(&desc->lock, flags);
 	ret = setup_affinity(irq, desc);
+	if (!ret)
+		irq_set_thread_affinity(desc, desc->affinity);
 	spin_unlock_irqrestore(&desc->lock, flags);
 
 	return ret;
@@ -401,6 +423,90 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
 	return ret;
 }
 
+static int irq_wait_for_interrupt(struct irqaction *action)
+{
+	while (!kthread_should_stop()) {
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		if (test_and_clear_bit(IRQTF_RUNTHREAD,
+				       &action->thread_flags)) {
+			__set_current_state(TASK_RUNNING);
+			return 0;
+		}
+		schedule();
+	}
+	return -1;
+}
+
+/*
+ * Interrupt handler thread
+ */
+static int irq_thread(void *data)
+{
+	struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2, };
+	struct irqaction *action = data;
+	struct irq_desc *desc = irq_to_desc(action->irq);
+	int wake;
+
+	sched_setscheduler(current, SCHED_FIFO, &param);
+	current->irqaction = action;
+
+	while (!irq_wait_for_interrupt(action)) {
+
+		atomic_inc(&desc->threads_active);
+
+		spin_lock_irq(&desc->lock);
+		if (unlikely(desc->status & IRQ_DISABLED)) {
+			/*
+			 * CHECKME: We might need a dedicated
+			 * IRQ_THREAD_PENDING flag here, which
+			 * retriggers the thread in check_irq_resend()
+			 * but AFAICT IRQ_PENDING should be fine as it
+			 * retriggers the interrupt itself --- tglx
+			 */
+			desc->status |= IRQ_PENDING;
+			spin_unlock_irq(&desc->lock);
+		} else {
+			spin_unlock_irq(&desc->lock);
+
+			action->thread_fn(action->irq, action->dev_id);
+		}
+
+		wake = atomic_dec_and_test(&desc->threads_active);
+
+		if (wake && waitqueue_active(&desc->wait_for_threads))
+			wake_up(&desc->wait_for_threads);
+	}
+
+	/*
+	 * Clear irqaction. Otherwise exit_irq_thread() would make
+	 * fuzz about an active irq thread going into nirvana.
+	 */
+	current->irqaction = NULL;
+	return 0;
+}
+
+/*
+ * Called from do_exit()
+ */
+void exit_irq_thread(void)
+{
+	struct task_struct *tsk = current;
+
+	if (!tsk->irqaction)
+		return;
+
+	printk(KERN_ERR
+	       "exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n",
+	       tsk->comm ? tsk->comm : "", tsk->pid, tsk->irqaction->irq);
+
+	/*
+	 * Set the THREAD DIED flag to prevent further wakeups of the
+	 * soon to be gone threaded handler.
+	 */
+	set_bit(IRQTF_DIED, &tsk->irqaction->flags);
+}
+
 /*
  * Internal function to register an irqaction - typically used to
  * allocate special interrupts that are part of the architecture.
@@ -437,6 +543,26 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 	}
 
 	/*
+	 * Threaded handler ?
+	 */
+	if (new->thread_fn) {
+		struct task_struct *t;
+
+		t = kthread_create(irq_thread, new, "irq/%d-%s", irq,
+				   new->name);
+		if (IS_ERR(t))
+			return PTR_ERR(t);
+		/*
+		 * We keep the reference to the task struct even if
+		 * the thread dies to avoid that the interrupt code
+		 * references an already freed task_struct.
+		 */
+		get_task_struct(t);
+		new->thread = t;
+		wake_up_process(t);
+	}
+
+	/*
 	 * The following block of code has to be executed atomically
 	 */
 	spin_lock_irqsave(&desc->lock, flags);
@@ -473,15 +599,15 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 	if (!shared) {
 		irq_chip_set_defaults(desc->chip);
 
+		init_waitqueue_head(&desc->wait_for_threads);
+
 		/* Setup the type (level, edge polarity) if configured: */
 		if (new->flags & IRQF_TRIGGER_MASK) {
 			ret = __irq_set_trigger(desc, irq,
 					new->flags & IRQF_TRIGGER_MASK);
 
-			if (ret) {
-				spin_unlock_irqrestore(&desc->lock, flags);
-				return ret;
-			}
+			if (ret)
+				goto out_thread;
 		} else
 			compat_irq_chip_set_default_handler(desc);
 #if defined(CONFIG_IRQ_PER_CPU)
@@ -549,8 +675,19 @@ mismatch:
 		dump_stack();
 	}
 #endif
+	ret = -EBUSY;
+
+out_thread:
 	spin_unlock_irqrestore(&desc->lock, flags);
-	return -EBUSY;
+	if (new->thread) {
+		struct task_struct *t = new->thread;
+
+		new->thread = NULL;
+		if (likely(!test_bit(IRQTF_DIED, &new->thread_flags)))
+			kthread_stop(t);
+		put_task_struct(t);
+	}
+	return ret;
 }
 
 /**
@@ -576,6 +713,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irqaction *action, **action_ptr;
+	struct task_struct *irqthread;
 	unsigned long flags;
 
 	WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq);
@@ -622,6 +760,10 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
 		else
 			desc->chip->disable(irq);
 	}
+
+	irqthread = action->thread;
+	action->thread = NULL;
+
 	spin_unlock_irqrestore(&desc->lock, flags);
 
 	unregister_handler_proc(irq, action);
@@ -629,6 +771,12 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
 	/* Make sure it's not being used on another CPU: */
 	synchronize_irq(irq);
 
+	if (irqthread) {
+		if (!test_bit(IRQTF_DIED, &action->thread_flags))
+			kthread_stop(irqthread);
+		put_task_struct(irqthread);
+	}
+
 #ifdef CONFIG_DEBUG_SHIRQ
 	/*
 	 * It's a shared IRQ -- the driver ought to be prepared for an IRQ
@@ -681,9 +829,12 @@ void free_irq(unsigned int irq, void *dev_id)
 EXPORT_SYMBOL(free_irq);
 
 /**
- * request_irq - allocate an interrupt line
+ * request_threaded_irq - allocate an interrupt line
  * @irq: Interrupt line to allocate
- * @handler: Function to be called when the IRQ occurs
+ * @handler: Function to be called when the IRQ occurs.
+ *           Primary handler for threaded interrupts
+ * @thread_fn: Function called from the irq handler thread
+ *             If NULL, no irq thread is created
 * @irqflags: Interrupt type flags
 * @devname: An ascii name for the claiming device
 * @dev_id: A cookie passed back to the handler function
@@ -695,6 +846,15 @@ EXPORT_SYMBOL(free_irq);
 * raises, you must take care both to initialise your hardware
 * and to set up the interrupt handler in the right order.
 *
+ * If you want to set up a threaded irq handler for your device
+ * then you need to supply @handler and @thread_fn. @handler is
+ * still called in hard interrupt context and has to check
+ * whether the interrupt originates from the device. If yes, it
+ * needs to disable the interrupt on the device and return
+ * IRQ_WAKE_THREAD, which will wake up the handler thread and run
+ * @thread_fn. This split handler design is necessary to support
+ * shared interrupts.
+ *
 * Dev_id must be globally unique. Normally the address of the
 * device data structure is used as the cookie. Since the handler
 * receives this value it makes sense to use it.
@@ -710,8 +870,9 @@ EXPORT_SYMBOL(free_irq);
 * IRQF_TRIGGER_*	Specify active edge(s) or level
 *
 */
-int request_irq(unsigned int irq, irq_handler_t handler,
-		unsigned long irqflags, const char *devname, void *dev_id)
+int request_threaded_irq(unsigned int irq, irq_handler_t handler,
+			 irq_handler_t thread_fn, unsigned long irqflags,
+			 const char *devname, void *dev_id)
 {
 	struct irqaction *action;
 	struct irq_desc *desc;
@@ -759,6 +920,7 @@ int request_irq(unsigned int irq, irq_handler_t handler,
 		return -ENOMEM;
 
 	action->handler = handler;
+	action->thread_fn = thread_fn;
 	action->flags = irqflags;
 	action->name = devname;
 	action->dev_id = dev_id;
@@ -788,4 +950,4 @@ int request_irq(unsigned int irq, irq_handler_t handler,
 #endif
 	return retval;
 }
-EXPORT_SYMBOL(request_irq);
+EXPORT_SYMBOL(request_threaded_irq);
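To illustrate the request_threaded_irq() interface added above, here is a minimal, hedged sketch of a driver that splits its handler: the hard handler only checks and silences the device and returns IRQ_WAKE_THREAD, while the sleepable work runs in the irq thread. The foo_* device type and helpers are hypothetical stand-ins, not part of this patch:

#include <linux/interrupt.h>

/* hypothetical device state and register helpers */
struct foo_dev { void __iomem *regs; };

static bool foo_irq_pending(struct foo_dev *d) { return true; }
static void foo_mask_irq(struct foo_dev *d) { }
static void foo_process_events(struct foo_dev *d) { }

/* hard handler: hardirq context, must not sleep */
static irqreturn_t foo_hardirq(int irq, void *dev_id)
{
	struct foo_dev *d = dev_id;

	if (!foo_irq_pending(d))
		return IRQ_NONE;	/* shared line, not our device */

	foo_mask_irq(d);		/* quiesce the device ... */
	return IRQ_WAKE_THREAD;		/* ... and wake foo_thread_fn() */
}

/* threaded handler: runs in process context and may sleep */
static irqreturn_t foo_thread_fn(int irq, void *dev_id)
{
	foo_process_events(dev_id);
	return IRQ_HANDLED;
}

/* in the driver's probe path:
 *	err = request_threaded_irq(irq, foo_hardirq, foo_thread_fn,
 *				   IRQF_SHARED, "foo", d);
 */

Managed drivers can use devm_request_threaded_irq() (see the kernel/irq/devres.c change above) so that the line is freed automatically on driver detach.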
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c
index 243d6121e50e..44bbdcbaf8d2 100644
--- a/kernel/irq/numa_migrate.c
+++ b/kernel/irq/numa_migrate.c
@@ -54,6 +54,7 @@ static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc,
 static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc)
 {
 	free_kstat_irqs(old_desc, desc);
+	free_desc_masks(old_desc, desc);
 	arch_free_chip_data(old_desc, desc);
 }
 
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 5016bfb682b9..a5e74ddee0e2 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -68,7 +68,7 @@ static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
 static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
 
 /* NOTE: change this value only with kprobe_mutex held */
-static bool kprobe_enabled;
+static bool kprobes_all_disarmed;
 
 static DEFINE_MUTEX(kprobe_mutex);	/* Protects kprobe_table */
 static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
@@ -328,7 +328,7 @@ static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
 	struct kprobe *kp;
 
 	list_for_each_entry_rcu(kp, &p->list, list) {
-		if (kp->pre_handler && !kprobe_gone(kp)) {
+		if (kp->pre_handler && likely(!kprobe_disabled(kp))) {
 			set_kprobe_instance(kp);
 			if (kp->pre_handler(kp, regs))
 				return 1;
@@ -344,7 +344,7 @@ static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
 	struct kprobe *kp;
 
 	list_for_each_entry_rcu(kp, &p->list, list) {
-		if (kp->post_handler && !kprobe_gone(kp)) {
+		if (kp->post_handler && likely(!kprobe_disabled(kp))) {
 			set_kprobe_instance(kp);
 			kp->post_handler(kp, regs, flags);
 			reset_kprobe_instance();
@@ -518,20 +518,28 @@ static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p)
 }
 
 /*
- * Add the new probe to old_p->list. Fail if this is the
+ * Add the new probe to ap->list. Fail if this is the
  * second jprobe at the address - two jprobes can't coexist
  */
-static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p)
+static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p)
 {
+	BUG_ON(kprobe_gone(ap) || kprobe_gone(p));
 	if (p->break_handler) {
-		if (old_p->break_handler)
+		if (ap->break_handler)
 			return -EEXIST;
-		list_add_tail_rcu(&p->list, &old_p->list);
-		old_p->break_handler = aggr_break_handler;
+		list_add_tail_rcu(&p->list, &ap->list);
+		ap->break_handler = aggr_break_handler;
 	} else
-		list_add_rcu(&p->list, &old_p->list);
-	if (p->post_handler && !old_p->post_handler)
-		old_p->post_handler = aggr_post_handler;
+		list_add_rcu(&p->list, &ap->list);
+	if (p->post_handler && !ap->post_handler)
+		ap->post_handler = aggr_post_handler;
+
+	if (kprobe_disabled(ap) && !kprobe_disabled(p)) {
+		ap->flags &= ~KPROBE_FLAG_DISABLED;
+		if (!kprobes_all_disarmed)
+			/* Arm the breakpoint again. */
+			arch_arm_kprobe(ap);
+	}
 	return 0;
 }
 
@@ -544,6 +552,7 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
 	copy_kprobe(p, ap);
 	flush_insn_slot(ap);
 	ap->addr = p->addr;
+	ap->flags = p->flags;
 	ap->pre_handler = aggr_pre_handler;
 	ap->fault_handler = aggr_fault_handler;
 	/* We don't care the kprobe which has gone. */
@@ -566,44 +575,59 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
 					  struct kprobe *p)
 {
 	int ret = 0;
-	struct kprobe *ap;
+	struct kprobe *ap = old_p;
 
-	if (kprobe_gone(old_p)) {
+	if (old_p->pre_handler != aggr_pre_handler) {
+		/* If old_p is not an aggr_probe, create new aggr_kprobe. */
+		ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL);
+		if (!ap)
+			return -ENOMEM;
+		add_aggr_kprobe(ap, old_p);
+	}
+
+	if (kprobe_gone(ap)) {
 		/*
 		 * Attempting to insert new probe at the same location that
 		 * had a probe in the module vaddr area which already
 		 * freed. So, the instruction slot has already been
 		 * released. We need a new slot for the new probe.
 		 */
-		ret = arch_prepare_kprobe(old_p);
+		ret = arch_prepare_kprobe(ap);
 		if (ret)
+			/*
+			 * Even if fail to allocate new slot, don't need to
+			 * free aggr_probe. It will be used next time, or
+			 * freed by unregister_kprobe.
+			 */
 			return ret;
-	}
-	if (old_p->pre_handler == aggr_pre_handler) {
-		copy_kprobe(old_p, p);
-		ret = add_new_kprobe(old_p, p);
-		ap = old_p;
-	} else {
-		ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL);
-		if (!ap) {
-			if (kprobe_gone(old_p))
-				arch_remove_kprobe(old_p);
-			return -ENOMEM;
-		}
-		add_aggr_kprobe(ap, old_p);
-		copy_kprobe(ap, p);
-		ret = add_new_kprobe(ap, p);
-	}
-	if (kprobe_gone(old_p)) {
+
 		/*
-		 * If the old_p has gone, its breakpoint has been disarmed.
-		 * We have to arm it again after preparing real kprobes.
+		 * Clear gone flag to prevent allocating new slot again, and
+		 * set disabled flag because it is not armed yet.
 		 */
-		ap->flags &= ~KPROBE_FLAG_GONE;
-		if (kprobe_enabled)
-			arch_arm_kprobe(ap);
+		ap->flags = (ap->flags & ~KPROBE_FLAG_GONE)
+			    | KPROBE_FLAG_DISABLED;
 	}
-	return ret;
+
+	copy_kprobe(ap, p);
+	return add_new_kprobe(ap, p);
+}
+
+/* Try to disable aggr_kprobe, and return 1 if succeeded.*/
+static int __kprobes try_to_disable_aggr_kprobe(struct kprobe *p)
+{
+	struct kprobe *kp;
+
+	list_for_each_entry_rcu(kp, &p->list, list) {
+		if (!kprobe_disabled(kp))
+			/*
+			 * There is an active probe on the list.
+			 * We can't disable aggr_kprobe.
+			 */
+			return 0;
+	}
+	p->flags |= KPROBE_FLAG_DISABLED;
+	return 1;
 }
 
 static int __kprobes in_kprobes_functions(unsigned long addr)
@@ -664,7 +688,9 @@ int __kprobes register_kprobe(struct kprobe *p)
 		return -EINVAL;
 	}
 
-	p->flags = 0;
+	/* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */
+	p->flags &= KPROBE_FLAG_DISABLED;
+
 	/*
 	 * Check if are we probing a module.
 	 */
@@ -709,7 +735,7 @@ int __kprobes register_kprobe(struct kprobe *p)
 	hlist_add_head_rcu(&p->hlist,
 		       &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);
 
-	if (kprobe_enabled)
+	if (!kprobes_all_disarmed && !kprobe_disabled(p))
 		arch_arm_kprobe(p);
 
 out_unlock_text:
@@ -722,26 +748,39 @@ out:
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(register_kprobe);
 
-/*
- * Unregister a kprobe without a scheduler synchronization.
- */
-static int __kprobes __unregister_kprobe_top(struct kprobe *p)
+/* Check passed kprobe is valid and return kprobe in kprobe_table. */
+static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
 {
 	struct kprobe *old_p, *list_p;
 
 	old_p = get_kprobe(p->addr);
 	if (unlikely(!old_p))
-		return -EINVAL;
+		return NULL;
 
 	if (p != old_p) {
 		list_for_each_entry_rcu(list_p, &old_p->list, list)
 			if (list_p == p)
 			/* kprobe p is a valid probe */
-				goto valid_p;
-		return -EINVAL;
+				goto valid;
+		return NULL;
 	}
-valid_p:
+valid:
+	return old_p;
+}
+
+/*
+ * Unregister a kprobe without a scheduler synchronization.
+ */
+static int __kprobes __unregister_kprobe_top(struct kprobe *p)
+{
+	struct kprobe *old_p, *list_p;
+
+	old_p = __get_valid_kprobe(p);
+	if (old_p == NULL)
+		return -EINVAL;
+
 	if (old_p == p ||
 	    (old_p->pre_handler == aggr_pre_handler &&
 	     list_is_singular(&old_p->list))) {
@@ -750,7 +789,7 @@ valid_p:
 	 * enabled and not gone - otherwise, the breakpoint would
 	 * already have been removed. We save on flushing icache.
 	 */
-	if (kprobe_enabled && !kprobe_gone(old_p)) {
+	if (!kprobes_all_disarmed && !kprobe_disabled(old_p)) {
 		mutex_lock(&text_mutex);
 		arch_disarm_kprobe(p);
 		mutex_unlock(&text_mutex);
@@ -768,6 +807,11 @@ valid_p:
 		}
 noclean:
 		list_del_rcu(&p->list);
+		if (!kprobe_disabled(old_p)) {
+			try_to_disable_aggr_kprobe(old_p);
+			if (!kprobes_all_disarmed && kprobe_disabled(old_p))
+				arch_disarm_kprobe(old_p);
+		}
 	}
 	return 0;
 }
@@ -803,11 +847,13 @@ int __kprobes register_kprobes(struct kprobe **kps, int num)
 	}
 	return ret;
 }
+EXPORT_SYMBOL_GPL(register_kprobes);
 
 void __kprobes unregister_kprobe(struct kprobe *p)
 {
 	unregister_kprobes(&p, 1);
 }
+EXPORT_SYMBOL_GPL(unregister_kprobe);
 
 void __kprobes unregister_kprobes(struct kprobe **kps, int num)
 {
@@ -826,6 +872,7 @@ void __kprobes unregister_kprobes(struct kprobe **kps, int num)
 		if (kps[i]->addr)
 			__unregister_kprobe_bottom(kps[i]);
 }
+EXPORT_SYMBOL_GPL(unregister_kprobes);
 
 static struct notifier_block kprobe_exceptions_nb = {
 	.notifier_call = kprobe_exceptions_notify,
@@ -865,16 +912,19 @@ int __kprobes register_jprobes(struct jprobe **jps, int num)
 	}
 	return ret;
 }
+EXPORT_SYMBOL_GPL(register_jprobes);
 
 int __kprobes register_jprobe(struct jprobe *jp)
 {
 	return register_jprobes(&jp, 1);
 }
+EXPORT_SYMBOL_GPL(register_jprobe);
 
 void __kprobes unregister_jprobe(struct jprobe *jp)
 {
 	unregister_jprobes(&jp, 1);
 }
+EXPORT_SYMBOL_GPL(unregister_jprobe);
 
 void __kprobes unregister_jprobes(struct jprobe **jps, int num)
 {
@@ -894,6 +944,7 @@ void __kprobes unregister_jprobes(struct jprobe **jps, int num)
 			__unregister_kprobe_bottom(&jps[i]->kp);
 	}
 }
+EXPORT_SYMBOL_GPL(unregister_jprobes);
 
 #ifdef CONFIG_KRETPROBES
 /*
@@ -987,6 +1038,7 @@ int __kprobes register_kretprobe(struct kretprobe *rp)
 		free_rp_inst(rp);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(register_kretprobe);
 
 int __kprobes register_kretprobes(struct kretprobe **rps, int num)
 {
@@ -1004,11 +1056,13 @@ int __kprobes register_kretprobes(struct kretprobe **rps, int num)
 	}
 	return ret;
 }
+EXPORT_SYMBOL_GPL(register_kretprobes);
 
 void __kprobes unregister_kretprobe(struct kretprobe *rp)
 {
 	unregister_kretprobes(&rp, 1);
 }
+EXPORT_SYMBOL_GPL(unregister_kretprobe);
 
 void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
 {
| @@ -1030,24 +1084,30 @@ void __kprobes unregister_kretprobes(struct kretprobe **rps, int num) | |||
| 1030 | } | 1084 | } |
| 1031 | } | 1085 | } |
| 1032 | } | 1086 | } |
| 1087 | EXPORT_SYMBOL_GPL(unregister_kretprobes); | ||
| 1033 | 1088 | ||
| 1034 | #else /* CONFIG_KRETPROBES */ | 1089 | #else /* CONFIG_KRETPROBES */ |
| 1035 | int __kprobes register_kretprobe(struct kretprobe *rp) | 1090 | int __kprobes register_kretprobe(struct kretprobe *rp) |
| 1036 | { | 1091 | { |
| 1037 | return -ENOSYS; | 1092 | return -ENOSYS; |
| 1038 | } | 1093 | } |
| 1094 | EXPORT_SYMBOL_GPL(register_kretprobe); | ||
| 1039 | 1095 | ||
| 1040 | int __kprobes register_kretprobes(struct kretprobe **rps, int num) | 1096 | int __kprobes register_kretprobes(struct kretprobe **rps, int num) |
| 1041 | { | 1097 | { |
| 1042 | return -ENOSYS; | 1098 | return -ENOSYS; |
| 1043 | } | 1099 | } |
| 1100 | EXPORT_SYMBOL_GPL(register_kretprobes); | ||
| 1101 | |||
| 1044 | void __kprobes unregister_kretprobe(struct kretprobe *rp) | 1102 | void __kprobes unregister_kretprobe(struct kretprobe *rp) |
| 1045 | { | 1103 | { |
| 1046 | } | 1104 | } |
| 1105 | EXPORT_SYMBOL_GPL(unregister_kretprobe); | ||
| 1047 | 1106 | ||
| 1048 | void __kprobes unregister_kretprobes(struct kretprobe **rps, int num) | 1107 | void __kprobes unregister_kretprobes(struct kretprobe **rps, int num) |
| 1049 | { | 1108 | { |
| 1050 | } | 1109 | } |
| 1110 | EXPORT_SYMBOL_GPL(unregister_kretprobes); | ||
| 1051 | 1111 | ||
| 1052 | static int __kprobes pre_handler_kretprobe(struct kprobe *p, | 1112 | static int __kprobes pre_handler_kretprobe(struct kprobe *p, |
| 1053 | struct pt_regs *regs) | 1113 | struct pt_regs *regs) |
| @@ -1061,6 +1121,7 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p, | |||
| 1061 | static void __kprobes kill_kprobe(struct kprobe *p) | 1121 | static void __kprobes kill_kprobe(struct kprobe *p) |
| 1062 | { | 1122 | { |
| 1063 | struct kprobe *kp; | 1123 | struct kprobe *kp; |
| 1124 | |||
| 1064 | p->flags |= KPROBE_FLAG_GONE; | 1125 | p->flags |= KPROBE_FLAG_GONE; |
| 1065 | if (p->pre_handler == aggr_pre_handler) { | 1126 | if (p->pre_handler == aggr_pre_handler) { |
| 1066 | /* | 1127 | /* |
| @@ -1173,8 +1234,8 @@ static int __init init_kprobes(void) | |||
| 1173 | } | 1234 | } |
| 1174 | } | 1235 | } |
| 1175 | 1236 | ||
| 1176 | /* By default, kprobes are enabled */ | 1237 | /* By default, kprobes are armed */ |
| 1177 | kprobe_enabled = true; | 1238 | kprobes_all_disarmed = false; |
| 1178 | 1239 | ||
| 1179 | err = arch_init_kprobes(); | 1240 | err = arch_init_kprobes(); |
| 1180 | if (!err) | 1241 | if (!err) |
| @@ -1202,12 +1263,18 @@ static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p, | |||
| 1202 | else | 1263 | else |
| 1203 | kprobe_type = "k"; | 1264 | kprobe_type = "k"; |
| 1204 | if (sym) | 1265 | if (sym) |
| 1205 | seq_printf(pi, "%p %s %s+0x%x %s %s\n", p->addr, kprobe_type, | 1266 | seq_printf(pi, "%p %s %s+0x%x %s %s%s\n", |
| 1206 | sym, offset, (modname ? modname : " "), | 1267 | p->addr, kprobe_type, sym, offset, |
| 1207 | (kprobe_gone(p) ? "[GONE]" : "")); | 1268 | (modname ? modname : " "), |
| 1269 | (kprobe_gone(p) ? "[GONE]" : ""), | ||
| 1270 | ((kprobe_disabled(p) && !kprobe_gone(p)) ? | ||
| 1271 | "[DISABLED]" : "")); | ||
| 1208 | else | 1272 | else |
| 1209 | seq_printf(pi, "%p %s %p %s\n", p->addr, kprobe_type, p->addr, | 1273 | seq_printf(pi, "%p %s %p %s%s\n", |
| 1210 | (kprobe_gone(p) ? "[GONE]" : "")); | 1274 | p->addr, kprobe_type, p->addr, |
| 1275 | (kprobe_gone(p) ? "[GONE]" : ""), | ||
| 1276 | ((kprobe_disabled(p) && !kprobe_gone(p)) ? | ||
| 1277 | "[DISABLED]" : "")); | ||
| 1211 | } | 1278 | } |
| 1212 | 1279 | ||
| 1213 | static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos) | 1280 | static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos) |
| @@ -1272,7 +1339,72 @@ static struct file_operations debugfs_kprobes_operations = { | |||
| 1272 | .release = seq_release, | 1339 | .release = seq_release, |
| 1273 | }; | 1340 | }; |
| 1274 | 1341 | ||
| 1275 | static void __kprobes enable_all_kprobes(void) | 1342 | /* Disable one kprobe */ |
| 1343 | int __kprobes disable_kprobe(struct kprobe *kp) | ||
| 1344 | { | ||
| 1345 | int ret = 0; | ||
| 1346 | struct kprobe *p; | ||
| 1347 | |||
| 1348 | mutex_lock(&kprobe_mutex); | ||
| 1349 | |||
| 1350 | /* Check whether specified probe is valid. */ | ||
| 1351 | p = __get_valid_kprobe(kp); | ||
| 1352 | if (unlikely(p == NULL)) { | ||
| 1353 | ret = -EINVAL; | ||
| 1354 | goto out; | ||
| 1355 | } | ||
| 1356 | |||
| 1357 | /* If the probe is already disabled (or gone), just return */ | ||
| 1358 | if (kprobe_disabled(kp)) | ||
| 1359 | goto out; | ||
| 1360 | |||
| 1361 | kp->flags |= KPROBE_FLAG_DISABLED; | ||
| 1362 | if (p != kp) | ||
| 1363 | /* When kp != p, p is always enabled. */ | ||
| 1364 | try_to_disable_aggr_kprobe(p); | ||
| 1365 | |||
| 1366 | if (!kprobes_all_disarmed && kprobe_disabled(p)) | ||
| 1367 | arch_disarm_kprobe(p); | ||
| 1368 | out: | ||
| 1369 | mutex_unlock(&kprobe_mutex); | ||
| 1370 | return ret; | ||
| 1371 | } | ||
| 1372 | EXPORT_SYMBOL_GPL(disable_kprobe); | ||
| 1373 | |||
| 1374 | /* Enable one kprobe */ | ||
| 1375 | int __kprobes enable_kprobe(struct kprobe *kp) | ||
| 1376 | { | ||
| 1377 | int ret = 0; | ||
| 1378 | struct kprobe *p; | ||
| 1379 | |||
| 1380 | mutex_lock(&kprobe_mutex); | ||
| 1381 | |||
| 1382 | /* Check whether specified probe is valid. */ | ||
| 1383 | p = __get_valid_kprobe(kp); | ||
| 1384 | if (unlikely(p == NULL)) { | ||
| 1385 | ret = -EINVAL; | ||
| 1386 | goto out; | ||
| 1387 | } | ||
| 1388 | |||
| 1389 | if (kprobe_gone(kp)) { | ||
| 1390 | /* This kprobe has gone, we couldn't enable it. */ | ||
| 1391 | ret = -EINVAL; | ||
| 1392 | goto out; | ||
| 1393 | } | ||
| 1394 | |||
| 1395 | if (!kprobes_all_disarmed && kprobe_disabled(p)) | ||
| 1396 | arch_arm_kprobe(p); | ||
| 1397 | |||
| 1398 | p->flags &= ~KPROBE_FLAG_DISABLED; | ||
| 1399 | if (p != kp) | ||
| 1400 | kp->flags &= ~KPROBE_FLAG_DISABLED; | ||
| 1401 | out: | ||
| 1402 | mutex_unlock(&kprobe_mutex); | ||
| 1403 | return ret; | ||
| 1404 | } | ||
| 1405 | EXPORT_SYMBOL_GPL(enable_kprobe); | ||
| 1406 | |||
| 1407 | static void __kprobes arm_all_kprobes(void) | ||
| 1276 | { | 1408 | { |
| 1277 | struct hlist_head *head; | 1409 | struct hlist_head *head; |
| 1278 | struct hlist_node *node; | 1410 | struct hlist_node *node; |
| @@ -1281,20 +1413,20 @@ static void __kprobes enable_all_kprobes(void) | |||
| 1281 | 1413 | ||
| 1282 | mutex_lock(&kprobe_mutex); | 1414 | mutex_lock(&kprobe_mutex); |
| 1283 | 1415 | ||
| 1284 | /* If kprobes are already enabled, just return */ | 1416 | /* If kprobes are armed, just return */ |
| 1285 | if (kprobe_enabled) | 1417 | if (!kprobes_all_disarmed) |
| 1286 | goto already_enabled; | 1418 | goto already_enabled; |
| 1287 | 1419 | ||
| 1288 | mutex_lock(&text_mutex); | 1420 | mutex_lock(&text_mutex); |
| 1289 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { | 1421 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { |
| 1290 | head = &kprobe_table[i]; | 1422 | head = &kprobe_table[i]; |
| 1291 | hlist_for_each_entry_rcu(p, node, head, hlist) | 1423 | hlist_for_each_entry_rcu(p, node, head, hlist) |
| 1292 | if (!kprobe_gone(p)) | 1424 | if (!kprobe_disabled(p)) |
| 1293 | arch_arm_kprobe(p); | 1425 | arch_arm_kprobe(p); |
| 1294 | } | 1426 | } |
| 1295 | mutex_unlock(&text_mutex); | 1427 | mutex_unlock(&text_mutex); |
| 1296 | 1428 | ||
| 1297 | kprobe_enabled = true; | 1429 | kprobes_all_disarmed = false; |
| 1298 | printk(KERN_INFO "Kprobes globally enabled\n"); | 1430 | printk(KERN_INFO "Kprobes globally enabled\n"); |
| 1299 | 1431 | ||
| 1300 | already_enabled: | 1432 | already_enabled: |
| @@ -1302,7 +1434,7 @@ already_enabled: | |||
| 1302 | return; | 1434 | return; |
| 1303 | } | 1435 | } |
| 1304 | 1436 | ||
| 1305 | static void __kprobes disable_all_kprobes(void) | 1437 | static void __kprobes disarm_all_kprobes(void) |
| 1306 | { | 1438 | { |
| 1307 | struct hlist_head *head; | 1439 | struct hlist_head *head; |
| 1308 | struct hlist_node *node; | 1440 | struct hlist_node *node; |
| @@ -1311,17 +1443,17 @@ static void __kprobes disable_all_kprobes(void) | |||
| 1311 | 1443 | ||
| 1312 | mutex_lock(&kprobe_mutex); | 1444 | mutex_lock(&kprobe_mutex); |
| 1313 | 1445 | ||
| 1314 | /* If kprobes are already disabled, just return */ | 1446 | /* If kprobes are already disarmed, just return */ |
| 1315 | if (!kprobe_enabled) | 1447 | if (kprobes_all_disarmed) |
| 1316 | goto already_disabled; | 1448 | goto already_disabled; |
| 1317 | 1449 | ||
| 1318 | kprobe_enabled = false; | 1450 | kprobes_all_disarmed = true; |
| 1319 | printk(KERN_INFO "Kprobes globally disabled\n"); | 1451 | printk(KERN_INFO "Kprobes globally disabled\n"); |
| 1320 | mutex_lock(&text_mutex); | 1452 | mutex_lock(&text_mutex); |
| 1321 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { | 1453 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { |
| 1322 | head = &kprobe_table[i]; | 1454 | head = &kprobe_table[i]; |
| 1323 | hlist_for_each_entry_rcu(p, node, head, hlist) { | 1455 | hlist_for_each_entry_rcu(p, node, head, hlist) { |
| 1324 | if (!arch_trampoline_kprobe(p) && !kprobe_gone(p)) | 1456 | if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) |
| 1325 | arch_disarm_kprobe(p); | 1457 | arch_disarm_kprobe(p); |
| 1326 | } | 1458 | } |
| 1327 | } | 1459 | } |
| @@ -1347,7 +1479,7 @@ static ssize_t read_enabled_file_bool(struct file *file, | |||
| 1347 | { | 1479 | { |
| 1348 | char buf[3]; | 1480 | char buf[3]; |
| 1349 | 1481 | ||
| 1350 | if (kprobe_enabled) | 1482 | if (!kprobes_all_disarmed) |
| 1351 | buf[0] = '1'; | 1483 | buf[0] = '1'; |
| 1352 | else | 1484 | else |
| 1353 | buf[0] = '0'; | 1485 | buf[0] = '0'; |
| @@ -1370,12 +1502,12 @@ static ssize_t write_enabled_file_bool(struct file *file, | |||
| 1370 | case 'y': | 1502 | case 'y': |
| 1371 | case 'Y': | 1503 | case 'Y': |
| 1372 | case '1': | 1504 | case '1': |
| 1373 | enable_all_kprobes(); | 1505 | arm_all_kprobes(); |
| 1374 | break; | 1506 | break; |
| 1375 | case 'n': | 1507 | case 'n': |
| 1376 | case 'N': | 1508 | case 'N': |
| 1377 | case '0': | 1509 | case '0': |
| 1378 | disable_all_kprobes(); | 1510 | disarm_all_kprobes(); |
| 1379 | break; | 1511 | break; |
| 1380 | } | 1512 | } |
| 1381 | 1513 | ||
| @@ -1418,16 +1550,5 @@ late_initcall(debugfs_kprobe_init); | |||
| 1418 | 1550 | ||
| 1419 | module_init(init_kprobes); | 1551 | module_init(init_kprobes); |
| 1420 | 1552 | ||
| 1421 | EXPORT_SYMBOL_GPL(register_kprobe); | 1553 | /* defined in arch/.../kernel/kprobes.c */ |
| 1422 | EXPORT_SYMBOL_GPL(unregister_kprobe); | ||
| 1423 | EXPORT_SYMBOL_GPL(register_kprobes); | ||
| 1424 | EXPORT_SYMBOL_GPL(unregister_kprobes); | ||
| 1425 | EXPORT_SYMBOL_GPL(register_jprobe); | ||
| 1426 | EXPORT_SYMBOL_GPL(unregister_jprobe); | ||
| 1427 | EXPORT_SYMBOL_GPL(register_jprobes); | ||
| 1428 | EXPORT_SYMBOL_GPL(unregister_jprobes); | ||
| 1429 | EXPORT_SYMBOL_GPL(jprobe_return); | 1554 | EXPORT_SYMBOL_GPL(jprobe_return); |
| 1430 | EXPORT_SYMBOL_GPL(register_kretprobe); | ||
| 1431 | EXPORT_SYMBOL_GPL(unregister_kretprobe); | ||
| 1432 | EXPORT_SYMBOL_GPL(register_kretprobes); | ||
| 1433 | EXPORT_SYMBOL_GPL(unregister_kretprobes); | ||
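
The kprobes hunks above rename the global on/off machinery to arm/disarm, move the EXPORT_SYMBOL_GPL() lines next to their functions, teach the debugfs listing to print [DISABLED], and add a per-probe enable_kprobe()/disable_kprobe() pair built on __get_valid_kprobe(). Below is a minimal sketch of how a module might use that per-probe interface; the probed symbol (do_fork) and the hit counter are illustrative and not part of the patch.

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/kprobes.h>

static unsigned long hits;

/* Pre-handler: runs just before the probed instruction executes. */
static int handler_pre(struct kprobe *p, struct pt_regs *regs)
{
        hits++;
        return 0;
}

static struct kprobe kp = {
        .symbol_name = "do_fork",       /* assumed probe target */
        .pre_handler = handler_pre,
};

static int __init kp_example_init(void)
{
        int ret = register_kprobe(&kp);         /* probe starts armed */

        if (ret < 0)
                return ret;

        /* Mute the probe without tearing it down; the debugfs listing
         * changed above reports it as [DISABLED]. */
        disable_kprobe(&kp);

        /* Re-arm it when the data is wanted again. */
        enable_kprobe(&kp);
        return 0;
}

static void __exit kp_example_exit(void)
{
        unregister_kprobe(&kp);
        pr_info("kprobe at %s hit %lu times\n", kp.symbol_name, hits);
}

module_init(kp_example_init);
module_exit(kp_example_exit);
MODULE_LICENSE("GPL");

disable_kprobe() leaves the probe registered but disarmed, so re-enabling it is much cheaper than a full unregister/register cycle.
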
diff --git a/kernel/kthread.c b/kernel/kthread.c index 84bbadd4d021..4ebaf8519abf 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c | |||
| @@ -76,6 +76,7 @@ static int kthread(void *_create) | |||
| 76 | 76 | ||
| 77 | /* OK, tell user we're spawned, wait for stop or wakeup */ | 77 | /* OK, tell user we're spawned, wait for stop or wakeup */ |
| 78 | __set_current_state(TASK_UNINTERRUPTIBLE); | 78 | __set_current_state(TASK_UNINTERRUPTIBLE); |
| 79 | create->result = current; | ||
| 79 | complete(&create->started); | 80 | complete(&create->started); |
| 80 | schedule(); | 81 | schedule(); |
| 81 | 82 | ||
| @@ -96,22 +97,10 @@ static void create_kthread(struct kthread_create_info *create) | |||
| 96 | 97 | ||
| 97 | /* We want our own signal handler (we take no signals by default). */ | 98 | /* We want our own signal handler (we take no signals by default). */ |
| 98 | pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD); | 99 | pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD); |
| 99 | if (pid < 0) { | 100 | if (pid < 0) |
| 100 | create->result = ERR_PTR(pid); | 101 | create->result = ERR_PTR(pid); |
| 101 | } else { | 102 | else |
| 102 | struct sched_param param = { .sched_priority = 0 }; | ||
| 103 | wait_for_completion(&create->started); | 103 | wait_for_completion(&create->started); |
| 104 | read_lock(&tasklist_lock); | ||
| 105 | create->result = find_task_by_pid_ns(pid, &init_pid_ns); | ||
| 106 | read_unlock(&tasklist_lock); | ||
| 107 | /* | ||
| 108 | * root may have changed our (kthreadd's) priority or CPU mask. | ||
| 109 | * The kernel thread should not inherit these properties. | ||
| 110 | */ | ||
| 111 | sched_setscheduler(create->result, SCHED_NORMAL, ¶m); | ||
| 112 | set_user_nice(create->result, KTHREAD_NICE_LEVEL); | ||
| 113 | set_cpus_allowed_ptr(create->result, cpu_all_mask); | ||
| 114 | } | ||
| 115 | complete(&create->done); | 104 | complete(&create->done); |
| 116 | } | 105 | } |
| 117 | 106 | ||
| @@ -154,11 +143,20 @@ struct task_struct *kthread_create(int (*threadfn)(void *data), | |||
| 154 | wait_for_completion(&create.done); | 143 | wait_for_completion(&create.done); |
| 155 | 144 | ||
| 156 | if (!IS_ERR(create.result)) { | 145 | if (!IS_ERR(create.result)) { |
| 146 | struct sched_param param = { .sched_priority = 0 }; | ||
| 157 | va_list args; | 147 | va_list args; |
| 148 | |||
| 158 | va_start(args, namefmt); | 149 | va_start(args, namefmt); |
| 159 | vsnprintf(create.result->comm, sizeof(create.result->comm), | 150 | vsnprintf(create.result->comm, sizeof(create.result->comm), |
| 160 | namefmt, args); | 151 | namefmt, args); |
| 161 | va_end(args); | 152 | va_end(args); |
| 153 | /* | ||
| 154 | * root may have changed our (kthreadd's) priority or CPU mask. | ||
| 155 | * The kernel thread should not inherit these properties. | ||
| 156 | */ | ||
| 157 | sched_setscheduler_nocheck(create.result, SCHED_NORMAL, ¶m); | ||
| 158 | set_user_nice(create.result, KTHREAD_NICE_LEVEL); | ||
| 159 | set_cpus_allowed_ptr(create.result, cpu_all_mask); | ||
| 162 | } | 160 | } |
| 163 | return create.result; | 161 | return create.result; |
| 164 | } | 162 | } |
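
The kthread change above has the new thread publish itself through create->result and moves the SCHED_NORMAL/cpu_all_mask fixup out of create_kthread() into kthread_create(), so the helper thread no longer inherits a reniced or pinned kthreadd. Callers keep the same API; a minimal sketch of the usual pattern, with an illustrative worker loop, follows.

#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/delay.h>
#include <linux/err.h>

static struct task_struct *worker;

static int worker_fn(void *data)
{
        /* Loop until kthread_stop() is called on us. */
        while (!kthread_should_stop()) {
                /* ... periodic work would go here ... */
                msleep(1000);
        }
        return 0;
}

static int __init kt_example_init(void)
{
        /* The new thread always starts with SCHED_NORMAL and the full
         * CPU mask, even if kthreadd itself was reniced or pinned. */
        worker = kthread_run(worker_fn, NULL, "kt-example");
        if (IS_ERR(worker))
                return PTR_ERR(worker);
        return 0;
}

static void __exit kt_example_exit(void)
{
        kthread_stop(worker);
}

module_init(kt_example_init);
module_exit(kt_example_exit);
MODULE_LICENSE("GPL");

kthread_run() is just kthread_create() plus wake_up_process(), so the scheduling reset moved above applies to both entry points.
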
diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 81b5f33970b8..accb40cdb12a 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c | |||
| @@ -793,6 +793,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) | |||
| 793 | 793 | ||
| 794 | printk("BUG: MAX_LOCKDEP_KEYS too low!\n"); | 794 | printk("BUG: MAX_LOCKDEP_KEYS too low!\n"); |
| 795 | printk("turning off the locking correctness validator.\n"); | 795 | printk("turning off the locking correctness validator.\n"); |
| 796 | dump_stack(); | ||
| 796 | return NULL; | 797 | return NULL; |
| 797 | } | 798 | } |
| 798 | class = lock_classes + nr_lock_classes++; | 799 | class = lock_classes + nr_lock_classes++; |
| @@ -856,6 +857,7 @@ static struct lock_list *alloc_list_entry(void) | |||
| 856 | 857 | ||
| 857 | printk("BUG: MAX_LOCKDEP_ENTRIES too low!\n"); | 858 | printk("BUG: MAX_LOCKDEP_ENTRIES too low!\n"); |
| 858 | printk("turning off the locking correctness validator.\n"); | 859 | printk("turning off the locking correctness validator.\n"); |
| 860 | dump_stack(); | ||
| 859 | return NULL; | 861 | return NULL; |
| 860 | } | 862 | } |
| 861 | return list_entries + nr_list_entries++; | 863 | return list_entries + nr_list_entries++; |
| @@ -1682,6 +1684,7 @@ cache_hit: | |||
| 1682 | 1684 | ||
| 1683 | printk("BUG: MAX_LOCKDEP_CHAINS too low!\n"); | 1685 | printk("BUG: MAX_LOCKDEP_CHAINS too low!\n"); |
| 1684 | printk("turning off the locking correctness validator.\n"); | 1686 | printk("turning off the locking correctness validator.\n"); |
| 1687 | dump_stack(); | ||
| 1685 | return 0; | 1688 | return 0; |
| 1686 | } | 1689 | } |
| 1687 | chain = lock_chains + nr_lock_chains++; | 1690 | chain = lock_chains + nr_lock_chains++; |
| @@ -2487,13 +2490,20 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this, | |||
| 2487 | void lockdep_init_map(struct lockdep_map *lock, const char *name, | 2490 | void lockdep_init_map(struct lockdep_map *lock, const char *name, |
| 2488 | struct lock_class_key *key, int subclass) | 2491 | struct lock_class_key *key, int subclass) |
| 2489 | { | 2492 | { |
| 2490 | if (unlikely(!debug_locks)) | 2493 | lock->class_cache = NULL; |
| 2494 | #ifdef CONFIG_LOCK_STAT | ||
| 2495 | lock->cpu = raw_smp_processor_id(); | ||
| 2496 | #endif | ||
| 2497 | |||
| 2498 | if (DEBUG_LOCKS_WARN_ON(!name)) { | ||
| 2499 | lock->name = "NULL"; | ||
| 2491 | return; | 2500 | return; |
| 2501 | } | ||
| 2502 | |||
| 2503 | lock->name = name; | ||
| 2492 | 2504 | ||
| 2493 | if (DEBUG_LOCKS_WARN_ON(!key)) | 2505 | if (DEBUG_LOCKS_WARN_ON(!key)) |
| 2494 | return; | 2506 | return; |
| 2495 | if (DEBUG_LOCKS_WARN_ON(!name)) | ||
| 2496 | return; | ||
| 2497 | /* | 2507 | /* |
| 2498 | * Sanity check, the lock-class key must be persistent: | 2508 | * Sanity check, the lock-class key must be persistent: |
| 2499 | */ | 2509 | */ |
| @@ -2502,12 +2512,11 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name, | |||
| 2502 | DEBUG_LOCKS_WARN_ON(1); | 2512 | DEBUG_LOCKS_WARN_ON(1); |
| 2503 | return; | 2513 | return; |
| 2504 | } | 2514 | } |
| 2505 | lock->name = name; | ||
| 2506 | lock->key = key; | 2515 | lock->key = key; |
| 2507 | lock->class_cache = NULL; | 2516 | |
| 2508 | #ifdef CONFIG_LOCK_STAT | 2517 | if (unlikely(!debug_locks)) |
| 2509 | lock->cpu = raw_smp_processor_id(); | 2518 | return; |
| 2510 | #endif | 2519 | |
| 2511 | if (subclass) | 2520 | if (subclass) |
| 2512 | register_lock_class(lock, subclass, 1); | 2521 | register_lock_class(lock, subclass, 1); |
| 2513 | } | 2522 | } |
| @@ -2541,6 +2550,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
| 2541 | debug_locks_off(); | 2550 | debug_locks_off(); |
| 2542 | printk("BUG: MAX_LOCKDEP_SUBCLASSES too low!\n"); | 2551 | printk("BUG: MAX_LOCKDEP_SUBCLASSES too low!\n"); |
| 2543 | printk("turning off the locking correctness validator.\n"); | 2552 | printk("turning off the locking correctness validator.\n"); |
| 2553 | dump_stack(); | ||
| 2544 | return 0; | 2554 | return 0; |
| 2545 | } | 2555 | } |
| 2546 | 2556 | ||
| @@ -2637,6 +2647,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
| 2637 | debug_locks_off(); | 2647 | debug_locks_off(); |
| 2638 | printk("BUG: MAX_LOCK_DEPTH too low!\n"); | 2648 | printk("BUG: MAX_LOCK_DEPTH too low!\n"); |
| 2639 | printk("turning off the locking correctness validator.\n"); | 2649 | printk("turning off the locking correctness validator.\n"); |
| 2650 | dump_stack(); | ||
| 2640 | return 0; | 2651 | return 0; |
| 2641 | } | 2652 | } |
| 2642 | 2653 | ||
diff --git a/kernel/module.c b/kernel/module.c index c268a771595c..e797812a4d95 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
| @@ -1952,9 +1952,6 @@ static noinline struct module *load_module(void __user *umod, | |||
| 1952 | if (strstarts(secstrings+sechdrs[i].sh_name, ".exit")) | 1952 | if (strstarts(secstrings+sechdrs[i].sh_name, ".exit")) |
| 1953 | sechdrs[i].sh_flags &= ~(unsigned long)SHF_ALLOC; | 1953 | sechdrs[i].sh_flags &= ~(unsigned long)SHF_ALLOC; |
| 1954 | #endif | 1954 | #endif |
| 1955 | /* Don't keep __versions around; it's just for loading. */ | ||
| 1956 | if (strcmp(secstrings + sechdrs[i].sh_name, "__versions") == 0) | ||
| 1957 | sechdrs[i].sh_flags &= ~(unsigned long)SHF_ALLOC; | ||
| 1958 | } | 1955 | } |
| 1959 | 1956 | ||
| 1960 | modindex = find_sec(hdr, sechdrs, secstrings, | 1957 | modindex = find_sec(hdr, sechdrs, secstrings, |
| @@ -2391,6 +2388,9 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, | |||
| 2391 | blocking_notifier_call_chain(&module_notify_list, | 2388 | blocking_notifier_call_chain(&module_notify_list, |
| 2392 | MODULE_STATE_LIVE, mod); | 2389 | MODULE_STATE_LIVE, mod); |
| 2393 | 2390 | ||
| 2391 | /* We need to finish all async code before the module init sequence is done */ | ||
| 2392 | async_synchronize_full(); | ||
| 2393 | |||
| 2394 | mutex_lock(&module_mutex); | 2394 | mutex_lock(&module_mutex); |
| 2395 | /* Drop initial reference. */ | 2395 | /* Drop initial reference. */ |
| 2396 | module_put(mod); | 2396 | module_put(mod); |
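
The load_module() hunk stops discarding the __versions section after load, and init_module() now calls async_synchronize_full() before dropping the module's initial reference, so asynchronous work started from a module's init routine has finished before its __init text can be released. A hedged sketch of a module that relies on that ordering; slow_probe() is a stand-in for real device discovery, not part of the patch.

#include <linux/module.h>
#include <linux/async.h>
#include <linux/delay.h>

/* Illustrative slow probe work, run off the async thread pool. */
static void __init slow_probe(void *data, async_cookie_t cookie)
{
        msleep(100);            /* stand-in for slow hardware discovery */
        pr_info("async probe done (cookie %llu)\n",
                (unsigned long long)cookie);
}

static int __init async_example_init(void)
{
        /* Returns immediately; the work runs in parallel with the rest
         * of boot.  The init_module() hunk above calls
         * async_synchronize_full() before dropping the initial module
         * reference, so this __init code is not freed while
         * slow_probe() is still running. */
        async_schedule(slow_probe, NULL);
        return 0;
}

static void __exit async_example_exit(void) { }

module_init(async_example_init);
module_exit(async_example_exit);
MODULE_LICENSE("GPL");
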
diff --git a/kernel/mutex.c b/kernel/mutex.c index 5d79781394a3..507cf2b5e9f1 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c | |||
| @@ -148,7 +148,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, | |||
| 148 | 148 | ||
| 149 | preempt_disable(); | 149 | preempt_disable(); |
| 150 | mutex_acquire(&lock->dep_map, subclass, 0, ip); | 150 | mutex_acquire(&lock->dep_map, subclass, 0, ip); |
| 151 | #if defined(CONFIG_SMP) && !defined(CONFIG_DEBUG_MUTEXES) | 151 | #if defined(CONFIG_SMP) && !defined(CONFIG_DEBUG_MUTEXES) && \ |
| 152 | !defined(CONFIG_HAVE_DEFAULT_NO_SPIN_MUTEXES) | ||
| 152 | /* | 153 | /* |
| 153 | * Optimistic spinning. | 154 | * Optimistic spinning. |
| 154 | * | 155 | * |
diff --git a/kernel/panic.c b/kernel/panic.c index 3fd8c5bf8b39..3dcaa1661357 100644 --- a/kernel/panic.c +++ b/kernel/panic.c | |||
| @@ -213,8 +213,16 @@ unsigned long get_taint(void) | |||
| 213 | 213 | ||
| 214 | void add_taint(unsigned flag) | 214 | void add_taint(unsigned flag) |
| 215 | { | 215 | { |
| 216 | /* can't trust the integrity of the kernel anymore: */ | 216 | /* |
| 217 | debug_locks = 0; | 217 | * Can't trust the integrity of the kernel anymore. |
| 218 | * We don't call directly debug_locks_off() because the issue | ||
| 219 | * is not necessarily serious enough to set oops_in_progress to 1 | ||
| 220 | * Also we want to keep up lockdep for staging development and | ||
| 221 | * post-warning case. | ||
| 222 | */ | ||
| 223 | if (flag != TAINT_CRAP && flag != TAINT_WARN && __debug_locks_off()) | ||
| 224 | printk(KERN_WARNING "Disabling lock debugging due to kernel taint\n"); | ||
| 225 | |||
| 218 | set_bit(flag, &tainted_mask); | 226 | set_bit(flag, &tainted_mask); |
| 219 | } | 227 | } |
| 220 | EXPORT_SYMBOL(add_taint); | 228 | EXPORT_SYMBOL(add_taint); |
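
add_taint() above now goes through __debug_locks_off() and leaves lock debugging alone for TAINT_CRAP and TAINT_WARN, so a warning or a staging driver no longer silences lockdep, while any other taint prints the new "Disabling lock debugging" message. The snippet below is purely illustrative of the two paths; a real module would rarely taint the kernel by hand.

#include <linux/module.h>
#include <linux/kernel.h>

static int __init taint_example_init(void)
{
        /* A WARN taints with TAINT_WARN: after the hunk above this no
         * longer turns lockdep off, so later locking bugs are still
         * reported. */
        WARN_ON(1);

        /* Any other flag still disables lock debugging and logs
         * "Disabling lock debugging due to kernel taint" once. */
        add_taint(TAINT_USER);
        return 0;
}

static void __exit taint_example_exit(void) { }

module_init(taint_example_init);
module_exit(taint_example_exit);
MODULE_LICENSE("GPL");
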
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 8e5d9a68b022..c9dcf98b4463 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c | |||
| @@ -18,7 +18,7 @@ void update_rlimit_cpu(unsigned long rlim_new) | |||
| 18 | 18 | ||
| 19 | cputime = secs_to_cputime(rlim_new); | 19 | cputime = secs_to_cputime(rlim_new); |
| 20 | if (cputime_eq(current->signal->it_prof_expires, cputime_zero) || | 20 | if (cputime_eq(current->signal->it_prof_expires, cputime_zero) || |
| 21 | cputime_lt(current->signal->it_prof_expires, cputime)) { | 21 | cputime_gt(current->signal->it_prof_expires, cputime)) { |
| 22 | spin_lock_irq(¤t->sighand->siglock); | 22 | spin_lock_irq(¤t->sighand->siglock); |
| 23 | set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL); | 23 | set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL); |
| 24 | spin_unlock_irq(¤t->sighand->siglock); | 24 | spin_unlock_irq(¤t->sighand->siglock); |
| @@ -224,7 +224,7 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p, | |||
| 224 | cpu->cpu = virt_ticks(p); | 224 | cpu->cpu = virt_ticks(p); |
| 225 | break; | 225 | break; |
| 226 | case CPUCLOCK_SCHED: | 226 | case CPUCLOCK_SCHED: |
| 227 | cpu->sched = p->se.sum_exec_runtime + task_delta_exec(p); | 227 | cpu->sched = task_sched_runtime(p); |
| 228 | break; | 228 | break; |
| 229 | } | 229 | } |
| 230 | return 0; | 230 | return 0; |
| @@ -305,18 +305,19 @@ static int cpu_clock_sample_group(const clockid_t which_clock, | |||
| 305 | { | 305 | { |
| 306 | struct task_cputime cputime; | 306 | struct task_cputime cputime; |
| 307 | 307 | ||
| 308 | thread_group_cputime(p, &cputime); | ||
| 309 | switch (CPUCLOCK_WHICH(which_clock)) { | 308 | switch (CPUCLOCK_WHICH(which_clock)) { |
| 310 | default: | 309 | default: |
| 311 | return -EINVAL; | 310 | return -EINVAL; |
| 312 | case CPUCLOCK_PROF: | 311 | case CPUCLOCK_PROF: |
| 312 | thread_group_cputime(p, &cputime); | ||
| 313 | cpu->cpu = cputime_add(cputime.utime, cputime.stime); | 313 | cpu->cpu = cputime_add(cputime.utime, cputime.stime); |
| 314 | break; | 314 | break; |
| 315 | case CPUCLOCK_VIRT: | 315 | case CPUCLOCK_VIRT: |
| 316 | thread_group_cputime(p, &cputime); | ||
| 316 | cpu->cpu = cputime.utime; | 317 | cpu->cpu = cputime.utime; |
| 317 | break; | 318 | break; |
| 318 | case CPUCLOCK_SCHED: | 319 | case CPUCLOCK_SCHED: |
| 319 | cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p); | 320 | cpu->sched = thread_group_sched_runtime(p); |
| 320 | break; | 321 | break; |
| 321 | } | 322 | } |
| 322 | return 0; | 323 | return 0; |
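
The first hunk above fixes the comparison in update_rlimit_cpu(): the process-wide CPU timer should be (re)armed when nothing is armed yet or when the currently armed expiry lies beyond the new, tighter limit, which requires cputime_gt(), not cputime_lt(). The later hunks switch the sampling helpers to task_sched_runtime()/thread_group_sched_runtime(). A tiny user-space sketch of the corrected predicate, using plain seconds in place of cputime_t:

#include <stdbool.h>
#include <stdio.h>

/* 0 means "no timer armed", mirroring cputime_zero in the kernel code. */
static bool should_rearm(unsigned long armed_expiry, unsigned long new_limit)
{
        return armed_expiry == 0 || armed_expiry > new_limit;
}

int main(void)
{
        /* New RLIMIT_CPU of 10s: re-arm if nothing is armed or the armed
         * expiry is later than the new limit. */
        printf("%d\n", should_rearm(0, 10));    /* 1: nothing armed        */
        printf("%d\n", should_rearm(30, 10));   /* 1: 30s > 10s, tighten   */
        printf("%d\n", should_rearm(5, 10));    /* 0: 5s is already sooner */
        return 0;
}
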
diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 5f21ab2bbcdf..e71ca9cd81b2 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c | |||
| @@ -22,6 +22,7 @@ | |||
| 22 | #include <linux/console.h> | 22 | #include <linux/console.h> |
| 23 | #include <linux/cpu.h> | 23 | #include <linux/cpu.h> |
| 24 | #include <linux/freezer.h> | 24 | #include <linux/freezer.h> |
| 25 | #include <scsi/scsi_scan.h> | ||
| 25 | #include <asm/suspend.h> | 26 | #include <asm/suspend.h> |
| 26 | 27 | ||
| 27 | #include "power.h" | 28 | #include "power.h" |
| @@ -655,32 +656,42 @@ static int software_resume(void) | |||
| 655 | * here to avoid lockdep complaining. | 656 | * here to avoid lockdep complaining. |
| 656 | */ | 657 | */ |
| 657 | mutex_lock_nested(&pm_mutex, SINGLE_DEPTH_NESTING); | 658 | mutex_lock_nested(&pm_mutex, SINGLE_DEPTH_NESTING); |
| 659 | |||
| 660 | if (swsusp_resume_device) | ||
| 661 | goto Check_image; | ||
| 662 | |||
| 663 | if (!strlen(resume_file)) { | ||
| 664 | error = -ENOENT; | ||
| 665 | goto Unlock; | ||
| 666 | } | ||
| 667 | |||
| 668 | pr_debug("PM: Checking image partition %s\n", resume_file); | ||
| 669 | |||
| 670 | /* Check if the device is there */ | ||
| 671 | swsusp_resume_device = name_to_dev_t(resume_file); | ||
| 658 | if (!swsusp_resume_device) { | 672 | if (!swsusp_resume_device) { |
| 659 | if (!strlen(resume_file)) { | ||
| 660 | mutex_unlock(&pm_mutex); | ||
| 661 | return -ENOENT; | ||
| 662 | } | ||
| 663 | /* | 673 | /* |
| 664 | * Some device discovery might still be in progress; we need | 674 | * Some device discovery might still be in progress; we need |
| 665 | * to wait for this to finish. | 675 | * to wait for this to finish. |
| 666 | */ | 676 | */ |
| 667 | wait_for_device_probe(); | 677 | wait_for_device_probe(); |
| 678 | /* | ||
| 679 | * We can't depend on SCSI devices being available after loading | ||
| 680 | * one of their modules until scsi_complete_async_scans() is | ||
| 681 | * called and the resume device usually is a SCSI one. | ||
| 682 | */ | ||
| 683 | scsi_complete_async_scans(); | ||
| 684 | |||
| 668 | swsusp_resume_device = name_to_dev_t(resume_file); | 685 | swsusp_resume_device = name_to_dev_t(resume_file); |
| 669 | pr_debug("PM: Resume from partition %s\n", resume_file); | 686 | if (!swsusp_resume_device) { |
| 670 | } else { | 687 | error = -ENODEV; |
| 671 | pr_debug("PM: Resume from partition %d:%d\n", | 688 | goto Unlock; |
| 672 | MAJOR(swsusp_resume_device), | 689 | } |
| 673 | MINOR(swsusp_resume_device)); | ||
| 674 | } | 690 | } |
| 675 | 691 | ||
| 676 | if (noresume) { | 692 | Check_image: |
| 677 | /** | 693 | pr_debug("PM: Resume from partition %d:%d\n", |
| 678 | * FIXME: If noresume is specified, we need to find the | 694 | MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device)); |
| 679 | * partition and reset it back to normal swap space. | ||
| 680 | */ | ||
| 681 | mutex_unlock(&pm_mutex); | ||
| 682 | return 0; | ||
| 683 | } | ||
| 684 | 695 | ||
| 685 | pr_debug("PM: Checking hibernation image.\n"); | 696 | pr_debug("PM: Checking hibernation image.\n"); |
| 686 | error = swsusp_check(); | 697 | error = swsusp_check(); |
diff --git a/kernel/power/main.c b/kernel/power/main.c index f172f41858bb..f99ed6a75eac 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c | |||
| @@ -291,20 +291,26 @@ static int suspend_enter(suspend_state_t state) | |||
| 291 | 291 | ||
| 292 | device_pm_lock(); | 292 | device_pm_lock(); |
| 293 | 293 | ||
| 294 | if (suspend_ops->prepare) { | ||
| 295 | error = suspend_ops->prepare(); | ||
| 296 | if (error) | ||
| 297 | goto Done; | ||
| 298 | } | ||
| 299 | |||
| 294 | error = device_power_down(PMSG_SUSPEND); | 300 | error = device_power_down(PMSG_SUSPEND); |
| 295 | if (error) { | 301 | if (error) { |
| 296 | printk(KERN_ERR "PM: Some devices failed to power down\n"); | 302 | printk(KERN_ERR "PM: Some devices failed to power down\n"); |
| 297 | goto Done; | 303 | goto Platfrom_finish; |
| 298 | } | 304 | } |
| 299 | 305 | ||
| 300 | if (suspend_ops->prepare) { | 306 | if (suspend_ops->prepare_late) { |
| 301 | error = suspend_ops->prepare(); | 307 | error = suspend_ops->prepare_late(); |
| 302 | if (error) | 308 | if (error) |
| 303 | goto Power_up_devices; | 309 | goto Power_up_devices; |
| 304 | } | 310 | } |
| 305 | 311 | ||
| 306 | if (suspend_test(TEST_PLATFORM)) | 312 | if (suspend_test(TEST_PLATFORM)) |
| 307 | goto Platfrom_finish; | 313 | goto Platform_wake; |
| 308 | 314 | ||
| 309 | error = disable_nonboot_cpus(); | 315 | error = disable_nonboot_cpus(); |
| 310 | if (error || suspend_test(TEST_CPUS)) | 316 | if (error || suspend_test(TEST_CPUS)) |
| @@ -326,13 +332,17 @@ static int suspend_enter(suspend_state_t state) | |||
| 326 | Enable_cpus: | 332 | Enable_cpus: |
| 327 | enable_nonboot_cpus(); | 333 | enable_nonboot_cpus(); |
| 328 | 334 | ||
| 329 | Platfrom_finish: | 335 | Platform_wake: |
| 330 | if (suspend_ops->finish) | 336 | if (suspend_ops->wake) |
| 331 | suspend_ops->finish(); | 337 | suspend_ops->wake(); |
| 332 | 338 | ||
| 333 | Power_up_devices: | 339 | Power_up_devices: |
| 334 | device_power_up(PMSG_RESUME); | 340 | device_power_up(PMSG_RESUME); |
| 335 | 341 | ||
| 342 | Platfrom_finish: | ||
| 343 | if (suspend_ops->finish) | ||
| 344 | suspend_ops->finish(); | ||
| 345 | |||
| 336 | Done: | 346 | Done: |
| 337 | device_pm_unlock(); | 347 | device_pm_unlock(); |
| 338 | 348 | ||
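
suspend_enter() above now calls ->prepare() while devices are still powered, inserts the new ->prepare_late() after device_power_down(), and unwinds through the new ->wake() before devices are powered up, with ->finish() last. A hedged sketch of a platform_suspend_ops implementation showing where each callback now runs; all names and bodies are placeholders, not a real platform.

#include <linux/suspend.h>
#include <linux/init.h>

/* Placeholder hooks; a real platform would talk to firmware here. */
static int example_prepare(void)      { return 0; }  /* devices still active  */
static int example_prepare_late(void) { return 0; }  /* devices powered down  */
static int example_enter(suspend_state_t state) { return 0; }
static void example_wake(void)        { }            /* before device_power_up */
static void example_finish(void)      { }            /* after devices resumed  */

static int example_valid(suspend_state_t state)
{
        return state == PM_SUSPEND_MEM;
}

static struct platform_suspend_ops example_suspend_ops = {
        .valid          = example_valid,
        .prepare        = example_prepare,
        .prepare_late   = example_prepare_late,
        .enter          = example_enter,
        .wake           = example_wake,
        .finish         = example_finish,
};

static int __init example_pm_init(void)
{
        suspend_set_ops(&example_suspend_ops);
        return 0;
}
late_initcall(example_pm_init);
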
diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 505f319e489c..8ba052c86d48 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c | |||
| @@ -64,8 +64,6 @@ static int submit(int rw, pgoff_t page_off, struct page *page, | |||
| 64 | struct bio *bio; | 64 | struct bio *bio; |
| 65 | 65 | ||
| 66 | bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); | 66 | bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); |
| 67 | if (!bio) | ||
| 68 | return -ENOMEM; | ||
| 69 | bio->bi_sector = page_off * (PAGE_SIZE >> 9); | 67 | bio->bi_sector = page_off * (PAGE_SIZE >> 9); |
| 70 | bio->bi_bdev = resume_bdev; | 68 | bio->bi_bdev = resume_bdev; |
| 71 | bio->bi_end_io = end_swap_bio_read; | 69 | bio->bi_end_io = end_swap_bio_read; |
diff --git a/kernel/power/user.c b/kernel/power/user.c index 6c85359364f2..ed97375daae9 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c | |||
| @@ -24,6 +24,7 @@ | |||
| 24 | #include <linux/cpu.h> | 24 | #include <linux/cpu.h> |
| 25 | #include <linux/freezer.h> | 25 | #include <linux/freezer.h> |
| 26 | #include <linux/smp_lock.h> | 26 | #include <linux/smp_lock.h> |
| 27 | #include <scsi/scsi_scan.h> | ||
| 27 | 28 | ||
| 28 | #include <asm/uaccess.h> | 29 | #include <asm/uaccess.h> |
| 29 | 30 | ||
| @@ -92,6 +93,7 @@ static int snapshot_open(struct inode *inode, struct file *filp) | |||
| 92 | filp->private_data = data; | 93 | filp->private_data = data; |
| 93 | memset(&data->handle, 0, sizeof(struct snapshot_handle)); | 94 | memset(&data->handle, 0, sizeof(struct snapshot_handle)); |
| 94 | if ((filp->f_flags & O_ACCMODE) == O_RDONLY) { | 95 | if ((filp->f_flags & O_ACCMODE) == O_RDONLY) { |
| 96 | /* Hibernating. The image device should be accessible. */ | ||
| 95 | data->swap = swsusp_resume_device ? | 97 | data->swap = swsusp_resume_device ? |
| 96 | swap_type_of(swsusp_resume_device, 0, NULL) : -1; | 98 | swap_type_of(swsusp_resume_device, 0, NULL) : -1; |
| 97 | data->mode = O_RDONLY; | 99 | data->mode = O_RDONLY; |
| @@ -99,6 +101,13 @@ static int snapshot_open(struct inode *inode, struct file *filp) | |||
| 99 | if (error) | 101 | if (error) |
| 100 | pm_notifier_call_chain(PM_POST_HIBERNATION); | 102 | pm_notifier_call_chain(PM_POST_HIBERNATION); |
| 101 | } else { | 103 | } else { |
| 104 | /* | ||
| 105 | * Resuming. We may need to wait for the image device to | ||
| 106 | * appear. | ||
| 107 | */ | ||
| 108 | wait_for_device_probe(); | ||
| 109 | scsi_complete_async_scans(); | ||
| 110 | |||
| 102 | data->swap = -1; | 111 | data->swap = -1; |
| 103 | data->mode = O_WRONLY; | 112 | data->mode = O_WRONLY; |
| 104 | error = pm_notifier_call_chain(PM_RESTORE_PREPARE); | 113 | error = pm_notifier_call_chain(PM_RESTORE_PREPARE); |
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index aaad0ec34194..0692ab5a0d67 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
| @@ -21,9 +21,7 @@ | |||
| 21 | #include <linux/audit.h> | 21 | #include <linux/audit.h> |
| 22 | #include <linux/pid_namespace.h> | 22 | #include <linux/pid_namespace.h> |
| 23 | #include <linux/syscalls.h> | 23 | #include <linux/syscalls.h> |
| 24 | 24 | #include <linux/uaccess.h> | |
| 25 | #include <asm/pgtable.h> | ||
| 26 | #include <asm/uaccess.h> | ||
| 27 | 25 | ||
| 28 | 26 | ||
| 29 | /* | 27 | /* |
| @@ -48,7 +46,7 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent) | |||
| 48 | list_add(&child->ptrace_entry, &new_parent->ptraced); | 46 | list_add(&child->ptrace_entry, &new_parent->ptraced); |
| 49 | child->parent = new_parent; | 47 | child->parent = new_parent; |
| 50 | } | 48 | } |
| 51 | 49 | ||
| 52 | /* | 50 | /* |
| 53 | * Turn a tracing stop into a normal stop now, since with no tracer there | 51 | * Turn a tracing stop into a normal stop now, since with no tracer there |
| 54 | * would be no way to wake it up with SIGCONT or SIGKILL. If there was a | 52 | * would be no way to wake it up with SIGCONT or SIGKILL. If there was a |
| @@ -173,7 +171,7 @@ bool ptrace_may_access(struct task_struct *task, unsigned int mode) | |||
| 173 | task_lock(task); | 171 | task_lock(task); |
| 174 | err = __ptrace_may_access(task, mode); | 172 | err = __ptrace_may_access(task, mode); |
| 175 | task_unlock(task); | 173 | task_unlock(task); |
| 176 | return (!err ? true : false); | 174 | return !err; |
| 177 | } | 175 | } |
| 178 | 176 | ||
| 179 | int ptrace_attach(struct task_struct *task) | 177 | int ptrace_attach(struct task_struct *task) |
| @@ -190,7 +188,7 @@ int ptrace_attach(struct task_struct *task) | |||
| 190 | /* Protect exec's credential calculations against our interference; | 188 | /* Protect exec's credential calculations against our interference; |
| 191 | * SUID, SGID and LSM creds get determined differently under ptrace. | 189 | * SUID, SGID and LSM creds get determined differently under ptrace. |
| 192 | */ | 190 | */ |
| 193 | retval = mutex_lock_interruptible(¤t->cred_exec_mutex); | 191 | retval = mutex_lock_interruptible(&task->cred_exec_mutex); |
| 194 | if (retval < 0) | 192 | if (retval < 0) |
| 195 | goto out; | 193 | goto out; |
| 196 | 194 | ||
| @@ -234,7 +232,7 @@ repeat: | |||
| 234 | bad: | 232 | bad: |
| 235 | write_unlock_irqrestore(&tasklist_lock, flags); | 233 | write_unlock_irqrestore(&tasklist_lock, flags); |
| 236 | task_unlock(task); | 234 | task_unlock(task); |
| 237 | mutex_unlock(¤t->cred_exec_mutex); | 235 | mutex_unlock(&task->cred_exec_mutex); |
| 238 | out: | 236 | out: |
| 239 | return retval; | 237 | return retval; |
| 240 | } | 238 | } |
| @@ -358,7 +356,7 @@ int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst | |||
| 358 | copied += retval; | 356 | copied += retval; |
| 359 | src += retval; | 357 | src += retval; |
| 360 | dst += retval; | 358 | dst += retval; |
| 361 | len -= retval; | 359 | len -= retval; |
| 362 | } | 360 | } |
| 363 | return copied; | 361 | return copied; |
| 364 | } | 362 | } |
| @@ -383,7 +381,7 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds | |||
| 383 | copied += retval; | 381 | copied += retval; |
| 384 | src += retval; | 382 | src += retval; |
| 385 | dst += retval; | 383 | dst += retval; |
| 386 | len -= retval; | 384 | len -= retval; |
| 387 | } | 385 | } |
| 388 | return copied; | 386 | return copied; |
| 389 | } | 387 | } |
| @@ -496,9 +494,9 @@ static int ptrace_resume(struct task_struct *child, long request, long data) | |||
| 496 | if (unlikely(!arch_has_single_step())) | 494 | if (unlikely(!arch_has_single_step())) |
| 497 | return -EIO; | 495 | return -EIO; |
| 498 | user_enable_single_step(child); | 496 | user_enable_single_step(child); |
| 499 | } | 497 | } else { |
| 500 | else | ||
| 501 | user_disable_single_step(child); | 498 | user_disable_single_step(child); |
| 499 | } | ||
| 502 | 500 | ||
| 503 | child->exit_code = data; | 501 | child->exit_code = data; |
| 504 | wake_up_process(child); | 502 | wake_up_process(child); |
| @@ -606,10 +604,11 @@ repeat: | |||
| 606 | ret = security_ptrace_traceme(current->parent); | 604 | ret = security_ptrace_traceme(current->parent); |
| 607 | 605 | ||
| 608 | /* | 606 | /* |
| 609 | * Set the ptrace bit in the process ptrace flags. | 607 | * Check PF_EXITING to ensure ->real_parent has not passed |
| 610 | * Then link us on our parent's ptraced list. | 608 | * exit_ptrace(). Otherwise we don't report the error but |
| 609 | * pretend ->real_parent untraces us right after return. | ||
| 611 | */ | 610 | */ |
| 612 | if (!ret) { | 611 | if (!ret && !(current->real_parent->flags & PF_EXITING)) { |
| 613 | current->ptrace |= PT_PTRACED; | 612 | current->ptrace |= PT_PTRACED; |
| 614 | __ptrace_link(current, current->real_parent); | 613 | __ptrace_link(current, current->real_parent); |
| 615 | } | 614 | } |
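
Besides the cleanups, ptrace_attach() above now serializes on the tracee's cred_exec_mutex instead of the tracer's, and ptrace_traceme() refuses to link to a real parent that is already in PF_EXITING. A small user-space sketch of the PTRACE_TRACEME path those checks guard: the child asks to be traced, stops at exec, and the parent resumes it.

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/wait.h>

int main(void)
{
        int status;
        pid_t pid = fork();

        if (pid == 0) {
                /* Child: ask our real parent to trace us, then exec.
                 * The kernel stops us with SIGTRAP at the exec point. */
                ptrace(PTRACE_TRACEME, 0, NULL, NULL);
                execlp("true", "true", (char *)NULL);
                _exit(127);
        }

        waitpid(pid, &status, 0);               /* child stopped at exec */
        if (WIFSTOPPED(status))
                printf("child %d stopped by signal %d\n",
                       pid, WSTOPSIG(status));

        ptrace(PTRACE_CONT, pid, NULL, NULL);   /* let it run to completion */
        waitpid(pid, &status, 0);
        return 0;
}
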
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c index 654c640a6b9c..0f2b0b311304 100644 --- a/kernel/rcuclassic.c +++ b/kernel/rcuclassic.c | |||
| @@ -65,6 +65,7 @@ static struct rcu_ctrlblk rcu_ctrlblk = { | |||
| 65 | .lock = __SPIN_LOCK_UNLOCKED(&rcu_ctrlblk.lock), | 65 | .lock = __SPIN_LOCK_UNLOCKED(&rcu_ctrlblk.lock), |
| 66 | .cpumask = CPU_BITS_NONE, | 66 | .cpumask = CPU_BITS_NONE, |
| 67 | }; | 67 | }; |
| 68 | |||
| 68 | static struct rcu_ctrlblk rcu_bh_ctrlblk = { | 69 | static struct rcu_ctrlblk rcu_bh_ctrlblk = { |
| 69 | .cur = -300, | 70 | .cur = -300, |
| 70 | .completed = -300, | 71 | .completed = -300, |
| @@ -73,8 +74,26 @@ static struct rcu_ctrlblk rcu_bh_ctrlblk = { | |||
| 73 | .cpumask = CPU_BITS_NONE, | 74 | .cpumask = CPU_BITS_NONE, |
| 74 | }; | 75 | }; |
| 75 | 76 | ||
| 76 | DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L }; | 77 | static DEFINE_PER_CPU(struct rcu_data, rcu_data); |
| 77 | DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L }; | 78 | static DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); |
| 79 | |||
| 80 | /* | ||
| 81 | * Increment the quiescent state counter. | ||
| 82 | * The counter is a bit degenerated: We do not need to know | ||
| 83 | * how many quiescent states passed, just if there was at least | ||
| 84 | * one since the start of the grace period. Thus just a flag. | ||
| 85 | */ | ||
| 86 | void rcu_qsctr_inc(int cpu) | ||
| 87 | { | ||
| 88 | struct rcu_data *rdp = &per_cpu(rcu_data, cpu); | ||
| 89 | rdp->passed_quiesc = 1; | ||
| 90 | } | ||
| 91 | |||
| 92 | void rcu_bh_qsctr_inc(int cpu) | ||
| 93 | { | ||
| 94 | struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); | ||
| 95 | rdp->passed_quiesc = 1; | ||
| 96 | } | ||
| 78 | 97 | ||
| 79 | static int blimit = 10; | 98 | static int blimit = 10; |
| 80 | static int qhimark = 10000; | 99 | static int qhimark = 10000; |
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 2c7b8457d0d2..a967c9feb90a 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c | |||
| @@ -58,6 +58,10 @@ static DEFINE_MUTEX(rcu_barrier_mutex); | |||
| 58 | static struct completion rcu_barrier_completion; | 58 | static struct completion rcu_barrier_completion; |
| 59 | int rcu_scheduler_active __read_mostly; | 59 | int rcu_scheduler_active __read_mostly; |
| 60 | 60 | ||
| 61 | static atomic_t rcu_migrate_type_count = ATOMIC_INIT(0); | ||
| 62 | static struct rcu_head rcu_migrate_head[3]; | ||
| 63 | static DECLARE_WAIT_QUEUE_HEAD(rcu_migrate_wq); | ||
| 64 | |||
| 61 | /* | 65 | /* |
| 62 | * Awaken the corresponding synchronize_rcu() instance now that a | 66 | * Awaken the corresponding synchronize_rcu() instance now that a |
| 63 | * grace period has elapsed. | 67 | * grace period has elapsed. |
| @@ -122,7 +126,10 @@ static void rcu_barrier_func(void *type) | |||
| 122 | } | 126 | } |
| 123 | } | 127 | } |
| 124 | 128 | ||
| 125 | static inline void wait_migrated_callbacks(void); | 129 | static inline void wait_migrated_callbacks(void) |
| 130 | { | ||
| 131 | wait_event(rcu_migrate_wq, !atomic_read(&rcu_migrate_type_count)); | ||
| 132 | } | ||
| 126 | 133 | ||
| 127 | /* | 134 | /* |
| 128 | * Orchestrate the specified type of RCU barrier, waiting for all | 135 | * Orchestrate the specified type of RCU barrier, waiting for all |
| @@ -179,21 +186,12 @@ void rcu_barrier_sched(void) | |||
| 179 | } | 186 | } |
| 180 | EXPORT_SYMBOL_GPL(rcu_barrier_sched); | 187 | EXPORT_SYMBOL_GPL(rcu_barrier_sched); |
| 181 | 188 | ||
| 182 | static atomic_t rcu_migrate_type_count = ATOMIC_INIT(0); | ||
| 183 | static struct rcu_head rcu_migrate_head[3]; | ||
| 184 | static DECLARE_WAIT_QUEUE_HEAD(rcu_migrate_wq); | ||
| 185 | |||
| 186 | static void rcu_migrate_callback(struct rcu_head *notused) | 189 | static void rcu_migrate_callback(struct rcu_head *notused) |
| 187 | { | 190 | { |
| 188 | if (atomic_dec_and_test(&rcu_migrate_type_count)) | 191 | if (atomic_dec_and_test(&rcu_migrate_type_count)) |
| 189 | wake_up(&rcu_migrate_wq); | 192 | wake_up(&rcu_migrate_wq); |
| 190 | } | 193 | } |
| 191 | 194 | ||
| 192 | static inline void wait_migrated_callbacks(void) | ||
| 193 | { | ||
| 194 | wait_event(rcu_migrate_wq, !atomic_read(&rcu_migrate_type_count)); | ||
| 195 | } | ||
| 196 | |||
| 197 | static int __cpuinit rcu_barrier_cpu_hotplug(struct notifier_block *self, | 195 | static int __cpuinit rcu_barrier_cpu_hotplug(struct notifier_block *self, |
| 198 | unsigned long action, void *hcpu) | 196 | unsigned long action, void *hcpu) |
| 199 | { | 197 | { |
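
The rcupdate.c hunk only moves the migration-callback state to the top of the file and turns the forward-declared wait_migrated_callbacks() into its definition at first use. For orientation, here is a minimal sketch of the reader/updater pattern this barrier machinery serves; the item list and all names are illustrative.

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
#include <linux/rculist.h>

struct item {
        int key;
        struct list_head list;
        struct rcu_head rcu;
};

static LIST_HEAD(items);
static DEFINE_SPINLOCK(items_lock);

/* Reader: lockless traversal under rcu_read_lock(). */
static int lookup(int key)
{
        struct item *it;
        int found = 0;

        rcu_read_lock();
        list_for_each_entry_rcu(it, &items, list) {
                if (it->key == key) {
                        found = 1;
                        break;
                }
        }
        rcu_read_unlock();
        return found;
}

static void item_free_rcu(struct rcu_head *head)
{
        kfree(container_of(head, struct item, rcu));
}

/* Updater: unlink under the lock, free only after a grace period. */
static void remove_item(struct item *it)
{
        spin_lock(&items_lock);
        list_del_rcu(&it->list);
        spin_unlock(&items_lock);
        call_rcu(&it->rcu, item_free_rcu);
}

static int __init rcu_example_init(void)
{
        struct item *it = kmalloc(sizeof(*it), GFP_KERNEL);

        if (!it)
                return -ENOMEM;
        it->key = 42;

        spin_lock(&items_lock);
        list_add_rcu(&it->list, &items);
        spin_unlock(&items_lock);

        pr_info("lookup(42) = %d\n", lookup(42));
        remove_item(it);
        return 0;
}

static void __exit rcu_example_exit(void)
{
        /* Wait for the call_rcu() callback above to run before the
         * module text it points at goes away. */
        rcu_barrier();
}

module_init(rcu_example_init);
module_exit(rcu_example_exit);
MODULE_LICENSE("GPL");

rcu_barrier() in the exit path is exactly what the surrounding code implements: it waits for all outstanding call_rcu() callbacks, including those migrated off a CPU being hot-unplugged.
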
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c index 5d59e850fb71..ce97a4df64d3 100644 --- a/kernel/rcupreempt.c +++ b/kernel/rcupreempt.c | |||
| @@ -147,7 +147,51 @@ struct rcu_ctrlblk { | |||
| 147 | wait_queue_head_t sched_wq; /* Place for rcu_sched to sleep. */ | 147 | wait_queue_head_t sched_wq; /* Place for rcu_sched to sleep. */ |
| 148 | }; | 148 | }; |
| 149 | 149 | ||
| 150 | struct rcu_dyntick_sched { | ||
| 151 | int dynticks; | ||
| 152 | int dynticks_snap; | ||
| 153 | int sched_qs; | ||
| 154 | int sched_qs_snap; | ||
| 155 | int sched_dynticks_snap; | ||
| 156 | }; | ||
| 157 | |||
| 158 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_dyntick_sched, rcu_dyntick_sched) = { | ||
| 159 | .dynticks = 1, | ||
| 160 | }; | ||
| 161 | |||
| 162 | void rcu_qsctr_inc(int cpu) | ||
| 163 | { | ||
| 164 | struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); | ||
| 165 | |||
| 166 | rdssp->sched_qs++; | ||
| 167 | } | ||
| 168 | |||
| 169 | #ifdef CONFIG_NO_HZ | ||
| 170 | |||
| 171 | void rcu_enter_nohz(void) | ||
| 172 | { | ||
| 173 | static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1); | ||
| 174 | |||
| 175 | smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ | ||
| 176 | __get_cpu_var(rcu_dyntick_sched).dynticks++; | ||
| 177 | WARN_ON_RATELIMIT(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1, &rs); | ||
| 178 | } | ||
| 179 | |||
| 180 | void rcu_exit_nohz(void) | ||
| 181 | { | ||
| 182 | static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1); | ||
| 183 | |||
| 184 | __get_cpu_var(rcu_dyntick_sched).dynticks++; | ||
| 185 | smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ | ||
| 186 | WARN_ON_RATELIMIT(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1), | ||
| 187 | &rs); | ||
| 188 | } | ||
| 189 | |||
| 190 | #endif /* CONFIG_NO_HZ */ | ||
| 191 | |||
| 192 | |||
| 150 | static DEFINE_PER_CPU(struct rcu_data, rcu_data); | 193 | static DEFINE_PER_CPU(struct rcu_data, rcu_data); |
| 194 | |||
| 151 | static struct rcu_ctrlblk rcu_ctrlblk = { | 195 | static struct rcu_ctrlblk rcu_ctrlblk = { |
| 152 | .fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock), | 196 | .fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock), |
| 153 | .completed = 0, | 197 | .completed = 0, |
| @@ -427,10 +471,6 @@ static void __rcu_advance_callbacks(struct rcu_data *rdp) | |||
| 427 | } | 471 | } |
| 428 | } | 472 | } |
| 429 | 473 | ||
| 430 | DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_dyntick_sched, rcu_dyntick_sched) = { | ||
| 431 | .dynticks = 1, | ||
| 432 | }; | ||
| 433 | |||
| 434 | #ifdef CONFIG_NO_HZ | 474 | #ifdef CONFIG_NO_HZ |
| 435 | static DEFINE_PER_CPU(int, rcu_update_flag); | 475 | static DEFINE_PER_CPU(int, rcu_update_flag); |
| 436 | 476 | ||
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 97ce31579ec0..d2a372fb0b9b 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
| @@ -78,6 +78,26 @@ DEFINE_PER_CPU(struct rcu_data, rcu_data); | |||
| 78 | struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); | 78 | struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); |
| 79 | DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); | 79 | DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); |
| 80 | 80 | ||
| 81 | /* | ||
| 82 | * Increment the quiescent state counter. | ||
| 83 | * The counter is a bit degenerated: We do not need to know | ||
| 84 | * how many quiescent states passed, just if there was at least | ||
| 85 | * one since the start of the grace period. Thus just a flag. | ||
| 86 | */ | ||
| 87 | void rcu_qsctr_inc(int cpu) | ||
| 88 | { | ||
| 89 | struct rcu_data *rdp = &per_cpu(rcu_data, cpu); | ||
| 90 | rdp->passed_quiesc = 1; | ||
| 91 | rdp->passed_quiesc_completed = rdp->completed; | ||
| 92 | } | ||
| 93 | |||
| 94 | void rcu_bh_qsctr_inc(int cpu) | ||
| 95 | { | ||
| 96 | struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); | ||
| 97 | rdp->passed_quiesc = 1; | ||
| 98 | rdp->passed_quiesc_completed = rdp->completed; | ||
| 99 | } | ||
| 100 | |||
| 81 | #ifdef CONFIG_NO_HZ | 101 | #ifdef CONFIG_NO_HZ |
| 82 | DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { | 102 | DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { |
| 83 | .dynticks_nesting = 1, | 103 | .dynticks_nesting = 1, |
| @@ -510,8 +530,6 @@ static void note_new_gpnum(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 510 | rdp->qs_pending = 1; | 530 | rdp->qs_pending = 1; |
| 511 | rdp->passed_quiesc = 0; | 531 | rdp->passed_quiesc = 0; |
| 512 | rdp->gpnum = rsp->gpnum; | 532 | rdp->gpnum = rsp->gpnum; |
| 513 | rdp->n_rcu_pending_force_qs = rdp->n_rcu_pending + | ||
| 514 | RCU_JIFFIES_TILL_FORCE_QS; | ||
| 515 | } | 533 | } |
| 516 | 534 | ||
| 517 | /* | 535 | /* |
| @@ -558,8 +576,6 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) | |||
| 558 | rsp->gpnum++; | 576 | rsp->gpnum++; |
| 559 | rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */ | 577 | rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */ |
| 560 | rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; | 578 | rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; |
| 561 | rdp->n_rcu_pending_force_qs = rdp->n_rcu_pending + | ||
| 562 | RCU_JIFFIES_TILL_FORCE_QS; | ||
| 563 | record_gp_stall_check_time(rsp); | 579 | record_gp_stall_check_time(rsp); |
| 564 | dyntick_record_completed(rsp, rsp->completed - 1); | 580 | dyntick_record_completed(rsp, rsp->completed - 1); |
| 565 | note_new_gpnum(rsp, rdp); | 581 | note_new_gpnum(rsp, rdp); |
| @@ -1035,7 +1051,6 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) | |||
| 1035 | { | 1051 | { |
| 1036 | unsigned long flags; | 1052 | unsigned long flags; |
| 1037 | long lastcomp; | 1053 | long lastcomp; |
| 1038 | struct rcu_data *rdp = rsp->rda[smp_processor_id()]; | ||
| 1039 | struct rcu_node *rnp = rcu_get_root(rsp); | 1054 | struct rcu_node *rnp = rcu_get_root(rsp); |
| 1040 | u8 signaled; | 1055 | u8 signaled; |
| 1041 | 1056 | ||
| @@ -1046,16 +1061,13 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) | |||
| 1046 | return; /* Someone else is already on the job. */ | 1061 | return; /* Someone else is already on the job. */ |
| 1047 | } | 1062 | } |
| 1048 | if (relaxed && | 1063 | if (relaxed && |
| 1049 | (long)(rsp->jiffies_force_qs - jiffies) >= 0 && | 1064 | (long)(rsp->jiffies_force_qs - jiffies) >= 0) |
| 1050 | (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) >= 0) | ||
| 1051 | goto unlock_ret; /* no emergency and done recently. */ | 1065 | goto unlock_ret; /* no emergency and done recently. */ |
| 1052 | rsp->n_force_qs++; | 1066 | rsp->n_force_qs++; |
| 1053 | spin_lock(&rnp->lock); | 1067 | spin_lock(&rnp->lock); |
| 1054 | lastcomp = rsp->completed; | 1068 | lastcomp = rsp->completed; |
| 1055 | signaled = rsp->signaled; | 1069 | signaled = rsp->signaled; |
| 1056 | rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; | 1070 | rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; |
| 1057 | rdp->n_rcu_pending_force_qs = rdp->n_rcu_pending + | ||
| 1058 | RCU_JIFFIES_TILL_FORCE_QS; | ||
| 1059 | if (lastcomp == rsp->gpnum) { | 1071 | if (lastcomp == rsp->gpnum) { |
| 1060 | rsp->n_force_qs_ngp++; | 1072 | rsp->n_force_qs_ngp++; |
| 1061 | spin_unlock(&rnp->lock); | 1073 | spin_unlock(&rnp->lock); |
| @@ -1124,8 +1136,7 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 1124 | * If an RCU GP has gone long enough, go check for dyntick | 1136 | * If an RCU GP has gone long enough, go check for dyntick |
| 1125 | * idle CPUs and, if needed, send resched IPIs. | 1137 | * idle CPUs and, if needed, send resched IPIs. |
| 1126 | */ | 1138 | */ |
| 1127 | if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0 || | 1139 | if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0) |
| 1128 | (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) < 0) | ||
| 1129 | force_quiescent_state(rsp, 1); | 1140 | force_quiescent_state(rsp, 1); |
| 1130 | 1141 | ||
| 1131 | /* | 1142 | /* |
| @@ -1210,8 +1221,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), | |||
| 1210 | if (unlikely(++rdp->qlen > qhimark)) { | 1221 | if (unlikely(++rdp->qlen > qhimark)) { |
| 1211 | rdp->blimit = LONG_MAX; | 1222 | rdp->blimit = LONG_MAX; |
| 1212 | force_quiescent_state(rsp, 0); | 1223 | force_quiescent_state(rsp, 0); |
| 1213 | } else if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0 || | 1224 | } else if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0) |
| 1214 | (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) < 0) | ||
| 1215 | force_quiescent_state(rsp, 1); | 1225 | force_quiescent_state(rsp, 1); |
| 1216 | local_irq_restore(flags); | 1226 | local_irq_restore(flags); |
| 1217 | } | 1227 | } |
| @@ -1270,8 +1280,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 1270 | 1280 | ||
| 1271 | /* Has an RCU GP gone long enough to send resched IPIs &c? */ | 1281 | /* Has an RCU GP gone long enough to send resched IPIs &c? */ |
| 1272 | if (ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum) && | 1282 | if (ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum) && |
| 1273 | ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0 || | 1283 | ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)) |
| 1274 | (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) < 0)) | ||
| 1275 | return 1; | 1284 | return 1; |
| 1276 | 1285 | ||
| 1277 | /* nothing to do */ | 1286 | /* nothing to do */ |
diff --git a/kernel/rcutree.h b/kernel/rcutree.h new file mode 100644 index 000000000000..5e872bbf07f5 --- /dev/null +++ b/kernel/rcutree.h | |||
| @@ -0,0 +1,10 @@ | |||
| 1 | |||
| 2 | /* | ||
| 3 | * RCU implementation internal declarations: | ||
| 4 | */ | ||
| 5 | extern struct rcu_state rcu_state; | ||
| 6 | DECLARE_PER_CPU(struct rcu_data, rcu_data); | ||
| 7 | |||
| 8 | extern struct rcu_state rcu_bh_state; | ||
| 9 | DECLARE_PER_CPU(struct rcu_data, rcu_bh_data); | ||
| 10 | |||
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index d6db3e837826..4b1875ba9404 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c | |||
| @@ -43,18 +43,18 @@ | |||
| 43 | #include <linux/debugfs.h> | 43 | #include <linux/debugfs.h> |
| 44 | #include <linux/seq_file.h> | 44 | #include <linux/seq_file.h> |
| 45 | 45 | ||
| 46 | #include "rcutree.h" | ||
| 47 | |||
| 46 | static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) | 48 | static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) |
| 47 | { | 49 | { |
| 48 | if (!rdp->beenonline) | 50 | if (!rdp->beenonline) |
| 49 | return; | 51 | return; |
| 50 | seq_printf(m, "%3d%cc=%ld g=%ld pq=%d pqc=%ld qp=%d rpfq=%ld rp=%x", | 52 | seq_printf(m, "%3d%cc=%ld g=%ld pq=%d pqc=%ld qp=%d", |
| 51 | rdp->cpu, | 53 | rdp->cpu, |
| 52 | cpu_is_offline(rdp->cpu) ? '!' : ' ', | 54 | cpu_is_offline(rdp->cpu) ? '!' : ' ', |
| 53 | rdp->completed, rdp->gpnum, | 55 | rdp->completed, rdp->gpnum, |
| 54 | rdp->passed_quiesc, rdp->passed_quiesc_completed, | 56 | rdp->passed_quiesc, rdp->passed_quiesc_completed, |
| 55 | rdp->qs_pending, | 57 | rdp->qs_pending); |
| 56 | rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending, | ||
| 57 | (int)(rdp->n_rcu_pending & 0xffff)); | ||
| 58 | #ifdef CONFIG_NO_HZ | 58 | #ifdef CONFIG_NO_HZ |
| 59 | seq_printf(m, " dt=%d/%d dn=%d df=%lu", | 59 | seq_printf(m, " dt=%d/%d dn=%d df=%lu", |
| 60 | rdp->dynticks->dynticks, | 60 | rdp->dynticks->dynticks, |
| @@ -100,14 +100,12 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp) | |||
| 100 | { | 100 | { |
| 101 | if (!rdp->beenonline) | 101 | if (!rdp->beenonline) |
| 102 | return; | 102 | return; |
| 103 | seq_printf(m, "%d,%s,%ld,%ld,%d,%ld,%d,%ld,%ld", | 103 | seq_printf(m, "%d,%s,%ld,%ld,%d,%ld,%d", |
| 104 | rdp->cpu, | 104 | rdp->cpu, |
| 105 | cpu_is_offline(rdp->cpu) ? "\"Y\"" : "\"N\"", | 105 | cpu_is_offline(rdp->cpu) ? "\"Y\"" : "\"N\"", |
| 106 | rdp->completed, rdp->gpnum, | 106 | rdp->completed, rdp->gpnum, |
| 107 | rdp->passed_quiesc, rdp->passed_quiesc_completed, | 107 | rdp->passed_quiesc, rdp->passed_quiesc_completed, |
| 108 | rdp->qs_pending, | 108 | rdp->qs_pending); |
| 109 | rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending, | ||
| 110 | rdp->n_rcu_pending); | ||
| 111 | #ifdef CONFIG_NO_HZ | 109 | #ifdef CONFIG_NO_HZ |
| 112 | seq_printf(m, ",%d,%d,%d,%lu", | 110 | seq_printf(m, ",%d,%d,%d,%lu", |
| 113 | rdp->dynticks->dynticks, | 111 | rdp->dynticks->dynticks, |
| @@ -121,7 +119,7 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp) | |||
| 121 | 119 | ||
| 122 | static int show_rcudata_csv(struct seq_file *m, void *unused) | 120 | static int show_rcudata_csv(struct seq_file *m, void *unused) |
| 123 | { | 121 | { |
| 124 | seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pqc\",\"pq\",\"rpfq\",\"rp\","); | 122 | seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pqc\",\"pq\","); |
| 125 | #ifdef CONFIG_NO_HZ | 123 | #ifdef CONFIG_NO_HZ |
| 126 | seq_puts(m, "\"dt\",\"dt nesting\",\"dn\",\"df\","); | 124 | seq_puts(m, "\"dt\",\"dt nesting\",\"dn\",\"df\","); |
| 127 | #endif /* #ifdef CONFIG_NO_HZ */ | 125 | #endif /* #ifdef CONFIG_NO_HZ */ |
diff --git a/kernel/resource.c b/kernel/resource.c index fd5d7d574bb9..ac5f3a36923f 100644 --- a/kernel/resource.c +++ b/kernel/resource.c | |||
| @@ -533,43 +533,21 @@ static void __init __reserve_region_with_split(struct resource *root, | |||
| 533 | res->end = end; | 533 | res->end = end; |
| 534 | res->flags = IORESOURCE_BUSY; | 534 | res->flags = IORESOURCE_BUSY; |
| 535 | 535 | ||
| 536 | for (;;) { | 536 | conflict = __request_resource(parent, res); |
| 537 | conflict = __request_resource(parent, res); | 537 | if (!conflict) |
| 538 | if (!conflict) | 538 | return; |
| 539 | break; | ||
| 540 | if (conflict != parent) { | ||
| 541 | parent = conflict; | ||
| 542 | if (!(conflict->flags & IORESOURCE_BUSY)) | ||
| 543 | continue; | ||
| 544 | } | ||
| 545 | |||
| 546 | /* Uhhuh, that didn't work out.. */ | ||
| 547 | kfree(res); | ||
| 548 | res = NULL; | ||
| 549 | break; | ||
| 550 | } | ||
| 551 | |||
| 552 | if (!res) { | ||
| 553 | /* failed, split and try again */ | ||
| 554 | |||
| 555 | /* conflict covered whole area */ | ||
| 556 | if (conflict->start <= start && conflict->end >= end) | ||
| 557 | return; | ||
| 558 | 539 | ||
| 559 | if (conflict->start > start) | 540 | /* failed, split and try again */ |
| 560 | __reserve_region_with_split(root, start, conflict->start-1, name); | 541 | kfree(res); |
| 561 | if (!(conflict->flags & IORESOURCE_BUSY)) { | ||
| 562 | resource_size_t common_start, common_end; | ||
| 563 | 542 | ||
| 564 | common_start = max(conflict->start, start); | 543 | /* conflict covered whole area */ |
| 565 | common_end = min(conflict->end, end); | 544 | if (conflict->start <= start && conflict->end >= end) |
| 566 | if (common_start < common_end) | 545 | return; |
| 567 | __reserve_region_with_split(root, common_start, common_end, name); | ||
| 568 | } | ||
| 569 | if (conflict->end < end) | ||
| 570 | __reserve_region_with_split(root, conflict->end+1, end, name); | ||
| 571 | } | ||
| 572 | 546 | ||
| 547 | if (conflict->start > start) | ||
| 548 | __reserve_region_with_split(root, start, conflict->start-1, name); | ||
| 549 | if (conflict->end < end) | ||
| 550 | __reserve_region_with_split(root, conflict->end+1, end, name); | ||
| 573 | } | 551 | } |
| 574 | 552 | ||
| 575 | void __init reserve_region_with_split(struct resource *root, | 553 | void __init reserve_region_with_split(struct resource *root, |
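Note: the rewritten __reserve_region_with_split() above gives up on the first conflicting sibling and simply recurses on whatever is left to the left and right of the conflict. A hedged userspace sketch of the same split-and-recurse idea over a sorted array of already-reserved ranges (the range type, reserved[] and find_conflict() are invented for illustration, not the kernel's resource tree):

#include <stdio.h>

struct range { unsigned long start, end; };             /* inclusive bounds */

static const struct range reserved[] = { {100, 199}, {300, 349} };

/* Return the first reserved range overlapping [start, end], or NULL. */
static const struct range *find_conflict(unsigned long start, unsigned long end)
{
        for (unsigned i = 0; i < sizeof(reserved) / sizeof(reserved[0]); i++)
                if (reserved[i].start <= end && reserved[i].end >= start)
                        return &reserved[i];
        return NULL;
}

/* Reserve [start, end], splitting around whatever is already reserved. */
static void reserve_with_split(unsigned long start, unsigned long end)
{
        const struct range *conflict = find_conflict(start, end);

        if (!conflict) {
                printf("reserved [%lu, %lu]\n", start, end);
                return;
        }
        if (conflict->start <= start && conflict->end >= end)
                return;                         /* conflict covers everything */
        if (conflict->start > start)
                reserve_with_split(start, conflict->start - 1);
        if (conflict->end < end)
                reserve_with_split(conflict->end + 1, end);
}

int main(void)
{
        reserve_with_split(50, 400);    /* prints [50,99], [200,299], [350,400] */
        return 0;
}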
diff --git a/kernel/sched.c b/kernel/sched.c index bec249885e17..b902e587a3a0 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
| @@ -231,13 +231,20 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b) | |||
| 231 | 231 | ||
| 232 | spin_lock(&rt_b->rt_runtime_lock); | 232 | spin_lock(&rt_b->rt_runtime_lock); |
| 233 | for (;;) { | 233 | for (;;) { |
| 234 | unsigned long delta; | ||
| 235 | ktime_t soft, hard; | ||
| 236 | |||
| 234 | if (hrtimer_active(&rt_b->rt_period_timer)) | 237 | if (hrtimer_active(&rt_b->rt_period_timer)) |
| 235 | break; | 238 | break; |
| 236 | 239 | ||
| 237 | now = hrtimer_cb_get_time(&rt_b->rt_period_timer); | 240 | now = hrtimer_cb_get_time(&rt_b->rt_period_timer); |
| 238 | hrtimer_forward(&rt_b->rt_period_timer, now, rt_b->rt_period); | 241 | hrtimer_forward(&rt_b->rt_period_timer, now, rt_b->rt_period); |
| 239 | hrtimer_start_expires(&rt_b->rt_period_timer, | 242 | |
| 240 | HRTIMER_MODE_ABS); | 243 | soft = hrtimer_get_softexpires(&rt_b->rt_period_timer); |
| 244 | hard = hrtimer_get_expires(&rt_b->rt_period_timer); | ||
| 245 | delta = ktime_to_ns(ktime_sub(hard, soft)); | ||
| 246 | __hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta, | ||
| 247 | HRTIMER_MODE_ABS, 0); | ||
| 241 | } | 248 | } |
| 242 | spin_unlock(&rt_b->rt_runtime_lock); | 249 | spin_unlock(&rt_b->rt_runtime_lock); |
| 243 | } | 250 | } |
| @@ -1146,7 +1153,8 @@ static __init void init_hrtick(void) | |||
| 1146 | */ | 1153 | */ |
| 1147 | static void hrtick_start(struct rq *rq, u64 delay) | 1154 | static void hrtick_start(struct rq *rq, u64 delay) |
| 1148 | { | 1155 | { |
| 1149 | hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), HRTIMER_MODE_REL); | 1156 | __hrtimer_start_range_ns(&rq->hrtick_timer, ns_to_ktime(delay), 0, |
| 1157 | HRTIMER_MODE_REL, 0); | ||
| 1150 | } | 1158 | } |
| 1151 | 1159 | ||
| 1152 | static inline void init_hrtick(void) | 1160 | static inline void init_hrtick(void) |
| @@ -1410,10 +1418,22 @@ iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
| 1410 | struct rq_iterator *iterator); | 1418 | struct rq_iterator *iterator); |
| 1411 | #endif | 1419 | #endif |
| 1412 | 1420 | ||
| 1421 | /* Time spent by the tasks of the cpu accounting group executing in ... */ | ||
| 1422 | enum cpuacct_stat_index { | ||
| 1423 | CPUACCT_STAT_USER, /* ... user mode */ | ||
| 1424 | CPUACCT_STAT_SYSTEM, /* ... kernel mode */ | ||
| 1425 | |||
| 1426 | CPUACCT_STAT_NSTATS, | ||
| 1427 | }; | ||
| 1428 | |||
| 1413 | #ifdef CONFIG_CGROUP_CPUACCT | 1429 | #ifdef CONFIG_CGROUP_CPUACCT |
| 1414 | static void cpuacct_charge(struct task_struct *tsk, u64 cputime); | 1430 | static void cpuacct_charge(struct task_struct *tsk, u64 cputime); |
| 1431 | static void cpuacct_update_stats(struct task_struct *tsk, | ||
| 1432 | enum cpuacct_stat_index idx, cputime_t val); | ||
| 1415 | #else | 1433 | #else |
| 1416 | static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {} | 1434 | static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {} |
| 1435 | static inline void cpuacct_update_stats(struct task_struct *tsk, | ||
| 1436 | enum cpuacct_stat_index idx, cputime_t val) {} | ||
| 1417 | #endif | 1437 | #endif |
| 1418 | 1438 | ||
| 1419 | static inline void inc_cpu_load(struct rq *rq, unsigned long load) | 1439 | static inline void inc_cpu_load(struct rq *rq, unsigned long load) |
| @@ -4503,9 +4523,25 @@ DEFINE_PER_CPU(struct kernel_stat, kstat); | |||
| 4503 | EXPORT_PER_CPU_SYMBOL(kstat); | 4523 | EXPORT_PER_CPU_SYMBOL(kstat); |
| 4504 | 4524 | ||
| 4505 | /* | 4525 | /* |
| 4506 | * Return any ns on the sched_clock that have not yet been banked in | 4526 | * Return any ns on the sched_clock that have not yet been accounted in |
| 4507 | * @p in case that task is currently running. | 4527 | * @p in case that task is currently running. |
| 4528 | * | ||
| 4529 | * Called with task_rq_lock() held on @rq. | ||
| 4508 | */ | 4530 | */ |
| 4531 | static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq) | ||
| 4532 | { | ||
| 4533 | u64 ns = 0; | ||
| 4534 | |||
| 4535 | if (task_current(rq, p)) { | ||
| 4536 | update_rq_clock(rq); | ||
| 4537 | ns = rq->clock - p->se.exec_start; | ||
| 4538 | if ((s64)ns < 0) | ||
| 4539 | ns = 0; | ||
| 4540 | } | ||
| 4541 | |||
| 4542 | return ns; | ||
| 4543 | } | ||
| 4544 | |||
| 4509 | unsigned long long task_delta_exec(struct task_struct *p) | 4545 | unsigned long long task_delta_exec(struct task_struct *p) |
| 4510 | { | 4546 | { |
| 4511 | unsigned long flags; | 4547 | unsigned long flags; |
| @@ -4513,16 +4549,49 @@ unsigned long long task_delta_exec(struct task_struct *p) | |||
| 4513 | u64 ns = 0; | 4549 | u64 ns = 0; |
| 4514 | 4550 | ||
| 4515 | rq = task_rq_lock(p, &flags); | 4551 | rq = task_rq_lock(p, &flags); |
| 4552 | ns = do_task_delta_exec(p, rq); | ||
| 4553 | task_rq_unlock(rq, &flags); | ||
| 4516 | 4554 | ||
| 4517 | if (task_current(rq, p)) { | 4555 | return ns; |
| 4518 | u64 delta_exec; | 4556 | } |
| 4519 | 4557 | ||
| 4520 | update_rq_clock(rq); | 4558 | /* |
| 4521 | delta_exec = rq->clock - p->se.exec_start; | 4559 | * Return accounted runtime for the task. |
| 4522 | if ((s64)delta_exec > 0) | 4560 | * In case the task is currently running, return the runtime plus current's |
| 4523 | ns = delta_exec; | 4561 | * pending runtime that have not been accounted yet. |
| 4524 | } | 4562 | */ |
| 4563 | unsigned long long task_sched_runtime(struct task_struct *p) | ||
| 4564 | { | ||
| 4565 | unsigned long flags; | ||
| 4566 | struct rq *rq; | ||
| 4567 | u64 ns = 0; | ||
| 4525 | 4568 | ||
| 4569 | rq = task_rq_lock(p, &flags); | ||
| 4570 | ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq); | ||
| 4571 | task_rq_unlock(rq, &flags); | ||
| 4572 | |||
| 4573 | return ns; | ||
| 4574 | } | ||
| 4575 | |||
| 4576 | /* | ||
| 4577 | * Return sum_exec_runtime for the thread group. | ||
| 4578 | * In case the task is currently running, return the sum plus current's | ||
| 4579 | * pending runtime that have not been accounted yet. | ||
| 4580 | * | ||
| 4581 | * Note that the thread group might have other running tasks as well, | ||
| 4582 | * so the return value does not include other pending runtime that other | ||
| 4583 | * running tasks might have. | ||
| 4584 | */ | ||
| 4585 | unsigned long long thread_group_sched_runtime(struct task_struct *p) | ||
| 4586 | { | ||
| 4587 | struct task_cputime totals; | ||
| 4588 | unsigned long flags; | ||
| 4589 | struct rq *rq; | ||
| 4590 | u64 ns; | ||
| 4591 | |||
| 4592 | rq = task_rq_lock(p, &flags); | ||
| 4593 | thread_group_cputime(p, &totals); | ||
| 4594 | ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq); | ||
| 4526 | task_rq_unlock(rq, &flags); | 4595 | task_rq_unlock(rq, &flags); |
| 4527 | 4596 | ||
| 4528 | return ns; | 4597 | return ns; |
| @@ -4551,6 +4620,8 @@ void account_user_time(struct task_struct *p, cputime_t cputime, | |||
| 4551 | cpustat->nice = cputime64_add(cpustat->nice, tmp); | 4620 | cpustat->nice = cputime64_add(cpustat->nice, tmp); |
| 4552 | else | 4621 | else |
| 4553 | cpustat->user = cputime64_add(cpustat->user, tmp); | 4622 | cpustat->user = cputime64_add(cpustat->user, tmp); |
| 4623 | |||
| 4624 | cpuacct_update_stats(p, CPUACCT_STAT_USER, cputime); | ||
| 4554 | /* Account for user time used */ | 4625 | /* Account for user time used */ |
| 4555 | acct_update_integrals(p); | 4626 | acct_update_integrals(p); |
| 4556 | } | 4627 | } |
| @@ -4612,6 +4683,8 @@ void account_system_time(struct task_struct *p, int hardirq_offset, | |||
| 4612 | else | 4683 | else |
| 4613 | cpustat->system = cputime64_add(cpustat->system, tmp); | 4684 | cpustat->system = cputime64_add(cpustat->system, tmp); |
| 4614 | 4685 | ||
| 4686 | cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime); | ||
| 4687 | |||
| 4615 | /* Account for system time used */ | 4688 | /* Account for system time used */ |
| 4616 | acct_update_integrals(p); | 4689 | acct_update_integrals(p); |
| 4617 | } | 4690 | } |
| @@ -4773,7 +4846,7 @@ void scheduler_tick(void) | |||
| 4773 | #endif | 4846 | #endif |
| 4774 | } | 4847 | } |
| 4775 | 4848 | ||
| 4776 | unsigned long get_parent_ip(unsigned long addr) | 4849 | notrace unsigned long get_parent_ip(unsigned long addr) |
| 4777 | { | 4850 | { |
| 4778 | if (in_lock_functions(addr)) { | 4851 | if (in_lock_functions(addr)) { |
| 4779 | addr = CALLER_ADDR2; | 4852 | addr = CALLER_ADDR2; |
| @@ -7294,7 +7367,12 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, | |||
| 7294 | cpumask_or(groupmask, groupmask, sched_group_cpus(group)); | 7367 | cpumask_or(groupmask, groupmask, sched_group_cpus(group)); |
| 7295 | 7368 | ||
| 7296 | cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); | 7369 | cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); |
| 7370 | |||
| 7297 | printk(KERN_CONT " %s", str); | 7371 | printk(KERN_CONT " %s", str); |
| 7372 | if (group->__cpu_power != SCHED_LOAD_SCALE) { | ||
| 7373 | printk(KERN_CONT " (__cpu_power = %d)", | ||
| 7374 | group->__cpu_power); | ||
| 7375 | } | ||
| 7298 | 7376 | ||
| 7299 | group = group->next; | 7377 | group = group->next; |
| 7300 | } while (group != sd->groups); | 7378 | } while (group != sd->groups); |
| @@ -9917,6 +9995,7 @@ struct cpuacct { | |||
| 9917 | struct cgroup_subsys_state css; | 9995 | struct cgroup_subsys_state css; |
| 9918 | /* cpuusage holds pointer to a u64-type object on every cpu */ | 9996 | /* cpuusage holds pointer to a u64-type object on every cpu */ |
| 9919 | u64 *cpuusage; | 9997 | u64 *cpuusage; |
| 9998 | struct percpu_counter cpustat[CPUACCT_STAT_NSTATS]; | ||
| 9920 | struct cpuacct *parent; | 9999 | struct cpuacct *parent; |
| 9921 | }; | 10000 | }; |
| 9922 | 10001 | ||
| @@ -9941,20 +10020,32 @@ static struct cgroup_subsys_state *cpuacct_create( | |||
| 9941 | struct cgroup_subsys *ss, struct cgroup *cgrp) | 10020 | struct cgroup_subsys *ss, struct cgroup *cgrp) |
| 9942 | { | 10021 | { |
| 9943 | struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL); | 10022 | struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL); |
| 10023 | int i; | ||
| 9944 | 10024 | ||
| 9945 | if (!ca) | 10025 | if (!ca) |
| 9946 | return ERR_PTR(-ENOMEM); | 10026 | goto out; |
| 9947 | 10027 | ||
| 9948 | ca->cpuusage = alloc_percpu(u64); | 10028 | ca->cpuusage = alloc_percpu(u64); |
| 9949 | if (!ca->cpuusage) { | 10029 | if (!ca->cpuusage) |
| 9950 | kfree(ca); | 10030 | goto out_free_ca; |
| 9951 | return ERR_PTR(-ENOMEM); | 10031 | |
| 9952 | } | 10032 | for (i = 0; i < CPUACCT_STAT_NSTATS; i++) |
| 10033 | if (percpu_counter_init(&ca->cpustat[i], 0)) | ||
| 10034 | goto out_free_counters; | ||
| 9953 | 10035 | ||
| 9954 | if (cgrp->parent) | 10036 | if (cgrp->parent) |
| 9955 | ca->parent = cgroup_ca(cgrp->parent); | 10037 | ca->parent = cgroup_ca(cgrp->parent); |
| 9956 | 10038 | ||
| 9957 | return &ca->css; | 10039 | return &ca->css; |
| 10040 | |||
| 10041 | out_free_counters: | ||
| 10042 | while (--i >= 0) | ||
| 10043 | percpu_counter_destroy(&ca->cpustat[i]); | ||
| 10044 | free_percpu(ca->cpuusage); | ||
| 10045 | out_free_ca: | ||
| 10046 | kfree(ca); | ||
| 10047 | out: | ||
| 10048 | return ERR_PTR(-ENOMEM); | ||
| 9958 | } | 10049 | } |
| 9959 | 10050 | ||
| 9960 | /* destroy an existing cpu accounting group */ | 10051 | /* destroy an existing cpu accounting group */ |
| @@ -9962,7 +10053,10 @@ static void | |||
| 9962 | cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) | 10053 | cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) |
| 9963 | { | 10054 | { |
| 9964 | struct cpuacct *ca = cgroup_ca(cgrp); | 10055 | struct cpuacct *ca = cgroup_ca(cgrp); |
| 10056 | int i; | ||
| 9965 | 10057 | ||
| 10058 | for (i = 0; i < CPUACCT_STAT_NSTATS; i++) | ||
| 10059 | percpu_counter_destroy(&ca->cpustat[i]); | ||
| 9966 | free_percpu(ca->cpuusage); | 10060 | free_percpu(ca->cpuusage); |
| 9967 | kfree(ca); | 10061 | kfree(ca); |
| 9968 | } | 10062 | } |
| @@ -10049,6 +10143,25 @@ static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft, | |||
| 10049 | return 0; | 10143 | return 0; |
| 10050 | } | 10144 | } |
| 10051 | 10145 | ||
| 10146 | static const char *cpuacct_stat_desc[] = { | ||
| 10147 | [CPUACCT_STAT_USER] = "user", | ||
| 10148 | [CPUACCT_STAT_SYSTEM] = "system", | ||
| 10149 | }; | ||
| 10150 | |||
| 10151 | static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft, | ||
| 10152 | struct cgroup_map_cb *cb) | ||
| 10153 | { | ||
| 10154 | struct cpuacct *ca = cgroup_ca(cgrp); | ||
| 10155 | int i; | ||
| 10156 | |||
| 10157 | for (i = 0; i < CPUACCT_STAT_NSTATS; i++) { | ||
| 10158 | s64 val = percpu_counter_read(&ca->cpustat[i]); | ||
| 10159 | val = cputime64_to_clock_t(val); | ||
| 10160 | cb->fill(cb, cpuacct_stat_desc[i], val); | ||
| 10161 | } | ||
| 10162 | return 0; | ||
| 10163 | } | ||
| 10164 | |||
| 10052 | static struct cftype files[] = { | 10165 | static struct cftype files[] = { |
| 10053 | { | 10166 | { |
| 10054 | .name = "usage", | 10167 | .name = "usage", |
| @@ -10059,7 +10172,10 @@ static struct cftype files[] = { | |||
| 10059 | .name = "usage_percpu", | 10172 | .name = "usage_percpu", |
| 10060 | .read_seq_string = cpuacct_percpu_seq_read, | 10173 | .read_seq_string = cpuacct_percpu_seq_read, |
| 10061 | }, | 10174 | }, |
| 10062 | 10175 | { | |
| 10176 | .name = "stat", | ||
| 10177 | .read_map = cpuacct_stats_show, | ||
| 10178 | }, | ||
| 10063 | }; | 10179 | }; |
| 10064 | 10180 | ||
| 10065 | static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) | 10181 | static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) |
| @@ -10081,12 +10197,38 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime) | |||
| 10081 | return; | 10197 | return; |
| 10082 | 10198 | ||
| 10083 | cpu = task_cpu(tsk); | 10199 | cpu = task_cpu(tsk); |
| 10200 | |||
| 10201 | rcu_read_lock(); | ||
| 10202 | |||
| 10084 | ca = task_ca(tsk); | 10203 | ca = task_ca(tsk); |
| 10085 | 10204 | ||
| 10086 | for (; ca; ca = ca->parent) { | 10205 | for (; ca; ca = ca->parent) { |
| 10087 | u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); | 10206 | u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); |
| 10088 | *cpuusage += cputime; | 10207 | *cpuusage += cputime; |
| 10089 | } | 10208 | } |
| 10209 | |||
| 10210 | rcu_read_unlock(); | ||
| 10211 | } | ||
| 10212 | |||
| 10213 | /* | ||
| 10214 | * Charge the system/user time to the task's accounting group. | ||
| 10215 | */ | ||
| 10216 | static void cpuacct_update_stats(struct task_struct *tsk, | ||
| 10217 | enum cpuacct_stat_index idx, cputime_t val) | ||
| 10218 | { | ||
| 10219 | struct cpuacct *ca; | ||
| 10220 | |||
| 10221 | if (unlikely(!cpuacct_subsys.active)) | ||
| 10222 | return; | ||
| 10223 | |||
| 10224 | rcu_read_lock(); | ||
| 10225 | ca = task_ca(tsk); | ||
| 10226 | |||
| 10227 | do { | ||
| 10228 | percpu_counter_add(&ca->cpustat[idx], val); | ||
| 10229 | ca = ca->parent; | ||
| 10230 | } while (ca); | ||
| 10231 | rcu_read_unlock(); | ||
| 10090 | } | 10232 | } |
| 10091 | 10233 | ||
| 10092 | struct cgroup_subsys cpuacct_subsys = { | 10234 | struct cgroup_subsys cpuacct_subsys = { |
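Note: cpuacct_create() above moves to the usual goto-based unwind: the per-state counters are initialized in a loop and, on failure, only the ones already set up are torn down with while (--i >= 0). A small userspace analogue of that error-path pattern (plain calloc()/free() stand in for percpu_counter_init()/destroy(); NSTATS, struct group and make_group() are made-up names):

#include <stdlib.h>
#include <stdio.h>

#define NSTATS 2

struct group {
        long *usage;
        long *stat[NSTATS];
};

static struct group *make_group(void)
{
        struct group *g = calloc(1, sizeof(*g));
        int i;

        if (!g)
                goto out;
        g->usage = calloc(1, sizeof(*g->usage));
        if (!g->usage)
                goto out_free_group;
        for (i = 0; i < NSTATS; i++) {
                g->stat[i] = calloc(1, sizeof(*g->stat[i]));
                if (!g->stat[i])
                        goto out_free_stats;
        }
        return g;

out_free_stats:
        while (--i >= 0)                /* unwind only what was initialized */
                free(g->stat[i]);
        free(g->usage);
out_free_group:
        free(g);
out:
        return NULL;
}

int main(void)
{
        struct group *g = make_group();

        printf("%s\n", g ? "created" : "allocation failed");
        return 0;
}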
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c index 1e00bfacf9b8..cdd3c89574cd 100644 --- a/kernel/sched_cpupri.c +++ b/kernel/sched_cpupri.c | |||
| @@ -55,7 +55,7 @@ static int convert_prio(int prio) | |||
| 55 | * cpupri_find - find the best (lowest-pri) CPU in the system | 55 | * cpupri_find - find the best (lowest-pri) CPU in the system |
| 56 | * @cp: The cpupri context | 56 | * @cp: The cpupri context |
| 57 | * @p: The task | 57 | * @p: The task |
| 58 | * @lowest_mask: A mask to fill in with selected CPUs | 58 | * @lowest_mask: A mask to fill in with selected CPUs (or NULL) |
| 59 | * | 59 | * |
| 60 | * Note: This function returns the recommended CPUs as calculated during the | 60 | * Note: This function returns the recommended CPUs as calculated during the |
| 61 | * current invocation. By the time the call returns, the CPUs may have in | 61 | * current invocation. By the time the call returns, the CPUs may have in |
| @@ -81,7 +81,8 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p, | |||
| 81 | if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids) | 81 | if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids) |
| 82 | continue; | 82 | continue; |
| 83 | 83 | ||
| 84 | cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask); | 84 | if (lowest_mask) |
| 85 | cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask); | ||
| 85 | return 1; | 86 | return 1; |
| 86 | } | 87 | } |
| 87 | 88 | ||
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 299d012b4394..f2c66f8f9712 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
| @@ -948,20 +948,15 @@ static int select_task_rq_rt(struct task_struct *p, int sync) | |||
| 948 | 948 | ||
| 949 | static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) | 949 | static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) |
| 950 | { | 950 | { |
| 951 | cpumask_var_t mask; | ||
| 952 | |||
| 953 | if (rq->curr->rt.nr_cpus_allowed == 1) | 951 | if (rq->curr->rt.nr_cpus_allowed == 1) |
| 954 | return; | 952 | return; |
| 955 | 953 | ||
| 956 | if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) | ||
| 957 | return; | ||
| 958 | |||
| 959 | if (p->rt.nr_cpus_allowed != 1 | 954 | if (p->rt.nr_cpus_allowed != 1 |
| 960 | && cpupri_find(&rq->rd->cpupri, p, mask)) | 955 | && cpupri_find(&rq->rd->cpupri, p, NULL)) |
| 961 | goto free; | 956 | return; |
| 962 | 957 | ||
| 963 | if (!cpupri_find(&rq->rd->cpupri, rq->curr, mask)) | 958 | if (!cpupri_find(&rq->rd->cpupri, rq->curr, NULL)) |
| 964 | goto free; | 959 | return; |
| 965 | 960 | ||
| 966 | /* | 961 | /* |
| 967 | * There appears to be other cpus that can accept | 962 | * There appears to be other cpus that can accept |
| @@ -970,8 +965,6 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) | |||
| 970 | */ | 965 | */ |
| 971 | requeue_task_rt(rq, p, 1); | 966 | requeue_task_rt(rq, p, 1); |
| 972 | resched_task(rq->curr); | 967 | resched_task(rq->curr); |
| 973 | free: | ||
| 974 | free_cpumask_var(mask); | ||
| 975 | } | 968 | } |
| 976 | 969 | ||
| 977 | #endif /* CONFIG_SMP */ | 970 | #endif /* CONFIG_SMP */ |
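Note: the cpupri and RT scheduler hunks above work together: cpupri_find() now treats a NULL lowest_mask as "just tell me whether such CPUs exist", so check_preempt_equal_prio() no longer needs a GFP_ATOMIC cpumask allocation on a hot path. A hedged sketch of the optional-output-parameter pattern in plain C (find_candidates() and the fixed-size arrays are illustrative, not the kernel API):

#include <stdio.h>

#define NCPUS 8

/* Fill 'out' (if non-NULL) with candidate CPUs and report whether any exist. */
static int find_candidates(const int allowed[NCPUS], const int idle[NCPUS],
                           int out[NCPUS])
{
        int found = 0;

        for (int cpu = 0; cpu < NCPUS; cpu++) {
                int hit = allowed[cpu] && idle[cpu];

                if (hit)
                        found = 1;
                if (out)
                        out[cpu] = hit;
        }
        return found;
}

int main(void)
{
        int allowed[NCPUS] = { 1, 1, 0, 0, 1, 0, 0, 0 };
        int idle[NCPUS]    = { 0, 1, 1, 0, 1, 0, 0, 0 };
        int mask[NCPUS];

        /* Existence check only: no output buffer needed. */
        printf("any: %d\n", find_candidates(allowed, idle, NULL));

        /* Full result when the caller actually wants the mask. */
        if (find_candidates(allowed, idle, mask))
                for (int cpu = 0; cpu < NCPUS; cpu++)
                        if (mask[cpu])
                                printf("cpu %d\n", cpu);
        return 0;
}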
diff --git a/kernel/slow-work.c b/kernel/slow-work.c index cf2bc01186ef..b28d19135f43 100644 --- a/kernel/slow-work.c +++ b/kernel/slow-work.c | |||
| @@ -609,14 +609,14 @@ void slow_work_unregister_user(void) | |||
| 609 | if (slow_work_user_count == 0) { | 609 | if (slow_work_user_count == 0) { |
| 610 | printk(KERN_NOTICE "Slow work thread pool: Shutting down\n"); | 610 | printk(KERN_NOTICE "Slow work thread pool: Shutting down\n"); |
| 611 | slow_work_threads_should_exit = true; | 611 | slow_work_threads_should_exit = true; |
| 612 | del_timer_sync(&slow_work_cull_timer); | ||
| 613 | del_timer_sync(&slow_work_oom_timer); | ||
| 612 | wake_up_all(&slow_work_thread_wq); | 614 | wake_up_all(&slow_work_thread_wq); |
| 613 | wait_for_completion(&slow_work_last_thread_exited); | 615 | wait_for_completion(&slow_work_last_thread_exited); |
| 614 | printk(KERN_NOTICE "Slow work thread pool:" | 616 | printk(KERN_NOTICE "Slow work thread pool:" |
| 615 | " Shut down complete\n"); | 617 | " Shut down complete\n"); |
| 616 | } | 618 | } |
| 617 | 619 | ||
| 618 | del_timer_sync(&slow_work_cull_timer); | ||
| 619 | |||
| 620 | mutex_unlock(&slow_work_user_lock); | 620 | mutex_unlock(&slow_work_user_lock); |
| 621 | } | 621 | } |
| 622 | EXPORT_SYMBOL(slow_work_unregister_user); | 622 | EXPORT_SYMBOL(slow_work_unregister_user); |
diff --git a/kernel/softirq.c b/kernel/softirq.c index d105a82543d0..b525dd348511 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
| @@ -65,7 +65,7 @@ char *softirq_to_name[NR_SOFTIRQS] = { | |||
| 65 | * to the pending events, so let the scheduler balance | 65 | * to the pending events, so let the scheduler balance |
| 66 | * the softirq load for us. | 66 | * the softirq load for us. |
| 67 | */ | 67 | */ |
| 68 | static inline void wakeup_softirqd(void) | 68 | void wakeup_softirqd(void) |
| 69 | { | 69 | { |
| 70 | /* Interrupts are disabled: no need to stop preemption */ | 70 | /* Interrupts are disabled: no need to stop preemption */ |
| 71 | struct task_struct *tsk = __get_cpu_var(ksoftirqd); | 71 | struct task_struct *tsk = __get_cpu_var(ksoftirqd); |
| @@ -472,9 +472,9 @@ void tasklet_kill(struct tasklet_struct *t) | |||
| 472 | printk("Attempt to kill tasklet from interrupt\n"); | 472 | printk("Attempt to kill tasklet from interrupt\n"); |
| 473 | 473 | ||
| 474 | while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { | 474 | while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { |
| 475 | do | 475 | do { |
| 476 | yield(); | 476 | yield(); |
| 477 | while (test_bit(TASKLET_STATE_SCHED, &t->state)); | 477 | } while (test_bit(TASKLET_STATE_SCHED, &t->state)); |
| 478 | } | 478 | } |
| 479 | tasklet_unlock_wait(t); | 479 | tasklet_unlock_wait(t); |
| 480 | clear_bit(TASKLET_STATE_SCHED, &t->state); | 480 | clear_bit(TASKLET_STATE_SCHED, &t->state); |
diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 85d5a2455103..88796c330838 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c | |||
| @@ -166,97 +166,11 @@ void softlockup_tick(void) | |||
| 166 | } | 166 | } |
| 167 | 167 | ||
| 168 | /* | 168 | /* |
| 169 | * Have a reasonable limit on the number of tasks checked: | ||
| 170 | */ | ||
| 171 | unsigned long __read_mostly sysctl_hung_task_check_count = 1024; | ||
| 172 | |||
| 173 | /* | ||
| 174 | * Zero means infinite timeout - no checking done: | ||
| 175 | */ | ||
| 176 | unsigned long __read_mostly sysctl_hung_task_timeout_secs = 480; | ||
| 177 | |||
| 178 | unsigned long __read_mostly sysctl_hung_task_warnings = 10; | ||
| 179 | |||
| 180 | /* | ||
| 181 | * Only do the hung-tasks check on one CPU: | ||
| 182 | */ | ||
| 183 | static int check_cpu __read_mostly = -1; | ||
| 184 | |||
| 185 | static void check_hung_task(struct task_struct *t, unsigned long now) | ||
| 186 | { | ||
| 187 | unsigned long switch_count = t->nvcsw + t->nivcsw; | ||
| 188 | |||
| 189 | if (t->flags & PF_FROZEN) | ||
| 190 | return; | ||
| 191 | |||
| 192 | if (switch_count != t->last_switch_count || !t->last_switch_timestamp) { | ||
| 193 | t->last_switch_count = switch_count; | ||
| 194 | t->last_switch_timestamp = now; | ||
| 195 | return; | ||
| 196 | } | ||
| 197 | if ((long)(now - t->last_switch_timestamp) < | ||
| 198 | sysctl_hung_task_timeout_secs) | ||
| 199 | return; | ||
| 200 | if (!sysctl_hung_task_warnings) | ||
| 201 | return; | ||
| 202 | sysctl_hung_task_warnings--; | ||
| 203 | |||
| 204 | /* | ||
| 205 | * Ok, the task did not get scheduled for more than 2 minutes, | ||
| 206 | * complain: | ||
| 207 | */ | ||
| 208 | printk(KERN_ERR "INFO: task %s:%d blocked for more than " | ||
| 209 | "%ld seconds.\n", t->comm, t->pid, | ||
| 210 | sysctl_hung_task_timeout_secs); | ||
| 211 | printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\"" | ||
| 212 | " disables this message.\n"); | ||
| 213 | sched_show_task(t); | ||
| 214 | __debug_show_held_locks(t); | ||
| 215 | |||
| 216 | t->last_switch_timestamp = now; | ||
| 217 | touch_nmi_watchdog(); | ||
| 218 | |||
| 219 | if (softlockup_panic) | ||
| 220 | panic("softlockup: blocked tasks"); | ||
| 221 | } | ||
| 222 | |||
| 223 | /* | ||
| 224 | * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for | ||
| 225 | * a really long time (120 seconds). If that happens, print out | ||
| 226 | * a warning. | ||
| 227 | */ | ||
| 228 | static void check_hung_uninterruptible_tasks(int this_cpu) | ||
| 229 | { | ||
| 230 | int max_count = sysctl_hung_task_check_count; | ||
| 231 | unsigned long now = get_timestamp(this_cpu); | ||
| 232 | struct task_struct *g, *t; | ||
| 233 | |||
| 234 | /* | ||
| 235 | * If the system crashed already then all bets are off, | ||
| 236 | * do not report extra hung tasks: | ||
| 237 | */ | ||
| 238 | if (test_taint(TAINT_DIE) || did_panic) | ||
| 239 | return; | ||
| 240 | |||
| 241 | read_lock(&tasklist_lock); | ||
| 242 | do_each_thread(g, t) { | ||
| 243 | if (!--max_count) | ||
| 244 | goto unlock; | ||
| 245 | /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */ | ||
| 246 | if (t->state == TASK_UNINTERRUPTIBLE) | ||
| 247 | check_hung_task(t, now); | ||
| 248 | } while_each_thread(g, t); | ||
| 249 | unlock: | ||
| 250 | read_unlock(&tasklist_lock); | ||
| 251 | } | ||
| 252 | |||
| 253 | /* | ||
| 254 | * The watchdog thread - runs every second and touches the timestamp. | 169 | * The watchdog thread - runs every second and touches the timestamp. |
| 255 | */ | 170 | */ |
| 256 | static int watchdog(void *__bind_cpu) | 171 | static int watchdog(void *__bind_cpu) |
| 257 | { | 172 | { |
| 258 | struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; | 173 | struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; |
| 259 | int this_cpu = (long)__bind_cpu; | ||
| 260 | 174 | ||
| 261 | sched_setscheduler(current, SCHED_FIFO, ¶m); | 175 | sched_setscheduler(current, SCHED_FIFO, ¶m); |
| 262 | 176 | ||
| @@ -276,11 +190,6 @@ static int watchdog(void *__bind_cpu) | |||
| 276 | if (kthread_should_stop()) | 190 | if (kthread_should_stop()) |
| 277 | break; | 191 | break; |
| 278 | 192 | ||
| 279 | if (this_cpu == check_cpu) { | ||
| 280 | if (sysctl_hung_task_timeout_secs) | ||
| 281 | check_hung_uninterruptible_tasks(this_cpu); | ||
| 282 | } | ||
| 283 | |||
| 284 | set_current_state(TASK_INTERRUPTIBLE); | 193 | set_current_state(TASK_INTERRUPTIBLE); |
| 285 | } | 194 | } |
| 286 | __set_current_state(TASK_RUNNING); | 195 | __set_current_state(TASK_RUNNING); |
| @@ -312,18 +221,9 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
| 312 | break; | 221 | break; |
| 313 | case CPU_ONLINE: | 222 | case CPU_ONLINE: |
| 314 | case CPU_ONLINE_FROZEN: | 223 | case CPU_ONLINE_FROZEN: |
| 315 | check_cpu = cpumask_any(cpu_online_mask); | ||
| 316 | wake_up_process(per_cpu(watchdog_task, hotcpu)); | 224 | wake_up_process(per_cpu(watchdog_task, hotcpu)); |
| 317 | break; | 225 | break; |
| 318 | #ifdef CONFIG_HOTPLUG_CPU | 226 | #ifdef CONFIG_HOTPLUG_CPU |
| 319 | case CPU_DOWN_PREPARE: | ||
| 320 | case CPU_DOWN_PREPARE_FROZEN: | ||
| 321 | if (hotcpu == check_cpu) { | ||
| 322 | /* Pick any other online cpu. */ | ||
| 323 | check_cpu = cpumask_any_but(cpu_online_mask, hotcpu); | ||
| 324 | } | ||
| 325 | break; | ||
| 326 | |||
| 327 | case CPU_UP_CANCELED: | 227 | case CPU_UP_CANCELED: |
| 328 | case CPU_UP_CANCELED_FROZEN: | 228 | case CPU_UP_CANCELED_FROZEN: |
| 329 | if (!per_cpu(watchdog_task, hotcpu)) | 229 | if (!per_cpu(watchdog_task, hotcpu)) |
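Note: the hung-task detector removed from softlockup.c above keys off one observation: if a task's voluntary-plus-involuntary context-switch count (nvcsw + nivcsw) has not advanced since the last sample and the timestamp is old enough, the task has been stuck in uninterruptible sleep. A minimal userspace sketch of that "did the counter move?" check; the struct, the sample times and the threshold are invented for illustration.

#include <stdio.h>

struct task {
        const char *name;
        unsigned long nvcsw, nivcsw;            /* context-switch counters */
        unsigned long last_switch_count;
        unsigned long last_switch_timestamp;    /* seconds */
};

static void check_hung_task(struct task *t, unsigned long now,
                            unsigned long timeout_secs)
{
        unsigned long switch_count = t->nvcsw + t->nivcsw;

        if (switch_count != t->last_switch_count || !t->last_switch_timestamp) {
                /* The task ran (or was never sampled): reset the baseline. */
                t->last_switch_count = switch_count;
                t->last_switch_timestamp = now;
                return;
        }
        if (now - t->last_switch_timestamp < timeout_secs)
                return;

        printf("task %s blocked for more than %lu seconds\n",
               t->name, timeout_secs);
        t->last_switch_timestamp = now;         /* rate-limit further reports */
}

int main(void)
{
        struct task t = { "worker", 10, 2, 12, 100 };

        check_hung_task(&t, 200, 120);  /* counter unchanged, 100s: quiet */
        check_hung_task(&t, 400, 120);  /* still unchanged, 300s: warns */
        return 0;
}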
diff --git a/kernel/sys.c b/kernel/sys.c index 51dbb55604e8..e7998cf31498 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
| @@ -360,6 +360,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, | |||
| 360 | void __user *, arg) | 360 | void __user *, arg) |
| 361 | { | 361 | { |
| 362 | char buffer[256]; | 362 | char buffer[256]; |
| 363 | int ret = 0; | ||
| 363 | 364 | ||
| 364 | /* We only trust the superuser with rebooting the system. */ | 365 | /* We only trust the superuser with rebooting the system. */ |
| 365 | if (!capable(CAP_SYS_BOOT)) | 366 | if (!capable(CAP_SYS_BOOT)) |
| @@ -397,7 +398,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, | |||
| 397 | kernel_halt(); | 398 | kernel_halt(); |
| 398 | unlock_kernel(); | 399 | unlock_kernel(); |
| 399 | do_exit(0); | 400 | do_exit(0); |
| 400 | break; | 401 | panic("cannot halt"); |
| 401 | 402 | ||
| 402 | case LINUX_REBOOT_CMD_POWER_OFF: | 403 | case LINUX_REBOOT_CMD_POWER_OFF: |
| 403 | kernel_power_off(); | 404 | kernel_power_off(); |
| @@ -417,29 +418,22 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, | |||
| 417 | 418 | ||
| 418 | #ifdef CONFIG_KEXEC | 419 | #ifdef CONFIG_KEXEC |
| 419 | case LINUX_REBOOT_CMD_KEXEC: | 420 | case LINUX_REBOOT_CMD_KEXEC: |
| 420 | { | 421 | ret = kernel_kexec(); |
| 421 | int ret; | 422 | break; |
| 422 | ret = kernel_kexec(); | ||
| 423 | unlock_kernel(); | ||
| 424 | return ret; | ||
| 425 | } | ||
| 426 | #endif | 423 | #endif |
| 427 | 424 | ||
| 428 | #ifdef CONFIG_HIBERNATION | 425 | #ifdef CONFIG_HIBERNATION |
| 429 | case LINUX_REBOOT_CMD_SW_SUSPEND: | 426 | case LINUX_REBOOT_CMD_SW_SUSPEND: |
| 430 | { | 427 | ret = hibernate(); |
| 431 | int ret = hibernate(); | 428 | break; |
| 432 | unlock_kernel(); | ||
| 433 | return ret; | ||
| 434 | } | ||
| 435 | #endif | 429 | #endif |
| 436 | 430 | ||
| 437 | default: | 431 | default: |
| 438 | unlock_kernel(); | 432 | ret = -EINVAL; |
| 439 | return -EINVAL; | 433 | break; |
| 440 | } | 434 | } |
| 441 | unlock_kernel(); | 435 | unlock_kernel(); |
| 442 | return 0; | 436 | return ret; |
| 443 | } | 437 | } |
| 444 | 438 | ||
| 445 | static void deferred_cad(struct work_struct *dummy) | 439 | static void deferred_cad(struct work_struct *dummy) |
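Note: the reboot hunk above folds three early "unlock and return" paths into one ret variable and a single exit, which is also why the unreachable break after kernel_halt() becomes a panic(). A small sketch of that single-exit shape under a lock, with a pthread mutex standing in for the big kernel lock; the command names and handlers are invented.

#include <pthread.h>
#include <stdio.h>
#include <errno.h>

static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;

enum cmd { CMD_RESTART, CMD_KEXEC, CMD_SUSPEND };

static int do_restart(void) { printf("restart\n"); return 0; }
static int do_kexec(void)   { printf("kexec\n");   return 0; }
static int do_suspend(void) { printf("suspend\n"); return 0; }

static int handle_cmd(int cmd)
{
        int ret = 0;

        pthread_mutex_lock(&big_lock);
        switch (cmd) {
        case CMD_RESTART:
                ret = do_restart();
                break;
        case CMD_KEXEC:
                ret = do_kexec();
                break;
        case CMD_SUSPEND:
                ret = do_suspend();
                break;
        default:
                ret = -EINVAL;
                break;
        }
        pthread_mutex_unlock(&big_lock);        /* one unlock, one return */
        return ret;
}

int main(void)
{
        return handle_cmd(CMD_KEXEC);
}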
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 82350f8f04f6..ea78fa101ad6 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
| @@ -97,10 +97,14 @@ static int neg_one = -1; | |||
| 97 | #endif | 97 | #endif |
| 98 | 98 | ||
| 99 | static int zero; | 99 | static int zero; |
| 100 | static int one = 1; | 100 | static int __maybe_unused one = 1; |
| 101 | static int two = 2; | 101 | static int __maybe_unused two = 2; |
| 102 | static unsigned long one_ul = 1; | 102 | static unsigned long one_ul = 1; |
| 103 | static int one_hundred = 100; | 103 | static int one_hundred = 100; |
| 104 | static int one_thousand = 1000; | ||
| 105 | |||
| 106 | /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */ | ||
| 107 | static unsigned long dirty_bytes_min = 2 * PAGE_SIZE; | ||
| 104 | 108 | ||
| 105 | /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ | 109 | /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ |
| 106 | static int maxolduid = 65535; | 110 | static int maxolduid = 65535; |
| @@ -813,6 +817,19 @@ static struct ctl_table kern_table[] = { | |||
| 813 | .extra1 = &neg_one, | 817 | .extra1 = &neg_one, |
| 814 | .extra2 = &sixty, | 818 | .extra2 = &sixty, |
| 815 | }, | 819 | }, |
| 820 | #endif | ||
| 821 | #ifdef CONFIG_DETECT_HUNG_TASK | ||
| 822 | { | ||
| 823 | .ctl_name = CTL_UNNUMBERED, | ||
| 824 | .procname = "hung_task_panic", | ||
| 825 | .data = &sysctl_hung_task_panic, | ||
| 826 | .maxlen = sizeof(int), | ||
| 827 | .mode = 0644, | ||
| 828 | .proc_handler = &proc_dointvec_minmax, | ||
| 829 | .strategy = &sysctl_intvec, | ||
| 830 | .extra1 = &zero, | ||
| 831 | .extra2 = &one, | ||
| 832 | }, | ||
| 816 | { | 833 | { |
| 817 | .ctl_name = CTL_UNNUMBERED, | 834 | .ctl_name = CTL_UNNUMBERED, |
| 818 | .procname = "hung_task_check_count", | 835 | .procname = "hung_task_check_count", |
| @@ -828,7 +845,7 @@ static struct ctl_table kern_table[] = { | |||
| 828 | .data = &sysctl_hung_task_timeout_secs, | 845 | .data = &sysctl_hung_task_timeout_secs, |
| 829 | .maxlen = sizeof(unsigned long), | 846 | .maxlen = sizeof(unsigned long), |
| 830 | .mode = 0644, | 847 | .mode = 0644, |
| 831 | .proc_handler = &proc_doulongvec_minmax, | 848 | .proc_handler = &proc_dohung_task_timeout_secs, |
| 832 | .strategy = &sysctl_intvec, | 849 | .strategy = &sysctl_intvec, |
| 833 | }, | 850 | }, |
| 834 | { | 851 | { |
| @@ -888,16 +905,6 @@ static struct ctl_table kern_table[] = { | |||
| 888 | .proc_handler = &proc_dointvec, | 905 | .proc_handler = &proc_dointvec, |
| 889 | }, | 906 | }, |
| 890 | #endif | 907 | #endif |
| 891 | #ifdef CONFIG_UNEVICTABLE_LRU | ||
| 892 | { | ||
| 893 | .ctl_name = CTL_UNNUMBERED, | ||
| 894 | .procname = "scan_unevictable_pages", | ||
| 895 | .data = &scan_unevictable_pages, | ||
| 896 | .maxlen = sizeof(scan_unevictable_pages), | ||
| 897 | .mode = 0644, | ||
| 898 | .proc_handler = &scan_unevictable_handler, | ||
| 899 | }, | ||
| 900 | #endif | ||
| 901 | #ifdef CONFIG_SLOW_WORK | 908 | #ifdef CONFIG_SLOW_WORK |
| 902 | { | 909 | { |
| 903 | .ctl_name = CTL_UNNUMBERED, | 910 | .ctl_name = CTL_UNNUMBERED, |
| @@ -1002,7 +1009,7 @@ static struct ctl_table vm_table[] = { | |||
| 1002 | .mode = 0644, | 1009 | .mode = 0644, |
| 1003 | .proc_handler = &dirty_bytes_handler, | 1010 | .proc_handler = &dirty_bytes_handler, |
| 1004 | .strategy = &sysctl_intvec, | 1011 | .strategy = &sysctl_intvec, |
| 1005 | .extra1 = &one_ul, | 1012 | .extra1 = &dirty_bytes_min, |
| 1006 | }, | 1013 | }, |
| 1007 | { | 1014 | { |
| 1008 | .procname = "dirty_writeback_centisecs", | 1015 | .procname = "dirty_writeback_centisecs", |
| @@ -1027,6 +1034,28 @@ static struct ctl_table vm_table[] = { | |||
| 1027 | .proc_handler = &proc_dointvec, | 1034 | .proc_handler = &proc_dointvec, |
| 1028 | }, | 1035 | }, |
| 1029 | { | 1036 | { |
| 1037 | .ctl_name = CTL_UNNUMBERED, | ||
| 1038 | .procname = "nr_pdflush_threads_min", | ||
| 1039 | .data = &nr_pdflush_threads_min, | ||
| 1040 | .maxlen = sizeof nr_pdflush_threads_min, | ||
| 1041 | .mode = 0644 /* read-write */, | ||
| 1042 | .proc_handler = &proc_dointvec_minmax, | ||
| 1043 | .strategy = &sysctl_intvec, | ||
| 1044 | .extra1 = &one, | ||
| 1045 | .extra2 = &nr_pdflush_threads_max, | ||
| 1046 | }, | ||
| 1047 | { | ||
| 1048 | .ctl_name = CTL_UNNUMBERED, | ||
| 1049 | .procname = "nr_pdflush_threads_max", | ||
| 1050 | .data = &nr_pdflush_threads_max, | ||
| 1051 | .maxlen = sizeof nr_pdflush_threads_max, | ||
| 1052 | .mode = 0644 /* read-write */, | ||
| 1053 | .proc_handler = &proc_dointvec_minmax, | ||
| 1054 | .strategy = &sysctl_intvec, | ||
| 1055 | .extra1 = &nr_pdflush_threads_min, | ||
| 1056 | .extra2 = &one_thousand, | ||
| 1057 | }, | ||
| 1058 | { | ||
| 1030 | .ctl_name = VM_SWAPPINESS, | 1059 | .ctl_name = VM_SWAPPINESS, |
| 1031 | .procname = "swappiness", | 1060 | .procname = "swappiness", |
| 1032 | .data = &vm_swappiness, | 1061 | .data = &vm_swappiness, |
| @@ -1266,6 +1295,16 @@ static struct ctl_table vm_table[] = { | |||
| 1266 | .extra2 = &one, | 1295 | .extra2 = &one, |
| 1267 | }, | 1296 | }, |
| 1268 | #endif | 1297 | #endif |
| 1298 | #ifdef CONFIG_UNEVICTABLE_LRU | ||
| 1299 | { | ||
| 1300 | .ctl_name = CTL_UNNUMBERED, | ||
| 1301 | .procname = "scan_unevictable_pages", | ||
| 1302 | .data = &scan_unevictable_pages, | ||
| 1303 | .maxlen = sizeof(scan_unevictable_pages), | ||
| 1304 | .mode = 0644, | ||
| 1305 | .proc_handler = &scan_unevictable_handler, | ||
| 1306 | }, | ||
| 1307 | #endif | ||
| 1269 | /* | 1308 | /* |
| 1270 | * NOTE: do not add new entries to this table unless you have read | 1309 | * NOTE: do not add new entries to this table unless you have read |
| 1271 | * Documentation/sysctl/ctl_unnumbered.txt | 1310 | * Documentation/sysctl/ctl_unnumbered.txt |
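Note: the two new pdflush entries above bound each other by passing the other variable's address as extra1/extra2 to proc_dointvec_minmax: the minimum can only be raised up to the current maximum, and the maximum only lowered down to the current minimum. A toy version of that mutual clamping (the setter and variable names are invented):

#include <stdio.h>

static int nr_threads_min = 2;
static int nr_threads_max = 32;
static const int hard_floor = 1;
static const int hard_ceiling = 1000;

/* Reject values outside [lo, hi]; 0 on success, -1 on rejection. */
static int set_bounded(int *var, int val, int lo, int hi)
{
        if (val < lo || val > hi)
                return -1;
        *var = val;
        return 0;
}

int main(void)
{
        /* min may move up to the current max, max down to the current min */
        printf("%d\n", set_bounded(&nr_threads_min, 8, hard_floor, nr_threads_max));   /*  0 */
        printf("%d\n", set_bounded(&nr_threads_max, 4, nr_threads_min, hard_ceiling)); /* -1 */
        return 0;
}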
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index c46c931a7fe7..ecfd7b5187e0 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c | |||
| @@ -181,12 +181,12 @@ static void clocksource_watchdog(unsigned long data) | |||
| 181 | 181 | ||
| 182 | resumed = test_and_clear_bit(0, &watchdog_resumed); | 182 | resumed = test_and_clear_bit(0, &watchdog_resumed); |
| 183 | 183 | ||
| 184 | wdnow = watchdog->read(); | 184 | wdnow = watchdog->read(watchdog); |
| 185 | wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask); | 185 | wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask); |
| 186 | watchdog_last = wdnow; | 186 | watchdog_last = wdnow; |
| 187 | 187 | ||
| 188 | list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) { | 188 | list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) { |
| 189 | csnow = cs->read(); | 189 | csnow = cs->read(cs); |
| 190 | 190 | ||
| 191 | if (unlikely(resumed)) { | 191 | if (unlikely(resumed)) { |
| 192 | cs->wd_last = csnow; | 192 | cs->wd_last = csnow; |
| @@ -247,7 +247,7 @@ static void clocksource_check_watchdog(struct clocksource *cs) | |||
| 247 | 247 | ||
| 248 | list_add(&cs->wd_list, &watchdog_list); | 248 | list_add(&cs->wd_list, &watchdog_list); |
| 249 | if (!started && watchdog) { | 249 | if (!started && watchdog) { |
| 250 | watchdog_last = watchdog->read(); | 250 | watchdog_last = watchdog->read(watchdog); |
| 251 | watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; | 251 | watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; |
| 252 | add_timer_on(&watchdog_timer, | 252 | add_timer_on(&watchdog_timer, |
| 253 | cpumask_first(cpu_online_mask)); | 253 | cpumask_first(cpu_online_mask)); |
| @@ -268,7 +268,7 @@ static void clocksource_check_watchdog(struct clocksource *cs) | |||
| 268 | cse->flags &= ~CLOCK_SOURCE_WATCHDOG; | 268 | cse->flags &= ~CLOCK_SOURCE_WATCHDOG; |
| 269 | /* Start if list is not empty */ | 269 | /* Start if list is not empty */ |
| 270 | if (!list_empty(&watchdog_list)) { | 270 | if (!list_empty(&watchdog_list)) { |
| 271 | watchdog_last = watchdog->read(); | 271 | watchdog_last = watchdog->read(watchdog); |
| 272 | watchdog_timer.expires = | 272 | watchdog_timer.expires = |
| 273 | jiffies + WATCHDOG_INTERVAL; | 273 | jiffies + WATCHDOG_INTERVAL; |
| 274 | add_timer_on(&watchdog_timer, | 274 | add_timer_on(&watchdog_timer, |
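Note: the clocksource hunks change the read callback to take the clocksource itself (cs->read(cs)), so a driver can reach per-instance state from the hook instead of relying on globals. A minimal C sketch of that "pass the object to its own callback" convention; the counter_source type and its fields are illustrative, not the kernel structures.

#include <stdio.h>
#include <stdint.h>

struct clocksource;
typedef uint64_t (*read_fn)(struct clocksource *cs);

struct clocksource {
        const char *name;
        read_fn read;
        void *priv;             /* per-instance state the callback can use */
};

struct counter_source {
        uint64_t ticks;
};

static uint64_t counter_read(struct clocksource *cs)
{
        struct counter_source *c = cs->priv;

        return c->ticks++;      /* fake hardware counter */
}

int main(void)
{
        struct counter_source hw = { 41 };
        struct clocksource cs = { "counter", counter_read, &hw };

        printf("%llu\n", (unsigned long long)cs.read(&cs));     /* 41 */
        printf("%llu\n", (unsigned long long)cs.read(&cs));     /* 42 */
        return 0;
}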
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index 06f197560f3b..c3f6c30816e3 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c | |||
| @@ -50,7 +50,7 @@ | |||
| 50 | */ | 50 | */ |
| 51 | #define JIFFIES_SHIFT 8 | 51 | #define JIFFIES_SHIFT 8 |
| 52 | 52 | ||
| 53 | static cycle_t jiffies_read(void) | 53 | static cycle_t jiffies_read(struct clocksource *cs) |
| 54 | { | 54 | { |
| 55 | return (cycle_t) jiffies; | 55 | return (cycle_t) jiffies; |
| 56 | } | 56 | } |
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 900f1b6598d1..687dff49f6e7 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
| @@ -182,7 +182,7 @@ EXPORT_SYMBOL(do_settimeofday); | |||
| 182 | */ | 182 | */ |
| 183 | static void change_clocksource(void) | 183 | static void change_clocksource(void) |
| 184 | { | 184 | { |
| 185 | struct clocksource *new; | 185 | struct clocksource *new, *old; |
| 186 | 186 | ||
| 187 | new = clocksource_get_next(); | 187 | new = clocksource_get_next(); |
| 188 | 188 | ||
| @@ -191,11 +191,16 @@ static void change_clocksource(void) | |||
| 191 | 191 | ||
| 192 | clocksource_forward_now(); | 192 | clocksource_forward_now(); |
| 193 | 193 | ||
| 194 | new->raw_time = clock->raw_time; | 194 | if (clocksource_enable(new)) |
| 195 | return; | ||
| 195 | 196 | ||
| 197 | new->raw_time = clock->raw_time; | ||
| 198 | old = clock; | ||
| 196 | clock = new; | 199 | clock = new; |
| 200 | clocksource_disable(old); | ||
| 201 | |||
| 197 | clock->cycle_last = 0; | 202 | clock->cycle_last = 0; |
| 198 | clock->cycle_last = clocksource_read(new); | 203 | clock->cycle_last = clocksource_read(clock); |
| 199 | clock->error = 0; | 204 | clock->error = 0; |
| 200 | clock->xtime_nsec = 0; | 205 | clock->xtime_nsec = 0; |
| 201 | clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); | 206 | clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); |
| @@ -292,6 +297,7 @@ void __init timekeeping_init(void) | |||
| 292 | ntp_init(); | 297 | ntp_init(); |
| 293 | 298 | ||
| 294 | clock = clocksource_get_next(); | 299 | clock = clocksource_get_next(); |
| 300 | clocksource_enable(clock); | ||
| 295 | clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); | 301 | clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); |
| 296 | clock->cycle_last = clocksource_read(clock); | 302 | clock->cycle_last = clocksource_read(clock); |
| 297 | 303 | ||
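Note: change_clocksource() above now enables the incoming clocksource first (bailing out if that fails), switches the global pointer, and only then disables the outgoing one, so a failed enable leaves the old clock untouched. A hedged sketch of that swap ordering; the enable/disable hooks and clock names are simplified placeholders.

#include <stdio.h>

struct clock {
        const char *name;
        int enabled;
};

static struct clock *current_clock;

static int clock_enable(struct clock *c)   { c->enabled = 1; return 0; }
static void clock_disable(struct clock *c) { c->enabled = 0; }

static void change_clock(struct clock *next)
{
        struct clock *prev = current_clock;

        if (!next || next == prev)
                return;
        if (clock_enable(next))         /* failure: keep using the old clock */
                return;
        current_clock = next;           /* switch consumers over ... */
        if (prev)
                clock_disable(prev);    /* ... then power down the old one */
        printf("now using %s\n", current_clock->name);
}

int main(void)
{
        struct clock jiffies_clock = { "jiffies", 0 };
        struct clock tsc_clock = { "tsc", 0 };

        change_clock(&jiffies_clock);
        change_clock(&tsc_clock);
        return 0;
}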
diff --git a/kernel/timer.c b/kernel/timer.c index b4555568b4e4..cffffad01c31 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
| @@ -531,10 +531,13 @@ static void __init_timer(struct timer_list *timer, | |||
| 531 | } | 531 | } |
| 532 | 532 | ||
| 533 | /** | 533 | /** |
| 534 | * init_timer - initialize a timer. | 534 | * init_timer_key - initialize a timer |
| 535 | * @timer: the timer to be initialized | 535 | * @timer: the timer to be initialized |
| 536 | * @name: name of the timer | ||
| 537 | * @key: lockdep class key of the fake lock used for tracking timer | ||
| 538 | * sync lock dependencies | ||
| 536 | * | 539 | * |
| 537 | * init_timer() must be done to a timer prior calling *any* of the | 540 | * init_timer_key() must be done to a timer prior calling *any* of the |
| 538 | * other timer functions. | 541 | * other timer functions. |
| 539 | */ | 542 | */ |
| 540 | void init_timer_key(struct timer_list *timer, | 543 | void init_timer_key(struct timer_list *timer, |
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 2246141bda4d..417d1985e299 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig | |||
| @@ -312,7 +312,7 @@ config KMEMTRACE | |||
| 312 | and profile kernel code. | 312 | and profile kernel code. |
| 313 | 313 | ||
| 314 | This requires an userspace application to use. See | 314 | This requires an userspace application to use. See |
| 315 | Documentation/vm/kmemtrace.txt for more information. | 315 | Documentation/trace/kmemtrace.txt for more information. |
| 316 | 316 | ||
| 317 | Saying Y will make the kernel somewhat larger and slower. However, | 317 | Saying Y will make the kernel somewhat larger and slower. However, |
| 318 | if you disable kmemtrace at run-time or boot-time, the performance | 318 | if you disable kmemtrace at run-time or boot-time, the performance |
| @@ -403,7 +403,7 @@ config MMIOTRACE | |||
| 403 | implementation and works via page faults. Tracing is disabled by | 403 | implementation and works via page faults. Tracing is disabled by |
| 404 | default and can be enabled at run-time. | 404 | default and can be enabled at run-time. |
| 405 | 405 | ||
| 406 | See Documentation/tracers/mmiotrace.txt. | 406 | See Documentation/trace/mmiotrace.txt. |
| 407 | If you are not helping to develop drivers, say N. | 407 | If you are not helping to develop drivers, say N. |
| 408 | 408 | ||
| 409 | config MMIOTRACE_TEST | 409 | config MMIOTRACE_TEST |
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 947c5b3f90c4..921ef5d1f0ba 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c | |||
| @@ -327,10 +327,10 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer, | |||
| 327 | char *msg; | 327 | char *msg; |
| 328 | struct blk_trace *bt; | 328 | struct blk_trace *bt; |
| 329 | 329 | ||
| 330 | if (count > BLK_TN_MAX_MSG) | 330 | if (count >= BLK_TN_MAX_MSG) |
| 331 | return -EINVAL; | 331 | return -EINVAL; |
| 332 | 332 | ||
| 333 | msg = kmalloc(count, GFP_KERNEL); | 333 | msg = kmalloc(count + 1, GFP_KERNEL); |
| 334 | if (msg == NULL) | 334 | if (msg == NULL) |
| 335 | return -ENOMEM; | 335 | return -ENOMEM; |
| 336 | 336 | ||
| @@ -339,6 +339,7 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer, | |||
| 339 | return -EFAULT; | 339 | return -EFAULT; |
| 340 | } | 340 | } |
| 341 | 341 | ||
| 342 | msg[count] = '\0'; | ||
| 342 | bt = filp->private_data; | 343 | bt = filp->private_data; |
| 343 | __trace_note_message(bt, "%s", msg); | 344 | __trace_note_message(bt, "%s", msg); |
| 344 | kfree(msg); | 345 | kfree(msg); |
| @@ -642,7 +643,7 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq, | |||
| 642 | if (blk_pc_request(rq)) { | 643 | if (blk_pc_request(rq)) { |
| 643 | what |= BLK_TC_ACT(BLK_TC_PC); | 644 | what |= BLK_TC_ACT(BLK_TC_PC); |
| 644 | __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, | 645 | __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, |
| 645 | sizeof(rq->cmd), rq->cmd); | 646 | rq->cmd_len, rq->cmd); |
| 646 | } else { | 647 | } else { |
| 647 | what |= BLK_TC_ACT(BLK_TC_FS); | 648 | what |= BLK_TC_ACT(BLK_TC_FS); |
| 648 | __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, | 649 | __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, |
| @@ -1376,12 +1377,12 @@ static int blk_trace_str2mask(const char *str) | |||
| 1376 | { | 1377 | { |
| 1377 | int i; | 1378 | int i; |
| 1378 | int mask = 0; | 1379 | int mask = 0; |
| 1379 | char *s, *token; | 1380 | char *buf, *s, *token; |
| 1380 | 1381 | ||
| 1381 | s = kstrdup(str, GFP_KERNEL); | 1382 | buf = kstrdup(str, GFP_KERNEL); |
| 1382 | if (s == NULL) | 1383 | if (buf == NULL) |
| 1383 | return -ENOMEM; | 1384 | return -ENOMEM; |
| 1384 | s = strstrip(s); | 1385 | s = strstrip(buf); |
| 1385 | 1386 | ||
| 1386 | while (1) { | 1387 | while (1) { |
| 1387 | token = strsep(&s, ","); | 1388 | token = strsep(&s, ","); |
| @@ -1402,7 +1403,7 @@ static int blk_trace_str2mask(const char *str) | |||
| 1402 | break; | 1403 | break; |
| 1403 | } | 1404 | } |
| 1404 | } | 1405 | } |
| 1405 | kfree(s); | 1406 | kfree(buf); |
| 1406 | 1407 | ||
| 1407 | return mask; | 1408 | return mask; |
| 1408 | } | 1409 | } |
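Note: two independent fixes sit in the blktrace hunk above: the message buffer is allocated one byte larger than the copied length and explicitly NUL-terminated before being handed to a "%s" format, and blk_trace_str2mask() keeps the original kstrdup() pointer because strsep() advances the cursor it is given, so freeing the cursor would free the wrong address. A userspace sketch of both patterns, reading from an in-memory byte buffer instead of a __user pointer; helper names are invented.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_MSG 16

/* Copy at most 'count' raw bytes and make the result a proper C string. */
static char *copy_msg(const char *src, size_t count)
{
        char *msg;

        if (count >= MAX_MSG)           /* leave room for the terminator */
                return NULL;
        msg = malloc(count + 1);
        if (!msg)
                return NULL;
        memcpy(msg, src, count);
        msg[count] = '\0';
        return msg;
}

/* Tokenize a comma-separated list; note which pointer gets freed. */
static void list_tokens(const char *str)
{
        char *buf = strdup(str);        /* keep the original allocation */
        char *s = buf;                  /* strsep() advances this cursor */
        char *token;

        if (!buf)
                return;
        while ((token = strsep(&s, ",")) != NULL)
                if (*token)
                        printf("token: %s\n", token);
        free(buf);                      /* not 's', which is NULL by now */
}

int main(void)
{
        char raw[5] = { 'h', 'e', 'l', 'l', 'o' };      /* no terminator */
        char *msg = copy_msg(raw, sizeof(raw));

        if (msg) {
                printf("msg: %s\n", msg);
                free(msg);
        }
        list_tokens("read,write,,sync");
        return 0;
}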
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c index ae201b3eda89..5011f4d91e37 100644 --- a/kernel/trace/kmemtrace.c +++ b/kernel/trace/kmemtrace.c | |||
| @@ -6,14 +6,16 @@ | |||
| 6 | * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com> | 6 | * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com> |
| 7 | */ | 7 | */ |
| 8 | 8 | ||
| 9 | #include <linux/dcache.h> | 9 | #include <linux/tracepoint.h> |
| 10 | #include <linux/seq_file.h> | ||
| 10 | #include <linux/debugfs.h> | 11 | #include <linux/debugfs.h> |
| 12 | #include <linux/dcache.h> | ||
| 11 | #include <linux/fs.h> | 13 | #include <linux/fs.h> |
| 12 | #include <linux/seq_file.h> | 14 | |
| 13 | #include <trace/kmemtrace.h> | 15 | #include <trace/kmemtrace.h> |
| 14 | 16 | ||
| 15 | #include "trace.h" | ||
| 16 | #include "trace_output.h" | 17 | #include "trace_output.h" |
| 18 | #include "trace.h" | ||
| 17 | 19 | ||
| 18 | /* Select an alternative, minimalistic output than the original one */ | 20 | /* Select an alternative, minimalistic output than the original one */ |
| 19 | #define TRACE_KMEM_OPT_MINIMAL 0x1 | 21 | #define TRACE_KMEM_OPT_MINIMAL 0x1 |
| @@ -25,14 +27,156 @@ static struct tracer_opt kmem_opts[] = { | |||
| 25 | }; | 27 | }; |
| 26 | 28 | ||
| 27 | static struct tracer_flags kmem_tracer_flags = { | 29 | static struct tracer_flags kmem_tracer_flags = { |
| 28 | .val = 0, | 30 | .val = 0, |
| 29 | .opts = kmem_opts | 31 | .opts = kmem_opts |
| 30 | }; | 32 | }; |
| 31 | 33 | ||
| 32 | |||
| 33 | static bool kmem_tracing_enabled __read_mostly; | ||
| 34 | static struct trace_array *kmemtrace_array; | 34 | static struct trace_array *kmemtrace_array; |
| 35 | 35 | ||
| 36 | /* Trace allocations */ | ||
| 37 | static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id, | ||
| 38 | unsigned long call_site, | ||
| 39 | const void *ptr, | ||
| 40 | size_t bytes_req, | ||
| 41 | size_t bytes_alloc, | ||
| 42 | gfp_t gfp_flags, | ||
| 43 | int node) | ||
| 44 | { | ||
| 45 | struct trace_array *tr = kmemtrace_array; | ||
| 46 | struct kmemtrace_alloc_entry *entry; | ||
| 47 | struct ring_buffer_event *event; | ||
| 48 | |||
| 49 | event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); | ||
| 50 | if (!event) | ||
| 51 | return; | ||
| 52 | |||
| 53 | entry = ring_buffer_event_data(event); | ||
| 54 | tracing_generic_entry_update(&entry->ent, 0, 0); | ||
| 55 | |||
| 56 | entry->ent.type = TRACE_KMEM_ALLOC; | ||
| 57 | entry->type_id = type_id; | ||
| 58 | entry->call_site = call_site; | ||
| 59 | entry->ptr = ptr; | ||
| 60 | entry->bytes_req = bytes_req; | ||
| 61 | entry->bytes_alloc = bytes_alloc; | ||
| 62 | entry->gfp_flags = gfp_flags; | ||
| 63 | entry->node = node; | ||
| 64 | |||
| 65 | ring_buffer_unlock_commit(tr->buffer, event); | ||
| 66 | |||
| 67 | trace_wake_up(); | ||
| 68 | } | ||
| 69 | |||
| 70 | static inline void kmemtrace_free(enum kmemtrace_type_id type_id, | ||
| 71 | unsigned long call_site, | ||
| 72 | const void *ptr) | ||
| 73 | { | ||
| 74 | struct trace_array *tr = kmemtrace_array; | ||
| 75 | struct kmemtrace_free_entry *entry; | ||
| 76 | struct ring_buffer_event *event; | ||
| 77 | |||
| 78 | event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); | ||
| 79 | if (!event) | ||
| 80 | return; | ||
| 81 | entry = ring_buffer_event_data(event); | ||
| 82 | tracing_generic_entry_update(&entry->ent, 0, 0); | ||
| 83 | |||
| 84 | entry->ent.type = TRACE_KMEM_FREE; | ||
| 85 | entry->type_id = type_id; | ||
| 86 | entry->call_site = call_site; | ||
| 87 | entry->ptr = ptr; | ||
| 88 | |||
| 89 | ring_buffer_unlock_commit(tr->buffer, event); | ||
| 90 | |||
| 91 | trace_wake_up(); | ||
| 92 | } | ||
| 93 | |||
| 94 | static void kmemtrace_kmalloc(unsigned long call_site, | ||
| 95 | const void *ptr, | ||
| 96 | size_t bytes_req, | ||
| 97 | size_t bytes_alloc, | ||
| 98 | gfp_t gfp_flags) | ||
| 99 | { | ||
| 100 | kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr, | ||
| 101 | bytes_req, bytes_alloc, gfp_flags, -1); | ||
| 102 | } | ||
| 103 | |||
| 104 | static void kmemtrace_kmem_cache_alloc(unsigned long call_site, | ||
| 105 | const void *ptr, | ||
| 106 | size_t bytes_req, | ||
| 107 | size_t bytes_alloc, | ||
| 108 | gfp_t gfp_flags) | ||
| 109 | { | ||
| 110 | kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr, | ||
| 111 | bytes_req, bytes_alloc, gfp_flags, -1); | ||
| 112 | } | ||
| 113 | |||
| 114 | static void kmemtrace_kmalloc_node(unsigned long call_site, | ||
| 115 | const void *ptr, | ||
| 116 | size_t bytes_req, | ||
| 117 | size_t bytes_alloc, | ||
| 118 | gfp_t gfp_flags, | ||
| 119 | int node) | ||
| 120 | { | ||
| 121 | kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr, | ||
| 122 | bytes_req, bytes_alloc, gfp_flags, node); | ||
| 123 | } | ||
| 124 | |||
| 125 | static void kmemtrace_kmem_cache_alloc_node(unsigned long call_site, | ||
| 126 | const void *ptr, | ||
| 127 | size_t bytes_req, | ||
| 128 | size_t bytes_alloc, | ||
| 129 | gfp_t gfp_flags, | ||
| 130 | int node) | ||
| 131 | { | ||
| 132 | kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr, | ||
| 133 | bytes_req, bytes_alloc, gfp_flags, node); | ||
| 134 | } | ||
| 135 | |||
| 136 | static void kmemtrace_kfree(unsigned long call_site, const void *ptr) | ||
| 137 | { | ||
| 138 | kmemtrace_free(KMEMTRACE_TYPE_KMALLOC, call_site, ptr); | ||
| 139 | } | ||
| 140 | |||
| 141 | static void kmemtrace_kmem_cache_free(unsigned long call_site, const void *ptr) | ||
| 142 | { | ||
| 143 | kmemtrace_free(KMEMTRACE_TYPE_CACHE, call_site, ptr); | ||
| 144 | } | ||
| 145 | |||
| 146 | static int kmemtrace_start_probes(void) | ||
| 147 | { | ||
| 148 | int err; | ||
| 149 | |||
| 150 | err = register_trace_kmalloc(kmemtrace_kmalloc); | ||
| 151 | if (err) | ||
| 152 | return err; | ||
| 153 | err = register_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc); | ||
| 154 | if (err) | ||
| 155 | return err; | ||
| 156 | err = register_trace_kmalloc_node(kmemtrace_kmalloc_node); | ||
| 157 | if (err) | ||
| 158 | return err; | ||
| 159 | err = register_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node); | ||
| 160 | if (err) | ||
| 161 | return err; | ||
| 162 | err = register_trace_kfree(kmemtrace_kfree); | ||
| 163 | if (err) | ||
| 164 | return err; | ||
| 165 | err = register_trace_kmem_cache_free(kmemtrace_kmem_cache_free); | ||
| 166 | |||
| 167 | return err; | ||
| 168 | } | ||
| 169 | |||
| 170 | static void kmemtrace_stop_probes(void) | ||
| 171 | { | ||
| 172 | unregister_trace_kmalloc(kmemtrace_kmalloc); | ||
| 173 | unregister_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc); | ||
| 174 | unregister_trace_kmalloc_node(kmemtrace_kmalloc_node); | ||
| 175 | unregister_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node); | ||
| 176 | unregister_trace_kfree(kmemtrace_kfree); | ||
| 177 | unregister_trace_kmem_cache_free(kmemtrace_kmem_cache_free); | ||
| 178 | } | ||
| 179 | |||
| 36 | static int kmem_trace_init(struct trace_array *tr) | 180 | static int kmem_trace_init(struct trace_array *tr) |
| 37 | { | 181 | { |
| 38 | int cpu; | 182 | int cpu; |
| @@ -41,14 +185,14 @@ static int kmem_trace_init(struct trace_array *tr) | |||
| 41 | for_each_cpu_mask(cpu, cpu_possible_map) | 185 | for_each_cpu_mask(cpu, cpu_possible_map) |
| 42 | tracing_reset(tr, cpu); | 186 | tracing_reset(tr, cpu); |
| 43 | 187 | ||
| 44 | kmem_tracing_enabled = true; | 188 | kmemtrace_start_probes(); |
| 45 | 189 | ||
| 46 | return 0; | 190 | return 0; |
| 47 | } | 191 | } |
| 48 | 192 | ||
| 49 | static void kmem_trace_reset(struct trace_array *tr) | 193 | static void kmem_trace_reset(struct trace_array *tr) |
| 50 | { | 194 | { |
| 51 | kmem_tracing_enabled = false; | 195 | kmemtrace_stop_probes(); |
| 52 | } | 196 | } |
| 53 | 197 | ||
| 54 | static void kmemtrace_headers(struct seq_file *s) | 198 | static void kmemtrace_headers(struct seq_file *s) |
| @@ -66,47 +210,84 @@ static void kmemtrace_headers(struct seq_file *s) | |||
| 66 | } | 210 | } |
| 67 | 211 | ||
| 68 | /* | 212 | /* |
| 69 | * The two following functions give the original output from kmemtrace, | 213 | * The following functions give the original output from kmemtrace, |
| 70 | * or something close to....perhaps they need some missing things | 214 | * plus the origin CPU, since reordering occurs in-kernel now. |
| 71 | */ | 215 | */ |
| 216 | |||
| 217 | #define KMEMTRACE_USER_ALLOC 0 | ||
| 218 | #define KMEMTRACE_USER_FREE 1 | ||
| 219 | |||
| 220 | struct kmemtrace_user_event { | ||
| 221 | u8 event_id; | ||
| 222 | u8 type_id; | ||
| 223 | u16 event_size; | ||
| 224 | u32 cpu; | ||
| 225 | u64 timestamp; | ||
| 226 | unsigned long call_site; | ||
| 227 | unsigned long ptr; | ||
| 228 | }; | ||
| 229 | |||
| 230 | struct kmemtrace_user_event_alloc { | ||
| 231 | size_t bytes_req; | ||
| 232 | size_t bytes_alloc; | ||
| 233 | unsigned gfp_flags; | ||
| 234 | int node; | ||
| 235 | }; | ||
| 236 | |||
| 72 | static enum print_line_t | 237 | static enum print_line_t |
| 73 | kmemtrace_print_alloc_original(struct trace_iterator *iter, | 238 | kmemtrace_print_alloc_user(struct trace_iterator *iter, |
| 74 | struct kmemtrace_alloc_entry *entry) | 239 | struct kmemtrace_alloc_entry *entry) |
| 75 | { | 240 | { |
| 241 | struct kmemtrace_user_event_alloc *ev_alloc; | ||
| 76 | struct trace_seq *s = &iter->seq; | 242 | struct trace_seq *s = &iter->seq; |
| 77 | int ret; | 243 | struct kmemtrace_user_event *ev; |
| 244 | |||
| 245 | ev = trace_seq_reserve(s, sizeof(*ev)); | ||
| 246 | if (!ev) | ||
| 247 | return TRACE_TYPE_PARTIAL_LINE; | ||
| 78 | 248 | ||
| 79 | /* Taken from the old linux/kmemtrace.h */ | 249 | ev->event_id = KMEMTRACE_USER_ALLOC; |
| 80 | ret = trace_seq_printf(s, "type_id %d call_site %lu ptr %lu " | 250 | ev->type_id = entry->type_id; |
| 81 | "bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d\n", | 251 | ev->event_size = sizeof(*ev) + sizeof(*ev_alloc); |
| 82 | entry->type_id, entry->call_site, (unsigned long) entry->ptr, | 252 | ev->cpu = iter->cpu; |
| 83 | (unsigned long) entry->bytes_req, (unsigned long) entry->bytes_alloc, | 253 | ev->timestamp = iter->ts; |
| 84 | (unsigned long) entry->gfp_flags, entry->node); | 254 | ev->call_site = entry->call_site; |
| 255 | ev->ptr = (unsigned long)entry->ptr; | ||
| 85 | 256 | ||
| 86 | if (!ret) | 257 | ev_alloc = trace_seq_reserve(s, sizeof(*ev_alloc)); |
| 258 | if (!ev_alloc) | ||
| 87 | return TRACE_TYPE_PARTIAL_LINE; | 259 | return TRACE_TYPE_PARTIAL_LINE; |
| 88 | 260 | ||
| 261 | ev_alloc->bytes_req = entry->bytes_req; | ||
| 262 | ev_alloc->bytes_alloc = entry->bytes_alloc; | ||
| 263 | ev_alloc->gfp_flags = entry->gfp_flags; | ||
| 264 | ev_alloc->node = entry->node; | ||
| 265 | |||
| 89 | return TRACE_TYPE_HANDLED; | 266 | return TRACE_TYPE_HANDLED; |
| 90 | } | 267 | } |
| 91 | 268 | ||
| 92 | static enum print_line_t | 269 | static enum print_line_t |
| 93 | kmemtrace_print_free_original(struct trace_iterator *iter, | 270 | kmemtrace_print_free_user(struct trace_iterator *iter, |
| 94 | struct kmemtrace_free_entry *entry) | 271 | struct kmemtrace_free_entry *entry) |
| 95 | { | 272 | { |
| 96 | struct trace_seq *s = &iter->seq; | 273 | struct trace_seq *s = &iter->seq; |
| 97 | int ret; | 274 | struct kmemtrace_user_event *ev; |
| 98 | 275 | ||
| 99 | /* Taken from the old linux/kmemtrace.h */ | 276 | ev = trace_seq_reserve(s, sizeof(*ev)); |
| 100 | ret = trace_seq_printf(s, "type_id %d call_site %lu ptr %lu\n", | 277 | if (!ev) |
| 101 | entry->type_id, entry->call_site, (unsigned long) entry->ptr); | ||
| 102 | |||
| 103 | if (!ret) | ||
| 104 | return TRACE_TYPE_PARTIAL_LINE; | 278 | return TRACE_TYPE_PARTIAL_LINE; |
| 105 | 279 | ||
| 280 | ev->event_id = KMEMTRACE_USER_FREE; | ||
| 281 | ev->type_id = entry->type_id; | ||
| 282 | ev->event_size = sizeof(*ev); | ||
| 283 | ev->cpu = iter->cpu; | ||
| 284 | ev->timestamp = iter->ts; | ||
| 285 | ev->call_site = entry->call_site; | ||
| 286 | ev->ptr = (unsigned long)entry->ptr; | ||
| 287 | |||
| 106 | return TRACE_TYPE_HANDLED; | 288 | return TRACE_TYPE_HANDLED; |
| 107 | } | 289 | } |
| 108 | 290 | ||
| 109 | |||
| 110 | /* The two other following provide a more minimalistic output */ | 291 | /* The two other following provide a more minimalistic output */ |
| 111 | static enum print_line_t | 292 | static enum print_line_t |
| 112 | kmemtrace_print_alloc_compress(struct trace_iterator *iter, | 293 | kmemtrace_print_alloc_compress(struct trace_iterator *iter, |
| @@ -178,7 +359,7 @@ kmemtrace_print_alloc_compress(struct trace_iterator *iter, | |||
| 178 | 359 | ||
| 179 | static enum print_line_t | 360 | static enum print_line_t |
| 180 | kmemtrace_print_free_compress(struct trace_iterator *iter, | 361 | kmemtrace_print_free_compress(struct trace_iterator *iter, |
| 181 | struct kmemtrace_free_entry *entry) | 362 | struct kmemtrace_free_entry *entry) |
| 182 | { | 363 | { |
| 183 | struct trace_seq *s = &iter->seq; | 364 | struct trace_seq *s = &iter->seq; |
| 184 | int ret; | 365 | int ret; |
| @@ -239,20 +420,22 @@ static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter) | |||
| 239 | switch (entry->type) { | 420 | switch (entry->type) { |
| 240 | case TRACE_KMEM_ALLOC: { | 421 | case TRACE_KMEM_ALLOC: { |
| 241 | struct kmemtrace_alloc_entry *field; | 422 | struct kmemtrace_alloc_entry *field; |
| 423 | |||
| 242 | trace_assign_type(field, entry); | 424 | trace_assign_type(field, entry); |
| 243 | if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL) | 425 | if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL) |
| 244 | return kmemtrace_print_alloc_compress(iter, field); | 426 | return kmemtrace_print_alloc_compress(iter, field); |
| 245 | else | 427 | else |
| 246 | return kmemtrace_print_alloc_original(iter, field); | 428 | return kmemtrace_print_alloc_user(iter, field); |
| 247 | } | 429 | } |
| 248 | 430 | ||
| 249 | case TRACE_KMEM_FREE: { | 431 | case TRACE_KMEM_FREE: { |
| 250 | struct kmemtrace_free_entry *field; | 432 | struct kmemtrace_free_entry *field; |
| 433 | |||
| 251 | trace_assign_type(field, entry); | 434 | trace_assign_type(field, entry); |
| 252 | if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL) | 435 | if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL) |
| 253 | return kmemtrace_print_free_compress(iter, field); | 436 | return kmemtrace_print_free_compress(iter, field); |
| 254 | else | 437 | else |
| 255 | return kmemtrace_print_free_original(iter, field); | 438 | return kmemtrace_print_free_user(iter, field); |
| 256 | } | 439 | } |
| 257 | 440 | ||
| 258 | default: | 441 | default: |
| @@ -260,70 +443,13 @@ static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter) | |||
| 260 | } | 443 | } |
| 261 | } | 444 | } |
| 262 | 445 | ||
| 263 | /* Trace allocations */ | ||
| 264 | void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id, | ||
| 265 | unsigned long call_site, | ||
| 266 | const void *ptr, | ||
| 267 | size_t bytes_req, | ||
| 268 | size_t bytes_alloc, | ||
| 269 | gfp_t gfp_flags, | ||
| 270 | int node) | ||
| 271 | { | ||
| 272 | struct ring_buffer_event *event; | ||
| 273 | struct kmemtrace_alloc_entry *entry; | ||
| 274 | struct trace_array *tr = kmemtrace_array; | ||
| 275 | |||
| 276 | if (!kmem_tracing_enabled) | ||
| 277 | return; | ||
| 278 | |||
| 279 | event = trace_buffer_lock_reserve(tr, TRACE_KMEM_ALLOC, | ||
| 280 | sizeof(*entry), 0, 0); | ||
| 281 | if (!event) | ||
| 282 | return; | ||
| 283 | entry = ring_buffer_event_data(event); | ||
| 284 | |||
| 285 | entry->call_site = call_site; | ||
| 286 | entry->ptr = ptr; | ||
| 287 | entry->bytes_req = bytes_req; | ||
| 288 | entry->bytes_alloc = bytes_alloc; | ||
| 289 | entry->gfp_flags = gfp_flags; | ||
| 290 | entry->node = node; | ||
| 291 | |||
| 292 | trace_buffer_unlock_commit(tr, event, 0, 0); | ||
| 293 | } | ||
| 294 | EXPORT_SYMBOL(kmemtrace_mark_alloc_node); | ||
| 295 | |||
| 296 | void kmemtrace_mark_free(enum kmemtrace_type_id type_id, | ||
| 297 | unsigned long call_site, | ||
| 298 | const void *ptr) | ||
| 299 | { | ||
| 300 | struct ring_buffer_event *event; | ||
| 301 | struct kmemtrace_free_entry *entry; | ||
| 302 | struct trace_array *tr = kmemtrace_array; | ||
| 303 | |||
| 304 | if (!kmem_tracing_enabled) | ||
| 305 | return; | ||
| 306 | |||
| 307 | event = trace_buffer_lock_reserve(tr, TRACE_KMEM_FREE, | ||
| 308 | sizeof(*entry), 0, 0); | ||
| 309 | if (!event) | ||
| 310 | return; | ||
| 311 | entry = ring_buffer_event_data(event); | ||
| 312 | entry->type_id = type_id; | ||
| 313 | entry->call_site = call_site; | ||
| 314 | entry->ptr = ptr; | ||
| 315 | |||
| 316 | trace_buffer_unlock_commit(tr, event, 0, 0); | ||
| 317 | } | ||
| 318 | EXPORT_SYMBOL(kmemtrace_mark_free); | ||
| 319 | |||
| 320 | static struct tracer kmem_tracer __read_mostly = { | 446 | static struct tracer kmem_tracer __read_mostly = { |
| 321 | .name = "kmemtrace", | 447 | .name = "kmemtrace", |
| 322 | .init = kmem_trace_init, | 448 | .init = kmem_trace_init, |
| 323 | .reset = kmem_trace_reset, | 449 | .reset = kmem_trace_reset, |
| 324 | .print_line = kmemtrace_print_line, | 450 | .print_line = kmemtrace_print_line, |
| 325 | .print_header = kmemtrace_headers, | 451 | .print_header = kmemtrace_headers, |
| 326 | .flags = &kmem_tracer_flags | 452 | .flags = &kmem_tracer_flags |
| 327 | }; | 453 | }; |
| 328 | 454 | ||
| 329 | void kmemtrace_init(void) | 455 | void kmemtrace_init(void) |
| @@ -335,5 +461,4 @@ static int __init init_kmem_tracer(void) | |||
| 335 | { | 461 | { |
| 336 | return register_tracer(&kmem_tracer); | 462 | return register_tracer(&kmem_tracer); |
| 337 | } | 463 | } |
| 338 | |||
| 339 | device_initcall(init_kmem_tracer); | 464 | device_initcall(init_kmem_tracer); |
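The kmemtrace.c hunks above convert the tracer from the old kmemtrace_mark_alloc_node()/kmemtrace_mark_free() hooks to tracepoint probes (register_trace_kmalloc() and friends) and switch the default output to fixed-size binary records reserved directly in the trace_seq. A userspace consumer therefore reads a stream of kmemtrace_user_event headers, each followed by a kmemtrace_user_event_alloc payload when event_size says so. The reader below is only a sketch: the struct layouts mirror the kernel definitions above, the file path is an assumption, and the program must be built with the same word size as the kernel for the unsigned long fields to line up.

/* Illustrative userspace reader for the binary kmemtrace stream. */
#include <stdio.h>
#include <stdint.h>

struct kmemtrace_user_event {
	uint8_t       event_id;     /* KMEMTRACE_USER_ALLOC or _FREE */
	uint8_t       type_id;      /* KMEMTRACE_TYPE_* */
	uint16_t      event_size;   /* header plus optional alloc payload */
	uint32_t      cpu;
	uint64_t      timestamp;
	unsigned long call_site;
	unsigned long ptr;
};

struct kmemtrace_user_event_alloc {
	size_t   bytes_req;
	size_t   bytes_alloc;
	unsigned gfp_flags;
	int      node;
};

int main(void)
{
	/* Path is an assumption; adjust to where debugfs is mounted. */
	FILE *f = fopen("/debug/tracing/trace", "rb");
	struct kmemtrace_user_event ev;
	struct kmemtrace_user_event_alloc alloc;

	if (!f)
		return 1;
	while (fread(&ev, sizeof(ev), 1, f) == 1) {
		printf("cpu %u %s ptr %#lx call_site %#lx\n", ev.cpu,
		       ev.event_id ? "free " : "alloc", ev.ptr, ev.call_site);
		if (ev.event_size > sizeof(ev)) {
			if (fread(&alloc, sizeof(alloc), 1, f) != 1)
				break;
			printf("  req %zu alloc %zu node %d\n",
			       alloc.bytes_req, alloc.bytes_alloc, alloc.node);
		}
	}
	fclose(f);
	return 0;
}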
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a0174a40c563..1ce5dc6372b8 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
| @@ -30,6 +30,7 @@ | |||
| 30 | #include <linux/percpu.h> | 30 | #include <linux/percpu.h> |
| 31 | #include <linux/splice.h> | 31 | #include <linux/splice.h> |
| 32 | #include <linux/kdebug.h> | 32 | #include <linux/kdebug.h> |
| 33 | #include <linux/string.h> | ||
| 33 | #include <linux/ctype.h> | 34 | #include <linux/ctype.h> |
| 34 | #include <linux/init.h> | 35 | #include <linux/init.h> |
| 35 | #include <linux/poll.h> | 36 | #include <linux/poll.h> |
| @@ -147,8 +148,7 @@ static int __init set_ftrace_dump_on_oops(char *str) | |||
| 147 | } | 148 | } |
| 148 | __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); | 149 | __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); |
| 149 | 150 | ||
| 150 | long | 151 | unsigned long long ns2usecs(cycle_t nsec) |
| 151 | ns2usecs(cycle_t nsec) | ||
| 152 | { | 152 | { |
| 153 | nsec += 500; | 153 | nsec += 500; |
| 154 | do_div(nsec, 1000); | 154 | do_div(nsec, 1000); |
| @@ -1632,7 +1632,11 @@ static void test_cpu_buff_start(struct trace_iterator *iter) | |||
| 1632 | return; | 1632 | return; |
| 1633 | 1633 | ||
| 1634 | cpumask_set_cpu(iter->cpu, iter->started); | 1634 | cpumask_set_cpu(iter->cpu, iter->started); |
| 1635 | trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu); | 1635 | |
| 1636 | /* Don't print started cpu buffer for the first entry of the trace */ | ||
| 1637 | if (iter->idx > 1) | ||
| 1638 | trace_seq_printf(s, "##### CPU %u buffer started ####\n", | ||
| 1639 | iter->cpu); | ||
| 1636 | } | 1640 | } |
| 1637 | 1641 | ||
| 1638 | static enum print_line_t print_trace_fmt(struct trace_iterator *iter) | 1642 | static enum print_line_t print_trace_fmt(struct trace_iterator *iter) |
| @@ -1867,6 +1871,11 @@ __tracing_open(struct inode *inode, struct file *file) | |||
| 1867 | if (current_trace) | 1871 | if (current_trace) |
| 1868 | *iter->trace = *current_trace; | 1872 | *iter->trace = *current_trace; |
| 1869 | 1873 | ||
| 1874 | if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) | ||
| 1875 | goto fail; | ||
| 1876 | |||
| 1877 | cpumask_clear(iter->started); | ||
| 1878 | |||
| 1870 | if (current_trace && current_trace->print_max) | 1879 | if (current_trace && current_trace->print_max) |
| 1871 | iter->tr = &max_tr; | 1880 | iter->tr = &max_tr; |
| 1872 | else | 1881 | else |
| @@ -1917,6 +1926,7 @@ __tracing_open(struct inode *inode, struct file *file) | |||
| 1917 | if (iter->buffer_iter[cpu]) | 1926 | if (iter->buffer_iter[cpu]) |
| 1918 | ring_buffer_read_finish(iter->buffer_iter[cpu]); | 1927 | ring_buffer_read_finish(iter->buffer_iter[cpu]); |
| 1919 | } | 1928 | } |
| 1929 | free_cpumask_var(iter->started); | ||
| 1920 | fail: | 1930 | fail: |
| 1921 | mutex_unlock(&trace_types_lock); | 1931 | mutex_unlock(&trace_types_lock); |
| 1922 | kfree(iter->trace); | 1932 | kfree(iter->trace); |
| @@ -1960,6 +1970,7 @@ static int tracing_release(struct inode *inode, struct file *file) | |||
| 1960 | 1970 | ||
| 1961 | seq_release(inode, file); | 1971 | seq_release(inode, file); |
| 1962 | mutex_destroy(&iter->mutex); | 1972 | mutex_destroy(&iter->mutex); |
| 1973 | free_cpumask_var(iter->started); | ||
| 1963 | kfree(iter->trace); | 1974 | kfree(iter->trace); |
| 1964 | kfree(iter); | 1975 | kfree(iter); |
| 1965 | return 0; | 1976 | return 0; |
| @@ -2358,9 +2369,9 @@ static const char readme_msg[] = | |||
| 2358 | "# mkdir /debug\n" | 2369 | "# mkdir /debug\n" |
| 2359 | "# mount -t debugfs nodev /debug\n\n" | 2370 | "# mount -t debugfs nodev /debug\n\n" |
| 2360 | "# cat /debug/tracing/available_tracers\n" | 2371 | "# cat /debug/tracing/available_tracers\n" |
| 2361 | "wakeup preemptirqsoff preemptoff irqsoff ftrace sched_switch none\n\n" | 2372 | "wakeup preemptirqsoff preemptoff irqsoff function sched_switch nop\n\n" |
| 2362 | "# cat /debug/tracing/current_tracer\n" | 2373 | "# cat /debug/tracing/current_tracer\n" |
| 2363 | "none\n" | 2374 | "nop\n" |
| 2364 | "# echo sched_switch > /debug/tracing/current_tracer\n" | 2375 | "# echo sched_switch > /debug/tracing/current_tracer\n" |
| 2365 | "# cat /debug/tracing/current_tracer\n" | 2376 | "# cat /debug/tracing/current_tracer\n" |
| 2366 | "sched_switch\n" | 2377 | "sched_switch\n" |
| @@ -3266,19 +3277,13 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp) | |||
| 3266 | 3277 | ||
| 3267 | info->tr = &global_trace; | 3278 | info->tr = &global_trace; |
| 3268 | info->cpu = cpu; | 3279 | info->cpu = cpu; |
| 3269 | info->spare = ring_buffer_alloc_read_page(info->tr->buffer); | 3280 | info->spare = NULL; |
| 3270 | /* Force reading ring buffer for first read */ | 3281 | /* Force reading ring buffer for first read */ |
| 3271 | info->read = (unsigned int)-1; | 3282 | info->read = (unsigned int)-1; |
| 3272 | if (!info->spare) | ||
| 3273 | goto out; | ||
| 3274 | 3283 | ||
| 3275 | filp->private_data = info; | 3284 | filp->private_data = info; |
| 3276 | 3285 | ||
| 3277 | return 0; | 3286 | return nonseekable_open(inode, filp); |
| 3278 | |||
| 3279 | out: | ||
| 3280 | kfree(info); | ||
| 3281 | return -ENOMEM; | ||
| 3282 | } | 3287 | } |
| 3283 | 3288 | ||
| 3284 | static ssize_t | 3289 | static ssize_t |
| @@ -3293,6 +3298,11 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, | |||
| 3293 | if (!count) | 3298 | if (!count) |
| 3294 | return 0; | 3299 | return 0; |
| 3295 | 3300 | ||
| 3301 | if (!info->spare) | ||
| 3302 | info->spare = ring_buffer_alloc_read_page(info->tr->buffer); | ||
| 3303 | if (!info->spare) | ||
| 3304 | return -ENOMEM; | ||
| 3305 | |||
| 3296 | /* Do we have previous read data to read? */ | 3306 | /* Do we have previous read data to read? */ |
| 3297 | if (info->read < PAGE_SIZE) | 3307 | if (info->read < PAGE_SIZE) |
| 3298 | goto read; | 3308 | goto read; |
| @@ -3331,7 +3341,8 @@ static int tracing_buffers_release(struct inode *inode, struct file *file) | |||
| 3331 | { | 3341 | { |
| 3332 | struct ftrace_buffer_info *info = file->private_data; | 3342 | struct ftrace_buffer_info *info = file->private_data; |
| 3333 | 3343 | ||
| 3334 | ring_buffer_free_read_page(info->tr->buffer, info->spare); | 3344 | if (info->spare) |
| 3345 | ring_buffer_free_read_page(info->tr->buffer, info->spare); | ||
| 3335 | kfree(info); | 3346 | kfree(info); |
| 3336 | 3347 | ||
| 3337 | return 0; | 3348 | return 0; |
| @@ -3417,14 +3428,19 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, | |||
| 3417 | int size, i; | 3428 | int size, i; |
| 3418 | size_t ret; | 3429 | size_t ret; |
| 3419 | 3430 | ||
| 3420 | /* | 3431 | if (*ppos & (PAGE_SIZE - 1)) { |
| 3421 | * We can't seek on a buffer input | 3432 | WARN_ONCE(1, "Ftrace: previous read must page-align\n"); |
| 3422 | */ | 3433 | return -EINVAL; |
| 3423 | if (unlikely(*ppos)) | 3434 | } |
| 3424 | return -ESPIPE; | ||
| 3425 | 3435 | ||
| 3436 | if (len & (PAGE_SIZE - 1)) { | ||
| 3437 | WARN_ONCE(1, "Ftrace: splice_read should page-align\n"); | ||
| 3438 | if (len < PAGE_SIZE) | ||
| 3439 | return -EINVAL; | ||
| 3440 | len &= PAGE_MASK; | ||
| 3441 | } | ||
| 3426 | 3442 | ||
| 3427 | for (i = 0; i < PIPE_BUFFERS && len; i++, len -= size) { | 3443 | for (i = 0; i < PIPE_BUFFERS && len; i++, len -= PAGE_SIZE) { |
| 3428 | struct page *page; | 3444 | struct page *page; |
| 3429 | int r; | 3445 | int r; |
| 3430 | 3446 | ||
| @@ -3463,6 +3479,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, | |||
| 3463 | spd.partial[i].offset = 0; | 3479 | spd.partial[i].offset = 0; |
| 3464 | spd.partial[i].private = (unsigned long)ref; | 3480 | spd.partial[i].private = (unsigned long)ref; |
| 3465 | spd.nr_pages++; | 3481 | spd.nr_pages++; |
| 3482 | *ppos += PAGE_SIZE; | ||
| 3466 | } | 3483 | } |
| 3467 | 3484 | ||
| 3468 | spd.nr_pages = i; | 3485 | spd.nr_pages = i; |
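Besides the cpumask and readme fixes, the trace.c hunks above relax tracing_buffers_splice_read() from refusing any non-zero offset to requiring page alignment, advancing *ppos by PAGE_SIZE per spliced page, and allocating the spare read page lazily. A hedged userspace sketch of a consumer that respects the alignment rule follows; the per-CPU file path is an assumption about where the raw buffer file lives.

/* Illustrative only: drain one CPU's ring buffer a page at a time. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	int in = open("/debug/tracing/per_cpu/cpu0/trace_pipe_raw", O_RDONLY);
	int out = open("cpu0.bin", O_WRONLY | O_CREAT | O_TRUNC, 0644);
	long page = sysconf(_SC_PAGESIZE);
	int pipefd[2];
	ssize_t n;

	if (in < 0 || out < 0 || pipe(pipefd))
		return 1;
	for (;;) {
		/* Offset and length both stay page-aligned, as required. */
		n = splice(in, NULL, pipefd[1], NULL, page, SPLICE_F_MOVE);
		if (n <= 0)
			break;
		if (splice(pipefd[0], NULL, out, NULL, n, SPLICE_F_MOVE) < 0)
			break;
	}
	close(in);
	close(out);
	return 0;
}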
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index cb0ce3fc36d3..e685ac2b2ba1 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h | |||
| @@ -182,6 +182,12 @@ struct trace_power { | |||
| 182 | struct power_trace state_data; | 182 | struct power_trace state_data; |
| 183 | }; | 183 | }; |
| 184 | 184 | ||
| 185 | enum kmemtrace_type_id { | ||
| 186 | KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */ | ||
| 187 | KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */ | ||
| 188 | KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */ | ||
| 189 | }; | ||
| 190 | |||
| 185 | struct kmemtrace_alloc_entry { | 191 | struct kmemtrace_alloc_entry { |
| 186 | struct trace_entry ent; | 192 | struct trace_entry ent; |
| 187 | enum kmemtrace_type_id type_id; | 193 | enum kmemtrace_type_id type_id; |
| @@ -596,7 +602,7 @@ extern int trace_selftest_startup_branch(struct tracer *trace, | |||
| 596 | #endif /* CONFIG_FTRACE_STARTUP_TEST */ | 602 | #endif /* CONFIG_FTRACE_STARTUP_TEST */ |
| 597 | 603 | ||
| 598 | extern void *head_page(struct trace_array_cpu *data); | 604 | extern void *head_page(struct trace_array_cpu *data); |
| 599 | extern long ns2usecs(cycle_t nsec); | 605 | extern unsigned long long ns2usecs(cycle_t nsec); |
| 600 | extern int | 606 | extern int |
| 601 | trace_vbprintk(unsigned long ip, const char *fmt, va_list args); | 607 | trace_vbprintk(unsigned long ip, const char *fmt, va_list args); |
| 602 | extern int | 608 | extern int |
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index ad8c22efff41..8333715e4066 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c | |||
| @@ -155,6 +155,13 @@ static enum print_line_t trace_branch_print(struct trace_iterator *iter, | |||
| 155 | return TRACE_TYPE_HANDLED; | 155 | return TRACE_TYPE_HANDLED; |
| 156 | } | 156 | } |
| 157 | 157 | ||
| 158 | static void branch_print_header(struct seq_file *s) | ||
| 159 | { | ||
| 160 | seq_puts(s, "# TASK-PID CPU# TIMESTAMP CORRECT" | ||
| 161 | " FUNC:FILE:LINE\n"); | ||
| 162 | seq_puts(s, "# | | | | | " | ||
| 163 | " |\n"); | ||
| 164 | } | ||
| 158 | 165 | ||
| 159 | static struct trace_event trace_branch_event = { | 166 | static struct trace_event trace_branch_event = { |
| 160 | .type = TRACE_BRANCH, | 167 | .type = TRACE_BRANCH, |
| @@ -169,6 +176,7 @@ static struct tracer branch_trace __read_mostly = | |||
| 169 | #ifdef CONFIG_FTRACE_SELFTEST | 176 | #ifdef CONFIG_FTRACE_SELFTEST |
| 170 | .selftest = trace_selftest_startup_branch, | 177 | .selftest = trace_selftest_startup_branch, |
| 171 | #endif /* CONFIG_FTRACE_SELFTEST */ | 178 | #endif /* CONFIG_FTRACE_SELFTEST */ |
| 179 | .print_header = branch_print_header, | ||
| 172 | }; | 180 | }; |
| 173 | 181 | ||
| 174 | __init static int init_branch_tracer(void) | 182 | __init static int init_branch_tracer(void) |
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 64ec4d278ffb..576f4fa2af0d 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c | |||
| @@ -503,6 +503,7 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, | |||
| 503 | 503 | ||
| 504 | if (copy_from_user(&buf, ubuf, cnt)) | 504 | if (copy_from_user(&buf, ubuf, cnt)) |
| 505 | return -EFAULT; | 505 | return -EFAULT; |
| 506 | buf[cnt] = '\0'; | ||
| 506 | 507 | ||
| 507 | pred = kzalloc(sizeof(*pred), GFP_KERNEL); | 508 | pred = kzalloc(sizeof(*pred), GFP_KERNEL); |
| 508 | if (!pred) | 509 | if (!pred) |
| @@ -520,9 +521,10 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, | |||
| 520 | return cnt; | 521 | return cnt; |
| 521 | } | 522 | } |
| 522 | 523 | ||
| 523 | if (filter_add_pred(call, pred)) { | 524 | err = filter_add_pred(call, pred); |
| 525 | if (err < 0) { | ||
| 524 | filter_free_pred(pred); | 526 | filter_free_pred(pred); |
| 525 | return -EINVAL; | 527 | return err; |
| 526 | } | 528 | } |
| 527 | 529 | ||
| 528 | *ppos += cnt; | 530 | *ppos += cnt; |
| @@ -569,6 +571,7 @@ subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, | |||
| 569 | 571 | ||
| 570 | if (copy_from_user(&buf, ubuf, cnt)) | 572 | if (copy_from_user(&buf, ubuf, cnt)) |
| 571 | return -EFAULT; | 573 | return -EFAULT; |
| 574 | buf[cnt] = '\0'; | ||
| 572 | 575 | ||
| 573 | pred = kzalloc(sizeof(*pred), GFP_KERNEL); | 576 | pred = kzalloc(sizeof(*pred), GFP_KERNEL); |
| 574 | if (!pred) | 577 | if (!pred) |
| @@ -586,10 +589,11 @@ subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, | |||
| 586 | return cnt; | 589 | return cnt; |
| 587 | } | 590 | } |
| 588 | 591 | ||
| 589 | if (filter_add_subsystem_pred(system, pred)) { | 592 | err = filter_add_subsystem_pred(system, pred); |
| 593 | if (err < 0) { | ||
| 590 | filter_free_subsystem_preds(system); | 594 | filter_free_subsystem_preds(system); |
| 591 | filter_free_pred(pred); | 595 | filter_free_pred(pred); |
| 592 | return -EINVAL; | 596 | return err; |
| 593 | } | 597 | } |
| 594 | 598 | ||
| 595 | *ppos += cnt; | 599 | *ppos += cnt; |
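Both write handlers above follow the usual debugfs write pattern, and the fix adds the explicit NUL terminator that copy_from_user() never provides, plus propagation of the helper's real error code instead of a blanket -EINVAL. A minimal sketch of that pattern, with a placeholder buffer size and a hypothetical parse helper:

/* Sketch of the bounded-copy-then-terminate pattern used above. */
#include <linux/fs.h>
#include <linux/uaccess.h>

static int parse_buf(const char *buf)
{
	return 0;			/* stand-in for the real parser */
}

static ssize_t example_write(struct file *filp, const char __user *ubuf,
			     size_t cnt, loff_t *ppos)
{
	char buf[64];
	int err;

	if (cnt >= sizeof(buf))
		return -EINVAL;
	if (copy_from_user(buf, ubuf, cnt))
		return -EFAULT;
	buf[cnt] = '\0';		/* parsing relies on termination */

	err = parse_buf(buf);
	if (err < 0)
		return err;		/* pass the real error back */

	*ppos += cnt;
	return cnt;
}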
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 026be412f356..e03cbf1e38f3 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c | |||
| @@ -215,7 +215,7 @@ static int __filter_add_pred(struct ftrace_event_call *call, | |||
| 215 | } | 215 | } |
| 216 | } | 216 | } |
| 217 | 217 | ||
| 218 | return -ENOMEM; | 218 | return -ENOSPC; |
| 219 | } | 219 | } |
| 220 | 220 | ||
| 221 | static int is_string_field(const char *type) | 221 | static int is_string_field(const char *type) |
| @@ -319,7 +319,7 @@ int filter_add_subsystem_pred(struct event_subsystem *system, | |||
| 319 | } | 319 | } |
| 320 | 320 | ||
| 321 | if (i == MAX_FILTER_PRED) | 321 | if (i == MAX_FILTER_PRED) |
| 322 | return -EINVAL; | 322 | return -ENOSPC; |
| 323 | 323 | ||
| 324 | events_for_each(call) { | 324 | events_for_each(call) { |
| 325 | int err; | 325 | int err; |
| @@ -410,16 +410,22 @@ int filter_parse(char **pbuf, struct filter_pred *pred) | |||
| 410 | } | 410 | } |
| 411 | } | 411 | } |
| 412 | 412 | ||
| 413 | if (!val_str) { | ||
| 414 | pred->field_name = NULL; | ||
| 415 | return -EINVAL; | ||
| 416 | } | ||
| 417 | |||
| 413 | pred->field_name = kstrdup(pred->field_name, GFP_KERNEL); | 418 | pred->field_name = kstrdup(pred->field_name, GFP_KERNEL); |
| 414 | if (!pred->field_name) | 419 | if (!pred->field_name) |
| 415 | return -ENOMEM; | 420 | return -ENOMEM; |
| 416 | 421 | ||
| 417 | pred->val = simple_strtoull(val_str, &tmp, 10); | 422 | pred->val = simple_strtoull(val_str, &tmp, 0); |
| 418 | if (tmp == val_str) { | 423 | if (tmp == val_str) { |
| 419 | pred->str_val = kstrdup(val_str, GFP_KERNEL); | 424 | pred->str_val = kstrdup(val_str, GFP_KERNEL); |
| 420 | if (!pred->str_val) | 425 | if (!pred->str_val) |
| 421 | return -ENOMEM; | 426 | return -ENOMEM; |
| 422 | } | 427 | } else if (*tmp != '\0') |
| 428 | return -EINVAL; | ||
| 423 | 429 | ||
| 424 | return 0; | 430 | return 0; |
| 425 | } | 431 | } |
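With the filter_parse() change above, values are parsed by simple_strtoull() with base 0 (so decimal, 0x hex and leading-zero octal are all accepted), the string-predicate fallback is kept only when no digits were consumed at all, partially numeric tokens are rejected, and a missing value now fails cleanly instead of dereferencing NULL. A small userspace sketch mirroring that decision, with strtoull() standing in for simple_strtoull():

#include <stdio.h>
#include <stdlib.h>

/* Mirrors the accept/reject logic: full number, pure string, or error. */
static int classify(const char *val_str)
{
	char *tmp;
	unsigned long long val = strtoull(val_str, &tmp, 0);

	if (tmp == val_str) {
		printf("string predicate: \"%s\"\n", val_str);
		return 0;
	}
	if (*tmp != '\0') {
		printf("rejected: trailing junk in \"%s\"\n", val_str);
		return -1;
	}
	printf("numeric predicate: %llu\n", val);
	return 0;
}

int main(void)
{
	classify("0x1f");	/* numeric, base detected automatically */
	classify("bash");	/* string */
	classify("12abc");	/* rejected */
	return 0;
}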
diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h index 30743f7d4110..d363c6672c6c 100644 --- a/kernel/trace/trace_events_stage_2.h +++ b/kernel/trace/trace_events_stage_2.h | |||
| @@ -105,10 +105,10 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ | |||
| 105 | return 0; | 105 | return 0; |
| 106 | 106 | ||
| 107 | #undef __entry | 107 | #undef __entry |
| 108 | #define __entry "REC" | 108 | #define __entry REC |
| 109 | 109 | ||
| 110 | #undef TP_printk | 110 | #undef TP_printk |
| 111 | #define TP_printk(fmt, args...) "%s, %s\n", #fmt, #args | 111 | #define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args) |
| 112 | 112 | ||
| 113 | #undef TP_fast_assign | 113 | #undef TP_fast_assign |
| 114 | #define TP_fast_assign(args...) args | 114 | #define TP_fast_assign(args...) args |
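The switch from #args to __stringify(args) matters because __stringify() passes its argument through one extra macro level before stringizing, so __entry (now defined to the bare token REC rather than the string "REC") gets expanded inside the format before being turned into text. A standalone sketch of the difference, assuming the usual two-level definition of __stringify from linux/stringify.h:

#include <stdio.h>

#define __stringify_1(x...)	#x
#define __stringify(x...)	__stringify_1(x)

#define __entry REC
#define STR_DIRECT(x...)	#x	/* what the old #args did */

int main(void)
{
	printf("%s\n", STR_DIRECT(__entry->ok));   /* prints "__entry->ok" */
	printf("%s\n", __stringify(__entry->ok));  /* prints "REC->ok" */
	return 0;
}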
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 4d9952d3df50..07a22c33ebf3 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c | |||
| @@ -40,7 +40,7 @@ | |||
| 40 | 40 | ||
| 41 | #undef TRACE_FIELD_ZERO_CHAR | 41 | #undef TRACE_FIELD_ZERO_CHAR |
| 42 | #define TRACE_FIELD_ZERO_CHAR(item) \ | 42 | #define TRACE_FIELD_ZERO_CHAR(item) \ |
| 43 | ret = trace_seq_printf(s, "\tfield: char " #item ";\t" \ | 43 | ret = trace_seq_printf(s, "\tfield:char " #item ";\t" \ |
| 44 | "offset:%u;\tsize:0;\n", \ | 44 | "offset:%u;\tsize:0;\n", \ |
| 45 | (unsigned int)offsetof(typeof(field), item)); \ | 45 | (unsigned int)offsetof(typeof(field), item)); \ |
| 46 | if (!ret) \ | 46 | if (!ret) \ |
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index d72b9a63b247..64b54a59c55b 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c | |||
| @@ -423,7 +423,7 @@ int trace_print_lat_context(struct trace_iterator *iter) | |||
| 423 | 423 | ||
| 424 | trace_find_cmdline(entry->pid, comm); | 424 | trace_find_cmdline(entry->pid, comm); |
| 425 | 425 | ||
| 426 | ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08lx]" | 426 | ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08llx]" |
| 427 | " %ld.%03ldms (+%ld.%03ldms): ", comm, | 427 | " %ld.%03ldms (+%ld.%03ldms): ", comm, |
| 428 | entry->pid, iter->cpu, entry->flags, | 428 | entry->pid, iter->cpu, entry->flags, |
| 429 | entry->preempt_count, iter->idx, | 429 | entry->preempt_count, iter->idx, |
diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c index bae791ebcc51..118439709fb7 100644 --- a/kernel/trace/trace_power.c +++ b/kernel/trace/trace_power.c | |||
| @@ -186,6 +186,12 @@ static enum print_line_t power_print_line(struct trace_iterator *iter) | |||
| 186 | return TRACE_TYPE_UNHANDLED; | 186 | return TRACE_TYPE_UNHANDLED; |
| 187 | } | 187 | } |
| 188 | 188 | ||
| 189 | static void power_print_header(struct seq_file *s) | ||
| 190 | { | ||
| 191 | seq_puts(s, "# TIMESTAMP STATE EVENT\n"); | ||
| 192 | seq_puts(s, "# | | |\n"); | ||
| 193 | } | ||
| 194 | |||
| 189 | static struct tracer power_tracer __read_mostly = | 195 | static struct tracer power_tracer __read_mostly = |
| 190 | { | 196 | { |
| 191 | .name = "power", | 197 | .name = "power", |
| @@ -194,6 +200,7 @@ static struct tracer power_tracer __read_mostly = | |||
| 194 | .stop = stop_power_trace, | 200 | .stop = stop_power_trace, |
| 195 | .reset = power_trace_reset, | 201 | .reset = power_trace_reset, |
| 196 | .print_line = power_print_line, | 202 | .print_line = power_print_line, |
| 203 | .print_header = power_print_header, | ||
| 197 | }; | 204 | }; |
| 198 | 205 | ||
| 199 | static int init_power_trace(void) | 206 | static int init_power_trace(void) |
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index de35f200abd3..9117cea6f1ae 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c | |||
| @@ -62,6 +62,9 @@ probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success) | |||
| 62 | pc = preempt_count(); | 62 | pc = preempt_count(); |
| 63 | tracing_record_cmdline(current); | 63 | tracing_record_cmdline(current); |
| 64 | 64 | ||
| 65 | if (sched_stopped) | ||
| 66 | return; | ||
| 67 | |||
| 65 | local_irq_save(flags); | 68 | local_irq_save(flags); |
| 66 | cpu = raw_smp_processor_id(); | 69 | cpu = raw_smp_processor_id(); |
| 67 | data = ctx_trace->data[cpu]; | 70 | data = ctx_trace->data[cpu]; |
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 3c5ad6b2ec84..5bc00e8f153e 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c | |||
| @@ -154,7 +154,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev, | |||
| 154 | if (unlikely(!tracer_enabled || next != wakeup_task)) | 154 | if (unlikely(!tracer_enabled || next != wakeup_task)) |
| 155 | goto out_unlock; | 155 | goto out_unlock; |
| 156 | 156 | ||
| 157 | trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); | 157 | trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc); |
| 158 | tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); | 158 | tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); |
| 159 | 159 | ||
| 160 | /* | 160 | /* |
| @@ -257,6 +257,12 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success) | |||
| 257 | data = wakeup_trace->data[wakeup_cpu]; | 257 | data = wakeup_trace->data[wakeup_cpu]; |
| 258 | data->preempt_timestamp = ftrace_now(cpu); | 258 | data->preempt_timestamp = ftrace_now(cpu); |
| 259 | tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc); | 259 | tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc); |
| 260 | |||
| 261 | /* | ||
| 262 | * We must be careful in using CALLER_ADDR2. But since wake_up | ||
| 263 | * is not called by an assembly function (where as schedule is) | ||
| 264 | * it should be safe to use it here. | ||
| 265 | */ | ||
| 260 | trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); | 266 | trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); |
| 261 | 267 | ||
| 262 | out_locked: | 268 | out_locked: |
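The one-level shift from CALLER_ADDR1/2 to CALLER_ADDR0/1 in the wakeup tracer's sched_switch probe reflects that schedule() is reached via assembly, so walking an extra frame there is not reliable, while the new comment records why probe_wakeup() may still use CALLER_ADDR2. For reference, these helpers are thin wrappers around the compiler's return-address builtin, roughly as below; the real definitions live in linux/ftrace.h and gate the deeper frames on CONFIG_FRAME_POINTER on some architectures.

/* Approximate shape of the CALLER_ADDRn helpers; see linux/ftrace.h. */
#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
#define CALLER_ADDR1 ((unsigned long)__builtin_return_address(1))
#define CALLER_ADDR2 ((unsigned long)__builtin_return_address(2))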
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index a2a3af29c943..5e579645ac86 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | #include <trace/syscall.h> | ||
| 1 | #include <linux/kernel.h> | 2 | #include <linux/kernel.h> |
| 2 | #include <linux/ftrace.h> | ||
| 3 | #include <asm/syscall.h> | 3 | #include <asm/syscall.h> |
| 4 | 4 | ||
| 5 | #include "trace_output.h" | 5 | #include "trace_output.h" |
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index b6b966ce1451..f71fb2a08950 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
| @@ -966,20 +966,20 @@ undo: | |||
| 966 | } | 966 | } |
| 967 | 967 | ||
| 968 | #ifdef CONFIG_SMP | 968 | #ifdef CONFIG_SMP |
| 969 | static struct workqueue_struct *work_on_cpu_wq __read_mostly; | ||
| 970 | 969 | ||
| 971 | struct work_for_cpu { | 970 | struct work_for_cpu { |
| 972 | struct work_struct work; | 971 | struct completion completion; |
| 973 | long (*fn)(void *); | 972 | long (*fn)(void *); |
| 974 | void *arg; | 973 | void *arg; |
| 975 | long ret; | 974 | long ret; |
| 976 | }; | 975 | }; |
| 977 | 976 | ||
| 978 | static void do_work_for_cpu(struct work_struct *w) | 977 | static int do_work_for_cpu(void *_wfc) |
| 979 | { | 978 | { |
| 980 | struct work_for_cpu *wfc = container_of(w, struct work_for_cpu, work); | 979 | struct work_for_cpu *wfc = _wfc; |
| 981 | |||
| 982 | wfc->ret = wfc->fn(wfc->arg); | 980 | wfc->ret = wfc->fn(wfc->arg); |
| 981 | complete(&wfc->completion); | ||
| 982 | return 0; | ||
| 983 | } | 983 | } |
| 984 | 984 | ||
| 985 | /** | 985 | /** |
| @@ -990,17 +990,23 @@ static void do_work_for_cpu(struct work_struct *w) | |||
| 990 | * | 990 | * |
| 991 | * This will return the value @fn returns. | 991 | * This will return the value @fn returns. |
| 992 | * It is up to the caller to ensure that the cpu doesn't go offline. | 992 | * It is up to the caller to ensure that the cpu doesn't go offline. |
| 993 | * The caller must not hold any locks which would prevent @fn from completing. | ||
| 993 | */ | 994 | */ |
| 994 | long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) | 995 | long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) |
| 995 | { | 996 | { |
| 996 | struct work_for_cpu wfc; | 997 | struct task_struct *sub_thread; |
| 997 | 998 | struct work_for_cpu wfc = { | |
| 998 | INIT_WORK(&wfc.work, do_work_for_cpu); | 999 | .completion = COMPLETION_INITIALIZER_ONSTACK(wfc.completion), |
| 999 | wfc.fn = fn; | 1000 | .fn = fn, |
| 1000 | wfc.arg = arg; | 1001 | .arg = arg, |
| 1001 | queue_work_on(cpu, work_on_cpu_wq, &wfc.work); | 1002 | }; |
| 1002 | flush_work(&wfc.work); | 1003 | |
| 1003 | 1004 | sub_thread = kthread_create(do_work_for_cpu, &wfc, "work_for_cpu"); | |
| 1005 | if (IS_ERR(sub_thread)) | ||
| 1006 | return PTR_ERR(sub_thread); | ||
| 1007 | kthread_bind(sub_thread, cpu); | ||
| 1008 | wake_up_process(sub_thread); | ||
| 1009 | wait_for_completion(&wfc.completion); | ||
| 1004 | return wfc.ret; | 1010 | return wfc.ret; |
| 1005 | } | 1011 | } |
| 1006 | EXPORT_SYMBOL_GPL(work_on_cpu); | 1012 | EXPORT_SYMBOL_GPL(work_on_cpu); |
| @@ -1016,8 +1022,4 @@ void __init init_workqueues(void) | |||
| 1016 | hotcpu_notifier(workqueue_cpu_callback, 0); | 1022 | hotcpu_notifier(workqueue_cpu_callback, 0); |
| 1017 | keventd_wq = create_workqueue("events"); | 1023 | keventd_wq = create_workqueue("events"); |
| 1018 | BUG_ON(!keventd_wq); | 1024 | BUG_ON(!keventd_wq); |
| 1019 | #ifdef CONFIG_SMP | ||
| 1020 | work_on_cpu_wq = create_workqueue("work_on_cpu"); | ||
| 1021 | BUG_ON(!work_on_cpu_wq); | ||
| 1022 | #endif | ||
| 1023 | } | 1025 | } |
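The rework above drops the dedicated work_on_cpu workqueue and instead creates a fresh kthread bound to the target CPU for each call, waiting on an on-stack completion, which is why the kernel-doc now warns that the caller must not hold any lock the callback needs. A hedged caller sketch; the callback and wrapper names are made up for illustration.

#include <linux/kernel.h>
#include <linux/smp.h>
#include <linux/workqueue.h>

/* Illustrative callback: report which CPU it actually ran on. */
static long where_am_i(void *unused)
{
	return raw_smp_processor_id();
}

static int check_cpu(unsigned int cpu)
{
	long ret = work_on_cpu(cpu, where_am_i, NULL);

	if (ret < 0)
		return ret;	/* kthread_create() failed */
	pr_info("work_on_cpu(%u) ran on cpu %ld\n", cpu, ret);
	return 0;
}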
