Diffstat (limited to 'kernel')
38 files changed, 691 insertions, 527 deletions
diff --git a/kernel/audit.c b/kernel/audit.c index 21c7fa615bd3..91e53d04b6a9 100644 --- a/kernel/audit.c +++ b/kernel/audit.c | |||
@@ -1056,7 +1056,7 @@ static inline void audit_get_stamp(struct audit_context *ctx, | |||
1056 | static void wait_for_auditd(unsigned long sleep_time) | 1056 | static void wait_for_auditd(unsigned long sleep_time) |
1057 | { | 1057 | { |
1058 | DECLARE_WAITQUEUE(wait, current); | 1058 | DECLARE_WAITQUEUE(wait, current); |
1059 | set_current_state(TASK_INTERRUPTIBLE); | 1059 | set_current_state(TASK_UNINTERRUPTIBLE); |
1060 | add_wait_queue(&audit_backlog_wait, &wait); | 1060 | add_wait_queue(&audit_backlog_wait, &wait); |
1061 | 1061 | ||
1062 | if (audit_backlog_limit && | 1062 | if (audit_backlog_limit && |
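Aside on the hunk above: the backlog wait is switched to TASK_UNINTERRUPTIBLE so a pending signal can no longer cut the sleep short and let records bypass the backlog limit. A rough userspace analogue of "sleep out the full budget even if interrupted" (illustrative only, not the kernel API) using nanosleep's remaining-time argument:

#include <errno.h>
#include <time.h>

/* Sleep for the full interval, restarting after any signal interruption
 * (analogous to an uninterruptible wait: signals do not shorten it). */
static void sleep_full(struct timespec req)
{
        struct timespec rem;

        while (nanosleep(&req, &rem) == -1 && errno == EINTR)
                req = rem;      /* continue with whatever time is left */
}

int main(void)
{
        sleep_full((struct timespec){ .tv_nsec = 100 * 1000 * 1000 });
        return 0;
}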
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index a291aa23fb3f..43c307dc9453 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c | |||
@@ -658,6 +658,7 @@ int audit_add_tree_rule(struct audit_krule *rule) | |||
658 | struct vfsmount *mnt; | 658 | struct vfsmount *mnt; |
659 | int err; | 659 | int err; |
660 | 660 | ||
661 | rule->tree = NULL; | ||
661 | list_for_each_entry(tree, &tree_list, list) { | 662 | list_for_each_entry(tree, &tree_list, list) { |
662 | if (!strcmp(seed->pathname, tree->pathname)) { | 663 | if (!strcmp(seed->pathname, tree->pathname)) { |
663 | put_tree(seed); | 664 | put_tree(seed); |
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 83a2970295d1..6bd4a90d1991 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c | |||
@@ -1021,9 +1021,6 @@ static void audit_log_rule_change(char *action, struct audit_krule *rule, int re | |||
1021 | * @seq: netlink audit message sequence (serial) number | 1021 | * @seq: netlink audit message sequence (serial) number |
1022 | * @data: payload data | 1022 | * @data: payload data |
1023 | * @datasz: size of payload data | 1023 | * @datasz: size of payload data |
1024 | * @loginuid: loginuid of sender | ||
1025 | * @sessionid: sessionid for netlink audit message | ||
1026 | * @sid: SE Linux Security ID of sender | ||
1027 | */ | 1024 | */ |
1028 | int audit_receive_filter(int type, int pid, int seq, void *data, size_t datasz) | 1025 | int audit_receive_filter(int type, int pid, int seq, void *data, size_t datasz) |
1029 | { | 1026 | { |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 2a9926275f80..a7c9e6ddb979 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -1686,11 +1686,14 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
1686 | */ | 1686 | */ |
1687 | cgroup_drop_root(opts.new_root); | 1687 | cgroup_drop_root(opts.new_root); |
1688 | 1688 | ||
1689 | if (((root->flags | opts.flags) & CGRP_ROOT_SANE_BEHAVIOR) && | 1689 | if (root->flags != opts.flags) { |
1690 | root->flags != opts.flags) { | 1690 | if ((root->flags | opts.flags) & CGRP_ROOT_SANE_BEHAVIOR) { |
1691 | pr_err("cgroup: sane_behavior: new mount options should match the existing superblock\n"); | 1691 | pr_err("cgroup: sane_behavior: new mount options should match the existing superblock\n"); |
1692 | ret = -EINVAL; | 1692 | ret = -EINVAL; |
1693 | goto drop_new_super; | 1693 | goto drop_new_super; |
1694 | } else { | ||
1695 | pr_warning("cgroup: new mount options do not match the existing superblock, will be ignored\n"); | ||
1696 | } | ||
1694 | } | 1697 | } |
1695 | 1698 | ||
1696 | /* no subsys rebinding, so refcounts don't change */ | 1699 | /* no subsys rebinding, so refcounts don't change */ |
@@ -2699,13 +2702,14 @@ static int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys, | |||
2699 | goto out; | 2702 | goto out; |
2700 | } | 2703 | } |
2701 | 2704 | ||
2705 | cfe->type = (void *)cft; | ||
2706 | cfe->dentry = dentry; | ||
2707 | dentry->d_fsdata = cfe; | ||
2708 | simple_xattrs_init(&cfe->xattrs); | ||
2709 | |||
2702 | mode = cgroup_file_mode(cft); | 2710 | mode = cgroup_file_mode(cft); |
2703 | error = cgroup_create_file(dentry, mode | S_IFREG, cgrp->root->sb); | 2711 | error = cgroup_create_file(dentry, mode | S_IFREG, cgrp->root->sb); |
2704 | if (!error) { | 2712 | if (!error) { |
2705 | cfe->type = (void *)cft; | ||
2706 | cfe->dentry = dentry; | ||
2707 | dentry->d_fsdata = cfe; | ||
2708 | simple_xattrs_init(&cfe->xattrs); | ||
2709 | list_add_tail(&cfe->node, &parent->files); | 2713 | list_add_tail(&cfe->node, &parent->files); |
2710 | cfe = NULL; | 2714 | cfe = NULL; |
2711 | } | 2715 | } |
@@ -2953,11 +2957,8 @@ struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos, | |||
2953 | WARN_ON_ONCE(!rcu_read_lock_held()); | 2957 | WARN_ON_ONCE(!rcu_read_lock_held()); |
2954 | 2958 | ||
2955 | /* if first iteration, pretend we just visited @cgroup */ | 2959 | /* if first iteration, pretend we just visited @cgroup */ |
2956 | if (!pos) { | 2960 | if (!pos) |
2957 | if (list_empty(&cgroup->children)) | ||
2958 | return NULL; | ||
2959 | pos = cgroup; | 2961 | pos = cgroup; |
2960 | } | ||
2961 | 2962 | ||
2962 | /* visit the first child if exists */ | 2963 | /* visit the first child if exists */ |
2963 | next = list_first_or_null_rcu(&pos->children, struct cgroup, sibling); | 2964 | next = list_first_or_null_rcu(&pos->children, struct cgroup, sibling); |
@@ -2965,14 +2966,14 @@ struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos, | |||
2965 | return next; | 2966 | return next; |
2966 | 2967 | ||
2967 | /* no child, visit my or the closest ancestor's next sibling */ | 2968 | /* no child, visit my or the closest ancestor's next sibling */ |
2968 | do { | 2969 | while (pos != cgroup) { |
2969 | next = list_entry_rcu(pos->sibling.next, struct cgroup, | 2970 | next = list_entry_rcu(pos->sibling.next, struct cgroup, |
2970 | sibling); | 2971 | sibling); |
2971 | if (&next->sibling != &pos->parent->children) | 2972 | if (&next->sibling != &pos->parent->children) |
2972 | return next; | 2973 | return next; |
2973 | 2974 | ||
2974 | pos = pos->parent; | 2975 | pos = pos->parent; |
2975 | } while (pos != cgroup); | 2976 | } |
2976 | 2977 | ||
2977 | return NULL; | 2978 | return NULL; |
2978 | } | 2979 | } |
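The rewritten cgroup_next_descendant_pre() drops the special case for a childless root and turns the do/while into a plain while (pos != cgroup), so "root with no children" simply falls through to NULL. A self-contained sketch of the same pre-order "next node" walk over an ordinary pointer-based tree (node, first_child, next_sibling and parent are invented names, not the kernel types):

#include <stddef.h>

struct node {
        struct node *parent;
        struct node *first_child;
        struct node *next_sibling;
};

/* Pre-order successor of @pos within the subtree rooted at @root.
 * Pass pos == NULL to start the walk. */
static struct node *next_descendant_pre(struct node *pos, struct node *root)
{
        if (!pos)                       /* first iteration: pretend we just visited root */
                pos = root;

        if (pos->first_child)           /* visit the first child if it exists */
                return pos->first_child;

        /* no child: visit our own, or the closest ancestor's, next sibling */
        while (pos != root) {
                if (pos->next_sibling)
                        return pos->next_sibling;
                pos = pos->parent;
        }
        return NULL;                    /* walked back up to the root: done */
}

With this shape, a root that has no children never enters the while loop and returns NULL on the first call, which is why the removed list_empty() special case was redundant.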
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c index 65349f07b878..383f8231e436 100644 --- a/kernel/context_tracking.c +++ b/kernel/context_tracking.c | |||
@@ -15,7 +15,6 @@ | |||
15 | */ | 15 | */ |
16 | 16 | ||
17 | #include <linux/context_tracking.h> | 17 | #include <linux/context_tracking.h> |
18 | #include <linux/kvm_host.h> | ||
19 | #include <linux/rcupdate.h> | 18 | #include <linux/rcupdate.h> |
20 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
21 | #include <linux/hardirq.h> | 20 | #include <linux/hardirq.h> |
@@ -71,6 +70,46 @@ void user_enter(void) | |||
71 | local_irq_restore(flags); | 70 | local_irq_restore(flags); |
72 | } | 71 | } |
73 | 72 | ||
73 | #ifdef CONFIG_PREEMPT | ||
74 | /** | ||
75 | * preempt_schedule_context - preempt_schedule called by tracing | ||
76 | * | ||
77 | * The tracing infrastructure uses preempt_enable_notrace to prevent | ||
78 | * recursion and tracing preempt enabling caused by the tracing | ||
79 | * infrastructure itself. But as tracing can happen in areas coming | ||
80 | * from userspace or just about to enter userspace, a preempt enable | ||
81 | * can occur before user_exit() is called. This will cause the scheduler | ||
82 | * to be called when the system is still in usermode. | ||
83 | * | ||
84 | * To prevent this, the preempt_enable_notrace will use this function | ||
85 | * instead of preempt_schedule() to exit user context if needed before | ||
86 | * calling the scheduler. | ||
87 | */ | ||
88 | void __sched notrace preempt_schedule_context(void) | ||
89 | { | ||
90 | struct thread_info *ti = current_thread_info(); | ||
91 | enum ctx_state prev_ctx; | ||
92 | |||
93 | if (likely(ti->preempt_count || irqs_disabled())) | ||
94 | return; | ||
95 | |||
96 | /* | ||
97 | * Need to disable preemption in case user_exit() is traced | ||
98 | * and the tracer calls preempt_enable_notrace() causing | ||
99 | * an infinite recursion. | ||
100 | */ | ||
101 | preempt_disable_notrace(); | ||
102 | prev_ctx = exception_enter(); | ||
103 | preempt_enable_no_resched_notrace(); | ||
104 | |||
105 | preempt_schedule(); | ||
106 | |||
107 | preempt_disable_notrace(); | ||
108 | exception_exit(prev_ctx); | ||
109 | preempt_enable_notrace(); | ||
110 | } | ||
111 | EXPORT_SYMBOL_GPL(preempt_schedule_context); | ||
112 | #endif /* CONFIG_PREEMPT */ | ||
74 | 113 | ||
75 | /** | 114 | /** |
76 | * user_exit - Inform the context tracking that the CPU is | 115 | * user_exit - Inform the context tracking that the CPU is |
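The new preempt_schedule_context() brackets exception_enter()/exception_exit() with preempt_disable_notrace() so that a traced user_exit() cannot recurse back through preempt_enable_notrace(). A loose userspace analogue of that "guard the guard" pattern, using a thread-local flag in place of the preempt count (names are made up, purely illustrative):

#include <stdio.h>

static _Thread_local int in_hook;       /* crude stand-in for the preempt count */

static void instrument(const char *what);

static void do_work(const char *what)
{
        instrument(what);               /* instrumentation may fire here, too */
        printf("work: %s\n", what);
}

/* The hook itself calls into instrumented code; without the flag this
 * would recurse without bound. */
static void instrument(const char *what)
{
        if (in_hook)                    /* already inside the hook: bail out */
                return;

        in_hook = 1;
        printf("hook fired for: %s\n", what);
        do_work("hook bookkeeping");
        in_hook = 0;
}

int main(void)
{
        do_work("real work");
        return 0;
}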
diff --git a/kernel/cpu.c b/kernel/cpu.c index b5e4ab2d427e..198a38883e64 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c | |||
@@ -133,6 +133,27 @@ static void cpu_hotplug_done(void) | |||
133 | mutex_unlock(&cpu_hotplug.lock); | 133 | mutex_unlock(&cpu_hotplug.lock); |
134 | } | 134 | } |
135 | 135 | ||
136 | /* | ||
137 | * Wait for currently running CPU hotplug operations to complete (if any) and | ||
138 | * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects | ||
139 | * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the | ||
140 | * hotplug path before performing hotplug operations. So acquiring that lock | ||
141 | * guarantees mutual exclusion from any currently running hotplug operations. | ||
142 | */ | ||
143 | void cpu_hotplug_disable(void) | ||
144 | { | ||
145 | cpu_maps_update_begin(); | ||
146 | cpu_hotplug_disabled = 1; | ||
147 | cpu_maps_update_done(); | ||
148 | } | ||
149 | |||
150 | void cpu_hotplug_enable(void) | ||
151 | { | ||
152 | cpu_maps_update_begin(); | ||
153 | cpu_hotplug_disabled = 0; | ||
154 | cpu_maps_update_done(); | ||
155 | } | ||
156 | |||
136 | #else /* #if CONFIG_HOTPLUG_CPU */ | 157 | #else /* #if CONFIG_HOTPLUG_CPU */ |
137 | static void cpu_hotplug_begin(void) {} | 158 | static void cpu_hotplug_begin(void) {} |
138 | static void cpu_hotplug_done(void) {} | 159 | static void cpu_hotplug_done(void) {} |
@@ -541,36 +562,6 @@ static int __init alloc_frozen_cpus(void) | |||
541 | core_initcall(alloc_frozen_cpus); | 562 | core_initcall(alloc_frozen_cpus); |
542 | 563 | ||
543 | /* | 564 | /* |
544 | * Prevent regular CPU hotplug from racing with the freezer, by disabling CPU | ||
545 | * hotplug when tasks are about to be frozen. Also, don't allow the freezer | ||
546 | * to continue until any currently running CPU hotplug operation gets | ||
547 | * completed. | ||
548 | * To modify the 'cpu_hotplug_disabled' flag, we need to acquire the | ||
549 | * 'cpu_add_remove_lock'. And this same lock is also taken by the regular | ||
550 | * CPU hotplug path and released only after it is complete. Thus, we | ||
551 | * (and hence the freezer) will block here until any currently running CPU | ||
552 | * hotplug operation gets completed. | ||
553 | */ | ||
554 | void cpu_hotplug_disable_before_freeze(void) | ||
555 | { | ||
556 | cpu_maps_update_begin(); | ||
557 | cpu_hotplug_disabled = 1; | ||
558 | cpu_maps_update_done(); | ||
559 | } | ||
560 | |||
561 | |||
562 | /* | ||
563 | * When tasks have been thawed, re-enable regular CPU hotplug (which had been | ||
564 | * disabled while beginning to freeze tasks). | ||
565 | */ | ||
566 | void cpu_hotplug_enable_after_thaw(void) | ||
567 | { | ||
568 | cpu_maps_update_begin(); | ||
569 | cpu_hotplug_disabled = 0; | ||
570 | cpu_maps_update_done(); | ||
571 | } | ||
572 | |||
573 | /* | ||
574 | * When callbacks for CPU hotplug notifications are being executed, we must | 565 | * When callbacks for CPU hotplug notifications are being executed, we must |
575 | * ensure that the state of the system with respect to the tasks being frozen | 566 | * ensure that the state of the system with respect to the tasks being frozen |
576 | * or not, as reported by the notification, remains unchanged *throughout the | 567 | * or not, as reported by the notification, remains unchanged *throughout the |
@@ -589,12 +580,12 @@ cpu_hotplug_pm_callback(struct notifier_block *nb, | |||
589 | 580 | ||
590 | case PM_SUSPEND_PREPARE: | 581 | case PM_SUSPEND_PREPARE: |
591 | case PM_HIBERNATION_PREPARE: | 582 | case PM_HIBERNATION_PREPARE: |
592 | cpu_hotplug_disable_before_freeze(); | 583 | cpu_hotplug_disable(); |
593 | break; | 584 | break; |
594 | 585 | ||
595 | case PM_POST_SUSPEND: | 586 | case PM_POST_SUSPEND: |
596 | case PM_POST_HIBERNATION: | 587 | case PM_POST_HIBERNATION: |
597 | cpu_hotplug_enable_after_thaw(); | 588 | cpu_hotplug_enable(); |
598 | break; | 589 | break; |
599 | 590 | ||
600 | default: | 591 | default: |
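The freezer-specific helpers become the general cpu_hotplug_disable()/cpu_hotplug_enable(); as the new comment explains, the flag is flipped under cpu_add_remove_lock, the same lock the hotplug path holds, so setting it also waits out any operation already in flight. A small pthread sketch of that idiom (a disable flag and the operation sharing one mutex), offered only as an illustration:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t op_lock = PTHREAD_MUTEX_INITIALIZER;
static int ops_disabled;

/* Flipping the flag takes the same lock the operation holds, so it also
 * waits for any operation that is currently running. */
static void ops_disable(void)
{
        pthread_mutex_lock(&op_lock);
        ops_disabled = 1;
        pthread_mutex_unlock(&op_lock);
}

static void ops_enable(void)
{
        pthread_mutex_lock(&op_lock);
        ops_disabled = 0;
        pthread_mutex_unlock(&op_lock);
}

static int do_operation(void)
{
        int ret = 0;

        pthread_mutex_lock(&op_lock);
        if (ops_disabled)
                ret = -1;               /* refused while disabled */
        else
                printf("operation ran\n");
        pthread_mutex_unlock(&op_lock);
        return ret;
}

int main(void)
{
        do_operation();                                 /* runs */
        ops_disable();
        printf("disabled: %d\n", do_operation());       /* refused */
        ops_enable();
        return 0;
}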
diff --git a/kernel/cpu/idle.c b/kernel/cpu/idle.c index d5585f5e038e..e695c0a0bcb5 100644 --- a/kernel/cpu/idle.c +++ b/kernel/cpu/idle.c | |||
@@ -5,6 +5,7 @@ | |||
5 | #include <linux/cpu.h> | 5 | #include <linux/cpu.h> |
6 | #include <linux/tick.h> | 6 | #include <linux/tick.h> |
7 | #include <linux/mm.h> | 7 | #include <linux/mm.h> |
8 | #include <linux/stackprotector.h> | ||
8 | 9 | ||
9 | #include <asm/tlb.h> | 10 | #include <asm/tlb.h> |
10 | 11 | ||
@@ -58,6 +59,7 @@ void __weak arch_cpu_idle_dead(void) { } | |||
58 | void __weak arch_cpu_idle(void) | 59 | void __weak arch_cpu_idle(void) |
59 | { | 60 | { |
60 | cpu_idle_force_poll = 1; | 61 | cpu_idle_force_poll = 1; |
62 | local_irq_enable(); | ||
61 | } | 63 | } |
62 | 64 | ||
63 | /* | 65 | /* |
@@ -112,6 +114,21 @@ static void cpu_idle_loop(void) | |||
112 | 114 | ||
113 | void cpu_startup_entry(enum cpuhp_state state) | 115 | void cpu_startup_entry(enum cpuhp_state state) |
114 | { | 116 | { |
117 | /* | ||
118 | * This #ifdef needs to die, but it's too late in the cycle to | ||
119 | * make this generic (arm and sh have never invoked the canary | ||
120 | * init for the non boot cpus!). Will be fixed in 3.11 | ||
121 | */ | ||
122 | #ifdef CONFIG_X86 | ||
123 | /* | ||
124 | * If we're the non-boot CPU, nothing set the stack canary up | ||
125 | * for us. The boot CPU already has it initialized but no harm | ||
126 | * in doing it again. This is a good place for updating it, as | ||
127 | * we wont ever return from this function (so the invalid | ||
128 | * canaries already on the stack wont ever trigger). | ||
129 | */ | ||
130 | boot_init_stack_canary(); | ||
131 | #endif | ||
115 | current_set_polling(); | 132 | current_set_polling(); |
116 | arch_cpu_idle_prepare(); | 133 | arch_cpu_idle_prepare(); |
117 | cpu_idle_loop(); | 134 | cpu_idle_loop(); |
diff --git a/kernel/events/core.c b/kernel/events/core.c index 9dc297faf7c0..b391907d5352 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
@@ -196,9 +196,6 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, | |||
196 | static void update_context_time(struct perf_event_context *ctx); | 196 | static void update_context_time(struct perf_event_context *ctx); |
197 | static u64 perf_event_time(struct perf_event *event); | 197 | static u64 perf_event_time(struct perf_event *event); |
198 | 198 | ||
199 | static void ring_buffer_attach(struct perf_event *event, | ||
200 | struct ring_buffer *rb); | ||
201 | |||
202 | void __weak perf_event_print_debug(void) { } | 199 | void __weak perf_event_print_debug(void) { } |
203 | 200 | ||
204 | extern __weak const char *perf_pmu_name(void) | 201 | extern __weak const char *perf_pmu_name(void) |
@@ -2918,6 +2915,7 @@ static void free_event_rcu(struct rcu_head *head) | |||
2918 | } | 2915 | } |
2919 | 2916 | ||
2920 | static void ring_buffer_put(struct ring_buffer *rb); | 2917 | static void ring_buffer_put(struct ring_buffer *rb); |
2918 | static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb); | ||
2921 | 2919 | ||
2922 | static void free_event(struct perf_event *event) | 2920 | static void free_event(struct perf_event *event) |
2923 | { | 2921 | { |
@@ -2942,15 +2940,30 @@ static void free_event(struct perf_event *event) | |||
2942 | if (has_branch_stack(event)) { | 2940 | if (has_branch_stack(event)) { |
2943 | static_key_slow_dec_deferred(&perf_sched_events); | 2941 | static_key_slow_dec_deferred(&perf_sched_events); |
2944 | /* is system-wide event */ | 2942 | /* is system-wide event */ |
2945 | if (!(event->attach_state & PERF_ATTACH_TASK)) | 2943 | if (!(event->attach_state & PERF_ATTACH_TASK)) { |
2946 | atomic_dec(&per_cpu(perf_branch_stack_events, | 2944 | atomic_dec(&per_cpu(perf_branch_stack_events, |
2947 | event->cpu)); | 2945 | event->cpu)); |
2946 | } | ||
2948 | } | 2947 | } |
2949 | } | 2948 | } |
2950 | 2949 | ||
2951 | if (event->rb) { | 2950 | if (event->rb) { |
2952 | ring_buffer_put(event->rb); | 2951 | struct ring_buffer *rb; |
2953 | event->rb = NULL; | 2952 | |
2953 | /* | ||
2954 | * Can happen when we close an event with re-directed output. | ||
2955 | * | ||
2956 | * Since we have a 0 refcount, perf_mmap_close() will skip | ||
2957 | * over us; possibly making our ring_buffer_put() the last. | ||
2958 | */ | ||
2959 | mutex_lock(&event->mmap_mutex); | ||
2960 | rb = event->rb; | ||
2961 | if (rb) { | ||
2962 | rcu_assign_pointer(event->rb, NULL); | ||
2963 | ring_buffer_detach(event, rb); | ||
2964 | ring_buffer_put(rb); /* could be last */ | ||
2965 | } | ||
2966 | mutex_unlock(&event->mmap_mutex); | ||
2954 | } | 2967 | } |
2955 | 2968 | ||
2956 | if (is_cgroup_event(event)) | 2969 | if (is_cgroup_event(event)) |
@@ -3188,30 +3201,13 @@ static unsigned int perf_poll(struct file *file, poll_table *wait) | |||
3188 | unsigned int events = POLL_HUP; | 3201 | unsigned int events = POLL_HUP; |
3189 | 3202 | ||
3190 | /* | 3203 | /* |
3191 | * Race between perf_event_set_output() and perf_poll(): perf_poll() | 3204 | * Pin the event->rb by taking event->mmap_mutex; otherwise |
3192 | * grabs the rb reference but perf_event_set_output() overrides it. | 3205 | * perf_event_set_output() can swizzle our rb and make us miss wakeups. |
3193 | * Here is the timeline for two threads T1, T2: | ||
3194 | * t0: T1, rb = rcu_dereference(event->rb) | ||
3195 | * t1: T2, old_rb = event->rb | ||
3196 | * t2: T2, event->rb = new rb | ||
3197 | * t3: T2, ring_buffer_detach(old_rb) | ||
3198 | * t4: T1, ring_buffer_attach(rb1) | ||
3199 | * t5: T1, poll_wait(event->waitq) | ||
3200 | * | ||
3201 | * To avoid this problem, we grab mmap_mutex in perf_poll() | ||
3202 | * thereby ensuring that the assignment of the new ring buffer | ||
3203 | * and the detachment of the old buffer appear atomic to perf_poll() | ||
3204 | */ | 3206 | */ |
3205 | mutex_lock(&event->mmap_mutex); | 3207 | mutex_lock(&event->mmap_mutex); |
3206 | 3208 | rb = event->rb; | |
3207 | rcu_read_lock(); | 3209 | if (rb) |
3208 | rb = rcu_dereference(event->rb); | ||
3209 | if (rb) { | ||
3210 | ring_buffer_attach(event, rb); | ||
3211 | events = atomic_xchg(&rb->poll, 0); | 3210 | events = atomic_xchg(&rb->poll, 0); |
3212 | } | ||
3213 | rcu_read_unlock(); | ||
3214 | |||
3215 | mutex_unlock(&event->mmap_mutex); | 3211 | mutex_unlock(&event->mmap_mutex); |
3216 | 3212 | ||
3217 | poll_wait(file, &event->waitq, wait); | 3213 | poll_wait(file, &event->waitq, wait); |
@@ -3521,16 +3517,12 @@ static void ring_buffer_attach(struct perf_event *event, | |||
3521 | return; | 3517 | return; |
3522 | 3518 | ||
3523 | spin_lock_irqsave(&rb->event_lock, flags); | 3519 | spin_lock_irqsave(&rb->event_lock, flags); |
3524 | if (!list_empty(&event->rb_entry)) | 3520 | if (list_empty(&event->rb_entry)) |
3525 | goto unlock; | 3521 | list_add(&event->rb_entry, &rb->event_list); |
3526 | |||
3527 | list_add(&event->rb_entry, &rb->event_list); | ||
3528 | unlock: | ||
3529 | spin_unlock_irqrestore(&rb->event_lock, flags); | 3522 | spin_unlock_irqrestore(&rb->event_lock, flags); |
3530 | } | 3523 | } |
3531 | 3524 | ||
3532 | static void ring_buffer_detach(struct perf_event *event, | 3525 | static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb) |
3533 | struct ring_buffer *rb) | ||
3534 | { | 3526 | { |
3535 | unsigned long flags; | 3527 | unsigned long flags; |
3536 | 3528 | ||
@@ -3549,13 +3541,10 @@ static void ring_buffer_wakeup(struct perf_event *event) | |||
3549 | 3541 | ||
3550 | rcu_read_lock(); | 3542 | rcu_read_lock(); |
3551 | rb = rcu_dereference(event->rb); | 3543 | rb = rcu_dereference(event->rb); |
3552 | if (!rb) | 3544 | if (rb) { |
3553 | goto unlock; | 3545 | list_for_each_entry_rcu(event, &rb->event_list, rb_entry) |
3554 | 3546 | wake_up_all(&event->waitq); | |
3555 | list_for_each_entry_rcu(event, &rb->event_list, rb_entry) | 3547 | } |
3556 | wake_up_all(&event->waitq); | ||
3557 | |||
3558 | unlock: | ||
3559 | rcu_read_unlock(); | 3548 | rcu_read_unlock(); |
3560 | } | 3549 | } |
3561 | 3550 | ||
@@ -3584,18 +3573,10 @@ static struct ring_buffer *ring_buffer_get(struct perf_event *event) | |||
3584 | 3573 | ||
3585 | static void ring_buffer_put(struct ring_buffer *rb) | 3574 | static void ring_buffer_put(struct ring_buffer *rb) |
3586 | { | 3575 | { |
3587 | struct perf_event *event, *n; | ||
3588 | unsigned long flags; | ||
3589 | |||
3590 | if (!atomic_dec_and_test(&rb->refcount)) | 3576 | if (!atomic_dec_and_test(&rb->refcount)) |
3591 | return; | 3577 | return; |
3592 | 3578 | ||
3593 | spin_lock_irqsave(&rb->event_lock, flags); | 3579 | WARN_ON_ONCE(!list_empty(&rb->event_list)); |
3594 | list_for_each_entry_safe(event, n, &rb->event_list, rb_entry) { | ||
3595 | list_del_init(&event->rb_entry); | ||
3596 | wake_up_all(&event->waitq); | ||
3597 | } | ||
3598 | spin_unlock_irqrestore(&rb->event_lock, flags); | ||
3599 | 3580 | ||
3600 | call_rcu(&rb->rcu_head, rb_free_rcu); | 3581 | call_rcu(&rb->rcu_head, rb_free_rcu); |
3601 | } | 3582 | } |
@@ -3605,26 +3586,100 @@ static void perf_mmap_open(struct vm_area_struct *vma) | |||
3605 | struct perf_event *event = vma->vm_file->private_data; | 3586 | struct perf_event *event = vma->vm_file->private_data; |
3606 | 3587 | ||
3607 | atomic_inc(&event->mmap_count); | 3588 | atomic_inc(&event->mmap_count); |
3589 | atomic_inc(&event->rb->mmap_count); | ||
3608 | } | 3590 | } |
3609 | 3591 | ||
3592 | /* | ||
3593 | * A buffer can be mmap()ed multiple times; either directly through the same | ||
3594 | * event, or through other events by use of perf_event_set_output(). | ||
3595 | * | ||
3596 | * In order to undo the VM accounting done by perf_mmap() we need to destroy | ||
3597 | * the buffer here, where we still have a VM context. This means we need | ||
3598 | * to detach all events redirecting to us. | ||
3599 | */ | ||
3610 | static void perf_mmap_close(struct vm_area_struct *vma) | 3600 | static void perf_mmap_close(struct vm_area_struct *vma) |
3611 | { | 3601 | { |
3612 | struct perf_event *event = vma->vm_file->private_data; | 3602 | struct perf_event *event = vma->vm_file->private_data; |
3613 | 3603 | ||
3614 | if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) { | 3604 | struct ring_buffer *rb = event->rb; |
3615 | unsigned long size = perf_data_size(event->rb); | 3605 | struct user_struct *mmap_user = rb->mmap_user; |
3616 | struct user_struct *user = event->mmap_user; | 3606 | int mmap_locked = rb->mmap_locked; |
3617 | struct ring_buffer *rb = event->rb; | 3607 | unsigned long size = perf_data_size(rb); |
3608 | |||
3609 | atomic_dec(&rb->mmap_count); | ||
3610 | |||
3611 | if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) | ||
3612 | return; | ||
3618 | 3613 | ||
3619 | atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm); | 3614 | /* Detach current event from the buffer. */ |
3620 | vma->vm_mm->pinned_vm -= event->mmap_locked; | 3615 | rcu_assign_pointer(event->rb, NULL); |
3621 | rcu_assign_pointer(event->rb, NULL); | 3616 | ring_buffer_detach(event, rb); |
3622 | ring_buffer_detach(event, rb); | 3617 | mutex_unlock(&event->mmap_mutex); |
3618 | |||
3619 | /* If there's still other mmap()s of this buffer, we're done. */ | ||
3620 | if (atomic_read(&rb->mmap_count)) { | ||
3621 | ring_buffer_put(rb); /* can't be last */ | ||
3622 | return; | ||
3623 | } | ||
3624 | |||
3625 | /* | ||
3626 | * No other mmap()s, detach from all other events that might redirect | ||
3627 | * into the now unreachable buffer. Somewhat complicated by the | ||
3628 | * fact that rb::event_lock otherwise nests inside mmap_mutex. | ||
3629 | */ | ||
3630 | again: | ||
3631 | rcu_read_lock(); | ||
3632 | list_for_each_entry_rcu(event, &rb->event_list, rb_entry) { | ||
3633 | if (!atomic_long_inc_not_zero(&event->refcount)) { | ||
3634 | /* | ||
3635 | * This event is en-route to free_event() which will | ||
3636 | * detach it and remove it from the list. | ||
3637 | */ | ||
3638 | continue; | ||
3639 | } | ||
3640 | rcu_read_unlock(); | ||
3641 | |||
3642 | mutex_lock(&event->mmap_mutex); | ||
3643 | /* | ||
3644 | * Check we didn't race with perf_event_set_output() which can | ||
3645 | * swizzle the rb from under us while we were waiting to | ||
3646 | * acquire mmap_mutex. | ||
3647 | * | ||
3648 | * If we find a different rb; ignore this event, a next | ||
3649 | * iteration will no longer find it on the list. We have to | ||
3650 | * still restart the iteration to make sure we're not now | ||
3651 | * iterating the wrong list. | ||
3652 | */ | ||
3653 | if (event->rb == rb) { | ||
3654 | rcu_assign_pointer(event->rb, NULL); | ||
3655 | ring_buffer_detach(event, rb); | ||
3656 | ring_buffer_put(rb); /* can't be last, we still have one */ | ||
3657 | } | ||
3623 | mutex_unlock(&event->mmap_mutex); | 3658 | mutex_unlock(&event->mmap_mutex); |
3659 | put_event(event); | ||
3624 | 3660 | ||
3625 | ring_buffer_put(rb); | 3661 | /* |
3626 | free_uid(user); | 3662 | * Restart the iteration; either we're on the wrong list or |
3663 | * destroyed its integrity by doing a deletion. | ||
3664 | */ | ||
3665 | goto again; | ||
3627 | } | 3666 | } |
3667 | rcu_read_unlock(); | ||
3668 | |||
3669 | /* | ||
3670 | * It could be there's still a few 0-ref events on the list; they'll | ||
3671 | * get cleaned up by free_event() -- they'll also still have their | ||
3672 | * ref on the rb and will free it whenever they are done with it. | ||
3673 | * | ||
3674 | * Aside from that, this buffer is 'fully' detached and unmapped, | ||
3675 | * undo the VM accounting. | ||
3676 | */ | ||
3677 | |||
3678 | atomic_long_sub((size >> PAGE_SHIFT) + 1, &mmap_user->locked_vm); | ||
3679 | vma->vm_mm->pinned_vm -= mmap_locked; | ||
3680 | free_uid(mmap_user); | ||
3681 | |||
3682 | ring_buffer_put(rb); /* could be last */ | ||
3628 | } | 3683 | } |
3629 | 3684 | ||
3630 | static const struct vm_operations_struct perf_mmap_vmops = { | 3685 | static const struct vm_operations_struct perf_mmap_vmops = { |
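The reworked perf_mmap_close() walks rb->event_list under RCU and uses atomic_long_inc_not_zero() to skip events already headed for free_event(). A hedged sketch of that "take a reference only if it is still live" primitive with C11 atomics (standalone, not the kernel's implementation):

#include <stdatomic.h>
#include <stdbool.h>

/* Take a reference only if the object still has one; returns false when the
 * refcount has already hit zero and the object is being freed. */
static bool ref_get_not_zero(atomic_long *refcount)
{
        long old = atomic_load(refcount);

        do {
                if (old == 0)
                        return false;   /* too late: someone is freeing it */
        } while (!atomic_compare_exchange_weak(refcount, &old, old + 1));

        return true;
}

static void ref_put(atomic_long *refcount, void (*release)(void))
{
        if (atomic_fetch_sub(refcount, 1) == 1)
                release();              /* we dropped the last reference */
}

static void release_obj(void)
{
        /* free the object here */
}

int main(void)
{
        atomic_long refcount = 1;       /* object starts with one reference */

        if (ref_get_not_zero(&refcount))
                ref_put(&refcount, release_obj);
        ref_put(&refcount, release_obj);        /* drops the last reference */
        return 0;
}

In the hunk above the same idea lets the walker ignore dying events, and the iteration is restarted after each mutex_lock() because the RCU read section had to be dropped.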
@@ -3674,12 +3729,24 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) | |||
3674 | return -EINVAL; | 3729 | return -EINVAL; |
3675 | 3730 | ||
3676 | WARN_ON_ONCE(event->ctx->parent_ctx); | 3731 | WARN_ON_ONCE(event->ctx->parent_ctx); |
3732 | again: | ||
3677 | mutex_lock(&event->mmap_mutex); | 3733 | mutex_lock(&event->mmap_mutex); |
3678 | if (event->rb) { | 3734 | if (event->rb) { |
3679 | if (event->rb->nr_pages == nr_pages) | 3735 | if (event->rb->nr_pages != nr_pages) { |
3680 | atomic_inc(&event->rb->refcount); | ||
3681 | else | ||
3682 | ret = -EINVAL; | 3736 | ret = -EINVAL; |
3737 | goto unlock; | ||
3738 | } | ||
3739 | |||
3740 | if (!atomic_inc_not_zero(&event->rb->mmap_count)) { | ||
3741 | /* | ||
3742 | * Raced against perf_mmap_close() through | ||
3743 | * perf_event_set_output(). Try again, hope for better | ||
3744 | * luck. | ||
3745 | */ | ||
3746 | mutex_unlock(&event->mmap_mutex); | ||
3747 | goto again; | ||
3748 | } | ||
3749 | |||
3683 | goto unlock; | 3750 | goto unlock; |
3684 | } | 3751 | } |
3685 | 3752 | ||
@@ -3720,12 +3787,16 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) | |||
3720 | ret = -ENOMEM; | 3787 | ret = -ENOMEM; |
3721 | goto unlock; | 3788 | goto unlock; |
3722 | } | 3789 | } |
3723 | rcu_assign_pointer(event->rb, rb); | 3790 | |
3791 | atomic_set(&rb->mmap_count, 1); | ||
3792 | rb->mmap_locked = extra; | ||
3793 | rb->mmap_user = get_current_user(); | ||
3724 | 3794 | ||
3725 | atomic_long_add(user_extra, &user->locked_vm); | 3795 | atomic_long_add(user_extra, &user->locked_vm); |
3726 | event->mmap_locked = extra; | 3796 | vma->vm_mm->pinned_vm += extra; |
3727 | event->mmap_user = get_current_user(); | 3797 | |
3728 | vma->vm_mm->pinned_vm += event->mmap_locked; | 3798 | ring_buffer_attach(event, rb); |
3799 | rcu_assign_pointer(event->rb, rb); | ||
3729 | 3800 | ||
3730 | perf_event_update_userpage(event); | 3801 | perf_event_update_userpage(event); |
3731 | 3802 | ||
@@ -3734,7 +3805,11 @@ unlock: | |||
3734 | atomic_inc(&event->mmap_count); | 3805 | atomic_inc(&event->mmap_count); |
3735 | mutex_unlock(&event->mmap_mutex); | 3806 | mutex_unlock(&event->mmap_mutex); |
3736 | 3807 | ||
3737 | vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; | 3808 | /* |
3809 | * Since pinned accounting is per vm we cannot allow fork() to copy our | ||
3810 | * vma. | ||
3811 | */ | ||
3812 | vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP; | ||
3738 | vma->vm_ops = &perf_mmap_vmops; | 3813 | vma->vm_ops = &perf_mmap_vmops; |
3739 | 3814 | ||
3740 | return ret; | 3815 | return ret; |
@@ -6412,6 +6487,8 @@ set: | |||
6412 | if (atomic_read(&event->mmap_count)) | 6487 | if (atomic_read(&event->mmap_count)) |
6413 | goto unlock; | 6488 | goto unlock; |
6414 | 6489 | ||
6490 | old_rb = event->rb; | ||
6491 | |||
6415 | if (output_event) { | 6492 | if (output_event) { |
6416 | /* get the rb we want to redirect to */ | 6493 | /* get the rb we want to redirect to */ |
6417 | rb = ring_buffer_get(output_event); | 6494 | rb = ring_buffer_get(output_event); |
@@ -6419,16 +6496,28 @@ set: | |||
6419 | goto unlock; | 6496 | goto unlock; |
6420 | } | 6497 | } |
6421 | 6498 | ||
6422 | old_rb = event->rb; | ||
6423 | rcu_assign_pointer(event->rb, rb); | ||
6424 | if (old_rb) | 6499 | if (old_rb) |
6425 | ring_buffer_detach(event, old_rb); | 6500 | ring_buffer_detach(event, old_rb); |
6501 | |||
6502 | if (rb) | ||
6503 | ring_buffer_attach(event, rb); | ||
6504 | |||
6505 | rcu_assign_pointer(event->rb, rb); | ||
6506 | |||
6507 | if (old_rb) { | ||
6508 | ring_buffer_put(old_rb); | ||
6509 | /* | ||
6510 | * Since we detached before setting the new rb, so that we | ||
6511 | * could attach the new rb, we could have missed a wakeup. | ||
6512 | * Provide it now. | ||
6513 | */ | ||
6514 | wake_up_all(&event->waitq); | ||
6515 | } | ||
6516 | |||
6426 | ret = 0; | 6517 | ret = 0; |
6427 | unlock: | 6518 | unlock: |
6428 | mutex_unlock(&event->mmap_mutex); | 6519 | mutex_unlock(&event->mmap_mutex); |
6429 | 6520 | ||
6430 | if (old_rb) | ||
6431 | ring_buffer_put(old_rb); | ||
6432 | out: | 6521 | out: |
6433 | return ret; | 6522 | return ret; |
6434 | } | 6523 | } |
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c index a64f8aeb5c1f..20185ea64aa6 100644 --- a/kernel/events/hw_breakpoint.c +++ b/kernel/events/hw_breakpoint.c | |||
@@ -120,7 +120,7 @@ static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type) | |||
120 | list_for_each_entry(iter, &bp_task_head, hw.bp_list) { | 120 | list_for_each_entry(iter, &bp_task_head, hw.bp_list) { |
121 | if (iter->hw.bp_target == tsk && | 121 | if (iter->hw.bp_target == tsk && |
122 | find_slot_idx(iter) == type && | 122 | find_slot_idx(iter) == type && |
123 | cpu == iter->cpu) | 123 | (iter->cpu < 0 || cpu == iter->cpu)) |
124 | count += hw_breakpoint_weight(iter); | 124 | count += hw_breakpoint_weight(iter); |
125 | } | 125 | } |
126 | 126 | ||
@@ -149,7 +149,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp, | |||
149 | return; | 149 | return; |
150 | } | 150 | } |
151 | 151 | ||
152 | for_each_online_cpu(cpu) { | 152 | for_each_possible_cpu(cpu) { |
153 | unsigned int nr; | 153 | unsigned int nr; |
154 | 154 | ||
155 | nr = per_cpu(nr_cpu_bp_pinned[type], cpu); | 155 | nr = per_cpu(nr_cpu_bp_pinned[type], cpu); |
@@ -235,7 +235,7 @@ toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type, | |||
235 | if (cpu >= 0) { | 235 | if (cpu >= 0) { |
236 | toggle_bp_task_slot(bp, cpu, enable, type, weight); | 236 | toggle_bp_task_slot(bp, cpu, enable, type, weight); |
237 | } else { | 237 | } else { |
238 | for_each_online_cpu(cpu) | 238 | for_each_possible_cpu(cpu) |
239 | toggle_bp_task_slot(bp, cpu, enable, type, weight); | 239 | toggle_bp_task_slot(bp, cpu, enable, type, weight); |
240 | } | 240 | } |
241 | 241 | ||
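These hw_breakpoint changes count task-bound breakpoints with iter->cpu < 0 on every CPU and iterate possible rather than online CPUs, so a breakpoint that applies to all CPUs is no longer under-counted. A toy model of that accounting (invented structures, just to show why a negative cpu must contribute to every per-CPU slot count):

#include <stdio.h>

#define NR_CPUS 4

struct bp {
        int cpu;        /* -1 means "all CPUs" */
        int weight;
};

/* Slots consumed on @cpu by the breakpoints in @bps[]. */
static int slots_on_cpu(const struct bp *bps, int nr, int cpu)
{
        int i, count = 0;

        for (i = 0; i < nr; i++) {
                if (bps[i].cpu < 0 || bps[i].cpu == cpu)
                        count += bps[i].weight;
        }
        return count;
}

int main(void)
{
        const struct bp bps[] = {
                { .cpu = -1, .weight = 1 },     /* task-bound: counts everywhere */
                { .cpu =  2, .weight = 1 },     /* pinned to CPU 2 only */
        };
        int cpu;

        for (cpu = 0; cpu < NR_CPUS; cpu++)
                printf("cpu%d: %d slot(s)\n", cpu, slots_on_cpu(bps, 2, cpu));
        return 0;
}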
diff --git a/kernel/events/internal.h b/kernel/events/internal.h index eb675c4d59df..ca6599723be5 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h | |||
@@ -31,6 +31,10 @@ struct ring_buffer { | |||
31 | spinlock_t event_lock; | 31 | spinlock_t event_lock; |
32 | struct list_head event_list; | 32 | struct list_head event_list; |
33 | 33 | ||
34 | atomic_t mmap_count; | ||
35 | unsigned long mmap_locked; | ||
36 | struct user_struct *mmap_user; | ||
37 | |||
34 | struct perf_event_mmap_page *user_page; | 38 | struct perf_event_mmap_page *user_page; |
35 | void *data_pages[0]; | 39 | void *data_pages[0]; |
36 | }; | 40 | }; |
diff --git a/kernel/exit.c b/kernel/exit.c index af2eb3cbd499..7bb73f9d09db 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -649,7 +649,6 @@ static void exit_notify(struct task_struct *tsk, int group_dead) | |||
649 | * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) | 649 | * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) |
650 | */ | 650 | */ |
651 | forget_original_parent(tsk); | 651 | forget_original_parent(tsk); |
652 | exit_task_namespaces(tsk); | ||
653 | 652 | ||
654 | write_lock_irq(&tasklist_lock); | 653 | write_lock_irq(&tasklist_lock); |
655 | if (group_dead) | 654 | if (group_dead) |
@@ -795,6 +794,7 @@ void do_exit(long code) | |||
795 | exit_shm(tsk); | 794 | exit_shm(tsk); |
796 | exit_files(tsk); | 795 | exit_files(tsk); |
797 | exit_fs(tsk); | 796 | exit_fs(tsk); |
797 | exit_task_namespaces(tsk); | ||
798 | exit_task_work(tsk); | 798 | exit_task_work(tsk); |
799 | check_stack_usage(); | 799 | check_stack_usage(); |
800 | exit_thread(); | 800 | exit_thread(); |
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 5a83dde8ca0c..54a4d5223238 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c | |||
@@ -143,7 +143,10 @@ static unsigned int irq_domain_legacy_revmap(struct irq_domain *domain, | |||
143 | * irq_domain_add_simple() - Allocate and register a simple irq_domain. | 143 | * irq_domain_add_simple() - Allocate and register a simple irq_domain. |
144 | * @of_node: pointer to interrupt controller's device tree node. | 144 | * @of_node: pointer to interrupt controller's device tree node. |
145 | * @size: total number of irqs in mapping | 145 | * @size: total number of irqs in mapping |
146 | * @first_irq: first number of irq block assigned to the domain | 146 | * @first_irq: first number of irq block assigned to the domain, |
147 | * pass zero to assign irqs on-the-fly. This will result in a | ||
148 | * linear IRQ domain so it is important to use irq_create_mapping() | ||
149 | * for each used IRQ, especially when SPARSE_IRQ is enabled. | ||
147 | * @ops: map/unmap domain callbacks | 150 | * @ops: map/unmap domain callbacks |
148 | * @host_data: Controller private data pointer | 151 | * @host_data: Controller private data pointer |
149 | * | 152 | * |
@@ -191,6 +194,7 @@ struct irq_domain *irq_domain_add_simple(struct device_node *of_node, | |||
191 | /* A linear domain is the default */ | 194 | /* A linear domain is the default */ |
192 | return irq_domain_add_linear(of_node, size, ops, host_data); | 195 | return irq_domain_add_linear(of_node, size, ops, host_data); |
193 | } | 196 | } |
197 | EXPORT_SYMBOL_GPL(irq_domain_add_simple); | ||
194 | 198 | ||
195 | /** | 199 | /** |
196 | * irq_domain_add_legacy() - Allocate and register a legacy revmap irq_domain. | 200 | * irq_domain_add_legacy() - Allocate and register a legacy revmap irq_domain. |
@@ -397,11 +401,12 @@ static void irq_domain_disassociate_many(struct irq_domain *domain, | |||
397 | while (count--) { | 401 | while (count--) { |
398 | int irq = irq_base + count; | 402 | int irq = irq_base + count; |
399 | struct irq_data *irq_data = irq_get_irq_data(irq); | 403 | struct irq_data *irq_data = irq_get_irq_data(irq); |
400 | irq_hw_number_t hwirq = irq_data->hwirq; | 404 | irq_hw_number_t hwirq; |
401 | 405 | ||
402 | if (WARN_ON(!irq_data || irq_data->domain != domain)) | 406 | if (WARN_ON(!irq_data || irq_data->domain != domain)) |
403 | continue; | 407 | continue; |
404 | 408 | ||
409 | hwirq = irq_data->hwirq; | ||
405 | irq_set_status_flags(irq, IRQ_NOREQUEST); | 410 | irq_set_status_flags(irq, IRQ_NOREQUEST); |
406 | 411 | ||
407 | /* remove chip and handler */ | 412 | /* remove chip and handler */ |
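The disassociate fix above defers reading irq_data->hwirq until after the WARN_ON(!irq_data ...) check; reading the field first dereferences a NULL pointer before the check can help. A tiny standalone illustration of the corrected ordering (lookup() and struct entry are invented names):

#include <stddef.h>
#include <stdio.h>

struct entry {
        unsigned long hw;
};

static struct entry *lookup(int i)
{
        static struct entry table[] = { { 100 }, { 101 } };

        return (i >= 0 && i < 2) ? &table[i] : NULL;    /* may legitimately fail */
}

static void tear_down(int base, int count)
{
        while (count--) {
                int i = base + count;
                struct entry *e = lookup(i);
                unsigned long hw;

                if (e == NULL) {        /* validate before touching any field */
                        fprintf(stderr, "no entry for %d, skipping\n", i);
                        continue;
                }

                hw = e->hw;             /* safe: only read after the check */
                printf("tearing down %d (hw %lu)\n", i, hw);
        }
}

int main(void)
{
        tear_down(0, 3);                /* index 2 has no entry */
        return 0;
}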
diff --git a/kernel/kmod.c b/kernel/kmod.c index 1296e72e4161..8241906c4b61 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c | |||
@@ -569,6 +569,11 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait) | |||
569 | int retval = 0; | 569 | int retval = 0; |
570 | 570 | ||
571 | helper_lock(); | 571 | helper_lock(); |
572 | if (!sub_info->path) { | ||
573 | retval = -EINVAL; | ||
574 | goto out; | ||
575 | } | ||
576 | |||
572 | if (sub_info->path[0] == '\0') | 577 | if (sub_info->path[0] == '\0') |
573 | goto out; | 578 | goto out; |
574 | 579 | ||
diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 3fed7f0cbcdf..bddf3b201a48 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c | |||
@@ -467,6 +467,7 @@ static struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr) | |||
467 | /* Optimization staging list, protected by kprobe_mutex */ | 467 | /* Optimization staging list, protected by kprobe_mutex */ |
468 | static LIST_HEAD(optimizing_list); | 468 | static LIST_HEAD(optimizing_list); |
469 | static LIST_HEAD(unoptimizing_list); | 469 | static LIST_HEAD(unoptimizing_list); |
470 | static LIST_HEAD(freeing_list); | ||
470 | 471 | ||
471 | static void kprobe_optimizer(struct work_struct *work); | 472 | static void kprobe_optimizer(struct work_struct *work); |
472 | static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer); | 473 | static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer); |
@@ -504,7 +505,7 @@ static __kprobes void do_optimize_kprobes(void) | |||
504 | * Unoptimize (replace a jump with a breakpoint and remove the breakpoint | 505 | * Unoptimize (replace a jump with a breakpoint and remove the breakpoint |
505 | * if need) kprobes listed on unoptimizing_list. | 506 | * if need) kprobes listed on unoptimizing_list. |
506 | */ | 507 | */ |
507 | static __kprobes void do_unoptimize_kprobes(struct list_head *free_list) | 508 | static __kprobes void do_unoptimize_kprobes(void) |
508 | { | 509 | { |
509 | struct optimized_kprobe *op, *tmp; | 510 | struct optimized_kprobe *op, *tmp; |
510 | 511 | ||
@@ -515,9 +516,9 @@ static __kprobes void do_unoptimize_kprobes(struct list_head *free_list) | |||
515 | /* Ditto to do_optimize_kprobes */ | 516 | /* Ditto to do_optimize_kprobes */ |
516 | get_online_cpus(); | 517 | get_online_cpus(); |
517 | mutex_lock(&text_mutex); | 518 | mutex_lock(&text_mutex); |
518 | arch_unoptimize_kprobes(&unoptimizing_list, free_list); | 519 | arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list); |
519 | /* Loop free_list for disarming */ | 520 | /* Loop free_list for disarming */ |
520 | list_for_each_entry_safe(op, tmp, free_list, list) { | 521 | list_for_each_entry_safe(op, tmp, &freeing_list, list) { |
521 | /* Disarm probes if marked disabled */ | 522 | /* Disarm probes if marked disabled */ |
522 | if (kprobe_disabled(&op->kp)) | 523 | if (kprobe_disabled(&op->kp)) |
523 | arch_disarm_kprobe(&op->kp); | 524 | arch_disarm_kprobe(&op->kp); |
@@ -536,11 +537,11 @@ static __kprobes void do_unoptimize_kprobes(struct list_head *free_list) | |||
536 | } | 537 | } |
537 | 538 | ||
538 | /* Reclaim all kprobes on the free_list */ | 539 | /* Reclaim all kprobes on the free_list */ |
539 | static __kprobes void do_free_cleaned_kprobes(struct list_head *free_list) | 540 | static __kprobes void do_free_cleaned_kprobes(void) |
540 | { | 541 | { |
541 | struct optimized_kprobe *op, *tmp; | 542 | struct optimized_kprobe *op, *tmp; |
542 | 543 | ||
543 | list_for_each_entry_safe(op, tmp, free_list, list) { | 544 | list_for_each_entry_safe(op, tmp, &freeing_list, list) { |
544 | BUG_ON(!kprobe_unused(&op->kp)); | 545 | BUG_ON(!kprobe_unused(&op->kp)); |
545 | list_del_init(&op->list); | 546 | list_del_init(&op->list); |
546 | free_aggr_kprobe(&op->kp); | 547 | free_aggr_kprobe(&op->kp); |
@@ -556,8 +557,6 @@ static __kprobes void kick_kprobe_optimizer(void) | |||
556 | /* Kprobe jump optimizer */ | 557 | /* Kprobe jump optimizer */ |
557 | static __kprobes void kprobe_optimizer(struct work_struct *work) | 558 | static __kprobes void kprobe_optimizer(struct work_struct *work) |
558 | { | 559 | { |
559 | LIST_HEAD(free_list); | ||
560 | |||
561 | mutex_lock(&kprobe_mutex); | 560 | mutex_lock(&kprobe_mutex); |
562 | /* Lock modules while optimizing kprobes */ | 561 | /* Lock modules while optimizing kprobes */ |
563 | mutex_lock(&module_mutex); | 562 | mutex_lock(&module_mutex); |
@@ -566,7 +565,7 @@ static __kprobes void kprobe_optimizer(struct work_struct *work) | |||
566 | * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed) | 565 | * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed) |
567 | * kprobes before waiting for quiesence period. | 566 | * kprobes before waiting for quiesence period. |
568 | */ | 567 | */ |
569 | do_unoptimize_kprobes(&free_list); | 568 | do_unoptimize_kprobes(); |
570 | 569 | ||
571 | /* | 570 | /* |
572 | * Step 2: Wait for quiesence period to ensure all running interrupts | 571 | * Step 2: Wait for quiesence period to ensure all running interrupts |
@@ -581,7 +580,7 @@ static __kprobes void kprobe_optimizer(struct work_struct *work) | |||
581 | do_optimize_kprobes(); | 580 | do_optimize_kprobes(); |
582 | 581 | ||
583 | /* Step 4: Free cleaned kprobes after quiesence period */ | 582 | /* Step 4: Free cleaned kprobes after quiesence period */ |
584 | do_free_cleaned_kprobes(&free_list); | 583 | do_free_cleaned_kprobes(); |
585 | 584 | ||
586 | mutex_unlock(&module_mutex); | 585 | mutex_unlock(&module_mutex); |
587 | mutex_unlock(&kprobe_mutex); | 586 | mutex_unlock(&kprobe_mutex); |
@@ -723,8 +722,19 @@ static void __kprobes kill_optimized_kprobe(struct kprobe *p) | |||
723 | if (!list_empty(&op->list)) | 722 | if (!list_empty(&op->list)) |
724 | /* Dequeue from the (un)optimization queue */ | 723 | /* Dequeue from the (un)optimization queue */ |
725 | list_del_init(&op->list); | 724 | list_del_init(&op->list); |
726 | |||
727 | op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; | 725 | op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; |
726 | |||
727 | if (kprobe_unused(p)) { | ||
728 | /* Enqueue if it is unused */ | ||
729 | list_add(&op->list, &freeing_list); | ||
730 | /* | ||
731 | * Remove unused probes from the hash list. After waiting | ||
732 | * for synchronization, this probe is reclaimed. | ||
733 | * (reclaiming is done by do_free_cleaned_kprobes().) | ||
734 | */ | ||
735 | hlist_del_rcu(&op->kp.hlist); | ||
736 | } | ||
737 | |||
728 | /* Don't touch the code, because it is already freed. */ | 738 | /* Don't touch the code, because it is already freed. */ |
729 | arch_remove_optimized_kprobe(op); | 739 | arch_remove_optimized_kprobe(op); |
730 | } | 740 | } |
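With the free list promoted to a file-scope freeing_list, kill_optimized_kprobe() can queue an unused probe and unhash it immediately, and the optimizer worker reclaims everything on the list after the quiescent period. A rough userspace sketch of that "producers queue, one worker drains later" shape, using a mutex-protected singly linked list instead of the kernel's list and RCU machinery:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct victim {
        struct victim *next;
        int id;
};

static pthread_mutex_t freeing_lock = PTHREAD_MUTEX_INITIALIZER;
static struct victim *freeing_list;     /* file-scope, shared by all producers */

/* Producer side: after detaching the object from its live structures,
 * queue it here for later reclamation. */
static void queue_for_free(struct victim *v)
{
        pthread_mutex_lock(&freeing_lock);
        v->next = freeing_list;
        freeing_list = v;
        pthread_mutex_unlock(&freeing_lock);
}

/* Worker side: after waiting out a grace period (not shown), drain the
 * whole list and free every entry. */
static void reclaim_freeing_list(void)
{
        struct victim *v;

        pthread_mutex_lock(&freeing_lock);
        v = freeing_list;
        freeing_list = NULL;
        pthread_mutex_unlock(&freeing_lock);

        while (v) {
                struct victim *next = v->next;

                printf("reclaiming %d\n", v->id);
                free(v);
                v = next;
        }
}

int main(void)
{
        struct victim *v = malloc(sizeof(*v));

        v->id = 1;
        v->next = NULL;
        queue_for_free(v);
        reclaim_freeing_list();
        return 0;
}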
diff --git a/kernel/module.c b/kernel/module.c index b049939177f6..cab4bce49c23 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -2431,10 +2431,10 @@ static void kmemleak_load_module(const struct module *mod, | |||
2431 | kmemleak_scan_area(mod, sizeof(struct module), GFP_KERNEL); | 2431 | kmemleak_scan_area(mod, sizeof(struct module), GFP_KERNEL); |
2432 | 2432 | ||
2433 | for (i = 1; i < info->hdr->e_shnum; i++) { | 2433 | for (i = 1; i < info->hdr->e_shnum; i++) { |
2434 | const char *name = info->secstrings + info->sechdrs[i].sh_name; | 2434 | /* Scan all writable sections that's not executable */ |
2435 | if (!(info->sechdrs[i].sh_flags & SHF_ALLOC)) | 2435 | if (!(info->sechdrs[i].sh_flags & SHF_ALLOC) || |
2436 | continue; | 2436 | !(info->sechdrs[i].sh_flags & SHF_WRITE) || |
2437 | if (!strstarts(name, ".data") && !strstarts(name, ".bss")) | 2437 | (info->sechdrs[i].sh_flags & SHF_EXECINSTR)) |
2438 | continue; | 2438 | continue; |
2439 | 2439 | ||
2440 | kmemleak_scan_area((void *)info->sechdrs[i].sh_addr, | 2440 | kmemleak_scan_area((void *)info->sechdrs[i].sh_addr, |
@@ -2769,24 +2769,11 @@ static void find_module_sections(struct module *mod, struct load_info *info) | |||
2769 | mod->trace_events = section_objs(info, "_ftrace_events", | 2769 | mod->trace_events = section_objs(info, "_ftrace_events", |
2770 | sizeof(*mod->trace_events), | 2770 | sizeof(*mod->trace_events), |
2771 | &mod->num_trace_events); | 2771 | &mod->num_trace_events); |
2772 | /* | ||
2773 | * This section contains pointers to allocated objects in the trace | ||
2774 | * code and not scanning it leads to false positives. | ||
2775 | */ | ||
2776 | kmemleak_scan_area(mod->trace_events, sizeof(*mod->trace_events) * | ||
2777 | mod->num_trace_events, GFP_KERNEL); | ||
2778 | #endif | 2772 | #endif |
2779 | #ifdef CONFIG_TRACING | 2773 | #ifdef CONFIG_TRACING |
2780 | mod->trace_bprintk_fmt_start = section_objs(info, "__trace_printk_fmt", | 2774 | mod->trace_bprintk_fmt_start = section_objs(info, "__trace_printk_fmt", |
2781 | sizeof(*mod->trace_bprintk_fmt_start), | 2775 | sizeof(*mod->trace_bprintk_fmt_start), |
2782 | &mod->num_trace_bprintk_fmt); | 2776 | &mod->num_trace_bprintk_fmt); |
2783 | /* | ||
2784 | * This section contains pointers to allocated objects in the trace | ||
2785 | * code and not scanning it leads to false positives. | ||
2786 | */ | ||
2787 | kmemleak_scan_area(mod->trace_bprintk_fmt_start, | ||
2788 | sizeof(*mod->trace_bprintk_fmt_start) * | ||
2789 | mod->num_trace_bprintk_fmt, GFP_KERNEL); | ||
2790 | #endif | 2777 | #endif |
2791 | #ifdef CONFIG_FTRACE_MCOUNT_RECORD | 2778 | #ifdef CONFIG_FTRACE_MCOUNT_RECORD |
2792 | /* sechdrs[0].sh_size is always zero */ | 2779 | /* sechdrs[0].sh_size is always zero */ |
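The kmemleak hunk replaces name matching on ".data"/".bss" with a flag test: scan any allocated, writable, non-executable section, which also covers the tracing sections whose explicit kmemleak_scan_area() calls are dropped below. A compact standalone illustration of that predicate using the standard ELF section flags from <elf.h> (not the kernel's load_info structures):

#include <elf.h>
#include <stdio.h>

/* A section is worth scanning for pointers if it is loaded into memory,
 * writable, and not executable code. */
static int should_scan(const Elf64_Shdr *shdr)
{
        return (shdr->sh_flags & SHF_ALLOC) &&
               (shdr->sh_flags & SHF_WRITE) &&
               !(shdr->sh_flags & SHF_EXECINSTR);
}

int main(void)
{
        Elf64_Shdr data = { .sh_flags = SHF_ALLOC | SHF_WRITE };        /* .data-like */
        Elf64_Shdr text = { .sh_flags = SHF_ALLOC | SHF_EXECINSTR };    /* .text-like */
        Elf64_Shdr ro   = { .sh_flags = SHF_ALLOC };                    /* .rodata-like */

        printf("data: %d, text: %d, rodata: %d\n",
               should_scan(&data), should_scan(&text), should_scan(&ro));
        return 0;
}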
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 42670e9b44e0..c7f31aa272f7 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c | |||
@@ -51,59 +51,28 @@ static int check_clock(const clockid_t which_clock) | |||
51 | return error; | 51 | return error; |
52 | } | 52 | } |
53 | 53 | ||
54 | static inline union cpu_time_count | 54 | static inline unsigned long long |
55 | timespec_to_sample(const clockid_t which_clock, const struct timespec *tp) | 55 | timespec_to_sample(const clockid_t which_clock, const struct timespec *tp) |
56 | { | 56 | { |
57 | union cpu_time_count ret; | 57 | unsigned long long ret; |
58 | ret.sched = 0; /* high half always zero when .cpu used */ | 58 | |
59 | ret = 0; /* high half always zero when .cpu used */ | ||
59 | if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { | 60 | if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { |
60 | ret.sched = (unsigned long long)tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec; | 61 | ret = (unsigned long long)tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec; |
61 | } else { | 62 | } else { |
62 | ret.cpu = timespec_to_cputime(tp); | 63 | ret = cputime_to_expires(timespec_to_cputime(tp)); |
63 | } | 64 | } |
64 | return ret; | 65 | return ret; |
65 | } | 66 | } |
66 | 67 | ||
67 | static void sample_to_timespec(const clockid_t which_clock, | 68 | static void sample_to_timespec(const clockid_t which_clock, |
68 | union cpu_time_count cpu, | 69 | unsigned long long expires, |
69 | struct timespec *tp) | 70 | struct timespec *tp) |
70 | { | 71 | { |
71 | if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) | 72 | if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) |
72 | *tp = ns_to_timespec(cpu.sched); | 73 | *tp = ns_to_timespec(expires); |
73 | else | 74 | else |
74 | cputime_to_timespec(cpu.cpu, tp); | 75 | cputime_to_timespec((__force cputime_t)expires, tp); |
75 | } | ||
76 | |||
77 | static inline int cpu_time_before(const clockid_t which_clock, | ||
78 | union cpu_time_count now, | ||
79 | union cpu_time_count then) | ||
80 | { | ||
81 | if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { | ||
82 | return now.sched < then.sched; | ||
83 | } else { | ||
84 | return now.cpu < then.cpu; | ||
85 | } | ||
86 | } | ||
87 | static inline void cpu_time_add(const clockid_t which_clock, | ||
88 | union cpu_time_count *acc, | ||
89 | union cpu_time_count val) | ||
90 | { | ||
91 | if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { | ||
92 | acc->sched += val.sched; | ||
93 | } else { | ||
94 | acc->cpu += val.cpu; | ||
95 | } | ||
96 | } | ||
97 | static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock, | ||
98 | union cpu_time_count a, | ||
99 | union cpu_time_count b) | ||
100 | { | ||
101 | if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { | ||
102 | a.sched -= b.sched; | ||
103 | } else { | ||
104 | a.cpu -= b.cpu; | ||
105 | } | ||
106 | return a; | ||
107 | } | 76 | } |
108 | 77 | ||
109 | /* | 78 | /* |
@@ -111,47 +80,31 @@ static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock, | |||
111 | * given the current clock sample. | 80 | * given the current clock sample. |
112 | */ | 81 | */ |
113 | static void bump_cpu_timer(struct k_itimer *timer, | 82 | static void bump_cpu_timer(struct k_itimer *timer, |
114 | union cpu_time_count now) | 83 | unsigned long long now) |
115 | { | 84 | { |
116 | int i; | 85 | int i; |
86 | unsigned long long delta, incr; | ||
117 | 87 | ||
118 | if (timer->it.cpu.incr.sched == 0) | 88 | if (timer->it.cpu.incr == 0) |
119 | return; | 89 | return; |
120 | 90 | ||
121 | if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) { | 91 | if (now < timer->it.cpu.expires) |
122 | unsigned long long delta, incr; | 92 | return; |
123 | 93 | ||
124 | if (now.sched < timer->it.cpu.expires.sched) | 94 | incr = timer->it.cpu.incr; |
125 | return; | 95 | delta = now + incr - timer->it.cpu.expires; |
126 | incr = timer->it.cpu.incr.sched; | ||
127 | delta = now.sched + incr - timer->it.cpu.expires.sched; | ||
128 | /* Don't use (incr*2 < delta), incr*2 might overflow. */ | ||
129 | for (i = 0; incr < delta - incr; i++) | ||
130 | incr = incr << 1; | ||
131 | for (; i >= 0; incr >>= 1, i--) { | ||
132 | if (delta < incr) | ||
133 | continue; | ||
134 | timer->it.cpu.expires.sched += incr; | ||
135 | timer->it_overrun += 1 << i; | ||
136 | delta -= incr; | ||
137 | } | ||
138 | } else { | ||
139 | cputime_t delta, incr; | ||
140 | 96 | ||
141 | if (now.cpu < timer->it.cpu.expires.cpu) | 97 | /* Don't use (incr*2 < delta), incr*2 might overflow. */ |
142 | return; | 98 | for (i = 0; incr < delta - incr; i++) |
143 | incr = timer->it.cpu.incr.cpu; | 99 | incr = incr << 1; |
144 | delta = now.cpu + incr - timer->it.cpu.expires.cpu; | 100 | |
145 | /* Don't use (incr*2 < delta), incr*2 might overflow. */ | 101 | for (; i >= 0; incr >>= 1, i--) { |
146 | for (i = 0; incr < delta - incr; i++) | 102 | if (delta < incr) |
147 | incr += incr; | 103 | continue; |
148 | for (; i >= 0; incr = incr >> 1, i--) { | 104 | |
149 | if (delta < incr) | 105 | timer->it.cpu.expires += incr; |
150 | continue; | 106 | timer->it_overrun += 1 << i; |
151 | timer->it.cpu.expires.cpu += incr; | 107 | delta -= incr; |
152 | timer->it_overrun += 1 << i; | ||
153 | delta -= incr; | ||
154 | } | ||
155 | } | 108 | } |
156 | } | 109 | } |
157 | 110 | ||
@@ -170,21 +123,21 @@ static inline int task_cputime_zero(const struct task_cputime *cputime) | |||
170 | return 0; | 123 | return 0; |
171 | } | 124 | } |
172 | 125 | ||
173 | static inline cputime_t prof_ticks(struct task_struct *p) | 126 | static inline unsigned long long prof_ticks(struct task_struct *p) |
174 | { | 127 | { |
175 | cputime_t utime, stime; | 128 | cputime_t utime, stime; |
176 | 129 | ||
177 | task_cputime(p, &utime, &stime); | 130 | task_cputime(p, &utime, &stime); |
178 | 131 | ||
179 | return utime + stime; | 132 | return cputime_to_expires(utime + stime); |
180 | } | 133 | } |
181 | static inline cputime_t virt_ticks(struct task_struct *p) | 134 | static inline unsigned long long virt_ticks(struct task_struct *p) |
182 | { | 135 | { |
183 | cputime_t utime; | 136 | cputime_t utime; |
184 | 137 | ||
185 | task_cputime(p, &utime, NULL); | 138 | task_cputime(p, &utime, NULL); |
186 | 139 | ||
187 | return utime; | 140 | return cputime_to_expires(utime); |
188 | } | 141 | } |
189 | 142 | ||
190 | static int | 143 | static int |
@@ -225,19 +178,19 @@ posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp) | |||
225 | * Sample a per-thread clock for the given task. | 178 | * Sample a per-thread clock for the given task. |
226 | */ | 179 | */ |
227 | static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p, | 180 | static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p, |
228 | union cpu_time_count *cpu) | 181 | unsigned long long *sample) |
229 | { | 182 | { |
230 | switch (CPUCLOCK_WHICH(which_clock)) { | 183 | switch (CPUCLOCK_WHICH(which_clock)) { |
231 | default: | 184 | default: |
232 | return -EINVAL; | 185 | return -EINVAL; |
233 | case CPUCLOCK_PROF: | 186 | case CPUCLOCK_PROF: |
234 | cpu->cpu = prof_ticks(p); | 187 | *sample = prof_ticks(p); |
235 | break; | 188 | break; |
236 | case CPUCLOCK_VIRT: | 189 | case CPUCLOCK_VIRT: |
237 | cpu->cpu = virt_ticks(p); | 190 | *sample = virt_ticks(p); |
238 | break; | 191 | break; |
239 | case CPUCLOCK_SCHED: | 192 | case CPUCLOCK_SCHED: |
240 | cpu->sched = task_sched_runtime(p); | 193 | *sample = task_sched_runtime(p); |
241 | break; | 194 | break; |
242 | } | 195 | } |
243 | return 0; | 196 | return 0; |
@@ -284,7 +237,7 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times) | |||
284 | */ | 237 | */ |
285 | static int cpu_clock_sample_group(const clockid_t which_clock, | 238 | static int cpu_clock_sample_group(const clockid_t which_clock, |
286 | struct task_struct *p, | 239 | struct task_struct *p, |
287 | union cpu_time_count *cpu) | 240 | unsigned long long *sample) |
288 | { | 241 | { |
289 | struct task_cputime cputime; | 242 | struct task_cputime cputime; |
290 | 243 | ||
@@ -293,15 +246,15 @@ static int cpu_clock_sample_group(const clockid_t which_clock, | |||
293 | return -EINVAL; | 246 | return -EINVAL; |
294 | case CPUCLOCK_PROF: | 247 | case CPUCLOCK_PROF: |
295 | thread_group_cputime(p, &cputime); | 248 | thread_group_cputime(p, &cputime); |
296 | cpu->cpu = cputime.utime + cputime.stime; | 249 | *sample = cputime_to_expires(cputime.utime + cputime.stime); |
297 | break; | 250 | break; |
298 | case CPUCLOCK_VIRT: | 251 | case CPUCLOCK_VIRT: |
299 | thread_group_cputime(p, &cputime); | 252 | thread_group_cputime(p, &cputime); |
300 | cpu->cpu = cputime.utime; | 253 | *sample = cputime_to_expires(cputime.utime); |
301 | break; | 254 | break; |
302 | case CPUCLOCK_SCHED: | 255 | case CPUCLOCK_SCHED: |
303 | thread_group_cputime(p, &cputime); | 256 | thread_group_cputime(p, &cputime); |
304 | cpu->sched = cputime.sum_exec_runtime; | 257 | *sample = cputime.sum_exec_runtime; |
305 | break; | 258 | break; |
306 | } | 259 | } |
307 | return 0; | 260 | return 0; |
@@ -312,7 +265,7 @@ static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp) | |||
312 | { | 265 | { |
313 | const pid_t pid = CPUCLOCK_PID(which_clock); | 266 | const pid_t pid = CPUCLOCK_PID(which_clock); |
314 | int error = -EINVAL; | 267 | int error = -EINVAL; |
315 | union cpu_time_count rtn; | 268 | unsigned long long rtn; |
316 | 269 | ||
317 | if (pid == 0) { | 270 | if (pid == 0) { |
318 | /* | 271 | /* |
@@ -446,6 +399,15 @@ static int posix_cpu_timer_del(struct k_itimer *timer) | |||
446 | return ret; | 399 | return ret; |
447 | } | 400 | } |
448 | 401 | ||
402 | static void cleanup_timers_list(struct list_head *head, | ||
403 | unsigned long long curr) | ||
404 | { | ||
405 | struct cpu_timer_list *timer, *next; | ||
406 | |||
407 | list_for_each_entry_safe(timer, next, head, entry) | ||
408 | list_del_init(&timer->entry); | ||
409 | } | ||
410 | |||
449 | /* | 411 | /* |
450 | * Clean out CPU timers still ticking when a thread exited. The task | 412 | * Clean out CPU timers still ticking when a thread exited. The task |
451 | * pointer is cleared, and the expiry time is replaced with the residual | 413 | * pointer is cleared, and the expiry time is replaced with the residual |
@@ -456,37 +418,12 @@ static void cleanup_timers(struct list_head *head, | |||
456 | cputime_t utime, cputime_t stime, | 418 | cputime_t utime, cputime_t stime, |
457 | unsigned long long sum_exec_runtime) | 419 | unsigned long long sum_exec_runtime) |
458 | { | 420 | { |
459 | struct cpu_timer_list *timer, *next; | ||
460 | cputime_t ptime = utime + stime; | ||
461 | |||
462 | list_for_each_entry_safe(timer, next, head, entry) { | ||
463 | list_del_init(&timer->entry); | ||
464 | if (timer->expires.cpu < ptime) { | ||
465 | timer->expires.cpu = 0; | ||
466 | } else { | ||
467 | timer->expires.cpu -= ptime; | ||
468 | } | ||
469 | } | ||
470 | 421 | ||
471 | ++head; | 422 | cputime_t ptime = utime + stime; |
472 | list_for_each_entry_safe(timer, next, head, entry) { | ||
473 | list_del_init(&timer->entry); | ||
474 | if (timer->expires.cpu < utime) { | ||
475 | timer->expires.cpu = 0; | ||
476 | } else { | ||
477 | timer->expires.cpu -= utime; | ||
478 | } | ||
479 | } | ||
480 | 423 | ||
481 | ++head; | 424 | cleanup_timers_list(head, cputime_to_expires(ptime)); |
482 | list_for_each_entry_safe(timer, next, head, entry) { | 425 | cleanup_timers_list(++head, cputime_to_expires(utime)); |
483 | list_del_init(&timer->entry); | 426 | cleanup_timers_list(++head, sum_exec_runtime); |
484 | if (timer->expires.sched < sum_exec_runtime) { | ||
485 | timer->expires.sched = 0; | ||
486 | } else { | ||
487 | timer->expires.sched -= sum_exec_runtime; | ||
488 | } | ||
489 | } | ||
490 | } | 427 | } |
491 | 428 | ||
492 | /* | 429 | /* |
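Editor's note: the hunk above replaces three nearly identical unlink loops in cleanup_timers() with calls to a single cleanup_timers_list() helper, invoked once per consecutive list head. The sketch below is only a minimal userspace model of that consolidation shape (one helper, three heads); fake_timer, push() and the elapsed arguments are illustrative stand-ins, not the kernel's cpu_timer_list handling.

/*
 * Minimal userspace model of the cleanup_timers()/cleanup_timers_list()
 * split: one helper empties a single timer list, and the caller runs it
 * over three consecutive list heads instead of open-coding three loops.
 * The types and the "elapsed" arguments are illustrative only.
 */
#include <stdio.h>
#include <stdlib.h>

struct fake_timer {
	unsigned long long expires;
	struct fake_timer *next;
};

/* Unlink and free every timer on one list; report what was dropped. */
static void cleanup_timers_list(struct fake_timer **head,
				unsigned long long elapsed)
{
	struct fake_timer *t, *next;

	for (t = *head; t; t = next) {
		next = t->next;
		printf("dropping timer expiring at %llu (elapsed %llu)\n",
		       t->expires, elapsed);
		free(t);
	}
	*head = NULL;
}

/* The caller simply walks the three consecutive heads. */
static void cleanup_timers(struct fake_timer *heads[3],
			   unsigned long long ptime,
			   unsigned long long utime,
			   unsigned long long runtime)
{
	cleanup_timers_list(&heads[0], ptime);
	cleanup_timers_list(&heads[1], utime);
	cleanup_timers_list(&heads[2], runtime);
}

static void push(struct fake_timer **head, unsigned long long expires)
{
	struct fake_timer *t = malloc(sizeof(*t));

	t->expires = expires;
	t->next = *head;
	*head = t;
}

int main(void)
{
	struct fake_timer *heads[3] = { NULL, NULL, NULL };

	push(&heads[0], 100);
	push(&heads[1], 200);
	push(&heads[2], 300);
	cleanup_timers(heads, 10, 5, 7);
	return 0;
}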
@@ -516,17 +453,21 @@ void posix_cpu_timers_exit_group(struct task_struct *tsk) | |||
516 | tsk->se.sum_exec_runtime + sig->sum_sched_runtime); | 453 | tsk->se.sum_exec_runtime + sig->sum_sched_runtime); |
517 | } | 454 | } |
518 | 455 | ||
519 | static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now) | 456 | static void clear_dead_task(struct k_itimer *itimer, unsigned long long now) |
520 | { | 457 | { |
458 | struct cpu_timer_list *timer = &itimer->it.cpu; | ||
459 | |||
521 | /* | 460 | /* |
522 | * That's all for this thread or process. | 461 | * That's all for this thread or process. |
523 | * We leave our residual in expires to be reported. | 462 | * We leave our residual in expires to be reported. |
524 | */ | 463 | */ |
525 | put_task_struct(timer->it.cpu.task); | 464 | put_task_struct(timer->task); |
526 | timer->it.cpu.task = NULL; | 465 | timer->task = NULL; |
527 | timer->it.cpu.expires = cpu_time_sub(timer->it_clock, | 466 | if (timer->expires < now) { |
528 | timer->it.cpu.expires, | 467 | timer->expires = 0; |
529 | now); | 468 | } else { |
469 | timer->expires -= now; | ||
470 | } | ||
530 | } | 471 | } |
531 | 472 | ||
532 | static inline int expires_gt(cputime_t expires, cputime_t new_exp) | 473 | static inline int expires_gt(cputime_t expires, cputime_t new_exp) |
@@ -558,14 +499,14 @@ static void arm_timer(struct k_itimer *timer) | |||
558 | 499 | ||
559 | listpos = head; | 500 | listpos = head; |
560 | list_for_each_entry(next, head, entry) { | 501 | list_for_each_entry(next, head, entry) { |
561 | if (cpu_time_before(timer->it_clock, nt->expires, next->expires)) | 502 | if (nt->expires < next->expires) |
562 | break; | 503 | break; |
563 | listpos = &next->entry; | 504 | listpos = &next->entry; |
564 | } | 505 | } |
565 | list_add(&nt->entry, listpos); | 506 | list_add(&nt->entry, listpos); |
566 | 507 | ||
567 | if (listpos == head) { | 508 | if (listpos == head) { |
568 | union cpu_time_count *exp = &nt->expires; | 509 | unsigned long long exp = nt->expires; |
569 | 510 | ||
570 | /* | 511 | /* |
571 | * We are the new earliest-expiring POSIX 1.b timer, hence | 512 | * We are the new earliest-expiring POSIX 1.b timer, hence |
@@ -576,17 +517,17 @@ static void arm_timer(struct k_itimer *timer) | |||
576 | 517 | ||
577 | switch (CPUCLOCK_WHICH(timer->it_clock)) { | 518 | switch (CPUCLOCK_WHICH(timer->it_clock)) { |
578 | case CPUCLOCK_PROF: | 519 | case CPUCLOCK_PROF: |
579 | if (expires_gt(cputime_expires->prof_exp, exp->cpu)) | 520 | if (expires_gt(cputime_expires->prof_exp, expires_to_cputime(exp))) |
580 | cputime_expires->prof_exp = exp->cpu; | 521 | cputime_expires->prof_exp = expires_to_cputime(exp); |
581 | break; | 522 | break; |
582 | case CPUCLOCK_VIRT: | 523 | case CPUCLOCK_VIRT: |
583 | if (expires_gt(cputime_expires->virt_exp, exp->cpu)) | 524 | if (expires_gt(cputime_expires->virt_exp, expires_to_cputime(exp))) |
584 | cputime_expires->virt_exp = exp->cpu; | 525 | cputime_expires->virt_exp = expires_to_cputime(exp); |
585 | break; | 526 | break; |
586 | case CPUCLOCK_SCHED: | 527 | case CPUCLOCK_SCHED: |
587 | if (cputime_expires->sched_exp == 0 || | 528 | if (cputime_expires->sched_exp == 0 || |
588 | cputime_expires->sched_exp > exp->sched) | 529 | cputime_expires->sched_exp > exp) |
589 | cputime_expires->sched_exp = exp->sched; | 530 | cputime_expires->sched_exp = exp; |
590 | break; | 531 | break; |
591 | } | 532 | } |
592 | } | 533 | } |
@@ -601,20 +542,20 @@ static void cpu_timer_fire(struct k_itimer *timer) | |||
601 | /* | 542 | /* |
602 | * The user doesn't want any signal. | 543 | * The user doesn't want any signal. |
603 | */ | 544 | */ |
604 | timer->it.cpu.expires.sched = 0; | 545 | timer->it.cpu.expires = 0; |
605 | } else if (unlikely(timer->sigq == NULL)) { | 546 | } else if (unlikely(timer->sigq == NULL)) { |
606 | /* | 547 | /* |
607 | * This is a special case for clock_nanosleep, | 548 | * This is a special case for clock_nanosleep, |
608 | * not a normal timer from sys_timer_create. | 549 | * not a normal timer from sys_timer_create. |
609 | */ | 550 | */ |
610 | wake_up_process(timer->it_process); | 551 | wake_up_process(timer->it_process); |
611 | timer->it.cpu.expires.sched = 0; | 552 | timer->it.cpu.expires = 0; |
612 | } else if (timer->it.cpu.incr.sched == 0) { | 553 | } else if (timer->it.cpu.incr == 0) { |
613 | /* | 554 | /* |
614 | * One-shot timer. Clear it as soon as it's fired. | 555 | * One-shot timer. Clear it as soon as it's fired. |
615 | */ | 556 | */ |
616 | posix_timer_event(timer, 0); | 557 | posix_timer_event(timer, 0); |
617 | timer->it.cpu.expires.sched = 0; | 558 | timer->it.cpu.expires = 0; |
618 | } else if (posix_timer_event(timer, ++timer->it_requeue_pending)) { | 559 | } else if (posix_timer_event(timer, ++timer->it_requeue_pending)) { |
619 | /* | 560 | /* |
620 | * The signal did not get queued because the signal | 561 | * The signal did not get queued because the signal |
@@ -632,7 +573,7 @@ static void cpu_timer_fire(struct k_itimer *timer) | |||
632 | */ | 573 | */ |
633 | static int cpu_timer_sample_group(const clockid_t which_clock, | 574 | static int cpu_timer_sample_group(const clockid_t which_clock, |
634 | struct task_struct *p, | 575 | struct task_struct *p, |
635 | union cpu_time_count *cpu) | 576 | unsigned long long *sample) |
636 | { | 577 | { |
637 | struct task_cputime cputime; | 578 | struct task_cputime cputime; |
638 | 579 | ||
@@ -641,13 +582,13 @@ static int cpu_timer_sample_group(const clockid_t which_clock, | |||
641 | default: | 582 | default: |
642 | return -EINVAL; | 583 | return -EINVAL; |
643 | case CPUCLOCK_PROF: | 584 | case CPUCLOCK_PROF: |
644 | cpu->cpu = cputime.utime + cputime.stime; | 585 | *sample = cputime_to_expires(cputime.utime + cputime.stime); |
645 | break; | 586 | break; |
646 | case CPUCLOCK_VIRT: | 587 | case CPUCLOCK_VIRT: |
647 | cpu->cpu = cputime.utime; | 588 | *sample = cputime_to_expires(cputime.utime); |
648 | break; | 589 | break; |
649 | case CPUCLOCK_SCHED: | 590 | case CPUCLOCK_SCHED: |
650 | cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p); | 591 | *sample = cputime.sum_exec_runtime + task_delta_exec(p); |
651 | break; | 592 | break; |
652 | } | 593 | } |
653 | return 0; | 594 | return 0; |
@@ -694,7 +635,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
694 | struct itimerspec *new, struct itimerspec *old) | 635 | struct itimerspec *new, struct itimerspec *old) |
695 | { | 636 | { |
696 | struct task_struct *p = timer->it.cpu.task; | 637 | struct task_struct *p = timer->it.cpu.task; |
697 | union cpu_time_count old_expires, new_expires, old_incr, val; | 638 | unsigned long long old_expires, new_expires, old_incr, val; |
698 | int ret; | 639 | int ret; |
699 | 640 | ||
700 | if (unlikely(p == NULL)) { | 641 | if (unlikely(p == NULL)) { |
@@ -749,7 +690,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
749 | } | 690 | } |
750 | 691 | ||
751 | if (old) { | 692 | if (old) { |
752 | if (old_expires.sched == 0) { | 693 | if (old_expires == 0) { |
753 | old->it_value.tv_sec = 0; | 694 | old->it_value.tv_sec = 0; |
754 | old->it_value.tv_nsec = 0; | 695 | old->it_value.tv_nsec = 0; |
755 | } else { | 696 | } else { |
@@ -764,11 +705,8 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
764 | * new setting. | 705 | * new setting. |
765 | */ | 706 | */ |
766 | bump_cpu_timer(timer, val); | 707 | bump_cpu_timer(timer, val); |
767 | if (cpu_time_before(timer->it_clock, val, | 708 | if (val < timer->it.cpu.expires) { |
768 | timer->it.cpu.expires)) { | 709 | old_expires = timer->it.cpu.expires - val; |
769 | old_expires = cpu_time_sub( | ||
770 | timer->it_clock, | ||
771 | timer->it.cpu.expires, val); | ||
772 | sample_to_timespec(timer->it_clock, | 710 | sample_to_timespec(timer->it_clock, |
773 | old_expires, | 711 | old_expires, |
774 | &old->it_value); | 712 | &old->it_value); |
@@ -791,8 +729,8 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
791 | goto out; | 729 | goto out; |
792 | } | 730 | } |
793 | 731 | ||
794 | if (new_expires.sched != 0 && !(flags & TIMER_ABSTIME)) { | 732 | if (new_expires != 0 && !(flags & TIMER_ABSTIME)) { |
795 | cpu_time_add(timer->it_clock, &new_expires, val); | 733 | new_expires += val; |
796 | } | 734 | } |
797 | 735 | ||
798 | /* | 736 | /* |
@@ -801,8 +739,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
801 | * arm the timer (we'll just fake it for timer_gettime). | 739 | * arm the timer (we'll just fake it for timer_gettime). |
802 | */ | 740 | */ |
803 | timer->it.cpu.expires = new_expires; | 741 | timer->it.cpu.expires = new_expires; |
804 | if (new_expires.sched != 0 && | 742 | if (new_expires != 0 && val < new_expires) { |
805 | cpu_time_before(timer->it_clock, val, new_expires)) { | ||
806 | arm_timer(timer); | 743 | arm_timer(timer); |
807 | } | 744 | } |
808 | 745 | ||
@@ -826,8 +763,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
826 | timer->it_overrun_last = 0; | 763 | timer->it_overrun_last = 0; |
827 | timer->it_overrun = -1; | 764 | timer->it_overrun = -1; |
828 | 765 | ||
829 | if (new_expires.sched != 0 && | 766 | if (new_expires != 0 && !(val < new_expires)) { |
830 | !cpu_time_before(timer->it_clock, val, new_expires)) { | ||
831 | /* | 767 | /* |
832 | * The designated time already passed, so we notify | 768 | * The designated time already passed, so we notify |
833 | * immediately, even if the thread never runs to | 769 | * immediately, even if the thread never runs to |
@@ -849,7 +785,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags, | |||
849 | 785 | ||
850 | static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) | 786 | static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) |
851 | { | 787 | { |
852 | union cpu_time_count now; | 788 | unsigned long long now; |
853 | struct task_struct *p = timer->it.cpu.task; | 789 | struct task_struct *p = timer->it.cpu.task; |
854 | int clear_dead; | 790 | int clear_dead; |
855 | 791 | ||
@@ -859,7 +795,7 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) | |||
859 | sample_to_timespec(timer->it_clock, | 795 | sample_to_timespec(timer->it_clock, |
860 | timer->it.cpu.incr, &itp->it_interval); | 796 | timer->it.cpu.incr, &itp->it_interval); |
861 | 797 | ||
862 | if (timer->it.cpu.expires.sched == 0) { /* Timer not armed at all. */ | 798 | if (timer->it.cpu.expires == 0) { /* Timer not armed at all. */ |
863 | itp->it_value.tv_sec = itp->it_value.tv_nsec = 0; | 799 | itp->it_value.tv_sec = itp->it_value.tv_nsec = 0; |
864 | return; | 800 | return; |
865 | } | 801 | } |
@@ -891,7 +827,7 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) | |||
891 | */ | 827 | */ |
892 | put_task_struct(p); | 828 | put_task_struct(p); |
893 | timer->it.cpu.task = NULL; | 829 | timer->it.cpu.task = NULL; |
894 | timer->it.cpu.expires.sched = 0; | 830 | timer->it.cpu.expires = 0; |
895 | read_unlock(&tasklist_lock); | 831 | read_unlock(&tasklist_lock); |
896 | goto dead; | 832 | goto dead; |
897 | } else { | 833 | } else { |
@@ -912,10 +848,9 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) | |||
912 | goto dead; | 848 | goto dead; |
913 | } | 849 | } |
914 | 850 | ||
915 | if (cpu_time_before(timer->it_clock, now, timer->it.cpu.expires)) { | 851 | if (now < timer->it.cpu.expires) { |
916 | sample_to_timespec(timer->it_clock, | 852 | sample_to_timespec(timer->it_clock, |
917 | cpu_time_sub(timer->it_clock, | 853 | timer->it.cpu.expires - now, |
918 | timer->it.cpu.expires, now), | ||
919 | &itp->it_value); | 854 | &itp->it_value); |
920 | } else { | 855 | } else { |
921 | /* | 856 | /* |
@@ -927,6 +862,28 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) | |||
927 | } | 862 | } |
928 | } | 863 | } |
929 | 864 | ||
865 | static unsigned long long | ||
866 | check_timers_list(struct list_head *timers, | ||
867 | struct list_head *firing, | ||
868 | unsigned long long curr) | ||
869 | { | ||
870 | int maxfire = 20; | ||
871 | |||
872 | while (!list_empty(timers)) { | ||
873 | struct cpu_timer_list *t; | ||
874 | |||
875 | t = list_first_entry(timers, struct cpu_timer_list, entry); | ||
876 | |||
877 | if (!--maxfire || curr < t->expires) | ||
878 | return t->expires; | ||
879 | |||
880 | t->firing = 1; | ||
881 | list_move_tail(&t->entry, firing); | ||
882 | } | ||
883 | |||
884 | return 0; | ||
885 | } | ||
886 | |||
930 | /* | 887 | /* |
931 | * Check for any per-thread CPU timers that have fired and move them off | 888 | * Check for any per-thread CPU timers that have fired and move them off |
932 | * the tsk->cpu_timers[N] list onto the firing list. Here we update the | 889 | * the tsk->cpu_timers[N] list onto the firing list. Here we update the |
@@ -935,54 +892,20 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) | |||
935 | static void check_thread_timers(struct task_struct *tsk, | 892 | static void check_thread_timers(struct task_struct *tsk, |
936 | struct list_head *firing) | 893 | struct list_head *firing) |
937 | { | 894 | { |
938 | int maxfire; | ||
939 | struct list_head *timers = tsk->cpu_timers; | 895 | struct list_head *timers = tsk->cpu_timers; |
940 | struct signal_struct *const sig = tsk->signal; | 896 | struct signal_struct *const sig = tsk->signal; |
897 | struct task_cputime *tsk_expires = &tsk->cputime_expires; | ||
898 | unsigned long long expires; | ||
941 | unsigned long soft; | 899 | unsigned long soft; |
942 | 900 | ||
943 | maxfire = 20; | 901 | expires = check_timers_list(timers, firing, prof_ticks(tsk)); |
944 | tsk->cputime_expires.prof_exp = 0; | 902 | tsk_expires->prof_exp = expires_to_cputime(expires); |
945 | while (!list_empty(timers)) { | ||
946 | struct cpu_timer_list *t = list_first_entry(timers, | ||
947 | struct cpu_timer_list, | ||
948 | entry); | ||
949 | if (!--maxfire || prof_ticks(tsk) < t->expires.cpu) { | ||
950 | tsk->cputime_expires.prof_exp = t->expires.cpu; | ||
951 | break; | ||
952 | } | ||
953 | t->firing = 1; | ||
954 | list_move_tail(&t->entry, firing); | ||
955 | } | ||
956 | 903 | ||
957 | ++timers; | 904 | expires = check_timers_list(++timers, firing, virt_ticks(tsk)); |
958 | maxfire = 20; | 905 | tsk_expires->virt_exp = expires_to_cputime(expires); |
959 | tsk->cputime_expires.virt_exp = 0; | ||
960 | while (!list_empty(timers)) { | ||
961 | struct cpu_timer_list *t = list_first_entry(timers, | ||
962 | struct cpu_timer_list, | ||
963 | entry); | ||
964 | if (!--maxfire || virt_ticks(tsk) < t->expires.cpu) { | ||
965 | tsk->cputime_expires.virt_exp = t->expires.cpu; | ||
966 | break; | ||
967 | } | ||
968 | t->firing = 1; | ||
969 | list_move_tail(&t->entry, firing); | ||
970 | } | ||
971 | 906 | ||
972 | ++timers; | 907 | tsk_expires->sched_exp = check_timers_list(++timers, firing, |
973 | maxfire = 20; | 908 | tsk->se.sum_exec_runtime); |
974 | tsk->cputime_expires.sched_exp = 0; | ||
975 | while (!list_empty(timers)) { | ||
976 | struct cpu_timer_list *t = list_first_entry(timers, | ||
977 | struct cpu_timer_list, | ||
978 | entry); | ||
979 | if (!--maxfire || tsk->se.sum_exec_runtime < t->expires.sched) { | ||
980 | tsk->cputime_expires.sched_exp = t->expires.sched; | ||
981 | break; | ||
982 | } | ||
983 | t->firing = 1; | ||
984 | list_move_tail(&t->entry, firing); | ||
985 | } | ||
986 | 909 | ||
987 | /* | 910 | /* |
988 | * Check for the special case thread timers. | 911 | * Check for the special case thread timers. |
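Editor's note: the rewritten check_thread_timers() above leans on the new check_timers_list() helper, which pops already-expired entries from a list sorted by expiry (capping the work per pass), moves them to a firing list, and returns the next pending expiry. Below is a hedged userspace approximation; a plain array and a fired counter stand in for the kernel's intrusive cpu_timer_list and firing list, and "now" is an arbitrary sample rather than a real clock read.

/*
 * Userspace approximation of check_timers_list(): scan a list sorted by
 * expiry, fire at most ~20 entries that have already expired, and return
 * the expiry of the first timer still pending (0 if none remain).
 */
#include <stdio.h>

static unsigned long long check_timers_list(const unsigned long long *expiries,
					     int count,
					     unsigned long long now,
					     int *fired)
{
	int maxfire = 20;
	int i;

	*fired = 0;
	for (i = 0; i < count; i++) {
		if (!--maxfire || now < expiries[i])
			return expiries[i];
		(*fired)++;	/* the kernel moves these onto a firing list */
	}
	return 0;
}

int main(void)
{
	unsigned long long expiries[] = { 10, 20, 50, 90 };
	int fired;
	unsigned long long next;

	next = check_timers_list(expiries, 4, 30, &fired);
	printf("fired %d timers, next expiry at %llu\n", fired, next);
	return 0;
}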
@@ -1030,7 +953,8 @@ static void stop_process_timers(struct signal_struct *sig) | |||
1030 | static u32 onecputick; | 953 | static u32 onecputick; |
1031 | 954 | ||
1032 | static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, | 955 | static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, |
1033 | cputime_t *expires, cputime_t cur_time, int signo) | 956 | unsigned long long *expires, |
957 | unsigned long long cur_time, int signo) | ||
1034 | { | 958 | { |
1035 | if (!it->expires) | 959 | if (!it->expires) |
1036 | return; | 960 | return; |
@@ -1066,9 +990,8 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, | |||
1066 | static void check_process_timers(struct task_struct *tsk, | 990 | static void check_process_timers(struct task_struct *tsk, |
1067 | struct list_head *firing) | 991 | struct list_head *firing) |
1068 | { | 992 | { |
1069 | int maxfire; | ||
1070 | struct signal_struct *const sig = tsk->signal; | 993 | struct signal_struct *const sig = tsk->signal; |
1071 | cputime_t utime, ptime, virt_expires, prof_expires; | 994 | unsigned long long utime, ptime, virt_expires, prof_expires; |
1072 | unsigned long long sum_sched_runtime, sched_expires; | 995 | unsigned long long sum_sched_runtime, sched_expires; |
1073 | struct list_head *timers = sig->cpu_timers; | 996 | struct list_head *timers = sig->cpu_timers; |
1074 | struct task_cputime cputime; | 997 | struct task_cputime cputime; |
@@ -1078,52 +1001,13 @@ static void check_process_timers(struct task_struct *tsk, | |||
1078 | * Collect the current process totals. | 1001 | * Collect the current process totals. |
1079 | */ | 1002 | */ |
1080 | thread_group_cputimer(tsk, &cputime); | 1003 | thread_group_cputimer(tsk, &cputime); |
1081 | utime = cputime.utime; | 1004 | utime = cputime_to_expires(cputime.utime); |
1082 | ptime = utime + cputime.stime; | 1005 | ptime = utime + cputime_to_expires(cputime.stime); |
1083 | sum_sched_runtime = cputime.sum_exec_runtime; | 1006 | sum_sched_runtime = cputime.sum_exec_runtime; |
1084 | maxfire = 20; | ||
1085 | prof_expires = 0; | ||
1086 | while (!list_empty(timers)) { | ||
1087 | struct cpu_timer_list *tl = list_first_entry(timers, | ||
1088 | struct cpu_timer_list, | ||
1089 | entry); | ||
1090 | if (!--maxfire || ptime < tl->expires.cpu) { | ||
1091 | prof_expires = tl->expires.cpu; | ||
1092 | break; | ||
1093 | } | ||
1094 | tl->firing = 1; | ||
1095 | list_move_tail(&tl->entry, firing); | ||
1096 | } | ||
1097 | 1007 | ||
1098 | ++timers; | 1008 | prof_expires = check_timers_list(timers, firing, ptime); |
1099 | maxfire = 20; | 1009 | virt_expires = check_timers_list(++timers, firing, utime); |
1100 | virt_expires = 0; | 1010 | sched_expires = check_timers_list(++timers, firing, sum_sched_runtime); |
1101 | while (!list_empty(timers)) { | ||
1102 | struct cpu_timer_list *tl = list_first_entry(timers, | ||
1103 | struct cpu_timer_list, | ||
1104 | entry); | ||
1105 | if (!--maxfire || utime < tl->expires.cpu) { | ||
1106 | virt_expires = tl->expires.cpu; | ||
1107 | break; | ||
1108 | } | ||
1109 | tl->firing = 1; | ||
1110 | list_move_tail(&tl->entry, firing); | ||
1111 | } | ||
1112 | |||
1113 | ++timers; | ||
1114 | maxfire = 20; | ||
1115 | sched_expires = 0; | ||
1116 | while (!list_empty(timers)) { | ||
1117 | struct cpu_timer_list *tl = list_first_entry(timers, | ||
1118 | struct cpu_timer_list, | ||
1119 | entry); | ||
1120 | if (!--maxfire || sum_sched_runtime < tl->expires.sched) { | ||
1121 | sched_expires = tl->expires.sched; | ||
1122 | break; | ||
1123 | } | ||
1124 | tl->firing = 1; | ||
1125 | list_move_tail(&tl->entry, firing); | ||
1126 | } | ||
1127 | 1011 | ||
1128 | /* | 1012 | /* |
1129 | * Check for the special case process timers. | 1013 | * Check for the special case process timers. |
@@ -1162,8 +1046,8 @@ static void check_process_timers(struct task_struct *tsk, | |||
1162 | } | 1046 | } |
1163 | } | 1047 | } |
1164 | 1048 | ||
1165 | sig->cputime_expires.prof_exp = prof_expires; | 1049 | sig->cputime_expires.prof_exp = expires_to_cputime(prof_expires); |
1166 | sig->cputime_expires.virt_exp = virt_expires; | 1050 | sig->cputime_expires.virt_exp = expires_to_cputime(virt_expires); |
1167 | sig->cputime_expires.sched_exp = sched_expires; | 1051 | sig->cputime_expires.sched_exp = sched_expires; |
1168 | if (task_cputime_zero(&sig->cputime_expires)) | 1052 | if (task_cputime_zero(&sig->cputime_expires)) |
1169 | stop_process_timers(sig); | 1053 | stop_process_timers(sig); |
@@ -1176,7 +1060,7 @@ static void check_process_timers(struct task_struct *tsk, | |||
1176 | void posix_cpu_timer_schedule(struct k_itimer *timer) | 1060 | void posix_cpu_timer_schedule(struct k_itimer *timer) |
1177 | { | 1061 | { |
1178 | struct task_struct *p = timer->it.cpu.task; | 1062 | struct task_struct *p = timer->it.cpu.task; |
1179 | union cpu_time_count now; | 1063 | unsigned long long now; |
1180 | 1064 | ||
1181 | if (unlikely(p == NULL)) | 1065 | if (unlikely(p == NULL)) |
1182 | /* | 1066 | /* |
@@ -1205,7 +1089,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) | |||
1205 | */ | 1089 | */ |
1206 | put_task_struct(p); | 1090 | put_task_struct(p); |
1207 | timer->it.cpu.task = p = NULL; | 1091 | timer->it.cpu.task = p = NULL; |
1208 | timer->it.cpu.expires.sched = 0; | 1092 | timer->it.cpu.expires = 0; |
1209 | goto out_unlock; | 1093 | goto out_unlock; |
1210 | } else if (unlikely(p->exit_state) && thread_group_empty(p)) { | 1094 | } else if (unlikely(p->exit_state) && thread_group_empty(p)) { |
1211 | /* | 1095 | /* |
@@ -1213,6 +1097,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) | |||
1213 | * not yet reaped. Take this opportunity to | 1097 | * not yet reaped. Take this opportunity to |
1214 | * drop our task ref. | 1098 | * drop our task ref. |
1215 | */ | 1099 | */ |
1100 | cpu_timer_sample_group(timer->it_clock, p, &now); | ||
1216 | clear_dead_task(timer, now); | 1101 | clear_dead_task(timer, now); |
1217 | goto out_unlock; | 1102 | goto out_unlock; |
1218 | } | 1103 | } |
@@ -1387,7 +1272,7 @@ void run_posix_cpu_timers(struct task_struct *tsk) | |||
1387 | void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, | 1272 | void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, |
1388 | cputime_t *newval, cputime_t *oldval) | 1273 | cputime_t *newval, cputime_t *oldval) |
1389 | { | 1274 | { |
1390 | union cpu_time_count now; | 1275 | unsigned long long now; |
1391 | 1276 | ||
1392 | BUG_ON(clock_idx == CPUCLOCK_SCHED); | 1277 | BUG_ON(clock_idx == CPUCLOCK_SCHED); |
1393 | cpu_timer_sample_group(clock_idx, tsk, &now); | 1278 | cpu_timer_sample_group(clock_idx, tsk, &now); |
@@ -1399,17 +1284,17 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, | |||
1399 | * it to be absolute. | 1284 | * it to be absolute. |
1400 | */ | 1285 | */ |
1401 | if (*oldval) { | 1286 | if (*oldval) { |
1402 | if (*oldval <= now.cpu) { | 1287 | if (*oldval <= now) { |
1403 | /* Just about to fire. */ | 1288 | /* Just about to fire. */ |
1404 | *oldval = cputime_one_jiffy; | 1289 | *oldval = cputime_one_jiffy; |
1405 | } else { | 1290 | } else { |
1406 | *oldval -= now.cpu; | 1291 | *oldval -= now; |
1407 | } | 1292 | } |
1408 | } | 1293 | } |
1409 | 1294 | ||
1410 | if (!*newval) | 1295 | if (!*newval) |
1411 | goto out; | 1296 | goto out; |
1412 | *newval += now.cpu; | 1297 | *newval += now; |
1413 | } | 1298 | } |
1414 | 1299 | ||
1415 | /* | 1300 | /* |
@@ -1459,7 +1344,7 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags, | |||
1459 | } | 1344 | } |
1460 | 1345 | ||
1461 | while (!signal_pending(current)) { | 1346 | while (!signal_pending(current)) { |
1462 | if (timer.it.cpu.expires.sched == 0) { | 1347 | if (timer.it.cpu.expires == 0) { |
1463 | /* | 1348 | /* |
1464 | * Our timer fired and was reset, below | 1349 | * Our timer fired and was reset, below |
1465 | * deletion can not fail. | 1350 | * deletion can not fail. |
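Editor's note: throughout the posix-cpu-timers hunks above, the two-member union cpu_time_count is replaced by a plain unsigned long long sample, so comparisons and subtraction become ordinary integer arithmetic instead of going through cpu_time_before()/cpu_time_sub()/cpu_time_add(). The sketch below only illustrates that representation change; the union members shown and the sample_t/remaining() names are simplified stand-ins, not the kernel definitions.

/*
 * Sketch of the representation change: expiry samples move from a
 * tagged union (tick-based value in .cpu, nanoseconds in .sched) to a
 * single scalar, so plain <, -, += work for every clock type.
 */
#include <stdio.h>

/* Old shape: callers had to know which member was meaningful. */
union old_cpu_time_count {
	unsigned long cpu;		/* stand-in for cputime_t */
	unsigned long long sched;	/* CPUCLOCK_SCHED, in ns */
};

/* New shape: every clock is sampled into the same scalar. */
typedef unsigned long long sample_t;

static sample_t remaining(sample_t expires, sample_t now)
{
	return expires > now ? expires - now : 0;
}

int main(void)
{
	sample_t expires = 1000, now = 400;

	printf("remaining = %llu\n", remaining(expires, now));
	return 0;
}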
diff --git a/kernel/printk.c b/kernel/printk.c index fa36e1494420..8212c1aef125 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
@@ -363,6 +363,53 @@ static void log_store(int facility, int level, | |||
363 | log_next_seq++; | 363 | log_next_seq++; |
364 | } | 364 | } |
365 | 365 | ||
366 | #ifdef CONFIG_SECURITY_DMESG_RESTRICT | ||
367 | int dmesg_restrict = 1; | ||
368 | #else | ||
369 | int dmesg_restrict; | ||
370 | #endif | ||
371 | |||
372 | static int syslog_action_restricted(int type) | ||
373 | { | ||
374 | if (dmesg_restrict) | ||
375 | return 1; | ||
376 | /* | ||
377 | * Unless restricted, we allow "read all" and "get buffer size" | ||
378 | * for everybody. | ||
379 | */ | ||
380 | return type != SYSLOG_ACTION_READ_ALL && | ||
381 | type != SYSLOG_ACTION_SIZE_BUFFER; | ||
382 | } | ||
383 | |||
384 | static int check_syslog_permissions(int type, bool from_file) | ||
385 | { | ||
386 | /* | ||
387 | * If this is from /proc/kmsg and we've already opened it, then we've | ||
388 | * already done the capabilities checks at open time. | ||
389 | */ | ||
390 | if (from_file && type != SYSLOG_ACTION_OPEN) | ||
391 | return 0; | ||
392 | |||
393 | if (syslog_action_restricted(type)) { | ||
394 | if (capable(CAP_SYSLOG)) | ||
395 | return 0; | ||
396 | /* | ||
397 | * For historical reasons, accept CAP_SYS_ADMIN too, with | ||
398 | * a warning. | ||
399 | */ | ||
400 | if (capable(CAP_SYS_ADMIN)) { | ||
401 | pr_warn_once("%s (%d): Attempt to access syslog with " | ||
402 | "CAP_SYS_ADMIN but no CAP_SYSLOG " | ||
403 | "(deprecated).\n", | ||
404 | current->comm, task_pid_nr(current)); | ||
405 | return 0; | ||
406 | } | ||
407 | return -EPERM; | ||
408 | } | ||
409 | return security_syslog(type); | ||
410 | } | ||
411 | |||
412 | |||
366 | /* /dev/kmsg - userspace message inject/listen interface */ | 413 | /* /dev/kmsg - userspace message inject/listen interface */ |
367 | struct devkmsg_user { | 414 | struct devkmsg_user { |
368 | u64 seq; | 415 | u64 seq; |
@@ -620,7 +667,8 @@ static int devkmsg_open(struct inode *inode, struct file *file) | |||
620 | if ((file->f_flags & O_ACCMODE) == O_WRONLY) | 667 | if ((file->f_flags & O_ACCMODE) == O_WRONLY) |
621 | return 0; | 668 | return 0; |
622 | 669 | ||
623 | err = security_syslog(SYSLOG_ACTION_READ_ALL); | 670 | err = check_syslog_permissions(SYSLOG_ACTION_READ_ALL, |
671 | SYSLOG_FROM_READER); | ||
624 | if (err) | 672 | if (err) |
625 | return err; | 673 | return err; |
626 | 674 | ||
@@ -813,45 +861,6 @@ static inline void boot_delay_msec(int level) | |||
813 | } | 861 | } |
814 | #endif | 862 | #endif |
815 | 863 | ||
816 | #ifdef CONFIG_SECURITY_DMESG_RESTRICT | ||
817 | int dmesg_restrict = 1; | ||
818 | #else | ||
819 | int dmesg_restrict; | ||
820 | #endif | ||
821 | |||
822 | static int syslog_action_restricted(int type) | ||
823 | { | ||
824 | if (dmesg_restrict) | ||
825 | return 1; | ||
826 | /* Unless restricted, we allow "read all" and "get buffer size" for everybody */ | ||
827 | return type != SYSLOG_ACTION_READ_ALL && type != SYSLOG_ACTION_SIZE_BUFFER; | ||
828 | } | ||
829 | |||
830 | static int check_syslog_permissions(int type, bool from_file) | ||
831 | { | ||
832 | /* | ||
833 | * If this is from /proc/kmsg and we've already opened it, then we've | ||
834 | * already done the capabilities checks at open time. | ||
835 | */ | ||
836 | if (from_file && type != SYSLOG_ACTION_OPEN) | ||
837 | return 0; | ||
838 | |||
839 | if (syslog_action_restricted(type)) { | ||
840 | if (capable(CAP_SYSLOG)) | ||
841 | return 0; | ||
842 | /* For historical reasons, accept CAP_SYS_ADMIN too, with a warning */ | ||
843 | if (capable(CAP_SYS_ADMIN)) { | ||
844 | printk_once(KERN_WARNING "%s (%d): " | ||
845 | "Attempt to access syslog with CAP_SYS_ADMIN " | ||
846 | "but no CAP_SYSLOG (deprecated).\n", | ||
847 | current->comm, task_pid_nr(current)); | ||
848 | return 0; | ||
849 | } | ||
850 | return -EPERM; | ||
851 | } | ||
852 | return 0; | ||
853 | } | ||
854 | |||
855 | #if defined(CONFIG_PRINTK_TIME) | 864 | #if defined(CONFIG_PRINTK_TIME) |
856 | static bool printk_time = 1; | 865 | static bool printk_time = 1; |
857 | #else | 866 | #else |
@@ -1249,7 +1258,7 @@ out: | |||
1249 | 1258 | ||
1250 | SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) | 1259 | SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) |
1251 | { | 1260 | { |
1252 | return do_syslog(type, buf, len, SYSLOG_FROM_CALL); | 1261 | return do_syslog(type, buf, len, SYSLOG_FROM_READER); |
1253 | } | 1262 | } |
1254 | 1263 | ||
1255 | /* | 1264 | /* |
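Editor's note: the printk.c hunks above move dmesg_restrict, syslog_action_restricted() and check_syslog_permissions() earlier in the file so that devkmsg_open() can apply the same policy, keeping the deprecated CAP_SYS_ADMIN fallback with a one-time warning. The following is a hedged userspace model of that decision order only; the booleans stand in for capable(CAP_SYSLOG), capable(CAP_SYS_ADMIN) and the security_syslog() hook.

/*
 * Userspace model of the check_syslog_permissions() decision order:
 * pre-checked /proc/kmsg readers pass, restricted actions need the
 * "syslog" capability (with a deprecated "admin" fallback), everything
 * else falls through to the security hook.
 */
#include <stdio.h>
#include <stdbool.h>
#include <errno.h>

enum action { ACTION_OPEN, ACTION_READ_ALL, ACTION_SIZE_BUFFER, ACTION_CLEAR };

static bool dmesg_restrict;

static bool action_restricted(enum action type)
{
	if (dmesg_restrict)
		return true;
	/* "read all" and "get buffer size" stay open to everybody. */
	return type != ACTION_READ_ALL && type != ACTION_SIZE_BUFFER;
}

static int check_permissions(enum action type, bool from_file,
			     bool has_cap_syslog, bool has_cap_admin)
{
	/* /proc/kmsg readers were already checked at open time. */
	if (from_file && type != ACTION_OPEN)
		return 0;

	if (action_restricted(type)) {
		if (has_cap_syslog)
			return 0;
		if (has_cap_admin) {
			fprintf(stderr, "deprecated: admin cap used for syslog\n");
			return 0;
		}
		return -EPERM;
	}
	return 0;	/* the kernel would call security_syslog(type) here */
}

int main(void)
{
	dmesg_restrict = true;
	printf("clear without caps -> %d\n",
	       check_permissions(ACTION_CLEAR, false, false, false));
	printf("read_all via /proc/kmsg -> %d\n",
	       check_permissions(ACTION_READ_ALL, true, false, false));
	return 0;
}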
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index aed981a3f69c..335a7ae697f5 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
@@ -665,20 +665,22 @@ static int ptrace_peek_siginfo(struct task_struct *child, | |||
665 | if (unlikely(is_compat_task())) { | 665 | if (unlikely(is_compat_task())) { |
666 | compat_siginfo_t __user *uinfo = compat_ptr(data); | 666 | compat_siginfo_t __user *uinfo = compat_ptr(data); |
667 | 667 | ||
668 | ret = copy_siginfo_to_user32(uinfo, &info); | 668 | if (copy_siginfo_to_user32(uinfo, &info) || |
669 | ret |= __put_user(info.si_code, &uinfo->si_code); | 669 | __put_user(info.si_code, &uinfo->si_code)) { |
670 | ret = -EFAULT; | ||
671 | break; | ||
672 | } | ||
673 | |||
670 | } else | 674 | } else |
671 | #endif | 675 | #endif |
672 | { | 676 | { |
673 | siginfo_t __user *uinfo = (siginfo_t __user *) data; | 677 | siginfo_t __user *uinfo = (siginfo_t __user *) data; |
674 | 678 | ||
675 | ret = copy_siginfo_to_user(uinfo, &info); | 679 | if (copy_siginfo_to_user(uinfo, &info) || |
676 | ret |= __put_user(info.si_code, &uinfo->si_code); | 680 | __put_user(info.si_code, &uinfo->si_code)) { |
677 | } | 681 | ret = -EFAULT; |
678 | 682 | break; | |
679 | if (ret) { | 683 | } |
680 | ret = -EFAULT; | ||
681 | break; | ||
682 | } | 684 | } |
683 | 685 | ||
684 | data += sizeof(siginfo_t); | 686 | data += sizeof(siginfo_t); |
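Editor's note: the ptrace_peek_siginfo() change above stops OR-ing the two copy-out return values together and instead tests each one and fails the loop with a clean -EFAULT. A small sketch of that pattern is below; copy_record() and put_field() are hypothetical stand-ins that, like copy_siginfo_to_user() and __put_user(), return nonzero on failure.

/*
 * Sketch of the error-handling pattern adopted above: rather than
 * accumulating return codes with |= (which can leave a garbled error
 * value), each user-copy step is tested on its own and the loop bails
 * out with one well-defined -EFAULT.
 */
#include <stdio.h>
#include <errno.h>

static int copy_record(int i) { return i == 2 ? -1 : 0; }	/* fails on i == 2 */
static int put_field(int i)   { (void)i; return 0; }

static int peek_records(int count)
{
	int i;

	for (i = 0; i < count; i++) {
		if (copy_record(i) || put_field(i))
			return -EFAULT;	/* single, well-defined error code */
	}
	return i;	/* number of records copied */
}

int main(void)
{
	printf("peek_records(2) -> %d\n", peek_records(2));	/* 2 */
	printf("peek_records(5) -> %d\n", peek_records(5));	/* -EFAULT */
	return 0;
}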
diff --git a/kernel/range.c b/kernel/range.c index 071b0ab455cb..322ea8e93e4b 100644 --- a/kernel/range.c +++ b/kernel/range.c | |||
@@ -4,7 +4,7 @@ | |||
4 | #include <linux/kernel.h> | 4 | #include <linux/kernel.h> |
5 | #include <linux/init.h> | 5 | #include <linux/init.h> |
6 | #include <linux/sort.h> | 6 | #include <linux/sort.h> |
7 | 7 | #include <linux/string.h> | |
8 | #include <linux/range.h> | 8 | #include <linux/range.h> |
9 | 9 | ||
10 | int add_range(struct range *range, int az, int nr_range, u64 start, u64 end) | 10 | int add_range(struct range *range, int az, int nr_range, u64 start, u64 end) |
@@ -32,9 +32,8 @@ int add_range_with_merge(struct range *range, int az, int nr_range, | |||
32 | if (start >= end) | 32 | if (start >= end) |
33 | return nr_range; | 33 | return nr_range; |
34 | 34 | ||
35 | /* Try to merge it with old one: */ | 35 | /* get new start/end: */ |
36 | for (i = 0; i < nr_range; i++) { | 36 | for (i = 0; i < nr_range; i++) { |
37 | u64 final_start, final_end; | ||
38 | u64 common_start, common_end; | 37 | u64 common_start, common_end; |
39 | 38 | ||
40 | if (!range[i].end) | 39 | if (!range[i].end) |
@@ -45,12 +44,16 @@ int add_range_with_merge(struct range *range, int az, int nr_range, | |||
45 | if (common_start > common_end) | 44 | if (common_start > common_end) |
46 | continue; | 45 | continue; |
47 | 46 | ||
48 | final_start = min(range[i].start, start); | 47 | /* new start/end, will add it back at last */ |
49 | final_end = max(range[i].end, end); | 48 | start = min(range[i].start, start); |
49 | end = max(range[i].end, end); | ||
50 | 50 | ||
51 | range[i].start = final_start; | 51 | memmove(&range[i], &range[i + 1], |
52 | range[i].end = final_end; | 52 | (nr_range - (i + 1)) * sizeof(range[i])); |
53 | return nr_range; | 53 | range[nr_range - 1].start = 0; |
54 | range[nr_range - 1].end = 0; | ||
55 | nr_range--; | ||
56 | i--; | ||
54 | } | 57 | } |
55 | 58 | ||
56 | /* Need to add it: */ | 59 | /* Need to add it: */ |
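Editor's note: add_range_with_merge() now absorbs every overlapping range into the new start/end, compacts the array with memmove(), steps the index back so chained overlaps collapse too, and re-adds the widened range once at the end. The code below mirrors that loop as a self-contained userspace program; it uses plain uint64_t and a fixed-size array rather than the kernel's types, so treat it as a sketch of the shape, not the kernel function itself.

/*
 * Self-contained version of the merging loop introduced above: any
 * existing range that overlaps the new one is absorbed, removed by
 * compacting the array, and the widened range is appended at the end.
 */
#include <stdio.h>
#include <string.h>
#include <stdint.h>

struct range { uint64_t start, end; };

static int add_range(struct range *range, int az, int nr, uint64_t start, uint64_t end)
{
	if (start >= end || nr >= az)
		return nr;
	range[nr].start = start;
	range[nr].end = end;
	return nr + 1;
}

static int add_range_with_merge(struct range *range, int az, int nr,
				uint64_t start, uint64_t end)
{
	int i;

	if (start >= end)
		return nr;

	for (i = 0; i < nr; i++) {
		uint64_t common_start, common_end;

		if (!range[i].end)
			continue;

		common_start = range[i].start > start ? range[i].start : start;
		common_end = range[i].end < end ? range[i].end : end;
		if (common_start > common_end)
			continue;

		/* absorb the overlapping range, compact, and re-check slot i */
		start = range[i].start < start ? range[i].start : start;
		end = range[i].end > end ? range[i].end : end;
		memmove(&range[i], &range[i + 1], (nr - (i + 1)) * sizeof(range[i]));
		range[nr - 1].start = 0;
		range[nr - 1].end = 0;
		nr--;
		i--;
	}

	return add_range(range, az, nr, start, end);
}

int main(void)
{
	struct range r[8] = { {0, 0} };
	int nr = 0;

	nr = add_range_with_merge(r, 8, nr, 10, 20);
	nr = add_range_with_merge(r, 8, nr, 30, 40);
	nr = add_range_with_merge(r, 8, nr, 15, 35);	/* bridges both */
	for (int i = 0; i < nr; i++)
		printf("range %d: [%llu, %llu)\n", i,
		       (unsigned long long)r[i].start, (unsigned long long)r[i].end);
	return 0;
}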
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 16ea67925015..35380019f0fc 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
@@ -1451,9 +1451,9 @@ static int rcu_gp_init(struct rcu_state *rsp) | |||
1451 | rnp->grphi, rnp->qsmask); | 1451 | rnp->grphi, rnp->qsmask); |
1452 | raw_spin_unlock_irq(&rnp->lock); | 1452 | raw_spin_unlock_irq(&rnp->lock); |
1453 | #ifdef CONFIG_PROVE_RCU_DELAY | 1453 | #ifdef CONFIG_PROVE_RCU_DELAY |
1454 | if ((prandom_u32() % (rcu_num_nodes * 8)) == 0 && | 1454 | if ((prandom_u32() % (rcu_num_nodes + 1)) == 0 && |
1455 | system_state == SYSTEM_RUNNING) | 1455 | system_state == SYSTEM_RUNNING) |
1456 | schedule_timeout_uninterruptible(2); | 1456 | udelay(200); |
1457 | #endif /* #ifdef CONFIG_PROVE_RCU_DELAY */ | 1457 | #endif /* #ifdef CONFIG_PROVE_RCU_DELAY */ |
1458 | cond_resched(); | 1458 | cond_resched(); |
1459 | } | 1459 | } |
@@ -1613,6 +1613,14 @@ static int __noreturn rcu_gp_kthread(void *arg) | |||
1613 | } | 1613 | } |
1614 | } | 1614 | } |
1615 | 1615 | ||
1616 | static void rsp_wakeup(struct irq_work *work) | ||
1617 | { | ||
1618 | struct rcu_state *rsp = container_of(work, struct rcu_state, wakeup_work); | ||
1619 | |||
1620 | /* Wake up rcu_gp_kthread() to start the grace period. */ | ||
1621 | wake_up(&rsp->gp_wq); | ||
1622 | } | ||
1623 | |||
1616 | /* | 1624 | /* |
1617 | * Start a new RCU grace period if warranted, re-initializing the hierarchy | 1625 | * Start a new RCU grace period if warranted, re-initializing the hierarchy |
1618 | * in preparation for detecting the next grace period. The caller must hold | 1626 | * in preparation for detecting the next grace period. The caller must hold |
@@ -1637,8 +1645,12 @@ rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, | |||
1637 | } | 1645 | } |
1638 | rsp->gp_flags = RCU_GP_FLAG_INIT; | 1646 | rsp->gp_flags = RCU_GP_FLAG_INIT; |
1639 | 1647 | ||
1640 | /* Wake up rcu_gp_kthread() to start the grace period. */ | 1648 | /* |
1641 | wake_up(&rsp->gp_wq); | 1649 | * We can't do wakeups while holding the rnp->lock, as that |
1650 | * could cause possible deadlocks with the rq->lock. Deter | ||
1651 | * the wakeup to interrupt context. | ||
1652 | */ | ||
1653 | irq_work_queue(&rsp->wakeup_work); | ||
1642 | } | 1654 | } |
1643 | 1655 | ||
1644 | /* | 1656 | /* |
@@ -3235,6 +3247,7 @@ static void __init rcu_init_one(struct rcu_state *rsp, | |||
3235 | 3247 | ||
3236 | rsp->rda = rda; | 3248 | rsp->rda = rda; |
3237 | init_waitqueue_head(&rsp->gp_wq); | 3249 | init_waitqueue_head(&rsp->gp_wq); |
3250 | init_irq_work(&rsp->wakeup_work, rsp_wakeup); | ||
3238 | rnp = rsp->level[rcu_num_lvls - 1]; | 3251 | rnp = rsp->level[rcu_num_lvls - 1]; |
3239 | for_each_possible_cpu(i) { | 3252 | for_each_possible_cpu(i) { |
3240 | while (i > rnp->grphi) | 3253 | while (i > rnp->grphi) |
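Editor's note: rcu_start_gp_advanced() above no longer calls wake_up() while rnp->lock is held; it queues an irq_work whose handler, rsp_wakeup(), performs the wakeup later from interrupt context. The code below is only a userspace analogue of that "record now, signal after dropping the lock" pattern; pthreads stand in for rnp->lock, the grace-period kthread and irq_work, and there is no claim that this matches the kernel's locking rules.

/*
 * Userspace analogue of the deferred-wakeup pattern: the lock holder
 * only records that a wakeup is needed, and the signal is issued after
 * the lock is released.
 */
#include <stdio.h>
#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t wq_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t wq = PTHREAD_COND_INITIALIZER;
static bool gp_requested;

static void *gp_kthread(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&wq_lock);
	while (!gp_requested)
		pthread_cond_wait(&wq, &wq_lock);
	pthread_mutex_unlock(&wq_lock);
	printf("grace-period thread woken, starting new GP\n");
	return NULL;
}

static void start_gp(void)
{
	bool need_wakeup = false;

	pthread_mutex_lock(&lock);
	/* ... set up grace-period state under the lock ... */
	need_wakeup = true;		/* defer the wakeup itself */
	pthread_mutex_unlock(&lock);

	if (need_wakeup) {		/* the kernel runs this from irq_work */
		pthread_mutex_lock(&wq_lock);
		gp_requested = true;
		pthread_cond_signal(&wq);
		pthread_mutex_unlock(&wq_lock);
	}
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, gp_kthread, NULL);
	start_gp();
	pthread_join(t, NULL);
	return 0;
}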
diff --git a/kernel/rcutree.h b/kernel/rcutree.h index da77a8f57ff9..4df503470e42 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <linux/threads.h> | 27 | #include <linux/threads.h> |
28 | #include <linux/cpumask.h> | 28 | #include <linux/cpumask.h> |
29 | #include <linux/seqlock.h> | 29 | #include <linux/seqlock.h> |
30 | #include <linux/irq_work.h> | ||
30 | 31 | ||
31 | /* | 32 | /* |
32 | * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and | 33 | * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and |
@@ -442,6 +443,7 @@ struct rcu_state { | |||
442 | char *name; /* Name of structure. */ | 443 | char *name; /* Name of structure. */ |
443 | char abbr; /* Abbreviated name. */ | 444 | char abbr; /* Abbreviated name. */ |
444 | struct list_head flavors; /* List of RCU flavors. */ | 445 | struct list_head flavors; /* List of RCU flavors. */ |
446 | struct irq_work wakeup_work; /* Postponed wakeups */ | ||
445 | }; | 447 | }; |
446 | 448 | ||
447 | /* Values for rcu_state structure's gp_flags field. */ | 449 | /* Values for rcu_state structure's gp_flags field. */ |
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 170814dc418f..3db5a375d8dd 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h | |||
@@ -88,7 +88,7 @@ static void __init rcu_bootup_announce_oddness(void) | |||
88 | #ifdef CONFIG_RCU_NOCB_CPU | 88 | #ifdef CONFIG_RCU_NOCB_CPU |
89 | #ifndef CONFIG_RCU_NOCB_CPU_NONE | 89 | #ifndef CONFIG_RCU_NOCB_CPU_NONE |
90 | if (!have_rcu_nocb_mask) { | 90 | if (!have_rcu_nocb_mask) { |
91 | alloc_bootmem_cpumask_var(&rcu_nocb_mask); | 91 | zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL); |
92 | have_rcu_nocb_mask = true; | 92 | have_rcu_nocb_mask = true; |
93 | } | 93 | } |
94 | #ifdef CONFIG_RCU_NOCB_CPU_ZERO | 94 | #ifdef CONFIG_RCU_NOCB_CPU_ZERO |
@@ -1667,7 +1667,7 @@ int rcu_needs_cpu(int cpu, unsigned long *dj) | |||
1667 | rdtp->last_accelerate = jiffies; | 1667 | rdtp->last_accelerate = jiffies; |
1668 | 1668 | ||
1669 | /* Request timer delay depending on laziness, and round. */ | 1669 | /* Request timer delay depending on laziness, and round. */ |
1670 | if (rdtp->all_lazy) { | 1670 | if (!rdtp->all_lazy) { |
1671 | *dj = round_up(rcu_idle_gp_delay + jiffies, | 1671 | *dj = round_up(rcu_idle_gp_delay + jiffies, |
1672 | rcu_idle_gp_delay) - jiffies; | 1672 | rcu_idle_gp_delay) - jiffies; |
1673 | } else { | 1673 | } else { |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 58453b8272fd..e8b335016c52 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -633,7 +633,19 @@ void wake_up_nohz_cpu(int cpu) | |||
633 | static inline bool got_nohz_idle_kick(void) | 633 | static inline bool got_nohz_idle_kick(void) |
634 | { | 634 | { |
635 | int cpu = smp_processor_id(); | 635 | int cpu = smp_processor_id(); |
636 | return idle_cpu(cpu) && test_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu)); | 636 | |
637 | if (!test_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu))) | ||
638 | return false; | ||
639 | |||
640 | if (idle_cpu(cpu) && !need_resched()) | ||
641 | return true; | ||
642 | |||
643 | /* | ||
644 | * We can't run Idle Load Balance on this CPU for this time so we | ||
645 | * cancel it and clear NOHZ_BALANCE_KICK | ||
646 | */ | ||
647 | clear_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu)); | ||
648 | return false; | ||
637 | } | 649 | } |
638 | 650 | ||
639 | #else /* CONFIG_NO_HZ_COMMON */ | 651 | #else /* CONFIG_NO_HZ_COMMON */ |
@@ -1393,8 +1405,9 @@ static void sched_ttwu_pending(void) | |||
1393 | 1405 | ||
1394 | void scheduler_ipi(void) | 1406 | void scheduler_ipi(void) |
1395 | { | 1407 | { |
1396 | if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick() | 1408 | if (llist_empty(&this_rq()->wake_list) |
1397 | && !tick_nohz_full_cpu(smp_processor_id())) | 1409 | && !tick_nohz_full_cpu(smp_processor_id()) |
1410 | && !got_nohz_idle_kick()) | ||
1398 | return; | 1411 | return; |
1399 | 1412 | ||
1400 | /* | 1413 | /* |
@@ -1417,7 +1430,7 @@ void scheduler_ipi(void) | |||
1417 | /* | 1430 | /* |
1418 | * Check if someone kicked us for doing the nohz idle load balance. | 1431 | * Check if someone kicked us for doing the nohz idle load balance. |
1419 | */ | 1432 | */ |
1420 | if (unlikely(got_nohz_idle_kick() && !need_resched())) { | 1433 | if (unlikely(got_nohz_idle_kick())) { |
1421 | this_rq()->idle_balance = 1; | 1434 | this_rq()->idle_balance = 1; |
1422 | raise_softirq_irqoff(SCHED_SOFTIRQ); | 1435 | raise_softirq_irqoff(SCHED_SOFTIRQ); |
1423 | } | 1436 | } |
@@ -4745,7 +4758,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) | |||
4745 | */ | 4758 | */ |
4746 | idle->sched_class = &idle_sched_class; | 4759 | idle->sched_class = &idle_sched_class; |
4747 | ftrace_graph_init_idle_task(idle, cpu); | 4760 | ftrace_graph_init_idle_task(idle, cpu); |
4748 | vtime_init_idle(idle); | 4761 | vtime_init_idle(idle, cpu); |
4749 | #if defined(CONFIG_SMP) | 4762 | #if defined(CONFIG_SMP) |
4750 | sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); | 4763 | sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); |
4751 | #endif | 4764 | #endif |
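Editor's note: got_nohz_idle_kick() above now clears NOHZ_BALANCE_KICK itself when the kick can no longer be serviced (the CPU is not idle, or a reschedule is pending), so scheduler_ipi() can test it last without leaving a stale flag behind. A minimal sketch of that check-then-cancel shape follows; plain globals stand in for the per-CPU nohz flag word, idle_cpu() and need_resched().

/*
 * Sketch of the check-then-cancel pattern: if the balance kick cannot
 * be serviced right now, the flag is cleared on the spot so it does
 * not linger.
 */
#include <stdio.h>
#include <stdbool.h>

static bool balance_kick_pending;
static bool cpu_is_idle;
static bool resched_needed;

static bool got_idle_kick(void)
{
	if (!balance_kick_pending)
		return false;

	if (cpu_is_idle && !resched_needed)
		return true;

	/* can't run idle load balancing now: cancel the kick */
	balance_kick_pending = false;
	return false;
}

int main(void)
{
	balance_kick_pending = true;
	cpu_is_idle = false;
	printf("kick honoured: %d, still pending: %d\n",
	       got_idle_kick(), balance_kick_pending);
	return 0;
}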
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index cc2dc3eea8a3..b5ccba22603b 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c | |||
@@ -747,17 +747,17 @@ void arch_vtime_task_switch(struct task_struct *prev) | |||
747 | 747 | ||
748 | write_seqlock(¤t->vtime_seqlock); | 748 | write_seqlock(¤t->vtime_seqlock); |
749 | current->vtime_snap_whence = VTIME_SYS; | 749 | current->vtime_snap_whence = VTIME_SYS; |
750 | current->vtime_snap = sched_clock(); | 750 | current->vtime_snap = sched_clock_cpu(smp_processor_id()); |
751 | write_sequnlock(¤t->vtime_seqlock); | 751 | write_sequnlock(¤t->vtime_seqlock); |
752 | } | 752 | } |
753 | 753 | ||
754 | void vtime_init_idle(struct task_struct *t) | 754 | void vtime_init_idle(struct task_struct *t, int cpu) |
755 | { | 755 | { |
756 | unsigned long flags; | 756 | unsigned long flags; |
757 | 757 | ||
758 | write_seqlock_irqsave(&t->vtime_seqlock, flags); | 758 | write_seqlock_irqsave(&t->vtime_seqlock, flags); |
759 | t->vtime_snap_whence = VTIME_SYS; | 759 | t->vtime_snap_whence = VTIME_SYS; |
760 | t->vtime_snap = sched_clock(); | 760 | t->vtime_snap = sched_clock_cpu(cpu); |
761 | write_sequnlock_irqrestore(&t->vtime_seqlock, flags); | 761 | write_sequnlock_irqrestore(&t->vtime_seqlock, flags); |
762 | } | 762 | } |
763 | 763 | ||
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h index 2ef90a51ec5e..71bac979d5ee 100644 --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h | |||
@@ -162,6 +162,39 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next) | |||
162 | */ | 162 | */ |
163 | 163 | ||
164 | /** | 164 | /** |
165 | * cputimer_running - return true if cputimer is running | ||
166 | * | ||
167 | * @tsk: Pointer to target task. | ||
168 | */ | ||
169 | static inline bool cputimer_running(struct task_struct *tsk) | ||
170 | |||
171 | { | ||
172 | struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; | ||
173 | |||
174 | if (!cputimer->running) | ||
175 | return false; | ||
176 | |||
177 | /* | ||
178 | * After we flush the task's sum_exec_runtime to sig->sum_sched_runtime | ||
179 | * in __exit_signal(), we won't account to the signal struct further | ||
180 | * cputime consumed by that task, even though the task can still be | ||
181 | * ticking after __exit_signal(). | ||
182 | * | ||
183 | * In order to keep a consistent behaviour between thread group cputime | ||
184 | * and thread group cputimer accounting, let's also ignore the cputime | ||
185 | * elapsing after __exit_signal() in any thread group timer running. | ||
186 | * | ||
187 | * This makes sure that POSIX CPU clocks and timers are synchronized, so | ||
188 | * that a POSIX CPU timer won't expire while the corresponding POSIX CPU | ||
189 | * clock delta is behind the expiring timer value. | ||
190 | */ | ||
191 | if (unlikely(!tsk->sighand)) | ||
192 | return false; | ||
193 | |||
194 | return true; | ||
195 | } | ||
196 | |||
197 | /** | ||
165 | * account_group_user_time - Maintain utime for a thread group. | 198 | * account_group_user_time - Maintain utime for a thread group. |
166 | * | 199 | * |
167 | * @tsk: Pointer to task structure. | 200 | * @tsk: Pointer to task structure. |
@@ -176,7 +209,7 @@ static inline void account_group_user_time(struct task_struct *tsk, | |||
176 | { | 209 | { |
177 | struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; | 210 | struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; |
178 | 211 | ||
179 | if (!cputimer->running) | 212 | if (!cputimer_running(tsk)) |
180 | return; | 213 | return; |
181 | 214 | ||
182 | raw_spin_lock(&cputimer->lock); | 215 | raw_spin_lock(&cputimer->lock); |
@@ -199,7 +232,7 @@ static inline void account_group_system_time(struct task_struct *tsk, | |||
199 | { | 232 | { |
200 | struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; | 233 | struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; |
201 | 234 | ||
202 | if (!cputimer->running) | 235 | if (!cputimer_running(tsk)) |
203 | return; | 236 | return; |
204 | 237 | ||
205 | raw_spin_lock(&cputimer->lock); | 238 | raw_spin_lock(&cputimer->lock); |
@@ -222,7 +255,7 @@ static inline void account_group_exec_runtime(struct task_struct *tsk, | |||
222 | { | 255 | { |
223 | struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; | 256 | struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; |
224 | 257 | ||
225 | if (!cputimer->running) | 258 | if (!cputimer_running(tsk)) |
226 | return; | 259 | return; |
227 | 260 | ||
228 | raw_spin_lock(&cputimer->lock); | 261 | raw_spin_lock(&cputimer->lock); |
diff --git a/kernel/softirq.c b/kernel/softirq.c index b5197dcb0dad..3d6833f125d3 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -195,8 +195,12 @@ void local_bh_enable_ip(unsigned long ip) | |||
195 | EXPORT_SYMBOL(local_bh_enable_ip); | 195 | EXPORT_SYMBOL(local_bh_enable_ip); |
196 | 196 | ||
197 | /* | 197 | /* |
198 | * We restart softirq processing for at most 2 ms, | 198 | * We restart softirq processing for at most MAX_SOFTIRQ_RESTART times, |
199 | * and if need_resched() is not set. | 199 | * but break the loop if need_resched() is set or after 2 ms. |
200 | * The MAX_SOFTIRQ_TIME provides a nice upper bound in most cases, but in | ||
201 | * certain cases, such as stop_machine(), jiffies may cease to | ||
202 | * increment and so we need the MAX_SOFTIRQ_RESTART limit as | ||
203 | * well to make sure we eventually return from this method. | ||
200 | * | 204 | * |
201 | * These limits have been established via experimentation. | 205 | * These limits have been established via experimentation. |
202 | * The two things to balance are latency against fairness - | 206 | * The two things to balance are latency against fairness - |
@@ -204,6 +208,7 @@ EXPORT_SYMBOL(local_bh_enable_ip); | |||
204 | * should not be able to lock up the box. | 208 | * should not be able to lock up the box. |
205 | */ | 209 | */ |
206 | #define MAX_SOFTIRQ_TIME msecs_to_jiffies(2) | 210 | #define MAX_SOFTIRQ_TIME msecs_to_jiffies(2) |
211 | #define MAX_SOFTIRQ_RESTART 10 | ||
207 | 212 | ||
208 | asmlinkage void __do_softirq(void) | 213 | asmlinkage void __do_softirq(void) |
209 | { | 214 | { |
@@ -212,6 +217,7 @@ asmlinkage void __do_softirq(void) | |||
212 | unsigned long end = jiffies + MAX_SOFTIRQ_TIME; | 217 | unsigned long end = jiffies + MAX_SOFTIRQ_TIME; |
213 | int cpu; | 218 | int cpu; |
214 | unsigned long old_flags = current->flags; | 219 | unsigned long old_flags = current->flags; |
220 | int max_restart = MAX_SOFTIRQ_RESTART; | ||
215 | 221 | ||
216 | /* | 222 | /* |
217 | * Mask out PF_MEMALLOC as current task context is borrowed for the | 223 | * Mask out PF_MEMALLOC as current task context is borrowed for the |
@@ -265,7 +271,8 @@ restart: | |||
265 | 271 | ||
266 | pending = local_softirq_pending(); | 272 | pending = local_softirq_pending(); |
267 | if (pending) { | 273 | if (pending) { |
268 | if (time_before(jiffies, end) && !need_resched()) | 274 | if (time_before(jiffies, end) && !need_resched() && |
275 | --max_restart) | ||
269 | goto restart; | 276 | goto restart; |
270 | 277 | ||
271 | wakeup_softirqd(); | 278 | wakeup_softirqd(); |
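Editor's note: __do_softirq() above keeps the 2 ms time budget but adds MAX_SOFTIRQ_RESTART as a second bound, so the restart loop terminates even when jiffies stops advancing (for example under stop_machine()). The program below is a userspace sketch of a loop bounded both by wall-clock time and an iteration cap; time(NULL) and the fake workload stand in for jiffies and the softirq vectors.

/*
 * Userspace sketch of the dual bound: the processing loop restarts
 * while work remains, but gives up after a small time budget or a
 * fixed number of restarts, whichever comes first.
 */
#include <stdio.h>
#include <time.h>
#include <stdbool.h>

#define MAX_RESTART	10
#define TIME_BUDGET_SEC	1

static int pending = 25;		/* pretend work keeps arriving */

static bool process_pending(void)
{
	if (pending > 0)
		pending -= 3;		/* handle a batch */
	return pending > 0;		/* more left? */
}

int main(void)
{
	time_t end = time(NULL) + TIME_BUDGET_SEC;
	int max_restart = MAX_RESTART;
	int restarts = 0;

restart:
	if (process_pending()) {
		if (time(NULL) < end && --max_restart) {
			restarts++;
			goto restart;
		}
		printf("deferring remaining work after %d restarts\n", restarts);
		return 0;
	}
	printf("all work done after %d restarts\n", restarts);
	return 0;
}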
diff --git a/kernel/sys.c b/kernel/sys.c index b95d3c72ba21..2bbd9a73b54c 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -362,6 +362,29 @@ int unregister_reboot_notifier(struct notifier_block *nb) | |||
362 | } | 362 | } |
363 | EXPORT_SYMBOL(unregister_reboot_notifier); | 363 | EXPORT_SYMBOL(unregister_reboot_notifier); |
364 | 364 | ||
365 | /* Add backwards compatibility for stable trees. */ | ||
366 | #ifndef PF_NO_SETAFFINITY | ||
367 | #define PF_NO_SETAFFINITY PF_THREAD_BOUND | ||
368 | #endif | ||
369 | |||
370 | static void migrate_to_reboot_cpu(void) | ||
371 | { | ||
372 | /* The boot cpu is always logical cpu 0 */ | ||
373 | int cpu = 0; | ||
374 | |||
375 | cpu_hotplug_disable(); | ||
376 | |||
377 | /* Make certain the cpu I'm about to reboot on is online */ | ||
378 | if (!cpu_online(cpu)) | ||
379 | cpu = cpumask_first(cpu_online_mask); | ||
380 | |||
381 | /* Prevent races with other tasks migrating this task */ | ||
382 | current->flags |= PF_NO_SETAFFINITY; | ||
383 | |||
384 | /* Make certain I only run on the appropriate processor */ | ||
385 | set_cpus_allowed_ptr(current, cpumask_of(cpu)); | ||
386 | } | ||
387 | |||
365 | /** | 388 | /** |
366 | * kernel_restart - reboot the system | 389 | * kernel_restart - reboot the system |
367 | * @cmd: pointer to buffer containing command to execute for restart | 390 | * @cmd: pointer to buffer containing command to execute for restart |
@@ -373,7 +396,7 @@ EXPORT_SYMBOL(unregister_reboot_notifier); | |||
373 | void kernel_restart(char *cmd) | 396 | void kernel_restart(char *cmd) |
374 | { | 397 | { |
375 | kernel_restart_prepare(cmd); | 398 | kernel_restart_prepare(cmd); |
376 | disable_nonboot_cpus(); | 399 | migrate_to_reboot_cpu(); |
377 | syscore_shutdown(); | 400 | syscore_shutdown(); |
378 | if (!cmd) | 401 | if (!cmd) |
379 | printk(KERN_EMERG "Restarting system.\n"); | 402 | printk(KERN_EMERG "Restarting system.\n"); |
@@ -400,7 +423,7 @@ static void kernel_shutdown_prepare(enum system_states state) | |||
400 | void kernel_halt(void) | 423 | void kernel_halt(void) |
401 | { | 424 | { |
402 | kernel_shutdown_prepare(SYSTEM_HALT); | 425 | kernel_shutdown_prepare(SYSTEM_HALT); |
403 | disable_nonboot_cpus(); | 426 | migrate_to_reboot_cpu(); |
404 | syscore_shutdown(); | 427 | syscore_shutdown(); |
405 | printk(KERN_EMERG "System halted.\n"); | 428 | printk(KERN_EMERG "System halted.\n"); |
406 | kmsg_dump(KMSG_DUMP_HALT); | 429 | kmsg_dump(KMSG_DUMP_HALT); |
@@ -419,7 +442,7 @@ void kernel_power_off(void) | |||
419 | kernel_shutdown_prepare(SYSTEM_POWER_OFF); | 442 | kernel_shutdown_prepare(SYSTEM_POWER_OFF); |
420 | if (pm_power_off_prepare) | 443 | if (pm_power_off_prepare) |
421 | pm_power_off_prepare(); | 444 | pm_power_off_prepare(); |
422 | disable_nonboot_cpus(); | 445 | migrate_to_reboot_cpu(); |
423 | syscore_shutdown(); | 446 | syscore_shutdown(); |
424 | printk(KERN_EMERG "Power down.\n"); | 447 | printk(KERN_EMERG "Power down.\n"); |
425 | kmsg_dump(KMSG_DUMP_POWEROFF); | 448 | kmsg_dump(KMSG_DUMP_POWEROFF); |
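Editor's note: kernel_restart(), kernel_halt() and kernel_power_off() above now call migrate_to_reboot_cpu(), which pins the current task to the (first online) boot CPU instead of tearing down every non-boot CPU with disable_nonboot_cpus(). The snippet below is only a userspace analogue that pins the calling thread to CPU 0 with sched_setaffinity(); cpu_hotplug_disable() and the PF_NO_SETAFFINITY flag have no userspace equivalent and appear only as comments.

/*
 * Userspace analogue of migrate_to_reboot_cpu(): pin the calling thread
 * to the boot CPU (CPU 0 here) so later work runs on a known processor.
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <sched.h>
#include <unistd.h>

int main(void)
{
	cpu_set_t set;
	int cpu = 0;		/* "the boot cpu is always logical cpu 0" */

	/* kernel: cpu_hotplug_disable(), pick another CPU if 0 is offline */
	/* kernel: current->flags |= PF_NO_SETAFFINITY to block migration  */

	CPU_ZERO(&set);
	CPU_SET(cpu, &set);
	if (sched_setaffinity(0, sizeof(set), &set) != 0) {
		perror("sched_setaffinity");
		return 1;
	}
	printf("now pinned, running on CPU %d\n", sched_getcpu());
	return 0;
}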
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 12ff13a838c6..8f5b3b98577b 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c | |||
@@ -874,7 +874,6 @@ static void hardpps_update_phase(long error) | |||
874 | void __hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts) | 874 | void __hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts) |
875 | { | 875 | { |
876 | struct pps_normtime pts_norm, freq_norm; | 876 | struct pps_normtime pts_norm, freq_norm; |
877 | unsigned long flags; | ||
878 | 877 | ||
879 | pts_norm = pps_normalize_ts(*phase_ts); | 878 | pts_norm = pps_normalize_ts(*phase_ts); |
880 | 879 | ||
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 4430fa695b48..6d3f91631de6 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c | |||
@@ -583,6 +583,12 @@ again: | |||
583 | } | 583 | } |
584 | } | 584 | } |
585 | 585 | ||
586 | /* | ||
587 | * Remove the current cpu from the pending mask. The event is | ||
588 | * delivered immediately in tick_do_broadcast() ! | ||
589 | */ | ||
590 | cpumask_clear_cpu(smp_processor_id(), tick_broadcast_pending_mask); | ||
591 | |||
586 | /* Take care of enforced broadcast requests */ | 592 | /* Take care of enforced broadcast requests */ |
587 | cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask); | 593 | cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask); |
588 | cpumask_clear(tick_broadcast_force_mask); | 594 | cpumask_clear(tick_broadcast_force_mask); |
@@ -654,8 +660,8 @@ void tick_broadcast_oneshot_control(unsigned long reason) | |||
654 | 660 | ||
655 | raw_spin_lock_irqsave(&tick_broadcast_lock, flags); | 661 | raw_spin_lock_irqsave(&tick_broadcast_lock, flags); |
656 | if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) { | 662 | if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) { |
657 | WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask)); | ||
658 | if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) { | 663 | if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) { |
664 | WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask)); | ||
659 | clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN); | 665 | clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN); |
660 | /* | 666 | /* |
661 | * We only reprogram the broadcast timer if we | 667 | * We only reprogram the broadcast timer if we |
@@ -672,8 +678,6 @@ void tick_broadcast_oneshot_control(unsigned long reason) | |||
672 | } else { | 678 | } else { |
673 | if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) { | 679 | if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) { |
674 | clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); | 680 | clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); |
675 | if (dev->next_event.tv64 == KTIME_MAX) | ||
676 | goto out; | ||
677 | /* | 681 | /* |
678 | * The cpu which was handling the broadcast | 682 | * The cpu which was handling the broadcast |
679 | * timer marked this cpu in the broadcast | 683 | * timer marked this cpu in the broadcast |
@@ -688,6 +692,11 @@ void tick_broadcast_oneshot_control(unsigned long reason) | |||
688 | goto out; | 692 | goto out; |
689 | 693 | ||
690 | /* | 694 | /* |
695 | * Bail out if there is no next event. | ||
696 | */ | ||
697 | if (dev->next_event.tv64 == KTIME_MAX) | ||
698 | goto out; | ||
699 | /* | ||
691 | * If the pending bit is not set, then we are | 700 | * If the pending bit is not set, then we are |
692 | * either the CPU handling the broadcast | 701 | * either the CPU handling the broadcast |
693 | * interrupt or we got woken by something else. | 702 | * interrupt or we got woken by something else. |
@@ -771,10 +780,6 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc) | |||
771 | 780 | ||
772 | bc->event_handler = tick_handle_oneshot_broadcast; | 781 | bc->event_handler = tick_handle_oneshot_broadcast; |
773 | 782 | ||
774 | /* Take the do_timer update */ | ||
775 | if (!tick_nohz_full_cpu(cpu)) | ||
776 | tick_do_timer_cpu = cpu; | ||
777 | |||
778 | /* | 783 | /* |
779 | * We must be careful here. There might be other CPUs | 784 | * We must be careful here. There might be other CPUs |
780 | * waiting for periodic broadcast. We need to set the | 785 | * waiting for periodic broadcast. We need to set the |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index f4208138fbf4..0cf1c1453181 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -306,7 +306,7 @@ static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb, | |||
306 | * we can't safely shutdown that CPU. | 306 | * we can't safely shutdown that CPU. |
307 | */ | 307 | */ |
308 | if (have_nohz_full_mask && tick_do_timer_cpu == cpu) | 308 | if (have_nohz_full_mask && tick_do_timer_cpu == cpu) |
309 | return -EINVAL; | 309 | return NOTIFY_BAD; |
310 | break; | 310 | break; |
311 | } | 311 | } |
312 | return NOTIFY_OK; | 312 | return NOTIFY_OK; |
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 846d0a1f235e..48b9fffabdc2 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
@@ -991,6 +991,14 @@ static int timekeeping_suspend(void) | |||
991 | 991 | ||
992 | read_persistent_clock(&timekeeping_suspend_time); | 992 | read_persistent_clock(&timekeeping_suspend_time); |
993 | 993 | ||
994 | /* | ||
995 | * On some systems the persistent_clock can not be detected at | ||
996 | * timekeeping_init by its return value, so if we see a valid | ||
997 | * value returned, update the persistent_clock_exists flag. | ||
998 | */ | ||
999 | if (timekeeping_suspend_time.tv_sec || timekeeping_suspend_time.tv_nsec) | ||
1000 | persistent_clock_exist = true; | ||
1001 | |||
994 | raw_spin_lock_irqsave(&timekeeper_lock, flags); | 1002 | raw_spin_lock_irqsave(&timekeeper_lock, flags); |
995 | write_seqcount_begin(&timekeeper_seq); | 1003 | write_seqcount_begin(&timekeeper_seq); |
996 | timekeeping_forward_now(tk); | 1004 | timekeeping_forward_now(tk); |
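The timekeeping hunk latches persistent_clock_exist as soon as a non-zero reading is seen at suspend, since on some systems the persistent clock cannot be detected at boot. A standalone sketch of the same latch-on-first-valid-reading idea, in plain C rather than the kernel's read_persistent_clock() path:

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static bool persistent_clock_exists;

/* Latch the flag the first time the backing clock reports a real value. */
static void note_persistent_clock(const struct timespec *ts)
{
        if (ts->tv_sec || ts->tv_nsec)
                persistent_clock_exists = true;
}

int main(void)
{
        struct timespec ts = { 0, 0 };

        note_persistent_clock(&ts);     /* zero reading: still undetected */
        ts.tv_sec = 1370000000;
        note_persistent_clock(&ts);     /* non-zero: clock is known to exist */
        printf("persistent clock exists: %d\n", persistent_clock_exists);
        return 0;
}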
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index b549b0f5b977..6c508ff33c62 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
@@ -120,22 +120,22 @@ static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip); | |||
120 | 120 | ||
121 | /* | 121 | /* |
122 | * Traverse the ftrace_global_list, invoking all entries. The reason that we | 122 | * Traverse the ftrace_global_list, invoking all entries. The reason that we |
123 | * can use rcu_dereference_raw() is that elements removed from this list | 123 | * can use rcu_dereference_raw_notrace() is that elements removed from this list |
124 | * are simply leaked, so there is no need to interact with a grace-period | 124 | * are simply leaked, so there is no need to interact with a grace-period |
125 | * mechanism. The rcu_dereference_raw() calls are needed to handle | 125 | * mechanism. The rcu_dereference_raw_notrace() calls are needed to handle |
126 | * concurrent insertions into the ftrace_global_list. | 126 | * concurrent insertions into the ftrace_global_list. |
127 | * | 127 | * |
128 | * Silly Alpha and silly pointer-speculation compiler optimizations! | 128 | * Silly Alpha and silly pointer-speculation compiler optimizations! |
129 | */ | 129 | */ |
130 | #define do_for_each_ftrace_op(op, list) \ | 130 | #define do_for_each_ftrace_op(op, list) \ |
131 | op = rcu_dereference_raw(list); \ | 131 | op = rcu_dereference_raw_notrace(list); \ |
132 | do | 132 | do |
133 | 133 | ||
134 | /* | 134 | /* |
135 | * Optimized for just a single item in the list (as that is the normal case). | 135 | * Optimized for just a single item in the list (as that is the normal case). |
136 | */ | 136 | */ |
137 | #define while_for_each_ftrace_op(op) \ | 137 | #define while_for_each_ftrace_op(op) \ |
138 | while (likely(op = rcu_dereference_raw((op)->next)) && \ | 138 | while (likely(op = rcu_dereference_raw_notrace((op)->next)) && \ |
139 | unlikely((op) != &ftrace_list_end)) | 139 | unlikely((op) != &ftrace_list_end)) |
140 | 140 | ||
141 | static inline void ftrace_ops_init(struct ftrace_ops *ops) | 141 | static inline void ftrace_ops_init(struct ftrace_ops *ops) |
@@ -779,7 +779,7 @@ ftrace_find_profiled_func(struct ftrace_profile_stat *stat, unsigned long ip) | |||
779 | if (hlist_empty(hhd)) | 779 | if (hlist_empty(hhd)) |
780 | return NULL; | 780 | return NULL; |
781 | 781 | ||
782 | hlist_for_each_entry_rcu(rec, hhd, node) { | 782 | hlist_for_each_entry_rcu_notrace(rec, hhd, node) { |
783 | if (rec->ip == ip) | 783 | if (rec->ip == ip) |
784 | return rec; | 784 | return rec; |
785 | } | 785 | } |
@@ -1165,7 +1165,7 @@ ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip) | |||
1165 | 1165 | ||
1166 | hhd = &hash->buckets[key]; | 1166 | hhd = &hash->buckets[key]; |
1167 | 1167 | ||
1168 | hlist_for_each_entry_rcu(entry, hhd, hlist) { | 1168 | hlist_for_each_entry_rcu_notrace(entry, hhd, hlist) { |
1169 | if (entry->ip == ip) | 1169 | if (entry->ip == ip) |
1170 | return entry; | 1170 | return entry; |
1171 | } | 1171 | } |
@@ -1422,8 +1422,8 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip) | |||
1422 | struct ftrace_hash *notrace_hash; | 1422 | struct ftrace_hash *notrace_hash; |
1423 | int ret; | 1423 | int ret; |
1424 | 1424 | ||
1425 | filter_hash = rcu_dereference_raw(ops->filter_hash); | 1425 | filter_hash = rcu_dereference_raw_notrace(ops->filter_hash); |
1426 | notrace_hash = rcu_dereference_raw(ops->notrace_hash); | 1426 | notrace_hash = rcu_dereference_raw_notrace(ops->notrace_hash); |
1427 | 1427 | ||
1428 | if ((ftrace_hash_empty(filter_hash) || | 1428 | if ((ftrace_hash_empty(filter_hash) || |
1429 | ftrace_lookup_ip(filter_hash, ip)) && | 1429 | ftrace_lookup_ip(filter_hash, ip)) && |
@@ -2920,7 +2920,7 @@ static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip, | |||
2920 | * on the hash. rcu_read_lock is too dangerous here. | 2920 | * on the hash. rcu_read_lock is too dangerous here. |
2921 | */ | 2921 | */ |
2922 | preempt_disable_notrace(); | 2922 | preempt_disable_notrace(); |
2923 | hlist_for_each_entry_rcu(entry, hhd, node) { | 2923 | hlist_for_each_entry_rcu_notrace(entry, hhd, node) { |
2924 | if (entry->ip == ip) | 2924 | if (entry->ip == ip) |
2925 | entry->ops->func(ip, parent_ip, &entry->data); | 2925 | entry->ops->func(ip, parent_ip, &entry->data); |
2926 | } | 2926 | } |
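The ftrace.c conversions switch to the _notrace accessor variants because these list walks and hash lookups run inside the function tracer itself: if they used instrumented helpers, every traced call could re-enter the tracer. A standalone sketch of that hazard in plain C (the lookup functions are illustrative; the kernel's actual fix is the _notrace variants shown above):

#include <stdio.h>

static void trace_callback(const char *fn);

/* Instrumented lookup: simulates a helper that fires a trace hit on entry. */
static int lookup_traced(int key)
{
        trace_callback(__func__);
        return key * 2;
}

/* "notrace" variant: same work, no instrumentation, safe inside the tracer. */
static int lookup_notrace(int key)
{
        return key * 2;
}

/*
 * The tracer callback must only call notrace helpers; calling
 * lookup_traced() from here would re-enter the callback without bound.
 */
static void trace_callback(const char *fn)
{
        int cookie = lookup_notrace(42);
        printf("hit: %s (cookie %d)\n", fn, cookie);
}

int main(void)
{
        lookup_traced(7);
        return 0;
}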
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index b59aea2c48c2..e444ff88f0a4 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
@@ -620,6 +620,9 @@ int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu, | |||
620 | if (cpu == RING_BUFFER_ALL_CPUS) | 620 | if (cpu == RING_BUFFER_ALL_CPUS) |
621 | work = &buffer->irq_work; | 621 | work = &buffer->irq_work; |
622 | else { | 622 | else { |
623 | if (!cpumask_test_cpu(cpu, buffer->cpumask)) | ||
624 | return -EINVAL; | ||
625 | |||
623 | cpu_buffer = buffer->buffers[cpu]; | 626 | cpu_buffer = buffer->buffers[cpu]; |
624 | work = &cpu_buffer->irq_work; | 627 | work = &cpu_buffer->irq_work; |
625 | } | 628 | } |
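The ring_buffer_poll_wait() change rejects CPUs that are not in the buffer's cpumask before indexing buffers[cpu]. A standalone sketch of the same validate-before-index guard (plain C; the mask and slot names are illustrative, not the ring-buffer API):

#include <errno.h>
#include <stddef.h>

#define NR_SLOTS 8

struct per_cpu_buf { int dummy; };

static struct per_cpu_buf *buffers[NR_SLOTS];
static unsigned long present_mask;      /* stands in for buffer->cpumask */

/* Validate the index against the mask before dereferencing the slot. */
static int poll_wait_slot(int cpu, struct per_cpu_buf **out)
{
        if (cpu < 0 || cpu >= NR_SLOTS || !(present_mask & (1UL << cpu)))
                return -EINVAL;         /* unknown CPU: never index buffers[] */
        *out = buffers[cpu];
        return 0;
}

int main(void)
{
        struct per_cpu_buf b0 = { 0 }, *res;

        buffers[0] = &b0;
        present_mask = 1UL << 0;
        return poll_wait_slot(3, &res) == -EINVAL ? 0 : 1;
}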
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ae6fa2d1cdf7..e71a8be4a6ee 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -652,8 +652,6 @@ static struct { | |||
652 | ARCH_TRACE_CLOCKS | 652 | ARCH_TRACE_CLOCKS |
653 | }; | 653 | }; |
654 | 654 | ||
655 | int trace_clock_id; | ||
656 | |||
657 | /* | 655 | /* |
658 | * trace_parser_get_init - gets the buffer for trace parser | 656 | * trace_parser_get_init - gets the buffer for trace parser |
659 | */ | 657 | */ |
@@ -843,7 +841,15 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) | |||
843 | 841 | ||
844 | memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN); | 842 | memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN); |
845 | max_data->pid = tsk->pid; | 843 | max_data->pid = tsk->pid; |
846 | max_data->uid = task_uid(tsk); | 844 | /* |
845 | * If tsk == current, then use current_uid(), as that does not use | ||
846 | * RCU. The irq tracer can be called out of RCU scope. | ||
847 | */ | ||
848 | if (tsk == current) | ||
849 | max_data->uid = current_uid(); | ||
850 | else | ||
851 | max_data->uid = task_uid(tsk); | ||
852 | |||
847 | max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO; | 853 | max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO; |
848 | max_data->policy = tsk->policy; | 854 | max_data->policy = tsk->policy; |
849 | max_data->rt_priority = tsk->rt_priority; | 855 | max_data->rt_priority = tsk->rt_priority; |
@@ -2818,7 +2824,7 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) | |||
2818 | iter->iter_flags |= TRACE_FILE_ANNOTATE; | 2824 | iter->iter_flags |= TRACE_FILE_ANNOTATE; |
2819 | 2825 | ||
2820 | /* Output in nanoseconds only if we are using a clock in nanoseconds. */ | 2826 | /* Output in nanoseconds only if we are using a clock in nanoseconds. */ |
2821 | if (trace_clocks[trace_clock_id].in_ns) | 2827 | if (trace_clocks[tr->clock_id].in_ns) |
2822 | iter->iter_flags |= TRACE_FILE_TIME_IN_NS; | 2828 | iter->iter_flags |= TRACE_FILE_TIME_IN_NS; |
2823 | 2829 | ||
2824 | /* stop the trace while dumping if we are not opening "snapshot" */ | 2830 | /* stop the trace while dumping if we are not opening "snapshot" */ |
@@ -3817,7 +3823,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) | |||
3817 | iter->iter_flags |= TRACE_FILE_LAT_FMT; | 3823 | iter->iter_flags |= TRACE_FILE_LAT_FMT; |
3818 | 3824 | ||
3819 | /* Output in nanoseconds only if we are using a clock in nanoseconds. */ | 3825 | /* Output in nanoseconds only if we are using a clock in nanoseconds. */ |
3820 | if (trace_clocks[trace_clock_id].in_ns) | 3826 | if (trace_clocks[tr->clock_id].in_ns) |
3821 | iter->iter_flags |= TRACE_FILE_TIME_IN_NS; | 3827 | iter->iter_flags |= TRACE_FILE_TIME_IN_NS; |
3822 | 3828 | ||
3823 | iter->cpu_file = tc->cpu; | 3829 | iter->cpu_file = tc->cpu; |
@@ -5087,7 +5093,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf, | |||
5087 | cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu); | 5093 | cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu); |
5088 | trace_seq_printf(s, "bytes: %ld\n", cnt); | 5094 | trace_seq_printf(s, "bytes: %ld\n", cnt); |
5089 | 5095 | ||
5090 | if (trace_clocks[trace_clock_id].in_ns) { | 5096 | if (trace_clocks[tr->clock_id].in_ns) { |
5091 | /* local or global for trace_clock */ | 5097 | /* local or global for trace_clock */ |
5092 | t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu)); | 5098 | t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu)); |
5093 | usec_rem = do_div(t, USEC_PER_SEC); | 5099 | usec_rem = do_div(t, USEC_PER_SEC); |
@@ -6216,10 +6222,15 @@ __init static int tracer_alloc_buffers(void) | |||
6216 | 6222 | ||
6217 | trace_init_cmdlines(); | 6223 | trace_init_cmdlines(); |
6218 | 6224 | ||
6219 | register_tracer(&nop_trace); | 6225 | /* |
6220 | 6226 | * register_tracer() might reference current_trace, so it | |
6227 | * needs to be set before we register anything. This is | ||
6228 | * just a bootstrap of current_trace anyway. | ||
6229 | */ | ||
6221 | global_trace.current_trace = &nop_trace; | 6230 | global_trace.current_trace = &nop_trace; |
6222 | 6231 | ||
6232 | register_tracer(&nop_trace); | ||
6233 | |||
6223 | /* All seems OK, enable tracing */ | 6234 | /* All seems OK, enable tracing */ |
6224 | tracing_disabled = 0; | 6235 | tracing_disabled = 0; |
6225 | 6236 | ||
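Several of the trace.c hunks replace the file-scope trace_clock_id with the per-array tr->clock_id, so each trace instance formats timestamps according to its own clock rather than a shared global. A standalone sketch of the same global-to-per-instance move (plain C; the struct and field names are illustrative, not the kernel's):

#include <stdbool.h>
#include <stdio.h>

struct clock_desc {
        const char *name;
        bool in_ns;
};

static const struct clock_desc clocks[] = {
        { "local",   true  },
        { "counter", false },
};

/* Per-instance state: each trace array carries its own clock index. */
struct trace_instance {
        int clock_id;
};

static bool instance_uses_ns(const struct trace_instance *ti)
{
        /* Look up through the instance, never through a shared global. */
        return clocks[ti->clock_id].in_ns;
}

int main(void)
{
        struct trace_instance a = { .clock_id = 0 };
        struct trace_instance b = { .clock_id = 1 };

        printf("a in ns: %d, b in ns: %d\n",
               instance_uses_ns(&a), instance_uses_ns(&b));
        return 0;
}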
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 711ca7d3e7f1..20572ed88c5c 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h | |||
@@ -700,8 +700,6 @@ enum print_line_t print_trace_line(struct trace_iterator *iter); | |||
700 | 700 | ||
701 | extern unsigned long trace_flags; | 701 | extern unsigned long trace_flags; |
702 | 702 | ||
703 | extern int trace_clock_id; | ||
704 | |||
705 | /* Standard output formatting function used for function return traces */ | 703 | /* Standard output formatting function used for function return traces */ |
706 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | 704 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
707 | 705 | ||
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 7a0cf68027cc..27963e2bf4bf 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c | |||
@@ -2072,8 +2072,10 @@ event_enable_func(struct ftrace_hash *hash, | |||
2072 | out_reg: | 2072 | out_reg: |
2073 | /* Don't let event modules unload while probe registered */ | 2073 | /* Don't let event modules unload while probe registered */ |
2074 | ret = try_module_get(file->event_call->mod); | 2074 | ret = try_module_get(file->event_call->mod); |
2075 | if (!ret) | 2075 | if (!ret) { |
2076 | ret = -EBUSY; | ||
2076 | goto out_free; | 2077 | goto out_free; |
2078 | } | ||
2077 | 2079 | ||
2078 | ret = __ftrace_event_enable_disable(file, 1, 1); | 2080 | ret = __ftrace_event_enable_disable(file, 1, 1); |
2079 | if (ret < 0) | 2081 | if (ret < 0) |
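The trace_events.c fix converts try_module_get()'s boolean result into a real error code: without the new assignment, a failed module grab would reach the cleanup path with ret == 0 and be reported as success. A short sketch of the pattern (kernel-style C; example_register() and the placeholder registration step are hypothetical):

#include <linux/errno.h>
#include <linux/module.h>

static int example_register(struct module *mod)
{
        int ret;

        /* try_module_get() returns true/false, not an errno. */
        if (!try_module_get(mod)) {
                ret = -EBUSY;           /* translate the failure explicitly */
                goto out;
        }

        ret = 0;                        /* ... the real registration would go here ... */
        if (ret < 0)
                module_put(mod);
out:
        return ret;
}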
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 55e2cf66967b..2901e3b88590 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c | |||
@@ -1159,7 +1159,7 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr) | |||
1159 | /* stop the tracing. */ | 1159 | /* stop the tracing. */ |
1160 | tracing_stop(); | 1160 | tracing_stop(); |
1161 | /* check the trace buffer */ | 1161 | /* check the trace buffer */ |
1162 | ret = trace_test_buffer(tr, &count); | 1162 | ret = trace_test_buffer(&tr->trace_buffer, &count); |
1163 | trace->reset(tr); | 1163 | trace->reset(tr); |
1164 | tracing_start(); | 1164 | tracing_start(); |
1165 | 1165 | ||
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 1ae602809efb..ee8e29a2320c 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -296,7 +296,7 @@ static DEFINE_HASHTABLE(unbound_pool_hash, UNBOUND_POOL_HASH_ORDER); | |||
296 | static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS]; | 296 | static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS]; |
297 | 297 | ||
298 | struct workqueue_struct *system_wq __read_mostly; | 298 | struct workqueue_struct *system_wq __read_mostly; |
299 | EXPORT_SYMBOL_GPL(system_wq); | 299 | EXPORT_SYMBOL(system_wq); |
300 | struct workqueue_struct *system_highpri_wq __read_mostly; | 300 | struct workqueue_struct *system_highpri_wq __read_mostly; |
301 | EXPORT_SYMBOL_GPL(system_highpri_wq); | 301 | EXPORT_SYMBOL_GPL(system_highpri_wq); |
302 | struct workqueue_struct *system_long_wq __read_mostly; | 302 | struct workqueue_struct *system_long_wq __read_mostly; |
@@ -1411,7 +1411,7 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq, | |||
1411 | local_irq_restore(flags); | 1411 | local_irq_restore(flags); |
1412 | return ret; | 1412 | return ret; |
1413 | } | 1413 | } |
1414 | EXPORT_SYMBOL_GPL(queue_work_on); | 1414 | EXPORT_SYMBOL(queue_work_on); |
1415 | 1415 | ||
1416 | void delayed_work_timer_fn(unsigned long __data) | 1416 | void delayed_work_timer_fn(unsigned long __data) |
1417 | { | 1417 | { |
@@ -1485,7 +1485,7 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq, | |||
1485 | local_irq_restore(flags); | 1485 | local_irq_restore(flags); |
1486 | return ret; | 1486 | return ret; |
1487 | } | 1487 | } |
1488 | EXPORT_SYMBOL_GPL(queue_delayed_work_on); | 1488 | EXPORT_SYMBOL(queue_delayed_work_on); |
1489 | 1489 | ||
1490 | /** | 1490 | /** |
1491 | * mod_delayed_work_on - modify delay of or queue a delayed work on specific CPU | 1491 | * mod_delayed_work_on - modify delay of or queue a delayed work on specific CPU |
@@ -2059,6 +2059,7 @@ static bool manage_workers(struct worker *worker) | |||
2059 | if (unlikely(!mutex_trylock(&pool->manager_mutex))) { | 2059 | if (unlikely(!mutex_trylock(&pool->manager_mutex))) { |
2060 | spin_unlock_irq(&pool->lock); | 2060 | spin_unlock_irq(&pool->lock); |
2061 | mutex_lock(&pool->manager_mutex); | 2061 | mutex_lock(&pool->manager_mutex); |
2062 | spin_lock_irq(&pool->lock); | ||
2062 | ret = true; | 2063 | ret = true; |
2063 | } | 2064 | } |
2064 | 2065 | ||
@@ -4904,7 +4905,8 @@ static void __init wq_numa_init(void) | |||
4904 | BUG_ON(!tbl); | 4905 | BUG_ON(!tbl); |
4905 | 4906 | ||
4906 | for_each_node(node) | 4907 | for_each_node(node) |
4907 | BUG_ON(!alloc_cpumask_var_node(&tbl[node], GFP_KERNEL, node)); | 4908 | BUG_ON(!alloc_cpumask_var_node(&tbl[node], GFP_KERNEL, |
4909 | node_online(node) ? node : NUMA_NO_NODE)); | ||
4908 | 4910 | ||
4909 | for_each_possible_cpu(cpu) { | 4911 | for_each_possible_cpu(cpu) { |
4910 | node = cpu_to_node(cpu); | 4912 | node = cpu_to_node(cpu); |
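Of the workqueue changes, the manage_workers() one-liner is the subtle fix: the slow path drops pool->lock so it can sleep on manager_mutex in the correct order, but previously forgot to re-take pool->lock before returning to a caller that assumes the lock is still held. A standalone analogy using pthread mutexes (the kernel code uses a spinlock plus a mutex; the names here are illustrative):

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t manager_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t pool_lock     = PTHREAD_MUTEX_INITIALIZER;

/*
 * Called with pool_lock held; must also return with pool_lock held.
 * If the manager mutex cannot be taken opportunistically, drop the pool
 * lock, block on the mutex, then RE-ACQUIRE the pool lock -- omitting
 * that last step is exactly the bug the hunk fixes.
 */
static bool become_manager(void)
{
        if (pthread_mutex_trylock(&manager_mutex) != 0) {
                pthread_mutex_unlock(&pool_lock);
                pthread_mutex_lock(&manager_mutex);
                pthread_mutex_lock(&pool_lock);     /* restore caller's invariant */
                return true;
        }
        return false;
}

int main(void)
{
        pthread_mutex_lock(&pool_lock);
        become_manager();
        pthread_mutex_unlock(&pool_lock);       /* still held here, as required */
        pthread_mutex_unlock(&manager_mutex);
        return 0;
}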