Diffstat (limited to 'kernel')
-rw-r--r--  kernel/audit.c                  2
-rw-r--r--  kernel/audit_tree.c             1
-rw-r--r--  kernel/cgroup.c                31
-rw-r--r--  kernel/context_tracking.c       1
-rw-r--r--  kernel/cpu.c                   55
-rw-r--r--  kernel/cpu/idle.c              17
-rw-r--r--  kernel/events/core.c          233
-rw-r--r--  kernel/events/internal.h        4
-rw-r--r--  kernel/exit.c                   2
-rw-r--r--  kernel/irq/irqdomain.c          9
-rw-r--r--  kernel/kprobes.c               30
-rw-r--r--  kernel/printk.c                91
-rw-r--r--  kernel/range.c                  8
-rw-r--r--  kernel/rcutree.c               21
-rw-r--r--  kernel/rcutree.h                2
-rw-r--r--  kernel/sched/core.c             2
-rw-r--r--  kernel/sched/cputime.c          6
-rw-r--r--  kernel/softirq.c               13
-rw-r--r--  kernel/sys.c                   29
-rw-r--r--  kernel/time/ntp.c               1
-rw-r--r--  kernel/time/tick-broadcast.c   12
-rw-r--r--  kernel/time/tick-sched.c        2
-rw-r--r--  kernel/time/timekeeping.c       8
-rw-r--r--  kernel/trace/ftrace.c          18
-rw-r--r--  kernel/trace/trace.c           18
-rw-r--r--  kernel/trace/trace.h            2
-rw-r--r--  kernel/trace/trace_selftest.c   2
27 files changed, 403 insertions, 217 deletions
diff --git a/kernel/audit.c b/kernel/audit.c
index 21c7fa615bd3..91e53d04b6a9 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1056,7 +1056,7 @@ static inline void audit_get_stamp(struct audit_context *ctx,
 static void wait_for_auditd(unsigned long sleep_time)
 {
 	DECLARE_WAITQUEUE(wait, current);
-	set_current_state(TASK_INTERRUPTIBLE);
+	set_current_state(TASK_UNINTERRUPTIBLE);
 	add_wait_queue(&audit_backlog_wait, &wait);
 
 	if (audit_backlog_limit &&
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index a291aa23fb3f..43c307dc9453 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -658,6 +658,7 @@ int audit_add_tree_rule(struct audit_krule *rule)
 	struct vfsmount *mnt;
 	int err;
 
+	rule->tree = NULL;
 	list_for_each_entry(tree, &tree_list, list) {
 		if (!strcmp(seed->pathname, tree->pathname)) {
 			put_tree(seed);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 2a9926275f80..a7c9e6ddb979 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1686,11 +1686,14 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 	 */
 	cgroup_drop_root(opts.new_root);
 
-	if (((root->flags | opts.flags) & CGRP_ROOT_SANE_BEHAVIOR) &&
-	    root->flags != opts.flags) {
-		pr_err("cgroup: sane_behavior: new mount options should match the existing superblock\n");
-		ret = -EINVAL;
-		goto drop_new_super;
+	if (root->flags != opts.flags) {
+		if ((root->flags | opts.flags) & CGRP_ROOT_SANE_BEHAVIOR) {
+			pr_err("cgroup: sane_behavior: new mount options should match the existing superblock\n");
+			ret = -EINVAL;
+			goto drop_new_super;
+		} else {
+			pr_warning("cgroup: new mount options do not match the existing superblock, will be ignored\n");
+		}
 	}
 
 	/* no subsys rebinding, so refcounts don't change */
@@ -2699,13 +2702,14 @@ static int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys,
 		goto out;
 	}
 
+	cfe->type = (void *)cft;
+	cfe->dentry = dentry;
+	dentry->d_fsdata = cfe;
+	simple_xattrs_init(&cfe->xattrs);
+
 	mode = cgroup_file_mode(cft);
 	error = cgroup_create_file(dentry, mode | S_IFREG, cgrp->root->sb);
 	if (!error) {
-		cfe->type = (void *)cft;
-		cfe->dentry = dentry;
-		dentry->d_fsdata = cfe;
-		simple_xattrs_init(&cfe->xattrs);
 		list_add_tail(&cfe->node, &parent->files);
 		cfe = NULL;
 	}
@@ -2953,11 +2957,8 @@ struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
 	WARN_ON_ONCE(!rcu_read_lock_held());
 
 	/* if first iteration, pretend we just visited @cgroup */
-	if (!pos) {
-		if (list_empty(&cgroup->children))
-			return NULL;
+	if (!pos)
 		pos = cgroup;
-	}
 
 	/* visit the first child if exists */
 	next = list_first_or_null_rcu(&pos->children, struct cgroup, sibling);
@@ -2965,14 +2966,14 @@ struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
 		return next;
 
 	/* no child, visit my or the closest ancestor's next sibling */
-	do {
+	while (pos != cgroup) {
 		next = list_entry_rcu(pos->sibling.next, struct cgroup,
 				      sibling);
 		if (&next->sibling != &pos->parent->children)
 			return next;
 
 		pos = pos->parent;
-	} while (pos != cgroup);
+	}
 
 	return NULL;
 }
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 66677003e223..383f8231e436 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -15,7 +15,6 @@
  */
 
 #include <linux/context_tracking.h>
-#include <linux/kvm_host.h>
 #include <linux/rcupdate.h>
 #include <linux/sched.h>
 #include <linux/hardirq.h>
diff --git a/kernel/cpu.c b/kernel/cpu.c
index b5e4ab2d427e..198a38883e64 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -133,6 +133,27 @@ static void cpu_hotplug_done(void)
 	mutex_unlock(&cpu_hotplug.lock);
 }
 
+/*
+ * Wait for currently running CPU hotplug operations to complete (if any) and
+ * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
+ * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the
+ * hotplug path before performing hotplug operations. So acquiring that lock
+ * guarantees mutual exclusion from any currently running hotplug operations.
+ */
+void cpu_hotplug_disable(void)
+{
+	cpu_maps_update_begin();
+	cpu_hotplug_disabled = 1;
+	cpu_maps_update_done();
+}
+
+void cpu_hotplug_enable(void)
+{
+	cpu_maps_update_begin();
+	cpu_hotplug_disabled = 0;
+	cpu_maps_update_done();
+}
+
 #else /* #if CONFIG_HOTPLUG_CPU */
 static void cpu_hotplug_begin(void) {}
 static void cpu_hotplug_done(void) {}
@@ -541,36 +562,6 @@ static int __init alloc_frozen_cpus(void)
 core_initcall(alloc_frozen_cpus);
 
 /*
- * Prevent regular CPU hotplug from racing with the freezer, by disabling CPU
- * hotplug when tasks are about to be frozen. Also, don't allow the freezer
- * to continue until any currently running CPU hotplug operation gets
- * completed.
- * To modify the 'cpu_hotplug_disabled' flag, we need to acquire the
- * 'cpu_add_remove_lock'. And this same lock is also taken by the regular
- * CPU hotplug path and released only after it is complete. Thus, we
- * (and hence the freezer) will block here until any currently running CPU
- * hotplug operation gets completed.
- */
-void cpu_hotplug_disable_before_freeze(void)
-{
-	cpu_maps_update_begin();
-	cpu_hotplug_disabled = 1;
-	cpu_maps_update_done();
-}
-
-
-/*
- * When tasks have been thawed, re-enable regular CPU hotplug (which had been
- * disabled while beginning to freeze tasks).
- */
-void cpu_hotplug_enable_after_thaw(void)
-{
-	cpu_maps_update_begin();
-	cpu_hotplug_disabled = 0;
-	cpu_maps_update_done();
-}
-
-/*
  * When callbacks for CPU hotplug notifications are being executed, we must
  * ensure that the state of the system with respect to the tasks being frozen
  * or not, as reported by the notification, remains unchanged *throughout the
@@ -589,12 +580,12 @@ cpu_hotplug_pm_callback(struct notifier_block *nb,
 
 	case PM_SUSPEND_PREPARE:
 	case PM_HIBERNATION_PREPARE:
-		cpu_hotplug_disable_before_freeze();
+		cpu_hotplug_disable();
 		break;
 
 	case PM_POST_SUSPEND:
 	case PM_POST_HIBERNATION:
-		cpu_hotplug_enable_after_thaw();
+		cpu_hotplug_enable();
 		break;
 
 	default:
diff --git a/kernel/cpu/idle.c b/kernel/cpu/idle.c
index d5585f5e038e..e695c0a0bcb5 100644
--- a/kernel/cpu/idle.c
+++ b/kernel/cpu/idle.c
@@ -5,6 +5,7 @@
 #include <linux/cpu.h>
 #include <linux/tick.h>
 #include <linux/mm.h>
+#include <linux/stackprotector.h>
 
 #include <asm/tlb.h>
 
@@ -58,6 +59,7 @@ void __weak arch_cpu_idle_dead(void) { }
 void __weak arch_cpu_idle(void)
 {
 	cpu_idle_force_poll = 1;
+	local_irq_enable();
 }
 
 /*
@@ -112,6 +114,21 @@ static void cpu_idle_loop(void)
 
 void cpu_startup_entry(enum cpuhp_state state)
 {
+	/*
+	 * This #ifdef needs to die, but it's too late in the cycle to
+	 * make this generic (arm and sh have never invoked the canary
+	 * init for the non boot cpus!). Will be fixed in 3.11
+	 */
+#ifdef CONFIG_X86
+	/*
+	 * If we're the non-boot CPU, nothing set the stack canary up
+	 * for us. The boot CPU already has it initialized but no harm
+	 * in doing it again. This is a good place for updating it, as
+	 * we wont ever return from this function (so the invalid
+	 * canaries already on the stack wont ever trigger).
+	 */
+	boot_init_stack_canary();
+#endif
 	current_set_polling();
 	arch_cpu_idle_prepare();
 	cpu_idle_loop();
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 9dc297faf7c0..b391907d5352 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -196,9 +196,6 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
 static void update_context_time(struct perf_event_context *ctx);
 static u64 perf_event_time(struct perf_event *event);
 
-static void ring_buffer_attach(struct perf_event *event,
-			       struct ring_buffer *rb);
-
 void __weak perf_event_print_debug(void) { }
 
 extern __weak const char *perf_pmu_name(void)
@@ -2918,6 +2915,7 @@ static void free_event_rcu(struct rcu_head *head)
 }
 
 static void ring_buffer_put(struct ring_buffer *rb);
+static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb);
 
 static void free_event(struct perf_event *event)
 {
@@ -2942,15 +2940,30 @@ static void free_event(struct perf_event *event)
 		if (has_branch_stack(event)) {
 			static_key_slow_dec_deferred(&perf_sched_events);
 			/* is system-wide event */
-			if (!(event->attach_state & PERF_ATTACH_TASK))
+			if (!(event->attach_state & PERF_ATTACH_TASK)) {
 				atomic_dec(&per_cpu(perf_branch_stack_events,
 						    event->cpu));
+			}
 		}
 	}
 
 	if (event->rb) {
-		ring_buffer_put(event->rb);
-		event->rb = NULL;
+		struct ring_buffer *rb;
+
+		/*
+		 * Can happen when we close an event with re-directed output.
+		 *
+		 * Since we have a 0 refcount, perf_mmap_close() will skip
+		 * over us; possibly making our ring_buffer_put() the last.
+		 */
+		mutex_lock(&event->mmap_mutex);
+		rb = event->rb;
+		if (rb) {
+			rcu_assign_pointer(event->rb, NULL);
+			ring_buffer_detach(event, rb);
+			ring_buffer_put(rb); /* could be last */
+		}
+		mutex_unlock(&event->mmap_mutex);
 	}
 
 	if (is_cgroup_event(event))
@@ -3188,30 +3201,13 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
 	unsigned int events = POLL_HUP;
 
 	/*
-	 * Race between perf_event_set_output() and perf_poll(): perf_poll()
-	 * grabs the rb reference but perf_event_set_output() overrides it.
-	 * Here is the timeline for two threads T1, T2:
-	 * t0: T1, rb = rcu_dereference(event->rb)
-	 * t1: T2, old_rb = event->rb
-	 * t2: T2, event->rb = new rb
-	 * t3: T2, ring_buffer_detach(old_rb)
-	 * t4: T1, ring_buffer_attach(rb1)
-	 * t5: T1, poll_wait(event->waitq)
-	 *
-	 * To avoid this problem, we grab mmap_mutex in perf_poll()
-	 * thereby ensuring that the assignment of the new ring buffer
-	 * and the detachment of the old buffer appear atomic to perf_poll()
+	 * Pin the event->rb by taking event->mmap_mutex; otherwise
+	 * perf_event_set_output() can swizzle our rb and make us miss wakeups.
 	 */
 	mutex_lock(&event->mmap_mutex);
-
-	rcu_read_lock();
-	rb = rcu_dereference(event->rb);
-	if (rb) {
-		ring_buffer_attach(event, rb);
+	rb = event->rb;
+	if (rb)
 		events = atomic_xchg(&rb->poll, 0);
-	}
-	rcu_read_unlock();
-
 	mutex_unlock(&event->mmap_mutex);
 
 	poll_wait(file, &event->waitq, wait);
@@ -3521,16 +3517,12 @@ static void ring_buffer_attach(struct perf_event *event,
 		return;
 
 	spin_lock_irqsave(&rb->event_lock, flags);
-	if (!list_empty(&event->rb_entry))
-		goto unlock;
-
-	list_add(&event->rb_entry, &rb->event_list);
-unlock:
+	if (list_empty(&event->rb_entry))
+		list_add(&event->rb_entry, &rb->event_list);
 	spin_unlock_irqrestore(&rb->event_lock, flags);
 }
 
-static void ring_buffer_detach(struct perf_event *event,
-			       struct ring_buffer *rb)
+static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb)
 {
 	unsigned long flags;
 
@@ -3549,13 +3541,10 @@ static void ring_buffer_wakeup(struct perf_event *event)
 
 	rcu_read_lock();
 	rb = rcu_dereference(event->rb);
-	if (!rb)
-		goto unlock;
-
-	list_for_each_entry_rcu(event, &rb->event_list, rb_entry)
-		wake_up_all(&event->waitq);
-
-unlock:
+	if (rb) {
+		list_for_each_entry_rcu(event, &rb->event_list, rb_entry)
+			wake_up_all(&event->waitq);
+	}
 	rcu_read_unlock();
 }
 
@@ -3584,18 +3573,10 @@ static struct ring_buffer *ring_buffer_get(struct perf_event *event)
 
 static void ring_buffer_put(struct ring_buffer *rb)
 {
-	struct perf_event *event, *n;
-	unsigned long flags;
-
 	if (!atomic_dec_and_test(&rb->refcount))
 		return;
 
-	spin_lock_irqsave(&rb->event_lock, flags);
-	list_for_each_entry_safe(event, n, &rb->event_list, rb_entry) {
-		list_del_init(&event->rb_entry);
-		wake_up_all(&event->waitq);
-	}
-	spin_unlock_irqrestore(&rb->event_lock, flags);
+	WARN_ON_ONCE(!list_empty(&rb->event_list));
 
 	call_rcu(&rb->rcu_head, rb_free_rcu);
 }
@@ -3605,26 +3586,100 @@ static void perf_mmap_open(struct vm_area_struct *vma)
 	struct perf_event *event = vma->vm_file->private_data;
 
 	atomic_inc(&event->mmap_count);
+	atomic_inc(&event->rb->mmap_count);
 }
 
+/*
+ * A buffer can be mmap()ed multiple times; either directly through the same
+ * event, or through other events by use of perf_event_set_output().
+ *
+ * In order to undo the VM accounting done by perf_mmap() we need to destroy
+ * the buffer here, where we still have a VM context. This means we need
+ * to detach all events redirecting to us.
+ */
 static void perf_mmap_close(struct vm_area_struct *vma)
 {
 	struct perf_event *event = vma->vm_file->private_data;
 
-	if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) {
-		unsigned long size = perf_data_size(event->rb);
-		struct user_struct *user = event->mmap_user;
-		struct ring_buffer *rb = event->rb;
+	struct ring_buffer *rb = event->rb;
+	struct user_struct *mmap_user = rb->mmap_user;
+	int mmap_locked = rb->mmap_locked;
+	unsigned long size = perf_data_size(rb);
+
+	atomic_dec(&rb->mmap_count);
+
+	if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
+		return;
 
-		atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
-		vma->vm_mm->pinned_vm -= event->mmap_locked;
-		rcu_assign_pointer(event->rb, NULL);
-		ring_buffer_detach(event, rb);
+	/* Detach current event from the buffer. */
+	rcu_assign_pointer(event->rb, NULL);
+	ring_buffer_detach(event, rb);
+	mutex_unlock(&event->mmap_mutex);
+
+	/* If there's still other mmap()s of this buffer, we're done. */
+	if (atomic_read(&rb->mmap_count)) {
+		ring_buffer_put(rb); /* can't be last */
+		return;
+	}
+
+	/*
+	 * No other mmap()s, detach from all other events that might redirect
+	 * into the now unreachable buffer. Somewhat complicated by the
+	 * fact that rb::event_lock otherwise nests inside mmap_mutex.
+	 */
+again:
+	rcu_read_lock();
+	list_for_each_entry_rcu(event, &rb->event_list, rb_entry) {
+		if (!atomic_long_inc_not_zero(&event->refcount)) {
+			/*
+			 * This event is en-route to free_event() which will
+			 * detach it and remove it from the list.
+			 */
+			continue;
+		}
+		rcu_read_unlock();
+
+		mutex_lock(&event->mmap_mutex);
+		/*
+		 * Check we didn't race with perf_event_set_output() which can
+		 * swizzle the rb from under us while we were waiting to
+		 * acquire mmap_mutex.
+		 *
+		 * If we find a different rb; ignore this event, a next
+		 * iteration will no longer find it on the list. We have to
+		 * still restart the iteration to make sure we're not now
+		 * iterating the wrong list.
+		 */
+		if (event->rb == rb) {
+			rcu_assign_pointer(event->rb, NULL);
+			ring_buffer_detach(event, rb);
+			ring_buffer_put(rb); /* can't be last, we still have one */
+		}
 		mutex_unlock(&event->mmap_mutex);
+		put_event(event);
 
-		ring_buffer_put(rb);
-		free_uid(user);
+		/*
+		 * Restart the iteration; either we're on the wrong list or
+		 * destroyed its integrity by doing a deletion.
+		 */
+		goto again;
 	}
+	rcu_read_unlock();
+
+	/*
+	 * It could be there's still a few 0-ref events on the list; they'll
+	 * get cleaned up by free_event() -- they'll also still have their
+	 * ref on the rb and will free it whenever they are done with it.
+	 *
+	 * Aside from that, this buffer is 'fully' detached and unmapped,
+	 * undo the VM accounting.
+	 */
+
+	atomic_long_sub((size >> PAGE_SHIFT) + 1, &mmap_user->locked_vm);
+	vma->vm_mm->pinned_vm -= mmap_locked;
+	free_uid(mmap_user);
+
+	ring_buffer_put(rb); /* could be last */
 }
 
 static const struct vm_operations_struct perf_mmap_vmops = {
@@ -3674,12 +3729,24 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 		return -EINVAL;
 
 	WARN_ON_ONCE(event->ctx->parent_ctx);
+again:
 	mutex_lock(&event->mmap_mutex);
 	if (event->rb) {
-		if (event->rb->nr_pages == nr_pages)
-			atomic_inc(&event->rb->refcount);
-		else
+		if (event->rb->nr_pages != nr_pages) {
 			ret = -EINVAL;
+			goto unlock;
+		}
+
+		if (!atomic_inc_not_zero(&event->rb->mmap_count)) {
+			/*
+			 * Raced against perf_mmap_close() through
+			 * perf_event_set_output(). Try again, hope for better
+			 * luck.
+			 */
+			mutex_unlock(&event->mmap_mutex);
+			goto again;
+		}
+
 		goto unlock;
 	}
 
@@ -3720,12 +3787,16 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 		ret = -ENOMEM;
 		goto unlock;
 	}
-	rcu_assign_pointer(event->rb, rb);
+
+	atomic_set(&rb->mmap_count, 1);
+	rb->mmap_locked = extra;
+	rb->mmap_user = get_current_user();
 
 	atomic_long_add(user_extra, &user->locked_vm);
-	event->mmap_locked = extra;
-	event->mmap_user = get_current_user();
-	vma->vm_mm->pinned_vm += event->mmap_locked;
+	vma->vm_mm->pinned_vm += extra;
+
+	ring_buffer_attach(event, rb);
+	rcu_assign_pointer(event->rb, rb);
 
 	perf_event_update_userpage(event);
 
@@ -3734,7 +3805,11 @@ unlock:
 	atomic_inc(&event->mmap_count);
 	mutex_unlock(&event->mmap_mutex);
 
-	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
+	/*
+	 * Since pinned accounting is per vm we cannot allow fork() to copy our
+	 * vma.
+	 */
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_ops = &perf_mmap_vmops;
 
 	return ret;
@@ -6412,6 +6487,8 @@ set:
 	if (atomic_read(&event->mmap_count))
 		goto unlock;
 
+	old_rb = event->rb;
+
 	if (output_event) {
 		/* get the rb we want to redirect to */
 		rb = ring_buffer_get(output_event);
@@ -6419,16 +6496,28 @@ set:
 			goto unlock;
 	}
 
-	old_rb = event->rb;
-	rcu_assign_pointer(event->rb, rb);
 	if (old_rb)
 		ring_buffer_detach(event, old_rb);
+
+	if (rb)
+		ring_buffer_attach(event, rb);
+
+	rcu_assign_pointer(event->rb, rb);
+
+	if (old_rb) {
+		ring_buffer_put(old_rb);
+		/*
+		 * Since we detached before setting the new rb, so that we
+		 * could attach the new rb, we could have missed a wakeup.
+		 * Provide it now.
+		 */
+		wake_up_all(&event->waitq);
+	}
+
 	ret = 0;
 unlock:
 	mutex_unlock(&event->mmap_mutex);
 
-	if (old_rb)
-		ring_buffer_put(old_rb);
 out:
 	return ret;
 }
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index eb675c4d59df..ca6599723be5 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -31,6 +31,10 @@ struct ring_buffer {
 	spinlock_t			event_lock;
 	struct list_head		event_list;
 
+	atomic_t			mmap_count;
+	unsigned long			mmap_locked;
+	struct user_struct		*mmap_user;
+
 	struct perf_event_mmap_page	*user_page;
 	void				*data_pages[0];
 };
diff --git a/kernel/exit.c b/kernel/exit.c
index af2eb3cbd499..7bb73f9d09db 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -649,7 +649,6 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
 	 * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
 	 */
 	forget_original_parent(tsk);
-	exit_task_namespaces(tsk);
 
 	write_lock_irq(&tasklist_lock);
 	if (group_dead)
@@ -795,6 +794,7 @@ void do_exit(long code)
 	exit_shm(tsk);
 	exit_files(tsk);
 	exit_fs(tsk);
+	exit_task_namespaces(tsk);
 	exit_task_work(tsk);
 	check_stack_usage();
 	exit_thread();
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 5a83dde8ca0c..54a4d5223238 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -143,7 +143,10 @@ static unsigned int irq_domain_legacy_revmap(struct irq_domain *domain,
  * irq_domain_add_simple() - Allocate and register a simple irq_domain.
  * @of_node: pointer to interrupt controller's device tree node.
  * @size: total number of irqs in mapping
- * @first_irq: first number of irq block assigned to the domain
+ * @first_irq: first number of irq block assigned to the domain,
+ *	pass zero to assign irqs on-the-fly. This will result in a
+ *	linear IRQ domain so it is important to use irq_create_mapping()
+ *	for each used IRQ, especially when SPARSE_IRQ is enabled.
  * @ops: map/unmap domain callbacks
  * @host_data: Controller private data pointer
  *
@@ -191,6 +194,7 @@ struct irq_domain *irq_domain_add_simple(struct device_node *of_node,
 	/* A linear domain is the default */
 	return irq_domain_add_linear(of_node, size, ops, host_data);
 }
+EXPORT_SYMBOL_GPL(irq_domain_add_simple);
 
 /**
  * irq_domain_add_legacy() - Allocate and register a legacy revmap irq_domain.
@@ -397,11 +401,12 @@ static void irq_domain_disassociate_many(struct irq_domain *domain,
 	while (count--) {
 		int irq = irq_base + count;
 		struct irq_data *irq_data = irq_get_irq_data(irq);
-		irq_hw_number_t hwirq = irq_data->hwirq;
+		irq_hw_number_t hwirq;
 
 		if (WARN_ON(!irq_data || irq_data->domain != domain))
 			continue;
 
+		hwirq = irq_data->hwirq;
 		irq_set_status_flags(irq, IRQ_NOREQUEST);
 
 		/* remove chip and handler */
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 3fed7f0cbcdf..bddf3b201a48 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -467,6 +467,7 @@ static struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr)
 /* Optimization staging list, protected by kprobe_mutex */
 static LIST_HEAD(optimizing_list);
 static LIST_HEAD(unoptimizing_list);
+static LIST_HEAD(freeing_list);
 
 static void kprobe_optimizer(struct work_struct *work);
 static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer);
@@ -504,7 +505,7 @@ static __kprobes void do_optimize_kprobes(void)
  * Unoptimize (replace a jump with a breakpoint and remove the breakpoint
  * if need) kprobes listed on unoptimizing_list.
  */
-static __kprobes void do_unoptimize_kprobes(struct list_head *free_list)
+static __kprobes void do_unoptimize_kprobes(void)
 {
 	struct optimized_kprobe *op, *tmp;
 
@@ -515,9 +516,9 @@ static __kprobes void do_unoptimize_kprobes(struct list_head *free_list)
 	/* Ditto to do_optimize_kprobes */
 	get_online_cpus();
 	mutex_lock(&text_mutex);
-	arch_unoptimize_kprobes(&unoptimizing_list, free_list);
+	arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
 	/* Loop free_list for disarming */
-	list_for_each_entry_safe(op, tmp, free_list, list) {
+	list_for_each_entry_safe(op, tmp, &freeing_list, list) {
 		/* Disarm probes if marked disabled */
 		if (kprobe_disabled(&op->kp))
 			arch_disarm_kprobe(&op->kp);
@@ -536,11 +537,11 @@ static __kprobes void do_unoptimize_kprobes(struct list_head *free_list)
 }
 
 /* Reclaim all kprobes on the free_list */
-static __kprobes void do_free_cleaned_kprobes(struct list_head *free_list)
+static __kprobes void do_free_cleaned_kprobes(void)
 {
 	struct optimized_kprobe *op, *tmp;
 
-	list_for_each_entry_safe(op, tmp, free_list, list) {
+	list_for_each_entry_safe(op, tmp, &freeing_list, list) {
 		BUG_ON(!kprobe_unused(&op->kp));
 		list_del_init(&op->list);
 		free_aggr_kprobe(&op->kp);
@@ -556,8 +557,6 @@ static __kprobes void kick_kprobe_optimizer(void)
 /* Kprobe jump optimizer */
 static __kprobes void kprobe_optimizer(struct work_struct *work)
 {
-	LIST_HEAD(free_list);
-
 	mutex_lock(&kprobe_mutex);
 	/* Lock modules while optimizing kprobes */
 	mutex_lock(&module_mutex);
@@ -566,7 +565,7 @@ static __kprobes void kprobe_optimizer(struct work_struct *work)
 	 * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
 	 * kprobes before waiting for quiesence period.
 	 */
-	do_unoptimize_kprobes(&free_list);
+	do_unoptimize_kprobes();
 
 	/*
 	 * Step 2: Wait for quiesence period to ensure all running interrupts
@@ -581,7 +580,7 @@ static __kprobes void kprobe_optimizer(struct work_struct *work)
 	do_optimize_kprobes();
 
 	/* Step 4: Free cleaned kprobes after quiesence period */
-	do_free_cleaned_kprobes(&free_list);
+	do_free_cleaned_kprobes();
 
 	mutex_unlock(&module_mutex);
 	mutex_unlock(&kprobe_mutex);
@@ -723,8 +722,19 @@ static void __kprobes kill_optimized_kprobe(struct kprobe *p)
 	if (!list_empty(&op->list))
 		/* Dequeue from the (un)optimization queue */
 		list_del_init(&op->list);
-
 	op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
+
+	if (kprobe_unused(p)) {
+		/* Enqueue if it is unused */
+		list_add(&op->list, &freeing_list);
+		/*
+		 * Remove unused probes from the hash list. After waiting
+		 * for synchronization, this probe is reclaimed.
+		 * (reclaiming is done by do_free_cleaned_kprobes().)
+		 */
+		hlist_del_rcu(&op->kp.hlist);
+	}
+
 	/* Don't touch the code, because it is already freed. */
 	arch_remove_optimized_kprobe(op);
 }
diff --git a/kernel/printk.c b/kernel/printk.c
index fa36e1494420..8212c1aef125 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -363,6 +363,53 @@ static void log_store(int facility, int level,
 	log_next_seq++;
 }
 
+#ifdef CONFIG_SECURITY_DMESG_RESTRICT
+int dmesg_restrict = 1;
+#else
+int dmesg_restrict;
+#endif
+
+static int syslog_action_restricted(int type)
+{
+	if (dmesg_restrict)
+		return 1;
+	/*
+	 * Unless restricted, we allow "read all" and "get buffer size"
+	 * for everybody.
+	 */
+	return type != SYSLOG_ACTION_READ_ALL &&
+	       type != SYSLOG_ACTION_SIZE_BUFFER;
+}
+
+static int check_syslog_permissions(int type, bool from_file)
+{
+	/*
+	 * If this is from /proc/kmsg and we've already opened it, then we've
+	 * already done the capabilities checks at open time.
+	 */
+	if (from_file && type != SYSLOG_ACTION_OPEN)
+		return 0;
+
+	if (syslog_action_restricted(type)) {
+		if (capable(CAP_SYSLOG))
+			return 0;
+		/*
+		 * For historical reasons, accept CAP_SYS_ADMIN too, with
+		 * a warning.
+		 */
+		if (capable(CAP_SYS_ADMIN)) {
+			pr_warn_once("%s (%d): Attempt to access syslog with "
+				     "CAP_SYS_ADMIN but no CAP_SYSLOG "
+				     "(deprecated).\n",
+				     current->comm, task_pid_nr(current));
+			return 0;
+		}
+		return -EPERM;
+	}
+	return security_syslog(type);
+}
+
+
 /* /dev/kmsg - userspace message inject/listen interface */
 struct devkmsg_user {
 	u64 seq;
@@ -620,7 +667,8 @@ static int devkmsg_open(struct inode *inode, struct file *file)
 	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
 		return 0;
 
-	err = security_syslog(SYSLOG_ACTION_READ_ALL);
+	err = check_syslog_permissions(SYSLOG_ACTION_READ_ALL,
+				       SYSLOG_FROM_READER);
 	if (err)
 		return err;
 
@@ -813,45 +861,6 @@ static inline void boot_delay_msec(int level)
 }
 #endif
 
-#ifdef CONFIG_SECURITY_DMESG_RESTRICT
-int dmesg_restrict = 1;
-#else
-int dmesg_restrict;
-#endif
-
-static int syslog_action_restricted(int type)
-{
-	if (dmesg_restrict)
-		return 1;
-	/* Unless restricted, we allow "read all" and "get buffer size" for everybody */
-	return type != SYSLOG_ACTION_READ_ALL && type != SYSLOG_ACTION_SIZE_BUFFER;
-}
-
-static int check_syslog_permissions(int type, bool from_file)
-{
-	/*
-	 * If this is from /proc/kmsg and we've already opened it, then we've
-	 * already done the capabilities checks at open time.
-	 */
-	if (from_file && type != SYSLOG_ACTION_OPEN)
-		return 0;
-
-	if (syslog_action_restricted(type)) {
-		if (capable(CAP_SYSLOG))
-			return 0;
-		/* For historical reasons, accept CAP_SYS_ADMIN too, with a warning */
-		if (capable(CAP_SYS_ADMIN)) {
-			printk_once(KERN_WARNING "%s (%d): "
-				    "Attempt to access syslog with CAP_SYS_ADMIN "
-				    "but no CAP_SYSLOG (deprecated).\n",
-				    current->comm, task_pid_nr(current));
-			return 0;
-		}
-		return -EPERM;
-	}
-	return 0;
-}
-
 #if defined(CONFIG_PRINTK_TIME)
 static bool printk_time = 1;
 #else
@@ -1249,7 +1258,7 @@ out:
 
 SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len)
 {
-	return do_syslog(type, buf, len, SYSLOG_FROM_CALL);
+	return do_syslog(type, buf, len, SYSLOG_FROM_READER);
 }
 
 /*
diff --git a/kernel/range.c b/kernel/range.c
index 071b0ab455cb..eb911dbce267 100644
--- a/kernel/range.c
+++ b/kernel/range.c
@@ -48,9 +48,11 @@ int add_range_with_merge(struct range *range, int az, int nr_range,
 		final_start = min(range[i].start, start);
 		final_end = max(range[i].end, end);
 
-		range[i].start = final_start;
-		range[i].end = final_end;
-		return nr_range;
+		/* clear it and add it back for further merge */
+		range[i].start = 0;
+		range[i].end = 0;
+		return add_range_with_merge(range, az, nr_range,
+					    final_start, final_end);
 	}
 
 	/* Need to add it: */
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 16ea67925015..35380019f0fc 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1451,9 +1451,9 @@ static int rcu_gp_init(struct rcu_state *rsp)
 					    rnp->grphi, rnp->qsmask);
 		raw_spin_unlock_irq(&rnp->lock);
 #ifdef CONFIG_PROVE_RCU_DELAY
-		if ((prandom_u32() % (rcu_num_nodes * 8)) == 0 &&
+		if ((prandom_u32() % (rcu_num_nodes + 1)) == 0 &&
 		    system_state == SYSTEM_RUNNING)
-			schedule_timeout_uninterruptible(2);
+			udelay(200);
 #endif /* #ifdef CONFIG_PROVE_RCU_DELAY */
 		cond_resched();
 	}
@@ -1613,6 +1613,14 @@ static int __noreturn rcu_gp_kthread(void *arg)
 	}
 }
 
+static void rsp_wakeup(struct irq_work *work)
+{
+	struct rcu_state *rsp = container_of(work, struct rcu_state, wakeup_work);
+
+	/* Wake up rcu_gp_kthread() to start the grace period. */
+	wake_up(&rsp->gp_wq);
+}
+
 /*
  * Start a new RCU grace period if warranted, re-initializing the hierarchy
  * in preparation for detecting the next grace period. The caller must hold
@@ -1637,8 +1645,12 @@ rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
 	}
 	rsp->gp_flags = RCU_GP_FLAG_INIT;
 
-	/* Wake up rcu_gp_kthread() to start the grace period. */
-	wake_up(&rsp->gp_wq);
+	/*
+	 * We can't do wakeups while holding the rnp->lock, as that
+	 * could cause possible deadlocks with the rq->lock. Defer
+	 * the wakeup to interrupt context.
+	 */
+	irq_work_queue(&rsp->wakeup_work);
 }
 
 /*
@@ -3235,6 +3247,7 @@ static void __init rcu_init_one(struct rcu_state *rsp,
 
 	rsp->rda = rda;
 	init_waitqueue_head(&rsp->gp_wq);
+	init_irq_work(&rsp->wakeup_work, rsp_wakeup);
 	rnp = rsp->level[rcu_num_lvls - 1];
 	for_each_possible_cpu(i) {
 		while (i > rnp->grphi)
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index da77a8f57ff9..4df503470e42 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -27,6 +27,7 @@
 #include <linux/threads.h>
 #include <linux/cpumask.h>
 #include <linux/seqlock.h>
+#include <linux/irq_work.h>
 
 /*
  * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and
@@ -442,6 +443,7 @@ struct rcu_state {
 	char *name;			/* Name of structure. */
 	char abbr;			/* Abbreviated name. */
 	struct list_head flavors;	/* List of RCU flavors. */
+	struct irq_work wakeup_work;	/* Postponed wakeups */
 };
 
 /* Values for rcu_state structure's gp_flags field. */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 919bee68032b..e8b335016c52 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4758,7 +4758,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
 	 */
 	idle->sched_class = &idle_sched_class;
 	ftrace_graph_init_idle_task(idle, cpu);
-	vtime_init_idle(idle);
+	vtime_init_idle(idle, cpu);
 #if defined(CONFIG_SMP)
 	sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu);
 #endif
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index cc2dc3eea8a3..b5ccba22603b 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -747,17 +747,17 @@ void arch_vtime_task_switch(struct task_struct *prev)
 
 	write_seqlock(&current->vtime_seqlock);
 	current->vtime_snap_whence = VTIME_SYS;
-	current->vtime_snap = sched_clock();
+	current->vtime_snap = sched_clock_cpu(smp_processor_id());
 	write_sequnlock(&current->vtime_seqlock);
 }
 
-void vtime_init_idle(struct task_struct *t)
+void vtime_init_idle(struct task_struct *t, int cpu)
 {
 	unsigned long flags;
 
 	write_seqlock_irqsave(&t->vtime_seqlock, flags);
 	t->vtime_snap_whence = VTIME_SYS;
-	t->vtime_snap = sched_clock();
+	t->vtime_snap = sched_clock_cpu(cpu);
 	write_sequnlock_irqrestore(&t->vtime_seqlock, flags);
 }
 
diff --git a/kernel/softirq.c b/kernel/softirq.c
index b5197dcb0dad..3d6833f125d3 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -195,8 +195,12 @@ void local_bh_enable_ip(unsigned long ip)
 EXPORT_SYMBOL(local_bh_enable_ip);
 
 /*
- * We restart softirq processing for at most 2 ms,
- * and if need_resched() is not set.
+ * We restart softirq processing for at most MAX_SOFTIRQ_RESTART times,
+ * but break the loop if need_resched() is set or after 2 ms.
+ * The MAX_SOFTIRQ_TIME provides a nice upper bound in most cases, but in
+ * certain cases, such as stop_machine(), jiffies may cease to
+ * increment and so we need the MAX_SOFTIRQ_RESTART limit as
+ * well to make sure we eventually return from this method.
  *
  * These limits have been established via experimentation.
  * The two things to balance is latency against fairness -
@@ -204,6 +208,7 @@ EXPORT_SYMBOL(local_bh_enable_ip);
  * should not be able to lock up the box.
  */
 #define MAX_SOFTIRQ_TIME msecs_to_jiffies(2)
+#define MAX_SOFTIRQ_RESTART 10
 
 asmlinkage void __do_softirq(void)
 {
@@ -212,6 +217,7 @@ asmlinkage void __do_softirq(void)
 	unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
 	int cpu;
 	unsigned long old_flags = current->flags;
+	int max_restart = MAX_SOFTIRQ_RESTART;
 
 	/*
 	 * Mask out PF_MEMALLOC s current task context is borrowed for the
@@ -265,7 +271,8 @@ restart:
 
 	pending = local_softirq_pending();
 	if (pending) {
-		if (time_before(jiffies, end) && !need_resched())
+		if (time_before(jiffies, end) && !need_resched() &&
+		    --max_restart)
 			goto restart;
 
 		wakeup_softirqd();
diff --git a/kernel/sys.c b/kernel/sys.c
index b95d3c72ba21..2bbd9a73b54c 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -362,6 +362,29 @@ int unregister_reboot_notifier(struct notifier_block *nb)
 }
 EXPORT_SYMBOL(unregister_reboot_notifier);
 
+/* Add backwards compatibility for stable trees. */
+#ifndef PF_NO_SETAFFINITY
+#define PF_NO_SETAFFINITY PF_THREAD_BOUND
+#endif
+
+static void migrate_to_reboot_cpu(void)
+{
+	/* The boot cpu is always logical cpu 0 */
+	int cpu = 0;
+
+	cpu_hotplug_disable();
+
+	/* Make certain the cpu I'm about to reboot on is online */
+	if (!cpu_online(cpu))
+		cpu = cpumask_first(cpu_online_mask);
+
+	/* Prevent races with other tasks migrating this task */
+	current->flags |= PF_NO_SETAFFINITY;
+
+	/* Make certain I only run on the appropriate processor */
+	set_cpus_allowed_ptr(current, cpumask_of(cpu));
+}
+
 /**
  * kernel_restart - reboot the system
  * @cmd: pointer to buffer containing command to execute for restart
@@ -373,7 +396,7 @@ EXPORT_SYMBOL(unregister_reboot_notifier);
 void kernel_restart(char *cmd)
 {
 	kernel_restart_prepare(cmd);
-	disable_nonboot_cpus();
+	migrate_to_reboot_cpu();
 	syscore_shutdown();
 	if (!cmd)
 		printk(KERN_EMERG "Restarting system.\n");
@@ -400,7 +423,7 @@ static void kernel_shutdown_prepare(enum system_states state)
 void kernel_halt(void)
 {
 	kernel_shutdown_prepare(SYSTEM_HALT);
-	disable_nonboot_cpus();
+	migrate_to_reboot_cpu();
 	syscore_shutdown();
 	printk(KERN_EMERG "System halted.\n");
 	kmsg_dump(KMSG_DUMP_HALT);
@@ -419,7 +442,7 @@ void kernel_power_off(void)
 	kernel_shutdown_prepare(SYSTEM_POWER_OFF);
 	if (pm_power_off_prepare)
 		pm_power_off_prepare();
-	disable_nonboot_cpus();
+	migrate_to_reboot_cpu();
 	syscore_shutdown();
 	printk(KERN_EMERG "Power down.\n");
 	kmsg_dump(KMSG_DUMP_POWEROFF);
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 12ff13a838c6..8f5b3b98577b 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c | |||
@@ -874,7 +874,6 @@ static void hardpps_update_phase(long error) | |||
874 | void __hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts) | 874 | void __hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts) |
875 | { | 875 | { |
876 | struct pps_normtime pts_norm, freq_norm; | 876 | struct pps_normtime pts_norm, freq_norm; |
877 | unsigned long flags; | ||
878 | 877 | ||
879 | pts_norm = pps_normalize_ts(*phase_ts); | 878 | pts_norm = pps_normalize_ts(*phase_ts); |
880 | 879 | ||
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 24938d577669..b4c245580b79 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c | |||
@@ -511,6 +511,12 @@ again: | |||
511 | } | 511 | } |
512 | } | 512 | } |
513 | 513 | ||
514 | /* | ||
515 | * Remove the current cpu from the pending mask. The event is | ||
516 | * delivered immediately in tick_do_broadcast() ! | ||
517 | */ | ||
518 | cpumask_clear_cpu(smp_processor_id(), tick_broadcast_pending_mask); | ||
519 | |||
514 | /* Take care of enforced broadcast requests */ | 520 | /* Take care of enforced broadcast requests */ |
515 | cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask); | 521 | cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask); |
516 | cpumask_clear(tick_broadcast_force_mask); | 522 | cpumask_clear(tick_broadcast_force_mask); |
@@ -575,8 +581,8 @@ void tick_broadcast_oneshot_control(unsigned long reason) | |||
575 | 581 | ||
576 | raw_spin_lock_irqsave(&tick_broadcast_lock, flags); | 582 | raw_spin_lock_irqsave(&tick_broadcast_lock, flags); |
577 | if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) { | 583 | if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) { |
578 | WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask)); | ||
579 | if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) { | 584 | if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) { |
585 | WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask)); | ||
580 | clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN); | 586 | clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN); |
581 | /* | 587 | /* |
582 | * We only reprogram the broadcast timer if we | 588 | * We only reprogram the broadcast timer if we |
@@ -692,10 +698,6 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc) | |||
692 | 698 | ||
693 | bc->event_handler = tick_handle_oneshot_broadcast; | 699 | bc->event_handler = tick_handle_oneshot_broadcast; |
694 | 700 | ||
695 | /* Take the do_timer update */ | ||
696 | if (!tick_nohz_full_cpu(cpu)) | ||
697 | tick_do_timer_cpu = cpu; | ||
698 | |||
699 | /* | 701 | /* |
700 | * We must be careful here. There might be other CPUs | 702 | * We must be careful here. There might be other CPUs |
701 | * waiting for periodic broadcast. We need to set the | 703 | * waiting for periodic broadcast. We need to set the |
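
Two of the broadcast hunks are tick_broadcast_pending_mask bookkeeping: the handler now clears the current CPU's pending bit because its event is delivered in place, and the ENTER path only asserts that the pending bit is clear when the CPU actually transitions into the oneshot mask, i.e. when the test-and-set reports the bit was previously clear. A small sketch of that reordered check, using a plain atomic bitmask rather than the kernel's cpumask API (all names invented):

#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>

static atomic_ulong oneshot_mask;
static atomic_ulong pending_mask;

static bool test_and_set_bit_ul(atomic_ulong *mask, int bit)
{
        unsigned long old = atomic_fetch_or(mask, 1UL << bit);

        return old & (1UL << bit);      /* true if it was already set */
}

static bool test_bit_ul(atomic_ulong *mask, int bit)
{
        return atomic_load(mask) & (1UL << bit);
}

static void broadcast_enter(int cpu)
{
        if (!test_and_set_bit_ul(&oneshot_mask, cpu)) {
                /* Patched ordering: only a CPU entering broadcast mode
                 * right now is expected to have a clear pending bit. */
                assert(!test_bit_ul(&pending_mask, cpu));
                /* ... shut down the per-cpu clockevent here ... */
        }
}

int main(void)
{
        broadcast_enter(1);
        broadcast_enter(1);     /* already set: no assertion this time */
        return 0;
}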
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index f4208138fbf4..0cf1c1453181 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -306,7 +306,7 @@ static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb, | |||
306 | * we can't safely shutdown that CPU. | 306 | * we can't safely shutdown that CPU. |
307 | */ | 307 | */ |
308 | if (have_nohz_full_mask && tick_do_timer_cpu == cpu) | 308 | if (have_nohz_full_mask && tick_do_timer_cpu == cpu) |
309 | return -EINVAL; | 309 | return NOTIFY_BAD; |
310 | break; | 310 | break; |
311 | } | 311 | } |
312 | return NOTIFY_OK; | 312 | return NOTIFY_OK; |
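
The tick-sched fix is a return-value convention: CPU notifier callbacks are expected to return NOTIFY_* codes, which the notifier core later translates with notifier_to_errno(); a raw -EINVAL does not survive that translation, while NOTIFY_BAD does. A stand-alone sketch of the encoding (constants and the translation are paraphrased from include/linux/notifier.h of this era, so treat them as an approximation):

#include <stdio.h>

#define NOTIFY_OK        0x0001
#define NOTIFY_STOP_MASK 0x8000
#define NOTIFY_BAD       (NOTIFY_STOP_MASK | 0x0002)
#define EINVAL           22

/* Approximation of the kernel's notifier_to_errno() */
static int notifier_to_errno(int ret)
{
        ret &= ~NOTIFY_STOP_MASK;
        return ret > NOTIFY_OK ? NOTIFY_OK - ret : 0;
}

int main(void)
{
        /* NOTIFY_BAD decodes to a nonzero error, so the cpu-down is refused */
        printf("NOTIFY_BAD -> %d\n", notifier_to_errno(NOTIFY_BAD));
        /* ...while a raw -EINVAL decodes to 0 and the veto is silently lost */
        printf("-EINVAL    -> %d\n", notifier_to_errno(-EINVAL));
        return 0;
}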
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 98cd470bbe49..baeeb5c87cf1 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
@@ -975,6 +975,14 @@ static int timekeeping_suspend(void) | |||
975 | 975 | ||
976 | read_persistent_clock(&timekeeping_suspend_time); | 976 | read_persistent_clock(&timekeeping_suspend_time); |
977 | 977 | ||
978 | /* | ||
979 | * On some systems the persistent_clock can not be detected at | ||
980 | * timekeeping_init by its return value, so if we see a valid | ||
981 | * value returned, update the persistent_clock_exists flag. | ||
982 | */ | ||
983 | if (timekeeping_suspend_time.tv_sec || timekeeping_suspend_time.tv_nsec) | ||
984 | persistent_clock_exist = true; | ||
985 | |||
978 | raw_spin_lock_irqsave(&timekeeper_lock, flags); | 986 | raw_spin_lock_irqsave(&timekeeper_lock, flags); |
979 | write_seqcount_begin(&timekeeper_seq); | 987 | write_seqcount_begin(&timekeeper_seq); |
980 | timekeeping_forward_now(tk); | 988 | timekeeping_forward_now(tk); |
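
The timekeeping hunk covers platforms whose persistent clock cannot be detected at timekeeping_init() time: if the value read at suspend is non-zero, the persistent_clock_exist flag is upgraded after the fact so the resume path can rely on it. Reduced to a toy sketch with invented names, the pattern is "set a capability flag the first time a valid reading is observed":

#include <stdbool.h>
#include <stdio.h>

struct ts_like { long tv_sec; long tv_nsec; };

static bool persistent_clock_exists;

/* Stand-in for read_persistent_clock(); a platform without the clock
 * would return all zeros (hypothetical behaviour for the sketch). */
static void read_persistent_clock_stub(struct ts_like *ts)
{
        ts->tv_sec = 1371000000;        /* pretend the hardware answered */
        ts->tv_nsec = 0;
}

static void suspend_path(void)
{
        struct ts_like now;

        read_persistent_clock_stub(&now);

        /* Same idea as the patch: a valid reading proves the clock
         * exists even if init-time detection could not tell. */
        if (now.tv_sec || now.tv_nsec)
                persistent_clock_exists = true;
}

int main(void)
{
        suspend_path();
        printf("persistent clock detected: %s\n",
               persistent_clock_exists ? "yes" : "no");
        return 0;
}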
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index b549b0f5b977..6c508ff33c62 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
@@ -120,22 +120,22 @@ static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip); | |||
120 | 120 | ||
121 | /* | 121 | /* |
122 | * Traverse the ftrace_global_list, invoking all entries. The reason that we | 122 | * Traverse the ftrace_global_list, invoking all entries. The reason that we |
123 | * can use rcu_dereference_raw() is that elements removed from this list | 123 | * can use rcu_dereference_raw_notrace() is that elements removed from this list |
124 | * are simply leaked, so there is no need to interact with a grace-period | 124 | * are simply leaked, so there is no need to interact with a grace-period |
125 | * mechanism. The rcu_dereference_raw() calls are needed to handle | 125 | * mechanism. The rcu_dereference_raw_notrace() calls are needed to handle |
126 | * concurrent insertions into the ftrace_global_list. | 126 | * concurrent insertions into the ftrace_global_list. |
127 | * | 127 | * |
128 | * Silly Alpha and silly pointer-speculation compiler optimizations! | 128 | * Silly Alpha and silly pointer-speculation compiler optimizations! |
129 | */ | 129 | */ |
130 | #define do_for_each_ftrace_op(op, list) \ | 130 | #define do_for_each_ftrace_op(op, list) \ |
131 | op = rcu_dereference_raw(list); \ | 131 | op = rcu_dereference_raw_notrace(list); \ |
132 | do | 132 | do |
133 | 133 | ||
134 | /* | 134 | /* |
135 | * Optimized for just a single item in the list (as that is the normal case). | 135 | * Optimized for just a single item in the list (as that is the normal case). |
136 | */ | 136 | */ |
137 | #define while_for_each_ftrace_op(op) \ | 137 | #define while_for_each_ftrace_op(op) \ |
138 | while (likely(op = rcu_dereference_raw((op)->next)) && \ | 138 | while (likely(op = rcu_dereference_raw_notrace((op)->next)) && \ |
139 | unlikely((op) != &ftrace_list_end)) | 139 | unlikely((op) != &ftrace_list_end)) |
140 | 140 | ||
141 | static inline void ftrace_ops_init(struct ftrace_ops *ops) | 141 | static inline void ftrace_ops_init(struct ftrace_ops *ops) |
@@ -779,7 +779,7 @@ ftrace_find_profiled_func(struct ftrace_profile_stat *stat, unsigned long ip) | |||
779 | if (hlist_empty(hhd)) | 779 | if (hlist_empty(hhd)) |
780 | return NULL; | 780 | return NULL; |
781 | 781 | ||
782 | hlist_for_each_entry_rcu(rec, hhd, node) { | 782 | hlist_for_each_entry_rcu_notrace(rec, hhd, node) { |
783 | if (rec->ip == ip) | 783 | if (rec->ip == ip) |
784 | return rec; | 784 | return rec; |
785 | } | 785 | } |
@@ -1165,7 +1165,7 @@ ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip) | |||
1165 | 1165 | ||
1166 | hhd = &hash->buckets[key]; | 1166 | hhd = &hash->buckets[key]; |
1167 | 1167 | ||
1168 | hlist_for_each_entry_rcu(entry, hhd, hlist) { | 1168 | hlist_for_each_entry_rcu_notrace(entry, hhd, hlist) { |
1169 | if (entry->ip == ip) | 1169 | if (entry->ip == ip) |
1170 | return entry; | 1170 | return entry; |
1171 | } | 1171 | } |
@@ -1422,8 +1422,8 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip) | |||
1422 | struct ftrace_hash *notrace_hash; | 1422 | struct ftrace_hash *notrace_hash; |
1423 | int ret; | 1423 | int ret; |
1424 | 1424 | ||
1425 | filter_hash = rcu_dereference_raw(ops->filter_hash); | 1425 | filter_hash = rcu_dereference_raw_notrace(ops->filter_hash); |
1426 | notrace_hash = rcu_dereference_raw(ops->notrace_hash); | 1426 | notrace_hash = rcu_dereference_raw_notrace(ops->notrace_hash); |
1427 | 1427 | ||
1428 | if ((ftrace_hash_empty(filter_hash) || | 1428 | if ((ftrace_hash_empty(filter_hash) || |
1429 | ftrace_lookup_ip(filter_hash, ip)) && | 1429 | ftrace_lookup_ip(filter_hash, ip)) && |
@@ -2920,7 +2920,7 @@ static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip, | |||
2920 | * on the hash. rcu_read_lock is too dangerous here. | 2920 | * on the hash. rcu_read_lock is too dangerous here. |
2921 | */ | 2921 | */ |
2922 | preempt_disable_notrace(); | 2922 | preempt_disable_notrace(); |
2923 | hlist_for_each_entry_rcu(entry, hhd, node) { | 2923 | hlist_for_each_entry_rcu_notrace(entry, hhd, node) { |
2924 | if (entry->ip == ip) | 2924 | if (entry->ip == ip) |
2925 | entry->ops->func(ip, parent_ip, &entry->data); | 2925 | entry->ops->func(ip, parent_ip, &entry->data); |
2926 | } | 2926 | } |
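
All of the ftrace changes swap RCU helpers for their _notrace variants for the same reason: code that runs as part of the function tracer must not call anything that is itself traced, or the tracer recurses into itself. The same constraint shows up with gcc's -finstrument-functions, which can serve as a user-space illustration (the helper below is invented; the no_instrument_function attribute plays the role of the kernel's notrace):

/* Build with:  gcc -finstrument-functions demo.c */
#include <stdio.h>

#define notrace __attribute__((no_instrument_function))

/* Anything called from inside the hook must itself be exempt from
 * instrumentation, or entering it would re-enter the hook forever. */
static notrace void hook_helper(void *fn)
{
        fprintf(stderr, "enter %p\n", fn);
}

notrace void __cyg_profile_func_enter(void *this_fn, void *call_site)
{
        (void)call_site;
        hook_helper(this_fn);
}

notrace void __cyg_profile_func_exit(void *this_fn, void *call_site)
{
        (void)this_fn;
        (void)call_site;
}

static int traced_work(int x)
{
        return x * 2;
}

int main(void)
{
        return traced_work(21) == 42 ? 0 : 1;
}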
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4d79485b3237..e71a8be4a6ee 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -652,8 +652,6 @@ static struct { | |||
652 | ARCH_TRACE_CLOCKS | 652 | ARCH_TRACE_CLOCKS |
653 | }; | 653 | }; |
654 | 654 | ||
655 | int trace_clock_id; | ||
656 | |||
657 | /* | 655 | /* |
658 | * trace_parser_get_init - gets the buffer for trace parser | 656 | * trace_parser_get_init - gets the buffer for trace parser |
659 | */ | 657 | */ |
@@ -843,7 +841,15 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) | |||
843 | 841 | ||
844 | memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN); | 842 | memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN); |
845 | max_data->pid = tsk->pid; | 843 | max_data->pid = tsk->pid; |
846 | max_data->uid = task_uid(tsk); | 844 | /* |
845 | * If tsk == current, then use current_uid(), as that does not use | ||
846 | * RCU. The irq tracer can be called out of RCU scope. | ||
847 | */ | ||
848 | if (tsk == current) | ||
849 | max_data->uid = current_uid(); | ||
850 | else | ||
851 | max_data->uid = task_uid(tsk); | ||
852 | |||
847 | max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO; | 853 | max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO; |
848 | max_data->policy = tsk->policy; | 854 | max_data->policy = tsk->policy; |
849 | max_data->rt_priority = tsk->rt_priority; | 855 | max_data->rt_priority = tsk->rt_priority; |
@@ -2818,7 +2824,7 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) | |||
2818 | iter->iter_flags |= TRACE_FILE_ANNOTATE; | 2824 | iter->iter_flags |= TRACE_FILE_ANNOTATE; |
2819 | 2825 | ||
2820 | /* Output in nanoseconds only if we are using a clock in nanoseconds. */ | 2826 | /* Output in nanoseconds only if we are using a clock in nanoseconds. */ |
2821 | if (trace_clocks[trace_clock_id].in_ns) | 2827 | if (trace_clocks[tr->clock_id].in_ns) |
2822 | iter->iter_flags |= TRACE_FILE_TIME_IN_NS; | 2828 | iter->iter_flags |= TRACE_FILE_TIME_IN_NS; |
2823 | 2829 | ||
2824 | /* stop the trace while dumping if we are not opening "snapshot" */ | 2830 | /* stop the trace while dumping if we are not opening "snapshot" */ |
@@ -3817,7 +3823,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) | |||
3817 | iter->iter_flags |= TRACE_FILE_LAT_FMT; | 3823 | iter->iter_flags |= TRACE_FILE_LAT_FMT; |
3818 | 3824 | ||
3819 | /* Output in nanoseconds only if we are using a clock in nanoseconds. */ | 3825 | /* Output in nanoseconds only if we are using a clock in nanoseconds. */ |
3820 | if (trace_clocks[trace_clock_id].in_ns) | 3826 | if (trace_clocks[tr->clock_id].in_ns) |
3821 | iter->iter_flags |= TRACE_FILE_TIME_IN_NS; | 3827 | iter->iter_flags |= TRACE_FILE_TIME_IN_NS; |
3822 | 3828 | ||
3823 | iter->cpu_file = tc->cpu; | 3829 | iter->cpu_file = tc->cpu; |
@@ -5087,7 +5093,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf, | |||
5087 | cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu); | 5093 | cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu); |
5088 | trace_seq_printf(s, "bytes: %ld\n", cnt); | 5094 | trace_seq_printf(s, "bytes: %ld\n", cnt); |
5089 | 5095 | ||
5090 | if (trace_clocks[trace_clock_id].in_ns) { | 5096 | if (trace_clocks[tr->clock_id].in_ns) { |
5091 | /* local or global for trace_clock */ | 5097 | /* local or global for trace_clock */ |
5092 | t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu)); | 5098 | t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu)); |
5093 | usec_rem = do_div(t, USEC_PER_SEC); | 5099 | usec_rem = do_div(t, USEC_PER_SEC); |
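
The trace.c hunks finish moving the clock selection from the global trace_clock_id to the per-instance tr->clock_id, and the __update_max_tr() change avoids task_uid() (which goes through RCU) when the task is current. A toy sketch of the first pattern, replacing a file-scope global with per-instance state (all names invented), could be:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for the trace clock table and trace_array */
struct clock_desc { const char *name; bool in_ns; };

static const struct clock_desc clocks[] = {
        { "local",   true  },
        { "global",  true  },
        { "counter", false },
};

struct instance {
        int clock_id;           /* per-instance, like tr->clock_id */
};

/* Consumers ask the instance, never a global, so two instances can use
 * different clocks without stepping on each other. */
static bool instance_in_ns(const struct instance *inst)
{
        return clocks[inst->clock_id].in_ns;
}

int main(void)
{
        struct instance a = { .clock_id = 0 };
        struct instance b = { .clock_id = 2 };

        printf("a in ns: %d, b in ns: %d\n",
               instance_in_ns(&a), instance_in_ns(&b));
        return 0;
}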
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 711ca7d3e7f1..20572ed88c5c 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h | |||
@@ -700,8 +700,6 @@ enum print_line_t print_trace_line(struct trace_iterator *iter); | |||
700 | 700 | ||
701 | extern unsigned long trace_flags; | 701 | extern unsigned long trace_flags; |
702 | 702 | ||
703 | extern int trace_clock_id; | ||
704 | |||
705 | /* Standard output formatting function used for function return traces */ | 703 | /* Standard output formatting function used for function return traces */ |
706 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | 704 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
707 | 705 | ||
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 55e2cf66967b..2901e3b88590 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c | |||
@@ -1159,7 +1159,7 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr) | |||
1159 | /* stop the tracing. */ | 1159 | /* stop the tracing. */ |
1160 | tracing_stop(); | 1160 | tracing_stop(); |
1161 | /* check the trace buffer */ | 1161 | /* check the trace buffer */ |
1162 | ret = trace_test_buffer(tr, &count); | 1162 | ret = trace_test_buffer(&tr->trace_buffer, &count); |
1163 | trace->reset(tr); | 1163 | trace->reset(tr); |
1164 | tracing_start(); | 1164 | tracing_start(); |
1165 | 1165 | ||