diff options
| author | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2017-10-09 03:02:35 -0400 |
|---|---|---|
| committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2017-10-09 03:02:35 -0400 |
| commit | 1236d6bb6e19fc72ffc6bbcdeb1bfefe450e54ee (patch) | |
| tree | 47da3feee8e263e8c9352c85cf518e624be3c211 /kernel | |
| parent | 750b1a6894ecc9b178c6e3d0a1170122971b2036 (diff) | |
| parent | 8a5776a5f49812d29fe4b2d0a2d71675c3facf3f (diff) | |
Merge 4.14-rc4 into staging-next
We want the staging/iio fixes in here as well to handle merge issues.
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'kernel')
37 files changed, 1324 insertions, 825 deletions
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 917cc04a0a94..7b62df86be1d 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c | |||
| @@ -1022,7 +1022,7 @@ select_insn: | |||
| 1022 | struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2; | 1022 | struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2; |
| 1023 | struct bpf_array *array = container_of(map, struct bpf_array, map); | 1023 | struct bpf_array *array = container_of(map, struct bpf_array, map); |
| 1024 | struct bpf_prog *prog; | 1024 | struct bpf_prog *prog; |
| 1025 | u64 index = BPF_R3; | 1025 | u32 index = BPF_R3; |
| 1026 | 1026 | ||
| 1027 | if (unlikely(index >= array->map.max_entries)) | 1027 | if (unlikely(index >= array->map.max_entries)) |
| 1028 | goto out; | 1028 | goto out; |
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 959c9a07f318..e093d9a2c4dd 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c | |||
| @@ -75,8 +75,8 @@ static u64 dev_map_bitmap_size(const union bpf_attr *attr) | |||
| 75 | static struct bpf_map *dev_map_alloc(union bpf_attr *attr) | 75 | static struct bpf_map *dev_map_alloc(union bpf_attr *attr) |
| 76 | { | 76 | { |
| 77 | struct bpf_dtab *dtab; | 77 | struct bpf_dtab *dtab; |
| 78 | int err = -EINVAL; | ||
| 78 | u64 cost; | 79 | u64 cost; |
| 79 | int err; | ||
| 80 | 80 | ||
| 81 | /* check sanity of attributes */ | 81 | /* check sanity of attributes */ |
| 82 | if (attr->max_entries == 0 || attr->key_size != 4 || | 82 | if (attr->max_entries == 0 || attr->key_size != 4 || |
| @@ -108,6 +108,8 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr) | |||
| 108 | if (err) | 108 | if (err) |
| 109 | goto free_dtab; | 109 | goto free_dtab; |
| 110 | 110 | ||
| 111 | err = -ENOMEM; | ||
| 112 | |||
| 111 | /* A per cpu bitfield with a bit per possible net device */ | 113 | /* A per cpu bitfield with a bit per possible net device */ |
| 112 | dtab->flush_needed = __alloc_percpu(dev_map_bitmap_size(attr), | 114 | dtab->flush_needed = __alloc_percpu(dev_map_bitmap_size(attr), |
| 113 | __alignof__(unsigned long)); | 115 | __alignof__(unsigned long)); |
| @@ -128,7 +130,7 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr) | |||
| 128 | free_dtab: | 130 | free_dtab: |
| 129 | free_percpu(dtab->flush_needed); | 131 | free_percpu(dtab->flush_needed); |
| 130 | kfree(dtab); | 132 | kfree(dtab); |
| 131 | return ERR_PTR(-ENOMEM); | 133 | return ERR_PTR(err); |
| 132 | } | 134 | } |
| 133 | 135 | ||
| 134 | static void dev_map_free(struct bpf_map *map) | 136 | static void dev_map_free(struct bpf_map *map) |
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index cb17e1cd1d43..25d074920a00 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c | |||
| @@ -186,15 +186,17 @@ static int bpf_map_alloc_id(struct bpf_map *map) | |||
| 186 | 186 | ||
| 187 | static void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock) | 187 | static void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock) |
| 188 | { | 188 | { |
| 189 | unsigned long flags; | ||
| 190 | |||
| 189 | if (do_idr_lock) | 191 | if (do_idr_lock) |
| 190 | spin_lock_bh(&map_idr_lock); | 192 | spin_lock_irqsave(&map_idr_lock, flags); |
| 191 | else | 193 | else |
| 192 | __acquire(&map_idr_lock); | 194 | __acquire(&map_idr_lock); |
| 193 | 195 | ||
| 194 | idr_remove(&map_idr, map->id); | 196 | idr_remove(&map_idr, map->id); |
| 195 | 197 | ||
| 196 | if (do_idr_lock) | 198 | if (do_idr_lock) |
| 197 | spin_unlock_bh(&map_idr_lock); | 199 | spin_unlock_irqrestore(&map_idr_lock, flags); |
| 198 | else | 200 | else |
| 199 | __release(&map_idr_lock); | 201 | __release(&map_idr_lock); |
| 200 | } | 202 | } |
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 799b2451ef2d..b914fbe1383e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c | |||
| @@ -4205,7 +4205,12 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) | |||
| 4205 | } | 4205 | } |
| 4206 | 4206 | ||
| 4207 | if (insn->imm == BPF_FUNC_redirect_map) { | 4207 | if (insn->imm == BPF_FUNC_redirect_map) { |
| 4208 | u64 addr = (unsigned long)prog; | 4208 | /* Note, we cannot use prog directly as imm as subsequent |
| 4209 | * rewrites would still change the prog pointer. The only | ||
| 4210 | * stable address we can use is aux, which also works with | ||
| 4211 | * prog clones during blinding. | ||
| 4212 | */ | ||
| 4213 | u64 addr = (unsigned long)prog->aux; | ||
| 4209 | struct bpf_insn r4_ld[] = { | 4214 | struct bpf_insn r4_ld[] = { |
| 4210 | BPF_LD_IMM64(BPF_REG_4, addr), | 4215 | BPF_LD_IMM64(BPF_REG_4, addr), |
| 4211 | *insn, | 4216 | *insn, |
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index d6551cd45238..44857278eb8a 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c | |||
| @@ -2311,6 +2311,14 @@ out_release_tset: | |||
| 2311 | list_del_init(&cset->mg_node); | 2311 | list_del_init(&cset->mg_node); |
| 2312 | } | 2312 | } |
| 2313 | spin_unlock_irq(&css_set_lock); | 2313 | spin_unlock_irq(&css_set_lock); |
| 2314 | |||
| 2315 | /* | ||
| 2316 | * Re-initialize the cgroup_taskset structure in case it is reused | ||
| 2317 | * again in another cgroup_migrate_add_task()/cgroup_migrate_execute() | ||
| 2318 | * iteration. | ||
| 2319 | */ | ||
| 2320 | tset->nr_tasks = 0; | ||
| 2321 | tset->csets = &tset->src_csets; | ||
| 2314 | return ret; | 2322 | return ret; |
| 2315 | } | 2323 | } |
| 2316 | 2324 | ||
diff --git a/kernel/cpu.c b/kernel/cpu.c index acf5308fad51..d851df22f5c5 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c | |||
| @@ -24,6 +24,7 @@ | |||
| 24 | #include <linux/lockdep.h> | 24 | #include <linux/lockdep.h> |
| 25 | #include <linux/tick.h> | 25 | #include <linux/tick.h> |
| 26 | #include <linux/irq.h> | 26 | #include <linux/irq.h> |
| 27 | #include <linux/nmi.h> | ||
| 27 | #include <linux/smpboot.h> | 28 | #include <linux/smpboot.h> |
| 28 | #include <linux/relay.h> | 29 | #include <linux/relay.h> |
| 29 | #include <linux/slab.h> | 30 | #include <linux/slab.h> |
| @@ -46,11 +47,13 @@ | |||
| 46 | * @bringup: Single callback bringup or teardown selector | 47 | * @bringup: Single callback bringup or teardown selector |
| 47 | * @cb_state: The state for a single callback (install/uninstall) | 48 | * @cb_state: The state for a single callback (install/uninstall) |
| 48 | * @result: Result of the operation | 49 | * @result: Result of the operation |
| 49 | * @done: Signal completion to the issuer of the task | 50 | * @done_up: Signal completion to the issuer of the task for cpu-up |
| 51 | * @done_down: Signal completion to the issuer of the task for cpu-down | ||
| 50 | */ | 52 | */ |
| 51 | struct cpuhp_cpu_state { | 53 | struct cpuhp_cpu_state { |
| 52 | enum cpuhp_state state; | 54 | enum cpuhp_state state; |
| 53 | enum cpuhp_state target; | 55 | enum cpuhp_state target; |
| 56 | enum cpuhp_state fail; | ||
| 54 | #ifdef CONFIG_SMP | 57 | #ifdef CONFIG_SMP |
| 55 | struct task_struct *thread; | 58 | struct task_struct *thread; |
| 56 | bool should_run; | 59 | bool should_run; |
| @@ -58,18 +61,39 @@ struct cpuhp_cpu_state { | |||
| 58 | bool single; | 61 | bool single; |
| 59 | bool bringup; | 62 | bool bringup; |
| 60 | struct hlist_node *node; | 63 | struct hlist_node *node; |
| 64 | struct hlist_node *last; | ||
| 61 | enum cpuhp_state cb_state; | 65 | enum cpuhp_state cb_state; |
| 62 | int result; | 66 | int result; |
| 63 | struct completion done; | 67 | struct completion done_up; |
| 68 | struct completion done_down; | ||
| 64 | #endif | 69 | #endif |
| 65 | }; | 70 | }; |
| 66 | 71 | ||
| 67 | static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state); | 72 | static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = { |
| 73 | .fail = CPUHP_INVALID, | ||
| 74 | }; | ||
| 68 | 75 | ||
| 69 | #if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP) | 76 | #if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP) |
| 70 | static struct lock_class_key cpuhp_state_key; | 77 | static struct lockdep_map cpuhp_state_up_map = |
| 71 | static struct lockdep_map cpuhp_state_lock_map = | 78 | STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map); |
| 72 | STATIC_LOCKDEP_MAP_INIT("cpuhp_state", &cpuhp_state_key); | 79 | static struct lockdep_map cpuhp_state_down_map = |
| 80 | STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map); | ||
| 81 | |||
| 82 | |||
| 83 | static void inline cpuhp_lock_acquire(bool bringup) | ||
| 84 | { | ||
| 85 | lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map); | ||
| 86 | } | ||
| 87 | |||
| 88 | static void inline cpuhp_lock_release(bool bringup) | ||
| 89 | { | ||
| 90 | lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map); | ||
| 91 | } | ||
| 92 | #else | ||
| 93 | |||
| 94 | static void inline cpuhp_lock_acquire(bool bringup) { } | ||
| 95 | static void inline cpuhp_lock_release(bool bringup) { } | ||
| 96 | |||
| 73 | #endif | 97 | #endif |
| 74 | 98 | ||
| 75 | /** | 99 | /** |
| @@ -123,13 +147,16 @@ static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state) | |||
| 123 | /** | 147 | /** |
| 124 | * cpuhp_invoke_callback - Invoke the callbacks for a given state | 148 | * cpuhp_invoke_callback - Invoke the callbacks for a given state |
| 125 | * @cpu: The cpu for which the callback should be invoked | 149 | * @cpu: The cpu for which the callback should be invoked |
| 126 | * @step: The step in the state machine | 150 | * @state: The state to do callbacks for |
| 127 | * @bringup: True if the bringup callback should be invoked | 151 | * @bringup: True if the bringup callback should be invoked |
| 152 | * @node: For multi-instance, do a single entry callback for install/remove | ||
| 153 | * @lastp: For multi-instance rollback, remember how far we got | ||
| 128 | * | 154 | * |
| 129 | * Called from cpu hotplug and from the state register machinery. | 155 | * Called from cpu hotplug and from the state register machinery. |
| 130 | */ | 156 | */ |
| 131 | static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state, | 157 | static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state, |
| 132 | bool bringup, struct hlist_node *node) | 158 | bool bringup, struct hlist_node *node, |
| 159 | struct hlist_node **lastp) | ||
| 133 | { | 160 | { |
| 134 | struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); | 161 | struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); |
| 135 | struct cpuhp_step *step = cpuhp_get_step(state); | 162 | struct cpuhp_step *step = cpuhp_get_step(state); |
| @@ -137,7 +164,17 @@ static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state, | |||
| 137 | int (*cb)(unsigned int cpu); | 164 | int (*cb)(unsigned int cpu); |
| 138 | int ret, cnt; | 165 | int ret, cnt; |
| 139 | 166 | ||
| 167 | if (st->fail == state) { | ||
| 168 | st->fail = CPUHP_INVALID; | ||
| 169 | |||
| 170 | if (!(bringup ? step->startup.single : step->teardown.single)) | ||
| 171 | return 0; | ||
| 172 | |||
| 173 | return -EAGAIN; | ||
| 174 | } | ||
| 175 | |||
| 140 | if (!step->multi_instance) { | 176 | if (!step->multi_instance) { |
| 177 | WARN_ON_ONCE(lastp && *lastp); | ||
| 141 | cb = bringup ? step->startup.single : step->teardown.single; | 178 | cb = bringup ? step->startup.single : step->teardown.single; |
| 142 | if (!cb) | 179 | if (!cb) |
| 143 | return 0; | 180 | return 0; |
| @@ -152,6 +189,7 @@ static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state, | |||
| 152 | 189 | ||
| 153 | /* Single invocation for instance add/remove */ | 190 | /* Single invocation for instance add/remove */ |
| 154 | if (node) { | 191 | if (node) { |
| 192 | WARN_ON_ONCE(lastp && *lastp); | ||
| 155 | trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node); | 193 | trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node); |
| 156 | ret = cbm(cpu, node); | 194 | ret = cbm(cpu, node); |
| 157 | trace_cpuhp_exit(cpu, st->state, state, ret); | 195 | trace_cpuhp_exit(cpu, st->state, state, ret); |
| @@ -161,13 +199,23 @@ static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state, | |||
| 161 | /* State transition. Invoke on all instances */ | 199 | /* State transition. Invoke on all instances */ |
| 162 | cnt = 0; | 200 | cnt = 0; |
| 163 | hlist_for_each(node, &step->list) { | 201 | hlist_for_each(node, &step->list) { |
| 202 | if (lastp && node == *lastp) | ||
| 203 | break; | ||
| 204 | |||
| 164 | trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node); | 205 | trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node); |
| 165 | ret = cbm(cpu, node); | 206 | ret = cbm(cpu, node); |
| 166 | trace_cpuhp_exit(cpu, st->state, state, ret); | 207 | trace_cpuhp_exit(cpu, st->state, state, ret); |
| 167 | if (ret) | 208 | if (ret) { |
| 168 | goto err; | 209 | if (!lastp) |
| 210 | goto err; | ||
| 211 | |||
| 212 | *lastp = node; | ||
| 213 | return ret; | ||
| 214 | } | ||
| 169 | cnt++; | 215 | cnt++; |
| 170 | } | 216 | } |
| 217 | if (lastp) | ||
| 218 | *lastp = NULL; | ||
| 171 | return 0; | 219 | return 0; |
| 172 | err: | 220 | err: |
| 173 | /* Rollback the instances if one failed */ | 221 | /* Rollback the instances if one failed */ |
| @@ -178,12 +226,39 @@ err: | |||
| 178 | hlist_for_each(node, &step->list) { | 226 | hlist_for_each(node, &step->list) { |
| 179 | if (!cnt--) | 227 | if (!cnt--) |
| 180 | break; | 228 | break; |
| 181 | cbm(cpu, node); | 229 | |
| 230 | trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node); | ||
| 231 | ret = cbm(cpu, node); | ||
| 232 | trace_cpuhp_exit(cpu, st->state, state, ret); | ||
| 233 | /* | ||
| 234 | * Rollback must not fail, | ||
| 235 | */ | ||
| 236 | WARN_ON_ONCE(ret); | ||
| 182 | } | 237 | } |
| 183 | return ret; | 238 | return ret; |
| 184 | } | 239 | } |
| 185 | 240 | ||
| 186 | #ifdef CONFIG_SMP | 241 | #ifdef CONFIG_SMP |
| 242 | static inline void wait_for_ap_thread(struct cpuhp_cpu_state *st, bool bringup) | ||
| 243 | { | ||
| 244 | struct completion *done = bringup ? &st->done_up : &st->done_down; | ||
| 245 | wait_for_completion(done); | ||
| 246 | } | ||
| 247 | |||
| 248 | static inline void complete_ap_thread(struct cpuhp_cpu_state *st, bool bringup) | ||
| 249 | { | ||
| 250 | struct completion *done = bringup ? &st->done_up : &st->done_down; | ||
| 251 | complete(done); | ||
| 252 | } | ||
| 253 | |||
| 254 | /* | ||
| | | 255 | * The former STARTING/DYING states run with IRQs disabled and must not fail. | |
| 256 | */ | ||
| 257 | static bool cpuhp_is_atomic_state(enum cpuhp_state state) | ||
| 258 | { | ||
| 259 | return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE; | ||
| 260 | } | ||
| 261 | |||
| 187 | /* Serializes the updates to cpu_online_mask, cpu_present_mask */ | 262 | /* Serializes the updates to cpu_online_mask, cpu_present_mask */ |
| 188 | static DEFINE_MUTEX(cpu_add_remove_lock); | 263 | static DEFINE_MUTEX(cpu_add_remove_lock); |
| 189 | bool cpuhp_tasks_frozen; | 264 | bool cpuhp_tasks_frozen; |
| @@ -271,14 +346,79 @@ void cpu_hotplug_enable(void) | |||
| 271 | EXPORT_SYMBOL_GPL(cpu_hotplug_enable); | 346 | EXPORT_SYMBOL_GPL(cpu_hotplug_enable); |
| 272 | #endif /* CONFIG_HOTPLUG_CPU */ | 347 | #endif /* CONFIG_HOTPLUG_CPU */ |
| 273 | 348 | ||
| 274 | static void __cpuhp_kick_ap_work(struct cpuhp_cpu_state *st); | 349 | static inline enum cpuhp_state |
| 350 | cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target) | ||
| 351 | { | ||
| 352 | enum cpuhp_state prev_state = st->state; | ||
| 353 | |||
| 354 | st->rollback = false; | ||
| 355 | st->last = NULL; | ||
| 356 | |||
| 357 | st->target = target; | ||
| 358 | st->single = false; | ||
| 359 | st->bringup = st->state < target; | ||
| 360 | |||
| 361 | return prev_state; | ||
| 362 | } | ||
| 363 | |||
| 364 | static inline void | ||
| 365 | cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state) | ||
| 366 | { | ||
| 367 | st->rollback = true; | ||
| 368 | |||
| 369 | /* | ||
| 370 | * If we have st->last we need to undo partial multi_instance of this | ||
| 371 | * state first. Otherwise start undo at the previous state. | ||
| 372 | */ | ||
| 373 | if (!st->last) { | ||
| 374 | if (st->bringup) | ||
| 375 | st->state--; | ||
| 376 | else | ||
| 377 | st->state++; | ||
| 378 | } | ||
| 379 | |||
| 380 | st->target = prev_state; | ||
| 381 | st->bringup = !st->bringup; | ||
| 382 | } | ||
| 383 | |||
| 384 | /* Regular hotplug invocation of the AP hotplug thread */ | ||
| 385 | static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st) | ||
| 386 | { | ||
| 387 | if (!st->single && st->state == st->target) | ||
| 388 | return; | ||
| 389 | |||
| 390 | st->result = 0; | ||
| 391 | /* | ||
| 392 | * Make sure the above stores are visible before should_run becomes | ||
| | | 393 | * true. Paired with the mb() in cpuhp_thread_fun() | |
| 394 | */ | ||
| 395 | smp_mb(); | ||
| 396 | st->should_run = true; | ||
| 397 | wake_up_process(st->thread); | ||
| 398 | wait_for_ap_thread(st, st->bringup); | ||
| 399 | } | ||
| 400 | |||
| 401 | static int cpuhp_kick_ap(struct cpuhp_cpu_state *st, enum cpuhp_state target) | ||
| 402 | { | ||
| 403 | enum cpuhp_state prev_state; | ||
| 404 | int ret; | ||
| 405 | |||
| 406 | prev_state = cpuhp_set_state(st, target); | ||
| 407 | __cpuhp_kick_ap(st); | ||
| 408 | if ((ret = st->result)) { | ||
| 409 | cpuhp_reset_state(st, prev_state); | ||
| 410 | __cpuhp_kick_ap(st); | ||
| 411 | } | ||
| 412 | |||
| 413 | return ret; | ||
| 414 | } | ||
| 275 | 415 | ||
| 276 | static int bringup_wait_for_ap(unsigned int cpu) | 416 | static int bringup_wait_for_ap(unsigned int cpu) |
| 277 | { | 417 | { |
| 278 | struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); | 418 | struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); |
| 279 | 419 | ||
| 280 | /* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */ | 420 | /* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */ |
| 281 | wait_for_completion(&st->done); | 421 | wait_for_ap_thread(st, true); |
| 282 | if (WARN_ON_ONCE((!cpu_online(cpu)))) | 422 | if (WARN_ON_ONCE((!cpu_online(cpu)))) |
| 283 | return -ECANCELED; | 423 | return -ECANCELED; |
| 284 | 424 | ||
| @@ -286,12 +426,10 @@ static int bringup_wait_for_ap(unsigned int cpu) | |||
| 286 | stop_machine_unpark(cpu); | 426 | stop_machine_unpark(cpu); |
| 287 | kthread_unpark(st->thread); | 427 | kthread_unpark(st->thread); |
| 288 | 428 | ||
| 289 | /* Should we go further up ? */ | 429 | if (st->target <= CPUHP_AP_ONLINE_IDLE) |
| 290 | if (st->target > CPUHP_AP_ONLINE_IDLE) { | 430 | return 0; |
| 291 | __cpuhp_kick_ap_work(st); | 431 | |
| 292 | wait_for_completion(&st->done); | 432 | return cpuhp_kick_ap(st, st->target); |
| 293 | } | ||
| 294 | return st->result; | ||
| 295 | } | 433 | } |
| 296 | 434 | ||
| 297 | static int bringup_cpu(unsigned int cpu) | 435 | static int bringup_cpu(unsigned int cpu) |
| @@ -317,32 +455,6 @@ static int bringup_cpu(unsigned int cpu) | |||
| 317 | /* | 455 | /* |
| 318 | * Hotplug state machine related functions | 456 | * Hotplug state machine related functions |
| 319 | */ | 457 | */ |
| 320 | static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st) | ||
| 321 | { | ||
| 322 | for (st->state++; st->state < st->target; st->state++) { | ||
| 323 | struct cpuhp_step *step = cpuhp_get_step(st->state); | ||
| 324 | |||
| 325 | if (!step->skip_onerr) | ||
| 326 | cpuhp_invoke_callback(cpu, st->state, true, NULL); | ||
| 327 | } | ||
| 328 | } | ||
| 329 | |||
| 330 | static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st, | ||
| 331 | enum cpuhp_state target) | ||
| 332 | { | ||
| 333 | enum cpuhp_state prev_state = st->state; | ||
| 334 | int ret = 0; | ||
| 335 | |||
| 336 | for (; st->state > target; st->state--) { | ||
| 337 | ret = cpuhp_invoke_callback(cpu, st->state, false, NULL); | ||
| 338 | if (ret) { | ||
| 339 | st->target = prev_state; | ||
| 340 | undo_cpu_down(cpu, st); | ||
| 341 | break; | ||
| 342 | } | ||
| 343 | } | ||
| 344 | return ret; | ||
| 345 | } | ||
| 346 | 458 | ||
| 347 | static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st) | 459 | static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st) |
| 348 | { | 460 | { |
| @@ -350,7 +462,7 @@ static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st) | |||
| 350 | struct cpuhp_step *step = cpuhp_get_step(st->state); | 462 | struct cpuhp_step *step = cpuhp_get_step(st->state); |
| 351 | 463 | ||
| 352 | if (!step->skip_onerr) | 464 | if (!step->skip_onerr) |
| 353 | cpuhp_invoke_callback(cpu, st->state, false, NULL); | 465 | cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL); |
| 354 | } | 466 | } |
| 355 | } | 467 | } |
| 356 | 468 | ||
| @@ -362,7 +474,7 @@ static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st, | |||
| 362 | 474 | ||
| 363 | while (st->state < target) { | 475 | while (st->state < target) { |
| 364 | st->state++; | 476 | st->state++; |
| 365 | ret = cpuhp_invoke_callback(cpu, st->state, true, NULL); | 477 | ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL); |
| 366 | if (ret) { | 478 | if (ret) { |
| 367 | st->target = prev_state; | 479 | st->target = prev_state; |
| 368 | undo_cpu_up(cpu, st); | 480 | undo_cpu_up(cpu, st); |
| @@ -379,7 +491,8 @@ static void cpuhp_create(unsigned int cpu) | |||
| 379 | { | 491 | { |
| 380 | struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); | 492 | struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); |
| 381 | 493 | ||
| 382 | init_completion(&st->done); | 494 | init_completion(&st->done_up); |
| 495 | init_completion(&st->done_down); | ||
| 383 | } | 496 | } |
| 384 | 497 | ||
| 385 | static int cpuhp_should_run(unsigned int cpu) | 498 | static int cpuhp_should_run(unsigned int cpu) |
| @@ -389,69 +502,90 @@ static int cpuhp_should_run(unsigned int cpu) | |||
| 389 | return st->should_run; | 502 | return st->should_run; |
| 390 | } | 503 | } |
| 391 | 504 | ||
| 392 | /* Execute the teardown callbacks. Used to be CPU_DOWN_PREPARE */ | ||
| 393 | static int cpuhp_ap_offline(unsigned int cpu, struct cpuhp_cpu_state *st) | ||
| 394 | { | ||
| 395 | enum cpuhp_state target = max((int)st->target, CPUHP_TEARDOWN_CPU); | ||
| 396 | |||
| 397 | return cpuhp_down_callbacks(cpu, st, target); | ||
| 398 | } | ||
| 399 | |||
| 400 | /* Execute the online startup callbacks. Used to be CPU_ONLINE */ | ||
| 401 | static int cpuhp_ap_online(unsigned int cpu, struct cpuhp_cpu_state *st) | ||
| 402 | { | ||
| 403 | return cpuhp_up_callbacks(cpu, st, st->target); | ||
| 404 | } | ||
| 405 | |||
| 406 | /* | 505 | /* |
| 407 | * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke | 506 | * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke |
| 408 | * callbacks when a state gets [un]installed at runtime. | 507 | * callbacks when a state gets [un]installed at runtime. |
| 508 | * | ||
| 509 | * Each invocation of this function by the smpboot thread does a single AP | ||
| 510 | * state callback. | ||
| 511 | * | ||
| 512 | * It has 3 modes of operation: | ||
| 513 | * - single: runs st->cb_state | ||
| 514 | * - up: runs ++st->state, while st->state < st->target | ||
| 515 | * - down: runs st->state--, while st->state > st->target | ||
| 516 | * | ||
| 517 | * When complete or on error, should_run is cleared and the completion is fired. | ||
| 409 | */ | 518 | */ |
| 410 | static void cpuhp_thread_fun(unsigned int cpu) | 519 | static void cpuhp_thread_fun(unsigned int cpu) |
| 411 | { | 520 | { |
| 412 | struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state); | 521 | struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state); |
| 413 | int ret = 0; | 522 | bool bringup = st->bringup; |
| 523 | enum cpuhp_state state; | ||
| 414 | 524 | ||
| 415 | /* | 525 | /* |
| 416 | * Paired with the mb() in cpuhp_kick_ap_work and | 526 | * ACQUIRE for the cpuhp_should_run() load of ->should_run. Ensures |
| 417 | * cpuhp_invoke_ap_callback, so the work set is consistent visible. | 527 | * that if we see ->should_run we also see the rest of the state. |
| 418 | */ | 528 | */ |
| 419 | smp_mb(); | 529 | smp_mb(); |
| 420 | if (!st->should_run) | 530 | |
| 531 | if (WARN_ON_ONCE(!st->should_run)) | ||
| 421 | return; | 532 | return; |
| 422 | 533 | ||
| 423 | st->should_run = false; | 534 | cpuhp_lock_acquire(bringup); |
| 424 | 535 | ||
| 425 | lock_map_acquire(&cpuhp_state_lock_map); | ||
| 426 | /* Single callback invocation for [un]install ? */ | ||
| 427 | if (st->single) { | 536 | if (st->single) { |
| 428 | if (st->cb_state < CPUHP_AP_ONLINE) { | 537 | state = st->cb_state; |
| 429 | local_irq_disable(); | 538 | st->should_run = false; |
| 430 | ret = cpuhp_invoke_callback(cpu, st->cb_state, | 539 | } else { |
| 431 | st->bringup, st->node); | 540 | if (bringup) { |
| 432 | local_irq_enable(); | 541 | st->state++; |
| 542 | state = st->state; | ||
| 543 | st->should_run = (st->state < st->target); | ||
| 544 | WARN_ON_ONCE(st->state > st->target); | ||
| 433 | } else { | 545 | } else { |
| 434 | ret = cpuhp_invoke_callback(cpu, st->cb_state, | 546 | state = st->state; |
| 435 | st->bringup, st->node); | 547 | st->state--; |
| 548 | st->should_run = (st->state > st->target); | ||
| 549 | WARN_ON_ONCE(st->state < st->target); | ||
| 436 | } | 550 | } |
| 437 | } else if (st->rollback) { | 551 | } |
| 438 | BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE); | 552 | |
| 553 | WARN_ON_ONCE(!cpuhp_is_ap_state(state)); | ||
| 554 | |||
| 555 | if (st->rollback) { | ||
| 556 | struct cpuhp_step *step = cpuhp_get_step(state); | ||
| 557 | if (step->skip_onerr) | ||
| 558 | goto next; | ||
| 559 | } | ||
| 560 | |||
| 561 | if (cpuhp_is_atomic_state(state)) { | ||
| 562 | local_irq_disable(); | ||
| 563 | st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last); | ||
| 564 | local_irq_enable(); | ||
| 439 | 565 | ||
| 440 | undo_cpu_down(cpu, st); | 566 | /* |
| 441 | st->rollback = false; | 567 | * STARTING/DYING must not fail! |
| 568 | */ | ||
| 569 | WARN_ON_ONCE(st->result); | ||
| 442 | } else { | 570 | } else { |
| 443 | /* Cannot happen .... */ | 571 | st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last); |
| 444 | BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE); | 572 | } |
| 445 | 573 | ||
| 446 | /* Regular hotplug work */ | 574 | if (st->result) { |
| 447 | if (st->state < st->target) | 575 | /* |
| 448 | ret = cpuhp_ap_online(cpu, st); | 576 | * If we fail on a rollback, we're up a creek without no |
| 449 | else if (st->state > st->target) | 577 | * paddle, no way forward, no way back. We lose, thanks for |
| 450 | ret = cpuhp_ap_offline(cpu, st); | 578 | * playing. |
| 579 | */ | ||
| 580 | WARN_ON_ONCE(st->rollback); | ||
| 581 | st->should_run = false; | ||
| 451 | } | 582 | } |
| 452 | lock_map_release(&cpuhp_state_lock_map); | 583 | |
| 453 | st->result = ret; | 584 | next: |
| 454 | complete(&st->done); | 585 | cpuhp_lock_release(bringup); |
| 586 | |||
| 587 | if (!st->should_run) | ||
| 588 | complete_ap_thread(st, bringup); | ||
| 455 | } | 589 | } |
| 456 | 590 | ||
| 457 | /* Invoke a single callback on a remote cpu */ | 591 | /* Invoke a single callback on a remote cpu */ |
| @@ -460,62 +594,64 @@ cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup, | |||
| 460 | struct hlist_node *node) | 594 | struct hlist_node *node) |
| 461 | { | 595 | { |
| 462 | struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); | 596 | struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); |
| 597 | int ret; | ||
| 463 | 598 | ||
| 464 | if (!cpu_online(cpu)) | 599 | if (!cpu_online(cpu)) |
| 465 | return 0; | 600 | return 0; |
| 466 | 601 | ||
| 467 | lock_map_acquire(&cpuhp_state_lock_map); | 602 | cpuhp_lock_acquire(false); |
| 468 | lock_map_release(&cpuhp_state_lock_map); | 603 | cpuhp_lock_release(false); |
| 604 | |||
| 605 | cpuhp_lock_acquire(true); | ||
| 606 | cpuhp_lock_release(true); | ||
| 469 | 607 | ||
| 470 | /* | 608 | /* |
| 471 | * If we are up and running, use the hotplug thread. For early calls | 609 | * If we are up and running, use the hotplug thread. For early calls |
| 472 | * we invoke the thread function directly. | 610 | * we invoke the thread function directly. |
| 473 | */ | 611 | */ |
| 474 | if (!st->thread) | 612 | if (!st->thread) |
| 475 | return cpuhp_invoke_callback(cpu, state, bringup, node); | 613 | return cpuhp_invoke_callback(cpu, state, bringup, node, NULL); |
| 614 | |||
| 615 | st->rollback = false; | ||
| 616 | st->last = NULL; | ||
| 476 | 617 | ||
| 618 | st->node = node; | ||
| 619 | st->bringup = bringup; | ||
| 477 | st->cb_state = state; | 620 | st->cb_state = state; |
| 478 | st->single = true; | 621 | st->single = true; |
| 479 | st->bringup = bringup; | ||
| 480 | st->node = node; | ||
| 481 | 622 | ||
| 482 | /* | 623 | __cpuhp_kick_ap(st); |
| 483 | * Make sure the above stores are visible before should_run becomes | ||
| 484 | * true. Paired with the mb() above in cpuhp_thread_fun() | ||
| 485 | */ | ||
| 486 | smp_mb(); | ||
| 487 | st->should_run = true; | ||
| 488 | wake_up_process(st->thread); | ||
| 489 | wait_for_completion(&st->done); | ||
| 490 | return st->result; | ||
| 491 | } | ||
| 492 | 624 | ||
| 493 | /* Regular hotplug invocation of the AP hotplug thread */ | ||
| 494 | static void __cpuhp_kick_ap_work(struct cpuhp_cpu_state *st) | ||
| 495 | { | ||
| 496 | st->result = 0; | ||
| 497 | st->single = false; | ||
| 498 | /* | 625 | /* |
| 499 | * Make sure the above stores are visible before should_run becomes | 626 | * If we failed and did a partial, do a rollback. |
| 500 | * true. Paired with the mb() above in cpuhp_thread_fun() | ||
| 501 | */ | 627 | */ |
| 502 | smp_mb(); | 628 | if ((ret = st->result) && st->last) { |
| 503 | st->should_run = true; | 629 | st->rollback = true; |
| 504 | wake_up_process(st->thread); | 630 | st->bringup = !bringup; |
| 631 | |||
| 632 | __cpuhp_kick_ap(st); | ||
| 633 | } | ||
| 634 | |||
| 635 | return ret; | ||
| 505 | } | 636 | } |
| 506 | 637 | ||
| 507 | static int cpuhp_kick_ap_work(unsigned int cpu) | 638 | static int cpuhp_kick_ap_work(unsigned int cpu) |
| 508 | { | 639 | { |
| 509 | struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); | 640 | struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); |
| 510 | enum cpuhp_state state = st->state; | 641 | enum cpuhp_state prev_state = st->state; |
| 642 | int ret; | ||
| 643 | |||
| 644 | cpuhp_lock_acquire(false); | ||
| 645 | cpuhp_lock_release(false); | ||
| 511 | 646 | ||
| 512 | trace_cpuhp_enter(cpu, st->target, state, cpuhp_kick_ap_work); | 647 | cpuhp_lock_acquire(true); |
| 513 | lock_map_acquire(&cpuhp_state_lock_map); | 648 | cpuhp_lock_release(true); |
| 514 | lock_map_release(&cpuhp_state_lock_map); | 649 | |
| 515 | __cpuhp_kick_ap_work(st); | 650 | trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work); |
| 516 | wait_for_completion(&st->done); | 651 | ret = cpuhp_kick_ap(st, st->target); |
| 517 | trace_cpuhp_exit(cpu, st->state, state, st->result); | 652 | trace_cpuhp_exit(cpu, st->state, prev_state, ret); |
| 518 | return st->result; | 653 | |
| 654 | return ret; | ||
| 519 | } | 655 | } |
| 520 | 656 | ||
| 521 | static struct smp_hotplug_thread cpuhp_threads = { | 657 | static struct smp_hotplug_thread cpuhp_threads = { |
| @@ -581,6 +717,7 @@ static int take_cpu_down(void *_param) | |||
| 581 | struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state); | 717 | struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state); |
| 582 | enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE); | 718 | enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE); |
| 583 | int err, cpu = smp_processor_id(); | 719 | int err, cpu = smp_processor_id(); |
| 720 | int ret; | ||
| 584 | 721 | ||
| 585 | /* Ensure this CPU doesn't handle any more interrupts. */ | 722 | /* Ensure this CPU doesn't handle any more interrupts. */ |
| 586 | err = __cpu_disable(); | 723 | err = __cpu_disable(); |
| @@ -594,8 +731,13 @@ static int take_cpu_down(void *_param) | |||
| 594 | WARN_ON(st->state != CPUHP_TEARDOWN_CPU); | 731 | WARN_ON(st->state != CPUHP_TEARDOWN_CPU); |
| 595 | st->state--; | 732 | st->state--; |
| 596 | /* Invoke the former CPU_DYING callbacks */ | 733 | /* Invoke the former CPU_DYING callbacks */ |
| 597 | for (; st->state > target; st->state--) | 734 | for (; st->state > target; st->state--) { |
| 598 | cpuhp_invoke_callback(cpu, st->state, false, NULL); | 735 | ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL); |
| 736 | /* | ||
| 737 | * DYING must not fail! | ||
| 738 | */ | ||
| 739 | WARN_ON_ONCE(ret); | ||
| 740 | } | ||
| 599 | 741 | ||
| 600 | /* Give up timekeeping duties */ | 742 | /* Give up timekeeping duties */ |
| 601 | tick_handover_do_timer(); | 743 | tick_handover_do_timer(); |
| @@ -639,7 +781,7 @@ static int takedown_cpu(unsigned int cpu) | |||
| 639 | * | 781 | * |
| 640 | * Wait for the stop thread to go away. | 782 | * Wait for the stop thread to go away. |
| 641 | */ | 783 | */ |
| 642 | wait_for_completion(&st->done); | 784 | wait_for_ap_thread(st, false); |
| 643 | BUG_ON(st->state != CPUHP_AP_IDLE_DEAD); | 785 | BUG_ON(st->state != CPUHP_AP_IDLE_DEAD); |
| 644 | 786 | ||
| 645 | /* Interrupts are moved away from the dying cpu, reenable alloc/free */ | 787 | /* Interrupts are moved away from the dying cpu, reenable alloc/free */ |
| @@ -658,7 +800,7 @@ static void cpuhp_complete_idle_dead(void *arg) | |||
| 658 | { | 800 | { |
| 659 | struct cpuhp_cpu_state *st = arg; | 801 | struct cpuhp_cpu_state *st = arg; |
| 660 | 802 | ||
| 661 | complete(&st->done); | 803 | complete_ap_thread(st, false); |
| 662 | } | 804 | } |
| 663 | 805 | ||
| 664 | void cpuhp_report_idle_dead(void) | 806 | void cpuhp_report_idle_dead(void) |
| @@ -676,11 +818,32 @@ void cpuhp_report_idle_dead(void) | |||
| 676 | cpuhp_complete_idle_dead, st, 0); | 818 | cpuhp_complete_idle_dead, st, 0); |
| 677 | } | 819 | } |
| 678 | 820 | ||
| 679 | #else | 821 | static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st) |
| 680 | #define takedown_cpu NULL | 822 | { |
| 681 | #endif | 823 | for (st->state++; st->state < st->target; st->state++) { |
| 824 | struct cpuhp_step *step = cpuhp_get_step(st->state); | ||
| 682 | 825 | ||
| 683 | #ifdef CONFIG_HOTPLUG_CPU | 826 | if (!step->skip_onerr) |
| 827 | cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL); | ||
| 828 | } | ||
| 829 | } | ||
| 830 | |||
| 831 | static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st, | ||
| 832 | enum cpuhp_state target) | ||
| 833 | { | ||
| 834 | enum cpuhp_state prev_state = st->state; | ||
| 835 | int ret = 0; | ||
| 836 | |||
| 837 | for (; st->state > target; st->state--) { | ||
| 838 | ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL); | ||
| 839 | if (ret) { | ||
| 840 | st->target = prev_state; | ||
| 841 | undo_cpu_down(cpu, st); | ||
| 842 | break; | ||
| 843 | } | ||
| 844 | } | ||
| 845 | return ret; | ||
| 846 | } | ||
| 684 | 847 | ||
| 685 | /* Requires cpu_add_remove_lock to be held */ | 848 | /* Requires cpu_add_remove_lock to be held */ |
| 686 | static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, | 849 | static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, |
| @@ -699,13 +862,13 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, | |||
| 699 | 862 | ||
| 700 | cpuhp_tasks_frozen = tasks_frozen; | 863 | cpuhp_tasks_frozen = tasks_frozen; |
| 701 | 864 | ||
| 702 | prev_state = st->state; | 865 | prev_state = cpuhp_set_state(st, target); |
| 703 | st->target = target; | ||
| 704 | /* | 866 | /* |
| 705 | * If the current CPU state is in the range of the AP hotplug thread, | 867 | * If the current CPU state is in the range of the AP hotplug thread, |
| 706 | * then we need to kick the thread. | 868 | * then we need to kick the thread. |
| 707 | */ | 869 | */ |
| 708 | if (st->state > CPUHP_TEARDOWN_CPU) { | 870 | if (st->state > CPUHP_TEARDOWN_CPU) { |
| 871 | st->target = max((int)target, CPUHP_TEARDOWN_CPU); | ||
| 709 | ret = cpuhp_kick_ap_work(cpu); | 872 | ret = cpuhp_kick_ap_work(cpu); |
| 710 | /* | 873 | /* |
| 711 | * The AP side has done the error rollback already. Just | 874 | * The AP side has done the error rollback already. Just |
| @@ -720,6 +883,8 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, | |||
| 720 | */ | 883 | */ |
| 721 | if (st->state > CPUHP_TEARDOWN_CPU) | 884 | if (st->state > CPUHP_TEARDOWN_CPU) |
| 722 | goto out; | 885 | goto out; |
| 886 | |||
| 887 | st->target = target; | ||
| 723 | } | 888 | } |
| 724 | /* | 889 | /* |
| 725 | * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need | 890 | * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need |
| @@ -727,13 +892,17 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, | |||
| 727 | */ | 892 | */ |
| 728 | ret = cpuhp_down_callbacks(cpu, st, target); | 893 | ret = cpuhp_down_callbacks(cpu, st, target); |
| 729 | if (ret && st->state > CPUHP_TEARDOWN_CPU && st->state < prev_state) { | 894 | if (ret && st->state > CPUHP_TEARDOWN_CPU && st->state < prev_state) { |
| 730 | st->target = prev_state; | 895 | cpuhp_reset_state(st, prev_state); |
| 731 | st->rollback = true; | 896 | __cpuhp_kick_ap(st); |
| 732 | cpuhp_kick_ap_work(cpu); | ||
| 733 | } | 897 | } |
| 734 | 898 | ||
| 735 | out: | 899 | out: |
| 736 | cpus_write_unlock(); | 900 | cpus_write_unlock(); |
| 901 | /* | ||
| 902 | * Do post unplug cleanup. This is still protected against | ||
| 903 | * concurrent CPU hotplug via cpu_add_remove_lock. | ||
| 904 | */ | ||
| 905 | lockup_detector_cleanup(); | ||
| 737 | return ret; | 906 | return ret; |
| 738 | } | 907 | } |
| 739 | 908 | ||
| @@ -754,11 +923,15 @@ out: | |||
| 754 | cpu_maps_update_done(); | 923 | cpu_maps_update_done(); |
| 755 | return err; | 924 | return err; |
| 756 | } | 925 | } |
| 926 | |||
| 757 | int cpu_down(unsigned int cpu) | 927 | int cpu_down(unsigned int cpu) |
| 758 | { | 928 | { |
| 759 | return do_cpu_down(cpu, CPUHP_OFFLINE); | 929 | return do_cpu_down(cpu, CPUHP_OFFLINE); |
| 760 | } | 930 | } |
| 761 | EXPORT_SYMBOL(cpu_down); | 931 | EXPORT_SYMBOL(cpu_down); |
| 932 | |||
| 933 | #else | ||
| 934 | #define takedown_cpu NULL | ||
| 762 | #endif /*CONFIG_HOTPLUG_CPU*/ | 935 | #endif /*CONFIG_HOTPLUG_CPU*/ |
| 763 | 936 | ||
| 764 | /** | 937 | /** |
| @@ -772,11 +945,16 @@ void notify_cpu_starting(unsigned int cpu) | |||
| 772 | { | 945 | { |
| 773 | struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); | 946 | struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); |
| 774 | enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE); | 947 | enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE); |
| 948 | int ret; | ||
| 775 | 949 | ||
| 776 | rcu_cpu_starting(cpu); /* Enables RCU usage on this CPU. */ | 950 | rcu_cpu_starting(cpu); /* Enables RCU usage on this CPU. */ |
| 777 | while (st->state < target) { | 951 | while (st->state < target) { |
| 778 | st->state++; | 952 | st->state++; |
| 779 | cpuhp_invoke_callback(cpu, st->state, true, NULL); | 953 | ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL); |
| 954 | /* | ||
| 955 | * STARTING must not fail! | ||
| 956 | */ | ||
| 957 | WARN_ON_ONCE(ret); | ||
| 780 | } | 958 | } |
| 781 | } | 959 | } |
| 782 | 960 | ||
| @@ -794,7 +972,7 @@ void cpuhp_online_idle(enum cpuhp_state state) | |||
| 794 | return; | 972 | return; |
| 795 | 973 | ||
| 796 | st->state = CPUHP_AP_ONLINE_IDLE; | 974 | st->state = CPUHP_AP_ONLINE_IDLE; |
| 797 | complete(&st->done); | 975 | complete_ap_thread(st, true); |
| 798 | } | 976 | } |
| 799 | 977 | ||
| 800 | /* Requires cpu_add_remove_lock to be held */ | 978 | /* Requires cpu_add_remove_lock to be held */ |
| @@ -829,7 +1007,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target) | |||
| 829 | 1007 | ||
| 830 | cpuhp_tasks_frozen = tasks_frozen; | 1008 | cpuhp_tasks_frozen = tasks_frozen; |
| 831 | 1009 | ||
| 832 | st->target = target; | 1010 | cpuhp_set_state(st, target); |
| 833 | /* | 1011 | /* |
| 834 | * If the current CPU state is in the range of the AP hotplug thread, | 1012 | * If the current CPU state is in the range of the AP hotplug thread, |
| 835 | * then we need to kick the thread once more. | 1013 | * then we need to kick the thread once more. |
| @@ -1296,6 +1474,10 @@ static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup, | |||
| 1296 | struct cpuhp_step *sp = cpuhp_get_step(state); | 1474 | struct cpuhp_step *sp = cpuhp_get_step(state); |
| 1297 | int ret; | 1475 | int ret; |
| 1298 | 1476 | ||
| 1477 | /* | ||
| 1478 | * If there's nothing to do, we done. | ||
| 1479 | * Relies on the union for multi_instance. | ||
| 1480 | */ | ||
| 1299 | if ((bringup && !sp->startup.single) || | 1481 | if ((bringup && !sp->startup.single) || |
| 1300 | (!bringup && !sp->teardown.single)) | 1482 | (!bringup && !sp->teardown.single)) |
| 1301 | return 0; | 1483 | return 0; |
| @@ -1307,9 +1489,9 @@ static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup, | |||
| 1307 | if (cpuhp_is_ap_state(state)) | 1489 | if (cpuhp_is_ap_state(state)) |
| 1308 | ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node); | 1490 | ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node); |
| 1309 | else | 1491 | else |
| 1310 | ret = cpuhp_invoke_callback(cpu, state, bringup, node); | 1492 | ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL); |
| 1311 | #else | 1493 | #else |
| 1312 | ret = cpuhp_invoke_callback(cpu, state, bringup, node); | 1494 | ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL); |
| 1313 | #endif | 1495 | #endif |
| 1314 | BUG_ON(ret && !bringup); | 1496 | BUG_ON(ret && !bringup); |
| 1315 | return ret; | 1497 | return ret; |
| @@ -1641,9 +1823,55 @@ static ssize_t show_cpuhp_target(struct device *dev, | |||
| 1641 | } | 1823 | } |
| 1642 | static DEVICE_ATTR(target, 0644, show_cpuhp_target, write_cpuhp_target); | 1824 | static DEVICE_ATTR(target, 0644, show_cpuhp_target, write_cpuhp_target); |
| 1643 | 1825 | ||
| 1826 | |||
| 1827 | static ssize_t write_cpuhp_fail(struct device *dev, | ||
| 1828 | struct device_attribute *attr, | ||
| 1829 | const char *buf, size_t count) | ||
| 1830 | { | ||
| 1831 | struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id); | ||
| 1832 | struct cpuhp_step *sp; | ||
| 1833 | int fail, ret; | ||
| 1834 | |||
| 1835 | ret = kstrtoint(buf, 10, &fail); | ||
| 1836 | if (ret) | ||
| 1837 | return ret; | ||
| 1838 | |||
| 1839 | /* | ||
| 1840 | * Cannot fail STARTING/DYING callbacks. | ||
| 1841 | */ | ||
| 1842 | if (cpuhp_is_atomic_state(fail)) | ||
| 1843 | return -EINVAL; | ||
| 1844 | |||
| 1845 | /* | ||
| 1846 | * Cannot fail anything that doesn't have callbacks. | ||
| 1847 | */ | ||
| 1848 | mutex_lock(&cpuhp_state_mutex); | ||
| 1849 | sp = cpuhp_get_step(fail); | ||
| 1850 | if (!sp->startup.single && !sp->teardown.single) | ||
| 1851 | ret = -EINVAL; | ||
| 1852 | mutex_unlock(&cpuhp_state_mutex); | ||
| 1853 | if (ret) | ||
| 1854 | return ret; | ||
| 1855 | |||
| 1856 | st->fail = fail; | ||
| 1857 | |||
| 1858 | return count; | ||
| 1859 | } | ||
| 1860 | |||
| 1861 | static ssize_t show_cpuhp_fail(struct device *dev, | ||
| 1862 | struct device_attribute *attr, char *buf) | ||
| 1863 | { | ||
| 1864 | struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id); | ||
| 1865 | |||
| 1866 | return sprintf(buf, "%d\n", st->fail); | ||
| 1867 | } | ||
| 1868 | |||
| 1869 | static DEVICE_ATTR(fail, 0644, show_cpuhp_fail, write_cpuhp_fail); | ||
| 1870 | |||
| 1644 | static struct attribute *cpuhp_cpu_attrs[] = { | 1871 | static struct attribute *cpuhp_cpu_attrs[] = { |
| 1645 | &dev_attr_state.attr, | 1872 | &dev_attr_state.attr, |
| 1646 | &dev_attr_target.attr, | 1873 | &dev_attr_target.attr, |
| 1874 | &dev_attr_fail.attr, | ||
| 1647 | NULL | 1875 | NULL |
| 1648 | }; | 1876 | }; |
| 1649 | 1877 | ||
diff --git a/kernel/events/core.c b/kernel/events/core.c index 3e691b75b2db..6bc21e202ae4 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
| @@ -8171,6 +8171,7 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) | |||
| 8171 | } | 8171 | } |
| 8172 | } | 8172 | } |
| 8173 | event->tp_event->prog = prog; | 8173 | event->tp_event->prog = prog; |
| 8174 | event->tp_event->bpf_prog_owner = event; | ||
| 8174 | 8175 | ||
| 8175 | return 0; | 8176 | return 0; |
| 8176 | } | 8177 | } |
| @@ -8185,7 +8186,7 @@ static void perf_event_free_bpf_prog(struct perf_event *event) | |||
| 8185 | return; | 8186 | return; |
| 8186 | 8187 | ||
| 8187 | prog = event->tp_event->prog; | 8188 | prog = event->tp_event->prog; |
| 8188 | if (prog) { | 8189 | if (prog && event->tp_event->bpf_prog_owner == event) { |
| 8189 | event->tp_event->prog = NULL; | 8190 | event->tp_event->prog = NULL; |
| 8190 | bpf_prog_put(prog); | 8191 | bpf_prog_put(prog); |
| 8191 | } | 8192 | } |
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index af71a84e12ee..f684d8e5fa2b 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c | |||
| @@ -412,6 +412,19 @@ err: | |||
| 412 | return NULL; | 412 | return NULL; |
| 413 | } | 413 | } |
| 414 | 414 | ||
| 415 | static bool __always_inline rb_need_aux_wakeup(struct ring_buffer *rb) | ||
| 416 | { | ||
| 417 | if (rb->aux_overwrite) | ||
| 418 | return false; | ||
| 419 | |||
| 420 | if (rb->aux_head - rb->aux_wakeup >= rb->aux_watermark) { | ||
| 421 | rb->aux_wakeup = rounddown(rb->aux_head, rb->aux_watermark); | ||
| 422 | return true; | ||
| 423 | } | ||
| 424 | |||
| 425 | return false; | ||
| 426 | } | ||
| 427 | |||
| 415 | /* | 428 | /* |
| 416 | * Commit the data written by hardware into the ring buffer by adjusting | 429 | * Commit the data written by hardware into the ring buffer by adjusting |
| 417 | * aux_head and posting a PERF_RECORD_AUX into the perf buffer. It is the | 430 | * aux_head and posting a PERF_RECORD_AUX into the perf buffer. It is the |
| @@ -451,10 +464,8 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) | |||
| 451 | } | 464 | } |
| 452 | 465 | ||
| 453 | rb->user_page->aux_head = rb->aux_head; | 466 | rb->user_page->aux_head = rb->aux_head; |
| 454 | if (rb->aux_head - rb->aux_wakeup >= rb->aux_watermark) { | 467 | if (rb_need_aux_wakeup(rb)) |
| 455 | wakeup = true; | 468 | wakeup = true; |
| 456 | rb->aux_wakeup = rounddown(rb->aux_head, rb->aux_watermark); | ||
| 457 | } | ||
| 458 | 469 | ||
| 459 | if (wakeup) { | 470 | if (wakeup) { |
| 460 | if (handle->aux_flags & PERF_AUX_FLAG_TRUNCATED) | 471 | if (handle->aux_flags & PERF_AUX_FLAG_TRUNCATED) |
| @@ -484,9 +495,8 @@ int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size) | |||
| 484 | rb->aux_head += size; | 495 | rb->aux_head += size; |
| 485 | 496 | ||
| 486 | rb->user_page->aux_head = rb->aux_head; | 497 | rb->user_page->aux_head = rb->aux_head; |
| 487 | if (rb->aux_head - rb->aux_wakeup >= rb->aux_watermark) { | 498 | if (rb_need_aux_wakeup(rb)) { |
| 488 | perf_output_wakeup(handle); | 499 | perf_output_wakeup(handle); |
| 489 | rb->aux_wakeup = rounddown(rb->aux_head, rb->aux_watermark); | ||
| 490 | handle->wakeup = rb->aux_wakeup + rb->aux_watermark; | 500 | handle->wakeup = rb->aux_wakeup + rb->aux_watermark; |
| 491 | } | 501 | } |
| 492 | 502 | ||
diff --git a/kernel/exit.c b/kernel/exit.c index 3481ababd06a..f2cd53e92147 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
| @@ -1600,12 +1600,10 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *, | |||
| 1600 | struct waitid_info info = {.status = 0}; | 1600 | struct waitid_info info = {.status = 0}; |
| 1601 | long err = kernel_waitid(which, upid, &info, options, ru ? &r : NULL); | 1601 | long err = kernel_waitid(which, upid, &info, options, ru ? &r : NULL); |
| 1602 | int signo = 0; | 1602 | int signo = 0; |
| 1603 | |||
| 1603 | if (err > 0) { | 1604 | if (err > 0) { |
| 1604 | signo = SIGCHLD; | 1605 | signo = SIGCHLD; |
| 1605 | err = 0; | 1606 | err = 0; |
| 1606 | } | ||
| 1607 | |||
| 1608 | if (!err) { | ||
| 1609 | if (ru && copy_to_user(ru, &r, sizeof(struct rusage))) | 1607 | if (ru && copy_to_user(ru, &r, sizeof(struct rusage))) |
| 1610 | return -EFAULT; | 1608 | return -EFAULT; |
| 1611 | } | 1609 | } |
| @@ -1723,16 +1721,15 @@ COMPAT_SYSCALL_DEFINE5(waitid, | |||
| 1723 | if (err > 0) { | 1721 | if (err > 0) { |
| 1724 | signo = SIGCHLD; | 1722 | signo = SIGCHLD; |
| 1725 | err = 0; | 1723 | err = 0; |
| 1726 | } | 1724 | if (uru) { |
| 1727 | 1725 | /* kernel_waitid() overwrites everything in ru */ | |
| 1728 | if (!err && uru) { | 1726 | if (COMPAT_USE_64BIT_TIME) |
| 1729 | /* kernel_waitid() overwrites everything in ru */ | 1727 | err = copy_to_user(uru, &ru, sizeof(ru)); |
| 1730 | if (COMPAT_USE_64BIT_TIME) | 1728 | else |
| 1731 | err = copy_to_user(uru, &ru, sizeof(ru)); | 1729 | err = put_compat_rusage(&ru, uru); |
| 1732 | else | 1730 | if (err) |
| 1733 | err = put_compat_rusage(&ru, uru); | 1731 | return -EFAULT; |
| 1734 | if (err) | 1732 | } |
| 1735 | return -EFAULT; | ||
| 1736 | } | 1733 | } |
| 1737 | 1734 | ||
| 1738 | if (!infop) | 1735 | if (!infop) |
diff --git a/kernel/extable.c b/kernel/extable.c index 38c2412401a1..9aa1cc41ecf7 100644 --- a/kernel/extable.c +++ b/kernel/extable.c | |||
| @@ -102,15 +102,7 @@ int core_kernel_data(unsigned long addr) | |||
| 102 | 102 | ||
| 103 | int __kernel_text_address(unsigned long addr) | 103 | int __kernel_text_address(unsigned long addr) |
| 104 | { | 104 | { |
| 105 | if (core_kernel_text(addr)) | 105 | if (kernel_text_address(addr)) |
| 106 | return 1; | ||
| 107 | if (is_module_text_address(addr)) | ||
| 108 | return 1; | ||
| 109 | if (is_ftrace_trampoline(addr)) | ||
| 110 | return 1; | ||
| 111 | if (is_kprobe_optinsn_slot(addr) || is_kprobe_insn_slot(addr)) | ||
| 112 | return 1; | ||
| 113 | if (is_bpf_text_address(addr)) | ||
| 114 | return 1; | 106 | return 1; |
| 115 | /* | 107 | /* |
| 116 | * There might be init symbols in saved stacktraces. | 108 | * There might be init symbols in saved stacktraces. |
| @@ -127,17 +119,42 @@ int __kernel_text_address(unsigned long addr) | |||
| 127 | 119 | ||
| 128 | int kernel_text_address(unsigned long addr) | 120 | int kernel_text_address(unsigned long addr) |
| 129 | { | 121 | { |
| 122 | bool no_rcu; | ||
| 123 | int ret = 1; | ||
| 124 | |||
| 130 | if (core_kernel_text(addr)) | 125 | if (core_kernel_text(addr)) |
| 131 | return 1; | 126 | return 1; |
| 127 | |||
| 128 | /* | ||
| 129 | * If a stack dump happens while RCU is not watching, then | ||
| 130 | * RCU needs to be notified that it requires to start | ||
| 131 | * watching again. This can happen either by tracing that | ||
| 132 | * triggers a stack trace, or a WARN() that happens during | ||
| 133 | * coming back from idle, or cpu on or offlining. | ||
| 134 | * | ||
| 135 | * is_module_text_address() as well as the kprobe slots | ||
| 136 | * and is_bpf_text_address() require RCU to be watching. | ||
| 137 | */ | ||
| 138 | no_rcu = !rcu_is_watching(); | ||
| 139 | |||
| 140 | /* Treat this like an NMI as it can happen anywhere */ | ||
| 141 | if (no_rcu) | ||
| 142 | rcu_nmi_enter(); | ||
| 143 | |||
| 132 | if (is_module_text_address(addr)) | 144 | if (is_module_text_address(addr)) |
| 133 | return 1; | 145 | goto out; |
| 134 | if (is_ftrace_trampoline(addr)) | 146 | if (is_ftrace_trampoline(addr)) |
| 135 | return 1; | 147 | goto out; |
| 136 | if (is_kprobe_optinsn_slot(addr) || is_kprobe_insn_slot(addr)) | 148 | if (is_kprobe_optinsn_slot(addr) || is_kprobe_insn_slot(addr)) |
| 137 | return 1; | 149 | goto out; |
| 138 | if (is_bpf_text_address(addr)) | 150 | if (is_bpf_text_address(addr)) |
| 139 | return 1; | 151 | goto out; |
| 140 | return 0; | 152 | ret = 0; |
| 153 | out: | ||
| 154 | if (no_rcu) | ||
| 155 | rcu_nmi_exit(); | ||
| 156 | |||
| 157 | return ret; | ||
| 141 | } | 158 | } |
| 142 | 159 | ||
| 143 | /* | 160 | /* |
diff --git a/kernel/fork.c b/kernel/fork.c index 10646182440f..e702cb9ffbd8 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
| @@ -946,6 +946,24 @@ void mmput(struct mm_struct *mm) | |||
| 946 | } | 946 | } |
| 947 | EXPORT_SYMBOL_GPL(mmput); | 947 | EXPORT_SYMBOL_GPL(mmput); |
| 948 | 948 | ||
| 949 | #ifdef CONFIG_MMU | ||
| 950 | static void mmput_async_fn(struct work_struct *work) | ||
| 951 | { | ||
| 952 | struct mm_struct *mm = container_of(work, struct mm_struct, | ||
| 953 | async_put_work); | ||
| 954 | |||
| 955 | __mmput(mm); | ||
| 956 | } | ||
| 957 | |||
| 958 | void mmput_async(struct mm_struct *mm) | ||
| 959 | { | ||
| 960 | if (atomic_dec_and_test(&mm->mm_users)) { | ||
| 961 | INIT_WORK(&mm->async_put_work, mmput_async_fn); | ||
| 962 | schedule_work(&mm->async_put_work); | ||
| 963 | } | ||
| 964 | } | ||
| 965 | #endif | ||
| 966 | |||
| 949 | /** | 967 | /** |
| 950 | * set_mm_exe_file - change a reference to the mm's executable file | 968 | * set_mm_exe_file - change a reference to the mm's executable file |
| 951 | * | 969 | * |
diff --git a/kernel/futex.c b/kernel/futex.c index 3d38eaf05492..0518a0bfc746 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
| @@ -821,8 +821,6 @@ static void get_pi_state(struct futex_pi_state *pi_state) | |||
| 821 | /* | 821 | /* |
| 822 | * Drops a reference to the pi_state object and frees or caches it | 822 | * Drops a reference to the pi_state object and frees or caches it |
| 823 | * when the last reference is gone. | 823 | * when the last reference is gone. |
| 824 | * | ||
| 825 | * Must be called with the hb lock held. | ||
| 826 | */ | 824 | */ |
| 827 | static void put_pi_state(struct futex_pi_state *pi_state) | 825 | static void put_pi_state(struct futex_pi_state *pi_state) |
| 828 | { | 826 | { |
| @@ -837,16 +835,22 @@ static void put_pi_state(struct futex_pi_state *pi_state) | |||
| 837 | * and has cleaned up the pi_state already | 835 | * and has cleaned up the pi_state already |
| 838 | */ | 836 | */ |
| 839 | if (pi_state->owner) { | 837 | if (pi_state->owner) { |
| 840 | raw_spin_lock_irq(&pi_state->owner->pi_lock); | 838 | struct task_struct *owner; |
| 841 | list_del_init(&pi_state->list); | ||
| 842 | raw_spin_unlock_irq(&pi_state->owner->pi_lock); | ||
| 843 | 839 | ||
| 844 | rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner); | 840 | raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); |
| 841 | owner = pi_state->owner; | ||
| 842 | if (owner) { | ||
| 843 | raw_spin_lock(&owner->pi_lock); | ||
| 844 | list_del_init(&pi_state->list); | ||
| 845 | raw_spin_unlock(&owner->pi_lock); | ||
| 846 | } | ||
| 847 | rt_mutex_proxy_unlock(&pi_state->pi_mutex, owner); | ||
| 848 | raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); | ||
| 845 | } | 849 | } |
| 846 | 850 | ||
| 847 | if (current->pi_state_cache) | 851 | if (current->pi_state_cache) { |
| 848 | kfree(pi_state); | 852 | kfree(pi_state); |
| 849 | else { | 853 | } else { |
| 850 | /* | 854 | /* |
| 851 | * pi_state->list is already empty. | 855 | * pi_state->list is already empty. |
| 852 | * clear pi_state->owner. | 856 | * clear pi_state->owner. |
| @@ -907,13 +911,14 @@ void exit_pi_state_list(struct task_struct *curr) | |||
| 907 | raw_spin_unlock_irq(&curr->pi_lock); | 911 | raw_spin_unlock_irq(&curr->pi_lock); |
| 908 | 912 | ||
| 909 | spin_lock(&hb->lock); | 913 | spin_lock(&hb->lock); |
| 910 | 914 | raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); | |
| 911 | raw_spin_lock_irq(&curr->pi_lock); | 915 | raw_spin_lock(&curr->pi_lock); |
| 912 | /* | 916 | /* |
| 913 | * We dropped the pi-lock, so re-check whether this | 917 | * We dropped the pi-lock, so re-check whether this |
| 914 | * task still owns the PI-state: | 918 | * task still owns the PI-state: |
| 915 | */ | 919 | */ |
| 916 | if (head->next != next) { | 920 | if (head->next != next) { |
| 921 | raw_spin_unlock(&pi_state->pi_mutex.wait_lock); | ||
| 917 | spin_unlock(&hb->lock); | 922 | spin_unlock(&hb->lock); |
| 918 | continue; | 923 | continue; |
| 919 | } | 924 | } |
| @@ -922,9 +927,10 @@ void exit_pi_state_list(struct task_struct *curr) | |||
| 922 | WARN_ON(list_empty(&pi_state->list)); | 927 | WARN_ON(list_empty(&pi_state->list)); |
| 923 | list_del_init(&pi_state->list); | 928 | list_del_init(&pi_state->list); |
| 924 | pi_state->owner = NULL; | 929 | pi_state->owner = NULL; |
| 925 | raw_spin_unlock_irq(&curr->pi_lock); | 930 | raw_spin_unlock(&curr->pi_lock); |
| 926 | 931 | ||
| 927 | get_pi_state(pi_state); | 932 | get_pi_state(pi_state); |
| 933 | raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); | ||
| 928 | spin_unlock(&hb->lock); | 934 | spin_unlock(&hb->lock); |
| 929 | 935 | ||
| 930 | rt_mutex_futex_unlock(&pi_state->pi_mutex); | 936 | rt_mutex_futex_unlock(&pi_state->pi_mutex); |
| @@ -1208,6 +1214,10 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key, | |||
| 1208 | 1214 | ||
| 1209 | WARN_ON(!list_empty(&pi_state->list)); | 1215 | WARN_ON(!list_empty(&pi_state->list)); |
| 1210 | list_add(&pi_state->list, &p->pi_state_list); | 1216 | list_add(&pi_state->list, &p->pi_state_list); |
| 1217 | /* | ||
| 1218 | * Assignment without holding pi_state->pi_mutex.wait_lock is safe | ||
| 1219 | * because there is no concurrency as the object is not published yet. | ||
| 1220 | */ | ||
| 1211 | pi_state->owner = p; | 1221 | pi_state->owner = p; |
| 1212 | raw_spin_unlock_irq(&p->pi_lock); | 1222 | raw_spin_unlock_irq(&p->pi_lock); |
| 1213 | 1223 | ||
| @@ -2878,6 +2888,7 @@ retry: | |||
| 2878 | raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); | 2888 | raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); |
| 2879 | spin_unlock(&hb->lock); | 2889 | spin_unlock(&hb->lock); |
| 2880 | 2890 | ||
| 2891 | /* drops pi_state->pi_mutex.wait_lock */ | ||
| 2881 | ret = wake_futex_pi(uaddr, uval, pi_state); | 2892 | ret = wake_futex_pi(uaddr, uval, pi_state); |
| 2882 | 2893 | ||
| 2883 | put_pi_state(pi_state); | 2894 | put_pi_state(pi_state); |
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index f51b7b6d2451..6fc89fd93824 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c | |||
| @@ -202,7 +202,7 @@ __irq_startup_managed(struct irq_desc *desc, struct cpumask *aff, bool force) | |||
| 202 | 202 | ||
| 203 | irqd_clr_managed_shutdown(d); | 203 | irqd_clr_managed_shutdown(d); |
| 204 | 204 | ||
| 205 | if (cpumask_any_and(aff, cpu_online_mask) > nr_cpu_ids) { | 205 | if (cpumask_any_and(aff, cpu_online_mask) >= nr_cpu_ids) { |
| 206 | /* | 206 | /* |
| 207 | * Catch code which fiddles with enable_irq() on a managed | 207 | * Catch code which fiddles with enable_irq() on a managed |
| 208 | * and potentially shutdown IRQ. Chained interrupt | 208 | * and potentially shutdown IRQ. Chained interrupt |
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index f7086b78ad6e..5270a54b9fa4 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c | |||
| @@ -322,7 +322,6 @@ int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip, | |||
| 322 | /* Calc pointer to the next generic chip */ | 322 | /* Calc pointer to the next generic chip */ |
| 323 | tmp += sizeof(*gc) + num_ct * sizeof(struct irq_chip_type); | 323 | tmp += sizeof(*gc) + num_ct * sizeof(struct irq_chip_type); |
| 324 | } | 324 | } |
| 325 | d->name = name; | ||
| 326 | return 0; | 325 | return 0; |
| 327 | } | 326 | } |
| 328 | EXPORT_SYMBOL_GPL(__irq_alloc_domain_generic_chips); | 327 | EXPORT_SYMBOL_GPL(__irq_alloc_domain_generic_chips); |
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index e84b7056bb08..ac4644e92b49 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c | |||
| @@ -945,7 +945,7 @@ static int virq_debug_show(struct seq_file *m, void *private) | |||
| 945 | struct irq_desc *desc; | 945 | struct irq_desc *desc; |
| 946 | struct irq_domain *domain; | 946 | struct irq_domain *domain; |
| 947 | struct radix_tree_iter iter; | 947 | struct radix_tree_iter iter; |
| 948 | void **slot; | 948 | void __rcu **slot; |
| 949 | int i; | 949 | int i; |
| 950 | 950 | ||
| 951 | seq_printf(m, " %-16s %-6s %-10s %-10s %s\n", | 951 | seq_printf(m, " %-16s %-6s %-10s %-10s %s\n", |
| @@ -1453,7 +1453,7 @@ out_free_desc: | |||
| 1453 | /* The irq_data was moved, fix the revmap to refer to the new location */ | 1453 | /* The irq_data was moved, fix the revmap to refer to the new location */ |
| 1454 | static void irq_domain_fix_revmap(struct irq_data *d) | 1454 | static void irq_domain_fix_revmap(struct irq_data *d) |
| 1455 | { | 1455 | { |
| 1456 | void **slot; | 1456 | void __rcu **slot; |
| 1457 | 1457 | ||
| 1458 | if (d->hwirq < d->domain->revmap_size) | 1458 | if (d->hwirq < d->domain->revmap_size) |
| 1459 | return; /* Not using radix tree. */ | 1459 | return; /* Not using radix tree. */ |
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 573dc52b0806..d00132b5c325 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c | |||
| @@ -1643,6 +1643,10 @@ const void *free_irq(unsigned int irq, void *dev_id) | |||
| 1643 | #endif | 1643 | #endif |
| 1644 | 1644 | ||
| 1645 | action = __free_irq(irq, dev_id); | 1645 | action = __free_irq(irq, dev_id); |
| 1646 | |||
| 1647 | if (!action) | ||
| 1648 | return NULL; | ||
| 1649 | |||
| 1646 | devname = action->name; | 1650 | devname = action->name; |
| 1647 | kfree(action); | 1651 | kfree(action); |
| 1648 | return devname; | 1652 | return devname; |
diff --git a/kernel/kcmp.c b/kernel/kcmp.c index ea34ed8bb952..055bb2962a0b 100644 --- a/kernel/kcmp.c +++ b/kernel/kcmp.c | |||
| @@ -131,7 +131,7 @@ static int kcmp_epoll_target(struct task_struct *task1, | |||
| 131 | if (filp_epoll) { | 131 | if (filp_epoll) { |
| 132 | filp_tgt = get_epoll_tfile_raw_ptr(filp_epoll, slot.tfd, slot.toff); | 132 | filp_tgt = get_epoll_tfile_raw_ptr(filp_epoll, slot.tfd, slot.toff); |
| 133 | fput(filp_epoll); | 133 | fput(filp_epoll); |
| 134 | } else | 134 | } |
| 135 | 135 | ||
| 136 | if (IS_ERR(filp_tgt)) | 136 | if (IS_ERR(filp_tgt)) |
| 137 | return PTR_ERR(filp_tgt); | 137 | return PTR_ERR(filp_tgt); |
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 02f660666ab8..1fefe6dcafd7 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c | |||
| @@ -613,6 +613,33 @@ struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) | |||
| 613 | DEFINE_WAKE_Q(wake_q); | 613 | DEFINE_WAKE_Q(wake_q); |
| 614 | 614 | ||
| 615 | /* | 615 | /* |
| 616 | * __rwsem_down_write_failed_common(sem) | ||
| 617 | * rwsem_optimistic_spin(sem) | ||
| 618 | * osq_unlock(sem->osq) | ||
| 619 | * ... | ||
| 620 | * atomic_long_add_return(&sem->count) | ||
| 621 | * | ||
| 622 | * - VS - | ||
| 623 | * | ||
| 624 | * __up_write() | ||
| 625 | * if (atomic_long_sub_return_release(&sem->count) < 0) | ||
| 626 | * rwsem_wake(sem) | ||
| 627 | * osq_is_locked(&sem->osq) | ||
| 628 | * | ||
| 629 | * And __up_write() must observe !osq_is_locked() when it observes the | ||
| 630 | * atomic_long_add_return() in order to not miss a wakeup. | ||
| 631 | * | ||
| 632 | * This boils down to: | ||
| 633 | * | ||
| 634 | * [S.rel] X = 1 [RmW] r0 = (Y += 0) | ||
| 635 | * MB RMB | ||
| 636 | * [RmW] Y += 1 [L] r1 = X | ||
| 637 | * | ||
| 638 | * exists (r0=1 /\ r1=0) | ||
| 639 | */ | ||
| 640 | smp_rmb(); | ||
| 641 | |||
| 642 | /* | ||
| 616 | * If a spinner is present, it is not necessary to do the wakeup. | 643 | * If a spinner is present, it is not necessary to do the wakeup. |
| 617 | * Try to do wakeup only if the trylock succeeds to minimize | 644 | * Try to do wakeup only if the trylock succeeds to minimize |
| 618 | * spinlock contention which may introduce too much delay in the | 645 | * spinlock contention which may introduce too much delay in the |
diff --git a/kernel/memremap.c b/kernel/memremap.c index 6bcbfbf1a8fd..403ab9cdb949 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c | |||
| @@ -350,7 +350,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, | |||
| 350 | pgprot_t pgprot = PAGE_KERNEL; | 350 | pgprot_t pgprot = PAGE_KERNEL; |
| 351 | struct dev_pagemap *pgmap; | 351 | struct dev_pagemap *pgmap; |
| 352 | struct page_map *page_map; | 352 | struct page_map *page_map; |
| 353 | int error, nid, is_ram; | 353 | int error, nid, is_ram, i = 0; |
| 354 | 354 | ||
| 355 | align_start = res->start & ~(SECTION_SIZE - 1); | 355 | align_start = res->start & ~(SECTION_SIZE - 1); |
| 356 | align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE) | 356 | align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE) |
| @@ -448,6 +448,8 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, | |||
| 448 | list_del(&page->lru); | 448 | list_del(&page->lru); |
| 449 | page->pgmap = pgmap; | 449 | page->pgmap = pgmap; |
| 450 | percpu_ref_get(ref); | 450 | percpu_ref_get(ref); |
| 451 | if (!(++i % 1024)) | ||
| 452 | cond_resched(); | ||
| 451 | } | 453 | } |
| 452 | devres_add(dev, page_map); | 454 | devres_add(dev, page_map); |
| 453 | return __va(res->start); | 455 | return __va(res->start); |
diff --git a/kernel/params.c b/kernel/params.c index 60b2d8101355..cc9108c2a1fd 100644 --- a/kernel/params.c +++ b/kernel/params.c | |||
| @@ -224,7 +224,7 @@ char *parse_args(const char *doing, | |||
| 224 | } \ | 224 | } \ |
| 225 | int param_get_##name(char *buffer, const struct kernel_param *kp) \ | 225 | int param_get_##name(char *buffer, const struct kernel_param *kp) \ |
| 226 | { \ | 226 | { \ |
| 227 | return scnprintf(buffer, PAGE_SIZE, format, \ | 227 | return scnprintf(buffer, PAGE_SIZE, format "\n", \ |
| 228 | *((type *)kp->arg)); \ | 228 | *((type *)kp->arg)); \ |
| 229 | } \ | 229 | } \ |
| 230 | const struct kernel_param_ops param_ops_##name = { \ | 230 | const struct kernel_param_ops param_ops_##name = { \ |
| @@ -236,14 +236,14 @@ char *parse_args(const char *doing, | |||
| 236 | EXPORT_SYMBOL(param_ops_##name) | 236 | EXPORT_SYMBOL(param_ops_##name) |
| 237 | 237 | ||
| 238 | 238 | ||
| 239 | STANDARD_PARAM_DEF(byte, unsigned char, "%hhu", kstrtou8); | 239 | STANDARD_PARAM_DEF(byte, unsigned char, "%hhu", kstrtou8); |
| 240 | STANDARD_PARAM_DEF(short, short, "%hi", kstrtos16); | 240 | STANDARD_PARAM_DEF(short, short, "%hi", kstrtos16); |
| 241 | STANDARD_PARAM_DEF(ushort, unsigned short, "%hu", kstrtou16); | 241 | STANDARD_PARAM_DEF(ushort, unsigned short, "%hu", kstrtou16); |
| 242 | STANDARD_PARAM_DEF(int, int, "%i", kstrtoint); | 242 | STANDARD_PARAM_DEF(int, int, "%i", kstrtoint); |
| 243 | STANDARD_PARAM_DEF(uint, unsigned int, "%u", kstrtouint); | 243 | STANDARD_PARAM_DEF(uint, unsigned int, "%u", kstrtouint); |
| 244 | STANDARD_PARAM_DEF(long, long, "%li", kstrtol); | 244 | STANDARD_PARAM_DEF(long, long, "%li", kstrtol); |
| 245 | STANDARD_PARAM_DEF(ulong, unsigned long, "%lu", kstrtoul); | 245 | STANDARD_PARAM_DEF(ulong, unsigned long, "%lu", kstrtoul); |
| 246 | STANDARD_PARAM_DEF(ullong, unsigned long long, "%llu", kstrtoull); | 246 | STANDARD_PARAM_DEF(ullong, unsigned long long, "%llu", kstrtoull); |
| 247 | 247 | ||
| 248 | int param_set_charp(const char *val, const struct kernel_param *kp) | 248 | int param_set_charp(const char *val, const struct kernel_param *kp) |
| 249 | { | 249 | { |
| @@ -270,7 +270,7 @@ EXPORT_SYMBOL(param_set_charp); | |||
| 270 | 270 | ||
| 271 | int param_get_charp(char *buffer, const struct kernel_param *kp) | 271 | int param_get_charp(char *buffer, const struct kernel_param *kp) |
| 272 | { | 272 | { |
| 273 | return scnprintf(buffer, PAGE_SIZE, "%s", *((char **)kp->arg)); | 273 | return scnprintf(buffer, PAGE_SIZE, "%s\n", *((char **)kp->arg)); |
| 274 | } | 274 | } |
| 275 | EXPORT_SYMBOL(param_get_charp); | 275 | EXPORT_SYMBOL(param_get_charp); |
| 276 | 276 | ||
| @@ -301,7 +301,7 @@ EXPORT_SYMBOL(param_set_bool); | |||
| 301 | int param_get_bool(char *buffer, const struct kernel_param *kp) | 301 | int param_get_bool(char *buffer, const struct kernel_param *kp) |
| 302 | { | 302 | { |
| 303 | /* Y and N chosen as being relatively non-coder friendly */ | 303 | /* Y and N chosen as being relatively non-coder friendly */ |
| 304 | return sprintf(buffer, "%c", *(bool *)kp->arg ? 'Y' : 'N'); | 304 | return sprintf(buffer, "%c\n", *(bool *)kp->arg ? 'Y' : 'N'); |
| 305 | } | 305 | } |
| 306 | EXPORT_SYMBOL(param_get_bool); | 306 | EXPORT_SYMBOL(param_get_bool); |
| 307 | 307 | ||
| @@ -360,7 +360,7 @@ EXPORT_SYMBOL(param_set_invbool); | |||
| 360 | 360 | ||
| 361 | int param_get_invbool(char *buffer, const struct kernel_param *kp) | 361 | int param_get_invbool(char *buffer, const struct kernel_param *kp) |
| 362 | { | 362 | { |
| 363 | return sprintf(buffer, "%c", (*(bool *)kp->arg) ? 'N' : 'Y'); | 363 | return sprintf(buffer, "%c\n", (*(bool *)kp->arg) ? 'N' : 'Y'); |
| 364 | } | 364 | } |
| 365 | EXPORT_SYMBOL(param_get_invbool); | 365 | EXPORT_SYMBOL(param_get_invbool); |
| 366 | 366 | ||
| @@ -460,8 +460,9 @@ static int param_array_get(char *buffer, const struct kernel_param *kp) | |||
| 460 | struct kernel_param p = *kp; | 460 | struct kernel_param p = *kp; |
| 461 | 461 | ||
| 462 | for (i = off = 0; i < (arr->num ? *arr->num : arr->max); i++) { | 462 | for (i = off = 0; i < (arr->num ? *arr->num : arr->max); i++) { |
| 463 | /* Replace \n with comma */ | ||
| 463 | if (i) | 464 | if (i) |
| 464 | buffer[off++] = ','; | 465 | buffer[off - 1] = ','; |
| 465 | p.arg = arr->elem + arr->elemsize * i; | 466 | p.arg = arr->elem + arr->elemsize * i; |
| 466 | check_kparam_locked(p.mod); | 467 | check_kparam_locked(p.mod); |
| 467 | ret = arr->ops->get(buffer + off, &p); | 468 | ret = arr->ops->get(buffer + off, &p); |
| @@ -507,7 +508,7 @@ EXPORT_SYMBOL(param_set_copystring); | |||
| 507 | int param_get_string(char *buffer, const struct kernel_param *kp) | 508 | int param_get_string(char *buffer, const struct kernel_param *kp) |
| 508 | { | 509 | { |
| 509 | const struct kparam_string *kps = kp->str; | 510 | const struct kparam_string *kps = kp->str; |
| 510 | return strlcpy(buffer, kps->string, kps->maxlen); | 511 | return scnprintf(buffer, PAGE_SIZE, "%s\n", kps->string); |
| 511 | } | 512 | } |
| 512 | EXPORT_SYMBOL(param_get_string); | 513 | EXPORT_SYMBOL(param_get_string); |
| 513 | 514 | ||
| @@ -549,10 +550,6 @@ static ssize_t param_attr_show(struct module_attribute *mattr, | |||
| 549 | kernel_param_lock(mk->mod); | 550 | kernel_param_lock(mk->mod); |
| 550 | count = attribute->param->ops->get(buf, attribute->param); | 551 | count = attribute->param->ops->get(buf, attribute->param); |
| 551 | kernel_param_unlock(mk->mod); | 552 | kernel_param_unlock(mk->mod); |
| 552 | if (count > 0) { | ||
| 553 | strcat(buf, "\n"); | ||
| 554 | ++count; | ||
| 555 | } | ||
| 556 | return count; | 553 | return count; |
| 557 | } | 554 | } |
| 558 | 555 | ||
| @@ -600,7 +597,7 @@ EXPORT_SYMBOL(kernel_param_unlock); | |||
| 600 | /* | 597 | /* |
| 601 | * add_sysfs_param - add a parameter to sysfs | 598 | * add_sysfs_param - add a parameter to sysfs |
| 602 | * @mk: struct module_kobject | 599 | * @mk: struct module_kobject |
| 603 | * @kparam: the actual parameter definition to add to sysfs | 600 | * @kp: the actual parameter definition to add to sysfs |
| 604 | * @name: name of parameter | 601 | * @name: name of parameter |
| 605 | * | 602 | * |
| 606 | * Create a kobject if for a (per-module) parameter if mp NULL, and | 603 | * Create a kobject if for a (per-module) parameter if mp NULL, and |
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 3e2b4f519009..ccd2d20e6b06 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c | |||
| @@ -120,22 +120,26 @@ static void s2idle_loop(void) | |||
| 120 | * frozen processes + suspended devices + idle processors. | 120 | * frozen processes + suspended devices + idle processors. |
| 121 | * Thus s2idle_enter() should be called right after | 121 | * Thus s2idle_enter() should be called right after |
| 122 | * all devices have been suspended. | 122 | * all devices have been suspended. |
| 123 | * | ||
| 124 | * Wakeups during the noirq suspend of devices may be spurious, | ||
| 125 | * so prevent them from terminating the loop right away. | ||
| 123 | */ | 126 | */ |
| 124 | error = dpm_noirq_suspend_devices(PMSG_SUSPEND); | 127 | error = dpm_noirq_suspend_devices(PMSG_SUSPEND); |
| 125 | if (!error) | 128 | if (!error) |
| 126 | s2idle_enter(); | 129 | s2idle_enter(); |
| 130 | else if (error == -EBUSY && pm_wakeup_pending()) | ||
| 131 | error = 0; | ||
| 127 | 132 | ||
| 128 | dpm_noirq_resume_devices(PMSG_RESUME); | 133 | if (!error && s2idle_ops && s2idle_ops->wake) |
| 129 | if (error && (error != -EBUSY || !pm_wakeup_pending())) { | ||
| 130 | dpm_noirq_end(); | ||
| 131 | break; | ||
| 132 | } | ||
| 133 | |||
| 134 | if (s2idle_ops && s2idle_ops->wake) | ||
| 135 | s2idle_ops->wake(); | 134 | s2idle_ops->wake(); |
| 136 | 135 | ||
| 136 | dpm_noirq_resume_devices(PMSG_RESUME); | ||
| 137 | |||
| 137 | dpm_noirq_end(); | 138 | dpm_noirq_end(); |
| 138 | 139 | ||
| 140 | if (error) | ||
| 141 | break; | ||
| 142 | |||
| 139 | if (s2idle_ops && s2idle_ops->sync) | 143 | if (s2idle_ops && s2idle_ops->sync) |
| 140 | s2idle_ops->sync(); | 144 | s2idle_ops->sync(); |
| 141 | 145 | ||
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 1250e4bd4b85..b0ad62b0e7b8 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c | |||
| @@ -882,6 +882,11 @@ void rcu_irq_exit(void) | |||
| 882 | 882 | ||
| 883 | RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_irq_exit() invoked with irqs enabled!!!"); | 883 | RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_irq_exit() invoked with irqs enabled!!!"); |
| 884 | rdtp = this_cpu_ptr(&rcu_dynticks); | 884 | rdtp = this_cpu_ptr(&rcu_dynticks); |
| 885 | |||
| 886 | /* Page faults can happen in NMI handlers, so check... */ | ||
| 887 | if (rdtp->dynticks_nmi_nesting) | ||
| 888 | return; | ||
| 889 | |||
| 885 | WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && | 890 | WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && |
| 886 | rdtp->dynticks_nesting < 1); | 891 | rdtp->dynticks_nesting < 1); |
| 887 | if (rdtp->dynticks_nesting <= 1) { | 892 | if (rdtp->dynticks_nesting <= 1) { |
| @@ -1015,6 +1020,11 @@ void rcu_irq_enter(void) | |||
| 1015 | 1020 | ||
| 1016 | RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_irq_enter() invoked with irqs enabled!!!"); | 1021 | RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_irq_enter() invoked with irqs enabled!!!"); |
| 1017 | rdtp = this_cpu_ptr(&rcu_dynticks); | 1022 | rdtp = this_cpu_ptr(&rcu_dynticks); |
| 1023 | |||
| 1024 | /* Page faults can happen in NMI handlers, so check... */ | ||
| 1025 | if (rdtp->dynticks_nmi_nesting) | ||
| 1026 | return; | ||
| 1027 | |||
| 1018 | oldval = rdtp->dynticks_nesting; | 1028 | oldval = rdtp->dynticks_nesting; |
| 1019 | rdtp->dynticks_nesting++; | 1029 | rdtp->dynticks_nesting++; |
| 1020 | WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && | 1030 | WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 18a6966567da..d17c5da523a0 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
| @@ -5166,6 +5166,28 @@ void sched_show_task(struct task_struct *p) | |||
| 5166 | put_task_stack(p); | 5166 | put_task_stack(p); |
| 5167 | } | 5167 | } |
| 5168 | 5168 | ||
| 5169 | static inline bool | ||
| 5170 | state_filter_match(unsigned long state_filter, struct task_struct *p) | ||
| 5171 | { | ||
| 5172 | /* no filter, everything matches */ | ||
| 5173 | if (!state_filter) | ||
| 5174 | return true; | ||
| 5175 | |||
| 5176 | /* filter, but doesn't match */ | ||
| 5177 | if (!(p->state & state_filter)) | ||
| 5178 | return false; | ||
| 5179 | |||
| 5180 | /* | ||
| 5181 | * When looking for TASK_UNINTERRUPTIBLE skip TASK_IDLE (allows | ||
| 5182 | * TASK_KILLABLE). | ||
| 5183 | */ | ||
| 5184 | if (state_filter == TASK_UNINTERRUPTIBLE && p->state == TASK_IDLE) | ||
| 5185 | return false; | ||
| 5186 | |||
| 5187 | return true; | ||
| 5188 | } | ||
| 5189 | |||
| 5190 | |||
| 5169 | void show_state_filter(unsigned long state_filter) | 5191 | void show_state_filter(unsigned long state_filter) |
| 5170 | { | 5192 | { |
| 5171 | struct task_struct *g, *p; | 5193 | struct task_struct *g, *p; |
| @@ -5188,7 +5210,7 @@ void show_state_filter(unsigned long state_filter) | |||
| 5188 | */ | 5210 | */ |
| 5189 | touch_nmi_watchdog(); | 5211 | touch_nmi_watchdog(); |
| 5190 | touch_all_softlockup_watchdogs(); | 5212 | touch_all_softlockup_watchdogs(); |
| 5191 | if (!state_filter || (p->state & state_filter)) | 5213 | if (state_filter_match(state_filter, p)) |
| 5192 | sched_show_task(p); | 5214 | sched_show_task(p); |
| 5193 | } | 5215 | } |
| 5194 | 5216 | ||
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 01217fb5a5de..2f93e4a2d9f6 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c | |||
| @@ -466,8 +466,6 @@ static char *task_group_path(struct task_group *tg) | |||
| 466 | } | 466 | } |
| 467 | #endif | 467 | #endif |
| 468 | 468 | ||
| 469 | static const char stat_nam[] = TASK_STATE_TO_CHAR_STR; | ||
| 470 | |||
| 471 | static void | 469 | static void |
| 472 | print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) | 470 | print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) |
| 473 | { | 471 | { |
diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 98b59b5db90b..bb3a38005b9c 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c | |||
| @@ -17,11 +17,13 @@ | |||
| 17 | #include <linux/audit.h> | 17 | #include <linux/audit.h> |
| 18 | #include <linux/compat.h> | 18 | #include <linux/compat.h> |
| 19 | #include <linux/coredump.h> | 19 | #include <linux/coredump.h> |
| 20 | #include <linux/kmemleak.h> | ||
| 20 | #include <linux/sched.h> | 21 | #include <linux/sched.h> |
| 21 | #include <linux/sched/task_stack.h> | 22 | #include <linux/sched/task_stack.h> |
| 22 | #include <linux/seccomp.h> | 23 | #include <linux/seccomp.h> |
| 23 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
| 24 | #include <linux/syscalls.h> | 25 | #include <linux/syscalls.h> |
| 26 | #include <linux/sysctl.h> | ||
| 25 | 27 | ||
| 26 | #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER | 28 | #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER |
| 27 | #include <asm/syscall.h> | 29 | #include <asm/syscall.h> |
| @@ -42,6 +44,7 @@ | |||
| 42 | * get/put helpers should be used when accessing an instance | 44 | * get/put helpers should be used when accessing an instance |
| 43 | * outside of a lifetime-guarded section. In general, this | 45 | * outside of a lifetime-guarded section. In general, this |
| 44 | * is only needed for handling filters shared across tasks. | 46 | * is only needed for handling filters shared across tasks. |
| 47 | * @log: true if all actions except for SECCOMP_RET_ALLOW should be logged | ||
| 45 | * @prev: points to a previously installed, or inherited, filter | 48 | * @prev: points to a previously installed, or inherited, filter |
| 46 | * @prog: the BPF program to evaluate | 49 | * @prog: the BPF program to evaluate |
| 47 | * | 50 | * |
| @@ -57,6 +60,7 @@ | |||
| 57 | */ | 60 | */ |
| 58 | struct seccomp_filter { | 61 | struct seccomp_filter { |
| 59 | refcount_t usage; | 62 | refcount_t usage; |
| 63 | bool log; | ||
| 60 | struct seccomp_filter *prev; | 64 | struct seccomp_filter *prev; |
| 61 | struct bpf_prog *prog; | 65 | struct bpf_prog *prog; |
| 62 | }; | 66 | }; |
| @@ -171,10 +175,15 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen) | |||
| 171 | /** | 175 | /** |
| 172 | * seccomp_run_filters - evaluates all seccomp filters against @sd | 176 | * seccomp_run_filters - evaluates all seccomp filters against @sd |
| 173 | * @sd: optional seccomp data to be passed to filters | 177 | * @sd: optional seccomp data to be passed to filters |
| 178 | * @match: stores struct seccomp_filter that resulted in the return value, | ||
| 179 | * unless filter returned SECCOMP_RET_ALLOW, in which case it will | ||
| 180 | * be unchanged. | ||
| 174 | * | 181 | * |
| 175 | * Returns valid seccomp BPF response codes. | 182 | * Returns valid seccomp BPF response codes. |
| 176 | */ | 183 | */ |
| 177 | static u32 seccomp_run_filters(const struct seccomp_data *sd) | 184 | #define ACTION_ONLY(ret) ((s32)((ret) & (SECCOMP_RET_ACTION_FULL))) |
| 185 | static u32 seccomp_run_filters(const struct seccomp_data *sd, | ||
| 186 | struct seccomp_filter **match) | ||
| 178 | { | 187 | { |
| 179 | struct seccomp_data sd_local; | 188 | struct seccomp_data sd_local; |
| 180 | u32 ret = SECCOMP_RET_ALLOW; | 189 | u32 ret = SECCOMP_RET_ALLOW; |
| @@ -184,7 +193,7 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd) | |||
| 184 | 193 | ||
| 185 | /* Ensure unexpected behavior doesn't result in failing open. */ | 194 | /* Ensure unexpected behavior doesn't result in failing open. */ |
| 186 | if (unlikely(WARN_ON(f == NULL))) | 195 | if (unlikely(WARN_ON(f == NULL))) |
| 187 | return SECCOMP_RET_KILL; | 196 | return SECCOMP_RET_KILL_PROCESS; |
| 188 | 197 | ||
| 189 | if (!sd) { | 198 | if (!sd) { |
| 190 | populate_seccomp_data(&sd_local); | 199 | populate_seccomp_data(&sd_local); |
| @@ -198,8 +207,10 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd) | |||
| 198 | for (; f; f = f->prev) { | 207 | for (; f; f = f->prev) { |
| 199 | u32 cur_ret = BPF_PROG_RUN(f->prog, sd); | 208 | u32 cur_ret = BPF_PROG_RUN(f->prog, sd); |
| 200 | 209 | ||
| 201 | if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION)) | 210 | if (ACTION_ONLY(cur_ret) < ACTION_ONLY(ret)) { |
| 202 | ret = cur_ret; | 211 | ret = cur_ret; |
| 212 | *match = f; | ||
| 213 | } | ||
| 203 | } | 214 | } |
| 204 | return ret; | 215 | return ret; |
| 205 | } | 216 | } |
| @@ -444,6 +455,10 @@ static long seccomp_attach_filter(unsigned int flags, | |||
| 444 | return ret; | 455 | return ret; |
| 445 | } | 456 | } |
| 446 | 457 | ||
| 458 | /* Set log flag, if present. */ | ||
| 459 | if (flags & SECCOMP_FILTER_FLAG_LOG) | ||
| 460 | filter->log = true; | ||
| 461 | |||
| 447 | /* | 462 | /* |
| 448 | * If there is an existing filter, make it the prev and don't drop its | 463 | * If there is an existing filter, make it the prev and don't drop its |
| 449 | * task reference. | 464 | * task reference. |
| @@ -458,14 +473,19 @@ static long seccomp_attach_filter(unsigned int flags, | |||
| 458 | return 0; | 473 | return 0; |
| 459 | } | 474 | } |
| 460 | 475 | ||
| 476 | void __get_seccomp_filter(struct seccomp_filter *filter) | ||
| 477 | { | ||
| 478 | /* Reference count is bounded by the number of total processes. */ | ||
| 479 | refcount_inc(&filter->usage); | ||
| 480 | } | ||
| 481 | |||
| 461 | /* get_seccomp_filter - increments the reference count of the filter on @tsk */ | 482 | /* get_seccomp_filter - increments the reference count of the filter on @tsk */ |
| 462 | void get_seccomp_filter(struct task_struct *tsk) | 483 | void get_seccomp_filter(struct task_struct *tsk) |
| 463 | { | 484 | { |
| 464 | struct seccomp_filter *orig = tsk->seccomp.filter; | 485 | struct seccomp_filter *orig = tsk->seccomp.filter; |
| 465 | if (!orig) | 486 | if (!orig) |
| 466 | return; | 487 | return; |
| 467 | /* Reference count is bounded by the number of total processes. */ | 488 | __get_seccomp_filter(orig); |
| 468 | refcount_inc(&orig->usage); | ||
| 469 | } | 489 | } |
| 470 | 490 | ||
| 471 | static inline void seccomp_filter_free(struct seccomp_filter *filter) | 491 | static inline void seccomp_filter_free(struct seccomp_filter *filter) |
| @@ -476,10 +496,8 @@ static inline void seccomp_filter_free(struct seccomp_filter *filter) | |||
| 476 | } | 496 | } |
| 477 | } | 497 | } |
| 478 | 498 | ||
| 479 | /* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */ | 499 | static void __put_seccomp_filter(struct seccomp_filter *orig) |
| 480 | void put_seccomp_filter(struct task_struct *tsk) | ||
| 481 | { | 500 | { |
| 482 | struct seccomp_filter *orig = tsk->seccomp.filter; | ||
| 483 | /* Clean up single-reference branches iteratively. */ | 501 | /* Clean up single-reference branches iteratively. */ |
| 484 | while (orig && refcount_dec_and_test(&orig->usage)) { | 502 | while (orig && refcount_dec_and_test(&orig->usage)) { |
| 485 | struct seccomp_filter *freeme = orig; | 503 | struct seccomp_filter *freeme = orig; |
| @@ -488,6 +506,12 @@ void put_seccomp_filter(struct task_struct *tsk) | |||
| 488 | } | 506 | } |
| 489 | } | 507 | } |
| 490 | 508 | ||
| 509 | /* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */ | ||
| 510 | void put_seccomp_filter(struct task_struct *tsk) | ||
| 511 | { | ||
| 512 | __put_seccomp_filter(tsk->seccomp.filter); | ||
| 513 | } | ||
| 514 | |||
| 491 | static void seccomp_init_siginfo(siginfo_t *info, int syscall, int reason) | 515 | static void seccomp_init_siginfo(siginfo_t *info, int syscall, int reason) |
| 492 | { | 516 | { |
| 493 | memset(info, 0, sizeof(*info)); | 517 | memset(info, 0, sizeof(*info)); |
| @@ -514,6 +538,65 @@ static void seccomp_send_sigsys(int syscall, int reason) | |||
| 514 | } | 538 | } |
| 515 | #endif /* CONFIG_SECCOMP_FILTER */ | 539 | #endif /* CONFIG_SECCOMP_FILTER */ |
| 516 | 540 | ||
| 541 | /* For use with seccomp_actions_logged */ | ||
| 542 | #define SECCOMP_LOG_KILL_PROCESS (1 << 0) | ||
| 543 | #define SECCOMP_LOG_KILL_THREAD (1 << 1) | ||
| 544 | #define SECCOMP_LOG_TRAP (1 << 2) | ||
| 545 | #define SECCOMP_LOG_ERRNO (1 << 3) | ||
| 546 | #define SECCOMP_LOG_TRACE (1 << 4) | ||
| 547 | #define SECCOMP_LOG_LOG (1 << 5) | ||
| 548 | #define SECCOMP_LOG_ALLOW (1 << 6) | ||
| 549 | |||
| 550 | static u32 seccomp_actions_logged = SECCOMP_LOG_KILL_PROCESS | | ||
| 551 | SECCOMP_LOG_KILL_THREAD | | ||
| 552 | SECCOMP_LOG_TRAP | | ||
| 553 | SECCOMP_LOG_ERRNO | | ||
| 554 | SECCOMP_LOG_TRACE | | ||
| 555 | SECCOMP_LOG_LOG; | ||
| 556 | |||
| 557 | static inline void seccomp_log(unsigned long syscall, long signr, u32 action, | ||
| 558 | bool requested) | ||
| 559 | { | ||
| 560 | bool log = false; | ||
| 561 | |||
| 562 | switch (action) { | ||
| 563 | case SECCOMP_RET_ALLOW: | ||
| 564 | break; | ||
| 565 | case SECCOMP_RET_TRAP: | ||
| 566 | log = requested && seccomp_actions_logged & SECCOMP_LOG_TRAP; | ||
| 567 | break; | ||
| 568 | case SECCOMP_RET_ERRNO: | ||
| 569 | log = requested && seccomp_actions_logged & SECCOMP_LOG_ERRNO; | ||
| 570 | break; | ||
| 571 | case SECCOMP_RET_TRACE: | ||
| 572 | log = requested && seccomp_actions_logged & SECCOMP_LOG_TRACE; | ||
| 573 | break; | ||
| 574 | case SECCOMP_RET_LOG: | ||
| 575 | log = seccomp_actions_logged & SECCOMP_LOG_LOG; | ||
| 576 | break; | ||
| 577 | case SECCOMP_RET_KILL_THREAD: | ||
| 578 | log = seccomp_actions_logged & SECCOMP_LOG_KILL_THREAD; | ||
| 579 | break; | ||
| 580 | case SECCOMP_RET_KILL_PROCESS: | ||
| 581 | default: | ||
| 582 | log = seccomp_actions_logged & SECCOMP_LOG_KILL_PROCESS; | ||
| 583 | } | ||
| 584 | |||
| 585 | /* | ||
| 586 | * Force an audit message to be emitted when the action is RET_KILL_*, | ||
| 587 | * RET_LOG, or the FILTER_FLAG_LOG bit was set and the action is | ||
| 588 | * allowed to be logged by the admin. | ||
| 589 | */ | ||
| 590 | if (log) | ||
| 591 | return __audit_seccomp(syscall, signr, action); | ||
| 592 | |||
| 593 | /* | ||
| 594 | * Let the audit subsystem decide if the action should be audited based | ||
| 595 | * on whether the current task itself is being audited. | ||
| 596 | */ | ||
| 597 | return audit_seccomp(syscall, signr, action); | ||
| 598 | } | ||
| 599 | |||
| 517 | /* | 600 | /* |
| 518 | * Secure computing mode 1 allows only read/write/exit/sigreturn. | 601 | * Secure computing mode 1 allows only read/write/exit/sigreturn. |
| 519 | * To be fully secure this must be combined with rlimit | 602 | * To be fully secure this must be combined with rlimit |
| @@ -539,7 +622,7 @@ static void __secure_computing_strict(int this_syscall) | |||
| 539 | #ifdef SECCOMP_DEBUG | 622 | #ifdef SECCOMP_DEBUG |
| 540 | dump_stack(); | 623 | dump_stack(); |
| 541 | #endif | 624 | #endif |
| 542 | audit_seccomp(this_syscall, SIGKILL, SECCOMP_RET_KILL); | 625 | seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL_THREAD, true); |
| 543 | do_exit(SIGKILL); | 626 | do_exit(SIGKILL); |
| 544 | } | 627 | } |
| 545 | 628 | ||
| @@ -566,6 +649,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, | |||
| 566 | const bool recheck_after_trace) | 649 | const bool recheck_after_trace) |
| 567 | { | 650 | { |
| 568 | u32 filter_ret, action; | 651 | u32 filter_ret, action; |
| 652 | struct seccomp_filter *match = NULL; | ||
| 569 | int data; | 653 | int data; |
| 570 | 654 | ||
| 571 | /* | 655 | /* |
| @@ -574,9 +658,9 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, | |||
| 574 | */ | 658 | */ |
| 575 | rmb(); | 659 | rmb(); |
| 576 | 660 | ||
| 577 | filter_ret = seccomp_run_filters(sd); | 661 | filter_ret = seccomp_run_filters(sd, &match); |
| 578 | data = filter_ret & SECCOMP_RET_DATA; | 662 | data = filter_ret & SECCOMP_RET_DATA; |
| 579 | action = filter_ret & SECCOMP_RET_ACTION; | 663 | action = filter_ret & SECCOMP_RET_ACTION_FULL; |
| 580 | 664 | ||
| 581 | switch (action) { | 665 | switch (action) { |
| 582 | case SECCOMP_RET_ERRNO: | 666 | case SECCOMP_RET_ERRNO: |
| @@ -637,14 +721,25 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, | |||
| 637 | 721 | ||
| 638 | return 0; | 722 | return 0; |
| 639 | 723 | ||
| 724 | case SECCOMP_RET_LOG: | ||
| 725 | seccomp_log(this_syscall, 0, action, true); | ||
| 726 | return 0; | ||
| 727 | |||
| 640 | case SECCOMP_RET_ALLOW: | 728 | case SECCOMP_RET_ALLOW: |
| 729 | /* | ||
| 730 | * Note that the "match" filter will always be NULL for | ||
| 731 | * this action since SECCOMP_RET_ALLOW is the starting | ||
| 732 | * state in seccomp_run_filters(). | ||
| 733 | */ | ||
| 641 | return 0; | 734 | return 0; |
| 642 | 735 | ||
| 643 | case SECCOMP_RET_KILL: | 736 | case SECCOMP_RET_KILL_THREAD: |
| 737 | case SECCOMP_RET_KILL_PROCESS: | ||
| 644 | default: | 738 | default: |
| 645 | audit_seccomp(this_syscall, SIGSYS, action); | 739 | seccomp_log(this_syscall, SIGSYS, action, true); |
| 646 | /* Dump core only if this is the last remaining thread. */ | 740 | /* Dump core only if this is the last remaining thread. */ |
| 647 | if (get_nr_threads(current) == 1) { | 741 | if (action == SECCOMP_RET_KILL_PROCESS || |
| 742 | get_nr_threads(current) == 1) { | ||
| 648 | siginfo_t info; | 743 | siginfo_t info; |
| 649 | 744 | ||
| 650 | /* Show the original registers in the dump. */ | 745 | /* Show the original registers in the dump. */ |
| @@ -653,13 +748,16 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, | |||
| 653 | seccomp_init_siginfo(&info, this_syscall, data); | 748 | seccomp_init_siginfo(&info, this_syscall, data); |
| 654 | do_coredump(&info); | 749 | do_coredump(&info); |
| 655 | } | 750 | } |
| 656 | do_exit(SIGSYS); | 751 | if (action == SECCOMP_RET_KILL_PROCESS) |
| 752 | do_group_exit(SIGSYS); | ||
| 753 | else | ||
| 754 | do_exit(SIGSYS); | ||
| 657 | } | 755 | } |
| 658 | 756 | ||
| 659 | unreachable(); | 757 | unreachable(); |
| 660 | 758 | ||
| 661 | skip: | 759 | skip: |
| 662 | audit_seccomp(this_syscall, 0, action); | 760 | seccomp_log(this_syscall, 0, action, match ? match->log : false); |
| 663 | return -1; | 761 | return -1; |
| 664 | } | 762 | } |
| 665 | #else | 763 | #else |
| @@ -794,6 +892,29 @@ static inline long seccomp_set_mode_filter(unsigned int flags, | |||
| 794 | } | 892 | } |
| 795 | #endif | 893 | #endif |
| 796 | 894 | ||
| 895 | static long seccomp_get_action_avail(const char __user *uaction) | ||
| 896 | { | ||
| 897 | u32 action; | ||
| 898 | |||
| 899 | if (copy_from_user(&action, uaction, sizeof(action))) | ||
| 900 | return -EFAULT; | ||
| 901 | |||
| 902 | switch (action) { | ||
| 903 | case SECCOMP_RET_KILL_PROCESS: | ||
| 904 | case SECCOMP_RET_KILL_THREAD: | ||
| 905 | case SECCOMP_RET_TRAP: | ||
| 906 | case SECCOMP_RET_ERRNO: | ||
| 907 | case SECCOMP_RET_TRACE: | ||
| 908 | case SECCOMP_RET_LOG: | ||
| 909 | case SECCOMP_RET_ALLOW: | ||
| 910 | break; | ||
| 911 | default: | ||
| 912 | return -EOPNOTSUPP; | ||
| 913 | } | ||
| 914 | |||
| 915 | return 0; | ||
| 916 | } | ||
| 917 | |||
| 797 | /* Common entry point for both prctl and syscall. */ | 918 | /* Common entry point for both prctl and syscall. */ |
| 798 | static long do_seccomp(unsigned int op, unsigned int flags, | 919 | static long do_seccomp(unsigned int op, unsigned int flags, |
| 799 | const char __user *uargs) | 920 | const char __user *uargs) |
| @@ -805,6 +926,11 @@ static long do_seccomp(unsigned int op, unsigned int flags, | |||
| 805 | return seccomp_set_mode_strict(); | 926 | return seccomp_set_mode_strict(); |
| 806 | case SECCOMP_SET_MODE_FILTER: | 927 | case SECCOMP_SET_MODE_FILTER: |
| 807 | return seccomp_set_mode_filter(flags, uargs); | 928 | return seccomp_set_mode_filter(flags, uargs); |
| 929 | case SECCOMP_GET_ACTION_AVAIL: | ||
| 930 | if (flags != 0) | ||
| 931 | return -EINVAL; | ||
| 932 | |||
| 933 | return seccomp_get_action_avail(uargs); | ||
| 808 | default: | 934 | default: |
| 809 | return -EINVAL; | 935 | return -EINVAL; |
| 810 | } | 936 | } |
| @@ -908,13 +1034,13 @@ long seccomp_get_filter(struct task_struct *task, unsigned long filter_off, | |||
| 908 | if (!data) | 1034 | if (!data) |
| 909 | goto out; | 1035 | goto out; |
| 910 | 1036 | ||
| 911 | get_seccomp_filter(task); | 1037 | __get_seccomp_filter(filter); |
| 912 | spin_unlock_irq(&task->sighand->siglock); | 1038 | spin_unlock_irq(&task->sighand->siglock); |
| 913 | 1039 | ||
| 914 | if (copy_to_user(data, fprog->filter, bpf_classic_proglen(fprog))) | 1040 | if (copy_to_user(data, fprog->filter, bpf_classic_proglen(fprog))) |
| 915 | ret = -EFAULT; | 1041 | ret = -EFAULT; |
| 916 | 1042 | ||
| 917 | put_seccomp_filter(task); | 1043 | __put_seccomp_filter(filter); |
| 918 | return ret; | 1044 | return ret; |
| 919 | 1045 | ||
| 920 | out: | 1046 | out: |
| @@ -922,3 +1048,185 @@ out: | |||
| 922 | return ret; | 1048 | return ret; |
| 923 | } | 1049 | } |
| 924 | #endif | 1050 | #endif |
| 1051 | |||
| 1052 | #ifdef CONFIG_SYSCTL | ||
| 1053 | |||
| 1054 | /* Human readable action names for friendly sysctl interaction */ | ||
| 1055 | #define SECCOMP_RET_KILL_PROCESS_NAME "kill_process" | ||
| 1056 | #define SECCOMP_RET_KILL_THREAD_NAME "kill_thread" | ||
| 1057 | #define SECCOMP_RET_TRAP_NAME "trap" | ||
| 1058 | #define SECCOMP_RET_ERRNO_NAME "errno" | ||
| 1059 | #define SECCOMP_RET_TRACE_NAME "trace" | ||
| 1060 | #define SECCOMP_RET_LOG_NAME "log" | ||
| 1061 | #define SECCOMP_RET_ALLOW_NAME "allow" | ||
| 1062 | |||
| 1063 | static const char seccomp_actions_avail[] = | ||
| 1064 | SECCOMP_RET_KILL_PROCESS_NAME " " | ||
| 1065 | SECCOMP_RET_KILL_THREAD_NAME " " | ||
| 1066 | SECCOMP_RET_TRAP_NAME " " | ||
| 1067 | SECCOMP_RET_ERRNO_NAME " " | ||
| 1068 | SECCOMP_RET_TRACE_NAME " " | ||
| 1069 | SECCOMP_RET_LOG_NAME " " | ||
| 1070 | SECCOMP_RET_ALLOW_NAME; | ||
| 1071 | |||
| 1072 | struct seccomp_log_name { | ||
| 1073 | u32 log; | ||
| 1074 | const char *name; | ||
| 1075 | }; | ||
| 1076 | |||
| 1077 | static const struct seccomp_log_name seccomp_log_names[] = { | ||
| 1078 | { SECCOMP_LOG_KILL_PROCESS, SECCOMP_RET_KILL_PROCESS_NAME }, | ||
| 1079 | { SECCOMP_LOG_KILL_THREAD, SECCOMP_RET_KILL_THREAD_NAME }, | ||
| 1080 | { SECCOMP_LOG_TRAP, SECCOMP_RET_TRAP_NAME }, | ||
| 1081 | { SECCOMP_LOG_ERRNO, SECCOMP_RET_ERRNO_NAME }, | ||
| 1082 | { SECCOMP_LOG_TRACE, SECCOMP_RET_TRACE_NAME }, | ||
| 1083 | { SECCOMP_LOG_LOG, SECCOMP_RET_LOG_NAME }, | ||
| 1084 | { SECCOMP_LOG_ALLOW, SECCOMP_RET_ALLOW_NAME }, | ||
| 1085 | { } | ||
| 1086 | }; | ||
| 1087 | |||
| 1088 | static bool seccomp_names_from_actions_logged(char *names, size_t size, | ||
| 1089 | u32 actions_logged) | ||
| 1090 | { | ||
| 1091 | const struct seccomp_log_name *cur; | ||
| 1092 | bool append_space = false; | ||
| 1093 | |||
| 1094 | for (cur = seccomp_log_names; cur->name && size; cur++) { | ||
| 1095 | ssize_t ret; | ||
| 1096 | |||
| 1097 | if (!(actions_logged & cur->log)) | ||
| 1098 | continue; | ||
| 1099 | |||
| 1100 | if (append_space) { | ||
| 1101 | ret = strscpy(names, " ", size); | ||
| 1102 | if (ret < 0) | ||
| 1103 | return false; | ||
| 1104 | |||
| 1105 | names += ret; | ||
| 1106 | size -= ret; | ||
| 1107 | } else | ||
| 1108 | append_space = true; | ||
| 1109 | |||
| 1110 | ret = strscpy(names, cur->name, size); | ||
| 1111 | if (ret < 0) | ||
| 1112 | return false; | ||
| 1113 | |||
| 1114 | names += ret; | ||
| 1115 | size -= ret; | ||
| 1116 | } | ||
| 1117 | |||
| 1118 | return true; | ||
| 1119 | } | ||
| 1120 | |||
| 1121 | static bool seccomp_action_logged_from_name(u32 *action_logged, | ||
| 1122 | const char *name) | ||
| 1123 | { | ||
| 1124 | const struct seccomp_log_name *cur; | ||
| 1125 | |||
| 1126 | for (cur = seccomp_log_names; cur->name; cur++) { | ||
| 1127 | if (!strcmp(cur->name, name)) { | ||
| 1128 | *action_logged = cur->log; | ||
| 1129 | return true; | ||
| 1130 | } | ||
| 1131 | } | ||
| 1132 | |||
| 1133 | return false; | ||
| 1134 | } | ||
| 1135 | |||
| 1136 | static bool seccomp_actions_logged_from_names(u32 *actions_logged, char *names) | ||
| 1137 | { | ||
| 1138 | char *name; | ||
| 1139 | |||
| 1140 | *actions_logged = 0; | ||
| 1141 | while ((name = strsep(&names, " ")) && *name) { | ||
| 1142 | u32 action_logged = 0; | ||
| 1143 | |||
| 1144 | if (!seccomp_action_logged_from_name(&action_logged, name)) | ||
| 1145 | return false; | ||
| 1146 | |||
| 1147 | *actions_logged |= action_logged; | ||
| 1148 | } | ||
| 1149 | |||
| 1150 | return true; | ||
| 1151 | } | ||
| 1152 | |||
| 1153 | static int seccomp_actions_logged_handler(struct ctl_table *ro_table, int write, | ||
| 1154 | void __user *buffer, size_t *lenp, | ||
| 1155 | loff_t *ppos) | ||
| 1156 | { | ||
| 1157 | char names[sizeof(seccomp_actions_avail)]; | ||
| 1158 | struct ctl_table table; | ||
| 1159 | int ret; | ||
| 1160 | |||
| 1161 | if (write && !capable(CAP_SYS_ADMIN)) | ||
| 1162 | return -EPERM; | ||
| 1163 | |||
| 1164 | memset(names, 0, sizeof(names)); | ||
| 1165 | |||
| 1166 | if (!write) { | ||
| 1167 | if (!seccomp_names_from_actions_logged(names, sizeof(names), | ||
| 1168 | seccomp_actions_logged)) | ||
| 1169 | return -EINVAL; | ||
| 1170 | } | ||
| 1171 | |||
| 1172 | table = *ro_table; | ||
| 1173 | table.data = names; | ||
| 1174 | table.maxlen = sizeof(names); | ||
| 1175 | ret = proc_dostring(&table, write, buffer, lenp, ppos); | ||
| 1176 | if (ret) | ||
| 1177 | return ret; | ||
| 1178 | |||
| 1179 | if (write) { | ||
| 1180 | u32 actions_logged; | ||
| 1181 | |||
| 1182 | if (!seccomp_actions_logged_from_names(&actions_logged, | ||
| 1183 | table.data)) | ||
| 1184 | return -EINVAL; | ||
| 1185 | |||
| 1186 | if (actions_logged & SECCOMP_LOG_ALLOW) | ||
| 1187 | return -EINVAL; | ||
| 1188 | |||
| 1189 | seccomp_actions_logged = actions_logged; | ||
| 1190 | } | ||
| 1191 | |||
| 1192 | return 0; | ||
| 1193 | } | ||
| 1194 | |||
| 1195 | static struct ctl_path seccomp_sysctl_path[] = { | ||
| 1196 | { .procname = "kernel", }, | ||
| 1197 | { .procname = "seccomp", }, | ||
| 1198 | { } | ||
| 1199 | }; | ||
| 1200 | |||
| 1201 | static struct ctl_table seccomp_sysctl_table[] = { | ||
| 1202 | { | ||
| 1203 | .procname = "actions_avail", | ||
| 1204 | .data = (void *) &seccomp_actions_avail, | ||
| 1205 | .maxlen = sizeof(seccomp_actions_avail), | ||
| 1206 | .mode = 0444, | ||
| 1207 | .proc_handler = proc_dostring, | ||
| 1208 | }, | ||
| 1209 | { | ||
| 1210 | .procname = "actions_logged", | ||
| 1211 | .mode = 0644, | ||
| 1212 | .proc_handler = seccomp_actions_logged_handler, | ||
| 1213 | }, | ||
| 1214 | { } | ||
| 1215 | }; | ||
| 1216 | |||
| 1217 | static int __init seccomp_sysctl_init(void) | ||
| 1218 | { | ||
| 1219 | struct ctl_table_header *hdr; | ||
| 1220 | |||
| 1221 | hdr = register_sysctl_paths(seccomp_sysctl_path, seccomp_sysctl_table); | ||
| 1222 | if (!hdr) | ||
| 1223 | pr_warn("seccomp: sysctl registration failed\n"); | ||
| 1224 | else | ||
| 1225 | kmemleak_not_leak(hdr); | ||
| 1226 | |||
| 1227 | return 0; | ||
| 1228 | } | ||
| 1229 | |||
| 1230 | device_initcall(seccomp_sysctl_init) | ||
| 1231 | |||
| 1232 | #endif /* CONFIG_SYSCTL */ | ||
diff --git a/kernel/smpboot.c b/kernel/smpboot.c index 1d71c051a951..5043e7433f4b 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c | |||
| @@ -344,39 +344,30 @@ EXPORT_SYMBOL_GPL(smpboot_unregister_percpu_thread); | |||
| 344 | * by the client, but only by calling this function. | 344 | * by the client, but only by calling this function. |
| 345 | * This function can only be called on a registered smp_hotplug_thread. | 345 | * This function can only be called on a registered smp_hotplug_thread. |
| 346 | */ | 346 | */ |
| 347 | int smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread, | 347 | void smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread, |
| 348 | const struct cpumask *new) | 348 | const struct cpumask *new) |
| 349 | { | 349 | { |
| 350 | struct cpumask *old = plug_thread->cpumask; | 350 | struct cpumask *old = plug_thread->cpumask; |
| 351 | cpumask_var_t tmp; | 351 | static struct cpumask tmp; |
| 352 | unsigned int cpu; | 352 | unsigned int cpu; |
| 353 | 353 | ||
| 354 | if (!alloc_cpumask_var(&tmp, GFP_KERNEL)) | 354 | lockdep_assert_cpus_held(); |
| 355 | return -ENOMEM; | ||
| 356 | |||
| 357 | get_online_cpus(); | ||
| 358 | mutex_lock(&smpboot_threads_lock); | 355 | mutex_lock(&smpboot_threads_lock); |
| 359 | 356 | ||
| 360 | /* Park threads that were exclusively enabled on the old mask. */ | 357 | /* Park threads that were exclusively enabled on the old mask. */ |
| 361 | cpumask_andnot(tmp, old, new); | 358 | cpumask_andnot(&tmp, old, new); |
| 362 | for_each_cpu_and(cpu, tmp, cpu_online_mask) | 359 | for_each_cpu_and(cpu, &tmp, cpu_online_mask) |
| 363 | smpboot_park_thread(plug_thread, cpu); | 360 | smpboot_park_thread(plug_thread, cpu); |
| 364 | 361 | ||
| 365 | /* Unpark threads that are exclusively enabled on the new mask. */ | 362 | /* Unpark threads that are exclusively enabled on the new mask. */ |
| 366 | cpumask_andnot(tmp, new, old); | 363 | cpumask_andnot(&tmp, new, old); |
| 367 | for_each_cpu_and(cpu, tmp, cpu_online_mask) | 364 | for_each_cpu_and(cpu, &tmp, cpu_online_mask) |
| 368 | smpboot_unpark_thread(plug_thread, cpu); | 365 | smpboot_unpark_thread(plug_thread, cpu); |
| 369 | 366 | ||
| 370 | cpumask_copy(old, new); | 367 | cpumask_copy(old, new); |
| 371 | 368 | ||
| 372 | mutex_unlock(&smpboot_threads_lock); | 369 | mutex_unlock(&smpboot_threads_lock); |
| 373 | put_online_cpus(); | ||
| 374 | |||
| 375 | free_cpumask_var(tmp); | ||
| 376 | |||
| 377 | return 0; | ||
| 378 | } | 370 | } |
| 379 | EXPORT_SYMBOL_GPL(smpboot_update_cpumask_percpu_thread); | ||
| 380 | 371 | ||
| 381 | static DEFINE_PER_CPU(atomic_t, cpu_hotplug_state) = ATOMIC_INIT(CPU_POST_DEAD); | 372 | static DEFINE_PER_CPU(atomic_t, cpu_hotplug_state) = ATOMIC_INIT(CPU_POST_DEAD); |
| 382 | 373 | ||
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 6648fbbb8157..d9c31bc2eaea 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
| @@ -367,7 +367,8 @@ static struct ctl_table kern_table[] = { | |||
| 367 | .data = &sysctl_sched_time_avg, | 367 | .data = &sysctl_sched_time_avg, |
| 368 | .maxlen = sizeof(unsigned int), | 368 | .maxlen = sizeof(unsigned int), |
| 369 | .mode = 0644, | 369 | .mode = 0644, |
| 370 | .proc_handler = proc_dointvec, | 370 | .proc_handler = proc_dointvec_minmax, |
| 371 | .extra1 = &one, | ||
| 371 | }, | 372 | }, |
| 372 | #ifdef CONFIG_SCHEDSTATS | 373 | #ifdef CONFIG_SCHEDSTATS |
| 373 | { | 374 | { |
| @@ -871,9 +872,9 @@ static struct ctl_table kern_table[] = { | |||
| 871 | #if defined(CONFIG_LOCKUP_DETECTOR) | 872 | #if defined(CONFIG_LOCKUP_DETECTOR) |
| 872 | { | 873 | { |
| 873 | .procname = "watchdog", | 874 | .procname = "watchdog", |
| 874 | .data = &watchdog_user_enabled, | 875 | .data = &watchdog_user_enabled, |
| 875 | .maxlen = sizeof (int), | 876 | .maxlen = sizeof(int), |
| 876 | .mode = 0644, | 877 | .mode = 0644, |
| 877 | .proc_handler = proc_watchdog, | 878 | .proc_handler = proc_watchdog, |
| 878 | .extra1 = &zero, | 879 | .extra1 = &zero, |
| 879 | .extra2 = &one, | 880 | .extra2 = &one, |
| @@ -889,16 +890,12 @@ static struct ctl_table kern_table[] = { | |||
| 889 | }, | 890 | }, |
| 890 | { | 891 | { |
| 891 | .procname = "nmi_watchdog", | 892 | .procname = "nmi_watchdog", |
| 892 | .data = &nmi_watchdog_enabled, | 893 | .data = &nmi_watchdog_user_enabled, |
| 893 | .maxlen = sizeof (int), | 894 | .maxlen = sizeof(int), |
| 894 | .mode = 0644, | 895 | .mode = NMI_WATCHDOG_SYSCTL_PERM, |
| 895 | .proc_handler = proc_nmi_watchdog, | 896 | .proc_handler = proc_nmi_watchdog, |
| 896 | .extra1 = &zero, | 897 | .extra1 = &zero, |
| 897 | #if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) | ||
| 898 | .extra2 = &one, | 898 | .extra2 = &one, |
| 899 | #else | ||
| 900 | .extra2 = &zero, | ||
| 901 | #endif | ||
| 902 | }, | 899 | }, |
| 903 | { | 900 | { |
| 904 | .procname = "watchdog_cpumask", | 901 | .procname = "watchdog_cpumask", |
| @@ -910,9 +907,9 @@ static struct ctl_table kern_table[] = { | |||
| 910 | #ifdef CONFIG_SOFTLOCKUP_DETECTOR | 907 | #ifdef CONFIG_SOFTLOCKUP_DETECTOR |
| 911 | { | 908 | { |
| 912 | .procname = "soft_watchdog", | 909 | .procname = "soft_watchdog", |
| 913 | .data = &soft_watchdog_enabled, | 910 | .data = &soft_watchdog_user_enabled, |
| 914 | .maxlen = sizeof (int), | 911 | .maxlen = sizeof(int), |
| 915 | .mode = 0644, | 912 | .mode = 0644, |
| 916 | .proc_handler = proc_soft_watchdog, | 913 | .proc_handler = proc_soft_watchdog, |
| 917 | .extra1 = &zero, | 914 | .extra1 = &zero, |
| 918 | .extra2 = &one, | 915 | .extra2 = &one, |
| @@ -2187,8 +2184,6 @@ static int do_proc_douintvec_conv(unsigned long *lvalp, | |||
| 2187 | if (write) { | 2184 | if (write) { |
| 2188 | if (*lvalp > UINT_MAX) | 2185 | if (*lvalp > UINT_MAX) |
| 2189 | return -EINVAL; | 2186 | return -EINVAL; |
| 2190 | if (*lvalp > UINT_MAX) | ||
| 2191 | return -EINVAL; | ||
| 2192 | *valp = *lvalp; | 2187 | *valp = *lvalp; |
| 2193 | } else { | 2188 | } else { |
| 2194 | unsigned int val = *valp; | 2189 | unsigned int val = *valp; |
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 2a685b45b73b..45a3928544ce 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c | |||
| @@ -648,6 +648,12 @@ int blk_trace_startstop(struct request_queue *q, int start) | |||
| 648 | } | 648 | } |
| 649 | EXPORT_SYMBOL_GPL(blk_trace_startstop); | 649 | EXPORT_SYMBOL_GPL(blk_trace_startstop); |
| 650 | 650 | ||
| 651 | /* | ||
| 652 | * When reading or writing the blktrace sysfs files, the references to the | ||
| 653 | * opened sysfs or device files should prevent the underlying block device | ||
| 654 | * from being removed. So no further delete protection is really needed. | ||
| 655 | */ | ||
| 656 | |||
| 651 | /** | 657 | /** |
| 652 | * blk_trace_ioctl: - handle the ioctls associated with tracing | 658 | * blk_trace_ioctl: - handle the ioctls associated with tracing |
| 653 | * @bdev: the block device | 659 | * @bdev: the block device |
| @@ -665,7 +671,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) | |||
| 665 | if (!q) | 671 | if (!q) |
| 666 | return -ENXIO; | 672 | return -ENXIO; |
| 667 | 673 | ||
| 668 | mutex_lock(&bdev->bd_mutex); | 674 | mutex_lock(&q->blk_trace_mutex); |
| 669 | 675 | ||
| 670 | switch (cmd) { | 676 | switch (cmd) { |
| 671 | case BLKTRACESETUP: | 677 | case BLKTRACESETUP: |
| @@ -691,7 +697,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) | |||
| 691 | break; | 697 | break; |
| 692 | } | 698 | } |
| 693 | 699 | ||
| 694 | mutex_unlock(&bdev->bd_mutex); | 700 | mutex_unlock(&q->blk_trace_mutex); |
| 695 | return ret; | 701 | return ret; |
| 696 | } | 702 | } |
| 697 | 703 | ||
| @@ -1727,7 +1733,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev, | |||
| 1727 | if (q == NULL) | 1733 | if (q == NULL) |
| 1728 | goto out_bdput; | 1734 | goto out_bdput; |
| 1729 | 1735 | ||
| 1730 | mutex_lock(&bdev->bd_mutex); | 1736 | mutex_lock(&q->blk_trace_mutex); |
| 1731 | 1737 | ||
| 1732 | if (attr == &dev_attr_enable) { | 1738 | if (attr == &dev_attr_enable) { |
| 1733 | ret = sprintf(buf, "%u\n", !!q->blk_trace); | 1739 | ret = sprintf(buf, "%u\n", !!q->blk_trace); |
| @@ -1746,7 +1752,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev, | |||
| 1746 | ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba); | 1752 | ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba); |
| 1747 | 1753 | ||
| 1748 | out_unlock_bdev: | 1754 | out_unlock_bdev: |
| 1749 | mutex_unlock(&bdev->bd_mutex); | 1755 | mutex_unlock(&q->blk_trace_mutex); |
| 1750 | out_bdput: | 1756 | out_bdput: |
| 1751 | bdput(bdev); | 1757 | bdput(bdev); |
| 1752 | out: | 1758 | out: |
| @@ -1788,7 +1794,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, | |||
| 1788 | if (q == NULL) | 1794 | if (q == NULL) |
| 1789 | goto out_bdput; | 1795 | goto out_bdput; |
| 1790 | 1796 | ||
| 1791 | mutex_lock(&bdev->bd_mutex); | 1797 | mutex_lock(&q->blk_trace_mutex); |
| 1792 | 1798 | ||
| 1793 | if (attr == &dev_attr_enable) { | 1799 | if (attr == &dev_attr_enable) { |
| 1794 | if (value) | 1800 | if (value) |
| @@ -1814,7 +1820,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, | |||
| 1814 | } | 1820 | } |
| 1815 | 1821 | ||
| 1816 | out_unlock_bdev: | 1822 | out_unlock_bdev: |
| 1817 | mutex_unlock(&bdev->bd_mutex); | 1823 | mutex_unlock(&q->blk_trace_mutex); |
| 1818 | out_bdput: | 1824 | out_bdput: |
| 1819 | bdput(bdev); | 1825 | bdput(bdev); |
| 1820 | out: | 1826 | out: |
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 6abfafd7f173..8319e09e15b9 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
| @@ -4954,9 +4954,6 @@ static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata; | |||
| 4954 | static char ftrace_graph_notrace_buf[FTRACE_FILTER_SIZE] __initdata; | 4954 | static char ftrace_graph_notrace_buf[FTRACE_FILTER_SIZE] __initdata; |
| 4955 | static int ftrace_graph_set_hash(struct ftrace_hash *hash, char *buffer); | 4955 | static int ftrace_graph_set_hash(struct ftrace_hash *hash, char *buffer); |
| 4956 | 4956 | ||
| 4957 | static unsigned long save_global_trampoline; | ||
| 4958 | static unsigned long save_global_flags; | ||
| 4959 | |||
| 4960 | static int __init set_graph_function(char *str) | 4957 | static int __init set_graph_function(char *str) |
| 4961 | { | 4958 | { |
| 4962 | strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE); | 4959 | strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE); |
| @@ -6808,17 +6805,6 @@ void unregister_ftrace_graph(void) | |||
| 6808 | unregister_pm_notifier(&ftrace_suspend_notifier); | 6805 | unregister_pm_notifier(&ftrace_suspend_notifier); |
| 6809 | unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL); | 6806 | unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL); |
| 6810 | 6807 | ||
| 6811 | #ifdef CONFIG_DYNAMIC_FTRACE | ||
| 6812 | /* | ||
| 6813 | * Function graph does not allocate the trampoline, but | ||
| 6814 | * other global_ops do. We need to reset the ALLOC_TRAMP flag | ||
| 6815 | * if one was used. | ||
| 6816 | */ | ||
| 6817 | global_ops.trampoline = save_global_trampoline; | ||
| 6818 | if (save_global_flags & FTRACE_OPS_FL_ALLOC_TRAMP) | ||
| 6819 | global_ops.flags |= FTRACE_OPS_FL_ALLOC_TRAMP; | ||
| 6820 | #endif | ||
| 6821 | |||
| 6822 | out: | 6808 | out: |
| 6823 | mutex_unlock(&ftrace_lock); | 6809 | mutex_unlock(&ftrace_lock); |
| 6824 | } | 6810 | } |
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5360b7aec57a..752e5daf0896 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
| @@ -4020,11 +4020,17 @@ static int tracing_open(struct inode *inode, struct file *file) | |||
| 4020 | /* If this file was open for write, then erase contents */ | 4020 | /* If this file was open for write, then erase contents */ |
| 4021 | if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { | 4021 | if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { |
| 4022 | int cpu = tracing_get_cpu(inode); | 4022 | int cpu = tracing_get_cpu(inode); |
| 4023 | struct trace_buffer *trace_buf = &tr->trace_buffer; | ||
| 4024 | |||
| 4025 | #ifdef CONFIG_TRACER_MAX_TRACE | ||
| 4026 | if (tr->current_trace->print_max) | ||
| 4027 | trace_buf = &tr->max_buffer; | ||
| 4028 | #endif | ||
| 4023 | 4029 | ||
| 4024 | if (cpu == RING_BUFFER_ALL_CPUS) | 4030 | if (cpu == RING_BUFFER_ALL_CPUS) |
| 4025 | tracing_reset_online_cpus(&tr->trace_buffer); | 4031 | tracing_reset_online_cpus(trace_buf); |
| 4026 | else | 4032 | else |
| 4027 | tracing_reset(&tr->trace_buffer, cpu); | 4033 | tracing_reset(trace_buf, cpu); |
| 4028 | } | 4034 | } |
| 4029 | 4035 | ||
| 4030 | if (file->f_mode & FMODE_READ) { | 4036 | if (file->f_mode & FMODE_READ) { |
| @@ -5358,6 +5364,13 @@ static int tracing_set_tracer(struct trace_array *tr, const char *buf) | |||
| 5358 | if (t == tr->current_trace) | 5364 | if (t == tr->current_trace) |
| 5359 | goto out; | 5365 | goto out; |
| 5360 | 5366 | ||
| 5367 | /* Some tracers won't work on kernel command line */ | ||
| 5368 | if (system_state < SYSTEM_RUNNING && t->noboot) { | ||
| 5369 | pr_warn("Tracer '%s' is not allowed on command line, ignored\n", | ||
| 5370 | t->name); | ||
| 5371 | goto out; | ||
| 5372 | } | ||
| 5373 | |||
| 5361 | /* Some tracers are only allowed for the top level buffer */ | 5374 | /* Some tracers are only allowed for the top level buffer */ |
| 5362 | if (!trace_ok_for_array(t, tr)) { | 5375 | if (!trace_ok_for_array(t, tr)) { |
| 5363 | ret = -EINVAL; | 5376 | ret = -EINVAL; |
| @@ -5667,7 +5680,7 @@ static int tracing_wait_pipe(struct file *filp) | |||
| 5667 | * | 5680 | * |
| 5668 | * iter->pos will be 0 if we haven't read anything. | 5681 | * iter->pos will be 0 if we haven't read anything. |
| 5669 | */ | 5682 | */ |
| 5670 | if (!tracing_is_on() && iter->pos) | 5683 | if (!tracer_tracing_is_on(iter->tr) && iter->pos) |
| 5671 | break; | 5684 | break; |
| 5672 | 5685 | ||
| 5673 | mutex_unlock(&iter->mutex); | 5686 | mutex_unlock(&iter->mutex); |
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index fb5d54d0d1b3..652c682707cd 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h | |||
| @@ -444,6 +444,8 @@ struct tracer { | |||
| 444 | #ifdef CONFIG_TRACER_MAX_TRACE | 444 | #ifdef CONFIG_TRACER_MAX_TRACE |
| 445 | bool use_max_tr; | 445 | bool use_max_tr; |
| 446 | #endif | 446 | #endif |
| 447 | /* True if tracer cannot be enabled in kernel param */ | ||
| 448 | bool noboot; | ||
| 447 | }; | 449 | }; |
| 448 | 450 | ||
| 449 | 451 | ||
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index cd7480d0a201..dca78fc48439 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c | |||
| @@ -282,6 +282,7 @@ static struct tracer mmio_tracer __read_mostly = | |||
| 282 | .close = mmio_close, | 282 | .close = mmio_close, |
| 283 | .read = mmio_read, | 283 | .read = mmio_read, |
| 284 | .print_line = mmio_print_line, | 284 | .print_line = mmio_print_line, |
| 285 | .noboot = true, | ||
| 285 | }; | 286 | }; |
| 286 | 287 | ||
| 287 | __init static int init_mmio_trace(void) | 288 | __init static int init_mmio_trace(void) |
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index bac629af2285..c738e764e2a5 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c | |||
| @@ -656,15 +656,6 @@ int trace_print_lat_context(struct trace_iterator *iter) | |||
| 656 | return !trace_seq_has_overflowed(s); | 656 | return !trace_seq_has_overflowed(s); |
| 657 | } | 657 | } |
| 658 | 658 | ||
| 659 | static const char state_to_char[] = TASK_STATE_TO_CHAR_STR; | ||
| 660 | |||
| 661 | static int task_state_char(unsigned long state) | ||
| 662 | { | ||
| 663 | int bit = state ? __ffs(state) + 1 : 0; | ||
| 664 | |||
| 665 | return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?'; | ||
| 666 | } | ||
| 667 | |||
| 668 | /** | 659 | /** |
| 669 | * ftrace_find_event - find a registered event | 660 | * ftrace_find_event - find a registered event |
| 670 | * @type: the type of event to look for | 661 | * @type: the type of event to look for |
| @@ -930,8 +921,8 @@ static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter, | |||
| 930 | 921 | ||
| 931 | trace_assign_type(field, iter->ent); | 922 | trace_assign_type(field, iter->ent); |
| 932 | 923 | ||
| 933 | T = task_state_char(field->next_state); | 924 | T = __task_state_to_char(field->next_state); |
| 934 | S = task_state_char(field->prev_state); | 925 | S = __task_state_to_char(field->prev_state); |
| 935 | trace_find_cmdline(field->next_pid, comm); | 926 | trace_find_cmdline(field->next_pid, comm); |
| 936 | trace_seq_printf(&iter->seq, | 927 | trace_seq_printf(&iter->seq, |
| 937 | " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n", | 928 | " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n", |
| @@ -966,8 +957,8 @@ static int trace_ctxwake_raw(struct trace_iterator *iter, char S) | |||
| 966 | trace_assign_type(field, iter->ent); | 957 | trace_assign_type(field, iter->ent); |
| 967 | 958 | ||
| 968 | if (!S) | 959 | if (!S) |
| 969 | S = task_state_char(field->prev_state); | 960 | S = __task_state_to_char(field->prev_state); |
| 970 | T = task_state_char(field->next_state); | 961 | T = __task_state_to_char(field->next_state); |
| 971 | trace_seq_printf(&iter->seq, "%d %d %c %d %d %d %c\n", | 962 | trace_seq_printf(&iter->seq, "%d %d %c %d %d %d %c\n", |
| 972 | field->prev_pid, | 963 | field->prev_pid, |
| 973 | field->prev_prio, | 964 | field->prev_prio, |
| @@ -1002,8 +993,8 @@ static int trace_ctxwake_hex(struct trace_iterator *iter, char S) | |||
| 1002 | trace_assign_type(field, iter->ent); | 993 | trace_assign_type(field, iter->ent); |
| 1003 | 994 | ||
| 1004 | if (!S) | 995 | if (!S) |
| 1005 | S = task_state_char(field->prev_state); | 996 | S = __task_state_to_char(field->prev_state); |
| 1006 | T = task_state_char(field->next_state); | 997 | T = __task_state_to_char(field->next_state); |
| 1007 | 998 | ||
| 1008 | SEQ_PUT_HEX_FIELD(s, field->prev_pid); | 999 | SEQ_PUT_HEX_FIELD(s, field->prev_pid); |
| 1009 | SEQ_PUT_HEX_FIELD(s, field->prev_prio); | 1000 | SEQ_PUT_HEX_FIELD(s, field->prev_prio); |
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index ddec53b67646..0c331978b1a6 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c | |||
| @@ -397,10 +397,10 @@ tracing_sched_switch_trace(struct trace_array *tr, | |||
| 397 | entry = ring_buffer_event_data(event); | 397 | entry = ring_buffer_event_data(event); |
| 398 | entry->prev_pid = prev->pid; | 398 | entry->prev_pid = prev->pid; |
| 399 | entry->prev_prio = prev->prio; | 399 | entry->prev_prio = prev->prio; |
| 400 | entry->prev_state = prev->state; | 400 | entry->prev_state = __get_task_state(prev); |
| 401 | entry->next_pid = next->pid; | 401 | entry->next_pid = next->pid; |
| 402 | entry->next_prio = next->prio; | 402 | entry->next_prio = next->prio; |
| 403 | entry->next_state = next->state; | 403 | entry->next_state = __get_task_state(next); |
| 404 | entry->next_cpu = task_cpu(next); | 404 | entry->next_cpu = task_cpu(next); |
| 405 | 405 | ||
| 406 | if (!call_filter_check_discard(call, entry, buffer, event)) | 406 | if (!call_filter_check_discard(call, entry, buffer, event)) |
| @@ -425,10 +425,10 @@ tracing_sched_wakeup_trace(struct trace_array *tr, | |||
| 425 | entry = ring_buffer_event_data(event); | 425 | entry = ring_buffer_event_data(event); |
| 426 | entry->prev_pid = curr->pid; | 426 | entry->prev_pid = curr->pid; |
| 427 | entry->prev_prio = curr->prio; | 427 | entry->prev_prio = curr->prio; |
| 428 | entry->prev_state = curr->state; | 428 | entry->prev_state = __get_task_state(curr); |
| 429 | entry->next_pid = wakee->pid; | 429 | entry->next_pid = wakee->pid; |
| 430 | entry->next_prio = wakee->prio; | 430 | entry->next_prio = wakee->prio; |
| 431 | entry->next_state = wakee->state; | 431 | entry->next_state = __get_task_state(wakee); |
| 432 | entry->next_cpu = task_cpu(wakee); | 432 | entry->next_cpu = task_cpu(wakee); |
| 433 | 433 | ||
| 434 | if (!call_filter_check_discard(call, entry, buffer, event)) | 434 | if (!call_filter_check_discard(call, entry, buffer, event)) |
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index a4df67cbc711..49cb41412eec 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c | |||
| @@ -96,23 +96,9 @@ check_stack(unsigned long ip, unsigned long *stack) | |||
| 96 | if (in_nmi()) | 96 | if (in_nmi()) |
| 97 | return; | 97 | return; |
| 98 | 98 | ||
| 99 | /* | ||
| 100 | * There's a slight chance that we are tracing inside the | ||
| 101 | * RCU infrastructure, and rcu_irq_enter() will not work | ||
| 102 | * as expected. | ||
| 103 | */ | ||
| 104 | if (unlikely(rcu_irq_enter_disabled())) | ||
| 105 | return; | ||
| 106 | |||
| 107 | local_irq_save(flags); | 99 | local_irq_save(flags); |
| 108 | arch_spin_lock(&stack_trace_max_lock); | 100 | arch_spin_lock(&stack_trace_max_lock); |
| 109 | 101 | ||
| 110 | /* | ||
| 111 | * RCU may not be watching, make it see us. | ||
| 112 | * The stack trace code uses rcu_sched. | ||
| 113 | */ | ||
| 114 | rcu_irq_enter(); | ||
| 115 | |||
| 116 | /* In case another CPU set the tracer_frame on us */ | 102 | /* In case another CPU set the tracer_frame on us */ |
| 117 | if (unlikely(!frame_size)) | 103 | if (unlikely(!frame_size)) |
| 118 | this_size -= tracer_frame; | 104 | this_size -= tracer_frame; |
| @@ -205,7 +191,6 @@ check_stack(unsigned long ip, unsigned long *stack) | |||
| 205 | } | 191 | } |
| 206 | 192 | ||
| 207 | out: | 193 | out: |
| 208 | rcu_irq_exit(); | ||
| 209 | arch_spin_unlock(&stack_trace_max_lock); | 194 | arch_spin_unlock(&stack_trace_max_lock); |
| 210 | local_irq_restore(flags); | 195 | local_irq_restore(flags); |
| 211 | } | 196 | } |
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index f5d52024f6b7..6bcb854909c0 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
| @@ -29,20 +29,29 @@ | |||
| 29 | #include <linux/kvm_para.h> | 29 | #include <linux/kvm_para.h> |
| 30 | #include <linux/kthread.h> | 30 | #include <linux/kthread.h> |
| 31 | 31 | ||
| 32 | /* Watchdog configuration */ | 32 | static DEFINE_MUTEX(watchdog_mutex); |
| 33 | static DEFINE_MUTEX(watchdog_proc_mutex); | ||
| 34 | |||
| 35 | int __read_mostly nmi_watchdog_enabled; | ||
| 36 | 33 | ||
| 37 | #if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG) | 34 | #if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG) |
| 38 | unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED | | 35 | # define WATCHDOG_DEFAULT (SOFT_WATCHDOG_ENABLED | NMI_WATCHDOG_ENABLED) |
| 39 | NMI_WATCHDOG_ENABLED; | 36 | # define NMI_WATCHDOG_DEFAULT 1 |
| 40 | #else | 37 | #else |
| 41 | unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED; | 38 | # define WATCHDOG_DEFAULT (SOFT_WATCHDOG_ENABLED) |
| 39 | # define NMI_WATCHDOG_DEFAULT 0 | ||
| 42 | #endif | 40 | #endif |
| 43 | 41 | ||
| 42 | unsigned long __read_mostly watchdog_enabled; | ||
| 43 | int __read_mostly watchdog_user_enabled = 1; | ||
| 44 | int __read_mostly nmi_watchdog_user_enabled = NMI_WATCHDOG_DEFAULT; | ||
| 45 | int __read_mostly soft_watchdog_user_enabled = 1; | ||
| 46 | int __read_mostly watchdog_thresh = 10; | ||
| 47 | int __read_mostly nmi_watchdog_available; | ||
| 48 | |||
| 49 | struct cpumask watchdog_allowed_mask __read_mostly; | ||
| 50 | |||
| 51 | struct cpumask watchdog_cpumask __read_mostly; | ||
| 52 | unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask); | ||
| 53 | |||
| 44 | #ifdef CONFIG_HARDLOCKUP_DETECTOR | 54 | #ifdef CONFIG_HARDLOCKUP_DETECTOR |
| 45 | /* boot commands */ | ||
| 46 | /* | 55 | /* |
| 47 | * Should we panic when a soft-lockup or hard-lockup occurs: | 56 | * Should we panic when a soft-lockup or hard-lockup occurs: |
| 48 | */ | 57 | */ |
| @@ -56,9 +65,9 @@ unsigned int __read_mostly hardlockup_panic = | |||
| 56 | * kernel command line parameters are parsed, because otherwise it is not | 65 | * kernel command line parameters are parsed, because otherwise it is not |
| 57 | * possible to override this in hardlockup_panic_setup(). | 66 | * possible to override this in hardlockup_panic_setup(). |
| 58 | */ | 67 | */ |
| 59 | void hardlockup_detector_disable(void) | 68 | void __init hardlockup_detector_disable(void) |
| 60 | { | 69 | { |
| 61 | watchdog_enabled &= ~NMI_WATCHDOG_ENABLED; | 70 | nmi_watchdog_user_enabled = 0; |
| 62 | } | 71 | } |
| 63 | 72 | ||
| 64 | static int __init hardlockup_panic_setup(char *str) | 73 | static int __init hardlockup_panic_setup(char *str) |
| @@ -68,48 +77,24 @@ static int __init hardlockup_panic_setup(char *str) | |||
| 68 | else if (!strncmp(str, "nopanic", 7)) | 77 | else if (!strncmp(str, "nopanic", 7)) |
| 69 | hardlockup_panic = 0; | 78 | hardlockup_panic = 0; |
| 70 | else if (!strncmp(str, "0", 1)) | 79 | else if (!strncmp(str, "0", 1)) |
| 71 | watchdog_enabled &= ~NMI_WATCHDOG_ENABLED; | 80 | nmi_watchdog_user_enabled = 0; |
| 72 | else if (!strncmp(str, "1", 1)) | 81 | else if (!strncmp(str, "1", 1)) |
| 73 | watchdog_enabled |= NMI_WATCHDOG_ENABLED; | 82 | nmi_watchdog_user_enabled = 1; |
| 74 | return 1; | 83 | return 1; |
| 75 | } | 84 | } |
| 76 | __setup("nmi_watchdog=", hardlockup_panic_setup); | 85 | __setup("nmi_watchdog=", hardlockup_panic_setup); |
| 77 | 86 | ||
| 78 | #endif | 87 | # ifdef CONFIG_SMP |
| 79 | |||
| 80 | #ifdef CONFIG_SOFTLOCKUP_DETECTOR | ||
| 81 | int __read_mostly soft_watchdog_enabled; | ||
| 82 | #endif | ||
| 83 | |||
| 84 | int __read_mostly watchdog_user_enabled; | ||
| 85 | int __read_mostly watchdog_thresh = 10; | ||
| 86 | |||
| 87 | #ifdef CONFIG_SMP | ||
| 88 | int __read_mostly sysctl_softlockup_all_cpu_backtrace; | ||
| 89 | int __read_mostly sysctl_hardlockup_all_cpu_backtrace; | 88 | int __read_mostly sysctl_hardlockup_all_cpu_backtrace; |
| 90 | #endif | ||
| 91 | struct cpumask watchdog_cpumask __read_mostly; | ||
| 92 | unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask); | ||
| 93 | 89 | ||
| 94 | /* | 90 | static int __init hardlockup_all_cpu_backtrace_setup(char *str) |
| 95 | * The 'watchdog_running' variable is set to 1 when the watchdog threads | 91 | { |
| 96 | * are registered/started and is set to 0 when the watchdog threads are | 92 | sysctl_hardlockup_all_cpu_backtrace = !!simple_strtol(str, NULL, 0); |
| 97 | * unregistered/stopped, so it is an indicator whether the threads exist. | 93 | return 1; |
| 98 | */ | 94 | } |
| 99 | static int __read_mostly watchdog_running; | 95 | __setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup); |
| 100 | /* | 96 | # endif /* CONFIG_SMP */ |
| 101 | * If a subsystem has a need to deactivate the watchdog temporarily, it | 97 | #endif /* CONFIG_HARDLOCKUP_DETECTOR */ |
| 102 | * can use the suspend/resume interface to achieve this. The content of | ||
| 103 | * the 'watchdog_suspended' variable reflects this state. Existing threads | ||
| 104 | * are parked/unparked by the lockup_detector_{suspend|resume} functions | ||
| 105 | * (see comment blocks pertaining to those functions for further details). | ||
| 106 | * | ||
| 107 | * 'watchdog_suspended' also prevents threads from being registered/started | ||
| 108 | * or unregistered/stopped via parameters in /proc/sys/kernel, so the state | ||
| 109 | * of 'watchdog_running' cannot change while the watchdog is deactivated | ||
| 110 | * temporarily (see related code in 'proc' handlers). | ||
| 111 | */ | ||
| 112 | int __read_mostly watchdog_suspended; | ||
| 113 | 98 | ||
| 114 | /* | 99 | /* |
| 115 | * These functions can be overridden if an architecture implements its | 100 | * These functions can be overridden if an architecture implements its |
| @@ -121,36 +106,68 @@ int __read_mostly watchdog_suspended; | |||
| 121 | */ | 106 | */ |
| 122 | int __weak watchdog_nmi_enable(unsigned int cpu) | 107 | int __weak watchdog_nmi_enable(unsigned int cpu) |
| 123 | { | 108 | { |
| 109 | hardlockup_detector_perf_enable(); | ||
| 124 | return 0; | 110 | return 0; |
| 125 | } | 111 | } |
| 112 | |||
| 126 | void __weak watchdog_nmi_disable(unsigned int cpu) | 113 | void __weak watchdog_nmi_disable(unsigned int cpu) |
| 127 | { | 114 | { |
| 115 | hardlockup_detector_perf_disable(); | ||
| 128 | } | 116 | } |
| 129 | 117 | ||
| 130 | /* | 118 | /* Return 0 if an NMI watchdog is available, an error code otherwise */ |
| 131 | * watchdog_nmi_reconfigure can be implemented to be notified after any | 119 | int __weak __init watchdog_nmi_probe(void) |
| 132 | * watchdog configuration change. The arch hardlockup watchdog should | 120 | { |
| 133 | * respond to the following variables: | 121 | return hardlockup_detector_perf_init(); |
| 134 | * - nmi_watchdog_enabled | 122 | } |
| 123 | |||
| 124 | /** | ||
| 125 | * watchdog_nmi_stop - Stop the watchdog for reconfiguration | ||
| 126 | * | ||
| 127 | * The reconfiguration steps are: | ||
| 128 | * watchdog_nmi_stop(); | ||
| 129 | * update_variables(); | ||
| 130 | * watchdog_nmi_start(); | ||
| 131 | */ | ||
| 132 | void __weak watchdog_nmi_stop(void) { } | ||
| 133 | |||
| 134 | /** | ||
| 135 | * watchdog_nmi_start - Start the watchdog after reconfiguration | ||
| 136 | * | ||
| 137 | * Counterpart to watchdog_nmi_stop(). | ||
| 138 | * | ||
| 139 | * The following variables have been updated in update_variables() and | ||
| 140 | * contain the currently valid configuration: | ||
| 141 | * - watchdog_enabled | ||
| 135 | * - watchdog_thresh | 142 | * - watchdog_thresh |
| 136 | * - watchdog_cpumask | 143 | * - watchdog_cpumask |
| 137 | * - sysctl_hardlockup_all_cpu_backtrace | ||
| 138 | * - hardlockup_panic | ||
| 139 | * - watchdog_suspended | ||
| 140 | */ | 144 | */ |
| 141 | void __weak watchdog_nmi_reconfigure(void) | 145 | void __weak watchdog_nmi_start(void) { } |
| 146 | |||
| 147 | /** | ||
| 148 | * lockup_detector_update_enable - Update the sysctl enable bit | ||
| 149 | * | ||
| 150 | * Caller needs to make sure that the NMI/perf watchdogs are off, so this | ||
| 151 | * can't race with watchdog_nmi_disable(). | ||
| 152 | */ | ||
| 153 | static void lockup_detector_update_enable(void) | ||
| 142 | { | 154 | { |
| 155 | watchdog_enabled = 0; | ||
| 156 | if (!watchdog_user_enabled) | ||
| 157 | return; | ||
| 158 | if (nmi_watchdog_available && nmi_watchdog_user_enabled) | ||
| 159 | watchdog_enabled |= NMI_WATCHDOG_ENABLED; | ||
| 160 | if (soft_watchdog_user_enabled) | ||
| 161 | watchdog_enabled |= SOFT_WATCHDOG_ENABLED; | ||
| 143 | } | 162 | } |
| 144 | 163 | ||
| 145 | |||
| 146 | #ifdef CONFIG_SOFTLOCKUP_DETECTOR | 164 | #ifdef CONFIG_SOFTLOCKUP_DETECTOR |
| 147 | 165 | ||
| 148 | /* Helper for online, unparked cpus. */ | 166 | /* Global variables, exported for sysctl */ |
| 149 | #define for_each_watchdog_cpu(cpu) \ | 167 | unsigned int __read_mostly softlockup_panic = |
| 150 | for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask) | 168 | CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE; |
| 151 | |||
| 152 | atomic_t watchdog_park_in_progress = ATOMIC_INIT(0); | ||
| 153 | 169 | ||
| 170 | static bool softlockup_threads_initialized __read_mostly; | ||
| 154 | static u64 __read_mostly sample_period; | 171 | static u64 __read_mostly sample_period; |
| 155 | 172 | ||
| 156 | static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); | 173 | static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); |
| @@ -164,50 +181,40 @@ static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved); | |||
| 164 | static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); | 181 | static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); |
| 165 | static unsigned long soft_lockup_nmi_warn; | 182 | static unsigned long soft_lockup_nmi_warn; |
| 166 | 183 | ||
| 167 | unsigned int __read_mostly softlockup_panic = | ||
| 168 | CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE; | ||
| 169 | |||
| 170 | static int __init softlockup_panic_setup(char *str) | 184 | static int __init softlockup_panic_setup(char *str) |
| 171 | { | 185 | { |
| 172 | softlockup_panic = simple_strtoul(str, NULL, 0); | 186 | softlockup_panic = simple_strtoul(str, NULL, 0); |
| 173 | |||
| 174 | return 1; | 187 | return 1; |
| 175 | } | 188 | } |
| 176 | __setup("softlockup_panic=", softlockup_panic_setup); | 189 | __setup("softlockup_panic=", softlockup_panic_setup); |
| 177 | 190 | ||
| 178 | static int __init nowatchdog_setup(char *str) | 191 | static int __init nowatchdog_setup(char *str) |
| 179 | { | 192 | { |
| 180 | watchdog_enabled = 0; | 193 | watchdog_user_enabled = 0; |
| 181 | return 1; | 194 | return 1; |
| 182 | } | 195 | } |
| 183 | __setup("nowatchdog", nowatchdog_setup); | 196 | __setup("nowatchdog", nowatchdog_setup); |
| 184 | 197 | ||
| 185 | static int __init nosoftlockup_setup(char *str) | 198 | static int __init nosoftlockup_setup(char *str) |
| 186 | { | 199 | { |
| 187 | watchdog_enabled &= ~SOFT_WATCHDOG_ENABLED; | 200 | soft_watchdog_user_enabled = 0; |
| 188 | return 1; | 201 | return 1; |
| 189 | } | 202 | } |
| 190 | __setup("nosoftlockup", nosoftlockup_setup); | 203 | __setup("nosoftlockup", nosoftlockup_setup); |
| 191 | 204 | ||
| 192 | #ifdef CONFIG_SMP | 205 | #ifdef CONFIG_SMP |
| 206 | int __read_mostly sysctl_softlockup_all_cpu_backtrace; | ||
| 207 | |||
| 193 | static int __init softlockup_all_cpu_backtrace_setup(char *str) | 208 | static int __init softlockup_all_cpu_backtrace_setup(char *str) |
| 194 | { | 209 | { |
| 195 | sysctl_softlockup_all_cpu_backtrace = | 210 | sysctl_softlockup_all_cpu_backtrace = !!simple_strtol(str, NULL, 0); |
| 196 | !!simple_strtol(str, NULL, 0); | ||
| 197 | return 1; | 211 | return 1; |
| 198 | } | 212 | } |
| 199 | __setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup); | 213 | __setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup); |
| 200 | #ifdef CONFIG_HARDLOCKUP_DETECTOR | ||
| 201 | static int __init hardlockup_all_cpu_backtrace_setup(char *str) | ||
| 202 | { | ||
| 203 | sysctl_hardlockup_all_cpu_backtrace = | ||
| 204 | !!simple_strtol(str, NULL, 0); | ||
| 205 | return 1; | ||
| 206 | } | ||
| 207 | __setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup); | ||
| 208 | #endif | ||
| 209 | #endif | 214 | #endif |
| 210 | 215 | ||
| 216 | static void __lockup_detector_cleanup(void); | ||
| 217 | |||
| 211 | /* | 218 | /* |
| 212 | * Hard-lockup warnings should be triggered after just a few seconds. Soft- | 219 | * Hard-lockup warnings should be triggered after just a few seconds. Soft- |
| 213 | * lockups can have false positives under extreme conditions. So we generally | 220 | * lockups can have false positives under extreme conditions. So we generally |
| @@ -278,11 +285,15 @@ void touch_all_softlockup_watchdogs(void) | |||
| 278 | int cpu; | 285 | int cpu; |
| 279 | 286 | ||
| 280 | /* | 287 | /* |
| 281 | * this is done lockless | 288 | * watchdog_mutex cannot be taken here, as this might be called |
| 282 | * do we care if a 0 races with a timestamp? | 289 | * from (soft)interrupt context, so the access to |
| 283 | * all it means is the softlock check starts one cycle later | 290 | * watchdog_allowed_mask might race with a concurrent update. |
| 291 | * | ||
| 292 | * The watchdog time stamp can race against a concurrent real | ||
| 293 | * update as well, the only side effect might be a cycle delay for | ||
| 294 | * the softlockup check. | ||
| 284 | */ | 295 | */ |
| 285 | for_each_watchdog_cpu(cpu) | 296 | for_each_cpu(cpu, &watchdog_allowed_mask) |
| 286 | per_cpu(watchdog_touch_ts, cpu) = 0; | 297 | per_cpu(watchdog_touch_ts, cpu) = 0; |
| 287 | wq_watchdog_touch(-1); | 298 | wq_watchdog_touch(-1); |
| 288 | } | 299 | } |
| @@ -322,9 +333,6 @@ static void watchdog_interrupt_count(void) | |||
| 322 | __this_cpu_inc(hrtimer_interrupts); | 333 | __this_cpu_inc(hrtimer_interrupts); |
| 323 | } | 334 | } |
| 324 | 335 | ||
| 325 | static int watchdog_enable_all_cpus(void); | ||
| 326 | static void watchdog_disable_all_cpus(void); | ||
| 327 | |||
| 328 | /* watchdog kicker functions */ | 336 | /* watchdog kicker functions */ |
| 329 | static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) | 337 | static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) |
| 330 | { | 338 | { |
| @@ -333,7 +341,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) | |||
| 333 | int duration; | 341 | int duration; |
| 334 | int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace; | 342 | int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace; |
| 335 | 343 | ||
| 336 | if (atomic_read(&watchdog_park_in_progress) != 0) | 344 | if (!watchdog_enabled) |
| 337 | return HRTIMER_NORESTART; | 345 | return HRTIMER_NORESTART; |
| 338 | 346 | ||
| 339 | /* kick the hardlockup detector */ | 347 | /* kick the hardlockup detector */ |
| @@ -447,32 +455,38 @@ static void watchdog_set_prio(unsigned int policy, unsigned int prio) | |||
| 447 | 455 | ||
| 448 | static void watchdog_enable(unsigned int cpu) | 456 | static void watchdog_enable(unsigned int cpu) |
| 449 | { | 457 | { |
| 450 | struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer); | 458 | struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer); |
| 451 | 459 | ||
| 452 | /* kick off the timer for the hardlockup detector */ | 460 | /* |
| 461 | * Start the timer first to prevent the NMI watchdog triggering | ||
| 462 | * before the timer has a chance to fire. | ||
| 463 | */ | ||
| 453 | hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | 464 | hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
| 454 | hrtimer->function = watchdog_timer_fn; | 465 | hrtimer->function = watchdog_timer_fn; |
| 455 | |||
| 456 | /* Enable the perf event */ | ||
| 457 | watchdog_nmi_enable(cpu); | ||
| 458 | |||
| 459 | /* done here because hrtimer_start can only pin to smp_processor_id() */ | ||
| 460 | hrtimer_start(hrtimer, ns_to_ktime(sample_period), | 466 | hrtimer_start(hrtimer, ns_to_ktime(sample_period), |
| 461 | HRTIMER_MODE_REL_PINNED); | 467 | HRTIMER_MODE_REL_PINNED); |
| 462 | 468 | ||
| 463 | /* initialize timestamp */ | 469 | /* Initialize timestamp */ |
| 464 | watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1); | ||
| 465 | __touch_watchdog(); | 470 | __touch_watchdog(); |
| 471 | /* Enable the perf event */ | ||
| 472 | if (watchdog_enabled & NMI_WATCHDOG_ENABLED) | ||
| 473 | watchdog_nmi_enable(cpu); | ||
| 474 | |||
| 475 | watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1); | ||
| 466 | } | 476 | } |
| 467 | 477 | ||
| 468 | static void watchdog_disable(unsigned int cpu) | 478 | static void watchdog_disable(unsigned int cpu) |
| 469 | { | 479 | { |
| 470 | struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer); | 480 | struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer); |
| 471 | 481 | ||
| 472 | watchdog_set_prio(SCHED_NORMAL, 0); | 482 | watchdog_set_prio(SCHED_NORMAL, 0); |
| 473 | hrtimer_cancel(hrtimer); | 483 | /* |
| 474 | /* disable the perf event */ | 484 | * Disable the perf event first. That prevents that a large delay |
| 485 | * between disabling the timer and disabling the perf event causes | ||
| 486 | * the perf NMI to detect a false positive. | ||
| 487 | */ | ||
| 475 | watchdog_nmi_disable(cpu); | 488 | watchdog_nmi_disable(cpu); |
| 489 | hrtimer_cancel(hrtimer); | ||
| 476 | } | 490 | } |
| 477 | 491 | ||
| 478 | static void watchdog_cleanup(unsigned int cpu, bool online) | 492 | static void watchdog_cleanup(unsigned int cpu, bool online) |
| @@ -499,21 +513,6 @@ static void watchdog(unsigned int cpu) | |||
| 499 | __this_cpu_write(soft_lockup_hrtimer_cnt, | 513 | __this_cpu_write(soft_lockup_hrtimer_cnt, |
| 500 | __this_cpu_read(hrtimer_interrupts)); | 514 | __this_cpu_read(hrtimer_interrupts)); |
| 501 | __touch_watchdog(); | 515 | __touch_watchdog(); |
| 502 | |||
| 503 | /* | ||
| 504 | * watchdog_nmi_enable() clears the NMI_WATCHDOG_ENABLED bit in the | ||
| 505 | * failure path. Check for failures that can occur asynchronously - | ||
| 506 | * for example, when CPUs are on-lined - and shut down the hardware | ||
| 507 | * perf event on each CPU accordingly. | ||
| 508 | * | ||
| 509 | * The only non-obvious place this bit can be cleared is through | ||
| 510 | * watchdog_nmi_enable(), so a pr_info() is placed there. Placing a | ||
| 511 | * pr_info here would be too noisy as it would result in a message | ||
| 512 | * every few seconds if the hardlockup was disabled but the softlockup | ||
| 513 | * enabled. | ||
| 514 | */ | ||
| 515 | if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) | ||
| 516 | watchdog_nmi_disable(cpu); | ||
| 517 | } | 516 | } |
| 518 | 517 | ||
| 519 | static struct smp_hotplug_thread watchdog_threads = { | 518 | static struct smp_hotplug_thread watchdog_threads = { |
| @@ -527,295 +526,174 @@ static struct smp_hotplug_thread watchdog_threads = { | |||
| 527 | .unpark = watchdog_enable, | 526 | .unpark = watchdog_enable, |
| 528 | }; | 527 | }; |
| 529 | 528 | ||
| 530 | /* | 529 | static void softlockup_update_smpboot_threads(void) |
| 531 | * park all watchdog threads that are specified in 'watchdog_cpumask' | ||
| 532 | * | ||
| 533 | * This function returns an error if kthread_park() of a watchdog thread | ||
| 534 | * fails. In this situation, the watchdog threads of some CPUs can already | ||
| 535 | * be parked and the watchdog threads of other CPUs can still be runnable. | ||
| 536 | * Callers are expected to handle this special condition as appropriate in | ||
| 537 | * their context. | ||
| 538 | * | ||
| 539 | * This function may only be called in a context that is protected against | ||
| 540 | * races with CPU hotplug - for example, via get_online_cpus(). | ||
| 541 | */ | ||
| 542 | static int watchdog_park_threads(void) | ||
| 543 | { | 530 | { |
| 544 | int cpu, ret = 0; | 531 | lockdep_assert_held(&watchdog_mutex); |
| 545 | 532 | ||
| 546 | atomic_set(&watchdog_park_in_progress, 1); | 533 | if (!softlockup_threads_initialized) |
| 534 | return; | ||
| 547 | 535 | ||
| 548 | for_each_watchdog_cpu(cpu) { | 536 | smpboot_update_cpumask_percpu_thread(&watchdog_threads, |
| 549 | ret = kthread_park(per_cpu(softlockup_watchdog, cpu)); | 537 | &watchdog_allowed_mask); |
| 550 | if (ret) | ||
| 551 | break; | ||
| 552 | } | ||
| 553 | |||
| 554 | atomic_set(&watchdog_park_in_progress, 0); | ||
| 555 | |||
| 556 | return ret; | ||
| 557 | } | 538 | } |
| 558 | 539 | ||
| 559 | /* | 540 | /* Temporarily park all watchdog threads */ |
| 560 | * unpark all watchdog threads that are specified in 'watchdog_cpumask' | 541 | static void softlockup_park_all_threads(void) |
| 561 | * | ||
| 562 | * This function may only be called in a context that is protected against | ||
| 563 | * races with CPU hotplug - for example, via get_online_cpus(). | ||
| 564 | */ | ||
| 565 | static void watchdog_unpark_threads(void) | ||
| 566 | { | 542 | { |
| 567 | int cpu; | 543 | cpumask_clear(&watchdog_allowed_mask); |
| 568 | 544 | softlockup_update_smpboot_threads(); | |
| 569 | for_each_watchdog_cpu(cpu) | ||
| 570 | kthread_unpark(per_cpu(softlockup_watchdog, cpu)); | ||
| 571 | } | 545 | } |
| 572 | 546 | ||
| 573 | static int update_watchdog_all_cpus(void) | 547 | /* Unpark enabled threads */ |
| 548 | static void softlockup_unpark_threads(void) | ||
| 574 | { | 549 | { |
| 575 | int ret; | 550 | cpumask_copy(&watchdog_allowed_mask, &watchdog_cpumask); |
| 576 | 551 | softlockup_update_smpboot_threads(); | |
| 577 | ret = watchdog_park_threads(); | ||
| 578 | if (ret) | ||
| 579 | return ret; | ||
| 580 | |||
| 581 | watchdog_unpark_threads(); | ||
| 582 | |||
| 583 | return 0; | ||
| 584 | } | 552 | } |
| 585 | 553 | ||
| 586 | static int watchdog_enable_all_cpus(void) | 554 | static void lockup_detector_reconfigure(void) |
| 587 | { | 555 | { |
| 588 | int err = 0; | 556 | cpus_read_lock(); |
| 589 | 557 | watchdog_nmi_stop(); | |
| 590 | if (!watchdog_running) { | 558 | softlockup_park_all_threads(); |
| 591 | err = smpboot_register_percpu_thread_cpumask(&watchdog_threads, | 559 | set_sample_period(); |
| 592 | &watchdog_cpumask); | 560 | lockup_detector_update_enable(); |
| 593 | if (err) | 561 | if (watchdog_enabled && watchdog_thresh) |
| 594 | pr_err("Failed to create watchdog threads, disabled\n"); | 562 | softlockup_unpark_threads(); |
| 595 | else | 563 | watchdog_nmi_start(); |
| 596 | watchdog_running = 1; | 564 | cpus_read_unlock(); |
| 597 | } else { | 565 | /* |
| 598 | /* | 566 | * Must be called outside the cpus locked section to prevent |
| 599 | * Enable/disable the lockup detectors or | 567 | * recursive locking in the perf code. |
| 600 | * change the sample period 'on the fly'. | 568 | */ |
| 601 | */ | 569 | __lockup_detector_cleanup(); |
| 602 | err = update_watchdog_all_cpus(); | ||
| 603 | |||
| 604 | if (err) { | ||
| 605 | watchdog_disable_all_cpus(); | ||
| 606 | pr_err("Failed to update lockup detectors, disabled\n"); | ||
| 607 | } | ||
| 608 | } | ||
| 609 | |||
| 610 | if (err) | ||
| 611 | watchdog_enabled = 0; | ||
| 612 | |||
| 613 | return err; | ||
| 614 | } | 570 | } |
| 615 | 571 | ||
| 616 | static void watchdog_disable_all_cpus(void) | 572 | /* |
| 573 | * Create the watchdog thread infrastructure and configure the detector(s). | ||
| 574 | * | ||
| 575 | * The threads are not unparked as watchdog_allowed_mask is empty. When | ||
| 576 | * the threads are successfully initialized, take the proper locks and |
| 577 | * unpark the threads in the watchdog_cpumask if the watchdog is enabled. | ||
| 578 | */ | ||
| 579 | static __init void lockup_detector_setup(void) | ||
| 617 | { | 580 | { |
| 618 | if (watchdog_running) { | 581 | int ret; |
| 619 | watchdog_running = 0; | ||
| 620 | smpboot_unregister_percpu_thread(&watchdog_threads); | ||
| 621 | } | ||
| 622 | } | ||
| 623 | 582 | ||
| 624 | #ifdef CONFIG_SYSCTL | 583 | /* |
| 625 | static int watchdog_update_cpus(void) | 584 | * If sysctl is off and watchdog got disabled on the command line, |
| 626 | { | 585 | * nothing to do here. |
| 627 | return smpboot_update_cpumask_percpu_thread( | 586 | */ |
| 628 | &watchdog_threads, &watchdog_cpumask); | 587 | lockup_detector_update_enable(); |
| 629 | } | ||
| 630 | #endif | ||
| 631 | 588 | ||
| 632 | #else /* SOFTLOCKUP */ | 589 | if (!IS_ENABLED(CONFIG_SYSCTL) && |
| 633 | static int watchdog_park_threads(void) | 590 | !(watchdog_enabled && watchdog_thresh)) |
| 634 | { | 591 | return; |
| 635 | return 0; | ||
| 636 | } | ||
| 637 | 592 | ||
| 638 | static void watchdog_unpark_threads(void) | 593 | ret = smpboot_register_percpu_thread_cpumask(&watchdog_threads, |
| 639 | { | 594 | &watchdog_allowed_mask); |
| 640 | } | 595 | if (ret) { |
| 596 | pr_err("Failed to initialize soft lockup detector threads\n"); | ||
| 597 | return; | ||
| 598 | } | ||
| 641 | 599 | ||
| 642 | static int watchdog_enable_all_cpus(void) | 600 | mutex_lock(&watchdog_mutex); |
| 643 | { | 601 | softlockup_threads_initialized = true; |
| 644 | return 0; | 602 | lockup_detector_reconfigure(); |
| 603 | mutex_unlock(&watchdog_mutex); | ||
| 645 | } | 604 | } |
| 646 | 605 | ||
| 647 | static void watchdog_disable_all_cpus(void) | 606 | #else /* CONFIG_SOFTLOCKUP_DETECTOR */ |
| 607 | static inline int watchdog_park_threads(void) { return 0; } | ||
| 608 | static inline void watchdog_unpark_threads(void) { } | ||
| 609 | static inline int watchdog_enable_all_cpus(void) { return 0; } | ||
| 610 | static inline void watchdog_disable_all_cpus(void) { } | ||
| 611 | static void lockup_detector_reconfigure(void) | ||
| 648 | { | 612 | { |
| 613 | cpus_read_lock(); | ||
| 614 | watchdog_nmi_stop(); | ||
| 615 | lockup_detector_update_enable(); | ||
| 616 | watchdog_nmi_start(); | ||
| 617 | cpus_read_unlock(); | ||
| 649 | } | 618 | } |
| 650 | 619 | static inline void lockup_detector_setup(void) | |
| 651 | #ifdef CONFIG_SYSCTL | ||
| 652 | static int watchdog_update_cpus(void) | ||
| 653 | { | 620 | { |
| 654 | return 0; | 621 | lockup_detector_reconfigure(); |
| 655 | } | 622 | } |
| 656 | #endif | 623 | #endif /* !CONFIG_SOFTLOCKUP_DETECTOR */ |
| 657 | 624 | ||
| 658 | static void set_sample_period(void) | 625 | static void __lockup_detector_cleanup(void) |
| 659 | { | 626 | { |
| 627 | lockdep_assert_held(&watchdog_mutex); | ||
| 628 | hardlockup_detector_perf_cleanup(); | ||
| 660 | } | 629 | } |
| 661 | #endif /* SOFTLOCKUP */ | ||
| 662 | 630 | ||
| 663 | /* | 631 | /** |
| 664 | * Suspend the hard and soft lockup detector by parking the watchdog threads. | 632 | * lockup_detector_cleanup - Cleanup after cpu hotplug or sysctl changes |
| 633 | * | ||
| 634 | * Caller must not hold the cpu hotplug rwsem. | ||
| 665 | */ | 635 | */ |
| 666 | int lockup_detector_suspend(void) | 636 | void lockup_detector_cleanup(void) |
| 667 | { | 637 | { |
| 668 | int ret = 0; | 638 | mutex_lock(&watchdog_mutex); |
| 669 | 639 | __lockup_detector_cleanup(); | |
| 670 | get_online_cpus(); | 640 | mutex_unlock(&watchdog_mutex); |
| 671 | mutex_lock(&watchdog_proc_mutex); | ||
| 672 | /* | ||
| 673 | * Multiple suspend requests can be active in parallel (counted by | ||
| 674 | * the 'watchdog_suspended' variable). If the watchdog threads are | ||
| 675 | * running, the first caller takes care that they will be parked. | ||
| 676 | * The state of 'watchdog_running' cannot change while a suspend | ||
| 677 | * request is active (see related code in 'proc' handlers). | ||
| 678 | */ | ||
| 679 | if (watchdog_running && !watchdog_suspended) | ||
| 680 | ret = watchdog_park_threads(); | ||
| 681 | |||
| 682 | if (ret == 0) | ||
| 683 | watchdog_suspended++; | ||
| 684 | else { | ||
| 685 | watchdog_disable_all_cpus(); | ||
| 686 | pr_err("Failed to suspend lockup detectors, disabled\n"); | ||
| 687 | watchdog_enabled = 0; | ||
| 688 | } | ||
| 689 | |||
| 690 | watchdog_nmi_reconfigure(); | ||
| 691 | |||
| 692 | mutex_unlock(&watchdog_proc_mutex); | ||
| 693 | |||
| 694 | return ret; | ||
| 695 | } | 641 | } |
| 696 | 642 | ||
| 697 | /* | 643 | /** |
| 698 | * Resume the hard and soft lockup detector by unparking the watchdog threads. | 644 | * lockup_detector_soft_poweroff - Interface to stop lockup detector(s) |
| 645 | * | ||
| 646 | * Special interface for parisc. It prevents lockup detector warnings from | ||
| 647 | * the default pm_poweroff() function which busy loops forever. | ||
| 699 | */ | 648 | */ |
| 700 | void lockup_detector_resume(void) | 649 | void lockup_detector_soft_poweroff(void) |
| 701 | { | 650 | { |
| 702 | mutex_lock(&watchdog_proc_mutex); | 651 | watchdog_enabled = 0; |
| 703 | |||
| 704 | watchdog_suspended--; | ||
| 705 | /* | ||
| 706 | * The watchdog threads are unparked if they were previously running | ||
| 707 | * and if there is no more active suspend request. | ||
| 708 | */ | ||
| 709 | if (watchdog_running && !watchdog_suspended) | ||
| 710 | watchdog_unpark_threads(); | ||
| 711 | |||
| 712 | watchdog_nmi_reconfigure(); | ||
| 713 | |||
| 714 | mutex_unlock(&watchdog_proc_mutex); | ||
| 715 | put_online_cpus(); | ||
| 716 | } | 652 | } |
| 717 | 653 | ||
| 718 | #ifdef CONFIG_SYSCTL | 654 | #ifdef CONFIG_SYSCTL |
| 719 | 655 | ||
| 720 | /* | 656 | /* Propagate any changes to the watchdog threads */ |
| 721 | * Update the run state of the lockup detectors. | 657 | static void proc_watchdog_update(void) |
| 722 | */ | ||
| 723 | static int proc_watchdog_update(void) | ||
| 724 | { | 658 | { |
| 725 | int err = 0; | 659 | /* Remove impossible cpus to keep sysctl output clean. */ |
| 726 | 660 | cpumask_and(&watchdog_cpumask, &watchdog_cpumask, cpu_possible_mask); | |
| 727 | /* | 661 | lockup_detector_reconfigure(); |
| 728 | * Watchdog threads won't be started if they are already active. | ||
| 729 | * The 'watchdog_running' variable in watchdog_*_all_cpus() takes | ||
| 730 | * care of this. If those threads are already active, the sample | ||
| 731 | * period will be updated and the lockup detectors will be enabled | ||
| 732 | * or disabled 'on the fly'. | ||
| 733 | */ | ||
| 734 | if (watchdog_enabled && watchdog_thresh) | ||
| 735 | err = watchdog_enable_all_cpus(); | ||
| 736 | else | ||
| 737 | watchdog_disable_all_cpus(); | ||
| 738 | |||
| 739 | watchdog_nmi_reconfigure(); | ||
| 740 | |||
| 741 | return err; | ||
| 742 | |||
| 743 | } | 662 | } |
| 744 | 663 | ||
| 745 | /* | 664 | /* |
| 746 | * common function for watchdog, nmi_watchdog and soft_watchdog parameter | 665 | * common function for watchdog, nmi_watchdog and soft_watchdog parameter |
| 747 | * | 666 | * |
| 748 | * caller | table->data points to | 'which' contains the flag(s) | 667 | * caller | table->data points to | 'which' |
| 749 | * -------------------|-----------------------|----------------------------- | 668 | * -------------------|----------------------------|-------------------------- |
| 750 | * proc_watchdog | watchdog_user_enabled | NMI_WATCHDOG_ENABLED or'ed | 669 | * proc_watchdog | watchdog_user_enabled | NMI_WATCHDOG_ENABLED | |
| 751 | * | | with SOFT_WATCHDOG_ENABLED | 670 | * | | SOFT_WATCHDOG_ENABLED |
| 752 | * -------------------|-----------------------|----------------------------- | 671 | * -------------------|----------------------------|-------------------------- |
| 753 | * proc_nmi_watchdog | nmi_watchdog_enabled | NMI_WATCHDOG_ENABLED | 672 | * proc_nmi_watchdog | nmi_watchdog_user_enabled | NMI_WATCHDOG_ENABLED |
| 754 | * -------------------|-----------------------|----------------------------- | 673 | * -------------------|----------------------------|-------------------------- |
| 755 | * proc_soft_watchdog | soft_watchdog_enabled | SOFT_WATCHDOG_ENABLED | 674 | * proc_soft_watchdog | soft_watchdog_user_enabled | SOFT_WATCHDOG_ENABLED |
| 756 | */ | 675 | */ |
| 757 | static int proc_watchdog_common(int which, struct ctl_table *table, int write, | 676 | static int proc_watchdog_common(int which, struct ctl_table *table, int write, |
| 758 | void __user *buffer, size_t *lenp, loff_t *ppos) | 677 | void __user *buffer, size_t *lenp, loff_t *ppos) |
| 759 | { | 678 | { |
| 760 | int err, old, new; | 679 | int err, old, *param = table->data; |
| 761 | int *watchdog_param = (int *)table->data; | ||
| 762 | 680 | ||
| 763 | get_online_cpus(); | 681 | mutex_lock(&watchdog_mutex); |
| 764 | mutex_lock(&watchdog_proc_mutex); | ||
| 765 | 682 | ||
| 766 | if (watchdog_suspended) { | ||
| 767 | /* no parameter changes allowed while watchdog is suspended */ | ||
| 768 | err = -EAGAIN; | ||
| 769 | goto out; | ||
| 770 | } | ||
| 771 | |||
| 772 | /* | ||
| 773 | * If the parameter is being read return the state of the corresponding | ||
| 774 | * bit(s) in 'watchdog_enabled', else update 'watchdog_enabled' and the | ||
| 775 | * run state of the lockup detectors. | ||
| 776 | */ | ||
| 777 | if (!write) { | 683 | if (!write) { |
| 778 | *watchdog_param = (watchdog_enabled & which) != 0; | 684 | /* |
| 685 | * On read synchronize the userspace interface. This is a | ||
| 686 | * racy snapshot. | ||
| 687 | */ | ||
| 688 | *param = (watchdog_enabled & which) != 0; | ||
| 779 | err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); | 689 | err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); |
| 780 | } else { | 690 | } else { |
| 691 | old = READ_ONCE(*param); | ||
| 781 | err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); | 692 | err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); |
| 782 | if (err) | 693 | if (!err && old != READ_ONCE(*param)) |
| 783 | goto out; | 694 | proc_watchdog_update(); |
| 784 | |||
| 785 | /* | ||
| 786 | * There is a race window between fetching the current value | ||
| 787 | * from 'watchdog_enabled' and storing the new value. During | ||
| 788 | * this race window, watchdog_nmi_enable() can sneak in and | ||
| 789 | * clear the NMI_WATCHDOG_ENABLED bit in 'watchdog_enabled'. | ||
| 790 | * The 'cmpxchg' detects this race and the loop retries. | ||
| 791 | */ | ||
| 792 | do { | ||
| 793 | old = watchdog_enabled; | ||
| 794 | /* | ||
| 795 | * If the parameter value is not zero set the | ||
| 796 | * corresponding bit(s), else clear it(them). | ||
| 797 | */ | ||
| 798 | if (*watchdog_param) | ||
| 799 | new = old | which; | ||
| 800 | else | ||
| 801 | new = old & ~which; | ||
| 802 | } while (cmpxchg(&watchdog_enabled, old, new) != old); | ||
| 803 | |||
| 804 | /* | ||
| 805 | * Update the run state of the lockup detectors. There is _no_ | ||
| 806 | * need to check the value returned by proc_watchdog_update() | ||
| 807 | * and to restore the previous value of 'watchdog_enabled' as | ||
| 808 | * both lockup detectors are disabled if proc_watchdog_update() | ||
| 809 | * returns an error. | ||
| 810 | */ | ||
| 811 | if (old == new) | ||
| 812 | goto out; | ||
| 813 | |||
| 814 | err = proc_watchdog_update(); | ||
| 815 | } | 695 | } |
| 816 | out: | 696 | mutex_unlock(&watchdog_mutex); |
| 817 | mutex_unlock(&watchdog_proc_mutex); | ||
| 818 | put_online_cpus(); | ||
| 819 | return err; | 697 | return err; |
| 820 | } | 698 | } |
| 821 | 699 | ||
| @@ -835,6 +713,8 @@ int proc_watchdog(struct ctl_table *table, int write, | |||
| 835 | int proc_nmi_watchdog(struct ctl_table *table, int write, | 713 | int proc_nmi_watchdog(struct ctl_table *table, int write, |
| 836 | void __user *buffer, size_t *lenp, loff_t *ppos) | 714 | void __user *buffer, size_t *lenp, loff_t *ppos) |
| 837 | { | 715 | { |
| 716 | if (!nmi_watchdog_available && write) | ||
| 717 | return -ENOTSUPP; | ||
| 838 | return proc_watchdog_common(NMI_WATCHDOG_ENABLED, | 718 | return proc_watchdog_common(NMI_WATCHDOG_ENABLED, |
| 839 | table, write, buffer, lenp, ppos); | 719 | table, write, buffer, lenp, ppos); |
| 840 | } | 720 | } |
| @@ -855,39 +735,17 @@ int proc_soft_watchdog(struct ctl_table *table, int write, | |||
| 855 | int proc_watchdog_thresh(struct ctl_table *table, int write, | 735 | int proc_watchdog_thresh(struct ctl_table *table, int write, |
| 856 | void __user *buffer, size_t *lenp, loff_t *ppos) | 736 | void __user *buffer, size_t *lenp, loff_t *ppos) |
| 857 | { | 737 | { |
| 858 | int err, old, new; | 738 | int err, old; |
| 859 | |||
| 860 | get_online_cpus(); | ||
| 861 | mutex_lock(&watchdog_proc_mutex); | ||
| 862 | 739 | ||
| 863 | if (watchdog_suspended) { | 740 | mutex_lock(&watchdog_mutex); |
| 864 | /* no parameter changes allowed while watchdog is suspended */ | ||
| 865 | err = -EAGAIN; | ||
| 866 | goto out; | ||
| 867 | } | ||
| 868 | 741 | ||
| 869 | old = ACCESS_ONCE(watchdog_thresh); | 742 | old = READ_ONCE(watchdog_thresh); |
| 870 | err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); | 743 | err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); |
| 871 | 744 | ||
| 872 | if (err || !write) | 745 | if (!err && write && old != READ_ONCE(watchdog_thresh)) |
| 873 | goto out; | 746 | proc_watchdog_update(); |
| 874 | |||
| 875 | /* | ||
| 876 | * Update the sample period. Restore on failure. | ||
| 877 | */ | ||
| 878 | new = ACCESS_ONCE(watchdog_thresh); | ||
| 879 | if (old == new) | ||
| 880 | goto out; | ||
| 881 | 747 | ||
| 882 | set_sample_period(); | 748 | mutex_unlock(&watchdog_mutex); |
| 883 | err = proc_watchdog_update(); | ||
| 884 | if (err) { | ||
| 885 | watchdog_thresh = old; | ||
| 886 | set_sample_period(); | ||
| 887 | } | ||
| 888 | out: | ||
| 889 | mutex_unlock(&watchdog_proc_mutex); | ||
| 890 | put_online_cpus(); | ||
| 891 | return err; | 749 | return err; |
| 892 | } | 750 | } |
| 893 | 751 | ||
| @@ -902,45 +760,19 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write, | |||
| 902 | { | 760 | { |
| 903 | int err; | 761 | int err; |
| 904 | 762 | ||
| 905 | get_online_cpus(); | 763 | mutex_lock(&watchdog_mutex); |
| 906 | mutex_lock(&watchdog_proc_mutex); | ||
| 907 | |||
| 908 | if (watchdog_suspended) { | ||
| 909 | /* no parameter changes allowed while watchdog is suspended */ | ||
| 910 | err = -EAGAIN; | ||
| 911 | goto out; | ||
| 912 | } | ||
| 913 | 764 | ||
| 914 | err = proc_do_large_bitmap(table, write, buffer, lenp, ppos); | 765 | err = proc_do_large_bitmap(table, write, buffer, lenp, ppos); |
| 915 | if (!err && write) { | 766 | if (!err && write) |
| 916 | /* Remove impossible cpus to keep sysctl output cleaner. */ | 767 | proc_watchdog_update(); |
| 917 | cpumask_and(&watchdog_cpumask, &watchdog_cpumask, | ||
| 918 | cpu_possible_mask); | ||
| 919 | |||
| 920 | if (watchdog_running) { | ||
| 921 | /* | ||
| 922 | * Failure would be due to being unable to allocate | ||
| 923 | * a temporary cpumask, so we are likely not in a | ||
| 924 | * position to do much else to make things better. | ||
| 925 | */ | ||
| 926 | if (watchdog_update_cpus() != 0) | ||
| 927 | pr_err("cpumask update failed\n"); | ||
| 928 | } | ||
| 929 | 768 | ||
| 930 | watchdog_nmi_reconfigure(); | 769 | mutex_unlock(&watchdog_mutex); |
| 931 | } | ||
| 932 | out: | ||
| 933 | mutex_unlock(&watchdog_proc_mutex); | ||
| 934 | put_online_cpus(); | ||
| 935 | return err; | 770 | return err; |
| 936 | } | 771 | } |
| 937 | |||
| 938 | #endif /* CONFIG_SYSCTL */ | 772 | #endif /* CONFIG_SYSCTL */ |
| 939 | 773 | ||
| 940 | void __init lockup_detector_init(void) | 774 | void __init lockup_detector_init(void) |
| 941 | { | 775 | { |
| 942 | set_sample_period(); | ||
| 943 | |||
| 944 | #ifdef CONFIG_NO_HZ_FULL | 776 | #ifdef CONFIG_NO_HZ_FULL |
| 945 | if (tick_nohz_full_enabled()) { | 777 | if (tick_nohz_full_enabled()) { |
| 946 | pr_info("Disabling watchdog on nohz_full cores by default\n"); | 778 | pr_info("Disabling watchdog on nohz_full cores by default\n"); |
| @@ -951,6 +783,7 @@ void __init lockup_detector_init(void) | |||
| 951 | cpumask_copy(&watchdog_cpumask, cpu_possible_mask); | 783 | cpumask_copy(&watchdog_cpumask, cpu_possible_mask); |
| 952 | #endif | 784 | #endif |
| 953 | 785 | ||
| 954 | if (watchdog_enabled) | 786 | if (!watchdog_nmi_probe()) |
| 955 | watchdog_enable_all_cpus(); | 787 | nmi_watchdog_available = true; |
| 788 | lockup_detector_setup(); | ||
| 956 | } | 789 | } |
diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index 3a09ea1b1d3d..71a62ceacdc8 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c | |||
| @@ -21,8 +21,10 @@ | |||
| 21 | static DEFINE_PER_CPU(bool, hard_watchdog_warn); | 21 | static DEFINE_PER_CPU(bool, hard_watchdog_warn); |
| 22 | static DEFINE_PER_CPU(bool, watchdog_nmi_touch); | 22 | static DEFINE_PER_CPU(bool, watchdog_nmi_touch); |
| 23 | static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); | 23 | static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); |
| 24 | static struct cpumask dead_events_mask; | ||
| 24 | 25 | ||
| 25 | static unsigned long hardlockup_allcpu_dumped; | 26 | static unsigned long hardlockup_allcpu_dumped; |
| 27 | static unsigned int watchdog_cpus; | ||
| 26 | 28 | ||
| 27 | void arch_touch_nmi_watchdog(void) | 29 | void arch_touch_nmi_watchdog(void) |
| 28 | { | 30 | { |
| @@ -103,15 +105,12 @@ static struct perf_event_attr wd_hw_attr = { | |||
| 103 | 105 | ||
| 104 | /* Callback function for perf event subsystem */ | 106 | /* Callback function for perf event subsystem */ |
| 105 | static void watchdog_overflow_callback(struct perf_event *event, | 107 | static void watchdog_overflow_callback(struct perf_event *event, |
| 106 | struct perf_sample_data *data, | 108 | struct perf_sample_data *data, |
| 107 | struct pt_regs *regs) | 109 | struct pt_regs *regs) |
| 108 | { | 110 | { |
| 109 | /* Ensure the watchdog never gets throttled */ | 111 | /* Ensure the watchdog never gets throttled */ |
| 110 | event->hw.interrupts = 0; | 112 | event->hw.interrupts = 0; |
| 111 | 113 | ||
| 112 | if (atomic_read(&watchdog_park_in_progress) != 0) | ||
| 113 | return; | ||
| 114 | |||
| 115 | if (__this_cpu_read(watchdog_nmi_touch) == true) { | 114 | if (__this_cpu_read(watchdog_nmi_touch) == true) { |
| 116 | __this_cpu_write(watchdog_nmi_touch, false); | 115 | __this_cpu_write(watchdog_nmi_touch, false); |
| 117 | return; | 116 | return; |
| @@ -160,104 +159,131 @@ static void watchdog_overflow_callback(struct perf_event *event, | |||
| 160 | return; | 159 | return; |
| 161 | } | 160 | } |
| 162 | 161 | ||
| 163 | /* | 162 | static int hardlockup_detector_event_create(void) |
| 164 | * People like the simple clean cpu node info on boot. | ||
| 165 | * Reduce the watchdog noise by only printing messages | ||
| 166 | * that are different from what cpu0 displayed. | ||
| 167 | */ | ||
| 168 | static unsigned long firstcpu_err; | ||
| 169 | static atomic_t watchdog_cpus; | ||
| 170 | |||
| 171 | int watchdog_nmi_enable(unsigned int cpu) | ||
| 172 | { | 163 | { |
| 164 | unsigned int cpu = smp_processor_id(); | ||
| 173 | struct perf_event_attr *wd_attr; | 165 | struct perf_event_attr *wd_attr; |
| 174 | struct perf_event *event = per_cpu(watchdog_ev, cpu); | 166 | struct perf_event *evt; |
| 175 | int firstcpu = 0; | ||
| 176 | |||
| 177 | /* nothing to do if the hard lockup detector is disabled */ | ||
| 178 | if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) | ||
| 179 | goto out; | ||
| 180 | |||
| 181 | /* is it already setup and enabled? */ | ||
| 182 | if (event && event->state > PERF_EVENT_STATE_OFF) | ||
| 183 | goto out; | ||
| 184 | |||
| 185 | /* it is setup but not enabled */ | ||
| 186 | if (event != NULL) | ||
| 187 | goto out_enable; | ||
| 188 | |||
| 189 | if (atomic_inc_return(&watchdog_cpus) == 1) | ||
| 190 | firstcpu = 1; | ||
| 191 | 167 | ||
| 192 | wd_attr = &wd_hw_attr; | 168 | wd_attr = &wd_hw_attr; |
| 193 | wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh); | 169 | wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh); |
| 194 | 170 | ||
| 195 | /* Try to register using hardware perf events */ | 171 | /* Try to register using hardware perf events */ |
| 196 | event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL); | 172 | evt = perf_event_create_kernel_counter(wd_attr, cpu, NULL, |
| 173 | watchdog_overflow_callback, NULL); | ||
| 174 | if (IS_ERR(evt)) { | ||
| 175 | pr_info("Perf event create on CPU %d failed with %ld\n", cpu, | ||
| 176 | PTR_ERR(evt)); | ||
| 177 | return PTR_ERR(evt); | ||
| 178 | } | ||
| 179 | this_cpu_write(watchdog_ev, evt); | ||
| 180 | return 0; | ||
| 181 | } | ||
| 197 | 182 | ||
| 198 | /* save the first cpu's error for future comparision */ | 183 | /** |
| 199 | if (firstcpu && IS_ERR(event)) | 184 | * hardlockup_detector_perf_enable - Enable the local event |
| 200 | firstcpu_err = PTR_ERR(event); | 185 | */ |
| 186 | void hardlockup_detector_perf_enable(void) | ||
| 187 | { | ||
| 188 | if (hardlockup_detector_event_create()) | ||
| 189 | return; | ||
| 201 | 190 | ||
| 202 | if (!IS_ERR(event)) { | 191 | if (!watchdog_cpus++) |
| 203 | /* only print for the first cpu initialized */ | 192 | pr_info("Enabled. Permanently consumes one hw-PMU counter.\n"); |
| 204 | if (firstcpu || firstcpu_err) | ||
| 205 | pr_info("enabled on all CPUs, permanently consumes one hw-PMU counter.\n"); | ||
| 206 | goto out_save; | ||
| 207 | } | ||
| 208 | 193 | ||
| 209 | /* | 194 | perf_event_enable(this_cpu_read(watchdog_ev)); |
| 210 | * Disable the hard lockup detector if _any_ CPU fails to set up | ||
| 211 | * set up the hardware perf event. The watchdog() function checks | ||
| 212 | * the NMI_WATCHDOG_ENABLED bit periodically. | ||
| 213 | * | ||
| 214 | * The barriers are for syncing up watchdog_enabled across all the | ||
| 215 | * cpus, as clear_bit() does not use barriers. | ||
| 216 | */ | ||
| 217 | smp_mb__before_atomic(); | ||
| 218 | clear_bit(NMI_WATCHDOG_ENABLED_BIT, &watchdog_enabled); | ||
| 219 | smp_mb__after_atomic(); | ||
| 220 | |||
| 221 | /* skip displaying the same error again */ | ||
| 222 | if (!firstcpu && (PTR_ERR(event) == firstcpu_err)) | ||
| 223 | return PTR_ERR(event); | ||
| 224 | |||
| 225 | /* vary the KERN level based on the returned errno */ | ||
| 226 | if (PTR_ERR(event) == -EOPNOTSUPP) | ||
| 227 | pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu); | ||
| 228 | else if (PTR_ERR(event) == -ENOENT) | ||
| 229 | pr_warn("disabled (cpu%i): hardware events not enabled\n", | ||
| 230 | cpu); | ||
| 231 | else | ||
| 232 | pr_err("disabled (cpu%i): unable to create perf event: %ld\n", | ||
| 233 | cpu, PTR_ERR(event)); | ||
| 234 | |||
| 235 | pr_info("Shutting down hard lockup detector on all cpus\n"); | ||
| 236 | |||
| 237 | return PTR_ERR(event); | ||
| 238 | |||
| 239 | /* success path */ | ||
| 240 | out_save: | ||
| 241 | per_cpu(watchdog_ev, cpu) = event; | ||
| 242 | out_enable: | ||
| 243 | perf_event_enable(per_cpu(watchdog_ev, cpu)); | ||
| 244 | out: | ||
| 245 | return 0; | ||
| 246 | } | 195 | } |
| 247 | 196 | ||
| 248 | void watchdog_nmi_disable(unsigned int cpu) | 197 | /** |
| 198 | * hardlockup_detector_perf_disable - Disable the local event | ||
| 199 | */ | ||
| 200 | void hardlockup_detector_perf_disable(void) | ||
| 249 | { | 201 | { |
| 250 | struct perf_event *event = per_cpu(watchdog_ev, cpu); | 202 | struct perf_event *event = this_cpu_read(watchdog_ev); |
| 251 | 203 | ||
| 252 | if (event) { | 204 | if (event) { |
| 253 | perf_event_disable(event); | 205 | perf_event_disable(event); |
| 206 | cpumask_set_cpu(smp_processor_id(), &dead_events_mask); | ||
| 207 | watchdog_cpus--; | ||
| 208 | } | ||
| 209 | } | ||
| 210 | |||
| 211 | /** | ||
| 212 | * hardlockup_detector_perf_cleanup - Cleanup disabled events and destroy them | ||
| 213 | * | ||
| 214 | * Called from lockup_detector_cleanup(). Serialized by the caller. | ||
| 215 | */ | ||
| 216 | void hardlockup_detector_perf_cleanup(void) | ||
| 217 | { | ||
| 218 | int cpu; | ||
| 219 | |||
| 220 | for_each_cpu(cpu, &dead_events_mask) { | ||
| 221 | struct perf_event *event = per_cpu(watchdog_ev, cpu); | ||
| 222 | |||
| 223 | /* | ||
| 224 | * Required because for_each_cpu() reports unconditionally | ||
| 225 | * CPU0 as set on UP kernels. Sigh. | ||
| 226 | */ | ||
| 227 | if (event) | ||
| 228 | perf_event_release_kernel(event); | ||
| 254 | per_cpu(watchdog_ev, cpu) = NULL; | 229 | per_cpu(watchdog_ev, cpu) = NULL; |
| 230 | } | ||
| 231 | cpumask_clear(&dead_events_mask); | ||
| 232 | } | ||
| 233 | |||
| 234 | /** | ||
| 235 | * hardlockup_detector_perf_stop - Globally stop watchdog events | ||
| 236 | * | ||
| 237 | * Special interface for x86 to handle the perf HT bug. | ||
| 238 | */ | ||
| 239 | void __init hardlockup_detector_perf_stop(void) | ||
| 240 | { | ||
| 241 | int cpu; | ||
| 242 | |||
| 243 | lockdep_assert_cpus_held(); | ||
| 244 | |||
| 245 | for_each_online_cpu(cpu) { | ||
| 246 | struct perf_event *event = per_cpu(watchdog_ev, cpu); | ||
| 247 | |||
| 248 | if (event) | ||
| 249 | perf_event_disable(event); | ||
| 250 | } | ||
| 251 | } | ||
| 255 | 252 | ||
| 256 | /* should be in cleanup, but blocks oprofile */ | 253 | /** |
| 257 | perf_event_release_kernel(event); | 254 | * hardlockup_detector_perf_restart - Globally restart watchdog events |
| 255 | * | ||
| 256 | * Special interface for x86 to handle the perf HT bug. | ||
| 257 | */ | ||
| 258 | void __init hardlockup_detector_perf_restart(void) | ||
| 259 | { | ||
| 260 | int cpu; | ||
| 261 | |||
| 262 | lockdep_assert_cpus_held(); | ||
| 263 | |||
| 264 | if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) | ||
| 265 | return; | ||
| 266 | |||
| 267 | for_each_online_cpu(cpu) { | ||
| 268 | struct perf_event *event = per_cpu(watchdog_ev, cpu); | ||
| 269 | |||
| 270 | if (event) | ||
| 271 | perf_event_enable(event); | ||
| 272 | } | ||
| 273 | } | ||
| 274 | |||
| 275 | /** | ||
| 276 | * hardlockup_detector_perf_init - Probe whether NMI event is available at all | ||
| 277 | */ | ||
| 278 | int __init hardlockup_detector_perf_init(void) | ||
| 279 | { | ||
| 280 | int ret = hardlockup_detector_event_create(); | ||
| 258 | 281 | ||
| 259 | /* watchdog_nmi_enable() expects this to be zero initially. */ | 282 | if (ret) { |
| 260 | if (atomic_dec_and_test(&watchdog_cpus)) | 283 | pr_info("Perf NMI watchdog permanently disabled\n"); |
| 261 | firstcpu_err = 0; | 284 | } else { |
| 285 | perf_event_release_kernel(this_cpu_read(watchdog_ev)); | ||
| 286 | this_cpu_write(watchdog_ev, NULL); | ||
| 262 | } | 287 | } |
| 288 | return ret; | ||
| 263 | } | 289 | } |
