Diffstat (limited to 'kernel')
 kernel/bpf/btf.c             |  3
 kernel/bpf/cgroup.c          |  2
 kernel/bpf/hashtab.c         |  4
 kernel/bpf/percpu_freelist.c | 41
 kernel/bpf/percpu_freelist.h |  4
 kernel/bpf/syscall.c         | 12
 kernel/cpu.c                 | 38
 kernel/events/core.c         | 14
 kernel/events/ring_buffer.c  |  3
 kernel/exit.c                | 12
 kernel/futex.c               | 32
 kernel/locking/rtmutex.c     | 37
 kernel/relay.c               |  4
 kernel/sched/fair.c          |  1
 kernel/sched/psi.c           | 21
 kernel/signal.c              | 63
 kernel/smp.c                 |  2
 kernel/trace/bpf_trace.c     | 14
 kernel/trace/trace_uprobe.c  |  9
 kernel/workqueue.c           | 23
 kernel/workqueue_internal.h  |  6
 21 files changed, 243 insertions(+), 102 deletions(-)
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index befe570be5ba..c57bd10340ed 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -1459,7 +1459,8 @@ static int btf_modifier_resolve(struct btf_verifier_env *env,
 
 	/* "typedef void new_void", "const void"...etc */
 	if (!btf_type_is_void(next_type) &&
-	    !btf_type_is_fwd(next_type)) {
+	    !btf_type_is_fwd(next_type) &&
+	    !btf_type_is_func_proto(next_type)) {
 		btf_verifier_log_type(env, v->t, "Invalid type_id");
 		return -EINVAL;
 	}
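
The relaxed check accepts modifiers and typedefs that resolve directly to a
function prototype. As a hypothetical illustration (this source is not part
of the patch), C like the following is encoded as a BTF_KIND_TYPEDEF whose
referenced type is a BTF_KIND_FUNC_PROTO -- exactly the shape that
btf_modifier_resolve() used to reject with "Invalid type_id":

	/* A typedef of a bare function type (not a function pointer): its
	 * BTF is a TYPEDEF node resolving straight to a FUNC_PROTO node. */
	typedef int handler_fn(int cpu, void *ctx);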
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index ab612fe9862f..d17d05570a3f 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -572,7 +572,7 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
 	bpf_compute_and_save_data_end(skb, &saved_data_end);
 
 	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
-				 bpf_prog_run_save_cb);
+				 __bpf_prog_run_save_cb);
 	bpf_restore_data_end(skb, saved_data_end);
 	__skb_pull(skb, offset);
 	skb->sk = save_sk;
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 4b7c76765d9d..f9274114c88d 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -686,7 +686,7 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 	}
 
 	if (htab_is_prealloc(htab)) {
-		pcpu_freelist_push(&htab->freelist, &l->fnode);
+		__pcpu_freelist_push(&htab->freelist, &l->fnode);
 	} else {
 		atomic_dec(&htab->count);
 		l->htab = htab;
@@ -748,7 +748,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 	} else {
 		struct pcpu_freelist_node *l;
 
-		l = pcpu_freelist_pop(&htab->freelist);
+		l = __pcpu_freelist_pop(&htab->freelist);
 		if (!l)
 			return ERR_PTR(-E2BIG);
 		l_new = container_of(l, struct htab_elem, fnode);
diff --git a/kernel/bpf/percpu_freelist.c b/kernel/bpf/percpu_freelist.c
index 673fa6fe2d73..0c1b4ba9e90e 100644
--- a/kernel/bpf/percpu_freelist.c
+++ b/kernel/bpf/percpu_freelist.c
@@ -28,8 +28,8 @@ void pcpu_freelist_destroy(struct pcpu_freelist *s)
 	free_percpu(s->freelist);
 }
 
-static inline void __pcpu_freelist_push(struct pcpu_freelist_head *head,
-					struct pcpu_freelist_node *node)
+static inline void ___pcpu_freelist_push(struct pcpu_freelist_head *head,
+					 struct pcpu_freelist_node *node)
 {
 	raw_spin_lock(&head->lock);
 	node->next = head->first;
@@ -37,12 +37,22 @@ static inline void __pcpu_freelist_push(struct pcpu_freelist_head *head,
 	raw_spin_unlock(&head->lock);
 }
 
-void pcpu_freelist_push(struct pcpu_freelist *s,
-			struct pcpu_freelist_node *node)
+void __pcpu_freelist_push(struct pcpu_freelist *s,
+			  struct pcpu_freelist_node *node)
 {
 	struct pcpu_freelist_head *head = this_cpu_ptr(s->freelist);
 
-	__pcpu_freelist_push(head, node);
+	___pcpu_freelist_push(head, node);
+}
+
+void pcpu_freelist_push(struct pcpu_freelist *s,
+			struct pcpu_freelist_node *node)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	__pcpu_freelist_push(s, node);
+	local_irq_restore(flags);
 }
 
 void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size,
@@ -63,7 +73,7 @@ void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size,
 	for_each_possible_cpu(cpu) {
 again:
 		head = per_cpu_ptr(s->freelist, cpu);
-		__pcpu_freelist_push(head, buf);
+		___pcpu_freelist_push(head, buf);
 		i++;
 		buf += elem_size;
 		if (i == nr_elems)
@@ -74,14 +84,12 @@ again:
 	local_irq_restore(flags);
 }
 
-struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s)
+struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *s)
 {
 	struct pcpu_freelist_head *head;
 	struct pcpu_freelist_node *node;
-	unsigned long flags;
 	int orig_cpu, cpu;
 
-	local_irq_save(flags);
 	orig_cpu = cpu = raw_smp_processor_id();
 	while (1) {
 		head = per_cpu_ptr(s->freelist, cpu);
@@ -89,16 +97,25 @@ struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s)
 		node = head->first;
 		if (node) {
 			head->first = node->next;
-			raw_spin_unlock_irqrestore(&head->lock, flags);
+			raw_spin_unlock(&head->lock);
 			return node;
 		}
 		raw_spin_unlock(&head->lock);
 		cpu = cpumask_next(cpu, cpu_possible_mask);
 		if (cpu >= nr_cpu_ids)
 			cpu = 0;
-		if (cpu == orig_cpu) {
-			local_irq_restore(flags);
+		if (cpu == orig_cpu)
 			return NULL;
-		}
 	}
 }
+
+struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s)
+{
+	struct pcpu_freelist_node *ret;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	ret = __pcpu_freelist_pop(s);
+	local_irq_restore(flags);
+	return ret;
+}
diff --git a/kernel/bpf/percpu_freelist.h b/kernel/bpf/percpu_freelist.h
index 3049aae8ea1e..c3960118e617 100644
--- a/kernel/bpf/percpu_freelist.h
+++ b/kernel/bpf/percpu_freelist.h
@@ -22,8 +22,12 @@ struct pcpu_freelist_node {
 	struct pcpu_freelist_node *next;
 };
 
+/* pcpu_freelist_* do spin_lock_irqsave. */
 void pcpu_freelist_push(struct pcpu_freelist *, struct pcpu_freelist_node *);
 struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *);
+/* __pcpu_freelist_* do spin_lock only. caller must disable irqs. */
+void __pcpu_freelist_push(struct pcpu_freelist *, struct pcpu_freelist_node *);
+struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *);
 void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size,
 			    u32 nr_elems);
 int pcpu_freelist_init(struct pcpu_freelist *);
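
The header comments spell out the split contract: the plain pcpu_freelist_*
entry points mask interrupts themselves, while the double-underscore variants
take only the per-cpu spinlock and trust the caller to have interrupts
disabled already (as the hashtab callers above now do). A minimal sketch of
the two call patterns; the example_* function names are illustrative, not
part of the patch:

	#include <linux/irqflags.h>
	#include <linux/lockdep.h>
	#include "percpu_freelist.h"

	/* Any context: the helper does local_irq_save()/restore() itself. */
	static void example_push(struct pcpu_freelist *s,
				 struct pcpu_freelist_node *node)
	{
		pcpu_freelist_push(s, node);
	}

	/* Caller already runs with interrupts off (e.g. under a
	 * raw_spin_lock_irqsave'd bucket lock): the lock-only variant
	 * avoids a redundant nested irqsave. */
	static void example_push_irqs_off(struct pcpu_freelist *s,
					  struct pcpu_freelist_node *node)
	{
		lockdep_assert_irqs_disabled();
		__pcpu_freelist_push(s, node);
	}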
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index b155cd17c1bd..8577bb7f8be6 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -713,8 +713,13 @@ static int map_lookup_elem(union bpf_attr *attr)
 
 	if (bpf_map_is_dev_bound(map)) {
 		err = bpf_map_offload_lookup_elem(map, key, value);
-	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
-		   map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
+		goto done;
+	}
+
+	preempt_disable();
+	this_cpu_inc(bpf_prog_active);
+	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
 		err = bpf_percpu_hash_copy(map, key, value);
 	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
 		err = bpf_percpu_array_copy(map, key, value);
@@ -744,7 +749,10 @@ static int map_lookup_elem(union bpf_attr *attr)
 		}
 		rcu_read_unlock();
 	}
+	this_cpu_dec(bpf_prog_active);
+	preempt_enable();
 
+done:
 	if (err)
 		goto free_value;
 
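
The lookup path now takes the same recursion guard the update and delete
paths already use: with preemption off and bpf_prog_active raised, a tracing
BPF program cannot re-enter the per-cpu map internals on this CPU while the
copy is in flight. A sketch of the guard in isolation (guarded_percpu_copy
is an illustrative name; bpf_prog_active and bpf_percpu_hash_copy() are the
existing kernel symbols used above):

	static int guarded_percpu_copy(struct bpf_map *map, void *key,
				       void *value)
	{
		int err;

		preempt_disable();		/* stay on this CPU */
		this_cpu_inc(bpf_prog_active);	/* trace handlers back off */
		err = bpf_percpu_hash_copy(map, key, value);
		this_cpu_dec(bpf_prog_active);
		preempt_enable();

		return err;
	}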
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 91d5c38eb7e5..d1c6d152da89 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -376,9 +376,6 @@ void __weak arch_smt_update(void) { }
 
 #ifdef CONFIG_HOTPLUG_SMT
 enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
-EXPORT_SYMBOL_GPL(cpu_smt_control);
-
-static bool cpu_smt_available __read_mostly;
 
 void __init cpu_smt_disable(bool force)
 {
@@ -397,25 +394,11 @@ void __init cpu_smt_disable(bool force)
 
 /*
  * The decision whether SMT is supported can only be done after the full
- * CPU identification. Called from architecture code before non boot CPUs
- * are brought up.
- */
-void __init cpu_smt_check_topology_early(void)
-{
-	if (!topology_smt_supported())
-		cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
-}
-
-/*
- * If SMT was disabled by BIOS, detect it here, after the CPUs have been
- * brought online. This ensures the smt/l1tf sysfs entries are consistent
- * with reality. cpu_smt_available is set to true during the bringup of non
- * boot CPUs when a SMT sibling is detected. Note, this may overwrite
- * cpu_smt_control's previous setting.
+ * CPU identification. Called from architecture code.
  */
 void __init cpu_smt_check_topology(void)
 {
-	if (!cpu_smt_available)
+	if (!topology_smt_supported())
 		cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
 }
 
@@ -428,18 +411,10 @@ early_param("nosmt", smt_cmdline_disable);
 
 static inline bool cpu_smt_allowed(unsigned int cpu)
 {
-	if (topology_is_primary_thread(cpu))
+	if (cpu_smt_control == CPU_SMT_ENABLED)
 		return true;
 
-	/*
-	 * If the CPU is not a 'primary' thread and the booted_once bit is
-	 * set then the processor has SMT support. Store this information
-	 * for the late check of SMT support in cpu_smt_check_topology().
-	 */
-	if (per_cpu(cpuhp_state, cpu).booted_once)
-		cpu_smt_available = true;
-
-	if (cpu_smt_control == CPU_SMT_ENABLED)
+	if (topology_is_primary_thread(cpu))
 		return true;
 
 	/*
@@ -2090,10 +2065,8 @@ static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
 		 */
 		cpuhp_offline_cpu_device(cpu);
 	}
-	if (!ret) {
+	if (!ret)
 		cpu_smt_control = ctrlval;
-		arch_smt_update();
-	}
 	cpu_maps_update_done();
 	return ret;
 }
@@ -2104,7 +2077,6 @@ static int cpuhp_smt_enable(void)
 
 	cpu_maps_update_begin();
 	cpu_smt_control = CPU_SMT_ENABLED;
-	arch_smt_update();
 	for_each_present_cpu(cpu) {
 		/* Skip online CPUs and CPUs on offline nodes */
 		if (cpu_online(cpu) || !node_online(cpu_to_node(cpu)))
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3cd13a30f732..e5ede6918050 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -436,18 +436,18 @@ int perf_proc_update_handler(struct ctl_table *table, int write,
 				void __user *buffer, size_t *lenp,
 				loff_t *ppos)
 {
-	int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
-
-	if (ret || !write)
-		return ret;
-
+	int ret;
+	int perf_cpu = sysctl_perf_cpu_time_max_percent;
 	/*
 	 * If throttling is disabled don't allow the write:
 	 */
-	if (sysctl_perf_cpu_time_max_percent == 100 ||
-	    sysctl_perf_cpu_time_max_percent == 0)
+	if (write && (perf_cpu == 100 || perf_cpu == 0))
 		return -EINVAL;
 
+	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+	if (ret || !write)
+		return ret;
+
 	max_samples_per_tick = DIV_ROUND_UP(sysctl_perf_event_sample_rate, HZ);
 	perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;
 	update_perf_cpu_limits();
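
The reordering matters because proc_dointvec_minmax() stores the new value as
a side effect: with the old code, writing a new sample rate while throttling
was disabled returned -EINVAL only after the value had already been updated.
The general shape of the fix, sketched with a hypothetical handler
(demo_sysctl_handler and demo_write_forbidden are illustrative names):

	static int demo_sysctl_handler(struct ctl_table *table, int write,
				       void __user *buffer, size_t *lenp,
				       loff_t *ppos)
	{
		/* Validate preconditions for a write *before* letting
		 * proc_dointvec_minmax() store anything into the table. */
		if (write && demo_write_forbidden())
			return -EINVAL;

		return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	}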
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 4a9937076331..309ef5a64af5 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -734,6 +734,9 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
 	size = sizeof(struct ring_buffer);
 	size += nr_pages * sizeof(void *);
 
+	if (order_base_2(size) >= MAX_ORDER)
+		goto fail;
+
 	rb = kzalloc(size, GFP_KERNEL);
 	if (!rb)
 		goto fail;
diff --git a/kernel/exit.c b/kernel/exit.c
index 3fb7be001964..2639a30a8aa5 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -558,12 +558,14 @@ static struct task_struct *find_alive_thread(struct task_struct *p)
 	return NULL;
 }
 
-static struct task_struct *find_child_reaper(struct task_struct *father)
+static struct task_struct *find_child_reaper(struct task_struct *father,
+						struct list_head *dead)
 	__releases(&tasklist_lock)
 	__acquires(&tasklist_lock)
 {
 	struct pid_namespace *pid_ns = task_active_pid_ns(father);
 	struct task_struct *reaper = pid_ns->child_reaper;
+	struct task_struct *p, *n;
 
 	if (likely(reaper != father))
 		return reaper;
@@ -579,6 +581,12 @@ static struct task_struct *find_child_reaper(struct task_struct *father)
 		panic("Attempted to kill init! exitcode=0x%08x\n",
 			father->signal->group_exit_code ?: father->exit_code);
 	}
+
+	list_for_each_entry_safe(p, n, dead, ptrace_entry) {
+		list_del_init(&p->ptrace_entry);
+		release_task(p);
+	}
+
 	zap_pid_ns_processes(pid_ns);
 	write_lock_irq(&tasklist_lock);
 
@@ -668,7 +676,7 @@ static void forget_original_parent(struct task_struct *father,
 	exit_ptrace(father, dead);
 
 	/* Can drop and reacquire tasklist_lock */
-	reaper = find_child_reaper(father);
+	reaper = find_child_reaper(father, dead);
 	if (list_empty(&father->children))
 		return;
 
diff --git a/kernel/futex.c b/kernel/futex.c
index fdd312da0992..a0514e01c3eb 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2221,11 +2221,11 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
 	 * decrement the counter at queue_unlock() when some error has
 	 * occurred and we don't end up adding the task to the list.
 	 */
-	hb_waiters_inc(hb);
+	hb_waiters_inc(hb); /* implies smp_mb(); (A) */
 
 	q->lock_ptr = &hb->lock;
 
-	spin_lock(&hb->lock); /* implies smp_mb(); (A) */
+	spin_lock(&hb->lock);
 	return hb;
 }
 
@@ -2861,35 +2861,39 @@ retry_private:
 	 * and BUG when futex_unlock_pi() interleaves with this.
 	 *
 	 * Therefore acquire wait_lock while holding hb->lock, but drop the
-	 * latter before calling rt_mutex_start_proxy_lock(). This still fully
-	 * serializes against futex_unlock_pi() as that does the exact same
-	 * lock handoff sequence.
+	 * latter before calling __rt_mutex_start_proxy_lock(). This
+	 * interleaves with futex_unlock_pi() -- which does a similar lock
+	 * handoff -- such that the latter can observe the futex_q::pi_state
+	 * before __rt_mutex_start_proxy_lock() is done.
 	 */
 	raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
 	spin_unlock(q.lock_ptr);
+	/*
+	 * __rt_mutex_start_proxy_lock() unconditionally enqueues the @rt_waiter
+	 * such that futex_unlock_pi() is guaranteed to observe the waiter when
+	 * it sees the futex_q::pi_state.
+	 */
 	ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current);
 	raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock);
 
 	if (ret) {
 		if (ret == 1)
 			ret = 0;
-
-		spin_lock(q.lock_ptr);
-		goto no_block;
+		goto cleanup;
 	}
 
-
 	if (unlikely(to))
 		hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS);
 
 	ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter);
 
+cleanup:
 	spin_lock(q.lock_ptr);
 	/*
-	 * If we failed to acquire the lock (signal/timeout), we must
+	 * If we failed to acquire the lock (deadlock/signal/timeout), we must
 	 * first acquire the hb->lock before removing the lock from the
-	 * rt_mutex waitqueue, such that we can keep the hb and rt_mutex
-	 * wait lists consistent.
+	 * rt_mutex waitqueue, such that we can keep the hb and rt_mutex wait
+	 * lists consistent.
 	 *
 	 * In particular; it is important that futex_unlock_pi() can not
 	 * observe this inconsistency.
@@ -3013,6 +3017,10 @@ retry:
 	 * there is no point where we hold neither; and therefore
 	 * wake_futex_pi() must observe a state consistent with what we
 	 * observed.
+	 *
+	 * In particular; this forces __rt_mutex_start_proxy() to
+	 * complete such that we're guaranteed to observe the
+	 * rt_waiter. Also see the WARN in wake_futex_pi().
 	 */
 	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
 	spin_unlock(&hb->lock);
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 581edcc63c26..978d63a8261c 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1726,12 +1726,33 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock,
 	rt_mutex_set_owner(lock, NULL);
 }
 
+/**
+ * __rt_mutex_start_proxy_lock() - Start lock acquisition for another task
+ * @lock:	the rt_mutex to take
+ * @waiter:	the pre-initialized rt_mutex_waiter
+ * @task:	the task to prepare
+ *
+ * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock
+ * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that.
+ *
+ * NOTE: does _NOT_ remove the @waiter on failure; must either call
+ * rt_mutex_wait_proxy_lock() or rt_mutex_cleanup_proxy_lock() after this.
+ *
+ * Returns:
+ *  0 - task blocked on lock
+ *  1 - acquired the lock for task, caller should wake it up
+ * <0 - error
+ *
+ * Special API call for PI-futex support.
+ */
 int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
 				struct rt_mutex_waiter *waiter,
 				struct task_struct *task)
 {
 	int ret;
 
+	lockdep_assert_held(&lock->wait_lock);
+
 	if (try_to_take_rt_mutex(lock, task, NULL))
 		return 1;
 
@@ -1749,9 +1770,6 @@ int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
 		ret = 0;
 	}
 
-	if (unlikely(ret))
-		remove_waiter(lock, waiter);
-
 	debug_rt_mutex_print_deadlock(waiter);
 
 	return ret;
@@ -1763,12 +1781,18 @@ int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
  * @waiter:	the pre-initialized rt_mutex_waiter
  * @task:	the task to prepare
  *
+ * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock
+ * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that.
+ *
+ * NOTE: unlike __rt_mutex_start_proxy_lock this _DOES_ remove the @waiter
+ * on failure.
+ *
  * Returns:
  *  0 - task blocked on lock
  *  1 - acquired the lock for task, caller should wake it up
  * <0 - error
  *
- * Special API call for FUTEX_REQUEUE_PI support.
+ * Special API call for PI-futex support.
  */
 int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
 			      struct rt_mutex_waiter *waiter,
@@ -1778,6 +1802,8 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
 
 	raw_spin_lock_irq(&lock->wait_lock);
 	ret = __rt_mutex_start_proxy_lock(lock, waiter, task);
+	if (unlikely(ret))
+		remove_waiter(lock, waiter);
 	raw_spin_unlock_irq(&lock->wait_lock);
 
 	return ret;
@@ -1845,7 +1871,8 @@ int rt_mutex_wait_proxy_lock(struct rt_mutex *lock,
  * @lock:	the rt_mutex we were woken on
  * @waiter:	the pre-initialized rt_mutex_waiter
  *
- * Attempt to clean up after a failed rt_mutex_wait_proxy_lock().
+ * Attempt to clean up after a failed __rt_mutex_start_proxy_lock() or
+ * rt_mutex_wait_proxy_lock().
  *
  * Unless we acquired the lock; we're still enqueued on the wait-list and can
  * in fact still be granted ownership until we're removed. Therefore we can
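
Taken together with the futex hunks above, the new kerneldoc pins down the
calling convention: a failed __rt_mutex_start_proxy_lock() leaves the waiter
enqueued, so every outcome must funnel into rt_mutex_wait_proxy_lock() or
rt_mutex_cleanup_proxy_lock(). A sketch of the required pairing, loosely
mirroring futex_lock_pi() (proxy_lock_example is an illustrative name, and
the hb->lock choreography is omitted):

	static int proxy_lock_example(struct rt_mutex *lock,
				      struct rt_mutex_waiter *waiter,
				      struct hrtimer_sleeper *timeout,
				      struct task_struct *task)
	{
		int ret;

		raw_spin_lock_irq(&lock->wait_lock);
		ret = __rt_mutex_start_proxy_lock(lock, waiter, task);
		raw_spin_unlock_irq(&lock->wait_lock);

		if (ret > 0)	/* acquired immediately, nothing enqueued */
			return 0;
		if (!ret)	/* blocked: actually wait for the lock */
			ret = rt_mutex_wait_proxy_lock(lock, timeout, waiter);

		/*
		 * On failure the waiter may still be enqueued; cleanup
		 * removes it, and returns false if we raced and acquired
		 * the lock after all.
		 */
		if (ret && !rt_mutex_cleanup_proxy_lock(lock, waiter))
			ret = 0;

		return ret;
	}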
diff --git a/kernel/relay.c b/kernel/relay.c
index 04f248644e06..9e0f52375487 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -428,6 +428,8 @@ static struct dentry *relay_create_buf_file(struct rchan *chan,
 	dentry = chan->cb->create_buf_file(tmpname, chan->parent,
 					   S_IRUSR, buf,
 					   &chan->is_global);
+	if (IS_ERR(dentry))
+		dentry = NULL;
 
 	kfree(tmpname);
 
@@ -461,7 +463,7 @@ static struct rchan_buf *relay_open_buf(struct rchan *chan, unsigned int cpu)
 		dentry = chan->cb->create_buf_file(NULL, NULL,
 						   S_IRUSR, buf,
 						   &chan->is_global);
-		if (WARN_ON(dentry))
+		if (IS_ERR_OR_NULL(dentry))
 			goto free_buf;
 	}
 
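
With relay_create_buf_file() mapping ERR_PTR() returns to NULL, a client
callback can pass through whatever the filesystem hands back instead of
open-coding the error handling. A minimal sketch of a conforming callback,
assuming debugfs as the backing filesystem (demo_create_buf_file is an
illustrative name; relay_file_operations is the exported relay fops):

	static struct dentry *demo_create_buf_file(const char *filename,
						   struct dentry *parent,
						   umode_t mode,
						   struct rchan_buf *buf,
						   int *is_global)
	{
		/* debugfs may return an ERR_PTR(); relay now copes with
		 * that, so the value can be returned unfiltered. */
		return debugfs_create_file(filename, mode, parent, buf,
					   &relay_file_operations);
	}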
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 50aa2aba69bd..310d0637fe4b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5980,6 +5980,7 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
 
 #ifdef CONFIG_SCHED_SMT
 DEFINE_STATIC_KEY_FALSE(sched_smt_present);
+EXPORT_SYMBOL_GPL(sched_smt_present);
 
 static inline void set_idle_cores(int cpu, int val)
 {
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index fe24de3fbc93..c3484785b179 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -124,6 +124,7 @@
  * sampling of the aggregate task states would be.
  */
 
+#include "../workqueue_internal.h"
 #include <linux/sched/loadavg.h>
 #include <linux/seq_file.h>
 #include <linux/proc_fs.h>
@@ -480,9 +481,6 @@ static void psi_group_change(struct psi_group *group, int cpu,
 	groupc->tasks[t]++;
 
 	write_seqcount_end(&groupc->seq);
-
-	if (!delayed_work_pending(&group->clock_work))
-		schedule_delayed_work(&group->clock_work, PSI_FREQ);
 }
 
 static struct psi_group *iterate_groups(struct task_struct *task, void **iter)
@@ -513,6 +511,7 @@ void psi_task_change(struct task_struct *task, int clear, int set)
 {
 	int cpu = task_cpu(task);
 	struct psi_group *group;
+	bool wake_clock = true;
 	void *iter = NULL;
 
 	if (!task->pid)
@@ -530,8 +529,22 @@ void psi_task_change(struct task_struct *task, int clear, int set)
 	task->psi_flags &= ~clear;
 	task->psi_flags |= set;
 
-	while ((group = iterate_groups(task, &iter)))
+	/*
+	 * Periodic aggregation shuts off if there is a period of no
+	 * task changes, so we wake it back up if necessary. However,
+	 * don't do this if the task change is the aggregation worker
+	 * itself going to sleep, or we'll ping-pong forever.
+	 */
+	if (unlikely((clear & TSK_RUNNING) &&
+		     (task->flags & PF_WQ_WORKER) &&
+		     wq_worker_last_func(task) == psi_update_work))
+		wake_clock = false;
+
+	while ((group = iterate_groups(task, &iter))) {
 		psi_group_change(group, cpu, clear, set);
+		if (wake_clock && !delayed_work_pending(&group->clock_work))
+			schedule_delayed_work(&group->clock_work, PSI_FREQ);
+	}
 }
 
 void psi_memstall_tick(struct task_struct *task, int cpu)
diff --git a/kernel/signal.c b/kernel/signal.c
index e1d7ad8e6ab1..99fa8ff06fd9 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -688,6 +688,48 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, kernel_siginfo_t *info)
 }
 EXPORT_SYMBOL_GPL(dequeue_signal);
 
+static int dequeue_synchronous_signal(kernel_siginfo_t *info)
+{
+	struct task_struct *tsk = current;
+	struct sigpending *pending = &tsk->pending;
+	struct sigqueue *q, *sync = NULL;
+
+	/*
+	 * Might a synchronous signal be in the queue?
+	 */
+	if (!((pending->signal.sig[0] & ~tsk->blocked.sig[0]) & SYNCHRONOUS_MASK))
+		return 0;
+
+	/*
+	 * Return the first synchronous signal in the queue.
+	 */
+	list_for_each_entry(q, &pending->list, list) {
+		/* Synchronous signals have a positive si_code */
+		if ((q->info.si_code > SI_USER) &&
+		    (sigmask(q->info.si_signo) & SYNCHRONOUS_MASK)) {
+			sync = q;
+			goto next;
+		}
+	}
+	return 0;
+next:
+	/*
+	 * Check if there is another siginfo for the same signal.
+	 */
+	list_for_each_entry_continue(q, &pending->list, list) {
+		if (q->info.si_signo == sync->info.si_signo)
+			goto still_pending;
+	}
+
+	sigdelset(&pending->signal, sync->info.si_signo);
+	recalc_sigpending();
+still_pending:
+	list_del_init(&sync->list);
+	copy_siginfo(info, &sync->info);
+	__sigqueue_free(sync);
+	return info->si_signo;
+}
+
 /*
  * Tell a process that it has a new active signal..
  *
@@ -1057,10 +1099,9 @@ static int __send_signal(int sig, struct kernel_siginfo *info, struct task_struct *t,
 
 	result = TRACE_SIGNAL_DELIVERED;
 	/*
-	 * Skip useless siginfo allocation for SIGKILL SIGSTOP,
-	 * and kernel threads.
+	 * Skip useless siginfo allocation for SIGKILL and kernel threads.
 	 */
-	if (sig_kernel_only(sig) || (t->flags & PF_KTHREAD))
+	if ((sig == SIGKILL) || (t->flags & PF_KTHREAD))
 		goto out_set;
 
 	/*
@@ -2394,6 +2435,11 @@ relock:
 		goto relock;
 	}
 
+	/* Has this task already been marked for death? */
+	ksig->info.si_signo = signr = SIGKILL;
+	if (signal_group_exit(signal))
+		goto fatal;
+
 	for (;;) {
 		struct k_sigaction *ka;
 
@@ -2407,7 +2453,15 @@ relock:
 			goto relock;
 		}
 
-		signr = dequeue_signal(current, &current->blocked, &ksig->info);
+		/*
+		 * Signals generated by the execution of an instruction
+		 * need to be delivered before any other pending signals
+		 * so that the instruction pointer in the signal stack
+		 * frame points to the faulting instruction.
+		 */
+		signr = dequeue_synchronous_signal(&ksig->info);
+		if (!signr)
+			signr = dequeue_signal(current, &current->blocked, &ksig->info);
 
 		if (!signr)
 			break; /* will return 0 */
@@ -2489,6 +2543,7 @@ relock:
 			continue;
 		}
 
+	fatal:
 		spin_unlock_irq(&sighand->siglock);
 
 		/*
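
dequeue_synchronous_signal() keys off SYNCHRONOUS_MASK, which covers the
instruction-generated fault signals, so e.g. a pending SIGSEGV is delivered
ahead of an unrelated queued SIGUSR1 and the signal frame's instruction
pointer still points at the faulting instruction. For context, the mask is
defined earlier in kernel/signal.c (reproduced here, not part of this diff):

	#define SYNCHRONOUS_MASK \
		(sigmask(SIGSEGV) | sigmask(SIGBUS) | sigmask(SIGILL) | \
		 sigmask(SIGTRAP) | sigmask(SIGFPE) | sigmask(SIGSYS))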
diff --git a/kernel/smp.c b/kernel/smp.c
index 163c451af42e..f4cf1b0bb3b8 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -584,8 +584,6 @@ void __init smp_init(void)
 		num_nodes, (num_nodes > 1 ? "s" : ""),
 		num_cpus,  (num_cpus  > 1 ? "s" : ""));
 
-	/* Final decision about SMT support */
-	cpu_smt_check_topology();
 	/* Any cleanup work */
 	smp_cpus_done(setup_max_cpus);
 }
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 8b068adb9da1..f1a86a0d881d 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1204,22 +1204,12 @@ static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *
 
 int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
 {
-	int err;
-
-	mutex_lock(&bpf_event_mutex);
-	err = __bpf_probe_register(btp, prog);
-	mutex_unlock(&bpf_event_mutex);
-	return err;
+	return __bpf_probe_register(btp, prog);
 }
 
 int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
 {
-	int err;
-
-	mutex_lock(&bpf_event_mutex);
-	err = tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog);
-	mutex_unlock(&bpf_event_mutex);
-	return err;
+	return tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog);
 }
 
 int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index e335576b9411..9bde07c06362 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -5,7 +5,7 @@
  * Copyright (C) IBM Corporation, 2010-2012
  * Author:	Srikar Dronamraju <srikar@linux.vnet.ibm.com>
  */
-#define pr_fmt(fmt)	"trace_kprobe: " fmt
+#define pr_fmt(fmt)	"trace_uprobe: " fmt
 
 #include <linux/ctype.h>
 #include <linux/module.h>
@@ -160,6 +160,13 @@ fetch_store_string(unsigned long addr, void *dest, void *base)
 	if (ret >= 0) {
 		if (ret == maxlen)
 			dst[ret - 1] = '\0';
+		else
+			/*
+			 * Include the terminating null byte. In this case it
+			 * was copied by strncpy_from_user but not accounted
+			 * for in ret.
+			 */
+			ret++;
 		*(u32 *)dest = make_data_loc(ret, (void *)dst - base);
 	}
 
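
The accounting fix follows from strncpy_from_user()'s return convention: it
returns the string length excluding the terminating NUL, yet it does copy
the NUL whenever the string fits in the buffer. The resulting size logic in
isolation (demo_fetch_len is an illustrative helper, not kernel API):

	static long demo_fetch_len(unsigned long addr, char *dst, long maxlen)
	{
		long ret = strncpy_from_user(dst, (const char __user *)addr,
					     maxlen);

		if (ret < 0)
			return ret;		/* fault while copying */
		if (ret == maxlen)
			dst[ret - 1] = '\0';	/* truncated: force a NUL */
		else
			ret++;			/* count the copied NUL */

		return ret;			/* bytes recorded, incl. NUL */
	}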
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 392be4b252f6..fc5d23d752a5 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -910,6 +910,26 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task)
 }
 
 /**
+ * wq_worker_last_func - retrieve worker's last work function
+ *
+ * Determine the last function a worker executed. This is called from
+ * the scheduler to get a worker's last known identity.
+ *
+ * CONTEXT:
+ * spin_lock_irq(rq->lock)
+ *
+ * Return:
+ * The last work function %current executed as a worker, NULL if it
+ * hasn't executed any work yet.
+ */
+work_func_t wq_worker_last_func(struct task_struct *task)
+{
+	struct worker *worker = kthread_data(task);
+
+	return worker->last_func;
+}
+
+/**
  * worker_set_flags - set worker flags and adjust nr_running accordingly
  * @worker: self
  * @flags: flags to set
@@ -2184,6 +2204,9 @@ __acquires(&pool->lock)
 	if (unlikely(cpu_intensive))
 		worker_clr_flags(worker, WORKER_CPU_INTENSIVE);
 
+	/* tag the worker for identification in schedule() */
+	worker->last_func = worker->current_func;
+
 	/* we're done with it, release */
 	hash_del(&worker->hentry);
 	worker->current_work = NULL;
diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h
index 66fbb5a9e633..cb68b03ca89a 100644
--- a/kernel/workqueue_internal.h
+++ b/kernel/workqueue_internal.h
@@ -53,6 +53,9 @@ struct worker {
 
 	/* used only by rescuers to point to the target workqueue */
 	struct workqueue_struct	*rescue_wq;	/* I: the workqueue to rescue */
+
+	/* used by the scheduler to determine a worker's last known identity */
+	work_func_t		last_func;
 };
 
 /**
@@ -67,9 +70,10 @@ static inline struct worker *current_wq_worker(void)
 
 /*
  * Scheduler hooks for concurrency managed workqueue. Only to be used from
- * sched/core.c and workqueue.c.
+ * sched/ and workqueue.c.
  */
 void wq_worker_waking_up(struct task_struct *task, int cpu);
 struct task_struct *wq_worker_sleeping(struct task_struct *task);
+work_func_t wq_worker_last_func(struct task_struct *task);
 
 #endif /* _KERNEL_WORKQUEUE_INTERNAL_H */
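
wq_worker_last_func() dereferences kthread_data() without any validation, so
it is only safe on tasks known to be workqueue workers; the psi call site
above gates it on PF_WQ_WORKER first. A sketch of the guard a scheduler-side
caller would want (demo_last_work_fn is an illustrative name):

	#include <linux/sched.h>
	#include "workqueue_internal.h"

	static work_func_t demo_last_work_fn(struct task_struct *p)
	{
		/* kthread_data() only holds a struct worker for workqueue
		 * kthreads; check PF_WQ_WORKER before trusting it. */
		if (!(p->flags & PF_WQ_WORKER))
			return NULL;

		return wq_worker_last_func(p);
	}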