Diffstat (limited to 'kernel')
-rw-r--r--  kernel/bpf/btf.c | 17
-rw-r--r--  kernel/bpf/cgroup.c | 3
-rw-r--r--  kernel/bpf/hashtab.c | 4
-rw-r--r--  kernel/bpf/map_in_map.c | 17
-rw-r--r--  kernel/bpf/percpu_freelist.c | 41
-rw-r--r--  kernel/bpf/percpu_freelist.h | 4
-rw-r--r--  kernel/bpf/stackmap.c | 12
-rw-r--r--  kernel/bpf/syscall.c | 12
-rw-r--r--  kernel/bpf/verifier.c | 61
-rw-r--r--  kernel/cpu.c | 38
-rw-r--r--  kernel/dma/swiotlb.c | 2
-rw-r--r--  kernel/events/core.c | 14
-rw-r--r--  kernel/events/ring_buffer.c | 3
-rw-r--r--  kernel/exit.c | 15
-rw-r--r--  kernel/fork.c | 14
-rw-r--r--  kernel/futex.c | 45
-rw-r--r--  kernel/irq/chip.c | 1
-rw-r--r--  kernel/irq/irq_sim.c | 12
-rw-r--r--  kernel/irq/irqdesc.c | 2
-rw-r--r--  kernel/irq/manage.c | 3
-rw-r--r--  kernel/locking/rtmutex.c | 37
-rw-r--r--  kernel/locking/rwsem-xadd.c | 11
-rw-r--r--  kernel/relay.c | 4
-rw-r--r--  kernel/sched/core.c | 19
-rw-r--r--  kernel/sched/fair.c | 1
-rw-r--r--  kernel/sched/psi.c | 21
-rw-r--r--  kernel/seccomp.c | 4
-rw-r--r--  kernel/signal.c | 63
-rw-r--r--  kernel/smp.c | 2
-rw-r--r--  kernel/sys.c | 3
-rw-r--r--  kernel/time/posix-cpu-timers.c | 1
-rw-r--r--  kernel/trace/bpf_trace.c | 14
-rw-r--r--  kernel/trace/trace_kprobe.c | 12
-rw-r--r--  kernel/trace/trace_uprobe.c | 9
-rw-r--r--  kernel/umh.c | 33
-rw-r--r--  kernel/workqueue.c | 23
-rw-r--r--  kernel/workqueue_internal.h | 6
37 files changed, 436 insertions(+), 147 deletions(-)
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 715f9fcf4712..c57bd10340ed 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -467,7 +467,7 @@ static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
467 return kind_ops[BTF_INFO_KIND(t->info)]; 467 return kind_ops[BTF_INFO_KIND(t->info)];
468} 468}
469 469
470bool btf_name_offset_valid(const struct btf *btf, u32 offset) 470static bool btf_name_offset_valid(const struct btf *btf, u32 offset)
471{ 471{
472 return BTF_STR_OFFSET_VALID(offset) && 472 return BTF_STR_OFFSET_VALID(offset) &&
473 offset < btf->hdr.str_len; 473 offset < btf->hdr.str_len;
@@ -1219,8 +1219,6 @@ static void btf_bitfield_seq_show(void *data, u8 bits_offset,
1219 u8 nr_copy_bits; 1219 u8 nr_copy_bits;
1220 u64 print_num; 1220 u64 print_num;
1221 1221
1222 data += BITS_ROUNDDOWN_BYTES(bits_offset);
1223 bits_offset = BITS_PER_BYTE_MASKED(bits_offset);
1224 nr_copy_bits = nr_bits + bits_offset; 1222 nr_copy_bits = nr_bits + bits_offset;
1225 nr_copy_bytes = BITS_ROUNDUP_BYTES(nr_copy_bits); 1223 nr_copy_bytes = BITS_ROUNDUP_BYTES(nr_copy_bits);
1226 1224
@@ -1255,7 +1253,9 @@ static void btf_int_bits_seq_show(const struct btf *btf,
1255 * BTF_INT_OFFSET() cannot exceed 64 bits. 1253 * BTF_INT_OFFSET() cannot exceed 64 bits.
1256 */ 1254 */
1257 total_bits_offset = bits_offset + BTF_INT_OFFSET(int_data); 1255 total_bits_offset = bits_offset + BTF_INT_OFFSET(int_data);
1258 btf_bitfield_seq_show(data, total_bits_offset, nr_bits, m); 1256 data += BITS_ROUNDDOWN_BYTES(total_bits_offset);
1257 bits_offset = BITS_PER_BYTE_MASKED(total_bits_offset);
1258 btf_bitfield_seq_show(data, bits_offset, nr_bits, m);
1259} 1259}
1260 1260
1261static void btf_int_seq_show(const struct btf *btf, const struct btf_type *t, 1261static void btf_int_seq_show(const struct btf *btf, const struct btf_type *t,
@@ -1459,7 +1459,8 @@ static int btf_modifier_resolve(struct btf_verifier_env *env,
1459 1459
1460 /* "typedef void new_void", "const void"...etc */ 1460 /* "typedef void new_void", "const void"...etc */
1461 if (!btf_type_is_void(next_type) && 1461 if (!btf_type_is_void(next_type) &&
1462 !btf_type_is_fwd(next_type)) { 1462 !btf_type_is_fwd(next_type) &&
1463 !btf_type_is_func_proto(next_type)) {
1463 btf_verifier_log_type(env, v->t, "Invalid type_id"); 1464 btf_verifier_log_type(env, v->t, "Invalid type_id");
1464 return -EINVAL; 1465 return -EINVAL;
1465 } 1466 }
@@ -2001,12 +2002,12 @@ static void btf_struct_seq_show(const struct btf *btf, const struct btf_type *t,
2001 2002
2002 member_offset = btf_member_bit_offset(t, member); 2003 member_offset = btf_member_bit_offset(t, member);
2003 bitfield_size = btf_member_bitfield_size(t, member); 2004 bitfield_size = btf_member_bitfield_size(t, member);
2005 bytes_offset = BITS_ROUNDDOWN_BYTES(member_offset);
2006 bits8_offset = BITS_PER_BYTE_MASKED(member_offset);
2004 if (bitfield_size) { 2007 if (bitfield_size) {
2005 btf_bitfield_seq_show(data, member_offset, 2008 btf_bitfield_seq_show(data + bytes_offset, bits8_offset,
2006 bitfield_size, m); 2009 bitfield_size, m);
2007 } else { 2010 } else {
2008 bytes_offset = BITS_ROUNDDOWN_BYTES(member_offset);
2009 bits8_offset = BITS_PER_BYTE_MASKED(member_offset);
2010 ops = btf_type_ops(member_type); 2011 ops = btf_type_ops(member_type);
2011 ops->seq_show(btf, member_type, member->type, 2012 ops->seq_show(btf, member_type, member->type,
2012 data + bytes_offset, bits8_offset, m); 2013 data + bytes_offset, bits8_offset, m);
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 9425c2fb872f..d17d05570a3f 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -572,7 +572,7 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
572 bpf_compute_and_save_data_end(skb, &saved_data_end); 572 bpf_compute_and_save_data_end(skb, &saved_data_end);
573 573
574 ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb, 574 ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
575 bpf_prog_run_save_cb); 575 __bpf_prog_run_save_cb);
576 bpf_restore_data_end(skb, saved_data_end); 576 bpf_restore_data_end(skb, saved_data_end);
577 __skb_pull(skb, offset); 577 __skb_pull(skb, offset);
578 skb->sk = save_sk; 578 skb->sk = save_sk;
@@ -718,6 +718,7 @@ cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
718 case BPF_FUNC_trace_printk: 718 case BPF_FUNC_trace_printk:
719 if (capable(CAP_SYS_ADMIN)) 719 if (capable(CAP_SYS_ADMIN))
720 return bpf_get_trace_printk_proto(); 720 return bpf_get_trace_printk_proto();
721 /* fall through */
721 default: 722 default:
722 return NULL; 723 return NULL;
723 } 724 }
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 4b7c76765d9d..f9274114c88d 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -686,7 +686,7 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
686 } 686 }
687 687
688 if (htab_is_prealloc(htab)) { 688 if (htab_is_prealloc(htab)) {
689 pcpu_freelist_push(&htab->freelist, &l->fnode); 689 __pcpu_freelist_push(&htab->freelist, &l->fnode);
690 } else { 690 } else {
691 atomic_dec(&htab->count); 691 atomic_dec(&htab->count);
692 l->htab = htab; 692 l->htab = htab;
@@ -748,7 +748,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
748 } else { 748 } else {
749 struct pcpu_freelist_node *l; 749 struct pcpu_freelist_node *l;
750 750
751 l = pcpu_freelist_pop(&htab->freelist); 751 l = __pcpu_freelist_pop(&htab->freelist);
752 if (!l) 752 if (!l)
753 return ERR_PTR(-E2BIG); 753 return ERR_PTR(-E2BIG);
754 l_new = container_of(l, struct htab_elem, fnode); 754 l_new = container_of(l, struct htab_elem, fnode);
diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
index 99d243e1ad6e..52378d3e34b3 100644
--- a/kernel/bpf/map_in_map.c
+++ b/kernel/bpf/map_in_map.c
@@ -12,6 +12,7 @@
12struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) 12struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
13{ 13{
14 struct bpf_map *inner_map, *inner_map_meta; 14 struct bpf_map *inner_map, *inner_map_meta;
15 u32 inner_map_meta_size;
15 struct fd f; 16 struct fd f;
16 17
17 f = fdget(inner_map_ufd); 18 f = fdget(inner_map_ufd);
@@ -36,7 +37,12 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
36 return ERR_PTR(-EINVAL); 37 return ERR_PTR(-EINVAL);
37 } 38 }
38 39
39 inner_map_meta = kzalloc(sizeof(*inner_map_meta), GFP_USER); 40 inner_map_meta_size = sizeof(*inner_map_meta);
41 /* In some cases verifier needs to access beyond just base map. */
42 if (inner_map->ops == &array_map_ops)
43 inner_map_meta_size = sizeof(struct bpf_array);
44
45 inner_map_meta = kzalloc(inner_map_meta_size, GFP_USER);
40 if (!inner_map_meta) { 46 if (!inner_map_meta) {
41 fdput(f); 47 fdput(f);
42 return ERR_PTR(-ENOMEM); 48 return ERR_PTR(-ENOMEM);
@@ -46,9 +52,16 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
46 inner_map_meta->key_size = inner_map->key_size; 52 inner_map_meta->key_size = inner_map->key_size;
47 inner_map_meta->value_size = inner_map->value_size; 53 inner_map_meta->value_size = inner_map->value_size;
48 inner_map_meta->map_flags = inner_map->map_flags; 54 inner_map_meta->map_flags = inner_map->map_flags;
49 inner_map_meta->ops = inner_map->ops;
50 inner_map_meta->max_entries = inner_map->max_entries; 55 inner_map_meta->max_entries = inner_map->max_entries;
51 56
57 /* Misc members not needed in bpf_map_meta_equal() check. */
58 inner_map_meta->ops = inner_map->ops;
59 if (inner_map->ops == &array_map_ops) {
60 inner_map_meta->unpriv_array = inner_map->unpriv_array;
61 container_of(inner_map_meta, struct bpf_array, map)->index_mask =
62 container_of(inner_map, struct bpf_array, map)->index_mask;
63 }
64
52 fdput(f); 65 fdput(f);
53 return inner_map_meta; 66 return inner_map_meta;
54} 67}
diff --git a/kernel/bpf/percpu_freelist.c b/kernel/bpf/percpu_freelist.c
index 673fa6fe2d73..0c1b4ba9e90e 100644
--- a/kernel/bpf/percpu_freelist.c
+++ b/kernel/bpf/percpu_freelist.c
@@ -28,8 +28,8 @@ void pcpu_freelist_destroy(struct pcpu_freelist *s)
28 free_percpu(s->freelist); 28 free_percpu(s->freelist);
29} 29}
30 30
31static inline void __pcpu_freelist_push(struct pcpu_freelist_head *head, 31static inline void ___pcpu_freelist_push(struct pcpu_freelist_head *head,
32 struct pcpu_freelist_node *node) 32 struct pcpu_freelist_node *node)
33{ 33{
34 raw_spin_lock(&head->lock); 34 raw_spin_lock(&head->lock);
35 node->next = head->first; 35 node->next = head->first;
@@ -37,12 +37,22 @@ static inline void __pcpu_freelist_push(struct pcpu_freelist_head *head,
37 raw_spin_unlock(&head->lock); 37 raw_spin_unlock(&head->lock);
38} 38}
39 39
40void pcpu_freelist_push(struct pcpu_freelist *s, 40void __pcpu_freelist_push(struct pcpu_freelist *s,
41 struct pcpu_freelist_node *node) 41 struct pcpu_freelist_node *node)
42{ 42{
43 struct pcpu_freelist_head *head = this_cpu_ptr(s->freelist); 43 struct pcpu_freelist_head *head = this_cpu_ptr(s->freelist);
44 44
45 __pcpu_freelist_push(head, node); 45 ___pcpu_freelist_push(head, node);
46}
47
48void pcpu_freelist_push(struct pcpu_freelist *s,
49 struct pcpu_freelist_node *node)
50{
51 unsigned long flags;
52
53 local_irq_save(flags);
54 __pcpu_freelist_push(s, node);
55 local_irq_restore(flags);
46} 56}
47 57
48void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size, 58void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size,
@@ -63,7 +73,7 @@ void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size,
63 for_each_possible_cpu(cpu) { 73 for_each_possible_cpu(cpu) {
64again: 74again:
65 head = per_cpu_ptr(s->freelist, cpu); 75 head = per_cpu_ptr(s->freelist, cpu);
66 __pcpu_freelist_push(head, buf); 76 ___pcpu_freelist_push(head, buf);
67 i++; 77 i++;
68 buf += elem_size; 78 buf += elem_size;
69 if (i == nr_elems) 79 if (i == nr_elems)
@@ -74,14 +84,12 @@ again:
74 local_irq_restore(flags); 84 local_irq_restore(flags);
75} 85}
76 86
77struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s) 87struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *s)
78{ 88{
79 struct pcpu_freelist_head *head; 89 struct pcpu_freelist_head *head;
80 struct pcpu_freelist_node *node; 90 struct pcpu_freelist_node *node;
81 unsigned long flags;
82 int orig_cpu, cpu; 91 int orig_cpu, cpu;
83 92
84 local_irq_save(flags);
85 orig_cpu = cpu = raw_smp_processor_id(); 93 orig_cpu = cpu = raw_smp_processor_id();
86 while (1) { 94 while (1) {
87 head = per_cpu_ptr(s->freelist, cpu); 95 head = per_cpu_ptr(s->freelist, cpu);
@@ -89,16 +97,25 @@ struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s)
89 node = head->first; 97 node = head->first;
90 if (node) { 98 if (node) {
91 head->first = node->next; 99 head->first = node->next;
92 raw_spin_unlock_irqrestore(&head->lock, flags); 100 raw_spin_unlock(&head->lock);
93 return node; 101 return node;
94 } 102 }
95 raw_spin_unlock(&head->lock); 103 raw_spin_unlock(&head->lock);
96 cpu = cpumask_next(cpu, cpu_possible_mask); 104 cpu = cpumask_next(cpu, cpu_possible_mask);
97 if (cpu >= nr_cpu_ids) 105 if (cpu >= nr_cpu_ids)
98 cpu = 0; 106 cpu = 0;
99 if (cpu == orig_cpu) { 107 if (cpu == orig_cpu)
100 local_irq_restore(flags);
101 return NULL; 108 return NULL;
102 }
103 } 109 }
104} 110}
111
112struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s)
113{
114 struct pcpu_freelist_node *ret;
115 unsigned long flags;
116
117 local_irq_save(flags);
118 ret = __pcpu_freelist_pop(s);
119 local_irq_restore(flags);
120 return ret;
121}
diff --git a/kernel/bpf/percpu_freelist.h b/kernel/bpf/percpu_freelist.h
index 3049aae8ea1e..c3960118e617 100644
--- a/kernel/bpf/percpu_freelist.h
+++ b/kernel/bpf/percpu_freelist.h
@@ -22,8 +22,12 @@ struct pcpu_freelist_node {
22 struct pcpu_freelist_node *next; 22 struct pcpu_freelist_node *next;
23}; 23};
24 24
25/* pcpu_freelist_* do spin_lock_irqsave. */
25void pcpu_freelist_push(struct pcpu_freelist *, struct pcpu_freelist_node *); 26void pcpu_freelist_push(struct pcpu_freelist *, struct pcpu_freelist_node *);
26struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *); 27struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *);
28/* __pcpu_freelist_* do spin_lock only. caller must disable irqs. */
29void __pcpu_freelist_push(struct pcpu_freelist *, struct pcpu_freelist_node *);
30struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *);
27void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size, 31void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size,
28 u32 nr_elems); 32 u32 nr_elems);
29int pcpu_freelist_init(struct pcpu_freelist *); 33int pcpu_freelist_init(struct pcpu_freelist *);
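Note on the freelist split above: pcpu_freelist_push()/pcpu_freelist_pop() now disable interrupts themselves, while the new double-underscore variants only take the per-cpu spinlock and rely on the caller already having interrupts off (which is why hashtab.c, running under its IRQ-disabling bucket lock, switches to them). A minimal sketch of that contract, not part of the patch; the freelist s, the node, and the lock b->lock are assumed placeholders:

	unsigned long flags;
	struct pcpu_freelist_node *node;

	/* Ordinary context: the wrappers do local_irq_save()/restore() internally. */
	pcpu_freelist_push(&s, node);
	node = pcpu_freelist_pop(&s);

	/* Caller already has IRQs disabled, e.g. under a lock taken with
	 * raw_spin_lock_irqsave(): use the raw variants so the freelist lock
	 * nests without touching the interrupt state again. */
	raw_spin_lock_irqsave(&b->lock, flags);
	__pcpu_freelist_push(&s, node);
	raw_spin_unlock_irqrestore(&b->lock, flags);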
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 90daf285de03..d43b14535827 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -180,11 +180,14 @@ static inline int stack_map_parse_build_id(void *page_addr,
180 180
181 if (nhdr->n_type == BPF_BUILD_ID && 181 if (nhdr->n_type == BPF_BUILD_ID &&
182 nhdr->n_namesz == sizeof("GNU") && 182 nhdr->n_namesz == sizeof("GNU") &&
183 nhdr->n_descsz == BPF_BUILD_ID_SIZE) { 183 nhdr->n_descsz > 0 &&
184 nhdr->n_descsz <= BPF_BUILD_ID_SIZE) {
184 memcpy(build_id, 185 memcpy(build_id,
185 note_start + note_offs + 186 note_start + note_offs +
186 ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr), 187 ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr),
187 BPF_BUILD_ID_SIZE); 188 nhdr->n_descsz);
189 memset(build_id + nhdr->n_descsz, 0,
190 BPF_BUILD_ID_SIZE - nhdr->n_descsz);
188 return 0; 191 return 0;
189 } 192 }
190 new_offs = note_offs + sizeof(Elf32_Nhdr) + 193 new_offs = note_offs + sizeof(Elf32_Nhdr) +
@@ -260,7 +263,7 @@ static int stack_map_get_build_id(struct vm_area_struct *vma,
260 return -EFAULT; /* page not mapped */ 263 return -EFAULT; /* page not mapped */
261 264
262 ret = -EINVAL; 265 ret = -EINVAL;
263 page_addr = page_address(page); 266 page_addr = kmap_atomic(page);
264 ehdr = (Elf32_Ehdr *)page_addr; 267 ehdr = (Elf32_Ehdr *)page_addr;
265 268
266 /* compare magic x7f "ELF" */ 269 /* compare magic x7f "ELF" */
@@ -276,6 +279,7 @@ static int stack_map_get_build_id(struct vm_area_struct *vma,
276 else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64) 279 else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64)
277 ret = stack_map_get_build_id_64(page_addr, build_id); 280 ret = stack_map_get_build_id_64(page_addr, build_id);
278out: 281out:
282 kunmap_atomic(page_addr);
279 put_page(page); 283 put_page(page);
280 return ret; 284 return ret;
281} 285}
@@ -310,6 +314,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
310 for (i = 0; i < trace_nr; i++) { 314 for (i = 0; i < trace_nr; i++) {
311 id_offs[i].status = BPF_STACK_BUILD_ID_IP; 315 id_offs[i].status = BPF_STACK_BUILD_ID_IP;
312 id_offs[i].ip = ips[i]; 316 id_offs[i].ip = ips[i];
317 memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE);
313 } 318 }
314 return; 319 return;
315 } 320 }
@@ -320,6 +325,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
320 /* per entry fall back to ips */ 325 /* per entry fall back to ips */
321 id_offs[i].status = BPF_STACK_BUILD_ID_IP; 326 id_offs[i].status = BPF_STACK_BUILD_ID_IP;
322 id_offs[i].ip = ips[i]; 327 id_offs[i].ip = ips[i];
328 memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE);
323 continue; 329 continue;
324 } 330 }
325 id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i] 331 id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i]
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index b155cd17c1bd..8577bb7f8be6 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -713,8 +713,13 @@ static int map_lookup_elem(union bpf_attr *attr)
713 713
714 if (bpf_map_is_dev_bound(map)) { 714 if (bpf_map_is_dev_bound(map)) {
715 err = bpf_map_offload_lookup_elem(map, key, value); 715 err = bpf_map_offload_lookup_elem(map, key, value);
716 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 716 goto done;
717 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { 717 }
718
719 preempt_disable();
720 this_cpu_inc(bpf_prog_active);
721 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
722 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
718 err = bpf_percpu_hash_copy(map, key, value); 723 err = bpf_percpu_hash_copy(map, key, value);
719 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { 724 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
720 err = bpf_percpu_array_copy(map, key, value); 725 err = bpf_percpu_array_copy(map, key, value);
@@ -744,7 +749,10 @@ static int map_lookup_elem(union bpf_attr *attr)
744 } 749 }
745 rcu_read_unlock(); 750 rcu_read_unlock();
746 } 751 }
752 this_cpu_dec(bpf_prog_active);
753 preempt_enable();
747 754
755done:
748 if (err) 756 if (err)
749 goto free_value; 757 goto free_value;
750 758
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f6bc62a9ee8e..56674a7c3778 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3103,6 +3103,40 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
3103 } 3103 }
3104} 3104}
3105 3105
3106static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
3107 const struct bpf_insn *insn)
3108{
3109 return env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K;
3110}
3111
3112static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
3113 u32 alu_state, u32 alu_limit)
3114{
3115 /* If we arrived here from different branches with different
3116 * state or limits to sanitize, then this won't work.
3117 */
3118 if (aux->alu_state &&
3119 (aux->alu_state != alu_state ||
3120 aux->alu_limit != alu_limit))
3121 return -EACCES;
3122
3123 /* Corresponding fixup done in fixup_bpf_calls(). */
3124 aux->alu_state = alu_state;
3125 aux->alu_limit = alu_limit;
3126 return 0;
3127}
3128
3129static int sanitize_val_alu(struct bpf_verifier_env *env,
3130 struct bpf_insn *insn)
3131{
3132 struct bpf_insn_aux_data *aux = cur_aux(env);
3133
3134 if (can_skip_alu_sanitation(env, insn))
3135 return 0;
3136
3137 return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
3138}
3139
3106static int sanitize_ptr_alu(struct bpf_verifier_env *env, 3140static int sanitize_ptr_alu(struct bpf_verifier_env *env,
3107 struct bpf_insn *insn, 3141 struct bpf_insn *insn,
3108 const struct bpf_reg_state *ptr_reg, 3142 const struct bpf_reg_state *ptr_reg,
@@ -3117,7 +3151,7 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
3117 struct bpf_reg_state tmp; 3151 struct bpf_reg_state tmp;
3118 bool ret; 3152 bool ret;
3119 3153
3120 if (env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K) 3154 if (can_skip_alu_sanitation(env, insn))
3121 return 0; 3155 return 0;
3122 3156
3123 /* We already marked aux for masking from non-speculative 3157 /* We already marked aux for masking from non-speculative
@@ -3133,19 +3167,8 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
3133 3167
3134 if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg)) 3168 if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg))
3135 return 0; 3169 return 0;
3136 3170 if (update_alu_sanitation_state(aux, alu_state, alu_limit))
3137 /* If we arrived here from different branches with different
3138 * limits to sanitize, then this won't work.
3139 */
3140 if (aux->alu_state &&
3141 (aux->alu_state != alu_state ||
3142 aux->alu_limit != alu_limit))
3143 return -EACCES; 3171 return -EACCES;
3144
3145 /* Corresponding fixup done in fixup_bpf_calls(). */
3146 aux->alu_state = alu_state;
3147 aux->alu_limit = alu_limit;
3148
3149do_sim: 3172do_sim:
3150 /* Simulate and find potential out-of-bounds access under 3173 /* Simulate and find potential out-of-bounds access under
3151 * speculative execution from truncation as a result of 3174 * speculative execution from truncation as a result of
@@ -3418,6 +3441,8 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
3418 s64 smin_val, smax_val; 3441 s64 smin_val, smax_val;
3419 u64 umin_val, umax_val; 3442 u64 umin_val, umax_val;
3420 u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32; 3443 u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
3444 u32 dst = insn->dst_reg;
3445 int ret;
3421 3446
3422 if (insn_bitness == 32) { 3447 if (insn_bitness == 32) {
3423 /* Relevant for 32-bit RSH: Information can propagate towards 3448 /* Relevant for 32-bit RSH: Information can propagate towards
@@ -3452,6 +3477,11 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
3452 3477
3453 switch (opcode) { 3478 switch (opcode) {
3454 case BPF_ADD: 3479 case BPF_ADD:
3480 ret = sanitize_val_alu(env, insn);
3481 if (ret < 0) {
3482 verbose(env, "R%d tried to add from different pointers or scalars\n", dst);
3483 return ret;
3484 }
3455 if (signed_add_overflows(dst_reg->smin_value, smin_val) || 3485 if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
3456 signed_add_overflows(dst_reg->smax_value, smax_val)) { 3486 signed_add_overflows(dst_reg->smax_value, smax_val)) {
3457 dst_reg->smin_value = S64_MIN; 3487 dst_reg->smin_value = S64_MIN;
@@ -3471,6 +3501,11 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
3471 dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off); 3501 dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
3472 break; 3502 break;
3473 case BPF_SUB: 3503 case BPF_SUB:
3504 ret = sanitize_val_alu(env, insn);
3505 if (ret < 0) {
3506 verbose(env, "R%d tried to sub from different pointers or scalars\n", dst);
3507 return ret;
3508 }
3474 if (signed_sub_overflows(dst_reg->smin_value, smax_val) || 3509 if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
3475 signed_sub_overflows(dst_reg->smax_value, smin_val)) { 3510 signed_sub_overflows(dst_reg->smax_value, smin_val)) {
3476 /* Overflow possible, we know nothing */ 3511 /* Overflow possible, we know nothing */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 91d5c38eb7e5..d1c6d152da89 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -376,9 +376,6 @@ void __weak arch_smt_update(void) { }
376 376
377#ifdef CONFIG_HOTPLUG_SMT 377#ifdef CONFIG_HOTPLUG_SMT
378enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED; 378enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
379EXPORT_SYMBOL_GPL(cpu_smt_control);
380
381static bool cpu_smt_available __read_mostly;
382 379
383void __init cpu_smt_disable(bool force) 380void __init cpu_smt_disable(bool force)
384{ 381{
@@ -397,25 +394,11 @@ void __init cpu_smt_disable(bool force)
397 394
398/* 395/*
399 * The decision whether SMT is supported can only be done after the full 396 * The decision whether SMT is supported can only be done after the full
400 * CPU identification. Called from architecture code before non boot CPUs 397 * CPU identification. Called from architecture code.
401 * are brought up.
402 */
403void __init cpu_smt_check_topology_early(void)
404{
405 if (!topology_smt_supported())
406 cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
407}
408
409/*
410 * If SMT was disabled by BIOS, detect it here, after the CPUs have been
411 * brought online. This ensures the smt/l1tf sysfs entries are consistent
412 * with reality. cpu_smt_available is set to true during the bringup of non
413 * boot CPUs when a SMT sibling is detected. Note, this may overwrite
414 * cpu_smt_control's previous setting.
415 */ 398 */
416void __init cpu_smt_check_topology(void) 399void __init cpu_smt_check_topology(void)
417{ 400{
418 if (!cpu_smt_available) 401 if (!topology_smt_supported())
419 cpu_smt_control = CPU_SMT_NOT_SUPPORTED; 402 cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
420} 403}
421 404
@@ -428,18 +411,10 @@ early_param("nosmt", smt_cmdline_disable);
428 411
429static inline bool cpu_smt_allowed(unsigned int cpu) 412static inline bool cpu_smt_allowed(unsigned int cpu)
430{ 413{
431 if (topology_is_primary_thread(cpu)) 414 if (cpu_smt_control == CPU_SMT_ENABLED)
432 return true; 415 return true;
433 416
434 /* 417 if (topology_is_primary_thread(cpu))
435 * If the CPU is not a 'primary' thread and the booted_once bit is
436 * set then the processor has SMT support. Store this information
437 * for the late check of SMT support in cpu_smt_check_topology().
438 */
439 if (per_cpu(cpuhp_state, cpu).booted_once)
440 cpu_smt_available = true;
441
442 if (cpu_smt_control == CPU_SMT_ENABLED)
443 return true; 418 return true;
444 419
445 /* 420 /*
@@ -2090,10 +2065,8 @@ static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
2090 */ 2065 */
2091 cpuhp_offline_cpu_device(cpu); 2066 cpuhp_offline_cpu_device(cpu);
2092 } 2067 }
2093 if (!ret) { 2068 if (!ret)
2094 cpu_smt_control = ctrlval; 2069 cpu_smt_control = ctrlval;
2095 arch_smt_update();
2096 }
2097 cpu_maps_update_done(); 2070 cpu_maps_update_done();
2098 return ret; 2071 return ret;
2099} 2072}
@@ -2104,7 +2077,6 @@ static int cpuhp_smt_enable(void)
2104 2077
2105 cpu_maps_update_begin(); 2078 cpu_maps_update_begin();
2106 cpu_smt_control = CPU_SMT_ENABLED; 2079 cpu_smt_control = CPU_SMT_ENABLED;
2107 arch_smt_update();
2108 for_each_present_cpu(cpu) { 2080 for_each_present_cpu(cpu) {
2109 /* Skip online CPUs and CPUs on offline nodes */ 2081 /* Skip online CPUs and CPUs on offline nodes */
2110 if (cpu_online(cpu) || !node_online(cpu_to_node(cpu))) 2082 if (cpu_online(cpu) || !node_online(cpu_to_node(cpu)))
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index d6361776dc5c..1fb6fd68b9c7 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -378,6 +378,8 @@ void __init swiotlb_exit(void)
378 memblock_free_late(io_tlb_start, 378 memblock_free_late(io_tlb_start,
379 PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); 379 PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
380 } 380 }
381 io_tlb_start = 0;
382 io_tlb_end = 0;
381 io_tlb_nslabs = 0; 383 io_tlb_nslabs = 0;
382 max_segment = 0; 384 max_segment = 0;
383} 385}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3cd13a30f732..e5ede6918050 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -436,18 +436,18 @@ int perf_proc_update_handler(struct ctl_table *table, int write,
436 void __user *buffer, size_t *lenp, 436 void __user *buffer, size_t *lenp,
437 loff_t *ppos) 437 loff_t *ppos)
438{ 438{
439 int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 439 int ret;
440 440 int perf_cpu = sysctl_perf_cpu_time_max_percent;
441 if (ret || !write)
442 return ret;
443
444 /* 441 /*
445 * If throttling is disabled don't allow the write: 442 * If throttling is disabled don't allow the write:
446 */ 443 */
447 if (sysctl_perf_cpu_time_max_percent == 100 || 444 if (write && (perf_cpu == 100 || perf_cpu == 0))
448 sysctl_perf_cpu_time_max_percent == 0)
449 return -EINVAL; 445 return -EINVAL;
450 446
447 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
448 if (ret || !write)
449 return ret;
450
451 max_samples_per_tick = DIV_ROUND_UP(sysctl_perf_event_sample_rate, HZ); 451 max_samples_per_tick = DIV_ROUND_UP(sysctl_perf_event_sample_rate, HZ);
452 perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate; 452 perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;
453 update_perf_cpu_limits(); 453 update_perf_cpu_limits();
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 4a9937076331..309ef5a64af5 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -734,6 +734,9 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
734 size = sizeof(struct ring_buffer); 734 size = sizeof(struct ring_buffer);
735 size += nr_pages * sizeof(void *); 735 size += nr_pages * sizeof(void *);
736 736
737 if (order_base_2(size) >= MAX_ORDER)
738 goto fail;
739
737 rb = kzalloc(size, GFP_KERNEL); 740 rb = kzalloc(size, GFP_KERNEL);
738 if (!rb) 741 if (!rb)
739 goto fail; 742 goto fail;
diff --git a/kernel/exit.c b/kernel/exit.c
index 2d14979577ee..2639a30a8aa5 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -307,7 +307,7 @@ void rcuwait_wake_up(struct rcuwait *w)
307 * MB (A) MB (B) 307 * MB (A) MB (B)
308 * [L] cond [L] tsk 308 * [L] cond [L] tsk
309 */ 309 */
310 smp_rmb(); /* (B) */ 310 smp_mb(); /* (B) */
311 311
312 /* 312 /*
313 * Avoid using task_rcu_dereference() magic as long as we are careful, 313 * Avoid using task_rcu_dereference() magic as long as we are careful,
@@ -558,12 +558,14 @@ static struct task_struct *find_alive_thread(struct task_struct *p)
558 return NULL; 558 return NULL;
559} 559}
560 560
561static struct task_struct *find_child_reaper(struct task_struct *father) 561static struct task_struct *find_child_reaper(struct task_struct *father,
562 struct list_head *dead)
562 __releases(&tasklist_lock) 563 __releases(&tasklist_lock)
563 __acquires(&tasklist_lock) 564 __acquires(&tasklist_lock)
564{ 565{
565 struct pid_namespace *pid_ns = task_active_pid_ns(father); 566 struct pid_namespace *pid_ns = task_active_pid_ns(father);
566 struct task_struct *reaper = pid_ns->child_reaper; 567 struct task_struct *reaper = pid_ns->child_reaper;
568 struct task_struct *p, *n;
567 569
568 if (likely(reaper != father)) 570 if (likely(reaper != father))
569 return reaper; 571 return reaper;
@@ -579,6 +581,12 @@ static struct task_struct *find_child_reaper(struct task_struct *father)
579 panic("Attempted to kill init! exitcode=0x%08x\n", 581 panic("Attempted to kill init! exitcode=0x%08x\n",
580 father->signal->group_exit_code ?: father->exit_code); 582 father->signal->group_exit_code ?: father->exit_code);
581 } 583 }
584
585 list_for_each_entry_safe(p, n, dead, ptrace_entry) {
586 list_del_init(&p->ptrace_entry);
587 release_task(p);
588 }
589
582 zap_pid_ns_processes(pid_ns); 590 zap_pid_ns_processes(pid_ns);
583 write_lock_irq(&tasklist_lock); 591 write_lock_irq(&tasklist_lock);
584 592
@@ -668,7 +676,7 @@ static void forget_original_parent(struct task_struct *father,
668 exit_ptrace(father, dead); 676 exit_ptrace(father, dead);
669 677
670 /* Can drop and reacquire tasklist_lock */ 678 /* Can drop and reacquire tasklist_lock */
671 reaper = find_child_reaper(father); 679 reaper = find_child_reaper(father, dead);
672 if (list_empty(&father->children)) 680 if (list_empty(&father->children))
673 return; 681 return;
674 682
@@ -866,6 +874,7 @@ void __noreturn do_exit(long code)
866 exit_task_namespaces(tsk); 874 exit_task_namespaces(tsk);
867 exit_task_work(tsk); 875 exit_task_work(tsk);
868 exit_thread(tsk); 876 exit_thread(tsk);
877 exit_umh(tsk);
869 878
870 /* 879 /*
871 * Flush inherited counters to the parent - before the parent 880 * Flush inherited counters to the parent - before the parent
diff --git a/kernel/fork.c b/kernel/fork.c
index a60459947f18..b69248e6f0e0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -217,6 +217,7 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
217 memset(s->addr, 0, THREAD_SIZE); 217 memset(s->addr, 0, THREAD_SIZE);
218 218
219 tsk->stack_vm_area = s; 219 tsk->stack_vm_area = s;
220 tsk->stack = s->addr;
220 return s->addr; 221 return s->addr;
221 } 222 }
222 223
@@ -1833,8 +1834,6 @@ static __latent_entropy struct task_struct *copy_process(
1833 1834
1834 posix_cpu_timers_init(p); 1835 posix_cpu_timers_init(p);
1835 1836
1836 p->start_time = ktime_get_ns();
1837 p->real_start_time = ktime_get_boot_ns();
1838 p->io_context = NULL; 1837 p->io_context = NULL;
1839 audit_set_context(p, NULL); 1838 audit_set_context(p, NULL);
1840 cgroup_fork(p); 1839 cgroup_fork(p);
@@ -2001,6 +2000,17 @@ static __latent_entropy struct task_struct *copy_process(
2001 goto bad_fork_free_pid; 2000 goto bad_fork_free_pid;
2002 2001
2003 /* 2002 /*
2003 * From this point on we must avoid any synchronous user-space
2004 * communication until we take the tasklist-lock. In particular, we do
2005 * not want user-space to be able to predict the process start-time by
2006 * stalling fork(2) after we recorded the start_time but before it is
2007 * visible to the system.
2008 */
2009
2010 p->start_time = ktime_get_ns();
2011 p->real_start_time = ktime_get_boot_ns();
2012
2013 /*
2004 * Make it visible to the rest of the system, but dont wake it up yet. 2014 * Make it visible to the rest of the system, but dont wake it up yet.
2005 * Need tasklist lock for parent etc handling! 2015 * Need tasklist lock for parent etc handling!
2006 */ 2016 */
diff --git a/kernel/futex.c b/kernel/futex.c
index be3bff2315ff..a0514e01c3eb 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1452,11 +1452,7 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
1452 if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n")) 1452 if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n"))
1453 return; 1453 return;
1454 1454
1455 /* 1455 get_task_struct(p);
1456 * Queue the task for later wakeup for after we've released
1457 * the hb->lock. wake_q_add() grabs reference to p.
1458 */
1459 wake_q_add(wake_q, p);
1460 __unqueue_futex(q); 1456 __unqueue_futex(q);
1461 /* 1457 /*
1462 * The waiting task can free the futex_q as soon as q->lock_ptr = NULL 1458 * The waiting task can free the futex_q as soon as q->lock_ptr = NULL
@@ -1466,6 +1462,13 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
1466 * plist_del in __unqueue_futex(). 1462 * plist_del in __unqueue_futex().
1467 */ 1463 */
1468 smp_store_release(&q->lock_ptr, NULL); 1464 smp_store_release(&q->lock_ptr, NULL);
1465
1466 /*
1467 * Queue the task for later wakeup for after we've released
1468 * the hb->lock. wake_q_add() grabs reference to p.
1469 */
1470 wake_q_add(wake_q, p);
1471 put_task_struct(p);
1469} 1472}
1470 1473
1471/* 1474/*
@@ -2218,11 +2221,11 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
2218 * decrement the counter at queue_unlock() when some error has 2221 * decrement the counter at queue_unlock() when some error has
2219 * occurred and we don't end up adding the task to the list. 2222 * occurred and we don't end up adding the task to the list.
2220 */ 2223 */
2221 hb_waiters_inc(hb); 2224 hb_waiters_inc(hb); /* implies smp_mb(); (A) */
2222 2225
2223 q->lock_ptr = &hb->lock; 2226 q->lock_ptr = &hb->lock;
2224 2227
2225 spin_lock(&hb->lock); /* implies smp_mb(); (A) */ 2228 spin_lock(&hb->lock);
2226 return hb; 2229 return hb;
2227} 2230}
2228 2231
@@ -2858,35 +2861,39 @@ retry_private:
2858 * and BUG when futex_unlock_pi() interleaves with this. 2861 * and BUG when futex_unlock_pi() interleaves with this.
2859 * 2862 *
2860 * Therefore acquire wait_lock while holding hb->lock, but drop the 2863 * Therefore acquire wait_lock while holding hb->lock, but drop the
2861 * latter before calling rt_mutex_start_proxy_lock(). This still fully 2864 * latter before calling __rt_mutex_start_proxy_lock(). This
2862 * serializes against futex_unlock_pi() as that does the exact same 2865 * interleaves with futex_unlock_pi() -- which does a similar lock
2863 * lock handoff sequence. 2866 * handoff -- such that the latter can observe the futex_q::pi_state
2867 * before __rt_mutex_start_proxy_lock() is done.
2864 */ 2868 */
2865 raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock); 2869 raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
2866 spin_unlock(q.lock_ptr); 2870 spin_unlock(q.lock_ptr);
2871 /*
2872 * __rt_mutex_start_proxy_lock() unconditionally enqueues the @rt_waiter
2873 * such that futex_unlock_pi() is guaranteed to observe the waiter when
2874 * it sees the futex_q::pi_state.
2875 */
2867 ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current); 2876 ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current);
2868 raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock); 2877 raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock);
2869 2878
2870 if (ret) { 2879 if (ret) {
2871 if (ret == 1) 2880 if (ret == 1)
2872 ret = 0; 2881 ret = 0;
2873 2882 goto cleanup;
2874 spin_lock(q.lock_ptr);
2875 goto no_block;
2876 } 2883 }
2877 2884
2878
2879 if (unlikely(to)) 2885 if (unlikely(to))
2880 hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS); 2886 hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS);
2881 2887
2882 ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter); 2888 ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter);
2883 2889
2890cleanup:
2884 spin_lock(q.lock_ptr); 2891 spin_lock(q.lock_ptr);
2885 /* 2892 /*
2886 * If we failed to acquire the lock (signal/timeout), we must 2893 * If we failed to acquire the lock (deadlock/signal/timeout), we must
2887 * first acquire the hb->lock before removing the lock from the 2894 * first acquire the hb->lock before removing the lock from the
2888 * rt_mutex waitqueue, such that we can keep the hb and rt_mutex 2895 * rt_mutex waitqueue, such that we can keep the hb and rt_mutex wait
2889 * wait lists consistent. 2896 * lists consistent.
2890 * 2897 *
2891 * In particular; it is important that futex_unlock_pi() can not 2898 * In particular; it is important that futex_unlock_pi() can not
2892 * observe this inconsistency. 2899 * observe this inconsistency.
@@ -3010,6 +3017,10 @@ retry:
3010 * there is no point where we hold neither; and therefore 3017 * there is no point where we hold neither; and therefore
3011 * wake_futex_pi() must observe a state consistent with what we 3018 * wake_futex_pi() must observe a state consistent with what we
3012 * observed. 3019 * observed.
3020 *
3021 * In particular; this forces __rt_mutex_start_proxy() to
3022 * complete such that we're guaranteed to observe the
3023 * rt_waiter. Also see the WARN in wake_futex_pi().
3013 */ 3024 */
3014 raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); 3025 raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
3015 spin_unlock(&hb->lock); 3026 spin_unlock(&hb->lock);
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 982b75e127c5..0aefc2e69cf5 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -1392,6 +1392,7 @@ int irq_chip_set_wake_parent(struct irq_data *data, unsigned int on)
1392 1392
1393 return -ENOSYS; 1393 return -ENOSYS;
1394} 1394}
1395EXPORT_SYMBOL_GPL(irq_chip_set_wake_parent);
1395#endif 1396#endif
1396 1397
1397/** 1398/**
diff --git a/kernel/irq/irq_sim.c b/kernel/irq/irq_sim.c
index 98a20e1594ce..b992f88c5613 100644
--- a/kernel/irq/irq_sim.c
+++ b/kernel/irq/irq_sim.c
@@ -25,10 +25,22 @@ static void irq_sim_irqunmask(struct irq_data *data)
25 irq_ctx->enabled = true; 25 irq_ctx->enabled = true;
26} 26}
27 27
28static int irq_sim_set_type(struct irq_data *data, unsigned int type)
29{
30 /* We only support rising and falling edge trigger types. */
31 if (type & ~IRQ_TYPE_EDGE_BOTH)
32 return -EINVAL;
33
34 irqd_set_trigger_type(data, type);
35
36 return 0;
37}
38
28static struct irq_chip irq_sim_irqchip = { 39static struct irq_chip irq_sim_irqchip = {
29 .name = "irq_sim", 40 .name = "irq_sim",
30 .irq_mask = irq_sim_irqmask, 41 .irq_mask = irq_sim_irqmask,
31 .irq_unmask = irq_sim_irqunmask, 42 .irq_unmask = irq_sim_irqunmask,
43 .irq_set_type = irq_sim_set_type,
32}; 44};
33 45
34static void irq_sim_handle_irq(struct irq_work *work) 46static void irq_sim_handle_irq(struct irq_work *work)
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index ee062b7939d3..ef8ad36cadcf 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -457,7 +457,7 @@ static int alloc_descs(unsigned int start, unsigned int cnt, int node,
457 457
458 /* Validate affinity mask(s) */ 458 /* Validate affinity mask(s) */
459 if (affinity) { 459 if (affinity) {
460 for (i = 0; i < cnt; i++, i++) { 460 for (i = 0; i < cnt; i++) {
461 if (cpumask_empty(&affinity[i].mask)) 461 if (cpumask_empty(&affinity[i].mask))
462 return -EINVAL; 462 return -EINVAL;
463 } 463 }
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index a4888ce4667a..84b54a17b95d 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -393,6 +393,9 @@ int irq_setup_affinity(struct irq_desc *desc)
393 } 393 }
394 394
395 cpumask_and(&mask, cpu_online_mask, set); 395 cpumask_and(&mask, cpu_online_mask, set);
396 if (cpumask_empty(&mask))
397 cpumask_copy(&mask, cpu_online_mask);
398
396 if (node != NUMA_NO_NODE) { 399 if (node != NUMA_NO_NODE) {
397 const struct cpumask *nodemask = cpumask_of_node(node); 400 const struct cpumask *nodemask = cpumask_of_node(node);
398 401
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 581edcc63c26..978d63a8261c 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1726,12 +1726,33 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock,
1726 rt_mutex_set_owner(lock, NULL); 1726 rt_mutex_set_owner(lock, NULL);
1727} 1727}
1728 1728
1729/**
1730 * __rt_mutex_start_proxy_lock() - Start lock acquisition for another task
1731 * @lock: the rt_mutex to take
1732 * @waiter: the pre-initialized rt_mutex_waiter
1733 * @task: the task to prepare
1734 *
1735 * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock
1736 * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that.
1737 *
1738 * NOTE: does _NOT_ remove the @waiter on failure; must either call
1739 * rt_mutex_wait_proxy_lock() or rt_mutex_cleanup_proxy_lock() after this.
1740 *
1741 * Returns:
1742 * 0 - task blocked on lock
1743 * 1 - acquired the lock for task, caller should wake it up
1744 * <0 - error
1745 *
1746 * Special API call for PI-futex support.
1747 */
1729int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, 1748int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1730 struct rt_mutex_waiter *waiter, 1749 struct rt_mutex_waiter *waiter,
1731 struct task_struct *task) 1750 struct task_struct *task)
1732{ 1751{
1733 int ret; 1752 int ret;
1734 1753
1754 lockdep_assert_held(&lock->wait_lock);
1755
1735 if (try_to_take_rt_mutex(lock, task, NULL)) 1756 if (try_to_take_rt_mutex(lock, task, NULL))
1736 return 1; 1757 return 1;
1737 1758
@@ -1749,9 +1770,6 @@ int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1749 ret = 0; 1770 ret = 0;
1750 } 1771 }
1751 1772
1752 if (unlikely(ret))
1753 remove_waiter(lock, waiter);
1754
1755 debug_rt_mutex_print_deadlock(waiter); 1773 debug_rt_mutex_print_deadlock(waiter);
1756 1774
1757 return ret; 1775 return ret;
@@ -1763,12 +1781,18 @@ int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1763 * @waiter: the pre-initialized rt_mutex_waiter 1781 * @waiter: the pre-initialized rt_mutex_waiter
1764 * @task: the task to prepare 1782 * @task: the task to prepare
1765 * 1783 *
1784 * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock
1785 * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that.
1786 *
1787 * NOTE: unlike __rt_mutex_start_proxy_lock this _DOES_ remove the @waiter
1788 * on failure.
1789 *
1766 * Returns: 1790 * Returns:
1767 * 0 - task blocked on lock 1791 * 0 - task blocked on lock
1768 * 1 - acquired the lock for task, caller should wake it up 1792 * 1 - acquired the lock for task, caller should wake it up
1769 * <0 - error 1793 * <0 - error
1770 * 1794 *
1771 * Special API call for FUTEX_REQUEUE_PI support. 1795 * Special API call for PI-futex support.
1772 */ 1796 */
1773int rt_mutex_start_proxy_lock(struct rt_mutex *lock, 1797int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1774 struct rt_mutex_waiter *waiter, 1798 struct rt_mutex_waiter *waiter,
@@ -1778,6 +1802,8 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1778 1802
1779 raw_spin_lock_irq(&lock->wait_lock); 1803 raw_spin_lock_irq(&lock->wait_lock);
1780 ret = __rt_mutex_start_proxy_lock(lock, waiter, task); 1804 ret = __rt_mutex_start_proxy_lock(lock, waiter, task);
1805 if (unlikely(ret))
1806 remove_waiter(lock, waiter);
1781 raw_spin_unlock_irq(&lock->wait_lock); 1807 raw_spin_unlock_irq(&lock->wait_lock);
1782 1808
1783 return ret; 1809 return ret;
@@ -1845,7 +1871,8 @@ int rt_mutex_wait_proxy_lock(struct rt_mutex *lock,
1845 * @lock: the rt_mutex we were woken on 1871 * @lock: the rt_mutex we were woken on
1846 * @waiter: the pre-initialized rt_mutex_waiter 1872 * @waiter: the pre-initialized rt_mutex_waiter
1847 * 1873 *
1848 * Attempt to clean up after a failed rt_mutex_wait_proxy_lock(). 1874 * Attempt to clean up after a failed __rt_mutex_start_proxy_lock() or
1875 * rt_mutex_wait_proxy_lock().
1849 * 1876 *
1850 * Unless we acquired the lock; we're still enqueued on the wait-list and can 1877 * Unless we acquired the lock; we're still enqueued on the wait-list and can
1851 * in fact still be granted ownership until we're removed. Therefore we can 1878 * in fact still be granted ownership until we're removed. Therefore we can
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 09b180063ee1..50d9af615dc4 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -198,15 +198,22 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
198 woken++; 198 woken++;
199 tsk = waiter->task; 199 tsk = waiter->task;
200 200
201 wake_q_add(wake_q, tsk); 201 get_task_struct(tsk);
202 list_del(&waiter->list); 202 list_del(&waiter->list);
203 /* 203 /*
204 * Ensure that the last operation is setting the reader 204 * Ensure calling get_task_struct() before setting the reader
205 * waiter to nil such that rwsem_down_read_failed() cannot 205 * waiter to nil such that rwsem_down_read_failed() cannot
206 * race with do_exit() by always holding a reference count 206 * race with do_exit() by always holding a reference count
207 * to the task to wakeup. 207 * to the task to wakeup.
208 */ 208 */
209 smp_store_release(&waiter->task, NULL); 209 smp_store_release(&waiter->task, NULL);
210 /*
211 * Ensure issuing the wakeup (either by us or someone else)
212 * after setting the reader waiter to nil.
213 */
214 wake_q_add(wake_q, tsk);
215 /* wake_q_add() already take the task ref */
216 put_task_struct(tsk);
210 } 217 }
211 218
212 adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment; 219 adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
diff --git a/kernel/relay.c b/kernel/relay.c
index 04f248644e06..9e0f52375487 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -428,6 +428,8 @@ static struct dentry *relay_create_buf_file(struct rchan *chan,
428 dentry = chan->cb->create_buf_file(tmpname, chan->parent, 428 dentry = chan->cb->create_buf_file(tmpname, chan->parent,
429 S_IRUSR, buf, 429 S_IRUSR, buf,
430 &chan->is_global); 430 &chan->is_global);
431 if (IS_ERR(dentry))
432 dentry = NULL;
431 433
432 kfree(tmpname); 434 kfree(tmpname);
433 435
@@ -461,7 +463,7 @@ static struct rchan_buf *relay_open_buf(struct rchan *chan, unsigned int cpu)
461 dentry = chan->cb->create_buf_file(NULL, NULL, 463 dentry = chan->cb->create_buf_file(NULL, NULL,
462 S_IRUSR, buf, 464 S_IRUSR, buf,
463 &chan->is_global); 465 &chan->is_global);
464 if (WARN_ON(dentry)) 466 if (IS_ERR_OR_NULL(dentry))
465 goto free_buf; 467 goto free_buf;
466 } 468 }
467 469
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a674c7db2f29..d8d76a65cfdd 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -396,6 +396,18 @@ static bool set_nr_if_polling(struct task_struct *p)
396#endif 396#endif
397#endif 397#endif
398 398
399/**
400 * wake_q_add() - queue a wakeup for 'later' waking.
401 * @head: the wake_q_head to add @task to
402 * @task: the task to queue for 'later' wakeup
403 *
404 * Queue a task for later wakeup, most likely by the wake_up_q() call in the
405 * same context, _HOWEVER_ this is not guaranteed, the wakeup can come
406 * instantly.
407 *
408 * This function must be used as-if it were wake_up_process(); IOW the task
409 * must be ready to be woken at this location.
410 */
399void wake_q_add(struct wake_q_head *head, struct task_struct *task) 411void wake_q_add(struct wake_q_head *head, struct task_struct *task)
400{ 412{
401 struct wake_q_node *node = &task->wake_q; 413 struct wake_q_node *node = &task->wake_q;
@@ -405,10 +417,11 @@ void wake_q_add(struct wake_q_head *head, struct task_struct *task)
405 * its already queued (either by us or someone else) and will get the 417 * its already queued (either by us or someone else) and will get the
406 * wakeup due to that. 418 * wakeup due to that.
407 * 419 *
408 * This cmpxchg() executes a full barrier, which pairs with the full 420 * In order to ensure that a pending wakeup will observe our pending
409 * barrier executed by the wakeup in wake_up_q(). 421 * state, even in the failed case, an explicit smp_mb() must be used.
410 */ 422 */
411 if (cmpxchg(&node->next, NULL, WAKE_Q_TAIL)) 423 smp_mb__before_atomic();
424 if (cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL))
412 return; 425 return;
413 426
414 get_task_struct(task); 427 get_task_struct(task);
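Note on the wake_q_add() kerneldoc added above: the intended usage is to batch wakeups while a lock is held and to issue them only after the lock is dropped. A hedged sketch of that pattern, not part of the patch; lock and task are assumed placeholders:

	DEFINE_WAKE_Q(wake_q);

	spin_lock(&lock);
	/* task must already be in a wakeable state, as with wake_up_process(). */
	wake_q_add(&wake_q, task);	/* takes a task reference if newly queued */
	spin_unlock(&lock);

	/* Wakeups (and reference drops) happen outside the lock. */
	wake_up_q(&wake_q);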
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 50aa2aba69bd..310d0637fe4b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5980,6 +5980,7 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
5980 5980
5981#ifdef CONFIG_SCHED_SMT 5981#ifdef CONFIG_SCHED_SMT
5982DEFINE_STATIC_KEY_FALSE(sched_smt_present); 5982DEFINE_STATIC_KEY_FALSE(sched_smt_present);
5983EXPORT_SYMBOL_GPL(sched_smt_present);
5983 5984
5984static inline void set_idle_cores(int cpu, int val) 5985static inline void set_idle_cores(int cpu, int val)
5985{ 5986{
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index fe24de3fbc93..c3484785b179 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -124,6 +124,7 @@
124 * sampling of the aggregate task states would be. 124 * sampling of the aggregate task states would be.
125 */ 125 */
126 126
127#include "../workqueue_internal.h"
127#include <linux/sched/loadavg.h> 128#include <linux/sched/loadavg.h>
128#include <linux/seq_file.h> 129#include <linux/seq_file.h>
129#include <linux/proc_fs.h> 130#include <linux/proc_fs.h>
@@ -480,9 +481,6 @@ static void psi_group_change(struct psi_group *group, int cpu,
480 groupc->tasks[t]++; 481 groupc->tasks[t]++;
481 482
482 write_seqcount_end(&groupc->seq); 483 write_seqcount_end(&groupc->seq);
483
484 if (!delayed_work_pending(&group->clock_work))
485 schedule_delayed_work(&group->clock_work, PSI_FREQ);
486} 484}
487 485
488static struct psi_group *iterate_groups(struct task_struct *task, void **iter) 486static struct psi_group *iterate_groups(struct task_struct *task, void **iter)
@@ -513,6 +511,7 @@ void psi_task_change(struct task_struct *task, int clear, int set)
513{ 511{
514 int cpu = task_cpu(task); 512 int cpu = task_cpu(task);
515 struct psi_group *group; 513 struct psi_group *group;
514 bool wake_clock = true;
516 void *iter = NULL; 515 void *iter = NULL;
517 516
518 if (!task->pid) 517 if (!task->pid)
@@ -530,8 +529,22 @@ void psi_task_change(struct task_struct *task, int clear, int set)
530 task->psi_flags &= ~clear; 529 task->psi_flags &= ~clear;
531 task->psi_flags |= set; 530 task->psi_flags |= set;
532 531
533 while ((group = iterate_groups(task, &iter))) 532 /*
533 * Periodic aggregation shuts off if there is a period of no
534 * task changes, so we wake it back up if necessary. However,
535 * don't do this if the task change is the aggregation worker
536 * itself going to sleep, or we'll ping-pong forever.
537 */
538 if (unlikely((clear & TSK_RUNNING) &&
539 (task->flags & PF_WQ_WORKER) &&
540 wq_worker_last_func(task) == psi_update_work))
541 wake_clock = false;
542
543 while ((group = iterate_groups(task, &iter))) {
534 psi_group_change(group, cpu, clear, set); 544 psi_group_change(group, cpu, clear, set);
545 if (wake_clock && !delayed_work_pending(&group->clock_work))
546 schedule_delayed_work(&group->clock_work, PSI_FREQ);
547 }
535} 548}
536 549
537void psi_memstall_tick(struct task_struct *task, int cpu) 550void psi_memstall_tick(struct task_struct *task, int cpu)
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index d7f538847b84..e815781ed751 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -976,6 +976,9 @@ static int seccomp_notify_release(struct inode *inode, struct file *file)
976 struct seccomp_filter *filter = file->private_data; 976 struct seccomp_filter *filter = file->private_data;
977 struct seccomp_knotif *knotif; 977 struct seccomp_knotif *knotif;
978 978
979 if (!filter)
980 return 0;
981
979 mutex_lock(&filter->notify_lock); 982 mutex_lock(&filter->notify_lock);
980 983
981 /* 984 /*
@@ -1300,6 +1303,7 @@ out:
1300out_put_fd: 1303out_put_fd:
1301 if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) { 1304 if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) {
1302 if (ret < 0) { 1305 if (ret < 0) {
1306 listener_f->private_data = NULL;
1303 fput(listener_f); 1307 fput(listener_f);
1304 put_unused_fd(listener); 1308 put_unused_fd(listener);
1305 } else { 1309 } else {
diff --git a/kernel/signal.c b/kernel/signal.c
index e1d7ad8e6ab1..99fa8ff06fd9 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -688,6 +688,48 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, kernel_siginfo_t *in
688} 688}
689EXPORT_SYMBOL_GPL(dequeue_signal); 689EXPORT_SYMBOL_GPL(dequeue_signal);
690 690
691static int dequeue_synchronous_signal(kernel_siginfo_t *info)
692{
693 struct task_struct *tsk = current;
694 struct sigpending *pending = &tsk->pending;
695 struct sigqueue *q, *sync = NULL;
696
697 /*
698 * Might a synchronous signal be in the queue?
699 */
700 if (!((pending->signal.sig[0] & ~tsk->blocked.sig[0]) & SYNCHRONOUS_MASK))
701 return 0;
702
703 /*
704 * Return the first synchronous signal in the queue.
705 */
706 list_for_each_entry(q, &pending->list, list) {
707 /* Synchronous signals have a postive si_code */
708 if ((q->info.si_code > SI_USER) &&
709 (sigmask(q->info.si_signo) & SYNCHRONOUS_MASK)) {
710 sync = q;
711 goto next;
712 }
713 }
714 return 0;
715next:
716 /*
717 * Check if there is another siginfo for the same signal.
718 */
719 list_for_each_entry_continue(q, &pending->list, list) {
720 if (q->info.si_signo == sync->info.si_signo)
721 goto still_pending;
722 }
723
724 sigdelset(&pending->signal, sync->info.si_signo);
725 recalc_sigpending();
726still_pending:
727 list_del_init(&sync->list);
728 copy_siginfo(info, &sync->info);
729 __sigqueue_free(sync);
730 return info->si_signo;
731}
732
691/* 733/*
692 * Tell a process that it has a new active signal.. 734 * Tell a process that it has a new active signal..
693 * 735 *
@@ -1057,10 +1099,9 @@ static int __send_signal(int sig, struct kernel_siginfo *info, struct task_struc
1057 1099
1058 result = TRACE_SIGNAL_DELIVERED; 1100 result = TRACE_SIGNAL_DELIVERED;
1059 /* 1101 /*
1060 * Skip useless siginfo allocation for SIGKILL SIGSTOP, 1102 * Skip useless siginfo allocation for SIGKILL and kernel threads.
1061 * and kernel threads.
1062 */ 1103 */
1063 if (sig_kernel_only(sig) || (t->flags & PF_KTHREAD)) 1104 if ((sig == SIGKILL) || (t->flags & PF_KTHREAD))
1064 goto out_set; 1105 goto out_set;
1065 1106
1066 /* 1107 /*
@@ -2394,6 +2435,11 @@ relock:
2394 goto relock; 2435 goto relock;
2395 } 2436 }
2396 2437
2438 /* Has this task already been marked for death? */
2439 ksig->info.si_signo = signr = SIGKILL;
2440 if (signal_group_exit(signal))
2441 goto fatal;
2442
2397 for (;;) { 2443 for (;;) {
2398 struct k_sigaction *ka; 2444 struct k_sigaction *ka;
2399 2445
@@ -2407,7 +2453,15 @@ relock:
2407 goto relock; 2453 goto relock;
2408 } 2454 }
2409 2455
2410 signr = dequeue_signal(current, &current->blocked, &ksig->info); 2456 /*
2457 * Signals generated by the execution of an instruction
2458 * need to be delivered before any other pending signals
2459 * so that the instruction pointer in the signal stack
2460 * frame points to the faulting instruction.
2461 */
2462 signr = dequeue_synchronous_signal(&ksig->info);
2463 if (!signr)
2464 signr = dequeue_signal(current, &current->blocked, &ksig->info);
2411 2465
2412 if (!signr) 2466 if (!signr)
2413 break; /* will return 0 */ 2467 break; /* will return 0 */
@@ -2489,6 +2543,7 @@ relock:
2489 continue; 2543 continue;
2490 } 2544 }
2491 2545
2546 fatal:
2492 spin_unlock_irq(&sighand->siglock); 2547 spin_unlock_irq(&sighand->siglock);
2493 2548
2494 /* 2549 /*
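dequeue_synchronous_signal() above keys off two facts: fault-generated signals carry a positive si_code, while signals sent with kill() carry SI_USER (0), and only the signals in SYNCHRONOUS_MASK are considered. The si_code distinction can be observed from user space; below is a small stand-alone demo (not part of the patch) that prints the si_code for a kill()-sent SIGUSR1 and for a SIGSEGV raised by a faulting instruction.

#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static void handler(int sig, siginfo_t *info, void *ctx)
{
        (void)ctx;
        /* async-signal-safety is ignored here for brevity */
        printf("sig=%d si_code=%d (%s)\n", sig, info->si_code,
               info->si_code > SI_USER ? "synchronous/fault" : "user-sent");
        if (sig == SIGSEGV)
                _exit(0);
}

int main(void)
{
        struct sigaction sa;

        memset(&sa, 0, sizeof(sa));
        sigemptyset(&sa.sa_mask);
        sa.sa_sigaction = handler;
        sa.sa_flags = SA_SIGINFO;
        sigaction(SIGUSR1, &sa, NULL);
        sigaction(SIGSEGV, &sa, NULL);

        kill(getpid(), SIGUSR1);          /* si_code == SI_USER (0) */
        *(volatile int *)0 = 0;           /* si_code > SI_USER, e.g. SEGV_MAPERR */
        return 1;
}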
diff --git a/kernel/smp.c b/kernel/smp.c
index 163c451af42e..f4cf1b0bb3b8 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -584,8 +584,6 @@ void __init smp_init(void)
584 num_nodes, (num_nodes > 1 ? "s" : ""), 584 num_nodes, (num_nodes > 1 ? "s" : ""),
585 num_cpus, (num_cpus > 1 ? "s" : "")); 585 num_cpus, (num_cpus > 1 ? "s" : ""));
586 586
587 /* Final decision about SMT support */
588 cpu_smt_check_topology();
589 /* Any cleanup work */ 587 /* Any cleanup work */
590 smp_cpus_done(setup_max_cpus); 588 smp_cpus_done(setup_max_cpus);
591} 589}
diff --git a/kernel/sys.c b/kernel/sys.c
index a48cbf1414b8..f7eb62eceb24 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1207,7 +1207,8 @@ DECLARE_RWSEM(uts_sem);
1207/* 1207/*
1208 * Work around broken programs that cannot handle "Linux 3.0". 1208 * Work around broken programs that cannot handle "Linux 3.0".
1209 * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40 1209 * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40
1210 * And we map 4.x to 2.6.60+x, so 4.0 would be 2.6.60. 1210 * And we map 4.x and later versions to 2.6.60+x, so 4.0/5.0/6.0/... would be
1211 * 2.6.60.
1211 */ 1212 */
1212static int override_release(char __user *release, size_t len) 1213static int override_release(char __user *release, size_t len)
1213{ 1214{
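The updated comment describes the UNAME26 compatibility mapping: 3.x becomes 2.6.(40 + x), and 4.x and later become 2.6.(60 + x), so the x.0 releases from 4.0 onward all collide at 2.6.60. Below is a stand-alone sketch of that arithmetic as the comment states it; it is an illustration, not the kernel's actual override_release() implementation.

#include <stdio.h>

static void uname26_map(int major, int minor, char *buf, size_t len)
{
        int y;

        if (major == 3)
                y = 40 + minor;       /* 3.x  -> 2.6.(40 + x) */
        else
                y = 60 + minor;       /* 4.x and later -> 2.6.(60 + x) */
        snprintf(buf, len, "2.6.%d", y);
}

int main(void)
{
        const int vers[][2] = { {3, 0}, {3, 10}, {4, 0}, {5, 0}, {6, 0} };
        char buf[16];

        for (size_t i = 0; i < sizeof(vers) / sizeof(vers[0]); i++) {
                uname26_map(vers[i][0], vers[i][1], buf, sizeof(buf));
                printf("%d.%d -> %s\n", vers[i][0], vers[i][1], buf);
        }
        return 0;
}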
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 8f0644af40be..80f955210861 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -685,6 +685,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
685 * set up the signal and overrun bookkeeping. 685 * set up the signal and overrun bookkeeping.
686 */ 686 */
687 timer->it.cpu.incr = timespec64_to_ns(&new->it_interval); 687 timer->it.cpu.incr = timespec64_to_ns(&new->it_interval);
688 timer->it_interval = ns_to_ktime(timer->it.cpu.incr);
688 689
689 /* 690 /*
690 * This acts as a modification timestamp for the timer, 691 * This acts as a modification timestamp for the timer,
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 8b068adb9da1..f1a86a0d881d 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1204,22 +1204,12 @@ static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *
1204 1204
1205int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog) 1205int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
1206{ 1206{
1207 int err; 1207 return __bpf_probe_register(btp, prog);
1208
1209 mutex_lock(&bpf_event_mutex);
1210 err = __bpf_probe_register(btp, prog);
1211 mutex_unlock(&bpf_event_mutex);
1212 return err;
1213} 1208}
1214 1209
1215int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog) 1210int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
1216{ 1211{
1217 int err; 1212 return tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog);
1218
1219 mutex_lock(&bpf_event_mutex);
1220 err = tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog);
1221 mutex_unlock(&bpf_event_mutex);
1222 return err;
1223} 1213}
1224 1214
1225int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, 1215int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 5c19b8c41c7e..d5fb09ebba8b 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -607,11 +607,17 @@ static int trace_kprobe_create(int argc, const char *argv[])
607 char buf[MAX_EVENT_NAME_LEN]; 607 char buf[MAX_EVENT_NAME_LEN];
608 unsigned int flags = TPARG_FL_KERNEL; 608 unsigned int flags = TPARG_FL_KERNEL;
609 609
610 /* argc must be >= 1 */ 610 switch (argv[0][0]) {
611 if (argv[0][0] == 'r') { 611 case 'r':
612 is_return = true; 612 is_return = true;
613 flags |= TPARG_FL_RETURN; 613 flags |= TPARG_FL_RETURN;
614 } else if (argv[0][0] != 'p' || argc < 2) 614 break;
615 case 'p':
616 break;
617 default:
618 return -ECANCELED;
619 }
620 if (argc < 2)
615 return -ECANCELED; 621 return -ECANCELED;
616 622
617 event = strchr(&argv[0][1], ':'); 623 event = strchr(&argv[0][1], ':');
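The hunk above replaces an if/else chain with a switch on the first character of argv[0]: 'r' marks a return probe, 'p' a plain probe, anything else is rejected, and a definition with fewer than two arguments is rejected in either case. A user-space sketch of that dispatch follows, with hypothetical names; it mirrors the control flow only, not the tracing code itself.

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

static int parse_probe_kind(int argc, const char *argv[], bool *is_return)
{
        *is_return = false;

        switch (argv[0][0]) {
        case 'r':
                *is_return = true;
                break;
        case 'p':
                break;
        default:
                return -ECANCELED;   /* neither p... nor r... */
        }
        if (argc < 2)                /* need the probe spec plus a location */
                return -ECANCELED;
        return 0;
}

int main(void)
{
        const char *ok[] = { "r:myretprobe", "do_sys_open", "$retval" };
        const char *bad[] = { "x:oops" };
        bool is_ret;

        printf("%d\n", parse_probe_kind(3, ok, &is_ret));   /* 0, is_ret == true */
        printf("%d\n", parse_probe_kind(1, bad, &is_ret));  /* -ECANCELED */
        return 0;
}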
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index e335576b9411..9bde07c06362 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -5,7 +5,7 @@
5 * Copyright (C) IBM Corporation, 2010-2012 5 * Copyright (C) IBM Corporation, 2010-2012
6 * Author: Srikar Dronamraju <srikar@linux.vnet.ibm.com> 6 * Author: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
7 */ 7 */
8#define pr_fmt(fmt) "trace_kprobe: " fmt 8#define pr_fmt(fmt) "trace_uprobe: " fmt
9 9
10#include <linux/ctype.h> 10#include <linux/ctype.h>
11#include <linux/module.h> 11#include <linux/module.h>
@@ -160,6 +160,13 @@ fetch_store_string(unsigned long addr, void *dest, void *base)
160 if (ret >= 0) { 160 if (ret >= 0) {
161 if (ret == maxlen) 161 if (ret == maxlen)
162 dst[ret - 1] = '\0'; 162 dst[ret - 1] = '\0';
163 else
164 /*
165 * Include the terminating null byte. In this case it
166 * was copied by strncpy_from_user but not accounted
167 * for in ret.
168 */
169 ret++;
163 *(u32 *)dest = make_data_loc(ret, (void *)dst - base); 170 *(u32 *)dest = make_data_loc(ret, (void *)dst - base);
164 } 171 }
165 172
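The fetch_store_string() fix above accounts for the terminating NUL: a strncpy_from_user()-style copy returns the string length without the trailing NUL, so when the string (and its NUL) fit, the recorded length must be incremented by one, and when it was truncated, the last byte is overwritten with a NUL instead. Below is a user-space sketch of that accounting, assuming a local copy_string() helper in place of strncpy_from_user() and omitting error handling.

#include <stdio.h>
#include <string.h>

static long copy_string(char *dst, const char *src, long maxlen)
{
        long ret = (long)strnlen(src, maxlen);   /* bytes copied, no NUL */

        memcpy(dst, src, ret);
        if (ret < maxlen)
                dst[ret] = '\0';
        return ret;
}

static long store_string(char *dst, const char *src, long maxlen)
{
        long ret = copy_string(dst, src, maxlen);

        if (ret == maxlen)
                dst[ret - 1] = '\0';   /* truncated: force termination */
        else
                ret++;                 /* include the terminating NUL byte */
        return ret;                    /* total bytes recorded in the buffer */
}

int main(void)
{
        char buf[8];

        printf("%ld\n", store_string(buf, "hi", sizeof(buf)));         /* 3 */
        printf("%ld\n", store_string(buf, "0123456789", sizeof(buf))); /* 8 */
        return 0;
}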
diff --git a/kernel/umh.c b/kernel/umh.c
index 0baa672e023c..d937cbad903a 100644
--- a/kernel/umh.c
+++ b/kernel/umh.c
@@ -37,6 +37,8 @@ static kernel_cap_t usermodehelper_bset = CAP_FULL_SET;
37static kernel_cap_t usermodehelper_inheritable = CAP_FULL_SET; 37static kernel_cap_t usermodehelper_inheritable = CAP_FULL_SET;
38static DEFINE_SPINLOCK(umh_sysctl_lock); 38static DEFINE_SPINLOCK(umh_sysctl_lock);
39static DECLARE_RWSEM(umhelper_sem); 39static DECLARE_RWSEM(umhelper_sem);
40static LIST_HEAD(umh_list);
41static DEFINE_MUTEX(umh_list_lock);
40 42
41static void call_usermodehelper_freeinfo(struct subprocess_info *info) 43static void call_usermodehelper_freeinfo(struct subprocess_info *info)
42{ 44{
@@ -100,10 +102,12 @@ static int call_usermodehelper_exec_async(void *data)
100 commit_creds(new); 102 commit_creds(new);
101 103
102 sub_info->pid = task_pid_nr(current); 104 sub_info->pid = task_pid_nr(current);
103 if (sub_info->file) 105 if (sub_info->file) {
104 retval = do_execve_file(sub_info->file, 106 retval = do_execve_file(sub_info->file,
105 sub_info->argv, sub_info->envp); 107 sub_info->argv, sub_info->envp);
106 else 108 if (!retval)
109 current->flags |= PF_UMH;
110 } else
107 retval = do_execve(getname_kernel(sub_info->path), 111 retval = do_execve(getname_kernel(sub_info->path),
108 (const char __user *const __user *)sub_info->argv, 112 (const char __user *const __user *)sub_info->argv,
109 (const char __user *const __user *)sub_info->envp); 113 (const char __user *const __user *)sub_info->envp);
@@ -517,6 +521,11 @@ int fork_usermode_blob(void *data, size_t len, struct umh_info *info)
517 goto out; 521 goto out;
518 522
519 err = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC); 523 err = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC);
524 if (!err) {
525 mutex_lock(&umh_list_lock);
526 list_add(&info->list, &umh_list);
527 mutex_unlock(&umh_list_lock);
528 }
520out: 529out:
521 fput(file); 530 fput(file);
522 return err; 531 return err;
@@ -679,6 +688,26 @@ static int proc_cap_handler(struct ctl_table *table, int write,
679 return 0; 688 return 0;
680} 689}
681 690
691void __exit_umh(struct task_struct *tsk)
692{
693 struct umh_info *info;
694 pid_t pid = tsk->pid;
695
696 mutex_lock(&umh_list_lock);
697 list_for_each_entry(info, &umh_list, list) {
698 if (info->pid == pid) {
699 list_del(&info->list);
700 mutex_unlock(&umh_list_lock);
701 goto out;
702 }
703 }
704 mutex_unlock(&umh_list_lock);
705 return;
706out:
707 if (info->cleanup)
708 info->cleanup(info);
709}
710
682struct ctl_table usermodehelper_table[] = { 711struct ctl_table usermodehelper_table[] = {
683 { 712 {
684 .procname = "bset", 713 .procname = "bset",
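__exit_umh() above walks the tracked-helper list under umh_list_lock, unlinks the entry whose pid matches the exiting task, drops the lock, and only then runs the entry's cleanup callback. A user-space sketch of that find-and-remove pattern follows (build with cc -pthread); the struct helper type and exit_helper() name are hypothetical.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>

struct helper {
        pid_t pid;
        void (*cleanup)(struct helper *);
        struct helper *next;
};

static struct helper *helpers;
static pthread_mutex_t helpers_lock = PTHREAD_MUTEX_INITIALIZER;

static void exit_helper(pid_t pid)
{
        struct helper **pp, *found = NULL;

        pthread_mutex_lock(&helpers_lock);
        for (pp = &helpers; *pp; pp = &(*pp)->next) {
                if ((*pp)->pid == pid) {
                        found = *pp;
                        *pp = found->next;   /* unlink while holding the lock */
                        break;
                }
        }
        pthread_mutex_unlock(&helpers_lock);

        if (found && found->cleanup)
                found->cleanup(found);       /* run cleanup without the lock */
}

static void free_helper(struct helper *h)
{
        printf("cleaning up helper pid %d\n", (int)h->pid);
        free(h);
}

int main(void)
{
        struct helper *h = calloc(1, sizeof(*h));

        h->pid = 42;
        h->cleanup = free_helper;
        h->next = helpers;
        helpers = h;

        exit_helper(42);
        return 0;
}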
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 392be4b252f6..fc5d23d752a5 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -910,6 +910,26 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task)
910} 910}
911 911
912/** 912/**
913 * wq_worker_last_func - retrieve worker's last work function
914 *
915 * Determine the last function a worker executed. This is called from
916 * the scheduler to get a worker's last known identity.
917 *
918 * CONTEXT:
919 * spin_lock_irq(rq->lock)
920 *
921 * Return:
922 * The last work function %current executed as a worker, NULL if it
923 * hasn't executed any work yet.
924 */
925work_func_t wq_worker_last_func(struct task_struct *task)
926{
927 struct worker *worker = kthread_data(task);
928
929 return worker->last_func;
930}
931
932/**
913 * worker_set_flags - set worker flags and adjust nr_running accordingly 933 * worker_set_flags - set worker flags and adjust nr_running accordingly
914 * @worker: self 934 * @worker: self
915 * @flags: flags to set 935 * @flags: flags to set
@@ -2184,6 +2204,9 @@ __acquires(&pool->lock)
2184 if (unlikely(cpu_intensive)) 2204 if (unlikely(cpu_intensive))
2185 worker_clr_flags(worker, WORKER_CPU_INTENSIVE); 2205 worker_clr_flags(worker, WORKER_CPU_INTENSIVE);
2186 2206
2207 /* tag the worker for identification in schedule() */
2208 worker->last_func = worker->current_func;
2209
2187 /* we're done with it, release */ 2210 /* we're done with it, release */
2188 hash_del(&worker->hentry); 2211 hash_del(&worker->hentry);
2189 worker->current_work = NULL; 2212 worker->current_work = NULL;
diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h
index 66fbb5a9e633..cb68b03ca89a 100644
--- a/kernel/workqueue_internal.h
+++ b/kernel/workqueue_internal.h
@@ -53,6 +53,9 @@ struct worker {
53 53
54 /* used only by rescuers to point to the target workqueue */ 54 /* used only by rescuers to point to the target workqueue */
55 struct workqueue_struct *rescue_wq; /* I: the workqueue to rescue */ 55 struct workqueue_struct *rescue_wq; /* I: the workqueue to rescue */
56
57 /* used by the scheduler to determine a worker's last known identity */
58 work_func_t last_func;
56}; 59};
57 60
58/** 61/**
@@ -67,9 +70,10 @@ static inline struct worker *current_wq_worker(void)
67 70
68/* 71/*
69 * Scheduler hooks for concurrency managed workqueue. Only to be used from 72 * Scheduler hooks for concurrency managed workqueue. Only to be used from
70 * sched/core.c and workqueue.c. 73 * sched/ and workqueue.c.
71 */ 74 */
72void wq_worker_waking_up(struct task_struct *task, int cpu); 75void wq_worker_waking_up(struct task_struct *task, int cpu);
73struct task_struct *wq_worker_sleeping(struct task_struct *task); 76struct task_struct *wq_worker_sleeping(struct task_struct *task);
77work_func_t wq_worker_last_func(struct task_struct *task);
74 78
75#endif /* _KERNEL_WORKQUEUE_INTERNAL_H */ 79#endif /* _KERNEL_WORKQUEUE_INTERNAL_H */
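Taken together, the workqueue hunks cache the last executed work function on the worker at the end of process_one_work() and expose it through wq_worker_last_func() for callers on the scheduler side. A minimal user-space sketch of that bookkeeping follows; the worker type, the work functions, and the way the result is used are hypothetical illustrations, not the scheduler's actual caller.

#include <stdio.h>

typedef void (*work_func_t)(void *);

struct worker {
        work_func_t current_func;
        work_func_t last_func;   /* last work function this worker executed */
};

static void process_one_work(struct worker *w, work_func_t fn, void *arg)
{
        w->current_func = fn;
        fn(arg);
        w->last_func = w->current_func;   /* tag the worker for later lookup */
        w->current_func = NULL;
}

static work_func_t worker_last_func(struct worker *w)
{
        return w->last_func;
}

static void flush_logs(void *arg)     { (void)arg; }
static void refresh_stats(void *arg)  { (void)arg; }

int main(void)
{
        struct worker w = { 0 };

        process_one_work(&w, flush_logs, NULL);
        process_one_work(&w, refresh_stats, NULL);

        /* e.g. an observer asking: was this worker last running refresh_stats? */
        printf("last == refresh_stats: %d\n",
               worker_last_func(&w) == refresh_stats);
        return 0;
}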