Diffstat (limited to 'kernel')
38 files changed, 465 insertions(+), 164 deletions(-)
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 715f9fcf4712..c57bd10340ed 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
| @@ -467,7 +467,7 @@ static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t) | |||
| 467 | return kind_ops[BTF_INFO_KIND(t->info)]; | 467 | return kind_ops[BTF_INFO_KIND(t->info)]; |
| 468 | } | 468 | } |
| 469 | 469 | ||
| 470 | bool btf_name_offset_valid(const struct btf *btf, u32 offset) | 470 | static bool btf_name_offset_valid(const struct btf *btf, u32 offset) |
| 471 | { | 471 | { |
| 472 | return BTF_STR_OFFSET_VALID(offset) && | 472 | return BTF_STR_OFFSET_VALID(offset) && |
| 473 | offset < btf->hdr.str_len; | 473 | offset < btf->hdr.str_len; |
| @@ -1219,8 +1219,6 @@ static void btf_bitfield_seq_show(void *data, u8 bits_offset, | |||
| 1219 | u8 nr_copy_bits; | 1219 | u8 nr_copy_bits; |
| 1220 | u64 print_num; | 1220 | u64 print_num; |
| 1221 | 1221 | ||
| 1222 | data += BITS_ROUNDDOWN_BYTES(bits_offset); | ||
| 1223 | bits_offset = BITS_PER_BYTE_MASKED(bits_offset); | ||
| 1224 | nr_copy_bits = nr_bits + bits_offset; | 1222 | nr_copy_bits = nr_bits + bits_offset; |
| 1225 | nr_copy_bytes = BITS_ROUNDUP_BYTES(nr_copy_bits); | 1223 | nr_copy_bytes = BITS_ROUNDUP_BYTES(nr_copy_bits); |
| 1226 | 1224 | ||
| @@ -1255,7 +1253,9 @@ static void btf_int_bits_seq_show(const struct btf *btf, | |||
| 1255 | * BTF_INT_OFFSET() cannot exceed 64 bits. | 1253 | * BTF_INT_OFFSET() cannot exceed 64 bits. |
| 1256 | */ | 1254 | */ |
| 1257 | total_bits_offset = bits_offset + BTF_INT_OFFSET(int_data); | 1255 | total_bits_offset = bits_offset + BTF_INT_OFFSET(int_data); |
| 1258 | btf_bitfield_seq_show(data, total_bits_offset, nr_bits, m); | 1256 | data += BITS_ROUNDDOWN_BYTES(total_bits_offset); |
| 1257 | bits_offset = BITS_PER_BYTE_MASKED(total_bits_offset); | ||
| 1258 | btf_bitfield_seq_show(data, bits_offset, nr_bits, m); | ||
| 1259 | } | 1259 | } |
| 1260 | 1260 | ||
| 1261 | static void btf_int_seq_show(const struct btf *btf, const struct btf_type *t, | 1261 | static void btf_int_seq_show(const struct btf *btf, const struct btf_type *t, |
| @@ -1459,7 +1459,8 @@ static int btf_modifier_resolve(struct btf_verifier_env *env, | |||
| 1459 | 1459 | ||
| 1460 | /* "typedef void new_void", "const void"...etc */ | 1460 | /* "typedef void new_void", "const void"...etc */ |
| 1461 | if (!btf_type_is_void(next_type) && | 1461 | if (!btf_type_is_void(next_type) && |
| 1462 | !btf_type_is_fwd(next_type)) { | 1462 | !btf_type_is_fwd(next_type) && |
| 1463 | !btf_type_is_func_proto(next_type)) { | ||
| 1463 | btf_verifier_log_type(env, v->t, "Invalid type_id"); | 1464 | btf_verifier_log_type(env, v->t, "Invalid type_id"); |
| 1464 | return -EINVAL; | 1465 | return -EINVAL; |
| 1465 | } | 1466 | } |
| @@ -2001,12 +2002,12 @@ static void btf_struct_seq_show(const struct btf *btf, const struct btf_type *t, | |||
| 2001 | 2002 | ||
| 2002 | member_offset = btf_member_bit_offset(t, member); | 2003 | member_offset = btf_member_bit_offset(t, member); |
| 2003 | bitfield_size = btf_member_bitfield_size(t, member); | 2004 | bitfield_size = btf_member_bitfield_size(t, member); |
| 2005 | bytes_offset = BITS_ROUNDDOWN_BYTES(member_offset); | ||
| 2006 | bits8_offset = BITS_PER_BYTE_MASKED(member_offset); | ||
| 2004 | if (bitfield_size) { | 2007 | if (bitfield_size) { |
| 2005 | btf_bitfield_seq_show(data, member_offset, | 2008 | btf_bitfield_seq_show(data + bytes_offset, bits8_offset, |
| 2006 | bitfield_size, m); | 2009 | bitfield_size, m); |
| 2007 | } else { | 2010 | } else { |
| 2008 | bytes_offset = BITS_ROUNDDOWN_BYTES(member_offset); | ||
| 2009 | bits8_offset = BITS_PER_BYTE_MASKED(member_offset); | ||
| 2010 | ops = btf_type_ops(member_type); | 2011 | ops = btf_type_ops(member_type); |
| 2011 | ops->seq_show(btf, member_type, member->type, | 2012 | ops->seq_show(btf, member_type, member->type, |
| 2012 | data + bytes_offset, bits8_offset, m); | 2013 | data + bytes_offset, bits8_offset, m); |
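The btf.c hunks above fold whole bytes of a member's bit offset into the data pointer before calling btf_bitfield_seq_show(), which only takes a u8 bits_offset. A minimal userspace sketch of that byte/bit split, assuming nothing beyond standard C (the helpers below are illustrative stand-ins for the kernel's BITS_ROUNDDOWN_BYTES()/BITS_PER_BYTE_MASKED() macros, not the kernel code):

#include <stdint.h>
#include <stdio.h>

#define BITS_PER_BYTE 8

/* Stand-ins for BITS_ROUNDDOWN_BYTES() and BITS_PER_BYTE_MASKED(). */
static uint32_t bits_rounddown_bytes(uint32_t bits) { return bits / BITS_PER_BYTE; }
static uint8_t  bits_per_byte_masked(uint32_t bits) { return bits % BITS_PER_BYTE; }

int main(void)
{
	/* A member at bit offset 1000 cannot be passed as a u8 bits_offset,
	 * so the caller folds whole bytes into the data pointer and hands
	 * the show helper only the residual 0..7 bit offset.
	 */
	uint32_t member_bit_offset = 1000;
	uint32_t bytes_offset = bits_rounddown_bytes(member_bit_offset);
	uint8_t  bits8_offset = bits_per_byte_masked(member_bit_offset);

	printf("advance data by %u bytes, show bitfield at bit %u\n",
	       (unsigned)bytes_offset, (unsigned)bits8_offset);
	return 0;
}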
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 9425c2fb872f..d17d05570a3f 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
| @@ -572,7 +572,7 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk, | |||
| 572 | bpf_compute_and_save_data_end(skb, &saved_data_end); | 572 | bpf_compute_and_save_data_end(skb, &saved_data_end); |
| 573 | 573 | ||
| 574 | ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb, | 574 | ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb, |
| 575 | bpf_prog_run_save_cb); | 575 | __bpf_prog_run_save_cb); |
| 576 | bpf_restore_data_end(skb, saved_data_end); | 576 | bpf_restore_data_end(skb, saved_data_end); |
| 577 | __skb_pull(skb, offset); | 577 | __skb_pull(skb, offset); |
| 578 | skb->sk = save_sk; | 578 | skb->sk = save_sk; |
| @@ -718,6 +718,7 @@ cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) | |||
| 718 | case BPF_FUNC_trace_printk: | 718 | case BPF_FUNC_trace_printk: |
| 719 | if (capable(CAP_SYS_ADMIN)) | 719 | if (capable(CAP_SYS_ADMIN)) |
| 720 | return bpf_get_trace_printk_proto(); | 720 | return bpf_get_trace_printk_proto(); |
| 721 | /* fall through */ | ||
| 721 | default: | 722 | default: |
| 722 | return NULL; | 723 | return NULL; |
| 723 | } | 724 | } |
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 4b7c76765d9d..f9274114c88d 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
| @@ -686,7 +686,7 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l) | |||
| 686 | } | 686 | } |
| 687 | 687 | ||
| 688 | if (htab_is_prealloc(htab)) { | 688 | if (htab_is_prealloc(htab)) { |
| 689 | pcpu_freelist_push(&htab->freelist, &l->fnode); | 689 | __pcpu_freelist_push(&htab->freelist, &l->fnode); |
| 690 | } else { | 690 | } else { |
| 691 | atomic_dec(&htab->count); | 691 | atomic_dec(&htab->count); |
| 692 | l->htab = htab; | 692 | l->htab = htab; |
| @@ -748,7 +748,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, | |||
| 748 | } else { | 748 | } else { |
| 749 | struct pcpu_freelist_node *l; | 749 | struct pcpu_freelist_node *l; |
| 750 | 750 | ||
| 751 | l = pcpu_freelist_pop(&htab->freelist); | 751 | l = __pcpu_freelist_pop(&htab->freelist); |
| 752 | if (!l) | 752 | if (!l) |
| 753 | return ERR_PTR(-E2BIG); | 753 | return ERR_PTR(-E2BIG); |
| 754 | l_new = container_of(l, struct htab_elem, fnode); | 754 | l_new = container_of(l, struct htab_elem, fnode); |
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index abf1002080df..93a5cbbde421 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
| @@ -471,6 +471,7 @@ static int trie_delete_elem(struct bpf_map *map, void *_key) | |||
| 471 | } | 471 | } |
| 472 | 472 | ||
| 473 | if (!node || node->prefixlen != key->prefixlen || | 473 | if (!node || node->prefixlen != key->prefixlen || |
| 474 | node->prefixlen != matchlen || | ||
| 474 | (node->flags & LPM_TREE_NODE_FLAG_IM)) { | 475 | (node->flags & LPM_TREE_NODE_FLAG_IM)) { |
| 475 | ret = -ENOENT; | 476 | ret = -ENOENT; |
| 476 | goto out; | 477 | goto out; |
diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
index 99d243e1ad6e..52378d3e34b3 100644
--- a/kernel/bpf/map_in_map.c
+++ b/kernel/bpf/map_in_map.c
| @@ -12,6 +12,7 @@ | |||
| 12 | struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) | 12 | struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) |
| 13 | { | 13 | { |
| 14 | struct bpf_map *inner_map, *inner_map_meta; | 14 | struct bpf_map *inner_map, *inner_map_meta; |
| 15 | u32 inner_map_meta_size; | ||
| 15 | struct fd f; | 16 | struct fd f; |
| 16 | 17 | ||
| 17 | f = fdget(inner_map_ufd); | 18 | f = fdget(inner_map_ufd); |
| @@ -36,7 +37,12 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) | |||
| 36 | return ERR_PTR(-EINVAL); | 37 | return ERR_PTR(-EINVAL); |
| 37 | } | 38 | } |
| 38 | 39 | ||
| 39 | inner_map_meta = kzalloc(sizeof(*inner_map_meta), GFP_USER); | 40 | inner_map_meta_size = sizeof(*inner_map_meta); |
| 41 | /* In some cases verifier needs to access beyond just base map. */ | ||
| 42 | if (inner_map->ops == &array_map_ops) | ||
| 43 | inner_map_meta_size = sizeof(struct bpf_array); | ||
| 44 | |||
| 45 | inner_map_meta = kzalloc(inner_map_meta_size, GFP_USER); | ||
| 40 | if (!inner_map_meta) { | 46 | if (!inner_map_meta) { |
| 41 | fdput(f); | 47 | fdput(f); |
| 42 | return ERR_PTR(-ENOMEM); | 48 | return ERR_PTR(-ENOMEM); |
| @@ -46,9 +52,16 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) | |||
| 46 | inner_map_meta->key_size = inner_map->key_size; | 52 | inner_map_meta->key_size = inner_map->key_size; |
| 47 | inner_map_meta->value_size = inner_map->value_size; | 53 | inner_map_meta->value_size = inner_map->value_size; |
| 48 | inner_map_meta->map_flags = inner_map->map_flags; | 54 | inner_map_meta->map_flags = inner_map->map_flags; |
| 49 | inner_map_meta->ops = inner_map->ops; | ||
| 50 | inner_map_meta->max_entries = inner_map->max_entries; | 55 | inner_map_meta->max_entries = inner_map->max_entries; |
| 51 | 56 | ||
| 57 | /* Misc members not needed in bpf_map_meta_equal() check. */ | ||
| 58 | inner_map_meta->ops = inner_map->ops; | ||
| 59 | if (inner_map->ops == &array_map_ops) { | ||
| 60 | inner_map_meta->unpriv_array = inner_map->unpriv_array; | ||
| 61 | container_of(inner_map_meta, struct bpf_array, map)->index_mask = | ||
| 62 | container_of(inner_map, struct bpf_array, map)->index_mask; | ||
| 63 | } | ||
| 64 | |||
| 52 | fdput(f); | 65 | fdput(f); |
| 53 | return inner_map_meta; | 66 | return inner_map_meta; |
| 54 | } | 67 | } |
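The map_in_map change above sizes the meta map like a struct bpf_array when the inner map is an array, because the verifier later uses container_of() on the meta map and reads array-only fields such as index_mask. A simplified, self-contained sketch of why the allocation must cover the containing struct (the struct names below are stand-ins, not the kernel definitions):

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

/* Simplified stand-ins for struct bpf_map embedded in struct bpf_array. */
struct map { unsigned int key_size, value_size; };
struct array_map { struct map map; unsigned int index_mask; };

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

int main(void)
{
	/* Allocating only sizeof(struct map) here would let the later
	 * container_of() access read and write past the allocation,
	 * which is why the meta map is sized like the containing struct.
	 */
	struct map *meta = calloc(1, sizeof(struct array_map));

	if (!meta)
		return 1;
	container_of(meta, struct array_map, map)->index_mask = 0x3f;
	printf("index_mask=%#x\n",
	       container_of(meta, struct array_map, map)->index_mask);
	free(meta);
	return 0;
}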
diff --git a/kernel/bpf/percpu_freelist.c b/kernel/bpf/percpu_freelist.c
index 673fa6fe2d73..0c1b4ba9e90e 100644
--- a/kernel/bpf/percpu_freelist.c
+++ b/kernel/bpf/percpu_freelist.c
| @@ -28,8 +28,8 @@ void pcpu_freelist_destroy(struct pcpu_freelist *s) | |||
| 28 | free_percpu(s->freelist); | 28 | free_percpu(s->freelist); |
| 29 | } | 29 | } |
| 30 | 30 | ||
| 31 | static inline void __pcpu_freelist_push(struct pcpu_freelist_head *head, | 31 | static inline void ___pcpu_freelist_push(struct pcpu_freelist_head *head, |
| 32 | struct pcpu_freelist_node *node) | 32 | struct pcpu_freelist_node *node) |
| 33 | { | 33 | { |
| 34 | raw_spin_lock(&head->lock); | 34 | raw_spin_lock(&head->lock); |
| 35 | node->next = head->first; | 35 | node->next = head->first; |
| @@ -37,12 +37,22 @@ static inline void __pcpu_freelist_push(struct pcpu_freelist_head *head, | |||
| 37 | raw_spin_unlock(&head->lock); | 37 | raw_spin_unlock(&head->lock); |
| 38 | } | 38 | } |
| 39 | 39 | ||
| 40 | void pcpu_freelist_push(struct pcpu_freelist *s, | 40 | void __pcpu_freelist_push(struct pcpu_freelist *s, |
| 41 | struct pcpu_freelist_node *node) | 41 | struct pcpu_freelist_node *node) |
| 42 | { | 42 | { |
| 43 | struct pcpu_freelist_head *head = this_cpu_ptr(s->freelist); | 43 | struct pcpu_freelist_head *head = this_cpu_ptr(s->freelist); |
| 44 | 44 | ||
| 45 | __pcpu_freelist_push(head, node); | 45 | ___pcpu_freelist_push(head, node); |
| 46 | } | ||
| 47 | |||
| 48 | void pcpu_freelist_push(struct pcpu_freelist *s, | ||
| 49 | struct pcpu_freelist_node *node) | ||
| 50 | { | ||
| 51 | unsigned long flags; | ||
| 52 | |||
| 53 | local_irq_save(flags); | ||
| 54 | __pcpu_freelist_push(s, node); | ||
| 55 | local_irq_restore(flags); | ||
| 46 | } | 56 | } |
| 47 | 57 | ||
| 48 | void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size, | 58 | void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size, |
| @@ -63,7 +73,7 @@ void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size, | |||
| 63 | for_each_possible_cpu(cpu) { | 73 | for_each_possible_cpu(cpu) { |
| 64 | again: | 74 | again: |
| 65 | head = per_cpu_ptr(s->freelist, cpu); | 75 | head = per_cpu_ptr(s->freelist, cpu); |
| 66 | __pcpu_freelist_push(head, buf); | 76 | ___pcpu_freelist_push(head, buf); |
| 67 | i++; | 77 | i++; |
| 68 | buf += elem_size; | 78 | buf += elem_size; |
| 69 | if (i == nr_elems) | 79 | if (i == nr_elems) |
| @@ -74,14 +84,12 @@ again: | |||
| 74 | local_irq_restore(flags); | 84 | local_irq_restore(flags); |
| 75 | } | 85 | } |
| 76 | 86 | ||
| 77 | struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s) | 87 | struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *s) |
| 78 | { | 88 | { |
| 79 | struct pcpu_freelist_head *head; | 89 | struct pcpu_freelist_head *head; |
| 80 | struct pcpu_freelist_node *node; | 90 | struct pcpu_freelist_node *node; |
| 81 | unsigned long flags; | ||
| 82 | int orig_cpu, cpu; | 91 | int orig_cpu, cpu; |
| 83 | 92 | ||
| 84 | local_irq_save(flags); | ||
| 85 | orig_cpu = cpu = raw_smp_processor_id(); | 93 | orig_cpu = cpu = raw_smp_processor_id(); |
| 86 | while (1) { | 94 | while (1) { |
| 87 | head = per_cpu_ptr(s->freelist, cpu); | 95 | head = per_cpu_ptr(s->freelist, cpu); |
| @@ -89,16 +97,25 @@ struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s) | |||
| 89 | node = head->first; | 97 | node = head->first; |
| 90 | if (node) { | 98 | if (node) { |
| 91 | head->first = node->next; | 99 | head->first = node->next; |
| 92 | raw_spin_unlock_irqrestore(&head->lock, flags); | 100 | raw_spin_unlock(&head->lock); |
| 93 | return node; | 101 | return node; |
| 94 | } | 102 | } |
| 95 | raw_spin_unlock(&head->lock); | 103 | raw_spin_unlock(&head->lock); |
| 96 | cpu = cpumask_next(cpu, cpu_possible_mask); | 104 | cpu = cpumask_next(cpu, cpu_possible_mask); |
| 97 | if (cpu >= nr_cpu_ids) | 105 | if (cpu >= nr_cpu_ids) |
| 98 | cpu = 0; | 106 | cpu = 0; |
| 99 | if (cpu == orig_cpu) { | 107 | if (cpu == orig_cpu) |
| 100 | local_irq_restore(flags); | ||
| 101 | return NULL; | 108 | return NULL; |
| 102 | } | ||
| 103 | } | 109 | } |
| 104 | } | 110 | } |
| 111 | |||
| 112 | struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s) | ||
| 113 | { | ||
| 114 | struct pcpu_freelist_node *ret; | ||
| 115 | unsigned long flags; | ||
| 116 | |||
| 117 | local_irq_save(flags); | ||
| 118 | ret = __pcpu_freelist_pop(s); | ||
| 119 | local_irq_restore(flags); | ||
| 120 | return ret; | ||
| 121 | } | ||
diff --git a/kernel/bpf/percpu_freelist.h b/kernel/bpf/percpu_freelist.h
index 3049aae8ea1e..c3960118e617 100644
--- a/kernel/bpf/percpu_freelist.h
+++ b/kernel/bpf/percpu_freelist.h
| @@ -22,8 +22,12 @@ struct pcpu_freelist_node { | |||
| 22 | struct pcpu_freelist_node *next; | 22 | struct pcpu_freelist_node *next; |
| 23 | }; | 23 | }; |
| 24 | 24 | ||
| 25 | /* pcpu_freelist_* do spin_lock_irqsave. */ | ||
| 25 | void pcpu_freelist_push(struct pcpu_freelist *, struct pcpu_freelist_node *); | 26 | void pcpu_freelist_push(struct pcpu_freelist *, struct pcpu_freelist_node *); |
| 26 | struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *); | 27 | struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *); |
| 28 | /* __pcpu_freelist_* do spin_lock only. caller must disable irqs. */ | ||
| 29 | void __pcpu_freelist_push(struct pcpu_freelist *, struct pcpu_freelist_node *); | ||
| 30 | struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *); | ||
| 27 | void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size, | 31 | void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size, |
| 28 | u32 nr_elems); | 32 | u32 nr_elems); |
| 29 | int pcpu_freelist_init(struct pcpu_freelist *); | 33 | int pcpu_freelist_init(struct pcpu_freelist *); |
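The freelist rework above splits every operation into a lock-only __pcpu_freelist_* helper and a self-contained pcpu_freelist_* wrapper that saves and restores IRQs itself; per the new header comment, the lock-only variants (now used by hashtab) require the caller to have IRQs disabled. A rough userspace sketch of that split, with blocked signals standing in for disabled interrupts (illustrative only, not kernel code):

#include <pthread.h>
#include <signal.h>
#include <stdio.h>

struct node { struct node *next; };
struct freelist { pthread_spinlock_t lock; struct node *first; };

/* Lock-only helper: assumes the caller already blocked "interrupts"
 * (signals stand in for IRQs here), mirroring __pcpu_freelist_push().
 */
static void __freelist_push(struct freelist *s, struct node *n)
{
	pthread_spin_lock(&s->lock);
	n->next = s->first;
	s->first = n;
	pthread_spin_unlock(&s->lock);
}

/* Self-contained variant, mirroring pcpu_freelist_push(): block signals
 * around the lock-only helper, like local_irq_save()/local_irq_restore().
 */
static void freelist_push(struct freelist *s, struct node *n)
{
	sigset_t all, old;

	sigfillset(&all);
	pthread_sigmask(SIG_BLOCK, &all, &old);
	__freelist_push(s, n);
	pthread_sigmask(SIG_SETMASK, &old, NULL);
}

int main(void)
{
	struct freelist s = { .first = NULL };
	struct node n;

	pthread_spin_init(&s.lock, PTHREAD_PROCESS_PRIVATE);
	freelist_push(&s, &n);
	printf("pushed one node: %d\n", s.first == &n);
	return 0;
}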
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 90daf285de03..950ab2f28922 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
| @@ -44,7 +44,7 @@ static void do_up_read(struct irq_work *entry) | |||
| 44 | struct stack_map_irq_work *work; | 44 | struct stack_map_irq_work *work; |
| 45 | 45 | ||
| 46 | work = container_of(entry, struct stack_map_irq_work, irq_work); | 46 | work = container_of(entry, struct stack_map_irq_work, irq_work); |
| 47 | up_read(work->sem); | 47 | up_read_non_owner(work->sem); |
| 48 | work->sem = NULL; | 48 | work->sem = NULL; |
| 49 | } | 49 | } |
| 50 | 50 | ||
| @@ -180,11 +180,14 @@ static inline int stack_map_parse_build_id(void *page_addr, | |||
| 180 | 180 | ||
| 181 | if (nhdr->n_type == BPF_BUILD_ID && | 181 | if (nhdr->n_type == BPF_BUILD_ID && |
| 182 | nhdr->n_namesz == sizeof("GNU") && | 182 | nhdr->n_namesz == sizeof("GNU") && |
| 183 | nhdr->n_descsz == BPF_BUILD_ID_SIZE) { | 183 | nhdr->n_descsz > 0 && |
| 184 | nhdr->n_descsz <= BPF_BUILD_ID_SIZE) { | ||
| 184 | memcpy(build_id, | 185 | memcpy(build_id, |
| 185 | note_start + note_offs + | 186 | note_start + note_offs + |
| 186 | ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr), | 187 | ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr), |
| 187 | BPF_BUILD_ID_SIZE); | 188 | nhdr->n_descsz); |
| 189 | memset(build_id + nhdr->n_descsz, 0, | ||
| 190 | BPF_BUILD_ID_SIZE - nhdr->n_descsz); | ||
| 188 | return 0; | 191 | return 0; |
| 189 | } | 192 | } |
| 190 | new_offs = note_offs + sizeof(Elf32_Nhdr) + | 193 | new_offs = note_offs + sizeof(Elf32_Nhdr) + |
| @@ -260,7 +263,7 @@ static int stack_map_get_build_id(struct vm_area_struct *vma, | |||
| 260 | return -EFAULT; /* page not mapped */ | 263 | return -EFAULT; /* page not mapped */ |
| 261 | 264 | ||
| 262 | ret = -EINVAL; | 265 | ret = -EINVAL; |
| 263 | page_addr = page_address(page); | 266 | page_addr = kmap_atomic(page); |
| 264 | ehdr = (Elf32_Ehdr *)page_addr; | 267 | ehdr = (Elf32_Ehdr *)page_addr; |
| 265 | 268 | ||
| 266 | /* compare magic x7f "ELF" */ | 269 | /* compare magic x7f "ELF" */ |
| @@ -276,6 +279,7 @@ static int stack_map_get_build_id(struct vm_area_struct *vma, | |||
| 276 | else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64) | 279 | else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64) |
| 277 | ret = stack_map_get_build_id_64(page_addr, build_id); | 280 | ret = stack_map_get_build_id_64(page_addr, build_id); |
| 278 | out: | 281 | out: |
| 282 | kunmap_atomic(page_addr); | ||
| 279 | put_page(page); | 283 | put_page(page); |
| 280 | return ret; | 284 | return ret; |
| 281 | } | 285 | } |
| @@ -310,6 +314,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, | |||
| 310 | for (i = 0; i < trace_nr; i++) { | 314 | for (i = 0; i < trace_nr; i++) { |
| 311 | id_offs[i].status = BPF_STACK_BUILD_ID_IP; | 315 | id_offs[i].status = BPF_STACK_BUILD_ID_IP; |
| 312 | id_offs[i].ip = ips[i]; | 316 | id_offs[i].ip = ips[i]; |
| 317 | memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE); | ||
| 313 | } | 318 | } |
| 314 | return; | 319 | return; |
| 315 | } | 320 | } |
| @@ -320,6 +325,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, | |||
| 320 | /* per entry fall back to ips */ | 325 | /* per entry fall back to ips */ |
| 321 | id_offs[i].status = BPF_STACK_BUILD_ID_IP; | 326 | id_offs[i].status = BPF_STACK_BUILD_ID_IP; |
| 322 | id_offs[i].ip = ips[i]; | 327 | id_offs[i].ip = ips[i]; |
| 328 | memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE); | ||
| 323 | continue; | 329 | continue; |
| 324 | } | 330 | } |
| 325 | id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i] | 331 | id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i] |
| @@ -332,6 +338,12 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, | |||
| 332 | } else { | 338 | } else { |
| 333 | work->sem = ¤t->mm->mmap_sem; | 339 | work->sem = ¤t->mm->mmap_sem; |
| 334 | irq_work_queue(&work->irq_work); | 340 | irq_work_queue(&work->irq_work); |
| 341 | /* | ||
| 342 | * The irq_work will release the mmap_sem with | ||
| 343 | * up_read_non_owner(). The rwsem_release() is called | ||
| 344 | * here to release the lock from lockdep's perspective. | ||
| 345 | */ | ||
| 346 | rwsem_release(¤t->mm->mmap_sem.dep_map, 1, _RET_IP_); | ||
| 335 | } | 347 | } |
| 336 | } | 348 | } |
| 337 | 349 | ||
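Among other things, the stackmap changes accept build-ID notes whose descriptor is shorter than BPF_BUILD_ID_SIZE and zero-pad the remainder instead of requiring an exact match. A small sketch of that copy-and-pad step under the same bounds check (buffer size and names simplified, not the kernel code):

#include <stdio.h>
#include <string.h>

#define BUILD_ID_SIZE_MAX 20		/* size of the destination buffer */

static int copy_build_id(unsigned char *dst, const unsigned char *desc,
			 size_t descsz)
{
	if (descsz == 0 || descsz > BUILD_ID_SIZE_MAX)
		return -1;		/* reject sizes the buffer cannot hold */
	memcpy(dst, desc, descsz);
	memset(dst + descsz, 0, BUILD_ID_SIZE_MAX - descsz);	/* zero-pad */
	return 0;
}

int main(void)
{
	unsigned char id[BUILD_ID_SIZE_MAX];
	unsigned char short_desc[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };

	if (!copy_build_id(id, short_desc, sizeof(short_desc)))
		printf("copied %zu bytes, zero-padded %zu\n",
		       sizeof(short_desc), sizeof(id) - sizeof(short_desc));
	return 0;
}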
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index b155cd17c1bd..8577bb7f8be6 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
| @@ -713,8 +713,13 @@ static int map_lookup_elem(union bpf_attr *attr) | |||
| 713 | 713 | ||
| 714 | if (bpf_map_is_dev_bound(map)) { | 714 | if (bpf_map_is_dev_bound(map)) { |
| 715 | err = bpf_map_offload_lookup_elem(map, key, value); | 715 | err = bpf_map_offload_lookup_elem(map, key, value); |
| 716 | } else if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || | 716 | goto done; |
| 717 | map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { | 717 | } |
| 718 | |||
| 719 | preempt_disable(); | ||
| 720 | this_cpu_inc(bpf_prog_active); | ||
| 721 | if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || | ||
| 722 | map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { | ||
| 718 | err = bpf_percpu_hash_copy(map, key, value); | 723 | err = bpf_percpu_hash_copy(map, key, value); |
| 719 | } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { | 724 | } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { |
| 720 | err = bpf_percpu_array_copy(map, key, value); | 725 | err = bpf_percpu_array_copy(map, key, value); |
| @@ -744,7 +749,10 @@ static int map_lookup_elem(union bpf_attr *attr) | |||
| 744 | } | 749 | } |
| 745 | rcu_read_unlock(); | 750 | rcu_read_unlock(); |
| 746 | } | 751 | } |
| 752 | this_cpu_dec(bpf_prog_active); | ||
| 753 | preempt_enable(); | ||
| 747 | 754 | ||
| 755 | done: | ||
| 748 | if (err) | 756 | if (err) |
| 749 | goto free_value; | 757 | goto free_value; |
| 750 | 758 | ||
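The map_lookup_elem() change above brackets the per-CPU copy with preempt_disable() and a bpf_prog_active bump, and lets device-bound maps skip the bracket via the new done label. A very loose userspace analogue of that recursion-guard bracket, using a thread-local counter in place of the per-CPU one (illustrative only):

#include <stdio.h>

/* Thread-local counter standing in for the per-CPU bpf_prog_active. */
static _Thread_local int prog_active;

static int may_run_prog(void)
{
	return prog_active == 0;	/* a tracing hook would skip the program */
}

static void lookup_percpu_value(void)
{
	prog_active++;			/* this_cpu_inc(bpf_prog_active) analogue */
	/* ... copy the value from every CPU's slot while guarded ... */
	printf("during copy, may_run_prog() = %d\n", may_run_prog());
	prog_active--;			/* this_cpu_dec() analogue */
}

int main(void)
{
	lookup_percpu_value();
	printf("after copy, may_run_prog() = %d\n", may_run_prog());
	return 0;
}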
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f6bc62a9ee8e..8f295b790297 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
| @@ -1617,12 +1617,13 @@ static int check_flow_keys_access(struct bpf_verifier_env *env, int off, | |||
| 1617 | return 0; | 1617 | return 0; |
| 1618 | } | 1618 | } |
| 1619 | 1619 | ||
| 1620 | static int check_sock_access(struct bpf_verifier_env *env, u32 regno, int off, | 1620 | static int check_sock_access(struct bpf_verifier_env *env, int insn_idx, |
| 1621 | int size, enum bpf_access_type t) | 1621 | u32 regno, int off, int size, |
| 1622 | enum bpf_access_type t) | ||
| 1622 | { | 1623 | { |
| 1623 | struct bpf_reg_state *regs = cur_regs(env); | 1624 | struct bpf_reg_state *regs = cur_regs(env); |
| 1624 | struct bpf_reg_state *reg = ®s[regno]; | 1625 | struct bpf_reg_state *reg = ®s[regno]; |
| 1625 | struct bpf_insn_access_aux info; | 1626 | struct bpf_insn_access_aux info = {}; |
| 1626 | 1627 | ||
| 1627 | if (reg->smin_value < 0) { | 1628 | if (reg->smin_value < 0) { |
| 1628 | verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", | 1629 | verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", |
| @@ -1636,6 +1637,8 @@ static int check_sock_access(struct bpf_verifier_env *env, u32 regno, int off, | |||
| 1636 | return -EACCES; | 1637 | return -EACCES; |
| 1637 | } | 1638 | } |
| 1638 | 1639 | ||
| 1640 | env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size; | ||
| 1641 | |||
| 1639 | return 0; | 1642 | return 0; |
| 1640 | } | 1643 | } |
| 1641 | 1644 | ||
| @@ -2032,7 +2035,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn | |||
| 2032 | verbose(env, "cannot write into socket\n"); | 2035 | verbose(env, "cannot write into socket\n"); |
| 2033 | return -EACCES; | 2036 | return -EACCES; |
| 2034 | } | 2037 | } |
| 2035 | err = check_sock_access(env, regno, off, size, t); | 2038 | err = check_sock_access(env, insn_idx, regno, off, size, t); |
| 2036 | if (!err && value_regno >= 0) | 2039 | if (!err && value_regno >= 0) |
| 2037 | mark_reg_unknown(env, regs, value_regno); | 2040 | mark_reg_unknown(env, regs, value_regno); |
| 2038 | } else { | 2041 | } else { |
| @@ -3103,6 +3106,40 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, | |||
| 3103 | } | 3106 | } |
| 3104 | } | 3107 | } |
| 3105 | 3108 | ||
| 3109 | static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env, | ||
| 3110 | const struct bpf_insn *insn) | ||
| 3111 | { | ||
| 3112 | return env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K; | ||
| 3113 | } | ||
| 3114 | |||
| 3115 | static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux, | ||
| 3116 | u32 alu_state, u32 alu_limit) | ||
| 3117 | { | ||
| 3118 | /* If we arrived here from different branches with different | ||
| 3119 | * state or limits to sanitize, then this won't work. | ||
| 3120 | */ | ||
| 3121 | if (aux->alu_state && | ||
| 3122 | (aux->alu_state != alu_state || | ||
| 3123 | aux->alu_limit != alu_limit)) | ||
| 3124 | return -EACCES; | ||
| 3125 | |||
| 3126 | /* Corresponding fixup done in fixup_bpf_calls(). */ | ||
| 3127 | aux->alu_state = alu_state; | ||
| 3128 | aux->alu_limit = alu_limit; | ||
| 3129 | return 0; | ||
| 3130 | } | ||
| 3131 | |||
| 3132 | static int sanitize_val_alu(struct bpf_verifier_env *env, | ||
| 3133 | struct bpf_insn *insn) | ||
| 3134 | { | ||
| 3135 | struct bpf_insn_aux_data *aux = cur_aux(env); | ||
| 3136 | |||
| 3137 | if (can_skip_alu_sanitation(env, insn)) | ||
| 3138 | return 0; | ||
| 3139 | |||
| 3140 | return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0); | ||
| 3141 | } | ||
| 3142 | |||
| 3106 | static int sanitize_ptr_alu(struct bpf_verifier_env *env, | 3143 | static int sanitize_ptr_alu(struct bpf_verifier_env *env, |
| 3107 | struct bpf_insn *insn, | 3144 | struct bpf_insn *insn, |
| 3108 | const struct bpf_reg_state *ptr_reg, | 3145 | const struct bpf_reg_state *ptr_reg, |
| @@ -3117,7 +3154,7 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env, | |||
| 3117 | struct bpf_reg_state tmp; | 3154 | struct bpf_reg_state tmp; |
| 3118 | bool ret; | 3155 | bool ret; |
| 3119 | 3156 | ||
| 3120 | if (env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K) | 3157 | if (can_skip_alu_sanitation(env, insn)) |
| 3121 | return 0; | 3158 | return 0; |
| 3122 | 3159 | ||
| 3123 | /* We already marked aux for masking from non-speculative | 3160 | /* We already marked aux for masking from non-speculative |
| @@ -3133,19 +3170,8 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env, | |||
| 3133 | 3170 | ||
| 3134 | if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg)) | 3171 | if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg)) |
| 3135 | return 0; | 3172 | return 0; |
| 3136 | 3173 | if (update_alu_sanitation_state(aux, alu_state, alu_limit)) | |
| 3137 | /* If we arrived here from different branches with different | ||
| 3138 | * limits to sanitize, then this won't work. | ||
| 3139 | */ | ||
| 3140 | if (aux->alu_state && | ||
| 3141 | (aux->alu_state != alu_state || | ||
| 3142 | aux->alu_limit != alu_limit)) | ||
| 3143 | return -EACCES; | 3174 | return -EACCES; |
| 3144 | |||
| 3145 | /* Corresponding fixup done in fixup_bpf_calls(). */ | ||
| 3146 | aux->alu_state = alu_state; | ||
| 3147 | aux->alu_limit = alu_limit; | ||
| 3148 | |||
| 3149 | do_sim: | 3175 | do_sim: |
| 3150 | /* Simulate and find potential out-of-bounds access under | 3176 | /* Simulate and find potential out-of-bounds access under |
| 3151 | * speculative execution from truncation as a result of | 3177 | * speculative execution from truncation as a result of |
| @@ -3418,6 +3444,8 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, | |||
| 3418 | s64 smin_val, smax_val; | 3444 | s64 smin_val, smax_val; |
| 3419 | u64 umin_val, umax_val; | 3445 | u64 umin_val, umax_val; |
| 3420 | u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32; | 3446 | u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32; |
| 3447 | u32 dst = insn->dst_reg; | ||
| 3448 | int ret; | ||
| 3421 | 3449 | ||
| 3422 | if (insn_bitness == 32) { | 3450 | if (insn_bitness == 32) { |
| 3423 | /* Relevant for 32-bit RSH: Information can propagate towards | 3451 | /* Relevant for 32-bit RSH: Information can propagate towards |
| @@ -3452,6 +3480,11 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, | |||
| 3452 | 3480 | ||
| 3453 | switch (opcode) { | 3481 | switch (opcode) { |
| 3454 | case BPF_ADD: | 3482 | case BPF_ADD: |
| 3483 | ret = sanitize_val_alu(env, insn); | ||
| 3484 | if (ret < 0) { | ||
| 3485 | verbose(env, "R%d tried to add from different pointers or scalars\n", dst); | ||
| 3486 | return ret; | ||
| 3487 | } | ||
| 3455 | if (signed_add_overflows(dst_reg->smin_value, smin_val) || | 3488 | if (signed_add_overflows(dst_reg->smin_value, smin_val) || |
| 3456 | signed_add_overflows(dst_reg->smax_value, smax_val)) { | 3489 | signed_add_overflows(dst_reg->smax_value, smax_val)) { |
| 3457 | dst_reg->smin_value = S64_MIN; | 3490 | dst_reg->smin_value = S64_MIN; |
| @@ -3471,6 +3504,11 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, | |||
| 3471 | dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off); | 3504 | dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off); |
| 3472 | break; | 3505 | break; |
| 3473 | case BPF_SUB: | 3506 | case BPF_SUB: |
| 3507 | ret = sanitize_val_alu(env, insn); | ||
| 3508 | if (ret < 0) { | ||
| 3509 | verbose(env, "R%d tried to sub from different pointers or scalars\n", dst); | ||
| 3510 | return ret; | ||
| 3511 | } | ||
| 3474 | if (signed_sub_overflows(dst_reg->smin_value, smax_val) || | 3512 | if (signed_sub_overflows(dst_reg->smin_value, smax_val) || |
| 3475 | signed_sub_overflows(dst_reg->smax_value, smin_val)) { | 3513 | signed_sub_overflows(dst_reg->smax_value, smin_val)) { |
| 3476 | /* Overflow possible, we know nothing */ | 3514 | /* Overflow possible, we know nothing */ |
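The verifier hunks factor the "record once, reject on mismatch" bookkeeping into update_alu_sanitation_state() and apply it to scalar ADD/SUB via sanitize_val_alu(). A compact sketch of just that bookkeeping (types and values simplified from the kernel's bpf_insn_aux_data):

#include <errno.h>
#include <stdio.h>

/* Simplified per-instruction aux data; the real one lives in the verifier. */
struct insn_aux { unsigned int alu_state; unsigned int alu_limit; };

static int update_alu_sanitation_state(struct insn_aux *aux,
				       unsigned int alu_state,
				       unsigned int alu_limit)
{
	/* If this instruction was reached via branches that need different
	 * masking, a single rewrite cannot sanitize it: reject.
	 */
	if (aux->alu_state &&
	    (aux->alu_state != alu_state || aux->alu_limit != alu_limit))
		return -EACCES;

	aux->alu_state = alu_state;
	aux->alu_limit = alu_limit;
	return 0;
}

int main(void)
{
	struct insn_aux aux = { 0, 0 };

	printf("%d\n", update_alu_sanitation_state(&aux, 1, 16));	/* 0 */
	printf("%d\n", update_alu_sanitation_state(&aux, 1, 16));	/* 0, same state */
	printf("%d\n", update_alu_sanitation_state(&aux, 1, 32));	/* -EACCES */
	return 0;
}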
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 91d5c38eb7e5..d1c6d152da89 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
| @@ -376,9 +376,6 @@ void __weak arch_smt_update(void) { } | |||
| 376 | 376 | ||
| 377 | #ifdef CONFIG_HOTPLUG_SMT | 377 | #ifdef CONFIG_HOTPLUG_SMT |
| 378 | enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED; | 378 | enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED; |
| 379 | EXPORT_SYMBOL_GPL(cpu_smt_control); | ||
| 380 | |||
| 381 | static bool cpu_smt_available __read_mostly; | ||
| 382 | 379 | ||
| 383 | void __init cpu_smt_disable(bool force) | 380 | void __init cpu_smt_disable(bool force) |
| 384 | { | 381 | { |
| @@ -397,25 +394,11 @@ void __init cpu_smt_disable(bool force) | |||
| 397 | 394 | ||
| 398 | /* | 395 | /* |
| 399 | * The decision whether SMT is supported can only be done after the full | 396 | * The decision whether SMT is supported can only be done after the full |
| 400 | * CPU identification. Called from architecture code before non boot CPUs | 397 | * CPU identification. Called from architecture code. |
| 401 | * are brought up. | ||
| 402 | */ | ||
| 403 | void __init cpu_smt_check_topology_early(void) | ||
| 404 | { | ||
| 405 | if (!topology_smt_supported()) | ||
| 406 | cpu_smt_control = CPU_SMT_NOT_SUPPORTED; | ||
| 407 | } | ||
| 408 | |||
| 409 | /* | ||
| 410 | * If SMT was disabled by BIOS, detect it here, after the CPUs have been | ||
| 411 | * brought online. This ensures the smt/l1tf sysfs entries are consistent | ||
| 412 | * with reality. cpu_smt_available is set to true during the bringup of non | ||
| 413 | * boot CPUs when a SMT sibling is detected. Note, this may overwrite | ||
| 414 | * cpu_smt_control's previous setting. | ||
| 415 | */ | 398 | */ |
| 416 | void __init cpu_smt_check_topology(void) | 399 | void __init cpu_smt_check_topology(void) |
| 417 | { | 400 | { |
| 418 | if (!cpu_smt_available) | 401 | if (!topology_smt_supported()) |
| 419 | cpu_smt_control = CPU_SMT_NOT_SUPPORTED; | 402 | cpu_smt_control = CPU_SMT_NOT_SUPPORTED; |
| 420 | } | 403 | } |
| 421 | 404 | ||
| @@ -428,18 +411,10 @@ early_param("nosmt", smt_cmdline_disable); | |||
| 428 | 411 | ||
| 429 | static inline bool cpu_smt_allowed(unsigned int cpu) | 412 | static inline bool cpu_smt_allowed(unsigned int cpu) |
| 430 | { | 413 | { |
| 431 | if (topology_is_primary_thread(cpu)) | 414 | if (cpu_smt_control == CPU_SMT_ENABLED) |
| 432 | return true; | 415 | return true; |
| 433 | 416 | ||
| 434 | /* | 417 | if (topology_is_primary_thread(cpu)) |
| 435 | * If the CPU is not a 'primary' thread and the booted_once bit is | ||
| 436 | * set then the processor has SMT support. Store this information | ||
| 437 | * for the late check of SMT support in cpu_smt_check_topology(). | ||
| 438 | */ | ||
| 439 | if (per_cpu(cpuhp_state, cpu).booted_once) | ||
| 440 | cpu_smt_available = true; | ||
| 441 | |||
| 442 | if (cpu_smt_control == CPU_SMT_ENABLED) | ||
| 443 | return true; | 418 | return true; |
| 444 | 419 | ||
| 445 | /* | 420 | /* |
| @@ -2090,10 +2065,8 @@ static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) | |||
| 2090 | */ | 2065 | */ |
| 2091 | cpuhp_offline_cpu_device(cpu); | 2066 | cpuhp_offline_cpu_device(cpu); |
| 2092 | } | 2067 | } |
| 2093 | if (!ret) { | 2068 | if (!ret) |
| 2094 | cpu_smt_control = ctrlval; | 2069 | cpu_smt_control = ctrlval; |
| 2095 | arch_smt_update(); | ||
| 2096 | } | ||
| 2097 | cpu_maps_update_done(); | 2070 | cpu_maps_update_done(); |
| 2098 | return ret; | 2071 | return ret; |
| 2099 | } | 2072 | } |
| @@ -2104,7 +2077,6 @@ static int cpuhp_smt_enable(void) | |||
| 2104 | 2077 | ||
| 2105 | cpu_maps_update_begin(); | 2078 | cpu_maps_update_begin(); |
| 2106 | cpu_smt_control = CPU_SMT_ENABLED; | 2079 | cpu_smt_control = CPU_SMT_ENABLED; |
| 2107 | arch_smt_update(); | ||
| 2108 | for_each_present_cpu(cpu) { | 2080 | for_each_present_cpu(cpu) { |
| 2109 | /* Skip online CPUs and CPUs on offline nodes */ | 2081 | /* Skip online CPUs and CPUs on offline nodes */ |
| 2110 | if (cpu_online(cpu) || !node_online(cpu_to_node(cpu))) | 2082 | if (cpu_online(cpu) || !node_online(cpu_to_node(cpu))) |
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index d6361776dc5c..1fb6fd68b9c7 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
| @@ -378,6 +378,8 @@ void __init swiotlb_exit(void) | |||
| 378 | memblock_free_late(io_tlb_start, | 378 | memblock_free_late(io_tlb_start, |
| 379 | PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); | 379 | PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); |
| 380 | } | 380 | } |
| 381 | io_tlb_start = 0; | ||
| 382 | io_tlb_end = 0; | ||
| 381 | io_tlb_nslabs = 0; | 383 | io_tlb_nslabs = 0; |
| 382 | max_segment = 0; | 384 | max_segment = 0; |
| 383 | } | 385 | } |
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3cd13a30f732..26d6edab051a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
| @@ -436,18 +436,18 @@ int perf_proc_update_handler(struct ctl_table *table, int write, | |||
| 436 | void __user *buffer, size_t *lenp, | 436 | void __user *buffer, size_t *lenp, |
| 437 | loff_t *ppos) | 437 | loff_t *ppos) |
| 438 | { | 438 | { |
| 439 | int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); | 439 | int ret; |
| 440 | 440 | int perf_cpu = sysctl_perf_cpu_time_max_percent; | |
| 441 | if (ret || !write) | ||
| 442 | return ret; | ||
| 443 | |||
| 444 | /* | 441 | /* |
| 445 | * If throttling is disabled don't allow the write: | 442 | * If throttling is disabled don't allow the write: |
| 446 | */ | 443 | */ |
| 447 | if (sysctl_perf_cpu_time_max_percent == 100 || | 444 | if (write && (perf_cpu == 100 || perf_cpu == 0)) |
| 448 | sysctl_perf_cpu_time_max_percent == 0) | ||
| 449 | return -EINVAL; | 445 | return -EINVAL; |
| 450 | 446 | ||
| 447 | ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); | ||
| 448 | if (ret || !write) | ||
| 449 | return ret; | ||
| 450 | |||
| 451 | max_samples_per_tick = DIV_ROUND_UP(sysctl_perf_event_sample_rate, HZ); | 451 | max_samples_per_tick = DIV_ROUND_UP(sysctl_perf_event_sample_rate, HZ); |
| 452 | perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate; | 452 | perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate; |
| 453 | update_perf_cpu_limits(); | 453 | update_perf_cpu_limits(); |
| @@ -4963,6 +4963,11 @@ static void __perf_event_period(struct perf_event *event, | |||
| 4963 | } | 4963 | } |
| 4964 | } | 4964 | } |
| 4965 | 4965 | ||
| 4966 | static int perf_event_check_period(struct perf_event *event, u64 value) | ||
| 4967 | { | ||
| 4968 | return event->pmu->check_period(event, value); | ||
| 4969 | } | ||
| 4970 | |||
| 4966 | static int perf_event_period(struct perf_event *event, u64 __user *arg) | 4971 | static int perf_event_period(struct perf_event *event, u64 __user *arg) |
| 4967 | { | 4972 | { |
| 4968 | u64 value; | 4973 | u64 value; |
| @@ -4979,6 +4984,9 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg) | |||
| 4979 | if (event->attr.freq && value > sysctl_perf_event_sample_rate) | 4984 | if (event->attr.freq && value > sysctl_perf_event_sample_rate) |
| 4980 | return -EINVAL; | 4985 | return -EINVAL; |
| 4981 | 4986 | ||
| 4987 | if (perf_event_check_period(event, value)) | ||
| 4988 | return -EINVAL; | ||
| 4989 | |||
| 4982 | event_function_call(event, __perf_event_period, &value); | 4990 | event_function_call(event, __perf_event_period, &value); |
| 4983 | 4991 | ||
| 4984 | return 0; | 4992 | return 0; |
| @@ -9391,6 +9399,11 @@ static int perf_pmu_nop_int(struct pmu *pmu) | |||
| 9391 | return 0; | 9399 | return 0; |
| 9392 | } | 9400 | } |
| 9393 | 9401 | ||
| 9402 | static int perf_event_nop_int(struct perf_event *event, u64 value) | ||
| 9403 | { | ||
| 9404 | return 0; | ||
| 9405 | } | ||
| 9406 | |||
| 9394 | static DEFINE_PER_CPU(unsigned int, nop_txn_flags); | 9407 | static DEFINE_PER_CPU(unsigned int, nop_txn_flags); |
| 9395 | 9408 | ||
| 9396 | static void perf_pmu_start_txn(struct pmu *pmu, unsigned int flags) | 9409 | static void perf_pmu_start_txn(struct pmu *pmu, unsigned int flags) |
| @@ -9691,6 +9704,9 @@ got_cpu_context: | |||
| 9691 | pmu->pmu_disable = perf_pmu_nop_void; | 9704 | pmu->pmu_disable = perf_pmu_nop_void; |
| 9692 | } | 9705 | } |
| 9693 | 9706 | ||
| 9707 | if (!pmu->check_period) | ||
| 9708 | pmu->check_period = perf_event_nop_int; | ||
| 9709 | |||
| 9694 | if (!pmu->event_idx) | 9710 | if (!pmu->event_idx) |
| 9695 | pmu->event_idx = perf_event_idx_default; | 9711 | pmu->event_idx = perf_event_idx_default; |
| 9696 | 9712 | ||
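The perf changes add an optional check_period PMU callback and install a default that accepts every value, so perf_event_period() can call it unconditionally. A small sketch of that optional-callback pattern (struct layout and names simplified, not the kernel definitions):

#include <stdio.h>

struct event;				/* opaque for this sketch */

struct pmu {
	int (*check_period)(struct event *event, unsigned long long value);
};

static int nop_check_period(struct event *event, unsigned long long value)
{
	(void)event; (void)value;
	return 0;			/* accept any period */
}

static void pmu_register(struct pmu *pmu)
{
	if (!pmu->check_period)		/* fill the optional hook with a nop */
		pmu->check_period = nop_check_period;
}

int main(void)
{
	struct pmu pmu = { 0 };

	pmu_register(&pmu);
	printf("period accepted: %d\n", pmu.check_period(NULL, 1000) == 0);
	return 0;
}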
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 4a9937076331..5ab4fe3b1dcc 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
| @@ -734,6 +734,9 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags) | |||
| 734 | size = sizeof(struct ring_buffer); | 734 | size = sizeof(struct ring_buffer); |
| 735 | size += nr_pages * sizeof(void *); | 735 | size += nr_pages * sizeof(void *); |
| 736 | 736 | ||
| 737 | if (order_base_2(size) >= PAGE_SHIFT+MAX_ORDER) | ||
| 738 | goto fail; | ||
| 739 | |||
| 737 | rb = kzalloc(size, GFP_KERNEL); | 740 | rb = kzalloc(size, GFP_KERNEL); |
| 738 | if (!rb) | 741 | if (!rb) |
| 739 | goto fail; | 742 | goto fail; |
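rb_alloc() now refuses requests whose allocation order would reach MAX_ORDER before calling kzalloc(). A standalone sketch of that guard; the PAGE_SHIFT and MAX_ORDER values below are typical defaults assumed only for illustration:

#include <stdio.h>

#define PAGE_SHIFT 12	/* assumed 4 KiB pages */
#define MAX_ORDER  11	/* assumed buddy-allocator limit */

/* Smallest order such that (1UL << order) >= n, like order_base_2(). */
static unsigned int order_base_2(unsigned long n)
{
	unsigned int order = 0;

	while ((1UL << order) < n)
		order++;
	return order;
}

int main(void)
{
	unsigned long size = (1UL << 23) + 64;	/* oversized metadata request */

	if (order_base_2(size) >= PAGE_SHIFT + MAX_ORDER)
		puts("refuse: request exceeds the allocator's MAX_ORDER");
	else
		puts("ok to allocate");
	return 0;
}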
diff --git a/kernel/exit.c b/kernel/exit.c
index 2d14979577ee..2639a30a8aa5 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
| @@ -307,7 +307,7 @@ void rcuwait_wake_up(struct rcuwait *w) | |||
| 307 | * MB (A) MB (B) | 307 | * MB (A) MB (B) |
| 308 | * [L] cond [L] tsk | 308 | * [L] cond [L] tsk |
| 309 | */ | 309 | */ |
| 310 | smp_rmb(); /* (B) */ | 310 | smp_mb(); /* (B) */ |
| 311 | 311 | ||
| 312 | /* | 312 | /* |
| 313 | * Avoid using task_rcu_dereference() magic as long as we are careful, | 313 | * Avoid using task_rcu_dereference() magic as long as we are careful, |
| @@ -558,12 +558,14 @@ static struct task_struct *find_alive_thread(struct task_struct *p) | |||
| 558 | return NULL; | 558 | return NULL; |
| 559 | } | 559 | } |
| 560 | 560 | ||
| 561 | static struct task_struct *find_child_reaper(struct task_struct *father) | 561 | static struct task_struct *find_child_reaper(struct task_struct *father, |
| 562 | struct list_head *dead) | ||
| 562 | __releases(&tasklist_lock) | 563 | __releases(&tasklist_lock) |
| 563 | __acquires(&tasklist_lock) | 564 | __acquires(&tasklist_lock) |
| 564 | { | 565 | { |
| 565 | struct pid_namespace *pid_ns = task_active_pid_ns(father); | 566 | struct pid_namespace *pid_ns = task_active_pid_ns(father); |
| 566 | struct task_struct *reaper = pid_ns->child_reaper; | 567 | struct task_struct *reaper = pid_ns->child_reaper; |
| 568 | struct task_struct *p, *n; | ||
| 567 | 569 | ||
| 568 | if (likely(reaper != father)) | 570 | if (likely(reaper != father)) |
| 569 | return reaper; | 571 | return reaper; |
| @@ -579,6 +581,12 @@ static struct task_struct *find_child_reaper(struct task_struct *father) | |||
| 579 | panic("Attempted to kill init! exitcode=0x%08x\n", | 581 | panic("Attempted to kill init! exitcode=0x%08x\n", |
| 580 | father->signal->group_exit_code ?: father->exit_code); | 582 | father->signal->group_exit_code ?: father->exit_code); |
| 581 | } | 583 | } |
| 584 | |||
| 585 | list_for_each_entry_safe(p, n, dead, ptrace_entry) { | ||
| 586 | list_del_init(&p->ptrace_entry); | ||
| 587 | release_task(p); | ||
| 588 | } | ||
| 589 | |||
| 582 | zap_pid_ns_processes(pid_ns); | 590 | zap_pid_ns_processes(pid_ns); |
| 583 | write_lock_irq(&tasklist_lock); | 591 | write_lock_irq(&tasklist_lock); |
| 584 | 592 | ||
| @@ -668,7 +676,7 @@ static void forget_original_parent(struct task_struct *father, | |||
| 668 | exit_ptrace(father, dead); | 676 | exit_ptrace(father, dead); |
| 669 | 677 | ||
| 670 | /* Can drop and reacquire tasklist_lock */ | 678 | /* Can drop and reacquire tasklist_lock */ |
| 671 | reaper = find_child_reaper(father); | 679 | reaper = find_child_reaper(father, dead); |
| 672 | if (list_empty(&father->children)) | 680 | if (list_empty(&father->children)) |
| 673 | return; | 681 | return; |
| 674 | 682 | ||
| @@ -866,6 +874,7 @@ void __noreturn do_exit(long code) | |||
| 866 | exit_task_namespaces(tsk); | 874 | exit_task_namespaces(tsk); |
| 867 | exit_task_work(tsk); | 875 | exit_task_work(tsk); |
| 868 | exit_thread(tsk); | 876 | exit_thread(tsk); |
| 877 | exit_umh(tsk); | ||
| 869 | 878 | ||
| 870 | /* | 879 | /* |
| 871 | * Flush inherited counters to the parent - before the parent | 880 | * Flush inherited counters to the parent - before the parent |
diff --git a/kernel/fork.c b/kernel/fork.c
index a60459947f18..b69248e6f0e0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
| @@ -217,6 +217,7 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) | |||
| 217 | memset(s->addr, 0, THREAD_SIZE); | 217 | memset(s->addr, 0, THREAD_SIZE); |
| 218 | 218 | ||
| 219 | tsk->stack_vm_area = s; | 219 | tsk->stack_vm_area = s; |
| 220 | tsk->stack = s->addr; | ||
| 220 | return s->addr; | 221 | return s->addr; |
| 221 | } | 222 | } |
| 222 | 223 | ||
| @@ -1833,8 +1834,6 @@ static __latent_entropy struct task_struct *copy_process( | |||
| 1833 | 1834 | ||
| 1834 | posix_cpu_timers_init(p); | 1835 | posix_cpu_timers_init(p); |
| 1835 | 1836 | ||
| 1836 | p->start_time = ktime_get_ns(); | ||
| 1837 | p->real_start_time = ktime_get_boot_ns(); | ||
| 1838 | p->io_context = NULL; | 1837 | p->io_context = NULL; |
| 1839 | audit_set_context(p, NULL); | 1838 | audit_set_context(p, NULL); |
| 1840 | cgroup_fork(p); | 1839 | cgroup_fork(p); |
| @@ -2001,6 +2000,17 @@ static __latent_entropy struct task_struct *copy_process( | |||
| 2001 | goto bad_fork_free_pid; | 2000 | goto bad_fork_free_pid; |
| 2002 | 2001 | ||
| 2003 | /* | 2002 | /* |
| 2003 | * From this point on we must avoid any synchronous user-space | ||
| 2004 | * communication until we take the tasklist-lock. In particular, we do | ||
| 2005 | * not want user-space to be able to predict the process start-time by | ||
| 2006 | * stalling fork(2) after we recorded the start_time but before it is | ||
| 2007 | * visible to the system. | ||
| 2008 | */ | ||
| 2009 | |||
| 2010 | p->start_time = ktime_get_ns(); | ||
| 2011 | p->real_start_time = ktime_get_boot_ns(); | ||
| 2012 | |||
| 2013 | /* | ||
| 2004 | * Make it visible to the rest of the system, but dont wake it up yet. | 2014 | * Make it visible to the rest of the system, but dont wake it up yet. |
| 2005 | * Need tasklist lock for parent etc handling! | 2015 | * Need tasklist lock for parent etc handling! |
| 2006 | */ | 2016 | */ |
diff --git a/kernel/futex.c b/kernel/futex.c
index be3bff2315ff..a0514e01c3eb 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
| @@ -1452,11 +1452,7 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q) | |||
| 1452 | if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n")) | 1452 | if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n")) |
| 1453 | return; | 1453 | return; |
| 1454 | 1454 | ||
| 1455 | /* | 1455 | get_task_struct(p); |
| 1456 | * Queue the task for later wakeup for after we've released | ||
| 1457 | * the hb->lock. wake_q_add() grabs reference to p. | ||
| 1458 | */ | ||
| 1459 | wake_q_add(wake_q, p); | ||
| 1460 | __unqueue_futex(q); | 1456 | __unqueue_futex(q); |
| 1461 | /* | 1457 | /* |
| 1462 | * The waiting task can free the futex_q as soon as q->lock_ptr = NULL | 1458 | * The waiting task can free the futex_q as soon as q->lock_ptr = NULL |
| @@ -1466,6 +1462,13 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q) | |||
| 1466 | * plist_del in __unqueue_futex(). | 1462 | * plist_del in __unqueue_futex(). |
| 1467 | */ | 1463 | */ |
| 1468 | smp_store_release(&q->lock_ptr, NULL); | 1464 | smp_store_release(&q->lock_ptr, NULL); |
| 1465 | |||
| 1466 | /* | ||
| 1467 | * Queue the task for later wakeup for after we've released | ||
| 1468 | * the hb->lock. wake_q_add() grabs reference to p. | ||
| 1469 | */ | ||
| 1470 | wake_q_add(wake_q, p); | ||
| 1471 | put_task_struct(p); | ||
| 1469 | } | 1472 | } |
| 1470 | 1473 | ||
| 1471 | /* | 1474 | /* |
| @@ -2218,11 +2221,11 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q) | |||
| 2218 | * decrement the counter at queue_unlock() when some error has | 2221 | * decrement the counter at queue_unlock() when some error has |
| 2219 | * occurred and we don't end up adding the task to the list. | 2222 | * occurred and we don't end up adding the task to the list. |
| 2220 | */ | 2223 | */ |
| 2221 | hb_waiters_inc(hb); | 2224 | hb_waiters_inc(hb); /* implies smp_mb(); (A) */ |
| 2222 | 2225 | ||
| 2223 | q->lock_ptr = &hb->lock; | 2226 | q->lock_ptr = &hb->lock; |
| 2224 | 2227 | ||
| 2225 | spin_lock(&hb->lock); /* implies smp_mb(); (A) */ | 2228 | spin_lock(&hb->lock); |
| 2226 | return hb; | 2229 | return hb; |
| 2227 | } | 2230 | } |
| 2228 | 2231 | ||
| @@ -2858,35 +2861,39 @@ retry_private: | |||
| 2858 | * and BUG when futex_unlock_pi() interleaves with this. | 2861 | * and BUG when futex_unlock_pi() interleaves with this. |
| 2859 | * | 2862 | * |
| 2860 | * Therefore acquire wait_lock while holding hb->lock, but drop the | 2863 | * Therefore acquire wait_lock while holding hb->lock, but drop the |
| 2861 | * latter before calling rt_mutex_start_proxy_lock(). This still fully | 2864 | * latter before calling __rt_mutex_start_proxy_lock(). This |
| 2862 | * serializes against futex_unlock_pi() as that does the exact same | 2865 | * interleaves with futex_unlock_pi() -- which does a similar lock |
| 2863 | * lock handoff sequence. | 2866 | * handoff -- such that the latter can observe the futex_q::pi_state |
| 2867 | * before __rt_mutex_start_proxy_lock() is done. | ||
| 2864 | */ | 2868 | */ |
| 2865 | raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock); | 2869 | raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock); |
| 2866 | spin_unlock(q.lock_ptr); | 2870 | spin_unlock(q.lock_ptr); |
| 2871 | /* | ||
| 2872 | * __rt_mutex_start_proxy_lock() unconditionally enqueues the @rt_waiter | ||
| 2873 | * such that futex_unlock_pi() is guaranteed to observe the waiter when | ||
| 2874 | * it sees the futex_q::pi_state. | ||
| 2875 | */ | ||
| 2867 | ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current); | 2876 | ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current); |
| 2868 | raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock); | 2877 | raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock); |
| 2869 | 2878 | ||
| 2870 | if (ret) { | 2879 | if (ret) { |
| 2871 | if (ret == 1) | 2880 | if (ret == 1) |
| 2872 | ret = 0; | 2881 | ret = 0; |
| 2873 | 2882 | goto cleanup; | |
| 2874 | spin_lock(q.lock_ptr); | ||
| 2875 | goto no_block; | ||
| 2876 | } | 2883 | } |
| 2877 | 2884 | ||
| 2878 | |||
| 2879 | if (unlikely(to)) | 2885 | if (unlikely(to)) |
| 2880 | hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS); | 2886 | hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS); |
| 2881 | 2887 | ||
| 2882 | ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter); | 2888 | ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter); |
| 2883 | 2889 | ||
| 2890 | cleanup: | ||
| 2884 | spin_lock(q.lock_ptr); | 2891 | spin_lock(q.lock_ptr); |
| 2885 | /* | 2892 | /* |
| 2886 | * If we failed to acquire the lock (signal/timeout), we must | 2893 | * If we failed to acquire the lock (deadlock/signal/timeout), we must |
| 2887 | * first acquire the hb->lock before removing the lock from the | 2894 | * first acquire the hb->lock before removing the lock from the |
| 2888 | * rt_mutex waitqueue, such that we can keep the hb and rt_mutex | 2895 | * rt_mutex waitqueue, such that we can keep the hb and rt_mutex wait |
| 2889 | * wait lists consistent. | 2896 | * lists consistent. |
| 2890 | * | 2897 | * |
| 2891 | * In particular; it is important that futex_unlock_pi() can not | 2898 | * In particular; it is important that futex_unlock_pi() can not |
| 2892 | * observe this inconsistency. | 2899 | * observe this inconsistency. |
| @@ -3010,6 +3017,10 @@ retry: | |||
| 3010 | * there is no point where we hold neither; and therefore | 3017 | * there is no point where we hold neither; and therefore |
| 3011 | * wake_futex_pi() must observe a state consistent with what we | 3018 | * wake_futex_pi() must observe a state consistent with what we |
| 3012 | * observed. | 3019 | * observed. |
| 3020 | * | ||
| 3021 | * In particular; this forces __rt_mutex_start_proxy() to | ||
| 3022 | * complete such that we're guaranteed to observe the | ||
| 3023 | * rt_waiter. Also see the WARN in wake_futex_pi(). | ||
| 3013 | */ | 3024 | */ |
| 3014 | raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); | 3025 | raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); |
| 3015 | spin_unlock(&hb->lock); | 3026 | spin_unlock(&hb->lock); |
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index ee062b7939d3..ef8ad36cadcf 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
| @@ -457,7 +457,7 @@ static int alloc_descs(unsigned int start, unsigned int cnt, int node, | |||
| 457 | 457 | ||
| 458 | /* Validate affinity mask(s) */ | 458 | /* Validate affinity mask(s) */ |
| 459 | if (affinity) { | 459 | if (affinity) { |
| 460 | for (i = 0; i < cnt; i++, i++) { | 460 | for (i = 0; i < cnt; i++) { |
| 461 | if (cpumask_empty(&affinity[i].mask)) | 461 | if (cpumask_empty(&affinity[i].mask)) |
| 462 | return -EINVAL; | 462 | return -EINVAL; |
| 463 | } | 463 | } |
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index a4888ce4667a..84b54a17b95d 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
| @@ -393,6 +393,9 @@ int irq_setup_affinity(struct irq_desc *desc) | |||
| 393 | } | 393 | } |
| 394 | 394 | ||
| 395 | cpumask_and(&mask, cpu_online_mask, set); | 395 | cpumask_and(&mask, cpu_online_mask, set); |
| 396 | if (cpumask_empty(&mask)) | ||
| 397 | cpumask_copy(&mask, cpu_online_mask); | ||
| 398 | |||
| 396 | if (node != NUMA_NO_NODE) { | 399 | if (node != NUMA_NO_NODE) { |
| 397 | const struct cpumask *nodemask = cpumask_of_node(node); | 400 | const struct cpumask *nodemask = cpumask_of_node(node); |
| 398 | 401 | ||
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 581edcc63c26..978d63a8261c 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
| @@ -1726,12 +1726,33 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock, | |||
| 1726 | rt_mutex_set_owner(lock, NULL); | 1726 | rt_mutex_set_owner(lock, NULL); |
| 1727 | } | 1727 | } |
| 1728 | 1728 | ||
| 1729 | /** | ||
| 1730 | * __rt_mutex_start_proxy_lock() - Start lock acquisition for another task | ||
| 1731 | * @lock: the rt_mutex to take | ||
| 1732 | * @waiter: the pre-initialized rt_mutex_waiter | ||
| 1733 | * @task: the task to prepare | ||
| 1734 | * | ||
| 1735 | * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock | ||
| 1736 | * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that. | ||
| 1737 | * | ||
| 1738 | * NOTE: does _NOT_ remove the @waiter on failure; must either call | ||
| 1739 | * rt_mutex_wait_proxy_lock() or rt_mutex_cleanup_proxy_lock() after this. | ||
| 1740 | * | ||
| 1741 | * Returns: | ||
| 1742 | * 0 - task blocked on lock | ||
| 1743 | * 1 - acquired the lock for task, caller should wake it up | ||
| 1744 | * <0 - error | ||
| 1745 | * | ||
| 1746 | * Special API call for PI-futex support. | ||
| 1747 | */ | ||
| 1729 | int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, | 1748 | int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, |
| 1730 | struct rt_mutex_waiter *waiter, | 1749 | struct rt_mutex_waiter *waiter, |
| 1731 | struct task_struct *task) | 1750 | struct task_struct *task) |
| 1732 | { | 1751 | { |
| 1733 | int ret; | 1752 | int ret; |
| 1734 | 1753 | ||
| 1754 | lockdep_assert_held(&lock->wait_lock); | ||
| 1755 | |||
| 1735 | if (try_to_take_rt_mutex(lock, task, NULL)) | 1756 | if (try_to_take_rt_mutex(lock, task, NULL)) |
| 1736 | return 1; | 1757 | return 1; |
| 1737 | 1758 | ||
| @@ -1749,9 +1770,6 @@ int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, | |||
| 1749 | ret = 0; | 1770 | ret = 0; |
| 1750 | } | 1771 | } |
| 1751 | 1772 | ||
| 1752 | if (unlikely(ret)) | ||
| 1753 | remove_waiter(lock, waiter); | ||
| 1754 | |||
| 1755 | debug_rt_mutex_print_deadlock(waiter); | 1773 | debug_rt_mutex_print_deadlock(waiter); |
| 1756 | 1774 | ||
| 1757 | return ret; | 1775 | return ret; |
| @@ -1763,12 +1781,18 @@ int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, | |||
| 1763 | * @waiter: the pre-initialized rt_mutex_waiter | 1781 | * @waiter: the pre-initialized rt_mutex_waiter |
| 1764 | * @task: the task to prepare | 1782 | * @task: the task to prepare |
| 1765 | * | 1783 | * |
| 1784 | * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock | ||
| 1785 | * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that. | ||
| 1786 | * | ||
| 1787 | * NOTE: unlike __rt_mutex_start_proxy_lock this _DOES_ remove the @waiter | ||
| 1788 | * on failure. | ||
| 1789 | * | ||
| 1766 | * Returns: | 1790 | * Returns: |
| 1767 | * 0 - task blocked on lock | 1791 | * 0 - task blocked on lock |
| 1768 | * 1 - acquired the lock for task, caller should wake it up | 1792 | * 1 - acquired the lock for task, caller should wake it up |
| 1769 | * <0 - error | 1793 | * <0 - error |
| 1770 | * | 1794 | * |
| 1771 | * Special API call for FUTEX_REQUEUE_PI support. | 1795 | * Special API call for PI-futex support. |
| 1772 | */ | 1796 | */ |
| 1773 | int rt_mutex_start_proxy_lock(struct rt_mutex *lock, | 1797 | int rt_mutex_start_proxy_lock(struct rt_mutex *lock, |
| 1774 | struct rt_mutex_waiter *waiter, | 1798 | struct rt_mutex_waiter *waiter, |
| @@ -1778,6 +1802,8 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock, | |||
| 1778 | 1802 | ||
| 1779 | raw_spin_lock_irq(&lock->wait_lock); | 1803 | raw_spin_lock_irq(&lock->wait_lock); |
| 1780 | ret = __rt_mutex_start_proxy_lock(lock, waiter, task); | 1804 | ret = __rt_mutex_start_proxy_lock(lock, waiter, task); |
| 1805 | if (unlikely(ret)) | ||
| 1806 | remove_waiter(lock, waiter); | ||
| 1781 | raw_spin_unlock_irq(&lock->wait_lock); | 1807 | raw_spin_unlock_irq(&lock->wait_lock); |
| 1782 | 1808 | ||
| 1783 | return ret; | 1809 | return ret; |
| @@ -1845,7 +1871,8 @@ int rt_mutex_wait_proxy_lock(struct rt_mutex *lock, | |||
| 1845 | * @lock: the rt_mutex we were woken on | 1871 | * @lock: the rt_mutex we were woken on |
| 1846 | * @waiter: the pre-initialized rt_mutex_waiter | 1872 | * @waiter: the pre-initialized rt_mutex_waiter |
| 1847 | * | 1873 | * |
| 1848 | * Attempt to clean up after a failed rt_mutex_wait_proxy_lock(). | 1874 | * Attempt to clean up after a failed __rt_mutex_start_proxy_lock() or |
| 1875 | * rt_mutex_wait_proxy_lock(). | ||
| 1849 | * | 1876 | * |
| 1850 | * Unless we acquired the lock; we're still enqueued on the wait-list and can | 1877 | * Unless we acquired the lock; we're still enqueued on the wait-list and can |
| 1851 | * in fact still be granted ownership until we're removed. Therefore we can | 1878 | * in fact still be granted ownership until we're removed. Therefore we can |
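As context for the kerneldoc above: the proxy-lock helpers are consumed by the PI-futex code in a start/wait/cleanup sequence. The sketch below is illustrative only; it assumes the rt_mutex_wait_proxy_lock()/rt_mutex_cleanup_proxy_lock() signatures from elsewhere in rtmutex.c, and the example_* function names are invented.

/*
 * Illustrative proxy-lock lifecycle (not part of this diff).  With this
 * change, a failed __rt_mutex_start_proxy_lock() leaves the waiter
 * enqueued; rt_mutex_start_proxy_lock() or a later
 * rt_mutex_cleanup_proxy_lock() is what removes it.
 */
static int example_start(struct rt_mutex *pi_mutex,
			 struct rt_mutex_waiter *waiter,
			 struct task_struct *task)
{
	int ret = rt_mutex_start_proxy_lock(pi_mutex, waiter, task);

	if (ret == 1)
		return 0;	/* acquired for @task; caller wakes it */
	return ret;		/* 0: @task blocked, <0: deadlock/error */
}

/* Later, in @task's own context, once it has been requeued and resumes: */
static int example_finish(struct rt_mutex *pi_mutex,
			  struct hrtimer_sleeper *to,
			  struct rt_mutex_waiter *waiter)
{
	int ret = rt_mutex_wait_proxy_lock(pi_mutex, to, waiter);

	/* Even on timeout/signal we may have become owner meanwhile. */
	if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, waiter))
		ret = 0;
	return ret;
}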
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 09b180063ee1..50d9af615dc4 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c | |||
| @@ -198,15 +198,22 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem, | |||
| 198 | woken++; | 198 | woken++; |
| 199 | tsk = waiter->task; | 199 | tsk = waiter->task; |
| 200 | 200 | ||
| 201 | wake_q_add(wake_q, tsk); | 201 | get_task_struct(tsk); |
| 202 | list_del(&waiter->list); | 202 | list_del(&waiter->list); |
| 203 | /* | 203 | /* |
| 204 | * Ensure that the last operation is setting the reader | 204 | * Ensure calling get_task_struct() before setting the reader |
| 205 | * waiter to nil such that rwsem_down_read_failed() cannot | 205 | * waiter to nil such that rwsem_down_read_failed() cannot |
| 206 | * race with do_exit() by always holding a reference count | 206 | * race with do_exit() by always holding a reference count |
| 207 | * to the task to wakeup. | 207 | * to the task to wakeup. |
| 208 | */ | 208 | */ |
| 209 | smp_store_release(&waiter->task, NULL); | 209 | smp_store_release(&waiter->task, NULL); |
| 210 | /* | ||
| 211 | * Ensure issuing the wakeup (either by us or someone else) | ||
| 212 | * after setting the reader waiter to nil. | ||
| 213 | */ | ||
| 214 | wake_q_add(wake_q, tsk); | ||
| 215 | /* wake_q_add() already takes the task ref */ | ||
| 216 | put_task_struct(tsk); | ||
| 210 | } | 217 | } |
| 211 | 218 | ||
| 212 | adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment; | 219 | adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment; |
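The new ordering comments pair with the reader slow path that waits for waiter->task to be cleared. A condensed, paraphrased sketch of that consumer side (struct rwsem_waiter details and error handling elided to the essentials):

/* Condensed, paraphrased reader slow path; not verbatim kernel code. */
static void example_wait_for_wakeup(struct rwsem_waiter *waiter)
{
	while (true) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		/*
		 * Pairs with smp_store_release(&waiter->task, NULL) above;
		 * once this reads NULL the task may return and exit, which
		 * is why the waker takes its own reference first.
		 */
		if (!waiter->task)
			break;
		schedule();
	}
	__set_current_state(TASK_RUNNING);
}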
diff --git a/kernel/relay.c b/kernel/relay.c index 04f248644e06..9e0f52375487 100644 --- a/kernel/relay.c +++ b/kernel/relay.c | |||
| @@ -428,6 +428,8 @@ static struct dentry *relay_create_buf_file(struct rchan *chan, | |||
| 428 | dentry = chan->cb->create_buf_file(tmpname, chan->parent, | 428 | dentry = chan->cb->create_buf_file(tmpname, chan->parent, |
| 429 | S_IRUSR, buf, | 429 | S_IRUSR, buf, |
| 430 | &chan->is_global); | 430 | &chan->is_global); |
| 431 | if (IS_ERR(dentry)) | ||
| 432 | dentry = NULL; | ||
| 431 | 433 | ||
| 432 | kfree(tmpname); | 434 | kfree(tmpname); |
| 433 | 435 | ||
| @@ -461,7 +463,7 @@ static struct rchan_buf *relay_open_buf(struct rchan *chan, unsigned int cpu) | |||
| 461 | dentry = chan->cb->create_buf_file(NULL, NULL, | 463 | dentry = chan->cb->create_buf_file(NULL, NULL, |
| 462 | S_IRUSR, buf, | 464 | S_IRUSR, buf, |
| 463 | &chan->is_global); | 465 | &chan->is_global); |
| 464 | if (WARN_ON(dentry)) | 466 | if (IS_ERR_OR_NULL(dentry)) |
| 465 | goto free_buf; | 467 | goto free_buf; |
| 466 | } | 468 | } |
| 467 | 469 | ||
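The relay change above tolerates client callbacks that hand back an error pointer, for example when they simply forward a debugfs_create_file() result. A hedged sketch of such a callback (the function name is invented; the fops choice mirrors what relay clients like blktrace typically do):

#include <linux/debugfs.h>
#include <linux/relay.h>

/*
 * Example create_buf_file() callback: debugfs_create_file() may return
 * an ERR_PTR(), which relay_create_buf_file() above now normalizes to
 * NULL instead of caching a bogus dentry.
 */
static struct dentry *example_create_buf_file(const char *filename,
					      struct dentry *parent,
					      umode_t mode,
					      struct rchan_buf *buf,
					      int *is_global)
{
	return debugfs_create_file(filename, mode, parent, buf,
				   &relay_file_operations);
}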
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index a674c7db2f29..d8d76a65cfdd 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
| @@ -396,6 +396,18 @@ static bool set_nr_if_polling(struct task_struct *p) | |||
| 396 | #endif | 396 | #endif |
| 397 | #endif | 397 | #endif |
| 398 | 398 | ||
| 399 | /** | ||
| 400 | * wake_q_add() - queue a wakeup for 'later' waking. | ||
| 401 | * @head: the wake_q_head to add @task to | ||
| 402 | * @task: the task to queue for 'later' wakeup | ||
| 403 | * | ||
| 404 | * Queue a task for later wakeup, most likely by the wake_up_q() call in the | ||
| 405 | * same context, _HOWEVER_ this is not guaranteed, the wakeup can come | ||
| 406 | * instantly. | ||
| 407 | * | ||
| 408 | * This function must be used as-if it were wake_up_process(); IOW the task | ||
| 409 | * must be ready to be woken at this location. | ||
| 410 | */ | ||
| 399 | void wake_q_add(struct wake_q_head *head, struct task_struct *task) | 411 | void wake_q_add(struct wake_q_head *head, struct task_struct *task) |
| 400 | { | 412 | { |
| 401 | struct wake_q_node *node = &task->wake_q; | 413 | struct wake_q_node *node = &task->wake_q; |
| @@ -405,10 +417,11 @@ void wake_q_add(struct wake_q_head *head, struct task_struct *task) | |||
| 405 | * it's already queued (either by us or someone else) and will get the | 417 | * it's already queued (either by us or someone else) and will get the |
| 406 | * wakeup due to that. | 418 | * wakeup due to that. |
| 407 | * | 419 | * |
| 408 | * This cmpxchg() executes a full barrier, which pairs with the full | 420 | * In order to ensure that a pending wakeup will observe our pending |
| 409 | * barrier executed by the wakeup in wake_up_q(). | 421 | * state, even in the failed case, an explicit smp_mb() must be used. |
| 410 | */ | 422 | */ |
| 411 | if (cmpxchg(&node->next, NULL, WAKE_Q_TAIL)) | 423 | smp_mb__before_atomic(); |
| 424 | if (cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL)) | ||
| 412 | return; | 425 | return; |
| 413 | 426 | ||
| 414 | get_task_struct(task); | 427 | get_task_struct(task); |
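For readers unfamiliar with the wake_q API the new kerneldoc documents, a minimal usage sketch follows (the example_waiter structure and function are invented for illustration):

#include <linux/list.h>
#include <linux/sched/wake_q.h>
#include <linux/spinlock.h>

struct example_waiter {
	struct list_head	list;
	struct task_struct	*task;
};

/*
 * Typical pattern: collect wakeups under a lock, issue them after the
 * lock is dropped.  Per the kerneldoc above, each task passed to
 * wake_q_add() must already be in a state where wake_up_process() would
 * be valid, because the wakeup may happen before wake_up_q() runs.
 */
static void example_wake_all(spinlock_t *lock, struct list_head *waiters)
{
	DEFINE_WAKE_Q(wake_q);
	struct example_waiter *w, *tmp;

	spin_lock(lock);
	list_for_each_entry_safe(w, tmp, waiters, list) {
		list_del_init(&w->list);
		wake_q_add(&wake_q, w->task);
	}
	spin_unlock(lock);

	wake_up_q(&wake_q);
}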
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 50aa2aba69bd..310d0637fe4b 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c | |||
| @@ -5980,6 +5980,7 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p | |||
| 5980 | 5980 | ||
| 5981 | #ifdef CONFIG_SCHED_SMT | 5981 | #ifdef CONFIG_SCHED_SMT |
| 5982 | DEFINE_STATIC_KEY_FALSE(sched_smt_present); | 5982 | DEFINE_STATIC_KEY_FALSE(sched_smt_present); |
| 5983 | EXPORT_SYMBOL_GPL(sched_smt_present); | ||
| 5983 | 5984 | ||
| 5984 | static inline void set_idle_cores(int cpu, int val) | 5985 | static inline void set_idle_cores(int cpu, int val) |
| 5985 | { | 5986 | { |
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index fe24de3fbc93..0e97ca9306ef 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c | |||
| @@ -124,6 +124,7 @@ | |||
| 124 | * sampling of the aggregate task states would be. | 124 | * sampling of the aggregate task states would be. |
| 125 | */ | 125 | */ |
| 126 | 126 | ||
| 127 | #include "../workqueue_internal.h" | ||
| 127 | #include <linux/sched/loadavg.h> | 128 | #include <linux/sched/loadavg.h> |
| 128 | #include <linux/seq_file.h> | 129 | #include <linux/seq_file.h> |
| 129 | #include <linux/proc_fs.h> | 130 | #include <linux/proc_fs.h> |
| @@ -321,7 +322,7 @@ static bool update_stats(struct psi_group *group) | |||
| 321 | expires = group->next_update; | 322 | expires = group->next_update; |
| 322 | if (now < expires) | 323 | if (now < expires) |
| 323 | goto out; | 324 | goto out; |
| 324 | if (now - expires > psi_period) | 325 | if (now - expires >= psi_period) |
| 325 | missed_periods = div_u64(now - expires, psi_period); | 326 | missed_periods = div_u64(now - expires, psi_period); |
| 326 | 327 | ||
| 327 | /* | 328 | /* |
| @@ -480,9 +481,6 @@ static void psi_group_change(struct psi_group *group, int cpu, | |||
| 480 | groupc->tasks[t]++; | 481 | groupc->tasks[t]++; |
| 481 | 482 | ||
| 482 | write_seqcount_end(&groupc->seq); | 483 | write_seqcount_end(&groupc->seq); |
| 483 | |||
| 484 | if (!delayed_work_pending(&group->clock_work)) | ||
| 485 | schedule_delayed_work(&group->clock_work, PSI_FREQ); | ||
| 486 | } | 484 | } |
| 487 | 485 | ||
| 488 | static struct psi_group *iterate_groups(struct task_struct *task, void **iter) | 486 | static struct psi_group *iterate_groups(struct task_struct *task, void **iter) |
| @@ -513,6 +511,7 @@ void psi_task_change(struct task_struct *task, int clear, int set) | |||
| 513 | { | 511 | { |
| 514 | int cpu = task_cpu(task); | 512 | int cpu = task_cpu(task); |
| 515 | struct psi_group *group; | 513 | struct psi_group *group; |
| 514 | bool wake_clock = true; | ||
| 516 | void *iter = NULL; | 515 | void *iter = NULL; |
| 517 | 516 | ||
| 518 | if (!task->pid) | 517 | if (!task->pid) |
| @@ -530,8 +529,22 @@ void psi_task_change(struct task_struct *task, int clear, int set) | |||
| 530 | task->psi_flags &= ~clear; | 529 | task->psi_flags &= ~clear; |
| 531 | task->psi_flags |= set; | 530 | task->psi_flags |= set; |
| 532 | 531 | ||
| 533 | while ((group = iterate_groups(task, &iter))) | 532 | /* |
| 533 | * Periodic aggregation shuts off if there is a period of no | ||
| 534 | * task changes, so we wake it back up if necessary. However, | ||
| 535 | * don't do this if the task change is the aggregation worker | ||
| 536 | * itself going to sleep, or we'll ping-pong forever. | ||
| 537 | */ | ||
| 538 | if (unlikely((clear & TSK_RUNNING) && | ||
| 539 | (task->flags & PF_WQ_WORKER) && | ||
| 540 | wq_worker_last_func(task) == psi_update_work)) | ||
| 541 | wake_clock = false; | ||
| 542 | |||
| 543 | while ((group = iterate_groups(task, &iter))) { | ||
| 534 | psi_group_change(group, cpu, clear, set); | 544 | psi_group_change(group, cpu, clear, set); |
| 545 | if (wake_clock && !delayed_work_pending(&group->clock_work)) | ||
| 546 | schedule_delayed_work(&group->clock_work, PSI_FREQ); | ||
| 547 | } | ||
| 535 | } | 548 | } |
| 536 | 549 | ||
| 537 | void psi_memstall_tick(struct task_struct *task, int cpu) | 550 | void psi_memstall_tick(struct task_struct *task, int cpu) |
diff --git a/kernel/seccomp.c b/kernel/seccomp.c index d7f538847b84..e815781ed751 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c | |||
| @@ -976,6 +976,9 @@ static int seccomp_notify_release(struct inode *inode, struct file *file) | |||
| 976 | struct seccomp_filter *filter = file->private_data; | 976 | struct seccomp_filter *filter = file->private_data; |
| 977 | struct seccomp_knotif *knotif; | 977 | struct seccomp_knotif *knotif; |
| 978 | 978 | ||
| 979 | if (!filter) | ||
| 980 | return 0; | ||
| 981 | |||
| 979 | mutex_lock(&filter->notify_lock); | 982 | mutex_lock(&filter->notify_lock); |
| 980 | 983 | ||
| 981 | /* | 984 | /* |
| @@ -1300,6 +1303,7 @@ out: | |||
| 1300 | out_put_fd: | 1303 | out_put_fd: |
| 1301 | if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) { | 1304 | if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) { |
| 1302 | if (ret < 0) { | 1305 | if (ret < 0) { |
| 1306 | listener_f->private_data = NULL; | ||
| 1303 | fput(listener_f); | 1307 | fput(listener_f); |
| 1304 | put_unused_fd(listener); | 1308 | put_unused_fd(listener); |
| 1305 | } else { | 1309 | } else { |
diff --git a/kernel/signal.c b/kernel/signal.c index e1d7ad8e6ab1..57b7771e20d7 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
| @@ -688,6 +688,48 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, kernel_siginfo_t *in | |||
| 688 | } | 688 | } |
| 689 | EXPORT_SYMBOL_GPL(dequeue_signal); | 689 | EXPORT_SYMBOL_GPL(dequeue_signal); |
| 690 | 690 | ||
| 691 | static int dequeue_synchronous_signal(kernel_siginfo_t *info) | ||
| 692 | { | ||
| 693 | struct task_struct *tsk = current; | ||
| 694 | struct sigpending *pending = &tsk->pending; | ||
| 695 | struct sigqueue *q, *sync = NULL; | ||
| 696 | |||
| 697 | /* | ||
| 698 | * Might a synchronous signal be in the queue? | ||
| 699 | */ | ||
| 700 | if (!((pending->signal.sig[0] & ~tsk->blocked.sig[0]) & SYNCHRONOUS_MASK)) | ||
| 701 | return 0; | ||
| 702 | |||
| 703 | /* | ||
| 704 | * Return the first synchronous signal in the queue. | ||
| 705 | */ | ||
| 706 | list_for_each_entry(q, &pending->list, list) { | ||
| 707 | /* Synchronous signals have a positive si_code */ | ||
| 708 | if ((q->info.si_code > SI_USER) && | ||
| 709 | (sigmask(q->info.si_signo) & SYNCHRONOUS_MASK)) { | ||
| 710 | sync = q; | ||
| 711 | goto next; | ||
| 712 | } | ||
| 713 | } | ||
| 714 | return 0; | ||
| 715 | next: | ||
| 716 | /* | ||
| 717 | * Check if there is another siginfo for the same signal. | ||
| 718 | */ | ||
| 719 | list_for_each_entry_continue(q, &pending->list, list) { | ||
| 720 | if (q->info.si_signo == sync->info.si_signo) | ||
| 721 | goto still_pending; | ||
| 722 | } | ||
| 723 | |||
| 724 | sigdelset(&pending->signal, sync->info.si_signo); | ||
| 725 | recalc_sigpending(); | ||
| 726 | still_pending: | ||
| 727 | list_del_init(&sync->list); | ||
| 728 | copy_siginfo(info, &sync->info); | ||
| 729 | __sigqueue_free(sync); | ||
| 730 | return info->si_signo; | ||
| 731 | } | ||
| 732 | |||
| 691 | /* | 733 | /* |
| 692 | * Tell a process that it has a new active signal.. | 734 | * Tell a process that it has a new active signal.. |
| 693 | * | 735 | * |
| @@ -1057,10 +1099,9 @@ static int __send_signal(int sig, struct kernel_siginfo *info, struct task_struc | |||
| 1057 | 1099 | ||
| 1058 | result = TRACE_SIGNAL_DELIVERED; | 1100 | result = TRACE_SIGNAL_DELIVERED; |
| 1059 | /* | 1101 | /* |
| 1060 | * Skip useless siginfo allocation for SIGKILL SIGSTOP, | 1102 | * Skip useless siginfo allocation for SIGKILL and kernel threads. |
| 1061 | * and kernel threads. | ||
| 1062 | */ | 1103 | */ |
| 1063 | if (sig_kernel_only(sig) || (t->flags & PF_KTHREAD)) | 1104 | if ((sig == SIGKILL) || (t->flags & PF_KTHREAD)) |
| 1064 | goto out_set; | 1105 | goto out_set; |
| 1065 | 1106 | ||
| 1066 | /* | 1107 | /* |
| @@ -2394,6 +2435,14 @@ relock: | |||
| 2394 | goto relock; | 2435 | goto relock; |
| 2395 | } | 2436 | } |
| 2396 | 2437 | ||
| 2438 | /* Has this task already been marked for death? */ | ||
| 2439 | if (signal_group_exit(signal)) { | ||
| 2440 | ksig->info.si_signo = signr = SIGKILL; | ||
| 2441 | sigdelset(¤t->pending.signal, SIGKILL); | ||
| 2442 | recalc_sigpending(); | ||
| 2443 | goto fatal; | ||
| 2444 | } | ||
| 2445 | |||
| 2397 | for (;;) { | 2446 | for (;;) { |
| 2398 | struct k_sigaction *ka; | 2447 | struct k_sigaction *ka; |
| 2399 | 2448 | ||
| @@ -2407,7 +2456,15 @@ relock: | |||
| 2407 | goto relock; | 2456 | goto relock; |
| 2408 | } | 2457 | } |
| 2409 | 2458 | ||
| 2410 | signr = dequeue_signal(current, ¤t->blocked, &ksig->info); | 2459 | /* |
| 2460 | * Signals generated by the execution of an instruction | ||
| 2461 | * need to be delivered before any other pending signals | ||
| 2462 | * so that the instruction pointer in the signal stack | ||
| 2463 | * frame points to the faulting instruction. | ||
| 2464 | */ | ||
| 2465 | signr = dequeue_synchronous_signal(&ksig->info); | ||
| 2466 | if (!signr) | ||
| 2467 | signr = dequeue_signal(current, ¤t->blocked, &ksig->info); | ||
| 2411 | 2468 | ||
| 2412 | if (!signr) | 2469 | if (!signr) |
| 2413 | break; /* will return 0 */ | 2470 | break; /* will return 0 */ |
| @@ -2489,6 +2546,7 @@ relock: | |||
| 2489 | continue; | 2546 | continue; |
| 2490 | } | 2547 | } |
| 2491 | 2548 | ||
| 2549 | fatal: | ||
| 2492 | spin_unlock_irq(&sighand->siglock); | 2550 | spin_unlock_irq(&sighand->siglock); |
| 2493 | 2551 | ||
| 2494 | /* | 2552 | /* |
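For reference, the "synchronous" set that dequeue_synchronous_signal() filters on is the mask of instruction-generated faults defined near the top of kernel/signal.c; from memory (verify against the tree) it is roughly:

/*
 * Signals generated by executing an instruction.  Delivering these
 * first keeps the signal frame's instruction pointer on the faulting
 * instruction, which is the point of the hunk above.
 */
#define SYNCHRONOUS_MASK \
	(sigmask(SIGSEGV) | sigmask(SIGBUS) | sigmask(SIGILL) | \
	 sigmask(SIGTRAP) | sigmask(SIGFPE) | sigmask(SIGSYS))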
diff --git a/kernel/smp.c b/kernel/smp.c index 163c451af42e..f4cf1b0bb3b8 100644 --- a/kernel/smp.c +++ b/kernel/smp.c | |||
| @@ -584,8 +584,6 @@ void __init smp_init(void) | |||
| 584 | num_nodes, (num_nodes > 1 ? "s" : ""), | 584 | num_nodes, (num_nodes > 1 ? "s" : ""), |
| 585 | num_cpus, (num_cpus > 1 ? "s" : "")); | 585 | num_cpus, (num_cpus > 1 ? "s" : "")); |
| 586 | 586 | ||
| 587 | /* Final decision about SMT support */ | ||
| 588 | cpu_smt_check_topology(); | ||
| 589 | /* Any cleanup work */ | 587 | /* Any cleanup work */ |
| 590 | smp_cpus_done(setup_max_cpus); | 588 | smp_cpus_done(setup_max_cpus); |
| 591 | } | 589 | } |
diff --git a/kernel/sys.c b/kernel/sys.c index a48cbf1414b8..f7eb62eceb24 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
| @@ -1207,7 +1207,8 @@ DECLARE_RWSEM(uts_sem); | |||
| 1207 | /* | 1207 | /* |
| 1208 | * Work around broken programs that cannot handle "Linux 3.0". | 1208 | * Work around broken programs that cannot handle "Linux 3.0". |
| 1209 | * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40 | 1209 | * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40 |
| 1210 | * And we map 4.x to 2.6.60+x, so 4.0 would be 2.6.60. | 1210 | * And we map 4.x and later versions to 2.6.60+x, so 4.0/5.0/6.0/... would be |
| 1211 | * 2.6.60. | ||
| 1211 | */ | 1212 | */ |
| 1212 | static int override_release(char __user *release, size_t len) | 1213 | static int override_release(char __user *release, size_t len) |
| 1213 | { | 1214 | { |
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 8f0644af40be..80f955210861 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c | |||
| @@ -685,6 +685,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, | |||
| 685 | * set up the signal and overrun bookkeeping. | 685 | * set up the signal and overrun bookkeeping. |
| 686 | */ | 686 | */ |
| 687 | timer->it.cpu.incr = timespec64_to_ns(&new->it_interval); | 687 | timer->it.cpu.incr = timespec64_to_ns(&new->it_interval); |
| 688 | timer->it_interval = ns_to_ktime(timer->it.cpu.incr); | ||
| 688 | 689 | ||
| 689 | /* | 690 | /* |
| 690 | * This acts as a modification timestamp for the timer, | 691 | * This acts as a modification timestamp for the timer, |
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 8b068adb9da1..f1a86a0d881d 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c | |||
| @@ -1204,22 +1204,12 @@ static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog * | |||
| 1204 | 1204 | ||
| 1205 | int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog) | 1205 | int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog) |
| 1206 | { | 1206 | { |
| 1207 | int err; | 1207 | return __bpf_probe_register(btp, prog); |
| 1208 | |||
| 1209 | mutex_lock(&bpf_event_mutex); | ||
| 1210 | err = __bpf_probe_register(btp, prog); | ||
| 1211 | mutex_unlock(&bpf_event_mutex); | ||
| 1212 | return err; | ||
| 1213 | } | 1208 | } |
| 1214 | 1209 | ||
| 1215 | int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog) | 1210 | int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog) |
| 1216 | { | 1211 | { |
| 1217 | int err; | 1212 | return tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog); |
| 1218 | |||
| 1219 | mutex_lock(&bpf_event_mutex); | ||
| 1220 | err = tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog); | ||
| 1221 | mutex_unlock(&bpf_event_mutex); | ||
| 1222 | return err; | ||
| 1223 | } | 1213 | } |
| 1224 | 1214 | ||
| 1225 | int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, | 1215 | int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, |
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c521b7347482..c4238b441624 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
| @@ -3384,6 +3384,8 @@ static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file | |||
| 3384 | const char tgid_space[] = " "; | 3384 | const char tgid_space[] = " "; |
| 3385 | const char space[] = " "; | 3385 | const char space[] = " "; |
| 3386 | 3386 | ||
| 3387 | print_event_info(buf, m); | ||
| 3388 | |||
| 3387 | seq_printf(m, "# %s _-----=> irqs-off\n", | 3389 | seq_printf(m, "# %s _-----=> irqs-off\n", |
| 3388 | tgid ? tgid_space : space); | 3390 | tgid ? tgid_space : space); |
| 3389 | seq_printf(m, "# %s / _----=> need-resched\n", | 3391 | seq_printf(m, "# %s / _----=> need-resched\n", |
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 5c19b8c41c7e..9eaf07f99212 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c | |||
| @@ -607,11 +607,17 @@ static int trace_kprobe_create(int argc, const char *argv[]) | |||
| 607 | char buf[MAX_EVENT_NAME_LEN]; | 607 | char buf[MAX_EVENT_NAME_LEN]; |
| 608 | unsigned int flags = TPARG_FL_KERNEL; | 608 | unsigned int flags = TPARG_FL_KERNEL; |
| 609 | 609 | ||
| 610 | /* argc must be >= 1 */ | 610 | switch (argv[0][0]) { |
| 611 | if (argv[0][0] == 'r') { | 611 | case 'r': |
| 612 | is_return = true; | 612 | is_return = true; |
| 613 | flags |= TPARG_FL_RETURN; | 613 | flags |= TPARG_FL_RETURN; |
| 614 | } else if (argv[0][0] != 'p' || argc < 2) | 614 | break; |
| 615 | case 'p': | ||
| 616 | break; | ||
| 617 | default: | ||
| 618 | return -ECANCELED; | ||
| 619 | } | ||
| 620 | if (argc < 2) | ||
| 615 | return -ECANCELED; | 621 | return -ECANCELED; |
| 616 | 622 | ||
| 617 | event = strchr(&argv[0][1], ':'); | 623 | event = strchr(&argv[0][1], ':'); |
| @@ -855,22 +861,14 @@ static const struct file_operations kprobe_profile_ops = { | |||
| 855 | static nokprobe_inline int | 861 | static nokprobe_inline int |
| 856 | fetch_store_strlen(unsigned long addr) | 862 | fetch_store_strlen(unsigned long addr) |
| 857 | { | 863 | { |
| 858 | mm_segment_t old_fs; | ||
| 859 | int ret, len = 0; | 864 | int ret, len = 0; |
| 860 | u8 c; | 865 | u8 c; |
| 861 | 866 | ||
| 862 | old_fs = get_fs(); | ||
| 863 | set_fs(KERNEL_DS); | ||
| 864 | pagefault_disable(); | ||
| 865 | |||
| 866 | do { | 867 | do { |
| 867 | ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1); | 868 | ret = probe_mem_read(&c, (u8 *)addr + len, 1); |
| 868 | len++; | 869 | len++; |
| 869 | } while (c && ret == 0 && len < MAX_STRING_SIZE); | 870 | } while (c && ret == 0 && len < MAX_STRING_SIZE); |
| 870 | 871 | ||
| 871 | pagefault_enable(); | ||
| 872 | set_fs(old_fs); | ||
| 873 | |||
| 874 | return (ret < 0) ? ret : len; | 872 | return (ret < 0) ? ret : len; |
| 875 | } | 873 | } |
| 876 | 874 | ||
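The open-coded set_fs()/pagefault_disable() sequence can be dropped because probe_mem_read() bottoms out in probe_kernel_read(), which performs the equivalent steps itself. Roughly, paraphrasing mm/maccess.c of this era (not the exact source):

/* Paraphrase of __probe_kernel_read(), mm/maccess.c (this era). */
long __probe_kernel_read(void *dst, const void *src, size_t size)
{
	long ret;
	mm_segment_t old_fs = get_fs();

	set_fs(KERNEL_DS);
	pagefault_disable();
	ret = __copy_from_user_inatomic(dst,
			(__force const void __user *)src, size);
	pagefault_enable();
	set_fs(old_fs);

	return ret ? -EFAULT : 0;	/* 0 on success, matching the loop above */
}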
diff --git a/kernel/trace/trace_probe_tmpl.h b/kernel/trace/trace_probe_tmpl.h index 5c56afc17cf8..4737bb8c07a3 100644 --- a/kernel/trace/trace_probe_tmpl.h +++ b/kernel/trace/trace_probe_tmpl.h | |||
| @@ -180,10 +180,12 @@ store_trace_args(void *data, struct trace_probe *tp, struct pt_regs *regs, | |||
| 180 | if (unlikely(arg->dynamic)) | 180 | if (unlikely(arg->dynamic)) |
| 181 | *dl = make_data_loc(maxlen, dyndata - base); | 181 | *dl = make_data_loc(maxlen, dyndata - base); |
| 182 | ret = process_fetch_insn(arg->code, regs, dl, base); | 182 | ret = process_fetch_insn(arg->code, regs, dl, base); |
| 183 | if (unlikely(ret < 0 && arg->dynamic)) | 183 | if (unlikely(ret < 0 && arg->dynamic)) { |
| 184 | *dl = make_data_loc(0, dyndata - base); | 184 | *dl = make_data_loc(0, dyndata - base); |
| 185 | else | 185 | } else { |
| 186 | dyndata += ret; | 186 | dyndata += ret; |
| 187 | maxlen -= ret; | ||
| 188 | } | ||
| 187 | } | 189 | } |
| 188 | } | 190 | } |
| 189 | 191 | ||
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index e335576b9411..9bde07c06362 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c | |||
| @@ -5,7 +5,7 @@ | |||
| 5 | * Copyright (C) IBM Corporation, 2010-2012 | 5 | * Copyright (C) IBM Corporation, 2010-2012 |
| 6 | * Author: Srikar Dronamraju <srikar@linux.vnet.ibm.com> | 6 | * Author: Srikar Dronamraju <srikar@linux.vnet.ibm.com> |
| 7 | */ | 7 | */ |
| 8 | #define pr_fmt(fmt) "trace_kprobe: " fmt | 8 | #define pr_fmt(fmt) "trace_uprobe: " fmt |
| 9 | 9 | ||
| 10 | #include <linux/ctype.h> | 10 | #include <linux/ctype.h> |
| 11 | #include <linux/module.h> | 11 | #include <linux/module.h> |
| @@ -160,6 +160,13 @@ fetch_store_string(unsigned long addr, void *dest, void *base) | |||
| 160 | if (ret >= 0) { | 160 | if (ret >= 0) { |
| 161 | if (ret == maxlen) | 161 | if (ret == maxlen) |
| 162 | dst[ret - 1] = '\0'; | 162 | dst[ret - 1] = '\0'; |
| 163 | else | ||
| 164 | /* | ||
| 165 | * Include the terminating null byte. In this case it | ||
| 166 | * was copied by strncpy_from_user but not accounted | ||
| 167 | * for in ret. | ||
| 168 | */ | ||
| 169 | ret++; | ||
| 163 | *(u32 *)dest = make_data_loc(ret, (void *)dst - base); | 170 | *(u32 *)dest = make_data_loc(ret, (void *)dst - base); |
| 164 | } | 171 | } |
| 165 | 172 | ||
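The extra ret++ follows from strncpy_from_user()'s return convention: on success it returns the string length excluding the terminating NUL even though the NUL is copied into the buffer. A minimal illustration with an invented helper that mirrors the corrected accounting:

/* Hypothetical helper: return the number of bytes stored in @dst. */
static long example_stored_len(void *dst, const void __user *src, long maxlen)
{
	long ret = strncpy_from_user(dst, src, maxlen);
	/* e.g. user string "abc", maxlen 32: 4 bytes written, ret == 3 */

	if (ret >= 0 && ret < maxlen)
		ret++;		/* count the NUL that was also copied */
	return ret;
}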
diff --git a/kernel/umh.c b/kernel/umh.c index 0baa672e023c..d937cbad903a 100644 --- a/kernel/umh.c +++ b/kernel/umh.c | |||
| @@ -37,6 +37,8 @@ static kernel_cap_t usermodehelper_bset = CAP_FULL_SET; | |||
| 37 | static kernel_cap_t usermodehelper_inheritable = CAP_FULL_SET; | 37 | static kernel_cap_t usermodehelper_inheritable = CAP_FULL_SET; |
| 38 | static DEFINE_SPINLOCK(umh_sysctl_lock); | 38 | static DEFINE_SPINLOCK(umh_sysctl_lock); |
| 39 | static DECLARE_RWSEM(umhelper_sem); | 39 | static DECLARE_RWSEM(umhelper_sem); |
| 40 | static LIST_HEAD(umh_list); | ||
| 41 | static DEFINE_MUTEX(umh_list_lock); | ||
| 40 | 42 | ||
| 41 | static void call_usermodehelper_freeinfo(struct subprocess_info *info) | 43 | static void call_usermodehelper_freeinfo(struct subprocess_info *info) |
| 42 | { | 44 | { |
| @@ -100,10 +102,12 @@ static int call_usermodehelper_exec_async(void *data) | |||
| 100 | commit_creds(new); | 102 | commit_creds(new); |
| 101 | 103 | ||
| 102 | sub_info->pid = task_pid_nr(current); | 104 | sub_info->pid = task_pid_nr(current); |
| 103 | if (sub_info->file) | 105 | if (sub_info->file) { |
| 104 | retval = do_execve_file(sub_info->file, | 106 | retval = do_execve_file(sub_info->file, |
| 105 | sub_info->argv, sub_info->envp); | 107 | sub_info->argv, sub_info->envp); |
| 106 | else | 108 | if (!retval) |
| 109 | current->flags |= PF_UMH; | ||
| 110 | } else | ||
| 107 | retval = do_execve(getname_kernel(sub_info->path), | 111 | retval = do_execve(getname_kernel(sub_info->path), |
| 108 | (const char __user *const __user *)sub_info->argv, | 112 | (const char __user *const __user *)sub_info->argv, |
| 109 | (const char __user *const __user *)sub_info->envp); | 113 | (const char __user *const __user *)sub_info->envp); |
| @@ -517,6 +521,11 @@ int fork_usermode_blob(void *data, size_t len, struct umh_info *info) | |||
| 517 | goto out; | 521 | goto out; |
| 518 | 522 | ||
| 519 | err = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC); | 523 | err = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC); |
| 524 | if (!err) { | ||
| 525 | mutex_lock(&umh_list_lock); | ||
| 526 | list_add(&info->list, &umh_list); | ||
| 527 | mutex_unlock(&umh_list_lock); | ||
| 528 | } | ||
| 520 | out: | 529 | out: |
| 521 | fput(file); | 530 | fput(file); |
| 522 | return err; | 531 | return err; |
| @@ -679,6 +688,26 @@ static int proc_cap_handler(struct ctl_table *table, int write, | |||
| 679 | return 0; | 688 | return 0; |
| 680 | } | 689 | } |
| 681 | 690 | ||
| 691 | void __exit_umh(struct task_struct *tsk) | ||
| 692 | { | ||
| 693 | struct umh_info *info; | ||
| 694 | pid_t pid = tsk->pid; | ||
| 695 | |||
| 696 | mutex_lock(&umh_list_lock); | ||
| 697 | list_for_each_entry(info, &umh_list, list) { | ||
| 698 | if (info->pid == pid) { | ||
| 699 | list_del(&info->list); | ||
| 700 | mutex_unlock(&umh_list_lock); | ||
| 701 | goto out; | ||
| 702 | } | ||
| 703 | } | ||
| 704 | mutex_unlock(&umh_list_lock); | ||
| 705 | return; | ||
| 706 | out: | ||
| 707 | if (info->cleanup) | ||
| 708 | info->cleanup(info); | ||
| 709 | } | ||
| 710 | |||
| 682 | struct ctl_table usermodehelper_table[] = { | 711 | struct ctl_table usermodehelper_table[] = { |
| 683 | { | 712 | { |
| 684 | .procname = "bset", | 713 | .procname = "bset", |
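__exit_umh() only does anything if the exit path calls it; the companion piece is not visible in this hunk, but presumably amounts to a cheap inline gate on the PF_UMH flag set earlier, along the lines of the sketch below (name and placement assumed, not taken from this diff):

/* Sketch of the include/linux/umh.h side pairing with __exit_umh(). */
static inline void exit_umh(struct task_struct *tsk)
{
	/* Only helpers started via fork_usermode_blob() pay for the walk. */
	if (unlikely(tsk->flags & PF_UMH))
		__exit_umh(tsk);
}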
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 392be4b252f6..fc5d23d752a5 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
| @@ -910,6 +910,26 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task) | |||
| 910 | } | 910 | } |
| 911 | 911 | ||
| 912 | /** | 912 | /** |
| 913 | * wq_worker_last_func - retrieve worker's last work function | ||
| 914 | * | ||
| 915 | * Determine the last function a worker executed. This is called from | ||
| 916 | * the scheduler to get a worker's last known identity. | ||
| 917 | * | ||
| 918 | * CONTEXT: | ||
| 919 | * spin_lock_irq(rq->lock) | ||
| 920 | * | ||
| 921 | * Return: | ||
| 922 | * The last work function %current executed as a worker, NULL if it | ||
| 923 | * hasn't executed any work yet. | ||
| 924 | */ | ||
| 925 | work_func_t wq_worker_last_func(struct task_struct *task) | ||
| 926 | { | ||
| 927 | struct worker *worker = kthread_data(task); | ||
| 928 | |||
| 929 | return worker->last_func; | ||
| 930 | } | ||
| 931 | |||
| 932 | /** | ||
| 913 | * worker_set_flags - set worker flags and adjust nr_running accordingly | 933 | * worker_set_flags - set worker flags and adjust nr_running accordingly |
| 914 | * @worker: self | 934 | * @worker: self |
| 915 | * @flags: flags to set | 935 | * @flags: flags to set |
| @@ -2184,6 +2204,9 @@ __acquires(&pool->lock) | |||
| 2184 | if (unlikely(cpu_intensive)) | 2204 | if (unlikely(cpu_intensive)) |
| 2185 | worker_clr_flags(worker, WORKER_CPU_INTENSIVE); | 2205 | worker_clr_flags(worker, WORKER_CPU_INTENSIVE); |
| 2186 | 2206 | ||
| 2207 | /* tag the worker for identification in schedule() */ | ||
| 2208 | worker->last_func = worker->current_func; | ||
| 2209 | |||
| 2187 | /* we're done with it, release */ | 2210 | /* we're done with it, release */ |
| 2188 | hash_del(&worker->hentry); | 2211 | hash_del(&worker->hentry); |
| 2189 | worker->current_work = NULL; | 2212 | worker->current_work = NULL; |
diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h index 66fbb5a9e633..cb68b03ca89a 100644 --- a/kernel/workqueue_internal.h +++ b/kernel/workqueue_internal.h | |||
| @@ -53,6 +53,9 @@ struct worker { | |||
| 53 | 53 | ||
| 54 | /* used only by rescuers to point to the target workqueue */ | 54 | /* used only by rescuers to point to the target workqueue */ |
| 55 | struct workqueue_struct *rescue_wq; /* I: the workqueue to rescue */ | 55 | struct workqueue_struct *rescue_wq; /* I: the workqueue to rescue */ |
| 56 | |||
| 57 | /* used by the scheduler to determine a worker's last known identity */ | ||
| 58 | work_func_t last_func; | ||
| 56 | }; | 59 | }; |
| 57 | 60 | ||
| 58 | /** | 61 | /** |
| @@ -67,9 +70,10 @@ static inline struct worker *current_wq_worker(void) | |||
| 67 | 70 | ||
| 68 | /* | 71 | /* |
| 69 | * Scheduler hooks for concurrency managed workqueue. Only to be used from | 72 | * Scheduler hooks for concurrency managed workqueue. Only to be used from |
| 70 | * sched/core.c and workqueue.c. | 73 | * sched/ and workqueue.c. |
| 71 | */ | 74 | */ |
| 72 | void wq_worker_waking_up(struct task_struct *task, int cpu); | 75 | void wq_worker_waking_up(struct task_struct *task, int cpu); |
| 73 | struct task_struct *wq_worker_sleeping(struct task_struct *task); | 76 | struct task_struct *wq_worker_sleeping(struct task_struct *task); |
| 77 | work_func_t wq_worker_last_func(struct task_struct *task); | ||
| 74 | 78 | ||
| 75 | #endif /* _KERNEL_WORKQUEUE_INTERNAL_H */ | 79 | #endif /* _KERNEL_WORKQUEUE_INTERNAL_H */ |
