Diffstat (limited to 'kernel')
-rw-r--r--  kernel/bpf/btf.c                 14
-rw-r--r--  kernel/bpf/cgroup.c               1
-rw-r--r--  kernel/bpf/map_in_map.c          17
-rw-r--r--  kernel/bpf/stackmap.c            12
-rw-r--r--  kernel/bpf/verifier.c            61
-rw-r--r--  kernel/dma/swiotlb.c              2
-rw-r--r--  kernel/exit.c                     3
-rw-r--r--  kernel/fork.c                    14
-rw-r--r--  kernel/futex.c                   13
-rw-r--r--  kernel/irq/irqdesc.c              2
-rw-r--r--  kernel/irq/manage.c               3
-rw-r--r--  kernel/locking/rwsem-xadd.c      11
-rw-r--r--  kernel/sched/core.c              19
-rw-r--r--  kernel/seccomp.c                  4
-rw-r--r--  kernel/sys.c                      3
-rw-r--r--  kernel/time/posix-cpu-timers.c    1
-rw-r--r--  kernel/trace/trace_kprobe.c      12
-rw-r--r--  kernel/umh.c                     33
18 files changed, 180 insertions, 45 deletions
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 715f9fcf4712..befe570be5ba 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -467,7 +467,7 @@ static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
 	return kind_ops[BTF_INFO_KIND(t->info)];
 }
 
-bool btf_name_offset_valid(const struct btf *btf, u32 offset)
+static bool btf_name_offset_valid(const struct btf *btf, u32 offset)
 {
 	return BTF_STR_OFFSET_VALID(offset) &&
 		offset < btf->hdr.str_len;
@@ -1219,8 +1219,6 @@ static void btf_bitfield_seq_show(void *data, u8 bits_offset,
 	u8 nr_copy_bits;
 	u64 print_num;
 
-	data += BITS_ROUNDDOWN_BYTES(bits_offset);
-	bits_offset = BITS_PER_BYTE_MASKED(bits_offset);
 	nr_copy_bits = nr_bits + bits_offset;
 	nr_copy_bytes = BITS_ROUNDUP_BYTES(nr_copy_bits);
 
@@ -1255,7 +1253,9 @@ static void btf_int_bits_seq_show(const struct btf *btf,
 	 * BTF_INT_OFFSET() cannot exceed 64 bits.
 	 */
 	total_bits_offset = bits_offset + BTF_INT_OFFSET(int_data);
-	btf_bitfield_seq_show(data, total_bits_offset, nr_bits, m);
+	data += BITS_ROUNDDOWN_BYTES(total_bits_offset);
+	bits_offset = BITS_PER_BYTE_MASKED(total_bits_offset);
+	btf_bitfield_seq_show(data, bits_offset, nr_bits, m);
 }
 
 static void btf_int_seq_show(const struct btf *btf, const struct btf_type *t,
@@ -2001,12 +2001,12 @@ static void btf_struct_seq_show(const struct btf *btf, const struct btf_type *t,
 
 		member_offset = btf_member_bit_offset(t, member);
 		bitfield_size = btf_member_bitfield_size(t, member);
+		bytes_offset = BITS_ROUNDDOWN_BYTES(member_offset);
+		bits8_offset = BITS_PER_BYTE_MASKED(member_offset);
 		if (bitfield_size) {
-			btf_bitfield_seq_show(data, member_offset,
+			btf_bitfield_seq_show(data + bytes_offset, bits8_offset,
 					      bitfield_size, m);
 		} else {
-			bytes_offset = BITS_ROUNDDOWN_BYTES(member_offset);
-			bits8_offset = BITS_PER_BYTE_MASKED(member_offset);
 			ops = btf_type_ops(member_type);
 			ops->seq_show(btf, member_type, member->type,
 				      data + bytes_offset, bits8_offset, m);
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 9425c2fb872f..ab612fe9862f 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -718,6 +718,7 @@ cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	case BPF_FUNC_trace_printk:
 		if (capable(CAP_SYS_ADMIN))
 			return bpf_get_trace_printk_proto();
+		/* fall through */
 	default:
 		return NULL;
 	}
diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
index 99d243e1ad6e..52378d3e34b3 100644
--- a/kernel/bpf/map_in_map.c
+++ b/kernel/bpf/map_in_map.c
@@ -12,6 +12,7 @@
 struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
 {
 	struct bpf_map *inner_map, *inner_map_meta;
+	u32 inner_map_meta_size;
 	struct fd f;
 
 	f = fdget(inner_map_ufd);
@@ -36,7 +37,12 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
 		return ERR_PTR(-EINVAL);
 	}
 
-	inner_map_meta = kzalloc(sizeof(*inner_map_meta), GFP_USER);
+	inner_map_meta_size = sizeof(*inner_map_meta);
+	/* In some cases verifier needs to access beyond just base map. */
+	if (inner_map->ops == &array_map_ops)
+		inner_map_meta_size = sizeof(struct bpf_array);
+
+	inner_map_meta = kzalloc(inner_map_meta_size, GFP_USER);
 	if (!inner_map_meta) {
 		fdput(f);
 		return ERR_PTR(-ENOMEM);
@@ -46,9 +52,16 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
 	inner_map_meta->key_size = inner_map->key_size;
 	inner_map_meta->value_size = inner_map->value_size;
 	inner_map_meta->map_flags = inner_map->map_flags;
-	inner_map_meta->ops = inner_map->ops;
 	inner_map_meta->max_entries = inner_map->max_entries;
 
+	/* Misc members not needed in bpf_map_meta_equal() check. */
+	inner_map_meta->ops = inner_map->ops;
+	if (inner_map->ops == &array_map_ops) {
+		inner_map_meta->unpriv_array = inner_map->unpriv_array;
+		container_of(inner_map_meta, struct bpf_array, map)->index_mask =
+			container_of(inner_map, struct bpf_array, map)->index_mask;
+	}
+
 	fdput(f);
 	return inner_map_meta;
 }
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 90daf285de03..d43b14535827 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -180,11 +180,14 @@ static inline int stack_map_parse_build_id(void *page_addr,
 
 		if (nhdr->n_type == BPF_BUILD_ID &&
 		    nhdr->n_namesz == sizeof("GNU") &&
-		    nhdr->n_descsz == BPF_BUILD_ID_SIZE) {
+		    nhdr->n_descsz > 0 &&
+		    nhdr->n_descsz <= BPF_BUILD_ID_SIZE) {
 			memcpy(build_id,
 			       note_start + note_offs +
 			       ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr),
-			       BPF_BUILD_ID_SIZE);
+			       nhdr->n_descsz);
+			memset(build_id + nhdr->n_descsz, 0,
+			       BPF_BUILD_ID_SIZE - nhdr->n_descsz);
 			return 0;
 		}
 		new_offs = note_offs + sizeof(Elf32_Nhdr) +
@@ -260,7 +263,7 @@ static int stack_map_get_build_id(struct vm_area_struct *vma,
 		return -EFAULT;	/* page not mapped */
 
 	ret = -EINVAL;
-	page_addr = page_address(page);
+	page_addr = kmap_atomic(page);
 	ehdr = (Elf32_Ehdr *)page_addr;
 
 	/* compare magic x7f "ELF" */
@@ -276,6 +279,7 @@ static int stack_map_get_build_id(struct vm_area_struct *vma,
 	else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64)
 		ret = stack_map_get_build_id_64(page_addr, build_id);
 out:
+	kunmap_atomic(page_addr);
 	put_page(page);
 	return ret;
 }
@@ -310,6 +314,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
 		for (i = 0; i < trace_nr; i++) {
 			id_offs[i].status = BPF_STACK_BUILD_ID_IP;
 			id_offs[i].ip = ips[i];
+			memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE);
 		}
 		return;
 	}
@@ -320,6 +325,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
 			/* per entry fall back to ips */
 			id_offs[i].status = BPF_STACK_BUILD_ID_IP;
 			id_offs[i].ip = ips[i];
+			memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE);
 			continue;
 		}
 		id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i]
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f6bc62a9ee8e..56674a7c3778 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3103,6 +3103,40 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
 	}
 }
 
+static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
+				    const struct bpf_insn *insn)
+{
+	return env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K;
+}
+
+static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
+				       u32 alu_state, u32 alu_limit)
+{
+	/* If we arrived here from different branches with different
+	 * state or limits to sanitize, then this won't work.
+	 */
+	if (aux->alu_state &&
+	    (aux->alu_state != alu_state ||
+	     aux->alu_limit != alu_limit))
+		return -EACCES;
+
+	/* Corresponding fixup done in fixup_bpf_calls(). */
+	aux->alu_state = alu_state;
+	aux->alu_limit = alu_limit;
+	return 0;
+}
+
+static int sanitize_val_alu(struct bpf_verifier_env *env,
+			    struct bpf_insn *insn)
+{
+	struct bpf_insn_aux_data *aux = cur_aux(env);
+
+	if (can_skip_alu_sanitation(env, insn))
+		return 0;
+
+	return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
+}
+
 static int sanitize_ptr_alu(struct bpf_verifier_env *env,
 			    struct bpf_insn *insn,
 			    const struct bpf_reg_state *ptr_reg,
@@ -3117,7 +3151,7 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
 	struct bpf_reg_state tmp;
 	bool ret;
 
-	if (env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K)
+	if (can_skip_alu_sanitation(env, insn))
 		return 0;
 
 	/* We already marked aux for masking from non-speculative
@@ -3133,19 +3167,8 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
 
 	if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg))
 		return 0;
-
-	/* If we arrived here from different branches with different
-	 * limits to sanitize, then this won't work.
-	 */
-	if (aux->alu_state &&
-	    (aux->alu_state != alu_state ||
-	     aux->alu_limit != alu_limit))
+	if (update_alu_sanitation_state(aux, alu_state, alu_limit))
 		return -EACCES;
-
-	/* Corresponding fixup done in fixup_bpf_calls(). */
-	aux->alu_state = alu_state;
-	aux->alu_limit = alu_limit;
-
 do_sim:
 	/* Simulate and find potential out-of-bounds access under
 	 * speculative execution from truncation as a result of
@@ -3418,6 +3441,8 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
 	s64 smin_val, smax_val;
 	u64 umin_val, umax_val;
 	u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
+	u32 dst = insn->dst_reg;
+	int ret;
 
 	if (insn_bitness == 32) {
 		/* Relevant for 32-bit RSH: Information can propagate towards
@@ -3452,6 +3477,11 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
 
 	switch (opcode) {
 	case BPF_ADD:
+		ret = sanitize_val_alu(env, insn);
+		if (ret < 0) {
+			verbose(env, "R%d tried to add from different pointers or scalars\n", dst);
+			return ret;
+		}
 		if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
 		    signed_add_overflows(dst_reg->smax_value, smax_val)) {
 			dst_reg->smin_value = S64_MIN;
@@ -3471,6 +3501,11 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
 		dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
 		break;
 	case BPF_SUB:
+		ret = sanitize_val_alu(env, insn);
+		if (ret < 0) {
+			verbose(env, "R%d tried to sub from different pointers or scalars\n", dst);
+			return ret;
+		}
 		if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
 		    signed_sub_overflows(dst_reg->smax_value, smin_val)) {
 			/* Overflow possible, we know nothing */
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index d6361776dc5c..1fb6fd68b9c7 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -378,6 +378,8 @@ void __init swiotlb_exit(void)
 		memblock_free_late(io_tlb_start,
 				   PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
 	}
+	io_tlb_start = 0;
+	io_tlb_end = 0;
 	io_tlb_nslabs = 0;
 	max_segment = 0;
 }
diff --git a/kernel/exit.c b/kernel/exit.c
index 2d14979577ee..3fb7be001964 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -307,7 +307,7 @@ void rcuwait_wake_up(struct rcuwait *w)
 	 *        MB (A)              MB (B)
 	 *    [L] cond            [L] tsk
 	 */
-	smp_rmb(); /* (B) */
+	smp_mb(); /* (B) */
 
 	/*
 	 * Avoid using task_rcu_dereference() magic as long as we are careful,
@@ -866,6 +866,7 @@ void __noreturn do_exit(long code)
 	exit_task_namespaces(tsk);
 	exit_task_work(tsk);
 	exit_thread(tsk);
+	exit_umh(tsk);
 
 	/*
 	 * Flush inherited counters to the parent - before the parent
diff --git a/kernel/fork.c b/kernel/fork.c
index a60459947f18..b69248e6f0e0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -217,6 +217,7 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
 		memset(s->addr, 0, THREAD_SIZE);
 
 		tsk->stack_vm_area = s;
+		tsk->stack = s->addr;
 		return s->addr;
 	}
 
@@ -1833,8 +1834,6 @@ static __latent_entropy struct task_struct *copy_process(
 
 	posix_cpu_timers_init(p);
 
-	p->start_time = ktime_get_ns();
-	p->real_start_time = ktime_get_boot_ns();
 	p->io_context = NULL;
 	audit_set_context(p, NULL);
 	cgroup_fork(p);
@@ -2001,6 +2000,17 @@ static __latent_entropy struct task_struct *copy_process(
 		goto bad_fork_free_pid;
 
 	/*
+	 * From this point on we must avoid any synchronous user-space
+	 * communication until we take the tasklist-lock. In particular, we do
+	 * not want user-space to be able to predict the process start-time by
+	 * stalling fork(2) after we recorded the start_time but before it is
+	 * visible to the system.
+	 */
+
+	p->start_time = ktime_get_ns();
+	p->real_start_time = ktime_get_boot_ns();
+
+	/*
 	 * Make it visible to the rest of the system, but dont wake it up yet.
 	 * Need tasklist lock for parent etc handling!
 	 */
diff --git a/kernel/futex.c b/kernel/futex.c
index be3bff2315ff..fdd312da0992 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1452,11 +1452,7 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
 	if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n"))
 		return;
 
-	/*
-	 * Queue the task for later wakeup for after we've released
-	 * the hb->lock. wake_q_add() grabs reference to p.
-	 */
-	wake_q_add(wake_q, p);
+	get_task_struct(p);
 	__unqueue_futex(q);
 	/*
 	 * The waiting task can free the futex_q as soon as q->lock_ptr = NULL
@@ -1466,6 +1462,13 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
 	 * plist_del in __unqueue_futex().
 	 */
 	smp_store_release(&q->lock_ptr, NULL);
+
+	/*
+	 * Queue the task for later wakeup for after we've released
+	 * the hb->lock. wake_q_add() grabs reference to p.
+	 */
+	wake_q_add(wake_q, p);
+	put_task_struct(p);
 }
 
 /*
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index ee062b7939d3..ef8ad36cadcf 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -457,7 +457,7 @@ static int alloc_descs(unsigned int start, unsigned int cnt, int node,
 
 	/* Validate affinity mask(s) */
 	if (affinity) {
-		for (i = 0; i < cnt; i++, i++) {
+		for (i = 0; i < cnt; i++) {
 			if (cpumask_empty(&affinity[i].mask))
 				return -EINVAL;
 		}
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index a4888ce4667a..84b54a17b95d 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -393,6 +393,9 @@ int irq_setup_affinity(struct irq_desc *desc)
 	}
 
 	cpumask_and(&mask, cpu_online_mask, set);
+	if (cpumask_empty(&mask))
+		cpumask_copy(&mask, cpu_online_mask);
+
 	if (node != NUMA_NO_NODE) {
 		const struct cpumask *nodemask = cpumask_of_node(node);
 
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 09b180063ee1..50d9af615dc4 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -198,15 +198,22 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
 		woken++;
 		tsk = waiter->task;
 
-		wake_q_add(wake_q, tsk);
+		get_task_struct(tsk);
 		list_del(&waiter->list);
 		/*
-		 * Ensure that the last operation is setting the reader
+		 * Ensure calling get_task_struct() before setting the reader
 		 * waiter to nil such that rwsem_down_read_failed() cannot
 		 * race with do_exit() by always holding a reference count
 		 * to the task to wakeup.
 		 */
 		smp_store_release(&waiter->task, NULL);
+		/*
+		 * Ensure issuing the wakeup (either by us or someone else)
+		 * after setting the reader waiter to nil.
+		 */
+		wake_q_add(wake_q, tsk);
+		/* wake_q_add() already take the task ref */
+		put_task_struct(tsk);
 	}
 
 	adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a674c7db2f29..d8d76a65cfdd 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -396,6 +396,18 @@ static bool set_nr_if_polling(struct task_struct *p)
 #endif
 #endif
 
+/**
+ * wake_q_add() - queue a wakeup for 'later' waking.
+ * @head: the wake_q_head to add @task to
+ * @task: the task to queue for 'later' wakeup
+ *
+ * Queue a task for later wakeup, most likely by the wake_up_q() call in the
+ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come
+ * instantly.
+ *
+ * This function must be used as-if it were wake_up_process(); IOW the task
+ * must be ready to be woken at this location.
+ */
 void wake_q_add(struct wake_q_head *head, struct task_struct *task)
 {
 	struct wake_q_node *node = &task->wake_q;
@@ -405,10 +417,11 @@ void wake_q_add(struct wake_q_head *head, struct task_struct *task)
 	 * its already queued (either by us or someone else) and will get the
 	 * wakeup due to that.
 	 *
-	 * This cmpxchg() executes a full barrier, which pairs with the full
-	 * barrier executed by the wakeup in wake_up_q().
+	 * In order to ensure that a pending wakeup will observe our pending
+	 * state, even in the failed case, an explicit smp_mb() must be used.
 	 */
-	if (cmpxchg(&node->next, NULL, WAKE_Q_TAIL))
+	smp_mb__before_atomic();
+	if (cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL))
 		return;
 
 	get_task_struct(task);
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index d7f538847b84..e815781ed751 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -976,6 +976,9 @@ static int seccomp_notify_release(struct inode *inode, struct file *file)
 	struct seccomp_filter *filter = file->private_data;
 	struct seccomp_knotif *knotif;
 
+	if (!filter)
+		return 0;
+
 	mutex_lock(&filter->notify_lock);
 
 	/*
@@ -1300,6 +1303,7 @@ out:
 out_put_fd:
 	if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) {
 		if (ret < 0) {
+			listener_f->private_data = NULL;
 			fput(listener_f);
 			put_unused_fd(listener);
 		} else {
diff --git a/kernel/sys.c b/kernel/sys.c
index a48cbf1414b8..f7eb62eceb24 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1207,7 +1207,8 @@ DECLARE_RWSEM(uts_sem);
 /*
  * Work around broken programs that cannot handle "Linux 3.0".
  * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40
- * And we map 4.x to 2.6.60+x, so 4.0 would be 2.6.60.
+ * And we map 4.x and later versions to 2.6.60+x, so 4.0/5.0/6.0/... would be
+ * 2.6.60.
  */
 static int override_release(char __user *release, size_t len)
 {
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 8f0644af40be..80f955210861 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -685,6 +685,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
 	 * set up the signal and overrun bookkeeping.
 	 */
 	timer->it.cpu.incr = timespec64_to_ns(&new->it_interval);
+	timer->it_interval = ns_to_ktime(timer->it.cpu.incr);
 
 	/*
 	 * This acts as a modification timestamp for the timer,
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 5c19b8c41c7e..d5fb09ebba8b 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -607,11 +607,17 @@ static int trace_kprobe_create(int argc, const char *argv[])
 	char buf[MAX_EVENT_NAME_LEN];
 	unsigned int flags = TPARG_FL_KERNEL;
 
-	/* argc must be >= 1 */
-	if (argv[0][0] == 'r') {
+	switch (argv[0][0]) {
+	case 'r':
 		is_return = true;
 		flags |= TPARG_FL_RETURN;
-	} else if (argv[0][0] != 'p' || argc < 2)
+		break;
+	case 'p':
+		break;
+	default:
+		return -ECANCELED;
+	}
+	if (argc < 2)
 		return -ECANCELED;
 
 	event = strchr(&argv[0][1], ':');
diff --git a/kernel/umh.c b/kernel/umh.c
index 0baa672e023c..d937cbad903a 100644
--- a/kernel/umh.c
+++ b/kernel/umh.c
@@ -37,6 +37,8 @@ static kernel_cap_t usermodehelper_bset = CAP_FULL_SET;
 static kernel_cap_t usermodehelper_inheritable = CAP_FULL_SET;
 static DEFINE_SPINLOCK(umh_sysctl_lock);
 static DECLARE_RWSEM(umhelper_sem);
+static LIST_HEAD(umh_list);
+static DEFINE_MUTEX(umh_list_lock);
 
 static void call_usermodehelper_freeinfo(struct subprocess_info *info)
 {
@@ -100,10 +102,12 @@ static int call_usermodehelper_exec_async(void *data)
 	commit_creds(new);
 
 	sub_info->pid = task_pid_nr(current);
-	if (sub_info->file)
+	if (sub_info->file) {
 		retval = do_execve_file(sub_info->file,
 					sub_info->argv, sub_info->envp);
-	else
+		if (!retval)
+			current->flags |= PF_UMH;
+	} else
 		retval = do_execve(getname_kernel(sub_info->path),
 				   (const char __user *const __user *)sub_info->argv,
 				   (const char __user *const __user *)sub_info->envp);
@@ -517,6 +521,11 @@ int fork_usermode_blob(void *data, size_t len, struct umh_info *info)
 		goto out;
 
 	err = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC);
+	if (!err) {
+		mutex_lock(&umh_list_lock);
+		list_add(&info->list, &umh_list);
+		mutex_unlock(&umh_list_lock);
+	}
 out:
 	fput(file);
 	return err;
@@ -679,6 +688,26 @@ static int proc_cap_handler(struct ctl_table *table, int write,
 	return 0;
 }
 
+void __exit_umh(struct task_struct *tsk)
+{
+	struct umh_info *info;
+	pid_t pid = tsk->pid;
+
+	mutex_lock(&umh_list_lock);
+	list_for_each_entry(info, &umh_list, list) {
+		if (info->pid == pid) {
+			list_del(&info->list);
+			mutex_unlock(&umh_list_lock);
+			goto out;
+		}
+	}
+	mutex_unlock(&umh_list_lock);
+	return;
+out:
+	if (info->cleanup)
+		info->cleanup(info);
+}
+
 struct ctl_table usermodehelper_table[] = {
 	{
 		.procname	= "bset",