diff options
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/bpf/arraymap.c | 2 | ||||
| -rw-r--r-- | kernel/bpf/devmap.c | 10 | ||||
| -rw-r--r-- | kernel/bpf/hashtab.c | 4 | ||||
| -rw-r--r-- | kernel/bpf/sockmap.c | 28 | ||||
| -rw-r--r-- | kernel/bpf/verifier.c | 65 | ||||
| -rw-r--r-- | kernel/cpu.c | 5 | ||||
| -rw-r--r-- | kernel/exit.c | 4 | ||||
| -rw-r--r-- | kernel/irq/generic-chip.c | 15 | ||||
| -rw-r--r-- | kernel/workqueue.c | 37 |
9 files changed, 111 insertions, 59 deletions
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 98c0f00c3f5e..e2636737b69b 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c | |||
| @@ -98,7 +98,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) | |||
| 98 | array_size += (u64) attr->max_entries * elem_size * num_possible_cpus(); | 98 | array_size += (u64) attr->max_entries * elem_size * num_possible_cpus(); |
| 99 | 99 | ||
| 100 | if (array_size >= U32_MAX - PAGE_SIZE || | 100 | if (array_size >= U32_MAX - PAGE_SIZE || |
| 101 | elem_size > PCPU_MIN_UNIT_SIZE || bpf_array_alloc_percpu(array)) { | 101 | bpf_array_alloc_percpu(array)) { |
| 102 | bpf_map_area_free(array); | 102 | bpf_map_area_free(array); |
| 103 | return ERR_PTR(-ENOMEM); | 103 | return ERR_PTR(-ENOMEM); |
| 104 | } | 104 | } |
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index e093d9a2c4dd..e745d6a88224 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c | |||
| @@ -69,7 +69,7 @@ static LIST_HEAD(dev_map_list); | |||
| 69 | 69 | ||
| 70 | static u64 dev_map_bitmap_size(const union bpf_attr *attr) | 70 | static u64 dev_map_bitmap_size(const union bpf_attr *attr) |
| 71 | { | 71 | { |
| 72 | return BITS_TO_LONGS(attr->max_entries) * sizeof(unsigned long); | 72 | return BITS_TO_LONGS((u64) attr->max_entries) * sizeof(unsigned long); |
| 73 | } | 73 | } |
| 74 | 74 | ||
| 75 | static struct bpf_map *dev_map_alloc(union bpf_attr *attr) | 75 | static struct bpf_map *dev_map_alloc(union bpf_attr *attr) |
| @@ -78,6 +78,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr) | |||
| 78 | int err = -EINVAL; | 78 | int err = -EINVAL; |
| 79 | u64 cost; | 79 | u64 cost; |
| 80 | 80 | ||
| 81 | if (!capable(CAP_NET_ADMIN)) | ||
| 82 | return ERR_PTR(-EPERM); | ||
| 83 | |||
| 81 | /* check sanity of attributes */ | 84 | /* check sanity of attributes */ |
| 82 | if (attr->max_entries == 0 || attr->key_size != 4 || | 85 | if (attr->max_entries == 0 || attr->key_size != 4 || |
| 83 | attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE) | 86 | attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE) |
| @@ -111,8 +114,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr) | |||
| 111 | err = -ENOMEM; | 114 | err = -ENOMEM; |
| 112 | 115 | ||
| 113 | /* A per cpu bitfield with a bit per possible net device */ | 116 | /* A per cpu bitfield with a bit per possible net device */ |
| 114 | dtab->flush_needed = __alloc_percpu(dev_map_bitmap_size(attr), | 117 | dtab->flush_needed = __alloc_percpu_gfp(dev_map_bitmap_size(attr), |
| 115 | __alignof__(unsigned long)); | 118 | __alignof__(unsigned long), |
| 119 | GFP_KERNEL | __GFP_NOWARN); | ||
| 116 | if (!dtab->flush_needed) | 120 | if (!dtab->flush_needed) |
| 117 | goto free_dtab; | 121 | goto free_dtab; |
| 118 | 122 | ||
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 431126f31ea3..6533f08d1238 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c | |||
| @@ -317,10 +317,6 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) | |||
| 317 | */ | 317 | */ |
| 318 | goto free_htab; | 318 | goto free_htab; |
| 319 | 319 | ||
| 320 | if (percpu && round_up(htab->map.value_size, 8) > PCPU_MIN_UNIT_SIZE) | ||
| 321 | /* make sure the size for pcpu_alloc() is reasonable */ | ||
| 322 | goto free_htab; | ||
| 323 | |||
| 324 | htab->elem_size = sizeof(struct htab_elem) + | 320 | htab->elem_size = sizeof(struct htab_elem) + |
| 325 | round_up(htab->map.key_size, 8); | 321 | round_up(htab->map.key_size, 8); |
| 326 | if (percpu) | 322 | if (percpu) |
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 6424ce0e4969..2b6eb35ae5d3 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c | |||
| @@ -39,6 +39,7 @@ | |||
| 39 | #include <linux/workqueue.h> | 39 | #include <linux/workqueue.h> |
| 40 | #include <linux/list.h> | 40 | #include <linux/list.h> |
| 41 | #include <net/strparser.h> | 41 | #include <net/strparser.h> |
| 42 | #include <net/tcp.h> | ||
| 42 | 43 | ||
| 43 | struct bpf_stab { | 44 | struct bpf_stab { |
| 44 | struct bpf_map map; | 45 | struct bpf_map map; |
| @@ -101,9 +102,16 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb) | |||
| 101 | return SK_DROP; | 102 | return SK_DROP; |
| 102 | 103 | ||
| 103 | skb_orphan(skb); | 104 | skb_orphan(skb); |
| 105 | /* We need to ensure that BPF metadata for maps is also cleared | ||
| 106 | * when we orphan the skb so that we don't have the possibility | ||
| 107 | * to reference a stale map. | ||
| 108 | */ | ||
| 109 | TCP_SKB_CB(skb)->bpf.map = NULL; | ||
| 104 | skb->sk = psock->sock; | 110 | skb->sk = psock->sock; |
| 105 | bpf_compute_data_end(skb); | 111 | bpf_compute_data_end(skb); |
| 112 | preempt_disable(); | ||
| 106 | rc = (*prog->bpf_func)(skb, prog->insnsi); | 113 | rc = (*prog->bpf_func)(skb, prog->insnsi); |
| 114 | preempt_enable(); | ||
| 107 | skb->sk = NULL; | 115 | skb->sk = NULL; |
| 108 | 116 | ||
| 109 | return rc; | 117 | return rc; |
| @@ -114,17 +122,10 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb) | |||
| 114 | struct sock *sk; | 122 | struct sock *sk; |
| 115 | int rc; | 123 | int rc; |
| 116 | 124 | ||
| 117 | /* Because we use per cpu values to feed input from sock redirect | ||
| 118 | * in BPF program to do_sk_redirect_map() call we need to ensure we | ||
| 119 | * are not preempted. RCU read lock is not sufficient in this case | ||
| 120 | * with CONFIG_PREEMPT_RCU enabled so we must be explicit here. | ||
| 121 | */ | ||
| 122 | preempt_disable(); | ||
| 123 | rc = smap_verdict_func(psock, skb); | 125 | rc = smap_verdict_func(psock, skb); |
| 124 | switch (rc) { | 126 | switch (rc) { |
| 125 | case SK_REDIRECT: | 127 | case SK_REDIRECT: |
| 126 | sk = do_sk_redirect_map(); | 128 | sk = do_sk_redirect_map(skb); |
| 127 | preempt_enable(); | ||
| 128 | if (likely(sk)) { | 129 | if (likely(sk)) { |
| 129 | struct smap_psock *peer = smap_psock_sk(sk); | 130 | struct smap_psock *peer = smap_psock_sk(sk); |
| 130 | 131 | ||
| @@ -141,8 +142,6 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb) | |||
| 141 | /* Fall through and free skb otherwise */ | 142 | /* Fall through and free skb otherwise */ |
| 142 | case SK_DROP: | 143 | case SK_DROP: |
| 143 | default: | 144 | default: |
| 144 | if (rc != SK_REDIRECT) | ||
| 145 | preempt_enable(); | ||
| 146 | kfree_skb(skb); | 145 | kfree_skb(skb); |
| 147 | } | 146 | } |
| 148 | } | 147 | } |
| @@ -487,6 +486,9 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr) | |||
| 487 | int err = -EINVAL; | 486 | int err = -EINVAL; |
| 488 | u64 cost; | 487 | u64 cost; |
| 489 | 488 | ||
| 489 | if (!capable(CAP_NET_ADMIN)) | ||
| 490 | return ERR_PTR(-EPERM); | ||
| 491 | |||
| 490 | /* check sanity of attributes */ | 492 | /* check sanity of attributes */ |
| 491 | if (attr->max_entries == 0 || attr->key_size != 4 || | 493 | if (attr->max_entries == 0 || attr->key_size != 4 || |
| 492 | attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE) | 494 | attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE) |
| @@ -840,6 +842,12 @@ static int sock_map_update_elem(struct bpf_map *map, | |||
| 840 | return -EINVAL; | 842 | return -EINVAL; |
| 841 | } | 843 | } |
| 842 | 844 | ||
| 845 | if (skops.sk->sk_type != SOCK_STREAM || | ||
| 846 | skops.sk->sk_protocol != IPPROTO_TCP) { | ||
| 847 | fput(socket->file); | ||
| 848 | return -EOPNOTSUPP; | ||
| 849 | } | ||
| 850 | |||
| 843 | err = sock_map_ctx_update_elem(&skops, map, key, flags); | 851 | err = sock_map_ctx_update_elem(&skops, map, key, flags); |
| 844 | fput(socket->file); | 852 | fput(socket->file); |
| 845 | return err; | 853 | return err; |
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8b8d6ba39e23..c48ca2a34b5e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c | |||
| @@ -1116,7 +1116,12 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn | |||
| 1116 | /* ctx accesses must be at a fixed offset, so that we can | 1116 | /* ctx accesses must be at a fixed offset, so that we can |
| 1117 | * determine what type of data were returned. | 1117 | * determine what type of data were returned. |
| 1118 | */ | 1118 | */ |
| 1119 | if (!tnum_is_const(reg->var_off)) { | 1119 | if (reg->off) { |
| 1120 | verbose("dereference of modified ctx ptr R%d off=%d+%d, ctx+const is allowed, ctx+const+const is not\n", | ||
| 1121 | regno, reg->off, off - reg->off); | ||
| 1122 | return -EACCES; | ||
| 1123 | } | ||
| 1124 | if (!tnum_is_const(reg->var_off) || reg->var_off.value) { | ||
| 1120 | char tn_buf[48]; | 1125 | char tn_buf[48]; |
| 1121 | 1126 | ||
| 1122 | tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); | 1127 | tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); |
| @@ -1124,7 +1129,6 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn | |||
| 1124 | tn_buf, off, size); | 1129 | tn_buf, off, size); |
| 1125 | return -EACCES; | 1130 | return -EACCES; |
| 1126 | } | 1131 | } |
| 1127 | off += reg->var_off.value; | ||
| 1128 | err = check_ctx_access(env, insn_idx, off, size, t, &reg_type); | 1132 | err = check_ctx_access(env, insn_idx, off, size, t, &reg_type); |
| 1129 | if (!err && t == BPF_READ && value_regno >= 0) { | 1133 | if (!err && t == BPF_READ && value_regno >= 0) { |
| 1130 | /* ctx access returns either a scalar, or a | 1134 | /* ctx access returns either a scalar, or a |
| @@ -2426,12 +2430,15 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) | |||
| 2426 | } | 2430 | } |
| 2427 | 2431 | ||
| 2428 | static void find_good_pkt_pointers(struct bpf_verifier_state *state, | 2432 | static void find_good_pkt_pointers(struct bpf_verifier_state *state, |
| 2429 | struct bpf_reg_state *dst_reg) | 2433 | struct bpf_reg_state *dst_reg, |
| 2434 | bool range_right_open) | ||
| 2430 | { | 2435 | { |
| 2431 | struct bpf_reg_state *regs = state->regs, *reg; | 2436 | struct bpf_reg_state *regs = state->regs, *reg; |
| 2437 | u16 new_range; | ||
| 2432 | int i; | 2438 | int i; |
| 2433 | 2439 | ||
| 2434 | if (dst_reg->off < 0) | 2440 | if (dst_reg->off < 0 || |
| 2441 | (dst_reg->off == 0 && range_right_open)) | ||
| 2435 | /* This doesn't give us any range */ | 2442 | /* This doesn't give us any range */ |
| 2436 | return; | 2443 | return; |
| 2437 | 2444 | ||
| @@ -2442,9 +2449,13 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state, | |||
| 2442 | */ | 2449 | */ |
| 2443 | return; | 2450 | return; |
| 2444 | 2451 | ||
| 2445 | /* LLVM can generate four kind of checks: | 2452 | new_range = dst_reg->off; |
| 2453 | if (range_right_open) | ||
| 2454 | new_range--; | ||
| 2455 | |||
| 2456 | /* Examples for register markings: | ||
| 2446 | * | 2457 | * |
| 2447 | * Type 1/2: | 2458 | * pkt_data in dst register: |
| 2448 | * | 2459 | * |
| 2449 | * r2 = r3; | 2460 | * r2 = r3; |
| 2450 | * r2 += 8; | 2461 | * r2 += 8; |
| @@ -2461,7 +2472,7 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state, | |||
| 2461 | * r2=pkt(id=n,off=8,r=0) | 2472 | * r2=pkt(id=n,off=8,r=0) |
| 2462 | * r3=pkt(id=n,off=0,r=0) | 2473 | * r3=pkt(id=n,off=0,r=0) |
| 2463 | * | 2474 | * |
| 2464 | * Type 3/4: | 2475 | * pkt_data in src register: |
| 2465 | * | 2476 | * |
| 2466 | * r2 = r3; | 2477 | * r2 = r3; |
| 2467 | * r2 += 8; | 2478 | * r2 += 8; |
| @@ -2479,7 +2490,9 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state, | |||
| 2479 | * r3=pkt(id=n,off=0,r=0) | 2490 | * r3=pkt(id=n,off=0,r=0) |
| 2480 | * | 2491 | * |
| 2481 | * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8) | 2492 | * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8) |
| 2482 | * so that range of bytes [r3, r3 + 8) is safe to access. | 2493 | * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8) |
| 2494 | * and [r3, r3 + 8-1) respectively is safe to access depending on | ||
| 2495 | * the check. | ||
| 2483 | */ | 2496 | */ |
| 2484 | 2497 | ||
| 2485 | /* If our ids match, then we must have the same max_value. And we | 2498 | /* If our ids match, then we must have the same max_value. And we |
| @@ -2490,14 +2503,14 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state, | |||
| 2490 | for (i = 0; i < MAX_BPF_REG; i++) | 2503 | for (i = 0; i < MAX_BPF_REG; i++) |
| 2491 | if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id) | 2504 | if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id) |
| 2492 | /* keep the maximum range already checked */ | 2505 | /* keep the maximum range already checked */ |
| 2493 | regs[i].range = max_t(u16, regs[i].range, dst_reg->off); | 2506 | regs[i].range = max(regs[i].range, new_range); |
| 2494 | 2507 | ||
| 2495 | for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { | 2508 | for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { |
| 2496 | if (state->stack_slot_type[i] != STACK_SPILL) | 2509 | if (state->stack_slot_type[i] != STACK_SPILL) |
| 2497 | continue; | 2510 | continue; |
| 2498 | reg = &state->spilled_regs[i / BPF_REG_SIZE]; | 2511 | reg = &state->spilled_regs[i / BPF_REG_SIZE]; |
| 2499 | if (reg->type == PTR_TO_PACKET && reg->id == dst_reg->id) | 2512 | if (reg->type == PTR_TO_PACKET && reg->id == dst_reg->id) |
| 2500 | reg->range = max_t(u16, reg->range, dst_reg->off); | 2513 | reg->range = max(reg->range, new_range); |
| 2501 | } | 2514 | } |
| 2502 | } | 2515 | } |
| 2503 | 2516 | ||
| @@ -2861,19 +2874,43 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, | |||
| 2861 | } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT && | 2874 | } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT && |
| 2862 | dst_reg->type == PTR_TO_PACKET && | 2875 | dst_reg->type == PTR_TO_PACKET && |
| 2863 | regs[insn->src_reg].type == PTR_TO_PACKET_END) { | 2876 | regs[insn->src_reg].type == PTR_TO_PACKET_END) { |
| 2864 | find_good_pkt_pointers(this_branch, dst_reg); | 2877 | /* pkt_data' > pkt_end */ |
| 2878 | find_good_pkt_pointers(this_branch, dst_reg, false); | ||
| 2879 | } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT && | ||
| 2880 | dst_reg->type == PTR_TO_PACKET_END && | ||
| 2881 | regs[insn->src_reg].type == PTR_TO_PACKET) { | ||
| 2882 | /* pkt_end > pkt_data' */ | ||
| 2883 | find_good_pkt_pointers(other_branch, &regs[insn->src_reg], true); | ||
| 2865 | } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLT && | 2884 | } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLT && |
| 2866 | dst_reg->type == PTR_TO_PACKET && | 2885 | dst_reg->type == PTR_TO_PACKET && |
| 2867 | regs[insn->src_reg].type == PTR_TO_PACKET_END) { | 2886 | regs[insn->src_reg].type == PTR_TO_PACKET_END) { |
| 2868 | find_good_pkt_pointers(other_branch, dst_reg); | 2887 | /* pkt_data' < pkt_end */ |
| 2888 | find_good_pkt_pointers(other_branch, dst_reg, true); | ||
| 2889 | } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLT && | ||
| 2890 | dst_reg->type == PTR_TO_PACKET_END && | ||
| 2891 | regs[insn->src_reg].type == PTR_TO_PACKET) { | ||
| 2892 | /* pkt_end < pkt_data' */ | ||
| 2893 | find_good_pkt_pointers(this_branch, &regs[insn->src_reg], false); | ||
| 2894 | } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE && | ||
| 2895 | dst_reg->type == PTR_TO_PACKET && | ||
| 2896 | regs[insn->src_reg].type == PTR_TO_PACKET_END) { | ||
| 2897 | /* pkt_data' >= pkt_end */ | ||
| 2898 | find_good_pkt_pointers(this_branch, dst_reg, true); | ||
| 2869 | } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE && | 2899 | } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE && |
| 2870 | dst_reg->type == PTR_TO_PACKET_END && | 2900 | dst_reg->type == PTR_TO_PACKET_END && |
| 2871 | regs[insn->src_reg].type == PTR_TO_PACKET) { | 2901 | regs[insn->src_reg].type == PTR_TO_PACKET) { |
| 2872 | find_good_pkt_pointers(other_branch, &regs[insn->src_reg]); | 2902 | /* pkt_end >= pkt_data' */ |
| 2903 | find_good_pkt_pointers(other_branch, &regs[insn->src_reg], false); | ||
| 2904 | } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLE && | ||
| 2905 | dst_reg->type == PTR_TO_PACKET && | ||
| 2906 | regs[insn->src_reg].type == PTR_TO_PACKET_END) { | ||
| 2907 | /* pkt_data' <= pkt_end */ | ||
| 2908 | find_good_pkt_pointers(other_branch, dst_reg, false); | ||
| 2873 | } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLE && | 2909 | } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLE && |
| 2874 | dst_reg->type == PTR_TO_PACKET_END && | 2910 | dst_reg->type == PTR_TO_PACKET_END && |
| 2875 | regs[insn->src_reg].type == PTR_TO_PACKET) { | 2911 | regs[insn->src_reg].type == PTR_TO_PACKET) { |
| 2876 | find_good_pkt_pointers(this_branch, &regs[insn->src_reg]); | 2912 | /* pkt_end <= pkt_data' */ |
| 2913 | find_good_pkt_pointers(this_branch, &regs[insn->src_reg], true); | ||
| 2877 | } else if (is_pointer_value(env, insn->dst_reg)) { | 2914 | } else if (is_pointer_value(env, insn->dst_reg)) { |
| 2878 | verbose("R%d pointer comparison prohibited\n", insn->dst_reg); | 2915 | verbose("R%d pointer comparison prohibited\n", insn->dst_reg); |
| 2879 | return -EACCES; | 2916 | return -EACCES; |
diff --git a/kernel/cpu.c b/kernel/cpu.c index d851df22f5c5..04892a82f6ac 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c | |||
| @@ -632,6 +632,11 @@ cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup, | |||
| 632 | __cpuhp_kick_ap(st); | 632 | __cpuhp_kick_ap(st); |
| 633 | } | 633 | } |
| 634 | 634 | ||
| 635 | /* | ||
| 636 | * Clean up the leftovers so the next hotplug operation wont use stale | ||
| 637 | * data. | ||
| 638 | */ | ||
| 639 | st->node = st->last = NULL; | ||
| 635 | return ret; | 640 | return ret; |
| 636 | } | 641 | } |
| 637 | 642 | ||
diff --git a/kernel/exit.c b/kernel/exit.c index cf28528842bc..f6cad39f35df 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
| @@ -1611,7 +1611,7 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *, | |||
| 1611 | return err; | 1611 | return err; |
| 1612 | 1612 | ||
| 1613 | if (!access_ok(VERIFY_WRITE, infop, sizeof(*infop))) | 1613 | if (!access_ok(VERIFY_WRITE, infop, sizeof(*infop))) |
| 1614 | goto Efault; | 1614 | return -EFAULT; |
| 1615 | 1615 | ||
| 1616 | user_access_begin(); | 1616 | user_access_begin(); |
| 1617 | unsafe_put_user(signo, &infop->si_signo, Efault); | 1617 | unsafe_put_user(signo, &infop->si_signo, Efault); |
| @@ -1739,7 +1739,7 @@ COMPAT_SYSCALL_DEFINE5(waitid, | |||
| 1739 | return err; | 1739 | return err; |
| 1740 | 1740 | ||
| 1741 | if (!access_ok(VERIFY_WRITE, infop, sizeof(*infop))) | 1741 | if (!access_ok(VERIFY_WRITE, infop, sizeof(*infop))) |
| 1742 | goto Efault; | 1742 | return -EFAULT; |
| 1743 | 1743 | ||
| 1744 | user_access_begin(); | 1744 | user_access_begin(); |
| 1745 | unsafe_put_user(signo, &infop->si_signo, Efault); | 1745 | unsafe_put_user(signo, &infop->si_signo, Efault); |
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index 5270a54b9fa4..c26c5bb6b491 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c | |||
| @@ -135,17 +135,26 @@ void irq_gc_ack_clr_bit(struct irq_data *d) | |||
| 135 | } | 135 | } |
| 136 | 136 | ||
| 137 | /** | 137 | /** |
| 138 | * irq_gc_mask_disable_reg_and_ack - Mask and ack pending interrupt | 138 | * irq_gc_mask_disable_and_ack_set - Mask and ack pending interrupt |
| 139 | * @d: irq_data | 139 | * @d: irq_data |
| 140 | * | ||
| 141 | * This generic implementation of the irq_mask_ack method is for chips | ||
| 142 | * with separate enable/disable registers instead of a single mask | ||
| 143 | * register and where a pending interrupt is acknowledged by setting a | ||
| 144 | * bit. | ||
| 145 | * | ||
| 146 | * Note: This is the only permutation currently used. Similar generic | ||
| 147 | * functions should be added here if other permutations are required. | ||
| 140 | */ | 148 | */ |
| 141 | void irq_gc_mask_disable_reg_and_ack(struct irq_data *d) | 149 | void irq_gc_mask_disable_and_ack_set(struct irq_data *d) |
| 142 | { | 150 | { |
| 143 | struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); | 151 | struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); |
| 144 | struct irq_chip_type *ct = irq_data_get_chip_type(d); | 152 | struct irq_chip_type *ct = irq_data_get_chip_type(d); |
| 145 | u32 mask = d->mask; | 153 | u32 mask = d->mask; |
| 146 | 154 | ||
| 147 | irq_gc_lock(gc); | 155 | irq_gc_lock(gc); |
| 148 | irq_reg_writel(gc, mask, ct->regs.mask); | 156 | irq_reg_writel(gc, mask, ct->regs.disable); |
| 157 | *ct->mask_cache &= ~mask; | ||
| 149 | irq_reg_writel(gc, mask, ct->regs.ack); | 158 | irq_reg_writel(gc, mask, ct->regs.ack); |
| 150 | irq_gc_unlock(gc); | 159 | irq_gc_unlock(gc); |
| 151 | } | 160 | } |
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 64d0edf428f8..a2dccfe1acec 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
| @@ -68,6 +68,7 @@ enum { | |||
| 68 | * attach_mutex to avoid changing binding state while | 68 | * attach_mutex to avoid changing binding state while |
| 69 | * worker_attach_to_pool() is in progress. | 69 | * worker_attach_to_pool() is in progress. |
| 70 | */ | 70 | */ |
| 71 | POOL_MANAGER_ACTIVE = 1 << 0, /* being managed */ | ||
| 71 | POOL_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */ | 72 | POOL_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */ |
| 72 | 73 | ||
| 73 | /* worker flags */ | 74 | /* worker flags */ |
| @@ -165,7 +166,6 @@ struct worker_pool { | |||
| 165 | /* L: hash of busy workers */ | 166 | /* L: hash of busy workers */ |
| 166 | 167 | ||
| 167 | /* see manage_workers() for details on the two manager mutexes */ | 168 | /* see manage_workers() for details on the two manager mutexes */ |
| 168 | struct mutex manager_arb; /* manager arbitration */ | ||
| 169 | struct worker *manager; /* L: purely informational */ | 169 | struct worker *manager; /* L: purely informational */ |
| 170 | struct mutex attach_mutex; /* attach/detach exclusion */ | 170 | struct mutex attach_mutex; /* attach/detach exclusion */ |
| 171 | struct list_head workers; /* A: attached workers */ | 171 | struct list_head workers; /* A: attached workers */ |
| @@ -299,6 +299,7 @@ static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf; | |||
| 299 | 299 | ||
| 300 | static DEFINE_MUTEX(wq_pool_mutex); /* protects pools and workqueues list */ | 300 | static DEFINE_MUTEX(wq_pool_mutex); /* protects pools and workqueues list */ |
| 301 | static DEFINE_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */ | 301 | static DEFINE_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */ |
| 302 | static DECLARE_WAIT_QUEUE_HEAD(wq_manager_wait); /* wait for manager to go away */ | ||
| 302 | 303 | ||
| 303 | static LIST_HEAD(workqueues); /* PR: list of all workqueues */ | 304 | static LIST_HEAD(workqueues); /* PR: list of all workqueues */ |
| 304 | static bool workqueue_freezing; /* PL: have wqs started freezing? */ | 305 | static bool workqueue_freezing; /* PL: have wqs started freezing? */ |
| @@ -801,7 +802,7 @@ static bool need_to_create_worker(struct worker_pool *pool) | |||
| 801 | /* Do we have too many workers and should some go away? */ | 802 | /* Do we have too many workers and should some go away? */ |
| 802 | static bool too_many_workers(struct worker_pool *pool) | 803 | static bool too_many_workers(struct worker_pool *pool) |
| 803 | { | 804 | { |
| 804 | bool managing = mutex_is_locked(&pool->manager_arb); | 805 | bool managing = pool->flags & POOL_MANAGER_ACTIVE; |
| 805 | int nr_idle = pool->nr_idle + managing; /* manager is considered idle */ | 806 | int nr_idle = pool->nr_idle + managing; /* manager is considered idle */ |
| 806 | int nr_busy = pool->nr_workers - nr_idle; | 807 | int nr_busy = pool->nr_workers - nr_idle; |
| 807 | 808 | ||
| @@ -1980,24 +1981,17 @@ static bool manage_workers(struct worker *worker) | |||
| 1980 | { | 1981 | { |
| 1981 | struct worker_pool *pool = worker->pool; | 1982 | struct worker_pool *pool = worker->pool; |
| 1982 | 1983 | ||
| 1983 | /* | 1984 | if (pool->flags & POOL_MANAGER_ACTIVE) |
| 1984 | * Anyone who successfully grabs manager_arb wins the arbitration | ||
| 1985 | * and becomes the manager. mutex_trylock() on pool->manager_arb | ||
| 1986 | * failure while holding pool->lock reliably indicates that someone | ||
| 1987 | * else is managing the pool and the worker which failed trylock | ||
| 1988 | * can proceed to executing work items. This means that anyone | ||
| 1989 | * grabbing manager_arb is responsible for actually performing | ||
| 1990 | * manager duties. If manager_arb is grabbed and released without | ||
| 1991 | * actual management, the pool may stall indefinitely. | ||
| 1992 | */ | ||
| 1993 | if (!mutex_trylock(&pool->manager_arb)) | ||
| 1994 | return false; | 1985 | return false; |
| 1986 | |||
| 1987 | pool->flags |= POOL_MANAGER_ACTIVE; | ||
| 1995 | pool->manager = worker; | 1988 | pool->manager = worker; |
| 1996 | 1989 | ||
| 1997 | maybe_create_worker(pool); | 1990 | maybe_create_worker(pool); |
| 1998 | 1991 | ||
| 1999 | pool->manager = NULL; | 1992 | pool->manager = NULL; |
| 2000 | mutex_unlock(&pool->manager_arb); | 1993 | pool->flags &= ~POOL_MANAGER_ACTIVE; |
| 1994 | wake_up(&wq_manager_wait); | ||
| 2001 | return true; | 1995 | return true; |
| 2002 | } | 1996 | } |
| 2003 | 1997 | ||
| @@ -3248,7 +3242,6 @@ static int init_worker_pool(struct worker_pool *pool) | |||
| 3248 | setup_timer(&pool->mayday_timer, pool_mayday_timeout, | 3242 | setup_timer(&pool->mayday_timer, pool_mayday_timeout, |
| 3249 | (unsigned long)pool); | 3243 | (unsigned long)pool); |
| 3250 | 3244 | ||
| 3251 | mutex_init(&pool->manager_arb); | ||
| 3252 | mutex_init(&pool->attach_mutex); | 3245 | mutex_init(&pool->attach_mutex); |
| 3253 | INIT_LIST_HEAD(&pool->workers); | 3246 | INIT_LIST_HEAD(&pool->workers); |
| 3254 | 3247 | ||
| @@ -3318,13 +3311,15 @@ static void put_unbound_pool(struct worker_pool *pool) | |||
| 3318 | hash_del(&pool->hash_node); | 3311 | hash_del(&pool->hash_node); |
| 3319 | 3312 | ||
| 3320 | /* | 3313 | /* |
| 3321 | * Become the manager and destroy all workers. Grabbing | 3314 | * Become the manager and destroy all workers. This prevents |
| 3322 | * manager_arb prevents @pool's workers from blocking on | 3315 | * @pool's workers from blocking on attach_mutex. We're the last |
| 3323 | * attach_mutex. | 3316 | * manager and @pool gets freed with the flag set. |
| 3324 | */ | 3317 | */ |
| 3325 | mutex_lock(&pool->manager_arb); | ||
| 3326 | |||
| 3327 | spin_lock_irq(&pool->lock); | 3318 | spin_lock_irq(&pool->lock); |
| 3319 | wait_event_lock_irq(wq_manager_wait, | ||
| 3320 | !(pool->flags & POOL_MANAGER_ACTIVE), pool->lock); | ||
| 3321 | pool->flags |= POOL_MANAGER_ACTIVE; | ||
| 3322 | |||
| 3328 | while ((worker = first_idle_worker(pool))) | 3323 | while ((worker = first_idle_worker(pool))) |
| 3329 | destroy_worker(worker); | 3324 | destroy_worker(worker); |
| 3330 | WARN_ON(pool->nr_workers || pool->nr_idle); | 3325 | WARN_ON(pool->nr_workers || pool->nr_idle); |
| @@ -3338,8 +3333,6 @@ static void put_unbound_pool(struct worker_pool *pool) | |||
| 3338 | if (pool->detach_completion) | 3333 | if (pool->detach_completion) |
| 3339 | wait_for_completion(pool->detach_completion); | 3334 | wait_for_completion(pool->detach_completion); |
| 3340 | 3335 | ||
| 3341 | mutex_unlock(&pool->manager_arb); | ||
| 3342 | |||
| 3343 | /* shut down the timers */ | 3336 | /* shut down the timers */ |
| 3344 | del_timer_sync(&pool->idle_timer); | 3337 | del_timer_sync(&pool->idle_timer); |
| 3345 | del_timer_sync(&pool->mayday_timer); | 3338 | del_timer_sync(&pool->mayday_timer); |
