about | summary | refs | log | tree | commit | diff | stats
path: root/kernel
diff options
context:
space:
mode:
author: Ingo Molnar <mingo@kernel.org> 2017-10-27 04:31:44 -0400
committer: Ingo Molnar <mingo@kernel.org> 2017-10-27 04:31:44 -0400
commit: 6856b8e53609ee3eff7e3173e4e5d979f47d834d (patch)
tree: c2f9e364889fa07e20081b198a085a4ed7b26bd3 /kernel
parent: 57646b6fda9b751e62929c73b1e6df06b108a3c9 (diff)
parent: 2eece390bf68ec8f733d7e4a3ba8a5ea350082ae (diff)
Merge branch 'perf/urgent' into perf/core, to pick up fixes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/bpf/arraymap.c 2
-rw-r--r-- kernel/bpf/devmap.c 10
-rw-r--r-- kernel/bpf/hashtab.c 4
-rw-r--r-- kernel/bpf/sockmap.c 28
-rw-r--r-- kernel/bpf/verifier.c 65
-rw-r--r-- kernel/cpu.c 5
-rw-r--r-- kernel/exit.c 4
-rw-r--r-- kernel/irq/generic-chip.c 15
-rw-r--r-- kernel/workqueue.c 37
9 files changed, 111 insertions, 59 deletions
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 98c0f00c3f5e..e2636737b69b 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -98,7 +98,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
98 array_size += (u64) attr->max_entries * elem_size * num_possible_cpus(); 98 array_size += (u64) attr->max_entries * elem_size * num_possible_cpus();
99 99
100 if (array_size >= U32_MAX - PAGE_SIZE || 100 if (array_size >= U32_MAX - PAGE_SIZE ||
101 elem_size > PCPU_MIN_UNIT_SIZE || bpf_array_alloc_percpu(array)) { 101 bpf_array_alloc_percpu(array)) {
102 bpf_map_area_free(array); 102 bpf_map_area_free(array);
103 return ERR_PTR(-ENOMEM); 103 return ERR_PTR(-ENOMEM);
104 } 104 }
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index e093d9a2c4dd..e745d6a88224 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -69,7 +69,7 @@ static LIST_HEAD(dev_map_list);
69 69
70static u64 dev_map_bitmap_size(const union bpf_attr *attr) 70static u64 dev_map_bitmap_size(const union bpf_attr *attr)
71{ 71{
72 return BITS_TO_LONGS(attr->max_entries) * sizeof(unsigned long); 72 return BITS_TO_LONGS((u64) attr->max_entries) * sizeof(unsigned long);
73} 73}
74 74
75static struct bpf_map *dev_map_alloc(union bpf_attr *attr) 75static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
@@ -78,6 +78,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
78 int err = -EINVAL; 78 int err = -EINVAL;
79 u64 cost; 79 u64 cost;
80 80
81 if (!capable(CAP_NET_ADMIN))
82 return ERR_PTR(-EPERM);
83
81 /* check sanity of attributes */ 84 /* check sanity of attributes */
82 if (attr->max_entries == 0 || attr->key_size != 4 || 85 if (attr->max_entries == 0 || attr->key_size != 4 ||
83 attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE) 86 attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE)
@@ -111,8 +114,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
111 err = -ENOMEM; 114 err = -ENOMEM;
112 115
113 /* A per cpu bitfield with a bit per possible net device */ 116 /* A per cpu bitfield with a bit per possible net device */
114 dtab->flush_needed = __alloc_percpu(dev_map_bitmap_size(attr), 117 dtab->flush_needed = __alloc_percpu_gfp(dev_map_bitmap_size(attr),
115 __alignof__(unsigned long)); 118 __alignof__(unsigned long),
119 GFP_KERNEL | __GFP_NOWARN);
116 if (!dtab->flush_needed) 120 if (!dtab->flush_needed)
117 goto free_dtab; 121 goto free_dtab;
118 122
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 431126f31ea3..6533f08d1238 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -317,10 +317,6 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
317 */ 317 */
318 goto free_htab; 318 goto free_htab;
319 319
320 if (percpu && round_up(htab->map.value_size, 8) > PCPU_MIN_UNIT_SIZE)
321 /* make sure the size for pcpu_alloc() is reasonable */
322 goto free_htab;
323
324 htab->elem_size = sizeof(struct htab_elem) + 320 htab->elem_size = sizeof(struct htab_elem) +
325 round_up(htab->map.key_size, 8); 321 round_up(htab->map.key_size, 8);
326 if (percpu) 322 if (percpu)
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 6424ce0e4969..2b6eb35ae5d3 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -39,6 +39,7 @@
39#include <linux/workqueue.h> 39#include <linux/workqueue.h>
40#include <linux/list.h> 40#include <linux/list.h>
41#include <net/strparser.h> 41#include <net/strparser.h>
42#include <net/tcp.h>
42 43
43struct bpf_stab { 44struct bpf_stab {
44 struct bpf_map map; 45 struct bpf_map map;
@@ -101,9 +102,16 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
101 return SK_DROP; 102 return SK_DROP;
102 103
103 skb_orphan(skb); 104 skb_orphan(skb);
105 /* We need to ensure that BPF metadata for maps is also cleared
106 * when we orphan the skb so that we don't have the possibility
107 * to reference a stale map.
108 */
109 TCP_SKB_CB(skb)->bpf.map = NULL;
104 skb->sk = psock->sock; 110 skb->sk = psock->sock;
105 bpf_compute_data_end(skb); 111 bpf_compute_data_end(skb);
112 preempt_disable();
106 rc = (*prog->bpf_func)(skb, prog->insnsi); 113 rc = (*prog->bpf_func)(skb, prog->insnsi);
114 preempt_enable();
107 skb->sk = NULL; 115 skb->sk = NULL;
108 116
109 return rc; 117 return rc;
@@ -114,17 +122,10 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
114 struct sock *sk; 122 struct sock *sk;
115 int rc; 123 int rc;
116 124
117 /* Because we use per cpu values to feed input from sock redirect
118 * in BPF program to do_sk_redirect_map() call we need to ensure we
119 * are not preempted. RCU read lock is not sufficient in this case
120 * with CONFIG_PREEMPT_RCU enabled so we must be explicit here.
121 */
122 preempt_disable();
123 rc = smap_verdict_func(psock, skb); 125 rc = smap_verdict_func(psock, skb);
124 switch (rc) { 126 switch (rc) {
125 case SK_REDIRECT: 127 case SK_REDIRECT:
126 sk = do_sk_redirect_map(); 128 sk = do_sk_redirect_map(skb);
127 preempt_enable();
128 if (likely(sk)) { 129 if (likely(sk)) {
129 struct smap_psock *peer = smap_psock_sk(sk); 130 struct smap_psock *peer = smap_psock_sk(sk);
130 131
@@ -141,8 +142,6 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
141 /* Fall through and free skb otherwise */ 142 /* Fall through and free skb otherwise */
142 case SK_DROP: 143 case SK_DROP:
143 default: 144 default:
144 if (rc != SK_REDIRECT)
145 preempt_enable();
146 kfree_skb(skb); 145 kfree_skb(skb);
147 } 146 }
148} 147}
@@ -487,6 +486,9 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
487 int err = -EINVAL; 486 int err = -EINVAL;
488 u64 cost; 487 u64 cost;
489 488
489 if (!capable(CAP_NET_ADMIN))
490 return ERR_PTR(-EPERM);
491
490 /* check sanity of attributes */ 492 /* check sanity of attributes */
491 if (attr->max_entries == 0 || attr->key_size != 4 || 493 if (attr->max_entries == 0 || attr->key_size != 4 ||
492 attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE) 494 attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE)
@@ -840,6 +842,12 @@ static int sock_map_update_elem(struct bpf_map *map,
840 return -EINVAL; 842 return -EINVAL;
841 } 843 }
842 844
845 if (skops.sk->sk_type != SOCK_STREAM ||
846 skops.sk->sk_protocol != IPPROTO_TCP) {
847 fput(socket->file);
848 return -EOPNOTSUPP;
849 }
850
843 err = sock_map_ctx_update_elem(&skops, map, key, flags); 851 err = sock_map_ctx_update_elem(&skops, map, key, flags);
844 fput(socket->file); 852 fput(socket->file);
845 return err; 853 return err;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 8b8d6ba39e23..c48ca2a34b5e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1116,7 +1116,12 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
1116 /* ctx accesses must be at a fixed offset, so that we can 1116 /* ctx accesses must be at a fixed offset, so that we can
1117 * determine what type of data were returned. 1117 * determine what type of data were returned.
1118 */ 1118 */
1119 if (!tnum_is_const(reg->var_off)) { 1119 if (reg->off) {
1120 verbose("dereference of modified ctx ptr R%d off=%d+%d, ctx+const is allowed, ctx+const+const is not\n",
1121 regno, reg->off, off - reg->off);
1122 return -EACCES;
1123 }
1124 if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
1120 char tn_buf[48]; 1125 char tn_buf[48];
1121 1126
1122 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 1127 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
@@ -1124,7 +1129,6 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
1124 tn_buf, off, size); 1129 tn_buf, off, size);
1125 return -EACCES; 1130 return -EACCES;
1126 } 1131 }
1127 off += reg->var_off.value;
1128 err = check_ctx_access(env, insn_idx, off, size, t, &reg_type); 1132 err = check_ctx_access(env, insn_idx, off, size, t, &reg_type);
1129 if (!err && t == BPF_READ && value_regno >= 0) { 1133 if (!err && t == BPF_READ && value_regno >= 0) {
1130 /* ctx access returns either a scalar, or a 1134 /* ctx access returns either a scalar, or a
@@ -2426,12 +2430,15 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
2426} 2430}
2427 2431
2428static void find_good_pkt_pointers(struct bpf_verifier_state *state, 2432static void find_good_pkt_pointers(struct bpf_verifier_state *state,
2429 struct bpf_reg_state *dst_reg) 2433 struct bpf_reg_state *dst_reg,
2434 bool range_right_open)
2430{ 2435{
2431 struct bpf_reg_state *regs = state->regs, *reg; 2436 struct bpf_reg_state *regs = state->regs, *reg;
2437 u16 new_range;
2432 int i; 2438 int i;
2433 2439
2434 if (dst_reg->off < 0) 2440 if (dst_reg->off < 0 ||
2441 (dst_reg->off == 0 && range_right_open))
2435 /* This doesn't give us any range */ 2442 /* This doesn't give us any range */
2436 return; 2443 return;
2437 2444
@@ -2442,9 +2449,13 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
2442 */ 2449 */
2443 return; 2450 return;
2444 2451
2445 /* LLVM can generate four kind of checks: 2452 new_range = dst_reg->off;
2453 if (range_right_open)
2454 new_range--;
2455
2456 /* Examples for register markings:
2446 * 2457 *
2447 * Type 1/2: 2458 * pkt_data in dst register:
2448 * 2459 *
2449 * r2 = r3; 2460 * r2 = r3;
2450 * r2 += 8; 2461 * r2 += 8;
@@ -2461,7 +2472,7 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
2461 * r2=pkt(id=n,off=8,r=0) 2472 * r2=pkt(id=n,off=8,r=0)
2462 * r3=pkt(id=n,off=0,r=0) 2473 * r3=pkt(id=n,off=0,r=0)
2463 * 2474 *
2464 * Type 3/4: 2475 * pkt_data in src register:
2465 * 2476 *
2466 * r2 = r3; 2477 * r2 = r3;
2467 * r2 += 8; 2478 * r2 += 8;
@@ -2479,7 +2490,9 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
2479 * r3=pkt(id=n,off=0,r=0) 2490 * r3=pkt(id=n,off=0,r=0)
2480 * 2491 *
2481 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8) 2492 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
2482 * so that range of bytes [r3, r3 + 8) is safe to access. 2493 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
2494 * and [r3, r3 + 8-1) respectively is safe to access depending on
2495 * the check.
2483 */ 2496 */
2484 2497
2485 /* If our ids match, then we must have the same max_value. And we 2498 /* If our ids match, then we must have the same max_value. And we
@@ -2490,14 +2503,14 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
2490 for (i = 0; i < MAX_BPF_REG; i++) 2503 for (i = 0; i < MAX_BPF_REG; i++)
2491 if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id) 2504 if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id)
2492 /* keep the maximum range already checked */ 2505 /* keep the maximum range already checked */
2493 regs[i].range = max_t(u16, regs[i].range, dst_reg->off); 2506 regs[i].range = max(regs[i].range, new_range);
2494 2507
2495 for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { 2508 for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
2496 if (state->stack_slot_type[i] != STACK_SPILL) 2509 if (state->stack_slot_type[i] != STACK_SPILL)
2497 continue; 2510 continue;
2498 reg = &state->spilled_regs[i / BPF_REG_SIZE]; 2511 reg = &state->spilled_regs[i / BPF_REG_SIZE];
2499 if (reg->type == PTR_TO_PACKET && reg->id == dst_reg->id) 2512 if (reg->type == PTR_TO_PACKET && reg->id == dst_reg->id)
2500 reg->range = max_t(u16, reg->range, dst_reg->off); 2513 reg->range = max(reg->range, new_range);
2501 } 2514 }
2502} 2515}
2503 2516
@@ -2861,19 +2874,43 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
2861 } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT && 2874 } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT &&
2862 dst_reg->type == PTR_TO_PACKET && 2875 dst_reg->type == PTR_TO_PACKET &&
2863 regs[insn->src_reg].type == PTR_TO_PACKET_END) { 2876 regs[insn->src_reg].type == PTR_TO_PACKET_END) {
2864 find_good_pkt_pointers(this_branch, dst_reg); 2877 /* pkt_data' > pkt_end */
2878 find_good_pkt_pointers(this_branch, dst_reg, false);
2879 } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT &&
2880 dst_reg->type == PTR_TO_PACKET_END &&
2881 regs[insn->src_reg].type == PTR_TO_PACKET) {
2882 /* pkt_end > pkt_data' */
2883 find_good_pkt_pointers(other_branch, &regs[insn->src_reg], true);
2865 } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLT && 2884 } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLT &&
2866 dst_reg->type == PTR_TO_PACKET && 2885 dst_reg->type == PTR_TO_PACKET &&
2867 regs[insn->src_reg].type == PTR_TO_PACKET_END) { 2886 regs[insn->src_reg].type == PTR_TO_PACKET_END) {
2868 find_good_pkt_pointers(other_branch, dst_reg); 2887 /* pkt_data' < pkt_end */
2888 find_good_pkt_pointers(other_branch, dst_reg, true);
2889 } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLT &&
2890 dst_reg->type == PTR_TO_PACKET_END &&
2891 regs[insn->src_reg].type == PTR_TO_PACKET) {
2892 /* pkt_end < pkt_data' */
2893 find_good_pkt_pointers(this_branch, &regs[insn->src_reg], false);
2894 } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE &&
2895 dst_reg->type == PTR_TO_PACKET &&
2896 regs[insn->src_reg].type == PTR_TO_PACKET_END) {
2897 /* pkt_data' >= pkt_end */
2898 find_good_pkt_pointers(this_branch, dst_reg, true);
2869 } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE && 2899 } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE &&
2870 dst_reg->type == PTR_TO_PACKET_END && 2900 dst_reg->type == PTR_TO_PACKET_END &&
2871 regs[insn->src_reg].type == PTR_TO_PACKET) { 2901 regs[insn->src_reg].type == PTR_TO_PACKET) {
2872 find_good_pkt_pointers(other_branch, &regs[insn->src_reg]); 2902 /* pkt_end >= pkt_data' */
2903 find_good_pkt_pointers(other_branch, &regs[insn->src_reg], false);
2904 } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLE &&
2905 dst_reg->type == PTR_TO_PACKET &&
2906 regs[insn->src_reg].type == PTR_TO_PACKET_END) {
2907 /* pkt_data' <= pkt_end */
2908 find_good_pkt_pointers(other_branch, dst_reg, false);
2873 } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLE && 2909 } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLE &&
2874 dst_reg->type == PTR_TO_PACKET_END && 2910 dst_reg->type == PTR_TO_PACKET_END &&
2875 regs[insn->src_reg].type == PTR_TO_PACKET) { 2911 regs[insn->src_reg].type == PTR_TO_PACKET) {
2876 find_good_pkt_pointers(this_branch, &regs[insn->src_reg]); 2912 /* pkt_end <= pkt_data' */
2913 find_good_pkt_pointers(this_branch, &regs[insn->src_reg], true);
2877 } else if (is_pointer_value(env, insn->dst_reg)) { 2914 } else if (is_pointer_value(env, insn->dst_reg)) {
2878 verbose("R%d pointer comparison prohibited\n", insn->dst_reg); 2915 verbose("R%d pointer comparison prohibited\n", insn->dst_reg);
2879 return -EACCES; 2916 return -EACCES;
diff --git a/kernel/cpu.c b/kernel/cpu.c
index d851df22f5c5..04892a82f6ac 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -632,6 +632,11 @@ cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
632 __cpuhp_kick_ap(st); 632 __cpuhp_kick_ap(st);
633 } 633 }
634 634
635 /*
636 * Clean up the leftovers so the next hotplug operation wont use stale
637 * data.
638 */
639 st->node = st->last = NULL;
635 return ret; 640 return ret;
636} 641}
637 642
diff --git a/kernel/exit.c b/kernel/exit.c
index cf28528842bc..f6cad39f35df 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1611,7 +1611,7 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
1611 return err; 1611 return err;
1612 1612
1613 if (!access_ok(VERIFY_WRITE, infop, sizeof(*infop))) 1613 if (!access_ok(VERIFY_WRITE, infop, sizeof(*infop)))
1614 goto Efault; 1614 return -EFAULT;
1615 1615
1616 user_access_begin(); 1616 user_access_begin();
1617 unsafe_put_user(signo, &infop->si_signo, Efault); 1617 unsafe_put_user(signo, &infop->si_signo, Efault);
@@ -1739,7 +1739,7 @@ COMPAT_SYSCALL_DEFINE5(waitid,
1739 return err; 1739 return err;
1740 1740
1741 if (!access_ok(VERIFY_WRITE, infop, sizeof(*infop))) 1741 if (!access_ok(VERIFY_WRITE, infop, sizeof(*infop)))
1742 goto Efault; 1742 return -EFAULT;
1743 1743
1744 user_access_begin(); 1744 user_access_begin();
1745 unsafe_put_user(signo, &infop->si_signo, Efault); 1745 unsafe_put_user(signo, &infop->si_signo, Efault);
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c
index 5270a54b9fa4..c26c5bb6b491 100644
--- a/kernel/irq/generic-chip.c
+++ b/kernel/irq/generic-chip.c
@@ -135,17 +135,26 @@ void irq_gc_ack_clr_bit(struct irq_data *d)
135} 135}
136 136
137/** 137/**
138 * irq_gc_mask_disable_reg_and_ack - Mask and ack pending interrupt 138 * irq_gc_mask_disable_and_ack_set - Mask and ack pending interrupt
139 * @d: irq_data 139 * @d: irq_data
140 *
141 * This generic implementation of the irq_mask_ack method is for chips
142 * with separate enable/disable registers instead of a single mask
143 * register and where a pending interrupt is acknowledged by setting a
144 * bit.
145 *
146 * Note: This is the only permutation currently used. Similar generic
147 * functions should be added here if other permutations are required.
140 */ 148 */
141void irq_gc_mask_disable_reg_and_ack(struct irq_data *d) 149void irq_gc_mask_disable_and_ack_set(struct irq_data *d)
142{ 150{
143 struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); 151 struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
144 struct irq_chip_type *ct = irq_data_get_chip_type(d); 152 struct irq_chip_type *ct = irq_data_get_chip_type(d);
145 u32 mask = d->mask; 153 u32 mask = d->mask;
146 154
147 irq_gc_lock(gc); 155 irq_gc_lock(gc);
148 irq_reg_writel(gc, mask, ct->regs.mask); 156 irq_reg_writel(gc, mask, ct->regs.disable);
157 *ct->mask_cache &= ~mask;
149 irq_reg_writel(gc, mask, ct->regs.ack); 158 irq_reg_writel(gc, mask, ct->regs.ack);
150 irq_gc_unlock(gc); 159 irq_gc_unlock(gc);
151} 160}
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 64d0edf428f8..a2dccfe1acec 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -68,6 +68,7 @@ enum {
68 * attach_mutex to avoid changing binding state while 68 * attach_mutex to avoid changing binding state while
69 * worker_attach_to_pool() is in progress. 69 * worker_attach_to_pool() is in progress.
70 */ 70 */
71 POOL_MANAGER_ACTIVE = 1 << 0, /* being managed */
71 POOL_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */ 72 POOL_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */
72 73
73 /* worker flags */ 74 /* worker flags */
@@ -165,7 +166,6 @@ struct worker_pool {
165 /* L: hash of busy workers */ 166 /* L: hash of busy workers */
166 167
167 /* see manage_workers() for details on the two manager mutexes */ 168 /* see manage_workers() for details on the two manager mutexes */
168 struct mutex manager_arb; /* manager arbitration */
169 struct worker *manager; /* L: purely informational */ 169 struct worker *manager; /* L: purely informational */
170 struct mutex attach_mutex; /* attach/detach exclusion */ 170 struct mutex attach_mutex; /* attach/detach exclusion */
171 struct list_head workers; /* A: attached workers */ 171 struct list_head workers; /* A: attached workers */
@@ -299,6 +299,7 @@ static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf;
299 299
300static DEFINE_MUTEX(wq_pool_mutex); /* protects pools and workqueues list */ 300static DEFINE_MUTEX(wq_pool_mutex); /* protects pools and workqueues list */
301static DEFINE_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */ 301static DEFINE_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */
302static DECLARE_WAIT_QUEUE_HEAD(wq_manager_wait); /* wait for manager to go away */
302 303
303static LIST_HEAD(workqueues); /* PR: list of all workqueues */ 304static LIST_HEAD(workqueues); /* PR: list of all workqueues */
304static bool workqueue_freezing; /* PL: have wqs started freezing? */ 305static bool workqueue_freezing; /* PL: have wqs started freezing? */
@@ -801,7 +802,7 @@ static bool need_to_create_worker(struct worker_pool *pool)
801/* Do we have too many workers and should some go away? */ 802/* Do we have too many workers and should some go away? */
802static bool too_many_workers(struct worker_pool *pool) 803static bool too_many_workers(struct worker_pool *pool)
803{ 804{
804 bool managing = mutex_is_locked(&pool->manager_arb); 805 bool managing = pool->flags & POOL_MANAGER_ACTIVE;
805 int nr_idle = pool->nr_idle + managing; /* manager is considered idle */ 806 int nr_idle = pool->nr_idle + managing; /* manager is considered idle */
806 int nr_busy = pool->nr_workers - nr_idle; 807 int nr_busy = pool->nr_workers - nr_idle;
807 808
@@ -1980,24 +1981,17 @@ static bool manage_workers(struct worker *worker)
1980{ 1981{
1981 struct worker_pool *pool = worker->pool; 1982 struct worker_pool *pool = worker->pool;
1982 1983
1983 /* 1984 if (pool->flags & POOL_MANAGER_ACTIVE)
1984 * Anyone who successfully grabs manager_arb wins the arbitration
1985 * and becomes the manager. mutex_trylock() on pool->manager_arb
1986 * failure while holding pool->lock reliably indicates that someone
1987 * else is managing the pool and the worker which failed trylock
1988 * can proceed to executing work items. This means that anyone
1989 * grabbing manager_arb is responsible for actually performing
1990 * manager duties. If manager_arb is grabbed and released without
1991 * actual management, the pool may stall indefinitely.
1992 */
1993 if (!mutex_trylock(&pool->manager_arb))
1994 return false; 1985 return false;
1986
1987 pool->flags |= POOL_MANAGER_ACTIVE;
1995 pool->manager = worker; 1988 pool->manager = worker;
1996 1989
1997 maybe_create_worker(pool); 1990 maybe_create_worker(pool);
1998 1991
1999 pool->manager = NULL; 1992 pool->manager = NULL;
2000 mutex_unlock(&pool->manager_arb); 1993 pool->flags &= ~POOL_MANAGER_ACTIVE;
1994 wake_up(&wq_manager_wait);
2001 return true; 1995 return true;
2002} 1996}
2003 1997
@@ -3248,7 +3242,6 @@ static int init_worker_pool(struct worker_pool *pool)
3248 setup_timer(&pool->mayday_timer, pool_mayday_timeout, 3242 setup_timer(&pool->mayday_timer, pool_mayday_timeout,
3249 (unsigned long)pool); 3243 (unsigned long)pool);
3250 3244
3251 mutex_init(&pool->manager_arb);
3252 mutex_init(&pool->attach_mutex); 3245 mutex_init(&pool->attach_mutex);
3253 INIT_LIST_HEAD(&pool->workers); 3246 INIT_LIST_HEAD(&pool->workers);
3254 3247
@@ -3318,13 +3311,15 @@ static void put_unbound_pool(struct worker_pool *pool)
3318 hash_del(&pool->hash_node); 3311 hash_del(&pool->hash_node);
3319 3312
3320 /* 3313 /*
3321 * Become the manager and destroy all workers. Grabbing 3314 * Become the manager and destroy all workers. This prevents
3322 * manager_arb prevents @pool's workers from blocking on 3315 * @pool's workers from blocking on attach_mutex. We're the last
3323 * attach_mutex. 3316 * manager and @pool gets freed with the flag set.
3324 */ 3317 */
3325 mutex_lock(&pool->manager_arb);
3326
3327 spin_lock_irq(&pool->lock); 3318 spin_lock_irq(&pool->lock);
3319 wait_event_lock_irq(wq_manager_wait,
3320 !(pool->flags & POOL_MANAGER_ACTIVE), pool->lock);
3321 pool->flags |= POOL_MANAGER_ACTIVE;
3322
3328 while ((worker = first_idle_worker(pool))) 3323 while ((worker = first_idle_worker(pool)))
3329 destroy_worker(worker); 3324 destroy_worker(worker);
3330 WARN_ON(pool->nr_workers || pool->nr_idle); 3325 WARN_ON(pool->nr_workers || pool->nr_idle);
@@ -3338,8 +3333,6 @@ static void put_unbound_pool(struct worker_pool *pool)
3338 if (pool->detach_completion) 3333 if (pool->detach_completion)
3339 wait_for_completion(pool->detach_completion); 3334 wait_for_completion(pool->detach_completion);
3340 3335
3341 mutex_unlock(&pool->manager_arb);
3342
3343 /* shut down the timers */ 3336 /* shut down the timers */
3344 del_timer_sync(&pool->idle_timer); 3337 del_timer_sync(&pool->idle_timer);
3345 del_timer_sync(&pool->mayday_timer); 3338 del_timer_sync(&pool->mayday_timer);