Diffstat (limited to 'kernel')
-rw-r--r--  kernel/bpf/arraymap.c      |  2
-rw-r--r--  kernel/bpf/devmap.c        | 10
-rw-r--r--  kernel/bpf/hashtab.c       |  4
-rw-r--r--  kernel/bpf/sockmap.c       | 28
-rw-r--r--  kernel/bpf/verifier.c      | 65
-rw-r--r--  kernel/cpu.c               |  5
-rw-r--r--  kernel/exit.c              |  4
-rw-r--r--  kernel/irq/generic-chip.c  | 15
-rw-r--r--  kernel/rcu/srcutree.c      |  2
-rw-r--r--  kernel/rcu/sync.c          |  9
-rw-r--r--  kernel/rcu/tree.c          | 18
-rw-r--r--  kernel/sched/membarrier.c  | 34
12 files changed, 143 insertions, 53 deletions
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 98c0f00c3f5e..e2636737b69b 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -98,7 +98,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
         array_size += (u64) attr->max_entries * elem_size * num_possible_cpus();
 
         if (array_size >= U32_MAX - PAGE_SIZE ||
-            elem_size > PCPU_MIN_UNIT_SIZE || bpf_array_alloc_percpu(array)) {
+            bpf_array_alloc_percpu(array)) {
                 bpf_map_area_free(array);
                 return ERR_PTR(-ENOMEM);
         }
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index e093d9a2c4dd..e745d6a88224 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -69,7 +69,7 @@ static LIST_HEAD(dev_map_list);
 
 static u64 dev_map_bitmap_size(const union bpf_attr *attr)
 {
-        return BITS_TO_LONGS(attr->max_entries) * sizeof(unsigned long);
+        return BITS_TO_LONGS((u64) attr->max_entries) * sizeof(unsigned long);
 }
 
 static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
@@ -78,6 +78,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
         int err = -EINVAL;
         u64 cost;
 
+        if (!capable(CAP_NET_ADMIN))
+                return ERR_PTR(-EPERM);
+
         /* check sanity of attributes */
         if (attr->max_entries == 0 || attr->key_size != 4 ||
             attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE)
@@ -111,8 +114,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
         err = -ENOMEM;
 
         /* A per cpu bitfield with a bit per possible net device */
-        dtab->flush_needed = __alloc_percpu(dev_map_bitmap_size(attr),
-                                            __alignof__(unsigned long));
+        dtab->flush_needed = __alloc_percpu_gfp(dev_map_bitmap_size(attr),
+                                                __alignof__(unsigned long),
+                                                GFP_KERNEL | __GFP_NOWARN);
         if (!dtab->flush_needed)
                 goto free_dtab;
 
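Side note on the (u64) cast in dev_map_bitmap_size(): BITS_TO_LONGS() rounds up before dividing, roughly (n + 63) / 64 on a 64-bit kernel, and with a plain u32 max_entries the "+ 63" can wrap before the division, leaving the per-CPU bitmap far too small. A minimal userspace sketch of that wraparound, illustration only and not kernel code:

/* Why the (u64) cast matters: the 32-bit round-up addition wraps for
 * max_entries close to U32_MAX; promoting to 64 bits first does not.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint32_t max_entries = UINT32_MAX;          /* 4294967295 entries */
        uint32_t wrapped = (max_entries + 63) / 64; /* 32-bit add wraps   */
        uint64_t correct = ((uint64_t)max_entries + 63) / 64;

        /* prints: wrapped=0 correct=67108864 */
        printf("wrapped=%u correct=%llu\n", wrapped,
               (unsigned long long)correct);
        return 0;
}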
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 431126f31ea3..6533f08d1238 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -317,10 +317,6 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
                  */
                 goto free_htab;
 
-        if (percpu && round_up(htab->map.value_size, 8) > PCPU_MIN_UNIT_SIZE)
-                /* make sure the size for pcpu_alloc() is reasonable */
-                goto free_htab;
-
         htab->elem_size = sizeof(struct htab_elem) +
                           round_up(htab->map.key_size, 8);
         if (percpu)
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 6424ce0e4969..2b6eb35ae5d3 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -39,6 +39,7 @@
 #include <linux/workqueue.h>
 #include <linux/list.h>
 #include <net/strparser.h>
+#include <net/tcp.h>
 
 struct bpf_stab {
         struct bpf_map map;
@@ -101,9 +102,16 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
                 return SK_DROP;
 
         skb_orphan(skb);
+        /* We need to ensure that BPF metadata for maps is also cleared
+         * when we orphan the skb so that we don't have the possibility
+         * to reference a stale map.
+         */
+        TCP_SKB_CB(skb)->bpf.map = NULL;
         skb->sk = psock->sock;
         bpf_compute_data_end(skb);
+        preempt_disable();
         rc = (*prog->bpf_func)(skb, prog->insnsi);
+        preempt_enable();
         skb->sk = NULL;
 
         return rc;
@@ -114,17 +122,10 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
         struct sock *sk;
         int rc;
 
-        /* Because we use per cpu values to feed input from sock redirect
-         * in BPF program to do_sk_redirect_map() call we need to ensure we
-         * are not preempted. RCU read lock is not sufficient in this case
-         * with CONFIG_PREEMPT_RCU enabled so we must be explicit here.
-         */
-        preempt_disable();
         rc = smap_verdict_func(psock, skb);
         switch (rc) {
         case SK_REDIRECT:
-                sk = do_sk_redirect_map();
-                preempt_enable();
+                sk = do_sk_redirect_map(skb);
                 if (likely(sk)) {
                         struct smap_psock *peer = smap_psock_sk(sk);
 
@@ -141,8 +142,6 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
         /* Fall through and free skb otherwise */
         case SK_DROP:
         default:
-                if (rc != SK_REDIRECT)
-                        preempt_enable();
                 kfree_skb(skb);
         }
 }
@@ -487,6 +486,9 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
         int err = -EINVAL;
         u64 cost;
 
+        if (!capable(CAP_NET_ADMIN))
+                return ERR_PTR(-EPERM);
+
         /* check sanity of attributes */
         if (attr->max_entries == 0 || attr->key_size != 4 ||
             attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE)
@@ -840,6 +842,12 @@ static int sock_map_update_elem(struct bpf_map *map,
                 return -EINVAL;
         }
 
+        if (skops.sk->sk_type != SOCK_STREAM ||
+            skops.sk->sk_protocol != IPPROTO_TCP) {
+                fput(socket->file);
+                return -EOPNOTSUPP;
+        }
+
         err = sock_map_ctx_update_elem(&skops, map, key, flags);
         fput(socket->file);
         return err;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 8b8d6ba39e23..c48ca2a34b5e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1116,7 +1116,12 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
                 /* ctx accesses must be at a fixed offset, so that we can
                  * determine what type of data were returned.
                  */
-                if (!tnum_is_const(reg->var_off)) {
+                if (reg->off) {
+                        verbose("dereference of modified ctx ptr R%d off=%d+%d, ctx+const is allowed, ctx+const+const is not\n",
+                                regno, reg->off, off - reg->off);
+                        return -EACCES;
+                }
+                if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
                         char tn_buf[48];
 
                         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
@@ -1124,7 +1129,6 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
                                 tn_buf, off, size);
                         return -EACCES;
                 }
-                off += reg->var_off.value;
                 err = check_ctx_access(env, insn_idx, off, size, t, &reg_type);
                 if (!err && t == BPF_READ && value_regno >= 0) {
                         /* ctx access returns either a scalar, or a
@@ -2426,12 +2430,15 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 }
 
 static void find_good_pkt_pointers(struct bpf_verifier_state *state,
-                                   struct bpf_reg_state *dst_reg)
+                                   struct bpf_reg_state *dst_reg,
+                                   bool range_right_open)
 {
         struct bpf_reg_state *regs = state->regs, *reg;
+        u16 new_range;
         int i;
 
-        if (dst_reg->off < 0)
+        if (dst_reg->off < 0 ||
+            (dst_reg->off == 0 && range_right_open))
                 /* This doesn't give us any range */
                 return;
 
@@ -2442,9 +2449,13 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
          */
         return;
 
-        /* LLVM can generate four kind of checks:
+        new_range = dst_reg->off;
+        if (range_right_open)
+                new_range--;
+
+        /* Examples for register markings:
          *
-         * Type 1/2:
+         * pkt_data in dst register:
          *
          * r2 = r3;
         * r2 += 8;
@@ -2461,7 +2472,7 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
         * r2=pkt(id=n,off=8,r=0)
         * r3=pkt(id=n,off=0,r=0)
         *
-        * Type 3/4:
+        * pkt_data in src register:
         *
         * r2 = r3;
         * r2 += 8;
@@ -2479,7 +2490,9 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
         * r3=pkt(id=n,off=0,r=0)
         *
         * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
-        * so that range of bytes [r3, r3 + 8) is safe to access.
+        * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
+        * and [r3, r3 + 8-1) respectively is safe to access depending on
+        * the check.
         */
 
         /* If our ids match, then we must have the same max_value. And we
@@ -2490,14 +2503,14 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
         for (i = 0; i < MAX_BPF_REG; i++)
                 if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id)
                         /* keep the maximum range already checked */
-                        regs[i].range = max_t(u16, regs[i].range, dst_reg->off);
+                        regs[i].range = max(regs[i].range, new_range);
 
         for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
                 if (state->stack_slot_type[i] != STACK_SPILL)
                         continue;
                 reg = &state->spilled_regs[i / BPF_REG_SIZE];
                 if (reg->type == PTR_TO_PACKET && reg->id == dst_reg->id)
-                        reg->range = max_t(u16, reg->range, dst_reg->off);
+                        reg->range = max(reg->range, new_range);
         }
 }
 
@@ -2861,19 +2874,43 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
         } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT &&
                    dst_reg->type == PTR_TO_PACKET &&
                    regs[insn->src_reg].type == PTR_TO_PACKET_END) {
-                find_good_pkt_pointers(this_branch, dst_reg);
+                /* pkt_data' > pkt_end */
+                find_good_pkt_pointers(this_branch, dst_reg, false);
+        } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT &&
+                   dst_reg->type == PTR_TO_PACKET_END &&
+                   regs[insn->src_reg].type == PTR_TO_PACKET) {
+                /* pkt_end > pkt_data' */
+                find_good_pkt_pointers(other_branch, &regs[insn->src_reg], true);
         } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLT &&
                    dst_reg->type == PTR_TO_PACKET &&
                    regs[insn->src_reg].type == PTR_TO_PACKET_END) {
-                find_good_pkt_pointers(other_branch, dst_reg);
+                /* pkt_data' < pkt_end */
+                find_good_pkt_pointers(other_branch, dst_reg, true);
+        } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLT &&
+                   dst_reg->type == PTR_TO_PACKET_END &&
+                   regs[insn->src_reg].type == PTR_TO_PACKET) {
+                /* pkt_end < pkt_data' */
+                find_good_pkt_pointers(this_branch, &regs[insn->src_reg], false);
+        } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE &&
+                   dst_reg->type == PTR_TO_PACKET &&
+                   regs[insn->src_reg].type == PTR_TO_PACKET_END) {
+                /* pkt_data' >= pkt_end */
+                find_good_pkt_pointers(this_branch, dst_reg, true);
         } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE &&
                    dst_reg->type == PTR_TO_PACKET_END &&
                    regs[insn->src_reg].type == PTR_TO_PACKET) {
-                find_good_pkt_pointers(other_branch, &regs[insn->src_reg]);
+                /* pkt_end >= pkt_data' */
+                find_good_pkt_pointers(other_branch, &regs[insn->src_reg], false);
+        } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLE &&
+                   dst_reg->type == PTR_TO_PACKET &&
+                   regs[insn->src_reg].type == PTR_TO_PACKET_END) {
+                /* pkt_data' <= pkt_end */
+                find_good_pkt_pointers(other_branch, dst_reg, false);
         } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLE &&
                    dst_reg->type == PTR_TO_PACKET_END &&
                    regs[insn->src_reg].type == PTR_TO_PACKET) {
-                find_good_pkt_pointers(this_branch, &regs[insn->src_reg]);
+                /* pkt_end <= pkt_data' */
+                find_good_pkt_pointers(this_branch, &regs[insn->src_reg], true);
         } else if (is_pointer_value(env, insn->dst_reg)) {
                 verbose("R%d pointer comparison prohibited\n", insn->dst_reg);
                 return -EACCES;
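The new branches in check_cond_jmp_op() cover bounds checks written with pkt_end on either side of the comparison. A restricted-C sketch of two equivalent checks, both of which the verifier can now use for packet range marking; the section name and program layout are illustrative, and the exact jump instruction emitted depends on the compiler:

#include <linux/bpf.h>
#include <linux/if_ether.h>

__attribute__((section("xdp"), used))
int xdp_len_check(struct xdp_md *ctx)
{
        void *data     = (void *)(long)ctx->data;
        void *data_end = (void *)(long)ctx->data_end;
        struct ethhdr *eth = data;

        /* "pkt_data' > pkt_end": the classic shape, already handled */
        if (data + sizeof(*eth) > data_end)
                return XDP_DROP;

        /* "pkt_end < pkt_data'": flipped operands, one of the newly
         * recognized permutations; either check only falls through when
         * the full Ethernet header lies within the packet.
         */
        if (data_end < data + sizeof(*eth))
                return XDP_DROP;

        return eth->h_proto ? XDP_PASS : XDP_DROP;
}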
diff --git a/kernel/cpu.c b/kernel/cpu.c
index d851df22f5c5..04892a82f6ac 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -632,6 +632,11 @@ cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
                 __cpuhp_kick_ap(st);
         }
 
+        /*
+         * Clean up the leftovers so the next hotplug operation wont use stale
+         * data.
+         */
+        st->node = st->last = NULL;
         return ret;
 }
 
diff --git a/kernel/exit.c b/kernel/exit.c
index cf28528842bc..f6cad39f35df 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1611,7 +1611,7 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
                 return err;
 
         if (!access_ok(VERIFY_WRITE, infop, sizeof(*infop)))
-                goto Efault;
+                return -EFAULT;
 
         user_access_begin();
         unsafe_put_user(signo, &infop->si_signo, Efault);
@@ -1739,7 +1739,7 @@ COMPAT_SYSCALL_DEFINE5(waitid,
                 return err;
 
         if (!access_ok(VERIFY_WRITE, infop, sizeof(*infop)))
-                goto Efault;
+                return -EFAULT;
 
         user_access_begin();
         unsafe_put_user(signo, &infop->si_signo, Efault);
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c
index 5270a54b9fa4..c26c5bb6b491 100644
--- a/kernel/irq/generic-chip.c
+++ b/kernel/irq/generic-chip.c
@@ -135,17 +135,26 @@ void irq_gc_ack_clr_bit(struct irq_data *d)
 }
 
 /**
- * irq_gc_mask_disable_reg_and_ack - Mask and ack pending interrupt
+ * irq_gc_mask_disable_and_ack_set - Mask and ack pending interrupt
  * @d: irq_data
+ *
+ * This generic implementation of the irq_mask_ack method is for chips
+ * with separate enable/disable registers instead of a single mask
+ * register and where a pending interrupt is acknowledged by setting a
+ * bit.
+ *
+ * Note: This is the only permutation currently used. Similar generic
+ * functions should be added here if other permutations are required.
  */
-void irq_gc_mask_disable_reg_and_ack(struct irq_data *d)
+void irq_gc_mask_disable_and_ack_set(struct irq_data *d)
 {
         struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
         struct irq_chip_type *ct = irq_data_get_chip_type(d);
         u32 mask = d->mask;
 
         irq_gc_lock(gc);
-        irq_reg_writel(gc, mask, ct->regs.mask);
+        irq_reg_writel(gc, mask, ct->regs.disable);
+        *ct->mask_cache &= ~mask;
         irq_reg_writel(gc, mask, ct->regs.ack);
         irq_gc_unlock(gc);
 }
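A hedged fragment showing how a driver with separate enable/disable registers and ack-by-setting-a-bit would wire up the renamed helper; the function name, register offsets, and surrounding init code are hypothetical, not taken from this patch:

#include <linux/irq.h>

static void foo_irq_chip_init(struct irq_domain *domain)
{
        struct irq_chip_generic *gc = irq_get_domain_generic_chip(domain, 0);
        struct irq_chip_type *ct = gc->chip_types;

        ct->regs.enable  = 0x00;        /* made-up register offsets */
        ct->regs.disable = 0x04;
        ct->regs.ack     = 0x08;

        ct->chip.irq_mask     = irq_gc_mask_disable_reg;
        ct->chip.irq_unmask   = irq_gc_unmask_enable_reg;
        ct->chip.irq_ack      = irq_gc_ack_set_bit;
        /* mask + ack in one shot, using the renamed helper */
        ct->chip.irq_mask_ack = irq_gc_mask_disable_and_ack_set;
}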
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 729a8706751d..6d5880089ff6 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -854,7 +854,7 @@ void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
 /**
  * call_srcu() - Queue a callback for invocation after an SRCU grace period
  * @sp: srcu_struct in queue the callback
- * @head: structure to be used for queueing the SRCU callback.
+ * @rhp: structure to be used for queueing the SRCU callback.
  * @func: function to be invoked after the SRCU grace period
  *
  * The callback function will be invoked some time after a full SRCU
diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c
index 50d1861f7759..3f943efcf61c 100644
--- a/kernel/rcu/sync.c
+++ b/kernel/rcu/sync.c
@@ -85,6 +85,9 @@ void rcu_sync_init(struct rcu_sync *rsp, enum rcu_sync_type type)
 }
 
 /**
+ * rcu_sync_enter_start - Force readers onto slow path for multiple updates
+ * @rsp: Pointer to rcu_sync structure to use for synchronization
+ *
  * Must be called after rcu_sync_init() and before first use.
  *
  * Ensures rcu_sync_is_idle() returns false and rcu_sync_{enter,exit}()
@@ -142,7 +145,7 @@ void rcu_sync_enter(struct rcu_sync *rsp)
 
 /**
  * rcu_sync_func() - Callback function managing reader access to fastpath
- * @rsp: Pointer to rcu_sync structure to use for synchronization
+ * @rhp: Pointer to rcu_head in rcu_sync structure to use for synchronization
  *
  * This function is passed to one of the call_rcu() functions by
  * rcu_sync_exit(), so that it is invoked after a grace period following the
@@ -158,9 +161,9 @@ void rcu_sync_enter(struct rcu_sync *rsp)
  * rcu_sync_exit(). Otherwise, set all state back to idle so that readers
  * can again use their fastpaths.
  */
-static void rcu_sync_func(struct rcu_head *rcu)
+static void rcu_sync_func(struct rcu_head *rhp)
 {
-        struct rcu_sync *rsp = container_of(rcu, struct rcu_sync, cb_head);
+        struct rcu_sync *rsp = container_of(rhp, struct rcu_sync, cb_head);
         unsigned long flags;
 
         BUG_ON(rsp->gp_state != GP_PASSED);
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index b0ad62b0e7b8..3e3650e94ae6 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3097,9 +3097,10 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func,
  * read-side critical sections have completed. call_rcu_sched() assumes
  * that the read-side critical sections end on enabling of preemption
  * or on voluntary preemption.
- * RCU read-side critical sections are delimited by :
- * - rcu_read_lock_sched() and rcu_read_unlock_sched(), OR
- * - anything that disables preemption.
+ * RCU read-side critical sections are delimited by:
+ *
+ * - rcu_read_lock_sched() and rcu_read_unlock_sched(), OR
+ * - anything that disables preemption.
  *
  * These may be nested.
  *
@@ -3124,11 +3125,12 @@ EXPORT_SYMBOL_GPL(call_rcu_sched);
  * handler. This means that read-side critical sections in process
  * context must not be interrupted by softirqs. This interface is to be
  * used when most of the read-side critical sections are in softirq context.
- * RCU read-side critical sections are delimited by :
- * - rcu_read_lock() and rcu_read_unlock(), if in interrupt context.
- * OR
+ * RCU read-side critical sections are delimited by:
+ *
+ * - rcu_read_lock() and rcu_read_unlock(), if in interrupt context, OR
  * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context.
- * These may be nested.
+ *
+ * These may be nested.
  *
  * See the description of call_rcu() for more detailed information on
  * memory ordering guarantees.
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
index a92fddc22747..dd7908743dab 100644
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -18,6 +18,7 @@
 #include <linux/membarrier.h>
 #include <linux/tick.h>
 #include <linux/cpumask.h>
+#include <linux/atomic.h>
 
 #include "sched.h"      /* for cpu_rq(). */
 
@@ -26,21 +27,26 @@
  * except MEMBARRIER_CMD_QUERY.
  */
 #define MEMBARRIER_CMD_BITMASK  \
-        (MEMBARRIER_CMD_SHARED | MEMBARRIER_CMD_PRIVATE_EXPEDITED)
+        (MEMBARRIER_CMD_SHARED | MEMBARRIER_CMD_PRIVATE_EXPEDITED      \
+        | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED)
 
 static void ipi_mb(void *info)
 {
         smp_mb();       /* IPIs should be serializing but paranoid. */
 }
 
-static void membarrier_private_expedited(void)
+static int membarrier_private_expedited(void)
 {
         int cpu;
         bool fallback = false;
         cpumask_var_t tmpmask;
 
+        if (!(atomic_read(&current->mm->membarrier_state)
+                        & MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY))
+                return -EPERM;
+
         if (num_online_cpus() == 1)
-                return;
+                return 0;
 
         /*
          * Matches memory barriers around rq->curr modification in
@@ -94,6 +100,24 @@ static void membarrier_private_expedited(void)
          * rq->curr modification in scheduler.
          */
         smp_mb();       /* exit from system call is not a mb */
+        return 0;
+}
+
+static void membarrier_register_private_expedited(void)
+{
+        struct task_struct *p = current;
+        struct mm_struct *mm = p->mm;
+
+        /*
+         * We need to consider threads belonging to different thread
+         * groups, which use the same mm. (CLONE_VM but not
+         * CLONE_THREAD).
+         */
+        if (atomic_read(&mm->membarrier_state)
+                        & MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY)
+                return;
+        atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY,
+                        &mm->membarrier_state);
 }
 
 /**
@@ -144,7 +168,9 @@ SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
                 synchronize_sched();
                 return 0;
         case MEMBARRIER_CMD_PRIVATE_EXPEDITED:
-                membarrier_private_expedited();
+                return membarrier_private_expedited();
+        case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED:
+                membarrier_register_private_expedited();
                 return 0;
         default:
                 return -EINVAL;
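The userspace contract that goes with this: a process has to register once before issuing the expedited command, otherwise it now gets EPERM. A minimal sketch, assuming uapi headers new enough to define MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED and a libc that exposes __NR_membarrier:

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/membarrier.h>

static int membarrier(int cmd, int flags)
{
        return syscall(__NR_membarrier, cmd, flags);
}

int main(void)
{
        /* Without this one-time registration, the expedited call below
         * returns EPERM on kernels carrying this change.
         */
        if (membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 0))
                perror("register private expedited");

        if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0))
                perror("private expedited");

        return 0;
}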