Diffstat (limited to 'kernel')
-rw-r--r--   kernel/bpf/arraymap.c        2
-rw-r--r--   kernel/bpf/devmap.c         10
-rw-r--r--   kernel/bpf/hashtab.c         4
-rw-r--r--   kernel/bpf/sockmap.c        28
-rw-r--r--   kernel/bpf/verifier.c       65
-rw-r--r--   kernel/cpu.c                 5
-rw-r--r--   kernel/exit.c                4
-rw-r--r--   kernel/irq/generic-chip.c   15
-rw-r--r--   kernel/rcu/srcutree.c        2
-rw-r--r--   kernel/rcu/sync.c            9
-rw-r--r--   kernel/rcu/tree.c           18
-rw-r--r--   kernel/sched/membarrier.c   34
12 files changed, 143 insertions(+), 53 deletions(-)
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 98c0f00c3f5e..e2636737b69b 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -98,7 +98,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 	array_size += (u64) attr->max_entries * elem_size * num_possible_cpus();
 
 	if (array_size >= U32_MAX - PAGE_SIZE ||
-	    elem_size > PCPU_MIN_UNIT_SIZE || bpf_array_alloc_percpu(array)) {
+	    bpf_array_alloc_percpu(array)) {
 		bpf_map_area_free(array);
 		return ERR_PTR(-ENOMEM);
 	}
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index e093d9a2c4dd..e745d6a88224 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -69,7 +69,7 @@ static LIST_HEAD(dev_map_list);
 
 static u64 dev_map_bitmap_size(const union bpf_attr *attr)
 {
-	return BITS_TO_LONGS(attr->max_entries) * sizeof(unsigned long);
+	return BITS_TO_LONGS((u64) attr->max_entries) * sizeof(unsigned long);
 }
 
 static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
@@ -78,6 +78,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 	int err = -EINVAL;
 	u64 cost;
 
+	if (!capable(CAP_NET_ADMIN))
+		return ERR_PTR(-EPERM);
+
 	/* check sanity of attributes */
 	if (attr->max_entries == 0 || attr->key_size != 4 ||
 	    attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE)
@@ -111,8 +114,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 	err = -ENOMEM;
 
 	/* A per cpu bitfield with a bit per possible net device */
-	dtab->flush_needed = __alloc_percpu(dev_map_bitmap_size(attr),
-					    __alignof__(unsigned long));
+	dtab->flush_needed = __alloc_percpu_gfp(dev_map_bitmap_size(attr),
+						__alignof__(unsigned long),
+						GFP_KERNEL | __GFP_NOWARN);
 	if (!dtab->flush_needed)
 		goto free_dtab;
 
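
The (u64) cast above matters because BITS_TO_LONGS() does its round-up arithmetic in the type of its argument: with a u32 max_entries near U32_MAX the intermediate addition wraps, and the computed per-cpu bitmap ends up far smaller than the map's claimed capacity. A stand-alone sketch of the wrap (user-space code only, with the kernel's DIV_ROUND_UP/BITS_TO_LONGS re-implemented locally for illustration):

/* Stand-alone illustration: DIV_ROUND_UP/BITS_TO_LONGS re-implemented locally
 * to mirror the kernel macros; not kernel code.
 */
#include <stdint.h>
#include <stdio.h>

#define BITS_PER_LONG		64
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
#define BITS_TO_LONGS(nr)	DIV_ROUND_UP(nr, BITS_PER_LONG)

int main(void)
{
	uint32_t max_entries = UINT32_MAX;	/* user-supplied attr->max_entries */

	/* 32-bit arithmetic: UINT32_MAX + 63 wraps, so the size collapses to 0 */
	unsigned long long without_cast =
		BITS_TO_LONGS(max_entries) * sizeof(unsigned long);
	/* 64-bit arithmetic, as after the (u64) cast: the real size is computed */
	unsigned long long with_cast =
		BITS_TO_LONGS((uint64_t)max_entries) * sizeof(unsigned long);

	printf("without cast: %llu bytes, with cast: %llu bytes\n",
	       without_cast, with_cast);
	return 0;
}
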
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 431126f31ea3..6533f08d1238 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -317,10 +317,6 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 		 */
 		goto free_htab;
 
-	if (percpu && round_up(htab->map.value_size, 8) > PCPU_MIN_UNIT_SIZE)
-		/* make sure the size for pcpu_alloc() is reasonable */
-		goto free_htab;
-
 	htab->elem_size = sizeof(struct htab_elem) +
 			  round_up(htab->map.key_size, 8);
 	if (percpu)
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 6424ce0e4969..2b6eb35ae5d3 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -39,6 +39,7 @@
 #include <linux/workqueue.h>
 #include <linux/list.h>
 #include <net/strparser.h>
+#include <net/tcp.h>
 
 struct bpf_stab {
 	struct bpf_map map;
@@ -101,9 +102,16 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
 		return SK_DROP;
 
 	skb_orphan(skb);
+	/* We need to ensure that BPF metadata for maps is also cleared
+	 * when we orphan the skb so that we don't have the possibility
+	 * to reference a stale map.
+	 */
+	TCP_SKB_CB(skb)->bpf.map = NULL;
 	skb->sk = psock->sock;
 	bpf_compute_data_end(skb);
+	preempt_disable();
 	rc = (*prog->bpf_func)(skb, prog->insnsi);
+	preempt_enable();
 	skb->sk = NULL;
 
 	return rc;
@@ -114,17 +122,10 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
 	struct sock *sk;
 	int rc;
 
-	/* Because we use per cpu values to feed input from sock redirect
-	 * in BPF program to do_sk_redirect_map() call we need to ensure we
-	 * are not preempted. RCU read lock is not sufficient in this case
-	 * with CONFIG_PREEMPT_RCU enabled so we must be explicit here.
-	 */
-	preempt_disable();
 	rc = smap_verdict_func(psock, skb);
 	switch (rc) {
 	case SK_REDIRECT:
-		sk = do_sk_redirect_map();
-		preempt_enable();
+		sk = do_sk_redirect_map(skb);
 		if (likely(sk)) {
 			struct smap_psock *peer = smap_psock_sk(sk);
 
@@ -141,8 +142,6 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
 	/* Fall through and free skb otherwise */
 	case SK_DROP:
 	default:
-		if (rc != SK_REDIRECT)
-			preempt_enable();
 		kfree_skb(skb);
 	}
 }
@@ -487,6 +486,9 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
 	int err = -EINVAL;
 	u64 cost;
 
+	if (!capable(CAP_NET_ADMIN))
+		return ERR_PTR(-EPERM);
+
 	/* check sanity of attributes */
 	if (attr->max_entries == 0 || attr->key_size != 4 ||
 	    attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE)
@@ -840,6 +842,12 @@ static int sock_map_update_elem(struct bpf_map *map,
 		return -EINVAL;
 	}
 
+	if (skops.sk->sk_type != SOCK_STREAM ||
+	    skops.sk->sk_protocol != IPPROTO_TCP) {
+		fput(socket->file);
+		return -EOPNOTSUPP;
+	}
+
 	err = sock_map_ctx_update_elem(&skops, map, key, flags);
 	fput(socket->file);
 	return err;
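
The user-visible effect of the sockmap hardening above: creating a BPF_MAP_TYPE_SOCKMAP now requires CAP_NET_ADMIN, and only TCP stream sockets may be inserted; anything else is rejected with EOPNOTSUPP. A rough user-space sketch of both checks, using raw bpf(2) syscalls rather than a library wrapper (error handling trimmed; assumes at least v4.14 uapi headers):

/* User-space sketch only; raw bpf(2) calls, no libbpf. */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

static long sys_bpf(int cmd, union bpf_attr *attr)
{
	return syscall(__NR_bpf, cmd, attr, sizeof(*attr));
}

int main(void)
{
	union bpf_attr attr;
	int map_fd, udp_fd, key = 0;

	memset(&attr, 0, sizeof(attr));
	attr.map_type    = BPF_MAP_TYPE_SOCKMAP;
	attr.key_size    = 4;
	attr.value_size  = 4;
	attr.max_entries = 2;

	/* Without CAP_NET_ADMIN this now fails with EPERM. */
	map_fd = sys_bpf(BPF_MAP_CREATE, &attr);
	if (map_fd < 0) {
		perror("BPF_MAP_CREATE");
		return 1;
	}

	/* A UDP socket is neither SOCK_STREAM nor IPPROTO_TCP ... */
	udp_fd = socket(AF_INET, SOCK_DGRAM, 0);

	memset(&attr, 0, sizeof(attr));
	attr.map_fd = map_fd;
	attr.key    = (unsigned long)&key;
	attr.value  = (unsigned long)&udp_fd;
	attr.flags  = BPF_ANY;

	/* ... so the update is now rejected with EOPNOTSUPP. */
	if (sys_bpf(BPF_MAP_UPDATE_ELEM, &attr))
		printf("update failed as expected: %s\n", strerror(errno));
	return 0;
}
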
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 8b8d6ba39e23..c48ca2a34b5e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1116,7 +1116,12 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 		/* ctx accesses must be at a fixed offset, so that we can
 		 * determine what type of data were returned.
 		 */
-		if (!tnum_is_const(reg->var_off)) {
+		if (reg->off) {
+			verbose("dereference of modified ctx ptr R%d off=%d+%d, ctx+const is allowed, ctx+const+const is not\n",
+				regno, reg->off, off - reg->off);
+			return -EACCES;
+		}
+		if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
 			char tn_buf[48];
 
 			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
@@ -1124,7 +1129,6 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 				tn_buf, off, size);
 			return -EACCES;
 		}
-		off += reg->var_off.value;
 		err = check_ctx_access(env, insn_idx, off, size, t, &reg_type);
 		if (!err && t == BPF_READ && value_regno >= 0) {
 			/* ctx access returns either a scalar, or a
@@ -2426,12 +2430,15 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 }
 
 static void find_good_pkt_pointers(struct bpf_verifier_state *state,
-				   struct bpf_reg_state *dst_reg)
+				   struct bpf_reg_state *dst_reg,
+				   bool range_right_open)
 {
 	struct bpf_reg_state *regs = state->regs, *reg;
+	u16 new_range;
 	int i;
 
-	if (dst_reg->off < 0)
+	if (dst_reg->off < 0 ||
+	    (dst_reg->off == 0 && range_right_open))
 		/* This doesn't give us any range */
 		return;
 
@@ -2442,9 +2449,13 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
 		 */
 		return;
 
-	/* LLVM can generate four kind of checks:
+	new_range = dst_reg->off;
+	if (range_right_open)
+		new_range--;
+
+	/* Examples for register markings:
 	 *
-	 * Type 1/2:
+	 * pkt_data in dst register:
 	 *
 	 *   r2 = r3;
 	 *   r2 += 8;
@@ -2461,7 +2472,7 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
 	 *     r2=pkt(id=n,off=8,r=0)
 	 *     r3=pkt(id=n,off=0,r=0)
 	 *
-	 * Type 3/4:
+	 * pkt_data in src register:
 	 *
 	 *   r2 = r3;
 	 *   r2 += 8;
@@ -2479,7 +2490,9 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
 	 *     r3=pkt(id=n,off=0,r=0)
 	 *
 	 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
-	 * so that range of bytes [r3, r3 + 8) is safe to access.
+	 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
+	 * and [r3, r3 + 8-1) respectively is safe to access depending on
+	 * the check.
 	 */
 
 	/* If our ids match, then we must have the same max_value. And we
@@ -2490,14 +2503,14 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
 	for (i = 0; i < MAX_BPF_REG; i++)
 		if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id)
 			/* keep the maximum range already checked */
-			regs[i].range = max_t(u16, regs[i].range, dst_reg->off);
+			regs[i].range = max(regs[i].range, new_range);
 
 	for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
 		if (state->stack_slot_type[i] != STACK_SPILL)
 			continue;
 		reg = &state->spilled_regs[i / BPF_REG_SIZE];
 		if (reg->type == PTR_TO_PACKET && reg->id == dst_reg->id)
 			reg->range = max(reg->range, new_range);
 	}
 }
 
@@ -2861,19 +2874,43 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
 	} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT &&
 		   dst_reg->type == PTR_TO_PACKET &&
 		   regs[insn->src_reg].type == PTR_TO_PACKET_END) {
-		find_good_pkt_pointers(this_branch, dst_reg);
+		/* pkt_data' > pkt_end */
+		find_good_pkt_pointers(this_branch, dst_reg, false);
+	} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT &&
+		   dst_reg->type == PTR_TO_PACKET_END &&
+		   regs[insn->src_reg].type == PTR_TO_PACKET) {
+		/* pkt_end > pkt_data' */
+		find_good_pkt_pointers(other_branch, &regs[insn->src_reg], true);
 	} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLT &&
 		   dst_reg->type == PTR_TO_PACKET &&
 		   regs[insn->src_reg].type == PTR_TO_PACKET_END) {
-		find_good_pkt_pointers(other_branch, dst_reg);
+		/* pkt_data' < pkt_end */
+		find_good_pkt_pointers(other_branch, dst_reg, true);
+	} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLT &&
+		   dst_reg->type == PTR_TO_PACKET_END &&
+		   regs[insn->src_reg].type == PTR_TO_PACKET) {
+		/* pkt_end < pkt_data' */
+		find_good_pkt_pointers(this_branch, &regs[insn->src_reg], false);
+	} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE &&
+		   dst_reg->type == PTR_TO_PACKET &&
+		   regs[insn->src_reg].type == PTR_TO_PACKET_END) {
+		/* pkt_data' >= pkt_end */
+		find_good_pkt_pointers(this_branch, dst_reg, true);
 	} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE &&
 		   dst_reg->type == PTR_TO_PACKET_END &&
 		   regs[insn->src_reg].type == PTR_TO_PACKET) {
-		find_good_pkt_pointers(other_branch, &regs[insn->src_reg]);
+		/* pkt_end >= pkt_data' */
+		find_good_pkt_pointers(other_branch, &regs[insn->src_reg], false);
+	} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLE &&
+		   dst_reg->type == PTR_TO_PACKET &&
+		   regs[insn->src_reg].type == PTR_TO_PACKET_END) {
+		/* pkt_data' <= pkt_end */
+		find_good_pkt_pointers(other_branch, dst_reg, false);
 	} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLE &&
 		   dst_reg->type == PTR_TO_PACKET_END &&
 		   regs[insn->src_reg].type == PTR_TO_PACKET) {
-		find_good_pkt_pointers(this_branch, &regs[insn->src_reg]);
+		/* pkt_end <= pkt_data' */
+		find_good_pkt_pointers(this_branch, &regs[insn->src_reg], true);
 	} else if (is_pointer_value(env, insn->dst_reg)) {
 		verbose("R%d pointer comparison prohibited\n", insn->dst_reg);
 		return -EACCES;
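
The extra branch cases matter because LLVM may emit a packet bounds check with either operand order. Previously only comparisons with the packet pointer on the left-hand side were matched; now the swapped forms are handled too, and the new range_right_open argument distinguishes strict from non-strict comparisons so the marked range matches the check that was actually done (the r=8 versus r=8-1 cases in the comment above). A hedged sketch in a BPF program's restricted C, with SEC()/license boilerplate omitted; both checks below now lead to the same safe range marking:

/* Restricted-C sketch for a BPF program (e.g. a TC classifier); not a
 * complete program.
 */
#include <linux/bpf.h>
#include <linux/if_ether.h>

static int parse_eth(struct __sk_buff *skb)
{
	void *data     = (void *)(long)skb->data;
	void *data_end = (void *)(long)skb->data_end;
	struct ethhdr *eth = data;

	/* pkt_data' > pkt_end: matched before and after this change. */
	if ((void *)(eth + 1) > data_end)
		return 0;

	/* pkt_end < pkt_data': the swapped operand order, matched only with
	 * the new JLT case added above; without it, a program where LLVM
	 * emits this form gets no range marking and the access below is
	 * rejected, even though the check is just as strong.
	 */
	if (data_end < (void *)(eth + 1))
		return 0;

	return eth->h_proto;	/* both checks proved 14 bytes are present */
}
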
diff --git a/kernel/cpu.c b/kernel/cpu.c
index d851df22f5c5..04892a82f6ac 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -632,6 +632,11 @@ cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
 		__cpuhp_kick_ap(st);
 	}
 
+	/*
+	 * Clean up the leftovers so the next hotplug operation wont use stale
+	 * data.
+	 */
+	st->node = st->last = NULL;
 	return ret;
 }
 
diff --git a/kernel/exit.c b/kernel/exit.c
index cf28528842bc..f6cad39f35df 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1611,7 +1611,7 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
 		return err;
 
 	if (!access_ok(VERIFY_WRITE, infop, sizeof(*infop)))
-		goto Efault;
+		return -EFAULT;
 
 	user_access_begin();
 	unsafe_put_user(signo, &infop->si_signo, Efault);
@@ -1739,7 +1739,7 @@ COMPAT_SYSCALL_DEFINE5(waitid,
 		return err;
 
 	if (!access_ok(VERIFY_WRITE, infop, sizeof(*infop)))
-		goto Efault;
+		return -EFAULT;
 
 	user_access_begin();
 	unsafe_put_user(signo, &infop->si_signo, Efault);
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c
index 5270a54b9fa4..c26c5bb6b491 100644
--- a/kernel/irq/generic-chip.c
+++ b/kernel/irq/generic-chip.c
@@ -135,17 +135,26 @@ void irq_gc_ack_clr_bit(struct irq_data *d)
 }
 
 /**
- * irq_gc_mask_disable_reg_and_ack - Mask and ack pending interrupt
+ * irq_gc_mask_disable_and_ack_set - Mask and ack pending interrupt
  * @d: irq_data
+ *
+ * This generic implementation of the irq_mask_ack method is for chips
+ * with separate enable/disable registers instead of a single mask
+ * register and where a pending interrupt is acknowledged by setting a
+ * bit.
+ *
+ * Note: This is the only permutation currently used. Similar generic
+ * functions should be added here if other permutations are required.
  */
-void irq_gc_mask_disable_reg_and_ack(struct irq_data *d)
+void irq_gc_mask_disable_and_ack_set(struct irq_data *d)
 {
 	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
 	struct irq_chip_type *ct = irq_data_get_chip_type(d);
 	u32 mask = d->mask;
 
 	irq_gc_lock(gc);
-	irq_reg_writel(gc, mask, ct->regs.mask);
+	irq_reg_writel(gc, mask, ct->regs.disable);
+	*ct->mask_cache &= ~mask;
 	irq_reg_writel(gc, mask, ct->regs.ack);
 	irq_gc_unlock(gc);
 }
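
Typical usage of the renamed helper, for orientation: a driver whose interrupt controller has separate enable/disable registers and an ack-by-setting-a-bit register hooks it up as the generic chip's irq_mask_ack method. A hypothetical fragment (the FOO_* register offsets and foo_init_irq_gc() are made up; the referenced irq_gc_* helpers are existing generic-chip callbacks):

#include <linux/irq.h>

#define FOO_INTR_ENABLE		0x00	/* made-up register offsets */
#define FOO_INTR_DISABLE	0x04
#define FOO_INTR_ACK		0x08

static void foo_init_irq_gc(struct irq_chip_generic *gc)
{
	struct irq_chip_type *ct = gc->chip_types;

	ct->regs.enable  = FOO_INTR_ENABLE;
	ct->regs.disable = FOO_INTR_DISABLE;
	ct->regs.ack     = FOO_INTR_ACK;

	ct->chip.irq_mask     = irq_gc_mask_disable_reg;
	ct->chip.irq_unmask   = irq_gc_unmask_enable_reg;
	ct->chip.irq_ack      = irq_gc_ack_set_bit;
	/* mask and ack in one go, via the renamed helper from this patch */
	ct->chip.irq_mask_ack = irq_gc_mask_disable_and_ack_set;
}
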
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 729a8706751d..6d5880089ff6 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -854,7 +854,7 @@ void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
 /**
  * call_srcu() - Queue a callback for invocation after an SRCU grace period
  * @sp: srcu_struct in queue the callback
- * @head: structure to be used for queueing the SRCU callback.
+ * @rhp: structure to be used for queueing the SRCU callback.
  * @func: function to be invoked after the SRCU grace period
  *
  * The callback function will be invoked some time after a full SRCU
diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c
index 50d1861f7759..3f943efcf61c 100644
--- a/kernel/rcu/sync.c
+++ b/kernel/rcu/sync.c
@@ -85,6 +85,9 @@ void rcu_sync_init(struct rcu_sync *rsp, enum rcu_sync_type type)
 }
 
 /**
+ * rcu_sync_enter_start - Force readers onto slow path for multiple updates
+ * @rsp: Pointer to rcu_sync structure to use for synchronization
+ *
  * Must be called after rcu_sync_init() and before first use.
  *
  * Ensures rcu_sync_is_idle() returns false and rcu_sync_{enter,exit}()
@@ -142,7 +145,7 @@ void rcu_sync_enter(struct rcu_sync *rsp)
 
 /**
  * rcu_sync_func() - Callback function managing reader access to fastpath
- * @rsp: Pointer to rcu_sync structure to use for synchronization
+ * @rhp: Pointer to rcu_head in rcu_sync structure to use for synchronization
  *
  * This function is passed to one of the call_rcu() functions by
  * rcu_sync_exit(), so that it is invoked after a grace period following the
@@ -158,9 +161,9 @@ void rcu_sync_enter(struct rcu_sync *rsp)
  * rcu_sync_exit(). Otherwise, set all state back to idle so that readers
  * can again use their fastpaths.
  */
-static void rcu_sync_func(struct rcu_head *rcu)
+static void rcu_sync_func(struct rcu_head *rhp)
 {
-	struct rcu_sync *rsp = container_of(rcu, struct rcu_sync, cb_head);
+	struct rcu_sync *rsp = container_of(rhp, struct rcu_sync, cb_head);
 	unsigned long flags;
 
 	BUG_ON(rsp->gp_state != GP_PASSED);
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index b0ad62b0e7b8..3e3650e94ae6 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3097,9 +3097,10 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func,
  * read-side critical sections have completed. call_rcu_sched() assumes
  * that the read-side critical sections end on enabling of preemption
  * or on voluntary preemption.
- * RCU read-side critical sections are delimited by :
- *  - rcu_read_lock_sched() and rcu_read_unlock_sched(), OR
- *  - anything that disables preemption.
+ * RCU read-side critical sections are delimited by:
+ *
+ * - rcu_read_lock_sched() and rcu_read_unlock_sched(), OR
+ * - anything that disables preemption.
  *
  * These may be nested.
  *
@@ -3124,11 +3125,12 @@ EXPORT_SYMBOL_GPL(call_rcu_sched);
  * handler. This means that read-side critical sections in process
  * context must not be interrupted by softirqs. This interface is to be
  * used when most of the read-side critical sections are in softirq context.
- * RCU read-side critical sections are delimited by :
- *  - rcu_read_lock() and rcu_read_unlock(), if in interrupt context.
- *  OR
- *  - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context.
- *  These may be nested.
+ * RCU read-side critical sections are delimited by:
+ *
+ * - rcu_read_lock() and rcu_read_unlock(), if in interrupt context, OR
+ * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context.
+ *
+ * These may be nested.
  *
  * See the description of call_rcu() for more detailed information on
  * memory ordering guarantees.
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
index a92fddc22747..dd7908743dab 100644
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -18,6 +18,7 @@
 #include <linux/membarrier.h>
 #include <linux/tick.h>
 #include <linux/cpumask.h>
+#include <linux/atomic.h>
 
 #include "sched.h"	/* for cpu_rq(). */
 
@@ -26,21 +27,26 @@
  * except MEMBARRIER_CMD_QUERY.
  */
 #define MEMBARRIER_CMD_BITMASK	\
-	(MEMBARRIER_CMD_SHARED | MEMBARRIER_CMD_PRIVATE_EXPEDITED)
+	(MEMBARRIER_CMD_SHARED | MEMBARRIER_CMD_PRIVATE_EXPEDITED	\
+	| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED)
 
 static void ipi_mb(void *info)
 {
 	smp_mb();	/* IPIs should be serializing but paranoid. */
 }
 
-static void membarrier_private_expedited(void)
+static int membarrier_private_expedited(void)
 {
 	int cpu;
 	bool fallback = false;
 	cpumask_var_t tmpmask;
 
+	if (!(atomic_read(&current->mm->membarrier_state)
+			& MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY))
+		return -EPERM;
+
 	if (num_online_cpus() == 1)
-		return;
+		return 0;
 
 	/*
 	 * Matches memory barriers around rq->curr modification in
@@ -94,6 +100,24 @@ static void membarrier_private_expedited(void)
 	 * rq->curr modification in scheduler.
 	 */
 	smp_mb();	/* exit from system call is not a mb */
+	return 0;
+}
+
+static void membarrier_register_private_expedited(void)
+{
+	struct task_struct *p = current;
+	struct mm_struct *mm = p->mm;
+
+	/*
+	 * We need to consider threads belonging to different thread
+	 * groups, which use the same mm. (CLONE_VM but not
+	 * CLONE_THREAD).
+	 */
+	if (atomic_read(&mm->membarrier_state)
+			& MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY)
+		return;
+	atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY,
+			&mm->membarrier_state);
 }
 
 /**
@@ -144,7 +168,9 @@ SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
 		synchronize_sched();
 		return 0;
 	case MEMBARRIER_CMD_PRIVATE_EXPEDITED:
-		membarrier_private_expedited();
+		return membarrier_private_expedited();
+	case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED:
+		membarrier_register_private_expedited();
 		return 0;
 	default:
 		return -EINVAL;
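
With this change, user space must register an mm for private expedited membarriers before using them; an unregistered MEMBARRIER_CMD_PRIVATE_EXPEDITED now fails with EPERM. A small user-space sketch of the expected calling sequence, via raw syscall(2) since glibc provides no wrapper (assumes at least v4.14 uapi headers for the new command):

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/membarrier.h>

static int membarrier(int cmd, int flags)
{
	return syscall(__NR_membarrier, cmd, flags);
}

int main(void)
{
	/* Fails with EPERM until the process's mm is registered. */
	if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0))
		perror("unregistered private expedited");

	/* One-time registration per mm. */
	if (membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 0)) {
		perror("register");
		return 1;
	}

	/* Now every running thread of this process observes a full barrier. */
	if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0)) {
		perror("private expedited");
		return 1;
	}
	return 0;
}
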