aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-10-21 22:44:48 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2017-10-21 22:44:48 -0400
commitb5ac3beb5a9f0ef0ea64cd85faf94c0dc4de0e42 (patch)
tree70a8d684c18f71aa4be2d67896362f864afb4d57 /kernel
parente5f468b3f23313994c5e6c356135f9b0d76bcb94 (diff)
parent8d5f4b07174976c55a5f5d6967777373c6826944 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller: "A little more than usual this time around. Been travelling, so that is part of it. Anyways, here are the highlights: 1) Deal with memcontrol races wrt. listener dismantle, from Eric Dumazet. 2) Handle page allocation failures properly in nfp driver, from Jakub Kicinski. 3) Fix memory leaks in macsec, from Sabrina Dubroca. 4) Fix crashes in pppol2tp_session_ioctl(), from Guillaume Nault. 5) Several fixes in bnxt_en driver, including preventing potential NVRAM parameter corruption from Michael Chan. 6) Fix for KRACK attacks in wireless, from Johannes Berg. 7) rtnetlink event generation fixes from Xin Long. 8) Deadlock in mlxsw driver, from Ido Schimmel. 9) Disallow arithmetic operations on context pointers in bpf, from Jakub Kicinski. 10) Missing sock_owned_by_user() check in sctp_icmp_redirect(), from Xin Long. 11) Only TCP is supported for sockmap, make that explicit with a check, from John Fastabend. 12) Fix IP options state races in DCCP and TCP, from Eric Dumazet. 13) Fix panic in packet_getsockopt(), also from Eric Dumazet. 14) Add missing locking in hv_sock layer, from Dexuan Cui. 15) Various aquantia bug fixes, including several statistics handling cures. From Igor Russkikh et al. 16) Fix arithmetic overflow in devmap code, from John Fastabend. 17) Fix busted socket memory accounting when we get a fault in the tcp zero copy paths. From Willem de Bruijn. 
18) Don't leave opt->tot_len uninitialized in ipv6, from Eric Dumazet" * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (106 commits) stmmac: Don't access tx_q->dirty_tx before netif_tx_lock ipv6: flowlabel: do not leave opt->tot_len with garbage of_mdio: Fix broken PHY IRQ in case of probe deferral textsearch: fix typos in library helpers rxrpc: Don't release call mutex on error pointer net: stmmac: Prevent infinite loop in get_rx_timestamp_status() net: stmmac: Fix stmmac_get_rx_hwtstamp() net: stmmac: Add missing call to dev_kfree_skb() mlxsw: spectrum_router: Configure TIGCR on init mlxsw: reg: Add Tunneling IPinIP General Configuration Register net: ethtool: remove error check for legacy setting transceiver type soreuseport: fix initialization race net: bridge: fix returning of vlan range op errors sock: correct sk_wmem_queued accounting on efault in tcp zerocopy bpf: add test cases to bpf selftests to cover all access tests bpf: fix pattern matches for direct packet access bpf: fix off by one for range markings with L{T, E} patterns bpf: devmap fix arithmetic overflow in bitmap_size calculation net: aquantia: Bad udp rate on default interrupt coalescing net: aquantia: Enable coalescing management via ethtool interface ...
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/arraymap.c2
-rw-r--r--kernel/bpf/devmap.c10
-rw-r--r--kernel/bpf/hashtab.c4
-rw-r--r--kernel/bpf/sockmap.c28
-rw-r--r--kernel/bpf/verifier.c65
5 files changed, 77 insertions, 32 deletions
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 98c0f00c3f5e..e2636737b69b 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -98,7 +98,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
98 array_size += (u64) attr->max_entries * elem_size * num_possible_cpus(); 98 array_size += (u64) attr->max_entries * elem_size * num_possible_cpus();
99 99
100 if (array_size >= U32_MAX - PAGE_SIZE || 100 if (array_size >= U32_MAX - PAGE_SIZE ||
101 elem_size > PCPU_MIN_UNIT_SIZE || bpf_array_alloc_percpu(array)) { 101 bpf_array_alloc_percpu(array)) {
102 bpf_map_area_free(array); 102 bpf_map_area_free(array);
103 return ERR_PTR(-ENOMEM); 103 return ERR_PTR(-ENOMEM);
104 } 104 }
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index e093d9a2c4dd..e745d6a88224 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -69,7 +69,7 @@ static LIST_HEAD(dev_map_list);
69 69
70static u64 dev_map_bitmap_size(const union bpf_attr *attr) 70static u64 dev_map_bitmap_size(const union bpf_attr *attr)
71{ 71{
72 return BITS_TO_LONGS(attr->max_entries) * sizeof(unsigned long); 72 return BITS_TO_LONGS((u64) attr->max_entries) * sizeof(unsigned long);
73} 73}
74 74
75static struct bpf_map *dev_map_alloc(union bpf_attr *attr) 75static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
@@ -78,6 +78,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
78 int err = -EINVAL; 78 int err = -EINVAL;
79 u64 cost; 79 u64 cost;
80 80
81 if (!capable(CAP_NET_ADMIN))
82 return ERR_PTR(-EPERM);
83
81 /* check sanity of attributes */ 84 /* check sanity of attributes */
82 if (attr->max_entries == 0 || attr->key_size != 4 || 85 if (attr->max_entries == 0 || attr->key_size != 4 ||
83 attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE) 86 attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE)
@@ -111,8 +114,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
111 err = -ENOMEM; 114 err = -ENOMEM;
112 115
113 /* A per cpu bitfield with a bit per possible net device */ 116 /* A per cpu bitfield with a bit per possible net device */
114 dtab->flush_needed = __alloc_percpu(dev_map_bitmap_size(attr), 117 dtab->flush_needed = __alloc_percpu_gfp(dev_map_bitmap_size(attr),
115 __alignof__(unsigned long)); 118 __alignof__(unsigned long),
119 GFP_KERNEL | __GFP_NOWARN);
116 if (!dtab->flush_needed) 120 if (!dtab->flush_needed)
117 goto free_dtab; 121 goto free_dtab;
118 122
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 431126f31ea3..6533f08d1238 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -317,10 +317,6 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
317 */ 317 */
318 goto free_htab; 318 goto free_htab;
319 319
320 if (percpu && round_up(htab->map.value_size, 8) > PCPU_MIN_UNIT_SIZE)
321 /* make sure the size for pcpu_alloc() is reasonable */
322 goto free_htab;
323
324 htab->elem_size = sizeof(struct htab_elem) + 320 htab->elem_size = sizeof(struct htab_elem) +
325 round_up(htab->map.key_size, 8); 321 round_up(htab->map.key_size, 8);
326 if (percpu) 322 if (percpu)
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 6424ce0e4969..2b6eb35ae5d3 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -39,6 +39,7 @@
39#include <linux/workqueue.h> 39#include <linux/workqueue.h>
40#include <linux/list.h> 40#include <linux/list.h>
41#include <net/strparser.h> 41#include <net/strparser.h>
42#include <net/tcp.h>
42 43
43struct bpf_stab { 44struct bpf_stab {
44 struct bpf_map map; 45 struct bpf_map map;
@@ -101,9 +102,16 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
101 return SK_DROP; 102 return SK_DROP;
102 103
103 skb_orphan(skb); 104 skb_orphan(skb);
105 /* We need to ensure that BPF metadata for maps is also cleared
106 * when we orphan the skb so that we don't have the possibility
107 * to reference a stale map.
108 */
109 TCP_SKB_CB(skb)->bpf.map = NULL;
104 skb->sk = psock->sock; 110 skb->sk = psock->sock;
105 bpf_compute_data_end(skb); 111 bpf_compute_data_end(skb);
112 preempt_disable();
106 rc = (*prog->bpf_func)(skb, prog->insnsi); 113 rc = (*prog->bpf_func)(skb, prog->insnsi);
114 preempt_enable();
107 skb->sk = NULL; 115 skb->sk = NULL;
108 116
109 return rc; 117 return rc;
@@ -114,17 +122,10 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
114 struct sock *sk; 122 struct sock *sk;
115 int rc; 123 int rc;
116 124
117 /* Because we use per cpu values to feed input from sock redirect
118 * in BPF program to do_sk_redirect_map() call we need to ensure we
119 * are not preempted. RCU read lock is not sufficient in this case
120 * with CONFIG_PREEMPT_RCU enabled so we must be explicit here.
121 */
122 preempt_disable();
123 rc = smap_verdict_func(psock, skb); 125 rc = smap_verdict_func(psock, skb);
124 switch (rc) { 126 switch (rc) {
125 case SK_REDIRECT: 127 case SK_REDIRECT:
126 sk = do_sk_redirect_map(); 128 sk = do_sk_redirect_map(skb);
127 preempt_enable();
128 if (likely(sk)) { 129 if (likely(sk)) {
129 struct smap_psock *peer = smap_psock_sk(sk); 130 struct smap_psock *peer = smap_psock_sk(sk);
130 131
@@ -141,8 +142,6 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
141 /* Fall through and free skb otherwise */ 142 /* Fall through and free skb otherwise */
142 case SK_DROP: 143 case SK_DROP:
143 default: 144 default:
144 if (rc != SK_REDIRECT)
145 preempt_enable();
146 kfree_skb(skb); 145 kfree_skb(skb);
147 } 146 }
148} 147}
@@ -487,6 +486,9 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
487 int err = -EINVAL; 486 int err = -EINVAL;
488 u64 cost; 487 u64 cost;
489 488
489 if (!capable(CAP_NET_ADMIN))
490 return ERR_PTR(-EPERM);
491
490 /* check sanity of attributes */ 492 /* check sanity of attributes */
491 if (attr->max_entries == 0 || attr->key_size != 4 || 493 if (attr->max_entries == 0 || attr->key_size != 4 ||
492 attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE) 494 attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE)
@@ -840,6 +842,12 @@ static int sock_map_update_elem(struct bpf_map *map,
840 return -EINVAL; 842 return -EINVAL;
841 } 843 }
842 844
845 if (skops.sk->sk_type != SOCK_STREAM ||
846 skops.sk->sk_protocol != IPPROTO_TCP) {
847 fput(socket->file);
848 return -EOPNOTSUPP;
849 }
850
843 err = sock_map_ctx_update_elem(&skops, map, key, flags); 851 err = sock_map_ctx_update_elem(&skops, map, key, flags);
844 fput(socket->file); 852 fput(socket->file);
845 return err; 853 return err;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 8b8d6ba39e23..c48ca2a34b5e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1116,7 +1116,12 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
1116 /* ctx accesses must be at a fixed offset, so that we can 1116 /* ctx accesses must be at a fixed offset, so that we can
1117 * determine what type of data were returned. 1117 * determine what type of data were returned.
1118 */ 1118 */
1119 if (!tnum_is_const(reg->var_off)) { 1119 if (reg->off) {
1120 verbose("dereference of modified ctx ptr R%d off=%d+%d, ctx+const is allowed, ctx+const+const is not\n",
1121 regno, reg->off, off - reg->off);
1122 return -EACCES;
1123 }
1124 if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
1120 char tn_buf[48]; 1125 char tn_buf[48];
1121 1126
1122 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); 1127 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
@@ -1124,7 +1129,6 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
1124 tn_buf, off, size); 1129 tn_buf, off, size);
1125 return -EACCES; 1130 return -EACCES;
1126 } 1131 }
1127 off += reg->var_off.value;
1128 err = check_ctx_access(env, insn_idx, off, size, t, &reg_type); 1132 err = check_ctx_access(env, insn_idx, off, size, t, &reg_type);
1129 if (!err && t == BPF_READ && value_regno >= 0) { 1133 if (!err && t == BPF_READ && value_regno >= 0) {
1130 /* ctx access returns either a scalar, or a 1134 /* ctx access returns either a scalar, or a
@@ -2426,12 +2430,15 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
2426} 2430}
2427 2431
2428static void find_good_pkt_pointers(struct bpf_verifier_state *state, 2432static void find_good_pkt_pointers(struct bpf_verifier_state *state,
2429 struct bpf_reg_state *dst_reg) 2433 struct bpf_reg_state *dst_reg,
2434 bool range_right_open)
2430{ 2435{
2431 struct bpf_reg_state *regs = state->regs, *reg; 2436 struct bpf_reg_state *regs = state->regs, *reg;
2437 u16 new_range;
2432 int i; 2438 int i;
2433 2439
2434 if (dst_reg->off < 0) 2440 if (dst_reg->off < 0 ||
2441 (dst_reg->off == 0 && range_right_open))
2435 /* This doesn't give us any range */ 2442 /* This doesn't give us any range */
2436 return; 2443 return;
2437 2444
@@ -2442,9 +2449,13 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
2442 */ 2449 */
2443 return; 2450 return;
2444 2451
2445 /* LLVM can generate four kind of checks: 2452 new_range = dst_reg->off;
2453 if (range_right_open)
2454 new_range--;
2455
2456 /* Examples for register markings:
2446 * 2457 *
2447 * Type 1/2: 2458 * pkt_data in dst register:
2448 * 2459 *
2449 * r2 = r3; 2460 * r2 = r3;
2450 * r2 += 8; 2461 * r2 += 8;
@@ -2461,7 +2472,7 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
2461 * r2=pkt(id=n,off=8,r=0) 2472 * r2=pkt(id=n,off=8,r=0)
2462 * r3=pkt(id=n,off=0,r=0) 2473 * r3=pkt(id=n,off=0,r=0)
2463 * 2474 *
2464 * Type 3/4: 2475 * pkt_data in src register:
2465 * 2476 *
2466 * r2 = r3; 2477 * r2 = r3;
2467 * r2 += 8; 2478 * r2 += 8;
@@ -2479,7 +2490,9 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
2479 * r3=pkt(id=n,off=0,r=0) 2490 * r3=pkt(id=n,off=0,r=0)
2480 * 2491 *
2481 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8) 2492 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
2482 * so that range of bytes [r3, r3 + 8) is safe to access. 2493 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
2494 * and [r3, r3 + 8-1) respectively is safe to access depending on
2495 * the check.
2483 */ 2496 */
2484 2497
2485 /* If our ids match, then we must have the same max_value. And we 2498 /* If our ids match, then we must have the same max_value. And we
@@ -2490,14 +2503,14 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
2490 for (i = 0; i < MAX_BPF_REG; i++) 2503 for (i = 0; i < MAX_BPF_REG; i++)
2491 if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id) 2504 if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id)
2492 /* keep the maximum range already checked */ 2505 /* keep the maximum range already checked */
2493 regs[i].range = max_t(u16, regs[i].range, dst_reg->off); 2506 regs[i].range = max(regs[i].range, new_range);
2494 2507
2495 for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { 2508 for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
2496 if (state->stack_slot_type[i] != STACK_SPILL) 2509 if (state->stack_slot_type[i] != STACK_SPILL)
2497 continue; 2510 continue;
2498 reg = &state->spilled_regs[i / BPF_REG_SIZE]; 2511 reg = &state->spilled_regs[i / BPF_REG_SIZE];
2499 if (reg->type == PTR_TO_PACKET && reg->id == dst_reg->id) 2512 if (reg->type == PTR_TO_PACKET && reg->id == dst_reg->id)
2500 reg->range = max_t(u16, reg->range, dst_reg->off); 2513 reg->range = max(reg->range, new_range);
2501 } 2514 }
2502} 2515}
2503 2516
@@ -2861,19 +2874,43 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
2861 } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT && 2874 } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT &&
2862 dst_reg->type == PTR_TO_PACKET && 2875 dst_reg->type == PTR_TO_PACKET &&
2863 regs[insn->src_reg].type == PTR_TO_PACKET_END) { 2876 regs[insn->src_reg].type == PTR_TO_PACKET_END) {
2864 find_good_pkt_pointers(this_branch, dst_reg); 2877 /* pkt_data' > pkt_end */
2878 find_good_pkt_pointers(this_branch, dst_reg, false);
2879 } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT &&
2880 dst_reg->type == PTR_TO_PACKET_END &&
2881 regs[insn->src_reg].type == PTR_TO_PACKET) {
2882 /* pkt_end > pkt_data' */
2883 find_good_pkt_pointers(other_branch, &regs[insn->src_reg], true);
2865 } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLT && 2884 } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLT &&
2866 dst_reg->type == PTR_TO_PACKET && 2885 dst_reg->type == PTR_TO_PACKET &&
2867 regs[insn->src_reg].type == PTR_TO_PACKET_END) { 2886 regs[insn->src_reg].type == PTR_TO_PACKET_END) {
2868 find_good_pkt_pointers(other_branch, dst_reg); 2887 /* pkt_data' < pkt_end */
2888 find_good_pkt_pointers(other_branch, dst_reg, true);
2889 } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLT &&
2890 dst_reg->type == PTR_TO_PACKET_END &&
2891 regs[insn->src_reg].type == PTR_TO_PACKET) {
2892 /* pkt_end < pkt_data' */
2893 find_good_pkt_pointers(this_branch, &regs[insn->src_reg], false);
2894 } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE &&
2895 dst_reg->type == PTR_TO_PACKET &&
2896 regs[insn->src_reg].type == PTR_TO_PACKET_END) {
2897 /* pkt_data' >= pkt_end */
2898 find_good_pkt_pointers(this_branch, dst_reg, true);
2869 } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE && 2899 } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE &&
2870 dst_reg->type == PTR_TO_PACKET_END && 2900 dst_reg->type == PTR_TO_PACKET_END &&
2871 regs[insn->src_reg].type == PTR_TO_PACKET) { 2901 regs[insn->src_reg].type == PTR_TO_PACKET) {
2872 find_good_pkt_pointers(other_branch, &regs[insn->src_reg]); 2902 /* pkt_end >= pkt_data' */
2903 find_good_pkt_pointers(other_branch, &regs[insn->src_reg], false);
2904 } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLE &&
2905 dst_reg->type == PTR_TO_PACKET &&
2906 regs[insn->src_reg].type == PTR_TO_PACKET_END) {
2907 /* pkt_data' <= pkt_end */
2908 find_good_pkt_pointers(other_branch, dst_reg, false);
2873 } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLE && 2909 } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLE &&
2874 dst_reg->type == PTR_TO_PACKET_END && 2910 dst_reg->type == PTR_TO_PACKET_END &&
2875 regs[insn->src_reg].type == PTR_TO_PACKET) { 2911 regs[insn->src_reg].type == PTR_TO_PACKET) {
2876 find_good_pkt_pointers(this_branch, &regs[insn->src_reg]); 2912 /* pkt_end <= pkt_data' */
2913 find_good_pkt_pointers(this_branch, &regs[insn->src_reg], true);
2877 } else if (is_pointer_value(env, insn->dst_reg)) { 2914 } else if (is_pointer_value(env, insn->dst_reg)) {
2878 verbose("R%d pointer comparison prohibited\n", insn->dst_reg); 2915 verbose("R%d pointer comparison prohibited\n", insn->dst_reg);
2879 return -EACCES; 2916 return -EACCES;