aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorMartin KaFai Lau <kafai@fb.com>2019-02-10 02:22:20 -0500
committerAlexei Starovoitov <ast@kernel.org>2019-02-10 22:46:17 -0500
commit46f8bc92758c6259bcf945e9216098661c1587cd (patch)
tree61e7dbd8136c73ed010b3bc53ae18eb50dc251db /kernel
parent5f4566498dee5e38e36a015a968c22ed21568f0b (diff)
bpf: Add a bpf_sock pointer to __sk_buff and a bpf_sk_fullsock helper
In kernel, it is common to check "skb->sk && sk_fullsock(skb->sk)" before accessing the fields in sock. For example, in __netdev_pick_tx: static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev) { /* ... */ struct sock *sk = skb->sk; if (queue_index != new_index && sk && sk_fullsock(sk) && rcu_access_pointer(sk->sk_dst_cache)) sk_tx_queue_set(sk, new_index); /* ... */ return queue_index; } This patch adds a "struct bpf_sock *sk" pointer to the "struct __sk_buff" where a few of the convert_ctx_access() in filter.c have already been accessing the skb->sk sock_common's fields, e.g. sock_ops_convert_ctx_access(). "__sk_buff->sk" is a PTR_TO_SOCK_COMMON_OR_NULL in the verifier. Some of the fields in "bpf_sock" will not be directly accessible through the "__sk_buff->sk" pointer. It is limited by the new "bpf_sock_common_is_valid_access()". e.g. The existing "type", "protocol", "mark" and "priority" in bpf_sock are not allowed. The newly added "struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk)" can be used to get a sk with all accessible fields in "bpf_sock". This helper is added to both cg_skb and sched_(cls|act). int cg_skb_foo(struct __sk_buff *skb) { struct bpf_sock *sk; sk = skb->sk; if (!sk) return 1; sk = bpf_sk_fullsock(sk); if (!sk) return 1; if (sk->family != AF_INET6 || sk->protocol != IPPROTO_TCP) return 1; /* some_traffic_shaping(); */ return 1; } (1) The sk is read only (2) There is no new "struct bpf_sock_common" introduced. (3) Future kernel sock's members could be added to bpf_sock only instead of repeatedly adding at multiple places like currently in bpf_sock_ops_md, bpf_sock_addr_md, sk_reuseport_md...etc. (4) After "sk = skb->sk", the reg holding sk is in type PTR_TO_SOCK_COMMON_OR_NULL. (5) After bpf_sk_fullsock(), the return type will be in type PTR_TO_SOCKET_OR_NULL which is the same as the return type of bpf_sk_lookup_xxx(). However, bpf_sk_fullsock() does not take refcnt. 
The acquire_reference_state() is only depending on the return type now. To avoid it, a new is_acquire_function() is checked before calling acquire_reference_state(). (6) The WARN_ON in "release_reference_state()" is no longer an internal verifier bug. When reg->id is not found in state->refs[], it means the bpf_prog does something wrong like "bpf_sk_release(bpf_sk_fullsock(skb->sk))" where the reference has never been acquired by calling "bpf_sk_fullsock(skb->sk)". A -EINVAL and a verbose message are emitted instead of WARN_ON. A test is added to the test_verifier in a later patch. Since the WARN_ON in "release_reference_state()" is no longer needed, "__release_reference_state()" is folded into "release_reference_state()" also. Acked-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: Martin KaFai Lau <kafai@fb.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/verifier.c132
1 files changed, 92 insertions, 40 deletions
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 516dfc6d78de..b755d55a3791 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -331,10 +331,17 @@ static bool type_is_pkt_pointer(enum bpf_reg_type type)
331 type == PTR_TO_PACKET_META; 331 type == PTR_TO_PACKET_META;
332} 332}
333 333
334static bool type_is_sk_pointer(enum bpf_reg_type type)
335{
336 return type == PTR_TO_SOCKET ||
337 type == PTR_TO_SOCK_COMMON;
338}
339
334static bool reg_type_may_be_null(enum bpf_reg_type type) 340static bool reg_type_may_be_null(enum bpf_reg_type type)
335{ 341{
336 return type == PTR_TO_MAP_VALUE_OR_NULL || 342 return type == PTR_TO_MAP_VALUE_OR_NULL ||
337 type == PTR_TO_SOCKET_OR_NULL; 343 type == PTR_TO_SOCKET_OR_NULL ||
344 type == PTR_TO_SOCK_COMMON_OR_NULL;
338} 345}
339 346
340static bool type_is_refcounted(enum bpf_reg_type type) 347static bool type_is_refcounted(enum bpf_reg_type type)
@@ -377,6 +384,12 @@ static bool is_release_function(enum bpf_func_id func_id)
377 return func_id == BPF_FUNC_sk_release; 384 return func_id == BPF_FUNC_sk_release;
378} 385}
379 386
387static bool is_acquire_function(enum bpf_func_id func_id)
388{
389 return func_id == BPF_FUNC_sk_lookup_tcp ||
390 func_id == BPF_FUNC_sk_lookup_udp;
391}
392
380/* string representation of 'enum bpf_reg_type' */ 393/* string representation of 'enum bpf_reg_type' */
381static const char * const reg_type_str[] = { 394static const char * const reg_type_str[] = {
382 [NOT_INIT] = "?", 395 [NOT_INIT] = "?",
@@ -392,6 +405,8 @@ static const char * const reg_type_str[] = {
392 [PTR_TO_FLOW_KEYS] = "flow_keys", 405 [PTR_TO_FLOW_KEYS] = "flow_keys",
393 [PTR_TO_SOCKET] = "sock", 406 [PTR_TO_SOCKET] = "sock",
394 [PTR_TO_SOCKET_OR_NULL] = "sock_or_null", 407 [PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
408 [PTR_TO_SOCK_COMMON] = "sock_common",
409 [PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
395}; 410};
396 411
397static char slot_type_char[] = { 412static char slot_type_char[] = {
@@ -618,13 +633,10 @@ static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
618} 633}
619 634
620/* release function corresponding to acquire_reference_state(). Idempotent. */ 635/* release function corresponding to acquire_reference_state(). Idempotent. */
621static int __release_reference_state(struct bpf_func_state *state, int ptr_id) 636static int release_reference_state(struct bpf_func_state *state, int ptr_id)
622{ 637{
623 int i, last_idx; 638 int i, last_idx;
624 639
625 if (!ptr_id)
626 return -EFAULT;
627
628 last_idx = state->acquired_refs - 1; 640 last_idx = state->acquired_refs - 1;
629 for (i = 0; i < state->acquired_refs; i++) { 641 for (i = 0; i < state->acquired_refs; i++) {
630 if (state->refs[i].id == ptr_id) { 642 if (state->refs[i].id == ptr_id) {
@@ -636,21 +648,7 @@ static int __release_reference_state(struct bpf_func_state *state, int ptr_id)
636 return 0; 648 return 0;
637 } 649 }
638 } 650 }
639 return -EFAULT; 651 return -EINVAL;
640}
641
642/* variation on the above for cases where we expect that there must be an
643 * outstanding reference for the specified ptr_id.
644 */
645static int release_reference_state(struct bpf_verifier_env *env, int ptr_id)
646{
647 struct bpf_func_state *state = cur_func(env);
648 int err;
649
650 err = __release_reference_state(state, ptr_id);
651 if (WARN_ON_ONCE(err != 0))
652 verbose(env, "verifier internal error: can't release reference\n");
653 return err;
654} 652}
655 653
656static int transfer_reference_state(struct bpf_func_state *dst, 654static int transfer_reference_state(struct bpf_func_state *dst,
@@ -1209,6 +1207,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
1209 case CONST_PTR_TO_MAP: 1207 case CONST_PTR_TO_MAP:
1210 case PTR_TO_SOCKET: 1208 case PTR_TO_SOCKET:
1211 case PTR_TO_SOCKET_OR_NULL: 1209 case PTR_TO_SOCKET_OR_NULL:
1210 case PTR_TO_SOCK_COMMON:
1211 case PTR_TO_SOCK_COMMON_OR_NULL:
1212 return true; 1212 return true;
1213 default: 1213 default:
1214 return false; 1214 return false;
@@ -1647,6 +1647,7 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
1647 struct bpf_reg_state *regs = cur_regs(env); 1647 struct bpf_reg_state *regs = cur_regs(env);
1648 struct bpf_reg_state *reg = &regs[regno]; 1648 struct bpf_reg_state *reg = &regs[regno];
1649 struct bpf_insn_access_aux info = {}; 1649 struct bpf_insn_access_aux info = {};
1650 bool valid;
1650 1651
1651 if (reg->smin_value < 0) { 1652 if (reg->smin_value < 0) {
1652 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", 1653 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
@@ -1654,15 +1655,28 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
1654 return -EACCES; 1655 return -EACCES;
1655 } 1656 }
1656 1657
1657 if (!bpf_sock_is_valid_access(off, size, t, &info)) { 1658 switch (reg->type) {
1658 verbose(env, "invalid bpf_sock access off=%d size=%d\n", 1659 case PTR_TO_SOCK_COMMON:
1659 off, size); 1660 valid = bpf_sock_common_is_valid_access(off, size, t, &info);
1660 return -EACCES; 1661 break;
1662 case PTR_TO_SOCKET:
1663 valid = bpf_sock_is_valid_access(off, size, t, &info);
1664 break;
1665 default:
1666 valid = false;
1661 } 1667 }
1662 1668
1663 env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
1664 1669
1665 return 0; 1670 if (valid) {
1671 env->insn_aux_data[insn_idx].ctx_field_size =
1672 info.ctx_field_size;
1673 return 0;
1674 }
1675
1676 verbose(env, "R%d invalid %s access off=%d size=%d\n",
1677 regno, reg_type_str[reg->type], off, size);
1678
1679 return -EACCES;
1666} 1680}
1667 1681
1668static bool __is_pointer_value(bool allow_ptr_leaks, 1682static bool __is_pointer_value(bool allow_ptr_leaks,
@@ -1688,8 +1702,14 @@ static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
1688{ 1702{
1689 const struct bpf_reg_state *reg = reg_state(env, regno); 1703 const struct bpf_reg_state *reg = reg_state(env, regno);
1690 1704
1691 return reg->type == PTR_TO_CTX || 1705 return reg->type == PTR_TO_CTX;
1692 reg->type == PTR_TO_SOCKET; 1706}
1707
1708static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
1709{
1710 const struct bpf_reg_state *reg = reg_state(env, regno);
1711
1712 return type_is_sk_pointer(reg->type);
1693} 1713}
1694 1714
1695static bool is_pkt_reg(struct bpf_verifier_env *env, int regno) 1715static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
@@ -1800,6 +1820,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
1800 case PTR_TO_SOCKET: 1820 case PTR_TO_SOCKET:
1801 pointer_desc = "sock "; 1821 pointer_desc = "sock ";
1802 break; 1822 break;
1823 case PTR_TO_SOCK_COMMON:
1824 pointer_desc = "sock_common ";
1825 break;
1803 default: 1826 default:
1804 break; 1827 break;
1805 } 1828 }
@@ -2003,11 +2026,14 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
2003 * PTR_TO_PACKET[_META,_END]. In the latter 2026 * PTR_TO_PACKET[_META,_END]. In the latter
2004 * case, we know the offset is zero. 2027 * case, we know the offset is zero.
2005 */ 2028 */
2006 if (reg_type == SCALAR_VALUE) 2029 if (reg_type == SCALAR_VALUE) {
2007 mark_reg_unknown(env, regs, value_regno); 2030 mark_reg_unknown(env, regs, value_regno);
2008 else 2031 } else {
2009 mark_reg_known_zero(env, regs, 2032 mark_reg_known_zero(env, regs,
2010 value_regno); 2033 value_regno);
2034 if (reg_type_may_be_null(reg_type))
2035 regs[value_regno].id = ++env->id_gen;
2036 }
2011 regs[value_regno].type = reg_type; 2037 regs[value_regno].type = reg_type;
2012 } 2038 }
2013 2039
@@ -2053,9 +2079,10 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
2053 err = check_flow_keys_access(env, off, size); 2079 err = check_flow_keys_access(env, off, size);
2054 if (!err && t == BPF_READ && value_regno >= 0) 2080 if (!err && t == BPF_READ && value_regno >= 0)
2055 mark_reg_unknown(env, regs, value_regno); 2081 mark_reg_unknown(env, regs, value_regno);
2056 } else if (reg->type == PTR_TO_SOCKET) { 2082 } else if (type_is_sk_pointer(reg->type)) {
2057 if (t == BPF_WRITE) { 2083 if (t == BPF_WRITE) {
2058 verbose(env, "cannot write into socket\n"); 2084 verbose(env, "R%d cannot write into %s\n",
2085 regno, reg_type_str[reg->type]);
2059 return -EACCES; 2086 return -EACCES;
2060 } 2087 }
2061 err = check_sock_access(env, insn_idx, regno, off, size, t); 2088 err = check_sock_access(env, insn_idx, regno, off, size, t);
@@ -2102,7 +2129,8 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins
2102 2129
2103 if (is_ctx_reg(env, insn->dst_reg) || 2130 if (is_ctx_reg(env, insn->dst_reg) ||
2104 is_pkt_reg(env, insn->dst_reg) || 2131 is_pkt_reg(env, insn->dst_reg) ||
2105 is_flow_key_reg(env, insn->dst_reg)) { 2132 is_flow_key_reg(env, insn->dst_reg) ||
2133 is_sk_reg(env, insn->dst_reg)) {
2106 verbose(env, "BPF_XADD stores into R%d %s is not allowed\n", 2134 verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
2107 insn->dst_reg, 2135 insn->dst_reg,
2108 reg_type_str[reg_state(env, insn->dst_reg)->type]); 2136 reg_type_str[reg_state(env, insn->dst_reg)->type]);
@@ -2369,6 +2397,11 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
2369 err = check_ctx_reg(env, reg, regno); 2397 err = check_ctx_reg(env, reg, regno);
2370 if (err < 0) 2398 if (err < 0)
2371 return err; 2399 return err;
2400 } else if (arg_type == ARG_PTR_TO_SOCK_COMMON) {
2401 expected_type = PTR_TO_SOCK_COMMON;
2402 /* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */
2403 if (!type_is_sk_pointer(type))
2404 goto err_type;
2372 } else if (arg_type == ARG_PTR_TO_SOCKET) { 2405 } else if (arg_type == ARG_PTR_TO_SOCKET) {
2373 expected_type = PTR_TO_SOCKET; 2406 expected_type = PTR_TO_SOCKET;
2374 if (type != expected_type) 2407 if (type != expected_type)
@@ -2783,7 +2816,7 @@ static int release_reference(struct bpf_verifier_env *env,
2783 for (i = 0; i <= vstate->curframe; i++) 2816 for (i = 0; i <= vstate->curframe; i++)
2784 release_reg_references(env, vstate->frame[i], meta->ptr_id); 2817 release_reg_references(env, vstate->frame[i], meta->ptr_id);
2785 2818
2786 return release_reference_state(env, meta->ptr_id); 2819 return release_reference_state(cur_func(env), meta->ptr_id);
2787} 2820}
2788 2821
2789static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, 2822static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
@@ -3049,8 +3082,11 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
3049 } 3082 }
3050 } else if (is_release_function(func_id)) { 3083 } else if (is_release_function(func_id)) {
3051 err = release_reference(env, &meta); 3084 err = release_reference(env, &meta);
3052 if (err) 3085 if (err) {
3086 verbose(env, "func %s#%d reference has not been acquired before\n",
3087 func_id_name(func_id), func_id);
3053 return err; 3088 return err;
3089 }
3054 } 3090 }
3055 3091
3056 regs = cur_regs(env); 3092 regs = cur_regs(env);
@@ -3099,12 +3135,19 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
3099 regs[BPF_REG_0].id = ++env->id_gen; 3135 regs[BPF_REG_0].id = ++env->id_gen;
3100 } 3136 }
3101 } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) { 3137 } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
3102 int id = acquire_reference_state(env, insn_idx);
3103 if (id < 0)
3104 return id;
3105 mark_reg_known_zero(env, regs, BPF_REG_0); 3138 mark_reg_known_zero(env, regs, BPF_REG_0);
3106 regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL; 3139 regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
3107 regs[BPF_REG_0].id = id; 3140 if (is_acquire_function(func_id)) {
3141 int id = acquire_reference_state(env, insn_idx);
3142
3143 if (id < 0)
3144 return id;
3145 /* For release_reference() */
3146 regs[BPF_REG_0].id = id;
3147 } else {
3148 /* For mark_ptr_or_null_reg() */
3149 regs[BPF_REG_0].id = ++env->id_gen;
3150 }
3108 } else { 3151 } else {
3109 verbose(env, "unknown return type %d of func %s#%d\n", 3152 verbose(env, "unknown return type %d of func %s#%d\n",
3110 fn->ret_type, func_id_name(func_id), func_id); 3153 fn->ret_type, func_id_name(func_id), func_id);
@@ -3364,6 +3407,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
3364 case PTR_TO_PACKET_END: 3407 case PTR_TO_PACKET_END:
3365 case PTR_TO_SOCKET: 3408 case PTR_TO_SOCKET:
3366 case PTR_TO_SOCKET_OR_NULL: 3409 case PTR_TO_SOCKET_OR_NULL:
3410 case PTR_TO_SOCK_COMMON:
3411 case PTR_TO_SOCK_COMMON_OR_NULL:
3367 verbose(env, "R%d pointer arithmetic on %s prohibited\n", 3412 verbose(env, "R%d pointer arithmetic on %s prohibited\n",
3368 dst, reg_type_str[ptr_reg->type]); 3413 dst, reg_type_str[ptr_reg->type]);
3369 return -EACCES; 3414 return -EACCES;
@@ -4597,6 +4642,8 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
4597 } 4642 }
4598 } else if (reg->type == PTR_TO_SOCKET_OR_NULL) { 4643 } else if (reg->type == PTR_TO_SOCKET_OR_NULL) {
4599 reg->type = PTR_TO_SOCKET; 4644 reg->type = PTR_TO_SOCKET;
4645 } else if (reg->type == PTR_TO_SOCK_COMMON_OR_NULL) {
4646 reg->type = PTR_TO_SOCK_COMMON;
4600 } 4647 }
4601 if (is_null || !(reg_is_refcounted(reg) || 4648 if (is_null || !(reg_is_refcounted(reg) ||
4602 reg_may_point_to_spin_lock(reg))) { 4649 reg_may_point_to_spin_lock(reg))) {
@@ -4621,7 +4668,7 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
4621 int i, j; 4668 int i, j;
4622 4669
4623 if (reg_is_refcounted_or_null(&regs[regno]) && is_null) 4670 if (reg_is_refcounted_or_null(&regs[regno]) && is_null)
4624 __release_reference_state(state, id); 4671 release_reference_state(state, id);
4625 4672
4626 for (i = 0; i < MAX_BPF_REG; i++) 4673 for (i = 0; i < MAX_BPF_REG; i++)
4627 mark_ptr_or_null_reg(state, &regs[i], id, is_null); 4674 mark_ptr_or_null_reg(state, &regs[i], id, is_null);
@@ -5790,6 +5837,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
5790 case PTR_TO_FLOW_KEYS: 5837 case PTR_TO_FLOW_KEYS:
5791 case PTR_TO_SOCKET: 5838 case PTR_TO_SOCKET:
5792 case PTR_TO_SOCKET_OR_NULL: 5839 case PTR_TO_SOCKET_OR_NULL:
5840 case PTR_TO_SOCK_COMMON:
5841 case PTR_TO_SOCK_COMMON_OR_NULL:
5793 /* Only valid matches are exact, which memcmp() above 5842 /* Only valid matches are exact, which memcmp() above
5794 * would have accepted 5843 * would have accepted
5795 */ 5844 */
@@ -6110,6 +6159,8 @@ static bool reg_type_mismatch_ok(enum bpf_reg_type type)
6110 case PTR_TO_CTX: 6159 case PTR_TO_CTX:
6111 case PTR_TO_SOCKET: 6160 case PTR_TO_SOCKET:
6112 case PTR_TO_SOCKET_OR_NULL: 6161 case PTR_TO_SOCKET_OR_NULL:
6162 case PTR_TO_SOCK_COMMON:
6163 case PTR_TO_SOCK_COMMON_OR_NULL:
6113 return false; 6164 return false;
6114 default: 6165 default:
6115 return true; 6166 return true;
@@ -7112,6 +7163,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
7112 convert_ctx_access = ops->convert_ctx_access; 7163 convert_ctx_access = ops->convert_ctx_access;
7113 break; 7164 break;
7114 case PTR_TO_SOCKET: 7165 case PTR_TO_SOCKET:
7166 case PTR_TO_SOCK_COMMON:
7115 convert_ctx_access = bpf_sock_convert_ctx_access; 7167 convert_ctx_access = bpf_sock_convert_ctx_access;
7116 break; 7168 break;
7117 default: 7169 default: