 include/linux/bpf_types.h       |   3 +
 include/linux/filter.h          |  15 ++
 include/net/addrconf.h          |   1 +
 include/net/sock_reuseport.h    |   6 +-
 include/uapi/linux/bpf.h        |  36 ++-
 kernel/bpf/verifier.c           |   9 +
 net/core/filter.c               | 269 +++++++++++++++++++++++++++++++-
 net/core/sock_reuseport.c       |  20 +-
 net/ipv4/inet_connection_sock.c |   9 +
 net/ipv4/inet_hashtables.c      |   5 +-
 net/ipv4/udp.c                  |   5 +-
 11 files changed, 365 insertions(+), 13 deletions(-)
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 14fd6c02d258..cd26c090e7c0 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -29,6 +29,9 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
 #ifdef CONFIG_BPF_LIRC_MODE2
 BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2)
 #endif
+#ifdef CONFIG_INET
+BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport)
+#endif
 
 BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 2b072dab32c0..70e9d57677fe 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -32,6 +32,7 @@ struct seccomp_data;
 struct bpf_prog_aux;
 struct xdp_rxq_info;
 struct xdp_buff;
+struct sock_reuseport;
 
 /* ArgX, context and stack frame pointer register positions. Note,
  * Arg1, Arg2, Arg3, etc are used as argument mappings of function
@@ -833,6 +834,20 @@ void bpf_warn_invalid_xdp_action(u32 act);
 struct sock *do_sk_redirect_map(struct sk_buff *skb);
 struct sock *do_msg_redirect_map(struct sk_msg_buff *md);
 
+#ifdef CONFIG_INET
+struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
+				  struct bpf_prog *prog, struct sk_buff *skb,
+				  u32 hash);
+#else
+static inline struct sock *
+bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
+		     struct bpf_prog *prog, struct sk_buff *skb,
+		     u32 hash)
+{
+	return NULL;
+}
+#endif
+
 #ifdef CONFIG_BPF_JIT
 extern int bpf_jit_enable;
 extern int bpf_jit_harden;
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 5f43f7a70fe6..6def0351bcc3 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -108,6 +108,7 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
 		    u32 banned_flags);
 bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
 			  bool match_wildcard);
+bool inet_rcv_saddr_any(const struct sock *sk);
 void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr);
 void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr);
 
diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
index e1a7681856f7..73b569556be6 100644
--- a/include/net/sock_reuseport.h
+++ b/include/net/sock_reuseport.h
@@ -21,12 +21,14 @@ struct sock_reuseport {
 	unsigned int		synq_overflow_ts;
 	/* ID stays the same even after the size of socks[] grows. */
 	unsigned int		reuseport_id;
+	bool			bind_inany;
 	struct bpf_prog __rcu	*prog;		/* optional BPF sock selector */
 	struct sock		*socks[0];	/* array of sock pointers */
 };
 
-extern int reuseport_alloc(struct sock *sk);
-extern int reuseport_add_sock(struct sock *sk, struct sock *sk2);
+extern int reuseport_alloc(struct sock *sk, bool bind_inany);
+extern int reuseport_add_sock(struct sock *sk, struct sock *sk2,
+			      bool bind_inany);
 extern void reuseport_detach_sock(struct sock *sk);
 extern struct sock *reuseport_select_sock(struct sock *sk,
 					  u32 hash,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 40f584bc7da0..3102a2a23c31 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -151,6 +151,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
 	BPF_PROG_TYPE_LWT_SEG6LOCAL,
 	BPF_PROG_TYPE_LIRC_MODE2,
+	BPF_PROG_TYPE_SK_REUSEPORT,
 };
 
 enum bpf_attach_type {
@@ -2114,6 +2115,14 @@ union bpf_attr {
  *		the shared data.
  *	Return
  *		Pointer to the local storage area.
+ *
+ * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags)
+ *	Description
+ *		Select a SO_REUSEPORT sk from a BPF_MAP_TYPE_REUSEPORT_SOCKARRAY map.
+ *		It checks that the selected sk matches the incoming
+ *		request in the skb.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2197,7 +2206,8 @@ union bpf_attr {
 	FN(rc_keydown),			\
 	FN(skb_cgroup_id),		\
 	FN(get_current_cgroup_id),	\
-	FN(get_local_storage),
+	FN(get_local_storage),		\
+	FN(sk_select_reuseport),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -2414,6 +2424,30 @@ struct sk_msg_md {
 	__u32 local_port;	/* stored in host byte order */
 };
 
+struct sk_reuseport_md {
+	/*
+	 * Start of directly accessible data. It begins from
+	 * the tcp/udp header.
+	 */
+	void *data;
+	void *data_end;		/* End of directly accessible data */
+	/*
+	 * Total length of the packet (starting from the tcp/udp header).
+	 * Note that the directly accessible bytes (data_end - data)
+	 * could be less than this "len".  Those bytes could be
+	 * indirectly read by the bpf_skb_load_bytes() helper.
+	 */
+	__u32 len;
+	/*
+	 * Eth protocol in the mac header (network byte order). e.g.
+	 * ETH_P_IP(0x0800) and ETH_P_IPV6(0x86DD)
+	 */
+	__u32 eth_protocol;
+	__u32 ip_protocol;	/* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */
+	__u32 bind_inany;	/* Is sock bound to an INANY address? */
+	__u32 hash;		/* A hash of the packet 4 tuples */
+};
+
 #define BPF_TAG_SIZE	8
 
 struct bpf_prog_info {
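For orientation, the sketch below shows what a minimal program of this new type could look like against the sk_reuseport_md context above. It is illustrative only: it assumes a companion BPF_MAP_TYPE_REUSEPORT_SOCKARRAY map (added elsewhere in this series), uses later libbpf map-definition conventions, and the map name, size, and hash-based selection policy are hypothetical.

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	/* Hypothetical sockarray; slots are filled with socket fds from
	 * user space (see the population sketch at the end of this page).
	 */
	struct {
		__uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
		__uint(max_entries, 2);
		__type(key, __u32);
		__type(value, __u64);
	} reuse_map SEC(".maps");

	SEC("sk_reuseport")
	int select_sock(struct sk_reuseport_md *reuse_md)
	{
		/* Pick a slot from the flow hash of the incoming request. */
		__u32 index = reuse_md->hash % 2;

		/* Returns a negative errno if the socket at 'index' does not
		 * match the request (protocol, family, bound address).
		 */
		if (bpf_sk_select_reuseport(reuse_md, &reuse_map, &index, 0))
			return SK_DROP;

		return SK_PASS;
	}

	char _license[] SEC("license") = "GPL";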
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 587468a9c37d..ca90679a7fe5 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1310,6 +1310,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
 	case BPF_PROG_TYPE_LWT_IN:
 	case BPF_PROG_TYPE_LWT_OUT:
 	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
+	case BPF_PROG_TYPE_SK_REUSEPORT:
 		/* dst_input() and dst_output() can't write for now */
 		if (t == BPF_WRITE)
 			return false;
@@ -2166,6 +2167,10 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		    func_id != BPF_FUNC_msg_redirect_hash)
 			goto error;
 		break;
+	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
+		if (func_id != BPF_FUNC_sk_select_reuseport)
+			goto error;
+		break;
 	default:
 		break;
 	}
@@ -2217,6 +2222,10 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE)
 			goto error;
 		break;
+	case BPF_FUNC_sk_select_reuseport:
+		if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY)
+			goto error;
+		break;
 	default:
 		break;
 	}
diff --git a/net/core/filter.c b/net/core/filter.c
index 2de7dd9f2a57..142595b4e0d1 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1462,7 +1462,7 @@ static int __reuseport_attach_prog(struct bpf_prog *prog, struct sock *sk)
 		return -ENOMEM;
 
 	if (sk_unhashed(sk) && sk->sk_reuseport) {
-		err = reuseport_alloc(sk);
+		err = reuseport_alloc(sk, false);
 		if (err)
 			return err;
 	} else if (!rcu_access_pointer(sk->sk_reuseport_cb)) {
@@ -7013,3 +7013,270 @@ out:
 	release_sock(sk);
 	return ret;
 }
+
+#ifdef CONFIG_INET
+struct sk_reuseport_kern {
+	struct sk_buff *skb;
+	struct sock *sk;
+	struct sock *selected_sk;
+	void *data_end;
+	u32 hash;
+	u32 reuseport_id;
+	bool bind_inany;
+};
+
+static void bpf_init_reuseport_kern(struct sk_reuseport_kern *reuse_kern,
+				    struct sock_reuseport *reuse,
+				    struct sock *sk, struct sk_buff *skb,
+				    u32 hash)
+{
+	reuse_kern->skb = skb;
+	reuse_kern->sk = sk;
+	reuse_kern->selected_sk = NULL;
+	reuse_kern->data_end = skb->data + skb_headlen(skb);
+	reuse_kern->hash = hash;
+	reuse_kern->reuseport_id = reuse->reuseport_id;
+	reuse_kern->bind_inany = reuse->bind_inany;
+}
+
+struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
+				  struct bpf_prog *prog, struct sk_buff *skb,
+				  u32 hash)
+{
+	struct sk_reuseport_kern reuse_kern;
+	enum sk_action action;
+
+	bpf_init_reuseport_kern(&reuse_kern, reuse, sk, skb, hash);
+	action = BPF_PROG_RUN(prog, &reuse_kern);
+
+	if (action == SK_PASS)
+		return reuse_kern.selected_sk;
+	else
+		return ERR_PTR(-ECONNREFUSED);
+}
+
+BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern,
+	   struct bpf_map *, map, void *, key, u32, flags)
+{
+	struct sock_reuseport *reuse;
+	struct sock *selected_sk;
+
+	selected_sk = map->ops->map_lookup_elem(map, key);
+	if (!selected_sk)
+		return -ENOENT;
+
+	reuse = rcu_dereference(selected_sk->sk_reuseport_cb);
+	if (!reuse)
+		/* selected_sk is unhashed (e.g. by close()) after the
+		 * above map_lookup_elem().  Treat selected_sk as if it
+		 * has already been removed from the map.
+		 */
+		return -ENOENT;
+
+	if (unlikely(reuse->reuseport_id != reuse_kern->reuseport_id)) {
+		struct sock *sk;
+
+		if (unlikely(!reuse_kern->reuseport_id))
+			/* There is a small race between adding the
+			 * sk to the map and setting the
+			 * reuse_kern->reuseport_id.
+			 * Treat it as if the sk has not been added
+			 * to the bpf map yet.
+			 */
+			return -ENOENT;
+
+		sk = reuse_kern->sk;
+		if (sk->sk_protocol != selected_sk->sk_protocol)
+			return -EPROTOTYPE;
+		else if (sk->sk_family != selected_sk->sk_family)
+			return -EAFNOSUPPORT;
+
+		/* Catch all.  Likely bound to a different sockaddr. */
+		return -EBADFD;
+	}
+
+	reuse_kern->selected_sk = selected_sk;
+
+	return 0;
+}
+
+static const struct bpf_func_proto sk_select_reuseport_proto = {
+	.func		= sk_select_reuseport,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_CONST_MAP_PTR,
+	.arg3_type	= ARG_PTR_TO_MAP_KEY,
+	.arg4_type	= ARG_ANYTHING,
+};
+
+BPF_CALL_4(sk_reuseport_load_bytes,
+	   const struct sk_reuseport_kern *, reuse_kern, u32, offset,
+	   void *, to, u32, len)
+{
+	return ____bpf_skb_load_bytes(reuse_kern->skb, offset, to, len);
+}
+
+static const struct bpf_func_proto sk_reuseport_load_bytes_proto = {
+	.func		= sk_reuseport_load_bytes,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg4_type	= ARG_CONST_SIZE,
+};
+
+BPF_CALL_5(sk_reuseport_load_bytes_relative,
+	   const struct sk_reuseport_kern *, reuse_kern, u32, offset,
+	   void *, to, u32, len, u32, start_header)
+{
+	return ____bpf_skb_load_bytes_relative(reuse_kern->skb, offset, to,
+					       len, start_header);
+}
+
+static const struct bpf_func_proto sk_reuseport_load_bytes_relative_proto = {
+	.func		= sk_reuseport_load_bytes_relative,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg4_type	= ARG_CONST_SIZE,
+	.arg5_type	= ARG_ANYTHING,
+};
+
+static const struct bpf_func_proto *
+sk_reuseport_func_proto(enum bpf_func_id func_id,
+			const struct bpf_prog *prog)
+{
+	switch (func_id) {
+	case BPF_FUNC_sk_select_reuseport:
+		return &sk_select_reuseport_proto;
+	case BPF_FUNC_skb_load_bytes:
+		return &sk_reuseport_load_bytes_proto;
+	case BPF_FUNC_skb_load_bytes_relative:
+		return &sk_reuseport_load_bytes_relative_proto;
+	default:
+		return bpf_base_func_proto(func_id);
+	}
+}
+
+static bool
+sk_reuseport_is_valid_access(int off, int size,
+			     enum bpf_access_type type,
+			     const struct bpf_prog *prog,
+			     struct bpf_insn_access_aux *info)
+{
+	const u32 size_default = sizeof(__u32);
+
+	if (off < 0 || off >= sizeof(struct sk_reuseport_md) ||
+	    off % size || type != BPF_READ)
+		return false;
+
+	switch (off) {
+	case offsetof(struct sk_reuseport_md, data):
+		info->reg_type = PTR_TO_PACKET;
+		return size == sizeof(__u64);
+
+	case offsetof(struct sk_reuseport_md, data_end):
+		info->reg_type = PTR_TO_PACKET_END;
+		return size == sizeof(__u64);
+
+	case offsetof(struct sk_reuseport_md, hash):
+		return size == size_default;
+
+	/* Fields that allow narrowing */
+	case offsetof(struct sk_reuseport_md, eth_protocol):
+		if (size < FIELD_SIZEOF(struct sk_buff, protocol))
+			return false;
+	case offsetof(struct sk_reuseport_md, ip_protocol):
+	case offsetof(struct sk_reuseport_md, bind_inany):
+	case offsetof(struct sk_reuseport_md, len):
+		bpf_ctx_record_field_size(info, size_default);
+		return bpf_ctx_narrow_access_ok(off, size, size_default);
+
+	default:
+		return false;
+	}
+}
+
+#define SK_REUSEPORT_LOAD_FIELD(F) ({					\
+	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_reuseport_kern, F), \
+			      si->dst_reg, si->src_reg,			\
+			      bpf_target_off(struct sk_reuseport_kern, F, \
+					     FIELD_SIZEOF(struct sk_reuseport_kern, F), \
+					     target_size));		\
+	})
+
+#define SK_REUSEPORT_LOAD_SKB_FIELD(SKB_FIELD)				\
+	SOCK_ADDR_LOAD_NESTED_FIELD(struct sk_reuseport_kern,		\
+				    struct sk_buff,			\
+				    skb,				\
+				    SKB_FIELD)
+
+#define SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(SK_FIELD, BPF_SIZE, EXTRA_OFF) \
+	SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(struct sk_reuseport_kern,	\
+					     struct sock,		\
+					     sk,			\
+					     SK_FIELD, BPF_SIZE, EXTRA_OFF)
+
+static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type,
+					   const struct bpf_insn *si,
+					   struct bpf_insn *insn_buf,
+					   struct bpf_prog *prog,
+					   u32 *target_size)
+{
+	struct bpf_insn *insn = insn_buf;
+
+	switch (si->off) {
+	case offsetof(struct sk_reuseport_md, data):
+		SK_REUSEPORT_LOAD_SKB_FIELD(data);
+		break;
+
+	case offsetof(struct sk_reuseport_md, len):
+		SK_REUSEPORT_LOAD_SKB_FIELD(len);
+		break;
+
+	case offsetof(struct sk_reuseport_md, eth_protocol):
+		SK_REUSEPORT_LOAD_SKB_FIELD(protocol);
+		break;
+
+	case offsetof(struct sk_reuseport_md, ip_protocol):
+		BUILD_BUG_ON(hweight_long(SK_FL_PROTO_MASK) != BITS_PER_BYTE);
+		SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(__sk_flags_offset,
+						    BPF_W, 0);
+		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
+		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg,
+					SK_FL_PROTO_SHIFT);
+		/* SK_FL_PROTO_MASK and SK_FL_PROTO_SHIFT are endian
+		 * aware.  No further narrowing or masking is needed.
+		 */
+		*target_size = 1;
+		break;
+
+	case offsetof(struct sk_reuseport_md, data_end):
+		SK_REUSEPORT_LOAD_FIELD(data_end);
+		break;
+
+	case offsetof(struct sk_reuseport_md, hash):
+		SK_REUSEPORT_LOAD_FIELD(hash);
+		break;
+
+	case offsetof(struct sk_reuseport_md, bind_inany):
+		SK_REUSEPORT_LOAD_FIELD(bind_inany);
+		break;
+	}
+
+	return insn - insn_buf;
+}
+
+const struct bpf_verifier_ops sk_reuseport_verifier_ops = {
+	.get_func_proto		= sk_reuseport_func_proto,
+	.is_valid_access	= sk_reuseport_is_valid_access,
+	.convert_ctx_access	= sk_reuseport_convert_ctx_access,
+};
+
+const struct bpf_prog_ops sk_reuseport_prog_ops = {
+};
+#endif /* CONFIG_INET */
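On the user-space side, a program of this type is attached with the existing SO_ATTACH_REUSEPORT_EBPF socket option, which takes a program fd. A minimal sketch with error handling elided; setup_listener() and its parameters are hypothetical, and prog_fd is assumed to hold an already-loaded BPF_PROG_TYPE_SK_REUSEPORT program:

	#include <string.h>
	#include <sys/socket.h>
	#include <netinet/in.h>
	#include <arpa/inet.h>

	static int setup_listener(int prog_fd, unsigned short port)
	{
		struct sockaddr_in addr;
		int one = 1;
		int fd = socket(AF_INET, SOCK_STREAM, 0);

		setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
		/* The attached program runs via bpf_run_sk_reuseport() when
		 * this reuseport group receives a new request.
		 */
		setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF,
			   &prog_fd, sizeof(prog_fd));

		memset(&addr, 0, sizeof(addr));
		addr.sin_family = AF_INET;
		addr.sin_addr.s_addr = htonl(INADDR_ANY); /* bind_inany is set */
		addr.sin_port = htons(port);
		bind(fd, (struct sockaddr *)&addr, sizeof(addr));
		listen(fd, 128);

		return fd;
	}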
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index 8235f2439816..d260167f5f77 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -51,7 +51,7 @@ static struct sock_reuseport *__reuseport_alloc(unsigned int max_socks)
 	return reuse;
 }
 
-int reuseport_alloc(struct sock *sk)
+int reuseport_alloc(struct sock *sk, bool bind_inany)
 {
 	struct sock_reuseport *reuse;
 
@@ -63,9 +63,17 @@ int reuseport_alloc(struct sock *sk, bool bind_inany)
 	/* Allocation attempts can occur concurrently via the setsockopt path
 	 * and the bind/hash path.  Nothing to do when we lose the race.
 	 */
-	if (rcu_dereference_protected(sk->sk_reuseport_cb,
-				      lockdep_is_held(&reuseport_lock)))
+	reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
+					  lockdep_is_held(&reuseport_lock));
+	if (reuse) {
+		/* Only set reuse->bind_inany if bind_inany is true;
+		 * otherwise we would overwrite the reuse->bind_inany
+		 * that was set by the bind/hash path.
+		 */
+		if (bind_inany)
+			reuse->bind_inany = bind_inany;
 		goto out;
+	}
 
 	reuse = __reuseport_alloc(INIT_SOCKS);
 	if (!reuse) {
@@ -75,6 +83,7 @@ int reuseport_alloc(struct sock *sk, bool bind_inany)
 
 	reuse->socks[0] = sk;
 	reuse->num_socks = 1;
+	reuse->bind_inany = bind_inany;
 	rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
 
 out:
@@ -101,6 +110,7 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
 	more_reuse->num_socks = reuse->num_socks;
 	more_reuse->prog = reuse->prog;
 	more_reuse->reuseport_id = reuse->reuseport_id;
+	more_reuse->bind_inany = reuse->bind_inany;
 
 	memcpy(more_reuse->socks, reuse->socks,
 	       reuse->num_socks * sizeof(struct sock *));
@@ -136,12 +146,12 @@ static void reuseport_free_rcu(struct rcu_head *head)
  * @sk2:  Socket belonging to the existing reuseport group.
  * May return ENOMEM and not add socket to group under memory pressure.
  */
-int reuseport_add_sock(struct sock *sk, struct sock *sk2)
+int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany)
 {
 	struct sock_reuseport *old_reuse, *reuse;
 
 	if (!rcu_access_pointer(sk2->sk_reuseport_cb)) {
-		int err = reuseport_alloc(sk2);
+		int err = reuseport_alloc(sk2, bind_inany);
 
 		if (err)
 			return err;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 33a88e045efd..dfd5009f96ef 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -107,6 +107,15 @@ bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
 }
 EXPORT_SYMBOL(inet_rcv_saddr_equal);
 
+bool inet_rcv_saddr_any(const struct sock *sk)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+	if (sk->sk_family == AF_INET6)
+		return ipv6_addr_any(&sk->sk_v6_rcv_saddr);
+#endif
+	return !sk->sk_rcv_saddr;
+}
+
 void inet_get_local_port_range(struct net *net, int *low, int *high)
 {
 	unsigned int seq;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 3647167c8fa3..370e24463fb7 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -567,10 +567,11 @@ static int inet_reuseport_add_sock(struct sock *sk,
 		    inet_csk(sk2)->icsk_bind_hash == tb &&
 		    sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) &&
 		    inet_rcv_saddr_equal(sk, sk2, false))
-			return reuseport_add_sock(sk, sk2);
+			return reuseport_add_sock(sk, sk2,
+						  inet_rcv_saddr_any(sk));
 	}
 
-	return reuseport_alloc(sk);
+	return reuseport_alloc(sk, inet_rcv_saddr_any(sk));
 }
 
 int __inet_hash(struct sock *sk, struct sock *osk)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 060e841dde40..038dd7909051 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -221,11 +221,12 @@ static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot)
 		    (sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
 		    sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) &&
 		    inet_rcv_saddr_equal(sk, sk2, false)) {
-			return reuseport_add_sock(sk, sk2);
+			return reuseport_add_sock(sk, sk2,
+						  inet_rcv_saddr_any(sk));
 		}
 	}
 
-	return reuseport_alloc(sk);
+	return reuseport_alloc(sk, inet_rcv_saddr_any(sk));
 }
 
 /**
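Finally, the sockarray consulted by bpf_sk_select_reuseport() is populated from user space by storing listening-socket fds at the keys the program will look up. A sketch under the same assumptions as the earlier examples (map_fd refers to a BPF_MAP_TYPE_REUSEPORT_SOCKARRAY created elsewhere in this series; add_to_group() is hypothetical):

	#include <bpf/bpf.h>
	#include <linux/types.h>

	/* Place sock_fd in slot 'index'; the kernel resolves the fd to the
	 * underlying struct sock, which the BPF program can later select.
	 */
	static int add_to_group(int map_fd, __u32 index, __u64 sock_fd)
	{
		return bpf_map_update_elem(map_fd, &index, &sock_fd, BPF_ANY);
	}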