aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLawrence Brakmo <brakmo@fb.com>2017-06-30 23:02:40 -0400
committerDavid S. Miller <davem@davemloft.net>2017-07-01 19:15:13 -0400
commit40304b2a1567fecc321f640ee4239556dd0f3ee0 (patch)
tree093568073bae656d93f5b878ffcbb6cefbb3853e
parent57a53a0b6788e1e3e660987e3771837efa90d980 (diff)
bpf: BPF support for sock_ops
Created a new BPF program type, BPF_PROG_TYPE_SOCK_OPS, and a corresponding struct that allows BPF programs of this type to access some of the socket's fields (such as IP addresses, ports, etc.). It uses the existing bpf cgroups infrastructure so the programs can be attached per cgroup with full inheritance support. The program will be called at appropriate times to set relevant connection parameters such as buffer sizes, SYN and SYN-ACK RTOs, etc., based on connection information such as IP addresses, port numbers, etc. Although there are already 3 mechanisms to set parameters (sysctls, route metrics and setsockopts), this new mechanism provides some distinct advantages. Unlike sysctls, it can set parameters per connection. In contrast to route metrics, it can also use port numbers and information provided by a user level program. In addition, it could set parameters probabilistically for evaluation purposes (i.e. do something different on 10% of the flows and compare results with the other 90% of the flows). Also, in cases where IPv6 addresses contain geographic information, the rules to make changes based on the distance (or RTT) between the hosts are much easier than route metric rules and can be global. Finally, unlike setsockopt, it does not require application changes and it can be updated easily at any time. Although the bpf cgroup framework already contains a sock related program type (BPF_PROG_TYPE_CGROUP_SOCK), I created the new type (BPF_PROG_TYPE_SOCK_OPS) because the existing type expects to be called only once during the connection's lifetime. In contrast, the new program type will be called multiple times from different places in the network stack code. For example, before sending SYN and SYN-ACKs to set an appropriate timeout, when the connection is established to set congestion control, etc. As a result it has an "op" field to specify the type of operation requested. 
The purpose of this new program type is to simplify setting connection parameters, such as buffer sizes, TCP's SYN RTO, etc. For example, it is easy to use facebook's internal IPv6 addresses to determine if both hosts of a connection are in the same datacenter. Therefore, it is easy to write a BPF program to choose a small SYN RTO value when both hosts are in the same datacenter. This patch only contains the framework to support the new BPF program type, following patches add the functionality to set various connection parameters. This patch defines a new BPF program type: BPF_PROG_TYPE_SOCKET_OPS and a new bpf syscall command to load a new program of this type: BPF_PROG_LOAD_SOCKET_OPS. Two new corresponding structs (one for the kernel, one for the user/BPF program): /* kernel version */ struct bpf_sock_ops_kern { struct sock *sk; __u32 op; union { __u32 reply; __u32 replylong[4]; }; }; /* user version * Some fields are in network byte order reflecting the sock struct * Use the bpf_ntohl helper macro in samples/bpf/bpf_endian.h to * convert them to host byte order. */ struct bpf_sock_ops { __u32 op; union { __u32 reply; __u32 replylong[4]; }; __u32 family; __u32 remote_ip4; /* In network byte order */ __u32 local_ip4; /* In network byte order */ __u32 remote_ip6[4]; /* In network byte order */ __u32 local_ip6[4]; /* In network byte order */ __u32 remote_port; /* In network byte order */ __u32 local_port; /* In host byte order */ }; Currently there are two types of ops. The first type expects the BPF program to return a value which is then used by the caller (or a negative value to indicate the operation is not supported). The second type expects state changes to be done by the BPF program, for example through a setsockopt BPF helper function, and they ignore the return value. The reply fields of the bpf_sock_ops struct are there in case a bpf program needs to return a value larger than an integer. 
Signed-off-by: Lawrence Brakmo <brakmo@fb.com> Acked-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/linux/bpf-cgroup.h18
-rw-r--r--include/linux/bpf_types.h1
-rw-r--r--include/linux/filter.h9
-rw-r--r--include/net/tcp.h36
-rw-r--r--include/uapi/linux/bpf.h30
-rw-r--r--kernel/bpf/cgroup.c37
-rw-r--r--kernel/bpf/syscall.c5
-rw-r--r--net/core/filter.c168
-rw-r--r--samples/bpf/bpf_load.c13
9 files changed, 314 insertions, 3 deletions
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index c970a25d2a49..360c082e885c 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -7,6 +7,7 @@
7struct sock; 7struct sock;
8struct cgroup; 8struct cgroup;
9struct sk_buff; 9struct sk_buff;
10struct bpf_sock_ops_kern;
10 11
11#ifdef CONFIG_CGROUP_BPF 12#ifdef CONFIG_CGROUP_BPF
12 13
@@ -42,6 +43,10 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
42int __cgroup_bpf_run_filter_sk(struct sock *sk, 43int __cgroup_bpf_run_filter_sk(struct sock *sk,
43 enum bpf_attach_type type); 44 enum bpf_attach_type type);
44 45
46int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
47 struct bpf_sock_ops_kern *sock_ops,
48 enum bpf_attach_type type);
49
45/* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */ 50/* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */
46#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \ 51#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \
47({ \ 52({ \
@@ -75,6 +80,18 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
75 __ret; \ 80 __ret; \
76}) 81})
77 82
83#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) \
84({ \
85 int __ret = 0; \
86 if (cgroup_bpf_enabled && (sock_ops)->sk) { \
87 typeof(sk) __sk = sk_to_full_sk((sock_ops)->sk); \
88 if (sk_fullsock(__sk)) \
89 __ret = __cgroup_bpf_run_filter_sock_ops(__sk, \
90 sock_ops, \
91 BPF_CGROUP_SOCK_OPS); \
92 } \
93 __ret; \
94})
78#else 95#else
79 96
80struct cgroup_bpf {}; 97struct cgroup_bpf {};
@@ -85,6 +102,7 @@ static inline void cgroup_bpf_inherit(struct cgroup *cgrp,
85#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) 102#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
86#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) 103#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
87#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; }) 104#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
105#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
88 106
89#endif /* CONFIG_CGROUP_BPF */ 107#endif /* CONFIG_CGROUP_BPF */
90 108
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 03bf223f18be..3d137c33d664 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -10,6 +10,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock_prog_ops)
10BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout_prog_ops) 10BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout_prog_ops)
11BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout_prog_ops) 11BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout_prog_ops)
12BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit_prog_ops) 12BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit_prog_ops)
13BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops_prog_ops)
13#endif 14#endif
14#ifdef CONFIG_BPF_EVENTS 15#ifdef CONFIG_BPF_EVENTS
15BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe_prog_ops) 16BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe_prog_ops)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 1fa26dc562ce..738f8b14f025 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -898,4 +898,13 @@ static inline int bpf_tell_extensions(void)
898 return SKF_AD_MAX; 898 return SKF_AD_MAX;
899} 899}
900 900
901struct bpf_sock_ops_kern {
902 struct sock *sk;
903 u32 op;
904 union {
905 u32 reply;
906 u32 replylong[4];
907 };
908};
909
901#endif /* __LINUX_FILTER_H__ */ 910#endif /* __LINUX_FILTER_H__ */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index d0751b79d99c..e58500825006 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -46,6 +46,10 @@
46#include <linux/seq_file.h> 46#include <linux/seq_file.h>
47#include <linux/memcontrol.h> 47#include <linux/memcontrol.h>
48 48
49#include <linux/bpf.h>
50#include <linux/filter.h>
51#include <linux/bpf-cgroup.h>
52
49extern struct inet_hashinfo tcp_hashinfo; 53extern struct inet_hashinfo tcp_hashinfo;
50 54
51extern struct percpu_counter tcp_orphan_count; 55extern struct percpu_counter tcp_orphan_count;
@@ -2021,4 +2025,36 @@ int tcp_set_ulp(struct sock *sk, const char *name);
2021void tcp_get_available_ulp(char *buf, size_t len); 2025void tcp_get_available_ulp(char *buf, size_t len);
2022void tcp_cleanup_ulp(struct sock *sk); 2026void tcp_cleanup_ulp(struct sock *sk);
2023 2027
2028/* Call BPF_SOCK_OPS program that returns an int. If the return value
2029 * is < 0, then the BPF op failed (for example if the loaded BPF
2030 * program does not support the chosen operation or there is no BPF
2031 * program loaded).
2032 */
2033#ifdef CONFIG_BPF
2034static inline int tcp_call_bpf(struct sock *sk, int op)
2035{
2036 struct bpf_sock_ops_kern sock_ops;
2037 int ret;
2038
2039 if (sk_fullsock(sk))
2040 sock_owned_by_me(sk);
2041
2042 memset(&sock_ops, 0, sizeof(sock_ops));
2043 sock_ops.sk = sk;
2044 sock_ops.op = op;
2045
2046 ret = BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
2047 if (ret == 0)
2048 ret = sock_ops.reply;
2049 else
2050 ret = -1;
2051 return ret;
2052}
2053#else
2054static inline int tcp_call_bpf(struct sock *sk, int op)
2055{
2056 return -EPERM;
2057}
2058#endif
2059
2024#endif /* _TCP_H */ 2060#endif /* _TCP_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f94b48b168dc..01cd485ccd4f 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -120,12 +120,14 @@ enum bpf_prog_type {
120 BPF_PROG_TYPE_LWT_IN, 120 BPF_PROG_TYPE_LWT_IN,
121 BPF_PROG_TYPE_LWT_OUT, 121 BPF_PROG_TYPE_LWT_OUT,
122 BPF_PROG_TYPE_LWT_XMIT, 122 BPF_PROG_TYPE_LWT_XMIT,
123 BPF_PROG_TYPE_SOCK_OPS,
123}; 124};
124 125
125enum bpf_attach_type { 126enum bpf_attach_type {
126 BPF_CGROUP_INET_INGRESS, 127 BPF_CGROUP_INET_INGRESS,
127 BPF_CGROUP_INET_EGRESS, 128 BPF_CGROUP_INET_EGRESS,
128 BPF_CGROUP_INET_SOCK_CREATE, 129 BPF_CGROUP_INET_SOCK_CREATE,
130 BPF_CGROUP_SOCK_OPS,
129 __MAX_BPF_ATTACH_TYPE 131 __MAX_BPF_ATTACH_TYPE
130}; 132};
131 133
@@ -720,4 +722,32 @@ struct bpf_map_info {
720 __u32 map_flags; 722 __u32 map_flags;
721} __attribute__((aligned(8))); 723} __attribute__((aligned(8)));
722 724
725/* User bpf_sock_ops struct to access socket values and specify request ops
726 * and their replies.
727 * Some of this fields are in network (bigendian) byte order and may need
728 * to be converted before use (bpf_ntohl() defined in samples/bpf/bpf_endian.h).
729 * New fields can only be added at the end of this structure
730 */
731struct bpf_sock_ops {
732 __u32 op;
733 union {
734 __u32 reply;
735 __u32 replylong[4];
736 };
737 __u32 family;
738 __u32 remote_ip4; /* Stored in network byte order */
739 __u32 local_ip4; /* Stored in network byte order */
740 __u32 remote_ip6[4]; /* Stored in network byte order */
741 __u32 local_ip6[4]; /* Stored in network byte order */
742 __u32 remote_port; /* Stored in network byte order */
743 __u32 local_port; /* stored in host byte order */
744};
745
746/* List of known BPF sock_ops operators.
747 * New entries can only be added at the end
748 */
749enum {
750 BPF_SOCK_OPS_VOID,
751};
752
723#endif /* _UAPI__LINUX_BPF_H__ */ 753#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index ea6033cba947..546113430049 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -236,3 +236,40 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
236 return ret; 236 return ret;
237} 237}
238EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); 238EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
239
240/**
241 * __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock
242 * @sk: socket to get cgroup from
243 * @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains
244 * sk with connection information (IP addresses, etc.) May not contain
245 * cgroup info if it is a req sock.
246 * @type: The type of program to be exectuted
247 *
248 * socket passed is expected to be of type INET or INET6.
249 *
250 * The program type passed in via @type must be suitable for sock_ops
251 * filtering. No further check is performed to assert that.
252 *
253 * This function will return %-EPERM if any if an attached program was found
254 * and if it returned != 1 during execution. In all other cases, 0 is returned.
255 */
256int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
257 struct bpf_sock_ops_kern *sock_ops,
258 enum bpf_attach_type type)
259{
260 struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
261 struct bpf_prog *prog;
262 int ret = 0;
263
264
265 rcu_read_lock();
266
267 prog = rcu_dereference(cgrp->bpf.effective[type]);
268 if (prog)
269 ret = BPF_PROG_RUN(prog, sock_ops) == 1 ? 0 : -EPERM;
270
271 rcu_read_unlock();
272
273 return ret;
274}
275EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 4409ccca8831..d4d47de75bba 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1079,6 +1079,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
1079 case BPF_CGROUP_INET_SOCK_CREATE: 1079 case BPF_CGROUP_INET_SOCK_CREATE:
1080 ptype = BPF_PROG_TYPE_CGROUP_SOCK; 1080 ptype = BPF_PROG_TYPE_CGROUP_SOCK;
1081 break; 1081 break;
1082 case BPF_CGROUP_SOCK_OPS:
1083 ptype = BPF_PROG_TYPE_SOCK_OPS;
1084 break;
1082 default: 1085 default:
1083 return -EINVAL; 1086 return -EINVAL;
1084 } 1087 }
@@ -1119,6 +1122,7 @@ static int bpf_prog_detach(const union bpf_attr *attr)
1119 case BPF_CGROUP_INET_INGRESS: 1122 case BPF_CGROUP_INET_INGRESS:
1120 case BPF_CGROUP_INET_EGRESS: 1123 case BPF_CGROUP_INET_EGRESS:
1121 case BPF_CGROUP_INET_SOCK_CREATE: 1124 case BPF_CGROUP_INET_SOCK_CREATE:
1125 case BPF_CGROUP_SOCK_OPS:
1122 cgrp = cgroup_get_from_fd(attr->target_fd); 1126 cgrp = cgroup_get_from_fd(attr->target_fd);
1123 if (IS_ERR(cgrp)) 1127 if (IS_ERR(cgrp))
1124 return PTR_ERR(cgrp); 1128 return PTR_ERR(cgrp);
@@ -1133,6 +1137,7 @@ static int bpf_prog_detach(const union bpf_attr *attr)
1133 1137
1134 return ret; 1138 return ret;
1135} 1139}
1140
1136#endif /* CONFIG_CGROUP_BPF */ 1141#endif /* CONFIG_CGROUP_BPF */
1137 1142
1138#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration 1143#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration
diff --git a/net/core/filter.c b/net/core/filter.c
index b39c869d22e3..1f6a26c4f8b9 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3110,6 +3110,36 @@ void bpf_warn_invalid_xdp_action(u32 act)
3110} 3110}
3111EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); 3111EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
3112 3112
3113static bool __is_valid_sock_ops_access(int off, int size)
3114{
3115 if (off < 0 || off >= sizeof(struct bpf_sock_ops))
3116 return false;
3117 /* The verifier guarantees that size > 0. */
3118 if (off % size != 0)
3119 return false;
3120 if (size != sizeof(__u32))
3121 return false;
3122
3123 return true;
3124}
3125
3126static bool sock_ops_is_valid_access(int off, int size,
3127 enum bpf_access_type type,
3128 struct bpf_insn_access_aux *info)
3129{
3130 if (type == BPF_WRITE) {
3131 switch (off) {
3132 case offsetof(struct bpf_sock_ops, op) ...
3133 offsetof(struct bpf_sock_ops, replylong[3]):
3134 break;
3135 default:
3136 return false;
3137 }
3138 }
3139
3140 return __is_valid_sock_ops_access(off, size);
3141}
3142
3113static u32 bpf_convert_ctx_access(enum bpf_access_type type, 3143static u32 bpf_convert_ctx_access(enum bpf_access_type type,
3114 const struct bpf_insn *si, 3144 const struct bpf_insn *si,
3115 struct bpf_insn *insn_buf, 3145 struct bpf_insn *insn_buf,
@@ -3379,6 +3409,138 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
3379 return insn - insn_buf; 3409 return insn - insn_buf;
3380} 3410}
3381 3411
3412static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
3413 const struct bpf_insn *si,
3414 struct bpf_insn *insn_buf,
3415 struct bpf_prog *prog)
3416{
3417 struct bpf_insn *insn = insn_buf;
3418 int off;
3419
3420 switch (si->off) {
3421 case offsetof(struct bpf_sock_ops, op) ...
3422 offsetof(struct bpf_sock_ops, replylong[3]):
3423 BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, op) !=
3424 FIELD_SIZEOF(struct bpf_sock_ops_kern, op));
3425 BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, reply) !=
3426 FIELD_SIZEOF(struct bpf_sock_ops_kern, reply));
3427 BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, replylong) !=
3428 FIELD_SIZEOF(struct bpf_sock_ops_kern, replylong));
3429 off = si->off;
3430 off -= offsetof(struct bpf_sock_ops, op);
3431 off += offsetof(struct bpf_sock_ops_kern, op);
3432 if (type == BPF_WRITE)
3433 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
3434 off);
3435 else
3436 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
3437 off);
3438 break;
3439
3440 case offsetof(struct bpf_sock_ops, family):
3441 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);
3442
3443 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
3444 struct bpf_sock_ops_kern, sk),
3445 si->dst_reg, si->src_reg,
3446 offsetof(struct bpf_sock_ops_kern, sk));
3447 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
3448 offsetof(struct sock_common, skc_family));
3449 break;
3450
3451 case offsetof(struct bpf_sock_ops, remote_ip4):
3452 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);
3453
3454 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
3455 struct bpf_sock_ops_kern, sk),
3456 si->dst_reg, si->src_reg,
3457 offsetof(struct bpf_sock_ops_kern, sk));
3458 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
3459 offsetof(struct sock_common, skc_daddr));
3460 break;
3461
3462 case offsetof(struct bpf_sock_ops, local_ip4):
3463 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_rcv_saddr) != 4);
3464
3465 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
3466 struct bpf_sock_ops_kern, sk),
3467 si->dst_reg, si->src_reg,
3468 offsetof(struct bpf_sock_ops_kern, sk));
3469 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
3470 offsetof(struct sock_common,
3471 skc_rcv_saddr));
3472 break;
3473
3474 case offsetof(struct bpf_sock_ops, remote_ip6[0]) ...
3475 offsetof(struct bpf_sock_ops, remote_ip6[3]):
3476#if IS_ENABLED(CONFIG_IPV6)
3477 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
3478 skc_v6_daddr.s6_addr32[0]) != 4);
3479
3480 off = si->off;
3481 off -= offsetof(struct bpf_sock_ops, remote_ip6[0]);
3482 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
3483 struct bpf_sock_ops_kern, sk),
3484 si->dst_reg, si->src_reg,
3485 offsetof(struct bpf_sock_ops_kern, sk));
3486 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
3487 offsetof(struct sock_common,
3488 skc_v6_daddr.s6_addr32[0]) +
3489 off);
3490#else
3491 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
3492#endif
3493 break;
3494
3495 case offsetof(struct bpf_sock_ops, local_ip6[0]) ...
3496 offsetof(struct bpf_sock_ops, local_ip6[3]):
3497#if IS_ENABLED(CONFIG_IPV6)
3498 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
3499 skc_v6_rcv_saddr.s6_addr32[0]) != 4);
3500
3501 off = si->off;
3502 off -= offsetof(struct bpf_sock_ops, local_ip6[0]);
3503 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
3504 struct bpf_sock_ops_kern, sk),
3505 si->dst_reg, si->src_reg,
3506 offsetof(struct bpf_sock_ops_kern, sk));
3507 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
3508 offsetof(struct sock_common,
3509 skc_v6_rcv_saddr.s6_addr32[0]) +
3510 off);
3511#else
3512 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
3513#endif
3514 break;
3515
3516 case offsetof(struct bpf_sock_ops, remote_port):
3517 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);
3518
3519 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
3520 struct bpf_sock_ops_kern, sk),
3521 si->dst_reg, si->src_reg,
3522 offsetof(struct bpf_sock_ops_kern, sk));
3523 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
3524 offsetof(struct sock_common, skc_dport));
3525#ifndef __BIG_ENDIAN_BITFIELD
3526 *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
3527#endif
3528 break;
3529
3530 case offsetof(struct bpf_sock_ops, local_port):
3531 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);
3532
3533 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
3534 struct bpf_sock_ops_kern, sk),
3535 si->dst_reg, si->src_reg,
3536 offsetof(struct bpf_sock_ops_kern, sk));
3537 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
3538 offsetof(struct sock_common, skc_num));
3539 break;
3540 }
3541 return insn - insn_buf;
3542}
3543
3382const struct bpf_verifier_ops sk_filter_prog_ops = { 3544const struct bpf_verifier_ops sk_filter_prog_ops = {
3383 .get_func_proto = sk_filter_func_proto, 3545 .get_func_proto = sk_filter_func_proto,
3384 .is_valid_access = sk_filter_is_valid_access, 3546 .is_valid_access = sk_filter_is_valid_access,
@@ -3428,6 +3590,12 @@ const struct bpf_verifier_ops cg_sock_prog_ops = {
3428 .convert_ctx_access = sock_filter_convert_ctx_access, 3590 .convert_ctx_access = sock_filter_convert_ctx_access,
3429}; 3591};
3430 3592
3593const struct bpf_verifier_ops sock_ops_prog_ops = {
3594 .get_func_proto = bpf_base_func_proto,
3595 .is_valid_access = sock_ops_is_valid_access,
3596 .convert_ctx_access = sock_ops_convert_ctx_access,
3597};
3598
3431int sk_detach_filter(struct sock *sk) 3599int sk_detach_filter(struct sock *sk)
3432{ 3600{
3433 int ret = -ENOENT; 3601 int ret = -ENOENT;
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index a91c57dd8571..a4be7cfa6519 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -64,6 +64,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
64 bool is_perf_event = strncmp(event, "perf_event", 10) == 0; 64 bool is_perf_event = strncmp(event, "perf_event", 10) == 0;
65 bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0; 65 bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0;
66 bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0; 66 bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0;
67 bool is_sockops = strncmp(event, "sockops", 7) == 0;
67 size_t insns_cnt = size / sizeof(struct bpf_insn); 68 size_t insns_cnt = size / sizeof(struct bpf_insn);
68 enum bpf_prog_type prog_type; 69 enum bpf_prog_type prog_type;
69 char buf[256]; 70 char buf[256];
@@ -89,6 +90,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
89 prog_type = BPF_PROG_TYPE_CGROUP_SKB; 90 prog_type = BPF_PROG_TYPE_CGROUP_SKB;
90 } else if (is_cgroup_sk) { 91 } else if (is_cgroup_sk) {
91 prog_type = BPF_PROG_TYPE_CGROUP_SOCK; 92 prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
93 } else if (is_sockops) {
94 prog_type = BPF_PROG_TYPE_SOCK_OPS;
92 } else { 95 } else {
93 printf("Unknown event '%s'\n", event); 96 printf("Unknown event '%s'\n", event);
94 return -1; 97 return -1;
@@ -106,8 +109,11 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
106 if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk) 109 if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk)
107 return 0; 110 return 0;
108 111
109 if (is_socket) { 112 if (is_socket || is_sockops) {
110 event += 6; 113 if (is_socket)
114 event += 6;
115 else
116 event += 7;
111 if (*event != '/') 117 if (*event != '/')
112 return 0; 118 return 0;
113 event++; 119 event++;
@@ -560,7 +566,8 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
560 memcmp(shname, "xdp", 3) == 0 || 566 memcmp(shname, "xdp", 3) == 0 ||
561 memcmp(shname, "perf_event", 10) == 0 || 567 memcmp(shname, "perf_event", 10) == 0 ||
562 memcmp(shname, "socket", 6) == 0 || 568 memcmp(shname, "socket", 6) == 0 ||
563 memcmp(shname, "cgroup/", 7) == 0) 569 memcmp(shname, "cgroup/", 7) == 0 ||
570 memcmp(shname, "sockops", 7) == 0)
564 load_and_attach(shname, data->d_buf, data->d_size); 571 load_and_attach(shname, data->d_buf, data->d_size);
565 } 572 }
566 573