aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLawrence Brakmo <brakmo@fb.com>2017-06-30 23:02:40 -0400
committerDavid S. Miller <davem@davemloft.net>2017-07-01 19:15:13 -0400
commit40304b2a1567fecc321f640ee4239556dd0f3ee0 (patch)
tree093568073bae656d93f5b878ffcbb6cefbb3853e
parent57a53a0b6788e1e3e660987e3771837efa90d980 (diff)
bpf: BPF support for sock_ops
Created a new BPF program type, BPF_PROG_TYPE_SOCK_OPS, and a corresponding struct that allows BPF programs of this type to access some of the socket's fields (such as IP addresses, ports, etc.). It uses the existing bpf cgroups infrastructure so the programs can be attached per cgroup with full inheritance support. The program will be called at appropriate times to set relevant connection parameters such as buffer sizes, SYN and SYN-ACK RTOs, etc., based on connection information such as IP addresses, port numbers, etc. Although there are already 3 mechanisms to set parameters (sysctls, route metrics and setsockopts), this new mechanism provides some distinct advantages. Unlike sysctls, it can set parameters per connection. In contrast to route metrics, it can also use port numbers and information provided by a user level program. In addition, it could set parameters probabilistically for evaluation purposes (i.e. do something different on 10% of the flows and compare results with the other 90% of the flows). Also, in cases where IPv6 addresses contain geographic information, the rules to make changes based on the distance (or RTT) between the hosts are much easier than route metric rules and can be global. Finally, unlike setsockopt, it does not require application changes and it can be updated easily at any time. Although the bpf cgroup framework already contains a sock related program type (BPF_PROG_TYPE_CGROUP_SOCK), I created the new type (BPF_PROG_TYPE_SOCK_OPS) because the existing type expects to be called only once during the connection's lifetime. In contrast, the new program type will be called multiple times from different places in the network stack code. For example, before sending SYN and SYN-ACKs to set an appropriate timeout, when the connection is established to set congestion control, etc. As a result it has an "op" field to specify the type of operation requested. 
The purpose of this new program type is to simplify setting connection parameters, such as buffer sizes, TCP's SYN RTO, etc. For example, it is easy to use facebook's internal IPv6 addresses to determine if both hosts of a connection are in the same datacenter. Therefore, it is easy to write a BPF program to choose a small SYN RTO value when both hosts are in the same datacenter. This patch only contains the framework to support the new BPF program type, following patches add the functionality to set various connection parameters. This patch defines a new BPF program type: BPF_PROG_TYPE_SOCKET_OPS and a new bpf syscall command to load a new program of this type: BPF_PROG_LOAD_SOCKET_OPS. Two new corresponding structs (one for the kernel, one for the user/BPF program): /* kernel version */ struct bpf_sock_ops_kern { struct sock *sk; __u32 op; union { __u32 reply; __u32 replylong[4]; }; }; /* user version * Some fields are in network byte order reflecting the sock struct * Use the bpf_ntohl helper macro in samples/bpf/bpf_endian.h to * convert them to host byte order. */ struct bpf_sock_ops { __u32 op; union { __u32 reply; __u32 replylong[4]; }; __u32 family; __u32 remote_ip4; /* In network byte order */ __u32 local_ip4; /* In network byte order */ __u32 remote_ip6[4]; /* In network byte order */ __u32 local_ip6[4]; /* In network byte order */ __u32 remote_port; /* In network byte order */ __u32 local_port; /* In host byte order */ }; Currently there are two types of ops. The first type expects the BPF program to return a value which is then used by the caller (or a negative value to indicate the operation is not supported). The second type expects state changes to be done by the BPF program, for example through a setsockopt BPF helper function, and they ignore the return value. The reply fields of the bpf_sock_ops struct are there in case a bpf program needs to return a value larger than an integer. 
Signed-off-by: Lawrence Brakmo <brakmo@fb.com> Acked-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/linux/bpf-cgroup.h18
-rw-r--r--include/linux/bpf_types.h1
-rw-r--r--include/linux/filter.h9
-rw-r--r--include/net/tcp.h36
-rw-r--r--include/uapi/linux/bpf.h30
-rw-r--r--kernel/bpf/cgroup.c37
-rw-r--r--kernel/bpf/syscall.c5
-rw-r--r--net/core/filter.c168
-rw-r--r--samples/bpf/bpf_load.c13
9 files changed, 314 insertions, 3 deletions
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index c970a25d2a49..360c082e885c 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -7,6 +7,7 @@
7struct sock; 7struct sock;
8struct cgroup; 8struct cgroup;
9struct sk_buff; 9struct sk_buff;
10struct bpf_sock_ops_kern;
10 11
11#ifdef CONFIG_CGROUP_BPF 12#ifdef CONFIG_CGROUP_BPF
12 13
@@ -42,6 +43,10 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
42int __cgroup_bpf_run_filter_sk(struct sock *sk, 43int __cgroup_bpf_run_filter_sk(struct sock *sk,
43 enum bpf_attach_type type); 44 enum bpf_attach_type type);
44 45
46int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
47 struct bpf_sock_ops_kern *sock_ops,
48 enum bpf_attach_type type);
49
45/* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */ 50/* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */
46#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \ 51#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \
47({ \ 52({ \
@@ -75,6 +80,18 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
75 __ret; \ 80 __ret; \
76}) 81})
77 82
83#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) \
84({ \
85 int __ret = 0; \
86 if (cgroup_bpf_enabled && (sock_ops)->sk) { \
87 typeof(sk) __sk = sk_to_full_sk((sock_ops)->sk); \
88 if (sk_fullsock(__sk)) \
89 __ret = __cgroup_bpf_run_filter_sock_ops(__sk, \
90 sock_ops, \
91 BPF_CGROUP_SOCK_OPS); \
92 } \
93 __ret; \
94})
78#else 95#else
79 96
80struct cgroup_bpf {}; 97struct cgroup_bpf {};
@@ -85,6 +102,7 @@ static inline void cgroup_bpf_inherit(struct cgroup *cgrp,
85#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) 102#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
86#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) 103#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
87#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; }) 104#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
105#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
88 106
89#endif /* CONFIG_CGROUP_BPF */ 107#endif /* CONFIG_CGROUP_BPF */
90 108
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 03bf223f18be..3d137c33d664 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -10,6 +10,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock_prog_ops)
10BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout_prog_ops) 10BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout_prog_ops)
11BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout_prog_ops) 11BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout_prog_ops)
12BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit_prog_ops) 12BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit_prog_ops)
13BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops_prog_ops)
13#endif 14#endif
14#ifdef CONFIG_BPF_EVENTS 15#ifdef CONFIG_BPF_EVENTS
15BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe_prog_ops) 16BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe_prog_ops)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 1fa26dc562ce..738f8b14f025 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -898,4 +898,13 @@ static inline int bpf_tell_extensions(void)
898 return SKF_AD_MAX; 898 return SKF_AD_MAX;
899} 899}
900 900
901struct bpf_sock_ops_kern {
902 struct sock *sk;
903 u32 op;
904 union {
905 u32 reply;
906 u32 replylong[4];
907 };
908};
909
901#endif /* __LINUX_FILTER_H__ */ 910#endif /* __LINUX_FILTER_H__ */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index d0751b79d99c..e58500825006 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -46,6 +46,10 @@
46#include <linux/seq_file.h> 46#include <linux/seq_file.h>
47#include <linux/memcontrol.h> 47#include <linux/memcontrol.h>
48 48
49#include <linux/bpf.h>
50#include <linux/filter.h>
51#include <linux/bpf-cgroup.h>
52
49extern struct inet_hashinfo tcp_hashinfo; 53extern struct inet_hashinfo tcp_hashinfo;
50 54
51extern struct percpu_counter tcp_orphan_count; 55extern struct percpu_counter tcp_orphan_count;
@@ -2021,4 +2025,36 @@ int tcp_set_ulp(struct sock *sk, const char *name);
2021void tcp_get_available_ulp(char *buf, size_t len); 2025void tcp_get_available_ulp(char *buf, size_t len);
2022void tcp_cleanup_ulp(struct sock *sk); 2026void tcp_cleanup_ulp(struct sock *sk);
2023 2027
2028/* Call BPF_SOCK_OPS program that returns an int. If the return value
2029 * is < 0, then the BPF op failed (for example if the loaded BPF
2030 * program does not support the chosen operation or there is no BPF
2031 * program loaded).
2032 */
2033#ifdef CONFIG_BPF
2034static inline int tcp_call_bpf(struct sock *sk, int op)
2035{
2036 struct bpf_sock_ops_kern sock_ops;
2037 int ret;
2038
2039 if (sk_fullsock(sk))
2040 sock_owned_by_me(sk);
2041
2042 memset(&sock_ops, 0, sizeof(sock_ops));
2043 sock_ops.sk = sk;
2044 sock_ops.op = op;
2045
2046 ret = BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
2047 if (ret == 0)
2048 ret = sock_ops.reply;
2049 else
2050 ret = -1;
2051 return ret;
2052}
2053#else
2054static inline int tcp_call_bpf(struct sock *sk, int op)
2055{
2056 return -EPERM;
2057}
2058#endif
2059
2024#endif /* _TCP_H */ 2060#endif /* _TCP_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f94b48b168dc..01cd485ccd4f 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -120,12 +120,14 @@ enum bpf_prog_type {
120 BPF_PROG_TYPE_LWT_IN, 120 BPF_PROG_TYPE_LWT_IN,
121 BPF_PROG_TYPE_LWT_OUT, 121 BPF_PROG_TYPE_LWT_OUT,
122 BPF_PROG_TYPE_LWT_XMIT, 122 BPF_PROG_TYPE_LWT_XMIT,
123 BPF_PROG_TYPE_SOCK_OPS,
123}; 124};
124 125
125enum bpf_attach_type { 126enum bpf_attach_type {
126 BPF_CGROUP_INET_INGRESS, 127 BPF_CGROUP_INET_INGRESS,
127 BPF_CGROUP_INET_EGRESS, 128 BPF_CGROUP_INET_EGRESS,
128 BPF_CGROUP_INET_SOCK_CREATE, 129 BPF_CGROUP_INET_SOCK_CREATE,
130 BPF_CGROUP_SOCK_OPS,
129 __MAX_BPF_ATTACH_TYPE 131 __MAX_BPF_ATTACH_TYPE
130}; 132};
131 133
@@ -720,4 +722,32 @@ struct bpf_map_info {
720 __u32 map_flags; 722 __u32 map_flags;
721} __attribute__((aligned(8))); 723} __attribute__((aligned(8)));
722 724
725/* User bpf_sock_ops struct to access socket values and specify request ops
726 * and their replies.
727 * Some of this fields are in network (bigendian) byte order and may need
728 * to be converted before use (bpf_ntohl() defined in samples/bpf/bpf_endian.h).
729 * New fields can only be added at the end of this structure
730 */
731struct bpf_sock_ops {
732 __u32 op;
733 union {
734 __u32 reply;
735 __u32 replylong[4];
736 };
737 __u32 family;
738 __u32 remote_ip4; /* Stored in network byte order */
739 __u32 local_ip4; /* Stored in network byte order */
740 __u32 remote_ip6[4]; /* Stored in network byte order */
741 __u32 local_ip6[4]; /* Stored in network byte order */
742 __u32 remote_port; /* Stored in network byte order */
743 __u32 local_port; /* stored in host byte order */
744};
745
746/* List of known BPF sock_ops operators.
747 * New entries can only be added at the end
748 */
749enum {
750 BPF_SOCK_OPS_VOID,
751};
752
723#endif /* _UAPI__LINUX_BPF_H__ */ 753#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index ea6033cba947..546113430049 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -236,3 +236,40 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
236 return ret; 236 return ret;
237} 237}
238EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); 238EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
239
240/**
241 * __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock
242 * @sk: socket to get cgroup from
243 * @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains
244 * sk with connection information (IP addresses, etc.) May not contain
245 * cgroup info if it is a req sock.
246 * @type: The type of program to be exectuted
247 *
248 * socket passed is expected to be of type INET or INET6.
249 *
250 * The program type passed in via @type must be suitable for sock_ops
251 * filtering. No further check is performed to assert that.
252 *
253 * This function will return %-EPERM if any if an attached program was found
254 * and if it returned != 1 during execution. In all other cases, 0 is returned.
255 */
256int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
257 struct bpf_sock_ops_kern *sock_ops,
258 enum bpf_attach_type type)
259{
260 struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
261 struct bpf_prog *prog;
262 int ret = 0;
263
264
265 rcu_read_lock();
266
267 prog = rcu_dereference(cgrp->bpf.effective[type]);
268 if (prog)
269 ret = BPF_PROG_RUN(prog, sock_ops) == 1 ? 0 : -EPERM;
270
271 rcu_read_unlock();
272
273 return ret;
274}
275EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 4409ccca8831..d4d47de75bba 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1079,6 +1079,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
1079 case BPF_CGROUP_INET_SOCK_CREATE: 1079 case BPF_CGROUP_INET_SOCK_CREATE:
1080 ptype = BPF_PROG_TYPE_CGROUP_SOCK; 1080 ptype = BPF_PROG_TYPE_CGROUP_SOCK;
1081 break; 1081 break;
1082 case BPF_CGROUP_SOCK_OPS:
1083 ptype = BPF_PROG_TYPE_SOCK_OPS;
1084 break;
1082 default: 1085 default:
1083 return -EINVAL; 1086 return -EINVAL;
1084 } 1087 }
@@ -1119,6 +1122,7 @@ static int bpf_prog_detach(const union bpf_attr *attr)
1119 case BPF_CGROUP_INET_INGRESS: 1122 case BPF_CGROUP_INET_INGRESS:
1120 case BPF_CGROUP_INET_EGRESS: 1123 case BPF_CGROUP_INET_EGRESS:
1121 case BPF_CGROUP_INET_SOCK_CREATE: 1124 case BPF_CGROUP_INET_SOCK_CREATE:
1125 case BPF_CGROUP_SOCK_OPS:
1122 cgrp = cgroup_get_from_fd(attr->target_fd); 1126 cgrp = cgroup_get_from_fd(attr->target_fd);
1123 if (IS_ERR(cgrp)) 1127 if (IS_ERR(cgrp))
1124 return PTR_ERR(cgrp); 1128 return PTR_ERR(cgrp);
@@ -1133,6 +1137,7 @@ static int bpf_prog_detach(const union bpf_attr *attr)
1133 1137
1134 return ret; 1138 return ret;
1135} 1139}
1140
1136#endif /* CONFIG_CGROUP_BPF */ 1141#endif /* CONFIG_CGROUP_BPF */
1137 1142
1138#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration 1143#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration
diff --git a/net/core/filter.c b/net/core/filter.c
index b39c869d22e3..1f6a26c4f8b9 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3110,6 +3110,36 @@ void bpf_warn_invalid_xdp_action(u32 act)
3110} 3110}
3111EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); 3111EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
3112 3112
3113static bool __is_valid_sock_ops_access(int off, int size)
3114{
3115 if (off < 0 || off >= sizeof(struct bpf_sock_ops))
3116 return false;
3117 /* The verifier guarantees that size > 0. */
3118 if (off % size != 0)
3119 return false;
3120 if (size != sizeof(__u32))
3121 return false;
3122
3123 return true;
3124}
3125
3126static bool sock_ops_is_valid_access(int off, int size,
3127 enum bpf_access_type type,
3128 struct bpf_insn_access_aux *info)
3129{
3130 if (type == BPF_WRITE) {
3131 switch (off) {
3132 case offsetof(struct bpf_sock_ops, op) ...
3133 offsetof(struct bpf_sock_ops, replylong[3]):
3134 break;
3135 default:
3136 return false;
3137 }
3138 }
3139
3140 return __is_valid_sock_ops_access(off, size);
3141}
3142
3113static u32 bpf_convert_ctx_access(enum bpf_access_type type, 3143static u32 bpf_convert_ctx_access(enum bpf_access_type type,
3114 const struct bpf_insn *si, 3144 const struct bpf_insn *si,
3115 struct bpf_insn *insn_buf, 3145 struct bpf_insn *insn_buf,
@@ -3379,6 +3409,138 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
3379 return insn - insn_buf; 3409 return insn - insn_buf;
3380} 3410}
3381 3411
3412static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
3413 const struct bpf_insn *si,
3414 struct bpf_insn *insn_buf,
3415 struct bpf_prog *prog)
3416{
3417 struct bpf_insn *insn = insn_buf;
3418 int off;
3419
3420 switch (si->off) {
3421 case offsetof(struct bpf_sock_ops, op) ...
3422 offsetof(struct bpf_sock_ops, replylong[3]):
3423 BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, op) !=
3424 FIELD_SIZEOF(struct bpf_sock_ops_kern, op));
3425 BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, reply) !=
3426 FIELD_SIZEOF(struct bpf_sock_ops_kern, reply));
3427 BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, replylong) !=
3428 FIELD_SIZEOF(struct bpf_sock_ops_kern, replylong));
3429 off = si->off;
3430 off -= offsetof(struct bpf_sock_ops, op);
3431 off += offsetof(struct bpf_sock_ops_kern, op);
3432 if (type == BPF_WRITE)
3433 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
3434 off);
3435 else
3436 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
3437 off);
3438 break;
3439
3440 case offsetof(struct bpf_sock_ops, family):
3441 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);
3442
3443 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
3444 struct bpf_sock_ops_kern, sk),
3445 si->dst_reg, si->src_reg,
3446 offsetof(struct bpf_sock_ops_kern, sk));
3447 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
3448 offsetof(struct sock_common, skc_family));
3449 break;
3450
3451 case offsetof(struct bpf_sock_ops, remote_ip4):
3452 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);
3453
3454 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
3455 struct bpf_sock_ops_kern, sk),
3456 si->dst_reg, si->src_reg,
3457 offsetof(struct bpf_sock_ops_kern, sk));
3458 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
3459 offsetof(struct sock_common, skc_daddr));
3460 break;
3461
3462 case offsetof(struct bpf_sock_ops, local_ip4):
3463 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_rcv_saddr) != 4);
3464
3465 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
3466 struct bpf_sock_ops_kern, sk),
3467 si->dst_reg, si->src_reg,
3468 offsetof(struct bpf_sock_ops_kern, sk));
3469 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
3470 offsetof(struct sock_common,
3471 skc_rcv_saddr));
3472 break;
3473
3474 case offsetof(struct bpf_sock_ops, remote_ip6[0]) ...
3475 offsetof(struct bpf_sock_ops, remote_ip6[3]):
3476#if IS_ENABLED(CONFIG_IPV6)
3477 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
3478 skc_v6_daddr.s6_addr32[0]) != 4);
3479
3480 off = si->off;
3481 off -= offsetof(struct bpf_sock_ops, remote_ip6[0]);
3482 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
3483 struct bpf_sock_ops_kern, sk),
3484 si->dst_reg, si->src_reg,
3485 offsetof(struct bpf_sock_ops_kern, sk));
3486 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
3487 offsetof(struct sock_common,
3488 skc_v6_daddr.s6_addr32[0]) +
3489 off);
3490#else
3491 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
3492#endif
3493 break;
3494
3495 case offsetof(struct bpf_sock_ops, local_ip6[0]) ...
3496 offsetof(struct bpf_sock_ops, local_ip6[3]):
3497#if IS_ENABLED(CONFIG_IPV6)
3498 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
3499 skc_v6_rcv_saddr.s6_addr32[0]) != 4);
3500
3501 off = si->off;
3502 off -= offsetof(struct bpf_sock_ops, local_ip6[0]);
3503 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
3504 struct bpf_sock_ops_kern, sk),
3505 si->dst_reg, si->src_reg,
3506 offsetof(struct bpf_sock_ops_kern, sk));
3507 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
3508 offsetof(struct sock_common,
3509 skc_v6_rcv_saddr.s6_addr32[0]) +
3510 off);
3511#else
3512 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
3513#endif
3514 break;
3515
3516 case offsetof(struct bpf_sock_ops, remote_port):
3517 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);
3518
3519 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
3520 struct bpf_sock_ops_kern, sk),
3521 si->dst_reg, si->src_reg,
3522 offsetof(struct bpf_sock_ops_kern, sk));
3523 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
3524 offsetof(struct sock_common, skc_dport));
3525#ifndef __BIG_ENDIAN_BITFIELD
3526 *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
3527#endif
3528 break;
3529
3530 case offsetof(struct bpf_sock_ops, local_port):
3531 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);
3532
3533 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
3534 struct bpf_sock_ops_kern, sk),
3535 si->dst_reg, si->src_reg,
3536 offsetof(struct bpf_sock_ops_kern, sk));
3537 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
3538 offsetof(struct sock_common, skc_num));
3539 break;
3540 }
3541 return insn - insn_buf;
3542}
3543
3382const struct bpf_verifier_ops sk_filter_prog_ops = { 3544const struct bpf_verifier_ops sk_filter_prog_ops = {
3383 .get_func_proto = sk_filter_func_proto, 3545 .get_func_proto = sk_filter_func_proto,
3384 .is_valid_access = sk_filter_is_valid_access, 3546 .is_valid_access = sk_filter_is_valid_access,
@@ -3428,6 +3590,12 @@ const struct bpf_verifier_ops cg_sock_prog_ops = {
3428 .convert_ctx_access = sock_filter_convert_ctx_access, 3590 .convert_ctx_access = sock_filter_convert_ctx_access,
3429}; 3591};
3430 3592
3593const struct bpf_verifier_ops sock_ops_prog_ops = {
3594 .get_func_proto = bpf_base_func_proto,
3595 .is_valid_access = sock_ops_is_valid_access,
3596 .convert_ctx_access = sock_ops_convert_ctx_access,
3597};
3598
3431int sk_detach_filter(struct sock *sk) 3599int sk_detach_filter(struct sock *sk)
3432{ 3600{
3433 int ret = -ENOENT; 3601 int ret = -ENOENT;
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index a91c57dd8571..a4be7cfa6519 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -64,6 +64,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
64 bool is_perf_event = strncmp(event, "perf_event", 10) == 0; 64 bool is_perf_event = strncmp(event, "perf_event", 10) == 0;
65 bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0; 65 bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0;
66 bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0; 66 bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0;
67 bool is_sockops = strncmp(event, "sockops", 7) == 0;
67 size_t insns_cnt = size / sizeof(struct bpf_insn); 68 size_t insns_cnt = size / sizeof(struct bpf_insn);
68 enum bpf_prog_type prog_type; 69 enum bpf_prog_type prog_type;
69 char buf[256]; 70 char buf[256];
@@ -89,6 +90,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
89 prog_type = BPF_PROG_TYPE_CGROUP_SKB; 90 prog_type = BPF_PROG_TYPE_CGROUP_SKB;
90 } else if (is_cgroup_sk) { 91 } else if (is_cgroup_sk) {
91 prog_type = BPF_PROG_TYPE_CGROUP_SOCK; 92 prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
93 } else if (is_sockops) {
94 prog_type = BPF_PROG_TYPE_SOCK_OPS;
92 } else { 95 } else {
93 printf("Unknown event '%s'\n", event); 96 printf("Unknown event '%s'\n", event);
94 return -1; 97 return -1;
@@ -106,8 +109,11 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
106 if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk) 109 if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk)
107 return 0; 110 return 0;
108 111
109 if (is_socket) { 112 if (is_socket || is_sockops) {
110 event += 6; 113 if (is_socket)
114 event += 6;
115 else
116 event += 7;
111 if (*event != '/') 117 if (*event != '/')
112 return 0; 118 return 0;
113 event++; 119 event++;
@@ -560,7 +566,8 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
560 memcmp(shname, "xdp", 3) == 0 || 566 memcmp(shname, "xdp", 3) == 0 ||
561 memcmp(shname, "perf_event", 10) == 0 || 567 memcmp(shname, "perf_event", 10) == 0 ||
562 memcmp(shname, "socket", 6) == 0 || 568 memcmp(shname, "socket", 6) == 0 ||
563 memcmp(shname, "cgroup/", 7) == 0) 569 memcmp(shname, "cgroup/", 7) == 0 ||
570 memcmp(shname, "sockops", 7) == 0)
564 load_and_attach(shname, data->d_buf, data->d_size); 571 load_and_attach(shname, data->d_buf, data->d_size);
565 } 572 }
566 573