aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2014-12-06 00:47:48 -0500
committerDavid S. Miller <davem@davemloft.net>2014-12-06 00:47:48 -0500
commit8d0c4697534a739725e429ff062dea393d8860d1 (patch)
treeee28163a6c53e0131fd2d3d626d02b0610eaed2b
parentf51a5e82ea9aaf05106c00d976e772ca384a9199 (diff)
parentfbe3310840c65f3cf97dd90d23e177d061c376f2 (diff)
Merge branch 'ebpf-next'
Alexei Starovoitov says: ==================== allow eBPF programs to be attached to sockets V1->V2: fixed comments in sample code to state clearly that packet data is accessed with LD_ABS instructions and not internal skb fields. Also replaced constants in: BPF_LD_ABS(BPF_B, 14 + 9 /* R0 = ip->proto */), with: BPF_LD_ABS(BPF_B, ETH_HLEN + offsetof(struct iphdr, protocol) /* R0 = ip->proto */), V1 cover: Introduce BPF_PROG_TYPE_SOCKET_FILTER type of eBPF programs that can be attached to sockets with setsockopt(). Allow such programs to access maps via lookup/update/delete helpers. This feature was previewed by bpf manpage in commit b4fc1a460f30("Merge branch 'bpf-next'") Now it can actually run. 1st patch adds LD_ABS/LD_IND instruction verification and 2nd patch adds new setsockopt() flag. Patches 3-6 are examples in assembler and in C. Though native eBPF programs are way more powerful than classic filters (attachable through similar setsockopt() call), they don't have skb field accessors yet. Like skb->pkt_type, skb->dev->ifindex are not accessible. There are several ways to achieve that. That will be in the next set of patches. So in this set native eBPF programs can only read data from packet and access maps. The most powerful example is sockex2_kern.c from patch 6 where ~200 lines of C are compiled into ~300 of eBPF instructions. It shows how quite complex packet parsing can be done. LLVM used to build examples is at https://github.com/iovisor/llvm which is a fork of llvm trunk that I'm cleaning up for upstreaming. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--arch/alpha/include/uapi/asm/socket.h3
-rw-r--r--arch/avr32/include/uapi/asm/socket.h3
-rw-r--r--arch/cris/include/uapi/asm/socket.h3
-rw-r--r--arch/frv/include/uapi/asm/socket.h3
-rw-r--r--arch/ia64/include/uapi/asm/socket.h3
-rw-r--r--arch/m32r/include/uapi/asm/socket.h3
-rw-r--r--arch/mips/include/uapi/asm/socket.h3
-rw-r--r--arch/mn10300/include/uapi/asm/socket.h3
-rw-r--r--arch/parisc/include/uapi/asm/socket.h3
-rw-r--r--arch/powerpc/include/uapi/asm/socket.h3
-rw-r--r--arch/s390/include/uapi/asm/socket.h3
-rw-r--r--arch/sparc/include/uapi/asm/socket.h3
-rw-r--r--arch/xtensa/include/uapi/asm/socket.h3
-rw-r--r--include/linux/bpf.h4
-rw-r--r--include/linux/filter.h1
-rw-r--r--include/uapi/asm-generic/socket.h3
-rw-r--r--include/uapi/linux/bpf.h1
-rw-r--r--kernel/bpf/verifier.c70
-rw-r--r--net/core/filter.c97
-rw-r--r--net/core/sock.c13
-rw-r--r--samples/bpf/Makefile20
-rw-r--r--samples/bpf/bpf_helpers.h40
-rw-r--r--samples/bpf/bpf_load.c203
-rw-r--r--samples/bpf/bpf_load.h24
-rw-r--r--samples/bpf/libbpf.c28
-rw-r--r--samples/bpf/libbpf.h15
-rw-r--r--samples/bpf/sock_example.c101
-rw-r--r--samples/bpf/sockex1_kern.c25
-rw-r--r--samples/bpf/sockex1_user.c49
-rw-r--r--samples/bpf/sockex2_kern.c215
-rw-r--r--samples/bpf/sockex2_user.c44
31 files changed, 987 insertions, 5 deletions
diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index e2fe0700b3b4..9a20821b111c 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -89,4 +89,7 @@
89 89
90#define SO_INCOMING_CPU 49 90#define SO_INCOMING_CPU 49
91 91
92#define SO_ATTACH_BPF 50
93#define SO_DETACH_BPF SO_DETACH_FILTER
94
92#endif /* _UAPI_ASM_SOCKET_H */ 95#endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h
index 92121b0f5b98..2b65ed6b277c 100644
--- a/arch/avr32/include/uapi/asm/socket.h
+++ b/arch/avr32/include/uapi/asm/socket.h
@@ -82,4 +82,7 @@
82 82
83#define SO_INCOMING_CPU 49 83#define SO_INCOMING_CPU 49
84 84
85#define SO_ATTACH_BPF 50
86#define SO_DETACH_BPF SO_DETACH_FILTER
87
85#endif /* _UAPI__ASM_AVR32_SOCKET_H */ 88#endif /* _UAPI__ASM_AVR32_SOCKET_H */
diff --git a/arch/cris/include/uapi/asm/socket.h b/arch/cris/include/uapi/asm/socket.h
index 60f60f5b9b35..e2503d9f1869 100644
--- a/arch/cris/include/uapi/asm/socket.h
+++ b/arch/cris/include/uapi/asm/socket.h
@@ -84,6 +84,9 @@
84 84
85#define SO_INCOMING_CPU 49 85#define SO_INCOMING_CPU 49
86 86
87#define SO_ATTACH_BPF 50
88#define SO_DETACH_BPF SO_DETACH_FILTER
89
87#endif /* _ASM_SOCKET_H */ 90#endif /* _ASM_SOCKET_H */
88 91
89 92
diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h
index 2c6890209ea6..4823ad125578 100644
--- a/arch/frv/include/uapi/asm/socket.h
+++ b/arch/frv/include/uapi/asm/socket.h
@@ -82,5 +82,8 @@
82 82
83#define SO_INCOMING_CPU 49 83#define SO_INCOMING_CPU 49
84 84
85#define SO_ATTACH_BPF 50
86#define SO_DETACH_BPF SO_DETACH_FILTER
87
85#endif /* _ASM_SOCKET_H */ 88#endif /* _ASM_SOCKET_H */
86 89
diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h
index 09a93fb566f6..59be3d87f86d 100644
--- a/arch/ia64/include/uapi/asm/socket.h
+++ b/arch/ia64/include/uapi/asm/socket.h
@@ -91,4 +91,7 @@
91 91
92#define SO_INCOMING_CPU 49 92#define SO_INCOMING_CPU 49
93 93
94#define SO_ATTACH_BPF 50
95#define SO_DETACH_BPF SO_DETACH_FILTER
96
94#endif /* _ASM_IA64_SOCKET_H */ 97#endif /* _ASM_IA64_SOCKET_H */
diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h
index e8589819c274..7bc4cb273856 100644
--- a/arch/m32r/include/uapi/asm/socket.h
+++ b/arch/m32r/include/uapi/asm/socket.h
@@ -82,4 +82,7 @@
82 82
83#define SO_INCOMING_CPU 49 83#define SO_INCOMING_CPU 49
84 84
85#define SO_ATTACH_BPF 50
86#define SO_DETACH_BPF SO_DETACH_FILTER
87
85#endif /* _ASM_M32R_SOCKET_H */ 88#endif /* _ASM_M32R_SOCKET_H */
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index 2e9ee8c55a10..dec3c850f36b 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -100,4 +100,7 @@
100 100
101#define SO_INCOMING_CPU 49 101#define SO_INCOMING_CPU 49
102 102
103#define SO_ATTACH_BPF 50
104#define SO_DETACH_BPF SO_DETACH_FILTER
105
103#endif /* _UAPI_ASM_SOCKET_H */ 106#endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h
index f3492e8c9f70..cab7d6d50051 100644
--- a/arch/mn10300/include/uapi/asm/socket.h
+++ b/arch/mn10300/include/uapi/asm/socket.h
@@ -82,4 +82,7 @@
82 82
83#define SO_INCOMING_CPU 49 83#define SO_INCOMING_CPU 49
84 84
85#define SO_ATTACH_BPF 50
86#define SO_DETACH_BPF SO_DETACH_FILTER
87
85#endif /* _ASM_SOCKET_H */ 88#endif /* _ASM_SOCKET_H */
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index 7984a1cab3da..a5cd40cd8ee1 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -81,4 +81,7 @@
81 81
82#define SO_INCOMING_CPU 0x402A 82#define SO_INCOMING_CPU 0x402A
83 83
84#define SO_ATTACH_BPF 0x402B
85#define SO_DETACH_BPF SO_DETACH_FILTER
86
84#endif /* _UAPI_ASM_SOCKET_H */ 87#endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h
index 3474e4ef166d..c046666038f8 100644
--- a/arch/powerpc/include/uapi/asm/socket.h
+++ b/arch/powerpc/include/uapi/asm/socket.h
@@ -89,4 +89,7 @@
89 89
90#define SO_INCOMING_CPU 49 90#define SO_INCOMING_CPU 49
91 91
92#define SO_ATTACH_BPF 50
93#define SO_DETACH_BPF SO_DETACH_FILTER
94
92#endif /* _ASM_POWERPC_SOCKET_H */ 95#endif /* _ASM_POWERPC_SOCKET_H */
diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h
index 8457636c33e1..296942d56e6a 100644
--- a/arch/s390/include/uapi/asm/socket.h
+++ b/arch/s390/include/uapi/asm/socket.h
@@ -88,4 +88,7 @@
88 88
89#define SO_INCOMING_CPU 49 89#define SO_INCOMING_CPU 49
90 90
91#define SO_ATTACH_BPF 50
92#define SO_DETACH_BPF SO_DETACH_FILTER
93
91#endif /* _ASM_SOCKET_H */ 94#endif /* _ASM_SOCKET_H */
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index 4a8003a94163..e6a16c40be5f 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -78,6 +78,9 @@
78 78
79#define SO_INCOMING_CPU 0x0033 79#define SO_INCOMING_CPU 0x0033
80 80
81#define SO_ATTACH_BPF 0x0034
82#define SO_DETACH_BPF SO_DETACH_FILTER
83
81/* Security levels - as per NRL IPv6 - don't actually do anything */ 84/* Security levels - as per NRL IPv6 - don't actually do anything */
82#define SO_SECURITY_AUTHENTICATION 0x5001 85#define SO_SECURITY_AUTHENTICATION 0x5001
83#define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 86#define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002
diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h
index c46f6a696849..4120af086160 100644
--- a/arch/xtensa/include/uapi/asm/socket.h
+++ b/arch/xtensa/include/uapi/asm/socket.h
@@ -93,4 +93,7 @@
93 93
94#define SO_INCOMING_CPU 49 94#define SO_INCOMING_CPU 49
95 95
96#define SO_ATTACH_BPF 50
97#define SO_DETACH_BPF SO_DETACH_FILTER
98
96#endif /* _XTENSA_SOCKET_H */ 99#endif /* _XTENSA_SOCKET_H */
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 75e94eaa228b..bbfceb756452 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -128,7 +128,11 @@ struct bpf_prog_aux {
128 struct work_struct work; 128 struct work_struct work;
129}; 129};
130 130
131#ifdef CONFIG_BPF_SYSCALL
131void bpf_prog_put(struct bpf_prog *prog); 132void bpf_prog_put(struct bpf_prog *prog);
133#else
134static inline void bpf_prog_put(struct bpf_prog *prog) {}
135#endif
132struct bpf_prog *bpf_prog_get(u32 ufd); 136struct bpf_prog *bpf_prog_get(u32 ufd);
133/* verify correctness of eBPF program */ 137/* verify correctness of eBPF program */
134int bpf_check(struct bpf_prog *fp, union bpf_attr *attr); 138int bpf_check(struct bpf_prog *fp, union bpf_attr *attr);
diff --git a/include/linux/filter.h b/include/linux/filter.h
index ca95abd2bed1..caac2087a4d5 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -381,6 +381,7 @@ int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog);
381void bpf_prog_destroy(struct bpf_prog *fp); 381void bpf_prog_destroy(struct bpf_prog *fp);
382 382
383int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); 383int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
384int sk_attach_bpf(u32 ufd, struct sock *sk);
384int sk_detach_filter(struct sock *sk); 385int sk_detach_filter(struct sock *sk);
385 386
386int bpf_check_classic(const struct sock_filter *filter, unsigned int flen); 387int bpf_check_classic(const struct sock_filter *filter, unsigned int flen);
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index f541ccefd4ac..5c15c2a5c123 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -84,4 +84,7 @@
84 84
85#define SO_INCOMING_CPU 49 85#define SO_INCOMING_CPU 49
86 86
87#define SO_ATTACH_BPF 50
88#define SO_DETACH_BPF SO_DETACH_FILTER
89
87#endif /* __ASM_GENERIC_SOCKET_H */ 90#endif /* __ASM_GENERIC_SOCKET_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4a3d0f84f178..45da7ec7d274 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -117,6 +117,7 @@ enum bpf_map_type {
117 117
118enum bpf_prog_type { 118enum bpf_prog_type {
119 BPF_PROG_TYPE_UNSPEC, 119 BPF_PROG_TYPE_UNSPEC,
120 BPF_PROG_TYPE_SOCKET_FILTER,
120}; 121};
121 122
122/* flags for BPF_MAP_UPDATE_ELEM command */ 123/* flags for BPF_MAP_UPDATE_ELEM command */
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index b6a1f7c14a67..a28e09c7825d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1172,6 +1172,70 @@ static int check_ld_imm(struct verifier_env *env, struct bpf_insn *insn)
1172 return 0; 1172 return 0;
1173} 1173}
1174 1174
1175/* verify safety of LD_ABS|LD_IND instructions:
1176 * - they can only appear in the programs where ctx == skb
1177 * - since they are wrappers of function calls, they scratch R1-R5 registers,
1178 * preserve R6-R9, and store return value into R0
1179 *
1180 * Implicit input:
1181 * ctx == skb == R6 == CTX
1182 *
1183 * Explicit input:
1184 * SRC == any register
1185 * IMM == 32-bit immediate
1186 *
1187 * Output:
1188 * R0 - 8/16/32-bit skb data converted to cpu endianness
1189 */
1190static int check_ld_abs(struct verifier_env *env, struct bpf_insn *insn)
1191{
1192 struct reg_state *regs = env->cur_state.regs;
1193 u8 mode = BPF_MODE(insn->code);
1194 struct reg_state *reg;
1195 int i, err;
1196
1197 if (env->prog->aux->prog_type != BPF_PROG_TYPE_SOCKET_FILTER) {
1198 verbose("BPF_LD_ABS|IND instructions are only allowed in socket filters\n");
1199 return -EINVAL;
1200 }
1201
1202 if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
1203 (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
1204 verbose("BPF_LD_ABS uses reserved fields\n");
1205 return -EINVAL;
1206 }
1207
1208 /* check whether implicit source operand (register R6) is readable */
1209 err = check_reg_arg(regs, BPF_REG_6, SRC_OP);
1210 if (err)
1211 return err;
1212
1213 if (regs[BPF_REG_6].type != PTR_TO_CTX) {
1214 verbose("at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
1215 return -EINVAL;
1216 }
1217
1218 if (mode == BPF_IND) {
1219 /* check explicit source operand */
1220 err = check_reg_arg(regs, insn->src_reg, SRC_OP);
1221 if (err)
1222 return err;
1223 }
1224
1225 /* reset caller saved regs to unreadable */
1226 for (i = 0; i < CALLER_SAVED_REGS; i++) {
1227 reg = regs + caller_saved[i];
1228 reg->type = NOT_INIT;
1229 reg->imm = 0;
1230 }
1231
1232 /* mark destination R0 register as readable, since it contains
1233 * the value fetched from the packet
1234 */
1235 regs[BPF_REG_0].type = UNKNOWN_VALUE;
1236 return 0;
1237}
1238
1175/* non-recursive DFS pseudo code 1239/* non-recursive DFS pseudo code
1176 * 1 procedure DFS-iterative(G,v): 1240 * 1 procedure DFS-iterative(G,v):
1177 * 2 label v as discovered 1241 * 2 label v as discovered
@@ -1677,8 +1741,10 @@ process_bpf_exit:
1677 u8 mode = BPF_MODE(insn->code); 1741 u8 mode = BPF_MODE(insn->code);
1678 1742
1679 if (mode == BPF_ABS || mode == BPF_IND) { 1743 if (mode == BPF_ABS || mode == BPF_IND) {
1680 verbose("LD_ABS is not supported yet\n"); 1744 err = check_ld_abs(env, insn);
1681 return -EINVAL; 1745 if (err)
1746 return err;
1747
1682 } else if (mode == BPF_IMM) { 1748 } else if (mode == BPF_IMM) {
1683 err = check_ld_imm(env, insn); 1749 err = check_ld_imm(env, insn);
1684 if (err) 1750 if (err)
diff --git a/net/core/filter.c b/net/core/filter.c
index 647b12265e18..8cc3c03078b3 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -44,6 +44,7 @@
44#include <linux/ratelimit.h> 44#include <linux/ratelimit.h>
45#include <linux/seccomp.h> 45#include <linux/seccomp.h>
46#include <linux/if_vlan.h> 46#include <linux/if_vlan.h>
47#include <linux/bpf.h>
47 48
48/** 49/**
49 * sk_filter - run a packet through a socket filter 50 * sk_filter - run a packet through a socket filter
@@ -813,8 +814,12 @@ static void bpf_release_orig_filter(struct bpf_prog *fp)
813 814
814static void __bpf_prog_release(struct bpf_prog *prog) 815static void __bpf_prog_release(struct bpf_prog *prog)
815{ 816{
816 bpf_release_orig_filter(prog); 817 if (prog->aux->prog_type == BPF_PROG_TYPE_SOCKET_FILTER) {
817 bpf_prog_free(prog); 818 bpf_prog_put(prog);
819 } else {
820 bpf_release_orig_filter(prog);
821 bpf_prog_free(prog);
822 }
818} 823}
819 824
820static void __sk_filter_release(struct sk_filter *fp) 825static void __sk_filter_release(struct sk_filter *fp)
@@ -1088,6 +1093,94 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
1088} 1093}
1089EXPORT_SYMBOL_GPL(sk_attach_filter); 1094EXPORT_SYMBOL_GPL(sk_attach_filter);
1090 1095
1096#ifdef CONFIG_BPF_SYSCALL
1097int sk_attach_bpf(u32 ufd, struct sock *sk)
1098{
1099 struct sk_filter *fp, *old_fp;
1100 struct bpf_prog *prog;
1101
1102 if (sock_flag(sk, SOCK_FILTER_LOCKED))
1103 return -EPERM;
1104
1105 prog = bpf_prog_get(ufd);
1106 if (!prog)
1107 return -EINVAL;
1108
1109 if (prog->aux->prog_type != BPF_PROG_TYPE_SOCKET_FILTER) {
1110 /* valid fd, but invalid program type */
1111 bpf_prog_put(prog);
1112 return -EINVAL;
1113 }
1114
1115 fp = kmalloc(sizeof(*fp), GFP_KERNEL);
1116 if (!fp) {
1117 bpf_prog_put(prog);
1118 return -ENOMEM;
1119 }
1120 fp->prog = prog;
1121
1122 atomic_set(&fp->refcnt, 0);
1123
1124 if (!sk_filter_charge(sk, fp)) {
1125 __sk_filter_release(fp);
1126 return -ENOMEM;
1127 }
1128
1129 old_fp = rcu_dereference_protected(sk->sk_filter,
1130 sock_owned_by_user(sk));
1131 rcu_assign_pointer(sk->sk_filter, fp);
1132
1133 if (old_fp)
1134 sk_filter_uncharge(sk, old_fp);
1135
1136 return 0;
1137}
1138
1139/* allow socket filters to call
1140 * bpf_map_lookup_elem(), bpf_map_update_elem(), bpf_map_delete_elem()
1141 */
1142static const struct bpf_func_proto *sock_filter_func_proto(enum bpf_func_id func_id)
1143{
1144 switch (func_id) {
1145 case BPF_FUNC_map_lookup_elem:
1146 return &bpf_map_lookup_elem_proto;
1147 case BPF_FUNC_map_update_elem:
1148 return &bpf_map_update_elem_proto;
1149 case BPF_FUNC_map_delete_elem:
1150 return &bpf_map_delete_elem_proto;
1151 default:
1152 return NULL;
1153 }
1154}
1155
1156static bool sock_filter_is_valid_access(int off, int size, enum bpf_access_type type)
1157{
1158 /* skb fields cannot be accessed yet */
1159 return false;
1160}
1161
1162static struct bpf_verifier_ops sock_filter_ops = {
1163 .get_func_proto = sock_filter_func_proto,
1164 .is_valid_access = sock_filter_is_valid_access,
1165};
1166
1167static struct bpf_prog_type_list tl = {
1168 .ops = &sock_filter_ops,
1169 .type = BPF_PROG_TYPE_SOCKET_FILTER,
1170};
1171
1172static int __init register_sock_filter_ops(void)
1173{
1174 bpf_register_prog_type(&tl);
1175 return 0;
1176}
1177late_initcall(register_sock_filter_ops);
1178#else
1179int sk_attach_bpf(u32 ufd, struct sock *sk)
1180{
1181 return -EOPNOTSUPP;
1182}
1183#endif
1091int sk_detach_filter(struct sock *sk) 1184int sk_detach_filter(struct sock *sk)
1092{ 1185{
1093 int ret = -ENOENT; 1186 int ret = -ENOENT;
diff --git a/net/core/sock.c b/net/core/sock.c
index 0725cf0cb685..9a56b2000c3f 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -888,6 +888,19 @@ set_rcvbuf:
888 } 888 }
889 break; 889 break;
890 890
891 case SO_ATTACH_BPF:
892 ret = -EINVAL;
893 if (optlen == sizeof(u32)) {
894 u32 ufd;
895
896 ret = -EFAULT;
897 if (copy_from_user(&ufd, optval, sizeof(ufd)))
898 break;
899
900 ret = sk_attach_bpf(ufd, sk);
901 }
902 break;
903
891 case SO_DETACH_FILTER: 904 case SO_DETACH_FILTER:
892 ret = sk_detach_filter(sk); 905 ret = sk_detach_filter(sk);
893 break; 906 break;
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 0718d9ce4619..b5b3600dcdf5 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -3,11 +3,31 @@ obj- := dummy.o
3 3
4# List of programs to build 4# List of programs to build
5hostprogs-y := test_verifier test_maps 5hostprogs-y := test_verifier test_maps
6hostprogs-y += sock_example
7hostprogs-y += sockex1
8hostprogs-y += sockex2
6 9
7test_verifier-objs := test_verifier.o libbpf.o 10test_verifier-objs := test_verifier.o libbpf.o
8test_maps-objs := test_maps.o libbpf.o 11test_maps-objs := test_maps.o libbpf.o
12sock_example-objs := sock_example.o libbpf.o
13sockex1-objs := bpf_load.o libbpf.o sockex1_user.o
14sockex2-objs := bpf_load.o libbpf.o sockex2_user.o
9 15
10# Tell kbuild to always build the programs 16# Tell kbuild to always build the programs
11always := $(hostprogs-y) 17always := $(hostprogs-y)
18always += sockex1_kern.o
19always += sockex2_kern.o
12 20
13HOSTCFLAGS += -I$(objtree)/usr/include 21HOSTCFLAGS += -I$(objtree)/usr/include
22
23HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable
24HOSTLOADLIBES_sockex1 += -lelf
25HOSTLOADLIBES_sockex2 += -lelf
26
27# point this to your LLVM backend with bpf support
28LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc
29
30%.o: %.c
31 clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \
32 -D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \
33 -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
new file mode 100644
index 000000000000..ca0333146006
--- /dev/null
+++ b/samples/bpf/bpf_helpers.h
@@ -0,0 +1,40 @@
1#ifndef __BPF_HELPERS_H
2#define __BPF_HELPERS_H
3
4/* helper macro to place programs, maps, license in
5 * different sections in elf_bpf file. Section names
6 * are interpreted by elf_bpf loader
7 */
8#define SEC(NAME) __attribute__((section(NAME), used))
9
10/* helper functions called from eBPF programs written in C */
11static void *(*bpf_map_lookup_elem)(void *map, void *key) =
12 (void *) BPF_FUNC_map_lookup_elem;
13static int (*bpf_map_update_elem)(void *map, void *key, void *value,
14 unsigned long long flags) =
15 (void *) BPF_FUNC_map_update_elem;
16static int (*bpf_map_delete_elem)(void *map, void *key) =
17 (void *) BPF_FUNC_map_delete_elem;
18
19/* llvm builtin functions that eBPF C program may use to
20 * emit BPF_LD_ABS and BPF_LD_IND instructions
21 */
22struct sk_buff;
23unsigned long long load_byte(void *skb,
24 unsigned long long off) asm("llvm.bpf.load.byte");
25unsigned long long load_half(void *skb,
26 unsigned long long off) asm("llvm.bpf.load.half");
27unsigned long long load_word(void *skb,
28 unsigned long long off) asm("llvm.bpf.load.word");
29
30/* a helper structure used by eBPF C program
31 * to describe map attributes to elf_bpf loader
32 */
33struct bpf_map_def {
34 unsigned int type;
35 unsigned int key_size;
36 unsigned int value_size;
37 unsigned int max_entries;
38};
39
40#endif
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
new file mode 100644
index 000000000000..1831d236382b
--- /dev/null
+++ b/samples/bpf/bpf_load.c
@@ -0,0 +1,203 @@
1#include <stdio.h>
2#include <sys/types.h>
3#include <sys/stat.h>
4#include <fcntl.h>
5#include <libelf.h>
6#include <gelf.h>
7#include <errno.h>
8#include <unistd.h>
9#include <string.h>
10#include <stdbool.h>
11#include <linux/bpf.h>
12#include <linux/filter.h>
13#include "libbpf.h"
14#include "bpf_helpers.h"
15#include "bpf_load.h"
16
17static char license[128];
18static bool processed_sec[128];
19int map_fd[MAX_MAPS];
20int prog_fd[MAX_PROGS];
21int prog_cnt;
22
23static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
24{
25 int fd;
26 bool is_socket = strncmp(event, "socket", 6) == 0;
27
28 if (!is_socket)
29 /* tracing events tbd */
30 return -1;
31
32 fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER,
33 prog, size, license);
34
35 if (fd < 0) {
36 printf("bpf_prog_load() err=%d\n%s", errno, bpf_log_buf);
37 return -1;
38 }
39
40 prog_fd[prog_cnt++] = fd;
41
42 return 0;
43}
44
45static int load_maps(struct bpf_map_def *maps, int len)
46{
47 int i;
48
49 for (i = 0; i < len / sizeof(struct bpf_map_def); i++) {
50
51 map_fd[i] = bpf_create_map(maps[i].type,
52 maps[i].key_size,
53 maps[i].value_size,
54 maps[i].max_entries);
55 if (map_fd[i] < 0)
56 return 1;
57 }
58 return 0;
59}
60
61static int get_sec(Elf *elf, int i, GElf_Ehdr *ehdr, char **shname,
62 GElf_Shdr *shdr, Elf_Data **data)
63{
64 Elf_Scn *scn;
65
66 scn = elf_getscn(elf, i);
67 if (!scn)
68 return 1;
69
70 if (gelf_getshdr(scn, shdr) != shdr)
71 return 2;
72
73 *shname = elf_strptr(elf, ehdr->e_shstrndx, shdr->sh_name);
74 if (!*shname || !shdr->sh_size)
75 return 3;
76
77 *data = elf_getdata(scn, 0);
78 if (!*data || elf_getdata(scn, *data) != NULL)
79 return 4;
80
81 return 0;
82}
83
84static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols,
85 GElf_Shdr *shdr, struct bpf_insn *insn)
86{
87 int i, nrels;
88
89 nrels = shdr->sh_size / shdr->sh_entsize;
90
91 for (i = 0; i < nrels; i++) {
92 GElf_Sym sym;
93 GElf_Rel rel;
94 unsigned int insn_idx;
95
96 gelf_getrel(data, i, &rel);
97
98 insn_idx = rel.r_offset / sizeof(struct bpf_insn);
99
100 gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym);
101
102 if (insn[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) {
103 printf("invalid relo for insn[%d].code 0x%x\n",
104 insn_idx, insn[insn_idx].code);
105 return 1;
106 }
107 insn[insn_idx].src_reg = BPF_PSEUDO_MAP_FD;
108 insn[insn_idx].imm = map_fd[sym.st_value / sizeof(struct bpf_map_def)];
109 }
110
111 return 0;
112}
113
114int load_bpf_file(char *path)
115{
116 int fd, i;
117 Elf *elf;
118 GElf_Ehdr ehdr;
119 GElf_Shdr shdr, shdr_prog;
120 Elf_Data *data, *data_prog, *symbols = NULL;
121 char *shname, *shname_prog;
122
123 if (elf_version(EV_CURRENT) == EV_NONE)
124 return 1;
125
126 fd = open(path, O_RDONLY, 0);
127 if (fd < 0)
128 return 1;
129
130 elf = elf_begin(fd, ELF_C_READ, NULL);
131
132 if (!elf)
133 return 1;
134
135 if (gelf_getehdr(elf, &ehdr) != &ehdr)
136 return 1;
137
138 /* scan over all elf sections to get license and map info */
139 for (i = 1; i < ehdr.e_shnum; i++) {
140
141 if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
142 continue;
143
144 if (0) /* helpful for llvm debugging */
145 printf("section %d:%s data %p size %zd link %d flags %d\n",
146 i, shname, data->d_buf, data->d_size,
147 shdr.sh_link, (int) shdr.sh_flags);
148
149 if (strcmp(shname, "license") == 0) {
150 processed_sec[i] = true;
151 memcpy(license, data->d_buf, data->d_size);
152 } else if (strcmp(shname, "maps") == 0) {
153 processed_sec[i] = true;
154 if (load_maps(data->d_buf, data->d_size))
155 return 1;
156 } else if (shdr.sh_type == SHT_SYMTAB) {
157 symbols = data;
158 }
159 }
160
161 /* load programs that need map fixup (relocations) */
162 for (i = 1; i < ehdr.e_shnum; i++) {
163
164 if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
165 continue;
166 if (shdr.sh_type == SHT_REL) {
167 struct bpf_insn *insns;
168
169 if (get_sec(elf, shdr.sh_info, &ehdr, &shname_prog,
170 &shdr_prog, &data_prog))
171 continue;
172
173 insns = (struct bpf_insn *) data_prog->d_buf;
174
175 processed_sec[shdr.sh_info] = true;
176 processed_sec[i] = true;
177
178 if (parse_relo_and_apply(data, symbols, &shdr, insns))
179 continue;
180
181 if (memcmp(shname_prog, "events/", 7) == 0 ||
182 memcmp(shname_prog, "socket", 6) == 0)
183 load_and_attach(shname_prog, insns, data_prog->d_size);
184 }
185 }
186
187 /* load programs that don't use maps */
188 for (i = 1; i < ehdr.e_shnum; i++) {
189
190 if (processed_sec[i])
191 continue;
192
193 if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
194 continue;
195
196 if (memcmp(shname, "events/", 7) == 0 ||
197 memcmp(shname, "socket", 6) == 0)
198 load_and_attach(shname, data->d_buf, data->d_size);
199 }
200
201 close(fd);
202 return 0;
203}
diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h
new file mode 100644
index 000000000000..27789a34f5e6
--- /dev/null
+++ b/samples/bpf/bpf_load.h
@@ -0,0 +1,24 @@
1#ifndef __BPF_LOAD_H
2#define __BPF_LOAD_H
3
4#define MAX_MAPS 32
5#define MAX_PROGS 32
6
7extern int map_fd[MAX_MAPS];
8extern int prog_fd[MAX_PROGS];
9
10/* parses elf file compiled by llvm .c->.o
11 * . parses 'maps' section and creates maps via BPF syscall
12 * . parses 'license' section and passes it to syscall
13 * . parses elf relocations for BPF maps and adjusts BPF_LD_IMM64 insns by
14 * storing map_fd into insn->imm and marking such insns as BPF_PSEUDO_MAP_FD
15 * . loads eBPF programs via BPF syscall
16 *
17 * One ELF file can contain multiple BPF programs which will be loaded
18 * and their FDs stored in prog_fd array
19 *
20 * returns zero on success
21 */
22int load_bpf_file(char *path);
23
24#endif
diff --git a/samples/bpf/libbpf.c b/samples/bpf/libbpf.c
index 17bb520eb57f..46d50b7ddf79 100644
--- a/samples/bpf/libbpf.c
+++ b/samples/bpf/libbpf.c
@@ -7,6 +7,10 @@
7#include <linux/netlink.h> 7#include <linux/netlink.h>
8#include <linux/bpf.h> 8#include <linux/bpf.h>
9#include <errno.h> 9#include <errno.h>
10#include <net/ethernet.h>
11#include <net/if.h>
12#include <linux/if_packet.h>
13#include <arpa/inet.h>
10#include "libbpf.h" 14#include "libbpf.h"
11 15
12static __u64 ptr_to_u64(void *ptr) 16static __u64 ptr_to_u64(void *ptr)
@@ -93,3 +97,27 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
93 97
94 return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); 98 return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
95} 99}
100
101int open_raw_sock(const char *name)
102{
103 struct sockaddr_ll sll;
104 int sock;
105
106 sock = socket(PF_PACKET, SOCK_RAW | SOCK_NONBLOCK | SOCK_CLOEXEC, htons(ETH_P_ALL));
107 if (sock < 0) {
108 printf("cannot create raw socket\n");
109 return -1;
110 }
111
112 memset(&sll, 0, sizeof(sll));
113 sll.sll_family = AF_PACKET;
114 sll.sll_ifindex = if_nametoindex(name);
115 sll.sll_protocol = htons(ETH_P_ALL);
116 if (bind(sock, (struct sockaddr *)&sll, sizeof(sll)) < 0) {
117 printf("bind to %s: %s\n", name, strerror(errno));
118 close(sock);
119 return -1;
120 }
121
122 return sock;
123}
diff --git a/samples/bpf/libbpf.h b/samples/bpf/libbpf.h
index f8678e5f48bf..58c5fe1bdba1 100644
--- a/samples/bpf/libbpf.h
+++ b/samples/bpf/libbpf.h
@@ -15,7 +15,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
15 const struct bpf_insn *insns, int insn_len, 15 const struct bpf_insn *insns, int insn_len,
16 const char *license); 16 const char *license);
17 17
18#define LOG_BUF_SIZE 8192 18#define LOG_BUF_SIZE 65536
19extern char bpf_log_buf[LOG_BUF_SIZE]; 19extern char bpf_log_buf[LOG_BUF_SIZE];
20 20
21/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ 21/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
@@ -99,6 +99,16 @@ extern char bpf_log_buf[LOG_BUF_SIZE];
99 BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD) 99 BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD)
100 100
101 101
102/* Direct packet access, R0 = *(uint *) (skb->data + imm32) */
103
104#define BPF_LD_ABS(SIZE, IMM) \
105 ((struct bpf_insn) { \
106 .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \
107 .dst_reg = 0, \
108 .src_reg = 0, \
109 .off = 0, \
110 .imm = IMM })
111
102/* Memory load, dst_reg = *(uint *) (src_reg + off16) */ 112/* Memory load, dst_reg = *(uint *) (src_reg + off16) */
103 113
104#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \ 114#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \
@@ -169,4 +179,7 @@ extern char bpf_log_buf[LOG_BUF_SIZE];
169 .off = 0, \ 179 .off = 0, \
170 .imm = 0 }) 180 .imm = 0 })
171 181
182/* create RAW socket and bind to interface 'name' */
183int open_raw_sock(const char *name);
184
172#endif 185#endif
diff --git a/samples/bpf/sock_example.c b/samples/bpf/sock_example.c
new file mode 100644
index 000000000000..c8ad0404416f
--- /dev/null
+++ b/samples/bpf/sock_example.c
@@ -0,0 +1,101 @@
1/* eBPF example program:
2 * - creates arraymap in kernel with key 4 bytes and value 8 bytes
3 *
4 * - loads eBPF program:
5 * r0 = skb->data[ETH_HLEN + offsetof(struct iphdr, protocol)];
6 * *(u32*)(fp - 4) = r0;
7 * // assuming packet is IPv4, lookup ip->proto in a map
8 * value = bpf_map_lookup_elem(map_fd, fp - 4);
9 * if (value)
10 * (*(u64*)value) += 1;
11 *
12 * - attaches this program to a raw socket bound to the loopback interface "lo"
13 *
14 * - every second user space reads map[tcp], map[udp], map[icmp] to see
15 * how many packets of given protocol were seen on lo
16 */
17#include <stdio.h>
18#include <unistd.h>
19#include <assert.h>
20#include <linux/bpf.h>
21#include <string.h>
22#include <stdlib.h>
23#include <errno.h>
24#include <sys/socket.h>
25#include <arpa/inet.h>
26#include <linux/if_ether.h>
27#include <linux/ip.h>
28#include <stddef.h>
29#include "libbpf.h"
30
31static int test_sock(void)
32{
33 int sock = -1, map_fd, prog_fd, i, key;
34 long long value = 0, tcp_cnt, udp_cnt, icmp_cnt;
35
36 map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value),
37 256);
38 if (map_fd < 0) {
39 printf("failed to create map '%s'\n", strerror(errno));
40 goto cleanup;
41 }
42
43 struct bpf_insn prog[] = {
44 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
45 BPF_LD_ABS(BPF_B, ETH_HLEN + offsetof(struct iphdr, protocol) /* R0 = ip->proto */),
46 BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
47 BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
48 BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
49 BPF_LD_MAP_FD(BPF_REG_1, map_fd),
50 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
51 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
52 BPF_MOV64_IMM(BPF_REG_1, 1), /* r1 = 1 */
53 BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
54 BPF_MOV64_IMM(BPF_REG_0, 0), /* r0 = 0 */
55 BPF_EXIT_INSN(),
56 };
57
58 prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, prog, sizeof(prog),
59 "GPL");
60 if (prog_fd < 0) {
61 printf("failed to load prog '%s'\n", strerror(errno));
62 goto cleanup;
63 }
64
65 sock = open_raw_sock("lo");
66
67 if (setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd,
68 sizeof(prog_fd)) < 0) {
69 printf("setsockopt %s\n", strerror(errno));
70 goto cleanup;
71 }
72
73 for (i = 0; i < 10; i++) {
74 key = IPPROTO_TCP;
75 assert(bpf_lookup_elem(map_fd, &key, &tcp_cnt) == 0);
76
77 key = IPPROTO_UDP;
78 assert(bpf_lookup_elem(map_fd, &key, &udp_cnt) == 0);
79
80 key = IPPROTO_ICMP;
81 assert(bpf_lookup_elem(map_fd, &key, &icmp_cnt) == 0);
82
83 printf("TCP %lld UDP %lld ICMP %lld packets\n",
84 tcp_cnt, udp_cnt, icmp_cnt);
85 sleep(1);
86 }
87
88cleanup:
89 /* maps, programs, raw sockets will auto cleanup on process exit */
90 return 0;
91}
92
/* Start a background ping so there is loopback traffic to count, run the
 * socket test, then reap the ping process.
 *
 * Returns whatever test_sock() returned.
 */
int main(void)
{
	FILE *f;
	int ret;

	/* ping runs concurrently with test_sock(); its output is not read */
	f = popen("ping -c5 localhost", "r");
	if (!f)
		printf("failed to start ping, counters may not change\n");

	ret = test_sock();

	/* close the stream and wait for the child instead of leaking both */
	if (f)
		pclose(f);

	return ret;
}
diff --git a/samples/bpf/sockex1_kern.c b/samples/bpf/sockex1_kern.c
new file mode 100644
index 000000000000..066892662915
--- /dev/null
+++ b/samples/bpf/sockex1_kern.c
@@ -0,0 +1,25 @@
1#include <uapi/linux/bpf.h>
2#include <uapi/linux/if_ether.h>
3#include <uapi/linux/ip.h>
4#include "bpf_helpers.h"
5
6struct bpf_map_def SEC("maps") my_map = {
7 .type = BPF_MAP_TYPE_ARRAY,
8 .key_size = sizeof(u32),
9 .value_size = sizeof(long),
10 .max_entries = 256,
11};
12
13SEC("socket1")
14int bpf_prog1(struct sk_buff *skb)
15{
16 int index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
17 long *value;
18
19 value = bpf_map_lookup_elem(&my_map, &index);
20 if (value)
21 __sync_fetch_and_add(value, 1);
22
23 return 0;
24}
25char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/sockex1_user.c b/samples/bpf/sockex1_user.c
new file mode 100644
index 000000000000..34a443ff3831
--- /dev/null
+++ b/samples/bpf/sockex1_user.c
@@ -0,0 +1,49 @@
1#include <stdio.h>
2#include <assert.h>
3#include <linux/bpf.h>
4#include "libbpf.h"
5#include "bpf_load.h"
6#include <unistd.h>
7#include <arpa/inet.h>
8
9int main(int ac, char **argv)
10{
11 char filename[256];
12 FILE *f;
13 int i, sock;
14
15 snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
16
17 if (load_bpf_file(filename)) {
18 printf("%s", bpf_log_buf);
19 return 1;
20 }
21
22 sock = open_raw_sock("lo");
23
24 assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, prog_fd,
25 sizeof(prog_fd[0])) == 0);
26
27 f = popen("ping -c5 localhost", "r");
28 (void) f;
29
30 for (i = 0; i < 5; i++) {
31 long long tcp_cnt, udp_cnt, icmp_cnt;
32 int key;
33
34 key = IPPROTO_TCP;
35 assert(bpf_lookup_elem(map_fd[0], &key, &tcp_cnt) == 0);
36
37 key = IPPROTO_UDP;
38 assert(bpf_lookup_elem(map_fd[0], &key, &udp_cnt) == 0);
39
40 key = IPPROTO_ICMP;
41 assert(bpf_lookup_elem(map_fd[0], &key, &icmp_cnt) == 0);
42
43 printf("TCP %lld UDP %lld ICMP %lld packets\n",
44 tcp_cnt, udp_cnt, icmp_cnt);
45 sleep(1);
46 }
47
48 return 0;
49}
diff --git a/samples/bpf/sockex2_kern.c b/samples/bpf/sockex2_kern.c
new file mode 100644
index 000000000000..6f0135f0f217
--- /dev/null
+++ b/samples/bpf/sockex2_kern.c
@@ -0,0 +1,215 @@
1#include <uapi/linux/bpf.h>
2#include "bpf_helpers.h"
3#include <uapi/linux/in.h>
4#include <uapi/linux/if.h>
5#include <uapi/linux/if_ether.h>
6#include <uapi/linux/ip.h>
7#include <uapi/linux/ipv6.h>
8#include <uapi/linux/if_tunnel.h>
9#define IP_MF 0x2000
10#define IP_OFFSET 0x1FFF
11
/* VLAN tag as read by flow_dissector(): two big-endian 16 bit fields, the
 * second of which names the protocol carried after the tag.
 */
struct vlan_hdr {
	__be16 h_vlan_TCI;
	__be16 h_vlan_encapsulated_proto;	/* protocol that follows the tag */
};
16
/* Result of flow_dissector() for one packet. */
struct flow_keys {
	__be32 src;		/* IPv4 saddr word, or xor-fold of the IPv6 source */
	__be32 dst;		/* IPv4 daddr word, or xor-fold of the IPv6 destination */
	union {
		__be32 ports;	/* 32 bit word loaded at the ports offset */
		__be16 port16[2];
	};
	__u16 thoff;		/* final header offset reached by the dissector */
	__u8 ip_proto;		/* IP protocol number of the last header parsed */
};
27
/* Offset, in bytes from the start of the transport header, at which the
 * 32 bit "ports" word is read for IP protocol 'proto'.
 *
 * Returns 4 for IPPROTO_AH and 0 for every other value (TCP, UDP, DCCP,
 * ESP, SCTP, UDPLITE and unknown protocols alike); never negative.
 */
static inline int proto_ports_offset(__u64 proto)
{
	if (proto == IPPROTO_AH)
		return 4;

	return 0;
}
44
45static inline int ip_is_fragment(struct sk_buff *ctx, __u64 nhoff)
46{
47 return load_half(ctx, nhoff + offsetof(struct iphdr, frag_off))
48 & (IP_MF | IP_OFFSET);
49}
50
51static inline __u32 ipv6_addr_hash(struct sk_buff *ctx, __u64 off)
52{
53 __u64 w0 = load_word(ctx, off);
54 __u64 w1 = load_word(ctx, off + 4);
55 __u64 w2 = load_word(ctx, off + 8);
56 __u64 w3 = load_word(ctx, off + 12);
57
58 return (__u32)(w0 ^ w1 ^ w2 ^ w3);
59}
60
61static inline __u64 parse_ip(struct sk_buff *skb, __u64 nhoff, __u64 *ip_proto,
62 struct flow_keys *flow)
63{
64 __u64 verlen;
65
66 if (unlikely(ip_is_fragment(skb, nhoff)))
67 *ip_proto = 0;
68 else
69 *ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol));
70
71 if (*ip_proto != IPPROTO_GRE) {
72 flow->src = load_word(skb, nhoff + offsetof(struct iphdr, saddr));
73 flow->dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr));
74 }
75
76 verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/);
77 if (likely(verlen == 0x45))
78 nhoff += 20;
79 else
80 nhoff += (verlen & 0xF) << 2;
81
82 return nhoff;
83}
84
85static inline __u64 parse_ipv6(struct sk_buff *skb, __u64 nhoff, __u64 *ip_proto,
86 struct flow_keys *flow)
87{
88 *ip_proto = load_byte(skb,
89 nhoff + offsetof(struct ipv6hdr, nexthdr));
90 flow->src = ipv6_addr_hash(skb,
91 nhoff + offsetof(struct ipv6hdr, saddr));
92 flow->dst = ipv6_addr_hash(skb,
93 nhoff + offsetof(struct ipv6hdr, daddr));
94 nhoff += sizeof(struct ipv6hdr);
95
96 return nhoff;
97}
98
99static inline bool flow_dissector(struct sk_buff *skb, struct flow_keys *flow)
100{
101 __u64 nhoff = ETH_HLEN;
102 __u64 ip_proto;
103 __u64 proto = load_half(skb, 12);
104 int poff;
105
106 if (proto == ETH_P_8021AD) {
107 proto = load_half(skb, nhoff + offsetof(struct vlan_hdr,
108 h_vlan_encapsulated_proto));
109 nhoff += sizeof(struct vlan_hdr);
110 }
111
112 if (proto == ETH_P_8021Q) {
113 proto = load_half(skb, nhoff + offsetof(struct vlan_hdr,
114 h_vlan_encapsulated_proto));
115 nhoff += sizeof(struct vlan_hdr);
116 }
117
118 if (likely(proto == ETH_P_IP))
119 nhoff = parse_ip(skb, nhoff, &ip_proto, flow);
120 else if (proto == ETH_P_IPV6)
121 nhoff = parse_ipv6(skb, nhoff, &ip_proto, flow);
122 else
123 return false;
124
125 switch (ip_proto) {
126 case IPPROTO_GRE: {
127 struct gre_hdr {
128 __be16 flags;
129 __be16 proto;
130 };
131
132 __u64 gre_flags = load_half(skb,
133 nhoff + offsetof(struct gre_hdr, flags));
134 __u64 gre_proto = load_half(skb,
135 nhoff + offsetof(struct gre_hdr, proto));
136
137 if (gre_flags & (GRE_VERSION|GRE_ROUTING))
138 break;
139
140 proto = gre_proto;
141 nhoff += 4;
142 if (gre_flags & GRE_CSUM)
143 nhoff += 4;
144 if (gre_flags & GRE_KEY)
145 nhoff += 4;
146 if (gre_flags & GRE_SEQ)
147 nhoff += 4;
148
149 if (proto == ETH_P_8021Q) {
150 proto = load_half(skb,
151 nhoff + offsetof(struct vlan_hdr,
152 h_vlan_encapsulated_proto));
153 nhoff += sizeof(struct vlan_hdr);
154 }
155
156 if (proto == ETH_P_IP)
157 nhoff = parse_ip(skb, nhoff, &ip_proto, flow);
158 else if (proto == ETH_P_IPV6)
159 nhoff = parse_ipv6(skb, nhoff, &ip_proto, flow);
160 else
161 return false;
162 break;
163 }
164 case IPPROTO_IPIP:
165 nhoff = parse_ip(skb, nhoff, &ip_proto, flow);
166 break;
167 case IPPROTO_IPV6:
168 nhoff = parse_ipv6(skb, nhoff, &ip_proto, flow);
169 break;
170 default:
171 break;
172 }
173
174 flow->ip_proto = ip_proto;
175 poff = proto_ports_offset(ip_proto);
176 if (poff >= 0) {
177 nhoff += poff;
178 flow->ports = load_word(skb, nhoff);
179 }
180
181 flow->thoff = (__u16) nhoff;
182
183 return true;
184}
185
186struct bpf_map_def SEC("maps") hash_map = {
187 .type = BPF_MAP_TYPE_HASH,
188 .key_size = sizeof(__be32),
189 .value_size = sizeof(long),
190 .max_entries = 1024,
191};
192
193SEC("socket2")
194int bpf_prog2(struct sk_buff *skb)
195{
196 struct flow_keys flow;
197 long *value;
198 u32 key;
199
200 if (!flow_dissector(skb, &flow))
201 return 0;
202
203 key = flow.dst;
204 value = bpf_map_lookup_elem(&hash_map, &key);
205 if (value) {
206 __sync_fetch_and_add(value, 1);
207 } else {
208 long val = 1;
209
210 bpf_map_update_elem(&hash_map, &key, &val, BPF_ANY);
211 }
212 return 0;
213}
214
215char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/sockex2_user.c b/samples/bpf/sockex2_user.c
new file mode 100644
index 000000000000..d2d5f5a790d3
--- /dev/null
+++ b/samples/bpf/sockex2_user.c
@@ -0,0 +1,44 @@
1#include <stdio.h>
2#include <assert.h>
3#include <linux/bpf.h>
4#include "libbpf.h"
5#include "bpf_load.h"
6#include <unistd.h>
7#include <arpa/inet.h>
8
9int main(int ac, char **argv)
10{
11 char filename[256];
12 FILE *f;
13 int i, sock;
14
15 snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
16
17 if (load_bpf_file(filename)) {
18 printf("%s", bpf_log_buf);
19 return 1;
20 }
21
22 sock = open_raw_sock("lo");
23
24 assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, prog_fd,
25 sizeof(prog_fd[0])) == 0);
26
27 f = popen("ping -c5 localhost", "r");
28 (void) f;
29
30 for (i = 0; i < 5; i++) {
31 int key = 0, next_key;
32 long long value;
33
34 while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0) {
35 bpf_lookup_elem(map_fd[0], &next_key, &value);
36 printf("ip %s count %lld\n",
37 inet_ntoa((struct in_addr){htonl(next_key)}),
38 value);
39 key = next_key;
40 }
41 sleep(1);
42 }
43 return 0;
44}