diff options
author | Alexei Starovoitov <ast@plumgrid.com> | 2015-07-20 23:34:18 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-07-20 23:52:31 -0400 |
commit | 4e10df9a60d96ced321dd2af71da558c6b750078 (patch) | |
tree | 6a913ab31a28d48de3b3510470320220eb75e6eb | |
parent | f3120acc7851bffd1cd15acd044b7fa6fa520e75 (diff) |
bpf: introduce bpf_skb_vlan_push/pop() helpers
Allow eBPF programs attached to TC qdiscs to call skb_vlan_push/pop via
helper functions. These functions may change skb->data/hlen which are
cached by some JITs to improve performance of ld_abs/ld_ind instructions.
Therefore JITs need to recognize bpf_skb_vlan_push/pop() calls,
re-compute header len and re-cache skb->data/hlen back into cpu registers.
Note, skb->data/hlen are not directly accessible from the programs,
so any changes to skb->data done either by these helpers or by other
TC actions are safe.
eBPF JIT is supported by three architectures:
- arm64 JIT is using bpf_load_pointer() without caching, so it's ok as-is.
- x64 JIT re-caches skb->data/hlen unconditionally after vlan_push/pop calls
(experiments showed that conditional re-caching is slower).
- s390 JIT falls back to interpreter for now when bpf_skb_vlan_push() is present
in the program (re-caching is tbd).
These helpers allow more scalable handling of vlan from the programs.
Instead of creating thousands of vlan netdevs on top of eth0 and attaching
TC+ingress+bpf to all of them, the program can be attached to eth0 directly
and manipulate vlans as necessary.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | arch/s390/net/bpf_jit_comp.c | 4 | ||||
-rw-r--r-- | arch/x86/net/bpf_jit_comp.c | 80 | ||||
-rw-r--r-- | include/linux/bpf.h | 2 | ||||
-rw-r--r-- | include/linux/filter.h | 1 | ||||
-rw-r--r-- | include/uapi/linux/bpf.h | 2 | ||||
-rw-r--r-- | net/core/filter.c | 48 |
6 files changed, 99 insertions, 38 deletions
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index fee782acc2ee..79c731e8d178 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c | |||
@@ -973,6 +973,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i | |||
973 | */ | 973 | */ |
974 | const u64 func = (u64)__bpf_call_base + imm; | 974 | const u64 func = (u64)__bpf_call_base + imm; |
975 | 975 | ||
976 | if (bpf_helper_changes_skb_data((void *)func)) | ||
977 | /* TODO reload skb->data, hlen */ | ||
978 | return -1; | ||
979 | |||
976 | REG_SET_SEEN(BPF_REG_5); | 980 | REG_SET_SEEN(BPF_REG_5); |
977 | jit->seen |= SEEN_FUNC; | 981 | jit->seen |= SEEN_FUNC; |
978 | /* lg %w1,<d(imm)>(%l) */ | 982 | /* lg %w1,<d(imm)>(%l) */ |
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 579a8fd74be0..6c335a8fc086 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c | |||
@@ -315,6 +315,26 @@ static void emit_bpf_tail_call(u8 **pprog) | |||
315 | *pprog = prog; | 315 | *pprog = prog; |
316 | } | 316 | } |
317 | 317 | ||
318 | |||
319 | static void emit_load_skb_data_hlen(u8 **pprog) | ||
320 | { | ||
321 | u8 *prog = *pprog; | ||
322 | int cnt = 0; | ||
323 | |||
324 | /* r9d = skb->len - skb->data_len (headlen) | ||
325 | * r10 = skb->data | ||
326 | */ | ||
327 | /* mov %r9d, off32(%rdi) */ | ||
328 | EMIT3_off32(0x44, 0x8b, 0x8f, offsetof(struct sk_buff, len)); | ||
329 | |||
330 | /* sub %r9d, off32(%rdi) */ | ||
331 | EMIT3_off32(0x44, 0x2b, 0x8f, offsetof(struct sk_buff, data_len)); | ||
332 | |||
333 | /* mov %r10, off32(%rdi) */ | ||
334 | EMIT3_off32(0x4c, 0x8b, 0x97, offsetof(struct sk_buff, data)); | ||
335 | *pprog = prog; | ||
336 | } | ||
337 | |||
318 | static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | 338 | static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, |
319 | int oldproglen, struct jit_context *ctx) | 339 | int oldproglen, struct jit_context *ctx) |
320 | { | 340 | { |
@@ -329,36 +349,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
329 | 349 | ||
330 | emit_prologue(&prog); | 350 | emit_prologue(&prog); |
331 | 351 | ||
332 | if (seen_ld_abs) { | 352 | if (seen_ld_abs) |
333 | /* r9d : skb->len - skb->data_len (headlen) | 353 | emit_load_skb_data_hlen(&prog); |
334 | * r10 : skb->data | ||
335 | */ | ||
336 | if (is_imm8(offsetof(struct sk_buff, len))) | ||
337 | /* mov %r9d, off8(%rdi) */ | ||
338 | EMIT4(0x44, 0x8b, 0x4f, | ||
339 | offsetof(struct sk_buff, len)); | ||
340 | else | ||
341 | /* mov %r9d, off32(%rdi) */ | ||
342 | EMIT3_off32(0x44, 0x8b, 0x8f, | ||
343 | offsetof(struct sk_buff, len)); | ||
344 | |||
345 | if (is_imm8(offsetof(struct sk_buff, data_len))) | ||
346 | /* sub %r9d, off8(%rdi) */ | ||
347 | EMIT4(0x44, 0x2b, 0x4f, | ||
348 | offsetof(struct sk_buff, data_len)); | ||
349 | else | ||
350 | EMIT3_off32(0x44, 0x2b, 0x8f, | ||
351 | offsetof(struct sk_buff, data_len)); | ||
352 | |||
353 | if (is_imm8(offsetof(struct sk_buff, data))) | ||
354 | /* mov %r10, off8(%rdi) */ | ||
355 | EMIT4(0x4c, 0x8b, 0x57, | ||
356 | offsetof(struct sk_buff, data)); | ||
357 | else | ||
358 | /* mov %r10, off32(%rdi) */ | ||
359 | EMIT3_off32(0x4c, 0x8b, 0x97, | ||
360 | offsetof(struct sk_buff, data)); | ||
361 | } | ||
362 | 354 | ||
363 | for (i = 0; i < insn_cnt; i++, insn++) { | 355 | for (i = 0; i < insn_cnt; i++, insn++) { |
364 | const s32 imm32 = insn->imm; | 356 | const s32 imm32 = insn->imm; |
@@ -367,6 +359,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, | |||
367 | u8 b1 = 0, b2 = 0, b3 = 0; | 359 | u8 b1 = 0, b2 = 0, b3 = 0; |
368 | s64 jmp_offset; | 360 | s64 jmp_offset; |
369 | u8 jmp_cond; | 361 | u8 jmp_cond; |
362 | bool reload_skb_data; | ||
370 | int ilen; | 363 | int ilen; |
371 | u8 *func; | 364 | u8 *func; |
372 | 365 | ||
@@ -818,12 +811,18 @@ xadd: if (is_imm8(insn->off)) | |||
818 | func = (u8 *) __bpf_call_base + imm32; | 811 | func = (u8 *) __bpf_call_base + imm32; |
819 | jmp_offset = func - (image + addrs[i]); | 812 | jmp_offset = func - (image + addrs[i]); |
820 | if (seen_ld_abs) { | 813 | if (seen_ld_abs) { |
821 | EMIT2(0x41, 0x52); /* push %r10 */ | 814 | reload_skb_data = bpf_helper_changes_skb_data(func); |
822 | EMIT2(0x41, 0x51); /* push %r9 */ | 815 | if (reload_skb_data) { |
823 | /* need to adjust jmp offset, since | 816 | EMIT1(0x57); /* push %rdi */ |
824 | * pop %r9, pop %r10 take 4 bytes after call insn | 817 | jmp_offset += 22; /* pop, mov, sub, mov */ |
825 | */ | 818 | } else { |
826 | jmp_offset += 4; | 819 | EMIT2(0x41, 0x52); /* push %r10 */ |
820 | EMIT2(0x41, 0x51); /* push %r9 */ | ||
821 | /* need to adjust jmp offset, since | ||
822 | * pop %r9, pop %r10 take 4 bytes after call insn | ||
823 | */ | ||
824 | jmp_offset += 4; | ||
825 | } | ||
827 | } | 826 | } |
828 | if (!imm32 || !is_simm32(jmp_offset)) { | 827 | if (!imm32 || !is_simm32(jmp_offset)) { |
829 | pr_err("unsupported bpf func %d addr %p image %p\n", | 828 | pr_err("unsupported bpf func %d addr %p image %p\n", |
@@ -832,8 +831,13 @@ xadd: if (is_imm8(insn->off)) | |||
832 | } | 831 | } |
833 | EMIT1_off32(0xE8, jmp_offset); | 832 | EMIT1_off32(0xE8, jmp_offset); |
834 | if (seen_ld_abs) { | 833 | if (seen_ld_abs) { |
835 | EMIT2(0x41, 0x59); /* pop %r9 */ | 834 | if (reload_skb_data) { |
836 | EMIT2(0x41, 0x5A); /* pop %r10 */ | 835 | EMIT1(0x5F); /* pop %rdi */ |
836 | emit_load_skb_data_hlen(&prog); | ||
837 | } else { | ||
838 | EMIT2(0x41, 0x59); /* pop %r9 */ | ||
839 | EMIT2(0x41, 0x5A); /* pop %r10 */ | ||
840 | } | ||
837 | } | 841 | } |
838 | break; | 842 | break; |
839 | 843 | ||
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 4383476a0d48..139d6d2e123f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h | |||
@@ -192,5 +192,7 @@ extern const struct bpf_func_proto bpf_ktime_get_ns_proto; | |||
192 | extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto; | 192 | extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto; |
193 | extern const struct bpf_func_proto bpf_get_current_uid_gid_proto; | 193 | extern const struct bpf_func_proto bpf_get_current_uid_gid_proto; |
194 | extern const struct bpf_func_proto bpf_get_current_comm_proto; | 194 | extern const struct bpf_func_proto bpf_get_current_comm_proto; |
195 | extern const struct bpf_func_proto bpf_skb_vlan_push_proto; | ||
196 | extern const struct bpf_func_proto bpf_skb_vlan_pop_proto; | ||
195 | 197 | ||
196 | #endif /* _LINUX_BPF_H */ | 198 | #endif /* _LINUX_BPF_H */ |
diff --git a/include/linux/filter.h b/include/linux/filter.h index 17724f6ea983..69d00555ce35 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h | |||
@@ -411,6 +411,7 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); | |||
411 | 411 | ||
412 | u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); | 412 | u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); |
413 | void bpf_int_jit_compile(struct bpf_prog *fp); | 413 | void bpf_int_jit_compile(struct bpf_prog *fp); |
414 | bool bpf_helper_changes_skb_data(void *func); | ||
414 | 415 | ||
415 | #ifdef CONFIG_BPF_JIT | 416 | #ifdef CONFIG_BPF_JIT |
416 | typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); | 417 | typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); |
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2de87e58b12b..2f6c83d714e9 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h | |||
@@ -256,6 +256,8 @@ enum bpf_func_id { | |||
256 | * Return: classid if != 0 | 256 | * Return: classid if != 0 |
257 | */ | 257 | */ |
258 | BPF_FUNC_get_cgroup_classid, | 258 | BPF_FUNC_get_cgroup_classid, |
259 | BPF_FUNC_skb_vlan_push, /* bpf_skb_vlan_push(skb, vlan_proto, vlan_tci) */ | ||
260 | BPF_FUNC_skb_vlan_pop, /* bpf_skb_vlan_pop(skb) */ | ||
259 | __BPF_FUNC_MAX_ID, | 261 | __BPF_FUNC_MAX_ID, |
260 | }; | 262 | }; |
261 | 263 | ||
diff --git a/net/core/filter.c b/net/core/filter.c index 247450a5e387..50338071fac4 100644 --- a/net/core/filter.c +++ b/net/core/filter.c | |||
@@ -1437,6 +1437,50 @@ static const struct bpf_func_proto bpf_get_cgroup_classid_proto = { | |||
1437 | .arg1_type = ARG_PTR_TO_CTX, | 1437 | .arg1_type = ARG_PTR_TO_CTX, |
1438 | }; | 1438 | }; |
1439 | 1439 | ||
1440 | static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5) | ||
1441 | { | ||
1442 | struct sk_buff *skb = (struct sk_buff *) (long) r1; | ||
1443 | __be16 vlan_proto = (__force __be16) r2; | ||
1444 | |||
1445 | if (unlikely(vlan_proto != htons(ETH_P_8021Q) && | ||
1446 | vlan_proto != htons(ETH_P_8021AD))) | ||
1447 | vlan_proto = htons(ETH_P_8021Q); | ||
1448 | |||
1449 | return skb_vlan_push(skb, vlan_proto, vlan_tci); | ||
1450 | } | ||
1451 | |||
1452 | const struct bpf_func_proto bpf_skb_vlan_push_proto = { | ||
1453 | .func = bpf_skb_vlan_push, | ||
1454 | .gpl_only = false, | ||
1455 | .ret_type = RET_INTEGER, | ||
1456 | .arg1_type = ARG_PTR_TO_CTX, | ||
1457 | .arg2_type = ARG_ANYTHING, | ||
1458 | .arg3_type = ARG_ANYTHING, | ||
1459 | }; | ||
1460 | |||
1461 | static u64 bpf_skb_vlan_pop(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) | ||
1462 | { | ||
1463 | struct sk_buff *skb = (struct sk_buff *) (long) r1; | ||
1464 | |||
1465 | return skb_vlan_pop(skb); | ||
1466 | } | ||
1467 | |||
1468 | const struct bpf_func_proto bpf_skb_vlan_pop_proto = { | ||
1469 | .func = bpf_skb_vlan_pop, | ||
1470 | .gpl_only = false, | ||
1471 | .ret_type = RET_INTEGER, | ||
1472 | .arg1_type = ARG_PTR_TO_CTX, | ||
1473 | }; | ||
1474 | |||
1475 | bool bpf_helper_changes_skb_data(void *func) | ||
1476 | { | ||
1477 | if (func == bpf_skb_vlan_push) | ||
1478 | return true; | ||
1479 | if (func == bpf_skb_vlan_pop) | ||
1480 | return true; | ||
1481 | return false; | ||
1482 | } | ||
1483 | |||
1440 | static const struct bpf_func_proto * | 1484 | static const struct bpf_func_proto * |
1441 | sk_filter_func_proto(enum bpf_func_id func_id) | 1485 | sk_filter_func_proto(enum bpf_func_id func_id) |
1442 | { | 1486 | { |
@@ -1476,6 +1520,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) | |||
1476 | return &bpf_clone_redirect_proto; | 1520 | return &bpf_clone_redirect_proto; |
1477 | case BPF_FUNC_get_cgroup_classid: | 1521 | case BPF_FUNC_get_cgroup_classid: |
1478 | return &bpf_get_cgroup_classid_proto; | 1522 | return &bpf_get_cgroup_classid_proto; |
1523 | case BPF_FUNC_skb_vlan_push: | ||
1524 | return &bpf_skb_vlan_push_proto; | ||
1525 | case BPF_FUNC_skb_vlan_pop: | ||
1526 | return &bpf_skb_vlan_pop_proto; | ||
1479 | default: | 1527 | default: |
1480 | return sk_filter_func_proto(func_id); | 1528 | return sk_filter_func_proto(func_id); |
1481 | } | 1529 | } |