aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Willem de Bruijn <willemb@google.com> 2015-08-14 22:31:34 -0400
committer: David S. Miller <davem@davemloft.net> 2015-08-17 17:22:47 -0400
commit: 47dceb8ecdc1c3ad1818dfea3d659a05b74c3fc2 (patch)
tree: 6329539935535b2b5ed438612464e168b02f03ce
parent: a1c234f95cae2d293047bb6c36e7a4840dbac815 (diff)
packet: add classic BPF fanout mode
Add fanout mode PACKET_FANOUT_CBPF that accepts a classic BPF program to select a socket. This avoids having to keep adding special case fanout modes. One example use case is application layer load balancing. The QUIC protocol, for instance, encodes a connection ID in UDP payload. Also add socket option SOL_PACKET/PACKET_FANOUT_DATA that updates data associated with the socket group. Fanout mode PACKET_FANOUT_CBPF is the only user so far. Signed-off-by: Willem de Bruijn <willemb@google.com> Acked-by: Alexei Starovoitov <ast@plumgrid.com> Acked-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/uapi/linux/if_packet.h | 2
-rw-r--r-- net/packet/af_packet.c | 99
-rw-r--r-- net/packet/internal.h | 5
3 files changed, 104 insertions, 2 deletions
diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h
index d3d715f8c88f..a4bb16fa822e 100644
--- a/include/uapi/linux/if_packet.h
+++ b/include/uapi/linux/if_packet.h
@@ -55,6 +55,7 @@ struct sockaddr_ll {
55#define PACKET_TX_HAS_OFF 19 55#define PACKET_TX_HAS_OFF 19
56#define PACKET_QDISC_BYPASS 20 56#define PACKET_QDISC_BYPASS 20
57#define PACKET_ROLLOVER_STATS 21 57#define PACKET_ROLLOVER_STATS 21
58#define PACKET_FANOUT_DATA 22
58 59
59#define PACKET_FANOUT_HASH 0 60#define PACKET_FANOUT_HASH 0
60#define PACKET_FANOUT_LB 1 61#define PACKET_FANOUT_LB 1
@@ -62,6 +63,7 @@ struct sockaddr_ll {
62#define PACKET_FANOUT_ROLLOVER 3 63#define PACKET_FANOUT_ROLLOVER 3
63#define PACKET_FANOUT_RND 4 64#define PACKET_FANOUT_RND 4
64#define PACKET_FANOUT_QM 5 65#define PACKET_FANOUT_QM 5
66#define PACKET_FANOUT_CBPF 6
65#define PACKET_FANOUT_FLAG_ROLLOVER 0x1000 67#define PACKET_FANOUT_FLAG_ROLLOVER 0x1000
66#define PACKET_FANOUT_FLAG_DEFRAG 0x8000 68#define PACKET_FANOUT_FLAG_DEFRAG 0x8000
67 69
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index b5afe538bb88..8869d07013e6 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -92,6 +92,7 @@
92#ifdef CONFIG_INET 92#ifdef CONFIG_INET
93#include <net/inet_common.h> 93#include <net/inet_common.h>
94#endif 94#endif
95#include <linux/bpf.h>
95 96
96#include "internal.h" 97#include "internal.h"
97 98
@@ -1410,6 +1411,22 @@ static unsigned int fanout_demux_qm(struct packet_fanout *f,
1410 return skb_get_queue_mapping(skb) % num; 1411 return skb_get_queue_mapping(skb) % num;
1411} 1412}
1412 1413
1414static unsigned int fanout_demux_bpf(struct packet_fanout *f,
1415 struct sk_buff *skb,
1416 unsigned int num)
1417{
1418 struct bpf_prog *prog;
1419 unsigned int ret = 0;
1420
1421 rcu_read_lock();
1422 prog = rcu_dereference(f->bpf_prog);
1423 if (prog)
1424 ret = BPF_PROG_RUN(prog, skb) % num;
1425 rcu_read_unlock();
1426
1427 return ret;
1428}
1429
1413static bool fanout_has_flag(struct packet_fanout *f, u16 flag) 1430static bool fanout_has_flag(struct packet_fanout *f, u16 flag)
1414{ 1431{
1415 return f->flags & (flag >> 8); 1432 return f->flags & (flag >> 8);
@@ -1454,6 +1471,9 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
1454 case PACKET_FANOUT_ROLLOVER: 1471 case PACKET_FANOUT_ROLLOVER:
1455 idx = fanout_demux_rollover(f, skb, 0, false, num); 1472 idx = fanout_demux_rollover(f, skb, 0, false, num);
1456 break; 1473 break;
1474 case PACKET_FANOUT_CBPF:
1475 idx = fanout_demux_bpf(f, skb, num);
1476 break;
1457 } 1477 }
1458 1478
1459 if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER)) 1479 if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER))
@@ -1502,6 +1522,74 @@ static bool match_fanout_group(struct packet_type *ptype, struct sock *sk)
1502 return false; 1522 return false;
1503} 1523}
1504 1524
1525static void fanout_init_data(struct packet_fanout *f)
1526{
1527 switch (f->type) {
1528 case PACKET_FANOUT_LB:
1529 atomic_set(&f->rr_cur, 0);
1530 break;
1531 case PACKET_FANOUT_CBPF:
1532 RCU_INIT_POINTER(f->bpf_prog, NULL);
1533 break;
1534 }
1535}
1536
1537static void __fanout_set_data_bpf(struct packet_fanout *f, struct bpf_prog *new)
1538{
1539 struct bpf_prog *old;
1540
1541 spin_lock(&f->lock);
1542 old = rcu_dereference_protected(f->bpf_prog, lockdep_is_held(&f->lock));
1543 rcu_assign_pointer(f->bpf_prog, new);
1544 spin_unlock(&f->lock);
1545
1546 if (old) {
1547 synchronize_net();
1548 bpf_prog_destroy(old);
1549 }
1550}
1551
1552static int fanout_set_data_cbpf(struct packet_sock *po, char __user *data,
1553 unsigned int len)
1554{
1555 struct bpf_prog *new;
1556 struct sock_fprog fprog;
1557 int ret;
1558
1559 if (sock_flag(&po->sk, SOCK_FILTER_LOCKED))
1560 return -EPERM;
1561 if (len != sizeof(fprog))
1562 return -EINVAL;
1563 if (copy_from_user(&fprog, data, len))
1564 return -EFAULT;
1565
1566 ret = bpf_prog_create_from_user(&new, &fprog, NULL);
1567 if (ret)
1568 return ret;
1569
1570 __fanout_set_data_bpf(po->fanout, new);
1571 return 0;
1572}
1573
1574static int fanout_set_data(struct packet_sock *po, char __user *data,
1575 unsigned int len)
1576{
1577 switch (po->fanout->type) {
1578 case PACKET_FANOUT_CBPF:
1579 return fanout_set_data_cbpf(po, data, len);
1580 default:
1581 return -EINVAL;
1582 };
1583}
1584
1585static void fanout_release_data(struct packet_fanout *f)
1586{
1587 switch (f->type) {
1588 case PACKET_FANOUT_CBPF:
1589 __fanout_set_data_bpf(f, NULL);
1590 };
1591}
1592
1505static int fanout_add(struct sock *sk, u16 id, u16 type_flags) 1593static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
1506{ 1594{
1507 struct packet_sock *po = pkt_sk(sk); 1595 struct packet_sock *po = pkt_sk(sk);
@@ -1519,6 +1607,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
1519 case PACKET_FANOUT_CPU: 1607 case PACKET_FANOUT_CPU:
1520 case PACKET_FANOUT_RND: 1608 case PACKET_FANOUT_RND:
1521 case PACKET_FANOUT_QM: 1609 case PACKET_FANOUT_QM:
1610 case PACKET_FANOUT_CBPF:
1522 break; 1611 break;
1523 default: 1612 default:
1524 return -EINVAL; 1613 return -EINVAL;
@@ -1561,10 +1650,10 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
1561 match->id = id; 1650 match->id = id;
1562 match->type = type; 1651 match->type = type;
1563 match->flags = flags; 1652 match->flags = flags;
1564 atomic_set(&match->rr_cur, 0);
1565 INIT_LIST_HEAD(&match->list); 1653 INIT_LIST_HEAD(&match->list);
1566 spin_lock_init(&match->lock); 1654 spin_lock_init(&match->lock);
1567 atomic_set(&match->sk_ref, 0); 1655 atomic_set(&match->sk_ref, 0);
1656 fanout_init_data(match);
1568 match->prot_hook.type = po->prot_hook.type; 1657 match->prot_hook.type = po->prot_hook.type;
1569 match->prot_hook.dev = po->prot_hook.dev; 1658 match->prot_hook.dev = po->prot_hook.dev;
1570 match->prot_hook.func = packet_rcv_fanout; 1659 match->prot_hook.func = packet_rcv_fanout;
@@ -1610,6 +1699,7 @@ static void fanout_release(struct sock *sk)
1610 if (atomic_dec_and_test(&f->sk_ref)) { 1699 if (atomic_dec_and_test(&f->sk_ref)) {
1611 list_del(&f->list); 1700 list_del(&f->list);
1612 dev_remove_pack(&f->prot_hook); 1701 dev_remove_pack(&f->prot_hook);
1702 fanout_release_data(f);
1613 kfree(f); 1703 kfree(f);
1614 } 1704 }
1615 mutex_unlock(&fanout_mutex); 1705 mutex_unlock(&fanout_mutex);
@@ -3529,6 +3619,13 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
3529 3619
3530 return fanout_add(sk, val & 0xffff, val >> 16); 3620 return fanout_add(sk, val & 0xffff, val >> 16);
3531 } 3621 }
3622 case PACKET_FANOUT_DATA:
3623 {
3624 if (!po->fanout)
3625 return -EINVAL;
3626
3627 return fanout_set_data(po, optval, optlen);
3628 }
3532 case PACKET_TX_HAS_OFF: 3629 case PACKET_TX_HAS_OFF:
3533 { 3630 {
3534 unsigned int val; 3631 unsigned int val;
diff --git a/net/packet/internal.h b/net/packet/internal.h
index e20b3e8829b8..9ee46314b7d7 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -79,7 +79,10 @@ struct packet_fanout {
79 u16 id; 79 u16 id;
80 u8 type; 80 u8 type;
81 u8 flags; 81 u8 flags;
82 atomic_t rr_cur; 82 union {
83 atomic_t rr_cur;
84 struct bpf_prog __rcu *bpf_prog;
85 };
83 struct list_head list; 86 struct list_head list;
84 struct sock *arr[PACKET_FANOUT_MAX]; 87 struct sock *arr[PACKET_FANOUT_MAX];
85 spinlock_t lock; 88 spinlock_t lock;