aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2017-04-18 15:36:58 -0400
committer David S. Miller <davem@davemloft.net> 2017-04-25 13:33:49 -0400
commit b5cdae3291f7be7a34e75affe4c0ec1f7f328b64 (patch)
tree ca1a8fc4ef95aa3e6e66353791dcb74cad8bc0c2
parent 2f7878c06e2d227aa5c405ddde356403b83e3509 (diff)
net: Generic XDP
This provides a generic SKB based non-optimized XDP path which is used if either the driver lacks a specific XDP implementation, or the user requests it via a new IFLA_XDP_FLAGS value named XDP_FLAGS_SKB_MODE. It is arguable that perhaps I should have required something like this as part of the initial XDP feature merge. I believe this is critical for two reasons: 1) Accessibility. More people can play with XDP with fewer dependencies. Yes I know we have XDP support in virtio_net, but that just creates another dependency for learning how to use this facility. I wrote this to make life easier for the XDP newbies. 2) As a model for what the expected semantics are. If there is a pure generic core implementation, it serves as a semantic example for driver folks adding XDP support. One thing I have not tried to address here is the issue of XDP_PACKET_HEADROOM, thanks to Daniel for spotting that. It seems incredibly expensive to do a skb_cow(skb, XDP_PACKET_HEADROOM) or whatever even if the XDP program doesn't try to push headers at all. I think we really need the verifier to somehow propagate whether certain XDP helpers are used or not. v5: - Handle both negative and positive offset after running prog - Fix mac length in XDP_TX case (Alexei) - Use rcu_dereference_protected() in free_netdev (kbuild test robot) v4: - Fix MAC header adjustment before calling prog (David Ahern) - Disable LRO when generic XDP is installed (Michael Chan) - Bypass qdisc et al. on XDP_TX and record the event (Alexei) - Do not perform generic XDP on reinjected packets (DaveM) v3: - Make sure XDP program sees packet at MAC header, push back MAC header if we do XDP_TX. (Alexei) - Elide GRO when generic XDP is in use. (Alexei) - Add XDP_FLAGS_SKB_MODE flag which the user can use to request generic XDP even if the driver has an XDP implementation. (Alexei) - Report whether SKB mode is in use in rtnl_xdp_fill() via XDP_FLAGS attribute.
(Daniel) v2: - Add some "fall through" comments in switch statements based upon feedback from Andrew Lunn - Use RCU for generic xdp_prog, thanks to Johannes Berg. Tested-by: Andy Gospodarek <andy@greyhouse.net> Tested-by: Jesper Dangaard Brouer <brouer@redhat.com> Tested-by: David Ahern <dsa@cumulusnetworks.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/linux/netdevice.h 8
-rw-r--r-- include/uapi/linux/if_link.h 4
-rw-r--r-- net/core/dev.c 155
-rw-r--r-- net/core/gro_cells.c 2
-rw-r--r-- net/core/rtnetlink.c 40
5 files changed, 187 insertions, 22 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5d5267febd56..46d220c2bf92 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1905,9 +1905,17 @@ struct net_device {
1905 struct lock_class_key *qdisc_tx_busylock; 1905 struct lock_class_key *qdisc_tx_busylock;
1906 struct lock_class_key *qdisc_running_key; 1906 struct lock_class_key *qdisc_running_key;
1907 bool proto_down; 1907 bool proto_down;
1908 struct bpf_prog __rcu *xdp_prog;
1908}; 1909};
1909#define to_net_dev(d) container_of(d, struct net_device, dev) 1910#define to_net_dev(d) container_of(d, struct net_device, dev)
1910 1911
1912static inline bool netif_elide_gro(const struct net_device *dev)
1913{
1914 if (!(dev->features & NETIF_F_GRO) || dev->xdp_prog)
1915 return true;
1916 return false;
1917}
1918
1911#define NETDEV_ALIGN 32 1919#define NETDEV_ALIGN 32
1912 1920
1913static inline 1921static inline
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 8b405afb2376..633aa0276d32 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -887,7 +887,9 @@ enum {
887/* XDP section */ 887/* XDP section */
888 888
/* Flags accepted in IFLA_XDP_FLAGS.  Each flag occupies exactly one bit,
 * written as (1U << n) so the next free bit is obvious when a flag is
 * added.
 */
#define XDP_FLAGS_UPDATE_IF_NOEXIST	(1U << 0)
/* Request the generic, SKB-based XDP path. */
#define XDP_FLAGS_SKB_MODE		(1U << 1)
/* Union of every flag user space is allowed to pass. */
#define XDP_FLAGS_MASK			(XDP_FLAGS_UPDATE_IF_NOEXIST | \
					 XDP_FLAGS_SKB_MODE)
891 893
892enum { 894enum {
893 IFLA_XDP_UNSPEC, 895 IFLA_XDP_UNSPEC,
diff --git a/net/core/dev.c b/net/core/dev.c
index db6e31564d06..1b3317c026c6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -95,6 +95,7 @@
95#include <linux/notifier.h> 95#include <linux/notifier.h>
96#include <linux/skbuff.h> 96#include <linux/skbuff.h>
97#include <linux/bpf.h> 97#include <linux/bpf.h>
98#include <linux/bpf_trace.h>
98#include <net/net_namespace.h> 99#include <net/net_namespace.h>
99#include <net/sock.h> 100#include <net/sock.h>
100#include <net/busy_poll.h> 101#include <net/busy_poll.h>
@@ -4251,6 +4252,125 @@ static int __netif_receive_skb(struct sk_buff *skb)
4251 return ret; 4252 return ret;
4252} 4253}
4253 4254
4255static struct static_key generic_xdp_needed __read_mostly;
4256
4257static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp)
4258{
4259 struct bpf_prog *new = xdp->prog;
4260 int ret = 0;
4261
4262 switch (xdp->command) {
4263 case XDP_SETUP_PROG: {
4264 struct bpf_prog *old = rtnl_dereference(dev->xdp_prog);
4265
4266 rcu_assign_pointer(dev->xdp_prog, new);
4267 if (old)
4268 bpf_prog_put(old);
4269
4270 if (old && !new) {
4271 static_key_slow_dec(&generic_xdp_needed);
4272 } else if (new && !old) {
4273 static_key_slow_inc(&generic_xdp_needed);
4274 dev_disable_lro(dev);
4275 }
4276 break;
4277 }
4278
4279 case XDP_QUERY_PROG:
4280 xdp->prog_attached = !!rcu_access_pointer(dev->xdp_prog);
4281 break;
4282
4283 default:
4284 ret = -EINVAL;
4285 break;
4286 }
4287
4288 return ret;
4289}
4290
4291static u32 netif_receive_generic_xdp(struct sk_buff *skb,
4292 struct bpf_prog *xdp_prog)
4293{
4294 struct xdp_buff xdp;
4295 u32 act = XDP_DROP;
4296 void *orig_data;
4297 int hlen, off;
4298 u32 mac_len;
4299
4300 /* Reinjected packets coming from act_mirred or similar should
4301 * not get XDP generic processing.
4302 */
4303 if (skb_cloned(skb))
4304 return XDP_PASS;
4305
4306 if (skb_linearize(skb))
4307 goto do_drop;
4308
4309 /* The XDP program wants to see the packet starting at the MAC
4310 * header.
4311 */
4312 mac_len = skb->data - skb_mac_header(skb);
4313 hlen = skb_headlen(skb) + mac_len;
4314 xdp.data = skb->data - mac_len;
4315 xdp.data_end = xdp.data + hlen;
4316 xdp.data_hard_start = skb->data - skb_headroom(skb);
4317 orig_data = xdp.data;
4318
4319 act = bpf_prog_run_xdp(xdp_prog, &xdp);
4320
4321 off = xdp.data - orig_data;
4322 if (off > 0)
4323 __skb_pull(skb, off);
4324 else if (off < 0)
4325 __skb_push(skb, -off);
4326
4327 switch (act) {
4328 case XDP_TX:
4329 __skb_push(skb, mac_len);
4330 /* fall through */
4331 case XDP_PASS:
4332 break;
4333
4334 default:
4335 bpf_warn_invalid_xdp_action(act);
4336 /* fall through */
4337 case XDP_ABORTED:
4338 trace_xdp_exception(skb->dev, xdp_prog, act);
4339 /* fall through */
4340 case XDP_DROP:
4341 do_drop:
4342 kfree_skb(skb);
4343 break;
4344 }
4345
4346 return act;
4347}
4348
4349/* When doing generic XDP we have to bypass the qdisc layer and the
4350 * network taps in order to match in-driver-XDP behavior.
4351 */
4352static void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
4353{
4354 struct net_device *dev = skb->dev;
4355 struct netdev_queue *txq;
4356 bool free_skb = true;
4357 int cpu, rc;
4358
4359 txq = netdev_pick_tx(dev, skb, NULL);
4360 cpu = smp_processor_id();
4361 HARD_TX_LOCK(dev, txq, cpu);
4362 if (!netif_xmit_stopped(txq)) {
4363 rc = netdev_start_xmit(skb, dev, txq, 0);
4364 if (dev_xmit_complete(rc))
4365 free_skb = false;
4366 }
4367 HARD_TX_UNLOCK(dev, txq);
4368 if (free_skb) {
4369 trace_xdp_exception(dev, xdp_prog, XDP_TX);
4370 kfree_skb(skb);
4371 }
4372}
4373
4254static int netif_receive_skb_internal(struct sk_buff *skb) 4374static int netif_receive_skb_internal(struct sk_buff *skb)
4255{ 4375{
4256 int ret; 4376 int ret;
@@ -4262,6 +4382,21 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
4262 4382
4263 rcu_read_lock(); 4383 rcu_read_lock();
4264 4384
4385 if (static_key_false(&generic_xdp_needed)) {
4386 struct bpf_prog *xdp_prog = rcu_dereference(skb->dev->xdp_prog);
4387
4388 if (xdp_prog) {
4389 u32 act = netif_receive_generic_xdp(skb, xdp_prog);
4390
4391 if (act != XDP_PASS) {
4392 rcu_read_unlock();
4393 if (act == XDP_TX)
4394 generic_xdp_tx(skb, xdp_prog);
4395 return NET_RX_DROP;
4396 }
4397 }
4398 }
4399
4265#ifdef CONFIG_RPS 4400#ifdef CONFIG_RPS
4266 if (static_key_false(&rps_needed)) { 4401 if (static_key_false(&rps_needed)) {
4267 struct rps_dev_flow voidflow, *rflow = &voidflow; 4402 struct rps_dev_flow voidflow, *rflow = &voidflow;
@@ -4494,7 +4629,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
4494 enum gro_result ret; 4629 enum gro_result ret;
4495 int grow; 4630 int grow;
4496 4631
4497 if (!(skb->dev->features & NETIF_F_GRO)) 4632 if (netif_elide_gro(skb->dev))
4498 goto normal; 4633 goto normal;
4499 4634
4500 if (skb->csum_bad) 4635 if (skb->csum_bad)
@@ -6723,6 +6858,7 @@ EXPORT_SYMBOL(dev_change_proto_down);
6723 */ 6858 */
6724int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags) 6859int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags)
6725{ 6860{
6861 int (*xdp_op)(struct net_device *dev, struct netdev_xdp *xdp);
6726 const struct net_device_ops *ops = dev->netdev_ops; 6862 const struct net_device_ops *ops = dev->netdev_ops;
6727 struct bpf_prog *prog = NULL; 6863 struct bpf_prog *prog = NULL;
6728 struct netdev_xdp xdp; 6864 struct netdev_xdp xdp;
@@ -6730,14 +6866,16 @@ int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags)
6730 6866
6731 ASSERT_RTNL(); 6867 ASSERT_RTNL();
6732 6868
6733 if (!ops->ndo_xdp) 6869 xdp_op = ops->ndo_xdp;
6734 return -EOPNOTSUPP; 6870 if (!xdp_op || (flags & XDP_FLAGS_SKB_MODE))
6871 xdp_op = generic_xdp_install;
6872
6735 if (fd >= 0) { 6873 if (fd >= 0) {
6736 if (flags & XDP_FLAGS_UPDATE_IF_NOEXIST) { 6874 if (flags & XDP_FLAGS_UPDATE_IF_NOEXIST) {
6737 memset(&xdp, 0, sizeof(xdp)); 6875 memset(&xdp, 0, sizeof(xdp));
6738 xdp.command = XDP_QUERY_PROG; 6876 xdp.command = XDP_QUERY_PROG;
6739 6877
6740 err = ops->ndo_xdp(dev, &xdp); 6878 err = xdp_op(dev, &xdp);
6741 if (err < 0) 6879 if (err < 0)
6742 return err; 6880 return err;
6743 if (xdp.prog_attached) 6881 if (xdp.prog_attached)
@@ -6753,7 +6891,7 @@ int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags)
6753 xdp.command = XDP_SETUP_PROG; 6891 xdp.command = XDP_SETUP_PROG;
6754 xdp.prog = prog; 6892 xdp.prog = prog;
6755 6893
6756 err = ops->ndo_xdp(dev, &xdp); 6894 err = xdp_op(dev, &xdp);
6757 if (err < 0 && prog) 6895 if (err < 0 && prog)
6758 bpf_prog_put(prog); 6896 bpf_prog_put(prog);
6759 6897
@@ -7793,6 +7931,7 @@ EXPORT_SYMBOL(alloc_netdev_mqs);
7793void free_netdev(struct net_device *dev) 7931void free_netdev(struct net_device *dev)
7794{ 7932{
7795 struct napi_struct *p, *n; 7933 struct napi_struct *p, *n;
7934 struct bpf_prog *prog;
7796 7935
7797 might_sleep(); 7936 might_sleep();
7798 netif_free_tx_queues(dev); 7937 netif_free_tx_queues(dev);
@@ -7811,6 +7950,12 @@ void free_netdev(struct net_device *dev)
7811 free_percpu(dev->pcpu_refcnt); 7950 free_percpu(dev->pcpu_refcnt);
7812 dev->pcpu_refcnt = NULL; 7951 dev->pcpu_refcnt = NULL;
7813 7952
7953 prog = rcu_dereference_protected(dev->xdp_prog, 1);
7954 if (prog) {
7955 bpf_prog_put(prog);
7956 static_key_slow_dec(&generic_xdp_needed);
7957 }
7958
7814 /* Compatibility with error handling in drivers */ 7959 /* Compatibility with error handling in drivers */
7815 if (dev->reg_state == NETREG_UNINITIALIZED) { 7960 if (dev->reg_state == NETREG_UNINITIALIZED) {
7816 netdev_freemem(dev); 7961 netdev_freemem(dev);
diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c
index c98bbfbd26b8..814e58a3ce8b 100644
--- a/net/core/gro_cells.c
+++ b/net/core/gro_cells.c
@@ -13,7 +13,7 @@ int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb)
13 struct net_device *dev = skb->dev; 13 struct net_device *dev = skb->dev;
14 struct gro_cell *cell; 14 struct gro_cell *cell;
15 15
16 if (!gcells->cells || skb_cloned(skb) || !(dev->features & NETIF_F_GRO)) 16 if (!gcells->cells || skb_cloned(skb) || netif_elide_gro(dev))
17 return netif_rx(skb); 17 return netif_rx(skb);
18 18
19 cell = this_cpu_ptr(gcells->cells); 19 cell = this_cpu_ptr(gcells->cells);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 088f9c8b4196..9031a6c8bfa7 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -896,15 +896,13 @@ static size_t rtnl_port_size(const struct net_device *dev,
896 return port_self_size; 896 return port_self_size;
897} 897}
898 898
899static size_t rtnl_xdp_size(const struct net_device *dev) 899static size_t rtnl_xdp_size(void)
900{ 900{
901 size_t xdp_size = nla_total_size(0) + /* nest IFLA_XDP */ 901 size_t xdp_size = nla_total_size(0) + /* nest IFLA_XDP */
902 nla_total_size(1); /* XDP_ATTACHED */ 902 nla_total_size(1) + /* XDP_ATTACHED */
903 nla_total_size(4); /* XDP_FLAGS */
903 904
904 if (!dev->netdev_ops->ndo_xdp) 905 return xdp_size;
905 return 0;
906 else
907 return xdp_size;
908} 906}
909 907
910static noinline size_t if_nlmsg_size(const struct net_device *dev, 908static noinline size_t if_nlmsg_size(const struct net_device *dev,
@@ -943,7 +941,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
943 + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */ 941 + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */
944 + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */ 942 + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */
945 + nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */ 943 + nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */
946 + rtnl_xdp_size(dev) /* IFLA_XDP */ 944 + rtnl_xdp_size() /* IFLA_XDP */
947 + nla_total_size(1); /* IFLA_PROTO_DOWN */ 945 + nla_total_size(1); /* IFLA_PROTO_DOWN */
948 946
949} 947}
@@ -1251,23 +1249,35 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
1251 1249
1252static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev) 1250static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
1253{ 1251{
1254 struct netdev_xdp xdp_op = {};
1255 struct nlattr *xdp; 1252 struct nlattr *xdp;
1253 u32 xdp_flags = 0;
1254 u8 val = 0;
1256 int err; 1255 int err;
1257 1256
1258 if (!dev->netdev_ops->ndo_xdp)
1259 return 0;
1260 xdp = nla_nest_start(skb, IFLA_XDP); 1257 xdp = nla_nest_start(skb, IFLA_XDP);
1261 if (!xdp) 1258 if (!xdp)
1262 return -EMSGSIZE; 1259 return -EMSGSIZE;
1263 xdp_op.command = XDP_QUERY_PROG; 1260 if (rcu_access_pointer(dev->xdp_prog)) {
1264 err = dev->netdev_ops->ndo_xdp(dev, &xdp_op); 1261 xdp_flags = XDP_FLAGS_SKB_MODE;
1265 if (err) 1262 val = 1;
1266 goto err_cancel; 1263 } else if (dev->netdev_ops->ndo_xdp) {
1267 err = nla_put_u8(skb, IFLA_XDP_ATTACHED, xdp_op.prog_attached); 1264 struct netdev_xdp xdp_op = {};
1265
1266 xdp_op.command = XDP_QUERY_PROG;
1267 err = dev->netdev_ops->ndo_xdp(dev, &xdp_op);
1268 if (err)
1269 goto err_cancel;
1270 val = xdp_op.prog_attached;
1271 }
1272 err = nla_put_u8(skb, IFLA_XDP_ATTACHED, val);
1268 if (err) 1273 if (err)
1269 goto err_cancel; 1274 goto err_cancel;
1270 1275
1276 if (xdp_flags) {
1277 err = nla_put_u32(skb, IFLA_XDP_FLAGS, xdp_flags);
1278 if (err)
1279 goto err_cancel;
1280 }
1271 nla_nest_end(skb, xdp); 1281 nla_nest_end(skb, xdp);
1272 return 0; 1282 return 0;
1273 1283