diff options
author | Alexei Starovoitov <ast@kernel.org> | 2018-01-05 18:30:47 -0500 |
---|---|---|
committer | Alexei Starovoitov <ast@kernel.org> | 2018-01-05 18:31:20 -0500 |
commit | 11d16edb04f113348b0c1d0c26cb666e9baaa7d3 (patch) | |
tree | 3e3d4ae57b0ebd5158afd33be318775883e943fa /net/core/dev.c | |
parent | 5f103c5d4dbadec0f2cacd39b6429e1b8a8cf983 (diff) | |
parent | 0fca931a6f21c11f675363b92b5a4fe86da59f30 (diff) |
Merge branch 'xdp_rxq_info'
Jesper Dangaard Brouer says:
====================
V4:
* Added reviewers/acks to patches
* Fix patch desc in i40e that got out-of-sync with code
* Add SPDX license headers for the two new files added in patch 14
V3:
* Fixed bug in virtio_net driver
* Removed export of xdp_rxq_info_init()
V2:
* Changed API exposed to drivers
- Removed invocation of "init" in drivers, and only call "reg"
(Suggested by Saeed)
- Allow "reg" to fail and handle this in drivers
(Suggested by David Ahern)
* Removed the SINKQ qtype, instead allow to register as "unused"
* Also fixed some drivers during testing on actual HW (noted in patches)
There is a need for XDP to know more about the RX-queue a given XDP
frame has arrived on. For both the XDP bpf-prog and the kernel side.
Instead of extending struct xdp_buff each time new info is needed,
this patchset takes a different approach. Struct xdp_buff is only
extended with a pointer to a struct xdp_rxq_info (allowing for easier
extending this later). This xdp_rxq_info contains information related
to how the driver has set up the individual RX-queues. This is
read-mostly information, and all xdp_buff frames (in drivers
napi_poll) point to the same xdp_rxq_info (per RX-queue).
We stress this data/cache-line is for read-mostly info. This is NOT
for dynamic per packet info, use the data_meta for such use-cases.
This patchset starts out small, and only exposes the ingress_ifindex and the
RX-queue index to the XDP/BPF program. Access to tangible info like
the ingress ifindex and RX queue index, is fairly easy to comprehend.
The other future use-cases could allow XDP frames to be recycled back
to the originating device driver, by providing info on RX device and
queue number.
As XDP doesn't have driver feature flags, and eBPF code due to
bpf-tail-calls cannot determine which XDP driver invoked it, this
patchset has to update every driver that supports XDP.
For driver developers (review individual driver patches!):
The xdp_rxq_info is tied to the drivers RX-ring(s). Whenever a RX-ring
modification requires (temporarily) stopping RX frames, then the
xdp_rxq_info should (likely) also be unregistered and re-registered,
especially if reallocating the pages in the ring. Make sure ethtool
set_channels does the right thing. When replacing XDP prog, if and
only if RX-ring need to be changed, then also re-register the
xdp_rxq_info.
I'm Cc'ing the individual driver patches to the registered maintainers.
Testing:
I've only tested the NIC drivers I have hardware for. The general
test procedure is to (DUT = Device Under Test):
(1) run pktgen script pktgen_sample04_many_flows.sh (against DUT)
(2) run samples/bpf program xdp_rxq_info --dev $DEV (on DUT)
(3) runtime modify number of NIC queues via ethtool -L (on DUT)
(4) runtime modify number of NIC ring-size via ethtool -G (on DUT)
Patch based on git tree bpf-next (at commit fb982666e380c1632a):
https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'net/core/dev.c')
-rw-r--r-- | net/core/dev.c | 69 |
1 files changed, 59 insertions, 10 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index 2eb66c0d9cdb..d7925ef8743d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
@@ -3906,9 +3906,33 @@ drop: | |||
3906 | return NET_RX_DROP; | 3906 | return NET_RX_DROP; |
3907 | } | 3907 | } |
3908 | 3908 | ||
3909 | static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb) | ||
3910 | { | ||
3911 | struct net_device *dev = skb->dev; | ||
3912 | struct netdev_rx_queue *rxqueue; | ||
3913 | |||
3914 | rxqueue = dev->_rx; | ||
3915 | |||
3916 | if (skb_rx_queue_recorded(skb)) { | ||
3917 | u16 index = skb_get_rx_queue(skb); | ||
3918 | |||
3919 | if (unlikely(index >= dev->real_num_rx_queues)) { | ||
3920 | WARN_ONCE(dev->real_num_rx_queues > 1, | ||
3921 | "%s received packet on queue %u, but number " | ||
3922 | "of RX queues is %u\n", | ||
3923 | dev->name, index, dev->real_num_rx_queues); | ||
3924 | |||
3925 | return rxqueue; /* Return first rxqueue */ | ||
3926 | } | ||
3927 | rxqueue += index; | ||
3928 | } | ||
3929 | return rxqueue; | ||
3930 | } | ||
3931 | |||
3909 | static u32 netif_receive_generic_xdp(struct sk_buff *skb, | 3932 | static u32 netif_receive_generic_xdp(struct sk_buff *skb, |
3910 | struct bpf_prog *xdp_prog) | 3933 | struct bpf_prog *xdp_prog) |
3911 | { | 3934 | { |
3935 | struct netdev_rx_queue *rxqueue; | ||
3912 | u32 metalen, act = XDP_DROP; | 3936 | u32 metalen, act = XDP_DROP; |
3913 | struct xdp_buff xdp; | 3937 | struct xdp_buff xdp; |
3914 | void *orig_data; | 3938 | void *orig_data; |
@@ -3952,6 +3976,9 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, | |||
3952 | xdp.data_hard_start = skb->data - skb_headroom(skb); | 3976 | xdp.data_hard_start = skb->data - skb_headroom(skb); |
3953 | orig_data = xdp.data; | 3977 | orig_data = xdp.data; |
3954 | 3978 | ||
3979 | rxqueue = netif_get_rxqueue(skb); | ||
3980 | xdp.rxq = &rxqueue->xdp_rxq; | ||
3981 | |||
3955 | act = bpf_prog_run_xdp(xdp_prog, &xdp); | 3982 | act = bpf_prog_run_xdp(xdp_prog, &xdp); |
3956 | 3983 | ||
3957 | off = xdp.data - orig_data; | 3984 | off = xdp.data - orig_data; |
@@ -7589,12 +7616,12 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev, | |||
7589 | } | 7616 | } |
7590 | EXPORT_SYMBOL(netif_stacked_transfer_operstate); | 7617 | EXPORT_SYMBOL(netif_stacked_transfer_operstate); |
7591 | 7618 | ||
7592 | #ifdef CONFIG_SYSFS | ||
7593 | static int netif_alloc_rx_queues(struct net_device *dev) | 7619 | static int netif_alloc_rx_queues(struct net_device *dev) |
7594 | { | 7620 | { |
7595 | unsigned int i, count = dev->num_rx_queues; | 7621 | unsigned int i, count = dev->num_rx_queues; |
7596 | struct netdev_rx_queue *rx; | 7622 | struct netdev_rx_queue *rx; |
7597 | size_t sz = count * sizeof(*rx); | 7623 | size_t sz = count * sizeof(*rx); |
7624 | int err = 0; | ||
7598 | 7625 | ||
7599 | BUG_ON(count < 1); | 7626 | BUG_ON(count < 1); |
7600 | 7627 | ||
@@ -7604,11 +7631,39 @@ static int netif_alloc_rx_queues(struct net_device *dev) | |||
7604 | 7631 | ||
7605 | dev->_rx = rx; | 7632 | dev->_rx = rx; |
7606 | 7633 | ||
7607 | for (i = 0; i < count; i++) | 7634 | for (i = 0; i < count; i++) { |
7608 | rx[i].dev = dev; | 7635 | rx[i].dev = dev; |
7636 | |||
7637 | /* XDP RX-queue setup */ | ||
7638 | err = xdp_rxq_info_reg(&rx[i].xdp_rxq, dev, i); | ||
7639 | if (err < 0) | ||
7640 | goto err_rxq_info; | ||
7641 | } | ||
7609 | return 0; | 7642 | return 0; |
7643 | |||
7644 | err_rxq_info: | ||
7645 | /* Rollback successful reg's and free other resources */ | ||
7646 | while (i--) | ||
7647 | xdp_rxq_info_unreg(&rx[i].xdp_rxq); | ||
7648 | kfree(dev->_rx); | ||
7649 | dev->_rx = NULL; | ||
7650 | return err; | ||
7651 | } | ||
7652 | |||
7653 | static void netif_free_rx_queues(struct net_device *dev) | ||
7654 | { | ||
7655 | unsigned int i, count = dev->num_rx_queues; | ||
7656 | struct netdev_rx_queue *rx; | ||
7657 | |||
7658 | /* netif_alloc_rx_queues alloc failed, resources have been unreg'ed */ | ||
7659 | if (!dev->_rx) | ||
7660 | return; | ||
7661 | |||
7662 | rx = dev->_rx; | ||
7663 | |||
7664 | for (i = 0; i < count; i++) | ||
7665 | xdp_rxq_info_unreg(&rx[i].xdp_rxq); | ||
7610 | } | 7666 | } |
7611 | #endif | ||
7612 | 7667 | ||
7613 | static void netdev_init_one_queue(struct net_device *dev, | 7668 | static void netdev_init_one_queue(struct net_device *dev, |
7614 | struct netdev_queue *queue, void *_unused) | 7669 | struct netdev_queue *queue, void *_unused) |
@@ -8169,12 +8224,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, | |||
8169 | return NULL; | 8224 | return NULL; |
8170 | } | 8225 | } |
8171 | 8226 | ||
8172 | #ifdef CONFIG_SYSFS | ||
8173 | if (rxqs < 1) { | 8227 | if (rxqs < 1) { |
8174 | pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n"); | 8228 | pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n"); |
8175 | return NULL; | 8229 | return NULL; |
8176 | } | 8230 | } |
8177 | #endif | ||
8178 | 8231 | ||
8179 | alloc_size = sizeof(struct net_device); | 8232 | alloc_size = sizeof(struct net_device); |
8180 | if (sizeof_priv) { | 8233 | if (sizeof_priv) { |
@@ -8231,12 +8284,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, | |||
8231 | if (netif_alloc_netdev_queues(dev)) | 8284 | if (netif_alloc_netdev_queues(dev)) |
8232 | goto free_all; | 8285 | goto free_all; |
8233 | 8286 | ||
8234 | #ifdef CONFIG_SYSFS | ||
8235 | dev->num_rx_queues = rxqs; | 8287 | dev->num_rx_queues = rxqs; |
8236 | dev->real_num_rx_queues = rxqs; | 8288 | dev->real_num_rx_queues = rxqs; |
8237 | if (netif_alloc_rx_queues(dev)) | 8289 | if (netif_alloc_rx_queues(dev)) |
8238 | goto free_all; | 8290 | goto free_all; |
8239 | #endif | ||
8240 | 8291 | ||
8241 | strcpy(dev->name, name); | 8292 | strcpy(dev->name, name); |
8242 | dev->name_assign_type = name_assign_type; | 8293 | dev->name_assign_type = name_assign_type; |
@@ -8275,9 +8326,7 @@ void free_netdev(struct net_device *dev) | |||
8275 | 8326 | ||
8276 | might_sleep(); | 8327 | might_sleep(); |
8277 | netif_free_tx_queues(dev); | 8328 | netif_free_tx_queues(dev); |
8278 | #ifdef CONFIG_SYSFS | 8329 | netif_free_rx_queues(dev); |
8279 | kvfree(dev->_rx); | ||
8280 | #endif | ||
8281 | 8330 | ||
8282 | kfree(rcu_dereference_protected(dev->ingress_queue, 1)); | 8331 | kfree(rcu_dereference_protected(dev->ingress_queue, 1)); |
8283 | 8332 | ||