aboutsummaryrefslogtreecommitdiffstats
path: root/net/core/dev.c
diff options
context:
space:
mode:
authorAlexei Starovoitov <ast@kernel.org>2018-01-05 18:30:47 -0500
committerAlexei Starovoitov <ast@kernel.org>2018-01-05 18:31:20 -0500
commit11d16edb04f113348b0c1d0c26cb666e9baaa7d3 (patch)
tree3e3d4ae57b0ebd5158afd33be318775883e943fa /net/core/dev.c
parent5f103c5d4dbadec0f2cacd39b6429e1b8a8cf983 (diff)
parent0fca931a6f21c11f675363b92b5a4fe86da59f30 (diff)
Merge branch 'xdp_rxq_info'
Jesper Dangaard Brouer says: ==================== V4: * Added reviewers/acks to patches * Fix patch desc in i40e that got out-of-sync with code * Add SPDX license headers for the two new files added in patch 14 V3: * Fixed bug in virtio_net driver * Removed export of xdp_rxq_info_init() V2: * Changed API exposed to drivers - Removed invocation of "init" in drivers, and only call "reg" (Suggested by Saeed) - Allow "reg" to fail and handle this in drivers (Suggested by David Ahern) * Removed the SINKQ qtype, instead allow to register as "unused" * Also fixed some drivers during testing on actual HW (noted in patches) There is a need for XDP to know more about the RX-queue a given XDP frame has arrived on, for both the XDP bpf-prog and the kernel side. Instead of extending struct xdp_buff each time new info is needed, this patchset takes a different approach. Struct xdp_buff is only extended with a pointer to a struct xdp_rxq_info (making it easier to extend this later). This xdp_rxq_info contains information related to how the driver has set up the individual RX-queues. This is read-mostly information, and all xdp_buff frames (in drivers napi_poll) point to the same xdp_rxq_info (per RX-queue). We stress this data/cache-line is for read-mostly info. This is NOT for dynamic per packet info, use the data_meta for such use-cases. This patchset starts out small, and only exposes ingress_ifindex and the RX-queue index to the XDP/BPF program. Access to tangible info like the ingress ifindex and RX queue index is fairly easy to comprehend. Other future use-cases could allow XDP frames to be recycled back to the originating device driver, by providing info on RX device and queue number. As XDP doesn't have driver feature flags, and eBPF code due to bpf-tail-calls cannot determine which XDP driver invokes it, this patchset has to update every driver that supports XDP. 
For driver developers (review individual driver patches!): The xdp_rxq_info is tied to the driver's RX-ring(s). Whenever an RX-ring modification requires (temporarily) stopping RX frames, then the xdp_rxq_info should (likely) also be unregistered and re-registered, especially if reallocating the pages in the ring. Make sure ethtool set_channels does the right thing. When replacing the XDP prog, if and only if the RX-ring needs to be changed, then also re-register the xdp_rxq_info. I'm Cc'ing the individual driver patches to the registered maintainers. Testing: I've only tested the NIC drivers I have hardware for. The general test procedure is to (DUT = Device Under Test): (1) run pktgen script pktgen_sample04_many_flows.sh (against DUT) (2) run samples/bpf program xdp_rxq_info --dev $DEV (on DUT) (3) runtime modify number of NIC queues via ethtool -L (on DUT) (4) runtime modify number of NIC ring-size via ethtool -G (on DUT) Patch based on git tree bpf-next (at commit fb982666e380c1632a): https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/ ==================== Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'net/core/dev.c')
-rw-r--r--net/core/dev.c69
1 files changed, 59 insertions, 10 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 2eb66c0d9cdb..d7925ef8743d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3906,9 +3906,33 @@ drop:
3906 return NET_RX_DROP; 3906 return NET_RX_DROP;
3907} 3907}
3908 3908
3909static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb)
3910{
3911 struct net_device *dev = skb->dev;
3912 struct netdev_rx_queue *rxqueue;
3913
3914 rxqueue = dev->_rx;
3915
3916 if (skb_rx_queue_recorded(skb)) {
3917 u16 index = skb_get_rx_queue(skb);
3918
3919 if (unlikely(index >= dev->real_num_rx_queues)) {
3920 WARN_ONCE(dev->real_num_rx_queues > 1,
3921 "%s received packet on queue %u, but number "
3922 "of RX queues is %u\n",
3923 dev->name, index, dev->real_num_rx_queues);
3924
3925 return rxqueue; /* Return first rxqueue */
3926 }
3927 rxqueue += index;
3928 }
3929 return rxqueue;
3930}
3931
3909static u32 netif_receive_generic_xdp(struct sk_buff *skb, 3932static u32 netif_receive_generic_xdp(struct sk_buff *skb,
3910 struct bpf_prog *xdp_prog) 3933 struct bpf_prog *xdp_prog)
3911{ 3934{
3935 struct netdev_rx_queue *rxqueue;
3912 u32 metalen, act = XDP_DROP; 3936 u32 metalen, act = XDP_DROP;
3913 struct xdp_buff xdp; 3937 struct xdp_buff xdp;
3914 void *orig_data; 3938 void *orig_data;
@@ -3952,6 +3976,9 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
3952 xdp.data_hard_start = skb->data - skb_headroom(skb); 3976 xdp.data_hard_start = skb->data - skb_headroom(skb);
3953 orig_data = xdp.data; 3977 orig_data = xdp.data;
3954 3978
3979 rxqueue = netif_get_rxqueue(skb);
3980 xdp.rxq = &rxqueue->xdp_rxq;
3981
3955 act = bpf_prog_run_xdp(xdp_prog, &xdp); 3982 act = bpf_prog_run_xdp(xdp_prog, &xdp);
3956 3983
3957 off = xdp.data - orig_data; 3984 off = xdp.data - orig_data;
@@ -7589,12 +7616,12 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev,
7589} 7616}
7590EXPORT_SYMBOL(netif_stacked_transfer_operstate); 7617EXPORT_SYMBOL(netif_stacked_transfer_operstate);
7591 7618
7592#ifdef CONFIG_SYSFS
7593static int netif_alloc_rx_queues(struct net_device *dev) 7619static int netif_alloc_rx_queues(struct net_device *dev)
7594{ 7620{
7595 unsigned int i, count = dev->num_rx_queues; 7621 unsigned int i, count = dev->num_rx_queues;
7596 struct netdev_rx_queue *rx; 7622 struct netdev_rx_queue *rx;
7597 size_t sz = count * sizeof(*rx); 7623 size_t sz = count * sizeof(*rx);
7624 int err = 0;
7598 7625
7599 BUG_ON(count < 1); 7626 BUG_ON(count < 1);
7600 7627
@@ -7604,11 +7631,39 @@ static int netif_alloc_rx_queues(struct net_device *dev)
7604 7631
7605 dev->_rx = rx; 7632 dev->_rx = rx;
7606 7633
7607 for (i = 0; i < count; i++) 7634 for (i = 0; i < count; i++) {
7608 rx[i].dev = dev; 7635 rx[i].dev = dev;
7636
7637 /* XDP RX-queue setup */
7638 err = xdp_rxq_info_reg(&rx[i].xdp_rxq, dev, i);
7639 if (err < 0)
7640 goto err_rxq_info;
7641 }
7609 return 0; 7642 return 0;
7643
7644err_rxq_info:
7645 /* Rollback successful reg's and free other resources */
7646 while (i--)
7647 xdp_rxq_info_unreg(&rx[i].xdp_rxq);
7648 kfree(dev->_rx);
7649 dev->_rx = NULL;
7650 return err;
7651}
7652
7653static void netif_free_rx_queues(struct net_device *dev)
7654{
7655 unsigned int i, count = dev->num_rx_queues;
7656 struct netdev_rx_queue *rx;
7657
7658 /* netif_alloc_rx_queues alloc failed, resources have been unreg'ed */
7659 if (!dev->_rx)
7660 return;
7661
7662 rx = dev->_rx;
7663
7664 for (i = 0; i < count; i++)
7665 xdp_rxq_info_unreg(&rx[i].xdp_rxq);
7610} 7666}
7611#endif
7612 7667
7613static void netdev_init_one_queue(struct net_device *dev, 7668static void netdev_init_one_queue(struct net_device *dev,
7614 struct netdev_queue *queue, void *_unused) 7669 struct netdev_queue *queue, void *_unused)
@@ -8169,12 +8224,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
8169 return NULL; 8224 return NULL;
8170 } 8225 }
8171 8226
8172#ifdef CONFIG_SYSFS
8173 if (rxqs < 1) { 8227 if (rxqs < 1) {
8174 pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n"); 8228 pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
8175 return NULL; 8229 return NULL;
8176 } 8230 }
8177#endif
8178 8231
8179 alloc_size = sizeof(struct net_device); 8232 alloc_size = sizeof(struct net_device);
8180 if (sizeof_priv) { 8233 if (sizeof_priv) {
@@ -8231,12 +8284,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
8231 if (netif_alloc_netdev_queues(dev)) 8284 if (netif_alloc_netdev_queues(dev))
8232 goto free_all; 8285 goto free_all;
8233 8286
8234#ifdef CONFIG_SYSFS
8235 dev->num_rx_queues = rxqs; 8287 dev->num_rx_queues = rxqs;
8236 dev->real_num_rx_queues = rxqs; 8288 dev->real_num_rx_queues = rxqs;
8237 if (netif_alloc_rx_queues(dev)) 8289 if (netif_alloc_rx_queues(dev))
8238 goto free_all; 8290 goto free_all;
8239#endif
8240 8291
8241 strcpy(dev->name, name); 8292 strcpy(dev->name, name);
8242 dev->name_assign_type = name_assign_type; 8293 dev->name_assign_type = name_assign_type;
@@ -8275,9 +8326,7 @@ void free_netdev(struct net_device *dev)
8275 8326
8276 might_sleep(); 8327 might_sleep();
8277 netif_free_tx_queues(dev); 8328 netif_free_tx_queues(dev);
8278#ifdef CONFIG_SYSFS 8329 netif_free_rx_queues(dev);
8279 kvfree(dev->_rx);
8280#endif
8281 8330
8282 kfree(rcu_dereference_protected(dev->ingress_queue, 1)); 8331 kfree(rcu_dereference_protected(dev->ingress_queue, 1));
8283 8332