author     David S. Miller <davem@davemloft.net>  2014-08-25 02:02:53 -0400
committer  David S. Miller <davem@davemloft.net>  2014-08-25 02:02:53 -0400
commit     fe88e6dd8b9ac65912d0d9d9372fe421d6eeb21e (patch)
tree       c38c0439f002092e03a52fc0743d429c7d6dfbc1
parent     4c83acbc565d53296f1731034c5041a0fbabcaeb (diff)
parent     c223a078cbe0a87d470b08db7c83c7053931ae63 (diff)
Merge branch 'ndo_xmit_flush'
Basic deferred TX queue flushing infrastructure.

Over time, and more recently at the Networking Workshop during Kernel
Summit in Chicago, we have discussed the idea of having some way to
optimize transmits of multiple TX packets at a time.

There are several areas of overhead that could be amortized with such
schemes.  One has to do with locking and transactional overhead, the
other with device-specific costs.  This patch set is aimed at the
device-specific costs.

Typically the driver queues up a packet in the TX queue and then has
to do something to have the device start processing that new entry.
Sometimes this is an MMIO write to a "tail" register, and in other
cases it can involve something as expensive as a hypervisor call.

The basic setup defined here is that when the driver supports deferred
TX queue flushing, ndo_start_xmit should no longer perform that
operation.  Instead a new operation, ndo_xmit_flush, should do it.

I have converted IGB and virtio_net as initial example users.  The IGB
conversion is tested; virtio_net is not, but it does compile :-)

All ndo_start_xmit call sites have been abstracted behind a new helper
called netdev_start_xmit().

This just adds the infrastructure; it does not yet add any call sites
that actually perform multiple ndo_start_xmit calls per ndo_xmit_flush
invocation.

Signed-off-by: David S. Miller <davem@davemloft.net>
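As a rough illustration of where this is headed (not part of this merge; the function name xmit_batch and its caller are hypothetical), a future batching transmit path might pair several ndo_start_xmit calls with a single trailing ndo_xmit_flush, paying the doorbell write or hypervisor kick once per batch rather than once per packet:

```c
#include <linux/netdevice.h>
#include <linux/skbuff.h>

/*
 * Hypothetical sketch only -- this series adds the ndo_xmit_flush hook and
 * the netdev_start_xmit() wrapper, but no caller batches packets yet.
 * Assumes all skbs in the array map to the same TX queue.
 */
static int xmit_batch(struct net_device *dev, struct sk_buff **skbs, int n)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	u16 queue = skbs[0]->queue_mapping;
	int i, sent = 0;

	for (i = 0; i < n; i++) {
		netdev_tx_t rc = ops->ndo_start_xmit(skbs[i], dev);

		if (!dev_xmit_complete(rc))
			break;		/* driver did not accept the packet */
		sent++;
	}

	/* One flush tells the device about every entry queued above. */
	if (sent && ops->ndo_xmit_flush)
		ops->ndo_xmit_flush(dev, queue);

	return sent;
}
```

The real batching call sites would of course live behind the qdisc/dev_hard_start_xmit machinery; this only sketches the intended pairing of multiple ndo_start_xmit calls with one ndo_xmit_flush.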
-rw-r--r--  drivers/net/ethernet/intel/igb/igb_main.c | 35
-rw-r--r--  drivers/net/virtio_net.c                  | 10
-rw-r--r--  drivers/net/wan/dlci.c                    |  2
-rw-r--r--  drivers/usb/gadget/function/f_ncm.c       |  2
-rw-r--r--  include/linux/netdevice.h                 | 35
-rw-r--r--  net/atm/mpc.c                             |  2
-rw-r--r--  net/core/dev.c                            |  5
-rw-r--r--  net/core/netpoll.c                        |  3
-rw-r--r--  net/core/pktgen.c                         |  4
-rw-r--r--  net/packet/af_packet.c                    |  3
-rw-r--r--  net/sched/sch_teql.c                      |  3
11 files changed, 77 insertions(+), 27 deletions(-)
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index cb14bbdfb056..b9c020a05fb8 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -136,6 +136,7 @@ static void igb_update_phy_info(unsigned long);
 static void igb_watchdog(unsigned long);
 static void igb_watchdog_task(struct work_struct *);
 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
+static void igb_xmit_flush(struct net_device *netdev, u16 queue);
 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
 						 struct rtnl_link_stats64 *stats);
 static int igb_change_mtu(struct net_device *, int);
@@ -2075,6 +2076,7 @@ static const struct net_device_ops igb_netdev_ops = {
 	.ndo_open		= igb_open,
 	.ndo_stop		= igb_close,
 	.ndo_start_xmit		= igb_xmit_frame,
+	.ndo_xmit_flush		= igb_xmit_flush,
 	.ndo_get_stats64	= igb_get_stats64,
 	.ndo_set_rx_mode	= igb_set_rx_mode,
 	.ndo_set_mac_address	= igb_set_mac,
@@ -4915,13 +4917,6 @@ static void igb_tx_map(struct igb_ring *tx_ring,
 
 	tx_ring->next_to_use = i;
 
-	writel(i, tx_ring->tail);
-
-	/* we need this if more than one processor can write to our tail
-	 * at a time, it synchronizes IO on IA64/Altix systems
-	 */
-	mmiowb();
-
 	return;
 
 dma_error:
@@ -5057,17 +5052,20 @@ out_drop:
 	return NETDEV_TX_OK;
 }
 
-static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
-						    struct sk_buff *skb)
+static struct igb_ring *__igb_tx_queue_mapping(struct igb_adapter *adapter, unsigned int r_idx)
 {
-	unsigned int r_idx = skb->queue_mapping;
-
 	if (r_idx >= adapter->num_tx_queues)
 		r_idx = r_idx % adapter->num_tx_queues;
 
 	return adapter->tx_ring[r_idx];
 }
 
+static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
+						    struct sk_buff *skb)
+{
+	return __igb_tx_queue_mapping(adapter, skb->queue_mapping);
+}
+
 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
 				  struct net_device *netdev)
 {
@@ -5096,6 +5094,21 @@ static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
 	return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
 }
 
+static void igb_xmit_flush(struct net_device *netdev, u16 queue)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct igb_ring *tx_ring;
+
+	tx_ring = __igb_tx_queue_mapping(adapter, queue);
+
+	writel(tx_ring->next_to_use, tx_ring->tail);
+
+	/* we need this if more than one processor can write to our tail
+	 * at a time, it synchronizes IO on IA64/Altix systems
+	 */
+	mmiowb();
+}
+
 /**
  * igb_tx_timeout - Respond to a Tx Hang
  * @netdev: network interface device structure
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 59caa06f34a6..62421086d3e6 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -934,7 +934,6 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 		dev_kfree_skb_any(skb);
 		return NETDEV_TX_OK;
 	}
-	virtqueue_kick(sq->vq);
 
 	/* Don't wait up for transmitted skbs to be freed. */
 	skb_orphan(skb);
@@ -957,6 +956,14 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 	return NETDEV_TX_OK;
 }
 
+static void xmit_flush(struct net_device *dev, u16 qnum)
+{
+	struct virtnet_info *vi = netdev_priv(dev);
+	struct send_queue *sq = &vi->sq[qnum];
+
+	virtqueue_kick(sq->vq);
+}
+
 /*
  * Send command via the control virtqueue and check status.  Commands
  * supported by the hypervisor, as indicated by feature bits, should
@@ -1386,6 +1393,7 @@ static const struct net_device_ops virtnet_netdev = {
 	.ndo_open            = virtnet_open,
 	.ndo_stop            = virtnet_close,
 	.ndo_start_xmit      = start_xmit,
+	.ndo_xmit_flush      = xmit_flush,
 	.ndo_validate_addr   = eth_validate_addr,
 	.ndo_set_mac_address = virtnet_set_mac_address,
 	.ndo_set_rx_mode     = virtnet_set_rx_mode,
diff --git a/drivers/net/wan/dlci.c b/drivers/net/wan/dlci.c
index 43c9960dce1c..81b22a180aad 100644
--- a/drivers/net/wan/dlci.c
+++ b/drivers/net/wan/dlci.c
@@ -193,7 +193,7 @@ static netdev_tx_t dlci_transmit(struct sk_buff *skb, struct net_device *dev)
 	struct dlci_local *dlp = netdev_priv(dev);
 
 	if (skb)
-		dlp->slave->netdev_ops->ndo_start_xmit(skb, dlp->slave);
+		netdev_start_xmit(skb, dlp->slave);
 	return NETDEV_TX_OK;
 }
 
diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c
index bcdc882cd415..cb5d646db6a7 100644
--- a/drivers/usb/gadget/function/f_ncm.c
+++ b/drivers/usb/gadget/function/f_ncm.c
@@ -1101,7 +1101,7 @@ static void ncm_tx_tasklet(unsigned long data)
 	/* Only send if data is available. */
 	if (ncm->skb_tx_data) {
 		ncm->timer_force_tx = true;
-		ncm->netdev->netdev_ops->ndo_start_xmit(NULL, ncm->netdev);
+		netdev_start_xmit(NULL, ncm->netdev);
 		ncm->timer_force_tx = false;
 	}
 }
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index eb73444e1bd0..220c50984688 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -782,6 +782,19 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
  *	(can also return NETDEV_TX_LOCKED iff NETIF_F_LLTX)
  *	Required can not be NULL.
  *
+ * void (*ndo_xmit_flush)(struct net_device *dev, u16 queue);
+ *	A driver implements this function when it wishes to support
+ *	deferred TX queue flushing. The idea is that the expensive
+ *	operation to trigger TX queue processing can be done after
+ *	N calls to ndo_start_xmit rather than being done every single
+ *	time. In this regime ndo_start_xmit will be called one or more
+ *	times, and then a final ndo_xmit_flush call will be made to
+ *	have the driver tell the device about the new pending TX queue
+ *	entries. The kernel keeps track of which queues need flushing
+ *	by monitoring skb->queue_mapping of the packets it submits to
+ *	ndo_start_xmit. This is the queue value that will be passed
+ *	to ndo_xmit_flush.
+ *
  * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb,
  *                         void *accel_priv, select_queue_fallback_t fallback);
  *	Called to decide which queue to when device supports multiple
@@ -1005,6 +1018,7 @@ struct net_device_ops {
 	int			(*ndo_stop)(struct net_device *dev);
 	netdev_tx_t		(*ndo_start_xmit) (struct sk_buff *skb,
 						   struct net_device *dev);
+	void			(*ndo_xmit_flush)(struct net_device *dev, u16 queue);
 	u16			(*ndo_select_queue)(struct net_device *dev,
 						    struct sk_buff *skb,
 						    void *accel_priv,
@@ -3430,6 +3444,27 @@ int __init dev_proc_init(void);
 #define dev_proc_init() 0
 #endif
 
+static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops,
+					      struct sk_buff *skb, struct net_device *dev)
+{
+	netdev_tx_t ret;
+	u16 q;
+
+	q = skb->queue_mapping;
+	ret = ops->ndo_start_xmit(skb, dev);
+	if (dev_xmit_complete(ret) && ops->ndo_xmit_flush)
+		ops->ndo_xmit_flush(dev, q);
+
+	return ret;
+}
+
+static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+
+	return __netdev_start_xmit(ops, skb, dev);
+}
+
 int netdev_class_create_file_ns(struct class_attribute *class_attr,
 				const void *ns);
 void netdev_class_remove_file_ns(struct class_attribute *class_attr,
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index e8e0e7a8a23d..d662da161e5a 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -599,7 +599,7 @@ static netdev_tx_t mpc_send_packet(struct sk_buff *skb,
 	}
 
 non_ip:
-	return mpc->old_ops->ndo_start_xmit(skb, dev);
+	return __netdev_start_xmit(mpc->old_ops, skb, dev);
 }
 
 static int atm_mpoa_vcc_attach(struct atm_vcc *vcc, void __user *arg)
diff --git a/net/core/dev.c b/net/core/dev.c
index b6a718ec11c1..26d296c2447c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2602,7 +2602,6 @@ EXPORT_SYMBOL(netif_skb_features);
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			struct netdev_queue *txq)
 {
-	const struct net_device_ops *ops = dev->netdev_ops;
 	int rc = NETDEV_TX_OK;
 	unsigned int skb_len;
 
@@ -2667,7 +2666,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 
 		skb_len = skb->len;
 		trace_net_dev_start_xmit(skb, dev);
-		rc = ops->ndo_start_xmit(skb, dev);
+		rc = netdev_start_xmit(skb, dev);
 		trace_net_dev_xmit(skb, rc, dev, skb_len);
 		if (rc == NETDEV_TX_OK)
 			txq_trans_update(txq);
@@ -2686,7 +2685,7 @@ gso:
 
 		skb_len = nskb->len;
 		trace_net_dev_start_xmit(nskb, dev);
-		rc = ops->ndo_start_xmit(nskb, dev);
+		rc = netdev_start_xmit(nskb, dev);
 		trace_net_dev_xmit(nskb, rc, dev, skb_len);
 		if (unlikely(rc != NETDEV_TX_OK)) {
 			if (rc & ~NETDEV_TX_MASK)
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 907fb5e36c02..a5ad06828d67 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -72,7 +72,6 @@ module_param(carrier_timeout, uint, 0644);
 static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			      struct netdev_queue *txq)
 {
-	const struct net_device_ops *ops = dev->netdev_ops;
 	int status = NETDEV_TX_OK;
 	netdev_features_t features;
 
@@ -92,7 +91,7 @@ static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev,
 		skb->vlan_tci = 0;
 	}
 
-	status = ops->ndo_start_xmit(skb, dev);
+	status = netdev_start_xmit(skb, dev);
 	if (status == NETDEV_TX_OK)
 		txq_trans_update(txq);
 
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 8b849ddfef2e..83e2b4b19eb7 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3285,8 +3285,6 @@ static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev)
 static void pktgen_xmit(struct pktgen_dev *pkt_dev)
 {
 	struct net_device *odev = pkt_dev->odev;
-	netdev_tx_t (*xmit)(struct sk_buff *, struct net_device *)
-		= odev->netdev_ops->ndo_start_xmit;
 	struct netdev_queue *txq;
 	u16 queue_map;
 	int ret;
@@ -3339,7 +3337,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
 		goto unlock;
 	}
 	atomic_inc(&(pkt_dev->skb->users));
-	ret = (*xmit)(pkt_dev->skb, odev);
+	ret = netdev_start_xmit(pkt_dev->skb, odev);
 
 	switch (ret) {
 	case NETDEV_TX_OK:
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 93896d2092f6..0dfa990d4eaa 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -240,7 +240,6 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po);
 static int packet_direct_xmit(struct sk_buff *skb)
 {
 	struct net_device *dev = skb->dev;
-	const struct net_device_ops *ops = dev->netdev_ops;
 	netdev_features_t features;
 	struct netdev_queue *txq;
 	int ret = NETDEV_TX_BUSY;
@@ -262,7 +261,7 @@ static int packet_direct_xmit(struct sk_buff *skb)
 
 	HARD_TX_LOCK(dev, txq, smp_processor_id());
 	if (!netif_xmit_frozen_or_drv_stopped(txq)) {
-		ret = ops->ndo_start_xmit(skb, dev);
+		ret = netdev_start_xmit(skb, dev);
 		if (ret == NETDEV_TX_OK)
 			txq_trans_update(txq);
 	}
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index bd33793b527e..64cd93ca8104 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -301,7 +301,6 @@ restart:
 	do {
 		struct net_device *slave = qdisc_dev(q);
 		struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
-		const struct net_device_ops *slave_ops = slave->netdev_ops;
 
 		if (slave_txq->qdisc_sleeping != q)
 			continue;
@@ -317,7 +316,7 @@ restart:
 			unsigned int length = qdisc_pkt_len(skb);
 
 			if (!netif_xmit_frozen_or_stopped(slave_txq) &&
-			    slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) {
+			    netdev_start_xmit(skb, slave) == NETDEV_TX_OK) {
 				txq_trans_update(slave_txq);
 				__netif_tx_unlock(slave_txq);
 				master->slaves = NEXT_SLAVE(q);