author	David S. Miller <davem@davemloft.net>	2014-08-25 18:51:53 -0400
committer	David S. Miller <davem@davemloft.net>	2014-08-25 19:29:42 -0400
commit	0b725a2ca61bedc33a2a63d0451d528b268cf975 (patch)
tree	efe818013ee258eeff23f83ca0c8d01b5117a316
parent	44a52ffd6402a19544fb9dee081730d36d413202 (diff)
net: Remove ndo_xmit_flush netdev operation, use signalling instead.
As reported by Jesper Dangaard Brouer, for high packet rates the
overhead of having another indirect call in the TX path is
non-trivial.

There is the indirect call itself, and then there is all of the
reloading of state to refetch the tail pointer value and then write
the device register.

Move to a more passive scheme, which requires only very light
modifications to the device drivers.

The signal is a new skb->xmit_more value: if it is non-zero, more SKBs
are pending to be transmitted on the same queue as the current SKB,
and therefore the driver may elide the tail pointer update.

Right now skb->xmit_more is always zero.

Signed-off-by: David S. Miller <davem@davemloft.net>
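For illustration only (this snippet is not part of the patch): a minimal sketch of the driver-side pattern the new flag enables. The names my_priv, my_ring and my_fill_tx_descriptors() are invented stand-ins; the igb and virtio_net hunks below show the actual conversions.

	/* Hypothetical ndo_start_xmit showing where the xmit_more check goes. */
	static netdev_tx_t my_start_xmit(struct sk_buff *skb, struct net_device *dev)
	{
		struct my_priv *priv = netdev_priv(dev);
		struct my_ring *ring = &priv->tx_ring[skb->queue_mapping];

		my_fill_tx_descriptors(ring, skb);	/* post descriptors for this skb */

		/* Only write the (expensive) tail/doorbell register when the
		 * core signals that no further SKBs are pending for this queue.
		 */
		if (!skb->xmit_more)
			writel(ring->next_to_use, ring->tail);

		return NETDEV_TX_OK;
	}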
-rw-r--r--	drivers/net/ethernet/intel/igb/igb_main.c	36
-rw-r--r--	drivers/net/virtio_net.c	12
-rw-r--r--	include/linux/netdevice.h	25
-rw-r--r--	include/linux/skbuff.h	2
4 files changed, 19 insertions, 56 deletions
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index b9c020a05fb8..89c29b40d61c 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -136,7 +136,6 @@ static void igb_update_phy_info(unsigned long);
 static void igb_watchdog(unsigned long);
 static void igb_watchdog_task(struct work_struct *);
 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
-static void igb_xmit_flush(struct net_device *netdev, u16 queue);
 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
 					  struct rtnl_link_stats64 *stats);
 static int igb_change_mtu(struct net_device *, int);
@@ -2076,7 +2075,6 @@ static const struct net_device_ops igb_netdev_ops = {
 	.ndo_open		= igb_open,
 	.ndo_stop		= igb_close,
 	.ndo_start_xmit		= igb_xmit_frame,
-	.ndo_xmit_flush		= igb_xmit_flush,
 	.ndo_get_stats64	= igb_get_stats64,
 	.ndo_set_rx_mode	= igb_set_rx_mode,
 	.ndo_set_mac_address	= igb_set_mac,
@@ -4917,6 +4915,14 @@ static void igb_tx_map(struct igb_ring *tx_ring,
 
 	tx_ring->next_to_use = i;
 
+	if (!skb->xmit_more) {
+		writel(i, tx_ring->tail);
+
+		/* we need this if more than one processor can write to our tail
+		 * at a time, it synchronizes IO on IA64/Altix systems
+		 */
+		mmiowb();
+	}
 	return;
 
 dma_error:
@@ -5052,20 +5058,17 @@ out_drop:
 	return NETDEV_TX_OK;
 }
 
-static struct igb_ring *__igb_tx_queue_mapping(struct igb_adapter *adapter, unsigned int r_idx)
+static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
+						    struct sk_buff *skb)
 {
+	unsigned int r_idx = skb->queue_mapping;
+
 	if (r_idx >= adapter->num_tx_queues)
 		r_idx = r_idx % adapter->num_tx_queues;
 
 	return adapter->tx_ring[r_idx];
 }
 
-static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
-						    struct sk_buff *skb)
-{
-	return __igb_tx_queue_mapping(adapter, skb->queue_mapping);
-}
-
 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
 				  struct net_device *netdev)
 {
@@ -5094,21 +5097,6 @@ static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
 	return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
 }
 
-static void igb_xmit_flush(struct net_device *netdev, u16 queue)
-{
-	struct igb_adapter *adapter = netdev_priv(netdev);
-	struct igb_ring *tx_ring;
-
-	tx_ring = __igb_tx_queue_mapping(adapter, queue);
-
-	writel(tx_ring->next_to_use, tx_ring->tail);
-
-	/* we need this if more than one processor can write to our tail
-	 * at a time, it synchronizes IO on IA64/Altix systems
-	 */
-	mmiowb();
-}
-
 /**
  *  igb_tx_timeout - Respond to a Tx Hang
  *  @netdev: network interface device structure
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 62421086d3e6..f0c2824f5e0f 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -953,15 +953,10 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 		}
 	}
 
-	return NETDEV_TX_OK;
-}
-
-static void xmit_flush(struct net_device *dev, u16 qnum)
-{
-	struct virtnet_info *vi = netdev_priv(dev);
-	struct send_queue *sq = &vi->sq[qnum];
-
-	virtqueue_kick(sq->vq);
+	if (!skb->xmit_more)
+		virtqueue_kick(sq->vq);
+
+	return NETDEV_TX_OK;
 }
 
 /*
@@ -1393,7 +1388,6 @@ static const struct net_device_ops virtnet_netdev = {
 	.ndo_open            = virtnet_open,
 	.ndo_stop            = virtnet_close,
 	.ndo_start_xmit      = start_xmit,
-	.ndo_xmit_flush      = xmit_flush,
 	.ndo_validate_addr   = eth_validate_addr,
 	.ndo_set_mac_address = virtnet_set_mac_address,
 	.ndo_set_rx_mode     = virtnet_set_rx_mode,
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 220c50984688..039b23786c22 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -782,19 +782,6 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
  *	(can also return NETDEV_TX_LOCKED iff NETIF_F_LLTX)
  *	Required can not be NULL.
  *
- * void (*ndo_xmit_flush)(struct net_device *dev, u16 queue);
- *	A driver implements this function when it wishes to support
- *	deferred TX queue flushing.  The idea is that the expensive
- *	operation to trigger TX queue processing can be done after
- *	N calls to ndo_start_xmit rather than being done every single
- *	time.  In this regime ndo_start_xmit will be called one or more
- *	times, and then a final ndo_xmit_flush call will be made to
- *	have the driver tell the device about the new pending TX queue
- *	entries.  The kernel keeps track of which queues need flushing
- *	by monitoring skb->queue_mapping of the packets it submits to
- *	ndo_start_xmit.  This is the queue value that will be passed
- *	to ndo_xmit_flush.
- *
  * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb,
  *                         void *accel_priv, select_queue_fallback_t fallback);
  *	Called to decide which queue to when device supports multiple
@@ -1018,7 +1005,6 @@ struct net_device_ops {
 	int			(*ndo_stop)(struct net_device *dev);
 	netdev_tx_t		(*ndo_start_xmit) (struct sk_buff *skb,
 						   struct net_device *dev);
-	void			(*ndo_xmit_flush)(struct net_device *dev, u16 queue);
 	u16			(*ndo_select_queue)(struct net_device *dev,
 						    struct sk_buff *skb,
 						    void *accel_priv,
@@ -3447,15 +3433,8 @@ int __init dev_proc_init(void);
 static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops,
 					      struct sk_buff *skb, struct net_device *dev)
 {
-	netdev_tx_t ret;
-	u16 q;
-
-	q = skb->queue_mapping;
-	ret = ops->ndo_start_xmit(skb, dev);
-	if (dev_xmit_complete(ret) && ops->ndo_xmit_flush)
-		ops->ndo_xmit_flush(dev, q);
-
-	return ret;
+	skb->xmit_more = 0;
+	return ops->ndo_start_xmit(skb, dev);
 }
 
 static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 18ddf9684a27..9b3802a197a8 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -452,6 +452,7 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1,
 *	@tc_verd: traffic control verdict
 *	@hash: the packet hash
 *	@queue_mapping: Queue mapping for multiqueue devices
+*	@xmit_more: More SKBs are pending for this queue
 *	@ndisc_nodetype: router type (from link layer)
 *	@ooo_okay: allow the mapping of a socket to a queue to be changed
 *	@l4_hash: indicate hash is a canonical 4-tuple hash over transport
@@ -558,6 +559,7 @@ struct sk_buff {
 
 	__u16			queue_mapping;
 	kmemcheck_bitfield_begin(flags2);
+	__u8			xmit_more:1;
 #ifdef CONFIG_IPV6_NDISC_NODETYPE
 	__u8			ndisc_nodetype:2;
 #endif