aboutsummaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
author: Eric Dumazet <edumazet@google.com> 2014-10-05 21:38:35 -0400
committer: David S. Miller <davem@davemloft.net> 2014-10-07 13:22:11 -0400
commit: 0287587884b15041203b3a362d485e1ab1f24445 (patch)
tree: 675ae57663c1ba3ee8768e65e7fb0e6d0259e04c /net/core
parent: fe971b95c22578456ff7198537827841c726d3f7 (diff)
net: better IFF_XMIT_DST_RELEASE support
Testing xmit_more support with netperf and connected UDP sockets, I found strange dst refcount false sharing. Current handling of IFF_XMIT_DST_RELEASE is not optimal. Dropping dst in validate_xmit_skb() is certainly too late in case the packet was queued by cpu X but dequeued by cpu Y. The logical point to take care of drop/force is in __dev_queue_xmit(), before even taking the qdisc lock. As Julian Anastasov pointed out, the need for skb_dst() might come from some packet schedulers or classifiers. This patch adds a new helper to cleanly express the needs of various drivers or qdiscs/classifiers. Drivers that need skb_dst() in their ndo_start_xmit() should call the following helper in their setup instead of the prior one: dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; -> netif_keep_dst(dev); Instead of using a single bit, we use two bits, one being eventually rebuilt in bonding/team drivers. The other one is permanent and blocks IFF_XMIT_DST_RELEASE being rebuilt in bonding/team. Eventually, we could add something smarter later. Signed-off-by: Eric Dumazet <edumazet@google.com> Cc: Julian Anastasov <ja@ssi.bg> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/core')
-rw-r--r-- net/core/dev.c 19
1 files changed, 9 insertions, 10 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index a63b8c43c1b6..3c5bdaa44486 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2665,12 +2665,6 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
2665 if (skb->next) 2665 if (skb->next)
2666 return skb; 2666 return skb;
2667 2667
2668 /* If device doesn't need skb->dst, release it right now while
2669 * its hot in this cpu cache
2670 */
2671 if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
2672 skb_dst_drop(skb);
2673
2674 features = netif_skb_features(skb); 2668 features = netif_skb_features(skb);
2675 skb = validate_xmit_vlan(skb, features); 2669 skb = validate_xmit_vlan(skb, features);
2676 if (unlikely(!skb)) 2670 if (unlikely(!skb))
@@ -2811,8 +2805,6 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2811 * waiting to be sent out; and the qdisc is not running - 2805 * waiting to be sent out; and the qdisc is not running -
2812 * xmit the skb directly. 2806 * xmit the skb directly.
2813 */ 2807 */
2814 if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
2815 skb_dst_force(skb);
2816 2808
2817 qdisc_bstats_update(q, skb); 2809 qdisc_bstats_update(q, skb);
2818 2810
@@ -2827,7 +2819,6 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2827 2819
2828 rc = NET_XMIT_SUCCESS; 2820 rc = NET_XMIT_SUCCESS;
2829 } else { 2821 } else {
2830 skb_dst_force(skb);
2831 rc = q->enqueue(skb, q) & NET_XMIT_MASK; 2822 rc = q->enqueue(skb, q) & NET_XMIT_MASK;
2832 if (qdisc_run_begin(q)) { 2823 if (qdisc_run_begin(q)) {
2833 if (unlikely(contended)) { 2824 if (unlikely(contended)) {
@@ -2924,6 +2915,14 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
2924 2915
2925 skb_update_prio(skb); 2916 skb_update_prio(skb);
2926 2917
2918 /* If device/qdisc don't need skb->dst, release it right now while
2919 * its hot in this cpu cache.
2920 */
2921 if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
2922 skb_dst_drop(skb);
2923 else
2924 skb_dst_force(skb);
2925
2927 txq = netdev_pick_tx(dev, skb, accel_priv); 2926 txq = netdev_pick_tx(dev, skb, accel_priv);
2928 q = rcu_dereference_bh(txq->qdisc); 2927 q = rcu_dereference_bh(txq->qdisc);
2929 2928
@@ -6674,7 +6673,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
6674 INIT_LIST_HEAD(&dev->adj_list.lower); 6673 INIT_LIST_HEAD(&dev->adj_list.lower);
6675 INIT_LIST_HEAD(&dev->all_adj_list.upper); 6674 INIT_LIST_HEAD(&dev->all_adj_list.upper);
6676 INIT_LIST_HEAD(&dev->all_adj_list.lower); 6675 INIT_LIST_HEAD(&dev->all_adj_list.lower);
6677 dev->priv_flags = IFF_XMIT_DST_RELEASE; 6676 dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
6678 setup(dev); 6677 setup(dev);
6679 6678
6680 dev->num_tx_queues = txqs; 6679 dev->num_tx_queues = txqs;