diff options
author | Eric Dumazet <edumazet@google.com> | 2014-10-05 21:38:35 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2014-10-07 13:22:11 -0400 |
commit | 0287587884b15041203b3a362d485e1ab1f24445 (patch) | |
tree | 675ae57663c1ba3ee8768e65e7fb0e6d0259e04c /net/core | |
parent | fe971b95c22578456ff7198537827841c726d3f7 (diff) |
net: better IFF_XMIT_DST_RELEASE support
Testing xmit_more support with netperf and connected UDP sockets,
I found strange dst refcount false sharing.
Current handling of IFF_XMIT_DST_RELEASE is not optimal.
Dropping dst in validate_xmit_skb() is certainly too late in case
the packet was queued by cpu X but dequeued by cpu Y.
The logical point to take care of drop/force is in __dev_queue_xmit()
before even taking qdisc lock.
As Julian Anastasov pointed out, the need for skb_dst() might come from
some packet schedulers or classifiers.
This patch adds new helper to cleanly express needs of various drivers
or qdiscs/classifiers.
Drivers that need skb_dst() in their ndo_start_xmit() should call the
following helper in their setup instead of the prior:
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
->
netif_keep_dst(dev);
Instead of using a single bit, we use two bits, one being
eventually rebuilt in bonding/team drivers.
The other one is permanent and blocks IFF_XMIT_DST_RELEASE from being
rebuilt in bonding/team. Eventually, we could add something
smarter later.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/dev.c | 19 |
1 files changed, 9 insertions, 10 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index a63b8c43c1b6..3c5bdaa44486 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
@@ -2665,12 +2665,6 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device | |||
2665 | if (skb->next) | 2665 | if (skb->next) |
2666 | return skb; | 2666 | return skb; |
2667 | 2667 | ||
2668 | /* If device doesn't need skb->dst, release it right now while | ||
2669 | * its hot in this cpu cache | ||
2670 | */ | ||
2671 | if (dev->priv_flags & IFF_XMIT_DST_RELEASE) | ||
2672 | skb_dst_drop(skb); | ||
2673 | |||
2674 | features = netif_skb_features(skb); | 2668 | features = netif_skb_features(skb); |
2675 | skb = validate_xmit_vlan(skb, features); | 2669 | skb = validate_xmit_vlan(skb, features); |
2676 | if (unlikely(!skb)) | 2670 | if (unlikely(!skb)) |
@@ -2811,8 +2805,6 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, | |||
2811 | * waiting to be sent out; and the qdisc is not running - | 2805 | * waiting to be sent out; and the qdisc is not running - |
2812 | * xmit the skb directly. | 2806 | * xmit the skb directly. |
2813 | */ | 2807 | */ |
2814 | if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE)) | ||
2815 | skb_dst_force(skb); | ||
2816 | 2808 | ||
2817 | qdisc_bstats_update(q, skb); | 2809 | qdisc_bstats_update(q, skb); |
2818 | 2810 | ||
@@ -2827,7 +2819,6 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, | |||
2827 | 2819 | ||
2828 | rc = NET_XMIT_SUCCESS; | 2820 | rc = NET_XMIT_SUCCESS; |
2829 | } else { | 2821 | } else { |
2830 | skb_dst_force(skb); | ||
2831 | rc = q->enqueue(skb, q) & NET_XMIT_MASK; | 2822 | rc = q->enqueue(skb, q) & NET_XMIT_MASK; |
2832 | if (qdisc_run_begin(q)) { | 2823 | if (qdisc_run_begin(q)) { |
2833 | if (unlikely(contended)) { | 2824 | if (unlikely(contended)) { |
@@ -2924,6 +2915,14 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) | |||
2924 | 2915 | ||
2925 | skb_update_prio(skb); | 2916 | skb_update_prio(skb); |
2926 | 2917 | ||
2918 | /* If device/qdisc don't need skb->dst, release it right now while | ||
2919 | * its hot in this cpu cache. | ||
2920 | */ | ||
2921 | if (dev->priv_flags & IFF_XMIT_DST_RELEASE) | ||
2922 | skb_dst_drop(skb); | ||
2923 | else | ||
2924 | skb_dst_force(skb); | ||
2925 | |||
2927 | txq = netdev_pick_tx(dev, skb, accel_priv); | 2926 | txq = netdev_pick_tx(dev, skb, accel_priv); |
2928 | q = rcu_dereference_bh(txq->qdisc); | 2927 | q = rcu_dereference_bh(txq->qdisc); |
2929 | 2928 | ||
@@ -6674,7 +6673,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, | |||
6674 | INIT_LIST_HEAD(&dev->adj_list.lower); | 6673 | INIT_LIST_HEAD(&dev->adj_list.lower); |
6675 | INIT_LIST_HEAD(&dev->all_adj_list.upper); | 6674 | INIT_LIST_HEAD(&dev->all_adj_list.upper); |
6676 | INIT_LIST_HEAD(&dev->all_adj_list.lower); | 6675 | INIT_LIST_HEAD(&dev->all_adj_list.lower); |
6677 | dev->priv_flags = IFF_XMIT_DST_RELEASE; | 6676 | dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; |
6678 | setup(dev); | 6677 | setup(dev); |
6679 | 6678 | ||
6680 | dev->num_tx_queues = txqs; | 6679 | dev->num_tx_queues = txqs; |