aboutsummaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
author: Eric Dumazet <eric.dumazet@gmail.com> 2010-04-16 08:18:22 -0400
committer: David S. Miller <davem@davemloft.net> 2010-04-18 05:39:41 -0400
commitfc6055a5ba31e2c14e36e8939f9bf2b6d586a7f5 (patch)
treeb55954230d0d849d1f7b0517ced4cc1ee6fd8157 /net/core
parent9958da0501fced47c1ac5c5a3a7731c87e45472c (diff)
net: Introduce skb_orphan_try()
A transmitted skb might be attached to a socket and a destructor, for memory accounting purposes. Traditionally, this destructor is called at tx completion time, when the skb is freed. When tx completion is performed by a different cpu than the sender, this forces some cache lines to change ownership. XPS was an attempt to give tx completion to the initial cpu. David's idea is to call the destructor right before giving the skb to the device (the call to ndo_start_xmit()). Because device queues are usually small, orphaning the skb before tx completion is not a big deal. Some drivers already do this; we could do it at the upper level. There is one known exception to this early orphaning, called tx timestamping. It needs to keep a reference to the socket until the device can give a hardware or software timestamp. This patch adds a skb_orphan_try() helper, to centralize all exceptions to early orphaning in one spot, and uses it in dev_hard_start_xmit(). "tbench 16" results on a Nehalem machine (2 X5570 @ 2.93GHz): before: Throughput 4428.9 MB/sec 16 procs; after: Throughput 4448.14 MB/sec 16 procs. UDP should get even better results, its destructor being more complex, since SOCK_USE_WRITE_QUEUE is not set (four atomic ops instead of one). Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/core')
-rw-r--r-- net/core/dev.c 27
1 files changed, 13 insertions, 14 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 8092f01713fb..8eb50e2292fb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1880,6 +1880,17 @@ static int dev_gso_segment(struct sk_buff *skb)
1880 return 0; 1880 return 0;
1881} 1881}
1882 1882
1883/*
1884 * Try to orphan skb early, right before transmission by the device.
1885 * We cannot orphan skb if tx timestamp is requested, since
1886 * drivers need to call skb_tstamp_tx() to send the timestamp.
1887 */
1888static inline void skb_orphan_try(struct sk_buff *skb)
1889{
1890 if (!skb_tx(skb)->flags)
1891 skb_orphan(skb);
1892}
1893
1883int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, 1894int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1884 struct netdev_queue *txq) 1895 struct netdev_queue *txq)
1885{ 1896{
@@ -1904,23 +1915,10 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1904 if (dev->priv_flags & IFF_XMIT_DST_RELEASE) 1915 if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
1905 skb_dst_drop(skb); 1916 skb_dst_drop(skb);
1906 1917
1918 skb_orphan_try(skb);
1907 rc = ops->ndo_start_xmit(skb, dev); 1919 rc = ops->ndo_start_xmit(skb, dev);
1908 if (rc == NETDEV_TX_OK) 1920 if (rc == NETDEV_TX_OK)
1909 txq_trans_update(txq); 1921 txq_trans_update(txq);
1910 /*
1911 * TODO: if skb_orphan() was called by
1912 * dev->hard_start_xmit() (for example, the unmodified
1913 * igb driver does that; bnx2 doesn't), then
1914 * skb_tx_software_timestamp() will be unable to send
1915 * back the time stamp.
1916 *
1917 * How can this be prevented? Always create another
1918 * reference to the socket before calling
1919 * dev->hard_start_xmit()? Prevent that skb_orphan()
1920 * does anything in dev->hard_start_xmit() by clearing
1921 * the skb destructor before the call and restoring it
1922 * afterwards, then doing the skb_orphan() ourselves?
1923 */
1924 return rc; 1922 return rc;
1925 } 1923 }
1926 1924
@@ -1938,6 +1936,7 @@ gso:
1938 if (dev->priv_flags & IFF_XMIT_DST_RELEASE) 1936 if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
1939 skb_dst_drop(nskb); 1937 skb_dst_drop(nskb);
1940 1938
1939 skb_orphan_try(nskb);
1941 rc = ops->ndo_start_xmit(nskb, dev); 1940 rc = ops->ndo_start_xmit(nskb, dev);
1942 if (unlikely(rc != NETDEV_TX_OK)) { 1941 if (unlikely(rc != NETDEV_TX_OK)) {
1943 if (rc & ~NETDEV_TX_MASK) 1942 if (rc & ~NETDEV_TX_MASK)