author    Eric Dumazet <edumazet@google.com>      2014-01-06 12:54:31 -0500
committer David S. Miller <davem@davemloft.net>   2014-01-06 16:37:09 -0500
commit    8f646c922d55047ecd6c65ada49ead88ed0db61e
tree      a8298774a1675f87a3ca71725bc01d20b0e1a279  /drivers/net/vxlan.c
parent    996b175e39ed42ec2aa0c63b4a03cc500aa6269f
vxlan: keep original skb ownership
Sathya Perla posted a patch trying to address the following problem:

<quote>
The vxlan driver sets itself as the socket owner for all the TX flows
it encapsulates (using vxlan_set_owner()) and assigns its own skb
destructor. This causes all tunneled traffic to land on only one TXQ,
as all encapsulated skbs refer to the vxlan socket and not the original
socket. Also, the vxlan skb destructor breaks some functionality for
tunneled traffic, like wmem accounting, TCP Small Queues and the
FQ/pacing packet scheduler.
</quote>

I reworked Sathya's patch and added some explanations.

vxlan_xmit() can avoid one skb_clone()/dev_kfree_skb() pair and gain
better drop monitor accuracy by calling kfree_skb() when appropriate.

The UDP socket used by vxlan to encapsulate xmit packets does not need
to be alive while packets leave the vxlan code. It is better to keep
the original socket ownership to get proper feedback from the qdisc and
NIC layers.

We use skb->sk to

A) control the amount of bytes/packets queued on behalf of a socket,
but the prior vxlan code did the skb->sk transfer without any
limit/control on the vxlan socket's sk_sndbuf.

B) serve security (e.g. SELinux) and netfilter uses, and I do not think
anything is prepared to handle the vxlan stacked case in this area.

By not changing ownership, vxlan tunnels behave like other tunnels.

As Stephen mentioned, we might do the same change in L2TP.

Reported-by: Sathya Perla <sathya.perla@emulex.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
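To make the ownership point concrete, the simplified sketch below (illustrative only, loosely modelled on skb_set_owner_w()/sock_wfree(); not the exact kernel source) shows what keeping skb->sk pointed at the originating socket normally provides: each in-flight skb charges its truesize to the sender and uncharges it from its destructor, and that per-socket feedback is what wmem accounting, TCP Small Queues and FQ/pacing rely on. vxlan_set_owner() replaced that owner with the vxlan UDP socket, so the feedback went to the wrong place.

/*
 * Simplified sketch, illustrative only: loosely modelled on
 * skb_set_owner_w()/sock_wfree(), with the locking and refcounting
 * details of the real kernel code omitted.
 */
static void sketch_tx_destructor(struct sk_buff *skb)
{
	/* Uncharge the sender once the skb really leaves the stack. */
	atomic_sub(skb->truesize, &skb->sk->sk_wmem_alloc);
}

static void sketch_set_tx_owner(struct sk_buff *skb, struct sock *sk)
{
	skb_orphan(skb);		/* drop any previous owner           */
	skb->sk = sk;			/* the sender, not the tunnel socket */
	skb->destructor = sketch_tx_destructor;
	atomic_add(skb->truesize, &sk->sk_wmem_alloc);
}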
Diffstat (limited to 'drivers/net/vxlan.c')
-rw-r--r--  drivers/net/vxlan.c | 31 ++++++++++---------------------
 1 file changed, 10 insertions(+), 21 deletions(-)
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 474a99ed0222..ab2e92eec949 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1381,20 +1381,6 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
 	return false;
 }
 
-static void vxlan_sock_put(struct sk_buff *skb)
-{
-	sock_put(skb->sk);
-}
-
-/* On transmit, associate with the tunnel socket */
-static void vxlan_set_owner(struct sock *sk, struct sk_buff *skb)
-{
-	skb_orphan(skb);
-	sock_hold(sk);
-	skb->sk = sk;
-	skb->destructor = vxlan_sock_put;
-}
-
 /* Compute source port for outgoing packet
  * first choice to use L4 flow hash since it will spread
  * better and maybe available from hardware
@@ -1514,8 +1500,6 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
 	ip6h->daddr = *daddr;
 	ip6h->saddr = *saddr;
 
-	vxlan_set_owner(vs->sock->sk, skb);
-
 	err = handle_offloads(skb);
 	if (err)
 		return err;
@@ -1572,8 +1556,6 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
 	uh->len = htons(skb->len);
 	uh->check = 0;
 
-	vxlan_set_owner(vs->sock->sk, skb);
-
 	err = handle_offloads(skb);
 	if (err)
 		return err;
@@ -1786,7 +1768,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct vxlan_dev *vxlan = netdev_priv(dev);
 	struct ethhdr *eth;
 	bool did_rsc = false;
-	struct vxlan_rdst *rdst;
+	struct vxlan_rdst *rdst, *fdst = NULL;
 	struct vxlan_fdb *f;
 
 	skb_reset_mac_header(skb);
@@ -1828,7 +1810,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
 				vxlan_fdb_miss(vxlan, eth->h_dest);
 
 			dev->stats.tx_dropped++;
-			dev_kfree_skb(skb);
+			kfree_skb(skb);
 			return NETDEV_TX_OK;
 		}
 	}
@@ -1836,12 +1818,19 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
 	list_for_each_entry_rcu(rdst, &f->remotes, list) {
 		struct sk_buff *skb1;
 
+		if (!fdst) {
+			fdst = rdst;
+			continue;
+		}
 		skb1 = skb_clone(skb, GFP_ATOMIC);
 		if (skb1)
 			vxlan_xmit_one(skb1, dev, rdst, did_rsc);
 	}
 
-	dev_kfree_skb(skb);
+	if (fdst)
+		vxlan_xmit_one(skb, dev, fdst, did_rsc);
+	else
+		kfree_skb(skb);
 	return NETDEV_TX_OK;
 }
 
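The clone-avoidance in the last hunk can also be exercised outside the kernel. Below is a small, self-contained user-space model (hypothetical names, plain C, not kernel code) of the same idea: remember the first destination, clone the buffer only for the remaining ones, and let the first destination consume the original, which is the skb_clone()/dev_kfree_skb() pair the commit message says vxlan_xmit() no longer needs.

/* User-space model, hypothetical and for illustration only. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct pkt {
	size_t len;
	char data[64];
};

static struct pkt *pkt_clone(const struct pkt *p)
{
	struct pkt *c = malloc(sizeof(*c));

	if (c)
		memcpy(c, p, sizeof(*c));
	return c;
}

/* Consumes (frees) the packet, like vxlan_xmit_one() consumes its skb. */
static void xmit_one(struct pkt *p, int dst)
{
	printf("sent %zu bytes to destination %d\n", p->len, dst);
	free(p);
}

static void xmit_all(struct pkt *p, const int *dsts, int ndst)
{
	int first = -1;

	for (int i = 0; i < ndst; i++) {
		if (first < 0) {
			first = dsts[i];	/* defer the first destination, no clone */
			continue;
		}
		struct pkt *c = pkt_clone(p);

		if (c)
			xmit_one(c, dsts[i]);
	}

	if (first >= 0)
		xmit_one(p, first);	/* original buffer goes to the first destination */
	else
		free(p);		/* no destination at all: just drop it */
}

int main(void)
{
	struct pkt *p = malloc(sizeof(*p));
	int dsts[] = { 1, 2, 3 };

	if (!p)
		return 1;
	p->len = (size_t)snprintf(p->data, sizeof(p->data), "payload");
	xmit_all(p, dsts, 3);
	return 0;
}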