author		Eric Dumazet <eric.dumazet@gmail.com>	2010-10-11 15:05:25 -0400
committer	David S. Miller <davem@davemloft.net>	2010-10-16 14:13:19 -0400
commit		564824b0c52c34692d804bb6ea214451615b0b50 (patch)
tree		d836fa51848026df74e2bec2b634f1fcf3c6d02f
parent		6f0333b8fde44b8c04a53b2461504f0e8f1cebe6 (diff)
net: allocate skbs on local node
commit b30973f877 (node-aware skb allocation) spread a wrong habit of allocating net driver skbs on a given memory node: the one closest to the NIC hardware. This is wrong because as soon as we try to scale the network stack, we need many cpus to handle traffic, and those cpus hit slub/slab management overhead on cross-node allocations/frees when they have to alloc/free skbs bound to a central node.

skbs allocated in the RX path are ephemeral; they have a very short lifetime, so the extra cost of maintaining NUMA affinity is too expensive. What appeared to be a nice idea four years ago is in fact a bad one.

In 2010, NIC hardware is multiqueue, or we use RPS to spread the load, and two 10Gb NICs might deliver more than 28 million packets per second, needing all the available cpus. The cost of cross-node handling in the network and vm stacks outweighs the small benefit the hardware gained from doing its DMA transfer into its 'local' memory node at RX time. Even differentiating the two allocations done for one skb (the sk_buff on the local node, the data part on the NIC hardware node) is not enough to bring good performance.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
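To make the effect concrete, here is a rough sketch of a driver RX refill path as it looks after this change. The ring structure and the example_* names are hypothetical (not part of this patch); only netdev_alloc_skb_ip_align() and the __netdev_alloc_skb() it wraps come from the tree. The point is that the refill runs on whichever cpu services the queue, so with NUMA_NO_NODE the skb head and data come from that cpu's node rather than from dev_to_node(dev->dev.parent).

#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* Hypothetical ring bookkeeping, for illustration only. */
struct example_rx_ring {
	struct sk_buff **bufs;
	unsigned int size;
	unsigned int next_to_fill;
	unsigned int buf_len;
};

/* Runs from the NAPI poll (or RPS target) cpu servicing this queue. */
static int example_rx_refill(struct net_device *dev, struct example_rx_ring *ring)
{
	while (ring->next_to_fill < ring->size) {
		/* -> __netdev_alloc_skb() -> __alloc_skb(..., NUMA_NO_NODE):
		 * the skb is now allocated on this cpu's local node.
		 */
		struct sk_buff *skb = netdev_alloc_skb_ip_align(dev, ring->buf_len);

		if (!skb)
			return -ENOMEM;	/* retry on the next poll */
		ring->bufs[ring->next_to_fill++] = skb;
	}
	return 0;
}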
-rw-r--r--	include/linux/skbuff.h	20
-rw-r--r--	net/core/skbuff.c	13
2 files changed, 17 insertions, 16 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 0b53c43ac92e..05a358f1ba11 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -496,13 +496,13 @@ extern struct sk_buff *__alloc_skb(unsigned int size,
 static inline struct sk_buff *alloc_skb(unsigned int size,
					gfp_t priority)
 {
-	return __alloc_skb(size, priority, 0, -1);
+	return __alloc_skb(size, priority, 0, NUMA_NO_NODE);
 }
 
 static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
					       gfp_t priority)
 {
-	return __alloc_skb(size, priority, 1, -1);
+	return __alloc_skb(size, priority, 1, NUMA_NO_NODE);
 }
 
 extern bool skb_recycle_check(struct sk_buff *skb, int skb_size);
@@ -1563,13 +1563,25 @@ static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev,
 	return skb;
 }
 
-extern struct page *__netdev_alloc_page(struct net_device *dev, gfp_t gfp_mask);
+/**
+ * __netdev_alloc_page - allocate a page for ps-rx on a specific device
+ * @dev: network device to receive on
+ * @gfp_mask: alloc_pages_node mask
+ *
+ * Allocate a new page. dev currently unused.
+ *
+ * %NULL is returned if there is no free memory.
+ */
+static inline struct page *__netdev_alloc_page(struct net_device *dev, gfp_t gfp_mask)
+{
+	return alloc_pages_node(NUMA_NO_NODE, gfp_mask, 0);
+}
 
 /**
  * netdev_alloc_page - allocate a page for ps-rx on a specific device
  * @dev: network device to receive on
  *
- * Allocate a new page node local to the specified device.
+ * Allocate a new page. dev currently unused.
  *
  * %NULL is returned if there is no free memory.
  */
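__netdev_alloc_page() becomes a trivial inline here (the out-of-line version in net/core/skbuff.c is removed below). A minimal sketch of how a page-split (ps-rx) receive path might use it, with hypothetical example_* helpers around the real __netdev_alloc_page() and skb_add_rx_frag(); the DMA mapping step is only indicated in a comment:

#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* Post one RX page: the page now comes from alloc_pages_node(NUMA_NO_NODE, ...),
 * i.e. the local node of the cpu doing the refill; @dev is kept only for API
 * symmetry.
 */
static struct page *example_post_rx_page(struct net_device *dev)
{
	struct page *page = __netdev_alloc_page(dev, GFP_ATOMIC);

	if (!page)
		return NULL;		/* caller retries later */
	/* ... dma_map_page() and hand the page to the NIC here ... */
	return page;
}

/* On completion, attach the received fragment to an skb. */
static void example_rx_complete(struct sk_buff *skb, struct page *page,
				unsigned int len)
{
	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, 0, len);
}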
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 752c1972b3a7..4e8b82e167d8 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -247,10 +247,9 @@ EXPORT_SYMBOL(__alloc_skb);
 struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
 		unsigned int length, gfp_t gfp_mask)
 {
-	int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1;
 	struct sk_buff *skb;
 
-	skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, node);
+	skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, NUMA_NO_NODE);
 	if (likely(skb)) {
 		skb_reserve(skb, NET_SKB_PAD);
 		skb->dev = dev;
@@ -259,16 +258,6 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
 }
 EXPORT_SYMBOL(__netdev_alloc_skb);
 
-struct page *__netdev_alloc_page(struct net_device *dev, gfp_t gfp_mask)
-{
-	int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1;
-	struct page *page;
-
-	page = alloc_pages_node(node, gfp_mask, 0);
-	return page;
-}
-EXPORT_SYMBOL(__netdev_alloc_page);
-
 void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
 		     int size)
 {
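For reference, what passing NUMA_NO_NODE means in the allocators used above: alloc_pages_node() (and the node-aware slab paths behind __alloc_skb()) treat a negative node id as "use the node of the cpu doing the allocation". A paraphrased sketch of that fallback as it looks in kernels of this era, not the verbatim include/linux/gfp.h:

/* Paraphrase of alloc_pages_node() behaviour in kernels of this era:
 * NUMA_NO_NODE (-1) falls back to the current cpu's node, which is
 * exactly the "local node" this patch wants for ephemeral RX buffers.
 */
static inline struct page *alloc_pages_node_sketch(int nid, gfp_t gfp_mask,
						   unsigned int order)
{
	if (nid < 0)			/* unknown node: use current node */
		nid = numa_node_id();

	return __alloc_pages(gfp_mask, order, node_zonelist(nid, gfp_mask));
}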