author	David S. Miller <davem@davemloft.net>	2005-08-17 17:57:30 -0400
committer	David S. Miller <davem@sunset.davemloft.net>	2005-08-29 19:01:54 -0400
commit	d179cd12928443f3ec29cfbc3567439644bd0afc (patch)
tree	0bfc57e73f0bf9f7bb9d5c8ce7d3d5afe550f94e
parent	e92ae93a8aa66aea12935420cb22d4df1c18d023 (diff)
[NET]: Implement SKB fast cloning.
Protocols that make extensive use of SKB cloning, for example TCP, eat at
least 2 allocations per packet sent as a result.

To cut the kmalloc() count in half, we implement a pre-allocation scheme
wherein we allocate 2 sk_buff objects in advance, then use a simple
reference count to free up the memory at the correct time.

Based upon an initial patch by Thomas Graf and suggestions from Herbert Xu.

Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--	include/linux/skbuff.h	26
-rw-r--r--	include/net/sock.h	2
-rw-r--r--	net/core/skbuff.c	82
-rw-r--r--	net/ipv4/tcp_output.c	4
4 files changed, 98 insertions(+), 16 deletions(-)
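
For illustration, the layout and reference counting introduced by this patch can be sketched in a few lines of user-space C. This is only a sketch under stated assumptions: struct toy_skb, the enum constants and every local name below are stand-ins, not kernel identifiers; in the patch itself the object comes from the new skbuff_fclone_cache, holds two struct sk_buff back to back followed by an atomic_t, and the plain integer count here corresponds to atomic_dec_and_test() on that shared counter.

/* Minimal user-space sketch (not kernel code) of the fast-clone layout. */
#include <stdio.h>
#include <stdlib.h>

enum { FCLONE_UNAVAILABLE, FCLONE_ORIG, FCLONE_CLONE };

struct toy_skb {
	int fclone;	/* stands in for the new 2-bit skb->fclone field */
};

int main(void)
{
	/* One object mimicking 2*sizeof(struct sk_buff) + sizeof(atomic_t). */
	void *unit = malloc(2 * sizeof(struct toy_skb) + sizeof(int));
	if (!unit)
		return 1;

	struct toy_skb *orig  = unit;			/* what __alloc_skb(..., 1) hands back  */
	struct toy_skb *child = orig + 1;		/* the pre-allocated clone: skb + 1     */
	int *fclone_ref = (int *)(child + 1);		/* shared count: (atomic_t *)(child+1)  */

	orig->fclone  = FCLONE_ORIG;
	child->fclone = FCLONE_UNAVAILABLE;
	*fclone_ref   = 1;

	/* skb_clone() fast path: no allocation, just hand out the child. */
	child->fclone = FCLONE_CLONE;
	(*fclone_ref)++;

	printf("orig=%p child=%p ref=%p count=%d\n",
	       (void *)orig, (void *)child, (void *)fclone_ref, *fclone_ref);

	/* kfree_skbmem() drops one reference per header; the combined object
	 * is returned to the allocator only when the count reaches zero. */
	if (--(*fclone_ref) == 0)			/* clone freed ...        */
		free(unit);
	else if (--(*fclone_ref) == 0)			/* ... then the original  */
		free(unit);
	return 0;
}
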
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index db10335e4192..42edce6abe23 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -162,6 +162,13 @@ struct skb_timeval {
 	u32	off_usec;
 };
 
+
+enum {
+	SKB_FCLONE_UNAVAILABLE,
+	SKB_FCLONE_ORIG,
+	SKB_FCLONE_CLONE,
+};
+
 /**
  *	struct sk_buff - socket buffer
  *	@next: Next buffer in list
@@ -255,7 +262,8 @@ struct sk_buff {
 				ip_summed:2,
 				nohdr:1,
 				nfctinfo:3;
-	__u8			pkt_type;
+	__u8			pkt_type:3,
+				fclone:2;
 	__be16			protocol;
 
 	void			(*destructor)(struct sk_buff *skb);
@@ -295,8 +303,20 @@ struct sk_buff {
 #include <asm/system.h>
 
 extern void	       __kfree_skb(struct sk_buff *skb);
-extern struct sk_buff *alloc_skb(unsigned int size,
-				 unsigned int __nocast priority);
+extern struct sk_buff *__alloc_skb(unsigned int size,
+				   unsigned int __nocast priority, int fclone);
+static inline struct sk_buff *alloc_skb(unsigned int size,
+					unsigned int __nocast priority)
+{
+	return __alloc_skb(size, priority, 0);
+}
+
+static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
+					       unsigned int __nocast priority)
+{
+	return __alloc_skb(size, priority, 1);
+}
+
 extern struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
 					    unsigned int size,
 					    unsigned int __nocast priority);
diff --git a/include/net/sock.h b/include/net/sock.h
index 14183883e8e6..d57aece9492c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1200,7 +1200,7 @@ static inline struct sk_buff *sk_stream_alloc_pskb(struct sock *sk,
 	int hdr_len;
 
 	hdr_len = SKB_DATA_ALIGN(sk->sk_prot->max_header);
-	skb = alloc_skb(size + hdr_len, gfp);
+	skb = alloc_skb_fclone(size + hdr_len, gfp);
 	if (skb) {
 		skb->truesize += mem;
 		if (sk->sk_forward_alloc >= (int)skb->truesize ||
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 39a161dbc16d..b853a9b29eb6 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -69,6 +69,7 @@
 #include <asm/system.h>
 
 static kmem_cache_t *skbuff_head_cache;
+static kmem_cache_t *skbuff_fclone_cache;
 
 struct timeval __read_mostly skb_tv_base;
 
@@ -120,7 +121,7 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
  */
 
 /**
- *	alloc_skb	-	allocate a network buffer
+ *	__alloc_skb	-	allocate a network buffer
  *	@size: size to allocate
  *	@gfp_mask: allocation mask
  *
@@ -131,14 +132,20 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
 *	Buffers may only be allocated from interrupts using a @gfp_mask of
 *	%GFP_ATOMIC.
 */
-struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask)
+struct sk_buff *__alloc_skb(unsigned int size, unsigned int __nocast gfp_mask,
+			    int fclone)
 {
 	struct sk_buff *skb;
 	u8 *data;
 
 	/* Get the HEAD */
-	skb = kmem_cache_alloc(skbuff_head_cache,
-			       gfp_mask & ~__GFP_DMA);
+	if (fclone)
+		skb = kmem_cache_alloc(skbuff_fclone_cache,
+				       gfp_mask & ~__GFP_DMA);
+	else
+		skb = kmem_cache_alloc(skbuff_head_cache,
+				       gfp_mask & ~__GFP_DMA);
+
 	if (!skb)
 		goto out;
 
@@ -155,7 +162,15 @@ struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask)
 	skb->data = data;
 	skb->tail = data;
 	skb->end = data + size;
+	if (fclone) {
+		struct sk_buff *child = skb + 1;
+		atomic_t *fclone_ref = (atomic_t *) (child + 1);
 
+		skb->fclone = SKB_FCLONE_ORIG;
+		atomic_set(fclone_ref, 1);
+
+		child->fclone = SKB_FCLONE_UNAVAILABLE;
+	}
 	atomic_set(&(skb_shinfo(skb)->dataref), 1);
 	skb_shinfo(skb)->nr_frags = 0;
 	skb_shinfo(skb)->tso_size = 0;
@@ -268,8 +283,34 @@ void skb_release_data(struct sk_buff *skb)
  */
 void kfree_skbmem(struct sk_buff *skb)
 {
+	struct sk_buff *other;
+	atomic_t *fclone_ref;
+
 	skb_release_data(skb);
-	kmem_cache_free(skbuff_head_cache, skb);
+	switch (skb->fclone) {
+	case SKB_FCLONE_UNAVAILABLE:
+		kmem_cache_free(skbuff_head_cache, skb);
+		break;
+
+	case SKB_FCLONE_ORIG:
+		fclone_ref = (atomic_t *) (skb + 2);
+		if (atomic_dec_and_test(fclone_ref))
+			kmem_cache_free(skbuff_fclone_cache, skb);
+		break;
+
+	case SKB_FCLONE_CLONE:
+		fclone_ref = (atomic_t *) (skb + 1);
+		other = skb - 1;
+
+		/* The clone portion is available for
+		 * fast-cloning again.
+		 */
+		skb->fclone = SKB_FCLONE_UNAVAILABLE;
+
+		if (atomic_dec_and_test(fclone_ref))
+			kmem_cache_free(skbuff_fclone_cache, other);
+		break;
+	};
 }
 
 /**
@@ -324,10 +365,20 @@ void __kfree_skb(struct sk_buff *skb)
 
 struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
 {
-	struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
+	struct sk_buff *n;
 
-	if (!n)
-		return NULL;
+	n = skb + 1;
+	if (skb->fclone == SKB_FCLONE_ORIG &&
+	    n->fclone == SKB_FCLONE_UNAVAILABLE) {
+		atomic_t *fclone_ref = (atomic_t *) (n + 1);
+		n->fclone = SKB_FCLONE_CLONE;
+		atomic_inc(fclone_ref);
+	} else {
+		n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
+		if (!n)
+			return NULL;
+		n->fclone = SKB_FCLONE_UNAVAILABLE;
+	}
 
 #define C(x) n->x = skb->x
 
@@ -409,6 +460,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->mac.raw	= old->mac.raw + offset;
 	memcpy(new->cb, old->cb, sizeof(old->cb));
 	new->local_df	= old->local_df;
+	new->fclone	= SKB_FCLONE_UNAVAILABLE;
 	new->pkt_type	= old->pkt_type;
 	new->tstamp	= old->tstamp;
 	new->destructor = NULL;
@@ -1647,13 +1699,23 @@ void __init skb_init(void)
 					      NULL, NULL);
 	if (!skbuff_head_cache)
 		panic("cannot create skbuff cache");
+
+	skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
+						(2*sizeof(struct sk_buff)) +
+						sizeof(atomic_t),
+						0,
+						SLAB_HWCACHE_ALIGN,
+						NULL, NULL);
+	if (!skbuff_fclone_cache)
+		panic("cannot create skbuff cache");
+
 	do_gettimeofday(&skb_tv_base);
 }
 
 EXPORT_SYMBOL(___pskb_trim);
 EXPORT_SYMBOL(__kfree_skb);
 EXPORT_SYMBOL(__pskb_pull_tail);
-EXPORT_SYMBOL(alloc_skb);
+EXPORT_SYMBOL(__alloc_skb);
 EXPORT_SYMBOL(pskb_copy);
 EXPORT_SYMBOL(pskb_expand_head);
 EXPORT_SYMBOL(skb_checksum);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 8d92ab562aed..75b68116682a 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1582,7 +1582,7 @@ void tcp_send_fin(struct sock *sk)
 	} else {
 		/* Socket is locked, keep trying until memory is available. */
 		for (;;) {
-			skb = alloc_skb(MAX_TCP_HEADER, GFP_KERNEL);
+			skb = alloc_skb_fclone(MAX_TCP_HEADER, GFP_KERNEL);
 			if (skb)
 				break;
 			yield();
@@ -1804,7 +1804,7 @@ int tcp_connect(struct sock *sk)
 
 	tcp_connect_init(sk);
 
-	buff = alloc_skb(MAX_TCP_HEADER + 15, sk->sk_allocation);
+	buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation);
 	if (unlikely(buff == NULL))
 		return -ENOBUFS;
 