diff options
author | David S. Miller <davem@davemloft.net> | 2005-08-17 17:57:30 -0400 |
---|---|---|
committer | David S. Miller <davem@sunset.davemloft.net> | 2005-08-29 19:01:54 -0400 |
commit | d179cd12928443f3ec29cfbc3567439644bd0afc (patch) | |
tree | 0bfc57e73f0bf9f7bb9d5c8ce7d3d5afe550f94e | |
parent | e92ae93a8aa66aea12935420cb22d4df1c18d023 (diff) |
[NET]: Implement SKB fast cloning.
Protocols that make extensive use of SKB cloning,
for example TCP, incur at least 2 sk_buff allocations
per packet sent as a result.
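To cut that allocation count in half, we implement
a pre-allocation scheme wherein we allocate
2 sk_buff objects in advance (back to back in a single
object from a dedicated slab cache, followed by an
atomic reference count), then use that simple
reference count to free up the memory at the
correct time.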
Based upon an initial patch by Thomas Graf and
suggestions from Herbert Xu.
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/linux/skbuff.h | 26 | ||||
-rw-r--r-- | include/net/sock.h | 2 | ||||
-rw-r--r-- | net/core/skbuff.c | 82 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 4 |
4 files changed, 98 insertions, 16 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index db10335e4192..42edce6abe23 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h | |||
@@ -162,6 +162,13 @@ struct skb_timeval { | |||
162 | u32 off_usec; | 162 | u32 off_usec; |
163 | }; | 163 | }; |
164 | 164 | ||
165 | |||
166 | enum { | ||
167 | SKB_FCLONE_UNAVAILABLE, | ||
168 | SKB_FCLONE_ORIG, | ||
169 | SKB_FCLONE_CLONE, | ||
170 | }; | ||
171 | |||
165 | /** | 172 | /** |
166 | * struct sk_buff - socket buffer | 173 | * struct sk_buff - socket buffer |
167 | * @next: Next buffer in list | 174 | * @next: Next buffer in list |
@@ -255,7 +262,8 @@ struct sk_buff { | |||
255 | ip_summed:2, | 262 | ip_summed:2, |
256 | nohdr:1, | 263 | nohdr:1, |
257 | nfctinfo:3; | 264 | nfctinfo:3; |
258 | __u8 pkt_type; | 265 | __u8 pkt_type:3, |
266 | fclone:2; | ||
259 | __be16 protocol; | 267 | __be16 protocol; |
260 | 268 | ||
261 | void (*destructor)(struct sk_buff *skb); | 269 | void (*destructor)(struct sk_buff *skb); |
@@ -295,8 +303,20 @@ struct sk_buff { | |||
295 | #include <asm/system.h> | 303 | #include <asm/system.h> |
296 | 304 | ||
297 | extern void __kfree_skb(struct sk_buff *skb); | 305 | extern void __kfree_skb(struct sk_buff *skb); |
298 | extern struct sk_buff *alloc_skb(unsigned int size, | 306 | extern struct sk_buff *__alloc_skb(unsigned int size, |
299 | unsigned int __nocast priority); | 307 | unsigned int __nocast priority, int fclone); |
308 | static inline struct sk_buff *alloc_skb(unsigned int size, | ||
309 | unsigned int __nocast priority) | ||
310 | { | ||
311 | return __alloc_skb(size, priority, 0); | ||
312 | } | ||
313 | |||
314 | static inline struct sk_buff *alloc_skb_fclone(unsigned int size, | ||
315 | unsigned int __nocast priority) | ||
316 | { | ||
317 | return __alloc_skb(size, priority, 1); | ||
318 | } | ||
319 | |||
300 | extern struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, | 320 | extern struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, |
301 | unsigned int size, | 321 | unsigned int size, |
302 | unsigned int __nocast priority); | 322 | unsigned int __nocast priority); |
diff --git a/include/net/sock.h b/include/net/sock.h index 14183883e8e6..d57aece9492c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h | |||
@@ -1200,7 +1200,7 @@ static inline struct sk_buff *sk_stream_alloc_pskb(struct sock *sk, | |||
1200 | int hdr_len; | 1200 | int hdr_len; |
1201 | 1201 | ||
1202 | hdr_len = SKB_DATA_ALIGN(sk->sk_prot->max_header); | 1202 | hdr_len = SKB_DATA_ALIGN(sk->sk_prot->max_header); |
1203 | skb = alloc_skb(size + hdr_len, gfp); | 1203 | skb = alloc_skb_fclone(size + hdr_len, gfp); |
1204 | if (skb) { | 1204 | if (skb) { |
1205 | skb->truesize += mem; | 1205 | skb->truesize += mem; |
1206 | if (sk->sk_forward_alloc >= (int)skb->truesize || | 1206 | if (sk->sk_forward_alloc >= (int)skb->truesize || |
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 39a161dbc16d..b853a9b29eb6 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c | |||
@@ -69,6 +69,7 @@ | |||
69 | #include <asm/system.h> | 69 | #include <asm/system.h> |
70 | 70 | ||
71 | static kmem_cache_t *skbuff_head_cache; | 71 | static kmem_cache_t *skbuff_head_cache; |
72 | static kmem_cache_t *skbuff_fclone_cache; | ||
72 | 73 | ||
73 | struct timeval __read_mostly skb_tv_base; | 74 | struct timeval __read_mostly skb_tv_base; |
74 | 75 | ||
@@ -120,7 +121,7 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here) | |||
120 | */ | 121 | */ |
121 | 122 | ||
122 | /** | 123 | /** |
123 | * alloc_skb - allocate a network buffer | 124 | * __alloc_skb - allocate a network buffer |
124 | * @size: size to allocate | 125 | * @size: size to allocate |
125 | * @gfp_mask: allocation mask | 126 | * @gfp_mask: allocation mask |
126 | * | 127 | * |
@@ -131,14 +132,20 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here) | |||
131 | * Buffers may only be allocated from interrupts using a @gfp_mask of | 132 | * Buffers may only be allocated from interrupts using a @gfp_mask of |
132 | * %GFP_ATOMIC. | 133 | * %GFP_ATOMIC. |
133 | */ | 134 | */ |
134 | struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask) | 135 | struct sk_buff *__alloc_skb(unsigned int size, unsigned int __nocast gfp_mask, |
136 | int fclone) | ||
135 | { | 137 | { |
136 | struct sk_buff *skb; | 138 | struct sk_buff *skb; |
137 | u8 *data; | 139 | u8 *data; |
138 | 140 | ||
139 | /* Get the HEAD */ | 141 | /* Get the HEAD */ |
140 | skb = kmem_cache_alloc(skbuff_head_cache, | 142 | if (fclone) |
141 | gfp_mask & ~__GFP_DMA); | 143 | skb = kmem_cache_alloc(skbuff_fclone_cache, |
144 | gfp_mask & ~__GFP_DMA); | ||
145 | else | ||
146 | skb = kmem_cache_alloc(skbuff_head_cache, | ||
147 | gfp_mask & ~__GFP_DMA); | ||
148 | |||
142 | if (!skb) | 149 | if (!skb) |
143 | goto out; | 150 | goto out; |
144 | 151 | ||
@@ -155,7 +162,15 @@ struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask) | |||
155 | skb->data = data; | 162 | skb->data = data; |
156 | skb->tail = data; | 163 | skb->tail = data; |
157 | skb->end = data + size; | 164 | skb->end = data + size; |
165 | if (fclone) { | ||
166 | struct sk_buff *child = skb + 1; | ||
167 | atomic_t *fclone_ref = (atomic_t *) (child + 1); | ||
158 | 168 | ||
169 | skb->fclone = SKB_FCLONE_ORIG; | ||
170 | atomic_set(fclone_ref, 1); | ||
171 | |||
172 | child->fclone = SKB_FCLONE_UNAVAILABLE; | ||
173 | } | ||
159 | atomic_set(&(skb_shinfo(skb)->dataref), 1); | 174 | atomic_set(&(skb_shinfo(skb)->dataref), 1); |
160 | skb_shinfo(skb)->nr_frags = 0; | 175 | skb_shinfo(skb)->nr_frags = 0; |
161 | skb_shinfo(skb)->tso_size = 0; | 176 | skb_shinfo(skb)->tso_size = 0; |
@@ -268,8 +283,34 @@ void skb_release_data(struct sk_buff *skb) | |||
268 | */ | 283 | */ |
269 | void kfree_skbmem(struct sk_buff *skb) | 284 | void kfree_skbmem(struct sk_buff *skb) |
270 | { | 285 | { |
286 | struct sk_buff *other; | ||
287 | atomic_t *fclone_ref; | ||
288 | |||
271 | skb_release_data(skb); | 289 | skb_release_data(skb); |
272 | kmem_cache_free(skbuff_head_cache, skb); | 290 | switch (skb->fclone) { |
291 | case SKB_FCLONE_UNAVAILABLE: | ||
292 | kmem_cache_free(skbuff_head_cache, skb); | ||
293 | break; | ||
294 | |||
295 | case SKB_FCLONE_ORIG: | ||
296 | fclone_ref = (atomic_t *) (skb + 2); | ||
297 | if (atomic_dec_and_test(fclone_ref)) | ||
298 | kmem_cache_free(skbuff_fclone_cache, skb); | ||
299 | break; | ||
300 | |||
301 | case SKB_FCLONE_CLONE: | ||
302 | fclone_ref = (atomic_t *) (skb + 1); | ||
303 | other = skb - 1; | ||
304 | |||
305 | /* The clone portion is available for | ||
306 | * fast-cloning again. | ||
307 | */ | ||
308 | skb->fclone = SKB_FCLONE_UNAVAILABLE; | ||
309 | |||
310 | if (atomic_dec_and_test(fclone_ref)) | ||
311 | kmem_cache_free(skbuff_fclone_cache, other); | ||
312 | break; | ||
313 | }; | ||
273 | } | 314 | } |
274 | 315 | ||
275 | /** | 316 | /** |
@@ -324,10 +365,20 @@ void __kfree_skb(struct sk_buff *skb) | |||
324 | 365 | ||
325 | struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask) | 366 | struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask) |
326 | { | 367 | { |
327 | struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); | 368 | struct sk_buff *n; |
328 | 369 | ||
329 | if (!n) | 370 | n = skb + 1; |
330 | return NULL; | 371 | if (skb->fclone == SKB_FCLONE_ORIG && |
372 | n->fclone == SKB_FCLONE_UNAVAILABLE) { | ||
373 | atomic_t *fclone_ref = (atomic_t *) (n + 1); | ||
374 | n->fclone = SKB_FCLONE_CLONE; | ||
375 | atomic_inc(fclone_ref); | ||
376 | } else { | ||
377 | n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); | ||
378 | if (!n) | ||
379 | return NULL; | ||
380 | n->fclone = SKB_FCLONE_UNAVAILABLE; | ||
381 | } | ||
331 | 382 | ||
332 | #define C(x) n->x = skb->x | 383 | #define C(x) n->x = skb->x |
333 | 384 | ||
@@ -409,6 +460,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) | |||
409 | new->mac.raw = old->mac.raw + offset; | 460 | new->mac.raw = old->mac.raw + offset; |
410 | memcpy(new->cb, old->cb, sizeof(old->cb)); | 461 | memcpy(new->cb, old->cb, sizeof(old->cb)); |
411 | new->local_df = old->local_df; | 462 | new->local_df = old->local_df; |
463 | new->fclone = SKB_FCLONE_UNAVAILABLE; | ||
412 | new->pkt_type = old->pkt_type; | 464 | new->pkt_type = old->pkt_type; |
413 | new->tstamp = old->tstamp; | 465 | new->tstamp = old->tstamp; |
414 | new->destructor = NULL; | 466 | new->destructor = NULL; |
@@ -1647,13 +1699,23 @@ void __init skb_init(void) | |||
1647 | NULL, NULL); | 1699 | NULL, NULL); |
1648 | if (!skbuff_head_cache) | 1700 | if (!skbuff_head_cache) |
1649 | panic("cannot create skbuff cache"); | 1701 | panic("cannot create skbuff cache"); |
1702 | |||
1703 | skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache", | ||
1704 | (2*sizeof(struct sk_buff)) + | ||
1705 | sizeof(atomic_t), | ||
1706 | 0, | ||
1707 | SLAB_HWCACHE_ALIGN, | ||
1708 | NULL, NULL); | ||
1709 | if (!skbuff_fclone_cache) | ||
1710 | panic("cannot create skbuff cache"); | ||
1711 | |||
1650 | do_gettimeofday(&skb_tv_base); | 1712 | do_gettimeofday(&skb_tv_base); |
1651 | } | 1713 | } |
1652 | 1714 | ||
1653 | EXPORT_SYMBOL(___pskb_trim); | 1715 | EXPORT_SYMBOL(___pskb_trim); |
1654 | EXPORT_SYMBOL(__kfree_skb); | 1716 | EXPORT_SYMBOL(__kfree_skb); |
1655 | EXPORT_SYMBOL(__pskb_pull_tail); | 1717 | EXPORT_SYMBOL(__pskb_pull_tail); |
1656 | EXPORT_SYMBOL(alloc_skb); | 1718 | EXPORT_SYMBOL(__alloc_skb); |
1657 | EXPORT_SYMBOL(pskb_copy); | 1719 | EXPORT_SYMBOL(pskb_copy); |
1658 | EXPORT_SYMBOL(pskb_expand_head); | 1720 | EXPORT_SYMBOL(pskb_expand_head); |
1659 | EXPORT_SYMBOL(skb_checksum); | 1721 | EXPORT_SYMBOL(skb_checksum); |
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8d92ab562aed..75b68116682a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -1582,7 +1582,7 @@ void tcp_send_fin(struct sock *sk) | |||
1582 | } else { | 1582 | } else { |
1583 | /* Socket is locked, keep trying until memory is available. */ | 1583 | /* Socket is locked, keep trying until memory is available. */ |
1584 | for (;;) { | 1584 | for (;;) { |
1585 | skb = alloc_skb(MAX_TCP_HEADER, GFP_KERNEL); | 1585 | skb = alloc_skb_fclone(MAX_TCP_HEADER, GFP_KERNEL); |
1586 | if (skb) | 1586 | if (skb) |
1587 | break; | 1587 | break; |
1588 | yield(); | 1588 | yield(); |
@@ -1804,7 +1804,7 @@ int tcp_connect(struct sock *sk) | |||
1804 | 1804 | ||
1805 | tcp_connect_init(sk); | 1805 | tcp_connect_init(sk); |
1806 | 1806 | ||
1807 | buff = alloc_skb(MAX_TCP_HEADER + 15, sk->sk_allocation); | 1807 | buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation); |
1808 | if (unlikely(buff == NULL)) | 1808 | if (unlikely(buff == NULL)) |
1809 | return -ENOBUFS; | 1809 | return -ENOBUFS; |
1810 | 1810 | ||