Diffstat (limited to 'net/core/skbuff.c')
-rw-r--r--   net/core/skbuff.c   158
1 file changed, 103 insertions, 55 deletions
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7eab867ede59..f80a28785610 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -68,7 +68,10 @@
 #include <asm/uaccess.h>
 #include <asm/system.h>
 
-static kmem_cache_t *skbuff_head_cache;
+static kmem_cache_t *skbuff_head_cache __read_mostly;
+static kmem_cache_t *skbuff_fclone_cache __read_mostly;
+
+struct timeval __read_mostly skb_tv_base;
 
 /*
  * Keep out-of-line to prevent kernel bloat.
@@ -118,7 +121,7 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
  */
 
 /**
- * alloc_skb - allocate a network buffer
+ * __alloc_skb - allocate a network buffer
  * @size: size to allocate
  * @gfp_mask: allocation mask
  *
@@ -129,14 +132,20 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
  * Buffers may only be allocated from interrupts using a @gfp_mask of
  * %GFP_ATOMIC.
  */
-struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask)
+struct sk_buff *__alloc_skb(unsigned int size, unsigned int __nocast gfp_mask,
+                            int fclone)
 {
         struct sk_buff *skb;
         u8 *data;
 
         /* Get the HEAD */
-        skb = kmem_cache_alloc(skbuff_head_cache,
-                               gfp_mask & ~__GFP_DMA);
+        if (fclone)
+                skb = kmem_cache_alloc(skbuff_fclone_cache,
+                                       gfp_mask & ~__GFP_DMA);
+        else
+                skb = kmem_cache_alloc(skbuff_head_cache,
+                                       gfp_mask & ~__GFP_DMA);
+
         if (!skb)
                 goto out;
 
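Callers are not expected to invoke __alloc_skb() directly. The same patch series adds thin inline wrappers in include/linux/skbuff.h; a sketch of those wrappers, reconstructed from the new fclone argument rather than quoted from this diff:

/* Sketch: the old alloc_skb() interface survives as a wrapper, and
 * alloc_skb_fclone() opts in to the paired fast-clone allocation. */
static inline struct sk_buff *alloc_skb(unsigned int size,
                                        unsigned int __nocast priority)
{
        return __alloc_skb(size, priority, 0);
}

static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
                                               unsigned int __nocast priority)
{
        return __alloc_skb(size, priority, 1);
}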
@@ -153,7 +162,15 @@ struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask)
         skb->data = data;
         skb->tail = data;
         skb->end = data + size;
+        if (fclone) {
+                struct sk_buff *child = skb + 1;
+                atomic_t *fclone_ref = (atomic_t *) (child + 1);
 
+                skb->fclone = SKB_FCLONE_ORIG;
+                atomic_set(fclone_ref, 1);
+
+                child->fclone = SKB_FCLONE_UNAVAILABLE;
+        }
         atomic_set(&(skb_shinfo(skb)->dataref), 1);
         skb_shinfo(skb)->nr_frags = 0;
         skb_shinfo(skb)->tso_size = 0;
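The pointer arithmetic above (child = skb + 1, fclone_ref = child + 1) only works because each skbuff_fclone_cache object packs a parent sk_buff, a companion clone, and a shared reference count back to back. A layout sketch, derived from that arithmetic and the cache size set up in skb_init() below:

/*
 * One skbuff_fclone_cache object:
 *
 *   +----------------+----------------+------------+
 *   | struct sk_buff | struct sk_buff |  atomic_t  |
 *   | parent (ORIG)  | child (clone)  | fclone_ref |
 *   +----------------+----------------+------------+
 *   skb              skb + 1          (atomic_t *)(skb + 2)
 *
 * fclone_ref counts the live sk_buffs in the pair; the object goes
 * back to the slab only when both have been freed.
 */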
@@ -266,8 +283,34 @@ void skb_release_data(struct sk_buff *skb)
  */
 void kfree_skbmem(struct sk_buff *skb)
 {
+        struct sk_buff *other;
+        atomic_t *fclone_ref;
+
         skb_release_data(skb);
-        kmem_cache_free(skbuff_head_cache, skb);
+        switch (skb->fclone) {
+        case SKB_FCLONE_UNAVAILABLE:
+                kmem_cache_free(skbuff_head_cache, skb);
+                break;
+
+        case SKB_FCLONE_ORIG:
+                fclone_ref = (atomic_t *) (skb + 2);
+                if (atomic_dec_and_test(fclone_ref))
+                        kmem_cache_free(skbuff_fclone_cache, skb);
+                break;
+
+        case SKB_FCLONE_CLONE:
+                fclone_ref = (atomic_t *) (skb + 1);
+                other = skb - 1;
+
+                /* The clone portion is available for
+                 * fast-cloning again.
+                 */
+                skb->fclone = SKB_FCLONE_UNAVAILABLE;
+
+                if (atomic_dec_and_test(fclone_ref))
+                        kmem_cache_free(skbuff_fclone_cache, other);
+                break;
+        };
 }
 
 /**
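The switch keys on skb->fclone, a field added to struct sk_buff alongside this code. A sketch of the three states as their use here implies them (the enum lives in include/linux/skbuff.h; the ordering shown is an assumption):

/* Sketch of the fast-clone states consumed by kfree_skbmem(). */
enum {
        SKB_FCLONE_UNAVAILABLE, /* standalone skb, or child slot not handed out */
        SKB_FCLONE_ORIG,        /* parent half of a fast-clone pair */
        SKB_FCLONE_CLONE,       /* child half, handed out by skb_clone() */
};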
@@ -281,8 +324,6 @@ void kfree_skbmem(struct sk_buff *skb)
 
 void __kfree_skb(struct sk_buff *skb)
 {
-        BUG_ON(skb->list != NULL);
-
         dst_release(skb->dst);
 #ifdef CONFIG_XFRM
         secpath_put(skb->sp);
@@ -302,7 +343,6 @@ void __kfree_skb(struct sk_buff *skb)
         skb->tc_index = 0;
 #ifdef CONFIG_NET_CLS_ACT
         skb->tc_verd = 0;
-        skb->tc_classid = 0;
 #endif
 #endif
 
@@ -325,19 +365,27 @@ void __kfree_skb(struct sk_buff *skb)
 
 struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
 {
-        struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
+        struct sk_buff *n;
 
-        if (!n)
-                return NULL;
+        n = skb + 1;
+        if (skb->fclone == SKB_FCLONE_ORIG &&
+            n->fclone == SKB_FCLONE_UNAVAILABLE) {
+                atomic_t *fclone_ref = (atomic_t *) (n + 1);
+                n->fclone = SKB_FCLONE_CLONE;
+                atomic_inc(fclone_ref);
+        } else {
+                n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
+                if (!n)
+                        return NULL;
+                n->fclone = SKB_FCLONE_UNAVAILABLE;
+        }
 
 #define C(x) n->x = skb->x
 
         n->next = n->prev = NULL;
-        n->list = NULL;
         n->sk = NULL;
-        C(stamp);
+        C(tstamp);
         C(dev);
-        C(real_dev);
         C(h);
         C(nh);
         C(mac);
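The fast path fires only when the skb came from the fclone cache and its companion slot is still free; anything else falls back to a normal slab allocation. An illustrative caller, assuming the alloc_skb_fclone() wrapper sketched earlier (the retransmit scenario is hypothetical, not from this diff):

/* Hypothetical sender: packets that will be cloned exactly once
 * (e.g. queued for possible retransmit) are allocated with the
 * fclone variant, so the later skb_clone() is a pointer bump
 * instead of a kmem_cache_alloc(). */
static int xmit_one(unsigned int len)
{
        struct sk_buff *skb, *clone;

        skb = alloc_skb_fclone(len, GFP_ATOMIC);
        if (!skb)
                return -ENOBUFS;

        clone = skb_clone(skb, GFP_ATOMIC);     /* takes the fast path */
        if (!clone) {
                kfree_skb(skb);
                return -ENOBUFS;
        }
        /* ... send clone, keep skb on the retransmit queue ... */
        return 0;
}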
@@ -361,7 +409,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
         n->destructor = NULL;
 #ifdef CONFIG_NETFILTER
         C(nfmark);
-        C(nfcache);
         C(nfct);
         nf_conntrack_get(skb->nfct);
         C(nfctinfo);
@@ -370,9 +417,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
         nf_bridge_get(skb->nf_bridge);
 #endif
 #endif /*CONFIG_NETFILTER*/
-#if defined(CONFIG_HIPPI)
-        C(private);
-#endif
 #ifdef CONFIG_NET_SCHED
         C(tc_index);
 #ifdef CONFIG_NET_CLS_ACT
@@ -380,7 +424,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
         n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd);
         n->tc_verd = CLR_TC_MUNGED(n->tc_verd);
         C(input_dev);
-        C(tc_classid);
 #endif
 
 #endif
@@ -404,10 +447,8 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
          */
         unsigned long offset = new->data - old->data;
 
-        new->list = NULL;
         new->sk = NULL;
         new->dev = old->dev;
-        new->real_dev = old->real_dev;
         new->priority = old->priority;
         new->protocol = old->protocol;
         new->dst = dst_clone(old->dst);
@@ -419,12 +460,12 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
         new->mac.raw = old->mac.raw + offset;
         memcpy(new->cb, old->cb, sizeof(old->cb));
         new->local_df = old->local_df;
+        new->fclone = SKB_FCLONE_UNAVAILABLE;
         new->pkt_type = old->pkt_type;
-        new->stamp = old->stamp;
+        new->tstamp = old->tstamp;
         new->destructor = NULL;
 #ifdef CONFIG_NETFILTER
         new->nfmark = old->nfmark;
-        new->nfcache = old->nfcache;
         new->nfct = old->nfct;
         nf_conntrack_get(old->nfct);
         new->nfctinfo = old->nfctinfo;
@@ -1344,50 +1385,43 @@ void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
         __skb_queue_tail(list, newsk);
         spin_unlock_irqrestore(&list->lock, flags);
 }
+
 /**
  * skb_unlink - remove a buffer from a list
  * @skb: buffer to remove
+ * @list: list to use
  *
- * Place a packet after a given packet in a list. The list locks are taken
- * and this function is atomic with respect to other list locked calls
+ * Remove a packet from a list. The list locks are taken and this
+ * function is atomic with respect to other list locked calls
  *
- * Works even without knowing the list it is sitting on, which can be
- * handy at times. It also means that THE LIST MUST EXIST when you
- * unlink. Thus a list must have its contents unlinked before it is
- * destroyed.
+ * You must know what list the SKB is on.
  */
-void skb_unlink(struct sk_buff *skb)
+void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
 {
-        struct sk_buff_head *list = skb->list;
-
-        if (list) {
-                unsigned long flags;
+        unsigned long flags;
 
-                spin_lock_irqsave(&list->lock, flags);
-                if (skb->list == list)
-                        __skb_unlink(skb, skb->list);
-                spin_unlock_irqrestore(&list->lock, flags);
-        }
+        spin_lock_irqsave(&list->lock, flags);
+        __skb_unlink(skb, list);
+        spin_unlock_irqrestore(&list->lock, flags);
 }
 
-
 /**
  * skb_append - append a buffer
  * @old: buffer to insert after
  * @newsk: buffer to insert
+ * @list: list to use
  *
  * Place a packet after a given packet in a list. The list locks are taken
  * and this function is atomic with respect to other list locked calls.
  * A buffer cannot be placed on two lists at the same time.
  */
-
-void skb_append(struct sk_buff *old, struct sk_buff *newsk)
+void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
 {
         unsigned long flags;
 
-        spin_lock_irqsave(&old->list->lock, flags);
-        __skb_append(old, newsk);
-        spin_unlock_irqrestore(&old->list->lock, flags);
+        spin_lock_irqsave(&list->lock, flags);
+        __skb_append(old, newsk, list);
+        spin_unlock_irqrestore(&list->lock, flags);
 }
 
 
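With skb->list gone, every caller of skb_unlink() (and of skb_append()/skb_insert() below) must name the queue it is operating on. A hedged before/after sketch of a typical call-site conversion; the receive-queue example is illustrative, not taken from this diff:

/* Before this change: the skb carried a back-pointer to its list. */
skb_unlink(skb);

/* After: the caller supplies the queue explicitly, which lets
 * struct sk_buff drop the skb->list field altogether. */
skb_unlink(skb, &sk->sk_receive_queue);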
@@ -1395,19 +1429,21 @@ void skb_append(struct sk_buff *old, struct sk_buff *newsk)
  * skb_insert - insert a buffer
  * @old: buffer to insert before
  * @newsk: buffer to insert
+ * @list: list to use
+ *
+ * Place a packet before a given packet in a list. The list locks are
+ * taken and this function is atomic with respect to other list locked
+ * calls.
  *
- * Place a packet before a given packet in a list. The list locks are taken
- * and this function is atomic with respect to other list locked calls
  * A buffer cannot be placed on two lists at the same time.
  */
-
-void skb_insert(struct sk_buff *old, struct sk_buff *newsk)
+void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
 {
         unsigned long flags;
 
-        spin_lock_irqsave(&old->list->lock, flags);
-        __skb_insert(newsk, old->prev, old, old->list);
-        spin_unlock_irqrestore(&old->list->lock, flags);
+        spin_lock_irqsave(&list->lock, flags);
+        __skb_insert(newsk, old->prev, old, list);
+        spin_unlock_irqrestore(&list->lock, flags);
 }
 
 #if 0
@@ -1663,12 +1699,23 @@ void __init skb_init(void)
                                               NULL, NULL);
         if (!skbuff_head_cache)
                 panic("cannot create skbuff cache");
+
+        skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
+                                                (2*sizeof(struct sk_buff)) +
+                                                sizeof(atomic_t),
+                                                0,
+                                                SLAB_HWCACHE_ALIGN,
+                                                NULL, NULL);
+        if (!skbuff_fclone_cache)
+                panic("cannot create skbuff cache");
+
+        do_gettimeofday(&skb_tv_base);
 }
 
 EXPORT_SYMBOL(___pskb_trim);
 EXPORT_SYMBOL(__kfree_skb);
 EXPORT_SYMBOL(__pskb_pull_tail);
-EXPORT_SYMBOL(alloc_skb);
+EXPORT_SYMBOL(__alloc_skb);
 EXPORT_SYMBOL(pskb_copy);
 EXPORT_SYMBOL(pskb_expand_head);
 EXPORT_SYMBOL(skb_checksum);
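skb_tv_base captures wall-clock time once at boot, which lets skb->tstamp be stored as a small offset instead of a full struct timeval. A sketch of converting back to absolute time, assuming an offset-style tstamp; the off_sec/off_usec field names are assumptions, not taken from this diff:

/* Sketch only: rebuild an absolute timestamp from an offset-based
 * skb->tstamp plus the boot-time base. */
static inline void skb_get_timestamp(const struct sk_buff *skb,
                                     struct timeval *stamp)
{
        stamp->tv_sec  = skb_tv_base.tv_sec  + skb->tstamp.off_sec;
        stamp->tv_usec = skb_tv_base.tv_usec + skb->tstamp.off_usec;
        if (stamp->tv_usec >= 1000000) {        /* carry into seconds */
                stamp->tv_usec -= 1000000;
                stamp->tv_sec++;
        }
}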
@@ -1696,3 +1743,4 @@ EXPORT_SYMBOL(skb_prepare_seq_read);
 EXPORT_SYMBOL(skb_seq_read);
 EXPORT_SYMBOL(skb_abort_seq_read);
 EXPORT_SYMBOL(skb_find_text);
+EXPORT_SYMBOL(skb_tv_base);
