path: root/net/core/skbuff.c
author		Glenn Elliott <gelliott@cs.unc.edu>	2012-03-04 19:47:13 -0500
committer	Glenn Elliott <gelliott@cs.unc.edu>	2012-03-04 19:47:13 -0500
commit		c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
tree		ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /net/core/skbuff.c
parent		ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent		6a00f206debf8a5c8899055726ad127dbeeed098 (diff)
Merge branch 'mpi-master' into wip-k-fmlp
Conflicts: litmus/sched_cedf.c
Diffstat (limited to 'net/core/skbuff.c')
-rw-r--r--	net/core/skbuff.c	147
1 file changed, 76 insertions(+), 71 deletions(-)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c83b421341c0..46cbd28f40f9 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -57,6 +57,7 @@
 #include <linux/init.h>
 #include <linux/scatterlist.h>
 #include <linux/errqueue.h>
+#include <linux/prefetch.h>
 
 #include <net/protocol.h>
 #include <net/dst.h>
@@ -202,8 +203,6 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	skb->data = data;
 	skb_reset_tail_pointer(skb);
 	skb->end = skb->tail + size;
-	kmemcheck_annotate_bitfield(skb, flags1);
-	kmemcheck_annotate_bitfield(skb, flags2);
 #ifdef NET_SKBUFF_DATA_USES_OFFSET
 	skb->mac_header = ~0U;
 #endif
@@ -212,6 +211,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	shinfo = skb_shinfo(skb);
 	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
 	atomic_set(&shinfo->dataref, 1);
+	kmemcheck_annotate_variable(shinfo->destructor_arg);
 
 	if (fclone) {
 		struct sk_buff *child = skb + 1;
@@ -249,10 +249,9 @@ EXPORT_SYMBOL(__alloc_skb);
 struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
 		unsigned int length, gfp_t gfp_mask)
 {
-	int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1;
 	struct sk_buff *skb;
 
-	skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, node);
+	skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, NUMA_NO_NODE);
 	if (likely(skb)) {
 		skb_reserve(skb, NET_SKB_PAD);
 		skb->dev = dev;
@@ -261,16 +260,6 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
 }
 EXPORT_SYMBOL(__netdev_alloc_skb);
 
-struct page *__netdev_alloc_page(struct net_device *dev, gfp_t gfp_mask)
-{
-	int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1;
-	struct page *page;
-
-	page = alloc_pages_node(node, gfp_mask, 0);
-	return page;
-}
-EXPORT_SYMBOL(__netdev_alloc_page);
-
 void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
 		     int size)
 {
@@ -340,7 +329,7 @@ static void skb_release_data(struct sk_buff *skb)
 			put_page(skb_shinfo(skb)->frags[i].page);
 		}
 
-		if (skb_has_frags(skb))
+		if (skb_has_frag_list(skb))
 			skb_drop_fraglist(skb);
 
 		kfree(skb->head);
@@ -393,6 +382,8 @@ static void skb_release_head_state(struct sk_buff *skb)
 	}
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	nf_conntrack_put(skb->nfct);
+#endif
+#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
 	nf_conntrack_put_reasm(skb->nfct_reasm);
 #endif
 #ifdef CONFIG_BRIDGE_NETFILTER
@@ -466,6 +457,7 @@ void consume_skb(struct sk_buff *skb)
 		smp_rmb();
 	else if (likely(!atomic_dec_and_test(&skb->users)))
 		return;
+	trace_consume_skb(skb);
 	__kfree_skb(skb);
 }
 EXPORT_SYMBOL(consume_skb);
@@ -532,7 +524,6 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->ip_summed		= old->ip_summed;
 	skb_copy_queue_mapping(new, old);
 	new->priority		= old->priority;
-	new->deliver_no_wcard	= old->deliver_no_wcard;
 #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
 	new->ipvs_property	= old->ipvs_property;
 #endif
@@ -685,16 +676,10 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 
 struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
 {
-	int headerlen = skb->data - skb->head;
-	/*
-	 *	Allocate the copy buffer
-	 */
-	struct sk_buff *n;
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
-	n = alloc_skb(skb->end + skb->data_len, gfp_mask);
-#else
-	n = alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
-#endif
+	int headerlen = skb_headroom(skb);
+	unsigned int size = (skb_end_pointer(skb) - skb->head) + skb->data_len;
+	struct sk_buff *n = alloc_skb(size, gfp_mask);
+
 	if (!n)
 		return NULL;
 
@@ -726,20 +711,14 @@ EXPORT_SYMBOL(skb_copy);
 
 struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
 {
-	/*
-	 *	Allocate the copy buffer
-	 */
-	struct sk_buff *n;
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
-	n = alloc_skb(skb->end, gfp_mask);
-#else
-	n = alloc_skb(skb->end - skb->head, gfp_mask);
-#endif
+	unsigned int size = skb_end_pointer(skb) - skb->head;
+	struct sk_buff *n = alloc_skb(size, gfp_mask);
+
 	if (!n)
 		goto out;
 
 	/* Set the data pointer */
-	skb_reserve(n, skb->data - skb->head);
+	skb_reserve(n, skb_headroom(skb));
 	/* Set the tail pointer and length */
 	skb_put(n, skb_headlen(skb));
 	/* Copy the bytes */
@@ -759,7 +738,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
 		skb_shinfo(n)->nr_frags = i;
 	}
 
-	if (skb_has_frags(skb)) {
+	if (skb_has_frag_list(skb)) {
 		skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
 		skb_clone_fraglist(n);
 	}
@@ -791,12 +770,9 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 {
 	int i;
 	u8 *data;
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
-	int size = nhead + skb->end + ntail;
-#else
-	int size = nhead + (skb->end - skb->head) + ntail;
-#endif
+	int size = nhead + (skb_end_pointer(skb) - skb->head) + ntail;
 	long off;
+	bool fastpath;
 
 	BUG_ON(nhead < 0);
 
@@ -805,31 +781,56 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 
 	size = SKB_DATA_ALIGN(size);
 
+	/* Check if we can avoid taking references on fragments if we own
+	 * the last reference on skb->head. (see skb_release_data())
+	 */
+	if (!skb->cloned)
+		fastpath = true;
+	else {
+		int delta = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1;
+
+		fastpath = atomic_read(&skb_shinfo(skb)->dataref) == delta;
+	}
+
+	if (fastpath &&
+	    size + sizeof(struct skb_shared_info) <= ksize(skb->head)) {
+		memmove(skb->head + size, skb_shinfo(skb),
+			offsetof(struct skb_shared_info,
+				 frags[skb_shinfo(skb)->nr_frags]));
+		memmove(skb->head + nhead, skb->head,
+			skb_tail_pointer(skb) - skb->head);
+		off = nhead;
+		goto adjust_others;
+	}
+
 	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
 	if (!data)
 		goto nodata;
 
 	/* Copy only real data... and, alas, header. This should be
-	 * optimized for the cases when header is void. */
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
-	memcpy(data + nhead, skb->head, skb->tail);
-#else
-	memcpy(data + nhead, skb->head, skb->tail - skb->head);
-#endif
-	memcpy(data + size, skb_end_pointer(skb),
-	       offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags]));
+	 * optimized for the cases when header is void.
+	 */
+	memcpy(data + nhead, skb->head, skb_tail_pointer(skb) - skb->head);
 
-	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
-		get_page(skb_shinfo(skb)->frags[i].page);
+	memcpy((struct skb_shared_info *)(data + size),
+	       skb_shinfo(skb),
+	       offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags]));
 
-	if (skb_has_frags(skb))
-		skb_clone_fraglist(skb);
+	if (fastpath) {
+		kfree(skb->head);
+	} else {
+		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+			get_page(skb_shinfo(skb)->frags[i].page);
 
-	skb_release_data(skb);
+		if (skb_has_frag_list(skb))
+			skb_clone_fraglist(skb);
 
+		skb_release_data(skb);
+	}
 	off = (data + nhead) - skb->head;
 
 	skb->head     = data;
+adjust_others:
 	skb->data    += off;
 #ifdef NET_SKBUFF_DATA_USES_OFFSET
 	skb->end      = size;
@@ -1099,7 +1100,7 @@ drop_pages:
 		for (; i < nfrags; i++)
 			put_page(skb_shinfo(skb)->frags[i].page);
 
-		if (skb_has_frags(skb))
+		if (skb_has_frag_list(skb))
 			skb_drop_fraglist(skb);
 		goto done;
 	}
@@ -1194,7 +1195,7 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
 	/* Optimization: no fragments, no reasons to preestimate
 	 * size of pulled pages. Superb.
 	 */
-	if (!skb_has_frags(skb))
+	if (!skb_has_frag_list(skb))
 		goto pull_pages;
 
 	/* Estimate size of pulled pages. */
@@ -1826,7 +1827,7 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
 	long csstart;
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL)
-		csstart = skb->csum_start - skb_headroom(skb);
+		csstart = skb_checksum_start_offset(skb);
 	else
 		csstart = skb_headlen(skb);
 
@@ -2267,7 +2268,7 @@ EXPORT_SYMBOL(skb_prepare_seq_read);
  * of bytes already consumed and the next call to
  * skb_seq_read() will return the remaining part of the block.
  *
- * Note 1: The size of each block of data returned can be arbitary,
+ * Note 1: The size of each block of data returned can be arbitrary,
  *	   this limitation is the cost for zerocopy seqeuental
  *	   reads of potentially non linear data.
  *
@@ -2323,7 +2324,7 @@ next_skb:
 		st->frag_data = NULL;
 	}
 
-	if (st->root_skb == st->cur_skb && skb_has_frags(st->root_skb)) {
+	if (st->root_skb == st->cur_skb && skb_has_frag_list(st->root_skb)) {
 		st->cur_skb = skb_shinfo(st->root_skb)->frag_list;
 		st->frag_idx = 0;
 		goto next_skb;
@@ -2433,8 +2434,6 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
 				return -ENOMEM;
 
 			/* initialize the next frag */
-			sk->sk_sndmsg_page = page;
-			sk->sk_sndmsg_off = 0;
 			skb_fill_page_desc(skb, frg_cnt, page, 0, 0);
 			skb->truesize += PAGE_SIZE;
 			atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
@@ -2454,7 +2453,6 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
 			return -EFAULT;
 
 		/* copy was successful so update the size parameters */
-		sk->sk_sndmsg_off += copy;
 		frag->size += copy;
 		skb->len += copy;
 		skb->data_len += copy;
@@ -2497,7 +2495,7 @@ EXPORT_SYMBOL_GPL(skb_pull_rcsum);
  * a pointer to the first in a list of new skbs for the segments.
  * In case of error it returns ERR_PTR(err).
  */
-struct sk_buff *skb_segment(struct sk_buff *skb, int features)
+struct sk_buff *skb_segment(struct sk_buff *skb, u32 features)
 {
 	struct sk_buff *segs = NULL;
 	struct sk_buff *tail = NULL;
@@ -2507,7 +2505,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
 	unsigned int offset = doffset;
 	unsigned int headroom;
 	unsigned int len;
-	int sg = features & NETIF_F_SG;
+	int sg = !!(features & NETIF_F_SG);
 	int nfrags = skb_shinfo(skb)->nr_frags;
 	int err = -ENOMEM;
 	int i = 0;
@@ -2744,8 +2742,12 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 
 merge:
 	if (offset > headlen) {
-		skbinfo->frags[0].page_offset += offset - headlen;
-		skbinfo->frags[0].size -= offset - headlen;
+		unsigned int eat = offset - headlen;
+
+		skbinfo->frags[0].page_offset += eat;
+		skbinfo->frags[0].size -= eat;
+		skb->data_len -= eat;
+		skb->len -= eat;
 		offset = headlen;
 	}
 
@@ -2893,7 +2895,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
 		return -ENOMEM;
 
 	/* Easy case. Most of packets will go this way. */
-	if (!skb_has_frags(skb)) {
+	if (!skb_has_frag_list(skb)) {
 		/* A little of trouble, not enough of space for trailer.
 		 * This should not happen, when stack is tuned to generate
 		 * good frames. OK, on miss we reallocate and reserve even more
@@ -2928,7 +2930,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
 
 		if (skb1->next == NULL && tailbits) {
 			if (skb_shinfo(skb1)->nr_frags ||
-			    skb_has_frags(skb1) ||
+			    skb_has_frag_list(skb1) ||
 			    skb_tailroom(skb1) < tailbits)
 				ntail = tailbits + 128;
 		}
@@ -2937,7 +2939,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
 		    skb_cloned(skb1) ||
 		    ntail ||
 		    skb_shinfo(skb1)->nr_frags ||
-		    skb_has_frags(skb1)) {
+		    skb_has_frag_list(skb1)) {
 			struct sk_buff *skb2;
 
 			/* Fuck, we are miserable poor guys... */
@@ -2992,6 +2994,9 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
 	skb->destructor = sock_rmem_free;
 	atomic_add(skb->truesize, &sk->sk_rmem_alloc);
 
+	/* before exiting rcu section, make sure dst is refcounted */
+	skb_dst_force(skb);
+
 	skb_queue_tail(&sk->sk_error_queue, skb);
 	if (!sock_flag(sk, SOCK_DEAD))
 		sk->sk_data_ready(sk, skb->len);
@@ -3020,7 +3025,7 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
 	} else {
 		/*
 		 * no hardware time stamps available,
-		 * so keep the skb_shared_tx and only
+		 * so keep the shared tx_flags and only
 		 * store software time stamp
 		 */
 		skb->tstamp = ktime_get_real();