Diffstat (limited to 'net/core/skbuff.c')
-rw-r--r--	net/core/skbuff.c	124
1 file changed, 99 insertions, 25 deletions
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 368f65c15e4f..fe00d1208167 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -145,6 +145,43 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
 	BUG();
 }
 
+
+/*
+ * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells
+ * the caller if emergency pfmemalloc reserves are being used. If it is and
+ * the socket is later found to be SOCK_MEMALLOC then PFMEMALLOC reserves
+ * may be used. Otherwise, the packet data may be discarded until enough
+ * memory is free
+ */
+#define kmalloc_reserve(size, gfp, node, pfmemalloc) \
+	__kmalloc_reserve(size, gfp, node, _RET_IP_, pfmemalloc)
+void *__kmalloc_reserve(size_t size, gfp_t flags, int node, unsigned long ip,
+			bool *pfmemalloc)
+{
+	void *obj;
+	bool ret_pfmemalloc = false;
+
+	/*
+	 * Try a regular allocation, when that fails and we're not entitled
+	 * to the reserves, fail.
+	 */
+	obj = kmalloc_node_track_caller(size,
+					flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
+					node);
+	if (obj || !(gfp_pfmemalloc_allowed(flags)))
+		goto out;
+
+	/* Try again but now we are using pfmemalloc reserves */
+	ret_pfmemalloc = true;
+	obj = kmalloc_node_track_caller(size, flags, node);
+
+out:
+	if (pfmemalloc)
+		*pfmemalloc = ret_pfmemalloc;
+
+	return obj;
+}
+
 /* Allocate a new skbuff. We do this ourselves so we can fill in a few
  * 'private' fields and also do memory statistics to find all the
  * [BEEP] leaks.
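
For context, a minimal, illustrative sketch of how the helper added above is meant to be used by an allocation path (all names are taken from this patch; the error handling is simplified, not the exact code of any one caller):

	bool pfmemalloc;
	u8 *data;

	/* kmalloc_reserve() only falls back to the pfmemalloc reserves when
	 * gfp_pfmemalloc_allowed(gfp_mask) says the caller is entitled to
	 * them, and reports through &pfmemalloc whether that happened. */
	data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc);
	if (!data)
		return NULL;

	/* Record the fact so the skb can later be dropped if the receiving
	 * socket turns out not to be SOCK_MEMALLOC. */
	skb->pfmemalloc = pfmemalloc;
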
@@ -155,8 +192,10 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
  *	__alloc_skb	-	allocate a network buffer
  *	@size: size to allocate
  *	@gfp_mask: allocation mask
- *	@fclone: allocate from fclone cache instead of head cache
- *		and allocate a cloned (child) skb
+ *	@flags: If SKB_ALLOC_FCLONE is set, allocate from fclone cache
+ *		instead of head cache and allocate a cloned (child) skb.
+ *		If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for
+ *		allocations in case the data is required for writeback
  *	@node: numa node to allocate memory on
  *
  *	Allocate a new &sk_buff. The returned buffer has no headroom and a
@@ -167,14 +206,19 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
  *	%GFP_ATOMIC.
  */
 struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
-			    int fclone, int node)
+			    int flags, int node)
 {
 	struct kmem_cache *cache;
 	struct skb_shared_info *shinfo;
 	struct sk_buff *skb;
 	u8 *data;
+	bool pfmemalloc;
 
-	cache = fclone ? skbuff_fclone_cache : skbuff_head_cache;
+	cache = (flags & SKB_ALLOC_FCLONE)
+		? skbuff_fclone_cache : skbuff_head_cache;
+
+	if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX))
+		gfp_mask |= __GFP_MEMALLOC;
 
 	/* Get the HEAD */
 	skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
@@ -189,7 +233,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	 */
 	size = SKB_DATA_ALIGN(size);
 	size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-	data = kmalloc_node_track_caller(size, gfp_mask, node);
+	data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc);
 	if (!data)
 		goto nodata;
 	/* kmalloc(size) might give us more room than requested.
@@ -207,6 +251,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	memset(skb, 0, offsetof(struct sk_buff, tail));
 	/* Account for allocated memory : skb + skb->head */
 	skb->truesize = SKB_TRUESIZE(size);
+	skb->pfmemalloc = pfmemalloc;
 	atomic_set(&skb->users, 1);
 	skb->head = data;
 	skb->data = data;
@@ -222,7 +267,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	atomic_set(&shinfo->dataref, 1);
 	kmemcheck_annotate_variable(shinfo->destructor_arg);
 
-	if (fclone) {
+	if (flags & SKB_ALLOC_FCLONE) {
 		struct sk_buff *child = skb + 1;
 		atomic_t *fclone_ref = (atomic_t *) (child + 1);
 
@@ -232,6 +277,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 		atomic_set(fclone_ref, 1);
 
 		child->fclone = SKB_FCLONE_UNAVAILABLE;
+		child->pfmemalloc = pfmemalloc;
 	}
 out:
 	return skb;
@@ -302,14 +348,7 @@ static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
 
 #define NETDEV_PAGECNT_BIAS (PAGE_SIZE / SMP_CACHE_BYTES)
 
-/**
- * netdev_alloc_frag - allocate a page fragment
- * @fragsz: fragment size
- *
- * Allocates a frag from a page for receive buffer.
- * Uses GFP_ATOMIC allocations.
- */
-void *netdev_alloc_frag(unsigned int fragsz)
+static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
 {
 	struct netdev_alloc_cache *nc;
 	void *data = NULL;
@@ -319,7 +358,7 @@ void *netdev_alloc_frag(unsigned int fragsz)
 	nc = &__get_cpu_var(netdev_alloc_cache);
 	if (unlikely(!nc->page)) {
 refill:
-		nc->page = alloc_page(GFP_ATOMIC | __GFP_COLD);
+		nc->page = alloc_page(gfp_mask);
 		if (unlikely(!nc->page))
 			goto end;
 recycle:
@@ -343,6 +382,18 @@ end:
 	local_irq_restore(flags);
 	return data;
 }
+
+/**
+ * netdev_alloc_frag - allocate a page fragment
+ * @fragsz: fragment size
+ *
+ * Allocates a frag from a page for receive buffer.
+ * Uses GFP_ATOMIC allocations.
+ */
+void *netdev_alloc_frag(unsigned int fragsz)
+{
+	return __netdev_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
+}
 EXPORT_SYMBOL(netdev_alloc_frag);
 
 /**
@@ -366,7 +417,12 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
			      SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
 	if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA))) {
-		void *data = netdev_alloc_frag(fragsz);
+		void *data;
+
+		if (sk_memalloc_socks())
+			gfp_mask |= __GFP_MEMALLOC;
+
+		data = __netdev_alloc_frag(fragsz, gfp_mask);
 
 		if (likely(data)) {
 			skb = build_skb(data, fragsz);
@@ -374,7 +430,8 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
 				put_page(virt_to_head_page(data));
 		}
 	} else {
-		skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, NUMA_NO_NODE);
+		skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask,
+				  SKB_ALLOC_RX, NUMA_NO_NODE);
 	}
 	if (likely(skb)) {
 		skb_reserve(skb, NET_SKB_PAD);
@@ -656,6 +713,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 #if IS_ENABLED(CONFIG_IP_VS)
 	new->ipvs_property = old->ipvs_property;
 #endif
+	new->pfmemalloc = old->pfmemalloc;
 	new->protocol = old->protocol;
 	new->mark = old->mark;
 	new->skb_iif = old->skb_iif;
@@ -814,6 +872,9 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 		n->fclone = SKB_FCLONE_CLONE;
 		atomic_inc(fclone_ref);
 	} else {
+		if (skb_pfmemalloc(skb))
+			gfp_mask |= __GFP_MEMALLOC;
+
 		n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
 		if (!n)
 			return NULL;
@@ -850,6 +911,13 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
 }
 
+static inline int skb_alloc_rx_flag(const struct sk_buff *skb)
+{
+	if (skb_pfmemalloc(skb))
+		return SKB_ALLOC_RX;
+	return 0;
+}
+
 /**
  *	skb_copy	-	create private copy of an sk_buff
  *	@skb: buffer to copy
@@ -871,7 +939,8 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
 {
 	int headerlen = skb_headroom(skb);
 	unsigned int size = skb_end_offset(skb) + skb->data_len;
-	struct sk_buff *n = alloc_skb(size, gfp_mask);
+	struct sk_buff *n = __alloc_skb(size, gfp_mask,
+					skb_alloc_rx_flag(skb), NUMA_NO_NODE);
 
 	if (!n)
 		return NULL;
@@ -906,7 +975,8 @@ EXPORT_SYMBOL(skb_copy);
 struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask)
 {
 	unsigned int size = skb_headlen(skb) + headroom;
-	struct sk_buff *n = alloc_skb(size, gfp_mask);
+	struct sk_buff *n = __alloc_skb(size, gfp_mask,
+					skb_alloc_rx_flag(skb), NUMA_NO_NODE);
 
 	if (!n)
 		goto out;
@@ -979,8 +1049,10 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 
 	size = SKB_DATA_ALIGN(size);
 
-	data = kmalloc(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
-		       gfp_mask);
+	if (skb_pfmemalloc(skb))
+		gfp_mask |= __GFP_MEMALLOC;
+	data = kmalloc_reserve(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
+			       gfp_mask, NUMA_NO_NODE, NULL);
 	if (!data)
 		goto nodata;
 	size = SKB_WITH_OVERHEAD(ksize(data));
@@ -1092,8 +1164,9 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
 	/*
 	 *	Allocate the copy buffer
 	 */
-	struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom,
-				      gfp_mask);
+	struct sk_buff *n = __alloc_skb(newheadroom + skb->len + newtailroom,
+					gfp_mask, skb_alloc_rx_flag(skb),
+					NUMA_NO_NODE);
 	int oldheadroom = skb_headroom(skb);
 	int head_copy_len, head_copy_off;
 	int off;
@@ -2775,8 +2848,9 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
 			skb_release_head_state(nskb);
 			__skb_push(nskb, doffset);
 		} else {
-			nskb = alloc_skb(hsize + doffset + headroom,
-					 GFP_ATOMIC);
+			nskb = __alloc_skb(hsize + doffset + headroom,
+					   GFP_ATOMIC, skb_alloc_rx_flag(skb),
+					   NUMA_NO_NODE);
 
 			if (unlikely(!nskb))
 				goto err;
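
The copy paths changed above all follow one propagation rule: an skb built from the emergency reserves marks its copies as reserve candidates as well. A minimal sketch of that pattern, using only helpers introduced by this patch (simplified from skb_copy() above, not the exact code of every caller):

	/* Copies of a pfmemalloc skb pass SKB_ALLOC_RX to __alloc_skb(), which
	 * adds __GFP_MEMALLOC when memalloc sockets are in use, so copying
	 * under memory pressure does not defeat the reserve. */
	struct sk_buff *n = __alloc_skb(skb_end_offset(skb) + skb->data_len,
					gfp_mask, skb_alloc_rx_flag(skb),
					NUMA_NO_NODE);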