aboutsummaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2012-07-31 22:25:39 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2012-07-31 22:25:39 -0400
commit ac694dbdbc403c00e2c14d10bc7b8412cc378259 (patch)
tree e37328cfbeaf43716dd5914cad9179e57e84df76 /net/core
parent a40a1d3d0a2fd613fdec6d89d3c053268ced76ed (diff)
parent 437ea90cc3afdca5229b41c6b1d38c4842756cb9 (diff)
Merge branch 'akpm' (Andrew's patch-bomb)
Merge Andrew's second set of patches:
 - MM
 - a few random fixes
 - a couple of RTC leftovers

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (120 commits)
  rtc/rtc-88pm80x: remove unneed devm_kfree
  rtc/rtc-88pm80x: assign ret only when rtc_register_driver fails
  mm: hugetlbfs: close race during teardown of hugetlbfs shared page tables
  tmpfs: distribute interleave better across nodes
  mm: remove redundant initialization
  mm: warn if pg_data_t isn't initialized with zero
  mips: zero out pg_data_t when it's allocated
  memcg: gix memory accounting scalability in shrink_page_list
  mm/sparse: remove index_init_lock
  mm/sparse: more checks on mem_section number
  mm/sparse: optimize sparse_index_alloc
  memcg: add mem_cgroup_from_css() helper
  memcg: further prevent OOM with too many dirty pages
  memcg: prevent OOM with too many dirty pages
  mm: mmu_notifier: fix freed page still mapped in secondary MMU
  mm: memcg: only check anon swapin page charges for swap cache
  mm: memcg: only check swap cache pages for repeated charging
  mm: memcg: split swapin charge function into private and public part
  mm: memcg: remove needless !mm fixup to init_mm when charging
  mm: memcg: remove unneeded shmem charge type
  ...
Diffstat (limited to 'net/core')
-rw-r--r-- net/core/dev.c 53
-rw-r--r-- net/core/filter.c 8
-rw-r--r-- net/core/skbuff.c 124
-rw-r--r-- net/core/sock.c 59
4 files changed, 211 insertions, 33 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index c8569f826b71..0cb3fe8d8e72 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3156,6 +3156,23 @@ void netdev_rx_handler_unregister(struct net_device *dev)
3156} 3156}
3157EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); 3157EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
3158 3158
3159/*
3160 * Limit the use of PFMEMALLOC reserves to those protocols that implement
3161 * the special handling of PFMEMALLOC skbs.
3162 */
3163static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
3164{
3165 switch (skb->protocol) {
3166 case __constant_htons(ETH_P_ARP):
3167 case __constant_htons(ETH_P_IP):
3168 case __constant_htons(ETH_P_IPV6):
3169 case __constant_htons(ETH_P_8021Q):
3170 return true;
3171 default:
3172 return false;
3173 }
3174}
3175
3159static int __netif_receive_skb(struct sk_buff *skb) 3176static int __netif_receive_skb(struct sk_buff *skb)
3160{ 3177{
3161 struct packet_type *ptype, *pt_prev; 3178 struct packet_type *ptype, *pt_prev;
@@ -3165,14 +3182,27 @@ static int __netif_receive_skb(struct sk_buff *skb)
3165 bool deliver_exact = false; 3182 bool deliver_exact = false;
3166 int ret = NET_RX_DROP; 3183 int ret = NET_RX_DROP;
3167 __be16 type; 3184 __be16 type;
3185 unsigned long pflags = current->flags;
3168 3186
3169 net_timestamp_check(!netdev_tstamp_prequeue, skb); 3187 net_timestamp_check(!netdev_tstamp_prequeue, skb);
3170 3188
3171 trace_netif_receive_skb(skb); 3189 trace_netif_receive_skb(skb);
3172 3190
3191 /*
3192 * PFMEMALLOC skbs are special, they should
3193 * - be delivered to SOCK_MEMALLOC sockets only
3194 * - stay away from userspace
3195 * - have bounded memory usage
3196 *
3197 * Use PF_MEMALLOC as this saves us from propagating the allocation
3198 * context down to all allocation sites.
3199 */
3200 if (sk_memalloc_socks() && skb_pfmemalloc(skb))
3201 current->flags |= PF_MEMALLOC;
3202
3173 /* if we've gotten here through NAPI, check netpoll */ 3203 /* if we've gotten here through NAPI, check netpoll */
3174 if (netpoll_receive_skb(skb)) 3204 if (netpoll_receive_skb(skb))
3175 return NET_RX_DROP; 3205 goto out;
3176 3206
3177 orig_dev = skb->dev; 3207 orig_dev = skb->dev;
3178 3208
@@ -3192,7 +3222,7 @@ another_round:
3192 if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) { 3222 if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3193 skb = vlan_untag(skb); 3223 skb = vlan_untag(skb);
3194 if (unlikely(!skb)) 3224 if (unlikely(!skb))
3195 goto out; 3225 goto unlock;
3196 } 3226 }
3197 3227
3198#ifdef CONFIG_NET_CLS_ACT 3228#ifdef CONFIG_NET_CLS_ACT
@@ -3202,6 +3232,9 @@ another_round:
3202 } 3232 }
3203#endif 3233#endif
3204 3234
3235 if (sk_memalloc_socks() && skb_pfmemalloc(skb))
3236 goto skip_taps;
3237
3205 list_for_each_entry_rcu(ptype, &ptype_all, list) { 3238 list_for_each_entry_rcu(ptype, &ptype_all, list) {
3206 if (!ptype->dev || ptype->dev == skb->dev) { 3239 if (!ptype->dev || ptype->dev == skb->dev) {
3207 if (pt_prev) 3240 if (pt_prev)
@@ -3210,13 +3243,18 @@ another_round:
3210 } 3243 }
3211 } 3244 }
3212 3245
3246skip_taps:
3213#ifdef CONFIG_NET_CLS_ACT 3247#ifdef CONFIG_NET_CLS_ACT
3214 skb = handle_ing(skb, &pt_prev, &ret, orig_dev); 3248 skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
3215 if (!skb) 3249 if (!skb)
3216 goto out; 3250 goto unlock;
3217ncls: 3251ncls:
3218#endif 3252#endif
3219 3253
3254 if (sk_memalloc_socks() && skb_pfmemalloc(skb)
3255 && !skb_pfmemalloc_protocol(skb))
3256 goto drop;
3257
3220 rx_handler = rcu_dereference(skb->dev->rx_handler); 3258 rx_handler = rcu_dereference(skb->dev->rx_handler);
3221 if (vlan_tx_tag_present(skb)) { 3259 if (vlan_tx_tag_present(skb)) {
3222 if (pt_prev) { 3260 if (pt_prev) {
@@ -3226,7 +3264,7 @@ ncls:
3226 if (vlan_do_receive(&skb, !rx_handler)) 3264 if (vlan_do_receive(&skb, !rx_handler))
3227 goto another_round; 3265 goto another_round;
3228 else if (unlikely(!skb)) 3266 else if (unlikely(!skb))
3229 goto out; 3267 goto unlock;
3230 } 3268 }
3231 3269
3232 if (rx_handler) { 3270 if (rx_handler) {
@@ -3236,7 +3274,7 @@ ncls:
3236 } 3274 }
3237 switch (rx_handler(&skb)) { 3275 switch (rx_handler(&skb)) {
3238 case RX_HANDLER_CONSUMED: 3276 case RX_HANDLER_CONSUMED:
3239 goto out; 3277 goto unlock;
3240 case RX_HANDLER_ANOTHER: 3278 case RX_HANDLER_ANOTHER:
3241 goto another_round; 3279 goto another_round;
3242 case RX_HANDLER_EXACT: 3280 case RX_HANDLER_EXACT:
@@ -3269,6 +3307,7 @@ ncls:
3269 else 3307 else
3270 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); 3308 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
3271 } else { 3309 } else {
3310drop:
3272 atomic_long_inc(&skb->dev->rx_dropped); 3311 atomic_long_inc(&skb->dev->rx_dropped);
3273 kfree_skb(skb); 3312 kfree_skb(skb);
3274 /* Jamal, now you will not able to escape explaining 3313 /* Jamal, now you will not able to escape explaining
@@ -3277,8 +3316,10 @@ ncls:
3277 ret = NET_RX_DROP; 3316 ret = NET_RX_DROP;
3278 } 3317 }
3279 3318
3280out: 3319unlock:
3281 rcu_read_unlock(); 3320 rcu_read_unlock();
3321out:
3322 tsk_restore_flags(current, pflags, PF_MEMALLOC);
3282 return ret; 3323 return ret;
3283} 3324}
3284 3325
diff --git a/net/core/filter.c b/net/core/filter.c
index d4ce2dc712e3..907efd27ec77 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -83,6 +83,14 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
83 int err; 83 int err;
84 struct sk_filter *filter; 84 struct sk_filter *filter;
85 85
86 /*
87 * If the skb was allocated from pfmemalloc reserves, only
88 * allow SOCK_MEMALLOC sockets to use it as this socket is
89 * helping free memory
90 */
91 if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC))
92 return -ENOMEM;
93
86 err = security_sock_rcv_skb(sk, skb); 94 err = security_sock_rcv_skb(sk, skb);
87 if (err) 95 if (err)
88 return err; 96 return err;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 368f65c15e4f..fe00d1208167 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -145,6 +145,43 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
145 BUG(); 145 BUG();
146} 146}
147 147
148
149/*
150 * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells
151 * the caller if emergency pfmemalloc reserves are being used. If it is and
152 * the socket is later found to be SOCK_MEMALLOC then PFMEMALLOC reserves
153 * may be used. Otherwise, the packet data may be discarded until enough
154 * memory is free
155 */
156#define kmalloc_reserve(size, gfp, node, pfmemalloc) \
157 __kmalloc_reserve(size, gfp, node, _RET_IP_, pfmemalloc)
158void *__kmalloc_reserve(size_t size, gfp_t flags, int node, unsigned long ip,
159 bool *pfmemalloc)
160{
161 void *obj;
162 bool ret_pfmemalloc = false;
163
164 /*
165 * Try a regular allocation, when that fails and we're not entitled
166 * to the reserves, fail.
167 */
168 obj = kmalloc_node_track_caller(size,
169 flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
170 node);
171 if (obj || !(gfp_pfmemalloc_allowed(flags)))
172 goto out;
173
174 /* Try again but now we are using pfmemalloc reserves */
175 ret_pfmemalloc = true;
176 obj = kmalloc_node_track_caller(size, flags, node);
177
178out:
179 if (pfmemalloc)
180 *pfmemalloc = ret_pfmemalloc;
181
182 return obj;
183}
184
148/* Allocate a new skbuff. We do this ourselves so we can fill in a few 185/* Allocate a new skbuff. We do this ourselves so we can fill in a few
149 * 'private' fields and also do memory statistics to find all the 186 * 'private' fields and also do memory statistics to find all the
150 * [BEEP] leaks. 187 * [BEEP] leaks.
@@ -155,8 +192,10 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
155 * __alloc_skb - allocate a network buffer 192 * __alloc_skb - allocate a network buffer
156 * @size: size to allocate 193 * @size: size to allocate
157 * @gfp_mask: allocation mask 194 * @gfp_mask: allocation mask
158 * @fclone: allocate from fclone cache instead of head cache 195 * @flags: If SKB_ALLOC_FCLONE is set, allocate from fclone cache
159 * and allocate a cloned (child) skb 196 * instead of head cache and allocate a cloned (child) skb.
197 * If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for
198 * allocations in case the data is required for writeback
160 * @node: numa node to allocate memory on 199 * @node: numa node to allocate memory on
161 * 200 *
162 * Allocate a new &sk_buff. The returned buffer has no headroom and a 201 * Allocate a new &sk_buff. The returned buffer has no headroom and a
@@ -167,14 +206,19 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
167 * %GFP_ATOMIC. 206 * %GFP_ATOMIC.
168 */ 207 */
169struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, 208struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
170 int fclone, int node) 209 int flags, int node)
171{ 210{
172 struct kmem_cache *cache; 211 struct kmem_cache *cache;
173 struct skb_shared_info *shinfo; 212 struct skb_shared_info *shinfo;
174 struct sk_buff *skb; 213 struct sk_buff *skb;
175 u8 *data; 214 u8 *data;
215 bool pfmemalloc;
176 216
177 cache = fclone ? skbuff_fclone_cache : skbuff_head_cache; 217 cache = (flags & SKB_ALLOC_FCLONE)
218 ? skbuff_fclone_cache : skbuff_head_cache;
219
220 if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX))
221 gfp_mask |= __GFP_MEMALLOC;
178 222
179 /* Get the HEAD */ 223 /* Get the HEAD */
180 skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node); 224 skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
@@ -189,7 +233,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
189 */ 233 */
190 size = SKB_DATA_ALIGN(size); 234 size = SKB_DATA_ALIGN(size);
191 size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 235 size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
192 data = kmalloc_node_track_caller(size, gfp_mask, node); 236 data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc);
193 if (!data) 237 if (!data)
194 goto nodata; 238 goto nodata;
195 /* kmalloc(size) might give us more room than requested. 239 /* kmalloc(size) might give us more room than requested.
@@ -207,6 +251,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
207 memset(skb, 0, offsetof(struct sk_buff, tail)); 251 memset(skb, 0, offsetof(struct sk_buff, tail));
208 /* Account for allocated memory : skb + skb->head */ 252 /* Account for allocated memory : skb + skb->head */
209 skb->truesize = SKB_TRUESIZE(size); 253 skb->truesize = SKB_TRUESIZE(size);
254 skb->pfmemalloc = pfmemalloc;
210 atomic_set(&skb->users, 1); 255 atomic_set(&skb->users, 1);
211 skb->head = data; 256 skb->head = data;
212 skb->data = data; 257 skb->data = data;
@@ -222,7 +267,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
222 atomic_set(&shinfo->dataref, 1); 267 atomic_set(&shinfo->dataref, 1);
223 kmemcheck_annotate_variable(shinfo->destructor_arg); 268 kmemcheck_annotate_variable(shinfo->destructor_arg);
224 269
225 if (fclone) { 270 if (flags & SKB_ALLOC_FCLONE) {
226 struct sk_buff *child = skb + 1; 271 struct sk_buff *child = skb + 1;
227 atomic_t *fclone_ref = (atomic_t *) (child + 1); 272 atomic_t *fclone_ref = (atomic_t *) (child + 1);
228 273
@@ -232,6 +277,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
232 atomic_set(fclone_ref, 1); 277 atomic_set(fclone_ref, 1);
233 278
234 child->fclone = SKB_FCLONE_UNAVAILABLE; 279 child->fclone = SKB_FCLONE_UNAVAILABLE;
280 child->pfmemalloc = pfmemalloc;
235 } 281 }
236out: 282out:
237 return skb; 283 return skb;
@@ -302,14 +348,7 @@ static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
302 348
303#define NETDEV_PAGECNT_BIAS (PAGE_SIZE / SMP_CACHE_BYTES) 349#define NETDEV_PAGECNT_BIAS (PAGE_SIZE / SMP_CACHE_BYTES)
304 350
305/** 351static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
306 * netdev_alloc_frag - allocate a page fragment
307 * @fragsz: fragment size
308 *
309 * Allocates a frag from a page for receive buffer.
310 * Uses GFP_ATOMIC allocations.
311 */
312void *netdev_alloc_frag(unsigned int fragsz)
313{ 352{
314 struct netdev_alloc_cache *nc; 353 struct netdev_alloc_cache *nc;
315 void *data = NULL; 354 void *data = NULL;
@@ -319,7 +358,7 @@ void *netdev_alloc_frag(unsigned int fragsz)
319 nc = &__get_cpu_var(netdev_alloc_cache); 358 nc = &__get_cpu_var(netdev_alloc_cache);
320 if (unlikely(!nc->page)) { 359 if (unlikely(!nc->page)) {
321refill: 360refill:
322 nc->page = alloc_page(GFP_ATOMIC | __GFP_COLD); 361 nc->page = alloc_page(gfp_mask);
323 if (unlikely(!nc->page)) 362 if (unlikely(!nc->page))
324 goto end; 363 goto end;
325recycle: 364recycle:
@@ -343,6 +382,18 @@ end:
343 local_irq_restore(flags); 382 local_irq_restore(flags);
344 return data; 383 return data;
345} 384}
385
386/**
387 * netdev_alloc_frag - allocate a page fragment
388 * @fragsz: fragment size
389 *
390 * Allocates a frag from a page for receive buffer.
391 * Uses GFP_ATOMIC allocations.
392 */
393void *netdev_alloc_frag(unsigned int fragsz)
394{
395 return __netdev_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
396}
346EXPORT_SYMBOL(netdev_alloc_frag); 397EXPORT_SYMBOL(netdev_alloc_frag);
347 398
348/** 399/**
@@ -366,7 +417,12 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
366 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 417 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
367 418
368 if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA))) { 419 if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA))) {
369 void *data = netdev_alloc_frag(fragsz); 420 void *data;
421
422 if (sk_memalloc_socks())
423 gfp_mask |= __GFP_MEMALLOC;
424
425 data = __netdev_alloc_frag(fragsz, gfp_mask);
370 426
371 if (likely(data)) { 427 if (likely(data)) {
372 skb = build_skb(data, fragsz); 428 skb = build_skb(data, fragsz);
@@ -374,7 +430,8 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
374 put_page(virt_to_head_page(data)); 430 put_page(virt_to_head_page(data));
375 } 431 }
376 } else { 432 } else {
377 skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, NUMA_NO_NODE); 433 skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask,
434 SKB_ALLOC_RX, NUMA_NO_NODE);
378 } 435 }
379 if (likely(skb)) { 436 if (likely(skb)) {
380 skb_reserve(skb, NET_SKB_PAD); 437 skb_reserve(skb, NET_SKB_PAD);
@@ -656,6 +713,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
656#if IS_ENABLED(CONFIG_IP_VS) 713#if IS_ENABLED(CONFIG_IP_VS)
657 new->ipvs_property = old->ipvs_property; 714 new->ipvs_property = old->ipvs_property;
658#endif 715#endif
716 new->pfmemalloc = old->pfmemalloc;
659 new->protocol = old->protocol; 717 new->protocol = old->protocol;
660 new->mark = old->mark; 718 new->mark = old->mark;
661 new->skb_iif = old->skb_iif; 719 new->skb_iif = old->skb_iif;
@@ -814,6 +872,9 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
814 n->fclone = SKB_FCLONE_CLONE; 872 n->fclone = SKB_FCLONE_CLONE;
815 atomic_inc(fclone_ref); 873 atomic_inc(fclone_ref);
816 } else { 874 } else {
875 if (skb_pfmemalloc(skb))
876 gfp_mask |= __GFP_MEMALLOC;
877
817 n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); 878 n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
818 if (!n) 879 if (!n)
819 return NULL; 880 return NULL;
@@ -850,6 +911,13 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
850 skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type; 911 skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
851} 912}
852 913
914static inline int skb_alloc_rx_flag(const struct sk_buff *skb)
915{
916 if (skb_pfmemalloc(skb))
917 return SKB_ALLOC_RX;
918 return 0;
919}
920
853/** 921/**
854 * skb_copy - create private copy of an sk_buff 922 * skb_copy - create private copy of an sk_buff
855 * @skb: buffer to copy 923 * @skb: buffer to copy
@@ -871,7 +939,8 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
871{ 939{
872 int headerlen = skb_headroom(skb); 940 int headerlen = skb_headroom(skb);
873 unsigned int size = skb_end_offset(skb) + skb->data_len; 941 unsigned int size = skb_end_offset(skb) + skb->data_len;
874 struct sk_buff *n = alloc_skb(size, gfp_mask); 942 struct sk_buff *n = __alloc_skb(size, gfp_mask,
943 skb_alloc_rx_flag(skb), NUMA_NO_NODE);
875 944
876 if (!n) 945 if (!n)
877 return NULL; 946 return NULL;
@@ -906,7 +975,8 @@ EXPORT_SYMBOL(skb_copy);
906struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask) 975struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask)
907{ 976{
908 unsigned int size = skb_headlen(skb) + headroom; 977 unsigned int size = skb_headlen(skb) + headroom;
909 struct sk_buff *n = alloc_skb(size, gfp_mask); 978 struct sk_buff *n = __alloc_skb(size, gfp_mask,
979 skb_alloc_rx_flag(skb), NUMA_NO_NODE);
910 980
911 if (!n) 981 if (!n)
912 goto out; 982 goto out;
@@ -979,8 +1049,10 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
979 1049
980 size = SKB_DATA_ALIGN(size); 1050 size = SKB_DATA_ALIGN(size);
981 1051
982 data = kmalloc(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), 1052 if (skb_pfmemalloc(skb))
983 gfp_mask); 1053 gfp_mask |= __GFP_MEMALLOC;
1054 data = kmalloc_reserve(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
1055 gfp_mask, NUMA_NO_NODE, NULL);
984 if (!data) 1056 if (!data)
985 goto nodata; 1057 goto nodata;
986 size = SKB_WITH_OVERHEAD(ksize(data)); 1058 size = SKB_WITH_OVERHEAD(ksize(data));
@@ -1092,8 +1164,9 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
1092 /* 1164 /*
1093 * Allocate the copy buffer 1165 * Allocate the copy buffer
1094 */ 1166 */
1095 struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom, 1167 struct sk_buff *n = __alloc_skb(newheadroom + skb->len + newtailroom,
1096 gfp_mask); 1168 gfp_mask, skb_alloc_rx_flag(skb),
1169 NUMA_NO_NODE);
1097 int oldheadroom = skb_headroom(skb); 1170 int oldheadroom = skb_headroom(skb);
1098 int head_copy_len, head_copy_off; 1171 int head_copy_len, head_copy_off;
1099 int off; 1172 int off;
@@ -2775,8 +2848,9 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
2775 skb_release_head_state(nskb); 2848 skb_release_head_state(nskb);
2776 __skb_push(nskb, doffset); 2849 __skb_push(nskb, doffset);
2777 } else { 2850 } else {
2778 nskb = alloc_skb(hsize + doffset + headroom, 2851 nskb = __alloc_skb(hsize + doffset + headroom,
2779 GFP_ATOMIC); 2852 GFP_ATOMIC, skb_alloc_rx_flag(skb),
2853 NUMA_NO_NODE);
2780 2854
2781 if (unlikely(!nskb)) 2855 if (unlikely(!nskb))
2782 goto err; 2856 goto err;
diff --git a/net/core/sock.c b/net/core/sock.c
index 2676a88f533e..6b654b3ddfda 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -142,7 +142,7 @@
142static DEFINE_MUTEX(proto_list_mutex); 142static DEFINE_MUTEX(proto_list_mutex);
143static LIST_HEAD(proto_list); 143static LIST_HEAD(proto_list);
144 144
145#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM 145#ifdef CONFIG_MEMCG_KMEM
146int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss) 146int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
147{ 147{
148 struct proto *proto; 148 struct proto *proto;
@@ -271,6 +271,61 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
271int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); 271int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
272EXPORT_SYMBOL(sysctl_optmem_max); 272EXPORT_SYMBOL(sysctl_optmem_max);
273 273
274struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE;
275EXPORT_SYMBOL_GPL(memalloc_socks);
276
277/**
278 * sk_set_memalloc - sets %SOCK_MEMALLOC
279 * @sk: socket to set it on
280 *
281 * Set %SOCK_MEMALLOC on a socket for access to emergency reserves.
282 * It's the responsibility of the admin to adjust min_free_kbytes
283 * to meet the requirements
284 */
285void sk_set_memalloc(struct sock *sk)
286{
287 sock_set_flag(sk, SOCK_MEMALLOC);
288 sk->sk_allocation |= __GFP_MEMALLOC;
289 static_key_slow_inc(&memalloc_socks);
290}
291EXPORT_SYMBOL_GPL(sk_set_memalloc);
292
293void sk_clear_memalloc(struct sock *sk)
294{
295 sock_reset_flag(sk, SOCK_MEMALLOC);
296 sk->sk_allocation &= ~__GFP_MEMALLOC;
297 static_key_slow_dec(&memalloc_socks);
298
299 /*
300 * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward
301 * progress of swapping. However, if SOCK_MEMALLOC is cleared while
302 * it has rmem allocations there is a risk that the user of the
303 * socket cannot make forward progress due to exceeding the rmem
304 * limits. By rights, sk_clear_memalloc() should only be called
305 * on sockets being torn down but warn and reset the accounting if
306 * that assumption breaks.
307 */
308 if (WARN_ON(sk->sk_forward_alloc))
309 sk_mem_reclaim(sk);
310}
311EXPORT_SYMBOL_GPL(sk_clear_memalloc);
312
313int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
314{
315 int ret;
316 unsigned long pflags = current->flags;
317
318 /* these should have been dropped before queueing */
319 BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));
320
321 current->flags |= PF_MEMALLOC;
322 ret = sk->sk_backlog_rcv(sk, skb);
323 tsk_restore_flags(current, pflags, PF_MEMALLOC);
324
325 return ret;
326}
327EXPORT_SYMBOL(__sk_backlog_rcv);
328
274#if defined(CONFIG_CGROUPS) 329#if defined(CONFIG_CGROUPS)
275#if !defined(CONFIG_NET_CLS_CGROUP) 330#if !defined(CONFIG_NET_CLS_CGROUP)
276int net_cls_subsys_id = -1; 331int net_cls_subsys_id = -1;
@@ -353,7 +408,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
353 if (err) 408 if (err)
354 return err; 409 return err;
355 410
356 if (!sk_rmem_schedule(sk, skb->truesize)) { 411 if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
357 atomic_inc(&sk->sk_drops); 412 atomic_inc(&sk->sk_drops);
358 return -ENOBUFS; 413 return -ENOBUFS;
359 } 414 }