author     Linus Torvalds <torvalds@linux-foundation.org>   2012-07-31 22:25:39 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2012-07-31 22:25:39 -0400
commit     ac694dbdbc403c00e2c14d10bc7b8412cc378259 (patch)
tree       e37328cfbeaf43716dd5914cad9179e57e84df76 /net/core
parent     a40a1d3d0a2fd613fdec6d89d3c053268ced76ed (diff)
parent     437ea90cc3afdca5229b41c6b1d38c4842756cb9 (diff)
Merge branch 'akpm' (Andrew's patch-bomb)
Merge Andrew's second set of patches:
- MM
- a few random fixes
- a couple of RTC leftovers
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (120 commits)
rtc/rtc-88pm80x: remove unneed devm_kfree
rtc/rtc-88pm80x: assign ret only when rtc_register_driver fails
mm: hugetlbfs: close race during teardown of hugetlbfs shared page tables
tmpfs: distribute interleave better across nodes
mm: remove redundant initialization
mm: warn if pg_data_t isn't initialized with zero
mips: zero out pg_data_t when it's allocated
memcg: gix memory accounting scalability in shrink_page_list
mm/sparse: remove index_init_lock
mm/sparse: more checks on mem_section number
mm/sparse: optimize sparse_index_alloc
memcg: add mem_cgroup_from_css() helper
memcg: further prevent OOM with too many dirty pages
memcg: prevent OOM with too many dirty pages
mm: mmu_notifier: fix freed page still mapped in secondary MMU
mm: memcg: only check anon swapin page charges for swap cache
mm: memcg: only check swap cache pages for repeated charging
mm: memcg: split swapin charge function into private and public part
mm: memcg: remove needless !mm fixup to init_mm when charging
mm: memcg: remove unneeded shmem charge type
...
Diffstat (limited to 'net/core')
-rw-r--r--  net/core/dev.c    |  53
-rw-r--r--  net/core/filter.c |   8
-rw-r--r--  net/core/skbuff.c | 124
-rw-r--r--  net/core/sock.c   |  59
4 files changed, 211 insertions(+), 33 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index c8569f826b71..0cb3fe8d8e72 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3156,6 +3156,23 @@ void netdev_rx_handler_unregister(struct net_device *dev)
 }
 EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
 
+/*
+ * Limit the use of PFMEMALLOC reserves to those protocols that implement
+ * the special handling of PFMEMALLOC skbs.
+ */
+static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
+{
+	switch (skb->protocol) {
+	case __constant_htons(ETH_P_ARP):
+	case __constant_htons(ETH_P_IP):
+	case __constant_htons(ETH_P_IPV6):
+	case __constant_htons(ETH_P_8021Q):
+		return true;
+	default:
+		return false;
+	}
+}
+
 static int __netif_receive_skb(struct sk_buff *skb)
 {
 	struct packet_type *ptype, *pt_prev;
@@ -3165,14 +3182,27 @@ static int __netif_receive_skb(struct sk_buff *skb)
 	bool deliver_exact = false;
 	int ret = NET_RX_DROP;
 	__be16 type;
+	unsigned long pflags = current->flags;
 
 	net_timestamp_check(!netdev_tstamp_prequeue, skb);
 
 	trace_netif_receive_skb(skb);
 
+	/*
+	 * PFMEMALLOC skbs are special, they should
+	 * - be delivered to SOCK_MEMALLOC sockets only
+	 * - stay away from userspace
+	 * - have bounded memory usage
+	 *
+	 * Use PF_MEMALLOC as this saves us from propagating the allocation
+	 * context down to all allocation sites.
+	 */
+	if (sk_memalloc_socks() && skb_pfmemalloc(skb))
+		current->flags |= PF_MEMALLOC;
+
 	/* if we've gotten here through NAPI, check netpoll */
 	if (netpoll_receive_skb(skb))
-		return NET_RX_DROP;
+		goto out;
 
 	orig_dev = skb->dev;
 
@@ -3192,7 +3222,7 @@ another_round:
 	if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
 		skb = vlan_untag(skb);
 		if (unlikely(!skb))
-			goto out;
+			goto unlock;
 	}
 
 #ifdef CONFIG_NET_CLS_ACT
@@ -3202,6 +3232,9 @@ another_round:
 	}
 #endif
 
+	if (sk_memalloc_socks() && skb_pfmemalloc(skb))
+		goto skip_taps;
+
 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
 		if (!ptype->dev || ptype->dev == skb->dev) {
 			if (pt_prev)
@@ -3210,13 +3243,18 @@ another_round:
 		}
 	}
 
+skip_taps:
 #ifdef CONFIG_NET_CLS_ACT
 	skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
 	if (!skb)
-		goto out;
+		goto unlock;
 ncls:
 #endif
 
+	if (sk_memalloc_socks() && skb_pfmemalloc(skb)
+				&& !skb_pfmemalloc_protocol(skb))
+		goto drop;
+
 	rx_handler = rcu_dereference(skb->dev->rx_handler);
 	if (vlan_tx_tag_present(skb)) {
 		if (pt_prev) {
@@ -3226,7 +3264,7 @@ ncls:
 		if (vlan_do_receive(&skb, !rx_handler))
 			goto another_round;
 		else if (unlikely(!skb))
-			goto out;
+			goto unlock;
 	}
 
 	if (rx_handler) {
@@ -3236,7 +3274,7 @@ ncls:
 		}
 		switch (rx_handler(&skb)) {
 		case RX_HANDLER_CONSUMED:
-			goto out;
+			goto unlock;
 		case RX_HANDLER_ANOTHER:
 			goto another_round;
 		case RX_HANDLER_EXACT:
@@ -3269,6 +3307,7 @@ ncls:
 		else
 			ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
 	} else {
+drop:
 		atomic_long_inc(&skb->dev->rx_dropped);
 		kfree_skb(skb);
 		/* Jamal, now you will not able to escape explaining
@@ -3277,8 +3316,10 @@ ncls:
 		ret = NET_RX_DROP;
 	}
 
-out:
+unlock:
 	rcu_read_unlock();
+out:
+	tsk_restore_flags(current, pflags, PF_MEMALLOC);
 	return ret;
 }
 
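Note on the receive-path hunks above: __netif_receive_skb() now temporarily marks the running task PF_MEMALLOC while it handles a pfmemalloc skb and restores the saved flag state on every exit path via tsk_restore_flags(). That helper is introduced elsewhere in this series, not in this diff; a minimal sketch of the idiom it supports, under that assumption, is:

/*
 * Illustrative sketch only, not part of this diff. The suffix _sketch
 * marks it as hypothetical. tsk_restore_flags() clears the bits named in
 * @flags and re-applies whatever subset of them was set in @orig_flags,
 * which is exactly the save/restore pattern used around the hunk above.
 */
static inline void tsk_restore_flags_sketch(struct task_struct *task,
					    unsigned long orig_flags,
					    unsigned long flags)
{
	task->flags &= ~flags;			/* drop the temporary bits */
	task->flags |= orig_flags & flags;	/* restore their prior state */
}

The usage pattern (save current->flags into pflags, set PF_MEMALLOC, restore at "out:") is visible verbatim in the hunk above.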
diff --git a/net/core/filter.c b/net/core/filter.c
index d4ce2dc712e3..907efd27ec77 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -83,6 +83,14 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
 	int err;
 	struct sk_filter *filter;
 
+	/*
+	 * If the skb was allocated from pfmemalloc reserves, only
+	 * allow SOCK_MEMALLOC sockets to use it as this socket is
+	 * helping free memory
+	 */
+	if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC))
+		return -ENOMEM;
+
 	err = security_sock_rcv_skb(sk, skb);
 	if (err)
 		return err;
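The sk_filter() check pairs the per-skb pfmemalloc marking with the SOCK_MEMALLOC socket flag set by sk_set_memalloc() (see the net/core/sock.c hunk below). A rough sketch of the skb-side predicate, assuming the pfmemalloc bit this series adds to struct sk_buff and hedging on the exact header definition:

/*
 * Illustrative sketch only. skb_pfmemalloc(), introduced by this series
 * in include/linux/skbuff.h, is expected to simply report the
 * skb->pfmemalloc bit that __alloc_skb() sets when the data came from
 * emergency reserves.
 */
static inline bool skb_pfmemalloc_sketch(const struct sk_buff *skb)
{
	return unlikely(skb->pfmemalloc);
}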
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 368f65c15e4f..fe00d1208167 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -145,6 +145,43 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
 	BUG();
 }
 
+
+/*
+ * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells
+ * the caller if emergency pfmemalloc reserves are being used. If it is and
+ * the socket is later found to be SOCK_MEMALLOC then PFMEMALLOC reserves
+ * may be used. Otherwise, the packet data may be discarded until enough
+ * memory is free
+ */
+#define kmalloc_reserve(size, gfp, node, pfmemalloc) \
+	__kmalloc_reserve(size, gfp, node, _RET_IP_, pfmemalloc)
+void *__kmalloc_reserve(size_t size, gfp_t flags, int node, unsigned long ip,
+			bool *pfmemalloc)
+{
+	void *obj;
+	bool ret_pfmemalloc = false;
+
+	/*
+	 * Try a regular allocation, when that fails and we're not entitled
+	 * to the reserves, fail.
+	 */
+	obj = kmalloc_node_track_caller(size,
+					flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
+					node);
+	if (obj || !(gfp_pfmemalloc_allowed(flags)))
+		goto out;
+
+	/* Try again but now we are using pfmemalloc reserves */
+	ret_pfmemalloc = true;
+	obj = kmalloc_node_track_caller(size, flags, node);
+
+out:
+	if (pfmemalloc)
+		*pfmemalloc = ret_pfmemalloc;
+
+	return obj;
+}
+
 /* Allocate a new skbuff. We do this ourselves so we can fill in a few
  * 'private' fields and also do memory statistics to find all the
  * [BEEP] leaks.
@@ -155,8 +192,10 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
  *	__alloc_skb	-	allocate a network buffer
  *	@size: size to allocate
  *	@gfp_mask: allocation mask
- *	@fclone: allocate from fclone cache instead of head cache
- *		and allocate a cloned (child) skb
+ *	@flags: If SKB_ALLOC_FCLONE is set, allocate from fclone cache
+ *		instead of head cache and allocate a cloned (child) skb.
+ *		If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for
+ *		allocations in case the data is required for writeback
  *	@node: numa node to allocate memory on
  *
  *	Allocate a new &sk_buff. The returned buffer has no headroom and a
@@ -167,14 +206,19 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
  *	%GFP_ATOMIC.
  */
 struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
-			    int fclone, int node)
+			    int flags, int node)
 {
 	struct kmem_cache *cache;
 	struct skb_shared_info *shinfo;
 	struct sk_buff *skb;
 	u8 *data;
+	bool pfmemalloc;
 
-	cache = fclone ? skbuff_fclone_cache : skbuff_head_cache;
+	cache = (flags & SKB_ALLOC_FCLONE)
+		? skbuff_fclone_cache : skbuff_head_cache;
+
+	if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX))
+		gfp_mask |= __GFP_MEMALLOC;
 
 	/* Get the HEAD */
 	skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
@@ -189,7 +233,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	 */
 	size = SKB_DATA_ALIGN(size);
 	size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-	data = kmalloc_node_track_caller(size, gfp_mask, node);
+	data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc);
 	if (!data)
 		goto nodata;
 	/* kmalloc(size) might give us more room than requested.
@@ -207,6 +251,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	memset(skb, 0, offsetof(struct sk_buff, tail));
 	/* Account for allocated memory : skb + skb->head */
 	skb->truesize = SKB_TRUESIZE(size);
+	skb->pfmemalloc = pfmemalloc;
 	atomic_set(&skb->users, 1);
 	skb->head = data;
 	skb->data = data;
@@ -222,7 +267,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	atomic_set(&shinfo->dataref, 1);
 	kmemcheck_annotate_variable(shinfo->destructor_arg);
 
-	if (fclone) {
+	if (flags & SKB_ALLOC_FCLONE) {
 		struct sk_buff *child = skb + 1;
 		atomic_t *fclone_ref = (atomic_t *) (child + 1);
 
@@ -232,6 +277,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 		atomic_set(fclone_ref, 1);
 
 		child->fclone = SKB_FCLONE_UNAVAILABLE;
+		child->pfmemalloc = pfmemalloc;
 	}
 out:
 	return skb;
@@ -302,14 +348,7 @@ static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
 
 #define NETDEV_PAGECNT_BIAS (PAGE_SIZE / SMP_CACHE_BYTES)
 
-/**
- * netdev_alloc_frag - allocate a page fragment
- * @fragsz: fragment size
- *
- * Allocates a frag from a page for receive buffer.
- * Uses GFP_ATOMIC allocations.
- */
-void *netdev_alloc_frag(unsigned int fragsz)
+static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
 {
 	struct netdev_alloc_cache *nc;
 	void *data = NULL;
@@ -319,7 +358,7 @@ void *netdev_alloc_frag(unsigned int fragsz)
 	nc = &__get_cpu_var(netdev_alloc_cache);
 	if (unlikely(!nc->page)) {
 refill:
-		nc->page = alloc_page(GFP_ATOMIC | __GFP_COLD);
+		nc->page = alloc_page(gfp_mask);
 		if (unlikely(!nc->page))
 			goto end;
 recycle:
@@ -343,6 +382,18 @@ end:
 	local_irq_restore(flags);
 	return data;
 }
+
+/**
+ * netdev_alloc_frag - allocate a page fragment
+ * @fragsz: fragment size
+ *
+ * Allocates a frag from a page for receive buffer.
+ * Uses GFP_ATOMIC allocations.
+ */
+void *netdev_alloc_frag(unsigned int fragsz)
+{
+	return __netdev_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
+}
 EXPORT_SYMBOL(netdev_alloc_frag);
 
 /**
@@ -366,7 +417,12 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
 		SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
 	if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA))) {
-		void *data = netdev_alloc_frag(fragsz);
+		void *data;
+
+		if (sk_memalloc_socks())
+			gfp_mask |= __GFP_MEMALLOC;
+
+		data = __netdev_alloc_frag(fragsz, gfp_mask);
 
 		if (likely(data)) {
 			skb = build_skb(data, fragsz);
@@ -374,7 +430,8 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
 				put_page(virt_to_head_page(data));
 		}
 	} else {
-		skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, NUMA_NO_NODE);
+		skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask,
+				  SKB_ALLOC_RX, NUMA_NO_NODE);
 	}
 	if (likely(skb)) {
 		skb_reserve(skb, NET_SKB_PAD);
@@ -656,6 +713,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 #if IS_ENABLED(CONFIG_IP_VS)
 	new->ipvs_property = old->ipvs_property;
 #endif
+	new->pfmemalloc = old->pfmemalloc;
 	new->protocol = old->protocol;
 	new->mark = old->mark;
 	new->skb_iif = old->skb_iif;
@@ -814,6 +872,9 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 		n->fclone = SKB_FCLONE_CLONE;
 		atomic_inc(fclone_ref);
 	} else {
+		if (skb_pfmemalloc(skb))
+			gfp_mask |= __GFP_MEMALLOC;
+
 		n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
 		if (!n)
 			return NULL;
@@ -850,6 +911,13 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
 }
 
+static inline int skb_alloc_rx_flag(const struct sk_buff *skb)
+{
+	if (skb_pfmemalloc(skb))
+		return SKB_ALLOC_RX;
+	return 0;
+}
+
 /**
  *	skb_copy	-	create private copy of an sk_buff
  *	@skb: buffer to copy
@@ -871,7 +939,8 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
 {
 	int headerlen = skb_headroom(skb);
 	unsigned int size = skb_end_offset(skb) + skb->data_len;
-	struct sk_buff *n = alloc_skb(size, gfp_mask);
+	struct sk_buff *n = __alloc_skb(size, gfp_mask,
+					skb_alloc_rx_flag(skb), NUMA_NO_NODE);
 
 	if (!n)
 		return NULL;
@@ -906,7 +975,8 @@ EXPORT_SYMBOL(skb_copy);
 struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask)
 {
 	unsigned int size = skb_headlen(skb) + headroom;
-	struct sk_buff *n = alloc_skb(size, gfp_mask);
+	struct sk_buff *n = __alloc_skb(size, gfp_mask,
+					skb_alloc_rx_flag(skb), NUMA_NO_NODE);
 
 	if (!n)
 		goto out;
@@ -979,8 +1049,10 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 
 	size = SKB_DATA_ALIGN(size);
 
-	data = kmalloc(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
-		       gfp_mask);
+	if (skb_pfmemalloc(skb))
+		gfp_mask |= __GFP_MEMALLOC;
+	data = kmalloc_reserve(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
+			       gfp_mask, NUMA_NO_NODE, NULL);
 	if (!data)
 		goto nodata;
 	size = SKB_WITH_OVERHEAD(ksize(data));
@@ -1092,8 +1164,9 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
 	/*
 	 *	Allocate the copy buffer
 	 */
-	struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom,
-				      gfp_mask);
+	struct sk_buff *n = __alloc_skb(newheadroom + skb->len + newtailroom,
+					gfp_mask, skb_alloc_rx_flag(skb),
+					NUMA_NO_NODE);
 	int oldheadroom = skb_headroom(skb);
 	int head_copy_len, head_copy_off;
 	int off;
@@ -2775,8 +2848,9 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
 			skb_release_head_state(nskb);
 			__skb_push(nskb, doffset);
 		} else {
-			nskb = alloc_skb(hsize + doffset + headroom,
-					 GFP_ATOMIC);
+			nskb = __alloc_skb(hsize + doffset + headroom,
+					   GFP_ATOMIC, skb_alloc_rx_flag(skb),
+					   NUMA_NO_NODE);
 
 			if (unlikely(!nskb))
 				goto err;
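The skbuff.c hunks replace __alloc_skb()'s boolean fclone argument with bit flags (SKB_ALLOC_FCLONE, SKB_ALLOC_RX, defined in include/linux/skbuff.h by this series) and propagate the pfmemalloc marking to copies and clones. A hedged sketch of how a receive-path caller would use the new flag, mirroring __netdev_alloc_skb() above; the _example name is hypothetical:

/*
 * Illustrative sketch only. SKB_ALLOC_RX lets the head allocation dip
 * into __GFP_MEMALLOC reserves when memalloc sockets exist; the skb is
 * then tagged skb->pfmemalloc so later copies and clones inherit the
 * restriction enforced in sk_filter() and __netif_receive_skb().
 */
static struct sk_buff *rx_alloc_example(unsigned int len, gfp_t gfp_mask)
{
	struct sk_buff *skb;

	skb = __alloc_skb(len + NET_SKB_PAD, gfp_mask,
			  SKB_ALLOC_RX, NUMA_NO_NODE);
	if (!skb)
		return NULL;

	skb_reserve(skb, NET_SKB_PAD);	/* keep the usual headroom */
	return skb;
}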
diff --git a/net/core/sock.c b/net/core/sock.c
index 2676a88f533e..6b654b3ddfda 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -142,7 +142,7 @@
 static DEFINE_MUTEX(proto_list_mutex);
 static LIST_HEAD(proto_list);
 
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+#ifdef CONFIG_MEMCG_KMEM
 int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
 {
 	struct proto *proto;
@@ -271,6 +271,61 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
 int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
 EXPORT_SYMBOL(sysctl_optmem_max);
 
+struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE;
+EXPORT_SYMBOL_GPL(memalloc_socks);
+
+/**
+ * sk_set_memalloc - sets %SOCK_MEMALLOC
+ * @sk: socket to set it on
+ *
+ * Set %SOCK_MEMALLOC on a socket for access to emergency reserves.
+ * It's the responsibility of the admin to adjust min_free_kbytes
+ * to meet the requirements
+ */
+void sk_set_memalloc(struct sock *sk)
+{
+	sock_set_flag(sk, SOCK_MEMALLOC);
+	sk->sk_allocation |= __GFP_MEMALLOC;
+	static_key_slow_inc(&memalloc_socks);
+}
+EXPORT_SYMBOL_GPL(sk_set_memalloc);
+
+void sk_clear_memalloc(struct sock *sk)
+{
+	sock_reset_flag(sk, SOCK_MEMALLOC);
+	sk->sk_allocation &= ~__GFP_MEMALLOC;
+	static_key_slow_dec(&memalloc_socks);
+
+	/*
+	 * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward
+	 * progress of swapping. However, if SOCK_MEMALLOC is cleared while
+	 * it has rmem allocations there is a risk that the user of the
+	 * socket cannot make forward progress due to exceeding the rmem
+	 * limits. By rights, sk_clear_memalloc() should only be called
+	 * on sockets being torn down but warn and reset the accounting if
+	 * that assumption breaks.
+	 */
+	if (WARN_ON(sk->sk_forward_alloc))
+		sk_mem_reclaim(sk);
+}
+EXPORT_SYMBOL_GPL(sk_clear_memalloc);
+
+int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	int ret;
+	unsigned long pflags = current->flags;
+
+	/* these should have been dropped before queueing */
+	BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));
+
+	current->flags |= PF_MEMALLOC;
+	ret = sk->sk_backlog_rcv(sk, skb);
+	tsk_restore_flags(current, pflags, PF_MEMALLOC);
+
+	return ret;
+}
+EXPORT_SYMBOL(__sk_backlog_rcv);
+
 #if defined(CONFIG_CGROUPS)
 #if !defined(CONFIG_NET_CLS_CGROUP)
 int net_cls_subsys_id = -1;
@@ -353,7 +408,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	if (err)
 		return err;
 
-	if (!sk_rmem_schedule(sk, skb->truesize)) {
+	if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
 		atomic_inc(&sk->sk_drops);
 		return -ENOBUFS;
 	}
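sk_set_memalloc() is intended for protocols that write memory out under pressure (swap over network storage is the target of this series). A hedged sketch of how such a user might mark its transport socket; the function name and the surrounding driver plumbing are illustrative, not taken from this diff:

/*
 * Illustrative sketch only. setup_swap_socket_example() is hypothetical;
 * an in-tree swap-over-network block driver would do something similar
 * with its own socket when it starts and stops its receive loop.
 */
static void setup_swap_socket_example(struct socket *sock)
{
	/* Grants access to emergency reserves and flips the memalloc_socks
	 * static key that gates the pfmemalloc checks in
	 * __netif_receive_skb(), sk_filter() and __alloc_skb(). */
	sk_set_memalloc(sock->sk);

	/* ... run the receive loop; on teardown, drop the privilege: */
	sk_clear_memalloc(sock->sk);
}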