path: root/net
author     Linus Torvalds <torvalds@linux-foundation.org>   2012-07-31 22:25:39 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2012-07-31 22:25:39 -0400
commit     ac694dbdbc403c00e2c14d10bc7b8412cc378259 (patch)
tree       e37328cfbeaf43716dd5914cad9179e57e84df76 /net
parent     a40a1d3d0a2fd613fdec6d89d3c053268ced76ed (diff)
parent     437ea90cc3afdca5229b41c6b1d38c4842756cb9 (diff)
Merge branch 'akpm' (Andrew's patch-bomb)
Merge Andrew's second set of patches:
 - MM
 - a few random fixes
 - a couple of RTC leftovers

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (120 commits)
  rtc/rtc-88pm80x: remove unneed devm_kfree
  rtc/rtc-88pm80x: assign ret only when rtc_register_driver fails
  mm: hugetlbfs: close race during teardown of hugetlbfs shared page tables
  tmpfs: distribute interleave better across nodes
  mm: remove redundant initialization
  mm: warn if pg_data_t isn't initialized with zero
  mips: zero out pg_data_t when it's allocated
  memcg: gix memory accounting scalability in shrink_page_list
  mm/sparse: remove index_init_lock
  mm/sparse: more checks on mem_section number
  mm/sparse: optimize sparse_index_alloc
  memcg: add mem_cgroup_from_css() helper
  memcg: further prevent OOM with too many dirty pages
  memcg: prevent OOM with too many dirty pages
  mm: mmu_notifier: fix freed page still mapped in secondary MMU
  mm: memcg: only check anon swapin page charges for swap cache
  mm: memcg: only check swap cache pages for repeated charging
  mm: memcg: split swapin charge function into private and public part
  mm: memcg: remove needless !mm fixup to init_mm when charging
  mm: memcg: remove unneeded shmem charge type
  ...
Diffstat (limited to 'net')
-rw-r--r--  net/caif/caif_socket.c      |   2
-rw-r--r--  net/core/dev.c              |  53
-rw-r--r--  net/core/filter.c           |   8
-rw-r--r--  net/core/skbuff.c           | 124
-rw-r--r--  net/core/sock.c             |  59
-rw-r--r--  net/ipv4/Makefile           |   2
-rw-r--r--  net/ipv4/sysctl_net_ipv4.c  |   4
-rw-r--r--  net/ipv4/tcp_input.c        |  21
-rw-r--r--  net/ipv4/tcp_ipv4.c         |   2
-rw-r--r--  net/ipv4/tcp_output.c       |  12
-rw-r--r--  net/ipv6/tcp_ipv6.c         |  10
-rw-r--r--  net/sctp/ulpevent.c         |   3
-rw-r--r--  net/sunrpc/Kconfig          |   5
-rw-r--r--  net/sunrpc/clnt.c           |   9
-rw-r--r--  net/sunrpc/sched.c          |   7
-rw-r--r--  net/sunrpc/xprtsock.c       |  43
16 files changed, 304 insertions, 60 deletions
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 78f1cdad5b33..095259f83902 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -141,7 +141,7 @@ static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	err = sk_filter(sk, skb);
 	if (err)
 		return err;
-	if (!sk_rmem_schedule(sk, skb->truesize) && rx_flow_is_on(cf_sk)) {
+	if (!sk_rmem_schedule(sk, skb, skb->truesize) && rx_flow_is_on(cf_sk)) {
 		set_rx_flow_off(cf_sk);
 		net_dbg_ratelimited("sending flow OFF due to rmem_schedule\n");
 		caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_OFF_REQ);
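
Every sk_rmem_schedule() call in this series gains an skb argument. The helper itself lives in include/net/sock.h, outside the net/ diffstat shown here; a minimal sketch, assuming the header change pairs with the callers above, of how the new argument lets a pfmemalloc-backed skb bypass the receive-buffer charge on a SOCK_MEMALLOC socket:

static inline bool sk_rmem_schedule(struct sock *sk, struct sk_buff *skb,
				    unsigned int size)
{
	if (!sk_has_account(sk))	/* protocol does no memory accounting */
		return true;

	/* Accept if the charge fits, can be scheduled, or the data came
	 * from the emergency reserves (then it must not be refused). */
	return size <= sk->sk_forward_alloc ||
	       __sk_mem_schedule(sk, size, SK_MEM_RECV) ||
	       skb_pfmemalloc(skb);
}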
diff --git a/net/core/dev.c b/net/core/dev.c
index c8569f826b71..0cb3fe8d8e72 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3156,6 +3156,23 @@ void netdev_rx_handler_unregister(struct net_device *dev)
 }
 EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
 
+/*
+ * Limit the use of PFMEMALLOC reserves to those protocols that implement
+ * the special handling of PFMEMALLOC skbs.
+ */
+static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
+{
+	switch (skb->protocol) {
+	case __constant_htons(ETH_P_ARP):
+	case __constant_htons(ETH_P_IP):
+	case __constant_htons(ETH_P_IPV6):
+	case __constant_htons(ETH_P_8021Q):
+		return true;
+	default:
+		return false;
+	}
+}
+
 static int __netif_receive_skb(struct sk_buff *skb)
 {
 	struct packet_type *ptype, *pt_prev;
@@ -3165,14 +3182,27 @@ static int __netif_receive_skb(struct sk_buff *skb)
 	bool deliver_exact = false;
 	int ret = NET_RX_DROP;
 	__be16 type;
+	unsigned long pflags = current->flags;
 
 	net_timestamp_check(!netdev_tstamp_prequeue, skb);
 
 	trace_netif_receive_skb(skb);
 
+	/*
+	 * PFMEMALLOC skbs are special, they should
+	 * - be delivered to SOCK_MEMALLOC sockets only
+	 * - stay away from userspace
+	 * - have bounded memory usage
+	 *
+	 * Use PF_MEMALLOC as this saves us from propagating the allocation
+	 * context down to all allocation sites.
+	 */
+	if (sk_memalloc_socks() && skb_pfmemalloc(skb))
+		current->flags |= PF_MEMALLOC;
+
 	/* if we've gotten here through NAPI, check netpoll */
 	if (netpoll_receive_skb(skb))
-		return NET_RX_DROP;
+		goto out;
 
 	orig_dev = skb->dev;
 
@@ -3192,7 +3222,7 @@ another_round:
 	if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
 		skb = vlan_untag(skb);
 		if (unlikely(!skb))
-			goto out;
+			goto unlock;
 	}
 
 #ifdef CONFIG_NET_CLS_ACT
@@ -3202,6 +3232,9 @@ another_round:
 	}
 #endif
 
+	if (sk_memalloc_socks() && skb_pfmemalloc(skb))
+		goto skip_taps;
+
 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
 		if (!ptype->dev || ptype->dev == skb->dev) {
 			if (pt_prev)
@@ -3210,13 +3243,18 @@ another_round:
 		}
 	}
 
+skip_taps:
 #ifdef CONFIG_NET_CLS_ACT
 	skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
 	if (!skb)
-		goto out;
+		goto unlock;
 ncls:
 #endif
 
+	if (sk_memalloc_socks() && skb_pfmemalloc(skb)
+				&& !skb_pfmemalloc_protocol(skb))
+		goto drop;
+
 	rx_handler = rcu_dereference(skb->dev->rx_handler);
 	if (vlan_tx_tag_present(skb)) {
 		if (pt_prev) {
@@ -3226,7 +3264,7 @@ ncls:
 		if (vlan_do_receive(&skb, !rx_handler))
 			goto another_round;
 		else if (unlikely(!skb))
-			goto out;
+			goto unlock;
 	}
 
 	if (rx_handler) {
@@ -3236,7 +3274,7 @@ ncls:
 		}
 		switch (rx_handler(&skb)) {
 		case RX_HANDLER_CONSUMED:
-			goto out;
+			goto unlock;
 		case RX_HANDLER_ANOTHER:
 			goto another_round;
 		case RX_HANDLER_EXACT:
@@ -3269,6 +3307,7 @@ ncls:
 		else
 			ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
 	} else {
+drop:
 		atomic_long_inc(&skb->dev->rx_dropped);
 		kfree_skb(skb);
 		/* Jamal, now you will not able to escape explaining
@@ -3277,8 +3316,10 @@ ncls:
 		ret = NET_RX_DROP;
 	}
 
-out:
+unlock:
 	rcu_read_unlock();
+out:
+	tsk_restore_flags(current, pflags, PF_MEMALLOC);
 	return ret;
 }
 
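The new branches in __netif_receive_skb() are gated by sk_memalloc_socks() and skb_pfmemalloc(). Both helpers are defined outside net/ and do not appear in this diffstat; a rough sketch, under the assumption that they follow the static-key pattern used by memalloc_socks in net/core/sock.c below, showing why the extra checks cost only a patched-out jump while no SOCK_MEMALLOC socket exists:

/* sketch only; the real definitions live in include/net/sock.h and
 * include/linux/skbuff.h */
extern struct static_key memalloc_socks;

static inline int sk_memalloc_socks(void)
{
	/* false until sk_set_memalloc() flips the static key */
	return static_key_false(&memalloc_socks);
}

static inline bool skb_pfmemalloc(const struct sk_buff *skb)
{
	/* set by __alloc_skb() when the data came from reserves */
	return unlikely(skb->pfmemalloc);
}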
diff --git a/net/core/filter.c b/net/core/filter.c
index d4ce2dc712e3..907efd27ec77 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -83,6 +83,14 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
 	int err;
 	struct sk_filter *filter;
 
+	/*
+	 * If the skb was allocated from pfmemalloc reserves, only
+	 * allow SOCK_MEMALLOC sockets to use it as this socket is
+	 * helping free memory
+	 */
+	if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC))
+		return -ENOMEM;
+
 	err = security_sock_rcv_skb(sk, skb);
 	if (err)
 		return err;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 368f65c15e4f..fe00d1208167 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -145,6 +145,43 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
 	BUG();
 }
 
+
+/*
+ * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells
+ * the caller if emergency pfmemalloc reserves are being used. If it is and
+ * the socket is later found to be SOCK_MEMALLOC then PFMEMALLOC reserves
+ * may be used. Otherwise, the packet data may be discarded until enough
+ * memory is free
+ */
+#define kmalloc_reserve(size, gfp, node, pfmemalloc) \
+	 __kmalloc_reserve(size, gfp, node, _RET_IP_, pfmemalloc)
+void *__kmalloc_reserve(size_t size, gfp_t flags, int node, unsigned long ip,
+			 bool *pfmemalloc)
+{
+	void *obj;
+	bool ret_pfmemalloc = false;
+
+	/*
+	 * Try a regular allocation, when that fails and we're not entitled
+	 * to the reserves, fail.
+	 */
+	obj = kmalloc_node_track_caller(size,
+					flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
+					node);
+	if (obj || !(gfp_pfmemalloc_allowed(flags)))
+		goto out;
+
+	/* Try again but now we are using pfmemalloc reserves */
+	ret_pfmemalloc = true;
+	obj = kmalloc_node_track_caller(size, flags, node);
+
+out:
+	if (pfmemalloc)
+		*pfmemalloc = ret_pfmemalloc;
+
+	return obj;
+}
+
 /* Allocate a new skbuff. We do this ourselves so we can fill in a few
  * 'private' fields and also do memory statistics to find all the
  * [BEEP] leaks.
@@ -155,8 +192,10 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
  *	__alloc_skb	-	allocate a network buffer
  *	@size: size to allocate
  *	@gfp_mask: allocation mask
- *	@fclone: allocate from fclone cache instead of head cache
- *		and allocate a cloned (child) skb
+ *	@flags: If SKB_ALLOC_FCLONE is set, allocate from fclone cache
+ *		instead of head cache and allocate a cloned (child) skb.
+ *		If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for
+ *		allocations in case the data is required for writeback
  *	@node: numa node to allocate memory on
  *
  *	Allocate a new &sk_buff. The returned buffer has no headroom and a
@@ -167,14 +206,19 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
  *	%GFP_ATOMIC.
  */
 struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
-			    int fclone, int node)
+			    int flags, int node)
 {
 	struct kmem_cache *cache;
 	struct skb_shared_info *shinfo;
 	struct sk_buff *skb;
 	u8 *data;
+	bool pfmemalloc;
 
-	cache = fclone ? skbuff_fclone_cache : skbuff_head_cache;
+	cache = (flags & SKB_ALLOC_FCLONE)
+		? skbuff_fclone_cache : skbuff_head_cache;
+
+	if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX))
+		gfp_mask |= __GFP_MEMALLOC;
 
 	/* Get the HEAD */
 	skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
@@ -189,7 +233,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	 */
 	size = SKB_DATA_ALIGN(size);
 	size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-	data = kmalloc_node_track_caller(size, gfp_mask, node);
+	data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc);
 	if (!data)
 		goto nodata;
 	/* kmalloc(size) might give us more room than requested.
@@ -207,6 +251,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	memset(skb, 0, offsetof(struct sk_buff, tail));
 	/* Account for allocated memory : skb + skb->head */
 	skb->truesize = SKB_TRUESIZE(size);
+	skb->pfmemalloc = pfmemalloc;
 	atomic_set(&skb->users, 1);
 	skb->head = data;
 	skb->data = data;
@@ -222,7 +267,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	atomic_set(&shinfo->dataref, 1);
 	kmemcheck_annotate_variable(shinfo->destructor_arg);
 
-	if (fclone) {
+	if (flags & SKB_ALLOC_FCLONE) {
 		struct sk_buff *child = skb + 1;
 		atomic_t *fclone_ref = (atomic_t *) (child + 1);
 
@@ -232,6 +277,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 		atomic_set(fclone_ref, 1);
 
 		child->fclone = SKB_FCLONE_UNAVAILABLE;
+		child->pfmemalloc = pfmemalloc;
 	}
 out:
 	return skb;
@@ -302,14 +348,7 @@ static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
 
 #define NETDEV_PAGECNT_BIAS (PAGE_SIZE / SMP_CACHE_BYTES)
 
-/**
- * netdev_alloc_frag - allocate a page fragment
- * @fragsz: fragment size
- *
- * Allocates a frag from a page for receive buffer.
- * Uses GFP_ATOMIC allocations.
- */
-void *netdev_alloc_frag(unsigned int fragsz)
+static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
 {
 	struct netdev_alloc_cache *nc;
 	void *data = NULL;
@@ -319,7 +358,7 @@ void *netdev_alloc_frag(unsigned int fragsz)
 	nc = &__get_cpu_var(netdev_alloc_cache);
 	if (unlikely(!nc->page)) {
 refill:
-		nc->page = alloc_page(GFP_ATOMIC | __GFP_COLD);
+		nc->page = alloc_page(gfp_mask);
 		if (unlikely(!nc->page))
 			goto end;
 recycle:
@@ -343,6 +382,18 @@ end:
 	local_irq_restore(flags);
 	return data;
 }
+
+/**
+ * netdev_alloc_frag - allocate a page fragment
+ * @fragsz: fragment size
+ *
+ * Allocates a frag from a page for receive buffer.
+ * Uses GFP_ATOMIC allocations.
+ */
+void *netdev_alloc_frag(unsigned int fragsz)
+{
+	return __netdev_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
+}
 EXPORT_SYMBOL(netdev_alloc_frag);
 
 /**
@@ -366,7 +417,12 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
 			SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
 	if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA))) {
-		void *data = netdev_alloc_frag(fragsz);
+		void *data;
+
+		if (sk_memalloc_socks())
+			gfp_mask |= __GFP_MEMALLOC;
+
+		data = __netdev_alloc_frag(fragsz, gfp_mask);
 
 		if (likely(data)) {
 			skb = build_skb(data, fragsz);
@@ -374,7 +430,8 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
 				put_page(virt_to_head_page(data));
 		}
 	} else {
-		skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, NUMA_NO_NODE);
+		skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask,
+				  SKB_ALLOC_RX, NUMA_NO_NODE);
 	}
 	if (likely(skb)) {
 		skb_reserve(skb, NET_SKB_PAD);
@@ -656,6 +713,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 #if IS_ENABLED(CONFIG_IP_VS)
 	new->ipvs_property	= old->ipvs_property;
 #endif
+	new->pfmemalloc		= old->pfmemalloc;
 	new->protocol		= old->protocol;
 	new->mark		= old->mark;
 	new->skb_iif		= old->skb_iif;
@@ -814,6 +872,9 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 		n->fclone = SKB_FCLONE_CLONE;
 		atomic_inc(fclone_ref);
 	} else {
+		if (skb_pfmemalloc(skb))
+			gfp_mask |= __GFP_MEMALLOC;
+
 		n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
 		if (!n)
 			return NULL;
@@ -850,6 +911,13 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
 }
 
+static inline int skb_alloc_rx_flag(const struct sk_buff *skb)
+{
+	if (skb_pfmemalloc(skb))
+		return SKB_ALLOC_RX;
+	return 0;
+}
+
 /**
  *	skb_copy	-	create private copy of an sk_buff
  *	@skb: buffer to copy
@@ -871,7 +939,8 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
 {
 	int headerlen = skb_headroom(skb);
 	unsigned int size = skb_end_offset(skb) + skb->data_len;
-	struct sk_buff *n = alloc_skb(size, gfp_mask);
+	struct sk_buff *n = __alloc_skb(size, gfp_mask,
+					skb_alloc_rx_flag(skb), NUMA_NO_NODE);
 
 	if (!n)
 		return NULL;
@@ -906,7 +975,8 @@ EXPORT_SYMBOL(skb_copy);
 struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask)
 {
 	unsigned int size = skb_headlen(skb) + headroom;
-	struct sk_buff *n = alloc_skb(size, gfp_mask);
+	struct sk_buff *n = __alloc_skb(size, gfp_mask,
+					skb_alloc_rx_flag(skb), NUMA_NO_NODE);
 
 	if (!n)
 		goto out;
@@ -979,8 +1049,10 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 
 	size = SKB_DATA_ALIGN(size);
 
-	data = kmalloc(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
-		       gfp_mask);
+	if (skb_pfmemalloc(skb))
+		gfp_mask |= __GFP_MEMALLOC;
+	data = kmalloc_reserve(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
+			       gfp_mask, NUMA_NO_NODE, NULL);
 	if (!data)
 		goto nodata;
 	size = SKB_WITH_OVERHEAD(ksize(data));
@@ -1092,8 +1164,9 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
 	/*
 	 *	Allocate the copy buffer
 	 */
-	struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom,
-				      gfp_mask);
+	struct sk_buff *n = __alloc_skb(newheadroom + skb->len + newtailroom,
+					gfp_mask, skb_alloc_rx_flag(skb),
+					NUMA_NO_NODE);
 	int oldheadroom = skb_headroom(skb);
 	int head_copy_len, head_copy_off;
 	int off;
@@ -2775,8 +2848,9 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
 			skb_release_head_state(nskb);
 			__skb_push(nskb, doffset);
 		} else {
-			nskb = alloc_skb(hsize + doffset + headroom,
-					 GFP_ATOMIC);
+			nskb = __alloc_skb(hsize + doffset + headroom,
+					   GFP_ATOMIC, skb_alloc_rx_flag(skb),
+					   NUMA_NO_NODE);
 
 			if (unlikely(!nskb))
 				goto err;
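
__alloc_skb() now takes a flags word instead of the old fclone boolean. The SKB_ALLOC_* values themselves are declared in include/linux/skbuff.h and are not part of this diff; the sketch below assumes the two flags used above and shows how a driver receive path would pick up the new behaviour without any source change:

/* assumed flag values, declared outside net/ */
#define SKB_ALLOC_FCLONE	0x01	/* allocate from the fclone cache */
#define SKB_ALLOC_RX		0x02	/* allocation is for RX, may use reserves */

/* illustrative helper, not from the patch: __netdev_alloc_skb() passes
 * SKB_ALLOC_RX internally, so under pressure the returned skb may be
 * pfmemalloc-backed and skb->pfmemalloc records that for later checks. */
static struct sk_buff *example_rx_alloc(struct net_device *dev, unsigned int len)
{
	return __netdev_alloc_skb(dev, len, GFP_ATOMIC);
}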
diff --git a/net/core/sock.c b/net/core/sock.c
index 2676a88f533e..6b654b3ddfda 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -142,7 +142,7 @@
 static DEFINE_MUTEX(proto_list_mutex);
 static LIST_HEAD(proto_list);
 
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+#ifdef CONFIG_MEMCG_KMEM
 int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
 {
 	struct proto *proto;
@@ -271,6 +271,61 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
 int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
 EXPORT_SYMBOL(sysctl_optmem_max);
 
+struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE;
+EXPORT_SYMBOL_GPL(memalloc_socks);
+
+/**
+ * sk_set_memalloc - sets %SOCK_MEMALLOC
+ * @sk: socket to set it on
+ *
+ * Set %SOCK_MEMALLOC on a socket for access to emergency reserves.
+ * It's the responsibility of the admin to adjust min_free_kbytes
+ * to meet the requirements
+ */
+void sk_set_memalloc(struct sock *sk)
+{
+	sock_set_flag(sk, SOCK_MEMALLOC);
+	sk->sk_allocation |= __GFP_MEMALLOC;
+	static_key_slow_inc(&memalloc_socks);
+}
+EXPORT_SYMBOL_GPL(sk_set_memalloc);
+
+void sk_clear_memalloc(struct sock *sk)
+{
+	sock_reset_flag(sk, SOCK_MEMALLOC);
+	sk->sk_allocation &= ~__GFP_MEMALLOC;
+	static_key_slow_dec(&memalloc_socks);
+
+	/*
+	 * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward
+	 * progress of swapping. However, if SOCK_MEMALLOC is cleared while
+	 * it has rmem allocations there is a risk that the user of the
+	 * socket cannot make forward progress due to exceeding the rmem
+	 * limits. By rights, sk_clear_memalloc() should only be called
+	 * on sockets being torn down but warn and reset the accounting if
+	 * that assumption breaks.
+	 */
+	if (WARN_ON(sk->sk_forward_alloc))
+		sk_mem_reclaim(sk);
+}
+EXPORT_SYMBOL_GPL(sk_clear_memalloc);
+
+int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	int ret;
+	unsigned long pflags = current->flags;
+
+	/* these should have been dropped before queueing */
+	BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));
+
+	current->flags |= PF_MEMALLOC;
+	ret = sk->sk_backlog_rcv(sk, skb);
+	tsk_restore_flags(current, pflags, PF_MEMALLOC);
+
+	return ret;
+}
+EXPORT_SYMBOL(__sk_backlog_rcv);
+
 #if defined(CONFIG_CGROUPS)
 #if !defined(CONFIG_NET_CLS_CGROUP)
 int net_cls_subsys_id = -1;
@@ -353,7 +408,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	if (err)
 		return err;
 
-	if (!sk_rmem_schedule(sk, skb->truesize)) {
+	if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
 		atomic_inc(&sk->sk_drops);
 		return -ENOBUFS;
 	}
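
sk_set_memalloc() and sk_clear_memalloc() are the opt-in points for the whole series. A hypothetical caller, sketched here for illustration only (the function names below are not from the patch), would be a network block device or similar swap transport that must keep receiving even when the page allocator is under pressure:

/* hypothetical swap-over-network transport */
static void example_swap_transport_online(struct socket *sock)
{
	/* the socket may now dip into emergency reserves, and its skbs pass
	 * the SOCK_MEMALLOC checks added to sk_filter() and
	 * __netif_receive_skb() */
	sk_set_memalloc(sock->sk);
}

static void example_swap_transport_offline(struct socket *sock)
{
	/* only on teardown; see the WARN_ON in sk_clear_memalloc() */
	sk_clear_memalloc(sock->sk);
}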
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index ae2ccf2890e4..15ca63ec604e 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -49,7 +49,7 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
 obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
 obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
 obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
-obj-$(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) += tcp_memcontrol.o
+obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o
 obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
 
 obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 4b6487a68279..1b5ce96707a3 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -184,7 +184,7 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write,
 	int ret;
 	unsigned long vec[3];
 	struct net *net = current->nsproxy->net_ns;
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+#ifdef CONFIG_MEMCG_KMEM
 	struct mem_cgroup *memcg;
 #endif
 
@@ -203,7 +203,7 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write,
 	if (ret)
 		return ret;
 
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+#ifdef CONFIG_MEMCG_KMEM
 	rcu_read_lock();
 	memcg = mem_cgroup_from_task(current);
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 9be30b039ae3..2fd2bc9e3c64 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4351,19 +4351,20 @@ static void tcp_ofo_queue(struct sock *sk)
 static bool tcp_prune_ofo_queue(struct sock *sk);
 static int tcp_prune_queue(struct sock *sk);
 
-static int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
+static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
+				 unsigned int size)
 {
 	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
-	    !sk_rmem_schedule(sk, size)) {
+	    !sk_rmem_schedule(sk, skb, size)) {
 
 		if (tcp_prune_queue(sk) < 0)
 			return -1;
 
-		if (!sk_rmem_schedule(sk, size)) {
+		if (!sk_rmem_schedule(sk, skb, size)) {
 			if (!tcp_prune_ofo_queue(sk))
 				return -1;
 
-			if (!sk_rmem_schedule(sk, size))
+			if (!sk_rmem_schedule(sk, skb, size))
 				return -1;
 		}
 	}
@@ -4418,7 +4419,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 
 	TCP_ECN_check_ce(tp, skb);
 
-	if (unlikely(tcp_try_rmem_schedule(sk, skb->truesize))) {
+	if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFODROP);
 		__kfree_skb(skb);
 		return;
@@ -4552,17 +4553,17 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
 
 int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
 {
-	struct sk_buff *skb;
+	struct sk_buff *skb = NULL;
 	struct tcphdr *th;
 	bool fragstolen;
 
-	if (tcp_try_rmem_schedule(sk, size + sizeof(*th)))
-		goto err;
-
 	skb = alloc_skb(size + sizeof(*th), sk->sk_allocation);
 	if (!skb)
 		goto err;
 
+	if (tcp_try_rmem_schedule(sk, skb, size + sizeof(*th)))
+		goto err_free;
+
 	th = (struct tcphdr *)skb_put(skb, sizeof(*th));
 	skb_reset_transport_header(skb);
 	memset(th, 0, sizeof(*th));
@@ -4633,7 +4634,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 		if (eaten <= 0) {
 queue_and_out:
 			if (eaten < 0 &&
-			    tcp_try_rmem_schedule(sk, skb->truesize))
+			    tcp_try_rmem_schedule(sk, skb, skb->truesize))
 				goto drop;
 
 			eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 7f91e5ac8277..42b2a6a73092 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2633,7 +2633,7 @@ struct proto tcp_prot = {
 	.compat_setsockopt	= compat_tcp_setsockopt,
 	.compat_getsockopt	= compat_tcp_getsockopt,
 #endif
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+#ifdef CONFIG_MEMCG_KMEM
 	.init_cgroup		= tcp_init_cgroup,
 	.destroy_cgroup		= tcp_destroy_cgroup,
 	.proto_cgroup		= tcp_proto_cgroup,
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 33cd065cfbd8..3f1bcff0b10b 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2045,7 +2045,8 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
 	if (unlikely(sk->sk_state == TCP_CLOSE))
 		return;
 
-	if (tcp_write_xmit(sk, cur_mss, nonagle, 0, GFP_ATOMIC))
+	if (tcp_write_xmit(sk, cur_mss, nonagle, 0,
+			   sk_gfp_atomic(sk, GFP_ATOMIC)))
 		tcp_check_probe_timer(sk);
 }
 
@@ -2666,7 +2667,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 
 	if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired)
 		s_data_desired = cvp->s_data_desired;
-	skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired, GFP_ATOMIC);
+	skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired,
+			sk_gfp_atomic(sk, GFP_ATOMIC));
 	if (unlikely(!skb)) {
 		dst_release(dst);
 		return NULL;
@@ -3064,7 +3066,7 @@ void tcp_send_ack(struct sock *sk)
 	 * tcp_transmit_skb() will set the ownership to this
 	 * sock.
 	 */
-	buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
+	buff = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC));
 	if (buff == NULL) {
 		inet_csk_schedule_ack(sk);
 		inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
@@ -3079,7 +3081,7 @@ void tcp_send_ack(struct sock *sk)
 
 	/* Send it off, this clears delayed acks for us. */
 	TCP_SKB_CB(buff)->when = tcp_time_stamp;
-	tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC);
+	tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC));
 }
 
 /* This routine sends a packet with an out of date sequence
@@ -3099,7 +3101,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
 	struct sk_buff *skb;
 
 	/* We don't queue it, tcp_transmit_skb() sets ownership. */
-	skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
+	skb = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC));
 	if (skb == NULL)
 		return -1;
 
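
The GFP_ATOMIC call sites above are switched to sk_gfp_atomic(), which is introduced by the same series in include/net/sock.h and therefore does not show up in this net/-only diff. A sketch of what it is assumed to expand to, given that sk_set_memalloc() ORs __GFP_MEMALLOC into sk->sk_allocation:

static inline gfp_t sk_gfp_atomic(struct sock *sk, gfp_t gfp_mask)
{
	/* ACKs, probes and SYN-ACKs on a SOCK_MEMALLOC socket may use the
	 * reserves; on ordinary sockets this is plain GFP_ATOMIC */
	return GFP_ATOMIC | (sk->sk_allocation & __GFP_MEMALLOC);
}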
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 221224e72507..c66b90f71c9b 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1299,7 +1299,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		/* Clone pktoptions received with SYN */
 		newnp->pktoptions = NULL;
 		if (treq->pktopts != NULL) {
-			newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
+			newnp->pktoptions = skb_clone(treq->pktopts,
+						      sk_gfp_atomic(sk, GFP_ATOMIC));
 			consume_skb(treq->pktopts);
 			treq->pktopts = NULL;
 			if (newnp->pktoptions)
@@ -1349,7 +1350,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		 * across. Shucks.
 		 */
 		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newnp->daddr,
-			       AF_INET6, key->key, key->keylen, GFP_ATOMIC);
+			       AF_INET6, key->key, key->keylen,
+			       sk_gfp_atomic(sk, GFP_ATOMIC));
 	}
 #endif
 
@@ -1442,7 +1444,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 	   --ANK (980728)
 	 */
 	if (np->rxopt.all)
-		opt_skb = skb_clone(skb, GFP_ATOMIC);
+		opt_skb = skb_clone(skb, sk_gfp_atomic(sk, GFP_ATOMIC));
 
 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
 		sock_rps_save_rxhash(sk, skb);
@@ -2015,7 +2017,7 @@ struct proto tcpv6_prot = {
 	.compat_setsockopt	= compat_tcp_setsockopt,
 	.compat_getsockopt	= compat_tcp_getsockopt,
 #endif
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+#ifdef CONFIG_MEMCG_KMEM
 	.proto_cgroup		= tcp_proto_cgroup,
 #endif
 };
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 33d894776192..10c018a5b9fe 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -702,7 +702,8 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc,
 	if (rx_count >= asoc->base.sk->sk_rcvbuf) {
 
 		if ((asoc->base.sk->sk_userlocks & SOCK_RCVBUF_LOCK) ||
-		    (!sk_rmem_schedule(asoc->base.sk, chunk->skb->truesize)))
+		    (!sk_rmem_schedule(asoc->base.sk, chunk->skb,
+				       chunk->skb->truesize)))
 			goto fail;
 	}
 
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index 9fe8857d8d59..03d03e37a7d5 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -21,6 +21,11 @@ config SUNRPC_XPRT_RDMA
 
 	  If unsure, say N.
 
+config SUNRPC_SWAP
+	bool
+	depends on SUNRPC
+	select NETVM
+
 config RPCSEC_GSS_KRB5
 	tristate "Secure RPC: Kerberos V mechanism"
 	depends on SUNRPC && CRYPTO
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index b05df36692ff..fa48c60aef23 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -717,6 +717,15 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
 		atomic_inc(&clnt->cl_count);
 		if (clnt->cl_softrtry)
 			task->tk_flags |= RPC_TASK_SOFT;
+		if (sk_memalloc_socks()) {
+			struct rpc_xprt *xprt;
+
+			rcu_read_lock();
+			xprt = rcu_dereference(clnt->cl_xprt);
+			if (xprt->swapper)
+				task->tk_flags |= RPC_TASK_SWAPPER;
+			rcu_read_unlock();
+		}
 		/* Add to the client's list of all tasks */
 		spin_lock(&clnt->cl_lock);
 		list_add_tail(&task->tk_task, &clnt->cl_tasks);
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 1f19aa15f89b..128494ec9a64 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -815,7 +815,10 @@ static void rpc_async_schedule(struct work_struct *work)
 void *rpc_malloc(struct rpc_task *task, size_t size)
 {
 	struct rpc_buffer *buf;
-	gfp_t gfp = RPC_IS_SWAPPER(task) ? GFP_ATOMIC : GFP_NOWAIT;
+	gfp_t gfp = GFP_NOWAIT;
+
+	if (RPC_IS_SWAPPER(task))
+		gfp |= __GFP_MEMALLOC;
 
 	size += sizeof(struct rpc_buffer);
 	if (size <= RPC_BUFFER_MAXSIZE)
@@ -889,7 +892,7 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta
 static struct rpc_task *
 rpc_alloc_task(void)
 {
-	return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS);
+	return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOIO);
 }
 
 /*
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 926679459e71..400567243f84 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1930,6 +1930,45 @@ out:
 	current->flags &= ~PF_FSTRANS;
 }
 
+#ifdef CONFIG_SUNRPC_SWAP
+static void xs_set_memalloc(struct rpc_xprt *xprt)
+{
+	struct sock_xprt *transport = container_of(xprt, struct sock_xprt,
+			xprt);
+
+	if (xprt->swapper)
+		sk_set_memalloc(transport->inet);
+}
+
+/**
+ * xs_swapper - Tag this transport as being used for swap.
+ * @xprt: transport to tag
+ * @enable: enable/disable
+ *
+ */
+int xs_swapper(struct rpc_xprt *xprt, int enable)
+{
+	struct sock_xprt *transport = container_of(xprt, struct sock_xprt,
+			xprt);
+	int err = 0;
+
+	if (enable) {
+		xprt->swapper++;
+		xs_set_memalloc(xprt);
+	} else if (xprt->swapper) {
+		xprt->swapper--;
+		sk_clear_memalloc(transport->inet);
+	}
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(xs_swapper);
+#else
+static void xs_set_memalloc(struct rpc_xprt *xprt)
+{
+}
+#endif
+
 static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
 {
 	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
@@ -1954,6 +1993,8 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
 		transport->sock = sock;
 		transport->inet = sk;
 
+		xs_set_memalloc(xprt);
+
 		write_unlock_bh(&sk->sk_callback_lock);
 	}
 	xs_udp_do_set_buffer_size(xprt);
@@ -2081,6 +2122,8 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
 	if (!xprt_bound(xprt))
 		goto out;
 
+	xs_set_memalloc(xprt);
+
 	/* Tell the socket layer to start connecting... */
 	xprt->stat.connect_count++;
 	xprt->stat.connect_start = jiffies;
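
xs_swapper() is exported but has no caller inside net/; the expected user is the NFS swapfile path. A hypothetical caller, for illustration only, showing that xprt->swapper behaves as a use count so the transport socket stays SOCK_MEMALLOC until the last swap user drops it:

/* illustrative only; the real enable/disable calls live on the NFS side */
static int example_swap_on(struct rpc_xprt *xprt)
{
	return xs_swapper(xprt, 1);	/* bumps xprt->swapper, tags the socket */
}

static void example_swap_off(struct rpc_xprt *xprt)
{
	xs_swapper(xprt, 0);		/* drops the count, clears SOCK_MEMALLOC */
}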