author     Linus Torvalds <torvalds@linux-foundation.org>    2012-07-31 22:25:39 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>    2012-07-31 22:25:39 -0400
commit     ac694dbdbc403c00e2c14d10bc7b8412cc378259 (patch)
tree       e37328cfbeaf43716dd5914cad9179e57e84df76 /net
parent     a40a1d3d0a2fd613fdec6d89d3c053268ced76ed (diff)
parent     437ea90cc3afdca5229b41c6b1d38c4842756cb9 (diff)
Merge branch 'akpm' (Andrew's patch-bomb)
Merge Andrew's second set of patches:
- MM
- a few random fixes
- a couple of RTC leftovers
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (120 commits)
rtc/rtc-88pm80x: remove unneed devm_kfree
rtc/rtc-88pm80x: assign ret only when rtc_register_driver fails
mm: hugetlbfs: close race during teardown of hugetlbfs shared page tables
tmpfs: distribute interleave better across nodes
mm: remove redundant initialization
mm: warn if pg_data_t isn't initialized with zero
mips: zero out pg_data_t when it's allocated
memcg: fix memory accounting scalability in shrink_page_list
mm/sparse: remove index_init_lock
mm/sparse: more checks on mem_section number
mm/sparse: optimize sparse_index_alloc
memcg: add mem_cgroup_from_css() helper
memcg: further prevent OOM with too many dirty pages
memcg: prevent OOM with too many dirty pages
mm: mmu_notifier: fix freed page still mapped in secondary MMU
mm: memcg: only check anon swapin page charges for swap cache
mm: memcg: only check swap cache pages for repeated charging
mm: memcg: split swapin charge function into private and public part
mm: memcg: remove needless !mm fixup to init_mm when charging
mm: memcg: remove unneeded shmem charge type
...
Diffstat (limited to 'net')

 -rw-r--r--  net/caif/caif_socket.c        2
 -rw-r--r--  net/core/dev.c               53
 -rw-r--r--  net/core/filter.c             8
 -rw-r--r--  net/core/skbuff.c           124
 -rw-r--r--  net/core/sock.c              59
 -rw-r--r--  net/ipv4/Makefile             2
 -rw-r--r--  net/ipv4/sysctl_net_ipv4.c    4
 -rw-r--r--  net/ipv4/tcp_input.c         21
 -rw-r--r--  net/ipv4/tcp_ipv4.c           2
 -rw-r--r--  net/ipv4/tcp_output.c        12
 -rw-r--r--  net/ipv6/tcp_ipv6.c          10
 -rw-r--r--  net/sctp/ulpevent.c           3
 -rw-r--r--  net/sunrpc/Kconfig            5
 -rw-r--r--  net/sunrpc/clnt.c             9
 -rw-r--r--  net/sunrpc/sched.c            7
 -rw-r--r--  net/sunrpc/xprtsock.c        43

 16 files changed, 304 insertions, 60 deletions
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 78f1cdad5b33..095259f83902 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -141,7 +141,7 @@ static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
         err = sk_filter(sk, skb);
         if (err)
                 return err;
-        if (!sk_rmem_schedule(sk, skb->truesize) && rx_flow_is_on(cf_sk)) {
+        if (!sk_rmem_schedule(sk, skb, skb->truesize) && rx_flow_is_on(cf_sk)) {
                 set_rx_flow_off(cf_sk);
                 net_dbg_ratelimited("sending flow OFF due to rmem_schedule\n");
                 caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_OFF_REQ);
diff --git a/net/core/dev.c b/net/core/dev.c
index c8569f826b71..0cb3fe8d8e72 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3156,6 +3156,23 @@ void netdev_rx_handler_unregister(struct net_device *dev)
 }
 EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
 
+/*
+ * Limit the use of PFMEMALLOC reserves to those protocols that implement
+ * the special handling of PFMEMALLOC skbs.
+ */
+static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
+{
+        switch (skb->protocol) {
+        case __constant_htons(ETH_P_ARP):
+        case __constant_htons(ETH_P_IP):
+        case __constant_htons(ETH_P_IPV6):
+        case __constant_htons(ETH_P_8021Q):
+                return true;
+        default:
+                return false;
+        }
+}
+
 static int __netif_receive_skb(struct sk_buff *skb)
 {
         struct packet_type *ptype, *pt_prev;
@@ -3165,14 +3182,27 @@ static int __netif_receive_skb(struct sk_buff *skb)
         bool deliver_exact = false;
         int ret = NET_RX_DROP;
         __be16 type;
+        unsigned long pflags = current->flags;
 
         net_timestamp_check(!netdev_tstamp_prequeue, skb);
 
         trace_netif_receive_skb(skb);
 
+        /*
+         * PFMEMALLOC skbs are special, they should
+         * - be delivered to SOCK_MEMALLOC sockets only
+         * - stay away from userspace
+         * - have bounded memory usage
+         *
+         * Use PF_MEMALLOC as this saves us from propagating the allocation
+         * context down to all allocation sites.
+         */
+        if (sk_memalloc_socks() && skb_pfmemalloc(skb))
+                current->flags |= PF_MEMALLOC;
+
         /* if we've gotten here through NAPI, check netpoll */
         if (netpoll_receive_skb(skb))
-                return NET_RX_DROP;
+                goto out;
 
         orig_dev = skb->dev;
 
@@ -3192,7 +3222,7 @@ another_round:
         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
                 skb = vlan_untag(skb);
                 if (unlikely(!skb))
-                        goto out;
+                        goto unlock;
         }
 
 #ifdef CONFIG_NET_CLS_ACT
@@ -3202,6 +3232,9 @@ another_round:
         }
 #endif
 
+        if (sk_memalloc_socks() && skb_pfmemalloc(skb))
+                goto skip_taps;
+
         list_for_each_entry_rcu(ptype, &ptype_all, list) {
                 if (!ptype->dev || ptype->dev == skb->dev) {
                         if (pt_prev)
@@ -3210,13 +3243,18 @@ another_round:
                 }
         }
 
+skip_taps:
 #ifdef CONFIG_NET_CLS_ACT
         skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
         if (!skb)
-                goto out;
+                goto unlock;
 ncls:
 #endif
 
+        if (sk_memalloc_socks() && skb_pfmemalloc(skb)
+                                && !skb_pfmemalloc_protocol(skb))
+                goto drop;
+
         rx_handler = rcu_dereference(skb->dev->rx_handler);
         if (vlan_tx_tag_present(skb)) {
                 if (pt_prev) {
@@ -3226,7 +3264,7 @@ ncls:
                 if (vlan_do_receive(&skb, !rx_handler))
                         goto another_round;
                 else if (unlikely(!skb))
-                        goto out;
+                        goto unlock;
         }
 
         if (rx_handler) {
@@ -3236,7 +3274,7 @@ ncls:
                 }
                 switch (rx_handler(&skb)) {
                 case RX_HANDLER_CONSUMED:
-                        goto out;
+                        goto unlock;
                 case RX_HANDLER_ANOTHER:
                         goto another_round;
                 case RX_HANDLER_EXACT:
@@ -3269,6 +3307,7 @@ ncls:
                 else
                         ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
         } else {
+drop:
                 atomic_long_inc(&skb->dev->rx_dropped);
                 kfree_skb(skb);
                 /* Jamal, now you will not able to escape explaining
@@ -3277,8 +3316,10 @@ ncls:
                 ret = NET_RX_DROP;
         }
 
-out:
+unlock:
         rcu_read_unlock();
+out:
+        tsk_restore_flags(current, pflags, PF_MEMALLOC);
         return ret;
 }
 
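The __netif_receive_skb() changes above bracket delivery of a pfmemalloc skb in PF_MEMALLOC and, on every exit path, restore only that one flag, so a caller that already had PF_MEMALLOC set is never disturbed. A minimal standalone sketch of that save-and-restore pattern follows; the flag value and helper names are illustrative, not kernel APIs.

#include <stdio.h>

#define PF_MEMALLOC 0x0800UL            /* illustrative bit, not taken from kernel headers */

static unsigned long task_flags;        /* stands in for current->flags */

/* restore only the bits covered by 'mask' to their saved values, leave the rest alone */
static void restore_flags(unsigned long saved, unsigned long mask)
{
        task_flags = (task_flags & ~mask) | (saved & mask);
}

static void receive_one(int skb_from_reserves)
{
        unsigned long pflags = task_flags;      /* snapshot, like pflags in the patch */

        if (skb_from_reserves)
                task_flags |= PF_MEMALLOC;      /* allocations below may dip into reserves */

        /* ... taps skipped, protocol checked, packet delivered ... */

        restore_flags(pflags, PF_MEMALLOC);     /* only PF_MEMALLOC is touched on the way out */
}

int main(void)
{
        receive_one(1);
        printf("task flags after delivery: %#lx\n", task_flags);
        return 0;
}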
diff --git a/net/core/filter.c b/net/core/filter.c
index d4ce2dc712e3..907efd27ec77 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -83,6 +83,14 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
         int err;
         struct sk_filter *filter;
 
+        /*
+         * If the skb was allocated from pfmemalloc reserves, only
+         * allow SOCK_MEMALLOC sockets to use it as this socket is
+         * helping free memory
+         */
+        if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC))
+                return -ENOMEM;
+
         err = security_sock_rcv_skb(sk, skb);
         if (err)
                 return err;
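This sk_filter() hunk is the gate that keeps reserve-backed packets away from ordinary sockets: a pfmemalloc skb is only accepted when the receiving socket is SOCK_MEMALLOC. A compact sketch of that decision, using stand-in types rather than the kernel's:

#include <stdbool.h>
#include <errno.h>

struct demo_skb  { bool pfmemalloc; };      /* stand-in for struct sk_buff */
struct demo_sock { bool sock_memalloc; };   /* stand-in for struct sock + SOCK_MEMALLOC */

/* mirrors the new check at the top of sk_filter() */
static int may_deliver(const struct demo_sock *sk, const struct demo_skb *skb)
{
        if (skb->pfmemalloc && !sk->sock_memalloc)
                return -ENOMEM;     /* reserve-backed data, socket is not helping reclaim */
        return 0;
}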
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 368f65c15e4f..fe00d1208167 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -145,6 +145,43 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
         BUG();
 }
 
+
+/*
+ * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells
+ * the caller if emergency pfmemalloc reserves are being used. If it is and
+ * the socket is later found to be SOCK_MEMALLOC then PFMEMALLOC reserves
+ * may be used. Otherwise, the packet data may be discarded until enough
+ * memory is free
+ */
+#define kmalloc_reserve(size, gfp, node, pfmemalloc) \
+        __kmalloc_reserve(size, gfp, node, _RET_IP_, pfmemalloc)
+void *__kmalloc_reserve(size_t size, gfp_t flags, int node, unsigned long ip,
+                        bool *pfmemalloc)
+{
+        void *obj;
+        bool ret_pfmemalloc = false;
+
+        /*
+         * Try a regular allocation, when that fails and we're not entitled
+         * to the reserves, fail.
+         */
+        obj = kmalloc_node_track_caller(size,
+                                        flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
+                                        node);
+        if (obj || !(gfp_pfmemalloc_allowed(flags)))
+                goto out;
+
+        /* Try again but now we are using pfmemalloc reserves */
+        ret_pfmemalloc = true;
+        obj = kmalloc_node_track_caller(size, flags, node);
+
+out:
+        if (pfmemalloc)
+                *pfmemalloc = ret_pfmemalloc;
+
+        return obj;
+}
+
 /* Allocate a new skbuff. We do this ourselves so we can fill in a few
  * 'private' fields and also do memory statistics to find all the
  * [BEEP] leaks.
@@ -155,8 +192,10 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
  * __alloc_skb - allocate a network buffer
  * @size: size to allocate
  * @gfp_mask: allocation mask
- * @fclone: allocate from fclone cache instead of head cache
- *          and allocate a cloned (child) skb
+ * @flags: If SKB_ALLOC_FCLONE is set, allocate from fclone cache
+ *         instead of head cache and allocate a cloned (child) skb.
+ *         If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for
+ *         allocations in case the data is required for writeback
  * @node: numa node to allocate memory on
  *
  * Allocate a new &sk_buff. The returned buffer has no headroom and a
@@ -167,14 +206,19 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
  * %GFP_ATOMIC.
  */
 struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
-                            int fclone, int node)
+                            int flags, int node)
 {
         struct kmem_cache *cache;
         struct skb_shared_info *shinfo;
         struct sk_buff *skb;
         u8 *data;
+        bool pfmemalloc;
 
-        cache = fclone ? skbuff_fclone_cache : skbuff_head_cache;
+        cache = (flags & SKB_ALLOC_FCLONE)
+                ? skbuff_fclone_cache : skbuff_head_cache;
+
+        if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX))
+                gfp_mask |= __GFP_MEMALLOC;
 
         /* Get the HEAD */
         skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
@@ -189,7 +233,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
          */
         size = SKB_DATA_ALIGN(size);
         size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-        data = kmalloc_node_track_caller(size, gfp_mask, node);
+        data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc);
         if (!data)
                 goto nodata;
         /* kmalloc(size) might give us more room than requested.
@@ -207,6 +251,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
         memset(skb, 0, offsetof(struct sk_buff, tail));
         /* Account for allocated memory : skb + skb->head */
         skb->truesize = SKB_TRUESIZE(size);
+        skb->pfmemalloc = pfmemalloc;
         atomic_set(&skb->users, 1);
         skb->head = data;
         skb->data = data;
@@ -222,7 +267,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
         atomic_set(&shinfo->dataref, 1);
         kmemcheck_annotate_variable(shinfo->destructor_arg);
 
-        if (fclone) {
+        if (flags & SKB_ALLOC_FCLONE) {
                 struct sk_buff *child = skb + 1;
                 atomic_t *fclone_ref = (atomic_t *) (child + 1);
 
@@ -232,6 +277,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
                 atomic_set(fclone_ref, 1);
 
                 child->fclone = SKB_FCLONE_UNAVAILABLE;
+                child->pfmemalloc = pfmemalloc;
         }
 out:
         return skb;
@@ -302,14 +348,7 @@ static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
 
 #define NETDEV_PAGECNT_BIAS (PAGE_SIZE / SMP_CACHE_BYTES)
 
-/**
- * netdev_alloc_frag - allocate a page fragment
- * @fragsz: fragment size
- *
- * Allocates a frag from a page for receive buffer.
- * Uses GFP_ATOMIC allocations.
- */
-void *netdev_alloc_frag(unsigned int fragsz)
+static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
 {
         struct netdev_alloc_cache *nc;
         void *data = NULL;
@@ -319,7 +358,7 @@ void *netdev_alloc_frag(unsigned int fragsz)
         nc = &__get_cpu_var(netdev_alloc_cache);
         if (unlikely(!nc->page)) {
 refill:
-                nc->page = alloc_page(GFP_ATOMIC | __GFP_COLD);
+                nc->page = alloc_page(gfp_mask);
                 if (unlikely(!nc->page))
                         goto end;
 recycle:
@@ -343,6 +382,18 @@ end:
         local_irq_restore(flags);
         return data;
 }
+
+/**
+ * netdev_alloc_frag - allocate a page fragment
+ * @fragsz: fragment size
+ *
+ * Allocates a frag from a page for receive buffer.
+ * Uses GFP_ATOMIC allocations.
+ */
+void *netdev_alloc_frag(unsigned int fragsz)
+{
+        return __netdev_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
+}
 EXPORT_SYMBOL(netdev_alloc_frag);
 
 /**
@@ -366,7 +417,12 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
                               SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
         if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA))) {
-                void *data = netdev_alloc_frag(fragsz);
+                void *data;
+
+                if (sk_memalloc_socks())
+                        gfp_mask |= __GFP_MEMALLOC;
+
+                data = __netdev_alloc_frag(fragsz, gfp_mask);
 
                 if (likely(data)) {
                         skb = build_skb(data, fragsz);
@@ -374,7 +430,8 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
                                 put_page(virt_to_head_page(data));
                 }
         } else {
-                skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, NUMA_NO_NODE);
+                skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask,
+                                  SKB_ALLOC_RX, NUMA_NO_NODE);
         }
         if (likely(skb)) {
                 skb_reserve(skb, NET_SKB_PAD);
@@ -656,6 +713,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 #if IS_ENABLED(CONFIG_IP_VS)
         new->ipvs_property = old->ipvs_property;
 #endif
+        new->pfmemalloc = old->pfmemalloc;
         new->protocol = old->protocol;
         new->mark = old->mark;
         new->skb_iif = old->skb_iif;
@@ -814,6 +872,9 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
                 n->fclone = SKB_FCLONE_CLONE;
                 atomic_inc(fclone_ref);
         } else {
+                if (skb_pfmemalloc(skb))
+                        gfp_mask |= __GFP_MEMALLOC;
+
                 n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
                 if (!n)
                         return NULL;
@@ -850,6 +911,13 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
         skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
 }
 
+static inline int skb_alloc_rx_flag(const struct sk_buff *skb)
+{
+        if (skb_pfmemalloc(skb))
+                return SKB_ALLOC_RX;
+        return 0;
+}
+
 /**
  * skb_copy - create private copy of an sk_buff
  * @skb: buffer to copy
@@ -871,7 +939,8 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
 {
         int headerlen = skb_headroom(skb);
         unsigned int size = skb_end_offset(skb) + skb->data_len;
-        struct sk_buff *n = alloc_skb(size, gfp_mask);
+        struct sk_buff *n = __alloc_skb(size, gfp_mask,
+                                        skb_alloc_rx_flag(skb), NUMA_NO_NODE);
 
         if (!n)
                 return NULL;
@@ -906,7 +975,8 @@ EXPORT_SYMBOL(skb_copy);
 struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask)
 {
         unsigned int size = skb_headlen(skb) + headroom;
-        struct sk_buff *n = alloc_skb(size, gfp_mask);
+        struct sk_buff *n = __alloc_skb(size, gfp_mask,
+                                        skb_alloc_rx_flag(skb), NUMA_NO_NODE);
 
         if (!n)
                 goto out;
@@ -979,8 +1049,10 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 
         size = SKB_DATA_ALIGN(size);
 
-        data = kmalloc(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
-                       gfp_mask);
+        if (skb_pfmemalloc(skb))
+                gfp_mask |= __GFP_MEMALLOC;
+        data = kmalloc_reserve(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
+                               gfp_mask, NUMA_NO_NODE, NULL);
         if (!data)
                 goto nodata;
         size = SKB_WITH_OVERHEAD(ksize(data));
@@ -1092,8 +1164,9 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
         /*
          * Allocate the copy buffer
          */
-        struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom,
-                                      gfp_mask);
+        struct sk_buff *n = __alloc_skb(newheadroom + skb->len + newtailroom,
+                                        gfp_mask, skb_alloc_rx_flag(skb),
+                                        NUMA_NO_NODE);
         int oldheadroom = skb_headroom(skb);
         int head_copy_len, head_copy_off;
         int off;
@@ -2775,8 +2848,9 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
                         skb_release_head_state(nskb);
                         __skb_push(nskb, doffset);
                 } else {
-                        nskb = alloc_skb(hsize + doffset + headroom,
-                                         GFP_ATOMIC);
+                        nskb = __alloc_skb(hsize + doffset + headroom,
+                                           GFP_ATOMIC, skb_alloc_rx_flag(skb),
+                                           NUMA_NO_NODE);
 
                         if (unlikely(!nskb))
                                 goto err;
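__kmalloc_reserve() above encodes a two-step policy: try the allocation with __GFP_NOMEMALLOC first so the emergency reserves stay untouched, and only when that fails (and the gfp flags permit it) retry against the reserves, reporting which path succeeded so the skb can be marked pfmemalloc. A standalone sketch of the same shape, with malloc() standing in for both allocators:

#include <stdbool.h>
#include <stdlib.h>

/* stand-ins for "allocate without touching reserves" and "allocate from reserves" */
static void *alloc_normal(size_t size)   { return malloc(size); }
static void *alloc_reserves(size_t size) { return malloc(size); }

static void *alloc_with_reserve_fallback(size_t size, bool reserves_allowed,
                                         bool *used_reserves)
{
        bool from_reserves = false;
        void *obj = alloc_normal(size);          /* first attempt: no reserves */

        if (!obj && reserves_allowed) {
                from_reserves = true;            /* caller will taint the buffer */
                obj = alloc_reserves(size);
        }

        if (used_reserves)
                *used_reserves = from_reserves;
        return obj;
}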
diff --git a/net/core/sock.c b/net/core/sock.c
index 2676a88f533e..6b654b3ddfda 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -142,7 +142,7 @@
 static DEFINE_MUTEX(proto_list_mutex);
 static LIST_HEAD(proto_list);
 
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+#ifdef CONFIG_MEMCG_KMEM
 int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
 {
         struct proto *proto;
@@ -271,6 +271,61 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
 int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
 EXPORT_SYMBOL(sysctl_optmem_max);
 
+struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE;
+EXPORT_SYMBOL_GPL(memalloc_socks);
+
+/**
+ * sk_set_memalloc - sets %SOCK_MEMALLOC
+ * @sk: socket to set it on
+ *
+ * Set %SOCK_MEMALLOC on a socket for access to emergency reserves.
+ * It's the responsibility of the admin to adjust min_free_kbytes
+ * to meet the requirements
+ */
+void sk_set_memalloc(struct sock *sk)
+{
+        sock_set_flag(sk, SOCK_MEMALLOC);
+        sk->sk_allocation |= __GFP_MEMALLOC;
+        static_key_slow_inc(&memalloc_socks);
+}
+EXPORT_SYMBOL_GPL(sk_set_memalloc);
+
+void sk_clear_memalloc(struct sock *sk)
+{
+        sock_reset_flag(sk, SOCK_MEMALLOC);
+        sk->sk_allocation &= ~__GFP_MEMALLOC;
+        static_key_slow_dec(&memalloc_socks);
+
+        /*
+         * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward
+         * progress of swapping. However, if SOCK_MEMALLOC is cleared while
+         * it has rmem allocations there is a risk that the user of the
+         * socket cannot make forward progress due to exceeding the rmem
+         * limits. By rights, sk_clear_memalloc() should only be called
+         * on sockets being torn down but warn and reset the accounting if
+         * that assumption breaks.
+         */
+        if (WARN_ON(sk->sk_forward_alloc))
+                sk_mem_reclaim(sk);
+}
+EXPORT_SYMBOL_GPL(sk_clear_memalloc);
+
+int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
+{
+        int ret;
+        unsigned long pflags = current->flags;
+
+        /* these should have been dropped before queueing */
+        BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));
+
+        current->flags |= PF_MEMALLOC;
+        ret = sk->sk_backlog_rcv(sk, skb);
+        tsk_restore_flags(current, pflags, PF_MEMALLOC);
+
+        return ret;
+}
+EXPORT_SYMBOL(__sk_backlog_rcv);
+
 #if defined(CONFIG_CGROUPS)
 #if !defined(CONFIG_NET_CLS_CGROUP)
 int net_cls_subsys_id = -1;
@@ -353,7 +408,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
         if (err)
                 return err;
 
-        if (!sk_rmem_schedule(sk, skb->truesize)) {
+        if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
                 atomic_inc(&sk->sk_drops);
                 return -ENOBUFS;
         }
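sk_set_memalloc()/sk_clear_memalloc() keep a global static key in step with the number of SOCK_MEMALLOC sockets, so sk_memalloc_socks() lets every hot-path check in the receive code collapse to nothing while no socket is backing swap. A plain-C sketch of that counting gate; the kernel's static_key patches the branch out entirely rather than reading a counter as this stand-in does.

#include <stdbool.h>

static int memalloc_sockets;            /* stands in for the memalloc_socks static key */

static bool memalloc_socks_enabled(void)
{
        return memalloc_sockets > 0;    /* gate for all pfmemalloc handling */
}

struct demo_sock { bool memalloc; };

static void demo_set_memalloc(struct demo_sock *sk)
{
        sk->memalloc = true;
        memalloc_sockets++;             /* first memalloc socket enables the slow paths */
}

static void demo_clear_memalloc(struct demo_sock *sk)
{
        sk->memalloc = false;
        memalloc_sockets--;             /* last one gone: the checks become no-ops again */
}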
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index ae2ccf2890e4..15ca63ec604e 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -49,7 +49,7 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
 obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
 obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
 obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
-obj-$(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) += tcp_memcontrol.o
+obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o
 obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
 
 obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 4b6487a68279..1b5ce96707a3 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -184,7 +184,7 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write,
         int ret;
         unsigned long vec[3];
         struct net *net = current->nsproxy->net_ns;
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+#ifdef CONFIG_MEMCG_KMEM
         struct mem_cgroup *memcg;
 #endif
 
@@ -203,7 +203,7 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write,
         if (ret)
                 return ret;
 
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+#ifdef CONFIG_MEMCG_KMEM
         rcu_read_lock();
         memcg = mem_cgroup_from_task(current);
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 9be30b039ae3..2fd2bc9e3c64 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4351,19 +4351,20 @@ static void tcp_ofo_queue(struct sock *sk)
 static bool tcp_prune_ofo_queue(struct sock *sk);
 static int tcp_prune_queue(struct sock *sk);
 
-static int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
+static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
+                                 unsigned int size)
 {
         if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
-            !sk_rmem_schedule(sk, size)) {
+            !sk_rmem_schedule(sk, skb, size)) {
 
                 if (tcp_prune_queue(sk) < 0)
                         return -1;
 
-                if (!sk_rmem_schedule(sk, size)) {
+                if (!sk_rmem_schedule(sk, skb, size)) {
                         if (!tcp_prune_ofo_queue(sk))
                                 return -1;
 
-                        if (!sk_rmem_schedule(sk, size))
+                        if (!sk_rmem_schedule(sk, skb, size))
                                 return -1;
                 }
         }
@@ -4418,7 +4419,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 
         TCP_ECN_check_ce(tp, skb);
 
-        if (unlikely(tcp_try_rmem_schedule(sk, skb->truesize))) {
+        if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
                 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFODROP);
                 __kfree_skb(skb);
                 return;
@@ -4552,17 +4553,17 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
 
 int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
 {
-        struct sk_buff *skb;
+        struct sk_buff *skb = NULL;
         struct tcphdr *th;
         bool fragstolen;
 
-        if (tcp_try_rmem_schedule(sk, size + sizeof(*th)))
-                goto err;
-
         skb = alloc_skb(size + sizeof(*th), sk->sk_allocation);
         if (!skb)
                 goto err;
 
+        if (tcp_try_rmem_schedule(sk, skb, size + sizeof(*th)))
+                goto err_free;
+
         th = (struct tcphdr *)skb_put(skb, sizeof(*th));
         skb_reset_transport_header(skb);
         memset(th, 0, sizeof(*th));
@@ -4633,7 +4634,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
         if (eaten <= 0) {
 queue_and_out:
                 if (eaten < 0 &&
-                    tcp_try_rmem_schedule(sk, skb->truesize))
+                    tcp_try_rmem_schedule(sk, skb, skb->truesize))
                         goto drop;
 
                 eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
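The tcp_send_rcvq() reorder matters because tcp_try_rmem_schedule() now takes the skb itself: the buffer is allocated first and only then charged against receive memory, with a separate err_free path when the charge is refused. A small standalone sketch of that allocate-then-charge shape; the helper names are stand-ins, not the kernel's.

#include <stdlib.h>

struct demo_buf { size_t size; };

static struct demo_buf *demo_alloc(size_t size)
{
        struct demo_buf *b = malloc(sizeof(*b));
        if (b)
                b->size = size;
        return b;
}

/* stand-in for tcp_try_rmem_schedule(sk, skb, size): charge happens after allocation */
static int demo_charge(const struct demo_buf *b, size_t budget)
{
        return b->size <= budget ? 0 : -1;
}

static struct demo_buf *queue_message(size_t size, size_t budget)
{
        struct demo_buf *b = demo_alloc(size);  /* allocate first */
        if (!b)
                return NULL;
        if (demo_charge(b, budget)) {           /* then try to charge it */
                free(b);                        /* err_free: undo the allocation */
                return NULL;
        }
        return b;
}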
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 7f91e5ac8277..42b2a6a73092 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2633,7 +2633,7 @@ struct proto tcp_prot = {
         .compat_setsockopt = compat_tcp_setsockopt,
         .compat_getsockopt = compat_tcp_getsockopt,
 #endif
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+#ifdef CONFIG_MEMCG_KMEM
         .init_cgroup = tcp_init_cgroup,
         .destroy_cgroup = tcp_destroy_cgroup,
         .proto_cgroup = tcp_proto_cgroup,
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 33cd065cfbd8..3f1bcff0b10b 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2045,7 +2045,8 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
         if (unlikely(sk->sk_state == TCP_CLOSE))
                 return;
 
-        if (tcp_write_xmit(sk, cur_mss, nonagle, 0, GFP_ATOMIC))
+        if (tcp_write_xmit(sk, cur_mss, nonagle, 0,
+                           sk_gfp_atomic(sk, GFP_ATOMIC)))
                 tcp_check_probe_timer(sk);
 }
 
@@ -2666,7 +2667,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 
         if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired)
                 s_data_desired = cvp->s_data_desired;
-        skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired, GFP_ATOMIC);
+        skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired,
+                        sk_gfp_atomic(sk, GFP_ATOMIC));
         if (unlikely(!skb)) {
                 dst_release(dst);
                 return NULL;
@@ -3064,7 +3066,7 @@ void tcp_send_ack(struct sock *sk)
          * tcp_transmit_skb() will set the ownership to this
          * sock.
          */
-        buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
+        buff = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC));
         if (buff == NULL) {
                 inet_csk_schedule_ack(sk);
                 inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
@@ -3079,7 +3081,7 @@ void tcp_send_ack(struct sock *sk)
 
         /* Send it off, this clears delayed acks for us. */
         TCP_SKB_CB(buff)->when = tcp_time_stamp;
-        tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC);
+        tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC));
 }
 
 /* This routine sends a packet with an out of date sequence
@@ -3099,7 +3101,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
         struct sk_buff *skb;
 
         /* We don't queue it, tcp_transmit_skb() sets ownership. */
-        skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
+        skb = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC));
         if (skb == NULL)
                 return -1;
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 221224e72507..c66b90f71c9b 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1299,7 +1299,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
                 /* Clone pktoptions received with SYN */
                 newnp->pktoptions = NULL;
                 if (treq->pktopts != NULL) {
-                        newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
+                        newnp->pktoptions = skb_clone(treq->pktopts,
+                                                      sk_gfp_atomic(sk, GFP_ATOMIC));
                         consume_skb(treq->pktopts);
                         treq->pktopts = NULL;
                         if (newnp->pktoptions)
@@ -1349,7 +1350,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
                  * across. Shucks.
                  */
                 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newnp->daddr,
-                               AF_INET6, key->key, key->keylen, GFP_ATOMIC);
+                               AF_INET6, key->key, key->keylen,
+                               sk_gfp_atomic(sk, GFP_ATOMIC));
         }
 #endif
 
@@ -1442,7 +1444,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
            --ANK (980728)
          */
         if (np->rxopt.all)
-                opt_skb = skb_clone(skb, GFP_ATOMIC);
+                opt_skb = skb_clone(skb, sk_gfp_atomic(sk, GFP_ATOMIC));
 
         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
                 sock_rps_save_rxhash(sk, skb);
@@ -2015,7 +2017,7 @@ struct proto tcpv6_prot = {
         .compat_setsockopt = compat_tcp_setsockopt,
         .compat_getsockopt = compat_tcp_getsockopt,
 #endif
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+#ifdef CONFIG_MEMCG_KMEM
         .proto_cgroup = tcp_proto_cgroup,
 #endif
 };
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 33d894776192..10c018a5b9fe 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -702,7 +702,8 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc,
         if (rx_count >= asoc->base.sk->sk_rcvbuf) {
 
                 if ((asoc->base.sk->sk_userlocks & SOCK_RCVBUF_LOCK) ||
-                    (!sk_rmem_schedule(asoc->base.sk, chunk->skb->truesize)))
+                    (!sk_rmem_schedule(asoc->base.sk, chunk->skb,
+                                       chunk->skb->truesize)))
                         goto fail;
         }
 
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index 9fe8857d8d59..03d03e37a7d5 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -21,6 +21,11 @@ config SUNRPC_XPRT_RDMA
 
           If unsure, say N.
 
+config SUNRPC_SWAP
+        bool
+        depends on SUNRPC
+        select NETVM
+
 config RPCSEC_GSS_KRB5
         tristate "Secure RPC: Kerberos V mechanism"
         depends on SUNRPC && CRYPTO
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index b05df36692ff..fa48c60aef23 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -717,6 +717,15 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
                 atomic_inc(&clnt->cl_count);
                 if (clnt->cl_softrtry)
                         task->tk_flags |= RPC_TASK_SOFT;
+                if (sk_memalloc_socks()) {
+                        struct rpc_xprt *xprt;
+
+                        rcu_read_lock();
+                        xprt = rcu_dereference(clnt->cl_xprt);
+                        if (xprt->swapper)
+                                task->tk_flags |= RPC_TASK_SWAPPER;
+                        rcu_read_unlock();
+                }
                 /* Add to the client's list of all tasks */
                 spin_lock(&clnt->cl_lock);
                 list_add_tail(&task->tk_task, &clnt->cl_tasks);
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 1f19aa15f89b..128494ec9a64 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -815,7 +815,10 @@ static void rpc_async_schedule(struct work_struct *work)
 void *rpc_malloc(struct rpc_task *task, size_t size)
 {
         struct rpc_buffer *buf;
-        gfp_t gfp = RPC_IS_SWAPPER(task) ? GFP_ATOMIC : GFP_NOWAIT;
+        gfp_t gfp = GFP_NOWAIT;
+
+        if (RPC_IS_SWAPPER(task))
+                gfp |= __GFP_MEMALLOC;
 
         size += sizeof(struct rpc_buffer);
         if (size <= RPC_BUFFER_MAXSIZE)
@@ -889,7 +892,7 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta
 static struct rpc_task *
 rpc_alloc_task(void)
 {
-        return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS);
+        return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOIO);
 }
 
 /*
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 926679459e71..400567243f84 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1930,6 +1930,45 @@ out:
         current->flags &= ~PF_FSTRANS;
 }
 
+#ifdef CONFIG_SUNRPC_SWAP
+static void xs_set_memalloc(struct rpc_xprt *xprt)
+{
+        struct sock_xprt *transport = container_of(xprt, struct sock_xprt,
+                        xprt);
+
+        if (xprt->swapper)
+                sk_set_memalloc(transport->inet);
+}
+
+/**
+ * xs_swapper - Tag this transport as being used for swap.
+ * @xprt: transport to tag
+ * @enable: enable/disable
+ *
+ */
+int xs_swapper(struct rpc_xprt *xprt, int enable)
+{
+        struct sock_xprt *transport = container_of(xprt, struct sock_xprt,
+                        xprt);
+        int err = 0;
+
+        if (enable) {
+                xprt->swapper++;
+                xs_set_memalloc(xprt);
+        } else if (xprt->swapper) {
+                xprt->swapper--;
+                sk_clear_memalloc(transport->inet);
+        }
+
+        return err;
+}
+EXPORT_SYMBOL_GPL(xs_swapper);
+#else
+static void xs_set_memalloc(struct rpc_xprt *xprt)
+{
+}
+#endif
+
 static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
 {
         struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
@@ -1954,6 +1993,8 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
                 transport->sock = sock;
                 transport->inet = sk;
 
+                xs_set_memalloc(xprt);
+
                 write_unlock_bh(&sk->sk_callback_lock);
         }
         xs_udp_do_set_buffer_size(xprt);
@@ -2081,6 +2122,8 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
         if (!xprt_bound(xprt))
                 goto out;
 
+        xs_set_memalloc(xprt);
+
         /* Tell the socket layer to start connecting... */
         xprt->stat.connect_count++;
         xprt->stat.connect_start = jiffies;
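xs_swapper() is the knob the swap-over-NFS path is expected to turn: enabling bumps xprt->swapper and tags the transport socket with SOCK_MEMALLOC, disabling drops the count and clears the tag again. A standalone sketch of that enable/disable bookkeeping, with stand-in types rather than the rpc_xprt plumbing:

#include <stdbool.h>

struct demo_xprt {
        int  swapper;            /* how many swap users this transport currently has */
        bool sock_memalloc;      /* stands in for SOCK_MEMALLOC on the transport socket */
};

static int demo_swapper(struct demo_xprt *xprt, int enable)
{
        if (enable) {
                xprt->swapper++;
                xprt->sock_memalloc = true;     /* sk_set_memalloc() in the patch */
        } else if (xprt->swapper) {
                xprt->swapper--;
                xprt->sock_memalloc = false;    /* sk_clear_memalloc() in the patch */
        }
        return 0;
}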