summaryrefslogtreecommitdiffstats
path: root/mm/page_alloc.c
diff options
context:
space:
mode:
authorMichal Hocko <mhocko@suse.com>2015-08-21 17:11:51 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2015-08-21 17:30:10 -0400
commit2f064f3485cd29633ad1b3cfb00cc519509a3d72 (patch)
tree1581de52d696908aea01910e2a25b6725f4ccaa2 /mm/page_alloc.c
parente45fc85a2f371f388ff3804271375a1aedbe3744 (diff)
mm: make page pfmemalloc check more robust
Commit c48a11c7ad26 ("netvm: propagate page->pfmemalloc to skb") added checks for page->pfmemalloc to __skb_fill_page_desc(): if (page->pfmemalloc && !page->mapping) skb->pfmemalloc = true; It assumes page->mapping == NULL implies that page->pfmemalloc can be trusted. However, __delete_from_page_cache() can set set page->mapping to NULL and leave page->index value alone. Due to being in union, a non-zero page->index will be interpreted as true page->pfmemalloc. So the assumption is invalid if the networking code can see such a page. And it seems it can. We have encountered this with a NFS over loopback setup when such a page is attached to a new skbuf. There is no copying going on in this case so the page confuses __skb_fill_page_desc which interprets the index as pfmemalloc flag and the network stack drops packets that have been allocated using the reserves unless they are to be queued on sockets handling the swapping which is the case here and that leads to hangs when the nfs client waits for a response from the server which has been dropped and thus never arrive. The struct page is already heavily packed so rather than finding another hole to put it in, let's do a trick instead. We can reuse the index again but define it to an impossible value (-1UL). This is the page index so it should never see the value that large. Replace all direct users of page->pfmemalloc by page_is_pfmemalloc which will hide this nastiness from unspoiled eyes. The information will get lost if somebody wants to use page->index obviously but that was the case before and the original code expected that the information should be persisted somewhere else if that is really needed (e.g. what SLAB and SLUB do). [akpm@linux-foundation.org: fix blooper in slub] Fixes: c48a11c7ad26 ("netvm: propagate page->pfmemalloc to skb") Signed-off-by: Michal Hocko <mhocko@suse.com> Debugged-by: Vlastimil Babka <vbabka@suse.com> Debugged-by: Jiri Bohac <jbohac@suse.com> Cc: Eric Dumazet <eric.dumazet@gmail.com> Cc: David Miller <davem@davemloft.net> Acked-by: Mel Gorman <mgorman@suse.de> Cc: <stable@vger.kernel.org> [3.6+] Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--mm/page_alloc.c9
1 files changed, 6 insertions, 3 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index df959b7d6085..5b5240b7f642 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1343,12 +1343,15 @@ static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
1343 set_page_owner(page, order, gfp_flags); 1343 set_page_owner(page, order, gfp_flags);
1344 1344
1345 /* 1345 /*
1346 * page->pfmemalloc is set when ALLOC_NO_WATERMARKS was necessary to 1346 * page is set pfmemalloc when ALLOC_NO_WATERMARKS was necessary to
1347 * allocate the page. The expectation is that the caller is taking 1347 * allocate the page. The expectation is that the caller is taking
1348 * steps that will free more memory. The caller should avoid the page 1348 * steps that will free more memory. The caller should avoid the page
1349 * being used for !PFMEMALLOC purposes. 1349 * being used for !PFMEMALLOC purposes.
1350 */ 1350 */
1351 page->pfmemalloc = !!(alloc_flags & ALLOC_NO_WATERMARKS); 1351 if (alloc_flags & ALLOC_NO_WATERMARKS)
1352 set_page_pfmemalloc(page);
1353 else
1354 clear_page_pfmemalloc(page);
1352 1355
1353 return 0; 1356 return 0;
1354} 1357}
@@ -3345,7 +3348,7 @@ refill:
3345 atomic_add(size - 1, &page->_count); 3348 atomic_add(size - 1, &page->_count);
3346 3349
3347 /* reset page count bias and offset to start of new frag */ 3350 /* reset page count bias and offset to start of new frag */
3348 nc->pfmemalloc = page->pfmemalloc; 3351 nc->pfmemalloc = page_is_pfmemalloc(page);
3349 nc->pagecnt_bias = size; 3352 nc->pagecnt_bias = size;
3350 nc->offset = size; 3353 nc->offset = size;
3351 } 3354 }