aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorMichal Hocko <mhocko@suse.com>2015-08-21 17:11:51 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2015-08-21 17:30:10 -0400
commit2f064f3485cd29633ad1b3cfb00cc519509a3d72 (patch)
tree1581de52d696908aea01910e2a25b6725f4ccaa2 /include
parente45fc85a2f371f388ff3804271375a1aedbe3744 (diff)
mm: make page pfmemalloc check more robust
Commit c48a11c7ad26 ("netvm: propagate page->pfmemalloc to skb") added checks for page->pfmemalloc to __skb_fill_page_desc(): if (page->pfmemalloc && !page->mapping) skb->pfmemalloc = true; It assumes page->mapping == NULL implies that page->pfmemalloc can be trusted. However, __delete_from_page_cache() can set set page->mapping to NULL and leave page->index value alone. Due to being in union, a non-zero page->index will be interpreted as true page->pfmemalloc. So the assumption is invalid if the networking code can see such a page. And it seems it can. We have encountered this with a NFS over loopback setup when such a page is attached to a new skbuf. There is no copying going on in this case so the page confuses __skb_fill_page_desc which interprets the index as pfmemalloc flag and the network stack drops packets that have been allocated using the reserves unless they are to be queued on sockets handling the swapping which is the case here and that leads to hangs when the nfs client waits for a response from the server which has been dropped and thus never arrive. The struct page is already heavily packed so rather than finding another hole to put it in, let's do a trick instead. We can reuse the index again but define it to an impossible value (-1UL). This is the page index so it should never see the value that large. Replace all direct users of page->pfmemalloc by page_is_pfmemalloc which will hide this nastiness from unspoiled eyes. The information will get lost if somebody wants to use page->index obviously but that was the case before and the original code expected that the information should be persisted somewhere else if that is really needed (e.g. what SLAB and SLUB do). [akpm@linux-foundation.org: fix blooper in slub] Fixes: c48a11c7ad26 ("netvm: propagate page->pfmemalloc to skb") Signed-off-by: Michal Hocko <mhocko@suse.com> Debugged-by: Vlastimil Babka <vbabka@suse.com> Debugged-by: Jiri Bohac <jbohac@suse.com> Cc: Eric Dumazet <eric.dumazet@gmail.com> Cc: David Miller <davem@davemloft.net> Acked-by: Mel Gorman <mgorman@suse.de> Cc: <stable@vger.kernel.org> [3.6+] Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include')
-rw-r--r--include/linux/mm.h28
-rw-r--r--include/linux/mm_types.h9
-rw-r--r--include/linux/skbuff.h14
3 files changed, 33 insertions, 18 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 2e872f92dbac..bf6f117fcf4d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1003,6 +1003,34 @@ static inline int page_mapped(struct page *page)
1003} 1003}
1004 1004
1005/* 1005/*
1006 * Return true only if the page has been allocated with
1007 * ALLOC_NO_WATERMARKS and the low watermark was not
1008 * met implying that the system is under some pressure.
1009 */
1010static inline bool page_is_pfmemalloc(struct page *page)
1011{
1012 /*
1013 * Page index cannot be this large so this must be
1014 * a pfmemalloc page.
1015 */
1016 return page->index == -1UL;
1017}
1018
1019/*
1020 * Only to be called by the page allocator on a freshly allocated
1021 * page.
1022 */
1023static inline void set_page_pfmemalloc(struct page *page)
1024{
1025 page->index = -1UL;
1026}
1027
1028static inline void clear_page_pfmemalloc(struct page *page)
1029{
1030 page->index = 0;
1031}
1032
1033/*
1006 * Different kinds of faults, as returned by handle_mm_fault(). 1034 * Different kinds of faults, as returned by handle_mm_fault().
1007 * Used to decide whether a process gets delivered SIGBUS or 1035 * Used to decide whether a process gets delivered SIGBUS or
1008 * just gets major/minor fault counters bumped up. 1036 * just gets major/minor fault counters bumped up.
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 0038ac7466fd..15549578d559 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -63,15 +63,6 @@ struct page {
63 union { 63 union {
64 pgoff_t index; /* Our offset within mapping. */ 64 pgoff_t index; /* Our offset within mapping. */
65 void *freelist; /* sl[aou]b first free object */ 65 void *freelist; /* sl[aou]b first free object */
66 bool pfmemalloc; /* If set by the page allocator,
67 * ALLOC_NO_WATERMARKS was set
68 * and the low watermark was not
69 * met implying that the system
70 * is under some pressure. The
71 * caller should try ensure
72 * this page is only used to
73 * free other pages.
74 */
75 }; 66 };
76 67
77 union { 68 union {
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 22b6d9ca1654..9b88536487e6 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1602,20 +1602,16 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
1602 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 1602 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1603 1603
1604 /* 1604 /*
1605 * Propagate page->pfmemalloc to the skb if we can. The problem is 1605 * Propagate page pfmemalloc to the skb if we can. The problem is
1606 * that not all callers have unique ownership of the page. If 1606 * that not all callers have unique ownership of the page but rely
1607 * pfmemalloc is set, we check the mapping as a mapping implies 1607 * on page_is_pfmemalloc doing the right thing(tm).
1608 * page->index is set (index and pfmemalloc share space).
1609 * If it's a valid mapping, we cannot use page->pfmemalloc but we
1610 * do not lose pfmemalloc information as the pages would not be
1611 * allocated using __GFP_MEMALLOC.
1612 */ 1608 */
1613 frag->page.p = page; 1609 frag->page.p = page;
1614 frag->page_offset = off; 1610 frag->page_offset = off;
1615 skb_frag_size_set(frag, size); 1611 skb_frag_size_set(frag, size);
1616 1612
1617 page = compound_head(page); 1613 page = compound_head(page);
1618 if (page->pfmemalloc && !page->mapping) 1614 if (page_is_pfmemalloc(page))
1619 skb->pfmemalloc = true; 1615 skb->pfmemalloc = true;
1620} 1616}
1621 1617
@@ -2263,7 +2259,7 @@ static inline struct page *dev_alloc_page(void)
2263static inline void skb_propagate_pfmemalloc(struct page *page, 2259static inline void skb_propagate_pfmemalloc(struct page *page,
2264 struct sk_buff *skb) 2260 struct sk_buff *skb)
2265{ 2261{
2266 if (page && page->pfmemalloc) 2262 if (page_is_pfmemalloc(page))
2267 skb->pfmemalloc = true; 2263 skb->pfmemalloc = true;
2268} 2264}
2269 2265