path: root/mm/page_alloc.c
author		Vlastimil Babka <vbabka@suse.cz>	2015-02-11 18:25:38 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-02-11 20:06:02 -0500
commit		753791910e23a95aade78f69e49713acddf8bb8c (patch)
tree		b20876b979ddac75dda3b780c073143ceefe5dc1 /mm/page_alloc.c
parent		4ecf886045152d2ddf98ae74e39f789868ac1f98 (diff)
mm: set page->pfmemalloc in prep_new_page()
The possibility of replacing the numerous parameters of alloc_pages* functions with a single structure has been discussed when Minchan proposed to expand the x86 kernel stack [1]. This series implements the change, along with a few more cleanups/micro-optimizations.

The series is based on next-20150108 and I used gcc 4.8.3 20140627 on openSUSE 13.2 for compiling. Config includes NUMA and COMPACTION.

The core change is the introduction of a new struct alloc_context, which looks like this:

    struct alloc_context {
        struct zonelist *zonelist;
        nodemask_t *nodemask;
        struct zone *preferred_zone;
        int classzone_idx;
        int migratetype;
        enum zone_type high_zoneidx;
    };

All the contents are mostly constant, except that __alloc_pages_slowpath() changes preferred_zone, classzone_idx and potentially zonelist. But that's not a problem in case control returns to retry_cpuset: in __alloc_pages_nodemask(), those will be reset to initial values again (although it's a bit subtle). On the other hand, gfp_flags and alloc_flags mutate so much that it doesn't make sense to put them into alloc_context. Still, the result is one parameter instead of up to 7. This is all in patch 2.

Patch 3 is a step towards expanding alloc_context usage outside of page_alloc.c itself. The function try_to_compact_pages() can also benefit considerably from the parameter reduction, but it means the struct definition has to be moved to a shared header.

Patch 1 should IMHO be included even if the rest is deemed not useful enough. It improves maintainability and also has some code/stack reduction. Patch 4 is, on the other hand, a tiny optimization.

Overall bloat-o-meter results:

    add/remove: 0/0 grow/shrink: 0/4 up/down: 0/-460 (-460)
    function                        old     new   delta
    nr_free_zone_pages              129     115     -14
    __alloc_pages_direct_compact    329     256     -73
    get_page_from_freelist         2670    2576     -94
    __alloc_pages_nodemask         2564    2285    -279
    try_to_compact_pages            582     579      -3

Overall stack sizes per ./scripts/checkstack.pl:

                                old   new   delta
    get_page_from_freelist:     184   184       0
    __alloc_pages_nodemask      248   200     -48
    __alloc_pages_direct_c       40     -     -40
    try_to_compact_pages         72    72       0
                                              -88

[1] http://marc.info/?l=linux-mm&m=140142462528257&w=2

This patch (of 4):

prep_new_page() sets almost everything in the struct page of the page being allocated, except page->pfmemalloc. This is not obvious and has at least once led to a bug where setting page->pfmemalloc was forgotten; see commit 8fb74b9fb2b1 ("mm: compaction: partially revert capture of suitable high-order page").

This patch moves the pfmemalloc setting to prep_new_page(), which means it needs to gain an alloc_flags parameter. The call to prep_new_page is moved from buffered_rmqueue() to get_page_from_freelist(), which also leads to simpler code. An obsolete comment for buffered_rmqueue() is replaced.

In addition to better maintainability there is a small reduction of code and stack usage for get_page_from_freelist(), which inlines the other functions involved.

    add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-145 (-145)
    function                        old     new   delta
    get_page_from_freelist         2670    2525    -145

Stack usage is reduced from 184 to 168 bytes.

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--	mm/page_alloc.c	37
1 file changed, 16 insertions, 21 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a88cb0cbf352..30a3250c0a21 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -970,7 +970,8 @@ static inline int check_new_page(struct page *page)
 	return 0;
 }
 
-static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags)
+static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
+								int alloc_flags)
 {
 	int i;
 
@@ -994,6 +995,14 @@ static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags)
 
 	set_page_owner(page, order, gfp_flags);
 
+	/*
+	 * page->pfmemalloc is set when ALLOC_NO_WATERMARKS was necessary to
+	 * allocate the page. The expectation is that the caller is taking
+	 * steps that will free more memory. The caller should avoid the page
+	 * being used for !PFMEMALLOC purposes.
+	 */
+	page->pfmemalloc = !!(alloc_flags & ALLOC_NO_WATERMARKS);
+
 	return 0;
 }
 
@@ -1642,9 +1651,7 @@ int split_free_page(struct page *page)
 }
 
 /*
- * Really, prep_compound_page() should be called from __rmqueue_bulk(). But
- * we cheat by calling it from here, in the order > 0 path. Saves a branch
- * or two.
+ * Allocate a page from the given zone. Use pcplists for order-0 allocations.
  */
 static inline
 struct page *buffered_rmqueue(struct zone *preferred_zone,
@@ -1655,7 +1662,6 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
 	struct page *page;
 	bool cold = ((gfp_flags & __GFP_COLD) != 0);
 
-again:
 	if (likely(order == 0)) {
 		struct per_cpu_pages *pcp;
 		struct list_head *list;
@@ -1711,8 +1717,6 @@ again:
 	local_irq_restore(flags);
 
 	VM_BUG_ON_PAGE(bad_range(zone, page), page);
-	if (prep_new_page(page, order, gfp_flags))
-		goto again;
 	return page;
 
 failed:
@@ -2177,25 +2181,16 @@ zonelist_scan:
 try_this_zone:
 		page = buffered_rmqueue(preferred_zone, zone, order,
 						gfp_mask, migratetype);
-		if (page)
-			break;
+		if (page) {
+			if (prep_new_page(page, order, gfp_mask, alloc_flags))
+				goto try_this_zone;
+			return page;
+		}
 this_zone_full:
 		if (IS_ENABLED(CONFIG_NUMA) && zlc_active)
 			zlc_mark_zone_full(zonelist, z);
 	}
 
-	if (page) {
-		/*
-		 * page->pfmemalloc is set when ALLOC_NO_WATERMARKS was
-		 * necessary to allocate the page. The expectation is
-		 * that the caller is taking steps that will free more
-		 * memory. The caller should avoid the page being used
-		 * for !PFMEMALLOC purposes.
-		 */
-		page->pfmemalloc = !!(alloc_flags & ALLOC_NO_WATERMARKS);
-		return page;
-	}
-
 	/*
 	 * The first pass makes sure allocations are spread fairly within the
 	 * local node. However, the local node might have free pages left