summaryrefslogtreecommitdiffstats
path: root/include/linux/mmzone.h
diff options
context:
space:
mode:
authorMel Gorman <mgorman@techsingularity.net>2016-05-19 20:13:30 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2016-05-19 22:12:14 -0400
commit682a3385e7734fa3abbd504cbeb5fe91793f1827 (patch)
tree1371b57acda5bb81e15bed95d82d155dae00bf65 /include/linux/mmzone.h
parent060e74173f292fb3e0398b3dca8765568d195ff1 (diff)
mm, page_alloc: inline the fast path of the zonelist iterator
The page allocator iterates through a zonelist for zones that match the addressing limitations and nodemask of the caller but many allocations will not be restricted. Despite this, there is always functional call overhead which builds up. This patch inlines the optimistic basic case and only calls the iterator function for the complex case. A hindrance was the fact that cpuset_current_mems_allowed is used in the fastpath as the allowed nodemask even though all nodes are allowed on most systems. The patch handles this by only considering cpuset_current_mems_allowed if a cpuset exists. As well as being faster in the fast-path, this removes some junk in the slowpath. The performance difference on a page allocator microbenchmark is; 4.6.0-rc2 4.6.0-rc2 statinline-v1r20 optiter-v1r20 Min alloc-odr0-1 412.00 ( 0.00%) 382.00 ( 7.28%) Min alloc-odr0-2 301.00 ( 0.00%) 282.00 ( 6.31%) Min alloc-odr0-4 247.00 ( 0.00%) 233.00 ( 5.67%) Min alloc-odr0-8 215.00 ( 0.00%) 203.00 ( 5.58%) Min alloc-odr0-16 199.00 ( 0.00%) 188.00 ( 5.53%) Min alloc-odr0-32 191.00 ( 0.00%) 182.00 ( 4.71%) Min alloc-odr0-64 187.00 ( 0.00%) 177.00 ( 5.35%) Min alloc-odr0-128 185.00 ( 0.00%) 175.00 ( 5.41%) Min alloc-odr0-256 193.00 ( 0.00%) 184.00 ( 4.66%) Min alloc-odr0-512 207.00 ( 0.00%) 197.00 ( 4.83%) Min alloc-odr0-1024 213.00 ( 0.00%) 203.00 ( 4.69%) Min alloc-odr0-2048 220.00 ( 0.00%) 209.00 ( 5.00%) Min alloc-odr0-4096 226.00 ( 0.00%) 214.00 ( 5.31%) Min alloc-odr0-8192 229.00 ( 0.00%) 218.00 ( 4.80%) Min alloc-odr0-16384 229.00 ( 0.00%) 219.00 ( 4.37%) perf indicated that next_zones_zonelist disappeared in the profile and __next_zones_zonelist did not appear. This is expected as the micro-benchmark would hit the inlined fast-path every time. Signed-off-by: Mel Gorman <mgorman@techsingularity.net> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Jesper Dangaard Brouer <brouer@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include/linux/mmzone.h')
-rw-r--r--include/linux/mmzone.h13
1 files changed, 11 insertions, 2 deletions
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 150c6049f961..cfcd7723edb6 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -919,6 +919,10 @@ static inline int zonelist_node_idx(struct zoneref *zoneref)
919#endif /* CONFIG_NUMA */ 919#endif /* CONFIG_NUMA */
920} 920}
921 921
922struct zoneref *__next_zones_zonelist(struct zoneref *z,
923 enum zone_type highest_zoneidx,
924 nodemask_t *nodes);
925
922/** 926/**
923 * next_zones_zonelist - Returns the next zone at or below highest_zoneidx within the allowed nodemask using a cursor within a zonelist as a starting point 927 * next_zones_zonelist - Returns the next zone at or below highest_zoneidx within the allowed nodemask using a cursor within a zonelist as a starting point
924 * @z - The cursor used as a starting point for the search 928 * @z - The cursor used as a starting point for the search
@@ -931,9 +935,14 @@ static inline int zonelist_node_idx(struct zoneref *zoneref)
931 * being examined. It should be advanced by one before calling 935 * being examined. It should be advanced by one before calling
932 * next_zones_zonelist again. 936 * next_zones_zonelist again.
933 */ 937 */
934struct zoneref *next_zones_zonelist(struct zoneref *z, 938static __always_inline struct zoneref *next_zones_zonelist(struct zoneref *z,
935 enum zone_type highest_zoneidx, 939 enum zone_type highest_zoneidx,
936 nodemask_t *nodes); 940 nodemask_t *nodes)
941{
942 if (likely(!nodes && zonelist_zone_idx(z) <= highest_zoneidx))
943 return z;
944 return __next_zones_zonelist(z, highest_zoneidx, nodes);
945}
937 946
938/** 947/**
939 * first_zones_zonelist - Returns the first zone at or below highest_zoneidx within the allowed nodemask in a zonelist 948 * first_zones_zonelist - Returns the first zone at or below highest_zoneidx within the allowed nodemask in a zonelist