page allocator: use allocation flags as an index to the zone watermark

ALLOC_WMARK_MIN, ALLOC_WMARK_LOW and ALLOC_WMARK_HIGH determine whether pages_min, pages_low or pages_high is used as the zone watermark when allocating the pages. Two branches in the allocator hotpath determine which watermark to use. This patch uses the flags as an array index into a watermark array that is indexed with WMARK_* defines and accessed via helpers. All call sites that use zone->pages_* are updated to use the helpers for accessing the values and the array offsets for setting. Signed-off-by: Mel Gorman <mel@csn.ul.ie> Reviewed-by: Christoph Lameter <cl@linux-foundation.org> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Cc: Pekka Enberg <penberg@cs.helsinki.fi> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Nick Piggin <nickpiggin@yahoo.com.au> Cc: Dave Hansen <dave@linux.vnet.ibm.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: Mel Gorman <mel@csn.ul.ie> 2009-06-16 18:32:12 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2009-06-16 22:47:35 -0400
commit: 418589663d6011de9006425b6c5721e1544fb47a (patch)
tree: ef37fb026d3e38191d6b5c99bc95c190fa98d0fb /mm
parent: a3af9c389a7f3e675313f442fdd8c247c1cdb66b (diff)
3 files changed, 50 insertions, 46 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8485735fc690..abe26003124d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1150,10 +1150,15 @@ failed:
        return NULL;
 }
-#define ALLOC_NO_WATERMARKS     0x01 /* don't check watermarks at all */
+/* The ALLOC_WMARK bits are used as an index to zone->watermark */
-#define ALLOC_WMARK_MIN         0x02 /* use pages_min watermark */
+#define ALLOC_WMARK_MIN         WMARK_MIN
-#define ALLOC_WMARK_LOW         0x04 /* use pages_low watermark */
+#define ALLOC_WMARK_LOW         WMARK_LOW
-#define ALLOC_WMARK_HIGH        0x08 /* use pages_high watermark */
+#define ALLOC_WMARK_HIGH        WMARK_HIGH
+#define ALLOC_NO_WATERMARKS     0x04 /* don't check watermarks at all */
+/* Mask to get the watermark bits */
+#define ALLOC_WMARK_MASK        (ALLOC_NO_WATERMARKS-1)
 #define ALLOC_HARDER            0x10 /* try to alloc harder */
 #define ALLOC_HIGH              0x20 /* __GFP_HIGH set */
 #define ALLOC_CPUSET            0x40 /* check for correct cpuset */
@@ -1440,14 +1445,10 @@ zonelist_scan:
                        !cpuset_zone_allowed_softwall(zone, gfp_mask))
                                goto try_next_zone;
+                BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK);
                if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
                        unsigned long mark;
-                        if (alloc_flags & ALLOC_WMARK_MIN)
+                        mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK];
-                                mark = zone->pages_min;
-                        else if (alloc_flags & ALLOC_WMARK_LOW)
-                                mark = zone->pages_low;
-                        else
-                                mark = zone->pages_high;
                        if (!zone_watermark_ok(zone, order, mark,
                                    classzone_idx, alloc_flags)) {
                                if (!zone_reclaim_mode ||
@@ -1959,7 +1960,7 @@ static unsigned int nr_free_zone_pages(int offset)
        for_each_zone_zonelist(zone, z, zonelist, offset) {
                unsigned long size = zone->present_pages;
-                unsigned long high = zone->pages_high;
+                unsigned long high = high_wmark_pages(zone);
                if (size > high)
                        sum += size - high;
        }
@@ -2096,9 +2097,9 @@ void show_free_areas(void)
                        "\n",
                        zone->name,
                        K(zone_page_state(zone, NR_FREE_PAGES)),
-                        K(zone->pages_min),
+                        K(min_wmark_pages(zone)),
-                        K(zone->pages_low),
+                        K(low_wmark_pages(zone)),
-                        K(zone->pages_high),
+                        K(high_wmark_pages(zone)),
                        K(zone_page_state(zone, NR_ACTIVE_ANON)),
                        K(zone_page_state(zone, NR_INACTIVE_ANON)),
                        K(zone_page_state(zone, NR_ACTIVE_FILE)),
@@ -2702,8 +2703,8 @@ static inline unsigned long wait_table_bits(unsigned long size)
 /*
 * Mark a number of pageblocks as MIGRATE_RESERVE. The number
- * of blocks reserved is based on zone->pages_min. The memory within the
+ * of blocks reserved is based on min_wmark_pages(zone). The memory within
- * reserve will tend to store contiguous free pages. Setting min_free_kbytes
+ * the reserve will tend to store contiguous free pages. Setting min_free_kbytes
 * higher will lead to a bigger reserve which will get freed as contiguous
 * blocks as reclaim kicks in
 */
@@ -2716,7 +2717,7 @@ static void setup_zone_migrate_reserve(struct zone *zone)
        /* Get the start pfn, end pfn and the number of blocks to reserve */
        start_pfn = zone->zone_start_pfn;
        end_pfn = start_pfn + zone->spanned_pages;
-        reserve = roundup(zone->pages_min, pageblock_nr_pages) >>
+        reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >>
                                                        pageblock_order;
        for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
@@ -4319,8 +4320,8 @@ static void calculate_totalreserve_pages(void)
                                        max = zone->lowmem_reserve[j];
                        }
-                        /* we treat pages_high as reserved pages. */
+                        /* we treat the high watermark as reserved pages. */
-                        max += zone->pages_high;
+                        max += high_wmark_pages(zone);
                        if (max > zone->present_pages)
                                max = zone->present_pages;
@@ -4400,7 +4401,7 @@ void setup_per_zone_pages_min(void)
                         * need highmem pages, so cap pages_min to a small
                         * value here.
                         *
-                         * The (pages_high-pages_low) and (pages_low-pages_min)
+                         * The WMARK_HIGH-WMARK_LOW and (WMARK_LOW-WMARK_MIN)
                         * deltas controls asynch page reclaim, and so should
                         * not be capped for highmem.
                         */
@@ -4411,17 +4412,17 @@ void setup_per_zone_pages_min(void)
                                min_pages = SWAP_CLUSTER_MAX;
                        if (min_pages > 128)
                                min_pages = 128;
-                        zone->pages_min = min_pages;
+                        zone->watermark[WMARK_MIN] = min_pages;
                } else {
                        /*
                         * If it's a lowmem zone, reserve a number of pages
                         * proportionate to the zone's size.
                         */
-                        zone->pages_min = tmp;
+                        zone->watermark[WMARK_MIN] = tmp;
                }
-                zone->pages_low   = zone->pages_min + (tmp >> 2);
+                zone->watermark[WMARK_LOW]  = min_wmark_pages(zone) + (tmp >> 2);
-                zone->pages_high  = zone->pages_min + (tmp >> 1);
+                zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1);
                setup_zone_migrate_reserve(zone);
                spin_unlock_irqrestore(&zone->lock, flags);
        }
@@ -4566,7 +4567,7 @@ int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write,
 *      whenever sysctl_lowmem_reserve_ratio changes.
 *
 * The reserve ratio obviously has absolutely no relation with the
- * pages_min watermarks. The lowmem reserve ratio can only make sense
+ * minimum watermarks. The lowmem reserve ratio can only make sense
 * if in function of the boot time zone sizes.
 */
 int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write,
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a6b7d14812e6..e5245d051647 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1401,7 +1401,7 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
                free  = zone_page_state(zone, NR_FREE_PAGES);
                /* If we have very few page cache pages,
                   force-scan anon pages. */
-                if (unlikely(file + free <= zone->pages_high)) {
+                if (unlikely(file + free <= high_wmark_pages(zone))) {
                        percent[0] = 100;
                        percent[1] = 0;
                        return;
@@ -1533,11 +1533,13 @@ static void shrink_zone(int priority, struct zone *zone,
 * try to reclaim pages from zones which will satisfy the caller's allocation
 * request.
 *
- * We reclaim from a zone even if that zone is over pages_high.  Because:
+ * We reclaim from a zone even if that zone is over high_wmark_pages(zone).
+ * Because:
 * a) The caller may be trying to free *extra* pages to satisfy a higher-order
 *    allocation or
- * b) The zones may be over pages_high but they must go *over* pages_high to
+ * b) The target zone may be at high_wmark_pages(zone) but the lower zones
- *    satisfy the `incremental min' zone defense algorithm.
+ *    must go *over* high_wmark_pages(zone) to satisfy the `incremental min'
+ *    zone defense algorithm.
 *
 * If a zone is deemed to be full of pinned pages then just give it a light
 * scan then give up on it.
@@ -1743,7 +1745,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 /*
 * For kswapd, balance_pgdat() will work across all this node's zones until
- * they are all at pages_high.
+ * they are all at high_wmark_pages(zone).
 *
 * Returns the number of pages which were actually freed.
 *
@@ -1756,11 +1758,11 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 * the zone for when the problem goes away.
 *
 * kswapd scans the zones in the highmem->normal->dma direction.  It skips
- * zones which have free_pages > pages_high, but once a zone is found to have
+ * zones which have free_pages > high_wmark_pages(zone), but once a zone is
- * free_pages <= pages_high, we scan that zone and the lower zones regardless
+ * found to have free_pages <= high_wmark_pages(zone), we scan that zone and the
- * of the number of free pages in the lower zones.  This interoperates with
+ * lower zones regardless of the number of free pages in the lower zones. This
- * the page allocator fallback scheme to ensure that aging of pages is balanced
+ * interoperates with the page allocator fallback scheme to ensure that aging
- * across the zones.
+ * of pages is balanced across the zones.
 */
 static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
 {
@@ -1781,7 +1783,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
        };
        /*
         * temp_priority is used to remember the scanning priority at which
-         * this zone was successfully refilled to free_pages == pages_high.
+         * this zone was successfully refilled to
+         * free_pages == high_wmark_pages(zone).
         */
        int temp_priority[MAX_NR_ZONES];
@@ -1826,8 +1829,8 @@ loop_again:
                                shrink_active_list(SWAP_CLUSTER_MAX, zone,
                                                        &sc, priority, 0);
-                        if (!zone_watermark_ok(zone, order, zone->pages_high,
+                        if (!zone_watermark_ok(zone, order,
-                                               0, 0)) {
+                                        high_wmark_pages(zone), 0, 0)) {
                                end_zone = i;
                                break;
                        }
@@ -1861,8 +1864,8 @@ loop_again:
                                        priority != DEF_PRIORITY)
                                continue;
-                        if (!zone_watermark_ok(zone, order, zone->pages_high,
+                        if (!zone_watermark_ok(zone, order,
-                                               end_zone, 0))
+                                        high_wmark_pages(zone), end_zone, 0))
                                all_zones_ok = 0;
                        temp_priority[i] = priority;
                        sc.nr_scanned = 0;
@@ -1871,8 +1874,8 @@ loop_again:
                         * We put equal pressure on every zone, unless one
                         * zone has way too many pages free already.
                         */
-                        if (!zone_watermark_ok(zone, order, 8*zone->pages_high,
+                        if (!zone_watermark_ok(zone, order,
-                                                end_zone, 0))
+                                        8*high_wmark_pages(zone), end_zone, 0))
                                shrink_zone(priority, zone, &sc);
                        reclaim_state->reclaimed_slab = 0;
                        nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
@@ -2038,7 +2041,7 @@ void wakeup_kswapd(struct zone *zone, int order)
                return;
        pgdat = zone->zone_pgdat;
-        if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0))
+        if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0))
                return;
        if (pgdat->kswapd_max_order < order)
                pgdat->kswapd_max_order = order;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 74d66dba0cbe..415110772c73 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -714,9 +714,9 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
                   "\n        spanned  %lu"
                   "\n        present  %lu",
                   zone_page_state(zone, NR_FREE_PAGES),
-                   zone->pages_min,
+                   min_wmark_pages(zone),
-                   zone->pages_low,
+                   low_wmark_pages(zone),
-                   zone->pages_high,
+                   high_wmark_pages(zone),
                   zone->pages_scanned,
                   zone->lru[LRU_ACTIVE_ANON].nr_scan,
                   zone->lru[LRU_INACTIVE_ANON].nr_scan,
author	Mel Gorman <mel@csn.ul.ie>	2009-06-16 18:32:12 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-06-16 22:47:35 -0400
commit	418589663d6011de9006425b6c5721e1544fb47a (patch)
tree	ef37fb026d3e38191d6b5c99bc95c190fa98d0fb /mm
parent	a3af9c389a7f3e675313f442fdd8c247c1cdb66b (diff)