aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
author Mel Gorman <mel@csn.ul.ie> 2009-06-16 18:32:12 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2009-06-16 22:47:35 -0400
commit 418589663d6011de9006425b6c5721e1544fb47a (patch)
tree ef37fb026d3e38191d6b5c99bc95c190fa98d0fb /mm
parent a3af9c389a7f3e675313f442fdd8c247c1cdb66b (diff)
page allocator: use allocation flags as an index to the zone watermark
ALLOC_WMARK_MIN, ALLOC_WMARK_LOW and ALLOC_WMARK_HIGH determine whether pages_min, pages_low or pages_high is used as the zone watermark when allocating the pages. Two branches in the allocator hotpath determine which watermark to use. This patch uses the flags as an array index into a watermark array that is indexed with WMARK_* defines accessed via helpers. All call sites that use zone->pages_* are updated to use the helpers for accessing the values and the array offsets for setting. Signed-off-by: Mel Gorman <mel@csn.ul.ie> Reviewed-by: Christoph Lameter <cl@linux-foundation.org> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Cc: Pekka Enberg <penberg@cs.helsinki.fi> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Nick Piggin <nickpiggin@yahoo.com.au> Cc: Dave Hansen <dave@linux.vnet.ibm.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- mm/page_alloc.c 51
-rw-r--r-- mm/vmscan.c 39
-rw-r--r-- mm/vmstat.c 6
3 files changed, 50 insertions, 46 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8485735fc690..abe26003124d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1150,10 +1150,15 @@ failed:
1150 return NULL; 1150 return NULL;
1151} 1151}
1152 1152
1153#define ALLOC_NO_WATERMARKS 0x01 /* don't check watermarks at all */ 1153/* The ALLOC_WMARK bits are used as an index to zone->watermark */
1154#define ALLOC_WMARK_MIN 0x02 /* use pages_min watermark */ 1154#define ALLOC_WMARK_MIN WMARK_MIN
1155#define ALLOC_WMARK_LOW 0x04 /* use pages_low watermark */ 1155#define ALLOC_WMARK_LOW WMARK_LOW
1156#define ALLOC_WMARK_HIGH 0x08 /* use pages_high watermark */ 1156#define ALLOC_WMARK_HIGH WMARK_HIGH
1157#define ALLOC_NO_WATERMARKS 0x04 /* don't check watermarks at all */
1158
1159/* Mask to get the watermark bits */
1160#define ALLOC_WMARK_MASK (ALLOC_NO_WATERMARKS-1)
1161
1157#define ALLOC_HARDER 0x10 /* try to alloc harder */ 1162#define ALLOC_HARDER 0x10 /* try to alloc harder */
1158#define ALLOC_HIGH 0x20 /* __GFP_HIGH set */ 1163#define ALLOC_HIGH 0x20 /* __GFP_HIGH set */
1159#define ALLOC_CPUSET 0x40 /* check for correct cpuset */ 1164#define ALLOC_CPUSET 0x40 /* check for correct cpuset */
@@ -1440,14 +1445,10 @@ zonelist_scan:
1440 !cpuset_zone_allowed_softwall(zone, gfp_mask)) 1445 !cpuset_zone_allowed_softwall(zone, gfp_mask))
1441 goto try_next_zone; 1446 goto try_next_zone;
1442 1447
1448 BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK);
1443 if (!(alloc_flags & ALLOC_NO_WATERMARKS)) { 1449 if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
1444 unsigned long mark; 1450 unsigned long mark;
1445 if (alloc_flags & ALLOC_WMARK_MIN) 1451 mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK];
1446 mark = zone->pages_min;
1447 else if (alloc_flags & ALLOC_WMARK_LOW)
1448 mark = zone->pages_low;
1449 else
1450 mark = zone->pages_high;
1451 if (!zone_watermark_ok(zone, order, mark, 1452 if (!zone_watermark_ok(zone, order, mark,
1452 classzone_idx, alloc_flags)) { 1453 classzone_idx, alloc_flags)) {
1453 if (!zone_reclaim_mode || 1454 if (!zone_reclaim_mode ||
@@ -1959,7 +1960,7 @@ static unsigned int nr_free_zone_pages(int offset)
1959 1960
1960 for_each_zone_zonelist(zone, z, zonelist, offset) { 1961 for_each_zone_zonelist(zone, z, zonelist, offset) {
1961 unsigned long size = zone->present_pages; 1962 unsigned long size = zone->present_pages;
1962 unsigned long high = zone->pages_high; 1963 unsigned long high = high_wmark_pages(zone);
1963 if (size > high) 1964 if (size > high)
1964 sum += size - high; 1965 sum += size - high;
1965 } 1966 }
@@ -2096,9 +2097,9 @@ void show_free_areas(void)
2096 "\n", 2097 "\n",
2097 zone->name, 2098 zone->name,
2098 K(zone_page_state(zone, NR_FREE_PAGES)), 2099 K(zone_page_state(zone, NR_FREE_PAGES)),
2099 K(zone->pages_min), 2100 K(min_wmark_pages(zone)),
2100 K(zone->pages_low), 2101 K(low_wmark_pages(zone)),
2101 K(zone->pages_high), 2102 K(high_wmark_pages(zone)),
2102 K(zone_page_state(zone, NR_ACTIVE_ANON)), 2103 K(zone_page_state(zone, NR_ACTIVE_ANON)),
2103 K(zone_page_state(zone, NR_INACTIVE_ANON)), 2104 K(zone_page_state(zone, NR_INACTIVE_ANON)),
2104 K(zone_page_state(zone, NR_ACTIVE_FILE)), 2105 K(zone_page_state(zone, NR_ACTIVE_FILE)),
@@ -2702,8 +2703,8 @@ static inline unsigned long wait_table_bits(unsigned long size)
2702 2703
2703/* 2704/*
2704 * Mark a number of pageblocks as MIGRATE_RESERVE. The number 2705 * Mark a number of pageblocks as MIGRATE_RESERVE. The number
2705 * of blocks reserved is based on zone->pages_min. The memory within the 2706 * of blocks reserved is based on min_wmark_pages(zone). The memory within
2706 * reserve will tend to store contiguous free pages. Setting min_free_kbytes 2707 * the reserve will tend to store contiguous free pages. Setting min_free_kbytes
2707 * higher will lead to a bigger reserve which will get freed as contiguous 2708 * higher will lead to a bigger reserve which will get freed as contiguous
2708 * blocks as reclaim kicks in 2709 * blocks as reclaim kicks in
2709 */ 2710 */
@@ -2716,7 +2717,7 @@ static void setup_zone_migrate_reserve(struct zone *zone)
2716 /* Get the start pfn, end pfn and the number of blocks to reserve */ 2717 /* Get the start pfn, end pfn and the number of blocks to reserve */
2717 start_pfn = zone->zone_start_pfn; 2718 start_pfn = zone->zone_start_pfn;
2718 end_pfn = start_pfn + zone->spanned_pages; 2719 end_pfn = start_pfn + zone->spanned_pages;
2719 reserve = roundup(zone->pages_min, pageblock_nr_pages) >> 2720 reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >>
2720 pageblock_order; 2721 pageblock_order;
2721 2722
2722 for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) { 2723 for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
@@ -4319,8 +4320,8 @@ static void calculate_totalreserve_pages(void)
4319 max = zone->lowmem_reserve[j]; 4320 max = zone->lowmem_reserve[j];
4320 } 4321 }
4321 4322
4322 /* we treat pages_high as reserved pages. */ 4323 /* we treat the high watermark as reserved pages. */
4323 max += zone->pages_high; 4324 max += high_wmark_pages(zone);
4324 4325
4325 if (max > zone->present_pages) 4326 if (max > zone->present_pages)
4326 max = zone->present_pages; 4327 max = zone->present_pages;
@@ -4400,7 +4401,7 @@ void setup_per_zone_pages_min(void)
4400 * need highmem pages, so cap pages_min to a small 4401 * need highmem pages, so cap pages_min to a small
4401 * value here. 4402 * value here.
4402 * 4403 *
4403 * The (pages_high-pages_low) and (pages_low-pages_min) 4404 * The WMARK_HIGH-WMARK_LOW and (WMARK_LOW-WMARK_MIN)
4404 * deltas controls asynch page reclaim, and so should 4405 * deltas controls asynch page reclaim, and so should
4405 * not be capped for highmem. 4406 * not be capped for highmem.
4406 */ 4407 */
@@ -4411,17 +4412,17 @@ void setup_per_zone_pages_min(void)
4411 min_pages = SWAP_CLUSTER_MAX; 4412 min_pages = SWAP_CLUSTER_MAX;
4412 if (min_pages > 128) 4413 if (min_pages > 128)
4413 min_pages = 128; 4414 min_pages = 128;
4414 zone->pages_min = min_pages; 4415 zone->watermark[WMARK_MIN] = min_pages;
4415 } else { 4416 } else {
4416 /* 4417 /*
4417 * If it's a lowmem zone, reserve a number of pages 4418 * If it's a lowmem zone, reserve a number of pages
4418 * proportionate to the zone's size. 4419 * proportionate to the zone's size.
4419 */ 4420 */
4420 zone->pages_min = tmp; 4421 zone->watermark[WMARK_MIN] = tmp;
4421 } 4422 }
4422 4423
4423 zone->pages_low = zone->pages_min + (tmp >> 2); 4424 zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + (tmp >> 2);
4424 zone->pages_high = zone->pages_min + (tmp >> 1); 4425 zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1);
4425 setup_zone_migrate_reserve(zone); 4426 setup_zone_migrate_reserve(zone);
4426 spin_unlock_irqrestore(&zone->lock, flags); 4427 spin_unlock_irqrestore(&zone->lock, flags);
4427 } 4428 }
@@ -4566,7 +4567,7 @@ int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write,
4566 * whenever sysctl_lowmem_reserve_ratio changes. 4567 * whenever sysctl_lowmem_reserve_ratio changes.
4567 * 4568 *
4568 * The reserve ratio obviously has absolutely no relation with the 4569 * The reserve ratio obviously has absolutely no relation with the
4569 * pages_min watermarks. The lowmem reserve ratio can only make sense 4570 * minimum watermarks. The lowmem reserve ratio can only make sense
4570 * if in function of the boot time zone sizes. 4571 * if in function of the boot time zone sizes.
4571 */ 4572 */
4572int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write, 4573int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write,
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a6b7d14812e6..e5245d051647 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1401,7 +1401,7 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
1401 free = zone_page_state(zone, NR_FREE_PAGES); 1401 free = zone_page_state(zone, NR_FREE_PAGES);
1402 /* If we have very few page cache pages, 1402 /* If we have very few page cache pages,
1403 force-scan anon pages. */ 1403 force-scan anon pages. */
1404 if (unlikely(file + free <= zone->pages_high)) { 1404 if (unlikely(file + free <= high_wmark_pages(zone))) {
1405 percent[0] = 100; 1405 percent[0] = 100;
1406 percent[1] = 0; 1406 percent[1] = 0;
1407 return; 1407 return;
@@ -1533,11 +1533,13 @@ static void shrink_zone(int priority, struct zone *zone,
1533 * try to reclaim pages from zones which will satisfy the caller's allocation 1533 * try to reclaim pages from zones which will satisfy the caller's allocation
1534 * request. 1534 * request.
1535 * 1535 *
1536 * We reclaim from a zone even if that zone is over pages_high. Because: 1536 * We reclaim from a zone even if that zone is over high_wmark_pages(zone).
1537 * Because:
1537 * a) The caller may be trying to free *extra* pages to satisfy a higher-order 1538 * a) The caller may be trying to free *extra* pages to satisfy a higher-order
1538 * allocation or 1539 * allocation or
1539 * b) The zones may be over pages_high but they must go *over* pages_high to 1540 * b) The target zone may be at high_wmark_pages(zone) but the lower zones
1540 * satisfy the `incremental min' zone defense algorithm. 1541 * must go *over* high_wmark_pages(zone) to satisfy the `incremental min'
1542 * zone defense algorithm.
1541 * 1543 *
1542 * If a zone is deemed to be full of pinned pages then just give it a light 1544 * If a zone is deemed to be full of pinned pages then just give it a light
1543 * scan then give up on it. 1545 * scan then give up on it.
@@ -1743,7 +1745,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
1743 1745
1744/* 1746/*
1745 * For kswapd, balance_pgdat() will work across all this node's zones until 1747 * For kswapd, balance_pgdat() will work across all this node's zones until
1746 * they are all at pages_high. 1748 * they are all at high_wmark_pages(zone).
1747 * 1749 *
1748 * Returns the number of pages which were actually freed. 1750 * Returns the number of pages which were actually freed.
1749 * 1751 *
@@ -1756,11 +1758,11 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
1756 * the zone for when the problem goes away. 1758 * the zone for when the problem goes away.
1757 * 1759 *
1758 * kswapd scans the zones in the highmem->normal->dma direction. It skips 1760 * kswapd scans the zones in the highmem->normal->dma direction. It skips
1759 * zones which have free_pages > pages_high, but once a zone is found to have 1761 * zones which have free_pages > high_wmark_pages(zone), but once a zone is
1760 * free_pages <= pages_high, we scan that zone and the lower zones regardless 1762 * found to have free_pages <= high_wmark_pages(zone), we scan that zone and the
1761 * of the number of free pages in the lower zones. This interoperates with 1763 * lower zones regardless of the number of free pages in the lower zones. This
1762 * the page allocator fallback scheme to ensure that aging of pages is balanced 1764 * interoperates with the page allocator fallback scheme to ensure that aging
1763 * across the zones. 1765 * of pages is balanced across the zones.
1764 */ 1766 */
1765static unsigned long balance_pgdat(pg_data_t *pgdat, int order) 1767static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
1766{ 1768{
@@ -1781,7 +1783,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
1781 }; 1783 };
1782 /* 1784 /*
1783 * temp_priority is used to remember the scanning priority at which 1785 * temp_priority is used to remember the scanning priority at which
1784 * this zone was successfully refilled to free_pages == pages_high. 1786 * this zone was successfully refilled to
1787 * free_pages == high_wmark_pages(zone).
1785 */ 1788 */
1786 int temp_priority[MAX_NR_ZONES]; 1789 int temp_priority[MAX_NR_ZONES];
1787 1790
@@ -1826,8 +1829,8 @@ loop_again:
1826 shrink_active_list(SWAP_CLUSTER_MAX, zone, 1829 shrink_active_list(SWAP_CLUSTER_MAX, zone,
1827 &sc, priority, 0); 1830 &sc, priority, 0);
1828 1831
1829 if (!zone_watermark_ok(zone, order, zone->pages_high, 1832 if (!zone_watermark_ok(zone, order,
1830 0, 0)) { 1833 high_wmark_pages(zone), 0, 0)) {
1831 end_zone = i; 1834 end_zone = i;
1832 break; 1835 break;
1833 } 1836 }
@@ -1861,8 +1864,8 @@ loop_again:
1861 priority != DEF_PRIORITY) 1864 priority != DEF_PRIORITY)
1862 continue; 1865 continue;
1863 1866
1864 if (!zone_watermark_ok(zone, order, zone->pages_high, 1867 if (!zone_watermark_ok(zone, order,
1865 end_zone, 0)) 1868 high_wmark_pages(zone), end_zone, 0))
1866 all_zones_ok = 0; 1869 all_zones_ok = 0;
1867 temp_priority[i] = priority; 1870 temp_priority[i] = priority;
1868 sc.nr_scanned = 0; 1871 sc.nr_scanned = 0;
@@ -1871,8 +1874,8 @@ loop_again:
1871 * We put equal pressure on every zone, unless one 1874 * We put equal pressure on every zone, unless one
1872 * zone has way too many pages free already. 1875 * zone has way too many pages free already.
1873 */ 1876 */
1874 if (!zone_watermark_ok(zone, order, 8*zone->pages_high, 1877 if (!zone_watermark_ok(zone, order,
1875 end_zone, 0)) 1878 8*high_wmark_pages(zone), end_zone, 0))
1876 shrink_zone(priority, zone, &sc); 1879 shrink_zone(priority, zone, &sc);
1877 reclaim_state->reclaimed_slab = 0; 1880 reclaim_state->reclaimed_slab = 0;
1878 nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL, 1881 nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
@@ -2038,7 +2041,7 @@ void wakeup_kswapd(struct zone *zone, int order)
2038 return; 2041 return;
2039 2042
2040 pgdat = zone->zone_pgdat; 2043 pgdat = zone->zone_pgdat;
2041 if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0)) 2044 if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0))
2042 return; 2045 return;
2043 if (pgdat->kswapd_max_order < order) 2046 if (pgdat->kswapd_max_order < order)
2044 pgdat->kswapd_max_order = order; 2047 pgdat->kswapd_max_order = order;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 74d66dba0cbe..415110772c73 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -714,9 +714,9 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
714 "\n spanned %lu" 714 "\n spanned %lu"
715 "\n present %lu", 715 "\n present %lu",
716 zone_page_state(zone, NR_FREE_PAGES), 716 zone_page_state(zone, NR_FREE_PAGES),
717 zone->pages_min, 717 min_wmark_pages(zone),
718 zone->pages_low, 718 low_wmark_pages(zone),
719 zone->pages_high, 719 high_wmark_pages(zone),
720 zone->pages_scanned, 720 zone->pages_scanned,
721 zone->lru[LRU_ACTIVE_ANON].nr_scan, 721 zone->lru[LRU_ACTIVE_ANON].nr_scan,
722 zone->lru[LRU_INACTIVE_ANON].nr_scan, 722 zone->lru[LRU_INACTIVE_ANON].nr_scan,