aboutsummaryrefslogtreecommitdiffstats
path: root/mm/vmscan.c
diff options
context:
space:
mode:
author Jiri Kosina <jkosina@suse.cz> 2011-07-11 08:15:48 -0400
committer Jiri Kosina <jkosina@suse.cz> 2011-07-11 08:15:55 -0400
commit b7e9c223be8ce335e30f2cf6ba588e6a4092275c (patch)
tree 2d1e3b75606abc18df7ad65e51ac3f90cd68b38d /mm/vmscan.c
parent c172d82500a6cf3c32d1e650722a1055d72ce858 (diff)
parent e3bbfa78bab125f58b831b5f7f45b5a305091d72 (diff)
Merge branch 'master' into for-next
Sync with Linus' tree to be able to apply pending patches that are based on newer code already present upstream.
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r-- mm/vmscan.c 106
1 files changed, 66 insertions, 40 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index faa0a088f9cc..5ed24b94c5e6 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1124,8 +1124,20 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
1124 nr_lumpy_dirty++; 1124 nr_lumpy_dirty++;
1125 scan++; 1125 scan++;
1126 } else { 1126 } else {
1127 /* the page is freed already. */ 1127 /*
1128 if (!page_count(cursor_page)) 1128 * Check if the page is freed already.
1129 *
1130 * We can't use page_count() as that
1131 * requires compound_head and we don't
1132 * have a pin on the page here. If a
1133 * page is tail, we may or may not
1134 * have isolated the head, so assume
1135 * it's not free, it'd be tricky to
1136 * track the head status without a
1137 * page pin.
1138 */
1139 if (!PageTail(cursor_page) &&
1140 !atomic_read(&cursor_page->_count))
1129 continue; 1141 continue;
1130 break; 1142 break;
1131 } 1143 }
@@ -1983,14 +1995,13 @@ restart:
1983 * If a zone is deemed to be full of pinned pages then just give it a light 1995 * If a zone is deemed to be full of pinned pages then just give it a light
1984 * scan then give up on it. 1996 * scan then give up on it.
1985 */ 1997 */
1986 static unsigned long shrink_zones(int priority, struct zonelist *zonelist, 1998 static void shrink_zones(int priority, struct zonelist *zonelist,
1987 struct scan_control *sc) 1999 struct scan_control *sc)
1988{ 2000{
1989 struct zoneref *z; 2001 struct zoneref *z;
1990 struct zone *zone; 2002 struct zone *zone;
1991 unsigned long nr_soft_reclaimed; 2003 unsigned long nr_soft_reclaimed;
1992 unsigned long nr_soft_scanned; 2004 unsigned long nr_soft_scanned;
1993 unsigned long total_scanned = 0;
1994 2005
1995 for_each_zone_zonelist_nodemask(zone, z, zonelist, 2006 for_each_zone_zonelist_nodemask(zone, z, zonelist,
1996 gfp_zone(sc->gfp_mask), sc->nodemask) { 2007 gfp_zone(sc->gfp_mask), sc->nodemask) {
@@ -2005,19 +2016,23 @@ static unsigned long shrink_zones(int priority, struct zonelist *zonelist,
2005 continue; 2016 continue;
2006 if (zone->all_unreclaimable && priority != DEF_PRIORITY) 2017 if (zone->all_unreclaimable && priority != DEF_PRIORITY)
2007 continue; /* Let kswapd poll it */ 2018 continue; /* Let kswapd poll it */
2019 /*
2020 * This steals pages from memory cgroups over softlimit
2021 * and returns the number of reclaimed pages and
2022 * scanned pages. This works for global memory pressure
2023 * and balancing, not for a memcg's limit.
2024 */
2025 nr_soft_scanned = 0;
2026 nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
2027 sc->order, sc->gfp_mask,
2028 &nr_soft_scanned);
2029 sc->nr_reclaimed += nr_soft_reclaimed;
2030 sc->nr_scanned += nr_soft_scanned;
2031 /* need some check for avoid more shrink_zone() */
2008 } 2032 }
2009 2033
2010 nr_soft_scanned = 0;
2011 nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
2012 sc->order, sc->gfp_mask,
2013 &nr_soft_scanned);
2014 sc->nr_reclaimed += nr_soft_reclaimed;
2015 total_scanned += nr_soft_scanned;
2016
2017 shrink_zone(priority, zone, sc); 2034 shrink_zone(priority, zone, sc);
2018 } 2035 }
2019
2020 return total_scanned;
2021} 2036}
2022 2037
2023static bool zone_reclaimable(struct zone *zone) 2038static bool zone_reclaimable(struct zone *zone)
@@ -2081,8 +2096,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
2081 for (priority = DEF_PRIORITY; priority >= 0; priority--) { 2096 for (priority = DEF_PRIORITY; priority >= 0; priority--) {
2082 sc->nr_scanned = 0; 2097 sc->nr_scanned = 0;
2083 if (!priority) 2098 if (!priority)
2084 disable_swap_token(); 2099 disable_swap_token(sc->mem_cgroup);
2085 total_scanned += shrink_zones(priority, zonelist, sc); 2100 shrink_zones(priority, zonelist, sc);
2086 /* 2101 /*
2087 * Don't shrink slabs when reclaiming memory from 2102 * Don't shrink slabs when reclaiming memory from
2088 * over limit cgroups 2103 * over limit cgroups
@@ -2311,7 +2326,7 @@ static bool sleeping_prematurely(pg_data_t *pgdat, int order, long remaining,
2311 return true; 2326 return true;
2312 2327
2313 /* Check the watermark levels */ 2328 /* Check the watermark levels */
2314 for (i = 0; i < pgdat->nr_zones; i++) { 2329 for (i = 0; i <= classzone_idx; i++) {
2315 struct zone *zone = pgdat->node_zones + i; 2330 struct zone *zone = pgdat->node_zones + i;
2316 2331
2317 if (!populated_zone(zone)) 2332 if (!populated_zone(zone))
@@ -2329,7 +2344,7 @@ static bool sleeping_prematurely(pg_data_t *pgdat, int order, long remaining,
2329 } 2344 }
2330 2345
2331 if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone), 2346 if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone),
2332 classzone_idx, 0)) 2347 i, 0))
2333 all_zones_ok = false; 2348 all_zones_ok = false;
2334 else 2349 else
2335 balanced += zone->present_pages; 2350 balanced += zone->present_pages;
@@ -2407,7 +2422,7 @@ loop_again:
2407 2422
2408 /* The swap token gets in the way of swapout... */ 2423 /* The swap token gets in the way of swapout... */
2409 if (!priority) 2424 if (!priority)
2410 disable_swap_token(); 2425 disable_swap_token(NULL);
2411 2426
2412 all_zones_ok = 1; 2427 all_zones_ok = 1;
2413 balanced = 0; 2428 balanced = 0;
@@ -2436,7 +2451,6 @@ loop_again:
2436 if (!zone_watermark_ok_safe(zone, order, 2451 if (!zone_watermark_ok_safe(zone, order,
2437 high_wmark_pages(zone), 0, 0)) { 2452 high_wmark_pages(zone), 0, 0)) {
2438 end_zone = i; 2453 end_zone = i;
2439 *classzone_idx = i;
2440 break; 2454 break;
2441 } 2455 }
2442 } 2456 }
@@ -2495,18 +2509,18 @@ loop_again:
2495 KSWAPD_ZONE_BALANCE_GAP_RATIO); 2509 KSWAPD_ZONE_BALANCE_GAP_RATIO);
2496 if (!zone_watermark_ok_safe(zone, order, 2510 if (!zone_watermark_ok_safe(zone, order,
2497 high_wmark_pages(zone) + balance_gap, 2511 high_wmark_pages(zone) + balance_gap,
2498 end_zone, 0)) 2512 end_zone, 0)) {
2499 shrink_zone(priority, zone, &sc); 2513 shrink_zone(priority, zone, &sc);
2500 reclaim_state->reclaimed_slab = 0;
2501 nr_slab = shrink_slab(&shrink, sc.nr_scanned, lru_pages);
2502 sc.nr_reclaimed += reclaim_state->reclaimed_slab;
2503 total_scanned += sc.nr_scanned;
2504 2514
2505 if (zone->all_unreclaimable) 2515 reclaim_state->reclaimed_slab = 0;
2506 continue; 2516 nr_slab = shrink_slab(&shrink, sc.nr_scanned, lru_pages);
2507 if (nr_slab == 0 && 2517 sc.nr_reclaimed += reclaim_state->reclaimed_slab;
2508 !zone_reclaimable(zone)) 2518 total_scanned += sc.nr_scanned;
2509 zone->all_unreclaimable = 1; 2519
2520 if (nr_slab == 0 && !zone_reclaimable(zone))
2521 zone->all_unreclaimable = 1;
2522 }
2523
2510 /* 2524 /*
2511 * If we've done a decent amount of scanning and 2525 * If we've done a decent amount of scanning and
2512 * the reclaim ratio is low, start doing writepage 2526 * the reclaim ratio is low, start doing writepage
@@ -2516,6 +2530,12 @@ loop_again:
2516 total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2) 2530 total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2)
2517 sc.may_writepage = 1; 2531 sc.may_writepage = 1;
2518 2532
2533 if (zone->all_unreclaimable) {
2534 if (end_zone && end_zone == i)
2535 end_zone--;
2536 continue;
2537 }
2538
2519 if (!zone_watermark_ok_safe(zone, order, 2539 if (!zone_watermark_ok_safe(zone, order,
2520 high_wmark_pages(zone), end_zone, 0)) { 2540 high_wmark_pages(zone), end_zone, 0)) {
2521 all_zones_ok = 0; 2541 all_zones_ok = 0;
@@ -2694,8 +2714,8 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx)
2694 */ 2714 */
2695 static int kswapd(void *p) 2715 static int kswapd(void *p)
2696{ 2716{
2697 unsigned long order; 2717 unsigned long order, new_order;
2698 int classzone_idx; 2718 int classzone_idx, new_classzone_idx;
2699 pg_data_t *pgdat = (pg_data_t*)p; 2719 pg_data_t *pgdat = (pg_data_t*)p;
2700 struct task_struct *tsk = current; 2720 struct task_struct *tsk = current;
2701 2721
@@ -2725,17 +2745,23 @@ static int kswapd(void *p)
2725 tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD; 2745 tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
2726 set_freezable(); 2746 set_freezable();
2727 2747
2728 order = 0; 2748 order = new_order = 0;
2729 classzone_idx = MAX_NR_ZONES - 1; 2749 classzone_idx = new_classzone_idx = pgdat->nr_zones - 1;
2730 for ( ; ; ) { 2750 for ( ; ; ) {
2731 unsigned long new_order;
2732 int new_classzone_idx;
2733 int ret; 2751 int ret;
2734 2752
2735 new_order = pgdat->kswapd_max_order; 2753 /*
2736 new_classzone_idx = pgdat->classzone_idx; 2754 * If the last balance_pgdat was unsuccessful it's unlikely a
2737 pgdat->kswapd_max_order = 0; 2755 * new request of a similar or harder type will succeed soon
2738 pgdat->classzone_idx = MAX_NR_ZONES - 1; 2756 * so consider going to sleep on the basis we reclaimed at
2757 */
2758 if (classzone_idx >= new_classzone_idx && order == new_order) {
2759 new_order = pgdat->kswapd_max_order;
2760 new_classzone_idx = pgdat->classzone_idx;
2761 pgdat->kswapd_max_order = 0;
2762 pgdat->classzone_idx = pgdat->nr_zones - 1;
2763 }
2764
2739 if (order < new_order || classzone_idx > new_classzone_idx) { 2765 if (order < new_order || classzone_idx > new_classzone_idx) {
2740 /* 2766 /*
2741 * Don't sleep if someone wants a larger 'order' 2767 * Don't sleep if someone wants a larger 'order'
@@ -2748,7 +2774,7 @@ static int kswapd(void *p)
2748 order = pgdat->kswapd_max_order; 2774 order = pgdat->kswapd_max_order;
2749 classzone_idx = pgdat->classzone_idx; 2775 classzone_idx = pgdat->classzone_idx;
2750 pgdat->kswapd_max_order = 0; 2776 pgdat->kswapd_max_order = 0;
2751 pgdat->classzone_idx = MAX_NR_ZONES - 1; 2777 pgdat->classzone_idx = pgdat->nr_zones - 1;
2752 } 2778 }
2753 2779
2754 ret = try_to_freeze(); 2780 ret = try_to_freeze();