Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--	mm/vmscan.c	106

1 files changed, 66 insertions, 40 deletions

diff --git a/mm/vmscan.c b/mm/vmscan.c
index faa0a088f9cc..5ed24b94c5e6 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1124,8 +1124,20 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 				nr_lumpy_dirty++;
 			scan++;
 		} else {
-			/* the page is freed already. */
-			if (!page_count(cursor_page))
+			/*
+			 * Check if the page is freed already.
+			 *
+			 * We can't use page_count() as that
+			 * requires compound_head and we don't
+			 * have a pin on the page here. If a
+			 * page is tail, we may or may not
+			 * have isolated the head, so assume
+			 * it's not free, it'd be tricky to
+			 * track the head status without a
+			 * page pin.
+			 */
+			if (!PageTail(cursor_page) &&
+			    !atomic_read(&cursor_page->_count))
 				continue;
 			break;
 		}
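Why the new test is shaped this way: page_count() goes through compound_head(), and following a tail page's head pointer is only safe while the page is pinned; lumpy reclaim holds no pin on cursor_page. Reading _count directly and refusing to treat any tail page as free sidesteps that dereference, at the cost of occasionally taking the conservative break path. Below is a minimal userspace sketch of the decision, using simplified stand-in types rather than the kernel's struct page:

#include <stdbool.h>
#include <stdio.h>

struct page_model {
        int refcount;           /* raw counter, read without compound_head() */
        bool tail;              /* models PageTail() */
};

/* Mirrors the patched test: never report a tail page as free. */
static bool cursor_page_is_free(const struct page_model *page)
{
        return !page->tail && page->refcount == 0;
}

int main(void)
{
        struct page_model head = { .refcount = 0, .tail = false };
        struct page_model tail = { .refcount = 0, .tail = true };

        printf("head page, refcount 0 -> free? %d\n", cursor_page_is_free(&head));
        printf("tail page, refcount 0 -> free? %d\n", cursor_page_is_free(&tail));
        return 0;
}

The tail page reports not-free even with a zero counter, which is exactly the conservative assumption the new comment describes.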
@@ -1983,14 +1995,13 @@ restart:
  * If a zone is deemed to be full of pinned pages then just give it a light
  * scan then give up on it.
  */
-static unsigned long shrink_zones(int priority, struct zonelist *zonelist,
+static void shrink_zones(int priority, struct zonelist *zonelist,
 					struct scan_control *sc)
 {
 	struct zoneref *z;
 	struct zone *zone;
 	unsigned long nr_soft_reclaimed;
 	unsigned long nr_soft_scanned;
-	unsigned long total_scanned = 0;
 
 	for_each_zone_zonelist_nodemask(zone, z, zonelist,
 					gfp_zone(sc->gfp_mask), sc->nodemask) {
@@ -2005,19 +2016,23 @@ static unsigned long shrink_zones(int priority, struct zonelist *zonelist,
 				continue;
 			if (zone->all_unreclaimable && priority != DEF_PRIORITY)
 				continue;	/* Let kswapd poll it */
+			/*
+			 * This steals pages from memory cgroups over softlimit
+			 * and returns the number of reclaimed pages and
+			 * scanned pages. This works for global memory pressure
+			 * and balancing, not for a memcg's limit.
+			 */
+			nr_soft_scanned = 0;
+			nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
+						sc->order, sc->gfp_mask,
+						&nr_soft_scanned);
+			sc->nr_reclaimed += nr_soft_reclaimed;
+			sc->nr_scanned += nr_soft_scanned;
+			/* need some check for avoid more shrink_zone() */
 		}
 
-		nr_soft_scanned = 0;
-		nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
-					sc->order, sc->gfp_mask,
-					&nr_soft_scanned);
-		sc->nr_reclaimed += nr_soft_reclaimed;
-		total_scanned += nr_soft_scanned;
-
 		shrink_zone(priority, zone, sc);
 	}
-
-	return total_scanned;
 }
 
 static bool zone_reclaimable(struct zone *zone)
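The two hunks above change shrink_zones() from returning a scan total to recording everything in the shared scan_control: memcg soft-limit reclaim now runs per zone under global pressure, and its scan count feeds sc->nr_scanned, which the caller already totals for its writeback throttling, instead of a separately summed total_scanned. A simplified, compilable model of the new accounting flow follows; the types and the reclaim stub are illustrative only:

#include <stdio.h>

struct scan_control_model {
        unsigned long nr_scanned;
        unsigned long nr_reclaimed;
};

/* Stand-in for mem_cgroup_soft_limit_reclaim(): reports scans via out-param. */
static unsigned long soft_limit_reclaim_stub(unsigned long *nr_scanned)
{
        *nr_scanned = 32;       /* pretend 32 pages were scanned */
        return 8;               /* and 8 of them reclaimed */
}

static void shrink_zones_model(int nr_zones, struct scan_control_model *sc)
{
        for (int zone = 0; zone < nr_zones; zone++) {
                unsigned long nr_soft_scanned = 0;
                unsigned long nr_soft_reclaimed =
                                soft_limit_reclaim_stub(&nr_soft_scanned);

                /* accounting lands in the shared control struct, no return value */
                sc->nr_reclaimed += nr_soft_reclaimed;
                sc->nr_scanned += nr_soft_scanned;
        }
}

int main(void)
{
        struct scan_control_model sc = { 0, 0 };

        shrink_zones_model(3, &sc);
        printf("scanned %lu, reclaimed %lu\n", sc.nr_scanned, sc.nr_reclaimed);
        return 0;
}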
@@ -2081,8 +2096,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
 		sc->nr_scanned = 0;
 		if (!priority)
-			disable_swap_token();
-		total_scanned += shrink_zones(priority, zonelist, sc);
+			disable_swap_token(sc->mem_cgroup);
+		shrink_zones(priority, zonelist, sc);
 		/*
 		 * Don't shrink slabs when reclaiming memory from
 		 * over limit cgroups
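disable_swap_token() growing a mem_cgroup argument makes the swap token memcg-aware: passing sc->mem_cgroup here means limit-driven reclaim only disables a token whose holder lives in the cgroup being reclaimed, while the disable_swap_token(NULL) call in balance_pgdat() further down keeps the unconditional behaviour for global reclaim. A sketch of that NULL-means-global dispatch, with placeholder types rather than the kernel's swap-token internals:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct mem_cgroup_stub { const char *name; };

/* memcg of the current swap-token holder (placeholder for the real state) */
static struct mem_cgroup_stub *token_memcg;

/* A NULL caller models global reclaim: always allowed to drop the token. */
static bool swap_token_should_go(const struct mem_cgroup_stub *memcg)
{
        if (memcg == NULL)
                return true;
        /* memcg pressure: only drop a token held inside that memcg */
        return token_memcg == memcg;
}

int main(void)
{
        struct mem_cgroup_stub a = { "A" }, b = { "B" };

        token_memcg = &a;
        printf("global reclaim drops token:  %d\n", swap_token_should_go(NULL));
        printf("memcg A reclaim drops token: %d\n", swap_token_should_go(&a));
        printf("memcg B reclaim drops token: %d\n", swap_token_should_go(&b));
        return 0;
}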
@@ -2311,7 +2326,7 @@ static bool sleeping_prematurely(pg_data_t *pgdat, int order, long remaining,
 		return true;
 
 	/* Check the watermark levels */
-	for (i = 0; i < pgdat->nr_zones; i++) {
+	for (i = 0; i <= classzone_idx; i++) {
 		struct zone *zone = pgdat->node_zones + i;
 
 		if (!populated_zone(zone))
@@ -2329,7 +2344,7 @@
 		}
 
 		if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone),
-							classzone_idx, 0))
+							i, 0))
 			all_zones_ok = false;
 		else
 			balanced += zone->present_pages;
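Both sleeping_prematurely() hunks narrow what kswapd weighs before going back to sleep: only zones up to the classzone_idx it was actually asked to balance are inspected, and each zone's high watermark is now judged against the zone's own index i rather than the requested classzone. A toy model of the adjusted loop; the free_pages/high_wmark arrays are fabricated numbers standing in for zone_watermark_ok_safe():

#include <stdbool.h>
#include <stdio.h>

static const unsigned long free_pages[] = { 800,  50, 1200 };
static const unsigned long high_wmark[] = { 500, 100, 1000 };

/* toy stand-in for zone_watermark_ok_safe(zone, order, mark, idx, 0) */
static bool watermark_ok(int zone_idx)
{
        return free_pages[zone_idx] >= high_wmark[zone_idx];
}

int main(void)
{
        int classzone_idx = 1;  /* the wakeup only asked to balance zones 0..1 */
        bool all_zones_ok = true;

        /* was: for (i = 0; i < nr_zones; i++) checking against classzone_idx */
        for (int i = 0; i <= classzone_idx; i++)
                if (!watermark_ok(i))
                        all_zones_ok = false;

        printf("all_zones_ok = %d\n", all_zones_ok);    /* zone 1 is short: 0 */
        return 0;
}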
@@ -2407,7 +2422,7 @@ loop_again:
 
 		/* The swap token gets in the way of swapout... */
 		if (!priority)
-			disable_swap_token();
+			disable_swap_token(NULL);
 
 		all_zones_ok = 1;
 		balanced = 0;
@@ -2436,7 +2451,6 @@ loop_again:
 			if (!zone_watermark_ok_safe(zone, order,
 					high_wmark_pages(zone), 0, 0)) {
 				end_zone = i;
-				*classzone_idx = i;
 				break;
 			}
 		}
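Removing the *classzone_idx = i; assignment stops the downward scan for end_zone from overwriting the classzone the caller asked balance_pgdat() to balance; that value has to survive intact because kswapd() later uses it to decide whether it may sleep. A contrived sketch of the repaired out-parameter discipline; the names are illustrative, not the kernel's:

#include <stdio.h>

#define NR_ZONES 3

/* toy state: only the lowest zone is unbalanced */
static const int zone_balanced[NR_ZONES] = { 0, 1, 1 };

/* scan from the top zone down, as balance_pgdat() does */
static int find_end_zone(int *classzone_idx)
{
        (void)classzone_idx;    /* fixed version never writes through it */
        for (int i = NR_ZONES - 1; i >= 0; i--)
                if (!zone_balanced[i])
                        return i;  /* buggy version also did *classzone_idx = i */
        return -1;
}

int main(void)
{
        int classzone_idx = NR_ZONES - 1;       /* caller requested the top zone */
        int end_zone = find_end_zone(&classzone_idx);

        /* the request survives; only end_zone reflects what the scan found */
        printf("end_zone=%d classzone_idx=%d\n", end_zone, classzone_idx);
        return 0;
}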
@@ -2495,18 +2509,18 @@ loop_again:
 					KSWAPD_ZONE_BALANCE_GAP_RATIO);
 			if (!zone_watermark_ok_safe(zone, order,
 					high_wmark_pages(zone) + balance_gap,
-					end_zone, 0))
+					end_zone, 0)) {
 				shrink_zone(priority, zone, &sc);
-			reclaim_state->reclaimed_slab = 0;
-			nr_slab = shrink_slab(&shrink, sc.nr_scanned, lru_pages);
-			sc.nr_reclaimed += reclaim_state->reclaimed_slab;
-			total_scanned += sc.nr_scanned;
 
-			if (zone->all_unreclaimable)
-				continue;
-			if (nr_slab == 0 &&
-			    !zone_reclaimable(zone))
-				zone->all_unreclaimable = 1;
+				reclaim_state->reclaimed_slab = 0;
+				nr_slab = shrink_slab(&shrink, sc.nr_scanned, lru_pages);
+				sc.nr_reclaimed += reclaim_state->reclaimed_slab;
+				total_scanned += sc.nr_scanned;
+
+				if (nr_slab == 0 && !zone_reclaimable(zone))
+					zone->all_unreclaimable = 1;
+			}
+
 			/*
 			 * If we've done a decent amount of scanning and
 			 * the reclaim ratio is low, start doing writepage
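The restructured block couples slab pressure to zone pressure: shrink_slab() now runs only inside the branch where the zone failed its gap-padded watermark test and shrink_zone() was actually called, rather than unconditionally afterwards, and the zone->all_unreclaimable early-continue moves below (next hunk) so it can also adjust end_zone. A condensed model of the control-flow change, with stubs in place of the real shrinkers and made-up numbers:

#include <stdbool.h>
#include <stdio.h>

/* stubs standing in for shrink_zone()/shrink_slab() */
static unsigned long shrink_zone_stub(void) { return 16; }
static unsigned long shrink_slab_stub(void) { return 4; }

static unsigned long reclaim_one_zone(bool watermark_ok)
{
        unsigned long reclaimed = 0;

        /* new flow: a balanced zone takes neither zone nor slab pressure */
        if (!watermark_ok) {
                reclaimed += shrink_zone_stub();
                reclaimed += shrink_slab_stub();
        }
        return reclaimed;
}

int main(void)
{
        printf("balanced zone reclaimed:   %lu\n", reclaim_one_zone(true));
        printf("unbalanced zone reclaimed: %lu\n", reclaim_one_zone(false));
        return 0;
}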
@@ -2516,6 +2530,12 @@ loop_again:
 			    total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2)
 				sc.may_writepage = 1;
 
+			if (zone->all_unreclaimable) {
+				if (end_zone && end_zone == i)
+					end_zone--;
+				continue;
+			}
+
 			if (!zone_watermark_ok_safe(zone, order,
 					high_wmark_pages(zone), end_zone, 0)) {
 				all_zones_ok = 0;
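This new block is where a hopeless zone now gets handled: it is skipped for further reclaim, and if it sits at the top of the balancing range, end_zone is pulled down one so later iterations and priorities stop targeting it. A small standalone model of that adjustment; the zone states are fabricated:

#include <stdbool.h>
#include <stdio.h>

#define NR_ZONES 3

int main(void)
{
        /* pretend the top zone was marked all_unreclaimable this pass */
        bool all_unreclaimable[NR_ZONES] = { false, false, true };
        int end_zone = NR_ZONES - 1;

        for (int i = 0; i <= end_zone; i++) {
                if (all_unreclaimable[i]) {
                        /* mirror of the hunk above */
                        if (end_zone && end_zone == i)
                                end_zone--;
                        continue;
                }
                printf("zone %d still gets reclaim pressure\n", i);
        }
        printf("end_zone lowered to %d\n", end_zone);
        return 0;
}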
@@ -2694,8 +2714,8 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx)
  */
 static int kswapd(void *p)
 {
-	unsigned long order;
-	int classzone_idx;
+	unsigned long order, new_order;
+	int classzone_idx, new_classzone_idx;
 	pg_data_t *pgdat = (pg_data_t*)p;
 	struct task_struct *tsk = current;
 
@@ -2725,17 +2745,23 @@ static int kswapd(void *p)
 	tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
 	set_freezable();
 
-	order = 0;
-	classzone_idx = MAX_NR_ZONES - 1;
+	order = new_order = 0;
+	classzone_idx = new_classzone_idx = pgdat->nr_zones - 1;
 	for ( ; ; ) {
-		unsigned long new_order;
-		int new_classzone_idx;
 		int ret;
 
-		new_order = pgdat->kswapd_max_order;
-		new_classzone_idx = pgdat->classzone_idx;
-		pgdat->kswapd_max_order = 0;
-		pgdat->classzone_idx = MAX_NR_ZONES - 1;
+		/*
+		 * If the last balance_pgdat was unsuccessful it's unlikely a
+		 * new request of a similar or harder type will succeed soon
+		 * so consider going to sleep on the basis we reclaimed at
+		 */
+		if (classzone_idx >= new_classzone_idx && order == new_order) {
+			new_order = pgdat->kswapd_max_order;
+			new_classzone_idx = pgdat->classzone_idx;
+			pgdat->kswapd_max_order = 0;
+			pgdat->classzone_idx = pgdat->nr_zones - 1;
+		}
+
 		if (order < new_order || classzone_idx > new_classzone_idx) {
 			/*
 			 * Don't sleep if someone wants a larger 'order'
@@ -2748,7 +2774,7 @@ static int kswapd(void *p)
 			order = pgdat->kswapd_max_order;
 			classzone_idx = pgdat->classzone_idx;
 			pgdat->kswapd_max_order = 0;
-			pgdat->classzone_idx = MAX_NR_ZONES - 1;
+			pgdat->classzone_idx = pgdat->nr_zones - 1;
 		}
 
 		ret = try_to_freeze();
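Taken together, the kswapd() hunks make the daemon finish, or explicitly abandon, the request it is working on before accepting a new one: pgdat->kswapd_max_order and pgdat->classzone_idx are re-read only once the current order has been met and the classzone is no harder than requested, and the idle reset value becomes the node's own highest zone (pgdat->nr_zones - 1) instead of MAX_NR_ZONES - 1, which matters on nodes that do not populate every zone. A compressed, compilable model of that accept-or-retry gate; the wakeup source and all values are simulated:

#include <stdio.h>

struct pgdat_stub {
        unsigned long kswapd_max_order;
        int classzone_idx;
        int nr_zones;
};

/* One iteration of the gate the patch adds at the top of kswapd()'s loop. */
static void maybe_take_new_request(struct pgdat_stub *pgdat,
                                   unsigned long *order, int *classzone_idx,
                                   unsigned long *new_order, int *new_classzone_idx)
{
        /* only fetch a fresh request once the previous one was satisfied */
        if (*classzone_idx >= *new_classzone_idx && *order == *new_order) {
                *new_order = pgdat->kswapd_max_order;
                *new_classzone_idx = pgdat->classzone_idx;
                pgdat->kswapd_max_order = 0;
                pgdat->classzone_idx = pgdat->nr_zones - 1; /* node-local reset */
        }
        if (*order < *new_order || *classzone_idx > *new_classzone_idx) {
                *order = *new_order;            /* harder request: take it now */
                *classzone_idx = *new_classzone_idx;
        }
}

int main(void)
{
        struct pgdat_stub pgdat = { .kswapd_max_order = 3, .classzone_idx = 1,
                                    .nr_zones = 3 };
        unsigned long order = 0, new_order = 0;
        int classzone_idx = pgdat.nr_zones - 1;
        int new_classzone_idx = classzone_idx;

        maybe_take_new_request(&pgdat, &order, &classzone_idx,
                               &new_order, &new_classzone_idx);
        printf("working on order=%lu classzone_idx=%d\n", order, classzone_idx);
        return 0;
}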