aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Wu Fengguang <fengguang.wu@intel.com> 2009-09-21 20:03:11 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2009-09-22 10:17:39 -0400
commit: f86296317434b21585e229f6c49a33cb9ebab4d3 (patch)
tree: d4fb05d4aee1a8e373ec053e7316dc9847b2c417
parent: 1a8670a29b5277cbe601f74ab63d2c5211fb3005 (diff)
mm: do batched scans for mem_cgroup
For mem_cgroup, shrink_zone() may call shrink_list() with nr_to_scan=1, in which case shrink_list() _still_ calls isolate_pages() with the much larger SWAP_CLUSTER_MAX. It effectively scales up the inactive list scan rate by up to 32 times.

For example, with 16k inactive pages and DEF_PRIORITY=12, (16k >> 12)=4. So when shrink_zone() expects to scan 4 pages in the active/inactive list, 4 pages of the active list will be scanned, while the inactive list will in effect be (over) scanned by SWAP_CLUSTER_MAX=32 pages. And that could break the balance between the two lists.

It can further impact the scan of the anon active list, due to the anon active/inactive ratio rebalance logic in balance_pgdat()/shrink_zone():

    inactive anon list over scanned => inactive_anon_is_low() == TRUE
                                    => shrink_active_list()
                                    => active anon list over scanned

So the end result may be:

- anon inactive => over scanned
- anon active => over scanned (maybe not as much)
- file inactive => over scanned
- file active => under scanned (relatively)

The accesses to nr_saved_scan are not lock protected and so are not 100% accurate; however, we can tolerate small errors and the resulting slightly imbalanced scan rates between zones.

Cc: Rik van Riel <riel@redhat.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--  include/linux/mmzone.h  6
-rw-r--r--  mm/page_alloc.c  2
-rw-r--r--  mm/vmscan.c  20
3 files changed, 17 insertions, 11 deletions
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 9c50309b30a1..c188ea624c74 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -273,6 +273,11 @@ struct zone_reclaim_stat {
273 */ 273 */
274 unsigned long recent_rotated[2]; 274 unsigned long recent_rotated[2];
275 unsigned long recent_scanned[2]; 275 unsigned long recent_scanned[2];
276
277 /*
278 * accumulated for batching
279 */
280 unsigned long nr_saved_scan[NR_LRU_LISTS];
276}; 281};
277 282
278struct zone { 283struct zone {
@@ -327,7 +332,6 @@ struct zone {
327 spinlock_t lru_lock; 332 spinlock_t lru_lock;
328 struct zone_lru { 333 struct zone_lru {
329 struct list_head list; 334 struct list_head list;
330 unsigned long nr_saved_scan; /* accumulated for batching */
331 } lru[NR_LRU_LISTS]; 335 } lru[NR_LRU_LISTS];
332 336
333 struct zone_reclaim_stat reclaim_stat; 337 struct zone_reclaim_stat reclaim_stat;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 770f011e1c12..84d9da1e8f4c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3809,7 +3809,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
3809 zone_pcp_init(zone); 3809 zone_pcp_init(zone);
3810 for_each_lru(l) { 3810 for_each_lru(l) {
3811 INIT_LIST_HEAD(&zone->lru[l].list); 3811 INIT_LIST_HEAD(&zone->lru[l].list);
3812 zone->lru[l].nr_saved_scan = 0; 3812 zone->reclaim_stat.nr_saved_scan[l] = 0;
3813 } 3813 }
3814 zone->reclaim_stat.recent_rotated[0] = 0; 3814 zone->reclaim_stat.recent_rotated[0] = 0;
3815 zone->reclaim_stat.recent_rotated[1] = 0; 3815 zone->reclaim_stat.recent_rotated[1] = 0;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5432c230c4cb..0e7f5e4a22d7 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1586,6 +1586,7 @@ static void shrink_zone(int priority, struct zone *zone,
1586 enum lru_list l; 1586 enum lru_list l;
1587 unsigned long nr_reclaimed = sc->nr_reclaimed; 1587 unsigned long nr_reclaimed = sc->nr_reclaimed;
1588 unsigned long swap_cluster_max = sc->swap_cluster_max; 1588 unsigned long swap_cluster_max = sc->swap_cluster_max;
1589 struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
1589 int noswap = 0; 1590 int noswap = 0;
1590 1591
1591 /* If we have no swap space, do not bother scanning anon pages. */ 1592 /* If we have no swap space, do not bother scanning anon pages. */
@@ -1605,12 +1606,9 @@ static void shrink_zone(int priority, struct zone *zone,
1605 scan >>= priority; 1606 scan >>= priority;
1606 scan = (scan * percent[file]) / 100; 1607 scan = (scan * percent[file]) / 100;
1607 } 1608 }
1608 if (scanning_global_lru(sc)) 1609 nr[l] = nr_scan_try_batch(scan,
1609 nr[l] = nr_scan_try_batch(scan, 1610 &reclaim_stat->nr_saved_scan[l],
1610 &zone->lru[l].nr_saved_scan, 1611 swap_cluster_max);
1611 swap_cluster_max);
1612 else
1613 nr[l] = scan;
1614 } 1612 }
1615 1613
1616 while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || 1614 while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
@@ -2220,6 +2218,7 @@ static void shrink_all_zones(unsigned long nr_pages, int prio,
2220{ 2218{
2221 struct zone *zone; 2219 struct zone *zone;
2222 unsigned long nr_reclaimed = 0; 2220 unsigned long nr_reclaimed = 0;
2221 struct zone_reclaim_stat *reclaim_stat;
2223 2222
2224 for_each_populated_zone(zone) { 2223 for_each_populated_zone(zone) {
2225 enum lru_list l; 2224 enum lru_list l;
@@ -2236,11 +2235,14 @@ static void shrink_all_zones(unsigned long nr_pages, int prio,
2236 l == LRU_ACTIVE_FILE)) 2235 l == LRU_ACTIVE_FILE))
2237 continue; 2236 continue;
2238 2237
2239 zone->lru[l].nr_saved_scan += (lru_pages >> prio) + 1; 2238 reclaim_stat = get_reclaim_stat(zone, sc);
2240 if (zone->lru[l].nr_saved_scan >= nr_pages || pass > 3) { 2239 reclaim_stat->nr_saved_scan[l] +=
2240 (lru_pages >> prio) + 1;
2241 if (reclaim_stat->nr_saved_scan[l]
2242 >= nr_pages || pass > 3) {
2241 unsigned long nr_to_scan; 2243 unsigned long nr_to_scan;
2242 2244
2243 zone->lru[l].nr_saved_scan = 0; 2245 reclaim_stat->nr_saved_scan[l] = 0;
2244 nr_to_scan = min(nr_pages, lru_pages); 2246 nr_to_scan = min(nr_pages, lru_pages);
2245 nr_reclaimed += shrink_list(l, nr_to_scan, zone, 2247 nr_reclaimed += shrink_list(l, nr_to_scan, zone,
2246 sc, prio); 2248 sc, prio);