diff options
author | Wu Fengguang <fengguang.wu@intel.com> | 2009-09-21 20:03:11 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-09-22 10:17:39 -0400 |
commit | f86296317434b21585e229f6c49a33cb9ebab4d3 (patch) | |
tree | d4fb05d4aee1a8e373ec053e7316dc9847b2c417 | |
parent | 1a8670a29b5277cbe601f74ab63d2c5211fb3005 (diff) |
mm: do batched scans for mem_cgroup
For mem_cgroup, shrink_zone() may call shrink_list() with nr_to_scan=1, in
which case shrink_list() _still_ calls isolate_pages() with the much
larger SWAP_CLUSTER_MAX. It effectively scales up the inactive list scan
rate by up to 32 times.
For example, with 16k inactive pages and DEF_PRIORITY=12, (16k >> 12)=4.
So when shrink_zone() expects to scan 4 pages in the active/inactive lists,
4 pages of the active list will be scanned, while the inactive list will in
effect be (over)scanned by SWAP_CLUSTER_MAX=32 pages. That could break
the balance between the two lists.
It can further impact the scan of the anon active list, due to the anon
active/inactive ratio rebalance logic in balance_pgdat()/shrink_zone():
inactive anon list over scanned => inactive_anon_is_low() == TRUE
=> shrink_active_list()
=> active anon list over scanned
So the end result may be
- anon inactive => over scanned
- anon active => over scanned (maybe not as much)
- file inactive => over scanned
- file active => under scanned (relatively)
The accesses to nr_saved_scan are not lock-protected and so are not 100%
accurate; however, we can tolerate small errors and the resulting small
imbalance in scan rates between zones.
Cc: Rik van Riel <riel@redhat.com>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | include/linux/mmzone.h | 6 | ||||
-rw-r--r-- | mm/page_alloc.c | 2 | ||||
-rw-r--r-- | mm/vmscan.c | 20 |
3 files changed, 17 insertions, 11 deletions
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 9c50309b30a1..c188ea624c74 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h | |||
@@ -273,6 +273,11 @@ struct zone_reclaim_stat { | |||
273 | */ | 273 | */ |
274 | unsigned long recent_rotated[2]; | 274 | unsigned long recent_rotated[2]; |
275 | unsigned long recent_scanned[2]; | 275 | unsigned long recent_scanned[2]; |
276 | |||
277 | /* | ||
278 | * accumulated for batching | ||
279 | */ | ||
280 | unsigned long nr_saved_scan[NR_LRU_LISTS]; | ||
276 | }; | 281 | }; |
277 | 282 | ||
278 | struct zone { | 283 | struct zone { |
@@ -327,7 +332,6 @@ struct zone { | |||
327 | spinlock_t lru_lock; | 332 | spinlock_t lru_lock; |
328 | struct zone_lru { | 333 | struct zone_lru { |
329 | struct list_head list; | 334 | struct list_head list; |
330 | unsigned long nr_saved_scan; /* accumulated for batching */ | ||
331 | } lru[NR_LRU_LISTS]; | 335 | } lru[NR_LRU_LISTS]; |
332 | 336 | ||
333 | struct zone_reclaim_stat reclaim_stat; | 337 | struct zone_reclaim_stat reclaim_stat; |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 770f011e1c12..84d9da1e8f4c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -3809,7 +3809,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, | |||
3809 | zone_pcp_init(zone); | 3809 | zone_pcp_init(zone); |
3810 | for_each_lru(l) { | 3810 | for_each_lru(l) { |
3811 | INIT_LIST_HEAD(&zone->lru[l].list); | 3811 | INIT_LIST_HEAD(&zone->lru[l].list); |
3812 | zone->lru[l].nr_saved_scan = 0; | 3812 | zone->reclaim_stat.nr_saved_scan[l] = 0; |
3813 | } | 3813 | } |
3814 | zone->reclaim_stat.recent_rotated[0] = 0; | 3814 | zone->reclaim_stat.recent_rotated[0] = 0; |
3815 | zone->reclaim_stat.recent_rotated[1] = 0; | 3815 | zone->reclaim_stat.recent_rotated[1] = 0; |
diff --git a/mm/vmscan.c b/mm/vmscan.c index 5432c230c4cb..0e7f5e4a22d7 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -1586,6 +1586,7 @@ static void shrink_zone(int priority, struct zone *zone, | |||
1586 | enum lru_list l; | 1586 | enum lru_list l; |
1587 | unsigned long nr_reclaimed = sc->nr_reclaimed; | 1587 | unsigned long nr_reclaimed = sc->nr_reclaimed; |
1588 | unsigned long swap_cluster_max = sc->swap_cluster_max; | 1588 | unsigned long swap_cluster_max = sc->swap_cluster_max; |
1589 | struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc); | ||
1589 | int noswap = 0; | 1590 | int noswap = 0; |
1590 | 1591 | ||
1591 | /* If we have no swap space, do not bother scanning anon pages. */ | 1592 | /* If we have no swap space, do not bother scanning anon pages. */ |
@@ -1605,12 +1606,9 @@ static void shrink_zone(int priority, struct zone *zone, | |||
1605 | scan >>= priority; | 1606 | scan >>= priority; |
1606 | scan = (scan * percent[file]) / 100; | 1607 | scan = (scan * percent[file]) / 100; |
1607 | } | 1608 | } |
1608 | if (scanning_global_lru(sc)) | 1609 | nr[l] = nr_scan_try_batch(scan, |
1609 | nr[l] = nr_scan_try_batch(scan, | 1610 | &reclaim_stat->nr_saved_scan[l], |
1610 | &zone->lru[l].nr_saved_scan, | 1611 | swap_cluster_max); |
1611 | swap_cluster_max); | ||
1612 | else | ||
1613 | nr[l] = scan; | ||
1614 | } | 1612 | } |
1615 | 1613 | ||
1616 | while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || | 1614 | while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || |
@@ -2220,6 +2218,7 @@ static void shrink_all_zones(unsigned long nr_pages, int prio, | |||
2220 | { | 2218 | { |
2221 | struct zone *zone; | 2219 | struct zone *zone; |
2222 | unsigned long nr_reclaimed = 0; | 2220 | unsigned long nr_reclaimed = 0; |
2221 | struct zone_reclaim_stat *reclaim_stat; | ||
2223 | 2222 | ||
2224 | for_each_populated_zone(zone) { | 2223 | for_each_populated_zone(zone) { |
2225 | enum lru_list l; | 2224 | enum lru_list l; |
@@ -2236,11 +2235,14 @@ static void shrink_all_zones(unsigned long nr_pages, int prio, | |||
2236 | l == LRU_ACTIVE_FILE)) | 2235 | l == LRU_ACTIVE_FILE)) |
2237 | continue; | 2236 | continue; |
2238 | 2237 | ||
2239 | zone->lru[l].nr_saved_scan += (lru_pages >> prio) + 1; | 2238 | reclaim_stat = get_reclaim_stat(zone, sc); |
2240 | if (zone->lru[l].nr_saved_scan >= nr_pages || pass > 3) { | 2239 | reclaim_stat->nr_saved_scan[l] += |
2240 | (lru_pages >> prio) + 1; | ||
2241 | if (reclaim_stat->nr_saved_scan[l] | ||
2242 | >= nr_pages || pass > 3) { | ||
2241 | unsigned long nr_to_scan; | 2243 | unsigned long nr_to_scan; |
2242 | 2244 | ||
2243 | zone->lru[l].nr_saved_scan = 0; | 2245 | reclaim_stat->nr_saved_scan[l] = 0; |
2244 | nr_to_scan = min(nr_pages, lru_pages); | 2246 | nr_to_scan = min(nr_pages, lru_pages); |
2245 | nr_reclaimed += shrink_list(l, nr_to_scan, zone, | 2247 | nr_reclaimed += shrink_list(l, nr_to_scan, zone, |
2246 | sc, prio); | 2248 | sc, prio); |