author	KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>	2010-08-09 20:19:27 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2010-08-09 23:45:00 -0400
commit	25edde0332916ae706ccf83de688be57bcc844b7 (patch)
tree	35a5b0e651f9cdb48d9a55a748970339c4f681bc /mm/vmscan.c
parent	b898cc70019ce1835bbf6c47bdf978adc36faa42 (diff)
vmscan: kill prev_priority completely
Since 2.6.28, zone->prev_priority has been unused, so it can be removed safely. Doing so also reduces stack usage slightly.

Now I have to say that I'm sorry. Two years ago I thought prev_priority could be integrated again and would be useful, but four (or more) attempts have not produced good performance numbers, so I am giving up on that approach.

The rest of this changelog consists of notes on prev_priority: why it existed in the first place and why it may no longer be necessary. This information is based heavily on discussions between Andrew Morton, Rik van Riel and KOSAKI Motohiro, who is quoted extensively.

Historically, prev_priority was important because it determined when the VM would start unmapping PTE pages; i.e. there were no other balances of note within the VM, Anon vs File and Mapped vs Unmapped. Without prev_priority, there is a potential risk of unnecessarily increased minor faults, as a large amount of read activity on use-once pages could push mapped pages to the end of the LRU and get them unmapped. There is no proof this is still a problem, and currently it is not considered to be one: active file pages are not deactivated if the active file list is smaller than the inactive list, reducing the likelihood that file-mapped pages are pushed off the LRU, and referenced executable pages are kept on the active list to avoid them being pushed out by read activity.

Even if it is still a problem, prev_priority wouldn't work nowadays. First of all, current vmscan still has a lot of UP-centric code, which exposes weaknesses on machines with dozens of CPUs; I think we need further improvement there. The problem is that current vmscan mixes up per-system pressure, per-zone pressure and per-task pressure. For example, prev_priority tries to boost a task's priority toward that of other concurrent reclaimers, but if another task has a mempolicy restriction this is unnecessary, and it also causes needlessly large latency and excessive reclaim. Per-task priority plus the prev_priority adjustment amounts to an emulation of per-system pressure, but it has two issues: 1) the emulation is too rough and brutal; 2) we need per-zone pressure, not per-system pressure.

As another example, DEF_PRIORITY is currently 12, which means the LRU is rotated about 2 full cycles (1/4096 + 1/2048 + 1/1024 + ... + 1) before the OOM killer is invoked. But if 10,000 threads enter DEF_PRIORITY reclaim at the same time, the system is under higher memory pressure than priority == 0 would produce (1/4096 * 10,000 > 2). prev_priority can't solve such a multithreaded workload issue. In other words, the prev_priority concept assumes the system doesn't have lots of threads.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Michael Rubin <mrubin@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
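[Editor's note] As a quick sanity check of the arithmetic in the last paragraph, the following stand-alone userspace C sketch (illustrative only, not kernel code; the 10,000-thread count is simply the figure quoted in the changelog) prints both sides of the comparison:

#include <stdio.h>

#define DEF_PRIORITY 12	/* matches the value discussed in the changelog */

int main(void)
{
	double single_task = 0.0;
	int priority;

	/* One task walking priority 12..0 scans 1/4096 + 1/2048 + ... + 1 of the LRU */
	for (priority = DEF_PRIORITY; priority >= 0; priority--)
		single_task += 1.0 / (double)(1 << priority);

	/* 10,000 tasks all reclaiming at DEF_PRIORITY each scan 1/4096 of the LRU */
	double many_tasks = 10000.0 / (double)(1 << DEF_PRIORITY);

	printf("single task, priority 12..0: about %.3f LRU cycles\n", single_task);
	printf("10,000 tasks at priority 12: about %.3f LRU cycles\n", many_tasks);
	return 0;
}

The output is roughly 2.0 LRU cycles for a single task walking priority 12..0 versus roughly 2.44 cycles for 10,000 tasks each scanning at priority 12, which is the imbalance the changelog describes.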
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--	mm/vmscan.c	57
1 file changed, 0 insertions, 57 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b7a4e6a3cf89..594eba8a44c0 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1290,20 +1290,6 @@ done:
 }
 
 /*
- * We are about to scan this zone at a certain priority level. If that priority
- * level is smaller (ie: more urgent) than the previous priority, then note
- * that priority level within the zone. This is done so that when the next
- * process comes in to scan this zone, it will immediately start out at this
- * priority level rather than having to build up its own scanning priority.
- * Here, this priority affects only the reclaim-mapped threshold.
- */
-static inline void note_zone_scanning_priority(struct zone *zone, int priority)
-{
-	if (priority < zone->prev_priority)
-		zone->prev_priority = priority;
-}
-
-/*
  * This moves pages from the active list to the inactive list.
  *
  * We move them the other way if the page is referenced by one or more
@@ -1766,17 +1752,8 @@ static bool shrink_zones(int priority, struct zonelist *zonelist,
 		if (scanning_global_lru(sc)) {
 			if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
 				continue;
-			note_zone_scanning_priority(zone, priority);
-
 			if (zone->all_unreclaimable && priority != DEF_PRIORITY)
 				continue;	/* Let kswapd poll it */
-		} else {
-			/*
-			 * Ignore cpuset limitation here. We just want to reduce
-			 * # of used pages by us regardless of memory shortage.
-			 */
-			mem_cgroup_note_reclaim_priority(sc->mem_cgroup,
-							priority);
 		}
 
 		shrink_zone(priority, zone, sc);
@@ -1877,17 +1854,6 @@ out:
 	if (priority < 0)
 		priority = 0;
 
-	if (scanning_global_lru(sc)) {
-		for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
-
-			if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
-				continue;
-
-			zone->prev_priority = priority;
-		}
-	} else
-		mem_cgroup_record_reclaim_priority(sc->mem_cgroup, priority);
-
 	delayacct_freepages_end();
 	put_mems_allowed();
 
@@ -2053,22 +2019,12 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
 		.order = order,
 		.mem_cgroup = NULL,
 	};
-	/*
-	 * temp_priority is used to remember the scanning priority at which
-	 * this zone was successfully refilled to
-	 * free_pages == high_wmark_pages(zone).
-	 */
-	int temp_priority[MAX_NR_ZONES];
-
 loop_again:
 	total_scanned = 0;
 	sc.nr_reclaimed = 0;
 	sc.may_writepage = !laptop_mode;
 	count_vm_event(PAGEOUTRUN);
 
-	for (i = 0; i < pgdat->nr_zones; i++)
-		temp_priority[i] = DEF_PRIORITY;
-
 	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
 		int end_zone = 0;	/* Inclusive.  0 = ZONE_DMA */
 		unsigned long lru_pages = 0;
@@ -2136,9 +2092,7 @@ loop_again:
 			if (zone->all_unreclaimable && priority != DEF_PRIORITY)
 				continue;
 
-			temp_priority[i] = priority;
 			sc.nr_scanned = 0;
-			note_zone_scanning_priority(zone, priority);
 
 			nid = pgdat->node_id;
 			zid = zone_idx(zone);
@@ -2211,16 +2165,6 @@ loop_again:
 			break;
 	}
 out:
-	/*
-	 * Note within each zone the priority level at which this zone was
-	 * brought into a happy state. So that the next thread which scans this
-	 * zone will start out at that priority level.
-	 */
-	for (i = 0; i < pgdat->nr_zones; i++) {
-		struct zone *zone = pgdat->node_zones + i;
-
-		zone->prev_priority = temp_priority[i];
-	}
 	if (!all_zones_ok) {
 		cond_resched();
 
@@ -2639,7 +2583,6 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	 */
 	priority = ZONE_RECLAIM_PRIORITY;
 	do {
-		note_zone_scanning_priority(zone, priority);
 		shrink_zone(priority, zone, &sc);
 		priority--;
 	} while (priority >= 0 && sc.nr_reclaimed < nr_pages);