path: root/mm
author	Christoph Lameter <clameter@sgi.com>	2007-02-10 04:43:01 -0500
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2007-02-11 13:51:17 -0500
commitc878538598d1e7ab41ecc0de8894e34e2fdef630 (patch)
treed22e73fddef75521e287c3e7754a1d3224c348d9 /mm
parentc3704ceb4ad055b489b143f4e37c57d128908012 (diff)
[PATCH] Use ZVC for inactive and active counts
The determination of the dirty ratio used to control writeback behavior is currently based on the total number of pages in the system.

However, not all pages in the system may be dirtied. Thus the ratio is always too low and can never reach 100%. The ratio may be particularly skewed if large hugepage allocations, slab allocations or device driver buffers make large sections of memory unavailable. In that case we may get into a situation in which e.g. the background writeback ratio of 40% can no longer be reached, which leads to undesired writeback behavior.

This patchset fixes that issue by determining the ratio based on the pages that may actually become dirty: the pages on the active and inactive lists plus the free pages.

The problem with those counts has so far been that they are expensive to calculate, because counts from multiple nodes and multiple zones have to be summed up. This patchset makes these counters ZVC counters, meaning that a current sum per zone, per node and for the whole system is always available via global variables and is no longer expensive to calculate.

The patchset has some other good side effects:

- Removal of the various functions that sum up free, active and inactive page counts

- Cleanup of the functions that display information via the proc filesystem

This patch:

The use of a ZVC for nr_inactive and nr_active allows a simplification of some counter operations. More ZVC functionality is used for sums etc. in the following patches.

[akpm@osdl.org: UP build fix]
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
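For readers not familiar with ZVC (the zoned VM counter framework in mm/vmstat.c): the conversion makes the active/inactive counts cheap to read because each CPU batches small differentials locally and only folds them into the shared per-zone and global totals when a threshold is crossed, so zone_page_state() and global_page_state() become simple reads instead of sums over all zones. The following is a minimal, self-contained userspace model of that idea, not the kernel code itself; the names (toy_zone, zvc_mod, ZVC_THRESHOLD) are illustrative stand-ins.

/*
 * Minimal userspace model of a zoned VM counter (ZVC).  Each "CPU" keeps a
 * small signed differential per counter and folds it into the shared zone
 * and global totals only when it crosses a threshold, so reading a total
 * never requires walking zones or CPUs.  All names here are illustrative.
 */
#include <stdio.h>

#define NR_CPUS       4
#define ZVC_THRESHOLD 8		/* fold per-CPU deltas after ~8 events */

enum zvc_item { TOY_NR_ACTIVE, TOY_NR_INACTIVE, NR_TOY_ITEMS };

struct toy_zone {
	long vm_stat[NR_TOY_ITEMS];				/* cheap-to-read totals */
	signed char vm_stat_diff[NR_CPUS][NR_TOY_ITEMS];	/* per-CPU deltas */
};

static long toy_global_stat[NR_TOY_ITEMS];

/* Writer side: cheap per-CPU update, occasional fold into the totals. */
static void zvc_mod(struct toy_zone *z, int cpu, enum zvc_item item, int delta)
{
	signed char *p = &z->vm_stat_diff[cpu][item];
	int v = *p + delta;

	if (v > ZVC_THRESHOLD || v < -ZVC_THRESHOLD) {
		z->vm_stat[item] += v;
		toy_global_stat[item] += v;
		v = 0;
	}
	*p = (signed char)v;
}

/* Reader side: a single load replaces the old sum over all zones. */
static long zvc_zone_state(struct toy_zone *z, enum zvc_item item)
{
	return z->vm_stat[item];
}

static long zvc_global_state(enum zvc_item item)
{
	return toy_global_stat[item];
}

int main(void)
{
	static struct toy_zone zone;	/* zero-initialized */
	int i;

	for (i = 0; i < 1000; i++)	/* 1000 pages become active */
		zvc_mod(&zone, i % NR_CPUS, TOY_NR_ACTIVE, 1);
	for (i = 0; i < 300; i++)	/* 300 of them are deactivated */
		zvc_mod(&zone, i % NR_CPUS, TOY_NR_ACTIVE, -1);

	printf("zone active (approx):   %ld\n", zvc_zone_state(&zone, TOY_NR_ACTIVE));
	printf("global active (approx): %ld\n", zvc_global_state(TOY_NR_ACTIVE));
	return 0;
}

In the real kernel the per-CPU differential lives in struct per_cpu_pageset (vm_stat_diff, visible in the __inc_zone_state/__dec_zone_state hunks below), and a fold updates both the zone's and the global vm_stat totals, which is what makes the totals slightly approximate but always available.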
Diffstat (limited to 'mm')
-rw-r--r--	mm/page_alloc.c	 6
-rw-r--r--	mm/vmscan.c	51
-rw-r--r--	mm/vmstat.c	28
3 files changed, 43 insertions(+), 42 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f26fdc94393e..07c954e53270 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1616,8 +1616,8 @@ void show_free_areas(void)
 			K(zone->pages_min),
 			K(zone->pages_low),
 			K(zone->pages_high),
-			K(zone->nr_active),
-			K(zone->nr_inactive),
+			K(zone_page_state(zone, NR_ACTIVE)),
+			K(zone_page_state(zone, NR_INACTIVE)),
 			K(zone->present_pages),
 			zone->pages_scanned,
 			(zone->all_unreclaimable ? "yes" : "no")
@@ -2684,8 +2684,6 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
 		INIT_LIST_HEAD(&zone->inactive_list);
 		zone->nr_scan_active = 0;
 		zone->nr_scan_inactive = 0;
-		zone->nr_active = 0;
-		zone->nr_inactive = 0;
 		zap_zone_vm_stats(zone);
 		atomic_set(&zone->reclaim_in_progress, 0);
 		if (!size)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 7430df68cb64..0655d5fe73e8 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -679,7 +679,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 		nr_taken = isolate_lru_pages(sc->swap_cluster_max,
 					     &zone->inactive_list,
 					     &page_list, &nr_scan);
-		zone->nr_inactive -= nr_taken;
+		__mod_zone_page_state(zone, NR_INACTIVE, -nr_taken);
 		zone->pages_scanned += nr_scan;
 		spin_unlock_irq(&zone->lru_lock);

@@ -740,7 +740,8 @@ static inline void note_zone_scanning_priority(struct zone *zone, int priority)

 static inline int zone_is_near_oom(struct zone *zone)
 {
-	return zone->pages_scanned >= (zone->nr_active + zone->nr_inactive)*3;
+	return zone->pages_scanned >= (zone_page_state(zone, NR_ACTIVE)
+				+ zone_page_state(zone, NR_INACTIVE))*3;
 }

 /*
@@ -825,7 +826,7 @@ force_reclaim_mapped:
 	pgmoved = isolate_lru_pages(nr_pages, &zone->active_list,
 				    &l_hold, &pgscanned);
 	zone->pages_scanned += pgscanned;
-	zone->nr_active -= pgmoved;
+	__mod_zone_page_state(zone, NR_ACTIVE, -pgmoved);
 	spin_unlock_irq(&zone->lru_lock);

 	while (!list_empty(&l_hold)) {
@@ -857,7 +858,7 @@ force_reclaim_mapped:
 		list_move(&page->lru, &zone->inactive_list);
 		pgmoved++;
 		if (!pagevec_add(&pvec, page)) {
-			zone->nr_inactive += pgmoved;
+			__mod_zone_page_state(zone, NR_INACTIVE, pgmoved);
 			spin_unlock_irq(&zone->lru_lock);
 			pgdeactivate += pgmoved;
 			pgmoved = 0;
@@ -867,7 +868,7 @@ force_reclaim_mapped:
 			spin_lock_irq(&zone->lru_lock);
 		}
 	}
-	zone->nr_inactive += pgmoved;
+	__mod_zone_page_state(zone, NR_INACTIVE, pgmoved);
 	pgdeactivate += pgmoved;
 	if (buffer_heads_over_limit) {
 		spin_unlock_irq(&zone->lru_lock);
@@ -885,14 +886,14 @@ force_reclaim_mapped:
 		list_move(&page->lru, &zone->active_list);
 		pgmoved++;
 		if (!pagevec_add(&pvec, page)) {
-			zone->nr_active += pgmoved;
+			__mod_zone_page_state(zone, NR_ACTIVE, pgmoved);
 			pgmoved = 0;
 			spin_unlock_irq(&zone->lru_lock);
 			__pagevec_release(&pvec);
 			spin_lock_irq(&zone->lru_lock);
 		}
 	}
-	zone->nr_active += pgmoved;
+	__mod_zone_page_state(zone, NR_ACTIVE, pgmoved);

 	__count_zone_vm_events(PGREFILL, zone, pgscanned);
 	__count_vm_events(PGDEACTIVATE, pgdeactivate);
@@ -918,14 +919,16 @@ static unsigned long shrink_zone(int priority, struct zone *zone,
 	 * Add one to `nr_to_scan' just to make sure that the kernel will
 	 * slowly sift through the active list.
 	 */
-	zone->nr_scan_active += (zone->nr_active >> priority) + 1;
+	zone->nr_scan_active +=
+		(zone_page_state(zone, NR_ACTIVE) >> priority) + 1;
 	nr_active = zone->nr_scan_active;
 	if (nr_active >= sc->swap_cluster_max)
 		zone->nr_scan_active = 0;
 	else
 		nr_active = 0;

-	zone->nr_scan_inactive += (zone->nr_inactive >> priority) + 1;
+	zone->nr_scan_inactive +=
+		(zone_page_state(zone, NR_INACTIVE) >> priority) + 1;
 	nr_inactive = zone->nr_scan_inactive;
 	if (nr_inactive >= sc->swap_cluster_max)
 		zone->nr_scan_inactive = 0;
@@ -1037,7 +1040,8 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
 		if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
 			continue;

-		lru_pages += zone->nr_active + zone->nr_inactive;
+		lru_pages += zone_page_state(zone, NR_ACTIVE)
+			+ zone_page_state(zone, NR_INACTIVE);
 	}

 	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
@@ -1182,7 +1186,8 @@ loop_again:
 		for (i = 0; i <= end_zone; i++) {
 			struct zone *zone = pgdat->node_zones + i;

-			lru_pages += zone->nr_active + zone->nr_inactive;
+			lru_pages += zone_page_state(zone, NR_ACTIVE)
+				+ zone_page_state(zone, NR_INACTIVE);
 		}

 		/*
@@ -1219,8 +1224,9 @@ loop_again:
 			if (zone->all_unreclaimable)
 				continue;
 			if (nr_slab == 0 && zone->pages_scanned >=
-				(zone->nr_active + zone->nr_inactive) * 6)
+				(zone_page_state(zone, NR_ACTIVE)
+				+ zone_page_state(zone, NR_INACTIVE)) * 6)
 				zone->all_unreclaimable = 1;
 			/*
 			 * If we've done a decent amount of scanning and
 			 * the reclaim ratio is low, start doing writepage
@@ -1385,18 +1391,22 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,

 		/* For pass = 0 we don't shrink the active list */
 		if (pass > 0) {
-			zone->nr_scan_active += (zone->nr_active >> prio) + 1;
+			zone->nr_scan_active +=
+				(zone_page_state(zone, NR_ACTIVE) >> prio) + 1;
 			if (zone->nr_scan_active >= nr_pages || pass > 3) {
 				zone->nr_scan_active = 0;
-				nr_to_scan = min(nr_pages, zone->nr_active);
+				nr_to_scan = min(nr_pages,
+					zone_page_state(zone, NR_ACTIVE));
 				shrink_active_list(nr_to_scan, zone, sc, prio);
 			}
 		}

-		zone->nr_scan_inactive += (zone->nr_inactive >> prio) + 1;
+		zone->nr_scan_inactive +=
+			(zone_page_state(zone, NR_INACTIVE) >> prio) + 1;
 		if (zone->nr_scan_inactive >= nr_pages || pass > 3) {
 			zone->nr_scan_inactive = 0;
-			nr_to_scan = min(nr_pages, zone->nr_inactive);
+			nr_to_scan = min(nr_pages,
+				zone_page_state(zone, NR_INACTIVE));
 			ret += shrink_inactive_list(nr_to_scan, zone, sc);
 			if (ret >= nr_pages)
 				return ret;
@@ -1408,12 +1418,7 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,

 static unsigned long count_lru_pages(void)
 {
-	struct zone *zone;
-	unsigned long ret = 0;
-
-	for_each_zone(zone)
-		ret += zone->nr_active + zone->nr_inactive;
-	return ret;
+	return global_page_state(NR_ACTIVE) + global_page_state(NR_INACTIVE);
 }

 /*
diff --git a/mm/vmstat.c b/mm/vmstat.c
index bf62a8232100..5462106725d7 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -19,12 +19,10 @@ void __get_zone_counts(unsigned long *active, unsigned long *inactive,
 	struct zone *zones = pgdat->node_zones;
 	int i;

-	*active = 0;
-	*inactive = 0;
+	*active = node_page_state(pgdat->node_id, NR_ACTIVE);
+	*inactive = node_page_state(pgdat->node_id, NR_INACTIVE);
 	*free = 0;
 	for (i = 0; i < MAX_NR_ZONES; i++) {
-		*active += zones[i].nr_active;
-		*inactive += zones[i].nr_inactive;
 		*free += zones[i].free_pages;
 	}
 }
@@ -34,14 +32,12 @@ void get_zone_counts(unsigned long *active,
 {
 	struct pglist_data *pgdat;

-	*active = 0;
-	*inactive = 0;
+	*active = global_page_state(NR_ACTIVE);
+	*inactive = global_page_state(NR_INACTIVE);
 	*free = 0;
 	for_each_online_pgdat(pgdat) {
 		unsigned long l, m, n;
 		__get_zone_counts(&l, &m, &n, pgdat);
-		*active += l;
-		*inactive += m;
 		*free += n;
 	}
 }
@@ -239,7 +235,7 @@ EXPORT_SYMBOL(mod_zone_page_state);
  * in between and therefore the atomicity vs. interrupt cannot be exploited
  * in a useful way here.
  */
-static void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
+void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
 {
 	struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id());
 	s8 *p = pcp->vm_stat_diff + item;
@@ -260,9 +256,8 @@ void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
 }
 EXPORT_SYMBOL(__inc_zone_page_state);

-void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
+void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
 {
-	struct zone *zone = page_zone(page);
 	struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id());
 	s8 *p = pcp->vm_stat_diff + item;

@@ -275,6 +270,11 @@ void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
 		*p = overstep;
 	}
 }
+
+void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
+{
+	__dec_zone_state(page_zone(page), item);
+}
 EXPORT_SYMBOL(__dec_zone_page_state);

 void inc_zone_state(struct zone *zone, enum zone_stat_item item)
@@ -454,6 +454,8 @@ const struct seq_operations fragmentation_op = {

 static const char * const vmstat_text[] = {
 	/* Zoned VM counters */
+	"nr_active",
+	"nr_inactive",
 	"nr_anon_pages",
 	"nr_mapped",
 	"nr_file_pages",
@@ -529,8 +531,6 @@ static int zoneinfo_show(struct seq_file *m, void *arg)
529 "\n min %lu" 531 "\n min %lu"
530 "\n low %lu" 532 "\n low %lu"
531 "\n high %lu" 533 "\n high %lu"
532 "\n active %lu"
533 "\n inactive %lu"
534 "\n scanned %lu (a: %lu i: %lu)" 534 "\n scanned %lu (a: %lu i: %lu)"
535 "\n spanned %lu" 535 "\n spanned %lu"
536 "\n present %lu", 536 "\n present %lu",
@@ -538,8 +538,6 @@ static int zoneinfo_show(struct seq_file *m, void *arg)
 		   zone->pages_min,
 		   zone->pages_low,
 		   zone->pages_high,
-		   zone->nr_active,
-		   zone->nr_inactive,
 		   zone->pages_scanned,
 		   zone->nr_scan_active, zone->nr_scan_inactive,
 		   zone->spanned_pages,