aboutsummaryrefslogtreecommitdiffstats
path: root/mm/vmscan.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r-- mm/vmscan.c 75
1 files changed, 54 insertions, 21 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index eca70310adb2..518540a4a2a6 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -378,6 +378,12 @@ static pageout_t pageout(struct page *page, struct address_space *mapping)
378 return PAGE_CLEAN; 378 return PAGE_CLEAN;
379} 379}
380 380
381/*
382 * Attempt to detach a locked page from its ->mapping. If it is dirty or if
383 * someone else has a ref on the page, abort and return 0. If it was
384 * successfully detached, return 1. Assumes the caller has a single ref on
385 * this page.
386 */
381int remove_mapping(struct address_space *mapping, struct page *page) 387int remove_mapping(struct address_space *mapping, struct page *page)
382{ 388{
383 BUG_ON(!PageLocked(page)); 389 BUG_ON(!PageLocked(page));
@@ -717,6 +723,20 @@ done:
717 return nr_reclaimed; 723 return nr_reclaimed;
718} 724}
719 725
726/*
727 * We are about to scan this zone at a certain priority level. If that priority
728 * level is smaller (i.e. more urgent) than the previous priority, then note
729 * that priority level within the zone. This is done so that when the next
730 * process comes in to scan this zone, it will immediately start out at this
731 * priority level rather than having to build up its own scanning priority.
732 * Here, this priority affects only the reclaim-mapped threshold.
733 */
734static inline void note_zone_scanning_priority(struct zone *zone, int priority)
735{
736 if (priority < zone->prev_priority)
737 zone->prev_priority = priority;
738}
739
720static inline int zone_is_near_oom(struct zone *zone) 740static inline int zone_is_near_oom(struct zone *zone)
721{ 741{
722 return zone->pages_scanned >= (zone->nr_active + zone->nr_inactive)*3; 742 return zone->pages_scanned >= (zone->nr_active + zone->nr_inactive)*3;
@@ -740,7 +760,7 @@ static inline int zone_is_near_oom(struct zone *zone)
740 * But we had to alter page->flags anyway. 760 * But we had to alter page->flags anyway.
741 */ 761 */
742static void shrink_active_list(unsigned long nr_pages, struct zone *zone, 762static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
743 struct scan_control *sc) 763 struct scan_control *sc, int priority)
744{ 764{
745 unsigned long pgmoved; 765 unsigned long pgmoved;
746 int pgdeactivate = 0; 766 int pgdeactivate = 0;
@@ -764,7 +784,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
764 * `distress' is a measure of how much trouble we're having 784 * `distress' is a measure of how much trouble we're having
765 * reclaiming pages. 0 -> no problems. 100 -> great trouble. 785 * reclaiming pages. 0 -> no problems. 100 -> great trouble.
766 */ 786 */
767 distress = 100 >> zone->prev_priority; 787 distress = 100 >> min(zone->prev_priority, priority);
768 788
769 /* 789 /*
770 * The point of this algorithm is to decide when to start 790 * The point of this algorithm is to decide when to start
@@ -916,7 +936,7 @@ static unsigned long shrink_zone(int priority, struct zone *zone,
916 nr_to_scan = min(nr_active, 936 nr_to_scan = min(nr_active,
917 (unsigned long)sc->swap_cluster_max); 937 (unsigned long)sc->swap_cluster_max);
918 nr_active -= nr_to_scan; 938 nr_active -= nr_to_scan;
919 shrink_active_list(nr_to_scan, zone, sc); 939 shrink_active_list(nr_to_scan, zone, sc, priority);
920 } 940 }
921 941
922 if (nr_inactive) { 942 if (nr_inactive) {
@@ -966,9 +986,7 @@ static unsigned long shrink_zones(int priority, struct zone **zones,
966 if (!cpuset_zone_allowed(zone, __GFP_HARDWALL)) 986 if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
967 continue; 987 continue;
968 988
969 zone->temp_priority = priority; 989 note_zone_scanning_priority(zone, priority);
970 if (zone->prev_priority > priority)
971 zone->prev_priority = priority;
972 990
973 if (zone->all_unreclaimable && priority != DEF_PRIORITY) 991 if (zone->all_unreclaimable && priority != DEF_PRIORITY)
974 continue; /* Let kswapd poll it */ 992 continue; /* Let kswapd poll it */
@@ -1018,7 +1036,6 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
1018 if (!cpuset_zone_allowed(zone, __GFP_HARDWALL)) 1036 if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
1019 continue; 1037 continue;
1020 1038
1021 zone->temp_priority = DEF_PRIORITY;
1022 lru_pages += zone->nr_active + zone->nr_inactive; 1039 lru_pages += zone->nr_active + zone->nr_inactive;
1023 } 1040 }
1024 1041
@@ -1053,19 +1070,28 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
1053 1070
1054 /* Take a nap, wait for some writeback to complete */ 1071 /* Take a nap, wait for some writeback to complete */
1055 if (sc.nr_scanned && priority < DEF_PRIORITY - 2) 1072 if (sc.nr_scanned && priority < DEF_PRIORITY - 2)
1056 blk_congestion_wait(WRITE, HZ/10); 1073 congestion_wait(WRITE, HZ/10);
1057 } 1074 }
1058 /* top priority shrink_caches still had more to do? don't OOM, then */ 1075 /* top priority shrink_caches still had more to do? don't OOM, then */
1059 if (!sc.all_unreclaimable) 1076 if (!sc.all_unreclaimable)
1060 ret = 1; 1077 ret = 1;
1061out: 1078out:
1079 /*
1080 * Now that we've scanned all the zones at this priority level, note
1081 * that level within the zone so that the next thread which performs
1082 * scanning of this zone will immediately start out at this priority
1083 * level. This affects only the decision whether or not to bring
1084 * mapped pages onto the inactive list.
1085 */
1086 if (priority < 0)
1087 priority = 0;
1062 for (i = 0; zones[i] != 0; i++) { 1088 for (i = 0; zones[i] != 0; i++) {
1063 struct zone *zone = zones[i]; 1089 struct zone *zone = zones[i];
1064 1090
1065 if (!cpuset_zone_allowed(zone, __GFP_HARDWALL)) 1091 if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
1066 continue; 1092 continue;
1067 1093
1068 zone->prev_priority = zone->temp_priority; 1094 zone->prev_priority = priority;
1069 } 1095 }
1070 return ret; 1096 return ret;
1071} 1097}
@@ -1105,6 +1131,11 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
1105 .swap_cluster_max = SWAP_CLUSTER_MAX, 1131 .swap_cluster_max = SWAP_CLUSTER_MAX,
1106 .swappiness = vm_swappiness, 1132 .swappiness = vm_swappiness,
1107 }; 1133 };
1134 /*
1135 * temp_priority is used to remember the scanning priority at which
1136 * this zone was successfully refilled to free_pages == pages_high.
1137 */
1138 int temp_priority[MAX_NR_ZONES];
1108 1139
1109loop_again: 1140loop_again:
1110 total_scanned = 0; 1141 total_scanned = 0;
@@ -1112,11 +1143,8 @@ loop_again:
1112 sc.may_writepage = !laptop_mode; 1143 sc.may_writepage = !laptop_mode;
1113 count_vm_event(PAGEOUTRUN); 1144 count_vm_event(PAGEOUTRUN);
1114 1145
1115 for (i = 0; i < pgdat->nr_zones; i++) { 1146 for (i = 0; i < pgdat->nr_zones; i++)
1116 struct zone *zone = pgdat->node_zones + i; 1147 temp_priority[i] = DEF_PRIORITY;
1117
1118 zone->temp_priority = DEF_PRIORITY;
1119 }
1120 1148
1121 for (priority = DEF_PRIORITY; priority >= 0; priority--) { 1149 for (priority = DEF_PRIORITY; priority >= 0; priority--) {
1122 int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ 1150 int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */
@@ -1177,10 +1205,9 @@ scan:
1177 if (!zone_watermark_ok(zone, order, zone->pages_high, 1205 if (!zone_watermark_ok(zone, order, zone->pages_high,
1178 end_zone, 0)) 1206 end_zone, 0))
1179 all_zones_ok = 0; 1207 all_zones_ok = 0;
1180 zone->temp_priority = priority; 1208 temp_priority[i] = priority;
1181 if (zone->prev_priority > priority)
1182 zone->prev_priority = priority;
1183 sc.nr_scanned = 0; 1209 sc.nr_scanned = 0;
1210 note_zone_scanning_priority(zone, priority);
1184 nr_reclaimed += shrink_zone(priority, zone, &sc); 1211 nr_reclaimed += shrink_zone(priority, zone, &sc);
1185 reclaim_state->reclaimed_slab = 0; 1212 reclaim_state->reclaimed_slab = 0;
1186 nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL, 1213 nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
@@ -1208,7 +1235,7 @@ scan:
1208 * another pass across the zones. 1235 * another pass across the zones.
1209 */ 1236 */
1210 if (total_scanned && priority < DEF_PRIORITY - 2) 1237 if (total_scanned && priority < DEF_PRIORITY - 2)
1211 blk_congestion_wait(WRITE, HZ/10); 1238 congestion_wait(WRITE, HZ/10);
1212 1239
1213 /* 1240 /*
1214 * We do this so kswapd doesn't build up large priorities for 1241 * We do this so kswapd doesn't build up large priorities for
@@ -1220,10 +1247,15 @@ scan:
1220 break; 1247 break;
1221 } 1248 }
1222out: 1249out:
1250 /*
1251 * Note within each zone the priority level at which this zone was
1252 * brought into a happy state, so that the next thread which scans this
1253 * zone will start out at that priority level.
1254 */
1223 for (i = 0; i < pgdat->nr_zones; i++) { 1255 for (i = 0; i < pgdat->nr_zones; i++) {
1224 struct zone *zone = pgdat->node_zones + i; 1256 struct zone *zone = pgdat->node_zones + i;
1225 1257
1226 zone->prev_priority = zone->temp_priority; 1258 zone->prev_priority = temp_priority[i];
1227 } 1259 }
1228 if (!all_zones_ok) { 1260 if (!all_zones_ok) {
1229 cond_resched(); 1261 cond_resched();
@@ -1352,7 +1384,7 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int pass,
1352 if (zone->nr_scan_active >= nr_pages || pass > 3) { 1384 if (zone->nr_scan_active >= nr_pages || pass > 3) {
1353 zone->nr_scan_active = 0; 1385 zone->nr_scan_active = 0;
1354 nr_to_scan = min(nr_pages, zone->nr_active); 1386 nr_to_scan = min(nr_pages, zone->nr_active);
1355 shrink_active_list(nr_to_scan, zone, sc); 1387 shrink_active_list(nr_to_scan, zone, sc, prio);
1356 } 1388 }
1357 } 1389 }
1358 1390
@@ -1452,7 +1484,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
1452 goto out; 1484 goto out;
1453 1485
1454 if (sc.nr_scanned && prio < DEF_PRIORITY - 2) 1486 if (sc.nr_scanned && prio < DEF_PRIORITY - 2)
1455 blk_congestion_wait(WRITE, HZ / 10); 1487 congestion_wait(WRITE, HZ / 10);
1456 } 1488 }
1457 1489
1458 lru_pages = 0; 1490 lru_pages = 0;
@@ -1608,6 +1640,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
1608 */ 1640 */
1609 priority = ZONE_RECLAIM_PRIORITY; 1641 priority = ZONE_RECLAIM_PRIORITY;
1610 do { 1642 do {
1643 note_zone_scanning_priority(zone, priority);
1611 nr_reclaimed += shrink_zone(priority, zone, &sc); 1644 nr_reclaimed += shrink_zone(priority, zone, &sc);
1612 priority--; 1645 priority--;
1613 } while (priority >= 0 && nr_reclaimed < nr_pages); 1646 } while (priority >= 0 && nr_reclaimed < nr_pages);