Diffstat (limited to 'mm')
-rw-r--r--	mm/vmscan.c	219
1 file changed, 164 insertions(+), 55 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 440a733fe2e9..46be8a02280e 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -61,6 +61,8 @@ struct scan_control {
 	 * In this context, it doesn't matter that we scan the
 	 * whole list at once. */
 	int swap_cluster_max;
+
+	int swappiness;
 };
 
 /*
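The new field makes swappiness a per-reclaim-context value instead of a direct read of the global sysctl. A minimal sketch of the intent (not taken from the patch itself, though both values shown do appear in the hunks below):

	/* ordinary reclaim: follow /proc/sys/vm/swappiness */
	struct scan_control sc = {
		.gfp_mask	  = GFP_KERNEL,
		.may_swap	  = 1,
		.swap_cluster_max = SWAP_CLUSTER_MAX,
		.swappiness	  = vm_swappiness,
	};

	/* suspend-style reclaim: override it so mapped pages are reclaimed too */
	sc.swappiness = 100;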
@@ -741,7 +743,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	 * A 100% value of vm_swappiness overrides this algorithm
 	 * altogether.
 	 */
-	swap_tendency = mapped_ratio / 2 + distress + vm_swappiness;
+	swap_tendency = mapped_ratio / 2 + distress + sc->swappiness;
 
 	/*
 	 * Now use this metric to decide whether to start moving mapped
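The changed line above is the knob the rest of the patch plumbs through. A worked example with illustrative numbers, assuming the usual swap_tendency >= 100 test that shrink_active_list() uses to decide whether to reclaim mapped pages:

	int mapped_ratio = 40;	/* 40% of pages are mapped */
	int distress = 0;	/* prev_priority == DEF_PRIORITY */

	int tendency_default = mapped_ratio / 2 + distress + 60;	/* = 80,  < 100: leave mapped pages alone */
	int tendency_forced  = mapped_ratio / 2 + distress + 100;	/* = 120, >= 100: reclaim mapped pages too */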
@@ -957,6 +959,7 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
 		.may_writepage = !laptop_mode,
 		.swap_cluster_max = SWAP_CLUSTER_MAX,
 		.may_swap = 1,
+		.swappiness = vm_swappiness,
 	};
 
 	inc_page_state(allocstall);
@@ -1021,10 +1024,6 @@ out:
  * For kswapd, balance_pgdat() will work across all this node's zones until
  * they are all at pages_high.
  *
- * If `nr_pages' is non-zero then it is the number of pages which are to be
- * reclaimed, regardless of the zone occupancies. This is a software suspend
- * special.
- *
  * Returns the number of pages which were actually freed.
  *
  * There is special handling here for zones which are full of pinned pages.
@@ -1042,10 +1041,8 @@ out:
  * the page allocator fallback scheme to ensure that aging of pages is balanced
  * across the zones.
  */
-static unsigned long balance_pgdat(pg_data_t *pgdat, unsigned long nr_pages,
-				int order)
+static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
 {
-	unsigned long to_free = nr_pages;
 	int all_zones_ok;
 	int priority;
 	int i;
@@ -1055,7 +1052,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, unsigned long nr_pages,
 	struct scan_control sc = {
 		.gfp_mask = GFP_KERNEL,
 		.may_swap = 1,
-		.swap_cluster_max = nr_pages ? nr_pages : SWAP_CLUSTER_MAX,
+		.swap_cluster_max = SWAP_CLUSTER_MAX,
+		.swappiness = vm_swappiness,
 	};
 
 loop_again:
@@ -1082,31 +1080,26 @@ loop_again:
 
 		all_zones_ok = 1;
 
-		if (nr_pages == 0) {
-			/*
-			 * Scan in the highmem->dma direction for the highest
-			 * zone which needs scanning
-			 */
-			for (i = pgdat->nr_zones - 1; i >= 0; i--) {
-				struct zone *zone = pgdat->node_zones + i;
+		/*
+		 * Scan in the highmem->dma direction for the highest
+		 * zone which needs scanning
+		 */
+		for (i = pgdat->nr_zones - 1; i >= 0; i--) {
+			struct zone *zone = pgdat->node_zones + i;
 
-				if (!populated_zone(zone))
-					continue;
+			if (!populated_zone(zone))
+				continue;
 
-				if (zone->all_unreclaimable &&
-						priority != DEF_PRIORITY)
-					continue;
+			if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+				continue;
 
-				if (!zone_watermark_ok(zone, order,
-						zone->pages_high, 0, 0)) {
-					end_zone = i;
-					goto scan;
-				}
+			if (!zone_watermark_ok(zone, order, zone->pages_high,
+					       0, 0)) {
+				end_zone = i;
+				goto scan;
 			}
-			goto out;
-		} else {
-			end_zone = pgdat->nr_zones - 1;
 		}
+		goto out;
 scan:
 	for (i = 0; i <= end_zone; i++) {
 		struct zone *zone = pgdat->node_zones + i;
@@ -1133,11 +1126,9 @@ scan:
 			if (zone->all_unreclaimable && priority != DEF_PRIORITY)
 				continue;
 
-			if (nr_pages == 0) {	/* Not software suspend */
-				if (!zone_watermark_ok(zone, order,
-					zone->pages_high, end_zone, 0))
-					all_zones_ok = 0;
-			}
+			if (!zone_watermark_ok(zone, order, zone->pages_high,
+					       end_zone, 0))
+				all_zones_ok = 0;
 			zone->temp_priority = priority;
 			if (zone->prev_priority > priority)
 				zone->prev_priority = priority;
@@ -1162,8 +1153,6 @@ scan:
 			    total_scanned > nr_reclaimed + nr_reclaimed / 2)
 				sc.may_writepage = 1;
 		}
-		if (nr_pages && to_free > nr_reclaimed)
-			continue;	/* swsusp: need to do more work */
 		if (all_zones_ok)
 			break;		/* kswapd: all done */
 		/*
@@ -1179,7 +1168,7 @@ scan:
 		 * matches the direct reclaim path behaviour in terms of impact
 		 * on zone->*_priority.
 		 */
-		if ((nr_reclaimed >= SWAP_CLUSTER_MAX) && !nr_pages)
+		if (nr_reclaimed >= SWAP_CLUSTER_MAX)
 			break;
 	}
 out:
@@ -1261,7 +1250,7 @@ static int kswapd(void *p)
 		}
 		finish_wait(&pgdat->kswapd_wait, &wait);
 
-		balance_pgdat(pgdat, 0, order);
+		balance_pgdat(pgdat, order);
 	}
 	return 0;
 }
@@ -1290,35 +1279,154 @@ void wakeup_kswapd(struct zone *zone, int order)
 
 #ifdef CONFIG_PM
 /*
- * Try to free `nr_pages' of memory, system-wide. Returns the number of freed
- * pages.
+ * Helper function for shrink_all_memory(). Tries to reclaim 'nr_pages' pages
+ * from LRU lists system-wide, for given pass and priority, and returns the
+ * number of reclaimed pages
+ *
+ * For pass > 3 we also try to shrink the LRU lists that contain a few pages
+ */
+static unsigned long shrink_all_zones(unsigned long nr_pages, int pass,
+				      int prio, struct scan_control *sc)
+{
+	struct zone *zone;
+	unsigned long nr_to_scan, ret = 0;
+
+	for_each_zone(zone) {
+
+		if (!populated_zone(zone))
+			continue;
+
+		if (zone->all_unreclaimable && prio != DEF_PRIORITY)
+			continue;
+
+		/* For pass = 0 we don't shrink the active list */
+		if (pass > 0) {
+			zone->nr_scan_active += (zone->nr_active >> prio) + 1;
+			if (zone->nr_scan_active >= nr_pages || pass > 3) {
+				zone->nr_scan_active = 0;
+				nr_to_scan = min(nr_pages, zone->nr_active);
+				shrink_active_list(nr_to_scan, zone, sc);
+			}
+		}
+
+		zone->nr_scan_inactive += (zone->nr_inactive >> prio) + 1;
+		if (zone->nr_scan_inactive >= nr_pages || pass > 3) {
+			zone->nr_scan_inactive = 0;
+			nr_to_scan = min(nr_pages, zone->nr_inactive);
+			ret += shrink_inactive_list(nr_to_scan, zone, sc);
+			if (ret >= nr_pages)
+				return ret;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Try to free `nr_pages' of memory, system-wide, and return the number of
+ * freed pages.
+ *
+ * Rather than trying to age LRUs the aim is to preserve the overall
+ * LRU order by reclaiming preferentially
+ * inactive > active > active referenced > active mapped
 */
 unsigned long shrink_all_memory(unsigned long nr_pages)
 {
-	pg_data_t *pgdat;
-	unsigned long nr_to_free = nr_pages;
+	unsigned long lru_pages, nr_slab;
 	unsigned long ret = 0;
-	unsigned retry = 2;
-	struct reclaim_state reclaim_state = {
-		.reclaimed_slab = 0,
+	int pass;
+	struct reclaim_state reclaim_state;
+	struct zone *zone;
+	struct scan_control sc = {
+		.gfp_mask = GFP_KERNEL,
+		.may_swap = 0,
+		.swap_cluster_max = nr_pages,
+		.may_writepage = 1,
+		.swappiness = vm_swappiness,
 	};
 
 	current->reclaim_state = &reclaim_state;
-repeat:
-	for_each_online_pgdat(pgdat) {
-		unsigned long freed;
 
-		freed = balance_pgdat(pgdat, nr_to_free, 0);
-		ret += freed;
-		nr_to_free -= freed;
-		if ((long)nr_to_free <= 0)
+	lru_pages = 0;
+	for_each_zone(zone)
+		lru_pages += zone->nr_active + zone->nr_inactive;
+
+	nr_slab = read_page_state(nr_slab);
+	/* If slab caches are huge, it's better to hit them first */
+	while (nr_slab >= lru_pages) {
+		reclaim_state.reclaimed_slab = 0;
+		shrink_slab(nr_pages, sc.gfp_mask, lru_pages);
+		if (!reclaim_state.reclaimed_slab)
 			break;
+
+		ret += reclaim_state.reclaimed_slab;
+		if (ret >= nr_pages)
+			goto out;
+
+		nr_slab -= reclaim_state.reclaimed_slab;
 	}
-	if (retry-- && ret < nr_pages) {
-		blk_congestion_wait(WRITE, HZ/5);
-		goto repeat;
+
+	/*
+	 * We try to shrink LRUs in 5 passes:
+	 * 0 = Reclaim from inactive_list only
+	 * 1 = Reclaim from active list but don't reclaim mapped
+	 * 2 = 2nd pass of type 1
+	 * 3 = Reclaim mapped (normal reclaim)
+	 * 4 = 2nd pass of type 3
+	 */
+	for (pass = 0; pass < 5; pass++) {
+		int prio;
+
+		/* Needed for shrinking slab caches later on */
+		if (!lru_pages)
+			for_each_zone(zone) {
+				lru_pages += zone->nr_active;
+				lru_pages += zone->nr_inactive;
+			}
+
+		/* Force reclaiming mapped pages in the passes #3 and #4 */
+		if (pass > 2) {
+			sc.may_swap = 1;
+			sc.swappiness = 100;
+		}
+
+		for (prio = DEF_PRIORITY; prio >= 0; prio--) {
+			unsigned long nr_to_scan = nr_pages - ret;
+
+			sc.nr_mapped = read_page_state(nr_mapped);
+			sc.nr_scanned = 0;
+
+			ret += shrink_all_zones(nr_to_scan, prio, pass, &sc);
+			if (ret >= nr_pages)
+				goto out;
+
+			reclaim_state.reclaimed_slab = 0;
+			shrink_slab(sc.nr_scanned, sc.gfp_mask, lru_pages);
+			ret += reclaim_state.reclaimed_slab;
+			if (ret >= nr_pages)
+				goto out;
+
+			if (sc.nr_scanned && prio < DEF_PRIORITY - 2)
+				blk_congestion_wait(WRITE, HZ / 10);
+		}
+
+		lru_pages = 0;
 	}
+
+	/*
+	 * If ret = 0, we could not shrink LRUs, but there may be something
+	 * in slab caches
+	 */
+	if (!ret)
+		do {
+			reclaim_state.reclaimed_slab = 0;
+			shrink_slab(nr_pages, sc.gfp_mask, lru_pages);
+			ret += reclaim_state.reclaimed_slab;
+		} while (ret < nr_pages && reclaim_state.reclaimed_slab > 0);
+
+out:
 	current->reclaim_state = NULL;
+
 	return ret;
 }
 #endif
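With the nr_pages plumbing removed from balance_pgdat(), software suspend is expected to free memory through shrink_all_memory() alone. A hypothetical caller sketch, not part of this patch (the real swsusp code lives under kernel/power/ and differs):

	/* Hypothetical helper, for illustration only. */
	static unsigned long reclaim_for_suspend(unsigned long pages_needed)
	{
		unsigned long freed = 0;

		while (freed < pages_needed) {
			unsigned long chunk = shrink_all_memory(pages_needed - freed);

			if (!chunk)	/* LRUs and slab caches are exhausted */
				break;
			freed += chunk;
		}
		return freed;
	}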
@@ -1416,6 +1524,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 		.swap_cluster_max = max_t(unsigned long, nr_pages,
 					SWAP_CLUSTER_MAX),
 		.gfp_mask = gfp_mask,
+		.swappiness = vm_swappiness,
 	};
 
 	disable_swap_token();