Diffstat (limited to 'mm/vmscan.c'):

 mm/vmscan.c | 91
 1 file changed, 35 insertions(+), 56 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 45711585684e..eceac9f9032f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -70,13 +70,6 @@ struct scan_control {
 
 	int order;
 
-	/*
-	 * Pages that have (or should have) IO pending. If we run into
-	 * a lot of these, we're better off waiting a little for IO to
-	 * finish rather than scanning more pages in the VM.
-	 */
-	int nr_io_pages;
-
 	/* Which cgroup do we reclaim from */
 	struct mem_cgroup *mem_cgroup;
 
@@ -512,10 +505,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			 */
 			if (sync_writeback == PAGEOUT_IO_SYNC && may_enter_fs)
 				wait_on_page_writeback(page);
-			else {
-				sc->nr_io_pages++;
+			else
 				goto keep_locked;
-			}
 		}
 
 		referenced = page_referenced(page, 1, sc->mem_cgroup);
@@ -554,10 +545,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		if (PageDirty(page)) {
 			if (sc->order <= PAGE_ALLOC_COSTLY_ORDER && referenced)
 				goto keep_locked;
-			if (!may_enter_fs) {
-				sc->nr_io_pages++;
+			if (!may_enter_fs)
 				goto keep_locked;
-			}
 			if (!sc->may_writepage)
 				goto keep_locked;
 
@@ -568,10 +557,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			case PAGE_ACTIVATE:
 				goto activate_locked;
 			case PAGE_SUCCESS:
-				if (PageWriteback(page) || PageDirty(page)) {
-					sc->nr_io_pages++;
+				if (PageWriteback(page) || PageDirty(page))
 					goto keep;
-				}
 				/*
 				 * A synchronous write - probably a ramdisk. Go
 				 * ahead and try to reclaim the page.
@@ -1259,17 +1246,16 @@ static unsigned long shrink_zone(int priority, struct zone *zone,
  * If a zone is deemed to be full of pinned pages then just give it a light
  * scan then give up on it.
  */
-static unsigned long shrink_zones(int priority, struct zone **zones,
+static unsigned long shrink_zones(int priority, struct zonelist *zonelist,
 					struct scan_control *sc)
 {
+	enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask);
 	unsigned long nr_reclaimed = 0;
-	int i;
-
+	struct zoneref *z;
+	struct zone *zone;
 
 	sc->all_unreclaimable = 1;
-	for (i = 0; zones[i] != NULL; i++) {
-		struct zone *zone = zones[i];
-
+	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
 		if (!populated_zone(zone))
 			continue;
 		/*
@@ -1314,8 +1300,8 @@ static unsigned long shrink_zones(int priority, struct zone **zones,
  * holds filesystem locks which prevent writeout this might not work, and the
  * allocation attempt will fail.
  */
-static unsigned long do_try_to_free_pages(struct zone **zones, gfp_t gfp_mask,
-					  struct scan_control *sc)
+static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
+					struct scan_control *sc)
 {
 	int priority;
 	int ret = 0;
@@ -1323,7 +1309,9 @@ static unsigned long do_try_to_free_pages(struct zone **zones, gfp_t gfp_mask,
 	unsigned long nr_reclaimed = 0;
 	struct reclaim_state *reclaim_state = current->reclaim_state;
 	unsigned long lru_pages = 0;
-	int i;
+	struct zoneref *z;
+	struct zone *zone;
+	enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask);
 
 	if (scan_global_lru(sc))
 		count_vm_event(ALLOCSTALL);
@@ -1331,8 +1319,7 @@ static unsigned long do_try_to_free_pages(struct zone **zones, gfp_t gfp_mask,
 	 * mem_cgroup will not do shrink_slab.
 	 */
 	if (scan_global_lru(sc)) {
-		for (i = 0; zones[i] != NULL; i++) {
-			struct zone *zone = zones[i];
+		for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
 
 			if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
 				continue;
@@ -1344,16 +1331,15 @@ static unsigned long do_try_to_free_pages(struct zone **zones, gfp_t gfp_mask,
 
 	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
 		sc->nr_scanned = 0;
-		sc->nr_io_pages = 0;
 		if (!priority)
 			disable_swap_token();
-		nr_reclaimed += shrink_zones(priority, zones, sc);
+		nr_reclaimed += shrink_zones(priority, zonelist, sc);
 		/*
 		 * Don't shrink slabs when reclaiming memory from
 		 * over limit cgroups
 		 */
 		if (scan_global_lru(sc)) {
-			shrink_slab(sc->nr_scanned, gfp_mask, lru_pages);
+			shrink_slab(sc->nr_scanned, sc->gfp_mask, lru_pages);
 			if (reclaim_state) {
 				nr_reclaimed += reclaim_state->reclaimed_slab;
 				reclaim_state->reclaimed_slab = 0;
@@ -1379,8 +1365,7 @@ static unsigned long do_try_to_free_pages(struct zone **zones, gfp_t gfp_mask,
 		}
 
 		/* Take a nap, wait for some writeback to complete */
-		if (sc->nr_scanned && priority < DEF_PRIORITY - 2 &&
-				sc->nr_io_pages > sc->swap_cluster_max)
+		if (sc->nr_scanned && priority < DEF_PRIORITY - 2)
 			congestion_wait(WRITE, HZ/10);
 	}
 	/* top priority shrink_caches still had more to do? don't OOM, then */
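
With sc->nr_io_pages gone, the nap is taken on every pass that scanned pages once priority has dropped below DEF_PRIORITY - 2, rather than only when more than swap_cluster_max IO-pending pages were counted. A toy userspace model of the resulting loop shape, with usleep() standing in for congestion_wait(WRITE, HZ/10); this is an illustration of the control flow, not kernel code:

#include <stdio.h>
#include <unistd.h>

#define DEF_PRIORITY 12

/* Stand-in for congestion_wait(WRITE, HZ/10): back off ~100ms. */
static void nap(void)
{
	usleep(100 * 1000);
	puts("napping for writeback...");
}

int main(void)
{
	for (int priority = DEF_PRIORITY; priority >= 0; priority--) {
		unsigned long nr_scanned = 1;	/* pretend pages were scanned */

		printf("scanning at priority %d\n", priority);
		/* Unconditional nap at low priority, as in the patched code. */
		if (nr_scanned && priority < DEF_PRIORITY - 2)
			nap();
	}
	return 0;
}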
@@ -1398,8 +1383,7 @@ out:
 		priority = 0;
 
 	if (scan_global_lru(sc)) {
-		for (i = 0; zones[i] != NULL; i++) {
-			struct zone *zone = zones[i];
+		for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
 
 			if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
 				continue;
@@ -1412,7 +1396,8 @@ out:
 	return ret;
 }
 
-unsigned long try_to_free_pages(struct zone **zones, int order, gfp_t gfp_mask)
+unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
+								gfp_t gfp_mask)
 {
 	struct scan_control sc = {
 		.gfp_mask = gfp_mask,
@@ -1425,7 +1410,7 @@ unsigned long try_to_free_pages(struct zone **zones, int order, gfp_t gfp_mask)
 		.isolate_pages = isolate_pages_global,
 	};
 
-	return do_try_to_free_pages(zones, gfp_mask, &sc);
+	return do_try_to_free_pages(zonelist, &sc);
 }
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
@@ -1434,7 +1419,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 						gfp_t gfp_mask)
 {
 	struct scan_control sc = {
-		.gfp_mask = gfp_mask,
 		.may_writepage = !laptop_mode,
 		.may_swap = 1,
 		.swap_cluster_max = SWAP_CLUSTER_MAX,
@@ -1443,13 +1427,12 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 		.mem_cgroup = mem_cont,
 		.isolate_pages = mem_cgroup_isolate_pages,
 	};
-	struct zone **zones;
-	int target_zone = gfp_zone(GFP_HIGHUSER_MOVABLE);
+	struct zonelist *zonelist;
 
-	zones = NODE_DATA(numa_node_id())->node_zonelists[target_zone].zones;
-	if (do_try_to_free_pages(zones, sc.gfp_mask, &sc))
-		return 1;
-	return 0;
+	sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
+			(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
+	zonelist = NODE_DATA(numa_node_id())->node_zonelists;
+	return do_try_to_free_pages(zonelist, &sc);
 }
 #endif
 
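Note the gfp_mask handling in the memcg path above: rather than copying the caller's mask into scan_control, it splices the caller's reclaim-behaviour bits (GFP_RECLAIM_MASK) onto the zone-placement bits of GFP_HIGHUSER_MOVABLE, so memcg reclaim can always target the full highmem/movable zone range. A sketch of that bit-splice with made-up flag values; the real GFP constants differ:

#include <stdio.h>

/* Made-up flag layout for illustration only. */
#define __GFP_HIGHMEM	0x02u	/* placement: zone selection */
#define __GFP_WAIT	0x10u	/* behaviour: may sleep */
#define __GFP_IO	0x20u	/* behaviour: may start IO */
#define __GFP_FS	0x40u	/* behaviour: may call into the fs */

#define GFP_RECLAIM_MASK	(__GFP_WAIT | __GFP_IO | __GFP_FS)
#define GFP_HIGHUSER_MOVABLE	(__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HIGHMEM)

int main(void)
{
	/* Caller allowed to sleep and start IO, but not to touch the fs. */
	unsigned gfp_mask = __GFP_WAIT | __GFP_IO;

	/* Behaviour bits from the caller, placement bits from the
	 * GFP_HIGHUSER_MOVABLE template - the splice the patch adds. */
	unsigned sc_gfp = (gfp_mask & GFP_RECLAIM_MASK) |
			  (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);

	/* Prints 0x30 -> 0x32: __GFP_WAIT|__GFP_IO kept, __GFP_HIGHMEM
	 * placement gained, __GFP_FS still honoured as forbidden. */
	printf("caller mask %#x -> scan_control mask %#x\n", gfp_mask, sc_gfp);
	return 0;
}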
@@ -1514,7 +1497,6 @@ loop_again:
 		if (!priority)
 			disable_swap_token();
 
-		sc.nr_io_pages = 0;
 		all_zones_ok = 1;
 
 		/*
@@ -1607,8 +1589,7 @@ loop_again:
 		 * OK, kswapd is getting into trouble. Take a nap, then take
 		 * another pass across the zones.
 		 */
-		if (total_scanned && priority < DEF_PRIORITY - 2 &&
-				sc.nr_io_pages > sc.swap_cluster_max)
+		if (total_scanned && priority < DEF_PRIORITY - 2)
 			congestion_wait(WRITE, HZ/10);
 
 		/*
@@ -1664,11 +1645,10 @@ static int kswapd(void *p)
 	struct reclaim_state reclaim_state = {
 		.reclaimed_slab = 0,
 	};
-	cpumask_t cpumask;
+	node_to_cpumask_ptr(cpumask, pgdat->node_id);
 
-	cpumask = node_to_cpumask(pgdat->node_id);
-	if (!cpus_empty(cpumask))
-		set_cpus_allowed(tsk, cpumask);
+	if (!cpus_empty(*cpumask))
+		set_cpus_allowed_ptr(tsk, cpumask);
 	current->reclaim_state = &reclaim_state;
 
 	/*
@@ -1897,17 +1877,16 @@ out:
 static int __devinit cpu_callback(struct notifier_block *nfb,
 				  unsigned long action, void *hcpu)
 {
-	pg_data_t *pgdat;
-	cpumask_t mask;
 	int nid;
 
 	if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) {
 		for_each_node_state(nid, N_HIGH_MEMORY) {
-			pgdat = NODE_DATA(nid);
-			mask = node_to_cpumask(pgdat->node_id);
-			if (any_online_cpu(mask) != NR_CPUS)
+			pg_data_t *pgdat = NODE_DATA(nid);
+			node_to_cpumask_ptr(mask, pgdat->node_id);
+
+			if (any_online_cpu(*mask) < nr_cpu_ids)
 				/* One of our CPUs online: restore mask */
-				set_cpus_allowed(pgdat->kswapd, mask);
+				set_cpus_allowed_ptr(pgdat->kswapd, mask);
 		}
 	}
 	return NOTIFY_OK;
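
The kswapd() and cpu_callback() hunks switch from passing cpumask_t by value to the pointer-based node_to_cpumask_ptr()/set_cpus_allowed_ptr() helpers, and compare against nr_cpu_ids instead of NR_CPUS. On a kernel built for many CPUs a cpumask is hundreds of bytes, so each by-value call copies the whole bitmap onto the stack. A userspace model of the difference; the type layout, sizes, and helper names are illustrative, not the kernel's:

#include <stdio.h>

/* Model a kernel built for many CPUs: 4096 bits = 512 bytes per mask. */
#define NR_CPUS 4096
#define BITS_PER_LONG (8 * (int)sizeof(unsigned long))

typedef struct {
	unsigned long bits[NR_CPUS / BITS_PER_LONG];
} cpumask_t;

/* Old style: the entire bitmap is copied for every call. */
static int any_set_by_value(cpumask_t mask)
{
	for (int i = 0; i < NR_CPUS / BITS_PER_LONG; i++)
		if (mask.bits[i])
			return 1;
	return 0;
}

/* New style: only a pointer crosses the call boundary. */
static int any_set_by_ptr(const cpumask_t *mask)
{
	for (int i = 0; i < NR_CPUS / BITS_PER_LONG; i++)
		if (mask->bits[i])
			return 1;
	return 0;
}

int main(void)
{
	static cpumask_t node_mask;		/* per-node mask stand-in */

	node_mask.bits[0] = 0x3;		/* CPUs 0 and 1 on this node */
	printf("sizeof(cpumask_t) = %zu bytes\n", sizeof(cpumask_t));
	printf("by value: %d, by pointer: %d\n",
	       any_set_by_value(node_mask),	/* copies the full 512 bytes */
	       any_set_by_ptr(&node_mask));	/* copies one pointer */
	return 0;
}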