Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--   mm/vmscan.c   464
1 file changed, 239 insertions(+), 225 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 777af57fd8c8..3ff3311447f5 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
| @@ -13,7 +13,7 @@ | |||
| 13 | 13 | ||
| 14 | #include <linux/mm.h> | 14 | #include <linux/mm.h> |
| 15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
| 16 | #include <linux/slab.h> | 16 | #include <linux/gfp.h> |
| 17 | #include <linux/kernel_stat.h> | 17 | #include <linux/kernel_stat.h> |
| 18 | #include <linux/swap.h> | 18 | #include <linux/swap.h> |
| 19 | #include <linux/pagemap.h> | 19 | #include <linux/pagemap.h> |
| @@ -55,6 +55,11 @@ struct scan_control { | |||
| 55 | /* Number of pages freed so far during a call to shrink_zones() */ | 55 | /* Number of pages freed so far during a call to shrink_zones() */ |
| 56 | unsigned long nr_reclaimed; | 56 | unsigned long nr_reclaimed; |
| 57 | 57 | ||
| 58 | /* How many pages shrink_list() should reclaim */ | ||
| 59 | unsigned long nr_to_reclaim; | ||
| 60 | |||
| 61 | unsigned long hibernation_mode; | ||
| 62 | |||
| 58 | /* This context's GFP mask */ | 63 | /* This context's GFP mask */ |
| 59 | gfp_t gfp_mask; | 64 | gfp_t gfp_mask; |
| 60 | 65 | ||
| @@ -66,12 +71,6 @@ struct scan_control { | |||
| 66 | /* Can pages be swapped as part of reclaim? */ | 71 | /* Can pages be swapped as part of reclaim? */ |
| 67 | int may_swap; | 72 | int may_swap; |
| 68 | 73 | ||
| 69 | /* This context's SWAP_CLUSTER_MAX. If freeing memory for | ||
| 70 | * suspend, we effectively ignore SWAP_CLUSTER_MAX. | ||
| 71 | * In this context, it doesn't matter that we scan the | ||
| 72 | * whole list at once. */ | ||
| 73 | int swap_cluster_max; | ||
| 74 | |||
| 75 | int swappiness; | 74 | int swappiness; |
| 76 | 75 | ||
| 77 | int all_unreclaimable; | 76 | int all_unreclaimable; |
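The hunks above drop the per-call swap_cluster_max field from struct scan_control and add an explicit target, nr_to_reclaim, plus a hibernation_mode flag. As a rough user-space sketch of the intended semantics (a trimmed stand-in struct and made-up numbers, not the kernel code): each caller now states how much it wants reclaimed, and later hunks in this patch set it to SWAP_CLUSTER_MAX for direct reclaim, ULONG_MAX for kswapd, and the caller-supplied target for hibernation.

/* User-space model of the reworked scan_control target handling.
 * Names mirror the patch; the struct is trimmed to the fields touched
 * by these hunks. Not kernel code.
 */
#include <limits.h>
#include <stdio.h>

#define SWAP_CLUSTER_MAX 32UL   /* same value as the kernel constant */

struct scan_control_model {
        unsigned long nr_reclaimed;    /* progress so far */
        unsigned long nr_to_reclaim;   /* how much shrink_list() should reclaim */
        unsigned long hibernation_mode;
};

/* The bail-out test the later hunks switch to: stop early once the
 * target is met (kswapd avoids this by using ULONG_MAX). */
static int met_target(const struct scan_control_model *sc)
{
        return sc->nr_reclaimed >= sc->nr_to_reclaim;
}

int main(void)
{
        struct scan_control_model direct = { .nr_to_reclaim = SWAP_CLUSTER_MAX };
        struct scan_control_model kswapd = { .nr_to_reclaim = ULONG_MAX };
        struct scan_control_model hibern = { .nr_to_reclaim = 4096,
                                             .hibernation_mode = 1 };

        direct.nr_reclaimed = kswapd.nr_reclaimed = hibern.nr_reclaimed = 64;

        printf("direct reclaim done early: %d\n", met_target(&direct)); /* 1 */
        printf("kswapd done early:         %d\n", met_target(&kswapd)); /* 0 */
        printf("hibernation done early:    %d\n", met_target(&hibern)); /* 0 */
        return 0;
}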
| @@ -263,27 +262,6 @@ unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask, | |||
| 263 | return ret; | 262 | return ret; |
| 264 | } | 263 | } |
| 265 | 264 | ||
| 266 | /* Called without lock on whether page is mapped, so answer is unstable */ | ||
| 267 | static inline int page_mapping_inuse(struct page *page) | ||
| 268 | { | ||
| 269 | struct address_space *mapping; | ||
| 270 | |||
| 271 | /* Page is in somebody's page tables. */ | ||
| 272 | if (page_mapped(page)) | ||
| 273 | return 1; | ||
| 274 | |||
| 275 | /* Be more reluctant to reclaim swapcache than pagecache */ | ||
| 276 | if (PageSwapCache(page)) | ||
| 277 | return 1; | ||
| 278 | |||
| 279 | mapping = page_mapping(page); | ||
| 280 | if (!mapping) | ||
| 281 | return 0; | ||
| 282 | |||
| 283 | /* File is mmap'd by somebody? */ | ||
| 284 | return mapping_mapped(mapping); | ||
| 285 | } | ||
| 286 | |||
| 287 | static inline int is_page_cache_freeable(struct page *page) | 265 | static inline int is_page_cache_freeable(struct page *page) |
| 288 | { | 266 | { |
| 289 | /* | 267 | /* |
| @@ -358,7 +336,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping, | |||
| 358 | * stalls if we need to run get_block(). We could test | 336 | * stalls if we need to run get_block(). We could test |
| 359 | * PagePrivate for that. | 337 | * PagePrivate for that. |
| 360 | * | 338 | * |
| 361 | * If this process is currently in generic_file_write() against | 339 | * If this process is currently in __generic_file_aio_write() against |
| 362 | * this page's queue, we can perform writeback even if that | 340 | * this page's queue, we can perform writeback even if that |
| 363 | * will block. | 341 | * will block. |
| 364 | * | 342 | * |
| @@ -580,6 +558,65 @@ redo: | |||
| 580 | put_page(page); /* drop ref from isolate */ | 558 | put_page(page); /* drop ref from isolate */ |
| 581 | } | 559 | } |
| 582 | 560 | ||
| 561 | enum page_references { | ||
| 562 | PAGEREF_RECLAIM, | ||
| 563 | PAGEREF_RECLAIM_CLEAN, | ||
| 564 | PAGEREF_KEEP, | ||
| 565 | PAGEREF_ACTIVATE, | ||
| 566 | }; | ||
| 567 | |||
| 568 | static enum page_references page_check_references(struct page *page, | ||
| 569 | struct scan_control *sc) | ||
| 570 | { | ||
| 571 | int referenced_ptes, referenced_page; | ||
| 572 | unsigned long vm_flags; | ||
| 573 | |||
| 574 | referenced_ptes = page_referenced(page, 1, sc->mem_cgroup, &vm_flags); | ||
| 575 | referenced_page = TestClearPageReferenced(page); | ||
| 576 | |||
| 577 | /* Lumpy reclaim - ignore references */ | ||
| 578 | if (sc->order > PAGE_ALLOC_COSTLY_ORDER) | ||
| 579 | return PAGEREF_RECLAIM; | ||
| 580 | |||
| 581 | /* | ||
| 582 | * Mlock lost the isolation race with us. Let try_to_unmap() | ||
| 583 | * move the page to the unevictable list. | ||
| 584 | */ | ||
| 585 | if (vm_flags & VM_LOCKED) | ||
| 586 | return PAGEREF_RECLAIM; | ||
| 587 | |||
| 588 | if (referenced_ptes) { | ||
| 589 | if (PageAnon(page)) | ||
| 590 | return PAGEREF_ACTIVATE; | ||
| 591 | /* | ||
| 592 | * All mapped pages start out with page table | ||
| 593 | * references from the instantiating fault, so we need | ||
| 594 | * to look twice if a mapped file page is used more | ||
| 595 | * than once. | ||
| 596 | * | ||
| 597 | * Mark it and spare it for another trip around the | ||
| 598 | * inactive list. Another page table reference will | ||
| 599 | * lead to its activation. | ||
| 600 | * | ||
| 601 | * Note: the mark is set for activated pages as well | ||
| 602 | * so that recently deactivated but used pages are | ||
| 603 | * quickly recovered. | ||
| 604 | */ | ||
| 605 | SetPageReferenced(page); | ||
| 606 | |||
| 607 | if (referenced_page) | ||
| 608 | return PAGEREF_ACTIVATE; | ||
| 609 | |||
| 610 | return PAGEREF_KEEP; | ||
| 611 | } | ||
| 612 | |||
| 613 | /* Reclaim if clean, defer dirty pages to writeback */ | ||
| 614 | if (referenced_page) | ||
| 615 | return PAGEREF_RECLAIM_CLEAN; | ||
| 616 | |||
| 617 | return PAGEREF_RECLAIM; | ||
| 618 | } | ||
| 619 | |||
| 583 | /* | 620 | /* |
| 584 | * shrink_page_list() returns the number of reclaimed pages | 621 | * shrink_page_list() returns the number of reclaimed pages |
| 585 | */ | 622 | */ |
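The new page_check_references() replaces the page_mapping_inuse()-based test removed elsewhere in this patch with a single classification. A stand-alone user-space model of the same decision order (the page-flag reads are passed in as plain ints, and PAGE_ALLOC_COSTLY_ORDER is assumed to be 3 as in mainline) shows the "used-once" handling for mapped file pages.

/* User-space model of the reference classification added above.
 * Inputs stand in for page_referenced(), TestClearPageReferenced(),
 * PageAnon() and VM_LOCKED; the SetPageReferenced() side effect is
 * only noted in a comment. Not kernel code. */
#include <stdio.h>

#define PAGE_ALLOC_COSTLY_ORDER 3  /* kernel value assumed */

enum page_references {
        PAGEREF_RECLAIM,
        PAGEREF_RECLAIM_CLEAN,
        PAGEREF_KEEP,
        PAGEREF_ACTIVATE,
};

static enum page_references check_refs(int order, int vm_locked,
                                       int referenced_ptes,
                                       int referenced_page, int is_anon)
{
        /* Lumpy reclaim - ignore references */
        if (order > PAGE_ALLOC_COSTLY_ORDER)
                return PAGEREF_RECLAIM;

        /* mlocked page lost the isolation race: let unmap sort it out */
        if (vm_locked)
                return PAGEREF_RECLAIM;

        if (referenced_ptes) {
                if (is_anon)
                        return PAGEREF_ACTIVATE;
                /* Mapped file page: the first reference (here the kernel
                 * sets PG_referenced) only earns another trip around the
                 * inactive list; a second reference activates it. */
                if (referenced_page)
                        return PAGEREF_ACTIVATE;
                return PAGEREF_KEEP;
        }

        /* Reclaim if clean, defer dirty pages to writeback */
        if (referenced_page)
                return PAGEREF_RECLAIM_CLEAN;
        return PAGEREF_RECLAIM;
}

int main(void)
{
        /* Mapped file page seen once: kept, not yet activated. */
        printf("%d\n", check_refs(0, 0, 1, 0, 0) == PAGEREF_KEEP);          /* 1 */
        /* Same page referenced again (PG_referenced was set): activate. */
        printf("%d\n", check_refs(0, 0, 1, 1, 0) == PAGEREF_ACTIVATE);      /* 1 */
        /* Unmapped but recently referenced page: reclaim only if clean. */
        printf("%d\n", check_refs(0, 0, 0, 1, 0) == PAGEREF_RECLAIM_CLEAN); /* 1 */
        return 0;
}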
| @@ -591,16 +628,15 @@ static unsigned long shrink_page_list(struct list_head *page_list, | |||
| 591 | struct pagevec freed_pvec; | 628 | struct pagevec freed_pvec; |
| 592 | int pgactivate = 0; | 629 | int pgactivate = 0; |
| 593 | unsigned long nr_reclaimed = 0; | 630 | unsigned long nr_reclaimed = 0; |
| 594 | unsigned long vm_flags; | ||
| 595 | 631 | ||
| 596 | cond_resched(); | 632 | cond_resched(); |
| 597 | 633 | ||
| 598 | pagevec_init(&freed_pvec, 1); | 634 | pagevec_init(&freed_pvec, 1); |
| 599 | while (!list_empty(page_list)) { | 635 | while (!list_empty(page_list)) { |
| 636 | enum page_references references; | ||
| 600 | struct address_space *mapping; | 637 | struct address_space *mapping; |
| 601 | struct page *page; | 638 | struct page *page; |
| 602 | int may_enter_fs; | 639 | int may_enter_fs; |
| 603 | int referenced; | ||
| 604 | 640 | ||
| 605 | cond_resched(); | 641 | cond_resched(); |
| 606 | 642 | ||
| @@ -642,17 +678,16 @@ static unsigned long shrink_page_list(struct list_head *page_list, | |||
| 642 | goto keep_locked; | 678 | goto keep_locked; |
| 643 | } | 679 | } |
| 644 | 680 | ||
| 645 | referenced = page_referenced(page, 1, | 681 | references = page_check_references(page, sc); |
| 646 | sc->mem_cgroup, &vm_flags); | 682 | switch (references) { |
| 647 | /* | 683 | case PAGEREF_ACTIVATE: |
| 648 | * In active use or really unfreeable? Activate it. | ||
| 649 | * If page which have PG_mlocked lost isoltation race, | ||
| 650 | * try_to_unmap moves it to unevictable list | ||
| 651 | */ | ||
| 652 | if (sc->order <= PAGE_ALLOC_COSTLY_ORDER && | ||
| 653 | referenced && page_mapping_inuse(page) | ||
| 654 | && !(vm_flags & VM_LOCKED)) | ||
| 655 | goto activate_locked; | 684 | goto activate_locked; |
| 685 | case PAGEREF_KEEP: | ||
| 686 | goto keep_locked; | ||
| 687 | case PAGEREF_RECLAIM: | ||
| 688 | case PAGEREF_RECLAIM_CLEAN: | ||
| 689 | ; /* try to reclaim the page below */ | ||
| 690 | } | ||
| 656 | 691 | ||
| 657 | /* | 692 | /* |
| 658 | * Anonymous process memory has backing store? | 693 | * Anonymous process memory has backing store? |
| @@ -686,7 +721,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, | |||
| 686 | } | 721 | } |
| 687 | 722 | ||
| 688 | if (PageDirty(page)) { | 723 | if (PageDirty(page)) { |
| 689 | if (sc->order <= PAGE_ALLOC_COSTLY_ORDER && referenced) | 724 | if (references == PAGEREF_RECLAIM_CLEAN) |
| 690 | goto keep_locked; | 725 | goto keep_locked; |
| 691 | if (!may_enter_fs) | 726 | if (!may_enter_fs) |
| 692 | goto keep_locked; | 727 | goto keep_locked; |
| @@ -1132,7 +1167,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan, | |||
| 1132 | unsigned long nr_anon; | 1167 | unsigned long nr_anon; |
| 1133 | unsigned long nr_file; | 1168 | unsigned long nr_file; |
| 1134 | 1169 | ||
| 1135 | nr_taken = sc->isolate_pages(sc->swap_cluster_max, | 1170 | nr_taken = sc->isolate_pages(SWAP_CLUSTER_MAX, |
| 1136 | &page_list, &nr_scan, sc->order, mode, | 1171 | &page_list, &nr_scan, sc->order, mode, |
| 1137 | zone, sc->mem_cgroup, 0, file); | 1172 | zone, sc->mem_cgroup, 0, file); |
| 1138 | 1173 | ||
| @@ -1166,10 +1201,8 @@ static unsigned long shrink_inactive_list(unsigned long max_scan, | |||
| 1166 | __mod_zone_page_state(zone, NR_ISOLATED_ANON, nr_anon); | 1201 | __mod_zone_page_state(zone, NR_ISOLATED_ANON, nr_anon); |
| 1167 | __mod_zone_page_state(zone, NR_ISOLATED_FILE, nr_file); | 1202 | __mod_zone_page_state(zone, NR_ISOLATED_FILE, nr_file); |
| 1168 | 1203 | ||
| 1169 | reclaim_stat->recent_scanned[0] += count[LRU_INACTIVE_ANON]; | 1204 | reclaim_stat->recent_scanned[0] += nr_anon; |
| 1170 | reclaim_stat->recent_scanned[0] += count[LRU_ACTIVE_ANON]; | 1205 | reclaim_stat->recent_scanned[1] += nr_file; |
| 1171 | reclaim_stat->recent_scanned[1] += count[LRU_INACTIVE_FILE]; | ||
| 1172 | reclaim_stat->recent_scanned[1] += count[LRU_ACTIVE_FILE]; | ||
| 1173 | 1206 | ||
| 1174 | spin_unlock_irq(&zone->lru_lock); | 1207 | spin_unlock_irq(&zone->lru_lock); |
| 1175 | 1208 | ||
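The two-line form above is equivalent to the four additions it replaces, assuming nr_anon and nr_file are computed earlier in shrink_inactive_list() as the anon and file sums of the same count[] array (that computation is outside this hunk). A quick stand-alone check with hypothetical counts:

/* Sanity check that the new two-line form equals the old four-line form,
 * under the assumption stated above. Numbers are made up. */
#include <assert.h>
#include <stdio.h>

enum lru_list { LRU_INACTIVE_ANON, LRU_ACTIVE_ANON,
                LRU_INACTIVE_FILE, LRU_ACTIVE_FILE, NR_LRU_LISTS };

int main(void)
{
        unsigned long count[NR_LRU_LISTS] = { 10, 3, 40, 7 };
        unsigned long nr_anon = count[LRU_INACTIVE_ANON] + count[LRU_ACTIVE_ANON];
        unsigned long nr_file = count[LRU_INACTIVE_FILE] + count[LRU_ACTIVE_FILE];
        unsigned long recent_scanned[2] = { 0, 0 };

        /* old form */
        recent_scanned[0] += count[LRU_INACTIVE_ANON];
        recent_scanned[0] += count[LRU_ACTIVE_ANON];
        recent_scanned[1] += count[LRU_INACTIVE_FILE];
        recent_scanned[1] += count[LRU_ACTIVE_FILE];

        /* new form gives the same totals */
        assert(recent_scanned[0] == nr_anon);
        assert(recent_scanned[1] == nr_file);
        printf("equivalent: anon=%lu file=%lu\n", nr_anon, nr_file);
        return 0;
}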
| @@ -1353,9 +1386,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, | |||
| 1353 | continue; | 1386 | continue; |
| 1354 | } | 1387 | } |
| 1355 | 1388 | ||
| 1356 | /* page_referenced clears PageReferenced */ | 1389 | if (page_referenced(page, 0, sc->mem_cgroup, &vm_flags)) { |
| 1357 | if (page_mapping_inuse(page) && | ||
| 1358 | page_referenced(page, 0, sc->mem_cgroup, &vm_flags)) { | ||
| 1359 | nr_rotated++; | 1390 | nr_rotated++; |
| 1360 | /* | 1391 | /* |
| 1361 | * Identify referenced, file-backed active pages and | 1392 | * Identify referenced, file-backed active pages and |
| @@ -1464,20 +1495,26 @@ static int inactive_file_is_low(struct zone *zone, struct scan_control *sc) | |||
| 1464 | return low; | 1495 | return low; |
| 1465 | } | 1496 | } |
| 1466 | 1497 | ||
| 1498 | static int inactive_list_is_low(struct zone *zone, struct scan_control *sc, | ||
| 1499 | int file) | ||
| 1500 | { | ||
| 1501 | if (file) | ||
| 1502 | return inactive_file_is_low(zone, sc); | ||
| 1503 | else | ||
| 1504 | return inactive_anon_is_low(zone, sc); | ||
| 1505 | } | ||
| 1506 | |||
| 1467 | static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, | 1507 | static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, |
| 1468 | struct zone *zone, struct scan_control *sc, int priority) | 1508 | struct zone *zone, struct scan_control *sc, int priority) |
| 1469 | { | 1509 | { |
| 1470 | int file = is_file_lru(lru); | 1510 | int file = is_file_lru(lru); |
| 1471 | 1511 | ||
| 1472 | if (lru == LRU_ACTIVE_FILE && inactive_file_is_low(zone, sc)) { | 1512 | if (is_active_lru(lru)) { |
| 1473 | shrink_active_list(nr_to_scan, zone, sc, priority, file); | 1513 | if (inactive_list_is_low(zone, sc, file)) |
| 1514 | shrink_active_list(nr_to_scan, zone, sc, priority, file); | ||
| 1474 | return 0; | 1515 | return 0; |
| 1475 | } | 1516 | } |
| 1476 | 1517 | ||
| 1477 | if (lru == LRU_ACTIVE_ANON && inactive_anon_is_low(zone, sc)) { | ||
| 1478 | shrink_active_list(nr_to_scan, zone, sc, priority, file); | ||
| 1479 | return 0; | ||
| 1480 | } | ||
| 1481 | return shrink_inactive_list(nr_to_scan, zone, sc, priority, file); | 1518 | return shrink_inactive_list(nr_to_scan, zone, sc, priority, file); |
| 1482 | } | 1519 | } |
| 1483 | 1520 | ||
| @@ -1567,15 +1604,14 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc, | |||
| 1567 | * until we collected @swap_cluster_max pages to scan. | 1604 | * until we collected @swap_cluster_max pages to scan. |
| 1568 | */ | 1605 | */ |
| 1569 | static unsigned long nr_scan_try_batch(unsigned long nr_to_scan, | 1606 | static unsigned long nr_scan_try_batch(unsigned long nr_to_scan, |
| 1570 | unsigned long *nr_saved_scan, | 1607 | unsigned long *nr_saved_scan) |
| 1571 | unsigned long swap_cluster_max) | ||
| 1572 | { | 1608 | { |
| 1573 | unsigned long nr; | 1609 | unsigned long nr; |
| 1574 | 1610 | ||
| 1575 | *nr_saved_scan += nr_to_scan; | 1611 | *nr_saved_scan += nr_to_scan; |
| 1576 | nr = *nr_saved_scan; | 1612 | nr = *nr_saved_scan; |
| 1577 | 1613 | ||
| 1578 | if (nr >= swap_cluster_max) | 1614 | if (nr >= SWAP_CLUSTER_MAX) |
| 1579 | *nr_saved_scan = 0; | 1615 | *nr_saved_scan = 0; |
| 1580 | else | 1616 | else |
| 1581 | nr = 0; | 1617 | nr = 0; |
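With the per-context swap_cluster_max gone, nr_scan_try_batch() always batches against the global SWAP_CLUSTER_MAX. A stand-alone model of the accumulate-then-release behaviour, with made-up inputs:

/* Model of nr_scan_try_batch(): small per-priority scan counts are
 * accumulated in *nr_saved_scan and only released once they reach
 * SWAP_CLUSTER_MAX. User-space sketch, not kernel code. */
#include <stdio.h>

#define SWAP_CLUSTER_MAX 32UL

static unsigned long nr_scan_try_batch(unsigned long nr_to_scan,
                                       unsigned long *nr_saved_scan)
{
        unsigned long nr;

        *nr_saved_scan += nr_to_scan;
        nr = *nr_saved_scan;

        if (nr >= SWAP_CLUSTER_MAX)
                *nr_saved_scan = 0;
        else
                nr = 0;

        return nr;
}

int main(void)
{
        unsigned long saved = 0;
        int i;

        /* Feeding 10 pages per call: nothing is released until the
         * accumulated total crosses 32, then the whole batch comes out. */
        for (i = 0; i < 5; i++) {
                unsigned long batch = nr_scan_try_batch(10, &saved);
                printf("call %d -> batch %lu (saved %lu)\n", i, batch, saved);
        }
        return 0;
}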
| @@ -1594,7 +1630,7 @@ static void shrink_zone(int priority, struct zone *zone, | |||
| 1594 | unsigned long percent[2]; /* anon @ 0; file @ 1 */ | 1630 | unsigned long percent[2]; /* anon @ 0; file @ 1 */ |
| 1595 | enum lru_list l; | 1631 | enum lru_list l; |
| 1596 | unsigned long nr_reclaimed = sc->nr_reclaimed; | 1632 | unsigned long nr_reclaimed = sc->nr_reclaimed; |
| 1597 | unsigned long swap_cluster_max = sc->swap_cluster_max; | 1633 | unsigned long nr_to_reclaim = sc->nr_to_reclaim; |
| 1598 | struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc); | 1634 | struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc); |
| 1599 | int noswap = 0; | 1635 | int noswap = 0; |
| 1600 | 1636 | ||
| @@ -1616,15 +1652,15 @@ static void shrink_zone(int priority, struct zone *zone, | |||
| 1616 | scan = (scan * percent[file]) / 100; | 1652 | scan = (scan * percent[file]) / 100; |
| 1617 | } | 1653 | } |
| 1618 | nr[l] = nr_scan_try_batch(scan, | 1654 | nr[l] = nr_scan_try_batch(scan, |
| 1619 | &reclaim_stat->nr_saved_scan[l], | 1655 | &reclaim_stat->nr_saved_scan[l]); |
| 1620 | swap_cluster_max); | ||
| 1621 | } | 1656 | } |
| 1622 | 1657 | ||
| 1623 | while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || | 1658 | while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || |
| 1624 | nr[LRU_INACTIVE_FILE]) { | 1659 | nr[LRU_INACTIVE_FILE]) { |
| 1625 | for_each_evictable_lru(l) { | 1660 | for_each_evictable_lru(l) { |
| 1626 | if (nr[l]) { | 1661 | if (nr[l]) { |
| 1627 | nr_to_scan = min(nr[l], swap_cluster_max); | 1662 | nr_to_scan = min_t(unsigned long, |
| 1663 | nr[l], SWAP_CLUSTER_MAX); | ||
| 1628 | nr[l] -= nr_to_scan; | 1664 | nr[l] -= nr_to_scan; |
| 1629 | 1665 | ||
| 1630 | nr_reclaimed += shrink_list(l, nr_to_scan, | 1666 | nr_reclaimed += shrink_list(l, nr_to_scan, |
| @@ -1639,8 +1675,7 @@ static void shrink_zone(int priority, struct zone *zone, | |||
| 1639 | * with multiple processes reclaiming pages, the total | 1675 | * with multiple processes reclaiming pages, the total |
| 1640 | * freeing target can get unreasonably large. | 1676 | * freeing target can get unreasonably large. |
| 1641 | */ | 1677 | */ |
| 1642 | if (nr_reclaimed > swap_cluster_max && | 1678 | if (nr_reclaimed >= nr_to_reclaim && priority < DEF_PRIORITY) |
| 1643 | priority < DEF_PRIORITY && !current_is_kswapd()) | ||
| 1644 | break; | 1679 | break; |
| 1645 | } | 1680 | } |
| 1646 | 1681 | ||
| @@ -1693,8 +1728,7 @@ static void shrink_zones(int priority, struct zonelist *zonelist, | |||
| 1693 | continue; | 1728 | continue; |
| 1694 | note_zone_scanning_priority(zone, priority); | 1729 | note_zone_scanning_priority(zone, priority); |
| 1695 | 1730 | ||
| 1696 | if (zone_is_all_unreclaimable(zone) && | 1731 | if (zone->all_unreclaimable && priority != DEF_PRIORITY) |
| 1697 | priority != DEF_PRIORITY) | ||
| 1698 | continue; /* Let kswapd poll it */ | 1732 | continue; /* Let kswapd poll it */ |
| 1699 | sc->all_unreclaimable = 0; | 1733 | sc->all_unreclaimable = 0; |
| 1700 | } else { | 1734 | } else { |
| @@ -1738,6 +1772,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, | |||
| 1738 | struct zoneref *z; | 1772 | struct zoneref *z; |
| 1739 | struct zone *zone; | 1773 | struct zone *zone; |
| 1740 | enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask); | 1774 | enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask); |
| 1775 | unsigned long writeback_threshold; | ||
| 1741 | 1776 | ||
| 1742 | delayacct_freepages_start(); | 1777 | delayacct_freepages_start(); |
| 1743 | 1778 | ||
| @@ -1773,7 +1808,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, | |||
| 1773 | } | 1808 | } |
| 1774 | } | 1809 | } |
| 1775 | total_scanned += sc->nr_scanned; | 1810 | total_scanned += sc->nr_scanned; |
| 1776 | if (sc->nr_reclaimed >= sc->swap_cluster_max) { | 1811 | if (sc->nr_reclaimed >= sc->nr_to_reclaim) { |
| 1777 | ret = sc->nr_reclaimed; | 1812 | ret = sc->nr_reclaimed; |
| 1778 | goto out; | 1813 | goto out; |
| 1779 | } | 1814 | } |
| @@ -1785,14 +1820,15 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, | |||
| 1785 | * that's undesirable in laptop mode, where we *want* lumpy | 1820 | * that's undesirable in laptop mode, where we *want* lumpy |
| 1786 | * writeout. So in laptop mode, write out the whole world. | 1821 | * writeout. So in laptop mode, write out the whole world. |
| 1787 | */ | 1822 | */ |
| 1788 | if (total_scanned > sc->swap_cluster_max + | 1823 | writeback_threshold = sc->nr_to_reclaim + sc->nr_to_reclaim / 2; |
| 1789 | sc->swap_cluster_max / 2) { | 1824 | if (total_scanned > writeback_threshold) { |
| 1790 | wakeup_flusher_threads(laptop_mode ? 0 : total_scanned); | 1825 | wakeup_flusher_threads(laptop_mode ? 0 : total_scanned); |
| 1791 | sc->may_writepage = 1; | 1826 | sc->may_writepage = 1; |
| 1792 | } | 1827 | } |
| 1793 | 1828 | ||
| 1794 | /* Take a nap, wait for some writeback to complete */ | 1829 | /* Take a nap, wait for some writeback to complete */ |
| 1795 | if (sc->nr_scanned && priority < DEF_PRIORITY - 2) | 1830 | if (!sc->hibernation_mode && sc->nr_scanned && |
| 1831 | priority < DEF_PRIORITY - 2) | ||
| 1796 | congestion_wait(BLK_RW_ASYNC, HZ/10); | 1832 | congestion_wait(BLK_RW_ASYNC, HZ/10); |
| 1797 | } | 1833 | } |
| 1798 | /* top priority shrink_zones still had more to do? don't OOM, then */ | 1834 | /* top priority shrink_zones still had more to do? don't OOM, then */ |
| @@ -1831,7 +1867,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, | |||
| 1831 | struct scan_control sc = { | 1867 | struct scan_control sc = { |
| 1832 | .gfp_mask = gfp_mask, | 1868 | .gfp_mask = gfp_mask, |
| 1833 | .may_writepage = !laptop_mode, | 1869 | .may_writepage = !laptop_mode, |
| 1834 | .swap_cluster_max = SWAP_CLUSTER_MAX, | 1870 | .nr_to_reclaim = SWAP_CLUSTER_MAX, |
| 1835 | .may_unmap = 1, | 1871 | .may_unmap = 1, |
| 1836 | .may_swap = 1, | 1872 | .may_swap = 1, |
| 1837 | .swappiness = vm_swappiness, | 1873 | .swappiness = vm_swappiness, |
| @@ -1855,7 +1891,6 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, | |||
| 1855 | .may_writepage = !laptop_mode, | 1891 | .may_writepage = !laptop_mode, |
| 1856 | .may_unmap = 1, | 1892 | .may_unmap = 1, |
| 1857 | .may_swap = !noswap, | 1893 | .may_swap = !noswap, |
| 1858 | .swap_cluster_max = SWAP_CLUSTER_MAX, | ||
| 1859 | .swappiness = swappiness, | 1894 | .swappiness = swappiness, |
| 1860 | .order = 0, | 1895 | .order = 0, |
| 1861 | .mem_cgroup = mem, | 1896 | .mem_cgroup = mem, |
| @@ -1889,7 +1924,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, | |||
| 1889 | .may_writepage = !laptop_mode, | 1924 | .may_writepage = !laptop_mode, |
| 1890 | .may_unmap = 1, | 1925 | .may_unmap = 1, |
| 1891 | .may_swap = !noswap, | 1926 | .may_swap = !noswap, |
| 1892 | .swap_cluster_max = SWAP_CLUSTER_MAX, | 1927 | .nr_to_reclaim = SWAP_CLUSTER_MAX, |
| 1893 | .swappiness = swappiness, | 1928 | .swappiness = swappiness, |
| 1894 | .order = 0, | 1929 | .order = 0, |
| 1895 | .mem_cgroup = mem_cont, | 1930 | .mem_cgroup = mem_cont, |
| @@ -1904,6 +1939,33 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, | |||
| 1904 | } | 1939 | } |
| 1905 | #endif | 1940 | #endif |
| 1906 | 1941 | ||
| 1942 | /* is kswapd sleeping prematurely? */ | ||
| 1943 | static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining) | ||
| 1944 | { | ||
| 1945 | int i; | ||
| 1946 | |||
| 1947 | /* If a direct reclaimer woke kswapd within HZ/10, it's premature */ | ||
| 1948 | if (remaining) | ||
| 1949 | return 1; | ||
| 1950 | |||
| 1951 | /* If after HZ/10, a zone is below the high mark, it's premature */ | ||
| 1952 | for (i = 0; i < pgdat->nr_zones; i++) { | ||
| 1953 | struct zone *zone = pgdat->node_zones + i; | ||
| 1954 | |||
| 1955 | if (!populated_zone(zone)) | ||
| 1956 | continue; | ||
| 1957 | |||
| 1958 | if (zone->all_unreclaimable) | ||
| 1959 | continue; | ||
| 1960 | |||
| 1961 | if (!zone_watermark_ok(zone, order, high_wmark_pages(zone), | ||
| 1962 | 0, 0)) | ||
| 1963 | return 1; | ||
| 1964 | } | ||
| 1965 | |||
| 1966 | return 0; | ||
| 1967 | } | ||
| 1968 | |||
| 1907 | /* | 1969 | /* |
| 1908 | * For kswapd, balance_pgdat() will work across all this node's zones until | 1970 | * For kswapd, balance_pgdat() will work across all this node's zones until |
| 1909 | * they are all at high_wmark_pages(zone). | 1971 | * they are all at high_wmark_pages(zone). |
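sleeping_prematurely() is used by the kswapd hunk further down to decide whether a short nap should turn into a full sleep. A user-space model with stubbed zones (a populated flag, an all_unreclaimable flag, and a precomputed watermark result standing in for zone_watermark_ok()):

/* Model of sleeping_prematurely(): the sleep was premature if kswapd was
 * woken before the timeout expired, or if any usable zone is still below
 * its high watermark. Zones are reduced to three flags here. */
#include <stdio.h>

struct zone_model {
        int populated;
        int all_unreclaimable;
        int watermark_ok;   /* stands in for zone_watermark_ok(high_wmark) */
};

static int sleeping_prematurely(const struct zone_model *zones, int nr_zones,
                                long remaining)
{
        int i;

        /* Woken by a direct reclaimer before the HZ/10 timeout ran out. */
        if (remaining)
                return 1;

        /* Any populated, reclaimable zone still under the high watermark. */
        for (i = 0; i < nr_zones; i++) {
                if (!zones[i].populated || zones[i].all_unreclaimable)
                        continue;
                if (!zones[i].watermark_ok)
                        return 1;
        }
        return 0;
}

int main(void)
{
        struct zone_model node[] = {
                { .populated = 1, .watermark_ok = 1 },
                { .populated = 1, .all_unreclaimable = 1, .watermark_ok = 0 },
        };

        printf("%d\n", sleeping_prematurely(node, 2, 0));  /* 0: may sleep */
        node[0].watermark_ok = 0;
        printf("%d\n", sleeping_prematurely(node, 2, 0));  /* 1: keep working */
        printf("%d\n", sleeping_prematurely(node, 2, 5));  /* 1: woken early */
        return 0;
}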
| @@ -1936,7 +1998,11 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order) | |||
| 1936 | .gfp_mask = GFP_KERNEL, | 1998 | .gfp_mask = GFP_KERNEL, |
| 1937 | .may_unmap = 1, | 1999 | .may_unmap = 1, |
| 1938 | .may_swap = 1, | 2000 | .may_swap = 1, |
| 1939 | .swap_cluster_max = SWAP_CLUSTER_MAX, | 2001 | /* |
| 2002 | * kswapd doesn't want to be bailed out while reclaim. because | ||
| 2003 | * we want to put equal scanning pressure on each zone. | ||
| 2004 | */ | ||
| 2005 | .nr_to_reclaim = ULONG_MAX, | ||
| 1940 | .swappiness = vm_swappiness, | 2006 | .swappiness = vm_swappiness, |
| 1941 | .order = order, | 2007 | .order = order, |
| 1942 | .mem_cgroup = NULL, | 2008 | .mem_cgroup = NULL, |
| @@ -1961,6 +2027,7 @@ loop_again: | |||
| 1961 | for (priority = DEF_PRIORITY; priority >= 0; priority--) { | 2027 | for (priority = DEF_PRIORITY; priority >= 0; priority--) { |
| 1962 | int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ | 2028 | int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ |
| 1963 | unsigned long lru_pages = 0; | 2029 | unsigned long lru_pages = 0; |
| 2030 | int has_under_min_watermark_zone = 0; | ||
| 1964 | 2031 | ||
| 1965 | /* The swap token gets in the way of swapout... */ | 2032 | /* The swap token gets in the way of swapout... */ |
| 1966 | if (!priority) | 2033 | if (!priority) |
| @@ -1978,8 +2045,7 @@ loop_again: | |||
| 1978 | if (!populated_zone(zone)) | 2045 | if (!populated_zone(zone)) |
| 1979 | continue; | 2046 | continue; |
| 1980 | 2047 | ||
| 1981 | if (zone_is_all_unreclaimable(zone) && | 2048 | if (zone->all_unreclaimable && priority != DEF_PRIORITY) |
| 1982 | priority != DEF_PRIORITY) | ||
| 1983 | continue; | 2049 | continue; |
| 1984 | 2050 | ||
| 1985 | /* | 2051 | /* |
| @@ -2022,13 +2088,9 @@ loop_again: | |||
| 2022 | if (!populated_zone(zone)) | 2088 | if (!populated_zone(zone)) |
| 2023 | continue; | 2089 | continue; |
| 2024 | 2090 | ||
| 2025 | if (zone_is_all_unreclaimable(zone) && | 2091 | if (zone->all_unreclaimable && priority != DEF_PRIORITY) |
| 2026 | priority != DEF_PRIORITY) | ||
| 2027 | continue; | 2092 | continue; |
| 2028 | 2093 | ||
| 2029 | if (!zone_watermark_ok(zone, order, | ||
| 2030 | high_wmark_pages(zone), end_zone, 0)) | ||
| 2031 | all_zones_ok = 0; | ||
| 2032 | temp_priority[i] = priority; | 2094 | temp_priority[i] = priority; |
| 2033 | sc.nr_scanned = 0; | 2095 | sc.nr_scanned = 0; |
| 2034 | note_zone_scanning_priority(zone, priority); | 2096 | note_zone_scanning_priority(zone, priority); |
| @@ -2053,12 +2115,11 @@ loop_again: | |||
| 2053 | lru_pages); | 2115 | lru_pages); |
| 2054 | sc.nr_reclaimed += reclaim_state->reclaimed_slab; | 2116 | sc.nr_reclaimed += reclaim_state->reclaimed_slab; |
| 2055 | total_scanned += sc.nr_scanned; | 2117 | total_scanned += sc.nr_scanned; |
| 2056 | if (zone_is_all_unreclaimable(zone)) | 2118 | if (zone->all_unreclaimable) |
| 2057 | continue; | 2119 | continue; |
| 2058 | if (nr_slab == 0 && zone->pages_scanned >= | 2120 | if (nr_slab == 0 && |
| 2059 | (zone_reclaimable_pages(zone) * 6)) | 2121 | zone->pages_scanned >= (zone_reclaimable_pages(zone) * 6)) |
| 2060 | zone_set_flag(zone, | 2122 | zone->all_unreclaimable = 1; |
| 2061 | ZONE_ALL_UNRECLAIMABLE); | ||
| 2062 | /* | 2123 | /* |
| 2063 | * If we've done a decent amount of scanning and | 2124 | * If we've done a decent amount of scanning and |
| 2064 | * the reclaim ratio is low, start doing writepage | 2125 | * the reclaim ratio is low, start doing writepage |
| @@ -2067,6 +2128,20 @@ loop_again: | |||
| 2067 | if (total_scanned > SWAP_CLUSTER_MAX * 2 && | 2128 | if (total_scanned > SWAP_CLUSTER_MAX * 2 && |
| 2068 | total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2) | 2129 | total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2) |
| 2069 | sc.may_writepage = 1; | 2130 | sc.may_writepage = 1; |
| 2131 | |||
| 2132 | if (!zone_watermark_ok(zone, order, | ||
| 2133 | high_wmark_pages(zone), end_zone, 0)) { | ||
| 2134 | all_zones_ok = 0; | ||
| 2135 | /* | ||
| 2136 | * We are still under min water mark. This | ||
| 2137 | * means that we have a GFP_ATOMIC allocation | ||
| 2138 | * failure risk. Hurry up! | ||
| 2139 | */ | ||
| 2140 | if (!zone_watermark_ok(zone, order, | ||
| 2141 | min_wmark_pages(zone), end_zone, 0)) | ||
| 2142 | has_under_min_watermark_zone = 1; | ||
| 2143 | } | ||
| 2144 | |||
| 2070 | } | 2145 | } |
| 2071 | if (all_zones_ok) | 2146 | if (all_zones_ok) |
| 2072 | break; /* kswapd: all done */ | 2147 | break; /* kswapd: all done */ |
| @@ -2074,8 +2149,12 @@ loop_again: | |||
| 2074 | * OK, kswapd is getting into trouble. Take a nap, then take | 2149 | * OK, kswapd is getting into trouble. Take a nap, then take |
| 2075 | * another pass across the zones. | 2150 | * another pass across the zones. |
| 2076 | */ | 2151 | */ |
| 2077 | if (total_scanned && priority < DEF_PRIORITY - 2) | 2152 | if (total_scanned && (priority < DEF_PRIORITY - 2)) { |
| 2078 | congestion_wait(BLK_RW_ASYNC, HZ/10); | 2153 | if (has_under_min_watermark_zone) |
| 2154 | count_vm_event(KSWAPD_SKIP_CONGESTION_WAIT); | ||
| 2155 | else | ||
| 2156 | congestion_wait(BLK_RW_ASYNC, HZ/10); | ||
| 2157 | } | ||
| 2079 | 2158 | ||
| 2080 | /* | 2159 | /* |
| 2081 | * We do this so kswapd doesn't build up large priorities for | 2160 | * We do this so kswapd doesn't build up large priorities for |
| @@ -2173,6 +2252,7 @@ static int kswapd(void *p) | |||
| 2173 | order = 0; | 2252 | order = 0; |
| 2174 | for ( ; ; ) { | 2253 | for ( ; ; ) { |
| 2175 | unsigned long new_order; | 2254 | unsigned long new_order; |
| 2255 | int ret; | ||
| 2176 | 2256 | ||
| 2177 | prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE); | 2257 | prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE); |
| 2178 | new_order = pgdat->kswapd_max_order; | 2258 | new_order = pgdat->kswapd_max_order; |
| @@ -2184,19 +2264,45 @@ static int kswapd(void *p) | |||
| 2184 | */ | 2264 | */ |
| 2185 | order = new_order; | 2265 | order = new_order; |
| 2186 | } else { | 2266 | } else { |
| 2187 | if (!freezing(current)) | 2267 | if (!freezing(current) && !kthread_should_stop()) { |
| 2188 | schedule(); | 2268 | long remaining = 0; |
| 2269 | |||
| 2270 | /* Try to sleep for a short interval */ | ||
| 2271 | if (!sleeping_prematurely(pgdat, order, remaining)) { | ||
| 2272 | remaining = schedule_timeout(HZ/10); | ||
| 2273 | finish_wait(&pgdat->kswapd_wait, &wait); | ||
| 2274 | prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE); | ||
| 2275 | } | ||
| 2276 | |||
| 2277 | /* | ||
| 2278 | * After a short sleep, check if it was a | ||
| 2279 | * premature sleep. If not, then go fully | ||
| 2280 | * to sleep until explicitly woken up | ||
| 2281 | */ | ||
| 2282 | if (!sleeping_prematurely(pgdat, order, remaining)) | ||
| 2283 | schedule(); | ||
| 2284 | else { | ||
| 2285 | if (remaining) | ||
| 2286 | count_vm_event(KSWAPD_LOW_WMARK_HIT_QUICKLY); | ||
| 2287 | else | ||
| 2288 | count_vm_event(KSWAPD_HIGH_WMARK_HIT_QUICKLY); | ||
| 2289 | } | ||
| 2290 | } | ||
| 2189 | 2291 | ||
| 2190 | order = pgdat->kswapd_max_order; | 2292 | order = pgdat->kswapd_max_order; |
| 2191 | } | 2293 | } |
| 2192 | finish_wait(&pgdat->kswapd_wait, &wait); | 2294 | finish_wait(&pgdat->kswapd_wait, &wait); |
| 2193 | 2295 | ||
| 2194 | if (!try_to_freeze()) { | 2296 | ret = try_to_freeze(); |
| 2195 | /* We can speed up thawing tasks if we don't call | 2297 | if (kthread_should_stop()) |
| 2196 | * balance_pgdat after returning from the refrigerator | 2298 | break; |
| 2197 | */ | 2299 | |
| 2300 | /* | ||
| 2301 | * We can speed up thawing tasks if we don't call balance_pgdat | ||
| 2302 | * after returning from the refrigerator | ||
| 2303 | */ | ||
| 2304 | if (!ret) | ||
| 2198 | balance_pgdat(pgdat, order); | 2305 | balance_pgdat(pgdat, order); |
| 2199 | } | ||
| 2200 | } | 2306 | } |
| 2201 | return 0; | 2307 | return 0; |
| 2202 | } | 2308 | } |
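The rewritten wait loop gives kswapd a two-stage sleep: a short HZ/10 nap, a recheck with sleeping_prematurely(), and only then an indefinite sleep, with the new vmstat events recording which watermark check sent it back to work. A compressed control-flow sketch with schedule()/schedule_timeout() and the event counters replaced by stubs:

/* Control-flow sketch of the new kswapd sleep path: short nap first,
 * then either a full sleep or an immediate return to balancing work.
 * premature_first/premature_second stand in for sleeping_prematurely()
 * before and after the short nap; everything else is stubbed. */
#include <stdio.h>

static int kswapd_low_wmark_hit_quickly;
static int kswapd_high_wmark_hit_quickly;

static void kswapd_try_to_sleep(int premature_first, int premature_second,
                                long woken_early)
{
        long remaining = 0;

        if (!premature_first) {
                /* Try to sleep for a short interval (HZ/10 in the kernel);
                 * remaining is nonzero if a wakeup cut the nap short. */
                remaining = woken_early;
        }

        /* After the short nap, check again before sleeping indefinitely. */
        if (!premature_second) {
                printf("full sleep until woken\n");
        } else {
                if (remaining)
                        kswapd_low_wmark_hit_quickly++;
                else
                        kswapd_high_wmark_hit_quickly++;
                printf("back to balance_pgdat()\n");
        }
}

int main(void)
{
        kswapd_try_to_sleep(0, 0, 0);  /* zones balanced: full sleep */
        kswapd_try_to_sleep(0, 1, 3);  /* woken during nap: low wmark event */
        kswapd_try_to_sleep(1, 1, 0);  /* never balanced: high wmark event */
        printf("low=%d high=%d\n", kswapd_low_wmark_hit_quickly,
               kswapd_high_wmark_hit_quickly);
        return 0;
}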
| @@ -2260,148 +2366,43 @@ unsigned long zone_reclaimable_pages(struct zone *zone) | |||
| 2260 | 2366 | ||
| 2261 | #ifdef CONFIG_HIBERNATION | 2367 | #ifdef CONFIG_HIBERNATION |
| 2262 | /* | 2368 | /* |
| 2263 | * Helper function for shrink_all_memory(). Tries to reclaim 'nr_pages' pages | 2369 | * Try to free `nr_to_reclaim' of memory, system-wide, and return the number of |
| 2264 | * from LRU lists system-wide, for given pass and priority. | ||
| 2265 | * | ||
| 2266 | * For pass > 3 we also try to shrink the LRU lists that contain a few pages | ||
| 2267 | */ | ||
| 2268 | static void shrink_all_zones(unsigned long nr_pages, int prio, | ||
| 2269 | int pass, struct scan_control *sc) | ||
| 2270 | { | ||
| 2271 | struct zone *zone; | ||
| 2272 | unsigned long nr_reclaimed = 0; | ||
| 2273 | struct zone_reclaim_stat *reclaim_stat; | ||
| 2274 | |||
| 2275 | for_each_populated_zone(zone) { | ||
| 2276 | enum lru_list l; | ||
| 2277 | |||
| 2278 | if (zone_is_all_unreclaimable(zone) && prio != DEF_PRIORITY) | ||
| 2279 | continue; | ||
| 2280 | |||
| 2281 | for_each_evictable_lru(l) { | ||
| 2282 | enum zone_stat_item ls = NR_LRU_BASE + l; | ||
| 2283 | unsigned long lru_pages = zone_page_state(zone, ls); | ||
| 2284 | |||
| 2285 | /* For pass = 0, we don't shrink the active list */ | ||
| 2286 | if (pass == 0 && (l == LRU_ACTIVE_ANON || | ||
| 2287 | l == LRU_ACTIVE_FILE)) | ||
| 2288 | continue; | ||
| 2289 | |||
| 2290 | reclaim_stat = get_reclaim_stat(zone, sc); | ||
| 2291 | reclaim_stat->nr_saved_scan[l] += | ||
| 2292 | (lru_pages >> prio) + 1; | ||
| 2293 | if (reclaim_stat->nr_saved_scan[l] | ||
| 2294 | >= nr_pages || pass > 3) { | ||
| 2295 | unsigned long nr_to_scan; | ||
| 2296 | |||
| 2297 | reclaim_stat->nr_saved_scan[l] = 0; | ||
| 2298 | nr_to_scan = min(nr_pages, lru_pages); | ||
| 2299 | nr_reclaimed += shrink_list(l, nr_to_scan, zone, | ||
| 2300 | sc, prio); | ||
| 2301 | if (nr_reclaimed >= nr_pages) { | ||
| 2302 | sc->nr_reclaimed += nr_reclaimed; | ||
| 2303 | return; | ||
| 2304 | } | ||
| 2305 | } | ||
| 2306 | } | ||
| 2307 | } | ||
| 2308 | sc->nr_reclaimed += nr_reclaimed; | ||
| 2309 | } | ||
| 2310 | |||
| 2311 | /* | ||
| 2312 | * Try to free `nr_pages' of memory, system-wide, and return the number of | ||
| 2313 | * freed pages. | 2370 | * freed pages. |
| 2314 | * | 2371 | * |
| 2315 | * Rather than trying to age LRUs the aim is to preserve the overall | 2372 | * Rather than trying to age LRUs the aim is to preserve the overall |
| 2316 | * LRU order by reclaiming preferentially | 2373 | * LRU order by reclaiming preferentially |
| 2317 | * inactive > active > active referenced > active mapped | 2374 | * inactive > active > active referenced > active mapped |
| 2318 | */ | 2375 | */ |
| 2319 | unsigned long shrink_all_memory(unsigned long nr_pages) | 2376 | unsigned long shrink_all_memory(unsigned long nr_to_reclaim) |
| 2320 | { | 2377 | { |
| 2321 | unsigned long lru_pages, nr_slab; | ||
| 2322 | int pass; | ||
| 2323 | struct reclaim_state reclaim_state; | 2378 | struct reclaim_state reclaim_state; |
| 2324 | struct scan_control sc = { | 2379 | struct scan_control sc = { |
| 2325 | .gfp_mask = GFP_KERNEL, | 2380 | .gfp_mask = GFP_HIGHUSER_MOVABLE, |
| 2326 | .may_unmap = 0, | 2381 | .may_swap = 1, |
| 2382 | .may_unmap = 1, | ||
| 2327 | .may_writepage = 1, | 2383 | .may_writepage = 1, |
| 2384 | .nr_to_reclaim = nr_to_reclaim, | ||
| 2385 | .hibernation_mode = 1, | ||
| 2386 | .swappiness = vm_swappiness, | ||
| 2387 | .order = 0, | ||
| 2328 | .isolate_pages = isolate_pages_global, | 2388 | .isolate_pages = isolate_pages_global, |
| 2329 | .nr_reclaimed = 0, | ||
| 2330 | }; | 2389 | }; |
| 2390 | struct zonelist * zonelist = node_zonelist(numa_node_id(), sc.gfp_mask); | ||
| 2391 | struct task_struct *p = current; | ||
| 2392 | unsigned long nr_reclaimed; | ||
| 2331 | 2393 | ||
| 2332 | current->reclaim_state = &reclaim_state; | 2394 | p->flags |= PF_MEMALLOC; |
| 2333 | 2395 | lockdep_set_current_reclaim_state(sc.gfp_mask); | |
| 2334 | lru_pages = global_reclaimable_pages(); | 2396 | reclaim_state.reclaimed_slab = 0; |
| 2335 | nr_slab = global_page_state(NR_SLAB_RECLAIMABLE); | 2397 | p->reclaim_state = &reclaim_state; |
| 2336 | /* If slab caches are huge, it's better to hit them first */ | ||
| 2337 | while (nr_slab >= lru_pages) { | ||
| 2338 | reclaim_state.reclaimed_slab = 0; | ||
| 2339 | shrink_slab(nr_pages, sc.gfp_mask, lru_pages); | ||
| 2340 | if (!reclaim_state.reclaimed_slab) | ||
| 2341 | break; | ||
| 2342 | |||
| 2343 | sc.nr_reclaimed += reclaim_state.reclaimed_slab; | ||
| 2344 | if (sc.nr_reclaimed >= nr_pages) | ||
| 2345 | goto out; | ||
| 2346 | |||
| 2347 | nr_slab -= reclaim_state.reclaimed_slab; | ||
| 2348 | } | ||
| 2349 | |||
| 2350 | /* | ||
| 2351 | * We try to shrink LRUs in 5 passes: | ||
| 2352 | * 0 = Reclaim from inactive_list only | ||
| 2353 | * 1 = Reclaim from active list but don't reclaim mapped | ||
| 2354 | * 2 = 2nd pass of type 1 | ||
| 2355 | * 3 = Reclaim mapped (normal reclaim) | ||
| 2356 | * 4 = 2nd pass of type 3 | ||
| 2357 | */ | ||
| 2358 | for (pass = 0; pass < 5; pass++) { | ||
| 2359 | int prio; | ||
| 2360 | |||
| 2361 | /* Force reclaiming mapped pages in the passes #3 and #4 */ | ||
| 2362 | if (pass > 2) | ||
| 2363 | sc.may_unmap = 1; | ||
| 2364 | |||
| 2365 | for (prio = DEF_PRIORITY; prio >= 0; prio--) { | ||
| 2366 | unsigned long nr_to_scan = nr_pages - sc.nr_reclaimed; | ||
| 2367 | |||
| 2368 | sc.nr_scanned = 0; | ||
| 2369 | sc.swap_cluster_max = nr_to_scan; | ||
| 2370 | shrink_all_zones(nr_to_scan, prio, pass, &sc); | ||
| 2371 | if (sc.nr_reclaimed >= nr_pages) | ||
| 2372 | goto out; | ||
| 2373 | |||
| 2374 | reclaim_state.reclaimed_slab = 0; | ||
| 2375 | shrink_slab(sc.nr_scanned, sc.gfp_mask, | ||
| 2376 | global_reclaimable_pages()); | ||
| 2377 | sc.nr_reclaimed += reclaim_state.reclaimed_slab; | ||
| 2378 | if (sc.nr_reclaimed >= nr_pages) | ||
| 2379 | goto out; | ||
| 2380 | |||
| 2381 | if (sc.nr_scanned && prio < DEF_PRIORITY - 2) | ||
| 2382 | congestion_wait(BLK_RW_ASYNC, HZ / 10); | ||
| 2383 | } | ||
| 2384 | } | ||
| 2385 | |||
| 2386 | /* | ||
| 2387 | * If sc.nr_reclaimed = 0, we could not shrink LRUs, but there may be | ||
| 2388 | * something in slab caches | ||
| 2389 | */ | ||
| 2390 | if (!sc.nr_reclaimed) { | ||
| 2391 | do { | ||
| 2392 | reclaim_state.reclaimed_slab = 0; | ||
| 2393 | shrink_slab(nr_pages, sc.gfp_mask, | ||
| 2394 | global_reclaimable_pages()); | ||
| 2395 | sc.nr_reclaimed += reclaim_state.reclaimed_slab; | ||
| 2396 | } while (sc.nr_reclaimed < nr_pages && | ||
| 2397 | reclaim_state.reclaimed_slab > 0); | ||
| 2398 | } | ||
| 2399 | 2398 | ||
| 2399 | nr_reclaimed = do_try_to_free_pages(zonelist, &sc); | ||
| 2400 | 2400 | ||
| 2401 | out: | 2401 | p->reclaim_state = NULL; |
| 2402 | current->reclaim_state = NULL; | 2402 | lockdep_clear_current_reclaim_state(); |
| 2403 | p->flags &= ~PF_MEMALLOC; | ||
| 2403 | 2404 | ||
| 2404 | return sc.nr_reclaimed; | 2405 | return nr_reclaimed; |
| 2405 | } | 2406 | } |
| 2406 | #endif /* CONFIG_HIBERNATION */ | 2407 | #endif /* CONFIG_HIBERNATION */ |
| 2407 | 2408 | ||
| @@ -2451,6 +2452,17 @@ int kswapd_run(int nid) | |||
| 2451 | return ret; | 2452 | return ret; |
| 2452 | } | 2453 | } |
| 2453 | 2454 | ||
| 2455 | /* | ||
| 2456 | * Called by memory hotplug when all memory in a node is offlined. | ||
| 2457 | */ | ||
| 2458 | void kswapd_stop(int nid) | ||
| 2459 | { | ||
| 2460 | struct task_struct *kswapd = NODE_DATA(nid)->kswapd; | ||
| 2461 | |||
| 2462 | if (kswapd) | ||
| 2463 | kthread_stop(kswapd); | ||
| 2464 | } | ||
| 2465 | |||
| 2454 | static int __init kswapd_init(void) | 2466 | static int __init kswapd_init(void) |
| 2455 | { | 2467 | { |
| 2456 | int nid; | 2468 | int nid; |
| @@ -2553,8 +2565,8 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) | |||
| 2553 | .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE), | 2565 | .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE), |
| 2554 | .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP), | 2566 | .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP), |
| 2555 | .may_swap = 1, | 2567 | .may_swap = 1, |
| 2556 | .swap_cluster_max = max_t(unsigned long, nr_pages, | 2568 | .nr_to_reclaim = max_t(unsigned long, nr_pages, |
| 2557 | SWAP_CLUSTER_MAX), | 2569 | SWAP_CLUSTER_MAX), |
| 2558 | .gfp_mask = gfp_mask, | 2570 | .gfp_mask = gfp_mask, |
| 2559 | .swappiness = vm_swappiness, | 2571 | .swappiness = vm_swappiness, |
| 2560 | .order = order, | 2572 | .order = order, |
| @@ -2570,6 +2582,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) | |||
| 2570 | * and RECLAIM_SWAP. | 2582 | * and RECLAIM_SWAP. |
| 2571 | */ | 2583 | */ |
| 2572 | p->flags |= PF_MEMALLOC | PF_SWAPWRITE; | 2584 | p->flags |= PF_MEMALLOC | PF_SWAPWRITE; |
| 2585 | lockdep_set_current_reclaim_state(gfp_mask); | ||
| 2573 | reclaim_state.reclaimed_slab = 0; | 2586 | reclaim_state.reclaimed_slab = 0; |
| 2574 | p->reclaim_state = &reclaim_state; | 2587 | p->reclaim_state = &reclaim_state; |
| 2575 | 2588 | ||
| @@ -2613,6 +2626,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) | |||
| 2613 | 2626 | ||
| 2614 | p->reclaim_state = NULL; | 2627 | p->reclaim_state = NULL; |
| 2615 | current->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE); | 2628 | current->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE); |
| 2629 | lockdep_clear_current_reclaim_state(); | ||
| 2616 | return sc.nr_reclaimed >= nr_pages; | 2630 | return sc.nr_reclaimed >= nr_pages; |
| 2617 | } | 2631 | } |
| 2618 | 2632 | ||
| @@ -2635,7 +2649,7 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) | |||
| 2635 | zone_page_state(zone, NR_SLAB_RECLAIMABLE) <= zone->min_slab_pages) | 2649 | zone_page_state(zone, NR_SLAB_RECLAIMABLE) <= zone->min_slab_pages) |
| 2636 | return ZONE_RECLAIM_FULL; | 2650 | return ZONE_RECLAIM_FULL; |
| 2637 | 2651 | ||
| 2638 | if (zone_is_all_unreclaimable(zone)) | 2652 | if (zone->all_unreclaimable) |
| 2639 | return ZONE_RECLAIM_FULL; | 2653 | return ZONE_RECLAIM_FULL; |
| 2640 | 2654 | ||
| 2641 | /* | 2655 | /* |
