Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--	mm/vmscan.c	| 110
1 file changed, 75 insertions(+), 35 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5d4c4d02254d..87779dda4ec6 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -62,6 +62,8 @@ struct scan_control {
 	int swap_cluster_max;
 
 	int swappiness;
+
+	int all_unreclaimable;
 };
 
 /*
@@ -377,8 +379,8 @@ static pageout_t pageout(struct page *page, struct address_space *mapping)
 
 int remove_mapping(struct address_space *mapping, struct page *page)
 {
-	if (!mapping)
-		return 0;		/* truncate got there first */
+	BUG_ON(!PageLocked(page));
+	BUG_ON(mapping != page_mapping(page));
 
 	write_lock_irq(&mapping->tree_lock);
 
@@ -440,7 +442,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		if (TestSetPageLocked(page))
 			goto keep;
 
-		BUG_ON(PageActive(page));
+		VM_BUG_ON(PageActive(page));
 
 		sc->nr_scanned++;
 
@@ -547,7 +549,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			goto free_it;
 		}
 
-		if (!remove_mapping(mapping, page))
+		if (!mapping || !remove_mapping(mapping, page))
 			goto keep_locked;
 
 free_it:
@@ -564,7 +566,7 @@ keep_locked:
 		unlock_page(page);
 keep:
 		list_add(&page->lru, &ret_pages);
-		BUG_ON(PageLRU(page));
+		VM_BUG_ON(PageLRU(page));
 	}
 	list_splice(&ret_pages, page_list);
 	if (pagevec_count(&freed_pvec))
@@ -603,7 +605,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 		page = lru_to_page(src);
 		prefetchw_prev_lru_page(page, src, flags);
 
-		BUG_ON(!PageLRU(page));
+		VM_BUG_ON(!PageLRU(page));
 
 		list_del(&page->lru);
 		target = src;
@@ -674,7 +676,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 		 */
 		while (!list_empty(&page_list)) {
 			page = lru_to_page(&page_list);
-			BUG_ON(PageLRU(page));
+			VM_BUG_ON(PageLRU(page));
 			SetPageLRU(page);
 			list_del(&page->lru);
 			if (PageActive(page))
@@ -695,6 +697,11 @@ done:
 	return nr_reclaimed;
 }
 
+static inline int zone_is_near_oom(struct zone *zone)
+{
+	return zone->pages_scanned >= (zone->nr_active + zone->nr_inactive)*3;
+}
+
 /*
  * This moves pages from the active list to the inactive list.
  *
@@ -730,6 +737,9 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 		long distress;
 		long swap_tendency;
 
+		if (zone_is_near_oom(zone))
+			goto force_reclaim_mapped;
+
 		/*
 		 * `distress' is a measure of how much trouble we're having
 		 * reclaiming pages.  0 -> no problems.  100 -> great trouble.
@@ -765,6 +775,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 		 * memory onto the inactive list.
 		 */
 		if (swap_tendency >= 100)
+force_reclaim_mapped:
 			reclaim_mapped = 1;
 	}
 
@@ -797,9 +808,9 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	while (!list_empty(&l_inactive)) {
 		page = lru_to_page(&l_inactive);
 		prefetchw_prev_lru_page(page, &l_inactive, flags);
-		BUG_ON(PageLRU(page));
+		VM_BUG_ON(PageLRU(page));
 		SetPageLRU(page);
-		BUG_ON(!PageActive(page));
+		VM_BUG_ON(!PageActive(page));
 		ClearPageActive(page);
 
 		list_move(&page->lru, &zone->inactive_list);
@@ -827,9 +838,9 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	while (!list_empty(&l_active)) {
 		page = lru_to_page(&l_active);
 		prefetchw_prev_lru_page(page, &l_active, flags);
-		BUG_ON(PageLRU(page));
+		VM_BUG_ON(PageLRU(page));
 		SetPageLRU(page);
-		BUG_ON(!PageActive(page));
+		VM_BUG_ON(!PageActive(page));
 		list_move(&page->lru, &zone->active_list);
 		pgmoved++;
 		if (!pagevec_add(&pvec, page)) {
@@ -925,6 +936,7 @@ static unsigned long shrink_zones(int priority, struct zone **zones,
 	unsigned long nr_reclaimed = 0;
 	int i;
 
+	sc->all_unreclaimable = 1;
 	for (i = 0; zones[i] != NULL; i++) {
 		struct zone *zone = zones[i];
 
@@ -941,6 +953,8 @@ static unsigned long shrink_zones(int priority, struct zone **zones,
 		if (zone->all_unreclaimable && priority != DEF_PRIORITY)
 			continue;	/* Let kswapd poll it */
 
+		sc->all_unreclaimable = 0;
+
 		nr_reclaimed += shrink_zone(priority, zone, sc);
 	}
 	return nr_reclaimed;
@@ -1021,6 +1035,9 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
 		if (sc.nr_scanned && priority < DEF_PRIORITY - 2)
 			blk_congestion_wait(WRITE, HZ/10);
 	}
+	/* top priority shrink_caches still had more to do? don't OOM, then */
+	if (!sc.all_unreclaimable)
+		ret = 1;
 out:
 	for (i = 0; zones[i] != 0; i++) {
 		struct zone *zone = zones[i];
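With the scan_control plumbing above, shrink_zones() starts from the assumption that every zone is unreclaimable and clears sc->all_unreclaimable for each zone it actually scans; try_to_free_pages() then refuses to report failure, and thereby hand the caller over to the OOM killer, as long as some zone was still being scanned at the lowest priority. A rough user-space model of that hand-off, a sketch only (zone_model, scan_model and shrink_zones_model() are made-up stand-ins, not kernel structures):

    #include <stdbool.h>
    #include <stdio.h>

    struct zone_model {
        bool all_unreclaimable;  /* per-zone flag, as on struct zone */
        bool applicable;         /* passed the cpuset/watermark checks */
    };

    struct scan_model {
        int all_unreclaimable;   /* mirrors the new scan_control field */
    };

    /* Model of the shrink_zones() loop: assume nothing is reclaimable
     * until a zone is actually scanned. */
    static void shrink_zones_model(const struct zone_model *zones, int n,
                                   struct scan_model *sc, bool def_priority)
    {
        sc->all_unreclaimable = 1;
        for (int i = 0; i < n; i++) {
            if (!zones[i].applicable)
                continue;
            if (zones[i].all_unreclaimable && !def_priority)
                continue;  /* let kswapd poll it */
            sc->all_unreclaimable = 0;
            /* shrink_zone() would run here */
        }
    }

    int main(void)
    {
        const struct zone_model zones[] = {
            { .all_unreclaimable = true,  .applicable = true },
            { .all_unreclaimable = false, .applicable = true },
        };
        struct scan_model sc;

        shrink_zones_model(zones, 2, &sc, false);
        /* One zone was still scanned, so the caller must not declare OOM. */
        printf("trigger OOM killer: %s\n", sc.all_unreclaimable ? "yes" : "no");
        return 0;
    }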
@@ -1153,7 +1170,7 @@ scan:
 			if (zone->all_unreclaimable)
 				continue;
 			if (nr_slab == 0 && zone->pages_scanned >=
-				    (zone->nr_active + zone->nr_inactive) * 4)
+				    (zone->nr_active + zone->nr_inactive) * 6)
 				zone->all_unreclaimable = 1;
 			/*
 			 * If we've done a decent amount of scanning and
@@ -1361,7 +1378,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
 	for_each_zone(zone)
 		lru_pages += zone->nr_active + zone->nr_inactive;
 
-	nr_slab = global_page_state(NR_SLAB);
+	nr_slab = global_page_state(NR_SLAB_RECLAIMABLE);
 	/* If slab caches are huge, it's better to hit them first */
 	while (nr_slab >= lru_pages) {
 		reclaim_state.reclaimed_slab = 0;
@@ -1510,7 +1527,6 @@ int zone_reclaim_mode __read_mostly;
 #define RECLAIM_ZONE (1<<0)	/* Run shrink_cache on the zone */
 #define RECLAIM_WRITE (1<<1)	/* Writeout pages during reclaim */
 #define RECLAIM_SWAP (1<<2)	/* Swap pages out during reclaim */
-#define RECLAIM_SLAB (1<<3)	/* Do a global slab shrink if the zone is out of memory */
 
 /*
  * Priority for ZONE_RECLAIM. This determines the fraction of pages
@@ -1526,6 +1542,12 @@ int zone_reclaim_mode __read_mostly;
 int sysctl_min_unmapped_ratio = 1;
 
 /*
+ * If the number of slab pages in a zone grows beyond this percentage then
+ * slab reclaim needs to occur.
+ */
+int sysctl_min_slab_ratio = 5;
+
+/*
  * Try to free up some pages from this zone through reclaim.
  */
 static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
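sysctl_min_slab_ratio is a percentage; the per-zone threshold it implies, the zone->min_slab_pages field used further down, is presumably derived outside this file as that percentage of the zone's pages. A small sketch of the assumed arithmetic with the default of 5% (min_slab_pages() here is an illustrative helper, not the kernel's):

    #include <stdio.h>

    /* Convert the percentage sysctl into a per-zone page count (assumed
     * to happen outside mm/vmscan.c; shown only for the arithmetic). */
    static unsigned long min_slab_pages(unsigned long zone_pages, int ratio)
    {
        return zone_pages * ratio / 100;
    }

    int main(void)
    {
        unsigned long pages = 1048576;  /* a 4 GiB zone of 4 KiB pages */
        unsigned long threshold = min_slab_pages(pages, 5);

        /* 256 pages of 4 KiB = 1 MiB, hence the >> 8 */
        printf("slab reclaim kicks in above %lu pages (~%lu MiB)\n",
               threshold, threshold >> 8);
        return 0;
    }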
@@ -1544,6 +1566,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 		.gfp_mask = gfp_mask,
 		.swappiness = vm_swappiness,
 	};
+	unsigned long slab_reclaimable;
 
 	disable_swap_token();
 	cond_resched();
@@ -1556,29 +1579,43 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	reclaim_state.reclaimed_slab = 0;
 	p->reclaim_state = &reclaim_state;
 
-	/*
-	 * Free memory by calling shrink zone with increasing priorities
-	 * until we have enough memory freed.
-	 */
-	priority = ZONE_RECLAIM_PRIORITY;
-	do {
-		nr_reclaimed += shrink_zone(priority, zone, &sc);
-		priority--;
-	} while (priority >= 0 && nr_reclaimed < nr_pages);
+	if (zone_page_state(zone, NR_FILE_PAGES) -
+		zone_page_state(zone, NR_FILE_MAPPED) >
+		zone->min_unmapped_pages) {
+		/*
+		 * Free memory by calling shrink zone with increasing
+		 * priorities until we have enough memory freed.
+		 */
+		priority = ZONE_RECLAIM_PRIORITY;
+		do {
+			nr_reclaimed += shrink_zone(priority, zone, &sc);
+			priority--;
+		} while (priority >= 0 && nr_reclaimed < nr_pages);
+	}
 
-	if (nr_reclaimed < nr_pages && (zone_reclaim_mode & RECLAIM_SLAB)) {
+	slab_reclaimable = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
+	if (slab_reclaimable > zone->min_slab_pages) {
 		/*
 		 * shrink_slab() does not currently allow us to determine how
-		 * many pages were freed in this zone. So we just shake the slab
-		 * a bit and then go off node for this particular allocation
-		 * despite possibly having freed enough memory to allocate in
-		 * this zone.  If we freed local memory then the next
-		 * allocations will be local again.
+		 * many pages were freed in this zone. So we take the current
+		 * number of slab pages and shake the slab until it is reduced
+		 * by the same nr_pages that we used for reclaiming unmapped
+		 * pages.
 		 *
-		 * shrink_slab will free memory on all zones and may take
-		 * a long time.
+		 * Note that shrink_slab will free memory on all zones and may
+		 * take a long time.
+		 */
+		while (shrink_slab(sc.nr_scanned, gfp_mask, order) &&
+			zone_page_state(zone, NR_SLAB_RECLAIMABLE) >
+				slab_reclaimable - nr_pages)
+			;
+
+		/*
+		 * Update nr_reclaimed by the number of slab pages we
+		 * reclaimed from this zone.
 		 */
-		shrink_slab(sc.nr_scanned, gfp_mask, order);
+		nr_reclaimed += slab_reclaimable -
+			zone_page_state(zone, NR_SLAB_RECLAIMABLE);
 	}
 
 	p->reclaim_state = NULL;
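The reworked slab pass above keeps calling shrink_slab() until it either stops making progress or NR_SLAB_RECLAIMABLE has fallen by nr_pages from its starting value, then credits the difference to nr_reclaimed. A stand-alone model of that termination condition, assuming a made-up shrink_slab_model() that frees at most 100 pages per call:

    #include <stdio.h>

    static unsigned long slab_pages;  /* models NR_SLAB_RECLAIMABLE */

    /* Made-up stand-in for shrink_slab(): frees up to 100 pages per call
     * and returns the number freed (0 means no progress, give up). */
    static unsigned long shrink_slab_model(void)
    {
        unsigned long freed = slab_pages < 100 ? slab_pages : 100;

        slab_pages -= freed;
        return freed;
    }

    int main(void)
    {
        unsigned long nr_pages = 320;  /* reclaim target */
        unsigned long slab_reclaimable;

        slab_pages = 1000;
        slab_reclaimable = slab_pages;

        /* Same shape as the kernel loop: stop on no progress, or once the
         * slab has shrunk by nr_pages from its starting value. */
        while (shrink_slab_model() &&
               slab_pages > slab_reclaimable - nr_pages)
            ;

        printf("reclaimed %lu slab pages (target was %lu)\n",
               slab_reclaimable - slab_pages, nr_pages);
        return 0;
    }

As in the kernel loop, the final call can overshoot the target by up to one pass's worth of pages, which is harmless here since any extra slab freed still counts toward nr_reclaimed.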
@@ -1592,7 +1629,8 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	int node_id;
 
 	/*
-	 * Zone reclaim reclaims unmapped file backed pages.
+	 * Zone reclaim reclaims unmapped file backed pages and
+	 * slab pages if we are over the defined limits.
 	 *
 	 * A small portion of unmapped file backed pages is needed for
 	 * file I/O otherwise pages read by file I/O will be immediately
@@ -1601,7 +1639,9 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	 * unmapped file backed pages.
 	 */
 	if (zone_page_state(zone, NR_FILE_PAGES) -
-		zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped_ratio)
+	    zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped_pages
+	    && zone_page_state(zone, NR_SLAB_RECLAIMABLE)
+			<= zone->min_slab_pages)
 		return 0;
 
 	/*
@@ -1621,7 +1661,7 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	 * over remote processors and spread off node memory allocations
 	 * as wide as possible.
 	 */
-	node_id = zone->zone_pgdat->node_id;
+	node_id = zone_to_nid(zone);
 	mask = node_to_cpumask(node_id);
 	if (!cpus_empty(mask) && node_id != numa_node_id())
 		return 0;