-rw-r--r--  fs/buffer.c              |   2
-rw-r--r--  include/linux/mmzone.h   |   8
-rw-r--r--  include/linux/swap.h     |   3
-rw-r--r--  mm/page_alloc.c          |   5
-rw-r--r--  mm/vmscan.c              | 171
5 files changed, 163 insertions, 26 deletions
diff --git a/fs/buffer.c b/fs/buffer.c
index 94344b2e0b46..d654a3b6209e 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -356,7 +356,7 @@ static void free_more_memory(void)
 	for_each_online_pgdat(pgdat) {
 		zones = pgdat->node_zonelists[gfp_zone(GFP_NOFS)].zones;
 		if (*zones)
-			try_to_free_pages(zones, GFP_NOFS);
+			try_to_free_pages(zones, 0, GFP_NOFS);
 	}
 }
 
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index d71ff763c9df..da8eb8ad9e9b 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -24,6 +24,14 @@
 #endif
 #define MAX_ORDER_NR_PAGES (1 << (MAX_ORDER - 1))
 
+/*
+ * PAGE_ALLOC_COSTLY_ORDER is the order at which allocations are deemed
+ * costly to service.  That is between allocation orders which should
+ * coalesce naturally under reasonable reclaim pressure and those which
+ * will not.
+ */
+#define PAGE_ALLOC_COSTLY_ORDER 3
+
 struct free_area {
 	struct list_head	free_list;
 	unsigned long		nr_free;
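
Illustrative note (not part of the patch): with the common 4 KiB page size, order 3 means 2^3 = 8 contiguous pages, i.e. 32 KiB, so PAGE_ALLOC_COSTLY_ORDER draws the line between requests the buddy allocator is expected to satisfy easily and larger ones. A minimal userspace sketch of that arithmetic, assuming 4 KiB pages:

#include <stdio.h>

#define PAGE_SHIFT		12	/* assumed 4 KiB pages */
#define PAGE_ALLOC_COSTLY_ORDER	3

int main(void)
{
	for (unsigned int order = 0; order <= 5; order++) {
		unsigned long pages = 1UL << order;	/* 2^order pages */
		unsigned long bytes = pages << PAGE_SHIFT;

		printf("order %u: %lu pages, %lu KiB, %s\n",
		       order, pages, bytes >> 10,
		       order > PAGE_ALLOC_COSTLY_ORDER ? "costly" : "cheap");
	}
	return 0;
}
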
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 006868881346..665f85f2a3af 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -188,7 +188,8 @@ extern int rotate_reclaimable_page(struct page *page);
 extern void swap_setup(void);
 
 /* linux/mm/vmscan.c */
-extern unsigned long try_to_free_pages(struct zone **, gfp_t);
+extern unsigned long try_to_free_pages(struct zone **zones, int order,
+					gfp_t gfp_mask);
 extern unsigned long shrink_all_memory(unsigned long nr_pages);
 extern int vm_swappiness;
 extern int remove_mapping(struct address_space *mapping, struct page *page);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ac4f8c6b5c10..1a889c3fec59 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1333,7 +1333,7 @@ nofail_alloc:
 	reclaim_state.reclaimed_slab = 0;
 	p->reclaim_state = &reclaim_state;
 
-	did_some_progress = try_to_free_pages(zonelist->zones, gfp_mask);
+	did_some_progress = try_to_free_pages(zonelist->zones, order, gfp_mask);
 
 	p->reclaim_state = NULL;
 	p->flags &= ~PF_MEMALLOC;
@@ -1370,7 +1370,8 @@ nofail_alloc:
 	 */
 	do_retry = 0;
 	if (!(gfp_mask & __GFP_NORETRY)) {
-		if ((order <= 3) || (gfp_mask & __GFP_REPEAT))
+		if ((order <= PAGE_ALLOC_COSTLY_ORDER) ||
+						(gfp_mask & __GFP_REPEAT))
 			do_retry = 1;
 		if (gfp_mask & __GFP_NOFAIL)
 			do_retry = 1;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1be5a6376ef0..1d9971d8924b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -66,6 +66,8 @@ struct scan_control {
 	int swappiness;
 
 	int all_unreclaimable;
+
+	int order;
 };
 
 /*
@@ -481,7 +483,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 
 		referenced = page_referenced(page, 1);
 		/* In active use or really unfreeable?  Activate it. */
-		if (referenced && page_mapping_inuse(page))
+		if (sc->order <= PAGE_ALLOC_COSTLY_ORDER &&
+					referenced && page_mapping_inuse(page))
 			goto activate_locked;
 
 #ifdef CONFIG_SWAP
@@ -514,7 +517,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		}
 
 		if (PageDirty(page)) {
-			if (referenced)
+			if (sc->order <= PAGE_ALLOC_COSTLY_ORDER && referenced)
 				goto keep_locked;
 			if (!may_enter_fs)
 				goto keep_locked;
@@ -598,6 +601,51 @@ keep:
 	return nr_reclaimed;
 }
 
+/* LRU Isolation modes. */
+#define ISOLATE_INACTIVE 0	/* Isolate inactive pages. */
+#define ISOLATE_ACTIVE 1	/* Isolate active pages. */
+#define ISOLATE_BOTH 2		/* Isolate both active and inactive pages. */
+
+/*
+ * Attempt to remove the specified page from its LRU.  Only take this page
+ * if it is of the appropriate PageActive status.  Pages which are being
+ * freed elsewhere are also ignored.
+ *
+ * page:	page to consider
+ * mode:	one of the LRU isolation modes defined above
+ *
+ * returns 0 on success, -ve errno on failure.
+ */
+static int __isolate_lru_page(struct page *page, int mode)
+{
+	int ret = -EINVAL;
+
+	/* Only take pages on the LRU. */
+	if (!PageLRU(page))
+		return ret;
+
+	/*
+	 * When checking the active state, we need to be sure we are
+	 * dealing with comparable boolean values.  Take the logical not
+	 * of each.
+	 */
+	if (mode != ISOLATE_BOTH && (!PageActive(page) != !mode))
+		return ret;
+
+	ret = -EBUSY;
+	if (likely(get_page_unless_zero(page))) {
+		/*
+		 * Be careful not to clear PageLRU until after we're
+		 * sure the page is not being freed elsewhere -- the
+		 * page release code relies on it.
+		 */
+		ClearPageLRU(page);
+		ret = 0;
+	}
+
+	return ret;
+}
+
 /*
  * zone->lru_lock is heavily contended.  Some of the functions that
  * shrink the lists perform better by taking out a batch of pages
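
Illustrative note (not part of the patch): the `!PageActive(page) != !mode` test in __isolate_lru_page() works because `!` collapses any non-zero value to 1, so a flag word can be compared against the 0/1 mode constants. A standalone sketch of the same idiom, with made-up names:

#include <assert.h>
#include <stdio.h>

#define MODE_INACTIVE 0
#define MODE_ACTIVE   1
#define MODE_BOTH     2

/* page_active stands in for PageActive(); it may be any non-zero value. */
static int wrong_state(int page_active, int mode)
{
	/* !x collapses "any non-zero" to 1, so the comparison is boolean. */
	return mode != MODE_BOTH && (!page_active != !mode);
}

int main(void)
{
	assert(!wrong_state(0x40, MODE_ACTIVE));	/* active page, active mode: take it */
	assert(wrong_state(0x40, MODE_INACTIVE));	/* active page, inactive mode: skip */
	assert(!wrong_state(0, MODE_INACTIVE));
	assert(!wrong_state(0x40, MODE_BOTH));		/* BOTH accepts either state */
	puts("ok");
	return 0;
}
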
@@ -612,38 +660,90 @@ keep:
  * @src:	The LRU list to pull pages off.
  * @dst:	The temp list to put pages on to.
  * @scanned:	The number of pages that were scanned.
+ * @order:	The caller's attempted allocation order
+ * @mode:	One of the LRU isolation modes
  *
  * returns how many pages were moved onto *@dst.
  */
 static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 		struct list_head *src, struct list_head *dst,
-		unsigned long *scanned)
+		unsigned long *scanned, int order, int mode)
 {
 	unsigned long nr_taken = 0;
-	struct page *page;
 	unsigned long scan;
 
 	for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) {
-		struct list_head *target;
+		struct page *page;
+		unsigned long pfn;
+		unsigned long end_pfn;
+		unsigned long page_pfn;
+		int zone_id;
+
 		page = lru_to_page(src);
 		prefetchw_prev_lru_page(page, src, flags);
 
 		VM_BUG_ON(!PageLRU(page));
 
-		list_del(&page->lru);
-		target = src;
-		if (likely(get_page_unless_zero(page))) {
-			/*
-			 * Be careful not to clear PageLRU until after we're
-			 * sure the page is not being freed elsewhere -- the
-			 * page release code relies on it.
-			 */
-			ClearPageLRU(page);
-			target = dst;
+		switch (__isolate_lru_page(page, mode)) {
+		case 0:
+			list_move(&page->lru, dst);
 			nr_taken++;
-		} /* else it is being freed elsewhere */
+			break;
 
-		list_add(&page->lru, target);
+		case -EBUSY:
+			/* else it is being freed elsewhere */
+			list_move(&page->lru, src);
+			continue;
+
+		default:
+			BUG();
+		}
+
+		if (!order)
+			continue;
+
+		/*
+		 * Attempt to take all pages in the order aligned region
+		 * surrounding the tag page.  Only take those pages of
+		 * the same active state as that tag page.  We may safely
+		 * round the target page pfn down to the requested order
+		 * as the mem_map is guaranteed valid out to MAX_ORDER,
+		 * where that page is in a different zone we will detect
+		 * it from its zone id and abort this block scan.
+		 */
+		zone_id = page_zone_id(page);
+		page_pfn = page_to_pfn(page);
+		pfn = page_pfn & ~((1 << order) - 1);
+		end_pfn = pfn + (1 << order);
+		for (; pfn < end_pfn; pfn++) {
+			struct page *cursor_page;
+
+			/* The target page is in the block, ignore it. */
+			if (unlikely(pfn == page_pfn))
+				continue;
+
+			/* Avoid holes within the zone. */
+			if (unlikely(!pfn_valid_within(pfn)))
+				break;
+
+			cursor_page = pfn_to_page(pfn);
+			/* Check that we have not crossed a zone boundary. */
+			if (unlikely(page_zone_id(cursor_page) != zone_id))
+				continue;
+			switch (__isolate_lru_page(cursor_page, mode)) {
+			case 0:
+				list_move(&cursor_page->lru, dst);
+				nr_taken++;
+				scan++;
+				break;
+
+			case -EBUSY:
+				/* else it is being freed elsewhere */
+				list_move(&cursor_page->lru, src);
+			default:
+				break;
+			}
+		}
 	}
 
 	*scanned = scan;
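
Illustrative note (not part of the patch): the block scan rounds the tag page's pfn down to an order-aligned boundary and then walks the 2^order page frames of that block. A standalone sketch of the pfn arithmetic, with made-up names:

#include <stdio.h>

/* Print the order-aligned pfn window that contains target_pfn. */
static void pfn_window(unsigned long target_pfn, unsigned int order)
{
	unsigned long pfn = target_pfn & ~((1UL << order) - 1);	/* round down */
	unsigned long end_pfn = pfn + (1UL << order);			/* exclusive end */

	printf("order %u: pfns [%lu, %lu) around %lu\n",
	       order, pfn, end_pfn, target_pfn);
}

int main(void)
{
	pfn_window(1234567, 3);	/* 8-page window:  [1234560, 1234568) */
	pfn_window(1234567, 4);	/* 16-page window: [1234560, 1234576) */
	return 0;
}
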
@@ -651,6 +751,24 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 }
 
 /*
+ * clear_active_flags() is a helper for shrink_active_list(), clearing
+ * any active bits from the pages in the list.
+ */
+static unsigned long clear_active_flags(struct list_head *page_list)
+{
+	int nr_active = 0;
+	struct page *page;
+
+	list_for_each_entry(page, page_list, lru)
+		if (PageActive(page)) {
+			ClearPageActive(page);
+			nr_active++;
+		}
+
+	return nr_active;
+}
+
+/*
  * shrink_inactive_list() is a helper for shrink_zone().  It returns the number
  * of reclaimed pages
  */
@@ -671,11 +789,18 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 		unsigned long nr_taken;
 		unsigned long nr_scan;
 		unsigned long nr_freed;
+		unsigned long nr_active;
 
 		nr_taken = isolate_lru_pages(sc->swap_cluster_max,
 			     &zone->inactive_list,
-			     &page_list, &nr_scan);
-		__mod_zone_page_state(zone, NR_INACTIVE, -nr_taken);
+			     &page_list, &nr_scan, sc->order,
+			     (sc->order > PAGE_ALLOC_COSTLY_ORDER)?
+					     ISOLATE_BOTH : ISOLATE_INACTIVE);
+		nr_active = clear_active_flags(&page_list);
+
+		__mod_zone_page_state(zone, NR_ACTIVE, -nr_active);
+		__mod_zone_page_state(zone, NR_INACTIVE,
+						-(nr_taken - nr_active));
 		zone->pages_scanned += nr_scan;
 		spin_unlock_irq(&zone->lru_lock);
 
@@ -820,7 +945,7 @@ force_reclaim_mapped:
 	lru_add_drain();
 	spin_lock_irq(&zone->lru_lock);
 	pgmoved = isolate_lru_pages(nr_pages, &zone->active_list,
-			    &l_hold, &pgscanned);
+			    &l_hold, &pgscanned, sc->order, ISOLATE_ACTIVE);
 	zone->pages_scanned += pgscanned;
 	__mod_zone_page_state(zone, NR_ACTIVE, -pgmoved);
 	spin_unlock_irq(&zone->lru_lock);
@@ -1011,7 +1136,7 @@ static unsigned long shrink_zones(int priority, struct zone **zones,
  * holds filesystem locks which prevent writeout this might not work, and the
  * allocation attempt will fail.
  */
-unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
+unsigned long try_to_free_pages(struct zone **zones, int order, gfp_t gfp_mask)
 {
 	int priority;
 	int ret = 0;
@@ -1026,6 +1151,7 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
 		.swap_cluster_max = SWAP_CLUSTER_MAX,
 		.may_swap = 1,
 		.swappiness = vm_swappiness,
+		.order = order,
 	};
 
 	count_vm_event(ALLOCSTALL);
@@ -1131,6 +1257,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
 		.may_swap = 1,
 		.swap_cluster_max = SWAP_CLUSTER_MAX,
 		.swappiness = vm_swappiness,
+		.order = order,
 	};
 	/*
 	 * temp_priority is used to remember the scanning priority at which
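
Illustrative note (not part of the patch): with the order plumbed through scan_control, the scanner changes behaviour around PAGE_ALLOC_COSTLY_ORDER at two decision points. A condensed userspace sketch of those decisions, with made-up helper names (gfp_repeat stands in for the __GFP_REPEAT check):

#include <stdio.h>

#define PAGE_ALLOC_COSTLY_ORDER 3

enum isolate_mode { ISOLATE_INACTIVE, ISOLATE_ACTIVE, ISOLATE_BOTH };

/* shrink_inactive_list(): costly orders isolate both LRU lists. */
static enum isolate_mode pick_mode(int order)
{
	return order > PAGE_ALLOC_COSTLY_ORDER ? ISOLATE_BOTH : ISOLATE_INACTIVE;
}

/* page allocator: only "cheap" orders keep retrying unconditionally. */
static int should_retry(int order, int gfp_repeat)
{
	return order <= PAGE_ALLOC_COSTLY_ORDER || gfp_repeat;
}

int main(void)
{
	for (int order = 0; order <= 5; order++)
		printf("order %d: mode=%s retry=%d\n", order,
		       pick_mode(order) == ISOLATE_BOTH ? "BOTH" : "INACTIVE",
		       should_retry(order, 0));
	return 0;
}
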