-rw-r--r--   fs/buffer.c                2
-rw-r--r--   include/linux/mmzone.h     8
-rw-r--r--   include/linux/swap.h       3
-rw-r--r--   mm/page_alloc.c            5
-rw-r--r--   mm/vmscan.c              171
5 files changed, 163 insertions(+), 26 deletions(-)
diff --git a/fs/buffer.c b/fs/buffer.c
index 94344b2e0b46..d654a3b6209e 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -356,7 +356,7 @@ static void free_more_memory(void)
 	for_each_online_pgdat(pgdat) {
 		zones = pgdat->node_zonelists[gfp_zone(GFP_NOFS)].zones;
 		if (*zones)
-			try_to_free_pages(zones, GFP_NOFS);
+			try_to_free_pages(zones, 0, GFP_NOFS);
 	}
 }
 
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index d71ff763c9df..da8eb8ad9e9b 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -24,6 +24,14 @@
 #endif
 #define MAX_ORDER_NR_PAGES (1 << (MAX_ORDER - 1))
 
+/*
+ * PAGE_ALLOC_COSTLY_ORDER is the order at which allocations are deemed
+ * costly to service.  That is between allocation orders which should
+ * coalesce naturally under reasonable reclaim pressure and those which
+ * will not.
+ */
+#define PAGE_ALLOC_COSTLY_ORDER 3
+
 struct free_area {
 	struct list_head	free_list;
 	unsigned long		nr_free;
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 006868881346..665f85f2a3af 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -188,7 +188,8 @@ extern int rotate_reclaimable_page(struct page *page);
 extern void swap_setup(void);
 
 /* linux/mm/vmscan.c */
-extern unsigned long try_to_free_pages(struct zone **, gfp_t);
+extern unsigned long try_to_free_pages(struct zone **zones, int order,
+					gfp_t gfp_mask);
 extern unsigned long shrink_all_memory(unsigned long nr_pages);
 extern int vm_swappiness;
 extern int remove_mapping(struct address_space *mapping, struct page *page);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ac4f8c6b5c10..1a889c3fec59 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1333,7 +1333,7 @@ nofail_alloc:
 	reclaim_state.reclaimed_slab = 0;
 	p->reclaim_state = &reclaim_state;
 
-	did_some_progress = try_to_free_pages(zonelist->zones, gfp_mask);
+	did_some_progress = try_to_free_pages(zonelist->zones, order, gfp_mask);
 
 	p->reclaim_state = NULL;
 	p->flags &= ~PF_MEMALLOC;
@@ -1370,7 +1370,8 @@ nofail_alloc:
 	 */
 	do_retry = 0;
 	if (!(gfp_mask & __GFP_NORETRY)) {
-		if ((order <= 3) || (gfp_mask & __GFP_REPEAT))
+		if ((order <= PAGE_ALLOC_COSTLY_ORDER) ||
+						(gfp_mask & __GFP_REPEAT))
 			do_retry = 1;
 		if (gfp_mask & __GFP_NOFAIL)
 			do_retry = 1;
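
The retry policy this hunk changes is small enough to model as a standalone predicate. The following is only a sketch: the gfp flag values are hypothetical stand-ins rather than the kernel's real bit layout, and should_retry() is an illustrative helper, not a kernel function; only the shape of the test mirrors the patched code.

#include <stdbool.h>
#include <stdio.h>

/* Sketch of the retry decision after this change: cheap orders (at or
 * below PAGE_ALLOC_COSTLY_ORDER) retry implicitly, costlier orders only
 * retry when the caller asked for it.  Flag values are made up. */
#define PAGE_ALLOC_COSTLY_ORDER 3
#define __GFP_NORETRY 0x1u
#define __GFP_REPEAT  0x2u
#define __GFP_NOFAIL  0x4u

static bool should_retry(unsigned int gfp_mask, int order)
{
	bool do_retry = false;

	if (!(gfp_mask & __GFP_NORETRY)) {
		if (order <= PAGE_ALLOC_COSTLY_ORDER ||
				(gfp_mask & __GFP_REPEAT))
			do_retry = true;
		if (gfp_mask & __GFP_NOFAIL)
			do_retry = true;
	}
	return do_retry;
}

int main(void)
{
	printf("%d\n", should_retry(0, 2));            /* 1: order 2 is cheap */
	printf("%d\n", should_retry(0, 4));            /* 0: costly, no __GFP_REPEAT */
	printf("%d\n", should_retry(__GFP_REPEAT, 4)); /* 1: caller asked to repeat */
	return 0;
}
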
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1be5a6376ef0..1d9971d8924b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -66,6 +66,8 @@ struct scan_control {
 	int swappiness;
 
 	int all_unreclaimable;
+
+	int order;
 };
 
 /*
@@ -481,7 +483,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 
 		referenced = page_referenced(page, 1);
 		/* In active use or really unfreeable? Activate it. */
-		if (referenced && page_mapping_inuse(page))
+		if (sc->order <= PAGE_ALLOC_COSTLY_ORDER &&
+					referenced && page_mapping_inuse(page))
 			goto activate_locked;
 
 #ifdef CONFIG_SWAP
@@ -514,7 +517,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		}
 
 		if (PageDirty(page)) {
-			if (referenced)
+			if (sc->order <= PAGE_ALLOC_COSTLY_ORDER && referenced)
 				goto keep_locked;
 			if (!may_enter_fs)
 				goto keep_locked;
@@ -598,6 +601,51 @@ keep:
 	return nr_reclaimed;
 }
 
+/* LRU Isolation modes. */
+#define ISOLATE_INACTIVE 0	/* Isolate inactive pages. */
+#define ISOLATE_ACTIVE 1	/* Isolate active pages. */
+#define ISOLATE_BOTH 2		/* Isolate both active and inactive pages. */
+
+/*
+ * Attempt to remove the specified page from its LRU.  Only take this page
+ * if it is of the appropriate PageActive status.  Pages which are being
+ * freed elsewhere are also ignored.
+ *
+ * page:	page to consider
+ * mode:	one of the LRU isolation modes defined above
+ *
+ * returns 0 on success, -ve errno on failure.
+ */
+static int __isolate_lru_page(struct page *page, int mode)
+{
+	int ret = -EINVAL;
+
+	/* Only take pages on the LRU. */
+	if (!PageLRU(page))
+		return ret;
+
+	/*
+	 * When checking the active state, we need to be sure we are
+	 * dealing with comparable boolean values.  Take the logical not
+	 * of each.
+	 */
+	if (mode != ISOLATE_BOTH && (!PageActive(page) != !mode))
+		return ret;
+
+	ret = -EBUSY;
+	if (likely(get_page_unless_zero(page))) {
+		/*
+		 * Be careful not to clear PageLRU until after we're
+		 * sure the page is not being freed elsewhere -- the
+		 * page release code relies on it.
+		 */
+		ClearPageLRU(page);
+		ret = 0;
+	}
+
+	return ret;
+}
+
 /*
  * zone->lru_lock is heavily contended.  Some of the functions that
  * shrink the lists perform better by taking out a batch of pages
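
A note on the (!PageActive(page) != !mode) test above: PageActive() can return any non-zero value, while the isolation mode is exactly 0 or 1, so both sides are normalised with a logical not before being compared. A minimal sketch of that check in isolation, with page_active standing in for the result of PageActive(page) (illustrative values, not kernel code):

#include <stdio.h>

#define ISOLATE_INACTIVE 0	/* Isolate inactive pages. */
#define ISOLATE_ACTIVE   1	/* Isolate active pages. */
#define ISOLATE_BOTH     2	/* Isolate both active and inactive pages. */

/* Non-zero when a page's active state does not match the requested
 * mode; mirrors the early -EINVAL return in __isolate_lru_page(). */
static int wrong_active_state(int page_active, int mode)
{
	return mode != ISOLATE_BOTH && (!page_active != !mode);
}

int main(void)
{
	/* A flag word such as 0x40 still means "active" once normalised. */
	printf("%d\n", wrong_active_state(0x40, ISOLATE_ACTIVE));   /* 0: take it */
	printf("%d\n", wrong_active_state(0x40, ISOLATE_INACTIVE)); /* 1: skip it */
	printf("%d\n", wrong_active_state(0, ISOLATE_BOTH));        /* 0: mode accepts both */
	return 0;
}
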
@@ -612,38 +660,90 @@ keep:
  * @src:	The LRU list to pull pages off.
  * @dst:	The temp list to put pages on to.
  * @scanned:	The number of pages that were scanned.
+ * @order:	The caller's attempted allocation order
+ * @mode:	One of the LRU isolation modes
  *
  * returns how many pages were moved onto *@dst.
  */
 static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 		struct list_head *src, struct list_head *dst,
-		unsigned long *scanned)
+		unsigned long *scanned, int order, int mode)
 {
 	unsigned long nr_taken = 0;
-	struct page *page;
 	unsigned long scan;
 
 	for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) {
-		struct list_head *target;
+		struct page *page;
+		unsigned long pfn;
+		unsigned long end_pfn;
+		unsigned long page_pfn;
+		int zone_id;
+
 		page = lru_to_page(src);
 		prefetchw_prev_lru_page(page, src, flags);
 
 		VM_BUG_ON(!PageLRU(page));
 
-		list_del(&page->lru);
-		target = src;
-		if (likely(get_page_unless_zero(page))) {
-			/*
-			 * Be careful not to clear PageLRU until after we're
-			 * sure the page is not being freed elsewhere -- the
-			 * page release code relies on it.
-			 */
-			ClearPageLRU(page);
-			target = dst;
+		switch (__isolate_lru_page(page, mode)) {
+		case 0:
+			list_move(&page->lru, dst);
 			nr_taken++;
-		} /* else it is being freed elsewhere */
+			break;
 
-		list_add(&page->lru, target);
+		case -EBUSY:
+			/* else it is being freed elsewhere */
+			list_move(&page->lru, src);
+			continue;
+
+		default:
+			BUG();
+		}
+
+		if (!order)
+			continue;
+
+		/*
+		 * Attempt to take all pages in the order aligned region
+		 * surrounding the tag page.  Only take those pages of
+		 * the same active state as that tag page.  We may safely
+		 * round the target page pfn down to the requested order
+		 * as the mem_map is guaranteed valid out to MAX_ORDER;
+		 * where that page is in a different zone we will detect
+		 * it from its zone id and abort this block scan.
+		 */
+		zone_id = page_zone_id(page);
+		page_pfn = page_to_pfn(page);
+		pfn = page_pfn & ~((1 << order) - 1);
+		end_pfn = pfn + (1 << order);
+		for (; pfn < end_pfn; pfn++) {
+			struct page *cursor_page;
+
+			/* The target page is in the block, ignore it. */
+			if (unlikely(pfn == page_pfn))
+				continue;
+
+			/* Avoid holes within the zone. */
+			if (unlikely(!pfn_valid_within(pfn)))
+				break;
+
+			cursor_page = pfn_to_page(pfn);
+			/* Check that we have not crossed a zone boundary. */
+			if (unlikely(page_zone_id(cursor_page) != zone_id))
+				continue;
+			switch (__isolate_lru_page(cursor_page, mode)) {
+			case 0:
+				list_move(&cursor_page->lru, dst);
+				nr_taken++;
+				scan++;
+				break;
+
+			case -EBUSY:
+				/* else it is being freed elsewhere */
+				list_move(&cursor_page->lru, src);
+			default:
+				break;
+			}
+		}
 	}
 
 	*scanned = scan;
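
The pfn arithmetic in the block scan above rounds the tag page down to the start of its order-aligned block and walks to the end of that block, trying to isolate the neighbouring pages as well. A standalone sketch of just that arithmetic, with a made-up pfn and order (the real code inspects the struct page behind each pfn rather than printing it):

#include <stdio.h>

int main(void)
{
	unsigned long page_pfn = 262147;	/* hypothetical tag page */
	int order = 3;				/* caller wants 2^3 contiguous pages */

	/* Same rounding as isolate_lru_pages(): mask off the low bits to
	 * find the block start, then scan one full 2^order block. */
	unsigned long pfn = page_pfn & ~((1UL << order) - 1);
	unsigned long end_pfn = pfn + (1UL << order);

	printf("tag pfn %lu lies in block [%lu, %lu)\n",
	       page_pfn, pfn, end_pfn);		/* -> [262144, 262152) */
	return 0;
}
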
@@ -651,6 +751,24 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 }
 
 /*
+ * clear_active_flags() is a helper for shrink_inactive_list(), clearing
+ * any active bits from the pages in the list.
+ */
+static unsigned long clear_active_flags(struct list_head *page_list)
+{
+	int nr_active = 0;
+	struct page *page;
+
+	list_for_each_entry(page, page_list, lru)
+		if (PageActive(page)) {
+			ClearPageActive(page);
+			nr_active++;
+		}
+
+	return nr_active;
+}
+
+/*
  * shrink_inactive_list() is a helper for shrink_zone().  It returns the number
  * of reclaimed pages
  */
@@ -671,11 +789,18 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 		unsigned long nr_taken;
 		unsigned long nr_scan;
 		unsigned long nr_freed;
+		unsigned long nr_active;
 
 		nr_taken = isolate_lru_pages(sc->swap_cluster_max,
 			     &zone->inactive_list,
-			     &page_list, &nr_scan);
-		__mod_zone_page_state(zone, NR_INACTIVE, -nr_taken);
+			     &page_list, &nr_scan, sc->order,
+			     (sc->order > PAGE_ALLOC_COSTLY_ORDER)?
+					     ISOLATE_BOTH : ISOLATE_INACTIVE);
+		nr_active = clear_active_flags(&page_list);
+
+		__mod_zone_page_state(zone, NR_ACTIVE, -nr_active);
+		__mod_zone_page_state(zone, NR_INACTIVE,
+						-(nr_taken - nr_active));
 		zone->pages_scanned += nr_scan;
 		spin_unlock_irq(&zone->lru_lock);
 
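
Two things happen in the bookkeeping above: the isolation mode flips to ISOLATE_BOTH once the reclaim order exceeds PAGE_ALLOC_COSTLY_ORDER, and any active pages swept up by the block scan are subtracted from NR_ACTIVE instead of NR_INACTIVE. A small sketch of that split with made-up counts (not kernel code):

#include <stdio.h>

#define PAGE_ALLOC_COSTLY_ORDER 3
#define ISOLATE_INACTIVE 0
#define ISOLATE_BOTH     2

int main(void)
{
	int order = 4;			/* hypothetical costly allocation */
	int mode = (order > PAGE_ALLOC_COSTLY_ORDER) ?
				ISOLATE_BOTH : ISOLATE_INACTIVE;
	unsigned long nr_taken = 32;	/* pages pulled off the LRU */
	unsigned long nr_active = 5;	/* of those, how many were PageActive */

	printf("mode=%d NR_ACTIVE-=%lu NR_INACTIVE-=%lu\n",
	       mode, nr_active, nr_taken - nr_active);
	return 0;
}
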
@@ -820,7 +945,7 @@ force_reclaim_mapped:
 	lru_add_drain();
 	spin_lock_irq(&zone->lru_lock);
 	pgmoved = isolate_lru_pages(nr_pages, &zone->active_list,
-			    &l_hold, &pgscanned);
+			    &l_hold, &pgscanned, sc->order, ISOLATE_ACTIVE);
 	zone->pages_scanned += pgscanned;
 	__mod_zone_page_state(zone, NR_ACTIVE, -pgmoved);
 	spin_unlock_irq(&zone->lru_lock);
@@ -1011,7 +1136,7 @@ static unsigned long shrink_zones(int priority, struct zone **zones,
  * holds filesystem locks which prevent writeout this might not work, and the
  * allocation attempt will fail.
  */
-unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
+unsigned long try_to_free_pages(struct zone **zones, int order, gfp_t gfp_mask)
 {
 	int priority;
 	int ret = 0;
@@ -1026,6 +1151,7 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
 		.swap_cluster_max = SWAP_CLUSTER_MAX,
 		.may_swap = 1,
 		.swappiness = vm_swappiness,
+		.order = order,
 	};
 
 	count_vm_event(ALLOCSTALL);
@@ -1131,6 +1257,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
 		.may_swap = 1,
 		.swap_cluster_max = SWAP_CLUSTER_MAX,
 		.swappiness = vm_swappiness,
+		.order = order,
 	};
 	/*
 	 * temp_priority is used to remember the scanning priority at which