author    Nick Piggin <npiggin@suse.de>    2008-10-18 23:26:09 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2008-10-20 11:50:25 -0400
commit    62695a84eb8f2e718bf4dfb21700afaa7a08e0ea (patch)
tree      0af7bac599748a7e462bff16d70c702c9e33a2fb
parent    71088785c6bc68fddb450063d57b1bd1c78e0ea1 (diff)
vmscan: move isolate_lru_page() to vmscan.c
On large memory systems, the VM can spend way too much time scanning through pages that it cannot (or should not) evict from memory. Not only does it use up CPU time, but it also provokes lock contention and can leave large systems under memory pressure in a catatonic state.

This patch series improves VM scalability by:

1) putting filesystem backed, swap backed and unevictable pages onto their own LRUs, so the system only scans the pages that it can/should evict from memory

2) switching to two handed clock replacement for the anonymous LRUs, so the number of pages that need to be scanned when the system starts swapping is bound to a reasonable number

3) keeping unevictable pages off the LRU completely, so the VM does not waste CPU time scanning them. ramfs, ramdisk, SHM_LOCKED shared memory segments and mlock()ed VMA pages are kept on the unevictable list.

This patch:

isolate_lru_page logically belongs in vmscan.c rather than migrate.c. It is a tough call, because we don't need that function without memory migration, so there is a valid argument for keeping it in migrate.c. However, a subsequent patch needs to make use of it in the core mm, so we can happily move it to vmscan.c.

Also, make the function a little more generic by not requiring that it add an isolated page to a given list; callers can do that themselves (a short sketch of the new calling convention follows below).

Note that we now have '__isolate_lru_page()', which does something quite different and is visible outside of vmscan.c for use with the memory controller. Methinks we need to rationalize these names/purposes. --lts

[akpm@linux-foundation.org: fix mm/memory_hotplug.c build]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
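As an editorial illustration (not part of the patch itself), the change to the calling convention looks like this; 'page', 'err' and 'pagelist' are assumed to exist in the caller, exactly as they do in the do_move_pages() hunk further down:

	LIST_HEAD(pagelist);
	int err;

	/* Old interface: isolate and queue onto the given list in one call. */
	err = isolate_lru_page(page, &pagelist);

	/* New interface: isolation only; the caller queues the page itself. */
	err = isolate_lru_page(page);
	if (!err)
		list_add_tail(&page->lru, &pagelist);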
 include/linux/migrate.h |  3 ---
 mm/internal.h           |  2 ++
 mm/memory_hotplug.c     |  3 ++-
 mm/mempolicy.c          |  9 +++++++--
 mm/migrate.c            | 34 +++-----------------------------
 mm/vmscan.c             | 45 ++++++++++++++++++++++++++++++++++++++
 6 files changed, 59 insertions(+), 37 deletions(-)
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 03aea612d284..3f34005068d4 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -7,7 +7,6 @@
 typedef struct page *new_page_t(struct page *, unsigned long private, int **);
 
 #ifdef CONFIG_MIGRATION
-extern int isolate_lru_page(struct page *p, struct list_head *pagelist);
 extern int putback_lru_pages(struct list_head *l);
 extern int migrate_page(struct address_space *,
 			struct page *, struct page *);
@@ -21,8 +20,6 @@ extern int migrate_vmas(struct mm_struct *mm,
 			const nodemask_t *from, const nodemask_t *to,
 			unsigned long flags);
 #else
-static inline int isolate_lru_page(struct page *p, struct list_head *list)
-					{ return -ENOSYS; }
 static inline int putback_lru_pages(struct list_head *l) { return 0; }
 static inline int migrate_pages(struct list_head *l, new_page_t x,
 		unsigned long private) { return -ENOSYS; }
diff --git a/mm/internal.h b/mm/internal.h
index 1f43f7416972..4e8e78b978b5 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -39,6 +39,8 @@ static inline void __put_page(struct page *page)
 	atomic_dec(&page->_count);
 }
 
+extern int isolate_lru_page(struct page *page);
+
 extern void __free_pages_bootmem(struct page *page, unsigned int order);
 
 /*
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index c299d083d8e2..3b4975815141 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -658,8 +658,9 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 		 * We can skip free pages. And we can only deal with pages on
 		 * LRU.
 		 */
-		ret = isolate_lru_page(page, &source);
+		ret = isolate_lru_page(page);
 		if (!ret) { /* Success */
+			list_add_tail(&page->lru, &source);
 			move_pages--;
 		} else {
 			/* Becasue we don't have big zone->lock. we should
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 83369058ec13..71b47491487d 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -93,6 +93,8 @@
 #include <asm/tlbflush.h>
 #include <asm/uaccess.h>
 
+#include "internal.h"
+
 /* Internal flags */
 #define MPOL_MF_DISCONTIG_OK (MPOL_MF_INTERNAL << 0)	/* Skip checks for continuous vmas */
 #define MPOL_MF_INVERT (MPOL_MF_INTERNAL << 1)		/* Invert check for nodemask */
@@ -762,8 +764,11 @@ static void migrate_page_add(struct page *page, struct list_head *pagelist,
 	/*
 	 * Avoid migrating a page that is shared with others.
 	 */
-	if ((flags & MPOL_MF_MOVE_ALL) || page_mapcount(page) == 1)
-		isolate_lru_page(page, pagelist);
+	if ((flags & MPOL_MF_MOVE_ALL) || page_mapcount(page) == 1) {
+		if (!isolate_lru_page(page)) {
+			list_add_tail(&page->lru, pagelist);
+		}
+	}
 }
 
 static struct page *new_node_page(struct page *page, unsigned long node, int **x)
diff --git a/mm/migrate.c b/mm/migrate.c
index 2a80136b23bb..da73742e52a5 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -37,36 +37,6 @@
 #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
 
 /*
- * Isolate one page from the LRU lists. If successful put it onto
- * the indicated list with elevated page count.
- *
- * Result:
- *  -EBUSY: page not on LRU list
- *  0: page removed from LRU list and added to the specified list.
- */
-int isolate_lru_page(struct page *page, struct list_head *pagelist)
-{
-	int ret = -EBUSY;
-
-	if (PageLRU(page)) {
-		struct zone *zone = page_zone(page);
-
-		spin_lock_irq(&zone->lru_lock);
-		if (PageLRU(page) && get_page_unless_zero(page)) {
-			ret = 0;
-			ClearPageLRU(page);
-			if (PageActive(page))
-				del_page_from_active_list(zone, page);
-			else
-				del_page_from_inactive_list(zone, page);
-			list_add_tail(&page->lru, pagelist);
-		}
-		spin_unlock_irq(&zone->lru_lock);
-	}
-	return ret;
-}
-
-/*
  * migrate_prep() needs to be called before we start compiling a list of pages
  * to be migrated using isolate_lru_page().
  */
@@ -914,7 +884,9 @@ static int do_move_pages(struct mm_struct *mm, struct page_to_node *pm,
 				!migrate_all)
 			goto put_and_set;
 
-		err = isolate_lru_page(page, &pagelist);
+		err = isolate_lru_page(page);
+		if (!err)
+			list_add_tail(&page->lru, &pagelist);
 put_and_set:
 		/*
 		 * Either remove the duplicate refcount from
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1ff1a58e7c10..1fd4912a596c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -844,6 +844,51 @@ static unsigned long clear_active_flags(struct list_head *page_list)
 	return nr_active;
 }
 
+/**
+ * isolate_lru_page - tries to isolate a page from its LRU list
+ * @page: page to isolate from its LRU list
+ *
+ * Isolates a @page from an LRU list, clears PageLRU and adjusts the
+ * vmstat statistic corresponding to whatever LRU list the page was on.
+ *
+ * Returns 0 if the page was removed from an LRU list.
+ * Returns -EBUSY if the page was not on an LRU list.
+ *
+ * The returned page will have PageLRU() cleared.  If it was found on
+ * the active list, it will have PageActive set.  That flag may need
+ * to be cleared by the caller before letting the page go.
+ *
+ * The vmstat statistic corresponding to the list on which the page was
+ * found will be decremented.
+ *
+ * Restrictions:
+ * (1) Must be called with an elevated refcount on the page. This is a
+ *     fundamental difference from isolate_lru_pages (which is called
+ *     without a stable reference).
+ * (2) the lru_lock must not be held.
+ * (3) interrupts must be enabled.
+ */
+int isolate_lru_page(struct page *page)
+{
+	int ret = -EBUSY;
+
+	if (PageLRU(page)) {
+		struct zone *zone = page_zone(page);
+
+		spin_lock_irq(&zone->lru_lock);
+		if (PageLRU(page) && get_page_unless_zero(page)) {
+			ret = 0;
+			ClearPageLRU(page);
+			if (PageActive(page))
+				del_page_from_active_list(zone, page);
+			else
+				del_page_from_inactive_list(zone, page);
+		}
+		spin_unlock_irq(&zone->lru_lock);
+	}
+	return ret;
+}
+
 /*
  * shrink_inactive_list() is a helper for shrink_zone().  It returns the number
  * of reclaimed pages
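As a closing usage note (editorial, not part of the patch), the three restrictions in the kerneldoc above translate into a caller pattern roughly like the following sketch. It mirrors what do_migrate_range() and do_move_pages() do in this series; 'page' and 'pagelist' are illustrative names, and putback_lru_pages() is the existing migrate.h helper (available where CONFIG_MIGRATION is set):

	LIST_HEAD(pagelist);

	/*
	 * Take our own stable reference first (restriction 1).  We must not
	 * hold zone->lru_lock (restriction 2) and interrupts must be enabled
	 * (restriction 3), since isolate_lru_page() takes the lock itself.
	 */
	if (get_page_unless_zero(page)) {
		if (!isolate_lru_page(page))
			/*
			 * Success: the reference isolate_lru_page() took now
			 * travels with the page on our private list.
			 */
			list_add_tail(&page->lru, &pagelist);
		put_page(page);		/* drop only our own reference */
	}

	/* ... migrate or otherwise process pagelist ... */
	putback_lru_pages(&pagelist);	/* return leftovers to the LRU */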