aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
author Hugh Dickins <hugh.dickins@tiscali.co.uk> 2009-12-14 20:59:29 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2009-12-15 11:53:19 -0500
commit 80e148226028257ec0a1909d99b2c40d0ffe17f2 (patch)
tree 1e3cae42144f5c80e215ba254e01bd6847ba1b36 /mm
parent 4035c07a895974d0ac06a56fe870ad293fc451a7 (diff)
ksm: share anon page without allocating
When ksm pages were unswappable, it made no sense to include them in mem cgroup accounting; but now that they are swappable (although I see no strict logical connection) the principle of least surprise implies that they should be accounted (with the usual dissatisfaction, that a shared page is accounted to only one of the cgroups using it).

This patch was intended to add mem cgroup accounting where necessary; but turned inside out, it now avoids allocating a ksm page, instead upgrading an anon page to ksm - which brings its existing mem cgroup accounting with it. Thus mem cgroups don't appear in the patch at all.

This upgrade from PageAnon to PageKsm takes place under page lock (via a somewhat hacky NULL kpage interface), and audit showed only one place which needed to cope with the race - page_referenced() is sometimes used without page lock, so page_lock_anon_vma() needs an ACCESS_ONCE() to be sure of getting anon_vma and flags together (no problem if the page goes ksm an instant after, the integrity of that anon_vma list is unaffected).

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Izik Eidus <ieidus@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Chris Wright <chrisw@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- mm/ksm.c | 67
-rw-r--r-- mm/rmap.c | 6
2 files changed, 25 insertions, 48 deletions
diff --git a/mm/ksm.c b/mm/ksm.c
index 37cc92f83a8d..20f46a7b2799 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -831,7 +831,8 @@ out:
831 * try_to_merge_one_page - take two pages and merge them into one 831 * try_to_merge_one_page - take two pages and merge them into one
832 * @vma: the vma that holds the pte pointing to page 832 * @vma: the vma that holds the pte pointing to page
833 * @page: the PageAnon page that we want to replace with kpage 833 * @page: the PageAnon page that we want to replace with kpage
834 * @kpage: the PageKsm page that we want to map instead of page 834 * @kpage: the PageKsm page that we want to map instead of page,
835 * or NULL the first time when we want to use page as kpage.
835 * 836 *
836 * This function returns 0 if the pages were merged, -EFAULT otherwise. 837 * This function returns 0 if the pages were merged, -EFAULT otherwise.
837 */ 838 */
@@ -864,15 +865,24 @@ static int try_to_merge_one_page(struct vm_area_struct *vma,
864 * ptes are necessarily already write-protected. But in either 865 * ptes are necessarily already write-protected. But in either
865 * case, we need to lock and check page_count is not raised. 866 * case, we need to lock and check page_count is not raised.
866 */ 867 */
867 if (write_protect_page(vma, page, &orig_pte) == 0 && 868 if (write_protect_page(vma, page, &orig_pte) == 0) {
868 pages_identical(page, kpage)) 869 if (!kpage) {
869 err = replace_page(vma, page, kpage, orig_pte); 870 /*
871 * While we hold page lock, upgrade page from
872 * PageAnon+anon_vma to PageKsm+NULL stable_node:
873 * stable_tree_insert() will update stable_node.
874 */
875 set_page_stable_node(page, NULL);
876 mark_page_accessed(page);
877 err = 0;
878 } else if (pages_identical(page, kpage))
879 err = replace_page(vma, page, kpage, orig_pte);
880 }
870 881
871 if ((vma->vm_flags & VM_LOCKED) && !err) { 882 if ((vma->vm_flags & VM_LOCKED) && kpage && !err) {
872 munlock_vma_page(page); 883 munlock_vma_page(page);
873 if (!PageMlocked(kpage)) { 884 if (!PageMlocked(kpage)) {
874 unlock_page(page); 885 unlock_page(page);
875 lru_add_drain();
876 lock_page(kpage); 886 lock_page(kpage);
877 mlock_vma_page(kpage); 887 mlock_vma_page(kpage);
878 page = kpage; /* for final unlock */ 888 page = kpage; /* for final unlock */
@@ -922,7 +932,7 @@ out:
922 * This function returns the kpage if we successfully merged two identical 932 * This function returns the kpage if we successfully merged two identical
923 * pages into one ksm page, NULL otherwise. 933 * pages into one ksm page, NULL otherwise.
924 * 934 *
925 * Note that this function allocates a new kernel page: if one of the pages 935 * Note that this function upgrades page to ksm page: if one of the pages
926 * is already a ksm page, try_to_merge_with_ksm_page should be used. 936 * is already a ksm page, try_to_merge_with_ksm_page should be used.
927 */ 937 */
928static struct page *try_to_merge_two_pages(struct rmap_item *rmap_item, 938static struct page *try_to_merge_two_pages(struct rmap_item *rmap_item,
@@ -930,10 +940,7 @@ static struct page *try_to_merge_two_pages(struct rmap_item *rmap_item,
930 struct rmap_item *tree_rmap_item, 940 struct rmap_item *tree_rmap_item,
931 struct page *tree_page) 941 struct page *tree_page)
932{ 942{
933 struct mm_struct *mm = rmap_item->mm; 943 int err;
934 struct vm_area_struct *vma;
935 struct page *kpage;
936 int err = -EFAULT;
937 944
938 /* 945 /*
939 * The number of nodes in the stable tree 946 * The number of nodes in the stable tree
@@ -943,37 +950,10 @@ static struct page *try_to_merge_two_pages(struct rmap_item *rmap_item,
943 ksm_max_kernel_pages <= ksm_pages_shared) 950 ksm_max_kernel_pages <= ksm_pages_shared)
944 return NULL; 951 return NULL;
945 952
946 kpage = alloc_page(GFP_HIGHUSER); 953 err = try_to_merge_with_ksm_page(rmap_item, page, NULL);
947 if (!kpage)
948 return NULL;
949
950 down_read(&mm->mmap_sem);
951 if (ksm_test_exit(mm))
952 goto up;
953 vma = find_vma(mm, rmap_item->address);
954 if (!vma || vma->vm_start > rmap_item->address)
955 goto up;
956
957 copy_user_highpage(kpage, page, rmap_item->address, vma);
958
959 SetPageDirty(kpage);
960 __SetPageUptodate(kpage);
961 SetPageSwapBacked(kpage);
962 set_page_stable_node(kpage, NULL); /* mark it PageKsm */
963 lru_cache_add_lru(kpage, LRU_ACTIVE_ANON);
964
965 err = try_to_merge_one_page(vma, page, kpage);
966 if (err)
967 goto up;
968
969 /* Must get reference to anon_vma while still holding mmap_sem */
970 hold_anon_vma(rmap_item, vma->anon_vma);
971up:
972 up_read(&mm->mmap_sem);
973
974 if (!err) { 954 if (!err) {
975 err = try_to_merge_with_ksm_page(tree_rmap_item, 955 err = try_to_merge_with_ksm_page(tree_rmap_item,
976 tree_page, kpage); 956 tree_page, page);
977 /* 957 /*
978 * If that fails, we have a ksm page with only one pte 958 * If that fails, we have a ksm page with only one pte
979 * pointing to it: so break it. 959 * pointing to it: so break it.
@@ -981,11 +961,7 @@ up:
981 if (err) 961 if (err)
982 break_cow(rmap_item); 962 break_cow(rmap_item);
983 } 963 }
984 if (err) { 964 return err ? NULL : page;
985 put_page(kpage);
986 kpage = NULL;
987 }
988 return kpage;
989} 965}
990 966
991/* 967/*
@@ -1244,7 +1220,6 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
1244 stable_tree_append(rmap_item, stable_node); 1220 stable_tree_append(rmap_item, stable_node);
1245 } 1221 }
1246 unlock_page(kpage); 1222 unlock_page(kpage);
1247 put_page(kpage);
1248 1223
1249 /* 1224 /*
1250 * If we fail to insert the page into the stable tree, 1225 * If we fail to insert the page into the stable tree,
diff --git a/mm/rmap.c b/mm/rmap.c
index ebdf582ef185..2e38e9048327 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -204,7 +204,7 @@ struct anon_vma *page_lock_anon_vma(struct page *page)
204 unsigned long anon_mapping; 204 unsigned long anon_mapping;
205 205
206 rcu_read_lock(); 206 rcu_read_lock();
207 anon_mapping = (unsigned long) page->mapping; 207 anon_mapping = (unsigned long) ACCESS_ONCE(page->mapping);
208 if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON) 208 if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
209 goto out; 209 goto out;
210 if (!page_mapped(page)) 210 if (!page_mapped(page))
@@ -666,7 +666,9 @@ static void __page_check_anon_rmap(struct page *page,
666 * @address: the user virtual address mapped 666 * @address: the user virtual address mapped
667 * 667 *
668 * The caller needs to hold the pte lock, and the page must be locked in 668 * The caller needs to hold the pte lock, and the page must be locked in
669 * the anon_vma case: to serialize mapping,index checking after setting. 669 * the anon_vma case: to serialize mapping,index checking after setting,
670 * and to ensure that PageAnon is not being upgraded racily to PageKsm
671 * (but PageKsm is never downgraded to PageAnon).
670 */ 672 */
671void page_add_anon_rmap(struct page *page, 673void page_add_anon_rmap(struct page *page,
672 struct vm_area_struct *vma, unsigned long address) 674 struct vm_area_struct *vma, unsigned long address)