author	Hugh Dickins <hugh.dickins@tiscali.co.uk>	2009-12-14 20:59:24 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-12-15 11:53:19 -0500
commit	5ad6468801d28c4d4ac9f48ec19297817c915f6a (patch)
tree	edd8dc48693f43278d6fe1614aca2bf660d4dc10
parent	73848b4684e84a84cfd1555af78d41158f31e16b (diff)
ksm: let shared pages be swappable
Initial implementation for swapping out KSM's shared pages: add page_referenced_ksm() and try_to_unmap_ksm(), which rmap.c calls when faced with a PageKsm page.

Most of what's needed can be got from the rmap_items listed from the stable_node of the ksm page, without discovering the actual vma: so in this patch just fake up a struct vma for page_referenced_one() or try_to_unmap_one(), then refine that in the next patch.

Add VM_NONLINEAR to ksm_madvise()'s list of exclusions: it has always been implicit there (being only set with VM_SHARED, already excluded), but let's make it explicit, to help justify the lack of nonlinear unmap.

Rely on the page lock to protect against concurrent modifications to that page's node of the stable tree.

The awkward part is not swapout but swapin: do_swap_page() and page_add_anon_rmap() now have to allow for new possibilities - perhaps a ksm page still in swapcache, perhaps a swapcache page associated with one location in one anon_vma now needed for another location or anon_vma. (And the vma might even be no longer VM_MERGEABLE when that happens.)

ksm_might_need_to_copy() checks for that case, and supplies a duplicate page when necessary, simply leaving it to a subsequent pass of ksmd to rediscover the identity and merge them back into one ksm page. Disappointingly primitive: but the alternative would have to accumulate unswappable info about the swapped out ksm pages, limiting swappability.

Remove page_add_ksm_rmap(): page_add_anon_rmap() now has to allow for the particular case it was handling, so just use it instead.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Izik Eidus <ieidus@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Chris Wright <chrisw@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
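For orientation before reading the diff, the swapin decision described above can be sketched as follows; this is only a commented paraphrase of the ksm_might_need_to_copy() inline that the include/linux/ksm.h hunk below adds, not additional code in the patch:

	static inline struct page *ksm_might_need_to_copy(struct page *page,
			struct vm_area_struct *vma, unsigned long address)
	{
		struct anon_vma *anon_vma = page_anon_vma(page);

		/*
		 * A swapcache page with no anon_vma yet, or whose anon_vma and
		 * offset match the faulting vma, can be mapped back in place.
		 */
		if (!anon_vma ||
		    (anon_vma == vma->anon_vma &&
		     page->index == linear_page_index(vma, address)))
			return page;

		/*
		 * Otherwise duplicate the page now; a later pass of ksmd will
		 * rediscover the identity and merge the copies back together.
		 */
		return ksm_does_need_to_copy(page, vma, address);
	}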
-rw-r--r--	include/linux/ksm.h	|  54
-rw-r--r--	include/linux/rmap.h	|   5
-rw-r--r--	mm/ksm.c		| 172
-rw-r--r--	mm/memory.c		|   6
-rw-r--r--	mm/rmap.c		|  65
-rw-r--r--	mm/swapfile.c		|  11
6 files changed, 264 insertions(+), 49 deletions(-)
diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index ef55ce14a2ce..157d83dbaef8 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -9,10 +9,12 @@
 
 #include <linux/bitops.h>
 #include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/rmap.h>
 #include <linux/sched.h>
-#include <linux/vmstat.h>
 
 struct stable_node;
+struct mem_cgroup;
 
 #ifdef CONFIG_KSM
 int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
@@ -57,11 +59,36 @@ static inline void set_page_stable_node(struct page *page,
 		(PAGE_MAPPING_ANON | PAGE_MAPPING_KSM);
 }
 
-static inline void page_add_ksm_rmap(struct page *page)
+/*
+ * When do_swap_page() first faults in from swap what used to be a KSM page,
+ * no problem, it will be assigned to this vma's anon_vma; but thereafter,
+ * it might be faulted into a different anon_vma (or perhaps to a different
+ * offset in the same anon_vma).  do_swap_page() cannot do all the locking
+ * needed to reconstitute a cross-anon_vma KSM page: for now it has to make
+ * a copy, and leave remerging the pages to a later pass of ksmd.
+ *
+ * We'd like to make this conditional on vma->vm_flags & VM_MERGEABLE,
+ * but what if the vma was unmerged while the page was swapped out?
+ */
+struct page *ksm_does_need_to_copy(struct page *page,
+			struct vm_area_struct *vma, unsigned long address);
+static inline struct page *ksm_might_need_to_copy(struct page *page,
+			struct vm_area_struct *vma, unsigned long address)
 {
-	if (atomic_inc_and_test(&page->_mapcount))
-		__inc_zone_page_state(page, NR_ANON_PAGES);
+	struct anon_vma *anon_vma = page_anon_vma(page);
+
+	if (!anon_vma ||
+	    (anon_vma == vma->anon_vma &&
+	     page->index == linear_page_index(vma, address)))
+		return page;
+
+	return ksm_does_need_to_copy(page, vma, address);
 }
+
+int page_referenced_ksm(struct page *page,
+			struct mem_cgroup *memcg, unsigned long *vm_flags);
+int try_to_unmap_ksm(struct page *page, enum ttu_flags flags);
+
 #else  /* !CONFIG_KSM */
 
 static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
@@ -84,7 +111,22 @@ static inline int PageKsm(struct page *page)
 	return 0;
 }
 
-/* No stub required for page_add_ksm_rmap(page) */
+static inline struct page *ksm_might_need_to_copy(struct page *page,
+			struct vm_area_struct *vma, unsigned long address)
+{
+	return page;
+}
+
+static inline int page_referenced_ksm(struct page *page,
+			struct mem_cgroup *memcg, unsigned long *vm_flags)
+{
+	return 0;
+}
+
+static inline int try_to_unmap_ksm(struct page *page, enum ttu_flags flags)
+{
+	return 0;
+}
 #endif /* !CONFIG_KSM */
 
-#endif
+#endif /* __LINUX_KSM_H */
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 1f65af44c6d2..0b4913a4a344 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -89,6 +89,9 @@ static inline void page_dup_rmap(struct page *page)
  */
 int page_referenced(struct page *, int is_locked,
 			struct mem_cgroup *cnt, unsigned long *vm_flags);
+int page_referenced_one(struct page *, struct vm_area_struct *,
+	unsigned long address, unsigned int *mapcount, unsigned long *vm_flags);
+
 enum ttu_flags {
 	TTU_UNMAP = 0,			/* unmap mode */
 	TTU_MIGRATION = 1,		/* migration mode */
@@ -102,6 +105,8 @@ enum ttu_flags {
 #define TTU_ACTION(x) ((x) & TTU_ACTION_MASK)
 
 int try_to_unmap(struct page *, enum ttu_flags flags);
+int try_to_unmap_one(struct page *, struct vm_area_struct *,
+			unsigned long address, enum ttu_flags flags);
 
 /*
  * Called from mm/filemap_xip.c to unmap empty zero page
diff --git a/mm/ksm.c b/mm/ksm.c
index af5f571185d5..2f58ceebfe8f 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -196,6 +196,13 @@ static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait);
 static DEFINE_MUTEX(ksm_thread_mutex);
 static DEFINE_SPINLOCK(ksm_mmlist_lock);
 
+/*
+ * Temporary hack for page_referenced_ksm() and try_to_unmap_ksm(),
+ * later we rework things a little to get the right vma to them.
+ */
+static DEFINE_SPINLOCK(ksm_fallback_vma_lock);
+static struct vm_area_struct ksm_fallback_vma;
+
 #define KSM_KMEM_CACHE(__struct, __flags) kmem_cache_create("ksm_"#__struct,\
 		sizeof(struct __struct), __alignof__(struct __struct),\
 		(__flags), NULL)
@@ -445,14 +452,20 @@ static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
 {
 	if (rmap_item->address & STABLE_FLAG) {
 		struct stable_node *stable_node;
+		struct page *page;
 
 		stable_node = rmap_item->head;
+		page = stable_node->page;
+		lock_page(page);
+
 		hlist_del(&rmap_item->hlist);
-		if (stable_node->hlist.first)
+		if (stable_node->hlist.first) {
+			unlock_page(page);
 			ksm_pages_sharing--;
-		else {
-			set_page_stable_node(stable_node->page, NULL);
-			put_page(stable_node->page);
+		} else {
+			set_page_stable_node(page, NULL);
+			unlock_page(page);
+			put_page(page);
 
 			rb_erase(&stable_node->node, &root_stable_tree);
 			free_stable_node(stable_node);
@@ -710,7 +723,7 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
 	}
 
 	get_page(kpage);
-	page_add_ksm_rmap(kpage);
+	page_add_anon_rmap(kpage, vma, addr);
 
 	flush_cache_page(vma, addr, pte_pfn(*ptep));
 	ptep_clear_flush(vma, addr, ptep);
@@ -763,8 +776,16 @@ static int try_to_merge_one_page(struct vm_area_struct *vma,
 	    pages_identical(page, kpage))
 		err = replace_page(vma, page, kpage, orig_pte);
 
-	if ((vma->vm_flags & VM_LOCKED) && !err)
+	if ((vma->vm_flags & VM_LOCKED) && !err) {
 		munlock_vma_page(page);
+		if (!PageMlocked(kpage)) {
+			unlock_page(page);
+			lru_add_drain();
+			lock_page(kpage);
+			mlock_vma_page(kpage);
+			page = kpage;		/* for final unlock */
+		}
+	}
 
 	unlock_page(page);
 out:
@@ -841,7 +862,11 @@ static struct page *try_to_merge_two_pages(struct rmap_item *rmap_item,
 
 	copy_user_highpage(kpage, page, rmap_item->address, vma);
 
+	SetPageDirty(kpage);
+	__SetPageUptodate(kpage);
+	SetPageSwapBacked(kpage);
 	set_page_stable_node(kpage, NULL);	/* mark it PageKsm */
+	lru_cache_add_lru(kpage, LRU_ACTIVE_ANON);
 
 	err = try_to_merge_one_page(vma, page, kpage);
 up:
@@ -1071,7 +1096,9 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
 		 * The page was successfully merged:
 		 * add its rmap_item to the stable tree.
 		 */
+		lock_page(kpage);
 		stable_tree_append(rmap_item, stable_node);
+		unlock_page(kpage);
 	}
 	put_page(kpage);
 	return;
@@ -1112,11 +1139,13 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
 	if (kpage) {
 		remove_rmap_item_from_tree(tree_rmap_item);
 
+		lock_page(kpage);
 		stable_node = stable_tree_insert(kpage);
 		if (stable_node) {
 			stable_tree_append(tree_rmap_item, stable_node);
 			stable_tree_append(rmap_item, stable_node);
 		}
+		unlock_page(kpage);
 		put_page(kpage);
 
 		/*
@@ -1285,14 +1314,6 @@ static void ksm_do_scan(unsigned int scan_npages)
 			return;
 		if (!PageKsm(page) || !in_stable_tree(rmap_item))
 			cmp_and_merge_page(page, rmap_item);
-		else if (page_mapcount(page) == 1) {
-			/*
-			 * Replace now-unshared ksm page by ordinary page.
-			 */
-			break_cow(rmap_item);
-			remove_rmap_item_from_tree(rmap_item);
-			rmap_item->oldchecksum = calc_checksum(page);
-		}
 		put_page(page);
 	}
 }
@@ -1337,7 +1358,7 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 	if (*vm_flags & (VM_MERGEABLE | VM_SHARED  | VM_MAYSHARE   |
 			 VM_PFNMAP    | VM_IO      | VM_DONTEXPAND |
 			 VM_RESERVED  | VM_HUGETLB | VM_INSERTPAGE |
-			 VM_MIXEDMAP  | VM_SAO))
+			 VM_NONLINEAR | VM_MIXEDMAP | VM_SAO))
 		return 0;		/* just ignore the advice */
 
 	if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) {
@@ -1435,6 +1456,127 @@ void __ksm_exit(struct mm_struct *mm)
 	}
 }
 
+struct page *ksm_does_need_to_copy(struct page *page,
+			struct vm_area_struct *vma, unsigned long address)
+{
+	struct page *new_page;
+
+	unlock_page(page);	/* any racers will COW it, not modify it */
+
+	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
+	if (new_page) {
+		copy_user_highpage(new_page, page, address, vma);
+
+		SetPageDirty(new_page);
+		__SetPageUptodate(new_page);
+		SetPageSwapBacked(new_page);
+		__set_page_locked(new_page);
+
+		if (page_evictable(new_page, vma))
+			lru_cache_add_lru(new_page, LRU_ACTIVE_ANON);
+		else
+			add_page_to_unevictable_list(new_page);
+	}
+
+	page_cache_release(page);
+	return new_page;
+}
+
+int page_referenced_ksm(struct page *page, struct mem_cgroup *memcg,
+			unsigned long *vm_flags)
+{
+	struct stable_node *stable_node;
+	struct rmap_item *rmap_item;
+	struct hlist_node *hlist;
+	unsigned int mapcount = page_mapcount(page);
+	int referenced = 0;
+	struct vm_area_struct *vma;
+
+	VM_BUG_ON(!PageKsm(page));
+	VM_BUG_ON(!PageLocked(page));
+
+	stable_node = page_stable_node(page);
+	if (!stable_node)
+		return 0;
+
+	/*
+	 * Temporary hack: really we need anon_vma in rmap_item, to
+	 * provide the correct vma, and to find recently forked instances.
+	 * Use zalloc to avoid weirdness if any other fields are involved.
+	 */
+	vma = kmem_cache_zalloc(vm_area_cachep, GFP_ATOMIC);
+	if (!vma) {
+		spin_lock(&ksm_fallback_vma_lock);
+		vma = &ksm_fallback_vma;
+	}
+
+	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
+		if (memcg && !mm_match_cgroup(rmap_item->mm, memcg))
+			continue;
+
+		vma->vm_mm = rmap_item->mm;
+		vma->vm_start = rmap_item->address;
+		vma->vm_end = vma->vm_start + PAGE_SIZE;
+
+		referenced += page_referenced_one(page, vma,
+				rmap_item->address, &mapcount, vm_flags);
+		if (!mapcount)
+			goto out;
+	}
+out:
+	if (vma == &ksm_fallback_vma)
+		spin_unlock(&ksm_fallback_vma_lock);
+	else
+		kmem_cache_free(vm_area_cachep, vma);
+	return referenced;
+}
+
+int try_to_unmap_ksm(struct page *page, enum ttu_flags flags)
+{
+	struct stable_node *stable_node;
+	struct hlist_node *hlist;
+	struct rmap_item *rmap_item;
+	int ret = SWAP_AGAIN;
+	struct vm_area_struct *vma;
+
+	VM_BUG_ON(!PageKsm(page));
+	VM_BUG_ON(!PageLocked(page));
+
+	stable_node = page_stable_node(page);
+	if (!stable_node)
+		return SWAP_FAIL;
+
+	/*
+	 * Temporary hack: really we need anon_vma in rmap_item, to
+	 * provide the correct vma, and to find recently forked instances.
+	 * Use zalloc to avoid weirdness if any other fields are involved.
+	 */
+	if (TTU_ACTION(flags) != TTU_UNMAP)
+		return SWAP_FAIL;
+
+	vma = kmem_cache_zalloc(vm_area_cachep, GFP_ATOMIC);
+	if (!vma) {
+		spin_lock(&ksm_fallback_vma_lock);
+		vma = &ksm_fallback_vma;
+	}
+
+	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
+		vma->vm_mm = rmap_item->mm;
+		vma->vm_start = rmap_item->address;
+		vma->vm_end = vma->vm_start + PAGE_SIZE;
+
+		ret = try_to_unmap_one(page, vma, rmap_item->address, flags);
+		if (ret != SWAP_AGAIN || !page_mapped(page))
+			goto out;
+	}
+out:
+	if (vma == &ksm_fallback_vma)
+		spin_unlock(&ksm_fallback_vma_lock);
+	else
+		kmem_cache_free(vm_area_cachep, vma);
+	return ret;
+}
+
 #ifdef CONFIG_SYSFS
 /*
  * This all compiles without CONFIG_SYSFS, but is a waste of space.
diff --git a/mm/memory.c b/mm/memory.c
index 1c9dc46da3db..a54b2c498444 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2561,6 +2561,12 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	lock_page(page);
 	delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
 
+	page = ksm_might_need_to_copy(page, vma, address);
+	if (!page) {
+		ret = VM_FAULT_OOM;
+		goto out;
+	}
+
 	if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) {
 		ret = VM_FAULT_OOM;
 		goto out_page;
diff --git a/mm/rmap.c b/mm/rmap.c
index ebee81688736..869aaa3206a2 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -49,6 +49,7 @@
 #include <linux/swapops.h>
 #include <linux/slab.h>
 #include <linux/init.h>
+#include <linux/ksm.h>
 #include <linux/rmap.h>
 #include <linux/rcupdate.h>
 #include <linux/module.h>
@@ -336,9 +337,9 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
  * Subfunctions of page_referenced: page_referenced_one called
  * repeatedly from either page_referenced_anon or page_referenced_file.
  */
-static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
+int page_referenced_one(struct page *page, struct vm_area_struct *vma,
 			unsigned long address, unsigned int *mapcount,
 			unsigned long *vm_flags)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	pte_t *pte;
@@ -507,28 +508,33 @@ int page_referenced(struct page *page,
 			unsigned long *vm_flags)
 {
 	int referenced = 0;
+	int we_locked = 0;
 
 	if (TestClearPageReferenced(page))
 		referenced++;
 
 	*vm_flags = 0;
 	if (page_mapped(page) && page_rmapping(page)) {
-		if (PageAnon(page))
+		if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
+			we_locked = trylock_page(page);
+			if (!we_locked) {
+				referenced++;
+				goto out;
+			}
+		}
+		if (unlikely(PageKsm(page)))
+			referenced += page_referenced_ksm(page, mem_cont,
+								vm_flags);
+		else if (PageAnon(page))
 			referenced += page_referenced_anon(page, mem_cont,
 								vm_flags);
-		else if (is_locked)
+		else if (page->mapping)
 			referenced += page_referenced_file(page, mem_cont,
 								vm_flags);
-		else if (!trylock_page(page))
-			referenced++;
-		else {
-			if (page->mapping)
-				referenced += page_referenced_file(page,
-							mem_cont, vm_flags);
+		if (we_locked)
 			unlock_page(page);
-		}
 	}
-
+out:
 	if (page_test_and_clear_young(page))
 		referenced++;
 
@@ -620,14 +626,7 @@ static void __page_set_anon_rmap(struct page *page,
 	BUG_ON(!anon_vma);
 	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
 	page->mapping = (struct address_space *) anon_vma;
-
 	page->index = linear_page_index(vma, address);
-
-	/*
-	 * nr_mapped state can be updated without turning off
-	 * interrupts because it is not modified via interrupt.
-	 */
-	__inc_zone_page_state(page, NR_ANON_PAGES);
 }
 
 /**
@@ -665,14 +664,21 @@ static void __page_check_anon_rmap(struct page *page,
  * @vma: the vm area in which the mapping is added
  * @address: the user virtual address mapped
  *
- * The caller needs to hold the pte lock and the page must be locked.
+ * The caller needs to hold the pte lock, and the page must be locked in
+ * the anon_vma case: to serialize mapping,index checking after setting.
  */
 void page_add_anon_rmap(struct page *page,
 	struct vm_area_struct *vma, unsigned long address)
 {
+	int first = atomic_inc_and_test(&page->_mapcount);
+	if (first)
+		__inc_zone_page_state(page, NR_ANON_PAGES);
+	if (unlikely(PageKsm(page)))
+		return;
+
 	VM_BUG_ON(!PageLocked(page));
 	VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
-	if (atomic_inc_and_test(&page->_mapcount))
+	if (first)
 		__page_set_anon_rmap(page, vma, address);
 	else
 		__page_check_anon_rmap(page, vma, address);
@@ -694,6 +700,7 @@ void page_add_new_anon_rmap(struct page *page,
 	VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
 	SetPageSwapBacked(page);
 	atomic_set(&page->_mapcount, 0); /* increment count (starts at -1) */
+	__inc_zone_page_state(page, NR_ANON_PAGES);
 	__page_set_anon_rmap(page, vma, address);
 	if (page_evictable(page, vma))
 		lru_cache_add_lru(page, LRU_ACTIVE_ANON);
@@ -760,8 +767,8 @@ void page_remove_rmap(struct page *page)
  * Subfunctions of try_to_unmap: try_to_unmap_one called
  * repeatedly from either try_to_unmap_anon or try_to_unmap_file.
  */
-static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
+int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			unsigned long address, enum ttu_flags flags)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	pte_t *pte;
@@ -1156,7 +1163,9 @@ int try_to_unmap(struct page *page, enum ttu_flags flags)
 
 	BUG_ON(!PageLocked(page));
 
-	if (PageAnon(page))
+	if (unlikely(PageKsm(page)))
+		ret = try_to_unmap_ksm(page, flags);
+	else if (PageAnon(page))
 		ret = try_to_unmap_anon(page, flags);
 	else
 		ret = try_to_unmap_file(page, flags);
@@ -1177,15 +1186,17 @@ int try_to_unmap(struct page *page, enum ttu_flags flags)
  *
  * SWAP_AGAIN	- no vma is holding page mlocked, or,
  * SWAP_AGAIN	- page mapped in mlocked vma -- couldn't acquire mmap sem
+ * SWAP_FAIL	- page cannot be located at present
  * SWAP_MLOCK	- page is now mlocked.
  */
 int try_to_munlock(struct page *page)
 {
 	VM_BUG_ON(!PageLocked(page) || PageLRU(page));
 
-	if (PageAnon(page))
+	if (unlikely(PageKsm(page)))
+		return try_to_unmap_ksm(page, TTU_MUNLOCK);
+	else if (PageAnon(page))
 		return try_to_unmap_anon(page, TTU_MUNLOCK);
 	else
 		return try_to_unmap_file(page, TTU_MUNLOCK);
 }
-
diff --git a/mm/swapfile.c b/mm/swapfile.c
index e74112e8e5f4..6c0585b16418 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -22,6 +22,7 @@
 #include <linux/seq_file.h>
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/ksm.h>
 #include <linux/rmap.h>
 #include <linux/security.h>
 #include <linux/backing-dev.h>
@@ -650,6 +651,8 @@ int reuse_swap_page(struct page *page)
 	int count;
 
 	VM_BUG_ON(!PageLocked(page));
+	if (unlikely(PageKsm(page)))
+		return 0;
 	count = page_mapcount(page);
 	if (count <= 1 && PageSwapCache(page)) {
 		count += page_swapcount(page);
@@ -658,7 +661,7 @@ int reuse_swap_page(struct page *page)
 			SetPageDirty(page);
 		}
 	}
-	return count == 1;
+	return count <= 1;
 }
 
 /*
@@ -1185,6 +1188,12 @@ static int try_to_unuse(unsigned int type)
 		 * read from disk into another page.  Splitting into two
 		 * pages would be incorrect if swap supported "shared
 		 * private" pages, but they are handled by tmpfs files.
+		 *
+		 * Given how unuse_vma() targets one particular offset
+		 * in an anon_vma, once the anon_vma has been determined,
+		 * this splitting happens to be just what is needed to
+		 * handle where KSM pages have been swapped out: re-reading
+		 * is unnecessarily slow, but we can fix that later on.
 		 */
 		if (swap_count(*swap_map) &&
 		    PageDirty(page) && PageSwapCache(page)) {