aboutsummaryrefslogtreecommitdiffstats
path: root/mm/ksm.c
diff options
context:
space:
mode:
author Hugh Dickins <hugh.dickins@tiscali.co.uk> 2009-12-14 20:59:24 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2009-12-15 11:53:19 -0500
commit 5ad6468801d28c4d4ac9f48ec19297817c915f6a (patch)
tree edd8dc48693f43278d6fe1614aca2bf660d4dc10 /mm/ksm.c
parent 73848b4684e84a84cfd1555af78d41158f31e16b (diff)
ksm: let shared pages be swappable
Initial implementation for swapping out KSM's shared pages: add page_referenced_ksm() and try_to_unmap_ksm(), which rmap.c calls when faced with a PageKsm page.

Most of what's needed can be got from the rmap_items listed from the stable_node of the ksm page, without discovering the actual vma: so in this patch just fake up a struct vma for page_referenced_one() or try_to_unmap_one(), then refine that in the next patch.

Add VM_NONLINEAR to ksm_madvise()'s list of exclusions: it has always been implicit there (being only set with VM_SHARED, already excluded), but let's make it explicit, to help justify the lack of nonlinear unmap.

Rely on the page lock to protect against concurrent modifications to that page's node of the stable tree.

The awkward part is not swapout but swapin: do_swap_page() and page_add_anon_rmap() now have to allow for new possibilities - perhaps a ksm page still in swapcache, perhaps a swapcache page associated with one location in one anon_vma now needed for another location or anon_vma. (And the vma might even be no longer VM_MERGEABLE when that happens.)

ksm_might_need_to_copy() checks for that case, and supplies a duplicate page when necessary, simply leaving it to a subsequent pass of ksmd to rediscover the identity and merge them back into one ksm page. Disappointingly primitive: but the alternative would have to accumulate unswappable info about the swapped out ksm pages, limiting swappability.

Remove page_add_ksm_rmap(): page_add_anon_rmap() now has to allow for the particular case it was handling, so just use it instead.

Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Izik Eidus <ieidus@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Chris Wright <chrisw@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/ksm.c')
-rw-r--r-- mm/ksm.c 172
1 files changed, 157 insertions, 15 deletions
diff --git a/mm/ksm.c b/mm/ksm.c
index af5f571185d5..2f58ceebfe8f 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -196,6 +196,13 @@ static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait);
196static DEFINE_MUTEX(ksm_thread_mutex); 196static DEFINE_MUTEX(ksm_thread_mutex);
197static DEFINE_SPINLOCK(ksm_mmlist_lock); 197static DEFINE_SPINLOCK(ksm_mmlist_lock);
198 198
199/*
200 * Temporary hack for page_referenced_ksm() and try_to_unmap_ksm(),
201 * later we rework things a little to get the right vma to them.
202 */
 /* Taken before ksm_fallback_vma is used and released once the rmap walk is done. */
203static DEFINE_SPINLOCK(ksm_fallback_vma_lock);
 /* Static stand-in vma, used only when the GFP_ATOMIC vma allocation in those functions fails. */
204static struct vm_area_struct ksm_fallback_vma;
205
199#define KSM_KMEM_CACHE(__struct, __flags) kmem_cache_create("ksm_"#__struct,\ 206#define KSM_KMEM_CACHE(__struct, __flags) kmem_cache_create("ksm_"#__struct,\
200 sizeof(struct __struct), __alignof__(struct __struct),\ 207 sizeof(struct __struct), __alignof__(struct __struct),\
201 (__flags), NULL) 208 (__flags), NULL)
@@ -445,14 +452,20 @@ static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
445{ 452{
446 if (rmap_item->address & STABLE_FLAG) { 453 if (rmap_item->address & STABLE_FLAG) {
447 struct stable_node *stable_node; 454 struct stable_node *stable_node;
455 struct page *page;
448 456
449 stable_node = rmap_item->head; 457 stable_node = rmap_item->head;
458 page = stable_node->page;
459 lock_page(page);
460
450 hlist_del(&rmap_item->hlist); 461 hlist_del(&rmap_item->hlist);
451 if (stable_node->hlist.first) 462 if (stable_node->hlist.first) {
463 unlock_page(page);
452 ksm_pages_sharing--; 464 ksm_pages_sharing--;
453 else { 465 } else {
454 set_page_stable_node(stable_node->page, NULL); 466 set_page_stable_node(page, NULL);
455 put_page(stable_node->page); 467 unlock_page(page);
468 put_page(page);
456 469
457 rb_erase(&stable_node->node, &root_stable_tree); 470 rb_erase(&stable_node->node, &root_stable_tree);
458 free_stable_node(stable_node); 471 free_stable_node(stable_node);
@@ -710,7 +723,7 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
710 } 723 }
711 724
712 get_page(kpage); 725 get_page(kpage);
713 page_add_ksm_rmap(kpage); 726 page_add_anon_rmap(kpage, vma, addr);
714 727
715 flush_cache_page(vma, addr, pte_pfn(*ptep)); 728 flush_cache_page(vma, addr, pte_pfn(*ptep));
716 ptep_clear_flush(vma, addr, ptep); 729 ptep_clear_flush(vma, addr, ptep);
@@ -763,8 +776,16 @@ static int try_to_merge_one_page(struct vm_area_struct *vma,
763 pages_identical(page, kpage)) 776 pages_identical(page, kpage))
764 err = replace_page(vma, page, kpage, orig_pte); 777 err = replace_page(vma, page, kpage, orig_pte);
765 778
766 if ((vma->vm_flags & VM_LOCKED) && !err) 779 if ((vma->vm_flags & VM_LOCKED) && !err) {
767 munlock_vma_page(page); 780 munlock_vma_page(page);
781 if (!PageMlocked(kpage)) {
782 unlock_page(page);
783 lru_add_drain();
784 lock_page(kpage);
785 mlock_vma_page(kpage);
786 page = kpage; /* for final unlock */
787 }
788 }
768 789
769 unlock_page(page); 790 unlock_page(page);
770out: 791out:
@@ -841,7 +862,11 @@ static struct page *try_to_merge_two_pages(struct rmap_item *rmap_item,
841 862
842 copy_user_highpage(kpage, page, rmap_item->address, vma); 863 copy_user_highpage(kpage, page, rmap_item->address, vma);
843 864
865 SetPageDirty(kpage);
866 __SetPageUptodate(kpage);
867 SetPageSwapBacked(kpage);
844 set_page_stable_node(kpage, NULL); /* mark it PageKsm */ 868 set_page_stable_node(kpage, NULL); /* mark it PageKsm */
869 lru_cache_add_lru(kpage, LRU_ACTIVE_ANON);
845 870
846 err = try_to_merge_one_page(vma, page, kpage); 871 err = try_to_merge_one_page(vma, page, kpage);
847up: 872up:
@@ -1071,7 +1096,9 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
1071 * The page was successfully merged: 1096 * The page was successfully merged:
1072 * add its rmap_item to the stable tree. 1097 * add its rmap_item to the stable tree.
1073 */ 1098 */
1099 lock_page(kpage);
1074 stable_tree_append(rmap_item, stable_node); 1100 stable_tree_append(rmap_item, stable_node);
1101 unlock_page(kpage);
1075 } 1102 }
1076 put_page(kpage); 1103 put_page(kpage);
1077 return; 1104 return;
@@ -1112,11 +1139,13 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
1112 if (kpage) { 1139 if (kpage) {
1113 remove_rmap_item_from_tree(tree_rmap_item); 1140 remove_rmap_item_from_tree(tree_rmap_item);
1114 1141
1142 lock_page(kpage);
1115 stable_node = stable_tree_insert(kpage); 1143 stable_node = stable_tree_insert(kpage);
1116 if (stable_node) { 1144 if (stable_node) {
1117 stable_tree_append(tree_rmap_item, stable_node); 1145 stable_tree_append(tree_rmap_item, stable_node);
1118 stable_tree_append(rmap_item, stable_node); 1146 stable_tree_append(rmap_item, stable_node);
1119 } 1147 }
1148 unlock_page(kpage);
1120 put_page(kpage); 1149 put_page(kpage);
1121 1150
1122 /* 1151 /*
@@ -1285,14 +1314,6 @@ static void ksm_do_scan(unsigned int scan_npages)
1285 return; 1314 return;
1286 if (!PageKsm(page) || !in_stable_tree(rmap_item)) 1315 if (!PageKsm(page) || !in_stable_tree(rmap_item))
1287 cmp_and_merge_page(page, rmap_item); 1316 cmp_and_merge_page(page, rmap_item);
1288 else if (page_mapcount(page) == 1) {
1289 /*
1290 * Replace now-unshared ksm page by ordinary page.
1291 */
1292 break_cow(rmap_item);
1293 remove_rmap_item_from_tree(rmap_item);
1294 rmap_item->oldchecksum = calc_checksum(page);
1295 }
1296 put_page(page); 1317 put_page(page);
1297 } 1318 }
1298} 1319}
@@ -1337,7 +1358,7 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
1337 if (*vm_flags & (VM_MERGEABLE | VM_SHARED | VM_MAYSHARE | 1358 if (*vm_flags & (VM_MERGEABLE | VM_SHARED | VM_MAYSHARE |
1338 VM_PFNMAP | VM_IO | VM_DONTEXPAND | 1359 VM_PFNMAP | VM_IO | VM_DONTEXPAND |
1339 VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE | 1360 VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE |
1340 VM_MIXEDMAP | VM_SAO)) 1361 VM_NONLINEAR | VM_MIXEDMAP | VM_SAO))
1341 return 0; /* just ignore the advice */ 1362 return 0; /* just ignore the advice */
1342 1363
1343 if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) { 1364 if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) {
@@ -1435,6 +1456,127 @@ void __ksm_exit(struct mm_struct *mm)
1435 } 1456 }
1436} 1457}
1437 1458
/*
 * ksm_does_need_to_copy - give the caller a private duplicate of @page.
 * @page:    page to duplicate; it is unlocked here and released before return
 *           (presumably locked and referenced by the caller on entry - confirm
 *           against the caller).
 * @vma:     vma passed to the allocator, the copy routine and page_evictable().
 * @address: user address the page is wanted for.
 *
 * Allocates a new page, copies @page into it, marks the copy dirty, uptodate,
 * swap-backed and locked, and puts it on the active anon LRU or, if
 * page_evictable() says no, on the unevictable list.
 *
 * Returns the new page, or NULL if the allocation failed.  @page is released
 * in both cases.
 */
1459struct page *ksm_does_need_to_copy(struct page *page,
1460 struct vm_area_struct *vma, unsigned long address)
1461{
1462 struct page *new_page;
1463
1464 unlock_page(page); /* any racers will COW it, not modify it */
1465
1466 new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
1467 if (new_page) {
1468 copy_user_highpage(new_page, page, address, vma);
1469
 /* Flag the copy dirty, uptodate and swap-backed, and return it locked. */
1470 SetPageDirty(new_page);
1471 __SetPageUptodate(new_page);
1472 SetPageSwapBacked(new_page);
1473 __set_page_locked(new_page);
1474
 /* Choose the list by evictability in this vma. */
1475 if (page_evictable(new_page, vma))
1476 lru_cache_add_lru(new_page, LRU_ACTIVE_ANON);
1477 else
1478 add_page_to_unevictable_list(new_page);
1479 }
1480
 /* Runs on the success and the allocation-failure path alike. */
1481 page_cache_release(page);
1482 return new_page;
1483}
1484
/*
 * page_referenced_ksm - sum page_referenced_one() over a ksm page's rmap_items.
 * @page:     ksm page; must be PageKsm and locked (both asserted below).
 * @memcg:    if non-NULL, rmap_items whose mm does not match this cgroup
 *            are skipped.
 * @vm_flags: passed through to page_referenced_one().
 *
 * For each rmap_item on the page's stable_node list, a fake one-page vma is
 * filled in from the rmap_item's mm and address and handed to
 * page_referenced_one().
 *
 * Returns the sum of the page_referenced_one() results, or 0 if the page has
 * no stable_node.
 */
1485int page_referenced_ksm(struct page *page, struct mem_cgroup *memcg,
1486 unsigned long *vm_flags)
1487{
1488 struct stable_node *stable_node;
1489 struct rmap_item *rmap_item;
1490 struct hlist_node *hlist;
1491 unsigned int mapcount = page_mapcount(page);
1492 int referenced = 0;
1493 struct vm_area_struct *vma;
1494
1495 VM_BUG_ON(!PageKsm(page));
1496 VM_BUG_ON(!PageLocked(page));
1497
1498 stable_node = page_stable_node(page);
1499 if (!stable_node)
1500 return 0;
1501
1502 /*
1503 * Temporary hack: really we need anon_vma in rmap_item, to
1504 * provide the correct vma, and to find recently forked instances.
1505 * Use zalloc to avoid weirdness if any other fields are involved.
1506 */
1507 vma = kmem_cache_zalloc(vm_area_cachep, GFP_ATOMIC);
1508 if (!vma) {
 /* Allocation failed: use the static vma, holding its lock until "out". */
1509 spin_lock(&ksm_fallback_vma_lock);
1510 vma = &ksm_fallback_vma;
1511 }
1512
1513 hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
1514 if (memcg && !mm_match_cgroup(rmap_item->mm, memcg))
1515 continue;
1516
 /* Only these three fields of the fake vma are filled in per rmap_item. */
1517 vma->vm_mm = rmap_item->mm;
1518 vma->vm_start = rmap_item->address;
1519 vma->vm_end = vma->vm_start + PAGE_SIZE;
1520
1521 referenced += page_referenced_one(page, vma,
1522 rmap_item->address, &mapcount, vm_flags);
 /*
  * Stop once mapcount reaches zero (presumably page_referenced_one()
  * decrements it per mapping found - confirm in rmap.c).
  */
1523 if (!mapcount)
1524 goto out;
1525 }
1526out:
 /* Undo whichever of the two vma sources was used above. */
1527 if (vma == &ksm_fallback_vma)
1528 spin_unlock(&ksm_fallback_vma_lock);
1529 else
1530 kmem_cache_free(vm_area_cachep, vma);
1531 return referenced;
1532}
1533
/*
 * try_to_unmap_ksm - call try_to_unmap_one() for each rmap_item of a ksm page.
 * @page:  ksm page; must be PageKsm and locked (both asserted below).
 * @flags: ttu flags; only the TTU_UNMAP action is handled.
 *
 * For each rmap_item on the page's stable_node list, a fake one-page vma is
 * filled in from the rmap_item's mm and address and handed to
 * try_to_unmap_one().  The walk stops as soon as a call returns something
 * other than SWAP_AGAIN, or the page is no longer mapped.
 *
 * Returns SWAP_FAIL if the page has no stable_node or the action is not
 * TTU_UNMAP; otherwise the result of the last try_to_unmap_one() call, or
 * SWAP_AGAIN if the list was empty.
 */
1534int try_to_unmap_ksm(struct page *page, enum ttu_flags flags)
1535{
1536 struct stable_node *stable_node;
1537 struct hlist_node *hlist;
1538 struct rmap_item *rmap_item;
1539 int ret = SWAP_AGAIN;
1540 struct vm_area_struct *vma;
1541
1542 VM_BUG_ON(!PageKsm(page));
1543 VM_BUG_ON(!PageLocked(page));
1544
1545 stable_node = page_stable_node(page);
1546 if (!stable_node)
1547 return SWAP_FAIL;
1548
1549 /*
1550 * Temporary hack: really we need anon_vma in rmap_item, to
1551 * provide the correct vma, and to find recently forked instances.
1552 * Use zalloc to avoid weirdness if any other fields are involved.
1553 */
 /* Any action other than plain TTU_UNMAP is refused before a vma is set up. */
1554 if (TTU_ACTION(flags) != TTU_UNMAP)
1555 return SWAP_FAIL;
1556
 /* The "temporary hack" comment above describes this fake-vma allocation. */
1557 vma = kmem_cache_zalloc(vm_area_cachep, GFP_ATOMIC);
1558 if (!vma) {
 /* Allocation failed: use the static vma, holding its lock until "out". */
1559 spin_lock(&ksm_fallback_vma_lock);
1560 vma = &ksm_fallback_vma;
1561 }
1562
1563 hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
 /* Only these three fields of the fake vma are filled in per rmap_item. */
1564 vma->vm_mm = rmap_item->mm;
1565 vma->vm_start = rmap_item->address;
1566 vma->vm_end = vma->vm_start + PAGE_SIZE;
1567
1568 ret = try_to_unmap_one(page, vma, rmap_item->address, flags);
1569 if (ret != SWAP_AGAIN || !page_mapped(page))
1570 goto out;
1571 }
1572out:
 /* Undo whichever of the two vma sources was used above. */
1573 if (vma == &ksm_fallback_vma)
1574 spin_unlock(&ksm_fallback_vma_lock);
1575 else
1576 kmem_cache_free(vm_area_cachep, vma);
1577 return ret;
1578}
1579
1438#ifdef CONFIG_SYSFS 1580#ifdef CONFIG_SYSFS
1439/* 1581/*
1440 * This all compiles without CONFIG_SYSFS, but is a waste of space. 1582 * This all compiles without CONFIG_SYSFS, but is a waste of space.