Diffstat (limited to 'mm/ksm.c')
-rw-r--r--	mm/ksm.c	149
1 file changed, 110 insertions(+), 39 deletions(-)
diff --git a/mm/ksm.c b/mm/ksm.c
index f7d121c42d01..37cc92f83a8d 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -413,6 +413,12 @@ static void break_cow(struct rmap_item *rmap_item)
 	unsigned long addr = rmap_item->address;
 	struct vm_area_struct *vma;
 
+	/*
+	 * It is not an accident that whenever we want to break COW
+	 * to undo, we also need to drop a reference to the anon_vma.
+	 */
+	drop_anon_vma(rmap_item);
+
 	down_read(&mm->mmap_sem);
 	if (ksm_test_exit(mm))
 		goto out;
@@ -456,6 +462,79 @@ out: page = NULL;
 	return page;
 }
 
+static void remove_node_from_stable_tree(struct stable_node *stable_node)
+{
+	struct rmap_item *rmap_item;
+	struct hlist_node *hlist;
+
+	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
+		if (rmap_item->hlist.next)
+			ksm_pages_sharing--;
+		else
+			ksm_pages_shared--;
+		drop_anon_vma(rmap_item);
+		rmap_item->address &= PAGE_MASK;
+		cond_resched();
+	}
+
+	rb_erase(&stable_node->node, &root_stable_tree);
+	free_stable_node(stable_node);
+}
+
+/*
+ * get_ksm_page: checks if the page indicated by the stable node
+ * is still its ksm page, despite having held no reference to it.
+ * In which case we can trust the content of the page, and it
+ * returns the gotten page; but if the page has now been zapped,
+ * remove the stale node from the stable tree and return NULL.
+ *
+ * You would expect the stable_node to hold a reference to the ksm page.
+ * But if it increments the page's count, swapping out has to wait for
+ * ksmd to come around again before it can free the page, which may take
+ * seconds or even minutes: much too unresponsive. So instead we use a
+ * "keyhole reference": access to the ksm page from the stable node peeps
+ * out through its keyhole to see if that page still holds the right key,
+ * pointing back to this stable node. This relies on freeing a PageAnon
+ * page to reset its page->mapping to NULL, and relies on no other use of
+ * a page to put something that might look like our key in page->mapping.
+ *
+ * include/linux/pagemap.h page_cache_get_speculative() is a good reference,
+ * but this is different - made simpler by ksm_thread_mutex being held, but
+ * interesting for assuming that no other use of the struct page could ever
+ * put our expected_mapping into page->mapping (or a field of the union which
+ * coincides with page->mapping). The RCU calls are not for KSM at all, but
+ * to keep the page_count protocol described with page_cache_get_speculative.
+ *
+ * Note: it is possible that get_ksm_page() will return NULL one moment,
+ * then page the next, if the page is in between page_freeze_refs() and
+ * page_unfreeze_refs(): this shouldn't be a problem anywhere, the page
+ * is on its way to being freed; but it is an anomaly to bear in mind.
+ */
+static struct page *get_ksm_page(struct stable_node *stable_node)
+{
+	struct page *page;
+	void *expected_mapping;
+
+	page = stable_node->page;
+	expected_mapping = (void *)stable_node +
+				(PAGE_MAPPING_ANON | PAGE_MAPPING_KSM);
+	rcu_read_lock();
+	if (page->mapping != expected_mapping)
+		goto stale;
+	if (!get_page_unless_zero(page))
+		goto stale;
+	if (page->mapping != expected_mapping) {
+		put_page(page);
+		goto stale;
+	}
+	rcu_read_unlock();
+	return page;
+stale:
+	rcu_read_unlock();
+	remove_node_from_stable_tree(stable_node);
+	return NULL;
+}
+
 /*
  * Removing rmap_item from stable or unstable tree.
  * This function will clean the information from the stable/unstable tree.
@@ -467,22 +546,19 @@ static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
 		struct page *page;
 
 		stable_node = rmap_item->head;
-		page = stable_node->page;
-		lock_page(page);
+		page = get_ksm_page(stable_node);
+		if (!page)
+			goto out;
 
+		lock_page(page);
 		hlist_del(&rmap_item->hlist);
-		if (stable_node->hlist.first) {
-			unlock_page(page);
-			ksm_pages_sharing--;
-		} else {
-			set_page_stable_node(page, NULL);
-			unlock_page(page);
-			put_page(page);
+		unlock_page(page);
+		put_page(page);
 
-			rb_erase(&stable_node->node, &root_stable_tree);
-			free_stable_node(stable_node);
+		if (stable_node->hlist.first)
+			ksm_pages_sharing--;
+		else
 			ksm_pages_shared--;
-		}
 
 		drop_anon_vma(rmap_item);
 		rmap_item->address &= PAGE_MASK;
@@ -504,7 +580,7 @@ static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
 		ksm_pages_unshared--;
 		rmap_item->address &= PAGE_MASK;
 	}
-
+out:
 	cond_resched();		/* we're called from many long loops */
 }
 
@@ -902,10 +978,8 @@ up:
 		 * If that fails, we have a ksm page with only one pte
 		 * pointing to it: so break it.
 		 */
-		if (err) {
-			drop_anon_vma(rmap_item);
+		if (err)
 			break_cow(rmap_item);
-		}
 	}
 	if (err) {
 		put_page(kpage);
@@ -935,21 +1009,25 @@ static struct stable_node *stable_tree_search(struct page *page)
 	}
 
 	while (node) {
+		struct page *tree_page;
 		int ret;
 
 		cond_resched();
 		stable_node = rb_entry(node, struct stable_node, node);
+		tree_page = get_ksm_page(stable_node);
+		if (!tree_page)
+			return NULL;
 
-		ret = memcmp_pages(page, stable_node->page);
+		ret = memcmp_pages(page, tree_page);
 
-		if (ret < 0)
+		if (ret < 0) {
+			put_page(tree_page);
 			node = node->rb_left;
-		else if (ret > 0)
+		} else if (ret > 0) {
+			put_page(tree_page);
 			node = node->rb_right;
-		else {
-			get_page(stable_node->page);
+		} else
 			return stable_node;
-		}
 	}
 
 	return NULL;
@@ -969,12 +1047,17 @@ static struct stable_node *stable_tree_insert(struct page *kpage)
 	struct stable_node *stable_node;
 
 	while (*new) {
+		struct page *tree_page;
 		int ret;
 
 		cond_resched();
 		stable_node = rb_entry(*new, struct stable_node, node);
+		tree_page = get_ksm_page(stable_node);
+		if (!tree_page)
+			return NULL;
 
-		ret = memcmp_pages(kpage, stable_node->page);
+		ret = memcmp_pages(kpage, tree_page);
+		put_page(tree_page);
 
 		parent = *new;
 		if (ret < 0)
@@ -1000,7 +1083,6 @@ static struct stable_node *stable_tree_insert(struct page *kpage)
 
 	INIT_HLIST_HEAD(&stable_node->hlist);
 
-	get_page(kpage);
 	stable_node->page = kpage;
 	set_page_stable_node(kpage, stable_node);
 
@@ -1130,19 +1212,10 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
 	}
 
 	/*
-	 * A ksm page might have got here by fork, but its other
-	 * references have already been removed from the stable tree.
-	 * Or it might be left over from a break_ksm which failed
-	 * when the mem_cgroup had reached its limit: try again now.
-	 */
-	if (PageKsm(page))
-		break_cow(rmap_item);
-
-	/*
-	 * In case the hash value of the page was changed from the last time we
-	 * have calculated it, this page to be changed frequely, therefore we
-	 * don't want to insert it to the unstable tree, and we don't want to
-	 * waste our time to search if there is something identical to it there.
+	 * If the hash value of the page has changed from the last time
+	 * we calculated it, this page is changing frequently: therefore we
+	 * don't want to insert it in the unstable tree, and we don't want
+	 * to waste our time searching for something identical to it there.
 	 */
 	checksum = calc_checksum(page);
 	if (rmap_item->oldchecksum != checksum) {
@@ -1180,9 +1253,7 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
 		 * in which case we need to break_cow on both.
 		 */
 		if (!stable_node) {
-			drop_anon_vma(tree_rmap_item);
 			break_cow(tree_rmap_item);
-			drop_anon_vma(rmap_item);
 			break_cow(rmap_item);
 		}
 	}
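
The comment above get_ksm_page() describes the "keyhole reference" protocol in prose: check the key, take a speculative reference, then re-check the key. As a reading aid, here is a minimal, self-contained C11 sketch of that same dance, with toy types and C11 atomics standing in for struct page, page->mapping and get_page_unless_zero(). Everything here (toy_page, toy_node, keyhole_get, and so on) is illustrative, not kernel API, and it deliberately omits the RCU and page_freeze_refs() details the comment mentions.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Toy stand-ins for struct page and struct stable_node. */
struct toy_page {
	atomic_int count;		/* page reference count */
	_Atomic(void *) mapping;	/* the "key": points back at the owner */
};

struct toy_node {
	struct toy_page *page;		/* note: no reference held on page */
};

/* Analogue of get_page_unless_zero(): take a reference only if count > 0. */
static bool get_unless_zero(struct toy_page *page)
{
	int old = atomic_load(&page->count);

	while (old > 0)
		if (atomic_compare_exchange_weak(&page->count, &old, old + 1))
			return true;
	return false;			/* count hit zero: page is being freed */
}

/*
 * The keyhole lookup: check the key, take a speculative reference,
 * then re-check the key to be sure the page was not freed and reused
 * in between.  Returns the page with a reference held, or NULL.
 */
static struct toy_page *keyhole_get(struct toy_node *node)
{
	struct toy_page *page = node->page;
	void *key = node;		/* expected_mapping analogue */

	if (atomic_load(&page->mapping) != key)
		return NULL;		/* already stale */
	if (!get_unless_zero(page))
		return NULL;		/* lost the race with the free path */
	if (atomic_load(&page->mapping) != key) {
		atomic_fetch_sub(&page->count, 1);	/* raced: undo */
		return NULL;
	}
	return page;			/* safe: we now hold a reference */
}

int main(void)
{
	struct toy_page page = { .count = 1 };
	struct toy_node node = { .page = &page };

	atomic_store(&page.mapping, &node);	/* set the key */
	printf("live page:  %p\n", (void *)keyhole_get(&node));

	atomic_store(&page.mapping, NULL);	/* "freeing" resets the key */
	printf("stale page: %p\n", (void *)keyhole_get(&node));
	return 0;
}

The second mapping check is what makes the speculative reference safe: if the page was freed and reused between the first check and the refcount increment, its mapping can no longer equal our key, so the reference is dropped and the node treated as stale, which is exactly when get_ksm_page() calls remove_node_from_stable_tree().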