Diffstat (limited to 'mm')
-rw-r--r--  mm/ksm.c      94
-rw-r--r--  mm/migrate.c   5
2 files changed, 77 insertions, 22 deletions
diff --git a/mm/ksm.c b/mm/ksm.c
index 4c22cdff02ad..df0529926703 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -499,6 +499,7 @@ static void remove_node_from_stable_tree(struct stable_node *stable_node)
  * In which case we can trust the content of the page, and it
  * returns the gotten page; but if the page has now been zapped,
  * remove the stale node from the stable tree and return NULL.
+ * But beware, the stable node's page might be being migrated.
  *
  * You would expect the stable_node to hold a reference to the ksm page.
  * But if it increments the page's count, swapping out has to wait for
@@ -509,44 +510,77 @@ static void remove_node_from_stable_tree(struct stable_node *stable_node)
  * pointing back to this stable node. This relies on freeing a PageAnon
  * page to reset its page->mapping to NULL, and relies on no other use of
  * a page to put something that might look like our key in page->mapping.
- *
- * include/linux/pagemap.h page_cache_get_speculative() is a good reference,
- * but this is different - made simpler by ksm_thread_mutex being held, but
- * interesting for assuming that no other use of the struct page could ever
- * put our expected_mapping into page->mapping (or a field of the union which
- * coincides with page->mapping).
- *
- * Note: it is possible that get_ksm_page() will return NULL one moment,
- * then page the next, if the page is in between page_freeze_refs() and
- * page_unfreeze_refs(): this shouldn't be a problem anywhere, the page
  * is on its way to being freed; but it is an anomaly to bear in mind.
  */
 static struct page *get_ksm_page(struct stable_node *stable_node, bool locked)
 {
 	struct page *page;
 	void *expected_mapping;
+	unsigned long kpfn;
 
-	page = pfn_to_page(stable_node->kpfn);
 	expected_mapping = (void *)stable_node +
 					(PAGE_MAPPING_ANON | PAGE_MAPPING_KSM);
-	if (page->mapping != expected_mapping)
-		goto stale;
-	if (!get_page_unless_zero(page))
+again:
+	kpfn = ACCESS_ONCE(stable_node->kpfn);
+	page = pfn_to_page(kpfn);
+
+	/*
+	 * page is computed from kpfn, so on most architectures reading
+	 * page->mapping is naturally ordered after reading node->kpfn,
+	 * but on Alpha we need to be more careful.
+	 */
+	smp_read_barrier_depends();
+	if (ACCESS_ONCE(page->mapping) != expected_mapping)
 		goto stale;
-	if (page->mapping != expected_mapping) {
+
+	/*
+	 * We cannot do anything with the page while its refcount is 0.
+	 * Usually 0 means free, or tail of a higher-order page: in which
+	 * case this node is no longer referenced, and should be freed;
+	 * however, it might mean that the page is under page_freeze_refs().
+	 * The __remove_mapping() case is easy, again the node is now stale;
+	 * but if page is swapcache in migrate_page_move_mapping(), it might
+	 * still be our page, in which case it's essential to keep the node.
+	 */
+	while (!get_page_unless_zero(page)) {
+		/*
+		 * Another check for page->mapping != expected_mapping would
+		 * work here too. We have chosen the !PageSwapCache test to
+		 * optimize the common case, when the page is or is about to
+		 * be freed: PageSwapCache is cleared (under spin_lock_irq)
+		 * in the freeze_refs section of __remove_mapping(); but Anon
+		 * page->mapping reset to NULL later, in free_pages_prepare().
+		 */
+		if (!PageSwapCache(page))
+			goto stale;
+		cpu_relax();
+	}
+
+	if (ACCESS_ONCE(page->mapping) != expected_mapping) {
 		put_page(page);
 		goto stale;
 	}
+
 	if (locked) {
 		lock_page(page);
-		if (page->mapping != expected_mapping) {
+		if (ACCESS_ONCE(page->mapping) != expected_mapping) {
 			unlock_page(page);
 			put_page(page);
 			goto stale;
 		}
 	}
 	return page;
+
 stale:
+	/*
+	 * We come here from above when page->mapping or !PageSwapCache
+	 * suggests that the node is stale; but it might be under migration.
+	 * We need smp_rmb(), matching the smp_wmb() in ksm_migrate_page(),
+	 * before checking whether node->kpfn has been changed.
+	 */
+	smp_rmb();
+	if (ACCESS_ONCE(stable_node->kpfn) != kpfn)
+		goto again;
 	remove_node_from_stable_tree(stable_node);
 	return NULL;
 }
@@ -1103,15 +1137,25 @@ static struct page *stable_tree_search(struct page *page)
 			return NULL;
 
 		ret = memcmp_pages(page, tree_page);
+		put_page(tree_page);
 
-		if (ret < 0) {
-			put_page(tree_page);
+		if (ret < 0)
 			node = node->rb_left;
-		} else if (ret > 0) {
-			put_page(tree_page);
+		else if (ret > 0)
 			node = node->rb_right;
-		} else
+		else {
+			/*
+			 * Lock and unlock the stable_node's page (which
+			 * might already have been migrated) so that page
+			 * migration is sure to notice its raised count.
+			 * It would be more elegant to return stable_node
+			 * than kpage, but that involves more changes.
+			 */
+			tree_page = get_ksm_page(stable_node, true);
+			if (tree_page)
+				unlock_page(tree_page);
 			return tree_page;
+		}
 	}
 
 	return NULL;
@@ -1903,6 +1947,14 @@ void ksm_migrate_page(struct page *newpage, struct page *oldpage)
 	if (stable_node) {
 		VM_BUG_ON(stable_node->kpfn != page_to_pfn(oldpage));
 		stable_node->kpfn = page_to_pfn(newpage);
+		/*
+		 * newpage->mapping was set in advance; now we need smp_wmb()
+		 * to make sure that the new stable_node->kpfn is visible
+		 * to get_ksm_page() before it can see that oldpage->mapping
+		 * has gone stale (or that PageSwapCache has been cleared).
+		 */
+		smp_wmb();
+		set_page_stable_node(oldpage, NULL);
 	}
 }
 #endif /* CONFIG_MIGRATION */
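
The smp_wmb()/smp_rmb() pairing above is the heart of the ksm.c change: ksm_migrate_page() publishes the new kpfn before the old page's mapping goes stale, and get_ksm_page() re-reads kpfn after seeing a stale mapping, retrying if it moved. What follows is a minimal standalone C11 sketch of that publish-then-recheck protocol, not kernel code: the node/slot types and pfn_to_slot() are hypothetical stand-ins for stable_node and struct page, and atomic_thread_fence() release/acquire fences stand in for smp_wmb()/smp_rmb().

#include <stdatomic.h>
#include <stddef.h>

/* Hypothetical stand-ins for struct stable_node and struct page. */
struct node { _Atomic unsigned long kpfn; };
struct slot { _Atomic(void *) mapping; };

extern struct slot *pfn_to_slot(unsigned long kpfn);	/* assumed lookup */

/* Writer (migration): publish the new location, then stale the old one. */
static void migrate(struct node *node, unsigned long new_kpfn,
		    struct slot *old_slot)
{
	atomic_store_explicit(&node->kpfn, new_kpfn, memory_order_relaxed);
	/* pairs with the acquire fence in lookup(): smp_wmb()/smp_rmb() */
	atomic_thread_fence(memory_order_release);
	atomic_store_explicit(&old_slot->mapping, NULL, memory_order_relaxed);
}

/* Reader (lookup): a stale mapping may just mean "migrated"; recheck kpfn. */
static struct slot *lookup(struct node *node, void *expected_mapping)
{
	unsigned long kpfn;
	struct slot *slot;

again:
	kpfn = atomic_load_explicit(&node->kpfn, memory_order_relaxed);
	slot = pfn_to_slot(kpfn);
	if (atomic_load_explicit(&slot->mapping, memory_order_relaxed)
						== expected_mapping)
		return slot;		/* still where we expected it */
	atomic_thread_fence(memory_order_acquire);
	if (atomic_load_explicit(&node->kpfn, memory_order_relaxed) != kpfn)
		goto again;		/* moved under us: retry at new pfn */
	return NULL;			/* genuinely stale */
}

If the reader observes the NULL mapping (written after the release fence), the acquire fence guarantees it then observes the new kpfn, so a migrating node is never mistaken for a dead one; that is exactly the guarantee get_ksm_page() needs before calling remove_node_from_stable_tree().
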
diff --git a/mm/migrate.c b/mm/migrate.c
index de5c371a7969..e545ce7ddc17 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -464,7 +464,10 @@ void migrate_page_copy(struct page *newpage, struct page *page)
 
 	mlock_migrate_page(newpage, page);
 	ksm_migrate_page(newpage, page);
-
+	/*
+	 * Please do not reorder this without considering how mm/ksm.c's
+	 * get_ksm_page() depends upon ksm_migrate_page() and PageSwapCache().
+	 */
 	ClearPageSwapCache(page);
 	ClearPagePrivate(page);
 	set_page_private(page, 0);
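
The comment added to migrate_page_copy() guards the other half of the handshake: get_ksm_page()'s while (!get_page_unless_zero(page)) loop treats PageSwapCache as the "refcount may only be frozen for migration, keep spinning" signal, so clearing it before ksm_migrate_page() would make a migrating page look permanently stale. Below is a minimal userspace sketch of that refcount handshake under the same caveats as the sketch above: struct upage is a hypothetical stand-in for struct page, get_ref_unless_zero() mimics get_page_unless_zero(), and sched_yield() stands in for cpu_relax().

#include <stdatomic.h>
#include <stdbool.h>
#include <sched.h>

/* Hypothetical stand-in for struct page. */
struct upage {
	_Atomic int refcount;
	_Atomic bool swapcache;	/* cleared only after migration republishes */
};

/* Like get_page_unless_zero(): take a reference only if one still exists. */
static bool get_ref_unless_zero(struct upage *page)
{
	int ref = atomic_load_explicit(&page->refcount, memory_order_relaxed);

	while (ref > 0) {
		if (atomic_compare_exchange_weak_explicit(&page->refcount,
				&ref, ref + 1,
				memory_order_acquire, memory_order_relaxed))
			return true;
	}
	return false;
}

/*
 * Refcount 0 is ambiguous: the page may be freed, or its references may
 * be transiently frozen by migration.  swapcache still set means "maybe
 * frozen, spin"; swapcache clear means the page is really gone (or the
 * migration finished, and the caller's stale/kpfn recheck sorts it out).
 */
static bool get_ref_or_give_up(struct upage *page)
{
	while (!get_ref_unless_zero(page)) {
		if (!atomic_load_explicit(&page->swapcache,
					  memory_order_acquire))
			return false;
		sched_yield();		/* cpu_relax() stand-in */
	}
	return true;
}

This is why the ordering note sits directly above ClearPageSwapCache(page): the flag must stay observable until ksm_migrate_page() has published the new kpfn, or the spinning reader has nothing safe to poll.
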