diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/ksm.c | 94 | ||||
-rw-r--r-- | mm/migrate.c | 5 |
2 files changed, 77 insertions, 22 deletions
@@ -499,6 +499,7 @@ static void remove_node_from_stable_tree(struct stable_node *stable_node) | |||
499 | * In which case we can trust the content of the page, and it | 499 | * In which case we can trust the content of the page, and it |
500 | * returns the gotten page; but if the page has now been zapped, | 500 | * returns the gotten page; but if the page has now been zapped, |
501 | * remove the stale node from the stable tree and return NULL. | 501 | * remove the stale node from the stable tree and return NULL. |
502 | * But beware, the stable node's page might be being migrated. | ||
502 | * | 503 | * |
503 | * You would expect the stable_node to hold a reference to the ksm page. | 504 | * You would expect the stable_node to hold a reference to the ksm page. |
504 | * But if it increments the page's count, swapping out has to wait for | 505 | * But if it increments the page's count, swapping out has to wait for |
@@ -509,44 +510,77 @@ static void remove_node_from_stable_tree(struct stable_node *stable_node) | |||
509 | * pointing back to this stable node. This relies on freeing a PageAnon | 510 | * pointing back to this stable node. This relies on freeing a PageAnon |
510 | * page to reset its page->mapping to NULL, and relies on no other use of | 511 | * page to reset its page->mapping to NULL, and relies on no other use of |
511 | * a page to put something that might look like our key in page->mapping. | 512 | * a page to put something that might look like our key in page->mapping. |
512 | * | ||
513 | * include/linux/pagemap.h page_cache_get_speculative() is a good reference, | ||
514 | * but this is different - made simpler by ksm_thread_mutex being held, but | ||
515 | * interesting for assuming that no other use of the struct page could ever | ||
516 | * put our expected_mapping into page->mapping (or a field of the union which | ||
517 | * coincides with page->mapping). | ||
518 | * | ||
519 | * Note: it is possible that get_ksm_page() will return NULL one moment, | ||
520 | * then page the next, if the page is in between page_freeze_refs() and | ||
521 | * page_unfreeze_refs(): this shouldn't be a problem anywhere, the page | ||
522 | * is on its way to being freed; but it is an anomaly to bear in mind. | 513 | * is on its way to being freed; but it is an anomaly to bear in mind. |
523 | */ | 514 | */ |
524 | static struct page *get_ksm_page(struct stable_node *stable_node, bool locked) | 515 | static struct page *get_ksm_page(struct stable_node *stable_node, bool locked) |
525 | { | 516 | { |
526 | struct page *page; | 517 | struct page *page; |
527 | void *expected_mapping; | 518 | void *expected_mapping; |
519 | unsigned long kpfn; | ||
528 | 520 | ||
529 | page = pfn_to_page(stable_node->kpfn); | ||
530 | expected_mapping = (void *)stable_node + | 521 | expected_mapping = (void *)stable_node + |
531 | (PAGE_MAPPING_ANON | PAGE_MAPPING_KSM); | 522 | (PAGE_MAPPING_ANON | PAGE_MAPPING_KSM); |
532 | if (page->mapping != expected_mapping) | 523 | again: |
533 | goto stale; | 524 | kpfn = ACCESS_ONCE(stable_node->kpfn); |
534 | if (!get_page_unless_zero(page)) | 525 | page = pfn_to_page(kpfn); |
526 | |||
527 | /* | ||
528 | * page is computed from kpfn, so on most architectures reading | ||
529 | * page->mapping is naturally ordered after reading node->kpfn, | ||
530 | * but on Alpha we need to be more careful. | ||
531 | */ | ||
532 | smp_read_barrier_depends(); | ||
533 | if (ACCESS_ONCE(page->mapping) != expected_mapping) | ||
535 | goto stale; | 534 | goto stale; |
536 | if (page->mapping != expected_mapping) { | 535 | |
536 | /* | ||
537 | * We cannot do anything with the page while its refcount is 0. | ||
538 | * Usually 0 means free, or tail of a higher-order page: in which | ||
539 | * case this node is no longer referenced, and should be freed; | ||
540 | * however, it might mean that the page is under page_freeze_refs(). | ||
541 | * The __remove_mapping() case is easy, again the node is now stale; | ||
542 | * but if page is swapcache in migrate_page_move_mapping(), it might | ||
543 | * still be our page, in which case it's essential to keep the node. | ||
544 | */ | ||
545 | while (!get_page_unless_zero(page)) { | ||
546 | /* | ||
547 | * Another check for page->mapping != expected_mapping would | ||
548 | * work here too. We have chosen the !PageSwapCache test to | ||
549 | * optimize the common case, when the page is or is about to | ||
550 | * be freed: PageSwapCache is cleared (under spin_lock_irq) | ||
551 | * in the freeze_refs section of __remove_mapping(); but Anon | ||
552 | * page->mapping reset to NULL later, in free_pages_prepare(). | ||
553 | */ | ||
554 | if (!PageSwapCache(page)) | ||
555 | goto stale; | ||
556 | cpu_relax(); | ||
557 | } | ||
558 | |||
559 | if (ACCESS_ONCE(page->mapping) != expected_mapping) { | ||
537 | put_page(page); | 560 | put_page(page); |
538 | goto stale; | 561 | goto stale; |
539 | } | 562 | } |
563 | |||
540 | if (locked) { | 564 | if (locked) { |
541 | lock_page(page); | 565 | lock_page(page); |
542 | if (page->mapping != expected_mapping) { | 566 | if (ACCESS_ONCE(page->mapping) != expected_mapping) { |
543 | unlock_page(page); | 567 | unlock_page(page); |
544 | put_page(page); | 568 | put_page(page); |
545 | goto stale; | 569 | goto stale; |
546 | } | 570 | } |
547 | } | 571 | } |
548 | return page; | 572 | return page; |
573 | |||
549 | stale: | 574 | stale: |
575 | /* | ||
576 | * We come here from above when page->mapping or !PageSwapCache | ||
577 | * suggests that the node is stale; but it might be under migration. | ||
578 | * We need smp_rmb(), matching the smp_wmb() in ksm_migrate_page(), | ||
579 | * before checking whether node->kpfn has been changed. | ||
580 | */ | ||
581 | smp_rmb(); | ||
582 | if (ACCESS_ONCE(stable_node->kpfn) != kpfn) | ||
583 | goto again; | ||
550 | remove_node_from_stable_tree(stable_node); | 584 | remove_node_from_stable_tree(stable_node); |
551 | return NULL; | 585 | return NULL; |
552 | } | 586 | } |
@@ -1103,15 +1137,25 @@ static struct page *stable_tree_search(struct page *page) | |||
1103 | return NULL; | 1137 | return NULL; |
1104 | 1138 | ||
1105 | ret = memcmp_pages(page, tree_page); | 1139 | ret = memcmp_pages(page, tree_page); |
1140 | put_page(tree_page); | ||
1106 | 1141 | ||
1107 | if (ret < 0) { | 1142 | if (ret < 0) |
1108 | put_page(tree_page); | ||
1109 | node = node->rb_left; | 1143 | node = node->rb_left; |
1110 | } else if (ret > 0) { | 1144 | else if (ret > 0) |
1111 | put_page(tree_page); | ||
1112 | node = node->rb_right; | 1145 | node = node->rb_right; |
1113 | } else | 1146 | else { |
1147 | /* | ||
1148 | * Lock and unlock the stable_node's page (which | ||
1149 | * might already have been migrated) so that page | ||
1150 | * migration is sure to notice its raised count. | ||
1151 | * It would be more elegant to return stable_node | ||
1152 | * than kpage, but that involves more changes. | ||
1153 | */ | ||
1154 | tree_page = get_ksm_page(stable_node, true); | ||
1155 | if (tree_page) | ||
1156 | unlock_page(tree_page); | ||
1114 | return tree_page; | 1157 | return tree_page; |
1158 | } | ||
1115 | } | 1159 | } |
1116 | 1160 | ||
1117 | return NULL; | 1161 | return NULL; |
@@ -1903,6 +1947,14 @@ void ksm_migrate_page(struct page *newpage, struct page *oldpage) | |||
1903 | if (stable_node) { | 1947 | if (stable_node) { |
1904 | VM_BUG_ON(stable_node->kpfn != page_to_pfn(oldpage)); | 1948 | VM_BUG_ON(stable_node->kpfn != page_to_pfn(oldpage)); |
1905 | stable_node->kpfn = page_to_pfn(newpage); | 1949 | stable_node->kpfn = page_to_pfn(newpage); |
1950 | /* | ||
1951 | * newpage->mapping was set in advance; now we need smp_wmb() | ||
1952 | * to make sure that the new stable_node->kpfn is visible | ||
1953 | * to get_ksm_page() before it can see that oldpage->mapping | ||
1954 | * has gone stale (or that PageSwapCache has been cleared). | ||
1955 | */ | ||
1956 | smp_wmb(); | ||
1957 | set_page_stable_node(oldpage, NULL); | ||
1906 | } | 1958 | } |
1907 | } | 1959 | } |
1908 | #endif /* CONFIG_MIGRATION */ | 1960 | #endif /* CONFIG_MIGRATION */ |
diff --git a/mm/migrate.c b/mm/migrate.c
index de5c371a7969..e545ce7ddc17 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -464,7 +464,10 @@ void migrate_page_copy(struct page *newpage, struct page *page) | |||
464 | 464 | ||
465 | mlock_migrate_page(newpage, page); | 465 | mlock_migrate_page(newpage, page); |
466 | ksm_migrate_page(newpage, page); | 466 | ksm_migrate_page(newpage, page); |
467 | 467 | /* | |
468 | * Please do not reorder this without considering how mm/ksm.c's | ||
469 | * get_ksm_page() depends upon ksm_migrate_page() and PageSwapCache(). | ||
470 | */ | ||
468 | ClearPageSwapCache(page); | 471 | ClearPageSwapCache(page); |
469 | ClearPagePrivate(page); | 472 | ClearPagePrivate(page); |
470 | set_page_private(page, 0); | 473 | set_page_private(page, 0); |