Diffstat (limited to 'mm/ksm.c')
-rw-r--r--  mm/ksm.c  126
1 files changed, 98 insertions, 28 deletions
diff --git a/mm/ksm.c b/mm/ksm.c
index 65ab5c7067d9..9a68b0cf0a1c 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -34,6 +34,8 @@
 #include <linux/swap.h>
 #include <linux/ksm.h>
 #include <linux/hash.h>
+#include <linux/freezer.h>
+#include <linux/oom.h>
 
 #include <asm/tlbflush.h>
 #include "internal.h"
@@ -300,20 +302,6 @@ static inline int in_stable_tree(struct rmap_item *rmap_item)
 	return rmap_item->address & STABLE_FLAG;
 }
 
-static void hold_anon_vma(struct rmap_item *rmap_item,
-			  struct anon_vma *anon_vma)
-{
-	rmap_item->anon_vma = anon_vma;
-	get_anon_vma(anon_vma);
-}
-
-static void ksm_drop_anon_vma(struct rmap_item *rmap_item)
-{
-	struct anon_vma *anon_vma = rmap_item->anon_vma;
-
-	drop_anon_vma(anon_vma);
-}
-
 /*
  * ksmd, and unmerge_and_remove_all_rmap_items(), must not touch an mm's
  * page tables after it has passed through ksm_exit() - which, if necessary,
@@ -396,7 +384,7 @@ static void break_cow(struct rmap_item *rmap_item)
 	 * It is not an accident that whenever we want to break COW
 	 * to undo, we also need to drop a reference to the anon_vma.
 	 */
-	ksm_drop_anon_vma(rmap_item);
+	put_anon_vma(rmap_item->anon_vma);
 
 	down_read(&mm->mmap_sem);
 	if (ksm_test_exit(mm))
@@ -411,6 +399,20 @@ out:
 	up_read(&mm->mmap_sem);
 }
 
+static struct page *page_trans_compound_anon(struct page *page)
+{
+	if (PageTransCompound(page)) {
+		struct page *head = compound_trans_head(page);
+		/*
+		 * head may actually be splitted and freed from under
+		 * us but it's ok here.
+		 */
+		if (PageAnon(head))
+			return head;
+	}
+	return NULL;
+}
+
 static struct page *get_mergeable_page(struct rmap_item *rmap_item)
 {
 	struct mm_struct *mm = rmap_item->mm;
@@ -430,7 +432,7 @@ static struct page *get_mergeable_page(struct rmap_item *rmap_item)
 	page = follow_page(vma, addr, FOLL_GET);
 	if (IS_ERR_OR_NULL(page))
 		goto out;
-	if (PageAnon(page)) {
+	if (PageAnon(page) || page_trans_compound_anon(page)) {
 		flush_anon_page(vma, page, addr);
 		flush_dcache_page(page);
 	} else {
@@ -451,7 +453,7 @@ static void remove_node_from_stable_tree(struct stable_node *stable_node)
 			ksm_pages_sharing--;
 		else
 			ksm_pages_shared--;
-		ksm_drop_anon_vma(rmap_item);
+		put_anon_vma(rmap_item->anon_vma);
 		rmap_item->address &= PAGE_MASK;
 		cond_resched();
 	}
@@ -539,7 +541,7 @@ static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
 		else
 			ksm_pages_shared--;
 
-		ksm_drop_anon_vma(rmap_item);
+		put_anon_vma(rmap_item->anon_vma);
 		rmap_item->address &= PAGE_MASK;
 
 	} else if (rmap_item->address & UNSTABLE_FLAG) {
@@ -708,6 +710,7 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
 	if (addr == -EFAULT)
 		goto out;
 
+	BUG_ON(PageTransCompound(page));
 	ptep = page_check_address(page, mm, addr, &ptl, 0);
 	if (!ptep)
 		goto out;
@@ -718,7 +721,7 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
 		swapped = PageSwapCache(page);
 		flush_cache_page(vma, addr, page_to_pfn(page));
 		/*
-		 * Ok this is tricky, when get_user_pages_fast() run it doesnt
+		 * Ok this is tricky, when get_user_pages_fast() run it doesn't
 		 * take any lock, therefore the check that we are going to make
 		 * with the pagecount against the mapcount is racey and
 		 * O_DIRECT can happen right after the check.
@@ -783,6 +786,7 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
 		goto out;
 
 	pmd = pmd_offset(pud, addr);
+	BUG_ON(pmd_trans_huge(*pmd));
 	if (!pmd_present(*pmd))
 		goto out;
 
@@ -800,6 +804,8 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
 	set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));
 
 	page_remove_rmap(page);
+	if (!page_mapped(page))
+		try_to_free_swap(page);
 	put_page(page);
 
 	pte_unmap_unlock(ptep, ptl);
@@ -808,6 +814,33 @@ out:
 	return err;
 }
 
+static int page_trans_compound_anon_split(struct page *page)
+{
+	int ret = 0;
+	struct page *transhuge_head = page_trans_compound_anon(page);
+	if (transhuge_head) {
+		/* Get the reference on the head to split it. */
+		if (get_page_unless_zero(transhuge_head)) {
+			/*
+			 * Recheck we got the reference while the head
+			 * was still anonymous.
+			 */
+			if (PageAnon(transhuge_head))
+				ret = split_huge_page(transhuge_head);
+			else
+				/*
+				 * Retry later if split_huge_page run
+				 * from under us.
+				 */
+				ret = 1;
+			put_page(transhuge_head);
+		} else
+			/* Retry later if split_huge_page run from under us. */
+			ret = 1;
+	}
+	return ret;
+}
+
 /*
  * try_to_merge_one_page - take two pages and merge them into one
  * @vma: the vma that holds the pte pointing to page
@@ -828,6 +861,9 @@ static int try_to_merge_one_page(struct vm_area_struct *vma,
 
 	if (!(vma->vm_flags & VM_MERGEABLE))
 		goto out;
+	if (PageTransCompound(page) && page_trans_compound_anon_split(page))
+		goto out;
+	BUG_ON(PageTransCompound(page));
 	if (!PageAnon(page))
 		goto out;
 
@@ -900,7 +936,8 @@ static int try_to_merge_with_ksm_page(struct rmap_item *rmap_item,
 		goto out;
 
 	/* Must get reference to anon_vma while still holding mmap_sem */
-	hold_anon_vma(rmap_item, vma->anon_vma);
+	rmap_item->anon_vma = vma->anon_vma;
+	get_anon_vma(vma->anon_vma);
 out:
 	up_read(&mm->mmap_sem);
 	return err;
@@ -1247,12 +1284,30 @@ static struct rmap_item *scan_get_next_rmap_item(struct page **page)
 
 	slot = ksm_scan.mm_slot;
 	if (slot == &ksm_mm_head) {
+		/*
+		 * A number of pages can hang around indefinitely on per-cpu
+		 * pagevecs, raised page count preventing write_protect_page
+		 * from merging them.  Though it doesn't really matter much,
+		 * it is puzzling to see some stuck in pages_volatile until
+		 * other activity jostles them out, and they also prevented
+		 * LTP's KSM test from succeeding deterministically; so drain
+		 * them here (here rather than on entry to ksm_do_scan(),
+		 * so we don't IPI too often when pages_to_scan is set low).
+		 */
+		lru_add_drain_all();
+
 		root_unstable_tree = RB_ROOT;
 
 		spin_lock(&ksm_mmlist_lock);
 		slot = list_entry(slot->mm_list.next, struct mm_slot, mm_list);
 		ksm_scan.mm_slot = slot;
 		spin_unlock(&ksm_mmlist_lock);
+		/*
+		 * Although we tested list_empty() above, a racing __ksm_exit
+		 * of the last mm on the list may have removed it since then.
+		 */
+		if (slot == &ksm_mm_head)
+			return NULL;
next_mm:
 	ksm_scan.address = 0;
 	ksm_scan.rmap_list = &slot->rmap_list;
@@ -1277,7 +1332,13 @@ next_mm:
 			if (ksm_test_exit(mm))
 				break;
 			*page = follow_page(vma, ksm_scan.address, FOLL_GET);
-			if (!IS_ERR_OR_NULL(*page) && PageAnon(*page)) {
+			if (IS_ERR_OR_NULL(*page)) {
+				ksm_scan.address += PAGE_SIZE;
+				cond_resched();
+				continue;
+			}
+			if (PageAnon(*page) ||
+			    page_trans_compound_anon(*page)) {
 				flush_anon_page(vma, *page, ksm_scan.address);
 				flush_dcache_page(*page);
 				rmap_item = get_next_rmap_item(slot,
@@ -1291,8 +1352,7 @@ next_mm:
 				up_read(&mm->mmap_sem);
 				return rmap_item;
 			}
-			if (!IS_ERR_OR_NULL(*page))
-				put_page(*page);
+			put_page(*page);
 			ksm_scan.address += PAGE_SIZE;
 			cond_resched();
 		}
@@ -1352,7 +1412,7 @@ static void ksm_do_scan(unsigned int scan_npages)
 	struct rmap_item *rmap_item;
 	struct page *uninitialized_var(page);
 
-	while (scan_npages--) {
+	while (scan_npages-- && likely(!freezing(current))) {
 		cond_resched();
 		rmap_item = scan_get_next_rmap_item(&page);
 		if (!rmap_item)
@@ -1370,6 +1430,7 @@ static int ksmd_should_run(void)
 
 static int ksm_scan_thread(void *nothing)
 {
+	set_freezable();
 	set_user_nice(current, 5);
 
 	while (!kthread_should_stop()) {
@@ -1378,11 +1439,13 @@ static int ksm_scan_thread(void *nothing)
 			ksm_do_scan(ksm_thread_pages_to_scan);
 		mutex_unlock(&ksm_thread_mutex);
 
+		try_to_freeze();
+
 		if (ksmd_should_run()) {
 			schedule_timeout_interruptible(
 				msecs_to_jiffies(ksm_thread_sleep_millisecs));
 		} else {
-			wait_event_interruptible(ksm_thread_wait,
+			wait_event_freezable(ksm_thread_wait,
 				ksmd_should_run() || kthread_should_stop());
 		}
 	}
@@ -1724,8 +1787,13 @@ static int ksm_memory_callback(struct notifier_block *self,
 		/*
 		 * Keep it very simple for now: just lock out ksmd and
 		 * MADV_UNMERGEABLE while any memory is going offline.
+		 * mutex_lock_nested() is necessary because lockdep was alarmed
+		 * that here we take ksm_thread_mutex inside notifier chain
+		 * mutex, and later take notifier chain mutex inside
+		 * ksm_thread_mutex to unlock it.  But that's safe because both
+		 * are inside mem_hotplug_mutex.
 		 */
-		mutex_lock(&ksm_thread_mutex);
+		mutex_lock_nested(&ksm_thread_mutex, SINGLE_DEPTH_NESTING);
 		break;
 
 	case MEM_OFFLINE:
@@ -1833,9 +1901,11 @@ static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
 	if (ksm_run != flags) {
 		ksm_run = flags;
 		if (flags & KSM_RUN_UNMERGE) {
-			current->flags |= PF_OOM_ORIGIN;
+			int oom_score_adj;
+
+			oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX);
 			err = unmerge_and_remove_all_rmap_items();
-			current->flags &= ~PF_OOM_ORIGIN;
+			test_set_oom_score_adj(oom_score_adj);
 			if (err) {
 				ksm_run = KSM_RUN_STOP;
 				count = err;
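
For orientation only (not part of this patch): the run_store() hunk above handles writes to /sys/kernel/mm/ksm/run, and KSM only considers pages that userspace has registered with madvise(MADV_MERGEABLE). A minimal userspace sketch of exercising those paths follows; the mapping size and fill pattern are arbitrary illustrations.

/*
 * Hedged userspace sketch: register an anonymous mapping with KSM and
 * start ksmd.  Writing "2" to /sys/kernel/mm/ksm/run would instead hit
 * the KSM_RUN_UNMERGE path changed in the last hunk above.
 */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 16 * 1024 * 1024;		/* arbitrary demo size */
	char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (buf == MAP_FAILED)
		return 1;

	memset(buf, 0x5a, len);			/* identical pages: good merge candidates */

	if (madvise(buf, len, MADV_MERGEABLE))	/* register the range with KSM */
		perror("madvise");

	FILE *f = fopen("/sys/kernel/mm/ksm/run", "w");
	if (f) {
		fputs("1", f);			/* start ksmd scanning */
		fclose(f);
	}
	return 0;
}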