diff options
Diffstat (limited to 'mm/ksm.c')
-rw-r--r-- | mm/ksm.c | 88 |
1 files changed, 81 insertions, 7 deletions
@@ -34,6 +34,7 @@ | |||
34 | #include <linux/swap.h> | 34 | #include <linux/swap.h> |
35 | #include <linux/ksm.h> | 35 | #include <linux/ksm.h> |
36 | #include <linux/hash.h> | 36 | #include <linux/hash.h> |
37 | #include <linux/freezer.h> | ||
37 | 38 | ||
38 | #include <asm/tlbflush.h> | 39 | #include <asm/tlbflush.h> |
39 | #include "internal.h" | 40 | #include "internal.h" |
@@ -411,6 +412,20 @@ out: | |||
411 | up_read(&mm->mmap_sem); | 412 | up_read(&mm->mmap_sem); |
412 | } | 413 | } |
413 | 414 | ||
415 | static struct page *page_trans_compound_anon(struct page *page) | ||
416 | { | ||
417 | if (PageTransCompound(page)) { | ||
418 | struct page *head = compound_trans_head(page); | ||
419 | /* | ||
420 | * head may actually be split and freed from under | ||
421 | * us but it's ok here. | ||
422 | */ | ||
423 | if (PageAnon(head)) | ||
424 | return head; | ||
425 | } | ||
426 | return NULL; | ||
427 | } | ||
428 | |||
414 | static struct page *get_mergeable_page(struct rmap_item *rmap_item) | 429 | static struct page *get_mergeable_page(struct rmap_item *rmap_item) |
415 | { | 430 | { |
416 | struct mm_struct *mm = rmap_item->mm; | 431 | struct mm_struct *mm = rmap_item->mm; |
@@ -430,7 +445,7 @@ static struct page *get_mergeable_page(struct rmap_item *rmap_item) | |||
430 | page = follow_page(vma, addr, FOLL_GET); | 445 | page = follow_page(vma, addr, FOLL_GET); |
431 | if (IS_ERR_OR_NULL(page)) | 446 | if (IS_ERR_OR_NULL(page)) |
432 | goto out; | 447 | goto out; |
433 | if (PageAnon(page)) { | 448 | if (PageAnon(page) || page_trans_compound_anon(page)) { |
434 | flush_anon_page(vma, page, addr); | 449 | flush_anon_page(vma, page, addr); |
435 | flush_dcache_page(page); | 450 | flush_dcache_page(page); |
436 | } else { | 451 | } else { |
@@ -708,6 +723,7 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page, | |||
708 | if (addr == -EFAULT) | 723 | if (addr == -EFAULT) |
709 | goto out; | 724 | goto out; |
710 | 725 | ||
726 | BUG_ON(PageTransCompound(page)); | ||
711 | ptep = page_check_address(page, mm, addr, &ptl, 0); | 727 | ptep = page_check_address(page, mm, addr, &ptl, 0); |
712 | if (!ptep) | 728 | if (!ptep) |
713 | goto out; | 729 | goto out; |
@@ -783,6 +799,7 @@ static int replace_page(struct vm_area_struct *vma, struct page *page, | |||
783 | goto out; | 799 | goto out; |
784 | 800 | ||
785 | pmd = pmd_offset(pud, addr); | 801 | pmd = pmd_offset(pud, addr); |
802 | BUG_ON(pmd_trans_huge(*pmd)); | ||
786 | if (!pmd_present(*pmd)) | 803 | if (!pmd_present(*pmd)) |
787 | goto out; | 804 | goto out; |
788 | 805 | ||
@@ -800,6 +817,8 @@ static int replace_page(struct vm_area_struct *vma, struct page *page, | |||
800 | set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot)); | 817 | set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot)); |
801 | 818 | ||
802 | page_remove_rmap(page); | 819 | page_remove_rmap(page); |
820 | if (!page_mapped(page)) | ||
821 | try_to_free_swap(page); | ||
803 | put_page(page); | 822 | put_page(page); |
804 | 823 | ||
805 | pte_unmap_unlock(ptep, ptl); | 824 | pte_unmap_unlock(ptep, ptl); |
@@ -808,6 +827,33 @@ out: | |||
808 | return err; | 827 | return err; |
809 | } | 828 | } |
810 | 829 | ||
830 | static int page_trans_compound_anon_split(struct page *page) | ||
831 | { | ||
832 | int ret = 0; | ||
833 | struct page *transhuge_head = page_trans_compound_anon(page); | ||
834 | if (transhuge_head) { | ||
835 | /* Get the reference on the head to split it. */ | ||
836 | if (get_page_unless_zero(transhuge_head)) { | ||
837 | /* | ||
838 | * Recheck we got the reference while the head | ||
839 | * was still anonymous. | ||
840 | */ | ||
841 | if (PageAnon(transhuge_head)) | ||
842 | ret = split_huge_page(transhuge_head); | ||
843 | else | ||
844 | /* | ||
845 | * Retry later if split_huge_page ran | ||
846 | * from under us. | ||
847 | */ | ||
848 | ret = 1; | ||
849 | put_page(transhuge_head); | ||
850 | } else | ||
851 | /* Retry later if split_huge_page ran from under us. */ | ||
852 | ret = 1; | ||
853 | } | ||
854 | return ret; | ||
855 | } | ||
856 | |||
811 | /* | 857 | /* |
812 | * try_to_merge_one_page - take two pages and merge them into one | 858 | * try_to_merge_one_page - take two pages and merge them into one |
813 | * @vma: the vma that holds the pte pointing to page | 859 | * @vma: the vma that holds the pte pointing to page |
@@ -828,6 +874,9 @@ static int try_to_merge_one_page(struct vm_area_struct *vma, | |||
828 | 874 | ||
829 | if (!(vma->vm_flags & VM_MERGEABLE)) | 875 | if (!(vma->vm_flags & VM_MERGEABLE)) |
830 | goto out; | 876 | goto out; |
877 | if (PageTransCompound(page) && page_trans_compound_anon_split(page)) | ||
878 | goto out; | ||
879 | BUG_ON(PageTransCompound(page)); | ||
831 | if (!PageAnon(page)) | 880 | if (!PageAnon(page)) |
832 | goto out; | 881 | goto out; |
833 | 882 | ||
@@ -1247,6 +1296,18 @@ static struct rmap_item *scan_get_next_rmap_item(struct page **page) | |||
1247 | 1296 | ||
1248 | slot = ksm_scan.mm_slot; | 1297 | slot = ksm_scan.mm_slot; |
1249 | if (slot == &ksm_mm_head) { | 1298 | if (slot == &ksm_mm_head) { |
1299 | /* | ||
1300 | * A number of pages can hang around indefinitely on per-cpu | ||
1301 | * pagevecs, raised page count preventing write_protect_page | ||
1302 | * from merging them. Though it doesn't really matter much, | ||
1303 | * it is puzzling to see some stuck in pages_volatile until | ||
1304 | * other activity jostles them out, and they also prevented | ||
1305 | * LTP's KSM test from succeeding deterministically; so drain | ||
1306 | * them here (here rather than on entry to ksm_do_scan(), | ||
1307 | * so we don't IPI too often when pages_to_scan is set low). | ||
1308 | */ | ||
1309 | lru_add_drain_all(); | ||
1310 | |||
1250 | root_unstable_tree = RB_ROOT; | 1311 | root_unstable_tree = RB_ROOT; |
1251 | 1312 | ||
1252 | spin_lock(&ksm_mmlist_lock); | 1313 | spin_lock(&ksm_mmlist_lock); |
@@ -1277,7 +1338,13 @@ next_mm: | |||
1277 | if (ksm_test_exit(mm)) | 1338 | if (ksm_test_exit(mm)) |
1278 | break; | 1339 | break; |
1279 | *page = follow_page(vma, ksm_scan.address, FOLL_GET); | 1340 | *page = follow_page(vma, ksm_scan.address, FOLL_GET); |
1280 | if (!IS_ERR_OR_NULL(*page) && PageAnon(*page)) { | 1341 | if (IS_ERR_OR_NULL(*page)) { |
1342 | ksm_scan.address += PAGE_SIZE; | ||
1343 | cond_resched(); | ||
1344 | continue; | ||
1345 | } | ||
1346 | if (PageAnon(*page) || | ||
1347 | page_trans_compound_anon(*page)) { | ||
1281 | flush_anon_page(vma, *page, ksm_scan.address); | 1348 | flush_anon_page(vma, *page, ksm_scan.address); |
1282 | flush_dcache_page(*page); | 1349 | flush_dcache_page(*page); |
1283 | rmap_item = get_next_rmap_item(slot, | 1350 | rmap_item = get_next_rmap_item(slot, |
@@ -1291,8 +1358,7 @@ next_mm: | |||
1291 | up_read(&mm->mmap_sem); | 1358 | up_read(&mm->mmap_sem); |
1292 | return rmap_item; | 1359 | return rmap_item; |
1293 | } | 1360 | } |
1294 | if (!IS_ERR_OR_NULL(*page)) | 1361 | put_page(*page); |
1295 | put_page(*page); | ||
1296 | ksm_scan.address += PAGE_SIZE; | 1362 | ksm_scan.address += PAGE_SIZE; |
1297 | cond_resched(); | 1363 | cond_resched(); |
1298 | } | 1364 | } |
@@ -1352,7 +1418,7 @@ static void ksm_do_scan(unsigned int scan_npages) | |||
1352 | struct rmap_item *rmap_item; | 1418 | struct rmap_item *rmap_item; |
1353 | struct page *uninitialized_var(page); | 1419 | struct page *uninitialized_var(page); |
1354 | 1420 | ||
1355 | while (scan_npages--) { | 1421 | while (scan_npages-- && likely(!freezing(current))) { |
1356 | cond_resched(); | 1422 | cond_resched(); |
1357 | rmap_item = scan_get_next_rmap_item(&page); | 1423 | rmap_item = scan_get_next_rmap_item(&page); |
1358 | if (!rmap_item) | 1424 | if (!rmap_item) |
@@ -1370,6 +1436,7 @@ static int ksmd_should_run(void) | |||
1370 | 1436 | ||
1371 | static int ksm_scan_thread(void *nothing) | 1437 | static int ksm_scan_thread(void *nothing) |
1372 | { | 1438 | { |
1439 | set_freezable(); | ||
1373 | set_user_nice(current, 5); | 1440 | set_user_nice(current, 5); |
1374 | 1441 | ||
1375 | while (!kthread_should_stop()) { | 1442 | while (!kthread_should_stop()) { |
@@ -1378,11 +1445,13 @@ static int ksm_scan_thread(void *nothing) | |||
1378 | ksm_do_scan(ksm_thread_pages_to_scan); | 1445 | ksm_do_scan(ksm_thread_pages_to_scan); |
1379 | mutex_unlock(&ksm_thread_mutex); | 1446 | mutex_unlock(&ksm_thread_mutex); |
1380 | 1447 | ||
1448 | try_to_freeze(); | ||
1449 | |||
1381 | if (ksmd_should_run()) { | 1450 | if (ksmd_should_run()) { |
1382 | schedule_timeout_interruptible( | 1451 | schedule_timeout_interruptible( |
1383 | msecs_to_jiffies(ksm_thread_sleep_millisecs)); | 1452 | msecs_to_jiffies(ksm_thread_sleep_millisecs)); |
1384 | } else { | 1453 | } else { |
1385 | wait_event_interruptible(ksm_thread_wait, | 1454 | wait_event_freezable(ksm_thread_wait, |
1386 | ksmd_should_run() || kthread_should_stop()); | 1455 | ksmd_should_run() || kthread_should_stop()); |
1387 | } | 1456 | } |
1388 | } | 1457 | } |
@@ -1724,8 +1793,13 @@ static int ksm_memory_callback(struct notifier_block *self, | |||
1724 | /* | 1793 | /* |
1725 | * Keep it very simple for now: just lock out ksmd and | 1794 | * Keep it very simple for now: just lock out ksmd and |
1726 | * MADV_UNMERGEABLE while any memory is going offline. | 1795 | * MADV_UNMERGEABLE while any memory is going offline. |
1796 | * mutex_lock_nested() is necessary because lockdep was alarmed | ||
1797 | * that here we take ksm_thread_mutex inside notifier chain | ||
1798 | * mutex, and later take notifier chain mutex inside | ||
1799 | * ksm_thread_mutex to unlock it. But that's safe because both | ||
1800 | * are inside mem_hotplug_mutex. | ||
1727 | */ | 1801 | */ |
1728 | mutex_lock(&ksm_thread_mutex); | 1802 | mutex_lock_nested(&ksm_thread_mutex, SINGLE_DEPTH_NESTING); |
1729 | break; | 1803 | break; |
1730 | 1804 | ||
1731 | case MEM_OFFLINE: | 1805 | case MEM_OFFLINE: |