path: root/mm/memory.c
Diffstat (limited to 'mm/memory.c')
-rw-r--r--	mm/memory.c | 49
1 file changed, 39 insertions(+), 10 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index 7e91b5f9f690..09e4b1be7b67 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -572,7 +572,7 @@ out:
  * covered by this vma.
  */
 
-static inline void
+static inline unsigned long
 copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
 		unsigned long addr, int *rss)
@@ -586,7 +586,9 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		if (!pte_file(pte)) {
 			swp_entry_t entry = pte_to_swp_entry(pte);
 
-			swap_duplicate(entry);
+			if (swap_duplicate(entry) < 0)
+				return entry.val;
+
 			/* make sure dst_mm is on swapoff's mmlist. */
 			if (unlikely(list_empty(&dst_mm->mmlist))) {
 				spin_lock(&mmlist_lock);
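With swap count continuations, swap_duplicate() is no longer guaranteed to succeed: it returns a negative value when the entry's swap count has overflowed into a continuation page that could not be obtained without sleeping. copy_one_pte() runs under both page table spinlocks and cannot block to allocate one, so on failure it now hands the offending entry back to its caller as a non-zero entry.val (zero still means the pte was copied).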
@@ -635,16 +637,19 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 
 out_set_pte:
 	set_pte_at(dst_mm, addr, dst_pte, pte);
+	return 0;
 }
 
 static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
 		unsigned long addr, unsigned long end)
 {
+	pte_t *orig_src_pte, *orig_dst_pte;
 	pte_t *src_pte, *dst_pte;
 	spinlock_t *src_ptl, *dst_ptl;
 	int progress = 0;
 	int rss[2];
+	swp_entry_t entry = (swp_entry_t){0};
 
 again:
 	rss[1] = rss[0] = 0;
@@ -654,6 +659,8 @@ again:
 	src_pte = pte_offset_map_nested(src_pmd, addr);
 	src_ptl = pte_lockptr(src_mm, src_pmd);
 	spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
+	orig_src_pte = src_pte;
+	orig_dst_pte = dst_pte;
 	arch_enter_lazy_mmu_mode();
 
 	do {
@@ -671,16 +678,25 @@ again:
 			progress++;
 			continue;
 		}
-		copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma, addr, rss);
+		entry.val = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte,
+							vma, addr, rss);
+		if (entry.val)
+			break;
 		progress += 8;
 	} while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
 
 	arch_leave_lazy_mmu_mode();
 	spin_unlock(src_ptl);
-	pte_unmap_nested(src_pte - 1);
+	pte_unmap_nested(orig_src_pte);
 	add_mm_rss(dst_mm, rss[0], rss[1]);
-	pte_unmap_unlock(dst_pte - 1, dst_ptl);
+	pte_unmap_unlock(orig_dst_pte, dst_ptl);
 	cond_resched();
+
+	if (entry.val) {
+		if (add_swap_count_continuation(entry, GFP_KERNEL) < 0)
+			return -ENOMEM;
+		progress = 0;
+	}
 	if (addr != end)
 		goto again;
 	return 0;
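This is also why copy_pte_range() now saves orig_src_pte and orig_dst_pte: once the loop can break early, src_pte - 1 and dst_pte - 1 no longer reliably point at the last mapped entry, so the unmap and unlock must use the saved starting points. After the locks are dropped, add_swap_count_continuation(entry, GFP_KERNEL) performs the allocation that could not be done atomically, and goto again resumes at the address that failed. A minimal userspace sketch of the same shape, try under a spinlock, allocate after unlocking, retry where you left off (every name here is hypothetical and merely stands in for the kernel code it mirrors):

#include <pthread.h>
#include <stdio.h>

static pthread_spinlock_t ptl;		/* stands in for the pte locks */
static int reserve;			/* preallocated "continuation" slots */

/* Mirrors copy_one_pte(): fails when it would need to allocate. */
static int copy_one(int i)
{
	if (reserve == 0)
		return -1;		/* cannot sleep under the spinlock */
	reserve--;
	return 0;
}

/* Mirrors copy_pte_range(): break out, allocate, goto again. */
static int copy_range(int n)
{
	int i = 0, failed;

again:
	failed = 0;
	pthread_spin_lock(&ptl);
	while (i < n) {
		if (copy_one(i) < 0) {
			failed = 1;	/* remember we stopped early */
			break;
		}
		i++;
	}
	pthread_spin_unlock(&ptl);

	if (failed) {
		/* stands in for add_swap_count_continuation(GFP_KERNEL):
		 * a blocking allocation, legal now that the lock is gone */
		reserve = 1;
		goto again;		/* resume at the entry that failed */
	}
	return 0;
}

int main(void)
{
	pthread_spin_init(&ptl, PTHREAD_PROCESS_PRIVATE);
	printf("copy_range: %d\n", copy_range(4));	/* prints 0 */
	return 0;
}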
@@ -940,6 +956,7 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb,
 		details = NULL;
 
 	BUG_ON(addr >= end);
+	mem_cgroup_uncharge_start();
 	tlb_start_vma(tlb, vma);
 	pgd = pgd_offset(vma->vm_mm, addr);
 	do {
@@ -952,6 +969,7 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb,
 						zap_work, details);
 	} while (pgd++, addr = next, (addr != end && *zap_work > 0));
 	tlb_end_vma(tlb, vma);
+	mem_cgroup_uncharge_end();
 
 	return addr;
 }
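mem_cgroup_uncharge_start() and mem_cgroup_uncharge_end() bracket the whole vma unmap so that the many per-page memcg uncharges issued while zapping can be batched and applied to the shared counters once, rather than once per page. A generic sketch of that begin/accumulate/flush pattern (plain userspace C, hypothetical names, not the memcg API):

#include <stdio.h>

static long res_counter = 1024;	/* shared and contended in the kernel */
static long batch;		/* per-task accumulator */
static int batching;

static void uncharge_start(void)
{
	batching = 1;
	batch = 0;
}

static void uncharge_page(void)
{
	if (batching)
		batch++;		/* no shared-counter traffic */
	else
		res_counter--;		/* unbatched fallback */
}

static void uncharge_end(void)
{
	res_counter -= batch;		/* one update for the whole range */
	batching = 0;
}

int main(void)
{
	uncharge_start();		/* like mem_cgroup_uncharge_start() */
	for (int i = 0; i < 512; i++)
		uncharge_page();	/* like the per-page uncharges */
	uncharge_end();			/* like mem_cgroup_uncharge_end() */
	printf("res_counter = %ld\n", res_counter);	/* 512 */
	return 0;
}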
@@ -1820,10 +1838,10 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
 	token = pmd_pgtable(*pmd);
 
 	do {
-		err = fn(pte, token, addr, data);
+		err = fn(pte++, token, addr, data);
 		if (err)
 			break;
-	} while (pte++, addr += PAGE_SIZE, addr != end);
+	} while (addr += PAGE_SIZE, addr != end);
 
 	arch_leave_lazy_mmu_mode();
 
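The apply_to_pte_range() hunk is behavior-preserving for a full pass: pte still advances exactly once per iteration, only now as a side effect of the call itself, so when fn() fails the loop exits with pte already pointing past the entry that failed rather than at it.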
@@ -2511,7 +2529,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			ret = VM_FAULT_HWPOISON;
 		} else {
 			print_bad_pte(vma, address, orig_pte, NULL);
-			ret = VM_FAULT_OOM;
+			ret = VM_FAULT_SIGBUS;
 		}
 		goto out;
 	}
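Returning VM_FAULT_SIGBUS rather than VM_FAULT_OOM here matches the "show pte and kill process" intent stated in do_nonlinear_fault() below, where the same conversion is made: a corrupted page table entry is not a memory shortage, and VM_FAULT_OOM can end up invoking the OOM killer against unrelated tasks, while VM_FAULT_SIGBUS kills only the faulting process.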
@@ -2537,14 +2555,24 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		ret = VM_FAULT_MAJOR;
 		count_vm_event(PGMAJFAULT);
 	} else if (PageHWPoison(page)) {
+		/*
+		 * hwpoisoned dirty swapcache pages are kept for killing
+		 * owner processes (which may be unknown at hwpoison time)
+		 */
 		ret = VM_FAULT_HWPOISON;
 		delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
-		goto out;
+		goto out_release;
 	}
 
 	lock_page(page);
 	delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
 
+	page = ksm_might_need_to_copy(page, vma, address);
+	if (!page) {
+		ret = VM_FAULT_OOM;
+		goto out;
+	}
+
 	if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) {
 		ret = VM_FAULT_OOM;
 		goto out_page;
@@ -2611,6 +2639,7 @@ out_nomap:
 	pte_unmap_unlock(page_table, ptl);
 out_page:
 	unlock_page(page);
+out_release:
 	page_cache_release(page);
 	return ret;
 }
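The new out_release label completes the unwind ladder in do_swap_page(): out_nomap drops the pte mapping and lock, out_page unlocks the page, and out_release drops the page reference. The hwpoison branch holds a reference on the swapcache page but has never locked it, so exiting through goto out, as before, would leak that reference; goto out_release now releases it. The ksm_might_need_to_copy() call added above appears to handle swapcache pages that KSM has shared across address spaces and that cannot simply be mapped into this mm: it substitutes a freshly allocated copy where needed, and a NULL return signals allocation failure, hence VM_FAULT_OOM.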
@@ -2906,7 +2935,7 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		 * Page table corrupted: show pte and kill process.
 		 */
 		print_bad_pte(vma, address, orig_pte, NULL);
-		return VM_FAULT_OOM;
+		return VM_FAULT_SIGBUS;
 	}
 
 	pgoff = pte_to_pgoff(orig_pte);