diff options
Diffstat (limited to 'mm/memory.c')
-rw-r--r-- | mm/memory.c | 49 |
1 files changed, 39 insertions, 10 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 7e91b5f9f690..09e4b1be7b67 100644
--- a/mm/memory.c
+++ b/mm/memory.c | |||
@@ -572,7 +572,7 @@ out: | |||
572 | * covered by this vma. | 572 | * covered by this vma. |
573 | */ | 573 | */ |
574 | 574 | ||
575 | static inline void | 575 | static inline unsigned long |
576 | copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, | 576 | copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, |
577 | pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma, | 577 | pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma, |
578 | unsigned long addr, int *rss) | 578 | unsigned long addr, int *rss) |
@@ -586,7 +586,9 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, | |||
586 | if (!pte_file(pte)) { | 586 | if (!pte_file(pte)) { |
587 | swp_entry_t entry = pte_to_swp_entry(pte); | 587 | swp_entry_t entry = pte_to_swp_entry(pte); |
588 | 588 | ||
589 | swap_duplicate(entry); | 589 | if (swap_duplicate(entry) < 0) |
590 | return entry.val; | ||
591 | |||
590 | /* make sure dst_mm is on swapoff's mmlist. */ | 592 | /* make sure dst_mm is on swapoff's mmlist. */ |
591 | if (unlikely(list_empty(&dst_mm->mmlist))) { | 593 | if (unlikely(list_empty(&dst_mm->mmlist))) { |
592 | spin_lock(&mmlist_lock); | 594 | spin_lock(&mmlist_lock); |
@@ -635,16 +637,19 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, | |||
635 | 637 | ||
636 | out_set_pte: | 638 | out_set_pte: |
637 | set_pte_at(dst_mm, addr, dst_pte, pte); | 639 | set_pte_at(dst_mm, addr, dst_pte, pte); |
640 | return 0; | ||
638 | } | 641 | } |
639 | 642 | ||
640 | static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, | 643 | static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, |
641 | pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma, | 644 | pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma, |
642 | unsigned long addr, unsigned long end) | 645 | unsigned long addr, unsigned long end) |
643 | { | 646 | { |
647 | pte_t *orig_src_pte, *orig_dst_pte; | ||
644 | pte_t *src_pte, *dst_pte; | 648 | pte_t *src_pte, *dst_pte; |
645 | spinlock_t *src_ptl, *dst_ptl; | 649 | spinlock_t *src_ptl, *dst_ptl; |
646 | int progress = 0; | 650 | int progress = 0; |
647 | int rss[2]; | 651 | int rss[2]; |
652 | swp_entry_t entry = (swp_entry_t){0}; | ||
648 | 653 | ||
649 | again: | 654 | again: |
650 | rss[1] = rss[0] = 0; | 655 | rss[1] = rss[0] = 0; |
@@ -654,6 +659,8 @@ again: | |||
654 | src_pte = pte_offset_map_nested(src_pmd, addr); | 659 | src_pte = pte_offset_map_nested(src_pmd, addr); |
655 | src_ptl = pte_lockptr(src_mm, src_pmd); | 660 | src_ptl = pte_lockptr(src_mm, src_pmd); |
656 | spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); | 661 | spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); |
662 | orig_src_pte = src_pte; | ||
663 | orig_dst_pte = dst_pte; | ||
657 | arch_enter_lazy_mmu_mode(); | 664 | arch_enter_lazy_mmu_mode(); |
658 | 665 | ||
659 | do { | 666 | do { |
@@ -671,16 +678,25 @@ again: | |||
671 | progress++; | 678 | progress++; |
672 | continue; | 679 | continue; |
673 | } | 680 | } |
674 | copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma, addr, rss); | 681 | entry.val = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, |
682 | vma, addr, rss); | ||
683 | if (entry.val) | ||
684 | break; | ||
675 | progress += 8; | 685 | progress += 8; |
676 | } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end); | 686 | } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end); |
677 | 687 | ||
678 | arch_leave_lazy_mmu_mode(); | 688 | arch_leave_lazy_mmu_mode(); |
679 | spin_unlock(src_ptl); | 689 | spin_unlock(src_ptl); |
680 | pte_unmap_nested(src_pte - 1); | 690 | pte_unmap_nested(orig_src_pte); |
681 | add_mm_rss(dst_mm, rss[0], rss[1]); | 691 | add_mm_rss(dst_mm, rss[0], rss[1]); |
682 | pte_unmap_unlock(dst_pte - 1, dst_ptl); | 692 | pte_unmap_unlock(orig_dst_pte, dst_ptl); |
683 | cond_resched(); | 693 | cond_resched(); |
694 | |||
695 | if (entry.val) { | ||
696 | if (add_swap_count_continuation(entry, GFP_KERNEL) < 0) | ||
697 | return -ENOMEM; | ||
698 | progress = 0; | ||
699 | } | ||
684 | if (addr != end) | 700 | if (addr != end) |
685 | goto again; | 701 | goto again; |
686 | return 0; | 702 | return 0; |
@@ -940,6 +956,7 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb, | |||
940 | details = NULL; | 956 | details = NULL; |
941 | 957 | ||
942 | BUG_ON(addr >= end); | 958 | BUG_ON(addr >= end); |
959 | mem_cgroup_uncharge_start(); | ||
943 | tlb_start_vma(tlb, vma); | 960 | tlb_start_vma(tlb, vma); |
944 | pgd = pgd_offset(vma->vm_mm, addr); | 961 | pgd = pgd_offset(vma->vm_mm, addr); |
945 | do { | 962 | do { |
@@ -952,6 +969,7 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb, | |||
952 | zap_work, details); | 969 | zap_work, details); |
953 | } while (pgd++, addr = next, (addr != end && *zap_work > 0)); | 970 | } while (pgd++, addr = next, (addr != end && *zap_work > 0)); |
954 | tlb_end_vma(tlb, vma); | 971 | tlb_end_vma(tlb, vma); |
972 | mem_cgroup_uncharge_end(); | ||
955 | 973 | ||
956 | return addr; | 974 | return addr; |
957 | } | 975 | } |
@@ -1820,10 +1838,10 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, | |||
1820 | token = pmd_pgtable(*pmd); | 1838 | token = pmd_pgtable(*pmd); |
1821 | 1839 | ||
1822 | do { | 1840 | do { |
1823 | err = fn(pte, token, addr, data); | 1841 | err = fn(pte++, token, addr, data); |
1824 | if (err) | 1842 | if (err) |
1825 | break; | 1843 | break; |
1826 | } while (pte++, addr += PAGE_SIZE, addr != end); | 1844 | } while (addr += PAGE_SIZE, addr != end); |
1827 | 1845 | ||
1828 | arch_leave_lazy_mmu_mode(); | 1846 | arch_leave_lazy_mmu_mode(); |
1829 | 1847 | ||
@@ -2511,7 +2529,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2511 | ret = VM_FAULT_HWPOISON; | 2529 | ret = VM_FAULT_HWPOISON; |
2512 | } else { | 2530 | } else { |
2513 | print_bad_pte(vma, address, orig_pte, NULL); | 2531 | print_bad_pte(vma, address, orig_pte, NULL); |
2514 | ret = VM_FAULT_OOM; | 2532 | ret = VM_FAULT_SIGBUS; |
2515 | } | 2533 | } |
2516 | goto out; | 2534 | goto out; |
2517 | } | 2535 | } |
@@ -2537,14 +2555,24 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2537 | ret = VM_FAULT_MAJOR; | 2555 | ret = VM_FAULT_MAJOR; |
2538 | count_vm_event(PGMAJFAULT); | 2556 | count_vm_event(PGMAJFAULT); |
2539 | } else if (PageHWPoison(page)) { | 2557 | } else if (PageHWPoison(page)) { |
2558 | /* | ||
2559 | * hwpoisoned dirty swapcache pages are kept for killing | ||
2560 | * owner processes (which may be unknown at hwpoison time) | ||
2561 | */ | ||
2540 | ret = VM_FAULT_HWPOISON; | 2562 | ret = VM_FAULT_HWPOISON; |
2541 | delayacct_clear_flag(DELAYACCT_PF_SWAPIN); | 2563 | delayacct_clear_flag(DELAYACCT_PF_SWAPIN); |
2542 | goto out; | 2564 | goto out_release; |
2543 | } | 2565 | } |
2544 | 2566 | ||
2545 | lock_page(page); | 2567 | lock_page(page); |
2546 | delayacct_clear_flag(DELAYACCT_PF_SWAPIN); | 2568 | delayacct_clear_flag(DELAYACCT_PF_SWAPIN); |
2547 | 2569 | ||
2570 | page = ksm_might_need_to_copy(page, vma, address); | ||
2571 | if (!page) { | ||
2572 | ret = VM_FAULT_OOM; | ||
2573 | goto out; | ||
2574 | } | ||
2575 | |||
2548 | if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) { | 2576 | if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) { |
2549 | ret = VM_FAULT_OOM; | 2577 | ret = VM_FAULT_OOM; |
2550 | goto out_page; | 2578 | goto out_page; |
@@ -2611,6 +2639,7 @@ out_nomap: | |||
2611 | pte_unmap_unlock(page_table, ptl); | 2639 | pte_unmap_unlock(page_table, ptl); |
2612 | out_page: | 2640 | out_page: |
2613 | unlock_page(page); | 2641 | unlock_page(page); |
2642 | out_release: | ||
2614 | page_cache_release(page); | 2643 | page_cache_release(page); |
2615 | return ret; | 2644 | return ret; |
2616 | } | 2645 | } |
@@ -2906,7 +2935,7 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2906 | * Page table corrupted: show pte and kill process. | 2935 | * Page table corrupted: show pte and kill process. |
2907 | */ | 2936 | */ |
2908 | print_bad_pte(vma, address, orig_pte, NULL); | 2937 | print_bad_pte(vma, address, orig_pte, NULL); |
2909 | return VM_FAULT_OOM; | 2938 | return VM_FAULT_SIGBUS; |
2910 | } | 2939 | } |
2911 | 2940 | ||
2912 | pgoff = pte_to_pgoff(orig_pte); | 2941 | pgoff = pte_to_pgoff(orig_pte); |