diff options
| author | Andi Kleen <ak@linux.intel.com> | 2010-10-22 11:40:48 -0400 | 
|---|---|---|
| committer | Andi Kleen <ak@linux.intel.com> | 2010-10-22 11:40:48 -0400 | 
| commit | 46e387bbd82d438b9131e237e6e2cb55a825da49 (patch) | |
| tree | 414948afd6b4d63c6ea8cc79ce022128bc1bf2eb | |
| parent | e9d08567ef72a2d0fb9b14dded386352d3136442 (diff) | |
| parent | 3ef8fd7f720fc4f462fcdcae2fcde6f1c0536bfe (diff) | |
Merge branch 'hwpoison-hugepages' into hwpoison
Conflicts:
	mm/memory-failure.c
| -rw-r--r-- | arch/x86/mm/fault.c | 19 | ||||
| -rw-r--r-- | fs/hugetlbfs/inode.c | 15 | ||||
| -rw-r--r-- | include/linux/hugetlb.h | 17 | ||||
| -rw-r--r-- | include/linux/migrate.h | 16 | ||||
| -rw-r--r-- | include/linux/mm.h | 12 | ||||
| -rw-r--r-- | mm/hugetlb.c | 233 | ||||
| -rw-r--r-- | mm/memory-failure.c | 102 | ||||
| -rw-r--r-- | mm/memory.c | 3 | ||||
| -rw-r--r-- | mm/migrate.c | 234 | ||||
| -rw-r--r-- | mm/rmap.c | 25 | 
10 files changed, 551 insertions, 125 deletions
| diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 79b0b372d2d0..852b319edbdc 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include <linux/kprobes.h> /* __kprobes, ... */ | 11 | #include <linux/kprobes.h> /* __kprobes, ... */ | 
| 12 | #include <linux/mmiotrace.h> /* kmmio_handler, ... */ | 12 | #include <linux/mmiotrace.h> /* kmmio_handler, ... */ | 
| 13 | #include <linux/perf_event.h> /* perf_sw_event */ | 13 | #include <linux/perf_event.h> /* perf_sw_event */ | 
| 14 | #include <linux/hugetlb.h> /* hstate_index_to_shift */ | ||
| 14 | 15 | ||
| 15 | #include <asm/traps.h> /* dotraplinkage, ... */ | 16 | #include <asm/traps.h> /* dotraplinkage, ... */ | 
| 16 | #include <asm/pgalloc.h> /* pgd_*(), ... */ | 17 | #include <asm/pgalloc.h> /* pgd_*(), ... */ | 
| @@ -160,15 +161,20 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) | |||
| 160 | 161 | ||
| 161 | static void | 162 | static void | 
| 162 | force_sig_info_fault(int si_signo, int si_code, unsigned long address, | 163 | force_sig_info_fault(int si_signo, int si_code, unsigned long address, | 
| 163 | struct task_struct *tsk) | 164 | struct task_struct *tsk, int fault) | 
| 164 | { | 165 | { | 
| 166 | unsigned lsb = 0; | ||
| 165 | siginfo_t info; | 167 | siginfo_t info; | 
| 166 | 168 | ||
| 167 | info.si_signo = si_signo; | 169 | info.si_signo = si_signo; | 
| 168 | info.si_errno = 0; | 170 | info.si_errno = 0; | 
| 169 | info.si_code = si_code; | 171 | info.si_code = si_code; | 
| 170 | info.si_addr = (void __user *)address; | 172 | info.si_addr = (void __user *)address; | 
| 171 | info.si_addr_lsb = si_code == BUS_MCEERR_AR ? PAGE_SHIFT : 0; | 173 | if (fault & VM_FAULT_HWPOISON_LARGE) | 
| 174 | lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault)); | ||
| 175 | if (fault & VM_FAULT_HWPOISON) | ||
| 176 | lsb = PAGE_SHIFT; | ||
| 177 | info.si_addr_lsb = lsb; | ||
| 172 | 178 | ||
| 173 | force_sig_info(si_signo, &info, tsk); | 179 | force_sig_info(si_signo, &info, tsk); | 
| 174 | } | 180 | } | 
| @@ -722,7 +728,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, | |||
| 722 | tsk->thread.error_code = error_code | (address >= TASK_SIZE); | 728 | tsk->thread.error_code = error_code | (address >= TASK_SIZE); | 
| 723 | tsk->thread.trap_no = 14; | 729 | tsk->thread.trap_no = 14; | 
| 724 | 730 | ||
| 725 | force_sig_info_fault(SIGSEGV, si_code, address, tsk); | 731 | force_sig_info_fault(SIGSEGV, si_code, address, tsk, 0); | 
| 726 | 732 | ||
| 727 | return; | 733 | return; | 
| 728 | } | 734 | } | 
| @@ -807,14 +813,14 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, | |||
| 807 | tsk->thread.trap_no = 14; | 813 | tsk->thread.trap_no = 14; | 
| 808 | 814 | ||
| 809 | #ifdef CONFIG_MEMORY_FAILURE | 815 | #ifdef CONFIG_MEMORY_FAILURE | 
| 810 | if (fault & VM_FAULT_HWPOISON) { | 816 | if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) { | 
| 811 | printk(KERN_ERR | 817 | printk(KERN_ERR | 
| 812 | "MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n", | 818 | "MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n", | 
| 813 | tsk->comm, tsk->pid, address); | 819 | tsk->comm, tsk->pid, address); | 
| 814 | code = BUS_MCEERR_AR; | 820 | code = BUS_MCEERR_AR; | 
| 815 | } | 821 | } | 
| 816 | #endif | 822 | #endif | 
| 817 | force_sig_info_fault(SIGBUS, code, address, tsk); | 823 | force_sig_info_fault(SIGBUS, code, address, tsk, fault); | 
| 818 | } | 824 | } | 
| 819 | 825 | ||
| 820 | static noinline void | 826 | static noinline void | 
| @@ -824,7 +830,8 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, | |||
| 824 | if (fault & VM_FAULT_OOM) { | 830 | if (fault & VM_FAULT_OOM) { | 
| 825 | out_of_memory(regs, error_code, address); | 831 | out_of_memory(regs, error_code, address); | 
| 826 | } else { | 832 | } else { | 
| 827 | if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON)) | 833 | if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON| | 
| 834 | VM_FAULT_HWPOISON_LARGE)) | ||
| 828 | do_sigbus(regs, error_code, address, fault); | 835 | do_sigbus(regs, error_code, address, fault); | 
| 829 | else | 836 | else | 
| 830 | BUG(); | 837 | BUG(); | 
| diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 6e5bd42f3860..1f7ca505d48e 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
| @@ -31,6 +31,7 @@ | |||
| 31 | #include <linux/statfs.h> | 31 | #include <linux/statfs.h> | 
| 32 | #include <linux/security.h> | 32 | #include <linux/security.h> | 
| 33 | #include <linux/magic.h> | 33 | #include <linux/magic.h> | 
| 34 | #include <linux/migrate.h> | ||
| 34 | 35 | ||
| 35 | #include <asm/uaccess.h> | 36 | #include <asm/uaccess.h> | 
| 36 | 37 | ||
| @@ -573,6 +574,19 @@ static int hugetlbfs_set_page_dirty(struct page *page) | |||
| 573 | return 0; | 574 | return 0; | 
| 574 | } | 575 | } | 
| 575 | 576 | ||
| 577 | static int hugetlbfs_migrate_page(struct address_space *mapping, | ||
| 578 | struct page *newpage, struct page *page) | ||
| 579 | { | ||
| 580 | int rc; | ||
| 581 | |||
| 582 | rc = migrate_huge_page_move_mapping(mapping, newpage, page); | ||
| 583 | if (rc) | ||
| 584 | return rc; | ||
| 585 | migrate_page_copy(newpage, page); | ||
| 586 | |||
| 587 | return 0; | ||
| 588 | } | ||
| 589 | |||
| 576 | static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf) | 590 | static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf) | 
| 577 | { | 591 | { | 
| 578 | struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb); | 592 | struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb); | 
| @@ -659,6 +673,7 @@ static const struct address_space_operations hugetlbfs_aops = { | |||
| 659 | .write_begin = hugetlbfs_write_begin, | 673 | .write_begin = hugetlbfs_write_begin, | 
| 660 | .write_end = hugetlbfs_write_end, | 674 | .write_end = hugetlbfs_write_end, | 
| 661 | .set_page_dirty = hugetlbfs_set_page_dirty, | 675 | .set_page_dirty = hugetlbfs_set_page_dirty, | 
| 676 | .migratepage = hugetlbfs_migrate_page, | ||
| 662 | }; | 677 | }; | 
| 663 | 678 | ||
| 664 | 679 | ||
| diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index f479700df61b..943c76b3d4bb 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h | |||
| @@ -43,7 +43,8 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to, | |||
| 43 | struct vm_area_struct *vma, | 43 | struct vm_area_struct *vma, | 
| 44 | int acctflags); | 44 | int acctflags); | 
| 45 | void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed); | 45 | void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed); | 
| 46 | void __isolate_hwpoisoned_huge_page(struct page *page); | 46 | int dequeue_hwpoisoned_huge_page(struct page *page); | 
| 47 | void copy_huge_page(struct page *dst, struct page *src); | ||
| 47 | 48 | ||
| 48 | extern unsigned long hugepages_treat_as_movable; | 49 | extern unsigned long hugepages_treat_as_movable; | 
| 49 | extern const unsigned long hugetlb_zero, hugetlb_infinity; | 50 | extern const unsigned long hugetlb_zero, hugetlb_infinity; | 
| @@ -101,7 +102,10 @@ static inline void hugetlb_report_meminfo(struct seq_file *m) | |||
| 101 | #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; }) | 102 | #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; }) | 
| 102 | #define hugetlb_fault(mm, vma, addr, flags) ({ BUG(); 0; }) | 103 | #define hugetlb_fault(mm, vma, addr, flags) ({ BUG(); 0; }) | 
| 103 | #define huge_pte_offset(mm, address) 0 | 104 | #define huge_pte_offset(mm, address) 0 | 
| 104 | #define __isolate_hwpoisoned_huge_page(page) 0 | 105 | #define dequeue_hwpoisoned_huge_page(page) 0 | 
| 106 | static inline void copy_huge_page(struct page *dst, struct page *src) | ||
| 107 | { | ||
| 108 | } | ||
| 105 | 109 | ||
| 106 | #define hugetlb_change_protection(vma, address, end, newprot) | 110 | #define hugetlb_change_protection(vma, address, end, newprot) | 
| 107 | 111 | ||
| @@ -228,6 +232,8 @@ struct huge_bootmem_page { | |||
| 228 | struct hstate *hstate; | 232 | struct hstate *hstate; | 
| 229 | }; | 233 | }; | 
| 230 | 234 | ||
| 235 | struct page *alloc_huge_page_node(struct hstate *h, int nid); | ||
| 236 | |||
| 231 | /* arch callback */ | 237 | /* arch callback */ | 
| 232 | int __init alloc_bootmem_huge_page(struct hstate *h); | 238 | int __init alloc_bootmem_huge_page(struct hstate *h); | 
| 233 | 239 | ||
| @@ -301,8 +307,14 @@ static inline struct hstate *page_hstate(struct page *page) | |||
| 301 | return size_to_hstate(PAGE_SIZE << compound_order(page)); | 307 | return size_to_hstate(PAGE_SIZE << compound_order(page)); | 
| 302 | } | 308 | } | 
| 303 | 309 | ||
| 310 | static inline unsigned hstate_index_to_shift(unsigned index) | ||
| 311 | { | ||
| 312 | return hstates[index].order + PAGE_SHIFT; | ||
| 313 | } | ||
| 314 | |||
| 304 | #else | 315 | #else | 
| 305 | struct hstate {}; | 316 | struct hstate {}; | 
| 317 | #define alloc_huge_page_node(h, nid) NULL | ||
| 306 | #define alloc_bootmem_huge_page(h) NULL | 318 | #define alloc_bootmem_huge_page(h) NULL | 
| 307 | #define hstate_file(f) NULL | 319 | #define hstate_file(f) NULL | 
| 308 | #define hstate_vma(v) NULL | 320 | #define hstate_vma(v) NULL | 
| @@ -317,6 +329,7 @@ static inline unsigned int pages_per_huge_page(struct hstate *h) | |||
| 317 | { | 329 | { | 
| 318 | return 1; | 330 | return 1; | 
| 319 | } | 331 | } | 
| 332 | #define hstate_index_to_shift(index) 0 | ||
| 320 | #endif | 333 | #endif | 
| 321 | 334 | ||
| 322 | #endif /* _LINUX_HUGETLB_H */ | 335 | #endif /* _LINUX_HUGETLB_H */ | 
| diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 7238231b8dd4..085527fb8261 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h | |||
| @@ -14,6 +14,8 @@ extern int migrate_page(struct address_space *, | |||
| 14 | struct page *, struct page *); | 14 | struct page *, struct page *); | 
| 15 | extern int migrate_pages(struct list_head *l, new_page_t x, | 15 | extern int migrate_pages(struct list_head *l, new_page_t x, | 
| 16 | unsigned long private, int offlining); | 16 | unsigned long private, int offlining); | 
| 17 | extern int migrate_huge_pages(struct list_head *l, new_page_t x, | ||
| 18 | unsigned long private, int offlining); | ||
| 17 | 19 | ||
| 18 | extern int fail_migrate_page(struct address_space *, | 20 | extern int fail_migrate_page(struct address_space *, | 
| 19 | struct page *, struct page *); | 21 | struct page *, struct page *); | 
| @@ -23,12 +25,17 @@ extern int migrate_prep_local(void); | |||
| 23 | extern int migrate_vmas(struct mm_struct *mm, | 25 | extern int migrate_vmas(struct mm_struct *mm, | 
| 24 | const nodemask_t *from, const nodemask_t *to, | 26 | const nodemask_t *from, const nodemask_t *to, | 
| 25 | unsigned long flags); | 27 | unsigned long flags); | 
| 28 | extern void migrate_page_copy(struct page *newpage, struct page *page); | ||
| 29 | extern int migrate_huge_page_move_mapping(struct address_space *mapping, | ||
| 30 | struct page *newpage, struct page *page); | ||
| 26 | #else | 31 | #else | 
| 27 | #define PAGE_MIGRATION 0 | 32 | #define PAGE_MIGRATION 0 | 
| 28 | 33 | ||
| 29 | static inline void putback_lru_pages(struct list_head *l) {} | 34 | static inline void putback_lru_pages(struct list_head *l) {} | 
| 30 | static inline int migrate_pages(struct list_head *l, new_page_t x, | 35 | static inline int migrate_pages(struct list_head *l, new_page_t x, | 
| 31 | unsigned long private, int offlining) { return -ENOSYS; } | 36 | unsigned long private, int offlining) { return -ENOSYS; } | 
| 37 | static inline int migrate_huge_pages(struct list_head *l, new_page_t x, | ||
| 38 | unsigned long private, int offlining) { return -ENOSYS; } | ||
| 32 | 39 | ||
| 33 | static inline int migrate_prep(void) { return -ENOSYS; } | 40 | static inline int migrate_prep(void) { return -ENOSYS; } | 
| 34 | static inline int migrate_prep_local(void) { return -ENOSYS; } | 41 | static inline int migrate_prep_local(void) { return -ENOSYS; } | 
| @@ -40,6 +47,15 @@ static inline int migrate_vmas(struct mm_struct *mm, | |||
| 40 | return -ENOSYS; | 47 | return -ENOSYS; | 
| 41 | } | 48 | } | 
| 42 | 49 | ||
| 50 | static inline void migrate_page_copy(struct page *newpage, | ||
| 51 | struct page *page) {} | ||
| 52 | |||
| 53 | static inline int migrate_huge_page_move_mapping(struct address_space *mapping, | ||
| 54 | struct page *newpage, struct page *page) | ||
| 55 | { | ||
| 56 | return -ENOSYS; | ||
| 57 | } | ||
| 58 | |||
| 43 | /* Possible settings for the migrate_page() method in address_operations */ | 59 | /* Possible settings for the migrate_page() method in address_operations */ | 
| 44 | #define migrate_page NULL | 60 | #define migrate_page NULL | 
| 45 | #define fail_migrate_page NULL | 61 | #define fail_migrate_page NULL | 
| diff --git a/include/linux/mm.h b/include/linux/mm.h index 7687228dd3b7..a4c66846fb8f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
| @@ -718,12 +718,20 @@ static inline int page_mapped(struct page *page) | |||
| 718 | #define VM_FAULT_SIGBUS 0x0002 | 718 | #define VM_FAULT_SIGBUS 0x0002 | 
| 719 | #define VM_FAULT_MAJOR 0x0004 | 719 | #define VM_FAULT_MAJOR 0x0004 | 
| 720 | #define VM_FAULT_WRITE 0x0008 /* Special case for get_user_pages */ | 720 | #define VM_FAULT_WRITE 0x0008 /* Special case for get_user_pages */ | 
| 721 | #define VM_FAULT_HWPOISON 0x0010 /* Hit poisoned page */ | 721 | #define VM_FAULT_HWPOISON 0x0010 /* Hit poisoned small page */ | 
| 722 | #define VM_FAULT_HWPOISON_LARGE 0x0020 /* Hit poisoned large page. Index encoded in upper bits */ | ||
| 722 | 723 | ||
| 723 | #define VM_FAULT_NOPAGE 0x0100 /* ->fault installed the pte, not return page */ | 724 | #define VM_FAULT_NOPAGE 0x0100 /* ->fault installed the pte, not return page */ | 
| 724 | #define VM_FAULT_LOCKED 0x0200 /* ->fault locked the returned page */ | 725 | #define VM_FAULT_LOCKED 0x0200 /* ->fault locked the returned page */ | 
| 725 | 726 | ||
| 726 | #define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_HWPOISON) | 727 | #define VM_FAULT_HWPOISON_LARGE_MASK 0xf000 /* encodes hpage index for large hwpoison */ | 
| 728 | |||
| 729 | #define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_HWPOISON | \ | ||
| 730 | VM_FAULT_HWPOISON_LARGE) | ||
| 731 | |||
| 732 | /* Encode hstate index for a hwpoisoned large page */ | ||
| 733 | #define VM_FAULT_SET_HINDEX(x) ((x) << 12) | ||
| 734 | #define VM_FAULT_GET_HINDEX(x) (((x) >> 12) & 0xf) | ||
| 727 | 735 | ||
| 728 | /* | 736 | /* | 
| 729 | * Can be called by the pagefault handler when it gets a VM_FAULT_OOM. | 737 | * Can be called by the pagefault handler when it gets a VM_FAULT_OOM. | 
| diff --git a/mm/hugetlb.c b/mm/hugetlb.c index c03273807182..96991ded82fe 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
| @@ -423,14 +423,14 @@ static void clear_huge_page(struct page *page, | |||
| 423 | } | 423 | } | 
| 424 | } | 424 | } | 
| 425 | 425 | ||
| 426 | static void copy_gigantic_page(struct page *dst, struct page *src, | 426 | static void copy_user_gigantic_page(struct page *dst, struct page *src, | 
| 427 | unsigned long addr, struct vm_area_struct *vma) | 427 | unsigned long addr, struct vm_area_struct *vma) | 
| 428 | { | 428 | { | 
| 429 | int i; | 429 | int i; | 
| 430 | struct hstate *h = hstate_vma(vma); | 430 | struct hstate *h = hstate_vma(vma); | 
| 431 | struct page *dst_base = dst; | 431 | struct page *dst_base = dst; | 
| 432 | struct page *src_base = src; | 432 | struct page *src_base = src; | 
| 433 | might_sleep(); | 433 | |
| 434 | for (i = 0; i < pages_per_huge_page(h); ) { | 434 | for (i = 0; i < pages_per_huge_page(h); ) { | 
| 435 | cond_resched(); | 435 | cond_resched(); | 
| 436 | copy_user_highpage(dst, src, addr + i*PAGE_SIZE, vma); | 436 | copy_user_highpage(dst, src, addr + i*PAGE_SIZE, vma); | 
| @@ -440,14 +440,15 @@ static void copy_gigantic_page(struct page *dst, struct page *src, | |||
| 440 | src = mem_map_next(src, src_base, i); | 440 | src = mem_map_next(src, src_base, i); | 
| 441 | } | 441 | } | 
| 442 | } | 442 | } | 
| 443 | static void copy_huge_page(struct page *dst, struct page *src, | 443 | |
| 444 | static void copy_user_huge_page(struct page *dst, struct page *src, | ||
| 444 | unsigned long addr, struct vm_area_struct *vma) | 445 | unsigned long addr, struct vm_area_struct *vma) | 
| 445 | { | 446 | { | 
| 446 | int i; | 447 | int i; | 
| 447 | struct hstate *h = hstate_vma(vma); | 448 | struct hstate *h = hstate_vma(vma); | 
| 448 | 449 | ||
| 449 | if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES)) { | 450 | if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES)) { | 
| 450 | copy_gigantic_page(dst, src, addr, vma); | 451 | copy_user_gigantic_page(dst, src, addr, vma); | 
| 451 | return; | 452 | return; | 
| 452 | } | 453 | } | 
| 453 | 454 | ||
| @@ -458,6 +459,40 @@ static void copy_huge_page(struct page *dst, struct page *src, | |||
| 458 | } | 459 | } | 
| 459 | } | 460 | } | 
| 460 | 461 | ||
| 462 | static void copy_gigantic_page(struct page *dst, struct page *src) | ||
| 463 | { | ||
| 464 | int i; | ||
| 465 | struct hstate *h = page_hstate(src); | ||
| 466 | struct page *dst_base = dst; | ||
| 467 | struct page *src_base = src; | ||
| 468 | |||
| 469 | for (i = 0; i < pages_per_huge_page(h); ) { | ||
| 470 | cond_resched(); | ||
| 471 | copy_highpage(dst, src); | ||
| 472 | |||
| 473 | i++; | ||
| 474 | dst = mem_map_next(dst, dst_base, i); | ||
| 475 | src = mem_map_next(src, src_base, i); | ||
| 476 | } | ||
| 477 | } | ||
| 478 | |||
| 479 | void copy_huge_page(struct page *dst, struct page *src) | ||
| 480 | { | ||
| 481 | int i; | ||
| 482 | struct hstate *h = page_hstate(src); | ||
| 483 | |||
| 484 | if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES)) { | ||
| 485 | copy_gigantic_page(dst, src); | ||
| 486 | return; | ||
| 487 | } | ||
| 488 | |||
| 489 | might_sleep(); | ||
| 490 | for (i = 0; i < pages_per_huge_page(h); i++) { | ||
| 491 | cond_resched(); | ||
| 492 | copy_highpage(dst + i, src + i); | ||
| 493 | } | ||
| 494 | } | ||
| 495 | |||
| 461 | static void enqueue_huge_page(struct hstate *h, struct page *page) | 496 | static void enqueue_huge_page(struct hstate *h, struct page *page) | 
| 462 | { | 497 | { | 
| 463 | int nid = page_to_nid(page); | 498 | int nid = page_to_nid(page); | 
| @@ -466,11 +501,24 @@ static void enqueue_huge_page(struct hstate *h, struct page *page) | |||
| 466 | h->free_huge_pages_node[nid]++; | 501 | h->free_huge_pages_node[nid]++; | 
| 467 | } | 502 | } | 
| 468 | 503 | ||
| 504 | static struct page *dequeue_huge_page_node(struct hstate *h, int nid) | ||
| 505 | { | ||
| 506 | struct page *page; | ||
| 507 | |||
| 508 | if (list_empty(&h->hugepage_freelists[nid])) | ||
| 509 | return NULL; | ||
| 510 | page = list_entry(h->hugepage_freelists[nid].next, struct page, lru); | ||
| 511 | list_del(&page->lru); | ||
| 512 | set_page_refcounted(page); | ||
| 513 | h->free_huge_pages--; | ||
| 514 | h->free_huge_pages_node[nid]--; | ||
| 515 | return page; | ||
| 516 | } | ||
| 517 | |||
| 469 | static struct page *dequeue_huge_page_vma(struct hstate *h, | 518 | static struct page *dequeue_huge_page_vma(struct hstate *h, | 
| 470 | struct vm_area_struct *vma, | 519 | struct vm_area_struct *vma, | 
| 471 | unsigned long address, int avoid_reserve) | 520 | unsigned long address, int avoid_reserve) | 
| 472 | { | 521 | { | 
| 473 | int nid; | ||
| 474 | struct page *page = NULL; | 522 | struct page *page = NULL; | 
| 475 | struct mempolicy *mpol; | 523 | struct mempolicy *mpol; | 
| 476 | nodemask_t *nodemask; | 524 | nodemask_t *nodemask; | 
| @@ -496,19 +544,13 @@ static struct page *dequeue_huge_page_vma(struct hstate *h, | |||
| 496 | 544 | ||
| 497 | for_each_zone_zonelist_nodemask(zone, z, zonelist, | 545 | for_each_zone_zonelist_nodemask(zone, z, zonelist, | 
| 498 | MAX_NR_ZONES - 1, nodemask) { | 546 | MAX_NR_ZONES - 1, nodemask) { | 
| 499 | nid = zone_to_nid(zone); | 547 | if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask)) { | 
| 500 | if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask) && | 548 | page = dequeue_huge_page_node(h, zone_to_nid(zone)); | 
| 501 | !list_empty(&h->hugepage_freelists[nid])) { | 549 | if (page) { | 
| 502 | page = list_entry(h->hugepage_freelists[nid].next, | 550 | if (!avoid_reserve) | 
| 503 | struct page, lru); | 551 | decrement_hugepage_resv_vma(h, vma); | 
| 504 | list_del(&page->lru); | 552 | break; | 
| 505 | h->free_huge_pages--; | 553 | } | 
| 506 | h->free_huge_pages_node[nid]--; | ||
| 507 | |||
| 508 | if (!avoid_reserve) | ||
| 509 | decrement_hugepage_resv_vma(h, vma); | ||
| 510 | |||
| 511 | break; | ||
| 512 | } | 554 | } | 
| 513 | } | 555 | } | 
| 514 | err: | 556 | err: | 
| @@ -770,11 +812,10 @@ static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed, | |||
| 770 | return ret; | 812 | return ret; | 
| 771 | } | 813 | } | 
| 772 | 814 | ||
| 773 | static struct page *alloc_buddy_huge_page(struct hstate *h, | 815 | static struct page *alloc_buddy_huge_page(struct hstate *h, int nid) | 
| 774 | struct vm_area_struct *vma, unsigned long address) | ||
| 775 | { | 816 | { | 
| 776 | struct page *page; | 817 | struct page *page; | 
| 777 | unsigned int nid; | 818 | unsigned int r_nid; | 
| 778 | 819 | ||
| 779 | if (h->order >= MAX_ORDER) | 820 | if (h->order >= MAX_ORDER) | 
| 780 | return NULL; | 821 | return NULL; | 
| @@ -812,9 +853,14 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, | |||
| 812 | } | 853 | } | 
| 813 | spin_unlock(&hugetlb_lock); | 854 | spin_unlock(&hugetlb_lock); | 
| 814 | 855 | ||
| 815 | page = alloc_pages(htlb_alloc_mask|__GFP_COMP| | 856 | if (nid == NUMA_NO_NODE) | 
| 816 | __GFP_REPEAT|__GFP_NOWARN, | 857 | page = alloc_pages(htlb_alloc_mask|__GFP_COMP| | 
| 817 | huge_page_order(h)); | 858 | __GFP_REPEAT|__GFP_NOWARN, | 
| 859 | huge_page_order(h)); | ||
| 860 | else | ||
| 861 | page = alloc_pages_exact_node(nid, | ||
| 862 | htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE| | ||
| 863 | __GFP_REPEAT|__GFP_NOWARN, huge_page_order(h)); | ||
| 818 | 864 | ||
| 819 | if (page && arch_prepare_hugepage(page)) { | 865 | if (page && arch_prepare_hugepage(page)) { | 
| 820 | __free_pages(page, huge_page_order(h)); | 866 | __free_pages(page, huge_page_order(h)); | 
| @@ -823,19 +869,13 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, | |||
| 823 | 869 | ||
| 824 | spin_lock(&hugetlb_lock); | 870 | spin_lock(&hugetlb_lock); | 
| 825 | if (page) { | 871 | if (page) { | 
| 826 | /* | 872 | r_nid = page_to_nid(page); | 
| 827 | * This page is now managed by the hugetlb allocator and has | ||
| 828 | * no users -- drop the buddy allocator's reference. | ||
| 829 | */ | ||
| 830 | put_page_testzero(page); | ||
| 831 | VM_BUG_ON(page_count(page)); | ||
| 832 | nid = page_to_nid(page); | ||
| 833 | set_compound_page_dtor(page, free_huge_page); | 873 | set_compound_page_dtor(page, free_huge_page); | 
| 834 | /* | 874 | /* | 
| 835 | * We incremented the global counters already | 875 | * We incremented the global counters already | 
| 836 | */ | 876 | */ | 
| 837 | h->nr_huge_pages_node[nid]++; | 877 | h->nr_huge_pages_node[r_nid]++; | 
| 838 | h->surplus_huge_pages_node[nid]++; | 878 | h->surplus_huge_pages_node[r_nid]++; | 
| 839 | __count_vm_event(HTLB_BUDDY_PGALLOC); | 879 | __count_vm_event(HTLB_BUDDY_PGALLOC); | 
| 840 | } else { | 880 | } else { | 
| 841 | h->nr_huge_pages--; | 881 | h->nr_huge_pages--; | 
| @@ -848,6 +888,25 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, | |||
| 848 | } | 888 | } | 
| 849 | 889 | ||
| 850 | /* | 890 | /* | 
| 891 | * This allocation function is useful in the context where vma is irrelevant. | ||
| 892 | * E.g. soft-offlining uses this function because it only cares physical | ||
| 893 | * address of error page. | ||
| 894 | */ | ||
| 895 | struct page *alloc_huge_page_node(struct hstate *h, int nid) | ||
| 896 | { | ||
| 897 | struct page *page; | ||
| 898 | |||
| 899 | spin_lock(&hugetlb_lock); | ||
| 900 | page = dequeue_huge_page_node(h, nid); | ||
| 901 | spin_unlock(&hugetlb_lock); | ||
| 902 | |||
| 903 | if (!page) | ||
| 904 | page = alloc_buddy_huge_page(h, nid); | ||
| 905 | |||
| 906 | return page; | ||
| 907 | } | ||
| 908 | |||
| 909 | /* | ||
| 851 | * Increase the hugetlb pool such that it can accomodate a reservation | 910 | * Increase the hugetlb pool such that it can accomodate a reservation | 
| 852 | * of size 'delta'. | 911 | * of size 'delta'. | 
| 853 | */ | 912 | */ | 
| @@ -871,17 +930,14 @@ static int gather_surplus_pages(struct hstate *h, int delta) | |||
| 871 | retry: | 930 | retry: | 
| 872 | spin_unlock(&hugetlb_lock); | 931 | spin_unlock(&hugetlb_lock); | 
| 873 | for (i = 0; i < needed; i++) { | 932 | for (i = 0; i < needed; i++) { | 
| 874 | page = alloc_buddy_huge_page(h, NULL, 0); | 933 | page = alloc_buddy_huge_page(h, NUMA_NO_NODE); | 
| 875 | if (!page) { | 934 | if (!page) | 
| 876 | /* | 935 | /* | 
| 877 | * We were not able to allocate enough pages to | 936 | * We were not able to allocate enough pages to | 
| 878 | * satisfy the entire reservation so we free what | 937 | * satisfy the entire reservation so we free what | 
| 879 | * we've allocated so far. | 938 | * we've allocated so far. | 
| 880 | */ | 939 | */ | 
| 881 | spin_lock(&hugetlb_lock); | ||
| 882 | needed = 0; | ||
| 883 | goto free; | 940 | goto free; | 
| 884 | } | ||
| 885 | 941 | ||
| 886 | list_add(&page->lru, &surplus_list); | 942 | list_add(&page->lru, &surplus_list); | 
| 887 | } | 943 | } | 
| @@ -908,31 +964,31 @@ retry: | |||
| 908 | needed += allocated; | 964 | needed += allocated; | 
| 909 | h->resv_huge_pages += delta; | 965 | h->resv_huge_pages += delta; | 
| 910 | ret = 0; | 966 | ret = 0; | 
| 911 | free: | 967 | |
| 968 | spin_unlock(&hugetlb_lock); | ||
| 912 | /* Free the needed pages to the hugetlb pool */ | 969 | /* Free the needed pages to the hugetlb pool */ | 
| 913 | list_for_each_entry_safe(page, tmp, &surplus_list, lru) { | 970 | list_for_each_entry_safe(page, tmp, &surplus_list, lru) { | 
| 914 | if ((--needed) < 0) | 971 | if ((--needed) < 0) | 
| 915 | break; | 972 | break; | 
| 916 | list_del(&page->lru); | 973 | list_del(&page->lru); | 
| 974 | /* | ||
| 975 | * This page is now managed by the hugetlb allocator and has | ||
| 976 | * no users -- drop the buddy allocator's reference. | ||
| 977 | */ | ||
| 978 | put_page_testzero(page); | ||
| 979 | VM_BUG_ON(page_count(page)); | ||
| 917 | enqueue_huge_page(h, page); | 980 | enqueue_huge_page(h, page); | 
| 918 | } | 981 | } | 
| 919 | 982 | ||
| 920 | /* Free unnecessary surplus pages to the buddy allocator */ | 983 | /* Free unnecessary surplus pages to the buddy allocator */ | 
| 984 | free: | ||
| 921 | if (!list_empty(&surplus_list)) { | 985 | if (!list_empty(&surplus_list)) { | 
| 922 | spin_unlock(&hugetlb_lock); | ||
| 923 | list_for_each_entry_safe(page, tmp, &surplus_list, lru) { | 986 | list_for_each_entry_safe(page, tmp, &surplus_list, lru) { | 
| 924 | list_del(&page->lru); | 987 | list_del(&page->lru); | 
| 925 | /* | 988 | put_page(page); | 
| 926 | * The page has a reference count of zero already, so | ||
| 927 | * call free_huge_page directly instead of using | ||
| 928 | * put_page. This must be done with hugetlb_lock | ||
| 929 | * unlocked which is safe because free_huge_page takes | ||
| 930 | * hugetlb_lock before deciding how to free the page. | ||
| 931 | */ | ||
| 932 | free_huge_page(page); | ||
| 933 | } | 989 | } | 
| 934 | spin_lock(&hugetlb_lock); | ||
| 935 | } | 990 | } | 
| 991 | spin_lock(&hugetlb_lock); | ||
| 936 | 992 | ||
| 937 | return ret; | 993 | return ret; | 
| 938 | } | 994 | } | 
| @@ -1052,14 +1108,13 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma, | |||
| 1052 | spin_unlock(&hugetlb_lock); | 1108 | spin_unlock(&hugetlb_lock); | 
| 1053 | 1109 | ||
| 1054 | if (!page) { | 1110 | if (!page) { | 
| 1055 | page = alloc_buddy_huge_page(h, vma, addr); | 1111 | page = alloc_buddy_huge_page(h, NUMA_NO_NODE); | 
| 1056 | if (!page) { | 1112 | if (!page) { | 
| 1057 | hugetlb_put_quota(inode->i_mapping, chg); | 1113 | hugetlb_put_quota(inode->i_mapping, chg); | 
| 1058 | return ERR_PTR(-VM_FAULT_SIGBUS); | 1114 | return ERR_PTR(-VM_FAULT_SIGBUS); | 
| 1059 | } | 1115 | } | 
| 1060 | } | 1116 | } | 
| 1061 | 1117 | ||
| 1062 | set_page_refcounted(page); | ||
| 1063 | set_page_private(page, (unsigned long) mapping); | 1118 | set_page_private(page, (unsigned long) mapping); | 
| 1064 | 1119 | ||
| 1065 | vma_commit_reservation(h, vma, addr); | 1120 | vma_commit_reservation(h, vma, addr); | 
| @@ -2153,6 +2208,19 @@ nomem: | |||
| 2153 | return -ENOMEM; | 2208 | return -ENOMEM; | 
| 2154 | } | 2209 | } | 
| 2155 | 2210 | ||
| 2211 | static int is_hugetlb_entry_migration(pte_t pte) | ||
| 2212 | { | ||
| 2213 | swp_entry_t swp; | ||
| 2214 | |||
| 2215 | if (huge_pte_none(pte) || pte_present(pte)) | ||
| 2216 | return 0; | ||
| 2217 | swp = pte_to_swp_entry(pte); | ||
| 2218 | if (non_swap_entry(swp) && is_migration_entry(swp)) { | ||
| 2219 | return 1; | ||
| 2220 | } else | ||
| 2221 | return 0; | ||
| 2222 | } | ||
| 2223 | |||
| 2156 | static int is_hugetlb_entry_hwpoisoned(pte_t pte) | 2224 | static int is_hugetlb_entry_hwpoisoned(pte_t pte) | 
| 2157 | { | 2225 | { | 
| 2158 | swp_entry_t swp; | 2226 | swp_entry_t swp; | 
| @@ -2383,7 +2451,7 @@ retry_avoidcopy: | |||
| 2383 | if (unlikely(anon_vma_prepare(vma))) | 2451 | if (unlikely(anon_vma_prepare(vma))) | 
| 2384 | return VM_FAULT_OOM; | 2452 | return VM_FAULT_OOM; | 
| 2385 | 2453 | ||
| 2386 | copy_huge_page(new_page, old_page, address, vma); | 2454 | copy_user_huge_page(new_page, old_page, address, vma); | 
| 2387 | __SetPageUptodate(new_page); | 2455 | __SetPageUptodate(new_page); | 
| 2388 | 2456 | ||
| 2389 | /* | 2457 | /* | 
| @@ -2515,22 +2583,20 @@ retry: | |||
| 2515 | hugepage_add_new_anon_rmap(page, vma, address); | 2583 | hugepage_add_new_anon_rmap(page, vma, address); | 
| 2516 | } | 2584 | } | 
| 2517 | } else { | 2585 | } else { | 
| 2586 | /* | ||
| 2587 | * If memory error occurs between mmap() and fault, some process | ||
| 2588 | * don't have hwpoisoned swap entry for errored virtual address. | ||
| 2589 | * So we need to block hugepage fault by PG_hwpoison bit check. | ||
| 2590 | */ | ||
| 2591 | if (unlikely(PageHWPoison(page))) { | ||
| 2592 | ret = VM_FAULT_HWPOISON | | ||
| 2593 | VM_FAULT_SET_HINDEX(h - hstates); | ||
| 2594 | goto backout_unlocked; | ||
| 2595 | } | ||
| 2518 | page_dup_rmap(page); | 2596 | page_dup_rmap(page); | 
| 2519 | } | 2597 | } | 
| 2520 | 2598 | ||
| 2521 | /* | 2599 | /* | 
| 2522 | * Since memory error handler replaces pte into hwpoison swap entry | ||
| 2523 | * at the time of error handling, a process which reserved but not have | ||
| 2524 | * the mapping to the error hugepage does not have hwpoison swap entry. | ||
| 2525 | * So we need to block accesses from such a process by checking | ||
| 2526 | * PG_hwpoison bit here. | ||
| 2527 | */ | ||
| 2528 | if (unlikely(PageHWPoison(page))) { | ||
| 2529 | ret = VM_FAULT_HWPOISON; | ||
| 2530 | goto backout_unlocked; | ||
| 2531 | } | ||
| 2532 | |||
| 2533 | /* | ||
| 2534 | * If we are going to COW a private mapping later, we examine the | 2600 | * If we are going to COW a private mapping later, we examine the | 
| 2535 | * pending reservations for this page now. This will ensure that | 2601 | * pending reservations for this page now. This will ensure that | 
| 2536 | * any allocations necessary to record that reservation occur outside | 2602 | * any allocations necessary to record that reservation occur outside | 
| @@ -2587,8 +2653,12 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 2587 | ptep = huge_pte_offset(mm, address); | 2653 | ptep = huge_pte_offset(mm, address); | 
| 2588 | if (ptep) { | 2654 | if (ptep) { | 
| 2589 | entry = huge_ptep_get(ptep); | 2655 | entry = huge_ptep_get(ptep); | 
| 2590 | if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) | 2656 | if (unlikely(is_hugetlb_entry_migration(entry))) { | 
| 2591 | return VM_FAULT_HWPOISON; | 2657 | migration_entry_wait(mm, (pmd_t *)ptep, address); | 
| 2658 | return 0; | ||
| 2659 | } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) | ||
| 2660 | return VM_FAULT_HWPOISON_LARGE | | ||
| 2661 | VM_FAULT_SET_HINDEX(h - hstates); | ||
| 2592 | } | 2662 | } | 
| 2593 | 2663 | ||
| 2594 | ptep = huge_pte_alloc(mm, address, huge_page_size(h)); | 2664 | ptep = huge_pte_alloc(mm, address, huge_page_size(h)); | 
| @@ -2878,18 +2948,41 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed) | |||
| 2878 | hugetlb_acct_memory(h, -(chg - freed)); | 2948 | hugetlb_acct_memory(h, -(chg - freed)); | 
| 2879 | } | 2949 | } | 
| 2880 | 2950 | ||
| 2951 | #ifdef CONFIG_MEMORY_FAILURE | ||
| 2952 | |||
| 2953 | /* Should be called with hugetlb_lock held */ | ||
| 2954 | static int is_hugepage_on_freelist(struct page *hpage) | ||
| 2955 | { | ||
| 2956 | struct page *page; | ||
| 2957 | struct page *tmp; | ||
| 2958 | struct hstate *h = page_hstate(hpage); | ||
| 2959 | int nid = page_to_nid(hpage); | ||
| 2960 | |||
| 2961 | list_for_each_entry_safe(page, tmp, &h->hugepage_freelists[nid], lru) | ||
| 2962 | if (page == hpage) | ||
| 2963 | return 1; | ||
| 2964 | return 0; | ||
| 2965 | } | ||
| 2966 | |||
| 2881 | /* | 2967 | /* | 
| 2882 | * This function is called from memory failure code. | 2968 | * This function is called from memory failure code. | 
| 2883 | * Assume the caller holds page lock of the head page. | 2969 | * Assume the caller holds page lock of the head page. | 
| 2884 | */ | 2970 | */ | 
| 2885 | void __isolate_hwpoisoned_huge_page(struct page *hpage) | 2971 | int dequeue_hwpoisoned_huge_page(struct page *hpage) | 
| 2886 | { | 2972 | { | 
| 2887 | struct hstate *h = page_hstate(hpage); | 2973 | struct hstate *h = page_hstate(hpage); | 
| 2888 | int nid = page_to_nid(hpage); | 2974 | int nid = page_to_nid(hpage); | 
| 2975 | int ret = -EBUSY; | ||
| 2889 | 2976 | ||
| 2890 | spin_lock(&hugetlb_lock); | 2977 | spin_lock(&hugetlb_lock); | 
| 2891 | list_del(&hpage->lru); | 2978 | if (is_hugepage_on_freelist(hpage)) { | 
| 2892 | h->free_huge_pages--; | 2979 | list_del(&hpage->lru); | 
| 2893 | h->free_huge_pages_node[nid]--; | 2980 | set_page_refcounted(hpage); | 
| 2981 | h->free_huge_pages--; | ||
| 2982 | h->free_huge_pages_node[nid]--; | ||
| 2983 | ret = 0; | ||
| 2984 | } | ||
| 2894 | spin_unlock(&hugetlb_lock); | 2985 | spin_unlock(&hugetlb_lock); | 
| 2986 | return ret; | ||
| 2895 | } | 2987 | } | 
| 2988 | #endif | ||
| diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 2044fe8920c2..44a8cefeae6e 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c | |||
| @@ -697,11 +697,10 @@ static int me_swapcache_clean(struct page *p, unsigned long pfn) | |||
| 697 | * Issues: | 697 | * Issues: | 
| 698 | * - Error on hugepage is contained in hugepage unit (not in raw page unit.) | 698 | * - Error on hugepage is contained in hugepage unit (not in raw page unit.) | 
| 699 | * To narrow down kill region to one page, we need to break up pmd. | 699 | * To narrow down kill region to one page, we need to break up pmd. | 
| 700 | * - To support soft-offlining for hugepage, we need to support hugepage | ||
| 701 | * migration. | ||
| 702 | */ | 700 | */ | 
| 703 | static int me_huge_page(struct page *p, unsigned long pfn) | 701 | static int me_huge_page(struct page *p, unsigned long pfn) | 
| 704 | { | 702 | { | 
| 703 | int res = 0; | ||
| 705 | struct page *hpage = compound_head(p); | 704 | struct page *hpage = compound_head(p); | 
| 706 | /* | 705 | /* | 
| 707 | * We can safely recover from error on free or reserved (i.e. | 706 | * We can safely recover from error on free or reserved (i.e. | 
| @@ -714,8 +713,9 @@ static int me_huge_page(struct page *p, unsigned long pfn) | |||
| 714 | * so there is no race between isolation and mapping/unmapping. | 713 | * so there is no race between isolation and mapping/unmapping. | 
| 715 | */ | 714 | */ | 
| 716 | if (!(page_mapping(hpage) || PageAnon(hpage))) { | 715 | if (!(page_mapping(hpage) || PageAnon(hpage))) { | 
| 717 | __isolate_hwpoisoned_huge_page(hpage); | 716 | res = dequeue_hwpoisoned_huge_page(hpage); | 
| 718 | return RECOVERED; | 717 | if (!res) | 
| 718 | return RECOVERED; | ||
| 719 | } | 719 | } | 
| 720 | return DELAYED; | 720 | return DELAYED; | 
| 721 | } | 721 | } | 
| @@ -972,7 +972,10 @@ int __memory_failure(unsigned long pfn, int trapno, int flags) | |||
| 972 | * We need/can do nothing about count=0 pages. | 972 | * We need/can do nothing about count=0 pages. | 
| 973 | * 1) it's a free page, and therefore in safe hand: | 973 | * 1) it's a free page, and therefore in safe hand: | 
| 974 | * prep_new_page() will be the gate keeper. | 974 | * prep_new_page() will be the gate keeper. | 
| 975 | * 2) it's part of a non-compound high order page. | 975 | * 2) it's a free hugepage, which is also safe: | 
| 976 | * an affected hugepage will be dequeued from hugepage freelist, | ||
| 977 | * so there's no concern about reusing it ever after. | ||
| 978 | * 3) it's part of a non-compound high order page. | ||
| 976 | * Implies some kernel user: cannot stop them from | 979 | * Implies some kernel user: cannot stop them from | 
| 977 | * R/W the page; let's pray that the page has been | 980 | * R/W the page; let's pray that the page has been | 
| 978 | * used and will be freed some time later. | 981 | * used and will be freed some time later. | 
| @@ -984,6 +987,24 @@ int __memory_failure(unsigned long pfn, int trapno, int flags) | |||
| 984 | if (is_free_buddy_page(p)) { | 987 | if (is_free_buddy_page(p)) { | 
| 985 | action_result(pfn, "free buddy", DELAYED); | 988 | action_result(pfn, "free buddy", DELAYED); | 
| 986 | return 0; | 989 | return 0; | 
| 990 | } else if (PageHuge(hpage)) { | ||
| 991 | /* | ||
| 992 | * Check "just unpoisoned", "filter hit", and | ||
| 993 | * "race with other subpage." | ||
| 994 | */ | ||
| 995 | lock_page_nosync(hpage); | ||
| 996 | if (!PageHWPoison(hpage) | ||
| 997 | || (hwpoison_filter(p) && TestClearPageHWPoison(p)) | ||
| 998 | || (p != hpage && TestSetPageHWPoison(hpage))) { | ||
| 999 | atomic_long_sub(nr_pages, &mce_bad_pages); | ||
| 1000 | return 0; | ||
| 1001 | } | ||
| 1002 | set_page_hwpoison_huge_page(hpage); | ||
| 1003 | res = dequeue_hwpoisoned_huge_page(hpage); | ||
| 1004 | action_result(pfn, "free huge", | ||
| 1005 | res ? IGNORED : DELAYED); | ||
| 1006 | unlock_page(hpage); | ||
| 1007 | return res; | ||
| 987 | } else { | 1008 | } else { | 
| 988 | action_result(pfn, "high order kernel", IGNORED); | 1009 | action_result(pfn, "high order kernel", IGNORED); | 
| 989 | return -EBUSY; | 1010 | return -EBUSY; | 
| @@ -1145,6 +1166,16 @@ int unpoison_memory(unsigned long pfn) | |||
| 1145 | nr_pages = 1 << compound_order(page); | 1166 | nr_pages = 1 << compound_order(page); | 
| 1146 | 1167 | ||
| 1147 | if (!get_page_unless_zero(page)) { | 1168 | if (!get_page_unless_zero(page)) { | 
| 1169 | /* | ||
| 1170 | * Since a HWPoisoned hugepage should have a non-zero refcount, | ||
| 1171 | * a race between memory failure and unpoison seems to have happened. | ||
| 1172 | * In such a case unpoison fails and memory failure runs | ||
| 1173 | * to the end. | ||
| 1174 | */ | ||
| 1175 | if (PageHuge(page)) { | ||
| 1176 | pr_debug("MCE: Memory failure is now running on free hugepage %#lx\n", pfn); | ||
| 1177 | return 0; | ||
| 1178 | } | ||
| 1148 | if (TestClearPageHWPoison(p)) | 1179 | if (TestClearPageHWPoison(p)) | 
| 1149 | atomic_long_sub(nr_pages, &mce_bad_pages); | 1180 | atomic_long_sub(nr_pages, &mce_bad_pages); | 
| 1150 | pr_info("MCE: Software-unpoisoned free page %#lx\n", pfn); | 1181 | pr_info("MCE: Software-unpoisoned free page %#lx\n", pfn); | 
| @@ -1162,9 +1193,9 @@ int unpoison_memory(unsigned long pfn) | |||
| 1162 | pr_info("MCE: Software-unpoisoned page %#lx\n", pfn); | 1193 | pr_info("MCE: Software-unpoisoned page %#lx\n", pfn); | 
| 1163 | atomic_long_sub(nr_pages, &mce_bad_pages); | 1194 | atomic_long_sub(nr_pages, &mce_bad_pages); | 
| 1164 | freeit = 1; | 1195 | freeit = 1; | 
| 1196 | if (PageHuge(page)) | ||
| 1197 | clear_page_hwpoison_huge_page(page); | ||
| 1165 | } | 1198 | } | 
| 1166 | if (PageHuge(p)) | ||
| 1167 | clear_page_hwpoison_huge_page(page); | ||
| 1168 | unlock_page(page); | 1199 | unlock_page(page); | 
| 1169 | 1200 | ||
| 1170 | put_page(page); | 1201 | put_page(page); | 
| @@ -1178,7 +1209,11 @@ EXPORT_SYMBOL(unpoison_memory); | |||
| 1178 | static struct page *new_page(struct page *p, unsigned long private, int **x) | 1209 | static struct page *new_page(struct page *p, unsigned long private, int **x) | 
| 1179 | { | 1210 | { | 
| 1180 | int nid = page_to_nid(p); | 1211 | int nid = page_to_nid(p); | 
| 1181 | return alloc_pages_exact_node(nid, GFP_HIGHUSER_MOVABLE, 0); | 1212 | if (PageHuge(p)) | 
| 1213 | return alloc_huge_page_node(page_hstate(compound_head(p)), | ||
| 1214 | nid); | ||
| 1215 | else | ||
| 1216 | return alloc_pages_exact_node(nid, GFP_HIGHUSER_MOVABLE, 0); | ||
| 1182 | } | 1217 | } | 
| 1183 | 1218 | ||
| 1184 | /* | 1219 | /* | 
| @@ -1206,8 +1241,15 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags) | |||
| 1206 | * was free. | 1241 | * was free. | 
| 1207 | */ | 1242 | */ | 
| 1208 | set_migratetype_isolate(p); | 1243 | set_migratetype_isolate(p); | 
| 1244 | /* | ||
| 1245 | * When the target page is a free hugepage, just remove it | ||
| 1246 | * from the free hugepage list. | ||
| 1247 | */ | ||
| 1209 | if (!get_page_unless_zero(compound_head(p))) { | 1248 | if (!get_page_unless_zero(compound_head(p))) { | 
| 1210 | if (is_free_buddy_page(p)) { | 1249 | if (PageHuge(p)) { | 
| 1250 | pr_info("get_any_page: %#lx free huge page\n", pfn); | ||
| 1251 | ret = dequeue_hwpoisoned_huge_page(compound_head(p)); | ||
| 1252 | } else if (is_free_buddy_page(p)) { | ||
| 1211 | pr_info("get_any_page: %#lx free buddy page\n", pfn); | 1253 | pr_info("get_any_page: %#lx free buddy page\n", pfn); | 
| 1212 | /* Set hwpoison bit while page is still isolated */ | 1254 | /* Set hwpoison bit while page is still isolated */ | 
| 1213 | SetPageHWPoison(p); | 1255 | SetPageHWPoison(p); | 
| @@ -1226,6 +1268,45 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags) | |||
| 1226 | return ret; | 1268 | return ret; | 
| 1227 | } | 1269 | } | 
| 1228 | 1270 | ||
| 1271 | static int soft_offline_huge_page(struct page *page, int flags) | ||
| 1272 | { | ||
| 1273 | int ret; | ||
| 1274 | unsigned long pfn = page_to_pfn(page); | ||
| 1275 | struct page *hpage = compound_head(page); | ||
| 1276 | LIST_HEAD(pagelist); | ||
| 1277 | |||
| 1278 | ret = get_any_page(page, pfn, flags); | ||
| 1279 | if (ret < 0) | ||
| 1280 | return ret; | ||
| 1281 | if (ret == 0) | ||
| 1282 | goto done; | ||
| 1283 | |||
| 1284 | if (PageHWPoison(hpage)) { | ||
| 1285 | put_page(hpage); | ||
| 1286 | pr_debug("soft offline: %#lx hugepage already poisoned\n", pfn); | ||
| 1287 | return -EBUSY; | ||
| 1288 | } | ||
| 1289 | |||
| 1290 | /* Keep page count to indicate a given hugepage is isolated. */ | ||
| 1291 | |||
| 1292 | list_add(&hpage->lru, &pagelist); | ||
| 1293 | ret = migrate_huge_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, 0); | ||
| 1294 | if (ret) { | ||
| 1295 | pr_debug("soft offline: %#lx: migration failed %d, type %lx\n", | ||
| 1296 | pfn, ret, page->flags); | ||
| 1297 | if (ret > 0) | ||
| 1298 | ret = -EIO; | ||
| 1299 | return ret; | ||
| 1300 | } | ||
| 1301 | done: | ||
| 1302 | if (!PageHWPoison(hpage)) | ||
| 1303 | atomic_long_add(1 << compound_order(hpage), &mce_bad_pages); | ||
| 1304 | set_page_hwpoison_huge_page(hpage); | ||
| 1305 | dequeue_hwpoisoned_huge_page(hpage); | ||
| 1306 | /* keep elevated page count for bad page */ | ||
| 1307 | return ret; | ||
| 1308 | } | ||
| 1309 | |||
| 1229 | /** | 1310 | /** | 
| 1230 | * soft_offline_page - Soft offline a page. | 1311 | * soft_offline_page - Soft offline a page. | 
| 1231 | * @page: page to offline | 1312 | * @page: page to offline | 
| @@ -1253,6 +1334,9 @@ int soft_offline_page(struct page *page, int flags) | |||
| 1253 | int ret; | 1334 | int ret; | 
| 1254 | unsigned long pfn = page_to_pfn(page); | 1335 | unsigned long pfn = page_to_pfn(page); | 
| 1255 | 1336 | ||
| 1337 | if (PageHuge(page)) | ||
| 1338 | return soft_offline_huge_page(page, flags); | ||
| 1339 | |||
| 1256 | ret = get_any_page(page, pfn, flags); | 1340 | ret = get_any_page(page, pfn, flags); | 
| 1257 | if (ret < 0) | 1341 | if (ret < 0) | 
| 1258 | return ret; | 1342 | return ret; | 
| diff --git a/mm/memory.c b/mm/memory.c index 98b58fecedef..af82741caaa4 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
| @@ -1450,7 +1450,8 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
| 1450 | if (ret & VM_FAULT_OOM) | 1450 | if (ret & VM_FAULT_OOM) | 
| 1451 | return i ? i : -ENOMEM; | 1451 | return i ? i : -ENOMEM; | 
| 1452 | if (ret & | 1452 | if (ret & | 
| 1453 | (VM_FAULT_HWPOISON|VM_FAULT_SIGBUS)) | 1453 | (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE| | 
| 1454 | VM_FAULT_SIGBUS)) | ||
| 1454 | return i ? i : -EFAULT; | 1455 | return i ? i : -EFAULT; | 
| 1455 | BUG(); | 1456 | BUG(); | 
| 1456 | } | 1457 | } | 
| diff --git a/mm/migrate.c b/mm/migrate.c index 38e7cad782f4..f8c9bccf2520 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
| @@ -32,6 +32,7 @@ | |||
| 32 | #include <linux/security.h> | 32 | #include <linux/security.h> | 
| 33 | #include <linux/memcontrol.h> | 33 | #include <linux/memcontrol.h> | 
| 34 | #include <linux/syscalls.h> | 34 | #include <linux/syscalls.h> | 
| 35 | #include <linux/hugetlb.h> | ||
| 35 | #include <linux/gfp.h> | 36 | #include <linux/gfp.h> | 
| 36 | 37 | ||
| 37 | #include "internal.h" | 38 | #include "internal.h" | 
| @@ -95,26 +96,34 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma, | |||
| 95 | pte_t *ptep, pte; | 96 | pte_t *ptep, pte; | 
| 96 | spinlock_t *ptl; | 97 | spinlock_t *ptl; | 
| 97 | 98 | ||
| 98 | pgd = pgd_offset(mm, addr); | 99 | if (unlikely(PageHuge(new))) { | 
| 99 | if (!pgd_present(*pgd)) | 100 | ptep = huge_pte_offset(mm, addr); | 
| 100 | goto out; | 101 | if (!ptep) | 
| 102 | goto out; | ||
| 103 | ptl = &mm->page_table_lock; | ||
| 104 | } else { | ||
| 105 | pgd = pgd_offset(mm, addr); | ||
| 106 | if (!pgd_present(*pgd)) | ||
| 107 | goto out; | ||
| 101 | 108 | ||
| 102 | pud = pud_offset(pgd, addr); | 109 | pud = pud_offset(pgd, addr); | 
| 103 | if (!pud_present(*pud)) | 110 | if (!pud_present(*pud)) | 
| 104 | goto out; | 111 | goto out; | 
| 105 | 112 | ||
| 106 | pmd = pmd_offset(pud, addr); | 113 | pmd = pmd_offset(pud, addr); | 
| 107 | if (!pmd_present(*pmd)) | 114 | if (!pmd_present(*pmd)) | 
| 108 | goto out; | 115 | goto out; | 
| 109 | 116 | ||
| 110 | ptep = pte_offset_map(pmd, addr); | 117 | ptep = pte_offset_map(pmd, addr); | 
| 111 | 118 | ||
| 112 | if (!is_swap_pte(*ptep)) { | 119 | if (!is_swap_pte(*ptep)) { | 
| 113 | pte_unmap(ptep); | 120 | pte_unmap(ptep); | 
| 114 | goto out; | 121 | goto out; | 
| 115 | } | 122 | } | 
| 123 | |||
| 124 | ptl = pte_lockptr(mm, pmd); | ||
| 125 | } | ||
| 116 | 126 | ||
| 117 | ptl = pte_lockptr(mm, pmd); | ||
| 118 | spin_lock(ptl); | 127 | spin_lock(ptl); | 
| 119 | pte = *ptep; | 128 | pte = *ptep; | 
| 120 | if (!is_swap_pte(pte)) | 129 | if (!is_swap_pte(pte)) | 
| @@ -130,10 +139,19 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma, | |||
| 130 | pte = pte_mkold(mk_pte(new, vma->vm_page_prot)); | 139 | pte = pte_mkold(mk_pte(new, vma->vm_page_prot)); | 
| 131 | if (is_write_migration_entry(entry)) | 140 | if (is_write_migration_entry(entry)) | 
| 132 | pte = pte_mkwrite(pte); | 141 | pte = pte_mkwrite(pte); | 
| 142 | #ifdef CONFIG_HUGETLB_PAGE | ||
| 143 | if (PageHuge(new)) | ||
| 144 | pte = pte_mkhuge(pte); | ||
| 145 | #endif | ||
| 133 | flush_cache_page(vma, addr, pte_pfn(pte)); | 146 | flush_cache_page(vma, addr, pte_pfn(pte)); | 
| 134 | set_pte_at(mm, addr, ptep, pte); | 147 | set_pte_at(mm, addr, ptep, pte); | 
| 135 | 148 | ||
| 136 | if (PageAnon(new)) | 149 | if (PageHuge(new)) { | 
| 150 | if (PageAnon(new)) | ||
| 151 | hugepage_add_anon_rmap(new, vma, addr); | ||
| 152 | else | ||
| 153 | page_dup_rmap(new); | ||
| 154 | } else if (PageAnon(new)) | ||
| 137 | page_add_anon_rmap(new, vma, addr); | 155 | page_add_anon_rmap(new, vma, addr); | 
| 138 | else | 156 | else | 
| 139 | page_add_file_rmap(new); | 157 | page_add_file_rmap(new); | 
| @@ -276,11 +294,59 @@ static int migrate_page_move_mapping(struct address_space *mapping, | |||
| 276 | } | 294 | } | 
| 277 | 295 | ||
| 278 | /* | 296 | /* | 
| 297 | * The expected number of remaining references is the same as that | ||
| 298 | * of migrate_page_move_mapping(). | ||
| 299 | */ | ||
| 300 | int migrate_huge_page_move_mapping(struct address_space *mapping, | ||
| 301 | struct page *newpage, struct page *page) | ||
| 302 | { | ||
| 303 | int expected_count; | ||
| 304 | void **pslot; | ||
| 305 | |||
| 306 | if (!mapping) { | ||
| 307 | if (page_count(page) != 1) | ||
| 308 | return -EAGAIN; | ||
| 309 | return 0; | ||
| 310 | } | ||
| 311 | |||
| 312 | spin_lock_irq(&mapping->tree_lock); | ||
| 313 | |||
| 314 | pslot = radix_tree_lookup_slot(&mapping->page_tree, | ||
| 315 | page_index(page)); | ||
| 316 | |||
| 317 | expected_count = 2 + page_has_private(page); | ||
| 318 | if (page_count(page) != expected_count || | ||
| 319 | (struct page *)radix_tree_deref_slot(pslot) != page) { | ||
| 320 | spin_unlock_irq(&mapping->tree_lock); | ||
| 321 | return -EAGAIN; | ||
| 322 | } | ||
| 323 | |||
| 324 | if (!page_freeze_refs(page, expected_count)) { | ||
| 325 | spin_unlock_irq(&mapping->tree_lock); | ||
| 326 | return -EAGAIN; | ||
| 327 | } | ||
| 328 | |||
| 329 | get_page(newpage); | ||
| 330 | |||
| 331 | radix_tree_replace_slot(pslot, newpage); | ||
| 332 | |||
| 333 | page_unfreeze_refs(page, expected_count); | ||
| 334 | |||
| 335 | __put_page(page); | ||
| 336 | |||
| 337 | spin_unlock_irq(&mapping->tree_lock); | ||
| 338 | return 0; | ||
| 339 | } | ||
| 340 | |||
| 341 | /* | ||
| 279 | * Copy the page to its new location | 342 | * Copy the page to its new location | 
| 280 | */ | 343 | */ | 
| 281 | static void migrate_page_copy(struct page *newpage, struct page *page) | 344 | void migrate_page_copy(struct page *newpage, struct page *page) | 
| 282 | { | 345 | { | 
| 283 | copy_highpage(newpage, page); | 346 | if (PageHuge(page)) | 
| 347 | copy_huge_page(newpage, page); | ||
| 348 | else | ||
| 349 | copy_highpage(newpage, page); | ||
| 284 | 350 | ||
| 285 | if (PageError(page)) | 351 | if (PageError(page)) | 
| 286 | SetPageError(newpage); | 352 | SetPageError(newpage); | 
| @@ -724,6 +790,92 @@ move_newpage: | |||
| 724 | } | 790 | } | 
| 725 | 791 | ||
| 726 | /* | 792 | /* | 
| 793 | * Counterpart of unmap_and_move_page() for hugepage migration. | ||
| 794 | * | ||
| 795 | * This function doesn't wait for the completion of hugepage I/O | ||
| 796 | * because there is no race between I/O and migration for hugepage. | ||
| 797 | * Note that currently hugepage I/O occurs only in direct I/O | ||
| 798 | * where no lock is held and PG_writeback is irrelevant, | ||
| 799 | * and writeback status of all subpages is counted in the reference | ||
| 800 | * count of the head page (i.e. if all subpages of a 2MB hugepage are | ||
| 801 | * under direct I/O, the reference of the head page is 512 and a bit more.) | ||
| 802 | * This means that when we try to migrate hugepage whose subpages are | ||
| 803 | * doing direct I/O, some references remain after try_to_unmap() and | ||
| 804 | * hugepage migration fails without data corruption. | ||
| 805 | * | ||
| 806 | * There is also no race when direct I/O is issued on the page under migration, | ||
| 807 | * because then pte is replaced with migration swap entry and direct I/O code | ||
| 808 | * will wait in the page fault for migration to complete. | ||
| 809 | */ | ||
| 810 | static int unmap_and_move_huge_page(new_page_t get_new_page, | ||
| 811 | unsigned long private, struct page *hpage, | ||
| 812 | int force, int offlining) | ||
| 813 | { | ||
| 814 | int rc = 0; | ||
| 815 | int *result = NULL; | ||
| 816 | struct page *new_hpage = get_new_page(hpage, private, &result); | ||
| 817 | int rcu_locked = 0; | ||
| 818 | struct anon_vma *anon_vma = NULL; | ||
| 819 | |||
| 820 | if (!new_hpage) | ||
| 821 | return -ENOMEM; | ||
| 822 | |||
| 823 | rc = -EAGAIN; | ||
| 824 | |||
| 825 | if (!trylock_page(hpage)) { | ||
| 826 | if (!force) | ||
| 827 | goto out; | ||
| 828 | lock_page(hpage); | ||
| 829 | } | ||
| 830 | |||
| 831 | if (PageAnon(hpage)) { | ||
| 832 | rcu_read_lock(); | ||
| 833 | rcu_locked = 1; | ||
| 834 | |||
| 835 | if (page_mapped(hpage)) { | ||
| 836 | anon_vma = page_anon_vma(hpage); | ||
| 837 | atomic_inc(&anon_vma->external_refcount); | ||
| 838 | } | ||
| 839 | } | ||
| 840 | |||
| 841 | try_to_unmap(hpage, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS); | ||
| 842 | |||
| 843 | if (!page_mapped(hpage)) | ||
| 844 | rc = move_to_new_page(new_hpage, hpage, 1); | ||
| 845 | |||
| 846 | if (rc) | ||
| 847 | remove_migration_ptes(hpage, hpage); | ||
| 848 | |||
| 849 | if (anon_vma && atomic_dec_and_lock(&anon_vma->external_refcount, | ||
| 850 | &anon_vma->lock)) { | ||
| 851 | int empty = list_empty(&anon_vma->head); | ||
| 852 | spin_unlock(&anon_vma->lock); | ||
| 853 | if (empty) | ||
| 854 | anon_vma_free(anon_vma); | ||
| 855 | } | ||
| 856 | |||
| 857 | if (rcu_locked) | ||
| 858 | rcu_read_unlock(); | ||
| 859 | out: | ||
| 860 | unlock_page(hpage); | ||
| 861 | |||
| 862 | if (rc != -EAGAIN) { | ||
| 863 | list_del(&hpage->lru); | ||
| 864 | put_page(hpage); | ||
| 865 | } | ||
| 866 | |||
| 867 | put_page(new_hpage); | ||
| 868 | |||
| 869 | if (result) { | ||
| 870 | if (rc) | ||
| 871 | *result = rc; | ||
| 872 | else | ||
| 873 | *result = page_to_nid(new_hpage); | ||
| 874 | } | ||
| 875 | return rc; | ||
| 876 | } | ||
| 877 | |||
| 878 | /* | ||
| 727 | * migrate_pages | 879 | * migrate_pages | 
| 728 | * | 880 | * | 
| 729 | * The function takes one list of pages to migrate and a function | 881 | * The function takes one list of pages to migrate and a function | 
| @@ -788,6 +940,52 @@ out: | |||
| 788 | return nr_failed + retry; | 940 | return nr_failed + retry; | 
| 789 | } | 941 | } | 
| 790 | 942 | ||
| 943 | int migrate_huge_pages(struct list_head *from, | ||
| 944 | new_page_t get_new_page, unsigned long private, int offlining) | ||
| 945 | { | ||
| 946 | int retry = 1; | ||
| 947 | int nr_failed = 0; | ||
| 948 | int pass = 0; | ||
| 949 | struct page *page; | ||
| 950 | struct page *page2; | ||
| 951 | int rc; | ||
| 952 | |||
| 953 | for (pass = 0; pass < 10 && retry; pass++) { | ||
| 954 | retry = 0; | ||
| 955 | |||
| 956 | list_for_each_entry_safe(page, page2, from, lru) { | ||
| 957 | cond_resched(); | ||
| 958 | |||
| 959 | rc = unmap_and_move_huge_page(get_new_page, | ||
| 960 | private, page, pass > 2, offlining); | ||
| 961 | |||
| 962 | switch(rc) { | ||
| 963 | case -ENOMEM: | ||
| 964 | goto out; | ||
| 965 | case -EAGAIN: | ||
| 966 | retry++; | ||
| 967 | break; | ||
| 968 | case 0: | ||
| 969 | break; | ||
| 970 | default: | ||
| 971 | /* Permanent failure */ | ||
| 972 | nr_failed++; | ||
| 973 | break; | ||
| 974 | } | ||
| 975 | } | ||
| 976 | } | ||
| 977 | rc = 0; | ||
| 978 | out: | ||
| 979 | |||
| 980 | list_for_each_entry_safe(page, page2, from, lru) | ||
| 981 | put_page(page); | ||
| 982 | |||
| 983 | if (rc) | ||
| 984 | return rc; | ||
| 985 | |||
| 986 | return nr_failed + retry; | ||
| 987 | } | ||
| 988 | |||
| 791 | #ifdef CONFIG_NUMA | 989 | #ifdef CONFIG_NUMA | 
| 792 | /* | 990 | /* | 
| 793 | * Move a list of individual pages | 991 | * Move a list of individual pages | 
| @@ -780,10 +780,10 @@ void page_move_anon_rmap(struct page *page, | |||
| 780 | } | 780 | } | 
| 781 | 781 | ||
| 782 | /** | 782 | /** | 
| 783 | * __page_set_anon_rmap - setup new anonymous rmap | 783 | * __page_set_anon_rmap - set up new anonymous rmap | 
| 784 | * @page: the page to add the mapping to | 784 | * @page: Page to add to rmap | 
| 785 | * @vma: the vm area in which the mapping is added | 785 | * @vma: VM area to add page to. | 
| 786 | * @address: the user virtual address mapped | 786 | * @address: User virtual address of the mapping | 
| 787 | * @exclusive: the page is exclusively owned by the current process | 787 | * @exclusive: the page is exclusively owned by the current process | 
| 788 | */ | 788 | */ | 
| 789 | static void __page_set_anon_rmap(struct page *page, | 789 | static void __page_set_anon_rmap(struct page *page, | 
| @@ -793,25 +793,16 @@ static void __page_set_anon_rmap(struct page *page, | |||
| 793 | 793 | ||
| 794 | BUG_ON(!anon_vma); | 794 | BUG_ON(!anon_vma); | 
| 795 | 795 | ||
| 796 | if (PageAnon(page)) | ||
| 797 | return; | ||
| 798 | |||
| 796 | /* | 799 | /* | 
| 797 | * If the page isn't exclusively mapped into this vma, | 800 | * If the page isn't exclusively mapped into this vma, | 
| 798 | * we must use the _oldest_ possible anon_vma for the | 801 | * we must use the _oldest_ possible anon_vma for the | 
| 799 | * page mapping! | 802 | * page mapping! | 
| 800 | */ | 803 | */ | 
| 801 | if (!exclusive) { | 804 | if (!exclusive) | 
| 802 | if (PageAnon(page)) | ||
| 803 | return; | ||
| 804 | anon_vma = anon_vma->root; | 805 | anon_vma = anon_vma->root; | 
| 805 | } else { | ||
| 806 | /* | ||
| 807 | * In this case, swapped-out-but-not-discarded swap-cache | ||
| 808 | * is remapped. So, no need to update page->mapping here. | ||
| 809 | * We convice anon_vma poitned by page->mapping is not obsolete | ||
| 810 | * because vma->anon_vma is necessary to be a family of it. | ||
| 811 | */ | ||
| 812 | if (PageAnon(page)) | ||
| 813 | return; | ||
| 814 | } | ||
| 815 | 806 | ||
| 816 | anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON; | 807 | anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON; | 
| 817 | page->mapping = (struct address_space *) anon_vma; | 808 | page->mapping = (struct address_space *) anon_vma; | 
