diff options
author | Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> | 2010-05-27 20:29:20 -0400 |
---|---|---|
committer | Andi Kleen <ak@linux.intel.com> | 2010-08-11 03:22:46 -0400 |
commit | 93f70f900da36fbc19c13c2aa04b2e468c8d00fb (patch) | |
tree | 7868f891bca0ed18c9806771a68feac0b4010517 | |
parent | c9fbdd5f131440981b124883656ea21fb12cde4a (diff) |
HWPOISON, hugetlb: isolate corrupted hugepage
If the error hugepage is not in use, we can fully recover from the error
by dequeuing it from the freelist, so return RECOVERED.
Otherwise, whether or not we can recover depends on user processes,
so return DELAYED.
Dependency:
"HWPOISON, hugetlb: enable error handling path for hugepage"
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
-rw-r--r-- | include/linux/hugetlb.h | 2 | ||||
-rw-r--r-- | mm/hugetlb.c | 16 | ||||
-rw-r--r-- | mm/memory-failure.c | 28 |
3 files changed, 38 insertions, 8 deletions
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index e688fd89354d..f479700df61b 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h | |||
@@ -43,6 +43,7 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to, | |||
43 | struct vm_area_struct *vma, | 43 | struct vm_area_struct *vma, |
44 | int acctflags); | 44 | int acctflags); |
45 | void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed); | 45 | void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed); |
46 | void __isolate_hwpoisoned_huge_page(struct page *page); | ||
46 | 47 | ||
47 | extern unsigned long hugepages_treat_as_movable; | 48 | extern unsigned long hugepages_treat_as_movable; |
48 | extern const unsigned long hugetlb_zero, hugetlb_infinity; | 49 | extern const unsigned long hugetlb_zero, hugetlb_infinity; |
@@ -100,6 +101,7 @@ static inline void hugetlb_report_meminfo(struct seq_file *m) | |||
100 | #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; }) | 101 | #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; }) |
101 | #define hugetlb_fault(mm, vma, addr, flags) ({ BUG(); 0; }) | 102 | #define hugetlb_fault(mm, vma, addr, flags) ({ BUG(); 0; }) |
102 | #define huge_pte_offset(mm, address) 0 | 103 | #define huge_pte_offset(mm, address) 0 |
104 | #define __isolate_hwpoisoned_huge_page(page) 0 | ||
103 | 105 | ||
104 | #define hugetlb_change_protection(vma, address, end, newprot) | 106 | #define hugetlb_change_protection(vma, address, end, newprot) |
105 | 107 | ||
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index aa3c51739378..8c163f64cf10 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
@@ -2825,3 +2825,19 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed) | |||
2825 | hugetlb_put_quota(inode->i_mapping, (chg - freed)); | 2825 | hugetlb_put_quota(inode->i_mapping, (chg - freed)); |
2826 | hugetlb_acct_memory(h, -(chg - freed)); | 2826 | hugetlb_acct_memory(h, -(chg - freed)); |
2827 | } | 2827 | } |
2828 | |||
2829 | /* | ||
2830 | * This function is called from memory failure code. | ||
2831 | * Assume the caller holds page lock of the head page. | ||
2832 | */ | ||
2833 | void __isolate_hwpoisoned_huge_page(struct page *hpage) | ||
2834 | { | ||
2835 | struct hstate *h = page_hstate(hpage); | ||
2836 | int nid = page_to_nid(hpage); | ||
2837 | |||
2838 | spin_lock(&hugetlb_lock); | ||
2839 | list_del(&hpage->lru); | ||
2840 | h->free_huge_pages--; | ||
2841 | h->free_huge_pages_node[nid]--; | ||
2842 | spin_unlock(&hugetlb_lock); | ||
2843 | } | ||
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 473f15a3356d..d0b420aba726 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c | |||
@@ -690,17 +690,29 @@ static int me_swapcache_clean(struct page *p, unsigned long pfn) | |||
690 | /* | 690 | /* |
691 | * Huge pages. Needs work. | 691 | * Huge pages. Needs work. |
692 | * Issues: | 692 | * Issues: |
693 | * No rmap support so we cannot find the original mapper. In theory could walk | 693 | * - Error on hugepage is contained in hugepage unit (not in raw page unit.) |
694 | * all MMs and look for the mappings, but that would be non atomic and racy. | 694 | * To narrow down kill region to one page, we need to break up pmd. |
695 | * Need rmap for hugepages for this. Alternatively we could employ a heuristic, | 695 | * - To support soft-offlining for hugepage, we need to support hugepage |
696 | * like just walking the current process and hoping it has it mapped (that | 696 | * migration. |
697 | * should be usually true for the common "shared database cache" case) | ||
698 | * Should handle free huge pages and dequeue them too, but this needs to | ||
699 | * handle huge page accounting correctly. | ||
700 | */ | 697 | */ |
701 | static int me_huge_page(struct page *p, unsigned long pfn) | 698 | static int me_huge_page(struct page *p, unsigned long pfn) |
702 | { | 699 | { |
703 | return FAILED; | 700 | struct page *hpage = compound_head(p); |
701 | /* | ||
702 | * We can safely recover from error on free or reserved (i.e. | ||
703 | * not in-use) hugepage by dequeuing it from freelist. | ||
704 | * To check whether a hugepage is in-use or not, we can't use | ||
705 | * page->lru because it can be used in other hugepage operations, | ||
706 | * such as __unmap_hugepage_range() and gather_surplus_pages(). | ||
707 | * So instead we use page_mapping() and PageAnon(). | ||
708 | * We assume that this function is called with page lock held, | ||
709 | * so there is no race between isolation and mapping/unmapping. | ||
710 | */ | ||
711 | if (!(page_mapping(hpage) || PageAnon(hpage))) { | ||
712 | __isolate_hwpoisoned_huge_page(hpage); | ||
713 | return RECOVERED; | ||
714 | } | ||
715 | return DELAYED; | ||
704 | } | 716 | } |
705 | 717 | ||
706 | /* | 718 | /* |