aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNaoya Horiguchi <n-horiguchi@ah.jp.nec.com>2010-05-27 20:29:20 -0400
committerAndi Kleen <ak@linux.intel.com>2010-08-11 03:22:46 -0400
commit93f70f900da36fbc19c13c2aa04b2e468c8d00fb (patch)
tree7868f891bca0ed18c9806771a68feac0b4010517
parentc9fbdd5f131440981b124883656ea21fb12cde4a (diff)
HWPOISON, hugetlb: isolate corrupted hugepage
If the error hugepage is not in use, we can fully recover from the error by dequeuing it from the freelist, so return RECOVERED. Otherwise, whether or not we can recover depends on user processes, so return DELAYED. Dependency: "HWPOISON, hugetlb: enable error handling path for hugepage" Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> Cc: Andrew Morton <akpm@linux-foundation.org> Acked-by: Fengguang Wu <fengguang.wu@intel.com> Signed-off-by: Andi Kleen <ak@linux.intel.com>
-rw-r--r--include/linux/hugetlb.h2
-rw-r--r--mm/hugetlb.c16
-rw-r--r--mm/memory-failure.c28
3 files changed, 38 insertions, 8 deletions
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index e688fd89354d..f479700df61b 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -43,6 +43,7 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to,
43 struct vm_area_struct *vma, 43 struct vm_area_struct *vma,
44 int acctflags); 44 int acctflags);
45void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed); 45void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed);
46void __isolate_hwpoisoned_huge_page(struct page *page);
46 47
47extern unsigned long hugepages_treat_as_movable; 48extern unsigned long hugepages_treat_as_movable;
48extern const unsigned long hugetlb_zero, hugetlb_infinity; 49extern const unsigned long hugetlb_zero, hugetlb_infinity;
@@ -100,6 +101,7 @@ static inline void hugetlb_report_meminfo(struct seq_file *m)
100#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; }) 101#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; })
101#define hugetlb_fault(mm, vma, addr, flags) ({ BUG(); 0; }) 102#define hugetlb_fault(mm, vma, addr, flags) ({ BUG(); 0; })
102#define huge_pte_offset(mm, address) 0 103#define huge_pte_offset(mm, address) 0
104#define __isolate_hwpoisoned_huge_page(page) 0
103 105
104#define hugetlb_change_protection(vma, address, end, newprot) 106#define hugetlb_change_protection(vma, address, end, newprot)
105 107
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index aa3c51739378..8c163f64cf10 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2825,3 +2825,19 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
2825 hugetlb_put_quota(inode->i_mapping, (chg - freed)); 2825 hugetlb_put_quota(inode->i_mapping, (chg - freed));
2826 hugetlb_acct_memory(h, -(chg - freed)); 2826 hugetlb_acct_memory(h, -(chg - freed));
2827} 2827}
2828
2829/*
2830 * This function is called from memory failure code.
2831 * Assume the caller holds page lock of the head page.
2832 */
2833void __isolate_hwpoisoned_huge_page(struct page *hpage)
2834{
2835 struct hstate *h = page_hstate(hpage);
2836 int nid = page_to_nid(hpage);
2837
2838 spin_lock(&hugetlb_lock);
2839 list_del(&hpage->lru);
2840 h->free_huge_pages--;
2841 h->free_huge_pages_node[nid]--;
2842 spin_unlock(&hugetlb_lock);
2843}
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 473f15a3356d..d0b420aba726 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -690,17 +690,29 @@ static int me_swapcache_clean(struct page *p, unsigned long pfn)
690/* 690/*
691 * Huge pages. Needs work. 691 * Huge pages. Needs work.
692 * Issues: 692 * Issues:
693 * No rmap support so we cannot find the original mapper. In theory could walk 693 * - Error on hugepage is contained in hugepage unit (not in raw page unit.)
694 * all MMs and look for the mappings, but that would be non atomic and racy. 694 * To narrow down kill region to one page, we need to break up pmd.
695 * Need rmap for hugepages for this. Alternatively we could employ a heuristic, 695 * - To support soft-offlining for hugepage, we need to support hugepage
696 * like just walking the current process and hoping it has it mapped (that 696 * migration.
697 * should be usually true for the common "shared database cache" case)
698 * Should handle free huge pages and dequeue them too, but this needs to
699 * handle huge page accounting correctly.
700 */ 697 */
701static int me_huge_page(struct page *p, unsigned long pfn) 698static int me_huge_page(struct page *p, unsigned long pfn)
702{ 699{
703 return FAILED; 700 struct page *hpage = compound_head(p);
701 /*
702 * We can safely recover from error on free or reserved (i.e.
703 * not in-use) hugepage by dequeuing it from freelist.
704 * To check whether a hugepage is in-use or not, we can't use
705 * page->lru because it can be used in other hugepage operations,
706 * such as __unmap_hugepage_range() and gather_surplus_pages().
707 * So instead we use page_mapping() and PageAnon().
708 * We assume that this function is called with page lock held,
709 * so there is no race between isolation and mapping/unmapping.
710 */
711 if (!(page_mapping(hpage) || PageAnon(hpage))) {
712 __isolate_hwpoisoned_huge_page(hpage);
713 return RECOVERED;
714 }
715 return DELAYED;
704} 716}
705 717
706/* 718/*