From d00285884c0892bb1310df96bce6056e9ce9b9d9 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Fri, 22 Mar 2013 15:04:40 -0700 Subject: mm/hugetlb: fix total hugetlbfs pages count when using memory overcommit accouting hugetlb_total_pages is used for overcommit calculations but the current implementation considers only the default hugetlb page size (which is either the first defined hugepage size or the one specified by default_hugepagesz kernel boot parameter). If the system is configured for more than one hugepage size, which is possible since commit a137e1cc6d6e ("hugetlbfs: per mount huge page sizes") then the overcommit estimation done by __vm_enough_memory() (resp. shown by meminfo_proc_show) is not precise - there is an impression of more available/allowed memory. This can lead to an unexpected ENOMEM/EFAULT resp. SIGSEGV when memory is accounted. Testcase: boot: hugepagesz=1G hugepages=1 the default overcommit ratio is 50 before patch: egrep 'CommitLimit' /proc/meminfo CommitLimit: 55434168 kB after patch: egrep 'CommitLimit' /proc/meminfo CommitLimit: 54909880 kB [akpm@linux-foundation.org: coding-style tweak] Signed-off-by: Wanpeng Li Acked-by: Michal Hocko Cc: "Aneesh Kumar K.V" Cc: Hillf Danton Cc: KAMEZAWA Hiroyuki Cc: [3.0+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 0a0be33bb199..ca9a7c6d7e97 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2124,8 +2124,12 @@ int hugetlb_report_node_meminfo(int nid, char *buf) /* Return the number pages of memory we physically have, in PAGE_SIZE units. */ unsigned long hugetlb_total_pages(void) { - struct hstate *h = &default_hstate; - return h->nr_huge_pages * pages_per_huge_page(h); + struct hstate *h; + unsigned long nr_total_pages = 0; + + for_each_hstate(h) + nr_total_pages += h->nr_huge_pages * pages_per_huge_page(h); + return nr_total_pages; } static int hugetlb_acct_memory(struct hstate *h, long delta) -- cgit v1.2.2 From 9cc3a5bd40067b9a0fbd49199d0780463fc2140f Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 17 Apr 2013 15:58:30 -0700 Subject: hugetlbfs: add swap entry check in follow_hugetlb_page() With applying the previous patch "hugetlbfs: stop setting VM_DONTDUMP in initializing vma(VM_HUGETLB)" to reenable hugepage coredump, if a memory error happens on a hugepage and the affected processes try to access the error hugepage, we hit VM_BUG_ON(atomic_read(&page->_count) <= 0) in get_page(). The reason for this bug is that coredump-related code doesn't recognise "hugepage hwpoison entry" with which a pmd entry is replaced when a memory error occurs on a hugepage. In other words, physical address information is stored in different bit layout between hugepage hwpoison entry and pmd entry, so follow_hugetlb_page() which is called in get_dump_page() returns a wrong page from a given address. The expected behavior is like this: absent is_swap_pte FOLL_DUMP Expected behavior ------------------------------------------------------------------- true false false hugetlb_fault false true false hugetlb_fault false false false return page true false true skip page (to avoid allocation) false true true hugetlb_fault false false true return page With this patch, we can call hugetlb_fault() and take proper actions (we wait for migration entries, fail with VM_FAULT_HWPOISON_LARGE for hwpoisoned entries,) and as the result we can dump all hugepages except for hwpoisoned ones. Signed-off-by: Naoya Horiguchi Cc: Rik van Riel Acked-by: Michal Hocko Cc: HATAYAMA Daisuke Acked-by: KOSAKI Motohiro Acked-by: David Rientjes Cc: [2.6.34+?] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'mm/hugetlb.c') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index ca9a7c6d7e97..1a12f5b9a0ab 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2961,7 +2961,17 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, break; } - if (absent || + /* + * We need call hugetlb_fault for both hugepages under migration + * (in which case hugetlb_fault waits for the migration,) and + * hwpoisoned hugepages (in which case we need to prevent the + * caller from accessing to them.) In order to do this, we use + * here is_swap_pte instead of is_hugetlb_entry_migration and + * is_hugetlb_entry_hwpoisoned. This is because it simply covers + * both cases, and because we can't follow correct pages + * directly from any kind of swap entries. + */ + if (absent || is_swap_pte(huge_ptep_get(pte)) || ((flags & FOLL_WRITE) && !pte_write(huge_ptep_get(pte)))) { int ret; -- cgit v1.2.2