hugepage: support ZERO_PAGE()

Presently hugepages do not use the zero page at all, because the zero page is only used for coredumping and hugepages could not be core dumped. However, hugepage coredumping has now been implemented, so we should implement zero page support for hugepages as well.

Implementation notes:

o Why do we only check VM_SHARED for the zero page? Normal pages are checked like this:

    static inline int use_zero_page(struct vm_area_struct *vma)
    {
            if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
                    return 0;
            return !vma->vm_ops || !vma->vm_ops->fault;
    }

  First, hugepages are never mlock()ed, so we are not concerned with VM_LOCKED. Second, hugetlbfs is a pseudo filesystem, not a real filesystem, and it does not have any file backing, so checking ops->fault is meaningless.

o Why don't we use the zero page if !pte? !pte indicates that the {pud, pmd} doesn't exist or that some error happened, so we shouldn't return the zero page if any error occurred.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Adam Litke <agl@us.ibm.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Kawai Hidehiro <hidehiro.kawai.ez@hitachi.com>
Cc: Mel Gorman <mel@skynet.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> 2008-10-18 23:27:10 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2008-10-20 11:52:32 -0400
commit: 4b2e38ad703541f7845c2d766426148b8d1aa329 (patch)
tree: aaafbec5325d15c38c382c655120fb6492c11f82 /mm
parent: e575f111dc0f27044e170580e7de50985ab3e011 (diff)
1 files changed, 19 insertions, 3 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ab79cd4dd23c..ce8cbb29860b 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2071,6 +2071,14 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address,
        return NULL;
 }
+static int huge_zeropage_ok(pte_t *ptep, int write, int shared)	/* nonzero only when ptep is non-NULL, write and shared are both 0, and the huge PTE is none */
+{
+        if (!ptep || write || shared)	/* NULL ptep, a write request, or a shared mapping flag: always answer 0 */
+                return 0;
+        else
+                return huge_pte_none(huge_ptep_get(ptep));	/* otherwise the answer is whether the huge PTE read through ptep is none */
+}
 int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                        struct page **pages, struct vm_area_struct **vmas,
                        unsigned long *position, int *length, int i,
@@ -2080,6 +2088,8 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
        unsigned long vaddr = *position;
        int remainder = *length;
        struct hstate *h = hstate_vma(vma);
+        int zeropage_ok = 0;
+        int shared = vma->vm_flags & VM_SHARED;
        spin_lock(&mm->page_table_lock);
        while (vaddr < vma->vm_end && remainder) {
@@ -2092,8 +2102,11 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 * first, for the page indexing below to work.
                 */
                pte = huge_pte_offset(mm, vaddr & huge_page_mask(h));
+                if (huge_zeropage_ok(pte, write, shared))
+                        zeropage_ok = 1;
-                if (!pte || huge_pte_none(huge_ptep_get(pte)) ||
+                if (!pte ||
+                    (huge_pte_none(huge_ptep_get(pte)) && !zeropage_ok) ||
                    (write && !pte_write(huge_ptep_get(pte)))) {
                        int ret;
@@ -2113,8 +2126,11 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                page = pte_page(huge_ptep_get(pte));
 same_page:
                if (pages) {
-                        get_page(page);
+                        if (zeropage_ok)
-                        pages[i] = page + pfn_offset;
+                                pages[i] = ZERO_PAGE(0);
+                        else
+                                pages[i] = page + pfn_offset;
+                        get_page(pages[i]);
                }
                if (vmas)
author	KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>	2008-10-18 23:27:10 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2008-10-20 11:52:32 -0400
commit	4b2e38ad703541f7845c2d766426148b8d1aa329 (patch)
tree	aaafbec5325d15c38c382c655120fb6492c11f82 /mm
parent	e575f111dc0f27044e170580e7de50985ab3e011 (diff)