From 4b2e38ad703541f7845c2d766426148b8d1aa329 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Sat, 18 Oct 2008 20:27:10 -0700 Subject: hugepage: support ZERO_PAGE() Presently hugepages don't use the zero page at all, because the zero page is only used for coredumping and hugepages couldn't be core dumped. However, we have now implemented hugepage coredumping. Therefore we should implement zero page support for hugepages. Implementation notes: o Why do we only check VM_SHARED for the zero page? For normal pages the check is: static inline int use_zero_page(struct vm_area_struct *vma) { if (vma->vm_flags & (VM_LOCKED | VM_SHARED)) return 0; return !vma->vm_ops || !vma->vm_ops->fault; } First, hugepages are never mlock()ed, so we aren't concerned with VM_LOCKED. Second, hugetlbfs is a pseudo filesystem, not a real filesystem, and it doesn't have any file backing. Thus checking ops->fault is meaningless. o Why don't we use the zero page if !pte? !pte indicates that the {pud, pmd} doesn't exist or that some error happened. So we shouldn't return the zero page if any error occurred. 
Signed-off-by: KOSAKI Motohiro Cc: Adam Litke Cc: Hugh Dickins Cc: Kawai Hidehiro Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index ab79cd4dd23c..ce8cbb29860b 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2071,6 +2071,14 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address, return NULL; } +static int huge_zeropage_ok(pte_t *ptep, int write, int shared) +{ + if (!ptep || write || shared) + return 0; + else + return huge_pte_none(huge_ptep_get(ptep)); +} + int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, int *length, int i, @@ -2080,6 +2088,8 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long vaddr = *position; int remainder = *length; struct hstate *h = hstate_vma(vma); + int zeropage_ok = 0; + int shared = vma->vm_flags & VM_SHARED; spin_lock(&mm->page_table_lock); while (vaddr < vma->vm_end && remainder) { @@ -2092,8 +2102,11 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, * first, for the page indexing below to work. */ pte = huge_pte_offset(mm, vaddr & huge_page_mask(h)); + if (huge_zeropage_ok(pte, write, shared)) + zeropage_ok = 1; - if (!pte || huge_pte_none(huge_ptep_get(pte)) || + if (!pte || + (huge_pte_none(huge_ptep_get(pte)) && !zeropage_ok) || (write && !pte_write(huge_ptep_get(pte)))) { int ret; @@ -2113,8 +2126,11 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, page = pte_page(huge_ptep_get(pte)); same_page: if (pages) { - get_page(page); - pages[i] = page + pfn_offset; + if (zeropage_ok) + pages[i] = ZERO_PAGE(0); + else + pages[i] = page + pfn_offset; + get_page(pages[i]); } if (vmas) -- cgit v1.2.2