author     Chen, Kenneth W <kenneth.w.chen@intel.com>    2006-03-22 03:09:03 -0500
committer  Linus Torvalds <torvalds@g5.osdl.org>         2006-03-22 10:54:04 -0500
commit     d5d4b0aa4e1430d73050babba999365593bdb9d2 (patch)
tree       67199d156f61217f9493d31aa4a9bfbb9c97412e /mm
parent     bba1e9b2111b14625f670bd07e57fd7ed57ce804 (diff)
[PATCH] optimize follow_hugetlb_page
follow_hugetlb_page() walks a range of user virtual addresses and fills an
array, passed in through the argument list, with struct page pointers. It
also takes a reference on each page via get_page(). For a compound page,
get_page() actually traverses back to the head page via the page_private()
macro and then bumps the reference count on that head page. Since we are
doing a virt-to-pte lookup, the kernel already has a struct page pointer for
the head page. So instead of descending into the constituent small page's
struct page and then following a link back to the head page, optimize this
by incrementing the reference count directly on the head page.
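A minimal userspace sketch of the idea (an analogy only, not kernel code: the
struct page layout, the back-link standing in for page_private(), and the
refcounting below are illustrative assumptions). It models a huge page as an
array of page descriptors whose tail entries point back to the head, and
compares taking the reference through a tail entry with pinning the head
directly and computing the tail address by offset:

#include <stdio.h>

#define PAGES_PER_HPAGE 8          /* stand-in for HPAGE_SIZE / PAGE_SIZE */

struct page {
	struct page *head;         /* stand-in for the page_private() back-link */
	int refcount;
};

/* Old-style path: land on the tail descriptor, then chase its link to the head. */
static struct page *get_page_via_tail(struct page *hpage, unsigned long off)
{
	struct page *tail = &hpage[off];   /* touches the tail struct page */
	tail->head->refcount++;
	return tail;
}

/* New-style path: pin the head directly and compute the tail address by offset. */
static struct page *get_page_via_head(struct page *hpage, unsigned long off)
{
	hpage->refcount++;                 /* only the head descriptor is touched */
	return hpage + off;
}

int main(void)
{
	struct page hpage[PAGES_PER_HPAGE] = { 0 };

	for (int i = 0; i < PAGES_PER_HPAGE; i++)
		hpage[i].head = &hpage[0];

	struct page *a = get_page_via_tail(hpage, 3);
	struct page *b = get_page_via_head(hpage, 3);

	printf("same page returned: %d, head refcount: %d\n",
	       a == b, hpage[0].refcount);
	return 0;
}

Both paths return the same small-page descriptor and take the reference on the
head, but only the second one avoids reading the tail descriptor's memory at
all, which is the point of the patch.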
The benefit is that we don't take a cache miss on accessing the struct page
for the corresponding user address and, more importantly, we don't pollute
the cache with a not-very-useful round trip of pointer chasing. This gives a
moderate performance gain on an I/O-intensive database transaction workload.
Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'mm')
-rw-r--r--  mm/hugetlb.c | 25
1 file changed, 17 insertions(+), 8 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 075877b1cbc0..06699d871a8e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -661,10 +661,10 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			struct page **pages, struct vm_area_struct **vmas,
 			unsigned long *position, int *length, int i)
 {
-	unsigned long vpfn, vaddr = *position;
+	unsigned long pfn_offset;
+	unsigned long vaddr = *position;
 	int remainder = *length;
 
-	vpfn = vaddr/PAGE_SIZE;
 	spin_lock(&mm->page_table_lock);
 	while (vaddr < vma->vm_end && remainder) {
 		pte_t *pte;
@@ -692,19 +692,28 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			break;
 		}
 
-		if (pages) {
-			page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
-			get_page(page);
-			pages[i] = page;
-		}
+		pfn_offset = (vaddr & ~HPAGE_MASK) >> PAGE_SHIFT;
+		page = pte_page(*pte);
+same_page:
+		get_page(page);
+		if (pages)
+			pages[i] = page + pfn_offset;
 
 		if (vmas)
 			vmas[i] = vma;
 
 		vaddr += PAGE_SIZE;
-		++vpfn;
+		++pfn_offset;
 		--remainder;
 		++i;
+		if (vaddr < vma->vm_end && remainder &&
+				pfn_offset < HPAGE_SIZE/PAGE_SIZE) {
+			/*
+			 * We use pfn_offset to avoid touching the pageframes
+			 * of this compound page.
+			 */
+			goto same_page;
+		}
 	}
 	spin_unlock(&mm->page_table_lock);
 	*length = remainder;
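To make the new index arithmetic concrete, here is a small standalone example.
The 4 KiB base page and 2 MiB huge page sizes (PAGE_SHIFT, HPAGE_SHIFT) are
illustrative assumptions, not values taken from the patch; it shows how
(vaddr & ~HPAGE_MASK) >> PAGE_SHIFT selects the small-page slot inside the
compound page, and how the same_page loop advances pfn_offset up to
HPAGE_SIZE/PAGE_SIZE before the next huge-PTE lookup is needed:

#include <stdio.h>

#define PAGE_SHIFT   12UL
#define PAGE_SIZE    (1UL << PAGE_SHIFT)                 /* 4 KiB */
#define HPAGE_SHIFT  21UL
#define HPAGE_SIZE   (1UL << HPAGE_SHIFT)                /* 2 MiB */
#define HPAGE_MASK   (~(HPAGE_SIZE - 1))

int main(void)
{
	/* An address 5 small pages into a (2 MiB-aligned) huge page. */
	unsigned long vaddr = 0x600000UL + 5 * PAGE_SIZE;

	/* Same expression the patch uses to index into the compound page. */
	unsigned long pfn_offset = (vaddr & ~HPAGE_MASK) >> PAGE_SHIFT;

	printf("pfn_offset = %lu (of %lu small pages per huge page)\n",
	       pfn_offset, HPAGE_SIZE / PAGE_SIZE);

	/*
	 * The same_page loop keeps bumping vaddr and pfn_offset; once
	 * pfn_offset reaches HPAGE_SIZE/PAGE_SIZE the code falls back to
	 * the outer loop and looks up the next huge PTE.
	 */
	while (pfn_offset < HPAGE_SIZE / PAGE_SIZE) {
		vaddr += PAGE_SIZE;
		++pfn_offset;
	}
	printf("next huge-PTE lookup at vaddr = %#lx\n", vaddr);
	return 0;
}

With these constants it prints pfn_offset = 5 out of 512 and resumes the outer
loop at 0x800000, the next 2 MiB boundary, which is exactly when a new head
page has to be fetched.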