diff options
| author | Chen, Kenneth W <kenneth.w.chen@intel.com> | 2006-03-22 03:09:03 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-03-22 10:54:04 -0500 |
| commit | d5d4b0aa4e1430d73050babba999365593bdb9d2 (patch) | |
| tree | 67199d156f61217f9493d31aa4a9bfbb9c97412e | |
| parent | bba1e9b2111b14625f670bd07e57fd7ed57ce804 (diff) | |
[PATCH] optimize follow_hugetlb_page
follow_hugetlb_page() walks a range of user virtual addresses and fills
in a list of struct page * pointers into an array that is passed in via the
argument list. It also takes a reference count via get_page(). For a compound
page, get_page() actually traverses back to the head page via the page_private()
macro and then adds a reference count to the head page. Since we are doing a
virt to pte lookup, the kernel already has a struct page pointer to the head page.
So instead of traversing into the small unit page struct and then following a
link back to the head page, optimize this by incrementing the reference
count directly on the head page.
The benefit is that we don't take a cache miss on accessing the page struct for
the corresponding user address and, more importantly, we don't pollute the
cache with a "not very useful" round trip of pointer chasing. This adds a
moderate performance gain on an I/O intensive database transaction
workload.
Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
| -rw-r--r-- | mm/hugetlb.c | 25 |
1 files changed, 17 insertions, 8 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 075877b1cbc..06699d871a8 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
| @@ -661,10 +661,10 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 661 | struct page **pages, struct vm_area_struct **vmas, | 661 | struct page **pages, struct vm_area_struct **vmas, |
| 662 | unsigned long *position, int *length, int i) | 662 | unsigned long *position, int *length, int i) |
| 663 | { | 663 | { |
| 664 | unsigned long vpfn, vaddr = *position; | 664 | unsigned long pfn_offset; |
| 665 | unsigned long vaddr = *position; | ||
| 665 | int remainder = *length; | 666 | int remainder = *length; |
| 666 | 667 | ||
| 667 | vpfn = vaddr/PAGE_SIZE; | ||
| 668 | spin_lock(&mm->page_table_lock); | 668 | spin_lock(&mm->page_table_lock); |
| 669 | while (vaddr < vma->vm_end && remainder) { | 669 | while (vaddr < vma->vm_end && remainder) { |
| 670 | pte_t *pte; | 670 | pte_t *pte; |
| @@ -692,19 +692,28 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 692 | break; | 692 | break; |
| 693 | } | 693 | } |
| 694 | 694 | ||
| 695 | if (pages) { | 695 | pfn_offset = (vaddr & ~HPAGE_MASK) >> PAGE_SHIFT; |
| 696 | page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)]; | 696 | page = pte_page(*pte); |
| 697 | get_page(page); | 697 | same_page: |
| 698 | pages[i] = page; | 698 | get_page(page); |
| 699 | } | 699 | if (pages) |
| 700 | pages[i] = page + pfn_offset; | ||
| 700 | 701 | ||
| 701 | if (vmas) | 702 | if (vmas) |
| 702 | vmas[i] = vma; | 703 | vmas[i] = vma; |
| 703 | 704 | ||
| 704 | vaddr += PAGE_SIZE; | 705 | vaddr += PAGE_SIZE; |
| 705 | ++vpfn; | 706 | ++pfn_offset; |
| 706 | --remainder; | 707 | --remainder; |
| 707 | ++i; | 708 | ++i; |
| 709 | if (vaddr < vma->vm_end && remainder && | ||
| 710 | pfn_offset < HPAGE_SIZE/PAGE_SIZE) { | ||
| 711 | /* | ||
| 712 | * We use pfn_offset to avoid touching the pageframes | ||
| 713 | * of this compound page. | ||
| 714 | */ | ||
| 715 | goto same_page; | ||
| 716 | } | ||
| 708 | } | 717 | } |
| 709 | spin_unlock(&mm->page_table_lock); | 718 | spin_unlock(&mm->page_table_lock); |
| 710 | *length = remainder; | 719 | *length = remainder; |
