author    Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>    2010-05-27 20:29:16 -0400
committer Andi Kleen <ak@linux.intel.com>                2010-08-11 03:21:15 -0400
commit    0fe6e20b9c4c53b3e97096ee73a0857f60aad43f
tree      3014636f2ed66fdebecb6f6bab338b39c3543a07 /mm/hugetlb.c
parent    8edf344c66a3f214d709dad1421c29d678915b3f
hugetlb, rmap: add reverse mapping for hugepage
This patch adds a reverse mapping feature for hugepages by introducing a mapcount for shared/private-mapped hugepages and an anon_vma for private-mapped hugepages.

While hugepages are not currently swappable, reverse mapping is useful for the memory error handler. Without this patch, the memory error handler can neither identify the processes using a bad hugepage nor unmap it from them. That is:

- for a shared hugepage:
  we can collect the processes using the hugepage through the pagecache,
  but cannot unmap the hugepage because of the lack of a mapcount.

- for a privately mapped hugepage:
  we can neither collect the processes nor unmap the hugepage.

This patch solves these problems.

This patch includes the bug fix given by commit 23be7468e8, so reverts it.

Dependency:
  "hugetlb: move definition of is_vm_hugetlb_page() to hugepage_inline.h"

ChangeLog since May 24:
- create hugetlb_inline.h and move is_vm_hugetlb_page() into it.
- move the functions setting up anon_vma for hugepages into mm/rmap.c.

ChangeLog since May 13:
- rebased to 2.6.34
- fix a logic error (in the case that a private mapping and a shared mapping coexist)
- move is_vm_hugetlb_page() into include/linux/mm.h so it can be used from linear_page_index()
- define and use linear_hugepage_index() instead of compound_order()
- use page_move_anon_rmap() in hugetlb_cow()
- copy the exclusive switch of __set_page_anon_rmap() into the hugepage counterpart.
- revert commit 23be7468 completely

Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Larry Woodman <lwoodman@redhat.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Acked-by: Fengguang Wu <fengguang.wu@intel.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
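The rmap helpers this diff calls (hugepage_add_anon_rmap(), hugepage_add_new_anon_rmap(), page_dup_rmap(), page_remove_rmap()) live in mm/rmap.c and include/linux/rmap.h, which are outside this diffstat. As a rough, illustrative sketch only, not the patch text, the anonymous-hugepage side can be pictured like the normal-page __page_set_anon_rmap() path: tie the hugepage to the VMA's anon_vma via page->mapping and count mappers in _mapcount.

#include <linux/mm.h>
#include <linux/rmap.h>
#include <linux/hugetlb.h>

/*
 * Illustrative only: an anonymous hugepage is tied to its anon_vma through
 * page->mapping (tagged with PAGE_MAPPING_ANON) and its mappers are counted
 * in _mapcount, mirroring what __page_set_anon_rmap() does for small pages.
 */
static void __hugepage_set_anon_rmap(struct page *page,
		struct vm_area_struct *vma, unsigned long address)
{
	struct anon_vma *anon_vma = vma->anon_vma;

	BUG_ON(!anon_vma);
	/* Tag the pointer so PageAnon(page) becomes true. */
	page->mapping = (struct address_space *)
			((void *)anon_vma + PAGE_MAPPING_ANON);
	page->index = linear_hugepage_index(vma, address);
}

/* An already-anonymous hugepage gains one more mapper (e.g. COW reuse). */
void hugepage_add_anon_rmap(struct page *page,
		struct vm_area_struct *vma, unsigned long address)
{
	BUG_ON(!PageLocked(page));
	BUG_ON(!vma->anon_vma);
	/* _mapcount starts at -1, so the first mapper brings it to 0. */
	if (atomic_inc_and_test(&page->_mapcount))
		__hugepage_set_anon_rmap(page, vma, address);
}

/* A freshly allocated hugepage is mapped anonymously for the first time. */
void hugepage_add_new_anon_rmap(struct page *page,
		struct vm_area_struct *vma, unsigned long address)
{
	atomic_set(&page->_mapcount, 0);	/* exactly one mapping */
	__hugepage_set_anon_rmap(page, vma, address);
}

Keeping _mapcount accurate this way is what makes the page_mapcount(old_page) == 1 test in hugetlb_cow() below meaningful, and it is what lets the memory error handler find and unmap every mapper of a corrupted hugepage.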
Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r--  mm/hugetlb.c  |  44
1 file changed, 42 insertions(+), 2 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 54d42b009dbe..aa3c51739378 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -18,6 +18,7 @@
 #include <linux/bootmem.h>
 #include <linux/sysfs.h>
 #include <linux/slab.h>
+#include <linux/rmap.h>
 
 #include <asm/page.h>
 #include <asm/pgtable.h>
@@ -220,6 +221,12 @@ static pgoff_t vma_hugecache_offset(struct hstate *h,
 			(vma->vm_pgoff >> huge_page_order(h));
 }
 
+pgoff_t linear_hugepage_index(struct vm_area_struct *vma,
+			unsigned long address)
+{
+	return vma_hugecache_offset(hstate_vma(vma), vma, address);
+}
+
 /*
  * Return the size of the pages allocated when backing a VMA. In the majority
  * cases this will be same size as used by the page table entries.
@@ -552,6 +559,7 @@ static void free_huge_page(struct page *page)
 	set_page_private(page, 0);
 	page->mapping = NULL;
 	BUG_ON(page_count(page));
+	BUG_ON(page_mapcount(page));
 	INIT_LIST_HEAD(&page->lru);
 
 	spin_lock(&hugetlb_lock);
@@ -2129,6 +2137,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 			entry = huge_ptep_get(src_pte);
 			ptepage = pte_page(entry);
 			get_page(ptepage);
+			page_dup_rmap(ptepage);
 			set_huge_pte_at(dst, addr, dst_pte, entry);
 		}
 		spin_unlock(&src->page_table_lock);
@@ -2207,6 +2216,7 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 	flush_tlb_range(vma, start, end);
 	mmu_notifier_invalidate_range_end(mm, start, end);
 	list_for_each_entry_safe(page, tmp, &page_list, lru) {
+		page_remove_rmap(page);
 		list_del(&page->lru);
 		put_page(page);
 	}
@@ -2272,6 +2282,9 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
 	return 1;
 }
 
+/*
+ * Hugetlb_cow() should be called with page lock of the original hugepage held.
+ */
 static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long address, pte_t *ptep, pte_t pte,
 			struct page *pagecache_page)
@@ -2286,8 +2299,11 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 retry_avoidcopy:
 	/* If no-one else is actually using this page, avoid the copy
 	 * and just make the page writable */
-	avoidcopy = (page_count(old_page) == 1);
+	avoidcopy = (page_mapcount(old_page) == 1);
 	if (avoidcopy) {
+		if (!trylock_page(old_page))
+			if (PageAnon(old_page))
+				page_move_anon_rmap(old_page, vma, address);
 		set_huge_ptep_writable(vma, address, ptep);
 		return 0;
 	}
@@ -2338,6 +2354,13 @@ retry_avoidcopy:
 		return -PTR_ERR(new_page);
 	}
 
+	/*
+	 * When the original hugepage is shared one, it does not have
+	 * anon_vma prepared.
+	 */
+	if (unlikely(anon_vma_prepare(vma)))
+		return VM_FAULT_OOM;
+
 	copy_huge_page(new_page, old_page, address, vma);
 	__SetPageUptodate(new_page);
 
@@ -2352,6 +2375,8 @@ retry_avoidcopy:
 		huge_ptep_clear_flush(vma, address, ptep);
 		set_huge_pte_at(mm, address, ptep,
 				make_huge_pte(vma, new_page, 1));
+		page_remove_rmap(old_page);
+		hugepage_add_anon_rmap(new_page, vma, address);
 		/* Make the old page be freed below */
 		new_page = old_page;
 	}
@@ -2452,10 +2477,17 @@ retry:
 			spin_lock(&inode->i_lock);
 			inode->i_blocks += blocks_per_huge_page(h);
 			spin_unlock(&inode->i_lock);
+			page_dup_rmap(page);
 		} else {
 			lock_page(page);
-			page->mapping = HUGETLB_POISON;
+			if (unlikely(anon_vma_prepare(vma))) {
+				ret = VM_FAULT_OOM;
+				goto backout_unlocked;
+			}
+			hugepage_add_new_anon_rmap(page, vma, address);
 		}
+	} else {
+		page_dup_rmap(page);
 	}
 
 	/*
@@ -2507,6 +2539,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	pte_t *ptep;
 	pte_t entry;
 	int ret;
+	struct page *page = NULL;
 	struct page *pagecache_page = NULL;
 	static DEFINE_MUTEX(hugetlb_instantiation_mutex);
 	struct hstate *h = hstate_vma(vma);
@@ -2548,6 +2581,11 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 								vma, address);
 	}
 
+	if (!pagecache_page) {
+		page = pte_page(entry);
+		lock_page(page);
+	}
+
 	spin_lock(&mm->page_table_lock);
 	/* Check for a racing update before calling hugetlb_cow */
 	if (unlikely(!pte_same(entry, huge_ptep_get(ptep))))
@@ -2573,6 +2611,8 @@ out_page_table_lock:
 	if (pagecache_page) {
 		unlock_page(pagecache_page);
 		put_page(pagecache_page);
+	} else {
+		unlock_page(page);
 	}
 
 out_mutex: