aboutsummaryrefslogtreecommitdiffstats
path: root/mm/hugetlb.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r--mm/hugetlb.c104
1 files changed, 102 insertions, 2 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index b61d2db9f34e..cc5be788a39f 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -18,6 +18,9 @@
18#include <linux/bootmem.h> 18#include <linux/bootmem.h>
19#include <linux/sysfs.h> 19#include <linux/sysfs.h>
20#include <linux/slab.h> 20#include <linux/slab.h>
21#include <linux/rmap.h>
22#include <linux/swap.h>
23#include <linux/swapops.h>
21 24
22#include <asm/page.h> 25#include <asm/page.h>
23#include <asm/pgtable.h> 26#include <asm/pgtable.h>
@@ -220,6 +223,12 @@ static pgoff_t vma_hugecache_offset(struct hstate *h,
220 (vma->vm_pgoff >> huge_page_order(h)); 223 (vma->vm_pgoff >> huge_page_order(h));
221} 224}
222 225
226pgoff_t linear_hugepage_index(struct vm_area_struct *vma,
227 unsigned long address)
228{
229 return vma_hugecache_offset(hstate_vma(vma), vma, address);
230}
231
223/* 232/*
224 * Return the size of the pages allocated when backing a VMA. In the majority 233 * Return the size of the pages allocated when backing a VMA. In the majority
225 * cases this will be same size as used by the page table entries. 234 * cases this will be same size as used by the page table entries.
@@ -552,6 +561,7 @@ static void free_huge_page(struct page *page)
552 set_page_private(page, 0); 561 set_page_private(page, 0);
553 page->mapping = NULL; 562 page->mapping = NULL;
554 BUG_ON(page_count(page)); 563 BUG_ON(page_count(page));
564 BUG_ON(page_mapcount(page));
555 INIT_LIST_HEAD(&page->lru); 565 INIT_LIST_HEAD(&page->lru);
556 566
557 spin_lock(&hugetlb_lock); 567 spin_lock(&hugetlb_lock);
@@ -605,6 +615,8 @@ int PageHuge(struct page *page)
605 return dtor == free_huge_page; 615 return dtor == free_huge_page;
606} 616}
607 617
618EXPORT_SYMBOL_GPL(PageHuge);
619
608static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid) 620static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
609{ 621{
610 struct page *page; 622 struct page *page;
@@ -2129,6 +2141,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
2129 entry = huge_ptep_get(src_pte); 2141 entry = huge_ptep_get(src_pte);
2130 ptepage = pte_page(entry); 2142 ptepage = pte_page(entry);
2131 get_page(ptepage); 2143 get_page(ptepage);
2144 page_dup_rmap(ptepage);
2132 set_huge_pte_at(dst, addr, dst_pte, entry); 2145 set_huge_pte_at(dst, addr, dst_pte, entry);
2133 } 2146 }
2134 spin_unlock(&src->page_table_lock); 2147 spin_unlock(&src->page_table_lock);
@@ -2140,6 +2153,19 @@ nomem:
2140 return -ENOMEM; 2153 return -ENOMEM;
2141} 2154}
2142 2155
2156static int is_hugetlb_entry_hwpoisoned(pte_t pte)
2157{
2158 swp_entry_t swp;
2159
2160 if (huge_pte_none(pte) || pte_present(pte))
2161 return 0;
2162 swp = pte_to_swp_entry(pte);
2163 if (non_swap_entry(swp) && is_hwpoison_entry(swp)) {
2164 return 1;
2165 } else
2166 return 0;
2167}
2168
2143void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, 2169void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
2144 unsigned long end, struct page *ref_page) 2170 unsigned long end, struct page *ref_page)
2145{ 2171{
@@ -2198,6 +2224,12 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
2198 if (huge_pte_none(pte)) 2224 if (huge_pte_none(pte))
2199 continue; 2225 continue;
2200 2226
2227 /*
2228 * HWPoisoned hugepage is already unmapped and dropped reference
2229 */
2230 if (unlikely(is_hugetlb_entry_hwpoisoned(pte)))
2231 continue;
2232
2201 page = pte_page(pte); 2233 page = pte_page(pte);
2202 if (pte_dirty(pte)) 2234 if (pte_dirty(pte))
2203 set_page_dirty(page); 2235 set_page_dirty(page);
@@ -2207,6 +2239,7 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
2207 flush_tlb_range(vma, start, end); 2239 flush_tlb_range(vma, start, end);
2208 mmu_notifier_invalidate_range_end(mm, start, end); 2240 mmu_notifier_invalidate_range_end(mm, start, end);
2209 list_for_each_entry_safe(page, tmp, &page_list, lru) { 2241 list_for_each_entry_safe(page, tmp, &page_list, lru) {
2242 page_remove_rmap(page);
2210 list_del(&page->lru); 2243 list_del(&page->lru);
2211 put_page(page); 2244 put_page(page);
2212 } 2245 }
@@ -2272,6 +2305,9 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
2272 return 1; 2305 return 1;
2273} 2306}
2274 2307
2308/*
2309 * Hugetlb_cow() should be called with page lock of the original hugepage held.
2310 */
2275static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, 2311static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
2276 unsigned long address, pte_t *ptep, pte_t pte, 2312 unsigned long address, pte_t *ptep, pte_t pte,
2277 struct page *pagecache_page) 2313 struct page *pagecache_page)
@@ -2286,8 +2322,13 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
2286retry_avoidcopy: 2322retry_avoidcopy:
2287 /* If no-one else is actually using this page, avoid the copy 2323 /* If no-one else is actually using this page, avoid the copy
2288 * and just make the page writable */ 2324 * and just make the page writable */
2289 avoidcopy = (page_count(old_page) == 1); 2325 avoidcopy = (page_mapcount(old_page) == 1);
2290 if (avoidcopy) { 2326 if (avoidcopy) {
2327 if (!trylock_page(old_page)) {
2328 if (PageAnon(old_page))
2329 page_move_anon_rmap(old_page, vma, address);
2330 } else
2331 unlock_page(old_page);
2291 set_huge_ptep_writable(vma, address, ptep); 2332 set_huge_ptep_writable(vma, address, ptep);
2292 return 0; 2333 return 0;
2293 } 2334 }
@@ -2338,6 +2379,13 @@ retry_avoidcopy:
2338 return -PTR_ERR(new_page); 2379 return -PTR_ERR(new_page);
2339 } 2380 }
2340 2381
2382 /*
2383 * When the original hugepage is shared one, it does not have
2384 * anon_vma prepared.
2385 */
2386 if (unlikely(anon_vma_prepare(vma)))
2387 return VM_FAULT_OOM;
2388
2341 copy_huge_page(new_page, old_page, address, vma); 2389 copy_huge_page(new_page, old_page, address, vma);
2342 __SetPageUptodate(new_page); 2390 __SetPageUptodate(new_page);
2343 2391
@@ -2355,6 +2403,8 @@ retry_avoidcopy:
2355 huge_ptep_clear_flush(vma, address, ptep); 2403 huge_ptep_clear_flush(vma, address, ptep);
2356 set_huge_pte_at(mm, address, ptep, 2404 set_huge_pte_at(mm, address, ptep,
2357 make_huge_pte(vma, new_page, 1)); 2405 make_huge_pte(vma, new_page, 1));
2406 page_remove_rmap(old_page);
2407 hugepage_add_anon_rmap(new_page, vma, address);
2358 /* Make the old page be freed below */ 2408 /* Make the old page be freed below */
2359 new_page = old_page; 2409 new_page = old_page;
2360 mmu_notifier_invalidate_range_end(mm, 2410 mmu_notifier_invalidate_range_end(mm,
@@ -2458,10 +2508,29 @@ retry:
2458 spin_lock(&inode->i_lock); 2508 spin_lock(&inode->i_lock);
2459 inode->i_blocks += blocks_per_huge_page(h); 2509 inode->i_blocks += blocks_per_huge_page(h);
2460 spin_unlock(&inode->i_lock); 2510 spin_unlock(&inode->i_lock);
2511 page_dup_rmap(page);
2461 } else { 2512 } else {
2462 lock_page(page); 2513 lock_page(page);
2463 page->mapping = HUGETLB_POISON; 2514 if (unlikely(anon_vma_prepare(vma))) {
2515 ret = VM_FAULT_OOM;
2516 goto backout_unlocked;
2517 }
2518 hugepage_add_new_anon_rmap(page, vma, address);
2464 } 2519 }
2520 } else {
2521 page_dup_rmap(page);
2522 }
2523
2524 /*
2525 * Since memory error handler replaces pte into hwpoison swap entry
2526 * at the time of error handling, a process which reserved but not have
2527 * the mapping to the error hugepage does not have hwpoison swap entry.
2528 * So we need to block accesses from such a process by checking
2529 * PG_hwpoison bit here.
2530 */
2531 if (unlikely(PageHWPoison(page))) {
2532 ret = VM_FAULT_HWPOISON;
2533 goto backout_unlocked;
2465 } 2534 }
2466 2535
2467 /* 2536 /*
@@ -2513,10 +2582,18 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2513 pte_t *ptep; 2582 pte_t *ptep;
2514 pte_t entry; 2583 pte_t entry;
2515 int ret; 2584 int ret;
2585 struct page *page = NULL;
2516 struct page *pagecache_page = NULL; 2586 struct page *pagecache_page = NULL;
2517 static DEFINE_MUTEX(hugetlb_instantiation_mutex); 2587 static DEFINE_MUTEX(hugetlb_instantiation_mutex);
2518 struct hstate *h = hstate_vma(vma); 2588 struct hstate *h = hstate_vma(vma);
2519 2589
2590 ptep = huge_pte_offset(mm, address);
2591 if (ptep) {
2592 entry = huge_ptep_get(ptep);
2593 if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
2594 return VM_FAULT_HWPOISON;
2595 }
2596
2520 ptep = huge_pte_alloc(mm, address, huge_page_size(h)); 2597 ptep = huge_pte_alloc(mm, address, huge_page_size(h));
2521 if (!ptep) 2598 if (!ptep)
2522 return VM_FAULT_OOM; 2599 return VM_FAULT_OOM;
@@ -2554,6 +2631,11 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2554 vma, address); 2631 vma, address);
2555 } 2632 }
2556 2633
2634 if (!pagecache_page) {
2635 page = pte_page(entry);
2636 lock_page(page);
2637 }
2638
2557 spin_lock(&mm->page_table_lock); 2639 spin_lock(&mm->page_table_lock);
2558 /* Check for a racing update before calling hugetlb_cow */ 2640 /* Check for a racing update before calling hugetlb_cow */
2559 if (unlikely(!pte_same(entry, huge_ptep_get(ptep)))) 2641 if (unlikely(!pte_same(entry, huge_ptep_get(ptep))))
@@ -2579,6 +2661,8 @@ out_page_table_lock:
2579 if (pagecache_page) { 2661 if (pagecache_page) {
2580 unlock_page(pagecache_page); 2662 unlock_page(pagecache_page);
2581 put_page(pagecache_page); 2663 put_page(pagecache_page);
2664 } else {
2665 unlock_page(page);
2582 } 2666 }
2583 2667
2584out_mutex: 2668out_mutex:
@@ -2791,3 +2875,19 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
2791 hugetlb_put_quota(inode->i_mapping, (chg - freed)); 2875 hugetlb_put_quota(inode->i_mapping, (chg - freed));
2792 hugetlb_acct_memory(h, -(chg - freed)); 2876 hugetlb_acct_memory(h, -(chg - freed));
2793} 2877}
2878
2879/*
2880 * This function is called from memory failure code.
2881 * Assume the caller holds page lock of the head page.
2882 */
2883void __isolate_hwpoisoned_huge_page(struct page *hpage)
2884{
2885 struct hstate *h = page_hstate(hpage);
2886 int nid = page_to_nid(hpage);
2887
2888 spin_lock(&hugetlb_lock);
2889 list_del(&hpage->lru);
2890 h->free_huge_pages--;
2891 h->free_huge_pages_node[nid]--;
2892 spin_unlock(&hugetlb_lock);
2893}