diff options
Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r-- | mm/hugetlb.c | 104 |
1 files changed, 102 insertions, 2 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index b61d2db9f34e..cc5be788a39f 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
@@ -18,6 +18,9 @@ | |||
18 | #include <linux/bootmem.h> | 18 | #include <linux/bootmem.h> |
19 | #include <linux/sysfs.h> | 19 | #include <linux/sysfs.h> |
20 | #include <linux/slab.h> | 20 | #include <linux/slab.h> |
21 | #include <linux/rmap.h> | ||
22 | #include <linux/swap.h> | ||
23 | #include <linux/swapops.h> | ||
21 | 24 | ||
22 | #include <asm/page.h> | 25 | #include <asm/page.h> |
23 | #include <asm/pgtable.h> | 26 | #include <asm/pgtable.h> |
@@ -220,6 +223,12 @@ static pgoff_t vma_hugecache_offset(struct hstate *h, | |||
220 | (vma->vm_pgoff >> huge_page_order(h)); | 223 | (vma->vm_pgoff >> huge_page_order(h)); |
221 | } | 224 | } |
222 | 225 | ||
226 | pgoff_t linear_hugepage_index(struct vm_area_struct *vma, | ||
227 | unsigned long address) | ||
228 | { | ||
229 | return vma_hugecache_offset(hstate_vma(vma), vma, address); | ||
230 | } | ||
231 | |||
223 | /* | 232 | /* |
224 | * Return the size of the pages allocated when backing a VMA. In the majority | 233 | * Return the size of the pages allocated when backing a VMA. In the majority |
225 | * cases this will be same size as used by the page table entries. | 234 | * cases this will be same size as used by the page table entries. |
@@ -552,6 +561,7 @@ static void free_huge_page(struct page *page) | |||
552 | set_page_private(page, 0); | 561 | set_page_private(page, 0); |
553 | page->mapping = NULL; | 562 | page->mapping = NULL; |
554 | BUG_ON(page_count(page)); | 563 | BUG_ON(page_count(page)); |
564 | BUG_ON(page_mapcount(page)); | ||
555 | INIT_LIST_HEAD(&page->lru); | 565 | INIT_LIST_HEAD(&page->lru); |
556 | 566 | ||
557 | spin_lock(&hugetlb_lock); | 567 | spin_lock(&hugetlb_lock); |
@@ -605,6 +615,8 @@ int PageHuge(struct page *page) | |||
605 | return dtor == free_huge_page; | 615 | return dtor == free_huge_page; |
606 | } | 616 | } |
607 | 617 | ||
618 | EXPORT_SYMBOL_GPL(PageHuge); | ||
619 | |||
608 | static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid) | 620 | static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid) |
609 | { | 621 | { |
610 | struct page *page; | 622 | struct page *page; |
@@ -2129,6 +2141,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, | |||
2129 | entry = huge_ptep_get(src_pte); | 2141 | entry = huge_ptep_get(src_pte); |
2130 | ptepage = pte_page(entry); | 2142 | ptepage = pte_page(entry); |
2131 | get_page(ptepage); | 2143 | get_page(ptepage); |
2144 | page_dup_rmap(ptepage); | ||
2132 | set_huge_pte_at(dst, addr, dst_pte, entry); | 2145 | set_huge_pte_at(dst, addr, dst_pte, entry); |
2133 | } | 2146 | } |
2134 | spin_unlock(&src->page_table_lock); | 2147 | spin_unlock(&src->page_table_lock); |
@@ -2140,6 +2153,19 @@ nomem: | |||
2140 | return -ENOMEM; | 2153 | return -ENOMEM; |
2141 | } | 2154 | } |
2142 | 2155 | ||
2156 | static int is_hugetlb_entry_hwpoisoned(pte_t pte) | ||
2157 | { | ||
2158 | swp_entry_t swp; | ||
2159 | |||
2160 | if (huge_pte_none(pte) || pte_present(pte)) | ||
2161 | return 0; | ||
2162 | swp = pte_to_swp_entry(pte); | ||
2163 | if (non_swap_entry(swp) && is_hwpoison_entry(swp)) { | ||
2164 | return 1; | ||
2165 | } else | ||
2166 | return 0; | ||
2167 | } | ||
2168 | |||
2143 | void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, | 2169 | void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, |
2144 | unsigned long end, struct page *ref_page) | 2170 | unsigned long end, struct page *ref_page) |
2145 | { | 2171 | { |
@@ -2198,6 +2224,12 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, | |||
2198 | if (huge_pte_none(pte)) | 2224 | if (huge_pte_none(pte)) |
2199 | continue; | 2225 | continue; |
2200 | 2226 | ||
2227 | /* | ||
2228 | * A HWPoisoned hugepage has already been unmapped and its reference dropped | ||
2229 | */ | ||
2230 | if (unlikely(is_hugetlb_entry_hwpoisoned(pte))) | ||
2231 | continue; | ||
2232 | |||
2201 | page = pte_page(pte); | 2233 | page = pte_page(pte); |
2202 | if (pte_dirty(pte)) | 2234 | if (pte_dirty(pte)) |
2203 | set_page_dirty(page); | 2235 | set_page_dirty(page); |
@@ -2207,6 +2239,7 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, | |||
2207 | flush_tlb_range(vma, start, end); | 2239 | flush_tlb_range(vma, start, end); |
2208 | mmu_notifier_invalidate_range_end(mm, start, end); | 2240 | mmu_notifier_invalidate_range_end(mm, start, end); |
2209 | list_for_each_entry_safe(page, tmp, &page_list, lru) { | 2241 | list_for_each_entry_safe(page, tmp, &page_list, lru) { |
2242 | page_remove_rmap(page); | ||
2210 | list_del(&page->lru); | 2243 | list_del(&page->lru); |
2211 | put_page(page); | 2244 | put_page(page); |
2212 | } | 2245 | } |
@@ -2272,6 +2305,9 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2272 | return 1; | 2305 | return 1; |
2273 | } | 2306 | } |
2274 | 2307 | ||
2308 | /* | ||
2309 | * hugetlb_cow() should be called with the page lock of the original hugepage held. | ||
2310 | */ | ||
2275 | static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, | 2311 | static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, |
2276 | unsigned long address, pte_t *ptep, pte_t pte, | 2312 | unsigned long address, pte_t *ptep, pte_t pte, |
2277 | struct page *pagecache_page) | 2313 | struct page *pagecache_page) |
@@ -2286,8 +2322,13 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2286 | retry_avoidcopy: | 2322 | retry_avoidcopy: |
2287 | /* If no-one else is actually using this page, avoid the copy | 2323 | /* If no-one else is actually using this page, avoid the copy |
2288 | * and just make the page writable */ | 2324 | * and just make the page writable */ |
2289 | avoidcopy = (page_count(old_page) == 1); | 2325 | avoidcopy = (page_mapcount(old_page) == 1); |
2290 | if (avoidcopy) { | 2326 | if (avoidcopy) { |
2327 | if (!trylock_page(old_page)) { | ||
2328 | if (PageAnon(old_page)) | ||
2329 | page_move_anon_rmap(old_page, vma, address); | ||
2330 | } else | ||
2331 | unlock_page(old_page); | ||
2291 | set_huge_ptep_writable(vma, address, ptep); | 2332 | set_huge_ptep_writable(vma, address, ptep); |
2292 | return 0; | 2333 | return 0; |
2293 | } | 2334 | } |
@@ -2338,6 +2379,13 @@ retry_avoidcopy: | |||
2338 | return -PTR_ERR(new_page); | 2379 | return -PTR_ERR(new_page); |
2339 | } | 2380 | } |
2340 | 2381 | ||
2382 | /* | ||
2383 | * When the original hugepage is a shared one, it does not have | ||
2384 | * an anon_vma prepared. | ||
2385 | */ | ||
2386 | if (unlikely(anon_vma_prepare(vma))) | ||
2387 | return VM_FAULT_OOM; | ||
2388 | |||
2341 | copy_huge_page(new_page, old_page, address, vma); | 2389 | copy_huge_page(new_page, old_page, address, vma); |
2342 | __SetPageUptodate(new_page); | 2390 | __SetPageUptodate(new_page); |
2343 | 2391 | ||
@@ -2355,6 +2403,8 @@ retry_avoidcopy: | |||
2355 | huge_ptep_clear_flush(vma, address, ptep); | 2403 | huge_ptep_clear_flush(vma, address, ptep); |
2356 | set_huge_pte_at(mm, address, ptep, | 2404 | set_huge_pte_at(mm, address, ptep, |
2357 | make_huge_pte(vma, new_page, 1)); | 2405 | make_huge_pte(vma, new_page, 1)); |
2406 | page_remove_rmap(old_page); | ||
2407 | hugepage_add_anon_rmap(new_page, vma, address); | ||
2358 | /* Make the old page be freed below */ | 2408 | /* Make the old page be freed below */ |
2359 | new_page = old_page; | 2409 | new_page = old_page; |
2360 | mmu_notifier_invalidate_range_end(mm, | 2410 | mmu_notifier_invalidate_range_end(mm, |
@@ -2458,10 +2508,29 @@ retry: | |||
2458 | spin_lock(&inode->i_lock); | 2508 | spin_lock(&inode->i_lock); |
2459 | inode->i_blocks += blocks_per_huge_page(h); | 2509 | inode->i_blocks += blocks_per_huge_page(h); |
2460 | spin_unlock(&inode->i_lock); | 2510 | spin_unlock(&inode->i_lock); |
2511 | page_dup_rmap(page); | ||
2461 | } else { | 2512 | } else { |
2462 | lock_page(page); | 2513 | lock_page(page); |
2463 | page->mapping = HUGETLB_POISON; | 2514 | if (unlikely(anon_vma_prepare(vma))) { |
2515 | ret = VM_FAULT_OOM; | ||
2516 | goto backout_unlocked; | ||
2517 | } | ||
2518 | hugepage_add_new_anon_rmap(page, vma, address); | ||
2464 | } | 2519 | } |
2520 | } else { | ||
2521 | page_dup_rmap(page); | ||
2522 | } | ||
2523 | |||
2524 | /* | ||
2525 | * Since the memory error handler replaces the pte with a hwpoison swap | ||
2526 | * entry at the time of error handling, a process which has reserved but | ||
2527 | * does not have a mapping to the error hugepage has no hwpoison swap entry. | ||
2528 | * So we need to block accesses from such a process by checking the | ||
2529 | * PG_hwpoison bit here. | ||
2530 | */ | ||
2531 | if (unlikely(PageHWPoison(page))) { | ||
2532 | ret = VM_FAULT_HWPOISON; | ||
2533 | goto backout_unlocked; | ||
2465 | } | 2534 | } |
2466 | 2535 | ||
2467 | /* | 2536 | /* |
@@ -2513,10 +2582,18 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2513 | pte_t *ptep; | 2582 | pte_t *ptep; |
2514 | pte_t entry; | 2583 | pte_t entry; |
2515 | int ret; | 2584 | int ret; |
2585 | struct page *page = NULL; | ||
2516 | struct page *pagecache_page = NULL; | 2586 | struct page *pagecache_page = NULL; |
2517 | static DEFINE_MUTEX(hugetlb_instantiation_mutex); | 2587 | static DEFINE_MUTEX(hugetlb_instantiation_mutex); |
2518 | struct hstate *h = hstate_vma(vma); | 2588 | struct hstate *h = hstate_vma(vma); |
2519 | 2589 | ||
2590 | ptep = huge_pte_offset(mm, address); | ||
2591 | if (ptep) { | ||
2592 | entry = huge_ptep_get(ptep); | ||
2593 | if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) | ||
2594 | return VM_FAULT_HWPOISON; | ||
2595 | } | ||
2596 | |||
2520 | ptep = huge_pte_alloc(mm, address, huge_page_size(h)); | 2597 | ptep = huge_pte_alloc(mm, address, huge_page_size(h)); |
2521 | if (!ptep) | 2598 | if (!ptep) |
2522 | return VM_FAULT_OOM; | 2599 | return VM_FAULT_OOM; |
@@ -2554,6 +2631,11 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2554 | vma, address); | 2631 | vma, address); |
2555 | } | 2632 | } |
2556 | 2633 | ||
2634 | if (!pagecache_page) { | ||
2635 | page = pte_page(entry); | ||
2636 | lock_page(page); | ||
2637 | } | ||
2638 | |||
2557 | spin_lock(&mm->page_table_lock); | 2639 | spin_lock(&mm->page_table_lock); |
2558 | /* Check for a racing update before calling hugetlb_cow */ | 2640 | /* Check for a racing update before calling hugetlb_cow */ |
2559 | if (unlikely(!pte_same(entry, huge_ptep_get(ptep)))) | 2641 | if (unlikely(!pte_same(entry, huge_ptep_get(ptep)))) |
@@ -2579,6 +2661,8 @@ out_page_table_lock: | |||
2579 | if (pagecache_page) { | 2661 | if (pagecache_page) { |
2580 | unlock_page(pagecache_page); | 2662 | unlock_page(pagecache_page); |
2581 | put_page(pagecache_page); | 2663 | put_page(pagecache_page); |
2664 | } else { | ||
2665 | unlock_page(page); | ||
2582 | } | 2666 | } |
2583 | 2667 | ||
2584 | out_mutex: | 2668 | out_mutex: |
@@ -2791,3 +2875,19 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed) | |||
2791 | hugetlb_put_quota(inode->i_mapping, (chg - freed)); | 2875 | hugetlb_put_quota(inode->i_mapping, (chg - freed)); |
2792 | hugetlb_acct_memory(h, -(chg - freed)); | 2876 | hugetlb_acct_memory(h, -(chg - freed)); |
2793 | } | 2877 | } |
2878 | |||
2879 | /* | ||
2880 | * This function is called from memory failure code. | ||
2881 | * Assume the caller holds page lock of the head page. | ||
2882 | */ | ||
2883 | void __isolate_hwpoisoned_huge_page(struct page *hpage) | ||
2884 | { | ||
2885 | struct hstate *h = page_hstate(hpage); | ||
2886 | int nid = page_to_nid(hpage); | ||
2887 | |||
2888 | spin_lock(&hugetlb_lock); | ||
2889 | list_del(&hpage->lru); | ||
2890 | h->free_huge_pages--; | ||
2891 | h->free_huge_pages_node[nid]--; | ||
2892 | spin_unlock(&hugetlb_lock); | ||
2893 | } | ||