-rw-r--r--  include/linux/hugetlb.h |  1
-rw-r--r--  include/linux/pagemap.h |  8
-rw-r--r--  include/linux/poison.h  |  9
-rw-r--r--  include/linux/rmap.h    |  5
-rw-r--r--  mm/hugetlb.c            | 44
-rw-r--r--  mm/rmap.c               | 59
6 files changed, 114 insertions, 12 deletions
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index d47a7c41745d..e688fd89354d 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -99,6 +99,7 @@ static inline void hugetlb_report_meminfo(struct seq_file *m)
 #define is_hugepage_only_range(mm, addr, len)	0
 #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; })
 #define hugetlb_fault(mm, vma, addr, flags)	({ BUG(); 0; })
+#define huge_pte_offset(mm, address)	0
 
 #define hugetlb_change_protection(vma, address, end, newprot)
 
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index b2bd2bae9775..a547d9689170 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -282,10 +282,16 @@ static inline loff_t page_offset(struct page *page)
 	return ((loff_t)page->index) << PAGE_CACHE_SHIFT;
 }
 
+extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma,
+				     unsigned long address);
+
 static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
 					unsigned long address)
 {
-	pgoff_t pgoff = (address - vma->vm_start) >> PAGE_SHIFT;
+	pgoff_t pgoff;
+	if (unlikely(is_vm_hugetlb_page(vma)))
+		return linear_hugepage_index(vma, address);
+	pgoff = (address - vma->vm_start) >> PAGE_SHIFT;
 	pgoff += vma->vm_pgoff;
 	return pgoff >> (PAGE_CACHE_SHIFT - PAGE_SHIFT);
 }
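
The hugetlb branch added to linear_page_index() above delegates to linear_hugepage_index(), which mm/hugetlb.c implements on top of vma_hugecache_offset() (see the hunk at mm/hugetlb.c line 221 below). A minimal userspace sketch of that index arithmetic, assuming 4 KB base pages and 2 MB huge pages; the constants and the sample VMA are illustrative, not taken from the patch:

#include <stdio.h>

/* Illustrative constants: 4 KB base pages, 2 MB huge pages. */
#define PAGE_SHIFT		12
#define HUGE_PAGE_SHIFT		21
#define HUGE_PAGE_ORDER		(HUGE_PAGE_SHIFT - PAGE_SHIFT)

/* Same arithmetic as vma_hugecache_offset(): offset into the VMA in
 * huge-page units, plus the VMA's file offset converted to huge-page units. */
static unsigned long linear_hugepage_index(unsigned long vm_start,
					   unsigned long vm_pgoff,
					   unsigned long address)
{
	return ((address - vm_start) >> HUGE_PAGE_SHIFT) +
	       (vm_pgoff >> HUGE_PAGE_ORDER);
}

int main(void)
{
	/* Hypothetical private hugetlb mapping at 0x40000000, file offset 0
	 * (vm_pgoff is expressed in base pages). */
	unsigned long vm_start = 0x40000000UL, vm_pgoff = 0;
	unsigned long addr = vm_start + 3 * (1UL << HUGE_PAGE_SHIFT) + 0x1234;

	printf("index = %lu\n", linear_hugepage_index(vm_start, vm_pgoff, addr));
	return 0;
}

Routing hugetlb VMAs through this helper matters because page->index of a hugepage is kept in huge-page-sized units (as the vma_address() change in mm/rmap.c below also assumes), so the generic base-page shift in linear_page_index() would compute the wrong offset.
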
diff --git a/include/linux/poison.h b/include/linux/poison.h
index 34066ffd893d..2110a81c5e2a 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -48,15 +48,6 @@
 #define POISON_FREE	0x6b	/* for use-after-free poisoning */
 #define POISON_END	0xa5	/* end-byte of poisoning */
 
-/********** mm/hugetlb.c **********/
-/*
- * Private mappings of hugetlb pages use this poisoned value for
- * page->mapping. The core VM should not be doing anything with this mapping
- * but futex requires the existence of some page->mapping value even though it
- * is unused if PAGE_MAPPING_ANON is set.
- */
-#define HUGETLB_POISON	((void *)(0x00300300 + POISON_POINTER_DELTA + PAGE_MAPPING_ANON))
-
 /********** arch/$ARCH/mm/init.c **********/
 #define POISON_FREE_INITMEM	0xcc
 
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 77216742c178..9d50e7ef5f5a 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -140,6 +140,11 @@ void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
 void page_add_file_rmap(struct page *);
 void page_remove_rmap(struct page *);
 
+void hugepage_add_anon_rmap(struct page *, struct vm_area_struct *,
+			    unsigned long);
+void hugepage_add_new_anon_rmap(struct page *, struct vm_area_struct *,
+				unsigned long);
+
 static inline void page_dup_rmap(struct page *page)
 {
 	atomic_inc(&page->_mapcount);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 54d42b009dbe..aa3c51739378 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -18,6 +18,7 @@
 #include <linux/bootmem.h>
 #include <linux/sysfs.h>
 #include <linux/slab.h>
+#include <linux/rmap.h>
 
 #include <asm/page.h>
 #include <asm/pgtable.h>
@@ -220,6 +221,12 @@ static pgoff_t vma_hugecache_offset(struct hstate *h,
 			(vma->vm_pgoff >> huge_page_order(h));
 }
 
+pgoff_t linear_hugepage_index(struct vm_area_struct *vma,
+				unsigned long address)
+{
+	return vma_hugecache_offset(hstate_vma(vma), vma, address);
+}
+
 /*
  * Return the size of the pages allocated when backing a VMA. In the majority
  * cases this will be same size as used by the page table entries.
@@ -552,6 +559,7 @@ static void free_huge_page(struct page *page)
 	set_page_private(page, 0);
 	page->mapping = NULL;
 	BUG_ON(page_count(page));
+	BUG_ON(page_mapcount(page));
 	INIT_LIST_HEAD(&page->lru);
 
 	spin_lock(&hugetlb_lock);
@@ -2129,6 +2137,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 			entry = huge_ptep_get(src_pte);
 			ptepage = pte_page(entry);
 			get_page(ptepage);
+			page_dup_rmap(ptepage);
 			set_huge_pte_at(dst, addr, dst_pte, entry);
 		}
 		spin_unlock(&src->page_table_lock);
@@ -2207,6 +2216,7 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 	flush_tlb_range(vma, start, end);
 	mmu_notifier_invalidate_range_end(mm, start, end);
 	list_for_each_entry_safe(page, tmp, &page_list, lru) {
+		page_remove_rmap(page);
 		list_del(&page->lru);
 		put_page(page);
 	}
@@ -2272,6 +2282,9 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
 	return 1;
 }
 
+/*
+ * Hugetlb_cow() should be called with page lock of the original hugepage held.
+ */
 static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long address, pte_t *ptep, pte_t pte,
 			struct page *pagecache_page)
@@ -2286,8 +2299,11 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 retry_avoidcopy:
 	/* If no-one else is actually using this page, avoid the copy
 	 * and just make the page writable */
-	avoidcopy = (page_count(old_page) == 1);
+	avoidcopy = (page_mapcount(old_page) == 1);
 	if (avoidcopy) {
+		if (!trylock_page(old_page))
+			if (PageAnon(old_page))
+				page_move_anon_rmap(old_page, vma, address);
 		set_huge_ptep_writable(vma, address, ptep);
 		return 0;
 	}
@@ -2338,6 +2354,13 @@ retry_avoidcopy:
 		return -PTR_ERR(new_page);
 	}
 
+	/*
+	 * When the original hugepage is shared one, it does not have
+	 * anon_vma prepared.
+	 */
+	if (unlikely(anon_vma_prepare(vma)))
+		return VM_FAULT_OOM;
+
 	copy_huge_page(new_page, old_page, address, vma);
 	__SetPageUptodate(new_page);
 
@@ -2352,6 +2375,8 @@ retry_avoidcopy:
 		huge_ptep_clear_flush(vma, address, ptep);
 		set_huge_pte_at(mm, address, ptep,
 				make_huge_pte(vma, new_page, 1));
+		page_remove_rmap(old_page);
+		hugepage_add_anon_rmap(new_page, vma, address);
 		/* Make the old page be freed below */
 		new_page = old_page;
 	}
@@ -2452,10 +2477,17 @@ retry:
 			spin_lock(&inode->i_lock);
 			inode->i_blocks += blocks_per_huge_page(h);
 			spin_unlock(&inode->i_lock);
+			page_dup_rmap(page);
 		} else {
 			lock_page(page);
-			page->mapping = HUGETLB_POISON;
+			if (unlikely(anon_vma_prepare(vma))) {
+				ret = VM_FAULT_OOM;
+				goto backout_unlocked;
+			}
+			hugepage_add_new_anon_rmap(page, vma, address);
 		}
+	} else {
+		page_dup_rmap(page);
 	}
 
 	/*
@@ -2507,6 +2539,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	pte_t *ptep;
 	pte_t entry;
 	int ret;
+	struct page *page = NULL;
 	struct page *pagecache_page = NULL;
 	static DEFINE_MUTEX(hugetlb_instantiation_mutex);
 	struct hstate *h = hstate_vma(vma);
@@ -2548,6 +2581,11 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
							vma, address);
 	}
 
+	if (!pagecache_page) {
+		page = pte_page(entry);
+		lock_page(page);
+	}
+
 	spin_lock(&mm->page_table_lock);
 	/* Check for a racing update before calling hugetlb_cow */
 	if (unlikely(!pte_same(entry, huge_ptep_get(ptep))))
@@ -2573,6 +2611,8 @@ out_page_table_lock:
 	if (pagecache_page) {
 		unlock_page(pagecache_page);
 		put_page(pagecache_page);
+	} else {
+		unlock_page(page);
 	}
 
 out_mutex:
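
The page_dup_rmap() and page_remove_rmap() calls added above in copy_hugetlb_page_range() and __unmap_hugepage_range(), and the new BUG_ON(page_mapcount(page)) in free_huge_page(), all lean on the usual _mapcount convention: the counter starts at -1, page_mapcount() is the stored value plus one, and atomic_inc_and_test() reports true only for the first mapping, which is when hugepage_add_anon_rmap() attaches the anon_vma. A standalone sketch of that lifecycle, with plain ints standing in for atomic_t and an illustrative sequence of events:

#include <assert.h>
#include <stdio.h>

/* Toy stand-in for atomic_inc_and_test(): increment, report whether the
 * result is zero (i.e. whether this was the first mapping). */
static int inc_and_test(int *v)
{
	return ++(*v) == 0;
}

int main(void)
{
	int mapcount = -1;		/* freshly allocated hugepage: unmapped */

	/* hugepage_add_new_anon_rmap() / hugepage_add_anon_rmap(): first map */
	assert(inc_and_test(&mapcount));

	/* page_dup_rmap() at fork time: an additional mapping, not the first */
	assert(!inc_and_test(&mapcount));

	/* page_remove_rmap() as each mapping is torn down */
	mapcount--;
	mapcount--;

	/* free_huge_page(): page_mapcount() == mapcount + 1 must be zero */
	assert(mapcount + 1 == 0);
	printf("all mappings gone, _mapcount = %d\n", mapcount);
	return 0;
}
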
diff --git a/mm/rmap.c b/mm/rmap.c
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -56,6 +56,7 @@
 #include <linux/memcontrol.h>
 #include <linux/mmu_notifier.h>
 #include <linux/migrate.h>
+#include <linux/hugetlb.h>
 
 #include <asm/tlbflush.h>
 
@@ -326,6 +327,8 @@ vma_address(struct page *page, struct vm_area_struct *vma)
 	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
 	unsigned long address;
 
+	if (unlikely(is_vm_hugetlb_page(vma)))
+		pgoff = page->index << huge_page_order(page_hstate(page));
 	address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
 	if (unlikely(address < vma->vm_start || address >= vma->vm_end)) {
 		/* page should be within @vma mapping range */
@@ -369,6 +372,12 @@ pte_t *page_check_address(struct page *page, struct mm_struct *mm,
 	pte_t *pte;
 	spinlock_t *ptl;
 
+	if (unlikely(PageHuge(page))) {
+		pte = huge_pte_offset(mm, address);
+		ptl = &mm->page_table_lock;
+		goto check;
+	}
+
 	pgd = pgd_offset(mm, address);
 	if (!pgd_present(*pgd))
 		return NULL;
@@ -389,6 +398,7 @@ pte_t *page_check_address(struct page *page, struct mm_struct *mm,
 	}
 
 	ptl = pte_lockptr(mm, pmd);
+check:
 	spin_lock(ptl);
 	if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte)) {
 		*ptlp = ptl;
@@ -873,6 +883,12 @@ void page_remove_rmap(struct page *page)
 		page_clear_dirty(page);
 		set_page_dirty(page);
 	}
+	/*
+	 * Hugepages are not counted in NR_ANON_PAGES nor NR_FILE_MAPPED
+	 * and not charged by memcg for now.
+	 */
+	if (unlikely(PageHuge(page)))
+		return;
 	if (PageAnon(page)) {
 		mem_cgroup_uncharge_page(page);
 		__dec_zone_page_state(page, NR_ANON_PAGES);
@@ -1445,3 +1461,46 @@ int rmap_walk(struct page *page, int (*rmap_one)(struct page *,
 		return rmap_walk_file(page, rmap_one, arg);
 }
 #endif /* CONFIG_MIGRATION */
+
+#ifdef CONFIG_HUGETLBFS
+/*
+ * The following three functions are for anonymous (private mapped) hugepages.
+ * Unlike common anonymous pages, anonymous hugepages have no accounting code
+ * and no lru code, because we handle hugepages differently from common pages.
+ */
+static void __hugepage_set_anon_rmap(struct page *page,
+	struct vm_area_struct *vma, unsigned long address, int exclusive)
+{
+	struct anon_vma *anon_vma = vma->anon_vma;
+	BUG_ON(!anon_vma);
+	if (!exclusive) {
+		struct anon_vma_chain *avc;
+		avc = list_entry(vma->anon_vma_chain.prev,
+				 struct anon_vma_chain, same_vma);
+		anon_vma = avc->anon_vma;
+	}
+	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
+	page->mapping = (struct address_space *) anon_vma;
+	page->index = linear_page_index(vma, address);
+}
+
+void hugepage_add_anon_rmap(struct page *page,
+			    struct vm_area_struct *vma, unsigned long address)
+{
+	struct anon_vma *anon_vma = vma->anon_vma;
+	int first;
+	BUG_ON(!anon_vma);
+	BUG_ON(address < vma->vm_start || address >= vma->vm_end);
+	first = atomic_inc_and_test(&page->_mapcount);
+	if (first)
+		__hugepage_set_anon_rmap(page, vma, address, 0);
+}
+
+void hugepage_add_new_anon_rmap(struct page *page,
+			struct vm_area_struct *vma, unsigned long address)
+{
+	BUG_ON(address < vma->vm_start || address >= vma->vm_end);
+	atomic_set(&page->_mapcount, 0);
+	__hugepage_set_anon_rmap(page, vma, address, 1);
+}
+#endif /* CONFIG_HUGETLBFS */
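
With private hugepages now carrying a real anon_vma in page->mapping, the HUGETLB_POISON placeholder dropped from poison.h is no longer needed: __hugepage_set_anon_rmap() above stores the anon_vma pointer with the PAGE_MAPPING_ANON bit set, the same encoding the regular anon rmap code uses. A userspace sketch of that pointer tagging; the sample address is hypothetical and PAGE_MAPPING_ANON is taken to be 1 as in include/linux/mm.h of this era:

#include <stdio.h>
#include <stdint.h>

#define PAGE_MAPPING_ANON	1	/* low bit of page->mapping marks anon pages */

int main(void)
{
	/* Hypothetical anon_vma address; real ones are at least word aligned,
	 * so the low bit is free to act as a flag. */
	uintptr_t anon_vma = 0x12345e80UL;
	uintptr_t mapping = anon_vma + PAGE_MAPPING_ANON;	/* what __hugepage_set_anon_rmap() stores */

	printf("PageAnon?  %s\n", (mapping & PAGE_MAPPING_ANON) ? "yes" : "no");
	printf("anon_vma = %#lx\n", (unsigned long)(mapping & ~(uintptr_t)PAGE_MAPPING_ANON));
	return 0;
}
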