aboutsummaryrefslogtreecommitdiffstats
path: root/mm/huge_memory.c
diff options
context:
space:
mode:
author Kirill A. Shutemov <kirill.shutemov@linux.intel.com> 2012-12-12 16:51:00 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2012-12-12 20:38:31 -0500
commit c5a647d09fe9fc3e0241c89845cf8e6220b916f5 (patch)
tree 6d436d79bfb11b202da4de02a7cd1e6e3ee38847 /mm/huge_memory.c
parent e180377f1ae48b3cbc559c9875d9b038f7f000c6 (diff)
thp: implement splitting pmd for huge zero page
We can't split the huge zero page itself (and it's a bug if we try), but we can split the pmd which points to it. On splitting the pmd we create a page table with all ptes set to the normal zero page. [akpm@linux-foundation.org: fix build error] Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: "H. Peter Anvin" <hpa@linux.intel.com> Cc: Mel Gorman <mel@csn.ul.ie> Cc: David Rientjes <rientjes@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r--mm/huge_memory.c43
1 files changed, 42 insertions, 1 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index de6aa5f3fdd2..ea0e23fd6967 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1616,6 +1616,7 @@ int split_huge_page(struct page *page)
1616 struct anon_vma *anon_vma; 1616 struct anon_vma *anon_vma;
1617 int ret = 1; 1617 int ret = 1;
1618 1618
1619 BUG_ON(is_huge_zero_pfn(page_to_pfn(page)));
1619 BUG_ON(!PageAnon(page)); 1620 BUG_ON(!PageAnon(page));
1620 anon_vma = page_lock_anon_vma(page); 1621 anon_vma = page_lock_anon_vma(page);
1621 if (!anon_vma) 1622 if (!anon_vma)
@@ -2475,24 +2476,64 @@ static int khugepaged(void *none)
2475 return 0; 2476 return 0;
2476} 2477}
2477 2478
/*
 * __split_huge_zero_page_pmd - replace a huge-zero-page pmd with a pte table.
 * @vma:   the VMA covering the mapping; supplies the mm and vm_page_prot.
 * @haddr: start address of the huge range (the visible caller passes
 *         address & HPAGE_PMD_MASK).
 * @pmd:   the pmd entry to be split.
 *
 * Clears *pmd, fills a page table with HPAGE_PMD_NR special ptes that each
 * point at the pfn returned by my_zero_pfn(), and then installs that table
 * at *pmd.  No page is split here; only the mapping changes shape.
 *
 * The only caller visible here, __split_huge_page_pmd(), calls this with
 * mm->page_table_lock held and between
 * mmu_notifier_invalidate_range_start()/_end() for the range.
 */
2479static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
2480 unsigned long haddr, pmd_t *pmd)
2481{
2482 struct mm_struct *mm = vma->vm_mm;
2483 pgtable_t pgtable;
/* Local pmd value used to address the new pte table before it is published. */
2484 pmd_t _pmd;
2485 int i;
2486
2487 pmdp_clear_flush(vma, haddr, pmd);
2488 /* leave pmd empty until pte is filled */
2489
/*
 * NOTE(review): presumably this withdraws the page table that was deposited
 * when the huge pmd was installed -- confirm against
 * pgtable_trans_huge_withdraw().
 */
2490 pgtable = pgtable_trans_huge_withdraw(mm);
/* Point the local _pmd (not the real one) at the table so pte_offset_map() works. */
2491 pmd_populate(mm, &_pmd, pgtable);
2492
/* One pte per PAGE_SIZE step; haddr is advanced in place and not reused after the loop. */
2493 for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
2494 pte_t *pte, entry;
/* Build a special pte for the zero-page pfn with the VMA's protection bits. */
2495 entry = pfn_pte(my_zero_pfn(haddr), vma->vm_page_prot);
2496 entry = pte_mkspecial(entry);
2497 pte = pte_offset_map(&_pmd, haddr);
/* Every slot of the table is expected to be empty before it is written. */
2498 VM_BUG_ON(!pte_none(*pte));
2499 set_pte_at(mm, haddr, pte, entry);
2500 pte_unmap(pte);
2501 }
2502 smp_wmb(); /* make pte visible before pmd */
/* Publish the fully populated table at the real pmd. */
2503 pmd_populate(mm, pmd, pgtable);
2504}
2505
2478void __split_huge_page_pmd(struct vm_area_struct *vma, unsigned long address, 2506void __split_huge_page_pmd(struct vm_area_struct *vma, unsigned long address,
2479 pmd_t *pmd) 2507 pmd_t *pmd)
2480{ 2508{
2481 struct page *page; 2509 struct page *page;
2482 unsigned long haddr = address & HPAGE_PMD_MASK;
2483 struct mm_struct *mm = vma->vm_mm; 2510 struct mm_struct *mm = vma->vm_mm;
2511 unsigned long haddr = address & HPAGE_PMD_MASK;
2512 unsigned long mmun_start; /* For mmu_notifiers */
2513 unsigned long mmun_end; /* For mmu_notifiers */
2484 2514
2485 BUG_ON(vma->vm_start > haddr || vma->vm_end < haddr + HPAGE_PMD_SIZE); 2515 BUG_ON(vma->vm_start > haddr || vma->vm_end < haddr + HPAGE_PMD_SIZE);
2486 2516
2517 mmun_start = haddr;
2518 mmun_end = haddr + HPAGE_PMD_SIZE;
2519 mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
2487 spin_lock(&mm->page_table_lock); 2520 spin_lock(&mm->page_table_lock);
2488 if (unlikely(!pmd_trans_huge(*pmd))) { 2521 if (unlikely(!pmd_trans_huge(*pmd))) {
2489 spin_unlock(&mm->page_table_lock); 2522 spin_unlock(&mm->page_table_lock);
2523 mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
2524 return;
2525 }
2526 if (is_huge_zero_pmd(*pmd)) {
2527 __split_huge_zero_page_pmd(vma, haddr, pmd);
2528 spin_unlock(&mm->page_table_lock);
2529 mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
2490 return; 2530 return;
2491 } 2531 }
2492 page = pmd_page(*pmd); 2532 page = pmd_page(*pmd);
2493 VM_BUG_ON(!page_count(page)); 2533 VM_BUG_ON(!page_count(page));
2494 get_page(page); 2534 get_page(page);
2495 spin_unlock(&mm->page_table_lock); 2535 spin_unlock(&mm->page_table_lock);
2536 mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
2496 2537
2497 split_huge_page(page); 2538 split_huge_page(page);
2498 2539