path: root/mm/mempolicy.c
author	Mel Gorman <mgorman@suse.de>	2012-10-25 08:16:32 -0400
committer	Mel Gorman <mgorman@suse.de>	2012-12-11 09:42:44 -0500
commit	4b10e7d562c90d0a72f324832c26653947a07381 (patch)
tree	733e0fc3ac3fdfe27a312bc72e4ffb07bbf0aa56 /mm/mempolicy.c
parent	b24f53a0bea38b266d219ee651b22dba727c44ae (diff)
mm: mempolicy: Implement change_prot_numa() in terms of change_protection()
This patch converts change_prot_numa() to use change_protection(). As pte_numa and friends check the PTE bits directly, it is necessary for change_protection() to use pmd_mknuma(). Hence the required modifications to change_protection() are a little clumsy, but the end result is that most of the NUMA page table helpers are just one or two instructions.

Signed-off-by: Mel Gorman <mgorman@suse.de>
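The change_protection() side of this conversion lives in mm/mprotect.c and is not part of the mm/mempolicy.c diff shown below. As a rough, illustrative sketch only, not the verbatim hunk from this series, the pte-level walk could take a prot_numa flag and mark resident, non-shared pages with pte_mknuma(); the exact signature, the dirty_accountable handling, and the pmd_mknuma() path are assumptions based on the commit message:

/*
 * Illustrative sketch only; not the verbatim mm/mprotect.c change from this
 * series. With prot_numa set, resident pages mapped exactly once are marked
 * with pte_mknuma() so the next access raises a NUMA hinting fault;
 * otherwise the normal protection change is applied.
 */
static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
		unsigned long addr, unsigned long end, pgprot_t newprot,
		int dirty_accountable, int prot_numa)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long pages = 0;
	spinlock_t *ptl;
	pte_t *pte;

	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	do {
		pte_t oldpte = *pte;

		if (!pte_present(oldpte))
			continue;

		if (prot_numa) {
			struct page *page;

			/* Only private, resident pages become NUMA ptes */
			page = vm_normal_page(vma, addr, oldpte);
			if (!page || page_mapcount(page) != 1)
				continue;
			if (pte_numa(oldpte))
				continue;
			set_pte_at(mm, addr, pte, pte_mknuma(oldpte));
		} else {
			set_pte_at(mm, addr, pte, pte_modify(oldpte, newprot));
		}
		pages++;
	} while (pte++, addr += PAGE_SIZE, addr != end);
	pte_unmap_unlock(pte - 1, ptl);

	return pages;
}

In mm/mempolicy.c itself, change_prot_numa() then reduces to the change_protection(vma, addr, end, vma->vm_page_prot, 0, 1) call visible in the hunk below, with the trailing 1 acting as the prot_numa flag.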
Diffstat (limited to 'mm/mempolicy.c')
-rw-r--r--	mm/mempolicy.c	137
1 file changed, 13 insertions(+), 124 deletions(-)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 51d3ebd8561e..75d4600a5e92 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -568,134 +568,23 @@ static inline int check_pgd_range(struct vm_area_struct *vma,
 
 #ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE
 /*
- * Here we search for not shared page mappings (mapcount == 1) and we
- * set up the pmd/pte_numa on those mappings so the very next access
- * will fire a NUMA hinting page fault.
+ * This is used to mark a range of virtual addresses to be inaccessible.
+ * These are later cleared by a NUMA hinting fault. Depending on these
+ * faults, pages may be migrated for better NUMA placement.
+ *
+ * This is assuming that NUMA faults are handled using PROT_NONE. If
+ * an architecture makes a different choice, it will need further
+ * changes to the core.
  */
-static int
-change_prot_numa_range(struct mm_struct *mm, struct vm_area_struct *vma,
-		unsigned long address)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte, *_pte;
-	struct page *page;
-	unsigned long _address, end;
-	spinlock_t *ptl;
-	int ret = 0;
-
-	VM_BUG_ON(address & ~PAGE_MASK);
-
-	pgd = pgd_offset(mm, address);
-	if (!pgd_present(*pgd))
-		goto out;
-
-	pud = pud_offset(pgd, address);
-	if (!pud_present(*pud))
-		goto out;
-
-	pmd = pmd_offset(pud, address);
-	if (pmd_none(*pmd))
-		goto out;
-
-	if (pmd_trans_huge_lock(pmd, vma) == 1) {
-		int page_nid;
-		ret = HPAGE_PMD_NR;
-
-		VM_BUG_ON(address & ~HPAGE_PMD_MASK);
-
-		if (pmd_numa(*pmd)) {
-			spin_unlock(&mm->page_table_lock);
-			goto out;
-		}
-
-		page = pmd_page(*pmd);
-
-		/* only check non-shared pages */
-		if (page_mapcount(page) != 1) {
-			spin_unlock(&mm->page_table_lock);
-			goto out;
-		}
-
-		page_nid = page_to_nid(page);
-
-		if (pmd_numa(*pmd)) {
-			spin_unlock(&mm->page_table_lock);
-			goto out;
-		}
-
-		set_pmd_at(mm, address, pmd, pmd_mknuma(*pmd));
-		ret += HPAGE_PMD_NR;
-		/* defer TLB flush to lower the overhead */
-		spin_unlock(&mm->page_table_lock);
-		goto out;
-	}
-
-	if (pmd_trans_unstable(pmd))
-		goto out;
-	VM_BUG_ON(!pmd_present(*pmd));
-
-	end = min(vma->vm_end, (address + PMD_SIZE) & PMD_MASK);
-	pte = pte_offset_map_lock(mm, pmd, address, &ptl);
-	for (_address = address, _pte = pte; _address < end;
-	     _pte++, _address += PAGE_SIZE) {
-		pte_t pteval = *_pte;
-		if (!pte_present(pteval))
-			continue;
-		if (pte_numa(pteval))
-			continue;
-		page = vm_normal_page(vma, _address, pteval);
-		if (unlikely(!page))
-			continue;
-		/* only check non-shared pages */
-		if (page_mapcount(page) != 1)
-			continue;
-
-		set_pte_at(mm, _address, _pte, pte_mknuma(pteval));
-
-		/* defer TLB flush to lower the overhead */
-		ret++;
-	}
-	pte_unmap_unlock(pte, ptl);
-
-	if (ret && !pmd_numa(*pmd)) {
-		spin_lock(&mm->page_table_lock);
-		set_pmd_at(mm, address, pmd, pmd_mknuma(*pmd));
-		spin_unlock(&mm->page_table_lock);
-		/* defer TLB flush to lower the overhead */
-	}
-
-out:
-	return ret;
-}
-
-/* Assumes mmap_sem is held */
-void
-change_prot_numa(struct vm_area_struct *vma,
-			unsigned long address, unsigned long end)
+unsigned long change_prot_numa(struct vm_area_struct *vma,
+			unsigned long addr, unsigned long end)
 {
-	struct mm_struct *mm = vma->vm_mm;
-	int progress = 0;
-
-	while (address < end) {
-		VM_BUG_ON(address < vma->vm_start ||
-			  address + PAGE_SIZE > vma->vm_end);
+	int nr_updated;
+	BUILD_BUG_ON(_PAGE_NUMA != _PAGE_PROTNONE);
 
-		progress += change_prot_numa_range(mm, vma, address);
-		address = (address + PMD_SIZE) & PMD_MASK;
-	}
+	nr_updated = change_protection(vma, addr, end, vma->vm_page_prot, 0, 1);
 
-	/*
-	 * Flush the TLB for the mm to start the NUMA hinting
-	 * page faults after we finish scanning this vma part
-	 * if there were any PTE updates
-	 */
-	if (progress) {
-		mmu_notifier_invalidate_range_start(vma->vm_mm, address, end);
-		flush_tlb_range(vma, address, end);
-		mmu_notifier_invalidate_range_end(vma->vm_mm, address, end);
-	}
+	return nr_updated;
 }
 #else
 static unsigned long change_prot_numa(struct vm_area_struct *vma,