aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Mel Gorman <mgorman@suse.de> 2012-11-02 07:33:45 -0400
committer: Mel Gorman <mgorman@suse.de> 2012-12-11 09:42:42 -0500
commit: 4daae3b4b9e49b7e0935499a352f1c59d90287d2 (patch)
tree: 2ac600b955c89e3b1b2070110a9b7293a4511b19
parent: 149c33e1c98f83050870514f380902dc6d617bd5 (diff)
mm: mempolicy: Use _PAGE_NUMA to migrate pages
Note: Based on "mm/mpol: Use special PROT_NONE to migrate pages" but sufficiently different that the signed-off-bys were dropped.

Combine our previous _PAGE_NUMA, mpol_misplaced and migrate_misplaced_page() pieces into an effective migrate on fault scheme.

Note that (on x86) we rely on PROT_NONE pages being !present and avoid the TLB flush from try_to_unmap(TTU_MIGRATION). This greatly improves the page-migration performance.

Based-on-work-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Mel Gorman <mgorman@suse.de>
-rw-r--r-- include/linux/huge_mm.h 9
-rw-r--r-- mm/huge_memory.c 31
-rw-r--r-- mm/memory.c 32
3 files changed, 60 insertions, 12 deletions
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index a1d26a98c655..dabb5108d6c0 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -160,8 +160,8 @@ static inline struct page *compound_trans_head(struct page *page)
160 return page; 160 return page;
161} 161}
162 162
163extern int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr, 163extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
164 pmd_t pmd, pmd_t *pmdp); 164 unsigned long addr, pmd_t pmd, pmd_t *pmdp);
165 165
166#else /* CONFIG_TRANSPARENT_HUGEPAGE */ 166#else /* CONFIG_TRANSPARENT_HUGEPAGE */
167#define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; }) 167#define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
@@ -200,9 +200,10 @@ static inline int pmd_trans_huge_lock(pmd_t *pmd,
200 return 0; 200 return 0;
201} 201}
202 202
203static inline int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr, 203static inline int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
204 pmd_t pmd, pmd_t *pmdp) 204 unsigned long addr, pmd_t pmd, pmd_t *pmdp)
205{ 205{
206 return 0;
206} 207}
207 208
208#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 209#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f5f37630c54d..5723b551c023 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -18,6 +18,7 @@
18#include <linux/freezer.h> 18#include <linux/freezer.h>
19#include <linux/mman.h> 19#include <linux/mman.h>
20#include <linux/pagemap.h> 20#include <linux/pagemap.h>
21#include <linux/migrate.h>
21#include <asm/tlb.h> 22#include <asm/tlb.h>
22#include <asm/pgalloc.h> 23#include <asm/pgalloc.h>
23#include "internal.h" 24#include "internal.h"
@@ -1019,17 +1020,39 @@ out:
1019} 1020}
1020 1021
1021/* NUMA hinting page fault entry point for trans huge pmds */ 1022/* NUMA hinting page fault entry point for trans huge pmds */
1022int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr, 1023int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
1023 pmd_t pmd, pmd_t *pmdp) 1024 unsigned long addr, pmd_t pmd, pmd_t *pmdp)
1024{ 1025{
1025 struct page *page; 1026 struct page *page = NULL;
1026 unsigned long haddr = addr & HPAGE_PMD_MASK; 1027 unsigned long haddr = addr & HPAGE_PMD_MASK;
1028 int target_nid;
1027 1029
1028 spin_lock(&mm->page_table_lock); 1030 spin_lock(&mm->page_table_lock);
1029 if (unlikely(!pmd_same(pmd, *pmdp))) 1031 if (unlikely(!pmd_same(pmd, *pmdp)))
1030 goto out_unlock; 1032 goto out_unlock;
1031 1033
1032 page = pmd_page(pmd); 1034 page = pmd_page(pmd);
1035 get_page(page);
1036 spin_unlock(&mm->page_table_lock);
1037
1038 target_nid = mpol_misplaced(page, vma, haddr);
1039 if (target_nid == -1)
1040 goto clear_pmdnuma;
1041
1042 /*
1043 * Due to lacking code to migrate thp pages, we'll split
1044 * (which preserves the special PROT_NONE) and re-take the
1045 * fault on the normal pages.
1046 */
1047 split_huge_page(page);
1048 put_page(page);
1049 return 0;
1050
1051clear_pmdnuma:
1052 spin_lock(&mm->page_table_lock);
1053 if (unlikely(!pmd_same(pmd, *pmdp)))
1054 goto out_unlock;
1055
1033 pmd = pmd_mknonnuma(pmd); 1056 pmd = pmd_mknonnuma(pmd);
1034 set_pmd_at(mm, haddr, pmdp, pmd); 1057 set_pmd_at(mm, haddr, pmdp, pmd);
1035 VM_BUG_ON(pmd_numa(*pmdp)); 1058 VM_BUG_ON(pmd_numa(*pmdp));
@@ -1037,6 +1060,8 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr,
1037 1060
1038out_unlock: 1061out_unlock:
1039 spin_unlock(&mm->page_table_lock); 1062 spin_unlock(&mm->page_table_lock);
1063 if (page)
1064 put_page(page);
1040 return 0; 1065 return 0;
1041} 1066}
1042 1067
diff --git a/mm/memory.c b/mm/memory.c
index e30616f2cc3d..d52542680e10 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -57,6 +57,7 @@
57#include <linux/swapops.h> 57#include <linux/swapops.h>
58#include <linux/elf.h> 58#include <linux/elf.h>
59#include <linux/gfp.h> 59#include <linux/gfp.h>
60#include <linux/migrate.h>
60 61
61#include <asm/io.h> 62#include <asm/io.h>
62#include <asm/pgalloc.h> 63#include <asm/pgalloc.h>
@@ -3451,8 +3452,9 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3451int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, 3452int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
3452 unsigned long addr, pte_t pte, pte_t *ptep, pmd_t *pmd) 3453 unsigned long addr, pte_t pte, pte_t *ptep, pmd_t *pmd)
3453{ 3454{
3454 struct page *page; 3455 struct page *page = NULL;
3455 spinlock_t *ptl; 3456 spinlock_t *ptl;
3457 int current_nid, target_nid;
3456 3458
3457 /* 3459 /*
3458 * The "pte" at this point cannot be used safely without 3460 * The "pte" at this point cannot be used safely without
@@ -3465,8 +3467,11 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
3465 */ 3467 */
3466 ptl = pte_lockptr(mm, pmd); 3468 ptl = pte_lockptr(mm, pmd);
3467 spin_lock(ptl); 3469 spin_lock(ptl);
3468 if (unlikely(!pte_same(*ptep, pte))) 3470 if (unlikely(!pte_same(*ptep, pte))) {
3469 goto out_unlock; 3471 pte_unmap_unlock(ptep, ptl);
3472 goto out;
3473 }
3474
3470 pte = pte_mknonnuma(pte); 3475 pte = pte_mknonnuma(pte);
3471 set_pte_at(mm, addr, ptep, pte); 3476 set_pte_at(mm, addr, ptep, pte);
3472 update_mmu_cache(vma, addr, ptep); 3477 update_mmu_cache(vma, addr, ptep);
@@ -3477,8 +3482,25 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
3477 return 0; 3482 return 0;
3478 } 3483 }
3479 3484
3480out_unlock: 3485 get_page(page);
3486 current_nid = page_to_nid(page);
3487 target_nid = mpol_misplaced(page, vma, addr);
3481 pte_unmap_unlock(ptep, ptl); 3488 pte_unmap_unlock(ptep, ptl);
3489 if (target_nid == -1) {
3490 /*
3491 * Account for the fault against the current node if it not
3492 * being replaced regardless of where the page is located.
3493 */
3494 current_nid = numa_node_id();
3495 put_page(page);
3496 goto out;
3497 }
3498
3499 /* Migrate to the requested node */
3500 if (migrate_misplaced_page(page, target_nid))
3501 current_nid = target_nid;
3502
3503out:
3482 return 0; 3504 return 0;
3483} 3505}
3484 3506
@@ -3655,7 +3677,7 @@ retry:
3655 barrier(); 3677 barrier();
3656 if (pmd_trans_huge(orig_pmd)) { 3678 if (pmd_trans_huge(orig_pmd)) {
3657 if (pmd_numa(*pmd)) 3679 if (pmd_numa(*pmd))
3658 return do_huge_pmd_numa_page(mm, address, 3680 return do_huge_pmd_numa_page(mm, vma, address,
3659 orig_pmd, pmd); 3681 orig_pmd, pmd);
3660 3682
3661 if ((flags & FAULT_FLAG_WRITE) && !pmd_write(orig_pmd)) { 3683 if ((flags & FAULT_FLAG_WRITE) && !pmd_write(orig_pmd)) {