diff options
author | Mel Gorman <mgorman@suse.de> | 2012-11-02 07:33:45 -0400 |
---|---|---|
committer | Mel Gorman <mgorman@suse.de> | 2012-12-11 09:42:42 -0500 |
commit | 4daae3b4b9e49b7e0935499a352f1c59d90287d2 (patch) | |
tree | 2ac600b955c89e3b1b2070110a9b7293a4511b19 | |
parent | 149c33e1c98f83050870514f380902dc6d617bd5 (diff) |
mm: mempolicy: Use _PAGE_NUMA to migrate pages
Note: Based on "mm/mpol: Use special PROT_NONE to migrate pages" but
sufficiently different that the signed-off-bys were dropped.
Combine our previous _PAGE_NUMA, mpol_misplaced() and migrate_misplaced_page()
pieces into an effective migrate-on-fault scheme.
Note that (on x86) we rely on PROT_NONE pages being !present and avoid
the TLB flush from try_to_unmap(TTU_MIGRATION). This greatly improves the
page-migration performance.
Based-on-work-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Mel Gorman <mgorman@suse.de>
-rw-r--r-- | include/linux/huge_mm.h | 9 | ||||
-rw-r--r-- | mm/huge_memory.c | 31 | ||||
-rw-r--r-- | mm/memory.c | 32 |
3 files changed, 60 insertions, 12 deletions
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index a1d26a98c655..dabb5108d6c0 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h | |||
@@ -160,8 +160,8 @@ static inline struct page *compound_trans_head(struct page *page) | |||
160 | return page; | 160 | return page; |
161 | } | 161 | } |
162 | 162 | ||
163 | extern int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr, | 163 | extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, |
164 | pmd_t pmd, pmd_t *pmdp); | 164 | unsigned long addr, pmd_t pmd, pmd_t *pmdp); |
165 | 165 | ||
166 | #else /* CONFIG_TRANSPARENT_HUGEPAGE */ | 166 | #else /* CONFIG_TRANSPARENT_HUGEPAGE */ |
167 | #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; }) | 167 | #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; }) |
@@ -200,9 +200,10 @@ static inline int pmd_trans_huge_lock(pmd_t *pmd, | |||
200 | return 0; | 200 | return 0; |
201 | } | 201 | } |
202 | 202 | ||
203 | static inline int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr, | 203 | static inline int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, |
204 | pmd_t pmd, pmd_t *pmdp) | 204 | unsigned long addr, pmd_t pmd, pmd_t *pmdp) |
205 | { | 205 | { |
206 | return 0; | ||
206 | } | 207 | } |
207 | 208 | ||
208 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ | 209 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ |
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index f5f37630c54d..5723b551c023 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/freezer.h> | 18 | #include <linux/freezer.h> |
19 | #include <linux/mman.h> | 19 | #include <linux/mman.h> |
20 | #include <linux/pagemap.h> | 20 | #include <linux/pagemap.h> |
21 | #include <linux/migrate.h> | ||
21 | #include <asm/tlb.h> | 22 | #include <asm/tlb.h> |
22 | #include <asm/pgalloc.h> | 23 | #include <asm/pgalloc.h> |
23 | #include "internal.h" | 24 | #include "internal.h" |
@@ -1019,17 +1020,39 @@ out: | |||
1019 | } | 1020 | } |
1020 | 1021 | ||
1021 | /* NUMA hinting page fault entry point for trans huge pmds */ | 1022 | /* NUMA hinting page fault entry point for trans huge pmds */ |
1022 | int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr, | 1023 | int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, |
1023 | pmd_t pmd, pmd_t *pmdp) | 1024 | unsigned long addr, pmd_t pmd, pmd_t *pmdp) |
1024 | { | 1025 | { |
1025 | struct page *page; | 1026 | struct page *page = NULL; |
1026 | unsigned long haddr = addr & HPAGE_PMD_MASK; | 1027 | unsigned long haddr = addr & HPAGE_PMD_MASK; |
1028 | int target_nid; | ||
1027 | 1029 | ||
1028 | spin_lock(&mm->page_table_lock); | 1030 | spin_lock(&mm->page_table_lock); |
1029 | if (unlikely(!pmd_same(pmd, *pmdp))) | 1031 | if (unlikely(!pmd_same(pmd, *pmdp))) |
1030 | goto out_unlock; | 1032 | goto out_unlock; |
1031 | 1033 | ||
1032 | page = pmd_page(pmd); | 1034 | page = pmd_page(pmd); |
1035 | get_page(page); | ||
1036 | spin_unlock(&mm->page_table_lock); | ||
1037 | |||
1038 | target_nid = mpol_misplaced(page, vma, haddr); | ||
1039 | if (target_nid == -1) | ||
1040 | goto clear_pmdnuma; | ||
1041 | |||
1042 | /* | ||
1043 | * Due to lacking code to migrate thp pages, we'll split | ||
1044 | * (which preserves the special PROT_NONE) and re-take the | ||
1045 | * fault on the normal pages. | ||
1046 | */ | ||
1047 | split_huge_page(page); | ||
1048 | put_page(page); | ||
1049 | return 0; | ||
1050 | |||
1051 | clear_pmdnuma: | ||
1052 | spin_lock(&mm->page_table_lock); | ||
1053 | if (unlikely(!pmd_same(pmd, *pmdp))) | ||
1054 | goto out_unlock; | ||
1055 | |||
1033 | pmd = pmd_mknonnuma(pmd); | 1056 | pmd = pmd_mknonnuma(pmd); |
1034 | set_pmd_at(mm, haddr, pmdp, pmd); | 1057 | set_pmd_at(mm, haddr, pmdp, pmd); |
1035 | VM_BUG_ON(pmd_numa(*pmdp)); | 1058 | VM_BUG_ON(pmd_numa(*pmdp)); |
@@ -1037,6 +1060,8 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr, | |||
1037 | 1060 | ||
1038 | out_unlock: | 1061 | out_unlock: |
1039 | spin_unlock(&mm->page_table_lock); | 1062 | spin_unlock(&mm->page_table_lock); |
1063 | if (page) | ||
1064 | put_page(page); | ||
1040 | return 0; | 1065 | return 0; |
1041 | } | 1066 | } |
1042 | 1067 | ||
diff --git a/mm/memory.c b/mm/memory.c index e30616f2cc3d..d52542680e10 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -57,6 +57,7 @@ | |||
57 | #include <linux/swapops.h> | 57 | #include <linux/swapops.h> |
58 | #include <linux/elf.h> | 58 | #include <linux/elf.h> |
59 | #include <linux/gfp.h> | 59 | #include <linux/gfp.h> |
60 | #include <linux/migrate.h> | ||
60 | 61 | ||
61 | #include <asm/io.h> | 62 | #include <asm/io.h> |
62 | #include <asm/pgalloc.h> | 63 | #include <asm/pgalloc.h> |
@@ -3451,8 +3452,9 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
3451 | int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | 3452 | int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, |
3452 | unsigned long addr, pte_t pte, pte_t *ptep, pmd_t *pmd) | 3453 | unsigned long addr, pte_t pte, pte_t *ptep, pmd_t *pmd) |
3453 | { | 3454 | { |
3454 | struct page *page; | 3455 | struct page *page = NULL; |
3455 | spinlock_t *ptl; | 3456 | spinlock_t *ptl; |
3457 | int current_nid, target_nid; | ||
3456 | 3458 | ||
3457 | /* | 3459 | /* |
3458 | * The "pte" at this point cannot be used safely without | 3460 | * The "pte" at this point cannot be used safely without |
@@ -3465,8 +3467,11 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
3465 | */ | 3467 | */ |
3466 | ptl = pte_lockptr(mm, pmd); | 3468 | ptl = pte_lockptr(mm, pmd); |
3467 | spin_lock(ptl); | 3469 | spin_lock(ptl); |
3468 | if (unlikely(!pte_same(*ptep, pte))) | 3470 | if (unlikely(!pte_same(*ptep, pte))) { |
3469 | goto out_unlock; | 3471 | pte_unmap_unlock(ptep, ptl); |
3472 | goto out; | ||
3473 | } | ||
3474 | |||
3470 | pte = pte_mknonnuma(pte); | 3475 | pte = pte_mknonnuma(pte); |
3471 | set_pte_at(mm, addr, ptep, pte); | 3476 | set_pte_at(mm, addr, ptep, pte); |
3472 | update_mmu_cache(vma, addr, ptep); | 3477 | update_mmu_cache(vma, addr, ptep); |
@@ -3477,8 +3482,25 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
3477 | return 0; | 3482 | return 0; |
3478 | } | 3483 | } |
3479 | 3484 | ||
3480 | out_unlock: | 3485 | get_page(page); |
3486 | current_nid = page_to_nid(page); | ||
3487 | target_nid = mpol_misplaced(page, vma, addr); | ||
3481 | pte_unmap_unlock(ptep, ptl); | 3488 | pte_unmap_unlock(ptep, ptl); |
3489 | if (target_nid == -1) { | ||
3490 | /* | ||
3491 | * Account for the fault against the current node if it is not | ||
3492 | * being replaced regardless of where the page is located. | ||
3493 | */ | ||
3494 | current_nid = numa_node_id(); | ||
3495 | put_page(page); | ||
3496 | goto out; | ||
3497 | } | ||
3498 | |||
3499 | /* Migrate to the requested node */ | ||
3500 | if (migrate_misplaced_page(page, target_nid)) | ||
3501 | current_nid = target_nid; | ||
3502 | |||
3503 | out: | ||
3482 | return 0; | 3504 | return 0; |
3483 | } | 3505 | } |
3484 | 3506 | ||
@@ -3655,7 +3677,7 @@ retry: | |||
3655 | barrier(); | 3677 | barrier(); |
3656 | if (pmd_trans_huge(orig_pmd)) { | 3678 | if (pmd_trans_huge(orig_pmd)) { |
3657 | if (pmd_numa(*pmd)) | 3679 | if (pmd_numa(*pmd)) |
3658 | return do_huge_pmd_numa_page(mm, address, | 3680 | return do_huge_pmd_numa_page(mm, vma, address, |
3659 | orig_pmd, pmd); | 3681 | orig_pmd, pmd); |
3660 | 3682 | ||
3661 | if ((flags & FAULT_FLAG_WRITE) && !pmd_write(orig_pmd)) { | 3683 | if ((flags & FAULT_FLAG_WRITE) && !pmd_write(orig_pmd)) { |