path: root/mm/huge_memory.c
author	Mel Gorman <mgorman@suse.de>	2012-11-19 07:35:47 -0500
committer	Mel Gorman <mgorman@suse.de>	2012-12-11 09:42:57 -0500
commit	b32967ff101a7508f70be8de59b278d4df92fa00 (patch)
tree	b106d5eea06f97d0174f483d6a05a8b7ddd64154 /mm/huge_memory.c
parent	5bca23035391928c4c7301835accca3551b96cc2 (diff)
mm: numa: Add THP migration for the NUMA working set scanning fault case.
Note: This is very heavily based on a patch from Peter Zijlstra with fixes from Ingo Molnar, Hugh Dickins and Johannes Weiner. That patch put a lot of migration logic into mm/huge_memory.c where it does not belong. This version tries to share some of the migration logic with migrate_misplaced_page. However, it should be noted that now migrate.c is doing more with the pagetable manipulation than is preferred. The end result is barely recognisable so as before, the signed-offs had to be removed but will be re-added if the original authors are ok with it.

Add THP migration for the NUMA working set scanning fault case. It uses the page lock to serialize. No migration pte dance is necessary because the pte is already unmapped when we decide to migrate.

[dhillf@gmail.com: Fix memory leak on isolation failure]
[dhillf@gmail.com: Fix transfer of last_nid information]
Signed-off-by: Mel Gorman <mgorman@suse.de>
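The serialisation the changelog describes can be condensed as follows. This is a rough, non-authoritative sketch of the control flow the patch introduces in do_huge_pmd_numa_page(); error handling and the clear_pmdnuma fallback are trimmed (the backout label is illustrative only), and the hunk below remains the authoritative version:

	/* Sketch: serialising THP NUMA migration on the page lock */
	spin_lock(&mm->page_table_lock);
	if (unlikely(!pmd_same(pmd, *pmdp)))
		goto out_unlock;	/* raced with another fault handler */
	page = pmd_page(pmd);
	get_page(page);			/* pin the THP across the lock drop */
	target_nid = mpol_misplaced(page, vma, haddr);

	/* Take the page lock to serialise against parallel migrations */
	spin_unlock(&mm->page_table_lock);
	lock_page(page);

	/* The PMD may have changed while page_table_lock was dropped */
	spin_lock(&mm->page_table_lock);
	if (unlikely(!pmd_same(pmd, *pmdp)))
		goto backout;		/* unlock_page(), put_page(), bail */
	spin_unlock(&mm->page_table_lock);

	/* Locked, pinned and still mapped: safe to hand off for migration */
	migrated = migrate_misplaced_transhuge_page(mm, vma, pmdp, pmd,
						    addr, page, target_nid);

As the changelog notes, no migration pte dance is needed: the PMD is marked pmd_numa, so the page is already effectively unmapped when the migration decision is made, and the pmd_same() re-check after retaking page_table_lock is what catches a racing fault.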
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r--	mm/huge_memory.c	59
1 file changed, 40 insertions(+), 19 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 199b261a257e..711baf84b153 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -600,7 +600,7 @@ out:
 }
 __setup("transparent_hugepage=", setup_transparent_hugepage);
 
-static inline pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
+pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
 {
 	if (likely(vma->vm_flags & VM_WRITE))
 		pmd = pmd_mkwrite(pmd);
@@ -1023,10 +1023,12 @@ out:
 int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 				unsigned long addr, pmd_t pmd, pmd_t *pmdp)
 {
-	struct page *page = NULL;
+	struct page *page;
 	unsigned long haddr = addr & HPAGE_PMD_MASK;
 	int target_nid;
 	int current_nid = -1;
+	bool migrated;
+	bool page_locked = false;
 
 	spin_lock(&mm->page_table_lock);
 	if (unlikely(!pmd_same(pmd, *pmdp)))
@@ -1034,42 +1036,61 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	page = pmd_page(pmd);
 	get_page(page);
-	spin_unlock(&mm->page_table_lock);
 	current_nid = page_to_nid(page);
 	count_vm_numa_event(NUMA_HINT_FAULTS);
 	if (current_nid == numa_node_id())
 		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
 
 	target_nid = mpol_misplaced(page, vma, haddr);
-	if (target_nid == -1)
+	if (target_nid == -1) {
+		put_page(page);
 		goto clear_pmdnuma;
+	}
 
-	/*
-	 * Due to lacking code to migrate thp pages, we'll split
-	 * (which preserves the special PROT_NONE) and re-take the
-	 * fault on the normal pages.
-	 */
-	split_huge_page(page);
-	put_page(page);
-
-	return 0;
+	/* Acquire the page lock to serialise THP migrations */
+	spin_unlock(&mm->page_table_lock);
+	lock_page(page);
+	page_locked = true;
 
-clear_pmdnuma:
+	/* Confirm the PTE did not change while locked */
 	spin_lock(&mm->page_table_lock);
-	if (unlikely(!pmd_same(pmd, *pmdp)))
+	if (unlikely(!pmd_same(pmd, *pmdp))) {
+		unlock_page(page);
+		put_page(page);
 		goto out_unlock;
+	}
+	spin_unlock(&mm->page_table_lock);
+
+	/* Migrate the THP to the requested node */
+	migrated = migrate_misplaced_transhuge_page(mm, vma,
+				pmdp, pmd, addr,
+				page, target_nid);
+	if (migrated)
+		current_nid = target_nid;
+	else {
+		spin_lock(&mm->page_table_lock);
+		if (unlikely(!pmd_same(pmd, *pmdp))) {
+			unlock_page(page);
+			goto out_unlock;
+		}
+		goto clear_pmdnuma;
+	}
+
+	task_numa_fault(current_nid, HPAGE_PMD_NR, migrated);
+	return 0;
 
+clear_pmdnuma:
 	pmd = pmd_mknonnuma(pmd);
 	set_pmd_at(mm, haddr, pmdp, pmd);
 	VM_BUG_ON(pmd_numa(*pmdp));
 	update_mmu_cache_pmd(vma, addr, pmdp);
+	if (page_locked)
+		unlock_page(page);
 
 out_unlock:
 	spin_unlock(&mm->page_table_lock);
-	if (page) {
-		put_page(page);
-		task_numa_fault(numa_node_id(), HPAGE_PMD_NR, false);
-	}
+	if (current_nid != -1)
+		task_numa_fault(current_nid, HPAGE_PMD_NR, migrated);
 	return 0;
 }
 
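For anyone auditing the page lock and reference counting, the exit paths in the new code pair up as summarised below. The success and failure cases depend on migrate_misplaced_transhuge_page() in mm/migrate.c, which is not part of this hunk, so the statements about the callee are inferences from the caller side only:

	/* Caller-side exits of the patched do_huge_pmd_numa_page():
	 *
	 * - target_nid == -1: put_page(), then clear the NUMA bit; the
	 *   page was never locked, so page_locked stays false.
	 * - PMD changed after lock_page(): unlock_page() + put_page(),
	 *   then bail via out_unlock.
	 * - migration succeeded: neither unlock_page() nor put_page()
	 *   runs here, so the callee presumably consumes both the page
	 *   lock and the reference.
	 * - migration failed: unlock_page() runs (directly, or via the
	 *   page_locked check at clear_pmdnuma), but no put_page() here;
	 *   per the changelog's isolation-failure fix, the reference is
	 *   presumably dropped by the callee.
	 */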