author	Mel Gorman <mgorman@suse.de>	2012-11-19 07:35:47 -0500
committer	Mel Gorman <mgorman@suse.de>	2012-12-11 09:42:57 -0500
commit	b32967ff101a7508f70be8de59b278d4df92fa00 (patch)
tree	b106d5eea06f97d0174f483d6a05a8b7ddd64154 /mm
parent	5bca23035391928c4c7301835accca3551b96cc2 (diff)
mm: numa: Add THP migration for the NUMA working set scanning fault case.
Note: This is very heavily based on a patch from Peter Zijlstra with
      fixes from Ingo Molnar, Hugh Dickins and Johannes Weiner. That
      patch put a lot of migration logic into mm/huge_memory.c where it
      does not belong. This version tries to share some of the migration
      logic with migrate_misplaced_page. However, it should be noted
      that now migrate.c is doing more with the pagetable manipulation
      than is preferred. The end result is barely recognisable so as
      before, the signed-offs had to be removed but will be re-added if
      the original authors are ok with it.

Add THP migration for the NUMA working set scanning fault case. It
uses the page lock to serialize. No migration pte dance is necessary
because the pte is already unmapped when we decide to migrate.

[dhillf@gmail.com: Fix memory leak on isolation failure]
[dhillf@gmail.com: Fix transfer of last_nid information]
Signed-off-by: Mel Gorman <mgorman@suse.de>
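For orientation before the diff: a condensed sketch of the serialisation
described above, as it appears in the reworked do_huge_pmd_numa_page()
below (illustrative only; statistics and some error paths are elided):

	spin_lock(&mm->page_table_lock);
	page = pmd_page(pmd);
	get_page(page);			/* pin the huge page */
	target_nid = mpol_misplaced(page, vma, haddr);
	spin_unlock(&mm->page_table_lock);

	lock_page(page);		/* page lock serialises THP migrations */
	spin_lock(&mm->page_table_lock);
	if (!pmd_same(pmd, *pmdp)) {
		/* PMD changed under us: unlock_page(), put_page(), bail */
	}
	spin_unlock(&mm->page_table_lock);

	/*
	 * No migration-pte dance is needed: the NUMA pmd is already
	 * not present, so the THP migrates directly and the fault is
	 * simply retried afterwards.
	 */
	migrated = migrate_misplaced_transhuge_page(mm, vma, pmdp, pmd,
						    addr, page, target_nid);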
Diffstat (limited to 'mm')
-rw-r--r--	mm/huge_memory.c	59
-rw-r--r--	mm/internal.h	7
-rw-r--r--	mm/memcontrol.c	7
-rw-r--r--	mm/migrate.c	231
4 files changed, 240 insertions(+), 64 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 199b261a257e..711baf84b153 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -600,7 +600,7 @@ out:
 }
 __setup("transparent_hugepage=", setup_transparent_hugepage);
 
-static inline pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
+pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
 {
 	if (likely(vma->vm_flags & VM_WRITE))
 		pmd = pmd_mkwrite(pmd);
@@ -1023,10 +1023,12 @@ out:
 int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 				unsigned long addr, pmd_t pmd, pmd_t *pmdp)
 {
-	struct page *page = NULL;
+	struct page *page;
 	unsigned long haddr = addr & HPAGE_PMD_MASK;
 	int target_nid;
 	int current_nid = -1;
+	bool migrated;
+	bool page_locked = false;
 
 	spin_lock(&mm->page_table_lock);
 	if (unlikely(!pmd_same(pmd, *pmdp)))
@@ -1034,42 +1036,61 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	page = pmd_page(pmd);
 	get_page(page);
-	spin_unlock(&mm->page_table_lock);
 	current_nid = page_to_nid(page);
 	count_vm_numa_event(NUMA_HINT_FAULTS);
 	if (current_nid == numa_node_id())
 		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
 
 	target_nid = mpol_misplaced(page, vma, haddr);
-	if (target_nid == -1)
+	if (target_nid == -1) {
+		put_page(page);
 		goto clear_pmdnuma;
+	}
 
-	/*
-	 * Due to lacking code to migrate thp pages, we'll split
-	 * (which preserves the special PROT_NONE) and re-take the
-	 * fault on the normal pages.
-	 */
-	split_huge_page(page);
-	put_page(page);
-
-	return 0;
+	/* Acquire the page lock to serialise THP migrations */
+	spin_unlock(&mm->page_table_lock);
+	lock_page(page);
+	page_locked = true;
 
-clear_pmdnuma:
+	/* Confirm the PMD did not change while locked */
 	spin_lock(&mm->page_table_lock);
-	if (unlikely(!pmd_same(pmd, *pmdp)))
+	if (unlikely(!pmd_same(pmd, *pmdp))) {
+		unlock_page(page);
+		put_page(page);
 		goto out_unlock;
+	}
+	spin_unlock(&mm->page_table_lock);
+
+	/* Migrate the THP to the requested node */
+	migrated = migrate_misplaced_transhuge_page(mm, vma,
+				pmdp, pmd, addr,
+				page, target_nid);
+	if (migrated)
+		current_nid = target_nid;
+	else {
+		spin_lock(&mm->page_table_lock);
+		if (unlikely(!pmd_same(pmd, *pmdp))) {
+			unlock_page(page);
+			goto out_unlock;
+		}
+		goto clear_pmdnuma;
+	}
+
+	task_numa_fault(current_nid, HPAGE_PMD_NR, migrated);
+	return 0;
 
+clear_pmdnuma:
 	pmd = pmd_mknonnuma(pmd);
 	set_pmd_at(mm, haddr, pmdp, pmd);
 	VM_BUG_ON(pmd_numa(*pmdp));
 	update_mmu_cache_pmd(vma, addr, pmdp);
+	if (page_locked)
+		unlock_page(page);
 
 out_unlock:
 	spin_unlock(&mm->page_table_lock);
-	if (page) {
-		put_page(page);
-		task_numa_fault(numa_node_id(), HPAGE_PMD_NR, false);
-	}
+	if (current_nid != -1)
+		task_numa_fault(current_nid, HPAGE_PMD_NR, migrated);
 	return 0;
 }
 
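A behavioural note on the hunk above: previously a NUMA hinting fault on
a THP called split_huge_page() and refaulted on the base pages; now the
huge page migrates whole, and on failure the code falls through to
clear_pmdnuma so the task does not immediately refault on the same huge
page. In essence (mirroring the lines above):

	pmd = pmd_mknonnuma(pmd);		/* drop the NUMA hint */
	set_pmd_at(mm, haddr, pmdp, pmd);	/* THP stays where it is */
	update_mmu_cache_pmd(vma, addr, pmdp);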
diff --git a/mm/internal.h b/mm/internal.h
index a4fa284f6bc2..7e60ac826f2b 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -212,15 +212,18 @@ static inline void mlock_migrate_page(struct page *newpage, struct page *page)
 {
 	if (TestClearPageMlocked(page)) {
 		unsigned long flags;
+		int nr_pages = hpage_nr_pages(page);
 
 		local_irq_save(flags);
-		__dec_zone_page_state(page, NR_MLOCK);
+		__mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
 		SetPageMlocked(newpage);
-		__inc_zone_page_state(newpage, NR_MLOCK);
+		__mod_zone_page_state(page_zone(newpage), NR_MLOCK, nr_pages);
 		local_irq_restore(flags);
 	}
 }
 
+extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 extern unsigned long vma_address(struct page *page,
 				struct vm_area_struct *vma);
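The mlock accounting above becomes THP-aware by scaling NR_MLOCK by
hpage_nr_pages() rather than assuming a single page. For context, that
helper is roughly the following at this point in the tree (a sketch of
include/linux/huge_mm.h, not part of this patch):

	static inline int hpage_nr_pages(struct page *page)
	{
		if (unlikely(PageTransHuge(page)))
			return HPAGE_PMD_NR;	/* 512 with 4KiB pages on x86-64 */
		return 1;
	}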
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index dd39ba000b31..d97af9636ab2 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3288,15 +3288,18 @@ void mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
 				  struct mem_cgroup **memcgp)
 {
 	struct mem_cgroup *memcg = NULL;
+	unsigned int nr_pages = 1;
 	struct page_cgroup *pc;
 	enum charge_type ctype;
 
 	*memcgp = NULL;
 
-	VM_BUG_ON(PageTransHuge(page));
 	if (mem_cgroup_disabled())
 		return;
 
+	if (PageTransHuge(page))
+		nr_pages <<= compound_order(page);
+
 	pc = lookup_page_cgroup(page);
 	lock_page_cgroup(pc);
 	if (PageCgroupUsed(pc)) {
@@ -3358,7 +3361,7 @@ void mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
 	 * charged to the res_counter since we plan on replacing the
 	 * old one and only one page is going to be left afterwards.
 	 */
-	__mem_cgroup_commit_charge(memcg, newpage, 1, ctype, false);
+	__mem_cgroup_commit_charge(memcg, newpage, nr_pages, ctype, false);
 }
 
 /* remove redundant charge if migration failed*/
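To make the nr_pages arithmetic above concrete: for a 2MiB THP on x86-64,
compound_order(page) is HPAGE_PMD_ORDER == 9, so nr_pages becomes
1 << 9 == 512 and __mem_cgroup_commit_charge() transfers the charge for
all 512 base pages in one call; for a normal page nr_pages stays 1 and
behaviour is unchanged.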
diff --git a/mm/migrate.c b/mm/migrate.c
index 2a5ce135eef0..c9400960fd52 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -410,7 +410,7 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
  */
 void migrate_page_copy(struct page *newpage, struct page *page)
 {
-	if (PageHuge(page))
+	if (PageHuge(page) || PageTransHuge(page))
 		copy_huge_page(newpage, page);
 	else
 		copy_highpage(newpage, page);
@@ -1491,25 +1491,10 @@ bool migrate_ratelimited(int node)
 	return true;
 }
 
-/*
- * Attempt to migrate a misplaced page to the specified destination
- * node. Caller is expected to have an elevated reference count on
- * the page that will be dropped by this function before returning.
- */
-int migrate_misplaced_page(struct page *page, int node)
+/* Returns true if the node is migrate rate-limited after the update */
+bool numamigrate_update_ratelimit(pg_data_t *pgdat)
 {
-	pg_data_t *pgdat = NODE_DATA(node);
-	int isolated = 0;
-	LIST_HEAD(migratepages);
-
-	/*
-	 * Don't migrate pages that are mapped in multiple processes.
-	 * TODO: Handle false sharing detection instead of this hammer
-	 */
-	if (page_mapcount(page) != 1) {
-		put_page(page);
-		goto out;
-	}
+	bool rate_limited = false;
 
 	/*
 	 * Rate-limit the amount of data that is being migrated to a node.
@@ -1522,13 +1507,18 @@ int migrate_misplaced_page(struct page *page, int node)
 		pgdat->numabalancing_migrate_next_window = jiffies +
 			msecs_to_jiffies(migrate_interval_millisecs);
 	}
-	if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages) {
-		spin_unlock(&pgdat->numabalancing_migrate_lock);
-		put_page(page);
-		goto out;
-	}
-	pgdat->numabalancing_migrate_nr_pages++;
+	if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages)
+		rate_limited = true;
+	else
+		pgdat->numabalancing_migrate_nr_pages++;
 	spin_unlock(&pgdat->numabalancing_migrate_lock);
+
+	return rate_limited;
+}
+
+int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
+{
+	int ret = 0;
 
 	/* Avoid migrating to a node that is nearly full */
 	if (migrate_balanced_pgdat(pgdat, 1)) {
@@ -1536,13 +1526,18 @@ int migrate_misplaced_page(struct page *page, int node)
 
 	if (isolate_lru_page(page)) {
 		put_page(page);
-		goto out;
+		return 0;
 	}
-	isolated = 1;
 
+	/* Page is isolated */
+	ret = 1;
 	page_lru = page_is_file_cache(page);
-	inc_zone_page_state(page, NR_ISOLATED_ANON + page_lru);
-	list_add(&page->lru, &migratepages);
+	if (!PageTransHuge(page))
+		inc_zone_page_state(page, NR_ISOLATED_ANON + page_lru);
+	else
+		mod_zone_page_state(page_zone(page),
+				NR_ISOLATED_ANON + page_lru,
+				HPAGE_PMD_NR);
 }
 
 /*
@@ -1555,23 +1550,177 @@ int migrate_misplaced_page(struct page *page, int node)
 	 */
 	put_page(page);
 
-	if (isolated) {
-		int nr_remaining;
-
-		nr_remaining = migrate_pages(&migratepages,
-					     alloc_misplaced_dst_page,
-					     node, false, MIGRATE_ASYNC,
-					     MR_NUMA_MISPLACED);
-		if (nr_remaining) {
-			putback_lru_pages(&migratepages);
-			isolated = 0;
-		} else
-			count_vm_numa_event(NUMA_PAGE_MIGRATE);
+	return ret;
+}
+
+/*
+ * Attempt to migrate a misplaced page to the specified destination
+ * node. Caller is expected to have an elevated reference count on
+ * the page that will be dropped by this function before returning.
+ */
+int migrate_misplaced_page(struct page *page, int node)
+{
+	pg_data_t *pgdat = NODE_DATA(node);
+	int isolated = 0;
+	int nr_remaining;
+	LIST_HEAD(migratepages);
+
+	/*
+	 * Don't migrate pages that are mapped in multiple processes.
+	 * TODO: Handle false sharing detection instead of this hammer
+	 */
+	if (page_mapcount(page) != 1) {
+		put_page(page);
+		goto out;
 	}
+
+	/*
+	 * Rate-limit the amount of data that is being migrated to a node.
+	 * Optimal placement is no good if the memory bus is saturated and
+	 * all the time is being spent migrating!
+	 */
+	if (numamigrate_update_ratelimit(pgdat)) {
+		put_page(page);
+		goto out;
+	}
+
+	isolated = numamigrate_isolate_page(pgdat, page);
+	if (!isolated)
+		goto out;
+
+	list_add(&page->lru, &migratepages);
+	nr_remaining = migrate_pages(&migratepages,
+				     alloc_misplaced_dst_page,
+				     node, false, MIGRATE_ASYNC,
+				     MR_NUMA_MISPLACED);
+	if (nr_remaining) {
+		putback_lru_pages(&migratepages);
+		isolated = 0;
+	} else
+		count_vm_numa_event(NUMA_PAGE_MIGRATE);
 	BUG_ON(!list_empty(&migratepages));
 out:
 	return isolated;
 }
+
+int migrate_misplaced_transhuge_page(struct mm_struct *mm,
+				struct vm_area_struct *vma,
+				pmd_t *pmd, pmd_t entry,
+				unsigned long address,
+				struct page *page, int node)
+{
+	unsigned long haddr = address & HPAGE_PMD_MASK;
+	pg_data_t *pgdat = NODE_DATA(node);
+	int isolated = 0;
+	struct page *new_page = NULL;
+	struct mem_cgroup *memcg = NULL;
+	int page_lru = page_is_file_cache(page);
+
+	/*
+	 * Don't migrate pages that are mapped in multiple processes.
+	 * TODO: Handle false sharing detection instead of this hammer
+	 */
+	if (page_mapcount(page) != 1)
+		goto out_dropref;
+
+	/*
+	 * Rate-limit the amount of data that is being migrated to a node.
+	 * Optimal placement is no good if the memory bus is saturated and
+	 * all the time is being spent migrating!
+	 */
+	if (numamigrate_update_ratelimit(pgdat))
+		goto out_dropref;
+
+	new_page = alloc_pages_node(node,
+		(GFP_TRANSHUGE | GFP_THISNODE) & ~__GFP_WAIT, HPAGE_PMD_ORDER);
+	if (!new_page)
+		goto out_dropref;
+	page_xchg_last_nid(new_page, page_last_nid(page));
+
+	isolated = numamigrate_isolate_page(pgdat, page);
+	if (!isolated) {
+		put_page(new_page);
+		goto out_keep_locked;
+	}
+
+	/* Prepare a page as a migration target */
+	__set_page_locked(new_page);
+	SetPageSwapBacked(new_page);
+
+	/* anon mapping, we can simply copy page->mapping to the new page: */
+	new_page->mapping = page->mapping;
+	new_page->index = page->index;
+	migrate_page_copy(new_page, page);
+	WARN_ON(PageLRU(new_page));
+
+	/* Recheck the target PMD */
+	spin_lock(&mm->page_table_lock);
+	if (unlikely(!pmd_same(*pmd, entry))) {
+		spin_unlock(&mm->page_table_lock);
+
+		/* Reverse changes made by migrate_page_copy() */
+		if (TestClearPageActive(new_page))
+			SetPageActive(page);
+		if (TestClearPageUnevictable(new_page))
+			SetPageUnevictable(page);
+		mlock_migrate_page(page, new_page);
+
+		unlock_page(new_page);
+		put_page(new_page);		/* Free it */
+
+		unlock_page(page);
+		putback_lru_page(page);
+
+		count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
+		goto out;
+	}
+
+	/*
+	 * Traditional migration needs to prepare the memcg charge
+	 * transaction early to prevent the old page from being
+	 * uncharged when installing migration entries. Here we can
+	 * save the potential rollback and start the charge transfer
+	 * only when migration is already known to end successfully.
+	 */
+	mem_cgroup_prepare_migration(page, new_page, &memcg);
+
+	entry = mk_pmd(new_page, vma->vm_page_prot);
+	entry = pmd_mknonnuma(entry);
+	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+	entry = pmd_mkhuge(entry);
+
+	page_add_new_anon_rmap(new_page, vma, haddr);
+
+	set_pmd_at(mm, haddr, pmd, entry);
+	update_mmu_cache_pmd(vma, address, entry);
+	page_remove_rmap(page);
+	/*
+	 * Finish the charge transaction under the page table lock to
+	 * prevent split_huge_page() from dividing up the charge
+	 * before it's fully transferred to the new page.
+	 */
+	mem_cgroup_end_migration(memcg, page, new_page, true);
+	spin_unlock(&mm->page_table_lock);
+
+	unlock_page(new_page);
+	unlock_page(page);
+	put_page(page);			/* Drop the rmap reference */
+	put_page(page);			/* Drop the LRU isolation reference */
+
+	count_vm_events(PGMIGRATE_SUCCESS, HPAGE_PMD_NR);
+	count_vm_numa_events(NUMA_PAGE_MIGRATE, HPAGE_PMD_NR);
+
+out:
+	mod_zone_page_state(page_zone(page),
+			NR_ISOLATED_ANON + page_lru,
+			-HPAGE_PMD_NR);
+	return isolated;
+
+out_dropref:
+	put_page(page);
+out_keep_locked:
+	return 0;
+}
 #endif /* CONFIG_NUMA_BALANCING */
 
 #endif /* CONFIG_NUMA */
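Both fault paths now funnel through the same admission checks before any
copying happens. A hypothetical condensation of that shared shape
(numamigrate_admit() is not in the patch; it only summarises the call
sequence visible above):

	static bool numamigrate_admit(pg_data_t *pgdat, struct page *page)
	{
		/* Skip pages mapped in multiple processes (false-sharing hammer) */
		if (page_mapcount(page) != 1)
			return false;
		/* Enforce the per-node migration budget per time window */
		if (numamigrate_update_ratelimit(pgdat))
			return false;
		/* Reject nearly-full nodes and isolate the page from the LRU */
		return numamigrate_isolate_page(pgdat, page) != 0;
	}

The two callers differ mainly in what happens afterwards: the base-page
path hands the isolated page to migrate_pages(), while the THP path
allocates a huge destination page, copies, and swaps the PMD in place
under page_table_lock. On either THP failure label the huge page is
returned still locked, and do_huge_pmd_numa_page() unlocks it via its
page_locked bookkeeping.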