Diffstat (limited to 'mm')
-rw-r--r--  mm/memory.c   | 101
-rw-r--r--  mm/mprotect.c |  47
2 files changed, 4 insertions(+), 144 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index eba846bcf124..9898eeb9a21c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3606,103 +3606,6 @@ out:
 	return 0;
 }
 
-/* NUMA hinting page fault entry point for regular pmds */
-#ifdef CONFIG_NUMA_BALANCING
-static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
-		     unsigned long addr, pmd_t *pmdp)
-{
-	pmd_t pmd;
-	pte_t *pte, *orig_pte;
-	unsigned long _addr = addr & PMD_MASK;
-	unsigned long offset;
-	spinlock_t *ptl;
-	bool numa = false;
-	int last_cpupid;
-
-	spin_lock(&mm->page_table_lock);
-	pmd = *pmdp;
-	if (pmd_numa(pmd)) {
-		set_pmd_at(mm, _addr, pmdp, pmd_mknonnuma(pmd));
-		numa = true;
-	}
-	spin_unlock(&mm->page_table_lock);
-
-	if (!numa)
-		return 0;
-
-	/* we're in a page fault so some vma must be in the range */
-	BUG_ON(!vma);
-	BUG_ON(vma->vm_start >= _addr + PMD_SIZE);
-	offset = max(_addr, vma->vm_start) & ~PMD_MASK;
-	VM_BUG_ON(offset >= PMD_SIZE);
-	orig_pte = pte = pte_offset_map_lock(mm, pmdp, _addr, &ptl);
-	pte += offset >> PAGE_SHIFT;
-	for (addr = _addr + offset; addr < _addr + PMD_SIZE; pte++, addr += PAGE_SIZE) {
-		pte_t pteval = *pte;
-		struct page *page;
-		int page_nid = -1;
-		int target_nid;
-		bool migrated = false;
-		int flags = 0;
-
-		if (!pte_present(pteval))
-			continue;
-		if (!pte_numa(pteval))
-			continue;
-		if (addr >= vma->vm_end) {
-			vma = find_vma(mm, addr);
-			/* there's a pte present so there must be a vma */
-			BUG_ON(!vma);
-			BUG_ON(addr < vma->vm_start);
-		}
-		if (pte_numa(pteval)) {
-			pteval = pte_mknonnuma(pteval);
-			set_pte_at(mm, addr, pte, pteval);
-		}
-		page = vm_normal_page(vma, addr, pteval);
-		if (unlikely(!page))
-			continue;
-
-		/*
-		 * Avoid grouping on DSO/COW pages in specific and RO pages
-		 * in general, RO pages shouldn't hurt as much anyway since
-		 * they can be in shared cache state.
-		 */
-		if (!pte_write(pteval))
-			flags |= TNF_NO_GROUP;
-
-		last_cpupid = page_cpupid_last(page);
-		page_nid = page_to_nid(page);
-		target_nid = numa_migrate_prep(page, vma, addr, page_nid);
-		pte_unmap_unlock(pte, ptl);
-		if (target_nid != -1) {
-			migrated = migrate_misplaced_page(page, vma, target_nid);
-			if (migrated) {
-				page_nid = target_nid;
-				flags |= TNF_MIGRATED;
-			}
-		} else {
-			put_page(page);
-		}
-
-		if (page_nid != -1)
-			task_numa_fault(last_cpupid, page_nid, 1, flags);
-
-		pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
-	}
-	pte_unmap_unlock(orig_pte, ptl);
-
-	return 0;
-}
-#else
-static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
-		     unsigned long addr, pmd_t *pmdp)
-{
-	BUG();
-	return 0;
-}
-#endif /* CONFIG_NUMA_BALANCING */
-
 /*
  * These routines also need to handle stuff like marking pages dirty
  * and/or accessed for architectures that don't do it in hardware (most
@@ -3841,8 +3744,8 @@ retry:
 		}
 	}
 
-	if (pmd_numa(*pmd))
-		return do_pmd_numa_page(mm, vma, address, pmd);
+	/* THP should already have been handled */
+	BUG_ON(pmd_numa(*pmd));
 
 	/*
 	 * Use __pte_alloc instead of pte_alloc_map, because we can't
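
With do_pmd_numa_page() removed and the call site reduced to the BUG_ON(pmd_numa(*pmd)) above, a NUMA hinting fault against a regular pmd is no longer handled as a batch; each faulting pte is resolved on its own. The fragment below is a minimal sketch of that per-pte handling, assembled only from helpers visible in the deleted code above (pte_mknonnuma(), vm_normal_page(), numa_migrate_prep(), migrate_misplaced_page(), task_numa_fault()); the function name numa_hinting_fault_one_pte() is made up for illustration and this is not the kernel's actual do_numa_page() implementation.

/*
 * Illustrative sketch only: resolve a NUMA hinting fault for a single
 * pte, performing the same steps the removed pmd-level batch loop
 * performed for each entry.
 */
static int numa_hinting_fault_one_pte(struct mm_struct *mm,
		struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp)
{
	spinlock_t *ptl;
	pte_t *pte, pteval;
	struct page *page;
	int page_nid, target_nid, last_cpupid;
	int flags = 0;

	pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
	pteval = *pte;
	if (!pte_present(pteval) || !pte_numa(pteval)) {
		/* Raced with another fault that already cleared the NUMA bit. */
		pte_unmap_unlock(pte, ptl);
		return 0;
	}

	/* Clear the NUMA bit so the faulting access can be restarted. */
	pteval = pte_mknonnuma(pteval);
	set_pte_at(mm, addr, pte, pteval);

	page = vm_normal_page(vma, addr, pteval);
	if (!page) {
		pte_unmap_unlock(pte, ptl);
		return 0;
	}

	/* RO pages are not used for grouping (see the deleted comment above). */
	if (!pte_write(pteval))
		flags |= TNF_NO_GROUP;

	last_cpupid = page_cpupid_last(page);
	page_nid = page_to_nid(page);
	target_nid = numa_migrate_prep(page, vma, addr, page_nid);
	pte_unmap_unlock(pte, ptl);

	if (target_nid != -1) {
		/* migrate_misplaced_page() releases the page reference itself. */
		if (migrate_misplaced_page(page, vma, target_nid)) {
			page_nid = target_nid;
			flags |= TNF_MIGRATED;
		}
	} else {
		put_page(page);
	}

	/* Record the hinting fault with the NUMA balancing scheduler code. */
	task_numa_fault(last_cpupid, page_nid, 1, flags);
	return 0;
}
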
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 9a74855f1241..a0302ac0be98 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -37,15 +37,12 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
 
 static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long addr, unsigned long end, pgprot_t newprot,
-		int dirty_accountable, int prot_numa, bool *ret_all_same_cpupid)
+		int dirty_accountable, int prot_numa)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	pte_t *pte, oldpte;
 	spinlock_t *ptl;
 	unsigned long pages = 0;
-	bool all_same_cpupid = true;
-	int last_cpu = -1;
-	int last_pid = -1;
 
 	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 	arch_enter_lazy_mmu_mode();
@@ -64,19 +61,6 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 
 				page = vm_normal_page(vma, addr, oldpte);
 				if (page) {
-					int cpupid = page_cpupid_last(page);
-					int this_cpu = cpupid_to_cpu(cpupid);
-					int this_pid = cpupid_to_pid(cpupid);
-
-					if (last_cpu == -1)
-						last_cpu = this_cpu;
-					if (last_pid == -1)
-						last_pid = this_pid;
-					if (last_cpu != this_cpu ||
-					    last_pid != this_pid) {
-						all_same_cpupid = false;
-					}
-
 					if (!pte_numa(oldpte)) {
 						ptent = pte_mknuma(ptent);
 						updated = true;
@@ -115,26 +99,9 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(pte - 1, ptl);
 
-	*ret_all_same_cpupid = all_same_cpupid;
 	return pages;
 }
 
-#ifdef CONFIG_NUMA_BALANCING
-static inline void change_pmd_protnuma(struct mm_struct *mm, unsigned long addr,
-				       pmd_t *pmd)
-{
-	spin_lock(&mm->page_table_lock);
-	set_pmd_at(mm, addr & PMD_MASK, pmd, pmd_mknuma(*pmd));
-	spin_unlock(&mm->page_table_lock);
-}
-#else
-static inline void change_pmd_protnuma(struct mm_struct *mm, unsigned long addr,
-				       pmd_t *pmd)
-{
-	BUG();
-}
-#endif /* CONFIG_NUMA_BALANCING */
-
 static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 		pud_t *pud, unsigned long addr, unsigned long end,
 		pgprot_t newprot, int dirty_accountable, int prot_numa)
@@ -142,7 +109,6 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 	pmd_t *pmd;
 	unsigned long next;
 	unsigned long pages = 0;
-	bool all_same_cpupid;
 
 	pmd = pmd_offset(pud, addr);
 	do {
@@ -168,17 +134,8 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 		if (pmd_none_or_clear_bad(pmd))
 			continue;
 		this_pages = change_pte_range(vma, pmd, addr, next, newprot,
-				 dirty_accountable, prot_numa, &all_same_cpupid);
+				 dirty_accountable, prot_numa);
 		pages += this_pages;
-
-		/*
-		 * If we are changing protections for NUMA hinting faults then
-		 * set pmd_numa if the examined pages were all on the same
-		 * node. This allows a regular PMD to be handled as one fault
-		 * and effectively batches the taking of the PTL
-		 */
-		if (prot_numa && this_pages && all_same_cpupid)
-			change_pmd_protnuma(vma->vm_mm, addr, pmd);
 	} while (pmd++, addr = next, addr != end);
 
 	return pages;
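
On the mprotect side, the deleted comment above described the batching rationale: when every examined page in the range appeared to share a cpupid, the whole pmd was marked via change_pmd_protnuma() so one fault could cover the range. After this change, change_pte_range() only ever marks individual ptes and the pmd itself is never made NUMA, matching the BUG_ON(pmd_numa(*pmd)) added in mm/memory.c. The sketch below is an approximation of what the surviving prot_numa path amounts to, assembled from the context lines above; it is not the verbatim post-patch source, and the helper name mark_pte_for_numa_hinting() is made up for illustration.

/*
 * Approximation of the prot_numa marking that remains after this patch:
 * each present pte backed by a normal page is made "NUMA" individually,
 * so the fault path only ever sees pte_numa() faults on regular pmds.
 */
static void mark_pte_for_numa_hinting(struct vm_area_struct *vma,
		unsigned long addr, pte_t *pte, unsigned long *pages)
{
	pte_t oldpte = *pte;
	struct page *page;

	if (!pte_present(oldpte))
		return;

	page = vm_normal_page(vma, addr, oldpte);
	if (!page)
		return;

	if (!pte_numa(oldpte)) {
		/*
		 * pte_mknuma() makes the next hardware access trap into the
		 * NUMA hinting fault path for this single pte.
		 */
		set_pte_at(vma->vm_mm, addr, pte, pte_mknuma(oldpte));
		(*pages)++;
	}
}
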