author		Peter Zijlstra <peterz@infradead.org>	2013-10-07 06:29:21 -0400
committer	Ingo Molnar <mingo@kernel.org>		2013-10-09 08:47:47 -0400
commit		8c8a743c5087bac9caac8155b8f3b367e75cdd0b (patch)
tree		d5cc5c5f4368cf7d3deb627388c869dbea2e83f2
parent		90572890d202527c366aa9489b32404e88a7c020 (diff)
sched/numa: Use {cpu, pid} to create task groups for shared faults
While parallel applications tend to align their data on the cache
boundary, they tend not to align on the page or THP boundary.
Consequently tasks that partition their data can still "false-share"
pages, presenting a problem for optimal NUMA placement.

This patch uses NUMA hinting faults to chain tasks together into
numa_groups. As well as storing the NID a task was running on when
accessing a page, a truncated representation of the faulting PID is
stored. If subsequent faults are from different PIDs it is reasonable
to assume that those two tasks share a page and are candidates for
being grouped together.

Note that this patch makes no scheduling decisions based on the
grouping information.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1381141781-10992-44-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
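[Editor's illustration] To make the private-vs-shared classification above concrete, the following is a minimal stand-alone sketch of the idea in user-space C. It is not the kernel's actual cpupid/page-flags encoding: the 8-bit truncated PID mirrors LAST__PID_SHIFT, while all other names, widths and helpers here are assumptions made purely for illustration.

/* illustration only -- not kernel code */
#include <stdbool.h>
#include <stdio.h>

#define PID_BITS 8                        /* mirrors LAST__PID_SHIFT (assumed) */
#define PID_MASK ((1 << PID_BITS) - 1)    /* mirrors LAST__PID_MASK (assumed)  */

/* Pack the cpu and a truncated pid of the last task to fault on a page. */
static int make_cpupid(int cpu, int pid)
{
	return (cpu << PID_BITS) | (pid & PID_MASK);
}

static int cpupid_to_pid(int cpupid)
{
	return cpupid & PID_MASK;
}

/* A fault is "private" if the same (truncated) pid touched the page last. */
static bool fault_is_private(int task_pid, int last_cpupid)
{
	return (task_pid & PID_MASK) == cpupid_to_pid(last_cpupid);
}

int main(void)
{
	int last_cpupid = make_cpupid(2, 1234);  /* pid 1234 faulted last, on cpu 2 */

	/* The same task faults again: treated as a private access. */
	printf("pid 1234 -> %s\n",
	       fault_is_private(1234, last_cpupid) ? "private" : "shared");

	/* A different task faults on the same page: a shared access, making the
	 * two tasks candidates for the same numa_group. */
	printf("pid 5678 -> %s\n",
	       fault_is_private(5678, last_cpupid) ? "private" : "shared");

	return 0;
}

Because only the low bits of the PID are kept, unrelated PIDs can occasionally alias and be misclassified as private; the grouping heuristic tolerates such occasional misses.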
-rw-r--r--   include/linux/mm.h      11
-rw-r--r--   include/linux/sched.h    3
-rw-r--r--   kernel/sched/core.c      3
-rw-r--r--   kernel/sched/fair.c    165
-rw-r--r--   kernel/sched/sched.h     5
-rw-r--r--   mm/memory.c              8

6 files changed, 182 insertions, 13 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index ce464cd4777e..81443d557a2e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -691,6 +691,12 @@ static inline bool cpupid_cpu_unset(int cpupid)
 	return cpupid_to_cpu(cpupid) == (-1 & LAST__CPU_MASK);
 }
 
+static inline bool __cpupid_match_pid(pid_t task_pid, int cpupid)
+{
+	return (task_pid & LAST__PID_MASK) == cpupid_to_pid(cpupid);
+}
+
+#define cpupid_match_pid(task, cpupid) __cpupid_match_pid(task->pid, cpupid)
 #ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
 static inline int page_cpupid_xchg_last(struct page *page, int cpupid)
 {
@@ -760,6 +766,11 @@ static inline bool cpupid_pid_unset(int cpupid)
 static inline void page_cpupid_reset_last(struct page *page)
 {
 }
+
+static inline bool cpupid_match_pid(struct task_struct *task, int cpupid)
+{
+	return false;
+}
 #endif /* CONFIG_NUMA_BALANCING */
 
 static inline struct zone *page_zone(const struct page *page)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b6619792bb13..f587ded5c148 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1347,6 +1347,9 @@ struct task_struct {
 	u64 node_stamp;			/* migration stamp */
 	struct callback_head numa_work;
 
+	struct list_head numa_entry;
+	struct numa_group *numa_group;
+
 	/*
 	 * Exponential decaying average of faults on a per-node basis.
 	 * Scheduling placement decisions are made based on the these counts.
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1fe59da280e3..51092d5cc64c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1733,6 +1733,9 @@ static void __sched_fork(struct task_struct *p)
 	p->numa_work.next = &p->numa_work;
 	p->numa_faults = NULL;
 	p->numa_faults_buffer = NULL;
+
+	INIT_LIST_HEAD(&p->numa_entry);
+	p->numa_group = NULL;
 #endif /* CONFIG_NUMA_BALANCING */
 }
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index dbe0f628efa3..85565053a6ed 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -888,6 +888,17 @@ static unsigned int task_scan_max(struct task_struct *p)
  */
 unsigned int sysctl_numa_balancing_settle_count __read_mostly = 4;
 
+struct numa_group {
+	atomic_t refcount;
+
+	spinlock_t lock; /* nr_tasks, tasks */
+	int nr_tasks;
+	struct list_head task_list;
+
+	struct rcu_head rcu;
+	atomic_long_t faults[0];
+};
+
 static inline int task_faults_idx(int nid, int priv)
 {
 	return 2 * nid + priv;
@@ -1182,7 +1193,10 @@ static void task_numa_placement(struct task_struct *p)
 		int priv, i;
 
 		for (priv = 0; priv < 2; priv++) {
+			long diff;
+
 			i = task_faults_idx(nid, priv);
+			diff = -p->numa_faults[i];
 
 			/* Decay existing window, copy faults since last scan */
 			p->numa_faults[i] >>= 1;
@@ -1190,6 +1204,11 @@ static void task_numa_placement(struct task_struct *p)
 			p->numa_faults_buffer[i] = 0;
 
 			faults += p->numa_faults[i];
+			diff += p->numa_faults[i];
+			if (p->numa_group) {
+				/* safe because we can only change our own group */
+				atomic_long_add(diff, &p->numa_group->faults[i]);
+			}
 		}
 
 		if (faults > max_faults) {
@@ -1207,6 +1226,131 @@ static void task_numa_placement(struct task_struct *p)
 	}
 }
 
+static inline int get_numa_group(struct numa_group *grp)
+{
+	return atomic_inc_not_zero(&grp->refcount);
+}
+
+static inline void put_numa_group(struct numa_group *grp)
+{
+	if (atomic_dec_and_test(&grp->refcount))
+		kfree_rcu(grp, rcu);
+}
+
+static void double_lock(spinlock_t *l1, spinlock_t *l2)
+{
+	if (l1 > l2)
+		swap(l1, l2);
+
+	spin_lock(l1);
+	spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
+}
+
+static void task_numa_group(struct task_struct *p, int cpupid)
+{
+	struct numa_group *grp, *my_grp;
+	struct task_struct *tsk;
+	bool join = false;
+	int cpu = cpupid_to_cpu(cpupid);
+	int i;
+
+	if (unlikely(!p->numa_group)) {
+		unsigned int size = sizeof(struct numa_group) +
+				    2*nr_node_ids*sizeof(atomic_long_t);
+
+		grp = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
+		if (!grp)
+			return;
+
+		atomic_set(&grp->refcount, 1);
+		spin_lock_init(&grp->lock);
+		INIT_LIST_HEAD(&grp->task_list);
+
+		for (i = 0; i < 2*nr_node_ids; i++)
+			atomic_long_set(&grp->faults[i], p->numa_faults[i]);
+
+		list_add(&p->numa_entry, &grp->task_list);
+		grp->nr_tasks++;
+		rcu_assign_pointer(p->numa_group, grp);
+	}
+
+	rcu_read_lock();
+	tsk = ACCESS_ONCE(cpu_rq(cpu)->curr);
+
+	if (!cpupid_match_pid(tsk, cpupid))
+		goto unlock;
+
+	grp = rcu_dereference(tsk->numa_group);
+	if (!grp)
+		goto unlock;
+
+	my_grp = p->numa_group;
+	if (grp == my_grp)
+		goto unlock;
+
+	/*
+	 * Only join the other group if its bigger; if we're the bigger group,
+	 * the other task will join us.
+	 */
+	if (my_grp->nr_tasks > grp->nr_tasks)
+		goto unlock;
+
+	/*
+	 * Tie-break on the grp address.
+	 */
+	if (my_grp->nr_tasks == grp->nr_tasks && my_grp > grp)
+		goto unlock;
+
+	if (!get_numa_group(grp))
+		goto unlock;
+
+	join = true;
+
+unlock:
+	rcu_read_unlock();
+
+	if (!join)
+		return;
+
+	for (i = 0; i < 2*nr_node_ids; i++) {
+		atomic_long_sub(p->numa_faults[i], &my_grp->faults[i]);
+		atomic_long_add(p->numa_faults[i], &grp->faults[i]);
+	}
+
+	double_lock(&my_grp->lock, &grp->lock);
+
+	list_move(&p->numa_entry, &grp->task_list);
+	my_grp->nr_tasks--;
+	grp->nr_tasks++;
+
+	spin_unlock(&my_grp->lock);
+	spin_unlock(&grp->lock);
+
+	rcu_assign_pointer(p->numa_group, grp);
+
+	put_numa_group(my_grp);
+}
+
+void task_numa_free(struct task_struct *p)
+{
+	struct numa_group *grp = p->numa_group;
+	int i;
+
+	if (grp) {
+		for (i = 0; i < 2*nr_node_ids; i++)
+			atomic_long_sub(p->numa_faults[i], &grp->faults[i]);
+
+		spin_lock(&grp->lock);
+		list_del(&p->numa_entry);
+		grp->nr_tasks--;
+		spin_unlock(&grp->lock);
+		rcu_assign_pointer(p->numa_group, NULL);
+		put_numa_group(grp);
+	}
+
+	kfree(p->numa_faults);
+}
+
 /*
  * Got a PROT_NONE fault for a page on @node.
  */
@@ -1222,15 +1366,6 @@ void task_numa_fault(int last_cpupid, int node, int pages, bool migrated)
 	if (!p->mm)
 		return;
 
-	/*
-	 * First accesses are treated as private, otherwise consider accesses
-	 * to be private if the accessing pid has not changed
-	 */
-	if (!cpupid_pid_unset(last_cpupid))
-		priv = ((p->pid & LAST__PID_MASK) == cpupid_to_pid(last_cpupid));
-	else
-		priv = 1;
-
 	/* Allocate buffer to track faults on a per-node basis */
 	if (unlikely(!p->numa_faults)) {
 		int size = sizeof(*p->numa_faults) * 2 * nr_node_ids;
@@ -1245,6 +1380,18 @@ void task_numa_fault(int last_cpupid, int node, int pages, bool migrated)
 	}
 
 	/*
+	 * First accesses are treated as private, otherwise consider accesses
+	 * to be private if the accessing pid has not changed
+	 */
+	if (unlikely(last_cpupid == (-1 & LAST_CPUPID_MASK))) {
+		priv = 1;
+	} else {
+		priv = cpupid_match_pid(p, last_cpupid);
+		if (!priv)
+			task_numa_group(p, last_cpupid);
+	}
+
+	/*
 	 * If pages are properly placed (did not migrate) then scan slower.
 	 * This is reset periodically in case of phase changes
 	 */
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 691e96964dcc..8037b10a256f 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -559,10 +559,7 @@ static inline u64 rq_clock_task(struct rq *rq)
 #ifdef CONFIG_NUMA_BALANCING
 extern int migrate_task_to(struct task_struct *p, int cpu);
 extern int migrate_swap(struct task_struct *, struct task_struct *);
-static inline void task_numa_free(struct task_struct *p)
-{
-	kfree(p->numa_faults);
-}
+extern void task_numa_free(struct task_struct *p);
 #else /* CONFIG_NUMA_BALANCING */
 static inline void task_numa_free(struct task_struct *p)
 {
diff --git a/mm/memory.c b/mm/memory.c
index 5162e6d0d652..c57efa25cdbb 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2719,6 +2719,14 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		get_page(dirty_page);
 
 reuse:
+		/*
+		 * Clear the pages cpupid information as the existing
+		 * information potentially belongs to a now completely
+		 * unrelated process.
+		 */
+		if (old_page)
+			page_cpupid_xchg_last(old_page, (1 << LAST_CPUPID_SHIFT) - 1);
+
 		flush_cache_page(vma, address, pte_pfn(orig_pte));
 		entry = pte_mkyoung(orig_pte);
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);