aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Peter Zijlstra <peterz@infradead.org> 2013-10-07 06:29:24 -0400
committer Ingo Molnar <mingo@kernel.org> 2013-10-09 08:47:53 -0400
commit 6688cc05473b36a0a3d3971e1adf1712919b32eb (patch)
tree 2305dca14e2df669b48138088bd93cf51db37721
parent 7851a45cd3f6198bf542c30e27b330e8eeb3736c (diff)
mm: numa: Do not group on RO pages
And here's a little something to make sure not the whole world ends up in a single group.

As while we don't migrate shared executable pages, we do scan/fault on them. And since everybody links to libc, everybody ends up in the same group.

Suggested-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1381141781-10992-47-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- include/linux/sched.h 7
-rw-r--r-- kernel/sched/fair.c 5
-rw-r--r-- mm/huge_memory.c 15
-rw-r--r-- mm/memory.c 30
4 files changed, 47 insertions, 10 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b0b343b1ba64..ff543851a18a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1450,13 +1450,16 @@ struct task_struct {
1450/* Future-safe accessor for struct task_struct's cpus_allowed. */ 1450/* Future-safe accessor for struct task_struct's cpus_allowed. */
1451#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) 1451#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
1452 1452
1453#define TNF_MIGRATED 0x01
1454#define TNF_NO_GROUP 0x02
1455
1453#ifdef CONFIG_NUMA_BALANCING 1456#ifdef CONFIG_NUMA_BALANCING
1454extern void task_numa_fault(int last_node, int node, int pages, bool migrated); 1457extern void task_numa_fault(int last_node, int node, int pages, int flags);
1455extern pid_t task_numa_group_id(struct task_struct *p); 1458extern pid_t task_numa_group_id(struct task_struct *p);
1456extern void set_numabalancing_state(bool enabled); 1459extern void set_numabalancing_state(bool enabled);
1457#else 1460#else
1458static inline void task_numa_fault(int last_node, int node, int pages, 1461static inline void task_numa_fault(int last_node, int node, int pages,
1459 bool migrated) 1462 int flags)
1460{ 1463{
1461} 1464}
1462static inline pid_t task_numa_group_id(struct task_struct *p) 1465static inline pid_t task_numa_group_id(struct task_struct *p)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 5bd309c035c7..35661b8afb4e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1361,9 +1361,10 @@ void task_numa_free(struct task_struct *p)
1361/* 1361/*
1362 * Got a PROT_NONE fault for a page on @node. 1362 * Got a PROT_NONE fault for a page on @node.
1363 */ 1363 */
1364void task_numa_fault(int last_cpupid, int node, int pages, bool migrated) 1364void task_numa_fault(int last_cpupid, int node, int pages, int flags)
1365{ 1365{
1366 struct task_struct *p = current; 1366 struct task_struct *p = current;
1367 bool migrated = flags & TNF_MIGRATED;
1367 int priv; 1368 int priv;
1368 1369
1369 if (!numabalancing_enabled) 1370 if (!numabalancing_enabled)
@@ -1394,7 +1395,7 @@ void task_numa_fault(int last_cpupid, int node, int pages, bool migrated)
1394 priv = 1; 1395 priv = 1;
1395 } else { 1396 } else {
1396 priv = cpupid_match_pid(p, last_cpupid); 1397 priv = cpupid_match_pid(p, last_cpupid);
1397 if (!priv) 1398 if (!priv && !(flags & TNF_NO_GROUP))
1398 task_numa_group(p, last_cpupid); 1399 task_numa_group(p, last_cpupid);
1399 } 1400 }
1400 1401
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index becf92ca54f3..7ab4e32afe12 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1285,6 +1285,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
1285 int target_nid, last_cpupid = -1; 1285 int target_nid, last_cpupid = -1;
1286 bool page_locked; 1286 bool page_locked;
1287 bool migrated = false; 1287 bool migrated = false;
1288 int flags = 0;
1288 1289
1289 spin_lock(&mm->page_table_lock); 1290 spin_lock(&mm->page_table_lock);
1290 if (unlikely(!pmd_same(pmd, *pmdp))) 1291 if (unlikely(!pmd_same(pmd, *pmdp)))
@@ -1299,6 +1300,14 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
1299 count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL); 1300 count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
1300 1301
1301 /* 1302 /*
1303 * Avoid grouping on DSO/COW pages in specific and RO pages
1304 * in general, RO pages shouldn't hurt as much anyway since
1305 * they can be in shared cache state.
1306 */
1307 if (!pmd_write(pmd))
1308 flags |= TNF_NO_GROUP;
1309
1310 /*
1302 * Acquire the page lock to serialise THP migrations but avoid dropping 1311 * Acquire the page lock to serialise THP migrations but avoid dropping
1303 * page_table_lock if at all possible 1312 * page_table_lock if at all possible
1304 */ 1313 */
@@ -1343,8 +1352,10 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
1343 spin_unlock(&mm->page_table_lock); 1352 spin_unlock(&mm->page_table_lock);
1344 migrated = migrate_misplaced_transhuge_page(mm, vma, 1353 migrated = migrate_misplaced_transhuge_page(mm, vma,
1345 pmdp, pmd, addr, page, target_nid); 1354 pmdp, pmd, addr, page, target_nid);
1346 if (migrated) 1355 if (migrated) {
1356 flags |= TNF_MIGRATED;
1347 page_nid = target_nid; 1357 page_nid = target_nid;
1358 }
1348 1359
1349 goto out; 1360 goto out;
1350clear_pmdnuma: 1361clear_pmdnuma:
@@ -1362,7 +1373,7 @@ out:
1362 page_unlock_anon_vma_read(anon_vma); 1373 page_unlock_anon_vma_read(anon_vma);
1363 1374
1364 if (page_nid != -1) 1375 if (page_nid != -1)
1365 task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR, migrated); 1376 task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR, flags);
1366 1377
1367 return 0; 1378 return 0;
1368} 1379}
diff --git a/mm/memory.c b/mm/memory.c
index c57efa25cdbb..eba846bcf124 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3547,6 +3547,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
3547 int last_cpupid; 3547 int last_cpupid;
3548 int target_nid; 3548 int target_nid;
3549 bool migrated = false; 3549 bool migrated = false;
3550 int flags = 0;
3550 3551
3551 /* 3552 /*
3552 * The "pte" at this point cannot be used safely without 3553 * The "pte" at this point cannot be used safely without
@@ -3575,6 +3576,14 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
3575 } 3576 }
3576 BUG_ON(is_zero_pfn(page_to_pfn(page))); 3577 BUG_ON(is_zero_pfn(page_to_pfn(page)));
3577 3578
3579 /*
3580 * Avoid grouping on DSO/COW pages in specific and RO pages
3581 * in general, RO pages shouldn't hurt as much anyway since
3582 * they can be in shared cache state.
3583 */
3584 if (!pte_write(pte))
3585 flags |= TNF_NO_GROUP;
3586
3578 last_cpupid = page_cpupid_last(page); 3587 last_cpupid = page_cpupid_last(page);
3579 page_nid = page_to_nid(page); 3588 page_nid = page_to_nid(page);
3580 target_nid = numa_migrate_prep(page, vma, addr, page_nid); 3589 target_nid = numa_migrate_prep(page, vma, addr, page_nid);
@@ -3586,12 +3595,14 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
3586 3595
3587 /* Migrate to the requested node */ 3596 /* Migrate to the requested node */
3588 migrated = migrate_misplaced_page(page, vma, target_nid); 3597 migrated = migrate_misplaced_page(page, vma, target_nid);
3589 if (migrated) 3598 if (migrated) {
3590 page_nid = target_nid; 3599 page_nid = target_nid;
3600 flags |= TNF_MIGRATED;
3601 }
3591 3602
3592out: 3603out:
3593 if (page_nid != -1) 3604 if (page_nid != -1)
3594 task_numa_fault(last_cpupid, page_nid, 1, migrated); 3605 task_numa_fault(last_cpupid, page_nid, 1, flags);
3595 return 0; 3606 return 0;
3596} 3607}
3597 3608
@@ -3632,6 +3643,7 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
3632 int page_nid = -1; 3643 int page_nid = -1;
3633 int target_nid; 3644 int target_nid;
3634 bool migrated = false; 3645 bool migrated = false;
3646 int flags = 0;
3635 3647
3636 if (!pte_present(pteval)) 3648 if (!pte_present(pteval))
3637 continue; 3649 continue;
@@ -3651,20 +3663,30 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
3651 if (unlikely(!page)) 3663 if (unlikely(!page))
3652 continue; 3664 continue;
3653 3665
3666 /*
3667 * Avoid grouping on DSO/COW pages in specific and RO pages
3668 * in general, RO pages shouldn't hurt as much anyway since
3669 * they can be in shared cache state.
3670 */
3671 if (!pte_write(pteval))
3672 flags |= TNF_NO_GROUP;
3673
3654 last_cpupid = page_cpupid_last(page); 3674 last_cpupid = page_cpupid_last(page);
3655 page_nid = page_to_nid(page); 3675 page_nid = page_to_nid(page);
3656 target_nid = numa_migrate_prep(page, vma, addr, page_nid); 3676 target_nid = numa_migrate_prep(page, vma, addr, page_nid);
3657 pte_unmap_unlock(pte, ptl); 3677 pte_unmap_unlock(pte, ptl);
3658 if (target_nid != -1) { 3678 if (target_nid != -1) {
3659 migrated = migrate_misplaced_page(page, vma, target_nid); 3679 migrated = migrate_misplaced_page(page, vma, target_nid);
3660 if (migrated) 3680 if (migrated) {
3661 page_nid = target_nid; 3681 page_nid = target_nid;
3682 flags |= TNF_MIGRATED;
3683 }
3662 } else { 3684 } else {
3663 put_page(page); 3685 put_page(page);
3664 } 3686 }
3665 3687
3666 if (page_nid != -1) 3688 if (page_nid != -1)
3667 task_numa_fault(last_cpupid, page_nid, 1, migrated); 3689 task_numa_fault(last_cpupid, page_nid, 1, flags);
3668 3690
3669 pte = pte_offset_map_lock(mm, pmdp, addr, &ptl); 3691 pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
3670 } 3692 }