commit 6688cc05473b36a0a3d3971e1adf1712919b32eb
tree   2305dca14e2df669b48138088bd93cf51db37721
parent 7851a45cd3f6198bf542c30e27b330e8eeb3736c
author    Peter Zijlstra <peterz@infradead.org>  2013-10-07 06:29:24 -0400
committer Ingo Molnar <mingo@kernel.org>         2013-10-09 08:47:53 -0400
mm: numa: Do not group on RO pages
Here's a little something to make sure the whole world doesn't end up
in a single group.

While we don't migrate shared executable pages, we do scan/fault on
them, and since everybody links to libc, everybody ends up in the same
group.
Suggested-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1381141781-10992-47-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
 include/linux/sched.h |  7
 kernel/sched/fair.c   |  5
 mm/huge_memory.c      | 15
 mm/memory.c           | 30

 4 files changed, 47 insertions(+), 10 deletions(-)
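To see the failure mode the changelog describes, consider a chain of tasks that all take hinting faults on the same shared read-only page (libc text). Below is a toy userspace model of the grouping behaviour with and without the RO veto; every name in it (numa_fault, group_of, NTASKS) is illustrative, and none of it is kernel code:

#include <stdio.h>

#define TNF_NO_GROUP	0x02	/* same bit the patch introduces */
#define NTASKS		8

static int group_of[NTASKS];

/* crude stand-in for task_numa_group(): a shared fault joins the
 * group of whoever touched the page last, unless grouping is vetoed */
static void numa_fault(int task, int last_faulter, int flags)
{
	int shared = (last_faulter != task);

	if (shared && !(flags & TNF_NO_GROUP))
		group_of[task] = group_of[last_faulter];
}

int main(void)
{
	int t, veto;

	for (veto = 0; veto <= 1; veto++) {
		for (t = 0; t < NTASKS; t++)
			group_of[t] = t;	/* everyone starts alone */

		/* every task faults on the same shared RO page (libc text) */
		for (t = 1; t < NTASKS; t++)
			numa_fault(t, t - 1, veto ? TNF_NO_GROUP : 0);

		printf("%s RO veto:", veto ? "with   " : "without");
		for (t = 0; t < NTASKS; t++)
			printf(" %d", group_of[t]);
		printf("\n");
	}
	return 0;	/* without: all 0 (one big group); with: 0..7 stay apart */
}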
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b0b343b1ba64..ff543851a18a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1450,13 +1450,16 @@ struct task_struct {
 /* Future-safe accessor for struct task_struct's cpus_allowed. */
 #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
 
+#define TNF_MIGRATED	0x01
+#define TNF_NO_GROUP	0x02
+
 #ifdef CONFIG_NUMA_BALANCING
-extern void task_numa_fault(int last_node, int node, int pages, bool migrated);
+extern void task_numa_fault(int last_node, int node, int pages, int flags);
 extern pid_t task_numa_group_id(struct task_struct *p);
 extern void set_numabalancing_state(bool enabled);
 #else
 static inline void task_numa_fault(int last_node, int node, int pages,
-				   bool migrated)
+				   int flags)
 {
 }
 static inline pid_t task_numa_group_id(struct task_struct *p)
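The header hunk swaps the bare bool for an int bitmask, so the fault paths can report several independent facts through one argument. A minimal sketch of the combine/decode convention, compilable as ordinary C outside the kernel (consume() is a hypothetical stand-in for task_numa_fault()):

#include <stdio.h>
#include <stdbool.h>

#define TNF_MIGRATED	0x01
#define TNF_NO_GROUP	0x02

static void consume(int flags)
{
	/* decoded exactly as the patched task_numa_fault() does */
	bool migrated = flags & TNF_MIGRATED;

	printf("migrated=%d, grouping %s\n", migrated,
	       (flags & TNF_NO_GROUP) ? "vetoed" : "allowed");
}

int main(void)
{
	int flags = 0;

	flags |= TNF_NO_GROUP;	/* the pte/pmd was not writable */
	flags |= TNF_MIGRATED;	/* the page was successfully migrated */
	consume(flags);		/* prints: migrated=1, grouping vetoed */
	return 0;
}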
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 5bd309c035c7..35661b8afb4e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1361,9 +1361,10 @@ void task_numa_free(struct task_struct *p)
 /*
  * Got a PROT_NONE fault for a page on @node.
  */
-void task_numa_fault(int last_cpupid, int node, int pages, bool migrated)
+void task_numa_fault(int last_cpupid, int node, int pages, int flags)
 {
 	struct task_struct *p = current;
+	bool migrated = flags & TNF_MIGRATED;
 	int priv;
 
 	if (!numabalancing_enabled)
@@ -1394,7 +1395,7 @@ void task_numa_fault(int last_cpupid, int node, int pages, bool migrated)
 		priv = 1;
 	} else {
 		priv = cpupid_match_pid(p, last_cpupid);
-		if (!priv)
+		if (!priv && !(flags & TNF_NO_GROUP))
 			task_numa_group(p, last_cpupid);
 	}
 
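Note that TNF_NO_GROUP bites at exactly one place, the patched condition above: private faults never grouped anyway, and a shared fault now joins a numa_group only when the caller did not veto it. A small userspace model of that gate, with *_stub helpers standing in for the kernel's cpupid_match_pid() and task_numa_group():

#include <stdio.h>

#define TNF_NO_GROUP	0x02

static int cpupid_match_pid_stub(int pid, int last_cpupid)
{
	return pid == last_cpupid;	/* "did we fault on our own page?" */
}

static void task_numa_group_stub(int pid, int last_cpupid)
{
	printf("pid %d groups with owner of cpupid %d\n", pid, last_cpupid);
}

static void fault(int pid, int last_cpupid, int flags)
{
	int priv = cpupid_match_pid_stub(pid, last_cpupid);

	if (!priv && !(flags & TNF_NO_GROUP))	/* the patched condition */
		task_numa_group_stub(pid, last_cpupid);
	else
		printf("pid %d: no grouping (priv=%d flags=%#x)\n",
		       pid, priv, flags);
}

int main(void)
{
	fault(100, 200, 0);		/* shared RW fault: group */
	fault(100, 200, TNF_NO_GROUP);	/* shared RO fault: don't */
	fault(100, 100, 0);		/* private fault: never groups */
	return 0;
}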
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index becf92ca54f3..7ab4e32afe12 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1285,6 +1285,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	int target_nid, last_cpupid = -1;
 	bool page_locked;
 	bool migrated = false;
+	int flags = 0;
 
 	spin_lock(&mm->page_table_lock);
 	if (unlikely(!pmd_same(pmd, *pmdp)))
@@ -1299,6 +1300,14 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
 
 	/*
+	 * Avoid grouping on DSO/COW pages in specific and RO pages
+	 * in general, RO pages shouldn't hurt as much anyway since
+	 * they can be in shared cache state.
+	 */
+	if (!pmd_write(pmd))
+		flags |= TNF_NO_GROUP;
+
+	/*
 	 * Acquire the page lock to serialise THP migrations but avoid dropping
 	 * page_table_lock if at all possible
 	 */
@@ -1343,8 +1352,10 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	spin_unlock(&mm->page_table_lock);
 	migrated = migrate_misplaced_transhuge_page(mm, vma,
 				pmdp, pmd, addr, page, target_nid);
-	if (migrated)
+	if (migrated) {
+		flags |= TNF_MIGRATED;
 		page_nid = target_nid;
+	}
 
 	goto out;
 clear_pmdnuma:
@@ -1362,7 +1373,7 @@ out:
 		page_unlock_anon_vma_read(anon_vma);
 
 	if (page_nid != -1)
-		task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR, migrated);
+		task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR, flags);
 
 	return 0;
 }
diff --git a/mm/memory.c b/mm/memory.c
index c57efa25cdbb..eba846bcf124 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3547,6 +3547,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	int last_cpupid;
 	int target_nid;
 	bool migrated = false;
+	int flags = 0;
 
 	/*
 	 * The "pte" at this point cannot be used safely without
@@ -3575,6 +3576,14 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	}
 	BUG_ON(is_zero_pfn(page_to_pfn(page)));
 
+	/*
+	 * Avoid grouping on DSO/COW pages in specific and RO pages
+	 * in general, RO pages shouldn't hurt as much anyway since
+	 * they can be in shared cache state.
+	 */
+	if (!pte_write(pte))
+		flags |= TNF_NO_GROUP;
+
 	last_cpupid = page_cpupid_last(page);
 	page_nid = page_to_nid(page);
 	target_nid = numa_migrate_prep(page, vma, addr, page_nid);
@@ -3586,12 +3595,14 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	/* Migrate to the requested node */
 	migrated = migrate_misplaced_page(page, vma, target_nid);
-	if (migrated)
+	if (migrated) {
 		page_nid = target_nid;
+		flags |= TNF_MIGRATED;
+	}
 
 out:
 	if (page_nid != -1)
-		task_numa_fault(last_cpupid, page_nid, 1, migrated);
+		task_numa_fault(last_cpupid, page_nid, 1, flags);
 	return 0;
 }
 
@@ -3632,6 +3643,7 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		int page_nid = -1;
 		int target_nid;
 		bool migrated = false;
+		int flags = 0;
 
 		if (!pte_present(pteval))
 			continue;
@@ -3651,20 +3663,30 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		if (unlikely(!page))
 			continue;
 
+		/*
+		 * Avoid grouping on DSO/COW pages in specific and RO pages
+		 * in general, RO pages shouldn't hurt as much anyway since
+		 * they can be in shared cache state.
+		 */
+		if (!pte_write(pteval))
+			flags |= TNF_NO_GROUP;
+
 		last_cpupid = page_cpupid_last(page);
 		page_nid = page_to_nid(page);
 		target_nid = numa_migrate_prep(page, vma, addr, page_nid);
 		pte_unmap_unlock(pte, ptl);
 		if (target_nid != -1) {
 			migrated = migrate_misplaced_page(page, vma, target_nid);
-			if (migrated)
+			if (migrated) {
 				page_nid = target_nid;
+				flags |= TNF_MIGRATED;
+			}
 		} else {
 			put_page(page);
 		}
 
 		if (page_nid != -1)
-			task_numa_fault(last_cpupid, page_nid, 1, migrated);
+			task_numa_fault(last_cpupid, page_nid, 1, flags);
 
 		pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
 	}