author		Peter Zijlstra <peterz@infradead.org>	2013-10-07 06:29:24 -0400
committer	Ingo Molnar <mingo@kernel.org>		2013-10-09 08:47:53 -0400
commit		6688cc05473b36a0a3d3971e1adf1712919b32eb
tree		2305dca14e2df669b48138088bd93cf51db37721
parent		7851a45cd3f6198bf542c30e27b330e8eeb3736c
mm: numa: Do not group on RO pages
And here's a little something to make sure the whole world doesn't end up
in a single group.

While we don't migrate shared executable pages, we do scan and fault on
them. And since everybody links to libc, everybody would otherwise end up
in the same group.
Suggested-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1381141781-10992-47-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
 include/linux/sched.h |  7 +++++--
 kernel/sched/fair.c   |  5 +++--
 mm/huge_memory.c      | 15 +++++++++++++--
 mm/memory.c           | 30 ++++++++++++++++++++++++++----
 4 files changed, 47 insertions(+), 10 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b0b343b1ba64..ff543851a18a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1450,13 +1450,16 @@ struct task_struct {
 /* Future-safe accessor for struct task_struct's cpus_allowed. */
 #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
 
+#define TNF_MIGRATED	0x01
+#define TNF_NO_GROUP	0x02
+
 #ifdef CONFIG_NUMA_BALANCING
-extern void task_numa_fault(int last_node, int node, int pages, bool migrated);
+extern void task_numa_fault(int last_node, int node, int pages, int flags);
 extern pid_t task_numa_group_id(struct task_struct *p);
 extern void set_numabalancing_state(bool enabled);
 #else
 static inline void task_numa_fault(int last_node, int node, int pages,
-				   bool migrated)
+				   int flags)
 {
 }
 static inline pid_t task_numa_group_id(struct task_struct *p)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 5bd309c035c7..35661b8afb4e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1361,9 +1361,10 @@ void task_numa_free(struct task_struct *p)
 /*
  * Got a PROT_NONE fault for a page on @node.
  */
-void task_numa_fault(int last_cpupid, int node, int pages, bool migrated)
+void task_numa_fault(int last_cpupid, int node, int pages, int flags)
 {
 	struct task_struct *p = current;
+	bool migrated = flags & TNF_MIGRATED;
 	int priv;
 
 	if (!numabalancing_enabled)
@@ -1394,7 +1395,7 @@ void task_numa_fault(int last_cpupid, int node, int pages, bool migrated)
 		priv = 1;
 	} else {
 		priv = cpupid_match_pid(p, last_cpupid);
-		if (!priv)
+		if (!priv && !(flags & TNF_NO_GROUP))
 			task_numa_group(p, last_cpupid);
 	}
 
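The hunk above is the crux of the patch: grouping is now attempted only for shared faults (priv == 0) whose flags do not carry TNF_NO_GROUP. Here is a small, self-contained userspace model of that decision; would_group() is a hypothetical name used purely for illustration, as the kernel performs this test inline in task_numa_fault():

#include <stdbool.h>
#include <stdio.h>

#define TNF_MIGRATED	0x01
#define TNF_NO_GROUP	0x02

/*
 * Models the grouping decision in task_numa_fault() after this patch:
 * a task only joins a NUMA group on a shared fault (priv == 0) whose
 * flags do not carry TNF_NO_GROUP.
 */
static bool would_group(int priv, int flags)
{
	return !priv && !(flags & TNF_NO_GROUP);
}

int main(void)
{
	/* Shared fault on a writable page: grouping proceeds. */
	printf("shared, RW: %d\n", would_group(0, TNF_MIGRATED));
	/* Shared fault on a read-only page (e.g. libc text): suppressed. */
	printf("shared, RO: %d\n", would_group(0, TNF_NO_GROUP));
	/* Private fault: never grouped, whatever the flags say. */
	printf("private:    %d\n", would_group(1, 0));
	return 0;
}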
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index becf92ca54f3..7ab4e32afe12 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1285,6 +1285,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	int target_nid, last_cpupid = -1;
 	bool page_locked;
 	bool migrated = false;
+	int flags = 0;
 
 	spin_lock(&mm->page_table_lock);
 	if (unlikely(!pmd_same(pmd, *pmdp)))
@@ -1299,6 +1300,14 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
 
 	/*
+	 * Avoid grouping on DSO/COW pages in specific and RO pages
+	 * in general, RO pages shouldn't hurt as much anyway since
+	 * they can be in shared cache state.
+	 */
+	if (!pmd_write(pmd))
+		flags |= TNF_NO_GROUP;
+
+	/*
 	 * Acquire the page lock to serialise THP migrations but avoid dropping
 	 * page_table_lock if at all possible
 	 */
@@ -1343,8 +1352,10 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	spin_unlock(&mm->page_table_lock);
 	migrated = migrate_misplaced_transhuge_page(mm, vma,
 				pmdp, pmd, addr, page, target_nid);
-	if (migrated)
+	if (migrated) {
+		flags |= TNF_MIGRATED;
 		page_nid = target_nid;
+	}
 
 	goto out;
 clear_pmdnuma:
@@ -1362,7 +1373,7 @@ out:
 	page_unlock_anon_vma_read(anon_vma);
 
 	if (page_nid != -1)
-		task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR, migrated);
+		task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR, flags);
 
 	return 0;
 }
diff --git a/mm/memory.c b/mm/memory.c
index c57efa25cdbb..eba846bcf124 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3547,6 +3547,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	int last_cpupid;
 	int target_nid;
 	bool migrated = false;
+	int flags = 0;
 
 	/*
 	 * The "pte" at this point cannot be used safely without
@@ -3575,6 +3576,14 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	}
 	BUG_ON(is_zero_pfn(page_to_pfn(page)));
 
+	/*
+	 * Avoid grouping on DSO/COW pages in specific and RO pages
+	 * in general, RO pages shouldn't hurt as much anyway since
+	 * they can be in shared cache state.
+	 */
+	if (!pte_write(pte))
+		flags |= TNF_NO_GROUP;
+
 	last_cpupid = page_cpupid_last(page);
 	page_nid = page_to_nid(page);
 	target_nid = numa_migrate_prep(page, vma, addr, page_nid);
@@ -3586,12 +3595,14 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	/* Migrate to the requested node */
 	migrated = migrate_misplaced_page(page, vma, target_nid);
-	if (migrated)
+	if (migrated) {
 		page_nid = target_nid;
+		flags |= TNF_MIGRATED;
+	}
 
 out:
 	if (page_nid != -1)
-		task_numa_fault(last_cpupid, page_nid, 1, migrated);
+		task_numa_fault(last_cpupid, page_nid, 1, flags);
 	return 0;
 }
 
@@ -3632,6 +3643,7 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		int page_nid = -1;
 		int target_nid;
 		bool migrated = false;
+		int flags = 0;
 
 		if (!pte_present(pteval))
 			continue;
@@ -3651,20 +3663,30 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		if (unlikely(!page))
 			continue;
 
+		/*
+		 * Avoid grouping on DSO/COW pages in specific and RO pages
+		 * in general, RO pages shouldn't hurt as much anyway since
+		 * they can be in shared cache state.
+		 */
+		if (!pte_write(pteval))
+			flags |= TNF_NO_GROUP;
+
 		last_cpupid = page_cpupid_last(page);
 		page_nid = page_to_nid(page);
 		target_nid = numa_migrate_prep(page, vma, addr, page_nid);
 		pte_unmap_unlock(pte, ptl);
 		if (target_nid != -1) {
 			migrated = migrate_misplaced_page(page, vma, target_nid);
-			if (migrated)
+			if (migrated) {
 				page_nid = target_nid;
+				flags |= TNF_MIGRATED;
+			}
 		} else {
 			put_page(page);
 		}
 
 		if (page_nid != -1)
-			task_numa_fault(last_cpupid, page_nid, 1, migrated);
+			task_numa_fault(last_cpupid, page_nid, 1, flags);
 
 		pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
 	}
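All three fault handlers above follow the same pattern: test the entry's writability, set TNF_NO_GROUP for read-only mappings, fold in the migration result, and hand the flags word to task_numa_fault(). A minimal self-contained sketch of that producer side, with a hypothetical build_fault_flags() helper standing in for the kernel's inline pte_write()/pmd_write() tests:

#include <stdbool.h>
#include <stdio.h>

#define TNF_MIGRATED	0x01
#define TNF_NO_GROUP	0x02

/*
 * Models how do_numa_page(), do_pmd_numa_page() and
 * do_huge_pmd_numa_page() build the flags word: read-only mappings
 * (shared library text, COW pages) suppress grouping, and a
 * successful migration is recorded alongside.
 */
static int build_fault_flags(bool writable, bool migrated)
{
	int flags = 0;

	if (!writable)		/* kernel side: !pte_write() / !pmd_write() */
		flags |= TNF_NO_GROUP;
	if (migrated)
		flags |= TNF_MIGRATED;
	return flags;
}

int main(void)
{
	/* Fault on libc's text: read-only, so grouping is suppressed. */
	printf("RO, migrated: 0x%02x\n", build_fault_flags(false, true));
	/* Fault on private writable data: grouping is allowed. */
	printf("RW, in place: 0x%02x\n", build_fault_flags(true, false));
	return 0;
}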
