author      Linus Torvalds <torvalds@linux-foundation.org>   2013-11-11 20:20:12 -0500
committer   Linus Torvalds <torvalds@linux-foundation.org>   2013-11-11 20:20:12 -0500
commit      39cf275a1a18ba3c7eb9b986c5c9b35b57332798 (patch)
tree        40b119ca9d2fbaf8128d3fa25f4c64669002b0c0 /mm
parent      ad5d69899e52792671c1aa6c7360464c7edfe09c (diff)
parent      e5137b50a0640009fd63a3e65c14bc6e1be8796a (diff)
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler changes from Ingo Molnar:
"The main changes in this cycle are:
- (much) improved CONFIG_NUMA_BALANCING support from Mel Gorman, Rik
van Riel, Peter Zijlstra et al. Yay!
- optimize preemption counter handling: merge the NEED_RESCHED flag
into the preempt_count variable, by Peter Zijlstra.
- wait.h fixes and code reorganization from Peter Zijlstra
- cfs_bandwidth fixes from Ben Segall
- SMP load-balancer cleanups from Peter Zijlstra
- idle balancer improvements from Jason Low
- other fixes and cleanups"
* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (129 commits)
ftrace, sched: Add TRACE_FLAG_PREEMPT_RESCHED
stop_machine: Fix race between stop_two_cpus() and stop_cpus()
sched: Remove unnecessary iteration over sched domains to update nr_busy_cpus
sched: Fix asymmetric scheduling for POWER7
sched: Move completion code from core.c to completion.c
sched: Move wait code from core.c to wait.c
sched: Move wait.c into kernel/sched/
sched/wait: Fix __wait_event_interruptible_lock_irq_timeout()
sched: Avoid throttle_cfs_rq() racing with period_timer stopping
sched: Guarantee new group-entities always have weight
sched: Fix hrtimer_cancel()/rq->lock deadlock
sched: Fix cfs_bandwidth misuse of hrtimer_expires_remaining
sched: Fix race on toggling cfs_bandwidth_used
sched: Remove extra put_online_cpus() inside sched_setaffinity()
sched/rt: Fix task_tick_rt() comment
sched/wait: Fix build breakage
sched/wait: Introduce prepare_to_wait_event()
sched/wait: Add ___wait_cond_timeout() to wait_event*_timeout() too
sched: Remove get_online_cpus() usage
sched: Fix race in migrate_swap_stop()
...
Diffstat (limited to 'mm')
-rw-r--r--   mm/huge_memory.c |  55
-rw-r--r--   mm/memory.c      | 139
-rw-r--r--   mm/mempolicy.c   |  82
-rw-r--r--   mm/migrate.c     |  30
-rw-r--r--   mm/mm_init.c     |  18
-rw-r--r--   mm/mmzone.c      |  14
-rw-r--r--   mm/mprotect.c    |  65
-rw-r--r--   mm/page_alloc.c  |   4
8 files changed, 218 insertions, 189 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index cca80d96e509..2612f60f53ee 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1282,19 +1282,32 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
         struct page *page;
         unsigned long haddr = addr & HPAGE_PMD_MASK;
         int page_nid = -1, this_nid = numa_node_id();
-        int target_nid;
+        int target_nid, last_cpupid = -1;
         bool page_locked;
         bool migrated = false;
+        int flags = 0;
 
         spin_lock(&mm->page_table_lock);
         if (unlikely(!pmd_same(pmd, *pmdp)))
                 goto out_unlock;
 
         page = pmd_page(pmd);
+        BUG_ON(is_huge_zero_page(page));
         page_nid = page_to_nid(page);
+        last_cpupid = page_cpupid_last(page);
         count_vm_numa_event(NUMA_HINT_FAULTS);
-        if (page_nid == this_nid)
+        if (page_nid == this_nid) {
                 count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
+                flags |= TNF_FAULT_LOCAL;
+        }
+
+        /*
+         * Avoid grouping on DSO/COW pages in specific and RO pages
+         * in general, RO pages shouldn't hurt as much anyway since
+         * they can be in shared cache state.
+         */
+        if (!pmd_write(pmd))
+                flags |= TNF_NO_GROUP;
 
         /*
          * Acquire the page lock to serialise THP migrations but avoid dropping
@@ -1325,7 +1338,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
         lock_page(page);
         anon_vma = page_lock_anon_vma_read(page);
 
-        /* Confirm the PTE did not while locked */
+        /* Confirm the PMD did not change while page_table_lock was released */
         spin_lock(&mm->page_table_lock);
         if (unlikely(!pmd_same(pmd, *pmdp))) {
                 unlock_page(page);
@@ -1341,8 +1354,10 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
         spin_unlock(&mm->page_table_lock);
         migrated = migrate_misplaced_transhuge_page(mm, vma,
                                 pmdp, pmd, addr, page, target_nid);
-        if (migrated)
+        if (migrated) {
+                flags |= TNF_MIGRATED;
                 page_nid = target_nid;
+        }
 
         goto out;
 clear_pmdnuma:
@@ -1360,7 +1375,7 @@ out:
                 page_unlock_anon_vma_read(anon_vma);
 
         if (page_nid != -1)
-                task_numa_fault(page_nid, HPAGE_PMD_NR, migrated);
+                task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR, flags);
 
         return 0;
 }
@@ -1458,6 +1473,12 @@ out:
         return ret;
 }
 
+/*
+ * Returns
+ *  - 0 if PMD could not be locked
+ *  - 1 if PMD was locked but protections unchange and TLB flush unnecessary
+ *  - HPAGE_PMD_NR is protections changed and TLB flush necessary
+ */
 int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
                 unsigned long addr, pgprot_t newprot, int prot_numa)
 {
@@ -1466,22 +1487,34 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 
         if (__pmd_trans_huge_lock(pmd, vma) == 1) {
                 pmd_t entry;
-                entry = pmdp_get_and_clear(mm, addr, pmd);
+                ret = 1;
                 if (!prot_numa) {
+                        entry = pmdp_get_and_clear(mm, addr, pmd);
                         entry = pmd_modify(entry, newprot);
+                        ret = HPAGE_PMD_NR;
                         BUG_ON(pmd_write(entry));
                 } else {
                         struct page *page = pmd_page(*pmd);
 
-                        /* only check non-shared pages */
-                        if (page_mapcount(page) == 1 &&
+                        /*
+                         * Do not trap faults against the zero page. The
+                         * read-only data is likely to be read-cached on the
+                         * local CPU cache and it is less useful to know about
+                         * local vs remote hits on the zero page.
+                         */
+                        if (!is_huge_zero_page(page) &&
                             !pmd_numa(*pmd)) {
+                                entry = pmdp_get_and_clear(mm, addr, pmd);
                                 entry = pmd_mknuma(entry);
+                                ret = HPAGE_PMD_NR;
                         }
                 }
-                set_pmd_at(mm, addr, pmd, entry);
+
+                /* Set PMD if cleared earlier */
+                if (ret == HPAGE_PMD_NR)
+                        set_pmd_at(mm, addr, pmd, entry);
+
                 spin_unlock(&vma->vm_mm->page_table_lock);
-                ret = 1;
         }
 
         return ret;
@@ -1662,7 +1695,7 @@ static void __split_huge_page_refcount(struct page *page,
                 page_tail->mapping = page->mapping;
 
                 page_tail->index = page->index + i;
-                page_nid_xchg_last(page_tail, page_nid_last(page));
+                page_cpupid_xchg_last(page_tail, page_cpupid_last(page));
 
                 BUG_ON(!PageAnon(page_tail));
                 BUG_ON(!PageUptodate(page_tail));
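A note for readers following along: the TNF_* flags and the extra last_cpupid argument that do_huge_pmd_numa_page() now feeds to task_numa_fault() are defined outside this mm-only diff, on the scheduler side of the series. The sketch below shows what such flag definitions plausibly look like; the names come from the hunks above, but the values are an assumption, not something shown in this diff.

/* Illustrative sketch only: values are assumptions, not taken from this diff. */
#define TNF_MIGRATED    0x01    /* page was migrated to the faulting node */
#define TNF_NO_GROUP    0x02    /* do not use this fault for task grouping (RO/DSO/COW) */
#define TNF_SHARED      0x04    /* page is mapped by more than one address space */
#define TNF_FAULT_LOCAL 0x08    /* fault was satisfied from the local node */

Under that assumption, task_numa_fault(last_cpupid, node, pages, flags) receives, in one call, both where the page was last touched and how this fault should influence placement decisions.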
diff --git a/mm/memory.c b/mm/memory.c
index d176154c243f..1f2287eaa88e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -69,8 +69,8 @@
 
 #include "internal.h"
 
-#ifdef LAST_NID_NOT_IN_PAGE_FLAGS
-#warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_nid.
+#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
+#warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_cpupid.
 #endif
 
 #ifndef CONFIG_NEED_MULTIPLE_NODES
@@ -2721,6 +2721,14 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 get_page(dirty_page);
 
 reuse:
+                /*
+                 * Clear the pages cpupid information as the existing
+                 * information potentially belongs to a now completely
+                 * unrelated process.
+                 */
+                if (old_page)
+                        page_cpupid_xchg_last(old_page, (1 << LAST_CPUPID_SHIFT) - 1);
+
                 flush_cache_page(vma, address, pte_pfn(orig_pte));
                 entry = pte_mkyoung(orig_pte);
                 entry = maybe_mkwrite(pte_mkdirty(entry), vma);
@@ -3521,13 +3529,16 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 }
 
 int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
-                unsigned long addr, int page_nid)
+                unsigned long addr, int page_nid,
+                int *flags)
 {
         get_page(page);
 
         count_vm_numa_event(NUMA_HINT_FAULTS);
-        if (page_nid == numa_node_id())
+        if (page_nid == numa_node_id()) {
                 count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
+                *flags |= TNF_FAULT_LOCAL;
+        }
 
         return mpol_misplaced(page, vma, addr);
 }
@@ -3538,8 +3549,10 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
         struct page *page = NULL;
         spinlock_t *ptl;
         int page_nid = -1;
+        int last_cpupid;
         int target_nid;
         bool migrated = false;
+        int flags = 0;
 
         /*
          * The "pte" at this point cannot be used safely without
@@ -3566,9 +3579,26 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 pte_unmap_unlock(ptep, ptl);
                 return 0;
         }
+        BUG_ON(is_zero_pfn(page_to_pfn(page)));
 
+        /*
+         * Avoid grouping on DSO/COW pages in specific and RO pages
+         * in general, RO pages shouldn't hurt as much anyway since
+         * they can be in shared cache state.
+         */
+        if (!pte_write(pte))
+                flags |= TNF_NO_GROUP;
+
+        /*
+         * Flag if the page is shared between multiple address spaces. This
+         * is later used when determining whether to group tasks together
+         */
+        if (page_mapcount(page) > 1 && (vma->vm_flags & VM_SHARED))
+                flags |= TNF_SHARED;
+
+        last_cpupid = page_cpupid_last(page);
         page_nid = page_to_nid(page);
-        target_nid = numa_migrate_prep(page, vma, addr, page_nid);
+        target_nid = numa_migrate_prep(page, vma, addr, page_nid, &flags);
         pte_unmap_unlock(ptep, ptl);
         if (target_nid == -1) {
                 put_page(page);
@@ -3576,102 +3606,17 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
         }
 
         /* Migrate to the requested node */
-        migrated = migrate_misplaced_page(page, target_nid);
-        if (migrated)
+        migrated = migrate_misplaced_page(page, vma, target_nid);
+        if (migrated) {
                 page_nid = target_nid;
+                flags |= TNF_MIGRATED;
+        }
 
 out:
         if (page_nid != -1)
-                task_numa_fault(page_nid, 1, migrated);
-        return 0;
-}
-
-/* NUMA hinting page fault entry point for regular pmds */
-#ifdef CONFIG_NUMA_BALANCING
-static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
-                unsigned long addr, pmd_t *pmdp)
-{
-        pmd_t pmd;
-        pte_t *pte, *orig_pte;
-        unsigned long _addr = addr & PMD_MASK;
-        unsigned long offset;
-        spinlock_t *ptl;
-        bool numa = false;
-
-        spin_lock(&mm->page_table_lock);
-        pmd = *pmdp;
-        if (pmd_numa(pmd)) {
-                set_pmd_at(mm, _addr, pmdp, pmd_mknonnuma(pmd));
-                numa = true;
-        }
-        spin_unlock(&mm->page_table_lock);
-
-        if (!numa)
-                return 0;
-
-        /* we're in a page fault so some vma must be in the range */
-        BUG_ON(!vma);
-        BUG_ON(vma->vm_start >= _addr + PMD_SIZE);
-        offset = max(_addr, vma->vm_start) & ~PMD_MASK;
-        VM_BUG_ON(offset >= PMD_SIZE);
-        orig_pte = pte = pte_offset_map_lock(mm, pmdp, _addr, &ptl);
-        pte += offset >> PAGE_SHIFT;
-        for (addr = _addr + offset; addr < _addr + PMD_SIZE; pte++, addr += PAGE_SIZE) {
-                pte_t pteval = *pte;
-                struct page *page;
-                int page_nid = -1;
-                int target_nid;
-                bool migrated = false;
-
-                if (!pte_present(pteval))
-                        continue;
-                if (!pte_numa(pteval))
-                        continue;
-                if (addr >= vma->vm_end) {
-                        vma = find_vma(mm, addr);
-                        /* there's a pte present so there must be a vma */
-                        BUG_ON(!vma);
-                        BUG_ON(addr < vma->vm_start);
-                }
-                if (pte_numa(pteval)) {
-                        pteval = pte_mknonnuma(pteval);
-                        set_pte_at(mm, addr, pte, pteval);
-                }
-                page = vm_normal_page(vma, addr, pteval);
-                if (unlikely(!page))
-                        continue;
-                /* only check non-shared pages */
-                if (unlikely(page_mapcount(page) != 1))
-                        continue;
-
-                page_nid = page_to_nid(page);
-                target_nid = numa_migrate_prep(page, vma, addr, page_nid);
-                pte_unmap_unlock(pte, ptl);
-                if (target_nid != -1) {
-                        migrated = migrate_misplaced_page(page, target_nid);
-                        if (migrated)
-                                page_nid = target_nid;
-                } else {
-                        put_page(page);
-                }
-
-                if (page_nid != -1)
-                        task_numa_fault(page_nid, 1, migrated);
-
-                pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
-        }
-        pte_unmap_unlock(orig_pte, ptl);
-
-        return 0;
-}
-#else
-static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
-                unsigned long addr, pmd_t *pmdp)
-{
-        BUG();
+                task_numa_fault(last_cpupid, page_nid, 1, flags);
         return 0;
 }
-#endif /* CONFIG_NUMA_BALANCING */
 
 /*
  * These routines also need to handle stuff like marking pages dirty
@@ -3811,8 +3756,8 @@ retry:
                 }
         }
 
-        if (pmd_numa(*pmd))
-                return do_pmd_numa_page(mm, vma, address, pmd);
+        /* THP should already have been handled */
+        BUG_ON(pmd_numa(*pmd));
 
         /*
          * Use __pte_alloc instead of pte_alloc_map, because we can't
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 04729647f359..71cb253368cb 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1679,6 +1679,30 @@ struct mempolicy *get_vma_policy(struct task_struct *task,
         return pol;
 }
 
+bool vma_policy_mof(struct task_struct *task, struct vm_area_struct *vma)
+{
+        struct mempolicy *pol = get_task_policy(task);
+        if (vma) {
+                if (vma->vm_ops && vma->vm_ops->get_policy) {
+                        bool ret = false;
+
+                        pol = vma->vm_ops->get_policy(vma, vma->vm_start);
+                        if (pol && (pol->flags & MPOL_F_MOF))
+                                ret = true;
+                        mpol_cond_put(pol);
+
+                        return ret;
+                } else if (vma->vm_policy) {
+                        pol = vma->vm_policy;
+                }
+        }
+
+        if (!pol)
+                return default_policy.flags & MPOL_F_MOF;
+
+        return pol->flags & MPOL_F_MOF;
+}
+
 static int apply_policy_zone(struct mempolicy *policy, enum zone_type zone)
 {
         enum zone_type dynamic_policy_zone = policy_zone;
@@ -2277,6 +2301,35 @@ static void sp_free(struct sp_node *n)
         kmem_cache_free(sn_cache, n);
 }
 
+#ifdef CONFIG_NUMA_BALANCING
+static bool numa_migrate_deferred(struct task_struct *p, int last_cpupid)
+{
+        /* Never defer a private fault */
+        if (cpupid_match_pid(p, last_cpupid))
+                return false;
+
+        if (p->numa_migrate_deferred) {
+                p->numa_migrate_deferred--;
+                return true;
+        }
+        return false;
+}
+
+static inline void defer_numa_migrate(struct task_struct *p)
+{
+        p->numa_migrate_deferred = sysctl_numa_balancing_migrate_deferred;
+}
+#else
+static inline bool numa_migrate_deferred(struct task_struct *p, int last_cpupid)
+{
+        return false;
+}
+
+static inline void defer_numa_migrate(struct task_struct *p)
+{
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
 /**
  * mpol_misplaced - check whether current page node is valid in policy
  *
@@ -2300,6 +2353,8 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
         struct zone *zone;
         int curnid = page_to_nid(page);
         unsigned long pgoff;
+        int thiscpu = raw_smp_processor_id();
+        int thisnid = cpu_to_node(thiscpu);
         int polnid = -1;
         int ret = -1;
 
@@ -2348,9 +2403,11 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
 
         /* Migrate the page towards the node whose CPU is referencing it */
         if (pol->flags & MPOL_F_MORON) {
-                int last_nid;
+                int last_cpupid;
+                int this_cpupid;
 
-                polnid = numa_node_id();
+                polnid = thisnid;
+                this_cpupid = cpu_pid_to_cpupid(thiscpu, current->pid);
 
                 /*
                  * Multi-stage node selection is used in conjunction
@@ -2373,8 +2430,25 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
                  * it less likely we act on an unlikely task<->page
                  * relation.
                  */
-                last_nid = page_nid_xchg_last(page, polnid);
-                if (last_nid != polnid)
+                last_cpupid = page_cpupid_xchg_last(page, this_cpupid);
+                if (!cpupid_pid_unset(last_cpupid) && cpupid_to_nid(last_cpupid) != thisnid) {
+
+                        /* See sysctl_numa_balancing_migrate_deferred comment */
+                        if (!cpupid_match_pid(current, last_cpupid))
+                                defer_numa_migrate(current);
+
+                        goto out;
+                }
+
+                /*
+                 * The quadratic filter above reduces extraneous migration
+                 * of shared pages somewhat. This code reduces it even more,
+                 * reducing the overhead of page migrations of shared pages.
+                 * This makes workloads with shared pages rely more on
+                 * "move task near its memory", and less on "move memory
+                 * towards its task", which is exactly what we want.
+                 */
+                if (numa_migrate_deferred(current, last_cpupid))
                         goto out;
         }
 
diff --git a/mm/migrate.c b/mm/migrate.c
index c04692774e88..dfc8300ecbb2 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -445,6 +445,8 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
  */
 void migrate_page_copy(struct page *newpage, struct page *page)
 {
+        int cpupid;
+
         if (PageHuge(page) || PageTransHuge(page))
                 copy_huge_page(newpage, page);
         else
@@ -481,6 +483,13 @@ void migrate_page_copy(struct page *newpage, struct page *page)
                         __set_page_dirty_nobuffers(newpage);
         }
 
+        /*
+         * Copy NUMA information to the new page, to prevent over-eager
+         * future migrations of this same page.
+         */
+        cpupid = page_cpupid_xchg_last(page, -1);
+        page_cpupid_xchg_last(newpage, cpupid);
+
         mlock_migrate_page(newpage, page);
         ksm_migrate_page(newpage, page);
         /*
@@ -1500,7 +1509,7 @@ static struct page *alloc_misplaced_dst_page(struct page *page,
                                           __GFP_NOWARN) &
                                          ~GFP_IOFS, 0);
         if (newpage)
-                page_nid_xchg_last(newpage, page_nid_last(page));
+                page_cpupid_xchg_last(newpage, page_cpupid_last(page));
 
         return newpage;
 }
@@ -1601,7 +1610,8 @@ int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
  * node. Caller is expected to have an elevated reference count on
  * the page that will be dropped by this function before returning.
  */
-int migrate_misplaced_page(struct page *page, int node)
+int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
+                           int node)
 {
         pg_data_t *pgdat = NODE_DATA(node);
         int isolated;
@@ -1609,10 +1619,11 @@ int migrate_misplaced_page(struct page *page, int node)
         LIST_HEAD(migratepages);
 
         /*
-         * Don't migrate pages that are mapped in multiple processes.
-         * TODO: Handle false sharing detection instead of this hammer
+         * Don't migrate file pages that are mapped in multiple processes
+         * with execute permissions as they are probably shared libraries.
          */
-        if (page_mapcount(page) != 1)
+        if (page_mapcount(page) != 1 && page_is_file_cache(page) &&
+            (vma->vm_flags & VM_EXEC))
                 goto out;
 
         /*
@@ -1663,13 +1674,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
         int page_lru = page_is_file_cache(page);
 
         /*
-         * Don't migrate pages that are mapped in multiple processes.
-         * TODO: Handle false sharing detection instead of this hammer
-         */
-        if (page_mapcount(page) != 1)
-                goto out_dropref;
-
-        /*
          * Rate-limit the amount of data that is being migrated to a node.
          * Optimal placement is no good if the memory bus is saturated and
         * all the time is being spent migrating!
@@ -1682,7 +1686,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
         if (!new_page)
                 goto out_fail;
 
-        page_nid_xchg_last(new_page, page_nid_last(page));
+        page_cpupid_xchg_last(new_page, page_cpupid_last(page));
 
         isolated = numamigrate_isolate_page(pgdat, page);
         if (!isolated) {
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 633c08863fd8..68562e92d50c 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -71,26 +71,26 @@ void __init mminit_verify_pageflags_layout(void)
         unsigned long or_mask, add_mask;
 
         shift = 8 * sizeof(unsigned long);
-        width = shift - SECTIONS_WIDTH - NODES_WIDTH - ZONES_WIDTH - LAST_NID_SHIFT;
+        width = shift - SECTIONS_WIDTH - NODES_WIDTH - ZONES_WIDTH - LAST_CPUPID_SHIFT;
         mminit_dprintk(MMINIT_TRACE, "pageflags_layout_widths",
-                "Section %d Node %d Zone %d Lastnid %d Flags %d\n",
+                "Section %d Node %d Zone %d Lastcpupid %d Flags %d\n",
                 SECTIONS_WIDTH,
                 NODES_WIDTH,
                 ZONES_WIDTH,
-                LAST_NID_WIDTH,
+                LAST_CPUPID_WIDTH,
                 NR_PAGEFLAGS);
         mminit_dprintk(MMINIT_TRACE, "pageflags_layout_shifts",
-                "Section %d Node %d Zone %d Lastnid %d\n",
+                "Section %d Node %d Zone %d Lastcpupid %d\n",
                 SECTIONS_SHIFT,
                 NODES_SHIFT,
                 ZONES_SHIFT,
-                LAST_NID_SHIFT);
+                LAST_CPUPID_SHIFT);
         mminit_dprintk(MMINIT_TRACE, "pageflags_layout_pgshifts",
-                "Section %lu Node %lu Zone %lu Lastnid %lu\n",
+                "Section %lu Node %lu Zone %lu Lastcpupid %lu\n",
                 (unsigned long)SECTIONS_PGSHIFT,
                 (unsigned long)NODES_PGSHIFT,
                 (unsigned long)ZONES_PGSHIFT,
-                (unsigned long)LAST_NID_PGSHIFT);
+                (unsigned long)LAST_CPUPID_PGSHIFT);
         mminit_dprintk(MMINIT_TRACE, "pageflags_layout_nodezoneid",
                 "Node/Zone ID: %lu -> %lu\n",
                 (unsigned long)(ZONEID_PGOFF + ZONEID_SHIFT),
@@ -102,9 +102,9 @@ void __init mminit_verify_pageflags_layout(void)
         mminit_dprintk(MMINIT_TRACE, "pageflags_layout_nodeflags",
                 "Node not in page flags");
 #endif
-#ifdef LAST_NID_NOT_IN_PAGE_FLAGS
+#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
         mminit_dprintk(MMINIT_TRACE, "pageflags_layout_nodeflags",
-                "Last nid not in page flags");
+                "Last cpupid not in page flags");
 #endif
 
         if (SECTIONS_WIDTH) {
diff --git a/mm/mmzone.c b/mm/mmzone.c
index 2ac0afbd68f3..bf34fb8556db 100644
--- a/mm/mmzone.c
+++ b/mm/mmzone.c
@@ -97,20 +97,20 @@ void lruvec_init(struct lruvec *lruvec)
                 INIT_LIST_HEAD(&lruvec->lists[lru]);
 }
 
-#if defined(CONFIG_NUMA_BALANCING) && !defined(LAST_NID_NOT_IN_PAGE_FLAGS)
-int page_nid_xchg_last(struct page *page, int nid)
+#if defined(CONFIG_NUMA_BALANCING) && !defined(LAST_CPUPID_NOT_IN_PAGE_FLAGS)
+int page_cpupid_xchg_last(struct page *page, int cpupid)
 {
         unsigned long old_flags, flags;
-        int last_nid;
+        int last_cpupid;
 
         do {
                 old_flags = flags = page->flags;
-                last_nid = page_nid_last(page);
+                last_cpupid = page_cpupid_last(page);
 
-                flags &= ~(LAST_NID_MASK << LAST_NID_PGSHIFT);
-                flags |= (nid & LAST_NID_MASK) << LAST_NID_PGSHIFT;
+                flags &= ~(LAST_CPUPID_MASK << LAST_CPUPID_PGSHIFT);
+                flags |= (cpupid & LAST_CPUPID_MASK) << LAST_CPUPID_PGSHIFT;
         } while (unlikely(cmpxchg(&page->flags, old_flags, flags) != old_flags));
 
-        return last_nid;
+        return last_cpupid;
 }
 #endif
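page_cpupid_xchg_last() above stores a packed cpu+pid ("cpupid") value in the spare page->flags bits; the encode/decode helpers it pairs with (cpu_pid_to_cpupid(), cpupid_to_nid(), cpupid_pid_unset(), cpupid_match_pid(), as used by the mm/mempolicy.c hunks) are introduced elsewhere in the series and are not part of this mm diff. The following is a rough sketch of the assumed packing, for orientation only; field widths and exact helper bodies are assumptions.

/*
 * Illustrative sketch, not taken from this diff: the low bits of the last
 * faulting task's pid and the CPU it ran on are packed into one integer so
 * that the pair fits in the spare page->flags bits.
 */
#define LAST__PID_SHIFT         8
#define LAST__PID_MASK          ((1 << LAST__PID_SHIFT) - 1)
#define LAST__CPU_SHIFT         NR_CPUS_BITS
#define LAST__CPU_MASK          ((1 << LAST__CPU_SHIFT) - 1)
#define LAST_CPUPID_SHIFT       (LAST__PID_SHIFT + LAST__CPU_SHIFT)

static inline int cpu_pid_to_cpupid(int cpu, int pid)
{
        return ((cpu & LAST__CPU_MASK) << LAST__PID_SHIFT) | (pid & LAST__PID_MASK);
}

static inline int cpupid_to_pid(int cpupid)
{
        return cpupid & LAST__PID_MASK;
}

static inline int cpupid_to_cpu(int cpupid)
{
        return (cpupid >> LAST__PID_SHIFT) & LAST__CPU_MASK;
}

static inline int cpupid_to_nid(int cpupid)
{
        return cpu_to_node(cpupid_to_cpu(cpupid));
}

static inline bool cpupid_pid_unset(int cpupid)
{
        /* -1 truncated to the pid field means "no recorded access" */
        return cpupid_to_pid(cpupid) == (-1 & LAST__PID_MASK);
}

static inline bool cpupid_match_pid(struct task_struct *task, int cpupid)
{
        return (task->pid & LAST__PID_MASK) == cpupid_to_pid(cpupid);
}

Because only the low pid bits are kept, cpupid_match_pid() can match the wrong task occasionally; for the statistical filtering done in mpol_misplaced() above that imprecision is acceptable.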
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 412ba2b7326a..a597f2ffcd6f 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -37,14 +37,12 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
 
 static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
                 unsigned long addr, unsigned long end, pgprot_t newprot,
-                int dirty_accountable, int prot_numa, bool *ret_all_same_node)
+                int dirty_accountable, int prot_numa)
 {
         struct mm_struct *mm = vma->vm_mm;
         pte_t *pte, oldpte;
         spinlock_t *ptl;
         unsigned long pages = 0;
-        bool all_same_node = true;
-        int last_nid = -1;
 
         pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
         arch_enter_lazy_mmu_mode();
@@ -63,15 +61,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 
                                 page = vm_normal_page(vma, addr, oldpte);
                                 if (page) {
-                                        int this_nid = page_to_nid(page);
-                                        if (last_nid == -1)
-                                                last_nid = this_nid;
-                                        if (last_nid != this_nid)
-                                                all_same_node = false;
-
-                                        /* only check non-shared pages */
-                                        if (!pte_numa(oldpte) &&
-                                            page_mapcount(page) == 1) {
+                                        if (!pte_numa(oldpte)) {
                                                 ptent = pte_mknuma(ptent);
                                                 updated = true;
                                         }
@@ -104,33 +94,17 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
                                 if (pte_swp_soft_dirty(oldpte))
                                         newpte = pte_swp_mksoft_dirty(newpte);
                                 set_pte_at(mm, addr, pte, newpte);
+
+                                pages++;
                         }
-                        pages++;
                 }
         } while (pte++, addr += PAGE_SIZE, addr != end);
         arch_leave_lazy_mmu_mode();
         pte_unmap_unlock(pte - 1, ptl);
 
-        *ret_all_same_node = all_same_node;
         return pages;
 }
 
-#ifdef CONFIG_NUMA_BALANCING
-static inline void change_pmd_protnuma(struct mm_struct *mm, unsigned long addr,
-                pmd_t *pmd)
-{
-        spin_lock(&mm->page_table_lock);
-        set_pmd_at(mm, addr & PMD_MASK, pmd, pmd_mknuma(*pmd));
-        spin_unlock(&mm->page_table_lock);
-}
-#else
-static inline void change_pmd_protnuma(struct mm_struct *mm, unsigned long addr,
-                pmd_t *pmd)
-{
-        BUG();
-}
-#endif /* CONFIG_NUMA_BALANCING */
-
 static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
                 pud_t *pud, unsigned long addr, unsigned long end,
                 pgprot_t newprot, int dirty_accountable, int prot_numa)
@@ -138,34 +112,33 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
         pmd_t *pmd;
         unsigned long next;
         unsigned long pages = 0;
-        bool all_same_node;
 
         pmd = pmd_offset(pud, addr);
         do {
+                unsigned long this_pages;
+
                 next = pmd_addr_end(addr, end);
                 if (pmd_trans_huge(*pmd)) {
                         if (next - addr != HPAGE_PMD_SIZE)
                                 split_huge_page_pmd(vma, addr, pmd);
-                        else if (change_huge_pmd(vma, pmd, addr, newprot,
-                                        prot_numa)) {
-                                pages++;
-                                continue;
+                        else {
+                                int nr_ptes = change_huge_pmd(vma, pmd, addr,
+                                                newprot, prot_numa);
+
+                                if (nr_ptes) {
+                                        if (nr_ptes == HPAGE_PMD_NR)
+                                                pages++;
+
+                                        continue;
+                                }
                         }
                         /* fall through */
                 }
                 if (pmd_none_or_clear_bad(pmd))
                         continue;
-                pages += change_pte_range(vma, pmd, addr, next, newprot,
-                                dirty_accountable, prot_numa, &all_same_node);
-
-                /*
-                 * If we are changing protections for NUMA hinting faults then
-                 * set pmd_numa if the examined pages were all on the same
-                 * node. This allows a regular PMD to be handled as one fault
-                 * and effectively batches the taking of the PTL
-                 */
-                if (prot_numa && all_same_node)
-                        change_pmd_protnuma(vma->vm_mm, addr, pmd);
+                this_pages = change_pte_range(vma, pmd, addr, next, newprot,
+                                dirty_accountable, prot_numa);
+                pages += this_pages;
         } while (pmd++, addr = next, addr != end);
 
         return pages;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index dd886fac451a..73d812f16dde 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -626,7 +626,7 @@ static inline int free_pages_check(struct page *page)
                 bad_page(page);
                 return 1;
         }
-        page_nid_reset_last(page);
+        page_cpupid_reset_last(page);
         if (page->flags & PAGE_FLAGS_CHECK_AT_PREP)
                 page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
         return 0;
@@ -4015,7 +4015,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
                 mminit_verify_page_links(page, zone, nid, pfn);
                 init_page_count(page);
                 page_mapcount_reset(page);
-                page_nid_reset_last(page);
+                page_cpupid_reset_last(page);
                 SetPageReserved(page);
                 /*
                  * Mark the block movable so that blocks are reserved for
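page_cpupid_reset_last(), called twice above, is the free/init-time counterpart of page_cpupid_xchg_last() in mm/mmzone.c: it puts the cpupid field back to all ones, the same "no recorded access" value that the do_wp_page() hunk writes via (1 << LAST_CPUPID_SHIFT) - 1. A minimal sketch of the assumed helper for the configuration that keeps the field in page->flags (it is not part of this diff):

/* Assumed definition, illustrative only. */
static inline void page_cpupid_reset_last(struct page *page)
{
        /* all ones in the cpupid field == no task/CPU recorded */
        page->flags |= LAST_CPUPID_MASK << LAST_CPUPID_PGSHIFT;
}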