 arch/sh/mm/Kconfig       |   1
 arch/x86/Kconfig         |   2
 include/linux/mm_types.h |  11
 include/linux/sched.h    |  20
 kernel/sched/core.c      |  13
 kernel/sched/fair.c      | 125
 kernel/sched/features.h  |   7
 kernel/sched/sched.h     |   6
 kernel/sysctl.c          |  24
 mm/huge_memory.c         |   5
 mm/memory.c              |  14
 11 files changed, 224 insertions(+), 4 deletions(-)
diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig
index cb8f9920f4dd..0f7c852f355c 100644
--- a/arch/sh/mm/Kconfig
+++ b/arch/sh/mm/Kconfig
@@ -111,6 +111,7 @@ config VSYSCALL
 config NUMA
 	bool "Non Uniform Memory Access (NUMA) Support"
 	depends on MMU && SYS_SUPPORTS_NUMA && EXPERIMENTAL
+	select ARCH_WANT_NUMA_VARIABLE_LOCALITY
 	default n
 	help
 	  Some SH systems have many various memories scattered around
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 46c3bff3ced2..1137028fc6d9 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -22,6 +22,8 @@ config X86
 	def_bool y
 	select HAVE_AOUT if X86_32
 	select HAVE_UNSTABLE_SCHED_CLOCK
+	select ARCH_SUPPORTS_NUMA_BALANCING
+	select ARCH_WANTS_PROT_NUMA_PROT_NONE
 	select HAVE_IDE
 	select HAVE_OPROFILE
 	select HAVE_PCSPKR_PLATFORM
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 31f8a3af7d94..ed8638c29b3e 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -398,6 +398,17 @@ struct mm_struct {
 #ifdef CONFIG_CPUMASK_OFFSTACK
 	struct cpumask cpumask_allocation;
 #endif
+#ifdef CONFIG_NUMA_BALANCING
+	/*
+	 * numa_next_scan is the next time when the PTEs will be marked
+	 * pte_numa to gather statistics and migrate pages to new nodes
+	 * if necessary
+	 */
+	unsigned long numa_next_scan;
+
+	/* numa_scan_seq prevents two threads setting pte_numa */
+	int numa_scan_seq;
+#endif
 	struct uprobes_state uprobes_state;
 };
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0dd42a02df2e..844af5b12cb2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1479,6 +1479,14 @@ struct task_struct {
 	short il_next;
 	short pref_node_fork;
 #endif
+#ifdef CONFIG_NUMA_BALANCING
+	int numa_scan_seq;
+	int numa_migrate_seq;
+	unsigned int numa_scan_period;
+	u64 node_stamp;			/* migration stamp */
+	struct callback_head numa_work;
+#endif /* CONFIG_NUMA_BALANCING */
+
 	struct rcu_head rcu;
 
 	/*
@@ -1553,6 +1561,14 @@ struct task_struct {
 /* Future-safe accessor for struct task_struct's cpus_allowed. */
 #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
 
+#ifdef CONFIG_NUMA_BALANCING
+extern void task_numa_fault(int node, int pages);
+#else
+static inline void task_numa_fault(int node, int pages)
+{
+}
+#endif
+
 /*
  * Priority of a process goes from 0..MAX_PRIO-1, valid RT
  * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
@@ -1990,6 +2006,10 @@ enum sched_tunable_scaling {
 };
 extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
 
+extern unsigned int sysctl_numa_balancing_scan_period_min;
+extern unsigned int sysctl_numa_balancing_scan_period_max;
+extern unsigned int sysctl_numa_balancing_settle_count;
+
 #ifdef CONFIG_SCHED_DEBUG
 extern unsigned int sysctl_sched_migration_cost;
 extern unsigned int sysctl_sched_nr_migrate;
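A note on the include/linux/sched.h hunk above: with CONFIG_NUMA_BALANCING disabled, task_numa_fault() degrades to an empty static inline, so the call sites added in mm/memory.c and mm/huge_memory.c further down can stay unconditional. A minimal userspace sketch of the same stub idiom, using made-up names rather than anything from this patch:

#include <stdio.h>

/* #define CONFIG_FOO_TRACING 1 */	/* flip this on to enable the real hook */

#ifdef CONFIG_FOO_TRACING
void foo_trace(int node, int pages)
{
	printf("fault: node=%d pages=%d\n", node, pages);
}
#else
/* compiles away entirely; call sites need no #ifdef of their own */
static inline void foo_trace(int node, int pages) { }
#endif

int main(void)
{
	foo_trace(0, 1);	/* unconditional call site */
	return 0;
}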
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2d8927fda712..cad0d092ce3b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1533,6 +1533,19 @@ static void __sched_fork(struct task_struct *p)
 #ifdef CONFIG_PREEMPT_NOTIFIERS
 	INIT_HLIST_HEAD(&p->preempt_notifiers);
 #endif
+
+#ifdef CONFIG_NUMA_BALANCING
+	if (p->mm && atomic_read(&p->mm->mm_users) == 1) {
+		p->mm->numa_next_scan = jiffies;
+		p->mm->numa_scan_seq = 0;
+	}
+
+	p->node_stamp = 0ULL;
+	p->numa_scan_seq = p->mm ? p->mm->numa_scan_seq : 0;
+	p->numa_migrate_seq = p->mm ? p->mm->numa_scan_seq - 1 : 0;
+	p->numa_scan_period = sysctl_numa_balancing_scan_period_min;
+	p->numa_work.next = &p->numa_work;
+#endif /* CONFIG_NUMA_BALANCING */
 }
 
 /*
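In the __sched_fork() hunk above, p->numa_work.next is pointed back at the callback_head itself; task_tick_numa() later treats work->next != work as "already queued", and task_numa_work() restores the sentinel with work->next = work before doing anything else. A small standalone sketch of that self-pointing sentinel, with illustrative names only:

#include <stdbool.h>
#include <stdio.h>

struct callback_head {
	struct callback_head *next;
	void (*func)(struct callback_head *);
};

/* "not queued" is encoded as the node pointing at itself */
static void cb_init(struct callback_head *cb) { cb->next = cb; }
static bool cb_is_queued(const struct callback_head *cb) { return cb->next != cb; }

int main(void)
{
	struct callback_head work, other = { 0 };

	cb_init(&work);
	printf("queued? %d\n", cb_is_queued(&work));	/* 0 */
	work.next = &other;	/* pretend task_work_add() linked it somewhere */
	printf("queued? %d\n", cb_is_queued(&work));	/* 1 */
	cb_init(&work);		/* re-arm, as task_numa_work() does */
	printf("queued? %d\n", cb_is_queued(&work));	/* 0 */
	return 0;
}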
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6b800a14b990..6831abb5dbef 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -26,6 +26,8 @@
 #include <linux/slab.h>
 #include <linux/profile.h>
 #include <linux/interrupt.h>
+#include <linux/mempolicy.h>
+#include <linux/task_work.h>
 
 #include <trace/events/sched.h>
 
@@ -776,6 +778,126 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
  * Scheduling class queueing methods:
  */
 
+#ifdef CONFIG_NUMA_BALANCING
+/*
+ * numa task sample period in ms: 5s
+ */
+unsigned int sysctl_numa_balancing_scan_period_min = 5000;
+unsigned int sysctl_numa_balancing_scan_period_max = 5000*16;
+
+static void task_numa_placement(struct task_struct *p)
+{
+	int seq = ACCESS_ONCE(p->mm->numa_scan_seq);
+
+	if (p->numa_scan_seq == seq)
+		return;
+	p->numa_scan_seq = seq;
+
+	/* FIXME: Scheduling placement policy hints go here */
+}
+
+/*
+ * Got a PROT_NONE fault for a page on @node.
+ */
+void task_numa_fault(int node, int pages)
+{
+	struct task_struct *p = current;
+
+	/* FIXME: Allocate task-specific structure for placement policy here */
+
+	task_numa_placement(p);
+}
+
+/*
+ * The expensive part of numa migration is done from task_work context.
+ * Triggered from task_tick_numa().
+ */
+void task_numa_work(struct callback_head *work)
+{
+	unsigned long migrate, next_scan, now = jiffies;
+	struct task_struct *p = current;
+	struct mm_struct *mm = p->mm;
+
+	WARN_ON_ONCE(p != container_of(work, struct task_struct, numa_work));
+
+	work->next = work; /* protect against double add */
+	/*
+	 * Who cares about NUMA placement when they're dying.
+	 *
+	 * NOTE: make sure not to dereference p->mm before this check,
+	 * exit_task_work() happens _after_ exit_mm() so we could be called
+	 * without p->mm even though we still had it when we enqueued this
+	 * work.
+	 */
+	if (p->flags & PF_EXITING)
+		return;
+
+	/*
+	 * Enforce maximal scan/migration frequency..
+	 */
+	migrate = mm->numa_next_scan;
+	if (time_before(now, migrate))
+		return;
+
+	if (p->numa_scan_period == 0)
+		p->numa_scan_period = sysctl_numa_balancing_scan_period_min;
+
+	next_scan = now + 2*msecs_to_jiffies(p->numa_scan_period);
+	if (cmpxchg(&mm->numa_next_scan, migrate, next_scan) != migrate)
+		return;
+
+	ACCESS_ONCE(mm->numa_scan_seq)++;
+	{
+		struct vm_area_struct *vma;
+
+		down_read(&mm->mmap_sem);
+		for (vma = mm->mmap; vma; vma = vma->vm_next) {
+			if (!vma_migratable(vma))
+				continue;
+			change_prot_numa(vma, vma->vm_start, vma->vm_end);
+		}
+		up_read(&mm->mmap_sem);
+	}
+}
+
+/*
+ * Drive the periodic memory faults..
+ */
+void task_tick_numa(struct rq *rq, struct task_struct *curr)
+{
+	struct callback_head *work = &curr->numa_work;
+	u64 period, now;
+
+	/*
+	 * We don't care about NUMA placement if we don't have memory.
+	 */
+	if (!curr->mm || (curr->flags & PF_EXITING) || work->next != work)
+		return;
+
+	/*
+	 * Using runtime rather than walltime has the dual advantage that
+	 * we (mostly) drive the selection from busy threads and that the
+	 * task needs to have done some actual work before we bother with
+	 * NUMA placement.
+	 */
+	now = curr->se.sum_exec_runtime;
+	period = (u64)curr->numa_scan_period * NSEC_PER_MSEC;
+
+	if (now - curr->node_stamp > period) {
+		curr->node_stamp = now;
+
+		if (!time_before(jiffies, curr->mm->numa_next_scan)) {
+			init_task_work(work, task_numa_work); /* TODO: move this into sched_fork() */
+			task_work_add(curr, work, true);
+		}
+	}
+}
+#else
+static void task_tick_numa(struct rq *rq, struct task_struct *curr)
+{
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
 static void
 account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
@@ -4954,6 +5076,9 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
 		cfs_rq = cfs_rq_of(se);
 		entity_tick(cfs_rq, se, queued);
 	}
+
+	if (sched_feat_numa(NUMA))
+		task_tick_numa(rq, curr);
 }
 
 /*
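Rough arithmetic for the scanning machinery added above, assuming the 5000 ms default: task_tick_numa() only arms the task_work once the task has accumulated numa_scan_period * NSEC_PER_MSEC = 5 * 10^9 ns (5 s) of CPU time since node_stamp, and task_numa_work() then pushes mm->numa_next_scan two scan periods of wall-clock time into the future. The cmpxchg() on mm->numa_next_scan is what keeps multiple threads of one mm from all rescanning the address space in the same window. A hedged userspace sketch of that compare-and-swap throttle, using C11 atomics and plain milliseconds rather than the kernel's cmpxchg() and jiffies:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* one shared "next allowed scan time" per mm, in milliseconds here */
static _Atomic unsigned long numa_next_scan;

/* returns true for the one caller that wins the right to scan this window */
static bool try_claim_scan(unsigned long now_ms, unsigned long period_ms)
{
	unsigned long migrate = atomic_load(&numa_next_scan);

	if (now_ms < migrate)			/* time_before(now, migrate) */
		return false;

	/* push the window 2*period ahead; only one CAS can succeed */
	return atomic_compare_exchange_strong(&numa_next_scan, &migrate,
					      now_ms + 2 * period_ms);
}

int main(void)
{
	unsigned long period = 5000;	/* sysctl_numa_balancing_scan_period_min */

	printf("t=0     claim: %d\n", try_claim_scan(0, period));	/* 1: wins       */
	printf("t=0     claim: %d\n", try_claim_scan(0, period));	/* 0: lost race  */
	printf("t=9000  claim: %d\n", try_claim_scan(9000, period));	/* 0: too early  */
	printf("t=10000 claim: %d\n", try_claim_scan(10000, period));	/* 1: new window */
	return 0;
}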
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index eebefcad7027..5fb7aefbec80 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -61,3 +61,10 @@ SCHED_FEAT(TTWU_QUEUE, true)
 SCHED_FEAT(FORCE_SD_OVERLAP, false)
 SCHED_FEAT(RT_RUNTIME_SHARE, true)
 SCHED_FEAT(LB_MIN, false)
+
+/*
+ * Apply the automatic NUMA scheduling policy
+ */
+#ifdef CONFIG_NUMA_BALANCING
+SCHED_FEAT(NUMA, true)
+#endif
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 7a7db09cfabc..ae31c051ff2f 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -648,6 +648,12 @@ extern struct static_key sched_feat_keys[__SCHED_FEAT_NR];
 #define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x))
 #endif /* SCHED_DEBUG && HAVE_JUMP_LABEL */
 
+#ifdef CONFIG_NUMA_BALANCING
+#define sched_feat_numa(x) sched_feat(x)
+#else
+#define sched_feat_numa(x) (0)
+#endif
+
 static inline u64 global_rt_period(void)
 {
 	return (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 26f65eaa01f9..025e1ae50ef1 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -256,9 +256,11 @@ static int min_sched_granularity_ns = 100000;		/* 100 usecs */
 static int max_sched_granularity_ns = NSEC_PER_SEC;	/* 1 second */
 static int min_wakeup_granularity_ns;			/* 0 usecs */
 static int max_wakeup_granularity_ns = NSEC_PER_SEC;	/* 1 second */
+#ifdef CONFIG_SMP
 static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
 static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
-#endif
+#endif /* CONFIG_SMP */
+#endif /* CONFIG_SCHED_DEBUG */
 
 #ifdef CONFIG_COMPACTION
 static int min_extfrag_threshold;
@@ -301,6 +303,7 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &min_wakeup_granularity_ns,
 		.extra2		= &max_wakeup_granularity_ns,
 	},
+#ifdef CONFIG_SMP
 	{
 		.procname	= "sched_tunable_scaling",
 		.data		= &sysctl_sched_tunable_scaling,
@@ -347,7 +350,24 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
-#endif
+#endif /* CONFIG_SMP */
+#ifdef CONFIG_NUMA_BALANCING
+	{
+		.procname	= "numa_balancing_scan_period_min_ms",
+		.data		= &sysctl_numa_balancing_scan_period_min,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "numa_balancing_scan_period_max_ms",
+		.data		= &sysctl_numa_balancing_scan_period_max,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif /* CONFIG_NUMA_BALANCING */
+#endif /* CONFIG_SCHED_DEBUG */
 	{
 		.procname	= "sched_rt_period_us",
 		.data		= &sysctl_sched_rt_period,
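Because these entries land in kern_table, they should surface as /proc/sys/kernel/numa_balancing_scan_period_min_ms and numa_balancing_scan_period_max_ms, and only on kernels built with both CONFIG_SCHED_DEBUG and CONFIG_NUMA_BALANCING (the whole block sits inside the CONFIG_SCHED_DEBUG region). A small sketch of reading one of them back from userspace, assuming such a kernel:

#include <stdio.h>

int main(void)
{
	const char *path = "/proc/sys/kernel/numa_balancing_scan_period_min_ms";
	unsigned int ms;
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);		/* kernel built without the options above */
		return 1;
	}
	if (fscanf(f, "%u", &ms) == 1)
		printf("minimum NUMA scan period: %u ms\n", ms);
	fclose(f);
	return 0;
}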
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index d79f7a55bf6f..ee8133794a56 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1046,6 +1046,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	 */
 	split_huge_page(page);
 	put_page(page);
+
 	return 0;
 
 clear_pmdnuma:
@@ -1060,8 +1061,10 @@ clear_pmdnuma:
 
 out_unlock:
 	spin_unlock(&mm->page_table_lock);
-	if (page)
+	if (page) {
 		put_page(page);
+		task_numa_fault(numa_node_id(), HPAGE_PMD_NR);
+	}
 	return 0;
 }
 
diff --git a/mm/memory.c b/mm/memory.c
index d52542680e10..8012c1907895 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3454,7 +3454,8 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 {
 	struct page *page = NULL;
 	spinlock_t *ptl;
-	int current_nid, target_nid;
+	int current_nid = -1;
+	int target_nid;
 
 	/*
 	 * The "pte" at this point cannot be used safely without
@@ -3501,6 +3502,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	current_nid = target_nid;
 
 out:
+	task_numa_fault(current_nid, 1);
 	return 0;
 }
 
@@ -3537,6 +3539,7 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	for (addr = _addr + offset; addr < _addr + PMD_SIZE; pte++, addr += PAGE_SIZE) {
 		pte_t pteval = *pte;
 		struct page *page;
+		int curr_nid;
 		if (!pte_present(pteval))
 			continue;
 		if (!pte_numa(pteval))
@@ -3554,6 +3557,15 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		page = vm_normal_page(vma, addr, pteval);
 		if (unlikely(!page))
 			continue;
+		/* only check non-shared pages */
+		if (unlikely(page_mapcount(page) != 1))
+			continue;
+		pte_unmap_unlock(pte, ptl);
+
+		curr_nid = page_to_nid(page);
+		task_numa_fault(curr_nid, 1);
+
+		pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
 	}
 	pte_unmap_unlock(orig_pte, ptl);
 
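One design note on the do_pmd_numa_page() hunk above: the loop only accounts pages mapped exactly once, and it drops the page-table lock (pte_unmap_unlock()) before calling task_numa_fault(), re-taking it with pte_offset_map_lock() afterwards, presumably so that later placement work is free to block. A generic userspace sketch of that drop-the-lock-around-slow-work pattern, not of the kernel locking API itself:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

/* stand-in for task_numa_fault(): must not run with the lock held */
static void slow_accounting(int nid)
{
	printf("accounted fault on node %d\n", nid);
}

static void scan_entry(int nid)
{
	pthread_mutex_lock(&table_lock);
	/* ... examine the entry under the lock ... */
	pthread_mutex_unlock(&table_lock);	/* pte_unmap_unlock() in the patch */

	slow_accounting(nid);			/* lock is not held here */

	pthread_mutex_lock(&table_lock);	/* pte_offset_map_lock() re-acquires */
	/* ... continue the walk; the entry may have changed meanwhile ... */
	pthread_mutex_unlock(&table_lock);
}

int main(void)
{
	scan_entry(0);
	return 0;
}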