diff options
author | Mel Gorman <mgorman@suse.de> | 2013-10-07 06:29:03 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2013-10-09 06:40:30 -0400 |
commit | ac8e895bd260cb8bb19ade6a3abd44e7abe9a01d (patch) | |
tree | fe0d50baf0dad412fd7d5ba0286ce95e08a363ac /kernel | |
parent | e6628d5b0a2979f3e0ee6f7783ede5df50cb9ede (diff) |
sched/numa: Add infrastructure for split shared/private accounting of NUMA hinting faults
Ideally it would be possible to distinguish between NUMA hinting faults
that are private to a task and those that are shared. This patch prepares
infrastructure for separately accounting shared and private faults by
allocating the necessary buffers and passing in relevant information. For
now, all faults are treated as private and detection will be introduced
later.
Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-26-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/sched/fair.c | 46 |
1 file changed, 35 insertions, 11 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 8b15e9e1d1b8..89eeb89fd99a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -886,6 +886,20 @@ static unsigned int task_scan_max(struct task_struct *p) | |||
886 | */ | 886 | */ |
887 | unsigned int sysctl_numa_balancing_settle_count __read_mostly = 3; | 887 | unsigned int sysctl_numa_balancing_settle_count __read_mostly = 3; |
888 | 888 | ||
889 | static inline int task_faults_idx(int nid, int priv) | ||
890 | { | ||
891 | return 2 * nid + priv; | ||
892 | } | ||
893 | |||
894 | static inline unsigned long task_faults(struct task_struct *p, int nid) | ||
895 | { | ||
896 | if (!p->numa_faults) | ||
897 | return 0; | ||
898 | |||
899 | return p->numa_faults[task_faults_idx(nid, 0)] + | ||
900 | p->numa_faults[task_faults_idx(nid, 1)]; | ||
901 | } | ||
902 | |||
889 | static unsigned long weighted_cpuload(const int cpu); | 903 | static unsigned long weighted_cpuload(const int cpu); |
890 | 904 | ||
891 | 905 | ||
@@ -928,13 +942,19 @@ static void task_numa_placement(struct task_struct *p) | |||
928 | /* Find the node with the highest number of faults */ | 942 | /* Find the node with the highest number of faults */ |
929 | for_each_online_node(nid) { | 943 | for_each_online_node(nid) { |
930 | unsigned long faults; | 944 | unsigned long faults; |
945 | int priv, i; | ||
931 | 946 | ||
932 | /* Decay existing window and copy faults since last scan */ | 947 | for (priv = 0; priv < 2; priv++) { |
933 | p->numa_faults[nid] >>= 1; | 948 | i = task_faults_idx(nid, priv); |
934 | p->numa_faults[nid] += p->numa_faults_buffer[nid]; | ||
935 | p->numa_faults_buffer[nid] = 0; | ||
936 | 949 | ||
937 | faults = p->numa_faults[nid]; | 950 | /* Decay existing window, copy faults since last scan */ |
951 | p->numa_faults[i] >>= 1; | ||
952 | p->numa_faults[i] += p->numa_faults_buffer[i]; | ||
953 | p->numa_faults_buffer[i] = 0; | ||
954 | } | ||
955 | |||
956 | /* Find maximum private faults */ | ||
957 | faults = p->numa_faults[task_faults_idx(nid, 1)]; | ||
938 | if (faults > max_faults) { | 958 | if (faults > max_faults) { |
939 | max_faults = faults; | 959 | max_faults = faults; |
940 | max_nid = nid; | 960 | max_nid = nid; |
@@ -970,16 +990,20 @@ static void task_numa_placement(struct task_struct *p) | |||
970 | /* | 990 | /* |
971 | * Got a PROT_NONE fault for a page on @node. | 991 | * Got a PROT_NONE fault for a page on @node. |
972 | */ | 992 | */ |
973 | void task_numa_fault(int node, int pages, bool migrated) | 993 | void task_numa_fault(int last_nid, int node, int pages, bool migrated) |
974 | { | 994 | { |
975 | struct task_struct *p = current; | 995 | struct task_struct *p = current; |
996 | int priv; | ||
976 | 997 | ||
977 | if (!numabalancing_enabled) | 998 | if (!numabalancing_enabled) |
978 | return; | 999 | return; |
979 | 1000 | ||
1001 | /* For now, do not attempt to detect private/shared accesses */ | ||
1002 | priv = 1; | ||
1003 | |||
980 | /* Allocate buffer to track faults on a per-node basis */ | 1004 | /* Allocate buffer to track faults on a per-node basis */ |
981 | if (unlikely(!p->numa_faults)) { | 1005 | if (unlikely(!p->numa_faults)) { |
982 | int size = sizeof(*p->numa_faults) * nr_node_ids; | 1006 | int size = sizeof(*p->numa_faults) * 2 * nr_node_ids; |
983 | 1007 | ||
984 | /* numa_faults and numa_faults_buffer share the allocation */ | 1008 | /* numa_faults and numa_faults_buffer share the allocation */ |
985 | p->numa_faults = kzalloc(size * 2, GFP_KERNEL|__GFP_NOWARN); | 1009 | p->numa_faults = kzalloc(size * 2, GFP_KERNEL|__GFP_NOWARN); |
@@ -987,7 +1011,7 @@ void task_numa_fault(int node, int pages, bool migrated) | |||
987 | return; | 1011 | return; |
988 | 1012 | ||
989 | BUG_ON(p->numa_faults_buffer); | 1013 | BUG_ON(p->numa_faults_buffer); |
990 | p->numa_faults_buffer = p->numa_faults + nr_node_ids; | 1014 | p->numa_faults_buffer = p->numa_faults + (2 * nr_node_ids); |
991 | } | 1015 | } |
992 | 1016 | ||
993 | /* | 1017 | /* |
@@ -1005,7 +1029,7 @@ void task_numa_fault(int node, int pages, bool migrated) | |||
1005 | 1029 | ||
1006 | task_numa_placement(p); | 1030 | task_numa_placement(p); |
1007 | 1031 | ||
1008 | p->numa_faults_buffer[node] += pages; | 1032 | p->numa_faults_buffer[task_faults_idx(node, priv)] += pages; |
1009 | } | 1033 | } |
1010 | 1034 | ||
1011 | static void reset_ptenuma_scan(struct task_struct *p) | 1035 | static void reset_ptenuma_scan(struct task_struct *p) |
@@ -4146,7 +4170,7 @@ static bool migrate_improves_locality(struct task_struct *p, struct lb_env *env) | |||
4146 | return false; | 4170 | return false; |
4147 | 4171 | ||
4148 | if (dst_nid == p->numa_preferred_nid || | 4172 | if (dst_nid == p->numa_preferred_nid || |
4149 | p->numa_faults[dst_nid] > p->numa_faults[src_nid]) | 4173 | task_faults(p, dst_nid) > task_faults(p, src_nid)) |
4150 | return true; | 4174 | return true; |
4151 | 4175 | ||
4152 | return false; | 4176 | return false; |
@@ -4170,7 +4194,7 @@ static bool migrate_degrades_locality(struct task_struct *p, struct lb_env *env) | |||
4170 | p->numa_migrate_seq >= sysctl_numa_balancing_settle_count) | 4194 | p->numa_migrate_seq >= sysctl_numa_balancing_settle_count) |
4171 | return false; | 4195 | return false; |
4172 | 4196 | ||
4173 | if (p->numa_faults[dst_nid] < p->numa_faults[src_nid]) | 4197 | if (task_faults(p, dst_nid) < task_faults(p, src_nid)) |
4174 | return true; | 4198 | return true; |
4175 | 4199 | ||
4176 | return false; | 4200 | return false; |