about summary refs log tree commit diff stats
path: root/kernel/sched/fair.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched/fair.c')
-rw-r--r-- kernel/sched/fair.c 110
1 files changed, 57 insertions, 53 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d03d76de7aff..826fdf326683 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -896,18 +896,24 @@ pid_t task_numa_group_id(struct task_struct *p)
896 return p->numa_group ? p->numa_group->gid : 0; 896 return p->numa_group ? p->numa_group->gid : 0;
897} 897}
898 898
899static inline int task_faults_idx(int nid, int priv) 899/*
900 * The averaged statistics, shared & private, memory & cpu,
901 * occupy the first half of the array. The second half of the
902 * array is for current counters, which are averaged into the
903 * first set by task_numa_placement.
904 */
905static inline int task_faults_idx(enum numa_faults_stats s, int nid, int priv)
900{ 906{
901 return NR_NUMA_HINT_FAULT_TYPES * nid + priv; 907 return NR_NUMA_HINT_FAULT_TYPES * (s * nr_node_ids + nid) + priv;
902} 908}
903 909
904static inline unsigned long task_faults(struct task_struct *p, int nid) 910static inline unsigned long task_faults(struct task_struct *p, int nid)
905{ 911{
906 if (!p->numa_faults_memory) 912 if (!p->numa_faults)
907 return 0; 913 return 0;
908 914
909 return p->numa_faults_memory[task_faults_idx(nid, 0)] + 915 return p->numa_faults[task_faults_idx(NUMA_MEM, nid, 0)] +
910 p->numa_faults_memory[task_faults_idx(nid, 1)]; 916 p->numa_faults[task_faults_idx(NUMA_MEM, nid, 1)];
911} 917}
912 918
913static inline unsigned long group_faults(struct task_struct *p, int nid) 919static inline unsigned long group_faults(struct task_struct *p, int nid)
@@ -915,14 +921,14 @@ static inline unsigned long group_faults(struct task_struct *p, int nid)
915 if (!p->numa_group) 921 if (!p->numa_group)
916 return 0; 922 return 0;
917 923
918 return p->numa_group->faults[task_faults_idx(nid, 0)] + 924 return p->numa_group->faults[task_faults_idx(NUMA_MEM, nid, 0)] +
919 p->numa_group->faults[task_faults_idx(nid, 1)]; 925 p->numa_group->faults[task_faults_idx(NUMA_MEM, nid, 1)];
920} 926}
921 927
922static inline unsigned long group_faults_cpu(struct numa_group *group, int nid) 928static inline unsigned long group_faults_cpu(struct numa_group *group, int nid)
923{ 929{
924 return group->faults_cpu[task_faults_idx(nid, 0)] + 930 return group->faults_cpu[task_faults_idx(NUMA_MEM, nid, 0)] +
925 group->faults_cpu[task_faults_idx(nid, 1)]; 931 group->faults_cpu[task_faults_idx(NUMA_MEM, nid, 1)];
926} 932}
927 933
928/* Handle placement on systems where not all nodes are directly connected. */ 934/* Handle placement on systems where not all nodes are directly connected. */
@@ -1001,7 +1007,7 @@ static inline unsigned long task_weight(struct task_struct *p, int nid,
1001{ 1007{
1002 unsigned long faults, total_faults; 1008 unsigned long faults, total_faults;
1003 1009
1004 if (!p->numa_faults_memory) 1010 if (!p->numa_faults)
1005 return 0; 1011 return 0;
1006 1012
1007 total_faults = p->total_numa_faults; 1013 total_faults = p->total_numa_faults;
@@ -1517,7 +1523,7 @@ static void numa_migrate_preferred(struct task_struct *p)
1517 unsigned long interval = HZ; 1523 unsigned long interval = HZ;
1518 1524
1519 /* This task has no NUMA fault statistics yet */ 1525 /* This task has no NUMA fault statistics yet */
1520 if (unlikely(p->numa_preferred_nid == -1 || !p->numa_faults_memory)) 1526 if (unlikely(p->numa_preferred_nid == -1 || !p->numa_faults))
1521 return; 1527 return;
1522 1528
1523 /* Periodically retry migrating the task to the preferred node */ 1529 /* Periodically retry migrating the task to the preferred node */
@@ -1779,18 +1785,23 @@ static void task_numa_placement(struct task_struct *p)
1779 1785
1780 /* Find the node with the highest number of faults */ 1786 /* Find the node with the highest number of faults */
1781 for_each_online_node(nid) { 1787 for_each_online_node(nid) {
1788 /* Keep track of the offsets in numa_faults array */
1789 int mem_idx, membuf_idx, cpu_idx, cpubuf_idx;
1782 unsigned long faults = 0, group_faults = 0; 1790 unsigned long faults = 0, group_faults = 0;
1783 int priv, i; 1791 int priv;
1784 1792
1785 for (priv = 0; priv < NR_NUMA_HINT_FAULT_TYPES; priv++) { 1793 for (priv = 0; priv < NR_NUMA_HINT_FAULT_TYPES; priv++) {
1786 long diff, f_diff, f_weight; 1794 long diff, f_diff, f_weight;
1787 1795
1788 i = task_faults_idx(nid, priv); 1796 mem_idx = task_faults_idx(NUMA_MEM, nid, priv);
1797 membuf_idx = task_faults_idx(NUMA_MEMBUF, nid, priv);
1798 cpu_idx = task_faults_idx(NUMA_CPU, nid, priv);
1799 cpubuf_idx = task_faults_idx(NUMA_CPUBUF, nid, priv);
1789 1800
1790 /* Decay existing window, copy faults since last scan */ 1801 /* Decay existing window, copy faults since last scan */
1791 diff = p->numa_faults_buffer_memory[i] - p->numa_faults_memory[i] / 2; 1802 diff = p->numa_faults[membuf_idx] - p->numa_faults[mem_idx] / 2;
1792 fault_types[priv] += p->numa_faults_buffer_memory[i]; 1803 fault_types[priv] += p->numa_faults[membuf_idx];
1793 p->numa_faults_buffer_memory[i] = 0; 1804 p->numa_faults[membuf_idx] = 0;
1794 1805
1795 /* 1806 /*
1796 * Normalize the faults_from, so all tasks in a group 1807 * Normalize the faults_from, so all tasks in a group
@@ -1800,21 +1811,27 @@ static void task_numa_placement(struct task_struct *p)
1800 * faults are less important. 1811 * faults are less important.
1801 */ 1812 */
1802 f_weight = div64_u64(runtime << 16, period + 1); 1813 f_weight = div64_u64(runtime << 16, period + 1);
1803 f_weight = (f_weight * p->numa_faults_buffer_cpu[i]) / 1814 f_weight = (f_weight * p->numa_faults[cpubuf_idx]) /
1804 (total_faults + 1); 1815 (total_faults + 1);
1805 f_diff = f_weight - p->numa_faults_cpu[i] / 2; 1816 f_diff = f_weight - p->numa_faults[cpu_idx] / 2;
1806 p->numa_faults_buffer_cpu[i] = 0; 1817 p->numa_faults[cpubuf_idx] = 0;
1807 1818
1808 p->numa_faults_memory[i] += diff; 1819 p->numa_faults[mem_idx] += diff;
1809 p->numa_faults_cpu[i] += f_diff; 1820 p->numa_faults[cpu_idx] += f_diff;
1810 faults += p->numa_faults_memory[i]; 1821 faults += p->numa_faults[mem_idx];
1811 p->total_numa_faults += diff; 1822 p->total_numa_faults += diff;
1812 if (p->numa_group) { 1823 if (p->numa_group) {
1813 /* safe because we can only change our own group */ 1824 /*
1814 p->numa_group->faults[i] += diff; 1825 * safe because we can only change our own group
1815 p->numa_group->faults_cpu[i] += f_diff; 1826 *
1827 * mem_idx represents the offset for a given
1828 * nid and priv in a specific region because it
1829 * is at the beginning of the numa_faults array.
1830 */
1831 p->numa_group->faults[mem_idx] += diff;
1832 p->numa_group->faults_cpu[mem_idx] += f_diff;
1816 p->numa_group->total_faults += diff; 1833 p->numa_group->total_faults += diff;
1817 group_faults += p->numa_group->faults[i]; 1834 group_faults += p->numa_group->faults[mem_idx];
1818 } 1835 }
1819 } 1836 }
1820 1837
@@ -1886,7 +1903,7 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags,
1886 node_set(task_node(current), grp->active_nodes); 1903 node_set(task_node(current), grp->active_nodes);
1887 1904
1888 for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) 1905 for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++)
1889 grp->faults[i] = p->numa_faults_memory[i]; 1906 grp->faults[i] = p->numa_faults[i];
1890 1907
1891 grp->total_faults = p->total_numa_faults; 1908 grp->total_faults = p->total_numa_faults;
1892 1909
@@ -1945,8 +1962,8 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags,
1945 double_lock_irq(&my_grp->lock, &grp->lock); 1962 double_lock_irq(&my_grp->lock, &grp->lock);
1946 1963
1947 for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) { 1964 for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) {
1948 my_grp->faults[i] -= p->numa_faults_memory[i]; 1965 my_grp->faults[i] -= p->numa_faults[i];
1949 grp->faults[i] += p->numa_faults_memory[i]; 1966 grp->faults[i] += p->numa_faults[i];
1950 } 1967 }
1951 my_grp->total_faults -= p->total_numa_faults; 1968 my_grp->total_faults -= p->total_numa_faults;
1952 grp->total_faults += p->total_numa_faults; 1969 grp->total_faults += p->total_numa_faults;
@@ -1971,14 +1988,14 @@ no_join:
1971void task_numa_free(struct task_struct *p) 1988void task_numa_free(struct task_struct *p)
1972{ 1989{
1973 struct numa_group *grp = p->numa_group; 1990 struct numa_group *grp = p->numa_group;
1974 void *numa_faults = p->numa_faults_memory; 1991 void *numa_faults = p->numa_faults;
1975 unsigned long flags; 1992 unsigned long flags;
1976 int i; 1993 int i;
1977 1994
1978 if (grp) { 1995 if (grp) {
1979 spin_lock_irqsave(&grp->lock, flags); 1996 spin_lock_irqsave(&grp->lock, flags);
1980 for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) 1997 for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++)
1981 grp->faults[i] -= p->numa_faults_memory[i]; 1998 grp->faults[i] -= p->numa_faults[i];
1982 grp->total_faults -= p->total_numa_faults; 1999 grp->total_faults -= p->total_numa_faults;
1983 2000
1984 list_del(&p->numa_entry); 2001 list_del(&p->numa_entry);
@@ -1988,10 +2005,7 @@ void task_numa_free(struct task_struct *p)
1988 put_numa_group(grp); 2005 put_numa_group(grp);
1989 } 2006 }
1990 2007
1991 p->numa_faults_memory = NULL; 2008 p->numa_faults = NULL;
1992 p->numa_faults_buffer_memory = NULL;
1993 p->numa_faults_cpu= NULL;
1994 p->numa_faults_buffer_cpu = NULL;
1995 kfree(numa_faults); 2009 kfree(numa_faults);
1996} 2010}
1997 2011
@@ -2014,24 +2028,14 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags)
2014 return; 2028 return;
2015 2029
2016 /* Allocate buffer to track faults on a per-node basis */ 2030 /* Allocate buffer to track faults on a per-node basis */
2017 if (unlikely(!p->numa_faults_memory)) { 2031 if (unlikely(!p->numa_faults)) {
2018 int size = sizeof(*p->numa_faults_memory) * 2032 int size = sizeof(*p->numa_faults) *
2019 NR_NUMA_HINT_FAULT_BUCKETS * nr_node_ids; 2033 NR_NUMA_HINT_FAULT_BUCKETS * nr_node_ids;
2020 2034
2021 p->numa_faults_memory = kzalloc(size, GFP_KERNEL|__GFP_NOWARN); 2035 p->numa_faults = kzalloc(size, GFP_KERNEL|__GFP_NOWARN);
2022 if (!p->numa_faults_memory) 2036 if (!p->numa_faults)
2023 return; 2037 return;
2024 2038
2025 BUG_ON(p->numa_faults_buffer_memory);
2026 /*
2027 * The averaged statistics, shared & private, memory & cpu,
2028 * occupy the first half of the array. The second half of the
2029 * array is for current counters, which are averaged into the
2030 * first set by task_numa_placement.
2031 */
2032 p->numa_faults_cpu = p->numa_faults_memory + (2 * nr_node_ids);
2033 p->numa_faults_buffer_memory = p->numa_faults_memory + (4 * nr_node_ids);
2034 p->numa_faults_buffer_cpu = p->numa_faults_memory + (6 * nr_node_ids);
2035 p->total_numa_faults = 0; 2039 p->total_numa_faults = 0;
2036 memset(p->numa_faults_locality, 0, sizeof(p->numa_faults_locality)); 2040 memset(p->numa_faults_locality, 0, sizeof(p->numa_faults_locality));
2037 } 2041 }
@@ -2071,8 +2075,8 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags)
2071 if (migrated) 2075 if (migrated)
2072 p->numa_pages_migrated += pages; 2076 p->numa_pages_migrated += pages;
2073 2077
2074 p->numa_faults_buffer_memory[task_faults_idx(mem_node, priv)] += pages; 2078 p->numa_faults[task_faults_idx(NUMA_MEMBUF, mem_node, priv)] += pages;
2075 p->numa_faults_buffer_cpu[task_faults_idx(cpu_node, priv)] += pages; 2079 p->numa_faults[task_faults_idx(NUMA_CPUBUF, cpu_node, priv)] += pages;
2076 p->numa_faults_locality[local] += pages; 2080 p->numa_faults_locality[local] += pages;
2077} 2081}
2078 2082
@@ -5361,7 +5365,7 @@ static bool migrate_improves_locality(struct task_struct *p, struct lb_env *env)
5361 struct numa_group *numa_group = rcu_dereference(p->numa_group); 5365 struct numa_group *numa_group = rcu_dereference(p->numa_group);
5362 int src_nid, dst_nid; 5366 int src_nid, dst_nid;
5363 5367
5364 if (!sched_feat(NUMA_FAVOUR_HIGHER) || !p->numa_faults_memory || 5368 if (!sched_feat(NUMA_FAVOUR_HIGHER) || !p->numa_faults ||
5365 !(env->sd->flags & SD_NUMA)) { 5369 !(env->sd->flags & SD_NUMA)) {
5366 return false; 5370 return false;
5367 } 5371 }
@@ -5400,7 +5404,7 @@ static bool migrate_degrades_locality(struct task_struct *p, struct lb_env *env)
5400 if (!sched_feat(NUMA) || !sched_feat(NUMA_RESIST_LOWER)) 5404 if (!sched_feat(NUMA) || !sched_feat(NUMA_RESIST_LOWER))
5401 return false; 5405 return false;
5402 5406
5403 if (!p->numa_faults_memory || !(env->sd->flags & SD_NUMA)) 5407 if (!p->numa_faults || !(env->sd->flags & SD_NUMA))
5404 return false; 5408 return false;
5405 5409
5406 src_nid = cpu_to_node(env->src_cpu); 5410 src_nid = cpu_to_node(env->src_cpu);