diff options
Diffstat (limited to 'kernel/sched/fair.c')
-rw-r--r-- | kernel/sched/fair.c | 110 |
1 files changed, 57 insertions, 53 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d03d76de7aff..826fdf326683 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -896,18 +896,24 @@ pid_t task_numa_group_id(struct task_struct *p) | |||
896 | return p->numa_group ? p->numa_group->gid : 0; | 896 | return p->numa_group ? p->numa_group->gid : 0; |
897 | } | 897 | } |
898 | 898 | ||
899 | static inline int task_faults_idx(int nid, int priv) | 899 | /* |
900 | * The averaged statistics, shared & private, memory & cpu, | ||
901 | * occupy the first half of the array. The second half of the | ||
902 | * array is for current counters, which are averaged into the | ||
903 | * first set by task_numa_placement. | ||
904 | */ | ||
905 | static inline int task_faults_idx(enum numa_faults_stats s, int nid, int priv) | ||
900 | { | 906 | { |
901 | return NR_NUMA_HINT_FAULT_TYPES * nid + priv; | 907 | return NR_NUMA_HINT_FAULT_TYPES * (s * nr_node_ids + nid) + priv; |
902 | } | 908 | } |
903 | 909 | ||
904 | static inline unsigned long task_faults(struct task_struct *p, int nid) | 910 | static inline unsigned long task_faults(struct task_struct *p, int nid) |
905 | { | 911 | { |
906 | if (!p->numa_faults_memory) | 912 | if (!p->numa_faults) |
907 | return 0; | 913 | return 0; |
908 | 914 | ||
909 | return p->numa_faults_memory[task_faults_idx(nid, 0)] + | 915 | return p->numa_faults[task_faults_idx(NUMA_MEM, nid, 0)] + |
910 | p->numa_faults_memory[task_faults_idx(nid, 1)]; | 916 | p->numa_faults[task_faults_idx(NUMA_MEM, nid, 1)]; |
911 | } | 917 | } |
912 | 918 | ||
913 | static inline unsigned long group_faults(struct task_struct *p, int nid) | 919 | static inline unsigned long group_faults(struct task_struct *p, int nid) |
@@ -915,14 +921,14 @@ static inline unsigned long group_faults(struct task_struct *p, int nid) | |||
915 | if (!p->numa_group) | 921 | if (!p->numa_group) |
916 | return 0; | 922 | return 0; |
917 | 923 | ||
918 | return p->numa_group->faults[task_faults_idx(nid, 0)] + | 924 | return p->numa_group->faults[task_faults_idx(NUMA_MEM, nid, 0)] + |
919 | p->numa_group->faults[task_faults_idx(nid, 1)]; | 925 | p->numa_group->faults[task_faults_idx(NUMA_MEM, nid, 1)]; |
920 | } | 926 | } |
921 | 927 | ||
922 | static inline unsigned long group_faults_cpu(struct numa_group *group, int nid) | 928 | static inline unsigned long group_faults_cpu(struct numa_group *group, int nid) |
923 | { | 929 | { |
924 | return group->faults_cpu[task_faults_idx(nid, 0)] + | 930 | return group->faults_cpu[task_faults_idx(NUMA_MEM, nid, 0)] + |
925 | group->faults_cpu[task_faults_idx(nid, 1)]; | 931 | group->faults_cpu[task_faults_idx(NUMA_MEM, nid, 1)]; |
926 | } | 932 | } |
927 | 933 | ||
928 | /* Handle placement on systems where not all nodes are directly connected. */ | 934 | /* Handle placement on systems where not all nodes are directly connected. */ |
@@ -1001,7 +1007,7 @@ static inline unsigned long task_weight(struct task_struct *p, int nid, | |||
1001 | { | 1007 | { |
1002 | unsigned long faults, total_faults; | 1008 | unsigned long faults, total_faults; |
1003 | 1009 | ||
1004 | if (!p->numa_faults_memory) | 1010 | if (!p->numa_faults) |
1005 | return 0; | 1011 | return 0; |
1006 | 1012 | ||
1007 | total_faults = p->total_numa_faults; | 1013 | total_faults = p->total_numa_faults; |
@@ -1517,7 +1523,7 @@ static void numa_migrate_preferred(struct task_struct *p) | |||
1517 | unsigned long interval = HZ; | 1523 | unsigned long interval = HZ; |
1518 | 1524 | ||
1519 | /* This task has no NUMA fault statistics yet */ | 1525 | /* This task has no NUMA fault statistics yet */ |
1520 | if (unlikely(p->numa_preferred_nid == -1 || !p->numa_faults_memory)) | 1526 | if (unlikely(p->numa_preferred_nid == -1 || !p->numa_faults)) |
1521 | return; | 1527 | return; |
1522 | 1528 | ||
1523 | /* Periodically retry migrating the task to the preferred node */ | 1529 | /* Periodically retry migrating the task to the preferred node */ |
@@ -1779,18 +1785,23 @@ static void task_numa_placement(struct task_struct *p) | |||
1779 | 1785 | ||
1780 | /* Find the node with the highest number of faults */ | 1786 | /* Find the node with the highest number of faults */ |
1781 | for_each_online_node(nid) { | 1787 | for_each_online_node(nid) { |
1788 | /* Keep track of the offsets in numa_faults array */ | ||
1789 | int mem_idx, membuf_idx, cpu_idx, cpubuf_idx; | ||
1782 | unsigned long faults = 0, group_faults = 0; | 1790 | unsigned long faults = 0, group_faults = 0; |
1783 | int priv, i; | 1791 | int priv; |
1784 | 1792 | ||
1785 | for (priv = 0; priv < NR_NUMA_HINT_FAULT_TYPES; priv++) { | 1793 | for (priv = 0; priv < NR_NUMA_HINT_FAULT_TYPES; priv++) { |
1786 | long diff, f_diff, f_weight; | 1794 | long diff, f_diff, f_weight; |
1787 | 1795 | ||
1788 | i = task_faults_idx(nid, priv); | 1796 | mem_idx = task_faults_idx(NUMA_MEM, nid, priv); |
1797 | membuf_idx = task_faults_idx(NUMA_MEMBUF, nid, priv); | ||
1798 | cpu_idx = task_faults_idx(NUMA_CPU, nid, priv); | ||
1799 | cpubuf_idx = task_faults_idx(NUMA_CPUBUF, nid, priv); | ||
1789 | 1800 | ||
1790 | /* Decay existing window, copy faults since last scan */ | 1801 | /* Decay existing window, copy faults since last scan */ |
1791 | diff = p->numa_faults_buffer_memory[i] - p->numa_faults_memory[i] / 2; | 1802 | diff = p->numa_faults[membuf_idx] - p->numa_faults[mem_idx] / 2; |
1792 | fault_types[priv] += p->numa_faults_buffer_memory[i]; | 1803 | fault_types[priv] += p->numa_faults[membuf_idx]; |
1793 | p->numa_faults_buffer_memory[i] = 0; | 1804 | p->numa_faults[membuf_idx] = 0; |
1794 | 1805 | ||
1795 | /* | 1806 | /* |
1796 | * Normalize the faults_from, so all tasks in a group | 1807 | * Normalize the faults_from, so all tasks in a group |
@@ -1800,21 +1811,27 @@ static void task_numa_placement(struct task_struct *p) | |||
1800 | * faults are less important. | 1811 | * faults are less important. |
1801 | */ | 1812 | */ |
1802 | f_weight = div64_u64(runtime << 16, period + 1); | 1813 | f_weight = div64_u64(runtime << 16, period + 1); |
1803 | f_weight = (f_weight * p->numa_faults_buffer_cpu[i]) / | 1814 | f_weight = (f_weight * p->numa_faults[cpubuf_idx]) / |
1804 | (total_faults + 1); | 1815 | (total_faults + 1); |
1805 | f_diff = f_weight - p->numa_faults_cpu[i] / 2; | 1816 | f_diff = f_weight - p->numa_faults[cpu_idx] / 2; |
1806 | p->numa_faults_buffer_cpu[i] = 0; | 1817 | p->numa_faults[cpubuf_idx] = 0; |
1807 | 1818 | ||
1808 | p->numa_faults_memory[i] += diff; | 1819 | p->numa_faults[mem_idx] += diff; |
1809 | p->numa_faults_cpu[i] += f_diff; | 1820 | p->numa_faults[cpu_idx] += f_diff; |
1810 | faults += p->numa_faults_memory[i]; | 1821 | faults += p->numa_faults[mem_idx]; |
1811 | p->total_numa_faults += diff; | 1822 | p->total_numa_faults += diff; |
1812 | if (p->numa_group) { | 1823 | if (p->numa_group) { |
1813 | /* safe because we can only change our own group */ | 1824 | /* |
1814 | p->numa_group->faults[i] += diff; | 1825 | * safe because we can only change our own group |
1815 | p->numa_group->faults_cpu[i] += f_diff; | 1826 | * |
1827 | * mem_idx represents the offset for a given | ||
1828 | * nid and priv in a specific region because it | ||
1829 | * is at the beginning of the numa_faults array. | ||
1830 | */ | ||
1831 | p->numa_group->faults[mem_idx] += diff; | ||
1832 | p->numa_group->faults_cpu[mem_idx] += f_diff; | ||
1816 | p->numa_group->total_faults += diff; | 1833 | p->numa_group->total_faults += diff; |
1817 | group_faults += p->numa_group->faults[i]; | 1834 | group_faults += p->numa_group->faults[mem_idx]; |
1818 | } | 1835 | } |
1819 | } | 1836 | } |
1820 | 1837 | ||
@@ -1886,7 +1903,7 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags, | |||
1886 | node_set(task_node(current), grp->active_nodes); | 1903 | node_set(task_node(current), grp->active_nodes); |
1887 | 1904 | ||
1888 | for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) | 1905 | for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) |
1889 | grp->faults[i] = p->numa_faults_memory[i]; | 1906 | grp->faults[i] = p->numa_faults[i]; |
1890 | 1907 | ||
1891 | grp->total_faults = p->total_numa_faults; | 1908 | grp->total_faults = p->total_numa_faults; |
1892 | 1909 | ||
@@ -1945,8 +1962,8 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags, | |||
1945 | double_lock_irq(&my_grp->lock, &grp->lock); | 1962 | double_lock_irq(&my_grp->lock, &grp->lock); |
1946 | 1963 | ||
1947 | for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) { | 1964 | for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) { |
1948 | my_grp->faults[i] -= p->numa_faults_memory[i]; | 1965 | my_grp->faults[i] -= p->numa_faults[i]; |
1949 | grp->faults[i] += p->numa_faults_memory[i]; | 1966 | grp->faults[i] += p->numa_faults[i]; |
1950 | } | 1967 | } |
1951 | my_grp->total_faults -= p->total_numa_faults; | 1968 | my_grp->total_faults -= p->total_numa_faults; |
1952 | grp->total_faults += p->total_numa_faults; | 1969 | grp->total_faults += p->total_numa_faults; |
@@ -1971,14 +1988,14 @@ no_join: | |||
1971 | void task_numa_free(struct task_struct *p) | 1988 | void task_numa_free(struct task_struct *p) |
1972 | { | 1989 | { |
1973 | struct numa_group *grp = p->numa_group; | 1990 | struct numa_group *grp = p->numa_group; |
1974 | void *numa_faults = p->numa_faults_memory; | 1991 | void *numa_faults = p->numa_faults; |
1975 | unsigned long flags; | 1992 | unsigned long flags; |
1976 | int i; | 1993 | int i; |
1977 | 1994 | ||
1978 | if (grp) { | 1995 | if (grp) { |
1979 | spin_lock_irqsave(&grp->lock, flags); | 1996 | spin_lock_irqsave(&grp->lock, flags); |
1980 | for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) | 1997 | for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) |
1981 | grp->faults[i] -= p->numa_faults_memory[i]; | 1998 | grp->faults[i] -= p->numa_faults[i]; |
1982 | grp->total_faults -= p->total_numa_faults; | 1999 | grp->total_faults -= p->total_numa_faults; |
1983 | 2000 | ||
1984 | list_del(&p->numa_entry); | 2001 | list_del(&p->numa_entry); |
@@ -1988,10 +2005,7 @@ void task_numa_free(struct task_struct *p) | |||
1988 | put_numa_group(grp); | 2005 | put_numa_group(grp); |
1989 | } | 2006 | } |
1990 | 2007 | ||
1991 | p->numa_faults_memory = NULL; | 2008 | p->numa_faults = NULL; |
1992 | p->numa_faults_buffer_memory = NULL; | ||
1993 | p->numa_faults_cpu= NULL; | ||
1994 | p->numa_faults_buffer_cpu = NULL; | ||
1995 | kfree(numa_faults); | 2009 | kfree(numa_faults); |
1996 | } | 2010 | } |
1997 | 2011 | ||
@@ -2014,24 +2028,14 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags) | |||
2014 | return; | 2028 | return; |
2015 | 2029 | ||
2016 | /* Allocate buffer to track faults on a per-node basis */ | 2030 | /* Allocate buffer to track faults on a per-node basis */ |
2017 | if (unlikely(!p->numa_faults_memory)) { | 2031 | if (unlikely(!p->numa_faults)) { |
2018 | int size = sizeof(*p->numa_faults_memory) * | 2032 | int size = sizeof(*p->numa_faults) * |
2019 | NR_NUMA_HINT_FAULT_BUCKETS * nr_node_ids; | 2033 | NR_NUMA_HINT_FAULT_BUCKETS * nr_node_ids; |
2020 | 2034 | ||
2021 | p->numa_faults_memory = kzalloc(size, GFP_KERNEL|__GFP_NOWARN); | 2035 | p->numa_faults = kzalloc(size, GFP_KERNEL|__GFP_NOWARN); |
2022 | if (!p->numa_faults_memory) | 2036 | if (!p->numa_faults) |
2023 | return; | 2037 | return; |
2024 | 2038 | ||
2025 | BUG_ON(p->numa_faults_buffer_memory); | ||
2026 | /* | ||
2027 | * The averaged statistics, shared & private, memory & cpu, | ||
2028 | * occupy the first half of the array. The second half of the | ||
2029 | * array is for current counters, which are averaged into the | ||
2030 | * first set by task_numa_placement. | ||
2031 | */ | ||
2032 | p->numa_faults_cpu = p->numa_faults_memory + (2 * nr_node_ids); | ||
2033 | p->numa_faults_buffer_memory = p->numa_faults_memory + (4 * nr_node_ids); | ||
2034 | p->numa_faults_buffer_cpu = p->numa_faults_memory + (6 * nr_node_ids); | ||
2035 | p->total_numa_faults = 0; | 2039 | p->total_numa_faults = 0; |
2036 | memset(p->numa_faults_locality, 0, sizeof(p->numa_faults_locality)); | 2040 | memset(p->numa_faults_locality, 0, sizeof(p->numa_faults_locality)); |
2037 | } | 2041 | } |
@@ -2071,8 +2075,8 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags) | |||
2071 | if (migrated) | 2075 | if (migrated) |
2072 | p->numa_pages_migrated += pages; | 2076 | p->numa_pages_migrated += pages; |
2073 | 2077 | ||
2074 | p->numa_faults_buffer_memory[task_faults_idx(mem_node, priv)] += pages; | 2078 | p->numa_faults[task_faults_idx(NUMA_MEMBUF, mem_node, priv)] += pages; |
2075 | p->numa_faults_buffer_cpu[task_faults_idx(cpu_node, priv)] += pages; | 2079 | p->numa_faults[task_faults_idx(NUMA_CPUBUF, cpu_node, priv)] += pages; |
2076 | p->numa_faults_locality[local] += pages; | 2080 | p->numa_faults_locality[local] += pages; |
2077 | } | 2081 | } |
2078 | 2082 | ||
@@ -5361,7 +5365,7 @@ static bool migrate_improves_locality(struct task_struct *p, struct lb_env *env) | |||
5361 | struct numa_group *numa_group = rcu_dereference(p->numa_group); | 5365 | struct numa_group *numa_group = rcu_dereference(p->numa_group); |
5362 | int src_nid, dst_nid; | 5366 | int src_nid, dst_nid; |
5363 | 5367 | ||
5364 | if (!sched_feat(NUMA_FAVOUR_HIGHER) || !p->numa_faults_memory || | 5368 | if (!sched_feat(NUMA_FAVOUR_HIGHER) || !p->numa_faults || |
5365 | !(env->sd->flags & SD_NUMA)) { | 5369 | !(env->sd->flags & SD_NUMA)) { |
5366 | return false; | 5370 | return false; |
5367 | } | 5371 | } |
@@ -5400,7 +5404,7 @@ static bool migrate_degrades_locality(struct task_struct *p, struct lb_env *env) | |||
5400 | if (!sched_feat(NUMA) || !sched_feat(NUMA_RESIST_LOWER)) | 5404 | if (!sched_feat(NUMA) || !sched_feat(NUMA_RESIST_LOWER)) |
5401 | return false; | 5405 | return false; |
5402 | 5406 | ||
5403 | if (!p->numa_faults_memory || !(env->sd->flags & SD_NUMA)) | 5407 | if (!p->numa_faults || !(env->sd->flags & SD_NUMA)) |
5404 | return false; | 5408 | return false; |
5405 | 5409 | ||
5406 | src_nid = cpu_to_node(env->src_cpu); | 5410 | src_nid = cpu_to_node(env->src_cpu); |