Diffstat (limited to 'kernel/sched')
 -rw-r--r--  kernel/sched/core.c  233
 1 file changed, 120 insertions, 113 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 13584f1cccfc..7d332b7899cc 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5566,17 +5566,6 @@ static int __init isolated_cpu_setup(char *str)
 
 __setup("isolcpus=", isolated_cpu_setup);
 
-static const struct cpumask *cpu_cpu_mask(int cpu)
-{
-	return cpumask_of_node(cpu_to_node(cpu));
-}
-
-struct sd_data {
-	struct sched_domain **__percpu sd;
-	struct sched_group **__percpu sg;
-	struct sched_group_power **__percpu sgp;
-};
-
 struct s_data {
 	struct sched_domain ** __percpu sd;
 	struct root_domain	*rd;
@@ -5589,21 +5578,6 @@ enum s_alloc {
 	sa_none,
 };
 
-struct sched_domain_topology_level;
-
-typedef struct sched_domain *(*sched_domain_init_f)(struct sched_domain_topology_level *tl, int cpu);
-typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
-
-#define SDTL_OVERLAP		0x01
-
-struct sched_domain_topology_level {
-	sched_domain_init_f init;
-	sched_domain_mask_f mask;
-	int		    flags;
-	int		    numa_level;
-	struct sd_data      data;
-};
-
 /*
  * Build an iteration mask that can exclude certain CPUs from the upwards
  * domain traversal.
@@ -5832,34 +5806,6 @@ int __weak arch_sd_sibling_asym_packing(void)
  * Non-inlined to reduce accumulated stack pressure in build_sched_domains()
  */
 
-#ifdef CONFIG_SCHED_DEBUG
-# define SD_INIT_NAME(sd, type)		sd->name = #type
-#else
-# define SD_INIT_NAME(sd, type)		do { } while (0)
-#endif
-
-#define SD_INIT_FUNC(type)						\
-static noinline struct sched_domain *					\
-sd_init_##type(struct sched_domain_topology_level *tl, int cpu)	\
-{									\
-	struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu);	\
-	*sd = SD_##type##_INIT;						\
-	SD_INIT_NAME(sd, type);						\
-	sd->private = &tl->data;					\
-	return sd;							\
-}
-
-SD_INIT_FUNC(CPU)
-#ifdef CONFIG_SCHED_SMT
- SD_INIT_FUNC(SIBLING)
-#endif
-#ifdef CONFIG_SCHED_MC
- SD_INIT_FUNC(MC)
-#endif
-#ifdef CONFIG_SCHED_BOOK
- SD_INIT_FUNC(BOOK)
-#endif
-
 static int default_relax_domain_level = -1;
 int sched_domain_level_max;
 
@@ -5947,99 +5893,156 @@ static void claim_allocations(int cpu, struct sched_domain *sd)
 	*per_cpu_ptr(sdd->sgp, cpu) = NULL;
 }
 
-#ifdef CONFIG_SCHED_SMT
-static const struct cpumask *cpu_smt_mask(int cpu)
-{
-	return topology_thread_cpumask(cpu);
-}
-#endif
-
-/*
- * Topology list, bottom-up.
- */
-static struct sched_domain_topology_level default_topology[] = {
-#ifdef CONFIG_SCHED_SMT
-	{ sd_init_SIBLING, cpu_smt_mask, },
-#endif
-#ifdef CONFIG_SCHED_MC
-	{ sd_init_MC, cpu_coregroup_mask, },
-#endif
-#ifdef CONFIG_SCHED_BOOK
-	{ sd_init_BOOK, cpu_book_mask, },
-#endif
-	{ sd_init_CPU, cpu_cpu_mask, },
-	{ NULL, },
-};
-
-static struct sched_domain_topology_level *sched_domain_topology = default_topology;
-
-#define for_each_sd_topology(tl)			\
-	for (tl = sched_domain_topology; tl->init; tl++)
-
 #ifdef CONFIG_NUMA
-
 static int sched_domains_numa_levels;
 static int *sched_domains_numa_distance;
 static struct cpumask ***sched_domains_numa_masks;
 static int sched_domains_curr_level;
+#endif
 
-static inline int sd_local_flags(int level)
-{
-	if (sched_domains_numa_distance[level] > RECLAIM_DISTANCE)
-		return 0;
-
-	return SD_BALANCE_EXEC | SD_BALANCE_FORK | SD_WAKE_AFFINE;
-}
+/*
+ * SD_flags allowed in topology descriptions.
+ *
+ * SD_SHARE_CPUPOWER      - describes SMT topologies
+ * SD_SHARE_PKG_RESOURCES - describes shared caches
+ * SD_NUMA                - describes NUMA topologies
+ *
+ * Odd one out:
+ * SD_ASYM_PACKING        - describes SMT quirks
+ */
+#define TOPOLOGY_SD_FLAGS		\
+	(SD_SHARE_CPUPOWER |		\
+	 SD_SHARE_PKG_RESOURCES |	\
+	 SD_NUMA |			\
+	 SD_ASYM_PACKING)
 
 static struct sched_domain *
-sd_numa_init(struct sched_domain_topology_level *tl, int cpu)
+sd_init(struct sched_domain_topology_level *tl, int cpu)
 {
 	struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu);
-	int level = tl->numa_level;
-	int sd_weight = cpumask_weight(
-			sched_domains_numa_masks[level][cpu_to_node(cpu)]);
+	int sd_weight, sd_flags = 0;
+
+#ifdef CONFIG_NUMA
+	/*
+	 * Ugly hack to pass state to sd_numa_mask()...
+	 */
+	sched_domains_curr_level = tl->numa_level;
+#endif
+
+	sd_weight = cpumask_weight(tl->mask(cpu));
+
+	if (tl->sd_flags)
+		sd_flags = (*tl->sd_flags)();
+	if (WARN_ONCE(sd_flags & ~TOPOLOGY_SD_FLAGS,
+			"wrong sd_flags in topology description\n"))
+		sd_flags &= ~TOPOLOGY_SD_FLAGS;
 
 	*sd = (struct sched_domain){
 		.min_interval		= sd_weight,
 		.max_interval		= 2*sd_weight,
 		.busy_factor		= 32,
 		.imbalance_pct		= 125,
-		.cache_nice_tries	= 2,
-		.busy_idx		= 3,
-		.idle_idx		= 2,
+
+		.cache_nice_tries	= 0,
+		.busy_idx		= 0,
+		.idle_idx		= 0,
 		.newidle_idx		= 0,
 		.wake_idx		= 0,
 		.forkexec_idx		= 0,
 
 		.flags			= 1*SD_LOAD_BALANCE
 					| 1*SD_BALANCE_NEWIDLE
-					| 0*SD_BALANCE_EXEC
-					| 0*SD_BALANCE_FORK
+					| 1*SD_BALANCE_EXEC
+					| 1*SD_BALANCE_FORK
 					| 0*SD_BALANCE_WAKE
-					| 0*SD_WAKE_AFFINE
+					| 1*SD_WAKE_AFFINE
 					| 0*SD_SHARE_CPUPOWER
 					| 0*SD_SHARE_PKG_RESOURCES
-					| 1*SD_SERIALIZE
+					| 0*SD_SERIALIZE
 					| 0*SD_PREFER_SIBLING
-					| 1*SD_NUMA
-					| sd_local_flags(level)
+					| 0*SD_NUMA
+					| sd_flags
 					,
+
 		.last_balance		= jiffies,
 		.balance_interval	= sd_weight,
+		.smt_gain		= 0,
 		.max_newidle_lb_cost	= 0,
 		.next_decay_max_lb_cost	= jiffies,
+
+#ifdef CONFIG_SCHED_DEBUG
+		.name			= tl->name,
+#endif
 	};
-	SD_INIT_NAME(sd, NUMA);
-	sd->private = &tl->data;
 
 	/*
-	 * Ugly hack to pass state to sd_numa_mask()...
+	 * Convert topological properties into behaviour.
 	 */
-	sched_domains_curr_level = tl->numa_level;
+
+	if (sd->flags & SD_SHARE_CPUPOWER) {
+		sd->imbalance_pct = 110;
+		sd->smt_gain = 1178; /* ~15% */
+		sd->flags |= arch_sd_sibling_asym_packing();
+
+	} else if (sd->flags & SD_SHARE_PKG_RESOURCES) {
+		sd->imbalance_pct = 117;
+		sd->cache_nice_tries = 1;
+		sd->busy_idx = 2;
+
+#ifdef CONFIG_NUMA
+	} else if (sd->flags & SD_NUMA) {
+		sd->cache_nice_tries = 2;
+		sd->busy_idx = 3;
+		sd->idle_idx = 2;
+
+		sd->flags |= SD_SERIALIZE;
+		if (sched_domains_numa_distance[tl->numa_level] > RECLAIM_DISTANCE) {
+			sd->flags &= ~(SD_BALANCE_EXEC |
+				       SD_BALANCE_FORK |
+				       SD_WAKE_AFFINE);
+		}
+
+#endif
+	} else {
+		sd->flags |= SD_PREFER_SIBLING;
+		sd->cache_nice_tries = 1;
+		sd->busy_idx = 2;
+		sd->idle_idx = 1;
+	}
+
+	sd->private = &tl->data;
 
 	return sd;
 }
 
+/*
+ * Topology list, bottom-up.
+ */
+static struct sched_domain_topology_level default_topology[] = {
+#ifdef CONFIG_SCHED_SMT
+	{ cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
+#endif
+#ifdef CONFIG_SCHED_MC
+	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
+#endif
+#ifdef CONFIG_SCHED_BOOK
+	{ cpu_book_mask, SD_INIT_NAME(BOOK) },
+#endif
+	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
+	{ NULL, },
+};
+
+struct sched_domain_topology_level *sched_domain_topology = default_topology;
+
+#define for_each_sd_topology(tl)			\
+	for (tl = sched_domain_topology; tl->mask; tl++)
+
+void set_sched_topology(struct sched_domain_topology_level *tl)
+{
+	sched_domain_topology = tl;
+}
+
+#ifdef CONFIG_NUMA
+
 static const struct cpumask *sd_numa_mask(int cpu)
 {
 	return sched_domains_numa_masks[sched_domains_curr_level][cpu_to_node(cpu)];
@@ -6183,7 +6186,10 @@ static void sched_init_numa(void)
 		}
 	}
 
-	tl = kzalloc((ARRAY_SIZE(default_topology) + level) *
+	/* Compute default topology size */
+	for (i = 0; sched_domain_topology[i].mask; i++);
+
+	tl = kzalloc((i + level) *
 			sizeof(struct sched_domain_topology_level), GFP_KERNEL);
 	if (!tl)
 		return;
@@ -6191,18 +6197,19 @@ static void sched_init_numa(void)
 	/*
 	 * Copy the default topology bits..
 	 */
-	for (i = 0; default_topology[i].init; i++)
-		tl[i] = default_topology[i];
+	for (i = 0; sched_domain_topology[i].mask; i++)
+		tl[i] = sched_domain_topology[i];
 
 	/*
 	 * .. and append 'j' levels of NUMA goodness.
 	 */
 	for (j = 0; j < level; i++, j++) {
 		tl[i] = (struct sched_domain_topology_level){
-			.init = sd_numa_init,
 			.mask = sd_numa_mask,
+			.sd_flags = cpu_numa_flags,
 			.flags = SDTL_OVERLAP,
 			.numa_level = j,
+			SD_INIT_NAME(NUMA)
 		};
 	}
 
@@ -6360,7 +6367,7 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl,
 		const struct cpumask *cpu_map, struct sched_domain_attr *attr,
 		struct sched_domain *child, int cpu)
 {
-	struct sched_domain *sd = tl->init(tl, cpu);
+	struct sched_domain *sd = sd_init(tl, cpu);
 	if (!sd)
 		return child;
 
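
For context, a minimal sketch (not part of the patch above) of how an architecture could plug its own table into the set_sched_topology() hook this change adds. It assumes the sched_domain_topology_level layout that the new default_topology[] relies on (mask function first, then an optional sd_flags callback, with SD_INIT_NAME() filling the debug name), which comes from the header side of this series and is not visible in this kernel/sched-only diff; the my_* helpers below are hypothetical.

/*
 * Hypothetical arch override of the default topology table:
 * one entry per level, bottom-up, terminated by { NULL, }.
 */
static const struct cpumask *my_cluster_mask(int cpu)	/* made-up helper */
{
	return topology_core_cpumask(cpu);
}

static int my_cluster_flags(void)			/* made-up helper */
{
	/* Must stay within TOPOLOGY_SD_FLAGS or sd_init() will warn and strip it. */
	return SD_SHARE_PKG_RESOURCES;
}

static struct sched_domain_topology_level my_topology[] = {
#ifdef CONFIG_SCHED_SMT
	{ cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
#endif
	{ my_cluster_mask, my_cluster_flags, SD_INIT_NAME(CLS) },
	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
	{ NULL, },
};

void __init my_arch_setup_topology(void)
{
	/* Replace default_topology before the scheduler builds its domains. */
	set_sched_topology(my_topology);
}

Which SD flag (if any) fits such an extra level is the architecture's call; sd_init() above only converts the declared topological properties (SMT, shared cache, NUMA) into balancing behaviour and rejects anything outside TOPOLOGY_SD_FLAGS.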