Diffstat (limited to 'kernel/sched')
-rw-r--r--	kernel/sched/core.c	233
1 file changed, 120 insertions(+), 113 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 13584f1cccfc..7d332b7899cc 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5566,17 +5566,6 @@ static int __init isolated_cpu_setup(char *str)
 
 __setup("isolcpus=", isolated_cpu_setup);
 
-static const struct cpumask *cpu_cpu_mask(int cpu)
-{
-	return cpumask_of_node(cpu_to_node(cpu));
-}
-
-struct sd_data {
-	struct sched_domain **__percpu sd;
-	struct sched_group **__percpu sg;
-	struct sched_group_power **__percpu sgp;
-};
-
 struct s_data {
 	struct sched_domain ** __percpu sd;
 	struct root_domain	*rd;
@@ -5589,21 +5578,6 @@ enum s_alloc {
 	sa_none,
 };
 
-struct sched_domain_topology_level;
-
-typedef struct sched_domain *(*sched_domain_init_f)(struct sched_domain_topology_level *tl, int cpu);
-typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
-
-#define SDTL_OVERLAP		0x01
-
-struct sched_domain_topology_level {
-	sched_domain_init_f init;
-	sched_domain_mask_f mask;
-	int		    flags;
-	int		    numa_level;
-	struct sd_data      data;
-};
-
 /*
  * Build an iteration mask that can exclude certain CPUs from the upwards
  * domain traversal.
@@ -5832,34 +5806,6 @@ int __weak arch_sd_sibling_asym_packing(void)
  * Non-inlined to reduce accumulated stack pressure in build_sched_domains()
  */
 
-#ifdef CONFIG_SCHED_DEBUG
-# define SD_INIT_NAME(sd, type)		sd->name = #type
-#else
-# define SD_INIT_NAME(sd, type)		do { } while (0)
-#endif
-
-#define SD_INIT_FUNC(type)						\
-static noinline struct sched_domain *					\
-sd_init_##type(struct sched_domain_topology_level *tl, int cpu)	\
-{									\
-	struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu);	\
-	*sd = SD_##type##_INIT;						\
-	SD_INIT_NAME(sd, type);						\
-	sd->private = &tl->data;					\
-	return sd;							\
-}
-
-SD_INIT_FUNC(CPU)
-#ifdef CONFIG_SCHED_SMT
- SD_INIT_FUNC(SIBLING)
-#endif
-#ifdef CONFIG_SCHED_MC
- SD_INIT_FUNC(MC)
-#endif
-#ifdef CONFIG_SCHED_BOOK
- SD_INIT_FUNC(BOOK)
-#endif
-
 static int default_relax_domain_level = -1;
 int sched_domain_level_max;
 
@@ -5947,99 +5893,156 @@ static void claim_allocations(int cpu, struct sched_domain *sd)
 	*per_cpu_ptr(sdd->sgp, cpu) = NULL;
 }
 
-#ifdef CONFIG_SCHED_SMT
-static const struct cpumask *cpu_smt_mask(int cpu)
-{
-	return topology_thread_cpumask(cpu);
-}
-#endif
-
-/*
- * Topology list, bottom-up.
- */
-static struct sched_domain_topology_level default_topology[] = {
-#ifdef CONFIG_SCHED_SMT
-	{ sd_init_SIBLING, cpu_smt_mask, },
-#endif
-#ifdef CONFIG_SCHED_MC
-	{ sd_init_MC, cpu_coregroup_mask, },
-#endif
-#ifdef CONFIG_SCHED_BOOK
-	{ sd_init_BOOK, cpu_book_mask, },
-#endif
-	{ sd_init_CPU, cpu_cpu_mask, },
-	{ NULL, },
-};
-
-static struct sched_domain_topology_level *sched_domain_topology = default_topology;
-
-#define for_each_sd_topology(tl)			\
-	for (tl = sched_domain_topology; tl->init; tl++)
-
 #ifdef CONFIG_NUMA
-
 static int sched_domains_numa_levels;
 static int *sched_domains_numa_distance;
 static struct cpumask ***sched_domains_numa_masks;
 static int sched_domains_curr_level;
+#endif
 
-static inline int sd_local_flags(int level)
-{
-	if (sched_domains_numa_distance[level] > RECLAIM_DISTANCE)
-		return 0;
-
-	return SD_BALANCE_EXEC | SD_BALANCE_FORK | SD_WAKE_AFFINE;
-}
+/*
+ * SD_flags allowed in topology descriptions.
+ *
+ * SD_SHARE_CPUPOWER      - describes SMT topologies
+ * SD_SHARE_PKG_RESOURCES - describes shared caches
+ * SD_NUMA                - describes NUMA topologies
+ *
+ * Odd one out:
+ * SD_ASYM_PACKING        - describes SMT quirks
+ */
+#define TOPOLOGY_SD_FLAGS		\
+	(SD_SHARE_CPUPOWER |		\
+	 SD_SHARE_PKG_RESOURCES |	\
+	 SD_NUMA |			\
+	 SD_ASYM_PACKING)
 
 static struct sched_domain *
-sd_numa_init(struct sched_domain_topology_level *tl, int cpu)
+sd_init(struct sched_domain_topology_level *tl, int cpu)
 {
 	struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu);
-	int level = tl->numa_level;
-	int sd_weight = cpumask_weight(
-			sched_domains_numa_masks[level][cpu_to_node(cpu)]);
+	int sd_weight, sd_flags = 0;
+
+#ifdef CONFIG_NUMA
+	/*
+	 * Ugly hack to pass state to sd_numa_mask()...
+	 */
+	sched_domains_curr_level = tl->numa_level;
+#endif
+
+	sd_weight = cpumask_weight(tl->mask(cpu));
+
+	if (tl->sd_flags)
+		sd_flags = (*tl->sd_flags)();
+	if (WARN_ONCE(sd_flags & ~TOPOLOGY_SD_FLAGS,
+			"wrong sd_flags in topology description\n"))
+		sd_flags &= ~TOPOLOGY_SD_FLAGS;
 
 	*sd = (struct sched_domain){
 		.min_interval		= sd_weight,
 		.max_interval		= 2*sd_weight,
 		.busy_factor		= 32,
 		.imbalance_pct		= 125,
-		.cache_nice_tries	= 2,
-		.busy_idx		= 3,
-		.idle_idx		= 2,
+
+		.cache_nice_tries	= 0,
+		.busy_idx		= 0,
+		.idle_idx		= 0,
 		.newidle_idx		= 0,
 		.wake_idx		= 0,
 		.forkexec_idx		= 0,
 
 		.flags			= 1*SD_LOAD_BALANCE
 					| 1*SD_BALANCE_NEWIDLE
-					| 0*SD_BALANCE_EXEC
-					| 0*SD_BALANCE_FORK
+					| 1*SD_BALANCE_EXEC
+					| 1*SD_BALANCE_FORK
 					| 0*SD_BALANCE_WAKE
-					| 0*SD_WAKE_AFFINE
+					| 1*SD_WAKE_AFFINE
 					| 0*SD_SHARE_CPUPOWER
 					| 0*SD_SHARE_PKG_RESOURCES
-					| 1*SD_SERIALIZE
+					| 0*SD_SERIALIZE
 					| 0*SD_PREFER_SIBLING
-					| 1*SD_NUMA
-					| sd_local_flags(level)
+					| 0*SD_NUMA
+					| sd_flags
 					,
+
 		.last_balance		= jiffies,
 		.balance_interval	= sd_weight,
+		.smt_gain		= 0,
 		.max_newidle_lb_cost	= 0,
 		.next_decay_max_lb_cost	= jiffies,
+#ifdef CONFIG_SCHED_DEBUG
+		.name			= tl->name,
+#endif
 	};
-	SD_INIT_NAME(sd, NUMA);
-	sd->private = &tl->data;
 
 	/*
-	 * Ugly hack to pass state to sd_numa_mask()...
+	 * Convert topological properties into behaviour.
 	 */
-	sched_domains_curr_level = tl->numa_level;
+
+	if (sd->flags & SD_SHARE_CPUPOWER) {
+		sd->imbalance_pct = 110;
+		sd->smt_gain = 1178; /* ~15% */
+		sd->flags |= arch_sd_sibling_asym_packing();
+
+	} else if (sd->flags & SD_SHARE_PKG_RESOURCES) {
+		sd->imbalance_pct = 117;
+		sd->cache_nice_tries = 1;
+		sd->busy_idx = 2;
+
+#ifdef CONFIG_NUMA
+	} else if (sd->flags & SD_NUMA) {
+		sd->cache_nice_tries = 2;
+		sd->busy_idx = 3;
+		sd->idle_idx = 2;
+
+		sd->flags |= SD_SERIALIZE;
+		if (sched_domains_numa_distance[tl->numa_level] > RECLAIM_DISTANCE) {
+			sd->flags &= ~(SD_BALANCE_EXEC |
+				       SD_BALANCE_FORK |
+				       SD_WAKE_AFFINE);
+		}
+
+#endif
+	} else {
+		sd->flags |= SD_PREFER_SIBLING;
+		sd->cache_nice_tries = 1;
+		sd->busy_idx = 2;
+		sd->idle_idx = 1;
+	}
+
+	sd->private = &tl->data;
 
 	return sd;
 }
 
+/*
+ * Topology list, bottom-up.
+ */
+static struct sched_domain_topology_level default_topology[] = {
+#ifdef CONFIG_SCHED_SMT
+	{ cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
+#endif
+#ifdef CONFIG_SCHED_MC
+	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
+#endif
+#ifdef CONFIG_SCHED_BOOK
+	{ cpu_book_mask, SD_INIT_NAME(BOOK) },
+#endif
+	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
+	{ NULL, },
+};
+
+struct sched_domain_topology_level *sched_domain_topology = default_topology;
+
+#define for_each_sd_topology(tl)			\
+	for (tl = sched_domain_topology; tl->mask; tl++)
+
+void set_sched_topology(struct sched_domain_topology_level *tl)
+{
+	sched_domain_topology = tl;
+}
+
+#ifdef CONFIG_NUMA
+
 static const struct cpumask *sd_numa_mask(int cpu)
 {
 	return sched_domains_numa_masks[sched_domains_curr_level][cpu_to_node(cpu)];
@@ -6183,7 +6186,10 @@ static void sched_init_numa(void)
 		}
 	}
 
-	tl = kzalloc((ARRAY_SIZE(default_topology) + level) *
+	/* Compute default topology size */
+	for (i = 0; sched_domain_topology[i].mask; i++);
+
+	tl = kzalloc((i + level) *
 			sizeof(struct sched_domain_topology_level), GFP_KERNEL);
 	if (!tl)
 		return;
@@ -6191,18 +6197,19 @@ static void sched_init_numa(void)
 	/*
 	 * Copy the default topology bits..
 	 */
-	for (i = 0; default_topology[i].init; i++)
-		tl[i] = default_topology[i];
+	for (i = 0; sched_domain_topology[i].mask; i++)
+		tl[i] = sched_domain_topology[i];
 
 	/*
 	 * .. and append 'j' levels of NUMA goodness.
 	 */
 	for (j = 0; j < level; i++, j++) {
 		tl[i] = (struct sched_domain_topology_level){
-			.init = sd_numa_init,
 			.mask = sd_numa_mask,
+			.sd_flags = cpu_numa_flags,
 			.flags = SDTL_OVERLAP,
 			.numa_level = j,
+			SD_INIT_NAME(NUMA)
 		};
 	}
 
@@ -6360,7 +6367,7 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl,
 		const struct cpumask *cpu_map, struct sched_domain_attr *attr,
 		struct sched_domain *child, int cpu)
 {
-	struct sched_domain *sd = tl->init(tl, cpu);
+	struct sched_domain *sd = sd_init(tl, cpu);
 	if (!sd)
 		return child;
 
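
Note: with the per-type SD_INIT_FUNC() initializers replaced by a single table-driven sd_init(), the exported set_sched_topology() above is how an architecture now installs its own sched_domain_topology_level table. The snippet below is a minimal, hypothetical sketch of such an override, assuming the mask/flag helpers this series moves into <linux/sched.h> and <linux/topology.h>; the "GMC" level name, cpu_gmc_mask() helper, arch_topology[] table and arch_init_topology() hook are invented for illustration and are not part of this patch.

#include <linux/init.h>
#include <linux/sched.h>
#include <linux/topology.h>

/*
 * Hypothetical arch-specific level: reuse the core sibling mask for an
 * invented "GMC" level, and keep the SMT and DIE levels from the default
 * table.
 */
static const struct cpumask *cpu_gmc_mask(int cpu)
{
	return topology_core_cpumask(cpu);	/* placeholder mask function */
}

static struct sched_domain_topology_level arch_topology[] = {
#ifdef CONFIG_SCHED_SMT
	{ cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
#endif
	{ cpu_gmc_mask, cpu_core_flags, SD_INIT_NAME(GMC) },
	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
	{ NULL, },
};

static void __init arch_init_topology(void)
{
	/* Replace the default table before the scheduler domains are built. */
	set_sched_topology(arch_topology);
}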