Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r--  kernel/sched/core.c | 299
1 file changed, 12 insertions(+), 287 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index dd09def88567..e94842d4400c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -512,11 +512,6 @@ static inline void init_hrtick(void)
  * the target CPU.
  */
 #ifdef CONFIG_SMP
-
-#ifndef tsk_is_polling
-#define tsk_is_polling(t) 0
-#endif
-
 void resched_task(struct task_struct *p)
 {
         int cpu;
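
The removed fallback (`#define tsk_is_polling(t) 0`) existed so resched_task() could skip the cross-CPU IPI when the target CPU polls TIF_NEED_RESCHED in its idle loop; architectures without that support simply answered "not polling" and always received the IPI. A simplified sketch of how the caller used the helper (condensed from kernels of this vintage, not the exact function body):

    void resched_task(struct task_struct *p)
    {
            int cpu = task_cpu(p);

            set_tsk_need_resched(p);
            if (cpu == smp_processor_id())
                    return;

            /* NEED_RESCHED must be visible before we test polling */
            smp_mb();
            if (!tsk_is_polling(p))
                    smp_send_reschedule(cpu);   /* IPI the remote CPU */
    }

Only the generic fallback leaves core.c here, which suggests the definition now lives in a shared header rather than being dropped outright.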
@@ -1536,8 +1531,10 @@ static void try_to_wake_up_local(struct task_struct *p)
 {
         struct rq *rq = task_rq(p);
 
-        BUG_ON(rq != this_rq());
-        BUG_ON(p == current);
+        if (WARN_ON_ONCE(rq != this_rq()) ||
+            WARN_ON_ONCE(p == current))
+                return;
+
         lockdep_assert_held(&rq->lock);
 
         if (!raw_spin_trylock(&p->pi_lock)) {
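
The switch away from BUG_ON() works because WARN_ON_ONCE(cond) evaluates to the truth value of cond while warning (and tainting) only the first time it fires, so one expression can both report the broken invariant and gate an early return instead of killing the machine. A minimal illustration of the idiom, with hypothetical names:

    /* struct ctx and check_ctx() are made-up; the idiom is the point. */
    static int check_ctx(struct ctx *c)
    {
            if (WARN_ON_ONCE(!c))   /* logs one stack trace, yields !c */
                    return -EINVAL; /* fail soft instead of BUG()ing */
            return 0;
    }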
@@ -3037,51 +3034,6 @@ void __sched schedule_preempt_disabled(void)
         preempt_disable();
 }
 
-#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
-
-static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
-{
-        if (lock->owner != owner)
-                return false;
-
-        /*
-         * Ensure we emit the owner->on_cpu, dereference _after_ checking
-         * lock->owner still matches owner, if that fails, owner might
-         * point to free()d memory, if it still matches, the rcu_read_lock()
-         * ensures the memory stays valid.
-         */
-        barrier();
-
-        return owner->on_cpu;
-}
-
-/*
- * Look out! "owner" is an entirely speculative pointer
- * access and not reliable.
- */
-int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
-{
-        if (!sched_feat(OWNER_SPIN))
-                return 0;
-
-        rcu_read_lock();
-        while (owner_running(lock, owner)) {
-                if (need_resched())
-                        break;
-
-                arch_mutex_cpu_relax();
-        }
-        rcu_read_unlock();
-
-        /*
-         * We break out the loop above on need_resched() and when the
-         * owner changed, which is a sign for heavy contention. Return
-         * success only when lock->owner is NULL.
-         */
-        return lock->owner == NULL;
-}
-#endif
-
 #ifdef CONFIG_PREEMPT
 /*
  * this is the entry point to schedule() from in-kernel preemption
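
The deleted spin-on-owner helpers deserve a note, since their ordering is subtle: `owner` is a speculative pointer, so `lock->owner` must be re-validated before `owner->on_cpu` is dereferenced, with barrier() keeping the compiler from reordering the two loads and rcu_read_lock() guaranteeing the task_struct is not freed in between; a stale on_cpu value is harmless, costing only slightly more or less spinning. The pattern, condensed (this mirrors the removed code, which appears to have been relocated toward the mutex code rather than dropped):

    static inline bool owner_still_running(struct mutex *lock,
                                           struct task_struct *owner)
    {
            if (lock->owner != owner)       /* re-validate first ... */
                    return false;
            barrier();                      /* ... then load on_cpu */
            return owner->on_cpu;           /* RCU keeps *owner alive */
    }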
@@ -4170,6 +4122,10 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
         get_task_struct(p);
         rcu_read_unlock();
 
+        if (p->flags & PF_NO_SETAFFINITY) {
+                retval = -EINVAL;
+                goto out_put_task;
+        }
         if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
                 retval = -ENOMEM;
                 goto out_put_task;
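
From userspace, the new PF_NO_SETAFFINITY check surfaces as a plain EINVAL from sched_setaffinity(2) when the target task refuses affinity changes (per-CPU kernel threads, for instance). A small demonstration from the user side, assuming pid 2 is kthreadd as on typical systems:

    #define _GNU_SOURCE
    #include <errno.h>
    #include <sched.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            cpu_set_t set;

            CPU_ZERO(&set);
            CPU_SET(0, &set);

            /* pid 2 is usually kthreadd; expect EINVAL here
             * (or EPERM first, if we lack the needed privilege) */
            if (sched_setaffinity(2, sizeof(set), &set) == -1)
                    printf("sched_setaffinity: %s\n", strerror(errno));

            return 0;
    }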
@@ -4817,11 +4773,6 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
                 goto out;
         }
 
-        if (unlikely((p->flags & PF_THREAD_BOUND) && p != current)) {
-                ret = -EINVAL;
-                goto out;
-        }
-
         do_set_cpus_allowed(p, new_mask);
 
         /* Can the task run on the task's current CPU? If so, we're done */
@@ -5043,7 +4994,7 @@ static void sd_free_ctl_entry(struct ctl_table **tablep)
 }
 
 static int min_load_idx = 0;
-static int max_load_idx = CPU_LOAD_IDX_MAX;
+static int max_load_idx = CPU_LOAD_IDX_MAX-1;
 
 static void
 set_table_entry(struct ctl_table *entry,
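
The new -1 is an off-by-one fix: CPU_LOAD_IDX_MAX is the size of the rq->cpu_load[] array, not a valid index, so the old sysctl bound allowed a privileged writer to select an index one past the end. The shape of the bug in isolation:

    #define CPU_LOAD_IDX_MAX 5                       /* array size */

    static unsigned long cpu_load[CPU_LOAD_IDX_MAX];

    static int min_load_idx = 0;
    static int max_load_idx = CPU_LOAD_IDX_MAX - 1;  /* last valid index */
    /* with max_load_idx == CPU_LOAD_IDX_MAX, a permitted sysctl value
     * would make cpu_load[idx] read one slot past the array */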
@@ -6292,7 +6243,7 @@ static void sched_init_numa(void)
          * 'level' contains the number of unique distances, excluding the
          * identity distance node_distance(i,i).
          *
-         * The sched_domains_nume_distance[] array includes the actual distance
+         * The sched_domains_numa_distance[] array includes the actual distance
          * numbers.
          */
 
@@ -6913,7 +6864,7 @@ struct task_group root_task_group;
 LIST_HEAD(task_groups);
 #endif
 
-DECLARE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
+DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);
 
 void __init sched_init(void)
 {
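
Worth remembering when reading this rename: DECLARE_PER_CPU() is only the extern declaration; exactly one translation unit (the load balancer, presumably kernel/sched/fair.c in this tree) must carry the matching DEFINE_PER_CPU() under the same new name. The declare/define split in miniature:

    /* in a header or the consuming file: an extern declaration */
    DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);

    /* in exactly one .c file: the actual per-CPU storage */
    DEFINE_PER_CPU(cpumask_var_t, load_balance_mask);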
@@ -6950,7 +6901,7 @@ void __init sched_init(void)
 #endif /* CONFIG_RT_GROUP_SCHED */
 #ifdef CONFIG_CPUMASK_OFFSTACK
         for_each_possible_cpu(i) {
-                per_cpu(load_balance_tmpmask, i) = (void *)ptr;
+                per_cpu(load_balance_mask, i) = (void *)ptr;
                 ptr += cpumask_size();
         }
 #endif /* CONFIG_CPUMASK_OFFSTACK */
@@ -6976,12 +6927,6 @@ void __init sched_init(void)
 
 #endif /* CONFIG_CGROUP_SCHED */
 
-#ifdef CONFIG_CGROUP_CPUACCT
-        root_cpuacct.cpustat = &kernel_cpustat;
-        root_cpuacct.cpuusage = alloc_percpu(u64);
-        /* Too early, not expected to fail */
-        BUG_ON(!root_cpuacct.cpuusage);
-#endif
         for_each_possible_cpu(i) {
                 struct rq *rq;
 
@@ -8083,226 +8028,6 @@ struct cgroup_subsys cpu_cgroup_subsys = {
 
 #endif /* CONFIG_CGROUP_SCHED */
 
-#ifdef CONFIG_CGROUP_CPUACCT
-
-/*
- * CPU accounting code for task groups.
- *
- * Based on the work by Paul Menage (menage@google.com) and Balbir Singh
- * (balbir@in.ibm.com).
- */
-
-struct cpuacct root_cpuacct;
-
-/* create a new cpu accounting group */
-static struct cgroup_subsys_state *cpuacct_css_alloc(struct cgroup *cgrp)
-{
-        struct cpuacct *ca;
-
-        if (!cgrp->parent)
-                return &root_cpuacct.css;
-
-        ca = kzalloc(sizeof(*ca), GFP_KERNEL);
-        if (!ca)
-                goto out;
-
-        ca->cpuusage = alloc_percpu(u64);
-        if (!ca->cpuusage)
-                goto out_free_ca;
-
-        ca->cpustat = alloc_percpu(struct kernel_cpustat);
-        if (!ca->cpustat)
-                goto out_free_cpuusage;
-
-        return &ca->css;
-
-out_free_cpuusage:
-        free_percpu(ca->cpuusage);
-out_free_ca:
-        kfree(ca);
-out:
-        return ERR_PTR(-ENOMEM);
-}
-
-/* destroy an existing cpu accounting group */
-static void cpuacct_css_free(struct cgroup *cgrp)
-{
-        struct cpuacct *ca = cgroup_ca(cgrp);
-
-        free_percpu(ca->cpustat);
-        free_percpu(ca->cpuusage);
-        kfree(ca);
-}
-
-static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu)
-{
-        u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
-        u64 data;
-
-#ifndef CONFIG_64BIT
-        /*
-         * Take rq->lock to make 64-bit read safe on 32-bit platforms.
-         */
-        raw_spin_lock_irq(&cpu_rq(cpu)->lock);
-        data = *cpuusage;
-        raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
-#else
-        data = *cpuusage;
-#endif
-
-        return data;
-}
-
-static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
-{
-        u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
-
-#ifndef CONFIG_64BIT
-        /*
-         * Take rq->lock to make 64-bit write safe on 32-bit platforms.
-         */
-        raw_spin_lock_irq(&cpu_rq(cpu)->lock);
-        *cpuusage = val;
-        raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
-#else
-        *cpuusage = val;
-#endif
-}
-
-/* return total cpu usage (in nanoseconds) of a group */
-static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft)
-{
-        struct cpuacct *ca = cgroup_ca(cgrp);
-        u64 totalcpuusage = 0;
-        int i;
-
-        for_each_present_cpu(i)
-                totalcpuusage += cpuacct_cpuusage_read(ca, i);
-
-        return totalcpuusage;
-}
-
-static int cpuusage_write(struct cgroup *cgrp, struct cftype *cftype,
-                          u64 reset)
-{
-        struct cpuacct *ca = cgroup_ca(cgrp);
-        int err = 0;
-        int i;
-
-        if (reset) {
-                err = -EINVAL;
-                goto out;
-        }
-
-        for_each_present_cpu(i)
-                cpuacct_cpuusage_write(ca, i, 0);
-
-out:
-        return err;
-}
-
-static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft,
-                                   struct seq_file *m)
-{
-        struct cpuacct *ca = cgroup_ca(cgroup);
-        u64 percpu;
-        int i;
-
-        for_each_present_cpu(i) {
-                percpu = cpuacct_cpuusage_read(ca, i);
-                seq_printf(m, "%llu ", (unsigned long long) percpu);
-        }
-        seq_printf(m, "\n");
-        return 0;
-}
-
-static const char *cpuacct_stat_desc[] = {
-        [CPUACCT_STAT_USER] = "user",
-        [CPUACCT_STAT_SYSTEM] = "system",
-};
-
-static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft,
-                              struct cgroup_map_cb *cb)
-{
-        struct cpuacct *ca = cgroup_ca(cgrp);
-        int cpu;
-        s64 val = 0;
-
-        for_each_online_cpu(cpu) {
-                struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
-                val += kcpustat->cpustat[CPUTIME_USER];
-                val += kcpustat->cpustat[CPUTIME_NICE];
-        }
-        val = cputime64_to_clock_t(val);
-        cb->fill(cb, cpuacct_stat_desc[CPUACCT_STAT_USER], val);
-
-        val = 0;
-        for_each_online_cpu(cpu) {
-                struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
-                val += kcpustat->cpustat[CPUTIME_SYSTEM];
-                val += kcpustat->cpustat[CPUTIME_IRQ];
-                val += kcpustat->cpustat[CPUTIME_SOFTIRQ];
-        }
-
-        val = cputime64_to_clock_t(val);
-        cb->fill(cb, cpuacct_stat_desc[CPUACCT_STAT_SYSTEM], val);
-
-        return 0;
-}
-
-static struct cftype files[] = {
-        {
-                .name = "usage",
-                .read_u64 = cpuusage_read,
-                .write_u64 = cpuusage_write,
-        },
-        {
-                .name = "usage_percpu",
-                .read_seq_string = cpuacct_percpu_seq_read,
-        },
-        {
-                .name = "stat",
-                .read_map = cpuacct_stats_show,
-        },
-        { }        /* terminate */
-};
-
-/*
- * charge this task's execution time to its accounting group.
- *
- * called with rq->lock held.
- */
-void cpuacct_charge(struct task_struct *tsk, u64 cputime)
-{
-        struct cpuacct *ca;
-        int cpu;
-
-        if (unlikely(!cpuacct_subsys.active))
-                return;
-
-        cpu = task_cpu(tsk);
-
-        rcu_read_lock();
-
-        ca = task_ca(tsk);
-
-        for (; ca; ca = parent_ca(ca)) {
-                u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
-                *cpuusage += cputime;
-        }
-
-        rcu_read_unlock();
-}
-
-struct cgroup_subsys cpuacct_subsys = {
-        .name = "cpuacct",
-        .css_alloc = cpuacct_css_alloc,
-        .css_free = cpuacct_css_free,
-        .subsys_id = cpuacct_subsys_id,
-        .base_cftypes = files,
-};
-#endif /* CONFIG_CGROUP_CPUACCT */
-
 void dump_cpu_task(int cpu)
 {
         pr_info("Task dump for CPU %d:\n", cpu);
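
One detail of the removed cpuacct code that is easy to miss: on 32-bit platforms a u64 load or store takes two machine words, so cpuacct_cpuusage_read()/_write() took the runqueue lock, which also serializes the writer cpuacct_charge(), to avoid observing a half-updated counter. The general pattern as a standalone sketch (struct counter and its lock are hypothetical stand-ins for the cpuusage counter and rq->lock):

    struct counter {
            u64             val;    /* written under 'lock' */
            raw_spinlock_t  lock;
    };

    static u64 counter_read(struct counter *c)
    {
    #ifndef CONFIG_64BIT
            u64 v;

            /* a 64-bit load is not atomic here; take the writer's
             * lock so we never see the two 32-bit halves mid-update */
            raw_spin_lock_irq(&c->lock);
            v = c->val;
            raw_spin_unlock_irq(&c->lock);
            return v;
    #else
            return c->val;          /* a single atomic load on 64-bit */
    #endif
    }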