Diffstat (limited to 'kernel/sched.c')
-rw-r--r--   kernel/sched.c   110
1 file changed, 76 insertions(+), 34 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index a56446d7fda2..e5205811c19e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -39,7 +39,7 @@
 #include <linux/completion.h>
 #include <linux/kernel_stat.h>
 #include <linux/debug_locks.h>
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
 #include <linux/security.h>
 #include <linux/notifier.h>
 #include <linux/profile.h>
@@ -676,6 +676,7 @@ inline void update_rq_clock(struct rq *rq)
 
 /**
  * runqueue_is_locked
+ * @cpu: the processor in question.
  *
  * Returns true if the current cpu runqueue is locked.
  * This interface allows printk to be called with the runqueue lock
@@ -780,7 +781,7 @@ static int sched_feat_open(struct inode *inode, struct file *filp)
        return single_open(filp, sched_feat_show, NULL);
 }
 
-static struct file_operations sched_feat_fops = {
+static const struct file_operations sched_feat_fops = {
        .open           = sched_feat_open,
        .write          = sched_feat_write,
        .read           = seq_read,
@@ -2053,7 +2054,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
                if (task_hot(p, old_rq->clock, NULL))
                        schedstat_inc(p, se.nr_forced2_migrations);
 #endif
-               perf_swcounter_event(PERF_COUNT_SW_CPU_MIGRATIONS,
+               perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS,
                                     1, 1, NULL, 0);
        }
        p->se.vruntime -= old_cfsrq->min_vruntime -
@@ -2311,7 +2312,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
 {
        int cpu, orig_cpu, this_cpu, success = 0;
        unsigned long flags;
-       struct rq *rq;
+       struct rq *rq, *orig_rq;
 
        if (!sched_feat(SYNC_WAKEUPS))
                wake_flags &= ~WF_SYNC;
@@ -2319,7 +2320,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
        this_cpu = get_cpu();
 
        smp_wmb();
-       rq = task_rq_lock(p, &flags);
+       rq = orig_rq = task_rq_lock(p, &flags);
        update_rq_clock(rq);
        if (!(p->state & state))
                goto out;
@@ -2350,6 +2351,10 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
        set_task_cpu(p, cpu);
 
        rq = task_rq_lock(p, &flags);
+
+       if (rq != orig_rq)
+               update_rq_clock(rq);
+
        WARN_ON(p->state != TASK_WAKING);
        cpu = task_cpu(p);
 
@@ -2515,22 +2520,17 @@ void sched_fork(struct task_struct *p, int clone_flags)
        __sched_fork(p);
 
        /*
-        * Make sure we do not leak PI boosting priority to the child.
-        */
-       p->prio = current->normal_prio;
-
-       /*
         * Revert to default priority/policy on fork if requested.
         */
        if (unlikely(p->sched_reset_on_fork)) {
-               if (p->policy == SCHED_FIFO || p->policy == SCHED_RR)
+               if (p->policy == SCHED_FIFO || p->policy == SCHED_RR) {
                        p->policy = SCHED_NORMAL;
-
-               if (p->normal_prio < DEFAULT_PRIO)
-                       p->prio = DEFAULT_PRIO;
+                       p->normal_prio = p->static_prio;
+               }
 
                if (PRIO_TO_NICE(p->static_prio) < 0) {
                        p->static_prio = NICE_TO_PRIO(0);
+                       p->normal_prio = p->static_prio;
                        set_load_weight(p);
                }
 
@@ -2541,6 +2541,11 @@ void sched_fork(struct task_struct *p, int clone_flags)
                p->sched_reset_on_fork = 0;
        }
 
+       /*
+        * Make sure we do not leak PI boosting priority to the child.
+        */
+       p->prio = current->normal_prio;
+
        if (!rt_prio(p->prio))
                p->sched_class = &fair_sched_class;
 
@@ -2581,8 +2586,6 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
        BUG_ON(p->state != TASK_RUNNING);
        update_rq_clock(rq);
 
-       p->prio = effective_prio(p);
-
        if (!p->sched_class->task_new || !current->se.on_rq) {
                activate_task(rq, p, 0);
        } else {
@@ -2718,7 +2721,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
         */
        prev_state = prev->state;
        finish_arch_switch(prev);
-       perf_counter_task_sched_in(current, cpu_of(rq));
+       perf_event_task_sched_in(current, cpu_of(rq));
        finish_lock_switch(rq, prev);
 
        fire_sched_in_preempt_notifiers(current);
@@ -2904,6 +2907,19 @@ unsigned long nr_iowait(void)
        return sum;
 }
 
+unsigned long nr_iowait_cpu(void)
+{
+       struct rq *this = this_rq();
+       return atomic_read(&this->nr_iowait);
+}
+
+unsigned long this_cpu_load(void)
+{
+       struct rq *this = this_rq();
+       return this->cpu_load[0];
+}
+
+
 /* Variables and functions for calc_load */
 static atomic_long_t calc_load_tasks;
 static unsigned long calc_load_update;
@@ -3645,6 +3661,7 @@ static void update_group_power(struct sched_domain *sd, int cpu)
 
 /**
  * update_sg_lb_stats - Update sched_group's statistics for load balancing.
+ * @sd: The sched_domain whose statistics are to be updated.
  * @group: sched_group whose statistics are to be updated.
  * @this_cpu: Cpu for which load balance is currently performed.
  * @idle: Idle status of this_cpu
@@ -5079,17 +5096,16 @@ void account_idle_time(cputime_t cputime)
  */
 void account_process_tick(struct task_struct *p, int user_tick)
 {
-       cputime_t one_jiffy = jiffies_to_cputime(1);
-       cputime_t one_jiffy_scaled = cputime_to_scaled(one_jiffy);
+       cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
        struct rq *rq = this_rq();
 
        if (user_tick)
-               account_user_time(p, one_jiffy, one_jiffy_scaled);
+               account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
        else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
-               account_system_time(p, HARDIRQ_OFFSET, one_jiffy,
+               account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy,
                                    one_jiffy_scaled);
        else
-               account_idle_time(one_jiffy);
+               account_idle_time(cputime_one_jiffy);
 }
 
 /*
@@ -5193,7 +5209,7 @@ void scheduler_tick(void)
        curr->sched_class->task_tick(rq, curr, 0);
        spin_unlock(&rq->lock);
 
-       perf_counter_task_tick(curr, cpu);
+       perf_event_task_tick(curr, cpu);
 
 #ifdef CONFIG_SMP
        rq->idle_at_tick = idle_cpu(cpu);
@@ -5409,7 +5425,7 @@ need_resched_nonpreemptible:
 
        if (likely(prev != next)) {
                sched_info_switch(prev, next);
-               perf_counter_task_sched_out(prev, next, cpu);
+               perf_event_task_sched_out(prev, next, cpu);
 
                rq->nr_switches++;
                rq->curr = next;
@@ -6708,9 +6724,6 @@ EXPORT_SYMBOL(yield);
 /*
  * This task is about to go to sleep on IO. Increment rq->nr_iowait so
  * that process accounting knows that this is a task in IO wait state.
- *
- * But don't do that if it is a deliberate, throttling IO wait (this task
- * has set its backing_dev_info: the queue against which it should throttle)
  */
 void __sched io_schedule(void)
 {
@@ -7671,7 +7684,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 /*
  * Register at high priority so that task migration (migrate_all_tasks)
  * happens before everything else. This has to be lower priority than
- * the notifier in the perf_counter subsystem, though.
+ * the notifier in the perf_event subsystem, though.
  */
 static struct notifier_block __cpuinitdata migration_notifier = {
        .notifier_call = migration_call,
@@ -9524,7 +9537,7 @@ void __init sched_init(void)
        alloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
 #endif /* SMP */
 
-       perf_counter_init();
+       perf_event_init();
 
        scheduler_running = 1;
 }
@@ -10296,7 +10309,7 @@ static int sched_rt_global_constraints(void)
 #endif /* CONFIG_RT_GROUP_SCHED */
 
 int sched_rt_handler(struct ctl_table *table, int write,
-               struct file *filp, void __user *buffer, size_t *lenp,
+               void __user *buffer, size_t *lenp,
                loff_t *ppos)
 {
        int ret;
@@ -10307,7 +10320,7 @@ int sched_rt_handler(struct ctl_table *table, int write,
        old_period = sysctl_sched_rt_period;
        old_runtime = sysctl_sched_rt_runtime;
 
-       ret = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+       ret = proc_dointvec(table, write, buffer, lenp, ppos);
 
        if (!ret && write) {
                ret = sched_rt_global_constraints();
@@ -10361,8 +10374,7 @@ cpu_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
 }
 
 static int
-cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-                     struct task_struct *tsk)
+cpu_cgroup_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 {
 #ifdef CONFIG_RT_GROUP_SCHED
        if (!sched_rt_can_attach(cgroup_tg(cgrp), tsk))
@@ -10372,15 +10384,45 @@ cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
        if (tsk->sched_class != &fair_sched_class)
                return -EINVAL;
 #endif
+       return 0;
+}
 
+static int
+cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
+                     struct task_struct *tsk, bool threadgroup)
+{
+       int retval = cpu_cgroup_can_attach_task(cgrp, tsk);
+       if (retval)
+               return retval;
+       if (threadgroup) {
+               struct task_struct *c;
+               rcu_read_lock();
+               list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
+                       retval = cpu_cgroup_can_attach_task(cgrp, c);
+                       if (retval) {
+                               rcu_read_unlock();
+                               return retval;
+                       }
+               }
+               rcu_read_unlock();
+       }
        return 0;
 }
 
 static void
 cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-                 struct cgroup *old_cont, struct task_struct *tsk)
+                 struct cgroup *old_cont, struct task_struct *tsk,
+                 bool threadgroup)
 {
        sched_move_task(tsk);
+       if (threadgroup) {
+               struct task_struct *c;
+               rcu_read_lock();
+               list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
+                       sched_move_task(c);
+               }
+               rcu_read_unlock();
+       }
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED