path: root/kernel/sched.c
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--  kernel/sched.c | 110
1 file changed, 76 insertions(+), 34 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index a56446d7fda2..e5205811c19e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -39,7 +39,7 @@
 #include <linux/completion.h>
 #include <linux/kernel_stat.h>
 #include <linux/debug_locks.h>
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
 #include <linux/security.h>
 #include <linux/notifier.h>
 #include <linux/profile.h>
@@ -676,6 +676,7 @@ inline void update_rq_clock(struct rq *rq)
 
 /**
  * runqueue_is_locked
+ * @cpu: the processor in question.
  *
  * Returns true if the current cpu runqueue is locked.
  * This interface allows printk to be called with the runqueue lock
@@ -780,7 +781,7 @@ static int sched_feat_open(struct inode *inode, struct file *filp)
 	return single_open(filp, sched_feat_show, NULL);
 }
 
-static struct file_operations sched_feat_fops = {
+static const struct file_operations sched_feat_fops = {
 	.open = sched_feat_open,
 	.write = sched_feat_write,
 	.read = seq_read,
@@ -2053,7 +2054,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 		if (task_hot(p, old_rq->clock, NULL))
 			schedstat_inc(p, se.nr_forced2_migrations);
 #endif
-		perf_swcounter_event(PERF_COUNT_SW_CPU_MIGRATIONS,
+		perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS,
 				1, 1, NULL, 0);
 	}
 	p->se.vruntime -= old_cfsrq->min_vruntime -
@@ -2311,7 +2312,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
 {
 	int cpu, orig_cpu, this_cpu, success = 0;
 	unsigned long flags;
-	struct rq *rq;
+	struct rq *rq, *orig_rq;
 
 	if (!sched_feat(SYNC_WAKEUPS))
 		wake_flags &= ~WF_SYNC;
@@ -2319,7 +2320,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
 	this_cpu = get_cpu();
 
 	smp_wmb();
-	rq = task_rq_lock(p, &flags);
+	rq = orig_rq = task_rq_lock(p, &flags);
 	update_rq_clock(rq);
 	if (!(p->state & state))
 		goto out;
@@ -2350,6 +2351,10 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
 	set_task_cpu(p, cpu);
 
 	rq = task_rq_lock(p, &flags);
+
+	if (rq != orig_rq)
+		update_rq_clock(rq);
+
 	WARN_ON(p->state != TASK_WAKING);
 	cpu = task_cpu(p);
 
@@ -2515,22 +2520,17 @@ void sched_fork(struct task_struct *p, int clone_flags)
 	__sched_fork(p);
 
 	/*
-	 * Make sure we do not leak PI boosting priority to the child.
-	 */
-	p->prio = current->normal_prio;
-
-	/*
 	 * Revert to default priority/policy on fork if requested.
 	 */
 	if (unlikely(p->sched_reset_on_fork)) {
-		if (p->policy == SCHED_FIFO || p->policy == SCHED_RR)
+		if (p->policy == SCHED_FIFO || p->policy == SCHED_RR) {
 			p->policy = SCHED_NORMAL;
-
-		if (p->normal_prio < DEFAULT_PRIO)
-			p->prio = DEFAULT_PRIO;
+			p->normal_prio = p->static_prio;
+		}
 
 		if (PRIO_TO_NICE(p->static_prio) < 0) {
 			p->static_prio = NICE_TO_PRIO(0);
+			p->normal_prio = p->static_prio;
 			set_load_weight(p);
 		}
 
@@ -2541,6 +2541,11 @@ void sched_fork(struct task_struct *p, int clone_flags)
 		p->sched_reset_on_fork = 0;
 	}
 
+	/*
+	 * Make sure we do not leak PI boosting priority to the child.
+	 */
+	p->prio = current->normal_prio;
+
 	if (!rt_prio(p->prio))
 		p->sched_class = &fair_sched_class;
 
@@ -2581,8 +2586,6 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 	BUG_ON(p->state != TASK_RUNNING);
 	update_rq_clock(rq);
 
-	p->prio = effective_prio(p);
-
 	if (!p->sched_class->task_new || !current->se.on_rq) {
 		activate_task(rq, p, 0);
 	} else {
@@ -2718,7 +2721,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	 */
 	prev_state = prev->state;
 	finish_arch_switch(prev);
-	perf_counter_task_sched_in(current, cpu_of(rq));
+	perf_event_task_sched_in(current, cpu_of(rq));
 	finish_lock_switch(rq, prev);
 
 	fire_sched_in_preempt_notifiers(current);
@@ -2904,6 +2907,19 @@ unsigned long nr_iowait(void)
 	return sum;
 }
 
+unsigned long nr_iowait_cpu(void)
+{
+	struct rq *this = this_rq();
+	return atomic_read(&this->nr_iowait);
+}
+
+unsigned long this_cpu_load(void)
+{
+	struct rq *this = this_rq();
+	return this->cpu_load[0];
+}
+
+
 /* Variables and functions for calc_load */
 static atomic_long_t calc_load_tasks;
 static unsigned long calc_load_update;
@@ -3645,6 +3661,7 @@ static void update_group_power(struct sched_domain *sd, int cpu)
 
 /**
  * update_sg_lb_stats - Update sched_group's statistics for load balancing.
+ * @sd: The sched_domain whose statistics are to be updated.
  * @group: sched_group whose statistics are to be updated.
  * @this_cpu: Cpu for which load balance is currently performed.
  * @idle: Idle status of this_cpu
@@ -5079,17 +5096,16 @@ void account_idle_time(cputime_t cputime)
  */
 void account_process_tick(struct task_struct *p, int user_tick)
 {
-	cputime_t one_jiffy = jiffies_to_cputime(1);
-	cputime_t one_jiffy_scaled = cputime_to_scaled(one_jiffy);
+	cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
 	struct rq *rq = this_rq();
 
 	if (user_tick)
-		account_user_time(p, one_jiffy, one_jiffy_scaled);
+		account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
 	else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
-		account_system_time(p, HARDIRQ_OFFSET, one_jiffy,
+		account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy,
 				one_jiffy_scaled);
 	else
-		account_idle_time(one_jiffy);
+		account_idle_time(cputime_one_jiffy);
 }
 
 /*
@@ -5193,7 +5209,7 @@ void scheduler_tick(void)
 	curr->sched_class->task_tick(rq, curr, 0);
 	spin_unlock(&rq->lock);
 
-	perf_counter_task_tick(curr, cpu);
+	perf_event_task_tick(curr, cpu);
 
 #ifdef CONFIG_SMP
 	rq->idle_at_tick = idle_cpu(cpu);
@@ -5409,7 +5425,7 @@ need_resched_nonpreemptible:
 
 	if (likely(prev != next)) {
 		sched_info_switch(prev, next);
-		perf_counter_task_sched_out(prev, next, cpu);
+		perf_event_task_sched_out(prev, next, cpu);
 
 		rq->nr_switches++;
 		rq->curr = next;
@@ -6708,9 +6724,6 @@ EXPORT_SYMBOL(yield);
 /*
  * This task is about to go to sleep on IO. Increment rq->nr_iowait so
  * that process accounting knows that this is a task in IO wait state.
- *
- * But don't do that if it is a deliberate, throttling IO wait (this task
- * has set its backing_dev_info: the queue against which it should throttle)
  */
 void __sched io_schedule(void)
 {
@@ -7671,7 +7684,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 /*
  * Register at high priority so that task migration (migrate_all_tasks)
  * happens before everything else. This has to be lower priority than
- * the notifier in the perf_counter subsystem, though.
+ * the notifier in the perf_event subsystem, though.
  */
 static struct notifier_block __cpuinitdata migration_notifier = {
 	.notifier_call = migration_call,
@@ -9524,7 +9537,7 @@ void __init sched_init(void)
 	alloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
 #endif /* SMP */
 
-	perf_counter_init();
+	perf_event_init();
 
 	scheduler_running = 1;
 }
@@ -10296,7 +10309,7 @@ static int sched_rt_global_constraints(void)
 #endif /* CONFIG_RT_GROUP_SCHED */
 
 int sched_rt_handler(struct ctl_table *table, int write,
-		struct file *filp, void __user *buffer, size_t *lenp,
+		void __user *buffer, size_t *lenp,
 		loff_t *ppos)
 {
 	int ret;
@@ -10307,7 +10320,7 @@ int sched_rt_handler(struct ctl_table *table, int write,
 	old_period = sysctl_sched_rt_period;
 	old_runtime = sysctl_sched_rt_runtime;
 
-	ret = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+	ret = proc_dointvec(table, write, buffer, lenp, ppos);
 
 	if (!ret && write) {
 		ret = sched_rt_global_constraints();
@@ -10361,8 +10374,7 @@ cpu_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
 }
 
 static int
-cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-		      struct task_struct *tsk)
+cpu_cgroup_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 {
 #ifdef CONFIG_RT_GROUP_SCHED
 	if (!sched_rt_can_attach(cgroup_tg(cgrp), tsk))
@@ -10372,15 +10384,45 @@ cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
 	if (tsk->sched_class != &fair_sched_class)
 		return -EINVAL;
 #endif
+	return 0;
+}
 
+static int
+cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
+		      struct task_struct *tsk, bool threadgroup)
+{
+	int retval = cpu_cgroup_can_attach_task(cgrp, tsk);
+	if (retval)
+		return retval;
+	if (threadgroup) {
+		struct task_struct *c;
+		rcu_read_lock();
+		list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
+			retval = cpu_cgroup_can_attach_task(cgrp, c);
+			if (retval) {
+				rcu_read_unlock();
+				return retval;
+			}
+		}
+		rcu_read_unlock();
+	}
 	return 0;
 }
 
 static void
 cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-		  struct cgroup *old_cont, struct task_struct *tsk)
+		  struct cgroup *old_cont, struct task_struct *tsk,
+		  bool threadgroup)
 {
 	sched_move_task(tsk);
+	if (threadgroup) {
+		struct task_struct *c;
+		rcu_read_lock();
+		list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
+			sched_move_task(c);
+		}
+		rcu_read_unlock();
+	}
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED