Diffstat (limited to 'kernel/sched.c')
-rw-r--r--  kernel/sched.c  145
1 file changed, 103 insertions, 42 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 0ac9053c21d6..28dd4f490bfc 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -676,6 +676,7 @@ inline void update_rq_clock(struct rq *rq)
 
 /**
  * runqueue_is_locked
+ * @cpu: the processor in question.
  *
  * Returns true if the current cpu runqueue is locked.
  * This interface allows printk to be called with the runqueue lock
@@ -780,7 +781,7 @@ static int sched_feat_open(struct inode *inode, struct file *filp)
 	return single_open(filp, sched_feat_show, NULL);
 }
 
-static struct file_operations sched_feat_fops = {
+static const struct file_operations sched_feat_fops = {
 	.open = sched_feat_open,
 	.write = sched_feat_write,
 	.read = seq_read,
@@ -1563,11 +1564,7 @@ static unsigned long cpu_avg_load_per_task(int cpu)
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
-struct update_shares_data {
-	unsigned long rq_weight[NR_CPUS];
-};
-
-static DEFINE_PER_CPU(struct update_shares_data, update_shares_data);
+static __read_mostly unsigned long *update_shares_data;
 
 static void __set_se_shares(struct sched_entity *se, unsigned long shares);
 
@@ -1577,12 +1574,12 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares);
 static void update_group_shares_cpu(struct task_group *tg, int cpu,
 				    unsigned long sd_shares,
 				    unsigned long sd_rq_weight,
-				    struct update_shares_data *usd)
+				    unsigned long *usd_rq_weight)
 {
 	unsigned long shares, rq_weight;
 	int boost = 0;
 
-	rq_weight = usd->rq_weight[cpu];
+	rq_weight = usd_rq_weight[cpu];
 	if (!rq_weight) {
 		boost = 1;
 		rq_weight = NICE_0_LOAD;
@@ -1617,7 +1614,7 @@ static void update_group_shares_cpu(struct task_group *tg, int cpu,
 static int tg_shares_up(struct task_group *tg, void *data)
 {
 	unsigned long weight, rq_weight = 0, shares = 0;
-	struct update_shares_data *usd;
+	unsigned long *usd_rq_weight;
 	struct sched_domain *sd = data;
 	unsigned long flags;
 	int i;
@@ -1626,11 +1623,11 @@ static int tg_shares_up(struct task_group *tg, void *data)
 		return 0;
 
 	local_irq_save(flags);
-	usd = &__get_cpu_var(update_shares_data);
+	usd_rq_weight = per_cpu_ptr(update_shares_data, smp_processor_id());
 
 	for_each_cpu(i, sched_domain_span(sd)) {
 		weight = tg->cfs_rq[i]->load.weight;
-		usd->rq_weight[i] = weight;
+		usd_rq_weight[i] = weight;
 
 		/*
 		 * If there are currently no tasks on the cpu pretend there
@@ -1651,7 +1648,7 @@ static int tg_shares_up(struct task_group *tg, void *data)
 		shares = tg->shares;
 
 	for_each_cpu(i, sched_domain_span(sd))
-		update_group_shares_cpu(tg, i, shares, rq_weight, usd);
+		update_group_shares_cpu(tg, i, shares, rq_weight, usd_rq_weight);
 
 	local_irq_restore(flags);
 
@@ -1995,6 +1992,38 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
 		p->sched_class->prio_changed(rq, p, oldprio, running);
 }
 
+/**
+ * kthread_bind - bind a just-created kthread to a cpu.
+ * @p: thread created by kthread_create().
+ * @cpu: cpu (might not be online, must be possible) for @p to run on.
+ *
+ * Description: This function is equivalent to set_cpus_allowed(),
+ * except that @cpu doesn't need to be online, and the thread must be
+ * stopped (i.e., just returned from kthread_create()).
+ *
+ * Function lives here instead of kthread.c because it messes with
+ * scheduler internals which require locking.
+ */
+void kthread_bind(struct task_struct *p, unsigned int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long flags;
+
+	/* Must have done schedule() in kthread() before we set_task_cpu */
+	if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) {
+		WARN_ON(1);
+		return;
+	}
+
+	spin_lock_irqsave(&rq->lock, flags);
+	set_task_cpu(p, cpu);
+	p->cpus_allowed = cpumask_of_cpu(cpu);
+	p->rt.nr_cpus_allowed = 1;
+	p->flags |= PF_THREAD_BOUND;
+	spin_unlock_irqrestore(&rq->lock, flags);
+}
+EXPORT_SYMBOL(kthread_bind);
+
 #ifdef CONFIG_SMP
 /*
  * Is this task likely cache-hot:
@@ -2007,7 +2036,7 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
 	/*
 	 * Buddy candidates are cache hot:
 	 */
-	if (sched_feat(CACHE_HOT_BUDDY) &&
+	if (sched_feat(CACHE_HOT_BUDDY) && this_rq()->nr_running &&
 			(&p->se == cfs_rq_of(&p->se)->next ||
 			 &p->se == cfs_rq_of(&p->se)->last))
 		return 1;
@@ -2311,7 +2340,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
 {
 	int cpu, orig_cpu, this_cpu, success = 0;
 	unsigned long flags;
-	struct rq *rq;
+	struct rq *rq, *orig_rq;
 
 	if (!sched_feat(SYNC_WAKEUPS))
 		wake_flags &= ~WF_SYNC;
@@ -2319,7 +2348,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
 	this_cpu = get_cpu();
 
 	smp_wmb();
-	rq = task_rq_lock(p, &flags);
+	rq = orig_rq = task_rq_lock(p, &flags);
 	update_rq_clock(rq);
 	if (!(p->state & state))
 		goto out;
@@ -2350,6 +2379,10 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
 	set_task_cpu(p, cpu);
 
 	rq = task_rq_lock(p, &flags);
+
+	if (rq != orig_rq)
+		update_rq_clock(rq);
+
 	WARN_ON(p->state != TASK_WAKING);
 	cpu = task_cpu(p);
 
@@ -2515,22 +2548,17 @@ void sched_fork(struct task_struct *p, int clone_flags)
 	__sched_fork(p);
 
 	/*
-	 * Make sure we do not leak PI boosting priority to the child.
-	 */
-	p->prio = current->normal_prio;
-
-	/*
 	 * Revert to default priority/policy on fork if requested.
 	 */
 	if (unlikely(p->sched_reset_on_fork)) {
-		if (p->policy == SCHED_FIFO || p->policy == SCHED_RR)
+		if (p->policy == SCHED_FIFO || p->policy == SCHED_RR) {
 			p->policy = SCHED_NORMAL;
-
-		if (p->normal_prio < DEFAULT_PRIO)
-			p->prio = DEFAULT_PRIO;
+			p->normal_prio = p->static_prio;
+		}
 
 		if (PRIO_TO_NICE(p->static_prio) < 0) {
 			p->static_prio = NICE_TO_PRIO(0);
+			p->normal_prio = p->static_prio;
 			set_load_weight(p);
 		}
 
@@ -2541,6 +2569,11 @@ void sched_fork(struct task_struct *p, int clone_flags)
 		p->sched_reset_on_fork = 0;
 	}
 
+	/*
+	 * Make sure we do not leak PI boosting priority to the child.
+	 */
+	p->prio = current->normal_prio;
+
 	if (!rt_prio(p->prio))
 		p->sched_class = &fair_sched_class;
 
@@ -2581,8 +2614,6 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 	BUG_ON(p->state != TASK_RUNNING);
 	update_rq_clock(rq);
 
-	p->prio = effective_prio(p);
-
 	if (!p->sched_class->task_new || !current->se.on_rq) {
 		activate_task(rq, p, 0);
 	} else {
@@ -3658,6 +3689,7 @@ static void update_group_power(struct sched_domain *sd, int cpu)
 
 /**
  * update_sg_lb_stats - Update sched_group's statistics for load balancing.
+ * @sd: The sched_domain whose statistics are to be updated.
  * @group: sched_group whose statistics are to be updated.
  * @this_cpu: Cpu for which load balance is currently performed.
  * @idle: Idle status of this_cpu
@@ -5092,17 +5124,16 @@ void account_idle_time(cputime_t cputime)
  */
 void account_process_tick(struct task_struct *p, int user_tick)
 {
-	cputime_t one_jiffy = jiffies_to_cputime(1);
-	cputime_t one_jiffy_scaled = cputime_to_scaled(one_jiffy);
+	cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
 	struct rq *rq = this_rq();
 
 	if (user_tick)
-		account_user_time(p, one_jiffy, one_jiffy_scaled);
+		account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
 	else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
-		account_system_time(p, HARDIRQ_OFFSET, one_jiffy,
+		account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy,
 				    one_jiffy_scaled);
 	else
-		account_idle_time(one_jiffy);
+		account_idle_time(cputime_one_jiffy);
 }
 
 /*
@@ -6721,9 +6752,6 @@ EXPORT_SYMBOL(yield);
 /*
  * This task is about to go to sleep on IO. Increment rq->nr_iowait so
  * that process accounting knows that this is a task in IO wait state.
- *
- * But don't do that if it is a deliberate, throttling IO wait (this task
- * has set its backing_dev_info: the queue against which it should throttle)
  */
 void __sched io_schedule(void)
 {
@@ -9407,6 +9435,10 @@ void __init sched_init(void)
 #endif /* CONFIG_USER_SCHED */
 #endif /* CONFIG_GROUP_SCHED */
 
+#if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP
+	update_shares_data = __alloc_percpu(nr_cpu_ids * sizeof(unsigned long),
+					    __alignof__(unsigned long));
+#endif
 	for_each_possible_cpu(i) {
 		struct rq *rq;
 
@@ -9532,13 +9564,13 @@ void __init sched_init(void)
 	current->sched_class = &fair_sched_class;
 
 	/* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */
-	alloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT);
+	zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT);
 #ifdef CONFIG_SMP
 #ifdef CONFIG_NO_HZ
-	alloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT);
+	zalloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT);
 	alloc_cpumask_var(&nohz.ilb_grp_nohz_mask, GFP_NOWAIT);
 #endif
-	alloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
+	zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
 #endif /* SMP */
 
 	perf_event_init();
@@ -10313,7 +10345,7 @@ static int sched_rt_global_constraints(void)
 #endif /* CONFIG_RT_GROUP_SCHED */
 
 int sched_rt_handler(struct ctl_table *table, int write,
-		struct file *filp, void __user *buffer, size_t *lenp,
+		void __user *buffer, size_t *lenp,
 		loff_t *ppos)
 {
 	int ret;
@@ -10324,7 +10356,7 @@ int sched_rt_handler(struct ctl_table *table, int write,
 	old_period = sysctl_sched_rt_period;
 	old_runtime = sysctl_sched_rt_runtime;
 
-	ret = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+	ret = proc_dointvec(table, write, buffer, lenp, ppos);
 
 	if (!ret && write) {
 		ret = sched_rt_global_constraints();
@@ -10378,8 +10410,7 @@ cpu_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
 }
 
 static int
-cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-		      struct task_struct *tsk)
+cpu_cgroup_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 {
 #ifdef CONFIG_RT_GROUP_SCHED
 	if (!sched_rt_can_attach(cgroup_tg(cgrp), tsk))
@@ -10389,15 +10420,45 @@ cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
 	if (tsk->sched_class != &fair_sched_class)
 		return -EINVAL;
 #endif
+	return 0;
+}
 
+static int
+cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
+		      struct task_struct *tsk, bool threadgroup)
+{
+	int retval = cpu_cgroup_can_attach_task(cgrp, tsk);
+	if (retval)
+		return retval;
+	if (threadgroup) {
+		struct task_struct *c;
+		rcu_read_lock();
+		list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
+			retval = cpu_cgroup_can_attach_task(cgrp, c);
+			if (retval) {
+				rcu_read_unlock();
+				return retval;
+			}
+		}
+		rcu_read_unlock();
+	}
 	return 0;
 }
 
 static void
 cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-		  struct cgroup *old_cont, struct task_struct *tsk)
+		  struct cgroup *old_cont, struct task_struct *tsk,
+		  bool threadgroup)
 {
 	sched_move_task(tsk);
+	if (threadgroup) {
+		struct task_struct *c;
+		rcu_read_lock();
+		list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
+			sched_move_task(c);
+		}
+		rcu_read_unlock();
+	}
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED