diff options
Diffstat (limited to 'kernel/sched.c')
-rw-r--r-- | kernel/sched.c | 98 |
1 files changed, 66 insertions, 32 deletions
diff --git a/kernel/sched.c b/kernel/sched.c index 1535f3884b88..3c11ae0a948d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -309,6 +309,8 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct rt_rq, init_rt_rq); | |||
309 | */ | 309 | */ |
310 | static DEFINE_SPINLOCK(task_group_lock); | 310 | static DEFINE_SPINLOCK(task_group_lock); |
311 | 311 | ||
312 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
313 | |||
312 | #ifdef CONFIG_SMP | 314 | #ifdef CONFIG_SMP |
313 | static int root_task_group_empty(void) | 315 | static int root_task_group_empty(void) |
314 | { | 316 | { |
@@ -316,7 +318,6 @@ static int root_task_group_empty(void) | |||
316 | } | 318 | } |
317 | #endif | 319 | #endif |
318 | 320 | ||
319 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
320 | #ifdef CONFIG_USER_SCHED | 321 | #ifdef CONFIG_USER_SCHED |
321 | # define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD) | 322 | # define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD) |
322 | #else /* !CONFIG_USER_SCHED */ | 323 | #else /* !CONFIG_USER_SCHED */ |
@@ -676,6 +677,7 @@ inline void update_rq_clock(struct rq *rq) | |||
676 | 677 | ||
677 | /** | 678 | /** |
678 | * runqueue_is_locked | 679 | * runqueue_is_locked |
680 | * @cpu: the processor in question. | ||
679 | * | 681 | * |
680 | * Returns true if the current cpu runqueue is locked. | 682 | * Returns true if the current cpu runqueue is locked. |
681 | * This interface allows printk to be called with the runqueue lock | 683 | * This interface allows printk to be called with the runqueue lock |
@@ -1563,11 +1565,7 @@ static unsigned long cpu_avg_load_per_task(int cpu) | |||
1563 | 1565 | ||
1564 | #ifdef CONFIG_FAIR_GROUP_SCHED | 1566 | #ifdef CONFIG_FAIR_GROUP_SCHED |
1565 | 1567 | ||
1566 | struct update_shares_data { | 1568 | static __read_mostly unsigned long *update_shares_data; |
1567 | unsigned long rq_weight[NR_CPUS]; | ||
1568 | }; | ||
1569 | |||
1570 | static DEFINE_PER_CPU(struct update_shares_data, update_shares_data); | ||
1571 | 1569 | ||
1572 | static void __set_se_shares(struct sched_entity *se, unsigned long shares); | 1570 | static void __set_se_shares(struct sched_entity *se, unsigned long shares); |
1573 | 1571 | ||
@@ -1577,12 +1575,12 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares); | |||
1577 | static void update_group_shares_cpu(struct task_group *tg, int cpu, | 1575 | static void update_group_shares_cpu(struct task_group *tg, int cpu, |
1578 | unsigned long sd_shares, | 1576 | unsigned long sd_shares, |
1579 | unsigned long sd_rq_weight, | 1577 | unsigned long sd_rq_weight, |
1580 | struct update_shares_data *usd) | 1578 | unsigned long *usd_rq_weight) |
1581 | { | 1579 | { |
1582 | unsigned long shares, rq_weight; | 1580 | unsigned long shares, rq_weight; |
1583 | int boost = 0; | 1581 | int boost = 0; |
1584 | 1582 | ||
1585 | rq_weight = usd->rq_weight[cpu]; | 1583 | rq_weight = usd_rq_weight[cpu]; |
1586 | if (!rq_weight) { | 1584 | if (!rq_weight) { |
1587 | boost = 1; | 1585 | boost = 1; |
1588 | rq_weight = NICE_0_LOAD; | 1586 | rq_weight = NICE_0_LOAD; |
@@ -1617,7 +1615,7 @@ static void update_group_shares_cpu(struct task_group *tg, int cpu, | |||
1617 | static int tg_shares_up(struct task_group *tg, void *data) | 1615 | static int tg_shares_up(struct task_group *tg, void *data) |
1618 | { | 1616 | { |
1619 | unsigned long weight, rq_weight = 0, shares = 0; | 1617 | unsigned long weight, rq_weight = 0, shares = 0; |
1620 | struct update_shares_data *usd; | 1618 | unsigned long *usd_rq_weight; |
1621 | struct sched_domain *sd = data; | 1619 | struct sched_domain *sd = data; |
1622 | unsigned long flags; | 1620 | unsigned long flags; |
1623 | int i; | 1621 | int i; |
@@ -1626,11 +1624,11 @@ static int tg_shares_up(struct task_group *tg, void *data) | |||
1626 | return 0; | 1624 | return 0; |
1627 | 1625 | ||
1628 | local_irq_save(flags); | 1626 | local_irq_save(flags); |
1629 | usd = &__get_cpu_var(update_shares_data); | 1627 | usd_rq_weight = per_cpu_ptr(update_shares_data, smp_processor_id()); |
1630 | 1628 | ||
1631 | for_each_cpu(i, sched_domain_span(sd)) { | 1629 | for_each_cpu(i, sched_domain_span(sd)) { |
1632 | weight = tg->cfs_rq[i]->load.weight; | 1630 | weight = tg->cfs_rq[i]->load.weight; |
1633 | usd->rq_weight[i] = weight; | 1631 | usd_rq_weight[i] = weight; |
1634 | 1632 | ||
1635 | /* | 1633 | /* |
1636 | * If there are currently no tasks on the cpu pretend there | 1634 | * If there are currently no tasks on the cpu pretend there |
@@ -1651,7 +1649,7 @@ static int tg_shares_up(struct task_group *tg, void *data) | |||
1651 | shares = tg->shares; | 1649 | shares = tg->shares; |
1652 | 1650 | ||
1653 | for_each_cpu(i, sched_domain_span(sd)) | 1651 | for_each_cpu(i, sched_domain_span(sd)) |
1654 | update_group_shares_cpu(tg, i, shares, rq_weight, usd); | 1652 | update_group_shares_cpu(tg, i, shares, rq_weight, usd_rq_weight); |
1655 | 1653 | ||
1656 | local_irq_restore(flags); | 1654 | local_irq_restore(flags); |
1657 | 1655 | ||
@@ -1995,6 +1993,38 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p, | |||
1995 | p->sched_class->prio_changed(rq, p, oldprio, running); | 1993 | p->sched_class->prio_changed(rq, p, oldprio, running); |
1996 | } | 1994 | } |
1997 | 1995 | ||
1996 | /** | ||
1997 | * kthread_bind - bind a just-created kthread to a cpu. | ||
1998 | * @p: thread created by kthread_create(). | ||
1999 | * @cpu: cpu (might not be online, must be possible) for @p to run on. | |
2000 | * | ||
2001 | * Description: This function is equivalent to set_cpus_allowed(), | ||
2002 | * except that @cpu doesn't need to be online, and the thread must be | ||
2003 | * stopped (i.e., just returned from kthread_create()). | ||
2004 | * | ||
2005 | * Function lives here instead of kthread.c because it messes with | ||
2006 | * scheduler internals which require locking. | ||
2007 | */ | ||
2008 | void kthread_bind(struct task_struct *p, unsigned int cpu) | ||
2009 | { | ||
2010 | struct rq *rq = cpu_rq(cpu); | ||
2011 | unsigned long flags; | ||
2012 | |||
2013 | /* Must have done schedule() in kthread() before we set_task_cpu */ | ||
2014 | if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) { | ||
2015 | WARN_ON(1); | ||
2016 | return; | ||
2017 | } | ||
2018 | |||
2019 | spin_lock_irqsave(&rq->lock, flags); | ||
2020 | set_task_cpu(p, cpu); | ||
2021 | p->cpus_allowed = cpumask_of_cpu(cpu); | ||
2022 | p->rt.nr_cpus_allowed = 1; | ||
2023 | p->flags |= PF_THREAD_BOUND; | ||
2024 | spin_unlock_irqrestore(&rq->lock, flags); | ||
2025 | } | ||
2026 | EXPORT_SYMBOL(kthread_bind); | ||
2027 | |||
1998 | #ifdef CONFIG_SMP | 2028 | #ifdef CONFIG_SMP |
1999 | /* | 2029 | /* |
2000 | * Is this task likely cache-hot: | 2030 | * Is this task likely cache-hot: |
@@ -2007,7 +2037,7 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) | |||
2007 | /* | 2037 | /* |
2008 | * Buddy candidates are cache hot: | 2038 | * Buddy candidates are cache hot: |
2009 | */ | 2039 | */ |
2010 | if (sched_feat(CACHE_HOT_BUDDY) && | 2040 | if (sched_feat(CACHE_HOT_BUDDY) && this_rq()->nr_running && |
2011 | (&p->se == cfs_rq_of(&p->se)->next || | 2041 | (&p->se == cfs_rq_of(&p->se)->next || |
2012 | &p->se == cfs_rq_of(&p->se)->last)) | 2042 | &p->se == cfs_rq_of(&p->se)->last)) |
2013 | return 1; | 2043 | return 1; |
@@ -2311,7 +2341,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, | |||
2311 | { | 2341 | { |
2312 | int cpu, orig_cpu, this_cpu, success = 0; | 2342 | int cpu, orig_cpu, this_cpu, success = 0; |
2313 | unsigned long flags; | 2343 | unsigned long flags; |
2314 | struct rq *rq; | 2344 | struct rq *rq, *orig_rq; |
2315 | 2345 | ||
2316 | if (!sched_feat(SYNC_WAKEUPS)) | 2346 | if (!sched_feat(SYNC_WAKEUPS)) |
2317 | wake_flags &= ~WF_SYNC; | 2347 | wake_flags &= ~WF_SYNC; |
@@ -2319,7 +2349,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, | |||
2319 | this_cpu = get_cpu(); | 2349 | this_cpu = get_cpu(); |
2320 | 2350 | ||
2321 | smp_wmb(); | 2351 | smp_wmb(); |
2322 | rq = task_rq_lock(p, &flags); | 2352 | rq = orig_rq = task_rq_lock(p, &flags); |
2323 | update_rq_clock(rq); | 2353 | update_rq_clock(rq); |
2324 | if (!(p->state & state)) | 2354 | if (!(p->state & state)) |
2325 | goto out; | 2355 | goto out; |
@@ -2350,6 +2380,10 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, | |||
2350 | set_task_cpu(p, cpu); | 2380 | set_task_cpu(p, cpu); |
2351 | 2381 | ||
2352 | rq = task_rq_lock(p, &flags); | 2382 | rq = task_rq_lock(p, &flags); |
2383 | |||
2384 | if (rq != orig_rq) | ||
2385 | update_rq_clock(rq); | ||
2386 | |||
2353 | WARN_ON(p->state != TASK_WAKING); | 2387 | WARN_ON(p->state != TASK_WAKING); |
2354 | cpu = task_cpu(p); | 2388 | cpu = task_cpu(p); |
2355 | 2389 | ||
@@ -2515,22 +2549,17 @@ void sched_fork(struct task_struct *p, int clone_flags) | |||
2515 | __sched_fork(p); | 2549 | __sched_fork(p); |
2516 | 2550 | ||
2517 | /* | 2551 | /* |
2518 | * Make sure we do not leak PI boosting priority to the child. | ||
2519 | */ | ||
2520 | p->prio = current->normal_prio; | ||
2521 | |||
2522 | /* | ||
2523 | * Revert to default priority/policy on fork if requested. | 2552 | * Revert to default priority/policy on fork if requested. |
2524 | */ | 2553 | */ |
2525 | if (unlikely(p->sched_reset_on_fork)) { | 2554 | if (unlikely(p->sched_reset_on_fork)) { |
2526 | if (p->policy == SCHED_FIFO || p->policy == SCHED_RR) | 2555 | if (p->policy == SCHED_FIFO || p->policy == SCHED_RR) { |
2527 | p->policy = SCHED_NORMAL; | 2556 | p->policy = SCHED_NORMAL; |
2528 | 2557 | p->normal_prio = p->static_prio; | |
2529 | if (p->normal_prio < DEFAULT_PRIO) | 2558 | } |
2530 | p->prio = DEFAULT_PRIO; | ||
2531 | 2559 | ||
2532 | if (PRIO_TO_NICE(p->static_prio) < 0) { | 2560 | if (PRIO_TO_NICE(p->static_prio) < 0) { |
2533 | p->static_prio = NICE_TO_PRIO(0); | 2561 | p->static_prio = NICE_TO_PRIO(0); |
2562 | p->normal_prio = p->static_prio; | ||
2534 | set_load_weight(p); | 2563 | set_load_weight(p); |
2535 | } | 2564 | } |
2536 | 2565 | ||
@@ -2541,6 +2570,11 @@ void sched_fork(struct task_struct *p, int clone_flags) | |||
2541 | p->sched_reset_on_fork = 0; | 2570 | p->sched_reset_on_fork = 0; |
2542 | } | 2571 | } |
2543 | 2572 | ||
2573 | /* | ||
2574 | * Make sure we do not leak PI boosting priority to the child. | ||
2575 | */ | ||
2576 | p->prio = current->normal_prio; | ||
2577 | |||
2544 | if (!rt_prio(p->prio)) | 2578 | if (!rt_prio(p->prio)) |
2545 | p->sched_class = &fair_sched_class; | 2579 | p->sched_class = &fair_sched_class; |
2546 | 2580 | ||
@@ -2581,8 +2615,6 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) | |||
2581 | BUG_ON(p->state != TASK_RUNNING); | 2615 | BUG_ON(p->state != TASK_RUNNING); |
2582 | update_rq_clock(rq); | 2616 | update_rq_clock(rq); |
2583 | 2617 | ||
2584 | p->prio = effective_prio(p); | ||
2585 | |||
2586 | if (!p->sched_class->task_new || !current->se.on_rq) { | 2618 | if (!p->sched_class->task_new || !current->se.on_rq) { |
2587 | activate_task(rq, p, 0); | 2619 | activate_task(rq, p, 0); |
2588 | } else { | 2620 | } else { |
@@ -3658,6 +3690,7 @@ static void update_group_power(struct sched_domain *sd, int cpu) | |||
3658 | 3690 | ||
3659 | /** | 3691 | /** |
3660 | * update_sg_lb_stats - Update sched_group's statistics for load balancing. | 3692 | * update_sg_lb_stats - Update sched_group's statistics for load balancing. |
3693 | * @sd: The sched_domain whose statistics are to be updated. | ||
3661 | * @group: sched_group whose statistics are to be updated. | 3694 | * @group: sched_group whose statistics are to be updated. |
3662 | * @this_cpu: Cpu for which load balance is currently performed. | 3695 | * @this_cpu: Cpu for which load balance is currently performed. |
3663 | * @idle: Idle status of this_cpu | 3696 | * @idle: Idle status of this_cpu |
@@ -6720,9 +6753,6 @@ EXPORT_SYMBOL(yield); | |||
6720 | /* | 6753 | /* |
6721 | * This task is about to go to sleep on IO. Increment rq->nr_iowait so | 6754 | * This task is about to go to sleep on IO. Increment rq->nr_iowait so |
6722 | * that process accounting knows that this is a task in IO wait state. | 6755 | * that process accounting knows that this is a task in IO wait state. |
6723 | * | ||
6724 | * But don't do that if it is a deliberate, throttling IO wait (this task | ||
6725 | * has set its backing_dev_info: the queue against which it should throttle) | ||
6726 | */ | 6756 | */ |
6727 | void __sched io_schedule(void) | 6757 | void __sched io_schedule(void) |
6728 | { | 6758 | { |
@@ -9406,6 +9436,10 @@ void __init sched_init(void) | |||
9406 | #endif /* CONFIG_USER_SCHED */ | 9436 | #endif /* CONFIG_USER_SCHED */ |
9407 | #endif /* CONFIG_GROUP_SCHED */ | 9437 | #endif /* CONFIG_GROUP_SCHED */ |
9408 | 9438 | ||
9439 | #if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP | ||
9440 | update_shares_data = __alloc_percpu(nr_cpu_ids * sizeof(unsigned long), | ||
9441 | __alignof__(unsigned long)); | ||
9442 | #endif | ||
9409 | for_each_possible_cpu(i) { | 9443 | for_each_possible_cpu(i) { |
9410 | struct rq *rq; | 9444 | struct rq *rq; |
9411 | 9445 | ||
@@ -9531,13 +9565,13 @@ void __init sched_init(void) | |||
9531 | current->sched_class = &fair_sched_class; | 9565 | current->sched_class = &fair_sched_class; |
9532 | 9566 | ||
9533 | /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */ | 9567 | /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */ |
9534 | alloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT); | 9568 | zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT); |
9535 | #ifdef CONFIG_SMP | 9569 | #ifdef CONFIG_SMP |
9536 | #ifdef CONFIG_NO_HZ | 9570 | #ifdef CONFIG_NO_HZ |
9537 | alloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT); | 9571 | zalloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT); |
9538 | alloc_cpumask_var(&nohz.ilb_grp_nohz_mask, GFP_NOWAIT); | 9572 | alloc_cpumask_var(&nohz.ilb_grp_nohz_mask, GFP_NOWAIT); |
9539 | #endif | 9573 | #endif |
9540 | alloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); | 9574 | zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); |
9541 | #endif /* SMP */ | 9575 | #endif /* SMP */ |
9542 | 9576 | ||
9543 | perf_event_init(); | 9577 | perf_event_init(); |