aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/sched.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--kernel/sched.c556
1 files changed, 339 insertions, 217 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 1535f3884b88..ff39cadf621e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -309,6 +309,8 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct rt_rq, init_rt_rq);
309 */ 309 */
310static DEFINE_SPINLOCK(task_group_lock); 310static DEFINE_SPINLOCK(task_group_lock);
311 311
312#ifdef CONFIG_FAIR_GROUP_SCHED
313
312#ifdef CONFIG_SMP 314#ifdef CONFIG_SMP
313static int root_task_group_empty(void) 315static int root_task_group_empty(void)
314{ 316{
@@ -316,7 +318,6 @@ static int root_task_group_empty(void)
316} 318}
317#endif 319#endif
318 320
319#ifdef CONFIG_FAIR_GROUP_SCHED
320#ifdef CONFIG_USER_SCHED 321#ifdef CONFIG_USER_SCHED
321# define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD) 322# define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD)
322#else /* !CONFIG_USER_SCHED */ 323#else /* !CONFIG_USER_SCHED */
@@ -534,14 +535,12 @@ struct rq {
534 #define CPU_LOAD_IDX_MAX 5 535 #define CPU_LOAD_IDX_MAX 5
535 unsigned long cpu_load[CPU_LOAD_IDX_MAX]; 536 unsigned long cpu_load[CPU_LOAD_IDX_MAX];
536#ifdef CONFIG_NO_HZ 537#ifdef CONFIG_NO_HZ
537 unsigned long last_tick_seen;
538 unsigned char in_nohz_recently; 538 unsigned char in_nohz_recently;
539#endif 539#endif
540 /* capture load from *all* tasks on this cpu: */ 540 /* capture load from *all* tasks on this cpu: */
541 struct load_weight load; 541 struct load_weight load;
542 unsigned long nr_load_updates; 542 unsigned long nr_load_updates;
543 u64 nr_switches; 543 u64 nr_switches;
544 u64 nr_migrations_in;
545 544
546 struct cfs_rq cfs; 545 struct cfs_rq cfs;
547 struct rt_rq rt; 546 struct rt_rq rt;
@@ -590,6 +589,8 @@ struct rq {
590 589
591 u64 rt_avg; 590 u64 rt_avg;
592 u64 age_stamp; 591 u64 age_stamp;
592 u64 idle_stamp;
593 u64 avg_idle;
593#endif 594#endif
594 595
595 /* calc_load related fields */ 596 /* calc_load related fields */
@@ -676,6 +677,7 @@ inline void update_rq_clock(struct rq *rq)
676 677
677/** 678/**
678 * runqueue_is_locked 679 * runqueue_is_locked
680 * @cpu: the processor in question.
679 * 681 *
680 * Returns true if the runqueue of @cpu is locked. 682 * Returns true if the runqueue of @cpu is locked.
681 * This interface allows printk to be called with the runqueue lock 683 * This interface allows printk to be called with the runqueue lock
@@ -770,7 +772,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
770 if (!sched_feat_names[i]) 772 if (!sched_feat_names[i])
771 return -EINVAL; 773 return -EINVAL;
772 774
773 filp->f_pos += cnt; 775 *ppos += cnt;
774 776
775 return cnt; 777 return cnt;
776} 778}
@@ -812,6 +814,7 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32;
812 * default: 0.25ms 814 * default: 0.25ms
813 */ 815 */
814unsigned int sysctl_sched_shares_ratelimit = 250000; 816unsigned int sysctl_sched_shares_ratelimit = 250000;
817unsigned int normalized_sysctl_sched_shares_ratelimit = 250000;
815 818
816/* 819/*
817 * Inject some fuzziness into changing the per-cpu group shares 820 * Inject some fuzziness into changing the per-cpu group shares
@@ -1563,11 +1566,7 @@ static unsigned long cpu_avg_load_per_task(int cpu)
1563 1566
1564#ifdef CONFIG_FAIR_GROUP_SCHED 1567#ifdef CONFIG_FAIR_GROUP_SCHED
1565 1568
1566struct update_shares_data { 1569static __read_mostly unsigned long *update_shares_data;
1567 unsigned long rq_weight[NR_CPUS];
1568};
1569
1570static DEFINE_PER_CPU(struct update_shares_data, update_shares_data);
1571 1570
1572static void __set_se_shares(struct sched_entity *se, unsigned long shares); 1571static void __set_se_shares(struct sched_entity *se, unsigned long shares);
1573 1572
@@ -1577,12 +1576,12 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares);
1577static void update_group_shares_cpu(struct task_group *tg, int cpu, 1576static void update_group_shares_cpu(struct task_group *tg, int cpu,
1578 unsigned long sd_shares, 1577 unsigned long sd_shares,
1579 unsigned long sd_rq_weight, 1578 unsigned long sd_rq_weight,
1580 struct update_shares_data *usd) 1579 unsigned long *usd_rq_weight)
1581{ 1580{
1582 unsigned long shares, rq_weight; 1581 unsigned long shares, rq_weight;
1583 int boost = 0; 1582 int boost = 0;
1584 1583
1585 rq_weight = usd->rq_weight[cpu]; 1584 rq_weight = usd_rq_weight[cpu];
1586 if (!rq_weight) { 1585 if (!rq_weight) {
1587 boost = 1; 1586 boost = 1;
1588 rq_weight = NICE_0_LOAD; 1587 rq_weight = NICE_0_LOAD;
@@ -1616,8 +1615,8 @@ static void update_group_shares_cpu(struct task_group *tg, int cpu,
1616 */ 1615 */
1617static int tg_shares_up(struct task_group *tg, void *data) 1616static int tg_shares_up(struct task_group *tg, void *data)
1618{ 1617{
1619 unsigned long weight, rq_weight = 0, shares = 0; 1618 unsigned long weight, rq_weight = 0, sum_weight = 0, shares = 0;
1620 struct update_shares_data *usd; 1619 unsigned long *usd_rq_weight;
1621 struct sched_domain *sd = data; 1620 struct sched_domain *sd = data;
1622 unsigned long flags; 1621 unsigned long flags;
1623 int i; 1622 int i;
@@ -1626,12 +1625,13 @@ static int tg_shares_up(struct task_group *tg, void *data)
1626 return 0; 1625 return 0;
1627 1626
1628 local_irq_save(flags); 1627 local_irq_save(flags);
1629 usd = &__get_cpu_var(update_shares_data); 1628 usd_rq_weight = per_cpu_ptr(update_shares_data, smp_processor_id());
1630 1629
1631 for_each_cpu(i, sched_domain_span(sd)) { 1630 for_each_cpu(i, sched_domain_span(sd)) {
1632 weight = tg->cfs_rq[i]->load.weight; 1631 weight = tg->cfs_rq[i]->load.weight;
1633 usd->rq_weight[i] = weight; 1632 usd_rq_weight[i] = weight;
1634 1633
1634 rq_weight += weight;
1635 /* 1635 /*
1636 * If there are currently no tasks on the cpu pretend there 1636 * If there are currently no tasks on the cpu pretend there
1637 * is one of average load so that when a new task gets to 1637 * is one of average load so that when a new task gets to
@@ -1640,10 +1640,13 @@ static int tg_shares_up(struct task_group *tg, void *data)
1640 if (!weight) 1640 if (!weight)
1641 weight = NICE_0_LOAD; 1641 weight = NICE_0_LOAD;
1642 1642
1643 rq_weight += weight; 1643 sum_weight += weight;
1644 shares += tg->cfs_rq[i]->shares; 1644 shares += tg->cfs_rq[i]->shares;
1645 } 1645 }
1646 1646
1647 if (!rq_weight)
1648 rq_weight = sum_weight;
1649
1647 if ((!shares && rq_weight) || shares > tg->shares) 1650 if ((!shares && rq_weight) || shares > tg->shares)
1648 shares = tg->shares; 1651 shares = tg->shares;
1649 1652
@@ -1651,7 +1654,7 @@ static int tg_shares_up(struct task_group *tg, void *data)
1651 shares = tg->shares; 1654 shares = tg->shares;
1652 1655
1653 for_each_cpu(i, sched_domain_span(sd)) 1656 for_each_cpu(i, sched_domain_span(sd))
1654 update_group_shares_cpu(tg, i, shares, rq_weight, usd); 1657 update_group_shares_cpu(tg, i, shares, rq_weight, usd_rq_weight);
1655 1658
1656 local_irq_restore(flags); 1659 local_irq_restore(flags);
1657 1660
@@ -1812,6 +1815,22 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
1812#endif 1815#endif
1813 1816
1814static void calc_load_account_active(struct rq *this_rq); 1817static void calc_load_account_active(struct rq *this_rq);
1818static void update_sysctl(void);
1819static int get_update_sysctl_factor(void);
1820
1821static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
1822{
1823 set_task_rq(p, cpu);
1824#ifdef CONFIG_SMP
1825 /*
1826 * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
1827 * successfully executed on another CPU. We must ensure that updates of
1828 * per-task data have been completed by this moment.
1829 */
1830 smp_wmb();
1831 task_thread_info(p)->cpu = cpu;
1832#endif
1833}
1815 1834
1816#include "sched_stats.h" 1835#include "sched_stats.h"
1817#include "sched_idletask.c" 1836#include "sched_idletask.c"
@@ -1969,20 +1988,6 @@ inline int task_curr(const struct task_struct *p)
1969 return cpu_curr(task_cpu(p)) == p; 1988 return cpu_curr(task_cpu(p)) == p;
1970} 1989}
1971 1990
1972static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
1973{
1974 set_task_rq(p, cpu);
1975#ifdef CONFIG_SMP
1976 /*
1977 * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
1977 * successfully executed on another CPU. We must ensure that updates of
1979 * per-task data have been completed by this moment.
1980 */
1981 smp_wmb();
1982 task_thread_info(p)->cpu = cpu;
1983#endif
1984}
1985
1986static inline void check_class_changed(struct rq *rq, struct task_struct *p, 1991static inline void check_class_changed(struct rq *rq, struct task_struct *p,
1987 const struct sched_class *prev_class, 1992 const struct sched_class *prev_class,
1988 int oldprio, int running) 1993 int oldprio, int running)
@@ -1995,6 +2000,39 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
1995 p->sched_class->prio_changed(rq, p, oldprio, running); 2000 p->sched_class->prio_changed(rq, p, oldprio, running);
1996} 2001}
1997 2002
2003/**
2004 * kthread_bind - bind a just-created kthread to a cpu.
2005 * @p: thread created by kthread_create().
2006 * @cpu: cpu (might not be online, must be possible) for @p to run on.
2007 *
2008 * Description: This function is equivalent to set_cpus_allowed(),
2009 * except that @cpu doesn't need to be online, and the thread must be
2010 * stopped (i.e., just returned from kthread_create()).
2011 *
2012 * Function lives here instead of kthread.c because it messes with
2013 * scheduler internals which require locking.
2014 */
2015void kthread_bind(struct task_struct *p, unsigned int cpu)
2016{
2017 struct rq *rq = cpu_rq(cpu);
2018 unsigned long flags;
2019
2020 /* Must have done schedule() in kthread() before we set_task_cpu */
2021 if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) {
2022 WARN_ON(1);
2023 return;
2024 }
2025
2026 spin_lock_irqsave(&rq->lock, flags);
2027 update_rq_clock(rq);
2028 set_task_cpu(p, cpu);
2029 p->cpus_allowed = cpumask_of_cpu(cpu);
2030 p->rt.nr_cpus_allowed = 1;
2031 p->flags |= PF_THREAD_BOUND;
2032 spin_unlock_irqrestore(&rq->lock, flags);
2033}
2034EXPORT_SYMBOL(kthread_bind);
2035
1998#ifdef CONFIG_SMP 2036#ifdef CONFIG_SMP
1999/* 2037/*
2000 * Is this task likely cache-hot: 2038 * Is this task likely cache-hot:
@@ -2007,7 +2045,7 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
2007 /* 2045 /*
2008 * Buddy candidates are cache hot: 2046 * Buddy candidates are cache hot:
2009 */ 2047 */
2010 if (sched_feat(CACHE_HOT_BUDDY) && 2048 if (sched_feat(CACHE_HOT_BUDDY) && this_rq()->nr_running &&
2011 (&p->se == cfs_rq_of(&p->se)->next || 2049 (&p->se == cfs_rq_of(&p->se)->next ||
2012 &p->se == cfs_rq_of(&p->se)->last)) 2050 &p->se == cfs_rq_of(&p->se)->last))
2013 return 1; 2051 return 1;
@@ -2029,30 +2067,13 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
2029void set_task_cpu(struct task_struct *p, unsigned int new_cpu) 2067void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
2030{ 2068{
2031 int old_cpu = task_cpu(p); 2069 int old_cpu = task_cpu(p);
2032 struct rq *old_rq = cpu_rq(old_cpu), *new_rq = cpu_rq(new_cpu);
2033 struct cfs_rq *old_cfsrq = task_cfs_rq(p), 2070 struct cfs_rq *old_cfsrq = task_cfs_rq(p),
2034 *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu); 2071 *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu);
2035 u64 clock_offset;
2036
2037 clock_offset = old_rq->clock - new_rq->clock;
2038 2072
2039 trace_sched_migrate_task(p, new_cpu); 2073 trace_sched_migrate_task(p, new_cpu);
2040 2074
2041#ifdef CONFIG_SCHEDSTATS
2042 if (p->se.wait_start)
2043 p->se.wait_start -= clock_offset;
2044 if (p->se.sleep_start)
2045 p->se.sleep_start -= clock_offset;
2046 if (p->se.block_start)
2047 p->se.block_start -= clock_offset;
2048#endif
2049 if (old_cpu != new_cpu) { 2075 if (old_cpu != new_cpu) {
2050 p->se.nr_migrations++; 2076 p->se.nr_migrations++;
2051 new_rq->nr_migrations_in++;
2052#ifdef CONFIG_SCHEDSTATS
2053 if (task_hot(p, old_rq->clock, NULL))
2054 schedstat_inc(p, se.nr_forced2_migrations);
2055#endif
2056 perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 2077 perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS,
2057 1, 1, NULL, 0); 2078 1, 1, NULL, 0);
2058 } 2079 }
@@ -2085,6 +2106,7 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
2085 * it is sufficient to simply update the task's cpu field. 2106 * it is sufficient to simply update the task's cpu field.
2086 */ 2107 */
2087 if (!p->se.on_rq && !task_running(rq, p)) { 2108 if (!p->se.on_rq && !task_running(rq, p)) {
2109 update_rq_clock(rq);
2088 set_task_cpu(p, dest_cpu); 2110 set_task_cpu(p, dest_cpu);
2089 return 0; 2111 return 0;
2090 } 2112 }
@@ -2292,6 +2314,14 @@ void task_oncpu_function_call(struct task_struct *p,
2292 preempt_enable(); 2314 preempt_enable();
2293} 2315}
2294 2316
2317#ifdef CONFIG_SMP
2318static inline
2319int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
2320{
2321 return p->sched_class->select_task_rq(p, sd_flags, wake_flags);
2322}
2323#endif
2324
2295/*** 2325/***
2296 * try_to_wake_up - wake up a thread 2326 * try_to_wake_up - wake up a thread
2297 * @p: the to-be-woken-up thread 2327 * @p: the to-be-woken-up thread
@@ -2311,7 +2341,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
2311{ 2341{
2312 int cpu, orig_cpu, this_cpu, success = 0; 2342 int cpu, orig_cpu, this_cpu, success = 0;
2313 unsigned long flags; 2343 unsigned long flags;
2314 struct rq *rq; 2344 struct rq *rq, *orig_rq;
2315 2345
2316 if (!sched_feat(SYNC_WAKEUPS)) 2346 if (!sched_feat(SYNC_WAKEUPS))
2317 wake_flags &= ~WF_SYNC; 2347 wake_flags &= ~WF_SYNC;
@@ -2319,7 +2349,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
2319 this_cpu = get_cpu(); 2349 this_cpu = get_cpu();
2320 2350
2321 smp_wmb(); 2351 smp_wmb();
2322 rq = task_rq_lock(p, &flags); 2352 rq = orig_rq = task_rq_lock(p, &flags);
2323 update_rq_clock(rq); 2353 update_rq_clock(rq);
2324 if (!(p->state & state)) 2354 if (!(p->state & state))
2325 goto out; 2355 goto out;
@@ -2343,13 +2373,15 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
2343 if (task_contributes_to_load(p)) 2373 if (task_contributes_to_load(p))
2344 rq->nr_uninterruptible--; 2374 rq->nr_uninterruptible--;
2345 p->state = TASK_WAKING; 2375 p->state = TASK_WAKING;
2346 task_rq_unlock(rq, &flags); 2376 __task_rq_unlock(rq);
2347 2377
2348 cpu = p->sched_class->select_task_rq(p, SD_BALANCE_WAKE, wake_flags); 2378 cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
2349 if (cpu != orig_cpu) 2379 if (cpu != orig_cpu)
2350 set_task_cpu(p, cpu); 2380 set_task_cpu(p, cpu);
2351 2381
2352 rq = task_rq_lock(p, &flags); 2382 rq = __task_rq_lock(p);
2383 update_rq_clock(rq);
2384
2353 WARN_ON(p->state != TASK_WAKING); 2385 WARN_ON(p->state != TASK_WAKING);
2354 cpu = task_cpu(p); 2386 cpu = task_cpu(p);
2355 2387
@@ -2406,6 +2438,17 @@ out_running:
2406#ifdef CONFIG_SMP 2438#ifdef CONFIG_SMP
2407 if (p->sched_class->task_wake_up) 2439 if (p->sched_class->task_wake_up)
2408 p->sched_class->task_wake_up(rq, p); 2440 p->sched_class->task_wake_up(rq, p);
2441
2442 if (unlikely(rq->idle_stamp)) {
2443 u64 delta = rq->clock - rq->idle_stamp;
2444 u64 max = 2*sysctl_sched_migration_cost;
2445
2446 if (delta > max)
2447 rq->avg_idle = max;
2448 else
2449 update_avg(&rq->avg_idle, delta);
2450 rq->idle_stamp = 0;
2451 }
2409#endif 2452#endif
2410out: 2453out:
2411 task_rq_unlock(rq, &flags); 2454 task_rq_unlock(rq, &flags);
@@ -2452,7 +2495,6 @@ static void __sched_fork(struct task_struct *p)
2452 p->se.avg_overlap = 0; 2495 p->se.avg_overlap = 0;
2453 p->se.start_runtime = 0; 2496 p->se.start_runtime = 0;
2454 p->se.avg_wakeup = sysctl_sched_wakeup_granularity; 2497 p->se.avg_wakeup = sysctl_sched_wakeup_granularity;
2455 p->se.avg_running = 0;
2456 2498
2457#ifdef CONFIG_SCHEDSTATS 2499#ifdef CONFIG_SCHEDSTATS
2458 p->se.wait_start = 0; 2500 p->se.wait_start = 0;
@@ -2474,7 +2516,6 @@ static void __sched_fork(struct task_struct *p)
2474 p->se.nr_failed_migrations_running = 0; 2516 p->se.nr_failed_migrations_running = 0;
2475 p->se.nr_failed_migrations_hot = 0; 2517 p->se.nr_failed_migrations_hot = 0;
2476 p->se.nr_forced_migrations = 0; 2518 p->se.nr_forced_migrations = 0;
2477 p->se.nr_forced2_migrations = 0;
2478 2519
2479 p->se.nr_wakeups = 0; 2520 p->se.nr_wakeups = 0;
2480 p->se.nr_wakeups_sync = 0; 2521 p->se.nr_wakeups_sync = 0;
@@ -2515,22 +2556,17 @@ void sched_fork(struct task_struct *p, int clone_flags)
2515 __sched_fork(p); 2556 __sched_fork(p);
2516 2557
2517 /* 2558 /*
2518 * Make sure we do not leak PI boosting priority to the child.
2519 */
2520 p->prio = current->normal_prio;
2521
2522 /*
2523 * Revert to default priority/policy on fork if requested. 2559 * Revert to default priority/policy on fork if requested.
2524 */ 2560 */
2525 if (unlikely(p->sched_reset_on_fork)) { 2561 if (unlikely(p->sched_reset_on_fork)) {
2526 if (p->policy == SCHED_FIFO || p->policy == SCHED_RR) 2562 if (p->policy == SCHED_FIFO || p->policy == SCHED_RR) {
2527 p->policy = SCHED_NORMAL; 2563 p->policy = SCHED_NORMAL;
2528 2564 p->normal_prio = p->static_prio;
2529 if (p->normal_prio < DEFAULT_PRIO) 2565 }
2530 p->prio = DEFAULT_PRIO;
2531 2566
2532 if (PRIO_TO_NICE(p->static_prio) < 0) { 2567 if (PRIO_TO_NICE(p->static_prio) < 0) {
2533 p->static_prio = NICE_TO_PRIO(0); 2568 p->static_prio = NICE_TO_PRIO(0);
2569 p->normal_prio = p->static_prio;
2534 set_load_weight(p); 2570 set_load_weight(p);
2535 } 2571 }
2536 2572
@@ -2541,11 +2577,19 @@ void sched_fork(struct task_struct *p, int clone_flags)
2541 p->sched_reset_on_fork = 0; 2577 p->sched_reset_on_fork = 0;
2542 } 2578 }
2543 2579
2580 /*
2581 * Make sure we do not leak PI boosting priority to the child.
2582 */
2583 p->prio = current->normal_prio;
2584
2544 if (!rt_prio(p->prio)) 2585 if (!rt_prio(p->prio))
2545 p->sched_class = &fair_sched_class; 2586 p->sched_class = &fair_sched_class;
2546 2587
2588 if (p->sched_class->task_fork)
2589 p->sched_class->task_fork(p);
2590
2547#ifdef CONFIG_SMP 2591#ifdef CONFIG_SMP
2548 cpu = p->sched_class->select_task_rq(p, SD_BALANCE_FORK, 0); 2592 cpu = select_task_rq(p, SD_BALANCE_FORK, 0);
2549#endif 2593#endif
2550 set_task_cpu(p, cpu); 2594 set_task_cpu(p, cpu);
2551 2595
@@ -2580,19 +2624,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
2580 rq = task_rq_lock(p, &flags); 2624 rq = task_rq_lock(p, &flags);
2581 BUG_ON(p->state != TASK_RUNNING); 2625 BUG_ON(p->state != TASK_RUNNING);
2582 update_rq_clock(rq); 2626 update_rq_clock(rq);
2583 2627 activate_task(rq, p, 0);
2584 p->prio = effective_prio(p);
2585
2586 if (!p->sched_class->task_new || !current->se.on_rq) {
2587 activate_task(rq, p, 0);
2588 } else {
2589 /*
2590 * Let the scheduling class do new task startup
2591 * management (if any):
2592 */
2593 p->sched_class->task_new(rq, p);
2594 inc_nr_running(rq);
2595 }
2596 trace_sched_wakeup_new(rq, p, 1); 2628 trace_sched_wakeup_new(rq, p, 1);
2597 check_preempt_curr(rq, p, WF_FORK); 2629 check_preempt_curr(rq, p, WF_FORK);
2598#ifdef CONFIG_SMP 2630#ifdef CONFIG_SMP
@@ -2816,14 +2848,14 @@ context_switch(struct rq *rq, struct task_struct *prev,
2816 */ 2848 */
2817 arch_start_context_switch(prev); 2849 arch_start_context_switch(prev);
2818 2850
2819 if (unlikely(!mm)) { 2851 if (likely(!mm)) {
2820 next->active_mm = oldmm; 2852 next->active_mm = oldmm;
2821 atomic_inc(&oldmm->mm_count); 2853 atomic_inc(&oldmm->mm_count);
2822 enter_lazy_tlb(oldmm, next); 2854 enter_lazy_tlb(oldmm, next);
2823 } else 2855 } else
2824 switch_mm(oldmm, mm, next); 2856 switch_mm(oldmm, mm, next);
2825 2857
2826 if (unlikely(!prev->mm)) { 2858 if (likely(!prev->mm)) {
2827 prev->active_mm = NULL; 2859 prev->active_mm = NULL;
2828 rq->prev_mm = oldmm; 2860 rq->prev_mm = oldmm;
2829 } 2861 }
@@ -2986,15 +3018,6 @@ static void calc_load_account_active(struct rq *this_rq)
2986} 3018}
2987 3019
2988/* 3020/*
2989 * Externally visible per-cpu scheduler statistics:
2990 * cpu_nr_migrations(cpu) - number of migrations into that cpu
2991 */
2992u64 cpu_nr_migrations(int cpu)
2993{
2994 return cpu_rq(cpu)->nr_migrations_in;
2995}
2996
2997/*
2998 * Update rq->cpu_load[] statistics. This function is usually called every 3021 * Update rq->cpu_load[] statistics. This function is usually called every
2999 * scheduler tick (TICK_NSEC). 3022 * scheduler tick (TICK_NSEC).
3000 */ 3023 */
@@ -3116,7 +3139,7 @@ out:
3116void sched_exec(void) 3139void sched_exec(void)
3117{ 3140{
3118 int new_cpu, this_cpu = get_cpu(); 3141 int new_cpu, this_cpu = get_cpu();
3119 new_cpu = current->sched_class->select_task_rq(current, SD_BALANCE_EXEC, 0); 3142 new_cpu = select_task_rq(current, SD_BALANCE_EXEC, 0);
3120 put_cpu(); 3143 put_cpu();
3121 if (new_cpu != this_cpu) 3144 if (new_cpu != this_cpu)
3122 sched_migrate_task(current, new_cpu); 3145 sched_migrate_task(current, new_cpu);
@@ -3132,10 +3155,6 @@ static void pull_task(struct rq *src_rq, struct task_struct *p,
3132 deactivate_task(src_rq, p, 0); 3155 deactivate_task(src_rq, p, 0);
3133 set_task_cpu(p, this_cpu); 3156 set_task_cpu(p, this_cpu);
3134 activate_task(this_rq, p, 0); 3157 activate_task(this_rq, p, 0);
3135 /*
3136 * Note that idle threads have a prio of MAX_PRIO, for this test
3137 * to be always true for them.
3138 */
3139 check_preempt_curr(this_rq, p, 0); 3158 check_preempt_curr(this_rq, p, 0);
3140} 3159}
3141 3160
@@ -3658,6 +3677,7 @@ static void update_group_power(struct sched_domain *sd, int cpu)
3658 3677
3659/** 3678/**
3660 * update_sg_lb_stats - Update sched_group's statistics for load balancing. 3679 * update_sg_lb_stats - Update sched_group's statistics for load balancing.
3680 * @sd: The sched_domain whose statistics are to be updated.
3661 * @group: sched_group whose statistics are to be updated. 3681 * @group: sched_group whose statistics are to be updated.
3662 * @this_cpu: Cpu for which load balance is currently performed. 3682 * @this_cpu: Cpu for which load balance is currently performed.
3663 * @idle: Idle status of this_cpu 3683 * @idle: Idle status of this_cpu
@@ -4093,7 +4113,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
4093 unsigned long flags; 4113 unsigned long flags;
4094 struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); 4114 struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
4095 4115
4096 cpumask_setall(cpus); 4116 cpumask_copy(cpus, cpu_active_mask);
4097 4117
4098 /* 4118 /*
4099 * When power savings policy is enabled for the parent domain, idle 4119 * When power savings policy is enabled for the parent domain, idle
@@ -4256,7 +4276,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
4256 int all_pinned = 0; 4276 int all_pinned = 0;
4257 struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); 4277 struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
4258 4278
4259 cpumask_setall(cpus); 4279 cpumask_copy(cpus, cpu_active_mask);
4260 4280
4261 /* 4281 /*
4262 * When power savings policy is enabled for the parent domain, idle 4282 * When power savings policy is enabled for the parent domain, idle
@@ -4396,6 +4416,11 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
4396 int pulled_task = 0; 4416 int pulled_task = 0;
4397 unsigned long next_balance = jiffies + HZ; 4417 unsigned long next_balance = jiffies + HZ;
4398 4418
4419 this_rq->idle_stamp = this_rq->clock;
4420
4421 if (this_rq->avg_idle < sysctl_sched_migration_cost)
4422 return;
4423
4399 for_each_domain(this_cpu, sd) { 4424 for_each_domain(this_cpu, sd) {
4400 unsigned long interval; 4425 unsigned long interval;
4401 4426
@@ -4410,8 +4435,10 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
4410 interval = msecs_to_jiffies(sd->balance_interval); 4435 interval = msecs_to_jiffies(sd->balance_interval);
4411 if (time_after(next_balance, sd->last_balance + interval)) 4436 if (time_after(next_balance, sd->last_balance + interval))
4412 next_balance = sd->last_balance + interval; 4437 next_balance = sd->last_balance + interval;
4413 if (pulled_task) 4438 if (pulled_task) {
4439 this_rq->idle_stamp = 0;
4414 break; 4440 break;
4441 }
4415 } 4442 }
4416 if (pulled_task || time_after(jiffies, this_rq->next_balance)) { 4443 if (pulled_task || time_after(jiffies, this_rq->next_balance)) {
4417 /* 4444 /*
@@ -4646,7 +4673,7 @@ int select_nohz_load_balancer(int stop_tick)
4646 cpumask_set_cpu(cpu, nohz.cpu_mask); 4673 cpumask_set_cpu(cpu, nohz.cpu_mask);
4647 4674
4648 /* time for ilb owner also to sleep */ 4675 /* time for ilb owner also to sleep */
4649 if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) { 4676 if (cpumask_weight(nohz.cpu_mask) == num_active_cpus()) {
4650 if (atomic_read(&nohz.load_balancer) == cpu) 4677 if (atomic_read(&nohz.load_balancer) == cpu)
4651 atomic_set(&nohz.load_balancer, -1); 4678 atomic_set(&nohz.load_balancer, -1);
4652 return 0; 4679 return 0;
@@ -5013,8 +5040,13 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime,
5013 p->gtime = cputime_add(p->gtime, cputime); 5040 p->gtime = cputime_add(p->gtime, cputime);
5014 5041
5015 /* Add guest time to cpustat. */ 5042 /* Add guest time to cpustat. */
5016 cpustat->user = cputime64_add(cpustat->user, tmp); 5043 if (TASK_NICE(p) > 0) {
5017 cpustat->guest = cputime64_add(cpustat->guest, tmp); 5044 cpustat->nice = cputime64_add(cpustat->nice, tmp);
5045 cpustat->guest_nice = cputime64_add(cpustat->guest_nice, tmp);
5046 } else {
5047 cpustat->user = cputime64_add(cpustat->user, tmp);
5048 cpustat->guest = cputime64_add(cpustat->guest, tmp);
5049 }
5018} 5050}
5019 5051
5020/* 5052/*
@@ -5129,60 +5161,86 @@ void account_idle_ticks(unsigned long ticks)
5129 * Use precise platform statistics if available: 5161 * Use precise platform statistics if available:
5130 */ 5162 */
5131#ifdef CONFIG_VIRT_CPU_ACCOUNTING 5163#ifdef CONFIG_VIRT_CPU_ACCOUNTING
5132cputime_t task_utime(struct task_struct *p) 5164void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
5133{ 5165{
5134 return p->utime; 5166 *ut = p->utime;
5167 *st = p->stime;
5135} 5168}
5136 5169
5137cputime_t task_stime(struct task_struct *p) 5170void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
5138{ 5171{
5139 return p->stime; 5172 struct task_cputime cputime;
5173
5174 thread_group_cputime(p, &cputime);
5175
5176 *ut = cputime.utime;
5177 *st = cputime.stime;
5140} 5178}
5141#else 5179#else
5142cputime_t task_utime(struct task_struct *p) 5180
5181#ifndef nsecs_to_cputime
5182# define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs)
5183#endif
5184
5185void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
5143{ 5186{
5144 clock_t utime = cputime_to_clock_t(p->utime), 5187 cputime_t rtime, utime = p->utime, total = cputime_add(utime, p->stime);
5145 total = utime + cputime_to_clock_t(p->stime);
5146 u64 temp;
5147 5188
5148 /* 5189 /*
5149 * Use CFS's precise accounting: 5190 * Use CFS's precise accounting:
5150 */ 5191 */
5151 temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime); 5192 rtime = nsecs_to_cputime(p->se.sum_exec_runtime);
5152 5193
5153 if (total) { 5194 if (total) {
5154 temp *= utime; 5195 u64 temp;
5196
5197 temp = (u64)(rtime * utime);
5155 do_div(temp, total); 5198 do_div(temp, total);
5156 } 5199 utime = (cputime_t)temp;
5157 utime = (clock_t)temp; 5200 } else
5201 utime = rtime;
5158 5202
5159 p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime)); 5203 /*
5160 return p->prev_utime; 5204 * Compare with previous values, to keep monotonicity:
5205 */
5206 p->prev_utime = max(p->prev_utime, utime);
5207 p->prev_stime = max(p->prev_stime, cputime_sub(rtime, p->prev_utime));
5208
5209 *ut = p->prev_utime;
5210 *st = p->prev_stime;
5161} 5211}
5162 5212
5163cputime_t task_stime(struct task_struct *p) 5213/*
5214 * Must be called with siglock held.
5215 */
5216void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
5164{ 5217{
5165 clock_t stime; 5218 struct signal_struct *sig = p->signal;
5219 struct task_cputime cputime;
5220 cputime_t rtime, utime, total;
5166 5221
5167 /* 5222 thread_group_cputime(p, &cputime);
5168 * Use CFS's precise accounting. (we subtract utime from
5169 * the total, to make sure the total observed by userspace
5170 * grows monotonically - apps rely on that):
5171 */
5172 stime = nsec_to_clock_t(p->se.sum_exec_runtime) -
5173 cputime_to_clock_t(task_utime(p));
5174 5223
5175 if (stime >= 0) 5224 total = cputime_add(cputime.utime, cputime.stime);
5176 p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime)); 5225 rtime = nsecs_to_cputime(cputime.sum_exec_runtime);
5177 5226
5178 return p->prev_stime; 5227 if (total) {
5179} 5228 u64 temp;
5180#endif
5181 5229
5182inline cputime_t task_gtime(struct task_struct *p) 5230 temp = (u64)(rtime * cputime.utime);
5183{ 5231 do_div(temp, total);
5184 return p->gtime; 5232 utime = (cputime_t)temp;
5233 } else
5234 utime = rtime;
5235
5236 sig->prev_utime = max(sig->prev_utime, utime);
5237 sig->prev_stime = max(sig->prev_stime,
5238 cputime_sub(rtime, sig->prev_utime));
5239
5240 *ut = sig->prev_utime;
5241 *st = sig->prev_stime;
5185} 5242}
5243#endif
5186 5244
5187/* 5245/*
5188 * This function gets called by the timer code, with HZ frequency. 5246 * This function gets called by the timer code, with HZ frequency.
@@ -5317,13 +5375,14 @@ static inline void schedule_debug(struct task_struct *prev)
5317#endif 5375#endif
5318} 5376}
5319 5377
5320static void put_prev_task(struct rq *rq, struct task_struct *p) 5378static void put_prev_task(struct rq *rq, struct task_struct *prev)
5321{ 5379{
5322 u64 runtime = p->se.sum_exec_runtime - p->se.prev_sum_exec_runtime; 5380 if (prev->state == TASK_RUNNING) {
5381 u64 runtime = prev->se.sum_exec_runtime;
5323 5382
5324 update_avg(&p->se.avg_running, runtime); 5383 runtime -= prev->se.prev_sum_exec_runtime;
5384 runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost);
5325 5385
5326 if (p->state == TASK_RUNNING) {
5327 /* 5386 /*
5328 * In order to avoid avg_overlap growing stale when we are 5387 * In order to avoid avg_overlap growing stale when we are
5329 * indeed overlapping and hence not getting put to sleep, grow 5388 * indeed overlapping and hence not getting put to sleep, grow
@@ -5333,12 +5392,9 @@ static void put_prev_task(struct rq *rq, struct task_struct *p)
5333 * correlates to the amount of cache footprint a task can 5392 * correlates to the amount of cache footprint a task can
5334 * build up. 5393 * build up.
5335 */ 5394 */
5336 runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost); 5395 update_avg(&prev->se.avg_overlap, runtime);
5337 update_avg(&p->se.avg_overlap, runtime);
5338 } else {
5339 update_avg(&p->se.avg_running, 0);
5340 } 5396 }
5341 p->sched_class->put_prev_task(rq, p); 5397 prev->sched_class->put_prev_task(rq, prev);
5342} 5398}
5343 5399
5344/* 5400/*
@@ -5448,7 +5504,7 @@ need_resched_nonpreemptible:
5448} 5504}
5449EXPORT_SYMBOL(schedule); 5505EXPORT_SYMBOL(schedule);
5450 5506
5451#ifdef CONFIG_SMP 5507#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
5452/* 5508/*
5453 * Look out! "owner" is an entirely speculative pointer 5509 * Look out! "owner" is an entirely speculative pointer
5454 * access and not reliable. 5510 * access and not reliable.
@@ -6142,22 +6198,14 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
6142 BUG_ON(p->se.on_rq); 6198 BUG_ON(p->se.on_rq);
6143 6199
6144 p->policy = policy; 6200 p->policy = policy;
6145 switch (p->policy) {
6146 case SCHED_NORMAL:
6147 case SCHED_BATCH:
6148 case SCHED_IDLE:
6149 p->sched_class = &fair_sched_class;
6150 break;
6151 case SCHED_FIFO:
6152 case SCHED_RR:
6153 p->sched_class = &rt_sched_class;
6154 break;
6155 }
6156
6157 p->rt_priority = prio; 6201 p->rt_priority = prio;
6158 p->normal_prio = normal_prio(p); 6202 p->normal_prio = normal_prio(p);
6159 /* we are holding p->pi_lock already */ 6203 /* we are holding p->pi_lock already */
6160 p->prio = rt_mutex_getprio(p); 6204 p->prio = rt_mutex_getprio(p);
6205 if (rt_prio(p->prio))
6206 p->sched_class = &rt_sched_class;
6207 else
6208 p->sched_class = &fair_sched_class;
6161 set_load_weight(p); 6209 set_load_weight(p);
6162} 6210}
6163 6211
@@ -6560,6 +6608,8 @@ SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len,
6560long sched_getaffinity(pid_t pid, struct cpumask *mask) 6608long sched_getaffinity(pid_t pid, struct cpumask *mask)
6561{ 6609{
6562 struct task_struct *p; 6610 struct task_struct *p;
6611 unsigned long flags;
6612 struct rq *rq;
6563 int retval; 6613 int retval;
6564 6614
6565 get_online_cpus(); 6615 get_online_cpus();
@@ -6574,7 +6624,9 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
6574 if (retval) 6624 if (retval)
6575 goto out_unlock; 6625 goto out_unlock;
6576 6626
6627 rq = task_rq_lock(p, &flags);
6577 cpumask_and(mask, &p->cpus_allowed, cpu_online_mask); 6628 cpumask_and(mask, &p->cpus_allowed, cpu_online_mask);
6629 task_rq_unlock(rq, &flags);
6578 6630
6579out_unlock: 6631out_unlock:
6580 read_unlock(&tasklist_lock); 6632 read_unlock(&tasklist_lock);
@@ -6720,9 +6772,6 @@ EXPORT_SYMBOL(yield);
6720/* 6772/*
6721 * This task is about to go to sleep on IO. Increment rq->nr_iowait so 6773 * This task is about to go to sleep on IO. Increment rq->nr_iowait so
6722 * that process accounting knows that this is a task in IO wait state. 6774 * that process accounting knows that this is a task in IO wait state.
6723 *
6724 * But don't do that if it is a deliberate, throttling IO wait (this task
6725 * has set its backing_dev_info: the queue against which it should throttle)
6726 */ 6775 */
6727void __sched io_schedule(void) 6776void __sched io_schedule(void)
6728{ 6777{
@@ -6815,6 +6864,8 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
6815{ 6864{
6816 struct task_struct *p; 6865 struct task_struct *p;
6817 unsigned int time_slice; 6866 unsigned int time_slice;
6867 unsigned long flags;
6868 struct rq *rq;
6818 int retval; 6869 int retval;
6819 struct timespec t; 6870 struct timespec t;
6820 6871
@@ -6831,7 +6882,9 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
6831 if (retval) 6882 if (retval)
6832 goto out_unlock; 6883 goto out_unlock;
6833 6884
6834 time_slice = p->sched_class->get_rr_interval(p); 6885 rq = task_rq_lock(p, &flags);
6886 time_slice = p->sched_class->get_rr_interval(rq, p);
6887 task_rq_unlock(rq, &flags);
6835 6888
6836 read_unlock(&tasklist_lock); 6889 read_unlock(&tasklist_lock);
6837 jiffies_to_timespec(time_slice, &t); 6890 jiffies_to_timespec(time_slice, &t);
@@ -6905,7 +6958,7 @@ void show_state_filter(unsigned long state_filter)
6905 /* 6958 /*
6906 * Only show locks if all tasks are dumped: 6959 * Only show locks if all tasks are dumped:
6907 */ 6960 */
6908 if (state_filter == -1) 6961 if (!state_filter)
6909 debug_show_all_locks(); 6962 debug_show_all_locks();
6910} 6963}
6911 6964
@@ -6932,7 +6985,6 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
6932 __sched_fork(idle); 6985 __sched_fork(idle);
6933 idle->se.exec_start = sched_clock(); 6986 idle->se.exec_start = sched_clock();
6934 6987
6935 idle->prio = idle->normal_prio = MAX_PRIO;
6936 cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu)); 6988 cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu));
6937 __set_task_cpu(idle, cpu); 6989 __set_task_cpu(idle, cpu);
6938 6990
@@ -6973,22 +7025,43 @@ cpumask_var_t nohz_cpu_mask;
6973 * 7025 *
6974 * This idea comes from the SD scheduler of Con Kolivas: 7026 * This idea comes from the SD scheduler of Con Kolivas:
6975 */ 7027 */
6976static inline void sched_init_granularity(void) 7028static int get_update_sysctl_factor(void)
6977{ 7029{
6978 unsigned int factor = 1 + ilog2(num_online_cpus()); 7030 unsigned int cpus = min_t(int, num_online_cpus(), 8);
6979 const unsigned long limit = 200000000; 7031 unsigned int factor;
7032
7033 switch (sysctl_sched_tunable_scaling) {
7034 case SCHED_TUNABLESCALING_NONE:
7035 factor = 1;
7036 break;
7037 case SCHED_TUNABLESCALING_LINEAR:
7038 factor = cpus;
7039 break;
7040 case SCHED_TUNABLESCALING_LOG:
7041 default:
7042 factor = 1 + ilog2(cpus);
7043 break;
7044 }
6980 7045
6981 sysctl_sched_min_granularity *= factor; 7046 return factor;
6982 if (sysctl_sched_min_granularity > limit) 7047}
6983 sysctl_sched_min_granularity = limit;
6984 7048
6985 sysctl_sched_latency *= factor; 7049static void update_sysctl(void)
6986 if (sysctl_sched_latency > limit) 7050{
6987 sysctl_sched_latency = limit; 7051 unsigned int factor = get_update_sysctl_factor();
6988 7052
6989 sysctl_sched_wakeup_granularity *= factor; 7053#define SET_SYSCTL(name) \
7054 (sysctl_##name = (factor) * normalized_sysctl_##name)
7055 SET_SYSCTL(sched_min_granularity);
7056 SET_SYSCTL(sched_latency);
7057 SET_SYSCTL(sched_wakeup_granularity);
7058 SET_SYSCTL(sched_shares_ratelimit);
7059#undef SET_SYSCTL
7060}
6990 7061
6991 sysctl_sched_shares_ratelimit *= factor; 7062static inline void sched_init_granularity(void)
7063{
7064 update_sysctl();
6992} 7065}
6993 7066
6994#ifdef CONFIG_SMP 7067#ifdef CONFIG_SMP
@@ -7025,7 +7098,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
7025 int ret = 0; 7098 int ret = 0;
7026 7099
7027 rq = task_rq_lock(p, &flags); 7100 rq = task_rq_lock(p, &flags);
7028 if (!cpumask_intersects(new_mask, cpu_online_mask)) { 7101 if (!cpumask_intersects(new_mask, cpu_active_mask)) {
7029 ret = -EINVAL; 7102 ret = -EINVAL;
7030 goto out; 7103 goto out;
7031 } 7104 }
@@ -7047,7 +7120,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
7047 if (cpumask_test_cpu(task_cpu(p), new_mask)) 7120 if (cpumask_test_cpu(task_cpu(p), new_mask))
7048 goto out; 7121 goto out;
7049 7122
7050 if (migrate_task(p, cpumask_any_and(cpu_online_mask, new_mask), &req)) { 7123 if (migrate_task(p, cpumask_any_and(cpu_active_mask, new_mask), &req)) {
7051 /* Need help from migration thread: drop lock and wait. */ 7124 /* Need help from migration thread: drop lock and wait. */
7052 struct task_struct *mt = rq->migration_thread; 7125 struct task_struct *mt = rq->migration_thread;
7053 7126
@@ -7201,19 +7274,19 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
7201 7274
7202again: 7275again:
7203 /* Look for allowed, online CPU in same node. */ 7276 /* Look for allowed, online CPU in same node. */
7204 for_each_cpu_and(dest_cpu, nodemask, cpu_online_mask) 7277 for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask)
7205 if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) 7278 if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
7206 goto move; 7279 goto move;
7207 7280
7208 /* Any allowed, online CPU? */ 7281 /* Any allowed, online CPU? */
7209 dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_online_mask); 7282 dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask);
7210 if (dest_cpu < nr_cpu_ids) 7283 if (dest_cpu < nr_cpu_ids)
7211 goto move; 7284 goto move;
7212 7285
7213 /* No more Mr. Nice Guy. */ 7286 /* No more Mr. Nice Guy. */
7214 if (dest_cpu >= nr_cpu_ids) { 7287 if (dest_cpu >= nr_cpu_ids) {
7215 cpuset_cpus_allowed_locked(p, &p->cpus_allowed); 7288 cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
7216 dest_cpu = cpumask_any_and(cpu_online_mask, &p->cpus_allowed); 7289 dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
7217 7290
7218 /* 7291 /*
7219 * Don't tell them about moving exiting tasks or 7292 * Don't tell them about moving exiting tasks or
@@ -7242,7 +7315,7 @@ move:
7242 */ 7315 */
7243static void migrate_nr_uninterruptible(struct rq *rq_src) 7316static void migrate_nr_uninterruptible(struct rq *rq_src)
7244{ 7317{
7245 struct rq *rq_dest = cpu_rq(cpumask_any(cpu_online_mask)); 7318 struct rq *rq_dest = cpu_rq(cpumask_any(cpu_active_mask));
7246 unsigned long flags; 7319 unsigned long flags;
7247 7320
7248 local_irq_save(flags); 7321 local_irq_save(flags);
@@ -7376,17 +7449,16 @@ static struct ctl_table sd_ctl_dir[] = {
7376 .procname = "sched_domain", 7449 .procname = "sched_domain",
7377 .mode = 0555, 7450 .mode = 0555,
7378 }, 7451 },
7379 {0, }, 7452 {}
7380}; 7453};
7381 7454
7382static struct ctl_table sd_ctl_root[] = { 7455static struct ctl_table sd_ctl_root[] = {
7383 { 7456 {
7384 .ctl_name = CTL_KERN,
7385 .procname = "kernel", 7457 .procname = "kernel",
7386 .mode = 0555, 7458 .mode = 0555,
7387 .child = sd_ctl_dir, 7459 .child = sd_ctl_dir,
7388 }, 7460 },
7389 {0, }, 7461 {}
7390}; 7462};
7391 7463
7392static struct ctl_table *sd_alloc_ctl_entry(int n) 7464static struct ctl_table *sd_alloc_ctl_entry(int n)
@@ -7496,7 +7568,7 @@ static ctl_table *sd_alloc_ctl_cpu_table(int cpu)
7496static struct ctl_table_header *sd_sysctl_header; 7568static struct ctl_table_header *sd_sysctl_header;
7497static void register_sched_domain_sysctl(void) 7569static void register_sched_domain_sysctl(void)
7498{ 7570{
7499 int i, cpu_num = num_online_cpus(); 7571 int i, cpu_num = num_possible_cpus();
7500 struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1); 7572 struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1);
7501 char buf[32]; 7573 char buf[32];
7502 7574
@@ -7506,7 +7578,7 @@ static void register_sched_domain_sysctl(void)
7506 if (entry == NULL) 7578 if (entry == NULL)
7507 return; 7579 return;
7508 7580
7509 for_each_online_cpu(i) { 7581 for_each_possible_cpu(i) {
7510 snprintf(buf, 32, "cpu%d", i); 7582 snprintf(buf, 32, "cpu%d", i);
7511 entry->procname = kstrdup(buf, GFP_KERNEL); 7583 entry->procname = kstrdup(buf, GFP_KERNEL);
7512 entry->mode = 0555; 7584 entry->mode = 0555;
@@ -7636,7 +7708,6 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
7636 spin_lock_irq(&rq->lock); 7708 spin_lock_irq(&rq->lock);
7637 update_rq_clock(rq); 7709 update_rq_clock(rq);
7638 deactivate_task(rq, rq->idle, 0); 7710 deactivate_task(rq, rq->idle, 0);
7639 rq->idle->static_prio = MAX_PRIO;
7640 __setscheduler(rq, rq->idle, SCHED_NORMAL, 0); 7711 __setscheduler(rq, rq->idle, SCHED_NORMAL, 0);
7641 rq->idle->sched_class = &idle_sched_class; 7712 rq->idle->sched_class = &idle_sched_class;
7642 migrate_dead_tasks(cpu); 7713 migrate_dead_tasks(cpu);
@@ -7710,6 +7781,16 @@ early_initcall(migration_init);
7710 7781
7711#ifdef CONFIG_SCHED_DEBUG 7782#ifdef CONFIG_SCHED_DEBUG
7712 7783
7784static __read_mostly int sched_domain_debug_enabled;
7785
7786static int __init sched_domain_debug_setup(char *str)
7787{
7788 sched_domain_debug_enabled = 1;
7789
7790 return 0;
7791}
7792early_param("sched_debug", sched_domain_debug_setup);
7793
7713static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, 7794static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
7714 struct cpumask *groupmask) 7795 struct cpumask *groupmask)
7715{ 7796{
@@ -7796,6 +7877,9 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
7796 cpumask_var_t groupmask; 7877 cpumask_var_t groupmask;
7797 int level = 0; 7878 int level = 0;
7798 7879
7880 if (!sched_domain_debug_enabled)
7881 return;
7882
7799 if (!sd) { 7883 if (!sd) {
7800 printk(KERN_DEBUG "CPU%d attaching NULL sched-domain.\n", cpu); 7884 printk(KERN_DEBUG "CPU%d attaching NULL sched-domain.\n", cpu);
7801 return; 7885 return;
@@ -7875,6 +7959,8 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
7875 7959
7876static void free_rootdomain(struct root_domain *rd) 7960static void free_rootdomain(struct root_domain *rd)
7877{ 7961{
7962 synchronize_sched();
7963
7878 cpupri_cleanup(&rd->cpupri); 7964 cpupri_cleanup(&rd->cpupri);
7879 7965
7880 free_cpumask_var(rd->rto_mask); 7966 free_cpumask_var(rd->rto_mask);
@@ -8015,6 +8101,7 @@ static cpumask_var_t cpu_isolated_map;
8015/* Setup the mask of cpus configured for isolated domains */ 8101/* Setup the mask of cpus configured for isolated domains */
8016static int __init isolated_cpu_setup(char *str) 8102static int __init isolated_cpu_setup(char *str)
8017{ 8103{
8104 alloc_bootmem_cpumask_var(&cpu_isolated_map);
8018 cpulist_parse(str, cpu_isolated_map); 8105 cpulist_parse(str, cpu_isolated_map);
8019 return 1; 8106 return 1;
8020} 8107}
@@ -8851,7 +8938,7 @@ static int build_sched_domains(const struct cpumask *cpu_map)
8851 return __build_sched_domains(cpu_map, NULL); 8938 return __build_sched_domains(cpu_map, NULL);
8852} 8939}
8853 8940
8854static struct cpumask *doms_cur; /* current sched domains */ 8941static cpumask_var_t *doms_cur; /* current sched domains */
8855static int ndoms_cur; /* number of sched domains in 'doms_cur' */ 8942static int ndoms_cur; /* number of sched domains in 'doms_cur' */
8856static struct sched_domain_attr *dattr_cur; 8943static struct sched_domain_attr *dattr_cur;
8857 /* attribues of custom domains in 'doms_cur' */ 8944 /* attribues of custom domains in 'doms_cur' */
@@ -8873,6 +8960,31 @@ int __attribute__((weak)) arch_update_cpu_topology(void)
8873 return 0; 8960 return 0;
8874} 8961}
8875 8962
8963cpumask_var_t *alloc_sched_domains(unsigned int ndoms)
8964{
8965 int i;
8966 cpumask_var_t *doms;
8967
8968 doms = kmalloc(sizeof(*doms) * ndoms, GFP_KERNEL);
8969 if (!doms)
8970 return NULL;
8971 for (i = 0; i < ndoms; i++) {
8972 if (!alloc_cpumask_var(&doms[i], GFP_KERNEL)) {
8973 free_sched_domains(doms, i);
8974 return NULL;
8975 }
8976 }
8977 return doms;
8978}
8979
8980void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms)
8981{
8982 unsigned int i;
8983 for (i = 0; i < ndoms; i++)
8984 free_cpumask_var(doms[i]);
8985 kfree(doms);
8986}
8987
8876/* 8988/*
8877 * Set up scheduler domains and groups. Callers must hold the hotplug lock. 8989 * Set up scheduler domains and groups. Callers must hold the hotplug lock.
8878 * For now this just excludes isolated cpus, but could be used to 8990 * For now this just excludes isolated cpus, but could be used to
@@ -8884,12 +8996,12 @@ static int arch_init_sched_domains(const struct cpumask *cpu_map)
8884 8996
8885 arch_update_cpu_topology(); 8997 arch_update_cpu_topology();
8886 ndoms_cur = 1; 8998 ndoms_cur = 1;
8887 doms_cur = kmalloc(cpumask_size(), GFP_KERNEL); 8999 doms_cur = alloc_sched_domains(ndoms_cur);
8888 if (!doms_cur) 9000 if (!doms_cur)
8889 doms_cur = fallback_doms; 9001 doms_cur = &fallback_doms;
8890 cpumask_andnot(doms_cur, cpu_map, cpu_isolated_map); 9002 cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map);
8891 dattr_cur = NULL; 9003 dattr_cur = NULL;
8892 err = build_sched_domains(doms_cur); 9004 err = build_sched_domains(doms_cur[0]);
8893 register_sched_domain_sysctl(); 9005 register_sched_domain_sysctl();
8894 9006
8895 return err; 9007 return err;
@@ -8939,19 +9051,19 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
8939 * doms_new[] to the current sched domain partitioning, doms_cur[]. 9051 * doms_new[] to the current sched domain partitioning, doms_cur[].
8940 * It destroys each deleted domain and builds each new domain. 9052 * It destroys each deleted domain and builds each new domain.
8941 * 9053 *
8942 * 'doms_new' is an array of cpumask's of length 'ndoms_new'. 9054 * 'doms_new' is an array of cpumask_var_t's of length 'ndoms_new'.
8943 * The masks don't intersect (don't overlap.) We should setup one 9055 * The masks don't intersect (don't overlap.) We should setup one
8944 * sched domain for each mask. CPUs not in any of the cpumasks will 9056 * sched domain for each mask. CPUs not in any of the cpumasks will
8945 * not be load balanced. If the same cpumask appears both in the 9057 * not be load balanced. If the same cpumask appears both in the
8946 * current 'doms_cur' domains and in the new 'doms_new', we can leave 9058 * current 'doms_cur' domains and in the new 'doms_new', we can leave
8947 * it as it is. 9059 * it as it is.
8948 * 9060 *
8949 * The passed in 'doms_new' should be kmalloc'd. This routine takes 9061 * The passed in 'doms_new' should be allocated using
8950 * ownership of it and will kfree it when done with it. If the caller 9062 * alloc_sched_domains. This routine takes ownership of it and will
8951 * failed the kmalloc call, then it can pass in doms_new == NULL && 9063 * free_sched_domains it when done with it. If the caller failed the
8952 * ndoms_new == 1, and partition_sched_domains() will fallback to 9064 * alloc call, then it can pass in doms_new == NULL && ndoms_new == 1,
8953 * the single partition 'fallback_doms', it also forces the domains 9065 * and partition_sched_domains() will fallback to the single partition
8954 * to be rebuilt. 9066 * 'fallback_doms', it also forces the domains to be rebuilt.
8955 * 9067 *
8956 * If doms_new == NULL it will be replaced with cpu_online_mask. 9068 * If doms_new == NULL it will be replaced with cpu_online_mask.
8957 * ndoms_new == 0 is a special case for destroying existing domains, 9069 * ndoms_new == 0 is a special case for destroying existing domains,
@@ -8959,8 +9071,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
8959 * 9071 *
8960 * Call with hotplug lock held 9072 * Call with hotplug lock held
8961 */ 9073 */
8962/* FIXME: Change to struct cpumask *doms_new[] */ 9074void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
8963void partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
8964 struct sched_domain_attr *dattr_new) 9075 struct sched_domain_attr *dattr_new)
8965{ 9076{
8966 int i, j, n; 9077 int i, j, n;
@@ -8979,40 +9090,40 @@ void partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
8979 /* Destroy deleted domains */ 9090 /* Destroy deleted domains */
8980 for (i = 0; i < ndoms_cur; i++) { 9091 for (i = 0; i < ndoms_cur; i++) {
8981 for (j = 0; j < n && !new_topology; j++) { 9092 for (j = 0; j < n && !new_topology; j++) {
8982 if (cpumask_equal(&doms_cur[i], &doms_new[j]) 9093 if (cpumask_equal(doms_cur[i], doms_new[j])
8983 && dattrs_equal(dattr_cur, i, dattr_new, j)) 9094 && dattrs_equal(dattr_cur, i, dattr_new, j))
8984 goto match1; 9095 goto match1;
8985 } 9096 }
8986 /* no match - a current sched domain not in new doms_new[] */ 9097 /* no match - a current sched domain not in new doms_new[] */
8987 detach_destroy_domains(doms_cur + i); 9098 detach_destroy_domains(doms_cur[i]);
8988match1: 9099match1:
8989 ; 9100 ;
8990 } 9101 }
8991 9102
8992 if (doms_new == NULL) { 9103 if (doms_new == NULL) {
8993 ndoms_cur = 0; 9104 ndoms_cur = 0;
8994 doms_new = fallback_doms; 9105 doms_new = &fallback_doms;
8995 cpumask_andnot(&doms_new[0], cpu_online_mask, cpu_isolated_map); 9106 cpumask_andnot(doms_new[0], cpu_active_mask, cpu_isolated_map);
8996 WARN_ON_ONCE(dattr_new); 9107 WARN_ON_ONCE(dattr_new);
8997 } 9108 }
8998 9109
8999 /* Build new domains */ 9110 /* Build new domains */
9000 for (i = 0; i < ndoms_new; i++) { 9111 for (i = 0; i < ndoms_new; i++) {
9001 for (j = 0; j < ndoms_cur && !new_topology; j++) { 9112 for (j = 0; j < ndoms_cur && !new_topology; j++) {
9002 if (cpumask_equal(&doms_new[i], &doms_cur[j]) 9113 if (cpumask_equal(doms_new[i], doms_cur[j])
9003 && dattrs_equal(dattr_new, i, dattr_cur, j)) 9114 && dattrs_equal(dattr_new, i, dattr_cur, j))
9004 goto match2; 9115 goto match2;
9005 } 9116 }
9006 /* no match - add a new doms_new */ 9117 /* no match - add a new doms_new */
9007 __build_sched_domains(doms_new + i, 9118 __build_sched_domains(doms_new[i],
9008 dattr_new ? dattr_new + i : NULL); 9119 dattr_new ? dattr_new + i : NULL);
9009match2: 9120match2:
9010 ; 9121 ;
9011 } 9122 }
9012 9123
9013 /* Remember the new sched domains */ 9124 /* Remember the new sched domains */
9014 if (doms_cur != fallback_doms) 9125 if (doms_cur != &fallback_doms)
9015 kfree(doms_cur); 9126 free_sched_domains(doms_cur, ndoms_cur);
9016 kfree(dattr_cur); /* kfree(NULL) is safe */ 9127 kfree(dattr_cur); /* kfree(NULL) is safe */
9017 doms_cur = doms_new; 9128 doms_cur = doms_new;
9018 dattr_cur = dattr_new; 9129 dattr_cur = dattr_new;
@@ -9123,8 +9234,10 @@ static int update_sched_domains(struct notifier_block *nfb,
9123 switch (action) { 9234 switch (action) {
9124 case CPU_ONLINE: 9235 case CPU_ONLINE:
9125 case CPU_ONLINE_FROZEN: 9236 case CPU_ONLINE_FROZEN:
9126 case CPU_DEAD: 9237 case CPU_DOWN_PREPARE:
9127 case CPU_DEAD_FROZEN: 9238 case CPU_DOWN_PREPARE_FROZEN:
9239 case CPU_DOWN_FAILED:
9240 case CPU_DOWN_FAILED_FROZEN:
9128 partition_sched_domains(1, NULL, NULL); 9241 partition_sched_domains(1, NULL, NULL);
9129 return NOTIFY_OK; 9242 return NOTIFY_OK;
9130 9243
@@ -9171,7 +9284,7 @@ void __init sched_init_smp(void)
9171#endif 9284#endif
9172 get_online_cpus(); 9285 get_online_cpus();
9173 mutex_lock(&sched_domains_mutex); 9286 mutex_lock(&sched_domains_mutex);
9174 arch_init_sched_domains(cpu_online_mask); 9287 arch_init_sched_domains(cpu_active_mask);
9175 cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map); 9288 cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
9176 if (cpumask_empty(non_isolated_cpus)) 9289 if (cpumask_empty(non_isolated_cpus))
9177 cpumask_set_cpu(smp_processor_id(), non_isolated_cpus); 9290 cpumask_set_cpu(smp_processor_id(), non_isolated_cpus);
@@ -9334,10 +9447,6 @@ void __init sched_init(void)
9334#ifdef CONFIG_CPUMASK_OFFSTACK 9447#ifdef CONFIG_CPUMASK_OFFSTACK
9335 alloc_size += num_possible_cpus() * cpumask_size(); 9448 alloc_size += num_possible_cpus() * cpumask_size();
9336#endif 9449#endif
9337 /*
9338 * As sched_init() is called before page_alloc is setup,
9339 * we use alloc_bootmem().
9340 */
9341 if (alloc_size) { 9450 if (alloc_size) {
9342 ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT); 9451 ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT);
9343 9452
@@ -9406,6 +9515,10 @@ void __init sched_init(void)
9406#endif /* CONFIG_USER_SCHED */ 9515#endif /* CONFIG_USER_SCHED */
9407#endif /* CONFIG_GROUP_SCHED */ 9516#endif /* CONFIG_GROUP_SCHED */
9408 9517
9518#if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP
9519 update_shares_data = __alloc_percpu(nr_cpu_ids * sizeof(unsigned long),
9520 __alignof__(unsigned long));
9521#endif
9409 for_each_possible_cpu(i) { 9522 for_each_possible_cpu(i) {
9410 struct rq *rq; 9523 struct rq *rq;
9411 9524
@@ -9488,6 +9601,8 @@ void __init sched_init(void)
9488 rq->cpu = i; 9601 rq->cpu = i;
9489 rq->online = 0; 9602 rq->online = 0;
9490 rq->migration_thread = NULL; 9603 rq->migration_thread = NULL;
9604 rq->idle_stamp = 0;
9605 rq->avg_idle = 2*sysctl_sched_migration_cost;
9491 INIT_LIST_HEAD(&rq->migration_queue); 9606 INIT_LIST_HEAD(&rq->migration_queue);
9492 rq_attach_root(rq, &def_root_domain); 9607 rq_attach_root(rq, &def_root_domain);
9493#endif 9608#endif
@@ -9531,13 +9646,15 @@ void __init sched_init(void)
9531 current->sched_class = &fair_sched_class; 9646 current->sched_class = &fair_sched_class;
9532 9647
9533 /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */ 9648 /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */
9534 alloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT); 9649 zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT);
9535#ifdef CONFIG_SMP 9650#ifdef CONFIG_SMP
9536#ifdef CONFIG_NO_HZ 9651#ifdef CONFIG_NO_HZ
9537 alloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT); 9652 zalloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT);
9538 alloc_cpumask_var(&nohz.ilb_grp_nohz_mask, GFP_NOWAIT); 9653 alloc_cpumask_var(&nohz.ilb_grp_nohz_mask, GFP_NOWAIT);
9539#endif 9654#endif
9540 alloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); 9655 /* May be allocated at isolcpus cmdline parse time */
9656 if (cpu_isolated_map == NULL)
9657 zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
9541#endif /* SMP */ 9658#endif /* SMP */
9542 9659
9543 perf_event_init(); 9660 perf_event_init();
@@ -9731,13 +9848,15 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
9731 se = kzalloc_node(sizeof(struct sched_entity), 9848 se = kzalloc_node(sizeof(struct sched_entity),
9732 GFP_KERNEL, cpu_to_node(i)); 9849 GFP_KERNEL, cpu_to_node(i));
9733 if (!se) 9850 if (!se)
9734 goto err; 9851 goto err_free_rq;
9735 9852
9736 init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]); 9853 init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]);
9737 } 9854 }
9738 9855
9739 return 1; 9856 return 1;
9740 9857
9858 err_free_rq:
9859 kfree(cfs_rq);
9741 err: 9860 err:
9742 return 0; 9861 return 0;
9743} 9862}
@@ -9819,13 +9938,15 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
9819 rt_se = kzalloc_node(sizeof(struct sched_rt_entity), 9938 rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
9820 GFP_KERNEL, cpu_to_node(i)); 9939 GFP_KERNEL, cpu_to_node(i));
9821 if (!rt_se) 9940 if (!rt_se)
9822 goto err; 9941 goto err_free_rq;
9823 9942
9824 init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]); 9943 init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]);
9825 } 9944 }
9826 9945
9827 return 1; 9946 return 1;
9828 9947
9948 err_free_rq:
9949 kfree(rt_rq);
9829 err: 9950 err:
9830 return 0; 9951 return 0;
9831} 9952}
@@ -10867,6 +10988,7 @@ void synchronize_sched_expedited(void)
10867 spin_unlock_irqrestore(&rq->lock, flags); 10988 spin_unlock_irqrestore(&rq->lock, flags);
10868 } 10989 }
10869 rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE; 10990 rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE;
10991 synchronize_sched_expedited_count++;
10870 mutex_unlock(&rcu_sched_expedited_mutex); 10992 mutex_unlock(&rcu_sched_expedited_mutex);
10871 put_online_cpus(); 10993 put_online_cpus();
10872 if (need_full_sync) 10994 if (need_full_sync)