Diffstat (limited to 'kernel/sched.c')
-rw-r--r-- | kernel/sched.c | 1016
1 files changed, 591 insertions, 425 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 76c0e9691fc0..3a8fb30a91b1 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -141,7 +141,7 @@ struct rt_prio_array { | |||
141 | 141 | ||
142 | struct rt_bandwidth { | 142 | struct rt_bandwidth { |
143 | /* nests inside the rq lock: */ | 143 | /* nests inside the rq lock: */ |
144 | spinlock_t rt_runtime_lock; | 144 | raw_spinlock_t rt_runtime_lock; |
145 | ktime_t rt_period; | 145 | ktime_t rt_period; |
146 | u64 rt_runtime; | 146 | u64 rt_runtime; |
147 | struct hrtimer rt_period_timer; | 147 | struct hrtimer rt_period_timer; |
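This and the following hunks convert the scheduler's innermost locks (rt_runtime_lock, and later rq->lock) from spinlock_t to raw_spinlock_t, marking them as locks that must keep spinning even when a preempt-rt kernel turns ordinary spinlocks into sleeping locks. A minimal sketch of the raw API the patch switches to (illustrative only; example_lock and its critical section are made up, not part of the patch):

static DEFINE_RAW_SPINLOCK(example_lock);

static void example_critical_section(void)
{
        unsigned long flags;

        raw_spin_lock_irqsave(&example_lock, flags);
        /* work that must stay atomic and must never sleep */
        raw_spin_unlock_irqrestore(&example_lock, flags);
}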
@@ -178,7 +178,7 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime) | |||
178 | rt_b->rt_period = ns_to_ktime(period); | 178 | rt_b->rt_period = ns_to_ktime(period); |
179 | rt_b->rt_runtime = runtime; | 179 | rt_b->rt_runtime = runtime; |
180 | 180 | ||
181 | spin_lock_init(&rt_b->rt_runtime_lock); | 181 | raw_spin_lock_init(&rt_b->rt_runtime_lock); |
182 | 182 | ||
183 | hrtimer_init(&rt_b->rt_period_timer, | 183 | hrtimer_init(&rt_b->rt_period_timer, |
184 | CLOCK_MONOTONIC, HRTIMER_MODE_REL); | 184 | CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
@@ -200,7 +200,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b) | |||
200 | if (hrtimer_active(&rt_b->rt_period_timer)) | 200 | if (hrtimer_active(&rt_b->rt_period_timer)) |
201 | return; | 201 | return; |
202 | 202 | ||
203 | spin_lock(&rt_b->rt_runtime_lock); | 203 | raw_spin_lock(&rt_b->rt_runtime_lock); |
204 | for (;;) { | 204 | for (;;) { |
205 | unsigned long delta; | 205 | unsigned long delta; |
206 | ktime_t soft, hard; | 206 | ktime_t soft, hard; |
@@ -217,7 +217,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b) | |||
217 | __hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta, | 217 | __hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta, |
218 | HRTIMER_MODE_ABS_PINNED, 0); | 218 | HRTIMER_MODE_ABS_PINNED, 0); |
219 | } | 219 | } |
220 | spin_unlock(&rt_b->rt_runtime_lock); | 220 | raw_spin_unlock(&rt_b->rt_runtime_lock); |
221 | } | 221 | } |
222 | 222 | ||
223 | #ifdef CONFIG_RT_GROUP_SCHED | 223 | #ifdef CONFIG_RT_GROUP_SCHED |
@@ -298,7 +298,7 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct cfs_rq, init_tg_cfs_rq); | |||
298 | 298 | ||
299 | #ifdef CONFIG_RT_GROUP_SCHED | 299 | #ifdef CONFIG_RT_GROUP_SCHED |
300 | static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity); | 300 | static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity); |
301 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rt_rq, init_rt_rq); | 301 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rt_rq, init_rt_rq_var); |
302 | #endif /* CONFIG_RT_GROUP_SCHED */ | 302 | #endif /* CONFIG_RT_GROUP_SCHED */ |
303 | #else /* !CONFIG_USER_SCHED */ | 303 | #else /* !CONFIG_USER_SCHED */ |
304 | #define root_task_group init_task_group | 304 | #define root_task_group init_task_group |
@@ -309,6 +309,8 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct rt_rq, init_rt_rq); | |||
309 | */ | 309 | */ |
310 | static DEFINE_SPINLOCK(task_group_lock); | 310 | static DEFINE_SPINLOCK(task_group_lock); |
311 | 311 | ||
312 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
313 | |||
312 | #ifdef CONFIG_SMP | 314 | #ifdef CONFIG_SMP |
313 | static int root_task_group_empty(void) | 315 | static int root_task_group_empty(void) |
314 | { | 316 | { |
@@ -316,7 +318,6 @@ static int root_task_group_empty(void) | |||
316 | } | 318 | } |
317 | #endif | 319 | #endif |
318 | 320 | ||
319 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
320 | #ifdef CONFIG_USER_SCHED | 321 | #ifdef CONFIG_USER_SCHED |
321 | # define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD) | 322 | # define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD) |
322 | #else /* !CONFIG_USER_SCHED */ | 323 | #else /* !CONFIG_USER_SCHED */ |
@@ -469,7 +470,7 @@ struct rt_rq { | |||
469 | u64 rt_time; | 470 | u64 rt_time; |
470 | u64 rt_runtime; | 471 | u64 rt_runtime; |
471 | /* Nests inside the rq lock: */ | 472 | /* Nests inside the rq lock: */ |
472 | spinlock_t rt_runtime_lock; | 473 | raw_spinlock_t rt_runtime_lock; |
473 | 474 | ||
474 | #ifdef CONFIG_RT_GROUP_SCHED | 475 | #ifdef CONFIG_RT_GROUP_SCHED |
475 | unsigned long rt_nr_boosted; | 476 | unsigned long rt_nr_boosted; |
@@ -524,7 +525,7 @@ static struct root_domain def_root_domain; | |||
524 | */ | 525 | */ |
525 | struct rq { | 526 | struct rq { |
526 | /* runqueue lock: */ | 527 | /* runqueue lock: */ |
527 | spinlock_t lock; | 528 | raw_spinlock_t lock; |
528 | 529 | ||
529 | /* | 530 | /* |
530 | * nr_running and cpu_load should be in the same cacheline because | 531 | * nr_running and cpu_load should be in the same cacheline because |
@@ -534,14 +535,12 @@ struct rq { | |||
534 | #define CPU_LOAD_IDX_MAX 5 | 535 | #define CPU_LOAD_IDX_MAX 5 |
535 | unsigned long cpu_load[CPU_LOAD_IDX_MAX]; | 536 | unsigned long cpu_load[CPU_LOAD_IDX_MAX]; |
536 | #ifdef CONFIG_NO_HZ | 537 | #ifdef CONFIG_NO_HZ |
537 | unsigned long last_tick_seen; | ||
538 | unsigned char in_nohz_recently; | 538 | unsigned char in_nohz_recently; |
539 | #endif | 539 | #endif |
540 | /* capture load from *all* tasks on this cpu: */ | 540 | /* capture load from *all* tasks on this cpu: */ |
541 | struct load_weight load; | 541 | struct load_weight load; |
542 | unsigned long nr_load_updates; | 542 | unsigned long nr_load_updates; |
543 | u64 nr_switches; | 543 | u64 nr_switches; |
544 | u64 nr_migrations_in; | ||
545 | 544 | ||
546 | struct cfs_rq cfs; | 545 | struct cfs_rq cfs; |
547 | struct rt_rq rt; | 546 | struct rt_rq rt; |
@@ -590,6 +589,8 @@ struct rq { | |||
590 | 589 | ||
591 | u64 rt_avg; | 590 | u64 rt_avg; |
592 | u64 age_stamp; | 591 | u64 age_stamp; |
592 | u64 idle_stamp; | ||
593 | u64 avg_idle; | ||
593 | #endif | 594 | #endif |
594 | 595 | ||
595 | /* calc_load related fields */ | 596 | /* calc_load related fields */ |
@@ -676,6 +677,7 @@ inline void update_rq_clock(struct rq *rq) | |||
676 | 677 | ||
677 | /** | 678 | /** |
678 | * runqueue_is_locked | 679 | * runqueue_is_locked |
680 | * @cpu: the processor in question. | ||
679 | * | 681 | * |
680 | * Returns true if the current cpu runqueue is locked. | 682 | * Returns true if the current cpu runqueue is locked. |
681 | * This interface allows printk to be called with the runqueue lock | 683 | * This interface allows printk to be called with the runqueue lock |
@@ -683,7 +685,7 @@ inline void update_rq_clock(struct rq *rq) | |||
683 | */ | 685 | */ |
684 | int runqueue_is_locked(int cpu) | 686 | int runqueue_is_locked(int cpu) |
685 | { | 687 | { |
686 | return spin_is_locked(&cpu_rq(cpu)->lock); | 688 | return raw_spin_is_locked(&cpu_rq(cpu)->lock); |
687 | } | 689 | } |
688 | 690 | ||
689 | /* | 691 | /* |
@@ -770,7 +772,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf, | |||
770 | if (!sched_feat_names[i]) | 772 | if (!sched_feat_names[i]) |
771 | return -EINVAL; | 773 | return -EINVAL; |
772 | 774 | ||
773 | filp->f_pos += cnt; | 775 | *ppos += cnt; |
774 | 776 | ||
775 | return cnt; | 777 | return cnt; |
776 | } | 778 | } |
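The one-line change in sched_feat_write() above follows the usual convention for write() handlers: advance the offset through the *ppos argument the VFS passes in rather than poking filp->f_pos directly. A hedged sketch of the shape such a handler takes (example_write and its buffer handling are illustrative):

static ssize_t example_write(struct file *filp, const char __user *ubuf,
                             size_t cnt, loff_t *ppos)
{
        /* ... copy and parse cnt bytes from ubuf ... */

        *ppos += cnt;   /* advance the position the VFS handed us */

        return cnt;
}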
@@ -812,6 +814,7 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32; | |||
812 | * default: 0.25ms | 814 | * default: 0.25ms |
813 | */ | 815 | */ |
814 | unsigned int sysctl_sched_shares_ratelimit = 250000; | 816 | unsigned int sysctl_sched_shares_ratelimit = 250000; |
817 | unsigned int normalized_sysctl_sched_shares_ratelimit = 250000; | ||
815 | 818 | ||
816 | /* | 819 | /* |
817 | * Inject some fuzzyness into changing the per-cpu group shares | 820 | * Inject some fuzzyness into changing the per-cpu group shares |
@@ -890,7 +893,7 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) | |||
890 | */ | 893 | */ |
891 | spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); | 894 | spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); |
892 | 895 | ||
893 | spin_unlock_irq(&rq->lock); | 896 | raw_spin_unlock_irq(&rq->lock); |
894 | } | 897 | } |
895 | 898 | ||
896 | #else /* __ARCH_WANT_UNLOCKED_CTXSW */ | 899 | #else /* __ARCH_WANT_UNLOCKED_CTXSW */ |
@@ -914,9 +917,9 @@ static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) | |||
914 | next->oncpu = 1; | 917 | next->oncpu = 1; |
915 | #endif | 918 | #endif |
916 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW | 919 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW |
917 | spin_unlock_irq(&rq->lock); | 920 | raw_spin_unlock_irq(&rq->lock); |
918 | #else | 921 | #else |
919 | spin_unlock(&rq->lock); | 922 | raw_spin_unlock(&rq->lock); |
920 | #endif | 923 | #endif |
921 | } | 924 | } |
922 | 925 | ||
@@ -946,10 +949,10 @@ static inline struct rq *__task_rq_lock(struct task_struct *p) | |||
946 | { | 949 | { |
947 | for (;;) { | 950 | for (;;) { |
948 | struct rq *rq = task_rq(p); | 951 | struct rq *rq = task_rq(p); |
949 | spin_lock(&rq->lock); | 952 | raw_spin_lock(&rq->lock); |
950 | if (likely(rq == task_rq(p))) | 953 | if (likely(rq == task_rq(p))) |
951 | return rq; | 954 | return rq; |
952 | spin_unlock(&rq->lock); | 955 | raw_spin_unlock(&rq->lock); |
953 | } | 956 | } |
954 | } | 957 | } |
955 | 958 | ||
@@ -966,10 +969,10 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags) | |||
966 | for (;;) { | 969 | for (;;) { |
967 | local_irq_save(*flags); | 970 | local_irq_save(*flags); |
968 | rq = task_rq(p); | 971 | rq = task_rq(p); |
969 | spin_lock(&rq->lock); | 972 | raw_spin_lock(&rq->lock); |
970 | if (likely(rq == task_rq(p))) | 973 | if (likely(rq == task_rq(p))) |
971 | return rq; | 974 | return rq; |
972 | spin_unlock_irqrestore(&rq->lock, *flags); | 975 | raw_spin_unlock_irqrestore(&rq->lock, *flags); |
973 | } | 976 | } |
974 | } | 977 | } |
975 | 978 | ||
@@ -978,19 +981,19 @@ void task_rq_unlock_wait(struct task_struct *p) | |||
978 | struct rq *rq = task_rq(p); | 981 | struct rq *rq = task_rq(p); |
979 | 982 | ||
980 | smp_mb(); /* spin-unlock-wait is not a full memory barrier */ | 983 | smp_mb(); /* spin-unlock-wait is not a full memory barrier */ |
981 | spin_unlock_wait(&rq->lock); | 984 | raw_spin_unlock_wait(&rq->lock); |
982 | } | 985 | } |
983 | 986 | ||
984 | static void __task_rq_unlock(struct rq *rq) | 987 | static void __task_rq_unlock(struct rq *rq) |
985 | __releases(rq->lock) | 988 | __releases(rq->lock) |
986 | { | 989 | { |
987 | spin_unlock(&rq->lock); | 990 | raw_spin_unlock(&rq->lock); |
988 | } | 991 | } |
989 | 992 | ||
990 | static inline void task_rq_unlock(struct rq *rq, unsigned long *flags) | 993 | static inline void task_rq_unlock(struct rq *rq, unsigned long *flags) |
991 | __releases(rq->lock) | 994 | __releases(rq->lock) |
992 | { | 995 | { |
993 | spin_unlock_irqrestore(&rq->lock, *flags); | 996 | raw_spin_unlock_irqrestore(&rq->lock, *flags); |
994 | } | 997 | } |
995 | 998 | ||
996 | /* | 999 | /* |
@@ -1003,7 +1006,7 @@ static struct rq *this_rq_lock(void) | |||
1003 | 1006 | ||
1004 | local_irq_disable(); | 1007 | local_irq_disable(); |
1005 | rq = this_rq(); | 1008 | rq = this_rq(); |
1006 | spin_lock(&rq->lock); | 1009 | raw_spin_lock(&rq->lock); |
1007 | 1010 | ||
1008 | return rq; | 1011 | return rq; |
1009 | } | 1012 | } |
@@ -1050,10 +1053,10 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer) | |||
1050 | 1053 | ||
1051 | WARN_ON_ONCE(cpu_of(rq) != smp_processor_id()); | 1054 | WARN_ON_ONCE(cpu_of(rq) != smp_processor_id()); |
1052 | 1055 | ||
1053 | spin_lock(&rq->lock); | 1056 | raw_spin_lock(&rq->lock); |
1054 | update_rq_clock(rq); | 1057 | update_rq_clock(rq); |
1055 | rq->curr->sched_class->task_tick(rq, rq->curr, 1); | 1058 | rq->curr->sched_class->task_tick(rq, rq->curr, 1); |
1056 | spin_unlock(&rq->lock); | 1059 | raw_spin_unlock(&rq->lock); |
1057 | 1060 | ||
1058 | return HRTIMER_NORESTART; | 1061 | return HRTIMER_NORESTART; |
1059 | } | 1062 | } |
@@ -1066,10 +1069,10 @@ static void __hrtick_start(void *arg) | |||
1066 | { | 1069 | { |
1067 | struct rq *rq = arg; | 1070 | struct rq *rq = arg; |
1068 | 1071 | ||
1069 | spin_lock(&rq->lock); | 1072 | raw_spin_lock(&rq->lock); |
1070 | hrtimer_restart(&rq->hrtick_timer); | 1073 | hrtimer_restart(&rq->hrtick_timer); |
1071 | rq->hrtick_csd_pending = 0; | 1074 | rq->hrtick_csd_pending = 0; |
1072 | spin_unlock(&rq->lock); | 1075 | raw_spin_unlock(&rq->lock); |
1073 | } | 1076 | } |
1074 | 1077 | ||
1075 | /* | 1078 | /* |
@@ -1176,7 +1179,7 @@ static void resched_task(struct task_struct *p) | |||
1176 | { | 1179 | { |
1177 | int cpu; | 1180 | int cpu; |
1178 | 1181 | ||
1179 | assert_spin_locked(&task_rq(p)->lock); | 1182 | assert_raw_spin_locked(&task_rq(p)->lock); |
1180 | 1183 | ||
1181 | if (test_tsk_need_resched(p)) | 1184 | if (test_tsk_need_resched(p)) |
1182 | return; | 1185 | return; |
@@ -1198,10 +1201,10 @@ static void resched_cpu(int cpu) | |||
1198 | struct rq *rq = cpu_rq(cpu); | 1201 | struct rq *rq = cpu_rq(cpu); |
1199 | unsigned long flags; | 1202 | unsigned long flags; |
1200 | 1203 | ||
1201 | if (!spin_trylock_irqsave(&rq->lock, flags)) | 1204 | if (!raw_spin_trylock_irqsave(&rq->lock, flags)) |
1202 | return; | 1205 | return; |
1203 | resched_task(cpu_curr(cpu)); | 1206 | resched_task(cpu_curr(cpu)); |
1204 | spin_unlock_irqrestore(&rq->lock, flags); | 1207 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
1205 | } | 1208 | } |
1206 | 1209 | ||
1207 | #ifdef CONFIG_NO_HZ | 1210 | #ifdef CONFIG_NO_HZ |
@@ -1270,7 +1273,7 @@ static void sched_rt_avg_update(struct rq *rq, u64 rt_delta) | |||
1270 | #else /* !CONFIG_SMP */ | 1273 | #else /* !CONFIG_SMP */ |
1271 | static void resched_task(struct task_struct *p) | 1274 | static void resched_task(struct task_struct *p) |
1272 | { | 1275 | { |
1273 | assert_spin_locked(&task_rq(p)->lock); | 1276 | assert_raw_spin_locked(&task_rq(p)->lock); |
1274 | set_tsk_need_resched(p); | 1277 | set_tsk_need_resched(p); |
1275 | } | 1278 | } |
1276 | 1279 | ||
@@ -1563,11 +1566,7 @@ static unsigned long cpu_avg_load_per_task(int cpu) | |||
1563 | 1566 | ||
1564 | #ifdef CONFIG_FAIR_GROUP_SCHED | 1567 | #ifdef CONFIG_FAIR_GROUP_SCHED |
1565 | 1568 | ||
1566 | struct update_shares_data { | 1569 | static __read_mostly unsigned long *update_shares_data; |
1567 | unsigned long rq_weight[NR_CPUS]; | ||
1568 | }; | ||
1569 | |||
1570 | static DEFINE_PER_CPU(struct update_shares_data, update_shares_data); | ||
1571 | 1570 | ||
1572 | static void __set_se_shares(struct sched_entity *se, unsigned long shares); | 1571 | static void __set_se_shares(struct sched_entity *se, unsigned long shares); |
1573 | 1572 | ||
@@ -1577,12 +1576,12 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares); | |||
1577 | static void update_group_shares_cpu(struct task_group *tg, int cpu, | 1576 | static void update_group_shares_cpu(struct task_group *tg, int cpu, |
1578 | unsigned long sd_shares, | 1577 | unsigned long sd_shares, |
1579 | unsigned long sd_rq_weight, | 1578 | unsigned long sd_rq_weight, |
1580 | struct update_shares_data *usd) | 1579 | unsigned long *usd_rq_weight) |
1581 | { | 1580 | { |
1582 | unsigned long shares, rq_weight; | 1581 | unsigned long shares, rq_weight; |
1583 | int boost = 0; | 1582 | int boost = 0; |
1584 | 1583 | ||
1585 | rq_weight = usd->rq_weight[cpu]; | 1584 | rq_weight = usd_rq_weight[cpu]; |
1586 | if (!rq_weight) { | 1585 | if (!rq_weight) { |
1587 | boost = 1; | 1586 | boost = 1; |
1588 | rq_weight = NICE_0_LOAD; | 1587 | rq_weight = NICE_0_LOAD; |
@@ -1601,11 +1600,11 @@ static void update_group_shares_cpu(struct task_group *tg, int cpu, | |||
1601 | struct rq *rq = cpu_rq(cpu); | 1600 | struct rq *rq = cpu_rq(cpu); |
1602 | unsigned long flags; | 1601 | unsigned long flags; |
1603 | 1602 | ||
1604 | spin_lock_irqsave(&rq->lock, flags); | 1603 | raw_spin_lock_irqsave(&rq->lock, flags); |
1605 | tg->cfs_rq[cpu]->rq_weight = boost ? 0 : rq_weight; | 1604 | tg->cfs_rq[cpu]->rq_weight = boost ? 0 : rq_weight; |
1606 | tg->cfs_rq[cpu]->shares = boost ? 0 : shares; | 1605 | tg->cfs_rq[cpu]->shares = boost ? 0 : shares; |
1607 | __set_se_shares(tg->se[cpu], shares); | 1606 | __set_se_shares(tg->se[cpu], shares); |
1608 | spin_unlock_irqrestore(&rq->lock, flags); | 1607 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
1609 | } | 1608 | } |
1610 | } | 1609 | } |
1611 | 1610 | ||
@@ -1616,8 +1615,8 @@ static void update_group_shares_cpu(struct task_group *tg, int cpu, | |||
1616 | */ | 1615 | */ |
1617 | static int tg_shares_up(struct task_group *tg, void *data) | 1616 | static int tg_shares_up(struct task_group *tg, void *data) |
1618 | { | 1617 | { |
1619 | unsigned long weight, rq_weight = 0, shares = 0; | 1618 | unsigned long weight, rq_weight = 0, sum_weight = 0, shares = 0; |
1620 | struct update_shares_data *usd; | 1619 | unsigned long *usd_rq_weight; |
1621 | struct sched_domain *sd = data; | 1620 | struct sched_domain *sd = data; |
1622 | unsigned long flags; | 1621 | unsigned long flags; |
1623 | int i; | 1622 | int i; |
@@ -1626,12 +1625,13 @@ static int tg_shares_up(struct task_group *tg, void *data) | |||
1626 | return 0; | 1625 | return 0; |
1627 | 1626 | ||
1628 | local_irq_save(flags); | 1627 | local_irq_save(flags); |
1629 | usd = &__get_cpu_var(update_shares_data); | 1628 | usd_rq_weight = per_cpu_ptr(update_shares_data, smp_processor_id()); |
1630 | 1629 | ||
1631 | for_each_cpu(i, sched_domain_span(sd)) { | 1630 | for_each_cpu(i, sched_domain_span(sd)) { |
1632 | weight = tg->cfs_rq[i]->load.weight; | 1631 | weight = tg->cfs_rq[i]->load.weight; |
1633 | usd->rq_weight[i] = weight; | 1632 | usd_rq_weight[i] = weight; |
1634 | 1633 | ||
1634 | rq_weight += weight; | ||
1635 | /* | 1635 | /* |
1636 | * If there are currently no tasks on the cpu pretend there | 1636 | * If there are currently no tasks on the cpu pretend there |
1637 | * is one of average load so that when a new task gets to | 1637 | * is one of average load so that when a new task gets to |
@@ -1640,10 +1640,13 @@ static int tg_shares_up(struct task_group *tg, void *data) | |||
1640 | if (!weight) | 1640 | if (!weight) |
1641 | weight = NICE_0_LOAD; | 1641 | weight = NICE_0_LOAD; |
1642 | 1642 | ||
1643 | rq_weight += weight; | 1643 | sum_weight += weight; |
1644 | shares += tg->cfs_rq[i]->shares; | 1644 | shares += tg->cfs_rq[i]->shares; |
1645 | } | 1645 | } |
1646 | 1646 | ||
1647 | if (!rq_weight) | ||
1648 | rq_weight = sum_weight; | ||
1649 | |||
1647 | if ((!shares && rq_weight) || shares > tg->shares) | 1650 | if ((!shares && rq_weight) || shares > tg->shares) |
1648 | shares = tg->shares; | 1651 | shares = tg->shares; |
1649 | 1652 | ||
@@ -1651,7 +1654,7 @@ static int tg_shares_up(struct task_group *tg, void *data) | |||
1651 | shares = tg->shares; | 1654 | shares = tg->shares; |
1652 | 1655 | ||
1653 | for_each_cpu(i, sched_domain_span(sd)) | 1656 | for_each_cpu(i, sched_domain_span(sd)) |
1654 | update_group_shares_cpu(tg, i, shares, rq_weight, usd); | 1657 | update_group_shares_cpu(tg, i, shares, rq_weight, usd_rq_weight); |
1655 | 1658 | ||
1656 | local_irq_restore(flags); | 1659 | local_irq_restore(flags); |
1657 | 1660 | ||
@@ -1703,9 +1706,9 @@ static void update_shares_locked(struct rq *rq, struct sched_domain *sd) | |||
1703 | if (root_task_group_empty()) | 1706 | if (root_task_group_empty()) |
1704 | return; | 1707 | return; |
1705 | 1708 | ||
1706 | spin_unlock(&rq->lock); | 1709 | raw_spin_unlock(&rq->lock); |
1707 | update_shares(sd); | 1710 | update_shares(sd); |
1708 | spin_lock(&rq->lock); | 1711 | raw_spin_lock(&rq->lock); |
1709 | } | 1712 | } |
1710 | 1713 | ||
1711 | static void update_h_load(long cpu) | 1714 | static void update_h_load(long cpu) |
@@ -1745,7 +1748,7 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest) | |||
1745 | __acquires(busiest->lock) | 1748 | __acquires(busiest->lock) |
1746 | __acquires(this_rq->lock) | 1749 | __acquires(this_rq->lock) |
1747 | { | 1750 | { |
1748 | spin_unlock(&this_rq->lock); | 1751 | raw_spin_unlock(&this_rq->lock); |
1749 | double_rq_lock(this_rq, busiest); | 1752 | double_rq_lock(this_rq, busiest); |
1750 | 1753 | ||
1751 | return 1; | 1754 | return 1; |
@@ -1766,14 +1769,16 @@ static int _double_lock_balance(struct rq *this_rq, struct rq *busiest) | |||
1766 | { | 1769 | { |
1767 | int ret = 0; | 1770 | int ret = 0; |
1768 | 1771 | ||
1769 | if (unlikely(!spin_trylock(&busiest->lock))) { | 1772 | if (unlikely(!raw_spin_trylock(&busiest->lock))) { |
1770 | if (busiest < this_rq) { | 1773 | if (busiest < this_rq) { |
1771 | spin_unlock(&this_rq->lock); | 1774 | raw_spin_unlock(&this_rq->lock); |
1772 | spin_lock(&busiest->lock); | 1775 | raw_spin_lock(&busiest->lock); |
1773 | spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING); | 1776 | raw_spin_lock_nested(&this_rq->lock, |
1777 | SINGLE_DEPTH_NESTING); | ||
1774 | ret = 1; | 1778 | ret = 1; |
1775 | } else | 1779 | } else |
1776 | spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING); | 1780 | raw_spin_lock_nested(&busiest->lock, |
1781 | SINGLE_DEPTH_NESTING); | ||
1777 | } | 1782 | } |
1778 | return ret; | 1783 | return ret; |
1779 | } | 1784 | } |
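_double_lock_balance() above avoids ABBA deadlock by comparing runqueue addresses: when the trylock fails, the lock with the lower address is always taken first, so two CPUs can never each hold one rq lock while spinning on the other. A stripped-down sketch of that ordering rule (lock_two_rq_locks is a made-up helper, not from the patch):

static void lock_two_rq_locks(raw_spinlock_t *a, raw_spinlock_t *b)
{
        if (a == b) {
                raw_spin_lock(a);
        } else if (a < b) {
                raw_spin_lock(a);
                raw_spin_lock_nested(b, SINGLE_DEPTH_NESTING);
        } else {
                raw_spin_lock(b);
                raw_spin_lock_nested(a, SINGLE_DEPTH_NESTING);
        }
}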
@@ -1787,7 +1792,7 @@ static int double_lock_balance(struct rq *this_rq, struct rq *busiest) | |||
1787 | { | 1792 | { |
1788 | if (unlikely(!irqs_disabled())) { | 1793 | if (unlikely(!irqs_disabled())) { |
1789 | /* printk() doesn't work good under rq->lock */ | 1794 | /* printk() doesn't work good under rq->lock */ |
1790 | spin_unlock(&this_rq->lock); | 1795 | raw_spin_unlock(&this_rq->lock); |
1791 | BUG_ON(1); | 1796 | BUG_ON(1); |
1792 | } | 1797 | } |
1793 | 1798 | ||
@@ -1797,7 +1802,7 @@ static int double_lock_balance(struct rq *this_rq, struct rq *busiest) | |||
1797 | static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest) | 1802 | static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest) |
1798 | __releases(busiest->lock) | 1803 | __releases(busiest->lock) |
1799 | { | 1804 | { |
1800 | spin_unlock(&busiest->lock); | 1805 | raw_spin_unlock(&busiest->lock); |
1801 | lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_); | 1806 | lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_); |
1802 | } | 1807 | } |
1803 | #endif | 1808 | #endif |
@@ -1812,6 +1817,22 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares) | |||
1812 | #endif | 1817 | #endif |
1813 | 1818 | ||
1814 | static void calc_load_account_active(struct rq *this_rq); | 1819 | static void calc_load_account_active(struct rq *this_rq); |
1820 | static void update_sysctl(void); | ||
1821 | static int get_update_sysctl_factor(void); | ||
1822 | |||
1823 | static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) | ||
1824 | { | ||
1825 | set_task_rq(p, cpu); | ||
1826 | #ifdef CONFIG_SMP | ||
1827 | /* | ||
1828 | * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be | ||
1829 | * successfuly executed on another CPU. We must ensure that updates of | ||
1830 | * per-task data have been completed by this moment. | ||
1831 | */ | ||
1832 | smp_wmb(); | ||
1833 | task_thread_info(p)->cpu = cpu; | ||
1834 | #endif | ||
1835 | } | ||
1815 | 1836 | ||
1816 | #include "sched_stats.h" | 1837 | #include "sched_stats.h" |
1817 | #include "sched_idletask.c" | 1838 | #include "sched_idletask.c" |
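The relocated __set_task_cpu() keeps its smp_wmb(): all per-task updates must be visible before ->cpu changes, because task_rq_lock() on another CPU keys off that field. The same publish pattern in isolation (example_* names are illustrative; a matching read side would pair this with smp_rmb()):

struct example_item {
        int payload;
        int index;              /* plays the role of task_thread_info(p)->cpu */
};

static void example_publish(struct example_item *it, int payload, int index)
{
        it->payload = payload;  /* update the data first... */
        smp_wmb();              /* ...then the field other CPUs key off */
        it->index = index;
}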
@@ -1969,20 +1990,6 @@ inline int task_curr(const struct task_struct *p) | |||
1969 | return cpu_curr(task_cpu(p)) == p; | 1990 | return cpu_curr(task_cpu(p)) == p; |
1970 | } | 1991 | } |
1971 | 1992 | ||
1972 | static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) | ||
1973 | { | ||
1974 | set_task_rq(p, cpu); | ||
1975 | #ifdef CONFIG_SMP | ||
1976 | /* | ||
1977 | * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be | ||
1978 | * successfuly executed on another CPU. We must ensure that updates of | ||
1979 | * per-task data have been completed by this moment. | ||
1980 | */ | ||
1981 | smp_wmb(); | ||
1982 | task_thread_info(p)->cpu = cpu; | ||
1983 | #endif | ||
1984 | } | ||
1985 | |||
1986 | static inline void check_class_changed(struct rq *rq, struct task_struct *p, | 1993 | static inline void check_class_changed(struct rq *rq, struct task_struct *p, |
1987 | const struct sched_class *prev_class, | 1994 | const struct sched_class *prev_class, |
1988 | int oldprio, int running) | 1995 | int oldprio, int running) |
@@ -2004,17 +2011,17 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) | |||
2004 | { | 2011 | { |
2005 | s64 delta; | 2012 | s64 delta; |
2006 | 2013 | ||
2014 | if (p->sched_class != &fair_sched_class) | ||
2015 | return 0; | ||
2016 | |||
2007 | /* | 2017 | /* |
2008 | * Buddy candidates are cache hot: | 2018 | * Buddy candidates are cache hot: |
2009 | */ | 2019 | */ |
2010 | if (sched_feat(CACHE_HOT_BUDDY) && | 2020 | if (sched_feat(CACHE_HOT_BUDDY) && this_rq()->nr_running && |
2011 | (&p->se == cfs_rq_of(&p->se)->next || | 2021 | (&p->se == cfs_rq_of(&p->se)->next || |
2012 | &p->se == cfs_rq_of(&p->se)->last)) | 2022 | &p->se == cfs_rq_of(&p->se)->last)) |
2013 | return 1; | 2023 | return 1; |
2014 | 2024 | ||
2015 | if (p->sched_class != &fair_sched_class) | ||
2016 | return 0; | ||
2017 | |||
2018 | if (sysctl_sched_migration_cost == -1) | 2025 | if (sysctl_sched_migration_cost == -1) |
2019 | return 1; | 2026 | return 1; |
2020 | if (sysctl_sched_migration_cost == 0) | 2027 | if (sysctl_sched_migration_cost == 0) |
@@ -2025,39 +2032,23 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) | |||
2025 | return delta < (s64)sysctl_sched_migration_cost; | 2032 | return delta < (s64)sysctl_sched_migration_cost; |
2026 | } | 2033 | } |
2027 | 2034 | ||
2028 | |||
2029 | void set_task_cpu(struct task_struct *p, unsigned int new_cpu) | 2035 | void set_task_cpu(struct task_struct *p, unsigned int new_cpu) |
2030 | { | 2036 | { |
2031 | int old_cpu = task_cpu(p); | 2037 | #ifdef CONFIG_SCHED_DEBUG |
2032 | struct rq *old_rq = cpu_rq(old_cpu), *new_rq = cpu_rq(new_cpu); | 2038 | /* |
2033 | struct cfs_rq *old_cfsrq = task_cfs_rq(p), | 2039 | * We should never call set_task_cpu() on a blocked task, |
2034 | *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu); | 2040 | * ttwu() will sort out the placement. |
2035 | u64 clock_offset; | 2041 | */ |
2036 | 2042 | WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING && | |
2037 | clock_offset = old_rq->clock - new_rq->clock; | 2043 | !(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE)); |
2044 | #endif | ||
2038 | 2045 | ||
2039 | trace_sched_migrate_task(p, new_cpu); | 2046 | trace_sched_migrate_task(p, new_cpu); |
2040 | 2047 | ||
2041 | #ifdef CONFIG_SCHEDSTATS | 2048 | if (task_cpu(p) != new_cpu) { |
2042 | if (p->se.wait_start) | ||
2043 | p->se.wait_start -= clock_offset; | ||
2044 | if (p->se.sleep_start) | ||
2045 | p->se.sleep_start -= clock_offset; | ||
2046 | if (p->se.block_start) | ||
2047 | p->se.block_start -= clock_offset; | ||
2048 | #endif | ||
2049 | if (old_cpu != new_cpu) { | ||
2050 | p->se.nr_migrations++; | 2049 | p->se.nr_migrations++; |
2051 | new_rq->nr_migrations_in++; | 2050 | perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 1, NULL, 0); |
2052 | #ifdef CONFIG_SCHEDSTATS | ||
2053 | if (task_hot(p, old_rq->clock, NULL)) | ||
2054 | schedstat_inc(p, se.nr_forced2_migrations); | ||
2055 | #endif | ||
2056 | perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, | ||
2057 | 1, 1, NULL, 0); | ||
2058 | } | 2051 | } |
2059 | p->se.vruntime -= old_cfsrq->min_vruntime - | ||
2060 | new_cfsrq->min_vruntime; | ||
2061 | 2052 | ||
2062 | __set_task_cpu(p, new_cpu); | 2053 | __set_task_cpu(p, new_cpu); |
2063 | } | 2054 | } |
@@ -2082,12 +2073,10 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req) | |||
2082 | 2073 | ||
2083 | /* | 2074 | /* |
2084 | * If the task is not on a runqueue (and not running), then | 2075 | * If the task is not on a runqueue (and not running), then |
2085 | * it is sufficient to simply update the task's cpu field. | 2076 | * the next wake-up will properly place the task. |
2086 | */ | 2077 | */ |
2087 | if (!p->se.on_rq && !task_running(rq, p)) { | 2078 | if (!p->se.on_rq && !task_running(rq, p)) |
2088 | set_task_cpu(p, dest_cpu); | ||
2089 | return 0; | 2079 | return 0; |
2090 | } | ||
2091 | 2080 | ||
2092 | init_completion(&req->done); | 2081 | init_completion(&req->done); |
2093 | req->task = p; | 2082 | req->task = p; |
@@ -2292,6 +2281,75 @@ void task_oncpu_function_call(struct task_struct *p, | |||
2292 | preempt_enable(); | 2281 | preempt_enable(); |
2293 | } | 2282 | } |
2294 | 2283 | ||
2284 | #ifdef CONFIG_SMP | ||
2285 | static int select_fallback_rq(int cpu, struct task_struct *p) | ||
2286 | { | ||
2287 | int dest_cpu; | ||
2288 | const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu)); | ||
2289 | |||
2290 | /* Look for allowed, online CPU in same node. */ | ||
2291 | for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask) | ||
2292 | if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) | ||
2293 | return dest_cpu; | ||
2294 | |||
2295 | /* Any allowed, online CPU? */ | ||
2296 | dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask); | ||
2297 | if (dest_cpu < nr_cpu_ids) | ||
2298 | return dest_cpu; | ||
2299 | |||
2300 | /* No more Mr. Nice Guy. */ | ||
2301 | if (dest_cpu >= nr_cpu_ids) { | ||
2302 | rcu_read_lock(); | ||
2303 | cpuset_cpus_allowed_locked(p, &p->cpus_allowed); | ||
2304 | rcu_read_unlock(); | ||
2305 | dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed); | ||
2306 | |||
2307 | /* | ||
2308 | * Don't tell them about moving exiting tasks or | ||
2309 | * kernel threads (both mm NULL), since they never | ||
2310 | * leave kernel. | ||
2311 | */ | ||
2312 | if (p->mm && printk_ratelimit()) { | ||
2313 | printk(KERN_INFO "process %d (%s) no " | ||
2314 | "longer affine to cpu%d\n", | ||
2315 | task_pid_nr(p), p->comm, cpu); | ||
2316 | } | ||
2317 | } | ||
2318 | |||
2319 | return dest_cpu; | ||
2320 | } | ||
2321 | |||
2322 | /* | ||
2323 | * Gets called from 3 sites (exec, fork, wakeup), since it is called without | ||
2324 | * holding rq->lock we need to ensure ->cpus_allowed is stable, this is done | ||
2325 | * by: | ||
2326 | * | ||
2327 | * exec: is unstable, retry loop | ||
2328 | * fork & wake-up: serialize ->cpus_allowed against TASK_WAKING | ||
2329 | */ | ||
2330 | static inline | ||
2331 | int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags) | ||
2332 | { | ||
2333 | int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags); | ||
2334 | |||
2335 | /* | ||
2336 | * In order not to call set_task_cpu() on a blocking task we need | ||
2337 | * to rely on ttwu() to place the task on a valid ->cpus_allowed | ||
2338 | * cpu. | ||
2339 | * | ||
2340 | * Since this is common to all placement strategies, this lives here. | ||
2341 | * | ||
2342 | * [ this allows ->select_task() to simply return task_cpu(p) and | ||
2343 | * not worry about this generic constraint ] | ||
2344 | */ | ||
2345 | if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) || | ||
2346 | !cpu_online(cpu))) | ||
2347 | cpu = select_fallback_rq(task_cpu(p), p); | ||
2348 | |||
2349 | return cpu; | ||
2350 | } | ||
2351 | #endif | ||
2352 | |||
2295 | /*** | 2353 | /*** |
2296 | * try_to_wake_up - wake up a thread | 2354 | * try_to_wake_up - wake up a thread |
2297 | * @p: the to-be-woken-up thread | 2355 | * @p: the to-be-woken-up thread |
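The new select_fallback_rq() above walks progressively wider masks: allowed CPUs in the same node, then any allowed active CPU, then the cpuset's view as a last resort. The core cpumask idiom it relies on, shown on its own (pick_allowed_active_cpu is a made-up helper):

static int pick_allowed_active_cpu(const struct cpumask *allowed)
{
        int cpu = cpumask_any_and(allowed, cpu_active_mask);

        /* nr_cpu_ids or above means the intersection was empty */
        return cpu < nr_cpu_ids ? cpu : -1;
}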
@@ -2311,7 +2369,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, | |||
2311 | { | 2369 | { |
2312 | int cpu, orig_cpu, this_cpu, success = 0; | 2370 | int cpu, orig_cpu, this_cpu, success = 0; |
2313 | unsigned long flags; | 2371 | unsigned long flags; |
2314 | struct rq *rq; | 2372 | struct rq *rq, *orig_rq; |
2315 | 2373 | ||
2316 | if (!sched_feat(SYNC_WAKEUPS)) | 2374 | if (!sched_feat(SYNC_WAKEUPS)) |
2317 | wake_flags &= ~WF_SYNC; | 2375 | wake_flags &= ~WF_SYNC; |
@@ -2319,7 +2377,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, | |||
2319 | this_cpu = get_cpu(); | 2377 | this_cpu = get_cpu(); |
2320 | 2378 | ||
2321 | smp_wmb(); | 2379 | smp_wmb(); |
2322 | rq = task_rq_lock(p, &flags); | 2380 | rq = orig_rq = task_rq_lock(p, &flags); |
2323 | update_rq_clock(rq); | 2381 | update_rq_clock(rq); |
2324 | if (!(p->state & state)) | 2382 | if (!(p->state & state)) |
2325 | goto out; | 2383 | goto out; |
@@ -2343,13 +2401,19 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, | |||
2343 | if (task_contributes_to_load(p)) | 2401 | if (task_contributes_to_load(p)) |
2344 | rq->nr_uninterruptible--; | 2402 | rq->nr_uninterruptible--; |
2345 | p->state = TASK_WAKING; | 2403 | p->state = TASK_WAKING; |
2346 | task_rq_unlock(rq, &flags); | ||
2347 | 2404 | ||
2348 | cpu = p->sched_class->select_task_rq(p, SD_BALANCE_WAKE, wake_flags); | 2405 | if (p->sched_class->task_waking) |
2406 | p->sched_class->task_waking(rq, p); | ||
2407 | |||
2408 | __task_rq_unlock(rq); | ||
2409 | |||
2410 | cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags); | ||
2349 | if (cpu != orig_cpu) | 2411 | if (cpu != orig_cpu) |
2350 | set_task_cpu(p, cpu); | 2412 | set_task_cpu(p, cpu); |
2351 | 2413 | ||
2352 | rq = task_rq_lock(p, &flags); | 2414 | rq = __task_rq_lock(p); |
2415 | update_rq_clock(rq); | ||
2416 | |||
2353 | WARN_ON(p->state != TASK_WAKING); | 2417 | WARN_ON(p->state != TASK_WAKING); |
2354 | cpu = task_cpu(p); | 2418 | cpu = task_cpu(p); |
2355 | 2419 | ||
@@ -2404,8 +2468,19 @@ out_running: | |||
2404 | 2468 | ||
2405 | p->state = TASK_RUNNING; | 2469 | p->state = TASK_RUNNING; |
2406 | #ifdef CONFIG_SMP | 2470 | #ifdef CONFIG_SMP |
2407 | if (p->sched_class->task_wake_up) | 2471 | if (p->sched_class->task_woken) |
2408 | p->sched_class->task_wake_up(rq, p); | 2472 | p->sched_class->task_woken(rq, p); |
2473 | |||
2474 | if (unlikely(rq->idle_stamp)) { | ||
2475 | u64 delta = rq->clock - rq->idle_stamp; | ||
2476 | u64 max = 2*sysctl_sched_migration_cost; | ||
2477 | |||
2478 | if (delta > max) | ||
2479 | rq->avg_idle = max; | ||
2480 | else | ||
2481 | update_avg(&rq->avg_idle, delta); | ||
2482 | rq->idle_stamp = 0; | ||
2483 | } | ||
2409 | #endif | 2484 | #endif |
2410 | out: | 2485 | out: |
2411 | task_rq_unlock(rq, &flags); | 2486 | task_rq_unlock(rq, &flags); |
@@ -2452,7 +2527,6 @@ static void __sched_fork(struct task_struct *p) | |||
2452 | p->se.avg_overlap = 0; | 2527 | p->se.avg_overlap = 0; |
2453 | p->se.start_runtime = 0; | 2528 | p->se.start_runtime = 0; |
2454 | p->se.avg_wakeup = sysctl_sched_wakeup_granularity; | 2529 | p->se.avg_wakeup = sysctl_sched_wakeup_granularity; |
2455 | p->se.avg_running = 0; | ||
2456 | 2530 | ||
2457 | #ifdef CONFIG_SCHEDSTATS | 2531 | #ifdef CONFIG_SCHEDSTATS |
2458 | p->se.wait_start = 0; | 2532 | p->se.wait_start = 0; |
@@ -2474,7 +2548,6 @@ static void __sched_fork(struct task_struct *p) | |||
2474 | p->se.nr_failed_migrations_running = 0; | 2548 | p->se.nr_failed_migrations_running = 0; |
2475 | p->se.nr_failed_migrations_hot = 0; | 2549 | p->se.nr_failed_migrations_hot = 0; |
2476 | p->se.nr_forced_migrations = 0; | 2550 | p->se.nr_forced_migrations = 0; |
2477 | p->se.nr_forced2_migrations = 0; | ||
2478 | 2551 | ||
2479 | p->se.nr_wakeups = 0; | 2552 | p->se.nr_wakeups = 0; |
2480 | p->se.nr_wakeups_sync = 0; | 2553 | p->se.nr_wakeups_sync = 0; |
@@ -2495,14 +2568,6 @@ static void __sched_fork(struct task_struct *p) | |||
2495 | #ifdef CONFIG_PREEMPT_NOTIFIERS | 2568 | #ifdef CONFIG_PREEMPT_NOTIFIERS |
2496 | INIT_HLIST_HEAD(&p->preempt_notifiers); | 2569 | INIT_HLIST_HEAD(&p->preempt_notifiers); |
2497 | #endif | 2570 | #endif |
2498 | |||
2499 | /* | ||
2500 | * We mark the process as running here, but have not actually | ||
2501 | * inserted it onto the runqueue yet. This guarantees that | ||
2502 | * nobody will actually run it, and a signal or other external | ||
2503 | * event cannot wake it up and insert it on the runqueue either. | ||
2504 | */ | ||
2505 | p->state = TASK_RUNNING; | ||
2506 | } | 2571 | } |
2507 | 2572 | ||
2508 | /* | 2573 | /* |
@@ -2513,6 +2578,12 @@ void sched_fork(struct task_struct *p, int clone_flags) | |||
2513 | int cpu = get_cpu(); | 2578 | int cpu = get_cpu(); |
2514 | 2579 | ||
2515 | __sched_fork(p); | 2580 | __sched_fork(p); |
2581 | /* | ||
2582 | * We mark the process as waking here. This guarantees that | ||
2583 | * nobody will actually run it, and a signal or other external | ||
2584 | * event cannot wake it up and insert it on the runqueue either. | ||
2585 | */ | ||
2586 | p->state = TASK_WAKING; | ||
2516 | 2587 | ||
2517 | /* | 2588 | /* |
2518 | * Revert to default priority/policy on fork if requested. | 2589 | * Revert to default priority/policy on fork if requested. |
@@ -2544,9 +2615,9 @@ void sched_fork(struct task_struct *p, int clone_flags) | |||
2544 | if (!rt_prio(p->prio)) | 2615 | if (!rt_prio(p->prio)) |
2545 | p->sched_class = &fair_sched_class; | 2616 | p->sched_class = &fair_sched_class; |
2546 | 2617 | ||
2547 | #ifdef CONFIG_SMP | 2618 | if (p->sched_class->task_fork) |
2548 | cpu = p->sched_class->select_task_rq(p, SD_BALANCE_FORK, 0); | 2619 | p->sched_class->task_fork(p); |
2549 | #endif | 2620 | |
2550 | set_task_cpu(p, cpu); | 2621 | set_task_cpu(p, cpu); |
2551 | 2622 | ||
2552 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) | 2623 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) |
@@ -2576,28 +2647,35 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) | |||
2576 | { | 2647 | { |
2577 | unsigned long flags; | 2648 | unsigned long flags; |
2578 | struct rq *rq; | 2649 | struct rq *rq; |
2650 | int cpu = get_cpu(); | ||
2651 | |||
2652 | #ifdef CONFIG_SMP | ||
2653 | /* | ||
2654 | * Fork balancing, do it here and not earlier because: | ||
2655 | * - cpus_allowed can change in the fork path | ||
2656 | * - any previously selected cpu might disappear through hotplug | ||
2657 | * | ||
2658 | * We still have TASK_WAKING but PF_STARTING is gone now, meaning | ||
2659 | * ->cpus_allowed is stable, we have preemption disabled, meaning | ||
2660 | * cpu_online_mask is stable. | ||
2661 | */ | ||
2662 | cpu = select_task_rq(p, SD_BALANCE_FORK, 0); | ||
2663 | set_task_cpu(p, cpu); | ||
2664 | #endif | ||
2579 | 2665 | ||
2580 | rq = task_rq_lock(p, &flags); | 2666 | rq = task_rq_lock(p, &flags); |
2581 | BUG_ON(p->state != TASK_RUNNING); | 2667 | BUG_ON(p->state != TASK_WAKING); |
2668 | p->state = TASK_RUNNING; | ||
2582 | update_rq_clock(rq); | 2669 | update_rq_clock(rq); |
2583 | 2670 | activate_task(rq, p, 0); | |
2584 | if (!p->sched_class->task_new || !current->se.on_rq) { | ||
2585 | activate_task(rq, p, 0); | ||
2586 | } else { | ||
2587 | /* | ||
2588 | * Let the scheduling class do new task startup | ||
2589 | * management (if any): | ||
2590 | */ | ||
2591 | p->sched_class->task_new(rq, p); | ||
2592 | inc_nr_running(rq); | ||
2593 | } | ||
2594 | trace_sched_wakeup_new(rq, p, 1); | 2671 | trace_sched_wakeup_new(rq, p, 1); |
2595 | check_preempt_curr(rq, p, WF_FORK); | 2672 | check_preempt_curr(rq, p, WF_FORK); |
2596 | #ifdef CONFIG_SMP | 2673 | #ifdef CONFIG_SMP |
2597 | if (p->sched_class->task_wake_up) | 2674 | if (p->sched_class->task_woken) |
2598 | p->sched_class->task_wake_up(rq, p); | 2675 | p->sched_class->task_woken(rq, p); |
2599 | #endif | 2676 | #endif |
2600 | task_rq_unlock(rq, &flags); | 2677 | task_rq_unlock(rq, &flags); |
2678 | put_cpu(); | ||
2601 | } | 2679 | } |
2602 | 2680 | ||
2603 | #ifdef CONFIG_PREEMPT_NOTIFIERS | 2681 | #ifdef CONFIG_PREEMPT_NOTIFIERS |
@@ -2747,10 +2825,10 @@ static inline void post_schedule(struct rq *rq) | |||
2747 | if (rq->post_schedule) { | 2825 | if (rq->post_schedule) { |
2748 | unsigned long flags; | 2826 | unsigned long flags; |
2749 | 2827 | ||
2750 | spin_lock_irqsave(&rq->lock, flags); | 2828 | raw_spin_lock_irqsave(&rq->lock, flags); |
2751 | if (rq->curr->sched_class->post_schedule) | 2829 | if (rq->curr->sched_class->post_schedule) |
2752 | rq->curr->sched_class->post_schedule(rq); | 2830 | rq->curr->sched_class->post_schedule(rq); |
2753 | spin_unlock_irqrestore(&rq->lock, flags); | 2831 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
2754 | 2832 | ||
2755 | rq->post_schedule = 0; | 2833 | rq->post_schedule = 0; |
2756 | } | 2834 | } |
@@ -2814,14 +2892,14 @@ context_switch(struct rq *rq, struct task_struct *prev, | |||
2814 | */ | 2892 | */ |
2815 | arch_start_context_switch(prev); | 2893 | arch_start_context_switch(prev); |
2816 | 2894 | ||
2817 | if (unlikely(!mm)) { | 2895 | if (likely(!mm)) { |
2818 | next->active_mm = oldmm; | 2896 | next->active_mm = oldmm; |
2819 | atomic_inc(&oldmm->mm_count); | 2897 | atomic_inc(&oldmm->mm_count); |
2820 | enter_lazy_tlb(oldmm, next); | 2898 | enter_lazy_tlb(oldmm, next); |
2821 | } else | 2899 | } else |
2822 | switch_mm(oldmm, mm, next); | 2900 | switch_mm(oldmm, mm, next); |
2823 | 2901 | ||
2824 | if (unlikely(!prev->mm)) { | 2902 | if (likely(!prev->mm)) { |
2825 | prev->active_mm = NULL; | 2903 | prev->active_mm = NULL; |
2826 | rq->prev_mm = oldmm; | 2904 | rq->prev_mm = oldmm; |
2827 | } | 2905 | } |
@@ -2984,15 +3062,6 @@ static void calc_load_account_active(struct rq *this_rq) | |||
2984 | } | 3062 | } |
2985 | 3063 | ||
2986 | /* | 3064 | /* |
2987 | * Externally visible per-cpu scheduler statistics: | ||
2988 | * cpu_nr_migrations(cpu) - number of migrations into that cpu | ||
2989 | */ | ||
2990 | u64 cpu_nr_migrations(int cpu) | ||
2991 | { | ||
2992 | return cpu_rq(cpu)->nr_migrations_in; | ||
2993 | } | ||
2994 | |||
2995 | /* | ||
2996 | * Update rq->cpu_load[] statistics. This function is usually called every | 3065 | * Update rq->cpu_load[] statistics. This function is usually called every |
2997 | * scheduler tick (TICK_NSEC). | 3066 | * scheduler tick (TICK_NSEC). |
2998 | */ | 3067 | */ |
@@ -3041,15 +3110,15 @@ static void double_rq_lock(struct rq *rq1, struct rq *rq2) | |||
3041 | { | 3110 | { |
3042 | BUG_ON(!irqs_disabled()); | 3111 | BUG_ON(!irqs_disabled()); |
3043 | if (rq1 == rq2) { | 3112 | if (rq1 == rq2) { |
3044 | spin_lock(&rq1->lock); | 3113 | raw_spin_lock(&rq1->lock); |
3045 | __acquire(rq2->lock); /* Fake it out ;) */ | 3114 | __acquire(rq2->lock); /* Fake it out ;) */ |
3046 | } else { | 3115 | } else { |
3047 | if (rq1 < rq2) { | 3116 | if (rq1 < rq2) { |
3048 | spin_lock(&rq1->lock); | 3117 | raw_spin_lock(&rq1->lock); |
3049 | spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING); | 3118 | raw_spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING); |
3050 | } else { | 3119 | } else { |
3051 | spin_lock(&rq2->lock); | 3120 | raw_spin_lock(&rq2->lock); |
3052 | spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING); | 3121 | raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING); |
3053 | } | 3122 | } |
3054 | } | 3123 | } |
3055 | update_rq_clock(rq1); | 3124 | update_rq_clock(rq1); |
@@ -3066,29 +3135,44 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2) | |||
3066 | __releases(rq1->lock) | 3135 | __releases(rq1->lock) |
3067 | __releases(rq2->lock) | 3136 | __releases(rq2->lock) |
3068 | { | 3137 | { |
3069 | spin_unlock(&rq1->lock); | 3138 | raw_spin_unlock(&rq1->lock); |
3070 | if (rq1 != rq2) | 3139 | if (rq1 != rq2) |
3071 | spin_unlock(&rq2->lock); | 3140 | raw_spin_unlock(&rq2->lock); |
3072 | else | 3141 | else |
3073 | __release(rq2->lock); | 3142 | __release(rq2->lock); |
3074 | } | 3143 | } |
3075 | 3144 | ||
3076 | /* | 3145 | /* |
3077 | * If dest_cpu is allowed for this process, migrate the task to it. | 3146 | * sched_exec - execve() is a valuable balancing opportunity, because at |
3078 | * This is accomplished by forcing the cpu_allowed mask to only | 3147 | * this point the task has the smallest effective memory and cache footprint. |
3079 | * allow dest_cpu, which will force the cpu onto dest_cpu. Then | ||
3080 | * the cpu_allowed mask is restored. | ||
3081 | */ | 3148 | */ |
3082 | static void sched_migrate_task(struct task_struct *p, int dest_cpu) | 3149 | void sched_exec(void) |
3083 | { | 3150 | { |
3151 | struct task_struct *p = current; | ||
3084 | struct migration_req req; | 3152 | struct migration_req req; |
3153 | int dest_cpu, this_cpu; | ||
3085 | unsigned long flags; | 3154 | unsigned long flags; |
3086 | struct rq *rq; | 3155 | struct rq *rq; |
3087 | 3156 | ||
3157 | again: | ||
3158 | this_cpu = get_cpu(); | ||
3159 | dest_cpu = select_task_rq(p, SD_BALANCE_EXEC, 0); | ||
3160 | if (dest_cpu == this_cpu) { | ||
3161 | put_cpu(); | ||
3162 | return; | ||
3163 | } | ||
3164 | |||
3088 | rq = task_rq_lock(p, &flags); | 3165 | rq = task_rq_lock(p, &flags); |
3166 | put_cpu(); | ||
3167 | |||
3168 | /* | ||
3169 | * select_task_rq() can race against ->cpus_allowed | ||
3170 | */ | ||
3089 | if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed) | 3171 | if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed) |
3090 | || unlikely(!cpu_active(dest_cpu))) | 3172 | || unlikely(!cpu_active(dest_cpu))) { |
3091 | goto out; | 3173 | task_rq_unlock(rq, &flags); |
3174 | goto again; | ||
3175 | } | ||
3092 | 3176 | ||
3093 | /* force the process onto the specified CPU */ | 3177 | /* force the process onto the specified CPU */ |
3094 | if (migrate_task(p, dest_cpu, &req)) { | 3178 | if (migrate_task(p, dest_cpu, &req)) { |
@@ -3103,24 +3187,10 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu) | |||
3103 | 3187 | ||
3104 | return; | 3188 | return; |
3105 | } | 3189 | } |
3106 | out: | ||
3107 | task_rq_unlock(rq, &flags); | 3190 | task_rq_unlock(rq, &flags); |
3108 | } | 3191 | } |
3109 | 3192 | ||
3110 | /* | 3193 | /* |
3111 | * sched_exec - execve() is a valuable balancing opportunity, because at | ||
3112 | * this point the task has the smallest effective memory and cache footprint. | ||
3113 | */ | ||
3114 | void sched_exec(void) | ||
3115 | { | ||
3116 | int new_cpu, this_cpu = get_cpu(); | ||
3117 | new_cpu = current->sched_class->select_task_rq(current, SD_BALANCE_EXEC, 0); | ||
3118 | put_cpu(); | ||
3119 | if (new_cpu != this_cpu) | ||
3120 | sched_migrate_task(current, new_cpu); | ||
3121 | } | ||
3122 | |||
3123 | /* | ||
3124 | * pull_task - move a task from a remote runqueue to the local runqueue. | 3194 | * pull_task - move a task from a remote runqueue to the local runqueue. |
3125 | * Both runqueues must be locked. | 3195 | * Both runqueues must be locked. |
3126 | */ | 3196 | */ |
@@ -3130,10 +3200,6 @@ static void pull_task(struct rq *src_rq, struct task_struct *p, | |||
3130 | deactivate_task(src_rq, p, 0); | 3200 | deactivate_task(src_rq, p, 0); |
3131 | set_task_cpu(p, this_cpu); | 3201 | set_task_cpu(p, this_cpu); |
3132 | activate_task(this_rq, p, 0); | 3202 | activate_task(this_rq, p, 0); |
3133 | /* | ||
3134 | * Note that idle threads have a prio of MAX_PRIO, for this test | ||
3135 | * to be always true for them. | ||
3136 | */ | ||
3137 | check_preempt_curr(this_rq, p, 0); | 3203 | check_preempt_curr(this_rq, p, 0); |
3138 | } | 3204 | } |
3139 | 3205 | ||
@@ -3656,6 +3722,7 @@ static void update_group_power(struct sched_domain *sd, int cpu) | |||
3656 | 3722 | ||
3657 | /** | 3723 | /** |
3658 | * update_sg_lb_stats - Update sched_group's statistics for load balancing. | 3724 | * update_sg_lb_stats - Update sched_group's statistics for load balancing. |
3725 | * @sd: The sched_domain whose statistics are to be updated. | ||
3659 | * @group: sched_group whose statistics are to be updated. | 3726 | * @group: sched_group whose statistics are to be updated. |
3660 | * @this_cpu: Cpu for which load balance is currently performed. | 3727 | * @this_cpu: Cpu for which load balance is currently performed. |
3661 | * @idle: Idle status of this_cpu | 3728 | * @idle: Idle status of this_cpu |
@@ -4091,7 +4158,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, | |||
4091 | unsigned long flags; | 4158 | unsigned long flags; |
4092 | struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); | 4159 | struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); |
4093 | 4160 | ||
4094 | cpumask_setall(cpus); | 4161 | cpumask_copy(cpus, cpu_active_mask); |
4095 | 4162 | ||
4096 | /* | 4163 | /* |
4097 | * When power savings policy is enabled for the parent domain, idle | 4164 | * When power savings policy is enabled for the parent domain, idle |
@@ -4164,14 +4231,15 @@ redo: | |||
4164 | 4231 | ||
4165 | if (unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2)) { | 4232 | if (unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2)) { |
4166 | 4233 | ||
4167 | spin_lock_irqsave(&busiest->lock, flags); | 4234 | raw_spin_lock_irqsave(&busiest->lock, flags); |
4168 | 4235 | ||
4169 | /* don't kick the migration_thread, if the curr | 4236 | /* don't kick the migration_thread, if the curr |
4170 | * task on busiest cpu can't be moved to this_cpu | 4237 | * task on busiest cpu can't be moved to this_cpu |
4171 | */ | 4238 | */ |
4172 | if (!cpumask_test_cpu(this_cpu, | 4239 | if (!cpumask_test_cpu(this_cpu, |
4173 | &busiest->curr->cpus_allowed)) { | 4240 | &busiest->curr->cpus_allowed)) { |
4174 | spin_unlock_irqrestore(&busiest->lock, flags); | 4241 | raw_spin_unlock_irqrestore(&busiest->lock, |
4242 | flags); | ||
4175 | all_pinned = 1; | 4243 | all_pinned = 1; |
4176 | goto out_one_pinned; | 4244 | goto out_one_pinned; |
4177 | } | 4245 | } |
@@ -4181,7 +4249,7 @@ redo: | |||
4181 | busiest->push_cpu = this_cpu; | 4249 | busiest->push_cpu = this_cpu; |
4182 | active_balance = 1; | 4250 | active_balance = 1; |
4183 | } | 4251 | } |
4184 | spin_unlock_irqrestore(&busiest->lock, flags); | 4252 | raw_spin_unlock_irqrestore(&busiest->lock, flags); |
4185 | if (active_balance) | 4253 | if (active_balance) |
4186 | wake_up_process(busiest->migration_thread); | 4254 | wake_up_process(busiest->migration_thread); |
4187 | 4255 | ||
@@ -4254,7 +4322,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) | |||
4254 | int all_pinned = 0; | 4322 | int all_pinned = 0; |
4255 | struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); | 4323 | struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); |
4256 | 4324 | ||
4257 | cpumask_setall(cpus); | 4325 | cpumask_copy(cpus, cpu_active_mask); |
4258 | 4326 | ||
4259 | /* | 4327 | /* |
4260 | * When power savings policy is enabled for the parent domain, idle | 4328 | * When power savings policy is enabled for the parent domain, idle |
@@ -4363,10 +4431,10 @@ redo: | |||
4363 | /* | 4431 | /* |
4364 | * Should not call ttwu while holding a rq->lock | 4432 | * Should not call ttwu while holding a rq->lock |
4365 | */ | 4433 | */ |
4366 | spin_unlock(&this_rq->lock); | 4434 | raw_spin_unlock(&this_rq->lock); |
4367 | if (active_balance) | 4435 | if (active_balance) |
4368 | wake_up_process(busiest->migration_thread); | 4436 | wake_up_process(busiest->migration_thread); |
4369 | spin_lock(&this_rq->lock); | 4437 | raw_spin_lock(&this_rq->lock); |
4370 | 4438 | ||
4371 | } else | 4439 | } else |
4372 | sd->nr_balance_failed = 0; | 4440 | sd->nr_balance_failed = 0; |
@@ -4394,6 +4462,11 @@ static void idle_balance(int this_cpu, struct rq *this_rq) | |||
4394 | int pulled_task = 0; | 4462 | int pulled_task = 0; |
4395 | unsigned long next_balance = jiffies + HZ; | 4463 | unsigned long next_balance = jiffies + HZ; |
4396 | 4464 | ||
4465 | this_rq->idle_stamp = this_rq->clock; | ||
4466 | |||
4467 | if (this_rq->avg_idle < sysctl_sched_migration_cost) | ||
4468 | return; | ||
4469 | |||
4397 | for_each_domain(this_cpu, sd) { | 4470 | for_each_domain(this_cpu, sd) { |
4398 | unsigned long interval; | 4471 | unsigned long interval; |
4399 | 4472 | ||
@@ -4408,8 +4481,10 @@ static void idle_balance(int this_cpu, struct rq *this_rq) | |||
4408 | interval = msecs_to_jiffies(sd->balance_interval); | 4481 | interval = msecs_to_jiffies(sd->balance_interval); |
4409 | if (time_after(next_balance, sd->last_balance + interval)) | 4482 | if (time_after(next_balance, sd->last_balance + interval)) |
4410 | next_balance = sd->last_balance + interval; | 4483 | next_balance = sd->last_balance + interval; |
4411 | if (pulled_task) | 4484 | if (pulled_task) { |
4485 | this_rq->idle_stamp = 0; | ||
4412 | break; | 4486 | break; |
4487 | } | ||
4413 | } | 4488 | } |
4414 | if (pulled_task || time_after(jiffies, this_rq->next_balance)) { | 4489 | if (pulled_task || time_after(jiffies, this_rq->next_balance)) { |
4415 | /* | 4490 | /* |
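The new idle_stamp/avg_idle pair lets idle_balance() skip newidle balancing when this CPU's idle periods are, on average, shorter than sysctl_sched_migration_cost, since pulling a task would then cost more than the idle gap it fills. A sketch of how the running average is maintained (the 1/8 weighting mirrors the update_avg() helper called from try_to_wake_up() above; treat the exact factor as an assumption):

static void example_update_avg(u64 *avg, u64 sample)
{
        s64 diff = sample - *avg;

        *avg += diff >> 3;      /* exponential average, 1/8 weight per sample */
}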
@@ -4644,7 +4719,7 @@ int select_nohz_load_balancer(int stop_tick) | |||
4644 | cpumask_set_cpu(cpu, nohz.cpu_mask); | 4719 | cpumask_set_cpu(cpu, nohz.cpu_mask); |
4645 | 4720 | ||
4646 | /* time for ilb owner also to sleep */ | 4721 | /* time for ilb owner also to sleep */ |
4647 | if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) { | 4722 | if (cpumask_weight(nohz.cpu_mask) == num_active_cpus()) { |
4648 | if (atomic_read(&nohz.load_balancer) == cpu) | 4723 | if (atomic_read(&nohz.load_balancer) == cpu) |
4649 | atomic_set(&nohz.load_balancer, -1); | 4724 | atomic_set(&nohz.load_balancer, -1); |
4650 | return 0; | 4725 | return 0; |
@@ -5011,8 +5086,13 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime, | |||
5011 | p->gtime = cputime_add(p->gtime, cputime); | 5086 | p->gtime = cputime_add(p->gtime, cputime); |
5012 | 5087 | ||
5013 | /* Add guest time to cpustat. */ | 5088 | /* Add guest time to cpustat. */ |
5014 | cpustat->user = cputime64_add(cpustat->user, tmp); | 5089 | if (TASK_NICE(p) > 0) { |
5015 | cpustat->guest = cputime64_add(cpustat->guest, tmp); | 5090 | cpustat->nice = cputime64_add(cpustat->nice, tmp); |
5091 | cpustat->guest_nice = cputime64_add(cpustat->guest_nice, tmp); | ||
5092 | } else { | ||
5093 | cpustat->user = cputime64_add(cpustat->user, tmp); | ||
5094 | cpustat->guest = cputime64_add(cpustat->guest, tmp); | ||
5095 | } | ||
5016 | } | 5096 | } |
5017 | 5097 | ||
5018 | /* | 5098 | /* |
@@ -5127,60 +5207,86 @@ void account_idle_ticks(unsigned long ticks) | |||
5127 | * Use precise platform statistics if available: | 5207 | * Use precise platform statistics if available: |
5128 | */ | 5208 | */ |
5129 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING | 5209 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING |
5130 | cputime_t task_utime(struct task_struct *p) | 5210 | void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) |
5131 | { | 5211 | { |
5132 | return p->utime; | 5212 | *ut = p->utime; |
5213 | *st = p->stime; | ||
5133 | } | 5214 | } |
5134 | 5215 | ||
5135 | cputime_t task_stime(struct task_struct *p) | 5216 | void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) |
5136 | { | 5217 | { |
5137 | return p->stime; | 5218 | struct task_cputime cputime; |
5219 | |||
5220 | thread_group_cputime(p, &cputime); | ||
5221 | |||
5222 | *ut = cputime.utime; | ||
5223 | *st = cputime.stime; | ||
5138 | } | 5224 | } |
5139 | #else | 5225 | #else |
5140 | cputime_t task_utime(struct task_struct *p) | 5226 | |
5227 | #ifndef nsecs_to_cputime | ||
5228 | # define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs) | ||
5229 | #endif | ||
5230 | |||
5231 | void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) | ||
5141 | { | 5232 | { |
5142 | clock_t utime = cputime_to_clock_t(p->utime), | 5233 | cputime_t rtime, utime = p->utime, total = cputime_add(utime, p->stime); |
5143 | total = utime + cputime_to_clock_t(p->stime); | ||
5144 | u64 temp; | ||
5145 | 5234 | ||
5146 | /* | 5235 | /* |
5147 | * Use CFS's precise accounting: | 5236 | * Use CFS's precise accounting: |
5148 | */ | 5237 | */ |
5149 | temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime); | 5238 | rtime = nsecs_to_cputime(p->se.sum_exec_runtime); |
5150 | 5239 | ||
5151 | if (total) { | 5240 | if (total) { |
5152 | temp *= utime; | 5241 | u64 temp; |
5242 | |||
5243 | temp = (u64)(rtime * utime); | ||
5153 | do_div(temp, total); | 5244 | do_div(temp, total); |
5154 | } | 5245 | utime = (cputime_t)temp; |
5155 | utime = (clock_t)temp; | 5246 | } else |
5247 | utime = rtime; | ||
5248 | |||
5249 | /* | ||
5250 | * Compare with previous values, to keep monotonicity: | ||
5251 | */ | ||
5252 | p->prev_utime = max(p->prev_utime, utime); | ||
5253 | p->prev_stime = max(p->prev_stime, cputime_sub(rtime, p->prev_utime)); | ||
5156 | 5254 | ||
5157 | p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime)); | 5255 | *ut = p->prev_utime; |
5158 | return p->prev_utime; | 5256 | *st = p->prev_stime; |
5159 | } | 5257 | } |
5160 | 5258 | ||
5161 | cputime_t task_stime(struct task_struct *p) | 5259 | /* |
5260 | * Must be called with siglock held. | ||
5261 | */ | ||
5262 | void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) | ||
5162 | { | 5263 | { |
5163 | clock_t stime; | 5264 | struct signal_struct *sig = p->signal; |
5265 | struct task_cputime cputime; | ||
5266 | cputime_t rtime, utime, total; | ||
5164 | 5267 | ||
5165 | /* | 5268 | thread_group_cputime(p, &cputime); |
5166 | * Use CFS's precise accounting. (we subtract utime from | ||
5167 | * the total, to make sure the total observed by userspace | ||
5168 | * grows monotonically - apps rely on that): | ||
5169 | */ | ||
5170 | stime = nsec_to_clock_t(p->se.sum_exec_runtime) - | ||
5171 | cputime_to_clock_t(task_utime(p)); | ||
5172 | 5269 | ||
5173 | if (stime >= 0) | 5270 | total = cputime_add(cputime.utime, cputime.stime); |
5174 | p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime)); | 5271 | rtime = nsecs_to_cputime(cputime.sum_exec_runtime); |
5175 | 5272 | ||
5176 | return p->prev_stime; | 5273 | if (total) { |
5177 | } | 5274 | u64 temp; |
5178 | #endif | ||
5179 | 5275 | ||
5180 | inline cputime_t task_gtime(struct task_struct *p) | 5276 | temp = (u64)(rtime * cputime.utime); |
5181 | { | 5277 | do_div(temp, total); |
5182 | return p->gtime; | 5278 | utime = (cputime_t)temp; |
5279 | } else | ||
5280 | utime = rtime; | ||
5281 | |||
5282 | sig->prev_utime = max(sig->prev_utime, utime); | ||
5283 | sig->prev_stime = max(sig->prev_stime, | ||
5284 | cputime_sub(rtime, sig->prev_utime)); | ||
5285 | |||
5286 | *ut = sig->prev_utime; | ||
5287 | *st = sig->prev_stime; | ||
5183 | } | 5288 | } |
5289 | #endif | ||
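
The scaled-utime calculation above is easier to see in isolation: rtime is the precise CFS runtime, and the tick-sampled utime/stime pair is only used as a split ratio, with the cached prev_* values keeping the numbers userspace sees monotonic. A standalone C sketch of that arithmetic (illustrative only, not kernel code; cputime_t is reduced to a plain u64):

	#include <stdint.h>
	#include <stdio.h>

	/* Illustrative only: mirror the scaling done in task_times() above.
	 * rtime is the precise CFS runtime; utime/stime are tick-sampled and
	 * serve only to split rtime proportionally.  prev_utime/prev_stime
	 * keep the values already reported to userspace from going backwards. */
	struct acct {
		uint64_t prev_utime, prev_stime;
	};

	static void task_times_sketch(struct acct *a, uint64_t utime,
				      uint64_t stime, uint64_t rtime)
	{
		uint64_t total = utime + stime;
		uint64_t ut = total ? (rtime * utime) / total : rtime;

		if (ut > a->prev_utime)
			a->prev_utime = ut;			/* max(prev, new) */
		if (rtime - a->prev_utime > a->prev_stime)
			a->prev_stime = rtime - a->prev_utime;	/* remainder is stime */
	}

	int main(void)
	{
		struct acct a = { 0, 0 };

		task_times_sketch(&a, 30, 10, 100);	/* 3:1 split of 100 units */
		printf("ut=%llu st=%llu\n",
		       (unsigned long long)a.prev_utime,
		       (unsigned long long)a.prev_stime);	/* ut=75 st=25 */
		return 0;
	}
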
5184 | 5290 | ||
5185 | /* | 5291 | /* |
5186 | * This function gets called by the timer code, with HZ frequency. | 5292 | * This function gets called by the timer code, with HZ frequency. |
@@ -5197,11 +5303,11 @@ void scheduler_tick(void) | |||
5197 | 5303 | ||
5198 | sched_clock_tick(); | 5304 | sched_clock_tick(); |
5199 | 5305 | ||
5200 | spin_lock(&rq->lock); | 5306 | raw_spin_lock(&rq->lock); |
5201 | update_rq_clock(rq); | 5307 | update_rq_clock(rq); |
5202 | update_cpu_load(rq); | 5308 | update_cpu_load(rq); |
5203 | curr->sched_class->task_tick(rq, curr, 0); | 5309 | curr->sched_class->task_tick(rq, curr, 0); |
5204 | spin_unlock(&rq->lock); | 5310 | raw_spin_unlock(&rq->lock); |
5205 | 5311 | ||
5206 | perf_event_task_tick(curr, cpu); | 5312 | perf_event_task_tick(curr, cpu); |
5207 | 5313 | ||
@@ -5315,13 +5421,14 @@ static inline void schedule_debug(struct task_struct *prev) | |||
5315 | #endif | 5421 | #endif |
5316 | } | 5422 | } |
5317 | 5423 | ||
5318 | static void put_prev_task(struct rq *rq, struct task_struct *p) | 5424 | static void put_prev_task(struct rq *rq, struct task_struct *prev) |
5319 | { | 5425 | { |
5320 | u64 runtime = p->se.sum_exec_runtime - p->se.prev_sum_exec_runtime; | 5426 | if (prev->state == TASK_RUNNING) { |
5427 | u64 runtime = prev->se.sum_exec_runtime; | ||
5321 | 5428 | ||
5322 | update_avg(&p->se.avg_running, runtime); | 5429 | runtime -= prev->se.prev_sum_exec_runtime; |
5430 | runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost); | ||
5323 | 5431 | ||
5324 | if (p->state == TASK_RUNNING) { | ||
5325 | /* | 5432 | /* |
5326 | * In order to avoid avg_overlap growing stale when we are | 5433 | * In order to avoid avg_overlap growing stale when we are |
5327 | * indeed overlapping and hence not getting put to sleep, grow | 5434 | * indeed overlapping and hence not getting put to sleep, grow |
@@ -5331,12 +5438,9 @@ static void put_prev_task(struct rq *rq, struct task_struct *p) | |||
5331 | * correlates to the amount of cache footprint a task can | 5438 | * correlates to the amount of cache footprint a task can |
5332 | * build up. | 5439 | * build up. |
5333 | */ | 5440 | */ |
5334 | runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost); | 5441 | update_avg(&prev->se.avg_overlap, runtime); |
5335 | update_avg(&p->se.avg_overlap, runtime); | ||
5336 | } else { | ||
5337 | update_avg(&p->se.avg_running, 0); | ||
5338 | } | 5442 | } |
5339 | p->sched_class->put_prev_task(rq, p); | 5443 | prev->sched_class->put_prev_task(rq, prev); |
5340 | } | 5444 | } |
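
put_prev_task() now folds the clamp and the average update into a single branch: only a still-runnable task contributes its last slice, capped at twice the migration cost, to avg_overlap. A standalone sketch of that step (the 1/8 weighting is an assumption matching the usual update_avg() helper elsewhere in sched.c; the numbers are arbitrary):

	#include <stdint.h>
	#include <stdio.h>

	/* Illustrative only: clamp the just-finished slice and fold it into a
	 * 1/8-weight running average, as put_prev_task() does for avg_overlap. */
	static void update_avg(uint64_t *avg, uint64_t sample)
	{
		int64_t diff = (int64_t)(sample - *avg);

		*avg += diff >> 3;		/* new = old + (sample - old)/8 */
	}

	int main(void)
	{
		uint64_t avg_overlap = 0;
		uint64_t migration_cost = 500000;	/* 0.5ms, stand-in value  */
		uint64_t slice = 3000000;		/* 3ms slice just executed */

		if (slice > 2 * migration_cost)
			slice = 2 * migration_cost;	/* cap at 2*migration_cost */
		update_avg(&avg_overlap, slice);

		printf("avg_overlap=%llu\n", (unsigned long long)avg_overlap);
		return 0;
	}
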
5341 | 5445 | ||
5342 | /* | 5446 | /* |
@@ -5397,7 +5501,7 @@ need_resched_nonpreemptible: | |||
5397 | if (sched_feat(HRTICK)) | 5501 | if (sched_feat(HRTICK)) |
5398 | hrtick_clear(rq); | 5502 | hrtick_clear(rq); |
5399 | 5503 | ||
5400 | spin_lock_irq(&rq->lock); | 5504 | raw_spin_lock_irq(&rq->lock); |
5401 | update_rq_clock(rq); | 5505 | update_rq_clock(rq); |
5402 | clear_tsk_need_resched(prev); | 5506 | clear_tsk_need_resched(prev); |
5403 | 5507 | ||
@@ -5433,12 +5537,15 @@ need_resched_nonpreemptible: | |||
5433 | cpu = smp_processor_id(); | 5537 | cpu = smp_processor_id(); |
5434 | rq = cpu_rq(cpu); | 5538 | rq = cpu_rq(cpu); |
5435 | } else | 5539 | } else |
5436 | spin_unlock_irq(&rq->lock); | 5540 | raw_spin_unlock_irq(&rq->lock); |
5437 | 5541 | ||
5438 | post_schedule(rq); | 5542 | post_schedule(rq); |
5439 | 5543 | ||
5440 | if (unlikely(reacquire_kernel_lock(current) < 0)) | 5544 | if (unlikely(reacquire_kernel_lock(current) < 0)) { |
5545 | prev = rq->curr; | ||
5546 | switch_count = &prev->nivcsw; | ||
5441 | goto need_resched_nonpreemptible; | 5547 | goto need_resched_nonpreemptible; |
5548 | } | ||
5442 | 5549 | ||
5443 | preempt_enable_no_resched(); | 5550 | preempt_enable_no_resched(); |
5444 | if (need_resched()) | 5551 | if (need_resched()) |
@@ -5446,7 +5553,7 @@ need_resched_nonpreemptible: | |||
5446 | } | 5553 | } |
5447 | EXPORT_SYMBOL(schedule); | 5554 | EXPORT_SYMBOL(schedule); |
5448 | 5555 | ||
5449 | #ifdef CONFIG_SMP | 5556 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER |
5450 | /* | 5557 | /* |
5451 | * Look out! "owner" is an entirely speculative pointer | 5558 | * Look out! "owner" is an entirely speculative pointer |
5452 | * access and not reliable. | 5559 | * access and not reliable. |
@@ -5850,14 +5957,15 @@ EXPORT_SYMBOL(wait_for_completion_killable); | |||
5850 | */ | 5957 | */ |
5851 | bool try_wait_for_completion(struct completion *x) | 5958 | bool try_wait_for_completion(struct completion *x) |
5852 | { | 5959 | { |
5960 | unsigned long flags; | ||
5853 | int ret = 1; | 5961 | int ret = 1; |
5854 | 5962 | ||
5855 | spin_lock_irq(&x->wait.lock); | 5963 | spin_lock_irqsave(&x->wait.lock, flags); |
5856 | if (!x->done) | 5964 | if (!x->done) |
5857 | ret = 0; | 5965 | ret = 0; |
5858 | else | 5966 | else |
5859 | x->done--; | 5967 | x->done--; |
5860 | spin_unlock_irq(&x->wait.lock); | 5968 | spin_unlock_irqrestore(&x->wait.lock, flags); |
5861 | return ret; | 5969 | return ret; |
5862 | } | 5970 | } |
5863 | EXPORT_SYMBOL(try_wait_for_completion); | 5971 | EXPORT_SYMBOL(try_wait_for_completion); |
@@ -5872,12 +5980,13 @@ EXPORT_SYMBOL(try_wait_for_completion); | |||
5872 | */ | 5980 | */ |
5873 | bool completion_done(struct completion *x) | 5981 | bool completion_done(struct completion *x) |
5874 | { | 5982 | { |
5983 | unsigned long flags; | ||
5875 | int ret = 1; | 5984 | int ret = 1; |
5876 | 5985 | ||
5877 | spin_lock_irq(&x->wait.lock); | 5986 | spin_lock_irqsave(&x->wait.lock, flags); |
5878 | if (!x->done) | 5987 | if (!x->done) |
5879 | ret = 0; | 5988 | ret = 0; |
5880 | spin_unlock_irq(&x->wait.lock); | 5989 | spin_unlock_irqrestore(&x->wait.lock, flags); |
5881 | return ret; | 5990 | return ret; |
5882 | } | 5991 | } |
5883 | EXPORT_SYMBOL(completion_done); | 5992 | EXPORT_SYMBOL(completion_done); |
@@ -6140,22 +6249,14 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio) | |||
6140 | BUG_ON(p->se.on_rq); | 6249 | BUG_ON(p->se.on_rq); |
6141 | 6250 | ||
6142 | p->policy = policy; | 6251 | p->policy = policy; |
6143 | switch (p->policy) { | ||
6144 | case SCHED_NORMAL: | ||
6145 | case SCHED_BATCH: | ||
6146 | case SCHED_IDLE: | ||
6147 | p->sched_class = &fair_sched_class; | ||
6148 | break; | ||
6149 | case SCHED_FIFO: | ||
6150 | case SCHED_RR: | ||
6151 | p->sched_class = &rt_sched_class; | ||
6152 | break; | ||
6153 | } | ||
6154 | |||
6155 | p->rt_priority = prio; | 6252 | p->rt_priority = prio; |
6156 | p->normal_prio = normal_prio(p); | 6253 | p->normal_prio = normal_prio(p); |
6157 | /* we are holding p->pi_lock already */ | 6254 | /* we are holding p->pi_lock already */ |
6158 | p->prio = rt_mutex_getprio(p); | 6255 | p->prio = rt_mutex_getprio(p); |
6256 | if (rt_prio(p->prio)) | ||
6257 | p->sched_class = &rt_sched_class; | ||
6258 | else | ||
6259 | p->sched_class = &fair_sched_class; | ||
6159 | set_load_weight(p); | 6260 | set_load_weight(p); |
6160 | } | 6261 | } |
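
The policy switch is gone because, with priority inheritance in play, the effective priority alone decides the class: anything in the RT range gets rt_sched_class, everything else fair_sched_class. A minimal sketch of that mapping (MAX_RT_PRIO = 100 and the rt_prio() test are assumptions based on the usual kernel definitions, not part of this diff):

	#include <stdio.h>

	#define MAX_RT_PRIO	100	/* assumption: usual kernel value */

	/* Illustrative only: the class follows the effective priority, which
	 * rt_mutex_getprio() may have boosted above the task's own policy. */
	static int rt_prio(int prio)
	{
		return prio < MAX_RT_PRIO;
	}

	static const char *pick_class(int effective_prio)
	{
		return rt_prio(effective_prio) ? "rt_sched_class" : "fair_sched_class";
	}

	int main(void)
	{
		printf("prio 50  -> %s\n", pick_class(50));	/* rt_sched_class   */
		printf("prio 120 -> %s\n", pick_class(120));	/* fair_sched_class */
		return 0;
	}
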
6161 | 6262 | ||
@@ -6270,7 +6371,7 @@ recheck: | |||
6270 | * make sure no PI-waiters arrive (or leave) while we are | 6371 | * make sure no PI-waiters arrive (or leave) while we are |
6271 | * changing the priority of the task: | 6372 | * changing the priority of the task: |
6272 | */ | 6373 | */ |
6273 | spin_lock_irqsave(&p->pi_lock, flags); | 6374 | raw_spin_lock_irqsave(&p->pi_lock, flags); |
6274 | /* | 6375 | /* |
6275 | * To be able to change p->policy safely, the appropriate | 6376 | * To be able to change p->policy safely, the appropriate |

6276 | * runqueue lock must be held. | 6377 | * runqueue lock must be held. |
@@ -6280,7 +6381,7 @@ recheck: | |||
6280 | if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { | 6381 | if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { |
6281 | policy = oldpolicy = -1; | 6382 | policy = oldpolicy = -1; |
6282 | __task_rq_unlock(rq); | 6383 | __task_rq_unlock(rq); |
6283 | spin_unlock_irqrestore(&p->pi_lock, flags); | 6384 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
6284 | goto recheck; | 6385 | goto recheck; |
6285 | } | 6386 | } |
6286 | update_rq_clock(rq); | 6387 | update_rq_clock(rq); |
@@ -6304,7 +6405,7 @@ recheck: | |||
6304 | check_class_changed(rq, p, prev_class, oldprio, running); | 6405 | check_class_changed(rq, p, prev_class, oldprio, running); |
6305 | } | 6406 | } |
6306 | __task_rq_unlock(rq); | 6407 | __task_rq_unlock(rq); |
6307 | spin_unlock_irqrestore(&p->pi_lock, flags); | 6408 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
6308 | 6409 | ||
6309 | rt_mutex_adjust_pi(p); | 6410 | rt_mutex_adjust_pi(p); |
6310 | 6411 | ||
@@ -6404,7 +6505,7 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) | |||
6404 | return -EINVAL; | 6505 | return -EINVAL; |
6405 | 6506 | ||
6406 | retval = -ESRCH; | 6507 | retval = -ESRCH; |
6407 | read_lock(&tasklist_lock); | 6508 | rcu_read_lock(); |
6408 | p = find_process_by_pid(pid); | 6509 | p = find_process_by_pid(pid); |
6409 | if (p) { | 6510 | if (p) { |
6410 | retval = security_task_getscheduler(p); | 6511 | retval = security_task_getscheduler(p); |
@@ -6412,7 +6513,7 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) | |||
6412 | retval = p->policy | 6513 | retval = p->policy |
6413 | | (p->sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0); | 6514 | | (p->sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0); |
6414 | } | 6515 | } |
6415 | read_unlock(&tasklist_lock); | 6516 | rcu_read_unlock(); |
6416 | return retval; | 6517 | return retval; |
6417 | } | 6518 | } |
6418 | 6519 | ||
@@ -6430,7 +6531,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) | |||
6430 | if (!param || pid < 0) | 6531 | if (!param || pid < 0) |
6431 | return -EINVAL; | 6532 | return -EINVAL; |
6432 | 6533 | ||
6433 | read_lock(&tasklist_lock); | 6534 | rcu_read_lock(); |
6434 | p = find_process_by_pid(pid); | 6535 | p = find_process_by_pid(pid); |
6435 | retval = -ESRCH; | 6536 | retval = -ESRCH; |
6436 | if (!p) | 6537 | if (!p) |
@@ -6441,7 +6542,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) | |||
6441 | goto out_unlock; | 6542 | goto out_unlock; |
6442 | 6543 | ||
6443 | lp.sched_priority = p->rt_priority; | 6544 | lp.sched_priority = p->rt_priority; |
6444 | read_unlock(&tasklist_lock); | 6545 | rcu_read_unlock(); |
6445 | 6546 | ||
6446 | /* | 6547 | /* |
6447 | * This one might sleep, we cannot do it with a spinlock held ... | 6548 | * This one might sleep, we cannot do it with a spinlock held ... |
@@ -6451,7 +6552,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) | |||
6451 | return retval; | 6552 | return retval; |
6452 | 6553 | ||
6453 | out_unlock: | 6554 | out_unlock: |
6454 | read_unlock(&tasklist_lock); | 6555 | rcu_read_unlock(); |
6455 | return retval; | 6556 | return retval; |
6456 | } | 6557 | } |
6457 | 6558 | ||
@@ -6462,22 +6563,18 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) | |||
6462 | int retval; | 6563 | int retval; |
6463 | 6564 | ||
6464 | get_online_cpus(); | 6565 | get_online_cpus(); |
6465 | read_lock(&tasklist_lock); | 6566 | rcu_read_lock(); |
6466 | 6567 | ||
6467 | p = find_process_by_pid(pid); | 6568 | p = find_process_by_pid(pid); |
6468 | if (!p) { | 6569 | if (!p) { |
6469 | read_unlock(&tasklist_lock); | 6570 | rcu_read_unlock(); |
6470 | put_online_cpus(); | 6571 | put_online_cpus(); |
6471 | return -ESRCH; | 6572 | return -ESRCH; |
6472 | } | 6573 | } |
6473 | 6574 | ||
6474 | /* | 6575 | /* Prevent p going away */ |
6475 | * It is not safe to call set_cpus_allowed with the | ||
6476 | * tasklist_lock held. We will bump the task_struct's | ||
6477 | * usage count and then drop tasklist_lock. | ||
6478 | */ | ||
6479 | get_task_struct(p); | 6576 | get_task_struct(p); |
6480 | read_unlock(&tasklist_lock); | 6577 | rcu_read_unlock(); |
6481 | 6578 | ||
6482 | if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { | 6579 | if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { |
6483 | retval = -ENOMEM; | 6580 | retval = -ENOMEM; |
@@ -6558,10 +6655,12 @@ SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len, | |||
6558 | long sched_getaffinity(pid_t pid, struct cpumask *mask) | 6655 | long sched_getaffinity(pid_t pid, struct cpumask *mask) |
6559 | { | 6656 | { |
6560 | struct task_struct *p; | 6657 | struct task_struct *p; |
6658 | unsigned long flags; | ||
6659 | struct rq *rq; | ||
6561 | int retval; | 6660 | int retval; |
6562 | 6661 | ||
6563 | get_online_cpus(); | 6662 | get_online_cpus(); |
6564 | read_lock(&tasklist_lock); | 6663 | rcu_read_lock(); |
6565 | 6664 | ||
6566 | retval = -ESRCH; | 6665 | retval = -ESRCH; |
6567 | p = find_process_by_pid(pid); | 6666 | p = find_process_by_pid(pid); |
@@ -6572,10 +6671,12 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask) | |||
6572 | if (retval) | 6671 | if (retval) |
6573 | goto out_unlock; | 6672 | goto out_unlock; |
6574 | 6673 | ||
6674 | rq = task_rq_lock(p, &flags); | ||
6575 | cpumask_and(mask, &p->cpus_allowed, cpu_online_mask); | 6675 | cpumask_and(mask, &p->cpus_allowed, cpu_online_mask); |
6676 | task_rq_unlock(rq, &flags); | ||
6576 | 6677 | ||
6577 | out_unlock: | 6678 | out_unlock: |
6578 | read_unlock(&tasklist_lock); | 6679 | rcu_read_unlock(); |
6579 | put_online_cpus(); | 6680 | put_online_cpus(); |
6580 | 6681 | ||
6581 | return retval; | 6682 | return retval; |
@@ -6630,7 +6731,7 @@ SYSCALL_DEFINE0(sched_yield) | |||
6630 | */ | 6731 | */ |
6631 | __release(rq->lock); | 6732 | __release(rq->lock); |
6632 | spin_release(&rq->lock.dep_map, 1, _THIS_IP_); | 6733 | spin_release(&rq->lock.dep_map, 1, _THIS_IP_); |
6633 | _raw_spin_unlock(&rq->lock); | 6734 | do_raw_spin_unlock(&rq->lock); |
6634 | preempt_enable_no_resched(); | 6735 | preempt_enable_no_resched(); |
6635 | 6736 | ||
6636 | schedule(); | 6737 | schedule(); |
@@ -6718,9 +6819,6 @@ EXPORT_SYMBOL(yield); | |||
6718 | /* | 6819 | /* |
6719 | * This task is about to go to sleep on IO. Increment rq->nr_iowait so | 6820 | * This task is about to go to sleep on IO. Increment rq->nr_iowait so |
6720 | * that process accounting knows that this is a task in IO wait state. | 6821 | * that process accounting knows that this is a task in IO wait state. |
6721 | * | ||
6722 | * But don't do that if it is a deliberate, throttling IO wait (this task | ||
6723 | * has set its backing_dev_info: the queue against which it should throttle) | ||
6724 | */ | 6822 | */ |
6725 | void __sched io_schedule(void) | 6823 | void __sched io_schedule(void) |
6726 | { | 6824 | { |
@@ -6813,6 +6911,8 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, | |||
6813 | { | 6911 | { |
6814 | struct task_struct *p; | 6912 | struct task_struct *p; |
6815 | unsigned int time_slice; | 6913 | unsigned int time_slice; |
6914 | unsigned long flags; | ||
6915 | struct rq *rq; | ||
6816 | int retval; | 6916 | int retval; |
6817 | struct timespec t; | 6917 | struct timespec t; |
6818 | 6918 | ||
@@ -6820,7 +6920,7 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, | |||
6820 | return -EINVAL; | 6920 | return -EINVAL; |
6821 | 6921 | ||
6822 | retval = -ESRCH; | 6922 | retval = -ESRCH; |
6823 | read_lock(&tasklist_lock); | 6923 | rcu_read_lock(); |
6824 | p = find_process_by_pid(pid); | 6924 | p = find_process_by_pid(pid); |
6825 | if (!p) | 6925 | if (!p) |
6826 | goto out_unlock; | 6926 | goto out_unlock; |
@@ -6829,15 +6929,17 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, | |||
6829 | if (retval) | 6929 | if (retval) |
6830 | goto out_unlock; | 6930 | goto out_unlock; |
6831 | 6931 | ||
6832 | time_slice = p->sched_class->get_rr_interval(p); | 6932 | rq = task_rq_lock(p, &flags); |
6933 | time_slice = p->sched_class->get_rr_interval(rq, p); | ||
6934 | task_rq_unlock(rq, &flags); | ||
6833 | 6935 | ||
6834 | read_unlock(&tasklist_lock); | 6936 | rcu_read_unlock(); |
6835 | jiffies_to_timespec(time_slice, &t); | 6937 | jiffies_to_timespec(time_slice, &t); |
6836 | retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0; | 6938 | retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0; |
6837 | return retval; | 6939 | return retval; |
6838 | 6940 | ||
6839 | out_unlock: | 6941 | out_unlock: |
6840 | read_unlock(&tasklist_lock); | 6942 | rcu_read_unlock(); |
6841 | return retval; | 6943 | return retval; |
6842 | } | 6944 | } |
6843 | 6945 | ||
@@ -6903,7 +7005,7 @@ void show_state_filter(unsigned long state_filter) | |||
6903 | /* | 7005 | /* |
6904 | * Only show locks if all tasks are dumped: | 7006 | * Only show locks if all tasks are dumped: |
6905 | */ | 7007 | */ |
6906 | if (state_filter == -1) | 7008 | if (!state_filter) |
6907 | debug_show_all_locks(); | 7009 | debug_show_all_locks(); |
6908 | } | 7010 | } |
6909 | 7011 | ||
@@ -6925,12 +7027,12 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) | |||
6925 | struct rq *rq = cpu_rq(cpu); | 7027 | struct rq *rq = cpu_rq(cpu); |
6926 | unsigned long flags; | 7028 | unsigned long flags; |
6927 | 7029 | ||
6928 | spin_lock_irqsave(&rq->lock, flags); | 7030 | raw_spin_lock_irqsave(&rq->lock, flags); |
6929 | 7031 | ||
6930 | __sched_fork(idle); | 7032 | __sched_fork(idle); |
7033 | idle->state = TASK_RUNNING; | ||
6931 | idle->se.exec_start = sched_clock(); | 7034 | idle->se.exec_start = sched_clock(); |
6932 | 7035 | ||
6933 | idle->prio = idle->normal_prio = MAX_PRIO; | ||
6934 | cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu)); | 7036 | cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu)); |
6935 | __set_task_cpu(idle, cpu); | 7037 | __set_task_cpu(idle, cpu); |
6936 | 7038 | ||
@@ -6938,7 +7040,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) | |||
6938 | #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) | 7040 | #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) |
6939 | idle->oncpu = 1; | 7041 | idle->oncpu = 1; |
6940 | #endif | 7042 | #endif |
6941 | spin_unlock_irqrestore(&rq->lock, flags); | 7043 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
6942 | 7044 | ||
6943 | /* Set the preempt count _outside_ the spinlocks! */ | 7045 | /* Set the preempt count _outside_ the spinlocks! */ |
6944 | #if defined(CONFIG_PREEMPT) | 7046 | #if defined(CONFIG_PREEMPT) |
@@ -6971,22 +7073,43 @@ cpumask_var_t nohz_cpu_mask; | |||
6971 | * | 7073 | * |
6972 | * This idea comes from the SD scheduler of Con Kolivas: | 7074 | * This idea comes from the SD scheduler of Con Kolivas: |
6973 | */ | 7075 | */ |
6974 | static inline void sched_init_granularity(void) | 7076 | static int get_update_sysctl_factor(void) |
6975 | { | 7077 | { |
6976 | unsigned int factor = 1 + ilog2(num_online_cpus()); | 7078 | unsigned int cpus = min_t(int, num_online_cpus(), 8); |
6977 | const unsigned long limit = 200000000; | 7079 | unsigned int factor; |
6978 | 7080 | ||
6979 | sysctl_sched_min_granularity *= factor; | 7081 | switch (sysctl_sched_tunable_scaling) { |
6980 | if (sysctl_sched_min_granularity > limit) | 7082 | case SCHED_TUNABLESCALING_NONE: |
6981 | sysctl_sched_min_granularity = limit; | 7083 | factor = 1; |
7084 | break; | ||
7085 | case SCHED_TUNABLESCALING_LINEAR: | ||
7086 | factor = cpus; | ||
7087 | break; | ||
7088 | case SCHED_TUNABLESCALING_LOG: | ||
7089 | default: | ||
7090 | factor = 1 + ilog2(cpus); | ||
7091 | break; | ||
7092 | } | ||
6982 | 7093 | ||
6983 | sysctl_sched_latency *= factor; | 7094 | return factor; |
6984 | if (sysctl_sched_latency > limit) | 7095 | } |
6985 | sysctl_sched_latency = limit; | ||
6986 | 7096 | ||
6987 | sysctl_sched_wakeup_granularity *= factor; | 7097 | static void update_sysctl(void) |
7098 | { | ||
7099 | unsigned int factor = get_update_sysctl_factor(); | ||
7100 | |||
7101 | #define SET_SYSCTL(name) \ | ||
7102 | (sysctl_##name = (factor) * normalized_sysctl_##name) | ||
7103 | SET_SYSCTL(sched_min_granularity); | ||
7104 | SET_SYSCTL(sched_latency); | ||
7105 | SET_SYSCTL(sched_wakeup_granularity); | ||
7106 | SET_SYSCTL(sched_shares_ratelimit); | ||
7107 | #undef SET_SYSCTL | ||
7108 | } | ||
6988 | 7109 | ||
6989 | sysctl_sched_shares_ratelimit *= factor; | 7110 | static inline void sched_init_granularity(void) |
7111 | { | ||
7112 | update_sysctl(); | ||
6990 | } | 7113 | } |
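
update_sysctl() rescales each tunable from a fixed normalized_sysctl_* baseline instead of multiplying the live value in place, so the factor can be recomputed whenever the online CPU count or the scaling mode changes without compounding. A standalone sketch of the factor selection and the rescale (enum names mirror the SCHED_TUNABLESCALING_* modes above; baseline numbers are arbitrary):

	#include <stdio.h>

	enum tunable_scaling { SCALING_NONE, SCALING_LINEAR, SCALING_LOG };

	static unsigned int ilog2_u(unsigned int x)
	{
		unsigned int r = 0;

		while (x >>= 1)
			r++;
		return r;
	}

	/* Illustrative only: cap at 8 CPUs and pick the factor per scaling
	 * mode, as get_update_sysctl_factor() does above. */
	static unsigned int factor_for(unsigned int online_cpus, enum tunable_scaling mode)
	{
		unsigned int cpus = online_cpus < 8 ? online_cpus : 8;

		switch (mode) {
		case SCALING_NONE:	return 1;
		case SCALING_LINEAR:	return cpus;
		case SCALING_LOG:
		default:		return 1 + ilog2_u(cpus);
		}
	}

	int main(void)
	{
		unsigned int normalized_latency = 6000000;	/* 6ms baseline */
		unsigned int factor = factor_for(16, SCALING_LOG);

		/* the sysctl value is always factor * baseline, never compounded */
		printf("factor=%u latency=%u\n", factor, factor * normalized_latency);
		return 0;
	}
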
6991 | 7114 | ||
6992 | #ifdef CONFIG_SMP | 7115 | #ifdef CONFIG_SMP |
@@ -7022,8 +7145,28 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) | |||
7022 | struct rq *rq; | 7145 | struct rq *rq; |
7023 | int ret = 0; | 7146 | int ret = 0; |
7024 | 7147 | ||
7148 | /* | ||
7149 | * Since we rely on wake-ups to migrate sleeping tasks, don't change | ||
7150 | * the ->cpus_allowed mask from under waking tasks, which would be | ||
7151 | * possible when we change rq->lock in ttwu(), so synchronize against | ||
7152 | * TASK_WAKING to avoid that. | ||
7153 | * | ||
7154 | * Make an exception for freshly cloned tasks, since cpuset namespaces | ||
7155 | * might move the task about, we have to validate the target in | ||
7156 | * wake_up_new_task() anyway since the cpu might have gone away. | ||
7157 | */ | ||
7158 | again: | ||
7159 | while (p->state == TASK_WAKING && !(p->flags & PF_STARTING)) | ||
7160 | cpu_relax(); | ||
7161 | |||
7025 | rq = task_rq_lock(p, &flags); | 7162 | rq = task_rq_lock(p, &flags); |
7026 | if (!cpumask_intersects(new_mask, cpu_online_mask)) { | 7163 | |
7164 | if (p->state == TASK_WAKING && !(p->flags & PF_STARTING)) { | ||
7165 | task_rq_unlock(rq, &flags); | ||
7166 | goto again; | ||
7167 | } | ||
7168 | |||
7169 | if (!cpumask_intersects(new_mask, cpu_active_mask)) { | ||
7027 | ret = -EINVAL; | 7170 | ret = -EINVAL; |
7028 | goto out; | 7171 | goto out; |
7029 | } | 7172 | } |
@@ -7045,7 +7188,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) | |||
7045 | if (cpumask_test_cpu(task_cpu(p), new_mask)) | 7188 | if (cpumask_test_cpu(task_cpu(p), new_mask)) |
7046 | goto out; | 7189 | goto out; |
7047 | 7190 | ||
7048 | if (migrate_task(p, cpumask_any_and(cpu_online_mask, new_mask), &req)) { | 7191 | if (migrate_task(p, cpumask_any_and(cpu_active_mask, new_mask), &req)) { |
7049 | /* Need help from migration thread: drop lock and wait. */ | 7192 | /* Need help from migration thread: drop lock and wait. */ |
7050 | struct task_struct *mt = rq->migration_thread; | 7193 | struct task_struct *mt = rq->migration_thread; |
7051 | 7194 | ||
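
The again: loop above is the classic wait, lock, re-check, retry shape: spin outside the runqueue lock until the transient TASK_WAKING window closes, then confirm under the lock and go around again if it reopened before the lock was taken. A standalone pthreads sketch of the same pattern (illustrative only, no relation to real task states):

	#include <stdatomic.h>
	#include <pthread.h>
	#include <sched.h>

	static atomic_int waking;		/* stands in for p->state == TASK_WAKING */
	static pthread_mutex_t rq_lock = PTHREAD_MUTEX_INITIALIZER;

	static void change_affinity_sketch(void)
	{
	again:
		while (atomic_load(&waking))	/* busy-wait outside the lock */
			sched_yield();

		pthread_mutex_lock(&rq_lock);
		if (atomic_load(&waking)) {	/* raced: window reopened before we locked */
			pthread_mutex_unlock(&rq_lock);
			goto again;
		}
		/* ... safe to update the affinity mask here ... */
		pthread_mutex_unlock(&rq_lock);
	}

	int main(void)
	{
		change_affinity_sketch();
		return 0;
	}
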
@@ -7078,7 +7221,7 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); | |||
7078 | static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) | 7221 | static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) |
7079 | { | 7222 | { |
7080 | struct rq *rq_dest, *rq_src; | 7223 | struct rq *rq_dest, *rq_src; |
7081 | int ret = 0, on_rq; | 7224 | int ret = 0; |
7082 | 7225 | ||
7083 | if (unlikely(!cpu_active(dest_cpu))) | 7226 | if (unlikely(!cpu_active(dest_cpu))) |
7084 | return ret; | 7227 | return ret; |
@@ -7094,12 +7237,13 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) | |||
7094 | if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) | 7237 | if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) |
7095 | goto fail; | 7238 | goto fail; |
7096 | 7239 | ||
7097 | on_rq = p->se.on_rq; | 7240 | /* |
7098 | if (on_rq) | 7241 | * If we're not on a rq, the next wake-up will ensure we're |
7242 | * placed properly. | ||
7243 | */ | ||
7244 | if (p->se.on_rq) { | ||
7099 | deactivate_task(rq_src, p, 0); | 7245 | deactivate_task(rq_src, p, 0); |
7100 | 7246 | set_task_cpu(p, dest_cpu); | |
7101 | set_task_cpu(p, dest_cpu); | ||
7102 | if (on_rq) { | ||
7103 | activate_task(rq_dest, p, 0); | 7247 | activate_task(rq_dest, p, 0); |
7104 | check_preempt_curr(rq_dest, p, 0); | 7248 | check_preempt_curr(rq_dest, p, 0); |
7105 | } | 7249 | } |
@@ -7134,10 +7278,10 @@ static int migration_thread(void *data) | |||
7134 | struct migration_req *req; | 7278 | struct migration_req *req; |
7135 | struct list_head *head; | 7279 | struct list_head *head; |
7136 | 7280 | ||
7137 | spin_lock_irq(&rq->lock); | 7281 | raw_spin_lock_irq(&rq->lock); |
7138 | 7282 | ||
7139 | if (cpu_is_offline(cpu)) { | 7283 | if (cpu_is_offline(cpu)) { |
7140 | spin_unlock_irq(&rq->lock); | 7284 | raw_spin_unlock_irq(&rq->lock); |
7141 | break; | 7285 | break; |
7142 | } | 7286 | } |
7143 | 7287 | ||
@@ -7149,7 +7293,7 @@ static int migration_thread(void *data) | |||
7149 | head = &rq->migration_queue; | 7293 | head = &rq->migration_queue; |
7150 | 7294 | ||
7151 | if (list_empty(head)) { | 7295 | if (list_empty(head)) { |
7152 | spin_unlock_irq(&rq->lock); | 7296 | raw_spin_unlock_irq(&rq->lock); |
7153 | schedule(); | 7297 | schedule(); |
7154 | set_current_state(TASK_INTERRUPTIBLE); | 7298 | set_current_state(TASK_INTERRUPTIBLE); |
7155 | continue; | 7299 | continue; |
@@ -7158,14 +7302,14 @@ static int migration_thread(void *data) | |||
7158 | list_del_init(head->next); | 7302 | list_del_init(head->next); |
7159 | 7303 | ||
7160 | if (req->task != NULL) { | 7304 | if (req->task != NULL) { |
7161 | spin_unlock(&rq->lock); | 7305 | raw_spin_unlock(&rq->lock); |
7162 | __migrate_task(req->task, cpu, req->dest_cpu); | 7306 | __migrate_task(req->task, cpu, req->dest_cpu); |
7163 | } else if (likely(cpu == (badcpu = smp_processor_id()))) { | 7307 | } else if (likely(cpu == (badcpu = smp_processor_id()))) { |
7164 | req->dest_cpu = RCU_MIGRATION_GOT_QS; | 7308 | req->dest_cpu = RCU_MIGRATION_GOT_QS; |
7165 | spin_unlock(&rq->lock); | 7309 | raw_spin_unlock(&rq->lock); |
7166 | } else { | 7310 | } else { |
7167 | req->dest_cpu = RCU_MIGRATION_MUST_SYNC; | 7311 | req->dest_cpu = RCU_MIGRATION_MUST_SYNC; |
7168 | spin_unlock(&rq->lock); | 7312 | raw_spin_unlock(&rq->lock); |
7169 | WARN_ONCE(1, "migration_thread() on CPU %d, expected %d\n", badcpu, cpu); | 7313 | WARN_ONCE(1, "migration_thread() on CPU %d, expected %d\n", badcpu, cpu); |
7170 | } | 7314 | } |
7171 | local_irq_enable(); | 7315 | local_irq_enable(); |
@@ -7195,37 +7339,10 @@ static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu) | |||
7195 | static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) | 7339 | static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) |
7196 | { | 7340 | { |
7197 | int dest_cpu; | 7341 | int dest_cpu; |
7198 | const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(dead_cpu)); | ||
7199 | 7342 | ||
7200 | again: | 7343 | again: |
7201 | /* Look for allowed, online CPU in same node. */ | 7344 | dest_cpu = select_fallback_rq(dead_cpu, p); |
7202 | for_each_cpu_and(dest_cpu, nodemask, cpu_online_mask) | ||
7203 | if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) | ||
7204 | goto move; | ||
7205 | |||
7206 | /* Any allowed, online CPU? */ | ||
7207 | dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_online_mask); | ||
7208 | if (dest_cpu < nr_cpu_ids) | ||
7209 | goto move; | ||
7210 | |||
7211 | /* No more Mr. Nice Guy. */ | ||
7212 | if (dest_cpu >= nr_cpu_ids) { | ||
7213 | cpuset_cpus_allowed_locked(p, &p->cpus_allowed); | ||
7214 | dest_cpu = cpumask_any_and(cpu_online_mask, &p->cpus_allowed); | ||
7215 | 7345 | ||
7216 | /* | ||
7217 | * Don't tell them about moving exiting tasks or | ||
7218 | * kernel threads (both mm NULL), since they never | ||
7219 | * leave kernel. | ||
7220 | */ | ||
7221 | if (p->mm && printk_ratelimit()) { | ||
7222 | printk(KERN_INFO "process %d (%s) no " | ||
7223 | "longer affine to cpu%d\n", | ||
7224 | task_pid_nr(p), p->comm, dead_cpu); | ||
7225 | } | ||
7226 | } | ||
7227 | |||
7228 | move: | ||
7229 | /* It can have affinity changed while we were choosing. */ | 7346 | /* It can have affinity changed while we were choosing. */ |
7230 | if (unlikely(!__migrate_task_irq(p, dead_cpu, dest_cpu))) | 7347 | if (unlikely(!__migrate_task_irq(p, dead_cpu, dest_cpu))) |
7231 | goto again; | 7348 | goto again; |
@@ -7240,7 +7357,7 @@ move: | |||
7240 | */ | 7357 | */ |
7241 | static void migrate_nr_uninterruptible(struct rq *rq_src) | 7358 | static void migrate_nr_uninterruptible(struct rq *rq_src) |
7242 | { | 7359 | { |
7243 | struct rq *rq_dest = cpu_rq(cpumask_any(cpu_online_mask)); | 7360 | struct rq *rq_dest = cpu_rq(cpumask_any(cpu_active_mask)); |
7244 | unsigned long flags; | 7361 | unsigned long flags; |
7245 | 7362 | ||
7246 | local_irq_save(flags); | 7363 | local_irq_save(flags); |
@@ -7288,14 +7405,14 @@ void sched_idle_next(void) | |||
7288 | * Strictly not necessary since rest of the CPUs are stopped by now | 7405 | * Strictly not necessary since rest of the CPUs are stopped by now |
7289 | * and interrupts disabled on the current cpu. | 7406 | * and interrupts disabled on the current cpu. |
7290 | */ | 7407 | */ |
7291 | spin_lock_irqsave(&rq->lock, flags); | 7408 | raw_spin_lock_irqsave(&rq->lock, flags); |
7292 | 7409 | ||
7293 | __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1); | 7410 | __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1); |
7294 | 7411 | ||
7295 | update_rq_clock(rq); | 7412 | update_rq_clock(rq); |
7296 | activate_task(rq, p, 0); | 7413 | activate_task(rq, p, 0); |
7297 | 7414 | ||
7298 | spin_unlock_irqrestore(&rq->lock, flags); | 7415 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
7299 | } | 7416 | } |
7300 | 7417 | ||
7301 | /* | 7418 | /* |
@@ -7331,9 +7448,9 @@ static void migrate_dead(unsigned int dead_cpu, struct task_struct *p) | |||
7331 | * that's OK. No task can be added to this CPU, so iteration is | 7448 | * that's OK. No task can be added to this CPU, so iteration is |
7332 | * fine. | 7449 | * fine. |
7333 | */ | 7450 | */ |
7334 | spin_unlock_irq(&rq->lock); | 7451 | raw_spin_unlock_irq(&rq->lock); |
7335 | move_task_off_dead_cpu(dead_cpu, p); | 7452 | move_task_off_dead_cpu(dead_cpu, p); |
7336 | spin_lock_irq(&rq->lock); | 7453 | raw_spin_lock_irq(&rq->lock); |
7337 | 7454 | ||
7338 | put_task_struct(p); | 7455 | put_task_struct(p); |
7339 | } | 7456 | } |
@@ -7374,17 +7491,16 @@ static struct ctl_table sd_ctl_dir[] = { | |||
7374 | .procname = "sched_domain", | 7491 | .procname = "sched_domain", |
7375 | .mode = 0555, | 7492 | .mode = 0555, |
7376 | }, | 7493 | }, |
7377 | {0, }, | 7494 | {} |
7378 | }; | 7495 | }; |
7379 | 7496 | ||
7380 | static struct ctl_table sd_ctl_root[] = { | 7497 | static struct ctl_table sd_ctl_root[] = { |
7381 | { | 7498 | { |
7382 | .ctl_name = CTL_KERN, | ||
7383 | .procname = "kernel", | 7499 | .procname = "kernel", |
7384 | .mode = 0555, | 7500 | .mode = 0555, |
7385 | .child = sd_ctl_dir, | 7501 | .child = sd_ctl_dir, |
7386 | }, | 7502 | }, |
7387 | {0, }, | 7503 | {} |
7388 | }; | 7504 | }; |
7389 | 7505 | ||
7390 | static struct ctl_table *sd_alloc_ctl_entry(int n) | 7506 | static struct ctl_table *sd_alloc_ctl_entry(int n) |
@@ -7494,7 +7610,7 @@ static ctl_table *sd_alloc_ctl_cpu_table(int cpu) | |||
7494 | static struct ctl_table_header *sd_sysctl_header; | 7610 | static struct ctl_table_header *sd_sysctl_header; |
7495 | static void register_sched_domain_sysctl(void) | 7611 | static void register_sched_domain_sysctl(void) |
7496 | { | 7612 | { |
7497 | int i, cpu_num = num_online_cpus(); | 7613 | int i, cpu_num = num_possible_cpus(); |
7498 | struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1); | 7614 | struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1); |
7499 | char buf[32]; | 7615 | char buf[32]; |
7500 | 7616 | ||
@@ -7504,7 +7620,7 @@ static void register_sched_domain_sysctl(void) | |||
7504 | if (entry == NULL) | 7620 | if (entry == NULL) |
7505 | return; | 7621 | return; |
7506 | 7622 | ||
7507 | for_each_online_cpu(i) { | 7623 | for_each_possible_cpu(i) { |
7508 | snprintf(buf, 32, "cpu%d", i); | 7624 | snprintf(buf, 32, "cpu%d", i); |
7509 | entry->procname = kstrdup(buf, GFP_KERNEL); | 7625 | entry->procname = kstrdup(buf, GFP_KERNEL); |
7510 | entry->mode = 0555; | 7626 | entry->mode = 0555; |
@@ -7600,13 +7716,13 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
7600 | 7716 | ||
7601 | /* Update our root-domain */ | 7717 | /* Update our root-domain */ |
7602 | rq = cpu_rq(cpu); | 7718 | rq = cpu_rq(cpu); |
7603 | spin_lock_irqsave(&rq->lock, flags); | 7719 | raw_spin_lock_irqsave(&rq->lock, flags); |
7604 | if (rq->rd) { | 7720 | if (rq->rd) { |
7605 | BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); | 7721 | BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); |
7606 | 7722 | ||
7607 | set_rq_online(rq); | 7723 | set_rq_online(rq); |
7608 | } | 7724 | } |
7609 | spin_unlock_irqrestore(&rq->lock, flags); | 7725 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
7610 | break; | 7726 | break; |
7611 | 7727 | ||
7612 | #ifdef CONFIG_HOTPLUG_CPU | 7728 | #ifdef CONFIG_HOTPLUG_CPU |
@@ -7631,14 +7747,13 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
7631 | put_task_struct(rq->migration_thread); | 7747 | put_task_struct(rq->migration_thread); |
7632 | rq->migration_thread = NULL; | 7748 | rq->migration_thread = NULL; |
7633 | /* Idle task back to normal (off runqueue, low prio) */ | 7749 | /* Idle task back to normal (off runqueue, low prio) */ |
7634 | spin_lock_irq(&rq->lock); | 7750 | raw_spin_lock_irq(&rq->lock); |
7635 | update_rq_clock(rq); | 7751 | update_rq_clock(rq); |
7636 | deactivate_task(rq, rq->idle, 0); | 7752 | deactivate_task(rq, rq->idle, 0); |
7637 | rq->idle->static_prio = MAX_PRIO; | ||
7638 | __setscheduler(rq, rq->idle, SCHED_NORMAL, 0); | 7753 | __setscheduler(rq, rq->idle, SCHED_NORMAL, 0); |
7639 | rq->idle->sched_class = &idle_sched_class; | 7754 | rq->idle->sched_class = &idle_sched_class; |
7640 | migrate_dead_tasks(cpu); | 7755 | migrate_dead_tasks(cpu); |
7641 | spin_unlock_irq(&rq->lock); | 7756 | raw_spin_unlock_irq(&rq->lock); |
7642 | cpuset_unlock(); | 7757 | cpuset_unlock(); |
7643 | migrate_nr_uninterruptible(rq); | 7758 | migrate_nr_uninterruptible(rq); |
7644 | BUG_ON(rq->nr_running != 0); | 7759 | BUG_ON(rq->nr_running != 0); |
@@ -7648,30 +7763,30 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
7648 | * they didn't take sched_hotcpu_mutex. Just wake up | 7763 | * they didn't take sched_hotcpu_mutex. Just wake up |
7649 | * the requestors. | 7764 | * the requestors. |
7650 | */ | 7765 | */ |
7651 | spin_lock_irq(&rq->lock); | 7766 | raw_spin_lock_irq(&rq->lock); |
7652 | while (!list_empty(&rq->migration_queue)) { | 7767 | while (!list_empty(&rq->migration_queue)) { |
7653 | struct migration_req *req; | 7768 | struct migration_req *req; |
7654 | 7769 | ||
7655 | req = list_entry(rq->migration_queue.next, | 7770 | req = list_entry(rq->migration_queue.next, |
7656 | struct migration_req, list); | 7771 | struct migration_req, list); |
7657 | list_del_init(&req->list); | 7772 | list_del_init(&req->list); |
7658 | spin_unlock_irq(&rq->lock); | 7773 | raw_spin_unlock_irq(&rq->lock); |
7659 | complete(&req->done); | 7774 | complete(&req->done); |
7660 | spin_lock_irq(&rq->lock); | 7775 | raw_spin_lock_irq(&rq->lock); |
7661 | } | 7776 | } |
7662 | spin_unlock_irq(&rq->lock); | 7777 | raw_spin_unlock_irq(&rq->lock); |
7663 | break; | 7778 | break; |
7664 | 7779 | ||
7665 | case CPU_DYING: | 7780 | case CPU_DYING: |
7666 | case CPU_DYING_FROZEN: | 7781 | case CPU_DYING_FROZEN: |
7667 | /* Update our root-domain */ | 7782 | /* Update our root-domain */ |
7668 | rq = cpu_rq(cpu); | 7783 | rq = cpu_rq(cpu); |
7669 | spin_lock_irqsave(&rq->lock, flags); | 7784 | raw_spin_lock_irqsave(&rq->lock, flags); |
7670 | if (rq->rd) { | 7785 | if (rq->rd) { |
7671 | BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); | 7786 | BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); |
7672 | set_rq_offline(rq); | 7787 | set_rq_offline(rq); |
7673 | } | 7788 | } |
7674 | spin_unlock_irqrestore(&rq->lock, flags); | 7789 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
7675 | break; | 7790 | break; |
7676 | #endif | 7791 | #endif |
7677 | } | 7792 | } |
@@ -7708,6 +7823,16 @@ early_initcall(migration_init); | |||
7708 | 7823 | ||
7709 | #ifdef CONFIG_SCHED_DEBUG | 7824 | #ifdef CONFIG_SCHED_DEBUG |
7710 | 7825 | ||
7826 | static __read_mostly int sched_domain_debug_enabled; | ||
7827 | |||
7828 | static int __init sched_domain_debug_setup(char *str) | ||
7829 | { | ||
7830 | sched_domain_debug_enabled = 1; | ||
7831 | |||
7832 | return 0; | ||
7833 | } | ||
7834 | early_param("sched_debug", sched_domain_debug_setup); | ||
7835 | |||
7711 | static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, | 7836 | static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, |
7712 | struct cpumask *groupmask) | 7837 | struct cpumask *groupmask) |
7713 | { | 7838 | { |
@@ -7794,6 +7919,9 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) | |||
7794 | cpumask_var_t groupmask; | 7919 | cpumask_var_t groupmask; |
7795 | int level = 0; | 7920 | int level = 0; |
7796 | 7921 | ||
7922 | if (!sched_domain_debug_enabled) | ||
7923 | return; | ||
7924 | |||
7797 | if (!sd) { | 7925 | if (!sd) { |
7798 | printk(KERN_DEBUG "CPU%d attaching NULL sched-domain.\n", cpu); | 7926 | printk(KERN_DEBUG "CPU%d attaching NULL sched-domain.\n", cpu); |
7799 | return; | 7927 | return; |
@@ -7873,6 +8001,8 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) | |||
7873 | 8001 | ||
7874 | static void free_rootdomain(struct root_domain *rd) | 8002 | static void free_rootdomain(struct root_domain *rd) |
7875 | { | 8003 | { |
8004 | synchronize_sched(); | ||
8005 | |||
7876 | cpupri_cleanup(&rd->cpupri); | 8006 | cpupri_cleanup(&rd->cpupri); |
7877 | 8007 | ||
7878 | free_cpumask_var(rd->rto_mask); | 8008 | free_cpumask_var(rd->rto_mask); |
@@ -7886,7 +8016,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd) | |||
7886 | struct root_domain *old_rd = NULL; | 8016 | struct root_domain *old_rd = NULL; |
7887 | unsigned long flags; | 8017 | unsigned long flags; |
7888 | 8018 | ||
7889 | spin_lock_irqsave(&rq->lock, flags); | 8019 | raw_spin_lock_irqsave(&rq->lock, flags); |
7890 | 8020 | ||
7891 | if (rq->rd) { | 8021 | if (rq->rd) { |
7892 | old_rd = rq->rd; | 8022 | old_rd = rq->rd; |
@@ -7912,7 +8042,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd) | |||
7912 | if (cpumask_test_cpu(rq->cpu, cpu_active_mask)) | 8042 | if (cpumask_test_cpu(rq->cpu, cpu_active_mask)) |
7913 | set_rq_online(rq); | 8043 | set_rq_online(rq); |
7914 | 8044 | ||
7915 | spin_unlock_irqrestore(&rq->lock, flags); | 8045 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
7916 | 8046 | ||
7917 | if (old_rd) | 8047 | if (old_rd) |
7918 | free_rootdomain(old_rd); | 8048 | free_rootdomain(old_rd); |
@@ -8013,6 +8143,7 @@ static cpumask_var_t cpu_isolated_map; | |||
8013 | /* Setup the mask of cpus configured for isolated domains */ | 8143 | /* Setup the mask of cpus configured for isolated domains */ |
8014 | static int __init isolated_cpu_setup(char *str) | 8144 | static int __init isolated_cpu_setup(char *str) |
8015 | { | 8145 | { |
8146 | alloc_bootmem_cpumask_var(&cpu_isolated_map); | ||
8016 | cpulist_parse(str, cpu_isolated_map); | 8147 | cpulist_parse(str, cpu_isolated_map); |
8017 | return 1; | 8148 | return 1; |
8018 | } | 8149 | } |
@@ -8197,14 +8328,14 @@ enum s_alloc { | |||
8197 | */ | 8328 | */ |
8198 | #ifdef CONFIG_SCHED_SMT | 8329 | #ifdef CONFIG_SCHED_SMT |
8199 | static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains); | 8330 | static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains); |
8200 | static DEFINE_PER_CPU(struct static_sched_group, sched_group_cpus); | 8331 | static DEFINE_PER_CPU(struct static_sched_group, sched_groups); |
8201 | 8332 | ||
8202 | static int | 8333 | static int |
8203 | cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map, | 8334 | cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map, |
8204 | struct sched_group **sg, struct cpumask *unused) | 8335 | struct sched_group **sg, struct cpumask *unused) |
8205 | { | 8336 | { |
8206 | if (sg) | 8337 | if (sg) |
8207 | *sg = &per_cpu(sched_group_cpus, cpu).sg; | 8338 | *sg = &per_cpu(sched_groups, cpu).sg; |
8208 | return cpu; | 8339 | return cpu; |
8209 | } | 8340 | } |
8210 | #endif /* CONFIG_SCHED_SMT */ | 8341 | #endif /* CONFIG_SCHED_SMT */ |
@@ -8849,7 +8980,7 @@ static int build_sched_domains(const struct cpumask *cpu_map) | |||
8849 | return __build_sched_domains(cpu_map, NULL); | 8980 | return __build_sched_domains(cpu_map, NULL); |
8850 | } | 8981 | } |
8851 | 8982 | ||
8852 | static struct cpumask *doms_cur; /* current sched domains */ | 8983 | static cpumask_var_t *doms_cur; /* current sched domains */ |
8853 | static int ndoms_cur; /* number of sched domains in 'doms_cur' */ | 8984 | static int ndoms_cur; /* number of sched domains in 'doms_cur' */ |
8854 | static struct sched_domain_attr *dattr_cur; | 8985 | static struct sched_domain_attr *dattr_cur; |
8855 | /* attributes of custom domains in 'doms_cur' */ | 8986 | /* attributes of custom domains in 'doms_cur' */ |
@@ -8871,6 +9002,31 @@ int __attribute__((weak)) arch_update_cpu_topology(void) | |||
8871 | return 0; | 9002 | return 0; |
8872 | } | 9003 | } |
8873 | 9004 | ||
9005 | cpumask_var_t *alloc_sched_domains(unsigned int ndoms) | ||
9006 | { | ||
9007 | int i; | ||
9008 | cpumask_var_t *doms; | ||
9009 | |||
9010 | doms = kmalloc(sizeof(*doms) * ndoms, GFP_KERNEL); | ||
9011 | if (!doms) | ||
9012 | return NULL; | ||
9013 | for (i = 0; i < ndoms; i++) { | ||
9014 | if (!alloc_cpumask_var(&doms[i], GFP_KERNEL)) { | ||
9015 | free_sched_domains(doms, i); | ||
9016 | return NULL; | ||
9017 | } | ||
9018 | } | ||
9019 | return doms; | ||
9020 | } | ||
9021 | |||
9022 | void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms) | ||
9023 | { | ||
9024 | unsigned int i; | ||
9025 | for (i = 0; i < ndoms; i++) | ||
9026 | free_cpumask_var(doms[i]); | ||
9027 | kfree(doms); | ||
9028 | } | ||
9029 | |||
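
The helpers above replace the old flat kmalloc'd cpumask array with an array of cpumask_var_t, so CONFIG_CPUMASK_OFFSTACK callers no longer need to know the mask size. A hedged sketch of a typical caller (hypothetical function, not part of this patch; it assumes get_online_cpus() satisfies the "hotplug lock held" rule documented for partition_sched_domains() below):

	/*
	 * Illustrative caller only: build a single domain spanning the online
	 * CPUs and hand it to partition_sched_domains(), which takes
	 * ownership of the array and will free_sched_domains() it when it is
	 * replaced.
	 */
	static int rebuild_single_domain(void)
	{
		cpumask_var_t *doms = alloc_sched_domains(1);

		if (!doms)
			return -ENOMEM;

		cpumask_copy(doms[0], cpu_online_mask);

		get_online_cpus();
		partition_sched_domains(1, doms, NULL);	/* doms now owned by the scheduler */
		put_online_cpus();

		return 0;
	}
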
8874 | /* | 9030 | /* |
8875 | * Set up scheduler domains and groups. Callers must hold the hotplug lock. | 9031 | * Set up scheduler domains and groups. Callers must hold the hotplug lock. |
8876 | * For now this just excludes isolated cpus, but could be used to | 9032 | * For now this just excludes isolated cpus, but could be used to |
@@ -8882,12 +9038,12 @@ static int arch_init_sched_domains(const struct cpumask *cpu_map) | |||
8882 | 9038 | ||
8883 | arch_update_cpu_topology(); | 9039 | arch_update_cpu_topology(); |
8884 | ndoms_cur = 1; | 9040 | ndoms_cur = 1; |
8885 | doms_cur = kmalloc(cpumask_size(), GFP_KERNEL); | 9041 | doms_cur = alloc_sched_domains(ndoms_cur); |
8886 | if (!doms_cur) | 9042 | if (!doms_cur) |
8887 | doms_cur = fallback_doms; | 9043 | doms_cur = &fallback_doms; |
8888 | cpumask_andnot(doms_cur, cpu_map, cpu_isolated_map); | 9044 | cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map); |
8889 | dattr_cur = NULL; | 9045 | dattr_cur = NULL; |
8890 | err = build_sched_domains(doms_cur); | 9046 | err = build_sched_domains(doms_cur[0]); |
8891 | register_sched_domain_sysctl(); | 9047 | register_sched_domain_sysctl(); |
8892 | 9048 | ||
8893 | return err; | 9049 | return err; |
@@ -8937,19 +9093,19 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur, | |||
8937 | * doms_new[] to the current sched domain partitioning, doms_cur[]. | 9093 | * doms_new[] to the current sched domain partitioning, doms_cur[]. |
8938 | * It destroys each deleted domain and builds each new domain. | 9094 | * It destroys each deleted domain and builds each new domain. |
8939 | * | 9095 | * |
8940 | * 'doms_new' is an array of cpumask's of length 'ndoms_new'. | 9096 | * 'doms_new' is an array of cpumask_var_t's of length 'ndoms_new'. |
8941 | * The masks don't intersect (don't overlap.) We should setup one | 9097 | * The masks don't intersect (don't overlap.) We should setup one |
8942 | * sched domain for each mask. CPUs not in any of the cpumasks will | 9098 | * sched domain for each mask. CPUs not in any of the cpumasks will |
8943 | * not be load balanced. If the same cpumask appears both in the | 9099 | * not be load balanced. If the same cpumask appears both in the |
8944 | * current 'doms_cur' domains and in the new 'doms_new', we can leave | 9100 | * current 'doms_cur' domains and in the new 'doms_new', we can leave |
8945 | * it as it is. | 9101 | * it as it is. |
8946 | * | 9102 | * |
8947 | * The passed in 'doms_new' should be kmalloc'd. This routine takes | 9103 | * The passed in 'doms_new' should be allocated using |
8948 | * ownership of it and will kfree it when done with it. If the caller | 9104 | * alloc_sched_domains. This routine takes ownership of it and will |
8949 | * failed the kmalloc call, then it can pass in doms_new == NULL && | 9105 | * free_sched_domains it when done with it. If the caller failed the |
8950 | * ndoms_new == 1, and partition_sched_domains() will fallback to | 9106 | * alloc call, then it can pass in doms_new == NULL && ndoms_new == 1, |
8951 | * the single partition 'fallback_doms', it also forces the domains | 9107 | * and partition_sched_domains() will fallback to the single partition |
8952 | * to be rebuilt. | 9108 | * 'fallback_doms', it also forces the domains to be rebuilt. |
8953 | * | 9109 | * |
8954 | * If doms_new == NULL it will be replaced with cpu_online_mask. | 9110 | * If doms_new == NULL it will be replaced with cpu_online_mask. |
8955 | * ndoms_new == 0 is a special case for destroying existing domains, | 9111 | * ndoms_new == 0 is a special case for destroying existing domains, |
@@ -8957,8 +9113,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur, | |||
8957 | * | 9113 | * |
8958 | * Call with hotplug lock held | 9114 | * Call with hotplug lock held |
8959 | */ | 9115 | */ |
8960 | /* FIXME: Change to struct cpumask *doms_new[] */ | 9116 | void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], |
8961 | void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, | ||
8962 | struct sched_domain_attr *dattr_new) | 9117 | struct sched_domain_attr *dattr_new) |
8963 | { | 9118 | { |
8964 | int i, j, n; | 9119 | int i, j, n; |
@@ -8977,40 +9132,40 @@ void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, | |||
8977 | /* Destroy deleted domains */ | 9132 | /* Destroy deleted domains */ |
8978 | for (i = 0; i < ndoms_cur; i++) { | 9133 | for (i = 0; i < ndoms_cur; i++) { |
8979 | for (j = 0; j < n && !new_topology; j++) { | 9134 | for (j = 0; j < n && !new_topology; j++) { |
8980 | if (cpumask_equal(&doms_cur[i], &doms_new[j]) | 9135 | if (cpumask_equal(doms_cur[i], doms_new[j]) |
8981 | && dattrs_equal(dattr_cur, i, dattr_new, j)) | 9136 | && dattrs_equal(dattr_cur, i, dattr_new, j)) |
8982 | goto match1; | 9137 | goto match1; |
8983 | } | 9138 | } |
8984 | /* no match - a current sched domain not in new doms_new[] */ | 9139 | /* no match - a current sched domain not in new doms_new[] */ |
8985 | detach_destroy_domains(doms_cur + i); | 9140 | detach_destroy_domains(doms_cur[i]); |
8986 | match1: | 9141 | match1: |
8987 | ; | 9142 | ; |
8988 | } | 9143 | } |
8989 | 9144 | ||
8990 | if (doms_new == NULL) { | 9145 | if (doms_new == NULL) { |
8991 | ndoms_cur = 0; | 9146 | ndoms_cur = 0; |
8992 | doms_new = fallback_doms; | 9147 | doms_new = &fallback_doms; |
8993 | cpumask_andnot(&doms_new[0], cpu_online_mask, cpu_isolated_map); | 9148 | cpumask_andnot(doms_new[0], cpu_active_mask, cpu_isolated_map); |
8994 | WARN_ON_ONCE(dattr_new); | 9149 | WARN_ON_ONCE(dattr_new); |
8995 | } | 9150 | } |
8996 | 9151 | ||
8997 | /* Build new domains */ | 9152 | /* Build new domains */ |
8998 | for (i = 0; i < ndoms_new; i++) { | 9153 | for (i = 0; i < ndoms_new; i++) { |
8999 | for (j = 0; j < ndoms_cur && !new_topology; j++) { | 9154 | for (j = 0; j < ndoms_cur && !new_topology; j++) { |
9000 | if (cpumask_equal(&doms_new[i], &doms_cur[j]) | 9155 | if (cpumask_equal(doms_new[i], doms_cur[j]) |
9001 | && dattrs_equal(dattr_new, i, dattr_cur, j)) | 9156 | && dattrs_equal(dattr_new, i, dattr_cur, j)) |
9002 | goto match2; | 9157 | goto match2; |
9003 | } | 9158 | } |
9004 | /* no match - add a new doms_new */ | 9159 | /* no match - add a new doms_new */ |
9005 | __build_sched_domains(doms_new + i, | 9160 | __build_sched_domains(doms_new[i], |
9006 | dattr_new ? dattr_new + i : NULL); | 9161 | dattr_new ? dattr_new + i : NULL); |
9007 | match2: | 9162 | match2: |
9008 | ; | 9163 | ; |
9009 | } | 9164 | } |
9010 | 9165 | ||
9011 | /* Remember the new sched domains */ | 9166 | /* Remember the new sched domains */ |
9012 | if (doms_cur != fallback_doms) | 9167 | if (doms_cur != &fallback_doms) |
9013 | kfree(doms_cur); | 9168 | free_sched_domains(doms_cur, ndoms_cur); |
9014 | kfree(dattr_cur); /* kfree(NULL) is safe */ | 9169 | kfree(dattr_cur); /* kfree(NULL) is safe */ |
9015 | doms_cur = doms_new; | 9170 | doms_cur = doms_new; |
9016 | dattr_cur = dattr_new; | 9171 | dattr_cur = dattr_new; |
@@ -9121,8 +9276,10 @@ static int update_sched_domains(struct notifier_block *nfb, | |||
9121 | switch (action) { | 9276 | switch (action) { |
9122 | case CPU_ONLINE: | 9277 | case CPU_ONLINE: |
9123 | case CPU_ONLINE_FROZEN: | 9278 | case CPU_ONLINE_FROZEN: |
9124 | case CPU_DEAD: | 9279 | case CPU_DOWN_PREPARE: |
9125 | case CPU_DEAD_FROZEN: | 9280 | case CPU_DOWN_PREPARE_FROZEN: |
9281 | case CPU_DOWN_FAILED: | ||
9282 | case CPU_DOWN_FAILED_FROZEN: | ||
9126 | partition_sched_domains(1, NULL, NULL); | 9283 | partition_sched_domains(1, NULL, NULL); |
9127 | return NOTIFY_OK; | 9284 | return NOTIFY_OK; |
9128 | 9285 | ||
@@ -9169,7 +9326,7 @@ void __init sched_init_smp(void) | |||
9169 | #endif | 9326 | #endif |
9170 | get_online_cpus(); | 9327 | get_online_cpus(); |
9171 | mutex_lock(&sched_domains_mutex); | 9328 | mutex_lock(&sched_domains_mutex); |
9172 | arch_init_sched_domains(cpu_online_mask); | 9329 | arch_init_sched_domains(cpu_active_mask); |
9173 | cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map); | 9330 | cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map); |
9174 | if (cpumask_empty(non_isolated_cpus)) | 9331 | if (cpumask_empty(non_isolated_cpus)) |
9175 | cpumask_set_cpu(smp_processor_id(), non_isolated_cpus); | 9332 | cpumask_set_cpu(smp_processor_id(), non_isolated_cpus); |
@@ -9242,13 +9399,13 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq) | |||
9242 | #ifdef CONFIG_SMP | 9399 | #ifdef CONFIG_SMP |
9243 | rt_rq->rt_nr_migratory = 0; | 9400 | rt_rq->rt_nr_migratory = 0; |
9244 | rt_rq->overloaded = 0; | 9401 | rt_rq->overloaded = 0; |
9245 | plist_head_init(&rt_rq->pushable_tasks, &rq->lock); | 9402 | plist_head_init_raw(&rt_rq->pushable_tasks, &rq->lock); |
9246 | #endif | 9403 | #endif |
9247 | 9404 | ||
9248 | rt_rq->rt_time = 0; | 9405 | rt_rq->rt_time = 0; |
9249 | rt_rq->rt_throttled = 0; | 9406 | rt_rq->rt_throttled = 0; |
9250 | rt_rq->rt_runtime = 0; | 9407 | rt_rq->rt_runtime = 0; |
9251 | spin_lock_init(&rt_rq->rt_runtime_lock); | 9408 | raw_spin_lock_init(&rt_rq->rt_runtime_lock); |
9252 | 9409 | ||
9253 | #ifdef CONFIG_RT_GROUP_SCHED | 9410 | #ifdef CONFIG_RT_GROUP_SCHED |
9254 | rt_rq->rt_nr_boosted = 0; | 9411 | rt_rq->rt_nr_boosted = 0; |
@@ -9332,10 +9489,6 @@ void __init sched_init(void) | |||
9332 | #ifdef CONFIG_CPUMASK_OFFSTACK | 9489 | #ifdef CONFIG_CPUMASK_OFFSTACK |
9333 | alloc_size += num_possible_cpus() * cpumask_size(); | 9490 | alloc_size += num_possible_cpus() * cpumask_size(); |
9334 | #endif | 9491 | #endif |
9335 | /* | ||
9336 | * As sched_init() is called before page_alloc is setup, | ||
9337 | * we use alloc_bootmem(). | ||
9338 | */ | ||
9339 | if (alloc_size) { | 9492 | if (alloc_size) { |
9340 | ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT); | 9493 | ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT); |
9341 | 9494 | ||
@@ -9404,11 +9557,15 @@ void __init sched_init(void) | |||
9404 | #endif /* CONFIG_USER_SCHED */ | 9557 | #endif /* CONFIG_USER_SCHED */ |
9405 | #endif /* CONFIG_GROUP_SCHED */ | 9558 | #endif /* CONFIG_GROUP_SCHED */ |
9406 | 9559 | ||
9560 | #if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP | ||
9561 | update_shares_data = __alloc_percpu(nr_cpu_ids * sizeof(unsigned long), | ||
9562 | __alignof__(unsigned long)); | ||
9563 | #endif | ||
9407 | for_each_possible_cpu(i) { | 9564 | for_each_possible_cpu(i) { |
9408 | struct rq *rq; | 9565 | struct rq *rq; |
9409 | 9566 | ||
9410 | rq = cpu_rq(i); | 9567 | rq = cpu_rq(i); |
9411 | spin_lock_init(&rq->lock); | 9568 | raw_spin_lock_init(&rq->lock); |
9412 | rq->nr_running = 0; | 9569 | rq->nr_running = 0; |
9413 | rq->calc_load_active = 0; | 9570 | rq->calc_load_active = 0; |
9414 | rq->calc_load_update = jiffies + LOAD_FREQ; | 9571 | rq->calc_load_update = jiffies + LOAD_FREQ; |
@@ -9468,7 +9625,7 @@ void __init sched_init(void) | |||
9468 | #elif defined CONFIG_USER_SCHED | 9625 | #elif defined CONFIG_USER_SCHED |
9469 | init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, 0, NULL); | 9626 | init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, 0, NULL); |
9470 | init_tg_rt_entry(&init_task_group, | 9627 | init_tg_rt_entry(&init_task_group, |
9471 | &per_cpu(init_rt_rq, i), | 9628 | &per_cpu(init_rt_rq_var, i), |
9472 | &per_cpu(init_sched_rt_entity, i), i, 1, | 9629 | &per_cpu(init_sched_rt_entity, i), i, 1, |
9473 | root_task_group.rt_se[i]); | 9630 | root_task_group.rt_se[i]); |
9474 | #endif | 9631 | #endif |
@@ -9486,6 +9643,8 @@ void __init sched_init(void) | |||
9486 | rq->cpu = i; | 9643 | rq->cpu = i; |
9487 | rq->online = 0; | 9644 | rq->online = 0; |
9488 | rq->migration_thread = NULL; | 9645 | rq->migration_thread = NULL; |
9646 | rq->idle_stamp = 0; | ||
9647 | rq->avg_idle = 2*sysctl_sched_migration_cost; | ||
9489 | INIT_LIST_HEAD(&rq->migration_queue); | 9648 | INIT_LIST_HEAD(&rq->migration_queue); |
9490 | rq_attach_root(rq, &def_root_domain); | 9649 | rq_attach_root(rq, &def_root_domain); |
9491 | #endif | 9650 | #endif |
@@ -9504,7 +9663,7 @@ void __init sched_init(void) | |||
9504 | #endif | 9663 | #endif |
9505 | 9664 | ||
9506 | #ifdef CONFIG_RT_MUTEXES | 9665 | #ifdef CONFIG_RT_MUTEXES |
9507 | plist_head_init(&init_task.pi_waiters, &init_task.pi_lock); | 9666 | plist_head_init_raw(&init_task.pi_waiters, &init_task.pi_lock); |
9508 | #endif | 9667 | #endif |
9509 | 9668 | ||
9510 | /* | 9669 | /* |
@@ -9529,13 +9688,15 @@ void __init sched_init(void) | |||
9529 | current->sched_class = &fair_sched_class; | 9688 | current->sched_class = &fair_sched_class; |
9530 | 9689 | ||
9531 | /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */ | 9690 | /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */ |
9532 | alloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT); | 9691 | zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT); |
9533 | #ifdef CONFIG_SMP | 9692 | #ifdef CONFIG_SMP |
9534 | #ifdef CONFIG_NO_HZ | 9693 | #ifdef CONFIG_NO_HZ |
9535 | alloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT); | 9694 | zalloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT); |
9536 | alloc_cpumask_var(&nohz.ilb_grp_nohz_mask, GFP_NOWAIT); | 9695 | alloc_cpumask_var(&nohz.ilb_grp_nohz_mask, GFP_NOWAIT); |
9537 | #endif | 9696 | #endif |
9538 | alloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); | 9697 | /* May be allocated at isolcpus cmdline parse time */ |
9698 | if (cpu_isolated_map == NULL) | ||
9699 | zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); | ||
9539 | #endif /* SMP */ | 9700 | #endif /* SMP */ |
9540 | 9701 | ||
9541 | perf_event_init(); | 9702 | perf_event_init(); |
@@ -9546,7 +9707,7 @@ void __init sched_init(void) | |||
9546 | #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP | 9707 | #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP |
9547 | static inline int preempt_count_equals(int preempt_offset) | 9708 | static inline int preempt_count_equals(int preempt_offset) |
9548 | { | 9709 | { |
9549 | int nested = preempt_count() & ~PREEMPT_ACTIVE; | 9710 | int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth(); |
9550 | 9711 | ||
9551 | return (nested == PREEMPT_INATOMIC_BASE + preempt_offset); | 9712 | return (nested == PREEMPT_INATOMIC_BASE + preempt_offset); |
9552 | } | 9713 | } |
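Note: with preemptible RCU an rcu_read_lock() section no longer bumps preempt_count(), so the debug check folds rcu_preempt_depth() into the nesting level it compares. A toy illustration of the idea only, not the kernel's implementation; all names here are illustrative:

	static int preempt_nesting;	/* spinlocks, preempt_disable(), ... */
	static int rcu_read_nesting;	/* preemptible-RCU read-side sections */

	static int in_atomic_context(int allowed_offset)
	{
		int nested = preempt_nesting + rcu_read_nesting;

		return nested != allowed_offset;	/* non-zero => sleeping here is a bug */
	}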
@@ -9627,13 +9788,13 @@ void normalize_rt_tasks(void) | |||
9627 | continue; | 9788 | continue; |
9628 | } | 9789 | } |
9629 | 9790 | ||
9630 | spin_lock(&p->pi_lock); | 9791 | raw_spin_lock(&p->pi_lock); |
9631 | rq = __task_rq_lock(p); | 9792 | rq = __task_rq_lock(p); |
9632 | 9793 | ||
9633 | normalize_task(rq, p); | 9794 | normalize_task(rq, p); |
9634 | 9795 | ||
9635 | __task_rq_unlock(rq); | 9796 | __task_rq_unlock(rq); |
9636 | spin_unlock(&p->pi_lock); | 9797 | raw_spin_unlock(&p->pi_lock); |
9637 | } while_each_thread(g, p); | 9798 | } while_each_thread(g, p); |
9638 | 9799 | ||
9639 | read_unlock_irqrestore(&tasklist_lock, flags); | 9800 | read_unlock_irqrestore(&tasklist_lock, flags); |
@@ -9729,13 +9890,15 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) | |||
9729 | se = kzalloc_node(sizeof(struct sched_entity), | 9890 | se = kzalloc_node(sizeof(struct sched_entity), |
9730 | GFP_KERNEL, cpu_to_node(i)); | 9891 | GFP_KERNEL, cpu_to_node(i)); |
9731 | if (!se) | 9892 | if (!se) |
9732 | goto err; | 9893 | goto err_free_rq; |
9733 | 9894 | ||
9734 | init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]); | 9895 | init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]); |
9735 | } | 9896 | } |
9736 | 9897 | ||
9737 | return 1; | 9898 | return 1; |
9738 | 9899 | ||
9900 | err_free_rq: | ||
9901 | kfree(cfs_rq); | ||
9739 | err: | 9902 | err: |
9740 | return 0; | 9903 | return 0; |
9741 | } | 9904 | } |
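Note: the new err_free_rq label frees the cfs_rq allocated earlier in the same loop iteration when the subsequent sched_entity allocation fails, the usual goto-unwind idiom. A condensed sketch of the pattern with stand-in types (demo_rq, demo_se) instead of the scheduler's structures:

	#include <linux/slab.h>

	struct demo_rq { int weight; };		/* stand-in for struct cfs_rq */
	struct demo_se { int weight; };		/* stand-in for struct sched_entity */

	static int alloc_pair(struct demo_rq **rqp, struct demo_se **sep, int node)
	{
		struct demo_rq *rq;
		struct demo_se *se;

		rq = kzalloc_node(sizeof(*rq), GFP_KERNEL, node);
		if (!rq)
			goto err;

		se = kzalloc_node(sizeof(*se), GFP_KERNEL, node);
		if (!se)
			goto err_free_rq;	/* undo only what this call already allocated */

		*rqp = rq;
		*sep = se;
		return 1;

	err_free_rq:
		kfree(rq);
	err:
		return 0;
	}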
@@ -9817,13 +9980,15 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) | |||
9817 | rt_se = kzalloc_node(sizeof(struct sched_rt_entity), | 9980 | rt_se = kzalloc_node(sizeof(struct sched_rt_entity), |
9818 | GFP_KERNEL, cpu_to_node(i)); | 9981 | GFP_KERNEL, cpu_to_node(i)); |
9819 | if (!rt_se) | 9982 | if (!rt_se) |
9820 | goto err; | 9983 | goto err_free_rq; |
9821 | 9984 | ||
9822 | init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]); | 9985 | init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]); |
9823 | } | 9986 | } |
9824 | 9987 | ||
9825 | return 1; | 9988 | return 1; |
9826 | 9989 | ||
9990 | err_free_rq: | ||
9991 | kfree(rt_rq); | ||
9827 | err: | 9992 | err: |
9828 | return 0; | 9993 | return 0; |
9829 | } | 9994 | } |
@@ -9957,7 +10122,7 @@ void sched_move_task(struct task_struct *tsk) | |||
9957 | 10122 | ||
9958 | #ifdef CONFIG_FAIR_GROUP_SCHED | 10123 | #ifdef CONFIG_FAIR_GROUP_SCHED |
9959 | if (tsk->sched_class->moved_group) | 10124 | if (tsk->sched_class->moved_group) |
9960 | tsk->sched_class->moved_group(tsk); | 10125 | tsk->sched_class->moved_group(tsk, on_rq); |
9961 | #endif | 10126 | #endif |
9962 | 10127 | ||
9963 | if (unlikely(running)) | 10128 | if (unlikely(running)) |
@@ -9992,9 +10157,9 @@ static void set_se_shares(struct sched_entity *se, unsigned long shares) | |||
9992 | struct rq *rq = cfs_rq->rq; | 10157 | struct rq *rq = cfs_rq->rq; |
9993 | unsigned long flags; | 10158 | unsigned long flags; |
9994 | 10159 | ||
9995 | spin_lock_irqsave(&rq->lock, flags); | 10160 | raw_spin_lock_irqsave(&rq->lock, flags); |
9996 | __set_se_shares(se, shares); | 10161 | __set_se_shares(se, shares); |
9997 | spin_unlock_irqrestore(&rq->lock, flags); | 10162 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
9998 | } | 10163 | } |
9999 | 10164 | ||
10000 | static DEFINE_MUTEX(shares_mutex); | 10165 | static DEFINE_MUTEX(shares_mutex); |
@@ -10179,18 +10344,18 @@ static int tg_set_bandwidth(struct task_group *tg, | |||
10179 | if (err) | 10344 | if (err) |
10180 | goto unlock; | 10345 | goto unlock; |
10181 | 10346 | ||
10182 | spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock); | 10347 | raw_spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock); |
10183 | tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period); | 10348 | tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period); |
10184 | tg->rt_bandwidth.rt_runtime = rt_runtime; | 10349 | tg->rt_bandwidth.rt_runtime = rt_runtime; |
10185 | 10350 | ||
10186 | for_each_possible_cpu(i) { | 10351 | for_each_possible_cpu(i) { |
10187 | struct rt_rq *rt_rq = tg->rt_rq[i]; | 10352 | struct rt_rq *rt_rq = tg->rt_rq[i]; |
10188 | 10353 | ||
10189 | spin_lock(&rt_rq->rt_runtime_lock); | 10354 | raw_spin_lock(&rt_rq->rt_runtime_lock); |
10190 | rt_rq->rt_runtime = rt_runtime; | 10355 | rt_rq->rt_runtime = rt_runtime; |
10191 | spin_unlock(&rt_rq->rt_runtime_lock); | 10356 | raw_spin_unlock(&rt_rq->rt_runtime_lock); |
10192 | } | 10357 | } |
10193 | spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock); | 10358 | raw_spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock); |
10194 | unlock: | 10359 | unlock: |
10195 | read_unlock(&tasklist_lock); | 10360 | read_unlock(&tasklist_lock); |
10196 | mutex_unlock(&rt_constraints_mutex); | 10361 | mutex_unlock(&rt_constraints_mutex); |
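Note: the bandwidth update takes the group-wide rt_runtime_lock with interrupts disabled and then each per-CPU rt_rq lock nested inside it while the new runtime is pushed out; only the lock API changes here, not the ordering. A sketch of that nested locking, with illustrative names (demo_rt_rq, push_runtime, bw_lock):

	#include <linux/spinlock.h>
	#include <linux/types.h>

	struct demo_rt_rq {
		raw_spinlock_t rt_runtime_lock;
		u64 rt_runtime;
	};

	static void push_runtime(raw_spinlock_t *bw_lock, struct demo_rt_rq *rqs,
				 int nr, u64 runtime)
	{
		int i;

		raw_spin_lock_irq(bw_lock);	/* outer: group-wide bandwidth state */
		for (i = 0; i < nr; i++) {
			raw_spin_lock(&rqs[i].rt_runtime_lock);	/* inner: nests inside bw_lock */
			rqs[i].rt_runtime = runtime;
			raw_spin_unlock(&rqs[i].rt_runtime_lock);
		}
		raw_spin_unlock_irq(bw_lock);
	}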
@@ -10295,15 +10460,15 @@ static int sched_rt_global_constraints(void) | |||
10295 | if (sysctl_sched_rt_runtime == 0) | 10460 | if (sysctl_sched_rt_runtime == 0) |
10296 | return -EBUSY; | 10461 | return -EBUSY; |
10297 | 10462 | ||
10298 | spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags); | 10463 | raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags); |
10299 | for_each_possible_cpu(i) { | 10464 | for_each_possible_cpu(i) { |
10300 | struct rt_rq *rt_rq = &cpu_rq(i)->rt; | 10465 | struct rt_rq *rt_rq = &cpu_rq(i)->rt; |
10301 | 10466 | ||
10302 | spin_lock(&rt_rq->rt_runtime_lock); | 10467 | raw_spin_lock(&rt_rq->rt_runtime_lock); |
10303 | rt_rq->rt_runtime = global_rt_runtime(); | 10468 | rt_rq->rt_runtime = global_rt_runtime(); |
10304 | spin_unlock(&rt_rq->rt_runtime_lock); | 10469 | raw_spin_unlock(&rt_rq->rt_runtime_lock); |
10305 | } | 10470 | } |
10306 | spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags); | 10471 | raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags); |
10307 | 10472 | ||
10308 | return 0; | 10473 | return 0; |
10309 | } | 10474 | } |
@@ -10594,9 +10759,9 @@ static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu) | |||
10594 | /* | 10759 | /* |
10595 | * Take rq->lock to make 64-bit read safe on 32-bit platforms. | 10760 | * Take rq->lock to make 64-bit read safe on 32-bit platforms. |
10596 | */ | 10761 | */ |
10597 | spin_lock_irq(&cpu_rq(cpu)->lock); | 10762 | raw_spin_lock_irq(&cpu_rq(cpu)->lock); |
10598 | data = *cpuusage; | 10763 | data = *cpuusage; |
10599 | spin_unlock_irq(&cpu_rq(cpu)->lock); | 10764 | raw_spin_unlock_irq(&cpu_rq(cpu)->lock); |
10600 | #else | 10765 | #else |
10601 | data = *cpuusage; | 10766 | data = *cpuusage; |
10602 | #endif | 10767 | #endif |
@@ -10612,9 +10777,9 @@ static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val) | |||
10612 | /* | 10777 | /* |
10613 | * Take rq->lock to make 64-bit write safe on 32-bit platforms. | 10778 | * Take rq->lock to make 64-bit write safe on 32-bit platforms. |
10614 | */ | 10779 | */ |
10615 | spin_lock_irq(&cpu_rq(cpu)->lock); | 10780 | raw_spin_lock_irq(&cpu_rq(cpu)->lock); |
10616 | *cpuusage = val; | 10781 | *cpuusage = val; |
10617 | spin_unlock_irq(&cpu_rq(cpu)->lock); | 10782 | raw_spin_unlock_irq(&cpu_rq(cpu)->lock); |
10618 | #else | 10783 | #else |
10619 | *cpuusage = val; | 10784 | *cpuusage = val; |
10620 | #endif | 10785 | #endif |
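Note: as the comment in the hunk says, the rq->lock is taken here only so the 64-bit counter access is safe on 32-bit platforms, where a u64 load or store is not atomic and an unlocked reader could see a torn value. A minimal sketch of the same idea with illustrative names (usage_counter, usage_read):

	#include <linux/spinlock.h>
	#include <linux/types.h>

	struct usage_counter {
		raw_spinlock_t lock;
		u64 usage;
	};

	static u64 usage_read(struct usage_counter *c)
	{
		u64 val;

		raw_spin_lock_irq(&c->lock);	/* keep the two 32-bit halves consistent */
		val = c->usage;
		raw_spin_unlock_irq(&c->lock);
		return val;
	}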
@@ -10848,9 +11013,9 @@ void synchronize_sched_expedited(void) | |||
10848 | init_completion(&req->done); | 11013 | init_completion(&req->done); |
10849 | req->task = NULL; | 11014 | req->task = NULL; |
10850 | req->dest_cpu = RCU_MIGRATION_NEED_QS; | 11015 | req->dest_cpu = RCU_MIGRATION_NEED_QS; |
10851 | spin_lock_irqsave(&rq->lock, flags); | 11016 | raw_spin_lock_irqsave(&rq->lock, flags); |
10852 | list_add(&req->list, &rq->migration_queue); | 11017 | list_add(&req->list, &rq->migration_queue); |
10853 | spin_unlock_irqrestore(&rq->lock, flags); | 11018 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
10854 | wake_up_process(rq->migration_thread); | 11019 | wake_up_process(rq->migration_thread); |
10855 | } | 11020 | } |
10856 | for_each_online_cpu(cpu) { | 11021 | for_each_online_cpu(cpu) { |
@@ -10858,13 +11023,14 @@ void synchronize_sched_expedited(void) | |||
10858 | req = &per_cpu(rcu_migration_req, cpu); | 11023 | req = &per_cpu(rcu_migration_req, cpu); |
10859 | rq = cpu_rq(cpu); | 11024 | rq = cpu_rq(cpu); |
10860 | wait_for_completion(&req->done); | 11025 | wait_for_completion(&req->done); |
10861 | spin_lock_irqsave(&rq->lock, flags); | 11026 | raw_spin_lock_irqsave(&rq->lock, flags); |
10862 | if (unlikely(req->dest_cpu == RCU_MIGRATION_MUST_SYNC)) | 11027 | if (unlikely(req->dest_cpu == RCU_MIGRATION_MUST_SYNC)) |
10863 | need_full_sync = 1; | 11028 | need_full_sync = 1; |
10864 | req->dest_cpu = RCU_MIGRATION_IDLE; | 11029 | req->dest_cpu = RCU_MIGRATION_IDLE; |
10865 | spin_unlock_irqrestore(&rq->lock, flags); | 11030 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
10866 | } | 11031 | } |
10867 | rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE; | 11032 | rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE; |
11033 | synchronize_sched_expedited_count++; | ||
10868 | mutex_unlock(&rcu_sched_expedited_mutex); | 11034 | mutex_unlock(&rcu_sched_expedited_mutex); |
10869 | put_online_cpus(); | 11035 | put_online_cpus(); |
10870 | if (need_full_sync) | 11036 | if (need_full_sync) |