author     Dmitry Torokhov <dmitry.torokhov@gmail.com>	2008-04-01 00:22:26 -0400
committer  Dmitry Torokhov <dmitry.torokhov@gmail.com>	2008-04-01 00:22:26 -0400
commit     399f486286f44d55c4fff0e9cc5d712f2b443489
tree       0c2820b3e04232eaa96f08c1057b87728fb3e7a4 /kernel/sched.c
parent     481419ec9fbdf3f4ec5389c7e91a81b4a7ebee8d
parent     a9edadbf790d72adf6ebed476cb5caf7743e7e4a
Merge master.kernel.org:/pub/scm/linux/kernel/git/torvalds/linux-2.6 into for-linus
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--  kernel/sched.c | 115
1 file changed, 82 insertions(+), 33 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 1cb53fb1fe3d..8dcdec6fe0fe 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -301,7 +301,7 @@ struct cfs_rq {
 	/* 'curr' points to currently running entity on this cfs_rq.
 	 * It is set to NULL otherwise (i.e when none are currently running).
 	 */
-	struct sched_entity *curr;
+	struct sched_entity *curr, *next;
 
 	unsigned long nr_spread_over;
 
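The new ->next field is a "buddy" pointer: the fair-class code (in kernel/sched_fair.c, not part of this diff) records the entity it would like to run next, typically the task that was just woken, and the pick path prefers it when that does not hurt fairness. A rough, hypothetical sketch of that preference; pick_next() and buddy_still_fair() are illustrative names, not the real sched_fair.c functions:

	/* illustrative only; the real selection logic lives in kernel/sched_fair.c */
	static struct sched_entity *pick_next(struct cfs_rq *cfs_rq,
					      struct sched_entity *leftmost)
	{
		/* prefer the wakeup buddy if picking it keeps things fair */
		if (cfs_rq->next && buddy_still_fair(cfs_rq, cfs_rq->next))
			return cfs_rq->next;
		return leftmost;
	}

The task_hot() hunk further down uses the same field: a buddy candidate is treated as cache hot so load balancing does not migrate it away right before it gets picked.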
@@ -594,18 +594,14 @@ enum {
 	SCHED_FEAT_NEW_FAIR_SLEEPERS	= 1,
 	SCHED_FEAT_WAKEUP_PREEMPT	= 2,
 	SCHED_FEAT_START_DEBIT		= 4,
-	SCHED_FEAT_TREE_AVG		= 8,
-	SCHED_FEAT_APPROX_AVG		= 16,
-	SCHED_FEAT_HRTICK		= 32,
-	SCHED_FEAT_DOUBLE_TICK		= 64,
+	SCHED_FEAT_HRTICK		= 8,
+	SCHED_FEAT_DOUBLE_TICK		= 16,
 };
 
 const_debug unsigned int sysctl_sched_features =
 		SCHED_FEAT_NEW_FAIR_SLEEPERS	* 1 |
 		SCHED_FEAT_WAKEUP_PREEMPT	* 1 |
 		SCHED_FEAT_START_DEBIT		* 1 |
-		SCHED_FEAT_TREE_AVG		* 0 |
-		SCHED_FEAT_APPROX_AVG		* 0 |
 		SCHED_FEAT_HRTICK		* 1 |
 		SCHED_FEAT_DOUBLE_TICK		* 0;
 
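The enum is a bitmask of scheduler feature flags: dropping the unused TREE_AVG and APPROX_AVG features renumbers HRTICK and DOUBLE_TICK, and sysctl_sched_features keeps the same default on/off choices for the flags that remain. Elsewhere in sched.c the flags are tested through a sched_feat() helper macro; the self-contained user-space sketch below re-creates the scheme with the new values (the macro shape is from memory and only illustrative):

	#include <stdio.h>

	enum {
		SCHED_FEAT_NEW_FAIR_SLEEPERS = 1,
		SCHED_FEAT_WAKEUP_PREEMPT = 2,
		SCHED_FEAT_START_DEBIT = 4,
		SCHED_FEAT_HRTICK = 8,
		SCHED_FEAT_DOUBLE_TICK = 16,
	};

	/* default on/off choices, exactly as in the hunk above */
	static unsigned int sysctl_sched_features =
		SCHED_FEAT_NEW_FAIR_SLEEPERS * 1 |
		SCHED_FEAT_WAKEUP_PREEMPT * 1 |
		SCHED_FEAT_START_DEBIT * 1 |
		SCHED_FEAT_HRTICK * 1 |
		SCHED_FEAT_DOUBLE_TICK * 0;

	#define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x)

	int main(void)
	{
		printf("HRTICK: %d, DOUBLE_TICK: %d\n",
		       !!sched_feat(HRTICK), !!sched_feat(DOUBLE_TICK));
		return 0;
	}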
@@ -1056,6 +1052,49 @@ static void resched_cpu(int cpu)
 	resched_task(cpu_curr(cpu));
 	spin_unlock_irqrestore(&rq->lock, flags);
 }
+
+#ifdef CONFIG_NO_HZ
+/*
+ * When add_timer_on() enqueues a timer into the timer wheel of an
+ * idle CPU then this timer might expire before the next timer event
+ * which is scheduled to wake up that CPU. In case of a completely
+ * idle system the next event might even be infinite time into the
+ * future. wake_up_idle_cpu() ensures that the CPU is woken up and
+ * leaves the inner idle loop so the newly added timer is taken into
+ * account when the CPU goes back to idle and evaluates the timer
+ * wheel for the next timer event.
+ */
+void wake_up_idle_cpu(int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+
+	if (cpu == smp_processor_id())
+		return;
+
+	/*
+	 * This is safe, as this function is called with the timer
+	 * wheel base lock of (cpu) held. When the CPU is on the way
+	 * to idle and has not yet set rq->curr to idle then it will
+	 * be serialized on the timer wheel base lock and take the new
+	 * timer into account automatically.
+	 */
+	if (rq->curr != rq->idle)
+		return;
+
+	/*
+	 * We can set TIF_RESCHED on the idle task of the other CPU
+	 * lockless. The worst case is that the other CPU runs the
+	 * idle task through an additional NOOP schedule()
+	 */
+	set_tsk_thread_flag(rq->idle, TIF_NEED_RESCHED);
+
+	/* NEED_RESCHED must be visible before we test polling */
+	smp_mb();
+	if (!tsk_is_polling(rq->idle))
+		smp_send_reschedule(cpu);
+}
+#endif
+
 #else
 static void __resched_task(struct task_struct *p, int tif_bit)
 {
@@ -1084,7 +1123,7 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight,
 	u64 tmp;
 
 	if (unlikely(!lw->inv_weight))
-		lw->inv_weight = (WMULT_CONST - lw->weight/2) / lw->weight + 1;
+		lw->inv_weight = (WMULT_CONST-lw->weight/2) / (lw->weight+1);
 
 	tmp = (u64)delta_exec * weight;
 	/*
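calc_delta_mine() computes delta_exec * weight / lw->weight without a division on the hot path: it caches an approximate fixed-point reciprocal of lw->weight and turns the division into a multiply and a shift. In the new expression the +1 moves from the quotient into the divisor, so the cached reciprocal comes out slightly smaller instead of being bumped up after the division. A self-contained user-space sketch of the idea, assuming the usual 32-bit shift; SCALE below is illustrative, not necessarily the kernel's exact WMULT_CONST:

	#include <stdio.h>
	#include <stdint.h>

	#define SHIFT	32
	#define SCALE	((uint64_t)1 << SHIFT)

	/* cached reciprocal: inv ~= SCALE / weight, as in the new expression */
	static uint64_t inv_weight(uint64_t weight)
	{
		return (SCALE - weight / 2) / (weight + 1);
	}

	/* delta * weight / total, done with one multiply and a shift */
	static uint64_t calc_delta(uint64_t delta, uint64_t weight, uint64_t total)
	{
		return (delta * weight * inv_weight(total)) >> SHIFT;
	}

	int main(void)
	{
		/* e.g. a 6 ms slice shared between weights 1024 and 2048 */
		printf("%llu ns\n",
		       (unsigned long long)calc_delta(6000000, 1024, 3072));
		return 0;
	}

Expected output is close to 6000000 * 1024 / 3072 = 2000000, with a small error from the truncated reciprocal.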
@@ -1108,11 +1147,13 @@ calc_delta_fair(unsigned long delta_exec, struct load_weight *lw)
 static inline void update_load_add(struct load_weight *lw, unsigned long inc)
 {
 	lw->weight += inc;
+	lw->inv_weight = 0;
 }
 
 static inline void update_load_sub(struct load_weight *lw, unsigned long dec)
 {
 	lw->weight -= dec;
+	lw->inv_weight = 0;
 }
 
 /*
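These two stores pair with the lazy recompute in calc_delta_mine() above: inv_weight == 0 means "stale", so zeroing it whenever the weight changes forces the reciprocal to be recomputed on its next use instead of letting an out-of-date value skew the arithmetic. A minimal sketch of the pattern, with a hypothetical setter for illustration:

	struct load_weight {
		unsigned long weight;
		unsigned long inv_weight;	/* 0 means: recompute on next use */
	};

	static void load_set_weight(struct load_weight *lw, unsigned long w)
	{
		lw->weight = w;
		lw->inv_weight = 0;		/* invalidate the cached reciprocal */
	}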
@@ -1394,6 +1435,12 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
 {
 	s64 delta;
 
+	/*
+	 * Buddy candidates are cache hot:
+	 */
+	if (&p->se == cfs_rq_of(&p->se)->next)
+		return 1;
+
 	if (p->sched_class != &fair_sched_class)
 		return 0;
 
@@ -1853,10 +1900,11 @@ out_activate:
 	schedstat_inc(p, se.nr_wakeups_remote);
 	update_rq_clock(rq);
 	activate_task(rq, p, 1);
-	check_preempt_curr(rq, p);
 	success = 1;
 
 out_running:
+	check_preempt_curr(rq, p);
+
 	p->state = TASK_RUNNING;
 #ifdef CONFIG_SMP
 	if (p->sched_class->task_wake_up)
@@ -1890,6 +1938,8 @@ static void __sched_fork(struct task_struct *p)
 	p->se.exec_start = 0;
 	p->se.sum_exec_runtime = 0;
 	p->se.prev_sum_exec_runtime = 0;
+	p->se.last_wakeup = 0;
+	p->se.avg_overlap = 0;
 
 #ifdef CONFIG_SCHEDSTATS
 	p->se.wait_start = 0;
@@ -3875,7 +3925,7 @@ need_resched_nonpreemptible:
 
 	if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
 		if (unlikely((prev->state & TASK_INTERRUPTIBLE) &&
-				unlikely(signal_pending(prev)))) {
+				signal_pending(prev))) {
 			prev->state = TASK_RUNNING;
 		} else {
 			deactivate_task(rq, prev, 1);
@@ -4268,11 +4318,10 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 	oldprio = p->prio;
 	on_rq = p->se.on_rq;
 	running = task_current(rq, p);
-	if (on_rq) {
+	if (on_rq)
 		dequeue_task(rq, p, 0);
-		if (running)
-			p->sched_class->put_prev_task(rq, p);
-	}
+	if (running)
+		p->sched_class->put_prev_task(rq, p);
 
 	if (rt_prio(prio))
 		p->sched_class = &rt_sched_class;
@@ -4281,10 +4330,9 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 
 	p->prio = prio;
 
+	if (running)
+		p->sched_class->set_curr_task(rq);
 	if (on_rq) {
-		if (running)
-			p->sched_class->set_curr_task(rq);
-
 		enqueue_task(rq, p, 0);
 
 		check_class_changed(rq, p, prev_class, oldprio, running);
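This hunk and the previous one restructure rt_mutex_setprio() around a fixed ordering that also shows up below in sched_setscheduler() and sched_move_task(): take the task out of the runqueue picture, apply the change, then put it back. The key difference from the old code is that put_prev_task()/set_curr_task() now run whenever the task is the CPU's current task, not only when it is also on the runqueue. A condensed sketch of the ordering; change_task() is a hypothetical helper, not a function in sched.c:

	static void change_task(struct rq *rq, struct task_struct *p,
				void (*change)(struct task_struct *p))
	{
		int on_rq = p->se.on_rq;
		int running = task_current(rq, p);

		if (on_rq)
			dequeue_task(rq, p, 0);
		if (running)
			p->sched_class->put_prev_task(rq, p);

		change(p);			/* new prio, policy or group */

		if (running)
			p->sched_class->set_curr_task(rq);
		if (on_rq)
			enqueue_task(rq, p, 0);
	}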
@@ -4581,19 +4629,17 @@ recheck:
 	update_rq_clock(rq);
 	on_rq = p->se.on_rq;
 	running = task_current(rq, p);
-	if (on_rq) {
+	if (on_rq)
 		deactivate_task(rq, p, 0);
-		if (running)
-			p->sched_class->put_prev_task(rq, p);
-	}
+	if (running)
+		p->sched_class->put_prev_task(rq, p);
 
 	oldprio = p->prio;
 	__setscheduler(rq, p, policy, param->sched_priority);
 
+	if (running)
+		p->sched_class->set_curr_task(rq);
 	if (on_rq) {
-		if (running)
-			p->sched_class->set_curr_task(rq);
-
 		activate_task(rq, p, 0);
 
 		check_class_changed(rq, p, prev_class, oldprio, running);
@@ -6804,6 +6850,10 @@ static int ndoms_cur; /* number of sched domains in 'doms_cur' */
  */
 static cpumask_t fallback_doms;
 
+void __attribute__((weak)) arch_update_cpu_topology(void)
+{
+}
+
 /*
  * Set up scheduler domains and groups. Callers must hold the hotplug lock.
  * For now this just excludes isolated cpus, but could be used to
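The empty body marked __attribute__((weak)) is a link-time default: an architecture that needs to refresh its topology data before the sched domains are rebuilt provides its own strong definition of the same symbol, and the linker silently drops this stub. A minimal two-file sketch of the mechanism (file names are illustrative):

	/* default.c -- generic no-op fallback, as in the hunk above */
	void __attribute__((weak)) arch_update_cpu_topology(void)
	{
	}

	/* arch.c -- an architecture overrides it simply by defining the
	 * symbol without the weak attribute; the linker picks this one */
	void arch_update_cpu_topology(void)
	{
		/* refresh arch-specific topology tables here */
	}

The next hunk calls the function from arch_init_sched_domains(), so the hook runs whether or not an architecture overrides it.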
@@ -6813,6 +6863,7 @@ static int arch_init_sched_domains(const cpumask_t *cpu_map)
 {
 	int err;
 
+	arch_update_cpu_topology();
 	ndoms_cur = 1;
 	doms_cur = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
 	if (!doms_cur)
@@ -6917,7 +6968,7 @@ match2:
 }
 
 #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
-static int arch_reinit_sched_domains(void)
+int arch_reinit_sched_domains(void)
 {
 	int err;
 
@@ -7618,11 +7669,10 @@ void sched_move_task(struct task_struct *tsk)
 	running = task_current(rq, tsk);
 	on_rq = tsk->se.on_rq;
 
-	if (on_rq) {
+	if (on_rq)
 		dequeue_task(rq, tsk, 0);
-		if (unlikely(running))
-			tsk->sched_class->put_prev_task(rq, tsk);
-	}
+	if (unlikely(running))
+		tsk->sched_class->put_prev_task(rq, tsk);
 
 	set_task_rq(tsk, task_cpu(tsk));
 
@@ -7631,11 +7681,10 @@ void sched_move_task(struct task_struct *tsk)
 		tsk->sched_class->moved_group(tsk);
 #endif
 
-	if (on_rq) {
-		if (unlikely(running))
-			tsk->sched_class->set_curr_task(rq);
-		enqueue_task(rq, tsk, 0);
-	}
+	if (unlikely(running))
+		tsk->sched_class->set_curr_task(rq);
+	if (on_rq)
+		enqueue_task(rq, tsk, 0);
 
 	task_rq_unlock(rq, &flags);
 }