author     Jeff Garzik <jeff@garzik.org>    2006-04-12 16:54:16 -0400
committer  Jeff Garzik <jeff@garzik.org>    2006-04-12 16:54:16 -0400
commit     875999c5539999f61a45620aae0c3e5fb1d2b035 (patch)
tree       4535032a8a10f5782c0aef6a620b1a624ea9f863 /kernel/sched.c
parent     79072f38909e3d9883317238887460c39ddcc4cb (diff)
parent     26ec634c31a11a003040e10b4d650495158632fd (diff)
Merge branch 'upstream'
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--  kernel/sched.c  144
1 file changed, 99 insertions(+), 45 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index a9ecac398bb9..365f0b90b4de 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -665,11 +665,57 @@ static int effective_prio(task_t *p)
 }
 
 /*
+ * We place interactive tasks back into the active array, if possible.
+ *
+ * To guarantee that this does not starve expired tasks we ignore the
+ * interactivity of a task if the first expired task had to wait more
+ * than a 'reasonable' amount of time. This deadline timeout is
+ * load-dependent, as the frequency of array switched decreases with
+ * increasing number of running tasks. We also ignore the interactivity
+ * if a better static_prio task has expired, and switch periodically
+ * regardless, to ensure that highly interactive tasks do not starve
+ * the less fortunate for unreasonably long periods.
+ */
+static inline int expired_starving(runqueue_t *rq)
+{
+	int limit;
+
+	/*
+	 * Arrays were recently switched, all is well
+	 */
+	if (!rq->expired_timestamp)
+		return 0;
+
+	limit = STARVATION_LIMIT * rq->nr_running;
+
+	/*
+	 * It's time to switch arrays
+	 */
+	if (jiffies - rq->expired_timestamp >= limit)
+		return 1;
+
+	/*
+	 * There's a better selection in the expired array
+	 */
+	if (rq->curr->static_prio > rq->best_expired_prio)
+		return 1;
+
+	/*
+	 * All is well
+	 */
+	return 0;
+}
+
+/*
  * __activate_task - move a task to the runqueue.
  */
-static inline void __activate_task(task_t *p, runqueue_t *rq)
+static void __activate_task(task_t *p, runqueue_t *rq)
 {
-	enqueue_task(p, rq->active);
+	prio_array_t *target = rq->active;
+
+	if (unlikely(batch_task(p) || (expired_starving(rq) && !rt_task(p))))
+		target = rq->expired;
+	enqueue_task(p, target);
 	rq->nr_running++;
 }
 
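The deadline arithmetic in expired_starving() is easier to see outside the kernel. Below is a standalone userspace sketch of the same policy; struct mock_rq, the STARVATION_LIMIT value and the sample numbers are illustrative stand-ins, not the kernel's definitions.

#include <stdio.h>

#define STARVATION_LIMIT 1000	/* illustrative; sched.c derives its own value */

struct mock_rq {
	unsigned long expired_timestamp;	/* jiffies when first task expired, 0 = none */
	unsigned long nr_running;
	int curr_static_prio;			/* static_prio of the running task */
	int best_expired_prio;			/* best static_prio waiting in expired */
};

static int mock_expired_starving(struct mock_rq *rq, unsigned long jiffies)
{
	unsigned long limit;

	if (!rq->expired_timestamp)		/* arrays recently switched */
		return 0;

	limit = STARVATION_LIMIT * rq->nr_running;

	if (jiffies - rq->expired_timestamp >= limit)
		return 1;			/* deadline passed: switch arrays */

	if (rq->curr_static_prio > rq->best_expired_prio)
		return 1;			/* a better task waits in expired */

	return 0;
}

int main(void)
{
	struct mock_rq rq = {
		.expired_timestamp = 5000,
		.nr_running = 4,
		.curr_static_prio = 120,
		.best_expired_prio = 115,
	};

	/* Four runnable tasks stretch the grace period to 4 * STARVATION_LIMIT
	 * jiffies, but the priority test fires first here because a better
	 * (numerically lower) static_prio task already sits in expired. */
	printf("starving at jiffies=6000? %d\n", mock_expired_starving(&rq, 6000));
	return 0;
}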
@@ -688,7 +734,7 @@ static int recalc_task_prio(task_t *p, unsigned long long now)
 	unsigned long long __sleep_time = now - p->timestamp;
 	unsigned long sleep_time;
 
-	if (unlikely(p->policy == SCHED_BATCH))
+	if (batch_task(p))
 		sleep_time = 0;
 	else {
 		if (__sleep_time > NS_MAX_SLEEP_AVG)
@@ -700,21 +746,25 @@ static int recalc_task_prio(task_t *p, unsigned long long now)
 	if (likely(sleep_time > 0)) {
 		/*
 		 * User tasks that sleep a long time are categorised as
-		 * idle and will get just interactive status to stay active &
-		 * prevent them suddenly becoming cpu hogs and starving
-		 * other processes.
+		 * idle. They will only have their sleep_avg increased to a
+		 * level that makes them just interactive priority to stay
+		 * active yet prevent them suddenly becoming cpu hogs and
+		 * starving other processes.
 		 */
-		if (p->mm && p->activated != -1 &&
-			sleep_time > INTERACTIVE_SLEEP(p)) {
-				p->sleep_avg = JIFFIES_TO_NS(MAX_SLEEP_AVG -
-						DEF_TIMESLICE);
+		if (p->mm && sleep_time > INTERACTIVE_SLEEP(p)) {
+			unsigned long ceiling;
+
+			ceiling = JIFFIES_TO_NS(MAX_SLEEP_AVG -
+				DEF_TIMESLICE);
+			if (p->sleep_avg < ceiling)
+				p->sleep_avg = ceiling;
 		} else {
 			/*
 			 * Tasks waking from uninterruptible sleep are
 			 * limited in their sleep_avg rise as they
 			 * are likely to be waiting on I/O
 			 */
-			if (p->activated == -1 && p->mm) {
+			if (p->sleep_type == SLEEP_NONINTERACTIVE && p->mm) {
 				if (p->sleep_avg >= INTERACTIVE_SLEEP(p))
 					sleep_time = 0;
 				else if (p->sleep_avg + sleep_time >=
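The subtle fix in this hunk: the old code assigned the "just interactive" value to sleep_avg unconditionally, pulling down tasks whose sleep_avg was already above it, and it excluded tasks waking from noninteractive sleep via the old p->activated != -1 test. The new code clamps upward only. A minimal sketch of that clamp, with illustrative values rather than the kernel's JIFFIES_TO_NS arithmetic:

#include <stdio.h>

/* Raise sleep_avg to the ceiling, never lower it. */
static unsigned long raise_to_ceiling(unsigned long sleep_avg,
				      unsigned long ceiling)
{
	if (sleep_avg < ceiling)
		sleep_avg = ceiling;	/* idle sleeper boosted to the cap */
	return sleep_avg;		/* already above: left untouched */
}

int main(void)
{
	printf("%lu\n", raise_to_ceiling(100, 800));	/* 800: raised */
	printf("%lu\n", raise_to_ceiling(900, 800));	/* 900: kept   */
	return 0;
}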
@@ -769,7 +819,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
 	 * This checks to make sure it's not an uninterruptible task
 	 * that is now waking up.
 	 */
-	if (!p->activated) {
+	if (p->sleep_type == SLEEP_NORMAL) {
 		/*
 		 * Tasks which were woken up by interrupts (ie. hw events)
 		 * are most likely of interactive nature. So we give them
@@ -778,13 +828,13 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
 		 * on a CPU, first time around:
 		 */
 		if (in_interrupt())
-			p->activated = 2;
+			p->sleep_type = SLEEP_INTERRUPTED;
 		else {
 			/*
 			 * Normal first-time wakeups get a credit too for
 			 * on-runqueue time, but it will be weighted down:
 			 */
-			p->activated = 1;
+			p->sleep_type = SLEEP_INTERACTIVE;
 		}
 	}
 	p->timestamp = now;
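The sleep_type values seen throughout this patch replace the magic numbers previously stored in p->activated (0, 1, 2 and -1). For reference, the enum this series adds in include/linux/sched.h (outside this diff, which is limited to kernel/sched.c) looks like the sketch below; the ordering is reconstructed from the substitutions visible in the hunks and should be treated as a reference aid, not part of this patch.

enum sleep_type {
	SLEEP_NORMAL,		/* was p->activated == 0  */
	SLEEP_NONINTERACTIVE,	/* was p->activated == -1 */
	SLEEP_INTERACTIVE,	/* was p->activated == 1  */
	SLEEP_INTERRUPTED,	/* was p->activated == 2  */
};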
@@ -1272,19 +1322,19 @@ out_activate:
 		 * Tasks on involuntary sleep don't earn
 		 * sleep_avg beyond just interactive state.
 		 */
-		p->activated = -1;
-	}
+		p->sleep_type = SLEEP_NONINTERACTIVE;
+	} else
 
 	/*
 	 * Tasks that have marked their sleep as noninteractive get
-	 * woken up without updating their sleep average. (i.e. their
-	 * sleep is handled in a priority-neutral manner, no priority
-	 * boost and no penalty.)
+	 * woken up with their sleep average not weighted in an
+	 * interactive way.
 	 */
-	if (old_state & TASK_NONINTERACTIVE)
-		__activate_task(p, rq);
-	else
-		activate_task(p, rq, cpu == this_cpu);
+		if (old_state & TASK_NONINTERACTIVE)
+			p->sleep_type = SLEEP_NONINTERACTIVE;
+
+
+	activate_task(p, rq, cpu == this_cpu);
 	/*
 	 * Sync wakeups (i.e. those types of wakeups where the waker
 	 * has indicated that it will leave the CPU in short order)
@@ -1658,6 +1708,21 @@ unsigned long nr_iowait(void)
 	return sum;
 }
 
+unsigned long nr_active(void)
+{
+	unsigned long i, running = 0, uninterruptible = 0;
+
+	for_each_online_cpu(i) {
+		running += cpu_rq(i)->nr_running;
+		uninterruptible += cpu_rq(i)->nr_uninterruptible;
+	}
+
+	if (unlikely((long)uninterruptible < 0))
+		uninterruptible = 0;
+
+	return running + uninterruptible;
+}
+
 #ifdef CONFIG_SMP
 
 /*
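The (long) cast in nr_active() is deliberate: a task can go to sleep on one CPU and be woken from another, so an individual runqueue's nr_uninterruptible may transiently drift negative and only the sum over all CPUs is meaningful; an underflowed, wrapped unsigned sum is therefore clamped to zero. A userspace sketch of just that guard:

#include <stdio.h>

int main(void)
{
	unsigned long uninterruptible = 0;

	/* Simulate transient per-CPU drift: the unsigned sum wraps. */
	uninterruptible += (unsigned long)-2;

	if ((long)uninterruptible < 0)	/* wrapped: treat as zero */
		uninterruptible = 0;

	printf("%lu\n", uninterruptible);	/* prints 0 */
	return 0;
}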
@@ -2467,22 +2532,6 @@ unsigned long long current_sched_time(const task_t *tsk)
 }
 
 /*
- * We place interactive tasks back into the active array, if possible.
- *
- * To guarantee that this does not starve expired tasks we ignore the
- * interactivity of a task if the first expired task had to wait more
- * than a 'reasonable' amount of time. This deadline timeout is
- * load-dependent, as the frequency of array switched decreases with
- * increasing number of running tasks. We also ignore the interactivity
- * if a better static_prio task has expired:
- */
-#define EXPIRED_STARVING(rq) \
-	((STARVATION_LIMIT && ((rq)->expired_timestamp && \
-		(jiffies - (rq)->expired_timestamp >= \
-			STARVATION_LIMIT * ((rq)->nr_running) + 1))) || \
-			((rq)->curr->static_prio > (rq)->best_expired_prio))
-
-/*
  * Account user cpu time to a process.
  * @p: the process that the cpu time gets accounted to
  * @hardirq_offset: the offset to subtract from hardirq_count()
@@ -2617,7 +2666,7 @@ void scheduler_tick(void)
 
 		if (!rq->expired_timestamp)
 			rq->expired_timestamp = jiffies;
-		if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) {
+		if (!TASK_INTERACTIVE(p) || expired_starving(rq)) {
 			enqueue_task(p, rq->expired);
 			if (p->static_prio < rq->best_expired_prio)
 				rq->best_expired_prio = p->static_prio;
@@ -2860,6 +2909,12 @@ EXPORT_SYMBOL(sub_preempt_count);
 
 #endif
 
+static inline int interactive_sleep(enum sleep_type sleep_type)
+{
+	return (sleep_type == SLEEP_INTERACTIVE ||
+		sleep_type == SLEEP_INTERRUPTED);
+}
+
 /*
  * schedule() is the main scheduler function.
  */
@@ -2983,12 +3038,12 @@ go_idle:
 	queue = array->queue + idx;
 	next = list_entry(queue->next, task_t, run_list);
 
-	if (!rt_task(next) && next->activated > 0) {
+	if (!rt_task(next) && interactive_sleep(next->sleep_type)) {
 		unsigned long long delta = now - next->timestamp;
 		if (unlikely((long long)(now - next->timestamp) < 0))
 			delta = 0;
 
-		if (next->activated == 1)
+		if (next->sleep_type == SLEEP_INTERACTIVE)
 			delta = delta * (ON_RUNQUEUE_WEIGHT * 128 / 100) / 128;
 
 		array = next->array;
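The odd-looking delta * (ON_RUNQUEUE_WEIGHT * 128 / 100) / 128 is a fixed-point trick: the parenthesised factor folds to a compile-time constant (38 for a weight of 30, the value sched.c uses in this era), so the runtime cost is one multiply and one power-of-two division instead of a divide by 100. A quick check with an illustrative delta:

#include <stdio.h>

#define ON_RUNQUEUE_WEIGHT 30	/* percent credit for time spent runnable */

int main(void)
{
	unsigned long long delta = 1000000ULL;	/* ns spent on the runqueue */

	/* 30 * 128 / 100 == 38 at compile time; 38/128 is roughly 29.7 percent. */
	delta = delta * (ON_RUNQUEUE_WEIGHT * 128 / 100) / 128;

	printf("weighted delta: %llu\n", delta);	/* prints 296875 */
	return 0;
}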
@@ -2998,10 +3053,9 @@ go_idle:
 			dequeue_task(next, array);
 			next->prio = new_prio;
 			enqueue_task(next, array);
-		} else
-			requeue_task(next, array);
+		}
 	}
-	next->activated = 0;
+	next->sleep_type = SLEEP_NORMAL;
 switch_tasks:
 	if (next == rq->idle)
 		schedstat_inc(rq, sched_goidle);