| author | Jeff Garzik <jeff@garzik.org> | 2006-04-12 16:54:16 -0400 |
|---|---|---|
| committer | Jeff Garzik <jeff@garzik.org> | 2006-04-12 16:54:16 -0400 |
| commit | 875999c5539999f61a45620aae0c3e5fb1d2b035 (patch) | |
| tree | 4535032a8a10f5782c0aef6a620b1a624ea9f863 /kernel/sched.c | |
| parent | 79072f38909e3d9883317238887460c39ddcc4cb (diff) | |
| parent | 26ec634c31a11a003040e10b4d650495158632fd (diff) | |
Merge branch 'upstream'
Diffstat (limited to 'kernel/sched.c')

| -rw-r--r-- | kernel/sched.c | 144 |

1 file changed, 99 insertions(+), 45 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index a9ecac398bb9..365f0b90b4de 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -665,11 +665,57 @@ static int effective_prio(task_t *p)
 }
 
 /*
+ * We place interactive tasks back into the active array, if possible.
+ *
+ * To guarantee that this does not starve expired tasks we ignore the
+ * interactivity of a task if the first expired task had to wait more
+ * than a 'reasonable' amount of time. This deadline timeout is
+ * load-dependent, as the frequency of array switched decreases with
+ * increasing number of running tasks. We also ignore the interactivity
+ * if a better static_prio task has expired, and switch periodically
+ * regardless, to ensure that highly interactive tasks do not starve
+ * the less fortunate for unreasonably long periods.
+ */
+static inline int expired_starving(runqueue_t *rq)
+{
+        int limit;
+
+        /*
+         * Arrays were recently switched, all is well
+         */
+        if (!rq->expired_timestamp)
+                return 0;
+
+        limit = STARVATION_LIMIT * rq->nr_running;
+
+        /*
+         * It's time to switch arrays
+         */
+        if (jiffies - rq->expired_timestamp >= limit)
+                return 1;
+
+        /*
+         * There's a better selection in the expired array
+         */
+        if (rq->curr->static_prio > rq->best_expired_prio)
+                return 1;
+
+        /*
+         * All is well
+         */
+        return 0;
+}
+
+/*
  * __activate_task - move a task to the runqueue.
  */
-static inline void __activate_task(task_t *p, runqueue_t *rq)
+static void __activate_task(task_t *p, runqueue_t *rq)
 {
-        enqueue_task(p, rq->active);
+        prio_array_t *target = rq->active;
+
+        if (unlikely(batch_task(p) || (expired_starving(rq) && !rt_task(p))))
+                target = rq->expired;
+        enqueue_task(p, target);
         rq->nr_running++;
 }
 
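The hunk above turns the old EXPIRED_STARVING() macro (removed further down in this diff) into a real function, and in doing so drops the macro's `+ 1` slack and its `STARVATION_LIMIT &&` guard while adding an early return when the arrays were just switched. A standalone sketch contrasting the two predicates makes the behavioural delta easy to test; the pared-down runqueue struct and the STARVATION_LIMIT value here are stand-ins, not the kernel's definitions.

#include <stdio.h>

#define STARVATION_LIMIT 128    /* stand-in value; not the kernel's */

/* Pared-down runqueue carrying just the fields the predicate reads. */
struct rq {
        unsigned long expired_timestamp;  /* 0 => arrays just switched */
        unsigned long nr_running;
        int curr_static_prio;             /* rq->curr->static_prio */
        int best_expired_prio;
};

static unsigned long jiffies;

/* Old macro, transcribed from the removal hunk later in this diff. */
static int old_expired_starving(struct rq *rq)
{
        return (STARVATION_LIMIT && (rq->expired_timestamp &&
                (jiffies - rq->expired_timestamp >=
                        STARVATION_LIMIT * rq->nr_running + 1))) ||
                (rq->curr_static_prio > rq->best_expired_prio);
}

/* New helper from the hunk above. */
static int new_expired_starving(struct rq *rq)
{
        unsigned long limit;

        if (!rq->expired_timestamp)
                return 0;                 /* arrays recently switched */
        limit = STARVATION_LIMIT * rq->nr_running;
        if (jiffies - rq->expired_timestamp >= limit)
                return 1;                 /* time to switch arrays */
        if (rq->curr_static_prio > rq->best_expired_prio)
                return 1;  /* better selection sits in the expired array */
        return 0;
}

int main(void)
{
        /* Arrays just switched, but a better static_prio task expired:
         * the macro still said "starving"; the function now says no. */
        struct rq rq = { 0, 4, 120, 110 };

        jiffies = 100000;
        printf("old=%d new=%d\n", old_expired_starving(&rq),
               new_expired_starving(&rq));  /* prints: old=1 new=0 */
        return 0;
}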
@@ -688,7 +734,7 @@ static int recalc_task_prio(task_t *p, unsigned long long now)
         unsigned long long __sleep_time = now - p->timestamp;
         unsigned long sleep_time;
 
-        if (unlikely(p->policy == SCHED_BATCH))
+        if (batch_task(p))
                 sleep_time = 0;
         else {
                 if (__sleep_time > NS_MAX_SLEEP_AVG)
@@ -700,21 +746,25 @@ static int recalc_task_prio(task_t *p, unsigned long long now)
         if (likely(sleep_time > 0)) {
                 /*
                  * User tasks that sleep a long time are categorised as
-                 * idle and will get just interactive status to stay active &
-                 * prevent them suddenly becoming cpu hogs and starving
-                 * other processes.
+                 * idle. They will only have their sleep_avg increased to a
+                 * level that makes them just interactive priority to stay
+                 * active yet prevent them suddenly becoming cpu hogs and
+                 * starving other processes.
                  */
-                if (p->mm && p->activated != -1 &&
-                        sleep_time > INTERACTIVE_SLEEP(p)) {
-                        p->sleep_avg = JIFFIES_TO_NS(MAX_SLEEP_AVG -
-                                                DEF_TIMESLICE);
+                if (p->mm && sleep_time > INTERACTIVE_SLEEP(p)) {
+                        unsigned long ceiling;
+
+                        ceiling = JIFFIES_TO_NS(MAX_SLEEP_AVG -
+                                DEF_TIMESLICE);
+                        if (p->sleep_avg < ceiling)
+                                p->sleep_avg = ceiling;
                 } else {
                         /*
                          * Tasks waking from uninterruptible sleep are
                          * limited in their sleep_avg rise as they
                          * are likely to be waiting on I/O
                          */
-                        if (p->activated == -1 && p->mm) {
+                        if (p->sleep_type == SLEEP_NONINTERACTIVE && p->mm) {
                                 if (p->sleep_avg >= INTERACTIVE_SLEEP(p))
                                         sleep_time = 0;
                                 else if (p->sleep_avg + sleep_time >=
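One subtle fix in this hunk: the old code *assigned* the just-interactive ceiling to long sleepers, which could lower a sleep_avg that was already above it; the new code only raises sleep_avg up to the ceiling. A toy calculation with illustrative numbers (not the real JIFFIES_TO_NS/MAX_SLEEP_AVG/DEF_TIMESLICE values):

#include <stdio.h>

int main(void)
{
        /* Stand-in for JIFFIES_TO_NS(MAX_SLEEP_AVG - DEF_TIMESLICE). */
        unsigned long ceiling = 900;
        unsigned long sleep_avg = 950;   /* already above the ceiling */

        unsigned long old_result = ceiling;  /* unconditional assignment */
        unsigned long new_result =
                (sleep_avg < ceiling) ? ceiling : sleep_avg;

        /* old demotes 950 -> 900; new leaves 950 untouched */
        printf("old=%lu new=%lu\n", old_result, new_result);
        return 0;
}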
@@ -769,7 +819,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
         * This checks to make sure it's not an uninterruptible task
         * that is now waking up.
         */
-        if (!p->activated) {
+        if (p->sleep_type == SLEEP_NORMAL) {
                 /*
                  * Tasks which were woken up by interrupts (ie. hw events)
                  * are most likely of interactive nature. So we give them
@@ -778,13 +828,13 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
                  * on a CPU, first time around:
                  */
                 if (in_interrupt())
-                        p->activated = 2;
+                        p->sleep_type = SLEEP_INTERRUPTED;
                 else {
                         /*
                          * Normal first-time wakeups get a credit too for
                          * on-runqueue time, but it will be weighted down:
                          */
-                        p->activated = 1;
+                        p->sleep_type = SLEEP_INTERACTIVE;
                 }
         }
         p->timestamp = now;
@@ -1272,19 +1322,19 @@ out_activate:
                  * Tasks on involuntary sleep don't earn
                  * sleep_avg beyond just interactive state.
                  */
-                p->activated = -1;
-        }
+                p->sleep_type = SLEEP_NONINTERACTIVE;
+        } else
 
         /*
          * Tasks that have marked their sleep as noninteractive get
-         * woken up without updating their sleep average. (i.e. their
-         * sleep is handled in a priority-neutral manner, no priority
-         * boost and no penalty.)
+         * woken up with their sleep average not weighted in an
+         * interactive way.
          */
         if (old_state & TASK_NONINTERACTIVE)
-                __activate_task(p, rq);
-        else
-                activate_task(p, rq, cpu == this_cpu);
+                p->sleep_type = SLEEP_NONINTERACTIVE;
+
+
+        activate_task(p, rq, cpu == this_cpu);
         /*
          * Sync wakeups (i.e. those types of wakeups where the waker
          * has indicated that it will leave the CPU in short order)
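The p->activated integers that these hunks retire map one-to-one onto named sleep states. The enum itself is defined outside this file, so this view filtered to 'kernel/sched.c' never shows it; the sketch below reconstructs it from the call-site substitutions, with the old integer noted beside each enumerator.

/* Reconstructed from the substitutions in this diff; the authoritative
 * definition lands in the sched.h side of the patch, which the
 * 'kernel/sched.c' filter hides. */
enum sleep_type {
        SLEEP_NORMAL,           /* was p->activated ==  0 */
        SLEEP_NONINTERACTIVE,   /* was p->activated == -1 */
        SLEEP_INTERACTIVE,      /* was p->activated ==  1 */
        SLEEP_INTERRUPTED,      /* was p->activated ==  2 */
};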
@@ -1658,6 +1708,21 @@ unsigned long nr_iowait(void)
         return sum;
 }
 
+unsigned long nr_active(void)
+{
+        unsigned long i, running = 0, uninterruptible = 0;
+
+        for_each_online_cpu(i) {
+                running += cpu_rq(i)->nr_running;
+                uninterruptible += cpu_rq(i)->nr_uninterruptible;
+        }
+
+        if (unlikely((long)uninterruptible < 0))
+                uninterruptible = 0;
+
+        return running + uninterruptible;
+}
+
 #ifdef CONFIG_SMP
 
 /*
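nr_active() is new in this merge. The (long) cast and clamp look odd for a sum of counters, but a per-CPU nr_uninterruptible count can transiently go negative when a task sleeps on one CPU and is woken on another, so only the summed total is meaningful, and even that is clamped defensively. A hypothetical two-CPU snapshot illustrating the arithmetic:

#include <stdio.h>

int main(void)
{
        /* CPU1 woke a task that went uninterruptible on CPU0, so CPU1's
         * counter has dipped below zero; the sum is still correct. */
        long per_cpu_uninterruptible[2] = { 3, -1 };
        unsigned long per_cpu_running[2] = { 2, 1 };
        unsigned long running = 0, uninterruptible = 0;

        for (int i = 0; i < 2; i++) {
                running += per_cpu_running[i];
                uninterruptible += per_cpu_uninterruptible[i]; /* may wrap */
        }
        if ((long)uninterruptible < 0)  /* same clamp as nr_active() */
                uninterruptible = 0;

        printf("nr_active = %lu\n", running + uninterruptible);  /* 5 */
        return 0;
}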
@@ -2467,22 +2532,6 @@ unsigned long long current_sched_time(const task_t *tsk)
 }
 
 /*
- * We place interactive tasks back into the active array, if possible.
- *
- * To guarantee that this does not starve expired tasks we ignore the
- * interactivity of a task if the first expired task had to wait more
- * than a 'reasonable' amount of time. This deadline timeout is
- * load-dependent, as the frequency of array switched decreases with
- * increasing number of running tasks. We also ignore the interactivity
- * if a better static_prio task has expired:
- */
-#define EXPIRED_STARVING(rq) \
-        ((STARVATION_LIMIT && ((rq)->expired_timestamp && \
-                (jiffies - (rq)->expired_timestamp >= \
-                        STARVATION_LIMIT * ((rq)->nr_running) + 1))) || \
-                        ((rq)->curr->static_prio > (rq)->best_expired_prio))
-
-/*
  * Account user cpu time to a process.
  * @p: the process that the cpu time gets accounted to
  * @hardirq_offset: the offset to subtract from hardirq_count()
@@ -2617,7 +2666,7 @@ void scheduler_tick(void)
 
                 if (!rq->expired_timestamp)
                         rq->expired_timestamp = jiffies;
-                if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) {
+                if (!TASK_INTERACTIVE(p) || expired_starving(rq)) {
                         enqueue_task(p, rq->expired);
                         if (p->static_prio < rq->best_expired_prio)
                                 rq->best_expired_prio = p->static_prio;
@@ -2860,6 +2909,12 @@ EXPORT_SYMBOL(sub_preempt_count);
 
 #endif
 
+static inline int interactive_sleep(enum sleep_type sleep_type)
+{
+        return (sleep_type == SLEEP_INTERACTIVE ||
+                sleep_type == SLEEP_INTERRUPTED);
+}
+
 /*
  * schedule() is the main scheduler function.
  */
@@ -2983,12 +3038,12 @@ go_idle:
         queue = array->queue + idx;
         next = list_entry(queue->next, task_t, run_list);
 
-        if (!rt_task(next) && next->activated > 0) {
+        if (!rt_task(next) && interactive_sleep(next->sleep_type)) {
                 unsigned long long delta = now - next->timestamp;
                 if (unlikely((long long)(now - next->timestamp) < 0))
                         delta = 0;
 
-                if (next->activated == 1)
+                if (next->sleep_type == SLEEP_INTERACTIVE)
                         delta = delta * (ON_RUNQUEUE_WEIGHT * 128 / 100) / 128;
 
                 array = next->array;
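The down-weighting applied to SLEEP_INTERACTIVE tasks here is integer fixed-point: delta * (ON_RUNQUEUE_WEIGHT * 128 / 100) / 128. Assuming ON_RUNQUEUE_WEIGHT is 30 (its value is not visible in this filtered diff), that reduces to delta * 38 / 128, so only about 30% of the time a task merely spent waiting on a runqueue is credited as sleep:

#include <stdio.h>

int main(void)
{
        unsigned long long delta = 1000000ULL; /* ns waited on the runqueue */
        int w = 30;  /* assumed ON_RUNQUEUE_WEIGHT; not shown in this diff */

        /* (30 * 128 / 100) == 38, so this is delta * 38 / 128 */
        unsigned long long credited = delta * (w * 128 / 100) / 128;

        printf("%llu -> %llu ns (~%.1f%%)\n", delta, credited,
               100.0 * (double)credited / (double)delta);  /* ~29.7% */
        return 0;
}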
@@ -2998,10 +3053,9 @@ go_idle:
                         dequeue_task(next, array);
                         next->prio = new_prio;
                         enqueue_task(next, array);
-                } else
-                        requeue_task(next, array);
+                }
         }
-        next->activated = 0;
+        next->sleep_type = SLEEP_NORMAL;
 switch_tasks:
         if (next == rq->idle)
                 schedstat_inc(rq, sched_goidle);
