aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/sched.c
diff options
context:
space:
mode:
authorPeter Zijlstra <a.p.zijlstra@chello.nl>2008-01-25 15:08:29 -0500
committerIngo Molnar <mingo@elte.hu>2008-01-25 15:08:29 -0500
commit8f4d37ec073c17e2d4aa8851df5837d798606d6f (patch)
treea9ac9063eca53e4d0110e8086f55241ea70ba993 /kernel/sched.c
parent02b67cc3ba36bdba351d6c3a00593f4ec550d9d3 (diff)
sched: high-res preemption tick
Use HR-timers (when available) to deliver an accurate preemption tick. The regular scheduler tick that runs at 1/HZ can be too coarse when nice levels are used. The fairness system will still keep the cpu utilisation 'fair' by then delaying the task that got an excessive amount of CPU time, but tries to minimize this by delivering preemption points spot-on. The average frequency of this extra interrupt is sched_latency / nr_latency. This need not be higher than 1/HZ; it's just that the distribution within the sched_latency period is important. Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--kernel/sched.c210
1 files changed, 199 insertions, 11 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 6ee37602a6d8..17f93d3eda91 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -65,6 +65,7 @@
65#include <linux/reciprocal_div.h> 65#include <linux/reciprocal_div.h>
66#include <linux/unistd.h> 66#include <linux/unistd.h>
67#include <linux/pagemap.h> 67#include <linux/pagemap.h>
68#include <linux/hrtimer.h>
68 69
69#include <asm/tlb.h> 70#include <asm/tlb.h>
70#include <asm/irq_regs.h> 71#include <asm/irq_regs.h>
@@ -451,6 +452,12 @@ struct rq {
451 struct list_head migration_queue; 452 struct list_head migration_queue;
452#endif 453#endif
453 454
455#ifdef CONFIG_SCHED_HRTICK
456 unsigned long hrtick_flags;
457 ktime_t hrtick_expire;
458 struct hrtimer hrtick_timer;
459#endif
460
454#ifdef CONFIG_SCHEDSTATS 461#ifdef CONFIG_SCHEDSTATS
455 /* latency stats */ 462 /* latency stats */
456 struct sched_info rq_sched_info; 463 struct sched_info rq_sched_info;
@@ -572,6 +579,8 @@ enum {
572 SCHED_FEAT_START_DEBIT = 4, 579 SCHED_FEAT_START_DEBIT = 4,
573 SCHED_FEAT_TREE_AVG = 8, 580 SCHED_FEAT_TREE_AVG = 8,
574 SCHED_FEAT_APPROX_AVG = 16, 581 SCHED_FEAT_APPROX_AVG = 16,
582 SCHED_FEAT_HRTICK = 32,
583 SCHED_FEAT_DOUBLE_TICK = 64,
575}; 584};
576 585
577const_debug unsigned int sysctl_sched_features = 586const_debug unsigned int sysctl_sched_features =
@@ -579,7 +588,9 @@ const_debug unsigned int sysctl_sched_features =
579 SCHED_FEAT_WAKEUP_PREEMPT * 1 | 588 SCHED_FEAT_WAKEUP_PREEMPT * 1 |
580 SCHED_FEAT_START_DEBIT * 1 | 589 SCHED_FEAT_START_DEBIT * 1 |
581 SCHED_FEAT_TREE_AVG * 0 | 590 SCHED_FEAT_TREE_AVG * 0 |
582 SCHED_FEAT_APPROX_AVG * 0; 591 SCHED_FEAT_APPROX_AVG * 0 |
592 SCHED_FEAT_HRTICK * 1 |
593 SCHED_FEAT_DOUBLE_TICK * 0;
583 594
584#define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x) 595#define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x)
585 596
@@ -796,6 +807,173 @@ void sched_clock_idle_wakeup_event(u64 delta_ns)
796} 807}
797EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event); 808EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
798 809
810static void __resched_task(struct task_struct *p, int tif_bit);
811
812static inline void resched_task(struct task_struct *p)
813{
814 __resched_task(p, TIF_NEED_RESCHED);
815}
816
817#ifdef CONFIG_SCHED_HRTICK
818/*
819 * Use HR-timers to deliver accurate preemption points.
820 *
821 * Its all a bit involved since we cannot program an hrt while holding the
822 * rq->lock. So what we do is store a state in in rq->hrtick_* and ask for a
823 * reschedule event.
824 *
825 * When we get rescheduled we reprogram the hrtick_timer outside of the
826 * rq->lock.
827 */
828static inline void resched_hrt(struct task_struct *p)
829{
830 __resched_task(p, TIF_HRTICK_RESCHED);
831}
832
833static inline void resched_rq(struct rq *rq)
834{
835 unsigned long flags;
836
837 spin_lock_irqsave(&rq->lock, flags);
838 resched_task(rq->curr);
839 spin_unlock_irqrestore(&rq->lock, flags);
840}
841
842enum {
843 HRTICK_SET, /* re-programm hrtick_timer */
844 HRTICK_RESET, /* not a new slice */
845};
846
847/*
848 * Use hrtick when:
849 * - enabled by features
850 * - hrtimer is actually high res
851 */
852static inline int hrtick_enabled(struct rq *rq)
853{
854 if (!sched_feat(HRTICK))
855 return 0;
856 return hrtimer_is_hres_active(&rq->hrtick_timer);
857}
858
859/*
860 * Called to set the hrtick timer state.
861 *
862 * called with rq->lock held and irqs disabled
863 */
864static void hrtick_start(struct rq *rq, u64 delay, int reset)
865{
866 assert_spin_locked(&rq->lock);
867
868 /*
869 * preempt at: now + delay
870 */
871 rq->hrtick_expire =
872 ktime_add_ns(rq->hrtick_timer.base->get_time(), delay);
873 /*
874 * indicate we need to program the timer
875 */
876 __set_bit(HRTICK_SET, &rq->hrtick_flags);
877 if (reset)
878 __set_bit(HRTICK_RESET, &rq->hrtick_flags);
879
880 /*
881 * New slices are called from the schedule path and don't need a
882 * forced reschedule.
883 */
884 if (reset)
885 resched_hrt(rq->curr);
886}
887
888static void hrtick_clear(struct rq *rq)
889{
890 if (hrtimer_active(&rq->hrtick_timer))
891 hrtimer_cancel(&rq->hrtick_timer);
892}
893
894/*
895 * Update the timer from the possible pending state.
896 */
897static void hrtick_set(struct rq *rq)
898{
899 ktime_t time;
900 int set, reset;
901 unsigned long flags;
902
903 WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
904
905 spin_lock_irqsave(&rq->lock, flags);
906 set = __test_and_clear_bit(HRTICK_SET, &rq->hrtick_flags);
907 reset = __test_and_clear_bit(HRTICK_RESET, &rq->hrtick_flags);
908 time = rq->hrtick_expire;
909 clear_thread_flag(TIF_HRTICK_RESCHED);
910 spin_unlock_irqrestore(&rq->lock, flags);
911
912 if (set) {
913 hrtimer_start(&rq->hrtick_timer, time, HRTIMER_MODE_ABS);
914 if (reset && !hrtimer_active(&rq->hrtick_timer))
915 resched_rq(rq);
916 } else
917 hrtick_clear(rq);
918}
919
920/*
921 * High-resolution timer tick.
922 * Runs from hardirq context with interrupts disabled.
923 */
924static enum hrtimer_restart hrtick(struct hrtimer *timer)
925{
926 struct rq *rq = container_of(timer, struct rq, hrtick_timer);
927
928 WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
929
930 spin_lock(&rq->lock);
931 __update_rq_clock(rq);
932 rq->curr->sched_class->task_tick(rq, rq->curr, 1);
933 spin_unlock(&rq->lock);
934
935 return HRTIMER_NORESTART;
936}
937
938static inline void init_rq_hrtick(struct rq *rq)
939{
940 rq->hrtick_flags = 0;
941 hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
942 rq->hrtick_timer.function = hrtick;
943 rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
944}
945
946void hrtick_resched(void)
947{
948 struct rq *rq;
949 unsigned long flags;
950
951 if (!test_thread_flag(TIF_HRTICK_RESCHED))
952 return;
953
954 local_irq_save(flags);
955 rq = cpu_rq(smp_processor_id());
956 hrtick_set(rq);
957 local_irq_restore(flags);
958}
959#else
960static inline void hrtick_clear(struct rq *rq)
961{
962}
963
964static inline void hrtick_set(struct rq *rq)
965{
966}
967
968static inline void init_rq_hrtick(struct rq *rq)
969{
970}
971
972void hrtick_resched(void)
973{
974}
975#endif
976
799/* 977/*
800 * resched_task - mark a task 'to be rescheduled now'. 978 * resched_task - mark a task 'to be rescheduled now'.
801 * 979 *
@@ -809,16 +987,16 @@ EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
809#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) 987#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
810#endif 988#endif
811 989
812static void resched_task(struct task_struct *p) 990static void __resched_task(struct task_struct *p, int tif_bit)
813{ 991{
814 int cpu; 992 int cpu;
815 993
816 assert_spin_locked(&task_rq(p)->lock); 994 assert_spin_locked(&task_rq(p)->lock);
817 995
818 if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED))) 996 if (unlikely(test_tsk_thread_flag(p, tif_bit)))
819 return; 997 return;
820 998
821 set_tsk_thread_flag(p, TIF_NEED_RESCHED); 999 set_tsk_thread_flag(p, tif_bit);
822 1000
823 cpu = task_cpu(p); 1001 cpu = task_cpu(p);
824 if (cpu == smp_processor_id()) 1002 if (cpu == smp_processor_id())
@@ -841,10 +1019,10 @@ static void resched_cpu(int cpu)
841 spin_unlock_irqrestore(&rq->lock, flags); 1019 spin_unlock_irqrestore(&rq->lock, flags);
842} 1020}
843#else 1021#else
844static inline void resched_task(struct task_struct *p) 1022static void __resched_task(struct task_struct *p, int tif_bit)
845{ 1023{
846 assert_spin_locked(&task_rq(p)->lock); 1024 assert_spin_locked(&task_rq(p)->lock);
847 set_tsk_need_resched(p); 1025 set_tsk_thread_flag(p, tif_bit);
848} 1026}
849#endif 1027#endif
850 1028
@@ -3497,7 +3675,7 @@ void scheduler_tick(void)
3497 rq->tick_timestamp = rq->clock; 3675 rq->tick_timestamp = rq->clock;
3498 update_cpu_load(rq); 3676 update_cpu_load(rq);
3499 if (curr != rq->idle) /* FIXME: needed? */ 3677 if (curr != rq->idle) /* FIXME: needed? */
3500 curr->sched_class->task_tick(rq, curr); 3678 curr->sched_class->task_tick(rq, curr, 0);
3501 spin_unlock(&rq->lock); 3679 spin_unlock(&rq->lock);
3502 3680
3503#ifdef CONFIG_SMP 3681#ifdef CONFIG_SMP
@@ -3643,6 +3821,8 @@ need_resched_nonpreemptible:
3643 3821
3644 schedule_debug(prev); 3822 schedule_debug(prev);
3645 3823
3824 hrtick_clear(rq);
3825
3646 /* 3826 /*
3647 * Do the rq-clock update outside the rq lock: 3827 * Do the rq-clock update outside the rq lock:
3648 */ 3828 */
@@ -3680,14 +3860,20 @@ need_resched_nonpreemptible:
3680 ++*switch_count; 3860 ++*switch_count;
3681 3861
3682 context_switch(rq, prev, next); /* unlocks the rq */ 3862 context_switch(rq, prev, next); /* unlocks the rq */
3863 /*
3864 * the context switch might have flipped the stack from under
3865 * us, hence refresh the local variables.
3866 */
3867 cpu = smp_processor_id();
3868 rq = cpu_rq(cpu);
3683 } else 3869 } else
3684 spin_unlock_irq(&rq->lock); 3870 spin_unlock_irq(&rq->lock);
3685 3871
3686 if (unlikely(reacquire_kernel_lock(current) < 0)) { 3872 hrtick_set(rq);
3687 cpu = smp_processor_id(); 3873
3688 rq = cpu_rq(cpu); 3874 if (unlikely(reacquire_kernel_lock(current) < 0))
3689 goto need_resched_nonpreemptible; 3875 goto need_resched_nonpreemptible;
3690 } 3876
3691 preempt_enable_no_resched(); 3877 preempt_enable_no_resched();
3692 if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) 3878 if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
3693 goto need_resched; 3879 goto need_resched;
@@ -6913,6 +7099,8 @@ void __init sched_init(void)
6913 rq->rt.overloaded = 0; 7099 rq->rt.overloaded = 0;
6914 rq_attach_root(rq, &def_root_domain); 7100 rq_attach_root(rq, &def_root_domain);
6915#endif 7101#endif
7102 init_rq_hrtick(rq);
7103
6916 atomic_set(&rq->nr_iowait, 0); 7104 atomic_set(&rq->nr_iowait, 0);
6917 7105
6918 array = &rq->rt.active; 7106 array = &rq->rt.active;