aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/Kconfig.hz 2
-rw-r--r-- kernel/sched.c 210
-rw-r--r-- kernel/sched_fair.c 69
-rw-r--r-- kernel/sched_idletask.c 2
-rw-r--r-- kernel/sched_rt.c 2
5 files changed, 268 insertions, 17 deletions
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 4af15802ccd4..526128a2e622 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -54,3 +54,5 @@ config HZ
54 default 300 if HZ_300 54 default 300 if HZ_300
55 default 1000 if HZ_1000 55 default 1000 if HZ_1000
56 56
57config SCHED_HRTICK
58 def_bool HIGH_RES_TIMERS && X86
diff --git a/kernel/sched.c b/kernel/sched.c
index 6ee37602a6d8..17f93d3eda91 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -65,6 +65,7 @@
65#include <linux/reciprocal_div.h> 65#include <linux/reciprocal_div.h>
66#include <linux/unistd.h> 66#include <linux/unistd.h>
67#include <linux/pagemap.h> 67#include <linux/pagemap.h>
68#include <linux/hrtimer.h>
68 69
69#include <asm/tlb.h> 70#include <asm/tlb.h>
70#include <asm/irq_regs.h> 71#include <asm/irq_regs.h>
@@ -451,6 +452,12 @@ struct rq {
451 struct list_head migration_queue; 452 struct list_head migration_queue;
452#endif 453#endif
453 454
455#ifdef CONFIG_SCHED_HRTICK
456 unsigned long hrtick_flags;
457 ktime_t hrtick_expire;
458 struct hrtimer hrtick_timer;
459#endif
460
454#ifdef CONFIG_SCHEDSTATS 461#ifdef CONFIG_SCHEDSTATS
455 /* latency stats */ 462 /* latency stats */
456 struct sched_info rq_sched_info; 463 struct sched_info rq_sched_info;
@@ -572,6 +579,8 @@ enum {
572 SCHED_FEAT_START_DEBIT = 4, 579 SCHED_FEAT_START_DEBIT = 4,
573 SCHED_FEAT_TREE_AVG = 8, 580 SCHED_FEAT_TREE_AVG = 8,
574 SCHED_FEAT_APPROX_AVG = 16, 581 SCHED_FEAT_APPROX_AVG = 16,
582 SCHED_FEAT_HRTICK = 32,
583 SCHED_FEAT_DOUBLE_TICK = 64,
575}; 584};
576 585
577const_debug unsigned int sysctl_sched_features = 586const_debug unsigned int sysctl_sched_features =
@@ -579,7 +588,9 @@ const_debug unsigned int sysctl_sched_features =
579 SCHED_FEAT_WAKEUP_PREEMPT * 1 | 588 SCHED_FEAT_WAKEUP_PREEMPT * 1 |
580 SCHED_FEAT_START_DEBIT * 1 | 589 SCHED_FEAT_START_DEBIT * 1 |
581 SCHED_FEAT_TREE_AVG * 0 | 590 SCHED_FEAT_TREE_AVG * 0 |
582 SCHED_FEAT_APPROX_AVG * 0; 591 SCHED_FEAT_APPROX_AVG * 0 |
592 SCHED_FEAT_HRTICK * 1 |
593 SCHED_FEAT_DOUBLE_TICK * 0;
583 594
584#define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x) 595#define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x)
585 596
@@ -796,6 +807,173 @@ void sched_clock_idle_wakeup_event(u64 delta_ns)
796} 807}
797EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event); 808EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
798 809
810static void __resched_task(struct task_struct *p, int tif_bit);
811
812static inline void resched_task(struct task_struct *p)
813{
814 __resched_task(p, TIF_NEED_RESCHED);
815}
816
817#ifdef CONFIG_SCHED_HRTICK
818/*
819 * Use HR-timers to deliver accurate preemption points.
820 *
821 * It's all a bit involved since we cannot program an hrt while holding the
822 * rq->lock. So what we do is store a state in rq->hrtick_* and ask for a
823 * reschedule event.
824 *
825 * When we get rescheduled we reprogram the hrtick_timer outside of the
826 * rq->lock.
827 */
828static inline void resched_hrt(struct task_struct *p)
829{
830 __resched_task(p, TIF_HRTICK_RESCHED);
831}
832
833static inline void resched_rq(struct rq *rq)
834{
835 unsigned long flags;
836
837 spin_lock_irqsave(&rq->lock, flags);
838 resched_task(rq->curr);
839 spin_unlock_irqrestore(&rq->lock, flags);
840}
841
842enum {
843 HRTICK_SET, /* re-program hrtick_timer */
844 HRTICK_RESET, /* not a new slice */
845};
846
847/*
848 * Use hrtick when:
849 * - enabled by features
850 * - hrtimer is actually high res
851 */
852static inline int hrtick_enabled(struct rq *rq)
853{
854 if (!sched_feat(HRTICK))
855 return 0;
856 return hrtimer_is_hres_active(&rq->hrtick_timer);
857}
858
859/*
860 * Called to set the hrtick timer state.
861 *
862 * called with rq->lock held and irqs disabled
863 */
864static void hrtick_start(struct rq *rq, u64 delay, int reset)
865{
866 assert_spin_locked(&rq->lock);
867
868 /*
869 * preempt at: now + delay
870 */
871 rq->hrtick_expire =
872 ktime_add_ns(rq->hrtick_timer.base->get_time(), delay);
873 /*
874 * indicate we need to program the timer
875 */
876 __set_bit(HRTICK_SET, &rq->hrtick_flags);
877 if (reset)
878 __set_bit(HRTICK_RESET, &rq->hrtick_flags);
879
880 /*
881 * New slices are called from the schedule path and don't need a
882 * forced reschedule.
883 */
884 if (reset)
885 resched_hrt(rq->curr);
886}
887
888static void hrtick_clear(struct rq *rq)
889{
890 if (hrtimer_active(&rq->hrtick_timer))
891 hrtimer_cancel(&rq->hrtick_timer);
892}
893
894/*
895 * Update the timer from the possible pending state.
896 */
897static void hrtick_set(struct rq *rq)
898{
899 ktime_t time;
900 int set, reset;
901 unsigned long flags;
902
903 WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
904
905 spin_lock_irqsave(&rq->lock, flags);
906 set = __test_and_clear_bit(HRTICK_SET, &rq->hrtick_flags);
907 reset = __test_and_clear_bit(HRTICK_RESET, &rq->hrtick_flags);
908 time = rq->hrtick_expire;
909 clear_thread_flag(TIF_HRTICK_RESCHED);
910 spin_unlock_irqrestore(&rq->lock, flags);
911
912 if (set) {
913 hrtimer_start(&rq->hrtick_timer, time, HRTIMER_MODE_ABS);
914 if (reset && !hrtimer_active(&rq->hrtick_timer))
915 resched_rq(rq);
916 } else
917 hrtick_clear(rq);
918}
919
920/*
921 * High-resolution timer tick.
922 * Runs from hardirq context with interrupts disabled.
923 */
924static enum hrtimer_restart hrtick(struct hrtimer *timer)
925{
926 struct rq *rq = container_of(timer, struct rq, hrtick_timer);
927
928 WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
929
930 spin_lock(&rq->lock);
931 __update_rq_clock(rq);
932 rq->curr->sched_class->task_tick(rq, rq->curr, 1);
933 spin_unlock(&rq->lock);
934
935 return HRTIMER_NORESTART;
936}
937
938static inline void init_rq_hrtick(struct rq *rq)
939{
940 rq->hrtick_flags = 0;
941 hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
942 rq->hrtick_timer.function = hrtick;
943 rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
944}
945
946void hrtick_resched(void)
947{
948 struct rq *rq;
949 unsigned long flags;
950
951 if (!test_thread_flag(TIF_HRTICK_RESCHED))
952 return;
953
954 local_irq_save(flags);
955 rq = cpu_rq(smp_processor_id());
956 hrtick_set(rq);
957 local_irq_restore(flags);
958}
959#else
960static inline void hrtick_clear(struct rq *rq)
961{
962}
963
964static inline void hrtick_set(struct rq *rq)
965{
966}
967
968static inline void init_rq_hrtick(struct rq *rq)
969{
970}
971
972void hrtick_resched(void)
973{
974}
975#endif
976
799/* 977/*
800 * resched_task - mark a task 'to be rescheduled now'. 978 * resched_task - mark a task 'to be rescheduled now'.
801 * 979 *
@@ -809,16 +987,16 @@ EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
809#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) 987#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
810#endif 988#endif
811 989
812static void resched_task(struct task_struct *p) 990static void __resched_task(struct task_struct *p, int tif_bit)
813{ 991{
814 int cpu; 992 int cpu;
815 993
816 assert_spin_locked(&task_rq(p)->lock); 994 assert_spin_locked(&task_rq(p)->lock);
817 995
818 if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED))) 996 if (unlikely(test_tsk_thread_flag(p, tif_bit)))
819 return; 997 return;
820 998
821 set_tsk_thread_flag(p, TIF_NEED_RESCHED); 999 set_tsk_thread_flag(p, tif_bit);
822 1000
823 cpu = task_cpu(p); 1001 cpu = task_cpu(p);
824 if (cpu == smp_processor_id()) 1002 if (cpu == smp_processor_id())
@@ -841,10 +1019,10 @@ static void resched_cpu(int cpu)
841 spin_unlock_irqrestore(&rq->lock, flags); 1019 spin_unlock_irqrestore(&rq->lock, flags);
842} 1020}
843#else 1021#else
844static inline void resched_task(struct task_struct *p) 1022static void __resched_task(struct task_struct *p, int tif_bit)
845{ 1023{
846 assert_spin_locked(&task_rq(p)->lock); 1024 assert_spin_locked(&task_rq(p)->lock);
847 set_tsk_need_resched(p); 1025 set_tsk_thread_flag(p, tif_bit);
848} 1026}
849#endif 1027#endif
850 1028
@@ -3497,7 +3675,7 @@ void scheduler_tick(void)
3497 rq->tick_timestamp = rq->clock; 3675 rq->tick_timestamp = rq->clock;
3498 update_cpu_load(rq); 3676 update_cpu_load(rq);
3499 if (curr != rq->idle) /* FIXME: needed? */ 3677 if (curr != rq->idle) /* FIXME: needed? */
3500 curr->sched_class->task_tick(rq, curr); 3678 curr->sched_class->task_tick(rq, curr, 0);
3501 spin_unlock(&rq->lock); 3679 spin_unlock(&rq->lock);
3502 3680
3503#ifdef CONFIG_SMP 3681#ifdef CONFIG_SMP
@@ -3643,6 +3821,8 @@ need_resched_nonpreemptible:
3643 3821
3644 schedule_debug(prev); 3822 schedule_debug(prev);
3645 3823
3824 hrtick_clear(rq);
3825
3646 /* 3826 /*
3647 * Do the rq-clock update outside the rq lock: 3827 * Do the rq-clock update outside the rq lock:
3648 */ 3828 */
@@ -3680,14 +3860,20 @@ need_resched_nonpreemptible:
3680 ++*switch_count; 3860 ++*switch_count;
3681 3861
3682 context_switch(rq, prev, next); /* unlocks the rq */ 3862 context_switch(rq, prev, next); /* unlocks the rq */
3863 /*
3864 * the context switch might have flipped the stack from under
3865 * us, hence refresh the local variables.
3866 */
3867 cpu = smp_processor_id();
3868 rq = cpu_rq(cpu);
3683 } else 3869 } else
3684 spin_unlock_irq(&rq->lock); 3870 spin_unlock_irq(&rq->lock);
3685 3871
3686 if (unlikely(reacquire_kernel_lock(current) < 0)) { 3872 hrtick_set(rq);
3687 cpu = smp_processor_id(); 3873
3688 rq = cpu_rq(cpu); 3874 if (unlikely(reacquire_kernel_lock(current) < 0))
3689 goto need_resched_nonpreemptible; 3875 goto need_resched_nonpreemptible;
3690 } 3876
3691 preempt_enable_no_resched(); 3877 preempt_enable_no_resched();
3692 if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) 3878 if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
3693 goto need_resched; 3879 goto need_resched;
@@ -6913,6 +7099,8 @@ void __init sched_init(void)
6913 rq->rt.overloaded = 0; 7099 rq->rt.overloaded = 0;
6914 rq_attach_root(rq, &def_root_domain); 7100 rq_attach_root(rq, &def_root_domain);
6915#endif 7101#endif
7102 init_rq_hrtick(rq);
7103
6916 atomic_set(&rq->nr_iowait, 0); 7104 atomic_set(&rq->nr_iowait, 0);
6917 7105
6918 array = &rq->rt.active; 7106 array = &rq->rt.active;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index dfa18d55561d..3dab1ff83c4f 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -642,13 +642,29 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
642 cfs_rq->curr = NULL; 642 cfs_rq->curr = NULL;
643} 643}
644 644
645static void entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) 645static void
646entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
646{ 647{
647 /* 648 /*
648 * Update run-time statistics of the 'current'. 649 * Update run-time statistics of the 'current'.
649 */ 650 */
650 update_curr(cfs_rq); 651 update_curr(cfs_rq);
651 652
653#ifdef CONFIG_SCHED_HRTICK
654 /*
655 * queued ticks are scheduled to match the slice, so don't bother
656 * validating it and just reschedule.
657 */
658 if (queued)
659 return resched_task(rq_of(cfs_rq)->curr);
660 /*
661 * don't let the periodic tick interfere with the hrtick preemption
662 */
663 if (!sched_feat(DOUBLE_TICK) &&
664 hrtimer_active(&rq_of(cfs_rq)->hrtick_timer))
665 return;
666#endif
667
652 if (cfs_rq->nr_running > 1 || !sched_feat(WAKEUP_PREEMPT)) 668 if (cfs_rq->nr_running > 1 || !sched_feat(WAKEUP_PREEMPT))
653 check_preempt_tick(cfs_rq, curr); 669 check_preempt_tick(cfs_rq, curr);
654} 670}
@@ -754,6 +770,43 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se)
754 770
755#endif /* CONFIG_FAIR_GROUP_SCHED */ 771#endif /* CONFIG_FAIR_GROUP_SCHED */
756 772
773#ifdef CONFIG_SCHED_HRTICK
774static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
775{
776 int requeue = rq->curr == p;
777 struct sched_entity *se = &p->se;
778 struct cfs_rq *cfs_rq = cfs_rq_of(se);
779
780 WARN_ON(task_rq(p) != rq);
781
782 if (hrtick_enabled(rq) && cfs_rq->nr_running > 1) {
783 u64 slice = sched_slice(cfs_rq, se);
784 u64 ran = se->sum_exec_runtime - se->prev_sum_exec_runtime;
785 s64 delta = slice - ran;
786
787 if (delta < 0) {
788 if (rq->curr == p)
789 resched_task(p);
790 return;
791 }
792
793 /*
794 * Don't schedule slices shorter than 10000ns, that just
795 * doesn't make sense. Rely on vruntime for fairness.
796 */
797 if (!requeue)
798 delta = max(10000LL, delta);
799
800 hrtick_start(rq, delta, requeue);
801 }
802}
803#else
804static inline void
805hrtick_start_fair(struct rq *rq, struct task_struct *p)
806{
807}
808#endif
809
757/* 810/*
758 * The enqueue_task method is called before nr_running is 811 * The enqueue_task method is called before nr_running is
759 * increased. Here we update the fair scheduling stats and 812 * increased. Here we update the fair scheduling stats and
@@ -782,6 +835,8 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
782 */ 835 */
783 if (incload) 836 if (incload)
784 inc_cpu_load(rq, topse->load.weight); 837 inc_cpu_load(rq, topse->load.weight);
838
839 hrtick_start_fair(rq, rq->curr);
785} 840}
786 841
787/* 842/*
@@ -814,6 +869,8 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep)
814 */ 869 */
815 if (decload) 870 if (decload)
816 dec_cpu_load(rq, topse->load.weight); 871 dec_cpu_load(rq, topse->load.weight);
872
873 hrtick_start_fair(rq, rq->curr);
817} 874}
818 875
819/* 876/*
@@ -1049,6 +1106,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
1049 1106
1050static struct task_struct *pick_next_task_fair(struct rq *rq) 1107static struct task_struct *pick_next_task_fair(struct rq *rq)
1051{ 1108{
1109 struct task_struct *p;
1052 struct cfs_rq *cfs_rq = &rq->cfs; 1110 struct cfs_rq *cfs_rq = &rq->cfs;
1053 struct sched_entity *se; 1111 struct sched_entity *se;
1054 1112
@@ -1060,7 +1118,10 @@ static struct task_struct *pick_next_task_fair(struct rq *rq)
1060 cfs_rq = group_cfs_rq(se); 1118 cfs_rq = group_cfs_rq(se);
1061 } while (cfs_rq); 1119 } while (cfs_rq);
1062 1120
1063 return task_of(se); 1121 p = task_of(se);
1122 hrtick_start_fair(rq, p);
1123
1124 return p;
1064} 1125}
1065 1126
1066/* 1127/*
@@ -1235,14 +1296,14 @@ move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
1235/* 1296/*
1236 * scheduler tick hitting a task of our scheduling class: 1297 * scheduler tick hitting a task of our scheduling class:
1237 */ 1298 */
1238static void task_tick_fair(struct rq *rq, struct task_struct *curr) 1299static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
1239{ 1300{
1240 struct cfs_rq *cfs_rq; 1301 struct cfs_rq *cfs_rq;
1241 struct sched_entity *se = &curr->se; 1302 struct sched_entity *se = &curr->se;
1242 1303
1243 for_each_sched_entity(se) { 1304 for_each_sched_entity(se) {
1244 cfs_rq = cfs_rq_of(se); 1305 cfs_rq = cfs_rq_of(se);
1245 entity_tick(cfs_rq, se); 1306 entity_tick(cfs_rq, se, queued);
1246 } 1307 }
1247} 1308}
1248 1309
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index ef7a2661fa10..2bcafa375633 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -61,7 +61,7 @@ move_one_task_idle(struct rq *this_rq, int this_cpu, struct rq *busiest,
61} 61}
62#endif 62#endif
63 63
64static void task_tick_idle(struct rq *rq, struct task_struct *curr) 64static void task_tick_idle(struct rq *rq, struct task_struct *curr, int queued)
65{ 65{
66} 66}
67 67
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index f350f7b15158..83fbbcb8019e 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -863,7 +863,7 @@ static void watchdog(struct rq *rq, struct task_struct *p)
863 } 863 }
864} 864}
865 865
866static void task_tick_rt(struct rq *rq, struct task_struct *p) 866static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
867{ 867{
868 update_curr_rt(rq); 868 update_curr_rt(rq);
869 869