aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorPeter Zijlstra <peterz@infradead.org>2008-07-18 12:01:23 -0400
committerIngo Molnar <mingo@elte.hu>2008-07-20 04:37:28 -0400
commit31656519e132f6612584815f128c83976a9aaaef (patch)
tree595349fdcd65988d7a43d647594dcea8a8a4e80b /kernel
parent577b4a58d2e74a4d48050eeea3e3f952ce04eb86 (diff)
sched, x86: clean up hrtick implementation
random uvesafb failures were reported against Gentoo: http://bugs.gentoo.org/show_bug.cgi?id=222799 and Mihai Moldovan bisected it back to: > 8f4d37ec073c17e2d4aa8851df5837d798606d6f is first bad commit > commit 8f4d37ec073c17e2d4aa8851df5837d798606d6f > Author: Peter Zijlstra <a.p.zijlstra@chello.nl> > Date: Fri Jan 25 21:08:29 2008 +0100 > > sched: high-res preemption tick Linus suspected it to be hrtick + vm86 interaction and observed: > Btw, Peter, Ingo: I think that commit is doing bad things. They aren't > _incorrect_ per se, but they are definitely bad. > > Why? > > Using random _TIF_WORK_MASK flags is really impolite for doing > "scheduling" work. There's a reason that arch/x86/kernel/entry_32.S > special-cases the _TIF_NEED_RESCHED flag: we don't want to exit out of > vm86 mode unnecessarily. > > See the "work_notifysig_v86" label, and how it does that > "save_v86_state()" thing etc etc. Right, I never liked having to fiddle with those TIF flags. Initially I needed it because the hrtimer base lock could not nest in the rq lock. That however is fixed these days. Currently the only reason left to fiddle with the TIF flags is remote wakeups. We cannot program a remote cpu's hrtimer. I've been thinking about using the new and improved IPI function call stuff to implement hrtimer_start_on(). However that does require that smp_call_function_single(.wait=0) works from interrupt context - /me looks at the latest series from Jens - Yes that does seem to be supported, good. Here's a stab at cleaning this stuff up ... Mihai reported test success as well. Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Tested-by: Mihai Moldovan <ionic@ionic.de> Cc: Michal Januszewski <spock@gentoo.org> Cc: Antonino Daplas <adaplas@gmail.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Kconfig.hz2
-rw-r--r--kernel/sched.c202
-rw-r--r--kernel/sched_fair.c5
3 files changed, 63 insertions, 146 deletions
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 526128a2e622..2a202a846757 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -55,4 +55,4 @@ config HZ
55 default 1000 if HZ_1000 55 default 1000 if HZ_1000
56 56
57config SCHED_HRTICK 57config SCHED_HRTICK
58 def_bool HIGH_RES_TIMERS && X86 58 def_bool HIGH_RES_TIMERS
diff --git a/kernel/sched.c b/kernel/sched.c
index 1ee18dbb4516..c13c75e9f9f7 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -571,8 +571,10 @@ struct rq {
571#endif 571#endif
572 572
573#ifdef CONFIG_SCHED_HRTICK 573#ifdef CONFIG_SCHED_HRTICK
574 unsigned long hrtick_flags; 574#ifdef CONFIG_SMP
575 ktime_t hrtick_expire; 575 int hrtick_csd_pending;
576 struct call_single_data hrtick_csd;
577#endif
576 struct hrtimer hrtick_timer; 578 struct hrtimer hrtick_timer;
577#endif 579#endif
578 580
@@ -983,13 +985,6 @@ static struct rq *this_rq_lock(void)
983 return rq; 985 return rq;
984} 986}
985 987
986static void __resched_task(struct task_struct *p, int tif_bit);
987
988static inline void resched_task(struct task_struct *p)
989{
990 __resched_task(p, TIF_NEED_RESCHED);
991}
992
993#ifdef CONFIG_SCHED_HRTICK 988#ifdef CONFIG_SCHED_HRTICK
994/* 989/*
995 * Use HR-timers to deliver accurate preemption points. 990 * Use HR-timers to deliver accurate preemption points.
@@ -1001,25 +996,6 @@ static inline void resched_task(struct task_struct *p)
1001 * When we get rescheduled we reprogram the hrtick_timer outside of the 996 * When we get rescheduled we reprogram the hrtick_timer outside of the
1002 * rq->lock. 997 * rq->lock.
1003 */ 998 */
1004static inline void resched_hrt(struct task_struct *p)
1005{
1006 __resched_task(p, TIF_HRTICK_RESCHED);
1007}
1008
1009static inline void resched_rq(struct rq *rq)
1010{
1011 unsigned long flags;
1012
1013 spin_lock_irqsave(&rq->lock, flags);
1014 resched_task(rq->curr);
1015 spin_unlock_irqrestore(&rq->lock, flags);
1016}
1017
1018enum {
1019 HRTICK_SET, /* re-programm hrtick_timer */
1020 HRTICK_RESET, /* not a new slice */
1021 HRTICK_BLOCK, /* stop hrtick operations */
1022};
1023 999
1024/* 1000/*
1025 * Use hrtick when: 1001 * Use hrtick when:
@@ -1030,40 +1006,11 @@ static inline int hrtick_enabled(struct rq *rq)
1030{ 1006{
1031 if (!sched_feat(HRTICK)) 1007 if (!sched_feat(HRTICK))
1032 return 0; 1008 return 0;
1033 if (unlikely(test_bit(HRTICK_BLOCK, &rq->hrtick_flags))) 1009 if (!cpu_online(cpu_of(rq)))
1034 return 0; 1010 return 0;
1035 return hrtimer_is_hres_active(&rq->hrtick_timer); 1011 return hrtimer_is_hres_active(&rq->hrtick_timer);
1036} 1012}
1037 1013
1038/*
1039 * Called to set the hrtick timer state.
1040 *
1041 * called with rq->lock held and irqs disabled
1042 */
1043static void hrtick_start(struct rq *rq, u64 delay, int reset)
1044{
1045 assert_spin_locked(&rq->lock);
1046
1047 /*
1048 * preempt at: now + delay
1049 */
1050 rq->hrtick_expire =
1051 ktime_add_ns(rq->hrtick_timer.base->get_time(), delay);
1052 /*
1053 * indicate we need to program the timer
1054 */
1055 __set_bit(HRTICK_SET, &rq->hrtick_flags);
1056 if (reset)
1057 __set_bit(HRTICK_RESET, &rq->hrtick_flags);
1058
1059 /*
1060 * New slices are called from the schedule path and don't need a
1061 * forced reschedule.
1062 */
1063 if (reset)
1064 resched_hrt(rq->curr);
1065}
1066
1067static void hrtick_clear(struct rq *rq) 1014static void hrtick_clear(struct rq *rq)
1068{ 1015{
1069 if (hrtimer_active(&rq->hrtick_timer)) 1016 if (hrtimer_active(&rq->hrtick_timer))
@@ -1071,32 +1018,6 @@ static void hrtick_clear(struct rq *rq)
1071} 1018}
1072 1019
1073/* 1020/*
1074 * Update the timer from the possible pending state.
1075 */
1076static void hrtick_set(struct rq *rq)
1077{
1078 ktime_t time;
1079 int set, reset;
1080 unsigned long flags;
1081
1082 WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
1083
1084 spin_lock_irqsave(&rq->lock, flags);
1085 set = __test_and_clear_bit(HRTICK_SET, &rq->hrtick_flags);
1086 reset = __test_and_clear_bit(HRTICK_RESET, &rq->hrtick_flags);
1087 time = rq->hrtick_expire;
1088 clear_thread_flag(TIF_HRTICK_RESCHED);
1089 spin_unlock_irqrestore(&rq->lock, flags);
1090
1091 if (set) {
1092 hrtimer_start(&rq->hrtick_timer, time, HRTIMER_MODE_ABS);
1093 if (reset && !hrtimer_active(&rq->hrtick_timer))
1094 resched_rq(rq);
1095 } else
1096 hrtick_clear(rq);
1097}
1098
1099/*
1100 * High-resolution timer tick. 1021 * High-resolution timer tick.
1101 * Runs from hardirq context with interrupts disabled. 1022 * Runs from hardirq context with interrupts disabled.
1102 */ 1023 */
@@ -1115,27 +1036,37 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer)
1115} 1036}
1116 1037
1117#ifdef CONFIG_SMP 1038#ifdef CONFIG_SMP
1118static void hotplug_hrtick_disable(int cpu) 1039/*
1040 * called from hardirq (IPI) context
1041 */
1042static void __hrtick_start(void *arg)
1119{ 1043{
1120 struct rq *rq = cpu_rq(cpu); 1044 struct rq *rq = arg;
1121 unsigned long flags;
1122
1123 spin_lock_irqsave(&rq->lock, flags);
1124 rq->hrtick_flags = 0;
1125 __set_bit(HRTICK_BLOCK, &rq->hrtick_flags);
1126 spin_unlock_irqrestore(&rq->lock, flags);
1127 1045
1128 hrtick_clear(rq); 1046 spin_lock(&rq->lock);
1047 hrtimer_restart(&rq->hrtick_timer);
1048 rq->hrtick_csd_pending = 0;
1049 spin_unlock(&rq->lock);
1129} 1050}
1130 1051
1131static void hotplug_hrtick_enable(int cpu) 1052/*
1053 * Called to set the hrtick timer state.
1054 *
1055 * called with rq->lock held and irqs disabled
1056 */
1057static void hrtick_start(struct rq *rq, u64 delay)
1132{ 1058{
1133 struct rq *rq = cpu_rq(cpu); 1059 struct hrtimer *timer = &rq->hrtick_timer;
1134 unsigned long flags; 1060 ktime_t time = ktime_add_ns(timer->base->get_time(), delay);
1135 1061
1136 spin_lock_irqsave(&rq->lock, flags); 1062 timer->expires = time;
1137 __clear_bit(HRTICK_BLOCK, &rq->hrtick_flags); 1063
1138 spin_unlock_irqrestore(&rq->lock, flags); 1064 if (rq == this_rq()) {
1065 hrtimer_restart(timer);
1066 } else if (!rq->hrtick_csd_pending) {
1067 __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd);
1068 rq->hrtick_csd_pending = 1;
1069 }
1139} 1070}
1140 1071
1141static int 1072static int
@@ -1150,16 +1081,7 @@ hotplug_hrtick(struct notifier_block *nfb, unsigned long action, void *hcpu)
1150 case CPU_DOWN_PREPARE_FROZEN: 1081 case CPU_DOWN_PREPARE_FROZEN:
1151 case CPU_DEAD: 1082 case CPU_DEAD:
1152 case CPU_DEAD_FROZEN: 1083 case CPU_DEAD_FROZEN:
1153 hotplug_hrtick_disable(cpu); 1084 hrtick_clear(cpu_rq(cpu));
1154 return NOTIFY_OK;
1155
1156 case CPU_UP_PREPARE:
1157 case CPU_UP_PREPARE_FROZEN:
1158 case CPU_DOWN_FAILED:
1159 case CPU_DOWN_FAILED_FROZEN:
1160 case CPU_ONLINE:
1161 case CPU_ONLINE_FROZEN:
1162 hotplug_hrtick_enable(cpu);
1163 return NOTIFY_OK; 1085 return NOTIFY_OK;
1164 } 1086 }
1165 1087
@@ -1170,46 +1092,45 @@ static void init_hrtick(void)
1170{ 1092{
1171 hotcpu_notifier(hotplug_hrtick, 0); 1093 hotcpu_notifier(hotplug_hrtick, 0);
1172} 1094}
1173#endif /* CONFIG_SMP */ 1095#else
1096/*
1097 * Called to set the hrtick timer state.
1098 *
1099 * called with rq->lock held and irqs disabled
1100 */
1101static void hrtick_start(struct rq *rq, u64 delay)
1102{
1103 hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), HRTIMER_MODE_REL);
1104}
1174 1105
1175static void init_rq_hrtick(struct rq *rq) 1106static void init_hrtick(void)
1176{ 1107{
1177 rq->hrtick_flags = 0;
1178 hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1179 rq->hrtick_timer.function = hrtick;
1180 rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
1181} 1108}
1109#endif /* CONFIG_SMP */
1182 1110
1183void hrtick_resched(void) 1111static void init_rq_hrtick(struct rq *rq)
1184{ 1112{
1185 struct rq *rq; 1113#ifdef CONFIG_SMP
1186 unsigned long flags; 1114 rq->hrtick_csd_pending = 0;
1187 1115
1188 if (!test_thread_flag(TIF_HRTICK_RESCHED)) 1116 rq->hrtick_csd.flags = 0;
1189 return; 1117 rq->hrtick_csd.func = __hrtick_start;
1118 rq->hrtick_csd.info = rq;
1119#endif
1190 1120
1191 local_irq_save(flags); 1121 hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1192 rq = cpu_rq(smp_processor_id()); 1122 rq->hrtick_timer.function = hrtick;
1193 hrtick_set(rq); 1123 rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
1194 local_irq_restore(flags);
1195} 1124}
1196#else 1125#else
1197static inline void hrtick_clear(struct rq *rq) 1126static inline void hrtick_clear(struct rq *rq)
1198{ 1127{
1199} 1128}
1200 1129
1201static inline void hrtick_set(struct rq *rq)
1202{
1203}
1204
1205static inline void init_rq_hrtick(struct rq *rq) 1130static inline void init_rq_hrtick(struct rq *rq)
1206{ 1131{
1207} 1132}
1208 1133
1209void hrtick_resched(void)
1210{
1211}
1212
1213static inline void init_hrtick(void) 1134static inline void init_hrtick(void)
1214{ 1135{
1215} 1136}
@@ -1228,16 +1149,16 @@ static inline void init_hrtick(void)
1228#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) 1149#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
1229#endif 1150#endif
1230 1151
1231static void __resched_task(struct task_struct *p, int tif_bit) 1152static void resched_task(struct task_struct *p)
1232{ 1153{
1233 int cpu; 1154 int cpu;
1234 1155
1235 assert_spin_locked(&task_rq(p)->lock); 1156 assert_spin_locked(&task_rq(p)->lock);
1236 1157
1237 if (unlikely(test_tsk_thread_flag(p, tif_bit))) 1158 if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED)))
1238 return; 1159 return;
1239 1160
1240 set_tsk_thread_flag(p, tif_bit); 1161 set_tsk_thread_flag(p, TIF_NEED_RESCHED);
1241 1162
1242 cpu = task_cpu(p); 1163 cpu = task_cpu(p);
1243 if (cpu == smp_processor_id()) 1164 if (cpu == smp_processor_id())
@@ -1303,10 +1224,10 @@ void wake_up_idle_cpu(int cpu)
1303#endif /* CONFIG_NO_HZ */ 1224#endif /* CONFIG_NO_HZ */
1304 1225
1305#else /* !CONFIG_SMP */ 1226#else /* !CONFIG_SMP */
1306static void __resched_task(struct task_struct *p, int tif_bit) 1227static void resched_task(struct task_struct *p)
1307{ 1228{
1308 assert_spin_locked(&task_rq(p)->lock); 1229 assert_spin_locked(&task_rq(p)->lock);
1309 set_tsk_thread_flag(p, tif_bit); 1230 set_tsk_need_resched(p);
1310} 1231}
1311#endif /* CONFIG_SMP */ 1232#endif /* CONFIG_SMP */
1312 1233
@@ -4395,7 +4316,7 @@ asmlinkage void __sched schedule(void)
4395 struct task_struct *prev, *next; 4316 struct task_struct *prev, *next;
4396 unsigned long *switch_count; 4317 unsigned long *switch_count;
4397 struct rq *rq; 4318 struct rq *rq;
4398 int cpu, hrtick = sched_feat(HRTICK); 4319 int cpu;
4399 4320
4400need_resched: 4321need_resched:
4401 preempt_disable(); 4322 preempt_disable();
@@ -4410,7 +4331,7 @@ need_resched_nonpreemptible:
4410 4331
4411 schedule_debug(prev); 4332 schedule_debug(prev);
4412 4333
4413 if (hrtick) 4334 if (sched_feat(HRTICK))
4414 hrtick_clear(rq); 4335 hrtick_clear(rq);
4415 4336
4416 /* 4337 /*
@@ -4457,9 +4378,6 @@ need_resched_nonpreemptible:
4457 } else 4378 } else
4458 spin_unlock_irq(&rq->lock); 4379 spin_unlock_irq(&rq->lock);
4459 4380
4460 if (hrtick)
4461 hrtick_set(rq);
4462
4463 if (unlikely(reacquire_kernel_lock(current) < 0)) 4381 if (unlikely(reacquire_kernel_lock(current) < 0))
4464 goto need_resched_nonpreemptible; 4382 goto need_resched_nonpreemptible;
4465 4383
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index f2aa987027d6..6893b3ed65fe 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -878,7 +878,6 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
878#ifdef CONFIG_SCHED_HRTICK 878#ifdef CONFIG_SCHED_HRTICK
879static void hrtick_start_fair(struct rq *rq, struct task_struct *p) 879static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
880{ 880{
881 int requeue = rq->curr == p;
882 struct sched_entity *se = &p->se; 881 struct sched_entity *se = &p->se;
883 struct cfs_rq *cfs_rq = cfs_rq_of(se); 882 struct cfs_rq *cfs_rq = cfs_rq_of(se);
884 883
@@ -899,10 +898,10 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
899 * Don't schedule slices shorter than 10000ns, that just 898 * Don't schedule slices shorter than 10000ns, that just
900 * doesn't make sense. Rely on vruntime for fairness. 899 * doesn't make sense. Rely on vruntime for fairness.
901 */ 900 */
902 if (!requeue) 901 if (rq->curr != p)
903 delta = max(10000LL, delta); 902 delta = max(10000LL, delta);
904 903
905 hrtick_start(rq, delta, requeue); 904 hrtick_start(rq, delta);
906 } 905 }
907} 906}
908#else /* !CONFIG_SCHED_HRTICK */ 907#else /* !CONFIG_SCHED_HRTICK */