aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2008-07-23 22:36:53 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2008-07-23 22:36:53 -0400
commit7f9dce38378f0a4a298e885553d6bb7121376376 (patch)
tree5bfd688c9f356f7216bbc3cef3b4c10153de334b
parent26dcce0fabbef75ae426461edf21b5030bad60f3 (diff)
parentba42059fbd0aa1ac91b582412b5fedb1258f241f (diff)
Merge branch 'sched/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'sched/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: sched: hrtick_enabled() should use cpu_active() sched, x86: clean up hrtick implementation sched: fix build error, provide partition_sched_domains() unconditionally sched: fix warning in inc_rt_tasks() to not declare variable 'rq' if it's not needed cpu hotplug: Make cpu_active_map synchronization dependency clear cpu hotplug, sched: Introduce cpu_active_map and redo sched domain managment (take 2) sched: rework of "prioritize non-migratable tasks over migratable ones" sched: reduce stack size in isolated_cpu_setup() Revert parts of "ftrace: do not trace scheduler functions" Fixed up conflicts in include/asm-x86/thread_info.h (due to the TIF_SINGLESTEP unification vs TIF_HRTICK_RESCHED removal) and kernel/sched_fair.c (due to cpu_active_map vs for_each_cpu_mask_nr() introduction).
-rw-r--r--arch/x86/kernel/signal_32.c3
-rw-r--r--arch/x86/kernel/signal_64.c3
-rw-r--r--include/asm-x86/thread_info.h4
-rw-r--r--include/linux/cpumask.h6
-rw-r--r--include/linux/cpuset.h7
-rw-r--r--include/linux/sched.h11
-rw-r--r--init/main.c7
-rw-r--r--kernel/Kconfig.hz2
-rw-r--r--kernel/Makefile2
-rw-r--r--kernel/cpu.c40
-rw-r--r--kernel/cpuset.c2
-rw-r--r--kernel/sched.c313
-rw-r--r--kernel/sched_fair.c8
-rw-r--r--kernel/sched_rt.c77
14 files changed, 229 insertions, 256 deletions
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 07faaa5109cb..6fb5bcdd8933 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -661,8 +661,5 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
661 if (thread_info_flags & _TIF_SIGPENDING) 661 if (thread_info_flags & _TIF_SIGPENDING)
662 do_signal(regs); 662 do_signal(regs);
663 663
664 if (thread_info_flags & _TIF_HRTICK_RESCHED)
665 hrtick_resched();
666
667 clear_thread_flag(TIF_IRET); 664 clear_thread_flag(TIF_IRET);
668} 665}
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index bf87684474f1..47c3d249e638 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -496,9 +496,6 @@ void do_notify_resume(struct pt_regs *regs, void *unused,
496 /* deal with pending signal delivery */ 496 /* deal with pending signal delivery */
497 if (thread_info_flags & _TIF_SIGPENDING) 497 if (thread_info_flags & _TIF_SIGPENDING)
498 do_signal(regs); 498 do_signal(regs);
499
500 if (thread_info_flags & _TIF_HRTICK_RESCHED)
501 hrtick_resched();
502} 499}
503 500
504void signal_fault(struct pt_regs *regs, void __user *frame, char *where) 501void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h
index 0a8f27d31d0d..3f2de1050988 100644
--- a/include/asm-x86/thread_info.h
+++ b/include/asm-x86/thread_info.h
@@ -79,7 +79,6 @@ struct thread_info {
79#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ 79#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
80#define TIF_SECCOMP 8 /* secure computing */ 80#define TIF_SECCOMP 8 /* secure computing */
81#define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ 81#define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */
82#define TIF_HRTICK_RESCHED 11 /* reprogram hrtick timer */
83#define TIF_NOTSC 16 /* TSC is not accessible in userland */ 82#define TIF_NOTSC 16 /* TSC is not accessible in userland */
84#define TIF_IA32 17 /* 32bit process */ 83#define TIF_IA32 17 /* 32bit process */
85#define TIF_FORK 18 /* ret_from_fork */ 84#define TIF_FORK 18 /* ret_from_fork */
@@ -102,7 +101,6 @@ struct thread_info {
102#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) 101#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
103#define _TIF_SECCOMP (1 << TIF_SECCOMP) 102#define _TIF_SECCOMP (1 << TIF_SECCOMP)
104#define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) 103#define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY)
105#define _TIF_HRTICK_RESCHED (1 << TIF_HRTICK_RESCHED)
106#define _TIF_NOTSC (1 << TIF_NOTSC) 104#define _TIF_NOTSC (1 << TIF_NOTSC)
107#define _TIF_IA32 (1 << TIF_IA32) 105#define _TIF_IA32 (1 << TIF_IA32)
108#define _TIF_FORK (1 << TIF_FORK) 106#define _TIF_FORK (1 << TIF_FORK)
@@ -135,7 +133,7 @@ struct thread_info {
135 133
136/* Only used for 64 bit */ 134/* Only used for 64 bit */
137#define _TIF_DO_NOTIFY_MASK \ 135#define _TIF_DO_NOTIFY_MASK \
138 (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_HRTICK_RESCHED) 136 (_TIF_SIGPENDING|_TIF_MCE_NOTIFY)
139 137
140/* flags to check in __switch_to() */ 138/* flags to check in __switch_to() */
141#define _TIF_WORK_CTXSW \ 139#define _TIF_WORK_CTXSW \
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 30d59d1d0626..1b5c98e7fef7 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -458,13 +458,14 @@ int __next_cpu_nr(int n, const cpumask_t *srcp);
458 458
459/* 459/*
460 * The following particular system cpumasks and operations manage 460 * The following particular system cpumasks and operations manage
461 * possible, present and online cpus. Each of them is a fixed size 461 * possible, present, active and online cpus. Each of them is a fixed size
462 * bitmap of size NR_CPUS. 462 * bitmap of size NR_CPUS.
463 * 463 *
464 * #ifdef CONFIG_HOTPLUG_CPU 464 * #ifdef CONFIG_HOTPLUG_CPU
465 * cpu_possible_map - has bit 'cpu' set iff cpu is populatable 465 * cpu_possible_map - has bit 'cpu' set iff cpu is populatable
466 * cpu_present_map - has bit 'cpu' set iff cpu is populated 466 * cpu_present_map - has bit 'cpu' set iff cpu is populated
467 * cpu_online_map - has bit 'cpu' set iff cpu available to scheduler 467 * cpu_online_map - has bit 'cpu' set iff cpu available to scheduler
468 * cpu_active_map - has bit 'cpu' set iff cpu available to migration
468 * #else 469 * #else
469 * cpu_possible_map - has bit 'cpu' set iff cpu is populated 470 * cpu_possible_map - has bit 'cpu' set iff cpu is populated
470 * cpu_present_map - copy of cpu_possible_map 471 * cpu_present_map - copy of cpu_possible_map
@@ -515,6 +516,7 @@ int __next_cpu_nr(int n, const cpumask_t *srcp);
515extern cpumask_t cpu_possible_map; 516extern cpumask_t cpu_possible_map;
516extern cpumask_t cpu_online_map; 517extern cpumask_t cpu_online_map;
517extern cpumask_t cpu_present_map; 518extern cpumask_t cpu_present_map;
519extern cpumask_t cpu_active_map;
518 520
519#if NR_CPUS > 1 521#if NR_CPUS > 1
520#define num_online_cpus() cpus_weight_nr(cpu_online_map) 522#define num_online_cpus() cpus_weight_nr(cpu_online_map)
@@ -523,6 +525,7 @@ extern cpumask_t cpu_present_map;
523#define cpu_online(cpu) cpu_isset((cpu), cpu_online_map) 525#define cpu_online(cpu) cpu_isset((cpu), cpu_online_map)
524#define cpu_possible(cpu) cpu_isset((cpu), cpu_possible_map) 526#define cpu_possible(cpu) cpu_isset((cpu), cpu_possible_map)
525#define cpu_present(cpu) cpu_isset((cpu), cpu_present_map) 527#define cpu_present(cpu) cpu_isset((cpu), cpu_present_map)
528#define cpu_active(cpu) cpu_isset((cpu), cpu_active_map)
526#else 529#else
527#define num_online_cpus() 1 530#define num_online_cpus() 1
528#define num_possible_cpus() 1 531#define num_possible_cpus() 1
@@ -530,6 +533,7 @@ extern cpumask_t cpu_present_map;
530#define cpu_online(cpu) ((cpu) == 0) 533#define cpu_online(cpu) ((cpu) == 0)
531#define cpu_possible(cpu) ((cpu) == 0) 534#define cpu_possible(cpu) ((cpu) == 0)
532#define cpu_present(cpu) ((cpu) == 0) 535#define cpu_present(cpu) ((cpu) == 0)
536#define cpu_active(cpu) ((cpu) == 0)
533#endif 537#endif
534 538
535#define cpu_is_offline(cpu) unlikely(!cpu_online(cpu)) 539#define cpu_is_offline(cpu) unlikely(!cpu_online(cpu))
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 038578362b47..e8f450c499b0 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -78,6 +78,8 @@ extern void cpuset_track_online_nodes(void);
78 78
79extern int current_cpuset_is_being_rebound(void); 79extern int current_cpuset_is_being_rebound(void);
80 80
81extern void rebuild_sched_domains(void);
82
81#else /* !CONFIG_CPUSETS */ 83#else /* !CONFIG_CPUSETS */
82 84
83static inline int cpuset_init_early(void) { return 0; } 85static inline int cpuset_init_early(void) { return 0; }
@@ -156,6 +158,11 @@ static inline int current_cpuset_is_being_rebound(void)
156 return 0; 158 return 0;
157} 159}
158 160
161static inline void rebuild_sched_domains(void)
162{
163 partition_sched_domains(0, NULL, NULL);
164}
165
159#endif /* !CONFIG_CPUSETS */ 166#endif /* !CONFIG_CPUSETS */
160 167
161#endif /* _LINUX_CPUSET_H */ 168#endif /* _LINUX_CPUSET_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index af443a08431f..dc7e592c473a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -825,7 +825,16 @@ extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
825 struct sched_domain_attr *dattr_new); 825 struct sched_domain_attr *dattr_new);
826extern int arch_reinit_sched_domains(void); 826extern int arch_reinit_sched_domains(void);
827 827
828#endif /* CONFIG_SMP */ 828#else /* CONFIG_SMP */
829
830struct sched_domain_attr;
831
832static inline void
833partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
834 struct sched_domain_attr *dattr_new)
835{
836}
837#endif /* !CONFIG_SMP */
829 838
830struct io_context; /* See blkdev.h */ 839struct io_context; /* See blkdev.h */
831#define NGROUPS_SMALL 32 840#define NGROUPS_SMALL 32
diff --git a/init/main.c b/init/main.c
index 756eca4b821a..2769dc031c62 100644
--- a/init/main.c
+++ b/init/main.c
@@ -415,6 +415,13 @@ static void __init smp_init(void)
415{ 415{
416 unsigned int cpu; 416 unsigned int cpu;
417 417
418 /*
419 * Set up the current CPU as possible to migrate to.
420 * The other ones will be done by cpu_up/cpu_down()
421 */
422 cpu = smp_processor_id();
423 cpu_set(cpu, cpu_active_map);
424
418 /* FIXME: This should be done in userspace --RR */ 425 /* FIXME: This should be done in userspace --RR */
419 for_each_present_cpu(cpu) { 426 for_each_present_cpu(cpu) {
420 if (num_online_cpus() >= setup_max_cpus) 427 if (num_online_cpus() >= setup_max_cpus)
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 526128a2e622..2a202a846757 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -55,4 +55,4 @@ config HZ
55 default 1000 if HZ_1000 55 default 1000 if HZ_1000
56 56
57config SCHED_HRTICK 57config SCHED_HRTICK
58 def_bool HIGH_RES_TIMERS && X86 58 def_bool HIGH_RES_TIMERS
diff --git a/kernel/Makefile b/kernel/Makefile
index 985ddb7da4d0..15ab63ffe64d 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -11,6 +11,8 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
11 hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ 11 hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
12 notifier.o ksysfs.o pm_qos_params.o sched_clock.o 12 notifier.o ksysfs.o pm_qos_params.o sched_clock.o
13 13
14CFLAGS_REMOVE_sched.o = -mno-spe
15
14ifdef CONFIG_FTRACE 16ifdef CONFIG_FTRACE
15# Do not trace debug files and internal ftrace files 17# Do not trace debug files and internal ftrace files
16CFLAGS_REMOVE_lockdep.o = -pg 18CFLAGS_REMOVE_lockdep.o = -pg
diff --git a/kernel/cpu.c b/kernel/cpu.c
index d26d0b095b3b..2cc409ce0a8f 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -64,6 +64,8 @@ void __init cpu_hotplug_init(void)
64 cpu_hotplug.refcount = 0; 64 cpu_hotplug.refcount = 0;
65} 65}
66 66
67cpumask_t cpu_active_map;
68
67#ifdef CONFIG_HOTPLUG_CPU 69#ifdef CONFIG_HOTPLUG_CPU
68 70
69void get_online_cpus(void) 71void get_online_cpus(void)
@@ -291,11 +293,30 @@ int __ref cpu_down(unsigned int cpu)
291 int err = 0; 293 int err = 0;
292 294
293 cpu_maps_update_begin(); 295 cpu_maps_update_begin();
294 if (cpu_hotplug_disabled) 296
297 if (cpu_hotplug_disabled) {
295 err = -EBUSY; 298 err = -EBUSY;
296 else 299 goto out;
297 err = _cpu_down(cpu, 0); 300 }
301
302 cpu_clear(cpu, cpu_active_map);
303
304 /*
305 * Make sure the all cpus did the reschedule and are not
306 * using stale version of the cpu_active_map.
307 * This is not strictly necessary becuase stop_machine()
308 * that we run down the line already provides the required
309 * synchronization. But it's really a side effect and we do not
310 * want to depend on the innards of the stop_machine here.
311 */
312 synchronize_sched();
313
314 err = _cpu_down(cpu, 0);
298 315
316 if (cpu_online(cpu))
317 cpu_set(cpu, cpu_active_map);
318
319out:
299 cpu_maps_update_done(); 320 cpu_maps_update_done();
300 return err; 321 return err;
301} 322}
@@ -355,11 +376,18 @@ int __cpuinit cpu_up(unsigned int cpu)
355 } 376 }
356 377
357 cpu_maps_update_begin(); 378 cpu_maps_update_begin();
358 if (cpu_hotplug_disabled) 379
380 if (cpu_hotplug_disabled) {
359 err = -EBUSY; 381 err = -EBUSY;
360 else 382 goto out;
361 err = _cpu_up(cpu, 0); 383 }
384
385 err = _cpu_up(cpu, 0);
362 386
387 if (cpu_online(cpu))
388 cpu_set(cpu, cpu_active_map);
389
390out:
363 cpu_maps_update_done(); 391 cpu_maps_update_done();
364 return err; 392 return err;
365} 393}
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index d2cc67dac8b1..d5738910c34c 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -564,7 +564,7 @@ update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
564 * partition_sched_domains(). 564 * partition_sched_domains().
565 */ 565 */
566 566
567static void rebuild_sched_domains(void) 567void rebuild_sched_domains(void)
568{ 568{
569 struct kfifo *q; /* queue of cpusets to be scanned */ 569 struct kfifo *q; /* queue of cpusets to be scanned */
570 struct cpuset *cp; /* scans q */ 570 struct cpuset *cp; /* scans q */
diff --git a/kernel/sched.c b/kernel/sched.c
index df80bae68152..6acf749d3336 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -571,8 +571,10 @@ struct rq {
571#endif 571#endif
572 572
573#ifdef CONFIG_SCHED_HRTICK 573#ifdef CONFIG_SCHED_HRTICK
574 unsigned long hrtick_flags; 574#ifdef CONFIG_SMP
575 ktime_t hrtick_expire; 575 int hrtick_csd_pending;
576 struct call_single_data hrtick_csd;
577#endif
576 struct hrtimer hrtick_timer; 578 struct hrtimer hrtick_timer;
577#endif 579#endif
578 580
@@ -983,13 +985,6 @@ static struct rq *this_rq_lock(void)
983 return rq; 985 return rq;
984} 986}
985 987
986static void __resched_task(struct task_struct *p, int tif_bit);
987
988static inline void resched_task(struct task_struct *p)
989{
990 __resched_task(p, TIF_NEED_RESCHED);
991}
992
993#ifdef CONFIG_SCHED_HRTICK 988#ifdef CONFIG_SCHED_HRTICK
994/* 989/*
995 * Use HR-timers to deliver accurate preemption points. 990 * Use HR-timers to deliver accurate preemption points.
@@ -1001,25 +996,6 @@ static inline void resched_task(struct task_struct *p)
1001 * When we get rescheduled we reprogram the hrtick_timer outside of the 996 * When we get rescheduled we reprogram the hrtick_timer outside of the
1002 * rq->lock. 997 * rq->lock.
1003 */ 998 */
1004static inline void resched_hrt(struct task_struct *p)
1005{
1006 __resched_task(p, TIF_HRTICK_RESCHED);
1007}
1008
1009static inline void resched_rq(struct rq *rq)
1010{
1011 unsigned long flags;
1012
1013 spin_lock_irqsave(&rq->lock, flags);
1014 resched_task(rq->curr);
1015 spin_unlock_irqrestore(&rq->lock, flags);
1016}
1017
1018enum {
1019 HRTICK_SET, /* re-programm hrtick_timer */
1020 HRTICK_RESET, /* not a new slice */
1021 HRTICK_BLOCK, /* stop hrtick operations */
1022};
1023 999
1024/* 1000/*
1025 * Use hrtick when: 1001 * Use hrtick when:
@@ -1030,40 +1006,11 @@ static inline int hrtick_enabled(struct rq *rq)
1030{ 1006{
1031 if (!sched_feat(HRTICK)) 1007 if (!sched_feat(HRTICK))
1032 return 0; 1008 return 0;
1033 if (unlikely(test_bit(HRTICK_BLOCK, &rq->hrtick_flags))) 1009 if (!cpu_active(cpu_of(rq)))
1034 return 0; 1010 return 0;
1035 return hrtimer_is_hres_active(&rq->hrtick_timer); 1011 return hrtimer_is_hres_active(&rq->hrtick_timer);
1036} 1012}
1037 1013
1038/*
1039 * Called to set the hrtick timer state.
1040 *
1041 * called with rq->lock held and irqs disabled
1042 */
1043static void hrtick_start(struct rq *rq, u64 delay, int reset)
1044{
1045 assert_spin_locked(&rq->lock);
1046
1047 /*
1048 * preempt at: now + delay
1049 */
1050 rq->hrtick_expire =
1051 ktime_add_ns(rq->hrtick_timer.base->get_time(), delay);
1052 /*
1053 * indicate we need to program the timer
1054 */
1055 __set_bit(HRTICK_SET, &rq->hrtick_flags);
1056 if (reset)
1057 __set_bit(HRTICK_RESET, &rq->hrtick_flags);
1058
1059 /*
1060 * New slices are called from the schedule path and don't need a
1061 * forced reschedule.
1062 */
1063 if (reset)
1064 resched_hrt(rq->curr);
1065}
1066
1067static void hrtick_clear(struct rq *rq) 1014static void hrtick_clear(struct rq *rq)
1068{ 1015{
1069 if (hrtimer_active(&rq->hrtick_timer)) 1016 if (hrtimer_active(&rq->hrtick_timer))
@@ -1071,32 +1018,6 @@ static void hrtick_clear(struct rq *rq)
1071} 1018}
1072 1019
1073/* 1020/*
1074 * Update the timer from the possible pending state.
1075 */
1076static void hrtick_set(struct rq *rq)
1077{
1078 ktime_t time;
1079 int set, reset;
1080 unsigned long flags;
1081
1082 WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
1083
1084 spin_lock_irqsave(&rq->lock, flags);
1085 set = __test_and_clear_bit(HRTICK_SET, &rq->hrtick_flags);
1086 reset = __test_and_clear_bit(HRTICK_RESET, &rq->hrtick_flags);
1087 time = rq->hrtick_expire;
1088 clear_thread_flag(TIF_HRTICK_RESCHED);
1089 spin_unlock_irqrestore(&rq->lock, flags);
1090
1091 if (set) {
1092 hrtimer_start(&rq->hrtick_timer, time, HRTIMER_MODE_ABS);
1093 if (reset && !hrtimer_active(&rq->hrtick_timer))
1094 resched_rq(rq);
1095 } else
1096 hrtick_clear(rq);
1097}
1098
1099/*
1100 * High-resolution timer tick. 1021 * High-resolution timer tick.
1101 * Runs from hardirq context with interrupts disabled. 1022 * Runs from hardirq context with interrupts disabled.
1102 */ 1023 */
@@ -1115,27 +1036,37 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer)
1115} 1036}
1116 1037
1117#ifdef CONFIG_SMP 1038#ifdef CONFIG_SMP
1118static void hotplug_hrtick_disable(int cpu) 1039/*
1040 * called from hardirq (IPI) context
1041 */
1042static void __hrtick_start(void *arg)
1119{ 1043{
1120 struct rq *rq = cpu_rq(cpu); 1044 struct rq *rq = arg;
1121 unsigned long flags;
1122
1123 spin_lock_irqsave(&rq->lock, flags);
1124 rq->hrtick_flags = 0;
1125 __set_bit(HRTICK_BLOCK, &rq->hrtick_flags);
1126 spin_unlock_irqrestore(&rq->lock, flags);
1127 1045
1128 hrtick_clear(rq); 1046 spin_lock(&rq->lock);
1047 hrtimer_restart(&rq->hrtick_timer);
1048 rq->hrtick_csd_pending = 0;
1049 spin_unlock(&rq->lock);
1129} 1050}
1130 1051
1131static void hotplug_hrtick_enable(int cpu) 1052/*
1053 * Called to set the hrtick timer state.
1054 *
1055 * called with rq->lock held and irqs disabled
1056 */
1057static void hrtick_start(struct rq *rq, u64 delay)
1132{ 1058{
1133 struct rq *rq = cpu_rq(cpu); 1059 struct hrtimer *timer = &rq->hrtick_timer;
1134 unsigned long flags; 1060 ktime_t time = ktime_add_ns(timer->base->get_time(), delay);
1135 1061
1136 spin_lock_irqsave(&rq->lock, flags); 1062 timer->expires = time;
1137 __clear_bit(HRTICK_BLOCK, &rq->hrtick_flags); 1063
1138 spin_unlock_irqrestore(&rq->lock, flags); 1064 if (rq == this_rq()) {
1065 hrtimer_restart(timer);
1066 } else if (!rq->hrtick_csd_pending) {
1067 __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd);
1068 rq->hrtick_csd_pending = 1;
1069 }
1139} 1070}
1140 1071
1141static int 1072static int
@@ -1150,16 +1081,7 @@ hotplug_hrtick(struct notifier_block *nfb, unsigned long action, void *hcpu)
1150 case CPU_DOWN_PREPARE_FROZEN: 1081 case CPU_DOWN_PREPARE_FROZEN:
1151 case CPU_DEAD: 1082 case CPU_DEAD:
1152 case CPU_DEAD_FROZEN: 1083 case CPU_DEAD_FROZEN:
1153 hotplug_hrtick_disable(cpu); 1084 hrtick_clear(cpu_rq(cpu));
1154 return NOTIFY_OK;
1155
1156 case CPU_UP_PREPARE:
1157 case CPU_UP_PREPARE_FROZEN:
1158 case CPU_DOWN_FAILED:
1159 case CPU_DOWN_FAILED_FROZEN:
1160 case CPU_ONLINE:
1161 case CPU_ONLINE_FROZEN:
1162 hotplug_hrtick_enable(cpu);
1163 return NOTIFY_OK; 1085 return NOTIFY_OK;
1164 } 1086 }
1165 1087
@@ -1170,46 +1092,45 @@ static void init_hrtick(void)
1170{ 1092{
1171 hotcpu_notifier(hotplug_hrtick, 0); 1093 hotcpu_notifier(hotplug_hrtick, 0);
1172} 1094}
1173#endif /* CONFIG_SMP */ 1095#else
1096/*
1097 * Called to set the hrtick timer state.
1098 *
1099 * called with rq->lock held and irqs disabled
1100 */
1101static void hrtick_start(struct rq *rq, u64 delay)
1102{
1103 hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), HRTIMER_MODE_REL);
1104}
1174 1105
1175static void init_rq_hrtick(struct rq *rq) 1106static void init_hrtick(void)
1176{ 1107{
1177 rq->hrtick_flags = 0;
1178 hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1179 rq->hrtick_timer.function = hrtick;
1180 rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
1181} 1108}
1109#endif /* CONFIG_SMP */
1182 1110
1183void hrtick_resched(void) 1111static void init_rq_hrtick(struct rq *rq)
1184{ 1112{
1185 struct rq *rq; 1113#ifdef CONFIG_SMP
1186 unsigned long flags; 1114 rq->hrtick_csd_pending = 0;
1187 1115
1188 if (!test_thread_flag(TIF_HRTICK_RESCHED)) 1116 rq->hrtick_csd.flags = 0;
1189 return; 1117 rq->hrtick_csd.func = __hrtick_start;
1118 rq->hrtick_csd.info = rq;
1119#endif
1190 1120
1191 local_irq_save(flags); 1121 hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1192 rq = cpu_rq(smp_processor_id()); 1122 rq->hrtick_timer.function = hrtick;
1193 hrtick_set(rq); 1123 rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
1194 local_irq_restore(flags);
1195} 1124}
1196#else 1125#else
1197static inline void hrtick_clear(struct rq *rq) 1126static inline void hrtick_clear(struct rq *rq)
1198{ 1127{
1199} 1128}
1200 1129
1201static inline void hrtick_set(struct rq *rq)
1202{
1203}
1204
1205static inline void init_rq_hrtick(struct rq *rq) 1130static inline void init_rq_hrtick(struct rq *rq)
1206{ 1131{
1207} 1132}
1208 1133
1209void hrtick_resched(void)
1210{
1211}
1212
1213static inline void init_hrtick(void) 1134static inline void init_hrtick(void)
1214{ 1135{
1215} 1136}
@@ -1228,16 +1149,16 @@ static inline void init_hrtick(void)
1228#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) 1149#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
1229#endif 1150#endif
1230 1151
1231static void __resched_task(struct task_struct *p, int tif_bit) 1152static void resched_task(struct task_struct *p)
1232{ 1153{
1233 int cpu; 1154 int cpu;
1234 1155
1235 assert_spin_locked(&task_rq(p)->lock); 1156 assert_spin_locked(&task_rq(p)->lock);
1236 1157
1237 if (unlikely(test_tsk_thread_flag(p, tif_bit))) 1158 if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED)))
1238 return; 1159 return;
1239 1160
1240 set_tsk_thread_flag(p, tif_bit); 1161 set_tsk_thread_flag(p, TIF_NEED_RESCHED);
1241 1162
1242 cpu = task_cpu(p); 1163 cpu = task_cpu(p);
1243 if (cpu == smp_processor_id()) 1164 if (cpu == smp_processor_id())
@@ -1303,10 +1224,10 @@ void wake_up_idle_cpu(int cpu)
1303#endif /* CONFIG_NO_HZ */ 1224#endif /* CONFIG_NO_HZ */
1304 1225
1305#else /* !CONFIG_SMP */ 1226#else /* !CONFIG_SMP */
1306static void __resched_task(struct task_struct *p, int tif_bit) 1227static void resched_task(struct task_struct *p)
1307{ 1228{
1308 assert_spin_locked(&task_rq(p)->lock); 1229 assert_spin_locked(&task_rq(p)->lock);
1309 set_tsk_thread_flag(p, tif_bit); 1230 set_tsk_need_resched(p);
1310} 1231}
1311#endif /* CONFIG_SMP */ 1232#endif /* CONFIG_SMP */
1312 1233
@@ -2881,7 +2802,7 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu)
2881 2802
2882 rq = task_rq_lock(p, &flags); 2803 rq = task_rq_lock(p, &flags);
2883 if (!cpu_isset(dest_cpu, p->cpus_allowed) 2804 if (!cpu_isset(dest_cpu, p->cpus_allowed)
2884 || unlikely(cpu_is_offline(dest_cpu))) 2805 || unlikely(!cpu_active(dest_cpu)))
2885 goto out; 2806 goto out;
2886 2807
2887 /* force the process onto the specified CPU */ 2808 /* force the process onto the specified CPU */
@@ -3849,7 +3770,7 @@ int select_nohz_load_balancer(int stop_tick)
3849 /* 3770 /*
3850 * If we are going offline and still the leader, give up! 3771 * If we are going offline and still the leader, give up!
3851 */ 3772 */
3852 if (cpu_is_offline(cpu) && 3773 if (!cpu_active(cpu) &&
3853 atomic_read(&nohz.load_balancer) == cpu) { 3774 atomic_read(&nohz.load_balancer) == cpu) {
3854 if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu) 3775 if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
3855 BUG(); 3776 BUG();
@@ -4395,7 +4316,7 @@ asmlinkage void __sched schedule(void)
4395 struct task_struct *prev, *next; 4316 struct task_struct *prev, *next;
4396 unsigned long *switch_count; 4317 unsigned long *switch_count;
4397 struct rq *rq; 4318 struct rq *rq;
4398 int cpu, hrtick = sched_feat(HRTICK); 4319 int cpu;
4399 4320
4400need_resched: 4321need_resched:
4401 preempt_disable(); 4322 preempt_disable();
@@ -4410,7 +4331,7 @@ need_resched_nonpreemptible:
4410 4331
4411 schedule_debug(prev); 4332 schedule_debug(prev);
4412 4333
4413 if (hrtick) 4334 if (sched_feat(HRTICK))
4414 hrtick_clear(rq); 4335 hrtick_clear(rq);
4415 4336
4416 /* 4337 /*
@@ -4457,9 +4378,6 @@ need_resched_nonpreemptible:
4457 } else 4378 } else
4458 spin_unlock_irq(&rq->lock); 4379 spin_unlock_irq(&rq->lock);
4459 4380
4460 if (hrtick)
4461 hrtick_set(rq);
4462
4463 if (unlikely(reacquire_kernel_lock(current) < 0)) 4381 if (unlikely(reacquire_kernel_lock(current) < 0))
4464 goto need_resched_nonpreemptible; 4382 goto need_resched_nonpreemptible;
4465 4383
@@ -5876,7 +5794,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
5876 struct rq *rq_dest, *rq_src; 5794 struct rq *rq_dest, *rq_src;
5877 int ret = 0, on_rq; 5795 int ret = 0, on_rq;
5878 5796
5879 if (unlikely(cpu_is_offline(dest_cpu))) 5797 if (unlikely(!cpu_active(dest_cpu)))
5880 return ret; 5798 return ret;
5881 5799
5882 rq_src = cpu_rq(src_cpu); 5800 rq_src = cpu_rq(src_cpu);
@@ -6768,7 +6686,8 @@ static cpumask_t cpu_isolated_map = CPU_MASK_NONE;
6768/* Setup the mask of cpus configured for isolated domains */ 6686/* Setup the mask of cpus configured for isolated domains */
6769static int __init isolated_cpu_setup(char *str) 6687static int __init isolated_cpu_setup(char *str)
6770{ 6688{
6771 int ints[NR_CPUS], i; 6689 static int __initdata ints[NR_CPUS];
6690 int i;
6772 6691
6773 str = get_options(str, ARRAY_SIZE(ints), ints); 6692 str = get_options(str, ARRAY_SIZE(ints), ints);
6774 cpus_clear(cpu_isolated_map); 6693 cpus_clear(cpu_isolated_map);
@@ -7553,18 +7472,6 @@ void __attribute__((weak)) arch_update_cpu_topology(void)
7553} 7472}
7554 7473
7555/* 7474/*
7556 * Free current domain masks.
7557 * Called after all cpus are attached to NULL domain.
7558 */
7559static void free_sched_domains(void)
7560{
7561 ndoms_cur = 0;
7562 if (doms_cur != &fallback_doms)
7563 kfree(doms_cur);
7564 doms_cur = &fallback_doms;
7565}
7566
7567/*
7568 * Set up scheduler domains and groups. Callers must hold the hotplug lock. 7475 * Set up scheduler domains and groups. Callers must hold the hotplug lock.
7569 * For now this just excludes isolated cpus, but could be used to 7476 * For now this just excludes isolated cpus, but could be used to
7570 * exclude other special cases in the future. 7477 * exclude other special cases in the future.
@@ -7642,7 +7549,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
7642 * ownership of it and will kfree it when done with it. If the caller 7549 * ownership of it and will kfree it when done with it. If the caller
7643 * failed the kmalloc call, then it can pass in doms_new == NULL, 7550 * failed the kmalloc call, then it can pass in doms_new == NULL,
7644 * and partition_sched_domains() will fallback to the single partition 7551 * and partition_sched_domains() will fallback to the single partition
7645 * 'fallback_doms'. 7552 * 'fallback_doms', it also forces the domains to be rebuilt.
7646 * 7553 *
7647 * Call with hotplug lock held 7554 * Call with hotplug lock held
7648 */ 7555 */
@@ -7656,12 +7563,8 @@ void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
7656 /* always unregister in case we don't destroy any domains */ 7563 /* always unregister in case we don't destroy any domains */
7657 unregister_sched_domain_sysctl(); 7564 unregister_sched_domain_sysctl();
7658 7565
7659 if (doms_new == NULL) { 7566 if (doms_new == NULL)
7660 ndoms_new = 1; 7567 ndoms_new = 0;
7661 doms_new = &fallback_doms;
7662 cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
7663 dattr_new = NULL;
7664 }
7665 7568
7666 /* Destroy deleted domains */ 7569 /* Destroy deleted domains */
7667 for (i = 0; i < ndoms_cur; i++) { 7570 for (i = 0; i < ndoms_cur; i++) {
@@ -7676,6 +7579,14 @@ match1:
7676 ; 7579 ;
7677 } 7580 }
7678 7581
7582 if (doms_new == NULL) {
7583 ndoms_cur = 0;
7584 ndoms_new = 1;
7585 doms_new = &fallback_doms;
7586 cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
7587 dattr_new = NULL;
7588 }
7589
7679 /* Build new domains */ 7590 /* Build new domains */
7680 for (i = 0; i < ndoms_new; i++) { 7591 for (i = 0; i < ndoms_new; i++) {
7681 for (j = 0; j < ndoms_cur; j++) { 7592 for (j = 0; j < ndoms_cur; j++) {
@@ -7706,17 +7617,10 @@ match2:
7706#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) 7617#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
7707int arch_reinit_sched_domains(void) 7618int arch_reinit_sched_domains(void)
7708{ 7619{
7709 int err;
7710
7711 get_online_cpus(); 7620 get_online_cpus();
7712 mutex_lock(&sched_domains_mutex); 7621 rebuild_sched_domains();
7713 detach_destroy_domains(&cpu_online_map);
7714 free_sched_domains();
7715 err = arch_init_sched_domains(&cpu_online_map);
7716 mutex_unlock(&sched_domains_mutex);
7717 put_online_cpus(); 7622 put_online_cpus();
7718 7623 return 0;
7719 return err;
7720} 7624}
7721 7625
7722static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt) 7626static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
@@ -7786,59 +7690,49 @@ int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
7786} 7690}
7787#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */ 7691#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
7788 7692
7693#ifndef CONFIG_CPUSETS
7789/* 7694/*
7790 * Force a reinitialization of the sched domains hierarchy. The domains 7695 * Add online and remove offline CPUs from the scheduler domains.
7791 * and groups cannot be updated in place without racing with the balancing 7696 * When cpusets are enabled they take over this function.
7792 * code, so we temporarily attach all running cpus to the NULL domain
7793 * which will prevent rebalancing while the sched domains are recalculated.
7794 */ 7697 */
7795static int update_sched_domains(struct notifier_block *nfb, 7698static int update_sched_domains(struct notifier_block *nfb,
7796 unsigned long action, void *hcpu) 7699 unsigned long action, void *hcpu)
7797{ 7700{
7701 switch (action) {
7702 case CPU_ONLINE:
7703 case CPU_ONLINE_FROZEN:
7704 case CPU_DEAD:
7705 case CPU_DEAD_FROZEN:
7706 partition_sched_domains(0, NULL, NULL);
7707 return NOTIFY_OK;
7708
7709 default:
7710 return NOTIFY_DONE;
7711 }
7712}
7713#endif
7714
7715static int update_runtime(struct notifier_block *nfb,
7716 unsigned long action, void *hcpu)
7717{
7798 int cpu = (int)(long)hcpu; 7718 int cpu = (int)(long)hcpu;
7799 7719
7800 switch (action) { 7720 switch (action) {
7801 case CPU_DOWN_PREPARE: 7721 case CPU_DOWN_PREPARE:
7802 case CPU_DOWN_PREPARE_FROZEN: 7722 case CPU_DOWN_PREPARE_FROZEN:
7803 disable_runtime(cpu_rq(cpu)); 7723 disable_runtime(cpu_rq(cpu));
7804 /* fall-through */
7805 case CPU_UP_PREPARE:
7806 case CPU_UP_PREPARE_FROZEN:
7807 detach_destroy_domains(&cpu_online_map);
7808 free_sched_domains();
7809 return NOTIFY_OK; 7724 return NOTIFY_OK;
7810 7725
7811
7812 case CPU_DOWN_FAILED: 7726 case CPU_DOWN_FAILED:
7813 case CPU_DOWN_FAILED_FROZEN: 7727 case CPU_DOWN_FAILED_FROZEN:
7814 case CPU_ONLINE: 7728 case CPU_ONLINE:
7815 case CPU_ONLINE_FROZEN: 7729 case CPU_ONLINE_FROZEN:
7816 enable_runtime(cpu_rq(cpu)); 7730 enable_runtime(cpu_rq(cpu));
7817 /* fall-through */ 7731 return NOTIFY_OK;
7818 case CPU_UP_CANCELED: 7732
7819 case CPU_UP_CANCELED_FROZEN:
7820 case CPU_DEAD:
7821 case CPU_DEAD_FROZEN:
7822 /*
7823 * Fall through and re-initialise the domains.
7824 */
7825 break;
7826 default: 7733 default:
7827 return NOTIFY_DONE; 7734 return NOTIFY_DONE;
7828 } 7735 }
7829
7830#ifndef CONFIG_CPUSETS
7831 /*
7832 * Create default domain partitioning if cpusets are disabled.
7833 * Otherwise we let cpusets rebuild the domains based on the
7834 * current setup.
7835 */
7836
7837 /* The hotplug lock is already held by cpu_up/cpu_down */
7838 arch_init_sched_domains(&cpu_online_map);
7839#endif
7840
7841 return NOTIFY_OK;
7842} 7736}
7843 7737
7844void __init sched_init_smp(void) 7738void __init sched_init_smp(void)
@@ -7858,8 +7752,15 @@ void __init sched_init_smp(void)
7858 cpu_set(smp_processor_id(), non_isolated_cpus); 7752 cpu_set(smp_processor_id(), non_isolated_cpus);
7859 mutex_unlock(&sched_domains_mutex); 7753 mutex_unlock(&sched_domains_mutex);
7860 put_online_cpus(); 7754 put_online_cpus();
7755
7756#ifndef CONFIG_CPUSETS
7861 /* XXX: Theoretical race here - CPU may be hotplugged now */ 7757 /* XXX: Theoretical race here - CPU may be hotplugged now */
7862 hotcpu_notifier(update_sched_domains, 0); 7758 hotcpu_notifier(update_sched_domains, 0);
7759#endif
7760
7761 /* RT runtime code needs to handle some hotplug events */
7762 hotcpu_notifier(update_runtime, 0);
7763
7863 init_hrtick(); 7764 init_hrtick();
7864 7765
7865 /* Move init over to a non-isolated CPU */ 7766 /* Move init over to a non-isolated CPU */
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index bb61fe26b62c..cf2cd6ce4cb2 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -878,7 +878,6 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
878#ifdef CONFIG_SCHED_HRTICK 878#ifdef CONFIG_SCHED_HRTICK
879static void hrtick_start_fair(struct rq *rq, struct task_struct *p) 879static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
880{ 880{
881 int requeue = rq->curr == p;
882 struct sched_entity *se = &p->se; 881 struct sched_entity *se = &p->se;
883 struct cfs_rq *cfs_rq = cfs_rq_of(se); 882 struct cfs_rq *cfs_rq = cfs_rq_of(se);
884 883
@@ -899,10 +898,10 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
899 * Don't schedule slices shorter than 10000ns, that just 898 * Don't schedule slices shorter than 10000ns, that just
900 * doesn't make sense. Rely on vruntime for fairness. 899 * doesn't make sense. Rely on vruntime for fairness.
901 */ 900 */
902 if (!requeue) 901 if (rq->curr != p)
903 delta = max(10000LL, delta); 902 delta = max(10000LL, delta);
904 903
905 hrtick_start(rq, delta, requeue); 904 hrtick_start(rq, delta);
906 } 905 }
907} 906}
908#else /* !CONFIG_SCHED_HRTICK */ 907#else /* !CONFIG_SCHED_HRTICK */
@@ -1004,6 +1003,8 @@ static void yield_task_fair(struct rq *rq)
1004 * not idle and an idle cpu is available. The span of cpus to 1003 * not idle and an idle cpu is available. The span of cpus to
1005 * search starts with cpus closest then further out as needed, 1004 * search starts with cpus closest then further out as needed,
1006 * so we always favor a closer, idle cpu. 1005 * so we always favor a closer, idle cpu.
1006 * Domains may include CPUs that are not usable for migration,
1007 * hence we need to mask them out (cpu_active_map)
1007 * 1008 *
1008 * Returns the CPU we should wake onto. 1009 * Returns the CPU we should wake onto.
1009 */ 1010 */
@@ -1031,6 +1032,7 @@ static int wake_idle(int cpu, struct task_struct *p)
1031 || ((sd->flags & SD_WAKE_IDLE_FAR) 1032 || ((sd->flags & SD_WAKE_IDLE_FAR)
1032 && !task_hot(p, task_rq(p)->clock, sd))) { 1033 && !task_hot(p, task_rq(p)->clock, sd))) {
1033 cpus_and(tmp, sd->span, p->cpus_allowed); 1034 cpus_and(tmp, sd->span, p->cpus_allowed);
1035 cpus_and(tmp, tmp, cpu_active_map);
1034 for_each_cpu_mask_nr(i, tmp) { 1036 for_each_cpu_mask_nr(i, tmp) {
1035 if (idle_cpu(i)) { 1037 if (idle_cpu(i)) {
1036 if (i != task_cpu(p)) { 1038 if (i != task_cpu(p)) {
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 7c9614728c59..f85a76363eee 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -505,7 +505,9 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
505 rt_rq->rt_nr_running++; 505 rt_rq->rt_nr_running++;
506#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED 506#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
507 if (rt_se_prio(rt_se) < rt_rq->highest_prio) { 507 if (rt_se_prio(rt_se) < rt_rq->highest_prio) {
508#ifdef CONFIG_SMP
508 struct rq *rq = rq_of_rt_rq(rt_rq); 509 struct rq *rq = rq_of_rt_rq(rt_rq);
510#endif
509 511
510 rt_rq->highest_prio = rt_se_prio(rt_se); 512 rt_rq->highest_prio = rt_se_prio(rt_se);
511#ifdef CONFIG_SMP 513#ifdef CONFIG_SMP
@@ -599,11 +601,7 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
599 if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) 601 if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
600 return; 602 return;
601 603
602 if (rt_se->nr_cpus_allowed == 1) 604 list_add_tail(&rt_se->run_list, queue);
603 list_add(&rt_se->run_list, queue);
604 else
605 list_add_tail(&rt_se->run_list, queue);
606
607 __set_bit(rt_se_prio(rt_se), array->bitmap); 605 __set_bit(rt_se_prio(rt_se), array->bitmap);
608 606
609 inc_rt_tasks(rt_se, rt_rq); 607 inc_rt_tasks(rt_se, rt_rq);
@@ -688,32 +686,34 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
688 * Put task to the end of the run list without the overhead of dequeue 686 * Put task to the end of the run list without the overhead of dequeue
689 * followed by enqueue. 687 * followed by enqueue.
690 */ 688 */
691static 689static void
692void requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se) 690requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head)
693{ 691{
694 struct rt_prio_array *array = &rt_rq->active;
695
696 if (on_rt_rq(rt_se)) { 692 if (on_rt_rq(rt_se)) {
697 list_del_init(&rt_se->run_list); 693 struct rt_prio_array *array = &rt_rq->active;
698 list_add_tail(&rt_se->run_list, 694 struct list_head *queue = array->queue + rt_se_prio(rt_se);
699 array->queue + rt_se_prio(rt_se)); 695
696 if (head)
697 list_move(&rt_se->run_list, queue);
698 else
699 list_move_tail(&rt_se->run_list, queue);
700 } 700 }
701} 701}
702 702
703static void requeue_task_rt(struct rq *rq, struct task_struct *p) 703static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head)
704{ 704{
705 struct sched_rt_entity *rt_se = &p->rt; 705 struct sched_rt_entity *rt_se = &p->rt;
706 struct rt_rq *rt_rq; 706 struct rt_rq *rt_rq;
707 707
708 for_each_sched_rt_entity(rt_se) { 708 for_each_sched_rt_entity(rt_se) {
709 rt_rq = rt_rq_of_se(rt_se); 709 rt_rq = rt_rq_of_se(rt_se);
710 requeue_rt_entity(rt_rq, rt_se); 710 requeue_rt_entity(rt_rq, rt_se, head);
711 } 711 }
712} 712}
713 713
714static void yield_task_rt(struct rq *rq) 714static void yield_task_rt(struct rq *rq)
715{ 715{
716 requeue_task_rt(rq, rq->curr); 716 requeue_task_rt(rq, rq->curr, 0);
717} 717}
718 718
719#ifdef CONFIG_SMP 719#ifdef CONFIG_SMP
@@ -753,6 +753,30 @@ static int select_task_rq_rt(struct task_struct *p, int sync)
753 */ 753 */
754 return task_cpu(p); 754 return task_cpu(p);
755} 755}
756
757static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
758{
759 cpumask_t mask;
760
761 if (rq->curr->rt.nr_cpus_allowed == 1)
762 return;
763
764 if (p->rt.nr_cpus_allowed != 1
765 && cpupri_find(&rq->rd->cpupri, p, &mask))
766 return;
767
768 if (!cpupri_find(&rq->rd->cpupri, rq->curr, &mask))
769 return;
770
771 /*
772 * There appears to be other cpus that can accept
773 * current and none to run 'p', so lets reschedule
774 * to try and push current away:
775 */
776 requeue_task_rt(rq, p, 1);
777 resched_task(rq->curr);
778}
779
756#endif /* CONFIG_SMP */ 780#endif /* CONFIG_SMP */
757 781
758/* 782/*
@@ -778,18 +802,8 @@ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
778 * to move current somewhere else, making room for our non-migratable 802 * to move current somewhere else, making room for our non-migratable
779 * task. 803 * task.
780 */ 804 */
781 if((p->prio == rq->curr->prio) 805 if (p->prio == rq->curr->prio && !need_resched())
782 && p->rt.nr_cpus_allowed == 1 806 check_preempt_equal_prio(rq, p);
783 && rq->curr->rt.nr_cpus_allowed != 1) {
784 cpumask_t mask;
785
786 if (cpupri_find(&rq->rd->cpupri, rq->curr, &mask))
787 /*
788 * There appears to be other cpus that can accept
789 * current, so lets reschedule to try and push it away
790 */
791 resched_task(rq->curr);
792 }
793#endif 807#endif
794} 808}
795 809
@@ -922,6 +936,13 @@ static int find_lowest_rq(struct task_struct *task)
922 return -1; /* No targets found */ 936 return -1; /* No targets found */
923 937
924 /* 938 /*
939 * Only consider CPUs that are usable for migration.
940 * I guess we might want to change cpupri_find() to ignore those
941 * in the first place.
942 */
943 cpus_and(*lowest_mask, *lowest_mask, cpu_active_map);
944
945 /*
925 * At this point we have built a mask of cpus representing the 946 * At this point we have built a mask of cpus representing the
926 * lowest priority tasks in the system. Now we want to elect 947 * lowest priority tasks in the system. Now we want to elect
927 * the best one based on our affinity and topology. 948 * the best one based on our affinity and topology.
@@ -1415,7 +1436,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
1415 * on the queue: 1436 * on the queue:
1416 */ 1437 */
1417 if (p->rt.run_list.prev != p->rt.run_list.next) { 1438 if (p->rt.run_list.prev != p->rt.run_list.next) {
1418 requeue_task_rt(rq, p); 1439 requeue_task_rt(rq, p, 0);
1419 set_tsk_need_resched(p); 1440 set_tsk_need_resched(p);
1420 } 1441 }
1421} 1442}