-rw-r--r--  Documentation/scheduler/sched-domains.txt  |   7
-rw-r--r--  Documentation/scheduler/sched-rt-group.txt |   4
-rw-r--r--  include/linux/sched.h                      |  39
-rw-r--r--  kernel/Makefile                            |   5
-rw-r--r--  kernel/cpu.c                               |  24
-rw-r--r--  kernel/cpuset.c                            |  14
-rw-r--r--  kernel/kthread.c                           |   1
-rw-r--r--  kernel/sched.c                             | 247
-rw-r--r--  kernel/sched_cpupri.c                      | 174
-rw-r--r--  kernel/sched_cpupri.h                      |  36
-rw-r--r--  kernel/sched_debug.c                       |  40
-rw-r--r--  kernel/sched_fair.c                        |  19
-rw-r--r--  kernel/sched_features.h                    |   2
-rw-r--r--  kernel/sched_rt.c                          | 395
14 files changed, 718 insertions(+), 289 deletions(-)
diff --git a/Documentation/scheduler/sched-domains.txt b/Documentation/scheduler/sched-domains.txt
index a9e990ab980f..373ceacc367e 100644
--- a/Documentation/scheduler/sched-domains.txt
+++ b/Documentation/scheduler/sched-domains.txt
@@ -61,10 +61,7 @@ builder by #define'ing ARCH_HASH_SCHED_DOMAIN, and exporting your | |||
61 | arch_init_sched_domains function. This function will attach domains to all | 61 | arch_init_sched_domains function. This function will attach domains to all |
62 | CPUs using cpu_attach_domain. | 62 | CPUs using cpu_attach_domain. |
63 | 63 | ||
64 | Implementors should change the line | 64 | The sched-domains debugging infrastructure can be enabled by enabling |
65 | #undef SCHED_DOMAIN_DEBUG | 65 | CONFIG_SCHED_DEBUG. This enables an error checking parse of the sched domains |
66 | to | ||
67 | #define SCHED_DOMAIN_DEBUG | ||
68 | in kernel/sched.c as this enables an error checking parse of the sched domains | ||
69 | which should catch most possible errors (described above). It also prints out | 66 | which should catch most possible errors (described above). It also prints out |
70 | the domain structure in a visual format. | 67 | the domain structure in a visual format. |
diff --git a/Documentation/scheduler/sched-rt-group.txt b/Documentation/scheduler/sched-rt-group.txt
index 14f901f639ee..3ef339f491e0 100644
--- a/Documentation/scheduler/sched-rt-group.txt
+++ b/Documentation/scheduler/sched-rt-group.txt
@@ -51,9 +51,9 @@ needs only about 3% CPU time to do so, it can do with a 0.03 * 0.005s = | |||
51 | 0.00015s. So this group can be scheduled with a period of 0.005s and a run time | 51 | 0.00015s. So this group can be scheduled with a period of 0.005s and a run time |
52 | of 0.00015s. | 52 | of 0.00015s. |
53 | 53 | ||
54 | The remaining CPU time will be used for user input and other tass. Because | 54 | The remaining CPU time will be used for user input and other tasks. Because |
55 | realtime tasks have explicitly allocated the CPU time they need to perform | 55 | realtime tasks have explicitly allocated the CPU time they need to perform |
56 | their tasks, buffer underruns in the graphocs or audio can be eliminated. | 56 | their tasks, buffer underruns in the graphics or audio can be eliminated. |
57 | 57 | ||
58 | NOTE: the above example is not fully implemented as of yet (2.6.25). We still | 58 | NOTE: the above example is not fully implemented as of yet (2.6.25). We still |
59 | lack an EDF scheduler to make non-uniform periods usable. | 59 | lack an EDF scheduler to make non-uniform periods usable. |
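The runtime figure in the hunk above is just utilization multiplied by the chosen period. A minimal standalone sketch of that arithmetic, using the document's own example values (about 3% of one CPU, 0.005s period); the variable names are mine:

#include <stdio.h>

int main(void)
{
	double period_s = 0.005;	/* chosen rt_period */
	double utilization = 0.03;	/* the group needs ~3% of one CPU */
	double runtime_s = utilization * period_s;

	/* 0.03 * 0.005s = 0.00015s, the figure quoted in the text above */
	printf("rt_period  = %.6f s\n", period_s);
	printf("rt_runtime = %.6f s\n", runtime_s);
	return 0;
}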
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c5d3f847ca8d..eaf821072dbd 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -134,7 +134,6 @@ extern unsigned long nr_running(void); | |||
134 | extern unsigned long nr_uninterruptible(void); | 134 | extern unsigned long nr_uninterruptible(void); |
135 | extern unsigned long nr_active(void); | 135 | extern unsigned long nr_active(void); |
136 | extern unsigned long nr_iowait(void); | 136 | extern unsigned long nr_iowait(void); |
137 | extern unsigned long weighted_cpuload(const int cpu); | ||
138 | 137 | ||
139 | struct seq_file; | 138 | struct seq_file; |
140 | struct cfs_rq; | 139 | struct cfs_rq; |
@@ -823,23 +822,6 @@ extern int arch_reinit_sched_domains(void); | |||
823 | 822 | ||
824 | #endif /* CONFIG_SMP */ | 823 | #endif /* CONFIG_SMP */ |
825 | 824 | ||
826 | /* | ||
827 | * A runqueue laden with a single nice 0 task scores a weighted_cpuload of | ||
828 | * SCHED_LOAD_SCALE. This function returns 1 if any cpu is laden with a | ||
829 | * task of nice 0 or enough lower priority tasks to bring up the | ||
830 | * weighted_cpuload | ||
831 | */ | ||
832 | static inline int above_background_load(void) | ||
833 | { | ||
834 | unsigned long cpu; | ||
835 | |||
836 | for_each_online_cpu(cpu) { | ||
837 | if (weighted_cpuload(cpu) >= SCHED_LOAD_SCALE) | ||
838 | return 1; | ||
839 | } | ||
840 | return 0; | ||
841 | } | ||
842 | |||
843 | struct io_context; /* See blkdev.h */ | 825 | struct io_context; /* See blkdev.h */ |
844 | #define NGROUPS_SMALL 32 | 826 | #define NGROUPS_SMALL 32 |
845 | #define NGROUPS_PER_BLOCK ((unsigned int)(PAGE_SIZE / sizeof(gid_t))) | 827 | #define NGROUPS_PER_BLOCK ((unsigned int)(PAGE_SIZE / sizeof(gid_t))) |
@@ -921,8 +903,8 @@ struct sched_class { | |||
921 | void (*set_cpus_allowed)(struct task_struct *p, | 903 | void (*set_cpus_allowed)(struct task_struct *p, |
922 | const cpumask_t *newmask); | 904 | const cpumask_t *newmask); |
923 | 905 | ||
924 | void (*join_domain)(struct rq *rq); | 906 | void (*rq_online)(struct rq *rq); |
925 | void (*leave_domain)(struct rq *rq); | 907 | void (*rq_offline)(struct rq *rq); |
926 | 908 | ||
927 | void (*switched_from) (struct rq *this_rq, struct task_struct *task, | 909 | void (*switched_from) (struct rq *this_rq, struct task_struct *task, |
928 | int running); | 910 | int running); |
@@ -1039,6 +1021,7 @@ struct task_struct { | |||
1039 | #endif | 1021 | #endif |
1040 | 1022 | ||
1041 | int prio, static_prio, normal_prio; | 1023 | int prio, static_prio, normal_prio; |
1024 | unsigned int rt_priority; | ||
1042 | const struct sched_class *sched_class; | 1025 | const struct sched_class *sched_class; |
1043 | struct sched_entity se; | 1026 | struct sched_entity se; |
1044 | struct sched_rt_entity rt; | 1027 | struct sched_rt_entity rt; |
@@ -1122,7 +1105,6 @@ struct task_struct { | |||
1122 | int __user *set_child_tid; /* CLONE_CHILD_SETTID */ | 1105 | int __user *set_child_tid; /* CLONE_CHILD_SETTID */ |
1123 | int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */ | 1106 | int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */ |
1124 | 1107 | ||
1125 | unsigned int rt_priority; | ||
1126 | cputime_t utime, stime, utimescaled, stimescaled; | 1108 | cputime_t utime, stime, utimescaled, stimescaled; |
1127 | cputime_t gtime; | 1109 | cputime_t gtime; |
1128 | cputime_t prev_utime, prev_stime; | 1110 | cputime_t prev_utime, prev_stime; |
@@ -1141,12 +1123,12 @@ struct task_struct { | |||
1141 | gid_t gid,egid,sgid,fsgid; | 1123 | gid_t gid,egid,sgid,fsgid; |
1142 | struct group_info *group_info; | 1124 | struct group_info *group_info; |
1143 | kernel_cap_t cap_effective, cap_inheritable, cap_permitted, cap_bset; | 1125 | kernel_cap_t cap_effective, cap_inheritable, cap_permitted, cap_bset; |
1144 | unsigned securebits; | ||
1145 | struct user_struct *user; | 1126 | struct user_struct *user; |
1127 | unsigned securebits; | ||
1146 | #ifdef CONFIG_KEYS | 1128 | #ifdef CONFIG_KEYS |
1129 | unsigned char jit_keyring; /* default keyring to attach requested keys to */ | ||
1147 | struct key *request_key_auth; /* assumed request_key authority */ | 1130 | struct key *request_key_auth; /* assumed request_key authority */ |
1148 | struct key *thread_keyring; /* keyring private to this thread */ | 1131 | struct key *thread_keyring; /* keyring private to this thread */ |
1149 | unsigned char jit_keyring; /* default keyring to attach requested keys to */ | ||
1150 | #endif | 1132 | #endif |
1151 | char comm[TASK_COMM_LEN]; /* executable name excluding path | 1133 | char comm[TASK_COMM_LEN]; /* executable name excluding path |
1152 | - access with [gs]et_task_comm (which lock | 1134 | - access with [gs]et_task_comm (which lock |
@@ -1233,8 +1215,8 @@ struct task_struct { | |||
1233 | # define MAX_LOCK_DEPTH 48UL | 1215 | # define MAX_LOCK_DEPTH 48UL |
1234 | u64 curr_chain_key; | 1216 | u64 curr_chain_key; |
1235 | int lockdep_depth; | 1217 | int lockdep_depth; |
1236 | struct held_lock held_locks[MAX_LOCK_DEPTH]; | ||
1237 | unsigned int lockdep_recursion; | 1218 | unsigned int lockdep_recursion; |
1219 | struct held_lock held_locks[MAX_LOCK_DEPTH]; | ||
1238 | #endif | 1220 | #endif |
1239 | 1221 | ||
1240 | /* journalling filesystem info */ | 1222 | /* journalling filesystem info */ |
@@ -1262,10 +1244,6 @@ struct task_struct { | |||
1262 | u64 acct_vm_mem1; /* accumulated virtual memory usage */ | 1244 | u64 acct_vm_mem1; /* accumulated virtual memory usage */ |
1263 | cputime_t acct_stimexpd;/* stime since last update */ | 1245 | cputime_t acct_stimexpd;/* stime since last update */ |
1264 | #endif | 1246 | #endif |
1265 | #ifdef CONFIG_NUMA | ||
1266 | struct mempolicy *mempolicy; | ||
1267 | short il_next; | ||
1268 | #endif | ||
1269 | #ifdef CONFIG_CPUSETS | 1247 | #ifdef CONFIG_CPUSETS |
1270 | nodemask_t mems_allowed; | 1248 | nodemask_t mems_allowed; |
1271 | int cpuset_mems_generation; | 1249 | int cpuset_mems_generation; |
@@ -1285,6 +1263,10 @@ struct task_struct { | |||
1285 | struct list_head pi_state_list; | 1263 | struct list_head pi_state_list; |
1286 | struct futex_pi_state *pi_state_cache; | 1264 | struct futex_pi_state *pi_state_cache; |
1287 | #endif | 1265 | #endif |
1266 | #ifdef CONFIG_NUMA | ||
1267 | struct mempolicy *mempolicy; | ||
1268 | short il_next; | ||
1269 | #endif | ||
1288 | atomic_t fs_excl; /* holding fs exclusive resources */ | 1270 | atomic_t fs_excl; /* holding fs exclusive resources */ |
1289 | struct rcu_head rcu; | 1271 | struct rcu_head rcu; |
1290 | 1272 | ||
@@ -1504,6 +1486,7 @@ static inline void put_task_struct(struct task_struct *t) | |||
1504 | #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ | 1486 | #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ |
1505 | #define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */ | 1487 | #define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */ |
1506 | #define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */ | 1488 | #define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */ |
1489 | #define PF_THREAD_BOUND 0x04000000 /* Thread bound to specific cpu */ | ||
1507 | #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ | 1490 | #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ |
1508 | #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ | 1491 | #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ |
1509 | #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */ | 1492 | #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */ |
diff --git a/kernel/Makefile b/kernel/Makefile
index 1c9938addb9d..6c55301112e0 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -3,7 +3,7 @@ | |||
3 | # | 3 | # |
4 | 4 | ||
5 | obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ | 5 | obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ |
6 | exit.o itimer.o time.o softirq.o resource.o \ | 6 | cpu.o exit.o itimer.o time.o softirq.o resource.o \ |
7 | sysctl.o capability.o ptrace.o timer.o user.o \ | 7 | sysctl.o capability.o ptrace.o timer.o user.o \ |
8 | signal.o sys.o kmod.o workqueue.o pid.o \ | 8 | signal.o sys.o kmod.o workqueue.o pid.o \ |
9 | rcupdate.o extable.o params.o posix-timers.o \ | 9 | rcupdate.o extable.o params.o posix-timers.o \ |
@@ -27,7 +27,7 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o | |||
27 | obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o | 27 | obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o |
28 | obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o | 28 | obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o |
29 | obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o | 29 | obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o |
30 | obj-$(CONFIG_SMP) += cpu.o spinlock.o | 30 | obj-$(CONFIG_SMP) += spinlock.o |
31 | obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o | 31 | obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o |
32 | obj-$(CONFIG_PROVE_LOCKING) += spinlock.o | 32 | obj-$(CONFIG_PROVE_LOCKING) += spinlock.o |
33 | obj-$(CONFIG_UID16) += uid16.o | 33 | obj-$(CONFIG_UID16) += uid16.o |
@@ -69,6 +69,7 @@ obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o | |||
69 | obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o | 69 | obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o |
70 | obj-$(CONFIG_MARKERS) += marker.o | 70 | obj-$(CONFIG_MARKERS) += marker.o |
71 | obj-$(CONFIG_LATENCYTOP) += latencytop.o | 71 | obj-$(CONFIG_LATENCYTOP) += latencytop.o |
72 | obj-$(CONFIG_SMP) += sched_cpupri.o | ||
72 | 73 | ||
73 | ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) | 74 | ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) |
74 | # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is | 75 | # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is |
diff --git a/kernel/cpu.c b/kernel/cpu.c
index c77bc3a1c722..b11f06dc149a 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -15,6 +15,28 @@ | |||
15 | #include <linux/stop_machine.h> | 15 | #include <linux/stop_machine.h> |
16 | #include <linux/mutex.h> | 16 | #include <linux/mutex.h> |
17 | 17 | ||
18 | /* | ||
19 | * Represents all cpu's present in the system | ||
20 | * In systems capable of hotplug, this map could dynamically grow | ||
21 | * as new cpu's are detected in the system via any platform specific | ||
22 | * method, such as ACPI for e.g. | ||
23 | */ | ||
24 | cpumask_t cpu_present_map __read_mostly; | ||
25 | EXPORT_SYMBOL(cpu_present_map); | ||
26 | |||
27 | #ifndef CONFIG_SMP | ||
28 | |||
29 | /* | ||
30 | * Represents all cpu's that are currently online. | ||
31 | */ | ||
32 | cpumask_t cpu_online_map __read_mostly = CPU_MASK_ALL; | ||
33 | EXPORT_SYMBOL(cpu_online_map); | ||
34 | |||
35 | cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL; | ||
36 | EXPORT_SYMBOL(cpu_possible_map); | ||
37 | |||
38 | #else /* CONFIG_SMP */ | ||
39 | |||
18 | /* Serializes the updates to cpu_online_map, cpu_present_map */ | 40 | /* Serializes the updates to cpu_online_map, cpu_present_map */ |
19 | static DEFINE_MUTEX(cpu_add_remove_lock); | 41 | static DEFINE_MUTEX(cpu_add_remove_lock); |
20 | 42 | ||
@@ -403,3 +425,5 @@ out: | |||
403 | cpu_maps_update_done(); | 425 | cpu_maps_update_done(); |
404 | } | 426 | } |
405 | #endif /* CONFIG_PM_SLEEP_SMP */ | 427 | #endif /* CONFIG_PM_SLEEP_SMP */ |
428 | |||
429 | #endif /* CONFIG_SMP */ | ||
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 9fceb97e989c..64a05da9bc4c 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1194,6 +1194,15 @@ static int cpuset_can_attach(struct cgroup_subsys *ss, | |||
1194 | 1194 | ||
1195 | if (cpus_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) | 1195 | if (cpus_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) |
1196 | return -ENOSPC; | 1196 | return -ENOSPC; |
1197 | if (tsk->flags & PF_THREAD_BOUND) { | ||
1198 | cpumask_t mask; | ||
1199 | |||
1200 | mutex_lock(&callback_mutex); | ||
1201 | mask = cs->cpus_allowed; | ||
1202 | mutex_unlock(&callback_mutex); | ||
1203 | if (!cpus_equal(tsk->cpus_allowed, mask)) | ||
1204 | return -EINVAL; | ||
1205 | } | ||
1197 | 1206 | ||
1198 | return security_task_setscheduler(tsk, 0, NULL); | 1207 | return security_task_setscheduler(tsk, 0, NULL); |
1199 | } | 1208 | } |
@@ -1207,11 +1216,14 @@ static void cpuset_attach(struct cgroup_subsys *ss, | |||
1207 | struct mm_struct *mm; | 1216 | struct mm_struct *mm; |
1208 | struct cpuset *cs = cgroup_cs(cont); | 1217 | struct cpuset *cs = cgroup_cs(cont); |
1209 | struct cpuset *oldcs = cgroup_cs(oldcont); | 1218 | struct cpuset *oldcs = cgroup_cs(oldcont); |
1219 | int err; | ||
1210 | 1220 | ||
1211 | mutex_lock(&callback_mutex); | 1221 | mutex_lock(&callback_mutex); |
1212 | guarantee_online_cpus(cs, &cpus); | 1222 | guarantee_online_cpus(cs, &cpus); |
1213 | set_cpus_allowed_ptr(tsk, &cpus); | 1223 | err = set_cpus_allowed_ptr(tsk, &cpus); |
1214 | mutex_unlock(&callback_mutex); | 1224 | mutex_unlock(&callback_mutex); |
1225 | if (err) | ||
1226 | return; | ||
1215 | 1227 | ||
1216 | from = oldcs->mems_allowed; | 1228 | from = oldcs->mems_allowed; |
1217 | to = cs->mems_allowed; | 1229 | to = cs->mems_allowed; |
diff --git a/kernel/kthread.c b/kernel/kthread.c
index bd1b9ea024e1..97747cdd37c9 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -180,6 +180,7 @@ void kthread_bind(struct task_struct *k, unsigned int cpu) | |||
180 | set_task_cpu(k, cpu); | 180 | set_task_cpu(k, cpu); |
181 | k->cpus_allowed = cpumask_of_cpu(cpu); | 181 | k->cpus_allowed = cpumask_of_cpu(cpu); |
182 | k->rt.nr_cpus_allowed = 1; | 182 | k->rt.nr_cpus_allowed = 1; |
183 | k->flags |= PF_THREAD_BOUND; | ||
183 | } | 184 | } |
184 | EXPORT_SYMBOL(kthread_bind); | 185 | EXPORT_SYMBOL(kthread_bind); |
185 | 186 | ||
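With this change kthread_bind() tags the thread PF_THREAD_BOUND, and the cpuset and set_cpus_allowed_ptr() hunks elsewhere in this patch refuse to move or widen such a thread. A hedged sketch of the usual caller pattern (module-style; the thread function and names are illustrative, not taken from this patch):

#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/err.h>

static int my_percpu_fn(void *data)
{
	while (!kthread_should_stop())
		schedule_timeout_interruptible(HZ);
	return 0;
}

static struct task_struct *start_bound_thread(int cpu)
{
	struct task_struct *tsk;

	tsk = kthread_create(my_percpu_fn, NULL, "mykthread/%d", cpu);
	if (IS_ERR(tsk))
		return tsk;

	/*
	 * Pins the thread to @cpu and, with this patch, also sets
	 * PF_THREAD_BOUND, so a later set_cpus_allowed_ptr() or cpuset
	 * attach with a different mask is rejected with -EINVAL.
	 */
	kthread_bind(tsk, cpu);
	wake_up_process(tsk);
	return tsk;
}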
diff --git a/kernel/sched.c b/kernel/sched.c
index 3aaa5c8cb421..c51d9fae8cd8 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -74,6 +74,8 @@ | |||
74 | #include <asm/tlb.h> | 74 | #include <asm/tlb.h> |
75 | #include <asm/irq_regs.h> | 75 | #include <asm/irq_regs.h> |
76 | 76 | ||
77 | #include "sched_cpupri.h" | ||
78 | |||
77 | /* | 79 | /* |
78 | * Convert user-nice values [ -20 ... 0 ... 19 ] | 80 | * Convert user-nice values [ -20 ... 0 ... 19 ] |
79 | * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], | 81 | * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], |
@@ -289,15 +291,15 @@ struct task_group root_task_group; | |||
289 | static DEFINE_PER_CPU(struct sched_entity, init_sched_entity); | 291 | static DEFINE_PER_CPU(struct sched_entity, init_sched_entity); |
290 | /* Default task group's cfs_rq on each cpu */ | 292 | /* Default task group's cfs_rq on each cpu */ |
291 | static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp; | 293 | static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp; |
292 | #endif | 294 | #endif /* CONFIG_FAIR_GROUP_SCHED */ |
293 | 295 | ||
294 | #ifdef CONFIG_RT_GROUP_SCHED | 296 | #ifdef CONFIG_RT_GROUP_SCHED |
295 | static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity); | 297 | static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity); |
296 | static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp; | 298 | static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp; |
297 | #endif | 299 | #endif /* CONFIG_RT_GROUP_SCHED */ |
298 | #else | 300 | #else /* !CONFIG_FAIR_GROUP_SCHED */ |
299 | #define root_task_group init_task_group | 301 | #define root_task_group init_task_group |
300 | #endif | 302 | #endif /* CONFIG_FAIR_GROUP_SCHED */ |
301 | 303 | ||
302 | /* task_group_lock serializes add/remove of task groups and also changes to | 304 | /* task_group_lock serializes add/remove of task groups and also changes to |
303 | * a task group's cpu shares. | 305 | * a task group's cpu shares. |
@@ -307,9 +309,9 @@ static DEFINE_SPINLOCK(task_group_lock); | |||
307 | #ifdef CONFIG_FAIR_GROUP_SCHED | 309 | #ifdef CONFIG_FAIR_GROUP_SCHED |
308 | #ifdef CONFIG_USER_SCHED | 310 | #ifdef CONFIG_USER_SCHED |
309 | # define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD) | 311 | # define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD) |
310 | #else | 312 | #else /* !CONFIG_USER_SCHED */ |
311 | # define INIT_TASK_GROUP_LOAD NICE_0_LOAD | 313 | # define INIT_TASK_GROUP_LOAD NICE_0_LOAD |
312 | #endif | 314 | #endif /* CONFIG_USER_SCHED */ |
313 | 315 | ||
314 | /* | 316 | /* |
315 | * A weight of 0 or 1 can cause arithmetics problems. | 317 | * A weight of 0 or 1 can cause arithmetics problems. |
@@ -452,6 +454,9 @@ struct root_domain { | |||
452 | */ | 454 | */ |
453 | cpumask_t rto_mask; | 455 | cpumask_t rto_mask; |
454 | atomic_t rto_count; | 456 | atomic_t rto_count; |
457 | #ifdef CONFIG_SMP | ||
458 | struct cpupri cpupri; | ||
459 | #endif | ||
455 | }; | 460 | }; |
456 | 461 | ||
457 | /* | 462 | /* |
@@ -526,6 +531,7 @@ struct rq { | |||
526 | int push_cpu; | 531 | int push_cpu; |
527 | /* cpu of this runqueue: */ | 532 | /* cpu of this runqueue: */ |
528 | int cpu; | 533 | int cpu; |
534 | int online; | ||
529 | 535 | ||
530 | struct task_struct *migration_thread; | 536 | struct task_struct *migration_thread; |
531 | struct list_head migration_queue; | 537 | struct list_head migration_queue; |
@@ -1313,15 +1319,15 @@ void wake_up_idle_cpu(int cpu) | |||
1313 | if (!tsk_is_polling(rq->idle)) | 1319 | if (!tsk_is_polling(rq->idle)) |
1314 | smp_send_reschedule(cpu); | 1320 | smp_send_reschedule(cpu); |
1315 | } | 1321 | } |
1316 | #endif | 1322 | #endif /* CONFIG_NO_HZ */ |
1317 | 1323 | ||
1318 | #else | 1324 | #else /* !CONFIG_SMP */ |
1319 | static void __resched_task(struct task_struct *p, int tif_bit) | 1325 | static void __resched_task(struct task_struct *p, int tif_bit) |
1320 | { | 1326 | { |
1321 | assert_spin_locked(&task_rq(p)->lock); | 1327 | assert_spin_locked(&task_rq(p)->lock); |
1322 | set_tsk_thread_flag(p, tif_bit); | 1328 | set_tsk_thread_flag(p, tif_bit); |
1323 | } | 1329 | } |
1324 | #endif | 1330 | #endif /* CONFIG_SMP */ |
1325 | 1331 | ||
1326 | #if BITS_PER_LONG == 32 | 1332 | #if BITS_PER_LONG == 32 |
1327 | # define WMULT_CONST (~0UL) | 1333 | # define WMULT_CONST (~0UL) |
@@ -1481,16 +1487,8 @@ static unsigned long source_load(int cpu, int type); | |||
1481 | static unsigned long target_load(int cpu, int type); | 1487 | static unsigned long target_load(int cpu, int type); |
1482 | static unsigned long cpu_avg_load_per_task(int cpu); | 1488 | static unsigned long cpu_avg_load_per_task(int cpu); |
1483 | static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); | 1489 | static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); |
1484 | #else /* CONFIG_SMP */ | ||
1485 | |||
1486 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
1487 | static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares) | ||
1488 | { | ||
1489 | } | ||
1490 | #endif | 1490 | #endif |
1491 | 1491 | ||
1492 | #endif /* CONFIG_SMP */ | ||
1493 | |||
1494 | #include "sched_stats.h" | 1492 | #include "sched_stats.h" |
1495 | #include "sched_idletask.c" | 1493 | #include "sched_idletask.c" |
1496 | #include "sched_fair.c" | 1494 | #include "sched_fair.c" |
@@ -1500,6 +1498,8 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares) | |||
1500 | #endif | 1498 | #endif |
1501 | 1499 | ||
1502 | #define sched_class_highest (&rt_sched_class) | 1500 | #define sched_class_highest (&rt_sched_class) |
1501 | #define for_each_class(class) \ | ||
1502 | for (class = sched_class_highest; class; class = class->next) | ||
1503 | 1503 | ||
1504 | static inline void inc_load(struct rq *rq, const struct task_struct *p) | 1504 | static inline void inc_load(struct rq *rq, const struct task_struct *p) |
1505 | { | 1505 | { |
@@ -1636,12 +1636,6 @@ inline int task_curr(const struct task_struct *p) | |||
1636 | return cpu_curr(task_cpu(p)) == p; | 1636 | return cpu_curr(task_cpu(p)) == p; |
1637 | } | 1637 | } |
1638 | 1638 | ||
1639 | /* Used instead of source_load when we know the type == 0 */ | ||
1640 | unsigned long weighted_cpuload(const int cpu) | ||
1641 | { | ||
1642 | return cpu_rq(cpu)->load.weight; | ||
1643 | } | ||
1644 | |||
1645 | static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) | 1639 | static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) |
1646 | { | 1640 | { |
1647 | set_task_rq(p, cpu); | 1641 | set_task_rq(p, cpu); |
@@ -1670,6 +1664,12 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p, | |||
1670 | 1664 | ||
1671 | #ifdef CONFIG_SMP | 1665 | #ifdef CONFIG_SMP |
1672 | 1666 | ||
1667 | /* Used instead of source_load when we know the type == 0 */ | ||
1668 | static unsigned long weighted_cpuload(const int cpu) | ||
1669 | { | ||
1670 | return cpu_rq(cpu)->load.weight; | ||
1671 | } | ||
1672 | |||
1673 | /* | 1673 | /* |
1674 | * Is this task likely cache-hot: | 1674 | * Is this task likely cache-hot: |
1675 | */ | 1675 | */ |
@@ -2131,7 +2131,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) | |||
2131 | } | 2131 | } |
2132 | } | 2132 | } |
2133 | } | 2133 | } |
2134 | #endif | 2134 | #endif /* CONFIG_SCHEDSTATS */ |
2135 | 2135 | ||
2136 | out_activate: | 2136 | out_activate: |
2137 | #endif /* CONFIG_SMP */ | 2137 | #endif /* CONFIG_SMP */ |
@@ -2331,7 +2331,7 @@ fire_sched_out_preempt_notifiers(struct task_struct *curr, | |||
2331 | notifier->ops->sched_out(notifier, next); | 2331 | notifier->ops->sched_out(notifier, next); |
2332 | } | 2332 | } |
2333 | 2333 | ||
2334 | #else | 2334 | #else /* !CONFIG_PREEMPT_NOTIFIERS */ |
2335 | 2335 | ||
2336 | static void fire_sched_in_preempt_notifiers(struct task_struct *curr) | 2336 | static void fire_sched_in_preempt_notifiers(struct task_struct *curr) |
2337 | { | 2337 | { |
@@ -2343,7 +2343,7 @@ fire_sched_out_preempt_notifiers(struct task_struct *curr, | |||
2343 | { | 2343 | { |
2344 | } | 2344 | } |
2345 | 2345 | ||
2346 | #endif | 2346 | #endif /* CONFIG_PREEMPT_NOTIFIERS */ |
2347 | 2347 | ||
2348 | /** | 2348 | /** |
2349 | * prepare_task_switch - prepare to switch tasks | 2349 | * prepare_task_switch - prepare to switch tasks |
@@ -3672,6 +3672,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) | |||
3672 | /* Earliest time when we have to do rebalance again */ | 3672 | /* Earliest time when we have to do rebalance again */ |
3673 | unsigned long next_balance = jiffies + 60*HZ; | 3673 | unsigned long next_balance = jiffies + 60*HZ; |
3674 | int update_next_balance = 0; | 3674 | int update_next_balance = 0; |
3675 | int need_serialize; | ||
3675 | cpumask_t tmp; | 3676 | cpumask_t tmp; |
3676 | 3677 | ||
3677 | for_each_domain(cpu, sd) { | 3678 | for_each_domain(cpu, sd) { |
@@ -3689,8 +3690,9 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) | |||
3689 | if (interval > HZ*NR_CPUS/10) | 3690 | if (interval > HZ*NR_CPUS/10) |
3690 | interval = HZ*NR_CPUS/10; | 3691 | interval = HZ*NR_CPUS/10; |
3691 | 3692 | ||
3693 | need_serialize = sd->flags & SD_SERIALIZE; | ||
3692 | 3694 | ||
3693 | if (sd->flags & SD_SERIALIZE) { | 3695 | if (need_serialize) { |
3694 | if (!spin_trylock(&balancing)) | 3696 | if (!spin_trylock(&balancing)) |
3695 | goto out; | 3697 | goto out; |
3696 | } | 3698 | } |
@@ -3706,7 +3708,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) | |||
3706 | } | 3708 | } |
3707 | sd->last_balance = jiffies; | 3709 | sd->last_balance = jiffies; |
3708 | } | 3710 | } |
3709 | if (sd->flags & SD_SERIALIZE) | 3711 | if (need_serialize) |
3710 | spin_unlock(&balancing); | 3712 | spin_unlock(&balancing); |
3711 | out: | 3713 | out: |
3712 | if (time_after(next_balance, sd->last_balance + interval)) { | 3714 | if (time_after(next_balance, sd->last_balance + interval)) { |
@@ -4070,6 +4072,7 @@ static noinline void __schedule_bug(struct task_struct *prev) | |||
4070 | prev->comm, prev->pid, preempt_count()); | 4072 | prev->comm, prev->pid, preempt_count()); |
4071 | 4073 | ||
4072 | debug_show_held_locks(prev); | 4074 | debug_show_held_locks(prev); |
4075 | print_modules(); | ||
4073 | if (irqs_disabled()) | 4076 | if (irqs_disabled()) |
4074 | print_irqtrace_events(prev); | 4077 | print_irqtrace_events(prev); |
4075 | 4078 | ||
@@ -4143,7 +4146,7 @@ asmlinkage void __sched schedule(void) | |||
4143 | struct task_struct *prev, *next; | 4146 | struct task_struct *prev, *next; |
4144 | unsigned long *switch_count; | 4147 | unsigned long *switch_count; |
4145 | struct rq *rq; | 4148 | struct rq *rq; |
4146 | int cpu; | 4149 | int cpu, hrtick = sched_feat(HRTICK); |
4147 | 4150 | ||
4148 | need_resched: | 4151 | need_resched: |
4149 | preempt_disable(); | 4152 | preempt_disable(); |
@@ -4158,7 +4161,8 @@ need_resched_nonpreemptible: | |||
4158 | 4161 | ||
4159 | schedule_debug(prev); | 4162 | schedule_debug(prev); |
4160 | 4163 | ||
4161 | hrtick_clear(rq); | 4164 | if (hrtick) |
4165 | hrtick_clear(rq); | ||
4162 | 4166 | ||
4163 | /* | 4167 | /* |
4164 | * Do the rq-clock update outside the rq lock: | 4168 | * Do the rq-clock update outside the rq lock: |
@@ -4204,7 +4208,8 @@ need_resched_nonpreemptible: | |||
4204 | } else | 4208 | } else |
4205 | spin_unlock_irq(&rq->lock); | 4209 | spin_unlock_irq(&rq->lock); |
4206 | 4210 | ||
4207 | hrtick_set(rq); | 4211 | if (hrtick) |
4212 | hrtick_set(rq); | ||
4208 | 4213 | ||
4209 | if (unlikely(reacquire_kernel_lock(current) < 0)) | 4214 | if (unlikely(reacquire_kernel_lock(current) < 0)) |
4210 | goto need_resched_nonpreemptible; | 4215 | goto need_resched_nonpreemptible; |
@@ -5070,24 +5075,6 @@ asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len, | |||
5070 | return sched_setaffinity(pid, &new_mask); | 5075 | return sched_setaffinity(pid, &new_mask); |
5071 | } | 5076 | } |
5072 | 5077 | ||
5073 | /* | ||
5074 | * Represents all cpu's present in the system | ||
5075 | * In systems capable of hotplug, this map could dynamically grow | ||
5076 | * as new cpu's are detected in the system via any platform specific | ||
5077 | * method, such as ACPI for e.g. | ||
5078 | */ | ||
5079 | |||
5080 | cpumask_t cpu_present_map __read_mostly; | ||
5081 | EXPORT_SYMBOL(cpu_present_map); | ||
5082 | |||
5083 | #ifndef CONFIG_SMP | ||
5084 | cpumask_t cpu_online_map __read_mostly = CPU_MASK_ALL; | ||
5085 | EXPORT_SYMBOL(cpu_online_map); | ||
5086 | |||
5087 | cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL; | ||
5088 | EXPORT_SYMBOL(cpu_possible_map); | ||
5089 | #endif | ||
5090 | |||
5091 | long sched_getaffinity(pid_t pid, cpumask_t *mask) | 5078 | long sched_getaffinity(pid_t pid, cpumask_t *mask) |
5092 | { | 5079 | { |
5093 | struct task_struct *p; | 5080 | struct task_struct *p; |
@@ -5571,6 +5558,12 @@ int set_cpus_allowed_ptr(struct task_struct *p, const cpumask_t *new_mask) | |||
5571 | goto out; | 5558 | goto out; |
5572 | } | 5559 | } |
5573 | 5560 | ||
5561 | if (unlikely((p->flags & PF_THREAD_BOUND) && p != current && | ||
5562 | !cpus_equal(p->cpus_allowed, *new_mask))) { | ||
5563 | ret = -EINVAL; | ||
5564 | goto out; | ||
5565 | } | ||
5566 | |||
5574 | if (p->sched_class->set_cpus_allowed) | 5567 | if (p->sched_class->set_cpus_allowed) |
5575 | p->sched_class->set_cpus_allowed(p, new_mask); | 5568 | p->sched_class->set_cpus_allowed(p, new_mask); |
5576 | else { | 5569 | else { |
@@ -6058,6 +6051,36 @@ static void unregister_sched_domain_sysctl(void) | |||
6058 | } | 6051 | } |
6059 | #endif | 6052 | #endif |
6060 | 6053 | ||
6054 | static void set_rq_online(struct rq *rq) | ||
6055 | { | ||
6056 | if (!rq->online) { | ||
6057 | const struct sched_class *class; | ||
6058 | |||
6059 | cpu_set(rq->cpu, rq->rd->online); | ||
6060 | rq->online = 1; | ||
6061 | |||
6062 | for_each_class(class) { | ||
6063 | if (class->rq_online) | ||
6064 | class->rq_online(rq); | ||
6065 | } | ||
6066 | } | ||
6067 | } | ||
6068 | |||
6069 | static void set_rq_offline(struct rq *rq) | ||
6070 | { | ||
6071 | if (rq->online) { | ||
6072 | const struct sched_class *class; | ||
6073 | |||
6074 | for_each_class(class) { | ||
6075 | if (class->rq_offline) | ||
6076 | class->rq_offline(rq); | ||
6077 | } | ||
6078 | |||
6079 | cpu_clear(rq->cpu, rq->rd->online); | ||
6080 | rq->online = 0; | ||
6081 | } | ||
6082 | } | ||
6083 | |||
6061 | /* | 6084 | /* |
6062 | * migration_call - callback that gets triggered when a CPU is added. | 6085 | * migration_call - callback that gets triggered when a CPU is added. |
6063 | * Here we can start up the necessary migration thread for the new CPU. | 6086 | * Here we can start up the necessary migration thread for the new CPU. |
@@ -6095,7 +6118,8 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
6095 | spin_lock_irqsave(&rq->lock, flags); | 6118 | spin_lock_irqsave(&rq->lock, flags); |
6096 | if (rq->rd) { | 6119 | if (rq->rd) { |
6097 | BUG_ON(!cpu_isset(cpu, rq->rd->span)); | 6120 | BUG_ON(!cpu_isset(cpu, rq->rd->span)); |
6098 | cpu_set(cpu, rq->rd->online); | 6121 | |
6122 | set_rq_online(rq); | ||
6099 | } | 6123 | } |
6100 | spin_unlock_irqrestore(&rq->lock, flags); | 6124 | spin_unlock_irqrestore(&rq->lock, flags); |
6101 | break; | 6125 | break; |
@@ -6156,7 +6180,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
6156 | spin_lock_irqsave(&rq->lock, flags); | 6180 | spin_lock_irqsave(&rq->lock, flags); |
6157 | if (rq->rd) { | 6181 | if (rq->rd) { |
6158 | BUG_ON(!cpu_isset(cpu, rq->rd->span)); | 6182 | BUG_ON(!cpu_isset(cpu, rq->rd->span)); |
6159 | cpu_clear(cpu, rq->rd->online); | 6183 | set_rq_offline(rq); |
6160 | } | 6184 | } |
6161 | spin_unlock_irqrestore(&rq->lock, flags); | 6185 | spin_unlock_irqrestore(&rq->lock, flags); |
6162 | break; | 6186 | break; |
@@ -6190,6 +6214,28 @@ void __init migration_init(void) | |||
6190 | 6214 | ||
6191 | #ifdef CONFIG_SCHED_DEBUG | 6215 | #ifdef CONFIG_SCHED_DEBUG |
6192 | 6216 | ||
6217 | static inline const char *sd_level_to_string(enum sched_domain_level lvl) | ||
6218 | { | ||
6219 | switch (lvl) { | ||
6220 | case SD_LV_NONE: | ||
6221 | return "NONE"; | ||
6222 | case SD_LV_SIBLING: | ||
6223 | return "SIBLING"; | ||
6224 | case SD_LV_MC: | ||
6225 | return "MC"; | ||
6226 | case SD_LV_CPU: | ||
6227 | return "CPU"; | ||
6228 | case SD_LV_NODE: | ||
6229 | return "NODE"; | ||
6230 | case SD_LV_ALLNODES: | ||
6231 | return "ALLNODES"; | ||
6232 | case SD_LV_MAX: | ||
6233 | return "MAX"; | ||
6234 | |||
6235 | } | ||
6236 | return "MAX"; | ||
6237 | } | ||
6238 | |||
6193 | static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, | 6239 | static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, |
6194 | cpumask_t *groupmask) | 6240 | cpumask_t *groupmask) |
6195 | { | 6241 | { |
@@ -6209,7 +6255,8 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, | |||
6209 | return -1; | 6255 | return -1; |
6210 | } | 6256 | } |
6211 | 6257 | ||
6212 | printk(KERN_CONT "span %s\n", str); | 6258 | printk(KERN_CONT "span %s level %s\n", |
6259 | str, sd_level_to_string(sd->level)); | ||
6213 | 6260 | ||
6214 | if (!cpu_isset(cpu, sd->span)) { | 6261 | if (!cpu_isset(cpu, sd->span)) { |
6215 | printk(KERN_ERR "ERROR: domain->span does not contain " | 6262 | printk(KERN_ERR "ERROR: domain->span does not contain " |
@@ -6293,9 +6340,9 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) | |||
6293 | } | 6340 | } |
6294 | kfree(groupmask); | 6341 | kfree(groupmask); |
6295 | } | 6342 | } |
6296 | #else | 6343 | #else /* !CONFIG_SCHED_DEBUG */ |
6297 | # define sched_domain_debug(sd, cpu) do { } while (0) | 6344 | # define sched_domain_debug(sd, cpu) do { } while (0) |
6298 | #endif | 6345 | #endif /* CONFIG_SCHED_DEBUG */ |
6299 | 6346 | ||
6300 | static int sd_degenerate(struct sched_domain *sd) | 6347 | static int sd_degenerate(struct sched_domain *sd) |
6301 | { | 6348 | { |
@@ -6355,20 +6402,16 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) | |||
6355 | static void rq_attach_root(struct rq *rq, struct root_domain *rd) | 6402 | static void rq_attach_root(struct rq *rq, struct root_domain *rd) |
6356 | { | 6403 | { |
6357 | unsigned long flags; | 6404 | unsigned long flags; |
6358 | const struct sched_class *class; | ||
6359 | 6405 | ||
6360 | spin_lock_irqsave(&rq->lock, flags); | 6406 | spin_lock_irqsave(&rq->lock, flags); |
6361 | 6407 | ||
6362 | if (rq->rd) { | 6408 | if (rq->rd) { |
6363 | struct root_domain *old_rd = rq->rd; | 6409 | struct root_domain *old_rd = rq->rd; |
6364 | 6410 | ||
6365 | for (class = sched_class_highest; class; class = class->next) { | 6411 | if (cpu_isset(rq->cpu, old_rd->online)) |
6366 | if (class->leave_domain) | 6412 | set_rq_offline(rq); |
6367 | class->leave_domain(rq); | ||
6368 | } | ||
6369 | 6413 | ||
6370 | cpu_clear(rq->cpu, old_rd->span); | 6414 | cpu_clear(rq->cpu, old_rd->span); |
6371 | cpu_clear(rq->cpu, old_rd->online); | ||
6372 | 6415 | ||
6373 | if (atomic_dec_and_test(&old_rd->refcount)) | 6416 | if (atomic_dec_and_test(&old_rd->refcount)) |
6374 | kfree(old_rd); | 6417 | kfree(old_rd); |
@@ -6379,12 +6422,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd) | |||
6379 | 6422 | ||
6380 | cpu_set(rq->cpu, rd->span); | 6423 | cpu_set(rq->cpu, rd->span); |
6381 | if (cpu_isset(rq->cpu, cpu_online_map)) | 6424 | if (cpu_isset(rq->cpu, cpu_online_map)) |
6382 | cpu_set(rq->cpu, rd->online); | 6425 | set_rq_online(rq); |
6383 | |||
6384 | for (class = sched_class_highest; class; class = class->next) { | ||
6385 | if (class->join_domain) | ||
6386 | class->join_domain(rq); | ||
6387 | } | ||
6388 | 6426 | ||
6389 | spin_unlock_irqrestore(&rq->lock, flags); | 6427 | spin_unlock_irqrestore(&rq->lock, flags); |
6390 | } | 6428 | } |
@@ -6395,6 +6433,8 @@ static void init_rootdomain(struct root_domain *rd) | |||
6395 | 6433 | ||
6396 | cpus_clear(rd->span); | 6434 | cpus_clear(rd->span); |
6397 | cpus_clear(rd->online); | 6435 | cpus_clear(rd->online); |
6436 | |||
6437 | cpupri_init(&rd->cpupri); | ||
6398 | } | 6438 | } |
6399 | 6439 | ||
6400 | static void init_defrootdomain(void) | 6440 | static void init_defrootdomain(void) |
@@ -6589,7 +6629,7 @@ static void sched_domain_node_span(int node, cpumask_t *span) | |||
6589 | cpus_or(*span, *span, *nodemask); | 6629 | cpus_or(*span, *span, *nodemask); |
6590 | } | 6630 | } |
6591 | } | 6631 | } |
6592 | #endif | 6632 | #endif /* CONFIG_NUMA */ |
6593 | 6633 | ||
6594 | int sched_smt_power_savings = 0, sched_mc_power_savings = 0; | 6634 | int sched_smt_power_savings = 0, sched_mc_power_savings = 0; |
6595 | 6635 | ||
@@ -6608,7 +6648,7 @@ cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, | |||
6608 | *sg = &per_cpu(sched_group_cpus, cpu); | 6648 | *sg = &per_cpu(sched_group_cpus, cpu); |
6609 | return cpu; | 6649 | return cpu; |
6610 | } | 6650 | } |
6611 | #endif | 6651 | #endif /* CONFIG_SCHED_SMT */ |
6612 | 6652 | ||
6613 | /* | 6653 | /* |
6614 | * multi-core sched-domains: | 6654 | * multi-core sched-domains: |
@@ -6616,7 +6656,7 @@ cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, | |||
6616 | #ifdef CONFIG_SCHED_MC | 6656 | #ifdef CONFIG_SCHED_MC |
6617 | static DEFINE_PER_CPU(struct sched_domain, core_domains); | 6657 | static DEFINE_PER_CPU(struct sched_domain, core_domains); |
6618 | static DEFINE_PER_CPU(struct sched_group, sched_group_core); | 6658 | static DEFINE_PER_CPU(struct sched_group, sched_group_core); |
6619 | #endif | 6659 | #endif /* CONFIG_SCHED_MC */ |
6620 | 6660 | ||
6621 | #if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT) | 6661 | #if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT) |
6622 | static int | 6662 | static int |
@@ -6718,7 +6758,7 @@ static void init_numa_sched_groups_power(struct sched_group *group_head) | |||
6718 | sg = sg->next; | 6758 | sg = sg->next; |
6719 | } while (sg != group_head); | 6759 | } while (sg != group_head); |
6720 | } | 6760 | } |
6721 | #endif | 6761 | #endif /* CONFIG_NUMA */ |
6722 | 6762 | ||
6723 | #ifdef CONFIG_NUMA | 6763 | #ifdef CONFIG_NUMA |
6724 | /* Free memory allocated for various sched_group structures */ | 6764 | /* Free memory allocated for various sched_group structures */ |
@@ -6755,11 +6795,11 @@ next_sg: | |||
6755 | sched_group_nodes_bycpu[cpu] = NULL; | 6795 | sched_group_nodes_bycpu[cpu] = NULL; |
6756 | } | 6796 | } |
6757 | } | 6797 | } |
6758 | #else | 6798 | #else /* !CONFIG_NUMA */ |
6759 | static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask) | 6799 | static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask) |
6760 | { | 6800 | { |
6761 | } | 6801 | } |
6762 | #endif | 6802 | #endif /* CONFIG_NUMA */ |
6763 | 6803 | ||
6764 | /* | 6804 | /* |
6765 | * Initialize sched groups cpu_power. | 6805 | * Initialize sched groups cpu_power. |
@@ -7468,7 +7508,7 @@ int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls) | |||
7468 | #endif | 7508 | #endif |
7469 | return err; | 7509 | return err; |
7470 | } | 7510 | } |
7471 | #endif | 7511 | #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */ |
7472 | 7512 | ||
7473 | /* | 7513 | /* |
7474 | * Force a reinitialization of the sched domains hierarchy. The domains | 7514 | * Force a reinitialization of the sched domains hierarchy. The domains |
@@ -7479,21 +7519,28 @@ int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls) | |||
7479 | static int update_sched_domains(struct notifier_block *nfb, | 7519 | static int update_sched_domains(struct notifier_block *nfb, |
7480 | unsigned long action, void *hcpu) | 7520 | unsigned long action, void *hcpu) |
7481 | { | 7521 | { |
7522 | int cpu = (int)(long)hcpu; | ||
7523 | |||
7482 | switch (action) { | 7524 | switch (action) { |
7483 | case CPU_UP_PREPARE: | ||
7484 | case CPU_UP_PREPARE_FROZEN: | ||
7485 | case CPU_DOWN_PREPARE: | 7525 | case CPU_DOWN_PREPARE: |
7486 | case CPU_DOWN_PREPARE_FROZEN: | 7526 | case CPU_DOWN_PREPARE_FROZEN: |
7527 | disable_runtime(cpu_rq(cpu)); | ||
7528 | /* fall-through */ | ||
7529 | case CPU_UP_PREPARE: | ||
7530 | case CPU_UP_PREPARE_FROZEN: | ||
7487 | detach_destroy_domains(&cpu_online_map); | 7531 | detach_destroy_domains(&cpu_online_map); |
7488 | free_sched_domains(); | 7532 | free_sched_domains(); |
7489 | return NOTIFY_OK; | 7533 | return NOTIFY_OK; |
7490 | 7534 | ||
7491 | case CPU_UP_CANCELED: | 7535 | |
7492 | case CPU_UP_CANCELED_FROZEN: | ||
7493 | case CPU_DOWN_FAILED: | 7536 | case CPU_DOWN_FAILED: |
7494 | case CPU_DOWN_FAILED_FROZEN: | 7537 | case CPU_DOWN_FAILED_FROZEN: |
7495 | case CPU_ONLINE: | 7538 | case CPU_ONLINE: |
7496 | case CPU_ONLINE_FROZEN: | 7539 | case CPU_ONLINE_FROZEN: |
7540 | enable_runtime(cpu_rq(cpu)); | ||
7541 | /* fall-through */ | ||
7542 | case CPU_UP_CANCELED: | ||
7543 | case CPU_UP_CANCELED_FROZEN: | ||
7497 | case CPU_DEAD: | 7544 | case CPU_DEAD: |
7498 | case CPU_DEAD_FROZEN: | 7545 | case CPU_DEAD_FROZEN: |
7499 | /* | 7546 | /* |
@@ -7693,8 +7740,8 @@ void __init sched_init(void) | |||
7693 | 7740 | ||
7694 | root_task_group.cfs_rq = (struct cfs_rq **)ptr; | 7741 | root_task_group.cfs_rq = (struct cfs_rq **)ptr; |
7695 | ptr += nr_cpu_ids * sizeof(void **); | 7742 | ptr += nr_cpu_ids * sizeof(void **); |
7696 | #endif | 7743 | #endif /* CONFIG_USER_SCHED */ |
7697 | #endif | 7744 | #endif /* CONFIG_FAIR_GROUP_SCHED */ |
7698 | #ifdef CONFIG_RT_GROUP_SCHED | 7745 | #ifdef CONFIG_RT_GROUP_SCHED |
7699 | init_task_group.rt_se = (struct sched_rt_entity **)ptr; | 7746 | init_task_group.rt_se = (struct sched_rt_entity **)ptr; |
7700 | ptr += nr_cpu_ids * sizeof(void **); | 7747 | ptr += nr_cpu_ids * sizeof(void **); |
@@ -7708,8 +7755,8 @@ void __init sched_init(void) | |||
7708 | 7755 | ||
7709 | root_task_group.rt_rq = (struct rt_rq **)ptr; | 7756 | root_task_group.rt_rq = (struct rt_rq **)ptr; |
7710 | ptr += nr_cpu_ids * sizeof(void **); | 7757 | ptr += nr_cpu_ids * sizeof(void **); |
7711 | #endif | 7758 | #endif /* CONFIG_USER_SCHED */ |
7712 | #endif | 7759 | #endif /* CONFIG_RT_GROUP_SCHED */ |
7713 | } | 7760 | } |
7714 | 7761 | ||
7715 | #ifdef CONFIG_SMP | 7762 | #ifdef CONFIG_SMP |
@@ -7725,8 +7772,8 @@ void __init sched_init(void) | |||
7725 | #ifdef CONFIG_USER_SCHED | 7772 | #ifdef CONFIG_USER_SCHED |
7726 | init_rt_bandwidth(&root_task_group.rt_bandwidth, | 7773 | init_rt_bandwidth(&root_task_group.rt_bandwidth, |
7727 | global_rt_period(), RUNTIME_INF); | 7774 | global_rt_period(), RUNTIME_INF); |
7728 | #endif | 7775 | #endif /* CONFIG_USER_SCHED */ |
7729 | #endif | 7776 | #endif /* CONFIG_RT_GROUP_SCHED */ |
7730 | 7777 | ||
7731 | #ifdef CONFIG_GROUP_SCHED | 7778 | #ifdef CONFIG_GROUP_SCHED |
7732 | list_add(&init_task_group.list, &task_groups); | 7779 | list_add(&init_task_group.list, &task_groups); |
@@ -7736,8 +7783,8 @@ void __init sched_init(void) | |||
7736 | INIT_LIST_HEAD(&root_task_group.children); | 7783 | INIT_LIST_HEAD(&root_task_group.children); |
7737 | init_task_group.parent = &root_task_group; | 7784 | init_task_group.parent = &root_task_group; |
7738 | list_add(&init_task_group.siblings, &root_task_group.children); | 7785 | list_add(&init_task_group.siblings, &root_task_group.children); |
7739 | #endif | 7786 | #endif /* CONFIG_USER_SCHED */ |
7740 | #endif | 7787 | #endif /* CONFIG_GROUP_SCHED */ |
7741 | 7788 | ||
7742 | for_each_possible_cpu(i) { | 7789 | for_each_possible_cpu(i) { |
7743 | struct rq *rq; | 7790 | struct rq *rq; |
@@ -7817,6 +7864,7 @@ void __init sched_init(void) | |||
7817 | rq->next_balance = jiffies; | 7864 | rq->next_balance = jiffies; |
7818 | rq->push_cpu = 0; | 7865 | rq->push_cpu = 0; |
7819 | rq->cpu = i; | 7866 | rq->cpu = i; |
7867 | rq->online = 0; | ||
7820 | rq->migration_thread = NULL; | 7868 | rq->migration_thread = NULL; |
7821 | INIT_LIST_HEAD(&rq->migration_queue); | 7869 | INIT_LIST_HEAD(&rq->migration_queue); |
7822 | rq_attach_root(rq, &def_root_domain); | 7870 | rq_attach_root(rq, &def_root_domain); |
@@ -8056,7 +8104,7 @@ static inline void unregister_fair_sched_group(struct task_group *tg, int cpu) | |||
8056 | { | 8104 | { |
8057 | list_del_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list); | 8105 | list_del_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list); |
8058 | } | 8106 | } |
8059 | #else | 8107 | #else /* !CONFIG_FAIR_GROUP_SCHED */ |
8060 | static inline void free_fair_sched_group(struct task_group *tg) | 8108 | static inline void free_fair_sched_group(struct task_group *tg) |
8061 | { | 8109 | { |
8062 | } | 8110 | } |
@@ -8074,7 +8122,7 @@ static inline void register_fair_sched_group(struct task_group *tg, int cpu) | |||
8074 | static inline void unregister_fair_sched_group(struct task_group *tg, int cpu) | 8122 | static inline void unregister_fair_sched_group(struct task_group *tg, int cpu) |
8075 | { | 8123 | { |
8076 | } | 8124 | } |
8077 | #endif | 8125 | #endif /* CONFIG_FAIR_GROUP_SCHED */ |
8078 | 8126 | ||
8079 | #ifdef CONFIG_RT_GROUP_SCHED | 8127 | #ifdef CONFIG_RT_GROUP_SCHED |
8080 | static void free_rt_sched_group(struct task_group *tg) | 8128 | static void free_rt_sched_group(struct task_group *tg) |
@@ -8145,7 +8193,7 @@ static inline void unregister_rt_sched_group(struct task_group *tg, int cpu) | |||
8145 | { | 8193 | { |
8146 | list_del_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list); | 8194 | list_del_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list); |
8147 | } | 8195 | } |
8148 | #else | 8196 | #else /* !CONFIG_RT_GROUP_SCHED */ |
8149 | static inline void free_rt_sched_group(struct task_group *tg) | 8197 | static inline void free_rt_sched_group(struct task_group *tg) |
8150 | { | 8198 | { |
8151 | } | 8199 | } |
@@ -8163,7 +8211,7 @@ static inline void register_rt_sched_group(struct task_group *tg, int cpu) | |||
8163 | static inline void unregister_rt_sched_group(struct task_group *tg, int cpu) | 8211 | static inline void unregister_rt_sched_group(struct task_group *tg, int cpu) |
8164 | { | 8212 | { |
8165 | } | 8213 | } |
8166 | #endif | 8214 | #endif /* CONFIG_RT_GROUP_SCHED */ |
8167 | 8215 | ||
8168 | #ifdef CONFIG_GROUP_SCHED | 8216 | #ifdef CONFIG_GROUP_SCHED |
8169 | static void free_sched_group(struct task_group *tg) | 8217 | static void free_sched_group(struct task_group *tg) |
@@ -8274,7 +8322,7 @@ void sched_move_task(struct task_struct *tsk) | |||
8274 | 8322 | ||
8275 | task_rq_unlock(rq, &flags); | 8323 | task_rq_unlock(rq, &flags); |
8276 | } | 8324 | } |
8277 | #endif | 8325 | #endif /* CONFIG_GROUP_SCHED */ |
8278 | 8326 | ||
8279 | #ifdef CONFIG_FAIR_GROUP_SCHED | 8327 | #ifdef CONFIG_FAIR_GROUP_SCHED |
8280 | static void set_se_shares(struct sched_entity *se, unsigned long shares) | 8328 | static void set_se_shares(struct sched_entity *se, unsigned long shares) |
@@ -8374,7 +8422,7 @@ static unsigned long to_ratio(u64 period, u64 runtime) | |||
8374 | #ifdef CONFIG_CGROUP_SCHED | 8422 | #ifdef CONFIG_CGROUP_SCHED |
8375 | static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) | 8423 | static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) |
8376 | { | 8424 | { |
8377 | struct task_group *tgi, *parent = tg ? tg->parent : NULL; | 8425 | struct task_group *tgi, *parent = tg->parent; |
8378 | unsigned long total = 0; | 8426 | unsigned long total = 0; |
8379 | 8427 | ||
8380 | if (!parent) { | 8428 | if (!parent) { |
@@ -8398,7 +8446,7 @@ static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) | |||
8398 | } | 8446 | } |
8399 | rcu_read_unlock(); | 8447 | rcu_read_unlock(); |
8400 | 8448 | ||
8401 | return total + to_ratio(period, runtime) < | 8449 | return total + to_ratio(period, runtime) <= |
8402 | to_ratio(ktime_to_ns(parent->rt_bandwidth.rt_period), | 8450 | to_ratio(ktime_to_ns(parent->rt_bandwidth.rt_period), |
8403 | parent->rt_bandwidth.rt_runtime); | 8451 | parent->rt_bandwidth.rt_runtime); |
8404 | } | 8452 | } |
@@ -8515,16 +8563,21 @@ long sched_group_rt_period(struct task_group *tg) | |||
8515 | 8563 | ||
8516 | static int sched_rt_global_constraints(void) | 8564 | static int sched_rt_global_constraints(void) |
8517 | { | 8565 | { |
8566 | struct task_group *tg = &root_task_group; | ||
8567 | u64 rt_runtime, rt_period; | ||
8518 | int ret = 0; | 8568 | int ret = 0; |
8519 | 8569 | ||
8570 | rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period); | ||
8571 | rt_runtime = tg->rt_bandwidth.rt_runtime; | ||
8572 | |||
8520 | mutex_lock(&rt_constraints_mutex); | 8573 | mutex_lock(&rt_constraints_mutex); |
8521 | if (!__rt_schedulable(NULL, 1, 0)) | 8574 | if (!__rt_schedulable(tg, rt_period, rt_runtime)) |
8522 | ret = -EINVAL; | 8575 | ret = -EINVAL; |
8523 | mutex_unlock(&rt_constraints_mutex); | 8576 | mutex_unlock(&rt_constraints_mutex); |
8524 | 8577 | ||
8525 | return ret; | 8578 | return ret; |
8526 | } | 8579 | } |
8527 | #else | 8580 | #else /* !CONFIG_RT_GROUP_SCHED */ |
8528 | static int sched_rt_global_constraints(void) | 8581 | static int sched_rt_global_constraints(void) |
8529 | { | 8582 | { |
8530 | unsigned long flags; | 8583 | unsigned long flags; |
@@ -8542,7 +8595,7 @@ static int sched_rt_global_constraints(void) | |||
8542 | 8595 | ||
8543 | return 0; | 8596 | return 0; |
8544 | } | 8597 | } |
8545 | #endif | 8598 | #endif /* CONFIG_RT_GROUP_SCHED */ |
8546 | 8599 | ||
8547 | int sched_rt_handler(struct ctl_table *table, int write, | 8600 | int sched_rt_handler(struct ctl_table *table, int write, |
8548 | struct file *filp, void __user *buffer, size_t *lenp, | 8601 | struct file *filp, void __user *buffer, size_t *lenp, |
@@ -8650,7 +8703,7 @@ static u64 cpu_shares_read_u64(struct cgroup *cgrp, struct cftype *cft) | |||
8650 | 8703 | ||
8651 | return (u64) tg->shares; | 8704 | return (u64) tg->shares; |
8652 | } | 8705 | } |
8653 | #endif | 8706 | #endif /* CONFIG_FAIR_GROUP_SCHED */ |
8654 | 8707 | ||
8655 | #ifdef CONFIG_RT_GROUP_SCHED | 8708 | #ifdef CONFIG_RT_GROUP_SCHED |
8656 | static int cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft, | 8709 | static int cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft, |
@@ -8674,7 +8727,7 @@ static u64 cpu_rt_period_read_uint(struct cgroup *cgrp, struct cftype *cft) | |||
8674 | { | 8727 | { |
8675 | return sched_group_rt_period(cgroup_tg(cgrp)); | 8728 | return sched_group_rt_period(cgroup_tg(cgrp)); |
8676 | } | 8729 | } |
8677 | #endif | 8730 | #endif /* CONFIG_RT_GROUP_SCHED */ |
8678 | 8731 | ||
8679 | static struct cftype cpu_files[] = { | 8732 | static struct cftype cpu_files[] = { |
8680 | #ifdef CONFIG_FAIR_GROUP_SCHED | 8733 | #ifdef CONFIG_FAIR_GROUP_SCHED |
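One detail worth calling out from the hunks above: __rt_schedulable() now compares with <=, so a child group may claim exactly the bandwidth its parent has left, and sched_rt_global_constraints() checks the root group's real period/runtime rather than the old (NULL, 1, 0) placeholder. The check compares runtime/period ratios in fixed point; below is a simplified userspace model where the 16-bit scale factor and the 0.95s/1s global default are assumptions for illustration, not quotes from this patch:

#include <stdio.h>
#include <stdint.h>

/* Fixed-point runtime/period ratio; the shift width is illustrative. */
static uint64_t to_ratio(uint64_t period_ns, uint64_t runtime_ns)
{
	return (runtime_ns << 16) / period_ns;
}

int main(void)
{
	/* Parent: 1s period, 950ms runtime (assumed global defaults). */
	uint64_t parent = to_ratio(1000000000ULL, 950000000ULL);
	/* Two children at 3% each, as in the sched-rt-group.txt example. */
	uint64_t child = to_ratio(5000000ULL, 150000ULL);

	printf("parent ratio %llu, children sum %llu -> %s\n",
	       (unsigned long long)parent,
	       (unsigned long long)(2 * child),
	       2 * child <= parent ? "schedulable" : "rejected");
	return 0;
}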
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c
new file mode 100644
index 000000000000..52154fefab7e
--- /dev/null
+++ b/kernel/sched_cpupri.c
@@ -0,0 +1,174 @@ | |||
1 | /* | ||
2 | * kernel/sched_cpupri.c | ||
3 | * | ||
4 | * CPU priority management | ||
5 | * | ||
6 | * Copyright (C) 2007-2008 Novell | ||
7 | * | ||
8 | * Author: Gregory Haskins <ghaskins@novell.com> | ||
9 | * | ||
10 | * This code tracks the priority of each CPU so that global migration | ||
11 | * decisions are easy to calculate. Each CPU can be in a state as follows: | ||
12 | * | ||
13 | * (INVALID), IDLE, NORMAL, RT1, ... RT99 | ||
14 | * | ||
15 | * going from the lowest priority to the highest. CPUs in the INVALID state | ||
16 | * are not eligible for routing. The system maintains this state with | ||
17 | * a 2 dimensional bitmap (the first for priority class, the second for cpus | ||
18 | * in that class). Therefore a typical application without affinity | ||
19 | * restrictions can find a suitable CPU with O(1) complexity (e.g. two bit | ||
20 | * searches). For tasks with affinity restrictions, the algorithm has a | ||
21 | * worst case complexity of O(min(102, nr_domcpus)), though the scenario that | ||
22 | * yields the worst case search is fairly contrived. | ||
23 | * | ||
24 | * This program is free software; you can redistribute it and/or | ||
25 | * modify it under the terms of the GNU General Public License | ||
26 | * as published by the Free Software Foundation; version 2 | ||
27 | * of the License. | ||
28 | */ | ||
29 | |||
30 | #include "sched_cpupri.h" | ||
31 | |||
32 | /* Convert between a 140 based task->prio, and our 102 based cpupri */ | ||
33 | static int convert_prio(int prio) | ||
34 | { | ||
35 | int cpupri; | ||
36 | |||
37 | if (prio == CPUPRI_INVALID) | ||
38 | cpupri = CPUPRI_INVALID; | ||
39 | else if (prio == MAX_PRIO) | ||
40 | cpupri = CPUPRI_IDLE; | ||
41 | else if (prio >= MAX_RT_PRIO) | ||
42 | cpupri = CPUPRI_NORMAL; | ||
43 | else | ||
44 | cpupri = MAX_RT_PRIO - prio + 1; | ||
45 | |||
46 | return cpupri; | ||
47 | } | ||
48 | |||
49 | #define for_each_cpupri_active(array, idx) \ | ||
50 | for (idx = find_first_bit(array, CPUPRI_NR_PRIORITIES); \ | ||
51 | idx < CPUPRI_NR_PRIORITIES; \ | ||
52 | idx = find_next_bit(array, CPUPRI_NR_PRIORITIES, idx+1)) | ||
53 | |||
54 | /** | ||
55 | * cpupri_find - find the best (lowest-pri) CPU in the system | ||
56 | * @cp: The cpupri context | ||
57 | * @p: The task | ||
58 | * @lowest_mask: A mask to fill in with selected CPUs | ||
59 | * | ||
60 | * Note: This function returns the recommended CPUs as calculated during the | ||
61 | * current invocation. By the time the call returns, the CPUs may have in | ||
62 | * fact changed priorities any number of times. While not ideal, it is not | ||
63 | * an issue of correctness since the normal rebalancer logic will correct | ||
64 | * any discrepancies created by racing against the uncertainty of the current | ||
65 | * priority configuration. | ||
66 | * | ||
67 | * Returns: (int)bool - CPUs were found | ||
68 | */ | ||
69 | int cpupri_find(struct cpupri *cp, struct task_struct *p, | ||
70 | cpumask_t *lowest_mask) | ||
71 | { | ||
72 | int idx = 0; | ||
73 | int task_pri = convert_prio(p->prio); | ||
74 | |||
75 | for_each_cpupri_active(cp->pri_active, idx) { | ||
76 | struct cpupri_vec *vec = &cp->pri_to_cpu[idx]; | ||
77 | cpumask_t mask; | ||
78 | |||
79 | if (idx >= task_pri) | ||
80 | break; | ||
81 | |||
82 | cpus_and(mask, p->cpus_allowed, vec->mask); | ||
83 | |||
84 | if (cpus_empty(mask)) | ||
85 | continue; | ||
86 | |||
87 | *lowest_mask = mask; | ||
88 | return 1; | ||
89 | } | ||
90 | |||
91 | return 0; | ||
92 | } | ||
93 | |||
94 | /** | ||
95 | * cpupri_set - update the cpu priority setting | ||
96 | * @cp: The cpupri context | ||
97 | * @cpu: The target cpu | ||
98 | * @pri: The priority (INVALID-RT99) to assign to this CPU | ||
99 | * | ||
100 | * Note: Assumes cpu_rq(cpu)->lock is locked | ||
101 | * | ||
102 | * Returns: (void) | ||
103 | */ | ||
104 | void cpupri_set(struct cpupri *cp, int cpu, int newpri) | ||
105 | { | ||
106 | int *currpri = &cp->cpu_to_pri[cpu]; | ||
107 | int oldpri = *currpri; | ||
108 | unsigned long flags; | ||
109 | |||
110 | newpri = convert_prio(newpri); | ||
111 | |||
112 | BUG_ON(newpri >= CPUPRI_NR_PRIORITIES); | ||
113 | |||
114 | if (newpri == oldpri) | ||
115 | return; | ||
116 | |||
117 | /* | ||
118 | * If the cpu was currently mapped to a different value, we | ||
119 | * first need to unmap the old value | ||
120 | */ | ||
121 | if (likely(oldpri != CPUPRI_INVALID)) { | ||
122 | struct cpupri_vec *vec = &cp->pri_to_cpu[oldpri]; | ||
123 | |||
124 | spin_lock_irqsave(&vec->lock, flags); | ||
125 | |||
126 | vec->count--; | ||
127 | if (!vec->count) | ||
128 | clear_bit(oldpri, cp->pri_active); | ||
129 | cpu_clear(cpu, vec->mask); | ||
130 | |||
131 | spin_unlock_irqrestore(&vec->lock, flags); | ||
132 | } | ||
133 | |||
134 | if (likely(newpri != CPUPRI_INVALID)) { | ||
135 | struct cpupri_vec *vec = &cp->pri_to_cpu[newpri]; | ||
136 | |||
137 | spin_lock_irqsave(&vec->lock, flags); | ||
138 | |||
139 | cpu_set(cpu, vec->mask); | ||
140 | vec->count++; | ||
141 | if (vec->count == 1) | ||
142 | set_bit(newpri, cp->pri_active); | ||
143 | |||
144 | spin_unlock_irqrestore(&vec->lock, flags); | ||
145 | } | ||
146 | |||
147 | *currpri = newpri; | ||
148 | } | ||
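
The bookkeeping above keeps one invariant: a bit in pri_active is set exactly while the corresponding vector contains at least one CPU. A toy user-space model of a single remap (four made-up priority classes, CPU numbers as bit positions):

    #include <stdio.h>

    int main(void)
    {
            unsigned long pri_active = 0;   /* one bit per priority class   */
            int count[4] = { 0 };           /* CPUs currently in each class */
            unsigned long mask[4] = { 0 };  /* per-class CPU mask           */

            /* CPU0 starts out running a normal task: map it into class 1 */
            mask[1] |= 1UL << 0;
            if (++count[1] == 1)
                    pri_active |= 1UL << 1;

            /* CPU0 picks up an RT task in class 2: unmap class 1, map class 2 */
            mask[1] &= ~(1UL << 0);
            if (--count[1] == 0)
                    pri_active &= ~(1UL << 1);
            mask[2] |= 1UL << 0;
            if (++count[2] == 1)
                    pri_active |= 1UL << 2;

            printf("pri_active = %#lx, class 2 mask = %#lx\n",
                   pri_active, mask[2]);   /* 0x4 and 0x1 */
            return 0;
    }
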
149 | |||
150 | /** | ||
151 | * cpupri_init - initialize the cpupri structure | ||
152 | * @cp: The cpupri context | ||
153 | * | ||
154 | * Returns: (void) | ||
155 | */ | ||
156 | void cpupri_init(struct cpupri *cp) | ||
157 | { | ||
158 | int i; | ||
159 | |||
160 | memset(cp, 0, sizeof(*cp)); | ||
161 | |||
162 | for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) { | ||
163 | struct cpupri_vec *vec = &cp->pri_to_cpu[i]; | ||
164 | |||
165 | spin_lock_init(&vec->lock); | ||
166 | vec->count = 0; | ||
167 | cpus_clear(vec->mask); | ||
168 | } | ||
169 | |||
170 | for_each_possible_cpu(i) | ||
171 | cp->cpu_to_pri[i] = CPUPRI_INVALID; | ||
172 | } | ||
173 | |||
174 | |||
diff --git a/kernel/sched_cpupri.h b/kernel/sched_cpupri.h new file mode 100644 index 000000000000..f25811b0f931 --- /dev/null +++ b/kernel/sched_cpupri.h | |||
@@ -0,0 +1,36 @@ | |||
1 | #ifndef _LINUX_CPUPRI_H | ||
2 | #define _LINUX_CPUPRI_H | ||
3 | |||
4 | #include <linux/sched.h> | ||
5 | |||
6 | #define CPUPRI_NR_PRIORITIES (MAX_RT_PRIO + 2) | ||
7 | #define CPUPRI_NR_PRI_WORDS BITS_TO_LONGS(CPUPRI_NR_PRIORITIES) | ||
8 | |||
9 | #define CPUPRI_INVALID -1 | ||
10 | #define CPUPRI_IDLE 0 | ||
11 | #define CPUPRI_NORMAL 1 | ||
12 | /* values 2-101 are RT priorities 0-99 */ | ||
13 | |||
14 | struct cpupri_vec { | ||
15 | spinlock_t lock; | ||
16 | int count; | ||
17 | cpumask_t mask; | ||
18 | }; | ||
19 | |||
20 | struct cpupri { | ||
21 | struct cpupri_vec pri_to_cpu[CPUPRI_NR_PRIORITIES]; | ||
22 | long pri_active[CPUPRI_NR_PRI_WORDS]; | ||
23 | int cpu_to_pri[NR_CPUS]; | ||
24 | }; | ||
25 | |||
26 | #ifdef CONFIG_SMP | ||
27 | int cpupri_find(struct cpupri *cp, | ||
28 | struct task_struct *p, cpumask_t *lowest_mask); | ||
29 | void cpupri_set(struct cpupri *cp, int cpu, int pri); | ||
30 | void cpupri_init(struct cpupri *cp); | ||
31 | #else | ||
32 | #define cpupri_set(cp, cpu, pri) do { } while (0) | ||
33 | #define cpupri_init(cp) do { } while (0) | ||
34 | #endif | ||
35 | |||
36 | #endif /* _LINUX_CPUPRI_H */ | ||
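
For reference, the sizing arithmetic, with BITS_TO_LONGS approximated for user space; on a 64-bit build the 102 classes need two bitmap words:

    #include <stdio.h>

    #define MAX_RT_PRIO             100
    #define CPUPRI_NR_PRIORITIES    (MAX_RT_PRIO + 2)               /* 102 */
    #define BITS_PER_LONG           ((int)(8 * sizeof(long)))
    #define BITS_TO_LONGS(n)        (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

    int main(void)
    {
            printf("%d classes -> %d bitmap word(s)\n",
                   CPUPRI_NR_PRIORITIES, BITS_TO_LONGS(CPUPRI_NR_PRIORITIES));
            return 0;
    }
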
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 8bb713040ac9..8e077b9c91cb 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c | |||
@@ -119,9 +119,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) | |||
119 | struct sched_entity *last; | 119 | struct sched_entity *last; |
120 | unsigned long flags; | 120 | unsigned long flags; |
121 | 121 | ||
122 | #if !defined(CONFIG_CGROUP_SCHED) || !defined(CONFIG_USER_SCHED) | 122 | #if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED) |
123 | SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu); | ||
124 | #else | ||
125 | char path[128] = ""; | 123 | char path[128] = ""; |
126 | struct cgroup *cgroup = NULL; | 124 | struct cgroup *cgroup = NULL; |
127 | struct task_group *tg = cfs_rq->tg; | 125 | struct task_group *tg = cfs_rq->tg; |
@@ -133,6 +131,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) | |||
133 | cgroup_path(cgroup, path, sizeof(path)); | 131 | cgroup_path(cgroup, path, sizeof(path)); |
134 | 132 | ||
135 | SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path); | 133 | SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path); |
134 | #else | ||
135 | SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu); | ||
136 | #endif | 136 | #endif |
137 | 137 | ||
138 | SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock", | 138 | SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock", |
@@ -169,6 +169,39 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) | |||
169 | cfs_rq->nr_spread_over); | 169 | cfs_rq->nr_spread_over); |
170 | } | 170 | } |
171 | 171 | ||
172 | void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) | ||
173 | { | ||
174 | #if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_RT_GROUP_SCHED) | ||
175 | char path[128] = ""; | ||
176 | struct cgroup *cgroup = NULL; | ||
177 | struct task_group *tg = rt_rq->tg; | ||
178 | |||
179 | if (tg) | ||
180 | cgroup = tg->css.cgroup; | ||
181 | |||
182 | if (cgroup) | ||
183 | cgroup_path(cgroup, path, sizeof(path)); | ||
184 | |||
185 | SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, path); | ||
186 | #else | ||
187 | SEQ_printf(m, "\nrt_rq[%d]:\n", cpu); | ||
188 | #endif | ||
189 | |||
190 | |||
191 | #define P(x) \ | ||
192 | SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rt_rq->x)) | ||
193 | #define PN(x) \ | ||
194 | SEQ_printf(m, " .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rt_rq->x)) | ||
195 | |||
196 | P(rt_nr_running); | ||
197 | P(rt_throttled); | ||
198 | PN(rt_time); | ||
199 | PN(rt_runtime); | ||
200 | |||
201 | #undef PN | ||
202 | #undef P | ||
203 | } | ||
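
With these printouts in place each rt_rq shows up in /proc/sched_debug roughly as below; the values are invented for illustration, and the group path after the colon only appears on CONFIG_RT_GROUP_SCHED kernels:

    rt_rq[0]:/
      .rt_nr_running                 : 1
      .rt_throttled                  : 0
      .rt_time                       : 4.123456
      .rt_runtime                    : 950.000000
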
204 | |||
172 | static void print_cpu(struct seq_file *m, int cpu) | 205 | static void print_cpu(struct seq_file *m, int cpu) |
173 | { | 206 | { |
174 | struct rq *rq = &per_cpu(runqueues, cpu); | 207 | struct rq *rq = &per_cpu(runqueues, cpu); |
@@ -208,6 +241,7 @@ static void print_cpu(struct seq_file *m, int cpu) | |||
208 | #undef PN | 241 | #undef PN |
209 | 242 | ||
210 | print_cfs_stats(m, cpu); | 243 | print_cfs_stats(m, cpu); |
244 | print_rt_stats(m, cpu); | ||
211 | 245 | ||
212 | print_rq(m, rq, cpu); | 246 | print_rq(m, rq, cpu); |
213 | } | 247 | } |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 08ae848b71d4..1fe4c65a8170 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -1275,23 +1275,18 @@ __load_balance_iterator(struct cfs_rq *cfs_rq, struct list_head *next) | |||
1275 | struct task_struct *p = NULL; | 1275 | struct task_struct *p = NULL; |
1276 | struct sched_entity *se; | 1276 | struct sched_entity *se; |
1277 | 1277 | ||
1278 | if (next == &cfs_rq->tasks) | 1278 | while (next != &cfs_rq->tasks) { |
1279 | return NULL; | ||
1280 | |||
1281 | /* Skip over entities that are not tasks */ | ||
1282 | do { | ||
1283 | se = list_entry(next, struct sched_entity, group_node); | 1279 | se = list_entry(next, struct sched_entity, group_node); |
1284 | next = next->next; | 1280 | next = next->next; |
1285 | } while (next != &cfs_rq->tasks && !entity_is_task(se)); | ||
1286 | 1281 | ||
1287 | if (next == &cfs_rq->tasks) | 1282 | /* Skip over entities that are not tasks */ |
1288 | return NULL; | 1283 | if (entity_is_task(se)) { |
1284 | p = task_of(se); | ||
1285 | break; | ||
1286 | } | ||
1287 | } | ||
1289 | 1288 | ||
1290 | cfs_rq->balance_iterator = next; | 1289 | cfs_rq->balance_iterator = next; |
1291 | |||
1292 | if (entity_is_task(se)) | ||
1293 | p = task_of(se); | ||
1294 | |||
1295 | return p; | 1290 | return p; |
1296 | } | 1291 | } |
1297 | 1292 | ||
diff --git a/kernel/sched_features.h b/kernel/sched_features.h index 1c7283cb9581..62b39ca92ebd 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h | |||
@@ -6,5 +6,3 @@ SCHED_FEAT(CACHE_HOT_BUDDY, 1) | |||
6 | SCHED_FEAT(SYNC_WAKEUPS, 1) | 6 | SCHED_FEAT(SYNC_WAKEUPS, 1) |
7 | SCHED_FEAT(HRTICK, 1) | 7 | SCHED_FEAT(HRTICK, 1) |
8 | SCHED_FEAT(DOUBLE_TICK, 0) | 8 | SCHED_FEAT(DOUBLE_TICK, 0) |
9 | SCHED_FEAT(NORMALIZED_SLEEPER, 1) | ||
10 | SCHED_FEAT(DEADLINE, 1) | ||
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 0f3c19197fa4..bd90c8bb0739 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
@@ -12,6 +12,9 @@ static inline int rt_overloaded(struct rq *rq) | |||
12 | 12 | ||
13 | static inline void rt_set_overload(struct rq *rq) | 13 | static inline void rt_set_overload(struct rq *rq) |
14 | { | 14 | { |
15 | if (!rq->online) | ||
16 | return; | ||
17 | |||
15 | cpu_set(rq->cpu, rq->rd->rto_mask); | 18 | cpu_set(rq->cpu, rq->rd->rto_mask); |
16 | /* | 19 | /* |
17 | * Make sure the mask is visible before we set | 20 | * Make sure the mask is visible before we set |
@@ -26,6 +29,9 @@ static inline void rt_set_overload(struct rq *rq) | |||
26 | 29 | ||
27 | static inline void rt_clear_overload(struct rq *rq) | 30 | static inline void rt_clear_overload(struct rq *rq) |
28 | { | 31 | { |
32 | if (!rq->online) | ||
33 | return; | ||
34 | |||
29 | /* the order here really doesn't matter */ | 35 | /* the order here really doesn't matter */ |
30 | atomic_dec(&rq->rd->rto_count); | 36 | atomic_dec(&rq->rd->rto_count); |
31 | cpu_clear(rq->cpu, rq->rd->rto_mask); | 37 | cpu_clear(rq->cpu, rq->rd->rto_mask); |
@@ -222,47 +228,8 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq) | |||
222 | 228 | ||
223 | #endif | 229 | #endif |
224 | 230 | ||
225 | static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) | ||
226 | { | ||
227 | int i, idle = 1; | ||
228 | cpumask_t span; | ||
229 | |||
230 | if (rt_b->rt_runtime == RUNTIME_INF) | ||
231 | return 1; | ||
232 | |||
233 | span = sched_rt_period_mask(); | ||
234 | for_each_cpu_mask(i, span) { | ||
235 | int enqueue = 0; | ||
236 | struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i); | ||
237 | struct rq *rq = rq_of_rt_rq(rt_rq); | ||
238 | |||
239 | spin_lock(&rq->lock); | ||
240 | if (rt_rq->rt_time) { | ||
241 | u64 runtime; | ||
242 | |||
243 | spin_lock(&rt_rq->rt_runtime_lock); | ||
244 | runtime = rt_rq->rt_runtime; | ||
245 | rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime); | ||
246 | if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) { | ||
247 | rt_rq->rt_throttled = 0; | ||
248 | enqueue = 1; | ||
249 | } | ||
250 | if (rt_rq->rt_time || rt_rq->rt_nr_running) | ||
251 | idle = 0; | ||
252 | spin_unlock(&rt_rq->rt_runtime_lock); | ||
253 | } else if (rt_rq->rt_nr_running) | ||
254 | idle = 0; | ||
255 | |||
256 | if (enqueue) | ||
257 | sched_rt_rq_enqueue(rt_rq); | ||
258 | spin_unlock(&rq->lock); | ||
259 | } | ||
260 | |||
261 | return idle; | ||
262 | } | ||
263 | |||
264 | #ifdef CONFIG_SMP | 231 | #ifdef CONFIG_SMP |
265 | static int balance_runtime(struct rt_rq *rt_rq) | 232 | static int do_balance_runtime(struct rt_rq *rt_rq) |
266 | { | 233 | { |
267 | struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); | 234 | struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); |
268 | struct root_domain *rd = cpu_rq(smp_processor_id())->rd; | 235 | struct root_domain *rd = cpu_rq(smp_processor_id())->rd; |
@@ -281,6 +248,9 @@ static int balance_runtime(struct rt_rq *rt_rq) | |||
281 | continue; | 248 | continue; |
282 | 249 | ||
283 | spin_lock(&iter->rt_runtime_lock); | 250 | spin_lock(&iter->rt_runtime_lock); |
251 | if (iter->rt_runtime == RUNTIME_INF) | ||
252 | goto next; | ||
253 | |||
284 | diff = iter->rt_runtime - iter->rt_time; | 254 | diff = iter->rt_runtime - iter->rt_time; |
285 | if (diff > 0) { | 255 | if (diff > 0) { |
286 | do_div(diff, weight); | 256 | do_div(diff, weight); |
@@ -294,14 +264,165 @@ static int balance_runtime(struct rt_rq *rt_rq) | |||
294 | break; | 264 | break; |
295 | } | 265 | } |
296 | } | 266 | } |
267 | next: | ||
297 | spin_unlock(&iter->rt_runtime_lock); | 268 | spin_unlock(&iter->rt_runtime_lock); |
298 | } | 269 | } |
299 | spin_unlock(&rt_b->rt_runtime_lock); | 270 | spin_unlock(&rt_b->rt_runtime_lock); |
300 | 271 | ||
301 | return more; | 272 | return more; |
302 | } | 273 | } |
274 | |||
275 | static void __disable_runtime(struct rq *rq) | ||
276 | { | ||
277 | struct root_domain *rd = rq->rd; | ||
278 | struct rt_rq *rt_rq; | ||
279 | |||
280 | if (unlikely(!scheduler_running)) | ||
281 | return; | ||
282 | |||
283 | for_each_leaf_rt_rq(rt_rq, rq) { | ||
284 | struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); | ||
285 | s64 want; | ||
286 | int i; | ||
287 | |||
288 | spin_lock(&rt_b->rt_runtime_lock); | ||
289 | spin_lock(&rt_rq->rt_runtime_lock); | ||
290 | if (rt_rq->rt_runtime == RUNTIME_INF || | ||
291 | rt_rq->rt_runtime == rt_b->rt_runtime) | ||
292 | goto balanced; | ||
293 | spin_unlock(&rt_rq->rt_runtime_lock); | ||
294 | |||
295 | want = rt_b->rt_runtime - rt_rq->rt_runtime; | ||
296 | |||
297 | for_each_cpu_mask(i, rd->span) { | ||
298 | struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); | ||
299 | s64 diff; | ||
300 | |||
301 | if (iter == rt_rq) | ||
302 | continue; | ||
303 | |||
304 | spin_lock(&iter->rt_runtime_lock); | ||
305 | if (want > 0) { | ||
306 | diff = min_t(s64, iter->rt_runtime, want); | ||
307 | iter->rt_runtime -= diff; | ||
308 | want -= diff; | ||
309 | } else { | ||
310 | iter->rt_runtime -= want; | ||
311 | want -= want; | ||
312 | } | ||
313 | spin_unlock(&iter->rt_runtime_lock); | ||
314 | |||
315 | if (!want) | ||
316 | break; | ||
317 | } | ||
318 | |||
319 | spin_lock(&rt_rq->rt_runtime_lock); | ||
320 | BUG_ON(want); | ||
321 | balanced: | ||
322 | rt_rq->rt_runtime = RUNTIME_INF; | ||
323 | spin_unlock(&rt_rq->rt_runtime_lock); | ||
324 | spin_unlock(&rt_b->rt_runtime_lock); | ||
325 | } | ||
326 | } | ||
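
A worked example of the reclaim above, using the stock 0.95s-per-1s RT bandwidth defaults (figures invented): the group's per-CPU allotment is 0.95s, but the departing CPU's rt_rq has lent part of it to its siblings and currently holds only 0.5s, so 0.45s has to be pulled back before the runqueue goes offline:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            int64_t rt_b_runtime  = 950000000LL;    /* group allotment, ns   */
            int64_t rt_rq_runtime = 500000000LL;    /* what this CPU holds   */

            int64_t want = rt_b_runtime - rt_rq_runtime;
            printf("pull back %lld ns from siblings\n", (long long)want);
            return 0;
    }

Once everything is pulled back (or, when want is negative, the surplus is handed to the first sibling), the local rt_rq is parked at RUNTIME_INF so it can never throttle while the CPU is offline.
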
327 | |||
328 | static void disable_runtime(struct rq *rq) | ||
329 | { | ||
330 | unsigned long flags; | ||
331 | |||
332 | spin_lock_irqsave(&rq->lock, flags); | ||
333 | __disable_runtime(rq); | ||
334 | spin_unlock_irqrestore(&rq->lock, flags); | ||
335 | } | ||
336 | |||
337 | static void __enable_runtime(struct rq *rq) | ||
338 | { | ||
339 | struct root_domain *rd = rq->rd; | ||
340 | struct rt_rq *rt_rq; | ||
341 | |||
342 | if (unlikely(!scheduler_running)) | ||
343 | return; | ||
344 | |||
345 | for_each_leaf_rt_rq(rt_rq, rq) { | ||
346 | struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); | ||
347 | |||
348 | spin_lock(&rt_b->rt_runtime_lock); | ||
349 | spin_lock(&rt_rq->rt_runtime_lock); | ||
350 | rt_rq->rt_runtime = rt_b->rt_runtime; | ||
351 | rt_rq->rt_time = 0; | ||
352 | spin_unlock(&rt_rq->rt_runtime_lock); | ||
353 | spin_unlock(&rt_b->rt_runtime_lock); | ||
354 | } | ||
355 | } | ||
356 | |||
357 | static void enable_runtime(struct rq *rq) | ||
358 | { | ||
359 | unsigned long flags; | ||
360 | |||
361 | spin_lock_irqsave(&rq->lock, flags); | ||
362 | __enable_runtime(rq); | ||
363 | spin_unlock_irqrestore(&rq->lock, flags); | ||
364 | } | ||
365 | |||
366 | static int balance_runtime(struct rt_rq *rt_rq) | ||
367 | { | ||
368 | int more = 0; | ||
369 | |||
370 | if (rt_rq->rt_time > rt_rq->rt_runtime) { | ||
371 | spin_unlock(&rt_rq->rt_runtime_lock); | ||
372 | more = do_balance_runtime(rt_rq); | ||
373 | spin_lock(&rt_rq->rt_runtime_lock); | ||
374 | } | ||
375 | |||
376 | return more; | ||
377 | } | ||
378 | #else | ||
379 | static inline int balance_runtime(struct rt_rq *rt_rq) | ||
380 | { | ||
381 | return 0; | ||
382 | } | ||
303 | #endif | 383 | #endif |
304 | 384 | ||
385 | static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) | ||
386 | { | ||
387 | int i, idle = 1; | ||
388 | cpumask_t span; | ||
389 | |||
390 | if (rt_b->rt_runtime == RUNTIME_INF) | ||
391 | return 1; | ||
392 | |||
393 | span = sched_rt_period_mask(); | ||
394 | for_each_cpu_mask(i, span) { | ||
395 | int enqueue = 0; | ||
396 | struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i); | ||
397 | struct rq *rq = rq_of_rt_rq(rt_rq); | ||
398 | |||
399 | spin_lock(&rq->lock); | ||
400 | if (rt_rq->rt_time) { | ||
401 | u64 runtime; | ||
402 | |||
403 | spin_lock(&rt_rq->rt_runtime_lock); | ||
404 | if (rt_rq->rt_throttled) | ||
405 | balance_runtime(rt_rq); | ||
406 | runtime = rt_rq->rt_runtime; | ||
407 | rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime); | ||
408 | if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) { | ||
409 | rt_rq->rt_throttled = 0; | ||
410 | enqueue = 1; | ||
411 | } | ||
412 | if (rt_rq->rt_time || rt_rq->rt_nr_running) | ||
413 | idle = 0; | ||
414 | spin_unlock(&rt_rq->rt_runtime_lock); | ||
415 | } else if (rt_rq->rt_nr_running) | ||
416 | idle = 0; | ||
417 | |||
418 | if (enqueue) | ||
419 | sched_rt_rq_enqueue(rt_rq); | ||
420 | spin_unlock(&rq->lock); | ||
421 | } | ||
422 | |||
423 | return idle; | ||
424 | } | ||
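
A worked example of the replenishment above, again with the default 0.95s runtime per 1s period (figures invented): a throttled group that has accrued 1.2s of rt_time loses one period's worth of runtime, and because the remaining 0.25s is below the 0.95s limit it is unthrottled and re-enqueued:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t runtime = 950000000ULL;    /* rt_runtime, ns              */
            uint64_t rt_time = 1200000000ULL;   /* accrued; group is throttled */
            int overrun = 1;                    /* periods since last update   */

            uint64_t decay = (uint64_t)overrun * runtime;
            rt_time -= rt_time < decay ? rt_time : decay;

            printf("rt_time now %llu ns -> %s\n", (unsigned long long)rt_time,
                   rt_time < runtime ? "unthrottle" : "stay throttled");
            return 0;
    }
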
425 | |||
305 | static inline int rt_se_prio(struct sched_rt_entity *rt_se) | 426 | static inline int rt_se_prio(struct sched_rt_entity *rt_se) |
306 | { | 427 | { |
307 | #ifdef CONFIG_RT_GROUP_SCHED | 428 | #ifdef CONFIG_RT_GROUP_SCHED |
@@ -327,18 +448,10 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) | |||
327 | if (sched_rt_runtime(rt_rq) >= sched_rt_period(rt_rq)) | 448 | if (sched_rt_runtime(rt_rq) >= sched_rt_period(rt_rq)) |
328 | return 0; | 449 | return 0; |
329 | 450 | ||
330 | #ifdef CONFIG_SMP | 451 | balance_runtime(rt_rq); |
331 | if (rt_rq->rt_time > runtime) { | 452 | runtime = sched_rt_runtime(rt_rq); |
332 | int more; | 453 | if (runtime == RUNTIME_INF) |
333 | 454 | return 0; | |
334 | spin_unlock(&rt_rq->rt_runtime_lock); | ||
335 | more = balance_runtime(rt_rq); | ||
336 | spin_lock(&rt_rq->rt_runtime_lock); | ||
337 | |||
338 | if (more) | ||
339 | runtime = sched_rt_runtime(rt_rq); | ||
340 | } | ||
341 | #endif | ||
342 | 455 | ||
343 | if (rt_rq->rt_time > runtime) { | 456 | if (rt_rq->rt_time > runtime) { |
344 | rt_rq->rt_throttled = 1; | 457 | rt_rq->rt_throttled = 1; |
@@ -392,12 +505,21 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | |||
392 | WARN_ON(!rt_prio(rt_se_prio(rt_se))); | 505 | WARN_ON(!rt_prio(rt_se_prio(rt_se))); |
393 | rt_rq->rt_nr_running++; | 506 | rt_rq->rt_nr_running++; |
394 | #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED | 507 | #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED |
395 | if (rt_se_prio(rt_se) < rt_rq->highest_prio) | 508 | if (rt_se_prio(rt_se) < rt_rq->highest_prio) { |
509 | struct rq *rq = rq_of_rt_rq(rt_rq); | ||
510 | |||
396 | rt_rq->highest_prio = rt_se_prio(rt_se); | 511 | rt_rq->highest_prio = rt_se_prio(rt_se); |
512 | #ifdef CONFIG_SMP | ||
513 | if (rq->online) | ||
514 | cpupri_set(&rq->rd->cpupri, rq->cpu, | ||
515 | rt_se_prio(rt_se)); | ||
516 | #endif | ||
517 | } | ||
397 | #endif | 518 | #endif |
398 | #ifdef CONFIG_SMP | 519 | #ifdef CONFIG_SMP |
399 | if (rt_se->nr_cpus_allowed > 1) { | 520 | if (rt_se->nr_cpus_allowed > 1) { |
400 | struct rq *rq = rq_of_rt_rq(rt_rq); | 521 | struct rq *rq = rq_of_rt_rq(rt_rq); |
522 | |||
401 | rq->rt.rt_nr_migratory++; | 523 | rq->rt.rt_nr_migratory++; |
402 | } | 524 | } |
403 | 525 | ||
@@ -417,6 +539,10 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | |||
417 | static inline | 539 | static inline |
418 | void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | 540 | void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) |
419 | { | 541 | { |
542 | #ifdef CONFIG_SMP | ||
543 | int highest_prio = rt_rq->highest_prio; | ||
544 | #endif | ||
545 | |||
420 | WARN_ON(!rt_prio(rt_se_prio(rt_se))); | 546 | WARN_ON(!rt_prio(rt_se_prio(rt_se))); |
421 | WARN_ON(!rt_rq->rt_nr_running); | 547 | WARN_ON(!rt_rq->rt_nr_running); |
422 | rt_rq->rt_nr_running--; | 548 | rt_rq->rt_nr_running--; |
@@ -440,6 +566,14 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | |||
440 | rq->rt.rt_nr_migratory--; | 566 | rq->rt.rt_nr_migratory--; |
441 | } | 567 | } |
442 | 568 | ||
569 | if (rt_rq->highest_prio != highest_prio) { | ||
570 | struct rq *rq = rq_of_rt_rq(rt_rq); | ||
571 | |||
572 | if (rq->online) | ||
573 | cpupri_set(&rq->rd->cpupri, rq->cpu, | ||
574 | rt_rq->highest_prio); | ||
575 | } | ||
576 | |||
443 | update_rt_migration(rq_of_rt_rq(rt_rq)); | 577 | update_rt_migration(rq_of_rt_rq(rt_rq)); |
444 | #endif /* CONFIG_SMP */ | 578 | #endif /* CONFIG_SMP */ |
445 | #ifdef CONFIG_RT_GROUP_SCHED | 579 | #ifdef CONFIG_RT_GROUP_SCHED |
@@ -455,6 +589,7 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se) | |||
455 | struct rt_rq *rt_rq = rt_rq_of_se(rt_se); | 589 | struct rt_rq *rt_rq = rt_rq_of_se(rt_se); |
456 | struct rt_prio_array *array = &rt_rq->active; | 590 | struct rt_prio_array *array = &rt_rq->active; |
457 | struct rt_rq *group_rq = group_rt_rq(rt_se); | 591 | struct rt_rq *group_rq = group_rt_rq(rt_se); |
592 | struct list_head *queue = array->queue + rt_se_prio(rt_se); | ||
458 | 593 | ||
459 | /* | 594 | /* |
460 | * Don't enqueue the group if its throttled, or when empty. | 595 | * Don't enqueue the group if its throttled, or when empty. |
@@ -465,7 +600,11 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se) | |||
465 | if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) | 600 | if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) |
466 | return; | 601 | return; |
467 | 602 | ||
468 | list_add_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se)); | 603 | if (rt_se->nr_cpus_allowed == 1) |
604 | list_add(&rt_se->run_list, queue); | ||
605 | else | ||
606 | list_add_tail(&rt_se->run_list, queue); | ||
607 | |||
469 | __set_bit(rt_se_prio(rt_se), array->bitmap); | 608 | __set_bit(rt_se_prio(rt_se), array->bitmap); |
470 | 609 | ||
471 | inc_rt_tasks(rt_se, rt_rq); | 610 | inc_rt_tasks(rt_se, rt_rq); |
@@ -552,8 +691,11 @@ void requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se) | |||
552 | struct rt_prio_array *array = &rt_rq->active; | 691 | struct rt_prio_array *array = &rt_rq->active; |
553 | struct list_head *queue = array->queue + rt_se_prio(rt_se); | 692 | struct list_head *queue = array->queue + rt_se_prio(rt_se); |
554 | 693 | ||
555 | if (on_rt_rq(rt_se)) | 694 | if (on_rt_rq(rt_se)) { |
556 | list_move_tail(&rt_se->run_list, queue); | 695 | list_del_init(&rt_se->run_list); |
696 | list_add_tail(&rt_se->run_list, | ||
697 | array->queue + rt_se_prio(rt_se)); | ||
698 | } | ||
557 | } | 699 | } |
558 | 700 | ||
559 | static void requeue_task_rt(struct rq *rq, struct task_struct *p) | 701 | static void requeue_task_rt(struct rq *rq, struct task_struct *p) |
@@ -616,8 +758,37 @@ static int select_task_rq_rt(struct task_struct *p, int sync) | |||
616 | */ | 758 | */ |
617 | static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p) | 759 | static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p) |
618 | { | 760 | { |
619 | if (p->prio < rq->curr->prio) | 761 | if (p->prio < rq->curr->prio) { |
620 | resched_task(rq->curr); | 762 | resched_task(rq->curr); |
763 | return; | ||
764 | } | ||
765 | |||
766 | #ifdef CONFIG_SMP | ||
767 | /* | ||
768 | * If: | ||
769 | * | ||
770 | * - the newly woken task is of equal priority to the current task | ||
771 | * - the newly woken task is non-migratable while current is migratable | ||
772 | * - current will be preempted on the next reschedule | ||
773 | * | ||
774 | * we should check to see if current can readily move to a different | ||
775 | * cpu. If so, we will reschedule to allow the push logic to try | ||
776 | * to move current somewhere else, making room for our non-migratable | ||
777 | * task. | ||
778 | */ | ||
779 | if ((p->prio == rq->curr->prio) | ||
780 | && p->rt.nr_cpus_allowed == 1 | ||
781 | && rq->curr->rt.nr_cpus_allowed != 1) { | ||
782 | cpumask_t mask; | ||
783 | |||
784 | if (cpupri_find(&rq->rd->cpupri, rq->curr, &mask)) | ||
785 | /* | ||
786 | * There appears to be other cpus that can accept | ||
787 | * current, so lets reschedule to try and push it away | ||
788 | */ | ||
789 | resched_task(rq->curr); | ||
790 | } | ||
791 | #endif | ||
621 | } | 792 | } |
622 | 793 | ||
623 | static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq, | 794 | static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq, |
@@ -720,73 +891,6 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu) | |||
720 | 891 | ||
721 | static DEFINE_PER_CPU(cpumask_t, local_cpu_mask); | 892 | static DEFINE_PER_CPU(cpumask_t, local_cpu_mask); |
722 | 893 | ||
723 | static int find_lowest_cpus(struct task_struct *task, cpumask_t *lowest_mask) | ||
724 | { | ||
725 | int lowest_prio = -1; | ||
726 | int lowest_cpu = -1; | ||
727 | int count = 0; | ||
728 | int cpu; | ||
729 | |||
730 | cpus_and(*lowest_mask, task_rq(task)->rd->online, task->cpus_allowed); | ||
731 | |||
732 | /* | ||
733 | * Scan each rq for the lowest prio. | ||
734 | */ | ||
735 | for_each_cpu_mask(cpu, *lowest_mask) { | ||
736 | struct rq *rq = cpu_rq(cpu); | ||
737 | |||
738 | /* We look for lowest RT prio or non-rt CPU */ | ||
739 | if (rq->rt.highest_prio >= MAX_RT_PRIO) { | ||
740 | /* | ||
741 | * if we already found a low RT queue | ||
742 | * and now we found this non-rt queue | ||
743 | * clear the mask and set our bit. | ||
744 | * Otherwise just return the queue as is | ||
745 | * and the count==1 will cause the algorithm | ||
746 | * to use the first bit found. | ||
747 | */ | ||
748 | if (lowest_cpu != -1) { | ||
749 | cpus_clear(*lowest_mask); | ||
750 | cpu_set(rq->cpu, *lowest_mask); | ||
751 | } | ||
752 | return 1; | ||
753 | } | ||
754 | |||
755 | /* no locking for now */ | ||
756 | if ((rq->rt.highest_prio > task->prio) | ||
757 | && (rq->rt.highest_prio >= lowest_prio)) { | ||
758 | if (rq->rt.highest_prio > lowest_prio) { | ||
759 | /* new low - clear old data */ | ||
760 | lowest_prio = rq->rt.highest_prio; | ||
761 | lowest_cpu = cpu; | ||
762 | count = 0; | ||
763 | } | ||
764 | count++; | ||
765 | } else | ||
766 | cpu_clear(cpu, *lowest_mask); | ||
767 | } | ||
768 | |||
769 | /* | ||
770 | * Clear out all the set bits that represent | ||
771 | * runqueues that were of higher prio than | ||
772 | * the lowest_prio. | ||
773 | */ | ||
774 | if (lowest_cpu > 0) { | ||
775 | /* | ||
776 | * Perhaps we could add another cpumask op to | ||
777 | * zero out bits. Like cpu_zero_bits(cpumask, nrbits); | ||
778 | * Then that could be optimized to use memset and such. | ||
779 | */ | ||
780 | for_each_cpu_mask(cpu, *lowest_mask) { | ||
781 | if (cpu >= lowest_cpu) | ||
782 | break; | ||
783 | cpu_clear(cpu, *lowest_mask); | ||
784 | } | ||
785 | } | ||
786 | |||
787 | return count; | ||
788 | } | ||
789 | |||
790 | static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask) | 894 | static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask) |
791 | { | 895 | { |
792 | int first; | 896 | int first; |
@@ -808,17 +912,12 @@ static int find_lowest_rq(struct task_struct *task) | |||
808 | cpumask_t *lowest_mask = &__get_cpu_var(local_cpu_mask); | 912 | cpumask_t *lowest_mask = &__get_cpu_var(local_cpu_mask); |
809 | int this_cpu = smp_processor_id(); | 913 | int this_cpu = smp_processor_id(); |
810 | int cpu = task_cpu(task); | 914 | int cpu = task_cpu(task); |
811 | int count = find_lowest_cpus(task, lowest_mask); | ||
812 | 915 | ||
813 | if (!count) | 916 | if (task->rt.nr_cpus_allowed == 1) |
814 | return -1; /* No targets found */ | 917 | return -1; /* No other targets possible */ |
815 | 918 | ||
816 | /* | 919 | if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask)) |
817 | * There is no sense in performing an optimal search if only one | 920 | return -1; /* No targets found */ |
818 | * target is found. | ||
819 | */ | ||
820 | if (count == 1) | ||
821 | return first_cpu(*lowest_mask); | ||
822 | 921 | ||
823 | /* | 922 | /* |
824 | * At this point we have built a mask of cpus representing the | 923 | * At this point we have built a mask of cpus representing the |
@@ -1163,17 +1262,25 @@ static void set_cpus_allowed_rt(struct task_struct *p, | |||
1163 | } | 1262 | } |
1164 | 1263 | ||
1165 | /* Assumes rq->lock is held */ | 1264 | /* Assumes rq->lock is held */ |
1166 | static void join_domain_rt(struct rq *rq) | 1265 | static void rq_online_rt(struct rq *rq) |
1167 | { | 1266 | { |
1168 | if (rq->rt.overloaded) | 1267 | if (rq->rt.overloaded) |
1169 | rt_set_overload(rq); | 1268 | rt_set_overload(rq); |
1269 | |||
1270 | __enable_runtime(rq); | ||
1271 | |||
1272 | cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio); | ||
1170 | } | 1273 | } |
1171 | 1274 | ||
1172 | /* Assumes rq->lock is held */ | 1275 | /* Assumes rq->lock is held */ |
1173 | static void leave_domain_rt(struct rq *rq) | 1276 | static void rq_offline_rt(struct rq *rq) |
1174 | { | 1277 | { |
1175 | if (rq->rt.overloaded) | 1278 | if (rq->rt.overloaded) |
1176 | rt_clear_overload(rq); | 1279 | rt_clear_overload(rq); |
1280 | |||
1281 | __disable_runtime(rq); | ||
1282 | |||
1283 | cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID); | ||
1177 | } | 1284 | } |
1178 | 1285 | ||
1179 | /* | 1286 | /* |
@@ -1336,8 +1443,8 @@ static const struct sched_class rt_sched_class = { | |||
1336 | .load_balance = load_balance_rt, | 1443 | .load_balance = load_balance_rt, |
1337 | .move_one_task = move_one_task_rt, | 1444 | .move_one_task = move_one_task_rt, |
1338 | .set_cpus_allowed = set_cpus_allowed_rt, | 1445 | .set_cpus_allowed = set_cpus_allowed_rt, |
1339 | .join_domain = join_domain_rt, | 1446 | .rq_online = rq_online_rt, |
1340 | .leave_domain = leave_domain_rt, | 1447 | .rq_offline = rq_offline_rt, |
1341 | .pre_schedule = pre_schedule_rt, | 1448 | .pre_schedule = pre_schedule_rt, |
1342 | .post_schedule = post_schedule_rt, | 1449 | .post_schedule = post_schedule_rt, |
1343 | .task_wake_up = task_wake_up_rt, | 1450 | .task_wake_up = task_wake_up_rt, |
@@ -1350,3 +1457,17 @@ static const struct sched_class rt_sched_class = { | |||
1350 | .prio_changed = prio_changed_rt, | 1457 | .prio_changed = prio_changed_rt, |
1351 | .switched_to = switched_to_rt, | 1458 | .switched_to = switched_to_rt, |
1352 | }; | 1459 | }; |
1460 | |||
1461 | #ifdef CONFIG_SCHED_DEBUG | ||
1462 | extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq); | ||
1463 | |||
1464 | static void print_rt_stats(struct seq_file *m, int cpu) | ||
1465 | { | ||
1466 | struct rt_rq *rt_rq; | ||
1467 | |||
1468 | rcu_read_lock(); | ||
1469 | for_each_leaf_rt_rq(rt_rq, cpu_rq(cpu)) | ||
1470 | print_rt_rq(m, cpu, rt_rq); | ||
1471 | rcu_read_unlock(); | ||
1472 | } | ||
1473 | #endif | ||