diff options
-rw-r--r-- | include/linux/latencytop.h | 3 | ||||
-rw-r--r-- | include/linux/sched.h | 9 | ||||
-rw-r--r-- | kernel/Makefile | 10 | ||||
-rw-r--r-- | kernel/sched.c | 1828 | ||||
-rw-r--r-- | kernel/sched.h | 1064 | ||||
-rw-r--r-- | kernel/sched_autogroup.c | 33 | ||||
-rw-r--r-- | kernel/sched_autogroup.h | 26 | ||||
-rw-r--r-- | kernel/sched_debug.c | 4 | ||||
-rw-r--r-- | kernel/sched_fair.c | 580 | ||||
-rw-r--r-- | kernel/sched_idletask.c | 4 | ||||
-rw-r--r-- | kernel/sched_rt.c | 209 | ||||
-rw-r--r-- | kernel/sched_stats.c | 111 | ||||
-rw-r--r-- | kernel/sched_stats.h | 103 | ||||
-rw-r--r-- | kernel/sched_stoptask.c | 4 |
14 files changed, 2034 insertions, 1954 deletions
diff --git a/include/linux/latencytop.h b/include/linux/latencytop.h index b0e99898527c..e23121f9d82a 100644 --- a/include/linux/latencytop.h +++ b/include/linux/latencytop.h | |||
@@ -10,6 +10,8 @@ | |||
10 | #define _INCLUDE_GUARD_LATENCYTOP_H_ | 10 | #define _INCLUDE_GUARD_LATENCYTOP_H_ |
11 | 11 | ||
12 | #include <linux/compiler.h> | 12 | #include <linux/compiler.h> |
13 | struct task_struct; | ||
14 | |||
13 | #ifdef CONFIG_LATENCYTOP | 15 | #ifdef CONFIG_LATENCYTOP |
14 | 16 | ||
15 | #define LT_SAVECOUNT 32 | 17 | #define LT_SAVECOUNT 32 |
@@ -23,7 +25,6 @@ struct latency_record { | |||
23 | }; | 25 | }; |
24 | 26 | ||
25 | 27 | ||
26 | struct task_struct; | ||
27 | 28 | ||
28 | extern int latencytop_enabled; | 29 | extern int latencytop_enabled; |
29 | void __account_scheduler_latency(struct task_struct *task, int usecs, int inter); | 30 | void __account_scheduler_latency(struct task_struct *task, int usecs, int inter); |
diff --git a/include/linux/sched.h b/include/linux/sched.h index 68daf4f27e2c..8db17b7622ec 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -925,6 +925,15 @@ static inline struct cpumask *sched_group_cpus(struct sched_group *sg) | |||
925 | return to_cpumask(sg->cpumask); | 925 | return to_cpumask(sg->cpumask); |
926 | } | 926 | } |
927 | 927 | ||
928 | /** | ||
929 | * group_first_cpu - Returns the first cpu in the cpumask of a sched_group. | ||
930 | * @group: The group whose first cpu is to be returned. | ||
931 | */ | ||
932 | static inline unsigned int group_first_cpu(struct sched_group *group) | ||
933 | { | ||
934 | return cpumask_first(sched_group_cpus(group)); | ||
935 | } | ||
936 | |||
928 | struct sched_domain_attr { | 937 | struct sched_domain_attr { |
929 | int relax_domain_level; | 938 | int relax_domain_level; |
930 | }; | 939 | }; |
diff --git a/kernel/Makefile b/kernel/Makefile index e898c5b9d02c..1a4d37d7f39a 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -2,7 +2,7 @@ | |||
2 | # Makefile for the linux kernel. | 2 | # Makefile for the linux kernel. |
3 | # | 3 | # |
4 | 4 | ||
5 | obj-y = sched.o fork.o exec_domain.o panic.o printk.o \ | 5 | obj-y = fork.o exec_domain.o panic.o printk.o \ |
6 | cpu.o exit.o itimer.o time.o softirq.o resource.o \ | 6 | cpu.o exit.o itimer.o time.o softirq.o resource.o \ |
7 | sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \ | 7 | sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \ |
8 | signal.o sys.o kmod.o workqueue.o pid.o \ | 8 | signal.o sys.o kmod.o workqueue.o pid.o \ |
@@ -10,8 +10,12 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \ | |||
10 | kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ | 10 | kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ |
11 | hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ | 11 | hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ |
12 | notifier.o ksysfs.o sched_clock.o cred.o \ | 12 | notifier.o ksysfs.o sched_clock.o cred.o \ |
13 | async.o range.o | 13 | async.o range.o groups.o |
14 | obj-y += groups.o | 14 | |
15 | obj-y += sched.o sched_idletask.o sched_fair.o sched_rt.o sched_stoptask.o | ||
16 | obj-$(CONFIG_SCHED_AUTOGROUP) += sched_autogroup.o | ||
17 | obj-$(CONFIG_SCHEDSTATS) += sched_stats.o | ||
18 | obj-$(CONFIG_SCHED_DEBUG) += sched_debug.o | ||
15 | 19 | ||
16 | ifdef CONFIG_FUNCTION_TRACER | 20 | ifdef CONFIG_FUNCTION_TRACER |
17 | # Do not trace debug files and internal ftrace files | 21 | # Do not trace debug files and internal ftrace files |
diff --git a/kernel/sched.c b/kernel/sched.c index c9e3ab6e299e..2ffcceed8862 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -56,7 +56,6 @@ | |||
56 | #include <linux/percpu.h> | 56 | #include <linux/percpu.h> |
57 | #include <linux/proc_fs.h> | 57 | #include <linux/proc_fs.h> |
58 | #include <linux/seq_file.h> | 58 | #include <linux/seq_file.h> |
59 | #include <linux/stop_machine.h> | ||
60 | #include <linux/sysctl.h> | 59 | #include <linux/sysctl.h> |
61 | #include <linux/syscalls.h> | 60 | #include <linux/syscalls.h> |
62 | #include <linux/times.h> | 61 | #include <linux/times.h> |
@@ -72,133 +71,20 @@ | |||
72 | #include <linux/ftrace.h> | 71 | #include <linux/ftrace.h> |
73 | #include <linux/slab.h> | 72 | #include <linux/slab.h> |
74 | #include <linux/init_task.h> | 73 | #include <linux/init_task.h> |
75 | #include <linux/jump_label.h> | ||
76 | 74 | ||
77 | #include <asm/tlb.h> | 75 | #include <asm/tlb.h> |
78 | #include <asm/irq_regs.h> | 76 | #include <asm/irq_regs.h> |
79 | #include <asm/mutex.h> | ||
80 | #ifdef CONFIG_PARAVIRT | 77 | #ifdef CONFIG_PARAVIRT |
81 | #include <asm/paravirt.h> | 78 | #include <asm/paravirt.h> |
82 | #endif | 79 | #endif |
83 | 80 | ||
84 | #include "sched_cpupri.h" | 81 | #include "sched.h" |
85 | #include "workqueue_sched.h" | 82 | #include "workqueue_sched.h" |
86 | #include "sched_autogroup.h" | ||
87 | 83 | ||
88 | #define CREATE_TRACE_POINTS | 84 | #define CREATE_TRACE_POINTS |
89 | #include <trace/events/sched.h> | 85 | #include <trace/events/sched.h> |
90 | 86 | ||
91 | /* | 87 | void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period) |
92 | * Convert user-nice values [ -20 ... 0 ... 19 ] | ||
93 | * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], | ||
94 | * and back. | ||
95 | */ | ||
96 | #define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20) | ||
97 | #define PRIO_TO_NICE(prio) ((prio) - MAX_RT_PRIO - 20) | ||
98 | #define TASK_NICE(p) PRIO_TO_NICE((p)->static_prio) | ||
99 | |||
100 | /* | ||
101 | * 'User priority' is the nice value converted to something we | ||
102 | * can work with better when scaling various scheduler parameters, | ||
103 | * it's a [ 0 ... 39 ] range. | ||
104 | */ | ||
105 | #define USER_PRIO(p) ((p)-MAX_RT_PRIO) | ||
106 | #define TASK_USER_PRIO(p) USER_PRIO((p)->static_prio) | ||
107 | #define MAX_USER_PRIO (USER_PRIO(MAX_PRIO)) | ||
108 | |||
109 | /* | ||
110 | * Helpers for converting nanosecond timing to jiffy resolution | ||
111 | */ | ||
112 | #define NS_TO_JIFFIES(TIME) ((unsigned long)(TIME) / (NSEC_PER_SEC / HZ)) | ||
113 | |||
114 | #define NICE_0_LOAD SCHED_LOAD_SCALE | ||
115 | #define NICE_0_SHIFT SCHED_LOAD_SHIFT | ||
116 | |||
117 | /* | ||
118 | * These are the 'tuning knobs' of the scheduler: | ||
119 | * | ||
120 | * default timeslice is 100 msecs (used only for SCHED_RR tasks). | ||
121 | * Timeslices get refilled after they expire. | ||
122 | */ | ||
123 | #define DEF_TIMESLICE (100 * HZ / 1000) | ||
124 | |||
125 | /* | ||
126 | * single value that denotes runtime == period, ie unlimited time. | ||
127 | */ | ||
128 | #define RUNTIME_INF ((u64)~0ULL) | ||
129 | |||
130 | static inline int rt_policy(int policy) | ||
131 | { | ||
132 | if (policy == SCHED_FIFO || policy == SCHED_RR) | ||
133 | return 1; | ||
134 | return 0; | ||
135 | } | ||
136 | |||
137 | static inline int task_has_rt_policy(struct task_struct *p) | ||
138 | { | ||
139 | return rt_policy(p->policy); | ||
140 | } | ||
141 | |||
142 | /* | ||
143 | * This is the priority-queue data structure of the RT scheduling class: | ||
144 | */ | ||
145 | struct rt_prio_array { | ||
146 | DECLARE_BITMAP(bitmap, MAX_RT_PRIO+1); /* include 1 bit for delimiter */ | ||
147 | struct list_head queue[MAX_RT_PRIO]; | ||
148 | }; | ||
149 | |||
150 | struct rt_bandwidth { | ||
151 | /* nests inside the rq lock: */ | ||
152 | raw_spinlock_t rt_runtime_lock; | ||
153 | ktime_t rt_period; | ||
154 | u64 rt_runtime; | ||
155 | struct hrtimer rt_period_timer; | ||
156 | }; | ||
157 | |||
158 | static struct rt_bandwidth def_rt_bandwidth; | ||
159 | |||
160 | static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun); | ||
161 | |||
162 | static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer) | ||
163 | { | ||
164 | struct rt_bandwidth *rt_b = | ||
165 | container_of(timer, struct rt_bandwidth, rt_period_timer); | ||
166 | ktime_t now; | ||
167 | int overrun; | ||
168 | int idle = 0; | ||
169 | |||
170 | for (;;) { | ||
171 | now = hrtimer_cb_get_time(timer); | ||
172 | overrun = hrtimer_forward(timer, now, rt_b->rt_period); | ||
173 | |||
174 | if (!overrun) | ||
175 | break; | ||
176 | |||
177 | idle = do_sched_rt_period_timer(rt_b, overrun); | ||
178 | } | ||
179 | |||
180 | return idle ? HRTIMER_NORESTART : HRTIMER_RESTART; | ||
181 | } | ||
182 | |||
183 | static | ||
184 | void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime) | ||
185 | { | ||
186 | rt_b->rt_period = ns_to_ktime(period); | ||
187 | rt_b->rt_runtime = runtime; | ||
188 | |||
189 | raw_spin_lock_init(&rt_b->rt_runtime_lock); | ||
190 | |||
191 | hrtimer_init(&rt_b->rt_period_timer, | ||
192 | CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
193 | rt_b->rt_period_timer.function = sched_rt_period_timer; | ||
194 | } | ||
195 | |||
196 | static inline int rt_bandwidth_enabled(void) | ||
197 | { | ||
198 | return sysctl_sched_rt_runtime >= 0; | ||
199 | } | ||
200 | |||
201 | static void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period) | ||
202 | { | 88 | { |
203 | unsigned long delta; | 89 | unsigned long delta; |
204 | ktime_t soft, hard, now; | 90 | ktime_t soft, hard, now; |
@@ -218,609 +104,12 @@ static void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period) | |||
218 | } | 104 | } |
219 | } | 105 | } |
220 | 106 | ||
221 | static void start_rt_bandwidth(struct rt_bandwidth *rt_b) | 107 | DEFINE_MUTEX(sched_domains_mutex); |
222 | { | 108 | DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); |
223 | if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF) | ||
224 | return; | ||
225 | |||
226 | if (hrtimer_active(&rt_b->rt_period_timer)) | ||
227 | return; | ||
228 | |||
229 | raw_spin_lock(&rt_b->rt_runtime_lock); | ||
230 | start_bandwidth_timer(&rt_b->rt_period_timer, rt_b->rt_period); | ||
231 | raw_spin_unlock(&rt_b->rt_runtime_lock); | ||
232 | } | ||
233 | |||
234 | #ifdef CONFIG_RT_GROUP_SCHED | ||
235 | static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b) | ||
236 | { | ||
237 | hrtimer_cancel(&rt_b->rt_period_timer); | ||
238 | } | ||
239 | #endif | ||
240 | |||
241 | /* | ||
242 | * sched_domains_mutex serializes calls to init_sched_domains, | ||
243 | * detach_destroy_domains and partition_sched_domains. | ||
244 | */ | ||
245 | static DEFINE_MUTEX(sched_domains_mutex); | ||
246 | |||
247 | #ifdef CONFIG_CGROUP_SCHED | ||
248 | |||
249 | #include <linux/cgroup.h> | ||
250 | |||
251 | struct cfs_rq; | ||
252 | |||
253 | static LIST_HEAD(task_groups); | ||
254 | |||
255 | struct cfs_bandwidth { | ||
256 | #ifdef CONFIG_CFS_BANDWIDTH | ||
257 | raw_spinlock_t lock; | ||
258 | ktime_t period; | ||
259 | u64 quota, runtime; | ||
260 | s64 hierarchal_quota; | ||
261 | u64 runtime_expires; | ||
262 | |||
263 | int idle, timer_active; | ||
264 | struct hrtimer period_timer, slack_timer; | ||
265 | struct list_head throttled_cfs_rq; | ||
266 | |||
267 | /* statistics */ | ||
268 | int nr_periods, nr_throttled; | ||
269 | u64 throttled_time; | ||
270 | #endif | ||
271 | }; | ||
272 | |||
273 | /* task group related information */ | ||
274 | struct task_group { | ||
275 | struct cgroup_subsys_state css; | ||
276 | |||
277 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
278 | /* schedulable entities of this group on each cpu */ | ||
279 | struct sched_entity **se; | ||
280 | /* runqueue "owned" by this group on each cpu */ | ||
281 | struct cfs_rq **cfs_rq; | ||
282 | unsigned long shares; | ||
283 | |||
284 | atomic_t load_weight; | ||
285 | #endif | ||
286 | |||
287 | #ifdef CONFIG_RT_GROUP_SCHED | ||
288 | struct sched_rt_entity **rt_se; | ||
289 | struct rt_rq **rt_rq; | ||
290 | |||
291 | struct rt_bandwidth rt_bandwidth; | ||
292 | #endif | ||
293 | |||
294 | struct rcu_head rcu; | ||
295 | struct list_head list; | ||
296 | |||
297 | struct task_group *parent; | ||
298 | struct list_head siblings; | ||
299 | struct list_head children; | ||
300 | |||
301 | #ifdef CONFIG_SCHED_AUTOGROUP | ||
302 | struct autogroup *autogroup; | ||
303 | #endif | ||
304 | |||
305 | struct cfs_bandwidth cfs_bandwidth; | ||
306 | }; | ||
307 | |||
308 | /* task_group_lock serializes the addition/removal of task groups */ | ||
309 | static DEFINE_SPINLOCK(task_group_lock); | ||
310 | |||
311 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
312 | |||
313 | # define ROOT_TASK_GROUP_LOAD NICE_0_LOAD | ||
314 | |||
315 | /* | ||
316 | * A weight of 0 or 1 can cause arithmetics problems. | ||
317 | * A weight of a cfs_rq is the sum of weights of which entities | ||
318 | * are queued on this cfs_rq, so a weight of a entity should not be | ||
319 | * too large, so as the shares value of a task group. | ||
320 | * (The default weight is 1024 - so there's no practical | ||
321 | * limitation from this.) | ||
322 | */ | ||
323 | #define MIN_SHARES (1UL << 1) | ||
324 | #define MAX_SHARES (1UL << 18) | ||
325 | |||
326 | static int root_task_group_load = ROOT_TASK_GROUP_LOAD; | ||
327 | #endif | ||
328 | |||
329 | /* Default task group. | ||
330 | * Every task in system belong to this group at bootup. | ||
331 | */ | ||
332 | struct task_group root_task_group; | ||
333 | |||
334 | #endif /* CONFIG_CGROUP_SCHED */ | ||
335 | |||
336 | /* CFS-related fields in a runqueue */ | ||
337 | struct cfs_rq { | ||
338 | struct load_weight load; | ||
339 | unsigned long nr_running, h_nr_running; | ||
340 | |||
341 | u64 exec_clock; | ||
342 | u64 min_vruntime; | ||
343 | #ifndef CONFIG_64BIT | ||
344 | u64 min_vruntime_copy; | ||
345 | #endif | ||
346 | |||
347 | struct rb_root tasks_timeline; | ||
348 | struct rb_node *rb_leftmost; | ||
349 | |||
350 | struct list_head tasks; | ||
351 | struct list_head *balance_iterator; | ||
352 | |||
353 | /* | ||
354 | * 'curr' points to currently running entity on this cfs_rq. | ||
355 | * It is set to NULL otherwise (i.e when none are currently running). | ||
356 | */ | ||
357 | struct sched_entity *curr, *next, *last, *skip; | ||
358 | |||
359 | #ifdef CONFIG_SCHED_DEBUG | ||
360 | unsigned int nr_spread_over; | ||
361 | #endif | ||
362 | |||
363 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
364 | struct rq *rq; /* cpu runqueue to which this cfs_rq is attached */ | ||
365 | |||
366 | /* | ||
367 | * leaf cfs_rqs are those that hold tasks (lowest schedulable entity in | ||
368 | * a hierarchy). Non-leaf lrqs hold other higher schedulable entities | ||
369 | * (like users, containers etc.) | ||
370 | * | ||
371 | * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a cpu. This | ||
372 | * list is used during load balance. | ||
373 | */ | ||
374 | int on_list; | ||
375 | struct list_head leaf_cfs_rq_list; | ||
376 | struct task_group *tg; /* group that "owns" this runqueue */ | ||
377 | |||
378 | #ifdef CONFIG_SMP | ||
379 | /* | ||
380 | * the part of load.weight contributed by tasks | ||
381 | */ | ||
382 | unsigned long task_weight; | ||
383 | |||
384 | /* | ||
385 | * h_load = weight * f(tg) | ||
386 | * | ||
387 | * Where f(tg) is the recursive weight fraction assigned to | ||
388 | * this group. | ||
389 | */ | ||
390 | unsigned long h_load; | ||
391 | |||
392 | /* | ||
393 | * Maintaining per-cpu shares distribution for group scheduling | ||
394 | * | ||
395 | * load_stamp is the last time we updated the load average | ||
396 | * load_last is the last time we updated the load average and saw load | ||
397 | * load_unacc_exec_time is currently unaccounted execution time | ||
398 | */ | ||
399 | u64 load_avg; | ||
400 | u64 load_period; | ||
401 | u64 load_stamp, load_last, load_unacc_exec_time; | ||
402 | |||
403 | unsigned long load_contribution; | ||
404 | #endif | ||
405 | #ifdef CONFIG_CFS_BANDWIDTH | ||
406 | int runtime_enabled; | ||
407 | u64 runtime_expires; | ||
408 | s64 runtime_remaining; | ||
409 | |||
410 | u64 throttled_timestamp; | ||
411 | int throttled, throttle_count; | ||
412 | struct list_head throttled_list; | ||
413 | #endif | ||
414 | #endif | ||
415 | }; | ||
416 | |||
417 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
418 | #ifdef CONFIG_CFS_BANDWIDTH | ||
419 | static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg) | ||
420 | { | ||
421 | return &tg->cfs_bandwidth; | ||
422 | } | ||
423 | |||
424 | static inline u64 default_cfs_period(void); | ||
425 | static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun); | ||
426 | static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b); | ||
427 | |||
428 | static enum hrtimer_restart sched_cfs_slack_timer(struct hrtimer *timer) | ||
429 | { | ||
430 | struct cfs_bandwidth *cfs_b = | ||
431 | container_of(timer, struct cfs_bandwidth, slack_timer); | ||
432 | do_sched_cfs_slack_timer(cfs_b); | ||
433 | |||
434 | return HRTIMER_NORESTART; | ||
435 | } | ||
436 | |||
437 | static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer) | ||
438 | { | ||
439 | struct cfs_bandwidth *cfs_b = | ||
440 | container_of(timer, struct cfs_bandwidth, period_timer); | ||
441 | ktime_t now; | ||
442 | int overrun; | ||
443 | int idle = 0; | ||
444 | |||
445 | for (;;) { | ||
446 | now = hrtimer_cb_get_time(timer); | ||
447 | overrun = hrtimer_forward(timer, now, cfs_b->period); | ||
448 | |||
449 | if (!overrun) | ||
450 | break; | ||
451 | |||
452 | idle = do_sched_cfs_period_timer(cfs_b, overrun); | ||
453 | } | ||
454 | |||
455 | return idle ? HRTIMER_NORESTART : HRTIMER_RESTART; | ||
456 | } | ||
457 | |||
458 | static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) | ||
459 | { | ||
460 | raw_spin_lock_init(&cfs_b->lock); | ||
461 | cfs_b->runtime = 0; | ||
462 | cfs_b->quota = RUNTIME_INF; | ||
463 | cfs_b->period = ns_to_ktime(default_cfs_period()); | ||
464 | |||
465 | INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq); | ||
466 | hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
467 | cfs_b->period_timer.function = sched_cfs_period_timer; | ||
468 | hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
469 | cfs_b->slack_timer.function = sched_cfs_slack_timer; | ||
470 | } | ||
471 | |||
472 | static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) | ||
473 | { | ||
474 | cfs_rq->runtime_enabled = 0; | ||
475 | INIT_LIST_HEAD(&cfs_rq->throttled_list); | ||
476 | } | ||
477 | |||
478 | /* requires cfs_b->lock, may release to reprogram timer */ | ||
479 | static void __start_cfs_bandwidth(struct cfs_bandwidth *cfs_b) | ||
480 | { | ||
481 | /* | ||
482 | * The timer may be active because we're trying to set a new bandwidth | ||
483 | * period or because we're racing with the tear-down path | ||
484 | * (timer_active==0 becomes visible before the hrtimer call-back | ||
485 | * terminates). In either case we ensure that it's re-programmed | ||
486 | */ | ||
487 | while (unlikely(hrtimer_active(&cfs_b->period_timer))) { | ||
488 | raw_spin_unlock(&cfs_b->lock); | ||
489 | /* ensure cfs_b->lock is available while we wait */ | ||
490 | hrtimer_cancel(&cfs_b->period_timer); | ||
491 | |||
492 | raw_spin_lock(&cfs_b->lock); | ||
493 | /* if someone else restarted the timer then we're done */ | ||
494 | if (cfs_b->timer_active) | ||
495 | return; | ||
496 | } | ||
497 | |||
498 | cfs_b->timer_active = 1; | ||
499 | start_bandwidth_timer(&cfs_b->period_timer, cfs_b->period); | ||
500 | } | ||
501 | |||
502 | static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) | ||
503 | { | ||
504 | hrtimer_cancel(&cfs_b->period_timer); | ||
505 | hrtimer_cancel(&cfs_b->slack_timer); | ||
506 | } | ||
507 | |||
508 | #ifdef HAVE_JUMP_LABEL | ||
509 | static struct jump_label_key __cfs_bandwidth_used; | ||
510 | |||
511 | static inline bool cfs_bandwidth_used(void) | ||
512 | { | ||
513 | return static_branch(&__cfs_bandwidth_used); | ||
514 | } | ||
515 | |||
516 | static void account_cfs_bandwidth_used(int enabled, int was_enabled) | ||
517 | { | ||
518 | /* only need to count groups transitioning between enabled/!enabled */ | ||
519 | if (enabled && !was_enabled) | ||
520 | jump_label_inc(&__cfs_bandwidth_used); | ||
521 | else if (!enabled && was_enabled) | ||
522 | jump_label_dec(&__cfs_bandwidth_used); | ||
523 | } | ||
524 | #else /* !HAVE_JUMP_LABEL */ | ||
525 | /* static_branch doesn't help unless supported */ | ||
526 | static int cfs_bandwidth_used(void) | ||
527 | { | ||
528 | return 1; | ||
529 | } | ||
530 | static void account_cfs_bandwidth_used(int enabled, int was_enabled) {} | ||
531 | #endif /* HAVE_JUMP_LABEL */ | ||
532 | #else /* !CONFIG_CFS_BANDWIDTH */ | ||
533 | static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} | ||
534 | static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {} | ||
535 | static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {} | ||
536 | |||
537 | static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg) | ||
538 | { | ||
539 | return NULL; | ||
540 | } | ||
541 | #endif /* CONFIG_CFS_BANDWIDTH */ | ||
542 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | ||
543 | |||
544 | /* Real-Time classes' related field in a runqueue: */ | ||
545 | struct rt_rq { | ||
546 | struct rt_prio_array active; | ||
547 | unsigned long rt_nr_running; | ||
548 | #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED | ||
549 | struct { | ||
550 | int curr; /* highest queued rt task prio */ | ||
551 | #ifdef CONFIG_SMP | ||
552 | int next; /* next highest */ | ||
553 | #endif | ||
554 | } highest_prio; | ||
555 | #endif | ||
556 | #ifdef CONFIG_SMP | ||
557 | unsigned long rt_nr_migratory; | ||
558 | unsigned long rt_nr_total; | ||
559 | int overloaded; | ||
560 | struct plist_head pushable_tasks; | ||
561 | #endif | ||
562 | int rt_throttled; | ||
563 | u64 rt_time; | ||
564 | u64 rt_runtime; | ||
565 | /* Nests inside the rq lock: */ | ||
566 | raw_spinlock_t rt_runtime_lock; | ||
567 | |||
568 | #ifdef CONFIG_RT_GROUP_SCHED | ||
569 | unsigned long rt_nr_boosted; | ||
570 | |||
571 | struct rq *rq; | ||
572 | struct list_head leaf_rt_rq_list; | ||
573 | struct task_group *tg; | ||
574 | #endif | ||
575 | }; | ||
576 | |||
577 | #ifdef CONFIG_SMP | ||
578 | |||
579 | /* | ||
580 | * We add the notion of a root-domain which will be used to define per-domain | ||
581 | * variables. Each exclusive cpuset essentially defines an island domain by | ||
582 | * fully partitioning the member cpus from any other cpuset. Whenever a new | ||
583 | * exclusive cpuset is created, we also create and attach a new root-domain | ||
584 | * object. | ||
585 | * | ||
586 | */ | ||
587 | struct root_domain { | ||
588 | atomic_t refcount; | ||
589 | atomic_t rto_count; | ||
590 | struct rcu_head rcu; | ||
591 | cpumask_var_t span; | ||
592 | cpumask_var_t online; | ||
593 | |||
594 | /* | ||
595 | * The "RT overload" flag: it gets set if a CPU has more than | ||
596 | * one runnable RT task. | ||
597 | */ | ||
598 | cpumask_var_t rto_mask; | ||
599 | struct cpupri cpupri; | ||
600 | }; | ||
601 | |||
602 | /* | ||
603 | * By default the system creates a single root-domain with all cpus as | ||
604 | * members (mimicking the global state we have today). | ||
605 | */ | ||
606 | static struct root_domain def_root_domain; | ||
607 | |||
608 | #endif /* CONFIG_SMP */ | ||
609 | |||
610 | /* | ||
611 | * This is the main, per-CPU runqueue data structure. | ||
612 | * | ||
613 | * Locking rule: those places that want to lock multiple runqueues | ||
614 | * (such as the load balancing or the thread migration code), lock | ||
615 | * acquire operations must be ordered by ascending &runqueue. | ||
616 | */ | ||
617 | struct rq { | ||
618 | /* runqueue lock: */ | ||
619 | raw_spinlock_t lock; | ||
620 | |||
621 | /* | ||
622 | * nr_running and cpu_load should be in the same cacheline because | ||
623 | * remote CPUs use both these fields when doing load calculation. | ||
624 | */ | ||
625 | unsigned long nr_running; | ||
626 | #define CPU_LOAD_IDX_MAX 5 | ||
627 | unsigned long cpu_load[CPU_LOAD_IDX_MAX]; | ||
628 | unsigned long last_load_update_tick; | ||
629 | #ifdef CONFIG_NO_HZ | ||
630 | u64 nohz_stamp; | ||
631 | unsigned char nohz_balance_kick; | ||
632 | #endif | ||
633 | int skip_clock_update; | ||
634 | |||
635 | /* capture load from *all* tasks on this cpu: */ | ||
636 | struct load_weight load; | ||
637 | unsigned long nr_load_updates; | ||
638 | u64 nr_switches; | ||
639 | |||
640 | struct cfs_rq cfs; | ||
641 | struct rt_rq rt; | ||
642 | |||
643 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
644 | /* list of leaf cfs_rq on this cpu: */ | ||
645 | struct list_head leaf_cfs_rq_list; | ||
646 | #endif | ||
647 | #ifdef CONFIG_RT_GROUP_SCHED | ||
648 | struct list_head leaf_rt_rq_list; | ||
649 | #endif | ||
650 | |||
651 | /* | ||
652 | * This is part of a global counter where only the total sum | ||
653 | * over all CPUs matters. A task can increase this counter on | ||
654 | * one CPU and if it got migrated afterwards it may decrease | ||
655 | * it on another CPU. Always updated under the runqueue lock: | ||
656 | */ | ||
657 | unsigned long nr_uninterruptible; | ||
658 | |||
659 | struct task_struct *curr, *idle, *stop; | ||
660 | unsigned long next_balance; | ||
661 | struct mm_struct *prev_mm; | ||
662 | |||
663 | u64 clock; | ||
664 | u64 clock_task; | ||
665 | |||
666 | atomic_t nr_iowait; | ||
667 | |||
668 | #ifdef CONFIG_SMP | ||
669 | struct root_domain *rd; | ||
670 | struct sched_domain *sd; | ||
671 | |||
672 | unsigned long cpu_power; | ||
673 | |||
674 | unsigned char idle_balance; | ||
675 | /* For active balancing */ | ||
676 | int post_schedule; | ||
677 | int active_balance; | ||
678 | int push_cpu; | ||
679 | struct cpu_stop_work active_balance_work; | ||
680 | /* cpu of this runqueue: */ | ||
681 | int cpu; | ||
682 | int online; | ||
683 | |||
684 | u64 rt_avg; | ||
685 | u64 age_stamp; | ||
686 | u64 idle_stamp; | ||
687 | u64 avg_idle; | ||
688 | #endif | ||
689 | |||
690 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | ||
691 | u64 prev_irq_time; | ||
692 | #endif | ||
693 | #ifdef CONFIG_PARAVIRT | ||
694 | u64 prev_steal_time; | ||
695 | #endif | ||
696 | #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING | ||
697 | u64 prev_steal_time_rq; | ||
698 | #endif | ||
699 | |||
700 | /* calc_load related fields */ | ||
701 | unsigned long calc_load_update; | ||
702 | long calc_load_active; | ||
703 | |||
704 | #ifdef CONFIG_SCHED_HRTICK | ||
705 | #ifdef CONFIG_SMP | ||
706 | int hrtick_csd_pending; | ||
707 | struct call_single_data hrtick_csd; | ||
708 | #endif | ||
709 | struct hrtimer hrtick_timer; | ||
710 | #endif | ||
711 | |||
712 | #ifdef CONFIG_SCHEDSTATS | ||
713 | /* latency stats */ | ||
714 | struct sched_info rq_sched_info; | ||
715 | unsigned long long rq_cpu_time; | ||
716 | /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */ | ||
717 | |||
718 | /* sys_sched_yield() stats */ | ||
719 | unsigned int yld_count; | ||
720 | |||
721 | /* schedule() stats */ | ||
722 | unsigned int sched_switch; | ||
723 | unsigned int sched_count; | ||
724 | unsigned int sched_goidle; | ||
725 | |||
726 | /* try_to_wake_up() stats */ | ||
727 | unsigned int ttwu_count; | ||
728 | unsigned int ttwu_local; | ||
729 | #endif | ||
730 | |||
731 | #ifdef CONFIG_SMP | ||
732 | struct llist_head wake_list; | ||
733 | #endif | ||
734 | }; | ||
735 | |||
736 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); | ||
737 | |||
738 | |||
739 | static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags); | ||
740 | |||
741 | static inline int cpu_of(struct rq *rq) | ||
742 | { | ||
743 | #ifdef CONFIG_SMP | ||
744 | return rq->cpu; | ||
745 | #else | ||
746 | return 0; | ||
747 | #endif | ||
748 | } | ||
749 | |||
750 | #define rcu_dereference_check_sched_domain(p) \ | ||
751 | rcu_dereference_check((p), \ | ||
752 | lockdep_is_held(&sched_domains_mutex)) | ||
753 | |||
754 | /* | ||
755 | * The domain tree (rq->sd) is protected by RCU's quiescent state transition. | ||
756 | * See detach_destroy_domains: synchronize_sched for details. | ||
757 | * | ||
758 | * The domain tree of any CPU may only be accessed from within | ||
759 | * preempt-disabled sections. | ||
760 | */ | ||
761 | #define for_each_domain(cpu, __sd) \ | ||
762 | for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent) | ||
763 | |||
764 | #define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) | ||
765 | #define this_rq() (&__get_cpu_var(runqueues)) | ||
766 | #define task_rq(p) cpu_rq(task_cpu(p)) | ||
767 | #define cpu_curr(cpu) (cpu_rq(cpu)->curr) | ||
768 | #define raw_rq() (&__raw_get_cpu_var(runqueues)) | ||
769 | |||
770 | #ifdef CONFIG_CGROUP_SCHED | ||
771 | |||
772 | /* | ||
773 | * Return the group to which this tasks belongs. | ||
774 | * | ||
775 | * We use task_subsys_state_check() and extend the RCU verification with | ||
776 | * pi->lock and rq->lock because cpu_cgroup_attach() holds those locks for each | ||
777 | * task it moves into the cgroup. Therefore by holding either of those locks, | ||
778 | * we pin the task to the current cgroup. | ||
779 | */ | ||
780 | static inline struct task_group *task_group(struct task_struct *p) | ||
781 | { | ||
782 | struct task_group *tg; | ||
783 | struct cgroup_subsys_state *css; | ||
784 | |||
785 | css = task_subsys_state_check(p, cpu_cgroup_subsys_id, | ||
786 | lockdep_is_held(&p->pi_lock) || | ||
787 | lockdep_is_held(&task_rq(p)->lock)); | ||
788 | tg = container_of(css, struct task_group, css); | ||
789 | |||
790 | return autogroup_task_group(p, tg); | ||
791 | } | ||
792 | |||
793 | /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ | ||
794 | static inline void set_task_rq(struct task_struct *p, unsigned int cpu) | ||
795 | { | ||
796 | #if defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED) | ||
797 | struct task_group *tg = task_group(p); | ||
798 | #endif | ||
799 | |||
800 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
801 | p->se.cfs_rq = tg->cfs_rq[cpu]; | ||
802 | p->se.parent = tg->se[cpu]; | ||
803 | #endif | ||
804 | |||
805 | #ifdef CONFIG_RT_GROUP_SCHED | ||
806 | p->rt.rt_rq = tg->rt_rq[cpu]; | ||
807 | p->rt.parent = tg->rt_se[cpu]; | ||
808 | #endif | ||
809 | } | ||
810 | |||
811 | #else /* CONFIG_CGROUP_SCHED */ | ||
812 | |||
813 | static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } | ||
814 | static inline struct task_group *task_group(struct task_struct *p) | ||
815 | { | ||
816 | return NULL; | ||
817 | } | ||
818 | |||
819 | #endif /* CONFIG_CGROUP_SCHED */ | ||
820 | 109 | ||
821 | static void update_rq_clock_task(struct rq *rq, s64 delta); | 110 | static void update_rq_clock_task(struct rq *rq, s64 delta); |
822 | 111 | ||
823 | static void update_rq_clock(struct rq *rq) | 112 | void update_rq_clock(struct rq *rq) |
824 | { | 113 | { |
825 | s64 delta; | 114 | s64 delta; |
826 | 115 | ||
@@ -833,40 +122,10 @@ static void update_rq_clock(struct rq *rq) | |||
833 | } | 122 | } |
834 | 123 | ||
835 | /* | 124 | /* |
836 | * Tunables that become constants when CONFIG_SCHED_DEBUG is off: | ||
837 | */ | ||
838 | #ifdef CONFIG_SCHED_DEBUG | ||
839 | # define const_debug __read_mostly | ||
840 | #else | ||
841 | # define const_debug static const | ||
842 | #endif | ||
843 | |||
844 | /** | ||
845 | * runqueue_is_locked - Returns true if the current cpu runqueue is locked | ||
846 | * @cpu: the processor in question. | ||
847 | * | ||
848 | * This interface allows printk to be called with the runqueue lock | ||
849 | * held and know whether or not it is OK to wake up the klogd. | ||
850 | */ | ||
851 | int runqueue_is_locked(int cpu) | ||
852 | { | ||
853 | return raw_spin_is_locked(&cpu_rq(cpu)->lock); | ||
854 | } | ||
855 | |||
856 | /* | ||
857 | * Debugging: various feature bits | 125 | * Debugging: various feature bits |
858 | */ | 126 | */ |
859 | 127 | ||
860 | #define SCHED_FEAT(name, enabled) \ | 128 | #define SCHED_FEAT(name, enabled) \ |
861 | __SCHED_FEAT_##name , | ||
862 | |||
863 | enum { | ||
864 | #include "sched_features.h" | ||
865 | }; | ||
866 | |||
867 | #undef SCHED_FEAT | ||
868 | |||
869 | #define SCHED_FEAT(name, enabled) \ | ||
870 | (1UL << __SCHED_FEAT_##name) * enabled | | 129 | (1UL << __SCHED_FEAT_##name) * enabled | |
871 | 130 | ||
872 | const_debug unsigned int sysctl_sched_features = | 131 | const_debug unsigned int sysctl_sched_features = |
@@ -965,8 +224,6 @@ late_initcall(sched_init_debug); | |||
965 | 224 | ||
966 | #endif | 225 | #endif |
967 | 226 | ||
968 | #define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x)) | ||
969 | |||
970 | /* | 227 | /* |
971 | * Number of tasks to iterate in a single balance run. | 228 | * Number of tasks to iterate in a single balance run. |
972 | * Limited because this is done with IRQs disabled. | 229 | * Limited because this is done with IRQs disabled. |
@@ -987,7 +244,7 @@ const_debug unsigned int sysctl_sched_time_avg = MSEC_PER_SEC; | |||
987 | */ | 244 | */ |
988 | unsigned int sysctl_sched_rt_period = 1000000; | 245 | unsigned int sysctl_sched_rt_period = 1000000; |
989 | 246 | ||
990 | static __read_mostly int scheduler_running; | 247 | __read_mostly int scheduler_running; |
991 | 248 | ||
992 | /* | 249 | /* |
993 | * part of the period that we allow rt tasks to run in us. | 250 | * part of the period that we allow rt tasks to run in us. |
@@ -995,112 +252,7 @@ static __read_mostly int scheduler_running; | |||
995 | */ | 252 | */ |
996 | int sysctl_sched_rt_runtime = 950000; | 253 | int sysctl_sched_rt_runtime = 950000; |
997 | 254 | ||
998 | static inline u64 global_rt_period(void) | ||
999 | { | ||
1000 | return (u64)sysctl_sched_rt_period * NSEC_PER_USEC; | ||
1001 | } | ||
1002 | 255 | ||
1003 | static inline u64 global_rt_runtime(void) | ||
1004 | { | ||
1005 | if (sysctl_sched_rt_runtime < 0) | ||
1006 | return RUNTIME_INF; | ||
1007 | |||
1008 | return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC; | ||
1009 | } | ||
1010 | |||
1011 | #ifndef prepare_arch_switch | ||
1012 | # define prepare_arch_switch(next) do { } while (0) | ||
1013 | #endif | ||
1014 | #ifndef finish_arch_switch | ||
1015 | # define finish_arch_switch(prev) do { } while (0) | ||
1016 | #endif | ||
1017 | |||
1018 | static inline int task_current(struct rq *rq, struct task_struct *p) | ||
1019 | { | ||
1020 | return rq->curr == p; | ||
1021 | } | ||
1022 | |||
1023 | static inline int task_running(struct rq *rq, struct task_struct *p) | ||
1024 | { | ||
1025 | #ifdef CONFIG_SMP | ||
1026 | return p->on_cpu; | ||
1027 | #else | ||
1028 | return task_current(rq, p); | ||
1029 | #endif | ||
1030 | } | ||
1031 | |||
1032 | #ifndef __ARCH_WANT_UNLOCKED_CTXSW | ||
1033 | static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) | ||
1034 | { | ||
1035 | #ifdef CONFIG_SMP | ||
1036 | /* | ||
1037 | * We can optimise this out completely for !SMP, because the | ||
1038 | * SMP rebalancing from interrupt is the only thing that cares | ||
1039 | * here. | ||
1040 | */ | ||
1041 | next->on_cpu = 1; | ||
1042 | #endif | ||
1043 | } | ||
1044 | |||
1045 | static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) | ||
1046 | { | ||
1047 | #ifdef CONFIG_SMP | ||
1048 | /* | ||
1049 | * After ->on_cpu is cleared, the task can be moved to a different CPU. | ||
1050 | * We must ensure this doesn't happen until the switch is completely | ||
1051 | * finished. | ||
1052 | */ | ||
1053 | smp_wmb(); | ||
1054 | prev->on_cpu = 0; | ||
1055 | #endif | ||
1056 | #ifdef CONFIG_DEBUG_SPINLOCK | ||
1057 | /* this is a valid case when another task releases the spinlock */ | ||
1058 | rq->lock.owner = current; | ||
1059 | #endif | ||
1060 | /* | ||
1061 | * If we are tracking spinlock dependencies then we have to | ||
1062 | * fix up the runqueue lock - which gets 'carried over' from | ||
1063 | * prev into current: | ||
1064 | */ | ||
1065 | spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); | ||
1066 | |||
1067 | raw_spin_unlock_irq(&rq->lock); | ||
1068 | } | ||
1069 | |||
1070 | #else /* __ARCH_WANT_UNLOCKED_CTXSW */ | ||
1071 | static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) | ||
1072 | { | ||
1073 | #ifdef CONFIG_SMP | ||
1074 | /* | ||
1075 | * We can optimise this out completely for !SMP, because the | ||
1076 | * SMP rebalancing from interrupt is the only thing that cares | ||
1077 | * here. | ||
1078 | */ | ||
1079 | next->on_cpu = 1; | ||
1080 | #endif | ||
1081 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW | ||
1082 | raw_spin_unlock_irq(&rq->lock); | ||
1083 | #else | ||
1084 | raw_spin_unlock(&rq->lock); | ||
1085 | #endif | ||
1086 | } | ||
1087 | |||
1088 | static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) | ||
1089 | { | ||
1090 | #ifdef CONFIG_SMP | ||
1091 | /* | ||
1092 | * After ->on_cpu is cleared, the task can be moved to a different CPU. | ||
1093 | * We must ensure this doesn't happen until the switch is completely | ||
1094 | * finished. | ||
1095 | */ | ||
1096 | smp_wmb(); | ||
1097 | prev->on_cpu = 0; | ||
1098 | #endif | ||
1099 | #ifndef __ARCH_WANT_INTERRUPTS_ON_CTXSW | ||
1100 | local_irq_enable(); | ||
1101 | #endif | ||
1102 | } | ||
1103 | #endif /* __ARCH_WANT_UNLOCKED_CTXSW */ | ||
1104 | 256 | ||
1105 | /* | 257 | /* |
1106 | * __task_rq_lock - lock the rq @p resides on. | 258 | * __task_rq_lock - lock the rq @p resides on. |
@@ -1183,20 +335,6 @@ static struct rq *this_rq_lock(void) | |||
1183 | * rq->lock. | 335 | * rq->lock. |
1184 | */ | 336 | */ |
1185 | 337 | ||
1186 | /* | ||
1187 | * Use hrtick when: | ||
1188 | * - enabled by features | ||
1189 | * - hrtimer is actually high res | ||
1190 | */ | ||
1191 | static inline int hrtick_enabled(struct rq *rq) | ||
1192 | { | ||
1193 | if (!sched_feat(HRTICK)) | ||
1194 | return 0; | ||
1195 | if (!cpu_active(cpu_of(rq))) | ||
1196 | return 0; | ||
1197 | return hrtimer_is_hres_active(&rq->hrtick_timer); | ||
1198 | } | ||
1199 | |||
1200 | static void hrtick_clear(struct rq *rq) | 338 | static void hrtick_clear(struct rq *rq) |
1201 | { | 339 | { |
1202 | if (hrtimer_active(&rq->hrtick_timer)) | 340 | if (hrtimer_active(&rq->hrtick_timer)) |
@@ -1240,7 +378,7 @@ static void __hrtick_start(void *arg) | |||
1240 | * | 378 | * |
1241 | * called with rq->lock held and irqs disabled | 379 | * called with rq->lock held and irqs disabled |
1242 | */ | 380 | */ |
1243 | static void hrtick_start(struct rq *rq, u64 delay) | 381 | void hrtick_start(struct rq *rq, u64 delay) |
1244 | { | 382 | { |
1245 | struct hrtimer *timer = &rq->hrtick_timer; | 383 | struct hrtimer *timer = &rq->hrtick_timer; |
1246 | ktime_t time = ktime_add_ns(timer->base->get_time(), delay); | 384 | ktime_t time = ktime_add_ns(timer->base->get_time(), delay); |
@@ -1284,7 +422,7 @@ static __init void init_hrtick(void) | |||
1284 | * | 422 | * |
1285 | * called with rq->lock held and irqs disabled | 423 | * called with rq->lock held and irqs disabled |
1286 | */ | 424 | */ |
1287 | static void hrtick_start(struct rq *rq, u64 delay) | 425 | void hrtick_start(struct rq *rq, u64 delay) |
1288 | { | 426 | { |
1289 | __hrtimer_start_range_ns(&rq->hrtick_timer, ns_to_ktime(delay), 0, | 427 | __hrtimer_start_range_ns(&rq->hrtick_timer, ns_to_ktime(delay), 0, |
1290 | HRTIMER_MODE_REL_PINNED, 0); | 428 | HRTIMER_MODE_REL_PINNED, 0); |
@@ -1335,7 +473,7 @@ static inline void init_hrtick(void) | |||
1335 | #define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) | 473 | #define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) |
1336 | #endif | 474 | #endif |
1337 | 475 | ||
1338 | static void resched_task(struct task_struct *p) | 476 | void resched_task(struct task_struct *p) |
1339 | { | 477 | { |
1340 | int cpu; | 478 | int cpu; |
1341 | 479 | ||
@@ -1356,7 +494,7 @@ static void resched_task(struct task_struct *p) | |||
1356 | smp_send_reschedule(cpu); | 494 | smp_send_reschedule(cpu); |
1357 | } | 495 | } |
1358 | 496 | ||
1359 | static void resched_cpu(int cpu) | 497 | void resched_cpu(int cpu) |
1360 | { | 498 | { |
1361 | struct rq *rq = cpu_rq(cpu); | 499 | struct rq *rq = cpu_rq(cpu); |
1362 | unsigned long flags; | 500 | unsigned long flags; |
@@ -1449,12 +587,7 @@ static inline bool got_nohz_idle_kick(void) | |||
1449 | 587 | ||
1450 | #endif /* CONFIG_NO_HZ */ | 588 | #endif /* CONFIG_NO_HZ */ |
1451 | 589 | ||
1452 | static u64 sched_avg_period(void) | 590 | void sched_avg_update(struct rq *rq) |
1453 | { | ||
1454 | return (u64)sysctl_sched_time_avg * NSEC_PER_MSEC / 2; | ||
1455 | } | ||
1456 | |||
1457 | static void sched_avg_update(struct rq *rq) | ||
1458 | { | 591 | { |
1459 | s64 period = sched_avg_period(); | 592 | s64 period = sched_avg_period(); |
1460 | 593 | ||
@@ -1470,193 +603,23 @@ static void sched_avg_update(struct rq *rq) | |||
1470 | } | 603 | } |
1471 | } | 604 | } |
1472 | 605 | ||
1473 | static void sched_rt_avg_update(struct rq *rq, u64 rt_delta) | ||
1474 | { | ||
1475 | rq->rt_avg += rt_delta; | ||
1476 | sched_avg_update(rq); | ||
1477 | } | ||
1478 | |||
1479 | #else /* !CONFIG_SMP */ | 606 | #else /* !CONFIG_SMP */ |
1480 | static void resched_task(struct task_struct *p) | 607 | void resched_task(struct task_struct *p) |
1481 | { | 608 | { |
1482 | assert_raw_spin_locked(&task_rq(p)->lock); | 609 | assert_raw_spin_locked(&task_rq(p)->lock); |
1483 | set_tsk_need_resched(p); | 610 | set_tsk_need_resched(p); |
1484 | } | 611 | } |
1485 | |||
1486 | static void sched_rt_avg_update(struct rq *rq, u64 rt_delta) | ||
1487 | { | ||
1488 | } | ||
1489 | |||
1490 | static void sched_avg_update(struct rq *rq) | ||
1491 | { | ||
1492 | } | ||
1493 | #endif /* CONFIG_SMP */ | 612 | #endif /* CONFIG_SMP */ |
1494 | 613 | ||
1495 | #if BITS_PER_LONG == 32 | ||
1496 | # define WMULT_CONST (~0UL) | ||
1497 | #else | ||
1498 | # define WMULT_CONST (1UL << 32) | ||
1499 | #endif | ||
1500 | |||
1501 | #define WMULT_SHIFT 32 | ||
1502 | |||
1503 | /* | ||
1504 | * Shift right and round: | ||
1505 | */ | ||
1506 | #define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y)) | ||
1507 | |||
1508 | /* | ||
1509 | * delta *= weight / lw | ||
1510 | */ | ||
1511 | static unsigned long | ||
1512 | calc_delta_mine(unsigned long delta_exec, unsigned long weight, | ||
1513 | struct load_weight *lw) | ||
1514 | { | ||
1515 | u64 tmp; | ||
1516 | |||
1517 | /* | ||
1518 | * weight can be less than 2^SCHED_LOAD_RESOLUTION for task group sched | ||
1519 | * entities since MIN_SHARES = 2. Treat weight as 1 if less than | ||
1520 | * 2^SCHED_LOAD_RESOLUTION. | ||
1521 | */ | ||
1522 | if (likely(weight > (1UL << SCHED_LOAD_RESOLUTION))) | ||
1523 | tmp = (u64)delta_exec * scale_load_down(weight); | ||
1524 | else | ||
1525 | tmp = (u64)delta_exec; | ||
1526 | |||
1527 | if (!lw->inv_weight) { | ||
1528 | unsigned long w = scale_load_down(lw->weight); | ||
1529 | |||
1530 | if (BITS_PER_LONG > 32 && unlikely(w >= WMULT_CONST)) | ||
1531 | lw->inv_weight = 1; | ||
1532 | else if (unlikely(!w)) | ||
1533 | lw->inv_weight = WMULT_CONST; | ||
1534 | else | ||
1535 | lw->inv_weight = WMULT_CONST / w; | ||
1536 | } | ||
1537 | |||
1538 | /* | ||
1539 | * Check whether we'd overflow the 64-bit multiplication: | ||
1540 | */ | ||
1541 | if (unlikely(tmp > WMULT_CONST)) | ||
1542 | tmp = SRR(SRR(tmp, WMULT_SHIFT/2) * lw->inv_weight, | ||
1543 | WMULT_SHIFT/2); | ||
1544 | else | ||
1545 | tmp = SRR(tmp * lw->inv_weight, WMULT_SHIFT); | ||
1546 | |||
1547 | return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX); | ||
1548 | } | ||
1549 | |||
1550 | static inline void update_load_add(struct load_weight *lw, unsigned long inc) | ||
1551 | { | ||
1552 | lw->weight += inc; | ||
1553 | lw->inv_weight = 0; | ||
1554 | } | ||
1555 | |||
1556 | static inline void update_load_sub(struct load_weight *lw, unsigned long dec) | ||
1557 | { | ||
1558 | lw->weight -= dec; | ||
1559 | lw->inv_weight = 0; | ||
1560 | } | ||
1561 | |||
1562 | static inline void update_load_set(struct load_weight *lw, unsigned long w) | ||
1563 | { | ||
1564 | lw->weight = w; | ||
1565 | lw->inv_weight = 0; | ||
1566 | } | ||
1567 | |||
1568 | /* | ||
1569 | * To aid in avoiding the subversion of "niceness" due to uneven distribution | ||
1570 | * of tasks with abnormal "nice" values across CPUs the contribution that | ||
1571 | * each task makes to its run queue's load is weighted according to its | ||
1572 | * scheduling class and "nice" value. For SCHED_NORMAL tasks this is just a | ||
1573 | * scaled version of the new time slice allocation that they receive on time | ||
1574 | * slice expiry etc. | ||
1575 | */ | ||
1576 | |||
1577 | #define WEIGHT_IDLEPRIO 3 | ||
1578 | #define WMULT_IDLEPRIO 1431655765 | ||
1579 | |||
1580 | /* | ||
1581 | * Nice levels are multiplicative, with a gentle 10% change for every | ||
1582 | * nice level changed. I.e. when a CPU-bound task goes from nice 0 to | ||
1583 | * nice 1, it will get ~10% less CPU time than another CPU-bound task | ||
1584 | * that remained on nice 0. | ||
1585 | * | ||
1586 | * The "10% effect" is relative and cumulative: from _any_ nice level, | ||
1587 | * if you go up 1 level, it's -10% CPU usage, if you go down 1 level | ||
1588 | * it's +10% CPU usage. (to achieve that we use a multiplier of 1.25. | ||
1589 | * If a task goes up by ~10% and another task goes down by ~10% then | ||
1590 | * the relative distance between them is ~25%.) | ||
1591 | */ | ||
1592 | static const int prio_to_weight[40] = { | ||
1593 | /* -20 */ 88761, 71755, 56483, 46273, 36291, | ||
1594 | /* -15 */ 29154, 23254, 18705, 14949, 11916, | ||
1595 | /* -10 */ 9548, 7620, 6100, 4904, 3906, | ||
1596 | /* -5 */ 3121, 2501, 1991, 1586, 1277, | ||
1597 | /* 0 */ 1024, 820, 655, 526, 423, | ||
1598 | /* 5 */ 335, 272, 215, 172, 137, | ||
1599 | /* 10 */ 110, 87, 70, 56, 45, | ||
1600 | /* 15 */ 36, 29, 23, 18, 15, | ||
1601 | }; | ||
1602 | |||
1603 | /* | ||
1604 | * Inverse (2^32/x) values of the prio_to_weight[] array, precalculated. | ||
1605 | * | ||
1606 | * In cases where the weight does not change often, we can use the | ||
1607 | * precalculated inverse to speed up arithmetics by turning divisions | ||
1608 | * into multiplications: | ||
1609 | */ | ||
1610 | static const u32 prio_to_wmult[40] = { | ||
1611 | /* -20 */ 48388, 59856, 76040, 92818, 118348, | ||
1612 | /* -15 */ 147320, 184698, 229616, 287308, 360437, | ||
1613 | /* -10 */ 449829, 563644, 704093, 875809, 1099582, | ||
1614 | /* -5 */ 1376151, 1717300, 2157191, 2708050, 3363326, | ||
1615 | /* 0 */ 4194304, 5237765, 6557202, 8165337, 10153587, | ||
1616 | /* 5 */ 12820798, 15790321, 19976592, 24970740, 31350126, | ||
1617 | /* 10 */ 39045157, 49367440, 61356676, 76695844, 95443717, | ||
1618 | /* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153, | ||
1619 | }; | ||
1620 | |||
1621 | /* Time spent by the tasks of the cpu accounting group executing in ... */ | ||
1622 | enum cpuacct_stat_index { | ||
1623 | CPUACCT_STAT_USER, /* ... user mode */ | ||
1624 | CPUACCT_STAT_SYSTEM, /* ... kernel mode */ | ||
1625 | |||
1626 | CPUACCT_STAT_NSTATS, | ||
1627 | }; | ||
1628 | |||
1629 | #ifdef CONFIG_CGROUP_CPUACCT | ||
1630 | static void cpuacct_charge(struct task_struct *tsk, u64 cputime); | ||
1631 | static void cpuacct_update_stats(struct task_struct *tsk, | ||
1632 | enum cpuacct_stat_index idx, cputime_t val); | ||
1633 | #else | ||
1634 | static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {} | ||
1635 | static inline void cpuacct_update_stats(struct task_struct *tsk, | ||
1636 | enum cpuacct_stat_index idx, cputime_t val) {} | ||
1637 | #endif | ||
1638 | |||
1639 | static inline void inc_cpu_load(struct rq *rq, unsigned long load) | ||
1640 | { | ||
1641 | update_load_add(&rq->load, load); | ||
1642 | } | ||
1643 | |||
1644 | static inline void dec_cpu_load(struct rq *rq, unsigned long load) | ||
1645 | { | ||
1646 | update_load_sub(&rq->load, load); | ||
1647 | } | ||
1648 | |||
1649 | #if defined(CONFIG_RT_GROUP_SCHED) || (defined(CONFIG_FAIR_GROUP_SCHED) && \ | 614 | #if defined(CONFIG_RT_GROUP_SCHED) || (defined(CONFIG_FAIR_GROUP_SCHED) && \ |
1650 | (defined(CONFIG_SMP) || defined(CONFIG_CFS_BANDWIDTH))) | 615 | (defined(CONFIG_SMP) || defined(CONFIG_CFS_BANDWIDTH))) |
1651 | typedef int (*tg_visitor)(struct task_group *, void *); | ||
1652 | |||
1653 | /* | 616 | /* |
1654 | * Iterate task_group tree rooted at *from, calling @down when first entering a | 617 | * Iterate task_group tree rooted at *from, calling @down when first entering a |
1655 | * node and @up when leaving it for the final time. | 618 | * node and @up when leaving it for the final time. |
1656 | * | 619 | * |
1657 | * Caller must hold rcu_lock or sufficient equivalent. | 620 | * Caller must hold rcu_lock or sufficient equivalent. |
1658 | */ | 621 | */ |
1659 | static int walk_tg_tree_from(struct task_group *from, | 622 | int walk_tg_tree_from(struct task_group *from, |
1660 | tg_visitor down, tg_visitor up, void *data) | 623 | tg_visitor down, tg_visitor up, void *data) |
1661 | { | 624 | { |
1662 | struct task_group *parent, *child; | 625 | struct task_group *parent, *child; |
@@ -1687,270 +650,13 @@ out: | |||
1687 | return ret; | 650 | return ret; |
1688 | } | 651 | } |
1689 | 652 | ||
1690 | /* | 653 | int tg_nop(struct task_group *tg, void *data) |
1691 | * Iterate the full tree, calling @down when first entering a node and @up when | ||
1692 | * leaving it for the final time. | ||
1693 | * | ||
1694 | * Caller must hold rcu_lock or sufficient equivalent. | ||
1695 | */ | ||
1696 | |||
1697 | static inline int walk_tg_tree(tg_visitor down, tg_visitor up, void *data) | ||
1698 | { | ||
1699 | return walk_tg_tree_from(&root_task_group, down, up, data); | ||
1700 | } | ||
1701 | |||
1702 | static int tg_nop(struct task_group *tg, void *data) | ||
1703 | { | 654 | { |
1704 | return 0; | 655 | return 0; |
1705 | } | 656 | } |
1706 | #endif | 657 | #endif |
1707 | 658 | ||
1708 | #ifdef CONFIG_SMP | 659 | void update_cpu_load(struct rq *this_rq); |
1709 | /* Used instead of source_load when we know the type == 0 */ | ||
1710 | static unsigned long weighted_cpuload(const int cpu) | ||
1711 | { | ||
1712 | return cpu_rq(cpu)->load.weight; | ||
1713 | } | ||
1714 | |||
1715 | /* | ||
1716 | * Return a low guess at the load of a migration-source cpu weighted | ||
1717 | * according to the scheduling class and "nice" value. | ||
1718 | * | ||
1719 | * We want to under-estimate the load of migration sources, to | ||
1720 | * balance conservatively. | ||
1721 | */ | ||
1722 | static unsigned long source_load(int cpu, int type) | ||
1723 | { | ||
1724 | struct rq *rq = cpu_rq(cpu); | ||
1725 | unsigned long total = weighted_cpuload(cpu); | ||
1726 | |||
1727 | if (type == 0 || !sched_feat(LB_BIAS)) | ||
1728 | return total; | ||
1729 | |||
1730 | return min(rq->cpu_load[type-1], total); | ||
1731 | } | ||
1732 | |||
1733 | /* | ||
1734 | * Return a high guess at the load of a migration-target cpu weighted | ||
1735 | * according to the scheduling class and "nice" value. | ||
1736 | */ | ||
1737 | static unsigned long target_load(int cpu, int type) | ||
1738 | { | ||
1739 | struct rq *rq = cpu_rq(cpu); | ||
1740 | unsigned long total = weighted_cpuload(cpu); | ||
1741 | |||
1742 | if (type == 0 || !sched_feat(LB_BIAS)) | ||
1743 | return total; | ||
1744 | |||
1745 | return max(rq->cpu_load[type-1], total); | ||
1746 | } | ||
1747 | |||
1748 | static unsigned long power_of(int cpu) | ||
1749 | { | ||
1750 | return cpu_rq(cpu)->cpu_power; | ||
1751 | } | ||
1752 | |||
1753 | static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); | ||
1754 | |||
1755 | static unsigned long cpu_avg_load_per_task(int cpu) | ||
1756 | { | ||
1757 | struct rq *rq = cpu_rq(cpu); | ||
1758 | unsigned long nr_running = ACCESS_ONCE(rq->nr_running); | ||
1759 | |||
1760 | if (nr_running) | ||
1761 | return rq->load.weight / nr_running; | ||
1762 | |||
1763 | return 0; | ||
1764 | } | ||
1765 | |||
1766 | #ifdef CONFIG_PREEMPT | ||
1767 | |||
1768 | static void double_rq_lock(struct rq *rq1, struct rq *rq2); | ||
1769 | |||
1770 | /* | ||
1771 | * fair double_lock_balance: Safely acquires both rq->locks in a fair | ||
1772 | * way at the expense of forcing extra atomic operations in all | ||
1773 | * invocations. This assures that the double_lock is acquired using the | ||
1774 | * same underlying policy as the spinlock_t on this architecture, which | ||
1775 | * reduces latency compared to the unfair variant below. However, it | ||
1776 | * also adds more overhead and therefore may reduce throughput. | ||
1777 | */ | ||
1778 | static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest) | ||
1779 | __releases(this_rq->lock) | ||
1780 | __acquires(busiest->lock) | ||
1781 | __acquires(this_rq->lock) | ||
1782 | { | ||
1783 | raw_spin_unlock(&this_rq->lock); | ||
1784 | double_rq_lock(this_rq, busiest); | ||
1785 | |||
1786 | return 1; | ||
1787 | } | ||
1788 | |||
1789 | #else | ||
1790 | /* | ||
1791 | * Unfair double_lock_balance: Optimizes throughput at the expense of | ||
1792 | * latency by eliminating extra atomic operations when the locks are | ||
1793 | * already in proper order on entry. This favors lower cpu-ids and will | ||
1794 | * grant the double lock to lower cpus over higher ids under contention, | ||
1795 | * regardless of entry order into the function. | ||
1796 | */ | ||
1797 | static int _double_lock_balance(struct rq *this_rq, struct rq *busiest) | ||
1798 | __releases(this_rq->lock) | ||
1799 | __acquires(busiest->lock) | ||
1800 | __acquires(this_rq->lock) | ||
1801 | { | ||
1802 | int ret = 0; | ||
1803 | |||
1804 | if (unlikely(!raw_spin_trylock(&busiest->lock))) { | ||
1805 | if (busiest < this_rq) { | ||
1806 | raw_spin_unlock(&this_rq->lock); | ||
1807 | raw_spin_lock(&busiest->lock); | ||
1808 | raw_spin_lock_nested(&this_rq->lock, | ||
1809 | SINGLE_DEPTH_NESTING); | ||
1810 | ret = 1; | ||
1811 | } else | ||
1812 | raw_spin_lock_nested(&busiest->lock, | ||
1813 | SINGLE_DEPTH_NESTING); | ||
1814 | } | ||
1815 | return ret; | ||
1816 | } | ||
1817 | |||
1818 | #endif /* CONFIG_PREEMPT */ | ||
1819 | |||
1820 | /* | ||
1821 | * double_lock_balance - lock the busiest runqueue, this_rq is locked already. | ||
1822 | */ | ||
1823 | static int double_lock_balance(struct rq *this_rq, struct rq *busiest) | ||
1824 | { | ||
1825 | if (unlikely(!irqs_disabled())) { | ||
1826 | /* printk() doesn't work good under rq->lock */ | ||
1827 | raw_spin_unlock(&this_rq->lock); | ||
1828 | BUG_ON(1); | ||
1829 | } | ||
1830 | |||
1831 | return _double_lock_balance(this_rq, busiest); | ||
1832 | } | ||
1833 | |||
1834 | static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest) | ||
1835 | __releases(busiest->lock) | ||
1836 | { | ||
1837 | raw_spin_unlock(&busiest->lock); | ||
1838 | lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_); | ||
1839 | } | ||
1840 | |||
1841 | /* | ||
1842 | * double_rq_lock - safely lock two runqueues | ||
1843 | * | ||
1844 | * Note this does not disable interrupts like task_rq_lock, | ||
1845 | * you need to do so manually before calling. | ||
1846 | */ | ||
1847 | static void double_rq_lock(struct rq *rq1, struct rq *rq2) | ||
1848 | __acquires(rq1->lock) | ||
1849 | __acquires(rq2->lock) | ||
1850 | { | ||
1851 | BUG_ON(!irqs_disabled()); | ||
1852 | if (rq1 == rq2) { | ||
1853 | raw_spin_lock(&rq1->lock); | ||
1854 | __acquire(rq2->lock); /* Fake it out ;) */ | ||
1855 | } else { | ||
1856 | if (rq1 < rq2) { | ||
1857 | raw_spin_lock(&rq1->lock); | ||
1858 | raw_spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING); | ||
1859 | } else { | ||
1860 | raw_spin_lock(&rq2->lock); | ||
1861 | raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING); | ||
1862 | } | ||
1863 | } | ||
1864 | } | ||
1865 | |||
1866 | /* | ||
1867 | * double_rq_unlock - safely unlock two runqueues | ||
1868 | * | ||
1869 | * Note this does not restore interrupts like task_rq_unlock, | ||
1870 | * you need to do so manually after calling. | ||
1871 | */ | ||
1872 | static void double_rq_unlock(struct rq *rq1, struct rq *rq2) | ||
1873 | __releases(rq1->lock) | ||
1874 | __releases(rq2->lock) | ||
1875 | { | ||
1876 | raw_spin_unlock(&rq1->lock); | ||
1877 | if (rq1 != rq2) | ||
1878 | raw_spin_unlock(&rq2->lock); | ||
1879 | else | ||
1880 | __release(rq2->lock); | ||
1881 | } | ||
1882 | |||
1883 | #else /* CONFIG_SMP */ | ||
1884 | |||
1885 | /* | ||
1886 | * double_rq_lock - safely lock two runqueues | ||
1887 | * | ||
1888 | * Note this does not disable interrupts like task_rq_lock, | ||
1889 | * you need to do so manually before calling. | ||
1890 | */ | ||
1891 | static void double_rq_lock(struct rq *rq1, struct rq *rq2) | ||
1892 | __acquires(rq1->lock) | ||
1893 | __acquires(rq2->lock) | ||
1894 | { | ||
1895 | BUG_ON(!irqs_disabled()); | ||
1896 | BUG_ON(rq1 != rq2); | ||
1897 | raw_spin_lock(&rq1->lock); | ||
1898 | __acquire(rq2->lock); /* Fake it out ;) */ | ||
1899 | } | ||
1900 | |||
1901 | /* | ||
1902 | * double_rq_unlock - safely unlock two runqueues | ||
1903 | * | ||
1904 | * Note this does not restore interrupts like task_rq_unlock, | ||
1905 | * you need to do so manually after calling. | ||
1906 | */ | ||
1907 | static void double_rq_unlock(struct rq *rq1, struct rq *rq2) | ||
1908 | __releases(rq1->lock) | ||
1909 | __releases(rq2->lock) | ||
1910 | { | ||
1911 | BUG_ON(rq1 != rq2); | ||
1912 | raw_spin_unlock(&rq1->lock); | ||
1913 | __release(rq2->lock); | ||
1914 | } | ||
1915 | |||
1916 | #endif | ||
1917 | |||
1918 | static void calc_load_account_idle(struct rq *this_rq); | ||
1919 | static void update_sysctl(void); | ||
1920 | static int get_update_sysctl_factor(void); | ||
1921 | static void update_cpu_load(struct rq *this_rq); | ||
1922 | |||
1923 | static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) | ||
1924 | { | ||
1925 | set_task_rq(p, cpu); | ||
1926 | #ifdef CONFIG_SMP | ||
1927 | /* | ||
1928 | * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be | ||
1929 | * successfully executed on another CPU. We must ensure that updates of | ||
1930 | * per-task data have been completed by this moment. | ||
1931 | */ | ||
1932 | smp_wmb(); | ||
1933 | task_thread_info(p)->cpu = cpu; | ||
1934 | #endif | ||
1935 | } | ||
1936 | |||
1937 | static const struct sched_class rt_sched_class; | ||
1938 | |||
1939 | #define sched_class_highest (&stop_sched_class) | ||
1940 | #define for_each_class(class) \ | ||
1941 | for (class = sched_class_highest; class; class = class->next) | ||
1942 | |||
1943 | #include "sched_stats.h" | ||
1944 | |||
1945 | static void inc_nr_running(struct rq *rq) | ||
1946 | { | ||
1947 | rq->nr_running++; | ||
1948 | } | ||
1949 | |||
1950 | static void dec_nr_running(struct rq *rq) | ||
1951 | { | ||
1952 | rq->nr_running--; | ||
1953 | } | ||
1954 | 660 | ||
1955 | static void set_load_weight(struct task_struct *p) | 661 | static void set_load_weight(struct task_struct *p) |
1956 | { | 662 | { |
@@ -1987,7 +693,7 @@ static void dequeue_task(struct rq *rq, struct task_struct *p, int flags) | |||
1987 | /* | 693 | /* |
1988 | * activate_task - move a task to the runqueue. | 694 | * activate_task - move a task to the runqueue. |
1989 | */ | 695 | */ |
1990 | static void activate_task(struct rq *rq, struct task_struct *p, int flags) | 696 | void activate_task(struct rq *rq, struct task_struct *p, int flags) |
1991 | { | 697 | { |
1992 | if (task_contributes_to_load(p)) | 698 | if (task_contributes_to_load(p)) |
1993 | rq->nr_uninterruptible--; | 699 | rq->nr_uninterruptible--; |
@@ -1998,7 +704,7 @@ static void activate_task(struct rq *rq, struct task_struct *p, int flags) | |||
1998 | /* | 704 | /* |
1999 | * deactivate_task - remove a task from the runqueue. | 705 | * deactivate_task - remove a task from the runqueue. |
2000 | */ | 706 | */ |
2001 | static void deactivate_task(struct rq *rq, struct task_struct *p, int flags) | 707 | void deactivate_task(struct rq *rq, struct task_struct *p, int flags) |
2002 | { | 708 | { |
2003 | if (task_contributes_to_load(p)) | 709 | if (task_contributes_to_load(p)) |
2004 | rq->nr_uninterruptible++; | 710 | rq->nr_uninterruptible++; |
@@ -2223,15 +929,6 @@ static int irqtime_account_si_update(void) | |||
2223 | 929 | ||
2224 | #endif | 930 | #endif |
2225 | 931 | ||
2226 | #include "sched_idletask.c" | ||
2227 | #include "sched_fair.c" | ||
2228 | #include "sched_rt.c" | ||
2229 | #include "sched_autogroup.c" | ||
2230 | #include "sched_stoptask.c" | ||
2231 | #ifdef CONFIG_SCHED_DEBUG | ||
2232 | # include "sched_debug.c" | ||
2233 | #endif | ||
2234 | |||
2235 | void sched_set_stop_task(int cpu, struct task_struct *stop) | 932 | void sched_set_stop_task(int cpu, struct task_struct *stop) |
2236 | { | 933 | { |
2237 | struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; | 934 | struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; |
@@ -2329,7 +1026,7 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p, | |||
2329 | p->sched_class->prio_changed(rq, p, oldprio); | 1026 | p->sched_class->prio_changed(rq, p, oldprio); |
2330 | } | 1027 | } |
2331 | 1028 | ||
2332 | static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) | 1029 | void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) |
2333 | { | 1030 | { |
2334 | const struct sched_class *class; | 1031 | const struct sched_class *class; |
2335 | 1032 | ||
@@ -2355,38 +1052,6 @@ static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) | |||
2355 | } | 1052 | } |
2356 | 1053 | ||
2357 | #ifdef CONFIG_SMP | 1054 | #ifdef CONFIG_SMP |
2358 | /* | ||
2359 | * Is this task likely cache-hot: | ||
2360 | */ | ||
2361 | static int | ||
2362 | task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) | ||
2363 | { | ||
2364 | s64 delta; | ||
2365 | |||
2366 | if (p->sched_class != &fair_sched_class) | ||
2367 | return 0; | ||
2368 | |||
2369 | if (unlikely(p->policy == SCHED_IDLE)) | ||
2370 | return 0; | ||
2371 | |||
2372 | /* | ||
2373 | * Buddy candidates are cache hot: | ||
2374 | */ | ||
2375 | if (sched_feat(CACHE_HOT_BUDDY) && this_rq()->nr_running && | ||
2376 | (&p->se == cfs_rq_of(&p->se)->next || | ||
2377 | &p->se == cfs_rq_of(&p->se)->last)) | ||
2378 | return 1; | ||
2379 | |||
2380 | if (sysctl_sched_migration_cost == -1) | ||
2381 | return 1; | ||
2382 | if (sysctl_sched_migration_cost == 0) | ||
2383 | return 0; | ||
2384 | |||
2385 | delta = now - p->se.exec_start; | ||
2386 | |||
2387 | return delta < (s64)sysctl_sched_migration_cost; | ||
2388 | } | ||
2389 | |||
2390 | void set_task_cpu(struct task_struct *p, unsigned int new_cpu) | 1055 | void set_task_cpu(struct task_struct *p, unsigned int new_cpu) |
2391 | { | 1056 | { |
2392 | #ifdef CONFIG_SCHED_DEBUG | 1057 | #ifdef CONFIG_SCHED_DEBUG |
@@ -3469,7 +2134,7 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active) | |||
3469 | */ | 2134 | */ |
3470 | static atomic_long_t calc_load_tasks_idle; | 2135 | static atomic_long_t calc_load_tasks_idle; |
3471 | 2136 | ||
3472 | static void calc_load_account_idle(struct rq *this_rq) | 2137 | void calc_load_account_idle(struct rq *this_rq) |
3473 | { | 2138 | { |
3474 | long delta; | 2139 | long delta; |
3475 | 2140 | ||
@@ -3613,7 +2278,7 @@ static void calc_global_nohz(unsigned long ticks) | |||
3613 | */ | 2278 | */ |
3614 | } | 2279 | } |
3615 | #else | 2280 | #else |
3616 | static void calc_load_account_idle(struct rq *this_rq) | 2281 | void calc_load_account_idle(struct rq *this_rq) |
3617 | { | 2282 | { |
3618 | } | 2283 | } |
3619 | 2284 | ||
@@ -3756,7 +2421,7 @@ decay_load_missed(unsigned long load, unsigned long missed_updates, int idx) | |||
3756 | * scheduler tick (TICK_NSEC). With tickless idle this will not be called | 2421 | * scheduler tick (TICK_NSEC). With tickless idle this will not be called |
3757 | * every tick. We fix it up based on jiffies. | 2422 | * every tick. We fix it up based on jiffies. |
3758 | */ | 2423 | */ |
3759 | static void update_cpu_load(struct rq *this_rq) | 2424 | void update_cpu_load(struct rq *this_rq) |
3760 | { | 2425 | { |
3761 | unsigned long this_load = this_rq->load.weight; | 2426 | unsigned long this_load = this_rq->load.weight; |
3762 | unsigned long curr_jiffies = jiffies; | 2427 | unsigned long curr_jiffies = jiffies; |
@@ -6148,53 +4813,6 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) | |||
6148 | #endif | 4813 | #endif |
6149 | } | 4814 | } |
6150 | 4815 | ||
6151 | /* | ||
6152 | * Increase the granularity value when there are more CPUs, | ||
6153 | * because with more CPUs the 'effective latency' as visible | ||
6154 | * to users decreases. But the relationship is not linear, | ||
6155 | * so pick a second-best guess by going with the log2 of the | ||
6156 | * number of CPUs. | ||
6157 | * | ||
6158 | * This idea comes from the SD scheduler of Con Kolivas: | ||
6159 | */ | ||
6160 | static int get_update_sysctl_factor(void) | ||
6161 | { | ||
6162 | unsigned int cpus = min_t(int, num_online_cpus(), 8); | ||
6163 | unsigned int factor; | ||
6164 | |||
6165 | switch (sysctl_sched_tunable_scaling) { | ||
6166 | case SCHED_TUNABLESCALING_NONE: | ||
6167 | factor = 1; | ||
6168 | break; | ||
6169 | case SCHED_TUNABLESCALING_LINEAR: | ||
6170 | factor = cpus; | ||
6171 | break; | ||
6172 | case SCHED_TUNABLESCALING_LOG: | ||
6173 | default: | ||
6174 | factor = 1 + ilog2(cpus); | ||
6175 | break; | ||
6176 | } | ||
6177 | |||
6178 | return factor; | ||
6179 | } | ||
6180 | |||
6181 | static void update_sysctl(void) | ||
6182 | { | ||
6183 | unsigned int factor = get_update_sysctl_factor(); | ||
6184 | |||
6185 | #define SET_SYSCTL(name) \ | ||
6186 | (sysctl_##name = (factor) * normalized_sysctl_##name) | ||
6187 | SET_SYSCTL(sched_min_granularity); | ||
6188 | SET_SYSCTL(sched_latency); | ||
6189 | SET_SYSCTL(sched_wakeup_granularity); | ||
6190 | #undef SET_SYSCTL | ||
6191 | } | ||
6192 | |||
6193 | static inline void sched_init_granularity(void) | ||
6194 | { | ||
6195 | update_sysctl(); | ||
6196 | } | ||
6197 | |||
6198 | #ifdef CONFIG_SMP | 4816 | #ifdef CONFIG_SMP |
6199 | void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) | 4817 | void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) |
6200 | { | 4818 | { |
@@ -6381,30 +4999,6 @@ static void calc_global_load_remove(struct rq *rq) | |||
6381 | rq->calc_load_active = 0; | 4999 | rq->calc_load_active = 0; |
6382 | } | 5000 | } |
6383 | 5001 | ||
6384 | #ifdef CONFIG_CFS_BANDWIDTH | ||
6385 | static void unthrottle_offline_cfs_rqs(struct rq *rq) | ||
6386 | { | ||
6387 | struct cfs_rq *cfs_rq; | ||
6388 | |||
6389 | for_each_leaf_cfs_rq(rq, cfs_rq) { | ||
6390 | struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg); | ||
6391 | |||
6392 | if (!cfs_rq->runtime_enabled) | ||
6393 | continue; | ||
6394 | |||
6395 | /* | ||
6396 | * clock_task is not advancing so we just need to make sure | ||
6397 | * there's some valid quota amount | ||
6398 | */ | ||
6399 | cfs_rq->runtime_remaining = cfs_b->quota; | ||
6400 | if (cfs_rq_throttled(cfs_rq)) | ||
6401 | unthrottle_cfs_rq(cfs_rq); | ||
6402 | } | ||
6403 | } | ||
6404 | #else | ||
6405 | static void unthrottle_offline_cfs_rqs(struct rq *rq) {} | ||
6406 | #endif | ||
6407 | |||
6408 | /* | 5002 | /* |
6409 | * Migrate all tasks from the rq, sleeping tasks will be migrated by | 5003 | * Migrate all tasks from the rq, sleeping tasks will be migrated by |
6410 | * try_to_wake_up()->select_task_rq(). | 5004 | * try_to_wake_up()->select_task_rq(). |
@@ -7010,6 +5604,12 @@ out: | |||
7010 | return -ENOMEM; | 5604 | return -ENOMEM; |
7011 | } | 5605 | } |
7012 | 5606 | ||
5607 | /* | ||
5608 | * By default the system creates a single root-domain with all cpus as | ||
5609 | * members (mimicking the global state we have today). | ||
5610 | */ | ||
5611 | struct root_domain def_root_domain; | ||
5612 | |||
7013 | static void init_defrootdomain(void) | 5613 | static void init_defrootdomain(void) |
7014 | { | 5614 | { |
7015 | init_rootdomain(&def_root_domain); | 5615 | init_rootdomain(&def_root_domain); |
@@ -7418,6 +6018,11 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd) | |||
7418 | update_group_power(sd, cpu); | 6018 | update_group_power(sd, cpu); |
7419 | } | 6019 | } |
7420 | 6020 | ||
6021 | int __weak arch_sd_sibling_asym_packing(void) | ||
6022 | { | ||
6023 | return 0*SD_ASYM_PACKING; | ||
6024 | } | ||
6025 | |||
7421 | /* | 6026 | /* |
7422 | * Initializers for schedule domains | 6027 | * Initializers for schedule domains |
7423 | * Non-inlined to reduce accumulated stack pressure in build_sched_domains() | 6028 | * Non-inlined to reduce accumulated stack pressure in build_sched_domains() |
@@ -8053,29 +6658,6 @@ static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action, | |||
8053 | } | 6658 | } |
8054 | } | 6659 | } |
8055 | 6660 | ||
8056 | static int update_runtime(struct notifier_block *nfb, | ||
8057 | unsigned long action, void *hcpu) | ||
8058 | { | ||
8059 | int cpu = (int)(long)hcpu; | ||
8060 | |||
8061 | switch (action) { | ||
8062 | case CPU_DOWN_PREPARE: | ||
8063 | case CPU_DOWN_PREPARE_FROZEN: | ||
8064 | disable_runtime(cpu_rq(cpu)); | ||
8065 | return NOTIFY_OK; | ||
8066 | |||
8067 | case CPU_DOWN_FAILED: | ||
8068 | case CPU_DOWN_FAILED_FROZEN: | ||
8069 | case CPU_ONLINE: | ||
8070 | case CPU_ONLINE_FROZEN: | ||
8071 | enable_runtime(cpu_rq(cpu)); | ||
8072 | return NOTIFY_OK; | ||
8073 | |||
8074 | default: | ||
8075 | return NOTIFY_DONE; | ||
8076 | } | ||
8077 | } | ||
8078 | |||
8079 | void __init sched_init_smp(void) | 6661 | void __init sched_init_smp(void) |
8080 | { | 6662 | { |
8081 | cpumask_var_t non_isolated_cpus; | 6663 | cpumask_var_t non_isolated_cpus; |
@@ -8124,104 +6706,11 @@ int in_sched_functions(unsigned long addr) | |||
8124 | && addr < (unsigned long)__sched_text_end); | 6706 | && addr < (unsigned long)__sched_text_end); |
8125 | } | 6707 | } |
8126 | 6708 | ||
8127 | static void init_cfs_rq(struct cfs_rq *cfs_rq) | 6709 | #ifdef CONFIG_CGROUP_SCHED |
8128 | { | 6710 | struct task_group root_task_group; |
8129 | cfs_rq->tasks_timeline = RB_ROOT; | ||
8130 | INIT_LIST_HEAD(&cfs_rq->tasks); | ||
8131 | cfs_rq->min_vruntime = (u64)(-(1LL << 20)); | ||
8132 | #ifndef CONFIG_64BIT | ||
8133 | cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime; | ||
8134 | #endif | ||
8135 | } | ||
8136 | |||
8137 | static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq) | ||
8138 | { | ||
8139 | struct rt_prio_array *array; | ||
8140 | int i; | ||
8141 | |||
8142 | array = &rt_rq->active; | ||
8143 | for (i = 0; i < MAX_RT_PRIO; i++) { | ||
8144 | INIT_LIST_HEAD(array->queue + i); | ||
8145 | __clear_bit(i, array->bitmap); | ||
8146 | } | ||
8147 | /* delimiter for bitsearch: */ | ||
8148 | __set_bit(MAX_RT_PRIO, array->bitmap); | ||
8149 | |||
8150 | #if defined CONFIG_SMP | ||
8151 | rt_rq->highest_prio.curr = MAX_RT_PRIO; | ||
8152 | rt_rq->highest_prio.next = MAX_RT_PRIO; | ||
8153 | rt_rq->rt_nr_migratory = 0; | ||
8154 | rt_rq->overloaded = 0; | ||
8155 | plist_head_init(&rt_rq->pushable_tasks); | ||
8156 | #endif | ||
8157 | |||
8158 | rt_rq->rt_time = 0; | ||
8159 | rt_rq->rt_throttled = 0; | ||
8160 | rt_rq->rt_runtime = 0; | ||
8161 | raw_spin_lock_init(&rt_rq->rt_runtime_lock); | ||
8162 | } | ||
8163 | |||
8164 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
8165 | static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, | ||
8166 | struct sched_entity *se, int cpu, | ||
8167 | struct sched_entity *parent) | ||
8168 | { | ||
8169 | struct rq *rq = cpu_rq(cpu); | ||
8170 | |||
8171 | cfs_rq->tg = tg; | ||
8172 | cfs_rq->rq = rq; | ||
8173 | #ifdef CONFIG_SMP | ||
8174 | /* allow initial update_cfs_load() to truncate */ | ||
8175 | cfs_rq->load_stamp = 1; | ||
8176 | #endif | ||
8177 | init_cfs_rq_runtime(cfs_rq); | ||
8178 | |||
8179 | tg->cfs_rq[cpu] = cfs_rq; | ||
8180 | tg->se[cpu] = se; | ||
8181 | |||
8182 | /* se could be NULL for root_task_group */ | ||
8183 | if (!se) | ||
8184 | return; | ||
8185 | |||
8186 | if (!parent) | ||
8187 | se->cfs_rq = &rq->cfs; | ||
8188 | else | ||
8189 | se->cfs_rq = parent->my_q; | ||
8190 | |||
8191 | se->my_q = cfs_rq; | ||
8192 | update_load_set(&se->load, 0); | ||
8193 | se->parent = parent; | ||
8194 | } | ||
8195 | #endif | 6711 | #endif |
8196 | 6712 | ||
8197 | #ifdef CONFIG_RT_GROUP_SCHED | 6713 | DECLARE_PER_CPU(cpumask_var_t, load_balance_tmpmask); |
8198 | static void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq, | ||
8199 | struct sched_rt_entity *rt_se, int cpu, | ||
8200 | struct sched_rt_entity *parent) | ||
8201 | { | ||
8202 | struct rq *rq = cpu_rq(cpu); | ||
8203 | |||
8204 | rt_rq->highest_prio.curr = MAX_RT_PRIO; | ||
8205 | rt_rq->rt_nr_boosted = 0; | ||
8206 | rt_rq->rq = rq; | ||
8207 | rt_rq->tg = tg; | ||
8208 | |||
8209 | tg->rt_rq[cpu] = rt_rq; | ||
8210 | tg->rt_se[cpu] = rt_se; | ||
8211 | |||
8212 | if (!rt_se) | ||
8213 | return; | ||
8214 | |||
8215 | if (!parent) | ||
8216 | rt_se->rt_rq = &rq->rt; | ||
8217 | else | ||
8218 | rt_se->rt_rq = parent->my_q; | ||
8219 | |||
8220 | rt_se->my_q = rt_rq; | ||
8221 | rt_se->parent = parent; | ||
8222 | INIT_LIST_HEAD(&rt_se->run_list); | ||
8223 | } | ||
8224 | #endif | ||
8225 | 6714 | ||
8226 | void __init sched_init(void) | 6715 | void __init sched_init(void) |
8227 | { | 6716 | { |
@@ -8294,7 +6783,7 @@ void __init sched_init(void) | |||
8294 | init_cfs_rq(&rq->cfs); | 6783 | init_cfs_rq(&rq->cfs); |
8295 | init_rt_rq(&rq->rt, rq); | 6784 | init_rt_rq(&rq->rt, rq); |
8296 | #ifdef CONFIG_FAIR_GROUP_SCHED | 6785 | #ifdef CONFIG_FAIR_GROUP_SCHED |
8297 | root_task_group.shares = root_task_group_load; | 6786 | root_task_group.shares = ROOT_TASK_GROUP_LOAD; |
8298 | INIT_LIST_HEAD(&rq->leaf_cfs_rq_list); | 6787 | INIT_LIST_HEAD(&rq->leaf_cfs_rq_list); |
8299 | /* | 6788 | /* |
8300 | * How much cpu bandwidth does root_task_group get? | 6789 | * How much cpu bandwidth does root_task_group get? |
@@ -8357,10 +6846,6 @@ void __init sched_init(void) | |||
8357 | INIT_HLIST_HEAD(&init_task.preempt_notifiers); | 6846 | INIT_HLIST_HEAD(&init_task.preempt_notifiers); |
8358 | #endif | 6847 | #endif |
8359 | 6848 | ||
8360 | #ifdef CONFIG_SMP | ||
8361 | open_softirq(SCHED_SOFTIRQ, run_rebalance_domains); | ||
8362 | #endif | ||
8363 | |||
8364 | #ifdef CONFIG_RT_MUTEXES | 6849 | #ifdef CONFIG_RT_MUTEXES |
8365 | plist_head_init(&init_task.pi_waiters); | 6850 | plist_head_init(&init_task.pi_waiters); |
8366 | #endif | 6851 | #endif |
@@ -8388,17 +6873,11 @@ void __init sched_init(void) | |||
8388 | 6873 | ||
8389 | #ifdef CONFIG_SMP | 6874 | #ifdef CONFIG_SMP |
8390 | zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT); | 6875 | zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT); |
8391 | #ifdef CONFIG_NO_HZ | ||
8392 | zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT); | ||
8393 | alloc_cpumask_var(&nohz.grp_idle_mask, GFP_NOWAIT); | ||
8394 | atomic_set(&nohz.load_balancer, nr_cpu_ids); | ||
8395 | atomic_set(&nohz.first_pick_cpu, nr_cpu_ids); | ||
8396 | atomic_set(&nohz.second_pick_cpu, nr_cpu_ids); | ||
8397 | #endif | ||
8398 | /* May be allocated at isolcpus cmdline parse time */ | 6876 | /* May be allocated at isolcpus cmdline parse time */ |
8399 | if (cpu_isolated_map == NULL) | 6877 | if (cpu_isolated_map == NULL) |
8400 | zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); | 6878 | zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); |
8401 | #endif /* SMP */ | 6879 | #endif |
6880 | init_sched_fair_class(); | ||
8402 | 6881 | ||
8403 | scheduler_running = 1; | 6882 | scheduler_running = 1; |
8404 | } | 6883 | } |
@@ -8550,169 +7029,14 @@ void set_curr_task(int cpu, struct task_struct *p) | |||
8550 | 7029 | ||
8551 | #endif | 7030 | #endif |
8552 | 7031 | ||
8553 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
8554 | static void free_fair_sched_group(struct task_group *tg) | ||
8555 | { | ||
8556 | int i; | ||
8557 | |||
8558 | destroy_cfs_bandwidth(tg_cfs_bandwidth(tg)); | ||
8559 | |||
8560 | for_each_possible_cpu(i) { | ||
8561 | if (tg->cfs_rq) | ||
8562 | kfree(tg->cfs_rq[i]); | ||
8563 | if (tg->se) | ||
8564 | kfree(tg->se[i]); | ||
8565 | } | ||
8566 | |||
8567 | kfree(tg->cfs_rq); | ||
8568 | kfree(tg->se); | ||
8569 | } | ||
8570 | |||
8571 | static | ||
8572 | int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) | ||
8573 | { | ||
8574 | struct cfs_rq *cfs_rq; | ||
8575 | struct sched_entity *se; | ||
8576 | int i; | ||
8577 | |||
8578 | tg->cfs_rq = kzalloc(sizeof(cfs_rq) * nr_cpu_ids, GFP_KERNEL); | ||
8579 | if (!tg->cfs_rq) | ||
8580 | goto err; | ||
8581 | tg->se = kzalloc(sizeof(se) * nr_cpu_ids, GFP_KERNEL); | ||
8582 | if (!tg->se) | ||
8583 | goto err; | ||
8584 | |||
8585 | tg->shares = NICE_0_LOAD; | ||
8586 | |||
8587 | init_cfs_bandwidth(tg_cfs_bandwidth(tg)); | ||
8588 | |||
8589 | for_each_possible_cpu(i) { | ||
8590 | cfs_rq = kzalloc_node(sizeof(struct cfs_rq), | ||
8591 | GFP_KERNEL, cpu_to_node(i)); | ||
8592 | if (!cfs_rq) | ||
8593 | goto err; | ||
8594 | |||
8595 | se = kzalloc_node(sizeof(struct sched_entity), | ||
8596 | GFP_KERNEL, cpu_to_node(i)); | ||
8597 | if (!se) | ||
8598 | goto err_free_rq; | ||
8599 | |||
8600 | init_cfs_rq(cfs_rq); | ||
8601 | init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]); | ||
8602 | } | ||
8603 | |||
8604 | return 1; | ||
8605 | |||
8606 | err_free_rq: | ||
8607 | kfree(cfs_rq); | ||
8608 | err: | ||
8609 | return 0; | ||
8610 | } | ||
8611 | |||
8612 | static inline void unregister_fair_sched_group(struct task_group *tg, int cpu) | ||
8613 | { | ||
8614 | struct rq *rq = cpu_rq(cpu); | ||
8615 | unsigned long flags; | ||
8616 | |||
8617 | /* | ||
8618 | * Only empty task groups can be destroyed; so we can speculatively | ||
8619 | * check on_list without danger of it being re-added. | ||
8620 | */ | ||
8621 | if (!tg->cfs_rq[cpu]->on_list) | ||
8622 | return; | ||
8623 | |||
8624 | raw_spin_lock_irqsave(&rq->lock, flags); | ||
8625 | list_del_leaf_cfs_rq(tg->cfs_rq[cpu]); | ||
8626 | raw_spin_unlock_irqrestore(&rq->lock, flags); | ||
8627 | } | ||
8628 | #else /* !CONFIG_FAIR_GROUP_SCHED */ | ||
8629 | static inline void free_fair_sched_group(struct task_group *tg) | ||
8630 | { | ||
8631 | } | ||
8632 | |||
8633 | static inline | ||
8634 | int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) | ||
8635 | { | ||
8636 | return 1; | ||
8637 | } | ||
8638 | |||
8639 | static inline void unregister_fair_sched_group(struct task_group *tg, int cpu) | ||
8640 | { | ||
8641 | } | ||
8642 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | ||
8643 | |||
8644 | #ifdef CONFIG_RT_GROUP_SCHED | 7032 | #ifdef CONFIG_RT_GROUP_SCHED |
8645 | static void free_rt_sched_group(struct task_group *tg) | ||
8646 | { | ||
8647 | int i; | ||
8648 | |||
8649 | if (tg->rt_se) | ||
8650 | destroy_rt_bandwidth(&tg->rt_bandwidth); | ||
8651 | |||
8652 | for_each_possible_cpu(i) { | ||
8653 | if (tg->rt_rq) | ||
8654 | kfree(tg->rt_rq[i]); | ||
8655 | if (tg->rt_se) | ||
8656 | kfree(tg->rt_se[i]); | ||
8657 | } | ||
8658 | |||
8659 | kfree(tg->rt_rq); | ||
8660 | kfree(tg->rt_se); | ||
8661 | } | ||
8662 | |||
8663 | static | ||
8664 | int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) | ||
8665 | { | ||
8666 | struct rt_rq *rt_rq; | ||
8667 | struct sched_rt_entity *rt_se; | ||
8668 | int i; | ||
8669 | |||
8670 | tg->rt_rq = kzalloc(sizeof(rt_rq) * nr_cpu_ids, GFP_KERNEL); | ||
8671 | if (!tg->rt_rq) | ||
8672 | goto err; | ||
8673 | tg->rt_se = kzalloc(sizeof(rt_se) * nr_cpu_ids, GFP_KERNEL); | ||
8674 | if (!tg->rt_se) | ||
8675 | goto err; | ||
8676 | |||
8677 | init_rt_bandwidth(&tg->rt_bandwidth, | ||
8678 | ktime_to_ns(def_rt_bandwidth.rt_period), 0); | ||
8679 | |||
8680 | for_each_possible_cpu(i) { | ||
8681 | rt_rq = kzalloc_node(sizeof(struct rt_rq), | ||
8682 | GFP_KERNEL, cpu_to_node(i)); | ||
8683 | if (!rt_rq) | ||
8684 | goto err; | ||
8685 | |||
8686 | rt_se = kzalloc_node(sizeof(struct sched_rt_entity), | ||
8687 | GFP_KERNEL, cpu_to_node(i)); | ||
8688 | if (!rt_se) | ||
8689 | goto err_free_rq; | ||
8690 | |||
8691 | init_rt_rq(rt_rq, cpu_rq(i)); | ||
8692 | rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime; | ||
8693 | init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]); | ||
8694 | } | ||
8695 | |||
8696 | return 1; | ||
8697 | |||
8698 | err_free_rq: | ||
8699 | kfree(rt_rq); | ||
8700 | err: | ||
8701 | return 0; | ||
8702 | } | ||
8703 | #else /* !CONFIG_RT_GROUP_SCHED */ | 7033 | #else /* !CONFIG_RT_GROUP_SCHED */ |
8704 | static inline void free_rt_sched_group(struct task_group *tg) | ||
8705 | { | ||
8706 | } | ||
8707 | |||
8708 | static inline | ||
8709 | int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) | ||
8710 | { | ||
8711 | return 1; | ||
8712 | } | ||
8713 | #endif /* CONFIG_RT_GROUP_SCHED */ | 7034 | #endif /* CONFIG_RT_GROUP_SCHED */ |
8714 | 7035 | ||
8715 | #ifdef CONFIG_CGROUP_SCHED | 7036 | #ifdef CONFIG_CGROUP_SCHED |
7037 | /* task_group_lock serializes the addition/removal of task groups */ | ||
7038 | static DEFINE_SPINLOCK(task_group_lock); | ||
7039 | |||
8716 | static void free_sched_group(struct task_group *tg) | 7040 | static void free_sched_group(struct task_group *tg) |
8717 | { | 7041 | { |
8718 | free_fair_sched_group(tg); | 7042 | free_fair_sched_group(tg); |
@@ -8818,47 +7142,6 @@ void sched_move_task(struct task_struct *tsk) | |||
8818 | #endif /* CONFIG_CGROUP_SCHED */ | 7142 | #endif /* CONFIG_CGROUP_SCHED */ |
8819 | 7143 | ||
8820 | #ifdef CONFIG_FAIR_GROUP_SCHED | 7144 | #ifdef CONFIG_FAIR_GROUP_SCHED |
8821 | static DEFINE_MUTEX(shares_mutex); | ||
8822 | |||
8823 | int sched_group_set_shares(struct task_group *tg, unsigned long shares) | ||
8824 | { | ||
8825 | int i; | ||
8826 | unsigned long flags; | ||
8827 | |||
8828 | /* | ||
8829 | * We can't change the weight of the root cgroup. | ||
8830 | */ | ||
8831 | if (!tg->se[0]) | ||
8832 | return -EINVAL; | ||
8833 | |||
8834 | shares = clamp(shares, scale_load(MIN_SHARES), scale_load(MAX_SHARES)); | ||
8835 | |||
8836 | mutex_lock(&shares_mutex); | ||
8837 | if (tg->shares == shares) | ||
8838 | goto done; | ||
8839 | |||
8840 | tg->shares = shares; | ||
8841 | for_each_possible_cpu(i) { | ||
8842 | struct rq *rq = cpu_rq(i); | ||
8843 | struct sched_entity *se; | ||
8844 | |||
8845 | se = tg->se[i]; | ||
8846 | /* Propagate contribution to hierarchy */ | ||
8847 | raw_spin_lock_irqsave(&rq->lock, flags); | ||
8848 | for_each_sched_entity(se) | ||
8849 | update_cfs_shares(group_cfs_rq(se)); | ||
8850 | raw_spin_unlock_irqrestore(&rq->lock, flags); | ||
8851 | } | ||
8852 | |||
8853 | done: | ||
8854 | mutex_unlock(&shares_mutex); | ||
8855 | return 0; | ||
8856 | } | ||
8857 | |||
8858 | unsigned long sched_group_shares(struct task_group *tg) | ||
8859 | { | ||
8860 | return tg->shares; | ||
8861 | } | ||
8862 | #endif | 7145 | #endif |
8863 | 7146 | ||
8864 | #if defined(CONFIG_RT_GROUP_SCHED) || defined(CONFIG_CFS_BANDWIDTH) | 7147 | #if defined(CONFIG_RT_GROUP_SCHED) || defined(CONFIG_CFS_BANDWIDTH) |
@@ -8883,7 +7166,7 @@ static inline int tg_has_rt_tasks(struct task_group *tg) | |||
8883 | struct task_struct *g, *p; | 7166 | struct task_struct *g, *p; |
8884 | 7167 | ||
8885 | do_each_thread(g, p) { | 7168 | do_each_thread(g, p) { |
8886 | if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg) | 7169 | if (rt_task(p) && task_rq(p)->rt.tg == tg) |
8887 | return 1; | 7170 | return 1; |
8888 | } while_each_thread(g, p); | 7171 | } while_each_thread(g, p); |
8889 | 7172 | ||
@@ -9235,7 +7518,7 @@ static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime); | |||
9235 | static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota) | 7518 | static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota) |
9236 | { | 7519 | { |
9237 | int i, ret = 0, runtime_enabled, runtime_was_enabled; | 7520 | int i, ret = 0, runtime_enabled, runtime_was_enabled; |
9238 | struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg); | 7521 | struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth; |
9239 | 7522 | ||
9240 | if (tg == &root_task_group) | 7523 | if (tg == &root_task_group) |
9241 | return -EINVAL; | 7524 | return -EINVAL; |
@@ -9264,7 +7547,6 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota) | |||
9264 | runtime_enabled = quota != RUNTIME_INF; | 7547 | runtime_enabled = quota != RUNTIME_INF; |
9265 | runtime_was_enabled = cfs_b->quota != RUNTIME_INF; | 7548 | runtime_was_enabled = cfs_b->quota != RUNTIME_INF; |
9266 | account_cfs_bandwidth_used(runtime_enabled, runtime_was_enabled); | 7549 | account_cfs_bandwidth_used(runtime_enabled, runtime_was_enabled); |
9267 | |||
9268 | raw_spin_lock_irq(&cfs_b->lock); | 7550 | raw_spin_lock_irq(&cfs_b->lock); |
9269 | cfs_b->period = ns_to_ktime(period); | 7551 | cfs_b->period = ns_to_ktime(period); |
9270 | cfs_b->quota = quota; | 7552 | cfs_b->quota = quota; |
@@ -9280,13 +7562,13 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota) | |||
9280 | 7562 | ||
9281 | for_each_possible_cpu(i) { | 7563 | for_each_possible_cpu(i) { |
9282 | struct cfs_rq *cfs_rq = tg->cfs_rq[i]; | 7564 | struct cfs_rq *cfs_rq = tg->cfs_rq[i]; |
9283 | struct rq *rq = rq_of(cfs_rq); | 7565 | struct rq *rq = cfs_rq->rq; |
9284 | 7566 | ||
9285 | raw_spin_lock_irq(&rq->lock); | 7567 | raw_spin_lock_irq(&rq->lock); |
9286 | cfs_rq->runtime_enabled = runtime_enabled; | 7568 | cfs_rq->runtime_enabled = runtime_enabled; |
9287 | cfs_rq->runtime_remaining = 0; | 7569 | cfs_rq->runtime_remaining = 0; |
9288 | 7570 | ||
9289 | if (cfs_rq_throttled(cfs_rq)) | 7571 | if (cfs_rq->throttled) |
9290 | unthrottle_cfs_rq(cfs_rq); | 7572 | unthrottle_cfs_rq(cfs_rq); |
9291 | raw_spin_unlock_irq(&rq->lock); | 7573 | raw_spin_unlock_irq(&rq->lock); |
9292 | } | 7574 | } |
@@ -9300,7 +7582,7 @@ int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us) | |||
9300 | { | 7582 | { |
9301 | u64 quota, period; | 7583 | u64 quota, period; |
9302 | 7584 | ||
9303 | period = ktime_to_ns(tg_cfs_bandwidth(tg)->period); | 7585 | period = ktime_to_ns(tg->cfs_bandwidth.period); |
9304 | if (cfs_quota_us < 0) | 7586 | if (cfs_quota_us < 0) |
9305 | quota = RUNTIME_INF; | 7587 | quota = RUNTIME_INF; |
9306 | else | 7588 | else |
@@ -9313,10 +7595,10 @@ long tg_get_cfs_quota(struct task_group *tg) | |||
9313 | { | 7595 | { |
9314 | u64 quota_us; | 7596 | u64 quota_us; |
9315 | 7597 | ||
9316 | if (tg_cfs_bandwidth(tg)->quota == RUNTIME_INF) | 7598 | if (tg->cfs_bandwidth.quota == RUNTIME_INF) |
9317 | return -1; | 7599 | return -1; |
9318 | 7600 | ||
9319 | quota_us = tg_cfs_bandwidth(tg)->quota; | 7601 | quota_us = tg->cfs_bandwidth.quota; |
9320 | do_div(quota_us, NSEC_PER_USEC); | 7602 | do_div(quota_us, NSEC_PER_USEC); |
9321 | 7603 | ||
9322 | return quota_us; | 7604 | return quota_us; |
@@ -9327,7 +7609,7 @@ int tg_set_cfs_period(struct task_group *tg, long cfs_period_us) | |||
9327 | u64 quota, period; | 7609 | u64 quota, period; |
9328 | 7610 | ||
9329 | period = (u64)cfs_period_us * NSEC_PER_USEC; | 7611 | period = (u64)cfs_period_us * NSEC_PER_USEC; |
9330 | quota = tg_cfs_bandwidth(tg)->quota; | 7612 | quota = tg->cfs_bandwidth.quota; |
9331 | 7613 | ||
9332 | if (period <= 0) | 7614 | if (period <= 0) |
9333 | return -EINVAL; | 7615 | return -EINVAL; |
@@ -9339,7 +7621,7 @@ long tg_get_cfs_period(struct task_group *tg) | |||
9339 | { | 7621 | { |
9340 | u64 cfs_period_us; | 7622 | u64 cfs_period_us; |
9341 | 7623 | ||
9342 | cfs_period_us = ktime_to_ns(tg_cfs_bandwidth(tg)->period); | 7624 | cfs_period_us = ktime_to_ns(tg->cfs_bandwidth.period); |
9343 | do_div(cfs_period_us, NSEC_PER_USEC); | 7625 | do_div(cfs_period_us, NSEC_PER_USEC); |
9344 | 7626 | ||
9345 | return cfs_period_us; | 7627 | return cfs_period_us; |
@@ -9399,13 +7681,13 @@ static u64 normalize_cfs_quota(struct task_group *tg, | |||
9399 | static int tg_cfs_schedulable_down(struct task_group *tg, void *data) | 7681 | static int tg_cfs_schedulable_down(struct task_group *tg, void *data) |
9400 | { | 7682 | { |
9401 | struct cfs_schedulable_data *d = data; | 7683 | struct cfs_schedulable_data *d = data; |
9402 | struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg); | 7684 | struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth; |
9403 | s64 quota = 0, parent_quota = -1; | 7685 | s64 quota = 0, parent_quota = -1; |
9404 | 7686 | ||
9405 | if (!tg->parent) { | 7687 | if (!tg->parent) { |
9406 | quota = RUNTIME_INF; | 7688 | quota = RUNTIME_INF; |
9407 | } else { | 7689 | } else { |
9408 | struct cfs_bandwidth *parent_b = tg_cfs_bandwidth(tg->parent); | 7690 | struct cfs_bandwidth *parent_b = &tg->parent->cfs_bandwidth; |
9409 | 7691 | ||
9410 | quota = normalize_cfs_quota(tg, d); | 7692 | quota = normalize_cfs_quota(tg, d); |
9411 | parent_quota = parent_b->hierarchal_quota; | 7693 | parent_quota = parent_b->hierarchal_quota; |
@@ -9449,7 +7731,7 @@ static int cpu_stats_show(struct cgroup *cgrp, struct cftype *cft, | |||
9449 | struct cgroup_map_cb *cb) | 7731 | struct cgroup_map_cb *cb) |
9450 | { | 7732 | { |
9451 | struct task_group *tg = cgroup_tg(cgrp); | 7733 | struct task_group *tg = cgroup_tg(cgrp); |
9452 | struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg); | 7734 | struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth; |
9453 | 7735 | ||
9454 | cb->fill(cb, "nr_periods", cfs_b->nr_periods); | 7736 | cb->fill(cb, "nr_periods", cfs_b->nr_periods); |
9455 | cb->fill(cb, "nr_throttled", cfs_b->nr_throttled); | 7737 | cb->fill(cb, "nr_throttled", cfs_b->nr_throttled); |
@@ -9748,7 +8030,7 @@ static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) | |||
9748 | * | 8030 | * |
9749 | * called with rq->lock held. | 8031 | * called with rq->lock held. |
9750 | */ | 8032 | */ |
9751 | static void cpuacct_charge(struct task_struct *tsk, u64 cputime) | 8033 | void cpuacct_charge(struct task_struct *tsk, u64 cputime) |
9752 | { | 8034 | { |
9753 | struct cpuacct *ca; | 8035 | struct cpuacct *ca; |
9754 | int cpu; | 8036 | int cpu; |
@@ -9790,7 +8072,7 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime) | |||
9790 | /* | 8072 | /* |
9791 | * Charge the system/user time to the task's accounting group. | 8073 | * Charge the system/user time to the task's accounting group. |
9792 | */ | 8074 | */ |
9793 | static void cpuacct_update_stats(struct task_struct *tsk, | 8075 | void cpuacct_update_stats(struct task_struct *tsk, |
9794 | enum cpuacct_stat_index idx, cputime_t val) | 8076 | enum cpuacct_stat_index idx, cputime_t val) |
9795 | { | 8077 | { |
9796 | struct cpuacct *ca; | 8078 | struct cpuacct *ca; |
diff --git a/kernel/sched.h b/kernel/sched.h new file mode 100644 index 000000000000..675261ce3c4a --- /dev/null +++ b/kernel/sched.h | |||
@@ -0,0 +1,1064 @@ | |||
1 | |||
2 | #include <linux/sched.h> | ||
3 | #include <linux/mutex.h> | ||
4 | #include <linux/spinlock.h> | ||
5 | #include <linux/stop_machine.h> | ||
6 | |||
7 | #include "sched_cpupri.h" | ||
8 | |||
9 | extern __read_mostly int scheduler_running; | ||
10 | |||
11 | /* | ||
12 | * Convert user-nice values [ -20 ... 0 ... 19 ] | ||
13 | * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], | ||
14 | * and back. | ||
15 | */ | ||
16 | #define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20) | ||
17 | #define PRIO_TO_NICE(prio) ((prio) - MAX_RT_PRIO - 20) | ||
18 | #define TASK_NICE(p) PRIO_TO_NICE((p)->static_prio) | ||
19 | |||
20 | /* | ||
21 | * 'User priority' is the nice value converted to something we | ||
22 | * can work with better when scaling various scheduler parameters, | ||
23 | * it's a [ 0 ... 39 ] range. | ||
24 | */ | ||
25 | #define USER_PRIO(p) ((p)-MAX_RT_PRIO) | ||
26 | #define TASK_USER_PRIO(p) USER_PRIO((p)->static_prio) | ||
27 | #define MAX_USER_PRIO (USER_PRIO(MAX_PRIO)) | ||
28 | |||
29 | /* | ||
30 | * Helpers for converting nanosecond timing to jiffy resolution | ||
31 | */ | ||
32 | #define NS_TO_JIFFIES(TIME) ((unsigned long)(TIME) / (NSEC_PER_SEC / HZ)) | ||
33 | |||
34 | #define NICE_0_LOAD SCHED_LOAD_SCALE | ||
35 | #define NICE_0_SHIFT SCHED_LOAD_SHIFT | ||
36 | |||
37 | /* | ||
38 | * These are the 'tuning knobs' of the scheduler: | ||
39 | * | ||
40 | * default timeslice is 100 msecs (used only for SCHED_RR tasks). | ||
41 | * Timeslices get refilled after they expire. | ||
42 | */ | ||
43 | #define DEF_TIMESLICE (100 * HZ / 1000) | ||
44 | |||
45 | /* | ||
46 | * single value that denotes runtime == period, ie unlimited time. | ||
47 | */ | ||
48 | #define RUNTIME_INF ((u64)~0ULL) | ||
49 | |||
50 | static inline int rt_policy(int policy) | ||
51 | { | ||
52 | if (policy == SCHED_FIFO || policy == SCHED_RR) | ||
53 | return 1; | ||
54 | return 0; | ||
55 | } | ||
56 | |||
57 | static inline int task_has_rt_policy(struct task_struct *p) | ||
58 | { | ||
59 | return rt_policy(p->policy); | ||
60 | } | ||
61 | |||
62 | /* | ||
63 | * This is the priority-queue data structure of the RT scheduling class: | ||
64 | */ | ||
65 | struct rt_prio_array { | ||
66 | DECLARE_BITMAP(bitmap, MAX_RT_PRIO+1); /* include 1 bit for delimiter */ | ||
67 | struct list_head queue[MAX_RT_PRIO]; | ||
68 | }; | ||
69 | |||
70 | struct rt_bandwidth { | ||
71 | /* nests inside the rq lock: */ | ||
72 | raw_spinlock_t rt_runtime_lock; | ||
73 | ktime_t rt_period; | ||
74 | u64 rt_runtime; | ||
75 | struct hrtimer rt_period_timer; | ||
76 | }; | ||
77 | |||
78 | extern struct mutex sched_domains_mutex; | ||
79 | |||
80 | #ifdef CONFIG_CGROUP_SCHED | ||
81 | |||
82 | #include <linux/cgroup.h> | ||
83 | |||
84 | struct cfs_rq; | ||
85 | struct rt_rq; | ||
86 | |||
87 | static LIST_HEAD(task_groups); | ||
88 | |||
89 | struct cfs_bandwidth { | ||
90 | #ifdef CONFIG_CFS_BANDWIDTH | ||
91 | raw_spinlock_t lock; | ||
92 | ktime_t period; | ||
93 | u64 quota, runtime; | ||
94 | s64 hierarchal_quota; | ||
95 | u64 runtime_expires; | ||
96 | |||
97 | int idle, timer_active; | ||
98 | struct hrtimer period_timer, slack_timer; | ||
99 | struct list_head throttled_cfs_rq; | ||
100 | |||
101 | /* statistics */ | ||
102 | int nr_periods, nr_throttled; | ||
103 | u64 throttled_time; | ||
104 | #endif | ||
105 | }; | ||
106 | |||
107 | /* task group related information */ | ||
108 | struct task_group { | ||
109 | struct cgroup_subsys_state css; | ||
110 | |||
111 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
112 | /* schedulable entities of this group on each cpu */ | ||
113 | struct sched_entity **se; | ||
114 | /* runqueue "owned" by this group on each cpu */ | ||
115 | struct cfs_rq **cfs_rq; | ||
116 | unsigned long shares; | ||
117 | |||
118 | atomic_t load_weight; | ||
119 | #endif | ||
120 | |||
121 | #ifdef CONFIG_RT_GROUP_SCHED | ||
122 | struct sched_rt_entity **rt_se; | ||
123 | struct rt_rq **rt_rq; | ||
124 | |||
125 | struct rt_bandwidth rt_bandwidth; | ||
126 | #endif | ||
127 | |||
128 | struct rcu_head rcu; | ||
129 | struct list_head list; | ||
130 | |||
131 | struct task_group *parent; | ||
132 | struct list_head siblings; | ||
133 | struct list_head children; | ||
134 | |||
135 | #ifdef CONFIG_SCHED_AUTOGROUP | ||
136 | struct autogroup *autogroup; | ||
137 | #endif | ||
138 | |||
139 | struct cfs_bandwidth cfs_bandwidth; | ||
140 | }; | ||
141 | |||
142 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
143 | #define ROOT_TASK_GROUP_LOAD NICE_0_LOAD | ||
144 | |||
145 | /* | ||
146 | * A weight of 0 or 1 can cause arithmetic problems. | ||
147 | * The weight of a cfs_rq is the sum of the weights of the entities | ||
148 | * queued on it, so the weight of an entity should not be too large, | ||
149 | * and neither should the shares value of a task group. | ||
150 | * (The default weight is 1024 - so there's no practical | ||
151 | * limitation from this.) | ||
152 | */ | ||
153 | #define MIN_SHARES (1UL << 1) | ||
154 | #define MAX_SHARES (1UL << 18) | ||
155 | #endif | ||
156 | |||
157 | /* Default task group. | ||
158 | * Every task in the system belongs to this group at bootup. | ||
159 | */ | ||
160 | extern struct task_group root_task_group; | ||
161 | |||
162 | typedef int (*tg_visitor)(struct task_group *, void *); | ||
163 | |||
164 | extern int walk_tg_tree_from(struct task_group *from, | ||
165 | tg_visitor down, tg_visitor up, void *data); | ||
166 | |||
167 | /* | ||
168 | * Iterate the full tree, calling @down when first entering a node and @up when | ||
169 | * leaving it for the final time. | ||
170 | * | ||
171 | * Caller must hold rcu_lock or sufficient equivalent. | ||
172 | */ | ||
173 | static inline int walk_tg_tree(tg_visitor down, tg_visitor up, void *data) | ||
174 | { | ||
175 | return walk_tg_tree_from(&root_task_group, down, up, data); | ||
176 | } | ||
177 | |||
178 | extern int tg_nop(struct task_group *tg, void *data); | ||
179 | |||
180 | extern void free_fair_sched_group(struct task_group *tg); | ||
181 | extern int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent); | ||
182 | extern void unregister_fair_sched_group(struct task_group *tg, int cpu); | ||
183 | extern void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, | ||
184 | struct sched_entity *se, int cpu, | ||
185 | struct sched_entity *parent); | ||
186 | extern void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b); | ||
187 | extern int sched_group_set_shares(struct task_group *tg, unsigned long shares); | ||
188 | |||
189 | extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b); | ||
190 | extern void __start_cfs_bandwidth(struct cfs_bandwidth *cfs_b); | ||
191 | extern void unthrottle_cfs_rq(struct cfs_rq *cfs_rq); | ||
192 | |||
193 | extern void free_rt_sched_group(struct task_group *tg); | ||
194 | extern int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent); | ||
195 | extern void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq, | ||
196 | struct sched_rt_entity *rt_se, int cpu, | ||
197 | struct sched_rt_entity *parent); | ||
198 | |||
199 | #else /* CONFIG_CGROUP_SCHED */ | ||
200 | |||
201 | struct cfs_bandwidth { }; | ||
202 | |||
203 | #endif /* CONFIG_CGROUP_SCHED */ | ||
204 | |||
205 | /* CFS-related fields in a runqueue */ | ||
206 | struct cfs_rq { | ||
207 | struct load_weight load; | ||
208 | unsigned long nr_running, h_nr_running; | ||
209 | |||
210 | u64 exec_clock; | ||
211 | u64 min_vruntime; | ||
212 | #ifndef CONFIG_64BIT | ||
213 | u64 min_vruntime_copy; | ||
214 | #endif | ||
215 | |||
216 | struct rb_root tasks_timeline; | ||
217 | struct rb_node *rb_leftmost; | ||
218 | |||
219 | struct list_head tasks; | ||
220 | struct list_head *balance_iterator; | ||
221 | |||
222 | /* | ||
223 | * 'curr' points to currently running entity on this cfs_rq. | ||
224 | * It is set to NULL otherwise (i.e when none are currently running). | ||
225 | */ | ||
226 | struct sched_entity *curr, *next, *last, *skip; | ||
227 | |||
228 | #ifdef CONFIG_SCHED_DEBUG | ||
229 | unsigned int nr_spread_over; | ||
230 | #endif | ||
231 | |||
232 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
233 | struct rq *rq; /* cpu runqueue to which this cfs_rq is attached */ | ||
234 | |||
235 | /* | ||
236 | * leaf cfs_rqs are those that hold tasks (lowest schedulable entity in | ||
237 | * a hierarchy). Non-leaf lrqs hold other higher schedulable entities | ||
238 | * (like users, containers etc.) | ||
239 | * | ||
240 | * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a cpu. This | ||
241 | * list is used during load balance. | ||
242 | */ | ||
243 | int on_list; | ||
244 | struct list_head leaf_cfs_rq_list; | ||
245 | struct task_group *tg; /* group that "owns" this runqueue */ | ||
246 | |||
247 | #ifdef CONFIG_SMP | ||
248 | /* | ||
249 | * the part of load.weight contributed by tasks | ||
250 | */ | ||
251 | unsigned long task_weight; | ||
252 | |||
253 | /* | ||
254 | * h_load = weight * f(tg) | ||
255 | * | ||
256 | * Where f(tg) is the recursive weight fraction assigned to | ||
257 | * this group. | ||
258 | */ | ||
259 | unsigned long h_load; | ||
260 | |||
261 | /* | ||
262 | * Maintaining per-cpu shares distribution for group scheduling | ||
263 | * | ||
264 | * load_stamp is the last time we updated the load average | ||
265 | * load_last is the last time we updated the load average and saw load | ||
266 | * load_unacc_exec_time is currently unaccounted execution time | ||
267 | */ | ||
268 | u64 load_avg; | ||
269 | u64 load_period; | ||
270 | u64 load_stamp, load_last, load_unacc_exec_time; | ||
271 | |||
272 | unsigned long load_contribution; | ||
273 | #endif /* CONFIG_SMP */ | ||
274 | #ifdef CONFIG_CFS_BANDWIDTH | ||
275 | int runtime_enabled; | ||
276 | u64 runtime_expires; | ||
277 | s64 runtime_remaining; | ||
278 | |||
279 | u64 throttled_timestamp; | ||
280 | int throttled, throttle_count; | ||
281 | struct list_head throttled_list; | ||
282 | #endif /* CONFIG_CFS_BANDWIDTH */ | ||
283 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | ||
284 | }; | ||
285 | |||
286 | static inline int rt_bandwidth_enabled(void) | ||
287 | { | ||
288 | return sysctl_sched_rt_runtime >= 0; | ||
289 | } | ||
290 | |||
291 | /* Real-Time class related fields in a runqueue: */ | ||
292 | struct rt_rq { | ||
293 | struct rt_prio_array active; | ||
294 | unsigned long rt_nr_running; | ||
295 | #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED | ||
296 | struct { | ||
297 | int curr; /* highest queued rt task prio */ | ||
298 | #ifdef CONFIG_SMP | ||
299 | int next; /* next highest */ | ||
300 | #endif | ||
301 | } highest_prio; | ||
302 | #endif | ||
303 | #ifdef CONFIG_SMP | ||
304 | unsigned long rt_nr_migratory; | ||
305 | unsigned long rt_nr_total; | ||
306 | int overloaded; | ||
307 | struct plist_head pushable_tasks; | ||
308 | #endif | ||
309 | int rt_throttled; | ||
310 | u64 rt_time; | ||
311 | u64 rt_runtime; | ||
312 | /* Nests inside the rq lock: */ | ||
313 | raw_spinlock_t rt_runtime_lock; | ||
314 | |||
315 | #ifdef CONFIG_RT_GROUP_SCHED | ||
316 | unsigned long rt_nr_boosted; | ||
317 | |||
318 | struct rq *rq; | ||
319 | struct list_head leaf_rt_rq_list; | ||
320 | struct task_group *tg; | ||
321 | #endif | ||
322 | }; | ||
323 | |||
324 | #ifdef CONFIG_SMP | ||
325 | |||
326 | /* | ||
327 | * We add the notion of a root-domain which will be used to define per-domain | ||
328 | * variables. Each exclusive cpuset essentially defines an island domain by | ||
329 | * fully partitioning the member cpus from any other cpuset. Whenever a new | ||
330 | * exclusive cpuset is created, we also create and attach a new root-domain | ||
331 | * object. | ||
332 | * | ||
333 | */ | ||
334 | struct root_domain { | ||
335 | atomic_t refcount; | ||
336 | atomic_t rto_count; | ||
337 | struct rcu_head rcu; | ||
338 | cpumask_var_t span; | ||
339 | cpumask_var_t online; | ||
340 | |||
341 | /* | ||
342 | * The "RT overload" flag: it gets set if a CPU has more than | ||
343 | * one runnable RT task. | ||
344 | */ | ||
345 | cpumask_var_t rto_mask; | ||
346 | struct cpupri cpupri; | ||
347 | }; | ||
348 | |||
349 | extern struct root_domain def_root_domain; | ||
350 | |||
351 | #endif /* CONFIG_SMP */ | ||
352 | |||
353 | /* | ||
354 | * This is the main, per-CPU runqueue data structure. | ||
355 | * | ||
356 | * Locking rule: those places that want to lock multiple runqueues | ||
357 | * (such as the load balancing or the thread migration code), lock | ||
358 | * acquire operations must be ordered by ascending &runqueue. | ||
359 | */ | ||
360 | struct rq { | ||
361 | /* runqueue lock: */ | ||
362 | raw_spinlock_t lock; | ||
363 | |||
364 | /* | ||
365 | * nr_running and cpu_load should be in the same cacheline because | ||
366 | * remote CPUs use both these fields when doing load calculation. | ||
367 | */ | ||
368 | unsigned long nr_running; | ||
369 | #define CPU_LOAD_IDX_MAX 5 | ||
370 | unsigned long cpu_load[CPU_LOAD_IDX_MAX]; | ||
371 | unsigned long last_load_update_tick; | ||
372 | #ifdef CONFIG_NO_HZ | ||
373 | u64 nohz_stamp; | ||
374 | unsigned char nohz_balance_kick; | ||
375 | #endif | ||
376 | int skip_clock_update; | ||
377 | |||
378 | /* capture load from *all* tasks on this cpu: */ | ||
379 | struct load_weight load; | ||
380 | unsigned long nr_load_updates; | ||
381 | u64 nr_switches; | ||
382 | |||
383 | struct cfs_rq cfs; | ||
384 | struct rt_rq rt; | ||
385 | |||
386 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
387 | /* list of leaf cfs_rq on this cpu: */ | ||
388 | struct list_head leaf_cfs_rq_list; | ||
389 | #endif | ||
390 | #ifdef CONFIG_RT_GROUP_SCHED | ||
391 | struct list_head leaf_rt_rq_list; | ||
392 | #endif | ||
393 | |||
394 | /* | ||
395 | * This is part of a global counter where only the total sum | ||
396 | * over all CPUs matters. A task can increase this counter on | ||
397 | * one CPU and if it got migrated afterwards it may decrease | ||
398 | * it on another CPU. Always updated under the runqueue lock: | ||
399 | */ | ||
400 | unsigned long nr_uninterruptible; | ||
401 | |||
402 | struct task_struct *curr, *idle, *stop; | ||
403 | unsigned long next_balance; | ||
404 | struct mm_struct *prev_mm; | ||
405 | |||
406 | u64 clock; | ||
407 | u64 clock_task; | ||
408 | |||
409 | atomic_t nr_iowait; | ||
410 | |||
411 | #ifdef CONFIG_SMP | ||
412 | struct root_domain *rd; | ||
413 | struct sched_domain *sd; | ||
414 | |||
415 | unsigned long cpu_power; | ||
416 | |||
417 | unsigned char idle_balance; | ||
418 | /* For active balancing */ | ||
419 | int post_schedule; | ||
420 | int active_balance; | ||
421 | int push_cpu; | ||
422 | struct cpu_stop_work active_balance_work; | ||
423 | /* cpu of this runqueue: */ | ||
424 | int cpu; | ||
425 | int online; | ||
426 | |||
427 | u64 rt_avg; | ||
428 | u64 age_stamp; | ||
429 | u64 idle_stamp; | ||
430 | u64 avg_idle; | ||
431 | #endif | ||
432 | |||
433 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | ||
434 | u64 prev_irq_time; | ||
435 | #endif | ||
436 | #ifdef CONFIG_PARAVIRT | ||
437 | u64 prev_steal_time; | ||
438 | #endif | ||
439 | #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING | ||
440 | u64 prev_steal_time_rq; | ||
441 | #endif | ||
442 | |||
443 | /* calc_load related fields */ | ||
444 | unsigned long calc_load_update; | ||
445 | long calc_load_active; | ||
446 | |||
447 | #ifdef CONFIG_SCHED_HRTICK | ||
448 | #ifdef CONFIG_SMP | ||
449 | int hrtick_csd_pending; | ||
450 | struct call_single_data hrtick_csd; | ||
451 | #endif | ||
452 | struct hrtimer hrtick_timer; | ||
453 | #endif | ||
454 | |||
455 | #ifdef CONFIG_SCHEDSTATS | ||
456 | /* latency stats */ | ||
457 | struct sched_info rq_sched_info; | ||
458 | unsigned long long rq_cpu_time; | ||
459 | /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */ | ||
460 | |||
461 | /* sys_sched_yield() stats */ | ||
462 | unsigned int yld_count; | ||
463 | |||
464 | /* schedule() stats */ | ||
465 | unsigned int sched_switch; | ||
466 | unsigned int sched_count; | ||
467 | unsigned int sched_goidle; | ||
468 | |||
469 | /* try_to_wake_up() stats */ | ||
470 | unsigned int ttwu_count; | ||
471 | unsigned int ttwu_local; | ||
472 | #endif | ||
473 | |||
474 | #ifdef CONFIG_SMP | ||
475 | struct llist_head wake_list; | ||
476 | #endif | ||
477 | }; | ||
478 | |||
479 | static inline int cpu_of(struct rq *rq) | ||
480 | { | ||
481 | #ifdef CONFIG_SMP | ||
482 | return rq->cpu; | ||
483 | #else | ||
484 | return 0; | ||
485 | #endif | ||
486 | } | ||
487 | |||
488 | DECLARE_PER_CPU(struct rq, runqueues); | ||
489 | |||
490 | #define rcu_dereference_check_sched_domain(p) \ | ||
491 | rcu_dereference_check((p), \ | ||
492 | lockdep_is_held(&sched_domains_mutex)) | ||
493 | |||
494 | /* | ||
495 | * The domain tree (rq->sd) is protected by RCU's quiescent state transition. | ||
496 | * See detach_destroy_domains: synchronize_sched for details. | ||
497 | * | ||
498 | * The domain tree of any CPU may only be accessed from within | ||
499 | * preempt-disabled sections. | ||
500 | */ | ||
501 | #define for_each_domain(cpu, __sd) \ | ||
502 | for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent) | ||
503 | |||
504 | #define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) | ||
505 | #define this_rq() (&__get_cpu_var(runqueues)) | ||
506 | #define task_rq(p) cpu_rq(task_cpu(p)) | ||
507 | #define cpu_curr(cpu) (cpu_rq(cpu)->curr) | ||
508 | #define raw_rq() (&__raw_get_cpu_var(runqueues)) | ||
509 | |||
510 | #include "sched_stats.h" | ||
511 | #include "sched_autogroup.h" | ||
512 | |||
513 | #ifdef CONFIG_CGROUP_SCHED | ||
514 | |||
515 | /* | ||
516 | * Return the group to which this task belongs. | ||
517 | * | ||
518 | * We use task_subsys_state_check() and extend the RCU verification with | ||
519 | * p->pi_lock and rq->lock because cpu_cgroup_attach() holds those locks for each | ||
520 | * task it moves into the cgroup. Therefore by holding either of those locks, | ||
521 | * we pin the task to the current cgroup. | ||
522 | */ | ||
523 | static inline struct task_group *task_group(struct task_struct *p) | ||
524 | { | ||
525 | struct task_group *tg; | ||
526 | struct cgroup_subsys_state *css; | ||
527 | |||
528 | css = task_subsys_state_check(p, cpu_cgroup_subsys_id, | ||
529 | lockdep_is_held(&p->pi_lock) || | ||
530 | lockdep_is_held(&task_rq(p)->lock)); | ||
531 | tg = container_of(css, struct task_group, css); | ||
532 | |||
533 | return autogroup_task_group(p, tg); | ||
534 | } | ||
535 | |||
536 | /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ | ||
537 | static inline void set_task_rq(struct task_struct *p, unsigned int cpu) | ||
538 | { | ||
539 | #if defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED) | ||
540 | struct task_group *tg = task_group(p); | ||
541 | #endif | ||
542 | |||
543 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
544 | p->se.cfs_rq = tg->cfs_rq[cpu]; | ||
545 | p->se.parent = tg->se[cpu]; | ||
546 | #endif | ||
547 | |||
548 | #ifdef CONFIG_RT_GROUP_SCHED | ||
549 | p->rt.rt_rq = tg->rt_rq[cpu]; | ||
550 | p->rt.parent = tg->rt_se[cpu]; | ||
551 | #endif | ||
552 | } | ||
553 | |||
554 | #else /* CONFIG_CGROUP_SCHED */ | ||
555 | |||
556 | static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } | ||
557 | static inline struct task_group *task_group(struct task_struct *p) | ||
558 | { | ||
559 | return NULL; | ||
560 | } | ||
561 | |||
562 | #endif /* CONFIG_CGROUP_SCHED */ | ||
563 | |||
564 | static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) | ||
565 | { | ||
566 | set_task_rq(p, cpu); | ||
567 | #ifdef CONFIG_SMP | ||
568 | /* | ||
569 | * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be | ||
570 | * successfully executed on another CPU. We must ensure that updates of | ||
571 | * per-task data have been completed by this moment. | ||
572 | */ | ||
573 | smp_wmb(); | ||
574 | task_thread_info(p)->cpu = cpu; | ||
575 | #endif | ||
576 | } | ||
577 | |||
578 | /* | ||
579 | * Tunables that become constants when CONFIG_SCHED_DEBUG is off: | ||
580 | */ | ||
581 | #ifdef CONFIG_SCHED_DEBUG | ||
582 | # define const_debug __read_mostly | ||
583 | #else | ||
584 | # define const_debug const | ||
585 | #endif | ||
586 | |||
587 | extern const_debug unsigned int sysctl_sched_features; | ||
588 | |||
589 | #define SCHED_FEAT(name, enabled) \ | ||
590 | __SCHED_FEAT_##name , | ||
591 | |||
592 | enum { | ||
593 | #include "sched_features.h" | ||
594 | }; | ||
595 | |||
596 | #undef SCHED_FEAT | ||
597 | |||
598 | #define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x)) | ||
599 | |||
600 | static inline u64 global_rt_period(void) | ||
601 | { | ||
602 | return (u64)sysctl_sched_rt_period * NSEC_PER_USEC; | ||
603 | } | ||
604 | |||
605 | static inline u64 global_rt_runtime(void) | ||
606 | { | ||
607 | if (sysctl_sched_rt_runtime < 0) | ||
608 | return RUNTIME_INF; | ||
609 | |||
610 | return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC; | ||
611 | } | ||
612 | |||
613 | |||
614 | |||
615 | static inline int task_current(struct rq *rq, struct task_struct *p) | ||
616 | { | ||
617 | return rq->curr == p; | ||
618 | } | ||
619 | |||
620 | static inline int task_running(struct rq *rq, struct task_struct *p) | ||
621 | { | ||
622 | #ifdef CONFIG_SMP | ||
623 | return p->on_cpu; | ||
624 | #else | ||
625 | return task_current(rq, p); | ||
626 | #endif | ||
627 | } | ||
628 | |||
629 | |||
630 | #ifndef prepare_arch_switch | ||
631 | # define prepare_arch_switch(next) do { } while (0) | ||
632 | #endif | ||
633 | #ifndef finish_arch_switch | ||
634 | # define finish_arch_switch(prev) do { } while (0) | ||
635 | #endif | ||
636 | |||
637 | #ifndef __ARCH_WANT_UNLOCKED_CTXSW | ||
638 | static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) | ||
639 | { | ||
640 | #ifdef CONFIG_SMP | ||
641 | /* | ||
642 | * We can optimise this out completely for !SMP, because the | ||
643 | * SMP rebalancing from interrupt is the only thing that cares | ||
644 | * here. | ||
645 | */ | ||
646 | next->on_cpu = 1; | ||
647 | #endif | ||
648 | } | ||
649 | |||
650 | static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) | ||
651 | { | ||
652 | #ifdef CONFIG_SMP | ||
653 | /* | ||
654 | * After ->on_cpu is cleared, the task can be moved to a different CPU. | ||
655 | * We must ensure this doesn't happen until the switch is completely | ||
656 | * finished. | ||
657 | */ | ||
658 | smp_wmb(); | ||
659 | prev->on_cpu = 0; | ||
660 | #endif | ||
661 | #ifdef CONFIG_DEBUG_SPINLOCK | ||
662 | /* this is a valid case when another task releases the spinlock */ | ||
663 | rq->lock.owner = current; | ||
664 | #endif | ||
665 | /* | ||
666 | * If we are tracking spinlock dependencies then we have to | ||
667 | * fix up the runqueue lock - which gets 'carried over' from | ||
668 | * prev into current: | ||
669 | */ | ||
670 | spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); | ||
671 | |||
672 | raw_spin_unlock_irq(&rq->lock); | ||
673 | } | ||
674 | |||
675 | #else /* __ARCH_WANT_UNLOCKED_CTXSW */ | ||
676 | static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) | ||
677 | { | ||
678 | #ifdef CONFIG_SMP | ||
679 | /* | ||
680 | * We can optimise this out completely for !SMP, because the | ||
681 | * SMP rebalancing from interrupt is the only thing that cares | ||
682 | * here. | ||
683 | */ | ||
684 | next->on_cpu = 1; | ||
685 | #endif | ||
686 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW | ||
687 | raw_spin_unlock_irq(&rq->lock); | ||
688 | #else | ||
689 | raw_spin_unlock(&rq->lock); | ||
690 | #endif | ||
691 | } | ||
692 | |||
693 | static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) | ||
694 | { | ||
695 | #ifdef CONFIG_SMP | ||
696 | /* | ||
697 | * After ->on_cpu is cleared, the task can be moved to a different CPU. | ||
698 | * We must ensure this doesn't happen until the switch is completely | ||
699 | * finished. | ||
700 | */ | ||
701 | smp_wmb(); | ||
702 | prev->on_cpu = 0; | ||
703 | #endif | ||
704 | #ifndef __ARCH_WANT_INTERRUPTS_ON_CTXSW | ||
705 | local_irq_enable(); | ||
706 | #endif | ||
707 | } | ||
708 | #endif /* __ARCH_WANT_UNLOCKED_CTXSW */ | ||
709 | |||
710 | |||
711 | static inline void update_load_add(struct load_weight *lw, unsigned long inc) | ||
712 | { | ||
713 | lw->weight += inc; | ||
714 | lw->inv_weight = 0; | ||
715 | } | ||
716 | |||
717 | static inline void update_load_sub(struct load_weight *lw, unsigned long dec) | ||
718 | { | ||
719 | lw->weight -= dec; | ||
720 | lw->inv_weight = 0; | ||
721 | } | ||
722 | |||
723 | static inline void update_load_set(struct load_weight *lw, unsigned long w) | ||
724 | { | ||
725 | lw->weight = w; | ||
726 | lw->inv_weight = 0; | ||
727 | } | ||
728 | |||
729 | /* | ||
730 | * To aid in avoiding the subversion of "niceness" due to uneven distribution | ||
731 | * of tasks with abnormal "nice" values across CPUs the contribution that | ||
732 | * each task makes to its run queue's load is weighted according to its | ||
733 | * scheduling class and "nice" value. For SCHED_NORMAL tasks this is just a | ||
734 | * scaled version of the new time slice allocation that they receive on time | ||
735 | * slice expiry etc. | ||
736 | */ | ||
737 | |||
738 | #define WEIGHT_IDLEPRIO 3 | ||
739 | #define WMULT_IDLEPRIO 1431655765 | ||
740 | |||
741 | /* | ||
742 | * Nice levels are multiplicative, with a gentle 10% change for every | ||
743 | * nice level changed. I.e. when a CPU-bound task goes from nice 0 to | ||
744 | * nice 1, it will get ~10% less CPU time than another CPU-bound task | ||
745 | * that remained on nice 0. | ||
746 | * | ||
747 | * The "10% effect" is relative and cumulative: from _any_ nice level, | ||
748 | * if you go up 1 level, it's -10% CPU usage, if you go down 1 level | ||
749 | * it's +10% CPU usage. (to achieve that we use a multiplier of 1.25. | ||
750 | * If a task goes up by ~10% and another task goes down by ~10% then | ||
751 | * the relative distance between them is ~25%.) | ||
752 | */ | ||
753 | static const int prio_to_weight[40] = { | ||
754 | /* -20 */ 88761, 71755, 56483, 46273, 36291, | ||
755 | /* -15 */ 29154, 23254, 18705, 14949, 11916, | ||
756 | /* -10 */ 9548, 7620, 6100, 4904, 3906, | ||
757 | /* -5 */ 3121, 2501, 1991, 1586, 1277, | ||
758 | /* 0 */ 1024, 820, 655, 526, 423, | ||
759 | /* 5 */ 335, 272, 215, 172, 137, | ||
760 | /* 10 */ 110, 87, 70, 56, 45, | ||
761 | /* 15 */ 36, 29, 23, 18, 15, | ||
762 | }; | ||
763 | |||
764 | /* | ||
765 | * Inverse (2^32/x) values of the prio_to_weight[] array, precalculated. | ||
766 | * | ||
767 | * In cases where the weight does not change often, we can use the | ||
768 | * precalculated inverse to speed up arithmetics by turning divisions | ||
769 | * into multiplications: | ||
770 | */ | ||
771 | static const u32 prio_to_wmult[40] = { | ||
772 | /* -20 */ 48388, 59856, 76040, 92818, 118348, | ||
773 | /* -15 */ 147320, 184698, 229616, 287308, 360437, | ||
774 | /* -10 */ 449829, 563644, 704093, 875809, 1099582, | ||
775 | /* -5 */ 1376151, 1717300, 2157191, 2708050, 3363326, | ||
776 | /* 0 */ 4194304, 5237765, 6557202, 8165337, 10153587, | ||
777 | /* 5 */ 12820798, 15790321, 19976592, 24970740, 31350126, | ||
778 | /* 10 */ 39045157, 49367440, 61356676, 76695844, 95443717, | ||
779 | /* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153, | ||
780 | }; | ||
781 | |||
782 | /* Time spent by the tasks of the cpu accounting group executing in ... */ | ||
783 | enum cpuacct_stat_index { | ||
784 | CPUACCT_STAT_USER, /* ... user mode */ | ||
785 | CPUACCT_STAT_SYSTEM, /* ... kernel mode */ | ||
786 | |||
787 | CPUACCT_STAT_NSTATS, | ||
788 | }; | ||
789 | |||
790 | |||
791 | #define sched_class_highest (&stop_sched_class) | ||
792 | #define for_each_class(class) \ | ||
793 | for (class = sched_class_highest; class; class = class->next) | ||
794 | |||
795 | extern const struct sched_class stop_sched_class; | ||
796 | extern const struct sched_class rt_sched_class; | ||
797 | extern const struct sched_class fair_sched_class; | ||
798 | extern const struct sched_class idle_sched_class; | ||
799 | |||
800 | |||
801 | #ifdef CONFIG_SMP | ||
802 | |||
803 | extern void trigger_load_balance(struct rq *rq, int cpu); | ||
804 | extern void idle_balance(int this_cpu, struct rq *this_rq); | ||
805 | |||
806 | #else /* CONFIG_SMP */ | ||
807 | |||
808 | static inline void idle_balance(int cpu, struct rq *rq) | ||
809 | { | ||
810 | } | ||
811 | |||
812 | #endif | ||
813 | |||
814 | extern void sysrq_sched_debug_show(void); | ||
815 | extern void sched_init_granularity(void); | ||
816 | extern void update_max_interval(void); | ||
817 | extern void update_group_power(struct sched_domain *sd, int cpu); | ||
818 | extern int update_runtime(struct notifier_block *nfb, unsigned long action, void *hcpu); | ||
819 | extern void init_sched_rt_class(void); | ||
820 | extern void init_sched_fair_class(void); | ||
821 | |||
822 | extern void resched_task(struct task_struct *p); | ||
823 | extern void resched_cpu(int cpu); | ||
824 | |||
825 | extern struct rt_bandwidth def_rt_bandwidth; | ||
826 | extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime); | ||
827 | |||
828 | extern void update_cpu_load(struct rq *this_rq); | ||
829 | |||
830 | #ifdef CONFIG_CGROUP_CPUACCT | ||
831 | extern void cpuacct_charge(struct task_struct *tsk, u64 cputime); | ||
832 | extern void cpuacct_update_stats(struct task_struct *tsk, | ||
833 | enum cpuacct_stat_index idx, cputime_t val); | ||
834 | #else | ||
835 | static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {} | ||
836 | static inline void cpuacct_update_stats(struct task_struct *tsk, | ||
837 | enum cpuacct_stat_index idx, cputime_t val) {} | ||
838 | #endif | ||
839 | |||
840 | static inline void inc_nr_running(struct rq *rq) | ||
841 | { | ||
842 | rq->nr_running++; | ||
843 | } | ||
844 | |||
845 | static inline void dec_nr_running(struct rq *rq) | ||
846 | { | ||
847 | rq->nr_running--; | ||
848 | } | ||
849 | |||
850 | extern void update_rq_clock(struct rq *rq); | ||
851 | |||
852 | extern void activate_task(struct rq *rq, struct task_struct *p, int flags); | ||
853 | extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags); | ||
854 | |||
855 | extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags); | ||
856 | |||
857 | extern const_debug unsigned int sysctl_sched_time_avg; | ||
858 | extern const_debug unsigned int sysctl_sched_nr_migrate; | ||
859 | extern const_debug unsigned int sysctl_sched_migration_cost; | ||
860 | |||
861 | static inline u64 sched_avg_period(void) | ||
862 | { | ||
863 | return (u64)sysctl_sched_time_avg * NSEC_PER_MSEC / 2; | ||
864 | } | ||
865 | |||
866 | void calc_load_account_idle(struct rq *this_rq); | ||
867 | |||
868 | #ifdef CONFIG_SCHED_HRTICK | ||
869 | |||
870 | /* | ||
871 | * Use hrtick when: | ||
872 | * - enabled by features | ||
873 | * - hrtimer is actually high res | ||
874 | */ | ||
875 | static inline int hrtick_enabled(struct rq *rq) | ||
876 | { | ||
877 | if (!sched_feat(HRTICK)) | ||
878 | return 0; | ||
879 | if (!cpu_active(cpu_of(rq))) | ||
880 | return 0; | ||
881 | return hrtimer_is_hres_active(&rq->hrtick_timer); | ||
882 | } | ||
883 | |||
884 | void hrtick_start(struct rq *rq, u64 delay); | ||
885 | |||
886 | #endif /* CONFIG_SCHED_HRTICK */ | ||
887 | |||
888 | #ifdef CONFIG_SMP | ||
889 | extern void sched_avg_update(struct rq *rq); | ||
890 | static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) | ||
891 | { | ||
892 | rq->rt_avg += rt_delta; | ||
893 | sched_avg_update(rq); | ||
894 | } | ||
895 | #else | ||
896 | static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { } | ||
897 | static inline void sched_avg_update(struct rq *rq) { } | ||
898 | #endif | ||
899 | |||
900 | extern void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period); | ||
901 | |||
902 | #ifdef CONFIG_SMP | ||
903 | #ifdef CONFIG_PREEMPT | ||
904 | |||
905 | static inline void double_rq_lock(struct rq *rq1, struct rq *rq2); | ||
906 | |||
907 | /* | ||
908 | * fair double_lock_balance: Safely acquires both rq->locks in a fair | ||
909 | * way at the expense of forcing extra atomic operations in all | ||
910 | * invocations. This assures that the double_lock is acquired using the | ||
911 | * same underlying policy as the spinlock_t on this architecture, which | ||
912 | * reduces latency compared to the unfair variant below. However, it | ||
913 | * also adds more overhead and therefore may reduce throughput. | ||
914 | */ | ||
915 | static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest) | ||
916 | __releases(this_rq->lock) | ||
917 | __acquires(busiest->lock) | ||
918 | __acquires(this_rq->lock) | ||
919 | { | ||
920 | raw_spin_unlock(&this_rq->lock); | ||
921 | double_rq_lock(this_rq, busiest); | ||
922 | |||
923 | return 1; | ||
924 | } | ||
925 | |||
926 | #else | ||
927 | /* | ||
928 | * Unfair double_lock_balance: Optimizes throughput at the expense of | ||
929 | * latency by eliminating extra atomic operations when the locks are | ||
930 | * already in proper order on entry. This favors lower cpu-ids and will | ||
931 | * grant the double lock to lower cpus over higher ids under contention, | ||
932 | * regardless of entry order into the function. | ||
933 | */ | ||
934 | static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest) | ||
935 | __releases(this_rq->lock) | ||
936 | __acquires(busiest->lock) | ||
937 | __acquires(this_rq->lock) | ||
938 | { | ||
939 | int ret = 0; | ||
940 | |||
941 | if (unlikely(!raw_spin_trylock(&busiest->lock))) { | ||
942 | if (busiest < this_rq) { | ||
943 | raw_spin_unlock(&this_rq->lock); | ||
944 | raw_spin_lock(&busiest->lock); | ||
945 | raw_spin_lock_nested(&this_rq->lock, | ||
946 | SINGLE_DEPTH_NESTING); | ||
947 | ret = 1; | ||
948 | } else | ||
949 | raw_spin_lock_nested(&busiest->lock, | ||
950 | SINGLE_DEPTH_NESTING); | ||
951 | } | ||
952 | return ret; | ||
953 | } | ||
954 | |||
955 | #endif /* CONFIG_PREEMPT */ | ||
956 | |||
957 | /* | ||
958 | * double_lock_balance - lock the busiest runqueue, this_rq is locked already. | ||
959 | */ | ||
960 | static inline int double_lock_balance(struct rq *this_rq, struct rq *busiest) | ||
961 | { | ||
962 | if (unlikely(!irqs_disabled())) { | ||
963 | /* printk() doesn't work good under rq->lock */ | ||
964 | raw_spin_unlock(&this_rq->lock); | ||
965 | BUG_ON(1); | ||
966 | } | ||
967 | |||
968 | return _double_lock_balance(this_rq, busiest); | ||
969 | } | ||
970 | |||
971 | static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest) | ||
972 | __releases(busiest->lock) | ||
973 | { | ||
974 | raw_spin_unlock(&busiest->lock); | ||
975 | lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_); | ||
976 | } | ||
977 | |||
978 | /* | ||
979 | * double_rq_lock - safely lock two runqueues | ||
980 | * | ||
981 | * Note this does not disable interrupts like task_rq_lock, | ||
982 | * you need to do so manually before calling. | ||
983 | */ | ||
984 | static inline void double_rq_lock(struct rq *rq1, struct rq *rq2) | ||
985 | __acquires(rq1->lock) | ||
986 | __acquires(rq2->lock) | ||
987 | { | ||
988 | BUG_ON(!irqs_disabled()); | ||
989 | if (rq1 == rq2) { | ||
990 | raw_spin_lock(&rq1->lock); | ||
991 | __acquire(rq2->lock); /* Fake it out ;) */ | ||
992 | } else { | ||
993 | if (rq1 < rq2) { | ||
994 | raw_spin_lock(&rq1->lock); | ||
995 | raw_spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING); | ||
996 | } else { | ||
997 | raw_spin_lock(&rq2->lock); | ||
998 | raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING); | ||
999 | } | ||
1000 | } | ||
1001 | } | ||
1002 | |||
1003 | /* | ||
1004 | * double_rq_unlock - safely unlock two runqueues | ||
1005 | * | ||
1006 | * Note this does not restore interrupts like task_rq_unlock, | ||
1007 | * you need to do so manually after calling. | ||
1008 | */ | ||
1009 | static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2) | ||
1010 | __releases(rq1->lock) | ||
1011 | __releases(rq2->lock) | ||
1012 | { | ||
1013 | raw_spin_unlock(&rq1->lock); | ||
1014 | if (rq1 != rq2) | ||
1015 | raw_spin_unlock(&rq2->lock); | ||
1016 | else | ||
1017 | __release(rq2->lock); | ||
1018 | } | ||
1019 | |||
1020 | #else /* CONFIG_SMP */ | ||
1021 | |||
1022 | /* | ||
1023 | * double_rq_lock - safely lock two runqueues | ||
1024 | * | ||
1025 | * Note this does not disable interrupts like task_rq_lock, | ||
1026 | * you need to do so manually before calling. | ||
1027 | */ | ||
1028 | static inline void double_rq_lock(struct rq *rq1, struct rq *rq2) | ||
1029 | __acquires(rq1->lock) | ||
1030 | __acquires(rq2->lock) | ||
1031 | { | ||
1032 | BUG_ON(!irqs_disabled()); | ||
1033 | BUG_ON(rq1 != rq2); | ||
1034 | raw_spin_lock(&rq1->lock); | ||
1035 | __acquire(rq2->lock); /* Fake it out ;) */ | ||
1036 | } | ||
1037 | |||
1038 | /* | ||
1039 | * double_rq_unlock - safely unlock two runqueues | ||
1040 | * | ||
1041 | * Note this does not restore interrupts like task_rq_unlock, | ||
1042 | * you need to do so manually after calling. | ||
1043 | */ | ||
1044 | static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2) | ||
1045 | __releases(rq1->lock) | ||
1046 | __releases(rq2->lock) | ||
1047 | { | ||
1048 | BUG_ON(rq1 != rq2); | ||
1049 | raw_spin_unlock(&rq1->lock); | ||
1050 | __release(rq2->lock); | ||
1051 | } | ||
1052 | |||
1053 | #endif | ||
1054 | |||
1055 | extern struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq); | ||
1056 | extern struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq); | ||
1057 | extern void print_cfs_stats(struct seq_file *m, int cpu); | ||
1058 | extern void print_rt_stats(struct seq_file *m, int cpu); | ||
1059 | |||
1060 | extern void init_cfs_rq(struct cfs_rq *cfs_rq); | ||
1061 | extern void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq); | ||
1062 | extern void unthrottle_offline_cfs_rqs(struct rq *rq); | ||
1063 | |||
1064 | extern void account_cfs_bandwidth_used(int enabled, int was_enabled); | ||
diff --git a/kernel/sched_autogroup.c b/kernel/sched_autogroup.c index 429242f3c484..e8a1f83ee0e7 100644 --- a/kernel/sched_autogroup.c +++ b/kernel/sched_autogroup.c | |||
@@ -1,15 +1,19 @@ | |||
1 | #ifdef CONFIG_SCHED_AUTOGROUP | 1 | #ifdef CONFIG_SCHED_AUTOGROUP |
2 | 2 | ||
3 | #include "sched.h" | ||
4 | |||
3 | #include <linux/proc_fs.h> | 5 | #include <linux/proc_fs.h> |
4 | #include <linux/seq_file.h> | 6 | #include <linux/seq_file.h> |
5 | #include <linux/kallsyms.h> | 7 | #include <linux/kallsyms.h> |
6 | #include <linux/utsname.h> | 8 | #include <linux/utsname.h> |
9 | #include <linux/security.h> | ||
10 | #include <linux/export.h> | ||
7 | 11 | ||
8 | unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1; | 12 | unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1; |
9 | static struct autogroup autogroup_default; | 13 | static struct autogroup autogroup_default; |
10 | static atomic_t autogroup_seq_nr; | 14 | static atomic_t autogroup_seq_nr; |
11 | 15 | ||
12 | static void __init autogroup_init(struct task_struct *init_task) | 16 | void __init autogroup_init(struct task_struct *init_task) |
13 | { | 17 | { |
14 | autogroup_default.tg = &root_task_group; | 18 | autogroup_default.tg = &root_task_group; |
15 | kref_init(&autogroup_default.kref); | 19 | kref_init(&autogroup_default.kref); |
@@ -17,7 +21,7 @@ static void __init autogroup_init(struct task_struct *init_task) | |||
17 | init_task->signal->autogroup = &autogroup_default; | 21 | init_task->signal->autogroup = &autogroup_default; |
18 | } | 22 | } |
19 | 23 | ||
20 | static inline void autogroup_free(struct task_group *tg) | 24 | void autogroup_free(struct task_group *tg) |
21 | { | 25 | { |
22 | kfree(tg->autogroup); | 26 | kfree(tg->autogroup); |
23 | } | 27 | } |
@@ -59,10 +63,6 @@ static inline struct autogroup *autogroup_task_get(struct task_struct *p) | |||
59 | return ag; | 63 | return ag; |
60 | } | 64 | } |
61 | 65 | ||
62 | #ifdef CONFIG_RT_GROUP_SCHED | ||
63 | static void free_rt_sched_group(struct task_group *tg); | ||
64 | #endif | ||
65 | |||
66 | static inline struct autogroup *autogroup_create(void) | 66 | static inline struct autogroup *autogroup_create(void) |
67 | { | 67 | { |
68 | struct autogroup *ag = kzalloc(sizeof(*ag), GFP_KERNEL); | 68 | struct autogroup *ag = kzalloc(sizeof(*ag), GFP_KERNEL); |
@@ -108,8 +108,7 @@ out_fail: | |||
108 | return autogroup_kref_get(&autogroup_default); | 108 | return autogroup_kref_get(&autogroup_default); |
109 | } | 109 | } |
110 | 110 | ||
111 | static inline bool | 111 | bool task_wants_autogroup(struct task_struct *p, struct task_group *tg) |
112 | task_wants_autogroup(struct task_struct *p, struct task_group *tg) | ||
113 | { | 112 | { |
114 | if (tg != &root_task_group) | 113 | if (tg != &root_task_group) |
115 | return false; | 114 | return false; |
@@ -127,22 +126,6 @@ task_wants_autogroup(struct task_struct *p, struct task_group *tg) | |||
127 | return true; | 126 | return true; |
128 | } | 127 | } |
129 | 128 | ||
130 | static inline bool task_group_is_autogroup(struct task_group *tg) | ||
131 | { | ||
132 | return !!tg->autogroup; | ||
133 | } | ||
134 | |||
135 | static inline struct task_group * | ||
136 | autogroup_task_group(struct task_struct *p, struct task_group *tg) | ||
137 | { | ||
138 | int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled); | ||
139 | |||
140 | if (enabled && task_wants_autogroup(p, tg)) | ||
141 | return p->signal->autogroup->tg; | ||
142 | |||
143 | return tg; | ||
144 | } | ||
145 | |||
146 | static void | 129 | static void |
147 | autogroup_move_group(struct task_struct *p, struct autogroup *ag) | 130 | autogroup_move_group(struct task_struct *p, struct autogroup *ag) |
148 | { | 131 | { |
@@ -263,7 +246,7 @@ out: | |||
263 | #endif /* CONFIG_PROC_FS */ | 246 | #endif /* CONFIG_PROC_FS */ |
264 | 247 | ||
265 | #ifdef CONFIG_SCHED_DEBUG | 248 | #ifdef CONFIG_SCHED_DEBUG |
266 | static inline int autogroup_path(struct task_group *tg, char *buf, int buflen) | 249 | int autogroup_path(struct task_group *tg, char *buf, int buflen) |
267 | { | 250 | { |
268 | if (!task_group_is_autogroup(tg)) | 251 | if (!task_group_is_autogroup(tg)) |
269 | return 0; | 252 | return 0; |
diff --git a/kernel/sched_autogroup.h b/kernel/sched_autogroup.h index c2f0e7248dca..8bd047142816 100644 --- a/kernel/sched_autogroup.h +++ b/kernel/sched_autogroup.h | |||
@@ -1,5 +1,8 @@ | |||
1 | #ifdef CONFIG_SCHED_AUTOGROUP | 1 | #ifdef CONFIG_SCHED_AUTOGROUP |
2 | 2 | ||
3 | #include <linux/kref.h> | ||
4 | #include <linux/rwsem.h> | ||
5 | |||
3 | struct autogroup { | 6 | struct autogroup { |
4 | /* | 7 | /* |
5 | * reference doesn't mean how many thread attach to this | 8 | * reference doesn't mean how many thread attach to this |
@@ -13,9 +16,28 @@ struct autogroup { | |||
13 | int nice; | 16 | int nice; |
14 | }; | 17 | }; |
15 | 18 | ||
16 | static inline bool task_group_is_autogroup(struct task_group *tg); | 19 | extern void autogroup_init(struct task_struct *init_task); |
20 | extern void autogroup_free(struct task_group *tg); | ||
21 | |||
22 | static inline bool task_group_is_autogroup(struct task_group *tg) | ||
23 | { | ||
24 | return !!tg->autogroup; | ||
25 | } | ||
26 | |||
27 | extern bool task_wants_autogroup(struct task_struct *p, struct task_group *tg); | ||
28 | |||
17 | static inline struct task_group * | 29 | static inline struct task_group * |
18 | autogroup_task_group(struct task_struct *p, struct task_group *tg); | 30 | autogroup_task_group(struct task_struct *p, struct task_group *tg) |
31 | { | ||
32 | int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled); | ||
33 | |||
34 | if (enabled && task_wants_autogroup(p, tg)) | ||
35 | return p->signal->autogroup->tg; | ||
36 | |||
37 | return tg; | ||
38 | } | ||
39 | |||
40 | extern int autogroup_path(struct task_group *tg, char *buf, int buflen); | ||
19 | 41 | ||
20 | #else /* !CONFIG_SCHED_AUTOGROUP */ | 42 | #else /* !CONFIG_SCHED_AUTOGROUP */ |
21 | 43 | ||
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index a6710a112b4f..ce1a85f2ddcb 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c | |||
@@ -16,6 +16,8 @@ | |||
16 | #include <linux/kallsyms.h> | 16 | #include <linux/kallsyms.h> |
17 | #include <linux/utsname.h> | 17 | #include <linux/utsname.h> |
18 | 18 | ||
19 | #include "sched.h" | ||
20 | |||
19 | static DEFINE_SPINLOCK(sched_debug_lock); | 21 | static DEFINE_SPINLOCK(sched_debug_lock); |
20 | 22 | ||
21 | /* | 23 | /* |
@@ -373,7 +375,7 @@ static int sched_debug_show(struct seq_file *m, void *v) | |||
373 | return 0; | 375 | return 0; |
374 | } | 376 | } |
375 | 377 | ||
376 | static void sysrq_sched_debug_show(void) | 378 | void sysrq_sched_debug_show(void) |
377 | { | 379 | { |
378 | sched_debug_show(NULL, NULL); | 380 | sched_debug_show(NULL, NULL); |
379 | } | 381 | } |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index a608593df243..cd3b64219d9f 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -23,6 +23,13 @@ | |||
23 | #include <linux/latencytop.h> | 23 | #include <linux/latencytop.h> |
24 | #include <linux/sched.h> | 24 | #include <linux/sched.h> |
25 | #include <linux/cpumask.h> | 25 | #include <linux/cpumask.h> |
26 | #include <linux/slab.h> | ||
27 | #include <linux/profile.h> | ||
28 | #include <linux/interrupt.h> | ||
29 | |||
30 | #include <trace/events/sched.h> | ||
31 | |||
32 | #include "sched.h" | ||
26 | 33 | ||
27 | /* | 34 | /* |
28 | * Targeted preemption latency for CPU-bound tasks: | 35 | * Targeted preemption latency for CPU-bound tasks: |
@@ -103,7 +110,110 @@ unsigned int __read_mostly sysctl_sched_shares_window = 10000000UL; | |||
103 | unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL; | 110 | unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL; |
104 | #endif | 111 | #endif |
105 | 112 | ||
106 | static const struct sched_class fair_sched_class; | 113 | /* |
114 | * Increase the granularity value when there are more CPUs, | ||
115 | * because with more CPUs the 'effective latency' as visible | ||
116 | * to users decreases. But the relationship is not linear, | ||
117 | * so pick a second-best guess by going with the log2 of the | ||
118 | * number of CPUs. | ||
119 | * | ||
120 | * This idea comes from the SD scheduler of Con Kolivas: | ||
121 | */ | ||
122 | static int get_update_sysctl_factor(void) | ||
123 | { | ||
124 | unsigned int cpus = min_t(int, num_online_cpus(), 8); | ||
125 | unsigned int factor; | ||
126 | |||
127 | switch (sysctl_sched_tunable_scaling) { | ||
128 | case SCHED_TUNABLESCALING_NONE: | ||
129 | factor = 1; | ||
130 | break; | ||
131 | case SCHED_TUNABLESCALING_LINEAR: | ||
132 | factor = cpus; | ||
133 | break; | ||
134 | case SCHED_TUNABLESCALING_LOG: | ||
135 | default: | ||
136 | factor = 1 + ilog2(cpus); | ||
137 | break; | ||
138 | } | ||
139 | |||
140 | return factor; | ||
141 | } | ||
142 | |||
143 | static void update_sysctl(void) | ||
144 | { | ||
145 | unsigned int factor = get_update_sysctl_factor(); | ||
146 | |||
147 | #define SET_SYSCTL(name) \ | ||
148 | (sysctl_##name = (factor) * normalized_sysctl_##name) | ||
149 | SET_SYSCTL(sched_min_granularity); | ||
150 | SET_SYSCTL(sched_latency); | ||
151 | SET_SYSCTL(sched_wakeup_granularity); | ||
152 | #undef SET_SYSCTL | ||
153 | } | ||
154 | |||
/* Externally visible wrapper: just refreshes the scaled tunables. */
void sched_init_granularity(void)
{
	update_sysctl();
}
159 | |||
160 | #if BITS_PER_LONG == 32 | ||
161 | # define WMULT_CONST (~0UL) | ||
162 | #else | ||
163 | # define WMULT_CONST (1UL << 32) | ||
164 | #endif | ||
165 | |||
166 | #define WMULT_SHIFT 32 | ||
167 | |||
168 | /* | ||
169 | * Shift right and round: | ||
170 | */ | ||
171 | #define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y)) | ||
172 | |||
173 | /* | ||
174 | * delta *= weight / lw | ||
175 | */ | ||
176 | static unsigned long | ||
177 | calc_delta_mine(unsigned long delta_exec, unsigned long weight, | ||
178 | struct load_weight *lw) | ||
179 | { | ||
180 | u64 tmp; | ||
181 | |||
182 | /* | ||
183 | * weight can be less than 2^SCHED_LOAD_RESOLUTION for task group sched | ||
184 | * entities since MIN_SHARES = 2. Treat weight as 1 if less than | ||
185 | * 2^SCHED_LOAD_RESOLUTION. | ||
186 | */ | ||
187 | if (likely(weight > (1UL << SCHED_LOAD_RESOLUTION))) | ||
188 | tmp = (u64)delta_exec * scale_load_down(weight); | ||
189 | else | ||
190 | tmp = (u64)delta_exec; | ||
191 | |||
192 | if (!lw->inv_weight) { | ||
193 | unsigned long w = scale_load_down(lw->weight); | ||
194 | |||
195 | if (BITS_PER_LONG > 32 && unlikely(w >= WMULT_CONST)) | ||
196 | lw->inv_weight = 1; | ||
197 | else if (unlikely(!w)) | ||
198 | lw->inv_weight = WMULT_CONST; | ||
199 | else | ||
200 | lw->inv_weight = WMULT_CONST / w; | ||
201 | } | ||
202 | |||
203 | /* | ||
204 | * Check whether we'd overflow the 64-bit multiplication: | ||
205 | */ | ||
206 | if (unlikely(tmp > WMULT_CONST)) | ||
207 | tmp = SRR(SRR(tmp, WMULT_SHIFT/2) * lw->inv_weight, | ||
208 | WMULT_SHIFT/2); | ||
209 | else | ||
210 | tmp = SRR(tmp * lw->inv_weight, WMULT_SHIFT); | ||
211 | |||
212 | return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX); | ||
213 | } | ||
214 | |||
215 | |||
216 | const struct sched_class fair_sched_class; | ||
107 | 217 | ||
108 | /************************************************************** | 218 | /************************************************************** |
109 | * CFS operations on generic schedulable entities: | 219 | * CFS operations on generic schedulable entities: |
@@ -413,7 +523,7 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
413 | rb_erase(&se->run_node, &cfs_rq->tasks_timeline); | 523 | rb_erase(&se->run_node, &cfs_rq->tasks_timeline); |
414 | } | 524 | } |
415 | 525 | ||
416 | static struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq) | 526 | struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq) |
417 | { | 527 | { |
418 | struct rb_node *left = cfs_rq->rb_leftmost; | 528 | struct rb_node *left = cfs_rq->rb_leftmost; |
419 | 529 | ||
@@ -434,7 +544,7 @@ static struct sched_entity *__pick_next_entity(struct sched_entity *se) | |||
434 | } | 544 | } |
435 | 545 | ||
436 | #ifdef CONFIG_SCHED_DEBUG | 546 | #ifdef CONFIG_SCHED_DEBUG |
437 | static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) | 547 | struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) |
438 | { | 548 | { |
439 | struct rb_node *last = rb_last(&cfs_rq->tasks_timeline); | 549 | struct rb_node *last = rb_last(&cfs_rq->tasks_timeline); |
440 | 550 | ||
@@ -684,7 +794,7 @@ account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
684 | { | 794 | { |
685 | update_load_add(&cfs_rq->load, se->load.weight); | 795 | update_load_add(&cfs_rq->load, se->load.weight); |
686 | if (!parent_entity(se)) | 796 | if (!parent_entity(se)) |
687 | inc_cpu_load(rq_of(cfs_rq), se->load.weight); | 797 | update_load_add(&rq_of(cfs_rq)->load, se->load.weight); |
688 | if (entity_is_task(se)) { | 798 | if (entity_is_task(se)) { |
689 | add_cfs_task_weight(cfs_rq, se->load.weight); | 799 | add_cfs_task_weight(cfs_rq, se->load.weight); |
690 | list_add(&se->group_node, &cfs_rq->tasks); | 800 | list_add(&se->group_node, &cfs_rq->tasks); |
@@ -697,7 +807,7 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
697 | { | 807 | { |
698 | update_load_sub(&cfs_rq->load, se->load.weight); | 808 | update_load_sub(&cfs_rq->load, se->load.weight); |
699 | if (!parent_entity(se)) | 809 | if (!parent_entity(se)) |
700 | dec_cpu_load(rq_of(cfs_rq), se->load.weight); | 810 | update_load_sub(&rq_of(cfs_rq)->load, se->load.weight); |
701 | if (entity_is_task(se)) { | 811 | if (entity_is_task(se)) { |
702 | add_cfs_task_weight(cfs_rq, -se->load.weight); | 812 | add_cfs_task_weight(cfs_rq, -se->load.weight); |
703 | list_del_init(&se->group_node); | 813 | list_del_init(&se->group_node); |
@@ -1287,6 +1397,32 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) | |||
1287 | */ | 1397 | */ |
1288 | 1398 | ||
1289 | #ifdef CONFIG_CFS_BANDWIDTH | 1399 | #ifdef CONFIG_CFS_BANDWIDTH |
1400 | |||
1401 | #ifdef HAVE_JUMP_LABEL | ||
1402 | static struct jump_label_key __cfs_bandwidth_used; | ||
1403 | |||
1404 | static inline bool cfs_bandwidth_used(void) | ||
1405 | { | ||
1406 | return static_branch(&__cfs_bandwidth_used); | ||
1407 | } | ||
1408 | |||
1409 | void account_cfs_bandwidth_used(int enabled, int was_enabled) | ||
1410 | { | ||
1411 | /* only need to count groups transitioning between enabled/!enabled */ | ||
1412 | if (enabled && !was_enabled) | ||
1413 | jump_label_inc(&__cfs_bandwidth_used); | ||
1414 | else if (!enabled && was_enabled) | ||
1415 | jump_label_dec(&__cfs_bandwidth_used); | ||
1416 | } | ||
1417 | #else /* HAVE_JUMP_LABEL */ | ||
1418 | static bool cfs_bandwidth_used(void) | ||
1419 | { | ||
1420 | return true; | ||
1421 | } | ||
1422 | |||
/* Without HAVE_JUMP_LABEL there is no key to maintain: empty stub. */
void account_cfs_bandwidth_used(int enabled, int was_enabled)
{
}
1424 | #endif /* HAVE_JUMP_LABEL */ | ||
1425 | |||
1290 | /* | 1426 | /* |
1291 | * default period for cfs group bandwidth. | 1427 | * default period for cfs group bandwidth. |
1292 | * default: 0.1s, units: nanoseconds | 1428 | * default: 0.1s, units: nanoseconds |
@@ -1308,7 +1444,7 @@ static inline u64 sched_cfs_bandwidth_slice(void) | |||
1308 | * | 1444 | * |
1309 | * requires cfs_b->lock | 1445 | * requires cfs_b->lock |
1310 | */ | 1446 | */ |
1311 | static void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b) | 1447 | void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b) |
1312 | { | 1448 | { |
1313 | u64 now; | 1449 | u64 now; |
1314 | 1450 | ||
@@ -1320,6 +1456,11 @@ static void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b) | |||
1320 | cfs_b->runtime_expires = now + ktime_to_ns(cfs_b->period); | 1456 | cfs_b->runtime_expires = now + ktime_to_ns(cfs_b->period); |
1321 | } | 1457 | } |
1322 | 1458 | ||
1459 | static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg) | ||
1460 | { | ||
1461 | return &tg->cfs_bandwidth; | ||
1462 | } | ||
1463 | |||
1323 | /* returns 0 on failure to allocate runtime */ | 1464 | /* returns 0 on failure to allocate runtime */ |
1324 | static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq) | 1465 | static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq) |
1325 | { | 1466 | { |
@@ -1530,7 +1671,7 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq) | |||
1530 | raw_spin_unlock(&cfs_b->lock); | 1671 | raw_spin_unlock(&cfs_b->lock); |
1531 | } | 1672 | } |
1532 | 1673 | ||
1533 | static void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) | 1674 | void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) |
1534 | { | 1675 | { |
1535 | struct rq *rq = rq_of(cfs_rq); | 1676 | struct rq *rq = rq_of(cfs_rq); |
1536 | struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg); | 1677 | struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg); |
@@ -1839,7 +1980,112 @@ static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) | |||
1839 | 1980 | ||
1840 | throttle_cfs_rq(cfs_rq); | 1981 | throttle_cfs_rq(cfs_rq); |
1841 | } | 1982 | } |
1842 | #else | 1983 | |
1984 | static inline u64 default_cfs_period(void); | ||
1985 | static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun); | ||
1986 | static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b); | ||
1987 | |||
1988 | static enum hrtimer_restart sched_cfs_slack_timer(struct hrtimer *timer) | ||
1989 | { | ||
1990 | struct cfs_bandwidth *cfs_b = | ||
1991 | container_of(timer, struct cfs_bandwidth, slack_timer); | ||
1992 | do_sched_cfs_slack_timer(cfs_b); | ||
1993 | |||
1994 | return HRTIMER_NORESTART; | ||
1995 | } | ||
1996 | |||
1997 | static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer) | ||
1998 | { | ||
1999 | struct cfs_bandwidth *cfs_b = | ||
2000 | container_of(timer, struct cfs_bandwidth, period_timer); | ||
2001 | ktime_t now; | ||
2002 | int overrun; | ||
2003 | int idle = 0; | ||
2004 | |||
2005 | for (;;) { | ||
2006 | now = hrtimer_cb_get_time(timer); | ||
2007 | overrun = hrtimer_forward(timer, now, cfs_b->period); | ||
2008 | |||
2009 | if (!overrun) | ||
2010 | break; | ||
2011 | |||
2012 | idle = do_sched_cfs_period_timer(cfs_b, overrun); | ||
2013 | } | ||
2014 | |||
2015 | return idle ? HRTIMER_NORESTART : HRTIMER_RESTART; | ||
2016 | } | ||
2017 | |||
2018 | void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) | ||
2019 | { | ||
2020 | raw_spin_lock_init(&cfs_b->lock); | ||
2021 | cfs_b->runtime = 0; | ||
2022 | cfs_b->quota = RUNTIME_INF; | ||
2023 | cfs_b->period = ns_to_ktime(default_cfs_period()); | ||
2024 | |||
2025 | INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq); | ||
2026 | hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
2027 | cfs_b->period_timer.function = sched_cfs_period_timer; | ||
2028 | hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
2029 | cfs_b->slack_timer.function = sched_cfs_slack_timer; | ||
2030 | } | ||
2031 | |||
2032 | static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) | ||
2033 | { | ||
2034 | cfs_rq->runtime_enabled = 0; | ||
2035 | INIT_LIST_HEAD(&cfs_rq->throttled_list); | ||
2036 | } | ||
2037 | |||
2038 | /* requires cfs_b->lock, may release to reprogram timer */ | ||
2039 | void __start_cfs_bandwidth(struct cfs_bandwidth *cfs_b) | ||
2040 | { | ||
2041 | /* | ||
2042 | * The timer may be active because we're trying to set a new bandwidth | ||
2043 | * period or because we're racing with the tear-down path | ||
2044 | * (timer_active==0 becomes visible before the hrtimer call-back | ||
2045 | * terminates). In either case we ensure that it's re-programmed | ||
2046 | */ | ||
2047 | while (unlikely(hrtimer_active(&cfs_b->period_timer))) { | ||
2048 | raw_spin_unlock(&cfs_b->lock); | ||
2049 | /* ensure cfs_b->lock is available while we wait */ | ||
2050 | hrtimer_cancel(&cfs_b->period_timer); | ||
2051 | |||
2052 | raw_spin_lock(&cfs_b->lock); | ||
2053 | /* if someone else restarted the timer then we're done */ | ||
2054 | if (cfs_b->timer_active) | ||
2055 | return; | ||
2056 | } | ||
2057 | |||
2058 | cfs_b->timer_active = 1; | ||
2059 | start_bandwidth_timer(&cfs_b->period_timer, cfs_b->period); | ||
2060 | } | ||
2061 | |||
2062 | static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) | ||
2063 | { | ||
2064 | hrtimer_cancel(&cfs_b->period_timer); | ||
2065 | hrtimer_cancel(&cfs_b->slack_timer); | ||
2066 | } | ||
2067 | |||
2068 | void unthrottle_offline_cfs_rqs(struct rq *rq) | ||
2069 | { | ||
2070 | struct cfs_rq *cfs_rq; | ||
2071 | |||
2072 | for_each_leaf_cfs_rq(rq, cfs_rq) { | ||
2073 | struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg); | ||
2074 | |||
2075 | if (!cfs_rq->runtime_enabled) | ||
2076 | continue; | ||
2077 | |||
2078 | /* | ||
2079 | * clock_task is not advancing so we just need to make sure | ||
2080 | * there's some valid quota amount | ||
2081 | */ | ||
2082 | cfs_rq->runtime_remaining = cfs_b->quota; | ||
2083 | if (cfs_rq_throttled(cfs_rq)) | ||
2084 | unthrottle_cfs_rq(cfs_rq); | ||
2085 | } | ||
2086 | } | ||
2087 | |||
2088 | #else /* CONFIG_CFS_BANDWIDTH */ | ||
1843 | static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, | 2089 | static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, |
1844 | unsigned long delta_exec) {} | 2090 | unsigned long delta_exec) {} |
1845 | static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} | 2091 | static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} |
@@ -1861,8 +2107,22 @@ static inline int throttled_lb_pair(struct task_group *tg, | |||
1861 | { | 2107 | { |
1862 | return 0; | 2108 | return 0; |
1863 | } | 2109 | } |
2110 | |||
/* !CONFIG_CFS_BANDWIDTH: nothing to initialise. */
void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
{
}
2112 | |||
2113 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
/* !CONFIG_CFS_BANDWIDTH: empty stub. */
static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
{
}
1864 | #endif | 2115 | #endif |
1865 | 2116 | ||
2117 | static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg) | ||
2118 | { | ||
2119 | return NULL; | ||
2120 | } | ||
2121 | static inline void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {} | ||
2122 | void unthrottle_offline_cfs_rqs(struct rq *rq) {} | ||
2123 | |||
2124 | #endif /* CONFIG_CFS_BANDWIDTH */ | ||
2125 | |||
1866 | /************************************************** | 2126 | /************************************************** |
1867 | * CFS operations on tasks: | 2127 | * CFS operations on tasks: |
1868 | */ | 2128 | */ |
@@ -2029,6 +2289,61 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) | |||
2029 | } | 2289 | } |
2030 | 2290 | ||
2031 | #ifdef CONFIG_SMP | 2291 | #ifdef CONFIG_SMP |
2292 | /* Used instead of source_load when we know the type == 0 */ | ||
2293 | static unsigned long weighted_cpuload(const int cpu) | ||
2294 | { | ||
2295 | return cpu_rq(cpu)->load.weight; | ||
2296 | } | ||
2297 | |||
2298 | /* | ||
2299 | * Return a low guess at the load of a migration-source cpu weighted | ||
2300 | * according to the scheduling class and "nice" value. | ||
2301 | * | ||
2302 | * We want to under-estimate the load of migration sources, to | ||
2303 | * balance conservatively. | ||
2304 | */ | ||
2305 | static unsigned long source_load(int cpu, int type) | ||
2306 | { | ||
2307 | struct rq *rq = cpu_rq(cpu); | ||
2308 | unsigned long total = weighted_cpuload(cpu); | ||
2309 | |||
2310 | if (type == 0 || !sched_feat(LB_BIAS)) | ||
2311 | return total; | ||
2312 | |||
2313 | return min(rq->cpu_load[type-1], total); | ||
2314 | } | ||
2315 | |||
2316 | /* | ||
2317 | * Return a high guess at the load of a migration-target cpu weighted | ||
2318 | * according to the scheduling class and "nice" value. | ||
2319 | */ | ||
2320 | static unsigned long target_load(int cpu, int type) | ||
2321 | { | ||
2322 | struct rq *rq = cpu_rq(cpu); | ||
2323 | unsigned long total = weighted_cpuload(cpu); | ||
2324 | |||
2325 | if (type == 0 || !sched_feat(LB_BIAS)) | ||
2326 | return total; | ||
2327 | |||
2328 | return max(rq->cpu_load[type-1], total); | ||
2329 | } | ||
2330 | |||
2331 | static unsigned long power_of(int cpu) | ||
2332 | { | ||
2333 | return cpu_rq(cpu)->cpu_power; | ||
2334 | } | ||
2335 | |||
2336 | static unsigned long cpu_avg_load_per_task(int cpu) | ||
2337 | { | ||
2338 | struct rq *rq = cpu_rq(cpu); | ||
2339 | unsigned long nr_running = ACCESS_ONCE(rq->nr_running); | ||
2340 | |||
2341 | if (nr_running) | ||
2342 | return rq->load.weight / nr_running; | ||
2343 | |||
2344 | return 0; | ||
2345 | } | ||
2346 | |||
2032 | 2347 | ||
2033 | static void task_waking_fair(struct task_struct *p) | 2348 | static void task_waking_fair(struct task_struct *p) |
2034 | { | 2349 | { |
@@ -2783,6 +3098,38 @@ static void pull_task(struct rq *src_rq, struct task_struct *p, | |||
2783 | } | 3098 | } |
2784 | 3099 | ||
2785 | /* | 3100 | /* |
3101 | * Is this task likely cache-hot: | ||
3102 | */ | ||
3103 | static int | ||
3104 | task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) | ||
3105 | { | ||
3106 | s64 delta; | ||
3107 | |||
3108 | if (p->sched_class != &fair_sched_class) | ||
3109 | return 0; | ||
3110 | |||
3111 | if (unlikely(p->policy == SCHED_IDLE)) | ||
3112 | return 0; | ||
3113 | |||
3114 | /* | ||
3115 | * Buddy candidates are cache hot: | ||
3116 | */ | ||
3117 | if (sched_feat(CACHE_HOT_BUDDY) && this_rq()->nr_running && | ||
3118 | (&p->se == cfs_rq_of(&p->se)->next || | ||
3119 | &p->se == cfs_rq_of(&p->se)->last)) | ||
3120 | return 1; | ||
3121 | |||
3122 | if (sysctl_sched_migration_cost == -1) | ||
3123 | return 1; | ||
3124 | if (sysctl_sched_migration_cost == 0) | ||
3125 | return 0; | ||
3126 | |||
3127 | delta = now - p->se.exec_start; | ||
3128 | |||
3129 | return delta < (s64)sysctl_sched_migration_cost; | ||
3130 | } | ||
3131 | |||
3132 | /* | ||
2786 | * can_migrate_task - may task p from runqueue rq be migrated to this_cpu? | 3133 | * can_migrate_task - may task p from runqueue rq be migrated to this_cpu? |
2787 | */ | 3134 | */ |
2788 | static | 3135 | static |
@@ -3162,15 +3509,6 @@ struct sg_lb_stats { | |||
3162 | }; | 3509 | }; |
3163 | 3510 | ||
3164 | /** | 3511 | /** |
3165 | * group_first_cpu - Returns the first cpu in the cpumask of a sched_group. | ||
3166 | * @group: The group whose first cpu is to be returned. | ||
3167 | */ | ||
3168 | static inline unsigned int group_first_cpu(struct sched_group *group) | ||
3169 | { | ||
3170 | return cpumask_first(sched_group_cpus(group)); | ||
3171 | } | ||
3172 | |||
3173 | /** | ||
3174 | * get_sd_load_idx - Obtain the load index for a given sched domain. | 3512 | * get_sd_load_idx - Obtain the load index for a given sched domain. |
3175 | * @sd: The sched_domain whose load_idx is to be obtained. | 3513 | * @sd: The sched_domain whose load_idx is to be obtained. |
3176 | * @idle: The Idle status of the CPU for whose sd load_idx is obtained. | 3514 | * @idle: The Idle status of the CPU for whose sd load_idx is obtained. |
@@ -3419,7 +3757,7 @@ static void update_cpu_power(struct sched_domain *sd, int cpu) | |||
3419 | sdg->sgp->power = power; | 3757 | sdg->sgp->power = power; |
3420 | } | 3758 | } |
3421 | 3759 | ||
3422 | static void update_group_power(struct sched_domain *sd, int cpu) | 3760 | void update_group_power(struct sched_domain *sd, int cpu) |
3423 | { | 3761 | { |
3424 | struct sched_domain *child = sd->child; | 3762 | struct sched_domain *child = sd->child; |
3425 | struct sched_group *group, *sdg = sd->groups; | 3763 | struct sched_group *group, *sdg = sd->groups; |
@@ -3685,11 +4023,6 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu, | |||
3685 | } while (sg != sd->groups); | 4023 | } while (sg != sd->groups); |
3686 | } | 4024 | } |
3687 | 4025 | ||
3688 | int __weak arch_sd_sibling_asym_packing(void) | ||
3689 | { | ||
3690 | return 0*SD_ASYM_PACKING; | ||
3691 | } | ||
3692 | |||
3693 | /** | 4026 | /** |
3694 | * check_asym_packing - Check to see if the group is packed into the | 4027 | * check_asym_packing - Check to see if the group is packed into the |
3695 | * sched domain. | 4028 | * sched domain. |
@@ -4053,7 +4386,7 @@ find_busiest_queue(struct sched_domain *sd, struct sched_group *group, | |||
4053 | #define MAX_PINNED_INTERVAL 512 | 4386 | #define MAX_PINNED_INTERVAL 512 |
4054 | 4387 | ||
4055 | /* Working cpumask for load_balance and load_balance_newidle. */ | 4388 | /* Working cpumask for load_balance and load_balance_newidle. */ |
4056 | static DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask); | 4389 | DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask); |
4057 | 4390 | ||
4058 | static int need_active_balance(struct sched_domain *sd, int idle, | 4391 | static int need_active_balance(struct sched_domain *sd, int idle, |
4059 | int busiest_cpu, int this_cpu) | 4392 | int busiest_cpu, int this_cpu) |
@@ -4256,7 +4589,7 @@ out: | |||
4256 | * idle_balance is called by schedule() if this_cpu is about to become | 4589 | * idle_balance is called by schedule() if this_cpu is about to become |
4257 | * idle. Attempts to pull tasks from other CPUs. | 4590 | * idle. Attempts to pull tasks from other CPUs. |
4258 | */ | 4591 | */ |
4259 | static void idle_balance(int this_cpu, struct rq *this_rq) | 4592 | void idle_balance(int this_cpu, struct rq *this_rq) |
4260 | { | 4593 | { |
4261 | struct sched_domain *sd; | 4594 | struct sched_domain *sd; |
4262 | int pulled_task = 0; | 4595 | int pulled_task = 0; |
@@ -4631,7 +4964,7 @@ static unsigned long __read_mostly max_load_balance_interval = HZ/10; | |||
4631 | * Scale the max load_balance interval with the number of CPUs in the system. | 4964 | * Scale the max load_balance interval with the number of CPUs in the system. |
4632 | * This trades load-balance latency on larger machines for less cross talk. | 4965 | * This trades load-balance latency on larger machines for less cross talk. |
4633 | */ | 4966 | */ |
4634 | static void update_max_interval(void) | 4967 | void update_max_interval(void) |
4635 | { | 4968 | { |
4636 | max_load_balance_interval = HZ*num_online_cpus()/10; | 4969 | max_load_balance_interval = HZ*num_online_cpus()/10; |
4637 | } | 4970 | } |
@@ -4833,7 +5166,7 @@ static inline int on_null_domain(int cpu) | |||
4833 | /* | 5166 | /* |
4834 | * Trigger the SCHED_SOFTIRQ if it is time to do periodic load balancing. | 5167 | * Trigger the SCHED_SOFTIRQ if it is time to do periodic load balancing. |
4835 | */ | 5168 | */ |
4836 | static inline void trigger_load_balance(struct rq *rq, int cpu) | 5169 | void trigger_load_balance(struct rq *rq, int cpu) |
4837 | { | 5170 | { |
4838 | /* Don't need to rebalance while attached to NULL domain */ | 5171 | /* Don't need to rebalance while attached to NULL domain */ |
4839 | if (time_after_eq(jiffies, rq->next_balance) && | 5172 | if (time_after_eq(jiffies, rq->next_balance) && |
@@ -4855,15 +5188,6 @@ static void rq_offline_fair(struct rq *rq) | |||
4855 | update_sysctl(); | 5188 | update_sysctl(); |
4856 | } | 5189 | } |
4857 | 5190 | ||
4858 | #else /* CONFIG_SMP */ | ||
4859 | |||
4860 | /* | ||
4861 | * on UP we do not need to balance between CPUs: | ||
4862 | */ | ||
4863 | static inline void idle_balance(int cpu, struct rq *rq) | ||
4864 | { | ||
4865 | } | ||
4866 | |||
4867 | #endif /* CONFIG_SMP */ | 5191 | #endif /* CONFIG_SMP */ |
4868 | 5192 | ||
4869 | /* | 5193 | /* |
@@ -5006,6 +5330,16 @@ static void set_curr_task_fair(struct rq *rq) | |||
5006 | } | 5330 | } |
5007 | } | 5331 | } |
5008 | 5332 | ||
5333 | void init_cfs_rq(struct cfs_rq *cfs_rq) | ||
5334 | { | ||
5335 | cfs_rq->tasks_timeline = RB_ROOT; | ||
5336 | INIT_LIST_HEAD(&cfs_rq->tasks); | ||
5337 | cfs_rq->min_vruntime = (u64)(-(1LL << 20)); | ||
5338 | #ifndef CONFIG_64BIT | ||
5339 | cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime; | ||
5340 | #endif | ||
5341 | } | ||
5342 | |||
5009 | #ifdef CONFIG_FAIR_GROUP_SCHED | 5343 | #ifdef CONFIG_FAIR_GROUP_SCHED |
5010 | static void task_move_group_fair(struct task_struct *p, int on_rq) | 5344 | static void task_move_group_fair(struct task_struct *p, int on_rq) |
5011 | { | 5345 | { |
@@ -5028,7 +5362,161 @@ static void task_move_group_fair(struct task_struct *p, int on_rq) | |||
5028 | if (!on_rq) | 5362 | if (!on_rq) |
5029 | p->se.vruntime += cfs_rq_of(&p->se)->min_vruntime; | 5363 | p->se.vruntime += cfs_rq_of(&p->se)->min_vruntime; |
5030 | } | 5364 | } |
5365 | |||
5366 | void free_fair_sched_group(struct task_group *tg) | ||
5367 | { | ||
5368 | int i; | ||
5369 | |||
5370 | destroy_cfs_bandwidth(tg_cfs_bandwidth(tg)); | ||
5371 | |||
5372 | for_each_possible_cpu(i) { | ||
5373 | if (tg->cfs_rq) | ||
5374 | kfree(tg->cfs_rq[i]); | ||
5375 | if (tg->se) | ||
5376 | kfree(tg->se[i]); | ||
5377 | } | ||
5378 | |||
5379 | kfree(tg->cfs_rq); | ||
5380 | kfree(tg->se); | ||
5381 | } | ||
5382 | |||
5383 | int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) | ||
5384 | { | ||
5385 | struct cfs_rq *cfs_rq; | ||
5386 | struct sched_entity *se; | ||
5387 | int i; | ||
5388 | |||
5389 | tg->cfs_rq = kzalloc(sizeof(cfs_rq) * nr_cpu_ids, GFP_KERNEL); | ||
5390 | if (!tg->cfs_rq) | ||
5391 | goto err; | ||
5392 | tg->se = kzalloc(sizeof(se) * nr_cpu_ids, GFP_KERNEL); | ||
5393 | if (!tg->se) | ||
5394 | goto err; | ||
5395 | |||
5396 | tg->shares = NICE_0_LOAD; | ||
5397 | |||
5398 | init_cfs_bandwidth(tg_cfs_bandwidth(tg)); | ||
5399 | |||
5400 | for_each_possible_cpu(i) { | ||
5401 | cfs_rq = kzalloc_node(sizeof(struct cfs_rq), | ||
5402 | GFP_KERNEL, cpu_to_node(i)); | ||
5403 | if (!cfs_rq) | ||
5404 | goto err; | ||
5405 | |||
5406 | se = kzalloc_node(sizeof(struct sched_entity), | ||
5407 | GFP_KERNEL, cpu_to_node(i)); | ||
5408 | if (!se) | ||
5409 | goto err_free_rq; | ||
5410 | |||
5411 | init_cfs_rq(cfs_rq); | ||
5412 | init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]); | ||
5413 | } | ||
5414 | |||
5415 | return 1; | ||
5416 | |||
5417 | err_free_rq: | ||
5418 | kfree(cfs_rq); | ||
5419 | err: | ||
5420 | return 0; | ||
5421 | } | ||
5422 | |||
5423 | void unregister_fair_sched_group(struct task_group *tg, int cpu) | ||
5424 | { | ||
5425 | struct rq *rq = cpu_rq(cpu); | ||
5426 | unsigned long flags; | ||
5427 | |||
5428 | /* | ||
5429 | * Only empty task groups can be destroyed; so we can speculatively | ||
5430 | * check on_list without danger of it being re-added. | ||
5431 | */ | ||
5432 | if (!tg->cfs_rq[cpu]->on_list) | ||
5433 | return; | ||
5434 | |||
5435 | raw_spin_lock_irqsave(&rq->lock, flags); | ||
5436 | list_del_leaf_cfs_rq(tg->cfs_rq[cpu]); | ||
5437 | raw_spin_unlock_irqrestore(&rq->lock, flags); | ||
5438 | } | ||
5439 | |||
5440 | void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, | ||
5441 | struct sched_entity *se, int cpu, | ||
5442 | struct sched_entity *parent) | ||
5443 | { | ||
5444 | struct rq *rq = cpu_rq(cpu); | ||
5445 | |||
5446 | cfs_rq->tg = tg; | ||
5447 | cfs_rq->rq = rq; | ||
5448 | #ifdef CONFIG_SMP | ||
5449 | /* allow initial update_cfs_load() to truncate */ | ||
5450 | cfs_rq->load_stamp = 1; | ||
5031 | #endif | 5451 | #endif |
5452 | init_cfs_rq_runtime(cfs_rq); | ||
5453 | |||
5454 | tg->cfs_rq[cpu] = cfs_rq; | ||
5455 | tg->se[cpu] = se; | ||
5456 | |||
5457 | /* se could be NULL for root_task_group */ | ||
5458 | if (!se) | ||
5459 | return; | ||
5460 | |||
5461 | if (!parent) | ||
5462 | se->cfs_rq = &rq->cfs; | ||
5463 | else | ||
5464 | se->cfs_rq = parent->my_q; | ||
5465 | |||
5466 | se->my_q = cfs_rq; | ||
5467 | update_load_set(&se->load, 0); | ||
5468 | se->parent = parent; | ||
5469 | } | ||
5470 | |||
5471 | static DEFINE_MUTEX(shares_mutex); | ||
5472 | |||
5473 | int sched_group_set_shares(struct task_group *tg, unsigned long shares) | ||
5474 | { | ||
5475 | int i; | ||
5476 | unsigned long flags; | ||
5477 | |||
5478 | /* | ||
5479 | * We can't change the weight of the root cgroup. | ||
5480 | */ | ||
5481 | if (!tg->se[0]) | ||
5482 | return -EINVAL; | ||
5483 | |||
5484 | shares = clamp(shares, scale_load(MIN_SHARES), scale_load(MAX_SHARES)); | ||
5485 | |||
5486 | mutex_lock(&shares_mutex); | ||
5487 | if (tg->shares == shares) | ||
5488 | goto done; | ||
5489 | |||
5490 | tg->shares = shares; | ||
5491 | for_each_possible_cpu(i) { | ||
5492 | struct rq *rq = cpu_rq(i); | ||
5493 | struct sched_entity *se; | ||
5494 | |||
5495 | se = tg->se[i]; | ||
5496 | /* Propagate contribution to hierarchy */ | ||
5497 | raw_spin_lock_irqsave(&rq->lock, flags); | ||
5498 | for_each_sched_entity(se) | ||
5499 | update_cfs_shares(group_cfs_rq(se)); | ||
5500 | raw_spin_unlock_irqrestore(&rq->lock, flags); | ||
5501 | } | ||
5502 | |||
5503 | done: | ||
5504 | mutex_unlock(&shares_mutex); | ||
5505 | return 0; | ||
5506 | } | ||
5507 | #else /* CONFIG_FAIR_GROUP_SCHED */ | ||
5508 | |||
/* Without CONFIG_FAIR_GROUP_SCHED there is nothing to free. */
void free_fair_sched_group(struct task_group *tg)
{
}

/* Nothing to allocate in this configuration; always returns 1. */
int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
{
	return 1;
}

/* Nothing to unregister in this configuration. */
void unregister_fair_sched_group(struct task_group *tg, int cpu)
{
}
5517 | |||
5518 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | ||
5519 | |||
5032 | 5520 | ||
5033 | static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task) | 5521 | static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task) |
5034 | { | 5522 | { |
@@ -5048,7 +5536,7 @@ static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task | |||
5048 | /* | 5536 | /* |
5049 | * All the scheduling class methods: | 5537 | * All the scheduling class methods: |
5050 | */ | 5538 | */ |
5051 | static const struct sched_class fair_sched_class = { | 5539 | const struct sched_class fair_sched_class = { |
5052 | .next = &idle_sched_class, | 5540 | .next = &idle_sched_class, |
5053 | .enqueue_task = enqueue_task_fair, | 5541 | .enqueue_task = enqueue_task_fair, |
5054 | .dequeue_task = dequeue_task_fair, | 5542 | .dequeue_task = dequeue_task_fair, |
@@ -5085,7 +5573,7 @@ static const struct sched_class fair_sched_class = { | |||
5085 | }; | 5573 | }; |
5086 | 5574 | ||
5087 | #ifdef CONFIG_SCHED_DEBUG | 5575 | #ifdef CONFIG_SCHED_DEBUG |
5088 | static void print_cfs_stats(struct seq_file *m, int cpu) | 5576 | void print_cfs_stats(struct seq_file *m, int cpu) |
5089 | { | 5577 | { |
5090 | struct cfs_rq *cfs_rq; | 5578 | struct cfs_rq *cfs_rq; |
5091 | 5579 | ||
@@ -5095,3 +5583,19 @@ static void print_cfs_stats(struct seq_file *m, int cpu) | |||
5095 | rcu_read_unlock(); | 5583 | rcu_read_unlock(); |
5096 | } | 5584 | } |
5097 | #endif | 5585 | #endif |
5586 | |||
5587 | __init void init_sched_fair_class(void) | ||
5588 | { | ||
5589 | #ifdef CONFIG_SMP | ||
5590 | open_softirq(SCHED_SOFTIRQ, run_rebalance_domains); | ||
5591 | |||
5592 | #ifdef CONFIG_NO_HZ | ||
5593 | zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT); | ||
5594 | alloc_cpumask_var(&nohz.grp_idle_mask, GFP_NOWAIT); | ||
5595 | atomic_set(&nohz.load_balancer, nr_cpu_ids); | ||
5596 | atomic_set(&nohz.first_pick_cpu, nr_cpu_ids); | ||
5597 | atomic_set(&nohz.second_pick_cpu, nr_cpu_ids); | ||
5598 | #endif | ||
5599 | #endif /* SMP */ | ||
5600 | |||
5601 | } | ||
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index 0a51882534ea..91b4c957f289 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c | |||
@@ -1,3 +1,5 @@ | |||
1 | #include "sched.h" | ||
2 | |||
1 | /* | 3 | /* |
2 | * idle-task scheduling class. | 4 | * idle-task scheduling class. |
3 | * | 5 | * |
@@ -71,7 +73,7 @@ static unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task | |||
71 | /* | 73 | /* |
72 | * Simple, special scheduling class for the per-CPU idle tasks: | 74 | * Simple, special scheduling class for the per-CPU idle tasks: |
73 | */ | 75 | */ |
74 | static const struct sched_class idle_sched_class = { | 76 | const struct sched_class idle_sched_class = { |
75 | /* .next is NULL */ | 77 | /* .next is NULL */ |
76 | /* no enqueue/yield_task for idle tasks */ | 78 | /* no enqueue/yield_task for idle tasks */ |
77 | 79 | ||
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index d95e861122cf..023b35502509 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
@@ -3,7 +3,92 @@ | |||
3 | * policies) | 3 | * policies) |
4 | */ | 4 | */ |
5 | 5 | ||
6 | #include "sched.h" | ||
7 | |||
8 | #include <linux/slab.h> | ||
9 | |||
10 | static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun); | ||
11 | |||
12 | struct rt_bandwidth def_rt_bandwidth; | ||
13 | |||
14 | static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer) | ||
15 | { | ||
16 | struct rt_bandwidth *rt_b = | ||
17 | container_of(timer, struct rt_bandwidth, rt_period_timer); | ||
18 | ktime_t now; | ||
19 | int overrun; | ||
20 | int idle = 0; | ||
21 | |||
22 | for (;;) { | ||
23 | now = hrtimer_cb_get_time(timer); | ||
24 | overrun = hrtimer_forward(timer, now, rt_b->rt_period); | ||
25 | |||
26 | if (!overrun) | ||
27 | break; | ||
28 | |||
29 | idle = do_sched_rt_period_timer(rt_b, overrun); | ||
30 | } | ||
31 | |||
32 | return idle ? HRTIMER_NORESTART : HRTIMER_RESTART; | ||
33 | } | ||
34 | |||
35 | void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime) | ||
36 | { | ||
37 | rt_b->rt_period = ns_to_ktime(period); | ||
38 | rt_b->rt_runtime = runtime; | ||
39 | |||
40 | raw_spin_lock_init(&rt_b->rt_runtime_lock); | ||
41 | |||
42 | hrtimer_init(&rt_b->rt_period_timer, | ||
43 | CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
44 | rt_b->rt_period_timer.function = sched_rt_period_timer; | ||
45 | } | ||
46 | |||
47 | static void start_rt_bandwidth(struct rt_bandwidth *rt_b) | ||
48 | { | ||
49 | if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF) | ||
50 | return; | ||
51 | |||
52 | if (hrtimer_active(&rt_b->rt_period_timer)) | ||
53 | return; | ||
54 | |||
55 | raw_spin_lock(&rt_b->rt_runtime_lock); | ||
56 | start_bandwidth_timer(&rt_b->rt_period_timer, rt_b->rt_period); | ||
57 | raw_spin_unlock(&rt_b->rt_runtime_lock); | ||
58 | } | ||
59 | |||
60 | void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq) | ||
61 | { | ||
62 | struct rt_prio_array *array; | ||
63 | int i; | ||
64 | |||
65 | array = &rt_rq->active; | ||
66 | for (i = 0; i < MAX_RT_PRIO; i++) { | ||
67 | INIT_LIST_HEAD(array->queue + i); | ||
68 | __clear_bit(i, array->bitmap); | ||
69 | } | ||
70 | /* delimiter for bitsearch: */ | ||
71 | __set_bit(MAX_RT_PRIO, array->bitmap); | ||
72 | |||
73 | #if defined CONFIG_SMP | ||
74 | rt_rq->highest_prio.curr = MAX_RT_PRIO; | ||
75 | rt_rq->highest_prio.next = MAX_RT_PRIO; | ||
76 | rt_rq->rt_nr_migratory = 0; | ||
77 | rt_rq->overloaded = 0; | ||
78 | plist_head_init(&rt_rq->pushable_tasks); | ||
79 | #endif | ||
80 | |||
81 | rt_rq->rt_time = 0; | ||
82 | rt_rq->rt_throttled = 0; | ||
83 | rt_rq->rt_runtime = 0; | ||
84 | raw_spin_lock_init(&rt_rq->rt_runtime_lock); | ||
85 | } | ||
86 | |||
6 | #ifdef CONFIG_RT_GROUP_SCHED | 87 | #ifdef CONFIG_RT_GROUP_SCHED |
88 | static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b) | ||
89 | { | ||
90 | hrtimer_cancel(&rt_b->rt_period_timer); | ||
91 | } | ||
7 | 92 | ||
8 | #define rt_entity_is_task(rt_se) (!(rt_se)->my_q) | 93 | #define rt_entity_is_task(rt_se) (!(rt_se)->my_q) |
9 | 94 | ||
@@ -25,6 +110,91 @@ static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se) | |||
25 | return rt_se->rt_rq; | 110 | return rt_se->rt_rq; |
26 | } | 111 | } |
27 | 112 | ||
113 | void free_rt_sched_group(struct task_group *tg) | ||
114 | { | ||
115 | int i; | ||
116 | |||
117 | if (tg->rt_se) | ||
118 | destroy_rt_bandwidth(&tg->rt_bandwidth); | ||
119 | |||
120 | for_each_possible_cpu(i) { | ||
121 | if (tg->rt_rq) | ||
122 | kfree(tg->rt_rq[i]); | ||
123 | if (tg->rt_se) | ||
124 | kfree(tg->rt_se[i]); | ||
125 | } | ||
126 | |||
127 | kfree(tg->rt_rq); | ||
128 | kfree(tg->rt_se); | ||
129 | } | ||
130 | |||
131 | void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq, | ||
132 | struct sched_rt_entity *rt_se, int cpu, | ||
133 | struct sched_rt_entity *parent) | ||
134 | { | ||
135 | struct rq *rq = cpu_rq(cpu); | ||
136 | |||
137 | rt_rq->highest_prio.curr = MAX_RT_PRIO; | ||
138 | rt_rq->rt_nr_boosted = 0; | ||
139 | rt_rq->rq = rq; | ||
140 | rt_rq->tg = tg; | ||
141 | |||
142 | tg->rt_rq[cpu] = rt_rq; | ||
143 | tg->rt_se[cpu] = rt_se; | ||
144 | |||
145 | if (!rt_se) | ||
146 | return; | ||
147 | |||
148 | if (!parent) | ||
149 | rt_se->rt_rq = &rq->rt; | ||
150 | else | ||
151 | rt_se->rt_rq = parent->my_q; | ||
152 | |||
153 | rt_se->my_q = rt_rq; | ||
154 | rt_se->parent = parent; | ||
155 | INIT_LIST_HEAD(&rt_se->run_list); | ||
156 | } | ||
157 | |||
158 | int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) | ||
159 | { | ||
160 | struct rt_rq *rt_rq; | ||
161 | struct sched_rt_entity *rt_se; | ||
162 | int i; | ||
163 | |||
164 | tg->rt_rq = kzalloc(sizeof(rt_rq) * nr_cpu_ids, GFP_KERNEL); | ||
165 | if (!tg->rt_rq) | ||
166 | goto err; | ||
167 | tg->rt_se = kzalloc(sizeof(rt_se) * nr_cpu_ids, GFP_KERNEL); | ||
168 | if (!tg->rt_se) | ||
169 | goto err; | ||
170 | |||
171 | init_rt_bandwidth(&tg->rt_bandwidth, | ||
172 | ktime_to_ns(def_rt_bandwidth.rt_period), 0); | ||
173 | |||
174 | for_each_possible_cpu(i) { | ||
175 | rt_rq = kzalloc_node(sizeof(struct rt_rq), | ||
176 | GFP_KERNEL, cpu_to_node(i)); | ||
177 | if (!rt_rq) | ||
178 | goto err; | ||
179 | |||
180 | rt_se = kzalloc_node(sizeof(struct sched_rt_entity), | ||
181 | GFP_KERNEL, cpu_to_node(i)); | ||
182 | if (!rt_se) | ||
183 | goto err_free_rq; | ||
184 | |||
185 | init_rt_rq(rt_rq, cpu_rq(i)); | ||
186 | rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime; | ||
187 | init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]); | ||
188 | } | ||
189 | |||
190 | return 1; | ||
191 | |||
192 | err_free_rq: | ||
193 | kfree(rt_rq); | ||
194 | err: | ||
195 | return 0; | ||
196 | } | ||
197 | |||
28 | #else /* CONFIG_RT_GROUP_SCHED */ | 198 | #else /* CONFIG_RT_GROUP_SCHED */ |
29 | 199 | ||
30 | #define rt_entity_is_task(rt_se) (1) | 200 | #define rt_entity_is_task(rt_se) (1) |
@@ -47,6 +217,12 @@ static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se) | |||
47 | return &rq->rt; | 217 | return &rq->rt; |
48 | } | 218 | } |
49 | 219 | ||
/* Without CONFIG_RT_GROUP_SCHED there is nothing to free. */
void free_rt_sched_group(struct task_group *tg)
{
}

/* Nothing to allocate in this configuration; always returns 1. */
int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
{
	return 1;
}
50 | #endif /* CONFIG_RT_GROUP_SCHED */ | 226 | #endif /* CONFIG_RT_GROUP_SCHED */ |
51 | 227 | ||
52 | #ifdef CONFIG_SMP | 228 | #ifdef CONFIG_SMP |
@@ -556,6 +732,28 @@ static void enable_runtime(struct rq *rq) | |||
556 | raw_spin_unlock_irqrestore(&rq->lock, flags); | 732 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
557 | } | 733 | } |
558 | 734 | ||
735 | int update_runtime(struct notifier_block *nfb, unsigned long action, void *hcpu) | ||
736 | { | ||
737 | int cpu = (int)(long)hcpu; | ||
738 | |||
739 | switch (action) { | ||
740 | case CPU_DOWN_PREPARE: | ||
741 | case CPU_DOWN_PREPARE_FROZEN: | ||
742 | disable_runtime(cpu_rq(cpu)); | ||
743 | return NOTIFY_OK; | ||
744 | |||
745 | case CPU_DOWN_FAILED: | ||
746 | case CPU_DOWN_FAILED_FROZEN: | ||
747 | case CPU_ONLINE: | ||
748 | case CPU_ONLINE_FROZEN: | ||
749 | enable_runtime(cpu_rq(cpu)); | ||
750 | return NOTIFY_OK; | ||
751 | |||
752 | default: | ||
753 | return NOTIFY_DONE; | ||
754 | } | ||
755 | } | ||
756 | |||
559 | static int balance_runtime(struct rt_rq *rt_rq) | 757 | static int balance_runtime(struct rt_rq *rt_rq) |
560 | { | 758 | { |
561 | int more = 0; | 759 | int more = 0; |
@@ -1178,8 +1376,6 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) | |||
1178 | /* Only try algorithms three times */ | 1376 | /* Only try algorithms three times */ |
1179 | #define RT_MAX_TRIES 3 | 1377 | #define RT_MAX_TRIES 3 |
1180 | 1378 | ||
1181 | static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep); | ||
1182 | |||
1183 | static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) | 1379 | static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) |
1184 | { | 1380 | { |
1185 | if (!task_running(rq, p) && | 1381 | if (!task_running(rq, p) && |
@@ -1653,13 +1849,14 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p) | |||
1653 | pull_rt_task(rq); | 1849 | pull_rt_task(rq); |
1654 | } | 1850 | } |
1655 | 1851 | ||
1656 | static inline void init_sched_rt_class(void) | 1852 | void init_sched_rt_class(void) |
1657 | { | 1853 | { |
1658 | unsigned int i; | 1854 | unsigned int i; |
1659 | 1855 | ||
1660 | for_each_possible_cpu(i) | 1856 | for_each_possible_cpu(i) { |
1661 | zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i), | 1857 | zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i), |
1662 | GFP_KERNEL, cpu_to_node(i)); | 1858 | GFP_KERNEL, cpu_to_node(i)); |
1859 | } | ||
1663 | } | 1860 | } |
1664 | #endif /* CONFIG_SMP */ | 1861 | #endif /* CONFIG_SMP */ |
1665 | 1862 | ||
@@ -1800,7 +1997,7 @@ static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task) | |||
1800 | return 0; | 1997 | return 0; |
1801 | } | 1998 | } |
1802 | 1999 | ||
1803 | static const struct sched_class rt_sched_class = { | 2000 | const struct sched_class rt_sched_class = { |
1804 | .next = &fair_sched_class, | 2001 | .next = &fair_sched_class, |
1805 | .enqueue_task = enqueue_task_rt, | 2002 | .enqueue_task = enqueue_task_rt, |
1806 | .dequeue_task = dequeue_task_rt, | 2003 | .dequeue_task = dequeue_task_rt, |
@@ -1835,7 +2032,7 @@ static const struct sched_class rt_sched_class = { | |||
1835 | #ifdef CONFIG_SCHED_DEBUG | 2032 | #ifdef CONFIG_SCHED_DEBUG |
1836 | extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq); | 2033 | extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq); |
1837 | 2034 | ||
1838 | static void print_rt_stats(struct seq_file *m, int cpu) | 2035 | void print_rt_stats(struct seq_file *m, int cpu) |
1839 | { | 2036 | { |
1840 | rt_rq_iter_t iter; | 2037 | rt_rq_iter_t iter; |
1841 | struct rt_rq *rt_rq; | 2038 | struct rt_rq *rt_rq; |
diff --git a/kernel/sched_stats.c b/kernel/sched_stats.c new file mode 100644 index 000000000000..2a581ba8e190 --- /dev/null +++ b/kernel/sched_stats.c | |||
@@ -0,0 +1,111 @@ | |||
1 | |||
2 | #include <linux/slab.h> | ||
3 | #include <linux/fs.h> | ||
4 | #include <linux/seq_file.h> | ||
5 | #include <linux/proc_fs.h> | ||
6 | |||
7 | #include "sched.h" | ||
8 | |||
9 | /* | ||
10 | * bump this up when changing the output format or the meaning of an existing | ||
11 | * format, so that tools can adapt (or abort) | ||
12 | */ | ||
13 | #define SCHEDSTAT_VERSION 15 | ||
14 | |||
15 | static int show_schedstat(struct seq_file *seq, void *v) | ||
16 | { | ||
17 | int cpu; | ||
18 | int mask_len = DIV_ROUND_UP(NR_CPUS, 32) * 9; | ||
19 | char *mask_str = kmalloc(mask_len, GFP_KERNEL); | ||
20 | |||
21 | if (mask_str == NULL) | ||
22 | return -ENOMEM; | ||
23 | |||
24 | seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION); | ||
25 | seq_printf(seq, "timestamp %lu\n", jiffies); | ||
26 | for_each_online_cpu(cpu) { | ||
27 | struct rq *rq = cpu_rq(cpu); | ||
28 | #ifdef CONFIG_SMP | ||
29 | struct sched_domain *sd; | ||
30 | int dcount = 0; | ||
31 | #endif | ||
32 | |||
33 | /* runqueue-specific stats */ | ||
34 | seq_printf(seq, | ||
35 | "cpu%d %u %u %u %u %u %u %llu %llu %lu", | ||
36 | cpu, rq->yld_count, | ||
37 | rq->sched_switch, rq->sched_count, rq->sched_goidle, | ||
38 | rq->ttwu_count, rq->ttwu_local, | ||
39 | rq->rq_cpu_time, | ||
40 | rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount); | ||
41 | |||
42 | seq_printf(seq, "\n"); | ||
43 | |||
44 | #ifdef CONFIG_SMP | ||
45 | /* domain-specific stats */ | ||
46 | rcu_read_lock(); | ||
47 | for_each_domain(cpu, sd) { | ||
48 | enum cpu_idle_type itype; | ||
49 | |||
50 | cpumask_scnprintf(mask_str, mask_len, | ||
51 | sched_domain_span(sd)); | ||
52 | seq_printf(seq, "domain%d %s", dcount++, mask_str); | ||
53 | for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES; | ||
54 | itype++) { | ||
55 | seq_printf(seq, " %u %u %u %u %u %u %u %u", | ||
56 | sd->lb_count[itype], | ||
57 | sd->lb_balanced[itype], | ||
58 | sd->lb_failed[itype], | ||
59 | sd->lb_imbalance[itype], | ||
60 | sd->lb_gained[itype], | ||
61 | sd->lb_hot_gained[itype], | ||
62 | sd->lb_nobusyq[itype], | ||
63 | sd->lb_nobusyg[itype]); | ||
64 | } | ||
65 | seq_printf(seq, | ||
66 | " %u %u %u %u %u %u %u %u %u %u %u %u\n", | ||
67 | sd->alb_count, sd->alb_failed, sd->alb_pushed, | ||
68 | sd->sbe_count, sd->sbe_balanced, sd->sbe_pushed, | ||
69 | sd->sbf_count, sd->sbf_balanced, sd->sbf_pushed, | ||
70 | sd->ttwu_wake_remote, sd->ttwu_move_affine, | ||
71 | sd->ttwu_move_balance); | ||
72 | } | ||
73 | rcu_read_unlock(); | ||
74 | #endif | ||
75 | } | ||
76 | kfree(mask_str); | ||
77 | return 0; | ||
78 | } | ||
79 | |||
80 | static int schedstat_open(struct inode *inode, struct file *file) | ||
81 | { | ||
82 | unsigned int size = PAGE_SIZE * (1 + num_online_cpus() / 32); | ||
83 | char *buf = kmalloc(size, GFP_KERNEL); | ||
84 | struct seq_file *m; | ||
85 | int res; | ||
86 | |||
87 | if (!buf) | ||
88 | return -ENOMEM; | ||
89 | res = single_open(file, show_schedstat, NULL); | ||
90 | if (!res) { | ||
91 | m = file->private_data; | ||
92 | m->buf = buf; | ||
93 | m->size = size; | ||
94 | } else | ||
95 | kfree(buf); | ||
96 | return res; | ||
97 | } | ||
98 | |||
99 | static const struct file_operations proc_schedstat_operations = { | ||
100 | .open = schedstat_open, | ||
101 | .read = seq_read, | ||
102 | .llseek = seq_lseek, | ||
103 | .release = single_release, | ||
104 | }; | ||
105 | |||
106 | static int __init proc_schedstat_init(void) | ||
107 | { | ||
108 | proc_create("schedstat", 0, NULL, &proc_schedstat_operations); | ||
109 | return 0; | ||
110 | } | ||
111 | module_init(proc_schedstat_init); | ||
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h index 87f9e36ea56e..ea2b6f0ec868 100644 --- a/kernel/sched_stats.h +++ b/kernel/sched_stats.h | |||
@@ -1,108 +1,5 @@ | |||
1 | 1 | ||
2 | #ifdef CONFIG_SCHEDSTATS | 2 | #ifdef CONFIG_SCHEDSTATS |
3 | /* | ||
4 | * bump this up when changing the output format or the meaning of an existing | ||
5 | * format, so that tools can adapt (or abort) | ||
6 | */ | ||
7 | #define SCHEDSTAT_VERSION 15 | ||
8 | |||
9 | static int show_schedstat(struct seq_file *seq, void *v) | ||
10 | { | ||
11 | int cpu; | ||
12 | int mask_len = DIV_ROUND_UP(NR_CPUS, 32) * 9; | ||
13 | char *mask_str = kmalloc(mask_len, GFP_KERNEL); | ||
14 | |||
15 | if (mask_str == NULL) | ||
16 | return -ENOMEM; | ||
17 | |||
18 | seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION); | ||
19 | seq_printf(seq, "timestamp %lu\n", jiffies); | ||
20 | for_each_online_cpu(cpu) { | ||
21 | struct rq *rq = cpu_rq(cpu); | ||
22 | #ifdef CONFIG_SMP | ||
23 | struct sched_domain *sd; | ||
24 | int dcount = 0; | ||
25 | #endif | ||
26 | |||
27 | /* runqueue-specific stats */ | ||
28 | seq_printf(seq, | ||
29 | "cpu%d %u %u %u %u %u %u %llu %llu %lu", | ||
30 | cpu, rq->yld_count, | ||
31 | rq->sched_switch, rq->sched_count, rq->sched_goidle, | ||
32 | rq->ttwu_count, rq->ttwu_local, | ||
33 | rq->rq_cpu_time, | ||
34 | rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount); | ||
35 | |||
36 | seq_printf(seq, "\n"); | ||
37 | |||
38 | #ifdef CONFIG_SMP | ||
39 | /* domain-specific stats */ | ||
40 | rcu_read_lock(); | ||
41 | for_each_domain(cpu, sd) { | ||
42 | enum cpu_idle_type itype; | ||
43 | |||
44 | cpumask_scnprintf(mask_str, mask_len, | ||
45 | sched_domain_span(sd)); | ||
46 | seq_printf(seq, "domain%d %s", dcount++, mask_str); | ||
47 | for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES; | ||
48 | itype++) { | ||
49 | seq_printf(seq, " %u %u %u %u %u %u %u %u", | ||
50 | sd->lb_count[itype], | ||
51 | sd->lb_balanced[itype], | ||
52 | sd->lb_failed[itype], | ||
53 | sd->lb_imbalance[itype], | ||
54 | sd->lb_gained[itype], | ||
55 | sd->lb_hot_gained[itype], | ||
56 | sd->lb_nobusyq[itype], | ||
57 | sd->lb_nobusyg[itype]); | ||
58 | } | ||
59 | seq_printf(seq, | ||
60 | " %u %u %u %u %u %u %u %u %u %u %u %u\n", | ||
61 | sd->alb_count, sd->alb_failed, sd->alb_pushed, | ||
62 | sd->sbe_count, sd->sbe_balanced, sd->sbe_pushed, | ||
63 | sd->sbf_count, sd->sbf_balanced, sd->sbf_pushed, | ||
64 | sd->ttwu_wake_remote, sd->ttwu_move_affine, | ||
65 | sd->ttwu_move_balance); | ||
66 | } | ||
67 | rcu_read_unlock(); | ||
68 | #endif | ||
69 | } | ||
70 | kfree(mask_str); | ||
71 | return 0; | ||
72 | } | ||
73 | |||
74 | static int schedstat_open(struct inode *inode, struct file *file) | ||
75 | { | ||
76 | unsigned int size = PAGE_SIZE * (1 + num_online_cpus() / 32); | ||
77 | char *buf = kmalloc(size, GFP_KERNEL); | ||
78 | struct seq_file *m; | ||
79 | int res; | ||
80 | |||
81 | if (!buf) | ||
82 | return -ENOMEM; | ||
83 | res = single_open(file, show_schedstat, NULL); | ||
84 | if (!res) { | ||
85 | m = file->private_data; | ||
86 | m->buf = buf; | ||
87 | m->size = size; | ||
88 | } else | ||
89 | kfree(buf); | ||
90 | return res; | ||
91 | } | ||
92 | |||
93 | static const struct file_operations proc_schedstat_operations = { | ||
94 | .open = schedstat_open, | ||
95 | .read = seq_read, | ||
96 | .llseek = seq_lseek, | ||
97 | .release = single_release, | ||
98 | }; | ||
99 | |||
100 | static int __init proc_schedstat_init(void) | ||
101 | { | ||
102 | proc_create("schedstat", 0, NULL, &proc_schedstat_operations); | ||
103 | return 0; | ||
104 | } | ||
105 | module_init(proc_schedstat_init); | ||
106 | 3 | ||
107 | /* | 4 | /* |
108 | * Expects runqueue lock to be held for atomicity of update | 5 | * Expects runqueue lock to be held for atomicity of update |
diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c index 8b44e7fa7fb3..7b386e86fd23 100644 --- a/kernel/sched_stoptask.c +++ b/kernel/sched_stoptask.c | |||
@@ -1,3 +1,5 @@ | |||
1 | #include "sched.h" | ||
2 | |||
1 | /* | 3 | /* |
2 | * stop-task scheduling class. | 4 | * stop-task scheduling class. |
3 | * | 5 | * |
@@ -80,7 +82,7 @@ get_rr_interval_stop(struct rq *rq, struct task_struct *task) | |||
80 | /* | 82 | /* |
81 | * Simple, special scheduling class for the per-CPU stop tasks: | 83 | * Simple, special scheduling class for the per-CPU stop tasks: |
82 | */ | 84 | */ |
83 | static const struct sched_class stop_sched_class = { | 85 | const struct sched_class stop_sched_class = { |
84 | .next = &rt_sched_class, | 86 | .next = &rt_sched_class, |
85 | 87 | ||
86 | .enqueue_task = enqueue_task_stop, | 88 | .enqueue_task = enqueue_task_stop, |