Diffstat (limited to 'kernel')
-rw-r--r--   kernel/Makefile             |    1
-rw-r--r--   kernel/marker.c             |  930
-rw-r--r--   kernel/module.c             |   18
-rw-r--r--   kernel/sched.c              |  444
-rw-r--r--   kernel/sched_debug.c        |    1
-rw-r--r--   kernel/sched_fair.c         |  414
-rw-r--r--   kernel/sched_features.h     |  122
-rw-r--r--   kernel/sched_idletask.c     |    4
-rw-r--r--   kernel/sched_rt.c           |    7
-rw-r--r--   kernel/trace/trace_printk.c |    1
10 files changed, 526 insertions, 1416 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 3d9c7e27e3f9..7c9b0a585502 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -87,7 +87,6 @@ obj-$(CONFIG_RELAY) += relay.o
87 | obj-$(CONFIG_SYSCTL) += utsname_sysctl.o | 87 | obj-$(CONFIG_SYSCTL) += utsname_sysctl.o |
88 | obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o | 88 | obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o |
89 | obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o | 89 | obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o |
90 | obj-$(CONFIG_MARKERS) += marker.o | ||
91 | obj-$(CONFIG_TRACEPOINTS) += tracepoint.o | 90 | obj-$(CONFIG_TRACEPOINTS) += tracepoint.o |
92 | obj-$(CONFIG_LATENCYTOP) += latencytop.o | 91 | obj-$(CONFIG_LATENCYTOP) += latencytop.o |
93 | obj-$(CONFIG_FUNCTION_TRACER) += trace/ | 92 | obj-$(CONFIG_FUNCTION_TRACER) += trace/ |
diff --git a/kernel/marker.c b/kernel/marker.c
deleted file mode 100644
index ea54f2647868..000000000000
--- a/kernel/marker.c
+++ /dev/null
@@ -1,930 +0,0 @@
1 | /* | ||
2 | * Copyright (C) 2007 Mathieu Desnoyers | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | */ | ||
18 | #include <linux/module.h> | ||
19 | #include <linux/mutex.h> | ||
20 | #include <linux/types.h> | ||
21 | #include <linux/jhash.h> | ||
22 | #include <linux/list.h> | ||
23 | #include <linux/rcupdate.h> | ||
24 | #include <linux/marker.h> | ||
25 | #include <linux/err.h> | ||
26 | #include <linux/slab.h> | ||
27 | |||
28 | extern struct marker __start___markers[]; | ||
29 | extern struct marker __stop___markers[]; | ||
30 | |||
31 | /* Set to 1 to enable marker debug output */ | ||
32 | static const int marker_debug; | ||
33 | |||
34 | /* | ||
35 | * markers_mutex nests inside module_mutex. Markers mutex protects the builtin | ||
36 | * and module markers and the hash table. | ||
37 | */ | ||
38 | static DEFINE_MUTEX(markers_mutex); | ||
39 | |||
40 | /* | ||
41 | * Marker hash table, containing the active markers. | ||
42 | * Protected by module_mutex. | ||
43 | */ | ||
44 | #define MARKER_HASH_BITS 6 | ||
45 | #define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS) | ||
46 | static struct hlist_head marker_table[MARKER_TABLE_SIZE]; | ||
47 | |||
48 | /* | ||
49 | * Note about RCU : | ||
50 | * It is used to make sure every handler has finished using its private data | ||
51 | * between two consecutive operation (add or remove) on a given marker. It is | ||
52 | * also used to delay the free of multiple probes array until a quiescent state | ||
53 | * is reached. | ||
54 | * marker entries modifications are protected by the markers_mutex. | ||
55 | */ | ||
56 | struct marker_entry { | ||
57 | struct hlist_node hlist; | ||
58 | char *format; | ||
59 | /* Probe wrapper */ | ||
60 | void (*call)(const struct marker *mdata, void *call_private, ...); | ||
61 | struct marker_probe_closure single; | ||
62 | struct marker_probe_closure *multi; | ||
63 | int refcount; /* Number of times armed. 0 if disarmed. */ | ||
64 | struct rcu_head rcu; | ||
65 | void *oldptr; | ||
66 | int rcu_pending; | ||
67 | unsigned char ptype:1; | ||
68 | unsigned char format_allocated:1; | ||
69 | char name[0]; /* Contains name'\0'format'\0' */ | ||
70 | }; | ||
71 | |||
72 | /** | ||
73 | * __mark_empty_function - Empty probe callback | ||
74 | * @probe_private: probe private data | ||
75 | * @call_private: call site private data | ||
76 | * @fmt: format string | ||
77 | * @...: variable argument list | ||
78 | * | ||
79 | * Empty callback provided as a probe to the markers. By providing this to a | ||
80 | * disabled marker, we make sure the execution flow is always valid even | ||
81 | * though the function pointer change and the marker enabling are two distinct | ||
82 | * operations that modify the execution flow of preemptible code. | ||
83 | */ | ||
84 | notrace void __mark_empty_function(void *probe_private, void *call_private, | ||
85 | const char *fmt, va_list *args) | ||
86 | { | ||
87 | } | ||
88 | EXPORT_SYMBOL_GPL(__mark_empty_function); | ||
89 | |||
90 | /* | ||
91 | * marker_probe_cb - Callback that prepares the variable argument list for probes. | ||
92 | * @mdata: pointer of type struct marker | ||
93 | * @call_private: caller site private data | ||
94 | * @...: Variable argument list. | ||
95 | * | ||
96 | * Since we do not use "typical" pointer based RCU in the 1 argument case, we | ||
97 | * need to put a full smp_rmb() in this branch. This is why we do not use | ||
98 | * rcu_dereference() for the pointer read. | ||
99 | */ | ||
100 | notrace void marker_probe_cb(const struct marker *mdata, | ||
101 | void *call_private, ...) | ||
102 | { | ||
103 | va_list args; | ||
104 | char ptype; | ||
105 | |||
106 | /* | ||
107 | * rcu_read_lock_sched does two things: it disables preemption to make | ||
108 | * sure the teardown of the callbacks can be done correctly when they | ||
109 | * are in modules, and it ensures RCU read coherency. | ||
110 | */ | ||
111 | rcu_read_lock_sched_notrace(); | ||
112 | ptype = mdata->ptype; | ||
113 | if (likely(!ptype)) { | ||
114 | marker_probe_func *func; | ||
115 | /* Must read the ptype before ptr. They are not data dependent, | ||
116 | * so we put an explicit smp_rmb() here. */ | ||
117 | smp_rmb(); | ||
118 | func = mdata->single.func; | ||
119 | /* Must read the ptr before private data. They are not data | ||
120 | * dependent, so we put an explicit smp_rmb() here. */ | ||
121 | smp_rmb(); | ||
122 | va_start(args, call_private); | ||
123 | func(mdata->single.probe_private, call_private, mdata->format, | ||
124 | &args); | ||
125 | va_end(args); | ||
126 | } else { | ||
127 | struct marker_probe_closure *multi; | ||
128 | int i; | ||
129 | /* | ||
130 | * Read mdata->ptype before mdata->multi. | ||
131 | */ | ||
132 | smp_rmb(); | ||
133 | multi = mdata->multi; | ||
134 | /* | ||
135 | * multi points to an array, therefore accessing the array | ||
136 | * depends on reading multi. However, even in this case, | ||
137 | * we must ensure that the pointer is read _before_ the array | ||
138 | * data. Same as rcu_dereference, but we need a full smp_rmb() | ||
139 | * in the fast path, so put the explicit barrier here. | ||
140 | */ | ||
141 | smp_read_barrier_depends(); | ||
142 | for (i = 0; multi[i].func; i++) { | ||
143 | va_start(args, call_private); | ||
144 | multi[i].func(multi[i].probe_private, call_private, | ||
145 | mdata->format, &args); | ||
146 | va_end(args); | ||
147 | } | ||
148 | } | ||
149 | rcu_read_unlock_sched_notrace(); | ||
150 | } | ||
151 | EXPORT_SYMBOL_GPL(marker_probe_cb); | ||
152 | |||
153 | /* | ||
154 | * marker_probe_cb_noarg - Callback that does not prepare the variable argument list. | ||
155 | * @mdata: pointer of type struct marker | ||
156 | * @call_private: caller site private data | ||
157 | * @...: Variable argument list. | ||
158 | * | ||
159 | * Should be connected to markers "MARK_NOARGS". | ||
160 | */ | ||
161 | static notrace void marker_probe_cb_noarg(const struct marker *mdata, | ||
162 | void *call_private, ...) | ||
163 | { | ||
164 | va_list args; /* not initialized */ | ||
165 | char ptype; | ||
166 | |||
167 | rcu_read_lock_sched_notrace(); | ||
168 | ptype = mdata->ptype; | ||
169 | if (likely(!ptype)) { | ||
170 | marker_probe_func *func; | ||
171 | /* Must read the ptype before ptr. They are not data dependent, | ||
172 | * so we put an explicit smp_rmb() here. */ | ||
173 | smp_rmb(); | ||
174 | func = mdata->single.func; | ||
175 | /* Must read the ptr before private data. They are not data | ||
176 | * dependent, so we put an explicit smp_rmb() here. */ | ||
177 | smp_rmb(); | ||
178 | func(mdata->single.probe_private, call_private, mdata->format, | ||
179 | &args); | ||
180 | } else { | ||
181 | struct marker_probe_closure *multi; | ||
182 | int i; | ||
183 | /* | ||
184 | * Read mdata->ptype before mdata->multi. | ||
185 | */ | ||
186 | smp_rmb(); | ||
187 | multi = mdata->multi; | ||
188 | /* | ||
189 | * multi points to an array, therefore accessing the array | ||
190 | * depends on reading multi. However, even in this case, | ||
191 | * we must ensure that the pointer is read _before_ the array | ||
192 | * data. Same as rcu_dereference, but we need a full smp_rmb() | ||
193 | * in the fast path, so put the explicit barrier here. | ||
194 | */ | ||
195 | smp_read_barrier_depends(); | ||
196 | for (i = 0; multi[i].func; i++) | ||
197 | multi[i].func(multi[i].probe_private, call_private, | ||
198 | mdata->format, &args); | ||
199 | } | ||
200 | rcu_read_unlock_sched_notrace(); | ||
201 | } | ||
202 | |||
203 | static void free_old_closure(struct rcu_head *head) | ||
204 | { | ||
205 | struct marker_entry *entry = container_of(head, | ||
206 | struct marker_entry, rcu); | ||
207 | kfree(entry->oldptr); | ||
208 | /* Make sure we free the data before setting the pending flag to 0 */ | ||
209 | smp_wmb(); | ||
210 | entry->rcu_pending = 0; | ||
211 | } | ||
212 | |||
213 | static void debug_print_probes(struct marker_entry *entry) | ||
214 | { | ||
215 | int i; | ||
216 | |||
217 | if (!marker_debug) | ||
218 | return; | ||
219 | |||
220 | if (!entry->ptype) { | ||
221 | printk(KERN_DEBUG "Single probe : %p %p\n", | ||
222 | entry->single.func, | ||
223 | entry->single.probe_private); | ||
224 | } else { | ||
225 | for (i = 0; entry->multi[i].func; i++) | ||
226 | printk(KERN_DEBUG "Multi probe %d : %p %p\n", i, | ||
227 | entry->multi[i].func, | ||
228 | entry->multi[i].probe_private); | ||
229 | } | ||
230 | } | ||
231 | |||
232 | static struct marker_probe_closure * | ||
233 | marker_entry_add_probe(struct marker_entry *entry, | ||
234 | marker_probe_func *probe, void *probe_private) | ||
235 | { | ||
236 | int nr_probes = 0; | ||
237 | struct marker_probe_closure *old, *new; | ||
238 | |||
239 | WARN_ON(!probe); | ||
240 | |||
241 | debug_print_probes(entry); | ||
242 | old = entry->multi; | ||
243 | if (!entry->ptype) { | ||
244 | if (entry->single.func == probe && | ||
245 | entry->single.probe_private == probe_private) | ||
246 | return ERR_PTR(-EBUSY); | ||
247 | if (entry->single.func == __mark_empty_function) { | ||
248 | /* 0 -> 1 probes */ | ||
249 | entry->single.func = probe; | ||
250 | entry->single.probe_private = probe_private; | ||
251 | entry->refcount = 1; | ||
252 | entry->ptype = 0; | ||
253 | debug_print_probes(entry); | ||
254 | return NULL; | ||
255 | } else { | ||
256 | /* 1 -> 2 probes */ | ||
257 | nr_probes = 1; | ||
258 | old = NULL; | ||
259 | } | ||
260 | } else { | ||
261 | /* (N -> N+1), (N != 0, 1) probes */ | ||
262 | for (nr_probes = 0; old[nr_probes].func; nr_probes++) | ||
263 | if (old[nr_probes].func == probe | ||
264 | && old[nr_probes].probe_private | ||
265 | == probe_private) | ||
266 | return ERR_PTR(-EBUSY); | ||
267 | } | ||
268 | /* + 2 : one for new probe, one for NULL func */ | ||
269 | new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure), | ||
270 | GFP_KERNEL); | ||
271 | if (new == NULL) | ||
272 | return ERR_PTR(-ENOMEM); | ||
273 | if (!old) | ||
274 | new[0] = entry->single; | ||
275 | else | ||
276 | memcpy(new, old, | ||
277 | nr_probes * sizeof(struct marker_probe_closure)); | ||
278 | new[nr_probes].func = probe; | ||
279 | new[nr_probes].probe_private = probe_private; | ||
280 | entry->refcount = nr_probes + 1; | ||
281 | entry->multi = new; | ||
282 | entry->ptype = 1; | ||
283 | debug_print_probes(entry); | ||
284 | return old; | ||
285 | } | ||
286 | |||
287 | static struct marker_probe_closure * | ||
288 | marker_entry_remove_probe(struct marker_entry *entry, | ||
289 | marker_probe_func *probe, void *probe_private) | ||
290 | { | ||
291 | int nr_probes = 0, nr_del = 0, i; | ||
292 | struct marker_probe_closure *old, *new; | ||
293 | |||
294 | old = entry->multi; | ||
295 | |||
296 | debug_print_probes(entry); | ||
297 | if (!entry->ptype) { | ||
298 | /* 0 -> N is an error */ | ||
299 | WARN_ON(entry->single.func == __mark_empty_function); | ||
300 | /* 1 -> 0 probes */ | ||
301 | WARN_ON(probe && entry->single.func != probe); | ||
302 | WARN_ON(entry->single.probe_private != probe_private); | ||
303 | entry->single.func = __mark_empty_function; | ||
304 | entry->refcount = 0; | ||
305 | entry->ptype = 0; | ||
306 | debug_print_probes(entry); | ||
307 | return NULL; | ||
308 | } else { | ||
309 | /* (N -> M), (N > 1, M >= 0) probes */ | ||
310 | for (nr_probes = 0; old[nr_probes].func; nr_probes++) { | ||
311 | if ((!probe || old[nr_probes].func == probe) | ||
312 | && old[nr_probes].probe_private | ||
313 | == probe_private) | ||
314 | nr_del++; | ||
315 | } | ||
316 | } | ||
317 | |||
318 | if (nr_probes - nr_del == 0) { | ||
319 | /* N -> 0, (N > 1) */ | ||
320 | entry->single.func = __mark_empty_function; | ||
321 | entry->refcount = 0; | ||
322 | entry->ptype = 0; | ||
323 | } else if (nr_probes - nr_del == 1) { | ||
324 | /* N -> 1, (N > 1) */ | ||
325 | for (i = 0; old[i].func; i++) | ||
326 | if ((probe && old[i].func != probe) || | ||
327 | old[i].probe_private != probe_private) | ||
328 | entry->single = old[i]; | ||
329 | entry->refcount = 1; | ||
330 | entry->ptype = 0; | ||
331 | } else { | ||
332 | int j = 0; | ||
333 | /* N -> M, (N > 1, M > 1) */ | ||
334 | /* + 1 for NULL */ | ||
335 | new = kzalloc((nr_probes - nr_del + 1) | ||
336 | * sizeof(struct marker_probe_closure), GFP_KERNEL); | ||
337 | if (new == NULL) | ||
338 | return ERR_PTR(-ENOMEM); | ||
339 | for (i = 0; old[i].func; i++) | ||
340 | if ((probe && old[i].func != probe) || | ||
341 | old[i].probe_private != probe_private) | ||
342 | new[j++] = old[i]; | ||
343 | entry->refcount = nr_probes - nr_del; | ||
344 | entry->ptype = 1; | ||
345 | entry->multi = new; | ||
346 | } | ||
347 | debug_print_probes(entry); | ||
348 | return old; | ||
349 | } | ||
350 | |||
351 | /* | ||
352 | * Get marker if the marker is present in the marker hash table. | ||
353 | * Must be called with markers_mutex held. | ||
354 | * Returns NULL if not present. | ||
355 | */ | ||
356 | static struct marker_entry *get_marker(const char *name) | ||
357 | { | ||
358 | struct hlist_head *head; | ||
359 | struct hlist_node *node; | ||
360 | struct marker_entry *e; | ||
361 | u32 hash = jhash(name, strlen(name), 0); | ||
362 | |||
363 | head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; | ||
364 | hlist_for_each_entry(e, node, head, hlist) { | ||
365 | if (!strcmp(name, e->name)) | ||
366 | return e; | ||
367 | } | ||
368 | return NULL; | ||
369 | } | ||
370 | |||
371 | /* | ||
372 | * Add the marker to the marker hash table. Must be called with markers_mutex | ||
373 | * held. | ||
374 | */ | ||
375 | static struct marker_entry *add_marker(const char *name, const char *format) | ||
376 | { | ||
377 | struct hlist_head *head; | ||
378 | struct hlist_node *node; | ||
379 | struct marker_entry *e; | ||
380 | size_t name_len = strlen(name) + 1; | ||
381 | size_t format_len = 0; | ||
382 | u32 hash = jhash(name, name_len-1, 0); | ||
383 | |||
384 | if (format) | ||
385 | format_len = strlen(format) + 1; | ||
386 | head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; | ||
387 | hlist_for_each_entry(e, node, head, hlist) { | ||
388 | if (!strcmp(name, e->name)) { | ||
389 | printk(KERN_NOTICE | ||
390 | "Marker %s busy\n", name); | ||
391 | return ERR_PTR(-EBUSY); /* Already there */ | ||
392 | } | ||
393 | } | ||
394 | /* | ||
395 | * Using kmalloc here to allocate a variable length element. Could | ||
396 | * cause some memory fragmentation if overused. | ||
397 | */ | ||
398 | e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, | ||
399 | GFP_KERNEL); | ||
400 | if (!e) | ||
401 | return ERR_PTR(-ENOMEM); | ||
402 | memcpy(&e->name[0], name, name_len); | ||
403 | if (format) { | ||
404 | e->format = &e->name[name_len]; | ||
405 | memcpy(e->format, format, format_len); | ||
406 | if (strcmp(e->format, MARK_NOARGS) == 0) | ||
407 | e->call = marker_probe_cb_noarg; | ||
408 | else | ||
409 | e->call = marker_probe_cb; | ||
410 | trace_mark(core_marker_format, "name %s format %s", | ||
411 | e->name, e->format); | ||
412 | } else { | ||
413 | e->format = NULL; | ||
414 | e->call = marker_probe_cb; | ||
415 | } | ||
416 | e->single.func = __mark_empty_function; | ||
417 | e->single.probe_private = NULL; | ||
418 | e->multi = NULL; | ||
419 | e->ptype = 0; | ||
420 | e->format_allocated = 0; | ||
421 | e->refcount = 0; | ||
422 | e->rcu_pending = 0; | ||
423 | hlist_add_head(&e->hlist, head); | ||
424 | return e; | ||
425 | } | ||
426 | |||
427 | /* | ||
428 | * Remove the marker from the marker hash table. Must be called with mutex_lock | ||
429 | * held. | ||
430 | */ | ||
431 | static int remove_marker(const char *name) | ||
432 | { | ||
433 | struct hlist_head *head; | ||
434 | struct hlist_node *node; | ||
435 | struct marker_entry *e; | ||
436 | int found = 0; | ||
437 | size_t len = strlen(name) + 1; | ||
438 | u32 hash = jhash(name, len-1, 0); | ||
439 | |||
440 | head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; | ||
441 | hlist_for_each_entry(e, node, head, hlist) { | ||
442 | if (!strcmp(name, e->name)) { | ||
443 | found = 1; | ||
444 | break; | ||
445 | } | ||
446 | } | ||
447 | if (!found) | ||
448 | return -ENOENT; | ||
449 | if (e->single.func != __mark_empty_function) | ||
450 | return -EBUSY; | ||
451 | hlist_del(&e->hlist); | ||
452 | if (e->format_allocated) | ||
453 | kfree(e->format); | ||
454 | /* Make sure the call_rcu has been executed */ | ||
455 | if (e->rcu_pending) | ||
456 | rcu_barrier_sched(); | ||
457 | kfree(e); | ||
458 | return 0; | ||
459 | } | ||
460 | |||
461 | /* | ||
462 | * Set the mark_entry format to the format found in the element. | ||
463 | */ | ||
464 | static int marker_set_format(struct marker_entry *entry, const char *format) | ||
465 | { | ||
466 | entry->format = kstrdup(format, GFP_KERNEL); | ||
467 | if (!entry->format) | ||
468 | return -ENOMEM; | ||
469 | entry->format_allocated = 1; | ||
470 | |||
471 | trace_mark(core_marker_format, "name %s format %s", | ||
472 | entry->name, entry->format); | ||
473 | return 0; | ||
474 | } | ||
475 | |||
476 | /* | ||
477 | * Sets the probe callback corresponding to one marker. | ||
478 | */ | ||
479 | static int set_marker(struct marker_entry *entry, struct marker *elem, | ||
480 | int active) | ||
481 | { | ||
482 | int ret = 0; | ||
483 | WARN_ON(strcmp(entry->name, elem->name) != 0); | ||
484 | |||
485 | if (entry->format) { | ||
486 | if (strcmp(entry->format, elem->format) != 0) { | ||
487 | printk(KERN_NOTICE | ||
488 | "Format mismatch for probe %s " | ||
489 | "(%s), marker (%s)\n", | ||
490 | entry->name, | ||
491 | entry->format, | ||
492 | elem->format); | ||
493 | return -EPERM; | ||
494 | } | ||
495 | } else { | ||
496 | ret = marker_set_format(entry, elem->format); | ||
497 | if (ret) | ||
498 | return ret; | ||
499 | } | ||
500 | |||
501 | /* | ||
502 | * probe_cb setup (statically known) is done here. It is | ||
503 | * asynchronous with the rest of execution, therefore we only | ||
504 | * pass from a "safe" callback (with argument) to an "unsafe" | ||
505 | * callback (does not set arguments). | ||
506 | */ | ||
507 | elem->call = entry->call; | ||
508 | /* | ||
509 | * Sanity check : | ||
510 | * We only update the single probe private data when the ptr is | ||
511 | * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1) | ||
512 | */ | ||
513 | WARN_ON(elem->single.func != __mark_empty_function | ||
514 | && elem->single.probe_private != entry->single.probe_private | ||
515 | && !elem->ptype); | ||
516 | elem->single.probe_private = entry->single.probe_private; | ||
517 | /* | ||
518 | * Make sure the private data is valid when we update the | ||
519 | * single probe ptr. | ||
520 | */ | ||
521 | smp_wmb(); | ||
522 | elem->single.func = entry->single.func; | ||
523 | /* | ||
524 | * We also make sure that the new probe callbacks array is consistent | ||
525 | * before setting a pointer to it. | ||
526 | */ | ||
527 | rcu_assign_pointer(elem->multi, entry->multi); | ||
528 | /* | ||
529 | * Update the function or multi probe array pointer before setting the | ||
530 | * ptype. | ||
531 | */ | ||
532 | smp_wmb(); | ||
533 | elem->ptype = entry->ptype; | ||
534 | |||
535 | if (elem->tp_name && (active ^ elem->state)) { | ||
536 | WARN_ON(!elem->tp_cb); | ||
537 | /* | ||
538 | * It is ok to directly call the probe registration because type | ||
539 | * checking has been done in the __trace_mark_tp() macro. | ||
540 | */ | ||
541 | |||
542 | if (active) { | ||
543 | /* | ||
544 | * try_module_get should always succeed because we hold | ||
545 | * lock_module() to get the tp_cb address. | ||
546 | */ | ||
547 | ret = try_module_get(__module_text_address( | ||
548 | (unsigned long)elem->tp_cb)); | ||
549 | BUG_ON(!ret); | ||
550 | ret = tracepoint_probe_register_noupdate( | ||
551 | elem->tp_name, | ||
552 | elem->tp_cb); | ||
553 | } else { | ||
554 | ret = tracepoint_probe_unregister_noupdate( | ||
555 | elem->tp_name, | ||
556 | elem->tp_cb); | ||
557 | /* | ||
558 | * tracepoint_probe_update_all() must be called | ||
559 | * before the module containing tp_cb is unloaded. | ||
560 | */ | ||
561 | module_put(__module_text_address( | ||
562 | (unsigned long)elem->tp_cb)); | ||
563 | } | ||
564 | } | ||
565 | elem->state = active; | ||
566 | |||
567 | return ret; | ||
568 | } | ||
569 | |||
570 | /* | ||
571 | * Disable a marker and its probe callback. | ||
572 | * Note: only waiting an RCU period after setting elem->call to the empty | ||
573 | * function ensures that the original callback is not used anymore. This is | ||
574 | * ensured by rcu_read_lock_sched around the call site. | ||
575 | */ | ||
576 | static void disable_marker(struct marker *elem) | ||
577 | { | ||
578 | int ret; | ||
579 | |||
580 | /* leave "call" as is. It is known statically. */ | ||
581 | if (elem->tp_name && elem->state) { | ||
582 | WARN_ON(!elem->tp_cb); | ||
583 | /* | ||
584 | * It is ok to directly call the probe registration because type | ||
585 | * checking has been done in the __trace_mark_tp() macro. | ||
586 | */ | ||
587 | ret = tracepoint_probe_unregister_noupdate(elem->tp_name, | ||
588 | elem->tp_cb); | ||
589 | WARN_ON(ret); | ||
590 | /* | ||
591 | * tracepoint_probe_update_all() must be called | ||
592 | * before the module containing tp_cb is unloaded. | ||
593 | */ | ||
594 | module_put(__module_text_address((unsigned long)elem->tp_cb)); | ||
595 | } | ||
596 | elem->state = 0; | ||
597 | elem->single.func = __mark_empty_function; | ||
598 | /* Update the function before setting the ptype */ | ||
599 | smp_wmb(); | ||
600 | elem->ptype = 0; /* single probe */ | ||
601 | /* | ||
602 | * Leave the private data and id there, because removal is racy and | ||
603 | * should be done only after an RCU period. These are never used until | ||
604 | * the next initialization anyway. | ||
605 | */ | ||
606 | } | ||
607 | |||
608 | /** | ||
609 | * marker_update_probe_range - Update a probe range | ||
610 | * @begin: beginning of the range | ||
611 | * @end: end of the range | ||
612 | * | ||
613 | * Updates the probe callback corresponding to a range of markers. | ||
614 | */ | ||
615 | void marker_update_probe_range(struct marker *begin, | ||
616 | struct marker *end) | ||
617 | { | ||
618 | struct marker *iter; | ||
619 | struct marker_entry *mark_entry; | ||
620 | |||
621 | mutex_lock(&markers_mutex); | ||
622 | for (iter = begin; iter < end; iter++) { | ||
623 | mark_entry = get_marker(iter->name); | ||
624 | if (mark_entry) { | ||
625 | set_marker(mark_entry, iter, !!mark_entry->refcount); | ||
626 | /* | ||
627 | * ignore error, continue | ||
628 | */ | ||
629 | } else { | ||
630 | disable_marker(iter); | ||
631 | } | ||
632 | } | ||
633 | mutex_unlock(&markers_mutex); | ||
634 | } | ||
635 | |||
636 | /* | ||
637 | * Update probes, removing the faulty probes. | ||
638 | * | ||
639 | * Internal callback only changed before the first probe is connected to it. | ||
640 | * Single probe private data can only be changed on 0 -> 1 and 2 -> 1 | ||
641 | * transitions. All other transitions will leave the old private data valid. | ||
642 | * This makes the non-atomicity of the callback/private data updates valid. | ||
643 | * | ||
644 | * "special case" updates : | ||
645 | * 0 -> 1 callback | ||
646 | * 1 -> 0 callback | ||
647 | * 1 -> 2 callbacks | ||
648 | * 2 -> 1 callbacks | ||
649 | * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates. | ||
650 | * Side effect : marker_set_format may delete the marker entry (creating a | ||
651 | * replacement). | ||
652 | */ | ||
653 | static void marker_update_probes(void) | ||
654 | { | ||
655 | /* Core kernel markers */ | ||
656 | marker_update_probe_range(__start___markers, __stop___markers); | ||
657 | /* Markers in modules. */ | ||
658 | module_update_markers(); | ||
659 | tracepoint_probe_update_all(); | ||
660 | } | ||
661 | |||
662 | /** | ||
663 | * marker_probe_register - Connect a probe to a marker | ||
664 | * @name: marker name | ||
665 | * @format: format string | ||
666 | * @probe: probe handler | ||
667 | * @probe_private: probe private data | ||
668 | * | ||
669 | * private data must be a valid allocated memory address, or NULL. | ||
670 | * Returns 0 if ok, error value on error. | ||
671 | * The probe address must at least be aligned on the architecture pointer size. | ||
672 | */ | ||
673 | int marker_probe_register(const char *name, const char *format, | ||
674 | marker_probe_func *probe, void *probe_private) | ||
675 | { | ||
676 | struct marker_entry *entry; | ||
677 | int ret = 0; | ||
678 | struct marker_probe_closure *old; | ||
679 | |||
680 | mutex_lock(&markers_mutex); | ||
681 | entry = get_marker(name); | ||
682 | if (!entry) { | ||
683 | entry = add_marker(name, format); | ||
684 | if (IS_ERR(entry)) | ||
685 | ret = PTR_ERR(entry); | ||
686 | } else if (format) { | ||
687 | if (!entry->format) | ||
688 | ret = marker_set_format(entry, format); | ||
689 | else if (strcmp(entry->format, format)) | ||
690 | ret = -EPERM; | ||
691 | } | ||
692 | if (ret) | ||
693 | goto end; | ||
694 | |||
695 | /* | ||
696 | * If we detect that a call_rcu is pending for this marker, | ||
697 | * make sure it's executed now. | ||
698 | */ | ||
699 | if (entry->rcu_pending) | ||
700 | rcu_barrier_sched(); | ||
701 | old = marker_entry_add_probe(entry, probe, probe_private); | ||
702 | if (IS_ERR(old)) { | ||
703 | ret = PTR_ERR(old); | ||
704 | goto end; | ||
705 | } | ||
706 | mutex_unlock(&markers_mutex); | ||
707 | marker_update_probes(); | ||
708 | mutex_lock(&markers_mutex); | ||
709 | entry = get_marker(name); | ||
710 | if (!entry) | ||
711 | goto end; | ||
712 | if (entry->rcu_pending) | ||
713 | rcu_barrier_sched(); | ||
714 | entry->oldptr = old; | ||
715 | entry->rcu_pending = 1; | ||
716 | /* write rcu_pending before calling the RCU callback */ | ||
717 | smp_wmb(); | ||
718 | call_rcu_sched(&entry->rcu, free_old_closure); | ||
719 | end: | ||
720 | mutex_unlock(&markers_mutex); | ||
721 | return ret; | ||
722 | } | ||
723 | EXPORT_SYMBOL_GPL(marker_probe_register); | ||
724 | |||
725 | /** | ||
726 | * marker_probe_unregister - Disconnect a probe from a marker | ||
727 | * @name: marker name | ||
728 | * @probe: probe function pointer | ||
729 | * @probe_private: probe private data | ||
730 | * | ||
731 | * Returns 0 on success, or -ENOENT if the marker is not found. | ||
732 | * We do not need to call a synchronize_sched to make sure the probes have | ||
733 | * finished running before doing a module unload, because the module unload | ||
734 | * itself uses stop_machine(), which ensures that every preempt-disabled section | ||
735 | * has finished. | ||
736 | */ | ||
737 | int marker_probe_unregister(const char *name, | ||
738 | marker_probe_func *probe, void *probe_private) | ||
739 | { | ||
740 | struct marker_entry *entry; | ||
741 | struct marker_probe_closure *old; | ||
742 | int ret = -ENOENT; | ||
743 | |||
744 | mutex_lock(&markers_mutex); | ||
745 | entry = get_marker(name); | ||
746 | if (!entry) | ||
747 | goto end; | ||
748 | if (entry->rcu_pending) | ||
749 | rcu_barrier_sched(); | ||
750 | old = marker_entry_remove_probe(entry, probe, probe_private); | ||
751 | mutex_unlock(&markers_mutex); | ||
752 | marker_update_probes(); | ||
753 | mutex_lock(&markers_mutex); | ||
754 | entry = get_marker(name); | ||
755 | if (!entry) | ||
756 | goto end; | ||
757 | if (entry->rcu_pending) | ||
758 | rcu_barrier_sched(); | ||
759 | entry->oldptr = old; | ||
760 | entry->rcu_pending = 1; | ||
761 | /* write rcu_pending before calling the RCU callback */ | ||
762 | smp_wmb(); | ||
763 | call_rcu_sched(&entry->rcu, free_old_closure); | ||
764 | remove_marker(name); /* Ignore busy error message */ | ||
765 | ret = 0; | ||
766 | end: | ||
767 | mutex_unlock(&markers_mutex); | ||
768 | return ret; | ||
769 | } | ||
770 | EXPORT_SYMBOL_GPL(marker_probe_unregister); | ||
771 | |||
772 | static struct marker_entry * | ||
773 | get_marker_from_private_data(marker_probe_func *probe, void *probe_private) | ||
774 | { | ||
775 | struct marker_entry *entry; | ||
776 | unsigned int i; | ||
777 | struct hlist_head *head; | ||
778 | struct hlist_node *node; | ||
779 | |||
780 | for (i = 0; i < MARKER_TABLE_SIZE; i++) { | ||
781 | head = &marker_table[i]; | ||
782 | hlist_for_each_entry(entry, node, head, hlist) { | ||
783 | if (!entry->ptype) { | ||
784 | if (entry->single.func == probe | ||
785 | && entry->single.probe_private | ||
786 | == probe_private) | ||
787 | return entry; | ||
788 | } else { | ||
789 | struct marker_probe_closure *closure; | ||
790 | closure = entry->multi; | ||
791 | for (i = 0; closure[i].func; i++) { | ||
792 | if (closure[i].func == probe && | ||
793 | closure[i].probe_private | ||
794 | == probe_private) | ||
795 | return entry; | ||
796 | } | ||
797 | } | ||
798 | } | ||
799 | } | ||
800 | return NULL; | ||
801 | } | ||
802 | |||
803 | /** | ||
804 | * marker_probe_unregister_private_data - Disconnect a probe from a marker | ||
805 | * @probe: probe function | ||
806 | * @probe_private: probe private data | ||
807 | * | ||
808 | * Unregister a probe by providing the registered private data. | ||
809 | * Only removes the first marker found in hash table. | ||
810 | * Return 0 on success or error value. | ||
811 | * We do not need to call a synchronize_sched to make sure the probes have | ||
812 | * finished running before doing a module unload, because the module unload | ||
813 | * itself uses stop_machine(), which ensures that every preempt-disabled section | ||
814 | * has finished. | ||
815 | */ | ||
816 | int marker_probe_unregister_private_data(marker_probe_func *probe, | ||
817 | void *probe_private) | ||
818 | { | ||
819 | struct marker_entry *entry; | ||
820 | int ret = 0; | ||
821 | struct marker_probe_closure *old; | ||
822 | |||
823 | mutex_lock(&markers_mutex); | ||
824 | entry = get_marker_from_private_data(probe, probe_private); | ||
825 | if (!entry) { | ||
826 | ret = -ENOENT; | ||
827 | goto end; | ||
828 | } | ||
829 | if (entry->rcu_pending) | ||
830 | rcu_barrier_sched(); | ||
831 | old = marker_entry_remove_probe(entry, NULL, probe_private); | ||
832 | mutex_unlock(&markers_mutex); | ||
833 | marker_update_probes(); | ||
834 | mutex_lock(&markers_mutex); | ||
835 | entry = get_marker_from_private_data(probe, probe_private); | ||
836 | if (!entry) | ||
837 | goto end; | ||
838 | if (entry->rcu_pending) | ||
839 | rcu_barrier_sched(); | ||
840 | entry->oldptr = old; | ||
841 | entry->rcu_pending = 1; | ||
842 | /* write rcu_pending before calling the RCU callback */ | ||
843 | smp_wmb(); | ||
844 | call_rcu_sched(&entry->rcu, free_old_closure); | ||
845 | remove_marker(entry->name); /* Ignore busy error message */ | ||
846 | end: | ||
847 | mutex_unlock(&markers_mutex); | ||
848 | return ret; | ||
849 | } | ||
850 | EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data); | ||
851 | |||
852 | /** | ||
853 | * marker_get_private_data - Get a marker's probe private data | ||
854 | * @name: marker name | ||
855 | * @probe: probe to match | ||
856 | * @num: get the nth matching probe's private data | ||
857 | * | ||
858 | * Returns the nth private data pointer (starting from 0) matching the given | ||
859 | * probe, or an ERR_PTR. | ||
860 | * | ||
861 | * The private data pointer should _only_ be dereferenced if the caller is the | ||
862 | * owner of the data, or its content could vanish. This is mostly used to | ||
863 | * confirm that a caller is the owner of a registered probe. | ||
864 | */ | ||
865 | void *marker_get_private_data(const char *name, marker_probe_func *probe, | ||
866 | int num) | ||
867 | { | ||
868 | struct hlist_head *head; | ||
869 | struct hlist_node *node; | ||
870 | struct marker_entry *e; | ||
871 | size_t name_len = strlen(name) + 1; | ||
872 | u32 hash = jhash(name, name_len-1, 0); | ||
873 | int i; | ||
874 | |||
875 | head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; | ||
876 | hlist_for_each_entry(e, node, head, hlist) { | ||
877 | if (!strcmp(name, e->name)) { | ||
878 | if (!e->ptype) { | ||
879 | if (num == 0 && e->single.func == probe) | ||
880 | return e->single.probe_private; | ||
881 | } else { | ||
882 | struct marker_probe_closure *closure; | ||
883 | int match = 0; | ||
884 | closure = e->multi; | ||
885 | for (i = 0; closure[i].func; i++) { | ||
886 | if (closure[i].func != probe) | ||
887 | continue; | ||
888 | if (match++ == num) | ||
889 | return closure[i].probe_private; | ||
890 | } | ||
891 | } | ||
892 | break; | ||
893 | } | ||
894 | } | ||
895 | return ERR_PTR(-ENOENT); | ||
896 | } | ||
897 | EXPORT_SYMBOL_GPL(marker_get_private_data); | ||
898 | |||
899 | #ifdef CONFIG_MODULES | ||
900 | |||
901 | int marker_module_notify(struct notifier_block *self, | ||
902 | unsigned long val, void *data) | ||
903 | { | ||
904 | struct module *mod = data; | ||
905 | |||
906 | switch (val) { | ||
907 | case MODULE_STATE_COMING: | ||
908 | marker_update_probe_range(mod->markers, | ||
909 | mod->markers + mod->num_markers); | ||
910 | break; | ||
911 | case MODULE_STATE_GOING: | ||
912 | marker_update_probe_range(mod->markers, | ||
913 | mod->markers + mod->num_markers); | ||
914 | break; | ||
915 | } | ||
916 | return 0; | ||
917 | } | ||
918 | |||
919 | struct notifier_block marker_module_nb = { | ||
920 | .notifier_call = marker_module_notify, | ||
921 | .priority = 0, | ||
922 | }; | ||
923 | |||
924 | static int init_markers(void) | ||
925 | { | ||
926 | return register_module_notifier(&marker_module_nb); | ||
927 | } | ||
928 | __initcall(init_markers); | ||
929 | |||
930 | #endif /* CONFIG_MODULES */ | ||
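
For reference, the interface deleted above was consumed by instrumentation modules roughly as sketched below. The probe name "subsys_event", the "value %d" format string and the module boilerplate are illustrative assumptions, not part of this commit, but the registration and probe signatures match the deleted kernel/marker.c:

#include <linux/init.h>
#include <linux/marker.h>
#include <linux/module.h>

/*
 * Probe callback: receives the probe_private pointer given at registration,
 * the call-site private data, the marker's format string and a va_list
 * holding the arguments of the trace_mark() call.
 */
static void my_probe(void *probe_private, void *call_private,
		     const char *fmt, va_list *args)
{
	/* decode the arguments described by fmt here */
}

static int __init my_probe_init(void)
{
	/* Arm every "subsys_event" marker; format must match the call sites. */
	return marker_probe_register("subsys_event", "value %d",
				     my_probe, NULL);
}

static void __exit my_probe_exit(void)
{
	marker_probe_unregister("subsys_event", my_probe, NULL);
}

module_init(my_probe_init);
module_exit(my_probe_exit);
MODULE_LICENSE("GPL");

/*
 * An instrumented code path would have contained a call site such as:
 *	trace_mark(subsys_event, "value %d", value);
 */

The remaining hunks simply drop the corresponding CONFIG_MARKERS hooks from the Makefile and the module loader while leaving the CONFIG_TRACEPOINTS equivalents in place.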
diff --git a/kernel/module.c b/kernel/module.c
index 05ce49ced8f6..b6ee424245dd 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2237,10 +2237,6 @@ static noinline struct module *load_module(void __user *umod,
2237 | sizeof(*mod->ctors), &mod->num_ctors); | 2237 | sizeof(*mod->ctors), &mod->num_ctors); |
2238 | #endif | 2238 | #endif |
2239 | 2239 | ||
2240 | #ifdef CONFIG_MARKERS | ||
2241 | mod->markers = section_objs(hdr, sechdrs, secstrings, "__markers", | ||
2242 | sizeof(*mod->markers), &mod->num_markers); | ||
2243 | #endif | ||
2244 | #ifdef CONFIG_TRACEPOINTS | 2240 | #ifdef CONFIG_TRACEPOINTS |
2245 | mod->tracepoints = section_objs(hdr, sechdrs, secstrings, | 2241 | mod->tracepoints = section_objs(hdr, sechdrs, secstrings, |
2246 | "__tracepoints", | 2242 | "__tracepoints", |
@@ -2958,20 +2954,6 @@ void module_layout(struct module *mod, | |||
2958 | EXPORT_SYMBOL(module_layout); | 2954 | EXPORT_SYMBOL(module_layout); |
2959 | #endif | 2955 | #endif |
2960 | 2956 | ||
2961 | #ifdef CONFIG_MARKERS | ||
2962 | void module_update_markers(void) | ||
2963 | { | ||
2964 | struct module *mod; | ||
2965 | |||
2966 | mutex_lock(&module_mutex); | ||
2967 | list_for_each_entry(mod, &modules, list) | ||
2968 | if (!mod->taints) | ||
2969 | marker_update_probe_range(mod->markers, | ||
2970 | mod->markers + mod->num_markers); | ||
2971 | mutex_unlock(&module_mutex); | ||
2972 | } | ||
2973 | #endif | ||
2974 | |||
2975 | #ifdef CONFIG_TRACEPOINTS | 2957 | #ifdef CONFIG_TRACEPOINTS |
2976 | void module_update_tracepoints(void) | 2958 | void module_update_tracepoints(void) |
2977 | { | 2959 | { |
diff --git a/kernel/sched.c b/kernel/sched.c
index d9db3fb17573..faf4d463bbff 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -119,8 +119,6 @@
119 | */ | 119 | */ |
120 | #define RUNTIME_INF ((u64)~0ULL) | 120 | #define RUNTIME_INF ((u64)~0ULL) |
121 | 121 | ||
122 | static void double_rq_lock(struct rq *rq1, struct rq *rq2); | ||
123 | |||
124 | static inline int rt_policy(int policy) | 122 | static inline int rt_policy(int policy) |
125 | { | 123 | { |
126 | if (unlikely(policy == SCHED_FIFO || policy == SCHED_RR)) | 124 | if (unlikely(policy == SCHED_FIFO || policy == SCHED_RR)) |
@@ -378,13 +376,6 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu) | |||
378 | 376 | ||
379 | #else | 377 | #else |
380 | 378 | ||
381 | #ifdef CONFIG_SMP | ||
382 | static int root_task_group_empty(void) | ||
383 | { | ||
384 | return 1; | ||
385 | } | ||
386 | #endif | ||
387 | |||
388 | static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } | 379 | static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } |
389 | static inline struct task_group *task_group(struct task_struct *p) | 380 | static inline struct task_group *task_group(struct task_struct *p) |
390 | { | 381 | { |
@@ -514,14 +505,6 @@ struct root_domain { | |||
514 | #ifdef CONFIG_SMP | 505 | #ifdef CONFIG_SMP |
515 | struct cpupri cpupri; | 506 | struct cpupri cpupri; |
516 | #endif | 507 | #endif |
517 | #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) | ||
518 | /* | ||
519 | * Preferred wake up cpu nominated by sched_mc balance that will be | ||
520 | * used when most cpus are idle in the system indicating overall very | ||
521 | * low system utilisation. Triggered at POWERSAVINGS_BALANCE_WAKEUP(2) | ||
522 | */ | ||
523 | unsigned int sched_mc_preferred_wakeup_cpu; | ||
524 | #endif | ||
525 | }; | 508 | }; |
526 | 509 | ||
527 | /* | 510 | /* |
@@ -646,9 +629,10 @@ struct rq { | |||
646 | 629 | ||
647 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); | 630 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); |
648 | 631 | ||
649 | static inline void check_preempt_curr(struct rq *rq, struct task_struct *p, int sync) | 632 | static inline |
633 | void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) | ||
650 | { | 634 | { |
651 | rq->curr->sched_class->check_preempt_curr(rq, p, sync); | 635 | rq->curr->sched_class->check_preempt_curr(rq, p, flags); |
652 | } | 636 | } |
653 | 637 | ||
654 | static inline int cpu_of(struct rq *rq) | 638 | static inline int cpu_of(struct rq *rq) |
@@ -1509,8 +1493,65 @@ static int tg_nop(struct task_group *tg, void *data) | |||
1509 | #endif | 1493 | #endif |
1510 | 1494 | ||
1511 | #ifdef CONFIG_SMP | 1495 | #ifdef CONFIG_SMP |
1512 | static unsigned long source_load(int cpu, int type); | 1496 | /* Used instead of source_load when we know the type == 0 */ |
1513 | static unsigned long target_load(int cpu, int type); | 1497 | static unsigned long weighted_cpuload(const int cpu) |
1498 | { | ||
1499 | return cpu_rq(cpu)->load.weight; | ||
1500 | } | ||
1501 | |||
1502 | /* | ||
1503 | * Return a low guess at the load of a migration-source cpu weighted | ||
1504 | * according to the scheduling class and "nice" value. | ||
1505 | * | ||
1506 | * We want to under-estimate the load of migration sources, to | ||
1507 | * balance conservatively. | ||
1508 | */ | ||
1509 | static unsigned long source_load(int cpu, int type) | ||
1510 | { | ||
1511 | struct rq *rq = cpu_rq(cpu); | ||
1512 | unsigned long total = weighted_cpuload(cpu); | ||
1513 | |||
1514 | if (type == 0 || !sched_feat(LB_BIAS)) | ||
1515 | return total; | ||
1516 | |||
1517 | return min(rq->cpu_load[type-1], total); | ||
1518 | } | ||
1519 | |||
1520 | /* | ||
1521 | * Return a high guess at the load of a migration-target cpu weighted | ||
1522 | * according to the scheduling class and "nice" value. | ||
1523 | */ | ||
1524 | static unsigned long target_load(int cpu, int type) | ||
1525 | { | ||
1526 | struct rq *rq = cpu_rq(cpu); | ||
1527 | unsigned long total = weighted_cpuload(cpu); | ||
1528 | |||
1529 | if (type == 0 || !sched_feat(LB_BIAS)) | ||
1530 | return total; | ||
1531 | |||
1532 | return max(rq->cpu_load[type-1], total); | ||
1533 | } | ||
1534 | |||
1535 | static struct sched_group *group_of(int cpu) | ||
1536 | { | ||
1537 | struct sched_domain *sd = rcu_dereference(cpu_rq(cpu)->sd); | ||
1538 | |||
1539 | if (!sd) | ||
1540 | return NULL; | ||
1541 | |||
1542 | return sd->groups; | ||
1543 | } | ||
1544 | |||
1545 | static unsigned long power_of(int cpu) | ||
1546 | { | ||
1547 | struct sched_group *group = group_of(cpu); | ||
1548 | |||
1549 | if (!group) | ||
1550 | return SCHED_LOAD_SCALE; | ||
1551 | |||
1552 | return group->cpu_power; | ||
1553 | } | ||
1554 | |||
1514 | static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); | 1555 | static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); |
1515 | 1556 | ||
1516 | static unsigned long cpu_avg_load_per_task(int cpu) | 1557 | static unsigned long cpu_avg_load_per_task(int cpu) |
@@ -1695,6 +1736,8 @@ static inline void update_shares_locked(struct rq *rq, struct sched_domain *sd) | |||
1695 | 1736 | ||
1696 | #ifdef CONFIG_PREEMPT | 1737 | #ifdef CONFIG_PREEMPT |
1697 | 1738 | ||
1739 | static void double_rq_lock(struct rq *rq1, struct rq *rq2); | ||
1740 | |||
1698 | /* | 1741 | /* |
1699 | * fair double_lock_balance: Safely acquires both rq->locks in a fair | 1742 | * fair double_lock_balance: Safely acquires both rq->locks in a fair |
1700 | * way at the expense of forcing extra atomic operations in all | 1743 | * way at the expense of forcing extra atomic operations in all |
@@ -1959,13 +2002,6 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p, | |||
1959 | } | 2002 | } |
1960 | 2003 | ||
1961 | #ifdef CONFIG_SMP | 2004 | #ifdef CONFIG_SMP |
1962 | |||
1963 | /* Used instead of source_load when we know the type == 0 */ | ||
1964 | static unsigned long weighted_cpuload(const int cpu) | ||
1965 | { | ||
1966 | return cpu_rq(cpu)->load.weight; | ||
1967 | } | ||
1968 | |||
1969 | /* | 2005 | /* |
1970 | * Is this task likely cache-hot: | 2006 | * Is this task likely cache-hot: |
1971 | */ | 2007 | */ |
@@ -2239,185 +2275,6 @@ void kick_process(struct task_struct *p) | |||
2239 | preempt_enable(); | 2275 | preempt_enable(); |
2240 | } | 2276 | } |
2241 | EXPORT_SYMBOL_GPL(kick_process); | 2277 | EXPORT_SYMBOL_GPL(kick_process); |
2242 | |||
2243 | /* | ||
2244 | * Return a low guess at the load of a migration-source cpu weighted | ||
2245 | * according to the scheduling class and "nice" value. | ||
2246 | * | ||
2247 | * We want to under-estimate the load of migration sources, to | ||
2248 | * balance conservatively. | ||
2249 | */ | ||
2250 | static unsigned long source_load(int cpu, int type) | ||
2251 | { | ||
2252 | struct rq *rq = cpu_rq(cpu); | ||
2253 | unsigned long total = weighted_cpuload(cpu); | ||
2254 | |||
2255 | if (type == 0 || !sched_feat(LB_BIAS)) | ||
2256 | return total; | ||
2257 | |||
2258 | return min(rq->cpu_load[type-1], total); | ||
2259 | } | ||
2260 | |||
2261 | /* | ||
2262 | * Return a high guess at the load of a migration-target cpu weighted | ||
2263 | * according to the scheduling class and "nice" value. | ||
2264 | */ | ||
2265 | static unsigned long target_load(int cpu, int type) | ||
2266 | { | ||
2267 | struct rq *rq = cpu_rq(cpu); | ||
2268 | unsigned long total = weighted_cpuload(cpu); | ||
2269 | |||
2270 | if (type == 0 || !sched_feat(LB_BIAS)) | ||
2271 | return total; | ||
2272 | |||
2273 | return max(rq->cpu_load[type-1], total); | ||
2274 | } | ||
2275 | |||
2276 | /* | ||
2277 | * find_idlest_group finds and returns the least busy CPU group within the | ||
2278 | * domain. | ||
2279 | */ | ||
2280 | static struct sched_group * | ||
2281 | find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) | ||
2282 | { | ||
2283 | struct sched_group *idlest = NULL, *this = NULL, *group = sd->groups; | ||
2284 | unsigned long min_load = ULONG_MAX, this_load = 0; | ||
2285 | int load_idx = sd->forkexec_idx; | ||
2286 | int imbalance = 100 + (sd->imbalance_pct-100)/2; | ||
2287 | |||
2288 | do { | ||
2289 | unsigned long load, avg_load; | ||
2290 | int local_group; | ||
2291 | int i; | ||
2292 | |||
2293 | /* Skip over this group if it has no CPUs allowed */ | ||
2294 | if (!cpumask_intersects(sched_group_cpus(group), | ||
2295 | &p->cpus_allowed)) | ||
2296 | continue; | ||
2297 | |||
2298 | local_group = cpumask_test_cpu(this_cpu, | ||
2299 | sched_group_cpus(group)); | ||
2300 | |||
2301 | /* Tally up the load of all CPUs in the group */ | ||
2302 | avg_load = 0; | ||
2303 | |||
2304 | for_each_cpu(i, sched_group_cpus(group)) { | ||
2305 | /* Bias balancing toward cpus of our domain */ | ||
2306 | if (local_group) | ||
2307 | load = source_load(i, load_idx); | ||
2308 | else | ||
2309 | load = target_load(i, load_idx); | ||
2310 | |||
2311 | avg_load += load; | ||
2312 | } | ||
2313 | |||
2314 | /* Adjust by relative CPU power of the group */ | ||
2315 | avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power; | ||
2316 | |||
2317 | if (local_group) { | ||
2318 | this_load = avg_load; | ||
2319 | this = group; | ||
2320 | } else if (avg_load < min_load) { | ||
2321 | min_load = avg_load; | ||
2322 | idlest = group; | ||
2323 | } | ||
2324 | } while (group = group->next, group != sd->groups); | ||
2325 | |||
2326 | if (!idlest || 100*this_load < imbalance*min_load) | ||
2327 | return NULL; | ||
2328 | return idlest; | ||
2329 | } | ||
2330 | |||
2331 | /* | ||
2332 | * find_idlest_cpu - find the idlest cpu among the cpus in group. | ||
2333 | */ | ||
2334 | static int | ||
2335 | find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) | ||
2336 | { | ||
2337 | unsigned long load, min_load = ULONG_MAX; | ||
2338 | int idlest = -1; | ||
2339 | int i; | ||
2340 | |||
2341 | /* Traverse only the allowed CPUs */ | ||
2342 | for_each_cpu_and(i, sched_group_cpus(group), &p->cpus_allowed) { | ||
2343 | load = weighted_cpuload(i); | ||
2344 | |||
2345 | if (load < min_load || (load == min_load && i == this_cpu)) { | ||
2346 | min_load = load; | ||
2347 | idlest = i; | ||
2348 | } | ||
2349 | } | ||
2350 | |||
2351 | return idlest; | ||
2352 | } | ||
2353 | |||
2354 | /* | ||
2355 | * sched_balance_self: balance the current task (running on cpu) in domains | ||
2356 | * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and | ||
2357 | * SD_BALANCE_EXEC. | ||
2358 | * | ||
2359 | * Balance, ie. select the least loaded group. | ||
2360 | * | ||
2361 | * Returns the target CPU number, or the same CPU if no balancing is needed. | ||
2362 | * | ||
2363 | * preempt must be disabled. | ||
2364 | */ | ||
2365 | static int sched_balance_self(int cpu, int flag) | ||
2366 | { | ||
2367 | struct task_struct *t = current; | ||
2368 | struct sched_domain *tmp, *sd = NULL; | ||
2369 | |||
2370 | for_each_domain(cpu, tmp) { | ||
2371 | /* | ||
2372 | * If power savings logic is enabled for a domain, stop there. | ||
2373 | */ | ||
2374 | if (tmp->flags & SD_POWERSAVINGS_BALANCE) | ||
2375 | break; | ||
2376 | if (tmp->flags & flag) | ||
2377 | sd = tmp; | ||
2378 | } | ||
2379 | |||
2380 | if (sd) | ||
2381 | update_shares(sd); | ||
2382 | |||
2383 | while (sd) { | ||
2384 | struct sched_group *group; | ||
2385 | int new_cpu, weight; | ||
2386 | |||
2387 | if (!(sd->flags & flag)) { | ||
2388 | sd = sd->child; | ||
2389 | continue; | ||
2390 | } | ||
2391 | |||
2392 | group = find_idlest_group(sd, t, cpu); | ||
2393 | if (!group) { | ||
2394 | sd = sd->child; | ||
2395 | continue; | ||
2396 | } | ||
2397 | |||
2398 | new_cpu = find_idlest_cpu(group, t, cpu); | ||
2399 | if (new_cpu == -1 || new_cpu == cpu) { | ||
2400 | /* Now try balancing at a lower domain level of cpu */ | ||
2401 | sd = sd->child; | ||
2402 | continue; | ||
2403 | } | ||
2404 | |||
2405 | /* Now try balancing at a lower domain level of new_cpu */ | ||
2406 | cpu = new_cpu; | ||
2407 | weight = cpumask_weight(sched_domain_span(sd)); | ||
2408 | sd = NULL; | ||
2409 | for_each_domain(cpu, tmp) { | ||
2410 | if (weight <= cpumask_weight(sched_domain_span(tmp))) | ||
2411 | break; | ||
2412 | if (tmp->flags & flag) | ||
2413 | sd = tmp; | ||
2414 | } | ||
2415 | /* while loop will break here if sd == NULL */ | ||
2416 | } | ||
2417 | |||
2418 | return cpu; | ||
2419 | } | ||
2420 | |||
2421 | #endif /* CONFIG_SMP */ | 2278 | #endif /* CONFIG_SMP */ |
2422 | 2279 | ||
2423 | /** | 2280 | /** |
@@ -2455,37 +2312,22 @@ void task_oncpu_function_call(struct task_struct *p, | |||
2455 | * | 2312 | * |
2456 | * returns failure only if the task is already active. | 2313 | * returns failure only if the task is already active. |
2457 | */ | 2314 | */ |
2458 | static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) | 2315 | static int try_to_wake_up(struct task_struct *p, unsigned int state, |
2316 | int wake_flags) | ||
2459 | { | 2317 | { |
2460 | int cpu, orig_cpu, this_cpu, success = 0; | 2318 | int cpu, orig_cpu, this_cpu, success = 0; |
2461 | unsigned long flags; | 2319 | unsigned long flags; |
2462 | long old_state; | ||
2463 | struct rq *rq; | 2320 | struct rq *rq; |
2464 | 2321 | ||
2465 | if (!sched_feat(SYNC_WAKEUPS)) | 2322 | if (!sched_feat(SYNC_WAKEUPS)) |
2466 | sync = 0; | 2323 | wake_flags &= ~WF_SYNC; |
2467 | |||
2468 | #ifdef CONFIG_SMP | ||
2469 | if (sched_feat(LB_WAKEUP_UPDATE) && !root_task_group_empty()) { | ||
2470 | struct sched_domain *sd; | ||
2471 | 2324 | ||
2472 | this_cpu = raw_smp_processor_id(); | 2325 | this_cpu = get_cpu(); |
2473 | cpu = task_cpu(p); | ||
2474 | |||
2475 | for_each_domain(this_cpu, sd) { | ||
2476 | if (cpumask_test_cpu(cpu, sched_domain_span(sd))) { | ||
2477 | update_shares(sd); | ||
2478 | break; | ||
2479 | } | ||
2480 | } | ||
2481 | } | ||
2482 | #endif | ||
2483 | 2326 | ||
2484 | smp_wmb(); | 2327 | smp_wmb(); |
2485 | rq = task_rq_lock(p, &flags); | 2328 | rq = task_rq_lock(p, &flags); |
2486 | update_rq_clock(rq); | 2329 | update_rq_clock(rq); |
2487 | old_state = p->state; | 2330 | if (!(p->state & state)) |
2488 | if (!(old_state & state)) | ||
2489 | goto out; | 2331 | goto out; |
2490 | 2332 | ||
2491 | if (p->se.on_rq) | 2333 | if (p->se.on_rq) |
@@ -2493,27 +2335,29 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) | |||
2493 | 2335 | ||
2494 | cpu = task_cpu(p); | 2336 | cpu = task_cpu(p); |
2495 | orig_cpu = cpu; | 2337 | orig_cpu = cpu; |
2496 | this_cpu = smp_processor_id(); | ||
2497 | 2338 | ||
2498 | #ifdef CONFIG_SMP | 2339 | #ifdef CONFIG_SMP |
2499 | if (unlikely(task_running(rq, p))) | 2340 | if (unlikely(task_running(rq, p))) |
2500 | goto out_activate; | 2341 | goto out_activate; |
2501 | 2342 | ||
2502 | cpu = p->sched_class->select_task_rq(p, sync); | 2343 | /* |
2503 | if (cpu != orig_cpu) { | 2344 | * In order to handle concurrent wakeups and release the rq->lock |
2345 | * we put the task in TASK_WAKING state. | ||
2346 | * | ||
2347 | * First fix up the nr_uninterruptible count: | ||
2348 | */ | ||
2349 | if (task_contributes_to_load(p)) | ||
2350 | rq->nr_uninterruptible--; | ||
2351 | p->state = TASK_WAKING; | ||
2352 | task_rq_unlock(rq, &flags); | ||
2353 | |||
2354 | cpu = p->sched_class->select_task_rq(p, SD_BALANCE_WAKE, wake_flags); | ||
2355 | if (cpu != orig_cpu) | ||
2504 | set_task_cpu(p, cpu); | 2356 | set_task_cpu(p, cpu); |
2505 | task_rq_unlock(rq, &flags); | ||
2506 | /* might preempt at this point */ | ||
2507 | rq = task_rq_lock(p, &flags); | ||
2508 | old_state = p->state; | ||
2509 | if (!(old_state & state)) | ||
2510 | goto out; | ||
2511 | if (p->se.on_rq) | ||
2512 | goto out_running; | ||
2513 | 2357 | ||
2514 | this_cpu = smp_processor_id(); | 2358 | rq = task_rq_lock(p, &flags); |
2515 | cpu = task_cpu(p); | 2359 | WARN_ON(p->state != TASK_WAKING); |
2516 | } | 2360 | cpu = task_cpu(p); |
2517 | 2361 | ||
2518 | #ifdef CONFIG_SCHEDSTATS | 2362 | #ifdef CONFIG_SCHEDSTATS |
2519 | schedstat_inc(rq, ttwu_count); | 2363 | schedstat_inc(rq, ttwu_count); |
@@ -2533,7 +2377,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) | |||
2533 | out_activate: | 2377 | out_activate: |
2534 | #endif /* CONFIG_SMP */ | 2378 | #endif /* CONFIG_SMP */ |
2535 | schedstat_inc(p, se.nr_wakeups); | 2379 | schedstat_inc(p, se.nr_wakeups); |
2536 | if (sync) | 2380 | if (wake_flags & WF_SYNC) |
2537 | schedstat_inc(p, se.nr_wakeups_sync); | 2381 | schedstat_inc(p, se.nr_wakeups_sync); |
2538 | if (orig_cpu != cpu) | 2382 | if (orig_cpu != cpu) |
2539 | schedstat_inc(p, se.nr_wakeups_migrate); | 2383 | schedstat_inc(p, se.nr_wakeups_migrate); |
@@ -2562,7 +2406,7 @@ out_activate: | |||
2562 | 2406 | ||
2563 | out_running: | 2407 | out_running: |
2564 | trace_sched_wakeup(rq, p, success); | 2408 | trace_sched_wakeup(rq, p, success); |
2565 | check_preempt_curr(rq, p, sync); | 2409 | check_preempt_curr(rq, p, wake_flags); |
2566 | 2410 | ||
2567 | p->state = TASK_RUNNING; | 2411 | p->state = TASK_RUNNING; |
2568 | #ifdef CONFIG_SMP | 2412 | #ifdef CONFIG_SMP |
@@ -2571,6 +2415,7 @@ out_running: | |||
2571 | #endif | 2415 | #endif |
2572 | out: | 2416 | out: |
2573 | task_rq_unlock(rq, &flags); | 2417 | task_rq_unlock(rq, &flags); |
2418 | put_cpu(); | ||
2574 | 2419 | ||
2575 | return success; | 2420 | return success; |
2576 | } | 2421 | } |
@@ -2613,6 +2458,7 @@ static void __sched_fork(struct task_struct *p) | |||
2613 | p->se.avg_overlap = 0; | 2458 | p->se.avg_overlap = 0; |
2614 | p->se.start_runtime = 0; | 2459 | p->se.start_runtime = 0; |
2615 | p->se.avg_wakeup = sysctl_sched_wakeup_granularity; | 2460 | p->se.avg_wakeup = sysctl_sched_wakeup_granularity; |
2461 | p->se.avg_running = 0; | ||
2616 | 2462 | ||
2617 | #ifdef CONFIG_SCHEDSTATS | 2463 | #ifdef CONFIG_SCHEDSTATS |
2618 | p->se.wait_start = 0; | 2464 | p->se.wait_start = 0; |
@@ -2674,11 +2520,6 @@ void sched_fork(struct task_struct *p, int clone_flags) | |||
2674 | 2520 | ||
2675 | __sched_fork(p); | 2521 | __sched_fork(p); |
2676 | 2522 | ||
2677 | #ifdef CONFIG_SMP | ||
2678 | cpu = sched_balance_self(cpu, SD_BALANCE_FORK); | ||
2679 | #endif | ||
2680 | set_task_cpu(p, cpu); | ||
2681 | |||
2682 | /* | 2523 | /* |
2683 | * Make sure we do not leak PI boosting priority to the child. | 2524 | * Make sure we do not leak PI boosting priority to the child. |
2684 | */ | 2525 | */ |
@@ -2709,6 +2550,11 @@ void sched_fork(struct task_struct *p, int clone_flags) | |||
2709 | if (!rt_prio(p->prio)) | 2550 | if (!rt_prio(p->prio)) |
2710 | p->sched_class = &fair_sched_class; | 2551 | p->sched_class = &fair_sched_class; |
2711 | 2552 | ||
2553 | #ifdef CONFIG_SMP | ||
2554 | cpu = p->sched_class->select_task_rq(p, SD_BALANCE_FORK, 0); | ||
2555 | #endif | ||
2556 | set_task_cpu(p, cpu); | ||
2557 | |||
2712 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) | 2558 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) |
2713 | if (likely(sched_info_on())) | 2559 | if (likely(sched_info_on())) |
2714 | memset(&p->sched_info, 0, sizeof(p->sched_info)); | 2560 | memset(&p->sched_info, 0, sizeof(p->sched_info)); |
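Annotation: sched_fork() no longer calls sched_balance_self() directly; fork-time placement is delegated to the scheduling class through select_task_rq(p, SD_BALANCE_FORK, 0), after the class has been chosen. A sketch of how a class plugs into that hook, assuming the kernel-internal struct sched_class layout implied by this diff (only the select_task_rq member is taken from the patch; everything else is illustrative):

/* Sketch only: a scheduling class implementing the per-class placement hook. */
static int select_task_rq_example(struct task_struct *p, int sd_flag, int wake_flags)
{
	if (sd_flag != SD_BALANCE_WAKE)
		return task_cpu(p);     /* fork/exec: keep the current placement */

	return smp_processor_id();      /* wake: run near the waker */
}

static const struct sched_class example_sched_class = {
	.select_task_rq	= select_task_rq_example,
	/* other hooks omitted */
};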
@@ -2754,7 +2600,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) | |||
2754 | inc_nr_running(rq); | 2600 | inc_nr_running(rq); |
2755 | } | 2601 | } |
2756 | trace_sched_wakeup_new(rq, p, 1); | 2602 | trace_sched_wakeup_new(rq, p, 1); |
2757 | check_preempt_curr(rq, p, 0); | 2603 | check_preempt_curr(rq, p, WF_FORK); |
2758 | #ifdef CONFIG_SMP | 2604 | #ifdef CONFIG_SMP |
2759 | if (p->sched_class->task_wake_up) | 2605 | if (p->sched_class->task_wake_up) |
2760 | p->sched_class->task_wake_up(rq, p); | 2606 | p->sched_class->task_wake_up(rq, p); |
@@ -3263,7 +3109,7 @@ out: | |||
3263 | void sched_exec(void) | 3109 | void sched_exec(void) |
3264 | { | 3110 | { |
3265 | int new_cpu, this_cpu = get_cpu(); | 3111 | int new_cpu, this_cpu = get_cpu(); |
3266 | new_cpu = sched_balance_self(this_cpu, SD_BALANCE_EXEC); | 3112 | new_cpu = current->sched_class->select_task_rq(current, SD_BALANCE_EXEC, 0); |
3267 | put_cpu(); | 3113 | put_cpu(); |
3268 | if (new_cpu != this_cpu) | 3114 | if (new_cpu != this_cpu) |
3269 | sched_migrate_task(current, new_cpu); | 3115 | sched_migrate_task(current, new_cpu); |
@@ -3683,11 +3529,6 @@ static inline int check_power_save_busiest_group(struct sd_lb_stats *sds, | |||
3683 | *imbalance = sds->min_load_per_task; | 3529 | *imbalance = sds->min_load_per_task; |
3684 | sds->busiest = sds->group_min; | 3530 | sds->busiest = sds->group_min; |
3685 | 3531 | ||
3686 | if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP) { | ||
3687 | cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu = | ||
3688 | group_first_cpu(sds->group_leader); | ||
3689 | } | ||
3690 | |||
3691 | return 1; | 3532 | return 1; |
3692 | 3533 | ||
3693 | } | 3534 | } |
@@ -3711,7 +3552,18 @@ static inline int check_power_save_busiest_group(struct sd_lb_stats *sds, | |||
3711 | } | 3552 | } |
3712 | #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */ | 3553 | #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */ |
3713 | 3554 | ||
3714 | unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu) | 3555 | |
3556 | unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu) | ||
3557 | { | ||
3558 | return SCHED_LOAD_SCALE; | ||
3559 | } | ||
3560 | |||
3561 | unsigned long __weak arch_scale_freq_power(struct sched_domain *sd, int cpu) | ||
3562 | { | ||
3563 | return default_scale_freq_power(sd, cpu); | ||
3564 | } | ||
3565 | |||
3566 | unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu) | ||
3715 | { | 3567 | { |
3716 | unsigned long weight = cpumask_weight(sched_domain_span(sd)); | 3568 | unsigned long weight = cpumask_weight(sched_domain_span(sd)); |
3717 | unsigned long smt_gain = sd->smt_gain; | 3569 | unsigned long smt_gain = sd->smt_gain; |
@@ -3721,6 +3573,11 @@ unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu) | |||
3721 | return smt_gain; | 3573 | return smt_gain; |
3722 | } | 3574 | } |
3723 | 3575 | ||
3576 | unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu) | ||
3577 | { | ||
3578 | return default_scale_smt_power(sd, cpu); | ||
3579 | } | ||
3580 | |||
3724 | unsigned long scale_rt_power(int cpu) | 3581 | unsigned long scale_rt_power(int cpu) |
3725 | { | 3582 | { |
3726 | struct rq *rq = cpu_rq(cpu); | 3583 | struct rq *rq = cpu_rq(cpu); |
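Annotation: default_scale_freq_power() and default_scale_smt_power() are split out so that the arch_scale_*_power() hooks become plain __weak wrappers; an architecture can override the scaling while generic code can still call the default by name (which the ARCH_POWER feature in the next hunk does). A stand-alone sketch of the same weak-symbol pattern:

/* Generic default, always reachable by name. */
unsigned long default_scale_power(int cpu)
{
	return 1024;    /* SCHED_LOAD_SCALE-style fixed point */
}

/* Weak wrapper: an architecture may supply a strong definition instead. */
unsigned long __attribute__((weak)) arch_scale_power(int cpu)
{
	return default_scale_power(cpu);
}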
@@ -3745,10 +3602,19 @@ static void update_cpu_power(struct sched_domain *sd, int cpu) | |||
3745 | unsigned long power = SCHED_LOAD_SCALE; | 3602 | unsigned long power = SCHED_LOAD_SCALE; |
3746 | struct sched_group *sdg = sd->groups; | 3603 | struct sched_group *sdg = sd->groups; |
3747 | 3604 | ||
3748 | /* here we could scale based on cpufreq */ | 3605 | if (sched_feat(ARCH_POWER)) |
3606 | power *= arch_scale_freq_power(sd, cpu); | ||
3607 | else | ||
3608 | power *= default_scale_freq_power(sd, cpu); | ||
3609 | |||
3610 | power >>= SCHED_LOAD_SHIFT; | ||
3749 | 3611 | ||
3750 | if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) { | 3612 | if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) { |
3751 | power *= arch_scale_smt_power(sd, cpu); | 3613 | if (sched_feat(ARCH_POWER)) |
3614 | power *= arch_scale_smt_power(sd, cpu); | ||
3615 | else | ||
3616 | power *= default_scale_smt_power(sd, cpu); | ||
3617 | |||
3752 | power >>= SCHED_LOAD_SHIFT; | 3618 | power >>= SCHED_LOAD_SHIFT; |
3753 | } | 3619 | } |
3754 | 3620 | ||
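Annotation: update_cpu_power() now composes the frequency and SMT factors as fixed-point multiplications, shifting by SCHED_LOAD_SHIFT after each step. A worked example under the usual assumption SCHED_LOAD_SCALE = 1024 (so SCHED_LOAD_SHIFT = 10); the sample values are illustrative only:

/* Sketch of the fixed-point composition performed in update_cpu_power(). */
unsigned long composed_power(unsigned long freq_scale, unsigned long smt_scale)
{
	unsigned long power = 1024;             /* SCHED_LOAD_SCALE */

	power = (power * freq_scale) >> 10;     /* e.g. freq_scale = 1024 -> power stays 1024 */
	power = (power * smt_scale)  >> 10;     /* e.g. smt_scale  =  589 -> power becomes 589 */

	return power;   /* two SMT siblings at ~589 each sum to slightly more than one full core */
}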
@@ -4161,26 +4027,6 @@ ret: | |||
4161 | return NULL; | 4027 | return NULL; |
4162 | } | 4028 | } |
4163 | 4029 | ||
4164 | static struct sched_group *group_of(int cpu) | ||
4165 | { | ||
4166 | struct sched_domain *sd = rcu_dereference(cpu_rq(cpu)->sd); | ||
4167 | |||
4168 | if (!sd) | ||
4169 | return NULL; | ||
4170 | |||
4171 | return sd->groups; | ||
4172 | } | ||
4173 | |||
4174 | static unsigned long power_of(int cpu) | ||
4175 | { | ||
4176 | struct sched_group *group = group_of(cpu); | ||
4177 | |||
4178 | if (!group) | ||
4179 | return SCHED_LOAD_SCALE; | ||
4180 | |||
4181 | return group->cpu_power; | ||
4182 | } | ||
4183 | |||
4184 | /* | 4030 | /* |
4185 | * find_busiest_queue - find the busiest runqueue among the cpus in group. | 4031 | * find_busiest_queue - find the busiest runqueue among the cpus in group. |
4186 | */ | 4032 | */ |
@@ -5465,14 +5311,13 @@ static inline void schedule_debug(struct task_struct *prev) | |||
5465 | #endif | 5311 | #endif |
5466 | } | 5312 | } |
5467 | 5313 | ||
5468 | static void put_prev_task(struct rq *rq, struct task_struct *prev) | 5314 | static void put_prev_task(struct rq *rq, struct task_struct *p) |
5469 | { | 5315 | { |
5470 | if (prev->state == TASK_RUNNING) { | 5316 | u64 runtime = p->se.sum_exec_runtime - p->se.prev_sum_exec_runtime; |
5471 | u64 runtime = prev->se.sum_exec_runtime; | ||
5472 | 5317 | ||
5473 | runtime -= prev->se.prev_sum_exec_runtime; | 5318 | update_avg(&p->se.avg_running, runtime); |
5474 | runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost); | ||
5475 | 5319 | ||
5320 | if (p->state == TASK_RUNNING) { | ||
5476 | /* | 5321 | /* |
5477 | * In order to avoid avg_overlap growing stale when we are | 5322 | * In order to avoid avg_overlap growing stale when we are |
5478 | * indeed overlapping and hence not getting put to sleep, grow | 5323 | * indeed overlapping and hence not getting put to sleep, grow |
@@ -5482,9 +5327,12 @@ static void put_prev_task(struct rq *rq, struct task_struct *prev) | |||
5482 | * correlates to the amount of cache footprint a task can | 5327 | * correlates to the amount of cache footprint a task can |
5483 | * build up. | 5328 | * build up. |
5484 | */ | 5329 | */ |
5485 | update_avg(&prev->se.avg_overlap, runtime); | 5330 | runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost); |
5331 | update_avg(&p->se.avg_overlap, runtime); | ||
5332 | } else { | ||
5333 | update_avg(&p->se.avg_running, 0); | ||
5486 | } | 5334 | } |
5487 | prev->sched_class->put_prev_task(rq, prev); | 5335 | p->sched_class->put_prev_task(rq, p); |
5488 | } | 5336 | } |
5489 | 5337 | ||
5490 | /* | 5338 | /* |
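Annotation: put_prev_task() now feeds the full runtime of the previous slice into p->se.avg_running (and resets the average toward zero when the task blocks), while avg_overlap keeps its clamped update. update_avg() itself is not shown in this diff; it is assumed to be the usual 1/8-weight exponential moving average, roughly:

/* Assumed shape of update_avg(): an exponentially weighted moving average. */
static void update_avg(u64 *avg, u64 sample)
{
	s64 diff = sample - *avg;

	*avg += diff >> 3;      /* move 1/8th of the way toward the new sample */
}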
@@ -5716,10 +5564,10 @@ asmlinkage void __sched preempt_schedule_irq(void) | |||
5716 | 5564 | ||
5717 | #endif /* CONFIG_PREEMPT */ | 5565 | #endif /* CONFIG_PREEMPT */ |
5718 | 5566 | ||
5719 | int default_wake_function(wait_queue_t *curr, unsigned mode, int sync, | 5567 | int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags, |
5720 | void *key) | 5568 | void *key) |
5721 | { | 5569 | { |
5722 | return try_to_wake_up(curr->private, mode, sync); | 5570 | return try_to_wake_up(curr->private, mode, wake_flags); |
5723 | } | 5571 | } |
5724 | EXPORT_SYMBOL(default_wake_function); | 5572 | EXPORT_SYMBOL(default_wake_function); |
5725 | 5573 | ||
@@ -5733,14 +5581,14 @@ EXPORT_SYMBOL(default_wake_function); | |||
5733 | * zero in this (rare) case, and we handle it by continuing to scan the queue. | 5581 | * zero in this (rare) case, and we handle it by continuing to scan the queue. |
5734 | */ | 5582 | */ |
5735 | static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, | 5583 | static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, |
5736 | int nr_exclusive, int sync, void *key) | 5584 | int nr_exclusive, int wake_flags, void *key) |
5737 | { | 5585 | { |
5738 | wait_queue_t *curr, *next; | 5586 | wait_queue_t *curr, *next; |
5739 | 5587 | ||
5740 | list_for_each_entry_safe(curr, next, &q->task_list, task_list) { | 5588 | list_for_each_entry_safe(curr, next, &q->task_list, task_list) { |
5741 | unsigned flags = curr->flags; | 5589 | unsigned flags = curr->flags; |
5742 | 5590 | ||
5743 | if (curr->func(curr, mode, sync, key) && | 5591 | if (curr->func(curr, mode, wake_flags, key) && |
5744 | (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive) | 5592 | (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive) |
5745 | break; | 5593 | break; |
5746 | } | 5594 | } |
@@ -5801,16 +5649,16 @@ void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, | |||
5801 | int nr_exclusive, void *key) | 5649 | int nr_exclusive, void *key) |
5802 | { | 5650 | { |
5803 | unsigned long flags; | 5651 | unsigned long flags; |
5804 | int sync = 1; | 5652 | int wake_flags = WF_SYNC; |
5805 | 5653 | ||
5806 | if (unlikely(!q)) | 5654 | if (unlikely(!q)) |
5807 | return; | 5655 | return; |
5808 | 5656 | ||
5809 | if (unlikely(!nr_exclusive)) | 5657 | if (unlikely(!nr_exclusive)) |
5810 | sync = 0; | 5658 | wake_flags = 0; |
5811 | 5659 | ||
5812 | spin_lock_irqsave(&q->lock, flags); | 5660 | spin_lock_irqsave(&q->lock, flags); |
5813 | __wake_up_common(q, mode, nr_exclusive, sync, key); | 5661 | __wake_up_common(q, mode, nr_exclusive, wake_flags, key); |
5814 | spin_unlock_irqrestore(&q->lock, flags); | 5662 | spin_unlock_irqrestore(&q->lock, flags); |
5815 | } | 5663 | } |
5816 | EXPORT_SYMBOL_GPL(__wake_up_sync_key); | 5664 | EXPORT_SYMBOL_GPL(__wake_up_sync_key); |
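Annotation: the wait-queue path simply forwards the new bitmask: __wake_up_sync_key() passes WF_SYNC, __wake_up_common() hands it to each wait-queue entry's callback, and default_wake_function() forwards it into try_to_wake_up(). A custom wake function therefore receives the flags in its third argument; a minimal sketch (the callback name is illustrative, and WF_SYNC is assumed to be visible via linux/sched.h):

#include <linux/wait.h>
#include <linux/sched.h>

/* Sketch: a wait-queue callback that only reacts to synchronous wakeups. */
static int sync_only_wake(wait_queue_t *curr, unsigned mode, int wake_flags, void *key)
{
	if (!(wake_flags & WF_SYNC))
		return 0;                       /* ignore non-sync wakeups */

	return default_wake_function(curr, mode, wake_flags, key);
}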
@@ -8000,9 +7848,7 @@ static int sd_degenerate(struct sched_domain *sd) | |||
8000 | } | 7848 | } |
8001 | 7849 | ||
8002 | /* Following flags don't use groups */ | 7850 | /* Following flags don't use groups */ |
8003 | if (sd->flags & (SD_WAKE_IDLE | | 7851 | if (sd->flags & (SD_WAKE_AFFINE)) |
8004 | SD_WAKE_AFFINE | | ||
8005 | SD_WAKE_BALANCE)) | ||
8006 | return 0; | 7852 | return 0; |
8007 | 7853 | ||
8008 | return 1; | 7854 | return 1; |
@@ -8019,10 +7865,6 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) | |||
8019 | if (!cpumask_equal(sched_domain_span(sd), sched_domain_span(parent))) | 7865 | if (!cpumask_equal(sched_domain_span(sd), sched_domain_span(parent))) |
8020 | return 0; | 7866 | return 0; |
8021 | 7867 | ||
8022 | /* Does parent contain flags not in child? */ | ||
8023 | /* WAKE_BALANCE is a subset of WAKE_AFFINE */ | ||
8024 | if (cflags & SD_WAKE_AFFINE) | ||
8025 | pflags &= ~SD_WAKE_BALANCE; | ||
8026 | /* Flags needing groups don't count if only 1 group in parent */ | 7868 | /* Flags needing groups don't count if only 1 group in parent */ |
8027 | if (parent->groups == parent->groups->next) { | 7869 | if (parent->groups == parent->groups->next) { |
8028 | pflags &= ~(SD_LOAD_BALANCE | | 7870 | pflags &= ~(SD_LOAD_BALANCE | |
@@ -8708,10 +8550,10 @@ static void set_domain_attribute(struct sched_domain *sd, | |||
8708 | request = attr->relax_domain_level; | 8550 | request = attr->relax_domain_level; |
8709 | if (request < sd->level) { | 8551 | if (request < sd->level) { |
8710 | /* turn off idle balance on this domain */ | 8552 | /* turn off idle balance on this domain */ |
8711 | sd->flags &= ~(SD_WAKE_IDLE|SD_BALANCE_NEWIDLE); | 8553 | sd->flags &= ~(SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE); |
8712 | } else { | 8554 | } else { |
8713 | /* turn on idle balance on this domain */ | 8555 | /* turn on idle balance on this domain */ |
8714 | sd->flags |= (SD_WAKE_IDLE_FAR|SD_BALANCE_NEWIDLE); | 8556 | sd->flags |= (SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE); |
8715 | } | 8557 | } |
8716 | } | 8558 | } |
8717 | 8559 | ||
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 5ddbd0891267..efb84409bc43 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c | |||
@@ -395,6 +395,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) | |||
395 | PN(se.sum_exec_runtime); | 395 | PN(se.sum_exec_runtime); |
396 | PN(se.avg_overlap); | 396 | PN(se.avg_overlap); |
397 | PN(se.avg_wakeup); | 397 | PN(se.avg_wakeup); |
398 | PN(se.avg_running); | ||
398 | 399 | ||
399 | nr_switches = p->nvcsw + p->nivcsw; | 400 | nr_switches = p->nvcsw + p->nivcsw; |
400 | 401 | ||
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index aa7f84121016..10d218ab69f2 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -711,7 +711,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) | |||
711 | 711 | ||
712 | if (!initial) { | 712 | if (!initial) { |
713 | /* sleeps up to a single latency don't count. */ | 713 | /* sleeps up to a single latency don't count. */ |
714 | if (sched_feat(NEW_FAIR_SLEEPERS)) { | 714 | if (sched_feat(FAIR_SLEEPERS)) { |
715 | unsigned long thresh = sysctl_sched_latency; | 715 | unsigned long thresh = sysctl_sched_latency; |
716 | 716 | ||
717 | /* | 717 | /* |
@@ -725,6 +725,13 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) | |||
725 | task_of(se)->policy != SCHED_IDLE)) | 725 | task_of(se)->policy != SCHED_IDLE)) |
726 | thresh = calc_delta_fair(thresh, se); | 726 | thresh = calc_delta_fair(thresh, se); |
727 | 727 | ||
728 | /* | ||
729 | * Halve their sleep time's effect, to allow | ||
730 | * for a gentler effect of sleepers: | ||
731 | */ | ||
732 | if (sched_feat(GENTLE_FAIR_SLEEPERS)) | ||
733 | thresh >>= 1; | ||
734 | |||
728 | vruntime -= thresh; | 735 | vruntime -= thresh; |
729 | } | 736 | } |
730 | } | 737 | } |
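Annotation: with GENTLE_FAIR_SLEEPERS the sleeper credit is halved before being subtracted from the waking entity's vruntime. For example, with sysctl_sched_latency at 20 ms and a nice-0 task, FAIR_SLEEPERS alone would place the waker 20 ms of virtual runtime ahead of its fair spot; the gentle variant trims that to 10 ms. A worked sketch of the thresh computation above (values illustrative):

/* Worked example of the sleeper-credit calculation in place_entity(). */
unsigned long sleeper_credit(unsigned long sysctl_sched_latency, int gentle)
{
	unsigned long thresh = sysctl_sched_latency;    /* e.g. 20000000 ns */

	if (gentle)
		thresh >>= 1;                           /* 10000000 ns */

	return thresh;  /* subtracted from the entity's vruntime in place_entity() */
}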
@@ -757,10 +764,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup) | |||
757 | 764 | ||
758 | static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) | 765 | static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) |
759 | { | 766 | { |
760 | if (cfs_rq->last == se) | 767 | if (!se || cfs_rq->last == se) |
761 | cfs_rq->last = NULL; | 768 | cfs_rq->last = NULL; |
762 | 769 | ||
763 | if (cfs_rq->next == se) | 770 | if (!se || cfs_rq->next == se) |
764 | cfs_rq->next = NULL; | 771 | cfs_rq->next = NULL; |
765 | } | 772 | } |
766 | 773 | ||
@@ -1062,83 +1069,6 @@ static void yield_task_fair(struct rq *rq) | |||
1062 | se->vruntime = rightmost->vruntime + 1; | 1069 | se->vruntime = rightmost->vruntime + 1; |
1063 | } | 1070 | } |
1064 | 1071 | ||
1065 | /* | ||
1066 | * wake_idle() will wake a task on an idle cpu if task->cpu is | ||
1067 | * not idle and an idle cpu is available. The span of cpus to | ||
1068 | * search starts with cpus closest then further out as needed, | ||
1069 | * so we always favor a closer, idle cpu. | ||
1070 | * Domains may include CPUs that are not usable for migration, | ||
1071 | * hence we need to mask them out (rq->rd->online) | ||
1072 | * | ||
1073 | * Returns the CPU we should wake onto. | ||
1074 | */ | ||
1075 | #if defined(ARCH_HAS_SCHED_WAKE_IDLE) | ||
1076 | |||
1077 | #define cpu_rd_active(cpu, rq) cpumask_test_cpu(cpu, rq->rd->online) | ||
1078 | |||
1079 | static int wake_idle(int cpu, struct task_struct *p) | ||
1080 | { | ||
1081 | struct sched_domain *sd; | ||
1082 | int i; | ||
1083 | unsigned int chosen_wakeup_cpu; | ||
1084 | int this_cpu; | ||
1085 | struct rq *task_rq = task_rq(p); | ||
1086 | |||
1087 | /* | ||
1088 | * At POWERSAVINGS_BALANCE_WAKEUP level, if both this_cpu and prev_cpu | ||
1089 | * are idle and this is not a kernel thread and this task's affinity | ||
1090 | * allows it to be moved to preferred cpu, then just move! | ||
1091 | */ | ||
1092 | |||
1093 | this_cpu = smp_processor_id(); | ||
1094 | chosen_wakeup_cpu = | ||
1095 | cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu; | ||
1096 | |||
1097 | if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP && | ||
1098 | idle_cpu(cpu) && idle_cpu(this_cpu) && | ||
1099 | p->mm && !(p->flags & PF_KTHREAD) && | ||
1100 | cpu_isset(chosen_wakeup_cpu, p->cpus_allowed)) | ||
1101 | return chosen_wakeup_cpu; | ||
1102 | |||
1103 | /* | ||
1104 | * If it is idle, then it is the best cpu to run this task. | ||
1105 | * | ||
1106 | * This cpu is also the best, if it has more than one task already. | ||
1107 | * Siblings must be also busy(in most cases) as they didn't already | ||
1108 | * pickup the extra load from this cpu and hence we need not check | ||
1109 | * sibling runqueue info. This will avoid the checks and cache miss | ||
1110 | * penalities associated with that. | ||
1111 | */ | ||
1112 | if (idle_cpu(cpu) || cpu_rq(cpu)->cfs.nr_running > 1) | ||
1113 | return cpu; | ||
1114 | |||
1115 | for_each_domain(cpu, sd) { | ||
1116 | if ((sd->flags & SD_WAKE_IDLE) | ||
1117 | || ((sd->flags & SD_WAKE_IDLE_FAR) | ||
1118 | && !task_hot(p, task_rq->clock, sd))) { | ||
1119 | for_each_cpu_and(i, sched_domain_span(sd), | ||
1120 | &p->cpus_allowed) { | ||
1121 | if (cpu_rd_active(i, task_rq) && idle_cpu(i)) { | ||
1122 | if (i != task_cpu(p)) { | ||
1123 | schedstat_inc(p, | ||
1124 | se.nr_wakeups_idle); | ||
1125 | } | ||
1126 | return i; | ||
1127 | } | ||
1128 | } | ||
1129 | } else { | ||
1130 | break; | ||
1131 | } | ||
1132 | } | ||
1133 | return cpu; | ||
1134 | } | ||
1135 | #else /* !ARCH_HAS_SCHED_WAKE_IDLE*/ | ||
1136 | static inline int wake_idle(int cpu, struct task_struct *p) | ||
1137 | { | ||
1138 | return cpu; | ||
1139 | } | ||
1140 | #endif | ||
1141 | |||
1142 | #ifdef CONFIG_SMP | 1072 | #ifdef CONFIG_SMP |
1143 | 1073 | ||
1144 | #ifdef CONFIG_FAIR_GROUP_SCHED | 1074 | #ifdef CONFIG_FAIR_GROUP_SCHED |
@@ -1225,25 +1155,34 @@ static inline unsigned long effective_load(struct task_group *tg, int cpu, | |||
1225 | 1155 | ||
1226 | #endif | 1156 | #endif |
1227 | 1157 | ||
1228 | static int | 1158 | static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync) |
1229 | wake_affine(struct sched_domain *this_sd, struct rq *this_rq, | ||
1230 | struct task_struct *p, int prev_cpu, int this_cpu, int sync, | ||
1231 | int idx, unsigned long load, unsigned long this_load, | ||
1232 | unsigned int imbalance) | ||
1233 | { | 1159 | { |
1234 | struct task_struct *curr = this_rq->curr; | 1160 | struct task_struct *curr = current; |
1235 | struct task_group *tg; | 1161 | unsigned long this_load, load; |
1236 | unsigned long tl = this_load; | 1162 | int idx, this_cpu, prev_cpu; |
1237 | unsigned long tl_per_task; | 1163 | unsigned long tl_per_task; |
1164 | unsigned int imbalance; | ||
1165 | struct task_group *tg; | ||
1238 | unsigned long weight; | 1166 | unsigned long weight; |
1239 | int balanced; | 1167 | int balanced; |
1240 | 1168 | ||
1241 | if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS)) | 1169 | idx = sd->wake_idx; |
1242 | return 0; | 1170 | this_cpu = smp_processor_id(); |
1171 | prev_cpu = task_cpu(p); | ||
1172 | load = source_load(prev_cpu, idx); | ||
1173 | this_load = target_load(this_cpu, idx); | ||
1243 | 1174 | ||
1244 | if (sync && (curr->se.avg_overlap > sysctl_sched_migration_cost || | 1175 | if (sync) { |
1245 | p->se.avg_overlap > sysctl_sched_migration_cost)) | 1176 | if (sched_feat(SYNC_LESS) && |
1246 | sync = 0; | 1177 | (curr->se.avg_overlap > sysctl_sched_migration_cost || |
1178 | p->se.avg_overlap > sysctl_sched_migration_cost)) | ||
1179 | sync = 0; | ||
1180 | } else { | ||
1181 | if (sched_feat(SYNC_MORE) && | ||
1182 | (curr->se.avg_overlap < sysctl_sched_migration_cost && | ||
1183 | p->se.avg_overlap < sysctl_sched_migration_cost)) | ||
1184 | sync = 1; | ||
1185 | } | ||
1247 | 1186 | ||
1248 | /* | 1187 | /* |
1249 | * If sync wakeup then subtract the (maximum possible) | 1188 | * If sync wakeup then subtract the (maximum possible) |
@@ -1254,24 +1193,26 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq, | |||
1254 | tg = task_group(current); | 1193 | tg = task_group(current); |
1255 | weight = current->se.load.weight; | 1194 | weight = current->se.load.weight; |
1256 | 1195 | ||
1257 | tl += effective_load(tg, this_cpu, -weight, -weight); | 1196 | this_load += effective_load(tg, this_cpu, -weight, -weight); |
1258 | load += effective_load(tg, prev_cpu, 0, -weight); | 1197 | load += effective_load(tg, prev_cpu, 0, -weight); |
1259 | } | 1198 | } |
1260 | 1199 | ||
1261 | tg = task_group(p); | 1200 | tg = task_group(p); |
1262 | weight = p->se.load.weight; | 1201 | weight = p->se.load.weight; |
1263 | 1202 | ||
1203 | imbalance = 100 + (sd->imbalance_pct - 100) / 2; | ||
1204 | |||
1264 | /* | 1205 | /* |
1265 | * In low-load situations, where prev_cpu is idle and this_cpu is idle | 1206 | * In low-load situations, where prev_cpu is idle and this_cpu is idle |
1266 | * due to the sync cause above having dropped tl to 0, we'll always have | 1207 | * due to the sync cause above having dropped this_load to 0, we'll |
1267 | * an imbalance, but there's really nothing you can do about that, so | 1208 | * always have an imbalance, but there's really nothing you can do |
1268 | * that's good too. | 1209 | * about that, so that's good too. |
1269 | * | 1210 | * |
1270 | * Otherwise check if either cpus are near enough in load to allow this | 1211 | * Otherwise check if either cpus are near enough in load to allow this |
1271 | * task to be woken on this_cpu. | 1212 | * task to be woken on this_cpu. |
1272 | */ | 1213 | */ |
1273 | balanced = !tl || | 1214 | balanced = !this_load || |
1274 | 100*(tl + effective_load(tg, this_cpu, weight, weight)) <= | 1215 | 100*(this_load + effective_load(tg, this_cpu, weight, weight)) <= |
1275 | imbalance*(load + effective_load(tg, prev_cpu, 0, weight)); | 1216 | imbalance*(load + effective_load(tg, prev_cpu, 0, weight)); |
1276 | 1217 | ||
1277 | /* | 1218 | /* |
@@ -1285,14 +1226,15 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq, | |||
1285 | schedstat_inc(p, se.nr_wakeups_affine_attempts); | 1226 | schedstat_inc(p, se.nr_wakeups_affine_attempts); |
1286 | tl_per_task = cpu_avg_load_per_task(this_cpu); | 1227 | tl_per_task = cpu_avg_load_per_task(this_cpu); |
1287 | 1228 | ||
1288 | if (balanced || (tl <= load && tl + target_load(prev_cpu, idx) <= | 1229 | if (balanced || |
1289 | tl_per_task)) { | 1230 | (this_load <= load && |
1231 | this_load + target_load(prev_cpu, idx) <= tl_per_task)) { | ||
1290 | /* | 1232 | /* |
1291 | * This domain has SD_WAKE_AFFINE and | 1233 | * This domain has SD_WAKE_AFFINE and |
1292 | * p is cache cold in this domain, and | 1234 | * p is cache cold in this domain, and |
1293 | * there is no bad imbalance. | 1235 | * there is no bad imbalance. |
1294 | */ | 1236 | */ |
1295 | schedstat_inc(this_sd, ttwu_move_affine); | 1237 | schedstat_inc(sd, ttwu_move_affine); |
1296 | schedstat_inc(p, se.nr_wakeups_affine); | 1238 | schedstat_inc(p, se.nr_wakeups_affine); |
1297 | 1239 | ||
1298 | return 1; | 1240 | return 1; |
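Annotation: wake_affine() now computes the load figures itself but keeps the old acceptance test: the wakeup is pulled to this_cpu when the domain's halved imbalance percentage still covers the resulting load difference. A worked instance of that predicate, assuming imbalance_pct = 125 (a common value for SD_WAKE_AFFINE domains), giving imbalance = 100 + (125 - 100)/2 = 112; the sample loads are made up:

/* Worked example of the 'balanced' test in wake_affine(). */
int wake_affine_balanced(unsigned long this_load, unsigned long prev_load)
{
	unsigned int imbalance = 100 + (125 - 100) / 2;         /* 112 */

	/* this_load/prev_load already include the effective_load() adjustments */
	return !this_load || 100 * this_load <= imbalance * prev_load;
	/* e.g. this_load = 1100, prev_load = 1000: 110000 <= 112000 -> pull is allowed */
}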
@@ -1300,65 +1242,215 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq, | |||
1300 | return 0; | 1242 | return 0; |
1301 | } | 1243 | } |
1302 | 1244 | ||
1303 | static int select_task_rq_fair(struct task_struct *p, int sync) | 1245 | /* |
1246 | * find_idlest_group finds and returns the least busy CPU group within the | ||
1247 | * domain. | ||
1248 | */ | ||
1249 | static struct sched_group * | ||
1250 | find_idlest_group(struct sched_domain *sd, struct task_struct *p, | ||
1251 | int this_cpu, int load_idx) | ||
1304 | { | 1252 | { |
1305 | struct sched_domain *sd, *this_sd = NULL; | 1253 | struct sched_group *idlest = NULL, *this = NULL, *group = sd->groups; |
1306 | int prev_cpu, this_cpu, new_cpu; | 1254 | unsigned long min_load = ULONG_MAX, this_load = 0; |
1307 | unsigned long load, this_load; | 1255 | int imbalance = 100 + (sd->imbalance_pct-100)/2; |
1308 | struct rq *this_rq; | ||
1309 | unsigned int imbalance; | ||
1310 | int idx; | ||
1311 | 1256 | ||
1312 | prev_cpu = task_cpu(p); | 1257 | do { |
1313 | this_cpu = smp_processor_id(); | 1258 | unsigned long load, avg_load; |
1314 | this_rq = cpu_rq(this_cpu); | 1259 | int local_group; |
1315 | new_cpu = prev_cpu; | 1260 | int i; |
1316 | 1261 | ||
1317 | /* | 1262 | /* Skip over this group if it has no CPUs allowed */ |
1318 | * 'this_sd' is the first domain that both | 1263 | if (!cpumask_intersects(sched_group_cpus(group), |
1319 | * this_cpu and prev_cpu are present in: | 1264 | &p->cpus_allowed)) |
1320 | */ | 1265 | continue; |
1321 | for_each_domain(this_cpu, sd) { | 1266 | |
1322 | if (cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) { | 1267 | local_group = cpumask_test_cpu(this_cpu, |
1323 | this_sd = sd; | 1268 | sched_group_cpus(group)); |
1324 | break; | 1269 | |
1270 | /* Tally up the load of all CPUs in the group */ | ||
1271 | avg_load = 0; | ||
1272 | |||
1273 | for_each_cpu(i, sched_group_cpus(group)) { | ||
1274 | /* Bias balancing toward cpus of our domain */ | ||
1275 | if (local_group) | ||
1276 | load = source_load(i, load_idx); | ||
1277 | else | ||
1278 | load = target_load(i, load_idx); | ||
1279 | |||
1280 | avg_load += load; | ||
1281 | } | ||
1282 | |||
1283 | /* Adjust by relative CPU power of the group */ | ||
1284 | avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power; | ||
1285 | |||
1286 | if (local_group) { | ||
1287 | this_load = avg_load; | ||
1288 | this = group; | ||
1289 | } else if (avg_load < min_load) { | ||
1290 | min_load = avg_load; | ||
1291 | idlest = group; | ||
1292 | } | ||
1293 | } while (group = group->next, group != sd->groups); | ||
1294 | |||
1295 | if (!idlest || 100*this_load < imbalance*min_load) | ||
1296 | return NULL; | ||
1297 | return idlest; | ||
1298 | } | ||
1299 | |||
1300 | /* | ||
1301 | * find_idlest_cpu - find the idlest cpu among the cpus in group. | ||
1302 | */ | ||
1303 | static int | ||
1304 | find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) | ||
1305 | { | ||
1306 | unsigned long load, min_load = ULONG_MAX; | ||
1307 | int idlest = -1; | ||
1308 | int i; | ||
1309 | |||
1310 | /* Traverse only the allowed CPUs */ | ||
1311 | for_each_cpu_and(i, sched_group_cpus(group), &p->cpus_allowed) { | ||
1312 | load = weighted_cpuload(i); | ||
1313 | |||
1314 | if (load < min_load || (load == min_load && i == this_cpu)) { | ||
1315 | min_load = load; | ||
1316 | idlest = i; | ||
1325 | } | 1317 | } |
1326 | } | 1318 | } |
1327 | 1319 | ||
1328 | if (unlikely(!cpumask_test_cpu(this_cpu, &p->cpus_allowed))) | 1320 | return idlest; |
1329 | goto out; | 1321 | } |
1330 | 1322 | ||
1331 | /* | 1323 | /* |
1332 | * Check for affine wakeup and passive balancing possibilities. | 1324 | * sched_balance_self: balance the current task (running on cpu) in domains |
1333 | */ | 1325 | * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and |
1334 | if (!this_sd) | 1326 | * SD_BALANCE_EXEC. |
1327 | * | ||
1328 | * Balance, ie. select the least loaded group. | ||
1329 | * | ||
1330 | * Returns the target CPU number, or the same CPU if no balancing is needed. | ||
1331 | * | ||
1332 | * preempt must be disabled. | ||
1333 | */ | ||
1334 | static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags) | ||
1335 | { | ||
1336 | struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL; | ||
1337 | int cpu = smp_processor_id(); | ||
1338 | int prev_cpu = task_cpu(p); | ||
1339 | int new_cpu = cpu; | ||
1340 | int want_affine = 0; | ||
1341 | int want_sd = 1; | ||
1342 | int sync = wake_flags & WF_SYNC; | ||
1343 | |||
1344 | if (sd_flag & SD_BALANCE_WAKE) { | ||
1345 | if (sched_feat(AFFINE_WAKEUPS)) | ||
1346 | want_affine = 1; | ||
1347 | new_cpu = prev_cpu; | ||
1348 | } | ||
1349 | |||
1350 | rcu_read_lock(); | ||
1351 | for_each_domain(cpu, tmp) { | ||
1352 | /* | ||
1353 | * If power savings logic is enabled for a domain, see if we | ||
1354 | * are not overloaded, if so, don't balance wider. | ||
1355 | */ | ||
1356 | if (tmp->flags & (SD_POWERSAVINGS_BALANCE|SD_PREFER_LOCAL)) { | ||
1357 | unsigned long power = 0; | ||
1358 | unsigned long nr_running = 0; | ||
1359 | unsigned long capacity; | ||
1360 | int i; | ||
1361 | |||
1362 | for_each_cpu(i, sched_domain_span(tmp)) { | ||
1363 | power += power_of(i); | ||
1364 | nr_running += cpu_rq(i)->cfs.nr_running; | ||
1365 | } | ||
1366 | |||
1367 | capacity = DIV_ROUND_CLOSEST(power, SCHED_LOAD_SCALE); | ||
1368 | |||
1369 | if (tmp->flags & SD_POWERSAVINGS_BALANCE) | ||
1370 | nr_running /= 2; | ||
1371 | |||
1372 | if (nr_running < capacity) | ||
1373 | want_sd = 0; | ||
1374 | } | ||
1375 | |||
1376 | if (want_affine && (tmp->flags & SD_WAKE_AFFINE) && | ||
1377 | cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) { | ||
1378 | |||
1379 | affine_sd = tmp; | ||
1380 | want_affine = 0; | ||
1381 | } | ||
1382 | |||
1383 | if (!want_sd && !want_affine) | ||
1384 | break; | ||
1385 | |||
1386 | if (!(tmp->flags & sd_flag)) | ||
1387 | continue; | ||
1388 | |||
1389 | if (want_sd) | ||
1390 | sd = tmp; | ||
1391 | } | ||
1392 | |||
1393 | if (sched_feat(LB_SHARES_UPDATE)) { | ||
1394 | /* | ||
1395 | * Pick the largest domain to update shares over | ||
1396 | */ | ||
1397 | tmp = sd; | ||
1398 | if (affine_sd && (!tmp || | ||
1399 | cpumask_weight(sched_domain_span(affine_sd)) > | ||
1400 | cpumask_weight(sched_domain_span(sd)))) | ||
1401 | tmp = affine_sd; | ||
1402 | |||
1403 | if (tmp) | ||
1404 | update_shares(tmp); | ||
1405 | } | ||
1406 | |||
1407 | if (affine_sd && wake_affine(affine_sd, p, sync)) { | ||
1408 | new_cpu = cpu; | ||
1335 | goto out; | 1409 | goto out; |
1410 | } | ||
1336 | 1411 | ||
1337 | idx = this_sd->wake_idx; | 1412 | while (sd) { |
1413 | int load_idx = sd->forkexec_idx; | ||
1414 | struct sched_group *group; | ||
1415 | int weight; | ||
1338 | 1416 | ||
1339 | imbalance = 100 + (this_sd->imbalance_pct - 100) / 2; | 1417 | if (!(sd->flags & sd_flag)) { |
1418 | sd = sd->child; | ||
1419 | continue; | ||
1420 | } | ||
1340 | 1421 | ||
1341 | load = source_load(prev_cpu, idx); | 1422 | if (sd_flag & SD_BALANCE_WAKE) |
1342 | this_load = target_load(this_cpu, idx); | 1423 | load_idx = sd->wake_idx; |
1343 | 1424 | ||
1344 | if (wake_affine(this_sd, this_rq, p, prev_cpu, this_cpu, sync, idx, | 1425 | group = find_idlest_group(sd, p, cpu, load_idx); |
1345 | load, this_load, imbalance)) | 1426 | if (!group) { |
1346 | return this_cpu; | 1427 | sd = sd->child; |
1428 | continue; | ||
1429 | } | ||
1347 | 1430 | ||
1348 | /* | 1431 | new_cpu = find_idlest_cpu(group, p, cpu); |
1349 | * Start passive balancing when half the imbalance_pct | 1432 | if (new_cpu == -1 || new_cpu == cpu) { |
1350 | * limit is reached. | 1433 | /* Now try balancing at a lower domain level of cpu */ |
1351 | */ | 1434 | sd = sd->child; |
1352 | if (this_sd->flags & SD_WAKE_BALANCE) { | 1435 | continue; |
1353 | if (imbalance*this_load <= 100*load) { | ||
1354 | schedstat_inc(this_sd, ttwu_move_balance); | ||
1355 | schedstat_inc(p, se.nr_wakeups_passive); | ||
1356 | return this_cpu; | ||
1357 | } | 1436 | } |
1437 | |||
1438 | /* Now try balancing at a lower domain level of new_cpu */ | ||
1439 | cpu = new_cpu; | ||
1440 | weight = cpumask_weight(sched_domain_span(sd)); | ||
1441 | sd = NULL; | ||
1442 | for_each_domain(cpu, tmp) { | ||
1443 | if (weight <= cpumask_weight(sched_domain_span(tmp))) | ||
1444 | break; | ||
1445 | if (tmp->flags & sd_flag) | ||
1446 | sd = tmp; | ||
1447 | } | ||
1448 | /* while loop will break here if sd == NULL */ | ||
1358 | } | 1449 | } |
1359 | 1450 | ||
1360 | out: | 1451 | out: |
1361 | return wake_idle(new_cpu, p); | 1452 | rcu_read_unlock(); |
1453 | return new_cpu; | ||
1362 | } | 1454 | } |
1363 | #endif /* CONFIG_SMP */ | 1455 | #endif /* CONFIG_SMP */ |
1364 | 1456 | ||
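Annotation: the rewritten select_task_rq_fair() walks the domain hierarchy once from the waking CPU, remembers the widest domain that still wants the requested balance type, tries the affine fast path for wakeups, and otherwise descends via find_idlest_group()/find_idlest_cpu(). Stripped of the power-savings and shares-update details, the descent has roughly the shape below; this is a simplified sketch using the kernel-internal types from the hunk above, and it deliberately omits the re-walk of the new CPU's domains that the real code performs:

/* Simplified sketch of the find_idlest_* descent in select_task_rq_fair(). */
static int descend_domains(struct sched_domain *sd, struct task_struct *p,
			   int cpu, int sd_flag)
{
	int new_cpu = cpu;

	while (sd) {
		struct sched_group *group;

		if (!(sd->flags & sd_flag)) {   /* domain doesn't balance this way */
			sd = sd->child;
			continue;
		}

		group = find_idlest_group(sd, p, cpu, sd->forkexec_idx);
		if (!group) {                   /* the local group is already fine */
			sd = sd->child;
			continue;
		}

		new_cpu = find_idlest_cpu(group, p, cpu);
		if (new_cpu == -1 || new_cpu == cpu) {
			sd = sd->child;         /* try one level further down */
			continue;
		}

		cpu = new_cpu;                  /* real code restarts the walk from here */
		sd = sd->child;                 /* simplification: just keep descending */
	}

	return new_cpu;
}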
@@ -1471,11 +1563,12 @@ static void set_next_buddy(struct sched_entity *se) | |||
1471 | /* | 1563 | /* |
1472 | * Preempt the current task with a newly woken task if needed: | 1564 | * Preempt the current task with a newly woken task if needed: |
1473 | */ | 1565 | */ |
1474 | static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) | 1566 | static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) |
1475 | { | 1567 | { |
1476 | struct task_struct *curr = rq->curr; | 1568 | struct task_struct *curr = rq->curr; |
1477 | struct sched_entity *se = &curr->se, *pse = &p->se; | 1569 | struct sched_entity *se = &curr->se, *pse = &p->se; |
1478 | struct cfs_rq *cfs_rq = task_cfs_rq(curr); | 1570 | struct cfs_rq *cfs_rq = task_cfs_rq(curr); |
1571 | int sync = wake_flags & WF_SYNC; | ||
1479 | 1572 | ||
1480 | update_curr(cfs_rq); | 1573 | update_curr(cfs_rq); |
1481 | 1574 | ||
@@ -1501,7 +1594,8 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) | |||
1501 | */ | 1594 | */ |
1502 | if (sched_feat(LAST_BUDDY) && likely(se->on_rq && curr != rq->idle)) | 1595 | if (sched_feat(LAST_BUDDY) && likely(se->on_rq && curr != rq->idle)) |
1503 | set_last_buddy(se); | 1596 | set_last_buddy(se); |
1504 | set_next_buddy(pse); | 1597 | if (sched_feat(NEXT_BUDDY) && !(wake_flags & WF_FORK)) |
1598 | set_next_buddy(pse); | ||
1505 | 1599 | ||
1506 | /* | 1600 | /* |
1507 | * We can come here with TIF_NEED_RESCHED already set from new task | 1601 | * We can come here with TIF_NEED_RESCHED already set from new task |
@@ -1523,16 +1617,25 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) | |||
1523 | return; | 1617 | return; |
1524 | } | 1618 | } |
1525 | 1619 | ||
1526 | if (!sched_feat(WAKEUP_PREEMPT)) | 1620 | if ((sched_feat(WAKEUP_SYNC) && sync) || |
1527 | return; | 1621 | (sched_feat(WAKEUP_OVERLAP) && |
1528 | 1622 | (se->avg_overlap < sysctl_sched_migration_cost && | |
1529 | if (sched_feat(WAKEUP_OVERLAP) && (sync || | 1623 | pse->avg_overlap < sysctl_sched_migration_cost))) { |
1530 | (se->avg_overlap < sysctl_sched_migration_cost && | ||
1531 | pse->avg_overlap < sysctl_sched_migration_cost))) { | ||
1532 | resched_task(curr); | 1624 | resched_task(curr); |
1533 | return; | 1625 | return; |
1534 | } | 1626 | } |
1535 | 1627 | ||
1628 | if (sched_feat(WAKEUP_RUNNING)) { | ||
1629 | if (pse->avg_running < se->avg_running) { | ||
1630 | set_next_buddy(pse); | ||
1631 | resched_task(curr); | ||
1632 | return; | ||
1633 | } | ||
1634 | } | ||
1635 | |||
1636 | if (!sched_feat(WAKEUP_PREEMPT)) | ||
1637 | return; | ||
1638 | |||
1536 | find_matching_se(&se, &pse); | 1639 | find_matching_se(&se, &pse); |
1537 | 1640 | ||
1538 | BUG_ON(!pse); | 1641 | BUG_ON(!pse); |
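Annotation: the preemption decision in check_preempt_wakeup() is now ordered: sync/overlap-based preemption first, then the WAKEUP_RUNNING heuristic (prefer the wakee if its avg_running is shorter than the current task's), and only then the generic WAKEUP_PREEMPT vruntime check; buddy marking is additionally gated on NEXT_BUDDY and on the wakeup not being a fork (WF_FORK). A condensed sketch of that ordering, with the feature gates folded into plain conditions for readability:

/* Condensed sketch of the wakeup-preemption ordering introduced above. */
static int should_preempt(int sync, u64 curr_overlap, u64 wakee_overlap,
			  u64 curr_running, u64 wakee_running, u64 migration_cost)
{
	if (sync)                                       /* WAKEUP_SYNC */
		return 1;

	if (curr_overlap < migration_cost &&
	    wakee_overlap < migration_cost)             /* WAKEUP_OVERLAP */
		return 1;

	if (wakee_running < curr_running)               /* WAKEUP_RUNNING */
		return 1;

	return 0;       /* fall through to the WAKEUP_PREEMPT vruntime check */
}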
@@ -1555,8 +1658,13 @@ static struct task_struct *pick_next_task_fair(struct rq *rq) | |||
1555 | /* | 1658 | /* |
1556 | * If se was a buddy, clear it so that it will have to earn | 1659 | * If se was a buddy, clear it so that it will have to earn |
1557 | * the favour again. | 1660 | * the favour again. |
1661 | * | ||
1662 | * If se was not a buddy, clear the buddies because neither | ||
1663 | * was eligible to run, let them earn it again. | ||
1664 | * | ||
1665 | * IOW. unconditionally clear buddies. | ||
1558 | */ | 1666 | */ |
1559 | __clear_buddies(cfs_rq, se); | 1667 | __clear_buddies(cfs_rq, NULL); |
1560 | set_next_entity(cfs_rq, se); | 1668 | set_next_entity(cfs_rq, se); |
1561 | cfs_rq = group_cfs_rq(se); | 1669 | cfs_rq = group_cfs_rq(se); |
1562 | } while (cfs_rq); | 1670 | } while (cfs_rq); |
diff --git a/kernel/sched_features.h b/kernel/sched_features.h index e2dc63a5815d..0d94083582c7 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h | |||
@@ -1,17 +1,123 @@ | |||
1 | SCHED_FEAT(NEW_FAIR_SLEEPERS, 0) | 1 | /* |
2 | * Disregards a certain amount of sleep time (sched_latency_ns) and | ||
3 | * considers the task to be running during that period. This gives it | ||
4 | * a service deficit on wakeup, allowing it to run sooner. | ||
5 | */ | ||
6 | SCHED_FEAT(FAIR_SLEEPERS, 1) | ||
7 | |||
8 | /* | ||
9 | * Only give sleepers 50% of their service deficit. This allows | ||
10 | * them to run sooner, but does not allow tons of sleepers to | ||
11 | * rip the spread apart. | ||
12 | */ | ||
13 | SCHED_FEAT(GENTLE_FAIR_SLEEPERS, 1) | ||
14 | |||
15 | /* | ||
16 | * By not normalizing the sleep time, heavy tasks get an effective | ||
17 | * longer period, and lighter task an effective shorter period they | ||
18 | * are considered running. | ||
19 | */ | ||
2 | SCHED_FEAT(NORMALIZED_SLEEPER, 0) | 20 | SCHED_FEAT(NORMALIZED_SLEEPER, 0) |
3 | SCHED_FEAT(ADAPTIVE_GRAN, 1) | 21 | |
4 | SCHED_FEAT(WAKEUP_PREEMPT, 1) | 22 | /* |
23 | * Place new tasks ahead so that they do not starve already running | ||
24 | * tasks | ||
25 | */ | ||
5 | SCHED_FEAT(START_DEBIT, 1) | 26 | SCHED_FEAT(START_DEBIT, 1) |
27 | |||
28 | /* | ||
29 | * Should wakeups try to preempt running tasks. | ||
30 | */ | ||
31 | SCHED_FEAT(WAKEUP_PREEMPT, 1) | ||
32 | |||
33 | /* | ||
34 | * Compute wakeup_gran based on task behaviour, clipped to | ||
35 | * [0, sched_wakeup_gran_ns] | ||
36 | */ | ||
37 | SCHED_FEAT(ADAPTIVE_GRAN, 1) | ||
38 | |||
39 | /* | ||
40 | * When converting the wakeup granularity to virtual time, do it such | ||
41 | * that heavier tasks preempting a lighter task have an edge. | ||
42 | */ | ||
43 | SCHED_FEAT(ASYM_GRAN, 1) | ||
44 | |||
45 | /* | ||
46 | * Always wakeup-preempt SYNC wakeups, see SYNC_WAKEUPS. | ||
47 | */ | ||
48 | SCHED_FEAT(WAKEUP_SYNC, 0) | ||
49 | |||
50 | /* | ||
51 | * Wakeup preempt based on task behaviour. Tasks that do not overlap | ||
52 | * don't get preempted. | ||
53 | */ | ||
54 | SCHED_FEAT(WAKEUP_OVERLAP, 0) | ||
55 | |||
56 | /* | ||
57 | * Wakeup preemption towards tasks that run short | ||
58 | */ | ||
59 | SCHED_FEAT(WAKEUP_RUNNING, 0) | ||
60 | |||
61 | /* | ||
62 | * Use the SYNC wakeup hint, pipes and the likes use this to indicate | ||
63 | * the remote end is likely to consume the data we just wrote, and | ||
64 | * therefore has cache benefit from being placed on the same cpu, see | ||
65 | * also AFFINE_WAKEUPS. | ||
66 | */ | ||
67 | SCHED_FEAT(SYNC_WAKEUPS, 1) | ||
68 | |||
69 | /* | ||
70 | * Based on load and program behaviour, see if it makes sense to place | ||
71 | * a newly woken task on the same cpu as the task that woke it -- | ||
72 | * improve cache locality. Typically used with SYNC wakeups as | ||
73 | * generated by pipes and the like, see also SYNC_WAKEUPS. | ||
74 | */ | ||
6 | SCHED_FEAT(AFFINE_WAKEUPS, 1) | 75 | SCHED_FEAT(AFFINE_WAKEUPS, 1) |
76 | |||
77 | /* | ||
78 | * Weaken SYNC hint based on overlap | ||
79 | */ | ||
80 | SCHED_FEAT(SYNC_LESS, 1) | ||
81 | |||
82 | /* | ||
83 | * Add SYNC hint based on overlap | ||
84 | */ | ||
85 | SCHED_FEAT(SYNC_MORE, 0) | ||
86 | |||
87 | /* | ||
88 | * Prefer to schedule the task we woke last (assuming it failed | ||
92 | * wakeup-preemption), since it's likely going to consume data we | ||
90 | * touched, increases cache locality. | ||
91 | */ | ||
92 | SCHED_FEAT(NEXT_BUDDY, 0) | ||
93 | |||
94 | /* | ||
95 | * Prefer to schedule the task that ran last (when we did | ||
96 | * wake-preempt) as that likely will touch the same data, increases | ||
97 | * cache locality. | ||
98 | */ | ||
99 | SCHED_FEAT(LAST_BUDDY, 1) | ||
100 | |||
101 | /* | ||
102 | * Consider buddies to be cache hot, decreases the likelihood of a | ||
103 | * cache buddy being migrated away, increases cache locality. | ||
104 | */ | ||
7 | SCHED_FEAT(CACHE_HOT_BUDDY, 1) | 105 | SCHED_FEAT(CACHE_HOT_BUDDY, 1) |
8 | SCHED_FEAT(SYNC_WAKEUPS, 1) | 106 | |
107 | /* | ||
108 | * Use arch dependent cpu power functions | ||
109 | */ | ||
110 | SCHED_FEAT(ARCH_POWER, 0) | ||
111 | |||
9 | SCHED_FEAT(HRTICK, 0) | 112 | SCHED_FEAT(HRTICK, 0) |
10 | SCHED_FEAT(DOUBLE_TICK, 0) | 113 | SCHED_FEAT(DOUBLE_TICK, 0) |
11 | SCHED_FEAT(ASYM_GRAN, 1) | ||
12 | SCHED_FEAT(LB_BIAS, 1) | 114 | SCHED_FEAT(LB_BIAS, 1) |
13 | SCHED_FEAT(LB_WAKEUP_UPDATE, 1) | 115 | SCHED_FEAT(LB_SHARES_UPDATE, 1) |
14 | SCHED_FEAT(ASYM_EFF_LOAD, 1) | 116 | SCHED_FEAT(ASYM_EFF_LOAD, 1) |
15 | SCHED_FEAT(WAKEUP_OVERLAP, 0) | 117 | |
16 | SCHED_FEAT(LAST_BUDDY, 1) | 118 | /* |
119 | * Spin-wait on mutex acquisition when the mutex owner is running on | ||
120 | * another cpu -- assumes that when the owner is running, it will soon | ||
121 | * release the lock. Decreases scheduling overhead. | ||
122 | */ | ||
17 | SCHED_FEAT(OWNER_SPIN, 1) | 123 | SCHED_FEAT(OWNER_SPIN, 1) |
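Annotation: sched_features.h is only a list of SCHED_FEAT(name, default) entries; kernel/sched.c includes it several times with different SCHED_FEAT definitions to build an enum of feature bits and the default bitmask. Roughly the following X-macro pattern; the exact variable and macro names follow kernel/sched.c of this era and should be read as assumptions here:

/* Sketch of how kernel/sched.c consumes sched_features.h (X-macro style). */
#define SCHED_FEAT(name, enabled)	__SCHED_FEAT_##name,
enum {
#include "sched_features.h"
};
#undef SCHED_FEAT

#define SCHED_FEAT(name, enabled)	(1UL << __SCHED_FEAT_##name) * enabled |
const_debug unsigned int sysctl_sched_features =
#include "sched_features.h"
	0;
#undef SCHED_FEAT

/* sched_feat(x) then tests the corresponding bit in sysctl_sched_features. */
#define sched_feat(x)	(sysctl_sched_features & (1UL << __SCHED_FEAT_##x))

At run time the bits can typically be toggled through the debugfs file /sys/kernel/debug/sched_features, e.g. writing NO_GENTLE_FAIR_SLEEPERS to clear that feature (assuming CONFIG_SCHED_DEBUG is enabled).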
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index 499672c10cbd..a8b448af004b 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c | |||
@@ -6,7 +6,7 @@ | |||
6 | */ | 6 | */ |
7 | 7 | ||
8 | #ifdef CONFIG_SMP | 8 | #ifdef CONFIG_SMP |
9 | static int select_task_rq_idle(struct task_struct *p, int sync) | 9 | static int select_task_rq_idle(struct task_struct *p, int sd_flag, int flags) |
10 | { | 10 | { |
11 | return task_cpu(p); /* IDLE tasks are never migrated */ | 11 | return task_cpu(p); /* IDLE tasks are never migrated */ |
12 | } | 12 | } |
@@ -14,7 +14,7 @@ static int select_task_rq_idle(struct task_struct *p, int sync) | |||
14 | /* | 14 | /* |
15 | * Idle tasks are unconditionally rescheduled: | 15 | * Idle tasks are unconditionally rescheduled: |
16 | */ | 16 | */ |
17 | static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int sync) | 17 | static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int flags) |
18 | { | 18 | { |
19 | resched_task(rq->idle); | 19 | resched_task(rq->idle); |
20 | } | 20 | } |
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 2eb4bd6a526c..13de7126a6ab 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
@@ -938,10 +938,13 @@ static void yield_task_rt(struct rq *rq) | |||
938 | #ifdef CONFIG_SMP | 938 | #ifdef CONFIG_SMP |
939 | static int find_lowest_rq(struct task_struct *task); | 939 | static int find_lowest_rq(struct task_struct *task); |
940 | 940 | ||
941 | static int select_task_rq_rt(struct task_struct *p, int sync) | 941 | static int select_task_rq_rt(struct task_struct *p, int sd_flag, int flags) |
942 | { | 942 | { |
943 | struct rq *rq = task_rq(p); | 943 | struct rq *rq = task_rq(p); |
944 | 944 | ||
945 | if (sd_flag != SD_BALANCE_WAKE) | ||
946 | return smp_processor_id(); | ||
947 | |||
945 | /* | 948 | /* |
946 | * If the current task is an RT task, then | 949 | * If the current task is an RT task, then |
947 | * try to see if we can wake this RT task up on another | 950 | * try to see if we can wake this RT task up on another |
@@ -999,7 +1002,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) | |||
999 | /* | 1002 | /* |
1000 | * Preempt the current task with a newly woken task if needed: | 1003 | * Preempt the current task with a newly woken task if needed: |
1001 | */ | 1004 | */ |
1002 | static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int sync) | 1005 | static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags) |
1003 | { | 1006 | { |
1004 | if (p->prio < rq->curr->prio) { | 1007 | if (p->prio < rq->curr->prio) { |
1005 | resched_task(rq->curr); | 1008 | resched_task(rq->curr); |
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index 687699d365ae..2547d8813cf0 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c | |||
@@ -11,7 +11,6 @@ | |||
11 | #include <linux/ftrace.h> | 11 | #include <linux/ftrace.h> |
12 | #include <linux/string.h> | 12 | #include <linux/string.h> |
13 | #include <linux/module.h> | 13 | #include <linux/module.h> |
14 | #include <linux/marker.h> | ||
15 | #include <linux/mutex.h> | 14 | #include <linux/mutex.h> |
16 | #include <linux/ctype.h> | 15 | #include <linux/ctype.h> |
17 | #include <linux/list.h> | 16 | #include <linux/list.h> |