| author | Christoph Lameter <clameter@sgi.com> | 2008-02-14 15:05:41 -0500 |
|---|---|---|
| committer | Christoph Lameter <clameter@sgi.com> | 2008-02-14 15:05:41 -0500 |
| commit | c5974932c1e8514d3478573bb52beebeb2c786dd (patch) | |
| tree | a204156fbb0036fb76e89ceffa15a30e90bc3f75 /kernel | |
| parent | 9e40ade04c45a46f6b3d647e0bdac1a32bfaa3a9 (diff) | |
| parent | e760e716d47b48caf98da348368fd41b4a9b9e7e (diff) | |
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'kernel')

| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | kernel/marker.c | 677 |
| -rw-r--r-- | kernel/module.c | 7 |
| -rw-r--r-- | kernel/rcupdate.c | 5 |
| -rw-r--r-- | kernel/rtmutex.c | 5 |
| -rw-r--r-- | kernel/sched.c | 494 |
| -rw-r--r-- | kernel/sched_rt.c | 102 |
| -rw-r--r-- | kernel/signal.c | 2 |
| -rw-r--r-- | kernel/sysctl.c | 36 |
| -rw-r--r-- | kernel/timeconst.pl | 2 |
| -rw-r--r-- | kernel/user.c | 50 |

10 files changed, 1003 insertions, 377 deletions
diff --git a/kernel/marker.c b/kernel/marker.c
index 5323cfaedbce..c4c2cd8b61f5 100644
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -27,35 +27,42 @@ | |||
27 | extern struct marker __start___markers[]; | 27 | extern struct marker __start___markers[]; |
28 | extern struct marker __stop___markers[]; | 28 | extern struct marker __stop___markers[]; |
29 | 29 | ||
30 | /* Set to 1 to enable marker debug output */ | ||
31 | const int marker_debug; | ||
32 | |||
30 | /* | 33 | /* |
31 | * markers_mutex nests inside module_mutex. Markers mutex protects the builtin | 34 | * markers_mutex nests inside module_mutex. Markers mutex protects the builtin |
32 | * and module markers, the hash table and deferred_sync. | 35 | * and module markers and the hash table. |
33 | */ | 36 | */ |
34 | static DEFINE_MUTEX(markers_mutex); | 37 | static DEFINE_MUTEX(markers_mutex); |
35 | 38 | ||
36 | /* | 39 | /* |
37 | * Marker deferred synchronization. | ||
38 | * Upon marker probe_unregister, we delay call to synchronize_sched() to | ||
39 | * accelerate mass unregistration (only when there is no more reference to a | ||
40 | * given module do we call synchronize_sched()). However, we need to make sure | ||
41 | * every critical region has ended before we re-arm a marker that has been | ||
42 | * unregistered and then registered back with a different probe data. | ||
43 | */ | ||
44 | static int deferred_sync; | ||
45 | |||
46 | /* | ||
47 | * Marker hash table, containing the active markers. | 40 | * Marker hash table, containing the active markers. |
48 | * Protected by module_mutex. | 41 | * Protected by module_mutex. |
49 | */ | 42 | */ |
50 | #define MARKER_HASH_BITS 6 | 43 | #define MARKER_HASH_BITS 6 |
51 | #define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS) | 44 | #define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS) |
52 | 45 | ||
46 | /* | ||
47 | * Note about RCU : | ||
48 | * It is used to make sure every handler has finished using its private data | ||
49 | * between two consecutive operation (add or remove) on a given marker. It is | ||
50 | * also used to delay the free of multiple probes array until a quiescent state | ||
51 | * is reached. | ||
52 | * marker entries modifications are protected by the markers_mutex. | ||
53 | */ | ||
53 | struct marker_entry { | 54 | struct marker_entry { |
54 | struct hlist_node hlist; | 55 | struct hlist_node hlist; |
55 | char *format; | 56 | char *format; |
56 | marker_probe_func *probe; | 57 | void (*call)(const struct marker *mdata, /* Probe wrapper */ |
57 | void *private; | 58 | void *call_private, const char *fmt, ...); |
59 | struct marker_probe_closure single; | ||
60 | struct marker_probe_closure *multi; | ||
58 | int refcount; /* Number of times armed. 0 if disarmed. */ | 61 | int refcount; /* Number of times armed. 0 if disarmed. */ |
62 | struct rcu_head rcu; | ||
63 | void *oldptr; | ||
64 | char rcu_pending:1; | ||
65 | char ptype:1; | ||
59 | char name[0]; /* Contains name'\0'format'\0' */ | 66 | char name[0]; /* Contains name'\0'format'\0' */ |
60 | }; | 67 | }; |
61 | 68 | ||
@@ -63,7 +70,8 @@ static struct hlist_head marker_table[MARKER_TABLE_SIZE]; | |||
63 | 70 | ||
64 | /** | 71 | /** |
65 | * __mark_empty_function - Empty probe callback | 72 | * __mark_empty_function - Empty probe callback |
66 | * @mdata: pointer of type const struct marker | 73 | * @probe_private: probe private data |
74 | * @call_private: call site private data | ||
67 | * @fmt: format string | 75 | * @fmt: format string |
68 | * @...: variable argument list | 76 | * @...: variable argument list |
69 | * | 77 | * |
@@ -72,13 +80,267 @@ static struct hlist_head marker_table[MARKER_TABLE_SIZE]; | |||
72 | * though the function pointer change and the marker enabling are two distinct | 80 | * though the function pointer change and the marker enabling are two distinct |
73 | * operations that modifies the execution flow of preemptible code. | 81 | * operations that modifies the execution flow of preemptible code. |
74 | */ | 82 | */ |
75 | void __mark_empty_function(const struct marker *mdata, void *private, | 83 | void __mark_empty_function(void *probe_private, void *call_private, |
76 | const char *fmt, ...) | 84 | const char *fmt, va_list *args) |
77 | { | 85 | { |
78 | } | 86 | } |
79 | EXPORT_SYMBOL_GPL(__mark_empty_function); | 87 | EXPORT_SYMBOL_GPL(__mark_empty_function); |
80 | 88 | ||
81 | /* | 89 | /* |
90 | * marker_probe_cb Callback that prepares the variable argument list for probes. | ||
91 | * @mdata: pointer of type struct marker | ||
92 | * @call_private: caller site private data | ||
93 | * @fmt: format string | ||
94 | * @...: Variable argument list. | ||
95 | * | ||
96 | * Since we do not use "typical" pointer based RCU in the 1 argument case, we | ||
97 | * need to put a full smp_rmb() in this branch. This is why we do not use | ||
98 | * rcu_dereference() for the pointer read. | ||
99 | */ | ||
100 | void marker_probe_cb(const struct marker *mdata, void *call_private, | ||
101 | const char *fmt, ...) | ||
102 | { | ||
103 | va_list args; | ||
104 | char ptype; | ||
105 | |||
106 | /* | ||
107 | * disabling preemption to make sure the teardown of the callbacks can | ||
108 | * be done correctly when they are in modules and they insure RCU read | ||
109 | * coherency. | ||
110 | */ | ||
111 | preempt_disable(); | ||
112 | ptype = ACCESS_ONCE(mdata->ptype); | ||
113 | if (likely(!ptype)) { | ||
114 | marker_probe_func *func; | ||
115 | /* Must read the ptype before ptr. They are not data dependant, | ||
116 | * so we put an explicit smp_rmb() here. */ | ||
117 | smp_rmb(); | ||
118 | func = ACCESS_ONCE(mdata->single.func); | ||
119 | /* Must read the ptr before private data. They are not data | ||
120 | * dependant, so we put an explicit smp_rmb() here. */ | ||
121 | smp_rmb(); | ||
122 | va_start(args, fmt); | ||
123 | func(mdata->single.probe_private, call_private, fmt, &args); | ||
124 | va_end(args); | ||
125 | } else { | ||
126 | struct marker_probe_closure *multi; | ||
127 | int i; | ||
128 | /* | ||
129 | * multi points to an array, therefore accessing the array | ||
130 | * depends on reading multi. However, even in this case, | ||
131 | * we must insure that the pointer is read _before_ the array | ||
132 | * data. Same as rcu_dereference, but we need a full smp_rmb() | ||
133 | * in the fast path, so put the explicit barrier here. | ||
134 | */ | ||
135 | smp_read_barrier_depends(); | ||
136 | multi = ACCESS_ONCE(mdata->multi); | ||
137 | for (i = 0; multi[i].func; i++) { | ||
138 | va_start(args, fmt); | ||
139 | multi[i].func(multi[i].probe_private, call_private, fmt, | ||
140 | &args); | ||
141 | va_end(args); | ||
142 | } | ||
143 | } | ||
144 | preempt_enable(); | ||
145 | } | ||
146 | EXPORT_SYMBOL_GPL(marker_probe_cb); | ||
147 | |||
148 | /* | ||
149 | * marker_probe_cb Callback that does not prepare the variable argument list. | ||
150 | * @mdata: pointer of type struct marker | ||
151 | * @call_private: caller site private data | ||
152 | * @fmt: format string | ||
153 | * @...: Variable argument list. | ||
154 | * | ||
155 | * Should be connected to markers "MARK_NOARGS". | ||
156 | */ | ||
157 | void marker_probe_cb_noarg(const struct marker *mdata, | ||
158 | void *call_private, const char *fmt, ...) | ||
159 | { | ||
160 | va_list args; /* not initialized */ | ||
161 | char ptype; | ||
162 | |||
163 | preempt_disable(); | ||
164 | ptype = ACCESS_ONCE(mdata->ptype); | ||
165 | if (likely(!ptype)) { | ||
166 | marker_probe_func *func; | ||
167 | /* Must read the ptype before ptr. They are not data dependant, | ||
168 | * so we put an explicit smp_rmb() here. */ | ||
169 | smp_rmb(); | ||
170 | func = ACCESS_ONCE(mdata->single.func); | ||
171 | /* Must read the ptr before private data. They are not data | ||
172 | * dependant, so we put an explicit smp_rmb() here. */ | ||
173 | smp_rmb(); | ||
174 | func(mdata->single.probe_private, call_private, fmt, &args); | ||
175 | } else { | ||
176 | struct marker_probe_closure *multi; | ||
177 | int i; | ||
178 | /* | ||
179 | * multi points to an array, therefore accessing the array | ||
180 | * depends on reading multi. However, even in this case, | ||
181 | * we must insure that the pointer is read _before_ the array | ||
182 | * data. Same as rcu_dereference, but we need a full smp_rmb() | ||
183 | * in the fast path, so put the explicit barrier here. | ||
184 | */ | ||
185 | smp_read_barrier_depends(); | ||
186 | multi = ACCESS_ONCE(mdata->multi); | ||
187 | for (i = 0; multi[i].func; i++) | ||
188 | multi[i].func(multi[i].probe_private, call_private, fmt, | ||
189 | &args); | ||
190 | } | ||
191 | preempt_enable(); | ||
192 | } | ||
193 | EXPORT_SYMBOL_GPL(marker_probe_cb_noarg); | ||
194 | |||
195 | static void free_old_closure(struct rcu_head *head) | ||
196 | { | ||
197 | struct marker_entry *entry = container_of(head, | ||
198 | struct marker_entry, rcu); | ||
199 | kfree(entry->oldptr); | ||
200 | /* Make sure we free the data before setting the pending flag to 0 */ | ||
201 | smp_wmb(); | ||
202 | entry->rcu_pending = 0; | ||
203 | } | ||
204 | |||
205 | static void debug_print_probes(struct marker_entry *entry) | ||
206 | { | ||
207 | int i; | ||
208 | |||
209 | if (!marker_debug) | ||
210 | return; | ||
211 | |||
212 | if (!entry->ptype) { | ||
213 | printk(KERN_DEBUG "Single probe : %p %p\n", | ||
214 | entry->single.func, | ||
215 | entry->single.probe_private); | ||
216 | } else { | ||
217 | for (i = 0; entry->multi[i].func; i++) | ||
218 | printk(KERN_DEBUG "Multi probe %d : %p %p\n", i, | ||
219 | entry->multi[i].func, | ||
220 | entry->multi[i].probe_private); | ||
221 | } | ||
222 | } | ||
223 | |||
224 | static struct marker_probe_closure * | ||
225 | marker_entry_add_probe(struct marker_entry *entry, | ||
226 | marker_probe_func *probe, void *probe_private) | ||
227 | { | ||
228 | int nr_probes = 0; | ||
229 | struct marker_probe_closure *old, *new; | ||
230 | |||
231 | WARN_ON(!probe); | ||
232 | |||
233 | debug_print_probes(entry); | ||
234 | old = entry->multi; | ||
235 | if (!entry->ptype) { | ||
236 | if (entry->single.func == probe && | ||
237 | entry->single.probe_private == probe_private) | ||
238 | return ERR_PTR(-EBUSY); | ||
239 | if (entry->single.func == __mark_empty_function) { | ||
240 | /* 0 -> 1 probes */ | ||
241 | entry->single.func = probe; | ||
242 | entry->single.probe_private = probe_private; | ||
243 | entry->refcount = 1; | ||
244 | entry->ptype = 0; | ||
245 | debug_print_probes(entry); | ||
246 | return NULL; | ||
247 | } else { | ||
248 | /* 1 -> 2 probes */ | ||
249 | nr_probes = 1; | ||
250 | old = NULL; | ||
251 | } | ||
252 | } else { | ||
253 | /* (N -> N+1), (N != 0, 1) probes */ | ||
254 | for (nr_probes = 0; old[nr_probes].func; nr_probes++) | ||
255 | if (old[nr_probes].func == probe | ||
256 | && old[nr_probes].probe_private | ||
257 | == probe_private) | ||
258 | return ERR_PTR(-EBUSY); | ||
259 | } | ||
260 | /* + 2 : one for new probe, one for NULL func */ | ||
261 | new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure), | ||
262 | GFP_KERNEL); | ||
263 | if (new == NULL) | ||
264 | return ERR_PTR(-ENOMEM); | ||
265 | if (!old) | ||
266 | new[0] = entry->single; | ||
267 | else | ||
268 | memcpy(new, old, | ||
269 | nr_probes * sizeof(struct marker_probe_closure)); | ||
270 | new[nr_probes].func = probe; | ||
271 | new[nr_probes].probe_private = probe_private; | ||
272 | entry->refcount = nr_probes + 1; | ||
273 | entry->multi = new; | ||
274 | entry->ptype = 1; | ||
275 | debug_print_probes(entry); | ||
276 | return old; | ||
277 | } | ||
278 | |||
279 | static struct marker_probe_closure * | ||
280 | marker_entry_remove_probe(struct marker_entry *entry, | ||
281 | marker_probe_func *probe, void *probe_private) | ||
282 | { | ||
283 | int nr_probes = 0, nr_del = 0, i; | ||
284 | struct marker_probe_closure *old, *new; | ||
285 | |||
286 | old = entry->multi; | ||
287 | |||
288 | debug_print_probes(entry); | ||
289 | if (!entry->ptype) { | ||
290 | /* 0 -> N is an error */ | ||
291 | WARN_ON(entry->single.func == __mark_empty_function); | ||
292 | /* 1 -> 0 probes */ | ||
293 | WARN_ON(probe && entry->single.func != probe); | ||
294 | WARN_ON(entry->single.probe_private != probe_private); | ||
295 | entry->single.func = __mark_empty_function; | ||
296 | entry->refcount = 0; | ||
297 | entry->ptype = 0; | ||
298 | debug_print_probes(entry); | ||
299 | return NULL; | ||
300 | } else { | ||
301 | /* (N -> M), (N > 1, M >= 0) probes */ | ||
302 | for (nr_probes = 0; old[nr_probes].func; nr_probes++) { | ||
303 | if ((!probe || old[nr_probes].func == probe) | ||
304 | && old[nr_probes].probe_private | ||
305 | == probe_private) | ||
306 | nr_del++; | ||
307 | } | ||
308 | } | ||
309 | |||
310 | if (nr_probes - nr_del == 0) { | ||
311 | /* N -> 0, (N > 1) */ | ||
312 | entry->single.func = __mark_empty_function; | ||
313 | entry->refcount = 0; | ||
314 | entry->ptype = 0; | ||
315 | } else if (nr_probes - nr_del == 1) { | ||
316 | /* N -> 1, (N > 1) */ | ||
317 | for (i = 0; old[i].func; i++) | ||
318 | if ((probe && old[i].func != probe) || | ||
319 | old[i].probe_private != probe_private) | ||
320 | entry->single = old[i]; | ||
321 | entry->refcount = 1; | ||
322 | entry->ptype = 0; | ||
323 | } else { | ||
324 | int j = 0; | ||
325 | /* N -> M, (N > 1, M > 1) */ | ||
326 | /* + 1 for NULL */ | ||
327 | new = kzalloc((nr_probes - nr_del + 1) | ||
328 | * sizeof(struct marker_probe_closure), GFP_KERNEL); | ||
329 | if (new == NULL) | ||
330 | return ERR_PTR(-ENOMEM); | ||
331 | for (i = 0; old[i].func; i++) | ||
332 | if ((probe && old[i].func != probe) || | ||
333 | old[i].probe_private != probe_private) | ||
334 | new[j++] = old[i]; | ||
335 | entry->refcount = nr_probes - nr_del; | ||
336 | entry->ptype = 1; | ||
337 | entry->multi = new; | ||
338 | } | ||
339 | debug_print_probes(entry); | ||
340 | return old; | ||
341 | } | ||
342 | |||
343 | /* | ||
82 | * Get marker if the marker is present in the marker hash table. | 344 | * Get marker if the marker is present in the marker hash table. |
83 | * Must be called with markers_mutex held. | 345 | * Must be called with markers_mutex held. |
84 | * Returns NULL if not present. | 346 | * Returns NULL if not present. |
@@ -102,8 +364,7 @@ static struct marker_entry *get_marker(const char *name) | |||
102 | * Add the marker to the marker hash table. Must be called with markers_mutex | 364 | * Add the marker to the marker hash table. Must be called with markers_mutex |
103 | * held. | 365 | * held. |
104 | */ | 366 | */ |
105 | static int add_marker(const char *name, const char *format, | 367 | static struct marker_entry *add_marker(const char *name, const char *format) |
106 | marker_probe_func *probe, void *private) | ||
107 | { | 368 | { |
108 | struct hlist_head *head; | 369 | struct hlist_head *head; |
109 | struct hlist_node *node; | 370 | struct hlist_node *node; |
@@ -118,9 +379,8 @@ static int add_marker(const char *name, const char *format, | |||
118 | hlist_for_each_entry(e, node, head, hlist) { | 379 | hlist_for_each_entry(e, node, head, hlist) { |
119 | if (!strcmp(name, e->name)) { | 380 | if (!strcmp(name, e->name)) { |
120 | printk(KERN_NOTICE | 381 | printk(KERN_NOTICE |
121 | "Marker %s busy, probe %p already installed\n", | 382 | "Marker %s busy\n", name); |
122 | name, e->probe); | 383 | return ERR_PTR(-EBUSY); /* Already there */ |
123 | return -EBUSY; /* Already there */ | ||
124 | } | 384 | } |
125 | } | 385 | } |
126 | /* | 386 | /* |
@@ -130,34 +390,42 @@ static int add_marker(const char *name, const char *format, | |||
130 | e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, | 390 | e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, |
131 | GFP_KERNEL); | 391 | GFP_KERNEL); |
132 | if (!e) | 392 | if (!e) |
133 | return -ENOMEM; | 393 | return ERR_PTR(-ENOMEM); |
134 | memcpy(&e->name[0], name, name_len); | 394 | memcpy(&e->name[0], name, name_len); |
135 | if (format) { | 395 | if (format) { |
136 | e->format = &e->name[name_len]; | 396 | e->format = &e->name[name_len]; |
137 | memcpy(e->format, format, format_len); | 397 | memcpy(e->format, format, format_len); |
398 | if (strcmp(e->format, MARK_NOARGS) == 0) | ||
399 | e->call = marker_probe_cb_noarg; | ||
400 | else | ||
401 | e->call = marker_probe_cb; | ||
138 | trace_mark(core_marker_format, "name %s format %s", | 402 | trace_mark(core_marker_format, "name %s format %s", |
139 | e->name, e->format); | 403 | e->name, e->format); |
140 | } else | 404 | } else { |
141 | e->format = NULL; | 405 | e->format = NULL; |
142 | e->probe = probe; | 406 | e->call = marker_probe_cb; |
143 | e->private = private; | 407 | } |
408 | e->single.func = __mark_empty_function; | ||
409 | e->single.probe_private = NULL; | ||
410 | e->multi = NULL; | ||
411 | e->ptype = 0; | ||
144 | e->refcount = 0; | 412 | e->refcount = 0; |
413 | e->rcu_pending = 0; | ||
145 | hlist_add_head(&e->hlist, head); | 414 | hlist_add_head(&e->hlist, head); |
146 | return 0; | 415 | return e; |
147 | } | 416 | } |
148 | 417 | ||
149 | /* | 418 | /* |
150 | * Remove the marker from the marker hash table. Must be called with mutex_lock | 419 | * Remove the marker from the marker hash table. Must be called with mutex_lock |
151 | * held. | 420 | * held. |
152 | */ | 421 | */ |
153 | static void *remove_marker(const char *name) | 422 | static int remove_marker(const char *name) |
154 | { | 423 | { |
155 | struct hlist_head *head; | 424 | struct hlist_head *head; |
156 | struct hlist_node *node; | 425 | struct hlist_node *node; |
157 | struct marker_entry *e; | 426 | struct marker_entry *e; |
158 | int found = 0; | 427 | int found = 0; |
159 | size_t len = strlen(name) + 1; | 428 | size_t len = strlen(name) + 1; |
160 | void *private = NULL; | ||
161 | u32 hash = jhash(name, len-1, 0); | 429 | u32 hash = jhash(name, len-1, 0); |
162 | 430 | ||
163 | head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; | 431 | head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; |
@@ -167,12 +435,16 @@ static void *remove_marker(const char *name) | |||
167 | break; | 435 | break; |
168 | } | 436 | } |
169 | } | 437 | } |
170 | if (found) { | 438 | if (!found) |
171 | private = e->private; | 439 | return -ENOENT; |
172 | hlist_del(&e->hlist); | 440 | if (e->single.func != __mark_empty_function) |
173 | kfree(e); | 441 | return -EBUSY; |
174 | } | 442 | hlist_del(&e->hlist); |
175 | return private; | 443 | /* Make sure the call_rcu has been executed */ |
444 | if (e->rcu_pending) | ||
445 | rcu_barrier(); | ||
446 | kfree(e); | ||
447 | return 0; | ||
176 | } | 448 | } |
177 | 449 | ||
178 | /* | 450 | /* |
@@ -184,6 +456,7 @@ static int marker_set_format(struct marker_entry **entry, const char *format) | |||
184 | size_t name_len = strlen((*entry)->name) + 1; | 456 | size_t name_len = strlen((*entry)->name) + 1; |
185 | size_t format_len = strlen(format) + 1; | 457 | size_t format_len = strlen(format) + 1; |
186 | 458 | ||
459 | |||
187 | e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, | 460 | e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, |
188 | GFP_KERNEL); | 461 | GFP_KERNEL); |
189 | if (!e) | 462 | if (!e) |
@@ -191,11 +464,20 @@ static int marker_set_format(struct marker_entry **entry, const char *format) | |||
191 | memcpy(&e->name[0], (*entry)->name, name_len); | 464 | memcpy(&e->name[0], (*entry)->name, name_len); |
192 | e->format = &e->name[name_len]; | 465 | e->format = &e->name[name_len]; |
193 | memcpy(e->format, format, format_len); | 466 | memcpy(e->format, format, format_len); |
194 | e->probe = (*entry)->probe; | 467 | if (strcmp(e->format, MARK_NOARGS) == 0) |
195 | e->private = (*entry)->private; | 468 | e->call = marker_probe_cb_noarg; |
469 | else | ||
470 | e->call = marker_probe_cb; | ||
471 | e->single = (*entry)->single; | ||
472 | e->multi = (*entry)->multi; | ||
473 | e->ptype = (*entry)->ptype; | ||
196 | e->refcount = (*entry)->refcount; | 474 | e->refcount = (*entry)->refcount; |
475 | e->rcu_pending = 0; | ||
197 | hlist_add_before(&e->hlist, &(*entry)->hlist); | 476 | hlist_add_before(&e->hlist, &(*entry)->hlist); |
198 | hlist_del(&(*entry)->hlist); | 477 | hlist_del(&(*entry)->hlist); |
478 | /* Make sure the call_rcu has been executed */ | ||
479 | if ((*entry)->rcu_pending) | ||
480 | rcu_barrier(); | ||
199 | kfree(*entry); | 481 | kfree(*entry); |
200 | *entry = e; | 482 | *entry = e; |
201 | trace_mark(core_marker_format, "name %s format %s", | 483 | trace_mark(core_marker_format, "name %s format %s", |
@@ -206,7 +488,8 @@ static int marker_set_format(struct marker_entry **entry, const char *format) | |||
206 | /* | 488 | /* |
207 | * Sets the probe callback corresponding to one marker. | 489 | * Sets the probe callback corresponding to one marker. |
208 | */ | 490 | */ |
209 | static int set_marker(struct marker_entry **entry, struct marker *elem) | 491 | static int set_marker(struct marker_entry **entry, struct marker *elem, |
492 | int active) | ||
210 | { | 493 | { |
211 | int ret; | 494 | int ret; |
212 | WARN_ON(strcmp((*entry)->name, elem->name) != 0); | 495 | WARN_ON(strcmp((*entry)->name, elem->name) != 0); |
@@ -226,9 +509,43 @@ static int set_marker(struct marker_entry **entry, struct marker *elem) | |||
226 | if (ret) | 509 | if (ret) |
227 | return ret; | 510 | return ret; |
228 | } | 511 | } |
229 | elem->call = (*entry)->probe; | 512 | |
230 | elem->private = (*entry)->private; | 513 | /* |
231 | elem->state = 1; | 514 | * probe_cb setup (statically known) is done here. It is |
515 | * asynchronous with the rest of execution, therefore we only | ||
516 | * pass from a "safe" callback (with argument) to an "unsafe" | ||
517 | * callback (does not set arguments). | ||
518 | */ | ||
519 | elem->call = (*entry)->call; | ||
520 | /* | ||
521 | * Sanity check : | ||
522 | * We only update the single probe private data when the ptr is | ||
523 | * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1) | ||
524 | */ | ||
525 | WARN_ON(elem->single.func != __mark_empty_function | ||
526 | && elem->single.probe_private | ||
527 | != (*entry)->single.probe_private && | ||
528 | !elem->ptype); | ||
529 | elem->single.probe_private = (*entry)->single.probe_private; | ||
530 | /* | ||
531 | * Make sure the private data is valid when we update the | ||
532 | * single probe ptr. | ||
533 | */ | ||
534 | smp_wmb(); | ||
535 | elem->single.func = (*entry)->single.func; | ||
536 | /* | ||
537 | * We also make sure that the new probe callbacks array is consistent | ||
538 | * before setting a pointer to it. | ||
539 | */ | ||
540 | rcu_assign_pointer(elem->multi, (*entry)->multi); | ||
541 | /* | ||
542 | * Update the function or multi probe array pointer before setting the | ||
543 | * ptype. | ||
544 | */ | ||
545 | smp_wmb(); | ||
546 | elem->ptype = (*entry)->ptype; | ||
547 | elem->state = active; | ||
548 | |||
232 | return 0; | 549 | return 0; |
233 | } | 550 | } |
234 | 551 | ||
@@ -240,8 +557,12 @@ static int set_marker(struct marker_entry **entry, struct marker *elem) | |||
240 | */ | 557 | */ |
241 | static void disable_marker(struct marker *elem) | 558 | static void disable_marker(struct marker *elem) |
242 | { | 559 | { |
560 | /* leave "call" as is. It is known statically. */ | ||
243 | elem->state = 0; | 561 | elem->state = 0; |
244 | elem->call = __mark_empty_function; | 562 | elem->single.func = __mark_empty_function; |
563 | /* Update the function before setting the ptype */ | ||
564 | smp_wmb(); | ||
565 | elem->ptype = 0; /* single probe */ | ||
245 | /* | 566 | /* |
246 | * Leave the private data and id there, because removal is racy and | 567 | * Leave the private data and id there, because removal is racy and |
247 | * should be done only after a synchronize_sched(). These are never used | 568 | * should be done only after a synchronize_sched(). These are never used |
@@ -253,14 +574,11 @@ static void disable_marker(struct marker *elem) | |||
253 | * marker_update_probe_range - Update a probe range | 574 | * marker_update_probe_range - Update a probe range |
254 | * @begin: beginning of the range | 575 | * @begin: beginning of the range |
255 | * @end: end of the range | 576 | * @end: end of the range |
256 | * @probe_module: module address of the probe being updated | ||
257 | * @refcount: number of references left to the given probe_module (out) | ||
258 | * | 577 | * |
259 | * Updates the probe callback corresponding to a range of markers. | 578 | * Updates the probe callback corresponding to a range of markers. |
260 | */ | 579 | */ |
261 | void marker_update_probe_range(struct marker *begin, | 580 | void marker_update_probe_range(struct marker *begin, |
262 | struct marker *end, struct module *probe_module, | 581 | struct marker *end) |
263 | int *refcount) | ||
264 | { | 582 | { |
265 | struct marker *iter; | 583 | struct marker *iter; |
266 | struct marker_entry *mark_entry; | 584 | struct marker_entry *mark_entry; |
@@ -268,15 +586,12 @@ void marker_update_probe_range(struct marker *begin, | |||
268 | mutex_lock(&markers_mutex); | 586 | mutex_lock(&markers_mutex); |
269 | for (iter = begin; iter < end; iter++) { | 587 | for (iter = begin; iter < end; iter++) { |
270 | mark_entry = get_marker(iter->name); | 588 | mark_entry = get_marker(iter->name); |
271 | if (mark_entry && mark_entry->refcount) { | 589 | if (mark_entry) { |
272 | set_marker(&mark_entry, iter); | 590 | set_marker(&mark_entry, iter, |
591 | !!mark_entry->refcount); | ||
273 | /* | 592 | /* |
274 | * ignore error, continue | 593 | * ignore error, continue |
275 | */ | 594 | */ |
276 | if (probe_module) | ||
277 | if (probe_module == | ||
278 | __module_text_address((unsigned long)mark_entry->probe)) | ||
279 | (*refcount)++; | ||
280 | } else { | 595 | } else { |
281 | disable_marker(iter); | 596 | disable_marker(iter); |
282 | } | 597 | } |
@@ -289,20 +604,27 @@ void marker_update_probe_range(struct marker *begin, | |||
289 | * Issues a synchronize_sched() when no reference to the module passed | 604 | * Issues a synchronize_sched() when no reference to the module passed |
290 | * as parameter is found in the probes so the probe module can be | 605 | * as parameter is found in the probes so the probe module can be |
291 | * safely unloaded from now on. | 606 | * safely unloaded from now on. |
607 | * | ||
608 | * Internal callback only changed before the first probe is connected to it. | ||
609 | * Single probe private data can only be changed on 0 -> 1 and 2 -> 1 | ||
610 | * transitions. All other transitions will leave the old private data valid. | ||
611 | * This makes the non-atomicity of the callback/private data updates valid. | ||
612 | * | ||
613 | * "special case" updates : | ||
614 | * 0 -> 1 callback | ||
615 | * 1 -> 0 callback | ||
616 | * 1 -> 2 callbacks | ||
617 | * 2 -> 1 callbacks | ||
618 | * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates. | ||
619 | * Site effect : marker_set_format may delete the marker entry (creating a | ||
620 | * replacement). | ||
292 | */ | 621 | */ |
293 | static void marker_update_probes(struct module *probe_module) | 622 | static void marker_update_probes(void) |
294 | { | 623 | { |
295 | int refcount = 0; | ||
296 | |||
297 | /* Core kernel markers */ | 624 | /* Core kernel markers */ |
298 | marker_update_probe_range(__start___markers, | 625 | marker_update_probe_range(__start___markers, __stop___markers); |
299 | __stop___markers, probe_module, &refcount); | ||
300 | /* Markers in modules. */ | 626 | /* Markers in modules. */ |
301 | module_update_markers(probe_module, &refcount); | 627 | module_update_markers(); |
302 | if (probe_module && refcount == 0) { | ||
303 | synchronize_sched(); | ||
304 | deferred_sync = 0; | ||
305 | } | ||
306 | } | 628 | } |
307 | 629 | ||
308 | /** | 630 | /** |
@@ -310,33 +632,49 @@ static void marker_update_probes(struct module *probe_module) | |||
310 | * @name: marker name | 632 | * @name: marker name |
311 | * @format: format string | 633 | * @format: format string |
312 | * @probe: probe handler | 634 | * @probe: probe handler |
313 | * @private: probe private data | 635 | * @probe_private: probe private data |
314 | * | 636 | * |
315 | * private data must be a valid allocated memory address, or NULL. | 637 | * private data must be a valid allocated memory address, or NULL. |
316 | * Returns 0 if ok, error value on error. | 638 | * Returns 0 if ok, error value on error. |
639 | * The probe address must at least be aligned on the architecture pointer size. | ||
317 | */ | 640 | */ |
318 | int marker_probe_register(const char *name, const char *format, | 641 | int marker_probe_register(const char *name, const char *format, |
319 | marker_probe_func *probe, void *private) | 642 | marker_probe_func *probe, void *probe_private) |
320 | { | 643 | { |
321 | struct marker_entry *entry; | 644 | struct marker_entry *entry; |
322 | int ret = 0; | 645 | int ret = 0; |
646 | struct marker_probe_closure *old; | ||
323 | 647 | ||
324 | mutex_lock(&markers_mutex); | 648 | mutex_lock(&markers_mutex); |
325 | entry = get_marker(name); | 649 | entry = get_marker(name); |
326 | if (entry && entry->refcount) { | 650 | if (!entry) { |
327 | ret = -EBUSY; | 651 | entry = add_marker(name, format); |
328 | goto end; | 652 | if (IS_ERR(entry)) { |
329 | } | 653 | ret = PTR_ERR(entry); |
330 | if (deferred_sync) { | 654 | goto end; |
331 | synchronize_sched(); | 655 | } |
332 | deferred_sync = 0; | ||
333 | } | 656 | } |
334 | ret = add_marker(name, format, probe, private); | 657 | /* |
335 | if (ret) | 658 | * If we detect that a call_rcu is pending for this marker, |
659 | * make sure it's executed now. | ||
660 | */ | ||
661 | if (entry->rcu_pending) | ||
662 | rcu_barrier(); | ||
663 | old = marker_entry_add_probe(entry, probe, probe_private); | ||
664 | if (IS_ERR(old)) { | ||
665 | ret = PTR_ERR(old); | ||
336 | goto end; | 666 | goto end; |
667 | } | ||
337 | mutex_unlock(&markers_mutex); | 668 | mutex_unlock(&markers_mutex); |
338 | marker_update_probes(NULL); | 669 | marker_update_probes(); /* may update entry */ |
339 | return ret; | 670 | mutex_lock(&markers_mutex); |
671 | entry = get_marker(name); | ||
672 | WARN_ON(!entry); | ||
673 | entry->oldptr = old; | ||
674 | entry->rcu_pending = 1; | ||
675 | /* write rcu_pending before calling the RCU callback */ | ||
676 | smp_wmb(); | ||
677 | call_rcu(&entry->rcu, free_old_closure); | ||
340 | end: | 678 | end: |
341 | mutex_unlock(&markers_mutex); | 679 | mutex_unlock(&markers_mutex); |
342 | return ret; | 680 | return ret; |
@@ -346,171 +684,166 @@ EXPORT_SYMBOL_GPL(marker_probe_register); | |||
346 | /** | 684 | /** |
347 | * marker_probe_unregister - Disconnect a probe from a marker | 685 | * marker_probe_unregister - Disconnect a probe from a marker |
348 | * @name: marker name | 686 | * @name: marker name |
687 | * @probe: probe function pointer | ||
688 | * @probe_private: probe private data | ||
349 | * | 689 | * |
350 | * Returns the private data given to marker_probe_register, or an ERR_PTR(). | 690 | * Returns the private data given to marker_probe_register, or an ERR_PTR(). |
691 | * We do not need to call a synchronize_sched to make sure the probes have | ||
692 | * finished running before doing a module unload, because the module unload | ||
693 | * itself uses stop_machine(), which insures that every preempt disabled section | ||
694 | * have finished. | ||
351 | */ | 695 | */ |
352 | void *marker_probe_unregister(const char *name) | 696 | int marker_probe_unregister(const char *name, |
697 | marker_probe_func *probe, void *probe_private) | ||
353 | { | 698 | { |
354 | struct module *probe_module; | ||
355 | struct marker_entry *entry; | 699 | struct marker_entry *entry; |
356 | void *private; | 700 | struct marker_probe_closure *old; |
701 | int ret = 0; | ||
357 | 702 | ||
358 | mutex_lock(&markers_mutex); | 703 | mutex_lock(&markers_mutex); |
359 | entry = get_marker(name); | 704 | entry = get_marker(name); |
360 | if (!entry) { | 705 | if (!entry) { |
361 | private = ERR_PTR(-ENOENT); | 706 | ret = -ENOENT; |
362 | goto end; | 707 | goto end; |
363 | } | 708 | } |
364 | entry->refcount = 0; | 709 | if (entry->rcu_pending) |
365 | /* In what module is the probe handler ? */ | 710 | rcu_barrier(); |
366 | probe_module = __module_text_address((unsigned long)entry->probe); | 711 | old = marker_entry_remove_probe(entry, probe, probe_private); |
367 | private = remove_marker(name); | ||
368 | deferred_sync = 1; | ||
369 | mutex_unlock(&markers_mutex); | 712 | mutex_unlock(&markers_mutex); |
370 | marker_update_probes(probe_module); | 713 | marker_update_probes(); /* may update entry */ |
371 | return private; | 714 | mutex_lock(&markers_mutex); |
715 | entry = get_marker(name); | ||
716 | entry->oldptr = old; | ||
717 | entry->rcu_pending = 1; | ||
718 | /* write rcu_pending before calling the RCU callback */ | ||
719 | smp_wmb(); | ||
720 | call_rcu(&entry->rcu, free_old_closure); | ||
721 | remove_marker(name); /* Ignore busy error message */ | ||
372 | end: | 722 | end: |
373 | mutex_unlock(&markers_mutex); | 723 | mutex_unlock(&markers_mutex); |
374 | return private; | 724 | return ret; |
375 | } | 725 | } |
376 | EXPORT_SYMBOL_GPL(marker_probe_unregister); | 726 | EXPORT_SYMBOL_GPL(marker_probe_unregister); |
377 | 727 | ||
378 | /** | 728 | static struct marker_entry * |
379 | * marker_probe_unregister_private_data - Disconnect a probe from a marker | 729 | get_marker_from_private_data(marker_probe_func *probe, void *probe_private) |
380 | * @private: probe private data | ||
381 | * | ||
382 | * Unregister a marker by providing the registered private data. | ||
383 | * Returns the private data given to marker_probe_register, or an ERR_PTR(). | ||
384 | */ | ||
385 | void *marker_probe_unregister_private_data(void *private) | ||
386 | { | 730 | { |
387 | struct module *probe_module; | ||
388 | struct hlist_head *head; | ||
389 | struct hlist_node *node; | ||
390 | struct marker_entry *entry; | 731 | struct marker_entry *entry; |
391 | int found = 0; | ||
392 | unsigned int i; | 732 | unsigned int i; |
733 | struct hlist_head *head; | ||
734 | struct hlist_node *node; | ||
393 | 735 | ||
394 | mutex_lock(&markers_mutex); | ||
395 | for (i = 0; i < MARKER_TABLE_SIZE; i++) { | 736 | for (i = 0; i < MARKER_TABLE_SIZE; i++) { |
396 | head = &marker_table[i]; | 737 | head = &marker_table[i]; |
397 | hlist_for_each_entry(entry, node, head, hlist) { | 738 | hlist_for_each_entry(entry, node, head, hlist) { |
398 | if (entry->private == private) { | 739 | if (!entry->ptype) { |
399 | found = 1; | 740 | if (entry->single.func == probe |
400 | goto iter_end; | 741 | && entry->single.probe_private |
742 | == probe_private) | ||
743 | return entry; | ||
744 | } else { | ||
745 | struct marker_probe_closure *closure; | ||
746 | closure = entry->multi; | ||
747 | for (i = 0; closure[i].func; i++) { | ||
748 | if (closure[i].func == probe && | ||
749 | closure[i].probe_private | ||
750 | == probe_private) | ||
751 | return entry; | ||
752 | } | ||
401 | } | 753 | } |
402 | } | 754 | } |
403 | } | 755 | } |
404 | iter_end: | 756 | return NULL; |
405 | if (!found) { | ||
406 | private = ERR_PTR(-ENOENT); | ||
407 | goto end; | ||
408 | } | ||
409 | entry->refcount = 0; | ||
410 | /* In what module is the probe handler ? */ | ||
411 | probe_module = __module_text_address((unsigned long)entry->probe); | ||
412 | private = remove_marker(entry->name); | ||
413 | deferred_sync = 1; | ||
414 | mutex_unlock(&markers_mutex); | ||
415 | marker_update_probes(probe_module); | ||
416 | return private; | ||
417 | end: | ||
418 | mutex_unlock(&markers_mutex); | ||
419 | return private; | ||
420 | } | 757 | } |
421 | EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data); | ||
422 | 758 | ||
423 | /** | 759 | /** |
424 | * marker_arm - Arm a marker | 760 | * marker_probe_unregister_private_data - Disconnect a probe from a marker |
425 | * @name: marker name | 761 | * @probe: probe function |
762 | * @probe_private: probe private data | ||
426 | * | 763 | * |
427 | * Activate a marker. It keeps a reference count of the number of | 764 | * Unregister a probe by providing the registered private data. |
428 | * arming/disarming done. | 765 | * Only removes the first marker found in hash table. |
429 | * Returns 0 if ok, error value on error. | 766 | * Return 0 on success or error value. |
767 | * We do not need to call a synchronize_sched to make sure the probes have | ||
768 | * finished running before doing a module unload, because the module unload | ||
769 | * itself uses stop_machine(), which insures that every preempt disabled section | ||
770 | * have finished. | ||
430 | */ | 771 | */ |
431 | int marker_arm(const char *name) | 772 | int marker_probe_unregister_private_data(marker_probe_func *probe, |
773 | void *probe_private) | ||
432 | { | 774 | { |
433 | struct marker_entry *entry; | 775 | struct marker_entry *entry; |
434 | int ret = 0; | 776 | int ret = 0; |
777 | struct marker_probe_closure *old; | ||
435 | 778 | ||
436 | mutex_lock(&markers_mutex); | 779 | mutex_lock(&markers_mutex); |
437 | entry = get_marker(name); | 780 | entry = get_marker_from_private_data(probe, probe_private); |
438 | if (!entry) { | 781 | if (!entry) { |
439 | ret = -ENOENT; | 782 | ret = -ENOENT; |
440 | goto end; | 783 | goto end; |
441 | } | 784 | } |
442 | /* | 785 | if (entry->rcu_pending) |
443 | * Only need to update probes when refcount passes from 0 to 1. | 786 | rcu_barrier(); |
444 | */ | 787 | old = marker_entry_remove_probe(entry, NULL, probe_private); |
445 | if (entry->refcount++) | ||
446 | goto end; | ||
447 | end: | ||
448 | mutex_unlock(&markers_mutex); | 788 | mutex_unlock(&markers_mutex); |
449 | marker_update_probes(NULL); | 789 | marker_update_probes(); /* may update entry */ |
450 | return ret; | ||
451 | } | ||
452 | EXPORT_SYMBOL_GPL(marker_arm); | ||
453 | |||
454 | /** | ||
455 | * marker_disarm - Disarm a marker | ||
456 | * @name: marker name | ||
457 | * | ||
458 | * Disarm a marker. It keeps a reference count of the number of arming/disarming | ||
459 | * done. | ||
460 | * Returns 0 if ok, error value on error. | ||
461 | */ | ||
462 | int marker_disarm(const char *name) | ||
463 | { | ||
464 | struct marker_entry *entry; | ||
465 | int ret = 0; | ||
466 | |||
467 | mutex_lock(&markers_mutex); | 790 | mutex_lock(&markers_mutex); |
468 | entry = get_marker(name); | 791 | entry = get_marker_from_private_data(probe, probe_private); |
469 | if (!entry) { | 792 | WARN_ON(!entry); |
470 | ret = -ENOENT; | 793 | entry->oldptr = old; |
471 | goto end; | 794 | entry->rcu_pending = 1; |
472 | } | 795 | /* write rcu_pending before calling the RCU callback */ |
473 | /* | 796 | smp_wmb(); |
474 | * Only permit decrement refcount if higher than 0. | 797 | call_rcu(&entry->rcu, free_old_closure); |
475 | * Do probe update only on 1 -> 0 transition. | 798 | remove_marker(entry->name); /* Ignore busy error message */ |
476 | */ | ||
477 | if (entry->refcount) { | ||
478 | if (--entry->refcount) | ||
479 | goto end; | ||
480 | } else { | ||
481 | ret = -EPERM; | ||
482 | goto end; | ||
483 | } | ||
484 | end: | 799 | end: |
485 | mutex_unlock(&markers_mutex); | 800 | mutex_unlock(&markers_mutex); |
486 | marker_update_probes(NULL); | ||
487 | return ret; | 801 | return ret; |
488 | } | 802 | } |
489 | EXPORT_SYMBOL_GPL(marker_disarm); | 803 | EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data); |
490 | 804 | ||
491 | /** | 805 | /** |
492 | * marker_get_private_data - Get a marker's probe private data | 806 | * marker_get_private_data - Get a marker's probe private data |
493 | * @name: marker name | 807 | * @name: marker name |
808 | * @probe: probe to match | ||
809 | * @num: get the nth matching probe's private data | ||
494 | * | 810 | * |
811 | * Returns the nth private data pointer (starting from 0) matching, or an | ||
812 | * ERR_PTR. | ||
495 | * Returns the private data pointer, or an ERR_PTR. | 813 | * Returns the private data pointer, or an ERR_PTR. |
496 | * The private data pointer should _only_ be dereferenced if the caller is the | 814 | * The private data pointer should _only_ be dereferenced if the caller is the |
497 | * owner of the data, or its content could vanish. This is mostly used to | 815 | * owner of the data, or its content could vanish. This is mostly used to |
498 | * confirm that a caller is the owner of a registered probe. | 816 | * confirm that a caller is the owner of a registered probe. |
499 | */ | 817 | */ |
500 | void *marker_get_private_data(const char *name) | 818 | void *marker_get_private_data(const char *name, marker_probe_func *probe, |
819 | int num) | ||
501 | { | 820 | { |
502 | struct hlist_head *head; | 821 | struct hlist_head *head; |
503 | struct hlist_node *node; | 822 | struct hlist_node *node; |
504 | struct marker_entry *e; | 823 | struct marker_entry *e; |
505 | size_t name_len = strlen(name) + 1; | 824 | size_t name_len = strlen(name) + 1; |
506 | u32 hash = jhash(name, name_len-1, 0); | 825 | u32 hash = jhash(name, name_len-1, 0); |
507 | int found = 0; | 826 | int i; |
508 | 827 | ||
509 | head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; | 828 | head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; |
510 | hlist_for_each_entry(e, node, head, hlist) { | 829 | hlist_for_each_entry(e, node, head, hlist) { |
511 | if (!strcmp(name, e->name)) { | 830 | if (!strcmp(name, e->name)) { |
512 | found = 1; | 831 | if (!e->ptype) { |
513 | return e->private; | 832 | if (num == 0 && e->single.func == probe) |
833 | return e->single.probe_private; | ||
834 | else | ||
835 | break; | ||
836 | } else { | ||
837 | struct marker_probe_closure *closure; | ||
838 | int match = 0; | ||
839 | closure = e->multi; | ||
840 | for (i = 0; closure[i].func; i++) { | ||
841 | if (closure[i].func != probe) | ||
842 | continue; | ||
843 | if (match++ == num) | ||
844 | return closure[i].probe_private; | ||
845 | } | ||
846 | } | ||
514 | } | 847 | } |
515 | } | 848 | } |
516 | return ERR_PTR(-ENOENT); | 849 | return ERR_PTR(-ENOENT); |
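The marker.c changes above replace the single probe/private pair per marker with an array of (probe, probe_private) closures, updated under markers_mutex and released through call_rcu(). A minimal, illustrative module sketch of the reworked registration API follows; the marker name "subsys_event", its format string and my_probe() are invented for the example and are not part of this commit.

```c
/*
 * Illustrative sketch only (not part of this commit): how a module might
 * use the reworked markers API above.
 */
#include <linux/module.h>
#include <linux/marker.h>

static void my_probe(void *probe_private, void *call_private,
		     const char *fmt, va_list *args)
{
	/* New probe signature: probe private data first, then the call
	 * site's private data, the format string and a va_list pointer. */
}

static int __init my_init(void)
{
	/*
	 * Several probes can now share one marker; each registration is
	 * keyed by the (probe, probe_private) pair, so registering the
	 * same pair twice returns -EBUSY (see marker_entry_add_probe()).
	 */
	return marker_probe_register("subsys_event", "value %d",
				     my_probe, NULL);
}

static void __exit my_exit(void)
{
	/* Unregistration now names the probe and its private data too. */
	marker_probe_unregister("subsys_event", my_probe, NULL);
}

module_init(my_init);
module_exit(my_exit);
MODULE_LICENSE("GPL");	/* marker symbols are EXPORT_SYMBOL_GPL */
```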
diff --git a/kernel/module.c b/kernel/module.c
index 4202da97a1da..92595bad3812 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2038,7 +2038,7 @@ static struct module *load_module(void __user *umod, | |||
2038 | #ifdef CONFIG_MARKERS | 2038 | #ifdef CONFIG_MARKERS |
2039 | if (!mod->taints) | 2039 | if (!mod->taints) |
2040 | marker_update_probe_range(mod->markers, | 2040 | marker_update_probe_range(mod->markers, |
2041 | mod->markers + mod->num_markers, NULL, NULL); | 2041 | mod->markers + mod->num_markers); |
2042 | #endif | 2042 | #endif |
2043 | err = module_finalize(hdr, sechdrs, mod); | 2043 | err = module_finalize(hdr, sechdrs, mod); |
2044 | if (err < 0) | 2044 | if (err < 0) |
@@ -2564,7 +2564,7 @@ EXPORT_SYMBOL(struct_module); | |||
2564 | #endif | 2564 | #endif |
2565 | 2565 | ||
2566 | #ifdef CONFIG_MARKERS | 2566 | #ifdef CONFIG_MARKERS |
2567 | void module_update_markers(struct module *probe_module, int *refcount) | 2567 | void module_update_markers(void) |
2568 | { | 2568 | { |
2569 | struct module *mod; | 2569 | struct module *mod; |
2570 | 2570 | ||
@@ -2572,8 +2572,7 @@ void module_update_markers(struct module *probe_module, int *refcount) | |||
2572 | list_for_each_entry(mod, &modules, list) | 2572 | list_for_each_entry(mod, &modules, list) |
2573 | if (!mod->taints) | 2573 | if (!mod->taints) |
2574 | marker_update_probe_range(mod->markers, | 2574 | marker_update_probe_range(mod->markers, |
2575 | mod->markers + mod->num_markers, | 2575 | mod->markers + mod->num_markers); |
2576 | probe_module, refcount); | ||
2577 | mutex_unlock(&module_mutex); | 2576 | mutex_unlock(&module_mutex); |
2578 | } | 2577 | } |
2579 | #endif | 2578 | #endif |
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 760dfc233a00..c09605f8d16c 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -56,7 +56,10 @@ static atomic_t rcu_barrier_cpu_count; | |||
56 | static DEFINE_MUTEX(rcu_barrier_mutex); | 56 | static DEFINE_MUTEX(rcu_barrier_mutex); |
57 | static struct completion rcu_barrier_completion; | 57 | static struct completion rcu_barrier_completion; |
58 | 58 | ||
59 | /* Because of FASTCALL declaration of complete, we use this wrapper */ | 59 | /* |
60 | * Awaken the corresponding synchronize_rcu() instance now that a | ||
61 | * grace period has elapsed. | ||
62 | */ | ||
60 | static void wakeme_after_rcu(struct rcu_head *head) | 63 | static void wakeme_after_rcu(struct rcu_head *head) |
61 | { | 64 | { |
62 | struct rcu_synchronize *rcu; | 65 | struct rcu_synchronize *rcu; |
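The rcupdate.c hunk only replaces the stale FASTCALL comment on wakeme_after_rcu() with one stating what it does: wake the synchronize_rcu() caller once a grace period has elapsed. For reference, a rough sketch of that pattern, reproduced from memory of this kernel era rather than taken from the hunk:

```c
/*
 * Rough sketch (from memory, not copied from this diff): queue a callback,
 * then sleep until a grace period has elapsed and the callback wakes us.
 */
struct rcu_synchronize {
	struct rcu_head head;
	struct completion completion;
};

static void wakeme_after_rcu(struct rcu_head *head)
{
	struct rcu_synchronize *rcu;

	rcu = container_of(head, struct rcu_synchronize, head);
	complete(&rcu->completion);
}

void synchronize_rcu(void)
{
	struct rcu_synchronize rcu;

	init_completion(&rcu.completion);
	/* wakeme_after_rcu() runs once the grace period has elapsed. */
	call_rcu(&rcu.head, wakeme_after_rcu);
	wait_for_completion(&rcu.completion);
}
```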
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 0deef71ff8d2..6522ae5b14a2 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -630,9 +630,12 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, | |||
630 | set_current_state(state); | 630 | set_current_state(state); |
631 | 631 | ||
632 | /* Setup the timer, when timeout != NULL */ | 632 | /* Setup the timer, when timeout != NULL */ |
633 | if (unlikely(timeout)) | 633 | if (unlikely(timeout)) { |
634 | hrtimer_start(&timeout->timer, timeout->timer.expires, | 634 | hrtimer_start(&timeout->timer, timeout->timer.expires, |
635 | HRTIMER_MODE_ABS); | 635 | HRTIMER_MODE_ABS); |
636 | if (!hrtimer_active(&timeout->timer)) | ||
637 | timeout->task = NULL; | ||
638 | } | ||
636 | 639 | ||
637 | for (;;) { | 640 | for (;;) { |
638 | /* Try to acquire the lock: */ | 641 | /* Try to acquire the lock: */ |
diff --git a/kernel/sched.c b/kernel/sched.c
index 3eedd5260907..f28f19e65b59 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -155,7 +155,7 @@ struct rt_prio_array { | |||
155 | struct list_head queue[MAX_RT_PRIO]; | 155 | struct list_head queue[MAX_RT_PRIO]; |
156 | }; | 156 | }; |
157 | 157 | ||
158 | #ifdef CONFIG_FAIR_GROUP_SCHED | 158 | #ifdef CONFIG_GROUP_SCHED |
159 | 159 | ||
160 | #include <linux/cgroup.h> | 160 | #include <linux/cgroup.h> |
161 | 161 | ||
@@ -165,19 +165,16 @@ static LIST_HEAD(task_groups); | |||
165 | 165 | ||
166 | /* task group related information */ | 166 | /* task group related information */ |
167 | struct task_group { | 167 | struct task_group { |
168 | #ifdef CONFIG_FAIR_CGROUP_SCHED | 168 | #ifdef CONFIG_CGROUP_SCHED |
169 | struct cgroup_subsys_state css; | 169 | struct cgroup_subsys_state css; |
170 | #endif | 170 | #endif |
171 | |||
172 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
171 | /* schedulable entities of this group on each cpu */ | 173 | /* schedulable entities of this group on each cpu */ |
172 | struct sched_entity **se; | 174 | struct sched_entity **se; |
173 | /* runqueue "owned" by this group on each cpu */ | 175 | /* runqueue "owned" by this group on each cpu */ |
174 | struct cfs_rq **cfs_rq; | 176 | struct cfs_rq **cfs_rq; |
175 | 177 | ||
176 | struct sched_rt_entity **rt_se; | ||
177 | struct rt_rq **rt_rq; | ||
178 | |||
179 | unsigned int rt_ratio; | ||
180 | |||
181 | /* | 178 | /* |
182 | * shares assigned to a task group governs how much of cpu bandwidth | 179 | * shares assigned to a task group governs how much of cpu bandwidth |
183 | * is allocated to the group. The more shares a group has, the more is | 180 | * is allocated to the group. The more shares a group has, the more is |
@@ -213,33 +210,46 @@ struct task_group { | |||
213 | * | 210 | * |
214 | */ | 211 | */ |
215 | unsigned long shares; | 212 | unsigned long shares; |
213 | #endif | ||
214 | |||
215 | #ifdef CONFIG_RT_GROUP_SCHED | ||
216 | struct sched_rt_entity **rt_se; | ||
217 | struct rt_rq **rt_rq; | ||
218 | |||
219 | u64 rt_runtime; | ||
220 | #endif | ||
216 | 221 | ||
217 | struct rcu_head rcu; | 222 | struct rcu_head rcu; |
218 | struct list_head list; | 223 | struct list_head list; |
219 | }; | 224 | }; |
220 | 225 | ||
226 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
221 | /* Default task group's sched entity on each cpu */ | 227 | /* Default task group's sched entity on each cpu */ |
222 | static DEFINE_PER_CPU(struct sched_entity, init_sched_entity); | 228 | static DEFINE_PER_CPU(struct sched_entity, init_sched_entity); |
223 | /* Default task group's cfs_rq on each cpu */ | 229 | /* Default task group's cfs_rq on each cpu */ |
224 | static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp; | 230 | static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp; |
225 | 231 | ||
226 | static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity); | ||
227 | static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp; | ||
228 | |||
229 | static struct sched_entity *init_sched_entity_p[NR_CPUS]; | 232 | static struct sched_entity *init_sched_entity_p[NR_CPUS]; |
230 | static struct cfs_rq *init_cfs_rq_p[NR_CPUS]; | 233 | static struct cfs_rq *init_cfs_rq_p[NR_CPUS]; |
234 | #endif | ||
235 | |||
236 | #ifdef CONFIG_RT_GROUP_SCHED | ||
237 | static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity); | ||
238 | static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp; | ||
231 | 239 | ||
232 | static struct sched_rt_entity *init_sched_rt_entity_p[NR_CPUS]; | 240 | static struct sched_rt_entity *init_sched_rt_entity_p[NR_CPUS]; |
233 | static struct rt_rq *init_rt_rq_p[NR_CPUS]; | 241 | static struct rt_rq *init_rt_rq_p[NR_CPUS]; |
242 | #endif | ||
234 | 243 | ||
235 | /* task_group_mutex serializes add/remove of task groups and also changes to | 244 | /* task_group_lock serializes add/remove of task groups and also changes to |
236 | * a task group's cpu shares. | 245 | * a task group's cpu shares. |
237 | */ | 246 | */ |
238 | static DEFINE_MUTEX(task_group_mutex); | 247 | static DEFINE_SPINLOCK(task_group_lock); |
239 | 248 | ||
240 | /* doms_cur_mutex serializes access to doms_cur[] array */ | 249 | /* doms_cur_mutex serializes access to doms_cur[] array */ |
241 | static DEFINE_MUTEX(doms_cur_mutex); | 250 | static DEFINE_MUTEX(doms_cur_mutex); |
242 | 251 | ||
252 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
243 | #ifdef CONFIG_SMP | 253 | #ifdef CONFIG_SMP |
244 | /* kernel thread that runs rebalance_shares() periodically */ | 254 | /* kernel thread that runs rebalance_shares() periodically */ |
245 | static struct task_struct *lb_monitor_task; | 255 | static struct task_struct *lb_monitor_task; |
@@ -248,35 +258,40 @@ static int load_balance_monitor(void *unused); | |||
248 | 258 | ||
249 | static void set_se_shares(struct sched_entity *se, unsigned long shares); | 259 | static void set_se_shares(struct sched_entity *se, unsigned long shares); |
250 | 260 | ||
261 | #ifdef CONFIG_USER_SCHED | ||
262 | # define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD) | ||
263 | #else | ||
264 | # define INIT_TASK_GROUP_LOAD NICE_0_LOAD | ||
265 | #endif | ||
266 | |||
267 | #define MIN_GROUP_SHARES 2 | ||
268 | |||
269 | static int init_task_group_load = INIT_TASK_GROUP_LOAD; | ||
270 | #endif | ||
271 | |||
251 | /* Default task group. | 272 | /* Default task group. |
252 | * Every task in system belong to this group at bootup. | 273 | * Every task in system belong to this group at bootup. |
253 | */ | 274 | */ |
254 | struct task_group init_task_group = { | 275 | struct task_group init_task_group = { |
276 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
255 | .se = init_sched_entity_p, | 277 | .se = init_sched_entity_p, |
256 | .cfs_rq = init_cfs_rq_p, | 278 | .cfs_rq = init_cfs_rq_p, |
279 | #endif | ||
257 | 280 | ||
281 | #ifdef CONFIG_RT_GROUP_SCHED | ||
258 | .rt_se = init_sched_rt_entity_p, | 282 | .rt_se = init_sched_rt_entity_p, |
259 | .rt_rq = init_rt_rq_p, | 283 | .rt_rq = init_rt_rq_p, |
260 | }; | ||
261 | |||
262 | #ifdef CONFIG_FAIR_USER_SCHED | ||
263 | # define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD) | ||
264 | #else | ||
265 | # define INIT_TASK_GROUP_LOAD NICE_0_LOAD | ||
266 | #endif | 284 | #endif |
267 | 285 | }; | |
268 | #define MIN_GROUP_SHARES 2 | ||
269 | |||
270 | static int init_task_group_load = INIT_TASK_GROUP_LOAD; | ||
271 | 286 | ||
272 | /* return group to which a task belongs */ | 287 | /* return group to which a task belongs */ |
273 | static inline struct task_group *task_group(struct task_struct *p) | 288 | static inline struct task_group *task_group(struct task_struct *p) |
274 | { | 289 | { |
275 | struct task_group *tg; | 290 | struct task_group *tg; |
276 | 291 | ||
277 | #ifdef CONFIG_FAIR_USER_SCHED | 292 | #ifdef CONFIG_USER_SCHED |
278 | tg = p->user->tg; | 293 | tg = p->user->tg; |
279 | #elif defined(CONFIG_FAIR_CGROUP_SCHED) | 294 | #elif defined(CONFIG_CGROUP_SCHED) |
280 | tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id), | 295 | tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id), |
281 | struct task_group, css); | 296 | struct task_group, css); |
282 | #else | 297 | #else |
@@ -288,21 +303,15 @@ static inline struct task_group *task_group(struct task_struct *p) | |||
288 | /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ | 303 | /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ |
289 | static inline void set_task_rq(struct task_struct *p, unsigned int cpu) | 304 | static inline void set_task_rq(struct task_struct *p, unsigned int cpu) |
290 | { | 305 | { |
306 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
291 | p->se.cfs_rq = task_group(p)->cfs_rq[cpu]; | 307 | p->se.cfs_rq = task_group(p)->cfs_rq[cpu]; |
292 | p->se.parent = task_group(p)->se[cpu]; | 308 | p->se.parent = task_group(p)->se[cpu]; |
309 | #endif | ||
293 | 310 | ||
311 | #ifdef CONFIG_RT_GROUP_SCHED | ||
294 | p->rt.rt_rq = task_group(p)->rt_rq[cpu]; | 312 | p->rt.rt_rq = task_group(p)->rt_rq[cpu]; |
295 | p->rt.parent = task_group(p)->rt_se[cpu]; | 313 | p->rt.parent = task_group(p)->rt_se[cpu]; |
296 | } | 314 | #endif |
297 | |||
298 | static inline void lock_task_group_list(void) | ||
299 | { | ||
300 | mutex_lock(&task_group_mutex); | ||
301 | } | ||
302 | |||
303 | static inline void unlock_task_group_list(void) | ||
304 | { | ||
305 | mutex_unlock(&task_group_mutex); | ||
306 | } | 315 | } |
307 | 316 | ||
308 | static inline void lock_doms_cur(void) | 317 | static inline void lock_doms_cur(void) |
@@ -318,12 +327,10 @@ static inline void unlock_doms_cur(void) | |||
318 | #else | 327 | #else |
319 | 328 | ||
320 | static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } | 329 | static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } |
321 | static inline void lock_task_group_list(void) { } | ||
322 | static inline void unlock_task_group_list(void) { } | ||
323 | static inline void lock_doms_cur(void) { } | 330 | static inline void lock_doms_cur(void) { } |
324 | static inline void unlock_doms_cur(void) { } | 331 | static inline void unlock_doms_cur(void) { } |
325 | 332 | ||
326 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | 333 | #endif /* CONFIG_GROUP_SCHED */ |
327 | 334 | ||
328 | /* CFS-related fields in a runqueue */ | 335 | /* CFS-related fields in a runqueue */ |
329 | struct cfs_rq { | 336 | struct cfs_rq { |
@@ -363,7 +370,7 @@ struct cfs_rq { | |||
363 | struct rt_rq { | 370 | struct rt_rq { |
364 | struct rt_prio_array active; | 371 | struct rt_prio_array active; |
365 | unsigned long rt_nr_running; | 372 | unsigned long rt_nr_running; |
366 | #if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED | 373 | #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED |
367 | int highest_prio; /* highest queued rt task prio */ | 374 | int highest_prio; /* highest queued rt task prio */ |
368 | #endif | 375 | #endif |
369 | #ifdef CONFIG_SMP | 376 | #ifdef CONFIG_SMP |
@@ -373,7 +380,9 @@ struct rt_rq { | |||
373 | int rt_throttled; | 380 | int rt_throttled; |
374 | u64 rt_time; | 381 | u64 rt_time; |
375 | 382 | ||
376 | #ifdef CONFIG_FAIR_GROUP_SCHED | 383 | #ifdef CONFIG_RT_GROUP_SCHED |
384 | unsigned long rt_nr_boosted; | ||
385 | |||
377 | struct rq *rq; | 386 | struct rq *rq; |
378 | struct list_head leaf_rt_rq_list; | 387 | struct list_head leaf_rt_rq_list; |
379 | struct task_group *tg; | 388 | struct task_group *tg; |
@@ -447,6 +456,8 @@ struct rq { | |||
447 | #ifdef CONFIG_FAIR_GROUP_SCHED | 456 | #ifdef CONFIG_FAIR_GROUP_SCHED |
448 | /* list of leaf cfs_rq on this cpu: */ | 457 | /* list of leaf cfs_rq on this cpu: */ |
449 | struct list_head leaf_cfs_rq_list; | 458 | struct list_head leaf_cfs_rq_list; |
459 | #endif | ||
460 | #ifdef CONFIG_RT_GROUP_SCHED | ||
450 | struct list_head leaf_rt_rq_list; | 461 | struct list_head leaf_rt_rq_list; |
451 | #endif | 462 | #endif |
452 | 463 | ||
@@ -652,19 +663,21 @@ const_debug unsigned int sysctl_sched_features = | |||
652 | const_debug unsigned int sysctl_sched_nr_migrate = 32; | 663 | const_debug unsigned int sysctl_sched_nr_migrate = 32; |
653 | 664 | ||
654 | /* | 665 | /* |
655 | * period over which we measure -rt task cpu usage in ms. | 666 | * period over which we measure -rt task cpu usage in us. |
656 | * default: 1s | 667 | * default: 1s |
657 | */ | 668 | */ |
658 | const_debug unsigned int sysctl_sched_rt_period = 1000; | 669 | unsigned int sysctl_sched_rt_period = 1000000; |
659 | 670 | ||
660 | #define SCHED_RT_FRAC_SHIFT 16 | 671 | /* |
661 | #define SCHED_RT_FRAC (1UL << SCHED_RT_FRAC_SHIFT) | 672 | * part of the period that we allow rt tasks to run in us. |
673 | * default: 0.95s | ||
674 | */ | ||
675 | int sysctl_sched_rt_runtime = 950000; | ||
662 | 676 | ||
663 | /* | 677 | /* |
664 | * ratio of time -rt tasks may consume. | 678 | * single value that denotes runtime == period, ie unlimited time. |
665 | * default: 95% | ||
666 | */ | 679 | */ |
667 | const_debug unsigned int sysctl_sched_rt_ratio = 62259; | 680 | #define RUNTIME_INF ((u64)~0ULL) |
668 | 681 | ||
669 | /* | 682 | /* |
670 | * For kernel-internal use: high-speed (but slightly incorrect) per-cpu | 683 | * For kernel-internal use: high-speed (but slightly incorrect) per-cpu |
@@ -4571,6 +4584,15 @@ recheck: | |||
4571 | return -EPERM; | 4584 | return -EPERM; |
4572 | } | 4585 | } |
4573 | 4586 | ||
4587 | #ifdef CONFIG_RT_GROUP_SCHED | ||
4588 | /* | ||
4589 | * Do not allow realtime tasks into groups that have no runtime | ||
4590 | * assigned. | ||
4591 | */ | ||
4592 | if (rt_policy(policy) && task_group(p)->rt_runtime == 0) | ||
4593 | return -EPERM; | ||
4594 | #endif | ||
4595 | |||
4574 | retval = security_task_setscheduler(p, policy, param); | 4596 | retval = security_task_setscheduler(p, policy, param); |
4575 | if (retval) | 4597 | if (retval) |
4576 | return retval; | 4598 | return retval; |
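The hunk above makes sched_setscheduler() return -EPERM when a realtime policy is requested for a task whose group was given no rt_runtime, since such a group can never actually run RT tasks. A minimal userspace illustration, assuming CONFIG_RT_GROUP_SCHED, a caller that is otherwise allowed to use realtime policies, and a group whose runtime is still 0:

/* Attempt to switch the calling task to SCHED_FIFO; with the check above in
 * place and rt_runtime == 0 for the task's group this fails with EPERM. */
#include <errno.h>
#include <sched.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	struct sched_param sp = { .sched_priority = 10 };

	if (sched_setscheduler(0, SCHED_FIFO, &sp) == -1)
		printf("SCHED_FIFO rejected: %s\n", strerror(errno));
	else
		printf("SCHED_FIFO granted\n");
	return 0;
}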
@@ -7112,7 +7134,7 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq) | |||
7112 | /* delimiter for bitsearch: */ | 7134 | /* delimiter for bitsearch: */ |
7113 | __set_bit(MAX_RT_PRIO, array->bitmap); | 7135 | __set_bit(MAX_RT_PRIO, array->bitmap); |
7114 | 7136 | ||
7115 | #if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED | 7137 | #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED |
7116 | rt_rq->highest_prio = MAX_RT_PRIO; | 7138 | rt_rq->highest_prio = MAX_RT_PRIO; |
7117 | #endif | 7139 | #endif |
7118 | #ifdef CONFIG_SMP | 7140 | #ifdef CONFIG_SMP |
@@ -7123,7 +7145,8 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq) | |||
7123 | rt_rq->rt_time = 0; | 7145 | rt_rq->rt_time = 0; |
7124 | rt_rq->rt_throttled = 0; | 7146 | rt_rq->rt_throttled = 0; |
7125 | 7147 | ||
7126 | #ifdef CONFIG_FAIR_GROUP_SCHED | 7148 | #ifdef CONFIG_RT_GROUP_SCHED |
7149 | rt_rq->rt_nr_boosted = 0; | ||
7127 | rt_rq->rq = rq; | 7150 | rt_rq->rq = rq; |
7128 | #endif | 7151 | #endif |
7129 | } | 7152 | } |
@@ -7146,7 +7169,9 @@ static void init_tg_cfs_entry(struct rq *rq, struct task_group *tg, | |||
7146 | se->load.inv_weight = div64_64(1ULL<<32, se->load.weight); | 7169 | se->load.inv_weight = div64_64(1ULL<<32, se->load.weight); |
7147 | se->parent = NULL; | 7170 | se->parent = NULL; |
7148 | } | 7171 | } |
7172 | #endif | ||
7149 | 7173 | ||
7174 | #ifdef CONFIG_RT_GROUP_SCHED | ||
7150 | static void init_tg_rt_entry(struct rq *rq, struct task_group *tg, | 7175 | static void init_tg_rt_entry(struct rq *rq, struct task_group *tg, |
7151 | struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, | 7176 | struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, |
7152 | int cpu, int add) | 7177 | int cpu, int add) |
@@ -7175,7 +7200,7 @@ void __init sched_init(void) | |||
7175 | init_defrootdomain(); | 7200 | init_defrootdomain(); |
7176 | #endif | 7201 | #endif |
7177 | 7202 | ||
7178 | #ifdef CONFIG_FAIR_GROUP_SCHED | 7203 | #ifdef CONFIG_GROUP_SCHED |
7179 | list_add(&init_task_group.list, &task_groups); | 7204 | list_add(&init_task_group.list, &task_groups); |
7180 | #endif | 7205 | #endif |
7181 | 7206 | ||
@@ -7196,7 +7221,10 @@ void __init sched_init(void) | |||
7196 | &per_cpu(init_cfs_rq, i), | 7221 | &per_cpu(init_cfs_rq, i), |
7197 | &per_cpu(init_sched_entity, i), i, 1); | 7222 | &per_cpu(init_sched_entity, i), i, 1); |
7198 | 7223 | ||
7199 | init_task_group.rt_ratio = sysctl_sched_rt_ratio; /* XXX */ | 7224 | #endif |
7225 | #ifdef CONFIG_RT_GROUP_SCHED | ||
7226 | init_task_group.rt_runtime = | ||
7227 | sysctl_sched_rt_runtime * NSEC_PER_USEC; | ||
7200 | INIT_LIST_HEAD(&rq->leaf_rt_rq_list); | 7228 | INIT_LIST_HEAD(&rq->leaf_rt_rq_list); |
7201 | init_tg_rt_entry(rq, &init_task_group, | 7229 | init_tg_rt_entry(rq, &init_task_group, |
7202 | &per_cpu(init_rt_rq, i), | 7230 | &per_cpu(init_rt_rq, i), |
@@ -7303,7 +7331,7 @@ void normalize_rt_tasks(void) | |||
7303 | unsigned long flags; | 7331 | unsigned long flags; |
7304 | struct rq *rq; | 7332 | struct rq *rq; |
7305 | 7333 | ||
7306 | read_lock_irq(&tasklist_lock); | 7334 | read_lock_irqsave(&tasklist_lock, flags); |
7307 | do_each_thread(g, p) { | 7335 | do_each_thread(g, p) { |
7308 | /* | 7336 | /* |
7309 | * Only normalize user tasks: | 7337 | * Only normalize user tasks: |
@@ -7329,16 +7357,16 @@ void normalize_rt_tasks(void) | |||
7329 | continue; | 7357 | continue; |
7330 | } | 7358 | } |
7331 | 7359 | ||
7332 | spin_lock_irqsave(&p->pi_lock, flags); | 7360 | spin_lock(&p->pi_lock); |
7333 | rq = __task_rq_lock(p); | 7361 | rq = __task_rq_lock(p); |
7334 | 7362 | ||
7335 | normalize_task(rq, p); | 7363 | normalize_task(rq, p); |
7336 | 7364 | ||
7337 | __task_rq_unlock(rq); | 7365 | __task_rq_unlock(rq); |
7338 | spin_unlock_irqrestore(&p->pi_lock, flags); | 7366 | spin_unlock(&p->pi_lock); |
7339 | } while_each_thread(g, p); | 7367 | } while_each_thread(g, p); |
7340 | 7368 | ||
7341 | read_unlock_irq(&tasklist_lock); | 7369 | read_unlock_irqrestore(&tasklist_lock, flags); |
7342 | } | 7370 | } |
7343 | 7371 | ||
7344 | #endif /* CONFIG_MAGIC_SYSRQ */ | 7372 | #endif /* CONFIG_MAGIC_SYSRQ */ |
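normalize_rt_tasks() previously disabled interrupts twice, once via read_lock_irq() on tasklist_lock and again via spin_lock_irqsave() on each task's pi_lock. The change takes the outer lock with read_lock_irqsave() and keeps interrupts off for the whole walk, so the inner pi_lock acquisition can be a plain spin_lock(). A sketch of that nesting, with illustrative lock names:

/*
 * Sketch of the locking pattern used above: the outer read lock now disables
 * interrupts once for the whole walk, so the inner per-item lock no longer
 * needs its own irqsave/irqrestore. Lock names are illustrative.
 */
#include <linux/spinlock.h>

static DEFINE_RWLOCK(list_lock);	/* stands in for tasklist_lock */
static DEFINE_SPINLOCK(item_lock);	/* stands in for p->pi_lock    */

static void walk_items(void)
{
	unsigned long flags;

	read_lock_irqsave(&list_lock, flags);	/* IRQs off from here            */
	spin_lock(&item_lock);			/* no _irqsave: IRQs already off */
	/* ... per-item work ... */
	spin_unlock(&item_lock);
	read_unlock_irqrestore(&list_lock, flags);
}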
@@ -7387,9 +7415,9 @@ void set_curr_task(int cpu, struct task_struct *p) | |||
7387 | 7415 | ||
7388 | #endif | 7416 | #endif |
7389 | 7417 | ||
7390 | #ifdef CONFIG_FAIR_GROUP_SCHED | 7418 | #ifdef CONFIG_GROUP_SCHED |
7391 | 7419 | ||
7392 | #ifdef CONFIG_SMP | 7420 | #if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP |
7393 | /* | 7421 | /* |
7394 | * distribute shares of all task groups among their schedulable entities, | 7422 | * distribute shares of all task groups among their schedulable entities, |
7395 | * to reflect load distribution across cpus. | 7423 | * to reflect load distribution across cpus. |
@@ -7540,7 +7568,8 @@ static int load_balance_monitor(void *unused) | |||
7540 | } | 7568 | } |
7541 | #endif /* CONFIG_SMP */ | 7569 | #endif /* CONFIG_SMP */ |
7542 | 7570 | ||
7543 | static void free_sched_group(struct task_group *tg) | 7571 | #ifdef CONFIG_FAIR_GROUP_SCHED |
7572 | static void free_fair_sched_group(struct task_group *tg) | ||
7544 | { | 7573 | { |
7545 | int i; | 7574 | int i; |
7546 | 7575 | ||
@@ -7549,49 +7578,27 @@ static void free_sched_group(struct task_group *tg) | |||
7549 | kfree(tg->cfs_rq[i]); | 7578 | kfree(tg->cfs_rq[i]); |
7550 | if (tg->se) | 7579 | if (tg->se) |
7551 | kfree(tg->se[i]); | 7580 | kfree(tg->se[i]); |
7552 | if (tg->rt_rq) | ||
7553 | kfree(tg->rt_rq[i]); | ||
7554 | if (tg->rt_se) | ||
7555 | kfree(tg->rt_se[i]); | ||
7556 | } | 7581 | } |
7557 | 7582 | ||
7558 | kfree(tg->cfs_rq); | 7583 | kfree(tg->cfs_rq); |
7559 | kfree(tg->se); | 7584 | kfree(tg->se); |
7560 | kfree(tg->rt_rq); | ||
7561 | kfree(tg->rt_se); | ||
7562 | kfree(tg); | ||
7563 | } | 7585 | } |
7564 | 7586 | ||
7565 | /* allocate runqueue etc for a new task group */ | 7587 | static int alloc_fair_sched_group(struct task_group *tg) |
7566 | struct task_group *sched_create_group(void) | ||
7567 | { | 7588 | { |
7568 | struct task_group *tg; | ||
7569 | struct cfs_rq *cfs_rq; | 7589 | struct cfs_rq *cfs_rq; |
7570 | struct sched_entity *se; | 7590 | struct sched_entity *se; |
7571 | struct rt_rq *rt_rq; | ||
7572 | struct sched_rt_entity *rt_se; | ||
7573 | struct rq *rq; | 7591 | struct rq *rq; |
7574 | int i; | 7592 | int i; |
7575 | 7593 | ||
7576 | tg = kzalloc(sizeof(*tg), GFP_KERNEL); | ||
7577 | if (!tg) | ||
7578 | return ERR_PTR(-ENOMEM); | ||
7579 | |||
7580 | tg->cfs_rq = kzalloc(sizeof(cfs_rq) * NR_CPUS, GFP_KERNEL); | 7594 | tg->cfs_rq = kzalloc(sizeof(cfs_rq) * NR_CPUS, GFP_KERNEL); |
7581 | if (!tg->cfs_rq) | 7595 | if (!tg->cfs_rq) |
7582 | goto err; | 7596 | goto err; |
7583 | tg->se = kzalloc(sizeof(se) * NR_CPUS, GFP_KERNEL); | 7597 | tg->se = kzalloc(sizeof(se) * NR_CPUS, GFP_KERNEL); |
7584 | if (!tg->se) | 7598 | if (!tg->se) |
7585 | goto err; | 7599 | goto err; |
7586 | tg->rt_rq = kzalloc(sizeof(rt_rq) * NR_CPUS, GFP_KERNEL); | ||
7587 | if (!tg->rt_rq) | ||
7588 | goto err; | ||
7589 | tg->rt_se = kzalloc(sizeof(rt_se) * NR_CPUS, GFP_KERNEL); | ||
7590 | if (!tg->rt_se) | ||
7591 | goto err; | ||
7592 | 7600 | ||
7593 | tg->shares = NICE_0_LOAD; | 7601 | tg->shares = NICE_0_LOAD; |
7594 | tg->rt_ratio = 0; /* XXX */ | ||
7595 | 7602 | ||
7596 | for_each_possible_cpu(i) { | 7603 | for_each_possible_cpu(i) { |
7597 | rq = cpu_rq(i); | 7604 | rq = cpu_rq(i); |
@@ -7606,6 +7613,79 @@ struct task_group *sched_create_group(void) | |||
7606 | if (!se) | 7613 | if (!se) |
7607 | goto err; | 7614 | goto err; |
7608 | 7615 | ||
7616 | init_tg_cfs_entry(rq, tg, cfs_rq, se, i, 0); | ||
7617 | } | ||
7618 | |||
7619 | return 1; | ||
7620 | |||
7621 | err: | ||
7622 | return 0; | ||
7623 | } | ||
7624 | |||
7625 | static inline void register_fair_sched_group(struct task_group *tg, int cpu) | ||
7626 | { | ||
7627 | list_add_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list, | ||
7628 | &cpu_rq(cpu)->leaf_cfs_rq_list); | ||
7629 | } | ||
7630 | |||
7631 | static inline void unregister_fair_sched_group(struct task_group *tg, int cpu) | ||
7632 | { | ||
7633 | list_del_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list); | ||
7634 | } | ||
7635 | #else | ||
7636 | static inline void free_fair_sched_group(struct task_group *tg) | ||
7637 | { | ||
7638 | } | ||
7639 | |||
7640 | static inline int alloc_fair_sched_group(struct task_group *tg) | ||
7641 | { | ||
7642 | return 1; | ||
7643 | } | ||
7644 | |||
7645 | static inline void register_fair_sched_group(struct task_group *tg, int cpu) | ||
7646 | { | ||
7647 | } | ||
7648 | |||
7649 | static inline void unregister_fair_sched_group(struct task_group *tg, int cpu) | ||
7650 | { | ||
7651 | } | ||
7652 | #endif | ||
7653 | |||
7654 | #ifdef CONFIG_RT_GROUP_SCHED | ||
7655 | static void free_rt_sched_group(struct task_group *tg) | ||
7656 | { | ||
7657 | int i; | ||
7658 | |||
7659 | for_each_possible_cpu(i) { | ||
7660 | if (tg->rt_rq) | ||
7661 | kfree(tg->rt_rq[i]); | ||
7662 | if (tg->rt_se) | ||
7663 | kfree(tg->rt_se[i]); | ||
7664 | } | ||
7665 | |||
7666 | kfree(tg->rt_rq); | ||
7667 | kfree(tg->rt_se); | ||
7668 | } | ||
7669 | |||
7670 | static int alloc_rt_sched_group(struct task_group *tg) | ||
7671 | { | ||
7672 | struct rt_rq *rt_rq; | ||
7673 | struct sched_rt_entity *rt_se; | ||
7674 | struct rq *rq; | ||
7675 | int i; | ||
7676 | |||
7677 | tg->rt_rq = kzalloc(sizeof(rt_rq) * NR_CPUS, GFP_KERNEL); | ||
7678 | if (!tg->rt_rq) | ||
7679 | goto err; | ||
7680 | tg->rt_se = kzalloc(sizeof(rt_se) * NR_CPUS, GFP_KERNEL); | ||
7681 | if (!tg->rt_se) | ||
7682 | goto err; | ||
7683 | |||
7684 | tg->rt_runtime = 0; | ||
7685 | |||
7686 | for_each_possible_cpu(i) { | ||
7687 | rq = cpu_rq(i); | ||
7688 | |||
7609 | rt_rq = kmalloc_node(sizeof(struct rt_rq), | 7689 | rt_rq = kmalloc_node(sizeof(struct rt_rq), |
7610 | GFP_KERNEL|__GFP_ZERO, cpu_to_node(i)); | 7690 | GFP_KERNEL|__GFP_ZERO, cpu_to_node(i)); |
7611 | if (!rt_rq) | 7691 | if (!rt_rq) |
@@ -7616,20 +7696,75 @@ struct task_group *sched_create_group(void) | |||
7616 | if (!rt_se) | 7696 | if (!rt_se) |
7617 | goto err; | 7697 | goto err; |
7618 | 7698 | ||
7619 | init_tg_cfs_entry(rq, tg, cfs_rq, se, i, 0); | ||
7620 | init_tg_rt_entry(rq, tg, rt_rq, rt_se, i, 0); | 7699 | init_tg_rt_entry(rq, tg, rt_rq, rt_se, i, 0); |
7621 | } | 7700 | } |
7622 | 7701 | ||
7623 | lock_task_group_list(); | 7702 | return 1; |
7703 | |||
7704 | err: | ||
7705 | return 0; | ||
7706 | } | ||
7707 | |||
7708 | static inline void register_rt_sched_group(struct task_group *tg, int cpu) | ||
7709 | { | ||
7710 | list_add_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list, | ||
7711 | &cpu_rq(cpu)->leaf_rt_rq_list); | ||
7712 | } | ||
7713 | |||
7714 | static inline void unregister_rt_sched_group(struct task_group *tg, int cpu) | ||
7715 | { | ||
7716 | list_del_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list); | ||
7717 | } | ||
7718 | #else | ||
7719 | static inline void free_rt_sched_group(struct task_group *tg) | ||
7720 | { | ||
7721 | } | ||
7722 | |||
7723 | static inline int alloc_rt_sched_group(struct task_group *tg) | ||
7724 | { | ||
7725 | return 1; | ||
7726 | } | ||
7727 | |||
7728 | static inline void register_rt_sched_group(struct task_group *tg, int cpu) | ||
7729 | { | ||
7730 | } | ||
7731 | |||
7732 | static inline void unregister_rt_sched_group(struct task_group *tg, int cpu) | ||
7733 | { | ||
7734 | } | ||
7735 | #endif | ||
7736 | |||
7737 | static void free_sched_group(struct task_group *tg) | ||
7738 | { | ||
7739 | free_fair_sched_group(tg); | ||
7740 | free_rt_sched_group(tg); | ||
7741 | kfree(tg); | ||
7742 | } | ||
7743 | |||
7744 | /* allocate runqueue etc for a new task group */ | ||
7745 | struct task_group *sched_create_group(void) | ||
7746 | { | ||
7747 | struct task_group *tg; | ||
7748 | unsigned long flags; | ||
7749 | int i; | ||
7750 | |||
7751 | tg = kzalloc(sizeof(*tg), GFP_KERNEL); | ||
7752 | if (!tg) | ||
7753 | return ERR_PTR(-ENOMEM); | ||
7754 | |||
7755 | if (!alloc_fair_sched_group(tg)) | ||
7756 | goto err; | ||
7757 | |||
7758 | if (!alloc_rt_sched_group(tg)) | ||
7759 | goto err; | ||
7760 | |||
7761 | spin_lock_irqsave(&task_group_lock, flags); | ||
7624 | for_each_possible_cpu(i) { | 7762 | for_each_possible_cpu(i) { |
7625 | rq = cpu_rq(i); | 7763 | register_fair_sched_group(tg, i); |
7626 | cfs_rq = tg->cfs_rq[i]; | 7764 | register_rt_sched_group(tg, i); |
7627 | list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list); | ||
7628 | rt_rq = tg->rt_rq[i]; | ||
7629 | list_add_rcu(&rt_rq->leaf_rt_rq_list, &rq->leaf_rt_rq_list); | ||
7630 | } | 7765 | } |
7631 | list_add_rcu(&tg->list, &task_groups); | 7766 | list_add_rcu(&tg->list, &task_groups); |
7632 | unlock_task_group_list(); | 7767 | spin_unlock_irqrestore(&task_group_lock, flags); |
7633 | 7768 | ||
7634 | return tg; | 7769 | return tg; |
7635 | 7770 | ||
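sched_create_group() is rebuilt around per-class helpers: alloc_fair_sched_group() and alloc_rt_sched_group() do the (possibly sleeping) per-cpu allocations first, and only then is the group published on each cpu's leaf list and on task_groups under the task_group_lock spinlock that replaces the old task_group_mutex. A self-contained sketch of that allocate-then-publish shape, with my_group, my_groups and my_group_lock as illustrative names:

/*
 * Allocate-then-publish sketch mirroring the new sched_create_group() above.
 * struct my_group, my_groups and my_group_lock are illustrative stand-ins,
 * not scheduler data structures.
 */
#include <linux/err.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct my_group {
	struct list_head list;
};

static LIST_HEAD(my_groups);
static DEFINE_SPINLOCK(my_group_lock);

static struct my_group *my_create_group(void)
{
	struct my_group *g;
	unsigned long flags;

	/* Sleeping allocations happen before any spinlock is taken. */
	g = kzalloc(sizeof(*g), GFP_KERNEL);
	if (!g)
		return ERR_PTR(-ENOMEM);

	/* Publish only once the object is fully set up. */
	spin_lock_irqsave(&my_group_lock, flags);
	list_add_rcu(&g->list, &my_groups);
	spin_unlock_irqrestore(&my_group_lock, flags);

	return g;
}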
@@ -7648,21 +7783,16 @@ static void free_sched_group_rcu(struct rcu_head *rhp) | |||
7648 | /* Destroy runqueue etc associated with a task group */ | 7783 | /* Destroy runqueue etc associated with a task group */ |
7649 | void sched_destroy_group(struct task_group *tg) | 7784 | void sched_destroy_group(struct task_group *tg) |
7650 | { | 7785 | { |
7651 | struct cfs_rq *cfs_rq = NULL; | 7786 | unsigned long flags; |
7652 | struct rt_rq *rt_rq = NULL; | ||
7653 | int i; | 7787 | int i; |
7654 | 7788 | ||
7655 | lock_task_group_list(); | 7789 | spin_lock_irqsave(&task_group_lock, flags); |
7656 | for_each_possible_cpu(i) { | 7790 | for_each_possible_cpu(i) { |
7657 | cfs_rq = tg->cfs_rq[i]; | 7791 | unregister_fair_sched_group(tg, i); |
7658 | list_del_rcu(&cfs_rq->leaf_cfs_rq_list); | 7792 | unregister_rt_sched_group(tg, i); |
7659 | rt_rq = tg->rt_rq[i]; | ||
7660 | list_del_rcu(&rt_rq->leaf_rt_rq_list); | ||
7661 | } | 7793 | } |
7662 | list_del_rcu(&tg->list); | 7794 | list_del_rcu(&tg->list); |
7663 | unlock_task_group_list(); | 7795 | spin_unlock_irqrestore(&task_group_lock, flags); |
7664 | |||
7665 | BUG_ON(!cfs_rq); | ||
7666 | 7796 | ||
7667 | /* wait for possible concurrent references to cfs_rqs complete */ | 7797 | /* wait for possible concurrent references to cfs_rqs complete */ |
7668 | call_rcu(&tg->rcu, free_sched_group_rcu); | 7798 | call_rcu(&tg->rcu, free_sched_group_rcu); |
@@ -7703,6 +7833,7 @@ void sched_move_task(struct task_struct *tsk) | |||
7703 | task_rq_unlock(rq, &flags); | 7833 | task_rq_unlock(rq, &flags); |
7704 | } | 7834 | } |
7705 | 7835 | ||
7836 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
7706 | /* rq->lock to be locked by caller */ | 7837 | /* rq->lock to be locked by caller */ |
7707 | static void set_se_shares(struct sched_entity *se, unsigned long shares) | 7838 | static void set_se_shares(struct sched_entity *se, unsigned long shares) |
7708 | { | 7839 | { |
@@ -7728,13 +7859,14 @@ static void set_se_shares(struct sched_entity *se, unsigned long shares) | |||
7728 | } | 7859 | } |
7729 | } | 7860 | } |
7730 | 7861 | ||
7862 | static DEFINE_MUTEX(shares_mutex); | ||
7863 | |||
7731 | int sched_group_set_shares(struct task_group *tg, unsigned long shares) | 7864 | int sched_group_set_shares(struct task_group *tg, unsigned long shares) |
7732 | { | 7865 | { |
7733 | int i; | 7866 | int i; |
7734 | struct cfs_rq *cfs_rq; | 7867 | unsigned long flags; |
7735 | struct rq *rq; | ||
7736 | 7868 | ||
7737 | lock_task_group_list(); | 7869 | mutex_lock(&shares_mutex); |
7738 | if (tg->shares == shares) | 7870 | if (tg->shares == shares) |
7739 | goto done; | 7871 | goto done; |
7740 | 7872 | ||
@@ -7746,10 +7878,10 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) | |||
7746 | * load_balance_fair) from referring to this group first, | 7878 | * load_balance_fair) from referring to this group first, |
7747 | * by taking it off the rq->leaf_cfs_rq_list on each cpu. | 7879 | * by taking it off the rq->leaf_cfs_rq_list on each cpu. |
7748 | */ | 7880 | */ |
7749 | for_each_possible_cpu(i) { | 7881 | spin_lock_irqsave(&task_group_lock, flags); |
7750 | cfs_rq = tg->cfs_rq[i]; | 7882 | for_each_possible_cpu(i) |
7751 | list_del_rcu(&cfs_rq->leaf_cfs_rq_list); | 7883 | unregister_fair_sched_group(tg, i); |
7752 | } | 7884 | spin_unlock_irqrestore(&task_group_lock, flags); |
7753 | 7885 | ||
7754 | /* wait for any ongoing reference to this group to finish */ | 7886 | /* wait for any ongoing reference to this group to finish */ |
7755 | synchronize_sched(); | 7887 | synchronize_sched(); |
@@ -7769,13 +7901,12 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) | |||
7769 | * Enable load balance activity on this group, by inserting it back on | 7901 | * Enable load balance activity on this group, by inserting it back on |
7770 | * each cpu's rq->leaf_cfs_rq_list. | 7902 | * each cpu's rq->leaf_cfs_rq_list. |
7771 | */ | 7903 | */ |
7772 | for_each_possible_cpu(i) { | 7904 | spin_lock_irqsave(&task_group_lock, flags); |
7773 | rq = cpu_rq(i); | 7905 | for_each_possible_cpu(i) |
7774 | cfs_rq = tg->cfs_rq[i]; | 7906 | register_fair_sched_group(tg, i); |
7775 | list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list); | 7907 | spin_unlock_irqrestore(&task_group_lock, flags); |
7776 | } | ||
7777 | done: | 7908 | done: |
7778 | unlock_task_group_list(); | 7909 | mutex_unlock(&shares_mutex); |
7779 | return 0; | 7910 | return 0; |
7780 | } | 7911 | } |
7781 | 7912 | ||
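sched_group_set_shares() now uses two levels of protection: a new shares_mutex serializes concurrent shares updates, while the brief list manipulations take task_group_lock with interrupts disabled. The group is unhooked from every cpu's leaf_cfs_rq_list, synchronize_sched() waits out any lockless walker still referencing it, the per-cpu shares are rewritten, and the group is re-registered. A self-contained sketch of that unpublish / synchronize / update / republish pattern, with illustrative names:

/*
 * Sketch of the update pattern in sched_group_set_shares() above.
 * struct item, active_items and the locks are illustrative stand-ins.
 */
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>

struct item {
	struct list_head node;
	unsigned long val;
};

static LIST_HEAD(active_items);
static DEFINE_SPINLOCK(items_lock);
static DEFINE_MUTEX(update_mutex);

static int update_shared_value(struct item *it, unsigned long new_val)
{
	unsigned long flags;

	mutex_lock(&update_mutex);
	if (it->val == new_val)
		goto done;

	spin_lock_irqsave(&items_lock, flags);
	list_del_rcu(&it->node);		/* stop new lockless walkers seeing it */
	spin_unlock_irqrestore(&items_lock, flags);

	synchronize_sched();			/* wait for walkers already inside */

	it->val = new_val;			/* no concurrent references remain */

	spin_lock_irqsave(&items_lock, flags);
	list_add_rcu(&it->node, &active_items);	/* republish */
	spin_unlock_irqrestore(&items_lock, flags);
done:
	mutex_unlock(&update_mutex);
	return 0;
}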
@@ -7783,35 +7914,84 @@ unsigned long sched_group_shares(struct task_group *tg) | |||
7783 | { | 7914 | { |
7784 | return tg->shares; | 7915 | return tg->shares; |
7785 | } | 7916 | } |
7917 | #endif | ||
7786 | 7918 | ||
7919 | #ifdef CONFIG_RT_GROUP_SCHED | ||
7787 | /* | 7920 | /* |
7788 | * Ensure the total rt_ratio <= sysctl_sched_rt_ratio | 7921 | * Ensure that the real time constraints are schedulable. |
7789 | */ | 7922 | */ |
7790 | int sched_group_set_rt_ratio(struct task_group *tg, unsigned long rt_ratio) | 7923 | static DEFINE_MUTEX(rt_constraints_mutex); |
7924 | |||
7925 | static unsigned long to_ratio(u64 period, u64 runtime) | ||
7926 | { | ||
7927 | if (runtime == RUNTIME_INF) | ||
7928 | return 1ULL << 16; | ||
7929 | |||
7930 | runtime *= (1ULL << 16); | ||
7931 | div64_64(runtime, period); | ||
7932 | return runtime; | ||
7933 | } | ||
7934 | |||
7935 | static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) | ||
7791 | { | 7936 | { |
7792 | struct task_group *tgi; | 7937 | struct task_group *tgi; |
7793 | unsigned long total = 0; | 7938 | unsigned long total = 0; |
7939 | unsigned long global_ratio = | ||
7940 | to_ratio(sysctl_sched_rt_period, | ||
7941 | sysctl_sched_rt_runtime < 0 ? | ||
7942 | RUNTIME_INF : sysctl_sched_rt_runtime); | ||
7794 | 7943 | ||
7795 | rcu_read_lock(); | 7944 | rcu_read_lock(); |
7796 | list_for_each_entry_rcu(tgi, &task_groups, list) | 7945 | list_for_each_entry_rcu(tgi, &task_groups, list) { |
7797 | total += tgi->rt_ratio; | 7946 | if (tgi == tg) |
7798 | rcu_read_unlock(); | 7947 | continue; |
7799 | 7948 | ||
7800 | if (total + rt_ratio - tg->rt_ratio > sysctl_sched_rt_ratio) | 7949 | total += to_ratio(period, tgi->rt_runtime); |
7801 | return -EINVAL; | 7950 | } |
7951 | rcu_read_unlock(); | ||
7802 | 7952 | ||
7803 | tg->rt_ratio = rt_ratio; | 7953 | return total + to_ratio(period, runtime) < global_ratio; |
7804 | return 0; | ||
7805 | } | 7954 | } |
7806 | 7955 | ||
7807 | unsigned long sched_group_rt_ratio(struct task_group *tg) | 7956 | int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us) |
7808 | { | 7957 | { |
7809 | return tg->rt_ratio; | 7958 | u64 rt_runtime, rt_period; |
7959 | int err = 0; | ||
7960 | |||
7961 | rt_period = sysctl_sched_rt_period * NSEC_PER_USEC; | ||
7962 | rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC; | ||
7963 | if (rt_runtime_us == -1) | ||
7964 | rt_runtime = rt_period; | ||
7965 | |||
7966 | mutex_lock(&rt_constraints_mutex); | ||
7967 | if (!__rt_schedulable(tg, rt_period, rt_runtime)) { | ||
7968 | err = -EINVAL; | ||
7969 | goto unlock; | ||
7970 | } | ||
7971 | if (rt_runtime_us == -1) | ||
7972 | rt_runtime = RUNTIME_INF; | ||
7973 | tg->rt_runtime = rt_runtime; | ||
7974 | unlock: | ||
7975 | mutex_unlock(&rt_constraints_mutex); | ||
7976 | |||
7977 | return err; | ||
7810 | } | 7978 | } |
7811 | 7979 | ||
7812 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | 7980 | long sched_group_rt_runtime(struct task_group *tg) |
7981 | { | ||
7982 | u64 rt_runtime_us; | ||
7983 | |||
7984 | if (tg->rt_runtime == RUNTIME_INF) | ||
7985 | return -1; | ||
7986 | |||
7987 | rt_runtime_us = tg->rt_runtime; | ||
7988 | do_div(rt_runtime_us, NSEC_PER_USEC); | ||
7989 | return rt_runtime_us; | ||
7990 | } | ||
7991 | #endif | ||
7992 | #endif /* CONFIG_GROUP_SCHED */ | ||
7813 | 7993 | ||
7814 | #ifdef CONFIG_FAIR_CGROUP_SCHED | 7994 | #ifdef CONFIG_CGROUP_SCHED |
7815 | 7995 | ||
7816 | /* return corresponding task_group object of a cgroup */ | 7996 | /* return corresponding task_group object of a cgroup */ |
7817 | static inline struct task_group *cgroup_tg(struct cgroup *cgrp) | 7997 | static inline struct task_group *cgroup_tg(struct cgroup *cgrp) |
@@ -7857,9 +8037,15 @@ static int | |||
7857 | cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | 8037 | cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, |
7858 | struct task_struct *tsk) | 8038 | struct task_struct *tsk) |
7859 | { | 8039 | { |
8040 | #ifdef CONFIG_RT_GROUP_SCHED | ||
8041 | /* Don't accept realtime tasks when there is no way for them to run */ | ||
8042 | if (rt_task(tsk) && cgroup_tg(cgrp)->rt_runtime == 0) | ||
8043 | return -EINVAL; | ||
8044 | #else | ||
7860 | /* We don't support RT-tasks being in separate groups */ | 8045 | /* We don't support RT-tasks being in separate groups */ |
7861 | if (tsk->sched_class != &fair_sched_class) | 8046 | if (tsk->sched_class != &fair_sched_class) |
7862 | return -EINVAL; | 8047 | return -EINVAL; |
8048 | #endif | ||
7863 | 8049 | ||
7864 | return 0; | 8050 | return 0; |
7865 | } | 8051 | } |
@@ -7871,6 +8057,7 @@ cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | |||
7871 | sched_move_task(tsk); | 8057 | sched_move_task(tsk); |
7872 | } | 8058 | } |
7873 | 8059 | ||
8060 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
7874 | static int cpu_shares_write_uint(struct cgroup *cgrp, struct cftype *cftype, | 8061 | static int cpu_shares_write_uint(struct cgroup *cgrp, struct cftype *cftype, |
7875 | u64 shareval) | 8062 | u64 shareval) |
7876 | { | 8063 | { |
@@ -7883,31 +8070,70 @@ static u64 cpu_shares_read_uint(struct cgroup *cgrp, struct cftype *cft) | |||
7883 | 8070 | ||
7884 | return (u64) tg->shares; | 8071 | return (u64) tg->shares; |
7885 | } | 8072 | } |
8073 | #endif | ||
7886 | 8074 | ||
7887 | static int cpu_rt_ratio_write_uint(struct cgroup *cgrp, struct cftype *cftype, | 8075 | #ifdef CONFIG_RT_GROUP_SCHED |
7888 | u64 rt_ratio_val) | 8076 | static int cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft, |
8077 | struct file *file, | ||
8078 | const char __user *userbuf, | ||
8079 | size_t nbytes, loff_t *unused_ppos) | ||
7889 | { | 8080 | { |
7890 | return sched_group_set_rt_ratio(cgroup_tg(cgrp), rt_ratio_val); | 8081 | char buffer[64]; |
8082 | int retval = 0; | ||
8083 | s64 val; | ||
8084 | char *end; | ||
8085 | |||
8086 | if (!nbytes) | ||
8087 | return -EINVAL; | ||
8088 | if (nbytes >= sizeof(buffer)) | ||
8089 | return -E2BIG; | ||
8090 | if (copy_from_user(buffer, userbuf, nbytes)) | ||
8091 | return -EFAULT; | ||
8092 | |||
8093 | buffer[nbytes] = 0; /* nul-terminate */ | ||
8094 | |||
8095 | /* strip newline if necessary */ | ||
8096 | if (nbytes && (buffer[nbytes-1] == '\n')) | ||
8097 | buffer[nbytes-1] = 0; | ||
8098 | val = simple_strtoll(buffer, &end, 0); | ||
8099 | if (*end) | ||
8100 | return -EINVAL; | ||
8101 | |||
8102 | /* Pass to subsystem */ | ||
8103 | retval = sched_group_set_rt_runtime(cgroup_tg(cgrp), val); | ||
8104 | if (!retval) | ||
8105 | retval = nbytes; | ||
8106 | return retval; | ||
7891 | } | 8107 | } |
7892 | 8108 | ||
7893 | static u64 cpu_rt_ratio_read_uint(struct cgroup *cgrp, struct cftype *cft) | 8109 | static ssize_t cpu_rt_runtime_read(struct cgroup *cgrp, struct cftype *cft, |
8110 | struct file *file, | ||
8111 | char __user *buf, size_t nbytes, | ||
8112 | loff_t *ppos) | ||
7894 | { | 8113 | { |
7895 | struct task_group *tg = cgroup_tg(cgrp); | 8114 | char tmp[64]; |
8115 | long val = sched_group_rt_runtime(cgroup_tg(cgrp)); | ||
8116 | int len = sprintf(tmp, "%ld\n", val); | ||
7896 | 8117 | ||
7897 | return (u64) tg->rt_ratio; | 8118 | return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); |
7898 | } | 8119 | } |
8120 | #endif | ||
7899 | 8121 | ||
7900 | static struct cftype cpu_files[] = { | 8122 | static struct cftype cpu_files[] = { |
8123 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
7901 | { | 8124 | { |
7902 | .name = "shares", | 8125 | .name = "shares", |
7903 | .read_uint = cpu_shares_read_uint, | 8126 | .read_uint = cpu_shares_read_uint, |
7904 | .write_uint = cpu_shares_write_uint, | 8127 | .write_uint = cpu_shares_write_uint, |
7905 | }, | 8128 | }, |
8129 | #endif | ||
8130 | #ifdef CONFIG_RT_GROUP_SCHED | ||
7906 | { | 8131 | { |
7907 | .name = "rt_ratio", | 8132 | .name = "rt_runtime_us", |
7908 | .read_uint = cpu_rt_ratio_read_uint, | 8133 | .read = cpu_rt_runtime_read, |
7909 | .write_uint = cpu_rt_ratio_write_uint, | 8134 | .write = cpu_rt_runtime_write, |
7910 | }, | 8135 | }, |
8136 | #endif | ||
7911 | }; | 8137 | }; |
7912 | 8138 | ||
7913 | static int cpu_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont) | 8139 | static int cpu_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont) |
@@ -7926,7 +8152,7 @@ struct cgroup_subsys cpu_cgroup_subsys = { | |||
7926 | .early_init = 1, | 8152 | .early_init = 1, |
7927 | }; | 8153 | }; |
7928 | 8154 | ||
7929 | #endif /* CONFIG_FAIR_CGROUP_SCHED */ | 8155 | #endif /* CONFIG_CGROUP_SCHED */ |
7930 | 8156 | ||
7931 | #ifdef CONFIG_CGROUP_CPUACCT | 8157 | #ifdef CONFIG_CGROUP_CPUACCT |
7932 | 8158 | ||
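Taken together, the sched.c changes replace the per-group rt_ratio with an absolute rt_runtime in nanoseconds per global sched_rt_period, and __rt_schedulable() admits a new setting only if the to_ratio() sums over all groups stay below the global ratio derived from sysctl_sched_rt_period and sysctl_sched_rt_runtime. The arithmetic is still 16-bit fixed point, so the new defaults reproduce the old sysctl_sched_rt_ratio default. A standalone illustration of that conversion (not the kernel's to_ratio(), which goes through div64_64()):

/* Fixed-point check: to_ratio()-style math on the new defaults.
 * Standalone userspace illustration, not the kernel function itself. */
#include <stdio.h>
#include <stdint.h>

static unsigned long to_ratio(uint64_t period, uint64_t runtime)
{
	return (unsigned long)((runtime << 16) / period);	/* Q16 fixed point */
}

int main(void)
{
	/* defaults: 950000 us of RT runtime per 1000000 us period */
	printf("%lu\n", to_ratio(1000000, 950000));
	/* prints 62259, i.e. 0.95 in 16-bit fixed point, the same value the
	 * old sysctl_sched_rt_ratio default encoded. */
	return 0;
}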
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 274b40d7bef2..f54792b175b2 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
@@ -55,14 +55,14 @@ static inline int on_rt_rq(struct sched_rt_entity *rt_se) | |||
55 | return !list_empty(&rt_se->run_list); | 55 | return !list_empty(&rt_se->run_list); |
56 | } | 56 | } |
57 | 57 | ||
58 | #ifdef CONFIG_FAIR_GROUP_SCHED | 58 | #ifdef CONFIG_RT_GROUP_SCHED |
59 | 59 | ||
60 | static inline unsigned int sched_rt_ratio(struct rt_rq *rt_rq) | 60 | static inline u64 sched_rt_runtime(struct rt_rq *rt_rq) |
61 | { | 61 | { |
62 | if (!rt_rq->tg) | 62 | if (!rt_rq->tg) |
63 | return SCHED_RT_FRAC; | 63 | return RUNTIME_INF; |
64 | 64 | ||
65 | return rt_rq->tg->rt_ratio; | 65 | return rt_rq->tg->rt_runtime; |
66 | } | 66 | } |
67 | 67 | ||
68 | #define for_each_leaf_rt_rq(rt_rq, rq) \ | 68 | #define for_each_leaf_rt_rq(rt_rq, rq) \ |
@@ -89,7 +89,7 @@ static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se) | |||
89 | static void enqueue_rt_entity(struct sched_rt_entity *rt_se); | 89 | static void enqueue_rt_entity(struct sched_rt_entity *rt_se); |
90 | static void dequeue_rt_entity(struct sched_rt_entity *rt_se); | 90 | static void dequeue_rt_entity(struct sched_rt_entity *rt_se); |
91 | 91 | ||
92 | static void sched_rt_ratio_enqueue(struct rt_rq *rt_rq) | 92 | static void sched_rt_rq_enqueue(struct rt_rq *rt_rq) |
93 | { | 93 | { |
94 | struct sched_rt_entity *rt_se = rt_rq->rt_se; | 94 | struct sched_rt_entity *rt_se = rt_rq->rt_se; |
95 | 95 | ||
@@ -102,7 +102,7 @@ static void sched_rt_ratio_enqueue(struct rt_rq *rt_rq) | |||
102 | } | 102 | } |
103 | } | 103 | } |
104 | 104 | ||
105 | static void sched_rt_ratio_dequeue(struct rt_rq *rt_rq) | 105 | static void sched_rt_rq_dequeue(struct rt_rq *rt_rq) |
106 | { | 106 | { |
107 | struct sched_rt_entity *rt_se = rt_rq->rt_se; | 107 | struct sched_rt_entity *rt_se = rt_rq->rt_se; |
108 | 108 | ||
@@ -110,11 +110,31 @@ static void sched_rt_ratio_dequeue(struct rt_rq *rt_rq) | |||
110 | dequeue_rt_entity(rt_se); | 110 | dequeue_rt_entity(rt_se); |
111 | } | 111 | } |
112 | 112 | ||
113 | static inline int rt_rq_throttled(struct rt_rq *rt_rq) | ||
114 | { | ||
115 | return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted; | ||
116 | } | ||
117 | |||
118 | static int rt_se_boosted(struct sched_rt_entity *rt_se) | ||
119 | { | ||
120 | struct rt_rq *rt_rq = group_rt_rq(rt_se); | ||
121 | struct task_struct *p; | ||
122 | |||
123 | if (rt_rq) | ||
124 | return !!rt_rq->rt_nr_boosted; | ||
125 | |||
126 | p = rt_task_of(rt_se); | ||
127 | return p->prio != p->normal_prio; | ||
128 | } | ||
129 | |||
113 | #else | 130 | #else |
114 | 131 | ||
115 | static inline unsigned int sched_rt_ratio(struct rt_rq *rt_rq) | 132 | static inline u64 sched_rt_runtime(struct rt_rq *rt_rq) |
116 | { | 133 | { |
117 | return sysctl_sched_rt_ratio; | 134 | if (sysctl_sched_rt_runtime == -1) |
135 | return RUNTIME_INF; | ||
136 | |||
137 | return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC; | ||
118 | } | 138 | } |
119 | 139 | ||
120 | #define for_each_leaf_rt_rq(rt_rq, rq) \ | 140 | #define for_each_leaf_rt_rq(rt_rq, rq) \ |
@@ -141,19 +161,23 @@ static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se) | |||
141 | return NULL; | 161 | return NULL; |
142 | } | 162 | } |
143 | 163 | ||
144 | static inline void sched_rt_ratio_enqueue(struct rt_rq *rt_rq) | 164 | static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq) |
145 | { | 165 | { |
146 | } | 166 | } |
147 | 167 | ||
148 | static inline void sched_rt_ratio_dequeue(struct rt_rq *rt_rq) | 168 | static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq) |
149 | { | 169 | { |
150 | } | 170 | } |
151 | 171 | ||
172 | static inline int rt_rq_throttled(struct rt_rq *rt_rq) | ||
173 | { | ||
174 | return rt_rq->rt_throttled; | ||
175 | } | ||
152 | #endif | 176 | #endif |
153 | 177 | ||
154 | static inline int rt_se_prio(struct sched_rt_entity *rt_se) | 178 | static inline int rt_se_prio(struct sched_rt_entity *rt_se) |
155 | { | 179 | { |
156 | #ifdef CONFIG_FAIR_GROUP_SCHED | 180 | #ifdef CONFIG_RT_GROUP_SCHED |
157 | struct rt_rq *rt_rq = group_rt_rq(rt_se); | 181 | struct rt_rq *rt_rq = group_rt_rq(rt_se); |
158 | 182 | ||
159 | if (rt_rq) | 183 | if (rt_rq) |
@@ -163,28 +187,26 @@ static inline int rt_se_prio(struct sched_rt_entity *rt_se) | |||
163 | return rt_task_of(rt_se)->prio; | 187 | return rt_task_of(rt_se)->prio; |
164 | } | 188 | } |
165 | 189 | ||
166 | static int sched_rt_ratio_exceeded(struct rt_rq *rt_rq) | 190 | static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) |
167 | { | 191 | { |
168 | unsigned int rt_ratio = sched_rt_ratio(rt_rq); | 192 | u64 runtime = sched_rt_runtime(rt_rq); |
169 | u64 period, ratio; | ||
170 | 193 | ||
171 | if (rt_ratio == SCHED_RT_FRAC) | 194 | if (runtime == RUNTIME_INF) |
172 | return 0; | 195 | return 0; |
173 | 196 | ||
174 | if (rt_rq->rt_throttled) | 197 | if (rt_rq->rt_throttled) |
175 | return 1; | 198 | return rt_rq_throttled(rt_rq); |
176 | |||
177 | period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC; | ||
178 | ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT; | ||
179 | 199 | ||
180 | if (rt_rq->rt_time > ratio) { | 200 | if (rt_rq->rt_time > runtime) { |
181 | struct rq *rq = rq_of_rt_rq(rt_rq); | 201 | struct rq *rq = rq_of_rt_rq(rt_rq); |
182 | 202 | ||
183 | rq->rt_throttled = 1; | 203 | rq->rt_throttled = 1; |
184 | rt_rq->rt_throttled = 1; | 204 | rt_rq->rt_throttled = 1; |
185 | 205 | ||
186 | sched_rt_ratio_dequeue(rt_rq); | 206 | if (rt_rq_throttled(rt_rq)) { |
187 | return 1; | 207 | sched_rt_rq_dequeue(rt_rq); |
208 | return 1; | ||
209 | } | ||
188 | } | 210 | } |
189 | 211 | ||
190 | return 0; | 212 | return 0; |
@@ -196,17 +218,16 @@ static void update_sched_rt_period(struct rq *rq) | |||
196 | u64 period; | 218 | u64 period; |
197 | 219 | ||
198 | while (rq->clock > rq->rt_period_expire) { | 220 | while (rq->clock > rq->rt_period_expire) { |
199 | period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC; | 221 | period = (u64)sysctl_sched_rt_period * NSEC_PER_USEC; |
200 | rq->rt_period_expire += period; | 222 | rq->rt_period_expire += period; |
201 | 223 | ||
202 | for_each_leaf_rt_rq(rt_rq, rq) { | 224 | for_each_leaf_rt_rq(rt_rq, rq) { |
203 | unsigned long rt_ratio = sched_rt_ratio(rt_rq); | 225 | u64 runtime = sched_rt_runtime(rt_rq); |
204 | u64 ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT; | ||
205 | 226 | ||
206 | rt_rq->rt_time -= min(rt_rq->rt_time, ratio); | 227 | rt_rq->rt_time -= min(rt_rq->rt_time, runtime); |
207 | if (rt_rq->rt_throttled) { | 228 | if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) { |
208 | rt_rq->rt_throttled = 0; | 229 | rt_rq->rt_throttled = 0; |
209 | sched_rt_ratio_enqueue(rt_rq); | 230 | sched_rt_rq_enqueue(rt_rq); |
210 | } | 231 | } |
211 | } | 232 | } |
212 | 233 | ||
@@ -239,12 +260,7 @@ static void update_curr_rt(struct rq *rq) | |||
239 | cpuacct_charge(curr, delta_exec); | 260 | cpuacct_charge(curr, delta_exec); |
240 | 261 | ||
241 | rt_rq->rt_time += delta_exec; | 262 | rt_rq->rt_time += delta_exec; |
242 | /* | 263 | if (sched_rt_runtime_exceeded(rt_rq)) |
243 | * might make it a tad more accurate: | ||
244 | * | ||
245 | * update_sched_rt_period(rq); | ||
246 | */ | ||
247 | if (sched_rt_ratio_exceeded(rt_rq)) | ||
248 | resched_task(curr); | 264 | resched_task(curr); |
249 | } | 265 | } |
250 | 266 | ||
@@ -253,7 +269,7 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | |||
253 | { | 269 | { |
254 | WARN_ON(!rt_prio(rt_se_prio(rt_se))); | 270 | WARN_ON(!rt_prio(rt_se_prio(rt_se))); |
255 | rt_rq->rt_nr_running++; | 271 | rt_rq->rt_nr_running++; |
256 | #if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED | 272 | #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED |
257 | if (rt_se_prio(rt_se) < rt_rq->highest_prio) | 273 | if (rt_se_prio(rt_se) < rt_rq->highest_prio) |
258 | rt_rq->highest_prio = rt_se_prio(rt_se); | 274 | rt_rq->highest_prio = rt_se_prio(rt_se); |
259 | #endif | 275 | #endif |
@@ -265,6 +281,10 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | |||
265 | 281 | ||
266 | update_rt_migration(rq_of_rt_rq(rt_rq)); | 282 | update_rt_migration(rq_of_rt_rq(rt_rq)); |
267 | #endif | 283 | #endif |
284 | #ifdef CONFIG_RT_GROUP_SCHED | ||
285 | if (rt_se_boosted(rt_se)) | ||
286 | rt_rq->rt_nr_boosted++; | ||
287 | #endif | ||
268 | } | 288 | } |
269 | 289 | ||
270 | static inline | 290 | static inline |
@@ -273,7 +293,7 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | |||
273 | WARN_ON(!rt_prio(rt_se_prio(rt_se))); | 293 | WARN_ON(!rt_prio(rt_se_prio(rt_se))); |
274 | WARN_ON(!rt_rq->rt_nr_running); | 294 | WARN_ON(!rt_rq->rt_nr_running); |
275 | rt_rq->rt_nr_running--; | 295 | rt_rq->rt_nr_running--; |
276 | #if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED | 296 | #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED |
277 | if (rt_rq->rt_nr_running) { | 297 | if (rt_rq->rt_nr_running) { |
278 | struct rt_prio_array *array; | 298 | struct rt_prio_array *array; |
279 | 299 | ||
@@ -295,6 +315,12 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | |||
295 | 315 | ||
296 | update_rt_migration(rq_of_rt_rq(rt_rq)); | 316 | update_rt_migration(rq_of_rt_rq(rt_rq)); |
297 | #endif /* CONFIG_SMP */ | 317 | #endif /* CONFIG_SMP */ |
318 | #ifdef CONFIG_RT_GROUP_SCHED | ||
319 | if (rt_se_boosted(rt_se)) | ||
320 | rt_rq->rt_nr_boosted--; | ||
321 | |||
322 | WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted); | ||
323 | #endif | ||
298 | } | 324 | } |
299 | 325 | ||
300 | static void enqueue_rt_entity(struct sched_rt_entity *rt_se) | 326 | static void enqueue_rt_entity(struct sched_rt_entity *rt_se) |
@@ -303,7 +329,7 @@ static void enqueue_rt_entity(struct sched_rt_entity *rt_se) | |||
303 | struct rt_prio_array *array = &rt_rq->active; | 329 | struct rt_prio_array *array = &rt_rq->active; |
304 | struct rt_rq *group_rq = group_rt_rq(rt_se); | 330 | struct rt_rq *group_rq = group_rt_rq(rt_se); |
305 | 331 | ||
306 | if (group_rq && group_rq->rt_throttled) | 332 | if (group_rq && rt_rq_throttled(group_rq)) |
307 | return; | 333 | return; |
308 | 334 | ||
309 | list_add_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se)); | 335 | list_add_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se)); |
@@ -496,7 +522,7 @@ static struct task_struct *pick_next_task_rt(struct rq *rq) | |||
496 | if (unlikely(!rt_rq->rt_nr_running)) | 522 | if (unlikely(!rt_rq->rt_nr_running)) |
497 | return NULL; | 523 | return NULL; |
498 | 524 | ||
499 | if (sched_rt_ratio_exceeded(rt_rq)) | 525 | if (rt_rq_throttled(rt_rq)) |
500 | return NULL; | 526 | return NULL; |
501 | 527 | ||
502 | do { | 528 | do { |
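In sched_rt.c the throttling test becomes a direct comparison of the runqueue's accumulated rt_time against sched_rt_runtime() in nanoseconds: exceeding it marks the rt_rq throttled and dequeues it, and update_sched_rt_period() later subtracts min(rt_time, runtime) each period and re-enqueues the queue once rt_time falls back under the runtime. rt_nr_boosted keeps priority-inheritance boosted tasks runnable, since rt_rq_throttled() reports a queue as throttled only when it holds no boosted entities. A condensed model of that bookkeeping, written as ordinary userspace C rather than scheduler code:

/* Condensed model of the throttle/replenish accounting in sched_rt.c.
 * This illustrates the bookkeeping only; it is not scheduler code. */
#include <stdio.h>
#include <stdint.h>

#define RUNTIME_INF ((uint64_t)~0ULL)

struct rt_rq_model {
	uint64_t rt_time;	/* ns consumed in the current period  */
	uint64_t rt_runtime;	/* ns allowed per period              */
	int	 rt_throttled;
	int	 rt_nr_boosted;	/* PI-boosted tasks bypass throttling */
};

static int rt_rq_throttled(struct rt_rq_model *rt_rq)
{
	return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted;
}

static void account(struct rt_rq_model *rt_rq, uint64_t delta_exec)
{
	rt_rq->rt_time += delta_exec;
	if (rt_rq->rt_runtime != RUNTIME_INF &&
	    rt_rq->rt_time > rt_rq->rt_runtime)
		rt_rq->rt_throttled = 1;
}

static void new_period(struct rt_rq_model *rt_rq)
{
	uint64_t runtime = rt_rq->rt_runtime;

	/* rt_time -= min(rt_time, runtime), then unthrottle if under budget */
	rt_rq->rt_time -= rt_rq->rt_time < runtime ? rt_rq->rt_time : runtime;
	if (rt_rq->rt_throttled && rt_rq->rt_time < runtime)
		rt_rq->rt_throttled = 0;
}

int main(void)
{
	struct rt_rq_model rq = { 0, 950000000ULL, 0, 0 };	/* 0.95 s per 1 s */

	account(&rq, 960000000ULL);	/* consumed 0.96 s of RT time */
	printf("throttled: %d\n", rt_rq_throttled(&rq));	/* prints 1 */
	new_period(&rq);		/* next period replenishes    */
	printf("throttled: %d\n", rt_rq_throttled(&rq));	/* prints 0 */
	return 0;
}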
diff --git a/kernel/signal.c b/kernel/signal.c index 2c1f08defac2..84917fe507f7 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -972,7 +972,7 @@ void zap_other_threads(struct task_struct *p) | |||
972 | } | 972 | } |
973 | } | 973 | } |
974 | 974 | ||
975 | int fastcall __fatal_signal_pending(struct task_struct *tsk) | 975 | int __fatal_signal_pending(struct task_struct *tsk) |
976 | { | 976 | { |
977 | return sigismember(&tsk->pending.signal, SIGKILL); | 977 | return sigismember(&tsk->pending.signal, SIGKILL); |
978 | } | 978 | } |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index d41ef6b4cf72..8b7e95411795 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -311,22 +311,6 @@ static struct ctl_table kern_table[] = { | |||
311 | .mode = 0644, | 311 | .mode = 0644, |
312 | .proc_handler = &proc_dointvec, | 312 | .proc_handler = &proc_dointvec, |
313 | }, | 313 | }, |
314 | { | ||
315 | .ctl_name = CTL_UNNUMBERED, | ||
316 | .procname = "sched_rt_period_ms", | ||
317 | .data = &sysctl_sched_rt_period, | ||
318 | .maxlen = sizeof(unsigned int), | ||
319 | .mode = 0644, | ||
320 | .proc_handler = &proc_dointvec, | ||
321 | }, | ||
322 | { | ||
323 | .ctl_name = CTL_UNNUMBERED, | ||
324 | .procname = "sched_rt_ratio", | ||
325 | .data = &sysctl_sched_rt_ratio, | ||
326 | .maxlen = sizeof(unsigned int), | ||
327 | .mode = 0644, | ||
328 | .proc_handler = &proc_dointvec, | ||
329 | }, | ||
330 | #if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP) | 314 | #if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP) |
331 | { | 315 | { |
332 | .ctl_name = CTL_UNNUMBERED, | 316 | .ctl_name = CTL_UNNUMBERED, |
@@ -348,6 +332,22 @@ static struct ctl_table kern_table[] = { | |||
348 | #endif | 332 | #endif |
349 | { | 333 | { |
350 | .ctl_name = CTL_UNNUMBERED, | 334 | .ctl_name = CTL_UNNUMBERED, |
335 | .procname = "sched_rt_period_us", | ||
336 | .data = &sysctl_sched_rt_period, | ||
337 | .maxlen = sizeof(unsigned int), | ||
338 | .mode = 0644, | ||
339 | .proc_handler = &proc_dointvec, | ||
340 | }, | ||
341 | { | ||
342 | .ctl_name = CTL_UNNUMBERED, | ||
343 | .procname = "sched_rt_runtime_us", | ||
344 | .data = &sysctl_sched_rt_runtime, | ||
345 | .maxlen = sizeof(int), | ||
346 | .mode = 0644, | ||
347 | .proc_handler = &proc_dointvec, | ||
348 | }, | ||
349 | { | ||
350 | .ctl_name = CTL_UNNUMBERED, | ||
351 | .procname = "sched_compat_yield", | 351 | .procname = "sched_compat_yield", |
352 | .data = &sysctl_sched_compat_yield, | 352 | .data = &sysctl_sched_compat_yield, |
353 | .maxlen = sizeof(unsigned int), | 353 | .maxlen = sizeof(unsigned int), |
@@ -978,8 +978,8 @@ static struct ctl_table vm_table[] = { | |||
978 | { | 978 | { |
979 | .ctl_name = CTL_UNNUMBERED, | 979 | .ctl_name = CTL_UNNUMBERED, |
980 | .procname = "nr_overcommit_hugepages", | 980 | .procname = "nr_overcommit_hugepages", |
981 | .data = &nr_overcommit_huge_pages, | 981 | .data = &sysctl_overcommit_huge_pages, |
982 | .maxlen = sizeof(nr_overcommit_huge_pages), | 982 | .maxlen = sizeof(sysctl_overcommit_huge_pages), |
983 | .mode = 0644, | 983 | .mode = 0644, |
984 | .proc_handler = &hugetlb_overcommit_handler, | 984 | .proc_handler = &hugetlb_overcommit_handler, |
985 | }, | 985 | }, |
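In sysctl.c the old sched_rt_period_ms and sched_rt_ratio entries give way to sched_rt_period_us and sched_rt_runtime_us under /proc/sys/kernel/; both are in microseconds, and the runtime entry is a signed int so that -1 can request unlimited runtime, which sched_rt_runtime() maps to RUNTIME_INF in the non-grouped case. A small illustration of how those values are interpreted; global_rt_runtime() here is an illustrative name, not a kernel function:

/* How the new sysctl values are interpreted: -1 means unlimited, otherwise
 * microseconds are converted to nanoseconds. Standalone illustration. */
#include <stdio.h>
#include <stdint.h>

#define RUNTIME_INF	((uint64_t)~0ULL)
#define NSEC_PER_USEC	1000ULL

static unsigned int sysctl_sched_rt_period  = 1000000;	/* sched_rt_period_us  */
static int          sysctl_sched_rt_runtime = 950000;	/* sched_rt_runtime_us */

static uint64_t global_rt_runtime(void)
{
	if (sysctl_sched_rt_runtime == -1)
		return RUNTIME_INF;
	return (uint64_t)sysctl_sched_rt_runtime * NSEC_PER_USEC;
}

int main(void)
{
	printf("period %u us, runtime %llu ns\n",
	       sysctl_sched_rt_period,
	       (unsigned long long)global_rt_runtime());	/* 950000000 ns */
	return 0;
}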
diff --git a/kernel/timeconst.pl b/kernel/timeconst.pl index 62b1287932ed..41468035473c 100644 --- a/kernel/timeconst.pl +++ b/kernel/timeconst.pl | |||
@@ -339,7 +339,7 @@ sub output($@) | |||
339 | print "\n"; | 339 | print "\n"; |
340 | 340 | ||
341 | foreach $pfx ('HZ_TO_MSEC','MSEC_TO_HZ', | 341 | foreach $pfx ('HZ_TO_MSEC','MSEC_TO_HZ', |
342 | 'USEC_TO_HZ','HZ_TO_USEC') { | 342 | 'HZ_TO_USEC','USEC_TO_HZ') { |
343 | foreach $bit (32, 64) { | 343 | foreach $bit (32, 64) { |
344 | foreach $suf ('MUL', 'ADJ', 'SHR') { | 344 | foreach $suf ('MUL', 'ADJ', 'SHR') { |
345 | printf "#define %-23s %s\n", | 345 | printf "#define %-23s %s\n", |
diff --git a/kernel/user.c b/kernel/user.c index 7d7900c5a1fd..7132022a040c 100644 --- a/kernel/user.c +++ b/kernel/user.c | |||
@@ -57,7 +57,7 @@ struct user_struct root_user = { | |||
57 | .uid_keyring = &root_user_keyring, | 57 | .uid_keyring = &root_user_keyring, |
58 | .session_keyring = &root_session_keyring, | 58 | .session_keyring = &root_session_keyring, |
59 | #endif | 59 | #endif |
60 | #ifdef CONFIG_FAIR_USER_SCHED | 60 | #ifdef CONFIG_USER_SCHED |
61 | .tg = &init_task_group, | 61 | .tg = &init_task_group, |
62 | #endif | 62 | #endif |
63 | }; | 63 | }; |
@@ -90,7 +90,7 @@ static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent) | |||
90 | return NULL; | 90 | return NULL; |
91 | } | 91 | } |
92 | 92 | ||
93 | #ifdef CONFIG_FAIR_USER_SCHED | 93 | #ifdef CONFIG_USER_SCHED |
94 | 94 | ||
95 | static void sched_destroy_user(struct user_struct *up) | 95 | static void sched_destroy_user(struct user_struct *up) |
96 | { | 96 | { |
@@ -113,15 +113,15 @@ static void sched_switch_user(struct task_struct *p) | |||
113 | sched_move_task(p); | 113 | sched_move_task(p); |
114 | } | 114 | } |
115 | 115 | ||
116 | #else /* CONFIG_FAIR_USER_SCHED */ | 116 | #else /* CONFIG_USER_SCHED */ |
117 | 117 | ||
118 | static void sched_destroy_user(struct user_struct *up) { } | 118 | static void sched_destroy_user(struct user_struct *up) { } |
119 | static int sched_create_user(struct user_struct *up) { return 0; } | 119 | static int sched_create_user(struct user_struct *up) { return 0; } |
120 | static void sched_switch_user(struct task_struct *p) { } | 120 | static void sched_switch_user(struct task_struct *p) { } |
121 | 121 | ||
122 | #endif /* CONFIG_FAIR_USER_SCHED */ | 122 | #endif /* CONFIG_USER_SCHED */ |
123 | 123 | ||
124 | #if defined(CONFIG_FAIR_USER_SCHED) && defined(CONFIG_SYSFS) | 124 | #if defined(CONFIG_USER_SCHED) && defined(CONFIG_SYSFS) |
125 | 125 | ||
126 | static struct kset *uids_kset; /* represents the /sys/kernel/uids/ directory */ | 126 | static struct kset *uids_kset; /* represents the /sys/kernel/uids/ directory */ |
127 | static DEFINE_MUTEX(uids_mutex); | 127 | static DEFINE_MUTEX(uids_mutex); |
@@ -137,6 +137,7 @@ static inline void uids_mutex_unlock(void) | |||
137 | } | 137 | } |
138 | 138 | ||
139 | /* uid directory attributes */ | 139 | /* uid directory attributes */ |
140 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
140 | static ssize_t cpu_shares_show(struct kobject *kobj, | 141 | static ssize_t cpu_shares_show(struct kobject *kobj, |
141 | struct kobj_attribute *attr, | 142 | struct kobj_attribute *attr, |
142 | char *buf) | 143 | char *buf) |
@@ -163,10 +164,45 @@ static ssize_t cpu_shares_store(struct kobject *kobj, | |||
163 | 164 | ||
164 | static struct kobj_attribute cpu_share_attr = | 165 | static struct kobj_attribute cpu_share_attr = |
165 | __ATTR(cpu_share, 0644, cpu_shares_show, cpu_shares_store); | 166 | __ATTR(cpu_share, 0644, cpu_shares_show, cpu_shares_store); |
167 | #endif | ||
168 | |||
169 | #ifdef CONFIG_RT_GROUP_SCHED | ||
170 | static ssize_t cpu_rt_runtime_show(struct kobject *kobj, | ||
171 | struct kobj_attribute *attr, | ||
172 | char *buf) | ||
173 | { | ||
174 | struct user_struct *up = container_of(kobj, struct user_struct, kobj); | ||
175 | |||
176 | return sprintf(buf, "%lu\n", sched_group_rt_runtime(up->tg)); | ||
177 | } | ||
178 | |||
179 | static ssize_t cpu_rt_runtime_store(struct kobject *kobj, | ||
180 | struct kobj_attribute *attr, | ||
181 | const char *buf, size_t size) | ||
182 | { | ||
183 | struct user_struct *up = container_of(kobj, struct user_struct, kobj); | ||
184 | unsigned long rt_runtime; | ||
185 | int rc; | ||
186 | |||
187 | sscanf(buf, "%lu", &rt_runtime); | ||
188 | |||
189 | rc = sched_group_set_rt_runtime(up->tg, rt_runtime); | ||
190 | |||
191 | return (rc ? rc : size); | ||
192 | } | ||
193 | |||
194 | static struct kobj_attribute cpu_rt_runtime_attr = | ||
195 | __ATTR(cpu_rt_runtime, 0644, cpu_rt_runtime_show, cpu_rt_runtime_store); | ||
196 | #endif | ||
166 | 197 | ||
167 | /* default attributes per uid directory */ | 198 | /* default attributes per uid directory */ |
168 | static struct attribute *uids_attributes[] = { | 199 | static struct attribute *uids_attributes[] = { |
200 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
169 | &cpu_share_attr.attr, | 201 | &cpu_share_attr.attr, |
202 | #endif | ||
203 | #ifdef CONFIG_RT_GROUP_SCHED | ||
204 | &cpu_rt_runtime_attr.attr, | ||
205 | #endif | ||
170 | NULL | 206 | NULL |
171 | }; | 207 | }; |
172 | 208 | ||
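With CONFIG_USER_SCHED each uid owns a task group, and its sysfs directory now exports cpu_rt_runtime alongside cpu_share, each compiled in only with the matching group-scheduling option; a value written in microseconds is passed to sched_group_set_rt_runtime() for that user's group. A userspace sketch (the /sys/kernel/uids/ path comes from the comment above; uid 1000 and the 100 ms value are illustrative):

/*
 * Give uid 1000 up to 100 ms of RT runtime per period by writing its
 * per-uid sysfs attribute. uid and value are illustrative.
 */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/uids/1000/cpu_rt_runtime", "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fprintf(f, "%lu\n", 100000UL);		/* microseconds */
	return fclose(f) != 0;			/* nonzero if the write was rejected */
}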
@@ -269,7 +305,7 @@ static inline void free_user(struct user_struct *up, unsigned long flags) | |||
269 | schedule_work(&up->work); | 305 | schedule_work(&up->work); |
270 | } | 306 | } |
271 | 307 | ||
272 | #else /* CONFIG_FAIR_USER_SCHED && CONFIG_SYSFS */ | 308 | #else /* CONFIG_USER_SCHED && CONFIG_SYSFS */ |
273 | 309 | ||
274 | int uids_sysfs_init(void) { return 0; } | 310 | int uids_sysfs_init(void) { return 0; } |
275 | static inline int uids_user_create(struct user_struct *up) { return 0; } | 311 | static inline int uids_user_create(struct user_struct *up) { return 0; } |
@@ -373,7 +409,7 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid) | |||
373 | spin_lock_irq(&uidhash_lock); | 409 | spin_lock_irq(&uidhash_lock); |
374 | up = uid_hash_find(uid, hashent); | 410 | up = uid_hash_find(uid, hashent); |
375 | if (up) { | 411 | if (up) { |
376 | /* This case is not possible when CONFIG_FAIR_USER_SCHED | 412 | /* This case is not possible when CONFIG_USER_SCHED |
377 | * is defined, since we serialize alloc_uid() using | 413 | * is defined, since we serialize alloc_uid() using |
378 | * uids_mutex. Hence no need to call | 414 | * uids_mutex. Hence no need to call |
379 | * sched_destroy_user() or remove_user_sysfs_dir(). | 415 | * sched_destroy_user() or remove_user_sysfs_dir(). |