diff options
author | Ingo Molnar <mingo@elte.hu> | 2009-01-10 21:43:52 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-01-10 21:43:52 -0500 |
commit | 99cd7074891f87c49660e3b2880564324a4733ac (patch) | |
tree | 903d2665bcb445f1f265d1adf7a99f265bcefc15 /kernel | |
parent | e8a9cbf6ae620d9e5ba9cb42001c033287a284a3 (diff) | |
parent | c59765042f53a79a7a65585042ff463b69cb248c (diff) |
Merge commit 'v2.6.29-rc1' into tracing/urgent
Diffstat (limited to 'kernel')
46 files changed, 1821 insertions, 782 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index e1c5bf3365c0..2921d90ce32f 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -9,7 +9,8 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \ | |||
9 | rcupdate.o extable.o params.o posix-timers.o \ | 9 | rcupdate.o extable.o params.o posix-timers.o \ |
10 | kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ | 10 | kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ |
11 | hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ | 11 | hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ |
12 | notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o | 12 | notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \ |
13 | async.o | ||
13 | 14 | ||
14 | ifdef CONFIG_FUNCTION_TRACER | 15 | ifdef CONFIG_FUNCTION_TRACER |
15 | # Do not trace debug files and internal ftrace files | 16 | # Do not trace debug files and internal ftrace files |
diff --git a/kernel/async.c b/kernel/async.c new file mode 100644 index 000000000000..f286e9f2b736 --- /dev/null +++ b/kernel/async.c | |||
@@ -0,0 +1,335 @@ | |||
1 | /* | ||
2 | * async.c: Asynchronous function calls for boot performance | ||
3 | * | ||
4 | * (C) Copyright 2009 Intel Corporation | ||
5 | * Author: Arjan van de Ven <arjan@linux.intel.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; version 2 | ||
10 | * of the License. | ||
11 | */ | ||
12 | |||
13 | |||
14 | /* | ||
15 | |||
16 | Goals and Theory of Operation | ||
17 | |||
18 | The primary goal of this feature is to reduce the kernel boot time, | ||
19 | by doing various independent hardware delays and discovery operations | ||
20 | decoupled and not strictly serialized. | ||
21 | |||
22 | More specifically, the asynchronous function call concept allows | ||
23 | certain operations (primarily during system boot) to happen | ||
24 | asynchronously, out of order, while these operations still | ||
25 | have their externally visible parts happen sequentially and in-order. | ||
26 | (not unlike how out-of-order CPUs retire their instructions in order) | ||
27 | |||
28 | Key to the asynchronous function call implementation is the concept of | ||
29 | a "sequence cookie" (which, although it has an abstracted type, can be | ||
30 | thought of as a monotonically incrementing number). | ||
31 | |||
32 | The async core will assign each scheduled event such a sequence cookie and | ||
33 | pass this to the called functions. | ||
34 | |||
35 | The asynchronously called function should before doing a globally visible | ||
36 | operation, such as registering device numbers, call the | ||
37 | async_synchronize_cookie() function and pass in its own cookie. The | ||
38 | async_synchronize_cookie() function will make sure that all asynchronous | ||
39 | operations that were scheduled prior to the operation corresponding with the | ||
40 | cookie have completed. | ||
41 | |||
42 | Subsystem/driver initialization code that scheduled asynchronous probe | ||
43 | functions, but which shares global resources with other drivers/subsystems | ||
44 | that do not use the asynchronous call feature, need to do a full | ||
45 | synchronization with the async_synchronize_full() function, before returning | ||
46 | from their init function. This is to maintain strict ordering between the | ||
47 | asynchronous and synchronous parts of the kernel. | ||
48 | |||
49 | */ | ||
50 | |||
51 | #include <linux/async.h> | ||
52 | #include <linux/module.h> | ||
53 | #include <linux/wait.h> | ||
54 | #include <linux/sched.h> | ||
55 | #include <linux/init.h> | ||
56 | #include <linux/kthread.h> | ||
57 | #include <asm/atomic.h> | ||
58 | |||
59 | static async_cookie_t next_cookie = 1; | ||
60 | |||
61 | #define MAX_THREADS 256 | ||
62 | #define MAX_WORK 32768 | ||
63 | |||
64 | static LIST_HEAD(async_pending); | ||
65 | static LIST_HEAD(async_running); | ||
66 | static DEFINE_SPINLOCK(async_lock); | ||
67 | |||
68 | static int async_enabled = 0; | ||
69 | |||
70 | struct async_entry { | ||
71 | struct list_head list; | ||
72 | async_cookie_t cookie; | ||
73 | async_func_ptr *func; | ||
74 | void *data; | ||
75 | struct list_head *running; | ||
76 | }; | ||
77 | |||
78 | static DECLARE_WAIT_QUEUE_HEAD(async_done); | ||
79 | static DECLARE_WAIT_QUEUE_HEAD(async_new); | ||
80 | |||
81 | static atomic_t entry_count; | ||
82 | static atomic_t thread_count; | ||
83 | |||
84 | extern int initcall_debug; | ||
85 | |||
86 | |||
87 | /* | ||
88 | * MUST be called with the lock held! | ||
89 | */ | ||
90 | static async_cookie_t __lowest_in_progress(struct list_head *running) | ||
91 | { | ||
92 | struct async_entry *entry; | ||
93 | if (!list_empty(&async_pending)) { | ||
94 | entry = list_first_entry(&async_pending, | ||
95 | struct async_entry, list); | ||
96 | return entry->cookie; | ||
97 | } else if (!list_empty(running)) { | ||
98 | entry = list_first_entry(running, | ||
99 | struct async_entry, list); | ||
100 | return entry->cookie; | ||
101 | } else { | ||
102 | /* nothing in progress... next_cookie is "infinity" */ | ||
103 | return next_cookie; | ||
104 | } | ||
105 | |||
106 | } | ||
107 | /* | ||
108 | * pick the first pending entry and run it | ||
109 | */ | ||
110 | static void run_one_entry(void) | ||
111 | { | ||
112 | unsigned long flags; | ||
113 | struct async_entry *entry; | ||
114 | ktime_t calltime, delta, rettime; | ||
115 | |||
116 | /* 1) pick one task from the pending queue */ | ||
117 | |||
118 | spin_lock_irqsave(&async_lock, flags); | ||
119 | if (list_empty(&async_pending)) | ||
120 | goto out; | ||
121 | entry = list_first_entry(&async_pending, struct async_entry, list); | ||
122 | |||
123 | /* 2) move it to the running queue */ | ||
124 | list_del(&entry->list); | ||
125 | list_add_tail(&entry->list, &async_running); | ||
126 | spin_unlock_irqrestore(&async_lock, flags); | ||
127 | |||
128 | /* 3) run it (and print duration)*/ | ||
129 | if (initcall_debug && system_state == SYSTEM_BOOTING) { | ||
130 | printk("calling %lli_%pF @ %i\n", entry->cookie, entry->func, task_pid_nr(current)); | ||
131 | calltime = ktime_get(); | ||
132 | } | ||
133 | entry->func(entry->data, entry->cookie); | ||
134 | if (initcall_debug && system_state == SYSTEM_BOOTING) { | ||
135 | rettime = ktime_get(); | ||
136 | delta = ktime_sub(rettime, calltime); | ||
137 | printk("initcall %lli_%pF returned 0 after %lld usecs\n", entry->cookie, | ||
138 | entry->func, ktime_to_ns(delta) >> 10); | ||
139 | } | ||
140 | |||
141 | /* 4) remove it from the running queue */ | ||
142 | spin_lock_irqsave(&async_lock, flags); | ||
143 | list_del(&entry->list); | ||
144 | |||
145 | /* 5) free the entry */ | ||
146 | kfree(entry); | ||
147 | atomic_dec(&entry_count); | ||
148 | |||
149 | spin_unlock_irqrestore(&async_lock, flags); | ||
150 | |||
151 | /* 6) wake up any waiters. */ | ||
152 | wake_up(&async_done); | ||
153 | return; | ||
154 | |||
155 | out: | ||
156 | spin_unlock_irqrestore(&async_lock, flags); | ||
157 | } | ||
158 | |||
159 | |||
160 | static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct list_head *running) | ||
161 | { | ||
162 | struct async_entry *entry; | ||
163 | unsigned long flags; | ||
164 | async_cookie_t newcookie; | ||
165 | |||
166 | |||
167 | /* allow irq-off callers */ | ||
168 | entry = kzalloc(sizeof(struct async_entry), GFP_ATOMIC); | ||
169 | |||
170 | /* | ||
171 | * If we're out of memory or if there's too much work | ||
172 | * pending already, we execute synchronously. | ||
173 | */ | ||
174 | if (!async_enabled || !entry || atomic_read(&entry_count) > MAX_WORK) { | ||
175 | kfree(entry); | ||
176 | spin_lock_irqsave(&async_lock, flags); | ||
177 | newcookie = next_cookie++; | ||
178 | spin_unlock_irqrestore(&async_lock, flags); | ||
179 | |||
180 | /* low on memory.. run synchronously */ | ||
181 | ptr(data, newcookie); | ||
182 | return newcookie; | ||
183 | } | ||
184 | entry->func = ptr; | ||
185 | entry->data = data; | ||
186 | entry->running = running; | ||
187 | |||
188 | spin_lock_irqsave(&async_lock, flags); | ||
189 | newcookie = entry->cookie = next_cookie++; | ||
190 | list_add_tail(&entry->list, &async_pending); | ||
191 | atomic_inc(&entry_count); | ||
192 | spin_unlock_irqrestore(&async_lock, flags); | ||
193 | wake_up(&async_new); | ||
194 | return newcookie; | ||
195 | } | ||
196 | |||
197 | async_cookie_t async_schedule(async_func_ptr *ptr, void *data) | ||
198 | { | ||
199 | return __async_schedule(ptr, data, &async_pending); | ||
200 | } | ||
201 | EXPORT_SYMBOL_GPL(async_schedule); | ||
202 | |||
203 | async_cookie_t async_schedule_special(async_func_ptr *ptr, void *data, struct list_head *running) | ||
204 | { | ||
205 | return __async_schedule(ptr, data, running); | ||
206 | } | ||
207 | EXPORT_SYMBOL_GPL(async_schedule_special); | ||
208 | |||
209 | void async_synchronize_full(void) | ||
210 | { | ||
211 | do { | ||
212 | async_synchronize_cookie(next_cookie); | ||
213 | } while (!list_empty(&async_running) || !list_empty(&async_pending)); | ||
214 | } | ||
215 | EXPORT_SYMBOL_GPL(async_synchronize_full); | ||
216 | |||
217 | void async_synchronize_full_special(struct list_head *list) | ||
218 | { | ||
219 | async_synchronize_cookie_special(next_cookie, list); | ||
220 | } | ||
221 | EXPORT_SYMBOL_GPL(async_synchronize_full_special); | ||
222 | |||
223 | void async_synchronize_cookie_special(async_cookie_t cookie, struct list_head *running) | ||
224 | { | ||
225 | ktime_t starttime, delta, endtime; | ||
226 | |||
227 | if (initcall_debug && system_state == SYSTEM_BOOTING) { | ||
228 | printk("async_waiting @ %i\n", task_pid_nr(current)); | ||
229 | starttime = ktime_get(); | ||
230 | } | ||
231 | |||
232 | wait_event(async_done, __lowest_in_progress(running) >= cookie); | ||
233 | |||
234 | if (initcall_debug && system_state == SYSTEM_BOOTING) { | ||
235 | endtime = ktime_get(); | ||
236 | delta = ktime_sub(endtime, starttime); | ||
237 | |||
238 | printk("async_continuing @ %i after %lli usec\n", | ||
239 | task_pid_nr(current), ktime_to_ns(delta) >> 10); | ||
240 | } | ||
241 | } | ||
242 | EXPORT_SYMBOL_GPL(async_synchronize_cookie_special); | ||
243 | |||
244 | void async_synchronize_cookie(async_cookie_t cookie) | ||
245 | { | ||
246 | async_synchronize_cookie_special(cookie, &async_running); | ||
247 | } | ||
248 | EXPORT_SYMBOL_GPL(async_synchronize_cookie); | ||
249 | |||
250 | |||
251 | static int async_thread(void *unused) | ||
252 | { | ||
253 | DECLARE_WAITQUEUE(wq, current); | ||
254 | add_wait_queue(&async_new, &wq); | ||
255 | |||
256 | while (!kthread_should_stop()) { | ||
257 | int ret = HZ; | ||
258 | set_current_state(TASK_INTERRUPTIBLE); | ||
259 | /* | ||
260 | * check the list head without lock.. false positives | ||
261 | * are dealt with inside run_one_entry() while holding | ||
262 | * the lock. | ||
263 | */ | ||
264 | rmb(); | ||
265 | if (!list_empty(&async_pending)) | ||
266 | run_one_entry(); | ||
267 | else | ||
268 | ret = schedule_timeout(HZ); | ||
269 | |||
270 | if (ret == 0) { | ||
271 | /* | ||
272 | * we timed out, this means we as thread are redundant. | ||
273 | * we sign off and die, but we to avoid any races there | ||
274 | * is a last-straw check to see if work snuck in. | ||
275 | */ | ||
276 | atomic_dec(&thread_count); | ||
277 | wmb(); /* manager must see our departure first */ | ||
278 | if (list_empty(&async_pending)) | ||
279 | break; | ||
280 | /* | ||
281 | * woops work came in between us timing out and us | ||
282 | * signing off; we need to stay alive and keep working. | ||
283 | */ | ||
284 | atomic_inc(&thread_count); | ||
285 | } | ||
286 | } | ||
287 | remove_wait_queue(&async_new, &wq); | ||
288 | |||
289 | return 0; | ||
290 | } | ||
291 | |||
292 | static int async_manager_thread(void *unused) | ||
293 | { | ||
294 | DECLARE_WAITQUEUE(wq, current); | ||
295 | add_wait_queue(&async_new, &wq); | ||
296 | |||
297 | while (!kthread_should_stop()) { | ||
298 | int tc, ec; | ||
299 | |||
300 | set_current_state(TASK_INTERRUPTIBLE); | ||
301 | |||
302 | tc = atomic_read(&thread_count); | ||
303 | rmb(); | ||
304 | ec = atomic_read(&entry_count); | ||
305 | |||
306 | while (tc < ec && tc < MAX_THREADS) { | ||
307 | kthread_run(async_thread, NULL, "async/%i", tc); | ||
308 | atomic_inc(&thread_count); | ||
309 | tc++; | ||
310 | } | ||
311 | |||
312 | schedule(); | ||
313 | } | ||
314 | remove_wait_queue(&async_new, &wq); | ||
315 | |||
316 | return 0; | ||
317 | } | ||
318 | |||
319 | static int __init async_init(void) | ||
320 | { | ||
321 | if (async_enabled) | ||
322 | kthread_run(async_manager_thread, NULL, "async/mgr"); | ||
323 | return 0; | ||
324 | } | ||
325 | |||
326 | static int __init setup_async(char *str) | ||
327 | { | ||
328 | async_enabled = 1; | ||
329 | return 1; | ||
330 | } | ||
331 | |||
332 | __setup("fastboot", setup_async); | ||
333 | |||
334 | |||
335 | core_initcall(async_init); | ||
diff --git a/kernel/capability.c b/kernel/capability.c index c598d9d5be4f..688926e496be 100644 --- a/kernel/capability.c +++ b/kernel/capability.c | |||
@@ -306,7 +306,7 @@ int capable(int cap) | |||
306 | BUG(); | 306 | BUG(); |
307 | } | 307 | } |
308 | 308 | ||
309 | if (has_capability(current, cap)) { | 309 | if (security_capable(cap) == 0) { |
310 | current->flags |= PF_SUPERPRIV; | 310 | current->flags |= PF_SUPERPRIV; |
311 | return 1; | 311 | return 1; |
312 | } | 312 | } |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 87bb0258fd27..c29831076e7a 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -84,7 +84,7 @@ struct cgroupfs_root { | |||
84 | /* Tracks how many cgroups are currently defined in hierarchy.*/ | 84 | /* Tracks how many cgroups are currently defined in hierarchy.*/ |
85 | int number_of_cgroups; | 85 | int number_of_cgroups; |
86 | 86 | ||
87 | /* A list running through the mounted hierarchies */ | 87 | /* A list running through the active hierarchies */ |
88 | struct list_head root_list; | 88 | struct list_head root_list; |
89 | 89 | ||
90 | /* Hierarchy-specific flags */ | 90 | /* Hierarchy-specific flags */ |
@@ -116,7 +116,6 @@ static int root_count; | |||
116 | * be called. | 116 | * be called. |
117 | */ | 117 | */ |
118 | static int need_forkexit_callback __read_mostly; | 118 | static int need_forkexit_callback __read_mostly; |
119 | static int need_mm_owner_callback __read_mostly; | ||
120 | 119 | ||
121 | /* convenient tests for these bits */ | 120 | /* convenient tests for these bits */ |
122 | inline int cgroup_is_removed(const struct cgroup *cgrp) | 121 | inline int cgroup_is_removed(const struct cgroup *cgrp) |
@@ -149,8 +148,8 @@ static int notify_on_release(const struct cgroup *cgrp) | |||
149 | #define for_each_subsys(_root, _ss) \ | 148 | #define for_each_subsys(_root, _ss) \ |
150 | list_for_each_entry(_ss, &_root->subsys_list, sibling) | 149 | list_for_each_entry(_ss, &_root->subsys_list, sibling) |
151 | 150 | ||
152 | /* for_each_root() allows you to iterate across the active hierarchies */ | 151 | /* for_each_active_root() allows you to iterate across the active hierarchies */ |
153 | #define for_each_root(_root) \ | 152 | #define for_each_active_root(_root) \ |
154 | list_for_each_entry(_root, &roots, root_list) | 153 | list_for_each_entry(_root, &roots, root_list) |
155 | 154 | ||
156 | /* the list of cgroups eligible for automatic release. Protected by | 155 | /* the list of cgroups eligible for automatic release. Protected by |
@@ -272,7 +271,7 @@ static void __put_css_set(struct css_set *cg, int taskexit) | |||
272 | 271 | ||
273 | rcu_read_lock(); | 272 | rcu_read_lock(); |
274 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 273 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { |
275 | struct cgroup *cgrp = cg->subsys[i]->cgroup; | 274 | struct cgroup *cgrp = rcu_dereference(cg->subsys[i]->cgroup); |
276 | if (atomic_dec_and_test(&cgrp->count) && | 275 | if (atomic_dec_and_test(&cgrp->count) && |
277 | notify_on_release(cgrp)) { | 276 | notify_on_release(cgrp)) { |
278 | if (taskexit) | 277 | if (taskexit) |
@@ -385,6 +384,25 @@ static int allocate_cg_links(int count, struct list_head *tmp) | |||
385 | return 0; | 384 | return 0; |
386 | } | 385 | } |
387 | 386 | ||
387 | /** | ||
388 | * link_css_set - a helper function to link a css_set to a cgroup | ||
389 | * @tmp_cg_links: cg_cgroup_link objects allocated by allocate_cg_links() | ||
390 | * @cg: the css_set to be linked | ||
391 | * @cgrp: the destination cgroup | ||
392 | */ | ||
393 | static void link_css_set(struct list_head *tmp_cg_links, | ||
394 | struct css_set *cg, struct cgroup *cgrp) | ||
395 | { | ||
396 | struct cg_cgroup_link *link; | ||
397 | |||
398 | BUG_ON(list_empty(tmp_cg_links)); | ||
399 | link = list_first_entry(tmp_cg_links, struct cg_cgroup_link, | ||
400 | cgrp_link_list); | ||
401 | link->cg = cg; | ||
402 | list_move(&link->cgrp_link_list, &cgrp->css_sets); | ||
403 | list_add(&link->cg_link_list, &cg->cg_links); | ||
404 | } | ||
405 | |||
388 | /* | 406 | /* |
389 | * find_css_set() takes an existing cgroup group and a | 407 | * find_css_set() takes an existing cgroup group and a |
390 | * cgroup object, and returns a css_set object that's | 408 | * cgroup object, and returns a css_set object that's |
@@ -400,7 +418,6 @@ static struct css_set *find_css_set( | |||
400 | int i; | 418 | int i; |
401 | 419 | ||
402 | struct list_head tmp_cg_links; | 420 | struct list_head tmp_cg_links; |
403 | struct cg_cgroup_link *link; | ||
404 | 421 | ||
405 | struct hlist_head *hhead; | 422 | struct hlist_head *hhead; |
406 | 423 | ||
@@ -445,26 +462,11 @@ static struct css_set *find_css_set( | |||
445 | * only do it for the first subsystem in each | 462 | * only do it for the first subsystem in each |
446 | * hierarchy | 463 | * hierarchy |
447 | */ | 464 | */ |
448 | if (ss->root->subsys_list.next == &ss->sibling) { | 465 | if (ss->root->subsys_list.next == &ss->sibling) |
449 | BUG_ON(list_empty(&tmp_cg_links)); | 466 | link_css_set(&tmp_cg_links, res, cgrp); |
450 | link = list_entry(tmp_cg_links.next, | ||
451 | struct cg_cgroup_link, | ||
452 | cgrp_link_list); | ||
453 | list_del(&link->cgrp_link_list); | ||
454 | list_add(&link->cgrp_link_list, &cgrp->css_sets); | ||
455 | link->cg = res; | ||
456 | list_add(&link->cg_link_list, &res->cg_links); | ||
457 | } | ||
458 | } | ||
459 | if (list_empty(&rootnode.subsys_list)) { | ||
460 | link = list_entry(tmp_cg_links.next, | ||
461 | struct cg_cgroup_link, | ||
462 | cgrp_link_list); | ||
463 | list_del(&link->cgrp_link_list); | ||
464 | list_add(&link->cgrp_link_list, &dummytop->css_sets); | ||
465 | link->cg = res; | ||
466 | list_add(&link->cg_link_list, &res->cg_links); | ||
467 | } | 467 | } |
468 | if (list_empty(&rootnode.subsys_list)) | ||
469 | link_css_set(&tmp_cg_links, res, dummytop); | ||
468 | 470 | ||
469 | BUG_ON(!list_empty(&tmp_cg_links)); | 471 | BUG_ON(!list_empty(&tmp_cg_links)); |
470 | 472 | ||
@@ -587,11 +589,18 @@ static void cgroup_call_pre_destroy(struct cgroup *cgrp) | |||
587 | { | 589 | { |
588 | struct cgroup_subsys *ss; | 590 | struct cgroup_subsys *ss; |
589 | for_each_subsys(cgrp->root, ss) | 591 | for_each_subsys(cgrp->root, ss) |
590 | if (ss->pre_destroy && cgrp->subsys[ss->subsys_id]) | 592 | if (ss->pre_destroy) |
591 | ss->pre_destroy(ss, cgrp); | 593 | ss->pre_destroy(ss, cgrp); |
592 | return; | 594 | return; |
593 | } | 595 | } |
594 | 596 | ||
597 | static void free_cgroup_rcu(struct rcu_head *obj) | ||
598 | { | ||
599 | struct cgroup *cgrp = container_of(obj, struct cgroup, rcu_head); | ||
600 | |||
601 | kfree(cgrp); | ||
602 | } | ||
603 | |||
595 | static void cgroup_diput(struct dentry *dentry, struct inode *inode) | 604 | static void cgroup_diput(struct dentry *dentry, struct inode *inode) |
596 | { | 605 | { |
597 | /* is dentry a directory ? if so, kfree() associated cgroup */ | 606 | /* is dentry a directory ? if so, kfree() associated cgroup */ |
@@ -611,19 +620,19 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode) | |||
611 | /* | 620 | /* |
612 | * Release the subsystem state objects. | 621 | * Release the subsystem state objects. |
613 | */ | 622 | */ |
614 | for_each_subsys(cgrp->root, ss) { | 623 | for_each_subsys(cgrp->root, ss) |
615 | if (cgrp->subsys[ss->subsys_id]) | 624 | ss->destroy(ss, cgrp); |
616 | ss->destroy(ss, cgrp); | ||
617 | } | ||
618 | 625 | ||
619 | cgrp->root->number_of_cgroups--; | 626 | cgrp->root->number_of_cgroups--; |
620 | mutex_unlock(&cgroup_mutex); | 627 | mutex_unlock(&cgroup_mutex); |
621 | 628 | ||
622 | /* Drop the active superblock reference that we took when we | 629 | /* |
623 | * created the cgroup */ | 630 | * Drop the active superblock reference that we took when we |
631 | * created the cgroup | ||
632 | */ | ||
624 | deactivate_super(cgrp->root->sb); | 633 | deactivate_super(cgrp->root->sb); |
625 | 634 | ||
626 | kfree(cgrp); | 635 | call_rcu(&cgrp->rcu_head, free_cgroup_rcu); |
627 | } | 636 | } |
628 | iput(inode); | 637 | iput(inode); |
629 | } | 638 | } |
@@ -713,23 +722,26 @@ static int rebind_subsystems(struct cgroupfs_root *root, | |||
713 | BUG_ON(cgrp->subsys[i]); | 722 | BUG_ON(cgrp->subsys[i]); |
714 | BUG_ON(!dummytop->subsys[i]); | 723 | BUG_ON(!dummytop->subsys[i]); |
715 | BUG_ON(dummytop->subsys[i]->cgroup != dummytop); | 724 | BUG_ON(dummytop->subsys[i]->cgroup != dummytop); |
725 | mutex_lock(&ss->hierarchy_mutex); | ||
716 | cgrp->subsys[i] = dummytop->subsys[i]; | 726 | cgrp->subsys[i] = dummytop->subsys[i]; |
717 | cgrp->subsys[i]->cgroup = cgrp; | 727 | cgrp->subsys[i]->cgroup = cgrp; |
718 | list_add(&ss->sibling, &root->subsys_list); | 728 | list_move(&ss->sibling, &root->subsys_list); |
719 | rcu_assign_pointer(ss->root, root); | 729 | ss->root = root; |
720 | if (ss->bind) | 730 | if (ss->bind) |
721 | ss->bind(ss, cgrp); | 731 | ss->bind(ss, cgrp); |
722 | 732 | mutex_unlock(&ss->hierarchy_mutex); | |
723 | } else if (bit & removed_bits) { | 733 | } else if (bit & removed_bits) { |
724 | /* We're removing this subsystem */ | 734 | /* We're removing this subsystem */ |
725 | BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]); | 735 | BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]); |
726 | BUG_ON(cgrp->subsys[i]->cgroup != cgrp); | 736 | BUG_ON(cgrp->subsys[i]->cgroup != cgrp); |
737 | mutex_lock(&ss->hierarchy_mutex); | ||
727 | if (ss->bind) | 738 | if (ss->bind) |
728 | ss->bind(ss, dummytop); | 739 | ss->bind(ss, dummytop); |
729 | dummytop->subsys[i]->cgroup = dummytop; | 740 | dummytop->subsys[i]->cgroup = dummytop; |
730 | cgrp->subsys[i] = NULL; | 741 | cgrp->subsys[i] = NULL; |
731 | rcu_assign_pointer(subsys[i]->root, &rootnode); | 742 | subsys[i]->root = &rootnode; |
732 | list_del(&ss->sibling); | 743 | list_move(&ss->sibling, &rootnode.subsys_list); |
744 | mutex_unlock(&ss->hierarchy_mutex); | ||
733 | } else if (bit & final_bits) { | 745 | } else if (bit & final_bits) { |
734 | /* Subsystem state should already exist */ | 746 | /* Subsystem state should already exist */ |
735 | BUG_ON(!cgrp->subsys[i]); | 747 | BUG_ON(!cgrp->subsys[i]); |
@@ -991,7 +1003,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type, | |||
991 | root = NULL; | 1003 | root = NULL; |
992 | } else { | 1004 | } else { |
993 | /* New superblock */ | 1005 | /* New superblock */ |
994 | struct cgroup *cgrp = &root->top_cgroup; | 1006 | struct cgroup *root_cgrp = &root->top_cgroup; |
995 | struct inode *inode; | 1007 | struct inode *inode; |
996 | int i; | 1008 | int i; |
997 | 1009 | ||
@@ -1032,7 +1044,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type, | |||
1032 | list_add(&root->root_list, &roots); | 1044 | list_add(&root->root_list, &roots); |
1033 | root_count++; | 1045 | root_count++; |
1034 | 1046 | ||
1035 | sb->s_root->d_fsdata = &root->top_cgroup; | 1047 | sb->s_root->d_fsdata = root_cgrp; |
1036 | root->top_cgroup.dentry = sb->s_root; | 1048 | root->top_cgroup.dentry = sb->s_root; |
1037 | 1049 | ||
1038 | /* Link the top cgroup in this hierarchy into all | 1050 | /* Link the top cgroup in this hierarchy into all |
@@ -1043,29 +1055,18 @@ static int cgroup_get_sb(struct file_system_type *fs_type, | |||
1043 | struct hlist_node *node; | 1055 | struct hlist_node *node; |
1044 | struct css_set *cg; | 1056 | struct css_set *cg; |
1045 | 1057 | ||
1046 | hlist_for_each_entry(cg, node, hhead, hlist) { | 1058 | hlist_for_each_entry(cg, node, hhead, hlist) |
1047 | struct cg_cgroup_link *link; | 1059 | link_css_set(&tmp_cg_links, cg, root_cgrp); |
1048 | |||
1049 | BUG_ON(list_empty(&tmp_cg_links)); | ||
1050 | link = list_entry(tmp_cg_links.next, | ||
1051 | struct cg_cgroup_link, | ||
1052 | cgrp_link_list); | ||
1053 | list_del(&link->cgrp_link_list); | ||
1054 | link->cg = cg; | ||
1055 | list_add(&link->cgrp_link_list, | ||
1056 | &root->top_cgroup.css_sets); | ||
1057 | list_add(&link->cg_link_list, &cg->cg_links); | ||
1058 | } | ||
1059 | } | 1060 | } |
1060 | write_unlock(&css_set_lock); | 1061 | write_unlock(&css_set_lock); |
1061 | 1062 | ||
1062 | free_cg_links(&tmp_cg_links); | 1063 | free_cg_links(&tmp_cg_links); |
1063 | 1064 | ||
1064 | BUG_ON(!list_empty(&cgrp->sibling)); | 1065 | BUG_ON(!list_empty(&root_cgrp->sibling)); |
1065 | BUG_ON(!list_empty(&cgrp->children)); | 1066 | BUG_ON(!list_empty(&root_cgrp->children)); |
1066 | BUG_ON(root->number_of_cgroups != 1); | 1067 | BUG_ON(root->number_of_cgroups != 1); |
1067 | 1068 | ||
1068 | cgroup_populate_dir(cgrp); | 1069 | cgroup_populate_dir(root_cgrp); |
1069 | mutex_unlock(&inode->i_mutex); | 1070 | mutex_unlock(&inode->i_mutex); |
1070 | mutex_unlock(&cgroup_mutex); | 1071 | mutex_unlock(&cgroup_mutex); |
1071 | } | 1072 | } |
@@ -1114,10 +1115,9 @@ static void cgroup_kill_sb(struct super_block *sb) { | |||
1114 | } | 1115 | } |
1115 | write_unlock(&css_set_lock); | 1116 | write_unlock(&css_set_lock); |
1116 | 1117 | ||
1117 | if (!list_empty(&root->root_list)) { | 1118 | list_del(&root->root_list); |
1118 | list_del(&root->root_list); | 1119 | root_count--; |
1119 | root_count--; | 1120 | |
1120 | } | ||
1121 | mutex_unlock(&cgroup_mutex); | 1121 | mutex_unlock(&cgroup_mutex); |
1122 | 1122 | ||
1123 | kfree(root); | 1123 | kfree(root); |
@@ -1146,14 +1146,16 @@ static inline struct cftype *__d_cft(struct dentry *dentry) | |||
1146 | * @buf: the buffer to write the path into | 1146 | * @buf: the buffer to write the path into |
1147 | * @buflen: the length of the buffer | 1147 | * @buflen: the length of the buffer |
1148 | * | 1148 | * |
1149 | * Called with cgroup_mutex held. Writes path of cgroup into buf. | 1149 | * Called with cgroup_mutex held or else with an RCU-protected cgroup |
1150 | * Returns 0 on success, -errno on error. | 1150 | * reference. Writes path of cgroup into buf. Returns 0 on success, |
1151 | * -errno on error. | ||
1151 | */ | 1152 | */ |
1152 | int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) | 1153 | int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) |
1153 | { | 1154 | { |
1154 | char *start; | 1155 | char *start; |
1156 | struct dentry *dentry = rcu_dereference(cgrp->dentry); | ||
1155 | 1157 | ||
1156 | if (cgrp == dummytop) { | 1158 | if (!dentry || cgrp == dummytop) { |
1157 | /* | 1159 | /* |
1158 | * Inactive subsystems have no dentry for their root | 1160 | * Inactive subsystems have no dentry for their root |
1159 | * cgroup | 1161 | * cgroup |
@@ -1166,13 +1168,14 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) | |||
1166 | 1168 | ||
1167 | *--start = '\0'; | 1169 | *--start = '\0'; |
1168 | for (;;) { | 1170 | for (;;) { |
1169 | int len = cgrp->dentry->d_name.len; | 1171 | int len = dentry->d_name.len; |
1170 | if ((start -= len) < buf) | 1172 | if ((start -= len) < buf) |
1171 | return -ENAMETOOLONG; | 1173 | return -ENAMETOOLONG; |
1172 | memcpy(start, cgrp->dentry->d_name.name, len); | 1174 | memcpy(start, cgrp->dentry->d_name.name, len); |
1173 | cgrp = cgrp->parent; | 1175 | cgrp = cgrp->parent; |
1174 | if (!cgrp) | 1176 | if (!cgrp) |
1175 | break; | 1177 | break; |
1178 | dentry = rcu_dereference(cgrp->dentry); | ||
1176 | if (!cgrp->parent) | 1179 | if (!cgrp->parent) |
1177 | continue; | 1180 | continue; |
1178 | if (--start < buf) | 1181 | if (--start < buf) |
@@ -1217,7 +1220,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | |||
1217 | int retval = 0; | 1220 | int retval = 0; |
1218 | struct cgroup_subsys *ss; | 1221 | struct cgroup_subsys *ss; |
1219 | struct cgroup *oldcgrp; | 1222 | struct cgroup *oldcgrp; |
1220 | struct css_set *cg = tsk->cgroups; | 1223 | struct css_set *cg; |
1221 | struct css_set *newcg; | 1224 | struct css_set *newcg; |
1222 | struct cgroupfs_root *root = cgrp->root; | 1225 | struct cgroupfs_root *root = cgrp->root; |
1223 | int subsys_id; | 1226 | int subsys_id; |
@@ -1237,11 +1240,16 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | |||
1237 | } | 1240 | } |
1238 | } | 1241 | } |
1239 | 1242 | ||
1243 | task_lock(tsk); | ||
1244 | cg = tsk->cgroups; | ||
1245 | get_css_set(cg); | ||
1246 | task_unlock(tsk); | ||
1240 | /* | 1247 | /* |
1241 | * Locate or allocate a new css_set for this task, | 1248 | * Locate or allocate a new css_set for this task, |
1242 | * based on its final set of cgroups | 1249 | * based on its final set of cgroups |
1243 | */ | 1250 | */ |
1244 | newcg = find_css_set(cg, cgrp); | 1251 | newcg = find_css_set(cg, cgrp); |
1252 | put_css_set(cg); | ||
1245 | if (!newcg) | 1253 | if (!newcg) |
1246 | return -ENOMEM; | 1254 | return -ENOMEM; |
1247 | 1255 | ||
@@ -1446,7 +1454,7 @@ static ssize_t cgroup_file_write(struct file *file, const char __user *buf, | |||
1446 | struct cftype *cft = __d_cft(file->f_dentry); | 1454 | struct cftype *cft = __d_cft(file->f_dentry); |
1447 | struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); | 1455 | struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); |
1448 | 1456 | ||
1449 | if (!cft || cgroup_is_removed(cgrp)) | 1457 | if (cgroup_is_removed(cgrp)) |
1450 | return -ENODEV; | 1458 | return -ENODEV; |
1451 | if (cft->write) | 1459 | if (cft->write) |
1452 | return cft->write(cgrp, cft, file, buf, nbytes, ppos); | 1460 | return cft->write(cgrp, cft, file, buf, nbytes, ppos); |
@@ -1491,7 +1499,7 @@ static ssize_t cgroup_file_read(struct file *file, char __user *buf, | |||
1491 | struct cftype *cft = __d_cft(file->f_dentry); | 1499 | struct cftype *cft = __d_cft(file->f_dentry); |
1492 | struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); | 1500 | struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); |
1493 | 1501 | ||
1494 | if (!cft || cgroup_is_removed(cgrp)) | 1502 | if (cgroup_is_removed(cgrp)) |
1495 | return -ENODEV; | 1503 | return -ENODEV; |
1496 | 1504 | ||
1497 | if (cft->read) | 1505 | if (cft->read) |
@@ -1555,10 +1563,8 @@ static int cgroup_file_open(struct inode *inode, struct file *file) | |||
1555 | err = generic_file_open(inode, file); | 1563 | err = generic_file_open(inode, file); |
1556 | if (err) | 1564 | if (err) |
1557 | return err; | 1565 | return err; |
1558 | |||
1559 | cft = __d_cft(file->f_dentry); | 1566 | cft = __d_cft(file->f_dentry); |
1560 | if (!cft) | 1567 | |
1561 | return -ENODEV; | ||
1562 | if (cft->read_map || cft->read_seq_string) { | 1568 | if (cft->read_map || cft->read_seq_string) { |
1563 | struct cgroup_seqfile_state *state = | 1569 | struct cgroup_seqfile_state *state = |
1564 | kzalloc(sizeof(*state), GFP_USER); | 1570 | kzalloc(sizeof(*state), GFP_USER); |
@@ -1672,7 +1678,7 @@ static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry, | |||
1672 | if (!error) { | 1678 | if (!error) { |
1673 | dentry->d_fsdata = cgrp; | 1679 | dentry->d_fsdata = cgrp; |
1674 | inc_nlink(parent->d_inode); | 1680 | inc_nlink(parent->d_inode); |
1675 | cgrp->dentry = dentry; | 1681 | rcu_assign_pointer(cgrp->dentry, dentry); |
1676 | dget(dentry); | 1682 | dget(dentry); |
1677 | } | 1683 | } |
1678 | dput(dentry); | 1684 | dput(dentry); |
@@ -1813,6 +1819,7 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp, | |||
1813 | { | 1819 | { |
1814 | struct task_struct *res; | 1820 | struct task_struct *res; |
1815 | struct list_head *l = it->task; | 1821 | struct list_head *l = it->task; |
1822 | struct cg_cgroup_link *link; | ||
1816 | 1823 | ||
1817 | /* If the iterator cg is NULL, we have no tasks */ | 1824 | /* If the iterator cg is NULL, we have no tasks */ |
1818 | if (!it->cg_link) | 1825 | if (!it->cg_link) |
@@ -1820,7 +1827,8 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp, | |||
1820 | res = list_entry(l, struct task_struct, cg_list); | 1827 | res = list_entry(l, struct task_struct, cg_list); |
1821 | /* Advance iterator to find next entry */ | 1828 | /* Advance iterator to find next entry */ |
1822 | l = l->next; | 1829 | l = l->next; |
1823 | if (l == &res->cgroups->tasks) { | 1830 | link = list_entry(it->cg_link, struct cg_cgroup_link, cgrp_link_list); |
1831 | if (l == &link->cg->tasks) { | ||
1824 | /* We reached the end of this task list - move on to | 1832 | /* We reached the end of this task list - move on to |
1825 | * the next cg_cgroup_link */ | 1833 | * the next cg_cgroup_link */ |
1826 | cgroup_advance_iter(cgrp, it); | 1834 | cgroup_advance_iter(cgrp, it); |
@@ -2014,14 +2022,16 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan) | |||
2014 | */ | 2022 | */ |
2015 | static int pid_array_load(pid_t *pidarray, int npids, struct cgroup *cgrp) | 2023 | static int pid_array_load(pid_t *pidarray, int npids, struct cgroup *cgrp) |
2016 | { | 2024 | { |
2017 | int n = 0; | 2025 | int n = 0, pid; |
2018 | struct cgroup_iter it; | 2026 | struct cgroup_iter it; |
2019 | struct task_struct *tsk; | 2027 | struct task_struct *tsk; |
2020 | cgroup_iter_start(cgrp, &it); | 2028 | cgroup_iter_start(cgrp, &it); |
2021 | while ((tsk = cgroup_iter_next(cgrp, &it))) { | 2029 | while ((tsk = cgroup_iter_next(cgrp, &it))) { |
2022 | if (unlikely(n == npids)) | 2030 | if (unlikely(n == npids)) |
2023 | break; | 2031 | break; |
2024 | pidarray[n++] = task_pid_vnr(tsk); | 2032 | pid = task_pid_vnr(tsk); |
2033 | if (pid > 0) | ||
2034 | pidarray[n++] = pid; | ||
2025 | } | 2035 | } |
2026 | cgroup_iter_end(cgrp, &it); | 2036 | cgroup_iter_end(cgrp, &it); |
2027 | return n; | 2037 | return n; |
@@ -2053,7 +2063,6 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry) | |||
2053 | 2063 | ||
2054 | ret = 0; | 2064 | ret = 0; |
2055 | cgrp = dentry->d_fsdata; | 2065 | cgrp = dentry->d_fsdata; |
2056 | rcu_read_lock(); | ||
2057 | 2066 | ||
2058 | cgroup_iter_start(cgrp, &it); | 2067 | cgroup_iter_start(cgrp, &it); |
2059 | while ((tsk = cgroup_iter_next(cgrp, &it))) { | 2068 | while ((tsk = cgroup_iter_next(cgrp, &it))) { |
@@ -2078,7 +2087,6 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry) | |||
2078 | } | 2087 | } |
2079 | cgroup_iter_end(cgrp, &it); | 2088 | cgroup_iter_end(cgrp, &it); |
2080 | 2089 | ||
2081 | rcu_read_unlock(); | ||
2082 | err: | 2090 | err: |
2083 | return ret; | 2091 | return ret; |
2084 | } | 2092 | } |
@@ -2325,7 +2333,7 @@ static void init_cgroup_css(struct cgroup_subsys_state *css, | |||
2325 | struct cgroup *cgrp) | 2333 | struct cgroup *cgrp) |
2326 | { | 2334 | { |
2327 | css->cgroup = cgrp; | 2335 | css->cgroup = cgrp; |
2328 | atomic_set(&css->refcnt, 0); | 2336 | atomic_set(&css->refcnt, 1); |
2329 | css->flags = 0; | 2337 | css->flags = 0; |
2330 | if (cgrp == dummytop) | 2338 | if (cgrp == dummytop) |
2331 | set_bit(CSS_ROOT, &css->flags); | 2339 | set_bit(CSS_ROOT, &css->flags); |
@@ -2333,6 +2341,29 @@ static void init_cgroup_css(struct cgroup_subsys_state *css, | |||
2333 | cgrp->subsys[ss->subsys_id] = css; | 2341 | cgrp->subsys[ss->subsys_id] = css; |
2334 | } | 2342 | } |
2335 | 2343 | ||
2344 | static void cgroup_lock_hierarchy(struct cgroupfs_root *root) | ||
2345 | { | ||
2346 | /* We need to take each hierarchy_mutex in a consistent order */ | ||
2347 | int i; | ||
2348 | |||
2349 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | ||
2350 | struct cgroup_subsys *ss = subsys[i]; | ||
2351 | if (ss->root == root) | ||
2352 | mutex_lock_nested(&ss->hierarchy_mutex, i); | ||
2353 | } | ||
2354 | } | ||
2355 | |||
2356 | static void cgroup_unlock_hierarchy(struct cgroupfs_root *root) | ||
2357 | { | ||
2358 | int i; | ||
2359 | |||
2360 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | ||
2361 | struct cgroup_subsys *ss = subsys[i]; | ||
2362 | if (ss->root == root) | ||
2363 | mutex_unlock(&ss->hierarchy_mutex); | ||
2364 | } | ||
2365 | } | ||
2366 | |||
2336 | /* | 2367 | /* |
2337 | * cgroup_create - create a cgroup | 2368 | * cgroup_create - create a cgroup |
2338 | * @parent: cgroup that will be parent of the new cgroup | 2369 | * @parent: cgroup that will be parent of the new cgroup |
@@ -2381,7 +2412,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
2381 | init_cgroup_css(css, ss, cgrp); | 2412 | init_cgroup_css(css, ss, cgrp); |
2382 | } | 2413 | } |
2383 | 2414 | ||
2415 | cgroup_lock_hierarchy(root); | ||
2384 | list_add(&cgrp->sibling, &cgrp->parent->children); | 2416 | list_add(&cgrp->sibling, &cgrp->parent->children); |
2417 | cgroup_unlock_hierarchy(root); | ||
2385 | root->number_of_cgroups++; | 2418 | root->number_of_cgroups++; |
2386 | 2419 | ||
2387 | err = cgroup_create_dir(cgrp, dentry, mode); | 2420 | err = cgroup_create_dir(cgrp, dentry, mode); |
@@ -2432,7 +2465,7 @@ static int cgroup_has_css_refs(struct cgroup *cgrp) | |||
2432 | { | 2465 | { |
2433 | /* Check the reference count on each subsystem. Since we | 2466 | /* Check the reference count on each subsystem. Since we |
2434 | * already established that there are no tasks in the | 2467 | * already established that there are no tasks in the |
2435 | * cgroup, if the css refcount is also 0, then there should | 2468 | * cgroup, if the css refcount is also 1, then there should |
2436 | * be no outstanding references, so the subsystem is safe to | 2469 | * be no outstanding references, so the subsystem is safe to |
2437 | * destroy. We scan across all subsystems rather than using | 2470 | * destroy. We scan across all subsystems rather than using |
2438 | * the per-hierarchy linked list of mounted subsystems since | 2471 | * the per-hierarchy linked list of mounted subsystems since |
@@ -2453,19 +2486,67 @@ static int cgroup_has_css_refs(struct cgroup *cgrp) | |||
2453 | * matter, since it can only happen if the cgroup | 2486 | * matter, since it can only happen if the cgroup |
2454 | * has been deleted and hence no longer needs the | 2487 | * has been deleted and hence no longer needs the |
2455 | * release agent to be called anyway. */ | 2488 | * release agent to be called anyway. */ |
2456 | if (css && atomic_read(&css->refcnt)) | 2489 | if (css && (atomic_read(&css->refcnt) > 1)) |
2457 | return 1; | 2490 | return 1; |
2458 | } | 2491 | } |
2459 | return 0; | 2492 | return 0; |
2460 | } | 2493 | } |
2461 | 2494 | ||
2495 | /* | ||
2496 | * Atomically mark all (or else none) of the cgroup's CSS objects as | ||
2497 | * CSS_REMOVED. Return true on success, or false if the cgroup has | ||
2498 | * busy subsystems. Call with cgroup_mutex held | ||
2499 | */ | ||
2500 | |||
2501 | static int cgroup_clear_css_refs(struct cgroup *cgrp) | ||
2502 | { | ||
2503 | struct cgroup_subsys *ss; | ||
2504 | unsigned long flags; | ||
2505 | bool failed = false; | ||
2506 | local_irq_save(flags); | ||
2507 | for_each_subsys(cgrp->root, ss) { | ||
2508 | struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; | ||
2509 | int refcnt; | ||
2510 | do { | ||
2511 | /* We can only remove a CSS with a refcnt==1 */ | ||
2512 | refcnt = atomic_read(&css->refcnt); | ||
2513 | if (refcnt > 1) { | ||
2514 | failed = true; | ||
2515 | goto done; | ||
2516 | } | ||
2517 | BUG_ON(!refcnt); | ||
2518 | /* | ||
2519 | * Drop the refcnt to 0 while we check other | ||
2520 | * subsystems. This will cause any racing | ||
2521 | * css_tryget() to spin until we set the | ||
2522 | * CSS_REMOVED bits or abort | ||
2523 | */ | ||
2524 | } while (atomic_cmpxchg(&css->refcnt, refcnt, 0) != refcnt); | ||
2525 | } | ||
2526 | done: | ||
2527 | for_each_subsys(cgrp->root, ss) { | ||
2528 | struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; | ||
2529 | if (failed) { | ||
2530 | /* | ||
2531 | * Restore old refcnt if we previously managed | ||
2532 | * to clear it from 1 to 0 | ||
2533 | */ | ||
2534 | if (!atomic_read(&css->refcnt)) | ||
2535 | atomic_set(&css->refcnt, 1); | ||
2536 | } else { | ||
2537 | /* Commit the fact that the CSS is removed */ | ||
2538 | set_bit(CSS_REMOVED, &css->flags); | ||
2539 | } | ||
2540 | } | ||
2541 | local_irq_restore(flags); | ||
2542 | return !failed; | ||
2543 | } | ||
2544 | |||
2462 | static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) | 2545 | static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) |
2463 | { | 2546 | { |
2464 | struct cgroup *cgrp = dentry->d_fsdata; | 2547 | struct cgroup *cgrp = dentry->d_fsdata; |
2465 | struct dentry *d; | 2548 | struct dentry *d; |
2466 | struct cgroup *parent; | 2549 | struct cgroup *parent; |
2467 | struct super_block *sb; | ||
2468 | struct cgroupfs_root *root; | ||
2469 | 2550 | ||
2470 | /* the vfs holds both inode->i_mutex already */ | 2551 | /* the vfs holds both inode->i_mutex already */ |
2471 | 2552 | ||
@@ -2488,12 +2569,10 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) | |||
2488 | 2569 | ||
2489 | mutex_lock(&cgroup_mutex); | 2570 | mutex_lock(&cgroup_mutex); |
2490 | parent = cgrp->parent; | 2571 | parent = cgrp->parent; |
2491 | root = cgrp->root; | ||
2492 | sb = root->sb; | ||
2493 | 2572 | ||
2494 | if (atomic_read(&cgrp->count) | 2573 | if (atomic_read(&cgrp->count) |
2495 | || !list_empty(&cgrp->children) | 2574 | || !list_empty(&cgrp->children) |
2496 | || cgroup_has_css_refs(cgrp)) { | 2575 | || !cgroup_clear_css_refs(cgrp)) { |
2497 | mutex_unlock(&cgroup_mutex); | 2576 | mutex_unlock(&cgroup_mutex); |
2498 | return -EBUSY; | 2577 | return -EBUSY; |
2499 | } | 2578 | } |
@@ -2503,8 +2582,12 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) | |||
2503 | if (!list_empty(&cgrp->release_list)) | 2582 | if (!list_empty(&cgrp->release_list)) |
2504 | list_del(&cgrp->release_list); | 2583 | list_del(&cgrp->release_list); |
2505 | spin_unlock(&release_list_lock); | 2584 | spin_unlock(&release_list_lock); |
2506 | /* delete my sibling from parent->children */ | 2585 | |
2586 | cgroup_lock_hierarchy(cgrp->root); | ||
2587 | /* delete this cgroup from parent->children */ | ||
2507 | list_del(&cgrp->sibling); | 2588 | list_del(&cgrp->sibling); |
2589 | cgroup_unlock_hierarchy(cgrp->root); | ||
2590 | |||
2508 | spin_lock(&cgrp->dentry->d_lock); | 2591 | spin_lock(&cgrp->dentry->d_lock); |
2509 | d = dget(cgrp->dentry); | 2592 | d = dget(cgrp->dentry); |
2510 | spin_unlock(&d->d_lock); | 2593 | spin_unlock(&d->d_lock); |
@@ -2526,6 +2609,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss) | |||
2526 | printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name); | 2609 | printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name); |
2527 | 2610 | ||
2528 | /* Create the top cgroup state for this subsystem */ | 2611 | /* Create the top cgroup state for this subsystem */ |
2612 | list_add(&ss->sibling, &rootnode.subsys_list); | ||
2529 | ss->root = &rootnode; | 2613 | ss->root = &rootnode; |
2530 | css = ss->create(ss, dummytop); | 2614 | css = ss->create(ss, dummytop); |
2531 | /* We don't handle early failures gracefully */ | 2615 | /* We don't handle early failures gracefully */ |
@@ -2539,13 +2623,13 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss) | |||
2539 | init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id]; | 2623 | init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id]; |
2540 | 2624 | ||
2541 | need_forkexit_callback |= ss->fork || ss->exit; | 2625 | need_forkexit_callback |= ss->fork || ss->exit; |
2542 | need_mm_owner_callback |= !!ss->mm_owner_changed; | ||
2543 | 2626 | ||
2544 | /* At system boot, before all subsystems have been | 2627 | /* At system boot, before all subsystems have been |
2545 | * registered, no tasks have been forked, so we don't | 2628 | * registered, no tasks have been forked, so we don't |
2546 | * need to invoke fork callbacks here. */ | 2629 | * need to invoke fork callbacks here. */ |
2547 | BUG_ON(!list_empty(&init_task.tasks)); | 2630 | BUG_ON(!list_empty(&init_task.tasks)); |
2548 | 2631 | ||
2632 | mutex_init(&ss->hierarchy_mutex); | ||
2549 | ss->active = 1; | 2633 | ss->active = 1; |
2550 | } | 2634 | } |
2551 | 2635 | ||
@@ -2564,7 +2648,6 @@ int __init cgroup_init_early(void) | |||
2564 | INIT_HLIST_NODE(&init_css_set.hlist); | 2648 | INIT_HLIST_NODE(&init_css_set.hlist); |
2565 | css_set_count = 1; | 2649 | css_set_count = 1; |
2566 | init_cgroup_root(&rootnode); | 2650 | init_cgroup_root(&rootnode); |
2567 | list_add(&rootnode.root_list, &roots); | ||
2568 | root_count = 1; | 2651 | root_count = 1; |
2569 | init_task.cgroups = &init_css_set; | 2652 | init_task.cgroups = &init_css_set; |
2570 | 2653 | ||
@@ -2671,15 +2754,12 @@ static int proc_cgroup_show(struct seq_file *m, void *v) | |||
2671 | 2754 | ||
2672 | mutex_lock(&cgroup_mutex); | 2755 | mutex_lock(&cgroup_mutex); |
2673 | 2756 | ||
2674 | for_each_root(root) { | 2757 | for_each_active_root(root) { |
2675 | struct cgroup_subsys *ss; | 2758 | struct cgroup_subsys *ss; |
2676 | struct cgroup *cgrp; | 2759 | struct cgroup *cgrp; |
2677 | int subsys_id; | 2760 | int subsys_id; |
2678 | int count = 0; | 2761 | int count = 0; |
2679 | 2762 | ||
2680 | /* Skip this hierarchy if it has no active subsystems */ | ||
2681 | if (!root->actual_subsys_bits) | ||
2682 | continue; | ||
2683 | seq_printf(m, "%lu:", root->subsys_bits); | 2763 | seq_printf(m, "%lu:", root->subsys_bits); |
2684 | for_each_subsys(root, ss) | 2764 | for_each_subsys(root, ss) |
2685 | seq_printf(m, "%s%s", count++ ? "," : "", ss->name); | 2765 | seq_printf(m, "%s%s", count++ ? "," : "", ss->name); |
@@ -2789,37 +2869,6 @@ void cgroup_fork_callbacks(struct task_struct *child) | |||
2789 | } | 2869 | } |
2790 | } | 2870 | } |
2791 | 2871 | ||
2792 | #ifdef CONFIG_MM_OWNER | ||
2793 | /** | ||
2794 | * cgroup_mm_owner_callbacks - run callbacks when the mm->owner changes | ||
2795 | * @p: the new owner | ||
2796 | * | ||
2797 | * Called on every change to mm->owner. mm_init_owner() does not | ||
2798 | * invoke this routine, since it assigns the mm->owner the first time | ||
2799 | * and does not change it. | ||
2800 | * | ||
2801 | * The callbacks are invoked with mmap_sem held in read mode. | ||
2802 | */ | ||
2803 | void cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new) | ||
2804 | { | ||
2805 | struct cgroup *oldcgrp, *newcgrp = NULL; | ||
2806 | |||
2807 | if (need_mm_owner_callback) { | ||
2808 | int i; | ||
2809 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | ||
2810 | struct cgroup_subsys *ss = subsys[i]; | ||
2811 | oldcgrp = task_cgroup(old, ss->subsys_id); | ||
2812 | if (new) | ||
2813 | newcgrp = task_cgroup(new, ss->subsys_id); | ||
2814 | if (oldcgrp == newcgrp) | ||
2815 | continue; | ||
2816 | if (ss->mm_owner_changed) | ||
2817 | ss->mm_owner_changed(ss, oldcgrp, newcgrp, new); | ||
2818 | } | ||
2819 | } | ||
2820 | } | ||
2821 | #endif /* CONFIG_MM_OWNER */ | ||
2822 | |||
2823 | /** | 2872 | /** |
2824 | * cgroup_post_fork - called on a new task after adding it to the task list | 2873 | * cgroup_post_fork - called on a new task after adding it to the task list |
2825 | * @child: the task in question | 2874 | * @child: the task in question |
@@ -2833,8 +2882,10 @@ void cgroup_post_fork(struct task_struct *child) | |||
2833 | { | 2882 | { |
2834 | if (use_task_css_set_links) { | 2883 | if (use_task_css_set_links) { |
2835 | write_lock(&css_set_lock); | 2884 | write_lock(&css_set_lock); |
2885 | task_lock(child); | ||
2836 | if (list_empty(&child->cg_list)) | 2886 | if (list_empty(&child->cg_list)) |
2837 | list_add(&child->cg_list, &child->cgroups->tasks); | 2887 | list_add(&child->cg_list, &child->cgroups->tasks); |
2888 | task_unlock(child); | ||
2838 | write_unlock(&css_set_lock); | 2889 | write_unlock(&css_set_lock); |
2839 | } | 2890 | } |
2840 | } | 2891 | } |
@@ -2940,6 +2991,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys, | |||
2940 | mutex_unlock(&cgroup_mutex); | 2991 | mutex_unlock(&cgroup_mutex); |
2941 | return 0; | 2992 | return 0; |
2942 | } | 2993 | } |
2994 | task_lock(tsk); | ||
2943 | cg = tsk->cgroups; | 2995 | cg = tsk->cgroups; |
2944 | parent = task_cgroup(tsk, subsys->subsys_id); | 2996 | parent = task_cgroup(tsk, subsys->subsys_id); |
2945 | 2997 | ||
@@ -2952,6 +3004,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys, | |||
2952 | 3004 | ||
2953 | /* Keep the cgroup alive */ | 3005 | /* Keep the cgroup alive */ |
2954 | get_css_set(cg); | 3006 | get_css_set(cg); |
3007 | task_unlock(tsk); | ||
2955 | mutex_unlock(&cgroup_mutex); | 3008 | mutex_unlock(&cgroup_mutex); |
2956 | 3009 | ||
2957 | /* Now do the VFS work to create a cgroup */ | 3010 | /* Now do the VFS work to create a cgroup */ |
@@ -2970,7 +3023,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys, | |||
2970 | } | 3023 | } |
2971 | 3024 | ||
2972 | /* Create the cgroup directory, which also creates the cgroup */ | 3025 | /* Create the cgroup directory, which also creates the cgroup */ |
2973 | ret = vfs_mkdir(inode, dentry, S_IFDIR | 0755); | 3026 | ret = vfs_mkdir(inode, dentry, 0755); |
2974 | child = __d_cgrp(dentry); | 3027 | child = __d_cgrp(dentry); |
2975 | dput(dentry); | 3028 | dput(dentry); |
2976 | if (ret) { | 3029 | if (ret) { |
@@ -2980,13 +3033,6 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys, | |||
2980 | goto out_release; | 3033 | goto out_release; |
2981 | } | 3034 | } |
2982 | 3035 | ||
2983 | if (!child) { | ||
2984 | printk(KERN_INFO | ||
2985 | "Couldn't find new cgroup %s\n", nodename); | ||
2986 | ret = -ENOMEM; | ||
2987 | goto out_release; | ||
2988 | } | ||
2989 | |||
2990 | /* The cgroup now exists. Retake cgroup_mutex and check | 3036 | /* The cgroup now exists. Retake cgroup_mutex and check |
2991 | * that we're still in the same state that we thought we | 3037 | * that we're still in the same state that we thought we |
2992 | * were. */ | 3038 | * were. */ |
@@ -3082,7 +3128,8 @@ void __css_put(struct cgroup_subsys_state *css) | |||
3082 | { | 3128 | { |
3083 | struct cgroup *cgrp = css->cgroup; | 3129 | struct cgroup *cgrp = css->cgroup; |
3084 | rcu_read_lock(); | 3130 | rcu_read_lock(); |
3085 | if (atomic_dec_and_test(&css->refcnt) && notify_on_release(cgrp)) { | 3131 | if ((atomic_dec_return(&css->refcnt) == 1) && |
3132 | notify_on_release(cgrp)) { | ||
3086 | set_bit(CGRP_RELEASABLE, &cgrp->flags); | 3133 | set_bit(CGRP_RELEASABLE, &cgrp->flags); |
3087 | check_for_release(cgrp); | 3134 | check_for_release(cgrp); |
3088 | } | 3135 | } |
diff --git a/kernel/compat.c b/kernel/compat.c index d52e2ec1deb5..42d56544460f 100644 --- a/kernel/compat.c +++ b/kernel/compat.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/migrate.h> | 24 | #include <linux/migrate.h> |
25 | #include <linux/posix-timers.h> | 25 | #include <linux/posix-timers.h> |
26 | #include <linux/times.h> | 26 | #include <linux/times.h> |
27 | #include <linux/ptrace.h> | ||
27 | 28 | ||
28 | #include <asm/uaccess.h> | 29 | #include <asm/uaccess.h> |
29 | 30 | ||
@@ -229,6 +230,7 @@ asmlinkage long compat_sys_times(struct compat_tms __user *tbuf) | |||
229 | if (copy_to_user(tbuf, &tmp, sizeof(tmp))) | 230 | if (copy_to_user(tbuf, &tmp, sizeof(tmp))) |
230 | return -EFAULT; | 231 | return -EFAULT; |
231 | } | 232 | } |
233 | force_successful_syscall_return(); | ||
232 | return compat_jiffies_to_clock_t(jiffies); | 234 | return compat_jiffies_to_clock_t(jiffies); |
233 | } | 235 | } |
234 | 236 | ||
@@ -894,8 +896,9 @@ asmlinkage long compat_sys_time(compat_time_t __user * tloc) | |||
894 | 896 | ||
895 | if (tloc) { | 897 | if (tloc) { |
896 | if (put_user(i,tloc)) | 898 | if (put_user(i,tloc)) |
897 | i = -EFAULT; | 899 | return -EFAULT; |
898 | } | 900 | } |
901 | force_successful_syscall_return(); | ||
899 | return i; | 902 | return i; |
900 | } | 903 | } |
901 | 904 | ||
diff --git a/kernel/cpu.c b/kernel/cpu.c index 30e74dd6d01b..79e40f00dcb8 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c | |||
@@ -379,8 +379,11 @@ static cpumask_var_t frozen_cpus; | |||
379 | 379 | ||
380 | int disable_nonboot_cpus(void) | 380 | int disable_nonboot_cpus(void) |
381 | { | 381 | { |
382 | int cpu, first_cpu, error = 0; | 382 | int cpu, first_cpu, error; |
383 | 383 | ||
384 | error = stop_machine_create(); | ||
385 | if (error) | ||
386 | return error; | ||
384 | cpu_maps_update_begin(); | 387 | cpu_maps_update_begin(); |
385 | first_cpu = cpumask_first(cpu_online_mask); | 388 | first_cpu = cpumask_first(cpu_online_mask); |
386 | /* We take down all of the non-boot CPUs in one shot to avoid races | 389 | /* We take down all of the non-boot CPUs in one shot to avoid races |
@@ -409,6 +412,7 @@ int disable_nonboot_cpus(void) | |||
409 | printk(KERN_ERR "Non-boot CPUs are not disabled\n"); | 412 | printk(KERN_ERR "Non-boot CPUs are not disabled\n"); |
410 | } | 413 | } |
411 | cpu_maps_update_done(); | 414 | cpu_maps_update_done(); |
415 | stop_machine_destroy(); | ||
412 | return error; | 416 | return error; |
413 | } | 417 | } |
414 | 418 | ||
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 39c1a4c1c5a9..647c77a88fcb 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -84,7 +84,7 @@ struct cpuset { | |||
84 | struct cgroup_subsys_state css; | 84 | struct cgroup_subsys_state css; |
85 | 85 | ||
86 | unsigned long flags; /* "unsigned long" so bitops work */ | 86 | unsigned long flags; /* "unsigned long" so bitops work */ |
87 | cpumask_t cpus_allowed; /* CPUs allowed to tasks in cpuset */ | 87 | cpumask_var_t cpus_allowed; /* CPUs allowed to tasks in cpuset */ |
88 | nodemask_t mems_allowed; /* Memory Nodes allowed to tasks */ | 88 | nodemask_t mems_allowed; /* Memory Nodes allowed to tasks */ |
89 | 89 | ||
90 | struct cpuset *parent; /* my parent */ | 90 | struct cpuset *parent; /* my parent */ |
@@ -195,8 +195,6 @@ static int cpuset_mems_generation; | |||
195 | 195 | ||
196 | static struct cpuset top_cpuset = { | 196 | static struct cpuset top_cpuset = { |
197 | .flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)), | 197 | .flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)), |
198 | .cpus_allowed = CPU_MASK_ALL, | ||
199 | .mems_allowed = NODE_MASK_ALL, | ||
200 | }; | 198 | }; |
201 | 199 | ||
202 | /* | 200 | /* |
@@ -240,6 +238,17 @@ static struct cpuset top_cpuset = { | |||
240 | static DEFINE_MUTEX(callback_mutex); | 238 | static DEFINE_MUTEX(callback_mutex); |
241 | 239 | ||
242 | /* | 240 | /* |
241 | * cpuset_buffer_lock protects both the cpuset_name and cpuset_nodelist | ||
242 | * buffers. They are statically allocated to prevent using excess stack | ||
243 | * when calling cpuset_print_task_mems_allowed(). | ||
244 | */ | ||
245 | #define CPUSET_NAME_LEN (128) | ||
246 | #define CPUSET_NODELIST_LEN (256) | ||
247 | static char cpuset_name[CPUSET_NAME_LEN]; | ||
248 | static char cpuset_nodelist[CPUSET_NODELIST_LEN]; | ||
249 | static DEFINE_SPINLOCK(cpuset_buffer_lock); | ||
250 | |||
251 | /* | ||
243 | * This is ugly, but preserves the userspace API for existing cpuset | 252 | * This is ugly, but preserves the userspace API for existing cpuset |
244 | * users. If someone tries to mount the "cpuset" filesystem, we | 253 | * users. If someone tries to mount the "cpuset" filesystem, we |
245 | * silently switch it to mount "cgroup" instead | 254 | * silently switch it to mount "cgroup" instead |
@@ -267,7 +276,7 @@ static struct file_system_type cpuset_fs_type = { | |||
267 | }; | 276 | }; |
268 | 277 | ||
269 | /* | 278 | /* |
270 | * Return in *pmask the portion of a cpusets's cpus_allowed that | 279 | * Return in pmask the portion of a cpusets's cpus_allowed that |
271 | * are online. If none are online, walk up the cpuset hierarchy | 280 | * are online. If none are online, walk up the cpuset hierarchy |
272 | * until we find one that does have some online cpus. If we get | 281 | * until we find one that does have some online cpus. If we get |
273 | * all the way to the top and still haven't found any online cpus, | 282 | * all the way to the top and still haven't found any online cpus, |
@@ -280,15 +289,16 @@ static struct file_system_type cpuset_fs_type = { | |||
280 | * Call with callback_mutex held. | 289 | * Call with callback_mutex held. |
281 | */ | 290 | */ |
282 | 291 | ||
283 | static void guarantee_online_cpus(const struct cpuset *cs, cpumask_t *pmask) | 292 | static void guarantee_online_cpus(const struct cpuset *cs, |
293 | struct cpumask *pmask) | ||
284 | { | 294 | { |
285 | while (cs && !cpus_intersects(cs->cpus_allowed, cpu_online_map)) | 295 | while (cs && !cpumask_intersects(cs->cpus_allowed, cpu_online_mask)) |
286 | cs = cs->parent; | 296 | cs = cs->parent; |
287 | if (cs) | 297 | if (cs) |
288 | cpus_and(*pmask, cs->cpus_allowed, cpu_online_map); | 298 | cpumask_and(pmask, cs->cpus_allowed, cpu_online_mask); |
289 | else | 299 | else |
290 | *pmask = cpu_online_map; | 300 | cpumask_copy(pmask, cpu_online_mask); |
291 | BUG_ON(!cpus_intersects(*pmask, cpu_online_map)); | 301 | BUG_ON(!cpumask_intersects(pmask, cpu_online_mask)); |
292 | } | 302 | } |
293 | 303 | ||
294 | /* | 304 | /* |
@@ -364,14 +374,9 @@ void cpuset_update_task_memory_state(void) | |||
364 | struct task_struct *tsk = current; | 374 | struct task_struct *tsk = current; |
365 | struct cpuset *cs; | 375 | struct cpuset *cs; |
366 | 376 | ||
367 | if (task_cs(tsk) == &top_cpuset) { | 377 | rcu_read_lock(); |
368 | /* Don't need rcu for top_cpuset. It's never freed. */ | 378 | my_cpusets_mem_gen = task_cs(tsk)->mems_generation; |
369 | my_cpusets_mem_gen = top_cpuset.mems_generation; | 379 | rcu_read_unlock(); |
370 | } else { | ||
371 | rcu_read_lock(); | ||
372 | my_cpusets_mem_gen = task_cs(tsk)->mems_generation; | ||
373 | rcu_read_unlock(); | ||
374 | } | ||
375 | 380 | ||
376 | if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) { | 381 | if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) { |
377 | mutex_lock(&callback_mutex); | 382 | mutex_lock(&callback_mutex); |
@@ -403,12 +408,43 @@ void cpuset_update_task_memory_state(void) | |||
403 | 408 | ||
404 | static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q) | 409 | static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q) |
405 | { | 410 | { |
406 | return cpus_subset(p->cpus_allowed, q->cpus_allowed) && | 411 | return cpumask_subset(p->cpus_allowed, q->cpus_allowed) && |
407 | nodes_subset(p->mems_allowed, q->mems_allowed) && | 412 | nodes_subset(p->mems_allowed, q->mems_allowed) && |
408 | is_cpu_exclusive(p) <= is_cpu_exclusive(q) && | 413 | is_cpu_exclusive(p) <= is_cpu_exclusive(q) && |
409 | is_mem_exclusive(p) <= is_mem_exclusive(q); | 414 | is_mem_exclusive(p) <= is_mem_exclusive(q); |
410 | } | 415 | } |
411 | 416 | ||
417 | /** | ||
418 | * alloc_trial_cpuset - allocate a trial cpuset | ||
419 | * @cs: the cpuset that the trial cpuset duplicates | ||
420 | */ | ||
421 | static struct cpuset *alloc_trial_cpuset(const struct cpuset *cs) | ||
422 | { | ||
423 | struct cpuset *trial; | ||
424 | |||
425 | trial = kmemdup(cs, sizeof(*cs), GFP_KERNEL); | ||
426 | if (!trial) | ||
427 | return NULL; | ||
428 | |||
429 | if (!alloc_cpumask_var(&trial->cpus_allowed, GFP_KERNEL)) { | ||
430 | kfree(trial); | ||
431 | return NULL; | ||
432 | } | ||
433 | cpumask_copy(trial->cpus_allowed, cs->cpus_allowed); | ||
434 | |||
435 | return trial; | ||
436 | } | ||
437 | |||
438 | /** | ||
439 | * free_trial_cpuset - free the trial cpuset | ||
440 | * @trial: the trial cpuset to be freed | ||
441 | */ | ||
442 | static void free_trial_cpuset(struct cpuset *trial) | ||
443 | { | ||
444 | free_cpumask_var(trial->cpus_allowed); | ||
445 | kfree(trial); | ||
446 | } | ||
447 | |||
412 | /* | 448 | /* |
413 | * validate_change() - Used to validate that any proposed cpuset change | 449 | * validate_change() - Used to validate that any proposed cpuset change |
414 | * follows the structural rules for cpusets. | 450 | * follows the structural rules for cpusets. |
@@ -458,7 +494,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial) | |||
458 | c = cgroup_cs(cont); | 494 | c = cgroup_cs(cont); |
459 | if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) && | 495 | if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) && |
460 | c != cur && | 496 | c != cur && |
461 | cpus_intersects(trial->cpus_allowed, c->cpus_allowed)) | 497 | cpumask_intersects(trial->cpus_allowed, c->cpus_allowed)) |
462 | return -EINVAL; | 498 | return -EINVAL; |
463 | if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) && | 499 | if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) && |
464 | c != cur && | 500 | c != cur && |
@@ -468,7 +504,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial) | |||
468 | 504 | ||
469 | /* Cpusets with tasks can't have empty cpus_allowed or mems_allowed */ | 505 | /* Cpusets with tasks can't have empty cpus_allowed or mems_allowed */ |
470 | if (cgroup_task_count(cur->css.cgroup)) { | 506 | if (cgroup_task_count(cur->css.cgroup)) { |
471 | if (cpus_empty(trial->cpus_allowed) || | 507 | if (cpumask_empty(trial->cpus_allowed) || |
472 | nodes_empty(trial->mems_allowed)) { | 508 | nodes_empty(trial->mems_allowed)) { |
473 | return -ENOSPC; | 509 | return -ENOSPC; |
474 | } | 510 | } |
@@ -483,7 +519,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial) | |||
483 | */ | 519 | */ |
484 | static int cpusets_overlap(struct cpuset *a, struct cpuset *b) | 520 | static int cpusets_overlap(struct cpuset *a, struct cpuset *b) |
485 | { | 521 | { |
486 | return cpus_intersects(a->cpus_allowed, b->cpus_allowed); | 522 | return cpumask_intersects(a->cpus_allowed, b->cpus_allowed); |
487 | } | 523 | } |
488 | 524 | ||
489 | static void | 525 | static void |
@@ -508,7 +544,7 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c) | |||
508 | cp = list_first_entry(&q, struct cpuset, stack_list); | 544 | cp = list_first_entry(&q, struct cpuset, stack_list); |
509 | list_del(q.next); | 545 | list_del(q.next); |
510 | 546 | ||
511 | if (cpus_empty(cp->cpus_allowed)) | 547 | if (cpumask_empty(cp->cpus_allowed)) |
512 | continue; | 548 | continue; |
513 | 549 | ||
514 | if (is_sched_load_balance(cp)) | 550 | if (is_sched_load_balance(cp)) |
@@ -575,7 +611,8 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c) | |||
575 | * element of the partition (one sched domain) to be passed to | 611 | * element of the partition (one sched domain) to be passed to |
576 | * partition_sched_domains(). | 612 | * partition_sched_domains(). |
577 | */ | 613 | */ |
578 | static int generate_sched_domains(cpumask_t **domains, | 614 | /* FIXME: see the FIXME in partition_sched_domains() */ |
615 | static int generate_sched_domains(struct cpumask **domains, | ||
579 | struct sched_domain_attr **attributes) | 616 | struct sched_domain_attr **attributes) |
580 | { | 617 | { |
581 | LIST_HEAD(q); /* queue of cpusets to be scanned */ | 618 | LIST_HEAD(q); /* queue of cpusets to be scanned */ |
@@ -583,10 +620,10 @@ static int generate_sched_domains(cpumask_t **domains, | |||
583 | struct cpuset **csa; /* array of all cpuset ptrs */ | 620 | struct cpuset **csa; /* array of all cpuset ptrs */ |
584 | int csn; /* how many cpuset ptrs in csa so far */ | 621 | int csn; /* how many cpuset ptrs in csa so far */ |
585 | int i, j, k; /* indices for partition finding loops */ | 622 | int i, j, k; /* indices for partition finding loops */ |
586 | cpumask_t *doms; /* resulting partition; i.e. sched domains */ | 623 | struct cpumask *doms; /* resulting partition; i.e. sched domains */ |
587 | struct sched_domain_attr *dattr; /* attributes for custom domains */ | 624 | struct sched_domain_attr *dattr; /* attributes for custom domains */ |
588 | int ndoms = 0; /* number of sched domains in result */ | 625 | int ndoms = 0; /* number of sched domains in result */ |
589 | int nslot; /* next empty doms[] cpumask_t slot */ | 626 | int nslot; /* next empty doms[] struct cpumask slot */ |
590 | 627 | ||
591 | doms = NULL; | 628 | doms = NULL; |
592 | dattr = NULL; | 629 | dattr = NULL; |
@@ -594,7 +631,7 @@ static int generate_sched_domains(cpumask_t **domains, | |||
594 | 631 | ||
595 | /* Special case for the 99% of systems with one, full, sched domain */ | 632 | /* Special case for the 99% of systems with one, full, sched domain */ |
596 | if (is_sched_load_balance(&top_cpuset)) { | 633 | if (is_sched_load_balance(&top_cpuset)) { |
597 | doms = kmalloc(sizeof(cpumask_t), GFP_KERNEL); | 634 | doms = kmalloc(cpumask_size(), GFP_KERNEL); |
598 | if (!doms) | 635 | if (!doms) |
599 | goto done; | 636 | goto done; |
600 | 637 | ||
@@ -603,7 +640,7 @@ static int generate_sched_domains(cpumask_t **domains, | |||
603 | *dattr = SD_ATTR_INIT; | 640 | *dattr = SD_ATTR_INIT; |
604 | update_domain_attr_tree(dattr, &top_cpuset); | 641 | update_domain_attr_tree(dattr, &top_cpuset); |
605 | } | 642 | } |
606 | *doms = top_cpuset.cpus_allowed; | 643 | cpumask_copy(doms, top_cpuset.cpus_allowed); |
607 | 644 | ||
608 | ndoms = 1; | 645 | ndoms = 1; |
609 | goto done; | 646 | goto done; |
@@ -622,7 +659,7 @@ static int generate_sched_domains(cpumask_t **domains, | |||
622 | cp = list_first_entry(&q, struct cpuset, stack_list); | 659 | cp = list_first_entry(&q, struct cpuset, stack_list); |
623 | list_del(q.next); | 660 | list_del(q.next); |
624 | 661 | ||
625 | if (cpus_empty(cp->cpus_allowed)) | 662 | if (cpumask_empty(cp->cpus_allowed)) |
626 | continue; | 663 | continue; |
627 | 664 | ||
628 | /* | 665 | /* |
@@ -673,7 +710,7 @@ restart: | |||
673 | * Now we know how many domains to create. | 710 | * Now we know how many domains to create. |
674 | * Convert <csn, csa> to <ndoms, doms> and populate cpu masks. | 711 | * Convert <csn, csa> to <ndoms, doms> and populate cpu masks. |
675 | */ | 712 | */ |
676 | doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL); | 713 | doms = kmalloc(ndoms * cpumask_size(), GFP_KERNEL); |
677 | if (!doms) | 714 | if (!doms) |
678 | goto done; | 715 | goto done; |
679 | 716 | ||
@@ -685,7 +722,7 @@ restart: | |||
685 | 722 | ||
686 | for (nslot = 0, i = 0; i < csn; i++) { | 723 | for (nslot = 0, i = 0; i < csn; i++) { |
687 | struct cpuset *a = csa[i]; | 724 | struct cpuset *a = csa[i]; |
688 | cpumask_t *dp; | 725 | struct cpumask *dp; |
689 | int apn = a->pn; | 726 | int apn = a->pn; |
690 | 727 | ||
691 | if (apn < 0) { | 728 | if (apn < 0) { |
@@ -708,14 +745,14 @@ restart: | |||
708 | continue; | 745 | continue; |
709 | } | 746 | } |
710 | 747 | ||
711 | cpus_clear(*dp); | 748 | cpumask_clear(dp); |
712 | if (dattr) | 749 | if (dattr) |
713 | *(dattr + nslot) = SD_ATTR_INIT; | 750 | *(dattr + nslot) = SD_ATTR_INIT; |
714 | for (j = i; j < csn; j++) { | 751 | for (j = i; j < csn; j++) { |
715 | struct cpuset *b = csa[j]; | 752 | struct cpuset *b = csa[j]; |
716 | 753 | ||
717 | if (apn == b->pn) { | 754 | if (apn == b->pn) { |
718 | cpus_or(*dp, *dp, b->cpus_allowed); | 755 | cpumask_or(dp, dp, b->cpus_allowed); |
719 | if (dattr) | 756 | if (dattr) |
720 | update_domain_attr_tree(dattr + nslot, b); | 757 | update_domain_attr_tree(dattr + nslot, b); |
721 | 758 | ||
@@ -755,7 +792,7 @@ done: | |||
755 | static void do_rebuild_sched_domains(struct work_struct *unused) | 792 | static void do_rebuild_sched_domains(struct work_struct *unused) |
756 | { | 793 | { |
757 | struct sched_domain_attr *attr; | 794 | struct sched_domain_attr *attr; |
758 | cpumask_t *doms; | 795 | struct cpumask *doms; |
759 | int ndoms; | 796 | int ndoms; |
760 | 797 | ||
761 | get_online_cpus(); | 798 | get_online_cpus(); |
@@ -824,7 +861,7 @@ void rebuild_sched_domains(void) | |||
824 | static int cpuset_test_cpumask(struct task_struct *tsk, | 861 | static int cpuset_test_cpumask(struct task_struct *tsk, |
825 | struct cgroup_scanner *scan) | 862 | struct cgroup_scanner *scan) |
826 | { | 863 | { |
827 | return !cpus_equal(tsk->cpus_allowed, | 864 | return !cpumask_equal(&tsk->cpus_allowed, |
828 | (cgroup_cs(scan->cg))->cpus_allowed); | 865 | (cgroup_cs(scan->cg))->cpus_allowed); |
829 | } | 866 | } |
830 | 867 | ||
@@ -842,7 +879,7 @@ static int cpuset_test_cpumask(struct task_struct *tsk, | |||
842 | static void cpuset_change_cpumask(struct task_struct *tsk, | 879 | static void cpuset_change_cpumask(struct task_struct *tsk, |
843 | struct cgroup_scanner *scan) | 880 | struct cgroup_scanner *scan) |
844 | { | 881 | { |
845 | set_cpus_allowed_ptr(tsk, &((cgroup_cs(scan->cg))->cpus_allowed)); | 882 | set_cpus_allowed_ptr(tsk, ((cgroup_cs(scan->cg))->cpus_allowed)); |
846 | } | 883 | } |
847 | 884 | ||
848 | /** | 885 | /** |
@@ -874,10 +911,10 @@ static void update_tasks_cpumask(struct cpuset *cs, struct ptr_heap *heap) | |||
874 | * @cs: the cpuset to consider | 911 | * @cs: the cpuset to consider |
875 | * @buf: buffer of cpu numbers written to this cpuset | 912 | * @buf: buffer of cpu numbers written to this cpuset |
876 | */ | 913 | */ |
877 | static int update_cpumask(struct cpuset *cs, const char *buf) | 914 | static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, |
915 | const char *buf) | ||
878 | { | 916 | { |
879 | struct ptr_heap heap; | 917 | struct ptr_heap heap; |
880 | struct cpuset trialcs; | ||
881 | int retval; | 918 | int retval; |
882 | int is_load_balanced; | 919 | int is_load_balanced; |
883 | 920 | ||
@@ -885,8 +922,6 @@ static int update_cpumask(struct cpuset *cs, const char *buf) | |||
885 | if (cs == &top_cpuset) | 922 | if (cs == &top_cpuset) |
886 | return -EACCES; | 923 | return -EACCES; |
887 | 924 | ||
888 | trialcs = *cs; | ||
889 | |||
890 | /* | 925 | /* |
891 | * An empty cpus_allowed is ok only if the cpuset has no tasks. | 926 | * An empty cpus_allowed is ok only if the cpuset has no tasks. |
892 | * Since cpulist_parse() fails on an empty mask, we special case | 927 | * Since cpulist_parse() fails on an empty mask, we special case |
@@ -894,31 +929,31 @@ static int update_cpumask(struct cpuset *cs, const char *buf) | |||
894 | * with tasks have cpus. | 929 | * with tasks have cpus. |
895 | */ | 930 | */ |
896 | if (!*buf) { | 931 | if (!*buf) { |
897 | cpus_clear(trialcs.cpus_allowed); | 932 | cpumask_clear(trialcs->cpus_allowed); |
898 | } else { | 933 | } else { |
899 | retval = cpulist_parse(buf, &trialcs.cpus_allowed); | 934 | retval = cpulist_parse(buf, trialcs->cpus_allowed); |
900 | if (retval < 0) | 935 | if (retval < 0) |
901 | return retval; | 936 | return retval; |
902 | 937 | ||
903 | if (!cpus_subset(trialcs.cpus_allowed, cpu_online_map)) | 938 | if (!cpumask_subset(trialcs->cpus_allowed, cpu_online_mask)) |
904 | return -EINVAL; | 939 | return -EINVAL; |
905 | } | 940 | } |
906 | retval = validate_change(cs, &trialcs); | 941 | retval = validate_change(cs, trialcs); |
907 | if (retval < 0) | 942 | if (retval < 0) |
908 | return retval; | 943 | return retval; |
909 | 944 | ||
910 | /* Nothing to do if the cpus didn't change */ | 945 | /* Nothing to do if the cpus didn't change */ |
911 | if (cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed)) | 946 | if (cpumask_equal(cs->cpus_allowed, trialcs->cpus_allowed)) |
912 | return 0; | 947 | return 0; |
913 | 948 | ||
914 | retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL); | 949 | retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL); |
915 | if (retval) | 950 | if (retval) |
916 | return retval; | 951 | return retval; |
917 | 952 | ||
918 | is_load_balanced = is_sched_load_balance(&trialcs); | 953 | is_load_balanced = is_sched_load_balance(trialcs); |
919 | 954 | ||
920 | mutex_lock(&callback_mutex); | 955 | mutex_lock(&callback_mutex); |
921 | cs->cpus_allowed = trialcs.cpus_allowed; | 956 | cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed); |
922 | mutex_unlock(&callback_mutex); | 957 | mutex_unlock(&callback_mutex); |
923 | 958 | ||
924 | /* | 959 | /* |
@@ -1006,7 +1041,7 @@ static int update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem) | |||
1006 | cpuset_being_rebound = cs; /* causes mpol_dup() rebind */ | 1041 | cpuset_being_rebound = cs; /* causes mpol_dup() rebind */ |
1007 | 1042 | ||
1008 | fudge = 10; /* spare mmarray[] slots */ | 1043 | fudge = 10; /* spare mmarray[] slots */ |
1009 | fudge += cpus_weight(cs->cpus_allowed); /* imagine one fork-bomb/cpu */ | 1044 | fudge += cpumask_weight(cs->cpus_allowed);/* imagine 1 fork-bomb/cpu */ |
1010 | retval = -ENOMEM; | 1045 | retval = -ENOMEM; |
1011 | 1046 | ||
1012 | /* | 1047 | /* |
@@ -1093,9 +1128,9 @@ done: | |||
1093 | * lock each such tasks mm->mmap_sem, scan its vma's and rebind | 1128 | * lock each such tasks mm->mmap_sem, scan its vma's and rebind |
1094 | * their mempolicies to the cpusets new mems_allowed. | 1129 | * their mempolicies to the cpusets new mems_allowed. |
1095 | */ | 1130 | */ |
1096 | static int update_nodemask(struct cpuset *cs, const char *buf) | 1131 | static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, |
1132 | const char *buf) | ||
1097 | { | 1133 | { |
1098 | struct cpuset trialcs; | ||
1099 | nodemask_t oldmem; | 1134 | nodemask_t oldmem; |
1100 | int retval; | 1135 | int retval; |
1101 | 1136 | ||
@@ -1106,8 +1141,6 @@ static int update_nodemask(struct cpuset *cs, const char *buf) | |||
1106 | if (cs == &top_cpuset) | 1141 | if (cs == &top_cpuset) |
1107 | return -EACCES; | 1142 | return -EACCES; |
1108 | 1143 | ||
1109 | trialcs = *cs; | ||
1110 | |||
1111 | /* | 1144 | /* |
1112 | * An empty mems_allowed is ok iff there are no tasks in the cpuset. | 1145 | * An empty mems_allowed is ok iff there are no tasks in the cpuset. |
1113 | * Since nodelist_parse() fails on an empty mask, we special case | 1146 | * Since nodelist_parse() fails on an empty mask, we special case |
@@ -1115,27 +1148,27 @@ static int update_nodemask(struct cpuset *cs, const char *buf) | |||
1115 | * with tasks have memory. | 1148 | * with tasks have memory. |
1116 | */ | 1149 | */ |
1117 | if (!*buf) { | 1150 | if (!*buf) { |
1118 | nodes_clear(trialcs.mems_allowed); | 1151 | nodes_clear(trialcs->mems_allowed); |
1119 | } else { | 1152 | } else { |
1120 | retval = nodelist_parse(buf, trialcs.mems_allowed); | 1153 | retval = nodelist_parse(buf, trialcs->mems_allowed); |
1121 | if (retval < 0) | 1154 | if (retval < 0) |
1122 | goto done; | 1155 | goto done; |
1123 | 1156 | ||
1124 | if (!nodes_subset(trialcs.mems_allowed, | 1157 | if (!nodes_subset(trialcs->mems_allowed, |
1125 | node_states[N_HIGH_MEMORY])) | 1158 | node_states[N_HIGH_MEMORY])) |
1126 | return -EINVAL; | 1159 | return -EINVAL; |
1127 | } | 1160 | } |
1128 | oldmem = cs->mems_allowed; | 1161 | oldmem = cs->mems_allowed; |
1129 | if (nodes_equal(oldmem, trialcs.mems_allowed)) { | 1162 | if (nodes_equal(oldmem, trialcs->mems_allowed)) { |
1130 | retval = 0; /* Too easy - nothing to do */ | 1163 | retval = 0; /* Too easy - nothing to do */ |
1131 | goto done; | 1164 | goto done; |
1132 | } | 1165 | } |
1133 | retval = validate_change(cs, &trialcs); | 1166 | retval = validate_change(cs, trialcs); |
1134 | if (retval < 0) | 1167 | if (retval < 0) |
1135 | goto done; | 1168 | goto done; |
1136 | 1169 | ||
1137 | mutex_lock(&callback_mutex); | 1170 | mutex_lock(&callback_mutex); |
1138 | cs->mems_allowed = trialcs.mems_allowed; | 1171 | cs->mems_allowed = trialcs->mems_allowed; |
1139 | cs->mems_generation = cpuset_mems_generation++; | 1172 | cs->mems_generation = cpuset_mems_generation++; |
1140 | mutex_unlock(&callback_mutex); | 1173 | mutex_unlock(&callback_mutex); |
1141 | 1174 | ||
@@ -1156,7 +1189,8 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val) | |||
1156 | 1189 | ||
1157 | if (val != cs->relax_domain_level) { | 1190 | if (val != cs->relax_domain_level) { |
1158 | cs->relax_domain_level = val; | 1191 | cs->relax_domain_level = val; |
1159 | if (!cpus_empty(cs->cpus_allowed) && is_sched_load_balance(cs)) | 1192 | if (!cpumask_empty(cs->cpus_allowed) && |
1193 | is_sched_load_balance(cs)) | ||
1160 | async_rebuild_sched_domains(); | 1194 | async_rebuild_sched_domains(); |
1161 | } | 1195 | } |
1162 | 1196 | ||
@@ -1175,31 +1209,36 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val) | |||
1175 | static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, | 1209 | static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, |
1176 | int turning_on) | 1210 | int turning_on) |
1177 | { | 1211 | { |
1178 | struct cpuset trialcs; | 1212 | struct cpuset *trialcs; |
1179 | int err; | 1213 | int err; |
1180 | int balance_flag_changed; | 1214 | int balance_flag_changed; |
1181 | 1215 | ||
1182 | trialcs = *cs; | 1216 | trialcs = alloc_trial_cpuset(cs); |
1217 | if (!trialcs) | ||
1218 | return -ENOMEM; | ||
1219 | |||
1183 | if (turning_on) | 1220 | if (turning_on) |
1184 | set_bit(bit, &trialcs.flags); | 1221 | set_bit(bit, &trialcs->flags); |
1185 | else | 1222 | else |
1186 | clear_bit(bit, &trialcs.flags); | 1223 | clear_bit(bit, &trialcs->flags); |
1187 | 1224 | ||
1188 | err = validate_change(cs, &trialcs); | 1225 | err = validate_change(cs, trialcs); |
1189 | if (err < 0) | 1226 | if (err < 0) |
1190 | return err; | 1227 | goto out; |
1191 | 1228 | ||
1192 | balance_flag_changed = (is_sched_load_balance(cs) != | 1229 | balance_flag_changed = (is_sched_load_balance(cs) != |
1193 | is_sched_load_balance(&trialcs)); | 1230 | is_sched_load_balance(trialcs)); |
1194 | 1231 | ||
1195 | mutex_lock(&callback_mutex); | 1232 | mutex_lock(&callback_mutex); |
1196 | cs->flags = trialcs.flags; | 1233 | cs->flags = trialcs->flags; |
1197 | mutex_unlock(&callback_mutex); | 1234 | mutex_unlock(&callback_mutex); |
1198 | 1235 | ||
1199 | if (!cpus_empty(trialcs.cpus_allowed) && balance_flag_changed) | 1236 | if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed) |
1200 | async_rebuild_sched_domains(); | 1237 | async_rebuild_sched_domains(); |
1201 | 1238 | ||
1202 | return 0; | 1239 | out: |
1240 | free_trial_cpuset(trialcs); | ||
1241 | return err; | ||
1203 | } | 1242 | } |
1204 | 1243 | ||
1205 | /* | 1244 | /* |
@@ -1300,42 +1339,47 @@ static int fmeter_getrate(struct fmeter *fmp) | |||
1300 | return val; | 1339 | return val; |
1301 | } | 1340 | } |
1302 | 1341 | ||
1342 | /* Protected by cgroup_lock */ | ||
1343 | static cpumask_var_t cpus_attach; | ||
1344 | |||
1303 | /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */ | 1345 | /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */ |
1304 | static int cpuset_can_attach(struct cgroup_subsys *ss, | 1346 | static int cpuset_can_attach(struct cgroup_subsys *ss, |
1305 | struct cgroup *cont, struct task_struct *tsk) | 1347 | struct cgroup *cont, struct task_struct *tsk) |
1306 | { | 1348 | { |
1307 | struct cpuset *cs = cgroup_cs(cont); | 1349 | struct cpuset *cs = cgroup_cs(cont); |
1350 | int ret = 0; | ||
1308 | 1351 | ||
1309 | if (cpus_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) | 1352 | if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) |
1310 | return -ENOSPC; | 1353 | return -ENOSPC; |
1311 | if (tsk->flags & PF_THREAD_BOUND) { | ||
1312 | cpumask_t mask; | ||
1313 | 1354 | ||
1355 | if (tsk->flags & PF_THREAD_BOUND) { | ||
1314 | mutex_lock(&callback_mutex); | 1356 | mutex_lock(&callback_mutex); |
1315 | mask = cs->cpus_allowed; | 1357 | if (!cpumask_equal(&tsk->cpus_allowed, cs->cpus_allowed)) |
1358 | ret = -EINVAL; | ||
1316 | mutex_unlock(&callback_mutex); | 1359 | mutex_unlock(&callback_mutex); |
1317 | if (!cpus_equal(tsk->cpus_allowed, mask)) | ||
1318 | return -EINVAL; | ||
1319 | } | 1360 | } |
1320 | 1361 | ||
1321 | return security_task_setscheduler(tsk, 0, NULL); | 1362 | return ret < 0 ? ret : security_task_setscheduler(tsk, 0, NULL); |
1322 | } | 1363 | } |
1323 | 1364 | ||
1324 | static void cpuset_attach(struct cgroup_subsys *ss, | 1365 | static void cpuset_attach(struct cgroup_subsys *ss, |
1325 | struct cgroup *cont, struct cgroup *oldcont, | 1366 | struct cgroup *cont, struct cgroup *oldcont, |
1326 | struct task_struct *tsk) | 1367 | struct task_struct *tsk) |
1327 | { | 1368 | { |
1328 | cpumask_t cpus; | ||
1329 | nodemask_t from, to; | 1369 | nodemask_t from, to; |
1330 | struct mm_struct *mm; | 1370 | struct mm_struct *mm; |
1331 | struct cpuset *cs = cgroup_cs(cont); | 1371 | struct cpuset *cs = cgroup_cs(cont); |
1332 | struct cpuset *oldcs = cgroup_cs(oldcont); | 1372 | struct cpuset *oldcs = cgroup_cs(oldcont); |
1333 | int err; | 1373 | int err; |
1334 | 1374 | ||
1335 | mutex_lock(&callback_mutex); | 1375 | if (cs == &top_cpuset) { |
1336 | guarantee_online_cpus(cs, &cpus); | 1376 | cpumask_copy(cpus_attach, cpu_possible_mask); |
1337 | err = set_cpus_allowed_ptr(tsk, &cpus); | 1377 | } else { |
1338 | mutex_unlock(&callback_mutex); | 1378 | mutex_lock(&callback_mutex); |
1379 | guarantee_online_cpus(cs, cpus_attach); | ||
1380 | mutex_unlock(&callback_mutex); | ||
1381 | } | ||
1382 | err = set_cpus_allowed_ptr(tsk, cpus_attach); | ||
1339 | if (err) | 1383 | if (err) |
1340 | return; | 1384 | return; |
1341 | 1385 | ||
@@ -1348,7 +1392,6 @@ static void cpuset_attach(struct cgroup_subsys *ss, | |||
1348 | cpuset_migrate_mm(mm, &from, &to); | 1392 | cpuset_migrate_mm(mm, &from, &to); |
1349 | mmput(mm); | 1393 | mmput(mm); |
1350 | } | 1394 | } |
1351 | |||
1352 | } | 1395 | } |
1353 | 1396 | ||
1354 | /* The various types of files and directories in a cpuset file system */ | 1397 | /* The various types of files and directories in a cpuset file system */ |
@@ -1443,21 +1486,29 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft, | |||
1443 | const char *buf) | 1486 | const char *buf) |
1444 | { | 1487 | { |
1445 | int retval = 0; | 1488 | int retval = 0; |
1489 | struct cpuset *cs = cgroup_cs(cgrp); | ||
1490 | struct cpuset *trialcs; | ||
1446 | 1491 | ||
1447 | if (!cgroup_lock_live_group(cgrp)) | 1492 | if (!cgroup_lock_live_group(cgrp)) |
1448 | return -ENODEV; | 1493 | return -ENODEV; |
1449 | 1494 | ||
1495 | trialcs = alloc_trial_cpuset(cs); | ||
1496 | if (!trialcs) | ||
1497 | return -ENOMEM; | ||
1498 | |||
1450 | switch (cft->private) { | 1499 | switch (cft->private) { |
1451 | case FILE_CPULIST: | 1500 | case FILE_CPULIST: |
1452 | retval = update_cpumask(cgroup_cs(cgrp), buf); | 1501 | retval = update_cpumask(cs, trialcs, buf); |
1453 | break; | 1502 | break; |
1454 | case FILE_MEMLIST: | 1503 | case FILE_MEMLIST: |
1455 | retval = update_nodemask(cgroup_cs(cgrp), buf); | 1504 | retval = update_nodemask(cs, trialcs, buf); |
1456 | break; | 1505 | break; |
1457 | default: | 1506 | default: |
1458 | retval = -EINVAL; | 1507 | retval = -EINVAL; |
1459 | break; | 1508 | break; |
1460 | } | 1509 | } |
1510 | |||
1511 | free_trial_cpuset(trialcs); | ||
1461 | cgroup_unlock(); | 1512 | cgroup_unlock(); |
1462 | return retval; | 1513 | return retval; |
1463 | } | 1514 | } |
@@ -1476,13 +1527,13 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft, | |||
1476 | 1527 | ||
1477 | static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs) | 1528 | static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs) |
1478 | { | 1529 | { |
1479 | cpumask_t mask; | 1530 | int ret; |
1480 | 1531 | ||
1481 | mutex_lock(&callback_mutex); | 1532 | mutex_lock(&callback_mutex); |
1482 | mask = cs->cpus_allowed; | 1533 | ret = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_allowed); |
1483 | mutex_unlock(&callback_mutex); | 1534 | mutex_unlock(&callback_mutex); |
1484 | 1535 | ||
1485 | return cpulist_scnprintf(page, PAGE_SIZE, &mask); | 1536 | return ret; |
1486 | } | 1537 | } |
1487 | 1538 | ||
1488 | static int cpuset_sprintf_memlist(char *page, struct cpuset *cs) | 1539 | static int cpuset_sprintf_memlist(char *page, struct cpuset *cs) |
@@ -1718,7 +1769,7 @@ static void cpuset_post_clone(struct cgroup_subsys *ss, | |||
1718 | parent_cs = cgroup_cs(parent); | 1769 | parent_cs = cgroup_cs(parent); |
1719 | 1770 | ||
1720 | cs->mems_allowed = parent_cs->mems_allowed; | 1771 | cs->mems_allowed = parent_cs->mems_allowed; |
1721 | cs->cpus_allowed = parent_cs->cpus_allowed; | 1772 | cpumask_copy(cs->cpus_allowed, parent_cs->cpus_allowed); |
1722 | return; | 1773 | return; |
1723 | } | 1774 | } |
1724 | 1775 | ||
@@ -1744,6 +1795,10 @@ static struct cgroup_subsys_state *cpuset_create( | |||
1744 | cs = kmalloc(sizeof(*cs), GFP_KERNEL); | 1795 | cs = kmalloc(sizeof(*cs), GFP_KERNEL); |
1745 | if (!cs) | 1796 | if (!cs) |
1746 | return ERR_PTR(-ENOMEM); | 1797 | return ERR_PTR(-ENOMEM); |
1798 | if (!alloc_cpumask_var(&cs->cpus_allowed, GFP_KERNEL)) { | ||
1799 | kfree(cs); | ||
1800 | return ERR_PTR(-ENOMEM); | ||
1801 | } | ||
1747 | 1802 | ||
1748 | cpuset_update_task_memory_state(); | 1803 | cpuset_update_task_memory_state(); |
1749 | cs->flags = 0; | 1804 | cs->flags = 0; |
@@ -1752,7 +1807,7 @@ static struct cgroup_subsys_state *cpuset_create( | |||
1752 | if (is_spread_slab(parent)) | 1807 | if (is_spread_slab(parent)) |
1753 | set_bit(CS_SPREAD_SLAB, &cs->flags); | 1808 | set_bit(CS_SPREAD_SLAB, &cs->flags); |
1754 | set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); | 1809 | set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); |
1755 | cpus_clear(cs->cpus_allowed); | 1810 | cpumask_clear(cs->cpus_allowed); |
1756 | nodes_clear(cs->mems_allowed); | 1811 | nodes_clear(cs->mems_allowed); |
1757 | cs->mems_generation = cpuset_mems_generation++; | 1812 | cs->mems_generation = cpuset_mems_generation++; |
1758 | fmeter_init(&cs->fmeter); | 1813 | fmeter_init(&cs->fmeter); |
@@ -1779,6 +1834,7 @@ static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont) | |||
1779 | update_flag(CS_SCHED_LOAD_BALANCE, cs, 0); | 1834 | update_flag(CS_SCHED_LOAD_BALANCE, cs, 0); |
1780 | 1835 | ||
1781 | number_of_cpusets--; | 1836 | number_of_cpusets--; |
1837 | free_cpumask_var(cs->cpus_allowed); | ||
1782 | kfree(cs); | 1838 | kfree(cs); |
1783 | } | 1839 | } |
1784 | 1840 | ||
@@ -1802,6 +1858,8 @@ struct cgroup_subsys cpuset_subsys = { | |||
1802 | 1858 | ||
1803 | int __init cpuset_init_early(void) | 1859 | int __init cpuset_init_early(void) |
1804 | { | 1860 | { |
1861 | alloc_bootmem_cpumask_var(&top_cpuset.cpus_allowed); | ||
1862 | |||
1805 | top_cpuset.mems_generation = cpuset_mems_generation++; | 1863 | top_cpuset.mems_generation = cpuset_mems_generation++; |
1806 | return 0; | 1864 | return 0; |
1807 | } | 1865 | } |
@@ -1817,7 +1875,7 @@ int __init cpuset_init(void) | |||
1817 | { | 1875 | { |
1818 | int err = 0; | 1876 | int err = 0; |
1819 | 1877 | ||
1820 | cpus_setall(top_cpuset.cpus_allowed); | 1878 | cpumask_setall(top_cpuset.cpus_allowed); |
1821 | nodes_setall(top_cpuset.mems_allowed); | 1879 | nodes_setall(top_cpuset.mems_allowed); |
1822 | 1880 | ||
1823 | fmeter_init(&top_cpuset.fmeter); | 1881 | fmeter_init(&top_cpuset.fmeter); |
@@ -1829,6 +1887,9 @@ int __init cpuset_init(void) | |||
1829 | if (err < 0) | 1887 | if (err < 0) |
1830 | return err; | 1888 | return err; |
1831 | 1889 | ||
1890 | if (!alloc_cpumask_var(&cpus_attach, GFP_KERNEL)) | ||
1891 | BUG(); | ||
1892 | |||
1832 | number_of_cpusets = 1; | 1893 | number_of_cpusets = 1; |
1833 | return 0; | 1894 | return 0; |
1834 | } | 1895 | } |
@@ -1903,7 +1964,7 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs) | |||
1903 | * has online cpus, so can't be empty). | 1964 | * has online cpus, so can't be empty). |
1904 | */ | 1965 | */ |
1905 | parent = cs->parent; | 1966 | parent = cs->parent; |
1906 | while (cpus_empty(parent->cpus_allowed) || | 1967 | while (cpumask_empty(parent->cpus_allowed) || |
1907 | nodes_empty(parent->mems_allowed)) | 1968 | nodes_empty(parent->mems_allowed)) |
1908 | parent = parent->parent; | 1969 | parent = parent->parent; |
1909 | 1970 | ||
@@ -1944,7 +2005,7 @@ static void scan_for_empty_cpusets(struct cpuset *root) | |||
1944 | } | 2005 | } |
1945 | 2006 | ||
1946 | /* Continue past cpusets with all cpus, mems online */ | 2007 | /* Continue past cpusets with all cpus, mems online */ |
1947 | if (cpus_subset(cp->cpus_allowed, cpu_online_map) && | 2008 | if (cpumask_subset(cp->cpus_allowed, cpu_online_mask) && |
1948 | nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY])) | 2009 | nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY])) |
1949 | continue; | 2010 | continue; |
1950 | 2011 | ||
@@ -1952,13 +2013,14 @@ static void scan_for_empty_cpusets(struct cpuset *root) | |||
1952 | 2013 | ||
1953 | /* Remove offline cpus and mems from this cpuset. */ | 2014 | /* Remove offline cpus and mems from this cpuset. */ |
1954 | mutex_lock(&callback_mutex); | 2015 | mutex_lock(&callback_mutex); |
1955 | cpus_and(cp->cpus_allowed, cp->cpus_allowed, cpu_online_map); | 2016 | cpumask_and(cp->cpus_allowed, cp->cpus_allowed, |
2017 | cpu_online_mask); | ||
1956 | nodes_and(cp->mems_allowed, cp->mems_allowed, | 2018 | nodes_and(cp->mems_allowed, cp->mems_allowed, |
1957 | node_states[N_HIGH_MEMORY]); | 2019 | node_states[N_HIGH_MEMORY]); |
1958 | mutex_unlock(&callback_mutex); | 2020 | mutex_unlock(&callback_mutex); |
1959 | 2021 | ||
1960 | /* Move tasks from the empty cpuset to a parent */ | 2022 | /* Move tasks from the empty cpuset to a parent */ |
1961 | if (cpus_empty(cp->cpus_allowed) || | 2023 | if (cpumask_empty(cp->cpus_allowed) || |
1962 | nodes_empty(cp->mems_allowed)) | 2024 | nodes_empty(cp->mems_allowed)) |
1963 | remove_tasks_in_empty_cpuset(cp); | 2025 | remove_tasks_in_empty_cpuset(cp); |
1964 | else { | 2026 | else { |
@@ -1984,7 +2046,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb, | |||
1984 | unsigned long phase, void *unused_cpu) | 2046 | unsigned long phase, void *unused_cpu) |
1985 | { | 2047 | { |
1986 | struct sched_domain_attr *attr; | 2048 | struct sched_domain_attr *attr; |
1987 | cpumask_t *doms; | 2049 | struct cpumask *doms; |
1988 | int ndoms; | 2050 | int ndoms; |
1989 | 2051 | ||
1990 | switch (phase) { | 2052 | switch (phase) { |
@@ -1999,7 +2061,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb, | |||
1999 | } | 2061 | } |
2000 | 2062 | ||
2001 | cgroup_lock(); | 2063 | cgroup_lock(); |
2002 | top_cpuset.cpus_allowed = cpu_online_map; | 2064 | cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask); |
2003 | scan_for_empty_cpusets(&top_cpuset); | 2065 | scan_for_empty_cpusets(&top_cpuset); |
2004 | ndoms = generate_sched_domains(&doms, &attr); | 2066 | ndoms = generate_sched_domains(&doms, &attr); |
2005 | cgroup_unlock(); | 2067 | cgroup_unlock(); |
@@ -2044,7 +2106,7 @@ static int cpuset_track_online_nodes(struct notifier_block *self, | |||
2044 | 2106 | ||
2045 | void __init cpuset_init_smp(void) | 2107 | void __init cpuset_init_smp(void) |
2046 | { | 2108 | { |
2047 | top_cpuset.cpus_allowed = cpu_online_map; | 2109 | cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask); |
2048 | top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; | 2110 | top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; |
2049 | 2111 | ||
2050 | hotcpu_notifier(cpuset_track_online_cpus, 0); | 2112 | hotcpu_notifier(cpuset_track_online_cpus, 0); |
@@ -2054,15 +2116,15 @@ void __init cpuset_init_smp(void) | |||
2054 | /** | 2116 | /** |
2055 | * cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset. | 2117 | * cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset. |
2056 | * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed. | 2118 | * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed. |
2057 | * @pmask: pointer to cpumask_t variable to receive cpus_allowed set. | 2119 | * @pmask: pointer to struct cpumask variable to receive cpus_allowed set. |
2058 | * | 2120 | * |
2059 | * Description: Returns the cpumask_t cpus_allowed of the cpuset | 2121 | * Description: Returns the cpumask_var_t cpus_allowed of the cpuset |
2060 | * attached to the specified @tsk. Guaranteed to return some non-empty | 2122 | * attached to the specified @tsk. Guaranteed to return some non-empty |
2061 | * subset of cpu_online_map, even if this means going outside the | 2123 | * subset of cpu_online_map, even if this means going outside the |
2062 | * tasks cpuset. | 2124 | * tasks cpuset. |
2063 | **/ | 2125 | **/ |
2064 | 2126 | ||
2065 | void cpuset_cpus_allowed(struct task_struct *tsk, cpumask_t *pmask) | 2127 | void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask) |
2066 | { | 2128 | { |
2067 | mutex_lock(&callback_mutex); | 2129 | mutex_lock(&callback_mutex); |
2068 | cpuset_cpus_allowed_locked(tsk, pmask); | 2130 | cpuset_cpus_allowed_locked(tsk, pmask); |
@@ -2073,7 +2135,7 @@ void cpuset_cpus_allowed(struct task_struct *tsk, cpumask_t *pmask) | |||
2073 | * cpuset_cpus_allowed_locked - return cpus_allowed mask from a tasks cpuset. | 2135 | * cpuset_cpus_allowed_locked - return cpus_allowed mask from a tasks cpuset. |
2074 | * Must be called with callback_mutex held. | 2136 | * Must be called with callback_mutex held. |
2075 | **/ | 2137 | **/ |
2076 | void cpuset_cpus_allowed_locked(struct task_struct *tsk, cpumask_t *pmask) | 2138 | void cpuset_cpus_allowed_locked(struct task_struct *tsk, struct cpumask *pmask) |
2077 | { | 2139 | { |
2078 | task_lock(tsk); | 2140 | task_lock(tsk); |
2079 | guarantee_online_cpus(task_cs(tsk), pmask); | 2141 | guarantee_online_cpus(task_cs(tsk), pmask); |
@@ -2356,6 +2418,29 @@ int cpuset_mems_allowed_intersects(const struct task_struct *tsk1, | |||
2356 | return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed); | 2418 | return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed); |
2357 | } | 2419 | } |
2358 | 2420 | ||
2421 | /** | ||
2422 | * cpuset_print_task_mems_allowed - prints task's cpuset and mems_allowed | ||
2423 | * @task: pointer to task_struct of some task. | ||
2424 | * | ||
2425 | * Description: Prints @task's name, cpuset name, and cached copy of its | ||
2426 | * mems_allowed to the kernel log. Must hold task_lock(task) to allow | ||
2427 | * dereferencing task_cs(task). | ||
2428 | */ | ||
2429 | void cpuset_print_task_mems_allowed(struct task_struct *tsk) | ||
2430 | { | ||
2431 | struct dentry *dentry; | ||
2432 | |||
2433 | dentry = task_cs(tsk)->css.cgroup->dentry; | ||
2434 | spin_lock(&cpuset_buffer_lock); | ||
2435 | snprintf(cpuset_name, CPUSET_NAME_LEN, | ||
2436 | dentry ? (const char *)dentry->d_name.name : "/"); | ||
2437 | nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN, | ||
2438 | tsk->mems_allowed); | ||
2439 | printk(KERN_INFO "%s cpuset=%s mems_allowed=%s\n", | ||
2440 | tsk->comm, cpuset_name, cpuset_nodelist); | ||
2441 | spin_unlock(&cpuset_buffer_lock); | ||
2442 | } | ||
2443 | |||
2359 | /* | 2444 | /* |
2360 | * Collection of memory_pressure is suppressed unless | 2445 | * Collection of memory_pressure is suppressed unless |
2361 | * this flag is enabled by writing "1" to the special | 2446 | * this flag is enabled by writing "1" to the special |
diff --git a/kernel/cred.c b/kernel/cred.c index ff7bc071991c..3a039189d707 100644 --- a/kernel/cred.c +++ b/kernel/cred.c | |||
@@ -372,7 +372,8 @@ int commit_creds(struct cred *new) | |||
372 | old->fsuid != new->fsuid || | 372 | old->fsuid != new->fsuid || |
373 | old->fsgid != new->fsgid || | 373 | old->fsgid != new->fsgid || |
374 | !cap_issubset(new->cap_permitted, old->cap_permitted)) { | 374 | !cap_issubset(new->cap_permitted, old->cap_permitted)) { |
375 | set_dumpable(task->mm, suid_dumpable); | 375 | if (task->mm) |
376 | set_dumpable(task->mm, suid_dumpable); | ||
376 | task->pdeath_signal = 0; | 377 | task->pdeath_signal = 0; |
377 | smp_wmb(); | 378 | smp_wmb(); |
378 | } | 379 | } |
@@ -506,6 +507,7 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) | |||
506 | else | 507 | else |
507 | old = get_cred(&init_cred); | 508 | old = get_cred(&init_cred); |
508 | 509 | ||
510 | *new = *old; | ||
509 | get_uid(new->user); | 511 | get_uid(new->user); |
510 | get_group_info(new->group_info); | 512 | get_group_info(new->group_info); |
511 | 513 | ||
@@ -529,6 +531,7 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) | |||
529 | 531 | ||
530 | error: | 532 | error: |
531 | put_cred(new); | 533 | put_cred(new); |
534 | put_cred(old); | ||
532 | return NULL; | 535 | return NULL; |
533 | } | 536 | } |
534 | EXPORT_SYMBOL(prepare_kernel_cred); | 537 | EXPORT_SYMBOL(prepare_kernel_cred); |
diff --git a/kernel/dma-coherent.c b/kernel/dma-coherent.c index f013a0c2e111..038707404b76 100644 --- a/kernel/dma-coherent.c +++ b/kernel/dma-coherent.c | |||
@@ -109,20 +109,40 @@ EXPORT_SYMBOL(dma_mark_declared_memory_occupied); | |||
109 | int dma_alloc_from_coherent(struct device *dev, ssize_t size, | 109 | int dma_alloc_from_coherent(struct device *dev, ssize_t size, |
110 | dma_addr_t *dma_handle, void **ret) | 110 | dma_addr_t *dma_handle, void **ret) |
111 | { | 111 | { |
112 | struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; | 112 | struct dma_coherent_mem *mem; |
113 | int order = get_order(size); | 113 | int order = get_order(size); |
114 | int pageno; | ||
114 | 115 | ||
115 | if (mem) { | 116 | if (!dev) |
116 | int page = bitmap_find_free_region(mem->bitmap, mem->size, | 117 | return 0; |
117 | order); | 118 | mem = dev->dma_mem; |
118 | if (page >= 0) { | 119 | if (!mem) |
119 | *dma_handle = mem->device_base + (page << PAGE_SHIFT); | 120 | return 0; |
120 | *ret = mem->virt_base + (page << PAGE_SHIFT); | 121 | if (unlikely(size > mem->size)) |
121 | memset(*ret, 0, size); | 122 | return 0; |
122 | } else if (mem->flags & DMA_MEMORY_EXCLUSIVE) | 123 | |
123 | *ret = NULL; | 124 | pageno = bitmap_find_free_region(mem->bitmap, mem->size, order); |
125 | if (pageno >= 0) { | ||
126 | /* | ||
127 | * Memory was found in the per-device arena. | ||
128 | */ | ||
129 | *dma_handle = mem->device_base + (pageno << PAGE_SHIFT); | ||
130 | *ret = mem->virt_base + (pageno << PAGE_SHIFT); | ||
131 | memset(*ret, 0, size); | ||
132 | } else if (mem->flags & DMA_MEMORY_EXCLUSIVE) { | ||
133 | /* | ||
134 | * The per-device arena is exhausted and we are not | ||
135 | * permitted to fall back to generic memory. | ||
136 | */ | ||
137 | *ret = NULL; | ||
138 | } else { | ||
139 | /* | ||
140 | * The per-device arena is exhausted and we are | ||
141 | * permitted to fall back to generic memory. | ||
142 | */ | ||
143 | return 0; | ||
124 | } | 144 | } |
125 | return (mem != NULL); | 145 | return 1; |
126 | } | 146 | } |
127 | EXPORT_SYMBOL(dma_alloc_from_coherent); | 147 | EXPORT_SYMBOL(dma_alloc_from_coherent); |
128 | 148 | ||
diff --git a/kernel/exit.c b/kernel/exit.c index c9e5a1c14e08..c7740fa3252c 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -642,35 +642,31 @@ retry: | |||
642 | /* | 642 | /* |
643 | * We found no owner yet mm_users > 1: this implies that we are | 643 | * We found no owner yet mm_users > 1: this implies that we are |
644 | * most likely racing with swapoff (try_to_unuse()) or /proc or | 644 | * most likely racing with swapoff (try_to_unuse()) or /proc or |
645 | * ptrace or page migration (get_task_mm()). Mark owner as NULL, | 645 | * ptrace or page migration (get_task_mm()). Mark owner as NULL. |
646 | * so that subsystems can understand the callback and take action. | ||
647 | */ | 646 | */ |
648 | down_write(&mm->mmap_sem); | ||
649 | cgroup_mm_owner_callbacks(mm->owner, NULL); | ||
650 | mm->owner = NULL; | 647 | mm->owner = NULL; |
651 | up_write(&mm->mmap_sem); | ||
652 | return; | 648 | return; |
653 | 649 | ||
654 | assign_new_owner: | 650 | assign_new_owner: |
655 | BUG_ON(c == p); | 651 | BUG_ON(c == p); |
656 | get_task_struct(c); | 652 | get_task_struct(c); |
657 | read_unlock(&tasklist_lock); | ||
658 | down_write(&mm->mmap_sem); | ||
659 | /* | 653 | /* |
660 | * The task_lock protects c->mm from changing. | 654 | * The task_lock protects c->mm from changing. |
661 | * We always want mm->owner->mm == mm | 655 | * We always want mm->owner->mm == mm |
662 | */ | 656 | */ |
663 | task_lock(c); | 657 | task_lock(c); |
658 | /* | ||
659 | * Delay read_unlock() till we have the task_lock() | ||
660 | * to ensure that c does not slip away underneath us | ||
661 | */ | ||
662 | read_unlock(&tasklist_lock); | ||
664 | if (c->mm != mm) { | 663 | if (c->mm != mm) { |
665 | task_unlock(c); | 664 | task_unlock(c); |
666 | up_write(&mm->mmap_sem); | ||
667 | put_task_struct(c); | 665 | put_task_struct(c); |
668 | goto retry; | 666 | goto retry; |
669 | } | 667 | } |
670 | cgroup_mm_owner_callbacks(mm->owner, c); | ||
671 | mm->owner = c; | 668 | mm->owner = c; |
672 | task_unlock(c); | 669 | task_unlock(c); |
673 | up_write(&mm->mmap_sem); | ||
674 | put_task_struct(c); | 670 | put_task_struct(c); |
675 | } | 671 | } |
676 | #endif /* CONFIG_MM_OWNER */ | 672 | #endif /* CONFIG_MM_OWNER */ |
@@ -1055,10 +1051,7 @@ NORET_TYPE void do_exit(long code) | |||
1055 | preempt_count()); | 1051 | preempt_count()); |
1056 | 1052 | ||
1057 | acct_update_integrals(tsk); | 1053 | acct_update_integrals(tsk); |
1058 | if (tsk->mm) { | 1054 | |
1059 | update_hiwater_rss(tsk->mm); | ||
1060 | update_hiwater_vm(tsk->mm); | ||
1061 | } | ||
1062 | group_dead = atomic_dec_and_test(&tsk->signal->live); | 1055 | group_dead = atomic_dec_and_test(&tsk->signal->live); |
1063 | if (group_dead) { | 1056 | if (group_dead) { |
1064 | hrtimer_cancel(&tsk->signal->real_timer); | 1057 | hrtimer_cancel(&tsk->signal->real_timer); |
diff --git a/kernel/fork.c b/kernel/fork.c index 43cbf30669e6..1d68f1255dd8 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -400,6 +400,18 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); | |||
400 | #define allocate_mm() (kmem_cache_alloc(mm_cachep, GFP_KERNEL)) | 400 | #define allocate_mm() (kmem_cache_alloc(mm_cachep, GFP_KERNEL)) |
401 | #define free_mm(mm) (kmem_cache_free(mm_cachep, (mm))) | 401 | #define free_mm(mm) (kmem_cache_free(mm_cachep, (mm))) |
402 | 402 | ||
403 | static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; | ||
404 | |||
405 | static int __init coredump_filter_setup(char *s) | ||
406 | { | ||
407 | default_dump_filter = | ||
408 | (simple_strtoul(s, NULL, 0) << MMF_DUMP_FILTER_SHIFT) & | ||
409 | MMF_DUMP_FILTER_MASK; | ||
410 | return 1; | ||
411 | } | ||
412 | |||
413 | __setup("coredump_filter=", coredump_filter_setup); | ||
414 | |||
403 | #include <linux/init_task.h> | 415 | #include <linux/init_task.h> |
404 | 416 | ||
405 | static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) | 417 | static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) |
@@ -408,8 +420,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) | |||
408 | atomic_set(&mm->mm_count, 1); | 420 | atomic_set(&mm->mm_count, 1); |
409 | init_rwsem(&mm->mmap_sem); | 421 | init_rwsem(&mm->mmap_sem); |
410 | INIT_LIST_HEAD(&mm->mmlist); | 422 | INIT_LIST_HEAD(&mm->mmlist); |
411 | mm->flags = (current->mm) ? current->mm->flags | 423 | mm->flags = (current->mm) ? current->mm->flags : default_dump_filter; |
412 | : MMF_DUMP_FILTER_DEFAULT; | ||
413 | mm->core_state = NULL; | 424 | mm->core_state = NULL; |
414 | mm->nr_ptes = 0; | 425 | mm->nr_ptes = 0; |
415 | set_mm_counter(mm, file_rss, 0); | 426 | set_mm_counter(mm, file_rss, 0); |
@@ -758,7 +769,7 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk) | |||
758 | { | 769 | { |
759 | struct sighand_struct *sig; | 770 | struct sighand_struct *sig; |
760 | 771 | ||
761 | if (clone_flags & (CLONE_SIGHAND | CLONE_THREAD)) { | 772 | if (clone_flags & CLONE_SIGHAND) { |
762 | atomic_inc(¤t->sighand->count); | 773 | atomic_inc(¤t->sighand->count); |
763 | return 0; | 774 | return 0; |
764 | } | 775 | } |
@@ -1115,12 +1126,12 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1115 | 1126 | ||
1116 | if (pid != &init_struct_pid) { | 1127 | if (pid != &init_struct_pid) { |
1117 | retval = -ENOMEM; | 1128 | retval = -ENOMEM; |
1118 | pid = alloc_pid(task_active_pid_ns(p)); | 1129 | pid = alloc_pid(p->nsproxy->pid_ns); |
1119 | if (!pid) | 1130 | if (!pid) |
1120 | goto bad_fork_cleanup_io; | 1131 | goto bad_fork_cleanup_io; |
1121 | 1132 | ||
1122 | if (clone_flags & CLONE_NEWPID) { | 1133 | if (clone_flags & CLONE_NEWPID) { |
1123 | retval = pid_ns_prepare_proc(task_active_pid_ns(p)); | 1134 | retval = pid_ns_prepare_proc(p->nsproxy->pid_ns); |
1124 | if (retval < 0) | 1135 | if (retval < 0) |
1125 | goto bad_fork_free_pid; | 1136 | goto bad_fork_free_pid; |
1126 | } | 1137 | } |
@@ -1470,12 +1481,10 @@ void __init proc_caches_init(void) | |||
1470 | fs_cachep = kmem_cache_create("fs_cache", | 1481 | fs_cachep = kmem_cache_create("fs_cache", |
1471 | sizeof(struct fs_struct), 0, | 1482 | sizeof(struct fs_struct), 0, |
1472 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); | 1483 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); |
1473 | vm_area_cachep = kmem_cache_create("vm_area_struct", | ||
1474 | sizeof(struct vm_area_struct), 0, | ||
1475 | SLAB_PANIC, NULL); | ||
1476 | mm_cachep = kmem_cache_create("mm_struct", | 1484 | mm_cachep = kmem_cache_create("mm_struct", |
1477 | sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, | 1485 | sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, |
1478 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); | 1486 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); |
1487 | mmap_init(); | ||
1479 | } | 1488 | } |
1480 | 1489 | ||
1481 | /* | 1490 | /* |
diff --git a/kernel/futex.c b/kernel/futex.c index 7c6cbabe52b3..002aa189eb09 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -170,8 +170,11 @@ static void get_futex_key_refs(union futex_key *key) | |||
170 | */ | 170 | */ |
171 | static void drop_futex_key_refs(union futex_key *key) | 171 | static void drop_futex_key_refs(union futex_key *key) |
172 | { | 172 | { |
173 | if (!key->both.ptr) | 173 | if (!key->both.ptr) { |
174 | /* If we're here then we tried to put a key we failed to get */ | ||
175 | WARN_ON_ONCE(1); | ||
174 | return; | 176 | return; |
177 | } | ||
175 | 178 | ||
176 | switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { | 179 | switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { |
177 | case FUT_OFF_INODE: | 180 | case FUT_OFF_INODE: |
@@ -730,8 +733,8 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset) | |||
730 | } | 733 | } |
731 | 734 | ||
732 | spin_unlock(&hb->lock); | 735 | spin_unlock(&hb->lock); |
733 | out: | ||
734 | put_futex_key(fshared, &key); | 736 | put_futex_key(fshared, &key); |
737 | out: | ||
735 | return ret; | 738 | return ret; |
736 | } | 739 | } |
737 | 740 | ||
@@ -755,7 +758,7 @@ retryfull: | |||
755 | goto out; | 758 | goto out; |
756 | ret = get_futex_key(uaddr2, fshared, &key2); | 759 | ret = get_futex_key(uaddr2, fshared, &key2); |
757 | if (unlikely(ret != 0)) | 760 | if (unlikely(ret != 0)) |
758 | goto out; | 761 | goto out_put_key1; |
759 | 762 | ||
760 | hb1 = hash_futex(&key1); | 763 | hb1 = hash_futex(&key1); |
761 | hb2 = hash_futex(&key2); | 764 | hb2 = hash_futex(&key2); |
@@ -777,12 +780,12 @@ retry: | |||
777 | * but we might get them from range checking | 780 | * but we might get them from range checking |
778 | */ | 781 | */ |
779 | ret = op_ret; | 782 | ret = op_ret; |
780 | goto out; | 783 | goto out_put_keys; |
781 | #endif | 784 | #endif |
782 | 785 | ||
783 | if (unlikely(op_ret != -EFAULT)) { | 786 | if (unlikely(op_ret != -EFAULT)) { |
784 | ret = op_ret; | 787 | ret = op_ret; |
785 | goto out; | 788 | goto out_put_keys; |
786 | } | 789 | } |
787 | 790 | ||
788 | /* | 791 | /* |
@@ -796,7 +799,7 @@ retry: | |||
796 | ret = futex_handle_fault((unsigned long)uaddr2, | 799 | ret = futex_handle_fault((unsigned long)uaddr2, |
797 | attempt); | 800 | attempt); |
798 | if (ret) | 801 | if (ret) |
799 | goto out; | 802 | goto out_put_keys; |
800 | goto retry; | 803 | goto retry; |
801 | } | 804 | } |
802 | 805 | ||
@@ -834,10 +837,11 @@ retry: | |||
834 | spin_unlock(&hb1->lock); | 837 | spin_unlock(&hb1->lock); |
835 | if (hb1 != hb2) | 838 | if (hb1 != hb2) |
836 | spin_unlock(&hb2->lock); | 839 | spin_unlock(&hb2->lock); |
837 | out: | 840 | out_put_keys: |
838 | put_futex_key(fshared, &key2); | 841 | put_futex_key(fshared, &key2); |
842 | out_put_key1: | ||
839 | put_futex_key(fshared, &key1); | 843 | put_futex_key(fshared, &key1); |
840 | 844 | out: | |
841 | return ret; | 845 | return ret; |
842 | } | 846 | } |
843 | 847 | ||
@@ -854,13 +858,13 @@ static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, | |||
854 | struct futex_q *this, *next; | 858 | struct futex_q *this, *next; |
855 | int ret, drop_count = 0; | 859 | int ret, drop_count = 0; |
856 | 860 | ||
857 | retry: | 861 | retry: |
858 | ret = get_futex_key(uaddr1, fshared, &key1); | 862 | ret = get_futex_key(uaddr1, fshared, &key1); |
859 | if (unlikely(ret != 0)) | 863 | if (unlikely(ret != 0)) |
860 | goto out; | 864 | goto out; |
861 | ret = get_futex_key(uaddr2, fshared, &key2); | 865 | ret = get_futex_key(uaddr2, fshared, &key2); |
862 | if (unlikely(ret != 0)) | 866 | if (unlikely(ret != 0)) |
863 | goto out; | 867 | goto out_put_key1; |
864 | 868 | ||
865 | hb1 = hash_futex(&key1); | 869 | hb1 = hash_futex(&key1); |
866 | hb2 = hash_futex(&key2); | 870 | hb2 = hash_futex(&key2); |
@@ -882,7 +886,7 @@ static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, | |||
882 | if (!ret) | 886 | if (!ret) |
883 | goto retry; | 887 | goto retry; |
884 | 888 | ||
885 | return ret; | 889 | goto out_put_keys; |
886 | } | 890 | } |
887 | if (curval != *cmpval) { | 891 | if (curval != *cmpval) { |
888 | ret = -EAGAIN; | 892 | ret = -EAGAIN; |
@@ -927,9 +931,11 @@ out_unlock: | |||
927 | while (--drop_count >= 0) | 931 | while (--drop_count >= 0) |
928 | drop_futex_key_refs(&key1); | 932 | drop_futex_key_refs(&key1); |
929 | 933 | ||
930 | out: | 934 | out_put_keys: |
931 | put_futex_key(fshared, &key2); | 935 | put_futex_key(fshared, &key2); |
936 | out_put_key1: | ||
932 | put_futex_key(fshared, &key1); | 937 | put_futex_key(fshared, &key1); |
938 | out: | ||
933 | return ret; | 939 | return ret; |
934 | } | 940 | } |
935 | 941 | ||
@@ -990,7 +996,7 @@ static int unqueue_me(struct futex_q *q) | |||
990 | int ret = 0; | 996 | int ret = 0; |
991 | 997 | ||
992 | /* In the common case we don't take the spinlock, which is nice. */ | 998 | /* In the common case we don't take the spinlock, which is nice. */ |
993 | retry: | 999 | retry: |
994 | lock_ptr = q->lock_ptr; | 1000 | lock_ptr = q->lock_ptr; |
995 | barrier(); | 1001 | barrier(); |
996 | if (lock_ptr != NULL) { | 1002 | if (lock_ptr != NULL) { |
@@ -1172,11 +1178,11 @@ static int futex_wait(u32 __user *uaddr, int fshared, | |||
1172 | 1178 | ||
1173 | q.pi_state = NULL; | 1179 | q.pi_state = NULL; |
1174 | q.bitset = bitset; | 1180 | q.bitset = bitset; |
1175 | retry: | 1181 | retry: |
1176 | q.key = FUTEX_KEY_INIT; | 1182 | q.key = FUTEX_KEY_INIT; |
1177 | ret = get_futex_key(uaddr, fshared, &q.key); | 1183 | ret = get_futex_key(uaddr, fshared, &q.key); |
1178 | if (unlikely(ret != 0)) | 1184 | if (unlikely(ret != 0)) |
1179 | goto out_release_sem; | 1185 | goto out; |
1180 | 1186 | ||
1181 | hb = queue_lock(&q); | 1187 | hb = queue_lock(&q); |
1182 | 1188 | ||
@@ -1204,6 +1210,7 @@ static int futex_wait(u32 __user *uaddr, int fshared, | |||
1204 | 1210 | ||
1205 | if (unlikely(ret)) { | 1211 | if (unlikely(ret)) { |
1206 | queue_unlock(&q, hb); | 1212 | queue_unlock(&q, hb); |
1213 | put_futex_key(fshared, &q.key); | ||
1207 | 1214 | ||
1208 | ret = get_user(uval, uaddr); | 1215 | ret = get_user(uval, uaddr); |
1209 | 1216 | ||
@@ -1213,7 +1220,7 @@ static int futex_wait(u32 __user *uaddr, int fshared, | |||
1213 | } | 1220 | } |
1214 | ret = -EWOULDBLOCK; | 1221 | ret = -EWOULDBLOCK; |
1215 | if (uval != val) | 1222 | if (uval != val) |
1216 | goto out_unlock_release_sem; | 1223 | goto out_unlock_put_key; |
1217 | 1224 | ||
1218 | /* Only actually queue if *uaddr contained val. */ | 1225 | /* Only actually queue if *uaddr contained val. */ |
1219 | queue_me(&q, hb); | 1226 | queue_me(&q, hb); |
@@ -1305,11 +1312,11 @@ static int futex_wait(u32 __user *uaddr, int fshared, | |||
1305 | return -ERESTART_RESTARTBLOCK; | 1312 | return -ERESTART_RESTARTBLOCK; |
1306 | } | 1313 | } |
1307 | 1314 | ||
1308 | out_unlock_release_sem: | 1315 | out_unlock_put_key: |
1309 | queue_unlock(&q, hb); | 1316 | queue_unlock(&q, hb); |
1310 | |||
1311 | out_release_sem: | ||
1312 | put_futex_key(fshared, &q.key); | 1317 | put_futex_key(fshared, &q.key); |
1318 | |||
1319 | out: | ||
1313 | return ret; | 1320 | return ret; |
1314 | } | 1321 | } |
1315 | 1322 | ||
@@ -1358,16 +1365,16 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared, | |||
1358 | } | 1365 | } |
1359 | 1366 | ||
1360 | q.pi_state = NULL; | 1367 | q.pi_state = NULL; |
1361 | retry: | 1368 | retry: |
1362 | q.key = FUTEX_KEY_INIT; | 1369 | q.key = FUTEX_KEY_INIT; |
1363 | ret = get_futex_key(uaddr, fshared, &q.key); | 1370 | ret = get_futex_key(uaddr, fshared, &q.key); |
1364 | if (unlikely(ret != 0)) | 1371 | if (unlikely(ret != 0)) |
1365 | goto out_release_sem; | 1372 | goto out; |
1366 | 1373 | ||
1367 | retry_unlocked: | 1374 | retry_unlocked: |
1368 | hb = queue_lock(&q); | 1375 | hb = queue_lock(&q); |
1369 | 1376 | ||
1370 | retry_locked: | 1377 | retry_locked: |
1371 | ret = lock_taken = 0; | 1378 | ret = lock_taken = 0; |
1372 | 1379 | ||
1373 | /* | 1380 | /* |
@@ -1388,14 +1395,14 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared, | |||
1388 | */ | 1395 | */ |
1389 | if (unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(current))) { | 1396 | if (unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(current))) { |
1390 | ret = -EDEADLK; | 1397 | ret = -EDEADLK; |
1391 | goto out_unlock_release_sem; | 1398 | goto out_unlock_put_key; |
1392 | } | 1399 | } |
1393 | 1400 | ||
1394 | /* | 1401 | /* |
1395 | * Surprise - we got the lock. Just return to userspace: | 1402 | * Surprise - we got the lock. Just return to userspace: |
1396 | */ | 1403 | */ |
1397 | if (unlikely(!curval)) | 1404 | if (unlikely(!curval)) |
1398 | goto out_unlock_release_sem; | 1405 | goto out_unlock_put_key; |
1399 | 1406 | ||
1400 | uval = curval; | 1407 | uval = curval; |
1401 | 1408 | ||
@@ -1431,7 +1438,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared, | |||
1431 | * We took the lock due to owner died take over. | 1438 | * We took the lock due to owner died take over. |
1432 | */ | 1439 | */ |
1433 | if (unlikely(lock_taken)) | 1440 | if (unlikely(lock_taken)) |
1434 | goto out_unlock_release_sem; | 1441 | goto out_unlock_put_key; |
1435 | 1442 | ||
1436 | /* | 1443 | /* |
1437 | * We dont have the lock. Look up the PI state (or create it if | 1444 | * We dont have the lock. Look up the PI state (or create it if |
@@ -1470,7 +1477,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared, | |||
1470 | goto retry_locked; | 1477 | goto retry_locked; |
1471 | } | 1478 | } |
1472 | default: | 1479 | default: |
1473 | goto out_unlock_release_sem; | 1480 | goto out_unlock_put_key; |
1474 | } | 1481 | } |
1475 | } | 1482 | } |
1476 | 1483 | ||
@@ -1561,16 +1568,17 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared, | |||
1561 | destroy_hrtimer_on_stack(&to->timer); | 1568 | destroy_hrtimer_on_stack(&to->timer); |
1562 | return ret != -EINTR ? ret : -ERESTARTNOINTR; | 1569 | return ret != -EINTR ? ret : -ERESTARTNOINTR; |
1563 | 1570 | ||
1564 | out_unlock_release_sem: | 1571 | out_unlock_put_key: |
1565 | queue_unlock(&q, hb); | 1572 | queue_unlock(&q, hb); |
1566 | 1573 | ||
1567 | out_release_sem: | 1574 | out_put_key: |
1568 | put_futex_key(fshared, &q.key); | 1575 | put_futex_key(fshared, &q.key); |
1576 | out: | ||
1569 | if (to) | 1577 | if (to) |
1570 | destroy_hrtimer_on_stack(&to->timer); | 1578 | destroy_hrtimer_on_stack(&to->timer); |
1571 | return ret; | 1579 | return ret; |
1572 | 1580 | ||
1573 | uaddr_faulted: | 1581 | uaddr_faulted: |
1574 | /* | 1582 | /* |
1575 | * We have to r/w *(int __user *)uaddr, and we have to modify it | 1583 | * We have to r/w *(int __user *)uaddr, and we have to modify it |
1576 | * atomically. Therefore, if we continue to fault after get_user() | 1584 | * atomically. Therefore, if we continue to fault after get_user() |
@@ -1583,7 +1591,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared, | |||
1583 | if (attempt++) { | 1591 | if (attempt++) { |
1584 | ret = futex_handle_fault((unsigned long)uaddr, attempt); | 1592 | ret = futex_handle_fault((unsigned long)uaddr, attempt); |
1585 | if (ret) | 1593 | if (ret) |
1586 | goto out_release_sem; | 1594 | goto out_put_key; |
1587 | goto retry_unlocked; | 1595 | goto retry_unlocked; |
1588 | } | 1596 | } |
1589 | 1597 | ||
@@ -1675,9 +1683,9 @@ retry_unlocked: | |||
1675 | 1683 | ||
1676 | out_unlock: | 1684 | out_unlock: |
1677 | spin_unlock(&hb->lock); | 1685 | spin_unlock(&hb->lock); |
1678 | out: | ||
1679 | put_futex_key(fshared, &key); | 1686 | put_futex_key(fshared, &key); |
1680 | 1687 | ||
1688 | out: | ||
1681 | return ret; | 1689 | return ret; |
1682 | 1690 | ||
1683 | pi_faulted: | 1691 | pi_faulted: |
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index eb2bfefa6dcc..1455b7651b6b 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c | |||
@@ -634,7 +634,6 @@ static inline void hrtimer_init_timer_hres(struct hrtimer *timer) | |||
634 | { | 634 | { |
635 | } | 635 | } |
636 | 636 | ||
637 | static void __run_hrtimer(struct hrtimer *timer); | ||
638 | 637 | ||
639 | /* | 638 | /* |
640 | * When High resolution timers are active, try to reprogram. Note, that in case | 639 | * When High resolution timers are active, try to reprogram. Note, that in case |
@@ -646,13 +645,9 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, | |||
646 | struct hrtimer_clock_base *base) | 645 | struct hrtimer_clock_base *base) |
647 | { | 646 | { |
648 | if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) { | 647 | if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) { |
649 | /* | 648 | spin_unlock(&base->cpu_base->lock); |
650 | * XXX: recursion check? | 649 | raise_softirq_irqoff(HRTIMER_SOFTIRQ); |
651 | * hrtimer_forward() should round up with timer granularity | 650 | spin_lock(&base->cpu_base->lock); |
652 | * so that we never get into inf recursion here, | ||
653 | * it doesn't do that though | ||
654 | */ | ||
655 | __run_hrtimer(timer); | ||
656 | return 1; | 651 | return 1; |
657 | } | 652 | } |
658 | return 0; | 653 | return 0; |
@@ -705,11 +700,6 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, | |||
705 | } | 700 | } |
706 | static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } | 701 | static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } |
707 | static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { } | 702 | static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { } |
708 | static inline int hrtimer_reprogram(struct hrtimer *timer, | ||
709 | struct hrtimer_clock_base *base) | ||
710 | { | ||
711 | return 0; | ||
712 | } | ||
713 | 703 | ||
714 | #endif /* CONFIG_HIGH_RES_TIMERS */ | 704 | #endif /* CONFIG_HIGH_RES_TIMERS */ |
715 | 705 | ||
@@ -780,9 +770,11 @@ EXPORT_SYMBOL_GPL(hrtimer_forward); | |||
780 | * | 770 | * |
781 | * The timer is inserted in expiry order. Insertion into the | 771 | * The timer is inserted in expiry order. Insertion into the |
782 | * red black tree is O(log(n)). Must hold the base lock. | 772 | * red black tree is O(log(n)). Must hold the base lock. |
773 | * | ||
774 | * Returns 1 when the new timer is the leftmost timer in the tree. | ||
783 | */ | 775 | */ |
784 | static void enqueue_hrtimer(struct hrtimer *timer, | 776 | static int enqueue_hrtimer(struct hrtimer *timer, |
785 | struct hrtimer_clock_base *base, int reprogram) | 777 | struct hrtimer_clock_base *base) |
786 | { | 778 | { |
787 | struct rb_node **link = &base->active.rb_node; | 779 | struct rb_node **link = &base->active.rb_node; |
788 | struct rb_node *parent = NULL; | 780 | struct rb_node *parent = NULL; |
@@ -814,20 +806,8 @@ static void enqueue_hrtimer(struct hrtimer *timer, | |||
814 | * Insert the timer to the rbtree and check whether it | 806 | * Insert the timer to the rbtree and check whether it |
815 | * replaces the first pending timer | 807 | * replaces the first pending timer |
816 | */ | 808 | */ |
817 | if (leftmost) { | 809 | if (leftmost) |
818 | /* | ||
819 | * Reprogram the clock event device. When the timer is already | ||
820 | * expired hrtimer_enqueue_reprogram has either called the | ||
821 | * callback or added it to the pending list and raised the | ||
822 | * softirq. | ||
823 | * | ||
824 | * This is a NOP for !HIGHRES | ||
825 | */ | ||
826 | if (reprogram && hrtimer_enqueue_reprogram(timer, base)) | ||
827 | return; | ||
828 | |||
829 | base->first = &timer->node; | 810 | base->first = &timer->node; |
830 | } | ||
831 | 811 | ||
832 | rb_link_node(&timer->node, parent, link); | 812 | rb_link_node(&timer->node, parent, link); |
833 | rb_insert_color(&timer->node, &base->active); | 813 | rb_insert_color(&timer->node, &base->active); |
@@ -836,6 +816,8 @@ static void enqueue_hrtimer(struct hrtimer *timer, | |||
836 | * state of a possibly running callback. | 816 | * state of a possibly running callback. |
837 | */ | 817 | */ |
838 | timer->state |= HRTIMER_STATE_ENQUEUED; | 818 | timer->state |= HRTIMER_STATE_ENQUEUED; |
819 | |||
820 | return leftmost; | ||
839 | } | 821 | } |
840 | 822 | ||
841 | /* | 823 | /* |
@@ -912,7 +894,7 @@ hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_n | |||
912 | { | 894 | { |
913 | struct hrtimer_clock_base *base, *new_base; | 895 | struct hrtimer_clock_base *base, *new_base; |
914 | unsigned long flags; | 896 | unsigned long flags; |
915 | int ret; | 897 | int ret, leftmost; |
916 | 898 | ||
917 | base = lock_hrtimer_base(timer, &flags); | 899 | base = lock_hrtimer_base(timer, &flags); |
918 | 900 | ||
@@ -940,12 +922,16 @@ hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_n | |||
940 | 922 | ||
941 | timer_stats_hrtimer_set_start_info(timer); | 923 | timer_stats_hrtimer_set_start_info(timer); |
942 | 924 | ||
925 | leftmost = enqueue_hrtimer(timer, new_base); | ||
926 | |||
943 | /* | 927 | /* |
944 | * Only allow reprogramming if the new base is on this CPU. | 928 | * Only allow reprogramming if the new base is on this CPU. |
945 | * (it might still be on another CPU if the timer was pending) | 929 | * (it might still be on another CPU if the timer was pending) |
930 | * | ||
931 | * XXX send_remote_softirq() ? | ||
946 | */ | 932 | */ |
947 | enqueue_hrtimer(timer, new_base, | 933 | if (leftmost && new_base->cpu_base == &__get_cpu_var(hrtimer_bases)) |
948 | new_base->cpu_base == &__get_cpu_var(hrtimer_bases)); | 934 | hrtimer_enqueue_reprogram(timer, new_base); |
949 | 935 | ||
950 | unlock_hrtimer_base(timer, &flags); | 936 | unlock_hrtimer_base(timer, &flags); |
951 | 937 | ||
@@ -1157,13 +1143,13 @@ static void __run_hrtimer(struct hrtimer *timer) | |||
1157 | spin_lock(&cpu_base->lock); | 1143 | spin_lock(&cpu_base->lock); |
1158 | 1144 | ||
1159 | /* | 1145 | /* |
1160 | * Note: We clear the CALLBACK bit after enqueue_hrtimer to avoid | 1146 | * Note: We clear the CALLBACK bit after enqueue_hrtimer and |
1161 | * reprogramming of the event hardware. This happens at the end of this | 1147 | * we do not reprogramm the event hardware. Happens either in |
1162 | * function anyway. | 1148 | * hrtimer_start_range_ns() or in hrtimer_interrupt() |
1163 | */ | 1149 | */ |
1164 | if (restart != HRTIMER_NORESTART) { | 1150 | if (restart != HRTIMER_NORESTART) { |
1165 | BUG_ON(timer->state != HRTIMER_STATE_CALLBACK); | 1151 | BUG_ON(timer->state != HRTIMER_STATE_CALLBACK); |
1166 | enqueue_hrtimer(timer, base, 0); | 1152 | enqueue_hrtimer(timer, base); |
1167 | } | 1153 | } |
1168 | timer->state &= ~HRTIMER_STATE_CALLBACK; | 1154 | timer->state &= ~HRTIMER_STATE_CALLBACK; |
1169 | } | 1155 | } |
@@ -1243,6 +1229,22 @@ void hrtimer_interrupt(struct clock_event_device *dev) | |||
1243 | } | 1229 | } |
1244 | } | 1230 | } |
1245 | 1231 | ||
1232 | /* | ||
1233 | * local version of hrtimer_peek_ahead_timers() called with interrupts | ||
1234 | * disabled. | ||
1235 | */ | ||
1236 | static void __hrtimer_peek_ahead_timers(void) | ||
1237 | { | ||
1238 | struct tick_device *td; | ||
1239 | |||
1240 | if (!hrtimer_hres_active()) | ||
1241 | return; | ||
1242 | |||
1243 | td = &__get_cpu_var(tick_cpu_device); | ||
1244 | if (td && td->evtdev) | ||
1245 | hrtimer_interrupt(td->evtdev); | ||
1246 | } | ||
1247 | |||
1246 | /** | 1248 | /** |
1247 | * hrtimer_peek_ahead_timers -- run soft-expired timers now | 1249 | * hrtimer_peek_ahead_timers -- run soft-expired timers now |
1248 | * | 1250 | * |
@@ -1254,20 +1256,23 @@ void hrtimer_interrupt(struct clock_event_device *dev) | |||
1254 | */ | 1256 | */ |
1255 | void hrtimer_peek_ahead_timers(void) | 1257 | void hrtimer_peek_ahead_timers(void) |
1256 | { | 1258 | { |
1257 | struct tick_device *td; | ||
1258 | unsigned long flags; | 1259 | unsigned long flags; |
1259 | 1260 | ||
1260 | if (!hrtimer_hres_active()) | ||
1261 | return; | ||
1262 | |||
1263 | local_irq_save(flags); | 1261 | local_irq_save(flags); |
1264 | td = &__get_cpu_var(tick_cpu_device); | 1262 | __hrtimer_peek_ahead_timers(); |
1265 | if (td && td->evtdev) | ||
1266 | hrtimer_interrupt(td->evtdev); | ||
1267 | local_irq_restore(flags); | 1263 | local_irq_restore(flags); |
1268 | } | 1264 | } |
1269 | 1265 | ||
1270 | #endif /* CONFIG_HIGH_RES_TIMERS */ | 1266 | static void run_hrtimer_softirq(struct softirq_action *h) |
1267 | { | ||
1268 | hrtimer_peek_ahead_timers(); | ||
1269 | } | ||
1270 | |||
1271 | #else /* CONFIG_HIGH_RES_TIMERS */ | ||
1272 | |||
1273 | static inline void __hrtimer_peek_ahead_timers(void) { } | ||
1274 | |||
1275 | #endif /* !CONFIG_HIGH_RES_TIMERS */ | ||
1271 | 1276 | ||
1272 | /* | 1277 | /* |
1273 | * Called from timer softirq every jiffy, expire hrtimers: | 1278 | * Called from timer softirq every jiffy, expire hrtimers: |
@@ -1513,39 +1518,36 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, | |||
1513 | __remove_hrtimer(timer, old_base, HRTIMER_STATE_MIGRATE, 0); | 1518 | __remove_hrtimer(timer, old_base, HRTIMER_STATE_MIGRATE, 0); |
1514 | timer->base = new_base; | 1519 | timer->base = new_base; |
1515 | /* | 1520 | /* |
1516 | * Enqueue the timers on the new cpu, but do not reprogram | 1521 | * Enqueue the timers on the new cpu. This does not |
1517 | * the timer as that would enable a deadlock between | 1522 | * reprogram the event device in case the timer |
1518 | * hrtimer_enqueue_reprogramm() running the timer and us still | 1523 | * expires before the earliest on this CPU, but we run |
1519 | * holding a nested base lock. | 1524 | * hrtimer_interrupt after we migrated everything to |
1520 | * | 1525 | * sort out already expired timers and reprogram the |
1521 | * Instead we tickle the hrtimer interrupt after the migration | 1526 | * event device. |
1522 | * is done, which will run all expired timers and re-programm | ||
1523 | * the timer device. | ||
1524 | */ | 1527 | */ |
1525 | enqueue_hrtimer(timer, new_base, 0); | 1528 | enqueue_hrtimer(timer, new_base); |
1526 | 1529 | ||
1527 | /* Clear the migration state bit */ | 1530 | /* Clear the migration state bit */ |
1528 | timer->state &= ~HRTIMER_STATE_MIGRATE; | 1531 | timer->state &= ~HRTIMER_STATE_MIGRATE; |
1529 | } | 1532 | } |
1530 | } | 1533 | } |
1531 | 1534 | ||
1532 | static int migrate_hrtimers(int scpu) | 1535 | static void migrate_hrtimers(int scpu) |
1533 | { | 1536 | { |
1534 | struct hrtimer_cpu_base *old_base, *new_base; | 1537 | struct hrtimer_cpu_base *old_base, *new_base; |
1535 | int dcpu, i; | 1538 | int i; |
1536 | 1539 | ||
1537 | BUG_ON(cpu_online(scpu)); | 1540 | BUG_ON(cpu_online(scpu)); |
1538 | old_base = &per_cpu(hrtimer_bases, scpu); | ||
1539 | new_base = &get_cpu_var(hrtimer_bases); | ||
1540 | |||
1541 | dcpu = smp_processor_id(); | ||
1542 | |||
1543 | tick_cancel_sched_timer(scpu); | 1541 | tick_cancel_sched_timer(scpu); |
1542 | |||
1543 | local_irq_disable(); | ||
1544 | old_base = &per_cpu(hrtimer_bases, scpu); | ||
1545 | new_base = &__get_cpu_var(hrtimer_bases); | ||
1544 | /* | 1546 | /* |
1545 | * The caller is globally serialized and nobody else | 1547 | * The caller is globally serialized and nobody else |
1546 | * takes two locks at once, deadlock is not possible. | 1548 | * takes two locks at once, deadlock is not possible. |
1547 | */ | 1549 | */ |
1548 | spin_lock_irq(&new_base->lock); | 1550 | spin_lock(&new_base->lock); |
1549 | spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); | 1551 | spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); |
1550 | 1552 | ||
1551 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { | 1553 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { |
@@ -1554,15 +1556,11 @@ static int migrate_hrtimers(int scpu) | |||
1554 | } | 1556 | } |
1555 | 1557 | ||
1556 | spin_unlock(&old_base->lock); | 1558 | spin_unlock(&old_base->lock); |
1557 | spin_unlock_irq(&new_base->lock); | 1559 | spin_unlock(&new_base->lock); |
1558 | put_cpu_var(hrtimer_bases); | ||
1559 | 1560 | ||
1560 | return dcpu; | 1561 | /* Check, if we got expired work to do */ |
1561 | } | 1562 | __hrtimer_peek_ahead_timers(); |
1562 | 1563 | local_irq_enable(); | |
1563 | static void tickle_timers(void *arg) | ||
1564 | { | ||
1565 | hrtimer_peek_ahead_timers(); | ||
1566 | } | 1564 | } |
1567 | 1565 | ||
1568 | #endif /* CONFIG_HOTPLUG_CPU */ | 1566 | #endif /* CONFIG_HOTPLUG_CPU */ |
@@ -1583,11 +1581,8 @@ static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self, | |||
1583 | case CPU_DEAD: | 1581 | case CPU_DEAD: |
1584 | case CPU_DEAD_FROZEN: | 1582 | case CPU_DEAD_FROZEN: |
1585 | { | 1583 | { |
1586 | int dcpu; | ||
1587 | |||
1588 | clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &scpu); | 1584 | clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &scpu); |
1589 | dcpu = migrate_hrtimers(scpu); | 1585 | migrate_hrtimers(scpu); |
1590 | smp_call_function_single(dcpu, tickle_timers, NULL, 0); | ||
1591 | break; | 1586 | break; |
1592 | } | 1587 | } |
1593 | #endif | 1588 | #endif |
@@ -1608,6 +1603,9 @@ void __init hrtimers_init(void) | |||
1608 | hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE, | 1603 | hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE, |
1609 | (void *)(long)smp_processor_id()); | 1604 | (void *)(long)smp_processor_id()); |
1610 | register_cpu_notifier(&hrtimers_nb); | 1605 | register_cpu_notifier(&hrtimers_nb); |
1606 | #ifdef CONFIG_HIGH_RES_TIMERS | ||
1607 | open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq); | ||
1608 | #endif | ||
1611 | } | 1609 | } |
1612 | 1610 | ||
1613 | /** | 1611 | /** |
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c index cc0f7321b8ce..1de9700f416e 100644 --- a/kernel/irq/autoprobe.c +++ b/kernel/irq/autoprobe.c | |||
@@ -10,6 +10,7 @@ | |||
10 | #include <linux/module.h> | 10 | #include <linux/module.h> |
11 | #include <linux/interrupt.h> | 11 | #include <linux/interrupt.h> |
12 | #include <linux/delay.h> | 12 | #include <linux/delay.h> |
13 | #include <linux/async.h> | ||
13 | 14 | ||
14 | #include "internals.h" | 15 | #include "internals.h" |
15 | 16 | ||
@@ -34,6 +35,10 @@ unsigned long probe_irq_on(void) | |||
34 | unsigned int status; | 35 | unsigned int status; |
35 | int i; | 36 | int i; |
36 | 37 | ||
38 | /* | ||
39 | * quiesce the kernel, or at least the asynchronous portion | ||
40 | */ | ||
41 | async_synchronize_full(); | ||
37 | mutex_lock(&probing_active); | 42 | mutex_lock(&probing_active); |
38 | /* | 43 | /* |
39 | * something may have generated an irq long ago and we want to | 44 | * something may have generated an irq long ago and we want to |
diff --git a/kernel/kmod.c b/kernel/kmod.c index b46dbb908669..a27a5f64443d 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c | |||
@@ -51,8 +51,8 @@ char modprobe_path[KMOD_PATH_LEN] = "/sbin/modprobe"; | |||
51 | 51 | ||
52 | /** | 52 | /** |
53 | * request_module - try to load a kernel module | 53 | * request_module - try to load a kernel module |
54 | * @fmt: printf style format string for the name of the module | 54 | * @fmt: printf style format string for the name of the module |
55 | * @varargs: arguements as specified in the format string | 55 | * @...: arguments as specified in the format string |
56 | * | 56 | * |
57 | * Load a module using the user mode module loader. The function returns | 57 | * Load a module using the user mode module loader. The function returns |
58 | * zero on success or a negative errno code on failure. Note that a | 58 | * zero on success or a negative errno code on failure. Note that a |
diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 9f8a3f25259a..1b9cbdc0127a 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c | |||
@@ -69,7 +69,7 @@ static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; | |||
69 | /* NOTE: change this value only with kprobe_mutex held */ | 69 | /* NOTE: change this value only with kprobe_mutex held */ |
70 | static bool kprobe_enabled; | 70 | static bool kprobe_enabled; |
71 | 71 | ||
72 | DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ | 72 | static DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ |
73 | static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; | 73 | static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; |
74 | static struct { | 74 | static struct { |
75 | spinlock_t lock ____cacheline_aligned_in_smp; | 75 | spinlock_t lock ____cacheline_aligned_in_smp; |
@@ -115,6 +115,7 @@ enum kprobe_slot_state { | |||
115 | SLOT_USED = 2, | 115 | SLOT_USED = 2, |
116 | }; | 116 | }; |
117 | 117 | ||
118 | static DEFINE_MUTEX(kprobe_insn_mutex); /* Protects kprobe_insn_pages */ | ||
118 | static struct hlist_head kprobe_insn_pages; | 119 | static struct hlist_head kprobe_insn_pages; |
119 | static int kprobe_garbage_slots; | 120 | static int kprobe_garbage_slots; |
120 | static int collect_garbage_slots(void); | 121 | static int collect_garbage_slots(void); |
@@ -144,10 +145,10 @@ loop_end: | |||
144 | } | 145 | } |
145 | 146 | ||
146 | /** | 147 | /** |
147 | * get_insn_slot() - Find a slot on an executable page for an instruction. | 148 | * __get_insn_slot() - Find a slot on an executable page for an instruction. |
148 | * We allocate an executable page if there's no room on existing ones. | 149 | * We allocate an executable page if there's no room on existing ones. |
149 | */ | 150 | */ |
150 | kprobe_opcode_t __kprobes *get_insn_slot(void) | 151 | static kprobe_opcode_t __kprobes *__get_insn_slot(void) |
151 | { | 152 | { |
152 | struct kprobe_insn_page *kip; | 153 | struct kprobe_insn_page *kip; |
153 | struct hlist_node *pos; | 154 | struct hlist_node *pos; |
@@ -196,6 +197,15 @@ kprobe_opcode_t __kprobes *get_insn_slot(void) | |||
196 | return kip->insns; | 197 | return kip->insns; |
197 | } | 198 | } |
198 | 199 | ||
200 | kprobe_opcode_t __kprobes *get_insn_slot(void) | ||
201 | { | ||
202 | kprobe_opcode_t *ret; | ||
203 | mutex_lock(&kprobe_insn_mutex); | ||
204 | ret = __get_insn_slot(); | ||
205 | mutex_unlock(&kprobe_insn_mutex); | ||
206 | return ret; | ||
207 | } | ||
208 | |||
199 | /* Return 1 if all garbages are collected, otherwise 0. */ | 209 | /* Return 1 if all garbages are collected, otherwise 0. */ |
200 | static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx) | 210 | static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx) |
201 | { | 211 | { |
@@ -226,9 +236,13 @@ static int __kprobes collect_garbage_slots(void) | |||
226 | { | 236 | { |
227 | struct kprobe_insn_page *kip; | 237 | struct kprobe_insn_page *kip; |
228 | struct hlist_node *pos, *next; | 238 | struct hlist_node *pos, *next; |
239 | int safety; | ||
229 | 240 | ||
230 | /* Ensure no-one is preepmted on the garbages */ | 241 | /* Ensure no-one is preepmted on the garbages */ |
231 | if (check_safety() != 0) | 242 | mutex_unlock(&kprobe_insn_mutex); |
243 | safety = check_safety(); | ||
244 | mutex_lock(&kprobe_insn_mutex); | ||
245 | if (safety != 0) | ||
232 | return -EAGAIN; | 246 | return -EAGAIN; |
233 | 247 | ||
234 | hlist_for_each_entry_safe(kip, pos, next, &kprobe_insn_pages, hlist) { | 248 | hlist_for_each_entry_safe(kip, pos, next, &kprobe_insn_pages, hlist) { |
@@ -251,6 +265,7 @@ void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty) | |||
251 | struct kprobe_insn_page *kip; | 265 | struct kprobe_insn_page *kip; |
252 | struct hlist_node *pos; | 266 | struct hlist_node *pos; |
253 | 267 | ||
268 | mutex_lock(&kprobe_insn_mutex); | ||
254 | hlist_for_each_entry(kip, pos, &kprobe_insn_pages, hlist) { | 269 | hlist_for_each_entry(kip, pos, &kprobe_insn_pages, hlist) { |
255 | if (kip->insns <= slot && | 270 | if (kip->insns <= slot && |
256 | slot < kip->insns + (INSNS_PER_PAGE * MAX_INSN_SIZE)) { | 271 | slot < kip->insns + (INSNS_PER_PAGE * MAX_INSN_SIZE)) { |
@@ -267,6 +282,8 @@ void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty) | |||
267 | 282 | ||
268 | if (dirty && ++kprobe_garbage_slots > INSNS_PER_PAGE) | 283 | if (dirty && ++kprobe_garbage_slots > INSNS_PER_PAGE) |
269 | collect_garbage_slots(); | 284 | collect_garbage_slots(); |
285 | |||
286 | mutex_unlock(&kprobe_insn_mutex); | ||
270 | } | 287 | } |
271 | #endif | 288 | #endif |
272 | 289 | ||
@@ -310,7 +327,7 @@ static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs) | |||
310 | struct kprobe *kp; | 327 | struct kprobe *kp; |
311 | 328 | ||
312 | list_for_each_entry_rcu(kp, &p->list, list) { | 329 | list_for_each_entry_rcu(kp, &p->list, list) { |
313 | if (kp->pre_handler) { | 330 | if (kp->pre_handler && !kprobe_gone(kp)) { |
314 | set_kprobe_instance(kp); | 331 | set_kprobe_instance(kp); |
315 | if (kp->pre_handler(kp, regs)) | 332 | if (kp->pre_handler(kp, regs)) |
316 | return 1; | 333 | return 1; |
@@ -326,7 +343,7 @@ static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs, | |||
326 | struct kprobe *kp; | 343 | struct kprobe *kp; |
327 | 344 | ||
328 | list_for_each_entry_rcu(kp, &p->list, list) { | 345 | list_for_each_entry_rcu(kp, &p->list, list) { |
329 | if (kp->post_handler) { | 346 | if (kp->post_handler && !kprobe_gone(kp)) { |
330 | set_kprobe_instance(kp); | 347 | set_kprobe_instance(kp); |
331 | kp->post_handler(kp, regs, flags); | 348 | kp->post_handler(kp, regs, flags); |
332 | reset_kprobe_instance(); | 349 | reset_kprobe_instance(); |
@@ -393,7 +410,7 @@ void __kprobes recycle_rp_inst(struct kretprobe_instance *ri, | |||
393 | hlist_add_head(&ri->hlist, head); | 410 | hlist_add_head(&ri->hlist, head); |
394 | } | 411 | } |
395 | 412 | ||
396 | void kretprobe_hash_lock(struct task_struct *tsk, | 413 | void __kprobes kretprobe_hash_lock(struct task_struct *tsk, |
397 | struct hlist_head **head, unsigned long *flags) | 414 | struct hlist_head **head, unsigned long *flags) |
398 | { | 415 | { |
399 | unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); | 416 | unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); |
@@ -404,13 +421,15 @@ void kretprobe_hash_lock(struct task_struct *tsk, | |||
404 | spin_lock_irqsave(hlist_lock, *flags); | 421 | spin_lock_irqsave(hlist_lock, *flags); |
405 | } | 422 | } |
406 | 423 | ||
407 | static void kretprobe_table_lock(unsigned long hash, unsigned long *flags) | 424 | static void __kprobes kretprobe_table_lock(unsigned long hash, |
425 | unsigned long *flags) | ||
408 | { | 426 | { |
409 | spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); | 427 | spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); |
410 | spin_lock_irqsave(hlist_lock, *flags); | 428 | spin_lock_irqsave(hlist_lock, *flags); |
411 | } | 429 | } |
412 | 430 | ||
413 | void kretprobe_hash_unlock(struct task_struct *tsk, unsigned long *flags) | 431 | void __kprobes kretprobe_hash_unlock(struct task_struct *tsk, |
432 | unsigned long *flags) | ||
414 | { | 433 | { |
415 | unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); | 434 | unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); |
416 | spinlock_t *hlist_lock; | 435 | spinlock_t *hlist_lock; |
@@ -419,7 +438,7 @@ void kretprobe_hash_unlock(struct task_struct *tsk, unsigned long *flags) | |||
419 | spin_unlock_irqrestore(hlist_lock, *flags); | 438 | spin_unlock_irqrestore(hlist_lock, *flags); |
420 | } | 439 | } |
421 | 440 | ||
422 | void kretprobe_table_unlock(unsigned long hash, unsigned long *flags) | 441 | void __kprobes kretprobe_table_unlock(unsigned long hash, unsigned long *flags) |
423 | { | 442 | { |
424 | spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); | 443 | spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); |
425 | spin_unlock_irqrestore(hlist_lock, *flags); | 444 | spin_unlock_irqrestore(hlist_lock, *flags); |
@@ -526,9 +545,10 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p) | |||
526 | ap->addr = p->addr; | 545 | ap->addr = p->addr; |
527 | ap->pre_handler = aggr_pre_handler; | 546 | ap->pre_handler = aggr_pre_handler; |
528 | ap->fault_handler = aggr_fault_handler; | 547 | ap->fault_handler = aggr_fault_handler; |
529 | if (p->post_handler) | 548 | /* We don't care the kprobe which has gone. */ |
549 | if (p->post_handler && !kprobe_gone(p)) | ||
530 | ap->post_handler = aggr_post_handler; | 550 | ap->post_handler = aggr_post_handler; |
531 | if (p->break_handler) | 551 | if (p->break_handler && !kprobe_gone(p)) |
532 | ap->break_handler = aggr_break_handler; | 552 | ap->break_handler = aggr_break_handler; |
533 | 553 | ||
534 | INIT_LIST_HEAD(&ap->list); | 554 | INIT_LIST_HEAD(&ap->list); |
@@ -547,17 +567,41 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p, | |||
547 | int ret = 0; | 567 | int ret = 0; |
548 | struct kprobe *ap; | 568 | struct kprobe *ap; |
549 | 569 | ||
570 | if (kprobe_gone(old_p)) { | ||
571 | /* | ||
572 | * Attempting to insert new probe at the same location that | ||
573 | * had a probe in the module vaddr area which already | ||
574 | * freed. So, the instruction slot has already been | ||
575 | * released. We need a new slot for the new probe. | ||
576 | */ | ||
577 | ret = arch_prepare_kprobe(old_p); | ||
578 | if (ret) | ||
579 | return ret; | ||
580 | } | ||
550 | if (old_p->pre_handler == aggr_pre_handler) { | 581 | if (old_p->pre_handler == aggr_pre_handler) { |
551 | copy_kprobe(old_p, p); | 582 | copy_kprobe(old_p, p); |
552 | ret = add_new_kprobe(old_p, p); | 583 | ret = add_new_kprobe(old_p, p); |
584 | ap = old_p; | ||
553 | } else { | 585 | } else { |
554 | ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL); | 586 | ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL); |
555 | if (!ap) | 587 | if (!ap) { |
588 | if (kprobe_gone(old_p)) | ||
589 | arch_remove_kprobe(old_p); | ||
556 | return -ENOMEM; | 590 | return -ENOMEM; |
591 | } | ||
557 | add_aggr_kprobe(ap, old_p); | 592 | add_aggr_kprobe(ap, old_p); |
558 | copy_kprobe(ap, p); | 593 | copy_kprobe(ap, p); |
559 | ret = add_new_kprobe(ap, p); | 594 | ret = add_new_kprobe(ap, p); |
560 | } | 595 | } |
596 | if (kprobe_gone(old_p)) { | ||
597 | /* | ||
598 | * If the old_p has gone, its breakpoint has been disarmed. | ||
599 | * We have to arm it again after preparing real kprobes. | ||
600 | */ | ||
601 | ap->flags &= ~KPROBE_FLAG_GONE; | ||
602 | if (kprobe_enabled) | ||
603 | arch_arm_kprobe(ap); | ||
604 | } | ||
561 | return ret; | 605 | return ret; |
562 | } | 606 | } |
563 | 607 | ||
@@ -600,8 +644,7 @@ static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p) | |||
600 | return (kprobe_opcode_t *)(((char *)addr) + p->offset); | 644 | return (kprobe_opcode_t *)(((char *)addr) + p->offset); |
601 | } | 645 | } |
602 | 646 | ||
603 | static int __kprobes __register_kprobe(struct kprobe *p, | 647 | int __kprobes register_kprobe(struct kprobe *p) |
604 | unsigned long called_from) | ||
605 | { | 648 | { |
606 | int ret = 0; | 649 | int ret = 0; |
607 | struct kprobe *old_p; | 650 | struct kprobe *old_p; |
@@ -620,28 +663,30 @@ static int __kprobes __register_kprobe(struct kprobe *p, | |||
620 | return -EINVAL; | 663 | return -EINVAL; |
621 | } | 664 | } |
622 | 665 | ||
623 | p->mod_refcounted = 0; | 666 | p->flags = 0; |
624 | |||
625 | /* | 667 | /* |
626 | * Check if are we probing a module. | 668 | * Check if are we probing a module. |
627 | */ | 669 | */ |
628 | probed_mod = __module_text_address((unsigned long) p->addr); | 670 | probed_mod = __module_text_address((unsigned long) p->addr); |
629 | if (probed_mod) { | 671 | if (probed_mod) { |
630 | struct module *calling_mod; | ||
631 | calling_mod = __module_text_address(called_from); | ||
632 | /* | 672 | /* |
633 | * We must allow modules to probe themself and in this case | 673 | * We must hold a refcount of the probed module while updating |
634 | * avoid incrementing the module refcount, so as to allow | 674 | * its code to prohibit unexpected unloading. |
635 | * unloading of self probing modules. | ||
636 | */ | 675 | */ |
637 | if (calling_mod && calling_mod != probed_mod) { | 676 | if (unlikely(!try_module_get(probed_mod))) { |
638 | if (unlikely(!try_module_get(probed_mod))) { | 677 | preempt_enable(); |
639 | preempt_enable(); | 678 | return -EINVAL; |
640 | return -EINVAL; | 679 | } |
641 | } | 680 | /* |
642 | p->mod_refcounted = 1; | 681 | * If the module freed .init.text, we couldn't insert |
643 | } else | 682 | * kprobes in there. |
644 | probed_mod = NULL; | 683 | */ |
684 | if (within_module_init((unsigned long)p->addr, probed_mod) && | ||
685 | probed_mod->state != MODULE_STATE_COMING) { | ||
686 | module_put(probed_mod); | ||
687 | preempt_enable(); | ||
688 | return -EINVAL; | ||
689 | } | ||
645 | } | 690 | } |
646 | preempt_enable(); | 691 | preempt_enable(); |
647 | 692 | ||
@@ -668,8 +713,9 @@ static int __kprobes __register_kprobe(struct kprobe *p, | |||
668 | out: | 713 | out: |
669 | mutex_unlock(&kprobe_mutex); | 714 | mutex_unlock(&kprobe_mutex); |
670 | 715 | ||
671 | if (ret && probed_mod) | 716 | if (probed_mod) |
672 | module_put(probed_mod); | 717 | module_put(probed_mod); |
718 | |||
673 | return ret; | 719 | return ret; |
674 | } | 720 | } |
675 | 721 | ||
@@ -697,16 +743,16 @@ valid_p: | |||
697 | list_is_singular(&old_p->list))) { | 743 | list_is_singular(&old_p->list))) { |
698 | /* | 744 | /* |
699 | * Only probe on the hash list. Disarm only if kprobes are | 745 | * Only probe on the hash list. Disarm only if kprobes are |
700 | * enabled - otherwise, the breakpoint would already have | 746 | * enabled and not gone - otherwise, the breakpoint would |
701 | * been removed. We save on flushing icache. | 747 | * already have been removed. We save on flushing icache. |
702 | */ | 748 | */ |
703 | if (kprobe_enabled) | 749 | if (kprobe_enabled && !kprobe_gone(old_p)) |
704 | arch_disarm_kprobe(p); | 750 | arch_disarm_kprobe(p); |
705 | hlist_del_rcu(&old_p->hlist); | 751 | hlist_del_rcu(&old_p->hlist); |
706 | } else { | 752 | } else { |
707 | if (p->break_handler) | 753 | if (p->break_handler && !kprobe_gone(p)) |
708 | old_p->break_handler = NULL; | 754 | old_p->break_handler = NULL; |
709 | if (p->post_handler) { | 755 | if (p->post_handler && !kprobe_gone(p)) { |
710 | list_for_each_entry_rcu(list_p, &old_p->list, list) { | 756 | list_for_each_entry_rcu(list_p, &old_p->list, list) { |
711 | if ((list_p != p) && (list_p->post_handler)) | 757 | if ((list_p != p) && (list_p->post_handler)) |
712 | goto noclean; | 758 | goto noclean; |
@@ -721,39 +767,27 @@ noclean: | |||
721 | 767 | ||
722 | static void __kprobes __unregister_kprobe_bottom(struct kprobe *p) | 768 | static void __kprobes __unregister_kprobe_bottom(struct kprobe *p) |
723 | { | 769 | { |
724 | struct module *mod; | ||
725 | struct kprobe *old_p; | 770 | struct kprobe *old_p; |
726 | 771 | ||
727 | if (p->mod_refcounted) { | 772 | if (list_empty(&p->list)) |
728 | /* | ||
729 | * Since we've already incremented refcount, | ||
730 | * we don't need to disable preemption. | ||
731 | */ | ||
732 | mod = module_text_address((unsigned long)p->addr); | ||
733 | if (mod) | ||
734 | module_put(mod); | ||
735 | } | ||
736 | |||
737 | if (list_empty(&p->list) || list_is_singular(&p->list)) { | ||
738 | if (!list_empty(&p->list)) { | ||
739 | /* "p" is the last child of an aggr_kprobe */ | ||
740 | old_p = list_entry(p->list.next, struct kprobe, list); | ||
741 | list_del(&p->list); | ||
742 | kfree(old_p); | ||
743 | } | ||
744 | arch_remove_kprobe(p); | 773 | arch_remove_kprobe(p); |
774 | else if (list_is_singular(&p->list)) { | ||
775 | /* "p" is the last child of an aggr_kprobe */ | ||
776 | old_p = list_entry(p->list.next, struct kprobe, list); | ||
777 | list_del(&p->list); | ||
778 | arch_remove_kprobe(old_p); | ||
779 | kfree(old_p); | ||
745 | } | 780 | } |
746 | } | 781 | } |
747 | 782 | ||
748 | static int __register_kprobes(struct kprobe **kps, int num, | 783 | int __kprobes register_kprobes(struct kprobe **kps, int num) |
749 | unsigned long called_from) | ||
750 | { | 784 | { |
751 | int i, ret = 0; | 785 | int i, ret = 0; |
752 | 786 | ||
753 | if (num <= 0) | 787 | if (num <= 0) |
754 | return -EINVAL; | 788 | return -EINVAL; |
755 | for (i = 0; i < num; i++) { | 789 | for (i = 0; i < num; i++) { |
756 | ret = __register_kprobe(kps[i], called_from); | 790 | ret = register_kprobe(kps[i]); |
757 | if (ret < 0) { | 791 | if (ret < 0) { |
758 | if (i > 0) | 792 | if (i > 0) |
759 | unregister_kprobes(kps, i); | 793 | unregister_kprobes(kps, i); |
@@ -763,26 +797,11 @@ static int __register_kprobes(struct kprobe **kps, int num, | |||
763 | return ret; | 797 | return ret; |
764 | } | 798 | } |
765 | 799 | ||
766 | /* | ||
767 | * Registration and unregistration functions for kprobe. | ||
768 | */ | ||
769 | int __kprobes register_kprobe(struct kprobe *p) | ||
770 | { | ||
771 | return __register_kprobes(&p, 1, | ||
772 | (unsigned long)__builtin_return_address(0)); | ||
773 | } | ||
774 | |||
775 | void __kprobes unregister_kprobe(struct kprobe *p) | 800 | void __kprobes unregister_kprobe(struct kprobe *p) |
776 | { | 801 | { |
777 | unregister_kprobes(&p, 1); | 802 | unregister_kprobes(&p, 1); |
778 | } | 803 | } |
779 | 804 | ||
780 | int __kprobes register_kprobes(struct kprobe **kps, int num) | ||
781 | { | ||
782 | return __register_kprobes(kps, num, | ||
783 | (unsigned long)__builtin_return_address(0)); | ||
784 | } | ||
785 | |||
786 | void __kprobes unregister_kprobes(struct kprobe **kps, int num) | 805 | void __kprobes unregister_kprobes(struct kprobe **kps, int num) |
787 | { | 806 | { |
788 | int i; | 807 | int i; |
@@ -811,8 +830,7 @@ unsigned long __weak arch_deref_entry_point(void *entry) | |||
811 | return (unsigned long)entry; | 830 | return (unsigned long)entry; |
812 | } | 831 | } |
813 | 832 | ||
814 | static int __register_jprobes(struct jprobe **jps, int num, | 833 | int __kprobes register_jprobes(struct jprobe **jps, int num) |
815 | unsigned long called_from) | ||
816 | { | 834 | { |
817 | struct jprobe *jp; | 835 | struct jprobe *jp; |
818 | int ret = 0, i; | 836 | int ret = 0, i; |
@@ -830,7 +848,7 @@ static int __register_jprobes(struct jprobe **jps, int num, | |||
830 | /* Todo: Verify probepoint is a function entry point */ | 848 | /* Todo: Verify probepoint is a function entry point */ |
831 | jp->kp.pre_handler = setjmp_pre_handler; | 849 | jp->kp.pre_handler = setjmp_pre_handler; |
832 | jp->kp.break_handler = longjmp_break_handler; | 850 | jp->kp.break_handler = longjmp_break_handler; |
833 | ret = __register_kprobe(&jp->kp, called_from); | 851 | ret = register_kprobe(&jp->kp); |
834 | } | 852 | } |
835 | if (ret < 0) { | 853 | if (ret < 0) { |
836 | if (i > 0) | 854 | if (i > 0) |
@@ -843,8 +861,7 @@ static int __register_jprobes(struct jprobe **jps, int num, | |||
843 | 861 | ||
844 | int __kprobes register_jprobe(struct jprobe *jp) | 862 | int __kprobes register_jprobe(struct jprobe *jp) |
845 | { | 863 | { |
846 | return __register_jprobes(&jp, 1, | 864 | return register_jprobes(&jp, 1); |
847 | (unsigned long)__builtin_return_address(0)); | ||
848 | } | 865 | } |
849 | 866 | ||
850 | void __kprobes unregister_jprobe(struct jprobe *jp) | 867 | void __kprobes unregister_jprobe(struct jprobe *jp) |
@@ -852,12 +869,6 @@ void __kprobes unregister_jprobe(struct jprobe *jp) | |||
852 | unregister_jprobes(&jp, 1); | 869 | unregister_jprobes(&jp, 1); |
853 | } | 870 | } |
854 | 871 | ||
855 | int __kprobes register_jprobes(struct jprobe **jps, int num) | ||
856 | { | ||
857 | return __register_jprobes(jps, num, | ||
858 | (unsigned long)__builtin_return_address(0)); | ||
859 | } | ||
860 | |||
861 | void __kprobes unregister_jprobes(struct jprobe **jps, int num) | 872 | void __kprobes unregister_jprobes(struct jprobe **jps, int num) |
862 | { | 873 | { |
863 | int i; | 874 | int i; |
@@ -920,8 +931,7 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p, | |||
920 | return 0; | 931 | return 0; |
921 | } | 932 | } |
922 | 933 | ||
923 | static int __kprobes __register_kretprobe(struct kretprobe *rp, | 934 | int __kprobes register_kretprobe(struct kretprobe *rp) |
924 | unsigned long called_from) | ||
925 | { | 935 | { |
926 | int ret = 0; | 936 | int ret = 0; |
927 | struct kretprobe_instance *inst; | 937 | struct kretprobe_instance *inst; |
@@ -967,21 +977,20 @@ static int __kprobes __register_kretprobe(struct kretprobe *rp, | |||
967 | 977 | ||
968 | rp->nmissed = 0; | 978 | rp->nmissed = 0; |
969 | /* Establish function entry probe point */ | 979 | /* Establish function entry probe point */ |
970 | ret = __register_kprobe(&rp->kp, called_from); | 980 | ret = register_kprobe(&rp->kp); |
971 | if (ret != 0) | 981 | if (ret != 0) |
972 | free_rp_inst(rp); | 982 | free_rp_inst(rp); |
973 | return ret; | 983 | return ret; |
974 | } | 984 | } |
975 | 985 | ||
976 | static int __register_kretprobes(struct kretprobe **rps, int num, | 986 | int __kprobes register_kretprobes(struct kretprobe **rps, int num) |
977 | unsigned long called_from) | ||
978 | { | 987 | { |
979 | int ret = 0, i; | 988 | int ret = 0, i; |
980 | 989 | ||
981 | if (num <= 0) | 990 | if (num <= 0) |
982 | return -EINVAL; | 991 | return -EINVAL; |
983 | for (i = 0; i < num; i++) { | 992 | for (i = 0; i < num; i++) { |
984 | ret = __register_kretprobe(rps[i], called_from); | 993 | ret = register_kretprobe(rps[i]); |
985 | if (ret < 0) { | 994 | if (ret < 0) { |
986 | if (i > 0) | 995 | if (i > 0) |
987 | unregister_kretprobes(rps, i); | 996 | unregister_kretprobes(rps, i); |
@@ -991,23 +1000,11 @@ static int __register_kretprobes(struct kretprobe **rps, int num, | |||
991 | return ret; | 1000 | return ret; |
992 | } | 1001 | } |
993 | 1002 | ||
994 | int __kprobes register_kretprobe(struct kretprobe *rp) | ||
995 | { | ||
996 | return __register_kretprobes(&rp, 1, | ||
997 | (unsigned long)__builtin_return_address(0)); | ||
998 | } | ||
999 | |||
1000 | void __kprobes unregister_kretprobe(struct kretprobe *rp) | 1003 | void __kprobes unregister_kretprobe(struct kretprobe *rp) |
1001 | { | 1004 | { |
1002 | unregister_kretprobes(&rp, 1); | 1005 | unregister_kretprobes(&rp, 1); |
1003 | } | 1006 | } |
1004 | 1007 | ||
1005 | int __kprobes register_kretprobes(struct kretprobe **rps, int num) | ||
1006 | { | ||
1007 | return __register_kretprobes(rps, num, | ||
1008 | (unsigned long)__builtin_return_address(0)); | ||
1009 | } | ||
1010 | |||
1011 | void __kprobes unregister_kretprobes(struct kretprobe **rps, int num) | 1008 | void __kprobes unregister_kretprobes(struct kretprobe **rps, int num) |
1012 | { | 1009 | { |
1013 | int i; | 1010 | int i; |
@@ -1055,6 +1052,72 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p, | |||
1055 | 1052 | ||
1056 | #endif /* CONFIG_KRETPROBES */ | 1053 | #endif /* CONFIG_KRETPROBES */ |
1057 | 1054 | ||
1055 | /* Set the kprobe gone and remove its instruction buffer. */ | ||
1056 | static void __kprobes kill_kprobe(struct kprobe *p) | ||
1057 | { | ||
1058 | struct kprobe *kp; | ||
1059 | p->flags |= KPROBE_FLAG_GONE; | ||
1060 | if (p->pre_handler == aggr_pre_handler) { | ||
1061 | /* | ||
1062 | * If this is an aggr_kprobe, we have to list all the | ||
1063 | * chained probes and mark them GONE. | ||
1064 | */ | ||
1065 | list_for_each_entry_rcu(kp, &p->list, list) | ||
1066 | kp->flags |= KPROBE_FLAG_GONE; | ||
1067 | p->post_handler = NULL; | ||
1068 | p->break_handler = NULL; | ||
1069 | } | ||
1070 | /* | ||
1071 | * Here, we can remove insn_slot safely, because no thread calls | ||
1072 | * the original probed function (which will be freed soon) any more. | ||
1073 | */ | ||
1074 | arch_remove_kprobe(p); | ||
1075 | } | ||
1076 | |||
1077 | /* Module notifier call back, checking kprobes on the module */ | ||
1078 | static int __kprobes kprobes_module_callback(struct notifier_block *nb, | ||
1079 | unsigned long val, void *data) | ||
1080 | { | ||
1081 | struct module *mod = data; | ||
1082 | struct hlist_head *head; | ||
1083 | struct hlist_node *node; | ||
1084 | struct kprobe *p; | ||
1085 | unsigned int i; | ||
1086 | int checkcore = (val == MODULE_STATE_GOING); | ||
1087 | |||
1088 | if (val != MODULE_STATE_GOING && val != MODULE_STATE_LIVE) | ||
1089 | return NOTIFY_DONE; | ||
1090 | |||
1091 | /* | ||
1092 | * When MODULE_STATE_GOING was notified, both of module .text and | ||
1093 | * .init.text sections would be freed. When MODULE_STATE_LIVE was | ||
1094 | * notified, only .init.text section would be freed. We need to | ||
1095 | * disable kprobes which have been inserted in the sections. | ||
1096 | */ | ||
1097 | mutex_lock(&kprobe_mutex); | ||
1098 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { | ||
1099 | head = &kprobe_table[i]; | ||
1100 | hlist_for_each_entry_rcu(p, node, head, hlist) | ||
1101 | if (within_module_init((unsigned long)p->addr, mod) || | ||
1102 | (checkcore && | ||
1103 | within_module_core((unsigned long)p->addr, mod))) { | ||
1104 | /* | ||
1105 | * The vaddr this probe is installed will soon | ||
1106 | * be vfreed buy not synced to disk. Hence, | ||
1107 | * disarming the breakpoint isn't needed. | ||
1108 | */ | ||
1109 | kill_kprobe(p); | ||
1110 | } | ||
1111 | } | ||
1112 | mutex_unlock(&kprobe_mutex); | ||
1113 | return NOTIFY_DONE; | ||
1114 | } | ||
1115 | |||
1116 | static struct notifier_block kprobe_module_nb = { | ||
1117 | .notifier_call = kprobes_module_callback, | ||
1118 | .priority = 0 | ||
1119 | }; | ||
1120 | |||
1058 | static int __init init_kprobes(void) | 1121 | static int __init init_kprobes(void) |
1059 | { | 1122 | { |
1060 | int i, err = 0; | 1123 | int i, err = 0; |
@@ -1111,6 +1174,9 @@ static int __init init_kprobes(void) | |||
1111 | err = arch_init_kprobes(); | 1174 | err = arch_init_kprobes(); |
1112 | if (!err) | 1175 | if (!err) |
1113 | err = register_die_notifier(&kprobe_exceptions_nb); | 1176 | err = register_die_notifier(&kprobe_exceptions_nb); |
1177 | if (!err) | ||
1178 | err = register_module_notifier(&kprobe_module_nb); | ||
1179 | |||
1114 | kprobes_initialized = (err == 0); | 1180 | kprobes_initialized = (err == 0); |
1115 | 1181 | ||
1116 | if (!err) | 1182 | if (!err) |
@@ -1131,10 +1197,12 @@ static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p, | |||
1131 | else | 1197 | else |
1132 | kprobe_type = "k"; | 1198 | kprobe_type = "k"; |
1133 | if (sym) | 1199 | if (sym) |
1134 | seq_printf(pi, "%p %s %s+0x%x %s\n", p->addr, kprobe_type, | 1200 | seq_printf(pi, "%p %s %s+0x%x %s %s\n", p->addr, kprobe_type, |
1135 | sym, offset, (modname ? modname : " ")); | 1201 | sym, offset, (modname ? modname : " "), |
1202 | (kprobe_gone(p) ? "[GONE]" : "")); | ||
1136 | else | 1203 | else |
1137 | seq_printf(pi, "%p %s %p\n", p->addr, kprobe_type, p->addr); | 1204 | seq_printf(pi, "%p %s %p %s\n", p->addr, kprobe_type, p->addr, |
1205 | (kprobe_gone(p) ? "[GONE]" : "")); | ||
1138 | } | 1206 | } |
1139 | 1207 | ||
1140 | static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos) | 1208 | static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos) |
@@ -1215,7 +1283,8 @@ static void __kprobes enable_all_kprobes(void) | |||
1215 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { | 1283 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { |
1216 | head = &kprobe_table[i]; | 1284 | head = &kprobe_table[i]; |
1217 | hlist_for_each_entry_rcu(p, node, head, hlist) | 1285 | hlist_for_each_entry_rcu(p, node, head, hlist) |
1218 | arch_arm_kprobe(p); | 1286 | if (!kprobe_gone(p)) |
1287 | arch_arm_kprobe(p); | ||
1219 | } | 1288 | } |
1220 | 1289 | ||
1221 | kprobe_enabled = true; | 1290 | kprobe_enabled = true; |
@@ -1244,7 +1313,7 @@ static void __kprobes disable_all_kprobes(void) | |||
1244 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { | 1313 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { |
1245 | head = &kprobe_table[i]; | 1314 | head = &kprobe_table[i]; |
1246 | hlist_for_each_entry_rcu(p, node, head, hlist) { | 1315 | hlist_for_each_entry_rcu(p, node, head, hlist) { |
1247 | if (!arch_trampoline_kprobe(p)) | 1316 | if (!arch_trampoline_kprobe(p) && !kprobe_gone(p)) |
1248 | arch_disarm_kprobe(p); | 1317 | arch_disarm_kprobe(p); |
1249 | } | 1318 | } |
1250 | } | 1319 | } |
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index 08dd8ed86c77..528dd78e7e7e 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c | |||
@@ -24,7 +24,7 @@ static struct kobj_attribute _name##_attr = __ATTR_RO(_name) | |||
24 | static struct kobj_attribute _name##_attr = \ | 24 | static struct kobj_attribute _name##_attr = \ |
25 | __ATTR(_name, 0644, _name##_show, _name##_store) | 25 | __ATTR(_name, 0644, _name##_show, _name##_store) |
26 | 26 | ||
27 | #if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET) | 27 | #if defined(CONFIG_HOTPLUG) |
28 | /* current uevent sequence number */ | 28 | /* current uevent sequence number */ |
29 | static ssize_t uevent_seqnum_show(struct kobject *kobj, | 29 | static ssize_t uevent_seqnum_show(struct kobject *kobj, |
30 | struct kobj_attribute *attr, char *buf) | 30 | struct kobj_attribute *attr, char *buf) |
@@ -137,7 +137,7 @@ struct kobject *kernel_kobj; | |||
137 | EXPORT_SYMBOL_GPL(kernel_kobj); | 137 | EXPORT_SYMBOL_GPL(kernel_kobj); |
138 | 138 | ||
139 | static struct attribute * kernel_attrs[] = { | 139 | static struct attribute * kernel_attrs[] = { |
140 | #if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET) | 140 | #if defined(CONFIG_HOTPLUG) |
141 | &uevent_seqnum_attr.attr, | 141 | &uevent_seqnum_attr.attr, |
142 | &uevent_helper_attr.attr, | 142 | &uevent_helper_attr.attr, |
143 | #endif | 143 | #endif |
diff --git a/kernel/module.c b/kernel/module.c index f47cce910f25..c9332c90d5a0 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -43,7 +43,6 @@ | |||
43 | #include <linux/device.h> | 43 | #include <linux/device.h> |
44 | #include <linux/string.h> | 44 | #include <linux/string.h> |
45 | #include <linux/mutex.h> | 45 | #include <linux/mutex.h> |
46 | #include <linux/unwind.h> | ||
47 | #include <linux/rculist.h> | 46 | #include <linux/rculist.h> |
48 | #include <asm/uaccess.h> | 47 | #include <asm/uaccess.h> |
49 | #include <asm/cacheflush.h> | 48 | #include <asm/cacheflush.h> |
@@ -51,6 +50,7 @@ | |||
51 | #include <asm/sections.h> | 50 | #include <asm/sections.h> |
52 | #include <linux/tracepoint.h> | 51 | #include <linux/tracepoint.h> |
53 | #include <linux/ftrace.h> | 52 | #include <linux/ftrace.h> |
53 | #include <linux/async.h> | ||
54 | 54 | ||
55 | #if 0 | 55 | #if 0 |
56 | #define DEBUGP printk | 56 | #define DEBUGP printk |
@@ -817,6 +817,7 @@ sys_delete_module(const char __user *name_user, unsigned int flags) | |||
817 | mod->exit(); | 817 | mod->exit(); |
818 | blocking_notifier_call_chain(&module_notify_list, | 818 | blocking_notifier_call_chain(&module_notify_list, |
819 | MODULE_STATE_GOING, mod); | 819 | MODULE_STATE_GOING, mod); |
820 | async_synchronize_full(); | ||
820 | mutex_lock(&module_mutex); | 821 | mutex_lock(&module_mutex); |
821 | /* Store the name of the last unloaded module for diagnostic purposes */ | 822 | /* Store the name of the last unloaded module for diagnostic purposes */ |
822 | strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module)); | 823 | strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module)); |
@@ -1449,8 +1450,6 @@ static void free_module(struct module *mod) | |||
1449 | remove_sect_attrs(mod); | 1450 | remove_sect_attrs(mod); |
1450 | mod_kobject_remove(mod); | 1451 | mod_kobject_remove(mod); |
1451 | 1452 | ||
1452 | unwind_remove_table(mod->unwind_info, 0); | ||
1453 | |||
1454 | /* Arch-specific cleanup. */ | 1453 | /* Arch-specific cleanup. */ |
1455 | module_arch_cleanup(mod); | 1454 | module_arch_cleanup(mod); |
1456 | 1455 | ||
@@ -1867,7 +1866,6 @@ static noinline struct module *load_module(void __user *umod, | |||
1867 | unsigned int symindex = 0; | 1866 | unsigned int symindex = 0; |
1868 | unsigned int strindex = 0; | 1867 | unsigned int strindex = 0; |
1869 | unsigned int modindex, versindex, infoindex, pcpuindex; | 1868 | unsigned int modindex, versindex, infoindex, pcpuindex; |
1870 | unsigned int unwindex = 0; | ||
1871 | unsigned int num_kp, num_mcount; | 1869 | unsigned int num_kp, num_mcount; |
1872 | struct kernel_param *kp; | 1870 | struct kernel_param *kp; |
1873 | struct module *mod; | 1871 | struct module *mod; |
@@ -1957,9 +1955,6 @@ static noinline struct module *load_module(void __user *umod, | |||
1957 | versindex = find_sec(hdr, sechdrs, secstrings, "__versions"); | 1955 | versindex = find_sec(hdr, sechdrs, secstrings, "__versions"); |
1958 | infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo"); | 1956 | infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo"); |
1959 | pcpuindex = find_pcpusec(hdr, sechdrs, secstrings); | 1957 | pcpuindex = find_pcpusec(hdr, sechdrs, secstrings); |
1960 | #ifdef ARCH_UNWIND_SECTION_NAME | ||
1961 | unwindex = find_sec(hdr, sechdrs, secstrings, ARCH_UNWIND_SECTION_NAME); | ||
1962 | #endif | ||
1963 | 1958 | ||
1964 | /* Don't keep modinfo and version sections. */ | 1959 | /* Don't keep modinfo and version sections. */ |
1965 | sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC; | 1960 | sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC; |
@@ -1969,8 +1964,6 @@ static noinline struct module *load_module(void __user *umod, | |||
1969 | sechdrs[symindex].sh_flags |= SHF_ALLOC; | 1964 | sechdrs[symindex].sh_flags |= SHF_ALLOC; |
1970 | sechdrs[strindex].sh_flags |= SHF_ALLOC; | 1965 | sechdrs[strindex].sh_flags |= SHF_ALLOC; |
1971 | #endif | 1966 | #endif |
1972 | if (unwindex) | ||
1973 | sechdrs[unwindex].sh_flags |= SHF_ALLOC; | ||
1974 | 1967 | ||
1975 | /* Check module struct version now, before we try to use module. */ | 1968 | /* Check module struct version now, before we try to use module. */ |
1976 | if (!check_modstruct_version(sechdrs, versindex, mod)) { | 1969 | if (!check_modstruct_version(sechdrs, versindex, mod)) { |
@@ -2267,11 +2260,6 @@ static noinline struct module *load_module(void __user *umod, | |||
2267 | add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs); | 2260 | add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs); |
2268 | add_notes_attrs(mod, hdr->e_shnum, secstrings, sechdrs); | 2261 | add_notes_attrs(mod, hdr->e_shnum, secstrings, sechdrs); |
2269 | 2262 | ||
2270 | /* Size of section 0 is 0, so this works well if no unwind info. */ | ||
2271 | mod->unwind_info = unwind_add_table(mod, | ||
2272 | (void *)sechdrs[unwindex].sh_addr, | ||
2273 | sechdrs[unwindex].sh_size); | ||
2274 | |||
2275 | /* Get rid of temporary copy */ | 2263 | /* Get rid of temporary copy */ |
2276 | vfree(hdr); | 2264 | vfree(hdr); |
2277 | 2265 | ||
@@ -2366,11 +2354,12 @@ sys_init_module(void __user *umod, | |||
2366 | /* Now it's a first class citizen! Wake up anyone waiting for it. */ | 2354 | /* Now it's a first class citizen! Wake up anyone waiting for it. */ |
2367 | mod->state = MODULE_STATE_LIVE; | 2355 | mod->state = MODULE_STATE_LIVE; |
2368 | wake_up(&module_wq); | 2356 | wake_up(&module_wq); |
2357 | blocking_notifier_call_chain(&module_notify_list, | ||
2358 | MODULE_STATE_LIVE, mod); | ||
2369 | 2359 | ||
2370 | mutex_lock(&module_mutex); | 2360 | mutex_lock(&module_mutex); |
2371 | /* Drop initial reference. */ | 2361 | /* Drop initial reference. */ |
2372 | module_put(mod); | 2362 | module_put(mod); |
2373 | unwind_remove_table(mod->unwind_info, 1); | ||
2374 | module_free(mod, mod->module_init); | 2363 | module_free(mod, mod->module_init); |
2375 | mod->module_init = NULL; | 2364 | mod->module_init = NULL; |
2376 | mod->init_size = 0; | 2365 | mod->init_size = 0; |
@@ -2405,7 +2394,7 @@ static const char *get_ksymbol(struct module *mod, | |||
2405 | unsigned long nextval; | 2394 | unsigned long nextval; |
2406 | 2395 | ||
2407 | /* At worse, next value is at end of module */ | 2396 | /* At worse, next value is at end of module */ |
2408 | if (within(addr, mod->module_init, mod->init_size)) | 2397 | if (within_module_init(addr, mod)) |
2409 | nextval = (unsigned long)mod->module_init+mod->init_text_size; | 2398 | nextval = (unsigned long)mod->module_init+mod->init_text_size; |
2410 | else | 2399 | else |
2411 | nextval = (unsigned long)mod->module_core+mod->core_text_size; | 2400 | nextval = (unsigned long)mod->module_core+mod->core_text_size; |
@@ -2453,8 +2442,8 @@ const char *module_address_lookup(unsigned long addr, | |||
2453 | 2442 | ||
2454 | preempt_disable(); | 2443 | preempt_disable(); |
2455 | list_for_each_entry_rcu(mod, &modules, list) { | 2444 | list_for_each_entry_rcu(mod, &modules, list) { |
2456 | if (within(addr, mod->module_init, mod->init_size) | 2445 | if (within_module_init(addr, mod) || |
2457 | || within(addr, mod->module_core, mod->core_size)) { | 2446 | within_module_core(addr, mod)) { |
2458 | if (modname) | 2447 | if (modname) |
2459 | *modname = mod->name; | 2448 | *modname = mod->name; |
2460 | ret = get_ksymbol(mod, addr, size, offset); | 2449 | ret = get_ksymbol(mod, addr, size, offset); |
@@ -2476,8 +2465,8 @@ int lookup_module_symbol_name(unsigned long addr, char *symname) | |||
2476 | 2465 | ||
2477 | preempt_disable(); | 2466 | preempt_disable(); |
2478 | list_for_each_entry_rcu(mod, &modules, list) { | 2467 | list_for_each_entry_rcu(mod, &modules, list) { |
2479 | if (within(addr, mod->module_init, mod->init_size) || | 2468 | if (within_module_init(addr, mod) || |
2480 | within(addr, mod->module_core, mod->core_size)) { | 2469 | within_module_core(addr, mod)) { |
2481 | const char *sym; | 2470 | const char *sym; |
2482 | 2471 | ||
2483 | sym = get_ksymbol(mod, addr, NULL, NULL); | 2472 | sym = get_ksymbol(mod, addr, NULL, NULL); |
@@ -2500,8 +2489,8 @@ int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, | |||
2500 | 2489 | ||
2501 | preempt_disable(); | 2490 | preempt_disable(); |
2502 | list_for_each_entry_rcu(mod, &modules, list) { | 2491 | list_for_each_entry_rcu(mod, &modules, list) { |
2503 | if (within(addr, mod->module_init, mod->init_size) || | 2492 | if (within_module_init(addr, mod) || |
2504 | within(addr, mod->module_core, mod->core_size)) { | 2493 | within_module_core(addr, mod)) { |
2505 | const char *sym; | 2494 | const char *sym; |
2506 | 2495 | ||
2507 | sym = get_ksymbol(mod, addr, size, offset); | 2496 | sym = get_ksymbol(mod, addr, size, offset); |
@@ -2720,7 +2709,7 @@ int is_module_address(unsigned long addr) | |||
2720 | preempt_disable(); | 2709 | preempt_disable(); |
2721 | 2710 | ||
2722 | list_for_each_entry_rcu(mod, &modules, list) { | 2711 | list_for_each_entry_rcu(mod, &modules, list) { |
2723 | if (within(addr, mod->module_core, mod->core_size)) { | 2712 | if (within_module_core(addr, mod)) { |
2724 | preempt_enable(); | 2713 | preempt_enable(); |
2725 | return 1; | 2714 | return 1; |
2726 | } | 2715 | } |
diff --git a/kernel/ns_cgroup.c b/kernel/ns_cgroup.c index 43c2111cd54d..78bc3fdac0d2 100644 --- a/kernel/ns_cgroup.c +++ b/kernel/ns_cgroup.c | |||
@@ -13,7 +13,6 @@ | |||
13 | 13 | ||
14 | struct ns_cgroup { | 14 | struct ns_cgroup { |
15 | struct cgroup_subsys_state css; | 15 | struct cgroup_subsys_state css; |
16 | spinlock_t lock; | ||
17 | }; | 16 | }; |
18 | 17 | ||
19 | struct cgroup_subsys ns_subsys; | 18 | struct cgroup_subsys ns_subsys; |
@@ -84,7 +83,6 @@ static struct cgroup_subsys_state *ns_create(struct cgroup_subsys *ss, | |||
84 | ns_cgroup = kzalloc(sizeof(*ns_cgroup), GFP_KERNEL); | 83 | ns_cgroup = kzalloc(sizeof(*ns_cgroup), GFP_KERNEL); |
85 | if (!ns_cgroup) | 84 | if (!ns_cgroup) |
86 | return ERR_PTR(-ENOMEM); | 85 | return ERR_PTR(-ENOMEM); |
87 | spin_lock_init(&ns_cgroup->lock); | ||
88 | return &ns_cgroup->css; | 86 | return &ns_cgroup->css; |
89 | } | 87 | } |
90 | 88 | ||
diff --git a/kernel/panic.c b/kernel/panic.c index 13f06349a786..2a2ff36ff44d 100644 --- a/kernel/panic.c +++ b/kernel/panic.c | |||
@@ -299,6 +299,8 @@ static int init_oops_id(void) | |||
299 | { | 299 | { |
300 | if (!oops_id) | 300 | if (!oops_id) |
301 | get_random_bytes(&oops_id, sizeof(oops_id)); | 301 | get_random_bytes(&oops_id, sizeof(oops_id)); |
302 | else | ||
303 | oops_id++; | ||
302 | 304 | ||
303 | return 0; | 305 | return 0; |
304 | } | 306 | } |
diff --git a/kernel/pid.c b/kernel/pid.c index 064e76afa507..1b3586fe753a 100644 --- a/kernel/pid.c +++ b/kernel/pid.c | |||
@@ -474,8 +474,14 @@ pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) | |||
474 | } | 474 | } |
475 | EXPORT_SYMBOL(task_session_nr_ns); | 475 | EXPORT_SYMBOL(task_session_nr_ns); |
476 | 476 | ||
477 | struct pid_namespace *task_active_pid_ns(struct task_struct *tsk) | ||
478 | { | ||
479 | return ns_of_pid(task_pid(tsk)); | ||
480 | } | ||
481 | EXPORT_SYMBOL_GPL(task_active_pid_ns); | ||
482 | |||
477 | /* | 483 | /* |
478 | * Used by proc to find the first pid that is greater then or equal to nr. | 484 | * Used by proc to find the first pid that is greater than or equal to nr. |
479 | * | 485 | * |
480 | * If there is a pid at nr this function is exactly the same as find_pid_ns. | 486 | * If there is a pid at nr this function is exactly the same as find_pid_ns. |
481 | */ | 487 | */ |
diff --git a/kernel/power/disk.c b/kernel/power/disk.c index f77d3819ef57..45e8541ab7e3 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c | |||
@@ -258,12 +258,12 @@ int hibernation_snapshot(int platform_mode) | |||
258 | { | 258 | { |
259 | int error; | 259 | int error; |
260 | 260 | ||
261 | /* Free memory before shutting down devices. */ | 261 | error = platform_begin(platform_mode); |
262 | error = swsusp_shrink_memory(); | ||
263 | if (error) | 262 | if (error) |
264 | return error; | 263 | return error; |
265 | 264 | ||
266 | error = platform_begin(platform_mode); | 265 | /* Free memory before shutting down devices. */ |
266 | error = swsusp_shrink_memory(); | ||
267 | if (error) | 267 | if (error) |
268 | goto Close; | 268 | goto Close; |
269 | 269 | ||
diff --git a/kernel/power/main.c b/kernel/power/main.c index 613f16941b85..239988873971 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c | |||
@@ -615,7 +615,7 @@ static void __init test_wakealarm(struct rtc_device *rtc, suspend_state_t state) | |||
615 | /* this may fail if the RTC hasn't been initialized */ | 615 | /* this may fail if the RTC hasn't been initialized */ |
616 | status = rtc_read_time(rtc, &alm.time); | 616 | status = rtc_read_time(rtc, &alm.time); |
617 | if (status < 0) { | 617 | if (status < 0) { |
618 | printk(err_readtime, rtc->dev.bus_id, status); | 618 | printk(err_readtime, dev_name(&rtc->dev), status); |
619 | return; | 619 | return; |
620 | } | 620 | } |
621 | rtc_tm_to_time(&alm.time, &now); | 621 | rtc_tm_to_time(&alm.time, &now); |
@@ -626,7 +626,7 @@ static void __init test_wakealarm(struct rtc_device *rtc, suspend_state_t state) | |||
626 | 626 | ||
627 | status = rtc_set_alarm(rtc, &alm); | 627 | status = rtc_set_alarm(rtc, &alm); |
628 | if (status < 0) { | 628 | if (status < 0) { |
629 | printk(err_wakealarm, rtc->dev.bus_id, status); | 629 | printk(err_wakealarm, dev_name(&rtc->dev), status); |
630 | return; | 630 | return; |
631 | } | 631 | } |
632 | 632 | ||
@@ -660,7 +660,7 @@ static int __init has_wakealarm(struct device *dev, void *name_ptr) | |||
660 | if (!device_may_wakeup(candidate->dev.parent)) | 660 | if (!device_may_wakeup(candidate->dev.parent)) |
661 | return 0; | 661 | return 0; |
662 | 662 | ||
663 | *(char **)name_ptr = dev->bus_id; | 663 | *(const char **)name_ptr = dev_name(dev); |
664 | return 1; | 664 | return 1; |
665 | } | 665 | } |
666 | 666 | ||
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 5d2ab836e998..f5fc2d7680f2 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/syscalls.h> | 25 | #include <linux/syscalls.h> |
26 | #include <linux/console.h> | 26 | #include <linux/console.h> |
27 | #include <linux/highmem.h> | 27 | #include <linux/highmem.h> |
28 | #include <linux/list.h> | ||
28 | 29 | ||
29 | #include <asm/uaccess.h> | 30 | #include <asm/uaccess.h> |
30 | #include <asm/mmu_context.h> | 31 | #include <asm/mmu_context.h> |
@@ -192,12 +193,6 @@ static void *chain_alloc(struct chain_allocator *ca, unsigned int size) | |||
192 | return ret; | 193 | return ret; |
193 | } | 194 | } |
194 | 195 | ||
195 | static void chain_free(struct chain_allocator *ca, int clear_page_nosave) | ||
196 | { | ||
197 | free_list_of_pages(ca->chain, clear_page_nosave); | ||
198 | memset(ca, 0, sizeof(struct chain_allocator)); | ||
199 | } | ||
200 | |||
201 | /** | 196 | /** |
202 | * Data types related to memory bitmaps. | 197 | * Data types related to memory bitmaps. |
203 | * | 198 | * |
@@ -233,7 +228,7 @@ static void chain_free(struct chain_allocator *ca, int clear_page_nosave) | |||
233 | #define BM_BITS_PER_BLOCK (PAGE_SIZE << 3) | 228 | #define BM_BITS_PER_BLOCK (PAGE_SIZE << 3) |
234 | 229 | ||
235 | struct bm_block { | 230 | struct bm_block { |
236 | struct bm_block *next; /* next element of the list */ | 231 | struct list_head hook; /* hook into a list of bitmap blocks */ |
237 | unsigned long start_pfn; /* pfn represented by the first bit */ | 232 | unsigned long start_pfn; /* pfn represented by the first bit */ |
238 | unsigned long end_pfn; /* pfn represented by the last bit plus 1 */ | 233 | unsigned long end_pfn; /* pfn represented by the last bit plus 1 */ |
239 | unsigned long *data; /* bitmap representing pages */ | 234 | unsigned long *data; /* bitmap representing pages */ |
@@ -244,24 +239,15 @@ static inline unsigned long bm_block_bits(struct bm_block *bb) | |||
244 | return bb->end_pfn - bb->start_pfn; | 239 | return bb->end_pfn - bb->start_pfn; |
245 | } | 240 | } |
246 | 241 | ||
247 | struct zone_bitmap { | ||
248 | struct zone_bitmap *next; /* next element of the list */ | ||
249 | unsigned long start_pfn; /* minimal pfn in this zone */ | ||
250 | unsigned long end_pfn; /* maximal pfn in this zone plus 1 */ | ||
251 | struct bm_block *bm_blocks; /* list of bitmap blocks */ | ||
252 | struct bm_block *cur_block; /* recently used bitmap block */ | ||
253 | }; | ||
254 | |||
255 | /* strcut bm_position is used for browsing memory bitmaps */ | 242 | /* strcut bm_position is used for browsing memory bitmaps */ |
256 | 243 | ||
257 | struct bm_position { | 244 | struct bm_position { |
258 | struct zone_bitmap *zone_bm; | ||
259 | struct bm_block *block; | 245 | struct bm_block *block; |
260 | int bit; | 246 | int bit; |
261 | }; | 247 | }; |
262 | 248 | ||
263 | struct memory_bitmap { | 249 | struct memory_bitmap { |
264 | struct zone_bitmap *zone_bm_list; /* list of zone bitmaps */ | 250 | struct list_head blocks; /* list of bitmap blocks */ |
265 | struct linked_page *p_list; /* list of pages used to store zone | 251 | struct linked_page *p_list; /* list of pages used to store zone |
266 | * bitmap objects and bitmap block | 252 | * bitmap objects and bitmap block |
267 | * objects | 253 | * objects |
@@ -273,11 +259,7 @@ struct memory_bitmap { | |||
273 | 259 | ||
274 | static void memory_bm_position_reset(struct memory_bitmap *bm) | 260 | static void memory_bm_position_reset(struct memory_bitmap *bm) |
275 | { | 261 | { |
276 | struct zone_bitmap *zone_bm; | 262 | bm->cur.block = list_entry(bm->blocks.next, struct bm_block, hook); |
277 | |||
278 | zone_bm = bm->zone_bm_list; | ||
279 | bm->cur.zone_bm = zone_bm; | ||
280 | bm->cur.block = zone_bm->bm_blocks; | ||
281 | bm->cur.bit = 0; | 263 | bm->cur.bit = 0; |
282 | } | 264 | } |
283 | 265 | ||
@@ -285,151 +267,184 @@ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free); | |||
285 | 267 | ||
286 | /** | 268 | /** |
287 | * create_bm_block_list - create a list of block bitmap objects | 269 | * create_bm_block_list - create a list of block bitmap objects |
270 | * @nr_blocks - number of blocks to allocate | ||
271 | * @list - list to put the allocated blocks into | ||
272 | * @ca - chain allocator to be used for allocating memory | ||
288 | */ | 273 | */ |
289 | 274 | static int create_bm_block_list(unsigned long pages, | |
290 | static inline struct bm_block * | 275 | struct list_head *list, |
291 | create_bm_block_list(unsigned int nr_blocks, struct chain_allocator *ca) | 276 | struct chain_allocator *ca) |
292 | { | 277 | { |
293 | struct bm_block *bblist = NULL; | 278 | unsigned int nr_blocks = DIV_ROUND_UP(pages, BM_BITS_PER_BLOCK); |
294 | 279 | ||
295 | while (nr_blocks-- > 0) { | 280 | while (nr_blocks-- > 0) { |
296 | struct bm_block *bb; | 281 | struct bm_block *bb; |
297 | 282 | ||
298 | bb = chain_alloc(ca, sizeof(struct bm_block)); | 283 | bb = chain_alloc(ca, sizeof(struct bm_block)); |
299 | if (!bb) | 284 | if (!bb) |
300 | return NULL; | 285 | return -ENOMEM; |
301 | 286 | list_add(&bb->hook, list); | |
302 | bb->next = bblist; | ||
303 | bblist = bb; | ||
304 | } | 287 | } |
305 | return bblist; | 288 | |
289 | return 0; | ||
306 | } | 290 | } |
307 | 291 | ||
292 | struct mem_extent { | ||
293 | struct list_head hook; | ||
294 | unsigned long start; | ||
295 | unsigned long end; | ||
296 | }; | ||
297 | |||
308 | /** | 298 | /** |
309 | * create_zone_bm_list - create a list of zone bitmap objects | 299 | * free_mem_extents - free a list of memory extents |
300 | * @list - list of extents to empty | ||
310 | */ | 301 | */ |
302 | static void free_mem_extents(struct list_head *list) | ||
303 | { | ||
304 | struct mem_extent *ext, *aux; | ||
311 | 305 | ||
312 | static inline struct zone_bitmap * | 306 | list_for_each_entry_safe(ext, aux, list, hook) { |
313 | create_zone_bm_list(unsigned int nr_zones, struct chain_allocator *ca) | 307 | list_del(&ext->hook); |
308 | kfree(ext); | ||
309 | } | ||
310 | } | ||
311 | |||
312 | /** | ||
313 | * create_mem_extents - create a list of memory extents representing | ||
314 | * contiguous ranges of PFNs | ||
315 | * @list - list to put the extents into | ||
316 | * @gfp_mask - mask to use for memory allocations | ||
317 | */ | ||
318 | static int create_mem_extents(struct list_head *list, gfp_t gfp_mask) | ||
314 | { | 319 | { |
315 | struct zone_bitmap *zbmlist = NULL; | 320 | struct zone *zone; |
316 | 321 | ||
317 | while (nr_zones-- > 0) { | 322 | INIT_LIST_HEAD(list); |
318 | struct zone_bitmap *zbm; | ||
319 | 323 | ||
320 | zbm = chain_alloc(ca, sizeof(struct zone_bitmap)); | 324 | for_each_zone(zone) { |
321 | if (!zbm) | 325 | unsigned long zone_start, zone_end; |
322 | return NULL; | 326 | struct mem_extent *ext, *cur, *aux; |
327 | |||
328 | if (!populated_zone(zone)) | ||
329 | continue; | ||
323 | 330 | ||
324 | zbm->next = zbmlist; | 331 | zone_start = zone->zone_start_pfn; |
325 | zbmlist = zbm; | 332 | zone_end = zone->zone_start_pfn + zone->spanned_pages; |
333 | |||
334 | list_for_each_entry(ext, list, hook) | ||
335 | if (zone_start <= ext->end) | ||
336 | break; | ||
337 | |||
338 | if (&ext->hook == list || zone_end < ext->start) { | ||
339 | /* New extent is necessary */ | ||
340 | struct mem_extent *new_ext; | ||
341 | |||
342 | new_ext = kzalloc(sizeof(struct mem_extent), gfp_mask); | ||
343 | if (!new_ext) { | ||
344 | free_mem_extents(list); | ||
345 | return -ENOMEM; | ||
346 | } | ||
347 | new_ext->start = zone_start; | ||
348 | new_ext->end = zone_end; | ||
349 | list_add_tail(&new_ext->hook, &ext->hook); | ||
350 | continue; | ||
351 | } | ||
352 | |||
353 | /* Merge this zone's range of PFNs with the existing one */ | ||
354 | if (zone_start < ext->start) | ||
355 | ext->start = zone_start; | ||
356 | if (zone_end > ext->end) | ||
357 | ext->end = zone_end; | ||
358 | |||
359 | /* More merging may be possible */ | ||
360 | cur = ext; | ||
361 | list_for_each_entry_safe_continue(cur, aux, list, hook) { | ||
362 | if (zone_end < cur->start) | ||
363 | break; | ||
364 | if (zone_end < cur->end) | ||
365 | ext->end = cur->end; | ||
366 | list_del(&cur->hook); | ||
367 | kfree(cur); | ||
368 | } | ||
326 | } | 369 | } |
327 | return zbmlist; | 370 | |
371 | return 0; | ||
328 | } | 372 | } |
329 | 373 | ||
330 | /** | 374 | /** |
331 | * memory_bm_create - allocate memory for a memory bitmap | 375 | * memory_bm_create - allocate memory for a memory bitmap |
332 | */ | 376 | */ |
333 | |||
334 | static int | 377 | static int |
335 | memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) | 378 | memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) |
336 | { | 379 | { |
337 | struct chain_allocator ca; | 380 | struct chain_allocator ca; |
338 | struct zone *zone; | 381 | struct list_head mem_extents; |
339 | struct zone_bitmap *zone_bm; | 382 | struct mem_extent *ext; |
340 | struct bm_block *bb; | 383 | int error; |
341 | unsigned int nr; | ||
342 | 384 | ||
343 | chain_init(&ca, gfp_mask, safe_needed); | 385 | chain_init(&ca, gfp_mask, safe_needed); |
386 | INIT_LIST_HEAD(&bm->blocks); | ||
344 | 387 | ||
345 | /* Compute the number of zones */ | 388 | error = create_mem_extents(&mem_extents, gfp_mask); |
346 | nr = 0; | 389 | if (error) |
347 | for_each_zone(zone) | 390 | return error; |
348 | if (populated_zone(zone)) | ||
349 | nr++; | ||
350 | |||
351 | /* Allocate the list of zones bitmap objects */ | ||
352 | zone_bm = create_zone_bm_list(nr, &ca); | ||
353 | bm->zone_bm_list = zone_bm; | ||
354 | if (!zone_bm) { | ||
355 | chain_free(&ca, PG_UNSAFE_CLEAR); | ||
356 | return -ENOMEM; | ||
357 | } | ||
358 | |||
359 | /* Initialize the zone bitmap objects */ | ||
360 | for_each_zone(zone) { | ||
361 | unsigned long pfn; | ||
362 | 391 | ||
363 | if (!populated_zone(zone)) | 392 | list_for_each_entry(ext, &mem_extents, hook) { |
364 | continue; | 393 | struct bm_block *bb; |
394 | unsigned long pfn = ext->start; | ||
395 | unsigned long pages = ext->end - ext->start; | ||
365 | 396 | ||
366 | zone_bm->start_pfn = zone->zone_start_pfn; | 397 | bb = list_entry(bm->blocks.prev, struct bm_block, hook); |
367 | zone_bm->end_pfn = zone->zone_start_pfn + zone->spanned_pages; | ||
368 | /* Allocate the list of bitmap block objects */ | ||
369 | nr = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK); | ||
370 | bb = create_bm_block_list(nr, &ca); | ||
371 | zone_bm->bm_blocks = bb; | ||
372 | zone_bm->cur_block = bb; | ||
373 | if (!bb) | ||
374 | goto Free; | ||
375 | 398 | ||
376 | nr = zone->spanned_pages; | 399 | error = create_bm_block_list(pages, bm->blocks.prev, &ca); |
377 | pfn = zone->zone_start_pfn; | 400 | if (error) |
378 | /* Initialize the bitmap block objects */ | 401 | goto Error; |
379 | while (bb) { | ||
380 | unsigned long *ptr; | ||
381 | 402 | ||
382 | ptr = get_image_page(gfp_mask, safe_needed); | 403 | list_for_each_entry_continue(bb, &bm->blocks, hook) { |
383 | bb->data = ptr; | 404 | bb->data = get_image_page(gfp_mask, safe_needed); |
384 | if (!ptr) | 405 | if (!bb->data) { |
385 | goto Free; | 406 | error = -ENOMEM; |
407 | goto Error; | ||
408 | } | ||
386 | 409 | ||
387 | bb->start_pfn = pfn; | 410 | bb->start_pfn = pfn; |
388 | if (nr >= BM_BITS_PER_BLOCK) { | 411 | if (pages >= BM_BITS_PER_BLOCK) { |
389 | pfn += BM_BITS_PER_BLOCK; | 412 | pfn += BM_BITS_PER_BLOCK; |
390 | nr -= BM_BITS_PER_BLOCK; | 413 | pages -= BM_BITS_PER_BLOCK; |
391 | } else { | 414 | } else { |
392 | /* This is executed only once in the loop */ | 415 | /* This is executed only once in the loop */ |
393 | pfn += nr; | 416 | pfn += pages; |
394 | } | 417 | } |
395 | bb->end_pfn = pfn; | 418 | bb->end_pfn = pfn; |
396 | bb = bb->next; | ||
397 | } | 419 | } |
398 | zone_bm = zone_bm->next; | ||
399 | } | 420 | } |
421 | |||
400 | bm->p_list = ca.chain; | 422 | bm->p_list = ca.chain; |
401 | memory_bm_position_reset(bm); | 423 | memory_bm_position_reset(bm); |
402 | return 0; | 424 | Exit: |
425 | free_mem_extents(&mem_extents); | ||
426 | return error; | ||
403 | 427 | ||
404 | Free: | 428 | Error: |
405 | bm->p_list = ca.chain; | 429 | bm->p_list = ca.chain; |
406 | memory_bm_free(bm, PG_UNSAFE_CLEAR); | 430 | memory_bm_free(bm, PG_UNSAFE_CLEAR); |
407 | return -ENOMEM; | 431 | goto Exit; |
408 | } | 432 | } |
409 | 433 | ||
410 | /** | 434 | /** |
411 | * memory_bm_free - free memory occupied by the memory bitmap @bm | 435 | * memory_bm_free - free memory occupied by the memory bitmap @bm |
412 | */ | 436 | */ |
413 | |||
414 | static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) | 437 | static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) |
415 | { | 438 | { |
416 | struct zone_bitmap *zone_bm; | 439 | struct bm_block *bb; |
417 | 440 | ||
418 | /* Free the list of bit blocks for each zone_bitmap object */ | 441 | list_for_each_entry(bb, &bm->blocks, hook) |
419 | zone_bm = bm->zone_bm_list; | 442 | if (bb->data) |
420 | while (zone_bm) { | 443 | free_image_page(bb->data, clear_nosave_free); |
421 | struct bm_block *bb; | ||
422 | 444 | ||
423 | bb = zone_bm->bm_blocks; | ||
424 | while (bb) { | ||
425 | if (bb->data) | ||
426 | free_image_page(bb->data, clear_nosave_free); | ||
427 | bb = bb->next; | ||
428 | } | ||
429 | zone_bm = zone_bm->next; | ||
430 | } | ||
431 | free_list_of_pages(bm->p_list, clear_nosave_free); | 445 | free_list_of_pages(bm->p_list, clear_nosave_free); |
432 | bm->zone_bm_list = NULL; | 446 | |
447 | INIT_LIST_HEAD(&bm->blocks); | ||
433 | } | 448 | } |
434 | 449 | ||
435 | /** | 450 | /** |
@@ -437,38 +452,33 @@ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) | |||
437 | * to given pfn. The cur_zone_bm member of @bm and the cur_block member | 452 | * to given pfn. The cur_zone_bm member of @bm and the cur_block member |
438 | * of @bm->cur_zone_bm are updated. | 453 | * of @bm->cur_zone_bm are updated. |
439 | */ | 454 | */ |
440 | |||
441 | static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, | 455 | static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, |
442 | void **addr, unsigned int *bit_nr) | 456 | void **addr, unsigned int *bit_nr) |
443 | { | 457 | { |
444 | struct zone_bitmap *zone_bm; | ||
445 | struct bm_block *bb; | 458 | struct bm_block *bb; |
446 | 459 | ||
447 | /* Check if the pfn is from the current zone */ | 460 | /* |
448 | zone_bm = bm->cur.zone_bm; | 461 | * Check if the pfn corresponds to the current bitmap block and find |
449 | if (pfn < zone_bm->start_pfn || pfn >= zone_bm->end_pfn) { | 462 | * the block where it fits if this is not the case. |
450 | zone_bm = bm->zone_bm_list; | 463 | */ |
451 | /* We don't assume that the zones are sorted by pfns */ | 464 | bb = bm->cur.block; |
452 | while (pfn < zone_bm->start_pfn || pfn >= zone_bm->end_pfn) { | ||
453 | zone_bm = zone_bm->next; | ||
454 | |||
455 | if (!zone_bm) | ||
456 | return -EFAULT; | ||
457 | } | ||
458 | bm->cur.zone_bm = zone_bm; | ||
459 | } | ||
460 | /* Check if the pfn corresponds to the current bitmap block */ | ||
461 | bb = zone_bm->cur_block; | ||
462 | if (pfn < bb->start_pfn) | 465 | if (pfn < bb->start_pfn) |
463 | bb = zone_bm->bm_blocks; | 466 | list_for_each_entry_continue_reverse(bb, &bm->blocks, hook) |
467 | if (pfn >= bb->start_pfn) | ||
468 | break; | ||
464 | 469 | ||
465 | while (pfn >= bb->end_pfn) { | 470 | if (pfn >= bb->end_pfn) |
466 | bb = bb->next; | 471 | list_for_each_entry_continue(bb, &bm->blocks, hook) |
472 | if (pfn >= bb->start_pfn && pfn < bb->end_pfn) | ||
473 | break; | ||
467 | 474 | ||
468 | BUG_ON(!bb); | 475 | if (&bb->hook == &bm->blocks) |
469 | } | 476 | return -EFAULT; |
470 | zone_bm->cur_block = bb; | 477 | |
478 | /* The block has been found */ | ||
479 | bm->cur.block = bb; | ||
471 | pfn -= bb->start_pfn; | 480 | pfn -= bb->start_pfn; |
481 | bm->cur.bit = pfn + 1; | ||
472 | *bit_nr = pfn; | 482 | *bit_nr = pfn; |
473 | *addr = bb->data; | 483 | *addr = bb->data; |
474 | return 0; | 484 | return 0; |
@@ -519,6 +529,14 @@ static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn) | |||
519 | return test_bit(bit, addr); | 529 | return test_bit(bit, addr); |
520 | } | 530 | } |
521 | 531 | ||
532 | static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn) | ||
533 | { | ||
534 | void *addr; | ||
535 | unsigned int bit; | ||
536 | |||
537 | return !memory_bm_find_bit(bm, pfn, &addr, &bit); | ||
538 | } | ||
539 | |||
522 | /** | 540 | /** |
523 | * memory_bm_next_pfn - find the pfn that corresponds to the next set bit | 541 | * memory_bm_next_pfn - find the pfn that corresponds to the next set bit |
524 | * in the bitmap @bm. If the pfn cannot be found, BM_END_OF_MAP is | 542 | * in the bitmap @bm. If the pfn cannot be found, BM_END_OF_MAP is |
@@ -530,29 +548,21 @@ static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn) | |||
530 | 548 | ||
531 | static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) | 549 | static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) |
532 | { | 550 | { |
533 | struct zone_bitmap *zone_bm; | ||
534 | struct bm_block *bb; | 551 | struct bm_block *bb; |
535 | int bit; | 552 | int bit; |
536 | 553 | ||
554 | bb = bm->cur.block; | ||
537 | do { | 555 | do { |
538 | bb = bm->cur.block; | 556 | bit = bm->cur.bit; |
539 | do { | 557 | bit = find_next_bit(bb->data, bm_block_bits(bb), bit); |
540 | bit = bm->cur.bit; | 558 | if (bit < bm_block_bits(bb)) |
541 | bit = find_next_bit(bb->data, bm_block_bits(bb), bit); | 559 | goto Return_pfn; |
542 | if (bit < bm_block_bits(bb)) | 560 | |
543 | goto Return_pfn; | 561 | bb = list_entry(bb->hook.next, struct bm_block, hook); |
544 | 562 | bm->cur.block = bb; | |
545 | bb = bb->next; | 563 | bm->cur.bit = 0; |
546 | bm->cur.block = bb; | 564 | } while (&bb->hook != &bm->blocks); |
547 | bm->cur.bit = 0; | 565 | |
548 | } while (bb); | ||
549 | zone_bm = bm->cur.zone_bm->next; | ||
550 | if (zone_bm) { | ||
551 | bm->cur.zone_bm = zone_bm; | ||
552 | bm->cur.block = zone_bm->bm_blocks; | ||
553 | bm->cur.bit = 0; | ||
554 | } | ||
555 | } while (zone_bm); | ||
556 | memory_bm_position_reset(bm); | 566 | memory_bm_position_reset(bm); |
557 | return BM_END_OF_MAP; | 567 | return BM_END_OF_MAP; |
558 | 568 | ||
@@ -808,8 +818,7 @@ static unsigned int count_free_highmem_pages(void) | |||
808 | * We should save the page if it isn't Nosave or NosaveFree, or Reserved, | 818 | * We should save the page if it isn't Nosave or NosaveFree, or Reserved, |
809 | * and it isn't a part of a free chunk of pages. | 819 | * and it isn't a part of a free chunk of pages. |
810 | */ | 820 | */ |
811 | 821 | static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn) | |
812 | static struct page *saveable_highmem_page(unsigned long pfn) | ||
813 | { | 822 | { |
814 | struct page *page; | 823 | struct page *page; |
815 | 824 | ||
@@ -817,6 +826,8 @@ static struct page *saveable_highmem_page(unsigned long pfn) | |||
817 | return NULL; | 826 | return NULL; |
818 | 827 | ||
819 | page = pfn_to_page(pfn); | 828 | page = pfn_to_page(pfn); |
829 | if (page_zone(page) != zone) | ||
830 | return NULL; | ||
820 | 831 | ||
821 | BUG_ON(!PageHighMem(page)); | 832 | BUG_ON(!PageHighMem(page)); |
822 | 833 | ||
@@ -846,13 +857,16 @@ unsigned int count_highmem_pages(void) | |||
846 | mark_free_pages(zone); | 857 | mark_free_pages(zone); |
847 | max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; | 858 | max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; |
848 | for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) | 859 | for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) |
849 | if (saveable_highmem_page(pfn)) | 860 | if (saveable_highmem_page(zone, pfn)) |
850 | n++; | 861 | n++; |
851 | } | 862 | } |
852 | return n; | 863 | return n; |
853 | } | 864 | } |
854 | #else | 865 | #else |
855 | static inline void *saveable_highmem_page(unsigned long pfn) { return NULL; } | 866 | static inline void *saveable_highmem_page(struct zone *z, unsigned long p) |
867 | { | ||
868 | return NULL; | ||
869 | } | ||
856 | #endif /* CONFIG_HIGHMEM */ | 870 | #endif /* CONFIG_HIGHMEM */ |
857 | 871 | ||
858 | /** | 872 | /** |
@@ -863,8 +877,7 @@ static inline void *saveable_highmem_page(unsigned long pfn) { return NULL; } | |||
863 | * of pages statically defined as 'unsaveable', and it isn't a part of | 877 | * of pages statically defined as 'unsaveable', and it isn't a part of |
864 | * a free chunk of pages. | 878 | * a free chunk of pages. |
865 | */ | 879 | */ |
866 | 880 | static struct page *saveable_page(struct zone *zone, unsigned long pfn) | |
867 | static struct page *saveable_page(unsigned long pfn) | ||
868 | { | 881 | { |
869 | struct page *page; | 882 | struct page *page; |
870 | 883 | ||
@@ -872,6 +885,8 @@ static struct page *saveable_page(unsigned long pfn) | |||
872 | return NULL; | 885 | return NULL; |
873 | 886 | ||
874 | page = pfn_to_page(pfn); | 887 | page = pfn_to_page(pfn); |
888 | if (page_zone(page) != zone) | ||
889 | return NULL; | ||
875 | 890 | ||
876 | BUG_ON(PageHighMem(page)); | 891 | BUG_ON(PageHighMem(page)); |
877 | 892 | ||
@@ -903,7 +918,7 @@ unsigned int count_data_pages(void) | |||
903 | mark_free_pages(zone); | 918 | mark_free_pages(zone); |
904 | max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; | 919 | max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; |
905 | for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) | 920 | for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) |
906 | if(saveable_page(pfn)) | 921 | if (saveable_page(zone, pfn)) |
907 | n++; | 922 | n++; |
908 | } | 923 | } |
909 | return n; | 924 | return n; |
@@ -944,7 +959,7 @@ static inline struct page * | |||
944 | page_is_saveable(struct zone *zone, unsigned long pfn) | 959 | page_is_saveable(struct zone *zone, unsigned long pfn) |
945 | { | 960 | { |
946 | return is_highmem(zone) ? | 961 | return is_highmem(zone) ? |
947 | saveable_highmem_page(pfn) : saveable_page(pfn); | 962 | saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn); |
948 | } | 963 | } |
949 | 964 | ||
950 | static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) | 965 | static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) |
@@ -966,7 +981,7 @@ static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) | |||
966 | * data modified by kmap_atomic() | 981 | * data modified by kmap_atomic() |
967 | */ | 982 | */ |
968 | safe_copy_page(buffer, s_page); | 983 | safe_copy_page(buffer, s_page); |
969 | dst = kmap_atomic(pfn_to_page(dst_pfn), KM_USER0); | 984 | dst = kmap_atomic(d_page, KM_USER0); |
970 | memcpy(dst, buffer, PAGE_SIZE); | 985 | memcpy(dst, buffer, PAGE_SIZE); |
971 | kunmap_atomic(dst, KM_USER0); | 986 | kunmap_atomic(dst, KM_USER0); |
972 | } else { | 987 | } else { |
@@ -975,7 +990,7 @@ static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) | |||
975 | } | 990 | } |
976 | } | 991 | } |
977 | #else | 992 | #else |
978 | #define page_is_saveable(zone, pfn) saveable_page(pfn) | 993 | #define page_is_saveable(zone, pfn) saveable_page(zone, pfn) |
979 | 994 | ||
980 | static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) | 995 | static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) |
981 | { | 996 | { |
@@ -1459,9 +1474,7 @@ load_header(struct swsusp_info *info) | |||
1459 | * unpack_orig_pfns - for each element of @buf[] (1 page at a time) set | 1474 | * unpack_orig_pfns - for each element of @buf[] (1 page at a time) set |
1460 | * the corresponding bit in the memory bitmap @bm | 1475 | * the corresponding bit in the memory bitmap @bm |
1461 | */ | 1476 | */ |
1462 | 1477 | static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) | |
1463 | static inline void | ||
1464 | unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) | ||
1465 | { | 1478 | { |
1466 | int j; | 1479 | int j; |
1467 | 1480 | ||
@@ -1469,8 +1482,13 @@ unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) | |||
1469 | if (unlikely(buf[j] == BM_END_OF_MAP)) | 1482 | if (unlikely(buf[j] == BM_END_OF_MAP)) |
1470 | break; | 1483 | break; |
1471 | 1484 | ||
1472 | memory_bm_set_bit(bm, buf[j]); | 1485 | if (memory_bm_pfn_present(bm, buf[j])) |
1486 | memory_bm_set_bit(bm, buf[j]); | ||
1487 | else | ||
1488 | return -EFAULT; | ||
1473 | } | 1489 | } |
1490 | |||
1491 | return 0; | ||
1474 | } | 1492 | } |
1475 | 1493 | ||
1476 | /* List of "safe" pages that may be used to store data loaded from the suspend | 1494 | /* List of "safe" pages that may be used to store data loaded from the suspend |
@@ -1608,7 +1626,7 @@ get_highmem_page_buffer(struct page *page, struct chain_allocator *ca) | |||
1608 | pbe = chain_alloc(ca, sizeof(struct highmem_pbe)); | 1626 | pbe = chain_alloc(ca, sizeof(struct highmem_pbe)); |
1609 | if (!pbe) { | 1627 | if (!pbe) { |
1610 | swsusp_free(); | 1628 | swsusp_free(); |
1611 | return NULL; | 1629 | return ERR_PTR(-ENOMEM); |
1612 | } | 1630 | } |
1613 | pbe->orig_page = page; | 1631 | pbe->orig_page = page; |
1614 | if (safe_highmem_pages > 0) { | 1632 | if (safe_highmem_pages > 0) { |
@@ -1677,7 +1695,7 @@ prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p) | |||
1677 | static inline void * | 1695 | static inline void * |
1678 | get_highmem_page_buffer(struct page *page, struct chain_allocator *ca) | 1696 | get_highmem_page_buffer(struct page *page, struct chain_allocator *ca) |
1679 | { | 1697 | { |
1680 | return NULL; | 1698 | return ERR_PTR(-EINVAL); |
1681 | } | 1699 | } |
1682 | 1700 | ||
1683 | static inline void copy_last_highmem_page(void) {} | 1701 | static inline void copy_last_highmem_page(void) {} |
@@ -1788,8 +1806,13 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) | |||
1788 | static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) | 1806 | static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) |
1789 | { | 1807 | { |
1790 | struct pbe *pbe; | 1808 | struct pbe *pbe; |
1791 | struct page *page = pfn_to_page(memory_bm_next_pfn(bm)); | 1809 | struct page *page; |
1810 | unsigned long pfn = memory_bm_next_pfn(bm); | ||
1792 | 1811 | ||
1812 | if (pfn == BM_END_OF_MAP) | ||
1813 | return ERR_PTR(-EFAULT); | ||
1814 | |||
1815 | page = pfn_to_page(pfn); | ||
1793 | if (PageHighMem(page)) | 1816 | if (PageHighMem(page)) |
1794 | return get_highmem_page_buffer(page, ca); | 1817 | return get_highmem_page_buffer(page, ca); |
1795 | 1818 | ||
@@ -1805,7 +1828,7 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) | |||
1805 | pbe = chain_alloc(ca, sizeof(struct pbe)); | 1828 | pbe = chain_alloc(ca, sizeof(struct pbe)); |
1806 | if (!pbe) { | 1829 | if (!pbe) { |
1807 | swsusp_free(); | 1830 | swsusp_free(); |
1808 | return NULL; | 1831 | return ERR_PTR(-ENOMEM); |
1809 | } | 1832 | } |
1810 | pbe->orig_address = page_address(page); | 1833 | pbe->orig_address = page_address(page); |
1811 | pbe->address = safe_pages_list; | 1834 | pbe->address = safe_pages_list; |
@@ -1868,7 +1891,10 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count) | |||
1868 | return error; | 1891 | return error; |
1869 | 1892 | ||
1870 | } else if (handle->prev <= nr_meta_pages) { | 1893 | } else if (handle->prev <= nr_meta_pages) { |
1871 | unpack_orig_pfns(buffer, ©_bm); | 1894 | error = unpack_orig_pfns(buffer, ©_bm); |
1895 | if (error) | ||
1896 | return error; | ||
1897 | |||
1872 | if (handle->prev == nr_meta_pages) { | 1898 | if (handle->prev == nr_meta_pages) { |
1873 | error = prepare_image(&orig_bm, ©_bm); | 1899 | error = prepare_image(&orig_bm, ©_bm); |
1874 | if (error) | 1900 | if (error) |
@@ -1879,12 +1905,14 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count) | |||
1879 | restore_pblist = NULL; | 1905 | restore_pblist = NULL; |
1880 | handle->buffer = get_buffer(&orig_bm, &ca); | 1906 | handle->buffer = get_buffer(&orig_bm, &ca); |
1881 | handle->sync_read = 0; | 1907 | handle->sync_read = 0; |
1882 | if (!handle->buffer) | 1908 | if (IS_ERR(handle->buffer)) |
1883 | return -ENOMEM; | 1909 | return PTR_ERR(handle->buffer); |
1884 | } | 1910 | } |
1885 | } else { | 1911 | } else { |
1886 | copy_last_highmem_page(); | 1912 | copy_last_highmem_page(); |
1887 | handle->buffer = get_buffer(&orig_bm, &ca); | 1913 | handle->buffer = get_buffer(&orig_bm, &ca); |
1914 | if (IS_ERR(handle->buffer)) | ||
1915 | return PTR_ERR(handle->buffer); | ||
1888 | if (handle->buffer != buffer) | 1916 | if (handle->buffer != buffer) |
1889 | handle->sync_read = 0; | 1917 | handle->sync_read = 0; |
1890 | } | 1918 | } |
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 023ff2a31d89..a92c91451559 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c | |||
@@ -262,3 +262,125 @@ int swsusp_shrink_memory(void) | |||
262 | 262 | ||
263 | return 0; | 263 | return 0; |
264 | } | 264 | } |
265 | |||
266 | /* | ||
267 | * Platforms, like ACPI, may want us to save some memory used by them during | ||
268 | * hibernation and to restore the contents of this memory during the subsequent | ||
269 | * resume. The code below implements a mechanism allowing us to do that. | ||
270 | */ | ||
271 | |||
272 | struct nvs_page { | ||
273 | unsigned long phys_start; | ||
274 | unsigned int size; | ||
275 | void *kaddr; | ||
276 | void *data; | ||
277 | struct list_head node; | ||
278 | }; | ||
279 | |||
280 | static LIST_HEAD(nvs_list); | ||
281 | |||
282 | /** | ||
283 | * hibernate_nvs_register - register platform NVS memory region to save | ||
284 | * @start - physical address of the region | ||
285 | * @size - size of the region | ||
286 | * | ||
287 | * The NVS region need not be page-aligned (both ends) and we arrange | ||
288 | * things so that the data from page-aligned addresses in this region will | ||
289 | * be copied into separate RAM pages. | ||
290 | */ | ||
291 | int hibernate_nvs_register(unsigned long start, unsigned long size) | ||
292 | { | ||
293 | struct nvs_page *entry, *next; | ||
294 | |||
295 | while (size > 0) { | ||
296 | unsigned int nr_bytes; | ||
297 | |||
298 | entry = kzalloc(sizeof(struct nvs_page), GFP_KERNEL); | ||
299 | if (!entry) | ||
300 | goto Error; | ||
301 | |||
302 | list_add_tail(&entry->node, &nvs_list); | ||
303 | entry->phys_start = start; | ||
304 | nr_bytes = PAGE_SIZE - (start & ~PAGE_MASK); | ||
305 | entry->size = (size < nr_bytes) ? size : nr_bytes; | ||
306 | |||
307 | start += entry->size; | ||
308 | size -= entry->size; | ||
309 | } | ||
310 | return 0; | ||
311 | |||
312 | Error: | ||
313 | list_for_each_entry_safe(entry, next, &nvs_list, node) { | ||
314 | list_del(&entry->node); | ||
315 | kfree(entry); | ||
316 | } | ||
317 | return -ENOMEM; | ||
318 | } | ||
319 | |||
320 | /** | ||
321 | * hibernate_nvs_free - free data pages allocated for saving NVS regions | ||
322 | */ | ||
323 | void hibernate_nvs_free(void) | ||
324 | { | ||
325 | struct nvs_page *entry; | ||
326 | |||
327 | list_for_each_entry(entry, &nvs_list, node) | ||
328 | if (entry->data) { | ||
329 | free_page((unsigned long)entry->data); | ||
330 | entry->data = NULL; | ||
331 | if (entry->kaddr) { | ||
332 | iounmap(entry->kaddr); | ||
333 | entry->kaddr = NULL; | ||
334 | } | ||
335 | } | ||
336 | } | ||
337 | |||
338 | /** | ||
339 | * hibernate_nvs_alloc - allocate memory necessary for saving NVS regions | ||
340 | */ | ||
341 | int hibernate_nvs_alloc(void) | ||
342 | { | ||
343 | struct nvs_page *entry; | ||
344 | |||
345 | list_for_each_entry(entry, &nvs_list, node) { | ||
346 | entry->data = (void *)__get_free_page(GFP_KERNEL); | ||
347 | if (!entry->data) { | ||
348 | hibernate_nvs_free(); | ||
349 | return -ENOMEM; | ||
350 | } | ||
351 | } | ||
352 | return 0; | ||
353 | } | ||
354 | |||
355 | /** | ||
356 | * hibernate_nvs_save - save NVS memory regions | ||
357 | */ | ||
358 | void hibernate_nvs_save(void) | ||
359 | { | ||
360 | struct nvs_page *entry; | ||
361 | |||
362 | printk(KERN_INFO "PM: Saving platform NVS memory\n"); | ||
363 | |||
364 | list_for_each_entry(entry, &nvs_list, node) | ||
365 | if (entry->data) { | ||
366 | entry->kaddr = ioremap(entry->phys_start, entry->size); | ||
367 | memcpy(entry->data, entry->kaddr, entry->size); | ||
368 | } | ||
369 | } | ||
370 | |||
371 | /** | ||
372 | * hibernate_nvs_restore - restore NVS memory regions | ||
373 | * | ||
374 | * This function is going to be called with interrupts disabled, so it | ||
375 | * cannot iounmap the virtual addresses used to access the NVS region. | ||
376 | */ | ||
377 | void hibernate_nvs_restore(void) | ||
378 | { | ||
379 | struct nvs_page *entry; | ||
380 | |||
381 | printk(KERN_INFO "PM: Restoring platform NVS memory\n"); | ||
382 | |||
383 | list_for_each_entry(entry, &nvs_list, node) | ||
384 | if (entry->data) | ||
385 | memcpy(entry->kaddr, entry->data, entry->size); | ||
386 | } | ||
diff --git a/kernel/printk.c b/kernel/printk.c index e651ab05655f..7015733793e8 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
@@ -619,7 +619,7 @@ static int acquire_console_semaphore_for_printk(unsigned int cpu) | |||
619 | static const char recursion_bug_msg [] = | 619 | static const char recursion_bug_msg [] = |
620 | KERN_CRIT "BUG: recent printk recursion!\n"; | 620 | KERN_CRIT "BUG: recent printk recursion!\n"; |
621 | static int recursion_bug; | 621 | static int recursion_bug; |
622 | static int new_text_line = 1; | 622 | static int new_text_line = 1; |
623 | static char printk_buf[1024]; | 623 | static char printk_buf[1024]; |
624 | 624 | ||
625 | asmlinkage int vprintk(const char *fmt, va_list args) | 625 | asmlinkage int vprintk(const char *fmt, va_list args) |
diff --git a/kernel/profile.c b/kernel/profile.c index d18e2d2654f2..784933acf5b8 100644 --- a/kernel/profile.c +++ b/kernel/profile.c | |||
@@ -445,7 +445,6 @@ void profile_tick(int type) | |||
445 | #ifdef CONFIG_PROC_FS | 445 | #ifdef CONFIG_PROC_FS |
446 | #include <linux/proc_fs.h> | 446 | #include <linux/proc_fs.h> |
447 | #include <asm/uaccess.h> | 447 | #include <asm/uaccess.h> |
448 | #include <asm/ptrace.h> | ||
449 | 448 | ||
450 | static int prof_cpu_mask_read_proc(char *page, char **start, off_t off, | 449 | static int prof_cpu_mask_read_proc(char *page, char **start, off_t off, |
451 | int count, int *eof, void *data) | 450 | int count, int *eof, void *data) |
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index ad63af8b2521..d92a76a881aa 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c | |||
@@ -77,8 +77,15 @@ void wakeme_after_rcu(struct rcu_head *head) | |||
77 | * sections are delimited by rcu_read_lock() and rcu_read_unlock(), | 77 | * sections are delimited by rcu_read_lock() and rcu_read_unlock(), |
78 | * and may be nested. | 78 | * and may be nested. |
79 | */ | 79 | */ |
80 | void synchronize_rcu(void); /* Makes kernel-doc tools happy */ | 80 | void synchronize_rcu(void) |
81 | synchronize_rcu_xxx(synchronize_rcu, call_rcu) | 81 | { |
82 | struct rcu_synchronize rcu; | ||
83 | init_completion(&rcu.completion); | ||
84 | /* Will wake me after RCU finished. */ | ||
85 | call_rcu(&rcu.head, wakeme_after_rcu); | ||
86 | /* Wait for it. */ | ||
87 | wait_for_completion(&rcu.completion); | ||
88 | } | ||
82 | EXPORT_SYMBOL_GPL(synchronize_rcu); | 89 | EXPORT_SYMBOL_GPL(synchronize_rcu); |
83 | 90 | ||
84 | static void rcu_barrier_callback(struct rcu_head *notused) | 91 | static void rcu_barrier_callback(struct rcu_head *notused) |
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c index f9dc8f3720f6..33cfc50781f9 100644 --- a/kernel/rcupreempt.c +++ b/kernel/rcupreempt.c | |||
@@ -1177,7 +1177,16 @@ EXPORT_SYMBOL_GPL(call_rcu_sched); | |||
1177 | * in -rt this does -not- necessarily result in all currently executing | 1177 | * in -rt this does -not- necessarily result in all currently executing |
1178 | * interrupt -handlers- having completed. | 1178 | * interrupt -handlers- having completed. |
1179 | */ | 1179 | */ |
1180 | synchronize_rcu_xxx(__synchronize_sched, call_rcu_sched) | 1180 | void __synchronize_sched(void) |
1181 | { | ||
1182 | struct rcu_synchronize rcu; | ||
1183 | |||
1184 | init_completion(&rcu.completion); | ||
1185 | /* Will wake me after RCU finished. */ | ||
1186 | call_rcu_sched(&rcu.head, wakeme_after_rcu); | ||
1187 | /* Wait for it. */ | ||
1188 | wait_for_completion(&rcu.completion); | ||
1189 | } | ||
1181 | EXPORT_SYMBOL_GPL(__synchronize_sched); | 1190 | EXPORT_SYMBOL_GPL(__synchronize_sched); |
1182 | 1191 | ||
1183 | /* | 1192 | /* |
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 3245b40952c6..1cff28db56b6 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c | |||
@@ -136,7 +136,7 @@ static int stutter_pause_test = 0; | |||
136 | #endif | 136 | #endif |
137 | int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT; | 137 | int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT; |
138 | 138 | ||
139 | #define FULLSTOP_SIGNALED 1 /* Bail due to signal. */ | 139 | #define FULLSTOP_SHUTDOWN 1 /* Bail due to system shutdown/panic. */ |
140 | #define FULLSTOP_CLEANUP 2 /* Orderly shutdown. */ | 140 | #define FULLSTOP_CLEANUP 2 /* Orderly shutdown. */ |
141 | static int fullstop; /* stop generating callbacks at test end. */ | 141 | static int fullstop; /* stop generating callbacks at test end. */ |
142 | DEFINE_MUTEX(fullstop_mutex); /* protect fullstop transitions and */ | 142 | DEFINE_MUTEX(fullstop_mutex); /* protect fullstop transitions and */ |
@@ -151,12 +151,10 @@ rcutorture_shutdown_notify(struct notifier_block *unused1, | |||
151 | { | 151 | { |
152 | if (fullstop) | 152 | if (fullstop) |
153 | return NOTIFY_DONE; | 153 | return NOTIFY_DONE; |
154 | if (signal_pending(current)) { | 154 | mutex_lock(&fullstop_mutex); |
155 | mutex_lock(&fullstop_mutex); | 155 | if (!fullstop) |
156 | if (!ACCESS_ONCE(fullstop)) | 156 | fullstop = FULLSTOP_SHUTDOWN; |
157 | fullstop = FULLSTOP_SIGNALED; | 157 | mutex_unlock(&fullstop_mutex); |
158 | mutex_unlock(&fullstop_mutex); | ||
159 | } | ||
160 | return NOTIFY_DONE; | 158 | return NOTIFY_DONE; |
161 | } | 159 | } |
162 | 160 | ||
@@ -624,7 +622,7 @@ rcu_torture_writer(void *arg) | |||
624 | rcu_stutter_wait(); | 622 | rcu_stutter_wait(); |
625 | } while (!kthread_should_stop() && !fullstop); | 623 | } while (!kthread_should_stop() && !fullstop); |
626 | VERBOSE_PRINTK_STRING("rcu_torture_writer task stopping"); | 624 | VERBOSE_PRINTK_STRING("rcu_torture_writer task stopping"); |
627 | while (!kthread_should_stop() && fullstop != FULLSTOP_SIGNALED) | 625 | while (!kthread_should_stop() && fullstop != FULLSTOP_SHUTDOWN) |
628 | schedule_timeout_uninterruptible(1); | 626 | schedule_timeout_uninterruptible(1); |
629 | return 0; | 627 | return 0; |
630 | } | 628 | } |
@@ -649,7 +647,7 @@ rcu_torture_fakewriter(void *arg) | |||
649 | } while (!kthread_should_stop() && !fullstop); | 647 | } while (!kthread_should_stop() && !fullstop); |
650 | 648 | ||
651 | VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task stopping"); | 649 | VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task stopping"); |
652 | while (!kthread_should_stop() && fullstop != FULLSTOP_SIGNALED) | 650 | while (!kthread_should_stop() && fullstop != FULLSTOP_SHUTDOWN) |
653 | schedule_timeout_uninterruptible(1); | 651 | schedule_timeout_uninterruptible(1); |
654 | return 0; | 652 | return 0; |
655 | } | 653 | } |
@@ -759,7 +757,7 @@ rcu_torture_reader(void *arg) | |||
759 | VERBOSE_PRINTK_STRING("rcu_torture_reader task stopping"); | 757 | VERBOSE_PRINTK_STRING("rcu_torture_reader task stopping"); |
760 | if (irqreader && cur_ops->irqcapable) | 758 | if (irqreader && cur_ops->irqcapable) |
761 | del_timer_sync(&t); | 759 | del_timer_sync(&t); |
762 | while (!kthread_should_stop() && fullstop != FULLSTOP_SIGNALED) | 760 | while (!kthread_should_stop() && fullstop != FULLSTOP_SHUTDOWN) |
763 | schedule_timeout_uninterruptible(1); | 761 | schedule_timeout_uninterruptible(1); |
764 | return 0; | 762 | return 0; |
765 | } | 763 | } |
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index a342b032112c..f2d8638e6c60 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
@@ -79,7 +79,10 @@ struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); | |||
79 | DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); | 79 | DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); |
80 | 80 | ||
81 | #ifdef CONFIG_NO_HZ | 81 | #ifdef CONFIG_NO_HZ |
82 | DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks); | 82 | DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { |
83 | .dynticks_nesting = 1, | ||
84 | .dynticks = 1, | ||
85 | }; | ||
83 | #endif /* #ifdef CONFIG_NO_HZ */ | 86 | #endif /* #ifdef CONFIG_NO_HZ */ |
84 | 87 | ||
85 | static int blimit = 10; /* Maximum callbacks per softirq. */ | 88 | static int blimit = 10; /* Maximum callbacks per softirq. */ |
@@ -572,6 +575,7 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) | |||
572 | /* Special-case the common single-level case. */ | 575 | /* Special-case the common single-level case. */ |
573 | if (NUM_RCU_NODES == 1) { | 576 | if (NUM_RCU_NODES == 1) { |
574 | rnp->qsmask = rnp->qsmaskinit; | 577 | rnp->qsmask = rnp->qsmaskinit; |
578 | rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */ | ||
575 | spin_unlock_irqrestore(&rnp->lock, flags); | 579 | spin_unlock_irqrestore(&rnp->lock, flags); |
576 | return; | 580 | return; |
577 | } | 581 | } |
@@ -1379,13 +1383,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) | |||
1379 | 1383 | ||
1380 | static void __cpuinit rcu_online_cpu(int cpu) | 1384 | static void __cpuinit rcu_online_cpu(int cpu) |
1381 | { | 1385 | { |
1382 | #ifdef CONFIG_NO_HZ | ||
1383 | struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); | ||
1384 | |||
1385 | rdtp->dynticks_nesting = 1; | ||
1386 | rdtp->dynticks |= 1; /* need consecutive #s even for hotplug. */ | ||
1387 | rdtp->dynticks_nmi = (rdtp->dynticks_nmi + 1) & ~0x1; | ||
1388 | #endif /* #ifdef CONFIG_NO_HZ */ | ||
1389 | rcu_init_percpu_data(cpu, &rcu_state); | 1386 | rcu_init_percpu_data(cpu, &rcu_state); |
1390 | rcu_init_percpu_data(cpu, &rcu_bh_state); | 1387 | rcu_init_percpu_data(cpu, &rcu_bh_state); |
1391 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); | 1388 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); |
diff --git a/kernel/res_counter.c b/kernel/res_counter.c index f275c8eca772..bf8e7534c803 100644 --- a/kernel/res_counter.c +++ b/kernel/res_counter.c | |||
@@ -15,10 +15,11 @@ | |||
15 | #include <linux/uaccess.h> | 15 | #include <linux/uaccess.h> |
16 | #include <linux/mm.h> | 16 | #include <linux/mm.h> |
17 | 17 | ||
18 | void res_counter_init(struct res_counter *counter) | 18 | void res_counter_init(struct res_counter *counter, struct res_counter *parent) |
19 | { | 19 | { |
20 | spin_lock_init(&counter->lock); | 20 | spin_lock_init(&counter->lock); |
21 | counter->limit = (unsigned long long)LLONG_MAX; | 21 | counter->limit = (unsigned long long)LLONG_MAX; |
22 | counter->parent = parent; | ||
22 | } | 23 | } |
23 | 24 | ||
24 | int res_counter_charge_locked(struct res_counter *counter, unsigned long val) | 25 | int res_counter_charge_locked(struct res_counter *counter, unsigned long val) |
@@ -34,14 +35,34 @@ int res_counter_charge_locked(struct res_counter *counter, unsigned long val) | |||
34 | return 0; | 35 | return 0; |
35 | } | 36 | } |
36 | 37 | ||
37 | int res_counter_charge(struct res_counter *counter, unsigned long val) | 38 | int res_counter_charge(struct res_counter *counter, unsigned long val, |
39 | struct res_counter **limit_fail_at) | ||
38 | { | 40 | { |
39 | int ret; | 41 | int ret; |
40 | unsigned long flags; | 42 | unsigned long flags; |
41 | 43 | struct res_counter *c, *u; | |
42 | spin_lock_irqsave(&counter->lock, flags); | 44 | |
43 | ret = res_counter_charge_locked(counter, val); | 45 | *limit_fail_at = NULL; |
44 | spin_unlock_irqrestore(&counter->lock, flags); | 46 | local_irq_save(flags); |
47 | for (c = counter; c != NULL; c = c->parent) { | ||
48 | spin_lock(&c->lock); | ||
49 | ret = res_counter_charge_locked(c, val); | ||
50 | spin_unlock(&c->lock); | ||
51 | if (ret < 0) { | ||
52 | *limit_fail_at = c; | ||
53 | goto undo; | ||
54 | } | ||
55 | } | ||
56 | ret = 0; | ||
57 | goto done; | ||
58 | undo: | ||
59 | for (u = counter; u != c; u = u->parent) { | ||
60 | spin_lock(&u->lock); | ||
61 | res_counter_uncharge_locked(u, val); | ||
62 | spin_unlock(&u->lock); | ||
63 | } | ||
64 | done: | ||
65 | local_irq_restore(flags); | ||
45 | return ret; | 66 | return ret; |
46 | } | 67 | } |
47 | 68 | ||
@@ -56,10 +77,15 @@ void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val) | |||
56 | void res_counter_uncharge(struct res_counter *counter, unsigned long val) | 77 | void res_counter_uncharge(struct res_counter *counter, unsigned long val) |
57 | { | 78 | { |
58 | unsigned long flags; | 79 | unsigned long flags; |
80 | struct res_counter *c; | ||
59 | 81 | ||
60 | spin_lock_irqsave(&counter->lock, flags); | 82 | local_irq_save(flags); |
61 | res_counter_uncharge_locked(counter, val); | 83 | for (c = counter; c != NULL; c = c->parent) { |
62 | spin_unlock_irqrestore(&counter->lock, flags); | 84 | spin_lock(&c->lock); |
85 | res_counter_uncharge_locked(c, val); | ||
86 | spin_unlock(&c->lock); | ||
87 | } | ||
88 | local_irq_restore(flags); | ||
63 | } | 89 | } |
64 | 90 | ||
65 | 91 | ||
diff --git a/kernel/resource.c b/kernel/resource.c index e633106b12f6..ca6a1536b205 100644 --- a/kernel/resource.c +++ b/kernel/resource.c | |||
@@ -623,7 +623,7 @@ resource_size_t resource_alignment(struct resource *res) | |||
623 | */ | 623 | */ |
624 | struct resource * __request_region(struct resource *parent, | 624 | struct resource * __request_region(struct resource *parent, |
625 | resource_size_t start, resource_size_t n, | 625 | resource_size_t start, resource_size_t n, |
626 | const char *name) | 626 | const char *name, int flags) |
627 | { | 627 | { |
628 | struct resource *res = kzalloc(sizeof(*res), GFP_KERNEL); | 628 | struct resource *res = kzalloc(sizeof(*res), GFP_KERNEL); |
629 | 629 | ||
@@ -634,6 +634,7 @@ struct resource * __request_region(struct resource *parent, | |||
634 | res->start = start; | 634 | res->start = start; |
635 | res->end = start + n - 1; | 635 | res->end = start + n - 1; |
636 | res->flags = IORESOURCE_BUSY; | 636 | res->flags = IORESOURCE_BUSY; |
637 | res->flags |= flags; | ||
637 | 638 | ||
638 | write_lock(&resource_lock); | 639 | write_lock(&resource_lock); |
639 | 640 | ||
@@ -679,7 +680,7 @@ int __check_region(struct resource *parent, resource_size_t start, | |||
679 | { | 680 | { |
680 | struct resource * res; | 681 | struct resource * res; |
681 | 682 | ||
682 | res = __request_region(parent, start, n, "check-region"); | 683 | res = __request_region(parent, start, n, "check-region", 0); |
683 | if (!res) | 684 | if (!res) |
684 | return -EBUSY; | 685 | return -EBUSY; |
685 | 686 | ||
@@ -776,7 +777,7 @@ struct resource * __devm_request_region(struct device *dev, | |||
776 | dr->start = start; | 777 | dr->start = start; |
777 | dr->n = n; | 778 | dr->n = n; |
778 | 779 | ||
779 | res = __request_region(parent, start, n, name); | 780 | res = __request_region(parent, start, n, name, 0); |
780 | if (res) | 781 | if (res) |
781 | devres_add(dev, dr); | 782 | devres_add(dev, dr); |
782 | else | 783 | else |
@@ -876,3 +877,57 @@ int iomem_map_sanity_check(resource_size_t addr, unsigned long size) | |||
876 | 877 | ||
877 | return err; | 878 | return err; |
878 | } | 879 | } |
880 | |||
881 | #ifdef CONFIG_STRICT_DEVMEM | ||
882 | static int strict_iomem_checks = 1; | ||
883 | #else | ||
884 | static int strict_iomem_checks; | ||
885 | #endif | ||
886 | |||
887 | /* | ||
888 | * check if an address is reserved in the iomem resource tree | ||
889 | * returns 1 if reserved, 0 if not reserved. | ||
890 | */ | ||
891 | int iomem_is_exclusive(u64 addr) | ||
892 | { | ||
893 | struct resource *p = &iomem_resource; | ||
894 | int err = 0; | ||
895 | loff_t l; | ||
896 | int size = PAGE_SIZE; | ||
897 | |||
898 | if (!strict_iomem_checks) | ||
899 | return 0; | ||
900 | |||
901 | addr = addr & PAGE_MASK; | ||
902 | |||
903 | read_lock(&resource_lock); | ||
904 | for (p = p->child; p ; p = r_next(NULL, p, &l)) { | ||
905 | /* | ||
906 | * We can probably skip the resources without | ||
907 | * IORESOURCE_IO attribute? | ||
908 | */ | ||
909 | if (p->start >= addr + size) | ||
910 | break; | ||
911 | if (p->end < addr) | ||
912 | continue; | ||
913 | if (p->flags & IORESOURCE_BUSY && | ||
914 | p->flags & IORESOURCE_EXCLUSIVE) { | ||
915 | err = 1; | ||
916 | break; | ||
917 | } | ||
918 | } | ||
919 | read_unlock(&resource_lock); | ||
920 | |||
921 | return err; | ||
922 | } | ||
923 | |||
924 | static int __init strict_iomem(char *str) | ||
925 | { | ||
926 | if (strstr(str, "relaxed")) | ||
927 | strict_iomem_checks = 0; | ||
928 | if (strstr(str, "strict")) | ||
929 | strict_iomem_checks = 1; | ||
930 | return 1; | ||
931 | } | ||
932 | |||
933 | __setup("iomem=", strict_iomem); | ||
diff --git a/kernel/sched.c b/kernel/sched.c index 545c6fccd1dc..deb5ac8c12f3 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -3728,8 +3728,13 @@ redo: | |||
3728 | } | 3728 | } |
3729 | 3729 | ||
3730 | double_unlock_balance(this_rq, busiest); | 3730 | double_unlock_balance(this_rq, busiest); |
3731 | /* | ||
3732 | * Should not call ttwu while holding a rq->lock | ||
3733 | */ | ||
3734 | spin_unlock(&this_rq->lock); | ||
3731 | if (active_balance) | 3735 | if (active_balance) |
3732 | wake_up_process(busiest->migration_thread); | 3736 | wake_up_process(busiest->migration_thread); |
3737 | spin_lock(&this_rq->lock); | ||
3733 | 3738 | ||
3734 | } else | 3739 | } else |
3735 | sd->nr_balance_failed = 0; | 3740 | sd->nr_balance_failed = 0; |
@@ -6957,7 +6962,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd) | |||
6957 | spin_unlock_irqrestore(&rq->lock, flags); | 6962 | spin_unlock_irqrestore(&rq->lock, flags); |
6958 | } | 6963 | } |
6959 | 6964 | ||
6960 | static int init_rootdomain(struct root_domain *rd, bool bootmem) | 6965 | static int __init_refok init_rootdomain(struct root_domain *rd, bool bootmem) |
6961 | { | 6966 | { |
6962 | memset(rd, 0, sizeof(*rd)); | 6967 | memset(rd, 0, sizeof(*rd)); |
6963 | 6968 | ||
@@ -6970,7 +6975,7 @@ static int init_rootdomain(struct root_domain *rd, bool bootmem) | |||
6970 | } | 6975 | } |
6971 | 6976 | ||
6972 | if (!alloc_cpumask_var(&rd->span, GFP_KERNEL)) | 6977 | if (!alloc_cpumask_var(&rd->span, GFP_KERNEL)) |
6973 | goto free_rd; | 6978 | goto out; |
6974 | if (!alloc_cpumask_var(&rd->online, GFP_KERNEL)) | 6979 | if (!alloc_cpumask_var(&rd->online, GFP_KERNEL)) |
6975 | goto free_span; | 6980 | goto free_span; |
6976 | if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL)) | 6981 | if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL)) |
@@ -6986,8 +6991,7 @@ free_online: | |||
6986 | free_cpumask_var(rd->online); | 6991 | free_cpumask_var(rd->online); |
6987 | free_span: | 6992 | free_span: |
6988 | free_cpumask_var(rd->span); | 6993 | free_cpumask_var(rd->span); |
6989 | free_rd: | 6994 | out: |
6990 | kfree(rd); | ||
6991 | return -ENOMEM; | 6995 | return -ENOMEM; |
6992 | } | 6996 | } |
6993 | 6997 | ||
@@ -7987,7 +7991,7 @@ match2: | |||
7987 | } | 7991 | } |
7988 | 7992 | ||
7989 | #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) | 7993 | #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) |
7990 | int arch_reinit_sched_domains(void) | 7994 | static void arch_reinit_sched_domains(void) |
7991 | { | 7995 | { |
7992 | get_online_cpus(); | 7996 | get_online_cpus(); |
7993 | 7997 | ||
@@ -7996,13 +8000,10 @@ int arch_reinit_sched_domains(void) | |||
7996 | 8000 | ||
7997 | rebuild_sched_domains(); | 8001 | rebuild_sched_domains(); |
7998 | put_online_cpus(); | 8002 | put_online_cpus(); |
7999 | |||
8000 | return 0; | ||
8001 | } | 8003 | } |
8002 | 8004 | ||
8003 | static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt) | 8005 | static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt) |
8004 | { | 8006 | { |
8005 | int ret; | ||
8006 | unsigned int level = 0; | 8007 | unsigned int level = 0; |
8007 | 8008 | ||
8008 | if (sscanf(buf, "%u", &level) != 1) | 8009 | if (sscanf(buf, "%u", &level) != 1) |
@@ -8023,9 +8024,9 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt) | |||
8023 | else | 8024 | else |
8024 | sched_mc_power_savings = level; | 8025 | sched_mc_power_savings = level; |
8025 | 8026 | ||
8026 | ret = arch_reinit_sched_domains(); | 8027 | arch_reinit_sched_domains(); |
8027 | 8028 | ||
8028 | return ret ? ret : count; | 8029 | return count; |
8029 | } | 8030 | } |
8030 | 8031 | ||
8031 | #ifdef CONFIG_SCHED_MC | 8032 | #ifdef CONFIG_SCHED_MC |
@@ -8060,7 +8061,7 @@ static SYSDEV_CLASS_ATTR(sched_smt_power_savings, 0644, | |||
8060 | sched_smt_power_savings_store); | 8061 | sched_smt_power_savings_store); |
8061 | #endif | 8062 | #endif |
8062 | 8063 | ||
8063 | int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls) | 8064 | int __init sched_create_sysfs_power_savings_entries(struct sysdev_class *cls) |
8064 | { | 8065 | { |
8065 | int err = 0; | 8066 | int err = 0; |
8066 | 8067 | ||
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c index e8ab096ddfe3..a0b0852414cc 100644 --- a/kernel/sched_clock.c +++ b/kernel/sched_clock.c | |||
@@ -124,7 +124,7 @@ static u64 __update_sched_clock(struct sched_clock_data *scd, u64 now) | |||
124 | 124 | ||
125 | clock = scd->tick_gtod + delta; | 125 | clock = scd->tick_gtod + delta; |
126 | min_clock = wrap_max(scd->tick_gtod, scd->clock); | 126 | min_clock = wrap_max(scd->tick_gtod, scd->clock); |
127 | max_clock = scd->tick_gtod + TICK_NSEC; | 127 | max_clock = wrap_max(scd->clock, scd->tick_gtod + TICK_NSEC); |
128 | 128 | ||
129 | clock = wrap_max(clock, min_clock); | 129 | clock = wrap_max(clock, min_clock); |
130 | clock = wrap_min(clock, max_clock); | 130 | clock = wrap_min(clock, max_clock); |
@@ -227,6 +227,9 @@ EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event); | |||
227 | */ | 227 | */ |
228 | void sched_clock_idle_wakeup_event(u64 delta_ns) | 228 | void sched_clock_idle_wakeup_event(u64 delta_ns) |
229 | { | 229 | { |
230 | if (timekeeping_suspended) | ||
231 | return; | ||
232 | |||
230 | sched_clock_tick(); | 233 | sched_clock_tick(); |
231 | touch_softlockup_watchdog(); | 234 | touch_softlockup_watchdog(); |
232 | } | 235 | } |
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c index 018b7be1db2e..1e00bfacf9b8 100644 --- a/kernel/sched_cpupri.c +++ b/kernel/sched_cpupri.c | |||
@@ -151,7 +151,7 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri) | |||
151 | * | 151 | * |
152 | * Returns: -ENOMEM if memory fails. | 152 | * Returns: -ENOMEM if memory fails. |
153 | */ | 153 | */ |
154 | int cpupri_init(struct cpupri *cp, bool bootmem) | 154 | int __init_refok cpupri_init(struct cpupri *cp, bool bootmem) |
155 | { | 155 | { |
156 | int i; | 156 | int i; |
157 | 157 | ||
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 56c0efe902a7..8e1352c75557 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -386,20 +386,6 @@ int sched_nr_latency_handler(struct ctl_table *table, int write, | |||
386 | #endif | 386 | #endif |
387 | 387 | ||
388 | /* | 388 | /* |
389 | * delta *= P[w / rw] | ||
390 | */ | ||
391 | static inline unsigned long | ||
392 | calc_delta_weight(unsigned long delta, struct sched_entity *se) | ||
393 | { | ||
394 | for_each_sched_entity(se) { | ||
395 | delta = calc_delta_mine(delta, | ||
396 | se->load.weight, &cfs_rq_of(se)->load); | ||
397 | } | ||
398 | |||
399 | return delta; | ||
400 | } | ||
401 | |||
402 | /* | ||
403 | * delta /= w | 389 | * delta /= w |
404 | */ | 390 | */ |
405 | static inline unsigned long | 391 | static inline unsigned long |
@@ -440,12 +426,20 @@ static u64 __sched_period(unsigned long nr_running) | |||
440 | */ | 426 | */ |
441 | static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) | 427 | static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) |
442 | { | 428 | { |
443 | unsigned long nr_running = cfs_rq->nr_running; | 429 | u64 slice = __sched_period(cfs_rq->nr_running + !se->on_rq); |
430 | |||
431 | for_each_sched_entity(se) { | ||
432 | struct load_weight *load = &cfs_rq->load; | ||
444 | 433 | ||
445 | if (unlikely(!se->on_rq)) | 434 | if (unlikely(!se->on_rq)) { |
446 | nr_running++; | 435 | struct load_weight lw = cfs_rq->load; |
447 | 436 | ||
448 | return calc_delta_weight(__sched_period(nr_running), se); | 437 | update_load_add(&lw, se->load.weight); |
438 | load = &lw; | ||
439 | } | ||
440 | slice = calc_delta_mine(slice, se->load.weight, load); | ||
441 | } | ||
442 | return slice; | ||
449 | } | 443 | } |
450 | 444 | ||
451 | /* | 445 | /* |
@@ -1623,8 +1617,6 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) | |||
1623 | } | 1617 | } |
1624 | } | 1618 | } |
1625 | 1619 | ||
1626 | #define swap(a, b) do { typeof(a) tmp = (a); (a) = (b); (b) = tmp; } while (0) | ||
1627 | |||
1628 | /* | 1620 | /* |
1629 | * Share the fairness runtime between parent and child, thus the | 1621 | * Share the fairness runtime between parent and child, thus the |
1630 | * total amount of pressure for CPU stays equal - new tasks | 1622 | * total amount of pressure for CPU stays equal - new tasks |
diff --git a/kernel/signal.c b/kernel/signal.c index 8e95855ff3cf..3152ac3b62e2 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -858,7 +858,8 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t, | |||
858 | q->info.si_signo = sig; | 858 | q->info.si_signo = sig; |
859 | q->info.si_errno = 0; | 859 | q->info.si_errno = 0; |
860 | q->info.si_code = SI_USER; | 860 | q->info.si_code = SI_USER; |
861 | q->info.si_pid = task_pid_vnr(current); | 861 | q->info.si_pid = task_tgid_nr_ns(current, |
862 | task_active_pid_ns(t)); | ||
862 | q->info.si_uid = current_uid(); | 863 | q->info.si_uid = current_uid(); |
863 | break; | 864 | break; |
864 | case (unsigned long) SEND_SIG_PRIV: | 865 | case (unsigned long) SEND_SIG_PRIV: |
diff --git a/kernel/sys.c b/kernel/sys.c index d356d79e84ac..763c3c17ded3 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -33,6 +33,7 @@ | |||
33 | #include <linux/task_io_accounting_ops.h> | 33 | #include <linux/task_io_accounting_ops.h> |
34 | #include <linux/seccomp.h> | 34 | #include <linux/seccomp.h> |
35 | #include <linux/cpu.h> | 35 | #include <linux/cpu.h> |
36 | #include <linux/ptrace.h> | ||
36 | 37 | ||
37 | #include <linux/compat.h> | 38 | #include <linux/compat.h> |
38 | #include <linux/syscalls.h> | 39 | #include <linux/syscalls.h> |
@@ -927,6 +928,7 @@ asmlinkage long sys_times(struct tms __user * tbuf) | |||
927 | if (copy_to_user(tbuf, &tmp, sizeof(struct tms))) | 928 | if (copy_to_user(tbuf, &tmp, sizeof(struct tms))) |
928 | return -EFAULT; | 929 | return -EFAULT; |
929 | } | 930 | } |
931 | force_successful_syscall_return(); | ||
930 | return (long) jiffies_64_to_clock_t(get_jiffies_64()); | 932 | return (long) jiffies_64_to_clock_t(get_jiffies_64()); |
931 | } | 933 | } |
932 | 934 | ||
@@ -1627,6 +1629,8 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) | |||
1627 | utime = stime = cputime_zero; | 1629 | utime = stime = cputime_zero; |
1628 | 1630 | ||
1629 | if (who == RUSAGE_THREAD) { | 1631 | if (who == RUSAGE_THREAD) { |
1632 | utime = task_utime(current); | ||
1633 | stime = task_stime(current); | ||
1630 | accumulate_thread_rusage(p, r); | 1634 | accumulate_thread_rusage(p, r); |
1631 | goto out; | 1635 | goto out; |
1632 | } | 1636 | } |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ff6d45c7626f..89d74436318c 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -82,15 +82,14 @@ extern int percpu_pagelist_fraction; | |||
82 | extern int compat_log; | 82 | extern int compat_log; |
83 | extern int latencytop_enabled; | 83 | extern int latencytop_enabled; |
84 | extern int sysctl_nr_open_min, sysctl_nr_open_max; | 84 | extern int sysctl_nr_open_min, sysctl_nr_open_max; |
85 | #ifndef CONFIG_MMU | ||
86 | extern int sysctl_nr_trim_pages; | ||
87 | #endif | ||
85 | #ifdef CONFIG_RCU_TORTURE_TEST | 88 | #ifdef CONFIG_RCU_TORTURE_TEST |
86 | extern int rcutorture_runnable; | 89 | extern int rcutorture_runnable; |
87 | #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ | 90 | #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ |
88 | 91 | ||
89 | /* Constants used for minimum and maximum */ | 92 | /* Constants used for minimum and maximum */ |
90 | #if defined(CONFIG_HIGHMEM) || defined(CONFIG_DETECT_SOFTLOCKUP) | ||
91 | static int one = 1; | ||
92 | #endif | ||
93 | |||
94 | #ifdef CONFIG_DETECT_SOFTLOCKUP | 93 | #ifdef CONFIG_DETECT_SOFTLOCKUP |
95 | static int sixty = 60; | 94 | static int sixty = 60; |
96 | static int neg_one = -1; | 95 | static int neg_one = -1; |
@@ -101,6 +100,7 @@ static int two = 2; | |||
101 | #endif | 100 | #endif |
102 | 101 | ||
103 | static int zero; | 102 | static int zero; |
103 | static int one = 1; | ||
104 | static int one_hundred = 100; | 104 | static int one_hundred = 100; |
105 | 105 | ||
106 | /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ | 106 | /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ |
@@ -952,12 +952,22 @@ static struct ctl_table vm_table[] = { | |||
952 | .data = &dirty_background_ratio, | 952 | .data = &dirty_background_ratio, |
953 | .maxlen = sizeof(dirty_background_ratio), | 953 | .maxlen = sizeof(dirty_background_ratio), |
954 | .mode = 0644, | 954 | .mode = 0644, |
955 | .proc_handler = &proc_dointvec_minmax, | 955 | .proc_handler = &dirty_background_ratio_handler, |
956 | .strategy = &sysctl_intvec, | 956 | .strategy = &sysctl_intvec, |
957 | .extra1 = &zero, | 957 | .extra1 = &zero, |
958 | .extra2 = &one_hundred, | 958 | .extra2 = &one_hundred, |
959 | }, | 959 | }, |
960 | { | 960 | { |
961 | .ctl_name = CTL_UNNUMBERED, | ||
962 | .procname = "dirty_background_bytes", | ||
963 | .data = &dirty_background_bytes, | ||
964 | .maxlen = sizeof(dirty_background_bytes), | ||
965 | .mode = 0644, | ||
966 | .proc_handler = &dirty_background_bytes_handler, | ||
967 | .strategy = &sysctl_intvec, | ||
968 | .extra1 = &one, | ||
969 | }, | ||
970 | { | ||
961 | .ctl_name = VM_DIRTY_RATIO, | 971 | .ctl_name = VM_DIRTY_RATIO, |
962 | .procname = "dirty_ratio", | 972 | .procname = "dirty_ratio", |
963 | .data = &vm_dirty_ratio, | 973 | .data = &vm_dirty_ratio, |
@@ -969,6 +979,16 @@ static struct ctl_table vm_table[] = { | |||
969 | .extra2 = &one_hundred, | 979 | .extra2 = &one_hundred, |
970 | }, | 980 | }, |
971 | { | 981 | { |
982 | .ctl_name = CTL_UNNUMBERED, | ||
983 | .procname = "dirty_bytes", | ||
984 | .data = &vm_dirty_bytes, | ||
985 | .maxlen = sizeof(vm_dirty_bytes), | ||
986 | .mode = 0644, | ||
987 | .proc_handler = &dirty_bytes_handler, | ||
988 | .strategy = &sysctl_intvec, | ||
989 | .extra1 = &one, | ||
990 | }, | ||
991 | { | ||
972 | .procname = "dirty_writeback_centisecs", | 992 | .procname = "dirty_writeback_centisecs", |
973 | .data = &dirty_writeback_interval, | 993 | .data = &dirty_writeback_interval, |
974 | .maxlen = sizeof(dirty_writeback_interval), | 994 | .maxlen = sizeof(dirty_writeback_interval), |
@@ -1085,6 +1105,17 @@ static struct ctl_table vm_table[] = { | |||
1085 | .mode = 0644, | 1105 | .mode = 0644, |
1086 | .proc_handler = &proc_dointvec | 1106 | .proc_handler = &proc_dointvec |
1087 | }, | 1107 | }, |
1108 | #else | ||
1109 | { | ||
1110 | .ctl_name = CTL_UNNUMBERED, | ||
1111 | .procname = "nr_trim_pages", | ||
1112 | .data = &sysctl_nr_trim_pages, | ||
1113 | .maxlen = sizeof(sysctl_nr_trim_pages), | ||
1114 | .mode = 0644, | ||
1115 | .proc_handler = &proc_dointvec_minmax, | ||
1116 | .strategy = &sysctl_intvec, | ||
1117 | .extra1 = &zero, | ||
1118 | }, | ||
1088 | #endif | 1119 | #endif |
1089 | { | 1120 | { |
1090 | .ctl_name = VM_LAPTOP_MODE, | 1121 | .ctl_name = VM_LAPTOP_MODE, |
diff --git a/kernel/test_kprobes.c b/kernel/test_kprobes.c index 06b6395b45b2..4f104515a19b 100644 --- a/kernel/test_kprobes.c +++ b/kernel/test_kprobes.c | |||
@@ -22,21 +22,11 @@ | |||
22 | 22 | ||
23 | static u32 rand1, preh_val, posth_val, jph_val; | 23 | static u32 rand1, preh_val, posth_val, jph_val; |
24 | static int errors, handler_errors, num_tests; | 24 | static int errors, handler_errors, num_tests; |
25 | static u32 (*target)(u32 value); | ||
26 | static u32 (*target2)(u32 value); | ||
25 | 27 | ||
26 | static noinline u32 kprobe_target(u32 value) | 28 | static noinline u32 kprobe_target(u32 value) |
27 | { | 29 | { |
28 | /* | ||
29 | * gcc ignores noinline on some architectures unless we stuff | ||
30 | * sufficient lard into the function. The get_kprobe() here is | ||
31 | * just for that. | ||
32 | * | ||
33 | * NOTE: We aren't concerned about the correctness of get_kprobe() | ||
34 | * here; hence, this call is neither under !preempt nor with the | ||
35 | * kprobe_mutex held. This is fine(tm) | ||
36 | */ | ||
37 | if (get_kprobe((void *)0xdeadbeef)) | ||
38 | printk(KERN_INFO "Kprobe smoke test: probe on 0xdeadbeef!\n"); | ||
39 | |||
40 | return (value / div_factor); | 30 | return (value / div_factor); |
41 | } | 31 | } |
42 | 32 | ||
@@ -74,7 +64,7 @@ static int test_kprobe(void) | |||
74 | return ret; | 64 | return ret; |
75 | } | 65 | } |
76 | 66 | ||
77 | ret = kprobe_target(rand1); | 67 | ret = target(rand1); |
78 | unregister_kprobe(&kp); | 68 | unregister_kprobe(&kp); |
79 | 69 | ||
80 | if (preh_val == 0) { | 70 | if (preh_val == 0) { |
@@ -92,6 +82,84 @@ static int test_kprobe(void) | |||
92 | return 0; | 82 | return 0; |
93 | } | 83 | } |
94 | 84 | ||
85 | static noinline u32 kprobe_target2(u32 value) | ||
86 | { | ||
87 | return (value / div_factor) + 1; | ||
88 | } | ||
89 | |||
90 | static int kp_pre_handler2(struct kprobe *p, struct pt_regs *regs) | ||
91 | { | ||
92 | preh_val = (rand1 / div_factor) + 1; | ||
93 | return 0; | ||
94 | } | ||
95 | |||
96 | static void kp_post_handler2(struct kprobe *p, struct pt_regs *regs, | ||
97 | unsigned long flags) | ||
98 | { | ||
99 | if (preh_val != (rand1 / div_factor) + 1) { | ||
100 | handler_errors++; | ||
101 | printk(KERN_ERR "Kprobe smoke test failed: " | ||
102 | "incorrect value in post_handler2\n"); | ||
103 | } | ||
104 | posth_val = preh_val + div_factor; | ||
105 | } | ||
106 | |||
107 | static struct kprobe kp2 = { | ||
108 | .symbol_name = "kprobe_target2", | ||
109 | .pre_handler = kp_pre_handler2, | ||
110 | .post_handler = kp_post_handler2 | ||
111 | }; | ||
112 | |||
113 | static int test_kprobes(void) | ||
114 | { | ||
115 | int ret; | ||
116 | struct kprobe *kps[2] = {&kp, &kp2}; | ||
117 | |||
118 | kp.addr = 0; /* addr should be cleard for reusing kprobe. */ | ||
119 | ret = register_kprobes(kps, 2); | ||
120 | if (ret < 0) { | ||
121 | printk(KERN_ERR "Kprobe smoke test failed: " | ||
122 | "register_kprobes returned %d\n", ret); | ||
123 | return ret; | ||
124 | } | ||
125 | |||
126 | preh_val = 0; | ||
127 | posth_val = 0; | ||
128 | ret = target(rand1); | ||
129 | |||
130 | if (preh_val == 0) { | ||
131 | printk(KERN_ERR "Kprobe smoke test failed: " | ||
132 | "kprobe pre_handler not called\n"); | ||
133 | handler_errors++; | ||
134 | } | ||
135 | |||
136 | if (posth_val == 0) { | ||
137 | printk(KERN_ERR "Kprobe smoke test failed: " | ||
138 | "kprobe post_handler not called\n"); | ||
139 | handler_errors++; | ||
140 | } | ||
141 | |||
142 | preh_val = 0; | ||
143 | posth_val = 0; | ||
144 | ret = target2(rand1); | ||
145 | |||
146 | if (preh_val == 0) { | ||
147 | printk(KERN_ERR "Kprobe smoke test failed: " | ||
148 | "kprobe pre_handler2 not called\n"); | ||
149 | handler_errors++; | ||
150 | } | ||
151 | |||
152 | if (posth_val == 0) { | ||
153 | printk(KERN_ERR "Kprobe smoke test failed: " | ||
154 | "kprobe post_handler2 not called\n"); | ||
155 | handler_errors++; | ||
156 | } | ||
157 | |||
158 | unregister_kprobes(kps, 2); | ||
159 | return 0; | ||
160 | |||
161 | } | ||
162 | |||
95 | static u32 j_kprobe_target(u32 value) | 163 | static u32 j_kprobe_target(u32 value) |
96 | { | 164 | { |
97 | if (value != rand1) { | 165 | if (value != rand1) { |
@@ -121,7 +189,7 @@ static int test_jprobe(void) | |||
121 | return ret; | 189 | return ret; |
122 | } | 190 | } |
123 | 191 | ||
124 | ret = kprobe_target(rand1); | 192 | ret = target(rand1); |
125 | unregister_jprobe(&jp); | 193 | unregister_jprobe(&jp); |
126 | if (jph_val == 0) { | 194 | if (jph_val == 0) { |
127 | printk(KERN_ERR "Kprobe smoke test failed: " | 195 | printk(KERN_ERR "Kprobe smoke test failed: " |
@@ -132,6 +200,43 @@ static int test_jprobe(void) | |||
132 | return 0; | 200 | return 0; |
133 | } | 201 | } |
134 | 202 | ||
203 | static struct jprobe jp2 = { | ||
204 | .entry = j_kprobe_target, | ||
205 | .kp.symbol_name = "kprobe_target2" | ||
206 | }; | ||
207 | |||
208 | static int test_jprobes(void) | ||
209 | { | ||
210 | int ret; | ||
211 | struct jprobe *jps[2] = {&jp, &jp2}; | ||
212 | |||
213 | jp.kp.addr = 0; /* addr should be cleard for reusing kprobe. */ | ||
214 | ret = register_jprobes(jps, 2); | ||
215 | if (ret < 0) { | ||
216 | printk(KERN_ERR "Kprobe smoke test failed: " | ||
217 | "register_jprobes returned %d\n", ret); | ||
218 | return ret; | ||
219 | } | ||
220 | |||
221 | jph_val = 0; | ||
222 | ret = target(rand1); | ||
223 | if (jph_val == 0) { | ||
224 | printk(KERN_ERR "Kprobe smoke test failed: " | ||
225 | "jprobe handler not called\n"); | ||
226 | handler_errors++; | ||
227 | } | ||
228 | |||
229 | jph_val = 0; | ||
230 | ret = target2(rand1); | ||
231 | if (jph_val == 0) { | ||
232 | printk(KERN_ERR "Kprobe smoke test failed: " | ||
233 | "jprobe handler2 not called\n"); | ||
234 | handler_errors++; | ||
235 | } | ||
236 | unregister_jprobes(jps, 2); | ||
237 | |||
238 | return 0; | ||
239 | } | ||
135 | #ifdef CONFIG_KRETPROBES | 240 | #ifdef CONFIG_KRETPROBES |
136 | static u32 krph_val; | 241 | static u32 krph_val; |
137 | 242 | ||
@@ -177,7 +282,7 @@ static int test_kretprobe(void) | |||
177 | return ret; | 282 | return ret; |
178 | } | 283 | } |
179 | 284 | ||
180 | ret = kprobe_target(rand1); | 285 | ret = target(rand1); |
181 | unregister_kretprobe(&rp); | 286 | unregister_kretprobe(&rp); |
182 | if (krph_val != rand1) { | 287 | if (krph_val != rand1) { |
183 | printk(KERN_ERR "Kprobe smoke test failed: " | 288 | printk(KERN_ERR "Kprobe smoke test failed: " |
@@ -187,12 +292,72 @@ static int test_kretprobe(void) | |||
187 | 292 | ||
188 | return 0; | 293 | return 0; |
189 | } | 294 | } |
295 | |||
296 | static int return_handler2(struct kretprobe_instance *ri, struct pt_regs *regs) | ||
297 | { | ||
298 | unsigned long ret = regs_return_value(regs); | ||
299 | |||
300 | if (ret != (rand1 / div_factor) + 1) { | ||
301 | handler_errors++; | ||
302 | printk(KERN_ERR "Kprobe smoke test failed: " | ||
303 | "incorrect value in kretprobe handler2\n"); | ||
304 | } | ||
305 | if (krph_val == 0) { | ||
306 | handler_errors++; | ||
307 | printk(KERN_ERR "Kprobe smoke test failed: " | ||
308 | "call to kretprobe entry handler failed\n"); | ||
309 | } | ||
310 | |||
311 | krph_val = rand1; | ||
312 | return 0; | ||
313 | } | ||
314 | |||
315 | static struct kretprobe rp2 = { | ||
316 | .handler = return_handler2, | ||
317 | .entry_handler = entry_handler, | ||
318 | .kp.symbol_name = "kprobe_target2" | ||
319 | }; | ||
320 | |||
321 | static int test_kretprobes(void) | ||
322 | { | ||
323 | int ret; | ||
324 | struct kretprobe *rps[2] = {&rp, &rp2}; | ||
325 | |||
326 | rp.kp.addr = 0; /* addr should be cleard for reusing kprobe. */ | ||
327 | ret = register_kretprobes(rps, 2); | ||
328 | if (ret < 0) { | ||
329 | printk(KERN_ERR "Kprobe smoke test failed: " | ||
330 | "register_kretprobe returned %d\n", ret); | ||
331 | return ret; | ||
332 | } | ||
333 | |||
334 | krph_val = 0; | ||
335 | ret = target(rand1); | ||
336 | if (krph_val != rand1) { | ||
337 | printk(KERN_ERR "Kprobe smoke test failed: " | ||
338 | "kretprobe handler not called\n"); | ||
339 | handler_errors++; | ||
340 | } | ||
341 | |||
342 | krph_val = 0; | ||
343 | ret = target2(rand1); | ||
344 | if (krph_val != rand1) { | ||
345 | printk(KERN_ERR "Kprobe smoke test failed: " | ||
346 | "kretprobe handler2 not called\n"); | ||
347 | handler_errors++; | ||
348 | } | ||
349 | unregister_kretprobes(rps, 2); | ||
350 | return 0; | ||
351 | } | ||
190 | #endif /* CONFIG_KRETPROBES */ | 352 | #endif /* CONFIG_KRETPROBES */ |
191 | 353 | ||
192 | int init_test_probes(void) | 354 | int init_test_probes(void) |
193 | { | 355 | { |
194 | int ret; | 356 | int ret; |
195 | 357 | ||
358 | target = kprobe_target; | ||
359 | target2 = kprobe_target2; | ||
360 | |||
196 | do { | 361 | do { |
197 | rand1 = random32(); | 362 | rand1 = random32(); |
198 | } while (rand1 <= div_factor); | 363 | } while (rand1 <= div_factor); |
@@ -204,15 +369,30 @@ int init_test_probes(void) | |||
204 | errors++; | 369 | errors++; |
205 | 370 | ||
206 | num_tests++; | 371 | num_tests++; |
372 | ret = test_kprobes(); | ||
373 | if (ret < 0) | ||
374 | errors++; | ||
375 | |||
376 | num_tests++; | ||
207 | ret = test_jprobe(); | 377 | ret = test_jprobe(); |
208 | if (ret < 0) | 378 | if (ret < 0) |
209 | errors++; | 379 | errors++; |
210 | 380 | ||
381 | num_tests++; | ||
382 | ret = test_jprobes(); | ||
383 | if (ret < 0) | ||
384 | errors++; | ||
385 | |||
211 | #ifdef CONFIG_KRETPROBES | 386 | #ifdef CONFIG_KRETPROBES |
212 | num_tests++; | 387 | num_tests++; |
213 | ret = test_kretprobe(); | 388 | ret = test_kretprobe(); |
214 | if (ret < 0) | 389 | if (ret < 0) |
215 | errors++; | 390 | errors++; |
391 | |||
392 | num_tests++; | ||
393 | ret = test_kretprobes(); | ||
394 | if (ret < 0) | ||
395 | errors++; | ||
216 | #endif /* CONFIG_KRETPROBES */ | 396 | #endif /* CONFIG_KRETPROBES */ |
217 | 397 | ||
218 | if (errors) | 398 | if (errors) |
diff --git a/kernel/time.c b/kernel/time.c index d63a4336fad6..4886e3ce83a4 100644 --- a/kernel/time.c +++ b/kernel/time.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include <linux/fs.h> | 37 | #include <linux/fs.h> |
38 | #include <linux/slab.h> | 38 | #include <linux/slab.h> |
39 | #include <linux/math64.h> | 39 | #include <linux/math64.h> |
40 | #include <linux/ptrace.h> | ||
40 | 41 | ||
41 | #include <asm/uaccess.h> | 42 | #include <asm/uaccess.h> |
42 | #include <asm/unistd.h> | 43 | #include <asm/unistd.h> |
@@ -65,8 +66,9 @@ asmlinkage long sys_time(time_t __user * tloc) | |||
65 | 66 | ||
66 | if (tloc) { | 67 | if (tloc) { |
67 | if (put_user(i,tloc)) | 68 | if (put_user(i,tloc)) |
68 | i = -EFAULT; | 69 | return -EFAULT; |
69 | } | 70 | } |
71 | force_successful_syscall_return(); | ||
70 | return i; | 72 | return i; |
71 | } | 73 | } |
72 | 74 | ||
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index 1ca99557e929..06f197560f3b 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c | |||
@@ -45,7 +45,7 @@ | |||
45 | * | 45 | * |
46 | * The value 8 is somewhat carefully chosen, as anything | 46 | * The value 8 is somewhat carefully chosen, as anything |
47 | * larger can result in overflows. NSEC_PER_JIFFY grows as | 47 | * larger can result in overflows. NSEC_PER_JIFFY grows as |
48 | * HZ shrinks, so values greater then 8 overflow 32bits when | 48 | * HZ shrinks, so values greater than 8 overflow 32bits when |
49 | * HZ=100. | 49 | * HZ=100. |
50 | */ | 50 | */ |
51 | #define JIFFIES_SHIFT 8 | 51 | #define JIFFIES_SHIFT 8 |
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index fa05e88aa76f..900f1b6598d1 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
@@ -46,6 +46,9 @@ struct timespec xtime __attribute__ ((aligned (16))); | |||
46 | struct timespec wall_to_monotonic __attribute__ ((aligned (16))); | 46 | struct timespec wall_to_monotonic __attribute__ ((aligned (16))); |
47 | static unsigned long total_sleep_time; /* seconds */ | 47 | static unsigned long total_sleep_time; /* seconds */ |
48 | 48 | ||
49 | /* flag for if timekeeping is suspended */ | ||
50 | int __read_mostly timekeeping_suspended; | ||
51 | |||
49 | static struct timespec xtime_cache __attribute__ ((aligned (16))); | 52 | static struct timespec xtime_cache __attribute__ ((aligned (16))); |
50 | void update_xtime_cache(u64 nsec) | 53 | void update_xtime_cache(u64 nsec) |
51 | { | 54 | { |
@@ -92,6 +95,8 @@ void getnstimeofday(struct timespec *ts) | |||
92 | unsigned long seq; | 95 | unsigned long seq; |
93 | s64 nsecs; | 96 | s64 nsecs; |
94 | 97 | ||
98 | WARN_ON(timekeeping_suspended); | ||
99 | |||
95 | do { | 100 | do { |
96 | seq = read_seqbegin(&xtime_lock); | 101 | seq = read_seqbegin(&xtime_lock); |
97 | 102 | ||
@@ -299,8 +304,6 @@ void __init timekeeping_init(void) | |||
299 | write_sequnlock_irqrestore(&xtime_lock, flags); | 304 | write_sequnlock_irqrestore(&xtime_lock, flags); |
300 | } | 305 | } |
301 | 306 | ||
302 | /* flag for if timekeeping is suspended */ | ||
303 | static int timekeeping_suspended; | ||
304 | /* time in seconds when suspend began */ | 307 | /* time in seconds when suspend began */ |
305 | static unsigned long timekeeping_suspend_time; | 308 | static unsigned long timekeeping_suspend_time; |
306 | 309 | ||
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index a9d9760dc7b6..8b0daf0662ef 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
@@ -168,7 +168,13 @@ rb_event_length(struct ring_buffer_event *event) | |||
168 | */ | 168 | */ |
169 | unsigned ring_buffer_event_length(struct ring_buffer_event *event) | 169 | unsigned ring_buffer_event_length(struct ring_buffer_event *event) |
170 | { | 170 | { |
171 | return rb_event_length(event); | 171 | unsigned length = rb_event_length(event); |
172 | if (event->type != RINGBUF_TYPE_DATA) | ||
173 | return length; | ||
174 | length -= RB_EVNT_HDR_SIZE; | ||
175 | if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0])) | ||
176 | length -= sizeof(event->array[0]); | ||
177 | return length; | ||
172 | } | 178 | } |
173 | EXPORT_SYMBOL_GPL(ring_buffer_event_length); | 179 | EXPORT_SYMBOL_GPL(ring_buffer_event_length); |
174 | 180 | ||
diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 2dc06ab35716..43f891b05a4b 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c | |||
@@ -92,8 +92,8 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) | |||
92 | mm = get_task_mm(p); | 92 | mm = get_task_mm(p); |
93 | if (mm) { | 93 | if (mm) { |
94 | /* adjust to KB unit */ | 94 | /* adjust to KB unit */ |
95 | stats->hiwater_rss = mm->hiwater_rss * PAGE_SIZE / KB; | 95 | stats->hiwater_rss = get_mm_hiwater_rss(mm) * PAGE_SIZE / KB; |
96 | stats->hiwater_vm = mm->hiwater_vm * PAGE_SIZE / KB; | 96 | stats->hiwater_vm = get_mm_hiwater_vm(mm) * PAGE_SIZE / KB; |
97 | mmput(mm); | 97 | mmput(mm); |
98 | } | 98 | } |
99 | stats->read_char = p->ioac.rchar; | 99 | stats->read_char = p->ioac.rchar; |