diff options
author | Ingo Molnar <mingo@elte.hu> | 2009-01-10 21:41:39 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-01-10 21:41:39 -0500 |
commit | abede81c4fb2e3b85d8760f25e3da39d2c69a134 (patch) | |
tree | 26c893ec108d837eb9171d678c55a1cea7b22af4 /kernel | |
parent | c9d557c19f94df42db78d4a5de4d25feee694bad (diff) | |
parent | c59765042f53a79a7a65585042ff463b69cb248c (diff) |
Merge commit 'v2.6.29-rc1' into core/urgent
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/Makefile | 3 | ||||
-rw-r--r-- | kernel/async.c | 335 | ||||
-rw-r--r-- | kernel/cgroup.c | 276 | ||||
-rw-r--r-- | kernel/cpuset.c | 251 | ||||
-rw-r--r-- | kernel/cred.c | 5 | ||||
-rw-r--r-- | kernel/fork.c | 8 | ||||
-rw-r--r-- | kernel/irq/autoprobe.c | 5 | ||||
-rw-r--r-- | kernel/module.c | 2 | ||||
-rw-r--r-- | kernel/ns_cgroup.c | 2 | ||||
-rw-r--r-- | kernel/pid.c | 6 | ||||
-rw-r--r-- | kernel/power/disk.c | 6 | ||||
-rw-r--r-- | kernel/power/snapshot.c | 370 | ||||
-rw-r--r-- | kernel/power/swsusp.c | 122 | ||||
-rw-r--r-- | kernel/res_counter.c | 44 | ||||
-rw-r--r-- | kernel/resource.c | 61 | ||||
-rw-r--r-- | kernel/sched.c | 5 | ||||
-rw-r--r-- | kernel/sched_fair.c | 2 | ||||
-rw-r--r-- | kernel/sysctl.c | 14 | ||||
-rw-r--r-- | kernel/trace/ring_buffer.c | 8 |
19 files changed, 1129 insertions, 396 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index e1c5bf3365c0..2921d90ce32f 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -9,7 +9,8 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \ | |||
9 | rcupdate.o extable.o params.o posix-timers.o \ | 9 | rcupdate.o extable.o params.o posix-timers.o \ |
10 | kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ | 10 | kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ |
11 | hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ | 11 | hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ |
12 | notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o | 12 | notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \ |
13 | async.o | ||
13 | 14 | ||
14 | ifdef CONFIG_FUNCTION_TRACER | 15 | ifdef CONFIG_FUNCTION_TRACER |
15 | # Do not trace debug files and internal ftrace files | 16 | # Do not trace debug files and internal ftrace files |
diff --git a/kernel/async.c b/kernel/async.c new file mode 100644 index 000000000000..f286e9f2b736 --- /dev/null +++ b/kernel/async.c | |||
@@ -0,0 +1,335 @@ | |||
1 | /* | ||
2 | * async.c: Asynchronous function calls for boot performance | ||
3 | * | ||
4 | * (C) Copyright 2009 Intel Corporation | ||
5 | * Author: Arjan van de Ven <arjan@linux.intel.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; version 2 | ||
10 | * of the License. | ||
11 | */ | ||
12 | |||
13 | |||
14 | /* | ||
15 | |||
16 | Goals and Theory of Operation | ||
17 | |||
18 | The primary goal of this feature is to reduce the kernel boot time, | ||
19 | by doing various independent hardware delays and discovery operations | ||
20 | decoupled and not strictly serialized. | ||
21 | |||
22 | More specifically, the asynchronous function call concept allows | ||
23 | certain operations (primarily during system boot) to happen | ||
24 | asynchronously, out of order, while these operations still | ||
25 | have their externally visible parts happen sequentially and in-order. | ||
26 | (not unlike how out-of-order CPUs retire their instructions in order) | ||
27 | |||
28 | Key to the asynchronous function call implementation is the concept of | ||
29 | a "sequence cookie" (which, although it has an abstracted type, can be | ||
30 | thought of as a monotonically incrementing number). | ||
31 | |||
32 | The async core will assign each scheduled event such a sequence cookie and | ||
33 | pass this to the called functions. | ||
34 | |||
35 | The asynchronously called function should before doing a globally visible | ||
36 | operation, such as registering device numbers, call the | ||
37 | async_synchronize_cookie() function and pass in its own cookie. The | ||
38 | async_synchronize_cookie() function will make sure that all asynchronous | ||
39 | operations that were scheduled prior to the operation corresponding with the | ||
40 | cookie have completed. | ||
41 | |||
42 | Subsystem/driver initialization code that scheduled asynchronous probe | ||
43 | functions, but which shares global resources with other drivers/subsystems | ||
44 | that do not use the asynchronous call feature, need to do a full | ||
45 | synchronization with the async_synchronize_full() function, before returning | ||
46 | from their init function. This is to maintain strict ordering between the | ||
47 | asynchronous and synchronous parts of the kernel. | ||
48 | |||
49 | */ | ||
50 | |||
51 | #include <linux/async.h> | ||
52 | #include <linux/module.h> | ||
53 | #include <linux/wait.h> | ||
54 | #include <linux/sched.h> | ||
55 | #include <linux/init.h> | ||
56 | #include <linux/kthread.h> | ||
57 | #include <asm/atomic.h> | ||
58 | |||
59 | static async_cookie_t next_cookie = 1; | ||
60 | |||
61 | #define MAX_THREADS 256 | ||
62 | #define MAX_WORK 32768 | ||
63 | |||
64 | static LIST_HEAD(async_pending); | ||
65 | static LIST_HEAD(async_running); | ||
66 | static DEFINE_SPINLOCK(async_lock); | ||
67 | |||
68 | static int async_enabled = 0; | ||
69 | |||
70 | struct async_entry { | ||
71 | struct list_head list; | ||
72 | async_cookie_t cookie; | ||
73 | async_func_ptr *func; | ||
74 | void *data; | ||
75 | struct list_head *running; | ||
76 | }; | ||
77 | |||
78 | static DECLARE_WAIT_QUEUE_HEAD(async_done); | ||
79 | static DECLARE_WAIT_QUEUE_HEAD(async_new); | ||
80 | |||
81 | static atomic_t entry_count; | ||
82 | static atomic_t thread_count; | ||
83 | |||
84 | extern int initcall_debug; | ||
85 | |||
86 | |||
87 | /* | ||
88 | * MUST be called with the lock held! | ||
89 | */ | ||
90 | static async_cookie_t __lowest_in_progress(struct list_head *running) | ||
91 | { | ||
92 | struct async_entry *entry; | ||
93 | if (!list_empty(&async_pending)) { | ||
94 | entry = list_first_entry(&async_pending, | ||
95 | struct async_entry, list); | ||
96 | return entry->cookie; | ||
97 | } else if (!list_empty(running)) { | ||
98 | entry = list_first_entry(running, | ||
99 | struct async_entry, list); | ||
100 | return entry->cookie; | ||
101 | } else { | ||
102 | /* nothing in progress... next_cookie is "infinity" */ | ||
103 | return next_cookie; | ||
104 | } | ||
105 | |||
106 | } | ||
107 | /* | ||
108 | * pick the first pending entry and run it | ||
109 | */ | ||
110 | static void run_one_entry(void) | ||
111 | { | ||
112 | unsigned long flags; | ||
113 | struct async_entry *entry; | ||
114 | ktime_t calltime, delta, rettime; | ||
115 | |||
116 | /* 1) pick one task from the pending queue */ | ||
117 | |||
118 | spin_lock_irqsave(&async_lock, flags); | ||
119 | if (list_empty(&async_pending)) | ||
120 | goto out; | ||
121 | entry = list_first_entry(&async_pending, struct async_entry, list); | ||
122 | |||
123 | /* 2) move it to the running queue */ | ||
124 | list_del(&entry->list); | ||
125 | list_add_tail(&entry->list, &async_running); | ||
126 | spin_unlock_irqrestore(&async_lock, flags); | ||
127 | |||
128 | /* 3) run it (and print duration)*/ | ||
129 | if (initcall_debug && system_state == SYSTEM_BOOTING) { | ||
130 | printk("calling %lli_%pF @ %i\n", entry->cookie, entry->func, task_pid_nr(current)); | ||
131 | calltime = ktime_get(); | ||
132 | } | ||
133 | entry->func(entry->data, entry->cookie); | ||
134 | if (initcall_debug && system_state == SYSTEM_BOOTING) { | ||
135 | rettime = ktime_get(); | ||
136 | delta = ktime_sub(rettime, calltime); | ||
137 | printk("initcall %lli_%pF returned 0 after %lld usecs\n", entry->cookie, | ||
138 | entry->func, ktime_to_ns(delta) >> 10); | ||
139 | } | ||
140 | |||
141 | /* 4) remove it from the running queue */ | ||
142 | spin_lock_irqsave(&async_lock, flags); | ||
143 | list_del(&entry->list); | ||
144 | |||
145 | /* 5) free the entry */ | ||
146 | kfree(entry); | ||
147 | atomic_dec(&entry_count); | ||
148 | |||
149 | spin_unlock_irqrestore(&async_lock, flags); | ||
150 | |||
151 | /* 6) wake up any waiters. */ | ||
152 | wake_up(&async_done); | ||
153 | return; | ||
154 | |||
155 | out: | ||
156 | spin_unlock_irqrestore(&async_lock, flags); | ||
157 | } | ||
158 | |||
159 | |||
160 | static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct list_head *running) | ||
161 | { | ||
162 | struct async_entry *entry; | ||
163 | unsigned long flags; | ||
164 | async_cookie_t newcookie; | ||
165 | |||
166 | |||
167 | /* allow irq-off callers */ | ||
168 | entry = kzalloc(sizeof(struct async_entry), GFP_ATOMIC); | ||
169 | |||
170 | /* | ||
171 | * If we're out of memory or if there's too much work | ||
172 | * pending already, we execute synchronously. | ||
173 | */ | ||
174 | if (!async_enabled || !entry || atomic_read(&entry_count) > MAX_WORK) { | ||
175 | kfree(entry); | ||
176 | spin_lock_irqsave(&async_lock, flags); | ||
177 | newcookie = next_cookie++; | ||
178 | spin_unlock_irqrestore(&async_lock, flags); | ||
179 | |||
180 | /* low on memory.. run synchronously */ | ||
181 | ptr(data, newcookie); | ||
182 | return newcookie; | ||
183 | } | ||
184 | entry->func = ptr; | ||
185 | entry->data = data; | ||
186 | entry->running = running; | ||
187 | |||
188 | spin_lock_irqsave(&async_lock, flags); | ||
189 | newcookie = entry->cookie = next_cookie++; | ||
190 | list_add_tail(&entry->list, &async_pending); | ||
191 | atomic_inc(&entry_count); | ||
192 | spin_unlock_irqrestore(&async_lock, flags); | ||
193 | wake_up(&async_new); | ||
194 | return newcookie; | ||
195 | } | ||
196 | |||
197 | async_cookie_t async_schedule(async_func_ptr *ptr, void *data) | ||
198 | { | ||
199 | return __async_schedule(ptr, data, &async_pending); | ||
200 | } | ||
201 | EXPORT_SYMBOL_GPL(async_schedule); | ||
202 | |||
203 | async_cookie_t async_schedule_special(async_func_ptr *ptr, void *data, struct list_head *running) | ||
204 | { | ||
205 | return __async_schedule(ptr, data, running); | ||
206 | } | ||
207 | EXPORT_SYMBOL_GPL(async_schedule_special); | ||
208 | |||
209 | void async_synchronize_full(void) | ||
210 | { | ||
211 | do { | ||
212 | async_synchronize_cookie(next_cookie); | ||
213 | } while (!list_empty(&async_running) || !list_empty(&async_pending)); | ||
214 | } | ||
215 | EXPORT_SYMBOL_GPL(async_synchronize_full); | ||
216 | |||
217 | void async_synchronize_full_special(struct list_head *list) | ||
218 | { | ||
219 | async_synchronize_cookie_special(next_cookie, list); | ||
220 | } | ||
221 | EXPORT_SYMBOL_GPL(async_synchronize_full_special); | ||
222 | |||
223 | void async_synchronize_cookie_special(async_cookie_t cookie, struct list_head *running) | ||
224 | { | ||
225 | ktime_t starttime, delta, endtime; | ||
226 | |||
227 | if (initcall_debug && system_state == SYSTEM_BOOTING) { | ||
228 | printk("async_waiting @ %i\n", task_pid_nr(current)); | ||
229 | starttime = ktime_get(); | ||
230 | } | ||
231 | |||
232 | wait_event(async_done, __lowest_in_progress(running) >= cookie); | ||
233 | |||
234 | if (initcall_debug && system_state == SYSTEM_BOOTING) { | ||
235 | endtime = ktime_get(); | ||
236 | delta = ktime_sub(endtime, starttime); | ||
237 | |||
238 | printk("async_continuing @ %i after %lli usec\n", | ||
239 | task_pid_nr(current), ktime_to_ns(delta) >> 10); | ||
240 | } | ||
241 | } | ||
242 | EXPORT_SYMBOL_GPL(async_synchronize_cookie_special); | ||
243 | |||
244 | void async_synchronize_cookie(async_cookie_t cookie) | ||
245 | { | ||
246 | async_synchronize_cookie_special(cookie, &async_running); | ||
247 | } | ||
248 | EXPORT_SYMBOL_GPL(async_synchronize_cookie); | ||
249 | |||
250 | |||
251 | static int async_thread(void *unused) | ||
252 | { | ||
253 | DECLARE_WAITQUEUE(wq, current); | ||
254 | add_wait_queue(&async_new, &wq); | ||
255 | |||
256 | while (!kthread_should_stop()) { | ||
257 | int ret = HZ; | ||
258 | set_current_state(TASK_INTERRUPTIBLE); | ||
259 | /* | ||
260 | * check the list head without lock.. false positives | ||
261 | * are dealt with inside run_one_entry() while holding | ||
262 | * the lock. | ||
263 | */ | ||
264 | rmb(); | ||
265 | if (!list_empty(&async_pending)) | ||
266 | run_one_entry(); | ||
267 | else | ||
268 | ret = schedule_timeout(HZ); | ||
269 | |||
270 | if (ret == 0) { | ||
271 | /* | ||
272 | * we timed out, this means we as thread are redundant. | ||
273 | * we sign off and die, but we to avoid any races there | ||
274 | * is a last-straw check to see if work snuck in. | ||
275 | */ | ||
276 | atomic_dec(&thread_count); | ||
277 | wmb(); /* manager must see our departure first */ | ||
278 | if (list_empty(&async_pending)) | ||
279 | break; | ||
280 | /* | ||
281 | * woops work came in between us timing out and us | ||
282 | * signing off; we need to stay alive and keep working. | ||
283 | */ | ||
284 | atomic_inc(&thread_count); | ||
285 | } | ||
286 | } | ||
287 | remove_wait_queue(&async_new, &wq); | ||
288 | |||
289 | return 0; | ||
290 | } | ||
291 | |||
292 | static int async_manager_thread(void *unused) | ||
293 | { | ||
294 | DECLARE_WAITQUEUE(wq, current); | ||
295 | add_wait_queue(&async_new, &wq); | ||
296 | |||
297 | while (!kthread_should_stop()) { | ||
298 | int tc, ec; | ||
299 | |||
300 | set_current_state(TASK_INTERRUPTIBLE); | ||
301 | |||
302 | tc = atomic_read(&thread_count); | ||
303 | rmb(); | ||
304 | ec = atomic_read(&entry_count); | ||
305 | |||
306 | while (tc < ec && tc < MAX_THREADS) { | ||
307 | kthread_run(async_thread, NULL, "async/%i", tc); | ||
308 | atomic_inc(&thread_count); | ||
309 | tc++; | ||
310 | } | ||
311 | |||
312 | schedule(); | ||
313 | } | ||
314 | remove_wait_queue(&async_new, &wq); | ||
315 | |||
316 | return 0; | ||
317 | } | ||
318 | |||
319 | static int __init async_init(void) | ||
320 | { | ||
321 | if (async_enabled) | ||
322 | kthread_run(async_manager_thread, NULL, "async/mgr"); | ||
323 | return 0; | ||
324 | } | ||
325 | |||
326 | static int __init setup_async(char *str) | ||
327 | { | ||
328 | async_enabled = 1; | ||
329 | return 1; | ||
330 | } | ||
331 | |||
332 | __setup("fastboot", setup_async); | ||
333 | |||
334 | |||
335 | core_initcall(async_init); | ||
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index f221446aa02d..c29831076e7a 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -84,7 +84,7 @@ struct cgroupfs_root { | |||
84 | /* Tracks how many cgroups are currently defined in hierarchy.*/ | 84 | /* Tracks how many cgroups are currently defined in hierarchy.*/ |
85 | int number_of_cgroups; | 85 | int number_of_cgroups; |
86 | 86 | ||
87 | /* A list running through the mounted hierarchies */ | 87 | /* A list running through the active hierarchies */ |
88 | struct list_head root_list; | 88 | struct list_head root_list; |
89 | 89 | ||
90 | /* Hierarchy-specific flags */ | 90 | /* Hierarchy-specific flags */ |
@@ -148,8 +148,8 @@ static int notify_on_release(const struct cgroup *cgrp) | |||
148 | #define for_each_subsys(_root, _ss) \ | 148 | #define for_each_subsys(_root, _ss) \ |
149 | list_for_each_entry(_ss, &_root->subsys_list, sibling) | 149 | list_for_each_entry(_ss, &_root->subsys_list, sibling) |
150 | 150 | ||
151 | /* for_each_root() allows you to iterate across the active hierarchies */ | 151 | /* for_each_active_root() allows you to iterate across the active hierarchies */ |
152 | #define for_each_root(_root) \ | 152 | #define for_each_active_root(_root) \ |
153 | list_for_each_entry(_root, &roots, root_list) | 153 | list_for_each_entry(_root, &roots, root_list) |
154 | 154 | ||
155 | /* the list of cgroups eligible for automatic release. Protected by | 155 | /* the list of cgroups eligible for automatic release. Protected by |
@@ -271,7 +271,7 @@ static void __put_css_set(struct css_set *cg, int taskexit) | |||
271 | 271 | ||
272 | rcu_read_lock(); | 272 | rcu_read_lock(); |
273 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 273 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { |
274 | struct cgroup *cgrp = cg->subsys[i]->cgroup; | 274 | struct cgroup *cgrp = rcu_dereference(cg->subsys[i]->cgroup); |
275 | if (atomic_dec_and_test(&cgrp->count) && | 275 | if (atomic_dec_and_test(&cgrp->count) && |
276 | notify_on_release(cgrp)) { | 276 | notify_on_release(cgrp)) { |
277 | if (taskexit) | 277 | if (taskexit) |
@@ -384,6 +384,25 @@ static int allocate_cg_links(int count, struct list_head *tmp) | |||
384 | return 0; | 384 | return 0; |
385 | } | 385 | } |
386 | 386 | ||
387 | /** | ||
388 | * link_css_set - a helper function to link a css_set to a cgroup | ||
389 | * @tmp_cg_links: cg_cgroup_link objects allocated by allocate_cg_links() | ||
390 | * @cg: the css_set to be linked | ||
391 | * @cgrp: the destination cgroup | ||
392 | */ | ||
393 | static void link_css_set(struct list_head *tmp_cg_links, | ||
394 | struct css_set *cg, struct cgroup *cgrp) | ||
395 | { | ||
396 | struct cg_cgroup_link *link; | ||
397 | |||
398 | BUG_ON(list_empty(tmp_cg_links)); | ||
399 | link = list_first_entry(tmp_cg_links, struct cg_cgroup_link, | ||
400 | cgrp_link_list); | ||
401 | link->cg = cg; | ||
402 | list_move(&link->cgrp_link_list, &cgrp->css_sets); | ||
403 | list_add(&link->cg_link_list, &cg->cg_links); | ||
404 | } | ||
405 | |||
387 | /* | 406 | /* |
388 | * find_css_set() takes an existing cgroup group and a | 407 | * find_css_set() takes an existing cgroup group and a |
389 | * cgroup object, and returns a css_set object that's | 408 | * cgroup object, and returns a css_set object that's |
@@ -399,7 +418,6 @@ static struct css_set *find_css_set( | |||
399 | int i; | 418 | int i; |
400 | 419 | ||
401 | struct list_head tmp_cg_links; | 420 | struct list_head tmp_cg_links; |
402 | struct cg_cgroup_link *link; | ||
403 | 421 | ||
404 | struct hlist_head *hhead; | 422 | struct hlist_head *hhead; |
405 | 423 | ||
@@ -444,26 +462,11 @@ static struct css_set *find_css_set( | |||
444 | * only do it for the first subsystem in each | 462 | * only do it for the first subsystem in each |
445 | * hierarchy | 463 | * hierarchy |
446 | */ | 464 | */ |
447 | if (ss->root->subsys_list.next == &ss->sibling) { | 465 | if (ss->root->subsys_list.next == &ss->sibling) |
448 | BUG_ON(list_empty(&tmp_cg_links)); | 466 | link_css_set(&tmp_cg_links, res, cgrp); |
449 | link = list_entry(tmp_cg_links.next, | ||
450 | struct cg_cgroup_link, | ||
451 | cgrp_link_list); | ||
452 | list_del(&link->cgrp_link_list); | ||
453 | list_add(&link->cgrp_link_list, &cgrp->css_sets); | ||
454 | link->cg = res; | ||
455 | list_add(&link->cg_link_list, &res->cg_links); | ||
456 | } | ||
457 | } | ||
458 | if (list_empty(&rootnode.subsys_list)) { | ||
459 | link = list_entry(tmp_cg_links.next, | ||
460 | struct cg_cgroup_link, | ||
461 | cgrp_link_list); | ||
462 | list_del(&link->cgrp_link_list); | ||
463 | list_add(&link->cgrp_link_list, &dummytop->css_sets); | ||
464 | link->cg = res; | ||
465 | list_add(&link->cg_link_list, &res->cg_links); | ||
466 | } | 467 | } |
468 | if (list_empty(&rootnode.subsys_list)) | ||
469 | link_css_set(&tmp_cg_links, res, dummytop); | ||
467 | 470 | ||
468 | BUG_ON(!list_empty(&tmp_cg_links)); | 471 | BUG_ON(!list_empty(&tmp_cg_links)); |
469 | 472 | ||
@@ -586,11 +589,18 @@ static void cgroup_call_pre_destroy(struct cgroup *cgrp) | |||
586 | { | 589 | { |
587 | struct cgroup_subsys *ss; | 590 | struct cgroup_subsys *ss; |
588 | for_each_subsys(cgrp->root, ss) | 591 | for_each_subsys(cgrp->root, ss) |
589 | if (ss->pre_destroy && cgrp->subsys[ss->subsys_id]) | 592 | if (ss->pre_destroy) |
590 | ss->pre_destroy(ss, cgrp); | 593 | ss->pre_destroy(ss, cgrp); |
591 | return; | 594 | return; |
592 | } | 595 | } |
593 | 596 | ||
597 | static void free_cgroup_rcu(struct rcu_head *obj) | ||
598 | { | ||
599 | struct cgroup *cgrp = container_of(obj, struct cgroup, rcu_head); | ||
600 | |||
601 | kfree(cgrp); | ||
602 | } | ||
603 | |||
594 | static void cgroup_diput(struct dentry *dentry, struct inode *inode) | 604 | static void cgroup_diput(struct dentry *dentry, struct inode *inode) |
595 | { | 605 | { |
596 | /* is dentry a directory ? if so, kfree() associated cgroup */ | 606 | /* is dentry a directory ? if so, kfree() associated cgroup */ |
@@ -610,19 +620,19 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode) | |||
610 | /* | 620 | /* |
611 | * Release the subsystem state objects. | 621 | * Release the subsystem state objects. |
612 | */ | 622 | */ |
613 | for_each_subsys(cgrp->root, ss) { | 623 | for_each_subsys(cgrp->root, ss) |
614 | if (cgrp->subsys[ss->subsys_id]) | 624 | ss->destroy(ss, cgrp); |
615 | ss->destroy(ss, cgrp); | ||
616 | } | ||
617 | 625 | ||
618 | cgrp->root->number_of_cgroups--; | 626 | cgrp->root->number_of_cgroups--; |
619 | mutex_unlock(&cgroup_mutex); | 627 | mutex_unlock(&cgroup_mutex); |
620 | 628 | ||
621 | /* Drop the active superblock reference that we took when we | 629 | /* |
622 | * created the cgroup */ | 630 | * Drop the active superblock reference that we took when we |
631 | * created the cgroup | ||
632 | */ | ||
623 | deactivate_super(cgrp->root->sb); | 633 | deactivate_super(cgrp->root->sb); |
624 | 634 | ||
625 | kfree(cgrp); | 635 | call_rcu(&cgrp->rcu_head, free_cgroup_rcu); |
626 | } | 636 | } |
627 | iput(inode); | 637 | iput(inode); |
628 | } | 638 | } |
@@ -712,23 +722,26 @@ static int rebind_subsystems(struct cgroupfs_root *root, | |||
712 | BUG_ON(cgrp->subsys[i]); | 722 | BUG_ON(cgrp->subsys[i]); |
713 | BUG_ON(!dummytop->subsys[i]); | 723 | BUG_ON(!dummytop->subsys[i]); |
714 | BUG_ON(dummytop->subsys[i]->cgroup != dummytop); | 724 | BUG_ON(dummytop->subsys[i]->cgroup != dummytop); |
725 | mutex_lock(&ss->hierarchy_mutex); | ||
715 | cgrp->subsys[i] = dummytop->subsys[i]; | 726 | cgrp->subsys[i] = dummytop->subsys[i]; |
716 | cgrp->subsys[i]->cgroup = cgrp; | 727 | cgrp->subsys[i]->cgroup = cgrp; |
717 | list_add(&ss->sibling, &root->subsys_list); | 728 | list_move(&ss->sibling, &root->subsys_list); |
718 | rcu_assign_pointer(ss->root, root); | 729 | ss->root = root; |
719 | if (ss->bind) | 730 | if (ss->bind) |
720 | ss->bind(ss, cgrp); | 731 | ss->bind(ss, cgrp); |
721 | 732 | mutex_unlock(&ss->hierarchy_mutex); | |
722 | } else if (bit & removed_bits) { | 733 | } else if (bit & removed_bits) { |
723 | /* We're removing this subsystem */ | 734 | /* We're removing this subsystem */ |
724 | BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]); | 735 | BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]); |
725 | BUG_ON(cgrp->subsys[i]->cgroup != cgrp); | 736 | BUG_ON(cgrp->subsys[i]->cgroup != cgrp); |
737 | mutex_lock(&ss->hierarchy_mutex); | ||
726 | if (ss->bind) | 738 | if (ss->bind) |
727 | ss->bind(ss, dummytop); | 739 | ss->bind(ss, dummytop); |
728 | dummytop->subsys[i]->cgroup = dummytop; | 740 | dummytop->subsys[i]->cgroup = dummytop; |
729 | cgrp->subsys[i] = NULL; | 741 | cgrp->subsys[i] = NULL; |
730 | rcu_assign_pointer(subsys[i]->root, &rootnode); | 742 | subsys[i]->root = &rootnode; |
731 | list_del(&ss->sibling); | 743 | list_move(&ss->sibling, &rootnode.subsys_list); |
744 | mutex_unlock(&ss->hierarchy_mutex); | ||
732 | } else if (bit & final_bits) { | 745 | } else if (bit & final_bits) { |
733 | /* Subsystem state should already exist */ | 746 | /* Subsystem state should already exist */ |
734 | BUG_ON(!cgrp->subsys[i]); | 747 | BUG_ON(!cgrp->subsys[i]); |
@@ -990,7 +1003,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type, | |||
990 | root = NULL; | 1003 | root = NULL; |
991 | } else { | 1004 | } else { |
992 | /* New superblock */ | 1005 | /* New superblock */ |
993 | struct cgroup *cgrp = &root->top_cgroup; | 1006 | struct cgroup *root_cgrp = &root->top_cgroup; |
994 | struct inode *inode; | 1007 | struct inode *inode; |
995 | int i; | 1008 | int i; |
996 | 1009 | ||
@@ -1031,7 +1044,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type, | |||
1031 | list_add(&root->root_list, &roots); | 1044 | list_add(&root->root_list, &roots); |
1032 | root_count++; | 1045 | root_count++; |
1033 | 1046 | ||
1034 | sb->s_root->d_fsdata = &root->top_cgroup; | 1047 | sb->s_root->d_fsdata = root_cgrp; |
1035 | root->top_cgroup.dentry = sb->s_root; | 1048 | root->top_cgroup.dentry = sb->s_root; |
1036 | 1049 | ||
1037 | /* Link the top cgroup in this hierarchy into all | 1050 | /* Link the top cgroup in this hierarchy into all |
@@ -1042,29 +1055,18 @@ static int cgroup_get_sb(struct file_system_type *fs_type, | |||
1042 | struct hlist_node *node; | 1055 | struct hlist_node *node; |
1043 | struct css_set *cg; | 1056 | struct css_set *cg; |
1044 | 1057 | ||
1045 | hlist_for_each_entry(cg, node, hhead, hlist) { | 1058 | hlist_for_each_entry(cg, node, hhead, hlist) |
1046 | struct cg_cgroup_link *link; | 1059 | link_css_set(&tmp_cg_links, cg, root_cgrp); |
1047 | |||
1048 | BUG_ON(list_empty(&tmp_cg_links)); | ||
1049 | link = list_entry(tmp_cg_links.next, | ||
1050 | struct cg_cgroup_link, | ||
1051 | cgrp_link_list); | ||
1052 | list_del(&link->cgrp_link_list); | ||
1053 | link->cg = cg; | ||
1054 | list_add(&link->cgrp_link_list, | ||
1055 | &root->top_cgroup.css_sets); | ||
1056 | list_add(&link->cg_link_list, &cg->cg_links); | ||
1057 | } | ||
1058 | } | 1060 | } |
1059 | write_unlock(&css_set_lock); | 1061 | write_unlock(&css_set_lock); |
1060 | 1062 | ||
1061 | free_cg_links(&tmp_cg_links); | 1063 | free_cg_links(&tmp_cg_links); |
1062 | 1064 | ||
1063 | BUG_ON(!list_empty(&cgrp->sibling)); | 1065 | BUG_ON(!list_empty(&root_cgrp->sibling)); |
1064 | BUG_ON(!list_empty(&cgrp->children)); | 1066 | BUG_ON(!list_empty(&root_cgrp->children)); |
1065 | BUG_ON(root->number_of_cgroups != 1); | 1067 | BUG_ON(root->number_of_cgroups != 1); |
1066 | 1068 | ||
1067 | cgroup_populate_dir(cgrp); | 1069 | cgroup_populate_dir(root_cgrp); |
1068 | mutex_unlock(&inode->i_mutex); | 1070 | mutex_unlock(&inode->i_mutex); |
1069 | mutex_unlock(&cgroup_mutex); | 1071 | mutex_unlock(&cgroup_mutex); |
1070 | } | 1072 | } |
@@ -1113,10 +1115,9 @@ static void cgroup_kill_sb(struct super_block *sb) { | |||
1113 | } | 1115 | } |
1114 | write_unlock(&css_set_lock); | 1116 | write_unlock(&css_set_lock); |
1115 | 1117 | ||
1116 | if (!list_empty(&root->root_list)) { | 1118 | list_del(&root->root_list); |
1117 | list_del(&root->root_list); | 1119 | root_count--; |
1118 | root_count--; | 1120 | |
1119 | } | ||
1120 | mutex_unlock(&cgroup_mutex); | 1121 | mutex_unlock(&cgroup_mutex); |
1121 | 1122 | ||
1122 | kfree(root); | 1123 | kfree(root); |
@@ -1145,14 +1146,16 @@ static inline struct cftype *__d_cft(struct dentry *dentry) | |||
1145 | * @buf: the buffer to write the path into | 1146 | * @buf: the buffer to write the path into |
1146 | * @buflen: the length of the buffer | 1147 | * @buflen: the length of the buffer |
1147 | * | 1148 | * |
1148 | * Called with cgroup_mutex held. Writes path of cgroup into buf. | 1149 | * Called with cgroup_mutex held or else with an RCU-protected cgroup |
1149 | * Returns 0 on success, -errno on error. | 1150 | * reference. Writes path of cgroup into buf. Returns 0 on success, |
1151 | * -errno on error. | ||
1150 | */ | 1152 | */ |
1151 | int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) | 1153 | int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) |
1152 | { | 1154 | { |
1153 | char *start; | 1155 | char *start; |
1156 | struct dentry *dentry = rcu_dereference(cgrp->dentry); | ||
1154 | 1157 | ||
1155 | if (cgrp == dummytop) { | 1158 | if (!dentry || cgrp == dummytop) { |
1156 | /* | 1159 | /* |
1157 | * Inactive subsystems have no dentry for their root | 1160 | * Inactive subsystems have no dentry for their root |
1158 | * cgroup | 1161 | * cgroup |
@@ -1165,13 +1168,14 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) | |||
1165 | 1168 | ||
1166 | *--start = '\0'; | 1169 | *--start = '\0'; |
1167 | for (;;) { | 1170 | for (;;) { |
1168 | int len = cgrp->dentry->d_name.len; | 1171 | int len = dentry->d_name.len; |
1169 | if ((start -= len) < buf) | 1172 | if ((start -= len) < buf) |
1170 | return -ENAMETOOLONG; | 1173 | return -ENAMETOOLONG; |
1171 | memcpy(start, cgrp->dentry->d_name.name, len); | 1174 | memcpy(start, cgrp->dentry->d_name.name, len); |
1172 | cgrp = cgrp->parent; | 1175 | cgrp = cgrp->parent; |
1173 | if (!cgrp) | 1176 | if (!cgrp) |
1174 | break; | 1177 | break; |
1178 | dentry = rcu_dereference(cgrp->dentry); | ||
1175 | if (!cgrp->parent) | 1179 | if (!cgrp->parent) |
1176 | continue; | 1180 | continue; |
1177 | if (--start < buf) | 1181 | if (--start < buf) |
@@ -1216,7 +1220,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | |||
1216 | int retval = 0; | 1220 | int retval = 0; |
1217 | struct cgroup_subsys *ss; | 1221 | struct cgroup_subsys *ss; |
1218 | struct cgroup *oldcgrp; | 1222 | struct cgroup *oldcgrp; |
1219 | struct css_set *cg = tsk->cgroups; | 1223 | struct css_set *cg; |
1220 | struct css_set *newcg; | 1224 | struct css_set *newcg; |
1221 | struct cgroupfs_root *root = cgrp->root; | 1225 | struct cgroupfs_root *root = cgrp->root; |
1222 | int subsys_id; | 1226 | int subsys_id; |
@@ -1236,11 +1240,16 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | |||
1236 | } | 1240 | } |
1237 | } | 1241 | } |
1238 | 1242 | ||
1243 | task_lock(tsk); | ||
1244 | cg = tsk->cgroups; | ||
1245 | get_css_set(cg); | ||
1246 | task_unlock(tsk); | ||
1239 | /* | 1247 | /* |
1240 | * Locate or allocate a new css_set for this task, | 1248 | * Locate or allocate a new css_set for this task, |
1241 | * based on its final set of cgroups | 1249 | * based on its final set of cgroups |
1242 | */ | 1250 | */ |
1243 | newcg = find_css_set(cg, cgrp); | 1251 | newcg = find_css_set(cg, cgrp); |
1252 | put_css_set(cg); | ||
1244 | if (!newcg) | 1253 | if (!newcg) |
1245 | return -ENOMEM; | 1254 | return -ENOMEM; |
1246 | 1255 | ||
@@ -1445,7 +1454,7 @@ static ssize_t cgroup_file_write(struct file *file, const char __user *buf, | |||
1445 | struct cftype *cft = __d_cft(file->f_dentry); | 1454 | struct cftype *cft = __d_cft(file->f_dentry); |
1446 | struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); | 1455 | struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); |
1447 | 1456 | ||
1448 | if (!cft || cgroup_is_removed(cgrp)) | 1457 | if (cgroup_is_removed(cgrp)) |
1449 | return -ENODEV; | 1458 | return -ENODEV; |
1450 | if (cft->write) | 1459 | if (cft->write) |
1451 | return cft->write(cgrp, cft, file, buf, nbytes, ppos); | 1460 | return cft->write(cgrp, cft, file, buf, nbytes, ppos); |
@@ -1490,7 +1499,7 @@ static ssize_t cgroup_file_read(struct file *file, char __user *buf, | |||
1490 | struct cftype *cft = __d_cft(file->f_dentry); | 1499 | struct cftype *cft = __d_cft(file->f_dentry); |
1491 | struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); | 1500 | struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); |
1492 | 1501 | ||
1493 | if (!cft || cgroup_is_removed(cgrp)) | 1502 | if (cgroup_is_removed(cgrp)) |
1494 | return -ENODEV; | 1503 | return -ENODEV; |
1495 | 1504 | ||
1496 | if (cft->read) | 1505 | if (cft->read) |
@@ -1554,10 +1563,8 @@ static int cgroup_file_open(struct inode *inode, struct file *file) | |||
1554 | err = generic_file_open(inode, file); | 1563 | err = generic_file_open(inode, file); |
1555 | if (err) | 1564 | if (err) |
1556 | return err; | 1565 | return err; |
1557 | |||
1558 | cft = __d_cft(file->f_dentry); | 1566 | cft = __d_cft(file->f_dentry); |
1559 | if (!cft) | 1567 | |
1560 | return -ENODEV; | ||
1561 | if (cft->read_map || cft->read_seq_string) { | 1568 | if (cft->read_map || cft->read_seq_string) { |
1562 | struct cgroup_seqfile_state *state = | 1569 | struct cgroup_seqfile_state *state = |
1563 | kzalloc(sizeof(*state), GFP_USER); | 1570 | kzalloc(sizeof(*state), GFP_USER); |
@@ -1671,7 +1678,7 @@ static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry, | |||
1671 | if (!error) { | 1678 | if (!error) { |
1672 | dentry->d_fsdata = cgrp; | 1679 | dentry->d_fsdata = cgrp; |
1673 | inc_nlink(parent->d_inode); | 1680 | inc_nlink(parent->d_inode); |
1674 | cgrp->dentry = dentry; | 1681 | rcu_assign_pointer(cgrp->dentry, dentry); |
1675 | dget(dentry); | 1682 | dget(dentry); |
1676 | } | 1683 | } |
1677 | dput(dentry); | 1684 | dput(dentry); |
@@ -1812,6 +1819,7 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp, | |||
1812 | { | 1819 | { |
1813 | struct task_struct *res; | 1820 | struct task_struct *res; |
1814 | struct list_head *l = it->task; | 1821 | struct list_head *l = it->task; |
1822 | struct cg_cgroup_link *link; | ||
1815 | 1823 | ||
1816 | /* If the iterator cg is NULL, we have no tasks */ | 1824 | /* If the iterator cg is NULL, we have no tasks */ |
1817 | if (!it->cg_link) | 1825 | if (!it->cg_link) |
@@ -1819,7 +1827,8 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp, | |||
1819 | res = list_entry(l, struct task_struct, cg_list); | 1827 | res = list_entry(l, struct task_struct, cg_list); |
1820 | /* Advance iterator to find next entry */ | 1828 | /* Advance iterator to find next entry */ |
1821 | l = l->next; | 1829 | l = l->next; |
1822 | if (l == &res->cgroups->tasks) { | 1830 | link = list_entry(it->cg_link, struct cg_cgroup_link, cgrp_link_list); |
1831 | if (l == &link->cg->tasks) { | ||
1823 | /* We reached the end of this task list - move on to | 1832 | /* We reached the end of this task list - move on to |
1824 | * the next cg_cgroup_link */ | 1833 | * the next cg_cgroup_link */ |
1825 | cgroup_advance_iter(cgrp, it); | 1834 | cgroup_advance_iter(cgrp, it); |
@@ -2013,14 +2022,16 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan) | |||
2013 | */ | 2022 | */ |
2014 | static int pid_array_load(pid_t *pidarray, int npids, struct cgroup *cgrp) | 2023 | static int pid_array_load(pid_t *pidarray, int npids, struct cgroup *cgrp) |
2015 | { | 2024 | { |
2016 | int n = 0; | 2025 | int n = 0, pid; |
2017 | struct cgroup_iter it; | 2026 | struct cgroup_iter it; |
2018 | struct task_struct *tsk; | 2027 | struct task_struct *tsk; |
2019 | cgroup_iter_start(cgrp, &it); | 2028 | cgroup_iter_start(cgrp, &it); |
2020 | while ((tsk = cgroup_iter_next(cgrp, &it))) { | 2029 | while ((tsk = cgroup_iter_next(cgrp, &it))) { |
2021 | if (unlikely(n == npids)) | 2030 | if (unlikely(n == npids)) |
2022 | break; | 2031 | break; |
2023 | pidarray[n++] = task_pid_vnr(tsk); | 2032 | pid = task_pid_vnr(tsk); |
2033 | if (pid > 0) | ||
2034 | pidarray[n++] = pid; | ||
2024 | } | 2035 | } |
2025 | cgroup_iter_end(cgrp, &it); | 2036 | cgroup_iter_end(cgrp, &it); |
2026 | return n; | 2037 | return n; |
@@ -2052,7 +2063,6 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry) | |||
2052 | 2063 | ||
2053 | ret = 0; | 2064 | ret = 0; |
2054 | cgrp = dentry->d_fsdata; | 2065 | cgrp = dentry->d_fsdata; |
2055 | rcu_read_lock(); | ||
2056 | 2066 | ||
2057 | cgroup_iter_start(cgrp, &it); | 2067 | cgroup_iter_start(cgrp, &it); |
2058 | while ((tsk = cgroup_iter_next(cgrp, &it))) { | 2068 | while ((tsk = cgroup_iter_next(cgrp, &it))) { |
@@ -2077,7 +2087,6 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry) | |||
2077 | } | 2087 | } |
2078 | cgroup_iter_end(cgrp, &it); | 2088 | cgroup_iter_end(cgrp, &it); |
2079 | 2089 | ||
2080 | rcu_read_unlock(); | ||
2081 | err: | 2090 | err: |
2082 | return ret; | 2091 | return ret; |
2083 | } | 2092 | } |
@@ -2324,7 +2333,7 @@ static void init_cgroup_css(struct cgroup_subsys_state *css, | |||
2324 | struct cgroup *cgrp) | 2333 | struct cgroup *cgrp) |
2325 | { | 2334 | { |
2326 | css->cgroup = cgrp; | 2335 | css->cgroup = cgrp; |
2327 | atomic_set(&css->refcnt, 0); | 2336 | atomic_set(&css->refcnt, 1); |
2328 | css->flags = 0; | 2337 | css->flags = 0; |
2329 | if (cgrp == dummytop) | 2338 | if (cgrp == dummytop) |
2330 | set_bit(CSS_ROOT, &css->flags); | 2339 | set_bit(CSS_ROOT, &css->flags); |
@@ -2332,6 +2341,29 @@ static void init_cgroup_css(struct cgroup_subsys_state *css, | |||
2332 | cgrp->subsys[ss->subsys_id] = css; | 2341 | cgrp->subsys[ss->subsys_id] = css; |
2333 | } | 2342 | } |
2334 | 2343 | ||
2344 | static void cgroup_lock_hierarchy(struct cgroupfs_root *root) | ||
2345 | { | ||
2346 | /* We need to take each hierarchy_mutex in a consistent order */ | ||
2347 | int i; | ||
2348 | |||
2349 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | ||
2350 | struct cgroup_subsys *ss = subsys[i]; | ||
2351 | if (ss->root == root) | ||
2352 | mutex_lock_nested(&ss->hierarchy_mutex, i); | ||
2353 | } | ||
2354 | } | ||
2355 | |||
2356 | static void cgroup_unlock_hierarchy(struct cgroupfs_root *root) | ||
2357 | { | ||
2358 | int i; | ||
2359 | |||
2360 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | ||
2361 | struct cgroup_subsys *ss = subsys[i]; | ||
2362 | if (ss->root == root) | ||
2363 | mutex_unlock(&ss->hierarchy_mutex); | ||
2364 | } | ||
2365 | } | ||
2366 | |||
2335 | /* | 2367 | /* |
2336 | * cgroup_create - create a cgroup | 2368 | * cgroup_create - create a cgroup |
2337 | * @parent: cgroup that will be parent of the new cgroup | 2369 | * @parent: cgroup that will be parent of the new cgroup |
@@ -2380,7 +2412,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
2380 | init_cgroup_css(css, ss, cgrp); | 2412 | init_cgroup_css(css, ss, cgrp); |
2381 | } | 2413 | } |
2382 | 2414 | ||
2415 | cgroup_lock_hierarchy(root); | ||
2383 | list_add(&cgrp->sibling, &cgrp->parent->children); | 2416 | list_add(&cgrp->sibling, &cgrp->parent->children); |
2417 | cgroup_unlock_hierarchy(root); | ||
2384 | root->number_of_cgroups++; | 2418 | root->number_of_cgroups++; |
2385 | 2419 | ||
2386 | err = cgroup_create_dir(cgrp, dentry, mode); | 2420 | err = cgroup_create_dir(cgrp, dentry, mode); |
@@ -2431,7 +2465,7 @@ static int cgroup_has_css_refs(struct cgroup *cgrp) | |||
2431 | { | 2465 | { |
2432 | /* Check the reference count on each subsystem. Since we | 2466 | /* Check the reference count on each subsystem. Since we |
2433 | * already established that there are no tasks in the | 2467 | * already established that there are no tasks in the |
2434 | * cgroup, if the css refcount is also 0, then there should | 2468 | * cgroup, if the css refcount is also 1, then there should |
2435 | * be no outstanding references, so the subsystem is safe to | 2469 | * be no outstanding references, so the subsystem is safe to |
2436 | * destroy. We scan across all subsystems rather than using | 2470 | * destroy. We scan across all subsystems rather than using |
2437 | * the per-hierarchy linked list of mounted subsystems since | 2471 | * the per-hierarchy linked list of mounted subsystems since |
@@ -2452,19 +2486,67 @@ static int cgroup_has_css_refs(struct cgroup *cgrp) | |||
2452 | * matter, since it can only happen if the cgroup | 2486 | * matter, since it can only happen if the cgroup |
2453 | * has been deleted and hence no longer needs the | 2487 | * has been deleted and hence no longer needs the |
2454 | * release agent to be called anyway. */ | 2488 | * release agent to be called anyway. */ |
2455 | if (css && atomic_read(&css->refcnt)) | 2489 | if (css && (atomic_read(&css->refcnt) > 1)) |
2456 | return 1; | 2490 | return 1; |
2457 | } | 2491 | } |
2458 | return 0; | 2492 | return 0; |
2459 | } | 2493 | } |
2460 | 2494 | ||
2495 | /* | ||
2496 | * Atomically mark all (or else none) of the cgroup's CSS objects as | ||
2497 | * CSS_REMOVED. Return true on success, or false if the cgroup has | ||
2498 | * busy subsystems. Call with cgroup_mutex held | ||
2499 | */ | ||
2500 | |||
2501 | static int cgroup_clear_css_refs(struct cgroup *cgrp) | ||
2502 | { | ||
2503 | struct cgroup_subsys *ss; | ||
2504 | unsigned long flags; | ||
2505 | bool failed = false; | ||
2506 | local_irq_save(flags); | ||
2507 | for_each_subsys(cgrp->root, ss) { | ||
2508 | struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; | ||
2509 | int refcnt; | ||
2510 | do { | ||
2511 | /* We can only remove a CSS with a refcnt==1 */ | ||
2512 | refcnt = atomic_read(&css->refcnt); | ||
2513 | if (refcnt > 1) { | ||
2514 | failed = true; | ||
2515 | goto done; | ||
2516 | } | ||
2517 | BUG_ON(!refcnt); | ||
2518 | /* | ||
2519 | * Drop the refcnt to 0 while we check other | ||
2520 | * subsystems. This will cause any racing | ||
2521 | * css_tryget() to spin until we set the | ||
2522 | * CSS_REMOVED bits or abort | ||
2523 | */ | ||
2524 | } while (atomic_cmpxchg(&css->refcnt, refcnt, 0) != refcnt); | ||
2525 | } | ||
2526 | done: | ||
2527 | for_each_subsys(cgrp->root, ss) { | ||
2528 | struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; | ||
2529 | if (failed) { | ||
2530 | /* | ||
2531 | * Restore old refcnt if we previously managed | ||
2532 | * to clear it from 1 to 0 | ||
2533 | */ | ||
2534 | if (!atomic_read(&css->refcnt)) | ||
2535 | atomic_set(&css->refcnt, 1); | ||
2536 | } else { | ||
2537 | /* Commit the fact that the CSS is removed */ | ||
2538 | set_bit(CSS_REMOVED, &css->flags); | ||
2539 | } | ||
2540 | } | ||
2541 | local_irq_restore(flags); | ||
2542 | return !failed; | ||
2543 | } | ||
2544 | |||
2461 | static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) | 2545 | static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) |
2462 | { | 2546 | { |
2463 | struct cgroup *cgrp = dentry->d_fsdata; | 2547 | struct cgroup *cgrp = dentry->d_fsdata; |
2464 | struct dentry *d; | 2548 | struct dentry *d; |
2465 | struct cgroup *parent; | 2549 | struct cgroup *parent; |
2466 | struct super_block *sb; | ||
2467 | struct cgroupfs_root *root; | ||
2468 | 2550 | ||
2469 | /* the vfs holds both inode->i_mutex already */ | 2551 | /* the vfs holds both inode->i_mutex already */ |
2470 | 2552 | ||
@@ -2487,12 +2569,10 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) | |||
2487 | 2569 | ||
2488 | mutex_lock(&cgroup_mutex); | 2570 | mutex_lock(&cgroup_mutex); |
2489 | parent = cgrp->parent; | 2571 | parent = cgrp->parent; |
2490 | root = cgrp->root; | ||
2491 | sb = root->sb; | ||
2492 | 2572 | ||
2493 | if (atomic_read(&cgrp->count) | 2573 | if (atomic_read(&cgrp->count) |
2494 | || !list_empty(&cgrp->children) | 2574 | || !list_empty(&cgrp->children) |
2495 | || cgroup_has_css_refs(cgrp)) { | 2575 | || !cgroup_clear_css_refs(cgrp)) { |
2496 | mutex_unlock(&cgroup_mutex); | 2576 | mutex_unlock(&cgroup_mutex); |
2497 | return -EBUSY; | 2577 | return -EBUSY; |
2498 | } | 2578 | } |
@@ -2502,8 +2582,12 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) | |||
2502 | if (!list_empty(&cgrp->release_list)) | 2582 | if (!list_empty(&cgrp->release_list)) |
2503 | list_del(&cgrp->release_list); | 2583 | list_del(&cgrp->release_list); |
2504 | spin_unlock(&release_list_lock); | 2584 | spin_unlock(&release_list_lock); |
2505 | /* delete my sibling from parent->children */ | 2585 | |
2586 | cgroup_lock_hierarchy(cgrp->root); | ||
2587 | /* delete this cgroup from parent->children */ | ||
2506 | list_del(&cgrp->sibling); | 2588 | list_del(&cgrp->sibling); |
2589 | cgroup_unlock_hierarchy(cgrp->root); | ||
2590 | |||
2507 | spin_lock(&cgrp->dentry->d_lock); | 2591 | spin_lock(&cgrp->dentry->d_lock); |
2508 | d = dget(cgrp->dentry); | 2592 | d = dget(cgrp->dentry); |
2509 | spin_unlock(&d->d_lock); | 2593 | spin_unlock(&d->d_lock); |
@@ -2525,6 +2609,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss) | |||
2525 | printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name); | 2609 | printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name); |
2526 | 2610 | ||
2527 | /* Create the top cgroup state for this subsystem */ | 2611 | /* Create the top cgroup state for this subsystem */ |
2612 | list_add(&ss->sibling, &rootnode.subsys_list); | ||
2528 | ss->root = &rootnode; | 2613 | ss->root = &rootnode; |
2529 | css = ss->create(ss, dummytop); | 2614 | css = ss->create(ss, dummytop); |
2530 | /* We don't handle early failures gracefully */ | 2615 | /* We don't handle early failures gracefully */ |
@@ -2544,6 +2629,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss) | |||
2544 | * need to invoke fork callbacks here. */ | 2629 | * need to invoke fork callbacks here. */ |
2545 | BUG_ON(!list_empty(&init_task.tasks)); | 2630 | BUG_ON(!list_empty(&init_task.tasks)); |
2546 | 2631 | ||
2632 | mutex_init(&ss->hierarchy_mutex); | ||
2547 | ss->active = 1; | 2633 | ss->active = 1; |
2548 | } | 2634 | } |
2549 | 2635 | ||
@@ -2562,7 +2648,6 @@ int __init cgroup_init_early(void) | |||
2562 | INIT_HLIST_NODE(&init_css_set.hlist); | 2648 | INIT_HLIST_NODE(&init_css_set.hlist); |
2563 | css_set_count = 1; | 2649 | css_set_count = 1; |
2564 | init_cgroup_root(&rootnode); | 2650 | init_cgroup_root(&rootnode); |
2565 | list_add(&rootnode.root_list, &roots); | ||
2566 | root_count = 1; | 2651 | root_count = 1; |
2567 | init_task.cgroups = &init_css_set; | 2652 | init_task.cgroups = &init_css_set; |
2568 | 2653 | ||
@@ -2669,15 +2754,12 @@ static int proc_cgroup_show(struct seq_file *m, void *v) | |||
2669 | 2754 | ||
2670 | mutex_lock(&cgroup_mutex); | 2755 | mutex_lock(&cgroup_mutex); |
2671 | 2756 | ||
2672 | for_each_root(root) { | 2757 | for_each_active_root(root) { |
2673 | struct cgroup_subsys *ss; | 2758 | struct cgroup_subsys *ss; |
2674 | struct cgroup *cgrp; | 2759 | struct cgroup *cgrp; |
2675 | int subsys_id; | 2760 | int subsys_id; |
2676 | int count = 0; | 2761 | int count = 0; |
2677 | 2762 | ||
2678 | /* Skip this hierarchy if it has no active subsystems */ | ||
2679 | if (!root->actual_subsys_bits) | ||
2680 | continue; | ||
2681 | seq_printf(m, "%lu:", root->subsys_bits); | 2763 | seq_printf(m, "%lu:", root->subsys_bits); |
2682 | for_each_subsys(root, ss) | 2764 | for_each_subsys(root, ss) |
2683 | seq_printf(m, "%s%s", count++ ? "," : "", ss->name); | 2765 | seq_printf(m, "%s%s", count++ ? "," : "", ss->name); |
@@ -2800,8 +2882,10 @@ void cgroup_post_fork(struct task_struct *child) | |||
2800 | { | 2882 | { |
2801 | if (use_task_css_set_links) { | 2883 | if (use_task_css_set_links) { |
2802 | write_lock(&css_set_lock); | 2884 | write_lock(&css_set_lock); |
2885 | task_lock(child); | ||
2803 | if (list_empty(&child->cg_list)) | 2886 | if (list_empty(&child->cg_list)) |
2804 | list_add(&child->cg_list, &child->cgroups->tasks); | 2887 | list_add(&child->cg_list, &child->cgroups->tasks); |
2888 | task_unlock(child); | ||
2805 | write_unlock(&css_set_lock); | 2889 | write_unlock(&css_set_lock); |
2806 | } | 2890 | } |
2807 | } | 2891 | } |
@@ -2907,6 +2991,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys, | |||
2907 | mutex_unlock(&cgroup_mutex); | 2991 | mutex_unlock(&cgroup_mutex); |
2908 | return 0; | 2992 | return 0; |
2909 | } | 2993 | } |
2994 | task_lock(tsk); | ||
2910 | cg = tsk->cgroups; | 2995 | cg = tsk->cgroups; |
2911 | parent = task_cgroup(tsk, subsys->subsys_id); | 2996 | parent = task_cgroup(tsk, subsys->subsys_id); |
2912 | 2997 | ||
@@ -2919,6 +3004,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys, | |||
2919 | 3004 | ||
2920 | /* Keep the cgroup alive */ | 3005 | /* Keep the cgroup alive */ |
2921 | get_css_set(cg); | 3006 | get_css_set(cg); |
3007 | task_unlock(tsk); | ||
2922 | mutex_unlock(&cgroup_mutex); | 3008 | mutex_unlock(&cgroup_mutex); |
2923 | 3009 | ||
2924 | /* Now do the VFS work to create a cgroup */ | 3010 | /* Now do the VFS work to create a cgroup */ |
@@ -2937,7 +3023,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys, | |||
2937 | } | 3023 | } |
2938 | 3024 | ||
2939 | /* Create the cgroup directory, which also creates the cgroup */ | 3025 | /* Create the cgroup directory, which also creates the cgroup */ |
2940 | ret = vfs_mkdir(inode, dentry, S_IFDIR | 0755); | 3026 | ret = vfs_mkdir(inode, dentry, 0755); |
2941 | child = __d_cgrp(dentry); | 3027 | child = __d_cgrp(dentry); |
2942 | dput(dentry); | 3028 | dput(dentry); |
2943 | if (ret) { | 3029 | if (ret) { |
@@ -2947,13 +3033,6 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys, | |||
2947 | goto out_release; | 3033 | goto out_release; |
2948 | } | 3034 | } |
2949 | 3035 | ||
2950 | if (!child) { | ||
2951 | printk(KERN_INFO | ||
2952 | "Couldn't find new cgroup %s\n", nodename); | ||
2953 | ret = -ENOMEM; | ||
2954 | goto out_release; | ||
2955 | } | ||
2956 | |||
2957 | /* The cgroup now exists. Retake cgroup_mutex and check | 3036 | /* The cgroup now exists. Retake cgroup_mutex and check |
2958 | * that we're still in the same state that we thought we | 3037 | * that we're still in the same state that we thought we |
2959 | * were. */ | 3038 | * were. */ |
@@ -3049,7 +3128,8 @@ void __css_put(struct cgroup_subsys_state *css) | |||
3049 | { | 3128 | { |
3050 | struct cgroup *cgrp = css->cgroup; | 3129 | struct cgroup *cgrp = css->cgroup; |
3051 | rcu_read_lock(); | 3130 | rcu_read_lock(); |
3052 | if (atomic_dec_and_test(&css->refcnt) && notify_on_release(cgrp)) { | 3131 | if ((atomic_dec_return(&css->refcnt) == 1) && |
3132 | notify_on_release(cgrp)) { | ||
3053 | set_bit(CGRP_RELEASABLE, &cgrp->flags); | 3133 | set_bit(CGRP_RELEASABLE, &cgrp->flags); |
3054 | check_for_release(cgrp); | 3134 | check_for_release(cgrp); |
3055 | } | 3135 | } |
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 345ace5117de..647c77a88fcb 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -84,7 +84,7 @@ struct cpuset { | |||
84 | struct cgroup_subsys_state css; | 84 | struct cgroup_subsys_state css; |
85 | 85 | ||
86 | unsigned long flags; /* "unsigned long" so bitops work */ | 86 | unsigned long flags; /* "unsigned long" so bitops work */ |
87 | cpumask_t cpus_allowed; /* CPUs allowed to tasks in cpuset */ | 87 | cpumask_var_t cpus_allowed; /* CPUs allowed to tasks in cpuset */ |
88 | nodemask_t mems_allowed; /* Memory Nodes allowed to tasks */ | 88 | nodemask_t mems_allowed; /* Memory Nodes allowed to tasks */ |
89 | 89 | ||
90 | struct cpuset *parent; /* my parent */ | 90 | struct cpuset *parent; /* my parent */ |
@@ -195,8 +195,6 @@ static int cpuset_mems_generation; | |||
195 | 195 | ||
196 | static struct cpuset top_cpuset = { | 196 | static struct cpuset top_cpuset = { |
197 | .flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)), | 197 | .flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)), |
198 | .cpus_allowed = CPU_MASK_ALL, | ||
199 | .mems_allowed = NODE_MASK_ALL, | ||
200 | }; | 198 | }; |
201 | 199 | ||
202 | /* | 200 | /* |
@@ -278,7 +276,7 @@ static struct file_system_type cpuset_fs_type = { | |||
278 | }; | 276 | }; |
279 | 277 | ||
280 | /* | 278 | /* |
281 | * Return in *pmask the portion of a cpusets's cpus_allowed that | 279 | * Return in pmask the portion of a cpusets's cpus_allowed that |
282 | * are online. If none are online, walk up the cpuset hierarchy | 280 | * are online. If none are online, walk up the cpuset hierarchy |
283 | * until we find one that does have some online cpus. If we get | 281 | * until we find one that does have some online cpus. If we get |
284 | * all the way to the top and still haven't found any online cpus, | 282 | * all the way to the top and still haven't found any online cpus, |
@@ -291,15 +289,16 @@ static struct file_system_type cpuset_fs_type = { | |||
291 | * Call with callback_mutex held. | 289 | * Call with callback_mutex held. |
292 | */ | 290 | */ |
293 | 291 | ||
294 | static void guarantee_online_cpus(const struct cpuset *cs, cpumask_t *pmask) | 292 | static void guarantee_online_cpus(const struct cpuset *cs, |
293 | struct cpumask *pmask) | ||
295 | { | 294 | { |
296 | while (cs && !cpus_intersects(cs->cpus_allowed, cpu_online_map)) | 295 | while (cs && !cpumask_intersects(cs->cpus_allowed, cpu_online_mask)) |
297 | cs = cs->parent; | 296 | cs = cs->parent; |
298 | if (cs) | 297 | if (cs) |
299 | cpus_and(*pmask, cs->cpus_allowed, cpu_online_map); | 298 | cpumask_and(pmask, cs->cpus_allowed, cpu_online_mask); |
300 | else | 299 | else |
301 | *pmask = cpu_online_map; | 300 | cpumask_copy(pmask, cpu_online_mask); |
302 | BUG_ON(!cpus_intersects(*pmask, cpu_online_map)); | 301 | BUG_ON(!cpumask_intersects(pmask, cpu_online_mask)); |
303 | } | 302 | } |
304 | 303 | ||
305 | /* | 304 | /* |
@@ -375,14 +374,9 @@ void cpuset_update_task_memory_state(void) | |||
375 | struct task_struct *tsk = current; | 374 | struct task_struct *tsk = current; |
376 | struct cpuset *cs; | 375 | struct cpuset *cs; |
377 | 376 | ||
378 | if (task_cs(tsk) == &top_cpuset) { | 377 | rcu_read_lock(); |
379 | /* Don't need rcu for top_cpuset. It's never freed. */ | 378 | my_cpusets_mem_gen = task_cs(tsk)->mems_generation; |
380 | my_cpusets_mem_gen = top_cpuset.mems_generation; | 379 | rcu_read_unlock(); |
381 | } else { | ||
382 | rcu_read_lock(); | ||
383 | my_cpusets_mem_gen = task_cs(tsk)->mems_generation; | ||
384 | rcu_read_unlock(); | ||
385 | } | ||
386 | 380 | ||
387 | if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) { | 381 | if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) { |
388 | mutex_lock(&callback_mutex); | 382 | mutex_lock(&callback_mutex); |
@@ -414,12 +408,43 @@ void cpuset_update_task_memory_state(void) | |||
414 | 408 | ||
415 | static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q) | 409 | static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q) |
416 | { | 410 | { |
417 | return cpus_subset(p->cpus_allowed, q->cpus_allowed) && | 411 | return cpumask_subset(p->cpus_allowed, q->cpus_allowed) && |
418 | nodes_subset(p->mems_allowed, q->mems_allowed) && | 412 | nodes_subset(p->mems_allowed, q->mems_allowed) && |
419 | is_cpu_exclusive(p) <= is_cpu_exclusive(q) && | 413 | is_cpu_exclusive(p) <= is_cpu_exclusive(q) && |
420 | is_mem_exclusive(p) <= is_mem_exclusive(q); | 414 | is_mem_exclusive(p) <= is_mem_exclusive(q); |
421 | } | 415 | } |
422 | 416 | ||
417 | /** | ||
418 | * alloc_trial_cpuset - allocate a trial cpuset | ||
419 | * @cs: the cpuset that the trial cpuset duplicates | ||
420 | */ | ||
421 | static struct cpuset *alloc_trial_cpuset(const struct cpuset *cs) | ||
422 | { | ||
423 | struct cpuset *trial; | ||
424 | |||
425 | trial = kmemdup(cs, sizeof(*cs), GFP_KERNEL); | ||
426 | if (!trial) | ||
427 | return NULL; | ||
428 | |||
429 | if (!alloc_cpumask_var(&trial->cpus_allowed, GFP_KERNEL)) { | ||
430 | kfree(trial); | ||
431 | return NULL; | ||
432 | } | ||
433 | cpumask_copy(trial->cpus_allowed, cs->cpus_allowed); | ||
434 | |||
435 | return trial; | ||
436 | } | ||
437 | |||
438 | /** | ||
439 | * free_trial_cpuset - free the trial cpuset | ||
440 | * @trial: the trial cpuset to be freed | ||
441 | */ | ||
442 | static void free_trial_cpuset(struct cpuset *trial) | ||
443 | { | ||
444 | free_cpumask_var(trial->cpus_allowed); | ||
445 | kfree(trial); | ||
446 | } | ||
447 | |||
423 | /* | 448 | /* |
424 | * validate_change() - Used to validate that any proposed cpuset change | 449 | * validate_change() - Used to validate that any proposed cpuset change |
425 | * follows the structural rules for cpusets. | 450 | * follows the structural rules for cpusets. |
@@ -469,7 +494,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial) | |||
469 | c = cgroup_cs(cont); | 494 | c = cgroup_cs(cont); |
470 | if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) && | 495 | if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) && |
471 | c != cur && | 496 | c != cur && |
472 | cpus_intersects(trial->cpus_allowed, c->cpus_allowed)) | 497 | cpumask_intersects(trial->cpus_allowed, c->cpus_allowed)) |
473 | return -EINVAL; | 498 | return -EINVAL; |
474 | if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) && | 499 | if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) && |
475 | c != cur && | 500 | c != cur && |
@@ -479,7 +504,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial) | |||
479 | 504 | ||
480 | /* Cpusets with tasks can't have empty cpus_allowed or mems_allowed */ | 505 | /* Cpusets with tasks can't have empty cpus_allowed or mems_allowed */ |
481 | if (cgroup_task_count(cur->css.cgroup)) { | 506 | if (cgroup_task_count(cur->css.cgroup)) { |
482 | if (cpus_empty(trial->cpus_allowed) || | 507 | if (cpumask_empty(trial->cpus_allowed) || |
483 | nodes_empty(trial->mems_allowed)) { | 508 | nodes_empty(trial->mems_allowed)) { |
484 | return -ENOSPC; | 509 | return -ENOSPC; |
485 | } | 510 | } |
@@ -494,7 +519,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial) | |||
494 | */ | 519 | */ |
495 | static int cpusets_overlap(struct cpuset *a, struct cpuset *b) | 520 | static int cpusets_overlap(struct cpuset *a, struct cpuset *b) |
496 | { | 521 | { |
497 | return cpus_intersects(a->cpus_allowed, b->cpus_allowed); | 522 | return cpumask_intersects(a->cpus_allowed, b->cpus_allowed); |
498 | } | 523 | } |
499 | 524 | ||
500 | static void | 525 | static void |
@@ -519,7 +544,7 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c) | |||
519 | cp = list_first_entry(&q, struct cpuset, stack_list); | 544 | cp = list_first_entry(&q, struct cpuset, stack_list); |
520 | list_del(q.next); | 545 | list_del(q.next); |
521 | 546 | ||
522 | if (cpus_empty(cp->cpus_allowed)) | 547 | if (cpumask_empty(cp->cpus_allowed)) |
523 | continue; | 548 | continue; |
524 | 549 | ||
525 | if (is_sched_load_balance(cp)) | 550 | if (is_sched_load_balance(cp)) |
@@ -586,7 +611,8 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c) | |||
586 | * element of the partition (one sched domain) to be passed to | 611 | * element of the partition (one sched domain) to be passed to |
587 | * partition_sched_domains(). | 612 | * partition_sched_domains(). |
588 | */ | 613 | */ |
589 | static int generate_sched_domains(cpumask_t **domains, | 614 | /* FIXME: see the FIXME in partition_sched_domains() */ |
615 | static int generate_sched_domains(struct cpumask **domains, | ||
590 | struct sched_domain_attr **attributes) | 616 | struct sched_domain_attr **attributes) |
591 | { | 617 | { |
592 | LIST_HEAD(q); /* queue of cpusets to be scanned */ | 618 | LIST_HEAD(q); /* queue of cpusets to be scanned */ |
@@ -594,10 +620,10 @@ static int generate_sched_domains(cpumask_t **domains, | |||
594 | struct cpuset **csa; /* array of all cpuset ptrs */ | 620 | struct cpuset **csa; /* array of all cpuset ptrs */ |
595 | int csn; /* how many cpuset ptrs in csa so far */ | 621 | int csn; /* how many cpuset ptrs in csa so far */ |
596 | int i, j, k; /* indices for partition finding loops */ | 622 | int i, j, k; /* indices for partition finding loops */ |
597 | cpumask_t *doms; /* resulting partition; i.e. sched domains */ | 623 | struct cpumask *doms; /* resulting partition; i.e. sched domains */ |
598 | struct sched_domain_attr *dattr; /* attributes for custom domains */ | 624 | struct sched_domain_attr *dattr; /* attributes for custom domains */ |
599 | int ndoms = 0; /* number of sched domains in result */ | 625 | int ndoms = 0; /* number of sched domains in result */ |
600 | int nslot; /* next empty doms[] cpumask_t slot */ | 626 | int nslot; /* next empty doms[] struct cpumask slot */ |
601 | 627 | ||
602 | doms = NULL; | 628 | doms = NULL; |
603 | dattr = NULL; | 629 | dattr = NULL; |
@@ -605,7 +631,7 @@ static int generate_sched_domains(cpumask_t **domains, | |||
605 | 631 | ||
606 | /* Special case for the 99% of systems with one, full, sched domain */ | 632 | /* Special case for the 99% of systems with one, full, sched domain */ |
607 | if (is_sched_load_balance(&top_cpuset)) { | 633 | if (is_sched_load_balance(&top_cpuset)) { |
608 | doms = kmalloc(sizeof(cpumask_t), GFP_KERNEL); | 634 | doms = kmalloc(cpumask_size(), GFP_KERNEL); |
609 | if (!doms) | 635 | if (!doms) |
610 | goto done; | 636 | goto done; |
611 | 637 | ||
@@ -614,7 +640,7 @@ static int generate_sched_domains(cpumask_t **domains, | |||
614 | *dattr = SD_ATTR_INIT; | 640 | *dattr = SD_ATTR_INIT; |
615 | update_domain_attr_tree(dattr, &top_cpuset); | 641 | update_domain_attr_tree(dattr, &top_cpuset); |
616 | } | 642 | } |
617 | *doms = top_cpuset.cpus_allowed; | 643 | cpumask_copy(doms, top_cpuset.cpus_allowed); |
618 | 644 | ||
619 | ndoms = 1; | 645 | ndoms = 1; |
620 | goto done; | 646 | goto done; |
@@ -633,7 +659,7 @@ static int generate_sched_domains(cpumask_t **domains, | |||
633 | cp = list_first_entry(&q, struct cpuset, stack_list); | 659 | cp = list_first_entry(&q, struct cpuset, stack_list); |
634 | list_del(q.next); | 660 | list_del(q.next); |
635 | 661 | ||
636 | if (cpus_empty(cp->cpus_allowed)) | 662 | if (cpumask_empty(cp->cpus_allowed)) |
637 | continue; | 663 | continue; |
638 | 664 | ||
639 | /* | 665 | /* |
@@ -684,7 +710,7 @@ restart: | |||
684 | * Now we know how many domains to create. | 710 | * Now we know how many domains to create. |
685 | * Convert <csn, csa> to <ndoms, doms> and populate cpu masks. | 711 | * Convert <csn, csa> to <ndoms, doms> and populate cpu masks. |
686 | */ | 712 | */ |
687 | doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL); | 713 | doms = kmalloc(ndoms * cpumask_size(), GFP_KERNEL); |
688 | if (!doms) | 714 | if (!doms) |
689 | goto done; | 715 | goto done; |
690 | 716 | ||
@@ -696,7 +722,7 @@ restart: | |||
696 | 722 | ||
697 | for (nslot = 0, i = 0; i < csn; i++) { | 723 | for (nslot = 0, i = 0; i < csn; i++) { |
698 | struct cpuset *a = csa[i]; | 724 | struct cpuset *a = csa[i]; |
699 | cpumask_t *dp; | 725 | struct cpumask *dp; |
700 | int apn = a->pn; | 726 | int apn = a->pn; |
701 | 727 | ||
702 | if (apn < 0) { | 728 | if (apn < 0) { |
@@ -719,14 +745,14 @@ restart: | |||
719 | continue; | 745 | continue; |
720 | } | 746 | } |
721 | 747 | ||
722 | cpus_clear(*dp); | 748 | cpumask_clear(dp); |
723 | if (dattr) | 749 | if (dattr) |
724 | *(dattr + nslot) = SD_ATTR_INIT; | 750 | *(dattr + nslot) = SD_ATTR_INIT; |
725 | for (j = i; j < csn; j++) { | 751 | for (j = i; j < csn; j++) { |
726 | struct cpuset *b = csa[j]; | 752 | struct cpuset *b = csa[j]; |
727 | 753 | ||
728 | if (apn == b->pn) { | 754 | if (apn == b->pn) { |
729 | cpus_or(*dp, *dp, b->cpus_allowed); | 755 | cpumask_or(dp, dp, b->cpus_allowed); |
730 | if (dattr) | 756 | if (dattr) |
731 | update_domain_attr_tree(dattr + nslot, b); | 757 | update_domain_attr_tree(dattr + nslot, b); |
732 | 758 | ||
@@ -766,7 +792,7 @@ done: | |||
766 | static void do_rebuild_sched_domains(struct work_struct *unused) | 792 | static void do_rebuild_sched_domains(struct work_struct *unused) |
767 | { | 793 | { |
768 | struct sched_domain_attr *attr; | 794 | struct sched_domain_attr *attr; |
769 | cpumask_t *doms; | 795 | struct cpumask *doms; |
770 | int ndoms; | 796 | int ndoms; |
771 | 797 | ||
772 | get_online_cpus(); | 798 | get_online_cpus(); |
@@ -835,7 +861,7 @@ void rebuild_sched_domains(void) | |||
835 | static int cpuset_test_cpumask(struct task_struct *tsk, | 861 | static int cpuset_test_cpumask(struct task_struct *tsk, |
836 | struct cgroup_scanner *scan) | 862 | struct cgroup_scanner *scan) |
837 | { | 863 | { |
838 | return !cpus_equal(tsk->cpus_allowed, | 864 | return !cpumask_equal(&tsk->cpus_allowed, |
839 | (cgroup_cs(scan->cg))->cpus_allowed); | 865 | (cgroup_cs(scan->cg))->cpus_allowed); |
840 | } | 866 | } |
841 | 867 | ||
@@ -853,7 +879,7 @@ static int cpuset_test_cpumask(struct task_struct *tsk, | |||
853 | static void cpuset_change_cpumask(struct task_struct *tsk, | 879 | static void cpuset_change_cpumask(struct task_struct *tsk, |
854 | struct cgroup_scanner *scan) | 880 | struct cgroup_scanner *scan) |
855 | { | 881 | { |
856 | set_cpus_allowed_ptr(tsk, &((cgroup_cs(scan->cg))->cpus_allowed)); | 882 | set_cpus_allowed_ptr(tsk, ((cgroup_cs(scan->cg))->cpus_allowed)); |
857 | } | 883 | } |
858 | 884 | ||
859 | /** | 885 | /** |
@@ -885,10 +911,10 @@ static void update_tasks_cpumask(struct cpuset *cs, struct ptr_heap *heap) | |||
885 | * @cs: the cpuset to consider | 911 | * @cs: the cpuset to consider |
886 | * @buf: buffer of cpu numbers written to this cpuset | 912 | * @buf: buffer of cpu numbers written to this cpuset |
887 | */ | 913 | */ |
888 | static int update_cpumask(struct cpuset *cs, const char *buf) | 914 | static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, |
915 | const char *buf) | ||
889 | { | 916 | { |
890 | struct ptr_heap heap; | 917 | struct ptr_heap heap; |
891 | struct cpuset trialcs; | ||
892 | int retval; | 918 | int retval; |
893 | int is_load_balanced; | 919 | int is_load_balanced; |
894 | 920 | ||
@@ -896,8 +922,6 @@ static int update_cpumask(struct cpuset *cs, const char *buf) | |||
896 | if (cs == &top_cpuset) | 922 | if (cs == &top_cpuset) |
897 | return -EACCES; | 923 | return -EACCES; |
898 | 924 | ||
899 | trialcs = *cs; | ||
900 | |||
901 | /* | 925 | /* |
902 | * An empty cpus_allowed is ok only if the cpuset has no tasks. | 926 | * An empty cpus_allowed is ok only if the cpuset has no tasks. |
903 | * Since cpulist_parse() fails on an empty mask, we special case | 927 | * Since cpulist_parse() fails on an empty mask, we special case |
@@ -905,31 +929,31 @@ static int update_cpumask(struct cpuset *cs, const char *buf) | |||
905 | * with tasks have cpus. | 929 | * with tasks have cpus. |
906 | */ | 930 | */ |
907 | if (!*buf) { | 931 | if (!*buf) { |
908 | cpus_clear(trialcs.cpus_allowed); | 932 | cpumask_clear(trialcs->cpus_allowed); |
909 | } else { | 933 | } else { |
910 | retval = cpulist_parse(buf, &trialcs.cpus_allowed); | 934 | retval = cpulist_parse(buf, trialcs->cpus_allowed); |
911 | if (retval < 0) | 935 | if (retval < 0) |
912 | return retval; | 936 | return retval; |
913 | 937 | ||
914 | if (!cpus_subset(trialcs.cpus_allowed, cpu_online_map)) | 938 | if (!cpumask_subset(trialcs->cpus_allowed, cpu_online_mask)) |
915 | return -EINVAL; | 939 | return -EINVAL; |
916 | } | 940 | } |
917 | retval = validate_change(cs, &trialcs); | 941 | retval = validate_change(cs, trialcs); |
918 | if (retval < 0) | 942 | if (retval < 0) |
919 | return retval; | 943 | return retval; |
920 | 944 | ||
921 | /* Nothing to do if the cpus didn't change */ | 945 | /* Nothing to do if the cpus didn't change */ |
922 | if (cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed)) | 946 | if (cpumask_equal(cs->cpus_allowed, trialcs->cpus_allowed)) |
923 | return 0; | 947 | return 0; |
924 | 948 | ||
925 | retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL); | 949 | retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL); |
926 | if (retval) | 950 | if (retval) |
927 | return retval; | 951 | return retval; |
928 | 952 | ||
929 | is_load_balanced = is_sched_load_balance(&trialcs); | 953 | is_load_balanced = is_sched_load_balance(trialcs); |
930 | 954 | ||
931 | mutex_lock(&callback_mutex); | 955 | mutex_lock(&callback_mutex); |
932 | cs->cpus_allowed = trialcs.cpus_allowed; | 956 | cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed); |
933 | mutex_unlock(&callback_mutex); | 957 | mutex_unlock(&callback_mutex); |
934 | 958 | ||
935 | /* | 959 | /* |
@@ -1017,7 +1041,7 @@ static int update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem) | |||
1017 | cpuset_being_rebound = cs; /* causes mpol_dup() rebind */ | 1041 | cpuset_being_rebound = cs; /* causes mpol_dup() rebind */ |
1018 | 1042 | ||
1019 | fudge = 10; /* spare mmarray[] slots */ | 1043 | fudge = 10; /* spare mmarray[] slots */ |
1020 | fudge += cpus_weight(cs->cpus_allowed); /* imagine one fork-bomb/cpu */ | 1044 | fudge += cpumask_weight(cs->cpus_allowed);/* imagine 1 fork-bomb/cpu */ |
1021 | retval = -ENOMEM; | 1045 | retval = -ENOMEM; |
1022 | 1046 | ||
1023 | /* | 1047 | /* |
@@ -1104,9 +1128,9 @@ done: | |||
1104 | * lock each such tasks mm->mmap_sem, scan its vma's and rebind | 1128 | * lock each such tasks mm->mmap_sem, scan its vma's and rebind |
1105 | * their mempolicies to the cpusets new mems_allowed. | 1129 | * their mempolicies to the cpusets new mems_allowed. |
1106 | */ | 1130 | */ |
1107 | static int update_nodemask(struct cpuset *cs, const char *buf) | 1131 | static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, |
1132 | const char *buf) | ||
1108 | { | 1133 | { |
1109 | struct cpuset trialcs; | ||
1110 | nodemask_t oldmem; | 1134 | nodemask_t oldmem; |
1111 | int retval; | 1135 | int retval; |
1112 | 1136 | ||
@@ -1117,8 +1141,6 @@ static int update_nodemask(struct cpuset *cs, const char *buf) | |||
1117 | if (cs == &top_cpuset) | 1141 | if (cs == &top_cpuset) |
1118 | return -EACCES; | 1142 | return -EACCES; |
1119 | 1143 | ||
1120 | trialcs = *cs; | ||
1121 | |||
1122 | /* | 1144 | /* |
1123 | * An empty mems_allowed is ok iff there are no tasks in the cpuset. | 1145 | * An empty mems_allowed is ok iff there are no tasks in the cpuset. |
1124 | * Since nodelist_parse() fails on an empty mask, we special case | 1146 | * Since nodelist_parse() fails on an empty mask, we special case |
@@ -1126,27 +1148,27 @@ static int update_nodemask(struct cpuset *cs, const char *buf) | |||
1126 | * with tasks have memory. | 1148 | * with tasks have memory. |
1127 | */ | 1149 | */ |
1128 | if (!*buf) { | 1150 | if (!*buf) { |
1129 | nodes_clear(trialcs.mems_allowed); | 1151 | nodes_clear(trialcs->mems_allowed); |
1130 | } else { | 1152 | } else { |
1131 | retval = nodelist_parse(buf, trialcs.mems_allowed); | 1153 | retval = nodelist_parse(buf, trialcs->mems_allowed); |
1132 | if (retval < 0) | 1154 | if (retval < 0) |
1133 | goto done; | 1155 | goto done; |
1134 | 1156 | ||
1135 | if (!nodes_subset(trialcs.mems_allowed, | 1157 | if (!nodes_subset(trialcs->mems_allowed, |
1136 | node_states[N_HIGH_MEMORY])) | 1158 | node_states[N_HIGH_MEMORY])) |
1137 | return -EINVAL; | 1159 | return -EINVAL; |
1138 | } | 1160 | } |
1139 | oldmem = cs->mems_allowed; | 1161 | oldmem = cs->mems_allowed; |
1140 | if (nodes_equal(oldmem, trialcs.mems_allowed)) { | 1162 | if (nodes_equal(oldmem, trialcs->mems_allowed)) { |
1141 | retval = 0; /* Too easy - nothing to do */ | 1163 | retval = 0; /* Too easy - nothing to do */ |
1142 | goto done; | 1164 | goto done; |
1143 | } | 1165 | } |
1144 | retval = validate_change(cs, &trialcs); | 1166 | retval = validate_change(cs, trialcs); |
1145 | if (retval < 0) | 1167 | if (retval < 0) |
1146 | goto done; | 1168 | goto done; |
1147 | 1169 | ||
1148 | mutex_lock(&callback_mutex); | 1170 | mutex_lock(&callback_mutex); |
1149 | cs->mems_allowed = trialcs.mems_allowed; | 1171 | cs->mems_allowed = trialcs->mems_allowed; |
1150 | cs->mems_generation = cpuset_mems_generation++; | 1172 | cs->mems_generation = cpuset_mems_generation++; |
1151 | mutex_unlock(&callback_mutex); | 1173 | mutex_unlock(&callback_mutex); |
1152 | 1174 | ||
@@ -1167,7 +1189,8 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val) | |||
1167 | 1189 | ||
1168 | if (val != cs->relax_domain_level) { | 1190 | if (val != cs->relax_domain_level) { |
1169 | cs->relax_domain_level = val; | 1191 | cs->relax_domain_level = val; |
1170 | if (!cpus_empty(cs->cpus_allowed) && is_sched_load_balance(cs)) | 1192 | if (!cpumask_empty(cs->cpus_allowed) && |
1193 | is_sched_load_balance(cs)) | ||
1171 | async_rebuild_sched_domains(); | 1194 | async_rebuild_sched_domains(); |
1172 | } | 1195 | } |
1173 | 1196 | ||
@@ -1186,31 +1209,36 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val) | |||
1186 | static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, | 1209 | static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, |
1187 | int turning_on) | 1210 | int turning_on) |
1188 | { | 1211 | { |
1189 | struct cpuset trialcs; | 1212 | struct cpuset *trialcs; |
1190 | int err; | 1213 | int err; |
1191 | int balance_flag_changed; | 1214 | int balance_flag_changed; |
1192 | 1215 | ||
1193 | trialcs = *cs; | 1216 | trialcs = alloc_trial_cpuset(cs); |
1217 | if (!trialcs) | ||
1218 | return -ENOMEM; | ||
1219 | |||
1194 | if (turning_on) | 1220 | if (turning_on) |
1195 | set_bit(bit, &trialcs.flags); | 1221 | set_bit(bit, &trialcs->flags); |
1196 | else | 1222 | else |
1197 | clear_bit(bit, &trialcs.flags); | 1223 | clear_bit(bit, &trialcs->flags); |
1198 | 1224 | ||
1199 | err = validate_change(cs, &trialcs); | 1225 | err = validate_change(cs, trialcs); |
1200 | if (err < 0) | 1226 | if (err < 0) |
1201 | return err; | 1227 | goto out; |
1202 | 1228 | ||
1203 | balance_flag_changed = (is_sched_load_balance(cs) != | 1229 | balance_flag_changed = (is_sched_load_balance(cs) != |
1204 | is_sched_load_balance(&trialcs)); | 1230 | is_sched_load_balance(trialcs)); |
1205 | 1231 | ||
1206 | mutex_lock(&callback_mutex); | 1232 | mutex_lock(&callback_mutex); |
1207 | cs->flags = trialcs.flags; | 1233 | cs->flags = trialcs->flags; |
1208 | mutex_unlock(&callback_mutex); | 1234 | mutex_unlock(&callback_mutex); |
1209 | 1235 | ||
1210 | if (!cpus_empty(trialcs.cpus_allowed) && balance_flag_changed) | 1236 | if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed) |
1211 | async_rebuild_sched_domains(); | 1237 | async_rebuild_sched_domains(); |
1212 | 1238 | ||
1213 | return 0; | 1239 | out: |
1240 | free_trial_cpuset(trialcs); | ||
1241 | return err; | ||
1214 | } | 1242 | } |
1215 | 1243 | ||
1216 | /* | 1244 | /* |
@@ -1311,42 +1339,47 @@ static int fmeter_getrate(struct fmeter *fmp) | |||
1311 | return val; | 1339 | return val; |
1312 | } | 1340 | } |
1313 | 1341 | ||
1342 | /* Protected by cgroup_lock */ | ||
1343 | static cpumask_var_t cpus_attach; | ||
1344 | |||
1314 | /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */ | 1345 | /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */ |
1315 | static int cpuset_can_attach(struct cgroup_subsys *ss, | 1346 | static int cpuset_can_attach(struct cgroup_subsys *ss, |
1316 | struct cgroup *cont, struct task_struct *tsk) | 1347 | struct cgroup *cont, struct task_struct *tsk) |
1317 | { | 1348 | { |
1318 | struct cpuset *cs = cgroup_cs(cont); | 1349 | struct cpuset *cs = cgroup_cs(cont); |
1350 | int ret = 0; | ||
1319 | 1351 | ||
1320 | if (cpus_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) | 1352 | if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) |
1321 | return -ENOSPC; | 1353 | return -ENOSPC; |
1322 | if (tsk->flags & PF_THREAD_BOUND) { | ||
1323 | cpumask_t mask; | ||
1324 | 1354 | ||
1355 | if (tsk->flags & PF_THREAD_BOUND) { | ||
1325 | mutex_lock(&callback_mutex); | 1356 | mutex_lock(&callback_mutex); |
1326 | mask = cs->cpus_allowed; | 1357 | if (!cpumask_equal(&tsk->cpus_allowed, cs->cpus_allowed)) |
1358 | ret = -EINVAL; | ||
1327 | mutex_unlock(&callback_mutex); | 1359 | mutex_unlock(&callback_mutex); |
1328 | if (!cpus_equal(tsk->cpus_allowed, mask)) | ||
1329 | return -EINVAL; | ||
1330 | } | 1360 | } |
1331 | 1361 | ||
1332 | return security_task_setscheduler(tsk, 0, NULL); | 1362 | return ret < 0 ? ret : security_task_setscheduler(tsk, 0, NULL); |
1333 | } | 1363 | } |
1334 | 1364 | ||
1335 | static void cpuset_attach(struct cgroup_subsys *ss, | 1365 | static void cpuset_attach(struct cgroup_subsys *ss, |
1336 | struct cgroup *cont, struct cgroup *oldcont, | 1366 | struct cgroup *cont, struct cgroup *oldcont, |
1337 | struct task_struct *tsk) | 1367 | struct task_struct *tsk) |
1338 | { | 1368 | { |
1339 | cpumask_t cpus; | ||
1340 | nodemask_t from, to; | 1369 | nodemask_t from, to; |
1341 | struct mm_struct *mm; | 1370 | struct mm_struct *mm; |
1342 | struct cpuset *cs = cgroup_cs(cont); | 1371 | struct cpuset *cs = cgroup_cs(cont); |
1343 | struct cpuset *oldcs = cgroup_cs(oldcont); | 1372 | struct cpuset *oldcs = cgroup_cs(oldcont); |
1344 | int err; | 1373 | int err; |
1345 | 1374 | ||
1346 | mutex_lock(&callback_mutex); | 1375 | if (cs == &top_cpuset) { |
1347 | guarantee_online_cpus(cs, &cpus); | 1376 | cpumask_copy(cpus_attach, cpu_possible_mask); |
1348 | err = set_cpus_allowed_ptr(tsk, &cpus); | 1377 | } else { |
1349 | mutex_unlock(&callback_mutex); | 1378 | mutex_lock(&callback_mutex); |
1379 | guarantee_online_cpus(cs, cpus_attach); | ||
1380 | mutex_unlock(&callback_mutex); | ||
1381 | } | ||
1382 | err = set_cpus_allowed_ptr(tsk, cpus_attach); | ||
1350 | if (err) | 1383 | if (err) |
1351 | return; | 1384 | return; |
1352 | 1385 | ||
@@ -1359,7 +1392,6 @@ static void cpuset_attach(struct cgroup_subsys *ss, | |||
1359 | cpuset_migrate_mm(mm, &from, &to); | 1392 | cpuset_migrate_mm(mm, &from, &to); |
1360 | mmput(mm); | 1393 | mmput(mm); |
1361 | } | 1394 | } |
1362 | |||
1363 | } | 1395 | } |
1364 | 1396 | ||
1365 | /* The various types of files and directories in a cpuset file system */ | 1397 | /* The various types of files and directories in a cpuset file system */ |
@@ -1454,21 +1486,29 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft, | |||
1454 | const char *buf) | 1486 | const char *buf) |
1455 | { | 1487 | { |
1456 | int retval = 0; | 1488 | int retval = 0; |
1489 | struct cpuset *cs = cgroup_cs(cgrp); | ||
1490 | struct cpuset *trialcs; | ||
1457 | 1491 | ||
1458 | if (!cgroup_lock_live_group(cgrp)) | 1492 | if (!cgroup_lock_live_group(cgrp)) |
1459 | return -ENODEV; | 1493 | return -ENODEV; |
1460 | 1494 | ||
1495 | trialcs = alloc_trial_cpuset(cs); | ||
1496 | if (!trialcs) | ||
1497 | return -ENOMEM; | ||
1498 | |||
1461 | switch (cft->private) { | 1499 | switch (cft->private) { |
1462 | case FILE_CPULIST: | 1500 | case FILE_CPULIST: |
1463 | retval = update_cpumask(cgroup_cs(cgrp), buf); | 1501 | retval = update_cpumask(cs, trialcs, buf); |
1464 | break; | 1502 | break; |
1465 | case FILE_MEMLIST: | 1503 | case FILE_MEMLIST: |
1466 | retval = update_nodemask(cgroup_cs(cgrp), buf); | 1504 | retval = update_nodemask(cs, trialcs, buf); |
1467 | break; | 1505 | break; |
1468 | default: | 1506 | default: |
1469 | retval = -EINVAL; | 1507 | retval = -EINVAL; |
1470 | break; | 1508 | break; |
1471 | } | 1509 | } |
1510 | |||
1511 | free_trial_cpuset(trialcs); | ||
1472 | cgroup_unlock(); | 1512 | cgroup_unlock(); |
1473 | return retval; | 1513 | return retval; |
1474 | } | 1514 | } |
@@ -1487,13 +1527,13 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft, | |||
1487 | 1527 | ||
1488 | static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs) | 1528 | static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs) |
1489 | { | 1529 | { |
1490 | cpumask_t mask; | 1530 | int ret; |
1491 | 1531 | ||
1492 | mutex_lock(&callback_mutex); | 1532 | mutex_lock(&callback_mutex); |
1493 | mask = cs->cpus_allowed; | 1533 | ret = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_allowed); |
1494 | mutex_unlock(&callback_mutex); | 1534 | mutex_unlock(&callback_mutex); |
1495 | 1535 | ||
1496 | return cpulist_scnprintf(page, PAGE_SIZE, &mask); | 1536 | return ret; |
1497 | } | 1537 | } |
1498 | 1538 | ||
1499 | static int cpuset_sprintf_memlist(char *page, struct cpuset *cs) | 1539 | static int cpuset_sprintf_memlist(char *page, struct cpuset *cs) |
@@ -1729,7 +1769,7 @@ static void cpuset_post_clone(struct cgroup_subsys *ss, | |||
1729 | parent_cs = cgroup_cs(parent); | 1769 | parent_cs = cgroup_cs(parent); |
1730 | 1770 | ||
1731 | cs->mems_allowed = parent_cs->mems_allowed; | 1771 | cs->mems_allowed = parent_cs->mems_allowed; |
1732 | cs->cpus_allowed = parent_cs->cpus_allowed; | 1772 | cpumask_copy(cs->cpus_allowed, parent_cs->cpus_allowed); |
1733 | return; | 1773 | return; |
1734 | } | 1774 | } |
1735 | 1775 | ||
@@ -1755,6 +1795,10 @@ static struct cgroup_subsys_state *cpuset_create( | |||
1755 | cs = kmalloc(sizeof(*cs), GFP_KERNEL); | 1795 | cs = kmalloc(sizeof(*cs), GFP_KERNEL); |
1756 | if (!cs) | 1796 | if (!cs) |
1757 | return ERR_PTR(-ENOMEM); | 1797 | return ERR_PTR(-ENOMEM); |
1798 | if (!alloc_cpumask_var(&cs->cpus_allowed, GFP_KERNEL)) { | ||
1799 | kfree(cs); | ||
1800 | return ERR_PTR(-ENOMEM); | ||
1801 | } | ||
1758 | 1802 | ||
1759 | cpuset_update_task_memory_state(); | 1803 | cpuset_update_task_memory_state(); |
1760 | cs->flags = 0; | 1804 | cs->flags = 0; |
@@ -1763,7 +1807,7 @@ static struct cgroup_subsys_state *cpuset_create( | |||
1763 | if (is_spread_slab(parent)) | 1807 | if (is_spread_slab(parent)) |
1764 | set_bit(CS_SPREAD_SLAB, &cs->flags); | 1808 | set_bit(CS_SPREAD_SLAB, &cs->flags); |
1765 | set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); | 1809 | set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); |
1766 | cpus_clear(cs->cpus_allowed); | 1810 | cpumask_clear(cs->cpus_allowed); |
1767 | nodes_clear(cs->mems_allowed); | 1811 | nodes_clear(cs->mems_allowed); |
1768 | cs->mems_generation = cpuset_mems_generation++; | 1812 | cs->mems_generation = cpuset_mems_generation++; |
1769 | fmeter_init(&cs->fmeter); | 1813 | fmeter_init(&cs->fmeter); |
@@ -1790,6 +1834,7 @@ static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont) | |||
1790 | update_flag(CS_SCHED_LOAD_BALANCE, cs, 0); | 1834 | update_flag(CS_SCHED_LOAD_BALANCE, cs, 0); |
1791 | 1835 | ||
1792 | number_of_cpusets--; | 1836 | number_of_cpusets--; |
1837 | free_cpumask_var(cs->cpus_allowed); | ||
1793 | kfree(cs); | 1838 | kfree(cs); |
1794 | } | 1839 | } |
1795 | 1840 | ||
@@ -1813,6 +1858,8 @@ struct cgroup_subsys cpuset_subsys = { | |||
1813 | 1858 | ||
1814 | int __init cpuset_init_early(void) | 1859 | int __init cpuset_init_early(void) |
1815 | { | 1860 | { |
1861 | alloc_bootmem_cpumask_var(&top_cpuset.cpus_allowed); | ||
1862 | |||
1816 | top_cpuset.mems_generation = cpuset_mems_generation++; | 1863 | top_cpuset.mems_generation = cpuset_mems_generation++; |
1817 | return 0; | 1864 | return 0; |
1818 | } | 1865 | } |
@@ -1828,7 +1875,7 @@ int __init cpuset_init(void) | |||
1828 | { | 1875 | { |
1829 | int err = 0; | 1876 | int err = 0; |
1830 | 1877 | ||
1831 | cpus_setall(top_cpuset.cpus_allowed); | 1878 | cpumask_setall(top_cpuset.cpus_allowed); |
1832 | nodes_setall(top_cpuset.mems_allowed); | 1879 | nodes_setall(top_cpuset.mems_allowed); |
1833 | 1880 | ||
1834 | fmeter_init(&top_cpuset.fmeter); | 1881 | fmeter_init(&top_cpuset.fmeter); |
@@ -1840,6 +1887,9 @@ int __init cpuset_init(void) | |||
1840 | if (err < 0) | 1887 | if (err < 0) |
1841 | return err; | 1888 | return err; |
1842 | 1889 | ||
1890 | if (!alloc_cpumask_var(&cpus_attach, GFP_KERNEL)) | ||
1891 | BUG(); | ||
1892 | |||
1843 | number_of_cpusets = 1; | 1893 | number_of_cpusets = 1; |
1844 | return 0; | 1894 | return 0; |
1845 | } | 1895 | } |
@@ -1914,7 +1964,7 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs) | |||
1914 | * has online cpus, so can't be empty). | 1964 | * has online cpus, so can't be empty). |
1915 | */ | 1965 | */ |
1916 | parent = cs->parent; | 1966 | parent = cs->parent; |
1917 | while (cpus_empty(parent->cpus_allowed) || | 1967 | while (cpumask_empty(parent->cpus_allowed) || |
1918 | nodes_empty(parent->mems_allowed)) | 1968 | nodes_empty(parent->mems_allowed)) |
1919 | parent = parent->parent; | 1969 | parent = parent->parent; |
1920 | 1970 | ||
@@ -1955,7 +2005,7 @@ static void scan_for_empty_cpusets(struct cpuset *root) | |||
1955 | } | 2005 | } |
1956 | 2006 | ||
1957 | /* Continue past cpusets with all cpus, mems online */ | 2007 | /* Continue past cpusets with all cpus, mems online */ |
1958 | if (cpus_subset(cp->cpus_allowed, cpu_online_map) && | 2008 | if (cpumask_subset(cp->cpus_allowed, cpu_online_mask) && |
1959 | nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY])) | 2009 | nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY])) |
1960 | continue; | 2010 | continue; |
1961 | 2011 | ||
@@ -1963,13 +2013,14 @@ static void scan_for_empty_cpusets(struct cpuset *root) | |||
1963 | 2013 | ||
1964 | /* Remove offline cpus and mems from this cpuset. */ | 2014 | /* Remove offline cpus and mems from this cpuset. */ |
1965 | mutex_lock(&callback_mutex); | 2015 | mutex_lock(&callback_mutex); |
1966 | cpus_and(cp->cpus_allowed, cp->cpus_allowed, cpu_online_map); | 2016 | cpumask_and(cp->cpus_allowed, cp->cpus_allowed, |
2017 | cpu_online_mask); | ||
1967 | nodes_and(cp->mems_allowed, cp->mems_allowed, | 2018 | nodes_and(cp->mems_allowed, cp->mems_allowed, |
1968 | node_states[N_HIGH_MEMORY]); | 2019 | node_states[N_HIGH_MEMORY]); |
1969 | mutex_unlock(&callback_mutex); | 2020 | mutex_unlock(&callback_mutex); |
1970 | 2021 | ||
1971 | /* Move tasks from the empty cpuset to a parent */ | 2022 | /* Move tasks from the empty cpuset to a parent */ |
1972 | if (cpus_empty(cp->cpus_allowed) || | 2023 | if (cpumask_empty(cp->cpus_allowed) || |
1973 | nodes_empty(cp->mems_allowed)) | 2024 | nodes_empty(cp->mems_allowed)) |
1974 | remove_tasks_in_empty_cpuset(cp); | 2025 | remove_tasks_in_empty_cpuset(cp); |
1975 | else { | 2026 | else { |
@@ -1995,7 +2046,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb, | |||
1995 | unsigned long phase, void *unused_cpu) | 2046 | unsigned long phase, void *unused_cpu) |
1996 | { | 2047 | { |
1997 | struct sched_domain_attr *attr; | 2048 | struct sched_domain_attr *attr; |
1998 | cpumask_t *doms; | 2049 | struct cpumask *doms; |
1999 | int ndoms; | 2050 | int ndoms; |
2000 | 2051 | ||
2001 | switch (phase) { | 2052 | switch (phase) { |
@@ -2010,7 +2061,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb, | |||
2010 | } | 2061 | } |
2011 | 2062 | ||
2012 | cgroup_lock(); | 2063 | cgroup_lock(); |
2013 | top_cpuset.cpus_allowed = cpu_online_map; | 2064 | cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask); |
2014 | scan_for_empty_cpusets(&top_cpuset); | 2065 | scan_for_empty_cpusets(&top_cpuset); |
2015 | ndoms = generate_sched_domains(&doms, &attr); | 2066 | ndoms = generate_sched_domains(&doms, &attr); |
2016 | cgroup_unlock(); | 2067 | cgroup_unlock(); |
@@ -2055,7 +2106,7 @@ static int cpuset_track_online_nodes(struct notifier_block *self, | |||
2055 | 2106 | ||
2056 | void __init cpuset_init_smp(void) | 2107 | void __init cpuset_init_smp(void) |
2057 | { | 2108 | { |
2058 | top_cpuset.cpus_allowed = cpu_online_map; | 2109 | cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask); |
2059 | top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; | 2110 | top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; |
2060 | 2111 | ||
2061 | hotcpu_notifier(cpuset_track_online_cpus, 0); | 2112 | hotcpu_notifier(cpuset_track_online_cpus, 0); |
@@ -2065,15 +2116,15 @@ void __init cpuset_init_smp(void) | |||
2065 | /** | 2116 | /** |
2066 | * cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset. | 2117 | * cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset. |
2067 | * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed. | 2118 | * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed. |
2068 | * @pmask: pointer to cpumask_t variable to receive cpus_allowed set. | 2119 | * @pmask: pointer to struct cpumask variable to receive cpus_allowed set. |
2069 | * | 2120 | * |
2070 | * Description: Returns the cpumask_t cpus_allowed of the cpuset | 2121 | * Description: Returns the cpumask_var_t cpus_allowed of the cpuset |
2071 | * attached to the specified @tsk. Guaranteed to return some non-empty | 2122 | * attached to the specified @tsk. Guaranteed to return some non-empty |
2072 | * subset of cpu_online_map, even if this means going outside the | 2123 | * subset of cpu_online_map, even if this means going outside the |
2073 | * tasks cpuset. | 2124 | * tasks cpuset. |
2074 | **/ | 2125 | **/ |
2075 | 2126 | ||
2076 | void cpuset_cpus_allowed(struct task_struct *tsk, cpumask_t *pmask) | 2127 | void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask) |
2077 | { | 2128 | { |
2078 | mutex_lock(&callback_mutex); | 2129 | mutex_lock(&callback_mutex); |
2079 | cpuset_cpus_allowed_locked(tsk, pmask); | 2130 | cpuset_cpus_allowed_locked(tsk, pmask); |
@@ -2084,7 +2135,7 @@ void cpuset_cpus_allowed(struct task_struct *tsk, cpumask_t *pmask) | |||
2084 | * cpuset_cpus_allowed_locked - return cpus_allowed mask from a tasks cpuset. | 2135 | * cpuset_cpus_allowed_locked - return cpus_allowed mask from a tasks cpuset. |
2085 | * Must be called with callback_mutex held. | 2136 | * Must be called with callback_mutex held. |
2086 | **/ | 2137 | **/ |
2087 | void cpuset_cpus_allowed_locked(struct task_struct *tsk, cpumask_t *pmask) | 2138 | void cpuset_cpus_allowed_locked(struct task_struct *tsk, struct cpumask *pmask) |
2088 | { | 2139 | { |
2089 | task_lock(tsk); | 2140 | task_lock(tsk); |
2090 | guarantee_online_cpus(task_cs(tsk), pmask); | 2141 | guarantee_online_cpus(task_cs(tsk), pmask); |
diff --git a/kernel/cred.c b/kernel/cred.c index ff7bc071991c..3a039189d707 100644 --- a/kernel/cred.c +++ b/kernel/cred.c | |||
@@ -372,7 +372,8 @@ int commit_creds(struct cred *new) | |||
372 | old->fsuid != new->fsuid || | 372 | old->fsuid != new->fsuid || |
373 | old->fsgid != new->fsgid || | 373 | old->fsgid != new->fsgid || |
374 | !cap_issubset(new->cap_permitted, old->cap_permitted)) { | 374 | !cap_issubset(new->cap_permitted, old->cap_permitted)) { |
375 | set_dumpable(task->mm, suid_dumpable); | 375 | if (task->mm) |
376 | set_dumpable(task->mm, suid_dumpable); | ||
376 | task->pdeath_signal = 0; | 377 | task->pdeath_signal = 0; |
377 | smp_wmb(); | 378 | smp_wmb(); |
378 | } | 379 | } |
@@ -506,6 +507,7 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) | |||
506 | else | 507 | else |
507 | old = get_cred(&init_cred); | 508 | old = get_cred(&init_cred); |
508 | 509 | ||
510 | *new = *old; | ||
509 | get_uid(new->user); | 511 | get_uid(new->user); |
510 | get_group_info(new->group_info); | 512 | get_group_info(new->group_info); |
511 | 513 | ||
@@ -529,6 +531,7 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) | |||
529 | 531 | ||
530 | error: | 532 | error: |
531 | put_cred(new); | 533 | put_cred(new); |
534 | put_cred(old); | ||
532 | return NULL; | 535 | return NULL; |
533 | } | 536 | } |
534 | EXPORT_SYMBOL(prepare_kernel_cred); | 537 | EXPORT_SYMBOL(prepare_kernel_cred); |
diff --git a/kernel/fork.c b/kernel/fork.c index 7b8f2a78be3d..1d68f1255dd8 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -1126,12 +1126,12 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1126 | 1126 | ||
1127 | if (pid != &init_struct_pid) { | 1127 | if (pid != &init_struct_pid) { |
1128 | retval = -ENOMEM; | 1128 | retval = -ENOMEM; |
1129 | pid = alloc_pid(task_active_pid_ns(p)); | 1129 | pid = alloc_pid(p->nsproxy->pid_ns); |
1130 | if (!pid) | 1130 | if (!pid) |
1131 | goto bad_fork_cleanup_io; | 1131 | goto bad_fork_cleanup_io; |
1132 | 1132 | ||
1133 | if (clone_flags & CLONE_NEWPID) { | 1133 | if (clone_flags & CLONE_NEWPID) { |
1134 | retval = pid_ns_prepare_proc(task_active_pid_ns(p)); | 1134 | retval = pid_ns_prepare_proc(p->nsproxy->pid_ns); |
1135 | if (retval < 0) | 1135 | if (retval < 0) |
1136 | goto bad_fork_free_pid; | 1136 | goto bad_fork_free_pid; |
1137 | } | 1137 | } |
@@ -1481,12 +1481,10 @@ void __init proc_caches_init(void) | |||
1481 | fs_cachep = kmem_cache_create("fs_cache", | 1481 | fs_cachep = kmem_cache_create("fs_cache", |
1482 | sizeof(struct fs_struct), 0, | 1482 | sizeof(struct fs_struct), 0, |
1483 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); | 1483 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); |
1484 | vm_area_cachep = kmem_cache_create("vm_area_struct", | ||
1485 | sizeof(struct vm_area_struct), 0, | ||
1486 | SLAB_PANIC, NULL); | ||
1487 | mm_cachep = kmem_cache_create("mm_struct", | 1484 | mm_cachep = kmem_cache_create("mm_struct", |
1488 | sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, | 1485 | sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, |
1489 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); | 1486 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); |
1487 | mmap_init(); | ||
1490 | } | 1488 | } |
1491 | 1489 | ||
1492 | /* | 1490 | /* |
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c index cc0f7321b8ce..1de9700f416e 100644 --- a/kernel/irq/autoprobe.c +++ b/kernel/irq/autoprobe.c | |||
@@ -10,6 +10,7 @@ | |||
10 | #include <linux/module.h> | 10 | #include <linux/module.h> |
11 | #include <linux/interrupt.h> | 11 | #include <linux/interrupt.h> |
12 | #include <linux/delay.h> | 12 | #include <linux/delay.h> |
13 | #include <linux/async.h> | ||
13 | 14 | ||
14 | #include "internals.h" | 15 | #include "internals.h" |
15 | 16 | ||
@@ -34,6 +35,10 @@ unsigned long probe_irq_on(void) | |||
34 | unsigned int status; | 35 | unsigned int status; |
35 | int i; | 36 | int i; |
36 | 37 | ||
38 | /* | ||
39 | * quiesce the kernel, or at least the asynchronous portion | ||
40 | */ | ||
41 | async_synchronize_full(); | ||
37 | mutex_lock(&probing_active); | 42 | mutex_lock(&probing_active); |
38 | /* | 43 | /* |
39 | * something may have generated an irq long ago and we want to | 44 | * something may have generated an irq long ago and we want to |
diff --git a/kernel/module.c b/kernel/module.c index 496dcb57b608..c9332c90d5a0 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -50,6 +50,7 @@ | |||
50 | #include <asm/sections.h> | 50 | #include <asm/sections.h> |
51 | #include <linux/tracepoint.h> | 51 | #include <linux/tracepoint.h> |
52 | #include <linux/ftrace.h> | 52 | #include <linux/ftrace.h> |
53 | #include <linux/async.h> | ||
53 | 54 | ||
54 | #if 0 | 55 | #if 0 |
55 | #define DEBUGP printk | 56 | #define DEBUGP printk |
@@ -816,6 +817,7 @@ sys_delete_module(const char __user *name_user, unsigned int flags) | |||
816 | mod->exit(); | 817 | mod->exit(); |
817 | blocking_notifier_call_chain(&module_notify_list, | 818 | blocking_notifier_call_chain(&module_notify_list, |
818 | MODULE_STATE_GOING, mod); | 819 | MODULE_STATE_GOING, mod); |
820 | async_synchronize_full(); | ||
819 | mutex_lock(&module_mutex); | 821 | mutex_lock(&module_mutex); |
820 | /* Store the name of the last unloaded module for diagnostic purposes */ | 822 | /* Store the name of the last unloaded module for diagnostic purposes */ |
821 | strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module)); | 823 | strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module)); |
diff --git a/kernel/ns_cgroup.c b/kernel/ns_cgroup.c index 43c2111cd54d..78bc3fdac0d2 100644 --- a/kernel/ns_cgroup.c +++ b/kernel/ns_cgroup.c | |||
@@ -13,7 +13,6 @@ | |||
13 | 13 | ||
14 | struct ns_cgroup { | 14 | struct ns_cgroup { |
15 | struct cgroup_subsys_state css; | 15 | struct cgroup_subsys_state css; |
16 | spinlock_t lock; | ||
17 | }; | 16 | }; |
18 | 17 | ||
19 | struct cgroup_subsys ns_subsys; | 18 | struct cgroup_subsys ns_subsys; |
@@ -84,7 +83,6 @@ static struct cgroup_subsys_state *ns_create(struct cgroup_subsys *ss, | |||
84 | ns_cgroup = kzalloc(sizeof(*ns_cgroup), GFP_KERNEL); | 83 | ns_cgroup = kzalloc(sizeof(*ns_cgroup), GFP_KERNEL); |
85 | if (!ns_cgroup) | 84 | if (!ns_cgroup) |
86 | return ERR_PTR(-ENOMEM); | 85 | return ERR_PTR(-ENOMEM); |
87 | spin_lock_init(&ns_cgroup->lock); | ||
88 | return &ns_cgroup->css; | 86 | return &ns_cgroup->css; |
89 | } | 87 | } |
90 | 88 | ||
diff --git a/kernel/pid.c b/kernel/pid.c index af9224cdd6c0..1b3586fe753a 100644 --- a/kernel/pid.c +++ b/kernel/pid.c | |||
@@ -474,6 +474,12 @@ pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) | |||
474 | } | 474 | } |
475 | EXPORT_SYMBOL(task_session_nr_ns); | 475 | EXPORT_SYMBOL(task_session_nr_ns); |
476 | 476 | ||
477 | struct pid_namespace *task_active_pid_ns(struct task_struct *tsk) | ||
478 | { | ||
479 | return ns_of_pid(task_pid(tsk)); | ||
480 | } | ||
481 | EXPORT_SYMBOL_GPL(task_active_pid_ns); | ||
482 | |||
477 | /* | 483 | /* |
478 | * Used by proc to find the first pid that is greater than or equal to nr. | 484 | * Used by proc to find the first pid that is greater than or equal to nr. |
479 | * | 485 | * |
diff --git a/kernel/power/disk.c b/kernel/power/disk.c index f77d3819ef57..45e8541ab7e3 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c | |||
@@ -258,12 +258,12 @@ int hibernation_snapshot(int platform_mode) | |||
258 | { | 258 | { |
259 | int error; | 259 | int error; |
260 | 260 | ||
261 | /* Free memory before shutting down devices. */ | 261 | error = platform_begin(platform_mode); |
262 | error = swsusp_shrink_memory(); | ||
263 | if (error) | 262 | if (error) |
264 | return error; | 263 | return error; |
265 | 264 | ||
266 | error = platform_begin(platform_mode); | 265 | /* Free memory before shutting down devices. */ |
266 | error = swsusp_shrink_memory(); | ||
267 | if (error) | 267 | if (error) |
268 | goto Close; | 268 | goto Close; |
269 | 269 | ||
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 5d2ab836e998..f5fc2d7680f2 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/syscalls.h> | 25 | #include <linux/syscalls.h> |
26 | #include <linux/console.h> | 26 | #include <linux/console.h> |
27 | #include <linux/highmem.h> | 27 | #include <linux/highmem.h> |
28 | #include <linux/list.h> | ||
28 | 29 | ||
29 | #include <asm/uaccess.h> | 30 | #include <asm/uaccess.h> |
30 | #include <asm/mmu_context.h> | 31 | #include <asm/mmu_context.h> |
@@ -192,12 +193,6 @@ static void *chain_alloc(struct chain_allocator *ca, unsigned int size) | |||
192 | return ret; | 193 | return ret; |
193 | } | 194 | } |
194 | 195 | ||
195 | static void chain_free(struct chain_allocator *ca, int clear_page_nosave) | ||
196 | { | ||
197 | free_list_of_pages(ca->chain, clear_page_nosave); | ||
198 | memset(ca, 0, sizeof(struct chain_allocator)); | ||
199 | } | ||
200 | |||
201 | /** | 196 | /** |
202 | * Data types related to memory bitmaps. | 197 | * Data types related to memory bitmaps. |
203 | * | 198 | * |
@@ -233,7 +228,7 @@ static void chain_free(struct chain_allocator *ca, int clear_page_nosave) | |||
233 | #define BM_BITS_PER_BLOCK (PAGE_SIZE << 3) | 228 | #define BM_BITS_PER_BLOCK (PAGE_SIZE << 3) |
234 | 229 | ||
235 | struct bm_block { | 230 | struct bm_block { |
236 | struct bm_block *next; /* next element of the list */ | 231 | struct list_head hook; /* hook into a list of bitmap blocks */ |
237 | unsigned long start_pfn; /* pfn represented by the first bit */ | 232 | unsigned long start_pfn; /* pfn represented by the first bit */ |
238 | unsigned long end_pfn; /* pfn represented by the last bit plus 1 */ | 233 | unsigned long end_pfn; /* pfn represented by the last bit plus 1 */ |
239 | unsigned long *data; /* bitmap representing pages */ | 234 | unsigned long *data; /* bitmap representing pages */ |
@@ -244,24 +239,15 @@ static inline unsigned long bm_block_bits(struct bm_block *bb) | |||
244 | return bb->end_pfn - bb->start_pfn; | 239 | return bb->end_pfn - bb->start_pfn; |
245 | } | 240 | } |
246 | 241 | ||
247 | struct zone_bitmap { | ||
248 | struct zone_bitmap *next; /* next element of the list */ | ||
249 | unsigned long start_pfn; /* minimal pfn in this zone */ | ||
250 | unsigned long end_pfn; /* maximal pfn in this zone plus 1 */ | ||
251 | struct bm_block *bm_blocks; /* list of bitmap blocks */ | ||
252 | struct bm_block *cur_block; /* recently used bitmap block */ | ||
253 | }; | ||
254 | |||
255 | /* strcut bm_position is used for browsing memory bitmaps */ | 242 | /* strcut bm_position is used for browsing memory bitmaps */ |
256 | 243 | ||
257 | struct bm_position { | 244 | struct bm_position { |
258 | struct zone_bitmap *zone_bm; | ||
259 | struct bm_block *block; | 245 | struct bm_block *block; |
260 | int bit; | 246 | int bit; |
261 | }; | 247 | }; |
262 | 248 | ||
263 | struct memory_bitmap { | 249 | struct memory_bitmap { |
264 | struct zone_bitmap *zone_bm_list; /* list of zone bitmaps */ | 250 | struct list_head blocks; /* list of bitmap blocks */ |
265 | struct linked_page *p_list; /* list of pages used to store zone | 251 | struct linked_page *p_list; /* list of pages used to store zone |
266 | * bitmap objects and bitmap block | 252 | * bitmap objects and bitmap block |
267 | * objects | 253 | * objects |
@@ -273,11 +259,7 @@ struct memory_bitmap { | |||
273 | 259 | ||
274 | static void memory_bm_position_reset(struct memory_bitmap *bm) | 260 | static void memory_bm_position_reset(struct memory_bitmap *bm) |
275 | { | 261 | { |
276 | struct zone_bitmap *zone_bm; | 262 | bm->cur.block = list_entry(bm->blocks.next, struct bm_block, hook); |
277 | |||
278 | zone_bm = bm->zone_bm_list; | ||
279 | bm->cur.zone_bm = zone_bm; | ||
280 | bm->cur.block = zone_bm->bm_blocks; | ||
281 | bm->cur.bit = 0; | 263 | bm->cur.bit = 0; |
282 | } | 264 | } |
283 | 265 | ||
@@ -285,151 +267,184 @@ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free); | |||
285 | 267 | ||
286 | /** | 268 | /** |
287 | * create_bm_block_list - create a list of block bitmap objects | 269 | * create_bm_block_list - create a list of block bitmap objects |
270 | * @nr_blocks - number of blocks to allocate | ||
271 | * @list - list to put the allocated blocks into | ||
272 | * @ca - chain allocator to be used for allocating memory | ||
288 | */ | 273 | */ |
289 | 274 | static int create_bm_block_list(unsigned long pages, | |
290 | static inline struct bm_block * | 275 | struct list_head *list, |
291 | create_bm_block_list(unsigned int nr_blocks, struct chain_allocator *ca) | 276 | struct chain_allocator *ca) |
292 | { | 277 | { |
293 | struct bm_block *bblist = NULL; | 278 | unsigned int nr_blocks = DIV_ROUND_UP(pages, BM_BITS_PER_BLOCK); |
294 | 279 | ||
295 | while (nr_blocks-- > 0) { | 280 | while (nr_blocks-- > 0) { |
296 | struct bm_block *bb; | 281 | struct bm_block *bb; |
297 | 282 | ||
298 | bb = chain_alloc(ca, sizeof(struct bm_block)); | 283 | bb = chain_alloc(ca, sizeof(struct bm_block)); |
299 | if (!bb) | 284 | if (!bb) |
300 | return NULL; | 285 | return -ENOMEM; |
301 | 286 | list_add(&bb->hook, list); | |
302 | bb->next = bblist; | ||
303 | bblist = bb; | ||
304 | } | 287 | } |
305 | return bblist; | 288 | |
289 | return 0; | ||
306 | } | 290 | } |
307 | 291 | ||
292 | struct mem_extent { | ||
293 | struct list_head hook; | ||
294 | unsigned long start; | ||
295 | unsigned long end; | ||
296 | }; | ||
297 | |||
308 | /** | 298 | /** |
309 | * create_zone_bm_list - create a list of zone bitmap objects | 299 | * free_mem_extents - free a list of memory extents |
300 | * @list - list of extents to empty | ||
310 | */ | 301 | */ |
302 | static void free_mem_extents(struct list_head *list) | ||
303 | { | ||
304 | struct mem_extent *ext, *aux; | ||
311 | 305 | ||
312 | static inline struct zone_bitmap * | 306 | list_for_each_entry_safe(ext, aux, list, hook) { |
313 | create_zone_bm_list(unsigned int nr_zones, struct chain_allocator *ca) | 307 | list_del(&ext->hook); |
308 | kfree(ext); | ||
309 | } | ||
310 | } | ||
311 | |||
312 | /** | ||
313 | * create_mem_extents - create a list of memory extents representing | ||
314 | * contiguous ranges of PFNs | ||
315 | * @list - list to put the extents into | ||
316 | * @gfp_mask - mask to use for memory allocations | ||
317 | */ | ||
318 | static int create_mem_extents(struct list_head *list, gfp_t gfp_mask) | ||
314 | { | 319 | { |
315 | struct zone_bitmap *zbmlist = NULL; | 320 | struct zone *zone; |
316 | 321 | ||
317 | while (nr_zones-- > 0) { | 322 | INIT_LIST_HEAD(list); |
318 | struct zone_bitmap *zbm; | ||
319 | 323 | ||
320 | zbm = chain_alloc(ca, sizeof(struct zone_bitmap)); | 324 | for_each_zone(zone) { |
321 | if (!zbm) | 325 | unsigned long zone_start, zone_end; |
322 | return NULL; | 326 | struct mem_extent *ext, *cur, *aux; |
327 | |||
328 | if (!populated_zone(zone)) | ||
329 | continue; | ||
323 | 330 | ||
324 | zbm->next = zbmlist; | 331 | zone_start = zone->zone_start_pfn; |
325 | zbmlist = zbm; | 332 | zone_end = zone->zone_start_pfn + zone->spanned_pages; |
333 | |||
334 | list_for_each_entry(ext, list, hook) | ||
335 | if (zone_start <= ext->end) | ||
336 | break; | ||
337 | |||
338 | if (&ext->hook == list || zone_end < ext->start) { | ||
339 | /* New extent is necessary */ | ||
340 | struct mem_extent *new_ext; | ||
341 | |||
342 | new_ext = kzalloc(sizeof(struct mem_extent), gfp_mask); | ||
343 | if (!new_ext) { | ||
344 | free_mem_extents(list); | ||
345 | return -ENOMEM; | ||
346 | } | ||
347 | new_ext->start = zone_start; | ||
348 | new_ext->end = zone_end; | ||
349 | list_add_tail(&new_ext->hook, &ext->hook); | ||
350 | continue; | ||
351 | } | ||
352 | |||
353 | /* Merge this zone's range of PFNs with the existing one */ | ||
354 | if (zone_start < ext->start) | ||
355 | ext->start = zone_start; | ||
356 | if (zone_end > ext->end) | ||
357 | ext->end = zone_end; | ||
358 | |||
359 | /* More merging may be possible */ | ||
360 | cur = ext; | ||
361 | list_for_each_entry_safe_continue(cur, aux, list, hook) { | ||
362 | if (zone_end < cur->start) | ||
363 | break; | ||
364 | if (zone_end < cur->end) | ||
365 | ext->end = cur->end; | ||
366 | list_del(&cur->hook); | ||
367 | kfree(cur); | ||
368 | } | ||
326 | } | 369 | } |
327 | return zbmlist; | 370 | |
371 | return 0; | ||
328 | } | 372 | } |
329 | 373 | ||
330 | /** | 374 | /** |
331 | * memory_bm_create - allocate memory for a memory bitmap | 375 | * memory_bm_create - allocate memory for a memory bitmap |
332 | */ | 376 | */ |
333 | |||
334 | static int | 377 | static int |
335 | memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) | 378 | memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) |
336 | { | 379 | { |
337 | struct chain_allocator ca; | 380 | struct chain_allocator ca; |
338 | struct zone *zone; | 381 | struct list_head mem_extents; |
339 | struct zone_bitmap *zone_bm; | 382 | struct mem_extent *ext; |
340 | struct bm_block *bb; | 383 | int error; |
341 | unsigned int nr; | ||
342 | 384 | ||
343 | chain_init(&ca, gfp_mask, safe_needed); | 385 | chain_init(&ca, gfp_mask, safe_needed); |
386 | INIT_LIST_HEAD(&bm->blocks); | ||
344 | 387 | ||
345 | /* Compute the number of zones */ | 388 | error = create_mem_extents(&mem_extents, gfp_mask); |
346 | nr = 0; | 389 | if (error) |
347 | for_each_zone(zone) | 390 | return error; |
348 | if (populated_zone(zone)) | ||
349 | nr++; | ||
350 | |||
351 | /* Allocate the list of zones bitmap objects */ | ||
352 | zone_bm = create_zone_bm_list(nr, &ca); | ||
353 | bm->zone_bm_list = zone_bm; | ||
354 | if (!zone_bm) { | ||
355 | chain_free(&ca, PG_UNSAFE_CLEAR); | ||
356 | return -ENOMEM; | ||
357 | } | ||
358 | |||
359 | /* Initialize the zone bitmap objects */ | ||
360 | for_each_zone(zone) { | ||
361 | unsigned long pfn; | ||
362 | 391 | ||
363 | if (!populated_zone(zone)) | 392 | list_for_each_entry(ext, &mem_extents, hook) { |
364 | continue; | 393 | struct bm_block *bb; |
394 | unsigned long pfn = ext->start; | ||
395 | unsigned long pages = ext->end - ext->start; | ||
365 | 396 | ||
366 | zone_bm->start_pfn = zone->zone_start_pfn; | 397 | bb = list_entry(bm->blocks.prev, struct bm_block, hook); |
367 | zone_bm->end_pfn = zone->zone_start_pfn + zone->spanned_pages; | ||
368 | /* Allocate the list of bitmap block objects */ | ||
369 | nr = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK); | ||
370 | bb = create_bm_block_list(nr, &ca); | ||
371 | zone_bm->bm_blocks = bb; | ||
372 | zone_bm->cur_block = bb; | ||
373 | if (!bb) | ||
374 | goto Free; | ||
375 | 398 | ||
376 | nr = zone->spanned_pages; | 399 | error = create_bm_block_list(pages, bm->blocks.prev, &ca); |
377 | pfn = zone->zone_start_pfn; | 400 | if (error) |
378 | /* Initialize the bitmap block objects */ | 401 | goto Error; |
379 | while (bb) { | ||
380 | unsigned long *ptr; | ||
381 | 402 | ||
382 | ptr = get_image_page(gfp_mask, safe_needed); | 403 | list_for_each_entry_continue(bb, &bm->blocks, hook) { |
383 | bb->data = ptr; | 404 | bb->data = get_image_page(gfp_mask, safe_needed); |
384 | if (!ptr) | 405 | if (!bb->data) { |
385 | goto Free; | 406 | error = -ENOMEM; |
407 | goto Error; | ||
408 | } | ||
386 | 409 | ||
387 | bb->start_pfn = pfn; | 410 | bb->start_pfn = pfn; |
388 | if (nr >= BM_BITS_PER_BLOCK) { | 411 | if (pages >= BM_BITS_PER_BLOCK) { |
389 | pfn += BM_BITS_PER_BLOCK; | 412 | pfn += BM_BITS_PER_BLOCK; |
390 | nr -= BM_BITS_PER_BLOCK; | 413 | pages -= BM_BITS_PER_BLOCK; |
391 | } else { | 414 | } else { |
392 | /* This is executed only once in the loop */ | 415 | /* This is executed only once in the loop */ |
393 | pfn += nr; | 416 | pfn += pages; |
394 | } | 417 | } |
395 | bb->end_pfn = pfn; | 418 | bb->end_pfn = pfn; |
396 | bb = bb->next; | ||
397 | } | 419 | } |
398 | zone_bm = zone_bm->next; | ||
399 | } | 420 | } |
421 | |||
400 | bm->p_list = ca.chain; | 422 | bm->p_list = ca.chain; |
401 | memory_bm_position_reset(bm); | 423 | memory_bm_position_reset(bm); |
402 | return 0; | 424 | Exit: |
425 | free_mem_extents(&mem_extents); | ||
426 | return error; | ||
403 | 427 | ||
404 | Free: | 428 | Error: |
405 | bm->p_list = ca.chain; | 429 | bm->p_list = ca.chain; |
406 | memory_bm_free(bm, PG_UNSAFE_CLEAR); | 430 | memory_bm_free(bm, PG_UNSAFE_CLEAR); |
407 | return -ENOMEM; | 431 | goto Exit; |
408 | } | 432 | } |
409 | 433 | ||
410 | /** | 434 | /** |
411 | * memory_bm_free - free memory occupied by the memory bitmap @bm | 435 | * memory_bm_free - free memory occupied by the memory bitmap @bm |
412 | */ | 436 | */ |
413 | |||
414 | static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) | 437 | static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) |
415 | { | 438 | { |
416 | struct zone_bitmap *zone_bm; | 439 | struct bm_block *bb; |
417 | 440 | ||
418 | /* Free the list of bit blocks for each zone_bitmap object */ | 441 | list_for_each_entry(bb, &bm->blocks, hook) |
419 | zone_bm = bm->zone_bm_list; | 442 | if (bb->data) |
420 | while (zone_bm) { | 443 | free_image_page(bb->data, clear_nosave_free); |
421 | struct bm_block *bb; | ||
422 | 444 | ||
423 | bb = zone_bm->bm_blocks; | ||
424 | while (bb) { | ||
425 | if (bb->data) | ||
426 | free_image_page(bb->data, clear_nosave_free); | ||
427 | bb = bb->next; | ||
428 | } | ||
429 | zone_bm = zone_bm->next; | ||
430 | } | ||
431 | free_list_of_pages(bm->p_list, clear_nosave_free); | 445 | free_list_of_pages(bm->p_list, clear_nosave_free); |
432 | bm->zone_bm_list = NULL; | 446 | |
447 | INIT_LIST_HEAD(&bm->blocks); | ||
433 | } | 448 | } |
434 | 449 | ||
435 | /** | 450 | /** |
@@ -437,38 +452,33 @@ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) | |||
437 | * to given pfn. The cur_zone_bm member of @bm and the cur_block member | 452 | * to given pfn. The cur_zone_bm member of @bm and the cur_block member |
438 | * of @bm->cur_zone_bm are updated. | 453 | * of @bm->cur_zone_bm are updated. |
439 | */ | 454 | */ |
440 | |||
441 | static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, | 455 | static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, |
442 | void **addr, unsigned int *bit_nr) | 456 | void **addr, unsigned int *bit_nr) |
443 | { | 457 | { |
444 | struct zone_bitmap *zone_bm; | ||
445 | struct bm_block *bb; | 458 | struct bm_block *bb; |
446 | 459 | ||
447 | /* Check if the pfn is from the current zone */ | 460 | /* |
448 | zone_bm = bm->cur.zone_bm; | 461 | * Check if the pfn corresponds to the current bitmap block and find |
449 | if (pfn < zone_bm->start_pfn || pfn >= zone_bm->end_pfn) { | 462 | * the block where it fits if this is not the case. |
450 | zone_bm = bm->zone_bm_list; | 463 | */ |
451 | /* We don't assume that the zones are sorted by pfns */ | 464 | bb = bm->cur.block; |
452 | while (pfn < zone_bm->start_pfn || pfn >= zone_bm->end_pfn) { | ||
453 | zone_bm = zone_bm->next; | ||
454 | |||
455 | if (!zone_bm) | ||
456 | return -EFAULT; | ||
457 | } | ||
458 | bm->cur.zone_bm = zone_bm; | ||
459 | } | ||
460 | /* Check if the pfn corresponds to the current bitmap block */ | ||
461 | bb = zone_bm->cur_block; | ||
462 | if (pfn < bb->start_pfn) | 465 | if (pfn < bb->start_pfn) |
463 | bb = zone_bm->bm_blocks; | 466 | list_for_each_entry_continue_reverse(bb, &bm->blocks, hook) |
467 | if (pfn >= bb->start_pfn) | ||
468 | break; | ||
464 | 469 | ||
465 | while (pfn >= bb->end_pfn) { | 470 | if (pfn >= bb->end_pfn) |
466 | bb = bb->next; | 471 | list_for_each_entry_continue(bb, &bm->blocks, hook) |
472 | if (pfn >= bb->start_pfn && pfn < bb->end_pfn) | ||
473 | break; | ||
467 | 474 | ||
468 | BUG_ON(!bb); | 475 | if (&bb->hook == &bm->blocks) |
469 | } | 476 | return -EFAULT; |
470 | zone_bm->cur_block = bb; | 477 | |
478 | /* The block has been found */ | ||
479 | bm->cur.block = bb; | ||
471 | pfn -= bb->start_pfn; | 480 | pfn -= bb->start_pfn; |
481 | bm->cur.bit = pfn + 1; | ||
472 | *bit_nr = pfn; | 482 | *bit_nr = pfn; |
473 | *addr = bb->data; | 483 | *addr = bb->data; |
474 | return 0; | 484 | return 0; |
@@ -519,6 +529,14 @@ static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn) | |||
519 | return test_bit(bit, addr); | 529 | return test_bit(bit, addr); |
520 | } | 530 | } |
521 | 531 | ||
532 | static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn) | ||
533 | { | ||
534 | void *addr; | ||
535 | unsigned int bit; | ||
536 | |||
537 | return !memory_bm_find_bit(bm, pfn, &addr, &bit); | ||
538 | } | ||
539 | |||
522 | /** | 540 | /** |
523 | * memory_bm_next_pfn - find the pfn that corresponds to the next set bit | 541 | * memory_bm_next_pfn - find the pfn that corresponds to the next set bit |
524 | * in the bitmap @bm. If the pfn cannot be found, BM_END_OF_MAP is | 542 | * in the bitmap @bm. If the pfn cannot be found, BM_END_OF_MAP is |
@@ -530,29 +548,21 @@ static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn) | |||
530 | 548 | ||
531 | static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) | 549 | static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) |
532 | { | 550 | { |
533 | struct zone_bitmap *zone_bm; | ||
534 | struct bm_block *bb; | 551 | struct bm_block *bb; |
535 | int bit; | 552 | int bit; |
536 | 553 | ||
554 | bb = bm->cur.block; | ||
537 | do { | 555 | do { |
538 | bb = bm->cur.block; | 556 | bit = bm->cur.bit; |
539 | do { | 557 | bit = find_next_bit(bb->data, bm_block_bits(bb), bit); |
540 | bit = bm->cur.bit; | 558 | if (bit < bm_block_bits(bb)) |
541 | bit = find_next_bit(bb->data, bm_block_bits(bb), bit); | 559 | goto Return_pfn; |
542 | if (bit < bm_block_bits(bb)) | 560 | |
543 | goto Return_pfn; | 561 | bb = list_entry(bb->hook.next, struct bm_block, hook); |
544 | 562 | bm->cur.block = bb; | |
545 | bb = bb->next; | 563 | bm->cur.bit = 0; |
546 | bm->cur.block = bb; | 564 | } while (&bb->hook != &bm->blocks); |
547 | bm->cur.bit = 0; | 565 | |
548 | } while (bb); | ||
549 | zone_bm = bm->cur.zone_bm->next; | ||
550 | if (zone_bm) { | ||
551 | bm->cur.zone_bm = zone_bm; | ||
552 | bm->cur.block = zone_bm->bm_blocks; | ||
553 | bm->cur.bit = 0; | ||
554 | } | ||
555 | } while (zone_bm); | ||
556 | memory_bm_position_reset(bm); | 566 | memory_bm_position_reset(bm); |
557 | return BM_END_OF_MAP; | 567 | return BM_END_OF_MAP; |
558 | 568 | ||
@@ -808,8 +818,7 @@ static unsigned int count_free_highmem_pages(void) | |||
808 | * We should save the page if it isn't Nosave or NosaveFree, or Reserved, | 818 | * We should save the page if it isn't Nosave or NosaveFree, or Reserved, |
809 | * and it isn't a part of a free chunk of pages. | 819 | * and it isn't a part of a free chunk of pages. |
810 | */ | 820 | */ |
811 | 821 | static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn) | |
812 | static struct page *saveable_highmem_page(unsigned long pfn) | ||
813 | { | 822 | { |
814 | struct page *page; | 823 | struct page *page; |
815 | 824 | ||
@@ -817,6 +826,8 @@ static struct page *saveable_highmem_page(unsigned long pfn) | |||
817 | return NULL; | 826 | return NULL; |
818 | 827 | ||
819 | page = pfn_to_page(pfn); | 828 | page = pfn_to_page(pfn); |
829 | if (page_zone(page) != zone) | ||
830 | return NULL; | ||
820 | 831 | ||
821 | BUG_ON(!PageHighMem(page)); | 832 | BUG_ON(!PageHighMem(page)); |
822 | 833 | ||
@@ -846,13 +857,16 @@ unsigned int count_highmem_pages(void) | |||
846 | mark_free_pages(zone); | 857 | mark_free_pages(zone); |
847 | max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; | 858 | max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; |
848 | for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) | 859 | for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) |
849 | if (saveable_highmem_page(pfn)) | 860 | if (saveable_highmem_page(zone, pfn)) |
850 | n++; | 861 | n++; |
851 | } | 862 | } |
852 | return n; | 863 | return n; |
853 | } | 864 | } |
854 | #else | 865 | #else |
855 | static inline void *saveable_highmem_page(unsigned long pfn) { return NULL; } | 866 | static inline void *saveable_highmem_page(struct zone *z, unsigned long p) |
867 | { | ||
868 | return NULL; | ||
869 | } | ||
856 | #endif /* CONFIG_HIGHMEM */ | 870 | #endif /* CONFIG_HIGHMEM */ |
857 | 871 | ||
858 | /** | 872 | /** |
@@ -863,8 +877,7 @@ static inline void *saveable_highmem_page(unsigned long pfn) { return NULL; } | |||
863 | * of pages statically defined as 'unsaveable', and it isn't a part of | 877 | * of pages statically defined as 'unsaveable', and it isn't a part of |
864 | * a free chunk of pages. | 878 | * a free chunk of pages. |
865 | */ | 879 | */ |
866 | 880 | static struct page *saveable_page(struct zone *zone, unsigned long pfn) | |
867 | static struct page *saveable_page(unsigned long pfn) | ||
868 | { | 881 | { |
869 | struct page *page; | 882 | struct page *page; |
870 | 883 | ||
@@ -872,6 +885,8 @@ static struct page *saveable_page(unsigned long pfn) | |||
872 | return NULL; | 885 | return NULL; |
873 | 886 | ||
874 | page = pfn_to_page(pfn); | 887 | page = pfn_to_page(pfn); |
888 | if (page_zone(page) != zone) | ||
889 | return NULL; | ||
875 | 890 | ||
876 | BUG_ON(PageHighMem(page)); | 891 | BUG_ON(PageHighMem(page)); |
877 | 892 | ||
@@ -903,7 +918,7 @@ unsigned int count_data_pages(void) | |||
903 | mark_free_pages(zone); | 918 | mark_free_pages(zone); |
904 | max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; | 919 | max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; |
905 | for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) | 920 | for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) |
906 | if(saveable_page(pfn)) | 921 | if (saveable_page(zone, pfn)) |
907 | n++; | 922 | n++; |
908 | } | 923 | } |
909 | return n; | 924 | return n; |
@@ -944,7 +959,7 @@ static inline struct page * | |||
944 | page_is_saveable(struct zone *zone, unsigned long pfn) | 959 | page_is_saveable(struct zone *zone, unsigned long pfn) |
945 | { | 960 | { |
946 | return is_highmem(zone) ? | 961 | return is_highmem(zone) ? |
947 | saveable_highmem_page(pfn) : saveable_page(pfn); | 962 | saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn); |
948 | } | 963 | } |
949 | 964 | ||
950 | static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) | 965 | static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) |
@@ -966,7 +981,7 @@ static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) | |||
966 | * data modified by kmap_atomic() | 981 | * data modified by kmap_atomic() |
967 | */ | 982 | */ |
968 | safe_copy_page(buffer, s_page); | 983 | safe_copy_page(buffer, s_page); |
969 | dst = kmap_atomic(pfn_to_page(dst_pfn), KM_USER0); | 984 | dst = kmap_atomic(d_page, KM_USER0); |
970 | memcpy(dst, buffer, PAGE_SIZE); | 985 | memcpy(dst, buffer, PAGE_SIZE); |
971 | kunmap_atomic(dst, KM_USER0); | 986 | kunmap_atomic(dst, KM_USER0); |
972 | } else { | 987 | } else { |
@@ -975,7 +990,7 @@ static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) | |||
975 | } | 990 | } |
976 | } | 991 | } |
977 | #else | 992 | #else |
978 | #define page_is_saveable(zone, pfn) saveable_page(pfn) | 993 | #define page_is_saveable(zone, pfn) saveable_page(zone, pfn) |
979 | 994 | ||
980 | static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) | 995 | static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) |
981 | { | 996 | { |
@@ -1459,9 +1474,7 @@ load_header(struct swsusp_info *info) | |||
1459 | * unpack_orig_pfns - for each element of @buf[] (1 page at a time) set | 1474 | * unpack_orig_pfns - for each element of @buf[] (1 page at a time) set |
1460 | * the corresponding bit in the memory bitmap @bm | 1475 | * the corresponding bit in the memory bitmap @bm |
1461 | */ | 1476 | */ |
1462 | 1477 | static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) | |
1463 | static inline void | ||
1464 | unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) | ||
1465 | { | 1478 | { |
1466 | int j; | 1479 | int j; |
1467 | 1480 | ||
@@ -1469,8 +1482,13 @@ unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) | |||
1469 | if (unlikely(buf[j] == BM_END_OF_MAP)) | 1482 | if (unlikely(buf[j] == BM_END_OF_MAP)) |
1470 | break; | 1483 | break; |
1471 | 1484 | ||
1472 | memory_bm_set_bit(bm, buf[j]); | 1485 | if (memory_bm_pfn_present(bm, buf[j])) |
1486 | memory_bm_set_bit(bm, buf[j]); | ||
1487 | else | ||
1488 | return -EFAULT; | ||
1473 | } | 1489 | } |
1490 | |||
1491 | return 0; | ||
1474 | } | 1492 | } |
1475 | 1493 | ||
1476 | /* List of "safe" pages that may be used to store data loaded from the suspend | 1494 | /* List of "safe" pages that may be used to store data loaded from the suspend |
@@ -1608,7 +1626,7 @@ get_highmem_page_buffer(struct page *page, struct chain_allocator *ca) | |||
1608 | pbe = chain_alloc(ca, sizeof(struct highmem_pbe)); | 1626 | pbe = chain_alloc(ca, sizeof(struct highmem_pbe)); |
1609 | if (!pbe) { | 1627 | if (!pbe) { |
1610 | swsusp_free(); | 1628 | swsusp_free(); |
1611 | return NULL; | 1629 | return ERR_PTR(-ENOMEM); |
1612 | } | 1630 | } |
1613 | pbe->orig_page = page; | 1631 | pbe->orig_page = page; |
1614 | if (safe_highmem_pages > 0) { | 1632 | if (safe_highmem_pages > 0) { |
@@ -1677,7 +1695,7 @@ prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p) | |||
1677 | static inline void * | 1695 | static inline void * |
1678 | get_highmem_page_buffer(struct page *page, struct chain_allocator *ca) | 1696 | get_highmem_page_buffer(struct page *page, struct chain_allocator *ca) |
1679 | { | 1697 | { |
1680 | return NULL; | 1698 | return ERR_PTR(-EINVAL); |
1681 | } | 1699 | } |
1682 | 1700 | ||
1683 | static inline void copy_last_highmem_page(void) {} | 1701 | static inline void copy_last_highmem_page(void) {} |
@@ -1788,8 +1806,13 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) | |||
1788 | static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) | 1806 | static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) |
1789 | { | 1807 | { |
1790 | struct pbe *pbe; | 1808 | struct pbe *pbe; |
1791 | struct page *page = pfn_to_page(memory_bm_next_pfn(bm)); | 1809 | struct page *page; |
1810 | unsigned long pfn = memory_bm_next_pfn(bm); | ||
1792 | 1811 | ||
1812 | if (pfn == BM_END_OF_MAP) | ||
1813 | return ERR_PTR(-EFAULT); | ||
1814 | |||
1815 | page = pfn_to_page(pfn); | ||
1793 | if (PageHighMem(page)) | 1816 | if (PageHighMem(page)) |
1794 | return get_highmem_page_buffer(page, ca); | 1817 | return get_highmem_page_buffer(page, ca); |
1795 | 1818 | ||
@@ -1805,7 +1828,7 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) | |||
1805 | pbe = chain_alloc(ca, sizeof(struct pbe)); | 1828 | pbe = chain_alloc(ca, sizeof(struct pbe)); |
1806 | if (!pbe) { | 1829 | if (!pbe) { |
1807 | swsusp_free(); | 1830 | swsusp_free(); |
1808 | return NULL; | 1831 | return ERR_PTR(-ENOMEM); |
1809 | } | 1832 | } |
1810 | pbe->orig_address = page_address(page); | 1833 | pbe->orig_address = page_address(page); |
1811 | pbe->address = safe_pages_list; | 1834 | pbe->address = safe_pages_list; |
@@ -1868,7 +1891,10 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count) | |||
1868 | return error; | 1891 | return error; |
1869 | 1892 | ||
1870 | } else if (handle->prev <= nr_meta_pages) { | 1893 | } else if (handle->prev <= nr_meta_pages) { |
1871 | unpack_orig_pfns(buffer, ©_bm); | 1894 | error = unpack_orig_pfns(buffer, ©_bm); |
1895 | if (error) | ||
1896 | return error; | ||
1897 | |||
1872 | if (handle->prev == nr_meta_pages) { | 1898 | if (handle->prev == nr_meta_pages) { |
1873 | error = prepare_image(&orig_bm, ©_bm); | 1899 | error = prepare_image(&orig_bm, ©_bm); |
1874 | if (error) | 1900 | if (error) |
@@ -1879,12 +1905,14 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count) | |||
1879 | restore_pblist = NULL; | 1905 | restore_pblist = NULL; |
1880 | handle->buffer = get_buffer(&orig_bm, &ca); | 1906 | handle->buffer = get_buffer(&orig_bm, &ca); |
1881 | handle->sync_read = 0; | 1907 | handle->sync_read = 0; |
1882 | if (!handle->buffer) | 1908 | if (IS_ERR(handle->buffer)) |
1883 | return -ENOMEM; | 1909 | return PTR_ERR(handle->buffer); |
1884 | } | 1910 | } |
1885 | } else { | 1911 | } else { |
1886 | copy_last_highmem_page(); | 1912 | copy_last_highmem_page(); |
1887 | handle->buffer = get_buffer(&orig_bm, &ca); | 1913 | handle->buffer = get_buffer(&orig_bm, &ca); |
1914 | if (IS_ERR(handle->buffer)) | ||
1915 | return PTR_ERR(handle->buffer); | ||
1888 | if (handle->buffer != buffer) | 1916 | if (handle->buffer != buffer) |
1889 | handle->sync_read = 0; | 1917 | handle->sync_read = 0; |
1890 | } | 1918 | } |
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 023ff2a31d89..a92c91451559 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c | |||
@@ -262,3 +262,125 @@ int swsusp_shrink_memory(void) | |||
262 | 262 | ||
263 | return 0; | 263 | return 0; |
264 | } | 264 | } |
265 | |||
266 | /* | ||
267 | * Platforms, like ACPI, may want us to save some memory used by them during | ||
268 | * hibernation and to restore the contents of this memory during the subsequent | ||
269 | * resume. The code below implements a mechanism allowing us to do that. | ||
270 | */ | ||
271 | |||
272 | struct nvs_page { | ||
273 | unsigned long phys_start; | ||
274 | unsigned int size; | ||
275 | void *kaddr; | ||
276 | void *data; | ||
277 | struct list_head node; | ||
278 | }; | ||
279 | |||
280 | static LIST_HEAD(nvs_list); | ||
281 | |||
282 | /** | ||
283 | * hibernate_nvs_register - register platform NVS memory region to save | ||
284 | * @start - physical address of the region | ||
285 | * @size - size of the region | ||
286 | * | ||
287 | * The NVS region need not be page-aligned (both ends) and we arrange | ||
288 | * things so that the data from page-aligned addresses in this region will | ||
289 | * be copied into separate RAM pages. | ||
290 | */ | ||
291 | int hibernate_nvs_register(unsigned long start, unsigned long size) | ||
292 | { | ||
293 | struct nvs_page *entry, *next; | ||
294 | |||
295 | while (size > 0) { | ||
296 | unsigned int nr_bytes; | ||
297 | |||
298 | entry = kzalloc(sizeof(struct nvs_page), GFP_KERNEL); | ||
299 | if (!entry) | ||
300 | goto Error; | ||
301 | |||
302 | list_add_tail(&entry->node, &nvs_list); | ||
303 | entry->phys_start = start; | ||
304 | nr_bytes = PAGE_SIZE - (start & ~PAGE_MASK); | ||
305 | entry->size = (size < nr_bytes) ? size : nr_bytes; | ||
306 | |||
307 | start += entry->size; | ||
308 | size -= entry->size; | ||
309 | } | ||
310 | return 0; | ||
311 | |||
312 | Error: | ||
313 | list_for_each_entry_safe(entry, next, &nvs_list, node) { | ||
314 | list_del(&entry->node); | ||
315 | kfree(entry); | ||
316 | } | ||
317 | return -ENOMEM; | ||
318 | } | ||
319 | |||
320 | /** | ||
321 | * hibernate_nvs_free - free data pages allocated for saving NVS regions | ||
322 | */ | ||
323 | void hibernate_nvs_free(void) | ||
324 | { | ||
325 | struct nvs_page *entry; | ||
326 | |||
327 | list_for_each_entry(entry, &nvs_list, node) | ||
328 | if (entry->data) { | ||
329 | free_page((unsigned long)entry->data); | ||
330 | entry->data = NULL; | ||
331 | if (entry->kaddr) { | ||
332 | iounmap(entry->kaddr); | ||
333 | entry->kaddr = NULL; | ||
334 | } | ||
335 | } | ||
336 | } | ||
337 | |||
338 | /** | ||
339 | * hibernate_nvs_alloc - allocate memory necessary for saving NVS regions | ||
340 | */ | ||
341 | int hibernate_nvs_alloc(void) | ||
342 | { | ||
343 | struct nvs_page *entry; | ||
344 | |||
345 | list_for_each_entry(entry, &nvs_list, node) { | ||
346 | entry->data = (void *)__get_free_page(GFP_KERNEL); | ||
347 | if (!entry->data) { | ||
348 | hibernate_nvs_free(); | ||
349 | return -ENOMEM; | ||
350 | } | ||
351 | } | ||
352 | return 0; | ||
353 | } | ||
354 | |||
355 | /** | ||
356 | * hibernate_nvs_save - save NVS memory regions | ||
357 | */ | ||
358 | void hibernate_nvs_save(void) | ||
359 | { | ||
360 | struct nvs_page *entry; | ||
361 | |||
362 | printk(KERN_INFO "PM: Saving platform NVS memory\n"); | ||
363 | |||
364 | list_for_each_entry(entry, &nvs_list, node) | ||
365 | if (entry->data) { | ||
366 | entry->kaddr = ioremap(entry->phys_start, entry->size); | ||
367 | memcpy(entry->data, entry->kaddr, entry->size); | ||
368 | } | ||
369 | } | ||
370 | |||
371 | /** | ||
372 | * hibernate_nvs_restore - restore NVS memory regions | ||
373 | * | ||
374 | * This function is going to be called with interrupts disabled, so it | ||
375 | * cannot iounmap the virtual addresses used to access the NVS region. | ||
376 | */ | ||
377 | void hibernate_nvs_restore(void) | ||
378 | { | ||
379 | struct nvs_page *entry; | ||
380 | |||
381 | printk(KERN_INFO "PM: Restoring platform NVS memory\n"); | ||
382 | |||
383 | list_for_each_entry(entry, &nvs_list, node) | ||
384 | if (entry->data) | ||
385 | memcpy(entry->kaddr, entry->data, entry->size); | ||
386 | } | ||
diff --git a/kernel/res_counter.c b/kernel/res_counter.c index f275c8eca772..bf8e7534c803 100644 --- a/kernel/res_counter.c +++ b/kernel/res_counter.c | |||
@@ -15,10 +15,11 @@ | |||
15 | #include <linux/uaccess.h> | 15 | #include <linux/uaccess.h> |
16 | #include <linux/mm.h> | 16 | #include <linux/mm.h> |
17 | 17 | ||
18 | void res_counter_init(struct res_counter *counter) | 18 | void res_counter_init(struct res_counter *counter, struct res_counter *parent) |
19 | { | 19 | { |
20 | spin_lock_init(&counter->lock); | 20 | spin_lock_init(&counter->lock); |
21 | counter->limit = (unsigned long long)LLONG_MAX; | 21 | counter->limit = (unsigned long long)LLONG_MAX; |
22 | counter->parent = parent; | ||
22 | } | 23 | } |
23 | 24 | ||
24 | int res_counter_charge_locked(struct res_counter *counter, unsigned long val) | 25 | int res_counter_charge_locked(struct res_counter *counter, unsigned long val) |
@@ -34,14 +35,34 @@ int res_counter_charge_locked(struct res_counter *counter, unsigned long val) | |||
34 | return 0; | 35 | return 0; |
35 | } | 36 | } |
36 | 37 | ||
37 | int res_counter_charge(struct res_counter *counter, unsigned long val) | 38 | int res_counter_charge(struct res_counter *counter, unsigned long val, |
39 | struct res_counter **limit_fail_at) | ||
38 | { | 40 | { |
39 | int ret; | 41 | int ret; |
40 | unsigned long flags; | 42 | unsigned long flags; |
41 | 43 | struct res_counter *c, *u; | |
42 | spin_lock_irqsave(&counter->lock, flags); | 44 | |
43 | ret = res_counter_charge_locked(counter, val); | 45 | *limit_fail_at = NULL; |
44 | spin_unlock_irqrestore(&counter->lock, flags); | 46 | local_irq_save(flags); |
47 | for (c = counter; c != NULL; c = c->parent) { | ||
48 | spin_lock(&c->lock); | ||
49 | ret = res_counter_charge_locked(c, val); | ||
50 | spin_unlock(&c->lock); | ||
51 | if (ret < 0) { | ||
52 | *limit_fail_at = c; | ||
53 | goto undo; | ||
54 | } | ||
55 | } | ||
56 | ret = 0; | ||
57 | goto done; | ||
58 | undo: | ||
59 | for (u = counter; u != c; u = u->parent) { | ||
60 | spin_lock(&u->lock); | ||
61 | res_counter_uncharge_locked(u, val); | ||
62 | spin_unlock(&u->lock); | ||
63 | } | ||
64 | done: | ||
65 | local_irq_restore(flags); | ||
45 | return ret; | 66 | return ret; |
46 | } | 67 | } |
47 | 68 | ||
@@ -56,10 +77,15 @@ void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val) | |||
56 | void res_counter_uncharge(struct res_counter *counter, unsigned long val) | 77 | void res_counter_uncharge(struct res_counter *counter, unsigned long val) |
57 | { | 78 | { |
58 | unsigned long flags; | 79 | unsigned long flags; |
80 | struct res_counter *c; | ||
59 | 81 | ||
60 | spin_lock_irqsave(&counter->lock, flags); | 82 | local_irq_save(flags); |
61 | res_counter_uncharge_locked(counter, val); | 83 | for (c = counter; c != NULL; c = c->parent) { |
62 | spin_unlock_irqrestore(&counter->lock, flags); | 84 | spin_lock(&c->lock); |
85 | res_counter_uncharge_locked(c, val); | ||
86 | spin_unlock(&c->lock); | ||
87 | } | ||
88 | local_irq_restore(flags); | ||
63 | } | 89 | } |
64 | 90 | ||
65 | 91 | ||
diff --git a/kernel/resource.c b/kernel/resource.c index e633106b12f6..ca6a1536b205 100644 --- a/kernel/resource.c +++ b/kernel/resource.c | |||
@@ -623,7 +623,7 @@ resource_size_t resource_alignment(struct resource *res) | |||
623 | */ | 623 | */ |
624 | struct resource * __request_region(struct resource *parent, | 624 | struct resource * __request_region(struct resource *parent, |
625 | resource_size_t start, resource_size_t n, | 625 | resource_size_t start, resource_size_t n, |
626 | const char *name) | 626 | const char *name, int flags) |
627 | { | 627 | { |
628 | struct resource *res = kzalloc(sizeof(*res), GFP_KERNEL); | 628 | struct resource *res = kzalloc(sizeof(*res), GFP_KERNEL); |
629 | 629 | ||
@@ -634,6 +634,7 @@ struct resource * __request_region(struct resource *parent, | |||
634 | res->start = start; | 634 | res->start = start; |
635 | res->end = start + n - 1; | 635 | res->end = start + n - 1; |
636 | res->flags = IORESOURCE_BUSY; | 636 | res->flags = IORESOURCE_BUSY; |
637 | res->flags |= flags; | ||
637 | 638 | ||
638 | write_lock(&resource_lock); | 639 | write_lock(&resource_lock); |
639 | 640 | ||
@@ -679,7 +680,7 @@ int __check_region(struct resource *parent, resource_size_t start, | |||
679 | { | 680 | { |
680 | struct resource * res; | 681 | struct resource * res; |
681 | 682 | ||
682 | res = __request_region(parent, start, n, "check-region"); | 683 | res = __request_region(parent, start, n, "check-region", 0); |
683 | if (!res) | 684 | if (!res) |
684 | return -EBUSY; | 685 | return -EBUSY; |
685 | 686 | ||
@@ -776,7 +777,7 @@ struct resource * __devm_request_region(struct device *dev, | |||
776 | dr->start = start; | 777 | dr->start = start; |
777 | dr->n = n; | 778 | dr->n = n; |
778 | 779 | ||
779 | res = __request_region(parent, start, n, name); | 780 | res = __request_region(parent, start, n, name, 0); |
780 | if (res) | 781 | if (res) |
781 | devres_add(dev, dr); | 782 | devres_add(dev, dr); |
782 | else | 783 | else |
@@ -876,3 +877,57 @@ int iomem_map_sanity_check(resource_size_t addr, unsigned long size) | |||
876 | 877 | ||
877 | return err; | 878 | return err; |
878 | } | 879 | } |
880 | |||
881 | #ifdef CONFIG_STRICT_DEVMEM | ||
882 | static int strict_iomem_checks = 1; | ||
883 | #else | ||
884 | static int strict_iomem_checks; | ||
885 | #endif | ||
886 | |||
887 | /* | ||
888 | * check if an address is reserved in the iomem resource tree | ||
889 | * returns 1 if reserved, 0 if not reserved. | ||
890 | */ | ||
891 | int iomem_is_exclusive(u64 addr) | ||
892 | { | ||
893 | struct resource *p = &iomem_resource; | ||
894 | int err = 0; | ||
895 | loff_t l; | ||
896 | int size = PAGE_SIZE; | ||
897 | |||
898 | if (!strict_iomem_checks) | ||
899 | return 0; | ||
900 | |||
901 | addr = addr & PAGE_MASK; | ||
902 | |||
903 | read_lock(&resource_lock); | ||
904 | for (p = p->child; p ; p = r_next(NULL, p, &l)) { | ||
905 | /* | ||
906 | * We can probably skip the resources without | ||
907 | * IORESOURCE_IO attribute? | ||
908 | */ | ||
909 | if (p->start >= addr + size) | ||
910 | break; | ||
911 | if (p->end < addr) | ||
912 | continue; | ||
913 | if (p->flags & IORESOURCE_BUSY && | ||
914 | p->flags & IORESOURCE_EXCLUSIVE) { | ||
915 | err = 1; | ||
916 | break; | ||
917 | } | ||
918 | } | ||
919 | read_unlock(&resource_lock); | ||
920 | |||
921 | return err; | ||
922 | } | ||
923 | |||
924 | static int __init strict_iomem(char *str) | ||
925 | { | ||
926 | if (strstr(str, "relaxed")) | ||
927 | strict_iomem_checks = 0; | ||
928 | if (strstr(str, "strict")) | ||
929 | strict_iomem_checks = 1; | ||
930 | return 1; | ||
931 | } | ||
932 | |||
933 | __setup("iomem=", strict_iomem); | ||
diff --git a/kernel/sched.c b/kernel/sched.c index 2e3545f57e77..deb5ac8c12f3 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -3728,8 +3728,13 @@ redo: | |||
3728 | } | 3728 | } |
3729 | 3729 | ||
3730 | double_unlock_balance(this_rq, busiest); | 3730 | double_unlock_balance(this_rq, busiest); |
3731 | /* | ||
3732 | * Should not call ttwu while holding a rq->lock | ||
3733 | */ | ||
3734 | spin_unlock(&this_rq->lock); | ||
3731 | if (active_balance) | 3735 | if (active_balance) |
3732 | wake_up_process(busiest->migration_thread); | 3736 | wake_up_process(busiest->migration_thread); |
3737 | spin_lock(&this_rq->lock); | ||
3733 | 3738 | ||
3734 | } else | 3739 | } else |
3735 | sd->nr_balance_failed = 0; | 3740 | sd->nr_balance_failed = 0; |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index e0c0b4bc3f08..8e1352c75557 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -1617,8 +1617,6 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) | |||
1617 | } | 1617 | } |
1618 | } | 1618 | } |
1619 | 1619 | ||
1620 | #define swap(a, b) do { typeof(a) tmp = (a); (a) = (b); (b) = tmp; } while (0) | ||
1621 | |||
1622 | /* | 1620 | /* |
1623 | * Share the fairness runtime between parent and child, thus the | 1621 | * Share the fairness runtime between parent and child, thus the |
1624 | * total amount of pressure for CPU stays equal - new tasks | 1622 | * total amount of pressure for CPU stays equal - new tasks |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 92f6e5bc3c24..89d74436318c 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -82,6 +82,9 @@ extern int percpu_pagelist_fraction; | |||
82 | extern int compat_log; | 82 | extern int compat_log; |
83 | extern int latencytop_enabled; | 83 | extern int latencytop_enabled; |
84 | extern int sysctl_nr_open_min, sysctl_nr_open_max; | 84 | extern int sysctl_nr_open_min, sysctl_nr_open_max; |
85 | #ifndef CONFIG_MMU | ||
86 | extern int sysctl_nr_trim_pages; | ||
87 | #endif | ||
85 | #ifdef CONFIG_RCU_TORTURE_TEST | 88 | #ifdef CONFIG_RCU_TORTURE_TEST |
86 | extern int rcutorture_runnable; | 89 | extern int rcutorture_runnable; |
87 | #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ | 90 | #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ |
@@ -1102,6 +1105,17 @@ static struct ctl_table vm_table[] = { | |||
1102 | .mode = 0644, | 1105 | .mode = 0644, |
1103 | .proc_handler = &proc_dointvec | 1106 | .proc_handler = &proc_dointvec |
1104 | }, | 1107 | }, |
1108 | #else | ||
1109 | { | ||
1110 | .ctl_name = CTL_UNNUMBERED, | ||
1111 | .procname = "nr_trim_pages", | ||
1112 | .data = &sysctl_nr_trim_pages, | ||
1113 | .maxlen = sizeof(sysctl_nr_trim_pages), | ||
1114 | .mode = 0644, | ||
1115 | .proc_handler = &proc_dointvec_minmax, | ||
1116 | .strategy = &sysctl_intvec, | ||
1117 | .extra1 = &zero, | ||
1118 | }, | ||
1105 | #endif | 1119 | #endif |
1106 | { | 1120 | { |
1107 | .ctl_name = VM_LAPTOP_MODE, | 1121 | .ctl_name = VM_LAPTOP_MODE, |
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index a9d9760dc7b6..8b0daf0662ef 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
@@ -168,7 +168,13 @@ rb_event_length(struct ring_buffer_event *event) | |||
168 | */ | 168 | */ |
169 | unsigned ring_buffer_event_length(struct ring_buffer_event *event) | 169 | unsigned ring_buffer_event_length(struct ring_buffer_event *event) |
170 | { | 170 | { |
171 | return rb_event_length(event); | 171 | unsigned length = rb_event_length(event); |
172 | if (event->type != RINGBUF_TYPE_DATA) | ||
173 | return length; | ||
174 | length -= RB_EVNT_HDR_SIZE; | ||
175 | if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0])) | ||
176 | length -= sizeof(event->array[0]); | ||
177 | return length; | ||
172 | } | 178 | } |
173 | EXPORT_SYMBOL_GPL(ring_buffer_event_length); | 179 | EXPORT_SYMBOL_GPL(ring_buffer_event_length); |
174 | 180 | ||