author    Ingo Molnar <mingo@elte.hu>	2008-07-25 07:08:16 -0400
committer Ingo Molnar <mingo@elte.hu>	2008-07-25 07:08:16 -0400
commit    10a010f6953b5a14ba2f0be40a4fce1bea220875 (patch)
tree      19aadf718c796bc7fae0a1a1c970d84d67c541d4 /kernel
parent    510b37258dfd61693ca6c039865c78bd996e3718 (diff)
parent    fb2e405fc1fc8b20d9c78eaa1c7fd5a297efde43 (diff)

Merge branch 'linus' into x86/x2apic

Conflicts:

	drivers/pci/dmar.c

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/Kconfig.hz            |   2
-rw-r--r--	kernel/Makefile              |   2
-rw-r--r--	kernel/auditsc.c             |   3
-rw-r--r--	kernel/capability.c          | 338
-rw-r--r--	kernel/cpu.c                 |  42
-rw-r--r--	kernel/cpuset.c              |   6
-rw-r--r--	kernel/exec_domain.c         |   2
-rw-r--r--	kernel/fork.c                |   9
-rw-r--r--	kernel/irq/manage.c          | 103
-rw-r--r--	kernel/kmod.c                |   6
-rw-r--r--	kernel/kthread.c             |   4
-rw-r--r--	kernel/module.c              | 336
-rw-r--r--	kernel/power/Kconfig         |  11
-rw-r--r--	kernel/power/main.c          | 194
-rw-r--r--	kernel/power/poweroff.c      |   4
-rw-r--r--	kernel/power/process.c       |   2
-rw-r--r--	kernel/power/snapshot.c      |  88
-rw-r--r--	kernel/printk.c              |   2
-rw-r--r--	kernel/rcuclassic.c          |   2
-rw-r--r--	kernel/rcupreempt.c          |  10
-rw-r--r--	kernel/rtmutex-tester.c      |   7
-rw-r--r--	kernel/sched.c               | 357
-rw-r--r--	kernel/sched_fair.c          |  10
-rw-r--r--	kernel/sched_rt.c            |  83
-rw-r--r--	kernel/softirq.c             |   2
-rw-r--r--	kernel/softlockup.c          |  45
-rw-r--r--	kernel/stop_machine.c        |   3
-rw-r--r--	kernel/sys_ni.c              |   5
-rw-r--r--	kernel/sysctl.c              |  34
-rw-r--r--	kernel/taskstats.c           |   4
-rw-r--r--	kernel/time/clocksource.c    |  12
-rw-r--r--	kernel/time/tick-broadcast.c |   3
-rw-r--r--	kernel/time/tick-common.c    |  14
-rw-r--r--	kernel/time/tick-sched.c     |  16
-rw-r--r--	kernel/trace/trace_sysprof.c |   4
-rw-r--r--	kernel/workqueue.c           |  45
36 files changed, 1125 insertions(+), 685 deletions(-)
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 526128a2e622..382dd5a8b2d7 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -55,4 +55,4 @@ config HZ
 	default 1000 if HZ_1000
 
 config SCHED_HRTICK
-	def_bool HIGH_RES_TIMERS && X86
+	def_bool HIGH_RES_TIMERS && USE_GENERIC_SMP_HELPERS
diff --git a/kernel/Makefile b/kernel/Makefile
index 985ddb7da4d0..15ab63ffe64d 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -11,6 +11,8 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
 	    hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
 	    notifier.o ksysfs.o pm_qos_params.o sched_clock.o
 
+CFLAGS_REMOVE_sched.o = -mno-spe
+
 ifdef CONFIG_FTRACE
 # Do not trace debug files and internal ftrace files
 CFLAGS_REMOVE_lockdep.o = -pg
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index c10e7aae04d7..4699950e65bd 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1476,7 +1476,8 @@ void audit_syscall_entry(int arch, int major,
 	struct audit_context *context = tsk->audit_context;
 	enum audit_state state;
 
-	BUG_ON(!context);
+	if (unlikely(!context))
+		return;
 
 	/*
 	 * This happens only on certain architectures that make system
diff --git a/kernel/capability.c b/kernel/capability.c
index 901e0fdc3fff..0101e847603e 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -115,11 +115,208 @@ static int cap_validate_magic(cap_user_header_t header, unsigned *tocopy)
115 return 0; 115 return 0;
116} 116}
117 117
118#ifndef CONFIG_SECURITY_FILE_CAPABILITIES
119
120/*
121 * Without filesystem capability support, we nominally support one process
122 * setting the capabilities of another
123 */
124static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp,
125 kernel_cap_t *pIp, kernel_cap_t *pPp)
126{
127 struct task_struct *target;
128 int ret;
129
130 spin_lock(&task_capability_lock);
131 read_lock(&tasklist_lock);
132
133 if (pid && pid != task_pid_vnr(current)) {
134 target = find_task_by_vpid(pid);
135 if (!target) {
136 ret = -ESRCH;
137 goto out;
138 }
139 } else
140 target = current;
141
142 ret = security_capget(target, pEp, pIp, pPp);
143
144out:
145 read_unlock(&tasklist_lock);
146 spin_unlock(&task_capability_lock);
147
148 return ret;
149}
150
151/*
152 * cap_set_pg - set capabilities for all processes in a given process
153 * group. We call this holding task_capability_lock and tasklist_lock.
154 */
155static inline int cap_set_pg(int pgrp_nr, kernel_cap_t *effective,
156 kernel_cap_t *inheritable,
157 kernel_cap_t *permitted)
158{
159 struct task_struct *g, *target;
160 int ret = -EPERM;
161 int found = 0;
162 struct pid *pgrp;
163
164 spin_lock(&task_capability_lock);
165 read_lock(&tasklist_lock);
166
167 pgrp = find_vpid(pgrp_nr);
168 do_each_pid_task(pgrp, PIDTYPE_PGID, g) {
169 target = g;
170 while_each_thread(g, target) {
171 if (!security_capset_check(target, effective,
172 inheritable, permitted)) {
173 security_capset_set(target, effective,
174 inheritable, permitted);
175 ret = 0;
176 }
177 found = 1;
178 }
179 } while_each_pid_task(pgrp, PIDTYPE_PGID, g);
180
181 read_unlock(&tasklist_lock);
182 spin_unlock(&task_capability_lock);
183
184 if (!found)
185 ret = 0;
186 return ret;
187}
188
118/* 189/*
119 * For sys_getproccap() and sys_setproccap(), any of the three 190 * cap_set_all - set capabilities for all processes other than init
120 * capability set pointers may be NULL -- indicating that that set is 191 * and self. We call this holding task_capability_lock and tasklist_lock.
121 * uninteresting and/or not to be changed.
122 */ 192 */
193static inline int cap_set_all(kernel_cap_t *effective,
194 kernel_cap_t *inheritable,
195 kernel_cap_t *permitted)
196{
197 struct task_struct *g, *target;
198 int ret = -EPERM;
199 int found = 0;
200
201 spin_lock(&task_capability_lock);
202 read_lock(&tasklist_lock);
203
204 do_each_thread(g, target) {
205 if (target == current
206 || is_container_init(target->group_leader))
207 continue;
208 found = 1;
209 if (security_capset_check(target, effective, inheritable,
210 permitted))
211 continue;
212 ret = 0;
213 security_capset_set(target, effective, inheritable, permitted);
214 } while_each_thread(g, target);
215
216 read_unlock(&tasklist_lock);
217 spin_unlock(&task_capability_lock);
218
219 if (!found)
220 ret = 0;
221
222 return ret;
223}
224
225/*
226 * Given the target pid does not refer to the current process we
227 * need more elaborate support... (This support is not present when
228 * filesystem capabilities are configured.)
229 */
230static inline int do_sys_capset_other_tasks(pid_t pid, kernel_cap_t *effective,
231 kernel_cap_t *inheritable,
232 kernel_cap_t *permitted)
233{
234 struct task_struct *target;
235 int ret;
236
237 if (!capable(CAP_SETPCAP))
238 return -EPERM;
239
240 if (pid == -1) /* all procs other than current and init */
241 return cap_set_all(effective, inheritable, permitted);
242
243 else if (pid < 0) /* all procs in process group */
244 return cap_set_pg(-pid, effective, inheritable, permitted);
245
246 /* target != current */
247 spin_lock(&task_capability_lock);
248 read_lock(&tasklist_lock);
249
250 target = find_task_by_vpid(pid);
251 if (!target)
252 ret = -ESRCH;
253 else {
254 ret = security_capset_check(target, effective, inheritable,
255 permitted);
256
257 /* having verified that the proposed changes are legal,
258 we now put them into effect. */
259 if (!ret)
260 security_capset_set(target, effective, inheritable,
261 permitted);
262 }
263
264 read_unlock(&tasklist_lock);
265 spin_unlock(&task_capability_lock);
266
267 return ret;
268}
269
270#else /* ie., def CONFIG_SECURITY_FILE_CAPABILITIES */
271
272/*
273 * If we have configured with filesystem capability support, then the
274 * only thing that can change the capabilities of the current process
275 * is the current process. As such, we can't be in this code at the
276 * same time as we are in the process of setting capabilities in this
277 * process. The net result is that we can limit our use of locks to
278 * when we are reading the caps of another process.
279 */
280static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp,
281 kernel_cap_t *pIp, kernel_cap_t *pPp)
282{
283 int ret;
284
285 if (pid && (pid != task_pid_vnr(current))) {
286 struct task_struct *target;
287
288 spin_lock(&task_capability_lock);
289 read_lock(&tasklist_lock);
290
291 target = find_task_by_vpid(pid);
292 if (!target)
293 ret = -ESRCH;
294 else
295 ret = security_capget(target, pEp, pIp, pPp);
296
297 read_unlock(&tasklist_lock);
298 spin_unlock(&task_capability_lock);
299 } else
300 ret = security_capget(current, pEp, pIp, pPp);
301
302 return ret;
303}
304
305/*
306 * With filesystem capability support configured, the kernel does not
307 * permit the changing of capabilities in one process by another
308 * process. (CAP_SETPCAP has much less broad semantics when configured
309 * this way.)
310 */
311static inline int do_sys_capset_other_tasks(pid_t pid,
312 kernel_cap_t *effective,
313 kernel_cap_t *inheritable,
314 kernel_cap_t *permitted)
315{
316 return -EPERM;
317}
318
319#endif /* ie., ndef CONFIG_SECURITY_FILE_CAPABILITIES */
123 320
124/* 321/*
125 * Atomically modify the effective capabilities returning the original 322 * Atomically modify the effective capabilities returning the original
@@ -155,7 +352,6 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr)
155{ 352{
156 int ret = 0; 353 int ret = 0;
157 pid_t pid; 354 pid_t pid;
158 struct task_struct *target;
159 unsigned tocopy; 355 unsigned tocopy;
160 kernel_cap_t pE, pI, pP; 356 kernel_cap_t pE, pI, pP;
161 357
@@ -169,23 +365,7 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr)
169 if (pid < 0) 365 if (pid < 0)
170 return -EINVAL; 366 return -EINVAL;
171 367
172 spin_lock(&task_capability_lock); 368 ret = cap_get_target_pid(pid, &pE, &pI, &pP);
173 read_lock(&tasklist_lock);
174
175 if (pid && pid != task_pid_vnr(current)) {
176 target = find_task_by_vpid(pid);
177 if (!target) {
178 ret = -ESRCH;
179 goto out;
180 }
181 } else
182 target = current;
183
184 ret = security_capget(target, &pE, &pI, &pP);
185
186out:
187 read_unlock(&tasklist_lock);
188 spin_unlock(&task_capability_lock);
189 369
190 if (!ret) { 370 if (!ret) {
191 struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S]; 371 struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S];
@@ -216,7 +396,6 @@ out:
216 * before modification is attempted and the application 396 * before modification is attempted and the application
217 * fails. 397 * fails.
218 */ 398 */
219
220 if (copy_to_user(dataptr, kdata, tocopy 399 if (copy_to_user(dataptr, kdata, tocopy
221 * sizeof(struct __user_cap_data_struct))) { 400 * sizeof(struct __user_cap_data_struct))) {
222 return -EFAULT; 401 return -EFAULT;
@@ -226,70 +405,8 @@ out:
226 return ret; 405 return ret;
227} 406}
228 407
229/*
230 * cap_set_pg - set capabilities for all processes in a given process
231 * group. We call this holding task_capability_lock and tasklist_lock.
232 */
233static inline int cap_set_pg(int pgrp_nr, kernel_cap_t *effective,
234 kernel_cap_t *inheritable,
235 kernel_cap_t *permitted)
236{
237 struct task_struct *g, *target;
238 int ret = -EPERM;
239 int found = 0;
240 struct pid *pgrp;
241
242 pgrp = find_vpid(pgrp_nr);
243 do_each_pid_task(pgrp, PIDTYPE_PGID, g) {
244 target = g;
245 while_each_thread(g, target) {
246 if (!security_capset_check(target, effective,
247 inheritable,
248 permitted)) {
249 security_capset_set(target, effective,
250 inheritable,
251 permitted);
252 ret = 0;
253 }
254 found = 1;
255 }
256 } while_each_pid_task(pgrp, PIDTYPE_PGID, g);
257
258 if (!found)
259 ret = 0;
260 return ret;
261}
262
263/*
264 * cap_set_all - set capabilities for all processes other than init
265 * and self. We call this holding task_capability_lock and tasklist_lock.
266 */
267static inline int cap_set_all(kernel_cap_t *effective,
268 kernel_cap_t *inheritable,
269 kernel_cap_t *permitted)
270{
271 struct task_struct *g, *target;
272 int ret = -EPERM;
273 int found = 0;
274
275 do_each_thread(g, target) {
276 if (target == current || is_container_init(target->group_leader))
277 continue;
278 found = 1;
279 if (security_capset_check(target, effective, inheritable,
280 permitted))
281 continue;
282 ret = 0;
283 security_capset_set(target, effective, inheritable, permitted);
284 } while_each_thread(g, target);
285
286 if (!found)
287 ret = 0;
288 return ret;
289}
290
291/** 408/**
292 * sys_capset - set capabilities for a process or a group of processes 409 * sys_capset - set capabilities for a process or (*) a group of processes
293 * @header: pointer to struct that contains capability version and 410 * @header: pointer to struct that contains capability version and
294 * target pid data 411 * target pid data
295 * @data: pointer to struct that contains the effective, permitted, 412 * @data: pointer to struct that contains the effective, permitted,
@@ -313,7 +430,6 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
313 struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S]; 430 struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S];
314 unsigned i, tocopy; 431 unsigned i, tocopy;
315 kernel_cap_t inheritable, permitted, effective; 432 kernel_cap_t inheritable, permitted, effective;
316 struct task_struct *target;
317 int ret; 433 int ret;
318 pid_t pid; 434 pid_t pid;
319 435
@@ -324,9 +440,6 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
324 if (get_user(pid, &header->pid)) 440 if (get_user(pid, &header->pid))
325 return -EFAULT; 441 return -EFAULT;
326 442
327 if (pid && pid != task_pid_vnr(current) && !capable(CAP_SETPCAP))
328 return -EPERM;
329
330 if (copy_from_user(&kdata, data, tocopy 443 if (copy_from_user(&kdata, data, tocopy
331 * sizeof(struct __user_cap_data_struct))) { 444 * sizeof(struct __user_cap_data_struct))) {
332 return -EFAULT; 445 return -EFAULT;
@@ -344,40 +457,31 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
344 i++; 457 i++;
345 } 458 }
346 459
347 spin_lock(&task_capability_lock); 460 if (pid && (pid != task_pid_vnr(current)))
348 read_lock(&tasklist_lock); 461 ret = do_sys_capset_other_tasks(pid, &effective, &inheritable,
349 462 &permitted);
350 if (pid > 0 && pid != task_pid_vnr(current)) { 463 else {
351 target = find_task_by_vpid(pid); 464 /*
352 if (!target) { 465 * This lock is required even when filesystem
353 ret = -ESRCH; 466 * capability support is configured - it protects the
354 goto out; 467 * sys_capget() call from returning incorrect data in
355 } 468 * the case that the targeted process is not the
356 } else 469 * current one.
357 target = current; 470 */
358 471 spin_lock(&task_capability_lock);
359 ret = 0;
360
361 /* having verified that the proposed changes are legal,
362 we now put them into effect. */
363 if (pid < 0) {
364 if (pid == -1) /* all procs other than current and init */
365 ret = cap_set_all(&effective, &inheritable, &permitted);
366 472
367 else /* all procs in process group */ 473 ret = security_capset_check(current, &effective, &inheritable,
368 ret = cap_set_pg(-pid, &effective, &inheritable,
369 &permitted);
370 } else {
371 ret = security_capset_check(target, &effective, &inheritable,
372 &permitted); 474 &permitted);
475 /*
476 * Having verified that the proposed changes are
477 * legal, we now put them into effect.
478 */
373 if (!ret) 479 if (!ret)
374 security_capset_set(target, &effective, &inheritable, 480 security_capset_set(current, &effective, &inheritable,
375 &permitted); 481 &permitted);
482 spin_unlock(&task_capability_lock);
376 } 483 }
377 484
378out:
379 read_unlock(&tasklist_lock);
380 spin_unlock(&task_capability_lock);
381 485
382 return ret; 486 return ret;
383} 487}
diff --git a/kernel/cpu.c b/kernel/cpu.c
index cfb1d43ab801..2cc409ce0a8f 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -64,6 +64,8 @@ void __init cpu_hotplug_init(void)
 	cpu_hotplug.refcount = 0;
 }
 
+cpumask_t cpu_active_map;
+
 #ifdef CONFIG_HOTPLUG_CPU
 
 void get_online_cpus(void)
@@ -291,11 +293,30 @@ int __ref cpu_down(unsigned int cpu)
 	int err = 0;
 
 	cpu_maps_update_begin();
-	if (cpu_hotplug_disabled)
+
+	if (cpu_hotplug_disabled) {
 		err = -EBUSY;
-	else
-		err = _cpu_down(cpu, 0);
+		goto out;
+	}
+
+	cpu_clear(cpu, cpu_active_map);
+
+	/*
+	 * Make sure the all cpus did the reschedule and are not
+	 * using stale version of the cpu_active_map.
+	 * This is not strictly necessary becuase stop_machine()
+	 * that we run down the line already provides the required
+	 * synchronization. But it's really a side effect and we do not
+	 * want to depend on the innards of the stop_machine here.
+	 */
+	synchronize_sched();
+
+	err = _cpu_down(cpu, 0);
 
+	if (cpu_online(cpu))
+		cpu_set(cpu, cpu_active_map);
+
+out:
 	cpu_maps_update_done();
 	return err;
 }
@@ -355,11 +376,18 @@ int __cpuinit cpu_up(unsigned int cpu)
 	}
 
 	cpu_maps_update_begin();
-	if (cpu_hotplug_disabled)
+
+	if (cpu_hotplug_disabled) {
 		err = -EBUSY;
-	else
-		err = _cpu_up(cpu, 0);
+		goto out;
+	}
+
+	err = _cpu_up(cpu, 0);
 
+	if (cpu_online(cpu))
+		cpu_set(cpu, cpu_active_map);
+
+out:
 	cpu_maps_update_done();
 	return err;
 }
@@ -413,7 +441,7 @@ void __ref enable_nonboot_cpus(void)
 		goto out;
 
 	printk("Enabling non-boot CPUs ...\n");
-	for_each_cpu_mask(cpu, frozen_cpus) {
+	for_each_cpu_mask_nr(cpu, frozen_cpus) {
 		error = _cpu_up(cpu, 1);
 		if (!error) {
 			printk("CPU%d is up\n", cpu);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 459d601947a8..d5738910c34c 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -564,7 +564,7 @@ update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
  * partition_sched_domains().
  */
 
-static void rebuild_sched_domains(void)
+void rebuild_sched_domains(void)
 {
 	struct kfifo *q;	/* queue of cpusets to be scanned */
 	struct cpuset *cp;	/* scans q */
@@ -679,7 +679,9 @@ restart:
 			if (apn == b->pn) {
 				cpus_or(*dp, *dp, b->cpus_allowed);
 				b->pn = -1;
-				update_domain_attr(dattr, b);
+				if (dattr)
+					update_domain_attr(dattr
+								+ nslot, b);
 			}
 		}
 		nslot++;
diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c
index a9e6bad9f706..c1ef192aa655 100644
--- a/kernel/exec_domain.c
+++ b/kernel/exec_domain.c
@@ -65,7 +65,7 @@ lookup_exec_domain(u_long personality)
 			goto out;
 	}
 
-#ifdef CONFIG_KMOD
+#ifdef CONFIG_MODULES
 	read_unlock(&exec_domains_lock);
 	request_module("personality-%ld", pers);
 	read_lock(&exec_domains_lock);
diff --git a/kernel/fork.c b/kernel/fork.c
index adefc1131f27..552c8d8e77ad 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -33,6 +33,7 @@
 #include <linux/cpu.h>
 #include <linux/cgroup.h>
 #include <linux/security.h>
+#include <linux/hugetlb.h>
 #include <linux/swap.h>
 #include <linux/syscalls.h>
 #include <linux/jiffies.h>
@@ -307,6 +308,14 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 		}
 
 		/*
+		 * Clear hugetlb-related page reserves for children. This only
+		 * affects MAP_PRIVATE mappings. Faults generated by the child
+		 * are not guaranteed to succeed, even if read-only
+		 */
+		if (is_vm_hugetlb_page(tmp))
+			reset_vma_resv_huge_pages(tmp);
+
+		/*
 		 * Link in the new vma and copy the page table entries.
 		 */
 		*pprev = tmp;
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 909b2231fa93..63b93a935565 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -224,6 +224,17 @@ void enable_irq(unsigned int irq)
224} 224}
225EXPORT_SYMBOL(enable_irq); 225EXPORT_SYMBOL(enable_irq);
226 226
227int set_irq_wake_real(unsigned int irq, unsigned int on)
228{
229 struct irq_desc *desc = irq_desc + irq;
230 int ret = -ENXIO;
231
232 if (desc->chip->set_wake)
233 ret = desc->chip->set_wake(irq, on);
234
235 return ret;
236}
237
227/** 238/**
228 * set_irq_wake - control irq power management wakeup 239 * set_irq_wake - control irq power management wakeup
229 * @irq: interrupt to control 240 * @irq: interrupt to control
@@ -240,30 +251,34 @@ int set_irq_wake(unsigned int irq, unsigned int on)
240{ 251{
241 struct irq_desc *desc = irq_desc + irq; 252 struct irq_desc *desc = irq_desc + irq;
242 unsigned long flags; 253 unsigned long flags;
243 int ret = -ENXIO; 254 int ret = 0;
244 int (*set_wake)(unsigned, unsigned) = desc->chip->set_wake;
245 255
246 /* wakeup-capable irqs can be shared between drivers that 256 /* wakeup-capable irqs can be shared between drivers that
247 * don't need to have the same sleep mode behaviors. 257 * don't need to have the same sleep mode behaviors.
248 */ 258 */
249 spin_lock_irqsave(&desc->lock, flags); 259 spin_lock_irqsave(&desc->lock, flags);
250 if (on) { 260 if (on) {
251 if (desc->wake_depth++ == 0) 261 if (desc->wake_depth++ == 0) {
252 desc->status |= IRQ_WAKEUP; 262 ret = set_irq_wake_real(irq, on);
253 else 263 if (ret)
254 set_wake = NULL; 264 desc->wake_depth = 0;
265 else
266 desc->status |= IRQ_WAKEUP;
267 }
255 } else { 268 } else {
256 if (desc->wake_depth == 0) { 269 if (desc->wake_depth == 0) {
257 printk(KERN_WARNING "Unbalanced IRQ %d " 270 printk(KERN_WARNING "Unbalanced IRQ %d "
258 "wake disable\n", irq); 271 "wake disable\n", irq);
259 WARN_ON(1); 272 WARN_ON(1);
260 } else if (--desc->wake_depth == 0) 273 } else if (--desc->wake_depth == 0) {
261 desc->status &= ~IRQ_WAKEUP; 274 ret = set_irq_wake_real(irq, on);
262 else 275 if (ret)
263 set_wake = NULL; 276 desc->wake_depth = 1;
277 else
278 desc->status &= ~IRQ_WAKEUP;
279 }
264 } 280 }
265 if (set_wake) 281
266 ret = desc->chip->set_wake(irq, on);
267 spin_unlock_irqrestore(&desc->lock, flags); 282 spin_unlock_irqrestore(&desc->lock, flags);
268 return ret; 283 return ret;
269} 284}
@@ -300,6 +315,30 @@ void compat_irq_chip_set_default_handler(struct irq_desc *desc)
300 desc->handle_irq = NULL; 315 desc->handle_irq = NULL;
301} 316}
302 317
318static int __irq_set_trigger(struct irq_chip *chip, unsigned int irq,
319 unsigned long flags)
320{
321 int ret;
322
323 if (!chip || !chip->set_type) {
324 /*
325 * IRQF_TRIGGER_* but the PIC does not support multiple
326 * flow-types?
327 */
328 pr_warning("No set_type function for IRQ %d (%s)\n", irq,
329 chip ? (chip->name ? : "unknown") : "unknown");
330 return 0;
331 }
332
333 ret = chip->set_type(irq, flags & IRQF_TRIGGER_MASK);
334
335 if (ret)
336 pr_err("setting flow type for irq %u failed (%pF)\n",
337 irq, chip->set_type);
338
339 return ret;
340}
341
303/* 342/*
304 * Internal function to register an irqaction - typically used to 343 * Internal function to register an irqaction - typically used to
305 * allocate special interrupts that are part of the architecture. 344 * allocate special interrupts that are part of the architecture.
@@ -311,6 +350,7 @@ int setup_irq(unsigned int irq, struct irqaction *new)
311 const char *old_name = NULL; 350 const char *old_name = NULL;
312 unsigned long flags; 351 unsigned long flags;
313 int shared = 0; 352 int shared = 0;
353 int ret;
314 354
315 if (irq >= NR_IRQS) 355 if (irq >= NR_IRQS)
316 return -EINVAL; 356 return -EINVAL;
@@ -368,35 +408,23 @@ int setup_irq(unsigned int irq, struct irqaction *new)
368 shared = 1; 408 shared = 1;
369 } 409 }
370 410
371 *p = new;
372
373 /* Exclude IRQ from balancing */
374 if (new->flags & IRQF_NOBALANCING)
375 desc->status |= IRQ_NO_BALANCING;
376
377 if (!shared) { 411 if (!shared) {
378 irq_chip_set_defaults(desc->chip); 412 irq_chip_set_defaults(desc->chip);
379 413
380#if defined(CONFIG_IRQ_PER_CPU)
381 if (new->flags & IRQF_PERCPU)
382 desc->status |= IRQ_PER_CPU;
383#endif
384
385 /* Setup the type (level, edge polarity) if configured: */ 414 /* Setup the type (level, edge polarity) if configured: */
386 if (new->flags & IRQF_TRIGGER_MASK) { 415 if (new->flags & IRQF_TRIGGER_MASK) {
387 if (desc->chip->set_type) 416 ret = __irq_set_trigger(desc->chip, irq, new->flags);
388 desc->chip->set_type(irq, 417
389 new->flags & IRQF_TRIGGER_MASK); 418 if (ret) {
390 else 419 spin_unlock_irqrestore(&desc->lock, flags);
391 /* 420 return ret;
392 * IRQF_TRIGGER_* but the PIC does not support 421 }
393 * multiple flow-types?
394 */
395 printk(KERN_WARNING "No IRQF_TRIGGER set_type "
396 "function for IRQ %d (%s)\n", irq,
397 desc->chip->name);
398 } else 422 } else
399 compat_irq_chip_set_default_handler(desc); 423 compat_irq_chip_set_default_handler(desc);
424#if defined(CONFIG_IRQ_PER_CPU)
425 if (new->flags & IRQF_PERCPU)
426 desc->status |= IRQ_PER_CPU;
427#endif
400 428
401 desc->status &= ~(IRQ_AUTODETECT | IRQ_WAITING | 429 desc->status &= ~(IRQ_AUTODETECT | IRQ_WAITING |
402 IRQ_INPROGRESS | IRQ_SPURIOUS_DISABLED); 430 IRQ_INPROGRESS | IRQ_SPURIOUS_DISABLED);
@@ -415,6 +443,13 @@ int setup_irq(unsigned int irq, struct irqaction *new)
415 /* Set default affinity mask once everything is setup */ 443 /* Set default affinity mask once everything is setup */
416 irq_select_affinity(irq); 444 irq_select_affinity(irq);
417 } 445 }
446
447 *p = new;
448
449 /* Exclude IRQ from balancing */
450 if (new->flags & IRQF_NOBALANCING)
451 desc->status |= IRQ_NO_BALANCING;
452
418 /* Reset broken irq detection when installing new handler */ 453 /* Reset broken irq detection when installing new handler */
419 desc->irq_count = 0; 454 desc->irq_count = 0;
420 desc->irqs_unhandled = 0; 455 desc->irqs_unhandled = 0;
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 8df97d3dfda8..2989f67c4446 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -42,7 +42,7 @@ extern int max_threads;
 
 static struct workqueue_struct *khelper_wq;
 
-#ifdef CONFIG_KMOD
+#ifdef CONFIG_MODULES
 
 /*
 	modprobe_path is set via /proc/sys.
@@ -417,12 +417,12 @@ int call_usermodehelper_stdinpipe(struct subprocess_info *sub_info,
 {
 	struct file *f;
 
-	f = create_write_pipe();
+	f = create_write_pipe(0);
 	if (IS_ERR(f))
 		return PTR_ERR(f);
 	*filp = f;
 
-	f = create_read_pipe(f);
+	f = create_read_pipe(f, 0);
 	if (IS_ERR(f)) {
 		free_write_pipe(*filp);
 		return PTR_ERR(f);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index ac3fb7326641..6111c27491b1 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -106,7 +106,7 @@ static void create_kthread(struct kthread_create_info *create)
 		 */
 		sched_setscheduler(create->result, SCHED_NORMAL, &param);
 		set_user_nice(create->result, KTHREAD_NICE_LEVEL);
-		set_cpus_allowed(create->result, CPU_MASK_ALL);
+		set_cpus_allowed_ptr(create->result, CPU_MASK_ALL_PTR);
 	}
 	complete(&create->done);
 }
@@ -233,7 +233,7 @@ int kthreadd(void *unused)
 	set_task_comm(tsk, "kthreadd");
 	ignore_signals(tsk);
 	set_user_nice(tsk, KTHREAD_NICE_LEVEL);
-	set_cpus_allowed(tsk, CPU_MASK_ALL);
+	set_cpus_allowed_ptr(tsk, CPU_MASK_ALL_PTR);
 
 	current->flags |= PF_NOFREEZE | PF_FREEZER_NOSIG;
 
diff --git a/kernel/module.c b/kernel/module.c
index 5f80478b746d..d8b5605132a0 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -70,6 +70,9 @@ static DECLARE_WAIT_QUEUE_HEAD(module_wq);
70 70
71static BLOCKING_NOTIFIER_HEAD(module_notify_list); 71static BLOCKING_NOTIFIER_HEAD(module_notify_list);
72 72
73/* Bounds of module allocation, for speeding __module_text_address */
74static unsigned long module_addr_min = -1UL, module_addr_max = 0;
75
73int register_module_notifier(struct notifier_block * nb) 76int register_module_notifier(struct notifier_block * nb)
74{ 77{
75 return blocking_notifier_chain_register(&module_notify_list, nb); 78 return blocking_notifier_chain_register(&module_notify_list, nb);
@@ -134,17 +137,19 @@ extern const struct kernel_symbol __start___ksymtab_gpl[];
134extern const struct kernel_symbol __stop___ksymtab_gpl[]; 137extern const struct kernel_symbol __stop___ksymtab_gpl[];
135extern const struct kernel_symbol __start___ksymtab_gpl_future[]; 138extern const struct kernel_symbol __start___ksymtab_gpl_future[];
136extern const struct kernel_symbol __stop___ksymtab_gpl_future[]; 139extern const struct kernel_symbol __stop___ksymtab_gpl_future[];
137extern const struct kernel_symbol __start___ksymtab_unused[];
138extern const struct kernel_symbol __stop___ksymtab_unused[];
139extern const struct kernel_symbol __start___ksymtab_unused_gpl[];
140extern const struct kernel_symbol __stop___ksymtab_unused_gpl[];
141extern const struct kernel_symbol __start___ksymtab_gpl_future[]; 140extern const struct kernel_symbol __start___ksymtab_gpl_future[];
142extern const struct kernel_symbol __stop___ksymtab_gpl_future[]; 141extern const struct kernel_symbol __stop___ksymtab_gpl_future[];
143extern const unsigned long __start___kcrctab[]; 142extern const unsigned long __start___kcrctab[];
144extern const unsigned long __start___kcrctab_gpl[]; 143extern const unsigned long __start___kcrctab_gpl[];
145extern const unsigned long __start___kcrctab_gpl_future[]; 144extern const unsigned long __start___kcrctab_gpl_future[];
145#ifdef CONFIG_UNUSED_SYMBOLS
146extern const struct kernel_symbol __start___ksymtab_unused[];
147extern const struct kernel_symbol __stop___ksymtab_unused[];
148extern const struct kernel_symbol __start___ksymtab_unused_gpl[];
149extern const struct kernel_symbol __stop___ksymtab_unused_gpl[];
146extern const unsigned long __start___kcrctab_unused[]; 150extern const unsigned long __start___kcrctab_unused[];
147extern const unsigned long __start___kcrctab_unused_gpl[]; 151extern const unsigned long __start___kcrctab_unused_gpl[];
152#endif
148 153
149#ifndef CONFIG_MODVERSIONS 154#ifndef CONFIG_MODVERSIONS
150#define symversion(base, idx) NULL 155#define symversion(base, idx) NULL
@@ -152,156 +157,186 @@ extern const unsigned long __start___kcrctab_unused_gpl[];
152#define symversion(base, idx) ((base != NULL) ? ((base) + (idx)) : NULL) 157#define symversion(base, idx) ((base != NULL) ? ((base) + (idx)) : NULL)
153#endif 158#endif
154 159
155/* lookup symbol in given range of kernel_symbols */
156static const struct kernel_symbol *lookup_symbol(const char *name,
157 const struct kernel_symbol *start,
158 const struct kernel_symbol *stop)
159{
160 const struct kernel_symbol *ks = start;
161 for (; ks < stop; ks++)
162 if (strcmp(ks->name, name) == 0)
163 return ks;
164 return NULL;
165}
166
167static bool always_ok(bool gplok, bool warn, const char *name)
168{
169 return true;
170}
171
172static bool printk_unused_warning(bool gplok, bool warn, const char *name)
173{
174 if (warn) {
175 printk(KERN_WARNING "Symbol %s is marked as UNUSED, "
176 "however this module is using it.\n", name);
177 printk(KERN_WARNING
178 "This symbol will go away in the future.\n");
179 printk(KERN_WARNING
180 "Please evalute if this is the right api to use and if "
181 "it really is, submit a report the linux kernel "
182 "mailinglist together with submitting your code for "
183 "inclusion.\n");
184 }
185 return true;
186}
187
188static bool gpl_only_unused_warning(bool gplok, bool warn, const char *name)
189{
190 if (!gplok)
191 return false;
192 return printk_unused_warning(gplok, warn, name);
193}
194
195static bool gpl_only(bool gplok, bool warn, const char *name)
196{
197 return gplok;
198}
199
200static bool warn_if_not_gpl(bool gplok, bool warn, const char *name)
201{
202 if (!gplok && warn) {
203 printk(KERN_WARNING "Symbol %s is being used "
204 "by a non-GPL module, which will not "
205 "be allowed in the future\n", name);
206 printk(KERN_WARNING "Please see the file "
207 "Documentation/feature-removal-schedule.txt "
208 "in the kernel source tree for more details.\n");
209 }
210 return true;
211}
212
213struct symsearch { 160struct symsearch {
214 const struct kernel_symbol *start, *stop; 161 const struct kernel_symbol *start, *stop;
215 const unsigned long *crcs; 162 const unsigned long *crcs;
216 bool (*check)(bool gplok, bool warn, const char *name); 163 enum {
164 NOT_GPL_ONLY,
165 GPL_ONLY,
166 WILL_BE_GPL_ONLY,
167 } licence;
168 bool unused;
217}; 169};
218 170
219/* Look through this array of symbol tables for a symbol match which 171static bool each_symbol_in_section(const struct symsearch *arr,
220 * passes the check function. */ 172 unsigned int arrsize,
221static const struct kernel_symbol *search_symarrays(const struct symsearch *arr, 173 struct module *owner,
222 unsigned int num, 174 bool (*fn)(const struct symsearch *syms,
223 const char *name, 175 struct module *owner,
224 bool gplok, 176 unsigned int symnum, void *data),
225 bool warn, 177 void *data)
226 const unsigned long **crc)
227{ 178{
228 unsigned int i; 179 unsigned int i, j;
229 const struct kernel_symbol *ks;
230 180
231 for (i = 0; i < num; i++) { 181 for (j = 0; j < arrsize; j++) {
232 ks = lookup_symbol(name, arr[i].start, arr[i].stop); 182 for (i = 0; i < arr[j].stop - arr[j].start; i++)
233 if (!ks || !arr[i].check(gplok, warn, name)) 183 if (fn(&arr[j], owner, i, data))
234 continue; 184 return true;
235
236 if (crc)
237 *crc = symversion(arr[i].crcs, ks - arr[i].start);
238 return ks;
239 } 185 }
240 return NULL; 186
187 return false;
241} 188}
242 189
243/* Find a symbol, return value, (optional) crc and (optional) module 190/* Returns true as soon as fn returns true, otherwise false. */
244 * which owns it */ 191static bool each_symbol(bool (*fn)(const struct symsearch *arr,
245static unsigned long find_symbol(const char *name, 192 struct module *owner,
246 struct module **owner, 193 unsigned int symnum, void *data),
247 const unsigned long **crc, 194 void *data)
248 bool gplok,
249 bool warn)
250{ 195{
251 struct module *mod; 196 struct module *mod;
252 const struct kernel_symbol *ks;
253 const struct symsearch arr[] = { 197 const struct symsearch arr[] = {
254 { __start___ksymtab, __stop___ksymtab, __start___kcrctab, 198 { __start___ksymtab, __stop___ksymtab, __start___kcrctab,
255 always_ok }, 199 NOT_GPL_ONLY, false },
256 { __start___ksymtab_gpl, __stop___ksymtab_gpl, 200 { __start___ksymtab_gpl, __stop___ksymtab_gpl,
257 __start___kcrctab_gpl, gpl_only }, 201 __start___kcrctab_gpl,
202 GPL_ONLY, false },
258 { __start___ksymtab_gpl_future, __stop___ksymtab_gpl_future, 203 { __start___ksymtab_gpl_future, __stop___ksymtab_gpl_future,
259 __start___kcrctab_gpl_future, warn_if_not_gpl }, 204 __start___kcrctab_gpl_future,
205 WILL_BE_GPL_ONLY, false },
206#ifdef CONFIG_UNUSED_SYMBOLS
260 { __start___ksymtab_unused, __stop___ksymtab_unused, 207 { __start___ksymtab_unused, __stop___ksymtab_unused,
261 __start___kcrctab_unused, printk_unused_warning }, 208 __start___kcrctab_unused,
209 NOT_GPL_ONLY, true },
262 { __start___ksymtab_unused_gpl, __stop___ksymtab_unused_gpl, 210 { __start___ksymtab_unused_gpl, __stop___ksymtab_unused_gpl,
263 __start___kcrctab_unused_gpl, gpl_only_unused_warning }, 211 __start___kcrctab_unused_gpl,
212 GPL_ONLY, true },
213#endif
264 }; 214 };
265 215
266 /* Core kernel first. */ 216 if (each_symbol_in_section(arr, ARRAY_SIZE(arr), NULL, fn, data))
267 ks = search_symarrays(arr, ARRAY_SIZE(arr), name, gplok, warn, crc); 217 return true;
268 if (ks) {
269 if (owner)
270 *owner = NULL;
271 return ks->value;
272 }
273 218
274 /* Now try modules. */
275 list_for_each_entry(mod, &modules, list) { 219 list_for_each_entry(mod, &modules, list) {
276 struct symsearch arr[] = { 220 struct symsearch arr[] = {
277 { mod->syms, mod->syms + mod->num_syms, mod->crcs, 221 { mod->syms, mod->syms + mod->num_syms, mod->crcs,
278 always_ok }, 222 NOT_GPL_ONLY, false },
279 { mod->gpl_syms, mod->gpl_syms + mod->num_gpl_syms, 223 { mod->gpl_syms, mod->gpl_syms + mod->num_gpl_syms,
280 mod->gpl_crcs, gpl_only }, 224 mod->gpl_crcs,
225 GPL_ONLY, false },
281 { mod->gpl_future_syms, 226 { mod->gpl_future_syms,
282 mod->gpl_future_syms + mod->num_gpl_future_syms, 227 mod->gpl_future_syms + mod->num_gpl_future_syms,
283 mod->gpl_future_crcs, warn_if_not_gpl }, 228 mod->gpl_future_crcs,
229 WILL_BE_GPL_ONLY, false },
230#ifdef CONFIG_UNUSED_SYMBOLS
284 { mod->unused_syms, 231 { mod->unused_syms,
285 mod->unused_syms + mod->num_unused_syms, 232 mod->unused_syms + mod->num_unused_syms,
286 mod->unused_crcs, printk_unused_warning }, 233 mod->unused_crcs,
234 NOT_GPL_ONLY, true },
287 { mod->unused_gpl_syms, 235 { mod->unused_gpl_syms,
288 mod->unused_gpl_syms + mod->num_unused_gpl_syms, 236 mod->unused_gpl_syms + mod->num_unused_gpl_syms,
289 mod->unused_gpl_crcs, gpl_only_unused_warning }, 237 mod->unused_gpl_crcs,
238 GPL_ONLY, true },
239#endif
290 }; 240 };
291 241
292 ks = search_symarrays(arr, ARRAY_SIZE(arr), 242 if (each_symbol_in_section(arr, ARRAY_SIZE(arr), mod, fn, data))
293 name, gplok, warn, crc); 243 return true;
294 if (ks) { 244 }
295 if (owner) 245 return false;
296 *owner = mod; 246}
297 return ks->value; 247
248struct find_symbol_arg {
249 /* Input */
250 const char *name;
251 bool gplok;
252 bool warn;
253
254 /* Output */
255 struct module *owner;
256 const unsigned long *crc;
257 unsigned long value;
258};
259
260static bool find_symbol_in_section(const struct symsearch *syms,
261 struct module *owner,
262 unsigned int symnum, void *data)
263{
264 struct find_symbol_arg *fsa = data;
265
266 if (strcmp(syms->start[symnum].name, fsa->name) != 0)
267 return false;
268
269 if (!fsa->gplok) {
270 if (syms->licence == GPL_ONLY)
271 return false;
272 if (syms->licence == WILL_BE_GPL_ONLY && fsa->warn) {
273 printk(KERN_WARNING "Symbol %s is being used "
274 "by a non-GPL module, which will not "
275 "be allowed in the future\n", fsa->name);
276 printk(KERN_WARNING "Please see the file "
277 "Documentation/feature-removal-schedule.txt "
278 "in the kernel source tree for more details.\n");
298 } 279 }
299 } 280 }
300 281
282#ifdef CONFIG_UNUSED_SYMBOLS
283 if (syms->unused && fsa->warn) {
284 printk(KERN_WARNING "Symbol %s is marked as UNUSED, "
285 "however this module is using it.\n", fsa->name);
286 printk(KERN_WARNING
287 "This symbol will go away in the future.\n");
288 printk(KERN_WARNING
289 "Please evalute if this is the right api to use and if "
290 "it really is, submit a report the linux kernel "
291 "mailinglist together with submitting your code for "
292 "inclusion.\n");
293 }
294#endif
295
296 fsa->owner = owner;
297 fsa->crc = symversion(syms->crcs, symnum);
298 fsa->value = syms->start[symnum].value;
299 return true;
300}
301
302/* Find a symbol, return value, (optional) crc and (optional) module
303 * which owns it */
304static unsigned long find_symbol(const char *name,
305 struct module **owner,
306 const unsigned long **crc,
307 bool gplok,
308 bool warn)
309{
310 struct find_symbol_arg fsa;
311
312 fsa.name = name;
313 fsa.gplok = gplok;
314 fsa.warn = warn;
315
316 if (each_symbol(find_symbol_in_section, &fsa)) {
317 if (owner)
318 *owner = fsa.owner;
319 if (crc)
320 *crc = fsa.crc;
321 return fsa.value;
322 }
323
301 DEBUGP("Failed to find symbol %s\n", name); 324 DEBUGP("Failed to find symbol %s\n", name);
302 return -ENOENT; 325 return -ENOENT;
303} 326}
304 327
328/* lookup symbol in given range of kernel_symbols */
329static const struct kernel_symbol *lookup_symbol(const char *name,
330 const struct kernel_symbol *start,
331 const struct kernel_symbol *stop)
332{
333 const struct kernel_symbol *ks = start;
334 for (; ks < stop; ks++)
335 if (strcmp(ks->name, name) == 0)
336 return ks;
337 return NULL;
338}
339
305/* Search for module by name: must hold module_mutex. */ 340/* Search for module by name: must hold module_mutex. */
306static struct module *find_module(const char *name) 341static struct module *find_module(const char *name)
307{ 342{
@@ -639,8 +674,8 @@ static int __try_stop_module(void *_sref)
639{ 674{
640 struct stopref *sref = _sref; 675 struct stopref *sref = _sref;
641 676
642 /* If it's not unused, quit unless we are told to block. */ 677 /* If it's not unused, quit unless we're forcing. */
643 if ((sref->flags & O_NONBLOCK) && module_refcount(sref->mod) != 0) { 678 if (module_refcount(sref->mod) != 0) {
644 if (!(*sref->forced = try_force_unload(sref->flags))) 679 if (!(*sref->forced = try_force_unload(sref->flags)))
645 return -EWOULDBLOCK; 680 return -EWOULDBLOCK;
646 } 681 }
@@ -652,9 +687,16 @@ static int __try_stop_module(void *_sref)
652 687
653static int try_stop_module(struct module *mod, int flags, int *forced) 688static int try_stop_module(struct module *mod, int flags, int *forced)
654{ 689{
655 struct stopref sref = { mod, flags, forced }; 690 if (flags & O_NONBLOCK) {
691 struct stopref sref = { mod, flags, forced };
656 692
657 return stop_machine_run(__try_stop_module, &sref, NR_CPUS); 693 return stop_machine_run(__try_stop_module, &sref, NR_CPUS);
694 } else {
695 /* We don't need to stop the machine for this. */
696 mod->state = MODULE_STATE_GOING;
697 synchronize_sched();
698 return 0;
699 }
658} 700}
659 701
660unsigned int module_refcount(struct module *mod) 702unsigned int module_refcount(struct module *mod)
@@ -1445,8 +1487,10 @@ static int verify_export_symbols(struct module *mod)
1445 { mod->syms, mod->num_syms }, 1487 { mod->syms, mod->num_syms },
1446 { mod->gpl_syms, mod->num_gpl_syms }, 1488 { mod->gpl_syms, mod->num_gpl_syms },
1447 { mod->gpl_future_syms, mod->num_gpl_future_syms }, 1489 { mod->gpl_future_syms, mod->num_gpl_future_syms },
1490#ifdef CONFIG_UNUSED_SYMBOLS
1448 { mod->unused_syms, mod->num_unused_syms }, 1491 { mod->unused_syms, mod->num_unused_syms },
1449 { mod->unused_gpl_syms, mod->num_unused_gpl_syms }, 1492 { mod->unused_gpl_syms, mod->num_unused_gpl_syms },
1493#endif
1450 }; 1494 };
1451 1495
1452 for (i = 0; i < ARRAY_SIZE(arr); i++) { 1496 for (i = 0; i < ARRAY_SIZE(arr); i++) {
@@ -1526,7 +1570,7 @@ static int simplify_symbols(Elf_Shdr *sechdrs,
1526} 1570}
1527 1571
1528/* Update size with this section: return offset. */ 1572/* Update size with this section: return offset. */
1529static long get_offset(unsigned long *size, Elf_Shdr *sechdr) 1573static long get_offset(unsigned int *size, Elf_Shdr *sechdr)
1530{ 1574{
1531 long ret; 1575 long ret;
1532 1576
@@ -1738,6 +1782,20 @@ static inline void add_kallsyms(struct module *mod,
1738} 1782}
1739#endif /* CONFIG_KALLSYMS */ 1783#endif /* CONFIG_KALLSYMS */
1740 1784
1785static void *module_alloc_update_bounds(unsigned long size)
1786{
1787 void *ret = module_alloc(size);
1788
1789 if (ret) {
1790 /* Update module bounds. */
1791 if ((unsigned long)ret < module_addr_min)
1792 module_addr_min = (unsigned long)ret;
1793 if ((unsigned long)ret + size > module_addr_max)
1794 module_addr_max = (unsigned long)ret + size;
1795 }
1796 return ret;
1797}
1798
1741/* Allocate and load the module: note that size of section 0 is always 1799/* Allocate and load the module: note that size of section 0 is always
1742 zero, and we rely on this for optional sections. */ 1800 zero, and we rely on this for optional sections. */
1743static struct module *load_module(void __user *umod, 1801static struct module *load_module(void __user *umod,
@@ -1764,10 +1822,12 @@ static struct module *load_module(void __user *umod,
1764 unsigned int gplfutureindex; 1822 unsigned int gplfutureindex;
1765 unsigned int gplfuturecrcindex; 1823 unsigned int gplfuturecrcindex;
1766 unsigned int unwindex = 0; 1824 unsigned int unwindex = 0;
1825#ifdef CONFIG_UNUSED_SYMBOLS
1767 unsigned int unusedindex; 1826 unsigned int unusedindex;
1768 unsigned int unusedcrcindex; 1827 unsigned int unusedcrcindex;
1769 unsigned int unusedgplindex; 1828 unsigned int unusedgplindex;
1770 unsigned int unusedgplcrcindex; 1829 unsigned int unusedgplcrcindex;
1830#endif
1771 unsigned int markersindex; 1831 unsigned int markersindex;
1772 unsigned int markersstringsindex; 1832 unsigned int markersstringsindex;
1773 struct module *mod; 1833 struct module *mod;
@@ -1850,13 +1910,15 @@ static struct module *load_module(void __user *umod,
1850 exportindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab"); 1910 exportindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab");
1851 gplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl"); 1911 gplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl");
1852 gplfutureindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl_future"); 1912 gplfutureindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl_future");
1853 unusedindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused");
1854 unusedgplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused_gpl");
1855 crcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab"); 1913 crcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab");
1856 gplcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_gpl"); 1914 gplcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_gpl");
1857 gplfuturecrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_gpl_future"); 1915 gplfuturecrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_gpl_future");
1916#ifdef CONFIG_UNUSED_SYMBOLS
1917 unusedindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused");
1918 unusedgplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused_gpl");
1858 unusedcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_unused"); 1919 unusedcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_unused");
1859 unusedgplcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_unused_gpl"); 1920 unusedgplcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_unused_gpl");
1921#endif
1860 setupindex = find_sec(hdr, sechdrs, secstrings, "__param"); 1922 setupindex = find_sec(hdr, sechdrs, secstrings, "__param");
1861 exindex = find_sec(hdr, sechdrs, secstrings, "__ex_table"); 1923 exindex = find_sec(hdr, sechdrs, secstrings, "__ex_table");
1862 obsparmindex = find_sec(hdr, sechdrs, secstrings, "__obsparm"); 1924 obsparmindex = find_sec(hdr, sechdrs, secstrings, "__obsparm");
@@ -1935,7 +1997,7 @@ static struct module *load_module(void __user *umod,
1935 layout_sections(mod, hdr, sechdrs, secstrings); 1997 layout_sections(mod, hdr, sechdrs, secstrings);
1936 1998
1937 /* Do the allocs. */ 1999 /* Do the allocs. */
1938 ptr = module_alloc(mod->core_size); 2000 ptr = module_alloc_update_bounds(mod->core_size);
1939 if (!ptr) { 2001 if (!ptr) {
1940 err = -ENOMEM; 2002 err = -ENOMEM;
1941 goto free_percpu; 2003 goto free_percpu;
@@ -1943,7 +2005,7 @@ static struct module *load_module(void __user *umod,
1943 memset(ptr, 0, mod->core_size); 2005 memset(ptr, 0, mod->core_size);
1944 mod->module_core = ptr; 2006 mod->module_core = ptr;
1945 2007
1946 ptr = module_alloc(mod->init_size); 2008 ptr = module_alloc_update_bounds(mod->init_size);
1947 if (!ptr && mod->init_size) { 2009 if (!ptr && mod->init_size) {
1948 err = -ENOMEM; 2010 err = -ENOMEM;
1949 goto free_core; 2011 goto free_core;
@@ -2018,14 +2080,15 @@ static struct module *load_module(void __user *umod,
2018 mod->gpl_crcs = (void *)sechdrs[gplcrcindex].sh_addr; 2080 mod->gpl_crcs = (void *)sechdrs[gplcrcindex].sh_addr;
2019 mod->num_gpl_future_syms = sechdrs[gplfutureindex].sh_size / 2081 mod->num_gpl_future_syms = sechdrs[gplfutureindex].sh_size /
2020 sizeof(*mod->gpl_future_syms); 2082 sizeof(*mod->gpl_future_syms);
2021 mod->num_unused_syms = sechdrs[unusedindex].sh_size /
2022 sizeof(*mod->unused_syms);
2023 mod->num_unused_gpl_syms = sechdrs[unusedgplindex].sh_size /
2024 sizeof(*mod->unused_gpl_syms);
2025 mod->gpl_future_syms = (void *)sechdrs[gplfutureindex].sh_addr; 2083 mod->gpl_future_syms = (void *)sechdrs[gplfutureindex].sh_addr;
2026 if (gplfuturecrcindex) 2084 if (gplfuturecrcindex)
2027 mod->gpl_future_crcs = (void *)sechdrs[gplfuturecrcindex].sh_addr; 2085 mod->gpl_future_crcs = (void *)sechdrs[gplfuturecrcindex].sh_addr;
2028 2086
2087#ifdef CONFIG_UNUSED_SYMBOLS
2088 mod->num_unused_syms = sechdrs[unusedindex].sh_size /
2089 sizeof(*mod->unused_syms);
2090 mod->num_unused_gpl_syms = sechdrs[unusedgplindex].sh_size /
2091 sizeof(*mod->unused_gpl_syms);
2029 mod->unused_syms = (void *)sechdrs[unusedindex].sh_addr; 2092 mod->unused_syms = (void *)sechdrs[unusedindex].sh_addr;
2030 if (unusedcrcindex) 2093 if (unusedcrcindex)
2031 mod->unused_crcs = (void *)sechdrs[unusedcrcindex].sh_addr; 2094 mod->unused_crcs = (void *)sechdrs[unusedcrcindex].sh_addr;
@@ -2033,13 +2096,17 @@ static struct module *load_module(void __user *umod,
2033 if (unusedgplcrcindex) 2096 if (unusedgplcrcindex)
2034 mod->unused_gpl_crcs 2097 mod->unused_gpl_crcs
2035 = (void *)sechdrs[unusedgplcrcindex].sh_addr; 2098 = (void *)sechdrs[unusedgplcrcindex].sh_addr;
2099#endif
2036 2100
2037#ifdef CONFIG_MODVERSIONS 2101#ifdef CONFIG_MODVERSIONS
2038 if ((mod->num_syms && !crcindex) || 2102 if ((mod->num_syms && !crcindex)
2039 (mod->num_gpl_syms && !gplcrcindex) || 2103 || (mod->num_gpl_syms && !gplcrcindex)
2040 (mod->num_gpl_future_syms && !gplfuturecrcindex) || 2104 || (mod->num_gpl_future_syms && !gplfuturecrcindex)
2041 (mod->num_unused_syms && !unusedcrcindex) || 2105#ifdef CONFIG_UNUSED_SYMBOLS
2042 (mod->num_unused_gpl_syms && !unusedgplcrcindex)) { 2106 || (mod->num_unused_syms && !unusedcrcindex)
2107 || (mod->num_unused_gpl_syms && !unusedgplcrcindex)
2108#endif
2109 ) {
2043 printk(KERN_WARNING "%s: No versions for exported symbols.\n", mod->name); 2110 printk(KERN_WARNING "%s: No versions for exported symbols.\n", mod->name);
2044 err = try_to_force_load(mod, "nocrc"); 2111 err = try_to_force_load(mod, "nocrc");
2045 if (err) 2112 if (err)
@@ -2512,7 +2579,7 @@ static int m_show(struct seq_file *m, void *p)
2512 struct module *mod = list_entry(p, struct module, list); 2579 struct module *mod = list_entry(p, struct module, list);
2513 char buf[8]; 2580 char buf[8];
2514 2581
2515 seq_printf(m, "%s %lu", 2582 seq_printf(m, "%s %u",
2516 mod->name, mod->init_size + mod->core_size); 2583 mod->name, mod->init_size + mod->core_size);
2517 print_unload_info(m, mod); 2584 print_unload_info(m, mod);
2518 2585
@@ -2595,6 +2662,9 @@ struct module *__module_text_address(unsigned long addr)
2595{ 2662{
2596 struct module *mod; 2663 struct module *mod;
2597 2664
2665 if (addr < module_addr_min || addr > module_addr_max)
2666 return NULL;
2667
2598 list_for_each_entry(mod, &modules, list) 2668 list_for_each_entry(mod, &modules, list)
2599 if (within(addr, mod->module_init, mod->init_text_size) 2669 if (within(addr, mod->module_init, mod->init_text_size)
2600 || within(addr, mod->module_core, mod->core_text_size)) 2670 || within(addr, mod->module_core, mod->core_text_size))
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 59dfdf1e1d20..dcd165f92a88 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -94,6 +94,17 @@ config SUSPEND
 	  powered and thus its contents are preserved, such as the
 	  suspend-to-RAM state (e.g. the ACPI S3 state).
 
+config PM_TEST_SUSPEND
+	bool "Test suspend/resume and wakealarm during bootup"
+	depends on SUSPEND && PM_DEBUG && RTC_LIB=y
+	---help---
+	This option will let you suspend your machine during bootup, and
+	make it wake up a few seconds later using an RTC wakeup alarm.
+	Enable this with a kernel parameter like "test_suspend=mem".
+
+	You probably want to have your system's RTC driver statically
+	linked, ensuring that it's available when this test runs.
+
 config SUSPEND_FREEZER
 	bool "Enable freezer for suspend to RAM/standby" \
 		if ARCH_WANTS_FREEZER_CONTROL || BROKEN
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 3398f4651aa1..95bff23ecdaa 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -132,6 +132,61 @@ static inline int suspend_test(int level) { return 0; }
132 132
133#ifdef CONFIG_SUSPEND 133#ifdef CONFIG_SUSPEND
134 134
135#ifdef CONFIG_PM_TEST_SUSPEND
136
137/*
138 * We test the system suspend code by setting an RTC wakealarm a short
139 * time in the future, then suspending. Suspending the devices won't
140 * normally take long ... some systems only need a few milliseconds.
141 *
142 * The time it takes is system-specific though, so when we test this
143 * during system bootup we allow a LOT of time.
144 */
145#define TEST_SUSPEND_SECONDS 5
146
147static unsigned long suspend_test_start_time;
148
149static void suspend_test_start(void)
150{
151 /* FIXME Use better timebase than "jiffies", ideally a clocksource.
152 * What we want is a hardware counter that will work correctly even
153 * during the irqs-are-off stages of the suspend/resume cycle...
154 */
155 suspend_test_start_time = jiffies;
156}
157
158static void suspend_test_finish(const char *label)
159{
160 long nj = jiffies - suspend_test_start_time;
161 unsigned msec;
162
163 msec = jiffies_to_msecs(abs(nj));
164 pr_info("PM: %s took %d.%03d seconds\n", label,
165 msec / 1000, msec % 1000);
166
167 /* Warning on suspend means the RTC alarm period needs to be
168 * larger -- the system was sooo slooowwww to suspend that the
169 * alarm (should have) fired before the system went to sleep!
170 *
171 * Warning on either suspend or resume also means the system
172 * has some performance issues. The stack dump of a WARN_ON
173 * is more likely to get the right attention than a printk...
174 */
175 WARN_ON(msec > (TEST_SUSPEND_SECONDS * 1000));
176}
177
178#else
179
180static void suspend_test_start(void)
181{
182}
183
184static void suspend_test_finish(const char *label)
185{
186}
187
188#endif
189
135/* This is just an arbitrary number */ 190/* This is just an arbitrary number */
136#define FREE_PAGE_NUMBER (100) 191#define FREE_PAGE_NUMBER (100)
137 192
@@ -266,12 +321,13 @@ int suspend_devices_and_enter(suspend_state_t state)
266 goto Close; 321 goto Close;
267 } 322 }
268 suspend_console(); 323 suspend_console();
324 suspend_test_start();
269 error = device_suspend(PMSG_SUSPEND); 325 error = device_suspend(PMSG_SUSPEND);
270 if (error) { 326 if (error) {
271 printk(KERN_ERR "PM: Some devices failed to suspend\n"); 327 printk(KERN_ERR "PM: Some devices failed to suspend\n");
272 goto Recover_platform; 328 goto Recover_platform;
273 } 329 }
274 330 suspend_test_finish("suspend devices");
275 if (suspend_test(TEST_DEVICES)) 331 if (suspend_test(TEST_DEVICES))
276 goto Recover_platform; 332 goto Recover_platform;
277 333
@@ -293,7 +349,9 @@ int suspend_devices_and_enter(suspend_state_t state)
293 if (suspend_ops->finish) 349 if (suspend_ops->finish)
294 suspend_ops->finish(); 350 suspend_ops->finish();
295 Resume_devices: 351 Resume_devices:
352 suspend_test_start();
296 device_resume(PMSG_RESUME); 353 device_resume(PMSG_RESUME);
354 suspend_test_finish("resume devices");
297 resume_console(); 355 resume_console();
298 Close: 356 Close:
299 if (suspend_ops->end) 357 if (suspend_ops->end)
@@ -521,3 +579,137 @@ static int __init pm_init(void)
521} 579}
522 580
523core_initcall(pm_init); 581core_initcall(pm_init);
582
583
584#ifdef CONFIG_PM_TEST_SUSPEND
585
586#include <linux/rtc.h>
587
588/*
589 * To test system suspend, we need a hands-off mechanism to resume the
590 * system. RTC wake alarms are a common self-contained mechanism.
591 */
592
593static void __init test_wakealarm(struct rtc_device *rtc, suspend_state_t state)
594{
595 static char err_readtime[] __initdata =
596 KERN_ERR "PM: can't read %s time, err %d\n";
597 static char err_wakealarm [] __initdata =
598 KERN_ERR "PM: can't set %s wakealarm, err %d\n";
599 static char err_suspend[] __initdata =
600 KERN_ERR "PM: suspend test failed, error %d\n";
601 static char info_test[] __initdata =
602 KERN_INFO "PM: test RTC wakeup from '%s' suspend\n";
603
604 unsigned long now;
605 struct rtc_wkalrm alm;
606 int status;
607
608 /* this may fail if the RTC hasn't been initialized */
609 status = rtc_read_time(rtc, &alm.time);
610 if (status < 0) {
611 printk(err_readtime, rtc->dev.bus_id, status);
612 return;
613 }
614 rtc_tm_to_time(&alm.time, &now);
615
616 memset(&alm, 0, sizeof alm);
617 rtc_time_to_tm(now + TEST_SUSPEND_SECONDS, &alm.time);
618 alm.enabled = true;
619
620 status = rtc_set_alarm(rtc, &alm);
621 if (status < 0) {
622 printk(err_wakealarm, rtc->dev.bus_id, status);
623 return;
624 }
625
626 if (state == PM_SUSPEND_MEM) {
627 printk(info_test, pm_states[state]);
628 status = pm_suspend(state);
629 if (status == -ENODEV)
630 state = PM_SUSPEND_STANDBY;
631 }
632 if (state == PM_SUSPEND_STANDBY) {
633 printk(info_test, pm_states[state]);
634 status = pm_suspend(state);
635 }
636 if (status < 0)
637 printk(err_suspend, status);
638}
639
640static int __init has_wakealarm(struct device *dev, void *name_ptr)
641{
642 struct rtc_device *candidate = to_rtc_device(dev);
643
644 if (!candidate->ops->set_alarm)
645 return 0;
646 if (!device_may_wakeup(candidate->dev.parent))
647 return 0;
648
649 *(char **)name_ptr = dev->bus_id;
650 return 1;
651}
652
653/*
654 * Kernel options like "test_suspend=mem" force suspend/resume sanity tests
655 * at startup time. They're normally disabled, for faster boot and because
656 * we can't know which states really work on this particular system.
657 */
658static suspend_state_t test_state __initdata = PM_SUSPEND_ON;
659
660static char warn_bad_state[] __initdata =
661 KERN_WARNING "PM: can't test '%s' suspend state\n";
662
663static int __init setup_test_suspend(char *value)
664{
665 unsigned i;
666
667 /* "=mem" ==> "mem" */
668 value++;
669 for (i = 0; i < PM_SUSPEND_MAX; i++) {
670 if (!pm_states[i])
671 continue;
672 if (strcmp(pm_states[i], value) != 0)
673 continue;
674 test_state = (__force suspend_state_t) i;
675 return 0;
676 }
677 printk(warn_bad_state, value);
678 return 0;
679}
680__setup("test_suspend", setup_test_suspend);
681
682static int __init test_suspend(void)
683{
684 static char warn_no_rtc[] __initdata =
685 KERN_WARNING "PM: no wakealarm-capable RTC driver is ready\n";
686
687 char *pony = NULL;
688 struct rtc_device *rtc = NULL;
689
690 /* PM is initialized by now; is that state testable? */
691 if (test_state == PM_SUSPEND_ON)
692 goto done;
693 if (!valid_state(test_state)) {
694 printk(warn_bad_state, pm_states[test_state]);
695 goto done;
696 }
697
698 /* RTCs have initialized by now too ... can we use one? */
699 class_find_device(rtc_class, NULL, &pony, has_wakealarm);
700 if (pony)
701 rtc = rtc_class_open(pony);
702 if (!rtc) {
703 printk(warn_no_rtc);
704 goto done;
705 }
706
707 /* go for it */
708 test_wakealarm(rtc, test_state);
709 rtc_class_close(rtc);
710done:
711 return 0;
712}
713late_initcall(test_suspend);
714
715#endif /* CONFIG_PM_TEST_SUSPEND */
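
setup_test_suspend() above is a plain table lookup: strip the leading '=', compare the value against every known state name, and remember the matching index. A userspace sketch of that parsing, with a made-up states[] table in place of the kernel's pm_states[]:

    #include <stdio.h>
    #include <string.h>

    /* stand-in for the kernel's pm_states[] table; indices are illustrative */
    static const char *const states[] = { NULL, "standby", "mem" };
    #define NR_STATES (sizeof(states) / sizeof(states[0]))

    static int test_state;      /* 0 == "don't test" */

    static int setup_test_suspend(const char *value)
    {
            unsigned i;

            /* "=mem" ==> "mem" */
            value++;
            for (i = 0; i < NR_STATES; i++) {
                    if (!states[i])
                            continue;
                    if (strcmp(states[i], value) != 0)
                            continue;
                    test_state = i;
                    return 0;
            }
            fprintf(stderr, "PM: can't test '%s' suspend state\n", value);
            return 0;
    }

    int main(void)
    {
            setup_test_suspend("=mem");
            printf("test_state = %d (%s)\n", test_state,
                   test_state ? states[test_state] : "none");
            return 0;
    }
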
diff --git a/kernel/power/poweroff.c b/kernel/power/poweroff.c
index 678ec736076b..72016f051477 100644
--- a/kernel/power/poweroff.c
+++ b/kernel/power/poweroff.c
@@ -10,6 +10,7 @@
10#include <linux/pm.h> 10#include <linux/pm.h>
11#include <linux/workqueue.h> 11#include <linux/workqueue.h>
12#include <linux/reboot.h> 12#include <linux/reboot.h>
13#include <linux/cpumask.h>
13 14
14/* 15/*
15 * When the user hits Sys-Rq o to power down the machine this is the 16 * When the user hits Sys-Rq o to power down the machine this is the
@@ -25,7 +26,8 @@ static DECLARE_WORK(poweroff_work, do_poweroff);
25 26
26static void handle_poweroff(int key, struct tty_struct *tty) 27static void handle_poweroff(int key, struct tty_struct *tty)
27{ 28{
28 schedule_work(&poweroff_work); 29 /* run sysrq poweroff on boot cpu */
30 schedule_work_on(first_cpu(cpu_online_map), &poweroff_work);
29} 31}
30 32
31static struct sysrq_key_op sysrq_poweroff_op = { 33static struct sysrq_key_op sysrq_poweroff_op = {
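
The poweroff change routes the deferred work to the first online CPU via schedule_work_on() so the final power-off always runs on the boot processor. Roughly the same "pin this job to CPU 0" idea in userspace, using sched_setaffinity(); the do_poweroff_work() name is invented:

    #define _GNU_SOURCE
    #include <sched.h>
    #include <stdio.h>

    /* stand-in for the deferred poweroff work */
    static void do_poweroff_work(void)
    {
            printf("running 'poweroff' work on CPU %d\n", sched_getcpu());
    }

    int main(void)
    {
            cpu_set_t set;

            /* pin ourselves to CPU 0 -- the analogue of first_cpu(cpu_online_map) */
            CPU_ZERO(&set);
            CPU_SET(0, &set);
            if (sched_setaffinity(0, sizeof(set), &set) != 0) {
                    perror("sched_setaffinity");
                    return 1;
            }

            do_poweroff_work();
            return 0;
    }
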
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 5fb87652f214..278946aecaf0 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -149,7 +149,7 @@ static int try_to_freeze_tasks(bool sig_only)
149 unsigned long end_time; 149 unsigned long end_time;
150 unsigned int todo; 150 unsigned int todo;
151 struct timeval start, end; 151 struct timeval start, end;
152 s64 elapsed_csecs64; 152 u64 elapsed_csecs64;
153 unsigned int elapsed_csecs; 153 unsigned int elapsed_csecs;
154 154
155 do_gettimeofday(&start); 155 do_gettimeofday(&start);
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 5f91a07c4eac..5d2ab836e998 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -205,8 +205,7 @@ static void chain_free(struct chain_allocator *ca, int clear_page_nosave)
205 * objects. The main list's elements are of type struct zone_bitmap 205 * objects. The main list's elements are of type struct zone_bitmap
206 * and each of them corresponds to one zone. For each zone bitmap 206 * and each of them corresponds to one zone. For each zone bitmap
207 * object there is a list of objects of type struct bm_block that 207 * object there is a list of objects of type struct bm_block that
208 * represent each block of bit chunks in which information is 208 * represent each block of the bitmap in which information is stored.
209 * stored.
210 * 209 *
211 * struct memory_bitmap contains a pointer to the main list of zone 210 * struct memory_bitmap contains a pointer to the main list of zone
212 * bitmap objects, a struct bm_position used for browsing the bitmap, 211 * bitmap objects, a struct bm_position used for browsing the bitmap,
@@ -224,26 +223,27 @@ static void chain_free(struct chain_allocator *ca, int clear_page_nosave)
224 * pfns that correspond to the start and end of the represented zone. 223 * pfns that correspond to the start and end of the represented zone.
225 * 224 *
226 * struct bm_block contains a pointer to the memory page in which 225 * struct bm_block contains a pointer to the memory page in which
227 * information is stored (in the form of a block of bit chunks 226 * information is stored (in the form of a bitmap).
228 * of type unsigned long each). It also contains the pfns that 227 * It also contains the pfns that correspond to the start and end of
229 * correspond to the start and end of the represented memory area and 228 * the represented memory area.
230 * the number of bit chunks in the block.
231 */ 229 */
232 230
233#define BM_END_OF_MAP (~0UL) 231#define BM_END_OF_MAP (~0UL)
234 232
235#define BM_CHUNKS_PER_BLOCK (PAGE_SIZE / sizeof(long))
236#define BM_BITS_PER_CHUNK (sizeof(long) << 3)
237#define BM_BITS_PER_BLOCK (PAGE_SIZE << 3) 233#define BM_BITS_PER_BLOCK (PAGE_SIZE << 3)
238 234
239struct bm_block { 235struct bm_block {
240 struct bm_block *next; /* next element of the list */ 236 struct bm_block *next; /* next element of the list */
241 unsigned long start_pfn; /* pfn represented by the first bit */ 237 unsigned long start_pfn; /* pfn represented by the first bit */
242 unsigned long end_pfn; /* pfn represented by the last bit plus 1 */ 238 unsigned long end_pfn; /* pfn represented by the last bit plus 1 */
243 unsigned int size; /* number of bit chunks */ 239 unsigned long *data; /* bitmap representing pages */
244 unsigned long *data; /* chunks of bits representing pages */
245}; 240};
246 241
242static inline unsigned long bm_block_bits(struct bm_block *bb)
243{
244 return bb->end_pfn - bb->start_pfn;
245}
246
247struct zone_bitmap { 247struct zone_bitmap {
248 struct zone_bitmap *next; /* next element of the list */ 248 struct zone_bitmap *next; /* next element of the list */
249 unsigned long start_pfn; /* minimal pfn in this zone */ 249 unsigned long start_pfn; /* minimal pfn in this zone */
@@ -257,7 +257,6 @@ struct zone_bitmap {
257struct bm_position { 257struct bm_position {
258 struct zone_bitmap *zone_bm; 258 struct zone_bitmap *zone_bm;
259 struct bm_block *block; 259 struct bm_block *block;
260 int chunk;
261 int bit; 260 int bit;
262}; 261};
263 262
@@ -272,12 +271,6 @@ struct memory_bitmap {
272 271
273/* Functions that operate on memory bitmaps */ 272/* Functions that operate on memory bitmaps */
274 273
275static inline void memory_bm_reset_chunk(struct memory_bitmap *bm)
276{
277 bm->cur.chunk = 0;
278 bm->cur.bit = -1;
279}
280
281static void memory_bm_position_reset(struct memory_bitmap *bm) 274static void memory_bm_position_reset(struct memory_bitmap *bm)
282{ 275{
283 struct zone_bitmap *zone_bm; 276 struct zone_bitmap *zone_bm;
@@ -285,7 +278,7 @@ static void memory_bm_position_reset(struct memory_bitmap *bm)
285 zone_bm = bm->zone_bm_list; 278 zone_bm = bm->zone_bm_list;
286 bm->cur.zone_bm = zone_bm; 279 bm->cur.zone_bm = zone_bm;
287 bm->cur.block = zone_bm->bm_blocks; 280 bm->cur.block = zone_bm->bm_blocks;
288 memory_bm_reset_chunk(bm); 281 bm->cur.bit = 0;
289} 282}
290 283
291static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free); 284static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
@@ -394,12 +387,10 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
394 bb->start_pfn = pfn; 387 bb->start_pfn = pfn;
395 if (nr >= BM_BITS_PER_BLOCK) { 388 if (nr >= BM_BITS_PER_BLOCK) {
396 pfn += BM_BITS_PER_BLOCK; 389 pfn += BM_BITS_PER_BLOCK;
397 bb->size = BM_CHUNKS_PER_BLOCK;
398 nr -= BM_BITS_PER_BLOCK; 390 nr -= BM_BITS_PER_BLOCK;
399 } else { 391 } else {
400 /* This is executed only once in the loop */ 392 /* This is executed only once in the loop */
401 pfn += nr; 393 pfn += nr;
402 bb->size = DIV_ROUND_UP(nr, BM_BITS_PER_CHUNK);
403 } 394 }
404 bb->end_pfn = pfn; 395 bb->end_pfn = pfn;
405 bb = bb->next; 396 bb = bb->next;
@@ -478,8 +469,8 @@ static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
478 } 469 }
479 zone_bm->cur_block = bb; 470 zone_bm->cur_block = bb;
480 pfn -= bb->start_pfn; 471 pfn -= bb->start_pfn;
481 *bit_nr = pfn % BM_BITS_PER_CHUNK; 472 *bit_nr = pfn;
482 *addr = bb->data + pfn / BM_BITS_PER_CHUNK; 473 *addr = bb->data;
483 return 0; 474 return 0;
484} 475}
485 476
@@ -528,36 +519,6 @@ static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
528 return test_bit(bit, addr); 519 return test_bit(bit, addr);
529} 520}
530 521
531/* Two auxiliary functions for memory_bm_next_pfn */
532
533/* Find the first set bit in the given chunk, if there is one */
534
535static inline int next_bit_in_chunk(int bit, unsigned long *chunk_p)
536{
537 bit++;
538 while (bit < BM_BITS_PER_CHUNK) {
539 if (test_bit(bit, chunk_p))
540 return bit;
541
542 bit++;
543 }
544 return -1;
545}
546
547/* Find a chunk containing some bits set in given block of bits */
548
549static inline int next_chunk_in_block(int n, struct bm_block *bb)
550{
551 n++;
552 while (n < bb->size) {
553 if (bb->data[n])
554 return n;
555
556 n++;
557 }
558 return -1;
559}
560
561/** 522/**
562 * memory_bm_next_pfn - find the pfn that corresponds to the next set bit 523 * memory_bm_next_pfn - find the pfn that corresponds to the next set bit
563 * in the bitmap @bm. If the pfn cannot be found, BM_END_OF_MAP is 524 * in the bitmap @bm. If the pfn cannot be found, BM_END_OF_MAP is
@@ -571,40 +532,33 @@ static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
571{ 532{
572 struct zone_bitmap *zone_bm; 533 struct zone_bitmap *zone_bm;
573 struct bm_block *bb; 534 struct bm_block *bb;
574 int chunk;
575 int bit; 535 int bit;
576 536
577 do { 537 do {
578 bb = bm->cur.block; 538 bb = bm->cur.block;
579 do { 539 do {
580 chunk = bm->cur.chunk;
581 bit = bm->cur.bit; 540 bit = bm->cur.bit;
582 do { 541 bit = find_next_bit(bb->data, bm_block_bits(bb), bit);
583 bit = next_bit_in_chunk(bit, bb->data + chunk); 542 if (bit < bm_block_bits(bb))
584 if (bit >= 0) 543 goto Return_pfn;
585 goto Return_pfn; 544
586
587 chunk = next_chunk_in_block(chunk, bb);
588 bit = -1;
589 } while (chunk >= 0);
590 bb = bb->next; 545 bb = bb->next;
591 bm->cur.block = bb; 546 bm->cur.block = bb;
592 memory_bm_reset_chunk(bm); 547 bm->cur.bit = 0;
593 } while (bb); 548 } while (bb);
594 zone_bm = bm->cur.zone_bm->next; 549 zone_bm = bm->cur.zone_bm->next;
595 if (zone_bm) { 550 if (zone_bm) {
596 bm->cur.zone_bm = zone_bm; 551 bm->cur.zone_bm = zone_bm;
597 bm->cur.block = zone_bm->bm_blocks; 552 bm->cur.block = zone_bm->bm_blocks;
598 memory_bm_reset_chunk(bm); 553 bm->cur.bit = 0;
599 } 554 }
600 } while (zone_bm); 555 } while (zone_bm);
601 memory_bm_position_reset(bm); 556 memory_bm_position_reset(bm);
602 return BM_END_OF_MAP; 557 return BM_END_OF_MAP;
603 558
604 Return_pfn: 559 Return_pfn:
605 bm->cur.chunk = chunk; 560 bm->cur.bit = bit + 1;
606 bm->cur.bit = bit; 561 return bb->start_pfn + bit;
607 return bb->start_pfn + chunk * BM_BITS_PER_CHUNK + bit;
608} 562}
609 563
610/** 564/**
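
The snapshot rewrite above drops the hand-rolled chunk walk and lets find_next_bit() scan a block's whole bitmap in one call, with cur.bit remembering where to resume. A self-contained sketch of that walk; the find_next_bit() here is a naive bit-by-bit stand-in for the kernel helper, and the start_pfn value is made up:

    #include <limits.h>
    #include <stdio.h>

    #define BITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)

    /* naive equivalent of the kernel's find_next_bit() */
    static unsigned long find_next_bit(const unsigned long *addr,
                                       unsigned long size, unsigned long offset)
    {
            unsigned long bit;

            for (bit = offset; bit < size; bit++)
                    if (addr[bit / BITS_PER_LONG] & (1UL << (bit % BITS_PER_LONG)))
                            return bit;
            return size;    /* "not found", like the kernel helper */
    }

    int main(void)
    {
            unsigned long bitmap[2] = { 0 };
            unsigned long nbits = 2 * BITS_PER_LONG;
            unsigned long start_pfn = 0x1000;       /* illustrative */
            unsigned long bit;

            /* mark a few "pages" */
            bitmap[0] |= 1UL << 3;
            bitmap[0] |= 1UL << 17;
            bitmap[1] |= 1UL << 2;

            /* the memory_bm_next_pfn()-style walk: resume after the last hit */
            for (bit = find_next_bit(bitmap, nbits, 0);
                 bit < nbits;
                 bit = find_next_bit(bitmap, nbits, bit + 1))
                    printf("pfn %#lx is set\n", start_pfn + bit);

            return 0;
    }

The payoff in the patch is that bm_block no longer needs a size field: the bit count is simply end_pfn - start_pfn, as bm_block_bits() computes.
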
diff --git a/kernel/printk.c b/kernel/printk.c
index 07ad9e7f7a66..3f7a2a94583b 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -933,7 +933,7 @@ void suspend_console(void)
933{ 933{
934 if (!console_suspend_enabled) 934 if (!console_suspend_enabled)
935 return; 935 return;
936 printk("Suspending console(s)\n"); 936 printk("Suspending console(s) (use no_console_suspend to debug)\n");
937 acquire_console_sem(); 937 acquire_console_sem();
938 console_suspended = 1; 938 console_suspended = 1;
939} 939}
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index 16eeeaa9d618..6f8696c502f4 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -106,7 +106,7 @@ static void force_quiescent_state(struct rcu_data *rdp,
106 */ 106 */
107 cpus_and(cpumask, rcp->cpumask, cpu_online_map); 107 cpus_and(cpumask, rcp->cpumask, cpu_online_map);
108 cpu_clear(rdp->cpu, cpumask); 108 cpu_clear(rdp->cpu, cpumask);
109 for_each_cpu_mask(cpu, cpumask) 109 for_each_cpu_mask_nr(cpu, cpumask)
110 smp_send_reschedule(cpu); 110 smp_send_reschedule(cpu);
111 } 111 }
112} 112}
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index 6f62b77d93c4..27827931ca0d 100644
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -756,7 +756,7 @@ rcu_try_flip_idle(void)
756 756
757 /* Now ask each CPU for acknowledgement of the flip. */ 757 /* Now ask each CPU for acknowledgement of the flip. */
758 758
759 for_each_cpu_mask(cpu, rcu_cpu_online_map) { 759 for_each_cpu_mask_nr(cpu, rcu_cpu_online_map) {
760 per_cpu(rcu_flip_flag, cpu) = rcu_flipped; 760 per_cpu(rcu_flip_flag, cpu) = rcu_flipped;
761 dyntick_save_progress_counter(cpu); 761 dyntick_save_progress_counter(cpu);
762 } 762 }
@@ -774,7 +774,7 @@ rcu_try_flip_waitack(void)
774 int cpu; 774 int cpu;
775 775
776 RCU_TRACE_ME(rcupreempt_trace_try_flip_a1); 776 RCU_TRACE_ME(rcupreempt_trace_try_flip_a1);
777 for_each_cpu_mask(cpu, rcu_cpu_online_map) 777 for_each_cpu_mask_nr(cpu, rcu_cpu_online_map)
778 if (rcu_try_flip_waitack_needed(cpu) && 778 if (rcu_try_flip_waitack_needed(cpu) &&
779 per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) { 779 per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) {
780 RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1); 780 RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1);
@@ -806,7 +806,7 @@ rcu_try_flip_waitzero(void)
806 /* Check to see if the sum of the "last" counters is zero. */ 806 /* Check to see if the sum of the "last" counters is zero. */
807 807
808 RCU_TRACE_ME(rcupreempt_trace_try_flip_z1); 808 RCU_TRACE_ME(rcupreempt_trace_try_flip_z1);
809 for_each_cpu_mask(cpu, rcu_cpu_online_map) 809 for_each_cpu_mask_nr(cpu, rcu_cpu_online_map)
810 sum += RCU_DATA_CPU(cpu)->rcu_flipctr[lastidx]; 810 sum += RCU_DATA_CPU(cpu)->rcu_flipctr[lastidx];
811 if (sum != 0) { 811 if (sum != 0) {
812 RCU_TRACE_ME(rcupreempt_trace_try_flip_ze1); 812 RCU_TRACE_ME(rcupreempt_trace_try_flip_ze1);
@@ -821,7 +821,7 @@ rcu_try_flip_waitzero(void)
821 smp_mb(); /* ^^^^^^^^^^^^ */ 821 smp_mb(); /* ^^^^^^^^^^^^ */
822 822
823 /* Call for a memory barrier from each CPU. */ 823 /* Call for a memory barrier from each CPU. */
824 for_each_cpu_mask(cpu, rcu_cpu_online_map) { 824 for_each_cpu_mask_nr(cpu, rcu_cpu_online_map) {
825 per_cpu(rcu_mb_flag, cpu) = rcu_mb_needed; 825 per_cpu(rcu_mb_flag, cpu) = rcu_mb_needed;
826 dyntick_save_progress_counter(cpu); 826 dyntick_save_progress_counter(cpu);
827 } 827 }
@@ -841,7 +841,7 @@ rcu_try_flip_waitmb(void)
841 int cpu; 841 int cpu;
842 842
843 RCU_TRACE_ME(rcupreempt_trace_try_flip_m1); 843 RCU_TRACE_ME(rcupreempt_trace_try_flip_m1);
844 for_each_cpu_mask(cpu, rcu_cpu_online_map) 844 for_each_cpu_mask_nr(cpu, rcu_cpu_online_map)
845 if (rcu_try_flip_waitmb_needed(cpu) && 845 if (rcu_try_flip_waitmb_needed(cpu) &&
846 per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) { 846 per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) {
847 RCU_TRACE_ME(rcupreempt_trace_try_flip_me1); 847 RCU_TRACE_ME(rcupreempt_trace_try_flip_me1);
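
The for_each_cpu_mask_nr() conversions in these RCU hunks (and in the scheduler hunks below) change only the iteration bound: the loop stops at the number of CPU ids the machine actually has instead of the compile-time NR_CPUS ceiling. A rough userspace model of that difference, with invented NR_CPUS and nr_ids values:

    #include <stdio.h>

    #define NR_CPUS 64              /* compile-time ceiling, usually far too big */

    /* visit every set bit in 'mask' below 'limit' */
    #define for_each_cpu(cpu, mask, limit) \
            for ((cpu) = 0; (cpu) < (limit); (cpu)++) \
                    if ((mask) & (1ULL << (cpu)))

    int main(void)
    {
            unsigned long long online_mask = 0xb;   /* CPUs 0, 1 and 3 online */
            int nr_ids = 4;                         /* ids this machine really uses */
            int cpu;

            /* for_each_cpu_mask(): bounded by the compile-time maximum */
            for_each_cpu(cpu, online_mask, NR_CPUS)
                    printf("old form visits cpu %d\n", cpu);

            /* for_each_cpu_mask_nr(): bounded by the runtime count */
            for_each_cpu(cpu, online_mask, nr_ids)
                    printf("_nr form visits cpu %d\n", cpu);

            return 0;
    }
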
diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c
index 092e4c620af9..a56f629b057a 100644
--- a/kernel/rtmutex-tester.c
+++ b/kernel/rtmutex-tester.c
@@ -297,8 +297,8 @@ static int test_func(void *data)
297 * 297 *
298 * opcode:data 298 * opcode:data
299 */ 299 */
300static ssize_t sysfs_test_command(struct sys_device *dev, const char *buf, 300static ssize_t sysfs_test_command(struct sys_device *dev, struct sysdev_attribute *attr,
301 size_t count) 301 const char *buf, size_t count)
302{ 302{
303 struct sched_param schedpar; 303 struct sched_param schedpar;
304 struct test_thread_data *td; 304 struct test_thread_data *td;
@@ -360,7 +360,8 @@ static ssize_t sysfs_test_command(struct sys_device *dev, const char *buf,
360 * @dev: thread to query 360 * @dev: thread to query
361 * @buf: char buffer to be filled with thread status info 361 * @buf: char buffer to be filled with thread status info
362 */ 362 */
363static ssize_t sysfs_test_status(struct sys_device *dev, char *buf) 363static ssize_t sysfs_test_status(struct sys_device *dev, struct sysdev_attribute *attr,
364 char *buf)
364{ 365{
365 struct test_thread_data *td; 366 struct test_thread_data *td;
366 struct task_struct *tsk; 367 struct task_struct *tsk;
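
The new sysdev prototypes hand the callback a pointer to the attribute being accessed, so one show/store routine can serve several attributes by inspecting the descriptor. A userspace illustration of that callback shape; struct attr and the attribute names here are invented for the example:

    #include <stdio.h>

    /* invented analogue of struct sysdev_attribute */
    struct attr {
            const char *name;
            int *value;
    };

    /* one generic "show" callback: the attribute tells it what to print */
    static int attr_show(const struct attr *a, char *buf, size_t len)
    {
            return snprintf(buf, len, "%s = %d\n", a->name, *a->value);
    }

    static int power_savings = 1;
    static int test_mode = 0;

    static const struct attr attrs[] = {
            { "power_savings", &power_savings },
            { "test_mode",     &test_mode },
    };

    int main(void)
    {
            char buf[64];
            size_t i;

            for (i = 0; i < sizeof(attrs) / sizeof(attrs[0]); i++) {
                    attr_show(&attrs[i], buf, sizeof(buf));
                    fputs(buf, stdout);
            }
            return 0;
    }
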
diff --git a/kernel/sched.c b/kernel/sched.c
index 99e6d850ecab..6acf749d3336 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -571,8 +571,10 @@ struct rq {
571#endif 571#endif
572 572
573#ifdef CONFIG_SCHED_HRTICK 573#ifdef CONFIG_SCHED_HRTICK
574 unsigned long hrtick_flags; 574#ifdef CONFIG_SMP
575 ktime_t hrtick_expire; 575 int hrtick_csd_pending;
576 struct call_single_data hrtick_csd;
577#endif
576 struct hrtimer hrtick_timer; 578 struct hrtimer hrtick_timer;
577#endif 579#endif
578 580
@@ -983,13 +985,6 @@ static struct rq *this_rq_lock(void)
983 return rq; 985 return rq;
984} 986}
985 987
986static void __resched_task(struct task_struct *p, int tif_bit);
987
988static inline void resched_task(struct task_struct *p)
989{
990 __resched_task(p, TIF_NEED_RESCHED);
991}
992
993#ifdef CONFIG_SCHED_HRTICK 988#ifdef CONFIG_SCHED_HRTICK
994/* 989/*
995 * Use HR-timers to deliver accurate preemption points. 990 * Use HR-timers to deliver accurate preemption points.
@@ -1001,25 +996,6 @@ static inline void resched_task(struct task_struct *p)
1001 * When we get rescheduled we reprogram the hrtick_timer outside of the 996 * When we get rescheduled we reprogram the hrtick_timer outside of the
1002 * rq->lock. 997 * rq->lock.
1003 */ 998 */
1004static inline void resched_hrt(struct task_struct *p)
1005{
1006 __resched_task(p, TIF_HRTICK_RESCHED);
1007}
1008
1009static inline void resched_rq(struct rq *rq)
1010{
1011 unsigned long flags;
1012
1013 spin_lock_irqsave(&rq->lock, flags);
1014 resched_task(rq->curr);
1015 spin_unlock_irqrestore(&rq->lock, flags);
1016}
1017
1018enum {
1019 HRTICK_SET, /* re-programm hrtick_timer */
1020 HRTICK_RESET, /* not a new slice */
1021 HRTICK_BLOCK, /* stop hrtick operations */
1022};
1023 999
1024/* 1000/*
1025 * Use hrtick when: 1001 * Use hrtick when:
@@ -1030,40 +1006,11 @@ static inline int hrtick_enabled(struct rq *rq)
1030{ 1006{
1031 if (!sched_feat(HRTICK)) 1007 if (!sched_feat(HRTICK))
1032 return 0; 1008 return 0;
1033 if (unlikely(test_bit(HRTICK_BLOCK, &rq->hrtick_flags))) 1009 if (!cpu_active(cpu_of(rq)))
1034 return 0; 1010 return 0;
1035 return hrtimer_is_hres_active(&rq->hrtick_timer); 1011 return hrtimer_is_hres_active(&rq->hrtick_timer);
1036} 1012}
1037 1013
1038/*
1039 * Called to set the hrtick timer state.
1040 *
1041 * called with rq->lock held and irqs disabled
1042 */
1043static void hrtick_start(struct rq *rq, u64 delay, int reset)
1044{
1045 assert_spin_locked(&rq->lock);
1046
1047 /*
1048 * preempt at: now + delay
1049 */
1050 rq->hrtick_expire =
1051 ktime_add_ns(rq->hrtick_timer.base->get_time(), delay);
1052 /*
1053 * indicate we need to program the timer
1054 */
1055 __set_bit(HRTICK_SET, &rq->hrtick_flags);
1056 if (reset)
1057 __set_bit(HRTICK_RESET, &rq->hrtick_flags);
1058
1059 /*
1060 * New slices are called from the schedule path and don't need a
1061 * forced reschedule.
1062 */
1063 if (reset)
1064 resched_hrt(rq->curr);
1065}
1066
1067static void hrtick_clear(struct rq *rq) 1014static void hrtick_clear(struct rq *rq)
1068{ 1015{
1069 if (hrtimer_active(&rq->hrtick_timer)) 1016 if (hrtimer_active(&rq->hrtick_timer))
@@ -1071,32 +1018,6 @@ static void hrtick_clear(struct rq *rq)
1071} 1018}
1072 1019
1073/* 1020/*
1074 * Update the timer from the possible pending state.
1075 */
1076static void hrtick_set(struct rq *rq)
1077{
1078 ktime_t time;
1079 int set, reset;
1080 unsigned long flags;
1081
1082 WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
1083
1084 spin_lock_irqsave(&rq->lock, flags);
1085 set = __test_and_clear_bit(HRTICK_SET, &rq->hrtick_flags);
1086 reset = __test_and_clear_bit(HRTICK_RESET, &rq->hrtick_flags);
1087 time = rq->hrtick_expire;
1088 clear_thread_flag(TIF_HRTICK_RESCHED);
1089 spin_unlock_irqrestore(&rq->lock, flags);
1090
1091 if (set) {
1092 hrtimer_start(&rq->hrtick_timer, time, HRTIMER_MODE_ABS);
1093 if (reset && !hrtimer_active(&rq->hrtick_timer))
1094 resched_rq(rq);
1095 } else
1096 hrtick_clear(rq);
1097}
1098
1099/*
1100 * High-resolution timer tick. 1021 * High-resolution timer tick.
1101 * Runs from hardirq context with interrupts disabled. 1022 * Runs from hardirq context with interrupts disabled.
1102 */ 1023 */
@@ -1115,27 +1036,37 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer)
1115} 1036}
1116 1037
1117#ifdef CONFIG_SMP 1038#ifdef CONFIG_SMP
1118static void hotplug_hrtick_disable(int cpu) 1039/*
1040 * called from hardirq (IPI) context
1041 */
1042static void __hrtick_start(void *arg)
1119{ 1043{
1120 struct rq *rq = cpu_rq(cpu); 1044 struct rq *rq = arg;
1121 unsigned long flags;
1122
1123 spin_lock_irqsave(&rq->lock, flags);
1124 rq->hrtick_flags = 0;
1125 __set_bit(HRTICK_BLOCK, &rq->hrtick_flags);
1126 spin_unlock_irqrestore(&rq->lock, flags);
1127 1045
1128 hrtick_clear(rq); 1046 spin_lock(&rq->lock);
1047 hrtimer_restart(&rq->hrtick_timer);
1048 rq->hrtick_csd_pending = 0;
1049 spin_unlock(&rq->lock);
1129} 1050}
1130 1051
1131static void hotplug_hrtick_enable(int cpu) 1052/*
1053 * Called to set the hrtick timer state.
1054 *
1055 * called with rq->lock held and irqs disabled
1056 */
1057static void hrtick_start(struct rq *rq, u64 delay)
1132{ 1058{
1133 struct rq *rq = cpu_rq(cpu); 1059 struct hrtimer *timer = &rq->hrtick_timer;
1134 unsigned long flags; 1060 ktime_t time = ktime_add_ns(timer->base->get_time(), delay);
1135 1061
1136 spin_lock_irqsave(&rq->lock, flags); 1062 timer->expires = time;
1137 __clear_bit(HRTICK_BLOCK, &rq->hrtick_flags); 1063
1138 spin_unlock_irqrestore(&rq->lock, flags); 1064 if (rq == this_rq()) {
1065 hrtimer_restart(timer);
1066 } else if (!rq->hrtick_csd_pending) {
1067 __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd);
1068 rq->hrtick_csd_pending = 1;
1069 }
1139} 1070}
1140 1071
1141static int 1072static int
@@ -1150,16 +1081,7 @@ hotplug_hrtick(struct notifier_block *nfb, unsigned long action, void *hcpu)
1150 case CPU_DOWN_PREPARE_FROZEN: 1081 case CPU_DOWN_PREPARE_FROZEN:
1151 case CPU_DEAD: 1082 case CPU_DEAD:
1152 case CPU_DEAD_FROZEN: 1083 case CPU_DEAD_FROZEN:
1153 hotplug_hrtick_disable(cpu); 1084 hrtick_clear(cpu_rq(cpu));
1154 return NOTIFY_OK;
1155
1156 case CPU_UP_PREPARE:
1157 case CPU_UP_PREPARE_FROZEN:
1158 case CPU_DOWN_FAILED:
1159 case CPU_DOWN_FAILED_FROZEN:
1160 case CPU_ONLINE:
1161 case CPU_ONLINE_FROZEN:
1162 hotplug_hrtick_enable(cpu);
1163 return NOTIFY_OK; 1085 return NOTIFY_OK;
1164 } 1086 }
1165 1087
@@ -1170,46 +1092,45 @@ static void init_hrtick(void)
1170{ 1092{
1171 hotcpu_notifier(hotplug_hrtick, 0); 1093 hotcpu_notifier(hotplug_hrtick, 0);
1172} 1094}
1173#endif /* CONFIG_SMP */ 1095#else
1096/*
1097 * Called to set the hrtick timer state.
1098 *
1099 * called with rq->lock held and irqs disabled
1100 */
1101static void hrtick_start(struct rq *rq, u64 delay)
1102{
1103 hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), HRTIMER_MODE_REL);
1104}
1174 1105
1175static void init_rq_hrtick(struct rq *rq) 1106static void init_hrtick(void)
1176{ 1107{
1177 rq->hrtick_flags = 0;
1178 hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1179 rq->hrtick_timer.function = hrtick;
1180 rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
1181} 1108}
1109#endif /* CONFIG_SMP */
1182 1110
1183void hrtick_resched(void) 1111static void init_rq_hrtick(struct rq *rq)
1184{ 1112{
1185 struct rq *rq; 1113#ifdef CONFIG_SMP
1186 unsigned long flags; 1114 rq->hrtick_csd_pending = 0;
1187 1115
1188 if (!test_thread_flag(TIF_HRTICK_RESCHED)) 1116 rq->hrtick_csd.flags = 0;
1189 return; 1117 rq->hrtick_csd.func = __hrtick_start;
1118 rq->hrtick_csd.info = rq;
1119#endif
1190 1120
1191 local_irq_save(flags); 1121 hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1192 rq = cpu_rq(smp_processor_id()); 1122 rq->hrtick_timer.function = hrtick;
1193 hrtick_set(rq); 1123 rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
1194 local_irq_restore(flags);
1195} 1124}
1196#else 1125#else
1197static inline void hrtick_clear(struct rq *rq) 1126static inline void hrtick_clear(struct rq *rq)
1198{ 1127{
1199} 1128}
1200 1129
1201static inline void hrtick_set(struct rq *rq)
1202{
1203}
1204
1205static inline void init_rq_hrtick(struct rq *rq) 1130static inline void init_rq_hrtick(struct rq *rq)
1206{ 1131{
1207} 1132}
1208 1133
1209void hrtick_resched(void)
1210{
1211}
1212
1213static inline void init_hrtick(void) 1134static inline void init_hrtick(void)
1214{ 1135{
1215} 1136}
@@ -1228,16 +1149,16 @@ static inline void init_hrtick(void)
1228#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) 1149#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
1229#endif 1150#endif
1230 1151
1231static void __resched_task(struct task_struct *p, int tif_bit) 1152static void resched_task(struct task_struct *p)
1232{ 1153{
1233 int cpu; 1154 int cpu;
1234 1155
1235 assert_spin_locked(&task_rq(p)->lock); 1156 assert_spin_locked(&task_rq(p)->lock);
1236 1157
1237 if (unlikely(test_tsk_thread_flag(p, tif_bit))) 1158 if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED)))
1238 return; 1159 return;
1239 1160
1240 set_tsk_thread_flag(p, tif_bit); 1161 set_tsk_thread_flag(p, TIF_NEED_RESCHED);
1241 1162
1242 cpu = task_cpu(p); 1163 cpu = task_cpu(p);
1243 if (cpu == smp_processor_id()) 1164 if (cpu == smp_processor_id())
@@ -1303,10 +1224,10 @@ void wake_up_idle_cpu(int cpu)
1303#endif /* CONFIG_NO_HZ */ 1224#endif /* CONFIG_NO_HZ */
1304 1225
1305#else /* !CONFIG_SMP */ 1226#else /* !CONFIG_SMP */
1306static void __resched_task(struct task_struct *p, int tif_bit) 1227static void resched_task(struct task_struct *p)
1307{ 1228{
1308 assert_spin_locked(&task_rq(p)->lock); 1229 assert_spin_locked(&task_rq(p)->lock);
1309 set_tsk_thread_flag(p, tif_bit); 1230 set_tsk_need_resched(p);
1310} 1231}
1311#endif /* CONFIG_SMP */ 1232#endif /* CONFIG_SMP */
1312 1233
@@ -2108,7 +2029,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
2108 /* Tally up the load of all CPUs in the group */ 2029 /* Tally up the load of all CPUs in the group */
2109 avg_load = 0; 2030 avg_load = 0;
2110 2031
2111 for_each_cpu_mask(i, group->cpumask) { 2032 for_each_cpu_mask_nr(i, group->cpumask) {
2112 /* Bias balancing toward cpus of our domain */ 2033 /* Bias balancing toward cpus of our domain */
2113 if (local_group) 2034 if (local_group)
2114 load = source_load(i, load_idx); 2035 load = source_load(i, load_idx);
@@ -2150,7 +2071,7 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu,
2150 /* Traverse only the allowed CPUs */ 2071 /* Traverse only the allowed CPUs */
2151 cpus_and(*tmp, group->cpumask, p->cpus_allowed); 2072 cpus_and(*tmp, group->cpumask, p->cpus_allowed);
2152 2073
2153 for_each_cpu_mask(i, *tmp) { 2074 for_each_cpu_mask_nr(i, *tmp) {
2154 load = weighted_cpuload(i); 2075 load = weighted_cpuload(i);
2155 2076
2156 if (load < min_load || (load == min_load && i == this_cpu)) { 2077 if (load < min_load || (load == min_load && i == this_cpu)) {
@@ -2881,7 +2802,7 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu)
2881 2802
2882 rq = task_rq_lock(p, &flags); 2803 rq = task_rq_lock(p, &flags);
2883 if (!cpu_isset(dest_cpu, p->cpus_allowed) 2804 if (!cpu_isset(dest_cpu, p->cpus_allowed)
2884 || unlikely(cpu_is_offline(dest_cpu))) 2805 || unlikely(!cpu_active(dest_cpu)))
2885 goto out; 2806 goto out;
2886 2807
2887 /* force the process onto the specified CPU */ 2808 /* force the process onto the specified CPU */
@@ -3168,7 +3089,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
3168 max_cpu_load = 0; 3089 max_cpu_load = 0;
3169 min_cpu_load = ~0UL; 3090 min_cpu_load = ~0UL;
3170 3091
3171 for_each_cpu_mask(i, group->cpumask) { 3092 for_each_cpu_mask_nr(i, group->cpumask) {
3172 struct rq *rq; 3093 struct rq *rq;
3173 3094
3174 if (!cpu_isset(i, *cpus)) 3095 if (!cpu_isset(i, *cpus))
@@ -3447,7 +3368,7 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
3447 unsigned long max_load = 0; 3368 unsigned long max_load = 0;
3448 int i; 3369 int i;
3449 3370
3450 for_each_cpu_mask(i, group->cpumask) { 3371 for_each_cpu_mask_nr(i, group->cpumask) {
3451 unsigned long wl; 3372 unsigned long wl;
3452 3373
3453 if (!cpu_isset(i, *cpus)) 3374 if (!cpu_isset(i, *cpus))
@@ -3849,7 +3770,7 @@ int select_nohz_load_balancer(int stop_tick)
3849 /* 3770 /*
3850 * If we are going offline and still the leader, give up! 3771 * If we are going offline and still the leader, give up!
3851 */ 3772 */
3852 if (cpu_is_offline(cpu) && 3773 if (!cpu_active(cpu) &&
3853 atomic_read(&nohz.load_balancer) == cpu) { 3774 atomic_read(&nohz.load_balancer) == cpu) {
3854 if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu) 3775 if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
3855 BUG(); 3776 BUG();
@@ -3989,7 +3910,7 @@ static void run_rebalance_domains(struct softirq_action *h)
3989 int balance_cpu; 3910 int balance_cpu;
3990 3911
3991 cpu_clear(this_cpu, cpus); 3912 cpu_clear(this_cpu, cpus);
3992 for_each_cpu_mask(balance_cpu, cpus) { 3913 for_each_cpu_mask_nr(balance_cpu, cpus) {
3993 /* 3914 /*
3994 * If this cpu gets work to do, stop the load balancing 3915 * If this cpu gets work to do, stop the load balancing
3995 * work being done for other cpus. Next load 3916 * work being done for other cpus. Next load
@@ -4395,7 +4316,7 @@ asmlinkage void __sched schedule(void)
4395 struct task_struct *prev, *next; 4316 struct task_struct *prev, *next;
4396 unsigned long *switch_count; 4317 unsigned long *switch_count;
4397 struct rq *rq; 4318 struct rq *rq;
4398 int cpu, hrtick = sched_feat(HRTICK); 4319 int cpu;
4399 4320
4400need_resched: 4321need_resched:
4401 preempt_disable(); 4322 preempt_disable();
@@ -4410,7 +4331,7 @@ need_resched_nonpreemptible:
4410 4331
4411 schedule_debug(prev); 4332 schedule_debug(prev);
4412 4333
4413 if (hrtick) 4334 if (sched_feat(HRTICK))
4414 hrtick_clear(rq); 4335 hrtick_clear(rq);
4415 4336
4416 /* 4337 /*
@@ -4457,9 +4378,6 @@ need_resched_nonpreemptible:
4457 } else 4378 } else
4458 spin_unlock_irq(&rq->lock); 4379 spin_unlock_irq(&rq->lock);
4459 4380
4460 if (hrtick)
4461 hrtick_set(rq);
4462
4463 if (unlikely(reacquire_kernel_lock(current) < 0)) 4381 if (unlikely(reacquire_kernel_lock(current) < 0))
4464 goto need_resched_nonpreemptible; 4382 goto need_resched_nonpreemptible;
4465 4383
@@ -5876,7 +5794,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
5876 struct rq *rq_dest, *rq_src; 5794 struct rq *rq_dest, *rq_src;
5877 int ret = 0, on_rq; 5795 int ret = 0, on_rq;
5878 5796
5879 if (unlikely(cpu_is_offline(dest_cpu))) 5797 if (unlikely(!cpu_active(dest_cpu)))
5880 return ret; 5798 return ret;
5881 5799
5882 rq_src = cpu_rq(src_cpu); 5800 rq_src = cpu_rq(src_cpu);
@@ -6768,7 +6686,8 @@ static cpumask_t cpu_isolated_map = CPU_MASK_NONE;
6768/* Setup the mask of cpus configured for isolated domains */ 6686/* Setup the mask of cpus configured for isolated domains */
6769static int __init isolated_cpu_setup(char *str) 6687static int __init isolated_cpu_setup(char *str)
6770{ 6688{
6771 int ints[NR_CPUS], i; 6689 static int __initdata ints[NR_CPUS];
6690 int i;
6772 6691
6773 str = get_options(str, ARRAY_SIZE(ints), ints); 6692 str = get_options(str, ARRAY_SIZE(ints), ints);
6774 cpus_clear(cpu_isolated_map); 6693 cpus_clear(cpu_isolated_map);
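
The isolcpus fix above swaps an on-stack int[NR_CPUS] array, which can be several kilobytes, for a static __initdata one that is discarded after boot. A userspace sketch of the same trade-off while parsing a comma-separated CPU list; strtol() stands in for the kernel's get_options():

    #include <stdio.h>
    #include <stdlib.h>

    #define NR_CPUS 4096    /* a 16 KiB int array -- too big for a small stack */

    /* static storage instead of the caller's stack frame */
    static int isolated[NR_CPUS];

    static int parse_cpu_list(const char *str)
    {
            int n = 0;
            char *end;

            while (*str && n < NR_CPUS) {
                    isolated[n++] = (int)strtol(str, &end, 10);
                    if (*end != ',')
                            break;
                    str = end + 1;
            }
            return n;
    }

    int main(void)
    {
            int i, n = parse_cpu_list("1,3,5");

            for (i = 0; i < n; i++)
                    printf("isolating cpu %d\n", isolated[i]);
            return 0;
    }
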
@@ -6802,7 +6721,7 @@ init_sched_build_groups(const cpumask_t *span, const cpumask_t *cpu_map,
6802 6721
6803 cpus_clear(*covered); 6722 cpus_clear(*covered);
6804 6723
6805 for_each_cpu_mask(i, *span) { 6724 for_each_cpu_mask_nr(i, *span) {
6806 struct sched_group *sg; 6725 struct sched_group *sg;
6807 int group = group_fn(i, cpu_map, &sg, tmpmask); 6726 int group = group_fn(i, cpu_map, &sg, tmpmask);
6808 int j; 6727 int j;
@@ -6813,7 +6732,7 @@ init_sched_build_groups(const cpumask_t *span, const cpumask_t *cpu_map,
6813 cpus_clear(sg->cpumask); 6732 cpus_clear(sg->cpumask);
6814 sg->__cpu_power = 0; 6733 sg->__cpu_power = 0;
6815 6734
6816 for_each_cpu_mask(j, *span) { 6735 for_each_cpu_mask_nr(j, *span) {
6817 if (group_fn(j, cpu_map, NULL, tmpmask) != group) 6736 if (group_fn(j, cpu_map, NULL, tmpmask) != group)
6818 continue; 6737 continue;
6819 6738
@@ -7013,7 +6932,7 @@ static void init_numa_sched_groups_power(struct sched_group *group_head)
7013 if (!sg) 6932 if (!sg)
7014 return; 6933 return;
7015 do { 6934 do {
7016 for_each_cpu_mask(j, sg->cpumask) { 6935 for_each_cpu_mask_nr(j, sg->cpumask) {
7017 struct sched_domain *sd; 6936 struct sched_domain *sd;
7018 6937
7019 sd = &per_cpu(phys_domains, j); 6938 sd = &per_cpu(phys_domains, j);
@@ -7038,7 +6957,7 @@ static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask)
7038{ 6957{
7039 int cpu, i; 6958 int cpu, i;
7040 6959
7041 for_each_cpu_mask(cpu, *cpu_map) { 6960 for_each_cpu_mask_nr(cpu, *cpu_map) {
7042 struct sched_group **sched_group_nodes 6961 struct sched_group **sched_group_nodes
7043 = sched_group_nodes_bycpu[cpu]; 6962 = sched_group_nodes_bycpu[cpu];
7044 6963
@@ -7277,7 +7196,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7277 /* 7196 /*
7278 * Set up domains for cpus specified by the cpu_map. 7197 * Set up domains for cpus specified by the cpu_map.
7279 */ 7198 */
7280 for_each_cpu_mask(i, *cpu_map) { 7199 for_each_cpu_mask_nr(i, *cpu_map) {
7281 struct sched_domain *sd = NULL, *p; 7200 struct sched_domain *sd = NULL, *p;
7282 SCHED_CPUMASK_VAR(nodemask, allmasks); 7201 SCHED_CPUMASK_VAR(nodemask, allmasks);
7283 7202
@@ -7344,7 +7263,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7344 7263
7345#ifdef CONFIG_SCHED_SMT 7264#ifdef CONFIG_SCHED_SMT
7346 /* Set up CPU (sibling) groups */ 7265 /* Set up CPU (sibling) groups */
7347 for_each_cpu_mask(i, *cpu_map) { 7266 for_each_cpu_mask_nr(i, *cpu_map) {
7348 SCHED_CPUMASK_VAR(this_sibling_map, allmasks); 7267 SCHED_CPUMASK_VAR(this_sibling_map, allmasks);
7349 SCHED_CPUMASK_VAR(send_covered, allmasks); 7268 SCHED_CPUMASK_VAR(send_covered, allmasks);
7350 7269
@@ -7361,7 +7280,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7361 7280
7362#ifdef CONFIG_SCHED_MC 7281#ifdef CONFIG_SCHED_MC
7363 /* Set up multi-core groups */ 7282 /* Set up multi-core groups */
7364 for_each_cpu_mask(i, *cpu_map) { 7283 for_each_cpu_mask_nr(i, *cpu_map) {
7365 SCHED_CPUMASK_VAR(this_core_map, allmasks); 7284 SCHED_CPUMASK_VAR(this_core_map, allmasks);
7366 SCHED_CPUMASK_VAR(send_covered, allmasks); 7285 SCHED_CPUMASK_VAR(send_covered, allmasks);
7367 7286
@@ -7428,7 +7347,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7428 goto error; 7347 goto error;
7429 } 7348 }
7430 sched_group_nodes[i] = sg; 7349 sched_group_nodes[i] = sg;
7431 for_each_cpu_mask(j, *nodemask) { 7350 for_each_cpu_mask_nr(j, *nodemask) {
7432 struct sched_domain *sd; 7351 struct sched_domain *sd;
7433 7352
7434 sd = &per_cpu(node_domains, j); 7353 sd = &per_cpu(node_domains, j);
@@ -7474,21 +7393,21 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7474 7393
7475 /* Calculate CPU power for physical packages and nodes */ 7394 /* Calculate CPU power for physical packages and nodes */
7476#ifdef CONFIG_SCHED_SMT 7395#ifdef CONFIG_SCHED_SMT
7477 for_each_cpu_mask(i, *cpu_map) { 7396 for_each_cpu_mask_nr(i, *cpu_map) {
7478 struct sched_domain *sd = &per_cpu(cpu_domains, i); 7397 struct sched_domain *sd = &per_cpu(cpu_domains, i);
7479 7398
7480 init_sched_groups_power(i, sd); 7399 init_sched_groups_power(i, sd);
7481 } 7400 }
7482#endif 7401#endif
7483#ifdef CONFIG_SCHED_MC 7402#ifdef CONFIG_SCHED_MC
7484 for_each_cpu_mask(i, *cpu_map) { 7403 for_each_cpu_mask_nr(i, *cpu_map) {
7485 struct sched_domain *sd = &per_cpu(core_domains, i); 7404 struct sched_domain *sd = &per_cpu(core_domains, i);
7486 7405
7487 init_sched_groups_power(i, sd); 7406 init_sched_groups_power(i, sd);
7488 } 7407 }
7489#endif 7408#endif
7490 7409
7491 for_each_cpu_mask(i, *cpu_map) { 7410 for_each_cpu_mask_nr(i, *cpu_map) {
7492 struct sched_domain *sd = &per_cpu(phys_domains, i); 7411 struct sched_domain *sd = &per_cpu(phys_domains, i);
7493 7412
7494 init_sched_groups_power(i, sd); 7413 init_sched_groups_power(i, sd);
@@ -7508,7 +7427,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7508#endif 7427#endif
7509 7428
7510 /* Attach the domains */ 7429 /* Attach the domains */
7511 for_each_cpu_mask(i, *cpu_map) { 7430 for_each_cpu_mask_nr(i, *cpu_map) {
7512 struct sched_domain *sd; 7431 struct sched_domain *sd;
7513#ifdef CONFIG_SCHED_SMT 7432#ifdef CONFIG_SCHED_SMT
7514 sd = &per_cpu(cpu_domains, i); 7433 sd = &per_cpu(cpu_domains, i);
@@ -7553,18 +7472,6 @@ void __attribute__((weak)) arch_update_cpu_topology(void)
7553} 7472}
7554 7473
7555/* 7474/*
7556 * Free current domain masks.
7557 * Called after all cpus are attached to NULL domain.
7558 */
7559static void free_sched_domains(void)
7560{
7561 ndoms_cur = 0;
7562 if (doms_cur != &fallback_doms)
7563 kfree(doms_cur);
7564 doms_cur = &fallback_doms;
7565}
7566
7567/*
7568 * Set up scheduler domains and groups. Callers must hold the hotplug lock. 7475 * Set up scheduler domains and groups. Callers must hold the hotplug lock.
7569 * For now this just excludes isolated cpus, but could be used to 7476 * For now this just excludes isolated cpus, but could be used to
7570 * exclude other special cases in the future. 7477 * exclude other special cases in the future.
@@ -7603,7 +7510,7 @@ static void detach_destroy_domains(const cpumask_t *cpu_map)
7603 7510
7604 unregister_sched_domain_sysctl(); 7511 unregister_sched_domain_sysctl();
7605 7512
7606 for_each_cpu_mask(i, *cpu_map) 7513 for_each_cpu_mask_nr(i, *cpu_map)
7607 cpu_attach_domain(NULL, &def_root_domain, i); 7514 cpu_attach_domain(NULL, &def_root_domain, i);
7608 synchronize_sched(); 7515 synchronize_sched();
7609 arch_destroy_sched_domains(cpu_map, &tmpmask); 7516 arch_destroy_sched_domains(cpu_map, &tmpmask);
@@ -7642,7 +7549,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
7642 * ownership of it and will kfree it when done with it. If the caller 7549 * ownership of it and will kfree it when done with it. If the caller
7643 * failed the kmalloc call, then it can pass in doms_new == NULL, 7550 * failed the kmalloc call, then it can pass in doms_new == NULL,
7644 * and partition_sched_domains() will fallback to the single partition 7551 * and partition_sched_domains() will fallback to the single partition
7645 * 'fallback_doms'. 7552 * 'fallback_doms'; this also forces the domains to be rebuilt.
7646 * 7553 *
7647 * Call with hotplug lock held 7554 * Call with hotplug lock held
7648 */ 7555 */
@@ -7656,12 +7563,8 @@ void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
7656 /* always unregister in case we don't destroy any domains */ 7563 /* always unregister in case we don't destroy any domains */
7657 unregister_sched_domain_sysctl(); 7564 unregister_sched_domain_sysctl();
7658 7565
7659 if (doms_new == NULL) { 7566 if (doms_new == NULL)
7660 ndoms_new = 1; 7567 ndoms_new = 0;
7661 doms_new = &fallback_doms;
7662 cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
7663 dattr_new = NULL;
7664 }
7665 7568
7666 /* Destroy deleted domains */ 7569 /* Destroy deleted domains */
7667 for (i = 0; i < ndoms_cur; i++) { 7570 for (i = 0; i < ndoms_cur; i++) {
@@ -7676,6 +7579,14 @@ match1:
7676 ; 7579 ;
7677 } 7580 }
7678 7581
7582 if (doms_new == NULL) {
7583 ndoms_cur = 0;
7584 ndoms_new = 1;
7585 doms_new = &fallback_doms;
7586 cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
7587 dattr_new = NULL;
7588 }
7589
7679 /* Build new domains */ 7590 /* Build new domains */
7680 for (i = 0; i < ndoms_new; i++) { 7591 for (i = 0; i < ndoms_new; i++) {
7681 for (j = 0; j < ndoms_cur; j++) { 7592 for (j = 0; j < ndoms_cur; j++) {
@@ -7706,17 +7617,10 @@ match2:
7706#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) 7617#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
7707int arch_reinit_sched_domains(void) 7618int arch_reinit_sched_domains(void)
7708{ 7619{
7709 int err;
7710
7711 get_online_cpus(); 7620 get_online_cpus();
7712 mutex_lock(&sched_domains_mutex); 7621 rebuild_sched_domains();
7713 detach_destroy_domains(&cpu_online_map);
7714 free_sched_domains();
7715 err = arch_init_sched_domains(&cpu_online_map);
7716 mutex_unlock(&sched_domains_mutex);
7717 put_online_cpus(); 7622 put_online_cpus();
7718 7623 return 0;
7719 return err;
7720} 7624}
7721 7625
7722static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt) 7626static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
@@ -7737,11 +7641,13 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
7737} 7641}
7738 7642
7739#ifdef CONFIG_SCHED_MC 7643#ifdef CONFIG_SCHED_MC
7740static ssize_t sched_mc_power_savings_show(struct sys_device *dev, char *page) 7644static ssize_t sched_mc_power_savings_show(struct sys_device *dev,
7645 struct sysdev_attribute *attr, char *page)
7741{ 7646{
7742 return sprintf(page, "%u\n", sched_mc_power_savings); 7647 return sprintf(page, "%u\n", sched_mc_power_savings);
7743} 7648}
7744static ssize_t sched_mc_power_savings_store(struct sys_device *dev, 7649static ssize_t sched_mc_power_savings_store(struct sys_device *dev,
7650 struct sysdev_attribute *attr,
7745 const char *buf, size_t count) 7651 const char *buf, size_t count)
7746{ 7652{
7747 return sched_power_savings_store(buf, count, 0); 7653 return sched_power_savings_store(buf, count, 0);
@@ -7751,11 +7657,13 @@ static SYSDEV_ATTR(sched_mc_power_savings, 0644, sched_mc_power_savings_show,
7751#endif 7657#endif
7752 7658
7753#ifdef CONFIG_SCHED_SMT 7659#ifdef CONFIG_SCHED_SMT
7754static ssize_t sched_smt_power_savings_show(struct sys_device *dev, char *page) 7660static ssize_t sched_smt_power_savings_show(struct sys_device *dev,
7661 struct sysdev_attribute *attr, char *page)
7755{ 7662{
7756 return sprintf(page, "%u\n", sched_smt_power_savings); 7663 return sprintf(page, "%u\n", sched_smt_power_savings);
7757} 7664}
7758static ssize_t sched_smt_power_savings_store(struct sys_device *dev, 7665static ssize_t sched_smt_power_savings_store(struct sys_device *dev,
7666 struct sysdev_attribute *attr,
7759 const char *buf, size_t count) 7667 const char *buf, size_t count)
7760{ 7668{
7761 return sched_power_savings_store(buf, count, 1); 7669 return sched_power_savings_store(buf, count, 1);
@@ -7782,59 +7690,49 @@ int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
7782} 7690}
7783#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */ 7691#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
7784 7692
7693#ifndef CONFIG_CPUSETS
7785/* 7694/*
7786 * Force a reinitialization of the sched domains hierarchy. The domains 7695 * Add online and remove offline CPUs from the scheduler domains.
7787 * and groups cannot be updated in place without racing with the balancing 7696 * When cpusets are enabled they take over this function.
7788 * code, so we temporarily attach all running cpus to the NULL domain
7789 * which will prevent rebalancing while the sched domains are recalculated.
7790 */ 7697 */
7791static int update_sched_domains(struct notifier_block *nfb, 7698static int update_sched_domains(struct notifier_block *nfb,
7792 unsigned long action, void *hcpu) 7699 unsigned long action, void *hcpu)
7793{ 7700{
7701 switch (action) {
7702 case CPU_ONLINE:
7703 case CPU_ONLINE_FROZEN:
7704 case CPU_DEAD:
7705 case CPU_DEAD_FROZEN:
7706 partition_sched_domains(0, NULL, NULL);
7707 return NOTIFY_OK;
7708
7709 default:
7710 return NOTIFY_DONE;
7711 }
7712}
7713#endif
7714
7715static int update_runtime(struct notifier_block *nfb,
7716 unsigned long action, void *hcpu)
7717{
7794 int cpu = (int)(long)hcpu; 7718 int cpu = (int)(long)hcpu;
7795 7719
7796 switch (action) { 7720 switch (action) {
7797 case CPU_DOWN_PREPARE: 7721 case CPU_DOWN_PREPARE:
7798 case CPU_DOWN_PREPARE_FROZEN: 7722 case CPU_DOWN_PREPARE_FROZEN:
7799 disable_runtime(cpu_rq(cpu)); 7723 disable_runtime(cpu_rq(cpu));
7800 /* fall-through */
7801 case CPU_UP_PREPARE:
7802 case CPU_UP_PREPARE_FROZEN:
7803 detach_destroy_domains(&cpu_online_map);
7804 free_sched_domains();
7805 return NOTIFY_OK; 7724 return NOTIFY_OK;
7806 7725
7807
7808 case CPU_DOWN_FAILED: 7726 case CPU_DOWN_FAILED:
7809 case CPU_DOWN_FAILED_FROZEN: 7727 case CPU_DOWN_FAILED_FROZEN:
7810 case CPU_ONLINE: 7728 case CPU_ONLINE:
7811 case CPU_ONLINE_FROZEN: 7729 case CPU_ONLINE_FROZEN:
7812 enable_runtime(cpu_rq(cpu)); 7730 enable_runtime(cpu_rq(cpu));
7813 /* fall-through */ 7731 return NOTIFY_OK;
7814 case CPU_UP_CANCELED: 7732
7815 case CPU_UP_CANCELED_FROZEN:
7816 case CPU_DEAD:
7817 case CPU_DEAD_FROZEN:
7818 /*
7819 * Fall through and re-initialise the domains.
7820 */
7821 break;
7822 default: 7733 default:
7823 return NOTIFY_DONE; 7734 return NOTIFY_DONE;
7824 } 7735 }
7825
7826#ifndef CONFIG_CPUSETS
7827 /*
7828 * Create default domain partitioning if cpusets are disabled.
7829 * Otherwise we let cpusets rebuild the domains based on the
7830 * current setup.
7831 */
7832
7833 /* The hotplug lock is already held by cpu_up/cpu_down */
7834 arch_init_sched_domains(&cpu_online_map);
7835#endif
7836
7837 return NOTIFY_OK;
7838} 7736}
7839 7737
7840void __init sched_init_smp(void) 7738void __init sched_init_smp(void)
@@ -7854,8 +7752,15 @@ void __init sched_init_smp(void)
7854 cpu_set(smp_processor_id(), non_isolated_cpus); 7752 cpu_set(smp_processor_id(), non_isolated_cpus);
7855 mutex_unlock(&sched_domains_mutex); 7753 mutex_unlock(&sched_domains_mutex);
7856 put_online_cpus(); 7754 put_online_cpus();
7755
7756#ifndef CONFIG_CPUSETS
7857 /* XXX: Theoretical race here - CPU may be hotplugged now */ 7757 /* XXX: Theoretical race here - CPU may be hotplugged now */
7858 hotcpu_notifier(update_sched_domains, 0); 7758 hotcpu_notifier(update_sched_domains, 0);
7759#endif
7760
7761 /* RT runtime code needs to handle some hotplug events */
7762 hotcpu_notifier(update_runtime, 0);
7763
7859 init_hrtick(); 7764 init_hrtick();
7860 7765
7861 /* Move init over to a non-isolated CPU */ 7766 /* Move init over to a non-isolated CPU */
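
The hotplug handling is split into two notifiers: domain rebuilding (registered only when cpusets are not managing the domains) and RT runtime enable/disable (registered unconditionally). A toy notifier chain in userspace C showing the shape of that split; the event codes and handler bodies are invented:

    #include <stdio.h>

    enum { CPU_ONLINE, CPU_DOWN_PREPARE };

    typedef int (*notifier_fn)(unsigned long action, void *data);

    static notifier_fn chain[8];
    static int nr_notifiers;

    static void register_notifier(notifier_fn fn)
    {
            chain[nr_notifiers++] = fn;
    }

    static void notify(unsigned long action)
    {
            int i;

            for (i = 0; i < nr_notifiers; i++)
                    chain[i](action, NULL);
    }

    /* analogue of update_sched_domains(): only cares about topology events */
    static int update_domains(unsigned long action, void *data)
    {
            if (action == CPU_ONLINE)
                    printf("rebuilding scheduler domains\n");
            return 0;
    }

    /* analogue of update_runtime(): tracks RT bandwidth on every hotplug step */
    static int update_runtime(unsigned long action, void *data)
    {
            if (action == CPU_DOWN_PREPARE)
                    printf("disabling RT runtime on outgoing cpu\n");
            else if (action == CPU_ONLINE)
                    printf("enabling RT runtime on incoming cpu\n");
            return 0;
    }

    int main(void)
    {
    #ifndef CONFIG_CPUSETS          /* cpusets would take over domain rebuilds */
            register_notifier(update_domains);
    #endif
            register_notifier(update_runtime);

            notify(CPU_ONLINE);
            notify(CPU_DOWN_PREPARE);
            return 0;
    }
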
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index f2aa987027d6..cf2cd6ce4cb2 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -878,7 +878,6 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
878#ifdef CONFIG_SCHED_HRTICK 878#ifdef CONFIG_SCHED_HRTICK
879static void hrtick_start_fair(struct rq *rq, struct task_struct *p) 879static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
880{ 880{
881 int requeue = rq->curr == p;
882 struct sched_entity *se = &p->se; 881 struct sched_entity *se = &p->se;
883 struct cfs_rq *cfs_rq = cfs_rq_of(se); 882 struct cfs_rq *cfs_rq = cfs_rq_of(se);
884 883
@@ -899,10 +898,10 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
899 * Don't schedule slices shorter than 10000ns, that just 898 * Don't schedule slices shorter than 10000ns, that just
900 * doesn't make sense. Rely on vruntime for fairness. 899 * doesn't make sense. Rely on vruntime for fairness.
901 */ 900 */
902 if (!requeue) 901 if (rq->curr != p)
903 delta = max(10000LL, delta); 902 delta = max(10000LL, delta);
904 903
905 hrtick_start(rq, delta, requeue); 904 hrtick_start(rq, delta);
906 } 905 }
907} 906}
908#else /* !CONFIG_SCHED_HRTICK */ 907#else /* !CONFIG_SCHED_HRTICK */
@@ -1004,6 +1003,8 @@ static void yield_task_fair(struct rq *rq)
1004 * not idle and an idle cpu is available. The span of cpus to 1003 * not idle and an idle cpu is available. The span of cpus to
1005 * search starts with cpus closest then further out as needed, 1004 * search starts with cpus closest then further out as needed,
1006 * so we always favor a closer, idle cpu. 1005 * so we always favor a closer, idle cpu.
1006 * Domains may include CPUs that are not usable for migration,
1007 * hence we need to mask them out (cpu_active_map)
1007 * 1008 *
1008 * Returns the CPU we should wake onto. 1009 * Returns the CPU we should wake onto.
1009 */ 1010 */
@@ -1031,7 +1032,8 @@ static int wake_idle(int cpu, struct task_struct *p)
1031 || ((sd->flags & SD_WAKE_IDLE_FAR) 1032 || ((sd->flags & SD_WAKE_IDLE_FAR)
1032 && !task_hot(p, task_rq(p)->clock, sd))) { 1033 && !task_hot(p, task_rq(p)->clock, sd))) {
1033 cpus_and(tmp, sd->span, p->cpus_allowed); 1034 cpus_and(tmp, sd->span, p->cpus_allowed);
1034 for_each_cpu_mask(i, tmp) { 1035 cpus_and(tmp, tmp, cpu_active_map);
1036 for_each_cpu_mask_nr(i, tmp) {
1035 if (idle_cpu(i)) { 1037 if (idle_cpu(i)) {
1036 if (i != task_cpu(p)) { 1038 if (i != task_cpu(p)) {
1037 schedstat_inc(p, 1039 schedstat_inc(p,
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 47ceac9e8552..908c04f9dad0 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -240,7 +240,7 @@ static int do_balance_runtime(struct rt_rq *rt_rq)
240 240
241 spin_lock(&rt_b->rt_runtime_lock); 241 spin_lock(&rt_b->rt_runtime_lock);
242 rt_period = ktime_to_ns(rt_b->rt_period); 242 rt_period = ktime_to_ns(rt_b->rt_period);
243 for_each_cpu_mask(i, rd->span) { 243 for_each_cpu_mask_nr(i, rd->span) {
244 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); 244 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
245 s64 diff; 245 s64 diff;
246 246
@@ -253,7 +253,7 @@ static int do_balance_runtime(struct rt_rq *rt_rq)
253 253
254 diff = iter->rt_runtime - iter->rt_time; 254 diff = iter->rt_runtime - iter->rt_time;
255 if (diff > 0) { 255 if (diff > 0) {
256 do_div(diff, weight); 256 diff = div_u64((u64)diff, weight);
257 if (rt_rq->rt_runtime + diff > rt_period) 257 if (rt_rq->rt_runtime + diff > rt_period)
258 diff = rt_period - rt_rq->rt_runtime; 258 diff = rt_period - rt_rq->rt_runtime;
259 iter->rt_runtime -= diff; 259 iter->rt_runtime -= diff;
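
The runtime-balancing hunk replaces do_div() with div_u64() because do_div() wants an unsigned 64-bit dividend that it overwrites with the quotient (returning the remainder), while div_u64() simply returns the quotient, which is why the patch also casts the signed diff to u64. A userspace model of the two calling conventions; plain C division stands in for the arch-specific helpers, and the macro uses a GCC statement expression much as the kernel's does:

    #include <inttypes.h>
    #include <stdio.h>

    /* do_div(n, base): n becomes the quotient, the remainder is returned */
    #define do_div(n, base) ({                      \
            uint64_t __rem = (n) % (base);          \
            (n) = (n) / (base);                     \
            __rem;                                  \
    })

    /* div_u64(dividend, divisor): just hand back the quotient */
    static inline uint64_t div_u64(uint64_t dividend, uint32_t divisor)
    {
            return dividend / divisor;
    }

    int main(void)
    {
            uint64_t diff = 1000003;
            uint32_t weight = 7;
            uint64_t rem;

            rem = do_div(diff, weight);     /* diff is clobbered here */
            printf("do_div:  quotient=%" PRIu64 " remainder=%" PRIu64 "\n",
                   diff, rem);

            diff = 1000003;
            printf("div_u64: quotient=%" PRIu64 "\n", div_u64(diff, weight));
            return 0;
    }
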
@@ -505,7 +505,9 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
505 rt_rq->rt_nr_running++; 505 rt_rq->rt_nr_running++;
506#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED 506#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
507 if (rt_se_prio(rt_se) < rt_rq->highest_prio) { 507 if (rt_se_prio(rt_se) < rt_rq->highest_prio) {
508#ifdef CONFIG_SMP
508 struct rq *rq = rq_of_rt_rq(rt_rq); 509 struct rq *rq = rq_of_rt_rq(rt_rq);
510#endif
509 511
510 rt_rq->highest_prio = rt_se_prio(rt_se); 512 rt_rq->highest_prio = rt_se_prio(rt_se);
511#ifdef CONFIG_SMP 513#ifdef CONFIG_SMP
@@ -599,11 +601,7 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
599 if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) 601 if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
600 return; 602 return;
601 603
602 if (rt_se->nr_cpus_allowed == 1) 604 list_add_tail(&rt_se->run_list, queue);
603 list_add(&rt_se->run_list, queue);
604 else
605 list_add_tail(&rt_se->run_list, queue);
606
607 __set_bit(rt_se_prio(rt_se), array->bitmap); 605 __set_bit(rt_se_prio(rt_se), array->bitmap);
608 606
609 inc_rt_tasks(rt_se, rt_rq); 607 inc_rt_tasks(rt_se, rt_rq);
@@ -688,32 +686,34 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
688 * Put task to the end of the run list without the overhead of dequeue 686 * Put task to the end of the run list without the overhead of dequeue
689 * followed by enqueue. 687 * followed by enqueue.
690 */ 688 */
691static 689static void
692void requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se) 690requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head)
693{ 691{
694 struct rt_prio_array *array = &rt_rq->active;
695
696 if (on_rt_rq(rt_se)) { 692 if (on_rt_rq(rt_se)) {
697 list_del_init(&rt_se->run_list); 693 struct rt_prio_array *array = &rt_rq->active;
698 list_add_tail(&rt_se->run_list, 694 struct list_head *queue = array->queue + rt_se_prio(rt_se);
699 array->queue + rt_se_prio(rt_se)); 695
696 if (head)
697 list_move(&rt_se->run_list, queue);
698 else
699 list_move_tail(&rt_se->run_list, queue);
700 } 700 }
701} 701}
702 702
703static void requeue_task_rt(struct rq *rq, struct task_struct *p) 703static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head)
704{ 704{
705 struct sched_rt_entity *rt_se = &p->rt; 705 struct sched_rt_entity *rt_se = &p->rt;
706 struct rt_rq *rt_rq; 706 struct rt_rq *rt_rq;
707 707
708 for_each_sched_rt_entity(rt_se) { 708 for_each_sched_rt_entity(rt_se) {
709 rt_rq = rt_rq_of_se(rt_se); 709 rt_rq = rt_rq_of_se(rt_se);
710 requeue_rt_entity(rt_rq, rt_se); 710 requeue_rt_entity(rt_rq, rt_se, head);
711 } 711 }
712} 712}
713 713
714static void yield_task_rt(struct rq *rq) 714static void yield_task_rt(struct rq *rq)
715{ 715{
716 requeue_task_rt(rq, rq->curr); 716 requeue_task_rt(rq, rq->curr, 0);
717} 717}
718 718
719#ifdef CONFIG_SMP 719#ifdef CONFIG_SMP
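
requeue_rt_entity() now takes a head flag and relies on list_move()/list_move_tail(), i.e. one unlink-plus-relink instead of a full dequeue followed by enqueue. A compact userspace version of that list surgery, with the list primitives written out to mirror the kernel's list.h semantics:

    #include <stddef.h>
    #include <stdio.h>

    struct list_head { struct list_head *prev, *next; };

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    static void list_init(struct list_head *h) { h->prev = h->next = h; }

    static void list_del(struct list_head *e)
    {
            e->prev->next = e->next;
            e->next->prev = e->prev;
    }

    static void list_add(struct list_head *e, struct list_head *h)      /* at head */
    {
            e->next = h->next;
            e->prev = h;
            h->next->prev = e;
            h->next = e;
    }

    static void list_add_tail(struct list_head *e, struct list_head *h) /* at tail */
    {
            list_add(e, h->prev);
    }

    /* the requeue_rt_entity() idea: unlink and relink in one step */
    static void list_move(struct list_head *e, struct list_head *h)
    {
            list_del(e);
            list_add(e, h);
    }

    static void list_move_tail(struct list_head *e, struct list_head *h)
    {
            list_del(e);
            list_add_tail(e, h);
    }

    struct task { int id; struct list_head run_list; };

    int main(void)
    {
            struct task a = { .id = 1 }, b = { .id = 2 }, c = { .id = 3 };
            struct list_head queue, *p;

            list_init(&queue);
            list_add_tail(&a.run_list, &queue);
            list_add_tail(&b.run_list, &queue);
            list_add_tail(&c.run_list, &queue);

            list_move(&c.run_list, &queue);         /* requeue c at the head */
            list_move_tail(&a.run_list, &queue);    /* requeue a at the tail */

            for (p = queue.next; p != &queue; p = p->next)
                    printf("task %d\n", container_of(p, struct task, run_list)->id);
            return 0;
    }
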
@@ -753,6 +753,30 @@ static int select_task_rq_rt(struct task_struct *p, int sync)
753 */ 753 */
754 return task_cpu(p); 754 return task_cpu(p);
755} 755}
756
757static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
758{
759 cpumask_t mask;
760
761 if (rq->curr->rt.nr_cpus_allowed == 1)
762 return;
763
764 if (p->rt.nr_cpus_allowed != 1
765 && cpupri_find(&rq->rd->cpupri, p, &mask))
766 return;
767
768 if (!cpupri_find(&rq->rd->cpupri, rq->curr, &mask))
769 return;
770
771 /*
772 * There appear to be other cpus that can accept
773 * current and none to run 'p', so let's reschedule
774 * to try and push current away:
775 */
776 requeue_task_rt(rq, p, 1);
777 resched_task(rq->curr);
778}
779
756#endif /* CONFIG_SMP */ 780#endif /* CONFIG_SMP */
757 781
758/* 782/*
@@ -778,18 +802,8 @@ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
778 * to move current somewhere else, making room for our non-migratable 802 * to move current somewhere else, making room for our non-migratable
779 * task. 803 * task.
780 */ 804 */
781 if((p->prio == rq->curr->prio) 805 if (p->prio == rq->curr->prio && !need_resched())
782 && p->rt.nr_cpus_allowed == 1 806 check_preempt_equal_prio(rq, p);
783 && rq->curr->rt.nr_cpus_allowed != 1) {
784 cpumask_t mask;
785
786 if (cpupri_find(&rq->rd->cpupri, rq->curr, &mask))
787 /*
788 * There appears to be other cpus that can accept
789 * current, so lets reschedule to try and push it away
790 */
791 resched_task(rq->curr);
792 }
793#endif 807#endif
794} 808}
795 809
@@ -922,6 +936,13 @@ static int find_lowest_rq(struct task_struct *task)
922 return -1; /* No targets found */ 936 return -1; /* No targets found */
923 937
924 /* 938 /*
939 * Only consider CPUs that are usable for migration.
940 * I guess we might want to change cpupri_find() to ignore those
941 * in the first place.
942 */
943 cpus_and(*lowest_mask, *lowest_mask, cpu_active_map);
944
945 /*
925 * At this point we have built a mask of cpus representing the 946 * At this point we have built a mask of cpus representing the
926 * lowest priority tasks in the system. Now we want to elect 947 * lowest priority tasks in the system. Now we want to elect
927 * the best one based on our affinity and topology. 948 * the best one based on our affinity and topology.
@@ -1107,7 +1128,7 @@ static int pull_rt_task(struct rq *this_rq)
1107 1128
1108 next = pick_next_task_rt(this_rq); 1129 next = pick_next_task_rt(this_rq);
1109 1130
1110 for_each_cpu_mask(cpu, this_rq->rd->rto_mask) { 1131 for_each_cpu_mask_nr(cpu, this_rq->rd->rto_mask) {
1111 if (this_cpu == cpu) 1132 if (this_cpu == cpu)
1112 continue; 1133 continue;
1113 1134
@@ -1415,7 +1436,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
1415 * on the queue: 1436 * on the queue:
1416 */ 1437 */
1417 if (p->rt.run_list.prev != p->rt.run_list.next) { 1438 if (p->rt.run_list.prev != p->rt.run_list.next) {
1418 requeue_task_rt(rq, p); 1439 requeue_task_rt(rq, p, 0);
1419 set_tsk_need_resched(p); 1440 set_tsk_need_resched(p);
1420 } 1441 }
1421} 1442}
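
The head/tail distinction added to requeue_rt_entity() above can be seen in isolation with a small userspace sketch (plain C, not the kernel's <linux/list.h>; the node type and requeue() helper are made up for the example): head=1 behaves like list_move() and puts the entity at the front of its priority queue, as check_preempt_equal_prio() wants, while head=0 behaves like list_move_tail() and puts it at the back, as yield and the tick requeue want.

#include <stdio.h>

struct node { struct node *prev, *next; int id; };

static void list_init(struct node *h) { h->prev = h->next = h; }

static void list_del_node(struct node *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
}

static void list_add_head(struct node *n, struct node *h)
{
	n->next = h->next; n->prev = h;
	h->next->prev = n; h->next = n;
}

static void list_add_tail(struct node *n, struct node *h)
{
	n->prev = h->prev; n->next = h;
	h->prev->next = n; h->prev = n;
}

/* analogue of requeue_rt_entity(): head=1 ~ list_move(), head=0 ~ list_move_tail() */
static void requeue(struct node *n, struct node *h, int head)
{
	list_del_node(n);
	if (head)
		list_add_head(n, h);
	else
		list_add_tail(n, h);
}

int main(void)
{
	struct node q, a = { NULL, NULL, 1 }, b = { NULL, NULL, 2 }, c = { NULL, NULL, 3 }, *p;

	list_init(&q);
	list_add_tail(&a, &q);
	list_add_tail(&b, &q);
	list_add_tail(&c, &q);	/* queue: 1 2 3 */

	requeue(&a, &q, 0);	/* tail requeue (yield):  1 2 3 -> 2 3 1 */
	requeue(&c, &q, 1);	/* head requeue:          2 3 1 -> 3 2 1 */

	for (p = q.next; p != &q; p = p->next)
		printf("%d ", p->id);
	printf("\n");
	return 0;
}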
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 81e2fe0f983a..f6b03d56c2bf 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -286,7 +286,7 @@ void irq_exit(void)
286#ifdef CONFIG_NO_HZ 286#ifdef CONFIG_NO_HZ
287 /* Make sure that timer wheel updates are propagated */ 287 /* Make sure that timer wheel updates are propagated */
288 if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched()) 288 if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched())
289 tick_nohz_stop_sched_tick(); 289 tick_nohz_stop_sched_tick(0);
290 rcu_irq_exit(); 290 rcu_irq_exit();
291#endif 291#endif
292 preempt_enable_no_resched(); 292 preempt_enable_no_resched();
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index a272d78185eb..7bd8d1aadd5d 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -13,6 +13,7 @@
13#include <linux/delay.h> 13#include <linux/delay.h>
14#include <linux/freezer.h> 14#include <linux/freezer.h>
15#include <linux/kthread.h> 15#include <linux/kthread.h>
16#include <linux/lockdep.h>
16#include <linux/notifier.h> 17#include <linux/notifier.h>
17#include <linux/module.h> 18#include <linux/module.h>
18 19
@@ -25,7 +26,22 @@ static DEFINE_PER_CPU(unsigned long, print_timestamp);
25static DEFINE_PER_CPU(struct task_struct *, watchdog_task); 26static DEFINE_PER_CPU(struct task_struct *, watchdog_task);
26 27
27static int __read_mostly did_panic; 28static int __read_mostly did_panic;
28unsigned long __read_mostly softlockup_thresh = 60; 29int __read_mostly softlockup_thresh = 60;
30
31/*
32 * Should we panic (and reboot, if panic_timeout= is set) when a
33 * soft-lockup occurs:
34 */
35unsigned int __read_mostly softlockup_panic =
36 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
37
38static int __init softlockup_panic_setup(char *str)
39{
40 softlockup_panic = simple_strtoul(str, NULL, 0);
41
42 return 1;
43}
44__setup("softlockup_panic=", softlockup_panic_setup);
29 45
30static int 46static int
31softlock_panic(struct notifier_block *this, unsigned long event, void *ptr) 47softlock_panic(struct notifier_block *this, unsigned long event, void *ptr)
@@ -84,6 +100,14 @@ void softlockup_tick(void)
84 struct pt_regs *regs = get_irq_regs(); 100 struct pt_regs *regs = get_irq_regs();
85 unsigned long now; 101 unsigned long now;
86 102
103 /* Is detection switched off? */
104 if (!per_cpu(watchdog_task, this_cpu) || softlockup_thresh <= 0) {
105 /* Be sure we don't false trigger if switched back on */
106 if (touch_timestamp)
107 per_cpu(touch_timestamp, this_cpu) = 0;
108 return;
109 }
110
87 if (touch_timestamp == 0) { 111 if (touch_timestamp == 0) {
88 __touch_softlockup_watchdog(); 112 __touch_softlockup_watchdog();
89 return; 113 return;
@@ -92,11 +116,8 @@ void softlockup_tick(void)
92 print_timestamp = per_cpu(print_timestamp, this_cpu); 116 print_timestamp = per_cpu(print_timestamp, this_cpu);
93 117
94 /* report at most once a second */ 118 /* report at most once a second */
95 if ((print_timestamp >= touch_timestamp && 119 if (print_timestamp == touch_timestamp || did_panic)
96 print_timestamp < (touch_timestamp + 1)) ||
97 did_panic || !per_cpu(watchdog_task, this_cpu)) {
98 return; 120 return;
99 }
100 121
101 /* do not print during early bootup: */ 122 /* do not print during early bootup: */
102 if (unlikely(system_state != SYSTEM_RUNNING)) { 123 if (unlikely(system_state != SYSTEM_RUNNING)) {
@@ -106,8 +127,11 @@ void softlockup_tick(void)
106 127
107 now = get_timestamp(this_cpu); 128 now = get_timestamp(this_cpu);
108 129
109 /* Wake up the high-prio watchdog task every second: */ 130 /*
110 if (now > (touch_timestamp + 1)) 131 * Wake up the high-prio watchdog task twice per
132 * threshold timespan.
133 */
134 if (now > touch_timestamp + softlockup_thresh/2)
111 wake_up_process(per_cpu(watchdog_task, this_cpu)); 135 wake_up_process(per_cpu(watchdog_task, this_cpu));
112 136
113 /* Warn about unreasonable delays: */ 137 /* Warn about unreasonable delays: */
@@ -121,11 +145,15 @@ void softlockup_tick(void)
121 this_cpu, now - touch_timestamp, 145 this_cpu, now - touch_timestamp,
122 current->comm, task_pid_nr(current)); 146 current->comm, task_pid_nr(current));
123 print_modules(); 147 print_modules();
148 print_irqtrace_events(current);
124 if (regs) 149 if (regs)
125 show_regs(regs); 150 show_regs(regs);
126 else 151 else
127 dump_stack(); 152 dump_stack();
128 spin_unlock(&print_lock); 153 spin_unlock(&print_lock);
154
155 if (softlockup_panic)
156 panic("softlockup: hung tasks");
129} 157}
130 158
131/* 159/*
@@ -178,6 +206,9 @@ static void check_hung_task(struct task_struct *t, unsigned long now)
178 206
179 t->last_switch_timestamp = now; 207 t->last_switch_timestamp = now;
180 touch_nmi_watchdog(); 208 touch_nmi_watchdog();
209
210 if (softlockup_panic)
211 panic("softlockup: blocked tasks");
181} 212}
182 213
183/* 214/*
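
The softlockup changes above add a panic knob (boot parameter softlockup_panic=, plus a sysctl in the kernel/sysctl.c hunk further down) and turn softlockup_thresh into a signed int so that a value of 0 or below switches detection off. A hedged userspace sketch of driving both knobs, assuming the usual /proc/sys/kernel/ paths for kern_table entries:

#include <stdio.h>

static int write_sysctl(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return -1;
	}
	fputs(val, f);
	return fclose(f);
}

int main(void)
{
	/* panic (and reboot if panic_timeout is set) when a soft lockup is detected */
	write_sysctl("/proc/sys/kernel/softlockup_panic", "1");

	/* new semantics: -1 or 0 disables detection, a positive value is the
	 * threshold in seconds; the watchdog task is now woken twice per
	 * threshold span instead of once per second */
	write_sysctl("/proc/sys/kernel/softlockup_thresh", "30");
	return 0;
}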
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index ba9b2054ecbd..738b411ff2d3 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -33,8 +33,9 @@ static int stopmachine(void *cpu)
33{ 33{
34 int irqs_disabled = 0; 34 int irqs_disabled = 0;
35 int prepared = 0; 35 int prepared = 0;
36 cpumask_of_cpu_ptr(cpumask, (int)(long)cpu);
36 37
37 set_cpus_allowed_ptr(current, &cpumask_of_cpu((int)(long)cpu)); 38 set_cpus_allowed_ptr(current, cpumask);
38 39
39 /* Ack: we are alive */ 40 /* Ack: we are alive */
40 smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */ 41 smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 5b9b467de070..bd66ac5406f3 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -31,6 +31,7 @@ cond_syscall(sys_socketpair);
31cond_syscall(sys_bind); 31cond_syscall(sys_bind);
32cond_syscall(sys_listen); 32cond_syscall(sys_listen);
33cond_syscall(sys_accept); 33cond_syscall(sys_accept);
34cond_syscall(sys_paccept);
34cond_syscall(sys_connect); 35cond_syscall(sys_connect);
35cond_syscall(sys_getsockname); 36cond_syscall(sys_getsockname);
36cond_syscall(sys_getpeername); 37cond_syscall(sys_getpeername);
@@ -59,6 +60,7 @@ cond_syscall(sys_epoll_create);
59cond_syscall(sys_epoll_ctl); 60cond_syscall(sys_epoll_ctl);
60cond_syscall(sys_epoll_wait); 61cond_syscall(sys_epoll_wait);
61cond_syscall(sys_epoll_pwait); 62cond_syscall(sys_epoll_pwait);
63cond_syscall(compat_sys_epoll_pwait);
62cond_syscall(sys_semget); 64cond_syscall(sys_semget);
63cond_syscall(sys_semop); 65cond_syscall(sys_semop);
64cond_syscall(sys_semtimedop); 66cond_syscall(sys_semtimedop);
@@ -94,6 +96,7 @@ cond_syscall(sys_keyctl);
94cond_syscall(compat_sys_keyctl); 96cond_syscall(compat_sys_keyctl);
95cond_syscall(compat_sys_socketcall); 97cond_syscall(compat_sys_socketcall);
96cond_syscall(sys_inotify_init); 98cond_syscall(sys_inotify_init);
99cond_syscall(sys_inotify_init1);
97cond_syscall(sys_inotify_add_watch); 100cond_syscall(sys_inotify_add_watch);
98cond_syscall(sys_inotify_rm_watch); 101cond_syscall(sys_inotify_rm_watch);
99cond_syscall(sys_migrate_pages); 102cond_syscall(sys_migrate_pages);
@@ -154,6 +157,7 @@ cond_syscall(sys_ioprio_get);
154 157
155/* New file descriptors */ 158/* New file descriptors */
156cond_syscall(sys_signalfd); 159cond_syscall(sys_signalfd);
160cond_syscall(sys_signalfd4);
157cond_syscall(compat_sys_signalfd); 161cond_syscall(compat_sys_signalfd);
158cond_syscall(sys_timerfd_create); 162cond_syscall(sys_timerfd_create);
159cond_syscall(sys_timerfd_settime); 163cond_syscall(sys_timerfd_settime);
@@ -161,3 +165,4 @@ cond_syscall(sys_timerfd_gettime);
161cond_syscall(compat_sys_timerfd_settime); 165cond_syscall(compat_sys_timerfd_settime);
162cond_syscall(compat_sys_timerfd_gettime); 166cond_syscall(compat_sys_timerfd_gettime);
163cond_syscall(sys_eventfd); 167cond_syscall(sys_eventfd);
168cond_syscall(sys_eventfd2);
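
The new cond_syscall() entries only matter when the corresponding syscalls are compiled out; the macro effectively makes the symbol a weak alias of sys_ni_syscall(), so the syscall table still links and the call returns -ENOSYS. A small userspace illustration of that weak-alias fallback (maybe_feature() and not_implemented() are invented names for the example):

#include <stdio.h>
#include <errno.h>

/* stand-in for sys_ni_syscall(), the "not implemented" fallback */
long not_implemented(void)
{
	return -ENOSYS;
}

/* weak alias: resolves to not_implemented() unless another object file
 * supplies a strong definition of maybe_feature() */
long maybe_feature(void) __attribute__((weak, alias("not_implemented")));

int main(void)
{
	printf("maybe_feature() -> %ld (ENOSYS is %d)\n", maybe_feature(), ENOSYS);
	return 0;
}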
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 6b16e16428d8..1a8299d1fe59 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -43,6 +43,7 @@
43#include <linux/limits.h> 43#include <linux/limits.h>
44#include <linux/dcache.h> 44#include <linux/dcache.h>
45#include <linux/syscalls.h> 45#include <linux/syscalls.h>
46#include <linux/vmstat.h>
46#include <linux/nfs_fs.h> 47#include <linux/nfs_fs.h>
47#include <linux/acpi.h> 48#include <linux/acpi.h>
48#include <linux/reboot.h> 49#include <linux/reboot.h>
@@ -80,7 +81,6 @@ extern int sysctl_drop_caches;
80extern int percpu_pagelist_fraction; 81extern int percpu_pagelist_fraction;
81extern int compat_log; 82extern int compat_log;
82extern int maps_protect; 83extern int maps_protect;
83extern int sysctl_stat_interval;
84extern int latencytop_enabled; 84extern int latencytop_enabled;
85extern int sysctl_nr_open_min, sysctl_nr_open_max; 85extern int sysctl_nr_open_min, sysctl_nr_open_max;
86#ifdef CONFIG_RCU_TORTURE_TEST 86#ifdef CONFIG_RCU_TORTURE_TEST
@@ -88,12 +88,13 @@ extern int rcutorture_runnable;
88#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ 88#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
89 89
90/* Constants used for minimum and maximum */ 90/* Constants used for minimum and maximum */
91#if defined(CONFIG_DETECT_SOFTLOCKUP) || defined(CONFIG_HIGHMEM) 91#if defined(CONFIG_HIGHMEM) || defined(CONFIG_DETECT_SOFTLOCKUP)
92static int one = 1; 92static int one = 1;
93#endif 93#endif
94 94
95#ifdef CONFIG_DETECT_SOFTLOCKUP 95#ifdef CONFIG_DETECT_SOFTLOCKUP
96static int sixty = 60; 96static int sixty = 60;
97static int neg_one = -1;
97#endif 98#endif
98 99
99#ifdef CONFIG_MMU 100#ifdef CONFIG_MMU
@@ -110,7 +111,7 @@ static int min_percpu_pagelist_fract = 8;
110 111
111static int ngroups_max = NGROUPS_MAX; 112static int ngroups_max = NGROUPS_MAX;
112 113
113#ifdef CONFIG_KMOD 114#ifdef CONFIG_MODULES
114extern char modprobe_path[]; 115extern char modprobe_path[];
115#endif 116#endif
116#ifdef CONFIG_CHR_DEV_SG 117#ifdef CONFIG_CHR_DEV_SG
@@ -475,7 +476,7 @@ static struct ctl_table kern_table[] = {
475 .proc_handler = &ftrace_enable_sysctl, 476 .proc_handler = &ftrace_enable_sysctl,
476 }, 477 },
477#endif 478#endif
478#ifdef CONFIG_KMOD 479#ifdef CONFIG_MODULES
479 { 480 {
480 .ctl_name = KERN_MODPROBE, 481 .ctl_name = KERN_MODPROBE,
481 .procname = "modprobe", 482 .procname = "modprobe",
@@ -739,13 +740,24 @@ static struct ctl_table kern_table[] = {
739#ifdef CONFIG_DETECT_SOFTLOCKUP 740#ifdef CONFIG_DETECT_SOFTLOCKUP
740 { 741 {
741 .ctl_name = CTL_UNNUMBERED, 742 .ctl_name = CTL_UNNUMBERED,
743 .procname = "softlockup_panic",
744 .data = &softlockup_panic,
745 .maxlen = sizeof(int),
746 .mode = 0644,
747 .proc_handler = &proc_dointvec_minmax,
748 .strategy = &sysctl_intvec,
749 .extra1 = &zero,
750 .extra2 = &one,
751 },
752 {
753 .ctl_name = CTL_UNNUMBERED,
742 .procname = "softlockup_thresh", 754 .procname = "softlockup_thresh",
743 .data = &softlockup_thresh, 755 .data = &softlockup_thresh,
744 .maxlen = sizeof(unsigned long), 756 .maxlen = sizeof(int),
745 .mode = 0644, 757 .mode = 0644,
746 .proc_handler = &proc_doulongvec_minmax, 758 .proc_handler = &proc_dointvec_minmax,
747 .strategy = &sysctl_intvec, 759 .strategy = &sysctl_intvec,
748 .extra1 = &one, 760 .extra1 = &neg_one,
749 .extra2 = &sixty, 761 .extra2 = &sixty,
750 }, 762 },
751 { 763 {
@@ -947,7 +959,7 @@ static struct ctl_table vm_table[] = {
947#ifdef CONFIG_HUGETLB_PAGE 959#ifdef CONFIG_HUGETLB_PAGE
948 { 960 {
949 .procname = "nr_hugepages", 961 .procname = "nr_hugepages",
950 .data = &max_huge_pages, 962 .data = NULL,
951 .maxlen = sizeof(unsigned long), 963 .maxlen = sizeof(unsigned long),
952 .mode = 0644, 964 .mode = 0644,
953 .proc_handler = &hugetlb_sysctl_handler, 965 .proc_handler = &hugetlb_sysctl_handler,
@@ -973,10 +985,12 @@ static struct ctl_table vm_table[] = {
973 { 985 {
974 .ctl_name = CTL_UNNUMBERED, 986 .ctl_name = CTL_UNNUMBERED,
975 .procname = "nr_overcommit_hugepages", 987 .procname = "nr_overcommit_hugepages",
976 .data = &sysctl_overcommit_huge_pages, 988 .data = NULL,
977 .maxlen = sizeof(sysctl_overcommit_huge_pages), 989 .maxlen = sizeof(unsigned long),
978 .mode = 0644, 990 .mode = 0644,
979 .proc_handler = &hugetlb_overcommit_handler, 991 .proc_handler = &hugetlb_overcommit_handler,
992 .extra1 = (void *)&hugetlb_zero,
993 .extra2 = (void *)&hugetlb_infinity,
980 }, 994 },
981#endif 995#endif
982 { 996 {
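
The new softlockup_panic entry above follows the standard pattern for a clamped integer sysctl. A minimal sketch of that pattern (illustrative names, not from the patch; registering the table with register_sysctl_table() is omitted):

#include <linux/sysctl.h>

static int example_flag;
static int example_min;		/* 0 */
static int example_max = 1;

static struct ctl_table example_table[] = {
	{
		.ctl_name	= CTL_UNNUMBERED,
		.procname	= "example_flag",
		.data		= &example_flag,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,	/* clamps writes */
		.strategy	= &sysctl_intvec,		/* extra1/extra2 give the bounds */
		.extra1		= &example_min,
		.extra2		= &example_max,
	},
	{ .ctl_name = 0 }
};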
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 4a23517169a6..06b17547f4e7 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -301,7 +301,7 @@ static int add_del_listener(pid_t pid, cpumask_t *maskp, int isadd)
301 return -EINVAL; 301 return -EINVAL;
302 302
303 if (isadd == REGISTER) { 303 if (isadd == REGISTER) {
304 for_each_cpu_mask(cpu, mask) { 304 for_each_cpu_mask_nr(cpu, mask) {
305 s = kmalloc_node(sizeof(struct listener), GFP_KERNEL, 305 s = kmalloc_node(sizeof(struct listener), GFP_KERNEL,
306 cpu_to_node(cpu)); 306 cpu_to_node(cpu));
307 if (!s) 307 if (!s)
@@ -320,7 +320,7 @@ static int add_del_listener(pid_t pid, cpumask_t *maskp, int isadd)
320 320
321 /* Deregister or cleanup */ 321 /* Deregister or cleanup */
322cleanup: 322cleanup:
323 for_each_cpu_mask(cpu, mask) { 323 for_each_cpu_mask_nr(cpu, mask) {
324 listeners = &per_cpu(listener_array, cpu); 324 listeners = &per_cpu(listener_array, cpu);
325 down_write(&listeners->sem); 325 down_write(&listeners->sem);
326 list_for_each_entry_safe(s, tmp, &listeners->list, list) { 326 list_for_each_entry_safe(s, tmp, &listeners->list, list) {
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index dadde5361f32..093d4acf993b 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -145,9 +145,9 @@ static void clocksource_watchdog(unsigned long data)
145 * Cycle through CPUs to check if the CPUs stay 145 * Cycle through CPUs to check if the CPUs stay
146 * synchronized to each other. 146 * synchronized to each other.
147 */ 147 */
148 int next_cpu = next_cpu(raw_smp_processor_id(), cpu_online_map); 148 int next_cpu = next_cpu_nr(raw_smp_processor_id(), cpu_online_map);
149 149
150 if (next_cpu >= NR_CPUS) 150 if (next_cpu >= nr_cpu_ids)
151 next_cpu = first_cpu(cpu_online_map); 151 next_cpu = first_cpu(cpu_online_map);
152 watchdog_timer.expires += WATCHDOG_INTERVAL; 152 watchdog_timer.expires += WATCHDOG_INTERVAL;
153 add_timer_on(&watchdog_timer, next_cpu); 153 add_timer_on(&watchdog_timer, next_cpu);
@@ -376,7 +376,8 @@ void clocksource_unregister(struct clocksource *cs)
376 * Provides sysfs interface for listing current clocksource. 376 * Provides sysfs interface for listing current clocksource.
377 */ 377 */
378static ssize_t 378static ssize_t
379sysfs_show_current_clocksources(struct sys_device *dev, char *buf) 379sysfs_show_current_clocksources(struct sys_device *dev,
380 struct sysdev_attribute *attr, char *buf)
380{ 381{
381 ssize_t count = 0; 382 ssize_t count = 0;
382 383
@@ -397,6 +398,7 @@ sysfs_show_current_clocksources(struct sys_device *dev, char *buf)
 397 * clocksource selection. 398 * clocksource selection.
398 */ 399 */
399static ssize_t sysfs_override_clocksource(struct sys_device *dev, 400static ssize_t sysfs_override_clocksource(struct sys_device *dev,
401 struct sysdev_attribute *attr,
400 const char *buf, size_t count) 402 const char *buf, size_t count)
401{ 403{
402 struct clocksource *ovr = NULL; 404 struct clocksource *ovr = NULL;
@@ -449,7 +451,9 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev,
449 * Provides sysfs interface for listing registered clocksources 451 * Provides sysfs interface for listing registered clocksources
450 */ 452 */
451static ssize_t 453static ssize_t
452sysfs_show_available_clocksources(struct sys_device *dev, char *buf) 454sysfs_show_available_clocksources(struct sys_device *dev,
455 struct sysdev_attribute *attr,
456 char *buf)
453{ 457{
454 struct clocksource *src; 458 struct clocksource *src;
455 ssize_t count = 0; 459 ssize_t count = 0;
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index f48d0f09d32f..31463d370b94 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -399,8 +399,7 @@ again:
399 mask = CPU_MASK_NONE; 399 mask = CPU_MASK_NONE;
400 now = ktime_get(); 400 now = ktime_get();
401 /* Find all expired events */ 401 /* Find all expired events */
402 for (cpu = first_cpu(tick_broadcast_oneshot_mask); cpu != NR_CPUS; 402 for_each_cpu_mask_nr(cpu, tick_broadcast_oneshot_mask) {
403 cpu = next_cpu(cpu, tick_broadcast_oneshot_mask)) {
404 td = &per_cpu(tick_cpu_device, cpu); 403 td = &per_cpu(tick_cpu_device, cpu);
405 if (td->evtdev->next_event.tv64 <= now.tv64) 404 if (td->evtdev->next_event.tv64 <= now.tv64)
406 cpu_set(cpu, mask); 405 cpu_set(cpu, mask);
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 4f3886562b8c..bf43284d6855 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -135,7 +135,7 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
135 */ 135 */
136static void tick_setup_device(struct tick_device *td, 136static void tick_setup_device(struct tick_device *td,
137 struct clock_event_device *newdev, int cpu, 137 struct clock_event_device *newdev, int cpu,
138 cpumask_t cpumask) 138 const cpumask_t *cpumask)
139{ 139{
140 ktime_t next_event; 140 ktime_t next_event;
141 void (*handler)(struct clock_event_device *) = NULL; 141 void (*handler)(struct clock_event_device *) = NULL;
@@ -169,8 +169,8 @@ static void tick_setup_device(struct tick_device *td,
169 * When the device is not per cpu, pin the interrupt to the 169 * When the device is not per cpu, pin the interrupt to the
170 * current cpu: 170 * current cpu:
171 */ 171 */
172 if (!cpus_equal(newdev->cpumask, cpumask)) 172 if (!cpus_equal(newdev->cpumask, *cpumask))
173 irq_set_affinity(newdev->irq, cpumask); 173 irq_set_affinity(newdev->irq, *cpumask);
174 174
175 /* 175 /*
176 * When global broadcasting is active, check if the current 176 * When global broadcasting is active, check if the current
@@ -196,20 +196,20 @@ static int tick_check_new_device(struct clock_event_device *newdev)
196 struct tick_device *td; 196 struct tick_device *td;
197 int cpu, ret = NOTIFY_OK; 197 int cpu, ret = NOTIFY_OK;
198 unsigned long flags; 198 unsigned long flags;
199 cpumask_t cpumask; 199 cpumask_of_cpu_ptr_declare(cpumask);
200 200
201 spin_lock_irqsave(&tick_device_lock, flags); 201 spin_lock_irqsave(&tick_device_lock, flags);
202 202
203 cpu = smp_processor_id(); 203 cpu = smp_processor_id();
204 cpumask_of_cpu_ptr_next(cpumask, cpu);
204 if (!cpu_isset(cpu, newdev->cpumask)) 205 if (!cpu_isset(cpu, newdev->cpumask))
205 goto out_bc; 206 goto out_bc;
206 207
207 td = &per_cpu(tick_cpu_device, cpu); 208 td = &per_cpu(tick_cpu_device, cpu);
208 curdev = td->evtdev; 209 curdev = td->evtdev;
209 cpumask = cpumask_of_cpu(cpu);
210 210
211 /* cpu local device ? */ 211 /* cpu local device ? */
212 if (!cpus_equal(newdev->cpumask, cpumask)) { 212 if (!cpus_equal(newdev->cpumask, *cpumask)) {
213 213
214 /* 214 /*
215 * If the cpu affinity of the device interrupt can not 215 * If the cpu affinity of the device interrupt can not
@@ -222,7 +222,7 @@ static int tick_check_new_device(struct clock_event_device *newdev)
222 * If we have a cpu local device already, do not replace it 222 * If we have a cpu local device already, do not replace it
223 * by a non cpu local device 223 * by a non cpu local device
224 */ 224 */
225 if (curdev && cpus_equal(curdev->cpumask, cpumask)) 225 if (curdev && cpus_equal(curdev->cpumask, *cpumask))
226 goto out_bc; 226 goto out_bc;
227 } 227 }
228 228
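
tick_setup_device() now takes the cpumask by const pointer rather than by value; with large NR_CPUS a cpumask_t is a sizeable bitmap and copying it on every call costs stack. A standalone illustration of the difference (mask_t here is a mock type, not the kernel's):

#include <stdio.h>
#include <string.h>

#define NR_CPUS 4096
typedef struct {
	unsigned long bits[NR_CPUS / (8 * sizeof(unsigned long))];
} mask_t;

/* compare through pointers: no 512-byte copy per call */
static int masks_equal(const mask_t *a, const mask_t *b)
{
	return memcmp(a->bits, b->bits, sizeof(a->bits)) == 0;
}

int main(void)
{
	mask_t a, b;

	memset(&a, 0, sizeof(a));
	memset(&b, 0, sizeof(b));
	a.bits[0] = b.bits[0] = 1;	/* CPU 0 set in both masks */
	printf("equal=%d, sizeof(mask_t)=%zu bytes\n", masks_equal(&a, &b), sizeof(mask_t));
	return 0;
}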
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index beef7ccdf842..825b4c00fe44 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -140,8 +140,6 @@ void tick_nohz_update_jiffies(void)
140 if (!ts->tick_stopped) 140 if (!ts->tick_stopped)
141 return; 141 return;
142 142
143 touch_softlockup_watchdog();
144
145 cpu_clear(cpu, nohz_cpu_mask); 143 cpu_clear(cpu, nohz_cpu_mask);
146 now = ktime_get(); 144 now = ktime_get();
147 ts->idle_waketime = now; 145 ts->idle_waketime = now;
@@ -149,6 +147,8 @@ void tick_nohz_update_jiffies(void)
149 local_irq_save(flags); 147 local_irq_save(flags);
150 tick_do_update_jiffies64(now); 148 tick_do_update_jiffies64(now);
151 local_irq_restore(flags); 149 local_irq_restore(flags);
150
151 touch_softlockup_watchdog();
152} 152}
153 153
154void tick_nohz_stop_idle(int cpu) 154void tick_nohz_stop_idle(int cpu)
@@ -195,7 +195,7 @@ u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
195 * Called either from the idle loop or from irq_exit() when an idle period was 195 * Called either from the idle loop or from irq_exit() when an idle period was
196 * just interrupted by an interrupt which did not cause a reschedule. 196 * just interrupted by an interrupt which did not cause a reschedule.
197 */ 197 */
198void tick_nohz_stop_sched_tick(void) 198void tick_nohz_stop_sched_tick(int inidle)
199{ 199{
200 unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags; 200 unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags;
201 struct tick_sched *ts; 201 struct tick_sched *ts;
@@ -224,6 +224,11 @@ void tick_nohz_stop_sched_tick(void)
224 if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) 224 if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
225 goto end; 225 goto end;
226 226
227 if (!inidle && !ts->inidle)
228 goto end;
229
230 ts->inidle = 1;
231
227 if (need_resched()) 232 if (need_resched())
228 goto end; 233 goto end;
229 234
@@ -373,11 +378,14 @@ void tick_nohz_restart_sched_tick(void)
373 local_irq_disable(); 378 local_irq_disable();
374 tick_nohz_stop_idle(cpu); 379 tick_nohz_stop_idle(cpu);
375 380
376 if (!ts->tick_stopped) { 381 if (!ts->inidle || !ts->tick_stopped) {
382 ts->inidle = 0;
377 local_irq_enable(); 383 local_irq_enable();
378 return; 384 return;
379 } 385 }
380 386
387 ts->inidle = 0;
388
381 rcu_exit_nohz(); 389 rcu_exit_nohz();
382 390
383 /* Update jiffies first */ 391 /* Update jiffies first */
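
The new ts->inidle flag gates when the tick may be stopped: the idle loop calls tick_nohz_stop_sched_tick(1), while irq_exit() (see the kernel/softirq.c hunk earlier in this diff) passes 0 and only succeeds if the idle loop has already marked the CPU idle. A simplified userspace model of that gating (the struct and helpers are stand-ins, not the kernel code):

#include <stdio.h>

struct tick_sched { int inidle; int tick_stopped; };

static void stop_sched_tick(struct tick_sched *ts, int inidle)
{
	if (!inidle && !ts->inidle)
		return;			/* irq exit before idle entry: do nothing */
	ts->inidle = 1;
	ts->tick_stopped = 1;		/* ...program the one-shot timer, etc. */
}

static void restart_sched_tick(struct tick_sched *ts)
{
	if (!ts->inidle || !ts->tick_stopped) {
		ts->inidle = 0;
		return;
	}
	ts->inidle = 0;
	ts->tick_stopped = 0;		/* ...re-arm the periodic tick */
}

int main(void)
{
	struct tick_sched ts = { 0, 0 };

	stop_sched_tick(&ts, 0);	/* like irq_exit(): ignored */
	printf("after irq exit:  stopped=%d\n", ts.tick_stopped);
	stop_sched_tick(&ts, 1);	/* like the idle loop: stops the tick */
	printf("after idle entry: stopped=%d\n", ts.tick_stopped);
	restart_sched_tick(&ts);
	printf("after restart:    stopped=%d inidle=%d\n", ts.tick_stopped, ts.inidle);
	return 0;
}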
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index 2301e1e7c606..63528086337c 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -213,7 +213,9 @@ static void start_stack_timers(void)
213 int cpu; 213 int cpu;
214 214
215 for_each_online_cpu(cpu) { 215 for_each_online_cpu(cpu) {
216 set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); 216 cpumask_of_cpu_ptr(new_mask, cpu);
217
218 set_cpus_allowed_ptr(current, new_mask);
217 start_stack_timer(cpu); 219 start_stack_timer(cpu);
218 } 220 }
219 set_cpus_allowed_ptr(current, &saved_mask); 221 set_cpus_allowed_ptr(current, &saved_mask);
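
The cpumask_of_cpu_ptr() change above is kernel-internal; the userspace equivalent of the "pin the current task to one CPU" step is sched_setaffinity(), roughly:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	cpu_set_t mask;

	CPU_ZERO(&mask);
	CPU_SET(0, &mask);			/* run only on CPU 0 */
	if (sched_setaffinity(0, sizeof(mask), &mask) != 0) {
		perror("sched_setaffinity");
		return 1;
	}
	printf("pinned to CPU 0\n");
	return 0;
}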
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index ce7799540c91..6fd158b21026 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -140,7 +140,6 @@ static void insert_work(struct cpu_workqueue_struct *cwq,
140 wake_up(&cwq->more_work); 140 wake_up(&cwq->more_work);
141} 141}
142 142
143/* Preempt must be disabled. */
144static void __queue_work(struct cpu_workqueue_struct *cwq, 143static void __queue_work(struct cpu_workqueue_struct *cwq,
145 struct work_struct *work) 144 struct work_struct *work)
146{ 145{
@@ -175,6 +174,31 @@ int queue_work(struct workqueue_struct *wq, struct work_struct *work)
175} 174}
176EXPORT_SYMBOL_GPL(queue_work); 175EXPORT_SYMBOL_GPL(queue_work);
177 176
177/**
178 * queue_work_on - queue work on specific cpu
179 * @cpu: CPU number to execute work on
180 * @wq: workqueue to use
181 * @work: work to queue
182 *
183 * Returns 0 if @work was already on a queue, non-zero otherwise.
184 *
185 * We queue the work to a specific CPU, the caller must ensure it
186 * can't go away.
187 */
188int
189queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work)
190{
191 int ret = 0;
192
193 if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
194 BUG_ON(!list_empty(&work->entry));
195 __queue_work(wq_per_cpu(wq, cpu), work);
196 ret = 1;
197 }
198 return ret;
199}
200EXPORT_SYMBOL_GPL(queue_work_on);
201
178static void delayed_work_timer_fn(unsigned long __data) 202static void delayed_work_timer_fn(unsigned long __data)
179{ 203{
180 struct delayed_work *dwork = (struct delayed_work *)__data; 204 struct delayed_work *dwork = (struct delayed_work *)__data;
@@ -397,7 +421,7 @@ void flush_workqueue(struct workqueue_struct *wq)
397 might_sleep(); 421 might_sleep();
398 lock_acquire(&wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_); 422 lock_acquire(&wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_);
399 lock_release(&wq->lockdep_map, 1, _THIS_IP_); 423 lock_release(&wq->lockdep_map, 1, _THIS_IP_);
400 for_each_cpu_mask(cpu, *cpu_map) 424 for_each_cpu_mask_nr(cpu, *cpu_map)
401 flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu)); 425 flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu));
402} 426}
403EXPORT_SYMBOL_GPL(flush_workqueue); 427EXPORT_SYMBOL_GPL(flush_workqueue);
@@ -477,7 +501,7 @@ static void wait_on_work(struct work_struct *work)
477 wq = cwq->wq; 501 wq = cwq->wq;
478 cpu_map = wq_cpu_map(wq); 502 cpu_map = wq_cpu_map(wq);
479 503
480 for_each_cpu_mask(cpu, *cpu_map) 504 for_each_cpu_mask_nr(cpu, *cpu_map)
481 wait_on_cpu_work(per_cpu_ptr(wq->cpu_wq, cpu), work); 505 wait_on_cpu_work(per_cpu_ptr(wq->cpu_wq, cpu), work);
482} 506}
483 507
@@ -553,6 +577,19 @@ int schedule_work(struct work_struct *work)
553} 577}
554EXPORT_SYMBOL(schedule_work); 578EXPORT_SYMBOL(schedule_work);
555 579
580/*
581 * schedule_work_on - put work task on a specific cpu
582 * @cpu: cpu to put the work task on
583 * @work: job to be done
584 *
585 * This puts a job on a specific cpu
586 */
587int schedule_work_on(int cpu, struct work_struct *work)
588{
589 return queue_work_on(cpu, keventd_wq, work);
590}
591EXPORT_SYMBOL(schedule_work_on);
592
556/** 593/**
557 * schedule_delayed_work - put work task in global workqueue after delay 594 * schedule_delayed_work - put work task in global workqueue after delay
558 * @dwork: job to be done 595 * @dwork: job to be done
@@ -813,7 +850,7 @@ void destroy_workqueue(struct workqueue_struct *wq)
813 list_del(&wq->list); 850 list_del(&wq->list);
814 spin_unlock(&workqueue_lock); 851 spin_unlock(&workqueue_lock);
815 852
816 for_each_cpu_mask(cpu, *cpu_map) 853 for_each_cpu_mask_nr(cpu, *cpu_map)
817 cleanup_workqueue_thread(per_cpu_ptr(wq->cpu_wq, cpu)); 854 cleanup_workqueue_thread(per_cpu_ptr(wq->cpu_wq, cpu));
818 put_online_cpus(); 855 put_online_cpus();
819 856
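
A minimal module-style sketch of the queue_work_on()/schedule_work_on() API added above (assumptions: module context, and CPU 0 stays online for the duration; a real caller would hold get_online_cpus() or otherwise guarantee that, as the comment in the patch requires):

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/smp.h>

static void hello_fn(struct work_struct *work)
{
	pr_info("work ran on cpu %d\n", smp_processor_id());
}

static DECLARE_WORK(hello_work, hello_fn);

static int __init hello_init(void)
{
	/* run hello_fn() on CPU 0's keventd thread */
	schedule_work_on(0, &hello_work);
	return 0;
}

static void __exit hello_exit(void)
{
	flush_scheduled_work();
}

module_init(hello_init);
module_exit(hello_exit);
MODULE_LICENSE("GPL");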