Diffstat (limited to 'kernel')
96 files changed, 1247 insertions, 911 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 353d3fe8ba33..85cbfb31e73e 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -107,6 +107,7 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o | |||
107 | obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o | 107 | obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o |
108 | obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o | 108 | obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o |
109 | obj-$(CONFIG_PADATA) += padata.o | 109 | obj-$(CONFIG_PADATA) += padata.o |
110 | obj-$(CONFIG_CRASH_DUMP) += crash_dump.o | ||
110 | 111 | ||
111 | ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) | 112 | ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) |
112 | # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is | 113 | # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is |
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 37b2bea170c8..e99dda04b126 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -607,7 +607,7 @@ void audit_trim_trees(void) | |||
607 | spin_lock(&hash_lock); | 607 | spin_lock(&hash_lock); |
608 | list_for_each_entry(node, &tree->chunks, list) { | 608 | list_for_each_entry(node, &tree->chunks, list) { |
609 | struct audit_chunk *chunk = find_chunk(node); | 609 | struct audit_chunk *chunk = find_chunk(node); |
610 | /* this could be NULL if the watch is dieing else where... */ | 610 | /* this could be NULL if the watch is dying else where... */ |
611 | struct inode *inode = chunk->mark.i.inode; | 611 | struct inode *inode = chunk->mark.i.inode; |
612 | node->index |= 1U<<31; | 612 | node->index |= 1U<<31; |
613 | if (iterate_mounts(compare_root, inode, root_mnt)) | 613 | if (iterate_mounts(compare_root, inode, root_mnt)) |
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index f49a0318c2ed..b33513a08beb 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1011,7 +1011,7 @@ static int audit_log_pid_context(struct audit_context *context, pid_t pid, | |||
1011 | /* | 1011 | /* |
1012 | * to_send and len_sent accounting are very loose estimates. We aren't | 1012 | * to_send and len_sent accounting are very loose estimates. We aren't |
1013 | * really worried about a hard cap to MAX_EXECVE_AUDIT_LEN so much as being | 1013 | * really worried about a hard cap to MAX_EXECVE_AUDIT_LEN so much as being |
1014 | * within about 500 bytes (next page boundry) | 1014 | * within about 500 bytes (next page boundary) |
1015 | * | 1015 | * |
1016 | * why snprintf? an int is up to 12 digits long. if we just assumed when | 1016 | * why snprintf? an int is up to 12 digits long. if we just assumed when |
1017 | * logging that a[%d]= was going to be 16 characters long we would be wasting | 1017 | * logging that a[%d]= was going to be 16 characters long we would be wasting |
diff --git a/kernel/bounds.c b/kernel/bounds.c
index 98a51f26c136..0c9b862292b2 100644
--- a/kernel/bounds.c
+++ b/kernel/bounds.c
@@ -9,11 +9,13 @@ | |||
9 | #include <linux/page-flags.h> | 9 | #include <linux/page-flags.h> |
10 | #include <linux/mmzone.h> | 10 | #include <linux/mmzone.h> |
11 | #include <linux/kbuild.h> | 11 | #include <linux/kbuild.h> |
12 | #include <linux/page_cgroup.h> | ||
12 | 13 | ||
13 | void foo(void) | 14 | void foo(void) |
14 | { | 15 | { |
15 | /* The enum constants to put into include/generated/bounds.h */ | 16 | /* The enum constants to put into include/generated/bounds.h */ |
16 | DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS); | 17 | DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS); |
17 | DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES); | 18 | DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES); |
19 | DEFINE(NR_PCG_FLAGS, __NR_PCG_FLAGS); | ||
18 | /* End of constants */ | 20 | /* End of constants */ |
19 | } | 21 | } |
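The DEFINE() entries in bounds.c are picked up by kbuild and emitted as preprocessor constants in include/generated/bounds.h, so with this patch the page_cgroup flag count becomes available to early headers as well. A rough sketch of the generated file under one possible configuration; the guard name and the numeric values are illustrative assumptions, not taken from this patch:

/* include/generated/bounds.h - generated by Kbuild (illustrative) */
#ifndef __LINUX_BOUNDS_H__
#define __LINUX_BOUNDS_H__
#define NR_PAGEFLAGS 25	/* from __NR_PAGEFLAGS */
#define MAX_NR_ZONES 4	/* from __MAX_NR_ZONES */
#define NR_PCG_FLAGS 7	/* from __NR_PCG_FLAGS, new with this patch */
#endif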
diff --git a/kernel/capability.c b/kernel/capability.c
index 9e9385f132c8..bf0c734d0c12 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/security.h> | 14 | #include <linux/security.h> |
15 | #include <linux/syscalls.h> | 15 | #include <linux/syscalls.h> |
16 | #include <linux/pid_namespace.h> | 16 | #include <linux/pid_namespace.h> |
17 | #include <linux/user_namespace.h> | ||
17 | #include <asm/uaccess.h> | 18 | #include <asm/uaccess.h> |
18 | 19 | ||
19 | /* | 20 | /* |
@@ -290,6 +291,60 @@ error: | |||
290 | } | 291 | } |
291 | 292 | ||
292 | /** | 293 | /** |
294 | * has_capability - Does a task have a capability in init_user_ns | ||
295 | * @t: The task in question | ||
296 | * @cap: The capability to be tested for | ||
297 | * | ||
298 | * Return true if the specified task has the given superior capability | ||
299 | * currently in effect to the initial user namespace, false if not. | ||
300 | * | ||
301 | * Note that this does not set PF_SUPERPRIV on the task. | ||
302 | */ | ||
303 | bool has_capability(struct task_struct *t, int cap) | ||
304 | { | ||
305 | int ret = security_real_capable(t, &init_user_ns, cap); | ||
306 | |||
307 | return (ret == 0); | ||
308 | } | ||
309 | |||
310 | /** | ||
311 | * has_capability - Does a task have a capability in a specific user ns | ||
312 | * @t: The task in question | ||
313 | * @ns: target user namespace | ||
314 | * @cap: The capability to be tested for | ||
315 | * | ||
316 | * Return true if the specified task has the given superior capability | ||
317 | * currently in effect to the specified user namespace, false if not. | ||
318 | * | ||
319 | * Note that this does not set PF_SUPERPRIV on the task. | ||
320 | */ | ||
321 | bool has_ns_capability(struct task_struct *t, | ||
322 | struct user_namespace *ns, int cap) | ||
323 | { | ||
324 | int ret = security_real_capable(t, ns, cap); | ||
325 | |||
326 | return (ret == 0); | ||
327 | } | ||
328 | |||
329 | /** | ||
330 | * has_capability_noaudit - Does a task have a capability (unaudited) | ||
331 | * @t: The task in question | ||
332 | * @cap: The capability to be tested for | ||
333 | * | ||
334 | * Return true if the specified task has the given superior capability | ||
335 | * currently in effect to init_user_ns, false if not. Don't write an | ||
336 | * audit message for the check. | ||
337 | * | ||
338 | * Note that this does not set PF_SUPERPRIV on the task. | ||
339 | */ | ||
340 | bool has_capability_noaudit(struct task_struct *t, int cap) | ||
341 | { | ||
342 | int ret = security_real_capable_noaudit(t, &init_user_ns, cap); | ||
343 | |||
344 | return (ret == 0); | ||
345 | } | ||
346 | |||
347 | /** | ||
293 | * capable - Determine if the current task has a superior capability in effect | 348 | * capable - Determine if the current task has a superior capability in effect |
294 | * @cap: The capability to be tested for | 349 | * @cap: The capability to be tested for |
295 | * | 350 | * |
@@ -299,17 +354,48 @@ error: | |||
299 | * This sets PF_SUPERPRIV on the task if the capability is available on the | 354 | * This sets PF_SUPERPRIV on the task if the capability is available on the |
300 | * assumption that it's about to be used. | 355 | * assumption that it's about to be used. |
301 | */ | 356 | */ |
302 | int capable(int cap) | 357 | bool capable(int cap) |
358 | { | ||
359 | return ns_capable(&init_user_ns, cap); | ||
360 | } | ||
361 | EXPORT_SYMBOL(capable); | ||
362 | |||
363 | /** | ||
364 | * ns_capable - Determine if the current task has a superior capability in effect | ||
365 | * @ns: The usernamespace we want the capability in | ||
366 | * @cap: The capability to be tested for | ||
367 | * | ||
368 | * Return true if the current task has the given superior capability currently | ||
369 | * available for use, false if not. | ||
370 | * | ||
371 | * This sets PF_SUPERPRIV on the task if the capability is available on the | ||
372 | * assumption that it's about to be used. | ||
373 | */ | ||
374 | bool ns_capable(struct user_namespace *ns, int cap) | ||
303 | { | 375 | { |
304 | if (unlikely(!cap_valid(cap))) { | 376 | if (unlikely(!cap_valid(cap))) { |
305 | printk(KERN_CRIT "capable() called with invalid cap=%u\n", cap); | 377 | printk(KERN_CRIT "capable() called with invalid cap=%u\n", cap); |
306 | BUG(); | 378 | BUG(); |
307 | } | 379 | } |
308 | 380 | ||
309 | if (security_capable(current_cred(), cap) == 0) { | 381 | if (security_capable(ns, current_cred(), cap) == 0) { |
310 | current->flags |= PF_SUPERPRIV; | 382 | current->flags |= PF_SUPERPRIV; |
311 | return 1; | 383 | return true; |
312 | } | 384 | } |
313 | return 0; | 385 | return false; |
314 | } | 386 | } |
315 | EXPORT_SYMBOL(capable); | 387 | EXPORT_SYMBOL(ns_capable); |
388 | |||
389 | /** | ||
390 | * task_ns_capable - Determine whether current task has a superior | ||
391 | * capability targeted at a specific task's user namespace. | ||
392 | * @t: The task whose user namespace is targeted. | ||
393 | * @cap: The capability in question. | ||
394 | * | ||
395 | * Return true if it does, false otherwise. | ||
396 | */ | ||
397 | bool task_ns_capable(struct task_struct *t, int cap) | ||
398 | { | ||
399 | return ns_capable(task_cred_xxx(t, user)->user_ns, cap); | ||
400 | } | ||
401 | EXPORT_SYMBOL(task_ns_capable); | ||
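The capability.c hunks split the old capable() into a namespace-aware family: capable() now delegates to ns_capable(&init_user_ns, cap), while has_ns_capability() and task_ns_capable() let callers target another task's user namespace. A minimal caller sketch, assuming only the functions added above; the surrounding function is hypothetical:

/* Hypothetical check: current must hold CAP_SYS_ADMIN in the user
 * namespace owning @target (audited, may set PF_SUPERPRIV), and we also
 * ask - without generating an audit record - whether @target itself
 * holds CAP_SYS_NICE in the initial namespace. */
static int example_may_manage(struct task_struct *target)
{
	if (!task_ns_capable(target, CAP_SYS_ADMIN))
		return -EPERM;
	if (!has_capability_noaudit(target, CAP_SYS_NICE))
		return -EPERM;
	return 0;
}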
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 95362d15128c..25c7eb52de1a 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -157,7 +157,7 @@ struct css_id { | |||
157 | }; | 157 | }; |
158 | 158 | ||
159 | /* | 159 | /* |
160 | * cgroup_event represents events which userspace want to recieve. | 160 | * cgroup_event represents events which userspace want to receive. |
161 | */ | 161 | */ |
162 | struct cgroup_event { | 162 | struct cgroup_event { |
163 | /* | 163 | /* |
@@ -1813,10 +1813,8 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | |||
1813 | 1813 | ||
1814 | /* Update the css_set linked lists if we're using them */ | 1814 | /* Update the css_set linked lists if we're using them */ |
1815 | write_lock(&css_set_lock); | 1815 | write_lock(&css_set_lock); |
1816 | if (!list_empty(&tsk->cg_list)) { | 1816 | if (!list_empty(&tsk->cg_list)) |
1817 | list_del(&tsk->cg_list); | 1817 | list_move(&tsk->cg_list, &newcg->tasks); |
1818 | list_add(&tsk->cg_list, &newcg->tasks); | ||
1819 | } | ||
1820 | write_unlock(&css_set_lock); | 1818 | write_unlock(&css_set_lock); |
1821 | 1819 | ||
1822 | for_each_subsys(root, ss) { | 1820 | for_each_subsys(root, ss) { |
@@ -3655,12 +3653,12 @@ again: | |||
3655 | spin_lock(&release_list_lock); | 3653 | spin_lock(&release_list_lock); |
3656 | set_bit(CGRP_REMOVED, &cgrp->flags); | 3654 | set_bit(CGRP_REMOVED, &cgrp->flags); |
3657 | if (!list_empty(&cgrp->release_list)) | 3655 | if (!list_empty(&cgrp->release_list)) |
3658 | list_del(&cgrp->release_list); | 3656 | list_del_init(&cgrp->release_list); |
3659 | spin_unlock(&release_list_lock); | 3657 | spin_unlock(&release_list_lock); |
3660 | 3658 | ||
3661 | cgroup_lock_hierarchy(cgrp->root); | 3659 | cgroup_lock_hierarchy(cgrp->root); |
3662 | /* delete this cgroup from parent->children */ | 3660 | /* delete this cgroup from parent->children */ |
3663 | list_del(&cgrp->sibling); | 3661 | list_del_init(&cgrp->sibling); |
3664 | cgroup_unlock_hierarchy(cgrp->root); | 3662 | cgroup_unlock_hierarchy(cgrp->root); |
3665 | 3663 | ||
3666 | d = dget(cgrp->dentry); | 3664 | d = dget(cgrp->dentry); |
@@ -3879,7 +3877,7 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss) | |||
3879 | subsys[ss->subsys_id] = NULL; | 3877 | subsys[ss->subsys_id] = NULL; |
3880 | 3878 | ||
3881 | /* remove subsystem from rootnode's list of subsystems */ | 3879 | /* remove subsystem from rootnode's list of subsystems */ |
3882 | list_del(&ss->sibling); | 3880 | list_del_init(&ss->sibling); |
3883 | 3881 | ||
3884 | /* | 3882 | /* |
3885 | * disentangle the css from all css_sets attached to the dummytop. as | 3883 | * disentangle the css from all css_sets attached to the dummytop. as |
@@ -4241,7 +4239,7 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks) | |||
4241 | if (!list_empty(&tsk->cg_list)) { | 4239 | if (!list_empty(&tsk->cg_list)) { |
4242 | write_lock(&css_set_lock); | 4240 | write_lock(&css_set_lock); |
4243 | if (!list_empty(&tsk->cg_list)) | 4241 | if (!list_empty(&tsk->cg_list)) |
4244 | list_del(&tsk->cg_list); | 4242 | list_del_init(&tsk->cg_list); |
4245 | write_unlock(&css_set_lock); | 4243 | write_unlock(&css_set_lock); |
4246 | } | 4244 | } |
4247 | 4245 | ||
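The cgroup.c changes above are purely about list hygiene: an open-coded list_del() + list_add() pair becomes list_move(), and several list_del() calls become list_del_init() so that later list_empty() tests on the same entry remain valid. A short sketch of the difference, assuming the usual <linux/list.h> semantics; the function and its arguments are made up:

static void example(struct list_head *entry, struct list_head *newq)
{
	/* list_move() is exactly the list_del() + list_add() pair it replaces. */
	list_move(entry, newq);

	/* list_del_init() re-initialises the entry to point at itself, so a
	 * later list_empty(entry) is true and safe; plain list_del() leaves
	 * poisoned pointers behind and must not be followed by such checks. */
	list_del_init(entry);
}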
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 156cc5556140..12b7458f23b1 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -126,7 +126,7 @@ static void cpu_hotplug_done(void) | |||
126 | #else /* #if CONFIG_HOTPLUG_CPU */ | 126 | #else /* #if CONFIG_HOTPLUG_CPU */ |
127 | static void cpu_hotplug_begin(void) {} | 127 | static void cpu_hotplug_begin(void) {} |
128 | static void cpu_hotplug_done(void) {} | 128 | static void cpu_hotplug_done(void) {} |
129 | #endif /* #esle #if CONFIG_HOTPLUG_CPU */ | 129 | #endif /* #else #if CONFIG_HOTPLUG_CPU */ |
130 | 130 | ||
131 | /* Need to know about CPUs going up/down? */ | 131 | /* Need to know about CPUs going up/down? */ |
132 | int __ref register_cpu_notifier(struct notifier_block *nb) | 132 | int __ref register_cpu_notifier(struct notifier_block *nb) |
@@ -160,7 +160,6 @@ static void cpu_notify_nofail(unsigned long val, void *v) | |||
160 | { | 160 | { |
161 | BUG_ON(cpu_notify(val, v)); | 161 | BUG_ON(cpu_notify(val, v)); |
162 | } | 162 | } |
163 | |||
164 | EXPORT_SYMBOL(register_cpu_notifier); | 163 | EXPORT_SYMBOL(register_cpu_notifier); |
165 | 164 | ||
166 | void __ref unregister_cpu_notifier(struct notifier_block *nb) | 165 | void __ref unregister_cpu_notifier(struct notifier_block *nb) |
@@ -205,7 +204,6 @@ static int __ref take_cpu_down(void *_param) | |||
205 | return err; | 204 | return err; |
206 | 205 | ||
207 | cpu_notify(CPU_DYING | param->mod, param->hcpu); | 206 | cpu_notify(CPU_DYING | param->mod, param->hcpu); |
208 | |||
209 | return 0; | 207 | return 0; |
210 | } | 208 | } |
211 | 209 | ||
@@ -227,6 +225,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) | |||
227 | return -EINVAL; | 225 | return -EINVAL; |
228 | 226 | ||
229 | cpu_hotplug_begin(); | 227 | cpu_hotplug_begin(); |
228 | |||
230 | err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls); | 229 | err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls); |
231 | if (err) { | 230 | if (err) { |
232 | nr_calls--; | 231 | nr_calls--; |
@@ -304,7 +303,7 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen) | |||
304 | ret = __cpu_notify(CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls); | 303 | ret = __cpu_notify(CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls); |
305 | if (ret) { | 304 | if (ret) { |
306 | nr_calls--; | 305 | nr_calls--; |
307 | printk("%s: attempt to bring up CPU %u failed\n", | 306 | printk(KERN_WARNING "%s: attempt to bring up CPU %u failed\n", |
308 | __func__, cpu); | 307 | __func__, cpu); |
309 | goto out_notify; | 308 | goto out_notify; |
310 | } | 309 | } |
@@ -450,14 +449,14 @@ void __ref enable_nonboot_cpus(void) | |||
450 | if (cpumask_empty(frozen_cpus)) | 449 | if (cpumask_empty(frozen_cpus)) |
451 | goto out; | 450 | goto out; |
452 | 451 | ||
453 | printk("Enabling non-boot CPUs ...\n"); | 452 | printk(KERN_INFO "Enabling non-boot CPUs ...\n"); |
454 | 453 | ||
455 | arch_enable_nonboot_cpus_begin(); | 454 | arch_enable_nonboot_cpus_begin(); |
456 | 455 | ||
457 | for_each_cpu(cpu, frozen_cpus) { | 456 | for_each_cpu(cpu, frozen_cpus) { |
458 | error = _cpu_up(cpu, 1); | 457 | error = _cpu_up(cpu, 1); |
459 | if (!error) { | 458 | if (!error) { |
460 | printk("CPU%d is up\n", cpu); | 459 | printk(KERN_INFO "CPU%d is up\n", cpu); |
461 | continue; | 460 | continue; |
462 | } | 461 | } |
463 | printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error); | 462 | printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error); |
@@ -509,7 +508,7 @@ void __cpuinit notify_cpu_starting(unsigned int cpu) | |||
509 | */ | 508 | */ |
510 | 509 | ||
511 | /* cpu_bit_bitmap[0] is empty - so we can back into it */ | 510 | /* cpu_bit_bitmap[0] is empty - so we can back into it */ |
512 | #define MASK_DECLARE_1(x) [x+1][0] = 1UL << (x) | 511 | #define MASK_DECLARE_1(x) [x+1][0] = (1UL << (x)) |
513 | #define MASK_DECLARE_2(x) MASK_DECLARE_1(x), MASK_DECLARE_1(x+1) | 512 | #define MASK_DECLARE_2(x) MASK_DECLARE_1(x), MASK_DECLARE_1(x+1) |
514 | #define MASK_DECLARE_4(x) MASK_DECLARE_2(x), MASK_DECLARE_2(x+2) | 513 | #define MASK_DECLARE_4(x) MASK_DECLARE_2(x), MASK_DECLARE_2(x+2) |
515 | #define MASK_DECLARE_8(x) MASK_DECLARE_4(x), MASK_DECLARE_4(x+4) | 514 | #define MASK_DECLARE_8(x) MASK_DECLARE_4(x), MASK_DECLARE_4(x+4) |
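The MASK_DECLARE_1() change only adds parentheses around the shift; the macro family still builds cpu_bit_bitmap by doubling. As a worked expansion (illustrative, not compilable on its own):

/* MASK_DECLARE_2(0) expands to: */
[1][0] = (1UL << 0), [2][0] = (1UL << 1)
/* i.e. entry 1 has bit 0 set and entry 2 has bit 1 set, which is why
 * cpu_bit_bitmap[0] can stay empty and be "backed into" as the comment says. */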
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index e92e98189032..33eee16addb8 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1015,17 +1015,12 @@ static void cpuset_change_nodemask(struct task_struct *p, | |||
1015 | struct cpuset *cs; | 1015 | struct cpuset *cs; |
1016 | int migrate; | 1016 | int migrate; |
1017 | const nodemask_t *oldmem = scan->data; | 1017 | const nodemask_t *oldmem = scan->data; |
1018 | NODEMASK_ALLOC(nodemask_t, newmems, GFP_KERNEL); | 1018 | static nodemask_t newmems; /* protected by cgroup_mutex */ |
1019 | |||
1020 | if (!newmems) | ||
1021 | return; | ||
1022 | 1019 | ||
1023 | cs = cgroup_cs(scan->cg); | 1020 | cs = cgroup_cs(scan->cg); |
1024 | guarantee_online_mems(cs, newmems); | 1021 | guarantee_online_mems(cs, &newmems); |
1025 | |||
1026 | cpuset_change_task_nodemask(p, newmems); | ||
1027 | 1022 | ||
1028 | NODEMASK_FREE(newmems); | 1023 | cpuset_change_task_nodemask(p, &newmems); |
1029 | 1024 | ||
1030 | mm = get_task_mm(p); | 1025 | mm = get_task_mm(p); |
1031 | if (!mm) | 1026 | if (!mm) |
@@ -1438,44 +1433,35 @@ static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont, | |||
1438 | struct mm_struct *mm; | 1433 | struct mm_struct *mm; |
1439 | struct cpuset *cs = cgroup_cs(cont); | 1434 | struct cpuset *cs = cgroup_cs(cont); |
1440 | struct cpuset *oldcs = cgroup_cs(oldcont); | 1435 | struct cpuset *oldcs = cgroup_cs(oldcont); |
1441 | NODEMASK_ALLOC(nodemask_t, from, GFP_KERNEL); | 1436 | static nodemask_t to; /* protected by cgroup_mutex */ |
1442 | NODEMASK_ALLOC(nodemask_t, to, GFP_KERNEL); | ||
1443 | |||
1444 | if (from == NULL || to == NULL) | ||
1445 | goto alloc_fail; | ||
1446 | 1437 | ||
1447 | if (cs == &top_cpuset) { | 1438 | if (cs == &top_cpuset) { |
1448 | cpumask_copy(cpus_attach, cpu_possible_mask); | 1439 | cpumask_copy(cpus_attach, cpu_possible_mask); |
1449 | } else { | 1440 | } else { |
1450 | guarantee_online_cpus(cs, cpus_attach); | 1441 | guarantee_online_cpus(cs, cpus_attach); |
1451 | } | 1442 | } |
1452 | guarantee_online_mems(cs, to); | 1443 | guarantee_online_mems(cs, &to); |
1453 | 1444 | ||
1454 | /* do per-task migration stuff possibly for each in the threadgroup */ | 1445 | /* do per-task migration stuff possibly for each in the threadgroup */ |
1455 | cpuset_attach_task(tsk, to, cs); | 1446 | cpuset_attach_task(tsk, &to, cs); |
1456 | if (threadgroup) { | 1447 | if (threadgroup) { |
1457 | struct task_struct *c; | 1448 | struct task_struct *c; |
1458 | rcu_read_lock(); | 1449 | rcu_read_lock(); |
1459 | list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) { | 1450 | list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) { |
1460 | cpuset_attach_task(c, to, cs); | 1451 | cpuset_attach_task(c, &to, cs); |
1461 | } | 1452 | } |
1462 | rcu_read_unlock(); | 1453 | rcu_read_unlock(); |
1463 | } | 1454 | } |
1464 | 1455 | ||
1465 | /* change mm; only needs to be done once even if threadgroup */ | 1456 | /* change mm; only needs to be done once even if threadgroup */ |
1466 | *from = oldcs->mems_allowed; | 1457 | to = cs->mems_allowed; |
1467 | *to = cs->mems_allowed; | ||
1468 | mm = get_task_mm(tsk); | 1458 | mm = get_task_mm(tsk); |
1469 | if (mm) { | 1459 | if (mm) { |
1470 | mpol_rebind_mm(mm, to); | 1460 | mpol_rebind_mm(mm, &to); |
1471 | if (is_memory_migrate(cs)) | 1461 | if (is_memory_migrate(cs)) |
1472 | cpuset_migrate_mm(mm, from, to); | 1462 | cpuset_migrate_mm(mm, &oldcs->mems_allowed, &to); |
1473 | mmput(mm); | 1463 | mmput(mm); |
1474 | } | 1464 | } |
1475 | |||
1476 | alloc_fail: | ||
1477 | NODEMASK_FREE(from); | ||
1478 | NODEMASK_FREE(to); | ||
1479 | } | 1465 | } |
1480 | 1466 | ||
1481 | /* The various types of files and directories in a cpuset file system */ | 1467 | /* The various types of files and directories in a cpuset file system */ |
@@ -1610,34 +1596,26 @@ out: | |||
1610 | * across a page fault. | 1596 | * across a page fault. |
1611 | */ | 1597 | */ |
1612 | 1598 | ||
1613 | static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs) | 1599 | static size_t cpuset_sprintf_cpulist(char *page, struct cpuset *cs) |
1614 | { | 1600 | { |
1615 | int ret; | 1601 | size_t count; |
1616 | 1602 | ||
1617 | mutex_lock(&callback_mutex); | 1603 | mutex_lock(&callback_mutex); |
1618 | ret = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_allowed); | 1604 | count = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_allowed); |
1619 | mutex_unlock(&callback_mutex); | 1605 | mutex_unlock(&callback_mutex); |
1620 | 1606 | ||
1621 | return ret; | 1607 | return count; |
1622 | } | 1608 | } |
1623 | 1609 | ||
1624 | static int cpuset_sprintf_memlist(char *page, struct cpuset *cs) | 1610 | static size_t cpuset_sprintf_memlist(char *page, struct cpuset *cs) |
1625 | { | 1611 | { |
1626 | NODEMASK_ALLOC(nodemask_t, mask, GFP_KERNEL); | 1612 | size_t count; |
1627 | int retval; | ||
1628 | |||
1629 | if (mask == NULL) | ||
1630 | return -ENOMEM; | ||
1631 | 1613 | ||
1632 | mutex_lock(&callback_mutex); | 1614 | mutex_lock(&callback_mutex); |
1633 | *mask = cs->mems_allowed; | 1615 | count = nodelist_scnprintf(page, PAGE_SIZE, cs->mems_allowed); |
1634 | mutex_unlock(&callback_mutex); | 1616 | mutex_unlock(&callback_mutex); |
1635 | 1617 | ||
1636 | retval = nodelist_scnprintf(page, PAGE_SIZE, *mask); | 1618 | return count; |
1637 | |||
1638 | NODEMASK_FREE(mask); | ||
1639 | |||
1640 | return retval; | ||
1641 | } | 1619 | } |
1642 | 1620 | ||
1643 | static ssize_t cpuset_common_file_read(struct cgroup *cont, | 1621 | static ssize_t cpuset_common_file_read(struct cgroup *cont, |
@@ -1862,8 +1840,10 @@ static void cpuset_post_clone(struct cgroup_subsys *ss, | |||
1862 | cs = cgroup_cs(cgroup); | 1840 | cs = cgroup_cs(cgroup); |
1863 | parent_cs = cgroup_cs(parent); | 1841 | parent_cs = cgroup_cs(parent); |
1864 | 1842 | ||
1843 | mutex_lock(&callback_mutex); | ||
1865 | cs->mems_allowed = parent_cs->mems_allowed; | 1844 | cs->mems_allowed = parent_cs->mems_allowed; |
1866 | cpumask_copy(cs->cpus_allowed, parent_cs->cpus_allowed); | 1845 | cpumask_copy(cs->cpus_allowed, parent_cs->cpus_allowed); |
1846 | mutex_unlock(&callback_mutex); | ||
1867 | return; | 1847 | return; |
1868 | } | 1848 | } |
1869 | 1849 | ||
@@ -2066,10 +2046,7 @@ static void scan_for_empty_cpusets(struct cpuset *root) | |||
2066 | struct cpuset *cp; /* scans cpusets being updated */ | 2046 | struct cpuset *cp; /* scans cpusets being updated */ |
2067 | struct cpuset *child; /* scans child cpusets of cp */ | 2047 | struct cpuset *child; /* scans child cpusets of cp */ |
2068 | struct cgroup *cont; | 2048 | struct cgroup *cont; |
2069 | NODEMASK_ALLOC(nodemask_t, oldmems, GFP_KERNEL); | 2049 | static nodemask_t oldmems; /* protected by cgroup_mutex */ |
2070 | |||
2071 | if (oldmems == NULL) | ||
2072 | return; | ||
2073 | 2050 | ||
2074 | list_add_tail((struct list_head *)&root->stack_list, &queue); | 2051 | list_add_tail((struct list_head *)&root->stack_list, &queue); |
2075 | 2052 | ||
@@ -2086,7 +2063,7 @@ static void scan_for_empty_cpusets(struct cpuset *root) | |||
2086 | nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY])) | 2063 | nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY])) |
2087 | continue; | 2064 | continue; |
2088 | 2065 | ||
2089 | *oldmems = cp->mems_allowed; | 2066 | oldmems = cp->mems_allowed; |
2090 | 2067 | ||
2091 | /* Remove offline cpus and mems from this cpuset. */ | 2068 | /* Remove offline cpus and mems from this cpuset. */ |
2092 | mutex_lock(&callback_mutex); | 2069 | mutex_lock(&callback_mutex); |
@@ -2102,10 +2079,9 @@ static void scan_for_empty_cpusets(struct cpuset *root) | |||
2102 | remove_tasks_in_empty_cpuset(cp); | 2079 | remove_tasks_in_empty_cpuset(cp); |
2103 | else { | 2080 | else { |
2104 | update_tasks_cpumask(cp, NULL); | 2081 | update_tasks_cpumask(cp, NULL); |
2105 | update_tasks_nodemask(cp, oldmems, NULL); | 2082 | update_tasks_nodemask(cp, &oldmems, NULL); |
2106 | } | 2083 | } |
2107 | } | 2084 | } |
2108 | NODEMASK_FREE(oldmems); | ||
2109 | } | 2085 | } |
2110 | 2086 | ||
2111 | /* | 2087 | /* |
@@ -2147,19 +2123,16 @@ void cpuset_update_active_cpus(void) | |||
2147 | static int cpuset_track_online_nodes(struct notifier_block *self, | 2123 | static int cpuset_track_online_nodes(struct notifier_block *self, |
2148 | unsigned long action, void *arg) | 2124 | unsigned long action, void *arg) |
2149 | { | 2125 | { |
2150 | NODEMASK_ALLOC(nodemask_t, oldmems, GFP_KERNEL); | 2126 | static nodemask_t oldmems; /* protected by cgroup_mutex */ |
2151 | |||
2152 | if (oldmems == NULL) | ||
2153 | return NOTIFY_DONE; | ||
2154 | 2127 | ||
2155 | cgroup_lock(); | 2128 | cgroup_lock(); |
2156 | switch (action) { | 2129 | switch (action) { |
2157 | case MEM_ONLINE: | 2130 | case MEM_ONLINE: |
2158 | *oldmems = top_cpuset.mems_allowed; | 2131 | oldmems = top_cpuset.mems_allowed; |
2159 | mutex_lock(&callback_mutex); | 2132 | mutex_lock(&callback_mutex); |
2160 | top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; | 2133 | top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; |
2161 | mutex_unlock(&callback_mutex); | 2134 | mutex_unlock(&callback_mutex); |
2162 | update_tasks_nodemask(&top_cpuset, oldmems, NULL); | 2135 | update_tasks_nodemask(&top_cpuset, &oldmems, NULL); |
2163 | break; | 2136 | break; |
2164 | case MEM_OFFLINE: | 2137 | case MEM_OFFLINE: |
2165 | /* | 2138 | /* |
@@ -2173,7 +2146,6 @@ static int cpuset_track_online_nodes(struct notifier_block *self, | |||
2173 | } | 2146 | } |
2174 | cgroup_unlock(); | 2147 | cgroup_unlock(); |
2175 | 2148 | ||
2176 | NODEMASK_FREE(oldmems); | ||
2177 | return NOTIFY_OK; | 2149 | return NOTIFY_OK; |
2178 | } | 2150 | } |
2179 | #endif | 2151 | #endif |
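Several cpuset.c paths above stop heap-allocating a nodemask_t with NODEMASK_ALLOC()/NODEMASK_FREE() (which can fail and forces extra error handling) and instead use a function-local static nodemask, relying on the fact that every caller already holds cgroup_mutex. A minimal sketch of the pattern; the function and field uses are illustrative:

static void example_update(struct cpuset *cs)
{
	static nodemask_t scratch;	/* protected by cgroup_mutex */

	scratch = cs->mems_allowed;	/* plain struct copy, no allocation */
	/* ... operate on &scratch while cgroup_mutex is held ... */
}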
diff --git a/kernel/crash_dump.c b/kernel/crash_dump.c
new file mode 100644
index 000000000000..5f85690285d4
--- /dev/null
+++ b/kernel/crash_dump.c
@@ -0,0 +1,34 @@ | |||
1 | #include <linux/kernel.h> | ||
2 | #include <linux/crash_dump.h> | ||
3 | #include <linux/init.h> | ||
4 | #include <linux/errno.h> | ||
5 | #include <linux/module.h> | ||
6 | |||
7 | /* | ||
8 | * If we have booted due to a crash, max_pfn will be a very low value. We need | ||
9 | * to know the amount of memory that the previous kernel used. | ||
10 | */ | ||
11 | unsigned long saved_max_pfn; | ||
12 | |||
13 | /* | ||
14 | * stores the physical address of elf header of crash image | ||
15 | * | ||
16 | * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by | ||
17 | * is_kdump_kernel() to determine if we are booting after a panic. Hence put | ||
18 | * it under CONFIG_CRASH_DUMP and not CONFIG_PROC_VMCORE. | ||
19 | */ | ||
20 | unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX; | ||
21 | |||
22 | /* | ||
23 | * elfcorehdr= specifies the location of elf core header stored by the crashed | ||
24 | * kernel. This option will be passed by kexec loader to the capture kernel. | ||
25 | */ | ||
26 | static int __init setup_elfcorehdr(char *arg) | ||
27 | { | ||
28 | char *end; | ||
29 | if (!arg) | ||
30 | return -EINVAL; | ||
31 | elfcorehdr_addr = memparse(arg, &end); | ||
32 | return end > arg ? 0 : -EINVAL; | ||
33 | } | ||
34 | early_param("elfcorehdr", setup_elfcorehdr); | ||
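elfcorehdr_addr keeps its ELFCORE_ADDR_MAX default unless the capture kernel is booted with elfcorehdr= (memparse() also accepts K/M/G suffixes, e.g. elfcorehdr=32M). A sketch of how other code detects a kdump boot, assuming is_kdump_kernel() is the usual <linux/crash_dump.h> helper that compares elfcorehdr_addr against ELFCORE_ADDR_MAX:

#include <linux/crash_dump.h>

static bool example_running_in_capture_kernel(void)
{
	/* True only when the crashed kernel handed us an ELF core header
	 * location on the command line, e.g. "elfcorehdr=0x2000000". */
	return is_kdump_kernel();
}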
diff --git a/kernel/cred.c b/kernel/cred.c
index 2343c132c5a7..5557b55048df 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -741,6 +741,12 @@ int set_create_files_as(struct cred *new, struct inode *inode) | |||
741 | } | 741 | } |
742 | EXPORT_SYMBOL(set_create_files_as); | 742 | EXPORT_SYMBOL(set_create_files_as); |
743 | 743 | ||
744 | struct user_namespace *current_user_ns(void) | ||
745 | { | ||
746 | return _current_user_ns(); | ||
747 | } | ||
748 | EXPORT_SYMBOL(current_user_ns); | ||
749 | |||
744 | #ifdef CONFIG_DEBUG_CREDENTIALS | 750 | #ifdef CONFIG_DEBUG_CREDENTIALS |
745 | 751 | ||
746 | bool creds_are_invalid(const struct cred *cred) | 752 | bool creds_are_invalid(const struct cred *cred) |
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index cefd4a11f6d9..bad6786dee88 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -538,7 +538,7 @@ return_normal: | |||
538 | 538 | ||
539 | /* | 539 | /* |
540 | * For single stepping, try to only enter on the processor | 540 | * For single stepping, try to only enter on the processor |
541 | * that was single stepping. To gaurd against a deadlock, the | 541 | * that was single stepping. To guard against a deadlock, the |
542 | * kernel will only try for the value of sstep_tries before | 542 | * kernel will only try for the value of sstep_tries before |
543 | * giving up and continuing on. | 543 | * giving up and continuing on. |
544 | */ | 544 | */ |
diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c
index 481a7bd2dfe7..a11db956dd62 100644
--- a/kernel/debug/gdbstub.c
+++ b/kernel/debug/gdbstub.c
@@ -1093,3 +1093,33 @@ int gdbstub_state(struct kgdb_state *ks, char *cmd) | |||
1093 | put_packet(remcom_out_buffer); | 1093 | put_packet(remcom_out_buffer); |
1094 | return 0; | 1094 | return 0; |
1095 | } | 1095 | } |
1096 | |||
1097 | /** | ||
1098 | * gdbstub_exit - Send an exit message to GDB | ||
1099 | * @status: The exit code to report. | ||
1100 | */ | ||
1101 | void gdbstub_exit(int status) | ||
1102 | { | ||
1103 | unsigned char checksum, ch, buffer[3]; | ||
1104 | int loop; | ||
1105 | |||
1106 | buffer[0] = 'W'; | ||
1107 | buffer[1] = hex_asc_hi(status); | ||
1108 | buffer[2] = hex_asc_lo(status); | ||
1109 | |||
1110 | dbg_io_ops->write_char('$'); | ||
1111 | checksum = 0; | ||
1112 | |||
1113 | for (loop = 0; loop < 3; loop++) { | ||
1114 | ch = buffer[loop]; | ||
1115 | checksum += ch; | ||
1116 | dbg_io_ops->write_char(ch); | ||
1117 | } | ||
1118 | |||
1119 | dbg_io_ops->write_char('#'); | ||
1120 | dbg_io_ops->write_char(hex_asc_hi(checksum)); | ||
1121 | dbg_io_ops->write_char(hex_asc_lo(checksum)); | ||
1122 | |||
1123 | /* make sure the output is flushed, lest the bootloader clobber it */ | ||
1124 | dbg_io_ops->flush(); | ||
1125 | } | ||
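gdbstub_exit() emits a standard GDB remote-serial-protocol exit packet: '$', the payload "W" plus two hex digits of the status, then '#' and a two-hex-digit checksum that is simply the byte sum of the payload modulo 256. A worked example for status 0 (computed by hand, for illustration only):

/* payload "W00": 'W' (0x57) + '0' (0x30) + '0' (0x30) = 0xb7,
 * so the bytes sent on the wire are:  $W00#b7
 */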
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index bd3e8e29caa3..be14779bcef6 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -78,7 +78,7 @@ static unsigned int kdb_continue_catastrophic; | |||
78 | static kdbtab_t *kdb_commands; | 78 | static kdbtab_t *kdb_commands; |
79 | #define KDB_BASE_CMD_MAX 50 | 79 | #define KDB_BASE_CMD_MAX 50 |
80 | static int kdb_max_commands = KDB_BASE_CMD_MAX; | 80 | static int kdb_max_commands = KDB_BASE_CMD_MAX; |
81 | static kdbtab_t kdb_base_commands[50]; | 81 | static kdbtab_t kdb_base_commands[KDB_BASE_CMD_MAX]; |
82 | #define for_each_kdbcmd(cmd, num) \ | 82 | #define for_each_kdbcmd(cmd, num) \ |
83 | for ((cmd) = kdb_base_commands, (num) = 0; \ | 83 | for ((cmd) = kdb_base_commands, (num) = 0; \ |
84 | num < kdb_max_commands; \ | 84 | num < kdb_max_commands; \ |
@@ -441,9 +441,9 @@ static int kdb_check_regs(void) | |||
441 | * symbol name, and offset to the caller. | 441 | * symbol name, and offset to the caller. |
442 | * | 442 | * |
443 | * The argument may consist of a numeric value (decimal or | 443 | * The argument may consist of a numeric value (decimal or |
444 | * hexidecimal), a symbol name, a register name (preceeded by the | 444 | * hexidecimal), a symbol name, a register name (preceded by the |
445 | * percent sign), an environment variable with a numeric value | 445 | * percent sign), an environment variable with a numeric value |
446 | * (preceeded by a dollar sign) or a simple arithmetic expression | 446 | * (preceded by a dollar sign) or a simple arithmetic expression |
447 | * consisting of a symbol name, +/-, and a numeric constant value | 447 | * consisting of a symbol name, +/-, and a numeric constant value |
448 | * (offset). | 448 | * (offset). |
449 | * Parameters: | 449 | * Parameters: |
@@ -1335,7 +1335,7 @@ void kdb_print_state(const char *text, int value) | |||
1335 | * error The hardware-defined error code | 1335 | * error The hardware-defined error code |
1336 | * reason2 kdb's current reason code. | 1336 | * reason2 kdb's current reason code. |
1337 | * Initially error but can change | 1337 | * Initially error but can change |
1338 | * acording to kdb state. | 1338 | * according to kdb state. |
1339 | * db_result Result code from break or debug point. | 1339 | * db_result Result code from break or debug point. |
1340 | * regs The exception frame at time of fault/breakpoint. | 1340 | * regs The exception frame at time of fault/breakpoint. |
1341 | * should always be valid. | 1341 | * should always be valid. |
@@ -2892,7 +2892,7 @@ static void __init kdb_inittab(void) | |||
2892 | "Send a signal to a process", 0, KDB_REPEAT_NONE); | 2892 | "Send a signal to a process", 0, KDB_REPEAT_NONE); |
2893 | kdb_register_repeat("summary", kdb_summary, "", | 2893 | kdb_register_repeat("summary", kdb_summary, "", |
2894 | "Summarize the system", 4, KDB_REPEAT_NONE); | 2894 | "Summarize the system", 4, KDB_REPEAT_NONE); |
2895 | kdb_register_repeat("per_cpu", kdb_per_cpu, "", | 2895 | kdb_register_repeat("per_cpu", kdb_per_cpu, "<sym> [<bytes>] [<cpu>]", |
2896 | "Display per_cpu variables", 3, KDB_REPEAT_NONE); | 2896 | "Display per_cpu variables", 3, KDB_REPEAT_NONE); |
2897 | kdb_register_repeat("grephelp", kdb_grep_help, "", | 2897 | kdb_register_repeat("grephelp", kdb_grep_help, "", |
2898 | "Display help on | grep", 0, KDB_REPEAT_NONE); | 2898 | "Display help on | grep", 0, KDB_REPEAT_NONE); |
diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c
index 6b2485dcb050..5532dd37aa86 100644
--- a/kernel/debug/kdb/kdb_support.c
+++ b/kernel/debug/kdb/kdb_support.c
@@ -545,7 +545,7 @@ int kdb_putword(unsigned long addr, unsigned long word, size_t size) | |||
545 | * Mask for process state. | 545 | * Mask for process state. |
546 | * Notes: | 546 | * Notes: |
547 | * The mask folds data from several sources into a single long value, so | 547 | * The mask folds data from several sources into a single long value, so |
548 | * be carefull not to overlap the bits. TASK_* bits are in the LSB, | 548 | * be careful not to overlap the bits. TASK_* bits are in the LSB, |
549 | * special cases like UNRUNNABLE are in the MSB. As of 2.6.10-rc1 there | 549 | * special cases like UNRUNNABLE are in the MSB. As of 2.6.10-rc1 there |
550 | * is no overlap between TASK_* and EXIT_* but that may not always be | 550 | * is no overlap between TASK_* and EXIT_* but that may not always be |
551 | * true, so EXIT_* bits are shifted left 16 bits before being stored in | 551 | * true, so EXIT_* bits are shifted left 16 bits before being stored in |
diff --git a/kernel/exit.c b/kernel/exit.c
index f9a45ebcc7b1..f5d2f63bae0b 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -841,7 +841,7 @@ static void exit_notify(struct task_struct *tsk, int group_dead) | |||
841 | /* Let father know we died | 841 | /* Let father know we died |
842 | * | 842 | * |
843 | * Thread signals are configurable, but you aren't going to use | 843 | * Thread signals are configurable, but you aren't going to use |
844 | * that to send signals to arbitary processes. | 844 | * that to send signals to arbitrary processes. |
845 | * That stops right now. | 845 | * That stops right now. |
846 | * | 846 | * |
847 | * If the parent exec id doesn't match the exec id we saved | 847 | * If the parent exec id doesn't match the exec id we saved |
@@ -908,6 +908,7 @@ NORET_TYPE void do_exit(long code) | |||
908 | profile_task_exit(tsk); | 908 | profile_task_exit(tsk); |
909 | 909 | ||
910 | WARN_ON(atomic_read(&tsk->fs_excl)); | 910 | WARN_ON(atomic_read(&tsk->fs_excl)); |
911 | WARN_ON(blk_needs_flush_plug(tsk)); | ||
911 | 912 | ||
912 | if (unlikely(in_interrupt())) | 913 | if (unlikely(in_interrupt())) |
913 | panic("Aiee, killing interrupt handler!"); | 914 | panic("Aiee, killing interrupt handler!"); |
diff --git a/kernel/fork.c b/kernel/fork.c
index 05b92c457010..e7548dee636b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -40,6 +40,7 @@ | |||
40 | #include <linux/tracehook.h> | 40 | #include <linux/tracehook.h> |
41 | #include <linux/futex.h> | 41 | #include <linux/futex.h> |
42 | #include <linux/compat.h> | 42 | #include <linux/compat.h> |
43 | #include <linux/kthread.h> | ||
43 | #include <linux/task_io_accounting_ops.h> | 44 | #include <linux/task_io_accounting_ops.h> |
44 | #include <linux/rcupdate.h> | 45 | #include <linux/rcupdate.h> |
45 | #include <linux/ptrace.h> | 46 | #include <linux/ptrace.h> |
@@ -109,20 +110,25 @@ int nr_processes(void) | |||
109 | } | 110 | } |
110 | 111 | ||
111 | #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR | 112 | #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR |
112 | # define alloc_task_struct() kmem_cache_alloc(task_struct_cachep, GFP_KERNEL) | 113 | # define alloc_task_struct_node(node) \ |
113 | # define free_task_struct(tsk) kmem_cache_free(task_struct_cachep, (tsk)) | 114 | kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node) |
115 | # define free_task_struct(tsk) \ | ||
116 | kmem_cache_free(task_struct_cachep, (tsk)) | ||
114 | static struct kmem_cache *task_struct_cachep; | 117 | static struct kmem_cache *task_struct_cachep; |
115 | #endif | 118 | #endif |
116 | 119 | ||
117 | #ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR | 120 | #ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR |
118 | static inline struct thread_info *alloc_thread_info(struct task_struct *tsk) | 121 | static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, |
122 | int node) | ||
119 | { | 123 | { |
120 | #ifdef CONFIG_DEBUG_STACK_USAGE | 124 | #ifdef CONFIG_DEBUG_STACK_USAGE |
121 | gfp_t mask = GFP_KERNEL | __GFP_ZERO; | 125 | gfp_t mask = GFP_KERNEL | __GFP_ZERO; |
122 | #else | 126 | #else |
123 | gfp_t mask = GFP_KERNEL; | 127 | gfp_t mask = GFP_KERNEL; |
124 | #endif | 128 | #endif |
125 | return (struct thread_info *)__get_free_pages(mask, THREAD_SIZE_ORDER); | 129 | struct page *page = alloc_pages_node(node, mask, THREAD_SIZE_ORDER); |
130 | |||
131 | return page ? page_address(page) : NULL; | ||
126 | } | 132 | } |
127 | 133 | ||
128 | static inline void free_thread_info(struct thread_info *ti) | 134 | static inline void free_thread_info(struct thread_info *ti) |
@@ -249,16 +255,16 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) | |||
249 | struct task_struct *tsk; | 255 | struct task_struct *tsk; |
250 | struct thread_info *ti; | 256 | struct thread_info *ti; |
251 | unsigned long *stackend; | 257 | unsigned long *stackend; |
252 | 258 | int node = tsk_fork_get_node(orig); | |
253 | int err; | 259 | int err; |
254 | 260 | ||
255 | prepare_to_copy(orig); | 261 | prepare_to_copy(orig); |
256 | 262 | ||
257 | tsk = alloc_task_struct(); | 263 | tsk = alloc_task_struct_node(node); |
258 | if (!tsk) | 264 | if (!tsk) |
259 | return NULL; | 265 | return NULL; |
260 | 266 | ||
261 | ti = alloc_thread_info(tsk); | 267 | ti = alloc_thread_info_node(tsk, node); |
262 | if (!ti) { | 268 | if (!ti) { |
263 | free_task_struct(tsk); | 269 | free_task_struct(tsk); |
264 | return NULL; | 270 | return NULL; |
@@ -1181,12 +1187,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1181 | pid = alloc_pid(p->nsproxy->pid_ns); | 1187 | pid = alloc_pid(p->nsproxy->pid_ns); |
1182 | if (!pid) | 1188 | if (!pid) |
1183 | goto bad_fork_cleanup_io; | 1189 | goto bad_fork_cleanup_io; |
1184 | |||
1185 | if (clone_flags & CLONE_NEWPID) { | ||
1186 | retval = pid_ns_prepare_proc(p->nsproxy->pid_ns); | ||
1187 | if (retval < 0) | ||
1188 | goto bad_fork_free_pid; | ||
1189 | } | ||
1190 | } | 1190 | } |
1191 | 1191 | ||
1192 | p->pid = pid_nr(pid); | 1192 | p->pid = pid_nr(pid); |
@@ -1205,6 +1205,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1205 | * Clear TID on mm_release()? | 1205 | * Clear TID on mm_release()? |
1206 | */ | 1206 | */ |
1207 | p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL; | 1207 | p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL; |
1208 | #ifdef CONFIG_BLOCK | ||
1209 | p->plug = NULL; | ||
1210 | #endif | ||
1208 | #ifdef CONFIG_FUTEX | 1211 | #ifdef CONFIG_FUTEX |
1209 | p->robust_list = NULL; | 1212 | p->robust_list = NULL; |
1210 | #ifdef CONFIG_COMPAT | 1213 | #ifdef CONFIG_COMPAT |
@@ -1290,7 +1293,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1290 | tracehook_finish_clone(p, clone_flags, trace); | 1293 | tracehook_finish_clone(p, clone_flags, trace); |
1291 | 1294 | ||
1292 | if (thread_group_leader(p)) { | 1295 | if (thread_group_leader(p)) { |
1293 | if (clone_flags & CLONE_NEWPID) | 1296 | if (is_child_reaper(pid)) |
1294 | p->nsproxy->pid_ns->child_reaper = p; | 1297 | p->nsproxy->pid_ns->child_reaper = p; |
1295 | 1298 | ||
1296 | p->signal->leader_pid = pid; | 1299 | p->signal->leader_pid = pid; |
@@ -1513,38 +1516,24 @@ void __init proc_caches_init(void) | |||
1513 | } | 1516 | } |
1514 | 1517 | ||
1515 | /* | 1518 | /* |
1516 | * Check constraints on flags passed to the unshare system call and | 1519 | * Check constraints on flags passed to the unshare system call. |
1517 | * force unsharing of additional process context as appropriate. | ||
1518 | */ | 1520 | */ |
1519 | static void check_unshare_flags(unsigned long *flags_ptr) | 1521 | static int check_unshare_flags(unsigned long unshare_flags) |
1520 | { | 1522 | { |
1523 | if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| | ||
1524 | CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| | ||
1525 | CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET)) | ||
1526 | return -EINVAL; | ||
1521 | /* | 1527 | /* |
1522 | * If unsharing a thread from a thread group, must also | 1528 | * Not implemented, but pretend it works if there is nothing to |
1523 | * unshare vm. | 1529 | * unshare. Note that unsharing CLONE_THREAD or CLONE_SIGHAND |
1524 | */ | 1530 | * needs to unshare vm. |
1525 | if (*flags_ptr & CLONE_THREAD) | ||
1526 | *flags_ptr |= CLONE_VM; | ||
1527 | |||
1528 | /* | ||
1529 | * If unsharing vm, must also unshare signal handlers. | ||
1530 | */ | ||
1531 | if (*flags_ptr & CLONE_VM) | ||
1532 | *flags_ptr |= CLONE_SIGHAND; | ||
1533 | |||
1534 | /* | ||
1535 | * If unsharing namespace, must also unshare filesystem information. | ||
1536 | */ | 1531 | */ |
1537 | if (*flags_ptr & CLONE_NEWNS) | 1532 | if (unshare_flags & (CLONE_THREAD | CLONE_SIGHAND | CLONE_VM)) { |
1538 | *flags_ptr |= CLONE_FS; | 1533 | /* FIXME: get_task_mm() increments ->mm_users */ |
1539 | } | 1534 | if (atomic_read(¤t->mm->mm_users) > 1) |
1540 | 1535 | return -EINVAL; | |
1541 | /* | 1536 | } |
1542 | * Unsharing of tasks created with CLONE_THREAD is not supported yet | ||
1543 | */ | ||
1544 | static int unshare_thread(unsigned long unshare_flags) | ||
1545 | { | ||
1546 | if (unshare_flags & CLONE_THREAD) | ||
1547 | return -EINVAL; | ||
1548 | 1537 | ||
1549 | return 0; | 1538 | return 0; |
1550 | } | 1539 | } |
@@ -1571,34 +1560,6 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp) | |||
1571 | } | 1560 | } |
1572 | 1561 | ||
1573 | /* | 1562 | /* |
1574 | * Unsharing of sighand is not supported yet | ||
1575 | */ | ||
1576 | static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp) | ||
1577 | { | ||
1578 | struct sighand_struct *sigh = current->sighand; | ||
1579 | |||
1580 | if ((unshare_flags & CLONE_SIGHAND) && atomic_read(&sigh->count) > 1) | ||
1581 | return -EINVAL; | ||
1582 | else | ||
1583 | return 0; | ||
1584 | } | ||
1585 | |||
1586 | /* | ||
1587 | * Unshare vm if it is being shared | ||
1588 | */ | ||
1589 | static int unshare_vm(unsigned long unshare_flags, struct mm_struct **new_mmp) | ||
1590 | { | ||
1591 | struct mm_struct *mm = current->mm; | ||
1592 | |||
1593 | if ((unshare_flags & CLONE_VM) && | ||
1594 | (mm && atomic_read(&mm->mm_users) > 1)) { | ||
1595 | return -EINVAL; | ||
1596 | } | ||
1597 | |||
1598 | return 0; | ||
1599 | } | ||
1600 | |||
1601 | /* | ||
1602 | * Unshare file descriptor table if it is being shared | 1563 | * Unshare file descriptor table if it is being shared |
1603 | */ | 1564 | */ |
1604 | static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp) | 1565 | static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp) |
@@ -1626,45 +1587,37 @@ static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp | |||
1626 | */ | 1587 | */ |
1627 | SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) | 1588 | SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) |
1628 | { | 1589 | { |
1629 | int err = 0; | ||
1630 | struct fs_struct *fs, *new_fs = NULL; | 1590 | struct fs_struct *fs, *new_fs = NULL; |
1631 | struct sighand_struct *new_sigh = NULL; | ||
1632 | struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL; | ||
1633 | struct files_struct *fd, *new_fd = NULL; | 1591 | struct files_struct *fd, *new_fd = NULL; |
1634 | struct nsproxy *new_nsproxy = NULL; | 1592 | struct nsproxy *new_nsproxy = NULL; |
1635 | int do_sysvsem = 0; | 1593 | int do_sysvsem = 0; |
1594 | int err; | ||
1636 | 1595 | ||
1637 | check_unshare_flags(&unshare_flags); | 1596 | err = check_unshare_flags(unshare_flags); |
1638 | 1597 | if (err) | |
1639 | /* Return -EINVAL for all unsupported flags */ | ||
1640 | err = -EINVAL; | ||
1641 | if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| | ||
1642 | CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| | ||
1643 | CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET)) | ||
1644 | goto bad_unshare_out; | 1598 | goto bad_unshare_out; |
1645 | 1599 | ||
1646 | /* | 1600 | /* |
1601 | * If unsharing namespace, must also unshare filesystem information. | ||
1602 | */ | ||
1603 | if (unshare_flags & CLONE_NEWNS) | ||
1604 | unshare_flags |= CLONE_FS; | ||
1605 | /* | ||
1647 | * CLONE_NEWIPC must also detach from the undolist: after switching | 1606 | * CLONE_NEWIPC must also detach from the undolist: after switching |
1648 | * to a new ipc namespace, the semaphore arrays from the old | 1607 | * to a new ipc namespace, the semaphore arrays from the old |
1649 | * namespace are unreachable. | 1608 | * namespace are unreachable. |
1650 | */ | 1609 | */ |
1651 | if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM)) | 1610 | if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM)) |
1652 | do_sysvsem = 1; | 1611 | do_sysvsem = 1; |
1653 | if ((err = unshare_thread(unshare_flags))) | ||
1654 | goto bad_unshare_out; | ||
1655 | if ((err = unshare_fs(unshare_flags, &new_fs))) | 1612 | if ((err = unshare_fs(unshare_flags, &new_fs))) |
1656 | goto bad_unshare_cleanup_thread; | 1613 | goto bad_unshare_out; |
1657 | if ((err = unshare_sighand(unshare_flags, &new_sigh))) | ||
1658 | goto bad_unshare_cleanup_fs; | ||
1659 | if ((err = unshare_vm(unshare_flags, &new_mm))) | ||
1660 | goto bad_unshare_cleanup_sigh; | ||
1661 | if ((err = unshare_fd(unshare_flags, &new_fd))) | 1614 | if ((err = unshare_fd(unshare_flags, &new_fd))) |
1662 | goto bad_unshare_cleanup_vm; | 1615 | goto bad_unshare_cleanup_fs; |
1663 | if ((err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, | 1616 | if ((err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, |
1664 | new_fs))) | 1617 | new_fs))) |
1665 | goto bad_unshare_cleanup_fd; | 1618 | goto bad_unshare_cleanup_fd; |
1666 | 1619 | ||
1667 | if (new_fs || new_mm || new_fd || do_sysvsem || new_nsproxy) { | 1620 | if (new_fs || new_fd || do_sysvsem || new_nsproxy) { |
1668 | if (do_sysvsem) { | 1621 | if (do_sysvsem) { |
1669 | /* | 1622 | /* |
1670 | * CLONE_SYSVSEM is equivalent to sys_exit(). | 1623 | * CLONE_SYSVSEM is equivalent to sys_exit(). |
@@ -1690,19 +1643,6 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) | |||
1690 | spin_unlock(&fs->lock); | 1643 | spin_unlock(&fs->lock); |
1691 | } | 1644 | } |
1692 | 1645 | ||
1693 | if (new_mm) { | ||
1694 | mm = current->mm; | ||
1695 | active_mm = current->active_mm; | ||
1696 | current->mm = new_mm; | ||
1697 | current->active_mm = new_mm; | ||
1698 | if (current->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) { | ||
1699 | atomic_dec(&mm->oom_disable_count); | ||
1700 | atomic_inc(&new_mm->oom_disable_count); | ||
1701 | } | ||
1702 | activate_mm(active_mm, new_mm); | ||
1703 | new_mm = mm; | ||
1704 | } | ||
1705 | |||
1706 | if (new_fd) { | 1646 | if (new_fd) { |
1707 | fd = current->files; | 1647 | fd = current->files; |
1708 | current->files = new_fd; | 1648 | current->files = new_fd; |
@@ -1719,20 +1659,10 @@ bad_unshare_cleanup_fd: | |||
1719 | if (new_fd) | 1659 | if (new_fd) |
1720 | put_files_struct(new_fd); | 1660 | put_files_struct(new_fd); |
1721 | 1661 | ||
1722 | bad_unshare_cleanup_vm: | ||
1723 | if (new_mm) | ||
1724 | mmput(new_mm); | ||
1725 | |||
1726 | bad_unshare_cleanup_sigh: | ||
1727 | if (new_sigh) | ||
1728 | if (atomic_dec_and_test(&new_sigh->count)) | ||
1729 | kmem_cache_free(sighand_cachep, new_sigh); | ||
1730 | |||
1731 | bad_unshare_cleanup_fs: | 1662 | bad_unshare_cleanup_fs: |
1732 | if (new_fs) | 1663 | if (new_fs) |
1733 | free_fs_struct(new_fs); | 1664 | free_fs_struct(new_fs); |
1734 | 1665 | ||
1735 | bad_unshare_cleanup_thread: | ||
1736 | bad_unshare_out: | 1666 | bad_unshare_out: |
1737 | return err; | 1667 | return err; |
1738 | } | 1668 | } |
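With check_unshare_flags() rewritten, sys_unshare() rejects unsupported combinations up front (CLONE_THREAD, CLONE_SIGHAND and CLONE_VM only pass when there is nothing to split off) instead of silently widening the flag set; only the CLONE_NEWNS-implies-CLONE_FS adjustment remains. From userspace the interface is unchanged; a minimal illustrative caller:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	/* Ask for a private mount namespace; per the code above the kernel
	 * also unshares fs information (CLONE_NEWNS implies CLONE_FS). */
	if (unshare(CLONE_NEWNS) == -1) {
		perror("unshare");	/* e.g. EPERM without CAP_SYS_ADMIN */
		return 1;
	}
	return 0;
}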
diff --git a/kernel/futex.c b/kernel/futex.c
index bda415715382..fe28dc282eae 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -782,8 +782,8 @@ static void __unqueue_futex(struct futex_q *q) | |||
782 | { | 782 | { |
783 | struct futex_hash_bucket *hb; | 783 | struct futex_hash_bucket *hb; |
784 | 784 | ||
785 | if (WARN_ON(!q->lock_ptr || !spin_is_locked(q->lock_ptr) | 785 | if (WARN_ON_SMP(!q->lock_ptr || !spin_is_locked(q->lock_ptr)) |
786 | || plist_node_empty(&q->list))) | 786 | || WARN_ON(plist_node_empty(&q->list))) |
787 | return; | 787 | return; |
788 | 788 | ||
789 | hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock); | 789 | hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock); |
@@ -1886,7 +1886,7 @@ retry: | |||
1886 | restart->futex.val = val; | 1886 | restart->futex.val = val; |
1887 | restart->futex.time = abs_time->tv64; | 1887 | restart->futex.time = abs_time->tv64; |
1888 | restart->futex.bitset = bitset; | 1888 | restart->futex.bitset = bitset; |
1889 | restart->futex.flags = flags; | 1889 | restart->futex.flags = flags | FLAGS_HAS_TIMEOUT; |
1890 | 1890 | ||
1891 | ret = -ERESTART_RESTARTBLOCK; | 1891 | ret = -ERESTART_RESTARTBLOCK; |
1892 | 1892 | ||
@@ -2418,10 +2418,19 @@ SYSCALL_DEFINE3(get_robust_list, int, pid, | |||
2418 | goto err_unlock; | 2418 | goto err_unlock; |
2419 | ret = -EPERM; | 2419 | ret = -EPERM; |
2420 | pcred = __task_cred(p); | 2420 | pcred = __task_cred(p); |
2421 | /* If victim is in different user_ns, then uids are not | ||
2422 | comparable, so we must have CAP_SYS_PTRACE */ | ||
2423 | if (cred->user->user_ns != pcred->user->user_ns) { | ||
2424 | if (!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE)) | ||
2425 | goto err_unlock; | ||
2426 | goto ok; | ||
2427 | } | ||
2428 | /* If victim is in same user_ns, then uids are comparable */ | ||
2421 | if (cred->euid != pcred->euid && | 2429 | if (cred->euid != pcred->euid && |
2422 | cred->euid != pcred->uid && | 2430 | cred->euid != pcred->uid && |
2423 | !capable(CAP_SYS_PTRACE)) | 2431 | !ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE)) |
2424 | goto err_unlock; | 2432 | goto err_unlock; |
2433 | ok: | ||
2425 | head = p->robust_list; | 2434 | head = p->robust_list; |
2426 | rcu_read_unlock(); | 2435 | rcu_read_unlock(); |
2427 | } | 2436 | } |
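get_robust_list() now only trusts a direct euid/uid comparison when caller and target share a user namespace; across namespaces it requires CAP_SYS_PTRACE targeted at the victim's namespace (the futex_compat.c hunk below makes the identical change). A condensed restatement of the resulting policy, purely for illustration:

static bool example_may_read_robust_list(const struct cred *cred,
					  const struct cred *pcred)
{
	if (cred->user->user_ns != pcred->user->user_ns)
		return ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE);
	return cred->euid == pcred->euid ||
	       cred->euid == pcred->uid ||
	       ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE);
}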
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index a7934ac75e5b..5f9e689dc8f0 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -153,10 +153,19 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, | |||
153 | goto err_unlock; | 153 | goto err_unlock; |
154 | ret = -EPERM; | 154 | ret = -EPERM; |
155 | pcred = __task_cred(p); | 155 | pcred = __task_cred(p); |
156 | /* If victim is in different user_ns, then uids are not | ||
157 | comparable, so we must have CAP_SYS_PTRACE */ | ||
158 | if (cred->user->user_ns != pcred->user->user_ns) { | ||
159 | if (!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE)) | ||
160 | goto err_unlock; | ||
161 | goto ok; | ||
162 | } | ||
163 | /* If victim is in same user_ns, then uids are comparable */ | ||
156 | if (cred->euid != pcred->euid && | 164 | if (cred->euid != pcred->euid && |
157 | cred->euid != pcred->uid && | 165 | cred->euid != pcred->uid && |
158 | !capable(CAP_SYS_PTRACE)) | 166 | !ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE)) |
159 | goto err_unlock; | 167 | goto err_unlock; |
168 | ok: | ||
160 | head = p->compat_robust_list; | 169 | head = p->compat_robust_list; |
161 | rcu_read_unlock(); | 170 | rcu_read_unlock(); |
162 | } | 171 | } |
diff --git a/kernel/gcov/Makefile b/kernel/gcov/Makefile
index 3f761001d517..e97ca59e2520 100644
--- a/kernel/gcov/Makefile
+++ b/kernel/gcov/Makefile
@@ -1,3 +1,3 @@ | |||
1 | EXTRA_CFLAGS := -DSRCTREE='"$(srctree)"' -DOBJTREE='"$(objtree)"' | 1 | ccflags-y := -DSRCTREE='"$(srctree)"' -DOBJTREE='"$(objtree)"' |
2 | 2 | ||
3 | obj-$(CONFIG_GCOV_KERNEL) := base.o fs.o gcc_3_4.o | 3 | obj-$(CONFIG_GCOV_KERNEL) := base.o fs.o gcc_3_4.o |
diff --git a/kernel/groups.c b/kernel/groups.c
index 253dc0f35cf4..1cc476d52dd3 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -233,7 +233,7 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist) | |||
233 | struct group_info *group_info; | 233 | struct group_info *group_info; |
234 | int retval; | 234 | int retval; |
235 | 235 | ||
236 | if (!capable(CAP_SETGID)) | 236 | if (!nsown_capable(CAP_SETGID)) |
237 | return -EPERM; | 237 | return -EPERM; |
238 | if ((unsigned)gidsetsize > NGROUPS_MAX) | 238 | if ((unsigned)gidsetsize > NGROUPS_MAX) |
239 | return -EINVAL; | 239 | return -EINVAL; |
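The setgroups() check switches from capable(CAP_SETGID) to nsown_capable(CAP_SETGID). nsown_capable() itself is not shown in this diff; it is assumed here to be the shorthand introduced alongside this series, roughly:

/* Assumed definition, for illustration - not part of this hunk. */
static inline bool nsown_capable(int cap)
{
	return ns_capable(current_user_ns(), cap);
}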
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index 09bef82d74cb..c574f9a12c48 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -10,13 +10,6 @@ menu "IRQ subsystem" | |||
10 | config GENERIC_HARDIRQS | 10 | config GENERIC_HARDIRQS |
11 | def_bool y | 11 | def_bool y |
12 | 12 | ||
13 | # Select this to disable the deprecated stuff | ||
14 | config GENERIC_HARDIRQS_NO_DEPRECATED | ||
15 | bool | ||
16 | |||
17 | config GENERIC_HARDIRQS_NO_COMPAT | ||
18 | bool | ||
19 | |||
20 | # Options selectable by the architecture code | 13 | # Options selectable by the architecture code |
21 | 14 | ||
22 | # Make sparse irq Kconfig switch below available | 15 | # Make sparse irq Kconfig switch below available |
@@ -31,6 +24,10 @@ config GENERIC_IRQ_PROBE | |||
31 | config GENERIC_IRQ_SHOW | 24 | config GENERIC_IRQ_SHOW |
32 | bool | 25 | bool |
33 | 26 | ||
27 | # Print level/edge extra information | ||
28 | config GENERIC_IRQ_SHOW_LEVEL | ||
29 | bool | ||
30 | |||
34 | # Support for delayed migration from interrupt context | 31 | # Support for delayed migration from interrupt context |
35 | config GENERIC_PENDING_IRQ | 32 | config GENERIC_PENDING_IRQ |
36 | bool | 33 | bool |
@@ -47,6 +44,10 @@ config HARDIRQS_SW_RESEND | |||
47 | config IRQ_PREFLOW_FASTEOI | 44 | config IRQ_PREFLOW_FASTEOI |
48 | bool | 45 | bool |
49 | 46 | ||
47 | # Edge style eoi based handler (cell) | ||
48 | config IRQ_EDGE_EOI_HANDLER | ||
49 | bool | ||
50 | |||
50 | # Support forced irq threading | 51 | # Support forced irq threading |
51 | config IRQ_FORCED_THREADING | 52 | config IRQ_FORCED_THREADING |
52 | bool | 53 | bool |
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index 394784c57060..342d8f44e401 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -70,10 +70,8 @@ unsigned long probe_irq_on(void) | |||
70 | raw_spin_lock_irq(&desc->lock); | 70 | raw_spin_lock_irq(&desc->lock); |
71 | if (!desc->action && irq_settings_can_probe(desc)) { | 71 | if (!desc->action && irq_settings_can_probe(desc)) { |
72 | desc->istate |= IRQS_AUTODETECT | IRQS_WAITING; | 72 | desc->istate |= IRQS_AUTODETECT | IRQS_WAITING; |
73 | if (irq_startup(desc)) { | 73 | if (irq_startup(desc)) |
74 | irq_compat_set_pending(desc); | ||
75 | desc->istate |= IRQS_PENDING; | 74 | desc->istate |= IRQS_PENDING; |
76 | } | ||
77 | } | 75 | } |
78 | raw_spin_unlock_irq(&desc->lock); | 76 | raw_spin_unlock_irq(&desc->lock); |
79 | } | 77 | } |
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index c9c0601f0615..4af1e2b244cb 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c | |||
@@ -34,9 +34,14 @@ int irq_set_chip(unsigned int irq, struct irq_chip *chip) | |||
34 | if (!chip) | 34 | if (!chip) |
35 | chip = &no_irq_chip; | 35 | chip = &no_irq_chip; |
36 | 36 | ||
37 | irq_chip_set_defaults(chip); | ||
38 | desc->irq_data.chip = chip; | 37 | desc->irq_data.chip = chip; |
39 | irq_put_desc_unlock(desc, flags); | 38 | irq_put_desc_unlock(desc, flags); |
39 | /* | ||
40 | * For !CONFIG_SPARSE_IRQ make the irq show up in | ||
41 | * allocated_irqs. For the CONFIG_SPARSE_IRQ case, it is | ||
42 | * already marked, and this call is harmless. | ||
43 | */ | ||
44 | irq_reserve_irq(irq); | ||
40 | return 0; | 45 | return 0; |
41 | } | 46 | } |
42 | EXPORT_SYMBOL(irq_set_chip); | 47 | EXPORT_SYMBOL(irq_set_chip); |
@@ -134,26 +139,22 @@ EXPORT_SYMBOL_GPL(irq_get_irq_data); | |||
134 | 139 | ||
135 | static void irq_state_clr_disabled(struct irq_desc *desc) | 140 | static void irq_state_clr_disabled(struct irq_desc *desc) |
136 | { | 141 | { |
137 | desc->istate &= ~IRQS_DISABLED; | 142 | irqd_clear(&desc->irq_data, IRQD_IRQ_DISABLED); |
138 | irq_compat_clr_disabled(desc); | ||
139 | } | 143 | } |
140 | 144 | ||
141 | static void irq_state_set_disabled(struct irq_desc *desc) | 145 | static void irq_state_set_disabled(struct irq_desc *desc) |
142 | { | 146 | { |
143 | desc->istate |= IRQS_DISABLED; | 147 | irqd_set(&desc->irq_data, IRQD_IRQ_DISABLED); |
144 | irq_compat_set_disabled(desc); | ||
145 | } | 148 | } |
146 | 149 | ||
147 | static void irq_state_clr_masked(struct irq_desc *desc) | 150 | static void irq_state_clr_masked(struct irq_desc *desc) |
148 | { | 151 | { |
149 | desc->istate &= ~IRQS_MASKED; | 152 | irqd_clear(&desc->irq_data, IRQD_IRQ_MASKED); |
150 | irq_compat_clr_masked(desc); | ||
151 | } | 153 | } |
152 | 154 | ||
153 | static void irq_state_set_masked(struct irq_desc *desc) | 155 | static void irq_state_set_masked(struct irq_desc *desc) |
154 | { | 156 | { |
155 | desc->istate |= IRQS_MASKED; | 157 | irqd_set(&desc->irq_data, IRQD_IRQ_MASKED); |
156 | irq_compat_set_masked(desc); | ||
157 | } | 158 | } |
158 | 159 | ||
159 | int irq_startup(struct irq_desc *desc) | 160 | int irq_startup(struct irq_desc *desc) |
@@ -203,126 +204,6 @@ void irq_disable(struct irq_desc *desc) | |||
203 | } | 204 | } |
204 | } | 205 | } |
205 | 206 | ||
206 | #ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED | ||
207 | /* Temporary migration helpers */ | ||
208 | static void compat_irq_mask(struct irq_data *data) | ||
209 | { | ||
210 | data->chip->mask(data->irq); | ||
211 | } | ||
212 | |||
213 | static void compat_irq_unmask(struct irq_data *data) | ||
214 | { | ||
215 | data->chip->unmask(data->irq); | ||
216 | } | ||
217 | |||
218 | static void compat_irq_ack(struct irq_data *data) | ||
219 | { | ||
220 | data->chip->ack(data->irq); | ||
221 | } | ||
222 | |||
223 | static void compat_irq_mask_ack(struct irq_data *data) | ||
224 | { | ||
225 | data->chip->mask_ack(data->irq); | ||
226 | } | ||
227 | |||
228 | static void compat_irq_eoi(struct irq_data *data) | ||
229 | { | ||
230 | data->chip->eoi(data->irq); | ||
231 | } | ||
232 | |||
233 | static void compat_irq_enable(struct irq_data *data) | ||
234 | { | ||
235 | data->chip->enable(data->irq); | ||
236 | } | ||
237 | |||
238 | static void compat_irq_disable(struct irq_data *data) | ||
239 | { | ||
240 | data->chip->disable(data->irq); | ||
241 | } | ||
242 | |||
243 | static void compat_irq_shutdown(struct irq_data *data) | ||
244 | { | ||
245 | data->chip->shutdown(data->irq); | ||
246 | } | ||
247 | |||
248 | static unsigned int compat_irq_startup(struct irq_data *data) | ||
249 | { | ||
250 | return data->chip->startup(data->irq); | ||
251 | } | ||
252 | |||
253 | static int compat_irq_set_affinity(struct irq_data *data, | ||
254 | const struct cpumask *dest, bool force) | ||
255 | { | ||
256 | return data->chip->set_affinity(data->irq, dest); | ||
257 | } | ||
258 | |||
259 | static int compat_irq_set_type(struct irq_data *data, unsigned int type) | ||
260 | { | ||
261 | return data->chip->set_type(data->irq, type); | ||
262 | } | ||
263 | |||
264 | static int compat_irq_set_wake(struct irq_data *data, unsigned int on) | ||
265 | { | ||
266 | return data->chip->set_wake(data->irq, on); | ||
267 | } | ||
268 | |||
269 | static int compat_irq_retrigger(struct irq_data *data) | ||
270 | { | ||
271 | return data->chip->retrigger(data->irq); | ||
272 | } | ||
273 | |||
274 | static void compat_bus_lock(struct irq_data *data) | ||
275 | { | ||
276 | data->chip->bus_lock(data->irq); | ||
277 | } | ||
278 | |||
279 | static void compat_bus_sync_unlock(struct irq_data *data) | ||
280 | { | ||
281 | data->chip->bus_sync_unlock(data->irq); | ||
282 | } | ||
283 | #endif | ||
284 | |||
285 | /* | ||
286 | * Fixup enable/disable function pointers | ||
287 | */ | ||
288 | void irq_chip_set_defaults(struct irq_chip *chip) | ||
289 | { | ||
290 | #ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED | ||
291 | if (chip->enable) | ||
292 | chip->irq_enable = compat_irq_enable; | ||
293 | if (chip->disable) | ||
294 | chip->irq_disable = compat_irq_disable; | ||
295 | if (chip->shutdown) | ||
296 | chip->irq_shutdown = compat_irq_shutdown; | ||
297 | if (chip->startup) | ||
298 | chip->irq_startup = compat_irq_startup; | ||
299 | if (!chip->end) | ||
300 | chip->end = dummy_irq_chip.end; | ||
301 | if (chip->bus_lock) | ||
302 | chip->irq_bus_lock = compat_bus_lock; | ||
303 | if (chip->bus_sync_unlock) | ||
304 | chip->irq_bus_sync_unlock = compat_bus_sync_unlock; | ||
305 | if (chip->mask) | ||
306 | chip->irq_mask = compat_irq_mask; | ||
307 | if (chip->unmask) | ||
308 | chip->irq_unmask = compat_irq_unmask; | ||
309 | if (chip->ack) | ||
310 | chip->irq_ack = compat_irq_ack; | ||
311 | if (chip->mask_ack) | ||
312 | chip->irq_mask_ack = compat_irq_mask_ack; | ||
313 | if (chip->eoi) | ||
314 | chip->irq_eoi = compat_irq_eoi; | ||
315 | if (chip->set_affinity) | ||
316 | chip->irq_set_affinity = compat_irq_set_affinity; | ||
317 | if (chip->set_type) | ||
318 | chip->irq_set_type = compat_irq_set_type; | ||
319 | if (chip->set_wake) | ||
320 | chip->irq_set_wake = compat_irq_set_wake; | ||
321 | if (chip->retrigger) | ||
322 | chip->irq_retrigger = compat_irq_retrigger; | ||
323 | #endif | ||
324 | } | ||
325 | |||
326 | static inline void mask_ack_irq(struct irq_desc *desc) | 207 | static inline void mask_ack_irq(struct irq_desc *desc) |
327 | { | 208 | { |
328 | if (desc->irq_data.chip->irq_mask_ack) | 209 | if (desc->irq_data.chip->irq_mask_ack) |
@@ -372,11 +253,10 @@ void handle_nested_irq(unsigned int irq) | |||
372 | kstat_incr_irqs_this_cpu(irq, desc); | 253 | kstat_incr_irqs_this_cpu(irq, desc); |
373 | 254 | ||
374 | action = desc->action; | 255 | action = desc->action; |
375 | if (unlikely(!action || (desc->istate & IRQS_DISABLED))) | 256 | if (unlikely(!action || irqd_irq_disabled(&desc->irq_data))) |
376 | goto out_unlock; | 257 | goto out_unlock; |
377 | 258 | ||
378 | irq_compat_set_progress(desc); | 259 | irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS); |
379 | desc->istate |= IRQS_INPROGRESS; | ||
380 | raw_spin_unlock_irq(&desc->lock); | 260 | raw_spin_unlock_irq(&desc->lock); |
381 | 261 | ||
382 | action_ret = action->thread_fn(action->irq, action->dev_id); | 262 | action_ret = action->thread_fn(action->irq, action->dev_id); |
@@ -384,8 +264,7 @@ void handle_nested_irq(unsigned int irq) | |||
384 | note_interrupt(irq, desc, action_ret); | 264 | note_interrupt(irq, desc, action_ret); |
385 | 265 | ||
386 | raw_spin_lock_irq(&desc->lock); | 266 | raw_spin_lock_irq(&desc->lock); |
387 | desc->istate &= ~IRQS_INPROGRESS; | 267 | irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS); |
388 | irq_compat_clr_progress(desc); | ||
389 | 268 | ||
390 | out_unlock: | 269 | out_unlock: |
391 | raw_spin_unlock_irq(&desc->lock); | 270 | raw_spin_unlock_irq(&desc->lock); |
@@ -416,14 +295,14 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc) | |||
416 | { | 295 | { |
417 | raw_spin_lock(&desc->lock); | 296 | raw_spin_lock(&desc->lock); |
418 | 297 | ||
419 | if (unlikely(desc->istate & IRQS_INPROGRESS)) | 298 | if (unlikely(irqd_irq_inprogress(&desc->irq_data))) |
420 | if (!irq_check_poll(desc)) | 299 | if (!irq_check_poll(desc)) |
421 | goto out_unlock; | 300 | goto out_unlock; |
422 | 301 | ||
423 | desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); | 302 | desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); |
424 | kstat_incr_irqs_this_cpu(irq, desc); | 303 | kstat_incr_irqs_this_cpu(irq, desc); |
425 | 304 | ||
426 | if (unlikely(!desc->action || (desc->istate & IRQS_DISABLED))) | 305 | if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) |
427 | goto out_unlock; | 306 | goto out_unlock; |
428 | 307 | ||
429 | handle_irq_event(desc); | 308 | handle_irq_event(desc); |
@@ -448,7 +327,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc) | |||
448 | raw_spin_lock(&desc->lock); | 327 | raw_spin_lock(&desc->lock); |
449 | mask_ack_irq(desc); | 328 | mask_ack_irq(desc); |
450 | 329 | ||
451 | if (unlikely(desc->istate & IRQS_INPROGRESS)) | 330 | if (unlikely(irqd_irq_inprogress(&desc->irq_data))) |
452 | if (!irq_check_poll(desc)) | 331 | if (!irq_check_poll(desc)) |
453 | goto out_unlock; | 332 | goto out_unlock; |
454 | 333 | ||
@@ -459,12 +338,12 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc) | |||
459 | * If it's disabled or no action available | 338 | * If it's disabled or no action available |
460 | * keep it masked and get out of here | 339 | * keep it masked and get out of here |
461 | */ | 340 | */ |
462 | if (unlikely(!desc->action || (desc->istate & IRQS_DISABLED))) | 341 | if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) |
463 | goto out_unlock; | 342 | goto out_unlock; |
464 | 343 | ||
465 | handle_irq_event(desc); | 344 | handle_irq_event(desc); |
466 | 345 | ||
467 | if (!(desc->istate & (IRQS_DISABLED | IRQS_ONESHOT))) | 346 | if (!irqd_irq_disabled(&desc->irq_data) && !(desc->istate & IRQS_ONESHOT)) |
468 | unmask_irq(desc); | 347 | unmask_irq(desc); |
469 | out_unlock: | 348 | out_unlock: |
470 | raw_spin_unlock(&desc->lock); | 349 | raw_spin_unlock(&desc->lock); |
@@ -496,7 +375,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) | |||
496 | { | 375 | { |
497 | raw_spin_lock(&desc->lock); | 376 | raw_spin_lock(&desc->lock); |
498 | 377 | ||
499 | if (unlikely(desc->istate & IRQS_INPROGRESS)) | 378 | if (unlikely(irqd_irq_inprogress(&desc->irq_data))) |
500 | if (!irq_check_poll(desc)) | 379 | if (!irq_check_poll(desc)) |
501 | goto out; | 380 | goto out; |
502 | 381 | ||
@@ -507,8 +386,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) | |||
507 | * If it's disabled or no action available | 386 | * If it's disabled or no action available |
508 | * then mask it and get out of here: | 387 | * then mask it and get out of here: |
509 | */ | 388 | */ |
510 | if (unlikely(!desc->action || (desc->istate & IRQS_DISABLED))) { | 389 | if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) { |
511 | irq_compat_set_pending(desc); | ||
512 | desc->istate |= IRQS_PENDING; | 390 | desc->istate |= IRQS_PENDING; |
513 | mask_irq(desc); | 391 | mask_irq(desc); |
514 | goto out; | 392 | goto out; |
@@ -537,7 +415,7 @@ out: | |||
537 | * @desc: the interrupt description structure for this irq | 415 | * @desc: the interrupt description structure for this irq |
538 | * | 416 | * |
539 | * Interrupt occurs on the falling and/or rising edge of a hardware | 417 | * Interrupt occurs on the falling and/or rising edge of a hardware |
540 | * signal. The occurence is latched into the irq controller hardware | 418 | * signal. The occurrence is latched into the irq controller hardware |
541 | * and must be acked in order to be reenabled. After the ack another | 419 | * and must be acked in order to be reenabled. After the ack another |
542 | * interrupt can happen on the same source even before the first one | 420 | * interrupt can happen on the same source even before the first one |
543 | * is handled by the associated event handler. If this happens it | 421 | * is handled by the associated event handler. If this happens it |
@@ -558,10 +436,9 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc) | |||
558 | * we shouldn't process the IRQ. Mark it pending, handle | 436 | * we shouldn't process the IRQ. Mark it pending, handle |
559 | * the necessary masking and go out | 437 | * the necessary masking and go out |
560 | */ | 438 | */ |
561 | if (unlikely((desc->istate & (IRQS_DISABLED | IRQS_INPROGRESS) || | 439 | if (unlikely(irqd_irq_disabled(&desc->irq_data) || |
562 | !desc->action))) { | 440 | irqd_irq_inprogress(&desc->irq_data) || !desc->action)) { |
563 | if (!irq_check_poll(desc)) { | 441 | if (!irq_check_poll(desc)) { |
564 | irq_compat_set_pending(desc); | ||
565 | desc->istate |= IRQS_PENDING; | 442 | desc->istate |= IRQS_PENDING; |
566 | mask_ack_irq(desc); | 443 | mask_ack_irq(desc); |
567 | goto out_unlock; | 444 | goto out_unlock; |
@@ -584,20 +461,65 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc) | |||
584 | * Re-enable it, if it was not disabled in the meantime. | 461 | * Re-enable it, if it was not disabled in the meantime. |
585 | */ | 462 | */ |
586 | if (unlikely(desc->istate & IRQS_PENDING)) { | 463 | if (unlikely(desc->istate & IRQS_PENDING)) { |
587 | if (!(desc->istate & IRQS_DISABLED) && | 464 | if (!irqd_irq_disabled(&desc->irq_data) && |
588 | (desc->istate & IRQS_MASKED)) | 465 | irqd_irq_masked(&desc->irq_data)) |
589 | unmask_irq(desc); | 466 | unmask_irq(desc); |
590 | } | 467 | } |
591 | 468 | ||
592 | handle_irq_event(desc); | 469 | handle_irq_event(desc); |
593 | 470 | ||
594 | } while ((desc->istate & IRQS_PENDING) && | 471 | } while ((desc->istate & IRQS_PENDING) && |
595 | !(desc->istate & IRQS_DISABLED)); | 472 | !irqd_irq_disabled(&desc->irq_data)); |
596 | 473 | ||
597 | out_unlock: | 474 | out_unlock: |
598 | raw_spin_unlock(&desc->lock); | 475 | raw_spin_unlock(&desc->lock); |
599 | } | 476 | } |
600 | 477 | ||
478 | #ifdef CONFIG_IRQ_EDGE_EOI_HANDLER | ||
479 | /** | ||
480 | * handle_edge_eoi_irq - edge eoi type IRQ handler | ||
481 | * @irq: the interrupt number | ||
482 | * @desc: the interrupt description structure for this irq | ||
483 | * | ||
484 | * Similar as the above handle_edge_irq, but using eoi and w/o the | ||
485 | * mask/unmask logic. | ||
486 | */ | ||
487 | void handle_edge_eoi_irq(unsigned int irq, struct irq_desc *desc) | ||
488 | { | ||
489 | struct irq_chip *chip = irq_desc_get_chip(desc); | ||
490 | |||
491 | raw_spin_lock(&desc->lock); | ||
492 | |||
493 | desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); | ||
494 | /* | ||
495 | * If we're currently running this IRQ, or it's disabled, | ||
496 | * we shouldn't process the IRQ. Mark it pending, handle | ||
497 | * the necessary masking and go out | ||
498 | */ | ||
499 | if (unlikely(irqd_irq_disabled(&desc->irq_data) || | ||
500 | irqd_irq_inprogress(&desc->irq_data) || !desc->action)) { | ||
501 | if (!irq_check_poll(desc)) { | ||
502 | desc->istate |= IRQS_PENDING; | ||
503 | goto out_eoi; | ||
504 | } | ||
505 | } | ||
506 | kstat_incr_irqs_this_cpu(irq, desc); | ||
507 | |||
508 | do { | ||
509 | if (unlikely(!desc->action)) | ||
510 | goto out_eoi; | ||
511 | |||
512 | handle_irq_event(desc); | ||
513 | |||
514 | } while ((desc->istate & IRQS_PENDING) && | ||
515 | !irqd_irq_disabled(&desc->irq_data)); | ||
516 | |||
517 | out_eoi: | ||
518 | chip->irq_eoi(&desc->irq_data); | ||
519 | raw_spin_unlock(&desc->lock); | ||
520 | } | ||
521 | #endif | ||
522 | |||
601 | /** | 523 | /** |
602 | * handle_percpu_irq - Per CPU local irq handler | 524 | * handle_percpu_irq - Per CPU local irq handler |
603 | * @irq: the interrupt number | 525 | * @irq: the interrupt number |
@@ -642,8 +564,7 @@ __irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, | |||
642 | if (handle == handle_bad_irq) { | 564 | if (handle == handle_bad_irq) { |
643 | if (desc->irq_data.chip != &no_irq_chip) | 565 | if (desc->irq_data.chip != &no_irq_chip) |
644 | mask_ack_irq(desc); | 566 | mask_ack_irq(desc); |
645 | irq_compat_set_disabled(desc); | 567 | irq_state_set_disabled(desc); |
646 | desc->istate |= IRQS_DISABLED; | ||
647 | desc->depth = 1; | 568 | desc->depth = 1; |
648 | } | 569 | } |
649 | desc->handle_irq = handle; | 570 | desc->handle_irq = handle; |
@@ -684,8 +605,70 @@ void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set) | |||
684 | irqd_set(&desc->irq_data, IRQD_PER_CPU); | 605 | irqd_set(&desc->irq_data, IRQD_PER_CPU); |
685 | if (irq_settings_can_move_pcntxt(desc)) | 606 | if (irq_settings_can_move_pcntxt(desc)) |
686 | irqd_set(&desc->irq_data, IRQD_MOVE_PCNTXT); | 607 | irqd_set(&desc->irq_data, IRQD_MOVE_PCNTXT); |
608 | if (irq_settings_is_level(desc)) | ||
609 | irqd_set(&desc->irq_data, IRQD_LEVEL); | ||
687 | 610 | ||
688 | irqd_set(&desc->irq_data, irq_settings_get_trigger_mask(desc)); | 611 | irqd_set(&desc->irq_data, irq_settings_get_trigger_mask(desc)); |
689 | 612 | ||
690 | irq_put_desc_unlock(desc, flags); | 613 | irq_put_desc_unlock(desc, flags); |
691 | } | 614 | } |
615 | |||
616 | /** | ||
617 | * irq_cpu_online - Invoke all irq_cpu_online functions. | ||
618 | * | ||
619 | * Iterate through all irqs and invoke the chip.irq_cpu_online() | ||
620 | * for each. | ||
621 | */ | ||
622 | void irq_cpu_online(void) | ||
623 | { | ||
624 | struct irq_desc *desc; | ||
625 | struct irq_chip *chip; | ||
626 | unsigned long flags; | ||
627 | unsigned int irq; | ||
628 | |||
629 | for_each_active_irq(irq) { | ||
630 | desc = irq_to_desc(irq); | ||
631 | if (!desc) | ||
632 | continue; | ||
633 | |||
634 | raw_spin_lock_irqsave(&desc->lock, flags); | ||
635 | |||
636 | chip = irq_data_get_irq_chip(&desc->irq_data); | ||
637 | if (chip && chip->irq_cpu_online && | ||
638 | (!(chip->flags & IRQCHIP_ONOFFLINE_ENABLED) || | ||
639 | !irqd_irq_disabled(&desc->irq_data))) | ||
640 | chip->irq_cpu_online(&desc->irq_data); | ||
641 | |||
642 | raw_spin_unlock_irqrestore(&desc->lock, flags); | ||
643 | } | ||
644 | } | ||
645 | |||
646 | /** | ||
647 | * irq_cpu_offline - Invoke all irq_cpu_offline functions. | ||
648 | * | ||
649 | * Iterate through all irqs and invoke the chip.irq_cpu_offline() | ||
650 | * for each. | ||
651 | */ | ||
652 | void irq_cpu_offline(void) | ||
653 | { | ||
654 | struct irq_desc *desc; | ||
655 | struct irq_chip *chip; | ||
656 | unsigned long flags; | ||
657 | unsigned int irq; | ||
658 | |||
659 | for_each_active_irq(irq) { | ||
660 | desc = irq_to_desc(irq); | ||
661 | if (!desc) | ||
662 | continue; | ||
663 | |||
664 | raw_spin_lock_irqsave(&desc->lock, flags); | ||
665 | |||
666 | chip = irq_data_get_irq_chip(&desc->irq_data); | ||
667 | if (chip && chip->irq_cpu_offline && | ||
668 | (!(chip->flags & IRQCHIP_ONOFFLINE_ENABLED) || | ||
669 | !irqd_irq_disabled(&desc->irq_data))) | ||
670 | chip->irq_cpu_offline(&desc->irq_data); | ||
671 | |||
672 | raw_spin_unlock_irqrestore(&desc->lock, flags); | ||
673 | } | ||
674 | } | ||
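
The chip.c hunks above replace direct fiddling with desc->istate bits (IRQS_DISABLED, IRQS_MASKED, IRQS_INPROGRESS) by calls to irqd_set()/irqd_clear()/irqd_irq_*() accessors on the per-irq state word. A self-contained, hedged C sketch of that accessor pattern follows; the flag values and the exact helper bodies are guesses for illustration, not the kernel definitions.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative flag values; the real kernel defines differ. */
enum {
        IRQD_IRQ_DISABLED   = 1u << 16,
        IRQD_IRQ_MASKED     = 1u << 17,
        IRQD_IRQ_INPROGRESS = 1u << 18,
};

struct irq_data { unsigned int state_use_accessors; };

/* All state changes funnel through these helpers, so handler code
 * never pokes the flag word directly. */
static void irqd_set(struct irq_data *d, unsigned int mask)   { d->state_use_accessors |= mask; }
static void irqd_clear(struct irq_data *d, unsigned int mask) { d->state_use_accessors &= ~mask; }
static bool irqd_irq_disabled(const struct irq_data *d) { return d->state_use_accessors & IRQD_IRQ_DISABLED; }

int main(void)
{
        struct irq_data d = { 0 };
        irqd_set(&d, IRQD_IRQ_DISABLED);
        printf("disabled=%d\n", irqd_irq_disabled(&d));
        irqd_clear(&d, IRQD_IRQ_DISABLED);
        printf("disabled=%d\n", irqd_irq_disabled(&d));
        return 0;
}
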
diff --git a/kernel/irq/compat.h b/kernel/irq/compat.h deleted file mode 100644 index 6bbaf66aca85..000000000000 --- a/kernel/irq/compat.h +++ /dev/null | |||
@@ -1,72 +0,0 @@ | |||
1 | /* | ||
2 | * Compat layer for transition period | ||
3 | */ | ||
4 | #ifndef CONFIG_GENERIC_HARDIRQS_NO_COMPAT | ||
5 | static inline void irq_compat_set_progress(struct irq_desc *desc) | ||
6 | { | ||
7 | desc->status |= IRQ_INPROGRESS; | ||
8 | } | ||
9 | |||
10 | static inline void irq_compat_clr_progress(struct irq_desc *desc) | ||
11 | { | ||
12 | desc->status &= ~IRQ_INPROGRESS; | ||
13 | } | ||
14 | static inline void irq_compat_set_disabled(struct irq_desc *desc) | ||
15 | { | ||
16 | desc->status |= IRQ_DISABLED; | ||
17 | } | ||
18 | static inline void irq_compat_clr_disabled(struct irq_desc *desc) | ||
19 | { | ||
20 | desc->status &= ~IRQ_DISABLED; | ||
21 | } | ||
22 | static inline void irq_compat_set_pending(struct irq_desc *desc) | ||
23 | { | ||
24 | desc->status |= IRQ_PENDING; | ||
25 | } | ||
26 | |||
27 | static inline void irq_compat_clr_pending(struct irq_desc *desc) | ||
28 | { | ||
29 | desc->status &= ~IRQ_PENDING; | ||
30 | } | ||
31 | static inline void irq_compat_set_masked(struct irq_desc *desc) | ||
32 | { | ||
33 | desc->status |= IRQ_MASKED; | ||
34 | } | ||
35 | |||
36 | static inline void irq_compat_clr_masked(struct irq_desc *desc) | ||
37 | { | ||
38 | desc->status &= ~IRQ_MASKED; | ||
39 | } | ||
40 | static inline void irq_compat_set_move_pending(struct irq_desc *desc) | ||
41 | { | ||
42 | desc->status |= IRQ_MOVE_PENDING; | ||
43 | } | ||
44 | |||
45 | static inline void irq_compat_clr_move_pending(struct irq_desc *desc) | ||
46 | { | ||
47 | desc->status &= ~IRQ_MOVE_PENDING; | ||
48 | } | ||
49 | static inline void irq_compat_set_affinity(struct irq_desc *desc) | ||
50 | { | ||
51 | desc->status |= IRQ_AFFINITY_SET; | ||
52 | } | ||
53 | |||
54 | static inline void irq_compat_clr_affinity(struct irq_desc *desc) | ||
55 | { | ||
56 | desc->status &= ~IRQ_AFFINITY_SET; | ||
57 | } | ||
58 | #else | ||
59 | static inline void irq_compat_set_progress(struct irq_desc *desc) { } | ||
60 | static inline void irq_compat_clr_progress(struct irq_desc *desc) { } | ||
61 | static inline void irq_compat_set_disabled(struct irq_desc *desc) { } | ||
62 | static inline void irq_compat_clr_disabled(struct irq_desc *desc) { } | ||
63 | static inline void irq_compat_set_pending(struct irq_desc *desc) { } | ||
64 | static inline void irq_compat_clr_pending(struct irq_desc *desc) { } | ||
65 | static inline void irq_compat_set_masked(struct irq_desc *desc) { } | ||
66 | static inline void irq_compat_clr_masked(struct irq_desc *desc) { } | ||
67 | static inline void irq_compat_set_move_pending(struct irq_desc *desc) { } | ||
68 | static inline void irq_compat_clr_move_pending(struct irq_desc *desc) { } | ||
69 | static inline void irq_compat_set_affinity(struct irq_desc *desc) { } | ||
70 | static inline void irq_compat_clr_affinity(struct irq_desc *desc) { } | ||
71 | #endif | ||
72 | |||
diff --git a/kernel/irq/debug.h b/kernel/irq/debug.h index d1a33b7fa61d..306cba37e9a5 100644 --- a/kernel/irq/debug.h +++ b/kernel/irq/debug.h | |||
@@ -4,8 +4,10 @@ | |||
4 | 4 | ||
5 | #include <linux/kallsyms.h> | 5 | #include <linux/kallsyms.h> |
6 | 6 | ||
7 | #define P(f) if (desc->status & f) printk("%14s set\n", #f) | 7 | #define P(f) if (desc->status_use_accessors & f) printk("%14s set\n", #f) |
8 | #define PS(f) if (desc->istate & f) printk("%14s set\n", #f) | 8 | #define PS(f) if (desc->istate & f) printk("%14s set\n", #f) |
9 | /* FIXME */ | ||
10 | #define PD(f) do { } while (0) | ||
9 | 11 | ||
10 | static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc) | 12 | static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc) |
11 | { | 13 | { |
@@ -28,13 +30,15 @@ static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc) | |||
28 | P(IRQ_NOAUTOEN); | 30 | P(IRQ_NOAUTOEN); |
29 | 31 | ||
30 | PS(IRQS_AUTODETECT); | 32 | PS(IRQS_AUTODETECT); |
31 | PS(IRQS_INPROGRESS); | ||
32 | PS(IRQS_REPLAY); | 33 | PS(IRQS_REPLAY); |
33 | PS(IRQS_WAITING); | 34 | PS(IRQS_WAITING); |
34 | PS(IRQS_DISABLED); | ||
35 | PS(IRQS_PENDING); | 35 | PS(IRQS_PENDING); |
36 | PS(IRQS_MASKED); | 36 | |
37 | PD(IRQS_INPROGRESS); | ||
38 | PD(IRQS_DISABLED); | ||
39 | PD(IRQS_MASKED); | ||
37 | } | 40 | } |
38 | 41 | ||
39 | #undef P | 42 | #undef P |
40 | #undef PS | 43 | #undef PS |
44 | #undef PD | ||
diff --git a/kernel/irq/dummychip.c b/kernel/irq/dummychip.c index 20dc5474947e..b5fcd96c7102 100644 --- a/kernel/irq/dummychip.c +++ b/kernel/irq/dummychip.c | |||
@@ -31,13 +31,6 @@ static unsigned int noop_ret(struct irq_data *data) | |||
31 | return 0; | 31 | return 0; |
32 | } | 32 | } |
33 | 33 | ||
34 | #ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED | ||
35 | static void compat_noop(unsigned int irq) { } | ||
36 | #define END_INIT .end = compat_noop | ||
37 | #else | ||
38 | #define END_INIT | ||
39 | #endif | ||
40 | |||
41 | /* | 34 | /* |
42 | * Generic no controller implementation | 35 | * Generic no controller implementation |
43 | */ | 36 | */ |
@@ -48,7 +41,6 @@ struct irq_chip no_irq_chip = { | |||
48 | .irq_enable = noop, | 41 | .irq_enable = noop, |
49 | .irq_disable = noop, | 42 | .irq_disable = noop, |
50 | .irq_ack = ack_bad, | 43 | .irq_ack = ack_bad, |
51 | END_INIT | ||
52 | }; | 44 | }; |
53 | 45 | ||
54 | /* | 46 | /* |
@@ -64,5 +56,4 @@ struct irq_chip dummy_irq_chip = { | |||
64 | .irq_ack = noop, | 56 | .irq_ack = noop, |
65 | .irq_mask = noop, | 57 | .irq_mask = noop, |
66 | .irq_unmask = noop, | 58 | .irq_unmask = noop, |
67 | END_INIT | ||
68 | }; | 59 | }; |
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 517561fc7317..90cb55f6d7eb 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c | |||
@@ -175,28 +175,13 @@ irqreturn_t handle_irq_event(struct irq_desc *desc) | |||
175 | struct irqaction *action = desc->action; | 175 | struct irqaction *action = desc->action; |
176 | irqreturn_t ret; | 176 | irqreturn_t ret; |
177 | 177 | ||
178 | irq_compat_clr_pending(desc); | ||
179 | desc->istate &= ~IRQS_PENDING; | 178 | desc->istate &= ~IRQS_PENDING; |
180 | irq_compat_set_progress(desc); | 179 | irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS); |
181 | desc->istate |= IRQS_INPROGRESS; | ||
182 | raw_spin_unlock(&desc->lock); | 180 | raw_spin_unlock(&desc->lock); |
183 | 181 | ||
184 | ret = handle_irq_event_percpu(desc, action); | 182 | ret = handle_irq_event_percpu(desc, action); |
185 | 183 | ||
186 | raw_spin_lock(&desc->lock); | 184 | raw_spin_lock(&desc->lock); |
187 | desc->istate &= ~IRQS_INPROGRESS; | 185 | irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS); |
188 | irq_compat_clr_progress(desc); | ||
189 | return ret; | 186 | return ret; |
190 | } | 187 | } |
191 | |||
192 | /** | ||
193 | * handle_IRQ_event - irq action chain handler | ||
194 | * @irq: the interrupt number | ||
195 | * @action: the interrupt action chain for this irq | ||
196 | * | ||
197 | * Handles the action chain of an irq event | ||
198 | */ | ||
199 | irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) | ||
200 | { | ||
201 | return handle_irq_event_percpu(irq_to_desc(irq), action); | ||
202 | } | ||
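
The handle_irq_event() change keeps the long-standing locking shape: flag the descriptor as in-progress under the lock, drop the lock while the action runs, then retake it and clear the flag (the kernel returns with the lock still held; the model below releases it for simplicity). A hedged pthread-based sketch of that shape, not kernel code:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct desc {
        pthread_mutex_t lock;
        bool in_progress;
};

/* Run @action with the descriptor marked busy, but without holding
 * the lock across the (potentially slow) action itself. */
static int handle_event(struct desc *d, int (*action)(void))
{
        int ret;

        pthread_mutex_lock(&d->lock);
        d->in_progress = true;
        pthread_mutex_unlock(&d->lock);

        ret = action();         /* handler runs lock-free */

        pthread_mutex_lock(&d->lock);
        d->in_progress = false;
        pthread_mutex_unlock(&d->lock);
        return ret;
}

static int demo_action(void) { return 42; }

int main(void)
{
        struct desc d = { PTHREAD_MUTEX_INITIALIZER, false };
        printf("ret=%d\n", handle_event(&d, demo_action));
        return 0;
}
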
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 6c6ec9a49027..6546431447d7 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h | |||
@@ -15,10 +15,6 @@ | |||
15 | 15 | ||
16 | #define istate core_internal_state__do_not_mess_with_it | 16 | #define istate core_internal_state__do_not_mess_with_it |
17 | 17 | ||
18 | #ifdef CONFIG_GENERIC_HARDIRQS_NO_COMPAT | ||
19 | # define status status_use_accessors | ||
20 | #endif | ||
21 | |||
22 | extern int noirqdebug; | 18 | extern int noirqdebug; |
23 | 19 | ||
24 | /* | 20 | /* |
@@ -44,38 +40,28 @@ enum { | |||
44 | * IRQS_SPURIOUS_DISABLED - was disabled due to spurious interrupt | 40 | * IRQS_SPURIOUS_DISABLED - was disabled due to spurious interrupt |
45 | * detection | 41 | * detection |
46 | * IRQS_POLL_INPROGRESS - polling in progress | 42 | * IRQS_POLL_INPROGRESS - polling in progress |
47 | * IRQS_INPROGRESS - Interrupt in progress | ||
48 | * IRQS_ONESHOT - irq is not unmasked in primary handler | 43 | * IRQS_ONESHOT - irq is not unmasked in primary handler |
49 | * IRQS_REPLAY - irq is replayed | 44 | * IRQS_REPLAY - irq is replayed |
50 | * IRQS_WAITING - irq is waiting | 45 | * IRQS_WAITING - irq is waiting |
51 | * IRQS_DISABLED - irq is disabled | ||
52 | * IRQS_PENDING - irq is pending and replayed later | 46 | * IRQS_PENDING - irq is pending and replayed later |
53 | * IRQS_MASKED - irq is masked | ||
54 | * IRQS_SUSPENDED - irq is suspended | 47 | * IRQS_SUSPENDED - irq is suspended |
55 | */ | 48 | */ |
56 | enum { | 49 | enum { |
57 | IRQS_AUTODETECT = 0x00000001, | 50 | IRQS_AUTODETECT = 0x00000001, |
58 | IRQS_SPURIOUS_DISABLED = 0x00000002, | 51 | IRQS_SPURIOUS_DISABLED = 0x00000002, |
59 | IRQS_POLL_INPROGRESS = 0x00000008, | 52 | IRQS_POLL_INPROGRESS = 0x00000008, |
60 | IRQS_INPROGRESS = 0x00000010, | ||
61 | IRQS_ONESHOT = 0x00000020, | 53 | IRQS_ONESHOT = 0x00000020, |
62 | IRQS_REPLAY = 0x00000040, | 54 | IRQS_REPLAY = 0x00000040, |
63 | IRQS_WAITING = 0x00000080, | 55 | IRQS_WAITING = 0x00000080, |
64 | IRQS_DISABLED = 0x00000100, | ||
65 | IRQS_PENDING = 0x00000200, | 56 | IRQS_PENDING = 0x00000200, |
66 | IRQS_MASKED = 0x00000400, | ||
67 | IRQS_SUSPENDED = 0x00000800, | 57 | IRQS_SUSPENDED = 0x00000800, |
68 | }; | 58 | }; |
69 | 59 | ||
70 | #include "compat.h" | ||
71 | #include "debug.h" | 60 | #include "debug.h" |
72 | #include "settings.h" | 61 | #include "settings.h" |
73 | 62 | ||
74 | #define irq_data_to_desc(data) container_of(data, struct irq_desc, irq_data) | 63 | #define irq_data_to_desc(data) container_of(data, struct irq_desc, irq_data) |
75 | 64 | ||
76 | /* Set default functions for irq_chip structures: */ | ||
77 | extern void irq_chip_set_defaults(struct irq_chip *chip); | ||
78 | |||
79 | extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, | 65 | extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, |
80 | unsigned long flags); | 66 | unsigned long flags); |
81 | extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp); | 67 | extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp); |
@@ -162,13 +148,11 @@ irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags) | |||
162 | static inline void irqd_set_move_pending(struct irq_data *d) | 148 | static inline void irqd_set_move_pending(struct irq_data *d) |
163 | { | 149 | { |
164 | d->state_use_accessors |= IRQD_SETAFFINITY_PENDING; | 150 | d->state_use_accessors |= IRQD_SETAFFINITY_PENDING; |
165 | irq_compat_set_move_pending(irq_data_to_desc(d)); | ||
166 | } | 151 | } |
167 | 152 | ||
168 | static inline void irqd_clr_move_pending(struct irq_data *d) | 153 | static inline void irqd_clr_move_pending(struct irq_data *d) |
169 | { | 154 | { |
170 | d->state_use_accessors &= ~IRQD_SETAFFINITY_PENDING; | 155 | d->state_use_accessors &= ~IRQD_SETAFFINITY_PENDING; |
171 | irq_compat_clr_move_pending(irq_data_to_desc(d)); | ||
172 | } | 156 | } |
173 | 157 | ||
174 | static inline void irqd_clear(struct irq_data *d, unsigned int mask) | 158 | static inline void irqd_clear(struct irq_data *d, unsigned int mask) |
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index dbccc799407f..2c039c9b9383 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c | |||
@@ -80,7 +80,7 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node) | |||
80 | desc->irq_data.handler_data = NULL; | 80 | desc->irq_data.handler_data = NULL; |
81 | desc->irq_data.msi_desc = NULL; | 81 | desc->irq_data.msi_desc = NULL; |
82 | irq_settings_clr_and_set(desc, ~0, _IRQ_DEFAULT_INIT_FLAGS); | 82 | irq_settings_clr_and_set(desc, ~0, _IRQ_DEFAULT_INIT_FLAGS); |
83 | desc->istate = IRQS_DISABLED; | 83 | irqd_set(&desc->irq_data, IRQD_IRQ_DISABLED); |
84 | desc->handle_irq = handle_bad_irq; | 84 | desc->handle_irq = handle_bad_irq; |
85 | desc->depth = 1; | 85 | desc->depth = 1; |
86 | desc->irq_count = 0; | 86 | desc->irq_count = 0; |
@@ -198,15 +198,6 @@ err: | |||
198 | return -ENOMEM; | 198 | return -ENOMEM; |
199 | } | 199 | } |
200 | 200 | ||
201 | struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node) | ||
202 | { | ||
203 | int res = irq_alloc_descs(irq, irq, 1, node); | ||
204 | |||
205 | if (res == -EEXIST || res == irq) | ||
206 | return irq_to_desc(irq); | ||
207 | return NULL; | ||
208 | } | ||
209 | |||
210 | static int irq_expand_nr_irqs(unsigned int nr) | 201 | static int irq_expand_nr_irqs(unsigned int nr) |
211 | { | 202 | { |
212 | if (nr > IRQ_BITMAP_BITS) | 203 | if (nr > IRQ_BITMAP_BITS) |
@@ -247,7 +238,6 @@ int __init early_irq_init(void) | |||
247 | 238 | ||
248 | struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = { | 239 | struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = { |
249 | [0 ... NR_IRQS-1] = { | 240 | [0 ... NR_IRQS-1] = { |
250 | .istate = IRQS_DISABLED, | ||
251 | .handle_irq = handle_bad_irq, | 241 | .handle_irq = handle_bad_irq, |
252 | .depth = 1, | 242 | .depth = 1, |
253 | .lock = __RAW_SPIN_LOCK_UNLOCKED(irq_desc->lock), | 243 | .lock = __RAW_SPIN_LOCK_UNLOCKED(irq_desc->lock), |
@@ -283,11 +273,6 @@ struct irq_desc *irq_to_desc(unsigned int irq) | |||
283 | return (irq < NR_IRQS) ? irq_desc + irq : NULL; | 273 | return (irq < NR_IRQS) ? irq_desc + irq : NULL; |
284 | } | 274 | } |
285 | 275 | ||
286 | struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node) | ||
287 | { | ||
288 | return irq_to_desc(irq); | ||
289 | } | ||
290 | |||
291 | static void free_desc(unsigned int irq) | 276 | static void free_desc(unsigned int irq) |
292 | { | 277 | { |
293 | dynamic_irq_cleanup(irq); | 278 | dynamic_irq_cleanup(irq); |
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 0a2aa73e536c..07c1611f3899 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c | |||
@@ -41,7 +41,7 @@ early_param("threadirqs", setup_forced_irqthreads); | |||
41 | void synchronize_irq(unsigned int irq) | 41 | void synchronize_irq(unsigned int irq) |
42 | { | 42 | { |
43 | struct irq_desc *desc = irq_to_desc(irq); | 43 | struct irq_desc *desc = irq_to_desc(irq); |
44 | unsigned int state; | 44 | bool inprogress; |
45 | 45 | ||
46 | if (!desc) | 46 | if (!desc) |
47 | return; | 47 | return; |
@@ -53,16 +53,16 @@ void synchronize_irq(unsigned int irq) | |||
53 | * Wait until we're out of the critical section. This might | 53 | * Wait until we're out of the critical section. This might |
54 | * give the wrong answer due to the lack of memory barriers. | 54 | * give the wrong answer due to the lack of memory barriers. |
55 | */ | 55 | */ |
56 | while (desc->istate & IRQS_INPROGRESS) | 56 | while (irqd_irq_inprogress(&desc->irq_data)) |
57 | cpu_relax(); | 57 | cpu_relax(); |
58 | 58 | ||
59 | /* Ok, that indicated we're done: double-check carefully. */ | 59 | /* Ok, that indicated we're done: double-check carefully. */ |
60 | raw_spin_lock_irqsave(&desc->lock, flags); | 60 | raw_spin_lock_irqsave(&desc->lock, flags); |
61 | state = desc->istate; | 61 | inprogress = irqd_irq_inprogress(&desc->irq_data); |
62 | raw_spin_unlock_irqrestore(&desc->lock, flags); | 62 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
63 | 63 | ||
64 | /* Oops, that failed? */ | 64 | /* Oops, that failed? */ |
65 | } while (state & IRQS_INPROGRESS); | 65 | } while (inprogress); |
66 | 66 | ||
67 | /* | 67 | /* |
68 | * We made sure that no hardirq handler is running. Now verify | 68 | * We made sure that no hardirq handler is running. Now verify |
@@ -112,13 +112,13 @@ void irq_set_thread_affinity(struct irq_desc *desc) | |||
112 | } | 112 | } |
113 | 113 | ||
114 | #ifdef CONFIG_GENERIC_PENDING_IRQ | 114 | #ifdef CONFIG_GENERIC_PENDING_IRQ |
115 | static inline bool irq_can_move_pcntxt(struct irq_desc *desc) | 115 | static inline bool irq_can_move_pcntxt(struct irq_data *data) |
116 | { | 116 | { |
117 | return irq_settings_can_move_pcntxt(desc); | 117 | return irqd_can_move_in_process_context(data); |
118 | } | 118 | } |
119 | static inline bool irq_move_pending(struct irq_desc *desc) | 119 | static inline bool irq_move_pending(struct irq_data *data) |
120 | { | 120 | { |
121 | return irqd_is_setaffinity_pending(&desc->irq_data); | 121 | return irqd_is_setaffinity_pending(data); |
122 | } | 122 | } |
123 | static inline void | 123 | static inline void |
124 | irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask) | 124 | irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask) |
@@ -131,43 +131,34 @@ irq_get_pending(struct cpumask *mask, struct irq_desc *desc) | |||
131 | cpumask_copy(mask, desc->pending_mask); | 131 | cpumask_copy(mask, desc->pending_mask); |
132 | } | 132 | } |
133 | #else | 133 | #else |
134 | static inline bool irq_can_move_pcntxt(struct irq_desc *desc) { return true; } | 134 | static inline bool irq_can_move_pcntxt(struct irq_data *data) { return true; } |
135 | static inline bool irq_move_pending(struct irq_desc *desc) { return false; } | 135 | static inline bool irq_move_pending(struct irq_data *data) { return false; } |
136 | static inline void | 136 | static inline void |
137 | irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask) { } | 137 | irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask) { } |
138 | static inline void | 138 | static inline void |
139 | irq_get_pending(struct cpumask *mask, struct irq_desc *desc) { } | 139 | irq_get_pending(struct cpumask *mask, struct irq_desc *desc) { } |
140 | #endif | 140 | #endif |
141 | 141 | ||
142 | /** | 142 | int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask) |
143 | * irq_set_affinity - Set the irq affinity of a given irq | ||
144 | * @irq: Interrupt to set affinity | ||
145 | * @cpumask: cpumask | ||
146 | * | ||
147 | */ | ||
148 | int irq_set_affinity(unsigned int irq, const struct cpumask *mask) | ||
149 | { | 143 | { |
150 | struct irq_desc *desc = irq_to_desc(irq); | 144 | struct irq_chip *chip = irq_data_get_irq_chip(data); |
151 | struct irq_chip *chip = desc->irq_data.chip; | 145 | struct irq_desc *desc = irq_data_to_desc(data); |
152 | unsigned long flags; | ||
153 | int ret = 0; | 146 | int ret = 0; |
154 | 147 | ||
155 | if (!chip->irq_set_affinity) | 148 | if (!chip || !chip->irq_set_affinity) |
156 | return -EINVAL; | 149 | return -EINVAL; |
157 | 150 | ||
158 | raw_spin_lock_irqsave(&desc->lock, flags); | 151 | if (irq_can_move_pcntxt(data)) { |
159 | 152 | ret = chip->irq_set_affinity(data, mask, false); | |
160 | if (irq_can_move_pcntxt(desc)) { | ||
161 | ret = chip->irq_set_affinity(&desc->irq_data, mask, false); | ||
162 | switch (ret) { | 153 | switch (ret) { |
163 | case IRQ_SET_MASK_OK: | 154 | case IRQ_SET_MASK_OK: |
164 | cpumask_copy(desc->irq_data.affinity, mask); | 155 | cpumask_copy(data->affinity, mask); |
165 | case IRQ_SET_MASK_OK_NOCOPY: | 156 | case IRQ_SET_MASK_OK_NOCOPY: |
166 | irq_set_thread_affinity(desc); | 157 | irq_set_thread_affinity(desc); |
167 | ret = 0; | 158 | ret = 0; |
168 | } | 159 | } |
169 | } else { | 160 | } else { |
170 | irqd_set_move_pending(&desc->irq_data); | 161 | irqd_set_move_pending(data); |
171 | irq_copy_pending(desc, mask); | 162 | irq_copy_pending(desc, mask); |
172 | } | 163 | } |
173 | 164 | ||
@@ -175,8 +166,28 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *mask) | |||
175 | kref_get(&desc->affinity_notify->kref); | 166 | kref_get(&desc->affinity_notify->kref); |
176 | schedule_work(&desc->affinity_notify->work); | 167 | schedule_work(&desc->affinity_notify->work); |
177 | } | 168 | } |
178 | irq_compat_set_affinity(desc); | 169 | irqd_set(data, IRQD_AFFINITY_SET); |
179 | irqd_set(&desc->irq_data, IRQD_AFFINITY_SET); | 170 | |
171 | return ret; | ||
172 | } | ||
173 | |||
174 | /** | ||
175 | * irq_set_affinity - Set the irq affinity of a given irq | ||
176 | * @irq: Interrupt to set affinity | ||
177 | * @mask: cpumask | ||
178 | * | ||
179 | */ | ||
180 | int irq_set_affinity(unsigned int irq, const struct cpumask *mask) | ||
181 | { | ||
182 | struct irq_desc *desc = irq_to_desc(irq); | ||
183 | unsigned long flags; | ||
184 | int ret; | ||
185 | |||
186 | if (!desc) | ||
187 | return -EINVAL; | ||
188 | |||
189 | raw_spin_lock_irqsave(&desc->lock, flags); | ||
190 | ret = __irq_set_affinity_locked(irq_desc_get_irq_data(desc), mask); | ||
180 | raw_spin_unlock_irqrestore(&desc->lock, flags); | 191 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
181 | return ret; | 192 | return ret; |
182 | } | 193 | } |
@@ -206,7 +217,7 @@ static void irq_affinity_notify(struct work_struct *work) | |||
206 | goto out; | 217 | goto out; |
207 | 218 | ||
208 | raw_spin_lock_irqsave(&desc->lock, flags); | 219 | raw_spin_lock_irqsave(&desc->lock, flags); |
209 | if (irq_move_pending(desc)) | 220 | if (irq_move_pending(&desc->irq_data)) |
210 | irq_get_pending(cpumask, desc); | 221 | irq_get_pending(cpumask, desc); |
211 | else | 222 | else |
212 | cpumask_copy(cpumask, desc->irq_data.affinity); | 223 | cpumask_copy(cpumask, desc->irq_data.affinity); |
@@ -285,10 +296,8 @@ setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask) | |||
285 | if (cpumask_intersects(desc->irq_data.affinity, | 296 | if (cpumask_intersects(desc->irq_data.affinity, |
286 | cpu_online_mask)) | 297 | cpu_online_mask)) |
287 | set = desc->irq_data.affinity; | 298 | set = desc->irq_data.affinity; |
288 | else { | 299 | else |
289 | irq_compat_clr_affinity(desc); | ||
290 | irqd_clear(&desc->irq_data, IRQD_AFFINITY_SET); | 300 | irqd_clear(&desc->irq_data, IRQD_AFFINITY_SET); |
291 | } | ||
292 | } | 301 | } |
293 | 302 | ||
294 | cpumask_and(mask, cpu_online_mask, set); | 303 | cpumask_and(mask, cpu_online_mask, set); |
@@ -551,9 +560,9 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, | |||
551 | flags &= IRQ_TYPE_SENSE_MASK; | 560 | flags &= IRQ_TYPE_SENSE_MASK; |
552 | 561 | ||
553 | if (chip->flags & IRQCHIP_SET_TYPE_MASKED) { | 562 | if (chip->flags & IRQCHIP_SET_TYPE_MASKED) { |
554 | if (!(desc->istate & IRQS_MASKED)) | 563 | if (!irqd_irq_masked(&desc->irq_data)) |
555 | mask_irq(desc); | 564 | mask_irq(desc); |
556 | if (!(desc->istate & IRQS_DISABLED)) | 565 | if (!irqd_irq_disabled(&desc->irq_data)) |
557 | unmask = 1; | 566 | unmask = 1; |
558 | } | 567 | } |
559 | 568 | ||
@@ -575,8 +584,6 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, | |||
575 | irqd_set(&desc->irq_data, IRQD_LEVEL); | 584 | irqd_set(&desc->irq_data, IRQD_LEVEL); |
576 | } | 585 | } |
577 | 586 | ||
578 | if (chip != desc->irq_data.chip) | ||
579 | irq_chip_set_defaults(desc->irq_data.chip); | ||
580 | ret = 0; | 587 | ret = 0; |
581 | break; | 588 | break; |
582 | default: | 589 | default: |
@@ -651,7 +658,7 @@ again: | |||
651 | * irq_wake_thread(). See the comment there which explains the | 658 | * irq_wake_thread(). See the comment there which explains the |
652 | * serialization. | 659 | * serialization. |
653 | */ | 660 | */ |
654 | if (unlikely(desc->istate & IRQS_INPROGRESS)) { | 661 | if (unlikely(irqd_irq_inprogress(&desc->irq_data))) { |
655 | raw_spin_unlock_irq(&desc->lock); | 662 | raw_spin_unlock_irq(&desc->lock); |
656 | chip_bus_sync_unlock(desc); | 663 | chip_bus_sync_unlock(desc); |
657 | cpu_relax(); | 664 | cpu_relax(); |
@@ -668,12 +675,10 @@ again: | |||
668 | 675 | ||
669 | desc->threads_oneshot &= ~action->thread_mask; | 676 | desc->threads_oneshot &= ~action->thread_mask; |
670 | 677 | ||
671 | if (!desc->threads_oneshot && !(desc->istate & IRQS_DISABLED) && | 678 | if (!desc->threads_oneshot && !irqd_irq_disabled(&desc->irq_data) && |
672 | (desc->istate & IRQS_MASKED)) { | 679 | irqd_irq_masked(&desc->irq_data)) |
673 | irq_compat_clr_masked(desc); | 680 | unmask_irq(desc); |
674 | desc->istate &= ~IRQS_MASKED; | 681 | |
675 | desc->irq_data.chip->irq_unmask(&desc->irq_data); | ||
676 | } | ||
677 | out_unlock: | 682 | out_unlock: |
678 | raw_spin_unlock_irq(&desc->lock); | 683 | raw_spin_unlock_irq(&desc->lock); |
679 | chip_bus_sync_unlock(desc); | 684 | chip_bus_sync_unlock(desc); |
@@ -767,7 +772,7 @@ static int irq_thread(void *data) | |||
767 | atomic_inc(&desc->threads_active); | 772 | atomic_inc(&desc->threads_active); |
768 | 773 | ||
769 | raw_spin_lock_irq(&desc->lock); | 774 | raw_spin_lock_irq(&desc->lock); |
770 | if (unlikely(desc->istate & IRQS_DISABLED)) { | 775 | if (unlikely(irqd_irq_disabled(&desc->irq_data))) { |
771 | /* | 776 | /* |
772 | * CHECKME: We might need a dedicated | 777 | * CHECKME: We might need a dedicated |
773 | * IRQ_THREAD_PENDING flag here, which | 778 | * IRQ_THREAD_PENDING flag here, which |
@@ -775,7 +780,6 @@ static int irq_thread(void *data) | |||
775 | * but AFAICT IRQS_PENDING should be fine as it | 780 | * but AFAICT IRQS_PENDING should be fine as it |
776 | * retriggers the interrupt itself --- tglx | 781 | * retriggers the interrupt itself --- tglx |
777 | */ | 782 | */ |
778 | irq_compat_set_pending(desc); | ||
779 | desc->istate |= IRQS_PENDING; | 783 | desc->istate |= IRQS_PENDING; |
780 | raw_spin_unlock_irq(&desc->lock); | 784 | raw_spin_unlock_irq(&desc->lock); |
781 | } else { | 785 | } else { |
@@ -971,8 +975,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) | |||
971 | new->thread_mask = 1 << ffz(thread_mask); | 975 | new->thread_mask = 1 << ffz(thread_mask); |
972 | 976 | ||
973 | if (!shared) { | 977 | if (!shared) { |
974 | irq_chip_set_defaults(desc->irq_data.chip); | ||
975 | |||
976 | init_waitqueue_head(&desc->wait_for_threads); | 978 | init_waitqueue_head(&desc->wait_for_threads); |
977 | 979 | ||
978 | /* Setup the type (level, edge polarity) if configured: */ | 980 | /* Setup the type (level, edge polarity) if configured: */ |
@@ -985,8 +987,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) | |||
985 | } | 987 | } |
986 | 988 | ||
987 | desc->istate &= ~(IRQS_AUTODETECT | IRQS_SPURIOUS_DISABLED | \ | 989 | desc->istate &= ~(IRQS_AUTODETECT | IRQS_SPURIOUS_DISABLED | \ |
988 | IRQS_INPROGRESS | IRQS_ONESHOT | \ | 990 | IRQS_ONESHOT | IRQS_WAITING); |
989 | IRQS_WAITING); | 991 | irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS); |
990 | 992 | ||
991 | if (new->flags & IRQF_PERCPU) { | 993 | if (new->flags & IRQF_PERCPU) { |
992 | irqd_set(&desc->irq_data, IRQD_PER_CPU); | 994 | irqd_set(&desc->irq_data, IRQD_PER_CPU); |
@@ -1049,6 +1051,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) | |||
1049 | register_irq_proc(irq, desc); | 1051 | register_irq_proc(irq, desc); |
1050 | new->dir = NULL; | 1052 | new->dir = NULL; |
1051 | register_handler_proc(irq, new); | 1053 | register_handler_proc(irq, new); |
1054 | free_cpumask_var(mask); | ||
1052 | 1055 | ||
1053 | return 0; | 1056 | return 0; |
1054 | 1057 | ||
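
The manage.c hunk splits irq_set_affinity() into a lock-held core (__irq_set_affinity_locked) plus a thin locking wrapper, a common refactoring when another caller already holds the descriptor lock and needs the same work. A generic, hedged C illustration of that wrapper pattern, with invented names:

#include <pthread.h>
#include <stdio.h>

struct obj {
        pthread_mutex_t lock;
        int value;
};

/* Core operation; caller must already hold obj->lock. */
static int set_value_locked(struct obj *o, int v)
{
        o->value = v;   /* the real work happens here */
        return 0;
}

/* Public entry point: take the lock, call the _locked core, drop it. */
static int set_value(struct obj *o, int v)
{
        int ret;

        pthread_mutex_lock(&o->lock);
        ret = set_value_locked(o, v);
        pthread_mutex_unlock(&o->lock);
        return ret;
}

int main(void)
{
        struct obj o = { PTHREAD_MUTEX_INITIALIZER, 0 };
        set_value(&o, 7);
        printf("value=%d\n", o.value);
        return 0;
}
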
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c index ec4806d4778b..47420908fba0 100644 --- a/kernel/irq/migration.c +++ b/kernel/irq/migration.c | |||
@@ -35,7 +35,7 @@ void irq_move_masked_irq(struct irq_data *idata) | |||
35 | * do the disable, re-program, enable sequence. | 35 | * do the disable, re-program, enable sequence. |
36 | * This is *not* particularly important for level triggered | 36 | * This is *not* particularly important for level triggered |
37 | * but in an edge trigger case, we might be setting rte | 37 | * but in an edge trigger case, we might be setting rte |
38 | * when an active trigger is comming in. This could | 38 | * when an active trigger is coming in. This could |
39 | * cause some ioapics to mal-function. | 39 | * cause some ioapics to mal-function. |
40 | * Being paranoid i guess! | 40 | * Being paranoid i guess! |
41 | * | 41 | * |
@@ -53,20 +53,14 @@ void irq_move_masked_irq(struct irq_data *idata) | |||
53 | cpumask_clear(desc->pending_mask); | 53 | cpumask_clear(desc->pending_mask); |
54 | } | 54 | } |
55 | 55 | ||
56 | void move_masked_irq(int irq) | ||
57 | { | ||
58 | irq_move_masked_irq(irq_get_irq_data(irq)); | ||
59 | } | ||
60 | |||
61 | void irq_move_irq(struct irq_data *idata) | 56 | void irq_move_irq(struct irq_data *idata) |
62 | { | 57 | { |
63 | struct irq_desc *desc = irq_data_to_desc(idata); | ||
64 | bool masked; | 58 | bool masked; |
65 | 59 | ||
66 | if (likely(!irqd_is_setaffinity_pending(idata))) | 60 | if (likely(!irqd_is_setaffinity_pending(idata))) |
67 | return; | 61 | return; |
68 | 62 | ||
69 | if (unlikely(desc->istate & IRQS_DISABLED)) | 63 | if (unlikely(irqd_irq_disabled(idata))) |
70 | return; | 64 | return; |
71 | 65 | ||
72 | /* | 66 | /* |
@@ -74,15 +68,10 @@ void irq_move_irq(struct irq_data *idata) | |||
74 | * threaded interrupt with ONESHOT set, we can end up with an | 68 | * threaded interrupt with ONESHOT set, we can end up with an |
75 | * interrupt storm. | 69 | * interrupt storm. |
76 | */ | 70 | */ |
77 | masked = desc->istate & IRQS_MASKED; | 71 | masked = irqd_irq_masked(idata); |
78 | if (!masked) | 72 | if (!masked) |
79 | idata->chip->irq_mask(idata); | 73 | idata->chip->irq_mask(idata); |
80 | irq_move_masked_irq(idata); | 74 | irq_move_masked_irq(idata); |
81 | if (!masked) | 75 | if (!masked) |
82 | idata->chip->irq_unmask(idata); | 76 | idata->chip->irq_unmask(idata); |
83 | } | 77 | } |
84 | |||
85 | void move_native_irq(int irq) | ||
86 | { | ||
87 | irq_move_irq(irq_get_irq_data(irq)); | ||
88 | } | ||
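
irq_move_irq() keeps the "only undo what you did" rule: remember whether the line was already masked, mask it if not, do the move, and unmask only if this function was the one that masked it. A small stand-alone model of that rule (the struct and function names are placeholders, not the kernel interfaces):

#include <stdbool.h>
#include <stdio.h>

struct line { bool masked; bool moved; };

static void mask(struct line *l)    { l->masked = true; }
static void unmask(struct line *l)  { l->masked = false; }
static void do_move(struct line *l) { l->moved = true; }

/* Mask around the move, but never unmask a line someone else masked. */
static void move_line(struct line *l)
{
        bool was_masked = l->masked;

        if (!was_masked)
                mask(l);
        do_move(l);
        if (!was_masked)
                unmask(l);
}

int main(void)
{
        struct line a = { false, false }, b = { true, false };
        move_line(&a);
        move_line(&b);
        printf("a: masked=%d moved=%d\n", a.masked, a.moved);
        printf("b: masked=%d moved=%d\n", b.masked, b.moved);
        return 0;
}
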
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 760248de109d..dd201bd35103 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c | |||
@@ -364,6 +364,10 @@ int __weak arch_show_interrupts(struct seq_file *p, int prec) | |||
364 | return 0; | 364 | return 0; |
365 | } | 365 | } |
366 | 366 | ||
367 | #ifndef ACTUAL_NR_IRQS | ||
368 | # define ACTUAL_NR_IRQS nr_irqs | ||
369 | #endif | ||
370 | |||
367 | int show_interrupts(struct seq_file *p, void *v) | 371 | int show_interrupts(struct seq_file *p, void *v) |
368 | { | 372 | { |
369 | static int prec; | 373 | static int prec; |
@@ -373,10 +377,10 @@ int show_interrupts(struct seq_file *p, void *v) | |||
373 | struct irqaction *action; | 377 | struct irqaction *action; |
374 | struct irq_desc *desc; | 378 | struct irq_desc *desc; |
375 | 379 | ||
376 | if (i > nr_irqs) | 380 | if (i > ACTUAL_NR_IRQS) |
377 | return 0; | 381 | return 0; |
378 | 382 | ||
379 | if (i == nr_irqs) | 383 | if (i == ACTUAL_NR_IRQS) |
380 | return arch_show_interrupts(p, prec); | 384 | return arch_show_interrupts(p, prec); |
381 | 385 | ||
382 | /* print header and calculate the width of the first column */ | 386 | /* print header and calculate the width of the first column */ |
@@ -404,7 +408,20 @@ int show_interrupts(struct seq_file *p, void *v) | |||
404 | seq_printf(p, "%*d: ", prec, i); | 408 | seq_printf(p, "%*d: ", prec, i); |
405 | for_each_online_cpu(j) | 409 | for_each_online_cpu(j) |
406 | seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); | 410 | seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); |
407 | seq_printf(p, " %8s", desc->irq_data.chip->name); | 411 | |
412 | if (desc->irq_data.chip) { | ||
413 | if (desc->irq_data.chip->irq_print_chip) | ||
414 | desc->irq_data.chip->irq_print_chip(&desc->irq_data, p); | ||
415 | else if (desc->irq_data.chip->name) | ||
416 | seq_printf(p, " %8s", desc->irq_data.chip->name); | ||
417 | else | ||
418 | seq_printf(p, " %8s", "-"); | ||
419 | } else { | ||
420 | seq_printf(p, " %8s", "None"); | ||
421 | } | ||
422 | #ifdef CONFIG_GENERIC_IRQ_SHOW_LEVEL | ||
423 | seq_printf(p, " %-8s", irqd_is_level_type(&desc->irq_data) ? "Level" : "Edge"); | ||
424 | #endif | ||
408 | if (desc->name) | 425 | if (desc->name) |
409 | seq_printf(p, "-%-8s", desc->name); | 426 | seq_printf(p, "-%-8s", desc->name); |
410 | 427 | ||
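
The show_interrupts() hunk adds a fallback chain for the chip column: a chip-provided print hook if one exists, else the chip name, else "-", and "None" when the descriptor has no chip at all. A hedged stdio sketch of that fallback order, with an invented struct and demo data:

#include <stdio.h>

struct chip {
        const char *name;
        void (*print_chip)(FILE *out);  /* optional per-chip printer */
};

static void print_chip_column(FILE *out, const struct chip *chip)
{
        if (chip) {
                if (chip->print_chip)
                        chip->print_chip(out);
                else if (chip->name)
                        fprintf(out, " %8s", chip->name);
                else
                        fprintf(out, " %8s", "-");
        } else {
                fprintf(out, " %8s", "None");
        }
}

static void fancy(FILE *out) { fprintf(out, " %8s", "FANCY"); }

int main(void)
{
        struct chip a = { "GIC", NULL }, b = { NULL, NULL }, c = { NULL, fancy };
        print_chip_column(stdout, &a);
        print_chip_column(stdout, &b);
        print_chip_column(stdout, &c);
        print_chip_column(stdout, NULL);
        putchar('\n');
        return 0;
}
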
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c index ad683a99b1ec..14dd5761e8c9 100644 --- a/kernel/irq/resend.c +++ b/kernel/irq/resend.c | |||
@@ -65,7 +65,6 @@ void check_irq_resend(struct irq_desc *desc, unsigned int irq) | |||
65 | if (desc->istate & IRQS_REPLAY) | 65 | if (desc->istate & IRQS_REPLAY) |
66 | return; | 66 | return; |
67 | if (desc->istate & IRQS_PENDING) { | 67 | if (desc->istate & IRQS_PENDING) { |
68 | irq_compat_clr_pending(desc); | ||
69 | desc->istate &= ~IRQS_PENDING; | 68 | desc->istate &= ~IRQS_PENDING; |
70 | desc->istate |= IRQS_REPLAY; | 69 | desc->istate |= IRQS_REPLAY; |
71 | 70 | ||
diff --git a/kernel/irq/settings.h b/kernel/irq/settings.h index 0227ad358272..0d91730b6330 100644 --- a/kernel/irq/settings.h +++ b/kernel/irq/settings.h | |||
@@ -15,17 +15,8 @@ enum { | |||
15 | _IRQF_MODIFY_MASK = IRQF_MODIFY_MASK, | 15 | _IRQF_MODIFY_MASK = IRQF_MODIFY_MASK, |
16 | }; | 16 | }; |
17 | 17 | ||
18 | #define IRQ_INPROGRESS GOT_YOU_MORON | ||
19 | #define IRQ_REPLAY GOT_YOU_MORON | ||
20 | #define IRQ_WAITING GOT_YOU_MORON | ||
21 | #define IRQ_DISABLED GOT_YOU_MORON | ||
22 | #define IRQ_PENDING GOT_YOU_MORON | ||
23 | #define IRQ_MASKED GOT_YOU_MORON | ||
24 | #define IRQ_WAKEUP GOT_YOU_MORON | ||
25 | #define IRQ_MOVE_PENDING GOT_YOU_MORON | ||
26 | #define IRQ_PER_CPU GOT_YOU_MORON | 18 | #define IRQ_PER_CPU GOT_YOU_MORON |
27 | #define IRQ_NO_BALANCING GOT_YOU_MORON | 19 | #define IRQ_NO_BALANCING GOT_YOU_MORON |
28 | #define IRQ_AFFINITY_SET GOT_YOU_MORON | ||
29 | #define IRQ_LEVEL GOT_YOU_MORON | 20 | #define IRQ_LEVEL GOT_YOU_MORON |
30 | #define IRQ_NOPROBE GOT_YOU_MORON | 21 | #define IRQ_NOPROBE GOT_YOU_MORON |
31 | #define IRQ_NOREQUEST GOT_YOU_MORON | 22 | #define IRQ_NOREQUEST GOT_YOU_MORON |
@@ -37,102 +28,98 @@ enum { | |||
37 | static inline void | 28 | static inline void |
38 | irq_settings_clr_and_set(struct irq_desc *desc, u32 clr, u32 set) | 29 | irq_settings_clr_and_set(struct irq_desc *desc, u32 clr, u32 set) |
39 | { | 30 | { |
40 | desc->status &= ~(clr & _IRQF_MODIFY_MASK); | 31 | desc->status_use_accessors &= ~(clr & _IRQF_MODIFY_MASK); |
41 | desc->status |= (set & _IRQF_MODIFY_MASK); | 32 | desc->status_use_accessors |= (set & _IRQF_MODIFY_MASK); |
42 | } | 33 | } |
43 | 34 | ||
44 | static inline bool irq_settings_is_per_cpu(struct irq_desc *desc) | 35 | static inline bool irq_settings_is_per_cpu(struct irq_desc *desc) |
45 | { | 36 | { |
46 | return desc->status & _IRQ_PER_CPU; | 37 | return desc->status_use_accessors & _IRQ_PER_CPU; |
47 | } | 38 | } |
48 | 39 | ||
49 | static inline void irq_settings_set_per_cpu(struct irq_desc *desc) | 40 | static inline void irq_settings_set_per_cpu(struct irq_desc *desc) |
50 | { | 41 | { |
51 | desc->status |= _IRQ_PER_CPU; | 42 | desc->status_use_accessors |= _IRQ_PER_CPU; |
52 | } | 43 | } |
53 | 44 | ||
54 | static inline void irq_settings_set_no_balancing(struct irq_desc *desc) | 45 | static inline void irq_settings_set_no_balancing(struct irq_desc *desc) |
55 | { | 46 | { |
56 | desc->status |= _IRQ_NO_BALANCING; | 47 | desc->status_use_accessors |= _IRQ_NO_BALANCING; |
57 | } | 48 | } |
58 | 49 | ||
59 | static inline bool irq_settings_has_no_balance_set(struct irq_desc *desc) | 50 | static inline bool irq_settings_has_no_balance_set(struct irq_desc *desc) |
60 | { | 51 | { |
61 | return desc->status & _IRQ_NO_BALANCING; | 52 | return desc->status_use_accessors & _IRQ_NO_BALANCING; |
62 | } | 53 | } |
63 | 54 | ||
64 | static inline u32 irq_settings_get_trigger_mask(struct irq_desc *desc) | 55 | static inline u32 irq_settings_get_trigger_mask(struct irq_desc *desc) |
65 | { | 56 | { |
66 | return desc->status & IRQ_TYPE_SENSE_MASK; | 57 | return desc->status_use_accessors & IRQ_TYPE_SENSE_MASK; |
67 | } | 58 | } |
68 | 59 | ||
69 | static inline void | 60 | static inline void |
70 | irq_settings_set_trigger_mask(struct irq_desc *desc, u32 mask) | 61 | irq_settings_set_trigger_mask(struct irq_desc *desc, u32 mask) |
71 | { | 62 | { |
72 | desc->status &= ~IRQ_TYPE_SENSE_MASK; | 63 | desc->status_use_accessors &= ~IRQ_TYPE_SENSE_MASK; |
73 | desc->status |= mask & IRQ_TYPE_SENSE_MASK; | 64 | desc->status_use_accessors |= mask & IRQ_TYPE_SENSE_MASK; |
74 | } | 65 | } |
75 | 66 | ||
76 | static inline bool irq_settings_is_level(struct irq_desc *desc) | 67 | static inline bool irq_settings_is_level(struct irq_desc *desc) |
77 | { | 68 | { |
78 | return desc->status & _IRQ_LEVEL; | 69 | return desc->status_use_accessors & _IRQ_LEVEL; |
79 | } | 70 | } |
80 | 71 | ||
81 | static inline void irq_settings_clr_level(struct irq_desc *desc) | 72 | static inline void irq_settings_clr_level(struct irq_desc *desc) |
82 | { | 73 | { |
83 | desc->status &= ~_IRQ_LEVEL; | 74 | desc->status_use_accessors &= ~_IRQ_LEVEL; |
84 | } | 75 | } |
85 | 76 | ||
86 | static inline void irq_settings_set_level(struct irq_desc *desc) | 77 | static inline void irq_settings_set_level(struct irq_desc *desc) |
87 | { | 78 | { |
88 | desc->status |= _IRQ_LEVEL; | 79 | desc->status_use_accessors |= _IRQ_LEVEL; |
89 | } | 80 | } |
90 | 81 | ||
91 | static inline bool irq_settings_can_request(struct irq_desc *desc) | 82 | static inline bool irq_settings_can_request(struct irq_desc *desc) |
92 | { | 83 | { |
93 | return !(desc->status & _IRQ_NOREQUEST); | 84 | return !(desc->status_use_accessors & _IRQ_NOREQUEST); |
94 | } | 85 | } |
95 | 86 | ||
96 | static inline void irq_settings_clr_norequest(struct irq_desc *desc) | 87 | static inline void irq_settings_clr_norequest(struct irq_desc *desc) |
97 | { | 88 | { |
98 | desc->status &= ~_IRQ_NOREQUEST; | 89 | desc->status_use_accessors &= ~_IRQ_NOREQUEST; |
99 | } | 90 | } |
100 | 91 | ||
101 | static inline void irq_settings_set_norequest(struct irq_desc *desc) | 92 | static inline void irq_settings_set_norequest(struct irq_desc *desc) |
102 | { | 93 | { |
103 | desc->status |= _IRQ_NOREQUEST; | 94 | desc->status_use_accessors |= _IRQ_NOREQUEST; |
104 | } | 95 | } |
105 | 96 | ||
106 | static inline bool irq_settings_can_probe(struct irq_desc *desc) | 97 | static inline bool irq_settings_can_probe(struct irq_desc *desc) |
107 | { | 98 | { |
108 | return !(desc->status & _IRQ_NOPROBE); | 99 | return !(desc->status_use_accessors & _IRQ_NOPROBE); |
109 | } | 100 | } |
110 | 101 | ||
111 | static inline void irq_settings_clr_noprobe(struct irq_desc *desc) | 102 | static inline void irq_settings_clr_noprobe(struct irq_desc *desc) |
112 | { | 103 | { |
113 | desc->status &= ~_IRQ_NOPROBE; | 104 | desc->status_use_accessors &= ~_IRQ_NOPROBE; |
114 | } | 105 | } |
115 | 106 | ||
116 | static inline void irq_settings_set_noprobe(struct irq_desc *desc) | 107 | static inline void irq_settings_set_noprobe(struct irq_desc *desc) |
117 | { | 108 | { |
118 | desc->status |= _IRQ_NOPROBE; | 109 | desc->status_use_accessors |= _IRQ_NOPROBE; |
119 | } | 110 | } |
120 | 111 | ||
121 | static inline bool irq_settings_can_move_pcntxt(struct irq_desc *desc) | 112 | static inline bool irq_settings_can_move_pcntxt(struct irq_desc *desc) |
122 | { | 113 | { |
123 | return desc->status & _IRQ_MOVE_PCNTXT; | 114 | return desc->status_use_accessors & _IRQ_MOVE_PCNTXT; |
124 | } | 115 | } |
125 | 116 | ||
126 | static inline bool irq_settings_can_autoenable(struct irq_desc *desc) | 117 | static inline bool irq_settings_can_autoenable(struct irq_desc *desc) |
127 | { | 118 | { |
128 | return !(desc->status & _IRQ_NOAUTOEN); | 119 | return !(desc->status_use_accessors & _IRQ_NOAUTOEN); |
129 | } | 120 | } |
130 | 121 | ||
131 | static inline bool irq_settings_is_nested_thread(struct irq_desc *desc) | 122 | static inline bool irq_settings_is_nested_thread(struct irq_desc *desc) |
132 | { | 123 | { |
133 | return desc->status & _IRQ_NESTED_THREAD; | 124 | return desc->status_use_accessors & _IRQ_NESTED_THREAD; |
134 | } | 125 | } |
135 | |||
136 | /* Nothing should touch desc->status from now on */ | ||
137 | #undef status | ||
138 | #define status USE_THE_PROPER_WRAPPERS_YOU_MORON | ||
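The settings.h rewrite above is mechanical: every direct desc->status access becomes an access to the renamed status_use_accessors field, and the GOT_YOU_MORON / USE_THE_PROPER_WRAPPERS_YOU_MORON defines poison the old spelling so any straggler fails to compile instead of silently bypassing the wrappers. A minimal standalone sketch of that poison-by-macro idiom (the struct and names here are illustrative, not the kernel's):

    /* field renamed; the inline accessor is the only supported way in */
    struct demo_desc {
            unsigned int settings_use_accessors;
    };

    static inline bool demo_is_level(struct demo_desc *d)
    {
            return d->settings_use_accessors & 0x1;
    }

    /* Nothing should touch d->settings from now on. */
    #define settings PLEASE_USE_THE_ACCESSORS

    /* Any later "d->settings" now expands to "d->PLEASE_USE_THE_ACCESSORS"
     * and breaks the build, while the accessor above keeps compiling because
     * settings_use_accessors is a different token. */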
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index dd586ebf9c8c..dfbd550401b2 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c | |||
@@ -45,12 +45,12 @@ bool irq_wait_for_poll(struct irq_desc *desc) | |||
45 | #ifdef CONFIG_SMP | 45 | #ifdef CONFIG_SMP |
46 | do { | 46 | do { |
47 | raw_spin_unlock(&desc->lock); | 47 | raw_spin_unlock(&desc->lock); |
48 | while (desc->istate & IRQS_INPROGRESS) | 48 | while (irqd_irq_inprogress(&desc->irq_data)) |
49 | cpu_relax(); | 49 | cpu_relax(); |
50 | raw_spin_lock(&desc->lock); | 50 | raw_spin_lock(&desc->lock); |
51 | } while (desc->istate & IRQS_INPROGRESS); | 51 | } while (irqd_irq_inprogress(&desc->irq_data)); |
52 | /* Might have been disabled in meantime */ | 52 | /* Might have been disabled in meantime */ |
53 | return !(desc->istate & IRQS_DISABLED) && desc->action; | 53 | return !irqd_irq_disabled(&desc->irq_data) && desc->action; |
54 | #else | 54 | #else |
55 | return false; | 55 | return false; |
56 | #endif | 56 | #endif |
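The converted wait loop keeps its original shape: drop desc->lock so the CPU actually running the handler can finish, spin with cpu_relax(), then retake the lock and re-test, since the flag may have been set again before the lock was reacquired. A minimal sketch of that drop-lock/poll/relock idiom, assuming kernel context (wait_for_clear() and the bare flag pointer are illustrative, not kernel API):

    static void wait_for_clear(raw_spinlock_t *lock, volatile bool *in_progress)
    {
            do {
                    raw_spin_unlock(lock);        /* let the other CPU make progress */
                    while (*in_progress)
                            cpu_relax();          /* polite busy-wait */
                    raw_spin_lock(lock);
            } while (*in_progress);               /* re-check under the lock */
    }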
@@ -75,7 +75,7 @@ static int try_one_irq(int irq, struct irq_desc *desc, bool force) | |||
75 | * Do not poll disabled interrupts unless the spurious | 75 | * Do not poll disabled interrupts unless the spurious |
76 | * disabled poller asks explicitely. | 76 | * disabled poller asks explicitely. |
76 | * disabled poller asks explicitly. | 76 | * disabled poller asks explicitly. |
77 | */ | 77 | */ |
78 | if ((desc->istate & IRQS_DISABLED) && !force) | 78 | if (irqd_irq_disabled(&desc->irq_data) && !force) |
79 | goto out; | 79 | goto out; |
80 | 80 | ||
81 | /* | 81 | /* |
@@ -88,12 +88,11 @@ static int try_one_irq(int irq, struct irq_desc *desc, bool force) | |||
88 | goto out; | 88 | goto out; |
89 | 89 | ||
90 | /* Already running on another processor */ | 90 | /* Already running on another processor */ |
91 | if (desc->istate & IRQS_INPROGRESS) { | 91 | if (irqd_irq_inprogress(&desc->irq_data)) { |
92 | /* | 92 | /* |
93 | * Already running: If it is shared get the other | 93 | * Already running: If it is shared get the other |
94 | * CPU to go looking for our mystery interrupt too | 94 | * CPU to go looking for our mystery interrupt too |
95 | */ | 95 | */ |
96 | irq_compat_set_pending(desc); | ||
97 | desc->istate |= IRQS_PENDING; | 96 | desc->istate |= IRQS_PENDING; |
98 | goto out; | 97 | goto out; |
99 | } | 98 | } |
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 6f6d091b5757..079f1d39a8b8 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c | |||
@@ -64,14 +64,14 @@ static inline int is_kernel_text(unsigned long addr) | |||
64 | if ((addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) || | 64 | if ((addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) || |
65 | arch_is_kernel_text(addr)) | 65 | arch_is_kernel_text(addr)) |
66 | return 1; | 66 | return 1; |
67 | return in_gate_area_no_task(addr); | 67 | return in_gate_area_no_mm(addr); |
68 | } | 68 | } |
69 | 69 | ||
70 | static inline int is_kernel(unsigned long addr) | 70 | static inline int is_kernel(unsigned long addr) |
71 | { | 71 | { |
72 | if (addr >= (unsigned long)_stext && addr <= (unsigned long)_end) | 72 | if (addr >= (unsigned long)_stext && addr <= (unsigned long)_end) |
73 | return 1; | 73 | return 1; |
74 | return in_gate_area_no_task(addr); | 74 | return in_gate_area_no_mm(addr); |
75 | } | 75 | } |
76 | 76 | ||
77 | static int is_ksym_addr(unsigned long addr) | 77 | static int is_ksym_addr(unsigned long addr) |
@@ -342,13 +342,15 @@ int lookup_symbol_attrs(unsigned long addr, unsigned long *size, | |||
342 | } | 342 | } |
343 | 343 | ||
344 | /* Look up a kernel symbol and return it in a text buffer. */ | 344 | /* Look up a kernel symbol and return it in a text buffer. */ |
345 | int sprint_symbol(char *buffer, unsigned long address) | 345 | static int __sprint_symbol(char *buffer, unsigned long address, |
346 | int symbol_offset) | ||
346 | { | 347 | { |
347 | char *modname; | 348 | char *modname; |
348 | const char *name; | 349 | const char *name; |
349 | unsigned long offset, size; | 350 | unsigned long offset, size; |
350 | int len; | 351 | int len; |
351 | 352 | ||
353 | address += symbol_offset; | ||
352 | name = kallsyms_lookup(address, &size, &offset, &modname, buffer); | 354 | name = kallsyms_lookup(address, &size, &offset, &modname, buffer); |
353 | if (!name) | 355 | if (!name) |
354 | return sprintf(buffer, "0x%lx", address); | 356 | return sprintf(buffer, "0x%lx", address); |
@@ -357,17 +359,53 @@ int sprint_symbol(char *buffer, unsigned long address) | |||
357 | strcpy(buffer, name); | 359 | strcpy(buffer, name); |
358 | len = strlen(buffer); | 360 | len = strlen(buffer); |
359 | buffer += len; | 361 | buffer += len; |
362 | offset -= symbol_offset; | ||
360 | 363 | ||
361 | if (modname) | 364 | if (modname) |
362 | len += sprintf(buffer, "+%#lx/%#lx [%s]", | 365 | len += sprintf(buffer, "+%#lx/%#lx [%s]", offset, size, modname); |
363 | offset, size, modname); | ||
364 | else | 366 | else |
365 | len += sprintf(buffer, "+%#lx/%#lx", offset, size); | 367 | len += sprintf(buffer, "+%#lx/%#lx", offset, size); |
366 | 368 | ||
367 | return len; | 369 | return len; |
368 | } | 370 | } |
371 | |||
372 | /** | ||
373 | * sprint_symbol - Look up a kernel symbol and return it in a text buffer | ||
374 | * @buffer: buffer to be stored | ||
375 | * @address: address to lookup | ||
376 | * | ||
377 | * This function looks up a kernel symbol with @address and stores its name, | ||
378 | * offset, size and module name to @buffer if possible. If no symbol was found, | ||
379 | * just saves its @address as is. | ||
380 | * | ||
381 | * This function returns the number of bytes stored in @buffer. | ||
382 | */ | ||
383 | int sprint_symbol(char *buffer, unsigned long address) | ||
384 | { | ||
385 | return __sprint_symbol(buffer, address, 0); | ||
386 | } | ||
387 | |||
369 | EXPORT_SYMBOL_GPL(sprint_symbol); | 388 | EXPORT_SYMBOL_GPL(sprint_symbol); |
370 | 389 | ||
390 | /** | ||
391 | * sprint_backtrace - Look up a backtrace symbol and return it in a text buffer | ||
392 | * @buffer: buffer to be stored | ||
393 | * @address: address to lookup | ||
394 | * | ||
395 | * This function is for stack backtrace and does the same thing as | ||
396 | * sprint_symbol() but with modified/decreased @address. If there is a | ||
397 | * tail-call to the function marked "noreturn", gcc optimized out code after | ||
398 | * the call so that the stack-saved return address could point outside of the | ||
399 | * caller. This function ensures that kallsyms will find the original caller | ||
400 | * by decreasing @address. | ||
401 | * | ||
402 | * This function returns the number of bytes stored in @buffer. | ||
403 | */ | ||
404 | int sprint_backtrace(char *buffer, unsigned long address) | ||
405 | { | ||
406 | return __sprint_symbol(buffer, address, -1); | ||
407 | } | ||
408 | |||
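The split only matters for stack traces: sprint_backtrace() biases the lookup by -1 so a return address saved just past a tail-call to a noreturn function still resolves to the real caller, then adds the offset back before formatting. A hedged usage sketch (KSYM_SYMBOL_LEN is the buffer size these helpers are normally paired with; the addresses are placeholders):

    #include <linux/kallsyms.h>

    char buf[KSYM_SYMBOL_LEN];

    /* ordinary address, e.g. a function pointer */
    sprint_symbol(buf, (unsigned long)some_function);   /* "some_function+0x0/0x1a0" */

    /* stack-saved return address taken from a backtrace */
    sprint_backtrace(buf, return_address);              /* looked up at address - 1 */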
371 | /* Look up a kernel symbol and print it to the kernel messages. */ | 409 | /* Look up a kernel symbol and print it to the kernel messages. */ |
372 | void __print_symbol(const char *fmt, unsigned long address) | 410 | void __print_symbol(const char *fmt, unsigned long address) |
373 | { | 411 | { |
@@ -477,13 +515,11 @@ static int s_show(struct seq_file *m, void *p) | |||
477 | */ | 515 | */ |
478 | type = iter->exported ? toupper(iter->type) : | 516 | type = iter->exported ? toupper(iter->type) : |
479 | tolower(iter->type); | 517 | tolower(iter->type); |
480 | seq_printf(m, "%0*lx %c %s\t[%s]\n", | 518 | seq_printf(m, "%pK %c %s\t[%s]\n", (void *)iter->value, |
481 | (int)(2 * sizeof(void *)), | 519 | type, iter->name, iter->module_name); |
482 | iter->value, type, iter->name, iter->module_name); | ||
483 | } else | 520 | } else |
484 | seq_printf(m, "%0*lx %c %s\n", | 521 | seq_printf(m, "%pK %c %s\n", (void *)iter->value, |
485 | (int)(2 * sizeof(void *)), | 522 | iter->type, iter->name); |
486 | iter->value, iter->type, iter->name); | ||
487 | return 0; | 523 | return 0; |
488 | } | 524 | } |
489 | 525 | ||
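Replacing the hand-rolled %0*lx with %pK hands the pointer-hiding policy to vsnprintf: depending on the kptr_restrict sysctl, %pK shows the real value only to sufficiently privileged readers and prints zeros to everyone else, which is what /proc/kallsyms and the module attributes below want. A small sketch of the difference (the behaviour described is what kptr_restrict is documented to control):

    /* same pointer, but only the second line is subject to kptr_restrict */
    seq_printf(m, "raw:      0x%p\n",  (void *)addr);
    seq_printf(m, "filtered: 0x%pK\n", (void *)addr);   /* zeros for unprivileged
                                                            readers when restricted */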
diff --git a/kernel/kexec.c b/kernel/kexec.c index ec19b92c7ebd..87b77de03dd3 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c | |||
@@ -33,6 +33,7 @@ | |||
33 | #include <linux/vmalloc.h> | 33 | #include <linux/vmalloc.h> |
34 | #include <linux/swap.h> | 34 | #include <linux/swap.h> |
35 | #include <linux/kmsg_dump.h> | 35 | #include <linux/kmsg_dump.h> |
36 | #include <linux/syscore_ops.h> | ||
36 | 37 | ||
37 | #include <asm/page.h> | 38 | #include <asm/page.h> |
38 | #include <asm/uaccess.h> | 39 | #include <asm/uaccess.h> |
@@ -144,7 +145,7 @@ static int do_kimage_alloc(struct kimage **rimage, unsigned long entry, | |||
144 | /* Initialize the list of destination pages */ | 145 | /* Initialize the list of destination pages */ |
145 | INIT_LIST_HEAD(&image->dest_pages); | 146 | INIT_LIST_HEAD(&image->dest_pages); |
146 | 147 | ||
147 | /* Initialize the list of unuseable pages */ | 148 | /* Initialize the list of unusable pages */ |
148 | INIT_LIST_HEAD(&image->unuseable_pages); | 149 | INIT_LIST_HEAD(&image->unuseable_pages); |
149 | 150 | ||
150 | /* Read in the segments */ | 151 | /* Read in the segments */ |
@@ -454,7 +455,7 @@ static struct page *kimage_alloc_normal_control_pages(struct kimage *image, | |||
454 | /* Deal with the destination pages I have inadvertently allocated. | 455 | /* Deal with the destination pages I have inadvertently allocated. |
455 | * | 456 | * |
456 | * Ideally I would convert multi-page allocations into single | 457 | * Ideally I would convert multi-page allocations into single |
457 | * page allocations, and add everyting to image->dest_pages. | 458 | * page allocations, and add everything to image->dest_pages. |
458 | * | 459 | * |
459 | * For now it is simpler to just free the pages. | 460 | * For now it is simpler to just free the pages. |
460 | */ | 461 | */ |
@@ -602,7 +603,7 @@ static void kimage_free_extra_pages(struct kimage *image) | |||
602 | /* Walk through and free any extra destination pages I may have */ | 603 | /* Walk through and free any extra destination pages I may have */ |
603 | kimage_free_page_list(&image->dest_pages); | 604 | kimage_free_page_list(&image->dest_pages); |
604 | 605 | ||
605 | /* Walk through and free any unuseable pages I have cached */ | 606 | /* Walk through and free any unusable pages I have cached */ |
606 | kimage_free_page_list(&image->unuseable_pages); | 607 | kimage_free_page_list(&image->unuseable_pages); |
607 | 608 | ||
608 | } | 609 | } |
@@ -1099,7 +1100,8 @@ size_t crash_get_memory_size(void) | |||
1099 | return size; | 1100 | return size; |
1100 | } | 1101 | } |
1101 | 1102 | ||
1102 | static void free_reserved_phys_range(unsigned long begin, unsigned long end) | 1103 | void __weak crash_free_reserved_phys_range(unsigned long begin, |
1104 | unsigned long end) | ||
1103 | { | 1105 | { |
1104 | unsigned long addr; | 1106 | unsigned long addr; |
1105 | 1107 | ||
@@ -1135,7 +1137,7 @@ int crash_shrink_memory(unsigned long new_size) | |||
1135 | start = roundup(start, PAGE_SIZE); | 1137 | start = roundup(start, PAGE_SIZE); |
1136 | end = roundup(start + new_size, PAGE_SIZE); | 1138 | end = roundup(start + new_size, PAGE_SIZE); |
1137 | 1139 | ||
1138 | free_reserved_phys_range(end, crashk_res.end); | 1140 | crash_free_reserved_phys_range(end, crashk_res.end); |
1139 | 1141 | ||
1140 | if ((start == end) && (crashk_res.parent != NULL)) | 1142 | if ((start == end) && (crashk_res.parent != NULL)) |
1141 | release_resource(&crashk_res); | 1143 | release_resource(&crashk_res); |
@@ -1531,6 +1533,11 @@ int kernel_kexec(void) | |||
1531 | local_irq_disable(); | 1533 | local_irq_disable(); |
1532 | /* Suspend system devices */ | 1534 | /* Suspend system devices */ |
1533 | error = sysdev_suspend(PMSG_FREEZE); | 1535 | error = sysdev_suspend(PMSG_FREEZE); |
1536 | if (!error) { | ||
1537 | error = syscore_suspend(); | ||
1538 | if (error) | ||
1539 | sysdev_resume(); | ||
1540 | } | ||
1534 | if (error) | 1541 | if (error) |
1535 | goto Enable_irqs; | 1542 | goto Enable_irqs; |
1536 | } else | 1543 | } else |
@@ -1545,6 +1552,7 @@ int kernel_kexec(void) | |||
1545 | 1552 | ||
1546 | #ifdef CONFIG_KEXEC_JUMP | 1553 | #ifdef CONFIG_KEXEC_JUMP |
1547 | if (kexec_image->preserve_context) { | 1554 | if (kexec_image->preserve_context) { |
1555 | syscore_resume(); | ||
1548 | sysdev_resume(); | 1556 | sysdev_resume(); |
1549 | Enable_irqs: | 1557 | Enable_irqs: |
1550 | local_irq_enable(); | 1558 | local_irq_enable(); |
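The new error handling follows the usual rule for stacked suspend steps: only attempt syscore_suspend() once sysdev_suspend() has succeeded, and if the second step fails, undo the first before bailing out. The same three-line pattern is repeated below in hibernate.c and suspend.c. A condensed sketch of the idiom:

    error = sysdev_suspend(PMSG_FREEZE);
    if (!error) {
            error = syscore_suspend();
            if (error)
                    sysdev_resume();     /* roll back the step that did succeed */
    }
    if (error)
            goto Enable_irqs;            /* nothing suspended is left behind */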
diff --git a/kernel/kthread.c b/kernel/kthread.c index c55afba990a3..3b34d2732bce 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c | |||
@@ -27,6 +27,7 @@ struct kthread_create_info | |||
27 | /* Information passed to kthread() from kthreadd. */ | 27 | /* Information passed to kthread() from kthreadd. */ |
28 | int (*threadfn)(void *data); | 28 | int (*threadfn)(void *data); |
29 | void *data; | 29 | void *data; |
30 | int node; | ||
30 | 31 | ||
31 | /* Result passed back to kthread_create() from kthreadd. */ | 32 | /* Result passed back to kthread_create() from kthreadd. */ |
32 | struct task_struct *result; | 33 | struct task_struct *result; |
@@ -98,10 +99,23 @@ static int kthread(void *_create) | |||
98 | do_exit(ret); | 99 | do_exit(ret); |
99 | } | 100 | } |
100 | 101 | ||
102 | /* called from do_fork() to get node information for about to be created task */ | ||
102 | /* called from do_fork() to get node information for the task about to be created */ | ||
103 | int tsk_fork_get_node(struct task_struct *tsk) | ||
104 | { | ||
105 | #ifdef CONFIG_NUMA | ||
106 | if (tsk == kthreadd_task) | ||
107 | return tsk->pref_node_fork; | ||
108 | #endif | ||
109 | return numa_node_id(); | ||
110 | } | ||
111 | |||
101 | static void create_kthread(struct kthread_create_info *create) | 112 | static void create_kthread(struct kthread_create_info *create) |
102 | { | 113 | { |
103 | int pid; | 114 | int pid; |
104 | 115 | ||
116 | #ifdef CONFIG_NUMA | ||
117 | current->pref_node_fork = create->node; | ||
118 | #endif | ||
105 | /* We want our own signal handler (we take no signals by default). */ | 119 | /* We want our own signal handler (we take no signals by default). */ |
106 | pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD); | 120 | pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD); |
107 | if (pid < 0) { | 121 | if (pid < 0) { |
@@ -111,33 +125,38 @@ static void create_kthread(struct kthread_create_info *create) | |||
111 | } | 125 | } |
112 | 126 | ||
113 | /** | 127 | /** |
114 | * kthread_create - create a kthread. | 128 | * kthread_create_on_node - create a kthread. |
115 | * @threadfn: the function to run until signal_pending(current). | 129 | * @threadfn: the function to run until signal_pending(current). |
116 | * @data: data ptr for @threadfn. | 130 | * @data: data ptr for @threadfn. |
131 | * @node: memory node number. | ||
117 | * @namefmt: printf-style name for the thread. | 132 | * @namefmt: printf-style name for the thread. |
118 | * | 133 | * |
119 | * Description: This helper function creates and names a kernel | 134 | * Description: This helper function creates and names a kernel |
120 | * thread. The thread will be stopped: use wake_up_process() to start | 135 | * thread. The thread will be stopped: use wake_up_process() to start |
121 | * it. See also kthread_run(). | 136 | * it. See also kthread_run(). |
122 | * | 137 | * |
138 | * If thread is going to be bound on a particular cpu, give its node | ||
139 | * in @node, to get NUMA affinity for kthread stack, or else give -1. | ||
123 | * When woken, the thread will run @threadfn() with @data as its | 140 | * When woken, the thread will run @threadfn() with @data as its |
124 | * argument. @threadfn() can either call do_exit() directly if it is a | 141 | * argument. @threadfn() can either call do_exit() directly if it is a |
125 | * standalone thread for which noone will call kthread_stop(), or | 142 | * standalone thread for which no one will call kthread_stop(), or |
126 | * return when 'kthread_should_stop()' is true (which means | 143 | * return when 'kthread_should_stop()' is true (which means |
127 | * kthread_stop() has been called). The return value should be zero | 144 | * kthread_stop() has been called). The return value should be zero |
128 | * or a negative error number; it will be passed to kthread_stop(). | 145 | * or a negative error number; it will be passed to kthread_stop(). |
129 | * | 146 | * |
130 | * Returns a task_struct or ERR_PTR(-ENOMEM). | 147 | * Returns a task_struct or ERR_PTR(-ENOMEM). |
131 | */ | 148 | */ |
132 | struct task_struct *kthread_create(int (*threadfn)(void *data), | 149 | struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), |
133 | void *data, | 150 | void *data, |
134 | const char namefmt[], | 151 | int node, |
135 | ...) | 152 | const char namefmt[], |
153 | ...) | ||
136 | { | 154 | { |
137 | struct kthread_create_info create; | 155 | struct kthread_create_info create; |
138 | 156 | ||
139 | create.threadfn = threadfn; | 157 | create.threadfn = threadfn; |
140 | create.data = data; | 158 | create.data = data; |
159 | create.node = node; | ||
141 | init_completion(&create.done); | 160 | init_completion(&create.done); |
142 | 161 | ||
143 | spin_lock(&kthread_create_lock); | 162 | spin_lock(&kthread_create_lock); |
@@ -164,7 +183,7 @@ struct task_struct *kthread_create(int (*threadfn)(void *data), | |||
164 | } | 183 | } |
165 | return create.result; | 184 | return create.result; |
166 | } | 185 | } |
167 | EXPORT_SYMBOL(kthread_create); | 186 | EXPORT_SYMBOL(kthread_create_on_node); |
168 | 187 | ||
169 | /** | 188 | /** |
170 | * kthread_bind - bind a just-created kthread to a cpu. | 189 | * kthread_bind - bind a just-created kthread to a cpu. |
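Callers that will pin the thread to a CPU can now ask for the kthread's stack to come from that CPU's memory node; everyone else passes -1 and keeps the previous behaviour. A hedged usage sketch (my_thread_fn, my_data and cpu are placeholders; the surrounding calls are the usual create/bind/wake sequence):

    struct task_struct *tsk;

    tsk = kthread_create_on_node(my_thread_fn, my_data,
                                 cpu_to_node(cpu),        /* NUMA node for the stack */
                                 "my_worker/%d", cpu);
    if (!IS_ERR(tsk)) {
            kthread_bind(tsk, cpu);
            wake_up_process(tsk);        /* the thread is created stopped */
    }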
diff --git a/kernel/latencytop.c b/kernel/latencytop.c index ee74b35e528d..376066e10413 100644 --- a/kernel/latencytop.c +++ b/kernel/latencytop.c | |||
@@ -153,7 +153,7 @@ static inline void store_stacktrace(struct task_struct *tsk, | |||
153 | } | 153 | } |
154 | 154 | ||
155 | /** | 155 | /** |
156 | * __account_scheduler_latency - record an occured latency | 156 | * __account_scheduler_latency - record an occurred latency |
157 | * @tsk - the task struct of the task hitting the latency | 157 | * @tsk - the task struct of the task hitting the latency |
158 | * @usecs - the duration of the latency in microseconds | 158 | * @usecs - the duration of the latency in microseconds |
159 | * @inter - 1 if the sleep was interruptible, 0 if uninterruptible | 159 | * @inter - 1 if the sleep was interruptible, 0 if uninterruptible |
diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 0d2058da80f5..53a68956f131 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c | |||
@@ -2309,7 +2309,7 @@ void trace_hardirqs_on_caller(unsigned long ip) | |||
2309 | if (unlikely(curr->hardirqs_enabled)) { | 2309 | if (unlikely(curr->hardirqs_enabled)) { |
2310 | /* | 2310 | /* |
2311 | * Neither irq nor preemption are disabled here | 2311 | * Neither irq nor preemption are disabled here |
2312 | * so this is racy by nature but loosing one hit | 2312 | * so this is racy by nature but losing one hit |
2313 | * in a stat is not a big deal. | 2313 | * in a stat is not a big deal. |
2314 | */ | 2314 | */ |
2315 | __debug_atomic_inc(redundant_hardirqs_on); | 2315 | __debug_atomic_inc(redundant_hardirqs_on); |
@@ -2620,7 +2620,7 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this, | |||
2620 | if (!graph_lock()) | 2620 | if (!graph_lock()) |
2621 | return 0; | 2621 | return 0; |
2622 | /* | 2622 | /* |
2623 | * Make sure we didnt race: | 2623 | * Make sure we didn't race: |
2624 | */ | 2624 | */ |
2625 | if (unlikely(hlock_class(this)->usage_mask & new_mask)) { | 2625 | if (unlikely(hlock_class(this)->usage_mask & new_mask)) { |
2626 | graph_unlock(); | 2626 | graph_unlock(); |
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c index 1969d2fc4b36..71edd2f60c02 100644 --- a/kernel/lockdep_proc.c +++ b/kernel/lockdep_proc.c | |||
@@ -225,7 +225,7 @@ static int lockdep_stats_show(struct seq_file *m, void *v) | |||
225 | nr_irq_read_safe = 0, nr_irq_read_unsafe = 0, | 225 | nr_irq_read_safe = 0, nr_irq_read_unsafe = 0, |
226 | nr_softirq_read_safe = 0, nr_softirq_read_unsafe = 0, | 226 | nr_softirq_read_safe = 0, nr_softirq_read_unsafe = 0, |
227 | nr_hardirq_read_safe = 0, nr_hardirq_read_unsafe = 0, | 227 | nr_hardirq_read_safe = 0, nr_hardirq_read_unsafe = 0, |
228 | sum_forward_deps = 0, factor = 0; | 228 | sum_forward_deps = 0; |
229 | 229 | ||
230 | list_for_each_entry(class, &all_lock_classes, lock_entry) { | 230 | list_for_each_entry(class, &all_lock_classes, lock_entry) { |
231 | 231 | ||
@@ -283,13 +283,6 @@ static int lockdep_stats_show(struct seq_file *m, void *v) | |||
283 | nr_hardirq_unsafe * nr_hardirq_safe + | 283 | nr_hardirq_unsafe * nr_hardirq_safe + |
284 | nr_list_entries); | 284 | nr_list_entries); |
285 | 285 | ||
286 | /* | ||
287 | * Estimated factor between direct and indirect | ||
288 | * dependencies: | ||
289 | */ | ||
290 | if (nr_list_entries) | ||
291 | factor = sum_forward_deps / nr_list_entries; | ||
292 | |||
293 | #ifdef CONFIG_PROVE_LOCKING | 286 | #ifdef CONFIG_PROVE_LOCKING |
294 | seq_printf(m, " dependency chains: %11lu [max: %lu]\n", | 287 | seq_printf(m, " dependency chains: %11lu [max: %lu]\n", |
295 | nr_lock_chains, MAX_LOCKDEP_CHAINS); | 288 | nr_lock_chains, MAX_LOCKDEP_CHAINS); |
diff --git a/kernel/module.c b/kernel/module.c index efa290ea94bf..d5938a5c19c4 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -809,7 +809,7 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, | |||
809 | wait_for_zero_refcount(mod); | 809 | wait_for_zero_refcount(mod); |
810 | 810 | ||
811 | mutex_unlock(&module_mutex); | 811 | mutex_unlock(&module_mutex); |
812 | /* Final destruction now noone is using it. */ | 812 | /* Final destruction now no one is using it. */ |
813 | if (mod->exit != NULL) | 813 | if (mod->exit != NULL) |
814 | mod->exit(); | 814 | mod->exit(); |
815 | blocking_notifier_call_chain(&module_notify_list, | 815 | blocking_notifier_call_chain(&module_notify_list, |
@@ -1168,7 +1168,7 @@ static ssize_t module_sect_show(struct module_attribute *mattr, | |||
1168 | { | 1168 | { |
1169 | struct module_sect_attr *sattr = | 1169 | struct module_sect_attr *sattr = |
1170 | container_of(mattr, struct module_sect_attr, mattr); | 1170 | container_of(mattr, struct module_sect_attr, mattr); |
1171 | return sprintf(buf, "0x%lx\n", sattr->address); | 1171 | return sprintf(buf, "0x%pK\n", (void *)sattr->address); |
1172 | } | 1172 | } |
1173 | 1173 | ||
1174 | static void free_sect_attrs(struct module_sect_attrs *sect_attrs) | 1174 | static void free_sect_attrs(struct module_sect_attrs *sect_attrs) |
@@ -2777,7 +2777,7 @@ static struct module *load_module(void __user *umod, | |||
2777 | mod->state = MODULE_STATE_COMING; | 2777 | mod->state = MODULE_STATE_COMING; |
2778 | 2778 | ||
2779 | /* Now sew it into the lists so we can get lockdep and oops | 2779 | /* Now sew it into the lists so we can get lockdep and oops |
2780 | * info during argument parsing. Noone should access us, since | 2780 | * info during argument parsing. No one should access us, since |
2781 | * strong_try_module_get() will fail. | 2781 | * strong_try_module_get() will fail. |
2782 | * lockdep/oops can run asynchronous, so use the RCU list insertion | 2782 | * lockdep/oops can run asynchronous, so use the RCU list insertion |
2783 | * function to insert in a way safe to concurrent readers. | 2783 | * function to insert in a way safe to concurrent readers. |
@@ -2971,7 +2971,7 @@ static const char *get_ksymbol(struct module *mod, | |||
2971 | else | 2971 | else |
2972 | nextval = (unsigned long)mod->module_core+mod->core_text_size; | 2972 | nextval = (unsigned long)mod->module_core+mod->core_text_size; |
2973 | 2973 | ||
2974 | /* Scan for closest preceeding symbol, and next symbol. (ELF | 2974 | /* Scan for closest preceding symbol, and next symbol. (ELF |
2975 | starts real symbols at 1). */ | 2975 | starts real symbols at 1). */ |
2976 | for (i = 1; i < mod->num_symtab; i++) { | 2976 | for (i = 1; i < mod->num_symtab; i++) { |
2977 | if (mod->symtab[i].st_shndx == SHN_UNDEF) | 2977 | if (mod->symtab[i].st_shndx == SHN_UNDEF) |
@@ -3224,7 +3224,7 @@ static int m_show(struct seq_file *m, void *p) | |||
3224 | mod->state == MODULE_STATE_COMING ? "Loading": | 3224 | mod->state == MODULE_STATE_COMING ? "Loading": |
3225 | "Live"); | 3225 | "Live"); |
3226 | /* Used by oprofile and other similar tools. */ | 3226 | /* Used by oprofile and other similar tools. */ |
3227 | seq_printf(m, " 0x%p", mod->module_core); | 3227 | seq_printf(m, " 0x%pK", mod->module_core); |
3228 | 3228 | ||
3229 | /* Taints info */ | 3229 | /* Taints info */ |
3230 | if (mod->taints) | 3230 | if (mod->taints) |
diff --git a/kernel/mutex.c b/kernel/mutex.c index a5889fb28ecf..c4195fa98900 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c | |||
@@ -245,7 +245,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, | |||
245 | } | 245 | } |
246 | __set_task_state(task, state); | 246 | __set_task_state(task, state); |
247 | 247 | ||
248 | /* didnt get the lock, go to sleep: */ | 248 | /* didn't get the lock, go to sleep: */ |
249 | spin_unlock_mutex(&lock->wait_lock, flags); | 249 | spin_unlock_mutex(&lock->wait_lock, flags); |
250 | preempt_enable_no_resched(); | 250 | preempt_enable_no_resched(); |
251 | schedule(); | 251 | schedule(); |
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index f74e6c00e26d..a05d191ffdd9 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c | |||
@@ -69,13 +69,13 @@ static struct nsproxy *create_new_namespaces(unsigned long flags, | |||
69 | goto out_ns; | 69 | goto out_ns; |
70 | } | 70 | } |
71 | 71 | ||
72 | new_nsp->uts_ns = copy_utsname(flags, tsk->nsproxy->uts_ns); | 72 | new_nsp->uts_ns = copy_utsname(flags, tsk); |
73 | if (IS_ERR(new_nsp->uts_ns)) { | 73 | if (IS_ERR(new_nsp->uts_ns)) { |
74 | err = PTR_ERR(new_nsp->uts_ns); | 74 | err = PTR_ERR(new_nsp->uts_ns); |
75 | goto out_uts; | 75 | goto out_uts; |
76 | } | 76 | } |
77 | 77 | ||
78 | new_nsp->ipc_ns = copy_ipcs(flags, tsk->nsproxy->ipc_ns); | 78 | new_nsp->ipc_ns = copy_ipcs(flags, tsk); |
79 | if (IS_ERR(new_nsp->ipc_ns)) { | 79 | if (IS_ERR(new_nsp->ipc_ns)) { |
80 | err = PTR_ERR(new_nsp->ipc_ns); | 80 | err = PTR_ERR(new_nsp->ipc_ns); |
81 | goto out_ipc; | 81 | goto out_ipc; |
diff --git a/kernel/padata.c b/kernel/padata.c index 751019415d23..b91941df5e63 100644 --- a/kernel/padata.c +++ b/kernel/padata.c | |||
@@ -262,7 +262,7 @@ static void padata_reorder(struct parallel_data *pd) | |||
262 | /* | 262 | /* |
263 | * This cpu has to do the parallel processing of the next | 263 | * This cpu has to do the parallel processing of the next |
264 | * object. It's waiting in the cpu's parallelization queue, | 264 | * object. It's waiting in the cpu's parallelization queue, |
265 | * so exit imediately. | 265 | * so exit immediately. |
266 | */ | 266 | */ |
267 | if (PTR_ERR(padata) == -ENODATA) { | 267 | if (PTR_ERR(padata) == -ENODATA) { |
268 | del_timer(&pd->timer); | 268 | del_timer(&pd->timer); |
@@ -284,7 +284,7 @@ static void padata_reorder(struct parallel_data *pd) | |||
284 | /* | 284 | /* |
285 | * The next object that needs serialization might have arrived to | 285 | * The next object that needs serialization might have arrived to |
286 | * the reorder queues in the meantime, we will be called again | 286 | * the reorder queues in the meantime, we will be called again |
287 | * from the timer function if noone else cares for it. | 287 | * from the timer function if no one else cares for it. |
288 | */ | 288 | */ |
289 | if (atomic_read(&pd->reorder_objects) | 289 | if (atomic_read(&pd->reorder_objects) |
290 | && !(pinst->flags & PADATA_RESET)) | 290 | && !(pinst->flags & PADATA_RESET)) |
@@ -515,7 +515,7 @@ static void __padata_stop(struct padata_instance *pinst) | |||
515 | put_online_cpus(); | 515 | put_online_cpus(); |
516 | } | 516 | } |
517 | 517 | ||
518 | /* Replace the internal control stucture with a new one. */ | 518 | /* Replace the internal control structure with a new one. */ |
519 | static void padata_replace(struct padata_instance *pinst, | 519 | static void padata_replace(struct padata_instance *pinst, |
520 | struct parallel_data *pd_new) | 520 | struct parallel_data *pd_new) |
521 | { | 521 | { |
@@ -768,7 +768,7 @@ static int __padata_remove_cpu(struct padata_instance *pinst, int cpu) | |||
768 | } | 768 | } |
769 | 769 | ||
770 | /** | 770 | /** |
771 | * padata_remove_cpu - remove a cpu from the one or both(serial and paralell) | 771 | * padata_remove_cpu - remove a cpu from the one or both(serial and parallel) |
772 | * padata cpumasks. | 772 | * padata cpumasks. |
773 | * | 773 | * |
774 | * @pinst: padata instance | 774 | * @pinst: padata instance |
diff --git a/kernel/panic.c b/kernel/panic.c index 991bb87a1704..69231670eb95 100644 --- a/kernel/panic.c +++ b/kernel/panic.c | |||
@@ -433,3 +433,13 @@ EXPORT_SYMBOL(__stack_chk_fail); | |||
433 | 433 | ||
434 | core_param(panic, panic_timeout, int, 0644); | 434 | core_param(panic, panic_timeout, int, 0644); |
435 | core_param(pause_on_oops, pause_on_oops, int, 0644); | 435 | core_param(pause_on_oops, pause_on_oops, int, 0644); |
436 | |||
437 | static int __init oops_setup(char *s) | ||
438 | { | ||
439 | if (!s) | ||
440 | return -EINVAL; | ||
441 | if (!strcmp(s, "panic")) | ||
442 | panic_on_oops = 1; | ||
443 | return 0; | ||
444 | } | ||
445 | early_param("oops", oops_setup); | ||
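With this early_param the oops-to-panic escalation can be requested from the kernel command line rather than only through the sysctl, and together with the panic timeout exported just above via core_param it turns any oops into an automatic reboot. An illustrative command line (the 30 is arbitrary):

    oops=panic panic=30

Here any oops escalates to a panic, and panic_timeout then reboots the machine 30 seconds later.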
diff --git a/kernel/params.c b/kernel/params.c index 0da1411222b9..7ab388a48a2e 100644 --- a/kernel/params.c +++ b/kernel/params.c | |||
@@ -95,7 +95,7 @@ static int parse_one(char *param, | |||
95 | /* Find parameter */ | 95 | /* Find parameter */ |
96 | for (i = 0; i < num_params; i++) { | 96 | for (i = 0; i < num_params; i++) { |
97 | if (parameq(param, params[i].name)) { | 97 | if (parameq(param, params[i].name)) { |
98 | /* Noone handled NULL, so do it here. */ | 98 | /* No one handled NULL, so do it here. */ |
99 | if (!val && params[i].ops->set != param_set_bool) | 99 | if (!val && params[i].ops->set != param_set_bool) |
100 | return -EINVAL; | 100 | return -EINVAL; |
101 | DEBUGP("They are equal! Calling %p\n", | 101 | DEBUGP("They are equal! Calling %p\n", |
diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 3472bb1a070c..8e81a9860a0d 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c | |||
@@ -145,7 +145,8 @@ static struct srcu_struct pmus_srcu; | |||
145 | */ | 145 | */ |
146 | int sysctl_perf_event_paranoid __read_mostly = 1; | 146 | int sysctl_perf_event_paranoid __read_mostly = 1; |
147 | 147 | ||
148 | int sysctl_perf_event_mlock __read_mostly = 512; /* 'free' kb per user */ | 148 | /* Minimum for 512 kiB + 1 user control page */ |
149 | int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024); /* 'free' kiB per user */ | ||
149 | 150 | ||
150 | /* | 151 | /* |
151 | * max perf event sample rate | 152 | * max perf event sample rate |
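The new default is expressed as 512 kiB of buffer plus exactly one page for the user control page, so on 4 kiB pages the per-user budget becomes 512 + 4 = 516 kiB, and on 64 kiB pages it would be 576 kiB. A one-line userspace check of that arithmetic (using getpagesize() as a stand-in for PAGE_SIZE):

    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            long kib = 512 + getpagesize() / 1024;
            printf("default mlock budget: %ld kiB per user\n", kib);  /* 516 with 4 kiB pages */
            return 0;
    }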
@@ -363,6 +364,7 @@ void perf_cgroup_switch(struct task_struct *task, int mode) | |||
363 | } | 364 | } |
364 | 365 | ||
365 | if (mode & PERF_CGROUP_SWIN) { | 366 | if (mode & PERF_CGROUP_SWIN) { |
367 | WARN_ON_ONCE(cpuctx->cgrp); | ||
366 | /* set cgrp before ctxsw in to | 368 | /* set cgrp before ctxsw in to |
367 | * allow event_filter_match() to not | 369 | * allow event_filter_match() to not |
368 | * have to pass task around | 370 | * have to pass task around |
@@ -941,6 +943,7 @@ static void perf_group_attach(struct perf_event *event) | |||
941 | static void | 943 | static void |
942 | list_del_event(struct perf_event *event, struct perf_event_context *ctx) | 944 | list_del_event(struct perf_event *event, struct perf_event_context *ctx) |
943 | { | 945 | { |
946 | struct perf_cpu_context *cpuctx; | ||
944 | /* | 947 | /* |
945 | * We can have double detach due to exit/hot-unplug + close. | 948 | * We can have double detach due to exit/hot-unplug + close. |
946 | */ | 949 | */ |
@@ -949,8 +952,17 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) | |||
949 | 952 | ||
950 | event->attach_state &= ~PERF_ATTACH_CONTEXT; | 953 | event->attach_state &= ~PERF_ATTACH_CONTEXT; |
951 | 954 | ||
952 | if (is_cgroup_event(event)) | 955 | if (is_cgroup_event(event)) { |
953 | ctx->nr_cgroups--; | 956 | ctx->nr_cgroups--; |
957 | cpuctx = __get_cpu_context(ctx); | ||
958 | /* | ||
959 | * if there are no more cgroup events | ||
960 | * then cler cgrp to avoid stale pointer | ||
960 | * then clear cgrp to avoid stale pointer | ||
961 | * in update_cgrp_time_from_cpuctx() | ||
962 | */ | ||
963 | if (!ctx->nr_cgroups) | ||
964 | cpuctx->cgrp = NULL; | ||
965 | } | ||
954 | 966 | ||
955 | ctx->nr_events--; | 967 | ctx->nr_events--; |
956 | if (event->attr.inherit_stat) | 968 | if (event->attr.inherit_stat) |
@@ -2412,6 +2424,14 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx) | |||
2412 | if (!ctx || !ctx->nr_events) | 2424 | if (!ctx || !ctx->nr_events) |
2413 | goto out; | 2425 | goto out; |
2414 | 2426 | ||
2427 | /* | ||
2428 | * We must ctxsw out cgroup events to avoid conflict | ||
2429 | * when invoking perf_task_event_sched_in() later on | ||
2430 | * in this function. Otherwise we end up trying to | ||
2431 | * ctxswin cgroup events which are already scheduled | ||
2432 | * in. | ||
2433 | */ | ||
2434 | perf_cgroup_sched_out(current); | ||
2415 | task_ctx_sched_out(ctx, EVENT_ALL); | 2435 | task_ctx_sched_out(ctx, EVENT_ALL); |
2416 | 2436 | ||
2417 | raw_spin_lock(&ctx->lock); | 2437 | raw_spin_lock(&ctx->lock); |
@@ -2436,6 +2456,9 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx) | |||
2436 | 2456 | ||
2437 | raw_spin_unlock(&ctx->lock); | 2457 | raw_spin_unlock(&ctx->lock); |
2438 | 2458 | ||
2459 | /* | ||
2460 | * Also calls ctxswin for cgroup events, if any: | ||
2461 | */ | ||
2439 | perf_event_context_sched_in(ctx, ctx->task); | 2462 | perf_event_context_sched_in(ctx, ctx->task); |
2440 | out: | 2463 | out: |
2441 | local_irq_restore(flags); | 2464 | local_irq_restore(flags); |
@@ -6520,6 +6543,11 @@ SYSCALL_DEFINE5(perf_event_open, | |||
6520 | goto err_alloc; | 6543 | goto err_alloc; |
6521 | } | 6544 | } |
6522 | 6545 | ||
6546 | if (task) { | ||
6547 | put_task_struct(task); | ||
6548 | task = NULL; | ||
6549 | } | ||
6550 | |||
6523 | /* | 6551 | /* |
6524 | * Look up the group leader (we will attach this event to it): | 6552 | * Look up the group leader (we will attach this event to it): |
6525 | */ | 6553 | */ |
diff --git a/kernel/pid.c b/kernel/pid.c index 02f221274265..57a8346a270e 100644 --- a/kernel/pid.c +++ b/kernel/pid.c | |||
@@ -217,11 +217,14 @@ static int alloc_pidmap(struct pid_namespace *pid_ns) | |||
217 | return -1; | 217 | return -1; |
218 | } | 218 | } |
219 | 219 | ||
220 | int next_pidmap(struct pid_namespace *pid_ns, int last) | 220 | int next_pidmap(struct pid_namespace *pid_ns, unsigned int last) |
221 | { | 221 | { |
222 | int offset; | 222 | int offset; |
223 | struct pidmap *map, *end; | 223 | struct pidmap *map, *end; |
224 | 224 | ||
225 | if (last >= PID_MAX_LIMIT) | ||
226 | return -1; | ||
227 | |||
225 | offset = (last + 1) & BITS_PER_PAGE_MASK; | 228 | offset = (last + 1) & BITS_PER_PAGE_MASK; |
226 | map = &pid_ns->pidmap[(last + 1)/BITS_PER_PAGE]; | 229 | map = &pid_ns->pidmap[(last + 1)/BITS_PER_PAGE]; |
227 | end = &pid_ns->pidmap[PIDMAP_ENTRIES]; | 230 | end = &pid_ns->pidmap[PIDMAP_ENTRIES]; |
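The widened type and the new bound matter because last arrives straight from userspace as the /proc readdir offset: with a signed int, an offset near INT_MAX makes last + 1 wrap, and the pidmap[] element computed from it lies outside the table. Rejecting anything at or above PID_MAX_LIMIT before the index is formed keeps the map pointer in range. A condensed sketch of the guard (PID_MAX_LIMIT is at most a few million pids on common configs):

    /* 'last' is user-controlled via the readdir offset */
    if (last >= PID_MAX_LIMIT)
            return -1;                        /* nothing valid can follow */

    offset = (last + 1) & BITS_PER_PAGE_MASK;
    map = &pid_ns->pidmap[(last + 1) / BITS_PER_PAGE];   /* index now bounded */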
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index a5aff94e1f0b..e9c9adc84ca6 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/err.h> | 14 | #include <linux/err.h> |
15 | #include <linux/acct.h> | 15 | #include <linux/acct.h> |
16 | #include <linux/slab.h> | 16 | #include <linux/slab.h> |
17 | #include <linux/proc_fs.h> | ||
17 | 18 | ||
18 | #define BITS_PER_PAGE (PAGE_SIZE*8) | 19 | #define BITS_PER_PAGE (PAGE_SIZE*8) |
19 | 20 | ||
@@ -72,7 +73,7 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p | |||
72 | { | 73 | { |
73 | struct pid_namespace *ns; | 74 | struct pid_namespace *ns; |
74 | unsigned int level = parent_pid_ns->level + 1; | 75 | unsigned int level = parent_pid_ns->level + 1; |
75 | int i; | 76 | int i, err = -ENOMEM; |
76 | 77 | ||
77 | ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL); | 78 | ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL); |
78 | if (ns == NULL) | 79 | if (ns == NULL) |
@@ -96,14 +97,20 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p | |||
96 | for (i = 1; i < PIDMAP_ENTRIES; i++) | 97 | for (i = 1; i < PIDMAP_ENTRIES; i++) |
97 | atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); | 98 | atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); |
98 | 99 | ||
100 | err = pid_ns_prepare_proc(ns); | ||
101 | if (err) | ||
102 | goto out_put_parent_pid_ns; | ||
103 | |||
99 | return ns; | 104 | return ns; |
100 | 105 | ||
106 | out_put_parent_pid_ns: | ||
107 | put_pid_ns(parent_pid_ns); | ||
101 | out_free_map: | 108 | out_free_map: |
102 | kfree(ns->pidmap[0].page); | 109 | kfree(ns->pidmap[0].page); |
103 | out_free: | 110 | out_free: |
104 | kmem_cache_free(pid_ns_cachep, ns); | 111 | kmem_cache_free(pid_ns_cachep, ns); |
105 | out: | 112 | out: |
106 | return ERR_PTR(-ENOMEM); | 113 | return ERR_PTR(err); |
107 | } | 114 | } |
108 | 115 | ||
109 | static void destroy_pid_namespace(struct pid_namespace *ns) | 116 | static void destroy_pid_namespace(struct pid_namespace *ns) |
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 67fea9d25d55..0791b13df7bf 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c | |||
@@ -1347,7 +1347,7 @@ void run_posix_cpu_timers(struct task_struct *tsk) | |||
1347 | 1347 | ||
1348 | /* | 1348 | /* |
1349 | * Now that all the timers on our list have the firing flag, | 1349 | * Now that all the timers on our list have the firing flag, |
1350 | * noone will touch their list entries but us. We'll take | 1350 | * no one will touch their list entries but us. We'll take |
1351 | * each timer's lock before clearing its firing flag, so no | 1351 | * each timer's lock before clearing its firing flag, so no |
1352 | * timer call will interfere. | 1352 | * timer call will interfere. |
1353 | */ | 1353 | */ |
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 4c0124919f9a..e5498d7405c3 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c | |||
@@ -313,7 +313,7 @@ static void schedule_next_timer(struct k_itimer *timr) | |||
313 | * restarted (i.e. we have flagged this in the sys_private entry of the | 313 | * restarted (i.e. we have flagged this in the sys_private entry of the |
314 | * info block). | 314 | * info block). |
315 | * | 315 | * |
316 | * To protect aginst the timer going away while the interrupt is queued, | 316 | * To protect against the timer going away while the interrupt is queued, |
317 | * we require that the it_requeue_pending flag be set. | 317 | * we require that the it_requeue_pending flag be set. |
318 | */ | 318 | */ |
319 | void do_schedule_next_timer(struct siginfo *info) | 319 | void do_schedule_next_timer(struct siginfo *info) |
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 4603f08dc47b..6de9a8fc3417 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig | |||
@@ -18,9 +18,13 @@ config SUSPEND_FREEZER | |||
18 | 18 | ||
19 | Turning OFF this setting is NOT recommended! If in doubt, say Y. | 19 | Turning OFF this setting is NOT recommended! If in doubt, say Y. |
20 | 20 | ||
21 | config HIBERNATE_CALLBACKS | ||
22 | bool | ||
23 | |||
21 | config HIBERNATION | 24 | config HIBERNATION |
22 | bool "Hibernation (aka 'suspend to disk')" | 25 | bool "Hibernation (aka 'suspend to disk')" |
23 | depends on SWAP && ARCH_HIBERNATION_POSSIBLE | 26 | depends on SWAP && ARCH_HIBERNATION_POSSIBLE |
27 | select HIBERNATE_CALLBACKS | ||
24 | select LZO_COMPRESS | 28 | select LZO_COMPRESS |
25 | select LZO_DECOMPRESS | 29 | select LZO_DECOMPRESS |
26 | ---help--- | 30 | ---help--- |
@@ -85,7 +89,7 @@ config PM_STD_PARTITION | |||
85 | 89 | ||
86 | config PM_SLEEP | 90 | config PM_SLEEP |
87 | def_bool y | 91 | def_bool y |
88 | depends on SUSPEND || HIBERNATION || XEN_SAVE_RESTORE | 92 | depends on SUSPEND || HIBERNATE_CALLBACKS |
89 | 93 | ||
90 | config PM_SLEEP_SMP | 94 | config PM_SLEEP_SMP |
91 | def_bool y | 95 | def_bool y |
diff --git a/kernel/power/Makefile b/kernel/power/Makefile index c350e18b53e3..c5ebc6a90643 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile | |||
@@ -1,4 +1,5 @@ | |||
1 | ccflags-$(CONFIG_PM_DEBUG) := -DDEBUG | 1 | |
2 | ccflags-$(CONFIG_PM_DEBUG) := -DDEBUG | ||
2 | 3 | ||
3 | obj-$(CONFIG_PM) += main.o | 4 | obj-$(CONFIG_PM) += main.o |
4 | obj-$(CONFIG_PM_SLEEP) += console.o | 5 | obj-$(CONFIG_PM_SLEEP) += console.o |
diff --git a/kernel/power/block_io.c b/kernel/power/block_io.c index 83bbc7c02df9..d09dd10c5a5e 100644 --- a/kernel/power/block_io.c +++ b/kernel/power/block_io.c | |||
@@ -28,7 +28,7 @@ | |||
28 | static int submit(int rw, struct block_device *bdev, sector_t sector, | 28 | static int submit(int rw, struct block_device *bdev, sector_t sector, |
29 | struct page *page, struct bio **bio_chain) | 29 | struct page *page, struct bio **bio_chain) |
30 | { | 30 | { |
31 | const int bio_rw = rw | REQ_SYNC | REQ_UNPLUG; | 31 | const int bio_rw = rw | REQ_SYNC; |
32 | struct bio *bio; | 32 | struct bio *bio; |
33 | 33 | ||
34 | bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); | 34 | bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); |
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index aeabd26e3342..50aae660174d 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c | |||
@@ -273,8 +273,11 @@ static int create_image(int platform_mode) | |||
273 | local_irq_disable(); | 273 | local_irq_disable(); |
274 | 274 | ||
275 | error = sysdev_suspend(PMSG_FREEZE); | 275 | error = sysdev_suspend(PMSG_FREEZE); |
276 | if (!error) | 276 | if (!error) { |
277 | error = syscore_suspend(); | 277 | error = syscore_suspend(); |
278 | if (error) | ||
279 | sysdev_resume(); | ||
280 | } | ||
278 | if (error) { | 281 | if (error) { |
279 | printk(KERN_ERR "PM: Some system devices failed to power down, " | 282 | printk(KERN_ERR "PM: Some system devices failed to power down, " |
280 | "aborting hibernation\n"); | 283 | "aborting hibernation\n"); |
@@ -407,8 +410,11 @@ static int resume_target_kernel(bool platform_mode) | |||
407 | local_irq_disable(); | 410 | local_irq_disable(); |
408 | 411 | ||
409 | error = sysdev_suspend(PMSG_QUIESCE); | 412 | error = sysdev_suspend(PMSG_QUIESCE); |
410 | if (!error) | 413 | if (!error) { |
411 | error = syscore_suspend(); | 414 | error = syscore_suspend(); |
415 | if (error) | ||
416 | sysdev_resume(); | ||
417 | } | ||
412 | if (error) | 418 | if (error) |
413 | goto Enable_irqs; | 419 | goto Enable_irqs; |
414 | 420 | ||
diff --git a/kernel/power/main.c b/kernel/power/main.c index 8eaba5f27b10..de9aef8742f4 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c | |||
@@ -224,7 +224,7 @@ power_attr(state); | |||
224 | * writing to 'state'. It first should read from 'wakeup_count' and store | 224 | * writing to 'state'. It first should read from 'wakeup_count' and store |
225 | * the read value. Then, after carrying out its own preparations for the system | 225 | * the read value. Then, after carrying out its own preparations for the system |
226 | * transition to a sleep state, it should write the stored value to | 226 | * transition to a sleep state, it should write the stored value to |
227 | * 'wakeup_count'. If that fails, at least one wakeup event has occured since | 227 | * 'wakeup_count'. If that fails, at least one wakeup event has occurred since |
228 | * 'wakeup_count' was read and 'state' should not be written to. Otherwise, it | 228 | * 'wakeup_count' was read and 'state' should not be written to. Otherwise, it |
229 | * is allowed to write to 'state', but the transition will be aborted if there | 229 | * is allowed to write to 'state', but the transition will be aborted if there |
230 | * are any wakeup events detected after 'wakeup_count' was written to. | 230 | * are any wakeup events detected after 'wakeup_count' was written to. |
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 2814c32aed51..8935369d503a 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c | |||
@@ -164,8 +164,11 @@ static int suspend_enter(suspend_state_t state) | |||
164 | BUG_ON(!irqs_disabled()); | 164 | BUG_ON(!irqs_disabled()); |
165 | 165 | ||
166 | error = sysdev_suspend(PMSG_SUSPEND); | 166 | error = sysdev_suspend(PMSG_SUSPEND); |
167 | if (!error) | 167 | if (!error) { |
168 | error = syscore_suspend(); | 168 | error = syscore_suspend(); |
169 | if (error) | ||
170 | sysdev_resume(); | ||
171 | } | ||
169 | if (!error) { | 172 | if (!error) { |
170 | if (!(suspend_test(TEST_CORE) || pm_wakeup_pending())) { | 173 | if (!(suspend_test(TEST_CORE) || pm_wakeup_pending())) { |
171 | error = suspend_ops->enter(state); | 174 | error = suspend_ops->enter(state); |
diff --git a/kernel/printk.c b/kernel/printk.c index 33284adb2189..da8ca817eae3 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
@@ -53,7 +53,7 @@ void asmlinkage __attribute__((weak)) early_printk(const char *fmt, ...) | |||
53 | #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) | 53 | #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) |
54 | 54 | ||
55 | /* printk's without a loglevel use this.. */ | 55 | /* printk's without a loglevel use this.. */ |
56 | #define DEFAULT_MESSAGE_LOGLEVEL 4 /* KERN_WARNING */ | 56 | #define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL |
57 | 57 | ||
58 | /* We show everything that is MORE important than this.. */ | 58 | /* We show everything that is MORE important than this.. */ |
59 | #define MINIMUM_CONSOLE_LOGLEVEL 1 /* Minimum loglevel we let people use */ | 59 | #define MINIMUM_CONSOLE_LOGLEVEL 1 /* Minimum loglevel we let people use */ |
@@ -113,6 +113,11 @@ static unsigned con_start; /* Index into log_buf: next char to be sent to consol | |||
113 | static unsigned log_end; /* Index into log_buf: most-recently-written-char + 1 */ | 113 | static unsigned log_end; /* Index into log_buf: most-recently-written-char + 1 */ |
114 | 114 | ||
115 | /* | 115 | /* |
116 | * If exclusive_console is non-NULL then only this console is to be printed to. | ||
117 | */ | ||
118 | static struct console *exclusive_console; | ||
119 | |||
120 | /* | ||
116 | * Array of consoles built from command line options (console=) | 121 | * Array of consoles built from command line options (console=) |
117 | */ | 122 | */ |
118 | struct console_cmdline | 123 | struct console_cmdline |
@@ -476,6 +481,8 @@ static void __call_console_drivers(unsigned start, unsigned end) | |||
476 | struct console *con; | 481 | struct console *con; |
477 | 482 | ||
478 | for_each_console(con) { | 483 | for_each_console(con) { |
484 | if (exclusive_console && con != exclusive_console) | ||
485 | continue; | ||
479 | if ((con->flags & CON_ENABLED) && con->write && | 486 | if ((con->flags & CON_ENABLED) && con->write && |
480 | (cpu_online(smp_processor_id()) || | 487 | (cpu_online(smp_processor_id()) || |
481 | (con->flags & CON_ANYTIME))) | 488 | (con->flags & CON_ANYTIME))) |
@@ -1230,6 +1237,11 @@ void console_unlock(void) | |||
1230 | local_irq_restore(flags); | 1237 | local_irq_restore(flags); |
1231 | } | 1238 | } |
1232 | console_locked = 0; | 1239 | console_locked = 0; |
1240 | |||
1241 | /* Release the exclusive_console once it is used */ | ||
1242 | if (unlikely(exclusive_console)) | ||
1243 | exclusive_console = NULL; | ||
1244 | |||
1233 | up(&console_sem); | 1245 | up(&console_sem); |
1234 | spin_unlock_irqrestore(&logbuf_lock, flags); | 1246 | spin_unlock_irqrestore(&logbuf_lock, flags); |
1235 | if (wake_klogd) | 1247 | if (wake_klogd) |
@@ -1316,6 +1328,18 @@ void console_start(struct console *console) | |||
1316 | } | 1328 | } |
1317 | EXPORT_SYMBOL(console_start); | 1329 | EXPORT_SYMBOL(console_start); |
1318 | 1330 | ||
1331 | static int __read_mostly keep_bootcon; | ||
1332 | |||
1333 | static int __init keep_bootcon_setup(char *str) | ||
1334 | { | ||
1335 | keep_bootcon = 1; | ||
1336 | printk(KERN_INFO "debug: skip boot console de-registration.\n"); | ||
1337 | |||
1338 | return 0; | ||
1339 | } | ||
1340 | |||
1341 | early_param("keep_bootcon", keep_bootcon_setup); | ||
1342 | |||
1319 | /* | 1343 | /* |
1320 | * The console driver calls this routine during kernel initialization | 1344 | * The console driver calls this routine during kernel initialization |
1321 | * to register the console printing procedure with printk() and to | 1345 | * to register the console printing procedure with printk() and to |
@@ -1452,6 +1476,12 @@ void register_console(struct console *newcon) | |||
1452 | spin_lock_irqsave(&logbuf_lock, flags); | 1476 | spin_lock_irqsave(&logbuf_lock, flags); |
1453 | con_start = log_start; | 1477 | con_start = log_start; |
1454 | spin_unlock_irqrestore(&logbuf_lock, flags); | 1478 | spin_unlock_irqrestore(&logbuf_lock, flags); |
1479 | /* | ||
1480 | * We're about to replay the log buffer. Only do this to the | ||
1481 | * just-registered console to avoid excessive message spam to | ||
1482 | * the already-registered consoles. | ||
1483 | */ | ||
1484 | exclusive_console = newcon; | ||
1455 | } | 1485 | } |
1456 | console_unlock(); | 1486 | console_unlock(); |
1457 | console_sysfs_notify(); | 1487 | console_sysfs_notify(); |
@@ -1463,7 +1493,9 @@ void register_console(struct console *newcon) | |||
1463 | * users know there might be something in the kernel's log buffer that | 1493 | * users know there might be something in the kernel's log buffer that |
1464 | * went to the bootconsole (that they do not see on the real console) | 1494 | * went to the bootconsole (that they do not see on the real console) |
1465 | */ | 1495 | */ |
1466 | if (bcon && ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV)) { | 1496 | if (bcon && |
1497 | ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV) && | ||
1498 | !keep_bootcon) { | ||
1467 | /* we need to iterate through twice, to make sure we print | 1499 | /* we need to iterate through twice, to make sure we print |
1468 | * everything out, before we unregister the console(s) | 1500 | * everything out, before we unregister the console(s) |
1469 | */ | 1501 | */ |
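Read together, the printk.c hunks implement a one-shot replay filter: register_console() rewinds con_start and marks the newcomer as exclusive_console, __call_console_drivers() skips every other console while that mark is set, and console_unlock() clears it once the buffered messages have been flushed, so already-registered consoles are not spammed with lines they printed before. A toy model of the gate (the struct and helper are illustrative, not the kernel's):

    struct toy_console { const char *name; };

    static struct toy_console *exclusive_console;   /* non-NULL only during replay */

    static bool should_write(struct toy_console *con)
    {
            if (exclusive_console && con != exclusive_console)
                    return false;    /* other consoles already saw these lines */
            return true;             /* replay target, or normal operation */
    }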
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index e2302e40b360..0fc1eed28d27 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
@@ -134,21 +134,24 @@ int __ptrace_may_access(struct task_struct *task, unsigned int mode) | |||
134 | return 0; | 134 | return 0; |
135 | rcu_read_lock(); | 135 | rcu_read_lock(); |
136 | tcred = __task_cred(task); | 136 | tcred = __task_cred(task); |
137 | if ((cred->uid != tcred->euid || | 137 | if (cred->user->user_ns == tcred->user->user_ns && |
138 | cred->uid != tcred->suid || | 138 | (cred->uid == tcred->euid && |
139 | cred->uid != tcred->uid || | 139 | cred->uid == tcred->suid && |
140 | cred->gid != tcred->egid || | 140 | cred->uid == tcred->uid && |
141 | cred->gid != tcred->sgid || | 141 | cred->gid == tcred->egid && |
142 | cred->gid != tcred->gid) && | 142 | cred->gid == tcred->sgid && |
143 | !capable(CAP_SYS_PTRACE)) { | 143 | cred->gid == tcred->gid)) |
144 | rcu_read_unlock(); | 144 | goto ok; |
145 | return -EPERM; | 145 | if (ns_capable(tcred->user->user_ns, CAP_SYS_PTRACE)) |
146 | } | 146 | goto ok; |
147 | rcu_read_unlock(); | ||
148 | return -EPERM; | ||
149 | ok: | ||
147 | rcu_read_unlock(); | 150 | rcu_read_unlock(); |
148 | smp_rmb(); | 151 | smp_rmb(); |
149 | if (task->mm) | 152 | if (task->mm) |
150 | dumpable = get_dumpable(task->mm); | 153 | dumpable = get_dumpable(task->mm); |
151 | if (!dumpable && !capable(CAP_SYS_PTRACE)) | 154 | if (!dumpable && !task_ns_capable(task, CAP_SYS_PTRACE)) |
152 | return -EPERM; | 155 | return -EPERM; |
153 | 156 | ||
154 | return security_ptrace_access_check(task, mode); | 157 | return security_ptrace_access_check(task, mode); |
@@ -198,7 +201,7 @@ static int ptrace_attach(struct task_struct *task) | |||
198 | goto unlock_tasklist; | 201 | goto unlock_tasklist; |
199 | 202 | ||
200 | task->ptrace = PT_PTRACED; | 203 | task->ptrace = PT_PTRACED; |
201 | if (capable(CAP_SYS_PTRACE)) | 204 | if (task_ns_capable(task, CAP_SYS_PTRACE)) |
202 | task->ptrace |= PT_PTRACE_CAP; | 205 | task->ptrace |= PT_PTRACE_CAP; |
203 | 206 | ||
204 | __ptrace_link(task, current); | 207 | __ptrace_link(task, current); |
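The rewritten check flips the old deny-unless-everything-matches test into an allow-list and scopes it to user namespaces: the uid/gid comparison only counts when tracer and tracee share a user namespace, and the privilege fallback asks for CAP_SYS_PTRACE in the tracee's namespace (ns_capable/task_ns_capable) instead of globally. A condensed model of the resulting decision (uids_and_gids_match() is an illustrative helper standing in for the six equality tests; the dumpable check still follows it):

    static bool may_ptrace(const struct cred *cred, const struct cred *tcred)
    {
            if (cred->user->user_ns == tcred->user->user_ns &&
                uids_and_gids_match(cred, tcred))        /* uid/euid/suid, gid/egid/sgid */
                    return true;
            /* otherwise: privileged in the *target's* user namespace */
            return ns_capable(tcred->user->user_ns, CAP_SYS_PTRACE);
    }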
diff --git a/kernel/res_counter.c b/kernel/res_counter.c index c7eaa37a768b..34683efa2cce 100644 --- a/kernel/res_counter.c +++ b/kernel/res_counter.c | |||
@@ -126,10 +126,24 @@ ssize_t res_counter_read(struct res_counter *counter, int member, | |||
126 | pos, buf, s - buf); | 126 | pos, buf, s - buf); |
127 | } | 127 | } |
128 | 128 | ||
129 | #if BITS_PER_LONG == 32 | ||
130 | u64 res_counter_read_u64(struct res_counter *counter, int member) | ||
131 | { | ||
132 | unsigned long flags; | ||
133 | u64 ret; | ||
134 | |||
135 | spin_lock_irqsave(&counter->lock, flags); | ||
136 | ret = *res_counter_member(counter, member); | ||
137 | spin_unlock_irqrestore(&counter->lock, flags); | ||
138 | |||
139 | return ret; | ||
140 | } | ||
141 | #else | ||
129 | u64 res_counter_read_u64(struct res_counter *counter, int member) | 142 | u64 res_counter_read_u64(struct res_counter *counter, int member) |
130 | { | 143 | { |
131 | return *res_counter_member(counter, member); | 144 | return *res_counter_member(counter, member); |
132 | } | 145 | } |
146 | #endif | ||
133 | 147 | ||
134 | int res_counter_memparse_write_strategy(const char *buf, | 148 | int res_counter_memparse_write_strategy(const char *buf, |
135 | unsigned long long *res) | 149 | unsigned long long *res) |
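The #if split exists because a u64 load is not atomic on 32-bit: without the lock, a reader can interleave with a counter update and combine the old high word with the new low word, yielding a value the counter never held. Taking counter->lock around the read on 32-bit only keeps the 64-bit path a plain load. A sketch of the hazard being closed (the transition value is illustrative):

    /* 32-bit CPU, counter moves 0x00000000ffffffff -> 0x0000000100000000;
     * an unlocked reader can observe 0x0000000000000000 -- a torn value. */
    u64 safe_read(struct res_counter *c, int member)
    {
            unsigned long flags;
            u64 v;

            spin_lock_irqsave(&c->lock, flags);    /* serialise against the writer */
            v = *res_counter_member(c, member);
            spin_unlock_irqrestore(&c->lock, flags);
            return v;
    }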
diff --git a/kernel/sched.c b/kernel/sched.c index a172494a9a63..312f8b95c2d4 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -2309,7 +2309,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) | |||
2309 | * Cause a process which is running on another CPU to enter | 2309 | * Cause a process which is running on another CPU to enter |
2310 | * kernel-mode, without any delay. (to get signals handled.) | 2310 | * kernel-mode, without any delay. (to get signals handled.) |
2311 | * | 2311 | * |
2312 | * NOTE: this function doesnt have to take the runqueue lock, | 2312 | * NOTE: this function doesn't have to take the runqueue lock, |
2313 | * because all it wants to ensure is that the remote task enters | 2313 | * because all it wants to ensure is that the remote task enters |
2314 | * the kernel. If the IPI races and the task has been migrated | 2314 | * the kernel. If the IPI races and the task has been migrated |
2315 | * to another CPU then no harm is done and the purpose has been | 2315 | * to another CPU then no harm is done and the purpose has been |
@@ -4111,6 +4111,16 @@ need_resched: | |||
4111 | try_to_wake_up_local(to_wakeup); | 4111 | try_to_wake_up_local(to_wakeup); |
4112 | } | 4112 | } |
4113 | deactivate_task(rq, prev, DEQUEUE_SLEEP); | 4113 | deactivate_task(rq, prev, DEQUEUE_SLEEP); |
4114 | |||
4115 | /* | ||
4116 | * If we are going to sleep and we have plugged IO queued, make | ||
4117 | * sure to submit it to avoid deadlocks. | ||
4118 | */ | ||
4119 | if (blk_needs_flush_plug(prev)) { | ||
4120 | raw_spin_unlock(&rq->lock); | ||
4121 | blk_schedule_flush_plug(prev); | ||
4122 | raw_spin_lock(&rq->lock); | ||
4123 | } | ||
4114 | } | 4124 | } |
4115 | switch_count = &prev->nvcsw; | 4125 | switch_count = &prev->nvcsw; |
4116 | } | 4126 | } |
@@ -4892,8 +4902,11 @@ static bool check_same_owner(struct task_struct *p) | |||
4892 | 4902 | ||
4893 | rcu_read_lock(); | 4903 | rcu_read_lock(); |
4894 | pcred = __task_cred(p); | 4904 | pcred = __task_cred(p); |
4895 | match = (cred->euid == pcred->euid || | 4905 | if (cred->user->user_ns == pcred->user->user_ns) |
4896 | cred->euid == pcred->uid); | 4906 | match = (cred->euid == pcred->euid || |
4907 | cred->euid == pcred->uid); | ||
4908 | else | ||
4909 | match = false; | ||
4897 | rcu_read_unlock(); | 4910 | rcu_read_unlock(); |
4898 | return match; | 4911 | return match; |
4899 | } | 4912 | } |
@@ -4984,7 +4997,7 @@ recheck: | |||
4984 | */ | 4997 | */ |
4985 | raw_spin_lock_irqsave(&p->pi_lock, flags); | 4998 | raw_spin_lock_irqsave(&p->pi_lock, flags); |
4986 | /* | 4999 | /* |
4987 | * To be able to change p->policy safely, the apropriate | 5000 | * To be able to change p->policy safely, the appropriate |
4988 | * runqueue lock must be held. | 5001 | * runqueue lock must be held. |
4989 | */ | 5002 | */ |
4990 | rq = __task_rq_lock(p); | 5003 | rq = __task_rq_lock(p); |
@@ -4998,6 +5011,17 @@ recheck: | |||
4998 | return -EINVAL; | 5011 | return -EINVAL; |
4999 | } | 5012 | } |
5000 | 5013 | ||
5014 | /* | ||
5015 | * If not changing anything there's no need to proceed further: | ||
5016 | */ | ||
5017 | if (unlikely(policy == p->policy && (!rt_policy(policy) || | ||
5018 | param->sched_priority == p->rt_priority))) { | ||
5019 | |||
5020 | __task_rq_unlock(rq); | ||
5021 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); | ||
5022 | return 0; | ||
5023 | } | ||
5024 | |||
5001 | #ifdef CONFIG_RT_GROUP_SCHED | 5025 | #ifdef CONFIG_RT_GROUP_SCHED |
5002 | if (user) { | 5026 | if (user) { |
5003 | /* | 5027 | /* |
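The early exit added to __sched_setscheduler() in the hunk above makes a call that changes neither the policy nor the RT priority return 0 without touching the runqueue. The second, redundant call in the sketch below is exactly that case; this only demonstrates the syscall interface, not the internal fast path:

/* setsched_noop.c - call sched_setscheduler() twice with identical values. */
#include <errno.h>
#include <sched.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	struct sched_param sp;

	memset(&sp, 0, sizeof(sp));
	sp.sched_priority = 0;		/* must be 0 for SCHED_OTHER */

	/* First call sets the policy (likely already SCHED_OTHER). */
	if (sched_setscheduler(0, SCHED_OTHER, &sp) == -1) {
		perror("sched_setscheduler");
		return 1;
	}

	/* Second call changes nothing; with the patch above the kernel
	 * returns 0 before taking any scheduling action. */
	if (sched_setscheduler(0, SCHED_OTHER, &sp) == -1) {
		perror("sched_setscheduler (no-op)");
		return 1;
	}

	printf("policy is now %d\n", sched_getscheduler(0));
	return 0;
}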
@@ -5221,7 +5245,7 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) | |||
5221 | goto out_free_cpus_allowed; | 5245 | goto out_free_cpus_allowed; |
5222 | } | 5246 | } |
5223 | retval = -EPERM; | 5247 | retval = -EPERM; |
5224 | if (!check_same_owner(p) && !capable(CAP_SYS_NICE)) | 5248 | if (!check_same_owner(p) && !task_ns_capable(p, CAP_SYS_NICE)) |
5225 | goto out_unlock; | 5249 | goto out_unlock; |
5226 | 5250 | ||
5227 | retval = security_task_setscheduler(p); | 5251 | retval = security_task_setscheduler(p); |
@@ -5460,6 +5484,8 @@ EXPORT_SYMBOL(yield); | |||
5460 | * yield_to - yield the current processor to another thread in | 5484 | * yield_to - yield the current processor to another thread in |
5461 | * your thread group, or accelerate that thread toward the | 5485 | * your thread group, or accelerate that thread toward the |
5462 | * processor it's on. | 5486 | * processor it's on. |
5487 | * @p: target task | ||
5488 | * @preempt: whether task preemption is allowed or not | ||
5463 | * | 5489 | * |
5464 | * It's the caller's job to ensure that the target task struct | 5490 | * It's the caller's job to ensure that the target task struct |
5465 | * can't go away on us before we can do any checks. | 5491 | * can't go away on us before we can do any checks. |
@@ -5525,6 +5551,7 @@ void __sched io_schedule(void) | |||
5525 | 5551 | ||
5526 | delayacct_blkio_start(); | 5552 | delayacct_blkio_start(); |
5527 | atomic_inc(&rq->nr_iowait); | 5553 | atomic_inc(&rq->nr_iowait); |
5554 | blk_flush_plug(current); | ||
5528 | current->in_iowait = 1; | 5555 | current->in_iowait = 1; |
5529 | schedule(); | 5556 | schedule(); |
5530 | current->in_iowait = 0; | 5557 | current->in_iowait = 0; |
@@ -5540,6 +5567,7 @@ long __sched io_schedule_timeout(long timeout) | |||
5540 | 5567 | ||
5541 | delayacct_blkio_start(); | 5568 | delayacct_blkio_start(); |
5542 | atomic_inc(&rq->nr_iowait); | 5569 | atomic_inc(&rq->nr_iowait); |
5570 | blk_flush_plug(current); | ||
5543 | current->in_iowait = 1; | 5571 | current->in_iowait = 1; |
5544 | ret = schedule_timeout(timeout); | 5572 | ret = schedule_timeout(timeout); |
5545 | current->in_iowait = 0; | 5573 | current->in_iowait = 0; |
@@ -5688,7 +5716,7 @@ void show_state_filter(unsigned long state_filter) | |||
5688 | do_each_thread(g, p) { | 5716 | do_each_thread(g, p) { |
5689 | /* | 5717 | /* |
5690 | * reset the NMI-timeout, listing all files on a slow | 5718 | * reset the NMI-timeout, listing all files on a slow |
5691 | * console might take alot of time: | 5719 | * console might take a lot of time: |
5692 | */ | 5720 | */ |
5693 | touch_nmi_watchdog(); | 5721 | touch_nmi_watchdog(); |
5694 | if (!state_filter || (p->state & state_filter)) | 5722 | if (!state_filter || (p->state & state_filter)) |
@@ -6303,6 +6331,9 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
6303 | break; | 6331 | break; |
6304 | #endif | 6332 | #endif |
6305 | } | 6333 | } |
6334 | |||
6335 | update_max_interval(); | ||
6336 | |||
6306 | return NOTIFY_OK; | 6337 | return NOTIFY_OK; |
6307 | } | 6338 | } |
6308 | 6339 | ||
@@ -8434,7 +8465,6 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) | |||
8434 | { | 8465 | { |
8435 | struct cfs_rq *cfs_rq; | 8466 | struct cfs_rq *cfs_rq; |
8436 | struct sched_entity *se; | 8467 | struct sched_entity *se; |
8437 | struct rq *rq; | ||
8438 | int i; | 8468 | int i; |
8439 | 8469 | ||
8440 | tg->cfs_rq = kzalloc(sizeof(cfs_rq) * nr_cpu_ids, GFP_KERNEL); | 8470 | tg->cfs_rq = kzalloc(sizeof(cfs_rq) * nr_cpu_ids, GFP_KERNEL); |
@@ -8447,8 +8477,6 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) | |||
8447 | tg->shares = NICE_0_LOAD; | 8477 | tg->shares = NICE_0_LOAD; |
8448 | 8478 | ||
8449 | for_each_possible_cpu(i) { | 8479 | for_each_possible_cpu(i) { |
8450 | rq = cpu_rq(i); | ||
8451 | |||
8452 | cfs_rq = kzalloc_node(sizeof(struct cfs_rq), | 8480 | cfs_rq = kzalloc_node(sizeof(struct cfs_rq), |
8453 | GFP_KERNEL, cpu_to_node(i)); | 8481 | GFP_KERNEL, cpu_to_node(i)); |
8454 | if (!cfs_rq) | 8482 | if (!cfs_rq) |
diff --git a/kernel/sched_autogroup.c b/kernel/sched_autogroup.c index 5946ac515602..429242f3c484 100644 --- a/kernel/sched_autogroup.c +++ b/kernel/sched_autogroup.c | |||
@@ -179,7 +179,7 @@ void sched_autogroup_create_attach(struct task_struct *p) | |||
179 | struct autogroup *ag = autogroup_create(); | 179 | struct autogroup *ag = autogroup_create(); |
180 | 180 | ||
181 | autogroup_move_group(p, ag); | 181 | autogroup_move_group(p, ag); |
182 | /* drop extra refrence added by autogroup_create() */ | 182 | /* drop extra reference added by autogroup_create() */ |
183 | autogroup_kref_put(ag); | 183 | autogroup_kref_put(ag); |
184 | } | 184 | } |
185 | EXPORT_SYMBOL(sched_autogroup_create_attach); | 185 | EXPORT_SYMBOL(sched_autogroup_create_attach); |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 3f7ec9e27ee1..6fa833ab2cb8 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -22,6 +22,7 @@ | |||
22 | 22 | ||
23 | #include <linux/latencytop.h> | 23 | #include <linux/latencytop.h> |
24 | #include <linux/sched.h> | 24 | #include <linux/sched.h> |
25 | #include <linux/cpumask.h> | ||
25 | 26 | ||
26 | /* | 27 | /* |
27 | * Targeted preemption latency for CPU-bound tasks: | 28 | * Targeted preemption latency for CPU-bound tasks: |
@@ -2103,21 +2104,20 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
2103 | enum cpu_idle_type idle, int *all_pinned, | 2104 | enum cpu_idle_type idle, int *all_pinned, |
2104 | int *this_best_prio, struct cfs_rq *busiest_cfs_rq) | 2105 | int *this_best_prio, struct cfs_rq *busiest_cfs_rq) |
2105 | { | 2106 | { |
2106 | int loops = 0, pulled = 0, pinned = 0; | 2107 | int loops = 0, pulled = 0; |
2107 | long rem_load_move = max_load_move; | 2108 | long rem_load_move = max_load_move; |
2108 | struct task_struct *p, *n; | 2109 | struct task_struct *p, *n; |
2109 | 2110 | ||
2110 | if (max_load_move == 0) | 2111 | if (max_load_move == 0) |
2111 | goto out; | 2112 | goto out; |
2112 | 2113 | ||
2113 | pinned = 1; | ||
2114 | |||
2115 | list_for_each_entry_safe(p, n, &busiest_cfs_rq->tasks, se.group_node) { | 2114 | list_for_each_entry_safe(p, n, &busiest_cfs_rq->tasks, se.group_node) { |
2116 | if (loops++ > sysctl_sched_nr_migrate) | 2115 | if (loops++ > sysctl_sched_nr_migrate) |
2117 | break; | 2116 | break; |
2118 | 2117 | ||
2119 | if ((p->se.load.weight >> 1) > rem_load_move || | 2118 | if ((p->se.load.weight >> 1) > rem_load_move || |
2120 | !can_migrate_task(p, busiest, this_cpu, sd, idle, &pinned)) | 2119 | !can_migrate_task(p, busiest, this_cpu, sd, idle, |
2120 | all_pinned)) | ||
2121 | continue; | 2121 | continue; |
2122 | 2122 | ||
2123 | pull_task(busiest, p, this_rq, this_cpu); | 2123 | pull_task(busiest, p, this_rq, this_cpu); |
@@ -2152,9 +2152,6 @@ out: | |||
2152 | */ | 2152 | */ |
2153 | schedstat_add(sd, lb_gained[idle], pulled); | 2153 | schedstat_add(sd, lb_gained[idle], pulled); |
2154 | 2154 | ||
2155 | if (all_pinned) | ||
2156 | *all_pinned = pinned; | ||
2157 | |||
2158 | return max_load_move - rem_load_move; | 2155 | return max_load_move - rem_load_move; |
2159 | } | 2156 | } |
2160 | 2157 | ||
@@ -3061,7 +3058,7 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu, | |||
3061 | 3058 | ||
3062 | /* | 3059 | /* |
3063 | * if *imbalance is less than the average load per runnable task | 3060 | * if *imbalance is less than the average load per runnable task |
3064 | * there is no gaurantee that any tasks will be moved so we'll have | 3061 | * there is no guarantee that any tasks will be moved so we'll have |
3065 | * a think about bumping its value to force at least one task to be | 3062 | * a think about bumping its value to force at least one task to be |
3066 | * moved | 3063 | * moved |
3067 | */ | 3064 | */ |
@@ -3126,6 +3123,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
3126 | if (!sds.busiest || sds.busiest_nr_running == 0) | 3123 | if (!sds.busiest || sds.busiest_nr_running == 0) |
3127 | goto out_balanced; | 3124 | goto out_balanced; |
3128 | 3125 | ||
3126 | sds.avg_load = (SCHED_LOAD_SCALE * sds.total_load) / sds.total_pwr; | ||
3127 | |||
3129 | /* | 3128 | /* |
3130 | * If the busiest group is imbalanced the below checks don't | 3129 | * If the busiest group is imbalanced the below checks don't |
3131 | * work because they assumes all things are equal, which typically | 3130 | * work because they assumes all things are equal, which typically |
@@ -3150,7 +3149,6 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
3150 | * Don't pull any tasks if this group is already above the domain | 3149 | * Don't pull any tasks if this group is already above the domain |
3151 | * average load. | 3150 | * average load. |
3152 | */ | 3151 | */ |
3153 | sds.avg_load = (SCHED_LOAD_SCALE * sds.total_load) / sds.total_pwr; | ||
3154 | if (sds.this_load >= sds.avg_load) | 3152 | if (sds.this_load >= sds.avg_load) |
3155 | goto out_balanced; | 3153 | goto out_balanced; |
3156 | 3154 | ||
@@ -3339,6 +3337,7 @@ redo: | |||
3339 | * still unbalanced. ld_moved simply stays zero, so it is | 3337 | * still unbalanced. ld_moved simply stays zero, so it is |
3340 | * correctly treated as an imbalance. | 3338 | * correctly treated as an imbalance. |
3341 | */ | 3339 | */ |
3340 | all_pinned = 1; | ||
3342 | local_irq_save(flags); | 3341 | local_irq_save(flags); |
3343 | double_rq_lock(this_rq, busiest); | 3342 | double_rq_lock(this_rq, busiest); |
3344 | ld_moved = move_tasks(this_rq, this_cpu, busiest, | 3343 | ld_moved = move_tasks(this_rq, this_cpu, busiest, |
@@ -3819,6 +3818,17 @@ void select_nohz_load_balancer(int stop_tick) | |||
3819 | 3818 | ||
3820 | static DEFINE_SPINLOCK(balancing); | 3819 | static DEFINE_SPINLOCK(balancing); |
3821 | 3820 | ||
3821 | static unsigned long __read_mostly max_load_balance_interval = HZ/10; | ||
3822 | |||
3823 | /* | ||
3824 | * Scale the max load_balance interval with the number of CPUs in the system. | ||
3825 | * This trades load-balance latency on larger machines for less cross talk. | ||
3826 | */ | ||
3827 | static void update_max_interval(void) | ||
3828 | { | ||
3829 | max_load_balance_interval = HZ*num_online_cpus()/10; | ||
3830 | } | ||
3831 | |||
3822 | /* | 3832 | /* |
3823 | * It checks each scheduling domain to see if it is due to be balanced, | 3833 | * It checks each scheduling domain to see if it is due to be balanced, |
3824 | * and initiates a balancing operation if so. | 3834 | * and initiates a balancing operation if so. |
@@ -3848,10 +3858,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) | |||
3848 | 3858 | ||
3849 | /* scale ms to jiffies */ | 3859 | /* scale ms to jiffies */ |
3850 | interval = msecs_to_jiffies(interval); | 3860 | interval = msecs_to_jiffies(interval); |
3851 | if (unlikely(!interval)) | 3861 | interval = clamp(interval, 1UL, max_load_balance_interval); |
3852 | interval = 1; | ||
3853 | if (interval > HZ*NR_CPUS/10) | ||
3854 | interval = HZ*NR_CPUS/10; | ||
3855 | 3862 | ||
3856 | need_serialize = sd->flags & SD_SERIALIZE; | 3863 | need_serialize = sd->flags & SD_SERIALIZE; |
3857 | 3864 | ||
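In rebalance_domains() the two manual bounds checks are folded into clamp(), and update_max_interval() scales the upper bound with the number of online CPUs instead of the compile-time NR_CPUS. A small userspace sketch of the same arithmetic; HZ is a placeholder here, since the real value is a kernel configuration choice:

/* interval_clamp.c - clamp a balance interval the way rebalance_domains() does. */
#include <stdio.h>
#include <unistd.h>

#define HZ 250UL	/* illustrative stand-in for the kernel tick rate */

static unsigned long clamp_ul(unsigned long v, unsigned long lo, unsigned long hi)
{
	if (v < lo)
		return lo;
	if (v > hi)
		return hi;
	return v;
}

int main(void)
{
	long cpus = sysconf(_SC_NPROCESSORS_ONLN);
	unsigned long max_interval;
	unsigned long intervals[] = { 0, 3, 100000 };	/* in jiffies */
	unsigned int i;

	if (cpus < 1)
		cpus = 1;
	max_interval = HZ * (unsigned long)cpus / 10;

	for (i = 0; i < sizeof(intervals) / sizeof(intervals[0]); i++)
		printf("%lu -> %lu\n", intervals[i],
		       clamp_ul(intervals[i], 1, max_interval));
	return 0;
}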
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index c82f26c1b7c3..a776a6396427 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c | |||
@@ -94,6 +94,4 @@ static const struct sched_class idle_sched_class = { | |||
94 | 94 | ||
95 | .prio_changed = prio_changed_idle, | 95 | .prio_changed = prio_changed_idle, |
96 | .switched_to = switched_to_idle, | 96 | .switched_to = switched_to_idle, |
97 | |||
98 | /* no .task_new for idle tasks */ | ||
99 | }; | 97 | }; |
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index db308cb08b75..e7cebdc65f82 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
@@ -1378,7 +1378,7 @@ retry: | |||
1378 | task = pick_next_pushable_task(rq); | 1378 | task = pick_next_pushable_task(rq); |
1379 | if (task_cpu(next_task) == rq->cpu && task == next_task) { | 1379 | if (task_cpu(next_task) == rq->cpu && task == next_task) { |
1380 | /* | 1380 | /* |
1381 | * If we get here, the task hasnt moved at all, but | 1381 | * If we get here, the task hasn't moved at all, but |
1382 | * it has failed to push. We will not try again, | 1382 | * it has failed to push. We will not try again, |
1383 | * since the other cpus will pull from us when they | 1383 | * since the other cpus will pull from us when they |
1384 | * are ready. | 1384 | * are ready. |
@@ -1488,7 +1488,7 @@ static int pull_rt_task(struct rq *this_rq) | |||
1488 | /* | 1488 | /* |
1489 | * We continue with the search, just in | 1489 | * We continue with the search, just in |
1490 | * case there's an even higher prio task | 1490 | * case there's an even higher prio task |
1491 | * in another runqueue. (low likelyhood | 1491 | * in another runqueue. (low likelihood |
1492 | * but possible) | 1492 | * but possible) |
1493 | */ | 1493 | */ |
1494 | } | 1494 | } |
diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c index 84ec9bcf82d9..1ba2bd40fdac 100644 --- a/kernel/sched_stoptask.c +++ b/kernel/sched_stoptask.c | |||
@@ -102,6 +102,4 @@ static const struct sched_class stop_sched_class = { | |||
102 | 102 | ||
103 | .prio_changed = prio_changed_stop, | 103 | .prio_changed = prio_changed_stop, |
104 | .switched_to = switched_to_stop, | 104 | .switched_to = switched_to_stop, |
105 | |||
106 | /* no .task_new for stop tasks */ | ||
107 | }; | 105 | }; |
diff --git a/kernel/signal.c b/kernel/signal.c index 4e3cff10fdce..7165af5f1b11 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -226,7 +226,7 @@ static inline void print_dropped_signal(int sig) | |||
226 | /* | 226 | /* |
227 | * allocate a new signal queue record | 227 | * allocate a new signal queue record |
228 | * - this may be called without locks if and only if t == current, otherwise an | 228 | * - this may be called without locks if and only if t == current, otherwise an |
229 | * appopriate lock must be held to stop the target task from exiting | 229 | * appropriate lock must be held to stop the target task from exiting |
230 | */ | 230 | */ |
231 | static struct sigqueue * | 231 | static struct sigqueue * |
232 | __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit) | 232 | __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit) |
@@ -375,15 +375,15 @@ int unhandled_signal(struct task_struct *tsk, int sig) | |||
375 | return !tracehook_consider_fatal_signal(tsk, sig); | 375 | return !tracehook_consider_fatal_signal(tsk, sig); |
376 | } | 376 | } |
377 | 377 | ||
378 | 378 | /* | |
379 | /* Notify the system that a driver wants to block all signals for this | 379 | * Notify the system that a driver wants to block all signals for this |
380 | * process, and wants to be notified if any signals at all were to be | 380 | * process, and wants to be notified if any signals at all were to be |
381 | * sent/acted upon. If the notifier routine returns non-zero, then the | 381 | * sent/acted upon. If the notifier routine returns non-zero, then the |
382 | * signal will be acted upon after all. If the notifier routine returns 0, | 382 | * signal will be acted upon after all. If the notifier routine returns 0, |
383 | * then then signal will be blocked. Only one block per process is | 383 | * then then signal will be blocked. Only one block per process is |
384 | * allowed. priv is a pointer to private data that the notifier routine | 384 | * allowed. priv is a pointer to private data that the notifier routine |
385 | * can use to determine if the signal should be blocked or not. */ | 385 | * can use to determine if the signal should be blocked or not. |
386 | 386 | */ | |
387 | void | 387 | void |
388 | block_all_signals(int (*notifier)(void *priv), void *priv, sigset_t *mask) | 388 | block_all_signals(int (*notifier)(void *priv), void *priv, sigset_t *mask) |
389 | { | 389 | { |
@@ -434,9 +434,10 @@ still_pending: | |||
434 | copy_siginfo(info, &first->info); | 434 | copy_siginfo(info, &first->info); |
435 | __sigqueue_free(first); | 435 | __sigqueue_free(first); |
436 | } else { | 436 | } else { |
437 | /* Ok, it wasn't in the queue. This must be | 437 | /* |
438 | a fast-pathed signal or we must have been | 438 | * Ok, it wasn't in the queue. This must be |
439 | out of queue space. So zero out the info. | 439 | * a fast-pathed signal or we must have been |
440 | * out of queue space. So zero out the info. | ||
440 | */ | 441 | */ |
441 | info->si_signo = sig; | 442 | info->si_signo = sig; |
442 | info->si_errno = 0; | 443 | info->si_errno = 0; |
@@ -468,7 +469,7 @@ static int __dequeue_signal(struct sigpending *pending, sigset_t *mask, | |||
468 | } | 469 | } |
469 | 470 | ||
470 | /* | 471 | /* |
471 | * Dequeue a signal and return the element to the caller, which is | 472 | * Dequeue a signal and return the element to the caller, which is |
472 | * expected to free it. | 473 | * expected to free it. |
473 | * | 474 | * |
474 | * All callers have to hold the siglock. | 475 | * All callers have to hold the siglock. |
@@ -490,7 +491,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) | |||
490 | * itimers are process shared and we restart periodic | 491 | * itimers are process shared and we restart periodic |
491 | * itimers in the signal delivery path to prevent DoS | 492 | * itimers in the signal delivery path to prevent DoS |
492 | * attacks in the high resolution timer case. This is | 493 | * attacks in the high resolution timer case. This is |
493 | * compliant with the old way of self restarting | 494 | * compliant with the old way of self-restarting |
494 | * itimers, as the SIGALRM is a legacy signal and only | 495 | * itimers, as the SIGALRM is a legacy signal and only |
495 | * queued once. Changing the restart behaviour to | 496 | * queued once. Changing the restart behaviour to |
496 | * restart the timer in the signal dequeue path is | 497 | * restart the timer in the signal dequeue path is |
@@ -636,13 +637,33 @@ static inline bool si_fromuser(const struct siginfo *info) | |||
636 | } | 637 | } |
637 | 638 | ||
638 | /* | 639 | /* |
640 | * called with RCU read lock from check_kill_permission() | ||
641 | */ | ||
642 | static int kill_ok_by_cred(struct task_struct *t) | ||
643 | { | ||
644 | const struct cred *cred = current_cred(); | ||
645 | const struct cred *tcred = __task_cred(t); | ||
646 | |||
647 | if (cred->user->user_ns == tcred->user->user_ns && | ||
648 | (cred->euid == tcred->suid || | ||
649 | cred->euid == tcred->uid || | ||
650 | cred->uid == tcred->suid || | ||
651 | cred->uid == tcred->uid)) | ||
652 | return 1; | ||
653 | |||
654 | if (ns_capable(tcred->user->user_ns, CAP_KILL)) | ||
655 | return 1; | ||
656 | |||
657 | return 0; | ||
658 | } | ||
659 | |||
660 | /* | ||
639 | * Bad permissions for sending the signal | 661 | * Bad permissions for sending the signal |
640 | * - the caller must hold the RCU read lock | 662 | * - the caller must hold the RCU read lock |
641 | */ | 663 | */ |
642 | static int check_kill_permission(int sig, struct siginfo *info, | 664 | static int check_kill_permission(int sig, struct siginfo *info, |
643 | struct task_struct *t) | 665 | struct task_struct *t) |
644 | { | 666 | { |
645 | const struct cred *cred, *tcred; | ||
646 | struct pid *sid; | 667 | struct pid *sid; |
647 | int error; | 668 | int error; |
648 | 669 | ||
@@ -656,14 +677,8 @@ static int check_kill_permission(int sig, struct siginfo *info, | |||
656 | if (error) | 677 | if (error) |
657 | return error; | 678 | return error; |
658 | 679 | ||
659 | cred = current_cred(); | ||
660 | tcred = __task_cred(t); | ||
661 | if (!same_thread_group(current, t) && | 680 | if (!same_thread_group(current, t) && |
662 | (cred->euid ^ tcred->suid) && | 681 | !kill_ok_by_cred(t)) { |
663 | (cred->euid ^ tcred->uid) && | ||
664 | (cred->uid ^ tcred->suid) && | ||
665 | (cred->uid ^ tcred->uid) && | ||
666 | !capable(CAP_KILL)) { | ||
667 | switch (sig) { | 682 | switch (sig) { |
668 | case SIGCONT: | 683 | case SIGCONT: |
669 | sid = task_session(t); | 684 | sid = task_session(t); |
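check_kill_permission() now funnels the credential comparison through kill_ok_by_cred(), which additionally requires sender and target to share a user namespace before the uid match counts, and checks CAP_KILL against the target's namespace. kill(2) with signal 0 runs exactly this permission check without delivering anything, so it makes a convenient probe:

/* kill_probe.c - test whether the caller may signal a given PID.
 * Signal 0 performs only the permission check in check_kill_permission(). */
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(int argc, char **argv)
{
	pid_t pid = (argc > 1) ? (pid_t)atol(argv[1]) : 1;	/* default: init */

	if (kill(pid, 0) == 0)
		printf("may signal %ld\n", (long)pid);
	else if (errno == EPERM)
		printf("not permitted to signal %ld\n", (long)pid);
	else
		printf("kill(%ld, 0): %s\n", (long)pid, strerror(errno));
	return 0;
}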
@@ -909,14 +924,15 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, | |||
909 | if (info == SEND_SIG_FORCED) | 924 | if (info == SEND_SIG_FORCED) |
910 | goto out_set; | 925 | goto out_set; |
911 | 926 | ||
912 | /* Real-time signals must be queued if sent by sigqueue, or | 927 | /* |
913 | some other real-time mechanism. It is implementation | 928 | * Real-time signals must be queued if sent by sigqueue, or |
914 | defined whether kill() does so. We attempt to do so, on | 929 | * some other real-time mechanism. It is implementation |
915 | the principle of least surprise, but since kill is not | 930 | * defined whether kill() does so. We attempt to do so, on |
916 | allowed to fail with EAGAIN when low on memory we just | 931 | * the principle of least surprise, but since kill is not |
917 | make sure at least one signal gets delivered and don't | 932 | * allowed to fail with EAGAIN when low on memory we just |
918 | pass on the info struct. */ | 933 | * make sure at least one signal gets delivered and don't |
919 | 934 | * pass on the info struct. | |
935 | */ | ||
920 | if (sig < SIGRTMIN) | 936 | if (sig < SIGRTMIN) |
921 | override_rlimit = (is_si_special(info) || info->si_code >= 0); | 937 | override_rlimit = (is_si_special(info) || info->si_code >= 0); |
922 | else | 938 | else |
@@ -1187,8 +1203,7 @@ retry: | |||
1187 | return error; | 1203 | return error; |
1188 | } | 1204 | } |
1189 | 1205 | ||
1190 | int | 1206 | int kill_proc_info(int sig, struct siginfo *info, pid_t pid) |
1191 | kill_proc_info(int sig, struct siginfo *info, pid_t pid) | ||
1192 | { | 1207 | { |
1193 | int error; | 1208 | int error; |
1194 | rcu_read_lock(); | 1209 | rcu_read_lock(); |
@@ -1285,8 +1300,7 @@ static int kill_something_info(int sig, struct siginfo *info, pid_t pid) | |||
1285 | * These are for backward compatibility with the rest of the kernel source. | 1300 | * These are for backward compatibility with the rest of the kernel source. |
1286 | */ | 1301 | */ |
1287 | 1302 | ||
1288 | int | 1303 | int send_sig_info(int sig, struct siginfo *info, struct task_struct *p) |
1289 | send_sig_info(int sig, struct siginfo *info, struct task_struct *p) | ||
1290 | { | 1304 | { |
1291 | /* | 1305 | /* |
1292 | * Make sure legacy kernel users don't send in bad values | 1306 | * Make sure legacy kernel users don't send in bad values |
@@ -1354,7 +1368,7 @@ EXPORT_SYMBOL(kill_pid); | |||
1354 | * These functions support sending signals using preallocated sigqueue | 1368 | * These functions support sending signals using preallocated sigqueue |
1355 | * structures. This is needed "because realtime applications cannot | 1369 | * structures. This is needed "because realtime applications cannot |
1356 | * afford to lose notifications of asynchronous events, like timer | 1370 | * afford to lose notifications of asynchronous events, like timer |
1357 | * expirations or I/O completions". In the case of Posix Timers | 1371 | * expirations or I/O completions". In the case of POSIX Timers |
1358 | * we allocate the sigqueue structure from the timer_create. If this | 1372 | * we allocate the sigqueue structure from the timer_create. If this |
1359 | * allocation fails we are able to report the failure to the application | 1373 | * allocation fails we are able to report the failure to the application |
1360 | * with an EAGAIN error. | 1374 | * with an EAGAIN error. |
@@ -1539,7 +1553,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, int why) | |||
1539 | info.si_signo = SIGCHLD; | 1553 | info.si_signo = SIGCHLD; |
1540 | info.si_errno = 0; | 1554 | info.si_errno = 0; |
1541 | /* | 1555 | /* |
1542 | * see comment in do_notify_parent() abot the following 3 lines | 1556 | * see comment in do_notify_parent() about the following 4 lines |
1543 | */ | 1557 | */ |
1544 | rcu_read_lock(); | 1558 | rcu_read_lock(); |
1545 | info.si_pid = task_pid_nr_ns(tsk, parent->nsproxy->pid_ns); | 1559 | info.si_pid = task_pid_nr_ns(tsk, parent->nsproxy->pid_ns); |
@@ -1597,7 +1611,7 @@ static inline int may_ptrace_stop(void) | |||
1597 | } | 1611 | } |
1598 | 1612 | ||
1599 | /* | 1613 | /* |
1600 | * Return nonzero if there is a SIGKILL that should be waking us up. | 1614 | * Return non-zero if there is a SIGKILL that should be waking us up. |
1601 | * Called with the siglock held. | 1615 | * Called with the siglock held. |
1602 | */ | 1616 | */ |
1603 | static int sigkill_pending(struct task_struct *tsk) | 1617 | static int sigkill_pending(struct task_struct *tsk) |
@@ -1721,7 +1735,7 @@ void ptrace_notify(int exit_code) | |||
1721 | /* | 1735 | /* |
1722 | * This performs the stopping for SIGSTOP and other stop signals. | 1736 | * This performs the stopping for SIGSTOP and other stop signals. |
1723 | * We have to stop all threads in the thread group. | 1737 | * We have to stop all threads in the thread group. |
1724 | * Returns nonzero if we've actually stopped and released the siglock. | 1738 | * Returns non-zero if we've actually stopped and released the siglock. |
1725 | * Returns zero if we didn't stop and still hold the siglock. | 1739 | * Returns zero if we didn't stop and still hold the siglock. |
1726 | */ | 1740 | */ |
1727 | static int do_signal_stop(int signr) | 1741 | static int do_signal_stop(int signr) |
@@ -1809,10 +1823,12 @@ static int ptrace_signal(int signr, siginfo_t *info, | |||
1809 | 1823 | ||
1810 | current->exit_code = 0; | 1824 | current->exit_code = 0; |
1811 | 1825 | ||
1812 | /* Update the siginfo structure if the signal has | 1826 | /* |
1813 | changed. If the debugger wanted something | 1827 | * Update the siginfo structure if the signal has |
1814 | specific in the siginfo structure then it should | 1828 | * changed. If the debugger wanted something |
1815 | have updated *info via PTRACE_SETSIGINFO. */ | 1829 | * specific in the siginfo structure then it should |
1830 | * have updated *info via PTRACE_SETSIGINFO. | ||
1831 | */ | ||
1816 | if (signr != info->si_signo) { | 1832 | if (signr != info->si_signo) { |
1817 | info->si_signo = signr; | 1833 | info->si_signo = signr; |
1818 | info->si_errno = 0; | 1834 | info->si_errno = 0; |
@@ -1871,7 +1887,7 @@ relock: | |||
1871 | for (;;) { | 1887 | for (;;) { |
1872 | struct k_sigaction *ka; | 1888 | struct k_sigaction *ka; |
1873 | /* | 1889 | /* |
1874 | * Tracing can induce an artifical signal and choose sigaction. | 1890 | * Tracing can induce an artificial signal and choose sigaction. |
1875 | * The return value in @signr determines the default action, | 1891 | * The return value in @signr determines the default action, |
1876 | * but @info->si_signo is the signal number we will report. | 1892 | * but @info->si_signo is the signal number we will report. |
1877 | */ | 1893 | */ |
@@ -2020,7 +2036,8 @@ void exit_signals(struct task_struct *tsk) | |||
2020 | if (!signal_pending(tsk)) | 2036 | if (!signal_pending(tsk)) |
2021 | goto out; | 2037 | goto out; |
2022 | 2038 | ||
2023 | /* It could be that __group_complete_signal() choose us to | 2039 | /* |
2040 | * It could be that __group_complete_signal() choose us to | ||
2024 | * notify about group-wide signal. Another thread should be | 2041 | * notify about group-wide signal. Another thread should be |
2025 | * woken now to take the signal since we will not. | 2042 | * woken now to take the signal since we will not. |
2026 | */ | 2043 | */ |
@@ -2058,6 +2075,9 @@ EXPORT_SYMBOL(unblock_all_signals); | |||
2058 | * System call entry points. | 2075 | * System call entry points. |
2059 | */ | 2076 | */ |
2060 | 2077 | ||
2078 | /** | ||
2079 | * sys_restart_syscall - restart a system call | ||
2080 | */ | ||
2061 | SYSCALL_DEFINE0(restart_syscall) | 2081 | SYSCALL_DEFINE0(restart_syscall) |
2062 | { | 2082 | { |
2063 | struct restart_block *restart = ¤t_thread_info()->restart_block; | 2083 | struct restart_block *restart = ¤t_thread_info()->restart_block; |
@@ -2111,6 +2131,13 @@ int sigprocmask(int how, sigset_t *set, sigset_t *oldset) | |||
2111 | return error; | 2131 | return error; |
2112 | } | 2132 | } |
2113 | 2133 | ||
2134 | /** | ||
2135 | * sys_rt_sigprocmask - change the list of currently blocked signals | ||
2136 | * @how: whether to add, remove, or set signals | ||
2137 | * @set: stores pending signals | ||
2138 | * @oset: previous value of signal mask if non-null | ||
2139 | * @sigsetsize: size of sigset_t type | ||
2140 | */ | ||
2114 | SYSCALL_DEFINE4(rt_sigprocmask, int, how, sigset_t __user *, set, | 2141 | SYSCALL_DEFINE4(rt_sigprocmask, int, how, sigset_t __user *, set, |
2115 | sigset_t __user *, oset, size_t, sigsetsize) | 2142 | sigset_t __user *, oset, size_t, sigsetsize) |
2116 | { | 2143 | { |
@@ -2169,8 +2196,14 @@ long do_sigpending(void __user *set, unsigned long sigsetsize) | |||
2169 | 2196 | ||
2170 | out: | 2197 | out: |
2171 | return error; | 2198 | return error; |
2172 | } | 2199 | } |
2173 | 2200 | ||
2201 | /** | ||
2202 | * sys_rt_sigpending - examine a pending signal that has been raised | ||
2203 | * while blocked | ||
2204 | * @set: stores pending signals | ||
2205 | * @sigsetsize: size of sigset_t type or larger | ||
2206 | */ | ||
2174 | SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, set, size_t, sigsetsize) | 2207 | SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, set, size_t, sigsetsize) |
2175 | { | 2208 | { |
2176 | return do_sigpending(set, sigsetsize); | 2209 | return do_sigpending(set, sigsetsize); |
@@ -2219,9 +2252,9 @@ int copy_siginfo_to_user(siginfo_t __user *to, siginfo_t *from) | |||
2219 | err |= __put_user(from->si_trapno, &to->si_trapno); | 2252 | err |= __put_user(from->si_trapno, &to->si_trapno); |
2220 | #endif | 2253 | #endif |
2221 | #ifdef BUS_MCEERR_AO | 2254 | #ifdef BUS_MCEERR_AO |
2222 | /* | 2255 | /* |
2223 | * Other callers might not initialize the si_lsb field, | 2256 | * Other callers might not initialize the si_lsb field, |
2224 | * so check explicitely for the right codes here. | 2257 | * so check explicitly for the right codes here. |
2225 | */ | 2258 | */ |
2226 | if (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO) | 2259 | if (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO) |
2227 | err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb); | 2260 | err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb); |
@@ -2250,6 +2283,14 @@ int copy_siginfo_to_user(siginfo_t __user *to, siginfo_t *from) | |||
2250 | 2283 | ||
2251 | #endif | 2284 | #endif |
2252 | 2285 | ||
2286 | /** | ||
2287 | * sys_rt_sigtimedwait - synchronously wait for queued signals specified | ||
2288 | * in @uthese | ||
2289 | * @uthese: queued signals to wait for | ||
2290 | * @uinfo: if non-null, the signal's siginfo is returned here | ||
2291 | * @uts: upper bound on process time suspension | ||
2292 | * @sigsetsize: size of sigset_t type | ||
2293 | */ | ||
2253 | SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese, | 2294 | SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese, |
2254 | siginfo_t __user *, uinfo, const struct timespec __user *, uts, | 2295 | siginfo_t __user *, uinfo, const struct timespec __user *, uts, |
2255 | size_t, sigsetsize) | 2296 | size_t, sigsetsize) |
@@ -2266,7 +2307,7 @@ SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese, | |||
2266 | 2307 | ||
2267 | if (copy_from_user(&these, uthese, sizeof(these))) | 2308 | if (copy_from_user(&these, uthese, sizeof(these))) |
2268 | return -EFAULT; | 2309 | return -EFAULT; |
2269 | 2310 | ||
2270 | /* | 2311 | /* |
2271 | * Invert the set of allowed signals to get those we | 2312 | * Invert the set of allowed signals to get those we |
2272 | * want to block. | 2313 | * want to block. |
@@ -2291,9 +2332,11 @@ SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese, | |||
2291 | + (ts.tv_sec || ts.tv_nsec)); | 2332 | + (ts.tv_sec || ts.tv_nsec)); |
2292 | 2333 | ||
2293 | if (timeout) { | 2334 | if (timeout) { |
2294 | /* None ready -- temporarily unblock those we're | 2335 | /* |
2336 | * None ready -- temporarily unblock those we're | ||
2295 | * interested while we are sleeping in so that we'll | 2337 | * interested while we are sleeping in so that we'll |
2296 | * be awakened when they arrive. */ | 2338 | * be awakened when they arrive. |
2339 | */ | ||
2297 | current->real_blocked = current->blocked; | 2340 | current->real_blocked = current->blocked; |
2298 | sigandsets(¤t->blocked, ¤t->blocked, &these); | 2341 | sigandsets(¤t->blocked, ¤t->blocked, &these); |
2299 | recalc_sigpending(); | 2342 | recalc_sigpending(); |
@@ -2325,6 +2368,11 @@ SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese, | |||
2325 | return ret; | 2368 | return ret; |
2326 | } | 2369 | } |
2327 | 2370 | ||
2371 | /** | ||
2372 | * sys_kill - send a signal to a process | ||
2373 | * @pid: the PID of the process | ||
2374 | * @sig: signal to be sent | ||
2375 | */ | ||
2328 | SYSCALL_DEFINE2(kill, pid_t, pid, int, sig) | 2376 | SYSCALL_DEFINE2(kill, pid_t, pid, int, sig) |
2329 | { | 2377 | { |
2330 | struct siginfo info; | 2378 | struct siginfo info; |
@@ -2400,7 +2448,11 @@ SYSCALL_DEFINE3(tgkill, pid_t, tgid, pid_t, pid, int, sig) | |||
2400 | return do_tkill(tgid, pid, sig); | 2448 | return do_tkill(tgid, pid, sig); |
2401 | } | 2449 | } |
2402 | 2450 | ||
2403 | /* | 2451 | /** |
2452 | * sys_tkill - send signal to one specific task | ||
2453 | * @pid: the PID of the task | ||
2454 | * @sig: signal to be sent | ||
2455 | * | ||
2404 | * Send a signal to only one task, even if it's a CLONE_THREAD task. | 2456 | * Send a signal to only one task, even if it's a CLONE_THREAD task. |
2405 | */ | 2457 | */ |
2406 | SYSCALL_DEFINE2(tkill, pid_t, pid, int, sig) | 2458 | SYSCALL_DEFINE2(tkill, pid_t, pid, int, sig) |
@@ -2412,6 +2464,12 @@ SYSCALL_DEFINE2(tkill, pid_t, pid, int, sig) | |||
2412 | return do_tkill(0, pid, sig); | 2464 | return do_tkill(0, pid, sig); |
2413 | } | 2465 | } |
2414 | 2466 | ||
2467 | /** | ||
2468 | * sys_rt_sigqueueinfo - send signal information to a signal | ||
2469 | * @pid: the PID of the thread | ||
2470 | * @sig: signal to be sent | ||
2471 | * @uinfo: signal info to be sent | ||
2472 | */ | ||
2415 | SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig, | 2473 | SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig, |
2416 | siginfo_t __user *, uinfo) | 2474 | siginfo_t __user *, uinfo) |
2417 | { | 2475 | { |
@@ -2421,9 +2479,13 @@ SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig, | |||
2421 | return -EFAULT; | 2479 | return -EFAULT; |
2422 | 2480 | ||
2423 | /* Not even root can pretend to send signals from the kernel. | 2481 | /* Not even root can pretend to send signals from the kernel. |
2424 | Nor can they impersonate a kill(), which adds source info. */ | 2482 | * Nor can they impersonate a kill()/tgkill(), which adds source info. |
2425 | if (info.si_code >= 0) | 2483 | */ |
2484 | if (info.si_code >= 0 || info.si_code == SI_TKILL) { | ||
2485 | /* We used to allow any < 0 si_code */ | ||
2486 | WARN_ON_ONCE(info.si_code < 0); | ||
2426 | return -EPERM; | 2487 | return -EPERM; |
2488 | } | ||
2427 | info.si_signo = sig; | 2489 | info.si_signo = sig; |
2428 | 2490 | ||
2429 | /* POSIX.1b doesn't mention process groups. */ | 2491 | /* POSIX.1b doesn't mention process groups. */ |
@@ -2437,9 +2499,13 @@ long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info) | |||
2437 | return -EINVAL; | 2499 | return -EINVAL; |
2438 | 2500 | ||
2439 | /* Not even root can pretend to send signals from the kernel. | 2501 | /* Not even root can pretend to send signals from the kernel. |
2440 | Nor can they impersonate a kill(), which adds source info. */ | 2502 | * Nor can they impersonate a kill()/tgkill(), which adds source info. |
2441 | if (info->si_code >= 0) | 2503 | */ |
2504 | if (info->si_code >= 0 || info->si_code == SI_TKILL) { | ||
2505 | /* We used to allow any < 0 si_code */ | ||
2506 | WARN_ON_ONCE(info->si_code < 0); | ||
2442 | return -EPERM; | 2507 | return -EPERM; |
2508 | } | ||
2443 | info->si_signo = sig; | 2509 | info->si_signo = sig; |
2444 | 2510 | ||
2445 | return do_send_specific(tgid, pid, sig, info); | 2511 | return do_send_specific(tgid, pid, sig, info); |
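Both sys_rt_sigqueueinfo() and do_rt_tgsigqueueinfo() now reject si_code values of SI_TKILL as well as anything >= 0, so userspace can no longer forge siginfo that appears to come from kill() or tkill(). A raw-syscall demonstration of the new rule; glibc's sigqueue() always uses SI_QUEUE, which remains allowed:

/* forge_siginfo.c - show that forged si_code values are rejected with EPERM.
 * Uses the raw rt_sigqueueinfo syscall, sending to the caller itself. */
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static long send_with_code(int code)
{
	siginfo_t info;

	memset(&info, 0, sizeof(info));
	info.si_signo = SIGUSR1;
	info.si_code = code;
	info.si_pid = getpid();
	info.si_uid = getuid();

	return syscall(SYS_rt_sigqueueinfo, getpid(), SIGUSR1, &info);
}

int main(void)
{
	signal(SIGUSR1, SIG_IGN);	/* don't die when the signal is delivered */

	/* SI_QUEUE (-1) is what sigqueue() uses: still permitted. */
	if (send_with_code(SI_QUEUE) == 0)
		printf("SI_QUEUE accepted\n");

	/* SI_USER (0) pretends the signal came from kill(): now EPERM. */
	if (send_with_code(SI_USER) == -1 && errno == EPERM)
		printf("SI_USER rejected with EPERM\n");

	return 0;
}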
@@ -2531,12 +2597,11 @@ do_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, unsigned long s | |||
2531 | 2597 | ||
2532 | error = -EINVAL; | 2598 | error = -EINVAL; |
2533 | /* | 2599 | /* |
2534 | * | 2600 | * Note - this code used to test ss_flags incorrectly: |
2535 | * Note - this code used to test ss_flags incorrectly | ||
2536 | * old code may have been written using ss_flags==0 | 2601 | * old code may have been written using ss_flags==0 |
2537 | * to mean ss_flags==SS_ONSTACK (as this was the only | 2602 | * to mean ss_flags==SS_ONSTACK (as this was the only |
2538 | * way that worked) - this fix preserves that older | 2603 | * way that worked) - this fix preserves that older |
2539 | * mechanism | 2604 | * mechanism. |
2540 | */ | 2605 | */ |
2541 | if (ss_flags != SS_DISABLE && ss_flags != SS_ONSTACK && ss_flags != 0) | 2606 | if (ss_flags != SS_DISABLE && ss_flags != SS_ONSTACK && ss_flags != 0) |
2542 | goto out; | 2607 | goto out; |
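The reworded comment in do_sigaltstack() documents why ss_flags == 0 is still accepted alongside SS_ONSTACK and SS_DISABLE: older userspace used 0 to mean "install this stack". A minimal use of that legacy form:

/* altstack.c - install an alternate signal stack with ss_flags == 0. */
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	stack_t ss, old;

	ss.ss_sp = malloc(SIGSTKSZ);
	if (!ss.ss_sp) {
		perror("malloc");
		return 1;
	}
	ss.ss_size = SIGSTKSZ;
	ss.ss_flags = 0;	/* legacy spelling of "enable this stack" */

	if (sigaltstack(&ss, &old) == -1) {
		perror("sigaltstack");
		return 1;
	}
	printf("alternate stack installed, previous flags were %d\n",
	       old.ss_flags);
	return 0;
}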
@@ -2570,6 +2635,10 @@ out: | |||
2570 | 2635 | ||
2571 | #ifdef __ARCH_WANT_SYS_SIGPENDING | 2636 | #ifdef __ARCH_WANT_SYS_SIGPENDING |
2572 | 2637 | ||
2638 | /** | ||
2639 | * sys_sigpending - examine pending signals | ||
2640 | * @set: where mask of pending signal is returned | ||
2641 | */ | ||
2573 | SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, set) | 2642 | SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, set) |
2574 | { | 2643 | { |
2575 | return do_sigpending(set, sizeof(*set)); | 2644 | return do_sigpending(set, sizeof(*set)); |
@@ -2578,8 +2647,15 @@ SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, set) | |||
2578 | #endif | 2647 | #endif |
2579 | 2648 | ||
2580 | #ifdef __ARCH_WANT_SYS_SIGPROCMASK | 2649 | #ifdef __ARCH_WANT_SYS_SIGPROCMASK |
2581 | /* Some platforms have their own version with special arguments others | 2650 | /** |
2582 | support only sys_rt_sigprocmask. */ | 2651 | * sys_sigprocmask - examine and change blocked signals |
2652 | * @how: whether to add, remove, or set signals | ||
2653 | * @set: signals to add or remove (if non-null) | ||
2654 | * @oset: previous value of signal mask if non-null | ||
2655 | * | ||
2656 | * Some platforms have their own version with special arguments; | ||
2657 | * others support only sys_rt_sigprocmask. | ||
2658 | */ | ||
2583 | 2659 | ||
2584 | SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, set, | 2660 | SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, set, |
2585 | old_sigset_t __user *, oset) | 2661 | old_sigset_t __user *, oset) |
@@ -2632,6 +2708,13 @@ out: | |||
2632 | #endif /* __ARCH_WANT_SYS_SIGPROCMASK */ | 2708 | #endif /* __ARCH_WANT_SYS_SIGPROCMASK */ |
2633 | 2709 | ||
2634 | #ifdef __ARCH_WANT_SYS_RT_SIGACTION | 2710 | #ifdef __ARCH_WANT_SYS_RT_SIGACTION |
2711 | /** | ||
2712 | * sys_rt_sigaction - alter an action taken by a process | ||
2713 | * @sig: signal to be sent | ||
2714 | * @act: new sigaction | ||
2715 | * @oact: used to save the previous sigaction | ||
2716 | * @sigsetsize: size of sigset_t type | ||
2717 | */ | ||
2635 | SYSCALL_DEFINE4(rt_sigaction, int, sig, | 2718 | SYSCALL_DEFINE4(rt_sigaction, int, sig, |
2636 | const struct sigaction __user *, act, | 2719 | const struct sigaction __user *, act, |
2637 | struct sigaction __user *, oact, | 2720 | struct sigaction __user *, oact, |
@@ -2718,6 +2801,12 @@ SYSCALL_DEFINE0(pause) | |||
2718 | #endif | 2801 | #endif |
2719 | 2802 | ||
2720 | #ifdef __ARCH_WANT_SYS_RT_SIGSUSPEND | 2803 | #ifdef __ARCH_WANT_SYS_RT_SIGSUSPEND |
2804 | /** | ||
2805 | * sys_rt_sigsuspend - replace the signal mask for a value with the | ||
2806 | * @unewset value until a signal is received | ||
2807 | * @unewset: new signal mask value | ||
2808 | * @sigsetsize: size of sigset_t type | ||
2809 | */ | ||
2721 | SYSCALL_DEFINE2(rt_sigsuspend, sigset_t __user *, unewset, size_t, sigsetsize) | 2810 | SYSCALL_DEFINE2(rt_sigsuspend, sigset_t __user *, unewset, size_t, sigsetsize) |
2722 | { | 2811 | { |
2723 | sigset_t newset; | 2812 | sigset_t newset; |
diff --git a/kernel/smp.c b/kernel/smp.c index 7cbd0f293df4..73a195193558 100644 --- a/kernel/smp.c +++ b/kernel/smp.c | |||
@@ -604,6 +604,87 @@ void ipi_call_unlock_irq(void) | |||
604 | } | 604 | } |
605 | #endif /* USE_GENERIC_SMP_HELPERS */ | 605 | #endif /* USE_GENERIC_SMP_HELPERS */ |
606 | 606 | ||
607 | /* Setup configured maximum number of CPUs to activate */ | ||
608 | unsigned int setup_max_cpus = NR_CPUS; | ||
609 | EXPORT_SYMBOL(setup_max_cpus); | ||
610 | |||
611 | |||
612 | /* | ||
613 | * Setup routine for controlling SMP activation | ||
614 | * | ||
615 | * Command-line option of "nosmp" or "maxcpus=0" will disable SMP | ||
616 | * activation entirely (the MPS table probe still happens, though). | ||
617 | * | ||
618 | * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer | ||
619 | * greater than 0, limits the maximum number of CPUs activated in | ||
620 | * SMP mode to <NUM>. | ||
621 | */ | ||
622 | |||
623 | void __weak arch_disable_smp_support(void) { } | ||
624 | |||
625 | static int __init nosmp(char *str) | ||
626 | { | ||
627 | setup_max_cpus = 0; | ||
628 | arch_disable_smp_support(); | ||
629 | |||
630 | return 0; | ||
631 | } | ||
632 | |||
633 | early_param("nosmp", nosmp); | ||
634 | |||
635 | /* this is hard limit */ | ||
636 | static int __init nrcpus(char *str) | ||
637 | { | ||
638 | int nr_cpus; | ||
639 | |||
640 | get_option(&str, &nr_cpus); | ||
641 | if (nr_cpus > 0 && nr_cpus < nr_cpu_ids) | ||
642 | nr_cpu_ids = nr_cpus; | ||
643 | |||
644 | return 0; | ||
645 | } | ||
646 | |||
647 | early_param("nr_cpus", nrcpus); | ||
648 | |||
649 | static int __init maxcpus(char *str) | ||
650 | { | ||
651 | get_option(&str, &setup_max_cpus); | ||
652 | if (setup_max_cpus == 0) | ||
653 | arch_disable_smp_support(); | ||
654 | |||
655 | return 0; | ||
656 | } | ||
657 | |||
658 | early_param("maxcpus", maxcpus); | ||
659 | |||
660 | /* Setup number of possible processor ids */ | ||
661 | int nr_cpu_ids __read_mostly = NR_CPUS; | ||
662 | EXPORT_SYMBOL(nr_cpu_ids); | ||
663 | |||
664 | /* An arch may set nr_cpu_ids earlier if needed, so this would be redundant */ | ||
665 | void __init setup_nr_cpu_ids(void) | ||
666 | { | ||
667 | nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1; | ||
668 | } | ||
669 | |||
670 | /* Called by boot processor to activate the rest. */ | ||
671 | void __init smp_init(void) | ||
672 | { | ||
673 | unsigned int cpu; | ||
674 | |||
675 | /* FIXME: This should be done in userspace --RR */ | ||
676 | for_each_present_cpu(cpu) { | ||
677 | if (num_online_cpus() >= setup_max_cpus) | ||
678 | break; | ||
679 | if (!cpu_online(cpu)) | ||
680 | cpu_up(cpu); | ||
681 | } | ||
682 | |||
683 | /* Any cleanup work */ | ||
684 | printk(KERN_INFO "Brought up %ld CPUs\n", (long)num_online_cpus()); | ||
685 | smp_cpus_done(setup_max_cpus); | ||
686 | } | ||
687 | |||
607 | /* | 688 | /* |
608 | * Call a function on all processors. May be used during early boot while | 689 | * Call a function on all processors. May be used during early boot while |
609 | * early_boot_irqs_disabled is set. Use local_irq_save/restore() instead | 690 | * early_boot_irqs_disabled is set. Use local_irq_save/restore() instead |
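The block moved into kernel/smp.c parses the "nosmp", "nr_cpus=" and "maxcpus=" early parameters and then brings up secondary CPUs in smp_init(). get_option() is kernel-internal; the stand-in below uses strtol and mirrors the documented convention that maxcpus=0 disables SMP and maxcpus=N caps the number of CPUs brought online. All names in it are illustrative:

/* maxcpus_parse.c - parse a "maxcpus=<n>" style option as the boot code does. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static unsigned int setup_max_cpus = 4096;	/* stand-in for NR_CPUS */

static void parse_maxcpus(const char *arg)
{
	long n = strtol(arg, NULL, 0);

	if (n >= 0)
		setup_max_cpus = (unsigned int)n;
	if (setup_max_cpus == 0)
		printf("SMP disabled (maxcpus=0)\n");
}

int main(int argc, char **argv)
{
	int i;

	for (i = 1; i < argc; i++)
		if (strncmp(argv[i], "maxcpus=", 8) == 0)
			parse_maxcpus(argv[i] + 8);

	printf("would bring up at most %u CPUs\n", setup_max_cpus);
	return 0;
}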
diff --git a/kernel/softirq.c b/kernel/softirq.c index 56e5dec837f0..174f976c2874 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -567,7 +567,7 @@ static void __tasklet_hrtimer_trampoline(unsigned long data) | |||
567 | /** | 567 | /** |
568 | * tasklet_hrtimer_init - Init a tasklet/hrtimer combo for softirq callbacks | 568 | * tasklet_hrtimer_init - Init a tasklet/hrtimer combo for softirq callbacks |
569 | * @ttimer: tasklet_hrtimer which is initialized | 569 | * @ttimer: tasklet_hrtimer which is initialized |
570 | * @function: hrtimer callback funtion which gets called from softirq context | 570 | * @function: hrtimer callback function which gets called from softirq context |
571 | * @which_clock: clock id (CLOCK_MONOTONIC/CLOCK_REALTIME) | 571 | * @which_clock: clock id (CLOCK_MONOTONIC/CLOCK_REALTIME) |
572 | * @mode: hrtimer mode (HRTIMER_MODE_ABS/HRTIMER_MODE_REL) | 572 | * @mode: hrtimer mode (HRTIMER_MODE_ABS/HRTIMER_MODE_REL) |
573 | */ | 573 | */ |
@@ -845,7 +845,10 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb, | |||
845 | switch (action) { | 845 | switch (action) { |
846 | case CPU_UP_PREPARE: | 846 | case CPU_UP_PREPARE: |
847 | case CPU_UP_PREPARE_FROZEN: | 847 | case CPU_UP_PREPARE_FROZEN: |
848 | p = kthread_create(run_ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu); | 848 | p = kthread_create_on_node(run_ksoftirqd, |
849 | hcpu, | ||
850 | cpu_to_node(hotcpu), | ||
851 | "ksoftirqd/%d", hotcpu); | ||
849 | if (IS_ERR(p)) { | 852 | if (IS_ERR(p)) { |
850 | printk("ksoftirqd for %i failed\n", hotcpu); | 853 | printk("ksoftirqd for %i failed\n", hotcpu); |
851 | return notifier_from_errno(PTR_ERR(p)); | 854 | return notifier_from_errno(PTR_ERR(p)); |
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 2df820b03beb..e3516b29076c 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c | |||
@@ -301,8 +301,10 @@ static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb, | |||
301 | case CPU_UP_PREPARE: | 301 | case CPU_UP_PREPARE: |
302 | BUG_ON(stopper->thread || stopper->enabled || | 302 | BUG_ON(stopper->thread || stopper->enabled || |
303 | !list_empty(&stopper->works)); | 303 | !list_empty(&stopper->works)); |
304 | p = kthread_create(cpu_stopper_thread, stopper, "migration/%d", | 304 | p = kthread_create_on_node(cpu_stopper_thread, |
305 | cpu); | 305 | stopper, |
306 | cpu_to_node(cpu), | ||
307 | "migration/%d", cpu); | ||
306 | if (IS_ERR(p)) | 308 | if (IS_ERR(p)) |
307 | return notifier_from_errno(PTR_ERR(p)); | 309 | return notifier_from_errno(PTR_ERR(p)); |
308 | get_task_struct(p); | 310 | get_task_struct(p); |
diff --git a/kernel/sys.c b/kernel/sys.c index 1ad48b3b9068..af468edf096a 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -120,16 +120,33 @@ EXPORT_SYMBOL(cad_pid); | |||
120 | void (*pm_power_off_prepare)(void); | 120 | void (*pm_power_off_prepare)(void); |
121 | 121 | ||
122 | /* | 122 | /* |
123 | * Returns true if current's euid is same as p's uid or euid, | ||
124 | * or has CAP_SYS_NICE to p's user_ns. | ||
125 | * | ||
126 | * Called with rcu_read_lock, creds are safe | ||
127 | */ | ||
128 | static bool set_one_prio_perm(struct task_struct *p) | ||
129 | { | ||
130 | const struct cred *cred = current_cred(), *pcred = __task_cred(p); | ||
131 | |||
132 | if (pcred->user->user_ns == cred->user->user_ns && | ||
133 | (pcred->uid == cred->euid || | ||
134 | pcred->euid == cred->euid)) | ||
135 | return true; | ||
136 | if (ns_capable(pcred->user->user_ns, CAP_SYS_NICE)) | ||
137 | return true; | ||
138 | return false; | ||
139 | } | ||
140 | |||
141 | /* | ||
123 | * set the priority of a task | 142 | * set the priority of a task |
124 | * - the caller must hold the RCU read lock | 143 | * - the caller must hold the RCU read lock |
125 | */ | 144 | */ |
126 | static int set_one_prio(struct task_struct *p, int niceval, int error) | 145 | static int set_one_prio(struct task_struct *p, int niceval, int error) |
127 | { | 146 | { |
128 | const struct cred *cred = current_cred(), *pcred = __task_cred(p); | ||
129 | int no_nice; | 147 | int no_nice; |
130 | 148 | ||
131 | if (pcred->uid != cred->euid && | 149 | if (!set_one_prio_perm(p)) { |
132 | pcred->euid != cred->euid && !capable(CAP_SYS_NICE)) { | ||
133 | error = -EPERM; | 150 | error = -EPERM; |
134 | goto out; | 151 | goto out; |
135 | } | 152 | } |
@@ -506,7 +523,7 @@ SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid) | |||
506 | if (rgid != (gid_t) -1) { | 523 | if (rgid != (gid_t) -1) { |
507 | if (old->gid == rgid || | 524 | if (old->gid == rgid || |
508 | old->egid == rgid || | 525 | old->egid == rgid || |
509 | capable(CAP_SETGID)) | 526 | nsown_capable(CAP_SETGID)) |
510 | new->gid = rgid; | 527 | new->gid = rgid; |
511 | else | 528 | else |
512 | goto error; | 529 | goto error; |
@@ -515,7 +532,7 @@ SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid) | |||
515 | if (old->gid == egid || | 532 | if (old->gid == egid || |
516 | old->egid == egid || | 533 | old->egid == egid || |
517 | old->sgid == egid || | 534 | old->sgid == egid || |
518 | capable(CAP_SETGID)) | 535 | nsown_capable(CAP_SETGID)) |
519 | new->egid = egid; | 536 | new->egid = egid; |
520 | else | 537 | else |
521 | goto error; | 538 | goto error; |
@@ -550,7 +567,7 @@ SYSCALL_DEFINE1(setgid, gid_t, gid) | |||
550 | old = current_cred(); | 567 | old = current_cred(); |
551 | 568 | ||
552 | retval = -EPERM; | 569 | retval = -EPERM; |
553 | if (capable(CAP_SETGID)) | 570 | if (nsown_capable(CAP_SETGID)) |
554 | new->gid = new->egid = new->sgid = new->fsgid = gid; | 571 | new->gid = new->egid = new->sgid = new->fsgid = gid; |
555 | else if (gid == old->gid || gid == old->sgid) | 572 | else if (gid == old->gid || gid == old->sgid) |
556 | new->egid = new->fsgid = gid; | 573 | new->egid = new->fsgid = gid; |
@@ -617,7 +634,7 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid) | |||
617 | new->uid = ruid; | 634 | new->uid = ruid; |
618 | if (old->uid != ruid && | 635 | if (old->uid != ruid && |
619 | old->euid != ruid && | 636 | old->euid != ruid && |
620 | !capable(CAP_SETUID)) | 637 | !nsown_capable(CAP_SETUID)) |
621 | goto error; | 638 | goto error; |
622 | } | 639 | } |
623 | 640 | ||
@@ -626,7 +643,7 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid) | |||
626 | if (old->uid != euid && | 643 | if (old->uid != euid && |
627 | old->euid != euid && | 644 | old->euid != euid && |
628 | old->suid != euid && | 645 | old->suid != euid && |
629 | !capable(CAP_SETUID)) | 646 | !nsown_capable(CAP_SETUID)) |
630 | goto error; | 647 | goto error; |
631 | } | 648 | } |
632 | 649 | ||
@@ -674,7 +691,7 @@ SYSCALL_DEFINE1(setuid, uid_t, uid) | |||
674 | old = current_cred(); | 691 | old = current_cred(); |
675 | 692 | ||
676 | retval = -EPERM; | 693 | retval = -EPERM; |
677 | if (capable(CAP_SETUID)) { | 694 | if (nsown_capable(CAP_SETUID)) { |
678 | new->suid = new->uid = uid; | 695 | new->suid = new->uid = uid; |
679 | if (uid != old->uid) { | 696 | if (uid != old->uid) { |
680 | retval = set_user(new); | 697 | retval = set_user(new); |
@@ -716,7 +733,7 @@ SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid) | |||
716 | old = current_cred(); | 733 | old = current_cred(); |
717 | 734 | ||
718 | retval = -EPERM; | 735 | retval = -EPERM; |
719 | if (!capable(CAP_SETUID)) { | 736 | if (!nsown_capable(CAP_SETUID)) { |
720 | if (ruid != (uid_t) -1 && ruid != old->uid && | 737 | if (ruid != (uid_t) -1 && ruid != old->uid && |
721 | ruid != old->euid && ruid != old->suid) | 738 | ruid != old->euid && ruid != old->suid) |
722 | goto error; | 739 | goto error; |
@@ -780,7 +797,7 @@ SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid) | |||
780 | old = current_cred(); | 797 | old = current_cred(); |
781 | 798 | ||
782 | retval = -EPERM; | 799 | retval = -EPERM; |
783 | if (!capable(CAP_SETGID)) { | 800 | if (!nsown_capable(CAP_SETGID)) { |
784 | if (rgid != (gid_t) -1 && rgid != old->gid && | 801 | if (rgid != (gid_t) -1 && rgid != old->gid && |
785 | rgid != old->egid && rgid != old->sgid) | 802 | rgid != old->egid && rgid != old->sgid) |
786 | goto error; | 803 | goto error; |
@@ -840,7 +857,7 @@ SYSCALL_DEFINE1(setfsuid, uid_t, uid) | |||
840 | 857 | ||
841 | if (uid == old->uid || uid == old->euid || | 858 | if (uid == old->uid || uid == old->euid || |
842 | uid == old->suid || uid == old->fsuid || | 859 | uid == old->suid || uid == old->fsuid || |
843 | capable(CAP_SETUID)) { | 860 | nsown_capable(CAP_SETUID)) { |
844 | if (uid != old_fsuid) { | 861 | if (uid != old_fsuid) { |
845 | new->fsuid = uid; | 862 | new->fsuid = uid; |
846 | if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0) | 863 | if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0) |
@@ -873,7 +890,7 @@ SYSCALL_DEFINE1(setfsgid, gid_t, gid) | |||
873 | 890 | ||
874 | if (gid == old->gid || gid == old->egid || | 891 | if (gid == old->gid || gid == old->egid || |
875 | gid == old->sgid || gid == old->fsgid || | 892 | gid == old->sgid || gid == old->fsgid || |
876 | capable(CAP_SETGID)) { | 893 | nsown_capable(CAP_SETGID)) { |
877 | if (gid != old_fsgid) { | 894 | if (gid != old_fsgid) { |
878 | new->fsgid = gid; | 895 | new->fsgid = gid; |
879 | goto change_okay; | 896 | goto change_okay; |
@@ -1181,8 +1198,9 @@ SYSCALL_DEFINE2(sethostname, char __user *, name, int, len) | |||
1181 | int errno; | 1198 | int errno; |
1182 | char tmp[__NEW_UTS_LEN]; | 1199 | char tmp[__NEW_UTS_LEN]; |
1183 | 1200 | ||
1184 | if (!capable(CAP_SYS_ADMIN)) | 1201 | if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN)) |
1185 | return -EPERM; | 1202 | return -EPERM; |
1203 | |||
1186 | if (len < 0 || len > __NEW_UTS_LEN) | 1204 | if (len < 0 || len > __NEW_UTS_LEN) |
1187 | return -EINVAL; | 1205 | return -EINVAL; |
1188 | down_write(&uts_sem); | 1206 | down_write(&uts_sem); |
@@ -1230,7 +1248,7 @@ SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len) | |||
1230 | int errno; | 1248 | int errno; |
1231 | char tmp[__NEW_UTS_LEN]; | 1249 | char tmp[__NEW_UTS_LEN]; |
1232 | 1250 | ||
1233 | if (!capable(CAP_SYS_ADMIN)) | 1251 | if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN)) |
1234 | return -EPERM; | 1252 | return -EPERM; |
1235 | if (len < 0 || len > __NEW_UTS_LEN) | 1253 | if (len < 0 || len > __NEW_UTS_LEN) |
1236 | return -EINVAL; | 1254 | return -EINVAL; |
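
sethostname() and setdomainname() now check CAP_SYS_ADMIN against the user namespace that owns the caller's UTS namespace rather than against the initial namespace. A small userspace sketch of the visible behaviour, using only the plain syscall wrappers:

        #include <errno.h>
        #include <stdio.h>
        #include <string.h>
        #include <unistd.h>

        int main(void)
        {
                const char *name = "demo-host";

                if (sethostname(name, strlen(name)) != 0)
                        printf("sethostname: %s (need CAP_SYS_ADMIN over the UTS namespace)\n",
                               strerror(errno));
                else
                        printf("hostname set to %s\n", name);
                return 0;
        }
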
@@ -1345,6 +1363,8 @@ int do_prlimit(struct task_struct *tsk, unsigned int resource, | |||
1345 | rlim = tsk->signal->rlim + resource; | 1363 | rlim = tsk->signal->rlim + resource; |
1346 | task_lock(tsk->group_leader); | 1364 | task_lock(tsk->group_leader); |
1347 | if (new_rlim) { | 1365 | if (new_rlim) { |
1366 | /* Keep the capable check against init_user_ns until | ||
1367 | cgroups can contain all limits */ | ||
1348 | if (new_rlim->rlim_max > rlim->rlim_max && | 1368 | if (new_rlim->rlim_max > rlim->rlim_max && |
1349 | !capable(CAP_SYS_RESOURCE)) | 1369 | !capable(CAP_SYS_RESOURCE)) |
1350 | retval = -EPERM; | 1370 | retval = -EPERM; |
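
The comment added above documents why raising a hard limit stays gated by capable(CAP_SYS_RESOURCE), i.e. the check remains against the initial user namespace. A hedged userspace sketch of that rule (illustration only, not from the patch):

        #include <errno.h>
        #include <stdio.h>
        #include <string.h>
        #include <sys/resource.h>

        int main(void)
        {
                struct rlimit rl;

                if (getrlimit(RLIMIT_NOFILE, &rl) != 0) {
                        perror("getrlimit");
                        return 1;
                }
                if (rl.rlim_max == RLIM_INFINITY) {
                        printf("hard limit already unlimited; nothing to raise\n");
                        return 0;
                }
                rl.rlim_max += 1;       /* try to raise the hard limit */
                if (setrlimit(RLIMIT_NOFILE, &rl) != 0)
                        printf("raising rlim_max: %s (expected without CAP_SYS_RESOURCE)\n",
                               strerror(errno));
                else
                        printf("rlim_max raised to %llu\n",
                               (unsigned long long)rl.rlim_max);
                return 0;
        }
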
@@ -1388,19 +1408,22 @@ static int check_prlimit_permission(struct task_struct *task) | |||
1388 | { | 1408 | { |
1389 | const struct cred *cred = current_cred(), *tcred; | 1409 | const struct cred *cred = current_cred(), *tcred; |
1390 | 1410 | ||
1391 | tcred = __task_cred(task); | 1411 | if (current == task) |
1392 | if (current != task && | 1412 | return 0; |
1393 | (cred->uid != tcred->euid || | ||
1394 | cred->uid != tcred->suid || | ||
1395 | cred->uid != tcred->uid || | ||
1396 | cred->gid != tcred->egid || | ||
1397 | cred->gid != tcred->sgid || | ||
1398 | cred->gid != tcred->gid) && | ||
1399 | !capable(CAP_SYS_RESOURCE)) { | ||
1400 | return -EPERM; | ||
1401 | } | ||
1402 | 1413 | ||
1403 | return 0; | 1414 | tcred = __task_cred(task); |
1415 | if (cred->user->user_ns == tcred->user->user_ns && | ||
1416 | (cred->uid == tcred->euid && | ||
1417 | cred->uid == tcred->suid && | ||
1418 | cred->uid == tcred->uid && | ||
1419 | cred->gid == tcred->egid && | ||
1420 | cred->gid == tcred->sgid && | ||
1421 | cred->gid == tcred->gid)) | ||
1422 | return 0; | ||
1423 | if (ns_capable(tcred->user->user_ns, CAP_SYS_RESOURCE)) | ||
1424 | return 0; | ||
1425 | |||
1426 | return -EPERM; | ||
1404 | } | 1427 | } |
1405 | 1428 | ||
1406 | SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource, | 1429 | SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource, |
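
check_prlimit_permission() is rewritten: the caller must either be the task itself, share the target's user namespace with fully matching uids/gids, or hold CAP_SYS_RESOURCE over the target's user namespace. A userspace sketch of the effect through prlimit(2), assuming a glibc that provides the prlimit() wrapper:

        #define _GNU_SOURCE
        #include <errno.h>
        #include <stdio.h>
        #include <string.h>
        #include <sys/resource.h>

        int main(void)
        {
                struct rlimit rl;

                /* pid 0 means the calling process itself: always permitted. */
                if (prlimit(0, RLIMIT_NOFILE, NULL, &rl) == 0)
                        printf("own RLIMIT_NOFILE: soft=%llu hard=%llu\n",
                               (unsigned long long)rl.rlim_cur,
                               (unsigned long long)rl.rlim_max);

                /* Reading pid 1's limits as an unprivileged user should fail. */
                if (prlimit(1, RLIMIT_NOFILE, NULL, &rl) != 0)
                        printf("prlimit(pid 1): %s (expected for an unprivileged caller)\n",
                               strerror(errno));
                else
                        printf("pid 1 RLIMIT_NOFILE: soft=%llu hard=%llu\n",
                               (unsigned long long)rl.rlim_cur,
                               (unsigned long long)rl.rlim_max);
                return 0;
        }
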
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 40245d697602..c0bb32414b17 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -117,6 +117,7 @@ static int neg_one = -1; | |||
117 | static int zero; | 117 | static int zero; |
118 | static int __maybe_unused one = 1; | 118 | static int __maybe_unused one = 1; |
119 | static int __maybe_unused two = 2; | 119 | static int __maybe_unused two = 2; |
120 | static int __maybe_unused three = 3; | ||
120 | static unsigned long one_ul = 1; | 121 | static unsigned long one_ul = 1; |
121 | static int one_hundred = 100; | 122 | static int one_hundred = 100; |
122 | #ifdef CONFIG_PRINTK | 123 | #ifdef CONFIG_PRINTK |
@@ -169,6 +170,11 @@ static int proc_taint(struct ctl_table *table, int write, | |||
169 | void __user *buffer, size_t *lenp, loff_t *ppos); | 170 | void __user *buffer, size_t *lenp, loff_t *ppos); |
170 | #endif | 171 | #endif |
171 | 172 | ||
173 | #ifdef CONFIG_PRINTK | ||
174 | static int proc_dmesg_restrict(struct ctl_table *table, int write, | ||
175 | void __user *buffer, size_t *lenp, loff_t *ppos); | ||
176 | #endif | ||
177 | |||
172 | #ifdef CONFIG_MAGIC_SYSRQ | 178 | #ifdef CONFIG_MAGIC_SYSRQ |
173 | /* Note: sysrq code uses it's own private copy */ | 179 | /* Note: sysrq code uses it's own private copy */ |
174 | static int __sysrq_enabled = SYSRQ_DEFAULT_ENABLE; | 180 | static int __sysrq_enabled = SYSRQ_DEFAULT_ENABLE; |
@@ -706,7 +712,7 @@ static struct ctl_table kern_table[] = { | |||
706 | .data = &kptr_restrict, | 712 | .data = &kptr_restrict, |
707 | .maxlen = sizeof(int), | 713 | .maxlen = sizeof(int), |
708 | .mode = 0644, | 714 | .mode = 0644, |
709 | .proc_handler = proc_dointvec_minmax, | 715 | .proc_handler = proc_dmesg_restrict, |
710 | .extra1 = &zero, | 716 | .extra1 = &zero, |
711 | .extra2 = &two, | 717 | .extra2 = &two, |
712 | }, | 718 | }, |
@@ -971,14 +977,18 @@ static struct ctl_table vm_table[] = { | |||
971 | .data = &sysctl_overcommit_memory, | 977 | .data = &sysctl_overcommit_memory, |
972 | .maxlen = sizeof(sysctl_overcommit_memory), | 978 | .maxlen = sizeof(sysctl_overcommit_memory), |
973 | .mode = 0644, | 979 | .mode = 0644, |
974 | .proc_handler = proc_dointvec, | 980 | .proc_handler = proc_dointvec_minmax, |
981 | .extra1 = &zero, | ||
982 | .extra2 = &two, | ||
975 | }, | 983 | }, |
976 | { | 984 | { |
977 | .procname = "panic_on_oom", | 985 | .procname = "panic_on_oom", |
978 | .data = &sysctl_panic_on_oom, | 986 | .data = &sysctl_panic_on_oom, |
979 | .maxlen = sizeof(sysctl_panic_on_oom), | 987 | .maxlen = sizeof(sysctl_panic_on_oom), |
980 | .mode = 0644, | 988 | .mode = 0644, |
981 | .proc_handler = proc_dointvec, | 989 | .proc_handler = proc_dointvec_minmax, |
990 | .extra1 = &zero, | ||
991 | .extra2 = &two, | ||
982 | }, | 992 | }, |
983 | { | 993 | { |
984 | .procname = "oom_kill_allocating_task", | 994 | .procname = "oom_kill_allocating_task", |
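
overcommit_memory and panic_on_oom move from proc_dointvec to proc_dointvec_minmax with 0..2 bounds, so out-of-range writes are rejected instead of being stored verbatim. A hedged userspace sketch, to be run as root:

        #include <errno.h>
        #include <fcntl.h>
        #include <stdio.h>
        #include <string.h>
        #include <unistd.h>

        int main(void)
        {
                int fd = open("/proc/sys/vm/overcommit_memory", O_WRONLY);

                if (fd < 0) {
                        perror("open /proc/sys/vm/overcommit_memory");
                        return 1;
                }
                /* Valid values are 0..2; 5 should now be refused. */
                if (write(fd, "5\n", 2) < 0)
                        printf("write 5: %s (expected EINVAL with the minmax handler)\n",
                               strerror(errno));
                else
                        printf("write 5: accepted (pre-minmax behaviour)\n");
                close(fd);
                return 0;
        }
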
@@ -1006,7 +1016,8 @@ static struct ctl_table vm_table[] = { | |||
1006 | .data = &page_cluster, | 1016 | .data = &page_cluster, |
1007 | .maxlen = sizeof(int), | 1017 | .maxlen = sizeof(int), |
1008 | .mode = 0644, | 1018 | .mode = 0644, |
1009 | .proc_handler = proc_dointvec, | 1019 | .proc_handler = proc_dointvec_minmax, |
1020 | .extra1 = &zero, | ||
1010 | }, | 1021 | }, |
1011 | { | 1022 | { |
1012 | .procname = "dirty_background_ratio", | 1023 | .procname = "dirty_background_ratio", |
@@ -1054,7 +1065,8 @@ static struct ctl_table vm_table[] = { | |||
1054 | .data = &dirty_expire_interval, | 1065 | .data = &dirty_expire_interval, |
1055 | .maxlen = sizeof(dirty_expire_interval), | 1066 | .maxlen = sizeof(dirty_expire_interval), |
1056 | .mode = 0644, | 1067 | .mode = 0644, |
1057 | .proc_handler = proc_dointvec, | 1068 | .proc_handler = proc_dointvec_minmax, |
1069 | .extra1 = &zero, | ||
1058 | }, | 1070 | }, |
1059 | { | 1071 | { |
1060 | .procname = "nr_pdflush_threads", | 1072 | .procname = "nr_pdflush_threads", |
@@ -1130,6 +1142,8 @@ static struct ctl_table vm_table[] = { | |||
1130 | .maxlen = sizeof(int), | 1142 | .maxlen = sizeof(int), |
1131 | .mode = 0644, | 1143 | .mode = 0644, |
1132 | .proc_handler = drop_caches_sysctl_handler, | 1144 | .proc_handler = drop_caches_sysctl_handler, |
1145 | .extra1 = &one, | ||
1146 | .extra2 = &three, | ||
1133 | }, | 1147 | }, |
1134 | #ifdef CONFIG_COMPACTION | 1148 | #ifdef CONFIG_COMPACTION |
1135 | { | 1149 | { |
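
drop_caches likewise gets explicit 1..3 bounds (1 = page cache, 2 = slab objects, 3 = both). A minimal root-only userspace sketch of the interface; the sync() call is just good practice before dropping clean caches, not something the patch requires:

        #include <fcntl.h>
        #include <stdio.h>
        #include <unistd.h>

        int main(void)
        {
                int fd;

                sync();         /* flush dirty data so more cache pages are droppable */
                fd = open("/proc/sys/vm/drop_caches", O_WRONLY);
                if (fd < 0) {
                        perror("open /proc/sys/vm/drop_caches");
                        return 1;
                }
                if (write(fd, "3\n", 2) < 0)
                        perror("write");
                else
                        printf("page cache and slab caches dropped\n");
                close(fd);
                return 0;
        }
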
@@ -2385,6 +2399,17 @@ static int proc_taint(struct ctl_table *table, int write, | |||
2385 | return err; | 2399 | return err; |
2386 | } | 2400 | } |
2387 | 2401 | ||
2402 | #ifdef CONFIG_PRINTK | ||
2403 | static int proc_dmesg_restrict(struct ctl_table *table, int write, | ||
2404 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
2405 | { | ||
2406 | if (write && !capable(CAP_SYS_ADMIN)) | ||
2407 | return -EPERM; | ||
2408 | |||
2409 | return proc_dointvec_minmax(table, write, buffer, lenp, ppos); | ||
2410 | } | ||
2411 | #endif | ||
2412 | |||
2388 | struct do_proc_dointvec_minmax_conv_param { | 2413 | struct do_proc_dointvec_minmax_conv_param { |
2389 | int *min; | 2414 | int *min; |
2390 | int *max; | 2415 | int *max; |
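
proc_dmesg_restrict() refuses writes from tasks without CAP_SYS_ADMIN before falling through to proc_dointvec_minmax(); with the earlier hunk it now also guards kptr_restrict. A hedged userspace sketch: the EPERM case is only visible to a writer that can open the 0644 file but lacks CAP_SYS_ADMIN, for example a capability-restricted root task:

        #include <errno.h>
        #include <fcntl.h>
        #include <stdio.h>
        #include <string.h>
        #include <unistd.h>

        int main(void)
        {
                int fd = open("/proc/sys/kernel/kptr_restrict", O_WRONLY);

                if (fd < 0) {           /* EACCES: ordinary file-permission failure */
                        perror("open /proc/sys/kernel/kptr_restrict");
                        return 1;
                }
                if (write(fd, "1\n", 2) < 0)    /* EPERM: handler-level CAP_SYS_ADMIN check */
                        printf("write: %s\n", strerror(errno));
                else
                        printf("kptr_restrict set to 1\n");
                close(fd);
                return 0;
        }
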
diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c index 10b90d8a03c4..4e4932a7b360 100644 --- a/kernel/sysctl_check.c +++ b/kernel/sysctl_check.c | |||
@@ -111,11 +111,9 @@ int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table) | |||
111 | const char *fail = NULL; | 111 | const char *fail = NULL; |
112 | 112 | ||
113 | if (table->parent) { | 113 | if (table->parent) { |
114 | if (table->procname && !table->parent->procname) | 114 | if (!table->parent->procname) |
115 | set_fail(&fail, table, "Parent without procname"); | 115 | set_fail(&fail, table, "Parent without procname"); |
116 | } | 116 | } |
117 | if (!table->procname) | ||
118 | set_fail(&fail, table, "No procname"); | ||
119 | if (table->child) { | 117 | if (table->child) { |
120 | if (table->data) | 118 | if (table->data) |
121 | set_fail(&fail, table, "Directory with data?"); | 119 | set_fail(&fail, table, "Directory with data?"); |
@@ -144,13 +142,9 @@ int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table) | |||
144 | set_fail(&fail, table, "No maxlen"); | 142 | set_fail(&fail, table, "No maxlen"); |
145 | } | 143 | } |
146 | #ifdef CONFIG_PROC_SYSCTL | 144 | #ifdef CONFIG_PROC_SYSCTL |
147 | if (table->procname && !table->proc_handler) | 145 | if (!table->proc_handler) |
148 | set_fail(&fail, table, "No proc_handler"); | 146 | set_fail(&fail, table, "No proc_handler"); |
149 | #endif | 147 | #endif |
150 | #if 0 | ||
151 | if (!table->procname && table->proc_handler) | ||
152 | set_fail(&fail, table, "proc_handler without procname"); | ||
153 | #endif | ||
154 | sysctl_check_leaf(namespaces, table, &fail); | 148 | sysctl_check_leaf(namespaces, table, &fail); |
155 | } | 149 | } |
156 | if (table->mode > 0777) | 150 | if (table->mode > 0777) |
diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 3971c6b9d58d..9ffea360a778 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c | |||
@@ -685,7 +685,7 @@ static int __init taskstats_init(void) | |||
685 | goto err_cgroup_ops; | 685 | goto err_cgroup_ops; |
686 | 686 | ||
687 | family_registered = 1; | 687 | family_registered = 1; |
688 | printk("registered taskstats version %d\n", TASKSTATS_GENL_VERSION); | 688 | pr_info("registered taskstats version %d\n", TASKSTATS_GENL_VERSION); |
689 | return 0; | 689 | return 0; |
690 | err_cgroup_ops: | 690 | err_cgroup_ops: |
691 | genl_unregister_ops(&family, &taskstats_ops); | 691 | genl_unregister_ops(&family, &taskstats_ops); |
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index b2fa506667c0..a470154e0408 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c | |||
@@ -34,7 +34,7 @@ | |||
34 | * inaccuracies caused by missed or lost timer | 34 | * inaccuracies caused by missed or lost timer |
35 | * interrupts and the inability for the timer | 35 | * interrupts and the inability for the timer |
36 | * interrupt hardware to accuratly tick at the | 36 | * interrupt hardware to accuratly tick at the |
37 | * requested HZ value. It is also not reccomended | 37 | * requested HZ value. It is also not recommended |
38 | * for "tick-less" systems. | 38 | * for "tick-less" systems. |
39 | */ | 39 | */ |
40 | #define NSEC_PER_JIFFY ((u32)((((u64)NSEC_PER_SEC)<<8)/ACTHZ)) | 40 | #define NSEC_PER_JIFFY ((u32)((((u64)NSEC_PER_SEC)<<8)/ACTHZ)) |
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 5f1bb8e2008f..f6117a4c7cb8 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c | |||
@@ -652,6 +652,8 @@ int do_adjtimex(struct timex *txc) | |||
652 | struct timespec delta; | 652 | struct timespec delta; |
653 | delta.tv_sec = txc->time.tv_sec; | 653 | delta.tv_sec = txc->time.tv_sec; |
654 | delta.tv_nsec = txc->time.tv_usec; | 654 | delta.tv_nsec = txc->time.tv_usec; |
655 | if (!capable(CAP_SYS_TIME)) | ||
656 | return -EPERM; | ||
655 | if (!(txc->modes & ADJ_NANO)) | 657 | if (!(txc->modes & ADJ_NANO)) |
656 | delta.tv_nsec *= 1000; | 658 | delta.tv_nsec *= 1000; |
657 | result = timekeeping_inject_offset(&delta); | 659 | result = timekeeping_inject_offset(&delta); |
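
do_adjtimex() now rejects the ADJ_SETOFFSET path with -EPERM unless the caller has CAP_SYS_TIME, before any offset is injected. A userspace sketch of that path; the fallback #define for ADJ_SETOFFSET is an assumption for headers that predate the flag:

        #include <errno.h>
        #include <stdio.h>
        #include <string.h>
        #include <sys/timex.h>

        #ifndef ADJ_SETOFFSET
        #define ADJ_SETOFFSET 0x0100
        #endif

        int main(void)
        {
                struct timex txc;

                memset(&txc, 0, sizeof(txc));
                txc.modes = ADJ_SETOFFSET;
                txc.time.tv_sec = 0;
                txc.time.tv_usec = 1000;        /* step the clock forward by 1 ms */

                if (adjtimex(&txc) < 0)
                        printf("adjtimex(ADJ_SETOFFSET): %s (expected without CAP_SYS_TIME)\n",
                               strerror(errno));
                else
                        printf("clock stepped by 1 ms\n");
                return 0;
        }
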
diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c index 25028dd4fa18..c340ca658f37 100644 --- a/kernel/time/posix-clock.c +++ b/kernel/time/posix-clock.c | |||
@@ -19,7 +19,6 @@ | |||
19 | */ | 19 | */ |
20 | #include <linux/device.h> | 20 | #include <linux/device.h> |
21 | #include <linux/file.h> | 21 | #include <linux/file.h> |
22 | #include <linux/mutex.h> | ||
23 | #include <linux/posix-clock.h> | 22 | #include <linux/posix-clock.h> |
24 | #include <linux/slab.h> | 23 | #include <linux/slab.h> |
25 | #include <linux/syscalls.h> | 24 | #include <linux/syscalls.h> |
@@ -34,19 +33,19 @@ static struct posix_clock *get_posix_clock(struct file *fp) | |||
34 | { | 33 | { |
35 | struct posix_clock *clk = fp->private_data; | 34 | struct posix_clock *clk = fp->private_data; |
36 | 35 | ||
37 | mutex_lock(&clk->mutex); | 36 | down_read(&clk->rwsem); |
38 | 37 | ||
39 | if (!clk->zombie) | 38 | if (!clk->zombie) |
40 | return clk; | 39 | return clk; |
41 | 40 | ||
42 | mutex_unlock(&clk->mutex); | 41 | up_read(&clk->rwsem); |
43 | 42 | ||
44 | return NULL; | 43 | return NULL; |
45 | } | 44 | } |
46 | 45 | ||
47 | static void put_posix_clock(struct posix_clock *clk) | 46 | static void put_posix_clock(struct posix_clock *clk) |
48 | { | 47 | { |
49 | mutex_unlock(&clk->mutex); | 48 | up_read(&clk->rwsem); |
50 | } | 49 | } |
51 | 50 | ||
52 | static ssize_t posix_clock_read(struct file *fp, char __user *buf, | 51 | static ssize_t posix_clock_read(struct file *fp, char __user *buf, |
@@ -156,7 +155,7 @@ static int posix_clock_open(struct inode *inode, struct file *fp) | |||
156 | struct posix_clock *clk = | 155 | struct posix_clock *clk = |
157 | container_of(inode->i_cdev, struct posix_clock, cdev); | 156 | container_of(inode->i_cdev, struct posix_clock, cdev); |
158 | 157 | ||
159 | mutex_lock(&clk->mutex); | 158 | down_read(&clk->rwsem); |
160 | 159 | ||
161 | if (clk->zombie) { | 160 | if (clk->zombie) { |
162 | err = -ENODEV; | 161 | err = -ENODEV; |
@@ -172,7 +171,7 @@ static int posix_clock_open(struct inode *inode, struct file *fp) | |||
172 | fp->private_data = clk; | 171 | fp->private_data = clk; |
173 | } | 172 | } |
174 | out: | 173 | out: |
175 | mutex_unlock(&clk->mutex); | 174 | up_read(&clk->rwsem); |
176 | return err; | 175 | return err; |
177 | } | 176 | } |
178 | 177 | ||
@@ -211,25 +210,20 @@ int posix_clock_register(struct posix_clock *clk, dev_t devid) | |||
211 | int err; | 210 | int err; |
212 | 211 | ||
213 | kref_init(&clk->kref); | 212 | kref_init(&clk->kref); |
214 | mutex_init(&clk->mutex); | 213 | init_rwsem(&clk->rwsem); |
215 | 214 | ||
216 | cdev_init(&clk->cdev, &posix_clock_file_operations); | 215 | cdev_init(&clk->cdev, &posix_clock_file_operations); |
217 | clk->cdev.owner = clk->ops.owner; | 216 | clk->cdev.owner = clk->ops.owner; |
218 | err = cdev_add(&clk->cdev, devid, 1); | 217 | err = cdev_add(&clk->cdev, devid, 1); |
219 | if (err) | ||
220 | goto no_cdev; | ||
221 | 218 | ||
222 | return err; | 219 | return err; |
223 | no_cdev: | ||
224 | mutex_destroy(&clk->mutex); | ||
225 | return err; | ||
226 | } | 220 | } |
227 | EXPORT_SYMBOL_GPL(posix_clock_register); | 221 | EXPORT_SYMBOL_GPL(posix_clock_register); |
228 | 222 | ||
229 | static void delete_clock(struct kref *kref) | 223 | static void delete_clock(struct kref *kref) |
230 | { | 224 | { |
231 | struct posix_clock *clk = container_of(kref, struct posix_clock, kref); | 225 | struct posix_clock *clk = container_of(kref, struct posix_clock, kref); |
232 | mutex_destroy(&clk->mutex); | 226 | |
233 | if (clk->release) | 227 | if (clk->release) |
234 | clk->release(clk); | 228 | clk->release(clk); |
235 | } | 229 | } |
@@ -238,9 +232,9 @@ void posix_clock_unregister(struct posix_clock *clk) | |||
238 | { | 232 | { |
239 | cdev_del(&clk->cdev); | 233 | cdev_del(&clk->cdev); |
240 | 234 | ||
241 | mutex_lock(&clk->mutex); | 235 | down_write(&clk->rwsem); |
242 | clk->zombie = true; | 236 | clk->zombie = true; |
243 | mutex_unlock(&clk->mutex); | 237 | up_write(&clk->rwsem); |
244 | 238 | ||
245 | kref_put(&clk->kref, delete_clock); | 239 | kref_put(&clk->kref, delete_clock); |
246 | } | 240 | } |
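
posix_clock moves from a mutex to an rwsem so that concurrent lookups of a live clock no longer serialize against each other, while unregistration takes the write side only long enough to flip the zombie flag. A pthreads stand-in for the same pattern (illustration only, not kernel code):

        #include <pthread.h>
        #include <stdbool.h>
        #include <stdio.h>

        struct clk {
                pthread_rwlock_t rwsem;
                bool zombie;
        };

        static struct clk *get_clk(struct clk *c)
        {
                pthread_rwlock_rdlock(&c->rwsem);
                if (!c->zombie)
                        return c;               /* caller holds the read lock */
                pthread_rwlock_unlock(&c->rwsem);
                return NULL;
        }

        static void put_clk(struct clk *c)
        {
                pthread_rwlock_unlock(&c->rwsem);
        }

        static void unregister_clk(struct clk *c)
        {
                pthread_rwlock_wrlock(&c->rwsem);
                c->zombie = true;
                pthread_rwlock_unlock(&c->rwsem);
        }

        int main(void)
        {
                struct clk c = { .rwsem = PTHREAD_RWLOCK_INITIALIZER, .zombie = false };
                struct clk *h = get_clk(&c);

                if (h) {
                        printf("clock is live\n");
                        put_clk(h);
                }
                unregister_clk(&c);
                printf("after unregister: %s\n", get_clk(&c) ? "live" : "zombie");
                return 0;
        }

Build with -pthread; the point is only the read-side fast path plus the write-locked zombie transition.
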
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 3bd7e3d5c632..8ad5d576755e 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
@@ -14,7 +14,7 @@ | |||
14 | #include <linux/init.h> | 14 | #include <linux/init.h> |
15 | #include <linux/mm.h> | 15 | #include <linux/mm.h> |
16 | #include <linux/sched.h> | 16 | #include <linux/sched.h> |
17 | #include <linux/sysdev.h> | 17 | #include <linux/syscore_ops.h> |
18 | #include <linux/clocksource.h> | 18 | #include <linux/clocksource.h> |
19 | #include <linux/jiffies.h> | 19 | #include <linux/jiffies.h> |
20 | #include <linux/time.h> | 20 | #include <linux/time.h> |
@@ -597,13 +597,12 @@ static struct timespec timekeeping_suspend_time; | |||
597 | 597 | ||
598 | /** | 598 | /** |
599 | * timekeeping_resume - Resumes the generic timekeeping subsystem. | 599 | * timekeeping_resume - Resumes the generic timekeeping subsystem. |
600 | * @dev: unused | ||
601 | * | 600 | * |
602 | * This is for the generic clocksource timekeeping. | 601 | * This is for the generic clocksource timekeeping. |
603 | * xtime/wall_to_monotonic/jiffies/etc are | 602 | * xtime/wall_to_monotonic/jiffies/etc are |
604 | * still managed by arch specific suspend/resume code. | 603 | * still managed by arch specific suspend/resume code. |
605 | */ | 604 | */ |
606 | static int timekeeping_resume(struct sys_device *dev) | 605 | static void timekeeping_resume(void) |
607 | { | 606 | { |
608 | unsigned long flags; | 607 | unsigned long flags; |
609 | struct timespec ts; | 608 | struct timespec ts; |
@@ -632,11 +631,9 @@ static int timekeeping_resume(struct sys_device *dev) | |||
632 | 631 | ||
633 | /* Resume hrtimers */ | 632 | /* Resume hrtimers */ |
634 | hres_timers_resume(); | 633 | hres_timers_resume(); |
635 | |||
636 | return 0; | ||
637 | } | 634 | } |
638 | 635 | ||
639 | static int timekeeping_suspend(struct sys_device *dev, pm_message_t state) | 636 | static int timekeeping_suspend(void) |
640 | { | 637 | { |
641 | unsigned long flags; | 638 | unsigned long flags; |
642 | 639 | ||
@@ -654,26 +651,18 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state) | |||
654 | } | 651 | } |
655 | 652 | ||
656 | /* sysfs resume/suspend bits for timekeeping */ | 653 | /* sysfs resume/suspend bits for timekeeping */ |
657 | static struct sysdev_class timekeeping_sysclass = { | 654 | static struct syscore_ops timekeeping_syscore_ops = { |
658 | .name = "timekeeping", | ||
659 | .resume = timekeeping_resume, | 655 | .resume = timekeeping_resume, |
660 | .suspend = timekeeping_suspend, | 656 | .suspend = timekeeping_suspend, |
661 | }; | 657 | }; |
662 | 658 | ||
663 | static struct sys_device device_timer = { | 659 | static int __init timekeeping_init_ops(void) |
664 | .id = 0, | ||
665 | .cls = &timekeeping_sysclass, | ||
666 | }; | ||
667 | |||
668 | static int __init timekeeping_init_device(void) | ||
669 | { | 660 | { |
670 | int error = sysdev_class_register(&timekeeping_sysclass); | 661 | register_syscore_ops(&timekeeping_syscore_ops); |
671 | if (!error) | 662 | return 0; |
672 | error = sysdev_register(&device_timer); | ||
673 | return error; | ||
674 | } | 663 | } |
675 | 664 | ||
676 | device_initcall(timekeeping_init_device); | 665 | device_initcall(timekeeping_init_ops); |
677 | 666 | ||
678 | /* | 667 | /* |
679 | * If the error is already larger, we look ahead even further | 668 | * If the error is already larger, we look ahead even further |
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c index 2f3b585b8d7d..a5d0a3a85dd8 100644 --- a/kernel/time/timer_stats.c +++ b/kernel/time/timer_stats.c | |||
@@ -236,7 +236,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf, | |||
236 | unsigned int timer_flag) | 236 | unsigned int timer_flag) |
237 | { | 237 | { |
238 | /* | 238 | /* |
239 | * It doesnt matter which lock we take: | 239 | * It doesn't matter which lock we take: |
240 | */ | 240 | */ |
241 | raw_spinlock_t *lock; | 241 | raw_spinlock_t *lock; |
242 | struct entry *entry, input; | 242 | struct entry *entry, input; |
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index cbafed7d4f38..6957aa298dfa 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c | |||
@@ -703,28 +703,21 @@ void blk_trace_shutdown(struct request_queue *q) | |||
703 | * | 703 | * |
704 | **/ | 704 | **/ |
705 | static void blk_add_trace_rq(struct request_queue *q, struct request *rq, | 705 | static void blk_add_trace_rq(struct request_queue *q, struct request *rq, |
706 | u32 what) | 706 | u32 what) |
707 | { | 707 | { |
708 | struct blk_trace *bt = q->blk_trace; | 708 | struct blk_trace *bt = q->blk_trace; |
709 | int rw = rq->cmd_flags & 0x03; | ||
710 | 709 | ||
711 | if (likely(!bt)) | 710 | if (likely(!bt)) |
712 | return; | 711 | return; |
713 | 712 | ||
714 | if (rq->cmd_flags & REQ_DISCARD) | ||
715 | rw |= REQ_DISCARD; | ||
716 | |||
717 | if (rq->cmd_flags & REQ_SECURE) | ||
718 | rw |= REQ_SECURE; | ||
719 | |||
720 | if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { | 713 | if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { |
721 | what |= BLK_TC_ACT(BLK_TC_PC); | 714 | what |= BLK_TC_ACT(BLK_TC_PC); |
722 | __blk_add_trace(bt, 0, blk_rq_bytes(rq), rw, | 715 | __blk_add_trace(bt, 0, blk_rq_bytes(rq), rq->cmd_flags, |
723 | what, rq->errors, rq->cmd_len, rq->cmd); | 716 | what, rq->errors, rq->cmd_len, rq->cmd); |
724 | } else { | 717 | } else { |
725 | what |= BLK_TC_ACT(BLK_TC_FS); | 718 | what |= BLK_TC_ACT(BLK_TC_FS); |
726 | __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), rw, | 719 | __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), |
727 | what, rq->errors, 0, NULL); | 720 | rq->cmd_flags, what, rq->errors, 0, NULL); |
728 | } | 721 | } |
729 | } | 722 | } |
730 | 723 | ||
@@ -857,29 +850,21 @@ static void blk_add_trace_plug(void *ignore, struct request_queue *q) | |||
857 | __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL); | 850 | __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL); |
858 | } | 851 | } |
859 | 852 | ||
860 | static void blk_add_trace_unplug_io(void *ignore, struct request_queue *q) | 853 | static void blk_add_trace_unplug(void *ignore, struct request_queue *q, |
854 | unsigned int depth, bool explicit) | ||
861 | { | 855 | { |
862 | struct blk_trace *bt = q->blk_trace; | 856 | struct blk_trace *bt = q->blk_trace; |
863 | 857 | ||
864 | if (bt) { | 858 | if (bt) { |
865 | unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE]; | 859 | __be64 rpdu = cpu_to_be64(depth); |
866 | __be64 rpdu = cpu_to_be64(pdu); | 860 | u32 what; |
867 | |||
868 | __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_IO, 0, | ||
869 | sizeof(rpdu), &rpdu); | ||
870 | } | ||
871 | } | ||
872 | 861 | ||
873 | static void blk_add_trace_unplug_timer(void *ignore, struct request_queue *q) | 862 | if (explicit) |
874 | { | 863 | what = BLK_TA_UNPLUG_IO; |
875 | struct blk_trace *bt = q->blk_trace; | 864 | else |
876 | 865 | what = BLK_TA_UNPLUG_TIMER; | |
877 | if (bt) { | ||
878 | unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE]; | ||
879 | __be64 rpdu = cpu_to_be64(pdu); | ||
880 | 866 | ||
881 | __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_TIMER, 0, | 867 | __blk_add_trace(bt, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu); |
882 | sizeof(rpdu), &rpdu); | ||
883 | } | 868 | } |
884 | } | 869 | } |
885 | 870 | ||
@@ -1022,9 +1007,7 @@ static void blk_register_tracepoints(void) | |||
1022 | WARN_ON(ret); | 1007 | WARN_ON(ret); |
1023 | ret = register_trace_block_plug(blk_add_trace_plug, NULL); | 1008 | ret = register_trace_block_plug(blk_add_trace_plug, NULL); |
1024 | WARN_ON(ret); | 1009 | WARN_ON(ret); |
1025 | ret = register_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL); | 1010 | ret = register_trace_block_unplug(blk_add_trace_unplug, NULL); |
1026 | WARN_ON(ret); | ||
1027 | ret = register_trace_block_unplug_io(blk_add_trace_unplug_io, NULL); | ||
1028 | WARN_ON(ret); | 1011 | WARN_ON(ret); |
1029 | ret = register_trace_block_split(blk_add_trace_split, NULL); | 1012 | ret = register_trace_block_split(blk_add_trace_split, NULL); |
1030 | WARN_ON(ret); | 1013 | WARN_ON(ret); |
@@ -1039,8 +1022,7 @@ static void blk_unregister_tracepoints(void) | |||
1039 | unregister_trace_block_rq_remap(blk_add_trace_rq_remap, NULL); | 1022 | unregister_trace_block_rq_remap(blk_add_trace_rq_remap, NULL); |
1040 | unregister_trace_block_bio_remap(blk_add_trace_bio_remap, NULL); | 1023 | unregister_trace_block_bio_remap(blk_add_trace_bio_remap, NULL); |
1041 | unregister_trace_block_split(blk_add_trace_split, NULL); | 1024 | unregister_trace_block_split(blk_add_trace_split, NULL); |
1042 | unregister_trace_block_unplug_io(blk_add_trace_unplug_io, NULL); | 1025 | unregister_trace_block_unplug(blk_add_trace_unplug, NULL); |
1043 | unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL); | ||
1044 | unregister_trace_block_plug(blk_add_trace_plug, NULL); | 1026 | unregister_trace_block_plug(blk_add_trace_plug, NULL); |
1045 | unregister_trace_block_sleeprq(blk_add_trace_sleeprq, NULL); | 1027 | unregister_trace_block_sleeprq(blk_add_trace_sleeprq, NULL); |
1046 | unregister_trace_block_getrq(blk_add_trace_getrq, NULL); | 1028 | unregister_trace_block_getrq(blk_add_trace_getrq, NULL); |
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 888b611897d3..ee24fa1935ac 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
@@ -1268,7 +1268,7 @@ static int ftrace_update_code(struct module *mod) | |||
1268 | p->flags = 0L; | 1268 | p->flags = 0L; |
1269 | 1269 | ||
1270 | /* | 1270 | /* |
1271 | * Do the initial record convertion from mcount jump | 1271 | * Do the initial record conversion from mcount jump |
1272 | * to the NOP instructions. | 1272 | * to the NOP instructions. |
1273 | */ | 1273 | */ |
1274 | if (!ftrace_code_disable(mod, p)) { | 1274 | if (!ftrace_code_disable(mod, p)) { |
@@ -1467,7 +1467,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos) | |||
1467 | return t_hash_next(m, pos); | 1467 | return t_hash_next(m, pos); |
1468 | 1468 | ||
1469 | (*pos)++; | 1469 | (*pos)++; |
1470 | iter->pos = *pos; | 1470 | iter->pos = iter->func_pos = *pos; |
1471 | 1471 | ||
1472 | if (iter->flags & FTRACE_ITER_PRINTALL) | 1472 | if (iter->flags & FTRACE_ITER_PRINTALL) |
1473 | return t_hash_start(m, pos); | 1473 | return t_hash_start(m, pos); |
@@ -1502,7 +1502,6 @@ t_next(struct seq_file *m, void *v, loff_t *pos) | |||
1502 | if (!rec) | 1502 | if (!rec) |
1503 | return t_hash_start(m, pos); | 1503 | return t_hash_start(m, pos); |
1504 | 1504 | ||
1505 | iter->func_pos = *pos; | ||
1506 | iter->func = rec; | 1505 | iter->func = rec; |
1507 | 1506 | ||
1508 | return iter; | 1507 | return iter; |
@@ -3426,7 +3425,7 @@ graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack) | |||
3426 | atomic_set(&t->tracing_graph_pause, 0); | 3425 | atomic_set(&t->tracing_graph_pause, 0); |
3427 | atomic_set(&t->trace_overrun, 0); | 3426 | atomic_set(&t->trace_overrun, 0); |
3428 | t->ftrace_timestamp = 0; | 3427 | t->ftrace_timestamp = 0; |
3429 | /* make curr_ret_stack visable before we add the ret_stack */ | 3428 | /* make curr_ret_stack visible before we add the ret_stack */ |
3430 | smp_wmb(); | 3429 | smp_wmb(); |
3431 | t->ret_stack = ret_stack; | 3430 | t->ret_stack = ret_stack; |
3432 | } | 3431 | } |
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index d9c8bcafb120..0ef7b4b2a1f7 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
@@ -1478,7 +1478,7 @@ static inline unsigned long rb_page_entries(struct buffer_page *bpage) | |||
1478 | return local_read(&bpage->entries) & RB_WRITE_MASK; | 1478 | return local_read(&bpage->entries) & RB_WRITE_MASK; |
1479 | } | 1479 | } |
1480 | 1480 | ||
1481 | /* Size is determined by what has been commited */ | 1481 | /* Size is determined by what has been committed */ |
1482 | static inline unsigned rb_page_size(struct buffer_page *bpage) | 1482 | static inline unsigned rb_page_size(struct buffer_page *bpage) |
1483 | { | 1483 | { |
1484 | return rb_page_commit(bpage); | 1484 | return rb_page_commit(bpage); |
@@ -2932,7 +2932,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) | |||
2932 | /* | 2932 | /* |
2933 | * cpu_buffer->pages just needs to point to the buffer, it | 2933 | * cpu_buffer->pages just needs to point to the buffer, it |
2934 | * has no specific buffer page to point to. Lets move it out | 2934 | * has no specific buffer page to point to. Lets move it out |
2935 | * of our way so we don't accidently swap it. | 2935 | * of our way so we don't accidentally swap it. |
2936 | */ | 2936 | */ |
2937 | cpu_buffer->pages = reader->list.prev; | 2937 | cpu_buffer->pages = reader->list.prev; |
2938 | 2938 | ||
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9541c27c1cf2..d38c16a06a6f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -3239,7 +3239,7 @@ waitagain: | |||
3239 | trace_seq_init(&iter->seq); | 3239 | trace_seq_init(&iter->seq); |
3240 | 3240 | ||
3241 | /* | 3241 | /* |
3242 | * If there was nothing to send to user, inspite of consuming trace | 3242 | * If there was nothing to send to user, in spite of consuming trace |
3243 | * entries, go back to wait for more entries. | 3243 | * entries, go back to wait for more entries. |
3244 | */ | 3244 | */ |
3245 | if (sret == -EBUSY) | 3245 | if (sret == -EBUSY) |
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c index 685a67d55db0..6302747a1398 100644 --- a/kernel/trace/trace_clock.c +++ b/kernel/trace/trace_clock.c | |||
@@ -46,7 +46,7 @@ u64 notrace trace_clock_local(void) | |||
46 | } | 46 | } |
47 | 47 | ||
48 | /* | 48 | /* |
49 | * trace_clock(): 'inbetween' trace clock. Not completely serialized, | 49 | * trace_clock(): 'between' trace clock. Not completely serialized, |
50 | * but not completely incorrect when crossing CPUs either. | 50 | * but not completely incorrect when crossing CPUs either. |
51 | * | 51 | * |
52 | * This is based on cpu_clock(), which will allow at most ~1 jiffy of | 52 | * This is based on cpu_clock(), which will allow at most ~1 jiffy of |
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index 1516cb3ec549..e32744c84d94 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h | |||
@@ -27,7 +27,7 @@ | |||
27 | * in the structure. | 27 | * in the structure. |
28 | * | 28 | * |
29 | * * for structures within structures, the format of the internal | 29 | * * for structures within structures, the format of the internal |
30 | * structure is layed out. This allows the internal structure | 30 | * structure is laid out. This allows the internal structure |
31 | * to be deciphered for the format file. Although these macros | 31 | * to be deciphered for the format file. Although these macros |
32 | * may become out of sync with the internal structure, they | 32 | * may become out of sync with the internal structure, they |
33 | * will create a compile error if it happens. Since the | 33 | * will create a compile error if it happens. Since the |
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 76b05980225c..962cdb24ed81 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c | |||
@@ -905,7 +905,7 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s, | |||
905 | * | 905 | * |
906 | * returns 1 if | 906 | * returns 1 if |
907 | * - we are inside irq code | 907 | * - we are inside irq code |
908 | * - we just extered irq code | 908 | * - we just entered irq code |
909 | * | 909 | * |
910 | * retunns 0 if | 910 | * retunns 0 if |
911 | * - funcgraph-interrupts option is set | 911 | * - funcgraph-interrupts option is set |
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 92b6e1e12d98..a4969b47afc1 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c | |||
@@ -80,7 +80,7 @@ static struct tracer_flags tracer_flags = { | |||
80 | * skip the latency if the sequence has changed - some other section | 80 | * skip the latency if the sequence has changed - some other section |
81 | * did a maximum and could disturb our measurement with serial console | 81 | * did a maximum and could disturb our measurement with serial console |
82 | * printouts, etc. Truly coinciding maximum latencies should be rare | 82 | * printouts, etc. Truly coinciding maximum latencies should be rare |
83 | * and what happens together happens separately as well, so this doesnt | 83 | * and what happens together happens separately as well, so this doesn't |
84 | * decrease the validity of the maximum found: | 84 | * decrease the validity of the maximum found: |
85 | */ | 85 | */ |
86 | static __cacheline_aligned_in_smp unsigned long max_sequence; | 86 | static __cacheline_aligned_in_smp unsigned long max_sequence; |
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 8435b43b1782..35d55a386145 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c | |||
@@ -1839,7 +1839,7 @@ static void unregister_probe_event(struct trace_probe *tp) | |||
1839 | kfree(tp->call.print_fmt); | 1839 | kfree(tp->call.print_fmt); |
1840 | } | 1840 | } |
1841 | 1841 | ||
1842 | /* Make a debugfs interface for controling probe points */ | 1842 | /* Make a debugfs interface for controlling probe points */ |
1843 | static __init int init_kprobe_trace(void) | 1843 | static __init int init_kprobe_trace(void) |
1844 | { | 1844 | { |
1845 | struct dentry *d_tracer; | 1845 | struct dentry *d_tracer; |
diff --git a/kernel/uid16.c b/kernel/uid16.c index 419209893d87..51c6e89e8619 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c | |||
@@ -189,7 +189,7 @@ SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist) | |||
189 | struct group_info *group_info; | 189 | struct group_info *group_info; |
190 | int retval; | 190 | int retval; |
191 | 191 | ||
192 | if (!capable(CAP_SETGID)) | 192 | if (!nsown_capable(CAP_SETGID)) |
193 | return -EPERM; | 193 | return -EPERM; |
194 | if ((unsigned)gidsetsize > NGROUPS_MAX) | 194 | if ((unsigned)gidsetsize > NGROUPS_MAX) |
195 | return -EINVAL; | 195 | return -EINVAL; |
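
setgroups16() follows the other set*id paths in using nsown_capable(CAP_SETGID). A userspace sketch of the visible rule; even clearing the supplementary group list needs the capability:

        #include <errno.h>
        #include <grp.h>
        #include <stdio.h>
        #include <string.h>
        #include <unistd.h>

        int main(void)
        {
                if (setgroups(0, NULL) != 0)
                        printf("setgroups(0, NULL): %s (expected without CAP_SETGID)\n",
                               strerror(errno));
                else
                        printf("supplementary groups cleared\n");
                return 0;
        }
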
diff --git a/kernel/user-return-notifier.c b/kernel/user-return-notifier.c index eb27fd3430a2..92cb706c7fc8 100644 --- a/kernel/user-return-notifier.c +++ b/kernel/user-return-notifier.c | |||
@@ -20,7 +20,7 @@ EXPORT_SYMBOL_GPL(user_return_notifier_register); | |||
20 | 20 | ||
21 | /* | 21 | /* |
22 | * Removes a registered user return notifier. Must be called from atomic | 22 | * Removes a registered user return notifier. Must be called from atomic |
23 | * context, and from the same cpu registration occured in. | 23 | * context, and from the same cpu registration occurred in. |
24 | */ | 24 | */ |
25 | void user_return_notifier_unregister(struct user_return_notifier *urn) | 25 | void user_return_notifier_unregister(struct user_return_notifier *urn) |
26 | { | 26 | { |
diff --git a/kernel/user.c b/kernel/user.c index 5c598ca781df..9e03e9c1df8d 100644 --- a/kernel/user.c +++ b/kernel/user.c | |||
@@ -17,9 +17,13 @@ | |||
17 | #include <linux/module.h> | 17 | #include <linux/module.h> |
18 | #include <linux/user_namespace.h> | 18 | #include <linux/user_namespace.h> |
19 | 19 | ||
20 | /* | ||
21 | * userns count is 1 for root user, 1 for init_uts_ns, | ||
22 | * and 1 for... ? | ||
23 | */ | ||
20 | struct user_namespace init_user_ns = { | 24 | struct user_namespace init_user_ns = { |
21 | .kref = { | 25 | .kref = { |
22 | .refcount = ATOMIC_INIT(2), | 26 | .refcount = ATOMIC_INIT(3), |
23 | }, | 27 | }, |
24 | .creator = &root_user, | 28 | .creator = &root_user, |
25 | }; | 29 | }; |
@@ -47,7 +51,7 @@ static struct kmem_cache *uid_cachep; | |||
47 | */ | 51 | */ |
48 | static DEFINE_SPINLOCK(uidhash_lock); | 52 | static DEFINE_SPINLOCK(uidhash_lock); |
49 | 53 | ||
50 | /* root_user.__count is 2, 1 for init task cred, 1 for init_user_ns->creator */ | 54 | /* root_user.__count is 2, 1 for init task cred, 1 for init_user_ns->user_ns */ |
51 | struct user_struct root_user = { | 55 | struct user_struct root_user = { |
52 | .__count = ATOMIC_INIT(2), | 56 | .__count = ATOMIC_INIT(2), |
53 | .processes = ATOMIC_INIT(1), | 57 | .processes = ATOMIC_INIT(1), |
diff --git a/kernel/utsname.c b/kernel/utsname.c index 8a82b4b8ea52..44646179eaba 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/utsname.h> | 14 | #include <linux/utsname.h> |
15 | #include <linux/err.h> | 15 | #include <linux/err.h> |
16 | #include <linux/slab.h> | 16 | #include <linux/slab.h> |
17 | #include <linux/user_namespace.h> | ||
17 | 18 | ||
18 | static struct uts_namespace *create_uts_ns(void) | 19 | static struct uts_namespace *create_uts_ns(void) |
19 | { | 20 | { |
@@ -30,7 +31,8 @@ static struct uts_namespace *create_uts_ns(void) | |||
30 | * @old_ns: namespace to clone | 31 | * @old_ns: namespace to clone |
31 | * Return NULL on error (failure to kmalloc), new ns otherwise | 32 | * Return NULL on error (failure to kmalloc), new ns otherwise |
32 | */ | 33 | */ |
33 | static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns) | 34 | static struct uts_namespace *clone_uts_ns(struct task_struct *tsk, |
35 | struct uts_namespace *old_ns) | ||
34 | { | 36 | { |
35 | struct uts_namespace *ns; | 37 | struct uts_namespace *ns; |
36 | 38 | ||
@@ -40,6 +42,7 @@ static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns) | |||
40 | 42 | ||
41 | down_read(&uts_sem); | 43 | down_read(&uts_sem); |
42 | memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); | 44 | memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); |
45 | ns->user_ns = get_user_ns(task_cred_xxx(tsk, user)->user_ns); | ||
43 | up_read(&uts_sem); | 46 | up_read(&uts_sem); |
44 | return ns; | 47 | return ns; |
45 | } | 48 | } |
@@ -50,8 +53,10 @@ static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns) | |||
50 | * utsname of this process won't be seen by parent, and vice | 53 | * utsname of this process won't be seen by parent, and vice |
51 | * versa. | 54 | * versa. |
52 | */ | 55 | */ |
53 | struct uts_namespace *copy_utsname(unsigned long flags, struct uts_namespace *old_ns) | 56 | struct uts_namespace *copy_utsname(unsigned long flags, |
57 | struct task_struct *tsk) | ||
54 | { | 58 | { |
59 | struct uts_namespace *old_ns = tsk->nsproxy->uts_ns; | ||
55 | struct uts_namespace *new_ns; | 60 | struct uts_namespace *new_ns; |
56 | 61 | ||
57 | BUG_ON(!old_ns); | 62 | BUG_ON(!old_ns); |
@@ -60,7 +65,7 @@ struct uts_namespace *copy_utsname(unsigned long flags, struct uts_namespace *ol | |||
60 | if (!(flags & CLONE_NEWUTS)) | 65 | if (!(flags & CLONE_NEWUTS)) |
61 | return old_ns; | 66 | return old_ns; |
62 | 67 | ||
63 | new_ns = clone_uts_ns(old_ns); | 68 | new_ns = clone_uts_ns(tsk, old_ns); |
64 | 69 | ||
65 | put_uts_ns(old_ns); | 70 | put_uts_ns(old_ns); |
66 | return new_ns; | 71 | return new_ns; |
@@ -71,5 +76,6 @@ void free_uts_ns(struct kref *kref) | |||
71 | struct uts_namespace *ns; | 76 | struct uts_namespace *ns; |
72 | 77 | ||
73 | ns = container_of(kref, struct uts_namespace, kref); | 78 | ns = container_of(kref, struct uts_namespace, kref); |
79 | put_user_ns(ns->user_ns); | ||
74 | kfree(ns); | 80 | kfree(ns); |
75 | } | 81 | } |
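
clone_uts_ns() now pins the creator's user namespace and free_uts_ns() drops that reference, so every UTS namespace records which user namespace owns it; that owner is what the sethostname() check earlier in this series consults. A userspace sketch (needs CAP_SYS_ADMIN to unshare): hostname changes made inside the new UTS namespace are invisible to the parent:

        #define _GNU_SOURCE
        #include <sched.h>
        #include <stdio.h>
        #include <string.h>
        #include <unistd.h>

        int main(void)
        {
                char name[65];
                const char *priv = "private-uts";

                if (unshare(CLONE_NEWUTS) != 0) {
                        perror("unshare(CLONE_NEWUTS)");
                        return 1;
                }
                if (sethostname(priv, strlen(priv)) != 0) {
                        perror("sethostname");
                        return 1;
                }
                if (gethostname(name, sizeof(name)) == 0)
                        printf("hostname in the new UTS namespace: %s\n", name);
                return 0;
        }
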
diff --git a/kernel/wait.c b/kernel/wait.c index b0310eb6cc1e..f45ea8d2a1ce 100644 --- a/kernel/wait.c +++ b/kernel/wait.c | |||
@@ -142,7 +142,7 @@ EXPORT_SYMBOL(finish_wait); | |||
142 | * woken up through the queue. | 142 | * woken up through the queue. |
143 | * | 143 | * |
144 | * This prevents waiter starvation where an exclusive waiter | 144 | * This prevents waiter starvation where an exclusive waiter |
145 | * aborts and is woken up concurrently and noone wakes up | 145 | * aborts and is woken up concurrently and no one wakes up |
146 | * the next waiter. | 146 | * the next waiter. |
147 | */ | 147 | */ |
148 | void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, | 148 | void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, |
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 18bb15776c57..140dce750450 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
@@ -48,12 +48,15 @@ static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); | |||
48 | * Should we panic when a soft-lockup or hard-lockup occurs: | 48 | * Should we panic when a soft-lockup or hard-lockup occurs: |
49 | */ | 49 | */ |
50 | #ifdef CONFIG_HARDLOCKUP_DETECTOR | 50 | #ifdef CONFIG_HARDLOCKUP_DETECTOR |
51 | static int hardlockup_panic; | 51 | static int hardlockup_panic = |
52 | CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE; | ||
52 | 53 | ||
53 | static int __init hardlockup_panic_setup(char *str) | 54 | static int __init hardlockup_panic_setup(char *str) |
54 | { | 55 | { |
55 | if (!strncmp(str, "panic", 5)) | 56 | if (!strncmp(str, "panic", 5)) |
56 | hardlockup_panic = 1; | 57 | hardlockup_panic = 1; |
58 | else if (!strncmp(str, "nopanic", 7)) | ||
59 | hardlockup_panic = 0; | ||
57 | else if (!strncmp(str, "0", 1)) | 60 | else if (!strncmp(str, "0", 1)) |
58 | watchdog_enabled = 0; | 61 | watchdog_enabled = 0; |
59 | return 1; | 62 | return 1; |
@@ -415,19 +418,22 @@ static int watchdog_prepare_cpu(int cpu) | |||
415 | static int watchdog_enable(int cpu) | 418 | static int watchdog_enable(int cpu) |
416 | { | 419 | { |
417 | struct task_struct *p = per_cpu(softlockup_watchdog, cpu); | 420 | struct task_struct *p = per_cpu(softlockup_watchdog, cpu); |
418 | int err; | 421 | int err = 0; |
419 | 422 | ||
420 | /* enable the perf event */ | 423 | /* enable the perf event */ |
421 | err = watchdog_nmi_enable(cpu); | 424 | err = watchdog_nmi_enable(cpu); |
422 | if (err) | 425 | |
423 | return err; | 426 | /* Regardless of err above, fall through and start softlockup */ |
424 | 427 | ||
425 | /* create the watchdog thread */ | 428 | /* create the watchdog thread */ |
426 | if (!p) { | 429 | if (!p) { |
427 | p = kthread_create(watchdog, (void *)(unsigned long)cpu, "watchdog/%d", cpu); | 430 | p = kthread_create(watchdog, (void *)(unsigned long)cpu, "watchdog/%d", cpu); |
428 | if (IS_ERR(p)) { | 431 | if (IS_ERR(p)) { |
429 | printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu); | 432 | printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu); |
430 | return PTR_ERR(p); | 433 | if (!err) |
434 | /* if hardlockup hasn't already set this */ | ||
435 | err = PTR_ERR(p); | ||
436 | goto out; | ||
431 | } | 437 | } |
432 | kthread_bind(p, cpu); | 438 | kthread_bind(p, cpu); |
433 | per_cpu(watchdog_touch_ts, cpu) = 0; | 439 | per_cpu(watchdog_touch_ts, cpu) = 0; |
@@ -435,7 +441,8 @@ static int watchdog_enable(int cpu) | |||
435 | wake_up_process(p); | 441 | wake_up_process(p); |
436 | } | 442 | } |
437 | 443 | ||
438 | return 0; | 444 | out: |
445 | return err; | ||
439 | } | 446 | } |
440 | 447 | ||
441 | static void watchdog_disable(int cpu) | 448 | static void watchdog_disable(int cpu) |
@@ -547,7 +554,13 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
547 | break; | 554 | break; |
548 | #endif /* CONFIG_HOTPLUG_CPU */ | 555 | #endif /* CONFIG_HOTPLUG_CPU */ |
549 | } | 556 | } |
550 | return notifier_from_errno(err); | 557 | |
558 | /* | ||
559 | * hardlockup and softlockup are not important enough | ||
560 | * to block cpu bring up. Just always succeed and | ||
561 | * rely on printk output to flag problems. | ||
562 | */ | ||
563 | return NOTIFY_OK; | ||
551 | } | 564 | } |
552 | 565 | ||
553 | static struct notifier_block __cpuinitdata cpu_nfb = { | 566 | static struct notifier_block __cpuinitdata cpu_nfb = { |
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 5ca7ce9ce754..8859a41806dd 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -1291,7 +1291,7 @@ __acquires(&gcwq->lock) | |||
1291 | return true; | 1291 | return true; |
1292 | spin_unlock_irq(&gcwq->lock); | 1292 | spin_unlock_irq(&gcwq->lock); |
1293 | 1293 | ||
1294 | /* CPU has come up inbetween, retry migration */ | 1294 | /* CPU has come up in between, retry migration */ |
1295 | cpu_relax(); | 1295 | cpu_relax(); |
1296 | } | 1296 | } |
1297 | } | 1297 | } |
@@ -1366,8 +1366,10 @@ static struct worker *create_worker(struct global_cwq *gcwq, bool bind) | |||
1366 | worker->id = id; | 1366 | worker->id = id; |
1367 | 1367 | ||
1368 | if (!on_unbound_cpu) | 1368 | if (!on_unbound_cpu) |
1369 | worker->task = kthread_create(worker_thread, worker, | 1369 | worker->task = kthread_create_on_node(worker_thread, |
1370 | "kworker/%u:%d", gcwq->cpu, id); | 1370 | worker, |
1371 | cpu_to_node(gcwq->cpu), | ||
1372 | "kworker/%u:%d", gcwq->cpu, id); | ||
1371 | else | 1373 | else |
1372 | worker->task = kthread_create(worker_thread, worker, | 1374 | worker->task = kthread_create(worker_thread, worker, |
1373 | "kworker/u:%d", id); | 1375 | "kworker/u:%d", id); |