Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile | 1
-rw-r--r--  kernel/bounds.c | 2
-rw-r--r--  kernel/capability.c | 96
-rw-r--r--  kernel/cgroup.c | 14
-rw-r--r--  kernel/cpu.c | 11
-rw-r--r--  kernel/cpuset.c | 80
-rw-r--r--  kernel/crash_dump.c | 34
-rw-r--r--  kernel/cred.c | 6
-rw-r--r--  kernel/debug/gdbstub.c | 30
-rw-r--r--  kernel/debug/kdb/kdb_main.c | 4
-rw-r--r--  kernel/exit.c | 1
-rw-r--r--  kernel/fork.c | 154
-rw-r--r--  kernel/futex.c | 15
-rw-r--r--  kernel/futex_compat.c | 11
-rw-r--r--  kernel/gcov/Makefile | 2
-rw-r--r--  kernel/groups.c | 2
-rw-r--r--  kernel/irq/Kconfig | 12
-rw-r--r--  kernel/irq/autoprobe.c | 4
-rw-r--r--  kernel/irq/chip.c | 283
-rw-r--r--  kernel/irq/compat.h | 72
-rw-r--r--  kernel/irq/debug.h | 12
-rw-r--r--  kernel/irq/dummychip.c | 9
-rw-r--r--  kernel/irq/handle.c | 19
-rw-r--r--  kernel/irq/internals.h | 16
-rw-r--r--  kernel/irq/irqdesc.c | 17
-rw-r--r--  kernel/irq/manage.c | 102
-rw-r--r--  kernel/irq/migration.c | 15
-rw-r--r--  kernel/irq/proc.c | 23
-rw-r--r--  kernel/irq/resend.c | 1
-rw-r--r--  kernel/irq/settings.h | 55
-rw-r--r--  kernel/irq/spurious.c | 11
-rw-r--r--  kernel/kallsyms.c | 58
-rw-r--r--  kernel/kthread.c | 31
-rw-r--r--  kernel/lockdep_proc.c | 9
-rw-r--r--  kernel/module.c | 4
-rw-r--r--  kernel/nsproxy.c | 4
-rw-r--r--  kernel/panic.c | 10
-rw-r--r--  kernel/perf_event.c | 15
-rw-r--r--  kernel/pid_namespace.c | 11
-rw-r--r--  kernel/power/Makefile | 3
-rw-r--r--  kernel/power/block_io.c | 2
-rw-r--r--  kernel/printk.c | 36
-rw-r--r--  kernel/ptrace.c | 27
-rw-r--r--  kernel/res_counter.c | 14
-rw-r--r--  kernel/sched.c | 26
-rw-r--r--  kernel/sched_idletask.c | 2
-rw-r--r--  kernel/sched_stoptask.c | 2
-rw-r--r--  kernel/signal.c | 46
-rw-r--r--  kernel/smp.c | 81
-rw-r--r--  kernel/softirq.c | 5
-rw-r--r--  kernel/stop_machine.c | 6
-rw-r--r--  kernel/sys.c | 77
-rw-r--r--  kernel/sysctl.c | 35
-rw-r--r--  kernel/sysctl_check.c | 10
-rw-r--r--  kernel/taskstats.c | 2
-rw-r--r--  kernel/time/timekeeping.c | 27
-rw-r--r--  kernel/trace/blktrace.c | 15
-rw-r--r--  kernel/trace/ftrace.c | 3
-rw-r--r--  kernel/uid16.c | 2
-rw-r--r--  kernel/user.c | 8
-rw-r--r--  kernel/utsname.c | 12
-rw-r--r--  kernel/watchdog.c | 27
-rw-r--r--  kernel/workqueue.c | 6
63 files changed, 977 insertions(+), 753 deletions(-)
diff --git a/kernel/Makefile b/kernel/Makefile
index 353d3fe8ba33..85cbfb31e73e 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -107,6 +107,7 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
 obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
 obj-$(CONFIG_PADATA) += padata.o
+obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
 
 ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/bounds.c b/kernel/bounds.c
index 98a51f26c136..0c9b862292b2 100644
--- a/kernel/bounds.c
+++ b/kernel/bounds.c
@@ -9,11 +9,13 @@
 #include <linux/page-flags.h>
 #include <linux/mmzone.h>
 #include <linux/kbuild.h>
+#include <linux/page_cgroup.h>
 
 void foo(void)
 {
 	/* The enum constants to put into include/generated/bounds.h */
 	DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS);
 	DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES);
+	DEFINE(NR_PCG_FLAGS, __NR_PCG_FLAGS);
 	/* End of constants */
 }
diff --git a/kernel/capability.c b/kernel/capability.c
index 9e9385f132c8..bf0c734d0c12 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -14,6 +14,7 @@
 #include <linux/security.h>
 #include <linux/syscalls.h>
 #include <linux/pid_namespace.h>
+#include <linux/user_namespace.h>
 #include <asm/uaccess.h>
 
 /*
@@ -290,6 +291,60 @@ error:
 }
 
 /**
+ * has_capability - Does a task have a capability in init_user_ns
+ * @t: The task in question
+ * @cap: The capability to be tested for
+ *
+ * Return true if the specified task has the given superior capability
+ * currently in effect to the initial user namespace, false if not.
+ *
+ * Note that this does not set PF_SUPERPRIV on the task.
+ */
+bool has_capability(struct task_struct *t, int cap)
+{
+	int ret = security_real_capable(t, &init_user_ns, cap);
+
+	return (ret == 0);
+}
+
+/**
+ * has_ns_capability - Does a task have a capability in a specific user ns
+ * @t: The task in question
+ * @ns: target user namespace
+ * @cap: The capability to be tested for
+ *
+ * Return true if the specified task has the given superior capability
+ * currently in effect to the specified user namespace, false if not.
+ *
+ * Note that this does not set PF_SUPERPRIV on the task.
+ */
+bool has_ns_capability(struct task_struct *t,
+		       struct user_namespace *ns, int cap)
+{
+	int ret = security_real_capable(t, ns, cap);
+
+	return (ret == 0);
+}
+
+/**
+ * has_capability_noaudit - Does a task have a capability (unaudited)
+ * @t: The task in question
+ * @cap: The capability to be tested for
+ *
+ * Return true if the specified task has the given superior capability
+ * currently in effect to init_user_ns, false if not. Don't write an
+ * audit message for the check.
+ *
+ * Note that this does not set PF_SUPERPRIV on the task.
+ */
+bool has_capability_noaudit(struct task_struct *t, int cap)
+{
+	int ret = security_real_capable_noaudit(t, &init_user_ns, cap);
+
+	return (ret == 0);
+}
+
+/**
  * capable - Determine if the current task has a superior capability in effect
  * @cap: The capability to be tested for
  *
@@ -299,17 +354,48 @@ error:
  * This sets PF_SUPERPRIV on the task if the capability is available on the
  * assumption that it's about to be used.
  */
-int capable(int cap)
+bool capable(int cap)
+{
+	return ns_capable(&init_user_ns, cap);
+}
+EXPORT_SYMBOL(capable);
+
+/**
+ * ns_capable - Determine if the current task has a superior capability in effect
+ * @ns: The user namespace we want the capability in
+ * @cap: The capability to be tested for
+ *
+ * Return true if the current task has the given superior capability currently
+ * available for use, false if not.
+ *
+ * This sets PF_SUPERPRIV on the task if the capability is available on the
+ * assumption that it's about to be used.
+ */
+bool ns_capable(struct user_namespace *ns, int cap)
 {
 	if (unlikely(!cap_valid(cap))) {
 		printk(KERN_CRIT "capable() called with invalid cap=%u\n", cap);
 		BUG();
 	}
 
-	if (security_capable(current_cred(), cap) == 0) {
+	if (security_capable(ns, current_cred(), cap) == 0) {
 		current->flags |= PF_SUPERPRIV;
-		return 1;
+		return true;
 	}
-	return 0;
+	return false;
 }
-EXPORT_SYMBOL(capable);
+EXPORT_SYMBOL(ns_capable);
+
+/**
+ * task_ns_capable - Determine whether current task has a superior
+ * capability targeted at a specific task's user namespace.
+ * @t: The task whose user namespace is targeted.
+ * @cap: The capability in question.
+ *
+ * Return true if it does, false otherwise.
+ */
+bool task_ns_capable(struct task_struct *t, int cap)
+{
+	return ns_capable(task_cred_xxx(t, user)->user_ns, cap);
+}
+EXPORT_SYMBOL(task_ns_capable);
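
The capability.c changes above make every capability check explicit about which user namespace grants the privilege: the old global capable() becomes shorthand for a check against init_user_ns, and ns_capable()/task_ns_capable() target a specific namespace. A minimal sketch of how a caller picks between them; the function below is hypothetical, written for illustration, and only capable() and task_ns_capable() come from this patch:

    #include <linux/capability.h>
    #include <linux/sched.h>

    /* Hypothetical check: may the current task control @target? */
    static int example_may_control(struct task_struct *target)
    {
    	/* Global check, now equivalent to ns_capable(&init_user_ns, ...). */
    	if (capable(CAP_SYS_PTRACE))
    		return 0;

    	/* Check against the user namespace that owns the target task. */
    	if (task_ns_capable(target, CAP_SYS_PTRACE))
    		return 0;

    	return -EPERM;
    }
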
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 95362d15128c..e31b220a743d 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1813,10 +1813,8 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 
 	/* Update the css_set linked lists if we're using them */
 	write_lock(&css_set_lock);
-	if (!list_empty(&tsk->cg_list)) {
-		list_del(&tsk->cg_list);
-		list_add(&tsk->cg_list, &newcg->tasks);
-	}
+	if (!list_empty(&tsk->cg_list))
+		list_move(&tsk->cg_list, &newcg->tasks);
 	write_unlock(&css_set_lock);
 
 	for_each_subsys(root, ss) {
@@ -3655,12 +3653,12 @@ again:
 	spin_lock(&release_list_lock);
 	set_bit(CGRP_REMOVED, &cgrp->flags);
 	if (!list_empty(&cgrp->release_list))
-		list_del(&cgrp->release_list);
+		list_del_init(&cgrp->release_list);
 	spin_unlock(&release_list_lock);
 
 	cgroup_lock_hierarchy(cgrp->root);
 	/* delete this cgroup from parent->children */
-	list_del(&cgrp->sibling);
+	list_del_init(&cgrp->sibling);
 	cgroup_unlock_hierarchy(cgrp->root);
 
 	d = dget(cgrp->dentry);
@@ -3879,7 +3877,7 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss)
 	subsys[ss->subsys_id] = NULL;
 
 	/* remove subsystem from rootnode's list of subsystems */
-	list_del(&ss->sibling);
+	list_del_init(&ss->sibling);
 
 	/*
 	 * disentangle the css from all css_sets attached to the dummytop. as
@@ -4241,7 +4239,7 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
 	if (!list_empty(&tsk->cg_list)) {
 		write_lock(&css_set_lock);
 		if (!list_empty(&tsk->cg_list))
-			list_del(&tsk->cg_list);
+			list_del_init(&tsk->cg_list);
 		write_unlock(&css_set_lock);
 	}
 
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 156cc5556140..c95fc4df0faa 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -160,7 +160,6 @@ static void cpu_notify_nofail(unsigned long val, void *v)
 {
 	BUG_ON(cpu_notify(val, v));
 }
-
 EXPORT_SYMBOL(register_cpu_notifier);
 
 void __ref unregister_cpu_notifier(struct notifier_block *nb)
@@ -205,7 +204,6 @@ static int __ref take_cpu_down(void *_param)
 		return err;
 
 	cpu_notify(CPU_DYING | param->mod, param->hcpu);
-
 	return 0;
 }
 
@@ -227,6 +225,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 		return -EINVAL;
 
 	cpu_hotplug_begin();
+
 	err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
 	if (err) {
 		nr_calls--;
@@ -304,7 +303,7 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
 	ret = __cpu_notify(CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls);
 	if (ret) {
 		nr_calls--;
-		printk("%s: attempt to bring up CPU %u failed\n",
+		printk(KERN_WARNING "%s: attempt to bring up CPU %u failed\n",
 				__func__, cpu);
 		goto out_notify;
 	}
@@ -450,14 +449,14 @@ void __ref enable_nonboot_cpus(void)
 	if (cpumask_empty(frozen_cpus))
 		goto out;
 
-	printk("Enabling non-boot CPUs ...\n");
+	printk(KERN_INFO "Enabling non-boot CPUs ...\n");
 
 	arch_enable_nonboot_cpus_begin();
 
 	for_each_cpu(cpu, frozen_cpus) {
 		error = _cpu_up(cpu, 1);
 		if (!error) {
-			printk("CPU%d is up\n", cpu);
+			printk(KERN_INFO "CPU%d is up\n", cpu);
 			continue;
 		}
 		printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error);
@@ -509,7 +508,7 @@ void __cpuinit notify_cpu_starting(unsigned int cpu)
  */
 
 /* cpu_bit_bitmap[0] is empty - so we can back into it */
-#define MASK_DECLARE_1(x)	[x+1][0] = 1UL << (x)
+#define MASK_DECLARE_1(x)	[x+1][0] = (1UL << (x))
 #define MASK_DECLARE_2(x)	MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
 #define MASK_DECLARE_4(x)	MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
 #define MASK_DECLARE_8(x)	MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)
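
The MASK_DECLARE hunk only parenthesizes the shift, and it is easiest to see that this is harmless by expanding one instance. Worked out by hand (not part of the patch), MASK_DECLARE_4(0) produces these designated initializers for cpu_bit_bitmap:

    /* MASK_DECLARE_4(0) expands to: */
    [1][0] = (1UL << 0), [2][0] = (1UL << 1),
    [3][0] = (1UL << 2), [4][0] = (1UL << 3)

so row x+1 of the table has exactly bit x set and can be handed out as a constant cpumask for CPU x, while row 0 stays empty, which is what the "back into it" comment refers to.
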
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index e92e98189032..33eee16addb8 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1015,17 +1015,12 @@ static void cpuset_change_nodemask(struct task_struct *p,
 	struct cpuset *cs;
 	int migrate;
 	const nodemask_t *oldmem = scan->data;
-	NODEMASK_ALLOC(nodemask_t, newmems, GFP_KERNEL);
-
-	if (!newmems)
-		return;
+	static nodemask_t newmems;	/* protected by cgroup_mutex */
 
 	cs = cgroup_cs(scan->cg);
-	guarantee_online_mems(cs, newmems);
-
-	cpuset_change_task_nodemask(p, newmems);
+	guarantee_online_mems(cs, &newmems);
 
-	NODEMASK_FREE(newmems);
+	cpuset_change_task_nodemask(p, &newmems);
 
 	mm = get_task_mm(p);
 	if (!mm)
@@ -1438,44 +1433,35 @@ static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont,
 	struct mm_struct *mm;
 	struct cpuset *cs = cgroup_cs(cont);
 	struct cpuset *oldcs = cgroup_cs(oldcont);
-	NODEMASK_ALLOC(nodemask_t, from, GFP_KERNEL);
-	NODEMASK_ALLOC(nodemask_t, to, GFP_KERNEL);
-
-	if (from == NULL || to == NULL)
-		goto alloc_fail;
+	static nodemask_t to;	/* protected by cgroup_mutex */
 
 	if (cs == &top_cpuset) {
 		cpumask_copy(cpus_attach, cpu_possible_mask);
 	} else {
 		guarantee_online_cpus(cs, cpus_attach);
 	}
-	guarantee_online_mems(cs, to);
+	guarantee_online_mems(cs, &to);
 
 	/* do per-task migration stuff possibly for each in the threadgroup */
-	cpuset_attach_task(tsk, to, cs);
+	cpuset_attach_task(tsk, &to, cs);
 	if (threadgroup) {
 		struct task_struct *c;
 		rcu_read_lock();
 		list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
-			cpuset_attach_task(c, to, cs);
+			cpuset_attach_task(c, &to, cs);
 		}
 		rcu_read_unlock();
 	}
 
 	/* change mm; only needs to be done once even if threadgroup */
-	*from = oldcs->mems_allowed;
-	*to = cs->mems_allowed;
+	to = cs->mems_allowed;
 	mm = get_task_mm(tsk);
 	if (mm) {
-		mpol_rebind_mm(mm, to);
+		mpol_rebind_mm(mm, &to);
 		if (is_memory_migrate(cs))
-			cpuset_migrate_mm(mm, from, to);
+			cpuset_migrate_mm(mm, &oldcs->mems_allowed, &to);
 		mmput(mm);
 	}
-
-alloc_fail:
-	NODEMASK_FREE(from);
-	NODEMASK_FREE(to);
 }
 
 /* The various types of files and directories in a cpuset file system */
@@ -1610,34 +1596,26 @@ out:
  * across a page fault.
  */
 
-static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
+static size_t cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
 {
-	int ret;
+	size_t count;
 
 	mutex_lock(&callback_mutex);
-	ret = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_allowed);
+	count = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_allowed);
 	mutex_unlock(&callback_mutex);
 
-	return ret;
+	return count;
 }
 
-static int cpuset_sprintf_memlist(char *page, struct cpuset *cs)
+static size_t cpuset_sprintf_memlist(char *page, struct cpuset *cs)
 {
-	NODEMASK_ALLOC(nodemask_t, mask, GFP_KERNEL);
-	int retval;
-
-	if (mask == NULL)
-		return -ENOMEM;
+	size_t count;
 
 	mutex_lock(&callback_mutex);
-	*mask = cs->mems_allowed;
+	count = nodelist_scnprintf(page, PAGE_SIZE, cs->mems_allowed);
 	mutex_unlock(&callback_mutex);
 
-	retval = nodelist_scnprintf(page, PAGE_SIZE, *mask);
-
-	NODEMASK_FREE(mask);
-
-	return retval;
+	return count;
 }
 
 static ssize_t cpuset_common_file_read(struct cgroup *cont,
@@ -1862,8 +1840,10 @@ static void cpuset_post_clone(struct cgroup_subsys *ss,
 	cs = cgroup_cs(cgroup);
 	parent_cs = cgroup_cs(parent);
 
+	mutex_lock(&callback_mutex);
 	cs->mems_allowed = parent_cs->mems_allowed;
 	cpumask_copy(cs->cpus_allowed, parent_cs->cpus_allowed);
+	mutex_unlock(&callback_mutex);
 	return;
 }
 
@@ -2066,10 +2046,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
 	struct cpuset *cp;	/* scans cpusets being updated */
 	struct cpuset *child;	/* scans child cpusets of cp */
 	struct cgroup *cont;
-	NODEMASK_ALLOC(nodemask_t, oldmems, GFP_KERNEL);
-
-	if (oldmems == NULL)
-		return;
+	static nodemask_t oldmems;	/* protected by cgroup_mutex */
 
 	list_add_tail((struct list_head *)&root->stack_list, &queue);
 
@@ -2086,7 +2063,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
 		    nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
 			continue;
 
-		*oldmems = cp->mems_allowed;
+		oldmems = cp->mems_allowed;
 
 		/* Remove offline cpus and mems from this cpuset. */
 		mutex_lock(&callback_mutex);
@@ -2102,10 +2079,9 @@ static void scan_for_empty_cpusets(struct cpuset *root)
 			remove_tasks_in_empty_cpuset(cp);
 		else {
 			update_tasks_cpumask(cp, NULL);
-			update_tasks_nodemask(cp, oldmems, NULL);
+			update_tasks_nodemask(cp, &oldmems, NULL);
 		}
 	}
-	NODEMASK_FREE(oldmems);
 }
 
 /*
@@ -2147,19 +2123,16 @@ void cpuset_update_active_cpus(void)
 static int cpuset_track_online_nodes(struct notifier_block *self,
 				unsigned long action, void *arg)
 {
-	NODEMASK_ALLOC(nodemask_t, oldmems, GFP_KERNEL);
-
-	if (oldmems == NULL)
-		return NOTIFY_DONE;
+	static nodemask_t oldmems;	/* protected by cgroup_mutex */
 
 	cgroup_lock();
 	switch (action) {
 	case MEM_ONLINE:
-		*oldmems = top_cpuset.mems_allowed;
+		oldmems = top_cpuset.mems_allowed;
 		mutex_lock(&callback_mutex);
 		top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
 		mutex_unlock(&callback_mutex);
-		update_tasks_nodemask(&top_cpuset, oldmems, NULL);
+		update_tasks_nodemask(&top_cpuset, &oldmems, NULL);
 		break;
 	case MEM_OFFLINE:
 		/*
@@ -2173,7 +2146,6 @@ static int cpuset_track_online_nodes(struct notifier_block *self,
 	}
 	cgroup_unlock();
 
-	NODEMASK_FREE(oldmems);
 	return NOTIFY_OK;
 }
 #endif
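
All of the cpuset.c hunks apply one transformation: a nodemask_t is too large for the stack once MAX_NUMNODES grows, and the NODEMASK_ALLOC() calls it replaced added GFP_KERNEL allocations that could fail in paths with no good way to report failure. Since every rewritten function is already serialized by cgroup_mutex, a function-local static is safe. A minimal sketch of the idiom, assuming the caller holds cgroup_mutex (the function name is illustrative, not from the patch):

    static void example_refresh_mems(struct cpuset *cs)
    {
    	static nodemask_t newmems;	/* protected by cgroup_mutex */

    	/* No allocation and no failure path; &newmems is passed around. */
    	guarantee_online_mems(cs, &newmems);
    	update_tasks_nodemask(cs, &newmems, NULL);
    }
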
diff --git a/kernel/crash_dump.c b/kernel/crash_dump.c
new file mode 100644
index 000000000000..5f85690285d4
--- /dev/null
+++ b/kernel/crash_dump.c
@@ -0,0 +1,34 @@
+#include <linux/kernel.h>
+#include <linux/crash_dump.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+
+/*
+ * If we have booted due to a crash, max_pfn will be a very low value. We need
+ * to know the amount of memory that the previous kernel used.
+ */
+unsigned long saved_max_pfn;
+
+/*
+ * stores the physical address of elf header of crash image
+ *
+ * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by
+ * is_kdump_kernel() to determine if we are booting after a panic. Hence put
+ * it under CONFIG_CRASH_DUMP and not CONFIG_PROC_VMCORE.
+ */
+unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
+
+/*
+ * elfcorehdr= specifies the location of elf core header stored by the crashed
+ * kernel. This option will be passed by kexec loader to the capture kernel.
+ */
+static int __init setup_elfcorehdr(char *arg)
+{
+	char *end;
+	if (!arg)
+		return -EINVAL;
+	elfcorehdr_addr = memparse(arg, &end);
+	return end > arg ? 0 : -EINVAL;
+}
+early_param("elfcorehdr", setup_elfcorehdr);
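
The new kernel/crash_dump.c consolidates definitions that previously lived in per-architecture code. The elfcorehdr= parameter it parses is normally not typed by a user: the kexec loader appends it to the capture kernel's command line so the dump kernel can find the ELF core header left behind by the crashed kernel. An illustrative command-line fragment (the address is made up):

    elfcorehdr=0x2000000

memparse() also accepts K/M/G suffixes, and an argument that parses to nothing makes setup_elfcorehdr() return -EINVAL.
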
diff --git a/kernel/cred.c b/kernel/cred.c
index 2343c132c5a7..5557b55048df 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -741,6 +741,12 @@ int set_create_files_as(struct cred *new, struct inode *inode)
 }
 EXPORT_SYMBOL(set_create_files_as);
 
+struct user_namespace *current_user_ns(void)
+{
+	return _current_user_ns();
+}
+EXPORT_SYMBOL(current_user_ns);
+
 #ifdef CONFIG_DEBUG_CREDENTIALS
 
 bool creds_are_invalid(const struct cred *cred)
diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c
index 481a7bd2dfe7..a11db956dd62 100644
--- a/kernel/debug/gdbstub.c
+++ b/kernel/debug/gdbstub.c
@@ -1093,3 +1093,33 @@ int gdbstub_state(struct kgdb_state *ks, char *cmd)
 	put_packet(remcom_out_buffer);
 	return 0;
 }
+
+/**
+ * gdbstub_exit - Send an exit message to GDB
+ * @status: The exit code to report.
+ */
+void gdbstub_exit(int status)
+{
+	unsigned char checksum, ch, buffer[3];
+	int loop;
+
+	buffer[0] = 'W';
+	buffer[1] = hex_asc_hi(status);
+	buffer[2] = hex_asc_lo(status);
+
+	dbg_io_ops->write_char('$');
+	checksum = 0;
+
+	for (loop = 0; loop < 3; loop++) {
+		ch = buffer[loop];
+		checksum += ch;
+		dbg_io_ops->write_char(ch);
+	}
+
+	dbg_io_ops->write_char('#');
+	dbg_io_ops->write_char(hex_asc_hi(checksum));
+	dbg_io_ops->write_char(hex_asc_lo(checksum));
+
+	/* make sure the output is flushed, lest the bootloader clobber it */
+	dbg_io_ops->flush();
+}
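
gdbstub_exit() emits a raw GDB remote serial protocol packet by hand: '$', the payload, '#', then two lowercase hex digits carrying the modulo-256 sum of the payload bytes. Working one case through by hand (not in the source), status 0 gives the payload "W00", whose checksum is 0x57 + 0x30 + 0x30 = 0xb7, so the bytes on the wire are:

    $W00#b7

The trailing flush matters because this is sent while the machine is going down, typically right before control passes back to the bootloader.
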
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index bd3e8e29caa3..6bc6e3bc4f9c 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -78,7 +78,7 @@ static unsigned int kdb_continue_catastrophic;
 static kdbtab_t *kdb_commands;
 #define KDB_BASE_CMD_MAX 50
 static int kdb_max_commands = KDB_BASE_CMD_MAX;
-static kdbtab_t kdb_base_commands[50];
+static kdbtab_t kdb_base_commands[KDB_BASE_CMD_MAX];
 #define for_each_kdbcmd(cmd, num)					\
 	for ((cmd) = kdb_base_commands, (num) = 0;			\
 	     num < kdb_max_commands;					\
@@ -2892,7 +2892,7 @@ static void __init kdb_inittab(void)
2892 "Send a signal to a process", 0, KDB_REPEAT_NONE); 2892 "Send a signal to a process", 0, KDB_REPEAT_NONE);
2893 kdb_register_repeat("summary", kdb_summary, "", 2893 kdb_register_repeat("summary", kdb_summary, "",
2894 "Summarize the system", 4, KDB_REPEAT_NONE); 2894 "Summarize the system", 4, KDB_REPEAT_NONE);
2895 kdb_register_repeat("per_cpu", kdb_per_cpu, "", 2895 kdb_register_repeat("per_cpu", kdb_per_cpu, "<sym> [<bytes>] [<cpu>]",
2896 "Display per_cpu variables", 3, KDB_REPEAT_NONE); 2896 "Display per_cpu variables", 3, KDB_REPEAT_NONE);
2897 kdb_register_repeat("grephelp", kdb_grep_help, "", 2897 kdb_register_repeat("grephelp", kdb_grep_help, "",
2898 "Display help on | grep", 0, KDB_REPEAT_NONE); 2898 "Display help on | grep", 0, KDB_REPEAT_NONE);
diff --git a/kernel/exit.c b/kernel/exit.c
index f9a45ebcc7b1..6a488ad2dce5 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -908,6 +908,7 @@ NORET_TYPE void do_exit(long code)
 	profile_task_exit(tsk);
 
 	WARN_ON(atomic_read(&tsk->fs_excl));
+	WARN_ON(blk_needs_flush_plug(tsk));
 
 	if (unlikely(in_interrupt()))
 		panic("Aiee, killing interrupt handler!");
diff --git a/kernel/fork.c b/kernel/fork.c
index 05b92c457010..e7548dee636b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -40,6 +40,7 @@
 #include <linux/tracehook.h>
 #include <linux/futex.h>
 #include <linux/compat.h>
+#include <linux/kthread.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/rcupdate.h>
 #include <linux/ptrace.h>
@@ -109,20 +110,25 @@ int nr_processes(void)
 }
 
 #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
-# define alloc_task_struct()	kmem_cache_alloc(task_struct_cachep, GFP_KERNEL)
-# define free_task_struct(tsk)	kmem_cache_free(task_struct_cachep, (tsk))
+# define alloc_task_struct_node(node)		\
+		kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node)
+# define free_task_struct(tsk)			\
+		kmem_cache_free(task_struct_cachep, (tsk))
 static struct kmem_cache *task_struct_cachep;
 #endif
 
 #ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR
-static inline struct thread_info *alloc_thread_info(struct task_struct *tsk)
+static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
+						  int node)
 {
 #ifdef CONFIG_DEBUG_STACK_USAGE
 	gfp_t mask = GFP_KERNEL | __GFP_ZERO;
 #else
 	gfp_t mask = GFP_KERNEL;
 #endif
-	return (struct thread_info *)__get_free_pages(mask, THREAD_SIZE_ORDER);
+	struct page *page = alloc_pages_node(node, mask, THREAD_SIZE_ORDER);
+
+	return page ? page_address(page) : NULL;
 }
 
 static inline void free_thread_info(struct thread_info *ti)
@@ -249,16 +255,16 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 	struct task_struct *tsk;
 	struct thread_info *ti;
 	unsigned long *stackend;
-
+	int node = tsk_fork_get_node(orig);
 	int err;
 
 	prepare_to_copy(orig);
 
-	tsk = alloc_task_struct();
+	tsk = alloc_task_struct_node(node);
 	if (!tsk)
 		return NULL;
 
-	ti = alloc_thread_info(tsk);
+	ti = alloc_thread_info_node(tsk, node);
 	if (!ti) {
 		free_task_struct(tsk);
 		return NULL;
@@ -1181,12 +1187,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 		pid = alloc_pid(p->nsproxy->pid_ns);
 		if (!pid)
 			goto bad_fork_cleanup_io;
-
-		if (clone_flags & CLONE_NEWPID) {
-			retval = pid_ns_prepare_proc(p->nsproxy->pid_ns);
-			if (retval < 0)
-				goto bad_fork_free_pid;
-		}
 	}
 
 	p->pid = pid_nr(pid);
@@ -1205,6 +1205,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	 * Clear TID on mm_release()?
 	 */
 	p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
+#ifdef CONFIG_BLOCK
+	p->plug = NULL;
+#endif
 #ifdef CONFIG_FUTEX
 	p->robust_list = NULL;
 #ifdef CONFIG_COMPAT
@@ -1290,7 +1293,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	tracehook_finish_clone(p, clone_flags, trace);
 
 	if (thread_group_leader(p)) {
-		if (clone_flags & CLONE_NEWPID)
+		if (is_child_reaper(pid))
 			p->nsproxy->pid_ns->child_reaper = p;
 
 		p->signal->leader_pid = pid;
@@ -1513,38 +1516,24 @@ void __init proc_caches_init(void)
 }
 
 /*
- * Check constraints on flags passed to the unshare system call and
- * force unsharing of additional process context as appropriate.
+ * Check constraints on flags passed to the unshare system call.
  */
-static void check_unshare_flags(unsigned long *flags_ptr)
+static int check_unshare_flags(unsigned long unshare_flags)
 {
+	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
+				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
+				CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET))
+		return -EINVAL;
 	/*
-	 * If unsharing a thread from a thread group, must also
-	 * unshare vm.
-	 */
-	if (*flags_ptr & CLONE_THREAD)
-		*flags_ptr |= CLONE_VM;
-
-	/*
-	 * If unsharing vm, must also unshare signal handlers.
-	 */
-	if (*flags_ptr & CLONE_VM)
-		*flags_ptr |= CLONE_SIGHAND;
-
-	/*
-	 * If unsharing namespace, must also unshare filesystem information.
+	 * Not implemented, but pretend it works if there is nothing to
+	 * unshare. Note that unsharing CLONE_THREAD or CLONE_SIGHAND
+	 * needs to unshare vm.
 	 */
-	if (*flags_ptr & CLONE_NEWNS)
-		*flags_ptr |= CLONE_FS;
-}
-
-/*
- * Unsharing of tasks created with CLONE_THREAD is not supported yet
- */
-static int unshare_thread(unsigned long unshare_flags)
-{
-	if (unshare_flags & CLONE_THREAD)
-		return -EINVAL;
+	if (unshare_flags & (CLONE_THREAD | CLONE_SIGHAND | CLONE_VM)) {
+		/* FIXME: get_task_mm() increments ->mm_users */
+		if (atomic_read(&current->mm->mm_users) > 1)
+			return -EINVAL;
+	}
 
 	return 0;
 }
@@ -1571,34 +1560,6 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
 }
 
 /*
- * Unsharing of sighand is not supported yet
- */
-static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp)
-{
-	struct sighand_struct *sigh = current->sighand;
-
-	if ((unshare_flags & CLONE_SIGHAND) && atomic_read(&sigh->count) > 1)
-		return -EINVAL;
-	else
-		return 0;
-}
-
-/*
- * Unshare vm if it is being shared
- */
-static int unshare_vm(unsigned long unshare_flags, struct mm_struct **new_mmp)
-{
-	struct mm_struct *mm = current->mm;
-
-	if ((unshare_flags & CLONE_VM) &&
-	    (mm && atomic_read(&mm->mm_users) > 1)) {
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-/*
  * Unshare file descriptor table if it is being shared
  */
 static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
@@ -1626,45 +1587,37 @@ static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp
  */
 SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
 {
-	int err = 0;
 	struct fs_struct *fs, *new_fs = NULL;
-	struct sighand_struct *new_sigh = NULL;
-	struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL;
 	struct files_struct *fd, *new_fd = NULL;
 	struct nsproxy *new_nsproxy = NULL;
 	int do_sysvsem = 0;
+	int err;
 
-	check_unshare_flags(&unshare_flags);
-
-	/* Return -EINVAL for all unsupported flags */
-	err = -EINVAL;
-	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
-			CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
-			CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET))
+	err = check_unshare_flags(unshare_flags);
+	if (err)
 		goto bad_unshare_out;
 
 	/*
+	 * If unsharing namespace, must also unshare filesystem information.
+	 */
+	if (unshare_flags & CLONE_NEWNS)
+		unshare_flags |= CLONE_FS;
+	/*
 	 * CLONE_NEWIPC must also detach from the undolist: after switching
 	 * to a new ipc namespace, the semaphore arrays from the old
 	 * namespace are unreachable.
 	 */
 	if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM))
 		do_sysvsem = 1;
-	if ((err = unshare_thread(unshare_flags)))
-		goto bad_unshare_out;
 	if ((err = unshare_fs(unshare_flags, &new_fs)))
-		goto bad_unshare_cleanup_thread;
-	if ((err = unshare_sighand(unshare_flags, &new_sigh)))
-		goto bad_unshare_cleanup_fs;
-	if ((err = unshare_vm(unshare_flags, &new_mm)))
-		goto bad_unshare_cleanup_sigh;
+		goto bad_unshare_out;
 	if ((err = unshare_fd(unshare_flags, &new_fd)))
-		goto bad_unshare_cleanup_vm;
+		goto bad_unshare_cleanup_fs;
 	if ((err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy,
 			new_fs)))
 		goto bad_unshare_cleanup_fd;
 
-	if (new_fs || new_mm || new_fd || do_sysvsem || new_nsproxy) {
+	if (new_fs || new_fd || do_sysvsem || new_nsproxy) {
 		if (do_sysvsem) {
 			/*
 			 * CLONE_SYSVSEM is equivalent to sys_exit().
@@ -1690,19 +1643,6 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
 		spin_unlock(&fs->lock);
 	}
 
-	if (new_mm) {
-		mm = current->mm;
-		active_mm = current->active_mm;
-		current->mm = new_mm;
-		current->active_mm = new_mm;
-		if (current->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
-			atomic_dec(&mm->oom_disable_count);
-			atomic_inc(&new_mm->oom_disable_count);
-		}
-		activate_mm(active_mm, new_mm);
-		new_mm = mm;
-	}
-
 	if (new_fd) {
 		fd = current->files;
 		current->files = new_fd;
@@ -1719,20 +1659,10 @@ bad_unshare_cleanup_fd:
 	if (new_fd)
 		put_files_struct(new_fd);
 
-bad_unshare_cleanup_vm:
-	if (new_mm)
-		mmput(new_mm);
-
-bad_unshare_cleanup_sigh:
-	if (new_sigh)
-		if (atomic_dec_and_test(&new_sigh->count))
-			kmem_cache_free(sighand_cachep, new_sigh);
-
 bad_unshare_cleanup_fs:
 	if (new_fs)
 		free_fs_struct(new_fs);
 
-bad_unshare_cleanup_thread:
 bad_unshare_out:
 	return err;
 }
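
The fork.c rework changes unshare(2)'s contract: instead of silently widening the flag set (CLONE_THREAD used to pull in CLONE_VM, which pulled in CLONE_SIGHAND), check_unshare_flags() now rejects combinations it cannot honor with -EINVAL; only the CLONE_NEWNS-implies-CLONE_FS fixup survives. A small userspace sketch of the resulting semantics (illustrative only):

    #define _GNU_SOURCE
    #include <sched.h>
    #include <stdio.h>

    int main(void)
    {
    	/* A new UTS namespace needs nothing else to be unshared. */
    	if (unshare(CLONE_NEWUTS) == -1)
    		perror("unshare(CLONE_NEWUTS)");	/* needs CAP_SYS_ADMIN */

    	/* CLONE_NEWNS implicitly unshares the fs_struct as well. */
    	if (unshare(CLONE_NEWNS) == -1)
    		perror("unshare(CLONE_NEWNS)");
    	return 0;
    }
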
diff --git a/kernel/futex.c b/kernel/futex.c
index bda415715382..dfb924ffe65b 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -782,8 +782,8 @@ static void __unqueue_futex(struct futex_q *q)
 {
 	struct futex_hash_bucket *hb;
 
-	if (WARN_ON(!q->lock_ptr || !spin_is_locked(q->lock_ptr)
-			|| plist_node_empty(&q->list)))
+	if (WARN_ON_SMP(!q->lock_ptr || !spin_is_locked(q->lock_ptr))
+	    || WARN_ON(plist_node_empty(&q->list)))
 		return;
 
 	hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
@@ -2418,10 +2418,19 @@ SYSCALL_DEFINE3(get_robust_list, int, pid,
 			goto err_unlock;
 		ret = -EPERM;
 		pcred = __task_cred(p);
+		/* If victim is in different user_ns, then uids are not
+		   comparable, so we must have CAP_SYS_PTRACE */
+		if (cred->user->user_ns != pcred->user->user_ns) {
+			if (!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
+				goto err_unlock;
+			goto ok;
+		}
+		/* If victim is in same user_ns, then uids are comparable */
 		if (cred->euid != pcred->euid &&
 		    cred->euid != pcred->uid &&
-		    !capable(CAP_SYS_PTRACE))
+		    !ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
 			goto err_unlock;
+ok:
 		head = p->robust_list;
 		rcu_read_unlock();
 	}
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index a7934ac75e5b..5f9e689dc8f0 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -153,10 +153,19 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
 			goto err_unlock;
 		ret = -EPERM;
 		pcred = __task_cred(p);
+		/* If victim is in different user_ns, then uids are not
+		   comparable, so we must have CAP_SYS_PTRACE */
+		if (cred->user->user_ns != pcred->user->user_ns) {
+			if (!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
+				goto err_unlock;
+			goto ok;
+		}
+		/* If victim is in same user_ns, then uids are comparable */
 		if (cred->euid != pcred->euid &&
 		    cred->euid != pcred->uid &&
-		    !capable(CAP_SYS_PTRACE))
+		    !ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
 			goto err_unlock;
+ok:
 		head = p->compat_robust_list;
 		rcu_read_unlock();
 	}
diff --git a/kernel/gcov/Makefile b/kernel/gcov/Makefile
index 3f761001d517..e97ca59e2520 100644
--- a/kernel/gcov/Makefile
+++ b/kernel/gcov/Makefile
@@ -1,3 +1,3 @@
-EXTRA_CFLAGS := -DSRCTREE='"$(srctree)"' -DOBJTREE='"$(objtree)"'
+ccflags-y := -DSRCTREE='"$(srctree)"' -DOBJTREE='"$(objtree)"'
 
 obj-$(CONFIG_GCOV_KERNEL) := base.o fs.o gcc_3_4.o
diff --git a/kernel/groups.c b/kernel/groups.c
index 253dc0f35cf4..1cc476d52dd3 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -233,7 +233,7 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist)
 	struct group_info *group_info;
 	int retval;
 
-	if (!capable(CAP_SETGID))
+	if (!nsown_capable(CAP_SETGID))
 		return -EPERM;
 	if ((unsigned)gidsetsize > NGROUPS_MAX)
 		return -EINVAL;
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index 09bef82d74cb..a69c333f78e4 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -10,10 +10,6 @@ menu "IRQ subsystem"
 config GENERIC_HARDIRQS
 	def_bool y
 
-# Select this to disable the deprecated stuff
-config GENERIC_HARDIRQS_NO_DEPRECATED
-	bool
-
 config GENERIC_HARDIRQS_NO_COMPAT
 	bool
 
@@ -31,6 +27,10 @@ config GENERIC_IRQ_PROBE
 config GENERIC_IRQ_SHOW
 	bool
 
+# Print level/edge extra information
+config GENERIC_IRQ_SHOW_LEVEL
+	bool
+
 # Support for delayed migration from interrupt context
 config GENERIC_PENDING_IRQ
 	bool
@@ -47,6 +47,10 @@ config HARDIRQS_SW_RESEND
 config IRQ_PREFLOW_FASTEOI
 	bool
 
+# Edge style eoi based handler (cell)
+config IRQ_EDGE_EOI_HANDLER
+	bool
+
 # Support forced irq threading
 config IRQ_FORCED_THREADING
 	bool
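
The two new Kconfig symbols are not user-visible; a platform opts in with a select from its own Kconfig entry. A hypothetical example of how a Cell-style platform would enable the new flow handler (the platform symbol here is made up):

    config EXAMPLE_CELL_PLATFORM
    	bool "Example platform"
    	select IRQ_EDGE_EOI_HANDLER

GENERIC_IRQ_SHOW_LEVEL is selected the same way by architectures that want the level/edge column printed for their interrupts.
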
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index 394784c57060..342d8f44e401 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -70,10 +70,8 @@ unsigned long probe_irq_on(void)
 		raw_spin_lock_irq(&desc->lock);
 		if (!desc->action && irq_settings_can_probe(desc)) {
 			desc->istate |= IRQS_AUTODETECT | IRQS_WAITING;
-			if (irq_startup(desc)) {
-				irq_compat_set_pending(desc);
+			if (irq_startup(desc))
 				desc->istate |= IRQS_PENDING;
-			}
 		}
 		raw_spin_unlock_irq(&desc->lock);
 	}
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index c9c0601f0615..616ec1c6b06f 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -34,9 +34,14 @@ int irq_set_chip(unsigned int irq, struct irq_chip *chip)
 	if (!chip)
 		chip = &no_irq_chip;
 
-	irq_chip_set_defaults(chip);
 	desc->irq_data.chip = chip;
 	irq_put_desc_unlock(desc, flags);
+	/*
+	 * For !CONFIG_SPARSE_IRQ make the irq show up in
+	 * allocated_irqs. For the CONFIG_SPARSE_IRQ case, it is
+	 * already marked, and this call is harmless.
+	 */
+	irq_reserve_irq(irq);
 	return 0;
 }
 EXPORT_SYMBOL(irq_set_chip);
@@ -134,26 +139,22 @@ EXPORT_SYMBOL_GPL(irq_get_irq_data);
 
 static void irq_state_clr_disabled(struct irq_desc *desc)
 {
-	desc->istate &= ~IRQS_DISABLED;
-	irq_compat_clr_disabled(desc);
+	irqd_clear(&desc->irq_data, IRQD_IRQ_DISABLED);
 }
 
 static void irq_state_set_disabled(struct irq_desc *desc)
 {
-	desc->istate |= IRQS_DISABLED;
-	irq_compat_set_disabled(desc);
+	irqd_set(&desc->irq_data, IRQD_IRQ_DISABLED);
 }
 
 static void irq_state_clr_masked(struct irq_desc *desc)
 {
-	desc->istate &= ~IRQS_MASKED;
-	irq_compat_clr_masked(desc);
+	irqd_clear(&desc->irq_data, IRQD_IRQ_MASKED);
 }
 
 static void irq_state_set_masked(struct irq_desc *desc)
 {
-	desc->istate |= IRQS_MASKED;
-	irq_compat_set_masked(desc);
+	irqd_set(&desc->irq_data, IRQD_IRQ_MASKED);
 }
 
 int irq_startup(struct irq_desc *desc)
@@ -203,126 +204,6 @@ void irq_disable(struct irq_desc *desc)
 	}
 }
 
-#ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED
-/* Temporary migration helpers */
-static void compat_irq_mask(struct irq_data *data)
-{
-	data->chip->mask(data->irq);
-}
-
-static void compat_irq_unmask(struct irq_data *data)
-{
-	data->chip->unmask(data->irq);
-}
-
-static void compat_irq_ack(struct irq_data *data)
-{
-	data->chip->ack(data->irq);
-}
-
-static void compat_irq_mask_ack(struct irq_data *data)
-{
-	data->chip->mask_ack(data->irq);
-}
-
-static void compat_irq_eoi(struct irq_data *data)
-{
-	data->chip->eoi(data->irq);
-}
-
-static void compat_irq_enable(struct irq_data *data)
-{
-	data->chip->enable(data->irq);
-}
-
-static void compat_irq_disable(struct irq_data *data)
-{
-	data->chip->disable(data->irq);
-}
-
-static void compat_irq_shutdown(struct irq_data *data)
-{
-	data->chip->shutdown(data->irq);
-}
-
-static unsigned int compat_irq_startup(struct irq_data *data)
-{
-	return data->chip->startup(data->irq);
-}
-
-static int compat_irq_set_affinity(struct irq_data *data,
-				   const struct cpumask *dest, bool force)
-{
-	return data->chip->set_affinity(data->irq, dest);
-}
-
-static int compat_irq_set_type(struct irq_data *data, unsigned int type)
-{
-	return data->chip->set_type(data->irq, type);
-}
-
-static int compat_irq_set_wake(struct irq_data *data, unsigned int on)
-{
-	return data->chip->set_wake(data->irq, on);
-}
-
-static int compat_irq_retrigger(struct irq_data *data)
-{
-	return data->chip->retrigger(data->irq);
-}
-
-static void compat_bus_lock(struct irq_data *data)
-{
-	data->chip->bus_lock(data->irq);
-}
-
-static void compat_bus_sync_unlock(struct irq_data *data)
-{
-	data->chip->bus_sync_unlock(data->irq);
-}
-#endif
-
-/*
- * Fixup enable/disable function pointers
- */
-void irq_chip_set_defaults(struct irq_chip *chip)
-{
-#ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED
-	if (chip->enable)
-		chip->irq_enable = compat_irq_enable;
-	if (chip->disable)
-		chip->irq_disable = compat_irq_disable;
-	if (chip->shutdown)
-		chip->irq_shutdown = compat_irq_shutdown;
-	if (chip->startup)
-		chip->irq_startup = compat_irq_startup;
-	if (!chip->end)
-		chip->end = dummy_irq_chip.end;
-	if (chip->bus_lock)
-		chip->irq_bus_lock = compat_bus_lock;
-	if (chip->bus_sync_unlock)
-		chip->irq_bus_sync_unlock = compat_bus_sync_unlock;
-	if (chip->mask)
-		chip->irq_mask = compat_irq_mask;
-	if (chip->unmask)
-		chip->irq_unmask = compat_irq_unmask;
-	if (chip->ack)
-		chip->irq_ack = compat_irq_ack;
-	if (chip->mask_ack)
-		chip->irq_mask_ack = compat_irq_mask_ack;
-	if (chip->eoi)
-		chip->irq_eoi = compat_irq_eoi;
-	if (chip->set_affinity)
-		chip->irq_set_affinity = compat_irq_set_affinity;
-	if (chip->set_type)
-		chip->irq_set_type = compat_irq_set_type;
-	if (chip->set_wake)
-		chip->irq_set_wake = compat_irq_set_wake;
-	if (chip->retrigger)
-		chip->irq_retrigger = compat_irq_retrigger;
-#endif
-}
-
 static inline void mask_ack_irq(struct irq_desc *desc)
 {
 	if (desc->irq_data.chip->irq_mask_ack)
@@ -372,11 +253,10 @@ void handle_nested_irq(unsigned int irq)
 	kstat_incr_irqs_this_cpu(irq, desc);
 
 	action = desc->action;
-	if (unlikely(!action || (desc->istate & IRQS_DISABLED)))
+	if (unlikely(!action || irqd_irq_disabled(&desc->irq_data)))
 		goto out_unlock;
 
-	irq_compat_set_progress(desc);
-	desc->istate |= IRQS_INPROGRESS;
+	irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS);
 	raw_spin_unlock_irq(&desc->lock);
 
 	action_ret = action->thread_fn(action->irq, action->dev_id);
@@ -384,8 +264,7 @@ void handle_nested_irq(unsigned int irq)
 		note_interrupt(irq, desc, action_ret);
 
 	raw_spin_lock_irq(&desc->lock);
-	desc->istate &= ~IRQS_INPROGRESS;
-	irq_compat_clr_progress(desc);
+	irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS);
 
 out_unlock:
 	raw_spin_unlock_irq(&desc->lock);
@@ -416,14 +295,14 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc)
 {
 	raw_spin_lock(&desc->lock);
 
-	if (unlikely(desc->istate & IRQS_INPROGRESS))
+	if (unlikely(irqd_irq_inprogress(&desc->irq_data)))
 		if (!irq_check_poll(desc))
 			goto out_unlock;
 
 	desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
 	kstat_incr_irqs_this_cpu(irq, desc);
 
-	if (unlikely(!desc->action || (desc->istate & IRQS_DISABLED)))
+	if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data)))
 		goto out_unlock;
 
 	handle_irq_event(desc);
@@ -448,7 +327,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
 	raw_spin_lock(&desc->lock);
 	mask_ack_irq(desc);
 
-	if (unlikely(desc->istate & IRQS_INPROGRESS))
+	if (unlikely(irqd_irq_inprogress(&desc->irq_data)))
 		if (!irq_check_poll(desc))
 			goto out_unlock;
 
@@ -459,12 +338,12 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
 	 * If its disabled or no action available
 	 * keep it masked and get out of here
 	 */
-	if (unlikely(!desc->action || (desc->istate & IRQS_DISABLED)))
+	if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data)))
 		goto out_unlock;
 
 	handle_irq_event(desc);
 
-	if (!(desc->istate & (IRQS_DISABLED | IRQS_ONESHOT)))
+	if (!irqd_irq_disabled(&desc->irq_data) && !(desc->istate & IRQS_ONESHOT))
 		unmask_irq(desc);
 out_unlock:
 	raw_spin_unlock(&desc->lock);
@@ -496,7 +375,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
 {
 	raw_spin_lock(&desc->lock);
 
-	if (unlikely(desc->istate & IRQS_INPROGRESS))
+	if (unlikely(irqd_irq_inprogress(&desc->irq_data)))
 		if (!irq_check_poll(desc))
 			goto out;
 
@@ -507,8 +386,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
 	 * If its disabled or no action available
 	 * then mask it and get out of here:
 	 */
-	if (unlikely(!desc->action || (desc->istate & IRQS_DISABLED))) {
-		irq_compat_set_pending(desc);
+	if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) {
 		desc->istate |= IRQS_PENDING;
 		mask_irq(desc);
 		goto out;
@@ -558,10 +436,9 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
 	 * we shouldn't process the IRQ. Mark it pending, handle
 	 * the necessary masking and go out
 	 */
-	if (unlikely((desc->istate & (IRQS_DISABLED | IRQS_INPROGRESS) ||
-		      !desc->action))) {
+	if (unlikely(irqd_irq_disabled(&desc->irq_data) ||
+		     irqd_irq_inprogress(&desc->irq_data) || !desc->action)) {
 		if (!irq_check_poll(desc)) {
-			irq_compat_set_pending(desc);
 			desc->istate |= IRQS_PENDING;
 			mask_ack_irq(desc);
 			goto out_unlock;
@@ -584,20 +461,65 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
584 * Renable it, if it was not disabled in meantime. 461 * Renable it, if it was not disabled in meantime.
585 */ 462 */
586 if (unlikely(desc->istate & IRQS_PENDING)) { 463 if (unlikely(desc->istate & IRQS_PENDING)) {
587 if (!(desc->istate & IRQS_DISABLED) && 464 if (!irqd_irq_disabled(&desc->irq_data) &&
588 (desc->istate & IRQS_MASKED)) 465 irqd_irq_masked(&desc->irq_data))
589 unmask_irq(desc); 466 unmask_irq(desc);
590 } 467 }
591 468
592 handle_irq_event(desc); 469 handle_irq_event(desc);
593 470
594 } while ((desc->istate & IRQS_PENDING) && 471 } while ((desc->istate & IRQS_PENDING) &&
595 !(desc->istate & IRQS_DISABLED)); 472 !irqd_irq_disabled(&desc->irq_data));
596 473
597out_unlock: 474out_unlock:
598 raw_spin_unlock(&desc->lock); 475 raw_spin_unlock(&desc->lock);
599} 476}
600 477
478#ifdef CONFIG_IRQ_EDGE_EOI_HANDLER
479/**
480 * handle_edge_eoi_irq - edge eoi type IRQ handler
481 * @irq: the interrupt number
482 * @desc: the interrupt description structure for this irq
483 *
484 * Similar as the above handle_edge_irq, but using eoi and w/o the
485 * mask/unmask logic.
486 */
487void handle_edge_eoi_irq(unsigned int irq, struct irq_desc *desc)
488{
489 struct irq_chip *chip = irq_desc_get_chip(desc);
490
491 raw_spin_lock(&desc->lock);
492
493 desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
494 /*
495 * If we're currently running this IRQ, or its disabled,
496 * we shouldn't process the IRQ. Mark it pending, handle
497 * the necessary masking and go out
498 */
499 if (unlikely(irqd_irq_disabled(&desc->irq_data) ||
500 irqd_irq_inprogress(&desc->irq_data) || !desc->action)) {
501 if (!irq_check_poll(desc)) {
502 desc->istate |= IRQS_PENDING;
503 goto out_eoi;
504 }
505 }
506 kstat_incr_irqs_this_cpu(irq, desc);
507
508 do {
509 if (unlikely(!desc->action))
510 goto out_eoi;
511
512 handle_irq_event(desc);
513
514 } while ((desc->istate & IRQS_PENDING) &&
515 !irqd_irq_disabled(&desc->irq_data));
516
 517out_eoi:
518 chip->irq_eoi(&desc->irq_data);
519 raw_spin_unlock(&desc->lock);
520}
521#endif
522
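The new flow handler is wired up by the platform like any other. A minimal sketch, assuming the generic irq_set_chip_and_handler() helper and a hypothetical demo_chip whose hardware wants edge handling terminated by an EOI (both exit paths above end in chip->irq_eoi(), so the chip must provide that callback):

	irq_set_chip_and_handler(irq, &demo_chip, handle_edge_eoi_irq);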
601/** 523/**
602 * handle_percpu_irq - Per CPU local irq handler 524 * handle_percpu_irq - Per CPU local irq handler
603 * @irq: the interrupt number 525 * @irq: the interrupt number
@@ -642,8 +564,7 @@ __irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
642 if (handle == handle_bad_irq) { 564 if (handle == handle_bad_irq) {
643 if (desc->irq_data.chip != &no_irq_chip) 565 if (desc->irq_data.chip != &no_irq_chip)
644 mask_ack_irq(desc); 566 mask_ack_irq(desc);
645 irq_compat_set_disabled(desc); 567 irq_state_set_disabled(desc);
646 desc->istate |= IRQS_DISABLED;
647 desc->depth = 1; 568 desc->depth = 1;
648 } 569 }
649 desc->handle_irq = handle; 570 desc->handle_irq = handle;
@@ -684,8 +605,70 @@ void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set)
684 irqd_set(&desc->irq_data, IRQD_PER_CPU); 605 irqd_set(&desc->irq_data, IRQD_PER_CPU);
685 if (irq_settings_can_move_pcntxt(desc)) 606 if (irq_settings_can_move_pcntxt(desc))
686 irqd_set(&desc->irq_data, IRQD_MOVE_PCNTXT); 607 irqd_set(&desc->irq_data, IRQD_MOVE_PCNTXT);
608 if (irq_settings_is_level(desc))
609 irqd_set(&desc->irq_data, IRQD_LEVEL);
687 610
688 irqd_set(&desc->irq_data, irq_settings_get_trigger_mask(desc)); 611 irqd_set(&desc->irq_data, irq_settings_get_trigger_mask(desc));
689 612
690 irq_put_desc_unlock(desc, flags); 613 irq_put_desc_unlock(desc, flags);
691} 614}
615
616/**
617 * irq_cpu_online - Invoke all irq_cpu_online functions.
618 *
619 * Iterate through all irqs and invoke the chip.irq_cpu_online()
620 * for each.
621 */
622void irq_cpu_online(void)
623{
624 struct irq_desc *desc;
625 struct irq_chip *chip;
626 unsigned long flags;
627 unsigned int irq;
628
629 for_each_active_irq(irq) {
630 desc = irq_to_desc(irq);
631 if (!desc)
632 continue;
633
634 raw_spin_lock_irqsave(&desc->lock, flags);
635
636 chip = irq_data_get_irq_chip(&desc->irq_data);
637 if (chip && chip->irq_cpu_online &&
638 (!(chip->flags & IRQCHIP_ONOFFLINE_ENABLED) ||
639 !irqd_irq_disabled(&desc->irq_data)))
640 chip->irq_cpu_online(&desc->irq_data);
641
642 raw_spin_unlock_irqrestore(&desc->lock, flags);
643 }
644}
645
646/**
647 * irq_cpu_offline - Invoke all irq_cpu_offline functions.
648 *
649 * Iterate through all irqs and invoke the chip.irq_cpu_offline()
650 * for each.
651 */
652void irq_cpu_offline(void)
653{
654 struct irq_desc *desc;
655 struct irq_chip *chip;
656 unsigned long flags;
657 unsigned int irq;
658
659 for_each_active_irq(irq) {
660 desc = irq_to_desc(irq);
661 if (!desc)
662 continue;
663
664 raw_spin_lock_irqsave(&desc->lock, flags);
665
666 chip = irq_data_get_irq_chip(&desc->irq_data);
667 if (chip && chip->irq_cpu_offline &&
668 (!(chip->flags & IRQCHIP_ONOFFLINE_ENABLED) ||
669 !irqd_irq_disabled(&desc->irq_data)))
670 chip->irq_cpu_offline(&desc->irq_data);
671
672 raw_spin_unlock_irqrestore(&desc->lock, flags);
673 }
674}
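The two iterators only call into chips that opt in. A sketch of the chip side, with hypothetical demo names; the IRQCHIP_ONOFFLINE_ENABLED flag limits the callbacks to interrupts that are not disabled, matching the checks above:

	static void demo_cpu_online(struct irq_data *data)
	{
		/* e.g. reroute this interrupt to the CPU coming online */
	}

	static struct irq_chip demo_chip = {
		.name		= "DEMO",
		.irq_cpu_online	= demo_cpu_online,
		.flags		= IRQCHIP_ONOFFLINE_ENABLED,
	};

A platform's CPU hotplug path would then call irq_cpu_online() while bringing a CPU up and irq_cpu_offline() before taking it down.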
diff --git a/kernel/irq/compat.h b/kernel/irq/compat.h
deleted file mode 100644
index 6bbaf66aca85..000000000000
--- a/kernel/irq/compat.h
+++ /dev/null
@@ -1,72 +0,0 @@
1/*
2 * Compat layer for transition period
3 */
4#ifndef CONFIG_GENERIC_HARDIRQS_NO_COMPAT
5static inline void irq_compat_set_progress(struct irq_desc *desc)
6{
7 desc->status |= IRQ_INPROGRESS;
8}
9
10static inline void irq_compat_clr_progress(struct irq_desc *desc)
11{
12 desc->status &= ~IRQ_INPROGRESS;
13}
14static inline void irq_compat_set_disabled(struct irq_desc *desc)
15{
16 desc->status |= IRQ_DISABLED;
17}
18static inline void irq_compat_clr_disabled(struct irq_desc *desc)
19{
20 desc->status &= ~IRQ_DISABLED;
21}
22static inline void irq_compat_set_pending(struct irq_desc *desc)
23{
24 desc->status |= IRQ_PENDING;
25}
26
27static inline void irq_compat_clr_pending(struct irq_desc *desc)
28{
29 desc->status &= ~IRQ_PENDING;
30}
31static inline void irq_compat_set_masked(struct irq_desc *desc)
32{
33 desc->status |= IRQ_MASKED;
34}
35
36static inline void irq_compat_clr_masked(struct irq_desc *desc)
37{
38 desc->status &= ~IRQ_MASKED;
39}
40static inline void irq_compat_set_move_pending(struct irq_desc *desc)
41{
42 desc->status |= IRQ_MOVE_PENDING;
43}
44
45static inline void irq_compat_clr_move_pending(struct irq_desc *desc)
46{
47 desc->status &= ~IRQ_MOVE_PENDING;
48}
49static inline void irq_compat_set_affinity(struct irq_desc *desc)
50{
51 desc->status |= IRQ_AFFINITY_SET;
52}
53
54static inline void irq_compat_clr_affinity(struct irq_desc *desc)
55{
56 desc->status &= ~IRQ_AFFINITY_SET;
57}
58#else
59static inline void irq_compat_set_progress(struct irq_desc *desc) { }
60static inline void irq_compat_clr_progress(struct irq_desc *desc) { }
61static inline void irq_compat_set_disabled(struct irq_desc *desc) { }
62static inline void irq_compat_clr_disabled(struct irq_desc *desc) { }
63static inline void irq_compat_set_pending(struct irq_desc *desc) { }
64static inline void irq_compat_clr_pending(struct irq_desc *desc) { }
65static inline void irq_compat_set_masked(struct irq_desc *desc) { }
66static inline void irq_compat_clr_masked(struct irq_desc *desc) { }
67static inline void irq_compat_set_move_pending(struct irq_desc *desc) { }
68static inline void irq_compat_clr_move_pending(struct irq_desc *desc) { }
69static inline void irq_compat_set_affinity(struct irq_desc *desc) { }
70static inline void irq_compat_clr_affinity(struct irq_desc *desc) { }
71#endif
72
diff --git a/kernel/irq/debug.h b/kernel/irq/debug.h
index d1a33b7fa61d..306cba37e9a5 100644
--- a/kernel/irq/debug.h
+++ b/kernel/irq/debug.h
@@ -4,8 +4,10 @@
4 4
5#include <linux/kallsyms.h> 5#include <linux/kallsyms.h>
6 6
7#define P(f) if (desc->status & f) printk("%14s set\n", #f) 7#define P(f) if (desc->status_use_accessors & f) printk("%14s set\n", #f)
8#define PS(f) if (desc->istate & f) printk("%14s set\n", #f) 8#define PS(f) if (desc->istate & f) printk("%14s set\n", #f)
9/* FIXME */
10#define PD(f) do { } while (0)
9 11
10static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc) 12static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc)
11{ 13{
@@ -28,13 +30,15 @@ static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc)
28 P(IRQ_NOAUTOEN); 30 P(IRQ_NOAUTOEN);
29 31
30 PS(IRQS_AUTODETECT); 32 PS(IRQS_AUTODETECT);
31 PS(IRQS_INPROGRESS);
32 PS(IRQS_REPLAY); 33 PS(IRQS_REPLAY);
33 PS(IRQS_WAITING); 34 PS(IRQS_WAITING);
34 PS(IRQS_DISABLED);
35 PS(IRQS_PENDING); 35 PS(IRQS_PENDING);
36 PS(IRQS_MASKED); 36
37 PD(IRQS_INPROGRESS);
38 PD(IRQS_DISABLED);
39 PD(IRQS_MASKED);
37} 40}
38 41
39#undef P 42#undef P
40#undef PS 43#undef PS
44#undef PD
diff --git a/kernel/irq/dummychip.c b/kernel/irq/dummychip.c
index 20dc5474947e..b5fcd96c7102 100644
--- a/kernel/irq/dummychip.c
+++ b/kernel/irq/dummychip.c
@@ -31,13 +31,6 @@ static unsigned int noop_ret(struct irq_data *data)
31 return 0; 31 return 0;
32} 32}
33 33
34#ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED
35static void compat_noop(unsigned int irq) { }
36#define END_INIT .end = compat_noop
37#else
38#define END_INIT
39#endif
40
41/* 34/*
42 * Generic no controller implementation 35 * Generic no controller implementation
43 */ 36 */
@@ -48,7 +41,6 @@ struct irq_chip no_irq_chip = {
48 .irq_enable = noop, 41 .irq_enable = noop,
49 .irq_disable = noop, 42 .irq_disable = noop,
50 .irq_ack = ack_bad, 43 .irq_ack = ack_bad,
51 END_INIT
52}; 44};
53 45
54/* 46/*
@@ -64,5 +56,4 @@ struct irq_chip dummy_irq_chip = {
64 .irq_ack = noop, 56 .irq_ack = noop,
65 .irq_mask = noop, 57 .irq_mask = noop,
66 .irq_unmask = noop, 58 .irq_unmask = noop,
67 END_INIT
68}; 59};
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 517561fc7317..90cb55f6d7eb 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -175,28 +175,13 @@ irqreturn_t handle_irq_event(struct irq_desc *desc)
175 struct irqaction *action = desc->action; 175 struct irqaction *action = desc->action;
176 irqreturn_t ret; 176 irqreturn_t ret;
177 177
178 irq_compat_clr_pending(desc);
179 desc->istate &= ~IRQS_PENDING; 178 desc->istate &= ~IRQS_PENDING;
180 irq_compat_set_progress(desc); 179 irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS);
181 desc->istate |= IRQS_INPROGRESS;
182 raw_spin_unlock(&desc->lock); 180 raw_spin_unlock(&desc->lock);
183 181
184 ret = handle_irq_event_percpu(desc, action); 182 ret = handle_irq_event_percpu(desc, action);
185 183
186 raw_spin_lock(&desc->lock); 184 raw_spin_lock(&desc->lock);
187 desc->istate &= ~IRQS_INPROGRESS; 185 irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS);
188 irq_compat_clr_progress(desc);
189 return ret; 186 return ret;
190} 187}
191
192/**
193 * handle_IRQ_event - irq action chain handler
194 * @irq: the interrupt number
195 * @action: the interrupt action chain for this irq
196 *
197 * Handles the action chain of an irq event
198 */
199irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
200{
201 return handle_irq_event_percpu(irq_to_desc(irq), action);
202}
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 6c6ec9a49027..6546431447d7 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -15,10 +15,6 @@
15 15
16#define istate core_internal_state__do_not_mess_with_it 16#define istate core_internal_state__do_not_mess_with_it
17 17
18#ifdef CONFIG_GENERIC_HARDIRQS_NO_COMPAT
19# define status status_use_accessors
20#endif
21
22extern int noirqdebug; 18extern int noirqdebug;
23 19
24/* 20/*
@@ -44,38 +40,28 @@ enum {
44 * IRQS_SPURIOUS_DISABLED - was disabled due to spurious interrupt 40 * IRQS_SPURIOUS_DISABLED - was disabled due to spurious interrupt
45 * detection 41 * detection
46 * IRQS_POLL_INPROGRESS - polling in progress 42 * IRQS_POLL_INPROGRESS - polling in progress
47 * IRQS_INPROGRESS - Interrupt in progress
48 * IRQS_ONESHOT - irq is not unmasked in primary handler 43 * IRQS_ONESHOT - irq is not unmasked in primary handler
49 * IRQS_REPLAY - irq is replayed 44 * IRQS_REPLAY - irq is replayed
50 * IRQS_WAITING - irq is waiting 45 * IRQS_WAITING - irq is waiting
51 * IRQS_DISABLED - irq is disabled
52 * IRQS_PENDING - irq is pending and replayed later 46 * IRQS_PENDING - irq is pending and replayed later
53 * IRQS_MASKED - irq is masked
54 * IRQS_SUSPENDED - irq is suspended 47 * IRQS_SUSPENDED - irq is suspended
55 */ 48 */
56enum { 49enum {
57 IRQS_AUTODETECT = 0x00000001, 50 IRQS_AUTODETECT = 0x00000001,
58 IRQS_SPURIOUS_DISABLED = 0x00000002, 51 IRQS_SPURIOUS_DISABLED = 0x00000002,
59 IRQS_POLL_INPROGRESS = 0x00000008, 52 IRQS_POLL_INPROGRESS = 0x00000008,
60 IRQS_INPROGRESS = 0x00000010,
61 IRQS_ONESHOT = 0x00000020, 53 IRQS_ONESHOT = 0x00000020,
62 IRQS_REPLAY = 0x00000040, 54 IRQS_REPLAY = 0x00000040,
63 IRQS_WAITING = 0x00000080, 55 IRQS_WAITING = 0x00000080,
64 IRQS_DISABLED = 0x00000100,
65 IRQS_PENDING = 0x00000200, 56 IRQS_PENDING = 0x00000200,
66 IRQS_MASKED = 0x00000400,
67 IRQS_SUSPENDED = 0x00000800, 57 IRQS_SUSPENDED = 0x00000800,
68}; 58};
69 59
70#include "compat.h"
71#include "debug.h" 60#include "debug.h"
72#include "settings.h" 61#include "settings.h"
73 62
74#define irq_data_to_desc(data) container_of(data, struct irq_desc, irq_data) 63#define irq_data_to_desc(data) container_of(data, struct irq_desc, irq_data)
75 64
76/* Set default functions for irq_chip structures: */
77extern void irq_chip_set_defaults(struct irq_chip *chip);
78
79extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, 65extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
80 unsigned long flags); 66 unsigned long flags);
81extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp); 67extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp);
@@ -162,13 +148,11 @@ irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags)
162static inline void irqd_set_move_pending(struct irq_data *d) 148static inline void irqd_set_move_pending(struct irq_data *d)
163{ 149{
164 d->state_use_accessors |= IRQD_SETAFFINITY_PENDING; 150 d->state_use_accessors |= IRQD_SETAFFINITY_PENDING;
165 irq_compat_set_move_pending(irq_data_to_desc(d));
166} 151}
167 152
168static inline void irqd_clr_move_pending(struct irq_data *d) 153static inline void irqd_clr_move_pending(struct irq_data *d)
169{ 154{
170 d->state_use_accessors &= ~IRQD_SETAFFINITY_PENDING; 155 d->state_use_accessors &= ~IRQD_SETAFFINITY_PENDING;
171 irq_compat_clr_move_pending(irq_data_to_desc(d));
172} 156}
173 157
174static inline void irqd_clear(struct irq_data *d, unsigned int mask) 158static inline void irqd_clear(struct irq_data *d, unsigned int mask)
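The helpers above are the write side of irq_data::state_use_accessors; the read-only tests used throughout this series follow the same pattern. A sketch of what such an accessor presumably looks like (the real definitions live in include/linux/irq.h, outside this diff):

	static inline bool irqd_irq_disabled(struct irq_data *d)
	{
		return d->state_use_accessors & IRQD_IRQ_DISABLED;
	}

Funneling every access through wrappers like this is what makes it safe to delete the IRQS_INPROGRESS, IRQS_DISABLED and IRQS_MASKED bits from istate without chasing open-coded users.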
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index dbccc799407f..2c039c9b9383 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -80,7 +80,7 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node)
80 desc->irq_data.handler_data = NULL; 80 desc->irq_data.handler_data = NULL;
81 desc->irq_data.msi_desc = NULL; 81 desc->irq_data.msi_desc = NULL;
82 irq_settings_clr_and_set(desc, ~0, _IRQ_DEFAULT_INIT_FLAGS); 82 irq_settings_clr_and_set(desc, ~0, _IRQ_DEFAULT_INIT_FLAGS);
83 desc->istate = IRQS_DISABLED; 83 irqd_set(&desc->irq_data, IRQD_IRQ_DISABLED);
84 desc->handle_irq = handle_bad_irq; 84 desc->handle_irq = handle_bad_irq;
85 desc->depth = 1; 85 desc->depth = 1;
86 desc->irq_count = 0; 86 desc->irq_count = 0;
@@ -198,15 +198,6 @@ err:
198 return -ENOMEM; 198 return -ENOMEM;
199} 199}
200 200
201struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node)
202{
203 int res = irq_alloc_descs(irq, irq, 1, node);
204
205 if (res == -EEXIST || res == irq)
206 return irq_to_desc(irq);
207 return NULL;
208}
209
210static int irq_expand_nr_irqs(unsigned int nr) 201static int irq_expand_nr_irqs(unsigned int nr)
211{ 202{
212 if (nr > IRQ_BITMAP_BITS) 203 if (nr > IRQ_BITMAP_BITS)
@@ -247,7 +238,6 @@ int __init early_irq_init(void)
247 238
248struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = { 239struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
249 [0 ... NR_IRQS-1] = { 240 [0 ... NR_IRQS-1] = {
250 .istate = IRQS_DISABLED,
251 .handle_irq = handle_bad_irq, 241 .handle_irq = handle_bad_irq,
252 .depth = 1, 242 .depth = 1,
253 .lock = __RAW_SPIN_LOCK_UNLOCKED(irq_desc->lock), 243 .lock = __RAW_SPIN_LOCK_UNLOCKED(irq_desc->lock),
@@ -283,11 +273,6 @@ struct irq_desc *irq_to_desc(unsigned int irq)
283 return (irq < NR_IRQS) ? irq_desc + irq : NULL; 273 return (irq < NR_IRQS) ? irq_desc + irq : NULL;
284} 274}
285 275
286struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node)
287{
288 return irq_to_desc(irq);
289}
290
291static void free_desc(unsigned int irq) 276static void free_desc(unsigned int irq)
292{ 277{
293 dynamic_irq_cleanup(irq); 278 dynamic_irq_cleanup(irq);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 0a2aa73e536c..12a80fdae11c 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -41,7 +41,7 @@ early_param("threadirqs", setup_forced_irqthreads);
41void synchronize_irq(unsigned int irq) 41void synchronize_irq(unsigned int irq)
42{ 42{
43 struct irq_desc *desc = irq_to_desc(irq); 43 struct irq_desc *desc = irq_to_desc(irq);
44 unsigned int state; 44 bool inprogress;
45 45
46 if (!desc) 46 if (!desc)
47 return; 47 return;
@@ -53,16 +53,16 @@ void synchronize_irq(unsigned int irq)
53 * Wait until we're out of the critical section. This might 53 * Wait until we're out of the critical section. This might
54 * give the wrong answer due to the lack of memory barriers. 54 * give the wrong answer due to the lack of memory barriers.
55 */ 55 */
56 while (desc->istate & IRQS_INPROGRESS) 56 while (irqd_irq_inprogress(&desc->irq_data))
57 cpu_relax(); 57 cpu_relax();
58 58
59 /* Ok, that indicated we're done: double-check carefully. */ 59 /* Ok, that indicated we're done: double-check carefully. */
60 raw_spin_lock_irqsave(&desc->lock, flags); 60 raw_spin_lock_irqsave(&desc->lock, flags);
61 state = desc->istate; 61 inprogress = irqd_irq_inprogress(&desc->irq_data);
62 raw_spin_unlock_irqrestore(&desc->lock, flags); 62 raw_spin_unlock_irqrestore(&desc->lock, flags);
63 63
64 /* Oops, that failed? */ 64 /* Oops, that failed? */
65 } while (state & IRQS_INPROGRESS); 65 } while (inprogress);
66 66
67 /* 67 /*
68 * We made sure that no hardirq handler is running. Now verify 68 * We made sure that no hardirq handler is running. Now verify
@@ -112,13 +112,13 @@ void irq_set_thread_affinity(struct irq_desc *desc)
112} 112}
113 113
114#ifdef CONFIG_GENERIC_PENDING_IRQ 114#ifdef CONFIG_GENERIC_PENDING_IRQ
115static inline bool irq_can_move_pcntxt(struct irq_desc *desc) 115static inline bool irq_can_move_pcntxt(struct irq_data *data)
116{ 116{
117 return irq_settings_can_move_pcntxt(desc); 117 return irqd_can_move_in_process_context(data);
118} 118}
119static inline bool irq_move_pending(struct irq_desc *desc) 119static inline bool irq_move_pending(struct irq_data *data)
120{ 120{
121 return irqd_is_setaffinity_pending(&desc->irq_data); 121 return irqd_is_setaffinity_pending(data);
122} 122}
123static inline void 123static inline void
124irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask) 124irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask)
@@ -131,43 +131,34 @@ irq_get_pending(struct cpumask *mask, struct irq_desc *desc)
131 cpumask_copy(mask, desc->pending_mask); 131 cpumask_copy(mask, desc->pending_mask);
132} 132}
133#else 133#else
134static inline bool irq_can_move_pcntxt(struct irq_desc *desc) { return true; } 134static inline bool irq_can_move_pcntxt(struct irq_data *data) { return true; }
135static inline bool irq_move_pending(struct irq_desc *desc) { return false; } 135static inline bool irq_move_pending(struct irq_data *data) { return false; }
136static inline void 136static inline void
137irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask) { } 137irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask) { }
138static inline void 138static inline void
139irq_get_pending(struct cpumask *mask, struct irq_desc *desc) { } 139irq_get_pending(struct cpumask *mask, struct irq_desc *desc) { }
140#endif 140#endif
141 141
142/** 142int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask)
143 * irq_set_affinity - Set the irq affinity of a given irq
144 * @irq: Interrupt to set affinity
145 * @cpumask: cpumask
146 *
147 */
148int irq_set_affinity(unsigned int irq, const struct cpumask *mask)
149{ 143{
150 struct irq_desc *desc = irq_to_desc(irq); 144 struct irq_chip *chip = irq_data_get_irq_chip(data);
151 struct irq_chip *chip = desc->irq_data.chip; 145 struct irq_desc *desc = irq_data_to_desc(data);
152 unsigned long flags;
153 int ret = 0; 146 int ret = 0;
154 147
155 if (!chip->irq_set_affinity) 148 if (!chip || !chip->irq_set_affinity)
156 return -EINVAL; 149 return -EINVAL;
157 150
158 raw_spin_lock_irqsave(&desc->lock, flags); 151 if (irq_can_move_pcntxt(data)) {
159 152 ret = chip->irq_set_affinity(data, mask, false);
160 if (irq_can_move_pcntxt(desc)) {
161 ret = chip->irq_set_affinity(&desc->irq_data, mask, false);
162 switch (ret) { 153 switch (ret) {
163 case IRQ_SET_MASK_OK: 154 case IRQ_SET_MASK_OK:
164 cpumask_copy(desc->irq_data.affinity, mask); 155 cpumask_copy(data->affinity, mask);
165 case IRQ_SET_MASK_OK_NOCOPY: 156 case IRQ_SET_MASK_OK_NOCOPY:
166 irq_set_thread_affinity(desc); 157 irq_set_thread_affinity(desc);
167 ret = 0; 158 ret = 0;
168 } 159 }
169 } else { 160 } else {
170 irqd_set_move_pending(&desc->irq_data); 161 irqd_set_move_pending(data);
171 irq_copy_pending(desc, mask); 162 irq_copy_pending(desc, mask);
172 } 163 }
173 164
@@ -175,8 +166,28 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *mask)
175 kref_get(&desc->affinity_notify->kref); 166 kref_get(&desc->affinity_notify->kref);
176 schedule_work(&desc->affinity_notify->work); 167 schedule_work(&desc->affinity_notify->work);
177 } 168 }
178 irq_compat_set_affinity(desc); 169 irqd_set(data, IRQD_AFFINITY_SET);
179 irqd_set(&desc->irq_data, IRQD_AFFINITY_SET); 170
171 return ret;
172}
173
174/**
175 * irq_set_affinity - Set the irq affinity of a given irq
176 * @irq: Interrupt to set affinity
177 * @mask: cpumask
178 *
179 */
180int irq_set_affinity(unsigned int irq, const struct cpumask *mask)
181{
182 struct irq_desc *desc = irq_to_desc(irq);
183 unsigned long flags;
184 int ret;
185
186 if (!desc)
187 return -EINVAL;
188
189 raw_spin_lock_irqsave(&desc->lock, flags);
190 ret = __irq_set_affinity_locked(irq_desc_get_irq_data(desc), mask);
180 raw_spin_unlock_irqrestore(&desc->lock, flags); 191 raw_spin_unlock_irqrestore(&desc->lock, flags);
181 return ret; 192 return ret;
182} 193}
@@ -206,7 +217,7 @@ static void irq_affinity_notify(struct work_struct *work)
206 goto out; 217 goto out;
207 218
208 raw_spin_lock_irqsave(&desc->lock, flags); 219 raw_spin_lock_irqsave(&desc->lock, flags);
209 if (irq_move_pending(desc)) 220 if (irq_move_pending(&desc->irq_data))
210 irq_get_pending(cpumask, desc); 221 irq_get_pending(cpumask, desc);
211 else 222 else
212 cpumask_copy(cpumask, desc->irq_data.affinity); 223 cpumask_copy(cpumask, desc->irq_data.affinity);
@@ -285,10 +296,8 @@ setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask)
285 if (cpumask_intersects(desc->irq_data.affinity, 296 if (cpumask_intersects(desc->irq_data.affinity,
286 cpu_online_mask)) 297 cpu_online_mask))
287 set = desc->irq_data.affinity; 298 set = desc->irq_data.affinity;
288 else { 299 else
289 irq_compat_clr_affinity(desc);
290 irqd_clear(&desc->irq_data, IRQD_AFFINITY_SET); 300 irqd_clear(&desc->irq_data, IRQD_AFFINITY_SET);
291 }
292 } 301 }
293 302
294 cpumask_and(mask, cpu_online_mask, set); 303 cpumask_and(mask, cpu_online_mask, set);
@@ -551,9 +560,9 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
551 flags &= IRQ_TYPE_SENSE_MASK; 560 flags &= IRQ_TYPE_SENSE_MASK;
552 561
553 if (chip->flags & IRQCHIP_SET_TYPE_MASKED) { 562 if (chip->flags & IRQCHIP_SET_TYPE_MASKED) {
554 if (!(desc->istate & IRQS_MASKED)) 563 if (!irqd_irq_masked(&desc->irq_data))
555 mask_irq(desc); 564 mask_irq(desc);
556 if (!(desc->istate & IRQS_DISABLED)) 565 if (!irqd_irq_disabled(&desc->irq_data))
557 unmask = 1; 566 unmask = 1;
558 } 567 }
559 568
@@ -575,8 +584,6 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
575 irqd_set(&desc->irq_data, IRQD_LEVEL); 584 irqd_set(&desc->irq_data, IRQD_LEVEL);
576 } 585 }
577 586
578 if (chip != desc->irq_data.chip)
579 irq_chip_set_defaults(desc->irq_data.chip);
580 ret = 0; 587 ret = 0;
581 break; 588 break;
582 default: 589 default:
@@ -651,7 +658,7 @@ again:
651 * irq_wake_thread(). See the comment there which explains the 658 * irq_wake_thread(). See the comment there which explains the
652 * serialization. 659 * serialization.
653 */ 660 */
654 if (unlikely(desc->istate & IRQS_INPROGRESS)) { 661 if (unlikely(irqd_irq_inprogress(&desc->irq_data))) {
655 raw_spin_unlock_irq(&desc->lock); 662 raw_spin_unlock_irq(&desc->lock);
656 chip_bus_sync_unlock(desc); 663 chip_bus_sync_unlock(desc);
657 cpu_relax(); 664 cpu_relax();
@@ -668,12 +675,10 @@ again:
668 675
669 desc->threads_oneshot &= ~action->thread_mask; 676 desc->threads_oneshot &= ~action->thread_mask;
670 677
671 if (!desc->threads_oneshot && !(desc->istate & IRQS_DISABLED) && 678 if (!desc->threads_oneshot && !irqd_irq_disabled(&desc->irq_data) &&
672 (desc->istate & IRQS_MASKED)) { 679 irqd_irq_masked(&desc->irq_data))
673 irq_compat_clr_masked(desc); 680 unmask_irq(desc);
674 desc->istate &= ~IRQS_MASKED; 681
675 desc->irq_data.chip->irq_unmask(&desc->irq_data);
676 }
677out_unlock: 682out_unlock:
678 raw_spin_unlock_irq(&desc->lock); 683 raw_spin_unlock_irq(&desc->lock);
679 chip_bus_sync_unlock(desc); 684 chip_bus_sync_unlock(desc);
@@ -767,7 +772,7 @@ static int irq_thread(void *data)
767 atomic_inc(&desc->threads_active); 772 atomic_inc(&desc->threads_active);
768 773
769 raw_spin_lock_irq(&desc->lock); 774 raw_spin_lock_irq(&desc->lock);
770 if (unlikely(desc->istate & IRQS_DISABLED)) { 775 if (unlikely(irqd_irq_disabled(&desc->irq_data))) {
771 /* 776 /*
772 * CHECKME: We might need a dedicated 777 * CHECKME: We might need a dedicated
773 * IRQ_THREAD_PENDING flag here, which 778 * IRQ_THREAD_PENDING flag here, which
@@ -775,7 +780,6 @@ static int irq_thread(void *data)
775 * but AFAICT IRQS_PENDING should be fine as it 780 * but AFAICT IRQS_PENDING should be fine as it
776 * retriggers the interrupt itself --- tglx 781 * retriggers the interrupt itself --- tglx
777 */ 782 */
778 irq_compat_set_pending(desc);
779 desc->istate |= IRQS_PENDING; 783 desc->istate |= IRQS_PENDING;
780 raw_spin_unlock_irq(&desc->lock); 784 raw_spin_unlock_irq(&desc->lock);
781 } else { 785 } else {
@@ -971,8 +975,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
971 new->thread_mask = 1 << ffz(thread_mask); 975 new->thread_mask = 1 << ffz(thread_mask);
972 976
973 if (!shared) { 977 if (!shared) {
974 irq_chip_set_defaults(desc->irq_data.chip);
975
976 init_waitqueue_head(&desc->wait_for_threads); 978 init_waitqueue_head(&desc->wait_for_threads);
977 979
978 /* Setup the type (level, edge polarity) if configured: */ 980 /* Setup the type (level, edge polarity) if configured: */
@@ -985,8 +987,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
985 } 987 }
986 988
987 desc->istate &= ~(IRQS_AUTODETECT | IRQS_SPURIOUS_DISABLED | \ 989 desc->istate &= ~(IRQS_AUTODETECT | IRQS_SPURIOUS_DISABLED | \
988 IRQS_INPROGRESS | IRQS_ONESHOT | \ 990 IRQS_ONESHOT | IRQS_WAITING);
989 IRQS_WAITING); 991 irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS);
990 992
991 if (new->flags & IRQF_PERCPU) { 993 if (new->flags & IRQF_PERCPU) {
992 irqd_set(&desc->irq_data, IRQD_PER_CPU); 994 irqd_set(&desc->irq_data, IRQD_PER_CPU);
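For drivers, the split into __irq_set_affinity_locked() and irq_set_affinity() changes nothing; the unlocked call is still the normal entry point. A minimal usage sketch (pin_irq_to_cpu() is hypothetical):

	#include <linux/interrupt.h>
	#include <linux/cpumask.h>

	static int pin_irq_to_cpu(unsigned int irq, unsigned int cpu)
	{
		/* -EINVAL if the chip has no irq_set_affinity() callback */
		return irq_set_affinity(irq, cpumask_of(cpu));
	}

When the interrupt cannot be retargeted in process context, the request is only recorded (irqd_set_move_pending() plus irq_copy_pending()) and carried out from the next hard interrupt.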
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index ec4806d4778b..bc6194698dfd 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -53,20 +53,14 @@ void irq_move_masked_irq(struct irq_data *idata)
53 cpumask_clear(desc->pending_mask); 53 cpumask_clear(desc->pending_mask);
54} 54}
55 55
56void move_masked_irq(int irq)
57{
58 irq_move_masked_irq(irq_get_irq_data(irq));
59}
60
61void irq_move_irq(struct irq_data *idata) 56void irq_move_irq(struct irq_data *idata)
62{ 57{
63 struct irq_desc *desc = irq_data_to_desc(idata);
64 bool masked; 58 bool masked;
65 59
66 if (likely(!irqd_is_setaffinity_pending(idata))) 60 if (likely(!irqd_is_setaffinity_pending(idata)))
67 return; 61 return;
68 62
69 if (unlikely(desc->istate & IRQS_DISABLED)) 63 if (unlikely(irqd_irq_disabled(idata)))
70 return; 64 return;
71 65
72 /* 66 /*
@@ -74,15 +68,10 @@ void irq_move_irq(struct irq_data *idata)
74 * threaded interrupt with ONESHOT set, we can end up with an 68 * threaded interrupt with ONESHOT set, we can end up with an
75 * interrupt storm. 69 * interrupt storm.
76 */ 70 */
77 masked = desc->istate & IRQS_MASKED; 71 masked = irqd_irq_masked(idata);
78 if (!masked) 72 if (!masked)
79 idata->chip->irq_mask(idata); 73 idata->chip->irq_mask(idata);
80 irq_move_masked_irq(idata); 74 irq_move_masked_irq(idata);
81 if (!masked) 75 if (!masked)
82 idata->chip->irq_unmask(idata); 76 idata->chip->irq_unmask(idata);
83} 77}
84
85void move_native_irq(int irq)
86{
87 irq_move_irq(irq_get_irq_data(irq));
88}
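With the move_masked_irq()/move_native_irq() wrappers gone, architecture code calls the irq_data based variants directly from its ack or eoi paths. A sketch of such a call site, using a hypothetical demo_eoi() callback:

	static void demo_eoi(struct irq_data *data)
	{
		/* apply a pending affinity change; irq_move_irq() masks
		 * around the move itself unless the line is already masked */
		irq_move_irq(data);
		/* ... write the hardware EOI register here ... */
	}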
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 760248de109d..dd201bd35103 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -364,6 +364,10 @@ int __weak arch_show_interrupts(struct seq_file *p, int prec)
364 return 0; 364 return 0;
365} 365}
366 366
367#ifndef ACTUAL_NR_IRQS
368# define ACTUAL_NR_IRQS nr_irqs
369#endif
370
367int show_interrupts(struct seq_file *p, void *v) 371int show_interrupts(struct seq_file *p, void *v)
368{ 372{
369 static int prec; 373 static int prec;
@@ -373,10 +377,10 @@ int show_interrupts(struct seq_file *p, void *v)
373 struct irqaction *action; 377 struct irqaction *action;
374 struct irq_desc *desc; 378 struct irq_desc *desc;
375 379
376 if (i > nr_irqs) 380 if (i > ACTUAL_NR_IRQS)
377 return 0; 381 return 0;
378 382
379 if (i == nr_irqs) 383 if (i == ACTUAL_NR_IRQS)
380 return arch_show_interrupts(p, prec); 384 return arch_show_interrupts(p, prec);
381 385
382 /* print header and calculate the width of the first column */ 386 /* print header and calculate the width of the first column */
@@ -404,7 +408,20 @@ int show_interrupts(struct seq_file *p, void *v)
404 seq_printf(p, "%*d: ", prec, i); 408 seq_printf(p, "%*d: ", prec, i);
405 for_each_online_cpu(j) 409 for_each_online_cpu(j)
406 seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); 410 seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
407 seq_printf(p, " %8s", desc->irq_data.chip->name); 411
412 if (desc->irq_data.chip) {
413 if (desc->irq_data.chip->irq_print_chip)
414 desc->irq_data.chip->irq_print_chip(&desc->irq_data, p);
415 else if (desc->irq_data.chip->name)
416 seq_printf(p, " %8s", desc->irq_data.chip->name);
417 else
418 seq_printf(p, " %8s", "-");
419 } else {
420 seq_printf(p, " %8s", "None");
421 }
 422#ifdef CONFIG_GENERIC_IRQ_SHOW_LEVEL
423 seq_printf(p, " %-8s", irqd_is_level_type(&desc->irq_data) ? "Level" : "Edge");
424#endif
408 if (desc->name) 425 if (desc->name)
409 seq_printf(p, "-%-8s", desc->name); 426 seq_printf(p, "-%-8s", desc->name);
410 427
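The new branch lets a chip take over its column in /proc/interrupts instead of printing a static name, and tolerates a missing chip or name. A minimal sketch with hypothetical demo names:

	static void demo_print_chip(struct irq_data *data, struct seq_file *p)
	{
		seq_printf(p, " %8s-%u", "DEMO", data->irq);
	}

	static struct irq_chip demo_chip = {
		.name		= "DEMO",
		.irq_print_chip	= demo_print_chip,
	};

Chips without the hook keep printing their .name, and a NULL chip now shows up as "None" rather than being dereferenced.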
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
index ad683a99b1ec..14dd5761e8c9 100644
--- a/kernel/irq/resend.c
+++ b/kernel/irq/resend.c
@@ -65,7 +65,6 @@ void check_irq_resend(struct irq_desc *desc, unsigned int irq)
65 if (desc->istate & IRQS_REPLAY) 65 if (desc->istate & IRQS_REPLAY)
66 return; 66 return;
67 if (desc->istate & IRQS_PENDING) { 67 if (desc->istate & IRQS_PENDING) {
68 irq_compat_clr_pending(desc);
69 desc->istate &= ~IRQS_PENDING; 68 desc->istate &= ~IRQS_PENDING;
70 desc->istate |= IRQS_REPLAY; 69 desc->istate |= IRQS_REPLAY;
71 70
diff --git a/kernel/irq/settings.h b/kernel/irq/settings.h
index 0227ad358272..0d91730b6330 100644
--- a/kernel/irq/settings.h
+++ b/kernel/irq/settings.h
@@ -15,17 +15,8 @@ enum {
15 _IRQF_MODIFY_MASK = IRQF_MODIFY_MASK, 15 _IRQF_MODIFY_MASK = IRQF_MODIFY_MASK,
16}; 16};
17 17
18#define IRQ_INPROGRESS GOT_YOU_MORON
19#define IRQ_REPLAY GOT_YOU_MORON
20#define IRQ_WAITING GOT_YOU_MORON
21#define IRQ_DISABLED GOT_YOU_MORON
22#define IRQ_PENDING GOT_YOU_MORON
23#define IRQ_MASKED GOT_YOU_MORON
24#define IRQ_WAKEUP GOT_YOU_MORON
25#define IRQ_MOVE_PENDING GOT_YOU_MORON
26#define IRQ_PER_CPU GOT_YOU_MORON 18#define IRQ_PER_CPU GOT_YOU_MORON
27#define IRQ_NO_BALANCING GOT_YOU_MORON 19#define IRQ_NO_BALANCING GOT_YOU_MORON
28#define IRQ_AFFINITY_SET GOT_YOU_MORON
29#define IRQ_LEVEL GOT_YOU_MORON 20#define IRQ_LEVEL GOT_YOU_MORON
30#define IRQ_NOPROBE GOT_YOU_MORON 21#define IRQ_NOPROBE GOT_YOU_MORON
31#define IRQ_NOREQUEST GOT_YOU_MORON 22#define IRQ_NOREQUEST GOT_YOU_MORON
@@ -37,102 +28,98 @@ enum {
37static inline void 28static inline void
38irq_settings_clr_and_set(struct irq_desc *desc, u32 clr, u32 set) 29irq_settings_clr_and_set(struct irq_desc *desc, u32 clr, u32 set)
39{ 30{
40 desc->status &= ~(clr & _IRQF_MODIFY_MASK); 31 desc->status_use_accessors &= ~(clr & _IRQF_MODIFY_MASK);
41 desc->status |= (set & _IRQF_MODIFY_MASK); 32 desc->status_use_accessors |= (set & _IRQF_MODIFY_MASK);
42} 33}
43 34
44static inline bool irq_settings_is_per_cpu(struct irq_desc *desc) 35static inline bool irq_settings_is_per_cpu(struct irq_desc *desc)
45{ 36{
46 return desc->status & _IRQ_PER_CPU; 37 return desc->status_use_accessors & _IRQ_PER_CPU;
47} 38}
48 39
49static inline void irq_settings_set_per_cpu(struct irq_desc *desc) 40static inline void irq_settings_set_per_cpu(struct irq_desc *desc)
50{ 41{
51 desc->status |= _IRQ_PER_CPU; 42 desc->status_use_accessors |= _IRQ_PER_CPU;
52} 43}
53 44
54static inline void irq_settings_set_no_balancing(struct irq_desc *desc) 45static inline void irq_settings_set_no_balancing(struct irq_desc *desc)
55{ 46{
56 desc->status |= _IRQ_NO_BALANCING; 47 desc->status_use_accessors |= _IRQ_NO_BALANCING;
57} 48}
58 49
59static inline bool irq_settings_has_no_balance_set(struct irq_desc *desc) 50static inline bool irq_settings_has_no_balance_set(struct irq_desc *desc)
60{ 51{
61 return desc->status & _IRQ_NO_BALANCING; 52 return desc->status_use_accessors & _IRQ_NO_BALANCING;
62} 53}
63 54
64static inline u32 irq_settings_get_trigger_mask(struct irq_desc *desc) 55static inline u32 irq_settings_get_trigger_mask(struct irq_desc *desc)
65{ 56{
66 return desc->status & IRQ_TYPE_SENSE_MASK; 57 return desc->status_use_accessors & IRQ_TYPE_SENSE_MASK;
67} 58}
68 59
69static inline void 60static inline void
70irq_settings_set_trigger_mask(struct irq_desc *desc, u32 mask) 61irq_settings_set_trigger_mask(struct irq_desc *desc, u32 mask)
71{ 62{
72 desc->status &= ~IRQ_TYPE_SENSE_MASK; 63 desc->status_use_accessors &= ~IRQ_TYPE_SENSE_MASK;
73 desc->status |= mask & IRQ_TYPE_SENSE_MASK; 64 desc->status_use_accessors |= mask & IRQ_TYPE_SENSE_MASK;
74} 65}
75 66
76static inline bool irq_settings_is_level(struct irq_desc *desc) 67static inline bool irq_settings_is_level(struct irq_desc *desc)
77{ 68{
78 return desc->status & _IRQ_LEVEL; 69 return desc->status_use_accessors & _IRQ_LEVEL;
79} 70}
80 71
81static inline void irq_settings_clr_level(struct irq_desc *desc) 72static inline void irq_settings_clr_level(struct irq_desc *desc)
82{ 73{
83 desc->status &= ~_IRQ_LEVEL; 74 desc->status_use_accessors &= ~_IRQ_LEVEL;
84} 75}
85 76
86static inline void irq_settings_set_level(struct irq_desc *desc) 77static inline void irq_settings_set_level(struct irq_desc *desc)
87{ 78{
88 desc->status |= _IRQ_LEVEL; 79 desc->status_use_accessors |= _IRQ_LEVEL;
89} 80}
90 81
91static inline bool irq_settings_can_request(struct irq_desc *desc) 82static inline bool irq_settings_can_request(struct irq_desc *desc)
92{ 83{
93 return !(desc->status & _IRQ_NOREQUEST); 84 return !(desc->status_use_accessors & _IRQ_NOREQUEST);
94} 85}
95 86
96static inline void irq_settings_clr_norequest(struct irq_desc *desc) 87static inline void irq_settings_clr_norequest(struct irq_desc *desc)
97{ 88{
98 desc->status &= ~_IRQ_NOREQUEST; 89 desc->status_use_accessors &= ~_IRQ_NOREQUEST;
99} 90}
100 91
101static inline void irq_settings_set_norequest(struct irq_desc *desc) 92static inline void irq_settings_set_norequest(struct irq_desc *desc)
102{ 93{
103 desc->status |= _IRQ_NOREQUEST; 94 desc->status_use_accessors |= _IRQ_NOREQUEST;
104} 95}
105 96
106static inline bool irq_settings_can_probe(struct irq_desc *desc) 97static inline bool irq_settings_can_probe(struct irq_desc *desc)
107{ 98{
108 return !(desc->status & _IRQ_NOPROBE); 99 return !(desc->status_use_accessors & _IRQ_NOPROBE);
109} 100}
110 101
111static inline void irq_settings_clr_noprobe(struct irq_desc *desc) 102static inline void irq_settings_clr_noprobe(struct irq_desc *desc)
112{ 103{
113 desc->status &= ~_IRQ_NOPROBE; 104 desc->status_use_accessors &= ~_IRQ_NOPROBE;
114} 105}
115 106
116static inline void irq_settings_set_noprobe(struct irq_desc *desc) 107static inline void irq_settings_set_noprobe(struct irq_desc *desc)
117{ 108{
118 desc->status |= _IRQ_NOPROBE; 109 desc->status_use_accessors |= _IRQ_NOPROBE;
119} 110}
120 111
121static inline bool irq_settings_can_move_pcntxt(struct irq_desc *desc) 112static inline bool irq_settings_can_move_pcntxt(struct irq_desc *desc)
122{ 113{
123 return desc->status & _IRQ_MOVE_PCNTXT; 114 return desc->status_use_accessors & _IRQ_MOVE_PCNTXT;
124} 115}
125 116
126static inline bool irq_settings_can_autoenable(struct irq_desc *desc) 117static inline bool irq_settings_can_autoenable(struct irq_desc *desc)
127{ 118{
128 return !(desc->status & _IRQ_NOAUTOEN); 119 return !(desc->status_use_accessors & _IRQ_NOAUTOEN);
129} 120}
130 121
131static inline bool irq_settings_is_nested_thread(struct irq_desc *desc) 122static inline bool irq_settings_is_nested_thread(struct irq_desc *desc)
132{ 123{
133 return desc->status & _IRQ_NESTED_THREAD; 124 return desc->status_use_accessors & _IRQ_NESTED_THREAD;
134} 125}
135
136/* Nothing should touch desc->status from now on */
137#undef status
138#define status USE_THE_PROPER_WRAPPERS_YOU_MORON
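The deleted tail was part of a poisoning scheme used during the transition: while internals.h still aliased the old status field name, any leftover direct access such as

	desc->status |= IRQ_LEVEL;

expanded to desc->USE_THE_PROPER_WRAPPERS_YOU_MORON |= GOT_YOU_MORON and broke the build. With the alias gone the field poison is unnecessary, but the GOT_YOU_MORON defines kept at the top of the file still trap direct use of the remaining IRQ_* flag names inside the core code.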
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index dd586ebf9c8c..dfbd550401b2 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -45,12 +45,12 @@ bool irq_wait_for_poll(struct irq_desc *desc)
45#ifdef CONFIG_SMP 45#ifdef CONFIG_SMP
46 do { 46 do {
47 raw_spin_unlock(&desc->lock); 47 raw_spin_unlock(&desc->lock);
48 while (desc->istate & IRQS_INPROGRESS) 48 while (irqd_irq_inprogress(&desc->irq_data))
49 cpu_relax(); 49 cpu_relax();
50 raw_spin_lock(&desc->lock); 50 raw_spin_lock(&desc->lock);
51 } while (desc->istate & IRQS_INPROGRESS); 51 } while (irqd_irq_inprogress(&desc->irq_data));
52 /* Might have been disabled in meantime */ 52 /* Might have been disabled in meantime */
53 return !(desc->istate & IRQS_DISABLED) && desc->action; 53 return !irqd_irq_disabled(&desc->irq_data) && desc->action;
54#else 54#else
55 return false; 55 return false;
56#endif 56#endif
@@ -75,7 +75,7 @@ static int try_one_irq(int irq, struct irq_desc *desc, bool force)
75 * Do not poll disabled interrupts unless the spurious 75 * Do not poll disabled interrupts unless the spurious
76 * disabled poller asks explicitly. 76
77 */ 77 */
78 if ((desc->istate & IRQS_DISABLED) && !force) 78 if (irqd_irq_disabled(&desc->irq_data) && !force)
79 goto out; 79 goto out;
80 80
81 /* 81 /*
@@ -88,12 +88,11 @@ static int try_one_irq(int irq, struct irq_desc *desc, bool force)
88 goto out; 88 goto out;
89 89
90 /* Already running on another processor */ 90 /* Already running on another processor */
91 if (desc->istate & IRQS_INPROGRESS) { 91 if (irqd_irq_inprogress(&desc->irq_data)) {
92 /* 92 /*
93 * Already running: If it is shared get the other 93 * Already running: If it is shared get the other
94 * CPU to go looking for our mystery interrupt too 94 * CPU to go looking for our mystery interrupt too
95 */ 95 */
96 irq_compat_set_pending(desc);
97 desc->istate |= IRQS_PENDING; 96 desc->istate |= IRQS_PENDING;
98 goto out; 97 goto out;
99 } 98 }
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 6f6d091b5757..079f1d39a8b8 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -64,14 +64,14 @@ static inline int is_kernel_text(unsigned long addr)
64 if ((addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) || 64 if ((addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) ||
65 arch_is_kernel_text(addr)) 65 arch_is_kernel_text(addr))
66 return 1; 66 return 1;
67 return in_gate_area_no_task(addr); 67 return in_gate_area_no_mm(addr);
68} 68}
69 69
70static inline int is_kernel(unsigned long addr) 70static inline int is_kernel(unsigned long addr)
71{ 71{
72 if (addr >= (unsigned long)_stext && addr <= (unsigned long)_end) 72 if (addr >= (unsigned long)_stext && addr <= (unsigned long)_end)
73 return 1; 73 return 1;
74 return in_gate_area_no_task(addr); 74 return in_gate_area_no_mm(addr);
75} 75}
76 76
77static int is_ksym_addr(unsigned long addr) 77static int is_ksym_addr(unsigned long addr)
@@ -342,13 +342,15 @@ int lookup_symbol_attrs(unsigned long addr, unsigned long *size,
342} 342}
343 343
344/* Look up a kernel symbol and return it in a text buffer. */ 344/* Look up a kernel symbol and return it in a text buffer. */
345int sprint_symbol(char *buffer, unsigned long address) 345static int __sprint_symbol(char *buffer, unsigned long address,
346 int symbol_offset)
346{ 347{
347 char *modname; 348 char *modname;
348 const char *name; 349 const char *name;
349 unsigned long offset, size; 350 unsigned long offset, size;
350 int len; 351 int len;
351 352
353 address += symbol_offset;
352 name = kallsyms_lookup(address, &size, &offset, &modname, buffer); 354 name = kallsyms_lookup(address, &size, &offset, &modname, buffer);
353 if (!name) 355 if (!name)
354 return sprintf(buffer, "0x%lx", address); 356 return sprintf(buffer, "0x%lx", address);
@@ -357,17 +359,53 @@ int sprint_symbol(char *buffer, unsigned long address)
357 strcpy(buffer, name); 359 strcpy(buffer, name);
358 len = strlen(buffer); 360 len = strlen(buffer);
359 buffer += len; 361 buffer += len;
362 offset -= symbol_offset;
360 363
361 if (modname) 364 if (modname)
362 len += sprintf(buffer, "+%#lx/%#lx [%s]", 365 len += sprintf(buffer, "+%#lx/%#lx [%s]", offset, size, modname);
363 offset, size, modname);
364 else 366 else
365 len += sprintf(buffer, "+%#lx/%#lx", offset, size); 367 len += sprintf(buffer, "+%#lx/%#lx", offset, size);
366 368
367 return len; 369 return len;
368} 370}
371
372/**
373 * sprint_symbol - Look up a kernel symbol and return it in a text buffer
374 * @buffer: buffer to be stored
375 * @address: address to lookup
376 *
377 * This function looks up a kernel symbol with @address and stores its name,
 378 * offset, size and module name in @buffer if possible. If no symbol is
 379 * found, it just stores the @address as is.
380 *
381 * This function returns the number of bytes stored in @buffer.
382 */
383int sprint_symbol(char *buffer, unsigned long address)
384{
385 return __sprint_symbol(buffer, address, 0);
386}
387
369EXPORT_SYMBOL_GPL(sprint_symbol); 388EXPORT_SYMBOL_GPL(sprint_symbol);
370 389
390/**
391 * sprint_backtrace - Look up a backtrace symbol and return it in a text buffer
392 * @buffer: buffer to be stored
393 * @address: address to lookup
394 *
 395 * This function is for stack backtraces and does the same thing as
 396 * sprint_symbol(), but with a modified (decreased) @address. If there is
 397 * a tail call to a function marked "noreturn", gcc optimizes out the code
 398 * after the call, so the stack-saved return address can point outside of
 399 * the caller. This function ensures that kallsyms will find the original
 400 * caller by decreasing @address.
401 *
402 * This function returns the number of bytes stored in @buffer.
403 */
404int sprint_backtrace(char *buffer, unsigned long address)
405{
406 return __sprint_symbol(buffer, address, -1);
407}
408
371/* Look up a kernel symbol and print it to the kernel messages. */ 409/* Look up a kernel symbol and print it to the kernel messages. */
372void __print_symbol(const char *fmt, unsigned long address) 410void __print_symbol(const char *fmt, unsigned long address)
373{ 411{
@@ -477,13 +515,11 @@ static int s_show(struct seq_file *m, void *p)
477 */ 515 */
478 type = iter->exported ? toupper(iter->type) : 516 type = iter->exported ? toupper(iter->type) :
479 tolower(iter->type); 517 tolower(iter->type);
480 seq_printf(m, "%0*lx %c %s\t[%s]\n", 518 seq_printf(m, "%pK %c %s\t[%s]\n", (void *)iter->value,
481 (int)(2 * sizeof(void *)), 519 type, iter->name, iter->module_name);
482 iter->value, type, iter->name, iter->module_name);
483 } else 520 } else
484 seq_printf(m, "%0*lx %c %s\n", 521 seq_printf(m, "%pK %c %s\n", (void *)iter->value,
485 (int)(2 * sizeof(void *)), 522 iter->type, iter->name);
486 iter->value, iter->type, iter->name);
487 return 0; 523 return 0;
488} 524}
489 525
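A usage sketch for the new helper, resolving a stack-saved return address into a backtrace line (demo_print_frame() is hypothetical):

	#include <linux/kallsyms.h>

	static void demo_print_frame(unsigned long ret_addr)
	{
		char sym[KSYM_SYMBOL_LEN];

		/* biases the lookup by -1, so a tail call into a noreturn
		 * function still resolves to the calling function */
		sprint_backtrace(sym, ret_addr);
		printk(KERN_INFO " [<%p>] %s\n", (void *)ret_addr, sym);
	}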
diff --git a/kernel/kthread.c b/kernel/kthread.c
index c55afba990a3..684ab3f7dd72 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -27,6 +27,7 @@ struct kthread_create_info
27 /* Information passed to kthread() from kthreadd. */ 27 /* Information passed to kthread() from kthreadd. */
28 int (*threadfn)(void *data); 28 int (*threadfn)(void *data);
29 void *data; 29 void *data;
30 int node;
30 31
31 /* Result passed back to kthread_create() from kthreadd. */ 32 /* Result passed back to kthread_create() from kthreadd. */
32 struct task_struct *result; 33 struct task_struct *result;
@@ -98,10 +99,23 @@ static int kthread(void *_create)
98 do_exit(ret); 99 do_exit(ret);
99} 100}
100 101
 102/* called from do_fork() to get node information for the task about to be created */
103int tsk_fork_get_node(struct task_struct *tsk)
104{
105#ifdef CONFIG_NUMA
106 if (tsk == kthreadd_task)
107 return tsk->pref_node_fork;
108#endif
109 return numa_node_id();
110}
111
101static void create_kthread(struct kthread_create_info *create) 112static void create_kthread(struct kthread_create_info *create)
102{ 113{
103 int pid; 114 int pid;
104 115
116#ifdef CONFIG_NUMA
117 current->pref_node_fork = create->node;
118#endif
105 /* We want our own signal handler (we take no signals by default). */ 119 /* We want our own signal handler (we take no signals by default). */
106 pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD); 120 pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD);
107 if (pid < 0) { 121 if (pid < 0) {
@@ -111,15 +125,18 @@ static void create_kthread(struct kthread_create_info *create)
111} 125}
112 126
113/** 127/**
114 * kthread_create - create a kthread. 128 * kthread_create_on_node - create a kthread.
115 * @threadfn: the function to run until signal_pending(current). 129 * @threadfn: the function to run until signal_pending(current).
116 * @data: data ptr for @threadfn. 130 * @data: data ptr for @threadfn.
131 * @node: memory node number.
117 * @namefmt: printf-style name for the thread. 132 * @namefmt: printf-style name for the thread.
118 * 133 *
119 * Description: This helper function creates and names a kernel 134 * Description: This helper function creates and names a kernel
120 * thread. The thread will be stopped: use wake_up_process() to start 135 * thread. The thread will be stopped: use wake_up_process() to start
121 * it. See also kthread_run(). 136 * it. See also kthread_run().
122 * 137 *
 138 * If the thread is going to be bound to a particular cpu, give that cpu's
 139 * node in @node to get NUMA affinity for the kthread stack; otherwise give -1.
123 * When woken, the thread will run @threadfn() with @data as its 140 * When woken, the thread will run @threadfn() with @data as its
124 * argument. @threadfn() can either call do_exit() directly if it is a 141 * argument. @threadfn() can either call do_exit() directly if it is a
 125 * standalone thread for which no one will call kthread_stop(), or 142
@@ -129,15 +146,17 @@ static void create_kthread(struct kthread_create_info *create)
129 * 146 *
130 * Returns a task_struct or ERR_PTR(-ENOMEM). 147 * Returns a task_struct or ERR_PTR(-ENOMEM).
131 */ 148 */
132struct task_struct *kthread_create(int (*threadfn)(void *data), 149struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
133 void *data, 150 void *data,
134 const char namefmt[], 151 int node,
135 ...) 152 const char namefmt[],
153 ...)
136{ 154{
137 struct kthread_create_info create; 155 struct kthread_create_info create;
138 156
139 create.threadfn = threadfn; 157 create.threadfn = threadfn;
140 create.data = data; 158 create.data = data;
159 create.node = node;
141 init_completion(&create.done); 160 init_completion(&create.done);
142 161
143 spin_lock(&kthread_create_lock); 162 spin_lock(&kthread_create_lock);
@@ -164,7 +183,7 @@ struct task_struct *kthread_create(int (*threadfn)(void *data),
164 } 183 }
165 return create.result; 184 return create.result;
166} 185}
167EXPORT_SYMBOL(kthread_create); 186EXPORT_SYMBOL(kthread_create_on_node);
168 187
169/** 188/**
170 * kthread_bind - bind a just-created kthread to a cpu. 189 * kthread_bind - bind a just-created kthread to a cpu.
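A usage sketch for the NUMA-aware variant, creating a per-cpu worker whose stack comes from that cpu's node (worker_fn() and start_worker() are hypothetical):

	static int worker_fn(void *data)
	{
		while (!kthread_should_stop()) {
			set_current_state(TASK_INTERRUPTIBLE);
			schedule();
		}
		return 0;
	}

	static struct task_struct *start_worker(unsigned int cpu)
	{
		struct task_struct *t;

		t = kthread_create_on_node(worker_fn, NULL, cpu_to_node(cpu),
					   "demo/%u", cpu);
		if (!IS_ERR(t)) {
			kthread_bind(t, cpu);
			wake_up_process(t);
		}
		return t;
	}

Callers that do not care about placement pass -1 as @node, per the kerneldoc above.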
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index 1969d2fc4b36..71edd2f60c02 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -225,7 +225,7 @@ static int lockdep_stats_show(struct seq_file *m, void *v)
225 nr_irq_read_safe = 0, nr_irq_read_unsafe = 0, 225 nr_irq_read_safe = 0, nr_irq_read_unsafe = 0,
226 nr_softirq_read_safe = 0, nr_softirq_read_unsafe = 0, 226 nr_softirq_read_safe = 0, nr_softirq_read_unsafe = 0,
227 nr_hardirq_read_safe = 0, nr_hardirq_read_unsafe = 0, 227 nr_hardirq_read_safe = 0, nr_hardirq_read_unsafe = 0,
228 sum_forward_deps = 0, factor = 0; 228 sum_forward_deps = 0;
229 229
230 list_for_each_entry(class, &all_lock_classes, lock_entry) { 230 list_for_each_entry(class, &all_lock_classes, lock_entry) {
231 231
@@ -283,13 +283,6 @@ static int lockdep_stats_show(struct seq_file *m, void *v)
283 nr_hardirq_unsafe * nr_hardirq_safe + 283 nr_hardirq_unsafe * nr_hardirq_safe +
284 nr_list_entries); 284 nr_list_entries);
285 285
286 /*
287 * Estimated factor between direct and indirect
288 * dependencies:
289 */
290 if (nr_list_entries)
291 factor = sum_forward_deps / nr_list_entries;
292
293#ifdef CONFIG_PROVE_LOCKING 286#ifdef CONFIG_PROVE_LOCKING
294 seq_printf(m, " dependency chains: %11lu [max: %lu]\n", 287 seq_printf(m, " dependency chains: %11lu [max: %lu]\n",
295 nr_lock_chains, MAX_LOCKDEP_CHAINS); 288 nr_lock_chains, MAX_LOCKDEP_CHAINS);
diff --git a/kernel/module.c b/kernel/module.c
index efa290ea94bf..1f9f7bc56ca1 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1168,7 +1168,7 @@ static ssize_t module_sect_show(struct module_attribute *mattr,
1168{ 1168{
1169 struct module_sect_attr *sattr = 1169 struct module_sect_attr *sattr =
1170 container_of(mattr, struct module_sect_attr, mattr); 1170 container_of(mattr, struct module_sect_attr, mattr);
1171 return sprintf(buf, "0x%lx\n", sattr->address); 1171 return sprintf(buf, "0x%pK\n", (void *)sattr->address);
1172} 1172}
1173 1173
1174static void free_sect_attrs(struct module_sect_attrs *sect_attrs) 1174static void free_sect_attrs(struct module_sect_attrs *sect_attrs)
@@ -3224,7 +3224,7 @@ static int m_show(struct seq_file *m, void *p)
3224 mod->state == MODULE_STATE_COMING ? "Loading": 3224 mod->state == MODULE_STATE_COMING ? "Loading":
3225 "Live"); 3225 "Live");
3226 /* Used by oprofile and other similar tools. */ 3226 /* Used by oprofile and other similar tools. */
3227 seq_printf(m, " 0x%p", mod->module_core); 3227 seq_printf(m, " 0x%pK", mod->module_core);
3228 3228
3229 /* Taints info */ 3229 /* Taints info */
3230 if (mod->taints) 3230 if (mod->taints)
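Both hunks move kernel addresses from raw %p/%lx formatting to %pK, which consults the kptr_restrict sysctl. A one-line sketch of the effect:

	/* prints the real address to privileged readers only; unprivileged
	 * readers see zeros when kptr_restrict is set */
	seq_printf(m, "addr: %pK\n", ptr);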
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index f74e6c00e26d..a05d191ffdd9 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -69,13 +69,13 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
69 goto out_ns; 69 goto out_ns;
70 } 70 }
71 71
72 new_nsp->uts_ns = copy_utsname(flags, tsk->nsproxy->uts_ns); 72 new_nsp->uts_ns = copy_utsname(flags, tsk);
73 if (IS_ERR(new_nsp->uts_ns)) { 73 if (IS_ERR(new_nsp->uts_ns)) {
74 err = PTR_ERR(new_nsp->uts_ns); 74 err = PTR_ERR(new_nsp->uts_ns);
75 goto out_uts; 75 goto out_uts;
76 } 76 }
77 77
78 new_nsp->ipc_ns = copy_ipcs(flags, tsk->nsproxy->ipc_ns); 78 new_nsp->ipc_ns = copy_ipcs(flags, tsk);
79 if (IS_ERR(new_nsp->ipc_ns)) { 79 if (IS_ERR(new_nsp->ipc_ns)) {
80 err = PTR_ERR(new_nsp->ipc_ns); 80 err = PTR_ERR(new_nsp->ipc_ns);
81 goto out_ipc; 81 goto out_ipc;
diff --git a/kernel/panic.c b/kernel/panic.c
index 991bb87a1704..69231670eb95 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -433,3 +433,13 @@ EXPORT_SYMBOL(__stack_chk_fail);
433 433
434core_param(panic, panic_timeout, int, 0644); 434core_param(panic, panic_timeout, int, 0644);
435core_param(pause_on_oops, pause_on_oops, int, 0644); 435core_param(pause_on_oops, pause_on_oops, int, 0644);
436
437static int __init oops_setup(char *s)
438{
439 if (!s)
440 return -EINVAL;
441 if (!strcmp(s, "panic"))
442 panic_on_oops = 1;
443 return 0;
444}
445early_param("oops", oops_setup);
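The new early parameter combines naturally with the panic_timeout core_param registered above; booting with, for example,

	oops=panic panic=30

turns every oops into a panic and then reboots the machine 30 seconds later.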
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 3472bb1a070c..c75925c4d1e2 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -145,7 +145,8 @@ static struct srcu_struct pmus_srcu;
145 */ 145 */
146int sysctl_perf_event_paranoid __read_mostly = 1; 146int sysctl_perf_event_paranoid __read_mostly = 1;
147 147
148int sysctl_perf_event_mlock __read_mostly = 512; /* 'free' kb per user */ 148/* Minimum for 128 pages + 1 for the user control page */
149int sysctl_perf_event_mlock __read_mostly = 516; /* 'free' kb per user */
149 150
150/* 151/*
151 * max perf event sample rate 152 * max perf event sample rate
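The new default is the old 512 KiB of ring buffer plus one control page; assuming 4 KiB pages:

	128 data pages * 4 KiB + 1 control page * 4 KiB = 516 KiB

so a user at the mlock limit can still map the common 128-page ring buffer together with its control page.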
@@ -941,6 +942,7 @@ static void perf_group_attach(struct perf_event *event)
941static void 942static void
942list_del_event(struct perf_event *event, struct perf_event_context *ctx) 943list_del_event(struct perf_event *event, struct perf_event_context *ctx)
943{ 944{
945 struct perf_cpu_context *cpuctx;
944 /* 946 /*
945 * We can have double detach due to exit/hot-unplug + close. 947 * We can have double detach due to exit/hot-unplug + close.
946 */ 948 */
@@ -949,8 +951,17 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
949 951
950 event->attach_state &= ~PERF_ATTACH_CONTEXT; 952 event->attach_state &= ~PERF_ATTACH_CONTEXT;
951 953
952 if (is_cgroup_event(event)) 954 if (is_cgroup_event(event)) {
953 ctx->nr_cgroups--; 955 ctx->nr_cgroups--;
956 cpuctx = __get_cpu_context(ctx);
957 /*
958 * if there are no more cgroup events
 959 * then clear cgrp to avoid a stale pointer
960 * in update_cgrp_time_from_cpuctx()
961 */
962 if (!ctx->nr_cgroups)
963 cpuctx->cgrp = NULL;
964 }
954 965
955 ctx->nr_events--; 966 ctx->nr_events--;
956 if (event->attr.inherit_stat) 967 if (event->attr.inherit_stat)
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index a5aff94e1f0b..e9c9adc84ca6 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -14,6 +14,7 @@
14#include <linux/err.h> 14#include <linux/err.h>
15#include <linux/acct.h> 15#include <linux/acct.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/proc_fs.h>
17 18
18#define BITS_PER_PAGE (PAGE_SIZE*8) 19#define BITS_PER_PAGE (PAGE_SIZE*8)
19 20
@@ -72,7 +73,7 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p
72{ 73{
73 struct pid_namespace *ns; 74 struct pid_namespace *ns;
74 unsigned int level = parent_pid_ns->level + 1; 75 unsigned int level = parent_pid_ns->level + 1;
75 int i; 76 int i, err = -ENOMEM;
76 77
77 ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL); 78 ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL);
78 if (ns == NULL) 79 if (ns == NULL)
@@ -96,14 +97,20 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p
96 for (i = 1; i < PIDMAP_ENTRIES; i++) 97 for (i = 1; i < PIDMAP_ENTRIES; i++)
97 atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); 98 atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
98 99
100 err = pid_ns_prepare_proc(ns);
101 if (err)
102 goto out_put_parent_pid_ns;
103
99 return ns; 104 return ns;
100 105
106out_put_parent_pid_ns:
107 put_pid_ns(parent_pid_ns);
101out_free_map: 108out_free_map:
102 kfree(ns->pidmap[0].page); 109 kfree(ns->pidmap[0].page);
103out_free: 110out_free:
104 kmem_cache_free(pid_ns_cachep, ns); 111 kmem_cache_free(pid_ns_cachep, ns);
105out: 112out:
106 return ERR_PTR(-ENOMEM); 113 return ERR_PTR(err);
107} 114}
108 115
109static void destroy_pid_namespace(struct pid_namespace *ns) 116static void destroy_pid_namespace(struct pid_namespace *ns)
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index c350e18b53e3..c5ebc6a90643 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -1,4 +1,5 @@
1ccflags-$(CONFIG_PM_DEBUG) := -DDEBUG 1
2ccflags-$(CONFIG_PM_DEBUG) := -DDEBUG
2 3
3obj-$(CONFIG_PM) += main.o 4obj-$(CONFIG_PM) += main.o
4obj-$(CONFIG_PM_SLEEP) += console.o 5obj-$(CONFIG_PM_SLEEP) += console.o
diff --git a/kernel/power/block_io.c b/kernel/power/block_io.c
index 83bbc7c02df9..d09dd10c5a5e 100644
--- a/kernel/power/block_io.c
+++ b/kernel/power/block_io.c
@@ -28,7 +28,7 @@
28static int submit(int rw, struct block_device *bdev, sector_t sector, 28static int submit(int rw, struct block_device *bdev, sector_t sector,
29 struct page *page, struct bio **bio_chain) 29 struct page *page, struct bio **bio_chain)
30{ 30{
31 const int bio_rw = rw | REQ_SYNC | REQ_UNPLUG; 31 const int bio_rw = rw | REQ_SYNC;
32 struct bio *bio; 32 struct bio *bio;
33 33
34 bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); 34 bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
diff --git a/kernel/printk.c b/kernel/printk.c
index 33284adb2189..da8ca817eae3 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -53,7 +53,7 @@ void asmlinkage __attribute__((weak)) early_printk(const char *fmt, ...)
53#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) 53#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
54 54
55/* printk's without a loglevel use this.. */ 55/* printk's without a loglevel use this.. */
56#define DEFAULT_MESSAGE_LOGLEVEL 4 /* KERN_WARNING */ 56#define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL
57 57
58/* We show everything that is MORE important than this.. */ 58/* We show everything that is MORE important than this.. */
59#define MINIMUM_CONSOLE_LOGLEVEL 1 /* Minimum loglevel we let people use */ 59#define MINIMUM_CONSOLE_LOGLEVEL 1 /* Minimum loglevel we let people use */
@@ -113,6 +113,11 @@ static unsigned con_start; /* Index into log_buf: next char to be sent to consol
113static unsigned log_end; /* Index into log_buf: most-recently-written-char + 1 */ 113static unsigned log_end; /* Index into log_buf: most-recently-written-char + 1 */
114 114
115/* 115/*
116 * If exclusive_console is non-NULL then only this console is to be printed to.
117 */
118static struct console *exclusive_console;
119
120/*
116 * Array of consoles built from command line options (console=) 121 * Array of consoles built from command line options (console=)
117 */ 122 */
118struct console_cmdline 123struct console_cmdline
@@ -476,6 +481,8 @@ static void __call_console_drivers(unsigned start, unsigned end)
476 struct console *con; 481 struct console *con;
477 482
478 for_each_console(con) { 483 for_each_console(con) {
484 if (exclusive_console && con != exclusive_console)
485 continue;
479 if ((con->flags & CON_ENABLED) && con->write && 486 if ((con->flags & CON_ENABLED) && con->write &&
480 (cpu_online(smp_processor_id()) || 487 (cpu_online(smp_processor_id()) ||
481 (con->flags & CON_ANYTIME))) 488 (con->flags & CON_ANYTIME)))
@@ -1230,6 +1237,11 @@ void console_unlock(void)
1230 local_irq_restore(flags); 1237 local_irq_restore(flags);
1231 } 1238 }
1232 console_locked = 0; 1239 console_locked = 0;
1240
1241 /* Release the exclusive_console once it is used */
1242 if (unlikely(exclusive_console))
1243 exclusive_console = NULL;
1244
1233 up(&console_sem); 1245 up(&console_sem);
1234 spin_unlock_irqrestore(&logbuf_lock, flags); 1246 spin_unlock_irqrestore(&logbuf_lock, flags);
1235 if (wake_klogd) 1247 if (wake_klogd)
@@ -1316,6 +1328,18 @@ void console_start(struct console *console)
1316} 1328}
1317EXPORT_SYMBOL(console_start); 1329EXPORT_SYMBOL(console_start);
1318 1330
1331static int __read_mostly keep_bootcon;
1332
1333static int __init keep_bootcon_setup(char *str)
1334{
1335 keep_bootcon = 1;
1336 printk(KERN_INFO "debug: skip boot console de-registration.\n");
1337
1338 return 0;
1339}
1340
1341early_param("keep_bootcon", keep_bootcon_setup);
1342
1319/* 1343/*
1320 * The console driver calls this routine during kernel initialization 1344 * The console driver calls this routine during kernel initialization
1321 * to register the console printing procedure with printk() and to 1345 * to register the console printing procedure with printk() and to
@@ -1452,6 +1476,12 @@ void register_console(struct console *newcon)
1452 spin_lock_irqsave(&logbuf_lock, flags); 1476 spin_lock_irqsave(&logbuf_lock, flags);
1453 con_start = log_start; 1477 con_start = log_start;
1454 spin_unlock_irqrestore(&logbuf_lock, flags); 1478 spin_unlock_irqrestore(&logbuf_lock, flags);
1479 /*
1480 * We're about to replay the log buffer. Only do this to the
1481 * just-registered console to avoid excessive message spam to
1482 * the already-registered consoles.
1483 */
1484 exclusive_console = newcon;
1455 } 1485 }
1456 console_unlock(); 1486 console_unlock();
1457 console_sysfs_notify(); 1487 console_sysfs_notify();
@@ -1463,7 +1493,9 @@ void register_console(struct console *newcon)
1463 * users know there might be something in the kernel's log buffer that 1493 * users know there might be something in the kernel's log buffer that
1464 * went to the bootconsole (that they do not see on the real console) 1494 * went to the bootconsole (that they do not see on the real console)
1465 */ 1495 */
1466 if (bcon && ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV)) { 1496 if (bcon &&
1497 ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV) &&
1498 !keep_bootcon) {
1467 /* we need to iterate through twice, to make sure we print 1499 /* we need to iterate through twice, to make sure we print
1468 * everything out, before we unregister the console(s) 1500 * everything out, before we unregister the console(s)
1469 */ 1501 */
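Taken together, the printk.c hunks do two things: a newly registered console replays the buffered log exactly once (via exclusive_console, so already-registered consoles are not spammed with duplicates), and the new keep_bootcon early parameter stops the boot console from being unregistered when a real console takes over, which helps when the real console driver itself is being debugged. A hypothetical command line (the serial console name is only an illustration):

    console=ttyS0,115200 keep_bootcon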
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index e2302e40b360..0fc1eed28d27 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -134,21 +134,24 @@ int __ptrace_may_access(struct task_struct *task, unsigned int mode)
134 return 0; 134 return 0;
135 rcu_read_lock(); 135 rcu_read_lock();
136 tcred = __task_cred(task); 136 tcred = __task_cred(task);
137 if ((cred->uid != tcred->euid || 137 if (cred->user->user_ns == tcred->user->user_ns &&
138 cred->uid != tcred->suid || 138 (cred->uid == tcred->euid &&
139 cred->uid != tcred->uid || 139 cred->uid == tcred->suid &&
140 cred->gid != tcred->egid || 140 cred->uid == tcred->uid &&
141 cred->gid != tcred->sgid || 141 cred->gid == tcred->egid &&
142 cred->gid != tcred->gid) && 142 cred->gid == tcred->sgid &&
143 !capable(CAP_SYS_PTRACE)) { 143 cred->gid == tcred->gid))
144 rcu_read_unlock(); 144 goto ok;
145 return -EPERM; 145 if (ns_capable(tcred->user->user_ns, CAP_SYS_PTRACE))
146 } 146 goto ok;
147 rcu_read_unlock();
148 return -EPERM;
149ok:
147 rcu_read_unlock(); 150 rcu_read_unlock();
148 smp_rmb(); 151 smp_rmb();
149 if (task->mm) 152 if (task->mm)
150 dumpable = get_dumpable(task->mm); 153 dumpable = get_dumpable(task->mm);
151 if (!dumpable && !capable(CAP_SYS_PTRACE)) 154 if (!dumpable && !task_ns_capable(task, CAP_SYS_PTRACE))
152 return -EPERM; 155 return -EPERM;
153 156
154 return security_ptrace_access_check(task, mode); 157 return security_ptrace_access_check(task, mode);
@@ -198,7 +201,7 @@ static int ptrace_attach(struct task_struct *task)
198 goto unlock_tasklist; 201 goto unlock_tasklist;
199 202
200 task->ptrace = PT_PTRACED; 203 task->ptrace = PT_PTRACED;
201 if (capable(CAP_SYS_PTRACE)) 204 if (task_ns_capable(task, CAP_SYS_PTRACE))
202 task->ptrace |= PT_PTRACE_CAP; 205 task->ptrace |= PT_PTRACE_CAP;
203 206
204 __ptrace_link(task, current); 207 __ptrace_link(task, current);
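This is the first of several conversions in this series from capable() to ns_capable()/task_ns_capable(): the capability is judged against the user namespace of the task being acted on rather than the initial namespace. A minimal sketch of the pattern, assuming the 2.6.39-era cred layout used in the hunk above (may_poke() is a hypothetical helper):

    #include <linux/capability.h>
    #include <linux/cred.h>
    #include <linux/sched.h>

    static bool may_poke(struct task_struct *target)
    {
            const struct cred *tcred;
            bool ok;

            rcu_read_lock();
            tcred = __task_cred(target);
            /* check CAP_SYS_PTRACE in the *target's* user namespace */
            ok = ns_capable(tcred->user->user_ns, CAP_SYS_PTRACE);
            rcu_read_unlock();
            return ok;
    }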
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index c7eaa37a768b..34683efa2cce 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -126,10 +126,24 @@ ssize_t res_counter_read(struct res_counter *counter, int member,
126 pos, buf, s - buf); 126 pos, buf, s - buf);
127} 127}
128 128
129#if BITS_PER_LONG == 32
130u64 res_counter_read_u64(struct res_counter *counter, int member)
131{
132 unsigned long flags;
133 u64 ret;
134
135 spin_lock_irqsave(&counter->lock, flags);
136 ret = *res_counter_member(counter, member);
137 spin_unlock_irqrestore(&counter->lock, flags);
138
139 return ret;
140}
141#else
129u64 res_counter_read_u64(struct res_counter *counter, int member) 142u64 res_counter_read_u64(struct res_counter *counter, int member)
130{ 143{
131 return *res_counter_member(counter, member); 144 return *res_counter_member(counter, member);
132} 145}
146#endif
133 147
134int res_counter_memparse_write_strategy(const char *buf, 148int res_counter_memparse_write_strategy(const char *buf,
135 unsigned long long *res) 149 unsigned long long *res)
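The new BITS_PER_LONG == 32 branch exists because a 64-bit load is not atomic on 32-bit machines: it compiles to two 32-bit loads, so an unlocked reader racing a writer can observe a half-updated value. A schematic illustration (not kernel code):

    u64 v = 0x00000000ffffffffULL;

    /* writer on a 32-bit CPU: v++ compiles to two separate stores   */
    /*   store low  word: v == 0x0000000000000000 (transiently)      */
    /*   store high word: v == 0x0000000100000000                    */

    /* an unlocked reader running between the two stores returns 0   */
    /* (or 0x00000001ffffffff with the opposite store order), i.e. a */
    /* value that was never logically present; taking counter->lock  */
    /* around the read, as above, rules this out.                    */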
diff --git a/kernel/sched.c b/kernel/sched.c
index a172494a9a63..f592ce6f8616 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4115,6 +4115,16 @@ need_resched:
4115 switch_count = &prev->nvcsw; 4115 switch_count = &prev->nvcsw;
4116 } 4116 }
4117 4117
4118 /*
4119 * If we are going to sleep and we have plugged IO queued, make
4120 * sure to submit it to avoid deadlocks.
4121 */
4122 if (prev->state != TASK_RUNNING && blk_needs_flush_plug(prev)) {
4123 raw_spin_unlock(&rq->lock);
4124 blk_flush_plug(prev);
4125 raw_spin_lock(&rq->lock);
4126 }
4127
4118 pre_schedule(rq, prev); 4128 pre_schedule(rq, prev);
4119 4129
4120 if (unlikely(!rq->nr_running)) 4130 if (unlikely(!rq->nr_running))
@@ -4892,8 +4902,11 @@ static bool check_same_owner(struct task_struct *p)
4892 4902
4893 rcu_read_lock(); 4903 rcu_read_lock();
4894 pcred = __task_cred(p); 4904 pcred = __task_cred(p);
4895 match = (cred->euid == pcred->euid || 4905 if (cred->user->user_ns == pcred->user->user_ns)
4896 cred->euid == pcred->uid); 4906 match = (cred->euid == pcred->euid ||
4907 cred->euid == pcred->uid);
4908 else
4909 match = false;
4897 rcu_read_unlock(); 4910 rcu_read_unlock();
4898 return match; 4911 return match;
4899} 4912}
@@ -5221,7 +5234,7 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
5221 goto out_free_cpus_allowed; 5234 goto out_free_cpus_allowed;
5222 } 5235 }
5223 retval = -EPERM; 5236 retval = -EPERM;
5224 if (!check_same_owner(p) && !capable(CAP_SYS_NICE)) 5237 if (!check_same_owner(p) && !task_ns_capable(p, CAP_SYS_NICE))
5225 goto out_unlock; 5238 goto out_unlock;
5226 5239
5227 retval = security_task_setscheduler(p); 5240 retval = security_task_setscheduler(p);
@@ -5460,6 +5473,8 @@ EXPORT_SYMBOL(yield);
5460 * yield_to - yield the current processor to another thread in 5473 * yield_to - yield the current processor to another thread in
5461 * your thread group, or accelerate that thread toward the 5474 * your thread group, or accelerate that thread toward the
5462 * processor it's on. 5475 * processor it's on.
5476 * @p: target task
5477 * @preempt: whether task preemption is allowed or not
5463 * 5478 *
5464 * It's the caller's job to ensure that the target task struct 5479 * It's the caller's job to ensure that the target task struct
5465 * can't go away on us before we can do any checks. 5480 * can't go away on us before we can do any checks.
@@ -5525,6 +5540,7 @@ void __sched io_schedule(void)
5525 5540
5526 delayacct_blkio_start(); 5541 delayacct_blkio_start();
5527 atomic_inc(&rq->nr_iowait); 5542 atomic_inc(&rq->nr_iowait);
5543 blk_flush_plug(current);
5528 current->in_iowait = 1; 5544 current->in_iowait = 1;
5529 schedule(); 5545 schedule();
5530 current->in_iowait = 0; 5546 current->in_iowait = 0;
@@ -5540,6 +5556,7 @@ long __sched io_schedule_timeout(long timeout)
5540 5556
5541 delayacct_blkio_start(); 5557 delayacct_blkio_start();
5542 atomic_inc(&rq->nr_iowait); 5558 atomic_inc(&rq->nr_iowait);
5559 blk_flush_plug(current);
5543 current->in_iowait = 1; 5560 current->in_iowait = 1;
5544 ret = schedule_timeout(timeout); 5561 ret = schedule_timeout(timeout);
5545 current->in_iowait = 0; 5562 current->in_iowait = 0;
@@ -8434,7 +8451,6 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
8434{ 8451{
8435 struct cfs_rq *cfs_rq; 8452 struct cfs_rq *cfs_rq;
8436 struct sched_entity *se; 8453 struct sched_entity *se;
8437 struct rq *rq;
8438 int i; 8454 int i;
8439 8455
8440 tg->cfs_rq = kzalloc(sizeof(cfs_rq) * nr_cpu_ids, GFP_KERNEL); 8456 tg->cfs_rq = kzalloc(sizeof(cfs_rq) * nr_cpu_ids, GFP_KERNEL);
@@ -8447,8 +8463,6 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
8447 tg->shares = NICE_0_LOAD; 8463 tg->shares = NICE_0_LOAD;
8448 8464
8449 for_each_possible_cpu(i) { 8465 for_each_possible_cpu(i) {
8450 rq = cpu_rq(i);
8451
8452 cfs_rq = kzalloc_node(sizeof(struct cfs_rq), 8466 cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
8453 GFP_KERNEL, cpu_to_node(i)); 8467 GFP_KERNEL, cpu_to_node(i));
8454 if (!cfs_rq) 8468 if (!cfs_rq)
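Background for these sched.c hunks: 2.6.39 moved block-layer plugging onto the caller's stack, so a task can reach schedule() while it still holds queued-but-unsubmitted bios; if it then sleeps waiting on that very I/O, it deadlocks. Hence schedule() and io_schedule() now flush the plug before sleeping. A minimal sketch of the plugging API involved (hypothetical caller, error handling omitted):

    #include <linux/blkdev.h>

    void submit_batch(void)             /* hypothetical caller */
    {
            struct blk_plug plug;

            blk_start_plug(&plug);      /* queue bios on the stack */
            /* ... submit_bio() calls accumulate here ... */
            blk_finish_plug(&plug);     /* push them to the device */
    }

    /* If the task sleeps before blk_finish_plug(), schedule() now
     * calls blk_flush_plug() on its behalf, avoiding the deadlock. */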
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index c82f26c1b7c3..a776a6396427 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -94,6 +94,4 @@ static const struct sched_class idle_sched_class = {
94 94
95 .prio_changed = prio_changed_idle, 95 .prio_changed = prio_changed_idle,
96 .switched_to = switched_to_idle, 96 .switched_to = switched_to_idle,
97
98 /* no .task_new for idle tasks */
99}; 97};
diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c
index 84ec9bcf82d9..1ba2bd40fdac 100644
--- a/kernel/sched_stoptask.c
+++ b/kernel/sched_stoptask.c
@@ -102,6 +102,4 @@ static const struct sched_class stop_sched_class = {
102 102
103 .prio_changed = prio_changed_stop, 103 .prio_changed = prio_changed_stop,
104 .switched_to = switched_to_stop, 104 .switched_to = switched_to_stop,
105
106 /* no .task_new for stop tasks */
107}; 105};
diff --git a/kernel/signal.c b/kernel/signal.c
index 4e3cff10fdce..1186cf7fac77 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -636,13 +636,33 @@ static inline bool si_fromuser(const struct siginfo *info)
636} 636}
637 637
638/* 638/*
639 * called with RCU read lock from check_kill_permission()
640 */
641static int kill_ok_by_cred(struct task_struct *t)
642{
643 const struct cred *cred = current_cred();
644 const struct cred *tcred = __task_cred(t);
645
646 if (cred->user->user_ns == tcred->user->user_ns &&
647 (cred->euid == tcred->suid ||
648 cred->euid == tcred->uid ||
649 cred->uid == tcred->suid ||
650 cred->uid == tcred->uid))
651 return 1;
652
653 if (ns_capable(tcred->user->user_ns, CAP_KILL))
654 return 1;
655
656 return 0;
657}
658
659/*
639 * Bad permissions for sending the signal 660 * Bad permissions for sending the signal
640 * - the caller must hold the RCU read lock 661 * - the caller must hold the RCU read lock
641 */ 662 */
642static int check_kill_permission(int sig, struct siginfo *info, 663static int check_kill_permission(int sig, struct siginfo *info,
643 struct task_struct *t) 664 struct task_struct *t)
644{ 665{
645 const struct cred *cred, *tcred;
646 struct pid *sid; 666 struct pid *sid;
647 int error; 667 int error;
648 668
@@ -656,14 +676,8 @@ static int check_kill_permission(int sig, struct siginfo *info,
656 if (error) 676 if (error)
657 return error; 677 return error;
658 678
659 cred = current_cred();
660 tcred = __task_cred(t);
661 if (!same_thread_group(current, t) && 679 if (!same_thread_group(current, t) &&
662 (cred->euid ^ tcred->suid) && 680 !kill_ok_by_cred(t)) {
663 (cred->euid ^ tcred->uid) &&
664 (cred->uid ^ tcred->suid) &&
665 (cred->uid ^ tcred->uid) &&
666 !capable(CAP_KILL)) {
667 switch (sig) { 681 switch (sig) {
668 case SIGCONT: 682 case SIGCONT:
669 sid = task_session(t); 683 sid = task_session(t);
@@ -2421,9 +2435,13 @@ SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig,
2421 return -EFAULT; 2435 return -EFAULT;
2422 2436
2423 /* Not even root can pretend to send signals from the kernel. 2437 /* Not even root can pretend to send signals from the kernel.
2424 Nor can they impersonate a kill(), which adds source info. */ 2438 * Nor can they impersonate a kill()/tgkill(), which adds source info.
2425 if (info.si_code >= 0) 2439 */
2440 if (info.si_code >= 0 || info.si_code == SI_TKILL) {
2441 /* We used to allow any < 0 si_code */
2442 WARN_ON_ONCE(info.si_code < 0);
2426 return -EPERM; 2443 return -EPERM;
2444 }
2427 info.si_signo = sig; 2445 info.si_signo = sig;
2428 2446
2429 /* POSIX.1b doesn't mention process groups. */ 2447 /* POSIX.1b doesn't mention process groups. */
@@ -2437,9 +2455,13 @@ long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info)
2437 return -EINVAL; 2455 return -EINVAL;
2438 2456
2439 /* Not even root can pretend to send signals from the kernel. 2457 /* Not even root can pretend to send signals from the kernel.
2440 Nor can they impersonate a kill(), which adds source info. */ 2458 * Nor can they impersonate a kill()/tgkill(), which adds source info.
2441 if (info->si_code >= 0) 2459 */
2460 if (info->si_code >= 0 || info->si_code == SI_TKILL) {
2461 /* We used to allow any < 0 si_code */
2462 WARN_ON_ONCE(info->si_code < 0);
2442 return -EPERM; 2463 return -EPERM;
2464 }
2443 info->si_signo = sig; 2465 info->si_signo = sig;
2444 2466
2445 return do_send_specific(tgid, pid, sig, info); 2467 return do_send_specific(tgid, pid, sig, info);
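The SI_TKILL check closes a hole where userspace could forge siginfo that looked like it came from tkill()/tgkill(). A hedged userspace illustration (not part of the patch; SI_TKILL's value is defined defensively in case the libc headers omit it):

    #define _GNU_SOURCE
    #include <signal.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    #ifndef SI_TKILL
    #define SI_TKILL (-6)
    #endif

    int main(void)
    {
            siginfo_t info;

            memset(&info, 0, sizeof(info));
            info.si_signo = SIGUSR1;
            info.si_code  = SI_TKILL;       /* pretend to be tkill() */

            signal(SIGUSR1, SIG_IGN);
            /* after this change the kernel rejects the forgery */
            if (syscall(SYS_rt_sigqueueinfo, getpid(), SIGUSR1, &info) < 0)
                    perror("rt_sigqueueinfo");  /* EPERM */
            return 0;
    }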
diff --git a/kernel/smp.c b/kernel/smp.c
index 7cbd0f293df4..73a195193558 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -604,6 +604,87 @@ void ipi_call_unlock_irq(void)
604} 604}
605#endif /* USE_GENERIC_SMP_HELPERS */ 605#endif /* USE_GENERIC_SMP_HELPERS */
606 606
607/* Setup configured maximum number of CPUs to activate */
608unsigned int setup_max_cpus = NR_CPUS;
609EXPORT_SYMBOL(setup_max_cpus);
610
611
612/*
613 * Setup routine for controlling SMP activation
614 *
615 * Command-line option of "nosmp" or "maxcpus=0" will disable SMP
616 * activation entirely (the MPS table probe still happens, though).
617 *
618 * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer
619 * greater than 0, limits the maximum number of CPUs activated in
620 * SMP mode to <NUM>.
621 */
622
623void __weak arch_disable_smp_support(void) { }
624
625static int __init nosmp(char *str)
626{
627 setup_max_cpus = 0;
628 arch_disable_smp_support();
629
630 return 0;
631}
632
633early_param("nosmp", nosmp);
634
635/* this is hard limit */
636static int __init nrcpus(char *str)
637{
638 int nr_cpus;
639
640 get_option(&str, &nr_cpus);
641 if (nr_cpus > 0 && nr_cpus < nr_cpu_ids)
642 nr_cpu_ids = nr_cpus;
643
644 return 0;
645}
646
647early_param("nr_cpus", nrcpus);
648
649static int __init maxcpus(char *str)
650{
651 get_option(&str, &setup_max_cpus);
652 if (setup_max_cpus == 0)
653 arch_disable_smp_support();
654
655 return 0;
656}
657
658early_param("maxcpus", maxcpus);
659
660/* Setup number of possible processor ids */
661int nr_cpu_ids __read_mostly = NR_CPUS;
662EXPORT_SYMBOL(nr_cpu_ids);
663
664/* An arch may set nr_cpu_ids earlier if needed, so this would be redundant */
665void __init setup_nr_cpu_ids(void)
666{
667 nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1;
668}
669
670/* Called by boot processor to activate the rest. */
671void __init smp_init(void)
672{
673 unsigned int cpu;
674
675 /* FIXME: This should be done in userspace --RR */
676 for_each_present_cpu(cpu) {
677 if (num_online_cpus() >= setup_max_cpus)
678 break;
679 if (!cpu_online(cpu))
680 cpu_up(cpu);
681 }
682
683 /* Any cleanup work */
684 printk(KERN_INFO "Brought up %ld CPUs\n", (long)num_online_cpus());
685 smp_cpus_done(setup_max_cpus);
686}
687
607/* 688/*
608 * Call a function on all processors. May be used during early boot while 689 * Call a function on all processors. May be used during early boot while
609 * early_boot_irqs_disabled is set. Use local_irq_save/restore() instead 690 * early_boot_irqs_disabled is set. Use local_irq_save/restore() instead
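The block added above appears to be straight code motion of the SMP boot-up helpers from init/main.c into kernel/smp.c; the semantics of the early parameters are unchanged. For reference, illustrative uses on the kernel command line:

    nosmp          bring up only the boot CPU (SMP support disabled)
    maxcpus=2      activate at most two CPUs at boot (soft limit)
    nr_cpus=4      hard-cap nr_cpu_ids, shrinking per-CPU allocations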
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 56e5dec837f0..735d87095172 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -845,7 +845,10 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb,
845 switch (action) { 845 switch (action) {
846 case CPU_UP_PREPARE: 846 case CPU_UP_PREPARE:
847 case CPU_UP_PREPARE_FROZEN: 847 case CPU_UP_PREPARE_FROZEN:
848 p = kthread_create(run_ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu); 848 p = kthread_create_on_node(run_ksoftirqd,
849 hcpu,
850 cpu_to_node(hotcpu),
851 "ksoftirqd/%d", hotcpu);
849 if (IS_ERR(p)) { 852 if (IS_ERR(p)) {
850 printk("ksoftirqd for %i failed\n", hotcpu); 853 printk("ksoftirqd for %i failed\n", hotcpu);
851 return notifier_from_errno(PTR_ERR(p)); 854 return notifier_from_errno(PTR_ERR(p));
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 2df820b03beb..e3516b29076c 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -301,8 +301,10 @@ static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb,
301 case CPU_UP_PREPARE: 301 case CPU_UP_PREPARE:
302 BUG_ON(stopper->thread || stopper->enabled || 302 BUG_ON(stopper->thread || stopper->enabled ||
303 !list_empty(&stopper->works)); 303 !list_empty(&stopper->works));
304 p = kthread_create(cpu_stopper_thread, stopper, "migration/%d", 304 p = kthread_create_on_node(cpu_stopper_thread,
305 cpu); 305 stopper,
306 cpu_to_node(cpu),
307 "migration/%d", cpu);
306 if (IS_ERR(p)) 308 if (IS_ERR(p))
307 return notifier_from_errno(PTR_ERR(p)); 309 return notifier_from_errno(PTR_ERR(p));
308 get_task_struct(p); 310 get_task_struct(p);
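softirq.c above, stop_machine.c here, and workqueue.c below all switch to the new kthread_create_on_node() API, so that a per-CPU thread's task_struct and stack are allocated on the NUMA node it will run on. A minimal self-contained sketch of the pattern (names are illustrative, not from the patch):

    #include <linux/cpumask.h>
    #include <linux/err.h>
    #include <linux/kthread.h>
    #include <linux/sched.h>

    static int worker_fn(void *data)    /* hypothetical thread body */
    {
            while (!kthread_should_stop()) {
                    set_current_state(TASK_INTERRUPTIBLE);
                    schedule();
            }
            return 0;
    }

    static struct task_struct *start_percpu_thread(int cpu)
    {
            struct task_struct *p;

            p = kthread_create_on_node(worker_fn, NULL,
                                       cpu_to_node(cpu), /* NUMA-local alloc */
                                       "mythread/%d", cpu);
            if (!IS_ERR(p)) {
                    kthread_bind(p, cpu);   /* then pin it to the CPU */
                    wake_up_process(p);
            }
            return p;
    }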
diff --git a/kernel/sys.c b/kernel/sys.c
index 1ad48b3b9068..af468edf096a 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -120,16 +120,33 @@ EXPORT_SYMBOL(cad_pid);
120void (*pm_power_off_prepare)(void); 120void (*pm_power_off_prepare)(void);
121 121
122/* 122/*
123 * Returns true if current's euid is same as p's uid or euid,
124 * or has CAP_SYS_NICE to p's user_ns.
125 *
126 * Called with rcu_read_lock, creds are safe
127 */
128static bool set_one_prio_perm(struct task_struct *p)
129{
130 const struct cred *cred = current_cred(), *pcred = __task_cred(p);
131
132 if (pcred->user->user_ns == cred->user->user_ns &&
133 (pcred->uid == cred->euid ||
134 pcred->euid == cred->euid))
135 return true;
136 if (ns_capable(pcred->user->user_ns, CAP_SYS_NICE))
137 return true;
138 return false;
139}
140
141/*
123 * set the priority of a task 142 * set the priority of a task
124 * - the caller must hold the RCU read lock 143 * - the caller must hold the RCU read lock
125 */ 144 */
126static int set_one_prio(struct task_struct *p, int niceval, int error) 145static int set_one_prio(struct task_struct *p, int niceval, int error)
127{ 146{
128 const struct cred *cred = current_cred(), *pcred = __task_cred(p);
129 int no_nice; 147 int no_nice;
130 148
131 if (pcred->uid != cred->euid && 149 if (!set_one_prio_perm(p)) {
132 pcred->euid != cred->euid && !capable(CAP_SYS_NICE)) {
133 error = -EPERM; 150 error = -EPERM;
134 goto out; 151 goto out;
135 } 152 }
@@ -506,7 +523,7 @@ SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
506 if (rgid != (gid_t) -1) { 523 if (rgid != (gid_t) -1) {
507 if (old->gid == rgid || 524 if (old->gid == rgid ||
508 old->egid == rgid || 525 old->egid == rgid ||
509 capable(CAP_SETGID)) 526 nsown_capable(CAP_SETGID))
510 new->gid = rgid; 527 new->gid = rgid;
511 else 528 else
512 goto error; 529 goto error;
@@ -515,7 +532,7 @@ SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
515 if (old->gid == egid || 532 if (old->gid == egid ||
516 old->egid == egid || 533 old->egid == egid ||
517 old->sgid == egid || 534 old->sgid == egid ||
518 capable(CAP_SETGID)) 535 nsown_capable(CAP_SETGID))
519 new->egid = egid; 536 new->egid = egid;
520 else 537 else
521 goto error; 538 goto error;
@@ -550,7 +567,7 @@ SYSCALL_DEFINE1(setgid, gid_t, gid)
550 old = current_cred(); 567 old = current_cred();
551 568
552 retval = -EPERM; 569 retval = -EPERM;
553 if (capable(CAP_SETGID)) 570 if (nsown_capable(CAP_SETGID))
554 new->gid = new->egid = new->sgid = new->fsgid = gid; 571 new->gid = new->egid = new->sgid = new->fsgid = gid;
555 else if (gid == old->gid || gid == old->sgid) 572 else if (gid == old->gid || gid == old->sgid)
556 new->egid = new->fsgid = gid; 573 new->egid = new->fsgid = gid;
@@ -617,7 +634,7 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
617 new->uid = ruid; 634 new->uid = ruid;
618 if (old->uid != ruid && 635 if (old->uid != ruid &&
619 old->euid != ruid && 636 old->euid != ruid &&
620 !capable(CAP_SETUID)) 637 !nsown_capable(CAP_SETUID))
621 goto error; 638 goto error;
622 } 639 }
623 640
@@ -626,7 +643,7 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
626 if (old->uid != euid && 643 if (old->uid != euid &&
627 old->euid != euid && 644 old->euid != euid &&
628 old->suid != euid && 645 old->suid != euid &&
629 !capable(CAP_SETUID)) 646 !nsown_capable(CAP_SETUID))
630 goto error; 647 goto error;
631 } 648 }
632 649
@@ -674,7 +691,7 @@ SYSCALL_DEFINE1(setuid, uid_t, uid)
674 old = current_cred(); 691 old = current_cred();
675 692
676 retval = -EPERM; 693 retval = -EPERM;
677 if (capable(CAP_SETUID)) { 694 if (nsown_capable(CAP_SETUID)) {
678 new->suid = new->uid = uid; 695 new->suid = new->uid = uid;
679 if (uid != old->uid) { 696 if (uid != old->uid) {
680 retval = set_user(new); 697 retval = set_user(new);
@@ -716,7 +733,7 @@ SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid)
716 old = current_cred(); 733 old = current_cred();
717 734
718 retval = -EPERM; 735 retval = -EPERM;
719 if (!capable(CAP_SETUID)) { 736 if (!nsown_capable(CAP_SETUID)) {
720 if (ruid != (uid_t) -1 && ruid != old->uid && 737 if (ruid != (uid_t) -1 && ruid != old->uid &&
721 ruid != old->euid && ruid != old->suid) 738 ruid != old->euid && ruid != old->suid)
722 goto error; 739 goto error;
@@ -780,7 +797,7 @@ SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid)
780 old = current_cred(); 797 old = current_cred();
781 798
782 retval = -EPERM; 799 retval = -EPERM;
783 if (!capable(CAP_SETGID)) { 800 if (!nsown_capable(CAP_SETGID)) {
784 if (rgid != (gid_t) -1 && rgid != old->gid && 801 if (rgid != (gid_t) -1 && rgid != old->gid &&
785 rgid != old->egid && rgid != old->sgid) 802 rgid != old->egid && rgid != old->sgid)
786 goto error; 803 goto error;
@@ -840,7 +857,7 @@ SYSCALL_DEFINE1(setfsuid, uid_t, uid)
840 857
841 if (uid == old->uid || uid == old->euid || 858 if (uid == old->uid || uid == old->euid ||
842 uid == old->suid || uid == old->fsuid || 859 uid == old->suid || uid == old->fsuid ||
843 capable(CAP_SETUID)) { 860 nsown_capable(CAP_SETUID)) {
844 if (uid != old_fsuid) { 861 if (uid != old_fsuid) {
845 new->fsuid = uid; 862 new->fsuid = uid;
846 if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0) 863 if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0)
@@ -873,7 +890,7 @@ SYSCALL_DEFINE1(setfsgid, gid_t, gid)
873 890
874 if (gid == old->gid || gid == old->egid || 891 if (gid == old->gid || gid == old->egid ||
875 gid == old->sgid || gid == old->fsgid || 892 gid == old->sgid || gid == old->fsgid ||
876 capable(CAP_SETGID)) { 893 nsown_capable(CAP_SETGID)) {
877 if (gid != old_fsgid) { 894 if (gid != old_fsgid) {
878 new->fsgid = gid; 895 new->fsgid = gid;
879 goto change_okay; 896 goto change_okay;
@@ -1181,8 +1198,9 @@ SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
1181 int errno; 1198 int errno;
1182 char tmp[__NEW_UTS_LEN]; 1199 char tmp[__NEW_UTS_LEN];
1183 1200
1184 if (!capable(CAP_SYS_ADMIN)) 1201 if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
1185 return -EPERM; 1202 return -EPERM;
1203
1186 if (len < 0 || len > __NEW_UTS_LEN) 1204 if (len < 0 || len > __NEW_UTS_LEN)
1187 return -EINVAL; 1205 return -EINVAL;
1188 down_write(&uts_sem); 1206 down_write(&uts_sem);
@@ -1230,7 +1248,7 @@ SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len)
1230 int errno; 1248 int errno;
1231 char tmp[__NEW_UTS_LEN]; 1249 char tmp[__NEW_UTS_LEN];
1232 1250
1233 if (!capable(CAP_SYS_ADMIN)) 1251 if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
1234 return -EPERM; 1252 return -EPERM;
1235 if (len < 0 || len > __NEW_UTS_LEN) 1253 if (len < 0 || len > __NEW_UTS_LEN)
1236 return -EINVAL; 1254 return -EINVAL;
@@ -1345,6 +1363,8 @@ int do_prlimit(struct task_struct *tsk, unsigned int resource,
1345 rlim = tsk->signal->rlim + resource; 1363 rlim = tsk->signal->rlim + resource;
1346 task_lock(tsk->group_leader); 1364 task_lock(tsk->group_leader);
1347 if (new_rlim) { 1365 if (new_rlim) {
1366 /* Keep the capable check against init_user_ns until
1367 cgroups can contain all limits */
1348 if (new_rlim->rlim_max > rlim->rlim_max && 1368 if (new_rlim->rlim_max > rlim->rlim_max &&
1349 !capable(CAP_SYS_RESOURCE)) 1369 !capable(CAP_SYS_RESOURCE))
1350 retval = -EPERM; 1370 retval = -EPERM;
@@ -1388,19 +1408,22 @@ static int check_prlimit_permission(struct task_struct *task)
1388{ 1408{
1389 const struct cred *cred = current_cred(), *tcred; 1409 const struct cred *cred = current_cred(), *tcred;
1390 1410
1391 tcred = __task_cred(task); 1411 if (current == task)
1392 if (current != task && 1412 return 0;
1393 (cred->uid != tcred->euid ||
1394 cred->uid != tcred->suid ||
1395 cred->uid != tcred->uid ||
1396 cred->gid != tcred->egid ||
1397 cred->gid != tcred->sgid ||
1398 cred->gid != tcred->gid) &&
1399 !capable(CAP_SYS_RESOURCE)) {
1400 return -EPERM;
1401 }
1402 1413
1403 return 0; 1414 tcred = __task_cred(task);
1415 if (cred->user->user_ns == tcred->user->user_ns &&
1416 (cred->uid == tcred->euid &&
1417 cred->uid == tcred->suid &&
1418 cred->uid == tcred->uid &&
1419 cred->gid == tcred->egid &&
1420 cred->gid == tcred->sgid &&
1421 cred->gid == tcred->gid))
1422 return 0;
1423 if (ns_capable(tcred->user->user_ns, CAP_SYS_RESOURCE))
1424 return 0;
1425
1426 return -EPERM;
1404} 1427}
1405 1428
1406SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource, 1429SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource,
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 40245d697602..c0bb32414b17 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -117,6 +117,7 @@ static int neg_one = -1;
117static int zero; 117static int zero;
118static int __maybe_unused one = 1; 118static int __maybe_unused one = 1;
119static int __maybe_unused two = 2; 119static int __maybe_unused two = 2;
120static int __maybe_unused three = 3;
120static unsigned long one_ul = 1; 121static unsigned long one_ul = 1;
121static int one_hundred = 100; 122static int one_hundred = 100;
122#ifdef CONFIG_PRINTK 123#ifdef CONFIG_PRINTK
@@ -169,6 +170,11 @@ static int proc_taint(struct ctl_table *table, int write,
169 void __user *buffer, size_t *lenp, loff_t *ppos); 170 void __user *buffer, size_t *lenp, loff_t *ppos);
170#endif 171#endif
171 172
173#ifdef CONFIG_PRINTK
174static int proc_dmesg_restrict(struct ctl_table *table, int write,
175 void __user *buffer, size_t *lenp, loff_t *ppos);
176#endif
177
172#ifdef CONFIG_MAGIC_SYSRQ 178#ifdef CONFIG_MAGIC_SYSRQ
173/* Note: sysrq code uses it's own private copy */ 179/* Note: sysrq code uses it's own private copy */
174static int __sysrq_enabled = SYSRQ_DEFAULT_ENABLE; 180static int __sysrq_enabled = SYSRQ_DEFAULT_ENABLE;
@@ -706,7 +712,7 @@ static struct ctl_table kern_table[] = {
706 .data = &kptr_restrict, 712 .data = &kptr_restrict,
707 .maxlen = sizeof(int), 713 .maxlen = sizeof(int),
708 .mode = 0644, 714 .mode = 0644,
709 .proc_handler = proc_dointvec_minmax, 715 .proc_handler = proc_dmesg_restrict,
710 .extra1 = &zero, 716 .extra1 = &zero,
711 .extra2 = &two, 717 .extra2 = &two,
712 }, 718 },
@@ -971,14 +977,18 @@ static struct ctl_table vm_table[] = {
971 .data = &sysctl_overcommit_memory, 977 .data = &sysctl_overcommit_memory,
972 .maxlen = sizeof(sysctl_overcommit_memory), 978 .maxlen = sizeof(sysctl_overcommit_memory),
973 .mode = 0644, 979 .mode = 0644,
974 .proc_handler = proc_dointvec, 980 .proc_handler = proc_dointvec_minmax,
981 .extra1 = &zero,
982 .extra2 = &two,
975 }, 983 },
976 { 984 {
977 .procname = "panic_on_oom", 985 .procname = "panic_on_oom",
978 .data = &sysctl_panic_on_oom, 986 .data = &sysctl_panic_on_oom,
979 .maxlen = sizeof(sysctl_panic_on_oom), 987 .maxlen = sizeof(sysctl_panic_on_oom),
980 .mode = 0644, 988 .mode = 0644,
981 .proc_handler = proc_dointvec, 989 .proc_handler = proc_dointvec_minmax,
990 .extra1 = &zero,
991 .extra2 = &two,
982 }, 992 },
983 { 993 {
984 .procname = "oom_kill_allocating_task", 994 .procname = "oom_kill_allocating_task",
@@ -1006,7 +1016,8 @@ static struct ctl_table vm_table[] = {
1006 .data = &page_cluster, 1016 .data = &page_cluster,
1007 .maxlen = sizeof(int), 1017 .maxlen = sizeof(int),
1008 .mode = 0644, 1018 .mode = 0644,
1009 .proc_handler = proc_dointvec, 1019 .proc_handler = proc_dointvec_minmax,
1020 .extra1 = &zero,
1010 }, 1021 },
1011 { 1022 {
1012 .procname = "dirty_background_ratio", 1023 .procname = "dirty_background_ratio",
@@ -1054,7 +1065,8 @@ static struct ctl_table vm_table[] = {
1054 .data = &dirty_expire_interval, 1065 .data = &dirty_expire_interval,
1055 .maxlen = sizeof(dirty_expire_interval), 1066 .maxlen = sizeof(dirty_expire_interval),
1056 .mode = 0644, 1067 .mode = 0644,
1057 .proc_handler = proc_dointvec, 1068 .proc_handler = proc_dointvec_minmax,
1069 .extra1 = &zero,
1058 }, 1070 },
1059 { 1071 {
1060 .procname = "nr_pdflush_threads", 1072 .procname = "nr_pdflush_threads",
@@ -1130,6 +1142,8 @@ static struct ctl_table vm_table[] = {
1130 .maxlen = sizeof(int), 1142 .maxlen = sizeof(int),
1131 .mode = 0644, 1143 .mode = 0644,
1132 .proc_handler = drop_caches_sysctl_handler, 1144 .proc_handler = drop_caches_sysctl_handler,
1145 .extra1 = &one,
1146 .extra2 = &three,
1133 }, 1147 },
1134#ifdef CONFIG_COMPACTION 1148#ifdef CONFIG_COMPACTION
1135 { 1149 {
@@ -2385,6 +2399,17 @@ static int proc_taint(struct ctl_table *table, int write,
2385 return err; 2399 return err;
2386} 2400}
2387 2401
2402#ifdef CONFIG_PRINTK
2403static int proc_dmesg_restrict(struct ctl_table *table, int write,
2404 void __user *buffer, size_t *lenp, loff_t *ppos)
2405{
2406 if (write && !capable(CAP_SYS_ADMIN))
2407 return -EPERM;
2408
2409 return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
2410}
2411#endif
2412
2388struct do_proc_dointvec_minmax_conv_param { 2413struct do_proc_dointvec_minmax_conv_param {
2389 int *min; 2414 int *min;
2390 int *max; 2415 int *max;
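Several of the vm_table entries above switch from proc_dointvec to proc_dointvec_minmax, which rejects writes outside the inclusive [extra1, extra2] range instead of silently storing out-of-range values. A hypothetical table entry showing the convention (not from the patch):

    static int min_val = 0;
    static int max_val = 100;
    static int my_knob = 50;            /* hypothetical backing store */

    static struct ctl_table my_table[] = {
            {
                    .procname     = "my_knob",
                    .data         = &my_knob,
                    .maxlen       = sizeof(int),
                    .mode         = 0644,
                    .proc_handler = proc_dointvec_minmax,
                    .extra1       = &min_val,   /* inclusive lower bound */
                    .extra2       = &max_val,   /* inclusive upper bound */
            },
            { }                         /* sentinel */
    };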
diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c
index 10b90d8a03c4..4e4932a7b360 100644
--- a/kernel/sysctl_check.c
+++ b/kernel/sysctl_check.c
@@ -111,11 +111,9 @@ int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table)
111 const char *fail = NULL; 111 const char *fail = NULL;
112 112
113 if (table->parent) { 113 if (table->parent) {
114 if (table->procname && !table->parent->procname) 114 if (!table->parent->procname)
115 set_fail(&fail, table, "Parent without procname"); 115 set_fail(&fail, table, "Parent without procname");
116 } 116 }
117 if (!table->procname)
118 set_fail(&fail, table, "No procname");
119 if (table->child) { 117 if (table->child) {
120 if (table->data) 118 if (table->data)
121 set_fail(&fail, table, "Directory with data?"); 119 set_fail(&fail, table, "Directory with data?");
@@ -144,13 +142,9 @@ int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table)
144 set_fail(&fail, table, "No maxlen"); 142 set_fail(&fail, table, "No maxlen");
145 } 143 }
146#ifdef CONFIG_PROC_SYSCTL 144#ifdef CONFIG_PROC_SYSCTL
147 if (table->procname && !table->proc_handler) 145 if (!table->proc_handler)
148 set_fail(&fail, table, "No proc_handler"); 146 set_fail(&fail, table, "No proc_handler");
149#endif 147#endif
150#if 0
151 if (!table->procname && table->proc_handler)
152 set_fail(&fail, table, "proc_handler without procname");
153#endif
154 sysctl_check_leaf(namespaces, table, &fail); 148 sysctl_check_leaf(namespaces, table, &fail);
155 } 149 }
156 if (table->mode > 0777) 150 if (table->mode > 0777)
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 3971c6b9d58d..9ffea360a778 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -685,7 +685,7 @@ static int __init taskstats_init(void)
685 goto err_cgroup_ops; 685 goto err_cgroup_ops;
686 686
687 family_registered = 1; 687 family_registered = 1;
688 printk("registered taskstats version %d\n", TASKSTATS_GENL_VERSION); 688 pr_info("registered taskstats version %d\n", TASKSTATS_GENL_VERSION);
689 return 0; 689 return 0;
690err_cgroup_ops: 690err_cgroup_ops:
691 genl_unregister_ops(&family, &taskstats_ops); 691 genl_unregister_ops(&family, &taskstats_ops);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 3bd7e3d5c632..8ad5d576755e 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -14,7 +14,7 @@
14#include <linux/init.h> 14#include <linux/init.h>
15#include <linux/mm.h> 15#include <linux/mm.h>
16#include <linux/sched.h> 16#include <linux/sched.h>
17#include <linux/sysdev.h> 17#include <linux/syscore_ops.h>
18#include <linux/clocksource.h> 18#include <linux/clocksource.h>
19#include <linux/jiffies.h> 19#include <linux/jiffies.h>
20#include <linux/time.h> 20#include <linux/time.h>
@@ -597,13 +597,12 @@ static struct timespec timekeeping_suspend_time;
597 597
598/** 598/**
599 * timekeeping_resume - Resumes the generic timekeeping subsystem. 599 * timekeeping_resume - Resumes the generic timekeeping subsystem.
600 * @dev: unused
601 * 600 *
602 * This is for the generic clocksource timekeeping. 601 * This is for the generic clocksource timekeeping.
603 * xtime/wall_to_monotonic/jiffies/etc are 602 * xtime/wall_to_monotonic/jiffies/etc are
604 * still managed by arch specific suspend/resume code. 603 * still managed by arch specific suspend/resume code.
605 */ 604 */
606static int timekeeping_resume(struct sys_device *dev) 605static void timekeeping_resume(void)
607{ 606{
608 unsigned long flags; 607 unsigned long flags;
609 struct timespec ts; 608 struct timespec ts;
@@ -632,11 +631,9 @@ static int timekeeping_resume(struct sys_device *dev)
632 631
633 /* Resume hrtimers */ 632 /* Resume hrtimers */
634 hres_timers_resume(); 633 hres_timers_resume();
635
636 return 0;
637} 634}
638 635
639static int timekeeping_suspend(struct sys_device *dev, pm_message_t state) 636static int timekeeping_suspend(void)
640{ 637{
641 unsigned long flags; 638 unsigned long flags;
642 639
@@ -654,26 +651,18 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state)
654} 651}
655 652
656/* sysfs resume/suspend bits for timekeeping */ 653/* sysfs resume/suspend bits for timekeeping */
657static struct sysdev_class timekeeping_sysclass = { 654static struct syscore_ops timekeeping_syscore_ops = {
658 .name = "timekeeping",
659 .resume = timekeeping_resume, 655 .resume = timekeeping_resume,
660 .suspend = timekeeping_suspend, 656 .suspend = timekeeping_suspend,
661}; 657};
662 658
663static struct sys_device device_timer = { 659static int __init timekeeping_init_ops(void)
664 .id = 0,
665 .cls = &timekeeping_sysclass,
666};
667
668static int __init timekeeping_init_device(void)
669{ 660{
670 int error = sysdev_class_register(&timekeeping_sysclass); 661 register_syscore_ops(&timekeeping_syscore_ops);
671 if (!error) 662 return 0;
672 error = sysdev_register(&device_timer);
673 return error;
674} 663}
675 664
676device_initcall(timekeeping_init_device); 665device_initcall(timekeeping_init_ops);
677 666
678/* 667/*
679 * If the error is already larger, we look ahead even further 668 * If the error is already larger, we look ahead even further
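This converts timekeeping from a sysdev class to the then-new syscore_ops mechanism: syscore callbacks run on one CPU with interrupts disabled and need no dummy struct sys_device. A minimal sketch of the replacement pattern, with hypothetical ops:

    #include <linux/init.h>
    #include <linux/syscore_ops.h>

    static int my_suspend(void)
    {
            /* runs late, with IRQs disabled, on one CPU */
            return 0;                   /* non-zero aborts the suspend */
    }

    static void my_resume(void)
    {
            /* mirror of my_suspend() */
    }

    static struct syscore_ops my_syscore_ops = {
            .suspend = my_suspend,
            .resume  = my_resume,
    };

    static int __init my_init(void)
    {
            register_syscore_ops(&my_syscore_ops);
            return 0;
    }
    device_initcall(my_init);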
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index cbafed7d4f38..7aa40f8e182d 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -703,28 +703,21 @@ void blk_trace_shutdown(struct request_queue *q)
703 * 703 *
704 **/ 704 **/
705static void blk_add_trace_rq(struct request_queue *q, struct request *rq, 705static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
706 u32 what) 706 u32 what)
707{ 707{
708 struct blk_trace *bt = q->blk_trace; 708 struct blk_trace *bt = q->blk_trace;
709 int rw = rq->cmd_flags & 0x03;
710 709
711 if (likely(!bt)) 710 if (likely(!bt))
712 return; 711 return;
713 712
714 if (rq->cmd_flags & REQ_DISCARD)
715 rw |= REQ_DISCARD;
716
717 if (rq->cmd_flags & REQ_SECURE)
718 rw |= REQ_SECURE;
719
720 if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { 713 if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
721 what |= BLK_TC_ACT(BLK_TC_PC); 714 what |= BLK_TC_ACT(BLK_TC_PC);
722 __blk_add_trace(bt, 0, blk_rq_bytes(rq), rw, 715 __blk_add_trace(bt, 0, blk_rq_bytes(rq), rq->cmd_flags,
723 what, rq->errors, rq->cmd_len, rq->cmd); 716 what, rq->errors, rq->cmd_len, rq->cmd);
724 } else { 717 } else {
725 what |= BLK_TC_ACT(BLK_TC_FS); 718 what |= BLK_TC_ACT(BLK_TC_FS);
726 __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), rw, 719 __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
727 what, rq->errors, 0, NULL); 720 rq->cmd_flags, what, rq->errors, 0, NULL);
728 } 721 }
729} 722}
730 723
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 888b611897d3..c075f4ea6b94 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1467,7 +1467,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
1467 return t_hash_next(m, pos); 1467 return t_hash_next(m, pos);
1468 1468
1469 (*pos)++; 1469 (*pos)++;
1470 iter->pos = *pos; 1470 iter->pos = iter->func_pos = *pos;
1471 1471
1472 if (iter->flags & FTRACE_ITER_PRINTALL) 1472 if (iter->flags & FTRACE_ITER_PRINTALL)
1473 return t_hash_start(m, pos); 1473 return t_hash_start(m, pos);
@@ -1502,7 +1502,6 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
1502 if (!rec) 1502 if (!rec)
1503 return t_hash_start(m, pos); 1503 return t_hash_start(m, pos);
1504 1504
1505 iter->func_pos = *pos;
1506 iter->func = rec; 1505 iter->func = rec;
1507 1506
1508 return iter; 1507 return iter;
diff --git a/kernel/uid16.c b/kernel/uid16.c
index 419209893d87..51c6e89e8619 100644
--- a/kernel/uid16.c
+++ b/kernel/uid16.c
@@ -189,7 +189,7 @@ SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist)
189 struct group_info *group_info; 189 struct group_info *group_info;
190 int retval; 190 int retval;
191 191
192 if (!capable(CAP_SETGID)) 192 if (!nsown_capable(CAP_SETGID))
193 return -EPERM; 193 return -EPERM;
194 if ((unsigned)gidsetsize > NGROUPS_MAX) 194 if ((unsigned)gidsetsize > NGROUPS_MAX)
195 return -EINVAL; 195 return -EINVAL;
diff --git a/kernel/user.c b/kernel/user.c
index 5c598ca781df..9e03e9c1df8d 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -17,9 +17,13 @@
17#include <linux/module.h> 17#include <linux/module.h>
18#include <linux/user_namespace.h> 18#include <linux/user_namespace.h>
19 19
20/*
21 * userns count is 1 for root user, 1 for init_uts_ns,
22 * and 1 for... ?
23 */
20struct user_namespace init_user_ns = { 24struct user_namespace init_user_ns = {
21 .kref = { 25 .kref = {
22 .refcount = ATOMIC_INIT(2), 26 .refcount = ATOMIC_INIT(3),
23 }, 27 },
24 .creator = &root_user, 28 .creator = &root_user,
25}; 29};
@@ -47,7 +51,7 @@ static struct kmem_cache *uid_cachep;
47 */ 51 */
48static DEFINE_SPINLOCK(uidhash_lock); 52static DEFINE_SPINLOCK(uidhash_lock);
49 53
50/* root_user.__count is 2, 1 for init task cred, 1 for init_user_ns->creator */ 54/* root_user.__count is 2, 1 for init task cred, 1 for init_user_ns->user_ns */
51struct user_struct root_user = { 55struct user_struct root_user = {
52 .__count = ATOMIC_INIT(2), 56 .__count = ATOMIC_INIT(2),
53 .processes = ATOMIC_INIT(1), 57 .processes = ATOMIC_INIT(1),
diff --git a/kernel/utsname.c b/kernel/utsname.c
index 8a82b4b8ea52..44646179eaba 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -14,6 +14,7 @@
14#include <linux/utsname.h> 14#include <linux/utsname.h>
15#include <linux/err.h> 15#include <linux/err.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/user_namespace.h>
17 18
18static struct uts_namespace *create_uts_ns(void) 19static struct uts_namespace *create_uts_ns(void)
19{ 20{
@@ -30,7 +31,8 @@ static struct uts_namespace *create_uts_ns(void)
30 * @old_ns: namespace to clone 31 * @old_ns: namespace to clone
31 * Return NULL on error (failure to kmalloc), new ns otherwise 32 * Return NULL on error (failure to kmalloc), new ns otherwise
32 */ 33 */
33static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns) 34static struct uts_namespace *clone_uts_ns(struct task_struct *tsk,
35 struct uts_namespace *old_ns)
34{ 36{
35 struct uts_namespace *ns; 37 struct uts_namespace *ns;
36 38
@@ -40,6 +42,7 @@ static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns)
40 42
41 down_read(&uts_sem); 43 down_read(&uts_sem);
42 memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); 44 memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
45 ns->user_ns = get_user_ns(task_cred_xxx(tsk, user)->user_ns);
43 up_read(&uts_sem); 46 up_read(&uts_sem);
44 return ns; 47 return ns;
45} 48}
@@ -50,8 +53,10 @@ static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns)
50 * utsname of this process won't be seen by parent, and vice 53 * utsname of this process won't be seen by parent, and vice
51 * versa. 54 * versa.
52 */ 55 */
53struct uts_namespace *copy_utsname(unsigned long flags, struct uts_namespace *old_ns) 56struct uts_namespace *copy_utsname(unsigned long flags,
57 struct task_struct *tsk)
54{ 58{
59 struct uts_namespace *old_ns = tsk->nsproxy->uts_ns;
55 struct uts_namespace *new_ns; 60 struct uts_namespace *new_ns;
56 61
57 BUG_ON(!old_ns); 62 BUG_ON(!old_ns);
@@ -60,7 +65,7 @@ struct uts_namespace *copy_utsname(unsigned long flags, struct uts_namespace *ol
60 if (!(flags & CLONE_NEWUTS)) 65 if (!(flags & CLONE_NEWUTS))
61 return old_ns; 66 return old_ns;
62 67
63 new_ns = clone_uts_ns(old_ns); 68 new_ns = clone_uts_ns(tsk, old_ns);
64 69
65 put_uts_ns(old_ns); 70 put_uts_ns(old_ns);
66 return new_ns; 71 return new_ns;
@@ -71,5 +76,6 @@ void free_uts_ns(struct kref *kref)
71 struct uts_namespace *ns; 76 struct uts_namespace *ns;
72 77
73 ns = container_of(kref, struct uts_namespace, kref); 78 ns = container_of(kref, struct uts_namespace, kref);
79 put_user_ns(ns->user_ns);
74 kfree(ns); 80 kfree(ns);
75} 81}
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 18bb15776c57..140dce750450 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -48,12 +48,15 @@ static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
48 * Should we panic when a soft-lockup or hard-lockup occurs: 48 * Should we panic when a soft-lockup or hard-lockup occurs:
49 */ 49 */
50#ifdef CONFIG_HARDLOCKUP_DETECTOR 50#ifdef CONFIG_HARDLOCKUP_DETECTOR
51static int hardlockup_panic; 51static int hardlockup_panic =
52 CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
52 53
53static int __init hardlockup_panic_setup(char *str) 54static int __init hardlockup_panic_setup(char *str)
54{ 55{
55 if (!strncmp(str, "panic", 5)) 56 if (!strncmp(str, "panic", 5))
56 hardlockup_panic = 1; 57 hardlockup_panic = 1;
58 else if (!strncmp(str, "nopanic", 7))
59 hardlockup_panic = 0;
57 else if (!strncmp(str, "0", 1)) 60 else if (!strncmp(str, "0", 1))
58 watchdog_enabled = 0; 61 watchdog_enabled = 0;
59 return 1; 62 return 1;
@@ -415,19 +418,22 @@ static int watchdog_prepare_cpu(int cpu)
415static int watchdog_enable(int cpu) 418static int watchdog_enable(int cpu)
416{ 419{
417 struct task_struct *p = per_cpu(softlockup_watchdog, cpu); 420 struct task_struct *p = per_cpu(softlockup_watchdog, cpu);
418 int err; 421 int err = 0;
419 422
420 /* enable the perf event */ 423 /* enable the perf event */
421 err = watchdog_nmi_enable(cpu); 424 err = watchdog_nmi_enable(cpu);
422 if (err) 425
423 return err; 426 /* Regardless of err above, fall through and start softlockup */
424 427
425 /* create the watchdog thread */ 428 /* create the watchdog thread */
426 if (!p) { 429 if (!p) {
427 p = kthread_create(watchdog, (void *)(unsigned long)cpu, "watchdog/%d", cpu); 430 p = kthread_create(watchdog, (void *)(unsigned long)cpu, "watchdog/%d", cpu);
428 if (IS_ERR(p)) { 431 if (IS_ERR(p)) {
429 printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu); 432 printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu);
430 return PTR_ERR(p); 433 if (!err)
434 /* if hardlockup hasn't already set this */
435 err = PTR_ERR(p);
436 goto out;
431 } 437 }
432 kthread_bind(p, cpu); 438 kthread_bind(p, cpu);
433 per_cpu(watchdog_touch_ts, cpu) = 0; 439 per_cpu(watchdog_touch_ts, cpu) = 0;
@@ -435,7 +441,8 @@ static int watchdog_enable(int cpu)
435 wake_up_process(p); 441 wake_up_process(p);
436 } 442 }
437 443
438 return 0; 444out:
445 return err;
439} 446}
440 447
441static void watchdog_disable(int cpu) 448static void watchdog_disable(int cpu)
@@ -547,7 +554,13 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
547 break; 554 break;
548#endif /* CONFIG_HOTPLUG_CPU */ 555#endif /* CONFIG_HOTPLUG_CPU */
549 } 556 }
550 return notifier_from_errno(err); 557
558 /*
559 * hardlockup and softlockup are not important enough
560 * to block cpu bring up. Just always succeed and
561 * rely on printk output to flag problems.
562 */
563 return NOTIFY_OK;
551} 564}
552 565
553static struct notifier_block __cpuinitdata cpu_nfb = { 566static struct notifier_block __cpuinitdata cpu_nfb = {
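For context (the __setup() wiring sits outside this hunk, so treat the parameter name as an assumption): hardlockup_panic_setup() is registered for the nmi_watchdog= boot parameter, so with CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y the new "nopanic" keyword lets a machine opt back out at boot:

    nmi_watchdog=panic       panic on hard lockups
    nmi_watchdog=nopanic     only warn on hard lockups
    nmi_watchdog=0           disable the watchdog entirely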
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 5ca7ce9ce754..04ef830690ec 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1366,8 +1366,10 @@ static struct worker *create_worker(struct global_cwq *gcwq, bool bind)
1366 worker->id = id; 1366 worker->id = id;
1367 1367
1368 if (!on_unbound_cpu) 1368 if (!on_unbound_cpu)
1369 worker->task = kthread_create(worker_thread, worker, 1369 worker->task = kthread_create_on_node(worker_thread,
1370 "kworker/%u:%d", gcwq->cpu, id); 1370 worker,
1371 cpu_to_node(gcwq->cpu),
1372 "kworker/%u:%d", gcwq->cpu, id);
1371 else 1373 else
1372 worker->task = kthread_create(worker_thread, worker, 1374 worker->task = kthread_create(worker_thread, worker,
1373 "kworker/u:%d", id); 1375 "kworker/u:%d", id);