aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/bounds.c2
-rw-r--r--kernel/capability.c96
-rw-r--r--kernel/cgroup.c14
-rw-r--r--kernel/cpu.c11
-rw-r--r--kernel/cpuset.c80
-rw-r--r--kernel/crash_dump.c34
-rw-r--r--kernel/cred.c6
-rw-r--r--kernel/fork.c151
-rw-r--r--kernel/futex.c11
-rw-r--r--kernel/futex_compat.c11
-rw-r--r--kernel/gcov/Makefile2
-rw-r--r--kernel/groups.c2
-rw-r--r--kernel/irq/manage.c2
-rw-r--r--kernel/irq/proc.c3
-rw-r--r--kernel/kallsyms.c14
-rw-r--r--kernel/kthread.c31
-rw-r--r--kernel/module.c4
-rw-r--r--kernel/nsproxy.c4
-rw-r--r--kernel/panic.c10
-rw-r--r--kernel/perf_event.c17
-rw-r--r--kernel/pid_namespace.c11
-rw-r--r--kernel/power/Makefile3
-rw-r--r--kernel/printk.c36
-rw-r--r--kernel/ptrace.c27
-rw-r--r--kernel/res_counter.c14
-rw-r--r--kernel/sched.c9
-rw-r--r--kernel/signal.c46
-rw-r--r--kernel/smp.c81
-rw-r--r--kernel/softirq.c5
-rw-r--r--kernel/stop_machine.c6
-rw-r--r--kernel/sys.c77
-rw-r--r--kernel/sysctl.c35
-rw-r--r--kernel/sysctl_check.c10
-rw-r--r--kernel/taskstats.c2
-rw-r--r--kernel/trace/Kconfig4
-rw-r--r--kernel/trace/ring_buffer.c2
-rw-r--r--kernel/trace/trace_events_filter.c2
-rw-r--r--kernel/uid16.c2
-rw-r--r--kernel/user.c8
-rw-r--r--kernel/utsname.c12
-rw-r--r--kernel/watchdog.c27
-rw-r--r--kernel/workqueue.c6
43 files changed, 621 insertions, 310 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 353d3fe8ba33..85cbfb31e73e 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -107,6 +107,7 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o
107obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o 107obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
108obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o 108obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
109obj-$(CONFIG_PADATA) += padata.o 109obj-$(CONFIG_PADATA) += padata.o
110obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
110 111
111ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) 112ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
112# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is 113# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/bounds.c b/kernel/bounds.c
index 98a51f26c136..0c9b862292b2 100644
--- a/kernel/bounds.c
+++ b/kernel/bounds.c
@@ -9,11 +9,13 @@
9#include <linux/page-flags.h> 9#include <linux/page-flags.h>
10#include <linux/mmzone.h> 10#include <linux/mmzone.h>
11#include <linux/kbuild.h> 11#include <linux/kbuild.h>
12#include <linux/page_cgroup.h>
12 13
13void foo(void) 14void foo(void)
14{ 15{
15 /* The enum constants to put into include/generated/bounds.h */ 16 /* The enum constants to put into include/generated/bounds.h */
16 DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS); 17 DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS);
17 DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES); 18 DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES);
19 DEFINE(NR_PCG_FLAGS, __NR_PCG_FLAGS);
18 /* End of constants */ 20 /* End of constants */
19} 21}
diff --git a/kernel/capability.c b/kernel/capability.c
index 9e9385f132c8..bf0c734d0c12 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -14,6 +14,7 @@
14#include <linux/security.h> 14#include <linux/security.h>
15#include <linux/syscalls.h> 15#include <linux/syscalls.h>
16#include <linux/pid_namespace.h> 16#include <linux/pid_namespace.h>
17#include <linux/user_namespace.h>
17#include <asm/uaccess.h> 18#include <asm/uaccess.h>
18 19
19/* 20/*
@@ -290,6 +291,60 @@ error:
290} 291}
291 292
292/** 293/**
294 * has_capability - Does a task have a capability in init_user_ns
295 * @t: The task in question
296 * @cap: The capability to be tested for
297 *
298 * Return true if the specified task has the given superior capability
299 * currently in effect to the initial user namespace, false if not.
300 *
301 * Note that this does not set PF_SUPERPRIV on the task.
302 */
303bool has_capability(struct task_struct *t, int cap)
304{
305 int ret = security_real_capable(t, &init_user_ns, cap);
306
307 return (ret == 0);
308}
309
310/**
311 * has_ns_capability - Does a task have a capability in a specific user ns
312 * @t: The task in question
313 * @ns: target user namespace
314 * @cap: The capability to be tested for
315 *
316 * Return true if the specified task has the given superior capability
317 * currently in effect to the specified user namespace, false if not.
318 *
319 * Note that this does not set PF_SUPERPRIV on the task.
320 */
321bool has_ns_capability(struct task_struct *t,
322 struct user_namespace *ns, int cap)
323{
324 int ret = security_real_capable(t, ns, cap);
325
326 return (ret == 0);
327}
328
329/**
330 * has_capability_noaudit - Does a task have a capability (unaudited)
331 * @t: The task in question
332 * @cap: The capability to be tested for
333 *
334 * Return true if the specified task has the given superior capability
335 * currently in effect to init_user_ns, false if not. Don't write an
336 * audit message for the check.
337 *
338 * Note that this does not set PF_SUPERPRIV on the task.
339 */
340bool has_capability_noaudit(struct task_struct *t, int cap)
341{
342 int ret = security_real_capable_noaudit(t, &init_user_ns, cap);
343
344 return (ret == 0);
345}
346
347/**
293 * capable - Determine if the current task has a superior capability in effect 348 * capable - Determine if the current task has a superior capability in effect
294 * @cap: The capability to be tested for 349 * @cap: The capability to be tested for
295 * 350 *
@@ -299,17 +354,48 @@ error:
299 * This sets PF_SUPERPRIV on the task if the capability is available on the 354 * This sets PF_SUPERPRIV on the task if the capability is available on the
300 * assumption that it's about to be used. 355 * assumption that it's about to be used.
301 */ 356 */
302int capable(int cap) 357bool capable(int cap)
358{
359 return ns_capable(&init_user_ns, cap);
360}
361EXPORT_SYMBOL(capable);
362
363/**
364 * ns_capable - Determine if the current task has a superior capability in effect
365 * @ns: The user namespace we want the capability in
366 * @cap: The capability to be tested for
367 *
368 * Return true if the current task has the given superior capability currently
369 * available for use, false if not.
370 *
371 * This sets PF_SUPERPRIV on the task if the capability is available on the
372 * assumption that it's about to be used.
373 */
374bool ns_capable(struct user_namespace *ns, int cap)
303{ 375{
304 if (unlikely(!cap_valid(cap))) { 376 if (unlikely(!cap_valid(cap))) {
305 printk(KERN_CRIT "capable() called with invalid cap=%u\n", cap); 377 printk(KERN_CRIT "capable() called with invalid cap=%u\n", cap);
306 BUG(); 378 BUG();
307 } 379 }
308 380
309 if (security_capable(current_cred(), cap) == 0) { 381 if (security_capable(ns, current_cred(), cap) == 0) {
310 current->flags |= PF_SUPERPRIV; 382 current->flags |= PF_SUPERPRIV;
311 return 1; 383 return true;
312 } 384 }
313 return 0; 385 return false;
314} 386}
315EXPORT_SYMBOL(capable); 387EXPORT_SYMBOL(ns_capable);
388
389/**
390 * task_ns_capable - Determine whether current task has a superior
391 * capability targeted at a specific task's user namespace.
392 * @t: The task whose user namespace is targeted.
393 * @cap: The capability in question.
394 *
395 * Return true if it does, false otherwise.
396 */
397bool task_ns_capable(struct task_struct *t, int cap)
398{
399 return ns_capable(task_cred_xxx(t, user)->user_ns, cap);
400}
401EXPORT_SYMBOL(task_ns_capable);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 95362d15128c..e31b220a743d 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1813,10 +1813,8 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1813 1813
1814 /* Update the css_set linked lists if we're using them */ 1814 /* Update the css_set linked lists if we're using them */
1815 write_lock(&css_set_lock); 1815 write_lock(&css_set_lock);
1816 if (!list_empty(&tsk->cg_list)) { 1816 if (!list_empty(&tsk->cg_list))
1817 list_del(&tsk->cg_list); 1817 list_move(&tsk->cg_list, &newcg->tasks);
1818 list_add(&tsk->cg_list, &newcg->tasks);
1819 }
1820 write_unlock(&css_set_lock); 1818 write_unlock(&css_set_lock);
1821 1819
1822 for_each_subsys(root, ss) { 1820 for_each_subsys(root, ss) {
@@ -3655,12 +3653,12 @@ again:
3655 spin_lock(&release_list_lock); 3653 spin_lock(&release_list_lock);
3656 set_bit(CGRP_REMOVED, &cgrp->flags); 3654 set_bit(CGRP_REMOVED, &cgrp->flags);
3657 if (!list_empty(&cgrp->release_list)) 3655 if (!list_empty(&cgrp->release_list))
3658 list_del(&cgrp->release_list); 3656 list_del_init(&cgrp->release_list);
3659 spin_unlock(&release_list_lock); 3657 spin_unlock(&release_list_lock);
3660 3658
3661 cgroup_lock_hierarchy(cgrp->root); 3659 cgroup_lock_hierarchy(cgrp->root);
3662 /* delete this cgroup from parent->children */ 3660 /* delete this cgroup from parent->children */
3663 list_del(&cgrp->sibling); 3661 list_del_init(&cgrp->sibling);
3664 cgroup_unlock_hierarchy(cgrp->root); 3662 cgroup_unlock_hierarchy(cgrp->root);
3665 3663
3666 d = dget(cgrp->dentry); 3664 d = dget(cgrp->dentry);
@@ -3879,7 +3877,7 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss)
3879 subsys[ss->subsys_id] = NULL; 3877 subsys[ss->subsys_id] = NULL;
3880 3878
3881 /* remove subsystem from rootnode's list of subsystems */ 3879 /* remove subsystem from rootnode's list of subsystems */
3882 list_del(&ss->sibling); 3880 list_del_init(&ss->sibling);
3883 3881
3884 /* 3882 /*
3885 * disentangle the css from all css_sets attached to the dummytop. as 3883 * disentangle the css from all css_sets attached to the dummytop. as
@@ -4241,7 +4239,7 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
4241 if (!list_empty(&tsk->cg_list)) { 4239 if (!list_empty(&tsk->cg_list)) {
4242 write_lock(&css_set_lock); 4240 write_lock(&css_set_lock);
4243 if (!list_empty(&tsk->cg_list)) 4241 if (!list_empty(&tsk->cg_list))
4244 list_del(&tsk->cg_list); 4242 list_del_init(&tsk->cg_list);
4245 write_unlock(&css_set_lock); 4243 write_unlock(&css_set_lock);
4246 } 4244 }
4247 4245
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 156cc5556140..c95fc4df0faa 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -160,7 +160,6 @@ static void cpu_notify_nofail(unsigned long val, void *v)
160{ 160{
161 BUG_ON(cpu_notify(val, v)); 161 BUG_ON(cpu_notify(val, v));
162} 162}
163
164EXPORT_SYMBOL(register_cpu_notifier); 163EXPORT_SYMBOL(register_cpu_notifier);
165 164
166void __ref unregister_cpu_notifier(struct notifier_block *nb) 165void __ref unregister_cpu_notifier(struct notifier_block *nb)
@@ -205,7 +204,6 @@ static int __ref take_cpu_down(void *_param)
205 return err; 204 return err;
206 205
207 cpu_notify(CPU_DYING | param->mod, param->hcpu); 206 cpu_notify(CPU_DYING | param->mod, param->hcpu);
208
209 return 0; 207 return 0;
210} 208}
211 209
@@ -227,6 +225,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
227 return -EINVAL; 225 return -EINVAL;
228 226
229 cpu_hotplug_begin(); 227 cpu_hotplug_begin();
228
230 err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls); 229 err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
231 if (err) { 230 if (err) {
232 nr_calls--; 231 nr_calls--;
@@ -304,7 +303,7 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
304 ret = __cpu_notify(CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls); 303 ret = __cpu_notify(CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls);
305 if (ret) { 304 if (ret) {
306 nr_calls--; 305 nr_calls--;
307 printk("%s: attempt to bring up CPU %u failed\n", 306 printk(KERN_WARNING "%s: attempt to bring up CPU %u failed\n",
308 __func__, cpu); 307 __func__, cpu);
309 goto out_notify; 308 goto out_notify;
310 } 309 }
@@ -450,14 +449,14 @@ void __ref enable_nonboot_cpus(void)
450 if (cpumask_empty(frozen_cpus)) 449 if (cpumask_empty(frozen_cpus))
451 goto out; 450 goto out;
452 451
453 printk("Enabling non-boot CPUs ...\n"); 452 printk(KERN_INFO "Enabling non-boot CPUs ...\n");
454 453
455 arch_enable_nonboot_cpus_begin(); 454 arch_enable_nonboot_cpus_begin();
456 455
457 for_each_cpu(cpu, frozen_cpus) { 456 for_each_cpu(cpu, frozen_cpus) {
458 error = _cpu_up(cpu, 1); 457 error = _cpu_up(cpu, 1);
459 if (!error) { 458 if (!error) {
460 printk("CPU%d is up\n", cpu); 459 printk(KERN_INFO "CPU%d is up\n", cpu);
461 continue; 460 continue;
462 } 461 }
463 printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error); 462 printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error);
@@ -509,7 +508,7 @@ void __cpuinit notify_cpu_starting(unsigned int cpu)
509 */ 508 */
510 509
511/* cpu_bit_bitmap[0] is empty - so we can back into it */ 510/* cpu_bit_bitmap[0] is empty - so we can back into it */
512#define MASK_DECLARE_1(x) [x+1][0] = 1UL << (x) 511#define MASK_DECLARE_1(x) [x+1][0] = (1UL << (x))
513#define MASK_DECLARE_2(x) MASK_DECLARE_1(x), MASK_DECLARE_1(x+1) 512#define MASK_DECLARE_2(x) MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
514#define MASK_DECLARE_4(x) MASK_DECLARE_2(x), MASK_DECLARE_2(x+2) 513#define MASK_DECLARE_4(x) MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
515#define MASK_DECLARE_8(x) MASK_DECLARE_4(x), MASK_DECLARE_4(x+4) 514#define MASK_DECLARE_8(x) MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index e92e98189032..33eee16addb8 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1015,17 +1015,12 @@ static void cpuset_change_nodemask(struct task_struct *p,
1015 struct cpuset *cs; 1015 struct cpuset *cs;
1016 int migrate; 1016 int migrate;
1017 const nodemask_t *oldmem = scan->data; 1017 const nodemask_t *oldmem = scan->data;
1018 NODEMASK_ALLOC(nodemask_t, newmems, GFP_KERNEL); 1018 static nodemask_t newmems; /* protected by cgroup_mutex */
1019
1020 if (!newmems)
1021 return;
1022 1019
1023 cs = cgroup_cs(scan->cg); 1020 cs = cgroup_cs(scan->cg);
1024 guarantee_online_mems(cs, newmems); 1021 guarantee_online_mems(cs, &newmems);
1025
1026 cpuset_change_task_nodemask(p, newmems);
1027 1022
1028 NODEMASK_FREE(newmems); 1023 cpuset_change_task_nodemask(p, &newmems);
1029 1024
1030 mm = get_task_mm(p); 1025 mm = get_task_mm(p);
1031 if (!mm) 1026 if (!mm)
@@ -1438,44 +1433,35 @@ static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont,
1438 struct mm_struct *mm; 1433 struct mm_struct *mm;
1439 struct cpuset *cs = cgroup_cs(cont); 1434 struct cpuset *cs = cgroup_cs(cont);
1440 struct cpuset *oldcs = cgroup_cs(oldcont); 1435 struct cpuset *oldcs = cgroup_cs(oldcont);
1441 NODEMASK_ALLOC(nodemask_t, from, GFP_KERNEL); 1436 static nodemask_t to; /* protected by cgroup_mutex */
1442 NODEMASK_ALLOC(nodemask_t, to, GFP_KERNEL);
1443
1444 if (from == NULL || to == NULL)
1445 goto alloc_fail;
1446 1437
1447 if (cs == &top_cpuset) { 1438 if (cs == &top_cpuset) {
1448 cpumask_copy(cpus_attach, cpu_possible_mask); 1439 cpumask_copy(cpus_attach, cpu_possible_mask);
1449 } else { 1440 } else {
1450 guarantee_online_cpus(cs, cpus_attach); 1441 guarantee_online_cpus(cs, cpus_attach);
1451 } 1442 }
1452 guarantee_online_mems(cs, to); 1443 guarantee_online_mems(cs, &to);
1453 1444
1454 /* do per-task migration stuff possibly for each in the threadgroup */ 1445 /* do per-task migration stuff possibly for each in the threadgroup */
1455 cpuset_attach_task(tsk, to, cs); 1446 cpuset_attach_task(tsk, &to, cs);
1456 if (threadgroup) { 1447 if (threadgroup) {
1457 struct task_struct *c; 1448 struct task_struct *c;
1458 rcu_read_lock(); 1449 rcu_read_lock();
1459 list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) { 1450 list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
1460 cpuset_attach_task(c, to, cs); 1451 cpuset_attach_task(c, &to, cs);
1461 } 1452 }
1462 rcu_read_unlock(); 1453 rcu_read_unlock();
1463 } 1454 }
1464 1455
1465 /* change mm; only needs to be done once even if threadgroup */ 1456 /* change mm; only needs to be done once even if threadgroup */
1466 *from = oldcs->mems_allowed; 1457 to = cs->mems_allowed;
1467 *to = cs->mems_allowed;
1468 mm = get_task_mm(tsk); 1458 mm = get_task_mm(tsk);
1469 if (mm) { 1459 if (mm) {
1470 mpol_rebind_mm(mm, to); 1460 mpol_rebind_mm(mm, &to);
1471 if (is_memory_migrate(cs)) 1461 if (is_memory_migrate(cs))
1472 cpuset_migrate_mm(mm, from, to); 1462 cpuset_migrate_mm(mm, &oldcs->mems_allowed, &to);
1473 mmput(mm); 1463 mmput(mm);
1474 } 1464 }
1475
1476alloc_fail:
1477 NODEMASK_FREE(from);
1478 NODEMASK_FREE(to);
1479} 1465}
1480 1466
1481/* The various types of files and directories in a cpuset file system */ 1467/* The various types of files and directories in a cpuset file system */
@@ -1610,34 +1596,26 @@ out:
1610 * across a page fault. 1596 * across a page fault.
1611 */ 1597 */
1612 1598
1613static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs) 1599static size_t cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
1614{ 1600{
1615 int ret; 1601 size_t count;
1616 1602
1617 mutex_lock(&callback_mutex); 1603 mutex_lock(&callback_mutex);
1618 ret = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_allowed); 1604 count = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_allowed);
1619 mutex_unlock(&callback_mutex); 1605 mutex_unlock(&callback_mutex);
1620 1606
1621 return ret; 1607 return count;
1622} 1608}
1623 1609
1624static int cpuset_sprintf_memlist(char *page, struct cpuset *cs) 1610static size_t cpuset_sprintf_memlist(char *page, struct cpuset *cs)
1625{ 1611{
1626 NODEMASK_ALLOC(nodemask_t, mask, GFP_KERNEL); 1612 size_t count;
1627 int retval;
1628
1629 if (mask == NULL)
1630 return -ENOMEM;
1631 1613
1632 mutex_lock(&callback_mutex); 1614 mutex_lock(&callback_mutex);
1633 *mask = cs->mems_allowed; 1615 count = nodelist_scnprintf(page, PAGE_SIZE, cs->mems_allowed);
1634 mutex_unlock(&callback_mutex); 1616 mutex_unlock(&callback_mutex);
1635 1617
1636 retval = nodelist_scnprintf(page, PAGE_SIZE, *mask); 1618 return count;
1637
1638 NODEMASK_FREE(mask);
1639
1640 return retval;
1641} 1619}
1642 1620
1643static ssize_t cpuset_common_file_read(struct cgroup *cont, 1621static ssize_t cpuset_common_file_read(struct cgroup *cont,
@@ -1862,8 +1840,10 @@ static void cpuset_post_clone(struct cgroup_subsys *ss,
1862 cs = cgroup_cs(cgroup); 1840 cs = cgroup_cs(cgroup);
1863 parent_cs = cgroup_cs(parent); 1841 parent_cs = cgroup_cs(parent);
1864 1842
1843 mutex_lock(&callback_mutex);
1865 cs->mems_allowed = parent_cs->mems_allowed; 1844 cs->mems_allowed = parent_cs->mems_allowed;
1866 cpumask_copy(cs->cpus_allowed, parent_cs->cpus_allowed); 1845 cpumask_copy(cs->cpus_allowed, parent_cs->cpus_allowed);
1846 mutex_unlock(&callback_mutex);
1867 return; 1847 return;
1868} 1848}
1869 1849
@@ -2066,10 +2046,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
2066 struct cpuset *cp; /* scans cpusets being updated */ 2046 struct cpuset *cp; /* scans cpusets being updated */
2067 struct cpuset *child; /* scans child cpusets of cp */ 2047 struct cpuset *child; /* scans child cpusets of cp */
2068 struct cgroup *cont; 2048 struct cgroup *cont;
2069 NODEMASK_ALLOC(nodemask_t, oldmems, GFP_KERNEL); 2049 static nodemask_t oldmems; /* protected by cgroup_mutex */
2070
2071 if (oldmems == NULL)
2072 return;
2073 2050
2074 list_add_tail((struct list_head *)&root->stack_list, &queue); 2051 list_add_tail((struct list_head *)&root->stack_list, &queue);
2075 2052
@@ -2086,7 +2063,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
2086 nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY])) 2063 nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
2087 continue; 2064 continue;
2088 2065
2089 *oldmems = cp->mems_allowed; 2066 oldmems = cp->mems_allowed;
2090 2067
2091 /* Remove offline cpus and mems from this cpuset. */ 2068 /* Remove offline cpus and mems from this cpuset. */
2092 mutex_lock(&callback_mutex); 2069 mutex_lock(&callback_mutex);
@@ -2102,10 +2079,9 @@ static void scan_for_empty_cpusets(struct cpuset *root)
2102 remove_tasks_in_empty_cpuset(cp); 2079 remove_tasks_in_empty_cpuset(cp);
2103 else { 2080 else {
2104 update_tasks_cpumask(cp, NULL); 2081 update_tasks_cpumask(cp, NULL);
2105 update_tasks_nodemask(cp, oldmems, NULL); 2082 update_tasks_nodemask(cp, &oldmems, NULL);
2106 } 2083 }
2107 } 2084 }
2108 NODEMASK_FREE(oldmems);
2109} 2085}
2110 2086
2111/* 2087/*
@@ -2147,19 +2123,16 @@ void cpuset_update_active_cpus(void)
2147static int cpuset_track_online_nodes(struct notifier_block *self, 2123static int cpuset_track_online_nodes(struct notifier_block *self,
2148 unsigned long action, void *arg) 2124 unsigned long action, void *arg)
2149{ 2125{
2150 NODEMASK_ALLOC(nodemask_t, oldmems, GFP_KERNEL); 2126 static nodemask_t oldmems; /* protected by cgroup_mutex */
2151
2152 if (oldmems == NULL)
2153 return NOTIFY_DONE;
2154 2127
2155 cgroup_lock(); 2128 cgroup_lock();
2156 switch (action) { 2129 switch (action) {
2157 case MEM_ONLINE: 2130 case MEM_ONLINE:
2158 *oldmems = top_cpuset.mems_allowed; 2131 oldmems = top_cpuset.mems_allowed;
2159 mutex_lock(&callback_mutex); 2132 mutex_lock(&callback_mutex);
2160 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; 2133 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
2161 mutex_unlock(&callback_mutex); 2134 mutex_unlock(&callback_mutex);
2162 update_tasks_nodemask(&top_cpuset, oldmems, NULL); 2135 update_tasks_nodemask(&top_cpuset, &oldmems, NULL);
2163 break; 2136 break;
2164 case MEM_OFFLINE: 2137 case MEM_OFFLINE:
2165 /* 2138 /*
@@ -2173,7 +2146,6 @@ static int cpuset_track_online_nodes(struct notifier_block *self,
2173 } 2146 }
2174 cgroup_unlock(); 2147 cgroup_unlock();
2175 2148
2176 NODEMASK_FREE(oldmems);
2177 return NOTIFY_OK; 2149 return NOTIFY_OK;
2178} 2150}
2179#endif 2151#endif
diff --git a/kernel/crash_dump.c b/kernel/crash_dump.c
new file mode 100644
index 000000000000..5f85690285d4
--- /dev/null
+++ b/kernel/crash_dump.c
@@ -0,0 +1,34 @@
1#include <linux/kernel.h>
2#include <linux/crash_dump.h>
3#include <linux/init.h>
4#include <linux/errno.h>
5#include <linux/module.h>
6
7/*
8 * If we have booted due to a crash, max_pfn will be a very low value. We need
9 * to know the amount of memory that the previous kernel used.
10 */
11unsigned long saved_max_pfn;
12
13/*
14 * stores the physical address of elf header of crash image
15 *
16 * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by
17 * is_kdump_kernel() to determine if we are booting after a panic. Hence put
18 * it under CONFIG_CRASH_DUMP and not CONFIG_PROC_VMCORE.
19 */
20unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
21
22/*
23 * elfcorehdr= specifies the location of elf core header stored by the crashed
24 * kernel. This option will be passed by kexec loader to the capture kernel.
25 */
26static int __init setup_elfcorehdr(char *arg)
27{
28 char *end;
29 if (!arg)
30 return -EINVAL;
31 elfcorehdr_addr = memparse(arg, &end);
32 return end > arg ? 0 : -EINVAL;
33}
34early_param("elfcorehdr", setup_elfcorehdr);
diff --git a/kernel/cred.c b/kernel/cred.c
index 2343c132c5a7..5557b55048df 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -741,6 +741,12 @@ int set_create_files_as(struct cred *new, struct inode *inode)
741} 741}
742EXPORT_SYMBOL(set_create_files_as); 742EXPORT_SYMBOL(set_create_files_as);
743 743
744struct user_namespace *current_user_ns(void)
745{
746 return _current_user_ns();
747}
748EXPORT_SYMBOL(current_user_ns);
749
744#ifdef CONFIG_DEBUG_CREDENTIALS 750#ifdef CONFIG_DEBUG_CREDENTIALS
745 751
746bool creds_are_invalid(const struct cred *cred) 752bool creds_are_invalid(const struct cred *cred)
diff --git a/kernel/fork.c b/kernel/fork.c
index 05b92c457010..457fff2e17e0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -40,6 +40,7 @@
40#include <linux/tracehook.h> 40#include <linux/tracehook.h>
41#include <linux/futex.h> 41#include <linux/futex.h>
42#include <linux/compat.h> 42#include <linux/compat.h>
43#include <linux/kthread.h>
43#include <linux/task_io_accounting_ops.h> 44#include <linux/task_io_accounting_ops.h>
44#include <linux/rcupdate.h> 45#include <linux/rcupdate.h>
45#include <linux/ptrace.h> 46#include <linux/ptrace.h>
@@ -109,20 +110,25 @@ int nr_processes(void)
109} 110}
110 111
111#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR 112#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
112# define alloc_task_struct() kmem_cache_alloc(task_struct_cachep, GFP_KERNEL) 113# define alloc_task_struct_node(node) \
113# define free_task_struct(tsk) kmem_cache_free(task_struct_cachep, (tsk)) 114 kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node)
115# define free_task_struct(tsk) \
116 kmem_cache_free(task_struct_cachep, (tsk))
114static struct kmem_cache *task_struct_cachep; 117static struct kmem_cache *task_struct_cachep;
115#endif 118#endif
116 119
117#ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR 120#ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR
118static inline struct thread_info *alloc_thread_info(struct task_struct *tsk) 121static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
122 int node)
119{ 123{
120#ifdef CONFIG_DEBUG_STACK_USAGE 124#ifdef CONFIG_DEBUG_STACK_USAGE
121 gfp_t mask = GFP_KERNEL | __GFP_ZERO; 125 gfp_t mask = GFP_KERNEL | __GFP_ZERO;
122#else 126#else
123 gfp_t mask = GFP_KERNEL; 127 gfp_t mask = GFP_KERNEL;
124#endif 128#endif
125 return (struct thread_info *)__get_free_pages(mask, THREAD_SIZE_ORDER); 129 struct page *page = alloc_pages_node(node, mask, THREAD_SIZE_ORDER);
130
131 return page ? page_address(page) : NULL;
126} 132}
127 133
128static inline void free_thread_info(struct thread_info *ti) 134static inline void free_thread_info(struct thread_info *ti)
@@ -249,16 +255,16 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
249 struct task_struct *tsk; 255 struct task_struct *tsk;
250 struct thread_info *ti; 256 struct thread_info *ti;
251 unsigned long *stackend; 257 unsigned long *stackend;
252 258 int node = tsk_fork_get_node(orig);
253 int err; 259 int err;
254 260
255 prepare_to_copy(orig); 261 prepare_to_copy(orig);
256 262
257 tsk = alloc_task_struct(); 263 tsk = alloc_task_struct_node(node);
258 if (!tsk) 264 if (!tsk)
259 return NULL; 265 return NULL;
260 266
261 ti = alloc_thread_info(tsk); 267 ti = alloc_thread_info_node(tsk, node);
262 if (!ti) { 268 if (!ti) {
263 free_task_struct(tsk); 269 free_task_struct(tsk);
264 return NULL; 270 return NULL;
@@ -1181,12 +1187,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1181 pid = alloc_pid(p->nsproxy->pid_ns); 1187 pid = alloc_pid(p->nsproxy->pid_ns);
1182 if (!pid) 1188 if (!pid)
1183 goto bad_fork_cleanup_io; 1189 goto bad_fork_cleanup_io;
1184
1185 if (clone_flags & CLONE_NEWPID) {
1186 retval = pid_ns_prepare_proc(p->nsproxy->pid_ns);
1187 if (retval < 0)
1188 goto bad_fork_free_pid;
1189 }
1190 } 1190 }
1191 1191
1192 p->pid = pid_nr(pid); 1192 p->pid = pid_nr(pid);
@@ -1290,7 +1290,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1290 tracehook_finish_clone(p, clone_flags, trace); 1290 tracehook_finish_clone(p, clone_flags, trace);
1291 1291
1292 if (thread_group_leader(p)) { 1292 if (thread_group_leader(p)) {
1293 if (clone_flags & CLONE_NEWPID) 1293 if (is_child_reaper(pid))
1294 p->nsproxy->pid_ns->child_reaper = p; 1294 p->nsproxy->pid_ns->child_reaper = p;
1295 1295
1296 p->signal->leader_pid = pid; 1296 p->signal->leader_pid = pid;
@@ -1513,38 +1513,24 @@ void __init proc_caches_init(void)
1513} 1513}
1514 1514
1515/* 1515/*
1516 * Check constraints on flags passed to the unshare system call and 1516 * Check constraints on flags passed to the unshare system call.
1517 * force unsharing of additional process context as appropriate.
1518 */ 1517 */
1519static void check_unshare_flags(unsigned long *flags_ptr) 1518static int check_unshare_flags(unsigned long unshare_flags)
1520{ 1519{
1520 if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
1521 CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
1522 CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET))
1523 return -EINVAL;
1521 /* 1524 /*
1522 * If unsharing a thread from a thread group, must also 1525 * Not implemented, but pretend it works if there is nothing to
1523 * unshare vm. 1526 * unshare. Note that unsharing CLONE_THREAD or CLONE_SIGHAND
1524 */ 1527 * needs to unshare vm.
1525 if (*flags_ptr & CLONE_THREAD)
1526 *flags_ptr |= CLONE_VM;
1527
1528 /*
1529 * If unsharing vm, must also unshare signal handlers.
1530 */
1531 if (*flags_ptr & CLONE_VM)
1532 *flags_ptr |= CLONE_SIGHAND;
1533
1534 /*
1535 * If unsharing namespace, must also unshare filesystem information.
1536 */ 1528 */
1537 if (*flags_ptr & CLONE_NEWNS) 1529 if (unshare_flags & (CLONE_THREAD | CLONE_SIGHAND | CLONE_VM)) {
1538 *flags_ptr |= CLONE_FS; 1530 /* FIXME: get_task_mm() increments ->mm_users */
1539} 1531 if (atomic_read(&current->mm->mm_users) > 1)
1540 1532 return -EINVAL;
1541/* 1533 }
1542 * Unsharing of tasks created with CLONE_THREAD is not supported yet
1543 */
1544static int unshare_thread(unsigned long unshare_flags)
1545{
1546 if (unshare_flags & CLONE_THREAD)
1547 return -EINVAL;
1548 1534
1549 return 0; 1535 return 0;
1550} 1536}
@@ -1571,34 +1557,6 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
1571} 1557}
1572 1558
1573/* 1559/*
1574 * Unsharing of sighand is not supported yet
1575 */
1576static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp)
1577{
1578 struct sighand_struct *sigh = current->sighand;
1579
1580 if ((unshare_flags & CLONE_SIGHAND) && atomic_read(&sigh->count) > 1)
1581 return -EINVAL;
1582 else
1583 return 0;
1584}
1585
1586/*
1587 * Unshare vm if it is being shared
1588 */
1589static int unshare_vm(unsigned long unshare_flags, struct mm_struct **new_mmp)
1590{
1591 struct mm_struct *mm = current->mm;
1592
1593 if ((unshare_flags & CLONE_VM) &&
1594 (mm && atomic_read(&mm->mm_users) > 1)) {
1595 return -EINVAL;
1596 }
1597
1598 return 0;
1599}
1600
1601/*
1602 * Unshare file descriptor table if it is being shared 1560 * Unshare file descriptor table if it is being shared
1603 */ 1561 */
1604static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp) 1562static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
@@ -1626,45 +1584,37 @@ static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp
1626 */ 1584 */
1627SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) 1585SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
1628{ 1586{
1629 int err = 0;
1630 struct fs_struct *fs, *new_fs = NULL; 1587 struct fs_struct *fs, *new_fs = NULL;
1631 struct sighand_struct *new_sigh = NULL;
1632 struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL;
1633 struct files_struct *fd, *new_fd = NULL; 1588 struct files_struct *fd, *new_fd = NULL;
1634 struct nsproxy *new_nsproxy = NULL; 1589 struct nsproxy *new_nsproxy = NULL;
1635 int do_sysvsem = 0; 1590 int do_sysvsem = 0;
1591 int err;
1636 1592
1637 check_unshare_flags(&unshare_flags); 1593 err = check_unshare_flags(unshare_flags);
1638 1594 if (err)
1639 /* Return -EINVAL for all unsupported flags */
1640 err = -EINVAL;
1641 if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
1642 CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
1643 CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET))
1644 goto bad_unshare_out; 1595 goto bad_unshare_out;
1645 1596
1646 /* 1597 /*
1598 * If unsharing namespace, must also unshare filesystem information.
1599 */
1600 if (unshare_flags & CLONE_NEWNS)
1601 unshare_flags |= CLONE_FS;
1602 /*
1647 * CLONE_NEWIPC must also detach from the undolist: after switching 1603 * CLONE_NEWIPC must also detach from the undolist: after switching
1648 * to a new ipc namespace, the semaphore arrays from the old 1604 * to a new ipc namespace, the semaphore arrays from the old
1649 * namespace are unreachable. 1605 * namespace are unreachable.
1650 */ 1606 */
1651 if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM)) 1607 if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM))
1652 do_sysvsem = 1; 1608 do_sysvsem = 1;
1653 if ((err = unshare_thread(unshare_flags)))
1654 goto bad_unshare_out;
1655 if ((err = unshare_fs(unshare_flags, &new_fs))) 1609 if ((err = unshare_fs(unshare_flags, &new_fs)))
1656 goto bad_unshare_cleanup_thread; 1610 goto bad_unshare_out;
1657 if ((err = unshare_sighand(unshare_flags, &new_sigh)))
1658 goto bad_unshare_cleanup_fs;
1659 if ((err = unshare_vm(unshare_flags, &new_mm)))
1660 goto bad_unshare_cleanup_sigh;
1661 if ((err = unshare_fd(unshare_flags, &new_fd))) 1611 if ((err = unshare_fd(unshare_flags, &new_fd)))
1662 goto bad_unshare_cleanup_vm; 1612 goto bad_unshare_cleanup_fs;
1663 if ((err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, 1613 if ((err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy,
1664 new_fs))) 1614 new_fs)))
1665 goto bad_unshare_cleanup_fd; 1615 goto bad_unshare_cleanup_fd;
1666 1616
1667 if (new_fs || new_mm || new_fd || do_sysvsem || new_nsproxy) { 1617 if (new_fs || new_fd || do_sysvsem || new_nsproxy) {
1668 if (do_sysvsem) { 1618 if (do_sysvsem) {
1669 /* 1619 /*
1670 * CLONE_SYSVSEM is equivalent to sys_exit(). 1620 * CLONE_SYSVSEM is equivalent to sys_exit().
@@ -1690,19 +1640,6 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
1690 spin_unlock(&fs->lock); 1640 spin_unlock(&fs->lock);
1691 } 1641 }
1692 1642
1693 if (new_mm) {
1694 mm = current->mm;
1695 active_mm = current->active_mm;
1696 current->mm = new_mm;
1697 current->active_mm = new_mm;
1698 if (current->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
1699 atomic_dec(&mm->oom_disable_count);
1700 atomic_inc(&new_mm->oom_disable_count);
1701 }
1702 activate_mm(active_mm, new_mm);
1703 new_mm = mm;
1704 }
1705
1706 if (new_fd) { 1643 if (new_fd) {
1707 fd = current->files; 1644 fd = current->files;
1708 current->files = new_fd; 1645 current->files = new_fd;
@@ -1719,20 +1656,10 @@ bad_unshare_cleanup_fd:
1719 if (new_fd) 1656 if (new_fd)
1720 put_files_struct(new_fd); 1657 put_files_struct(new_fd);
1721 1658
1722bad_unshare_cleanup_vm:
1723 if (new_mm)
1724 mmput(new_mm);
1725
1726bad_unshare_cleanup_sigh:
1727 if (new_sigh)
1728 if (atomic_dec_and_test(&new_sigh->count))
1729 kmem_cache_free(sighand_cachep, new_sigh);
1730
1731bad_unshare_cleanup_fs: 1659bad_unshare_cleanup_fs:
1732 if (new_fs) 1660 if (new_fs)
1733 free_fs_struct(new_fs); 1661 free_fs_struct(new_fs);
1734 1662
1735bad_unshare_cleanup_thread:
1736bad_unshare_out: 1663bad_unshare_out:
1737 return err; 1664 return err;
1738} 1665}
diff --git a/kernel/futex.c b/kernel/futex.c
index bda415715382..6570c459f31c 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2418,10 +2418,19 @@ SYSCALL_DEFINE3(get_robust_list, int, pid,
2418 goto err_unlock; 2418 goto err_unlock;
2419 ret = -EPERM; 2419 ret = -EPERM;
2420 pcred = __task_cred(p); 2420 pcred = __task_cred(p);
2421 /* If victim is in a different user_ns, then uids are not
2422 comparable, so we must have CAP_SYS_PTRACE */
2423 if (cred->user->user_ns != pcred->user->user_ns) {
2424 if (!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
2425 goto err_unlock;
2426 goto ok;
2427 }
2428 /* If victim is in the same user_ns, then uids are comparable */
2421 if (cred->euid != pcred->euid && 2429 if (cred->euid != pcred->euid &&
2422 cred->euid != pcred->uid && 2430 cred->euid != pcred->uid &&
2423 !capable(CAP_SYS_PTRACE)) 2431 !ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
2424 goto err_unlock; 2432 goto err_unlock;
2433ok:
2425 head = p->robust_list; 2434 head = p->robust_list;
2426 rcu_read_unlock(); 2435 rcu_read_unlock();
2427 } 2436 }
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index a7934ac75e5b..5f9e689dc8f0 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -153,10 +153,19 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
153 goto err_unlock; 153 goto err_unlock;
154 ret = -EPERM; 154 ret = -EPERM;
155 pcred = __task_cred(p); 155 pcred = __task_cred(p);
156 /* If victim is in a different user_ns, then uids are not
157 comparable, so we must have CAP_SYS_PTRACE */
158 if (cred->user->user_ns != pcred->user->user_ns) {
159 if (!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
160 goto err_unlock;
161 goto ok;
162 }
163 /* If victim is in the same user_ns, then uids are comparable */
156 if (cred->euid != pcred->euid && 164 if (cred->euid != pcred->euid &&
157 cred->euid != pcred->uid && 165 cred->euid != pcred->uid &&
158 !capable(CAP_SYS_PTRACE)) 166 !ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
159 goto err_unlock; 167 goto err_unlock;
168ok:
160 head = p->compat_robust_list; 169 head = p->compat_robust_list;
161 rcu_read_unlock(); 170 rcu_read_unlock();
162 } 171 }
diff --git a/kernel/gcov/Makefile b/kernel/gcov/Makefile
index 3f761001d517..e97ca59e2520 100644
--- a/kernel/gcov/Makefile
+++ b/kernel/gcov/Makefile
@@ -1,3 +1,3 @@
1EXTRA_CFLAGS := -DSRCTREE='"$(srctree)"' -DOBJTREE='"$(objtree)"' 1ccflags-y := -DSRCTREE='"$(srctree)"' -DOBJTREE='"$(objtree)"'
2 2
3obj-$(CONFIG_GCOV_KERNEL) := base.o fs.o gcc_3_4.o 3obj-$(CONFIG_GCOV_KERNEL) := base.o fs.o gcc_3_4.o
diff --git a/kernel/groups.c b/kernel/groups.c
index 253dc0f35cf4..1cc476d52dd3 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -233,7 +233,7 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist)
233 struct group_info *group_info; 233 struct group_info *group_info;
234 int retval; 234 int retval;
235 235
236 if (!capable(CAP_SETGID)) 236 if (!nsown_capable(CAP_SETGID))
237 return -EPERM; 237 return -EPERM;
238 if ((unsigned)gidsetsize > NGROUPS_MAX) 238 if ((unsigned)gidsetsize > NGROUPS_MAX)
239 return -EINVAL; 239 return -EINVAL;
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index acd599a43bfb..0a2aa73e536c 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1064,10 +1064,10 @@ mismatch:
1064 ret = -EBUSY; 1064 ret = -EBUSY;
1065 1065
1066out_mask: 1066out_mask:
1067 raw_spin_unlock_irqrestore(&desc->lock, flags);
1067 free_cpumask_var(mask); 1068 free_cpumask_var(mask);
1068 1069
1069out_thread: 1070out_thread:
1070 raw_spin_unlock_irqrestore(&desc->lock, flags);
1071 if (new->thread) { 1071 if (new->thread) {
1072 struct task_struct *t = new->thread; 1072 struct task_struct *t = new->thread;
1073 1073
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 4cc2e5ed0bec..760248de109d 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -405,7 +405,8 @@ int show_interrupts(struct seq_file *p, void *v)
405 for_each_online_cpu(j) 405 for_each_online_cpu(j)
406 seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); 406 seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
407 seq_printf(p, " %8s", desc->irq_data.chip->name); 407 seq_printf(p, " %8s", desc->irq_data.chip->name);
408 seq_printf(p, "-%-8s", desc->name); 408 if (desc->name)
409 seq_printf(p, "-%-8s", desc->name);
409 410
410 if (action) { 411 if (action) {
411 seq_printf(p, " %s", action->name); 412 seq_printf(p, " %s", action->name);
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 6f6d091b5757..a56aa58b9cb0 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -64,14 +64,14 @@ static inline int is_kernel_text(unsigned long addr)
64 if ((addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) || 64 if ((addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) ||
65 arch_is_kernel_text(addr)) 65 arch_is_kernel_text(addr))
66 return 1; 66 return 1;
67 return in_gate_area_no_task(addr); 67 return in_gate_area_no_mm(addr);
68} 68}
69 69
70static inline int is_kernel(unsigned long addr) 70static inline int is_kernel(unsigned long addr)
71{ 71{
72 if (addr >= (unsigned long)_stext && addr <= (unsigned long)_end) 72 if (addr >= (unsigned long)_stext && addr <= (unsigned long)_end)
73 return 1; 73 return 1;
74 return in_gate_area_no_task(addr); 74 return in_gate_area_no_mm(addr);
75} 75}
76 76
77static int is_ksym_addr(unsigned long addr) 77static int is_ksym_addr(unsigned long addr)
@@ -477,13 +477,11 @@ static int s_show(struct seq_file *m, void *p)
477 */ 477 */
478 type = iter->exported ? toupper(iter->type) : 478 type = iter->exported ? toupper(iter->type) :
479 tolower(iter->type); 479 tolower(iter->type);
480 seq_printf(m, "%0*lx %c %s\t[%s]\n", 480 seq_printf(m, "%pK %c %s\t[%s]\n", (void *)iter->value,
481 (int)(2 * sizeof(void *)), 481 type, iter->name, iter->module_name);
482 iter->value, type, iter->name, iter->module_name);
483 } else 482 } else
484 seq_printf(m, "%0*lx %c %s\n", 483 seq_printf(m, "%pK %c %s\n", (void *)iter->value,
485 (int)(2 * sizeof(void *)), 484 iter->type, iter->name);
486 iter->value, iter->type, iter->name);
487 return 0; 485 return 0;
488} 486}
489 487
diff --git a/kernel/kthread.c b/kernel/kthread.c
index c55afba990a3..684ab3f7dd72 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -27,6 +27,7 @@ struct kthread_create_info
27 /* Information passed to kthread() from kthreadd. */ 27 /* Information passed to kthread() from kthreadd. */
28 int (*threadfn)(void *data); 28 int (*threadfn)(void *data);
29 void *data; 29 void *data;
30 int node;
30 31
31 /* Result passed back to kthread_create() from kthreadd. */ 32 /* Result passed back to kthread_create() from kthreadd. */
32 struct task_struct *result; 33 struct task_struct *result;
@@ -98,10 +99,23 @@ static int kthread(void *_create)
98 do_exit(ret); 99 do_exit(ret);
99} 100}
100 101
102/* called from do_fork() to get the node for the task that is about to be created */
103int tsk_fork_get_node(struct task_struct *tsk)
104{
105#ifdef CONFIG_NUMA
106 if (tsk == kthreadd_task)
107 return tsk->pref_node_fork;
108#endif
109 return numa_node_id();
110}
111
101static void create_kthread(struct kthread_create_info *create) 112static void create_kthread(struct kthread_create_info *create)
102{ 113{
103 int pid; 114 int pid;
104 115
116#ifdef CONFIG_NUMA
117 current->pref_node_fork = create->node;
118#endif
105 /* We want our own signal handler (we take no signals by default). */ 119 /* We want our own signal handler (we take no signals by default). */
106 pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD); 120 pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD);
107 if (pid < 0) { 121 if (pid < 0) {
@@ -111,15 +125,18 @@ static void create_kthread(struct kthread_create_info *create)
111} 125}
112 126
113/** 127/**
114 * kthread_create - create a kthread. 128 * kthread_create_on_node - create a kthread.
115 * @threadfn: the function to run until signal_pending(current). 129 * @threadfn: the function to run until signal_pending(current).
116 * @data: data ptr for @threadfn. 130 * @data: data ptr for @threadfn.
131 * @node: memory node number.
117 * @namefmt: printf-style name for the thread. 132 * @namefmt: printf-style name for the thread.
118 * 133 *
119 * Description: This helper function creates and names a kernel 134 * Description: This helper function creates and names a kernel
120 * thread. The thread will be stopped: use wake_up_process() to start 135 * thread. The thread will be stopped: use wake_up_process() to start
121 * it. See also kthread_run(). 136 * it. See also kthread_run().
122 * 137 *
138 * If the thread is going to be bound to a particular cpu, give its node
139 * in @node to get NUMA affinity for the kthread stack; otherwise give -1.
123 * When woken, the thread will run @threadfn() with @data as its 140 * When woken, the thread will run @threadfn() with @data as its
124 * argument. @threadfn() can either call do_exit() directly if it is a 141 * argument. @threadfn() can either call do_exit() directly if it is a
125 * standalone thread for which no one will call kthread_stop(), or 142 * standalone thread for which no one will call kthread_stop(), or
@@ -129,15 +146,17 @@ static void create_kthread(struct kthread_create_info *create)
129 * 146 *
130 * Returns a task_struct or ERR_PTR(-ENOMEM). 147 * Returns a task_struct or ERR_PTR(-ENOMEM).
131 */ 148 */
132struct task_struct *kthread_create(int (*threadfn)(void *data), 149struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
133 void *data, 150 void *data,
134 const char namefmt[], 151 int node,
135 ...) 152 const char namefmt[],
153 ...)
136{ 154{
137 struct kthread_create_info create; 155 struct kthread_create_info create;
138 156
139 create.threadfn = threadfn; 157 create.threadfn = threadfn;
140 create.data = data; 158 create.data = data;
159 create.node = node;
141 init_completion(&create.done); 160 init_completion(&create.done);
142 161
143 spin_lock(&kthread_create_lock); 162 spin_lock(&kthread_create_lock);
@@ -164,7 +183,7 @@ struct task_struct *kthread_create(int (*threadfn)(void *data),
164 } 183 }
165 return create.result; 184 return create.result;
166} 185}
167EXPORT_SYMBOL(kthread_create); 186EXPORT_SYMBOL(kthread_create_on_node);
168 187
169/** 188/**
170 * kthread_bind - bind a just-created kthread to a cpu. 189 * kthread_bind - bind a just-created kthread to a cpu.
diff --git a/kernel/module.c b/kernel/module.c
index efa290ea94bf..1f9f7bc56ca1 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1168,7 +1168,7 @@ static ssize_t module_sect_show(struct module_attribute *mattr,
1168{ 1168{
1169 struct module_sect_attr *sattr = 1169 struct module_sect_attr *sattr =
1170 container_of(mattr, struct module_sect_attr, mattr); 1170 container_of(mattr, struct module_sect_attr, mattr);
1171 return sprintf(buf, "0x%lx\n", sattr->address); 1171 return sprintf(buf, "0x%pK\n", (void *)sattr->address);
1172} 1172}
1173 1173
1174static void free_sect_attrs(struct module_sect_attrs *sect_attrs) 1174static void free_sect_attrs(struct module_sect_attrs *sect_attrs)
@@ -3224,7 +3224,7 @@ static int m_show(struct seq_file *m, void *p)
3224 mod->state == MODULE_STATE_COMING ? "Loading": 3224 mod->state == MODULE_STATE_COMING ? "Loading":
3225 "Live"); 3225 "Live");
3226 /* Used by oprofile and other similar tools. */ 3226 /* Used by oprofile and other similar tools. */
3227 seq_printf(m, " 0x%p", mod->module_core); 3227 seq_printf(m, " 0x%pK", mod->module_core);
3228 3228
3229 /* Taints info */ 3229 /* Taints info */
3230 if (mod->taints) 3230 if (mod->taints)
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index f74e6c00e26d..a05d191ffdd9 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -69,13 +69,13 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
69 goto out_ns; 69 goto out_ns;
70 } 70 }
71 71
72 new_nsp->uts_ns = copy_utsname(flags, tsk->nsproxy->uts_ns); 72 new_nsp->uts_ns = copy_utsname(flags, tsk);
73 if (IS_ERR(new_nsp->uts_ns)) { 73 if (IS_ERR(new_nsp->uts_ns)) {
74 err = PTR_ERR(new_nsp->uts_ns); 74 err = PTR_ERR(new_nsp->uts_ns);
75 goto out_uts; 75 goto out_uts;
76 } 76 }
77 77
78 new_nsp->ipc_ns = copy_ipcs(flags, tsk->nsproxy->ipc_ns); 78 new_nsp->ipc_ns = copy_ipcs(flags, tsk);
79 if (IS_ERR(new_nsp->ipc_ns)) { 79 if (IS_ERR(new_nsp->ipc_ns)) {
80 err = PTR_ERR(new_nsp->ipc_ns); 80 err = PTR_ERR(new_nsp->ipc_ns);
81 goto out_ipc; 81 goto out_ipc;
diff --git a/kernel/panic.c b/kernel/panic.c
index 991bb87a1704..69231670eb95 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -433,3 +433,13 @@ EXPORT_SYMBOL(__stack_chk_fail);
433 433
434core_param(panic, panic_timeout, int, 0644); 434core_param(panic, panic_timeout, int, 0644);
435core_param(pause_on_oops, pause_on_oops, int, 0644); 435core_param(pause_on_oops, pause_on_oops, int, 0644);
436
437static int __init oops_setup(char *s)
438{
439 if (!s)
440 return -EINVAL;
441 if (!strcmp(s, "panic"))
442 panic_on_oops = 1;
443 return 0;
444}
445early_param("oops", oops_setup);
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index ed253aa24ba4..3472bb1a070c 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -5122,7 +5122,7 @@ static int perf_exclude_event(struct perf_event *event,
5122 struct pt_regs *regs) 5122 struct pt_regs *regs)
5123{ 5123{
5124 if (event->hw.state & PERF_HES_STOPPED) 5124 if (event->hw.state & PERF_HES_STOPPED)
5125 return 0; 5125 return 1;
5126 5126
5127 if (regs) { 5127 if (regs) {
5128 if (event->attr.exclude_user && user_mode(regs)) 5128 if (event->attr.exclude_user && user_mode(regs))
@@ -5478,6 +5478,8 @@ static int perf_tp_event_match(struct perf_event *event,
5478 struct perf_sample_data *data, 5478 struct perf_sample_data *data,
5479 struct pt_regs *regs) 5479 struct pt_regs *regs)
5480{ 5480{
5481 if (event->hw.state & PERF_HES_STOPPED)
5482 return 0;
5481 /* 5483 /*
5482 * All tracepoints are from kernel-space. 5484 * All tracepoints are from kernel-space.
5483 */ 5485 */
@@ -6720,17 +6722,20 @@ __perf_event_exit_task(struct perf_event *child_event,
6720 struct perf_event_context *child_ctx, 6722 struct perf_event_context *child_ctx,
6721 struct task_struct *child) 6723 struct task_struct *child)
6722{ 6724{
6723 struct perf_event *parent_event; 6725 if (child_event->parent) {
6726 raw_spin_lock_irq(&child_ctx->lock);
6727 perf_group_detach(child_event);
6728 raw_spin_unlock_irq(&child_ctx->lock);
6729 }
6724 6730
6725 perf_remove_from_context(child_event); 6731 perf_remove_from_context(child_event);
6726 6732
6727 parent_event = child_event->parent;
6728 /* 6733 /*
6729 * It can happen that parent exits first, and has events 6734 * It can happen that the parent exits first, and has events
6730 * that are still around due to the child reference. These 6735 * that are still around due to the child reference. These
6731 * events need to be zapped - but otherwise linger. 6736 * events need to be zapped.
6732 */ 6737 */
6733 if (parent_event) { 6738 if (child_event->parent) {
6734 sync_child_event(child_event, child); 6739 sync_child_event(child_event, child);
6735 free_event(child_event); 6740 free_event(child_event);
6736 } 6741 }
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index a5aff94e1f0b..e9c9adc84ca6 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -14,6 +14,7 @@
14#include <linux/err.h> 14#include <linux/err.h>
15#include <linux/acct.h> 15#include <linux/acct.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/proc_fs.h>
17 18
18#define BITS_PER_PAGE (PAGE_SIZE*8) 19#define BITS_PER_PAGE (PAGE_SIZE*8)
19 20
@@ -72,7 +73,7 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p
72{ 73{
73 struct pid_namespace *ns; 74 struct pid_namespace *ns;
74 unsigned int level = parent_pid_ns->level + 1; 75 unsigned int level = parent_pid_ns->level + 1;
75 int i; 76 int i, err = -ENOMEM;
76 77
77 ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL); 78 ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL);
78 if (ns == NULL) 79 if (ns == NULL)
@@ -96,14 +97,20 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p
96 for (i = 1; i < PIDMAP_ENTRIES; i++) 97 for (i = 1; i < PIDMAP_ENTRIES; i++)
97 atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); 98 atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
98 99
100 err = pid_ns_prepare_proc(ns);
101 if (err)
102 goto out_put_parent_pid_ns;
103
99 return ns; 104 return ns;
100 105
106out_put_parent_pid_ns:
107 put_pid_ns(parent_pid_ns);
101out_free_map: 108out_free_map:
102 kfree(ns->pidmap[0].page); 109 kfree(ns->pidmap[0].page);
103out_free: 110out_free:
104 kmem_cache_free(pid_ns_cachep, ns); 111 kmem_cache_free(pid_ns_cachep, ns);
105out: 112out:
106 return ERR_PTR(-ENOMEM); 113 return ERR_PTR(err);
107} 114}
108 115
109static void destroy_pid_namespace(struct pid_namespace *ns) 116static void destroy_pid_namespace(struct pid_namespace *ns)
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index c350e18b53e3..c5ebc6a90643 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -1,4 +1,5 @@
1ccflags-$(CONFIG_PM_DEBUG) := -DDEBUG 1
2ccflags-$(CONFIG_PM_DEBUG) := -DDEBUG
2 3
3obj-$(CONFIG_PM) += main.o 4obj-$(CONFIG_PM) += main.o
4obj-$(CONFIG_PM_SLEEP) += console.o 5obj-$(CONFIG_PM_SLEEP) += console.o
diff --git a/kernel/printk.c b/kernel/printk.c
index 33284adb2189..da8ca817eae3 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -53,7 +53,7 @@ void asmlinkage __attribute__((weak)) early_printk(const char *fmt, ...)
53#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) 53#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
54 54
55/* printk's without a loglevel use this.. */ 55/* printk's without a loglevel use this.. */
56#define DEFAULT_MESSAGE_LOGLEVEL 4 /* KERN_WARNING */ 56#define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL
57 57
58/* We show everything that is MORE important than this.. */ 58/* We show everything that is MORE important than this.. */
59#define MINIMUM_CONSOLE_LOGLEVEL 1 /* Minimum loglevel we let people use */ 59#define MINIMUM_CONSOLE_LOGLEVEL 1 /* Minimum loglevel we let people use */
@@ -113,6 +113,11 @@ static unsigned con_start; /* Index into log_buf: next char to be sent to consol
113static unsigned log_end; /* Index into log_buf: most-recently-written-char + 1 */ 113static unsigned log_end; /* Index into log_buf: most-recently-written-char + 1 */
114 114
115/* 115/*
116 * If exclusive_console is non-NULL then only this console is to be printed to.
117 */
118static struct console *exclusive_console;
119
120/*
116 * Array of consoles built from command line options (console=) 121 * Array of consoles built from command line options (console=)
117 */ 122 */
118struct console_cmdline 123struct console_cmdline
@@ -476,6 +481,8 @@ static void __call_console_drivers(unsigned start, unsigned end)
476 struct console *con; 481 struct console *con;
477 482
478 for_each_console(con) { 483 for_each_console(con) {
484 if (exclusive_console && con != exclusive_console)
485 continue;
479 if ((con->flags & CON_ENABLED) && con->write && 486 if ((con->flags & CON_ENABLED) && con->write &&
480 (cpu_online(smp_processor_id()) || 487 (cpu_online(smp_processor_id()) ||
481 (con->flags & CON_ANYTIME))) 488 (con->flags & CON_ANYTIME)))
@@ -1230,6 +1237,11 @@ void console_unlock(void)
1230 local_irq_restore(flags); 1237 local_irq_restore(flags);
1231 } 1238 }
1232 console_locked = 0; 1239 console_locked = 0;
1240
1241 /* Release the exclusive_console once it is used */
1242 if (unlikely(exclusive_console))
1243 exclusive_console = NULL;
1244
1233 up(&console_sem); 1245 up(&console_sem);
1234 spin_unlock_irqrestore(&logbuf_lock, flags); 1246 spin_unlock_irqrestore(&logbuf_lock, flags);
1235 if (wake_klogd) 1247 if (wake_klogd)
@@ -1316,6 +1328,18 @@ void console_start(struct console *console)
1316} 1328}
1317EXPORT_SYMBOL(console_start); 1329EXPORT_SYMBOL(console_start);
1318 1330
1331static int __read_mostly keep_bootcon;
1332
1333static int __init keep_bootcon_setup(char *str)
1334{
1335 keep_bootcon = 1;
1336 printk(KERN_INFO "debug: skip boot console de-registration.\n");
1337
1338 return 0;
1339}
1340
1341early_param("keep_bootcon", keep_bootcon_setup);
1342
1319/* 1343/*
1320 * The console driver calls this routine during kernel initialization 1344 * The console driver calls this routine during kernel initialization
1321 * to register the console printing procedure with printk() and to 1345 * to register the console printing procedure with printk() and to
@@ -1452,6 +1476,12 @@ void register_console(struct console *newcon)
1452 spin_lock_irqsave(&logbuf_lock, flags); 1476 spin_lock_irqsave(&logbuf_lock, flags);
1453 con_start = log_start; 1477 con_start = log_start;
1454 spin_unlock_irqrestore(&logbuf_lock, flags); 1478 spin_unlock_irqrestore(&logbuf_lock, flags);
1479 /*
1480 * We're about to replay the log buffer. Only do this to the
1481 * just-registered console to avoid excessive message spam to
1482 * the already-registered consoles.
1483 */
1484 exclusive_console = newcon;
1455 } 1485 }
1456 console_unlock(); 1486 console_unlock();
1457 console_sysfs_notify(); 1487 console_sysfs_notify();
@@ -1463,7 +1493,9 @@ void register_console(struct console *newcon)
1463 * users know there might be something in the kernel's log buffer that 1493 * users know there might be something in the kernel's log buffer that
1464 * went to the bootconsole (that they do not see on the real console) 1494 * went to the bootconsole (that they do not see on the real console)
1465 */ 1495 */
1466 if (bcon && ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV)) { 1496 if (bcon &&
1497 ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV) &&
1498 !keep_bootcon) {
1467 /* we need to iterate through twice, to make sure we print 1499 /* we need to iterate through twice, to make sure we print
1468 * everything out, before we unregister the console(s) 1500 * everything out, before we unregister the console(s)
1469 */ 1501 */
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index e2302e40b360..0fc1eed28d27 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -134,21 +134,24 @@ int __ptrace_may_access(struct task_struct *task, unsigned int mode)
134 return 0; 134 return 0;
135 rcu_read_lock(); 135 rcu_read_lock();
136 tcred = __task_cred(task); 136 tcred = __task_cred(task);
137 if ((cred->uid != tcred->euid || 137 if (cred->user->user_ns == tcred->user->user_ns &&
138 cred->uid != tcred->suid || 138 (cred->uid == tcred->euid &&
139 cred->uid != tcred->uid || 139 cred->uid == tcred->suid &&
140 cred->gid != tcred->egid || 140 cred->uid == tcred->uid &&
141 cred->gid != tcred->sgid || 141 cred->gid == tcred->egid &&
142 cred->gid != tcred->gid) && 142 cred->gid == tcred->sgid &&
143 !capable(CAP_SYS_PTRACE)) { 143 cred->gid == tcred->gid))
144 rcu_read_unlock(); 144 goto ok;
145 return -EPERM; 145 if (ns_capable(tcred->user->user_ns, CAP_SYS_PTRACE))
146 } 146 goto ok;
147 rcu_read_unlock();
148 return -EPERM;
149ok:
147 rcu_read_unlock(); 150 rcu_read_unlock();
148 smp_rmb(); 151 smp_rmb();
149 if (task->mm) 152 if (task->mm)
150 dumpable = get_dumpable(task->mm); 153 dumpable = get_dumpable(task->mm);
151 if (!dumpable && !capable(CAP_SYS_PTRACE)) 154 if (!dumpable && !task_ns_capable(task, CAP_SYS_PTRACE))
152 return -EPERM; 155 return -EPERM;
153 156
154 return security_ptrace_access_check(task, mode); 157 return security_ptrace_access_check(task, mode);
@@ -198,7 +201,7 @@ static int ptrace_attach(struct task_struct *task)
198 goto unlock_tasklist; 201 goto unlock_tasklist;
199 202
200 task->ptrace = PT_PTRACED; 203 task->ptrace = PT_PTRACED;
201 if (capable(CAP_SYS_PTRACE)) 204 if (task_ns_capable(task, CAP_SYS_PTRACE))
202 task->ptrace |= PT_PTRACE_CAP; 205 task->ptrace |= PT_PTRACE_CAP;
203 206
204 __ptrace_link(task, current); 207 __ptrace_link(task, current);
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index c7eaa37a768b..34683efa2cce 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -126,10 +126,24 @@ ssize_t res_counter_read(struct res_counter *counter, int member,
126 pos, buf, s - buf); 126 pos, buf, s - buf);
127} 127}
128 128
129#if BITS_PER_LONG == 32
130u64 res_counter_read_u64(struct res_counter *counter, int member)
131{
132 unsigned long flags;
133 u64 ret;
134
135 spin_lock_irqsave(&counter->lock, flags);
136 ret = *res_counter_member(counter, member);
137 spin_unlock_irqrestore(&counter->lock, flags);
138
139 return ret;
140}
141#else
129u64 res_counter_read_u64(struct res_counter *counter, int member) 142u64 res_counter_read_u64(struct res_counter *counter, int member)
130{ 143{
131 return *res_counter_member(counter, member); 144 return *res_counter_member(counter, member);
132} 145}
146#endif
133 147
134int res_counter_memparse_write_strategy(const char *buf, 148int res_counter_memparse_write_strategy(const char *buf,
135 unsigned long long *res) 149 unsigned long long *res)
diff --git a/kernel/sched.c b/kernel/sched.c
index a172494a9a63..480adeb63f8f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4892,8 +4892,11 @@ static bool check_same_owner(struct task_struct *p)
4892 4892
4893 rcu_read_lock(); 4893 rcu_read_lock();
4894 pcred = __task_cred(p); 4894 pcred = __task_cred(p);
4895 match = (cred->euid == pcred->euid || 4895 if (cred->user->user_ns == pcred->user->user_ns)
4896 cred->euid == pcred->uid); 4896 match = (cred->euid == pcred->euid ||
4897 cred->euid == pcred->uid);
4898 else
4899 match = false;
4897 rcu_read_unlock(); 4900 rcu_read_unlock();
4898 return match; 4901 return match;
4899} 4902}
@@ -5221,7 +5224,7 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
5221 goto out_free_cpus_allowed; 5224 goto out_free_cpus_allowed;
5222 } 5225 }
5223 retval = -EPERM; 5226 retval = -EPERM;
5224 if (!check_same_owner(p) && !capable(CAP_SYS_NICE)) 5227 if (!check_same_owner(p) && !task_ns_capable(p, CAP_SYS_NICE))
5225 goto out_unlock; 5228 goto out_unlock;
5226 5229
5227 retval = security_task_setscheduler(p); 5230 retval = security_task_setscheduler(p);
diff --git a/kernel/signal.c b/kernel/signal.c
index 4e3cff10fdce..324eff5468ad 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -636,13 +636,33 @@ static inline bool si_fromuser(const struct siginfo *info)
636} 636}
637 637
638/* 638/*
639 * called with RCU read lock from check_kill_permission()
640 */
641static int kill_ok_by_cred(struct task_struct *t)
642{
643 const struct cred *cred = current_cred();
644 const struct cred *tcred = __task_cred(t);
645
646 if (cred->user->user_ns == tcred->user->user_ns &&
647 (cred->euid == tcred->suid ||
648 cred->euid == tcred->uid ||
649 cred->uid == tcred->suid ||
650 cred->uid == tcred->uid))
651 return 1;
652
653 if (ns_capable(tcred->user->user_ns, CAP_KILL))
654 return 1;
655
656 return 0;
657}
658
659/*
639 * Bad permissions for sending the signal 660 * Bad permissions for sending the signal
640 * - the caller must hold the RCU read lock 661 * - the caller must hold the RCU read lock
641 */ 662 */
642static int check_kill_permission(int sig, struct siginfo *info, 663static int check_kill_permission(int sig, struct siginfo *info,
643 struct task_struct *t) 664 struct task_struct *t)
644{ 665{
645 const struct cred *cred, *tcred;
646 struct pid *sid; 666 struct pid *sid;
647 int error; 667 int error;
648 668
@@ -656,14 +676,8 @@ static int check_kill_permission(int sig, struct siginfo *info,
656 if (error) 676 if (error)
657 return error; 677 return error;
658 678
659 cred = current_cred();
660 tcred = __task_cred(t);
661 if (!same_thread_group(current, t) && 679 if (!same_thread_group(current, t) &&
662 (cred->euid ^ tcred->suid) && 680 !kill_ok_by_cred(t)) {
663 (cred->euid ^ tcred->uid) &&
664 (cred->uid ^ tcred->suid) &&
665 (cred->uid ^ tcred->uid) &&
666 !capable(CAP_KILL)) {
667 switch (sig) { 681 switch (sig) {
668 case SIGCONT: 682 case SIGCONT:
669 sid = task_session(t); 683 sid = task_session(t);
@@ -2421,9 +2435,13 @@ SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig,
2421 return -EFAULT; 2435 return -EFAULT;
2422 2436
2423 /* Not even root can pretend to send signals from the kernel. 2437 /* Not even root can pretend to send signals from the kernel.
2424 Nor can they impersonate a kill(), which adds source info. */ 2438 * Nor can they impersonate a kill()/tgkill(), which adds source info.
2425 if (info.si_code >= 0) 2439 */
2440 if (info.si_code != SI_QUEUE) {
2441 /* We used to allow any < 0 si_code */
2442 WARN_ON_ONCE(info.si_code < 0);
2426 return -EPERM; 2443 return -EPERM;
2444 }
2427 info.si_signo = sig; 2445 info.si_signo = sig;
2428 2446
2429 /* POSIX.1b doesn't mention process groups. */ 2447 /* POSIX.1b doesn't mention process groups. */
@@ -2437,9 +2455,13 @@ long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info)
2437 return -EINVAL; 2455 return -EINVAL;
2438 2456
2439 /* Not even root can pretend to send signals from the kernel. 2457 /* Not even root can pretend to send signals from the kernel.
2440 Nor can they impersonate a kill(), which adds source info. */ 2458 * Nor can they impersonate a kill()/tgkill(), which adds source info.
2441 if (info->si_code >= 0) 2459 */
2460 if (info->si_code != SI_QUEUE) {
2461 /* We used to allow any < 0 si_code */
2462 WARN_ON_ONCE(info->si_code < 0);
2442 return -EPERM; 2463 return -EPERM;
2464 }
2443 info->si_signo = sig; 2465 info->si_signo = sig;
2444 2466
2445 return do_send_specific(tgid, pid, sig, info); 2467 return do_send_specific(tgid, pid, sig, info);
diff --git a/kernel/smp.c b/kernel/smp.c
index 7cbd0f293df4..73a195193558 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -604,6 +604,87 @@ void ipi_call_unlock_irq(void)
604} 604}
605#endif /* USE_GENERIC_SMP_HELPERS */ 605#endif /* USE_GENERIC_SMP_HELPERS */
606 606
607/* Setup configured maximum number of CPUs to activate */
608unsigned int setup_max_cpus = NR_CPUS;
609EXPORT_SYMBOL(setup_max_cpus);
610
611
612/*
613 * Setup routine for controlling SMP activation
614 *
615 * Command-line option of "nosmp" or "maxcpus=0" will disable SMP
616 * activation entirely (the MPS table probe still happens, though).
617 *
618 * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer
619 * greater than 0, limits the maximum number of CPUs activated in
620 * SMP mode to <NUM>.
621 */
622
623void __weak arch_disable_smp_support(void) { }
624
625static int __init nosmp(char *str)
626{
627 setup_max_cpus = 0;
628 arch_disable_smp_support();
629
630 return 0;
631}
632
633early_param("nosmp", nosmp);
634
635/* this is hard limit */
636static int __init nrcpus(char *str)
637{
638 int nr_cpus;
639
640 get_option(&str, &nr_cpus);
641 if (nr_cpus > 0 && nr_cpus < nr_cpu_ids)
642 nr_cpu_ids = nr_cpus;
643
644 return 0;
645}
646
647early_param("nr_cpus", nrcpus);
648
649static int __init maxcpus(char *str)
650{
651 get_option(&str, &setup_max_cpus);
652 if (setup_max_cpus == 0)
653 arch_disable_smp_support();
654
655 return 0;
656}
657
658early_param("maxcpus", maxcpus);
659
660/* Setup number of possible processor ids */
661int nr_cpu_ids __read_mostly = NR_CPUS;
662EXPORT_SYMBOL(nr_cpu_ids);
663
664/* An arch may set nr_cpu_ids earlier if needed, so this would be redundant */
665void __init setup_nr_cpu_ids(void)
666{
667 nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1;
668}
669
670/* Called by boot processor to activate the rest. */
671void __init smp_init(void)
672{
673 unsigned int cpu;
674
675 /* FIXME: This should be done in userspace --RR */
676 for_each_present_cpu(cpu) {
677 if (num_online_cpus() >= setup_max_cpus)
678 break;
679 if (!cpu_online(cpu))
680 cpu_up(cpu);
681 }
682
683 /* Any cleanup work */
684 printk(KERN_INFO "Brought up %ld CPUs\n", (long)num_online_cpus());
685 smp_cpus_done(setup_max_cpus);
686}
687
607/* 688/*
608 * Call a function on all processors. May be used during early boot while 689 * Call a function on all processors. May be used during early boot while
609 * early_boot_irqs_disabled is set. Use local_irq_save/restore() instead 690 * early_boot_irqs_disabled is set. Use local_irq_save/restore() instead
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 56e5dec837f0..735d87095172 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -845,7 +845,10 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb,
845 switch (action) { 845 switch (action) {
846 case CPU_UP_PREPARE: 846 case CPU_UP_PREPARE:
847 case CPU_UP_PREPARE_FROZEN: 847 case CPU_UP_PREPARE_FROZEN:
848 p = kthread_create(run_ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu); 848 p = kthread_create_on_node(run_ksoftirqd,
849 hcpu,
850 cpu_to_node(hotcpu),
851 "ksoftirqd/%d", hotcpu);
849 if (IS_ERR(p)) { 852 if (IS_ERR(p)) {
850 printk("ksoftirqd for %i failed\n", hotcpu); 853 printk("ksoftirqd for %i failed\n", hotcpu);
851 return notifier_from_errno(PTR_ERR(p)); 854 return notifier_from_errno(PTR_ERR(p));
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 2df820b03beb..e3516b29076c 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -301,8 +301,10 @@ static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb,
301 case CPU_UP_PREPARE: 301 case CPU_UP_PREPARE:
302 BUG_ON(stopper->thread || stopper->enabled || 302 BUG_ON(stopper->thread || stopper->enabled ||
303 !list_empty(&stopper->works)); 303 !list_empty(&stopper->works));
304 p = kthread_create(cpu_stopper_thread, stopper, "migration/%d", 304 p = kthread_create_on_node(cpu_stopper_thread,
305 cpu); 305 stopper,
306 cpu_to_node(cpu),
307 "migration/%d", cpu);
306 if (IS_ERR(p)) 308 if (IS_ERR(p))
307 return notifier_from_errno(PTR_ERR(p)); 309 return notifier_from_errno(PTR_ERR(p));
308 get_task_struct(p); 310 get_task_struct(p);
diff --git a/kernel/sys.c b/kernel/sys.c
index 1ad48b3b9068..af468edf096a 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -120,16 +120,33 @@ EXPORT_SYMBOL(cad_pid);
120void (*pm_power_off_prepare)(void); 120void (*pm_power_off_prepare)(void);
121 121
122/* 122/*
123 * Returns true if current's euid is same as p's uid or euid,
124 * or has CAP_SYS_NICE to p's user_ns.
125 *
126 * Called with rcu_read_lock, creds are safe
127 */
128static bool set_one_prio_perm(struct task_struct *p)
129{
130 const struct cred *cred = current_cred(), *pcred = __task_cred(p);
131
132 if (pcred->user->user_ns == cred->user->user_ns &&
133 (pcred->uid == cred->euid ||
134 pcred->euid == cred->euid))
135 return true;
136 if (ns_capable(pcred->user->user_ns, CAP_SYS_NICE))
137 return true;
138 return false;
139}
140
141/*
123 * set the priority of a task 142 * set the priority of a task
124 * - the caller must hold the RCU read lock 143 * - the caller must hold the RCU read lock
125 */ 144 */
126static int set_one_prio(struct task_struct *p, int niceval, int error) 145static int set_one_prio(struct task_struct *p, int niceval, int error)
127{ 146{
128 const struct cred *cred = current_cred(), *pcred = __task_cred(p);
129 int no_nice; 147 int no_nice;
130 148
131 if (pcred->uid != cred->euid && 149 if (!set_one_prio_perm(p)) {
132 pcred->euid != cred->euid && !capable(CAP_SYS_NICE)) {
133 error = -EPERM; 150 error = -EPERM;
134 goto out; 151 goto out;
135 } 152 }
@@ -506,7 +523,7 @@ SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
506 if (rgid != (gid_t) -1) { 523 if (rgid != (gid_t) -1) {
507 if (old->gid == rgid || 524 if (old->gid == rgid ||
508 old->egid == rgid || 525 old->egid == rgid ||
509 capable(CAP_SETGID)) 526 nsown_capable(CAP_SETGID))
510 new->gid = rgid; 527 new->gid = rgid;
511 else 528 else
512 goto error; 529 goto error;
@@ -515,7 +532,7 @@ SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
515 if (old->gid == egid || 532 if (old->gid == egid ||
516 old->egid == egid || 533 old->egid == egid ||
517 old->sgid == egid || 534 old->sgid == egid ||
518 capable(CAP_SETGID)) 535 nsown_capable(CAP_SETGID))
519 new->egid = egid; 536 new->egid = egid;
520 else 537 else
521 goto error; 538 goto error;
@@ -550,7 +567,7 @@ SYSCALL_DEFINE1(setgid, gid_t, gid)
550 old = current_cred(); 567 old = current_cred();
551 568
552 retval = -EPERM; 569 retval = -EPERM;
553 if (capable(CAP_SETGID)) 570 if (nsown_capable(CAP_SETGID))
554 new->gid = new->egid = new->sgid = new->fsgid = gid; 571 new->gid = new->egid = new->sgid = new->fsgid = gid;
555 else if (gid == old->gid || gid == old->sgid) 572 else if (gid == old->gid || gid == old->sgid)
556 new->egid = new->fsgid = gid; 573 new->egid = new->fsgid = gid;
@@ -617,7 +634,7 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
617 new->uid = ruid; 634 new->uid = ruid;
618 if (old->uid != ruid && 635 if (old->uid != ruid &&
619 old->euid != ruid && 636 old->euid != ruid &&
620 !capable(CAP_SETUID)) 637 !nsown_capable(CAP_SETUID))
621 goto error; 638 goto error;
622 } 639 }
623 640
@@ -626,7 +643,7 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
626 if (old->uid != euid && 643 if (old->uid != euid &&
627 old->euid != euid && 644 old->euid != euid &&
628 old->suid != euid && 645 old->suid != euid &&
629 !capable(CAP_SETUID)) 646 !nsown_capable(CAP_SETUID))
630 goto error; 647 goto error;
631 } 648 }
632 649
@@ -674,7 +691,7 @@ SYSCALL_DEFINE1(setuid, uid_t, uid)
674 old = current_cred(); 691 old = current_cred();
675 692
676 retval = -EPERM; 693 retval = -EPERM;
677 if (capable(CAP_SETUID)) { 694 if (nsown_capable(CAP_SETUID)) {
678 new->suid = new->uid = uid; 695 new->suid = new->uid = uid;
679 if (uid != old->uid) { 696 if (uid != old->uid) {
680 retval = set_user(new); 697 retval = set_user(new);
@@ -716,7 +733,7 @@ SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid)
716 old = current_cred(); 733 old = current_cred();
717 734
718 retval = -EPERM; 735 retval = -EPERM;
719 if (!capable(CAP_SETUID)) { 736 if (!nsown_capable(CAP_SETUID)) {
720 if (ruid != (uid_t) -1 && ruid != old->uid && 737 if (ruid != (uid_t) -1 && ruid != old->uid &&
721 ruid != old->euid && ruid != old->suid) 738 ruid != old->euid && ruid != old->suid)
722 goto error; 739 goto error;
@@ -780,7 +797,7 @@ SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid)
780 old = current_cred(); 797 old = current_cred();
781 798
782 retval = -EPERM; 799 retval = -EPERM;
783 if (!capable(CAP_SETGID)) { 800 if (!nsown_capable(CAP_SETGID)) {
784 if (rgid != (gid_t) -1 && rgid != old->gid && 801 if (rgid != (gid_t) -1 && rgid != old->gid &&
785 rgid != old->egid && rgid != old->sgid) 802 rgid != old->egid && rgid != old->sgid)
786 goto error; 803 goto error;
@@ -840,7 +857,7 @@ SYSCALL_DEFINE1(setfsuid, uid_t, uid)
840 857
841 if (uid == old->uid || uid == old->euid || 858 if (uid == old->uid || uid == old->euid ||
842 uid == old->suid || uid == old->fsuid || 859 uid == old->suid || uid == old->fsuid ||
843 capable(CAP_SETUID)) { 860 nsown_capable(CAP_SETUID)) {
844 if (uid != old_fsuid) { 861 if (uid != old_fsuid) {
845 new->fsuid = uid; 862 new->fsuid = uid;
846 if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0) 863 if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0)
@@ -873,7 +890,7 @@ SYSCALL_DEFINE1(setfsgid, gid_t, gid)
873 890
874 if (gid == old->gid || gid == old->egid || 891 if (gid == old->gid || gid == old->egid ||
875 gid == old->sgid || gid == old->fsgid || 892 gid == old->sgid || gid == old->fsgid ||
876 capable(CAP_SETGID)) { 893 nsown_capable(CAP_SETGID)) {
877 if (gid != old_fsgid) { 894 if (gid != old_fsgid) {
878 new->fsgid = gid; 895 new->fsgid = gid;
879 goto change_okay; 896 goto change_okay;
@@ -1181,8 +1198,9 @@ SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
1181 int errno; 1198 int errno;
1182 char tmp[__NEW_UTS_LEN]; 1199 char tmp[__NEW_UTS_LEN];
1183 1200
1184 if (!capable(CAP_SYS_ADMIN)) 1201 if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
1185 return -EPERM; 1202 return -EPERM;
1203
1186 if (len < 0 || len > __NEW_UTS_LEN) 1204 if (len < 0 || len > __NEW_UTS_LEN)
1187 return -EINVAL; 1205 return -EINVAL;
1188 down_write(&uts_sem); 1206 down_write(&uts_sem);
@@ -1230,7 +1248,7 @@ SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len)
1230 int errno; 1248 int errno;
1231 char tmp[__NEW_UTS_LEN]; 1249 char tmp[__NEW_UTS_LEN];
1232 1250
1233 if (!capable(CAP_SYS_ADMIN)) 1251 if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
1234 return -EPERM; 1252 return -EPERM;
1235 if (len < 0 || len > __NEW_UTS_LEN) 1253 if (len < 0 || len > __NEW_UTS_LEN)
1236 return -EINVAL; 1254 return -EINVAL;
@@ -1345,6 +1363,8 @@ int do_prlimit(struct task_struct *tsk, unsigned int resource,
1345 rlim = tsk->signal->rlim + resource; 1363 rlim = tsk->signal->rlim + resource;
1346 task_lock(tsk->group_leader); 1364 task_lock(tsk->group_leader);
1347 if (new_rlim) { 1365 if (new_rlim) {
1366 /* Keep the capable check against init_user_ns until
1367 cgroups can contain all limits */
1348 if (new_rlim->rlim_max > rlim->rlim_max && 1368 if (new_rlim->rlim_max > rlim->rlim_max &&
1349 !capable(CAP_SYS_RESOURCE)) 1369 !capable(CAP_SYS_RESOURCE))
1350 retval = -EPERM; 1370 retval = -EPERM;
@@ -1388,19 +1408,22 @@ static int check_prlimit_permission(struct task_struct *task)
1388{ 1408{
1389 const struct cred *cred = current_cred(), *tcred; 1409 const struct cred *cred = current_cred(), *tcred;
1390 1410
1391 tcred = __task_cred(task); 1411 if (current == task)
1392 if (current != task && 1412 return 0;
1393 (cred->uid != tcred->euid ||
1394 cred->uid != tcred->suid ||
1395 cred->uid != tcred->uid ||
1396 cred->gid != tcred->egid ||
1397 cred->gid != tcred->sgid ||
1398 cred->gid != tcred->gid) &&
1399 !capable(CAP_SYS_RESOURCE)) {
1400 return -EPERM;
1401 }
1402 1413
1403 return 0; 1414 tcred = __task_cred(task);
1415 if (cred->user->user_ns == tcred->user->user_ns &&
1416 (cred->uid == tcred->euid &&
1417 cred->uid == tcred->suid &&
1418 cred->uid == tcred->uid &&
1419 cred->gid == tcred->egid &&
1420 cred->gid == tcred->sgid &&
1421 cred->gid == tcred->gid))
1422 return 0;
1423 if (ns_capable(tcred->user->user_ns, CAP_SYS_RESOURCE))
1424 return 0;
1425
1426 return -EPERM;
1404} 1427}
1405 1428
1406SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource, 1429SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource,
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 40245d697602..c0bb32414b17 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -117,6 +117,7 @@ static int neg_one = -1;
117static int zero; 117static int zero;
118static int __maybe_unused one = 1; 118static int __maybe_unused one = 1;
119static int __maybe_unused two = 2; 119static int __maybe_unused two = 2;
120static int __maybe_unused three = 3;
120static unsigned long one_ul = 1; 121static unsigned long one_ul = 1;
121static int one_hundred = 100; 122static int one_hundred = 100;
122#ifdef CONFIG_PRINTK 123#ifdef CONFIG_PRINTK
@@ -169,6 +170,11 @@ static int proc_taint(struct ctl_table *table, int write,
169 void __user *buffer, size_t *lenp, loff_t *ppos); 170 void __user *buffer, size_t *lenp, loff_t *ppos);
170#endif 171#endif
171 172
173#ifdef CONFIG_PRINTK
174static int proc_dmesg_restrict(struct ctl_table *table, int write,
175 void __user *buffer, size_t *lenp, loff_t *ppos);
176#endif
177
172#ifdef CONFIG_MAGIC_SYSRQ 178#ifdef CONFIG_MAGIC_SYSRQ
173/* Note: sysrq code uses it's own private copy */ 179/* Note: sysrq code uses it's own private copy */
174static int __sysrq_enabled = SYSRQ_DEFAULT_ENABLE; 180static int __sysrq_enabled = SYSRQ_DEFAULT_ENABLE;
@@ -706,7 +712,7 @@ static struct ctl_table kern_table[] = {
706 .data = &kptr_restrict, 712 .data = &kptr_restrict,
707 .maxlen = sizeof(int), 713 .maxlen = sizeof(int),
708 .mode = 0644, 714 .mode = 0644,
709 .proc_handler = proc_dointvec_minmax, 715 .proc_handler = proc_dmesg_restrict,
710 .extra1 = &zero, 716 .extra1 = &zero,
711 .extra2 = &two, 717 .extra2 = &two,
712 }, 718 },
@@ -971,14 +977,18 @@ static struct ctl_table vm_table[] = {
971 .data = &sysctl_overcommit_memory, 977 .data = &sysctl_overcommit_memory,
972 .maxlen = sizeof(sysctl_overcommit_memory), 978 .maxlen = sizeof(sysctl_overcommit_memory),
973 .mode = 0644, 979 .mode = 0644,
974 .proc_handler = proc_dointvec, 980 .proc_handler = proc_dointvec_minmax,
981 .extra1 = &zero,
982 .extra2 = &two,
975 }, 983 },
976 { 984 {
977 .procname = "panic_on_oom", 985 .procname = "panic_on_oom",
978 .data = &sysctl_panic_on_oom, 986 .data = &sysctl_panic_on_oom,
979 .maxlen = sizeof(sysctl_panic_on_oom), 987 .maxlen = sizeof(sysctl_panic_on_oom),
980 .mode = 0644, 988 .mode = 0644,
981 .proc_handler = proc_dointvec, 989 .proc_handler = proc_dointvec_minmax,
990 .extra1 = &zero,
991 .extra2 = &two,
982 }, 992 },
983 { 993 {
984 .procname = "oom_kill_allocating_task", 994 .procname = "oom_kill_allocating_task",
@@ -1006,7 +1016,8 @@ static struct ctl_table vm_table[] = {
1006 .data = &page_cluster, 1016 .data = &page_cluster,
1007 .maxlen = sizeof(int), 1017 .maxlen = sizeof(int),
1008 .mode = 0644, 1018 .mode = 0644,
1009 .proc_handler = proc_dointvec, 1019 .proc_handler = proc_dointvec_minmax,
1020 .extra1 = &zero,
1010 }, 1021 },
1011 { 1022 {
1012 .procname = "dirty_background_ratio", 1023 .procname = "dirty_background_ratio",
@@ -1054,7 +1065,8 @@ static struct ctl_table vm_table[] = {
1054 .data = &dirty_expire_interval, 1065 .data = &dirty_expire_interval,
1055 .maxlen = sizeof(dirty_expire_interval), 1066 .maxlen = sizeof(dirty_expire_interval),
1056 .mode = 0644, 1067 .mode = 0644,
1057 .proc_handler = proc_dointvec, 1068 .proc_handler = proc_dointvec_minmax,
1069 .extra1 = &zero,
1058 }, 1070 },
1059 { 1071 {
1060 .procname = "nr_pdflush_threads", 1072 .procname = "nr_pdflush_threads",
@@ -1130,6 +1142,8 @@ static struct ctl_table vm_table[] = {
1130 .maxlen = sizeof(int), 1142 .maxlen = sizeof(int),
1131 .mode = 0644, 1143 .mode = 0644,
1132 .proc_handler = drop_caches_sysctl_handler, 1144 .proc_handler = drop_caches_sysctl_handler,
1145 .extra1 = &one,
1146 .extra2 = &three,
1133 }, 1147 },
1134#ifdef CONFIG_COMPACTION 1148#ifdef CONFIG_COMPACTION
1135 { 1149 {
@@ -2385,6 +2399,17 @@ static int proc_taint(struct ctl_table *table, int write,
2385 return err; 2399 return err;
2386} 2400}
2387 2401
2402#ifdef CONFIG_PRINTK
2403static int proc_dmesg_restrict(struct ctl_table *table, int write,
2404 void __user *buffer, size_t *lenp, loff_t *ppos)
2405{
2406 if (write && !capable(CAP_SYS_ADMIN))
2407 return -EPERM;
2408
2409 return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
2410}
2411#endif
2412
2388struct do_proc_dointvec_minmax_conv_param { 2413struct do_proc_dointvec_minmax_conv_param {
2389 int *min; 2414 int *min;
2390 int *max; 2415 int *max;
diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c
index 10b90d8a03c4..4e4932a7b360 100644
--- a/kernel/sysctl_check.c
+++ b/kernel/sysctl_check.c
@@ -111,11 +111,9 @@ int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table)
111 const char *fail = NULL; 111 const char *fail = NULL;
112 112
113 if (table->parent) { 113 if (table->parent) {
114 if (table->procname && !table->parent->procname) 114 if (!table->parent->procname)
115 set_fail(&fail, table, "Parent without procname"); 115 set_fail(&fail, table, "Parent without procname");
116 } 116 }
117 if (!table->procname)
118 set_fail(&fail, table, "No procname");
119 if (table->child) { 117 if (table->child) {
120 if (table->data) 118 if (table->data)
121 set_fail(&fail, table, "Directory with data?"); 119 set_fail(&fail, table, "Directory with data?");
@@ -144,13 +142,9 @@ int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table)
144 set_fail(&fail, table, "No maxlen"); 142 set_fail(&fail, table, "No maxlen");
145 } 143 }
146#ifdef CONFIG_PROC_SYSCTL 144#ifdef CONFIG_PROC_SYSCTL
147 if (table->procname && !table->proc_handler) 145 if (!table->proc_handler)
148 set_fail(&fail, table, "No proc_handler"); 146 set_fail(&fail, table, "No proc_handler");
149#endif 147#endif
150#if 0
151 if (!table->procname && table->proc_handler)
152 set_fail(&fail, table, "proc_handler without procname");
153#endif
154 sysctl_check_leaf(namespaces, table, &fail); 148 sysctl_check_leaf(namespaces, table, &fail);
155 } 149 }
156 if (table->mode > 0777) 150 if (table->mode > 0777)
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 3971c6b9d58d..9ffea360a778 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -685,7 +685,7 @@ static int __init taskstats_init(void)
685 goto err_cgroup_ops; 685 goto err_cgroup_ops;
686 686
687 family_registered = 1; 687 family_registered = 1;
688 printk("registered taskstats version %d\n", TASKSTATS_GENL_VERSION); 688 pr_info("registered taskstats version %d\n", TASKSTATS_GENL_VERSION);
689 return 0; 689 return 0;
690err_cgroup_ops: 690err_cgroup_ops:
691 genl_unregister_ops(&family, &taskstats_ops); 691 genl_unregister_ops(&family, &taskstats_ops);
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 14674dce77a6..61d7d59f4a1a 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -275,7 +275,7 @@ config PROFILE_ANNOTATED_BRANCHES
275 This tracer profiles all the the likely and unlikely macros 275 This tracer profiles all the the likely and unlikely macros
276 in the kernel. It will display the results in: 276 in the kernel. It will display the results in:
277 277
278 /sys/kernel/debug/tracing/profile_annotated_branch 278 /sys/kernel/debug/tracing/trace_stat/branch_annotated
279 279
280 Note: this will add a significant overhead; only turn this 280 Note: this will add a significant overhead; only turn this
281 on if you need to profile the system's use of these macros. 281 on if you need to profile the system's use of these macros.
@@ -288,7 +288,7 @@ config PROFILE_ALL_BRANCHES
288 taken in the kernel is recorded whether it hit or miss. 288 taken in the kernel is recorded whether it hit or miss.
289 The results will be displayed in: 289 The results will be displayed in:
290 290
291 /sys/kernel/debug/tracing/profile_branch 291 /sys/kernel/debug/tracing/trace_stat/branch_all
292 292
293 This option also enables the likely/unlikely profiler. 293 This option also enables the likely/unlikely profiler.
294 294
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index db7b439d23ee..d9c8bcafb120 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -668,7 +668,7 @@ static struct list_head *rb_list_head(struct list_head *list)
668 * the reader page). But if the next page is a header page, 668 * the reader page). But if the next page is a header page,
669 * its flags will be non zero. 669 * its flags will be non zero.
670 */ 670 */
671static int inline 671static inline int
672rb_is_head_page(struct ring_buffer_per_cpu *cpu_buffer, 672rb_is_head_page(struct ring_buffer_per_cpu *cpu_buffer,
673 struct buffer_page *page, struct list_head *list) 673 struct buffer_page *page, struct list_head *list)
674{ 674{
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 3249b4f77ef0..8008ddcfbf20 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -391,8 +391,8 @@ static int process_ops(struct filter_pred *preds,
391 struct filter_pred *op, void *rec) 391 struct filter_pred *op, void *rec)
392{ 392{
393 struct filter_pred *pred; 393 struct filter_pred *pred;
394 int match = 0;
394 int type; 395 int type;
395 int match;
396 int i; 396 int i;
397 397
398 /* 398 /*
diff --git a/kernel/uid16.c b/kernel/uid16.c
index 419209893d87..51c6e89e8619 100644
--- a/kernel/uid16.c
+++ b/kernel/uid16.c
@@ -189,7 +189,7 @@ SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist)
189 struct group_info *group_info; 189 struct group_info *group_info;
190 int retval; 190 int retval;
191 191
192 if (!capable(CAP_SETGID)) 192 if (!nsown_capable(CAP_SETGID))
193 return -EPERM; 193 return -EPERM;
194 if ((unsigned)gidsetsize > NGROUPS_MAX) 194 if ((unsigned)gidsetsize > NGROUPS_MAX)
195 return -EINVAL; 195 return -EINVAL;
diff --git a/kernel/user.c b/kernel/user.c
index 5c598ca781df..9e03e9c1df8d 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -17,9 +17,13 @@
17#include <linux/module.h> 17#include <linux/module.h>
18#include <linux/user_namespace.h> 18#include <linux/user_namespace.h>
19 19
20/*
21 * userns count is 1 for root user, 1 for init_uts_ns,
22 * and 1 for... ?
23 */
20struct user_namespace init_user_ns = { 24struct user_namespace init_user_ns = {
21 .kref = { 25 .kref = {
22 .refcount = ATOMIC_INIT(2), 26 .refcount = ATOMIC_INIT(3),
23 }, 27 },
24 .creator = &root_user, 28 .creator = &root_user,
25}; 29};
@@ -47,7 +51,7 @@ static struct kmem_cache *uid_cachep;
47 */ 51 */
48static DEFINE_SPINLOCK(uidhash_lock); 52static DEFINE_SPINLOCK(uidhash_lock);
49 53
50/* root_user.__count is 2, 1 for init task cred, 1 for init_user_ns->creator */ 54/* root_user.__count is 2, 1 for init task cred, 1 for init_user_ns->user_ns */
51struct user_struct root_user = { 55struct user_struct root_user = {
52 .__count = ATOMIC_INIT(2), 56 .__count = ATOMIC_INIT(2),
53 .processes = ATOMIC_INIT(1), 57 .processes = ATOMIC_INIT(1),
diff --git a/kernel/utsname.c b/kernel/utsname.c
index 8a82b4b8ea52..44646179eaba 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -14,6 +14,7 @@
14#include <linux/utsname.h> 14#include <linux/utsname.h>
15#include <linux/err.h> 15#include <linux/err.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/user_namespace.h>
17 18
18static struct uts_namespace *create_uts_ns(void) 19static struct uts_namespace *create_uts_ns(void)
19{ 20{
@@ -30,7 +31,8 @@ static struct uts_namespace *create_uts_ns(void)
30 * @old_ns: namespace to clone 31 * @old_ns: namespace to clone
31 * Return NULL on error (failure to kmalloc), new ns otherwise 32 * Return NULL on error (failure to kmalloc), new ns otherwise
32 */ 33 */
33static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns) 34static struct uts_namespace *clone_uts_ns(struct task_struct *tsk,
35 struct uts_namespace *old_ns)
34{ 36{
35 struct uts_namespace *ns; 37 struct uts_namespace *ns;
36 38
@@ -40,6 +42,7 @@ static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns)
40 42
41 down_read(&uts_sem); 43 down_read(&uts_sem);
42 memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); 44 memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
45 ns->user_ns = get_user_ns(task_cred_xxx(tsk, user)->user_ns);
43 up_read(&uts_sem); 46 up_read(&uts_sem);
44 return ns; 47 return ns;
45} 48}
@@ -50,8 +53,10 @@ static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns)
50 * utsname of this process won't be seen by parent, and vice 53 * utsname of this process won't be seen by parent, and vice
51 * versa. 54 * versa.
52 */ 55 */
53struct uts_namespace *copy_utsname(unsigned long flags, struct uts_namespace *old_ns) 56struct uts_namespace *copy_utsname(unsigned long flags,
57 struct task_struct *tsk)
54{ 58{
59 struct uts_namespace *old_ns = tsk->nsproxy->uts_ns;
55 struct uts_namespace *new_ns; 60 struct uts_namespace *new_ns;
56 61
57 BUG_ON(!old_ns); 62 BUG_ON(!old_ns);
@@ -60,7 +65,7 @@ struct uts_namespace *copy_utsname(unsigned long flags, struct uts_namespace *ol
60 if (!(flags & CLONE_NEWUTS)) 65 if (!(flags & CLONE_NEWUTS))
61 return old_ns; 66 return old_ns;
62 67
63 new_ns = clone_uts_ns(old_ns); 68 new_ns = clone_uts_ns(tsk, old_ns);
64 69
65 put_uts_ns(old_ns); 70 put_uts_ns(old_ns);
66 return new_ns; 71 return new_ns;
@@ -71,5 +76,6 @@ void free_uts_ns(struct kref *kref)
71 struct uts_namespace *ns; 76 struct uts_namespace *ns;
72 77
73 ns = container_of(kref, struct uts_namespace, kref); 78 ns = container_of(kref, struct uts_namespace, kref);
79 put_user_ns(ns->user_ns);
74 kfree(ns); 80 kfree(ns);
75} 81}
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 18bb15776c57..140dce750450 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -48,12 +48,15 @@ static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
48 * Should we panic when a soft-lockup or hard-lockup occurs: 48 * Should we panic when a soft-lockup or hard-lockup occurs:
49 */ 49 */
50#ifdef CONFIG_HARDLOCKUP_DETECTOR 50#ifdef CONFIG_HARDLOCKUP_DETECTOR
51static int hardlockup_panic; 51static int hardlockup_panic =
52 CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
52 53
53static int __init hardlockup_panic_setup(char *str) 54static int __init hardlockup_panic_setup(char *str)
54{ 55{
55 if (!strncmp(str, "panic", 5)) 56 if (!strncmp(str, "panic", 5))
56 hardlockup_panic = 1; 57 hardlockup_panic = 1;
58 else if (!strncmp(str, "nopanic", 7))
59 hardlockup_panic = 0;
57 else if (!strncmp(str, "0", 1)) 60 else if (!strncmp(str, "0", 1))
58 watchdog_enabled = 0; 61 watchdog_enabled = 0;
59 return 1; 62 return 1;
@@ -415,19 +418,22 @@ static int watchdog_prepare_cpu(int cpu)
415static int watchdog_enable(int cpu) 418static int watchdog_enable(int cpu)
416{ 419{
417 struct task_struct *p = per_cpu(softlockup_watchdog, cpu); 420 struct task_struct *p = per_cpu(softlockup_watchdog, cpu);
418 int err; 421 int err = 0;
419 422
420 /* enable the perf event */ 423 /* enable the perf event */
421 err = watchdog_nmi_enable(cpu); 424 err = watchdog_nmi_enable(cpu);
422 if (err) 425
423 return err; 426 /* Regardless of err above, fall through and start softlockup */
424 427
425 /* create the watchdog thread */ 428 /* create the watchdog thread */
426 if (!p) { 429 if (!p) {
427 p = kthread_create(watchdog, (void *)(unsigned long)cpu, "watchdog/%d", cpu); 430 p = kthread_create(watchdog, (void *)(unsigned long)cpu, "watchdog/%d", cpu);
428 if (IS_ERR(p)) { 431 if (IS_ERR(p)) {
429 printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu); 432 printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu);
430 return PTR_ERR(p); 433 if (!err)
434 /* if hardlockup hasn't already set this */
435 err = PTR_ERR(p);
436 goto out;
431 } 437 }
432 kthread_bind(p, cpu); 438 kthread_bind(p, cpu);
433 per_cpu(watchdog_touch_ts, cpu) = 0; 439 per_cpu(watchdog_touch_ts, cpu) = 0;
@@ -435,7 +441,8 @@ static int watchdog_enable(int cpu)
435 wake_up_process(p); 441 wake_up_process(p);
436 } 442 }
437 443
438 return 0; 444out:
445 return err;
439} 446}
440 447
441static void watchdog_disable(int cpu) 448static void watchdog_disable(int cpu)
@@ -547,7 +554,13 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
547 break; 554 break;
548#endif /* CONFIG_HOTPLUG_CPU */ 555#endif /* CONFIG_HOTPLUG_CPU */
549 } 556 }
550 return notifier_from_errno(err); 557
558 /*
559 * hardlockup and softlockup are not important enough
560 * to block cpu bring up. Just always succeed and
561 * rely on printk output to flag problems.
562 */
563 return NOTIFY_OK;
551} 564}
552 565
553static struct notifier_block __cpuinitdata cpu_nfb = { 566static struct notifier_block __cpuinitdata cpu_nfb = {
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 5ca7ce9ce754..04ef830690ec 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1366,8 +1366,10 @@ static struct worker *create_worker(struct global_cwq *gcwq, bool bind)
1366 worker->id = id; 1366 worker->id = id;
1367 1367
1368 if (!on_unbound_cpu) 1368 if (!on_unbound_cpu)
1369 worker->task = kthread_create(worker_thread, worker, 1369 worker->task = kthread_create_on_node(worker_thread,
1370 "kworker/%u:%d", gcwq->cpu, id); 1370 worker,
1371 cpu_to_node(gcwq->cpu),
1372 "kworker/%u:%d", gcwq->cpu, id);
1371 else 1373 else
1372 worker->task = kthread_create(worker_thread, worker, 1374 worker->task = kthread_create(worker_thread, worker,
1373 "kworker/u:%d", id); 1375 "kworker/u:%d", id);