aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/bounds.c2
-rw-r--r--kernel/capability.c96
-rw-r--r--kernel/cpuset.c80
-rw-r--r--kernel/crash_dump.c34
-rw-r--r--kernel/cred.c6
-rw-r--r--kernel/debug/gdbstub.c30
-rw-r--r--kernel/exit.c1
-rw-r--r--kernel/fork.c11
-rw-r--r--kernel/futex.c15
-rw-r--r--kernel/futex_compat.c11
-rw-r--r--kernel/groups.c2
-rw-r--r--kernel/kallsyms.c48
-rw-r--r--kernel/lockdep_proc.c9
-rw-r--r--kernel/nsproxy.c4
-rw-r--r--kernel/perf_event.c15
-rw-r--r--kernel/pid_namespace.c11
-rw-r--r--kernel/power/block_io.c2
-rw-r--r--kernel/ptrace.c27
-rw-r--r--kernel/res_counter.c14
-rw-r--r--kernel/sched.c26
-rw-r--r--kernel/sched_idletask.c2
-rw-r--r--kernel/sched_stoptask.c2
-rw-r--r--kernel/signal.c30
-rw-r--r--kernel/sys.c77
-rw-r--r--kernel/sysctl.c35
-rw-r--r--kernel/sysctl_check.c10
-rw-r--r--kernel/taskstats.c2
-rw-r--r--kernel/trace/blktrace.c15
-rw-r--r--kernel/trace/ftrace.c3
-rw-r--r--kernel/uid16.c2
-rw-r--r--kernel/user.c8
-rw-r--r--kernel/utsname.c12
33 files changed, 462 insertions, 181 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 353d3fe8ba33..85cbfb31e73e 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -107,6 +107,7 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o
107obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o 107obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
108obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o 108obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
109obj-$(CONFIG_PADATA) += padata.o 109obj-$(CONFIG_PADATA) += padata.o
110obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
110 111
111ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) 112ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
112# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is 113# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/bounds.c b/kernel/bounds.c
index 98a51f26c136..0c9b862292b2 100644
--- a/kernel/bounds.c
+++ b/kernel/bounds.c
@@ -9,11 +9,13 @@
9#include <linux/page-flags.h> 9#include <linux/page-flags.h>
10#include <linux/mmzone.h> 10#include <linux/mmzone.h>
11#include <linux/kbuild.h> 11#include <linux/kbuild.h>
12#include <linux/page_cgroup.h>
12 13
13void foo(void) 14void foo(void)
14{ 15{
15 /* The enum constants to put into include/generated/bounds.h */ 16 /* The enum constants to put into include/generated/bounds.h */
16 DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS); 17 DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS);
17 DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES); 18 DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES);
19 DEFINE(NR_PCG_FLAGS, __NR_PCG_FLAGS);
18 /* End of constants */ 20 /* End of constants */
19} 21}
diff --git a/kernel/capability.c b/kernel/capability.c
index 9e9385f132c8..bf0c734d0c12 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -14,6 +14,7 @@
14#include <linux/security.h> 14#include <linux/security.h>
15#include <linux/syscalls.h> 15#include <linux/syscalls.h>
16#include <linux/pid_namespace.h> 16#include <linux/pid_namespace.h>
17#include <linux/user_namespace.h>
17#include <asm/uaccess.h> 18#include <asm/uaccess.h>
18 19
19/* 20/*
@@ -290,6 +291,60 @@ error:
290} 291}
291 292
292/** 293/**
294 * has_capability - Does a task have a capability in init_user_ns
295 * @t: The task in question
296 * @cap: The capability to be tested for
297 *
298 * Return true if the specified task has the given superior capability
299 * currently in effect to the initial user namespace, false if not.
300 *
301 * Note that this does not set PF_SUPERPRIV on the task.
302 */
303bool has_capability(struct task_struct *t, int cap)
304{
305 int ret = security_real_capable(t, &init_user_ns, cap);
306
307 return (ret == 0);
308}
309
310/**
311 * has_capability - Does a task have a capability in a specific user ns
312 * @t: The task in question
313 * @ns: target user namespace
314 * @cap: The capability to be tested for
315 *
316 * Return true if the specified task has the given superior capability
317 * currently in effect to the specified user namespace, false if not.
318 *
319 * Note that this does not set PF_SUPERPRIV on the task.
320 */
321bool has_ns_capability(struct task_struct *t,
322 struct user_namespace *ns, int cap)
323{
324 int ret = security_real_capable(t, ns, cap);
325
326 return (ret == 0);
327}
328
329/**
330 * has_capability_noaudit - Does a task have a capability (unaudited)
331 * @t: The task in question
332 * @cap: The capability to be tested for
333 *
334 * Return true if the specified task has the given superior capability
335 * currently in effect to init_user_ns, false if not. Don't write an
336 * audit message for the check.
337 *
338 * Note that this does not set PF_SUPERPRIV on the task.
339 */
340bool has_capability_noaudit(struct task_struct *t, int cap)
341{
342 int ret = security_real_capable_noaudit(t, &init_user_ns, cap);
343
344 return (ret == 0);
345}
346
347/**
293 * capable - Determine if the current task has a superior capability in effect 348 * capable - Determine if the current task has a superior capability in effect
294 * @cap: The capability to be tested for 349 * @cap: The capability to be tested for
295 * 350 *
@@ -299,17 +354,48 @@ error:
299 * This sets PF_SUPERPRIV on the task if the capability is available on the 354 * This sets PF_SUPERPRIV on the task if the capability is available on the
300 * assumption that it's about to be used. 355 * assumption that it's about to be used.
301 */ 356 */
302int capable(int cap) 357bool capable(int cap)
358{
359 return ns_capable(&init_user_ns, cap);
360}
361EXPORT_SYMBOL(capable);
362
363/**
364 * ns_capable - Determine if the current task has a superior capability in effect
365 * @ns: The usernamespace we want the capability in
366 * @cap: The capability to be tested for
367 *
368 * Return true if the current task has the given superior capability currently
369 * available for use, false if not.
370 *
371 * This sets PF_SUPERPRIV on the task if the capability is available on the
372 * assumption that it's about to be used.
373 */
374bool ns_capable(struct user_namespace *ns, int cap)
303{ 375{
304 if (unlikely(!cap_valid(cap))) { 376 if (unlikely(!cap_valid(cap))) {
305 printk(KERN_CRIT "capable() called with invalid cap=%u\n", cap); 377 printk(KERN_CRIT "capable() called with invalid cap=%u\n", cap);
306 BUG(); 378 BUG();
307 } 379 }
308 380
309 if (security_capable(current_cred(), cap) == 0) { 381 if (security_capable(ns, current_cred(), cap) == 0) {
310 current->flags |= PF_SUPERPRIV; 382 current->flags |= PF_SUPERPRIV;
311 return 1; 383 return true;
312 } 384 }
313 return 0; 385 return false;
314} 386}
315EXPORT_SYMBOL(capable); 387EXPORT_SYMBOL(ns_capable);
388
389/**
390 * task_ns_capable - Determine whether current task has a superior
391 * capability targeted at a specific task's user namespace.
392 * @t: The task whose user namespace is targeted.
393 * @cap: The capability in question.
394 *
395 * Return true if it does, false otherwise.
396 */
397bool task_ns_capable(struct task_struct *t, int cap)
398{
399 return ns_capable(task_cred_xxx(t, user)->user_ns, cap);
400}
401EXPORT_SYMBOL(task_ns_capable);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index e92e98189032..33eee16addb8 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1015,17 +1015,12 @@ static void cpuset_change_nodemask(struct task_struct *p,
1015 struct cpuset *cs; 1015 struct cpuset *cs;
1016 int migrate; 1016 int migrate;
1017 const nodemask_t *oldmem = scan->data; 1017 const nodemask_t *oldmem = scan->data;
1018 NODEMASK_ALLOC(nodemask_t, newmems, GFP_KERNEL); 1018 static nodemask_t newmems; /* protected by cgroup_mutex */
1019
1020 if (!newmems)
1021 return;
1022 1019
1023 cs = cgroup_cs(scan->cg); 1020 cs = cgroup_cs(scan->cg);
1024 guarantee_online_mems(cs, newmems); 1021 guarantee_online_mems(cs, &newmems);
1025
1026 cpuset_change_task_nodemask(p, newmems);
1027 1022
1028 NODEMASK_FREE(newmems); 1023 cpuset_change_task_nodemask(p, &newmems);
1029 1024
1030 mm = get_task_mm(p); 1025 mm = get_task_mm(p);
1031 if (!mm) 1026 if (!mm)
@@ -1438,44 +1433,35 @@ static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont,
1438 struct mm_struct *mm; 1433 struct mm_struct *mm;
1439 struct cpuset *cs = cgroup_cs(cont); 1434 struct cpuset *cs = cgroup_cs(cont);
1440 struct cpuset *oldcs = cgroup_cs(oldcont); 1435 struct cpuset *oldcs = cgroup_cs(oldcont);
1441 NODEMASK_ALLOC(nodemask_t, from, GFP_KERNEL); 1436 static nodemask_t to; /* protected by cgroup_mutex */
1442 NODEMASK_ALLOC(nodemask_t, to, GFP_KERNEL);
1443
1444 if (from == NULL || to == NULL)
1445 goto alloc_fail;
1446 1437
1447 if (cs == &top_cpuset) { 1438 if (cs == &top_cpuset) {
1448 cpumask_copy(cpus_attach, cpu_possible_mask); 1439 cpumask_copy(cpus_attach, cpu_possible_mask);
1449 } else { 1440 } else {
1450 guarantee_online_cpus(cs, cpus_attach); 1441 guarantee_online_cpus(cs, cpus_attach);
1451 } 1442 }
1452 guarantee_online_mems(cs, to); 1443 guarantee_online_mems(cs, &to);
1453 1444
1454 /* do per-task migration stuff possibly for each in the threadgroup */ 1445 /* do per-task migration stuff possibly for each in the threadgroup */
1455 cpuset_attach_task(tsk, to, cs); 1446 cpuset_attach_task(tsk, &to, cs);
1456 if (threadgroup) { 1447 if (threadgroup) {
1457 struct task_struct *c; 1448 struct task_struct *c;
1458 rcu_read_lock(); 1449 rcu_read_lock();
1459 list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) { 1450 list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
1460 cpuset_attach_task(c, to, cs); 1451 cpuset_attach_task(c, &to, cs);
1461 } 1452 }
1462 rcu_read_unlock(); 1453 rcu_read_unlock();
1463 } 1454 }
1464 1455
1465 /* change mm; only needs to be done once even if threadgroup */ 1456 /* change mm; only needs to be done once even if threadgroup */
1466 *from = oldcs->mems_allowed; 1457 to = cs->mems_allowed;
1467 *to = cs->mems_allowed;
1468 mm = get_task_mm(tsk); 1458 mm = get_task_mm(tsk);
1469 if (mm) { 1459 if (mm) {
1470 mpol_rebind_mm(mm, to); 1460 mpol_rebind_mm(mm, &to);
1471 if (is_memory_migrate(cs)) 1461 if (is_memory_migrate(cs))
1472 cpuset_migrate_mm(mm, from, to); 1462 cpuset_migrate_mm(mm, &oldcs->mems_allowed, &to);
1473 mmput(mm); 1463 mmput(mm);
1474 } 1464 }
1475
1476alloc_fail:
1477 NODEMASK_FREE(from);
1478 NODEMASK_FREE(to);
1479} 1465}
1480 1466
1481/* The various types of files and directories in a cpuset file system */ 1467/* The various types of files and directories in a cpuset file system */
@@ -1610,34 +1596,26 @@ out:
1610 * across a page fault. 1596 * across a page fault.
1611 */ 1597 */
1612 1598
1613static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs) 1599static size_t cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
1614{ 1600{
1615 int ret; 1601 size_t count;
1616 1602
1617 mutex_lock(&callback_mutex); 1603 mutex_lock(&callback_mutex);
1618 ret = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_allowed); 1604 count = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_allowed);
1619 mutex_unlock(&callback_mutex); 1605 mutex_unlock(&callback_mutex);
1620 1606
1621 return ret; 1607 return count;
1622} 1608}
1623 1609
1624static int cpuset_sprintf_memlist(char *page, struct cpuset *cs) 1610static size_t cpuset_sprintf_memlist(char *page, struct cpuset *cs)
1625{ 1611{
1626 NODEMASK_ALLOC(nodemask_t, mask, GFP_KERNEL); 1612 size_t count;
1627 int retval;
1628
1629 if (mask == NULL)
1630 return -ENOMEM;
1631 1613
1632 mutex_lock(&callback_mutex); 1614 mutex_lock(&callback_mutex);
1633 *mask = cs->mems_allowed; 1615 count = nodelist_scnprintf(page, PAGE_SIZE, cs->mems_allowed);
1634 mutex_unlock(&callback_mutex); 1616 mutex_unlock(&callback_mutex);
1635 1617
1636 retval = nodelist_scnprintf(page, PAGE_SIZE, *mask); 1618 return count;
1637
1638 NODEMASK_FREE(mask);
1639
1640 return retval;
1641} 1619}
1642 1620
1643static ssize_t cpuset_common_file_read(struct cgroup *cont, 1621static ssize_t cpuset_common_file_read(struct cgroup *cont,
@@ -1862,8 +1840,10 @@ static void cpuset_post_clone(struct cgroup_subsys *ss,
1862 cs = cgroup_cs(cgroup); 1840 cs = cgroup_cs(cgroup);
1863 parent_cs = cgroup_cs(parent); 1841 parent_cs = cgroup_cs(parent);
1864 1842
1843 mutex_lock(&callback_mutex);
1865 cs->mems_allowed = parent_cs->mems_allowed; 1844 cs->mems_allowed = parent_cs->mems_allowed;
1866 cpumask_copy(cs->cpus_allowed, parent_cs->cpus_allowed); 1845 cpumask_copy(cs->cpus_allowed, parent_cs->cpus_allowed);
1846 mutex_unlock(&callback_mutex);
1867 return; 1847 return;
1868} 1848}
1869 1849
@@ -2066,10 +2046,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
2066 struct cpuset *cp; /* scans cpusets being updated */ 2046 struct cpuset *cp; /* scans cpusets being updated */
2067 struct cpuset *child; /* scans child cpusets of cp */ 2047 struct cpuset *child; /* scans child cpusets of cp */
2068 struct cgroup *cont; 2048 struct cgroup *cont;
2069 NODEMASK_ALLOC(nodemask_t, oldmems, GFP_KERNEL); 2049 static nodemask_t oldmems; /* protected by cgroup_mutex */
2070
2071 if (oldmems == NULL)
2072 return;
2073 2050
2074 list_add_tail((struct list_head *)&root->stack_list, &queue); 2051 list_add_tail((struct list_head *)&root->stack_list, &queue);
2075 2052
@@ -2086,7 +2063,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
2086 nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY])) 2063 nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
2087 continue; 2064 continue;
2088 2065
2089 *oldmems = cp->mems_allowed; 2066 oldmems = cp->mems_allowed;
2090 2067
2091 /* Remove offline cpus and mems from this cpuset. */ 2068 /* Remove offline cpus and mems from this cpuset. */
2092 mutex_lock(&callback_mutex); 2069 mutex_lock(&callback_mutex);
@@ -2102,10 +2079,9 @@ static void scan_for_empty_cpusets(struct cpuset *root)
2102 remove_tasks_in_empty_cpuset(cp); 2079 remove_tasks_in_empty_cpuset(cp);
2103 else { 2080 else {
2104 update_tasks_cpumask(cp, NULL); 2081 update_tasks_cpumask(cp, NULL);
2105 update_tasks_nodemask(cp, oldmems, NULL); 2082 update_tasks_nodemask(cp, &oldmems, NULL);
2106 } 2083 }
2107 } 2084 }
2108 NODEMASK_FREE(oldmems);
2109} 2085}
2110 2086
2111/* 2087/*
@@ -2147,19 +2123,16 @@ void cpuset_update_active_cpus(void)
2147static int cpuset_track_online_nodes(struct notifier_block *self, 2123static int cpuset_track_online_nodes(struct notifier_block *self,
2148 unsigned long action, void *arg) 2124 unsigned long action, void *arg)
2149{ 2125{
2150 NODEMASK_ALLOC(nodemask_t, oldmems, GFP_KERNEL); 2126 static nodemask_t oldmems; /* protected by cgroup_mutex */
2151
2152 if (oldmems == NULL)
2153 return NOTIFY_DONE;
2154 2127
2155 cgroup_lock(); 2128 cgroup_lock();
2156 switch (action) { 2129 switch (action) {
2157 case MEM_ONLINE: 2130 case MEM_ONLINE:
2158 *oldmems = top_cpuset.mems_allowed; 2131 oldmems = top_cpuset.mems_allowed;
2159 mutex_lock(&callback_mutex); 2132 mutex_lock(&callback_mutex);
2160 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; 2133 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
2161 mutex_unlock(&callback_mutex); 2134 mutex_unlock(&callback_mutex);
2162 update_tasks_nodemask(&top_cpuset, oldmems, NULL); 2135 update_tasks_nodemask(&top_cpuset, &oldmems, NULL);
2163 break; 2136 break;
2164 case MEM_OFFLINE: 2137 case MEM_OFFLINE:
2165 /* 2138 /*
@@ -2173,7 +2146,6 @@ static int cpuset_track_online_nodes(struct notifier_block *self,
2173 } 2146 }
2174 cgroup_unlock(); 2147 cgroup_unlock();
2175 2148
2176 NODEMASK_FREE(oldmems);
2177 return NOTIFY_OK; 2149 return NOTIFY_OK;
2178} 2150}
2179#endif 2151#endif
diff --git a/kernel/crash_dump.c b/kernel/crash_dump.c
new file mode 100644
index 000000000000..5f85690285d4
--- /dev/null
+++ b/kernel/crash_dump.c
@@ -0,0 +1,34 @@
1#include <linux/kernel.h>
2#include <linux/crash_dump.h>
3#include <linux/init.h>
4#include <linux/errno.h>
5#include <linux/module.h>
6
7/*
8 * If we have booted due to a crash, max_pfn will be a very low value. We need
9 * to know the amount of memory that the previous kernel used.
10 */
11unsigned long saved_max_pfn;
12
13/*
14 * stores the physical address of elf header of crash image
15 *
16 * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by
17 * is_kdump_kernel() to determine if we are booting after a panic. Hence put
18 * it under CONFIG_CRASH_DUMP and not CONFIG_PROC_VMCORE.
19 */
20unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
21
22/*
23 * elfcorehdr= specifies the location of elf core header stored by the crashed
24 * kernel. This option will be passed by kexec loader to the capture kernel.
25 */
26static int __init setup_elfcorehdr(char *arg)
27{
28 char *end;
29 if (!arg)
30 return -EINVAL;
31 elfcorehdr_addr = memparse(arg, &end);
32 return end > arg ? 0 : -EINVAL;
33}
34early_param("elfcorehdr", setup_elfcorehdr);
diff --git a/kernel/cred.c b/kernel/cred.c
index 2343c132c5a7..5557b55048df 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -741,6 +741,12 @@ int set_create_files_as(struct cred *new, struct inode *inode)
741} 741}
742EXPORT_SYMBOL(set_create_files_as); 742EXPORT_SYMBOL(set_create_files_as);
743 743
744struct user_namespace *current_user_ns(void)
745{
746 return _current_user_ns();
747}
748EXPORT_SYMBOL(current_user_ns);
749
744#ifdef CONFIG_DEBUG_CREDENTIALS 750#ifdef CONFIG_DEBUG_CREDENTIALS
745 751
746bool creds_are_invalid(const struct cred *cred) 752bool creds_are_invalid(const struct cred *cred)
diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c
index 481a7bd2dfe7..a11db956dd62 100644
--- a/kernel/debug/gdbstub.c
+++ b/kernel/debug/gdbstub.c
@@ -1093,3 +1093,33 @@ int gdbstub_state(struct kgdb_state *ks, char *cmd)
1093 put_packet(remcom_out_buffer); 1093 put_packet(remcom_out_buffer);
1094 return 0; 1094 return 0;
1095} 1095}
1096
1097/**
1098 * gdbstub_exit - Send an exit message to GDB
1099 * @status: The exit code to report.
1100 */
1101void gdbstub_exit(int status)
1102{
1103 unsigned char checksum, ch, buffer[3];
1104 int loop;
1105
1106 buffer[0] = 'W';
1107 buffer[1] = hex_asc_hi(status);
1108 buffer[2] = hex_asc_lo(status);
1109
1110 dbg_io_ops->write_char('$');
1111 checksum = 0;
1112
1113 for (loop = 0; loop < 3; loop++) {
1114 ch = buffer[loop];
1115 checksum += ch;
1116 dbg_io_ops->write_char(ch);
1117 }
1118
1119 dbg_io_ops->write_char('#');
1120 dbg_io_ops->write_char(hex_asc_hi(checksum));
1121 dbg_io_ops->write_char(hex_asc_lo(checksum));
1122
1123 /* make sure the output is flushed, lest the bootloader clobber it */
1124 dbg_io_ops->flush();
1125}
diff --git a/kernel/exit.c b/kernel/exit.c
index f9a45ebcc7b1..6a488ad2dce5 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -908,6 +908,7 @@ NORET_TYPE void do_exit(long code)
908 profile_task_exit(tsk); 908 profile_task_exit(tsk);
909 909
910 WARN_ON(atomic_read(&tsk->fs_excl)); 910 WARN_ON(atomic_read(&tsk->fs_excl));
911 WARN_ON(blk_needs_flush_plug(tsk));
911 912
912 if (unlikely(in_interrupt())) 913 if (unlikely(in_interrupt()))
913 panic("Aiee, killing interrupt handler!"); 914 panic("Aiee, killing interrupt handler!");
diff --git a/kernel/fork.c b/kernel/fork.c
index f2b494d7c557..e7548dee636b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1187,12 +1187,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1187 pid = alloc_pid(p->nsproxy->pid_ns); 1187 pid = alloc_pid(p->nsproxy->pid_ns);
1188 if (!pid) 1188 if (!pid)
1189 goto bad_fork_cleanup_io; 1189 goto bad_fork_cleanup_io;
1190
1191 if (clone_flags & CLONE_NEWPID) {
1192 retval = pid_ns_prepare_proc(p->nsproxy->pid_ns);
1193 if (retval < 0)
1194 goto bad_fork_free_pid;
1195 }
1196 } 1190 }
1197 1191
1198 p->pid = pid_nr(pid); 1192 p->pid = pid_nr(pid);
@@ -1211,6 +1205,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1211 * Clear TID on mm_release()? 1205 * Clear TID on mm_release()?
1212 */ 1206 */
1213 p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL; 1207 p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
1208#ifdef CONFIG_BLOCK
1209 p->plug = NULL;
1210#endif
1214#ifdef CONFIG_FUTEX 1211#ifdef CONFIG_FUTEX
1215 p->robust_list = NULL; 1212 p->robust_list = NULL;
1216#ifdef CONFIG_COMPAT 1213#ifdef CONFIG_COMPAT
@@ -1296,7 +1293,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1296 tracehook_finish_clone(p, clone_flags, trace); 1293 tracehook_finish_clone(p, clone_flags, trace);
1297 1294
1298 if (thread_group_leader(p)) { 1295 if (thread_group_leader(p)) {
1299 if (clone_flags & CLONE_NEWPID) 1296 if (is_child_reaper(pid))
1300 p->nsproxy->pid_ns->child_reaper = p; 1297 p->nsproxy->pid_ns->child_reaper = p;
1301 1298
1302 p->signal->leader_pid = pid; 1299 p->signal->leader_pid = pid;
diff --git a/kernel/futex.c b/kernel/futex.c
index bda415715382..dfb924ffe65b 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -782,8 +782,8 @@ static void __unqueue_futex(struct futex_q *q)
782{ 782{
783 struct futex_hash_bucket *hb; 783 struct futex_hash_bucket *hb;
784 784
785 if (WARN_ON(!q->lock_ptr || !spin_is_locked(q->lock_ptr) 785 if (WARN_ON_SMP(!q->lock_ptr || !spin_is_locked(q->lock_ptr))
786 || plist_node_empty(&q->list))) 786 || WARN_ON(plist_node_empty(&q->list)))
787 return; 787 return;
788 788
789 hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock); 789 hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
@@ -2418,10 +2418,19 @@ SYSCALL_DEFINE3(get_robust_list, int, pid,
2418 goto err_unlock; 2418 goto err_unlock;
2419 ret = -EPERM; 2419 ret = -EPERM;
2420 pcred = __task_cred(p); 2420 pcred = __task_cred(p);
2421 /* If victim is in different user_ns, then uids are not
2422 comparable, so we must have CAP_SYS_PTRACE */
2423 if (cred->user->user_ns != pcred->user->user_ns) {
2424 if (!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
2425 goto err_unlock;
2426 goto ok;
2427 }
2428 /* If victim is in same user_ns, then uids are comparable */
2421 if (cred->euid != pcred->euid && 2429 if (cred->euid != pcred->euid &&
2422 cred->euid != pcred->uid && 2430 cred->euid != pcred->uid &&
2423 !capable(CAP_SYS_PTRACE)) 2431 !ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
2424 goto err_unlock; 2432 goto err_unlock;
2433ok:
2425 head = p->robust_list; 2434 head = p->robust_list;
2426 rcu_read_unlock(); 2435 rcu_read_unlock();
2427 } 2436 }
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index a7934ac75e5b..5f9e689dc8f0 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -153,10 +153,19 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
153 goto err_unlock; 153 goto err_unlock;
154 ret = -EPERM; 154 ret = -EPERM;
155 pcred = __task_cred(p); 155 pcred = __task_cred(p);
156 /* If victim is in different user_ns, then uids are not
157 comparable, so we must have CAP_SYS_PTRACE */
158 if (cred->user->user_ns != pcred->user->user_ns) {
159 if (!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
160 goto err_unlock;
161 goto ok;
162 }
163 /* If victim is in same user_ns, then uids are comparable */
156 if (cred->euid != pcred->euid && 164 if (cred->euid != pcred->euid &&
157 cred->euid != pcred->uid && 165 cred->euid != pcred->uid &&
158 !capable(CAP_SYS_PTRACE)) 166 !ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
159 goto err_unlock; 167 goto err_unlock;
168ok:
160 head = p->compat_robust_list; 169 head = p->compat_robust_list;
161 rcu_read_unlock(); 170 rcu_read_unlock();
162 } 171 }
diff --git a/kernel/groups.c b/kernel/groups.c
index 253dc0f35cf4..1cc476d52dd3 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -233,7 +233,7 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist)
233 struct group_info *group_info; 233 struct group_info *group_info;
234 int retval; 234 int retval;
235 235
236 if (!capable(CAP_SETGID)) 236 if (!nsown_capable(CAP_SETGID))
237 return -EPERM; 237 return -EPERM;
238 if ((unsigned)gidsetsize > NGROUPS_MAX) 238 if ((unsigned)gidsetsize > NGROUPS_MAX)
239 return -EINVAL; 239 return -EINVAL;
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 75dcca37d61a..079f1d39a8b8 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -64,14 +64,14 @@ static inline int is_kernel_text(unsigned long addr)
64 if ((addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) || 64 if ((addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) ||
65 arch_is_kernel_text(addr)) 65 arch_is_kernel_text(addr))
66 return 1; 66 return 1;
67 return in_gate_area_no_task(addr); 67 return in_gate_area_no_mm(addr);
68} 68}
69 69
70static inline int is_kernel(unsigned long addr) 70static inline int is_kernel(unsigned long addr)
71{ 71{
72 if (addr >= (unsigned long)_stext && addr <= (unsigned long)_end) 72 if (addr >= (unsigned long)_stext && addr <= (unsigned long)_end)
73 return 1; 73 return 1;
74 return in_gate_area_no_task(addr); 74 return in_gate_area_no_mm(addr);
75} 75}
76 76
77static int is_ksym_addr(unsigned long addr) 77static int is_ksym_addr(unsigned long addr)
@@ -342,13 +342,15 @@ int lookup_symbol_attrs(unsigned long addr, unsigned long *size,
342} 342}
343 343
344/* Look up a kernel symbol and return it in a text buffer. */ 344/* Look up a kernel symbol and return it in a text buffer. */
345int sprint_symbol(char *buffer, unsigned long address) 345static int __sprint_symbol(char *buffer, unsigned long address,
346 int symbol_offset)
346{ 347{
347 char *modname; 348 char *modname;
348 const char *name; 349 const char *name;
349 unsigned long offset, size; 350 unsigned long offset, size;
350 int len; 351 int len;
351 352
353 address += symbol_offset;
352 name = kallsyms_lookup(address, &size, &offset, &modname, buffer); 354 name = kallsyms_lookup(address, &size, &offset, &modname, buffer);
353 if (!name) 355 if (!name)
354 return sprintf(buffer, "0x%lx", address); 356 return sprintf(buffer, "0x%lx", address);
@@ -357,17 +359,53 @@ int sprint_symbol(char *buffer, unsigned long address)
357 strcpy(buffer, name); 359 strcpy(buffer, name);
358 len = strlen(buffer); 360 len = strlen(buffer);
359 buffer += len; 361 buffer += len;
362 offset -= symbol_offset;
360 363
361 if (modname) 364 if (modname)
362 len += sprintf(buffer, "+%#lx/%#lx [%s]", 365 len += sprintf(buffer, "+%#lx/%#lx [%s]", offset, size, modname);
363 offset, size, modname);
364 else 366 else
365 len += sprintf(buffer, "+%#lx/%#lx", offset, size); 367 len += sprintf(buffer, "+%#lx/%#lx", offset, size);
366 368
367 return len; 369 return len;
368} 370}
371
372/**
373 * sprint_symbol - Look up a kernel symbol and return it in a text buffer
374 * @buffer: buffer to be stored
375 * @address: address to lookup
376 *
377 * This function looks up a kernel symbol with @address and stores its name,
378 * offset, size and module name to @buffer if possible. If no symbol was found,
379 * just saves its @address as is.
380 *
381 * This function returns the number of bytes stored in @buffer.
382 */
383int sprint_symbol(char *buffer, unsigned long address)
384{
385 return __sprint_symbol(buffer, address, 0);
386}
387
369EXPORT_SYMBOL_GPL(sprint_symbol); 388EXPORT_SYMBOL_GPL(sprint_symbol);
370 389
390/**
391 * sprint_backtrace - Look up a backtrace symbol and return it in a text buffer
392 * @buffer: buffer to be stored
393 * @address: address to lookup
394 *
395 * This function is for stack backtrace and does the same thing as
396 * sprint_symbol() but with modified/decreased @address. If there is a
397 * tail-call to the function marked "noreturn", gcc optimized out code after
398 * the call so that the stack-saved return address could point outside of the
399 * caller. This function ensures that kallsyms will find the original caller
400 * by decreasing @address.
401 *
402 * This function returns the number of bytes stored in @buffer.
403 */
404int sprint_backtrace(char *buffer, unsigned long address)
405{
406 return __sprint_symbol(buffer, address, -1);
407}
408
371/* Look up a kernel symbol and print it to the kernel messages. */ 409/* Look up a kernel symbol and print it to the kernel messages. */
372void __print_symbol(const char *fmt, unsigned long address) 410void __print_symbol(const char *fmt, unsigned long address)
373{ 411{
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index 1969d2fc4b36..71edd2f60c02 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -225,7 +225,7 @@ static int lockdep_stats_show(struct seq_file *m, void *v)
225 nr_irq_read_safe = 0, nr_irq_read_unsafe = 0, 225 nr_irq_read_safe = 0, nr_irq_read_unsafe = 0,
226 nr_softirq_read_safe = 0, nr_softirq_read_unsafe = 0, 226 nr_softirq_read_safe = 0, nr_softirq_read_unsafe = 0,
227 nr_hardirq_read_safe = 0, nr_hardirq_read_unsafe = 0, 227 nr_hardirq_read_safe = 0, nr_hardirq_read_unsafe = 0,
228 sum_forward_deps = 0, factor = 0; 228 sum_forward_deps = 0;
229 229
230 list_for_each_entry(class, &all_lock_classes, lock_entry) { 230 list_for_each_entry(class, &all_lock_classes, lock_entry) {
231 231
@@ -283,13 +283,6 @@ static int lockdep_stats_show(struct seq_file *m, void *v)
283 nr_hardirq_unsafe * nr_hardirq_safe + 283 nr_hardirq_unsafe * nr_hardirq_safe +
284 nr_list_entries); 284 nr_list_entries);
285 285
286 /*
287 * Estimated factor between direct and indirect
288 * dependencies:
289 */
290 if (nr_list_entries)
291 factor = sum_forward_deps / nr_list_entries;
292
293#ifdef CONFIG_PROVE_LOCKING 286#ifdef CONFIG_PROVE_LOCKING
294 seq_printf(m, " dependency chains: %11lu [max: %lu]\n", 287 seq_printf(m, " dependency chains: %11lu [max: %lu]\n",
295 nr_lock_chains, MAX_LOCKDEP_CHAINS); 288 nr_lock_chains, MAX_LOCKDEP_CHAINS);
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index f74e6c00e26d..a05d191ffdd9 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -69,13 +69,13 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
69 goto out_ns; 69 goto out_ns;
70 } 70 }
71 71
72 new_nsp->uts_ns = copy_utsname(flags, tsk->nsproxy->uts_ns); 72 new_nsp->uts_ns = copy_utsname(flags, tsk);
73 if (IS_ERR(new_nsp->uts_ns)) { 73 if (IS_ERR(new_nsp->uts_ns)) {
74 err = PTR_ERR(new_nsp->uts_ns); 74 err = PTR_ERR(new_nsp->uts_ns);
75 goto out_uts; 75 goto out_uts;
76 } 76 }
77 77
78 new_nsp->ipc_ns = copy_ipcs(flags, tsk->nsproxy->ipc_ns); 78 new_nsp->ipc_ns = copy_ipcs(flags, tsk);
79 if (IS_ERR(new_nsp->ipc_ns)) { 79 if (IS_ERR(new_nsp->ipc_ns)) {
80 err = PTR_ERR(new_nsp->ipc_ns); 80 err = PTR_ERR(new_nsp->ipc_ns);
81 goto out_ipc; 81 goto out_ipc;
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 3472bb1a070c..c75925c4d1e2 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -145,7 +145,8 @@ static struct srcu_struct pmus_srcu;
145 */ 145 */
146int sysctl_perf_event_paranoid __read_mostly = 1; 146int sysctl_perf_event_paranoid __read_mostly = 1;
147 147
148int sysctl_perf_event_mlock __read_mostly = 512; /* 'free' kb per user */ 148/* Minimum for 128 pages + 1 for the user control page */
149int sysctl_perf_event_mlock __read_mostly = 516; /* 'free' kb per user */
149 150
150/* 151/*
151 * max perf event sample rate 152 * max perf event sample rate
@@ -941,6 +942,7 @@ static void perf_group_attach(struct perf_event *event)
941static void 942static void
942list_del_event(struct perf_event *event, struct perf_event_context *ctx) 943list_del_event(struct perf_event *event, struct perf_event_context *ctx)
943{ 944{
945 struct perf_cpu_context *cpuctx;
944 /* 946 /*
945 * We can have double detach due to exit/hot-unplug + close. 947 * We can have double detach due to exit/hot-unplug + close.
946 */ 948 */
@@ -949,8 +951,17 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
949 951
950 event->attach_state &= ~PERF_ATTACH_CONTEXT; 952 event->attach_state &= ~PERF_ATTACH_CONTEXT;
951 953
952 if (is_cgroup_event(event)) 954 if (is_cgroup_event(event)) {
953 ctx->nr_cgroups--; 955 ctx->nr_cgroups--;
956 cpuctx = __get_cpu_context(ctx);
957 /*
958 * if there are no more cgroup events
959 * then cler cgrp to avoid stale pointer
960 * in update_cgrp_time_from_cpuctx()
961 */
962 if (!ctx->nr_cgroups)
963 cpuctx->cgrp = NULL;
964 }
954 965
955 ctx->nr_events--; 966 ctx->nr_events--;
956 if (event->attr.inherit_stat) 967 if (event->attr.inherit_stat)
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index a5aff94e1f0b..e9c9adc84ca6 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -14,6 +14,7 @@
14#include <linux/err.h> 14#include <linux/err.h>
15#include <linux/acct.h> 15#include <linux/acct.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/proc_fs.h>
17 18
18#define BITS_PER_PAGE (PAGE_SIZE*8) 19#define BITS_PER_PAGE (PAGE_SIZE*8)
19 20
@@ -72,7 +73,7 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p
72{ 73{
73 struct pid_namespace *ns; 74 struct pid_namespace *ns;
74 unsigned int level = parent_pid_ns->level + 1; 75 unsigned int level = parent_pid_ns->level + 1;
75 int i; 76 int i, err = -ENOMEM;
76 77
77 ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL); 78 ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL);
78 if (ns == NULL) 79 if (ns == NULL)
@@ -96,14 +97,20 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p
96 for (i = 1; i < PIDMAP_ENTRIES; i++) 97 for (i = 1; i < PIDMAP_ENTRIES; i++)
97 atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); 98 atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
98 99
100 err = pid_ns_prepare_proc(ns);
101 if (err)
102 goto out_put_parent_pid_ns;
103
99 return ns; 104 return ns;
100 105
106out_put_parent_pid_ns:
107 put_pid_ns(parent_pid_ns);
101out_free_map: 108out_free_map:
102 kfree(ns->pidmap[0].page); 109 kfree(ns->pidmap[0].page);
103out_free: 110out_free:
104 kmem_cache_free(pid_ns_cachep, ns); 111 kmem_cache_free(pid_ns_cachep, ns);
105out: 112out:
106 return ERR_PTR(-ENOMEM); 113 return ERR_PTR(err);
107} 114}
108 115
109static void destroy_pid_namespace(struct pid_namespace *ns) 116static void destroy_pid_namespace(struct pid_namespace *ns)
diff --git a/kernel/power/block_io.c b/kernel/power/block_io.c
index 83bbc7c02df9..d09dd10c5a5e 100644
--- a/kernel/power/block_io.c
+++ b/kernel/power/block_io.c
@@ -28,7 +28,7 @@
28static int submit(int rw, struct block_device *bdev, sector_t sector, 28static int submit(int rw, struct block_device *bdev, sector_t sector,
29 struct page *page, struct bio **bio_chain) 29 struct page *page, struct bio **bio_chain)
30{ 30{
31 const int bio_rw = rw | REQ_SYNC | REQ_UNPLUG; 31 const int bio_rw = rw | REQ_SYNC;
32 struct bio *bio; 32 struct bio *bio;
33 33
34 bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); 34 bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index e2302e40b360..0fc1eed28d27 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -134,21 +134,24 @@ int __ptrace_may_access(struct task_struct *task, unsigned int mode)
134 return 0; 134 return 0;
135 rcu_read_lock(); 135 rcu_read_lock();
136 tcred = __task_cred(task); 136 tcred = __task_cred(task);
137 if ((cred->uid != tcred->euid || 137 if (cred->user->user_ns == tcred->user->user_ns &&
138 cred->uid != tcred->suid || 138 (cred->uid == tcred->euid &&
139 cred->uid != tcred->uid || 139 cred->uid == tcred->suid &&
140 cred->gid != tcred->egid || 140 cred->uid == tcred->uid &&
141 cred->gid != tcred->sgid || 141 cred->gid == tcred->egid &&
142 cred->gid != tcred->gid) && 142 cred->gid == tcred->sgid &&
143 !capable(CAP_SYS_PTRACE)) { 143 cred->gid == tcred->gid))
144 rcu_read_unlock(); 144 goto ok;
145 return -EPERM; 145 if (ns_capable(tcred->user->user_ns, CAP_SYS_PTRACE))
146 } 146 goto ok;
147 rcu_read_unlock();
148 return -EPERM;
149ok:
147 rcu_read_unlock(); 150 rcu_read_unlock();
148 smp_rmb(); 151 smp_rmb();
149 if (task->mm) 152 if (task->mm)
150 dumpable = get_dumpable(task->mm); 153 dumpable = get_dumpable(task->mm);
151 if (!dumpable && !capable(CAP_SYS_PTRACE)) 154 if (!dumpable && !task_ns_capable(task, CAP_SYS_PTRACE))
152 return -EPERM; 155 return -EPERM;
153 156
154 return security_ptrace_access_check(task, mode); 157 return security_ptrace_access_check(task, mode);
@@ -198,7 +201,7 @@ static int ptrace_attach(struct task_struct *task)
198 goto unlock_tasklist; 201 goto unlock_tasklist;
199 202
200 task->ptrace = PT_PTRACED; 203 task->ptrace = PT_PTRACED;
201 if (capable(CAP_SYS_PTRACE)) 204 if (task_ns_capable(task, CAP_SYS_PTRACE))
202 task->ptrace |= PT_PTRACE_CAP; 205 task->ptrace |= PT_PTRACE_CAP;
203 206
204 __ptrace_link(task, current); 207 __ptrace_link(task, current);
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index c7eaa37a768b..34683efa2cce 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -126,10 +126,24 @@ ssize_t res_counter_read(struct res_counter *counter, int member,
126 pos, buf, s - buf); 126 pos, buf, s - buf);
127} 127}
128 128
129#if BITS_PER_LONG == 32
130u64 res_counter_read_u64(struct res_counter *counter, int member)
131{
132 unsigned long flags;
133 u64 ret;
134
135 spin_lock_irqsave(&counter->lock, flags);
136 ret = *res_counter_member(counter, member);
137 spin_unlock_irqrestore(&counter->lock, flags);
138
139 return ret;
140}
141#else
129u64 res_counter_read_u64(struct res_counter *counter, int member) 142u64 res_counter_read_u64(struct res_counter *counter, int member)
130{ 143{
131 return *res_counter_member(counter, member); 144 return *res_counter_member(counter, member);
132} 145}
146#endif
133 147
134int res_counter_memparse_write_strategy(const char *buf, 148int res_counter_memparse_write_strategy(const char *buf,
135 unsigned long long *res) 149 unsigned long long *res)
diff --git a/kernel/sched.c b/kernel/sched.c
index a172494a9a63..f592ce6f8616 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4115,6 +4115,16 @@ need_resched:
4115 switch_count = &prev->nvcsw; 4115 switch_count = &prev->nvcsw;
4116 } 4116 }
4117 4117
4118 /*
4119 * If we are going to sleep and we have plugged IO queued, make
4120 * sure to submit it to avoid deadlocks.
4121 */
4122 if (prev->state != TASK_RUNNING && blk_needs_flush_plug(prev)) {
4123 raw_spin_unlock(&rq->lock);
4124 blk_flush_plug(prev);
4125 raw_spin_lock(&rq->lock);
4126 }
4127
4118 pre_schedule(rq, prev); 4128 pre_schedule(rq, prev);
4119 4129
4120 if (unlikely(!rq->nr_running)) 4130 if (unlikely(!rq->nr_running))
@@ -4892,8 +4902,11 @@ static bool check_same_owner(struct task_struct *p)
4892 4902
4893 rcu_read_lock(); 4903 rcu_read_lock();
4894 pcred = __task_cred(p); 4904 pcred = __task_cred(p);
4895 match = (cred->euid == pcred->euid || 4905 if (cred->user->user_ns == pcred->user->user_ns)
4896 cred->euid == pcred->uid); 4906 match = (cred->euid == pcred->euid ||
4907 cred->euid == pcred->uid);
4908 else
4909 match = false;
4897 rcu_read_unlock(); 4910 rcu_read_unlock();
4898 return match; 4911 return match;
4899} 4912}
@@ -5221,7 +5234,7 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
5221 goto out_free_cpus_allowed; 5234 goto out_free_cpus_allowed;
5222 } 5235 }
5223 retval = -EPERM; 5236 retval = -EPERM;
5224 if (!check_same_owner(p) && !capable(CAP_SYS_NICE)) 5237 if (!check_same_owner(p) && !task_ns_capable(p, CAP_SYS_NICE))
5225 goto out_unlock; 5238 goto out_unlock;
5226 5239
5227 retval = security_task_setscheduler(p); 5240 retval = security_task_setscheduler(p);
@@ -5460,6 +5473,8 @@ EXPORT_SYMBOL(yield);
5460 * yield_to - yield the current processor to another thread in 5473 * yield_to - yield the current processor to another thread in
5461 * your thread group, or accelerate that thread toward the 5474 * your thread group, or accelerate that thread toward the
5462 * processor it's on. 5475 * processor it's on.
5476 * @p: target task
5477 * @preempt: whether task preemption is allowed or not
5463 * 5478 *
5464 * It's the caller's job to ensure that the target task struct 5479 * It's the caller's job to ensure that the target task struct
5465 * can't go away on us before we can do any checks. 5480 * can't go away on us before we can do any checks.
@@ -5525,6 +5540,7 @@ void __sched io_schedule(void)
5525 5540
5526 delayacct_blkio_start(); 5541 delayacct_blkio_start();
5527 atomic_inc(&rq->nr_iowait); 5542 atomic_inc(&rq->nr_iowait);
5543 blk_flush_plug(current);
5528 current->in_iowait = 1; 5544 current->in_iowait = 1;
5529 schedule(); 5545 schedule();
5530 current->in_iowait = 0; 5546 current->in_iowait = 0;
@@ -5540,6 +5556,7 @@ long __sched io_schedule_timeout(long timeout)
5540 5556
5541 delayacct_blkio_start(); 5557 delayacct_blkio_start();
5542 atomic_inc(&rq->nr_iowait); 5558 atomic_inc(&rq->nr_iowait);
5559 blk_flush_plug(current);
5543 current->in_iowait = 1; 5560 current->in_iowait = 1;
5544 ret = schedule_timeout(timeout); 5561 ret = schedule_timeout(timeout);
5545 current->in_iowait = 0; 5562 current->in_iowait = 0;
@@ -8434,7 +8451,6 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
8434{ 8451{
8435 struct cfs_rq *cfs_rq; 8452 struct cfs_rq *cfs_rq;
8436 struct sched_entity *se; 8453 struct sched_entity *se;
8437 struct rq *rq;
8438 int i; 8454 int i;
8439 8455
8440 tg->cfs_rq = kzalloc(sizeof(cfs_rq) * nr_cpu_ids, GFP_KERNEL); 8456 tg->cfs_rq = kzalloc(sizeof(cfs_rq) * nr_cpu_ids, GFP_KERNEL);
@@ -8447,8 +8463,6 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
8447 tg->shares = NICE_0_LOAD; 8463 tg->shares = NICE_0_LOAD;
8448 8464
8449 for_each_possible_cpu(i) { 8465 for_each_possible_cpu(i) {
8450 rq = cpu_rq(i);
8451
8452 cfs_rq = kzalloc_node(sizeof(struct cfs_rq), 8466 cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
8453 GFP_KERNEL, cpu_to_node(i)); 8467 GFP_KERNEL, cpu_to_node(i));
8454 if (!cfs_rq) 8468 if (!cfs_rq)
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index c82f26c1b7c3..a776a6396427 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -94,6 +94,4 @@ static const struct sched_class idle_sched_class = {
94 94
95 .prio_changed = prio_changed_idle, 95 .prio_changed = prio_changed_idle,
96 .switched_to = switched_to_idle, 96 .switched_to = switched_to_idle,
97
98 /* no .task_new for idle tasks */
99}; 97};
diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c
index 84ec9bcf82d9..1ba2bd40fdac 100644
--- a/kernel/sched_stoptask.c
+++ b/kernel/sched_stoptask.c
@@ -102,6 +102,4 @@ static const struct sched_class stop_sched_class = {
102 102
103 .prio_changed = prio_changed_stop, 103 .prio_changed = prio_changed_stop,
104 .switched_to = switched_to_stop, 104 .switched_to = switched_to_stop,
105
106 /* no .task_new for stop tasks */
107}; 105};
diff --git a/kernel/signal.c b/kernel/signal.c
index 31751868de88..324eff5468ad 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -636,13 +636,33 @@ static inline bool si_fromuser(const struct siginfo *info)
636} 636}
637 637
638/* 638/*
639 * called with RCU read lock from check_kill_permission()
640 */
641static int kill_ok_by_cred(struct task_struct *t)
642{
643 const struct cred *cred = current_cred();
644 const struct cred *tcred = __task_cred(t);
645
646 if (cred->user->user_ns == tcred->user->user_ns &&
647 (cred->euid == tcred->suid ||
648 cred->euid == tcred->uid ||
649 cred->uid == tcred->suid ||
650 cred->uid == tcred->uid))
651 return 1;
652
653 if (ns_capable(tcred->user->user_ns, CAP_KILL))
654 return 1;
655
656 return 0;
657}
658
659/*
639 * Bad permissions for sending the signal 660 * Bad permissions for sending the signal
640 * - the caller must hold the RCU read lock 661 * - the caller must hold the RCU read lock
641 */ 662 */
642static int check_kill_permission(int sig, struct siginfo *info, 663static int check_kill_permission(int sig, struct siginfo *info,
643 struct task_struct *t) 664 struct task_struct *t)
644{ 665{
645 const struct cred *cred, *tcred;
646 struct pid *sid; 666 struct pid *sid;
647 int error; 667 int error;
648 668
@@ -656,14 +676,8 @@ static int check_kill_permission(int sig, struct siginfo *info,
656 if (error) 676 if (error)
657 return error; 677 return error;
658 678
659 cred = current_cred();
660 tcred = __task_cred(t);
661 if (!same_thread_group(current, t) && 679 if (!same_thread_group(current, t) &&
662 (cred->euid ^ tcred->suid) && 680 !kill_ok_by_cred(t)) {
663 (cred->euid ^ tcred->uid) &&
664 (cred->uid ^ tcred->suid) &&
665 (cred->uid ^ tcred->uid) &&
666 !capable(CAP_KILL)) {
667 switch (sig) { 681 switch (sig) {
668 case SIGCONT: 682 case SIGCONT:
669 sid = task_session(t); 683 sid = task_session(t);
diff --git a/kernel/sys.c b/kernel/sys.c
index 1ad48b3b9068..af468edf096a 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -120,16 +120,33 @@ EXPORT_SYMBOL(cad_pid);
120void (*pm_power_off_prepare)(void); 120void (*pm_power_off_prepare)(void);
121 121
122/* 122/*
123 * Returns true if current's euid is same as p's uid or euid,
124 * or has CAP_SYS_NICE to p's user_ns.
125 *
126 * Called with rcu_read_lock, creds are safe
127 */
128static bool set_one_prio_perm(struct task_struct *p)
129{
130 const struct cred *cred = current_cred(), *pcred = __task_cred(p);
131
132 if (pcred->user->user_ns == cred->user->user_ns &&
133 (pcred->uid == cred->euid ||
134 pcred->euid == cred->euid))
135 return true;
136 if (ns_capable(pcred->user->user_ns, CAP_SYS_NICE))
137 return true;
138 return false;
139}
140
141/*
123 * set the priority of a task 142 * set the priority of a task
124 * - the caller must hold the RCU read lock 143 * - the caller must hold the RCU read lock
125 */ 144 */
126static int set_one_prio(struct task_struct *p, int niceval, int error) 145static int set_one_prio(struct task_struct *p, int niceval, int error)
127{ 146{
128 const struct cred *cred = current_cred(), *pcred = __task_cred(p);
129 int no_nice; 147 int no_nice;
130 148
131 if (pcred->uid != cred->euid && 149 if (!set_one_prio_perm(p)) {
132 pcred->euid != cred->euid && !capable(CAP_SYS_NICE)) {
133 error = -EPERM; 150 error = -EPERM;
134 goto out; 151 goto out;
135 } 152 }
@@ -506,7 +523,7 @@ SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
506 if (rgid != (gid_t) -1) { 523 if (rgid != (gid_t) -1) {
507 if (old->gid == rgid || 524 if (old->gid == rgid ||
508 old->egid == rgid || 525 old->egid == rgid ||
509 capable(CAP_SETGID)) 526 nsown_capable(CAP_SETGID))
510 new->gid = rgid; 527 new->gid = rgid;
511 else 528 else
512 goto error; 529 goto error;
@@ -515,7 +532,7 @@ SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
515 if (old->gid == egid || 532 if (old->gid == egid ||
516 old->egid == egid || 533 old->egid == egid ||
517 old->sgid == egid || 534 old->sgid == egid ||
518 capable(CAP_SETGID)) 535 nsown_capable(CAP_SETGID))
519 new->egid = egid; 536 new->egid = egid;
520 else 537 else
521 goto error; 538 goto error;
@@ -550,7 +567,7 @@ SYSCALL_DEFINE1(setgid, gid_t, gid)
550 old = current_cred(); 567 old = current_cred();
551 568
552 retval = -EPERM; 569 retval = -EPERM;
553 if (capable(CAP_SETGID)) 570 if (nsown_capable(CAP_SETGID))
554 new->gid = new->egid = new->sgid = new->fsgid = gid; 571 new->gid = new->egid = new->sgid = new->fsgid = gid;
555 else if (gid == old->gid || gid == old->sgid) 572 else if (gid == old->gid || gid == old->sgid)
556 new->egid = new->fsgid = gid; 573 new->egid = new->fsgid = gid;
@@ -617,7 +634,7 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
617 new->uid = ruid; 634 new->uid = ruid;
618 if (old->uid != ruid && 635 if (old->uid != ruid &&
619 old->euid != ruid && 636 old->euid != ruid &&
620 !capable(CAP_SETUID)) 637 !nsown_capable(CAP_SETUID))
621 goto error; 638 goto error;
622 } 639 }
623 640
@@ -626,7 +643,7 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
626 if (old->uid != euid && 643 if (old->uid != euid &&
627 old->euid != euid && 644 old->euid != euid &&
628 old->suid != euid && 645 old->suid != euid &&
629 !capable(CAP_SETUID)) 646 !nsown_capable(CAP_SETUID))
630 goto error; 647 goto error;
631 } 648 }
632 649
@@ -674,7 +691,7 @@ SYSCALL_DEFINE1(setuid, uid_t, uid)
674 old = current_cred(); 691 old = current_cred();
675 692
676 retval = -EPERM; 693 retval = -EPERM;
677 if (capable(CAP_SETUID)) { 694 if (nsown_capable(CAP_SETUID)) {
678 new->suid = new->uid = uid; 695 new->suid = new->uid = uid;
679 if (uid != old->uid) { 696 if (uid != old->uid) {
680 retval = set_user(new); 697 retval = set_user(new);
@@ -716,7 +733,7 @@ SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid)
716 old = current_cred(); 733 old = current_cred();
717 734
718 retval = -EPERM; 735 retval = -EPERM;
719 if (!capable(CAP_SETUID)) { 736 if (!nsown_capable(CAP_SETUID)) {
720 if (ruid != (uid_t) -1 && ruid != old->uid && 737 if (ruid != (uid_t) -1 && ruid != old->uid &&
721 ruid != old->euid && ruid != old->suid) 738 ruid != old->euid && ruid != old->suid)
722 goto error; 739 goto error;
@@ -780,7 +797,7 @@ SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid)
780 old = current_cred(); 797 old = current_cred();
781 798
782 retval = -EPERM; 799 retval = -EPERM;
783 if (!capable(CAP_SETGID)) { 800 if (!nsown_capable(CAP_SETGID)) {
784 if (rgid != (gid_t) -1 && rgid != old->gid && 801 if (rgid != (gid_t) -1 && rgid != old->gid &&
785 rgid != old->egid && rgid != old->sgid) 802 rgid != old->egid && rgid != old->sgid)
786 goto error; 803 goto error;
@@ -840,7 +857,7 @@ SYSCALL_DEFINE1(setfsuid, uid_t, uid)
840 857
841 if (uid == old->uid || uid == old->euid || 858 if (uid == old->uid || uid == old->euid ||
842 uid == old->suid || uid == old->fsuid || 859 uid == old->suid || uid == old->fsuid ||
843 capable(CAP_SETUID)) { 860 nsown_capable(CAP_SETUID)) {
844 if (uid != old_fsuid) { 861 if (uid != old_fsuid) {
845 new->fsuid = uid; 862 new->fsuid = uid;
846 if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0) 863 if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0)
@@ -873,7 +890,7 @@ SYSCALL_DEFINE1(setfsgid, gid_t, gid)
873 890
874 if (gid == old->gid || gid == old->egid || 891 if (gid == old->gid || gid == old->egid ||
875 gid == old->sgid || gid == old->fsgid || 892 gid == old->sgid || gid == old->fsgid ||
876 capable(CAP_SETGID)) { 893 nsown_capable(CAP_SETGID)) {
877 if (gid != old_fsgid) { 894 if (gid != old_fsgid) {
878 new->fsgid = gid; 895 new->fsgid = gid;
879 goto change_okay; 896 goto change_okay;
@@ -1181,8 +1198,9 @@ SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
1181 int errno; 1198 int errno;
1182 char tmp[__NEW_UTS_LEN]; 1199 char tmp[__NEW_UTS_LEN];
1183 1200
1184 if (!capable(CAP_SYS_ADMIN)) 1201 if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
1185 return -EPERM; 1202 return -EPERM;
1203
1186 if (len < 0 || len > __NEW_UTS_LEN) 1204 if (len < 0 || len > __NEW_UTS_LEN)
1187 return -EINVAL; 1205 return -EINVAL;
1188 down_write(&uts_sem); 1206 down_write(&uts_sem);
@@ -1230,7 +1248,7 @@ SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len)
1230 int errno; 1248 int errno;
1231 char tmp[__NEW_UTS_LEN]; 1249 char tmp[__NEW_UTS_LEN];
1232 1250
1233 if (!capable(CAP_SYS_ADMIN)) 1251 if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
1234 return -EPERM; 1252 return -EPERM;
1235 if (len < 0 || len > __NEW_UTS_LEN) 1253 if (len < 0 || len > __NEW_UTS_LEN)
1236 return -EINVAL; 1254 return -EINVAL;
@@ -1345,6 +1363,8 @@ int do_prlimit(struct task_struct *tsk, unsigned int resource,
1345 rlim = tsk->signal->rlim + resource; 1363 rlim = tsk->signal->rlim + resource;
1346 task_lock(tsk->group_leader); 1364 task_lock(tsk->group_leader);
1347 if (new_rlim) { 1365 if (new_rlim) {
1366 /* Keep the capable check against init_user_ns until
1367 cgroups can contain all limits */
1348 if (new_rlim->rlim_max > rlim->rlim_max && 1368 if (new_rlim->rlim_max > rlim->rlim_max &&
1349 !capable(CAP_SYS_RESOURCE)) 1369 !capable(CAP_SYS_RESOURCE))
1350 retval = -EPERM; 1370 retval = -EPERM;
@@ -1388,19 +1408,22 @@ static int check_prlimit_permission(struct task_struct *task)
1388{ 1408{
1389 const struct cred *cred = current_cred(), *tcred; 1409 const struct cred *cred = current_cred(), *tcred;
1390 1410
1391 tcred = __task_cred(task); 1411 if (current == task)
1392 if (current != task && 1412 return 0;
1393 (cred->uid != tcred->euid ||
1394 cred->uid != tcred->suid ||
1395 cred->uid != tcred->uid ||
1396 cred->gid != tcred->egid ||
1397 cred->gid != tcred->sgid ||
1398 cred->gid != tcred->gid) &&
1399 !capable(CAP_SYS_RESOURCE)) {
1400 return -EPERM;
1401 }
1402 1413
1403 return 0; 1414 tcred = __task_cred(task);
1415 if (cred->user->user_ns == tcred->user->user_ns &&
1416 (cred->uid == tcred->euid &&
1417 cred->uid == tcred->suid &&
1418 cred->uid == tcred->uid &&
1419 cred->gid == tcred->egid &&
1420 cred->gid == tcred->sgid &&
1421 cred->gid == tcred->gid))
1422 return 0;
1423 if (ns_capable(tcred->user->user_ns, CAP_SYS_RESOURCE))
1424 return 0;
1425
1426 return -EPERM;
1404} 1427}
1405 1428
1406SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource, 1429SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource,
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 40245d697602..c0bb32414b17 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -117,6 +117,7 @@ static int neg_one = -1;
117static int zero; 117static int zero;
118static int __maybe_unused one = 1; 118static int __maybe_unused one = 1;
119static int __maybe_unused two = 2; 119static int __maybe_unused two = 2;
120static int __maybe_unused three = 3;
120static unsigned long one_ul = 1; 121static unsigned long one_ul = 1;
121static int one_hundred = 100; 122static int one_hundred = 100;
122#ifdef CONFIG_PRINTK 123#ifdef CONFIG_PRINTK
@@ -169,6 +170,11 @@ static int proc_taint(struct ctl_table *table, int write,
169 void __user *buffer, size_t *lenp, loff_t *ppos); 170 void __user *buffer, size_t *lenp, loff_t *ppos);
170#endif 171#endif
171 172
173#ifdef CONFIG_PRINTK
174static int proc_dmesg_restrict(struct ctl_table *table, int write,
175 void __user *buffer, size_t *lenp, loff_t *ppos);
176#endif
177
172#ifdef CONFIG_MAGIC_SYSRQ 178#ifdef CONFIG_MAGIC_SYSRQ
173/* Note: sysrq code uses it's own private copy */ 179/* Note: sysrq code uses it's own private copy */
174static int __sysrq_enabled = SYSRQ_DEFAULT_ENABLE; 180static int __sysrq_enabled = SYSRQ_DEFAULT_ENABLE;
@@ -706,7 +712,7 @@ static struct ctl_table kern_table[] = {
706 .data = &kptr_restrict, 712 .data = &kptr_restrict,
707 .maxlen = sizeof(int), 713 .maxlen = sizeof(int),
708 .mode = 0644, 714 .mode = 0644,
709 .proc_handler = proc_dointvec_minmax, 715 .proc_handler = proc_dmesg_restrict,
710 .extra1 = &zero, 716 .extra1 = &zero,
711 .extra2 = &two, 717 .extra2 = &two,
712 }, 718 },
@@ -971,14 +977,18 @@ static struct ctl_table vm_table[] = {
971 .data = &sysctl_overcommit_memory, 977 .data = &sysctl_overcommit_memory,
972 .maxlen = sizeof(sysctl_overcommit_memory), 978 .maxlen = sizeof(sysctl_overcommit_memory),
973 .mode = 0644, 979 .mode = 0644,
974 .proc_handler = proc_dointvec, 980 .proc_handler = proc_dointvec_minmax,
981 .extra1 = &zero,
982 .extra2 = &two,
975 }, 983 },
976 { 984 {
977 .procname = "panic_on_oom", 985 .procname = "panic_on_oom",
978 .data = &sysctl_panic_on_oom, 986 .data = &sysctl_panic_on_oom,
979 .maxlen = sizeof(sysctl_panic_on_oom), 987 .maxlen = sizeof(sysctl_panic_on_oom),
980 .mode = 0644, 988 .mode = 0644,
981 .proc_handler = proc_dointvec, 989 .proc_handler = proc_dointvec_minmax,
990 .extra1 = &zero,
991 .extra2 = &two,
982 }, 992 },
983 { 993 {
984 .procname = "oom_kill_allocating_task", 994 .procname = "oom_kill_allocating_task",
@@ -1006,7 +1016,8 @@ static struct ctl_table vm_table[] = {
1006 .data = &page_cluster, 1016 .data = &page_cluster,
1007 .maxlen = sizeof(int), 1017 .maxlen = sizeof(int),
1008 .mode = 0644, 1018 .mode = 0644,
1009 .proc_handler = proc_dointvec, 1019 .proc_handler = proc_dointvec_minmax,
1020 .extra1 = &zero,
1010 }, 1021 },
1011 { 1022 {
1012 .procname = "dirty_background_ratio", 1023 .procname = "dirty_background_ratio",
@@ -1054,7 +1065,8 @@ static struct ctl_table vm_table[] = {
1054 .data = &dirty_expire_interval, 1065 .data = &dirty_expire_interval,
1055 .maxlen = sizeof(dirty_expire_interval), 1066 .maxlen = sizeof(dirty_expire_interval),
1056 .mode = 0644, 1067 .mode = 0644,
1057 .proc_handler = proc_dointvec, 1068 .proc_handler = proc_dointvec_minmax,
1069 .extra1 = &zero,
1058 }, 1070 },
1059 { 1071 {
1060 .procname = "nr_pdflush_threads", 1072 .procname = "nr_pdflush_threads",
@@ -1130,6 +1142,8 @@ static struct ctl_table vm_table[] = {
1130 .maxlen = sizeof(int), 1142 .maxlen = sizeof(int),
1131 .mode = 0644, 1143 .mode = 0644,
1132 .proc_handler = drop_caches_sysctl_handler, 1144 .proc_handler = drop_caches_sysctl_handler,
1145 .extra1 = &one,
1146 .extra2 = &three,
1133 }, 1147 },
1134#ifdef CONFIG_COMPACTION 1148#ifdef CONFIG_COMPACTION
1135 { 1149 {
@@ -2385,6 +2399,17 @@ static int proc_taint(struct ctl_table *table, int write,
2385 return err; 2399 return err;
2386} 2400}
2387 2401
2402#ifdef CONFIG_PRINTK
2403static int proc_dmesg_restrict(struct ctl_table *table, int write,
2404 void __user *buffer, size_t *lenp, loff_t *ppos)
2405{
2406 if (write && !capable(CAP_SYS_ADMIN))
2407 return -EPERM;
2408
2409 return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
2410}
2411#endif
2412
2388struct do_proc_dointvec_minmax_conv_param { 2413struct do_proc_dointvec_minmax_conv_param {
2389 int *min; 2414 int *min;
2390 int *max; 2415 int *max;
diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c
index 10b90d8a03c4..4e4932a7b360 100644
--- a/kernel/sysctl_check.c
+++ b/kernel/sysctl_check.c
@@ -111,11 +111,9 @@ int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table)
111 const char *fail = NULL; 111 const char *fail = NULL;
112 112
113 if (table->parent) { 113 if (table->parent) {
114 if (table->procname && !table->parent->procname) 114 if (!table->parent->procname)
115 set_fail(&fail, table, "Parent without procname"); 115 set_fail(&fail, table, "Parent without procname");
116 } 116 }
117 if (!table->procname)
118 set_fail(&fail, table, "No procname");
119 if (table->child) { 117 if (table->child) {
120 if (table->data) 118 if (table->data)
121 set_fail(&fail, table, "Directory with data?"); 119 set_fail(&fail, table, "Directory with data?");
@@ -144,13 +142,9 @@ int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table)
144 set_fail(&fail, table, "No maxlen"); 142 set_fail(&fail, table, "No maxlen");
145 } 143 }
146#ifdef CONFIG_PROC_SYSCTL 144#ifdef CONFIG_PROC_SYSCTL
147 if (table->procname && !table->proc_handler) 145 if (!table->proc_handler)
148 set_fail(&fail, table, "No proc_handler"); 146 set_fail(&fail, table, "No proc_handler");
149#endif 147#endif
150#if 0
151 if (!table->procname && table->proc_handler)
152 set_fail(&fail, table, "proc_handler without procname");
153#endif
154 sysctl_check_leaf(namespaces, table, &fail); 148 sysctl_check_leaf(namespaces, table, &fail);
155 } 149 }
156 if (table->mode > 0777) 150 if (table->mode > 0777)
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 3971c6b9d58d..9ffea360a778 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -685,7 +685,7 @@ static int __init taskstats_init(void)
685 goto err_cgroup_ops; 685 goto err_cgroup_ops;
686 686
687 family_registered = 1; 687 family_registered = 1;
688 printk("registered taskstats version %d\n", TASKSTATS_GENL_VERSION); 688 pr_info("registered taskstats version %d\n", TASKSTATS_GENL_VERSION);
689 return 0; 689 return 0;
690err_cgroup_ops: 690err_cgroup_ops:
691 genl_unregister_ops(&family, &taskstats_ops); 691 genl_unregister_ops(&family, &taskstats_ops);
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index cbafed7d4f38..7aa40f8e182d 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -703,28 +703,21 @@ void blk_trace_shutdown(struct request_queue *q)
703 * 703 *
704 **/ 704 **/
705static void blk_add_trace_rq(struct request_queue *q, struct request *rq, 705static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
706 u32 what) 706 u32 what)
707{ 707{
708 struct blk_trace *bt = q->blk_trace; 708 struct blk_trace *bt = q->blk_trace;
709 int rw = rq->cmd_flags & 0x03;
710 709
711 if (likely(!bt)) 710 if (likely(!bt))
712 return; 711 return;
713 712
714 if (rq->cmd_flags & REQ_DISCARD)
715 rw |= REQ_DISCARD;
716
717 if (rq->cmd_flags & REQ_SECURE)
718 rw |= REQ_SECURE;
719
720 if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { 713 if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
721 what |= BLK_TC_ACT(BLK_TC_PC); 714 what |= BLK_TC_ACT(BLK_TC_PC);
722 __blk_add_trace(bt, 0, blk_rq_bytes(rq), rw, 715 __blk_add_trace(bt, 0, blk_rq_bytes(rq), rq->cmd_flags,
723 what, rq->errors, rq->cmd_len, rq->cmd); 716 what, rq->errors, rq->cmd_len, rq->cmd);
724 } else { 717 } else {
725 what |= BLK_TC_ACT(BLK_TC_FS); 718 what |= BLK_TC_ACT(BLK_TC_FS);
726 __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), rw, 719 __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
727 what, rq->errors, 0, NULL); 720 rq->cmd_flags, what, rq->errors, 0, NULL);
728 } 721 }
729} 722}
730 723
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 888b611897d3..c075f4ea6b94 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1467,7 +1467,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
1467 return t_hash_next(m, pos); 1467 return t_hash_next(m, pos);
1468 1468
1469 (*pos)++; 1469 (*pos)++;
1470 iter->pos = *pos; 1470 iter->pos = iter->func_pos = *pos;
1471 1471
1472 if (iter->flags & FTRACE_ITER_PRINTALL) 1472 if (iter->flags & FTRACE_ITER_PRINTALL)
1473 return t_hash_start(m, pos); 1473 return t_hash_start(m, pos);
@@ -1502,7 +1502,6 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
1502 if (!rec) 1502 if (!rec)
1503 return t_hash_start(m, pos); 1503 return t_hash_start(m, pos);
1504 1504
1505 iter->func_pos = *pos;
1506 iter->func = rec; 1505 iter->func = rec;
1507 1506
1508 return iter; 1507 return iter;
diff --git a/kernel/uid16.c b/kernel/uid16.c
index 419209893d87..51c6e89e8619 100644
--- a/kernel/uid16.c
+++ b/kernel/uid16.c
@@ -189,7 +189,7 @@ SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist)
189 struct group_info *group_info; 189 struct group_info *group_info;
190 int retval; 190 int retval;
191 191
192 if (!capable(CAP_SETGID)) 192 if (!nsown_capable(CAP_SETGID))
193 return -EPERM; 193 return -EPERM;
194 if ((unsigned)gidsetsize > NGROUPS_MAX) 194 if ((unsigned)gidsetsize > NGROUPS_MAX)
195 return -EINVAL; 195 return -EINVAL;
diff --git a/kernel/user.c b/kernel/user.c
index 5c598ca781df..9e03e9c1df8d 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -17,9 +17,13 @@
17#include <linux/module.h> 17#include <linux/module.h>
18#include <linux/user_namespace.h> 18#include <linux/user_namespace.h>
19 19
20/*
21 * userns count is 1 for root user, 1 for init_uts_ns,
22 * and 1 for... ?
23 */
20struct user_namespace init_user_ns = { 24struct user_namespace init_user_ns = {
21 .kref = { 25 .kref = {
22 .refcount = ATOMIC_INIT(2), 26 .refcount = ATOMIC_INIT(3),
23 }, 27 },
24 .creator = &root_user, 28 .creator = &root_user,
25}; 29};
@@ -47,7 +51,7 @@ static struct kmem_cache *uid_cachep;
47 */ 51 */
48static DEFINE_SPINLOCK(uidhash_lock); 52static DEFINE_SPINLOCK(uidhash_lock);
49 53
50/* root_user.__count is 2, 1 for init task cred, 1 for init_user_ns->creator */ 54/* root_user.__count is 2, 1 for init task cred, 1 for init_user_ns->user_ns */
51struct user_struct root_user = { 55struct user_struct root_user = {
52 .__count = ATOMIC_INIT(2), 56 .__count = ATOMIC_INIT(2),
53 .processes = ATOMIC_INIT(1), 57 .processes = ATOMIC_INIT(1),
diff --git a/kernel/utsname.c b/kernel/utsname.c
index 8a82b4b8ea52..44646179eaba 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -14,6 +14,7 @@
14#include <linux/utsname.h> 14#include <linux/utsname.h>
15#include <linux/err.h> 15#include <linux/err.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/user_namespace.h>
17 18
18static struct uts_namespace *create_uts_ns(void) 19static struct uts_namespace *create_uts_ns(void)
19{ 20{
@@ -30,7 +31,8 @@ static struct uts_namespace *create_uts_ns(void)
30 * @old_ns: namespace to clone 31 * @old_ns: namespace to clone
31 * Return NULL on error (failure to kmalloc), new ns otherwise 32 * Return NULL on error (failure to kmalloc), new ns otherwise
32 */ 33 */
33static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns) 34static struct uts_namespace *clone_uts_ns(struct task_struct *tsk,
35 struct uts_namespace *old_ns)
34{ 36{
35 struct uts_namespace *ns; 37 struct uts_namespace *ns;
36 38
@@ -40,6 +42,7 @@ static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns)
40 42
41 down_read(&uts_sem); 43 down_read(&uts_sem);
42 memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); 44 memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
45 ns->user_ns = get_user_ns(task_cred_xxx(tsk, user)->user_ns);
43 up_read(&uts_sem); 46 up_read(&uts_sem);
44 return ns; 47 return ns;
45} 48}
@@ -50,8 +53,10 @@ static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns)
50 * utsname of this process won't be seen by parent, and vice 53 * utsname of this process won't be seen by parent, and vice
51 * versa. 54 * versa.
52 */ 55 */
53struct uts_namespace *copy_utsname(unsigned long flags, struct uts_namespace *old_ns) 56struct uts_namespace *copy_utsname(unsigned long flags,
57 struct task_struct *tsk)
54{ 58{
59 struct uts_namespace *old_ns = tsk->nsproxy->uts_ns;
55 struct uts_namespace *new_ns; 60 struct uts_namespace *new_ns;
56 61
57 BUG_ON(!old_ns); 62 BUG_ON(!old_ns);
@@ -60,7 +65,7 @@ struct uts_namespace *copy_utsname(unsigned long flags, struct uts_namespace *ol
60 if (!(flags & CLONE_NEWUTS)) 65 if (!(flags & CLONE_NEWUTS))
61 return old_ns; 66 return old_ns;
62 67
63 new_ns = clone_uts_ns(old_ns); 68 new_ns = clone_uts_ns(tsk, old_ns);
64 69
65 put_uts_ns(old_ns); 70 put_uts_ns(old_ns);
66 return new_ns; 71 return new_ns;
@@ -71,5 +76,6 @@ void free_uts_ns(struct kref *kref)
71 struct uts_namespace *ns; 76 struct uts_namespace *ns;
72 77
73 ns = container_of(kref, struct uts_namespace, kref); 78 ns = container_of(kref, struct uts_namespace, kref);
79 put_user_ns(ns->user_ns);
74 kfree(ns); 80 kfree(ns);
75} 81}