aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/sys.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sys.c')
-rw-r--r--kernel/sys.c373
1 files changed, 188 insertions, 185 deletions
diff --git a/kernel/sys.c b/kernel/sys.c
index e6e0ece5f6a0..0da73cf73e60 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -47,6 +47,7 @@
47#include <linux/syscalls.h> 47#include <linux/syscalls.h>
48#include <linux/kprobes.h> 48#include <linux/kprobes.h>
49#include <linux/user_namespace.h> 49#include <linux/user_namespace.h>
50#include <linux/binfmts.h>
50 51
51#include <linux/kmsg_dump.h> 52#include <linux/kmsg_dump.h>
52/* Move somewhere else to avoid recompiling? */ 53/* Move somewhere else to avoid recompiling? */
@@ -323,7 +324,6 @@ void kernel_restart_prepare(char *cmd)
323 system_state = SYSTEM_RESTART; 324 system_state = SYSTEM_RESTART;
324 usermodehelper_disable(); 325 usermodehelper_disable();
325 device_shutdown(); 326 device_shutdown();
326 syscore_shutdown();
327} 327}
328 328
329/** 329/**
@@ -369,6 +369,7 @@ void kernel_restart(char *cmd)
369{ 369{
370 kernel_restart_prepare(cmd); 370 kernel_restart_prepare(cmd);
371 disable_nonboot_cpus(); 371 disable_nonboot_cpus();
372 syscore_shutdown();
372 if (!cmd) 373 if (!cmd)
373 printk(KERN_EMERG "Restarting system.\n"); 374 printk(KERN_EMERG "Restarting system.\n");
374 else 375 else
@@ -394,6 +395,7 @@ static void kernel_shutdown_prepare(enum system_states state)
394void kernel_halt(void) 395void kernel_halt(void)
395{ 396{
396 kernel_shutdown_prepare(SYSTEM_HALT); 397 kernel_shutdown_prepare(SYSTEM_HALT);
398 disable_nonboot_cpus();
397 syscore_shutdown(); 399 syscore_shutdown();
398 printk(KERN_EMERG "System halted.\n"); 400 printk(KERN_EMERG "System halted.\n");
399 kmsg_dump(KMSG_DUMP_HALT); 401 kmsg_dump(KMSG_DUMP_HALT);
@@ -433,11 +435,12 @@ static DEFINE_MUTEX(reboot_mutex);
433SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, 435SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
434 void __user *, arg) 436 void __user *, arg)
435{ 437{
438 struct pid_namespace *pid_ns = task_active_pid_ns(current);
436 char buffer[256]; 439 char buffer[256];
437 int ret = 0; 440 int ret = 0;
438 441
439 /* We only trust the superuser with rebooting the system. */ 442 /* We only trust the superuser with rebooting the system. */
440 if (!capable(CAP_SYS_BOOT)) 443 if (!ns_capable(pid_ns->user_ns, CAP_SYS_BOOT))
441 return -EPERM; 444 return -EPERM;
442 445
443 /* For safety, we require "magic" arguments. */ 446 /* For safety, we require "magic" arguments. */
@@ -453,7 +456,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
453 * pid_namespace, the command is handled by reboot_pid_ns() which will 456 * pid_namespace, the command is handled by reboot_pid_ns() which will
454 * call do_exit(). 457 * call do_exit().
455 */ 458 */
456 ret = reboot_pid_ns(task_active_pid_ns(current), cmd); 459 ret = reboot_pid_ns(pid_ns, cmd);
457 if (ret) 460 if (ret)
458 return ret; 461 return ret;
459 462
@@ -1046,7 +1049,7 @@ void do_sys_times(struct tms *tms)
1046 cputime_t tgutime, tgstime, cutime, cstime; 1049 cputime_t tgutime, tgstime, cutime, cstime;
1047 1050
1048 spin_lock_irq(&current->sighand->siglock); 1051 spin_lock_irq(&current->sighand->siglock);
1049 thread_group_times(current, &tgutime, &tgstime); 1052 thread_group_cputime_adjusted(current, &tgutime, &tgstime);
1050 cutime = current->signal->cutime; 1053 cutime = current->signal->cutime;
1051 cstime = current->signal->cstime; 1054 cstime = current->signal->cstime;
1052 spin_unlock_irq(&current->sighand->siglock); 1055 spin_unlock_irq(&current->sighand->siglock);
@@ -1704,7 +1707,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
1704 utime = stime = 0; 1707 utime = stime = 0;
1705 1708
1706 if (who == RUSAGE_THREAD) { 1709 if (who == RUSAGE_THREAD) {
1707 task_times(current, &utime, &stime); 1710 task_cputime_adjusted(current, &utime, &stime);
1708 accumulate_thread_rusage(p, r); 1711 accumulate_thread_rusage(p, r);
1709 maxrss = p->signal->maxrss; 1712 maxrss = p->signal->maxrss;
1710 goto out; 1713 goto out;
@@ -1730,7 +1733,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
1730 break; 1733 break;
1731 1734
1732 case RUSAGE_SELF: 1735 case RUSAGE_SELF:
1733 thread_group_times(p, &tgutime, &tgstime); 1736 thread_group_cputime_adjusted(p, &tgutime, &tgstime);
1734 utime += tgutime; 1737 utime += tgutime;
1735 stime += tgstime; 1738 stime += tgstime;
1736 r->ru_nvcsw += p->signal->nvcsw; 1739 r->ru_nvcsw += p->signal->nvcsw;
@@ -1792,14 +1795,14 @@ SYSCALL_DEFINE1(umask, int, mask)
1792static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) 1795static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
1793{ 1796{
1794 struct fd exe; 1797 struct fd exe;
1795 struct dentry *dentry; 1798 struct inode *inode;
1796 int err; 1799 int err;
1797 1800
1798 exe = fdget(fd); 1801 exe = fdget(fd);
1799 if (!exe.file) 1802 if (!exe.file)
1800 return -EBADF; 1803 return -EBADF;
1801 1804
1802 dentry = exe.file->f_path.dentry; 1805 inode = file_inode(exe.file);
1803 1806
1804 /* 1807 /*
1805 * Because the original mm->exe_file points to executable file, make 1808 * Because the original mm->exe_file points to executable file, make
@@ -1807,11 +1810,11 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
1807 * overall picture. 1810 * overall picture.
1808 */ 1811 */
1809 err = -EACCES; 1812 err = -EACCES;
1810 if (!S_ISREG(dentry->d_inode->i_mode) || 1813 if (!S_ISREG(inode->i_mode) ||
1811 exe.file->f_path.mnt->mnt_flags & MNT_NOEXEC) 1814 exe.file->f_path.mnt->mnt_flags & MNT_NOEXEC)
1812 goto exit; 1815 goto exit;
1813 1816
1814 err = inode_permission(dentry->d_inode, MAY_EXEC); 1817 err = inode_permission(inode, MAY_EXEC);
1815 if (err) 1818 if (err)
1816 goto exit; 1819 goto exit;
1817 1820
@@ -2012,160 +2015,159 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
2012 2015
2013 error = 0; 2016 error = 0;
2014 switch (option) { 2017 switch (option) {
2015 case PR_SET_PDEATHSIG: 2018 case PR_SET_PDEATHSIG:
2016 if (!valid_signal(arg2)) { 2019 if (!valid_signal(arg2)) {
2017 error = -EINVAL; 2020 error = -EINVAL;
2018 break;
2019 }
2020 me->pdeath_signal = arg2;
2021 break;
2022 case PR_GET_PDEATHSIG:
2023 error = put_user(me->pdeath_signal, (int __user *)arg2);
2024 break;
2025 case PR_GET_DUMPABLE:
2026 error = get_dumpable(me->mm);
2027 break; 2021 break;
2028 case PR_SET_DUMPABLE: 2022 }
2029 if (arg2 < 0 || arg2 > 1) { 2023 me->pdeath_signal = arg2;
2030 error = -EINVAL; 2024 break;
2031 break; 2025 case PR_GET_PDEATHSIG:
2032 } 2026 error = put_user(me->pdeath_signal, (int __user *)arg2);
2033 set_dumpable(me->mm, arg2); 2027 break;
2028 case PR_GET_DUMPABLE:
2029 error = get_dumpable(me->mm);
2030 break;
2031 case PR_SET_DUMPABLE:
2032 if (arg2 != SUID_DUMP_DISABLE && arg2 != SUID_DUMP_USER) {
2033 error = -EINVAL;
2034 break; 2034 break;
2035 }
2036 set_dumpable(me->mm, arg2);
2037 break;
2035 2038
2036 case PR_SET_UNALIGN: 2039 case PR_SET_UNALIGN:
2037 error = SET_UNALIGN_CTL(me, arg2); 2040 error = SET_UNALIGN_CTL(me, arg2);
2038 break; 2041 break;
2039 case PR_GET_UNALIGN: 2042 case PR_GET_UNALIGN:
2040 error = GET_UNALIGN_CTL(me, arg2); 2043 error = GET_UNALIGN_CTL(me, arg2);
2041 break; 2044 break;
2042 case PR_SET_FPEMU: 2045 case PR_SET_FPEMU:
2043 error = SET_FPEMU_CTL(me, arg2); 2046 error = SET_FPEMU_CTL(me, arg2);
2044 break; 2047 break;
2045 case PR_GET_FPEMU: 2048 case PR_GET_FPEMU:
2046 error = GET_FPEMU_CTL(me, arg2); 2049 error = GET_FPEMU_CTL(me, arg2);
2047 break; 2050 break;
2048 case PR_SET_FPEXC: 2051 case PR_SET_FPEXC:
2049 error = SET_FPEXC_CTL(me, arg2); 2052 error = SET_FPEXC_CTL(me, arg2);
2050 break; 2053 break;
2051 case PR_GET_FPEXC: 2054 case PR_GET_FPEXC:
2052 error = GET_FPEXC_CTL(me, arg2); 2055 error = GET_FPEXC_CTL(me, arg2);
2053 break; 2056 break;
2054 case PR_GET_TIMING: 2057 case PR_GET_TIMING:
2055 error = PR_TIMING_STATISTICAL; 2058 error = PR_TIMING_STATISTICAL;
2056 break; 2059 break;
2057 case PR_SET_TIMING: 2060 case PR_SET_TIMING:
2058 if (arg2 != PR_TIMING_STATISTICAL) 2061 if (arg2 != PR_TIMING_STATISTICAL)
2059 error = -EINVAL; 2062 error = -EINVAL;
2060 break; 2063 break;
2061 case PR_SET_NAME: 2064 case PR_SET_NAME:
2062 comm[sizeof(me->comm)-1] = 0; 2065 comm[sizeof(me->comm) - 1] = 0;
2063 if (strncpy_from_user(comm, (char __user *)arg2, 2066 if (strncpy_from_user(comm, (char __user *)arg2,
2064 sizeof(me->comm) - 1) < 0) 2067 sizeof(me->comm) - 1) < 0)
2065 return -EFAULT; 2068 return -EFAULT;
2066 set_task_comm(me, comm); 2069 set_task_comm(me, comm);
2067 proc_comm_connector(me); 2070 proc_comm_connector(me);
2068 break; 2071 break;
2069 case PR_GET_NAME: 2072 case PR_GET_NAME:
2070 get_task_comm(comm, me); 2073 get_task_comm(comm, me);
2071 if (copy_to_user((char __user *)arg2, comm, 2074 if (copy_to_user((char __user *)arg2, comm, sizeof(comm)))
2072 sizeof(comm))) 2075 return -EFAULT;
2073 return -EFAULT; 2076 break;
2074 break; 2077 case PR_GET_ENDIAN:
2075 case PR_GET_ENDIAN: 2078 error = GET_ENDIAN(me, arg2);
2076 error = GET_ENDIAN(me, arg2); 2079 break;
2077 break; 2080 case PR_SET_ENDIAN:
2078 case PR_SET_ENDIAN: 2081 error = SET_ENDIAN(me, arg2);
2079 error = SET_ENDIAN(me, arg2); 2082 break;
2080 break; 2083 case PR_GET_SECCOMP:
2081 case PR_GET_SECCOMP: 2084 error = prctl_get_seccomp();
2082 error = prctl_get_seccomp(); 2085 break;
2083 break; 2086 case PR_SET_SECCOMP:
2084 case PR_SET_SECCOMP: 2087 error = prctl_set_seccomp(arg2, (char __user *)arg3);
2085 error = prctl_set_seccomp(arg2, (char __user *)arg3); 2088 break;
2086 break; 2089 case PR_GET_TSC:
2087 case PR_GET_TSC: 2090 error = GET_TSC_CTL(arg2);
2088 error = GET_TSC_CTL(arg2); 2091 break;
2089 break; 2092 case PR_SET_TSC:
2090 case PR_SET_TSC: 2093 error = SET_TSC_CTL(arg2);
2091 error = SET_TSC_CTL(arg2); 2094 break;
2092 break; 2095 case PR_TASK_PERF_EVENTS_DISABLE:
2093 case PR_TASK_PERF_EVENTS_DISABLE: 2096 error = perf_event_task_disable();
2094 error = perf_event_task_disable(); 2097 break;
2095 break; 2098 case PR_TASK_PERF_EVENTS_ENABLE:
2096 case PR_TASK_PERF_EVENTS_ENABLE: 2099 error = perf_event_task_enable();
2097 error = perf_event_task_enable(); 2100 break;
2098 break; 2101 case PR_GET_TIMERSLACK:
2099 case PR_GET_TIMERSLACK: 2102 error = current->timer_slack_ns;
2100 error = current->timer_slack_ns; 2103 break;
2101 break; 2104 case PR_SET_TIMERSLACK:
2102 case PR_SET_TIMERSLACK: 2105 if (arg2 <= 0)
2103 if (arg2 <= 0) 2106 current->timer_slack_ns =
2104 current->timer_slack_ns =
2105 current->default_timer_slack_ns; 2107 current->default_timer_slack_ns;
2106 else 2108 else
2107 current->timer_slack_ns = arg2; 2109 current->timer_slack_ns = arg2;
2108 break; 2110 break;
2109 case PR_MCE_KILL: 2111 case PR_MCE_KILL:
2110 if (arg4 | arg5) 2112 if (arg4 | arg5)
2111 return -EINVAL; 2113 return -EINVAL;
2112 switch (arg2) { 2114 switch (arg2) {
2113 case PR_MCE_KILL_CLEAR: 2115 case PR_MCE_KILL_CLEAR:
2114 if (arg3 != 0) 2116 if (arg3 != 0)
2115 return -EINVAL;
2116 current->flags &= ~PF_MCE_PROCESS;
2117 break;
2118 case PR_MCE_KILL_SET:
2119 current->flags |= PF_MCE_PROCESS;
2120 if (arg3 == PR_MCE_KILL_EARLY)
2121 current->flags |= PF_MCE_EARLY;
2122 else if (arg3 == PR_MCE_KILL_LATE)
2123 current->flags &= ~PF_MCE_EARLY;
2124 else if (arg3 == PR_MCE_KILL_DEFAULT)
2125 current->flags &=
2126 ~(PF_MCE_EARLY|PF_MCE_PROCESS);
2127 else
2128 return -EINVAL;
2129 break;
2130 default:
2131 return -EINVAL; 2117 return -EINVAL;
2132 } 2118 current->flags &= ~PF_MCE_PROCESS;
2133 break; 2119 break;
2134 case PR_MCE_KILL_GET: 2120 case PR_MCE_KILL_SET:
2135 if (arg2 | arg3 | arg4 | arg5) 2121 current->flags |= PF_MCE_PROCESS;
2136 return -EINVAL; 2122 if (arg3 == PR_MCE_KILL_EARLY)
2137 if (current->flags & PF_MCE_PROCESS) 2123 current->flags |= PF_MCE_EARLY;
2138 error = (current->flags & PF_MCE_EARLY) ? 2124 else if (arg3 == PR_MCE_KILL_LATE)
2139 PR_MCE_KILL_EARLY : PR_MCE_KILL_LATE; 2125 current->flags &= ~PF_MCE_EARLY;
2126 else if (arg3 == PR_MCE_KILL_DEFAULT)
2127 current->flags &=
2128 ~(PF_MCE_EARLY|PF_MCE_PROCESS);
2140 else 2129 else
2141 error = PR_MCE_KILL_DEFAULT;
2142 break;
2143 case PR_SET_MM:
2144 error = prctl_set_mm(arg2, arg3, arg4, arg5);
2145 break;
2146 case PR_GET_TID_ADDRESS:
2147 error = prctl_get_tid_address(me, (int __user **)arg2);
2148 break;
2149 case PR_SET_CHILD_SUBREAPER:
2150 me->signal->is_child_subreaper = !!arg2;
2151 break;
2152 case PR_GET_CHILD_SUBREAPER:
2153 error = put_user(me->signal->is_child_subreaper,
2154 (int __user *) arg2);
2155 break;
2156 case PR_SET_NO_NEW_PRIVS:
2157 if (arg2 != 1 || arg3 || arg4 || arg5)
2158 return -EINVAL; 2130 return -EINVAL;
2159
2160 current->no_new_privs = 1;
2161 break; 2131 break;
2162 case PR_GET_NO_NEW_PRIVS:
2163 if (arg2 || arg3 || arg4 || arg5)
2164 return -EINVAL;
2165 return current->no_new_privs ? 1 : 0;
2166 default: 2132 default:
2167 error = -EINVAL; 2133 return -EINVAL;
2168 break; 2134 }
2135 break;
2136 case PR_MCE_KILL_GET:
2137 if (arg2 | arg3 | arg4 | arg5)
2138 return -EINVAL;
2139 if (current->flags & PF_MCE_PROCESS)
2140 error = (current->flags & PF_MCE_EARLY) ?
2141 PR_MCE_KILL_EARLY : PR_MCE_KILL_LATE;
2142 else
2143 error = PR_MCE_KILL_DEFAULT;
2144 break;
2145 case PR_SET_MM:
2146 error = prctl_set_mm(arg2, arg3, arg4, arg5);
2147 break;
2148 case PR_GET_TID_ADDRESS:
2149 error = prctl_get_tid_address(me, (int __user **)arg2);
2150 break;
2151 case PR_SET_CHILD_SUBREAPER:
2152 me->signal->is_child_subreaper = !!arg2;
2153 break;
2154 case PR_GET_CHILD_SUBREAPER:
2155 error = put_user(me->signal->is_child_subreaper,
2156 (int __user *)arg2);
2157 break;
2158 case PR_SET_NO_NEW_PRIVS:
2159 if (arg2 != 1 || arg3 || arg4 || arg5)
2160 return -EINVAL;
2161
2162 current->no_new_privs = 1;
2163 break;
2164 case PR_GET_NO_NEW_PRIVS:
2165 if (arg2 || arg3 || arg4 || arg5)
2166 return -EINVAL;
2167 return current->no_new_privs ? 1 : 0;
2168 default:
2169 error = -EINVAL;
2170 break;
2169 } 2171 }
2170 return error; 2172 return error;
2171} 2173}
@@ -2184,14 +2186,8 @@ SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep,
2184 2186
2185char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff"; 2187char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff";
2186 2188
2187static void argv_cleanup(struct subprocess_info *info) 2189static int __orderly_poweroff(bool force)
2188{ 2190{
2189 argv_free(info->argv);
2190}
2191
2192static int __orderly_poweroff(void)
2193{
2194 int argc;
2195 char **argv; 2191 char **argv;
2196 static char *envp[] = { 2192 static char *envp[] = {
2197 "HOME=/", 2193 "HOME=/",
@@ -2200,21 +2196,40 @@ static int __orderly_poweroff(void)
2200 }; 2196 };
2201 int ret; 2197 int ret;
2202 2198
2203 argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc); 2199 argv = argv_split(GFP_KERNEL, poweroff_cmd, NULL);
2204 if (argv == NULL) { 2200 if (argv) {
2201 ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
2202 argv_free(argv);
2203 } else {
2205 printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n", 2204 printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n",
2206 __func__, poweroff_cmd); 2205 __func__, poweroff_cmd);
2207 return -ENOMEM; 2206 ret = -ENOMEM;
2208 } 2207 }
2209 2208
2210 ret = call_usermodehelper_fns(argv[0], argv, envp, UMH_WAIT_EXEC, 2209 if (ret && force) {
2211 NULL, argv_cleanup, NULL); 2210 printk(KERN_WARNING "Failed to start orderly shutdown: "
2212 if (ret == -ENOMEM) 2211 "forcing the issue\n");
2213 argv_free(argv); 2212 /*
2213 * I guess this should try to kick off some daemon to sync and
2214 * poweroff asap. Or not even bother syncing if we're doing an
2215 * emergency shutdown?
2216 */
2217 emergency_sync();
2218 kernel_power_off();
2219 }
2214 2220
2215 return ret; 2221 return ret;
2216} 2222}
2217 2223
2224static bool poweroff_force;
2225
2226static void poweroff_work_func(struct work_struct *work)
2227{
2228 __orderly_poweroff(poweroff_force);
2229}
2230
2231static DECLARE_WORK(poweroff_work, poweroff_work_func);
2232
2218/** 2233/**
2219 * orderly_poweroff - Trigger an orderly system poweroff 2234 * orderly_poweroff - Trigger an orderly system poweroff
2220 * @force: force poweroff if command execution fails 2235 * @force: force poweroff if command execution fails
@@ -2224,21 +2239,9 @@ static int __orderly_poweroff(void)
2224 */ 2239 */
2225int orderly_poweroff(bool force) 2240int orderly_poweroff(bool force)
2226{ 2241{
2227 int ret = __orderly_poweroff(); 2242 if (force) /* do not override the pending "true" */
2228 2243 poweroff_force = true;
2229 if (ret && force) { 2244 schedule_work(&poweroff_work);
2230 printk(KERN_WARNING "Failed to start orderly shutdown: " 2245 return 0;
2231 "forcing the issue\n");
2232
2233 /*
2234 * I guess this should try to kick off some daemon to sync and
2235 * poweroff asap. Or not even bother syncing if we're doing an
2236 * emergency shutdown?
2237 */
2238 emergency_sync();
2239 kernel_power_off();
2240 }
2241
2242 return ret;
2243} 2246}
2244EXPORT_SYMBOL_GPL(orderly_poweroff); 2247EXPORT_SYMBOL_GPL(orderly_poweroff);