author		Linus Torvalds <torvalds@linux-foundation.org>	2012-12-12 15:22:13 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-12-12 15:22:13 -0500
commit		9977d9b379cb77e0f67bd6f4563618106e58e11d
tree		0191accfddf578edb52c69c933d64521e3dce297 /kernel
parent		cf4af01221579a4e895f43dbfc47598fbfc5a731
parent		541880d9a2c7871f6370071d55aa6662d329c51e
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/signal
Pull big execve/kernel_thread/fork unification series from Al Viro:
 "All architectures are converted to new model.  Quite a bit of that
  stuff is actually shared with architecture trees; in such cases it's
  literally shared branch pulled by both, not a cherry-pick.

  A lot of ugliness and black magic is gone (-3KLoC total in this one):

   - kernel_thread()/kernel_execve()/sys_execve() redesign.

     We don't do syscalls from kernel anymore for either kernel_thread()
     or kernel_execve():

     kernel_thread() is essentially clone(2) with callback run before we
     return to userland, the callbacks either never return or do
     successful do_execve() before returning.

     kernel_execve() is a wrapper for do_execve() - it doesn't need to
     do transition to user mode anymore.

     As a result kernel_thread() and kernel_execve() are
     arch-independent now - they live in kernel/fork.c and fs/exec.c
     resp.  sys_execve() is also in fs/exec.c and it's completely
     architecture-independent.

   - daemonize() is gone, along with its parts in fs/*.c

   - struct pt_regs * is no longer passed to
     do_fork/copy_process/copy_thread/do_execve/search_binary_handler/
     ->load_binary/do_coredump.

   - sys_fork()/sys_vfork()/sys_clone() unified; some architectures
     still need wrappers (ones with callee-saved registers not saved in
     pt_regs on syscall entry), but the main part of those suckers is in
     kernel/fork.c now."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/signal: (113 commits)
  do_coredump(): get rid of pt_regs argument
  print_fatal_signal(): get rid of pt_regs argument
  ptrace_signal(): get rid of unused arguments
  get rid of ptrace_signal_deliver() arguments
  new helper: signal_pt_regs()
  unify default ptrace_signal_deliver
  flagday: kill pt_regs argument of do_fork()
  death to idle_regs()
  don't pass regs to copy_process()
  flagday: don't pass regs to copy_thread()
  bfin: switch to generic vfork, get rid of pointless wrappers
  xtensa: switch to generic clone()
  openrisc: switch to use of generic fork and clone
  unicore32: switch to generic clone(2)
  score: switch to generic fork/vfork/clone
  c6x: sanitize copy_thread(), get rid of clone(2) wrapper, switch to generic clone()
  take sys_fork/sys_vfork/sys_clone prototypes to linux/syscalls.h
  mn10300: switch to generic fork/vfork/clone
  h8300: switch to generic fork/vfork/clone
  tile: switch to generic clone()
  ...

Conflicts:
	arch/microblaze/include/asm/Kbuild
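The caller-visible shape of the new model (kernel_thread() as clone(2) plus an
in-kernel callback, with do_fork() no longer taking a struct pt_regs *) can be
sketched as follows. This is an illustrative example only, not code from this
merge: the names my_worker and start_worker are hypothetical, and the callback
obeys the rule quoted above, i.e. it either never returns or returns only after
a successful do_execve().

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/signal.h>

/* Hypothetical callback: runs in the new task and never returns. */
static int my_worker(void *data)
{
        for (;;) {
                /* do some periodic housekeeping, then sleep ~1s */
                schedule_timeout_interruptible(HZ);
        }
        return 0;       /* never reached */
}

/* Hypothetical caller: only the exit signal and extra clone flags are passed. */
static void start_worker(void)
{
        pid_t pid = kernel_thread(my_worker, NULL, SIGCHLD);

        if (pid < 0)
                pr_err("kernel_thread() failed: %d\n", pid);
}

kernel_thread() itself ORs in CLONE_VM | CLONE_UNTRACED before calling
do_fork(), as the kernel/fork.c hunk below shows, so no architecture-specific
trampoline or fake register set is needed on the caller's side.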
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/auditsc.c	102
-rw-r--r--	kernel/exit.c	 92
-rw-r--r--	kernel/fork.c	 64
-rw-r--r--	kernel/signal.c	 15
4 files changed, 76 insertions, 197 deletions
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index fc7376bf86ea..e37e6a12c5e3 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -200,7 +200,6 @@ struct audit_context {
 	struct list_head names_list;	/* anchor for struct audit_names->list */
 	char		*filterkey;	/* key for rule that triggered record */
 	struct path	pwd;
-	struct audit_context *previous; /* For nested syscalls */
 	struct audit_aux_data *aux;
 	struct audit_aux_data *aux_pids;
 	struct sockaddr_storage *sockaddr;
@@ -1091,29 +1090,13 @@ int audit_alloc(struct task_struct *tsk)
 
 static inline void audit_free_context(struct audit_context *context)
 {
-	struct audit_context *previous;
-	int		     count = 0;
-
-	do {
-		previous = context->previous;
-		if (previous || (count && count < 10)) {
-			++count;
-			printk(KERN_ERR "audit(:%d): major=%d name_count=%d:"
-			       " freeing multiple contexts (%d)\n",
-			       context->serial, context->major,
-			       context->name_count, count);
-		}
-		audit_free_names(context);
-		unroll_tree_refs(context, NULL, 0);
-		free_tree_refs(context);
-		audit_free_aux(context);
-		kfree(context->filterkey);
-		kfree(context->sockaddr);
-		kfree(context);
-		context = previous;
-	} while (context);
-	if (count >= 10)
-		printk(KERN_ERR "audit: freed %d contexts\n", count);
+	audit_free_names(context);
+	unroll_tree_refs(context, NULL, 0);
+	free_tree_refs(context);
+	audit_free_aux(context);
+	kfree(context->filterkey);
+	kfree(context->sockaddr);
+	kfree(context);
 }
 
 void audit_log_task_context(struct audit_buffer *ab)
@@ -1783,42 +1766,6 @@ void __audit_syscall_entry(int arch, int major,
 	if (!context)
 		return;
 
-	/*
-	 * This happens only on certain architectures that make system
-	 * calls in kernel_thread via the entry.S interface, instead of
-	 * with direct calls.  (If you are porting to a new
-	 * architecture, hitting this condition can indicate that you
-	 * got the _exit/_leave calls backward in entry.S.)
-	 *
-	 * i386     no
-	 * x86_64   no
-	 * ppc64    yes (see arch/powerpc/platforms/iseries/misc.S)
-	 *
-	 * This also happens with vm86 emulation in a non-nested manner
-	 * (entries without exits), so this case must be caught.
-	 */
-	if (context->in_syscall) {
-		struct audit_context *newctx;
-
-#if AUDIT_DEBUG
-		printk(KERN_ERR
-		       "audit(:%d) pid=%d in syscall=%d;"
-		       " entering syscall=%d\n",
-		       context->serial, tsk->pid, context->major, major);
-#endif
-		newctx = audit_alloc_context(context->state);
-		if (newctx) {
-			newctx->previous = context;
-			context = newctx;
-			tsk->audit_context = newctx;
-		} else {
-			/* If we can't alloc a new context, the best we
-			 * can do is to leak memory (any pending putname
-			 * will be lost).  The only other alternative is
-			 * to abandon auditing. */
-			audit_zero_context(context, context->state);
-		}
-	}
 	BUG_ON(context->in_syscall || context->name_count);
 
 	if (!audit_enabled)
@@ -1881,28 +1828,21 @@ void __audit_syscall_exit(int success, long return_code)
 	if (!list_empty(&context->killed_trees))
 		audit_kill_trees(&context->killed_trees);
 
-	if (context->previous) {
-		struct audit_context *new_context = context->previous;
-		context->previous = NULL;
-		audit_free_context(context);
-		tsk->audit_context = new_context;
-	} else {
-		audit_free_names(context);
-		unroll_tree_refs(context, NULL, 0);
-		audit_free_aux(context);
-		context->aux = NULL;
-		context->aux_pids = NULL;
-		context->target_pid = 0;
-		context->target_sid = 0;
-		context->sockaddr_len = 0;
-		context->type = 0;
-		context->fds[0] = -1;
-		if (context->state != AUDIT_RECORD_CONTEXT) {
-			kfree(context->filterkey);
-			context->filterkey = NULL;
-		}
-		tsk->audit_context = context;
-	}
+	audit_free_names(context);
+	unroll_tree_refs(context, NULL, 0);
+	audit_free_aux(context);
+	context->aux = NULL;
+	context->aux_pids = NULL;
+	context->target_pid = 0;
+	context->target_sid = 0;
+	context->sockaddr_len = 0;
+	context->type = 0;
+	context->fds[0] = -1;
+	if (context->state != AUDIT_RECORD_CONTEXT) {
+		kfree(context->filterkey);
+		context->filterkey = NULL;
+	}
+	tsk->audit_context = context;
 }
 
 static inline void handle_one(const struct inode *inode)
diff --git a/kernel/exit.c b/kernel/exit.c
index 618f7ee56003..50d2e93c36ea 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -322,43 +322,6 @@ kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent)
 	}
 }
 
-/**
- * reparent_to_kthreadd - Reparent the calling kernel thread to kthreadd
- *
- * If a kernel thread is launched as a result of a system call, or if
- * it ever exits, it should generally reparent itself to kthreadd so it
- * isn't in the way of other processes and is correctly cleaned up on exit.
- *
- * The various task state such as scheduling policy and priority may have
- * been inherited from a user process, so we reset them to sane values here.
- *
- * NOTE that reparent_to_kthreadd() gives the caller full capabilities.
- */
-static void reparent_to_kthreadd(void)
-{
-	write_lock_irq(&tasklist_lock);
-
-	ptrace_unlink(current);
-	/* Reparent to init */
-	current->real_parent = current->parent = kthreadd_task;
-	list_move_tail(&current->sibling, &current->real_parent->children);
-
-	/* Set the exit signal to SIGCHLD so we signal init on exit */
-	current->exit_signal = SIGCHLD;
-
-	if (task_nice(current) < 0)
-		set_user_nice(current, 0);
-	/* cpus_allowed? */
-	/* rt_priority? */
-	/* signals? */
-	memcpy(current->signal->rlim, init_task.signal->rlim,
-	       sizeof(current->signal->rlim));
-
-	atomic_inc(&init_cred.usage);
-	commit_creds(&init_cred);
-	write_unlock_irq(&tasklist_lock);
-}
-
 void __set_special_pids(struct pid *pid)
 {
 	struct task_struct *curr = current->group_leader;
@@ -370,13 +333,6 @@ void __set_special_pids(struct pid *pid)
 		change_pid(curr, PIDTYPE_PGID, pid);
 }
 
-static void set_special_pids(struct pid *pid)
-{
-	write_lock_irq(&tasklist_lock);
-	__set_special_pids(pid);
-	write_unlock_irq(&tasklist_lock);
-}
-
 /*
  * Let kernel threads use this to say that they allow a certain signal.
  * Must not be used if kthread was cloned with CLONE_SIGHAND.
@@ -416,54 +372,6 @@ int disallow_signal(int sig)
 
 EXPORT_SYMBOL(disallow_signal);
 
-/*
- *	Put all the gunge required to become a kernel thread without
- *	attached user resources in one place where it belongs.
- */
-
-void daemonize(const char *name, ...)
-{
-	va_list args;
-	sigset_t blocked;
-
-	va_start(args, name);
-	vsnprintf(current->comm, sizeof(current->comm), name, args);
-	va_end(args);
-
-	/*
-	 * If we were started as result of loading a module, close all of the
-	 * user space pages.  We don't need them, and if we didn't close them
-	 * they would be locked into memory.
-	 */
-	exit_mm(current);
-	/*
-	 * We don't want to get frozen, in case system-wide hibernation
-	 * or suspend transition begins right now.
-	 */
-	current->flags |= (PF_NOFREEZE | PF_KTHREAD);
-
-	if (current->nsproxy != &init_nsproxy) {
-		get_nsproxy(&init_nsproxy);
-		switch_task_namespaces(current, &init_nsproxy);
-	}
-	set_special_pids(&init_struct_pid);
-	proc_clear_tty(current);
-
-	/* Block and flush all signals */
-	sigfillset(&blocked);
-	sigprocmask(SIG_BLOCK, &blocked, NULL);
-	flush_signals(current);
-
-	/* Become as one with the init task */
-
-	daemonize_fs_struct();
-	daemonize_descriptors();
-
-	reparent_to_kthreadd();
-}
-
-EXPORT_SYMBOL(daemonize);
-
 #ifdef CONFIG_MM_OWNER
 /*
  * A task is exiting.  If it owned this mm, find a new owner for the mm.
diff --git a/kernel/fork.c b/kernel/fork.c
index 79de9f99a48d..3c31e874afad 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1129,7 +1129,6 @@ static void posix_cpu_timers_init(struct task_struct *tsk)
  */
 static struct task_struct *copy_process(unsigned long clone_flags,
 					unsigned long stack_start,
-					struct pt_regs *regs,
 					unsigned long stack_size,
 					int __user *child_tidptr,
 					struct pid *pid,
@@ -1321,7 +1320,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	retval = copy_io(clone_flags, p);
 	if (retval)
 		goto bad_fork_cleanup_namespaces;
-	retval = copy_thread(clone_flags, stack_start, stack_size, p, regs);
+	retval = copy_thread(clone_flags, stack_start, stack_size, p);
 	if (retval)
 		goto bad_fork_cleanup_io;
 
@@ -1510,12 +1509,6 @@ fork_out:
 	return ERR_PTR(retval);
 }
 
-noinline struct pt_regs * __cpuinit __attribute__((weak)) idle_regs(struct pt_regs *regs)
-{
-	memset(regs, 0, sizeof(struct pt_regs));
-	return regs;
-}
-
 static inline void init_idle_pids(struct pid_link *links)
 {
 	enum pid_type type;
@@ -1529,10 +1522,7 @@ static inline void init_idle_pids(struct pid_link *links)
 struct task_struct * __cpuinit fork_idle(int cpu)
 {
 	struct task_struct *task;
-	struct pt_regs regs;
-
-	task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL,
-			    &init_struct_pid, 0);
+	task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0);
 	if (!IS_ERR(task)) {
 		init_idle_pids(task->pids);
 		init_idle(task, cpu);
@@ -1549,7 +1539,6 @@ struct task_struct * __cpuinit fork_idle(int cpu)
  */
 long do_fork(unsigned long clone_flags,
 	      unsigned long stack_start,
-	      struct pt_regs *regs,
 	      unsigned long stack_size,
 	      int __user *parent_tidptr,
 	      int __user *child_tidptr)
@@ -1579,7 +1568,7 @@ long do_fork(unsigned long clone_flags,
 	 * requested, no event is reported; otherwise, report if the event
 	 * for the type of forking is enabled.
 	 */
-	if (!(clone_flags & CLONE_UNTRACED) && likely(user_mode(regs))) {
+	if (!(clone_flags & CLONE_UNTRACED)) {
 		if (clone_flags & CLONE_VFORK)
 			trace = PTRACE_EVENT_VFORK;
 		else if ((clone_flags & CSIGNAL) != SIGCHLD)
@@ -1591,7 +1580,7 @@ long do_fork(unsigned long clone_flags,
 			trace = 0;
 	}
 
-	p = copy_process(clone_flags, stack_start, regs, stack_size,
+	p = copy_process(clone_flags, stack_start, stack_size,
 			 child_tidptr, NULL, trace);
 	/*
 	 * Do this prior waking up the new thread - the thread pointer
@@ -1635,11 +1624,54 @@ long do_fork(unsigned long clone_flags,
  */
 pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
 {
-	return do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn, NULL,
+	return do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn,
 		(unsigned long)arg, NULL, NULL);
 }
 #endif
 
+#ifdef __ARCH_WANT_SYS_FORK
+SYSCALL_DEFINE0(fork)
+{
+#ifdef CONFIG_MMU
+	return do_fork(SIGCHLD, 0, 0, NULL, NULL);
+#else
+	/* can not support in nommu mode */
+	return(-EINVAL);
+#endif
+}
+#endif
+
+#ifdef __ARCH_WANT_SYS_VFORK
+SYSCALL_DEFINE0(vfork)
+{
+	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0,
+			0, NULL, NULL);
+}
+#endif
+
+#ifdef __ARCH_WANT_SYS_CLONE
+#ifdef CONFIG_CLONE_BACKWARDS
+SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
+		 int __user *, parent_tidptr,
+		 int, tls_val,
+		 int __user *, child_tidptr)
+#elif defined(CONFIG_CLONE_BACKWARDS2)
+SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags,
+		 int __user *, parent_tidptr,
+		 int __user *, child_tidptr,
+		 int, tls_val)
+#else
+SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
+		 int __user *, parent_tidptr,
+		 int __user *, child_tidptr,
+		 int, tls_val)
+#endif
+{
+	return do_fork(clone_flags, newsp, 0,
+		parent_tidptr, child_tidptr);
+}
+#endif
+
 #ifndef ARCH_MIN_MMSTRUCT_ALIGN
 #define ARCH_MIN_MMSTRUCT_ALIGN 0
 #endif
diff --git a/kernel/signal.c b/kernel/signal.c
index 5ffb5626e072..a49c7f36ceb3 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1159,8 +1159,9 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
 		return __send_signal(sig, info, t, group, from_ancestor_ns);
 }
 
-static void print_fatal_signal(struct pt_regs *regs, int signr)
+static void print_fatal_signal(int signr)
 {
+	struct pt_regs *regs = signal_pt_regs();
 	printk("%s/%d: potentially unexpected fatal signal %d.\n",
 		current->comm, task_pid_nr(current), signr);
 
@@ -2131,10 +2132,9 @@ static void do_jobctl_trap(void)
 	}
 }
 
-static int ptrace_signal(int signr, siginfo_t *info,
-			 struct pt_regs *regs, void *cookie)
+static int ptrace_signal(int signr, siginfo_t *info)
 {
-	ptrace_signal_deliver(regs, cookie);
+	ptrace_signal_deliver();
 	/*
 	 * We do not check sig_kernel_stop(signr) but set this marker
 	 * unconditionally because we do not know whether debugger will
@@ -2257,8 +2257,7 @@ relock:
 			break; /* will return 0 */
 
 		if (unlikely(current->ptrace) && signr != SIGKILL) {
-			signr = ptrace_signal(signr, info,
-					      regs, cookie);
+			signr = ptrace_signal(signr, info);
 			if (!signr)
 				continue;
 		}
@@ -2343,7 +2342,7 @@ relock:
 
 		if (sig_kernel_coredump(signr)) {
 			if (print_fatal_signals)
-				print_fatal_signal(regs, info->si_signo);
+				print_fatal_signal(info->si_signo);
 			/*
 			 * If it was able to dump core, this kills all
 			 * other threads in the group and synchronizes with
@@ -2352,7 +2351,7 @@ relock:
 			 * first and our do_group_exit call below will use
 			 * that value and ignore the one we pass it.
 			 */
-			do_coredump(info, regs);
+			do_coredump(info);
 		}
 
 		/*