about summary refs log tree commit diff stats
path: root/fs/exec.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/exec.c')
-rw-r--r-- fs/exec.c 143
1 files changed, 93 insertions, 50 deletions
diff --git a/fs/exec.c b/fs/exec.c
index fd9234379e8d..5e559013e303 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -25,19 +25,18 @@
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/file.h> 26#include <linux/file.h>
27#include <linux/fdtable.h> 27#include <linux/fdtable.h>
28#include <linux/mman.h> 28#include <linux/mm.h>
29#include <linux/stat.h> 29#include <linux/stat.h>
30#include <linux/fcntl.h> 30#include <linux/fcntl.h>
31#include <linux/smp_lock.h> 31#include <linux/smp_lock.h>
32#include <linux/swap.h>
32#include <linux/string.h> 33#include <linux/string.h>
33#include <linux/init.h> 34#include <linux/init.h>
34#include <linux/pagemap.h>
35#include <linux/highmem.h> 35#include <linux/highmem.h>
36#include <linux/spinlock.h> 36#include <linux/spinlock.h>
37#include <linux/key.h> 37#include <linux/key.h>
38#include <linux/personality.h> 38#include <linux/personality.h>
39#include <linux/binfmts.h> 39#include <linux/binfmts.h>
40#include <linux/swap.h>
41#include <linux/utsname.h> 40#include <linux/utsname.h>
42#include <linux/pid_namespace.h> 41#include <linux/pid_namespace.h>
43#include <linux/module.h> 42#include <linux/module.h>
@@ -47,7 +46,6 @@
47#include <linux/mount.h> 46#include <linux/mount.h>
48#include <linux/security.h> 47#include <linux/security.h>
49#include <linux/syscalls.h> 48#include <linux/syscalls.h>
50#include <linux/rmap.h>
51#include <linux/tsacct_kern.h> 49#include <linux/tsacct_kern.h>
52#include <linux/cn_proc.h> 50#include <linux/cn_proc.h>
53#include <linux/audit.h> 51#include <linux/audit.h>
@@ -541,7 +539,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
541 /* 539 /*
542 * when the old and new regions overlap clear from new_end. 540 * when the old and new regions overlap clear from new_end.
543 */ 541 */
544 free_pgd_range(&tlb, new_end, old_end, new_end, 542 free_pgd_range(tlb, new_end, old_end, new_end,
545 vma->vm_next ? vma->vm_next->vm_start : 0); 543 vma->vm_next ? vma->vm_next->vm_start : 0);
546 } else { 544 } else {
547 /* 545 /*
@@ -550,7 +548,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
550 * have constraints on va-space that make this illegal (IA64) - 548 * have constraints on va-space that make this illegal (IA64) -
551 * for the others its just a little faster. 549 * for the others its just a little faster.
552 */ 550 */
553 free_pgd_range(&tlb, old_start, old_end, new_end, 551 free_pgd_range(tlb, old_start, old_end, new_end,
554 vma->vm_next ? vma->vm_next->vm_start : 0); 552 vma->vm_next ? vma->vm_next->vm_start : 0);
555 } 553 }
556 tlb_finish_mmu(tlb, new_end, old_end); 554 tlb_finish_mmu(tlb, new_end, old_end);
@@ -724,12 +722,10 @@ static int exec_mmap(struct mm_struct *mm)
724 * Make sure that if there is a core dump in progress 722 * Make sure that if there is a core dump in progress
725 * for the old mm, we get out and die instead of going 723 * for the old mm, we get out and die instead of going
726 * through with the exec. We must hold mmap_sem around 724 * through with the exec. We must hold mmap_sem around
727 * checking core_waiters and changing tsk->mm. The 725 * checking core_state and changing tsk->mm.
728 * core-inducing thread will increment core_waiters for
729 * each thread whose ->mm == old_mm.
730 */ 726 */
731 down_read(&old_mm->mmap_sem); 727 down_read(&old_mm->mmap_sem);
732 if (unlikely(old_mm->core_waiters)) { 728 if (unlikely(old_mm->core_state)) {
733 up_read(&old_mm->mmap_sem); 729 up_read(&old_mm->mmap_sem);
734 return -EINTR; 730 return -EINTR;
735 } 731 }
@@ -1328,6 +1324,7 @@ int do_execve(char * filename,
1328 if (retval < 0) 1324 if (retval < 0)
1329 goto out; 1325 goto out;
1330 1326
1327 current->flags &= ~PF_KTHREAD;
1331 retval = search_binary_handler(bprm,regs); 1328 retval = search_binary_handler(bprm,regs);
1332 if (retval >= 0) { 1329 if (retval >= 0) {
1333 /* execve success */ 1330 /* execve success */
@@ -1382,17 +1379,14 @@ EXPORT_SYMBOL(set_binfmt);
1382 * name into corename, which must have space for at least 1379 * name into corename, which must have space for at least
1383 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. 1380 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
1384 */ 1381 */
1385static int format_corename(char *corename, const char *pattern, long signr) 1382static int format_corename(char *corename, int nr_threads, long signr)
1386{ 1383{
1387 const char *pat_ptr = pattern; 1384 const char *pat_ptr = core_pattern;
1385 int ispipe = (*pat_ptr == '|');
1388 char *out_ptr = corename; 1386 char *out_ptr = corename;
1389 char *const out_end = corename + CORENAME_MAX_SIZE; 1387 char *const out_end = corename + CORENAME_MAX_SIZE;
1390 int rc; 1388 int rc;
1391 int pid_in_pattern = 0; 1389 int pid_in_pattern = 0;
1392 int ispipe = 0;
1393
1394 if (*pattern == '|')
1395 ispipe = 1;
1396 1390
1397 /* Repeat as long as we have more pattern to process and more output 1391 /* Repeat as long as we have more pattern to process and more output
1398 space */ 1392 space */
@@ -1493,7 +1487,7 @@ static int format_corename(char *corename, const char *pattern, long signr)
1493 * and core_uses_pid is set, then .%pid will be appended to 1487 * and core_uses_pid is set, then .%pid will be appended to
1494 * the filename. Do not do this for piped commands. */ 1488 * the filename. Do not do this for piped commands. */
1495 if (!ispipe && !pid_in_pattern 1489 if (!ispipe && !pid_in_pattern
1496 && (core_uses_pid || atomic_read(&current->mm->mm_users) != 1)) { 1490 && (core_uses_pid || nr_threads)) {
1497 rc = snprintf(out_ptr, out_end - out_ptr, 1491 rc = snprintf(out_ptr, out_end - out_ptr,
1498 ".%d", task_tgid_vnr(current)); 1492 ".%d", task_tgid_vnr(current));
1499 if (rc > out_end - out_ptr) 1493 if (rc > out_end - out_ptr)
@@ -1505,9 +1499,10 @@ out:
1505 return ispipe; 1499 return ispipe;
1506} 1500}
1507 1501
1508static void zap_process(struct task_struct *start) 1502static int zap_process(struct task_struct *start)
1509{ 1503{
1510 struct task_struct *t; 1504 struct task_struct *t;
1505 int nr = 0;
1511 1506
1512 start->signal->flags = SIGNAL_GROUP_EXIT; 1507 start->signal->flags = SIGNAL_GROUP_EXIT;
1513 start->signal->group_stop_count = 0; 1508 start->signal->group_stop_count = 0;
@@ -1515,72 +1510,99 @@ static void zap_process(struct task_struct *start)
1515 t = start; 1510 t = start;
1516 do { 1511 do {
1517 if (t != current && t->mm) { 1512 if (t != current && t->mm) {
1518 t->mm->core_waiters++;
1519 sigaddset(&t->pending.signal, SIGKILL); 1513 sigaddset(&t->pending.signal, SIGKILL);
1520 signal_wake_up(t, 1); 1514 signal_wake_up(t, 1);
1515 nr++;
1521 } 1516 }
1522 } while ((t = next_thread(t)) != start); 1517 } while_each_thread(start, t);
1518
1519 return nr;
1523} 1520}
1524 1521
1525static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, 1522static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
1526 int exit_code) 1523 struct core_state *core_state, int exit_code)
1527{ 1524{
1528 struct task_struct *g, *p; 1525 struct task_struct *g, *p;
1529 unsigned long flags; 1526 unsigned long flags;
1530 int err = -EAGAIN; 1527 int nr = -EAGAIN;
1531 1528
1532 spin_lock_irq(&tsk->sighand->siglock); 1529 spin_lock_irq(&tsk->sighand->siglock);
1533 if (!signal_group_exit(tsk->signal)) { 1530 if (!signal_group_exit(tsk->signal)) {
1531 mm->core_state = core_state;
1534 tsk->signal->group_exit_code = exit_code; 1532 tsk->signal->group_exit_code = exit_code;
1535 zap_process(tsk); 1533 nr = zap_process(tsk);
1536 err = 0;
1537 } 1534 }
1538 spin_unlock_irq(&tsk->sighand->siglock); 1535 spin_unlock_irq(&tsk->sighand->siglock);
1539 if (err) 1536 if (unlikely(nr < 0))
1540 return err; 1537 return nr;
1541 1538
1542 if (atomic_read(&mm->mm_users) == mm->core_waiters + 1) 1539 if (atomic_read(&mm->mm_users) == nr + 1)
1543 goto done; 1540 goto done;
1544 1541 /*
1542 * We should find and kill all tasks which use this mm, and we should
1543 * count them correctly into ->nr_threads. We don't take tasklist
1544 * lock, but this is safe wrt:
1545 *
1546 * fork:
1547 * None of sub-threads can fork after zap_process(leader). All
1548 * processes which were created before this point should be
1549 * visible to zap_threads() because copy_process() adds the new
1550 * process to the tail of init_task.tasks list, and lock/unlock
1551 * of ->siglock provides a memory barrier.
1552 *
1553 * do_exit:
1554 * The caller holds mm->mmap_sem. This means that the task which
1555 * uses this mm can't pass exit_mm(), so it can't exit or clear
1556 * its ->mm.
1557 *
1558 * de_thread:
1559 * It does list_replace_rcu(&leader->tasks, &current->tasks),
1560 * we must see either old or new leader, this does not matter.
1561 * However, it can change p->sighand, so lock_task_sighand(p)
1562 * must be used. Since p->mm != NULL and we hold ->mmap_sem
1563 * it can't fail.
1564 *
1565 * Note also that "g" can be the old leader with ->mm == NULL
1566 * and already unhashed and thus removed from ->thread_group.
1567 * This is OK, __unhash_process()->list_del_rcu() does not
1568 * clear the ->next pointer, we will find the new leader via
1569 * next_thread().
1570 */
1545 rcu_read_lock(); 1571 rcu_read_lock();
1546 for_each_process(g) { 1572 for_each_process(g) {
1547 if (g == tsk->group_leader) 1573 if (g == tsk->group_leader)
1548 continue; 1574 continue;
1549 1575 if (g->flags & PF_KTHREAD)
1576 continue;
1550 p = g; 1577 p = g;
1551 do { 1578 do {
1552 if (p->mm) { 1579 if (p->mm) {
1553 if (p->mm == mm) { 1580 if (unlikely(p->mm == mm)) {
1554 /*
1555 * p->sighand can't disappear, but
1556 * may be changed by de_thread()
1557 */
1558 lock_task_sighand(p, &flags); 1581 lock_task_sighand(p, &flags);
1559 zap_process(p); 1582 nr += zap_process(p);
1560 unlock_task_sighand(p, &flags); 1583 unlock_task_sighand(p, &flags);
1561 } 1584 }
1562 break; 1585 break;
1563 } 1586 }
1564 } while ((p = next_thread(p)) != g); 1587 } while_each_thread(g, p);
1565 } 1588 }
1566 rcu_read_unlock(); 1589 rcu_read_unlock();
1567done: 1590done:
1568 return mm->core_waiters; 1591 atomic_set(&core_state->nr_threads, nr);
1592 return nr;
1569} 1593}
1570 1594
1571static int coredump_wait(int exit_code) 1595static int coredump_wait(int exit_code, struct core_state *core_state)
1572{ 1596{
1573 struct task_struct *tsk = current; 1597 struct task_struct *tsk = current;
1574 struct mm_struct *mm = tsk->mm; 1598 struct mm_struct *mm = tsk->mm;
1575 struct completion startup_done;
1576 struct completion *vfork_done; 1599 struct completion *vfork_done;
1577 int core_waiters; 1600 int core_waiters;
1578 1601
1579 init_completion(&mm->core_done); 1602 init_completion(&core_state->startup);
1580 init_completion(&startup_done); 1603 core_state->dumper.task = tsk;
1581 mm->core_startup_done = &startup_done; 1604 core_state->dumper.next = NULL;
1582 1605 core_waiters = zap_threads(tsk, mm, core_state, exit_code);
1583 core_waiters = zap_threads(tsk, mm, exit_code);
1584 up_write(&mm->mmap_sem); 1606 up_write(&mm->mmap_sem);
1585 1607
1586 if (unlikely(core_waiters < 0)) 1608 if (unlikely(core_waiters < 0))
@@ -1597,12 +1619,32 @@ static int coredump_wait(int exit_code)
1597 } 1619 }
1598 1620
1599 if (core_waiters) 1621 if (core_waiters)
1600 wait_for_completion(&startup_done); 1622 wait_for_completion(&core_state->startup);
1601fail: 1623fail:
1602 BUG_ON(mm->core_waiters);
1603 return core_waiters; 1624 return core_waiters;
1604} 1625}
1605 1626
1627static void coredump_finish(struct mm_struct *mm)
1628{
1629 struct core_thread *curr, *next;
1630 struct task_struct *task;
1631
1632 next = mm->core_state->dumper.next;
1633 while ((curr = next) != NULL) {
1634 next = curr->next;
1635 task = curr->task;
1636 /*
1637 * see exit_mm(), curr->task must not see
1638 * ->task == NULL before we read ->next.
1639 */
1640 smp_mb();
1641 curr->task = NULL;
1642 wake_up_process(task);
1643 }
1644
1645 mm->core_state = NULL;
1646}
1647
1606/* 1648/*
1607 * set_dumpable converts traditional three-value dumpable to two flags and 1649 * set_dumpable converts traditional three-value dumpable to two flags and
1608 * stores them into mm->flags. It modifies lower two bits of mm->flags, but 1650 * stores them into mm->flags. It modifies lower two bits of mm->flags, but
@@ -1654,6 +1696,7 @@ int get_dumpable(struct mm_struct *mm)
1654 1696
1655int do_coredump(long signr, int exit_code, struct pt_regs * regs) 1697int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1656{ 1698{
1699 struct core_state core_state;
1657 char corename[CORENAME_MAX_SIZE + 1]; 1700 char corename[CORENAME_MAX_SIZE + 1];
1658 struct mm_struct *mm = current->mm; 1701 struct mm_struct *mm = current->mm;
1659 struct linux_binfmt * binfmt; 1702 struct linux_binfmt * binfmt;
@@ -1677,7 +1720,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1677 /* 1720 /*
1678 * If another thread got here first, or we are not dumpable, bail out. 1721 * If another thread got here first, or we are not dumpable, bail out.
1679 */ 1722 */
1680 if (mm->core_waiters || !get_dumpable(mm)) { 1723 if (mm->core_state || !get_dumpable(mm)) {
1681 up_write(&mm->mmap_sem); 1724 up_write(&mm->mmap_sem);
1682 goto fail; 1725 goto fail;
1683 } 1726 }
@@ -1692,7 +1735,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1692 current->fsuid = 0; /* Dump root private */ 1735 current->fsuid = 0; /* Dump root private */
1693 } 1736 }
1694 1737
1695 retval = coredump_wait(exit_code); 1738 retval = coredump_wait(exit_code, &core_state);
1696 if (retval < 0) 1739 if (retval < 0)
1697 goto fail; 1740 goto fail;
1698 1741
@@ -1707,7 +1750,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1707 * uses lock_kernel() 1750 * uses lock_kernel()
1708 */ 1751 */
1709 lock_kernel(); 1752 lock_kernel();
1710 ispipe = format_corename(corename, core_pattern, signr); 1753 ispipe = format_corename(corename, retval, signr);
1711 unlock_kernel(); 1754 unlock_kernel();
1712 /* 1755 /*
1713 * Don't bother to check the RLIMIT_CORE value if core_pattern points 1756 * Don't bother to check the RLIMIT_CORE value if core_pattern points
@@ -1786,7 +1829,7 @@ fail_unlock:
1786 argv_free(helper_argv); 1829 argv_free(helper_argv);
1787 1830
1788 current->fsuid = fsuid; 1831 current->fsuid = fsuid;
1789 complete_all(&mm->core_done); 1832 coredump_finish(mm);
1790fail: 1833fail:
1791 return retval; 1834 return retval;
1792} 1835}