aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/fork.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/fork.c')
-rw-r--r--kernel/fork.c405
1 files changed, 345 insertions, 60 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index 4ae8cfc1c89c..fbea12d7a943 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -446,6 +446,55 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
446 } 446 }
447} 447}
448 448
449/*
450 * Allocate a new mm structure and copy contents from the
451 * mm structure of the passed in task structure.
452 */
453static struct mm_struct *dup_mm(struct task_struct *tsk)
454{
455 struct mm_struct *mm, *oldmm = current->mm;
456 int err;
457
458 if (!oldmm)
459 return NULL;
460
461 mm = allocate_mm();
462 if (!mm)
463 goto fail_nomem;
464
465 memcpy(mm, oldmm, sizeof(*mm));
466
467 if (!mm_init(mm))
468 goto fail_nomem;
469
470 if (init_new_context(tsk, mm))
471 goto fail_nocontext;
472
473 err = dup_mmap(mm, oldmm);
474 if (err)
475 goto free_pt;
476
477 mm->hiwater_rss = get_mm_rss(mm);
478 mm->hiwater_vm = mm->total_vm;
479
480 return mm;
481
482free_pt:
483 mmput(mm);
484
485fail_nomem:
486 return NULL;
487
488fail_nocontext:
489 /*
490 * If init_new_context() failed, we cannot use mmput() to free the mm
491 * because it calls destroy_context()
492 */
493 mm_free_pgd(mm);
494 free_mm(mm);
495 return NULL;
496}
497
449static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) 498static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
450{ 499{
451 struct mm_struct * mm, *oldmm; 500 struct mm_struct * mm, *oldmm;
@@ -473,43 +522,17 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
473 } 522 }
474 523
475 retval = -ENOMEM; 524 retval = -ENOMEM;
476 mm = allocate_mm(); 525 mm = dup_mm(tsk);
477 if (!mm) 526 if (!mm)
478 goto fail_nomem; 527 goto fail_nomem;
479 528
480 /* Copy the current MM stuff.. */
481 memcpy(mm, oldmm, sizeof(*mm));
482 if (!mm_init(mm))
483 goto fail_nomem;
484
485 if (init_new_context(tsk,mm))
486 goto fail_nocontext;
487
488 retval = dup_mmap(mm, oldmm);
489 if (retval)
490 goto free_pt;
491
492 mm->hiwater_rss = get_mm_rss(mm);
493 mm->hiwater_vm = mm->total_vm;
494
495good_mm: 529good_mm:
496 tsk->mm = mm; 530 tsk->mm = mm;
497 tsk->active_mm = mm; 531 tsk->active_mm = mm;
498 return 0; 532 return 0;
499 533
500free_pt:
501 mmput(mm);
502fail_nomem: 534fail_nomem:
503 return retval; 535 return retval;
504
505fail_nocontext:
506 /*
507 * If init_new_context() failed, we cannot use mmput() to free the mm
508 * because it calls destroy_context()
509 */
510 mm_free_pgd(mm);
511 free_mm(mm);
512 return retval;
513} 536}
514 537
515static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old) 538static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old)
@@ -597,32 +620,17 @@ out:
597 return newf; 620 return newf;
598} 621}
599 622
600static int copy_files(unsigned long clone_flags, struct task_struct * tsk) 623/*
624 * Allocate a new files structure and copy contents from the
625 * passed in files structure.
626 */
627static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
601{ 628{
602 struct files_struct *oldf, *newf; 629 struct files_struct *newf;
603 struct file **old_fds, **new_fds; 630 struct file **old_fds, **new_fds;
604 int open_files, size, i, error = 0, expand; 631 int open_files, size, i, expand;
605 struct fdtable *old_fdt, *new_fdt; 632 struct fdtable *old_fdt, *new_fdt;
606 633
607 /*
608 * A background process may not have any files ...
609 */
610 oldf = current->files;
611 if (!oldf)
612 goto out;
613
614 if (clone_flags & CLONE_FILES) {
615 atomic_inc(&oldf->count);
616 goto out;
617 }
618
619 /*
620 * Note: we may be using current for both targets (See exec.c)
621 * This works because we cache current->files (old) as oldf. Don't
622 * break this.
623 */
624 tsk->files = NULL;
625 error = -ENOMEM;
626 newf = alloc_files(); 634 newf = alloc_files();
627 if (!newf) 635 if (!newf)
628 goto out; 636 goto out;
@@ -651,9 +659,9 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
651 if (expand) { 659 if (expand) {
652 spin_unlock(&oldf->file_lock); 660 spin_unlock(&oldf->file_lock);
653 spin_lock(&newf->file_lock); 661 spin_lock(&newf->file_lock);
654 error = expand_files(newf, open_files-1); 662 *errorp = expand_files(newf, open_files-1);
655 spin_unlock(&newf->file_lock); 663 spin_unlock(&newf->file_lock);
656 if (error < 0) 664 if (*errorp < 0)
657 goto out_release; 665 goto out_release;
658 new_fdt = files_fdtable(newf); 666 new_fdt = files_fdtable(newf);
659 /* 667 /*
@@ -702,10 +710,8 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
702 memset(&new_fdt->close_on_exec->fds_bits[start], 0, left); 710 memset(&new_fdt->close_on_exec->fds_bits[start], 0, left);
703 } 711 }
704 712
705 tsk->files = newf;
706 error = 0;
707out: 713out:
708 return error; 714 return newf;
709 715
710out_release: 716out_release:
711 free_fdset (new_fdt->close_on_exec, new_fdt->max_fdset); 717 free_fdset (new_fdt->close_on_exec, new_fdt->max_fdset);
@@ -715,6 +721,40 @@ out_release:
715 goto out; 721 goto out;
716} 722}
717 723
724static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
725{
726 struct files_struct *oldf, *newf;
727 int error = 0;
728
729 /*
730 * A background process may not have any files ...
731 */
732 oldf = current->files;
733 if (!oldf)
734 goto out;
735
736 if (clone_flags & CLONE_FILES) {
737 atomic_inc(&oldf->count);
738 goto out;
739 }
740
741 /*
742 * Note: we may be using current for both targets (See exec.c)
743 * This works because we cache current->files (old) as oldf. Don't
744 * break this.
745 */
746 tsk->files = NULL;
747 error = -ENOMEM;
748 newf = dup_fd(oldf, &error);
749 if (!newf)
750 goto out;
751
752 tsk->files = newf;
753 error = 0;
754out:
755 return error;
756}
757
718/* 758/*
719 * Helper to unshare the files of the current task. 759 * Helper to unshare the files of the current task.
720 * We don't want to expose copy_files internals to 760 * We don't want to expose copy_files internals to
@@ -802,7 +842,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
802 init_sigpending(&sig->shared_pending); 842 init_sigpending(&sig->shared_pending);
803 INIT_LIST_HEAD(&sig->posix_timers); 843 INIT_LIST_HEAD(&sig->posix_timers);
804 844
805 hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC); 845 hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_REL);
806 sig->it_real_incr.tv64 = 0; 846 sig->it_real_incr.tv64 = 0;
807 sig->real_timer.function = it_real_fn; 847 sig->real_timer.function = it_real_fn;
808 sig->real_timer.data = tsk; 848 sig->real_timer.data = tsk;
@@ -1083,8 +1123,8 @@ static task_t *copy_process(unsigned long clone_flags,
1083 p->real_parent = current; 1123 p->real_parent = current;
1084 p->parent = p->real_parent; 1124 p->parent = p->real_parent;
1085 1125
1126 spin_lock(&current->sighand->siglock);
1086 if (clone_flags & CLONE_THREAD) { 1127 if (clone_flags & CLONE_THREAD) {
1087 spin_lock(&current->sighand->siglock);
1088 /* 1128 /*
1089 * Important: if an exit-all has been started then 1129 * Important: if an exit-all has been started then
1090 * do not create this new thread - the whole thread 1130 * do not create this new thread - the whole thread
@@ -1122,8 +1162,6 @@ static task_t *copy_process(unsigned long clone_flags,
1122 */ 1162 */
1123 p->it_prof_expires = jiffies_to_cputime(1); 1163 p->it_prof_expires = jiffies_to_cputime(1);
1124 } 1164 }
1125
1126 spin_unlock(&current->sighand->siglock);
1127 } 1165 }
1128 1166
1129 /* 1167 /*
@@ -1135,8 +1173,6 @@ static task_t *copy_process(unsigned long clone_flags,
1135 if (unlikely(p->ptrace & PT_PTRACED)) 1173 if (unlikely(p->ptrace & PT_PTRACED))
1136 __ptrace_link(p, current->parent); 1174 __ptrace_link(p, current->parent);
1137 1175
1138 attach_pid(p, PIDTYPE_PID, p->pid);
1139 attach_pid(p, PIDTYPE_TGID, p->tgid);
1140 if (thread_group_leader(p)) { 1176 if (thread_group_leader(p)) {
1141 p->signal->tty = current->signal->tty; 1177 p->signal->tty = current->signal->tty;
1142 p->signal->pgrp = process_group(current); 1178 p->signal->pgrp = process_group(current);
@@ -1146,9 +1182,12 @@ static task_t *copy_process(unsigned long clone_flags,
1146 if (p->pid) 1182 if (p->pid)
1147 __get_cpu_var(process_counts)++; 1183 __get_cpu_var(process_counts)++;
1148 } 1184 }
1185 attach_pid(p, PIDTYPE_TGID, p->tgid);
1186 attach_pid(p, PIDTYPE_PID, p->pid);
1149 1187
1150 nr_threads++; 1188 nr_threads++;
1151 total_forks++; 1189 total_forks++;
1190 spin_unlock(&current->sighand->siglock);
1152 write_unlock_irq(&tasklist_lock); 1191 write_unlock_irq(&tasklist_lock);
1153 proc_fork_connector(p); 1192 proc_fork_connector(p);
1154 return p; 1193 return p;
@@ -1323,3 +1362,249 @@ void __init proc_caches_init(void)
1323 sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, 1362 sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
1324 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); 1363 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
1325} 1364}
1365
1366
1367/*
1368 * Check constraints on flags passed to the unshare system call and
1369 * force unsharing of additional process context as appropriate.
1370 */
1371static inline void check_unshare_flags(unsigned long *flags_ptr)
1372{
1373 /*
1374 * If unsharing a thread from a thread group, must also
1375 * unshare vm.
1376 */
1377 if (*flags_ptr & CLONE_THREAD)
1378 *flags_ptr |= CLONE_VM;
1379
1380 /*
1381 * If unsharing vm, must also unshare signal handlers.
1382 */
1383 if (*flags_ptr & CLONE_VM)
1384 *flags_ptr |= CLONE_SIGHAND;
1385
1386 /*
1387 * If unsharing signal handlers and the task was created
1388 * using CLONE_THREAD, then must unshare the thread
1389 */
1390 if ((*flags_ptr & CLONE_SIGHAND) &&
1391 (atomic_read(&current->signal->count) > 1))
1392 *flags_ptr |= CLONE_THREAD;
1393
1394 /*
1395 * If unsharing namespace, must also unshare filesystem information.
1396 */
1397 if (*flags_ptr & CLONE_NEWNS)
1398 *flags_ptr |= CLONE_FS;
1399}
1400
1401/*
1402 * Unsharing of tasks created with CLONE_THREAD is not supported yet
1403 */
1404static int unshare_thread(unsigned long unshare_flags)
1405{
1406 if (unshare_flags & CLONE_THREAD)
1407 return -EINVAL;
1408
1409 return 0;
1410}
1411
1412/*
1413 * Unshare the filesystem structure if it is being shared
1414 */
1415static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
1416{
1417 struct fs_struct *fs = current->fs;
1418
1419 if ((unshare_flags & CLONE_FS) &&
1420 (fs && atomic_read(&fs->count) > 1)) {
1421 *new_fsp = __copy_fs_struct(current->fs);
1422 if (!*new_fsp)
1423 return -ENOMEM;
1424 }
1425
1426 return 0;
1427}
1428
1429/*
1430 * Unshare the namespace structure if it is being shared
1431 */
1432static int unshare_namespace(unsigned long unshare_flags, struct namespace **new_nsp, struct fs_struct *new_fs)
1433{
1434 struct namespace *ns = current->namespace;
1435
1436 if ((unshare_flags & CLONE_NEWNS) &&
1437 (ns && atomic_read(&ns->count) > 1)) {
1438 if (!capable(CAP_SYS_ADMIN))
1439 return -EPERM;
1440
1441 *new_nsp = dup_namespace(current, new_fs ? new_fs : current->fs);
1442 if (!*new_nsp)
1443 return -ENOMEM;
1444 }
1445
1446 return 0;
1447}
1448
1449/*
1450 * Unsharing of sighand for tasks created with CLONE_SIGHAND is not
1451 * supported yet
1452 */
1453static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp)
1454{
1455 struct sighand_struct *sigh = current->sighand;
1456
1457 if ((unshare_flags & CLONE_SIGHAND) &&
1458 (sigh && atomic_read(&sigh->count) > 1))
1459 return -EINVAL;
1460 else
1461 return 0;
1462}
1463
1464/*
1465 * Unshare vm if it is being shared
1466 */
1467static int unshare_vm(unsigned long unshare_flags, struct mm_struct **new_mmp)
1468{
1469 struct mm_struct *mm = current->mm;
1470
1471 if ((unshare_flags & CLONE_VM) &&
1472 (mm && atomic_read(&mm->mm_users) > 1)) {
1473 *new_mmp = dup_mm(current);
1474 if (!*new_mmp)
1475 return -ENOMEM;
1476 }
1477
1478 return 0;
1479}
1480
1481/*
1482 * Unshare file descriptor table if it is being shared
1483 */
1484static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
1485{
1486 struct files_struct *fd = current->files;
1487 int error = 0;
1488
1489 if ((unshare_flags & CLONE_FILES) &&
1490 (fd && atomic_read(&fd->count) > 1)) {
1491 *new_fdp = dup_fd(fd, &error);
1492 if (!*new_fdp)
1493 return error;
1494 }
1495
1496 return 0;
1497}
1498
1499/*
1500 * Unsharing of semundo for tasks created with CLONE_SYSVSEM is not
1501 * supported yet
1502 */
1503static int unshare_semundo(unsigned long unshare_flags, struct sem_undo_list **new_ulistp)
1504{
1505 if (unshare_flags & CLONE_SYSVSEM)
1506 return -EINVAL;
1507
1508 return 0;
1509}
1510
1511/*
1512 * unshare allows a process to 'unshare' part of the process
1513 * context which was originally shared using clone. copy_*
1514 * functions used by do_fork() cannot be used here directly
1515 * because they modify an inactive task_struct that is being
1516 * constructed. Here we are modifying the current, active,
1517 * task_struct.
1518 */
1519asmlinkage long sys_unshare(unsigned long unshare_flags)
1520{
1521 int err = 0;
1522 struct fs_struct *fs, *new_fs = NULL;
1523 struct namespace *ns, *new_ns = NULL;
1524 struct sighand_struct *sigh, *new_sigh = NULL;
1525 struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL;
1526 struct files_struct *fd, *new_fd = NULL;
1527 struct sem_undo_list *new_ulist = NULL;
1528
1529 check_unshare_flags(&unshare_flags);
1530
1531 if ((err = unshare_thread(unshare_flags)))
1532 goto bad_unshare_out;
1533 if ((err = unshare_fs(unshare_flags, &new_fs)))
1534 goto bad_unshare_cleanup_thread;
1535 if ((err = unshare_namespace(unshare_flags, &new_ns, new_fs)))
1536 goto bad_unshare_cleanup_fs;
1537 if ((err = unshare_sighand(unshare_flags, &new_sigh)))
1538 goto bad_unshare_cleanup_ns;
1539 if ((err = unshare_vm(unshare_flags, &new_mm)))
1540 goto bad_unshare_cleanup_sigh;
1541 if ((err = unshare_fd(unshare_flags, &new_fd)))
1542 goto bad_unshare_cleanup_vm;
1543 if ((err = unshare_semundo(unshare_flags, &new_ulist)))
1544 goto bad_unshare_cleanup_fd;
1545
1546 if (new_fs || new_ns || new_sigh || new_mm || new_fd || new_ulist) {
1547
1548 task_lock(current);
1549
1550 if (new_fs) {
1551 fs = current->fs;
1552 current->fs = new_fs;
1553 new_fs = fs;
1554 }
1555
1556 if (new_ns) {
1557 ns = current->namespace;
1558 current->namespace = new_ns;
1559 new_ns = ns;
1560 }
1561
1562 if (new_sigh) {
1563 sigh = current->sighand;
1564 current->sighand = new_sigh;
1565 new_sigh = sigh;
1566 }
1567
1568 if (new_mm) {
1569 mm = current->mm;
1570 active_mm = current->active_mm;
1571 current->mm = new_mm;
1572 current->active_mm = new_mm;
1573 activate_mm(active_mm, new_mm);
1574 new_mm = mm;
1575 }
1576
1577 if (new_fd) {
1578 fd = current->files;
1579 current->files = new_fd;
1580 new_fd = fd;
1581 }
1582
1583 task_unlock(current);
1584 }
1585
1586bad_unshare_cleanup_fd:
1587 if (new_fd)
1588 put_files_struct(new_fd);
1589
1590bad_unshare_cleanup_vm:
1591 if (new_mm)
1592 mmput(new_mm);
1593
1594bad_unshare_cleanup_sigh:
1595 if (new_sigh)
1596 if (atomic_dec_and_test(&new_sigh->count))
1597 kmem_cache_free(sighand_cachep, new_sigh);
1598
1599bad_unshare_cleanup_ns:
1600 if (new_ns)
1601 put_namespace(new_ns);
1602
1603bad_unshare_cleanup_fs:
1604 if (new_fs)
1605 put_fs_struct(new_fs);
1606
1607bad_unshare_cleanup_thread:
1608bad_unshare_out:
1609 return err;
1610}