about | summary | refs | log | tree | commit | diff | stats
path: root/kernel/fork.c
diff options
context:
space:
mode:
author: Dave Kleikamp <shaggy@austin.ibm.com> 2006-03-14 18:05:45 -0500
committer: Dave Kleikamp <shaggy@austin.ibm.com> 2006-03-14 18:05:45 -0500
commit: c5111f504d2a9b0d258d7c4752b4093523315989 (patch)
tree: 6a52864aff79691689aea21cb0cb928327d5de5b /kernel/fork.c
parent: 69eb66d7da7dba2696281981347698e1693c2340 (diff)
parent: a488edc914aa1d766a4e2c982b5ae03d5657ec1b (diff)
Merge with /home/shaggy/git/linus-clean/
Diffstat (limited to 'kernel/fork.c')
-rw-r--r-- kernel/fork.c 415
1 files changed, 354 insertions, 61 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index 4ae8cfc1c89c..ccdfbb16c86d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -108,8 +108,10 @@ void free_task(struct task_struct *tsk)
108} 108}
109EXPORT_SYMBOL(free_task); 109EXPORT_SYMBOL(free_task);
110 110
111void __put_task_struct(struct task_struct *tsk) 111void __put_task_struct_cb(struct rcu_head *rhp)
112{ 112{
113 struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
114
113 WARN_ON(!(tsk->exit_state & (EXIT_DEAD | EXIT_ZOMBIE))); 115 WARN_ON(!(tsk->exit_state & (EXIT_DEAD | EXIT_ZOMBIE)));
114 WARN_ON(atomic_read(&tsk->usage)); 116 WARN_ON(atomic_read(&tsk->usage));
115 WARN_ON(tsk == current); 117 WARN_ON(tsk == current);
@@ -446,6 +448,55 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
446 } 448 }
447} 449}
448 450
451/*
452 * Allocate a new mm structure and copy contents from the
453 * mm structure of the passed in task structure.
454 */
455static struct mm_struct *dup_mm(struct task_struct *tsk)
456{
457 struct mm_struct *mm, *oldmm = current->mm;
458 int err;
459
460 if (!oldmm)
461 return NULL;
462
463 mm = allocate_mm();
464 if (!mm)
465 goto fail_nomem;
466
467 memcpy(mm, oldmm, sizeof(*mm));
468
469 if (!mm_init(mm))
470 goto fail_nomem;
471
472 if (init_new_context(tsk, mm))
473 goto fail_nocontext;
474
475 err = dup_mmap(mm, oldmm);
476 if (err)
477 goto free_pt;
478
479 mm->hiwater_rss = get_mm_rss(mm);
480 mm->hiwater_vm = mm->total_vm;
481
482 return mm;
483
484free_pt:
485 mmput(mm);
486
487fail_nomem:
488 return NULL;
489
490fail_nocontext:
491 /*
492 * If init_new_context() failed, we cannot use mmput() to free the mm
493 * because it calls destroy_context()
494 */
495 mm_free_pgd(mm);
496 free_mm(mm);
497 return NULL;
498}
499
449static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) 500static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
450{ 501{
451 struct mm_struct * mm, *oldmm; 502 struct mm_struct * mm, *oldmm;
@@ -473,43 +524,17 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
473 } 524 }
474 525
475 retval = -ENOMEM; 526 retval = -ENOMEM;
476 mm = allocate_mm(); 527 mm = dup_mm(tsk);
477 if (!mm) 528 if (!mm)
478 goto fail_nomem; 529 goto fail_nomem;
479 530
480 /* Copy the current MM stuff.. */
481 memcpy(mm, oldmm, sizeof(*mm));
482 if (!mm_init(mm))
483 goto fail_nomem;
484
485 if (init_new_context(tsk,mm))
486 goto fail_nocontext;
487
488 retval = dup_mmap(mm, oldmm);
489 if (retval)
490 goto free_pt;
491
492 mm->hiwater_rss = get_mm_rss(mm);
493 mm->hiwater_vm = mm->total_vm;
494
495good_mm: 531good_mm:
496 tsk->mm = mm; 532 tsk->mm = mm;
497 tsk->active_mm = mm; 533 tsk->active_mm = mm;
498 return 0; 534 return 0;
499 535
500free_pt:
501 mmput(mm);
502fail_nomem: 536fail_nomem:
503 return retval; 537 return retval;
504
505fail_nocontext:
506 /*
507 * If init_new_context() failed, we cannot use mmput() to free the mm
508 * because it calls destroy_context()
509 */
510 mm_free_pgd(mm);
511 free_mm(mm);
512 return retval;
513} 538}
514 539
515static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old) 540static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old)
@@ -597,32 +622,17 @@ out:
597 return newf; 622 return newf;
598} 623}
599 624
600static int copy_files(unsigned long clone_flags, struct task_struct * tsk) 625/*
626 * Allocate a new files structure and copy contents from the
627 * passed in files structure.
628 */
629static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
601{ 630{
602 struct files_struct *oldf, *newf; 631 struct files_struct *newf;
603 struct file **old_fds, **new_fds; 632 struct file **old_fds, **new_fds;
604 int open_files, size, i, error = 0, expand; 633 int open_files, size, i, expand;
605 struct fdtable *old_fdt, *new_fdt; 634 struct fdtable *old_fdt, *new_fdt;
606 635
607 /*
608 * A background process may not have any files ...
609 */
610 oldf = current->files;
611 if (!oldf)
612 goto out;
613
614 if (clone_flags & CLONE_FILES) {
615 atomic_inc(&oldf->count);
616 goto out;
617 }
618
619 /*
620 * Note: we may be using current for both targets (See exec.c)
621 * This works because we cache current->files (old) as oldf. Don't
622 * break this.
623 */
624 tsk->files = NULL;
625 error = -ENOMEM;
626 newf = alloc_files(); 636 newf = alloc_files();
627 if (!newf) 637 if (!newf)
628 goto out; 638 goto out;
@@ -651,9 +661,9 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
651 if (expand) { 661 if (expand) {
652 spin_unlock(&oldf->file_lock); 662 spin_unlock(&oldf->file_lock);
653 spin_lock(&newf->file_lock); 663 spin_lock(&newf->file_lock);
654 error = expand_files(newf, open_files-1); 664 *errorp = expand_files(newf, open_files-1);
655 spin_unlock(&newf->file_lock); 665 spin_unlock(&newf->file_lock);
656 if (error < 0) 666 if (*errorp < 0)
657 goto out_release; 667 goto out_release;
658 new_fdt = files_fdtable(newf); 668 new_fdt = files_fdtable(newf);
659 /* 669 /*
@@ -702,10 +712,8 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
702 memset(&new_fdt->close_on_exec->fds_bits[start], 0, left); 712 memset(&new_fdt->close_on_exec->fds_bits[start], 0, left);
703 } 713 }
704 714
705 tsk->files = newf;
706 error = 0;
707out: 715out:
708 return error; 716 return newf;
709 717
710out_release: 718out_release:
711 free_fdset (new_fdt->close_on_exec, new_fdt->max_fdset); 719 free_fdset (new_fdt->close_on_exec, new_fdt->max_fdset);
@@ -715,6 +723,40 @@ out_release:
715 goto out; 723 goto out;
716} 724}
717 725
726static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
727{
728 struct files_struct *oldf, *newf;
729 int error = 0;
730
731 /*
732 * A background process may not have any files ...
733 */
734 oldf = current->files;
735 if (!oldf)
736 goto out;
737
738 if (clone_flags & CLONE_FILES) {
739 atomic_inc(&oldf->count);
740 goto out;
741 }
742
743 /*
744 * Note: we may be using current for both targets (See exec.c)
745 * This works because we cache current->files (old) as oldf. Don't
746 * break this.
747 */
748 tsk->files = NULL;
749 error = -ENOMEM;
750 newf = dup_fd(oldf, &error);
751 if (!newf)
752 goto out;
753
754 tsk->files = newf;
755 error = 0;
756out:
757 return error;
758}
759
718/* 760/*
719 * Helper to unshare the files of the current task. 761 * Helper to unshare the files of the current task.
720 * We don't want to expose copy_files internals to 762 * We don't want to expose copy_files internals to
@@ -802,7 +844,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
802 init_sigpending(&sig->shared_pending); 844 init_sigpending(&sig->shared_pending);
803 INIT_LIST_HEAD(&sig->posix_timers); 845 INIT_LIST_HEAD(&sig->posix_timers);
804 846
805 hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC); 847 hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_REL);
806 sig->it_real_incr.tv64 = 0; 848 sig->it_real_incr.tv64 = 0;
807 sig->real_timer.function = it_real_fn; 849 sig->real_timer.function = it_real_fn;
808 sig->real_timer.data = tsk; 850 sig->real_timer.data = tsk;
@@ -1020,6 +1062,12 @@ static task_t *copy_process(unsigned long clone_flags,
1020 p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL; 1062 p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
1021 1063
1022 /* 1064 /*
1065 * sigaltstack should be cleared when sharing the same VM
1066 */
1067 if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM)
1068 p->sas_ss_sp = p->sas_ss_size = 0;
1069
1070 /*
1023 * Syscall tracing should be turned off in the child regardless 1071 * Syscall tracing should be turned off in the child regardless
1024 * of CLONE_PTRACE. 1072 * of CLONE_PTRACE.
1025 */ 1073 */
@@ -1083,8 +1131,8 @@ static task_t *copy_process(unsigned long clone_flags,
1083 p->real_parent = current; 1131 p->real_parent = current;
1084 p->parent = p->real_parent; 1132 p->parent = p->real_parent;
1085 1133
1134 spin_lock(&current->sighand->siglock);
1086 if (clone_flags & CLONE_THREAD) { 1135 if (clone_flags & CLONE_THREAD) {
1087 spin_lock(&current->sighand->siglock);
1088 /* 1136 /*
1089 * Important: if an exit-all has been started then 1137 * Important: if an exit-all has been started then
1090 * do not create this new thread - the whole thread 1138 * do not create this new thread - the whole thread
@@ -1122,8 +1170,6 @@ static task_t *copy_process(unsigned long clone_flags,
1122 */ 1170 */
1123 p->it_prof_expires = jiffies_to_cputime(1); 1171 p->it_prof_expires = jiffies_to_cputime(1);
1124 } 1172 }
1125
1126 spin_unlock(&current->sighand->siglock);
1127 } 1173 }
1128 1174
1129 /* 1175 /*
@@ -1135,8 +1181,6 @@ static task_t *copy_process(unsigned long clone_flags,
1135 if (unlikely(p->ptrace & PT_PTRACED)) 1181 if (unlikely(p->ptrace & PT_PTRACED))
1136 __ptrace_link(p, current->parent); 1182 __ptrace_link(p, current->parent);
1137 1183
1138 attach_pid(p, PIDTYPE_PID, p->pid);
1139 attach_pid(p, PIDTYPE_TGID, p->tgid);
1140 if (thread_group_leader(p)) { 1184 if (thread_group_leader(p)) {
1141 p->signal->tty = current->signal->tty; 1185 p->signal->tty = current->signal->tty;
1142 p->signal->pgrp = process_group(current); 1186 p->signal->pgrp = process_group(current);
@@ -1146,9 +1190,12 @@ static task_t *copy_process(unsigned long clone_flags,
1146 if (p->pid) 1190 if (p->pid)
1147 __get_cpu_var(process_counts)++; 1191 __get_cpu_var(process_counts)++;
1148 } 1192 }
1193 attach_pid(p, PIDTYPE_TGID, p->tgid);
1194 attach_pid(p, PIDTYPE_PID, p->pid);
1149 1195
1150 nr_threads++; 1196 nr_threads++;
1151 total_forks++; 1197 total_forks++;
1198 spin_unlock(&current->sighand->siglock);
1152 write_unlock_irq(&tasklist_lock); 1199 write_unlock_irq(&tasklist_lock);
1153 proc_fork_connector(p); 1200 proc_fork_connector(p);
1154 return p; 1201 return p;
@@ -1323,3 +1370,249 @@ void __init proc_caches_init(void)
1323 sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, 1370 sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
1324 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); 1371 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
1325} 1372}
1373
1374
1375/*
1376 * Check constraints on flags passed to the unshare system call and
1377 * force unsharing of additional process context as appropriate.
1378 */
1379static inline void check_unshare_flags(unsigned long *flags_ptr)
1380{
1381 /*
1382 * If unsharing a thread from a thread group, must also
1383 * unshare vm.
1384 */
1385 if (*flags_ptr & CLONE_THREAD)
1386 *flags_ptr |= CLONE_VM;
1387
1388 /*
1389 * If unsharing vm, must also unshare signal handlers.
1390 */
1391 if (*flags_ptr & CLONE_VM)
1392 *flags_ptr |= CLONE_SIGHAND;
1393
1394 /*
1395 * If unsharing signal handlers and the task was created
1396 * using CLONE_THREAD, then must unshare the thread
1397 */
1398 if ((*flags_ptr & CLONE_SIGHAND) &&
1399 (atomic_read(&current->signal->count) > 1))
1400 *flags_ptr |= CLONE_THREAD;
1401
1402 /*
1403 * If unsharing namespace, must also unshare filesystem information.
1404 */
1405 if (*flags_ptr & CLONE_NEWNS)
1406 *flags_ptr |= CLONE_FS;
1407}
1408
1409/*
1410 * Unsharing of tasks created with CLONE_THREAD is not supported yet
1411 */
1412static int unshare_thread(unsigned long unshare_flags)
1413{
1414 if (unshare_flags & CLONE_THREAD)
1415 return -EINVAL;
1416
1417 return 0;
1418}
1419
1420/*
1421 * Unshare the filesystem structure if it is being shared
1422 */
1423static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
1424{
1425 struct fs_struct *fs = current->fs;
1426
1427 if ((unshare_flags & CLONE_FS) &&
1428 (fs && atomic_read(&fs->count) > 1)) {
1429 *new_fsp = __copy_fs_struct(current->fs);
1430 if (!*new_fsp)
1431 return -ENOMEM;
1432 }
1433
1434 return 0;
1435}
1436
1437/*
1438 * Unshare the namespace structure if it is being shared
1439 */
1440static int unshare_namespace(unsigned long unshare_flags, struct namespace **new_nsp, struct fs_struct *new_fs)
1441{
1442 struct namespace *ns = current->namespace;
1443
1444 if ((unshare_flags & CLONE_NEWNS) &&
1445 (ns && atomic_read(&ns->count) > 1)) {
1446 if (!capable(CAP_SYS_ADMIN))
1447 return -EPERM;
1448
1449 *new_nsp = dup_namespace(current, new_fs ? new_fs : current->fs);
1450 if (!*new_nsp)
1451 return -ENOMEM;
1452 }
1453
1454 return 0;
1455}
1456
1457/*
1458 * Unsharing of sighand for tasks created with CLONE_SIGHAND is not
1459 * supported yet
1460 */
1461static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp)
1462{
1463 struct sighand_struct *sigh = current->sighand;
1464
1465 if ((unshare_flags & CLONE_SIGHAND) &&
1466 (sigh && atomic_read(&sigh->count) > 1))
1467 return -EINVAL;
1468 else
1469 return 0;
1470}
1471
1472/*
1473 * Unshare vm if it is being shared
1474 */
1475static int unshare_vm(unsigned long unshare_flags, struct mm_struct **new_mmp)
1476{
1477 struct mm_struct *mm = current->mm;
1478
1479 if ((unshare_flags & CLONE_VM) &&
1480 (mm && atomic_read(&mm->mm_users) > 1)) {
1481 *new_mmp = dup_mm(current);
1482 if (!*new_mmp)
1483 return -ENOMEM;
1484 }
1485
1486 return 0;
1487}
1488
1489/*
1490 * Unshare file descriptor table if it is being shared
1491 */
1492static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
1493{
1494 struct files_struct *fd = current->files;
1495 int error = 0;
1496
1497 if ((unshare_flags & CLONE_FILES) &&
1498 (fd && atomic_read(&fd->count) > 1)) {
1499 *new_fdp = dup_fd(fd, &error);
1500 if (!*new_fdp)
1501 return error;
1502 }
1503
1504 return 0;
1505}
1506
1507/*
1508 * Unsharing of semundo for tasks created with CLONE_SYSVSEM is not
1509 * supported yet
1510 */
1511static int unshare_semundo(unsigned long unshare_flags, struct sem_undo_list **new_ulistp)
1512{
1513 if (unshare_flags & CLONE_SYSVSEM)
1514 return -EINVAL;
1515
1516 return 0;
1517}
1518
1519/*
1520 * unshare allows a process to 'unshare' part of the process
1521 * context which was originally shared using clone. copy_*
1522 * functions used by do_fork() cannot be used here directly
1523 * because they modify an inactive task_struct that is being
1524 * constructed. Here we are modifying the current, active,
1525 * task_struct.
1526 */
1527asmlinkage long sys_unshare(unsigned long unshare_flags)
1528{
1529 int err = 0;
1530 struct fs_struct *fs, *new_fs = NULL;
1531 struct namespace *ns, *new_ns = NULL;
1532 struct sighand_struct *sigh, *new_sigh = NULL;
1533 struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL;
1534 struct files_struct *fd, *new_fd = NULL;
1535 struct sem_undo_list *new_ulist = NULL;
1536
1537 check_unshare_flags(&unshare_flags);
1538
1539 if ((err = unshare_thread(unshare_flags)))
1540 goto bad_unshare_out;
1541 if ((err = unshare_fs(unshare_flags, &new_fs)))
1542 goto bad_unshare_cleanup_thread;
1543 if ((err = unshare_namespace(unshare_flags, &new_ns, new_fs)))
1544 goto bad_unshare_cleanup_fs;
1545 if ((err = unshare_sighand(unshare_flags, &new_sigh)))
1546 goto bad_unshare_cleanup_ns;
1547 if ((err = unshare_vm(unshare_flags, &new_mm)))
1548 goto bad_unshare_cleanup_sigh;
1549 if ((err = unshare_fd(unshare_flags, &new_fd)))
1550 goto bad_unshare_cleanup_vm;
1551 if ((err = unshare_semundo(unshare_flags, &new_ulist)))
1552 goto bad_unshare_cleanup_fd;
1553
1554 if (new_fs || new_ns || new_sigh || new_mm || new_fd || new_ulist) {
1555
1556 task_lock(current);
1557
1558 if (new_fs) {
1559 fs = current->fs;
1560 current->fs = new_fs;
1561 new_fs = fs;
1562 }
1563
1564 if (new_ns) {
1565 ns = current->namespace;
1566 current->namespace = new_ns;
1567 new_ns = ns;
1568 }
1569
1570 if (new_sigh) {
1571 sigh = current->sighand;
1572 current->sighand = new_sigh;
1573 new_sigh = sigh;
1574 }
1575
1576 if (new_mm) {
1577 mm = current->mm;
1578 active_mm = current->active_mm;
1579 current->mm = new_mm;
1580 current->active_mm = new_mm;
1581 activate_mm(active_mm, new_mm);
1582 new_mm = mm;
1583 }
1584
1585 if (new_fd) {
1586 fd = current->files;
1587 current->files = new_fd;
1588 new_fd = fd;
1589 }
1590
1591 task_unlock(current);
1592 }
1593
1594bad_unshare_cleanup_fd:
1595 if (new_fd)
1596 put_files_struct(new_fd);
1597
1598bad_unshare_cleanup_vm:
1599 if (new_mm)
1600 mmput(new_mm);
1601
1602bad_unshare_cleanup_sigh:
1603 if (new_sigh)
1604 if (atomic_dec_and_test(&new_sigh->count))
1605 kmem_cache_free(sighand_cachep, new_sigh);
1606
1607bad_unshare_cleanup_ns:
1608 if (new_ns)
1609 put_namespace(new_ns);
1610
1611bad_unshare_cleanup_fs:
1612 if (new_fs)
1613 put_fs_struct(new_fs);
1614
1615bad_unshare_cleanup_thread:
1616bad_unshare_out:
1617 return err;
1618}