diff options
Diffstat (limited to 'kernel/fork.c')
-rw-r--r-- | kernel/fork.c | 394 |
1 files changed, 340 insertions, 54 deletions
diff --git a/kernel/fork.c b/kernel/fork.c index 7f0ab5ee948c..8e88b374cee9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -446,6 +446,55 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm) | |||
446 | } | 446 | } |
447 | } | 447 | } |
448 | 448 | ||
449 | /* | ||
450 | * Allocate a new mm structure and copy contents from the | ||
451 | * mm structure of the passed in task structure. | ||
452 | */ | ||
453 | static struct mm_struct *dup_mm(struct task_struct *tsk) | ||
454 | { | ||
455 | struct mm_struct *mm, *oldmm = current->mm; | ||
456 | int err; | ||
457 | |||
458 | if (!oldmm) | ||
459 | return NULL; | ||
460 | |||
461 | mm = allocate_mm(); | ||
462 | if (!mm) | ||
463 | goto fail_nomem; | ||
464 | |||
465 | memcpy(mm, oldmm, sizeof(*mm)); | ||
466 | |||
467 | if (!mm_init(mm)) | ||
468 | goto fail_nomem; | ||
469 | |||
470 | if (init_new_context(tsk, mm)) | ||
471 | goto fail_nocontext; | ||
472 | |||
473 | err = dup_mmap(mm, oldmm); | ||
474 | if (err) | ||
475 | goto free_pt; | ||
476 | |||
477 | mm->hiwater_rss = get_mm_rss(mm); | ||
478 | mm->hiwater_vm = mm->total_vm; | ||
479 | |||
480 | return mm; | ||
481 | |||
482 | free_pt: | ||
483 | mmput(mm); | ||
484 | |||
485 | fail_nomem: | ||
486 | return NULL; | ||
487 | |||
488 | fail_nocontext: | ||
489 | /* | ||
490 | * If init_new_context() failed, we cannot use mmput() to free the mm | ||
491 | * because it calls destroy_context() | ||
492 | */ | ||
493 | mm_free_pgd(mm); | ||
494 | free_mm(mm); | ||
495 | return NULL; | ||
496 | } | ||
497 | |||
449 | static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) | 498 | static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) |
450 | { | 499 | { |
451 | struct mm_struct * mm, *oldmm; | 500 | struct mm_struct * mm, *oldmm; |
@@ -473,43 +522,17 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) | |||
473 | } | 522 | } |
474 | 523 | ||
475 | retval = -ENOMEM; | 524 | retval = -ENOMEM; |
476 | mm = allocate_mm(); | 525 | mm = dup_mm(tsk); |
477 | if (!mm) | 526 | if (!mm) |
478 | goto fail_nomem; | 527 | goto fail_nomem; |
479 | 528 | ||
480 | /* Copy the current MM stuff.. */ | ||
481 | memcpy(mm, oldmm, sizeof(*mm)); | ||
482 | if (!mm_init(mm)) | ||
483 | goto fail_nomem; | ||
484 | |||
485 | if (init_new_context(tsk,mm)) | ||
486 | goto fail_nocontext; | ||
487 | |||
488 | retval = dup_mmap(mm, oldmm); | ||
489 | if (retval) | ||
490 | goto free_pt; | ||
491 | |||
492 | mm->hiwater_rss = get_mm_rss(mm); | ||
493 | mm->hiwater_vm = mm->total_vm; | ||
494 | |||
495 | good_mm: | 529 | good_mm: |
496 | tsk->mm = mm; | 530 | tsk->mm = mm; |
497 | tsk->active_mm = mm; | 531 | tsk->active_mm = mm; |
498 | return 0; | 532 | return 0; |
499 | 533 | ||
500 | free_pt: | ||
501 | mmput(mm); | ||
502 | fail_nomem: | 534 | fail_nomem: |
503 | return retval; | 535 | return retval; |
504 | |||
505 | fail_nocontext: | ||
506 | /* | ||
507 | * If init_new_context() failed, we cannot use mmput() to free the mm | ||
508 | * because it calls destroy_context() | ||
509 | */ | ||
510 | mm_free_pgd(mm); | ||
511 | free_mm(mm); | ||
512 | return retval; | ||
513 | } | 536 | } |
514 | 537 | ||
515 | static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old) | 538 | static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old) |
@@ -597,32 +620,17 @@ out: | |||
597 | return newf; | 620 | return newf; |
598 | } | 621 | } |
599 | 622 | ||
600 | static int copy_files(unsigned long clone_flags, struct task_struct * tsk) | 623 | /* |
624 | * Allocate a new files structure and copy contents from the | ||
625 | * passed in files structure. | ||
626 | */ | ||
627 | static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) | ||
601 | { | 628 | { |
602 | struct files_struct *oldf, *newf; | 629 | struct files_struct *newf; |
603 | struct file **old_fds, **new_fds; | 630 | struct file **old_fds, **new_fds; |
604 | int open_files, size, i, error = 0, expand; | 631 | int open_files, size, i, expand; |
605 | struct fdtable *old_fdt, *new_fdt; | 632 | struct fdtable *old_fdt, *new_fdt; |
606 | 633 | ||
607 | /* | ||
608 | * A background process may not have any files ... | ||
609 | */ | ||
610 | oldf = current->files; | ||
611 | if (!oldf) | ||
612 | goto out; | ||
613 | |||
614 | if (clone_flags & CLONE_FILES) { | ||
615 | atomic_inc(&oldf->count); | ||
616 | goto out; | ||
617 | } | ||
618 | |||
619 | /* | ||
620 | * Note: we may be using current for both targets (See exec.c) | ||
621 | * This works because we cache current->files (old) as oldf. Don't | ||
622 | * break this. | ||
623 | */ | ||
624 | tsk->files = NULL; | ||
625 | error = -ENOMEM; | ||
626 | newf = alloc_files(); | 634 | newf = alloc_files(); |
627 | if (!newf) | 635 | if (!newf) |
628 | goto out; | 636 | goto out; |
@@ -651,9 +659,9 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk) | |||
651 | if (expand) { | 659 | if (expand) { |
652 | spin_unlock(&oldf->file_lock); | 660 | spin_unlock(&oldf->file_lock); |
653 | spin_lock(&newf->file_lock); | 661 | spin_lock(&newf->file_lock); |
654 | error = expand_files(newf, open_files-1); | 662 | *errorp = expand_files(newf, open_files-1); |
655 | spin_unlock(&newf->file_lock); | 663 | spin_unlock(&newf->file_lock); |
656 | if (error < 0) | 664 | if (*errorp < 0) |
657 | goto out_release; | 665 | goto out_release; |
658 | new_fdt = files_fdtable(newf); | 666 | new_fdt = files_fdtable(newf); |
659 | /* | 667 | /* |
@@ -702,10 +710,8 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk) | |||
702 | memset(&new_fdt->close_on_exec->fds_bits[start], 0, left); | 710 | memset(&new_fdt->close_on_exec->fds_bits[start], 0, left); |
703 | } | 711 | } |
704 | 712 | ||
705 | tsk->files = newf; | ||
706 | error = 0; | ||
707 | out: | 713 | out: |
708 | return error; | 714 | return newf; |
709 | 715 | ||
710 | out_release: | 716 | out_release: |
711 | free_fdset (new_fdt->close_on_exec, new_fdt->max_fdset); | 717 | free_fdset (new_fdt->close_on_exec, new_fdt->max_fdset); |
@@ -715,6 +721,40 @@ out_release: | |||
715 | goto out; | 721 | goto out; |
716 | } | 722 | } |
717 | 723 | ||
724 | static int copy_files(unsigned long clone_flags, struct task_struct * tsk) | ||
725 | { | ||
726 | struct files_struct *oldf, *newf; | ||
727 | int error = 0; | ||
728 | |||
729 | /* | ||
730 | * A background process may not have any files ... | ||
731 | */ | ||
732 | oldf = current->files; | ||
733 | if (!oldf) | ||
734 | goto out; | ||
735 | |||
736 | if (clone_flags & CLONE_FILES) { | ||
737 | atomic_inc(&oldf->count); | ||
738 | goto out; | ||
739 | } | ||
740 | |||
741 | /* | ||
742 | * Note: we may be using current for both targets (See exec.c) | ||
743 | * This works because we cache current->files (old) as oldf. Don't | ||
744 | * break this. | ||
745 | */ | ||
746 | tsk->files = NULL; | ||
747 | error = -ENOMEM; | ||
748 | newf = dup_fd(oldf, &error); | ||
749 | if (!newf) | ||
750 | goto out; | ||
751 | |||
752 | tsk->files = newf; | ||
753 | error = 0; | ||
754 | out: | ||
755 | return error; | ||
756 | } | ||
757 | |||
718 | /* | 758 | /* |
719 | * Helper to unshare the files of the current task. | 759 | * Helper to unshare the files of the current task. |
720 | * We don't want to expose copy_files internals to | 760 | * We don't want to expose copy_files internals to |
@@ -1323,3 +1363,249 @@ void __init proc_caches_init(void) | |||
1323 | sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, | 1363 | sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, |
1324 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); | 1364 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); |
1325 | } | 1365 | } |
1366 | |||
1367 | |||
1368 | /* | ||
1369 | * Check constraints on flags passed to the unshare system call and | ||
1370 | * force unsharing of additional process context as appropriate. | ||
1371 | */ | ||
1372 | static inline void check_unshare_flags(unsigned long *flags_ptr) | ||
1373 | { | ||
1374 | /* | ||
1375 | * If unsharing a thread from a thread group, must also | ||
1376 | * unshare vm. | ||
1377 | */ | ||
1378 | if (*flags_ptr & CLONE_THREAD) | ||
1379 | *flags_ptr |= CLONE_VM; | ||
1380 | |||
1381 | /* | ||
1382 | * If unsharing vm, must also unshare signal handlers. | ||
1383 | */ | ||
1384 | if (*flags_ptr & CLONE_VM) | ||
1385 | *flags_ptr |= CLONE_SIGHAND; | ||
1386 | |||
1387 | /* | ||
1388 | * If unsharing signal handlers and the task was created | ||
1389 | * using CLONE_THREAD, then must unshare the thread | ||
1390 | */ | ||
1391 | if ((*flags_ptr & CLONE_SIGHAND) && | ||
1392 | (atomic_read(¤t->signal->count) > 1)) | ||
1393 | *flags_ptr |= CLONE_THREAD; | ||
1394 | |||
1395 | /* | ||
1396 | * If unsharing namespace, must also unshare filesystem information. | ||
1397 | */ | ||
1398 | if (*flags_ptr & CLONE_NEWNS) | ||
1399 | *flags_ptr |= CLONE_FS; | ||
1400 | } | ||
1401 | |||
1402 | /* | ||
1403 | * Unsharing of tasks created with CLONE_THREAD is not supported yet | ||
1404 | */ | ||
1405 | static int unshare_thread(unsigned long unshare_flags) | ||
1406 | { | ||
1407 | if (unshare_flags & CLONE_THREAD) | ||
1408 | return -EINVAL; | ||
1409 | |||
1410 | return 0; | ||
1411 | } | ||
1412 | |||
1413 | /* | ||
1414 | * Unshare the filesystem structure if it is being shared | ||
1415 | */ | ||
1416 | static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp) | ||
1417 | { | ||
1418 | struct fs_struct *fs = current->fs; | ||
1419 | |||
1420 | if ((unshare_flags & CLONE_FS) && | ||
1421 | (fs && atomic_read(&fs->count) > 1)) { | ||
1422 | *new_fsp = __copy_fs_struct(current->fs); | ||
1423 | if (!*new_fsp) | ||
1424 | return -ENOMEM; | ||
1425 | } | ||
1426 | |||
1427 | return 0; | ||
1428 | } | ||
1429 | |||
1430 | /* | ||
1431 | * Unshare the namespace structure if it is being shared | ||
1432 | */ | ||
1433 | static int unshare_namespace(unsigned long unshare_flags, struct namespace **new_nsp, struct fs_struct *new_fs) | ||
1434 | { | ||
1435 | struct namespace *ns = current->namespace; | ||
1436 | |||
1437 | if ((unshare_flags & CLONE_NEWNS) && | ||
1438 | (ns && atomic_read(&ns->count) > 1)) { | ||
1439 | if (!capable(CAP_SYS_ADMIN)) | ||
1440 | return -EPERM; | ||
1441 | |||
1442 | *new_nsp = dup_namespace(current, new_fs ? new_fs : current->fs); | ||
1443 | if (!*new_nsp) | ||
1444 | return -ENOMEM; | ||
1445 | } | ||
1446 | |||
1447 | return 0; | ||
1448 | } | ||
1449 | |||
1450 | /* | ||
1451 | * Unsharing of sighand for tasks created with CLONE_SIGHAND is not | ||
1452 | * supported yet | ||
1453 | */ | ||
1454 | static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp) | ||
1455 | { | ||
1456 | struct sighand_struct *sigh = current->sighand; | ||
1457 | |||
1458 | if ((unshare_flags & CLONE_SIGHAND) && | ||
1459 | (sigh && atomic_read(&sigh->count) > 1)) | ||
1460 | return -EINVAL; | ||
1461 | else | ||
1462 | return 0; | ||
1463 | } | ||
1464 | |||
1465 | /* | ||
1466 | * Unshare vm if it is being shared | ||
1467 | */ | ||
1468 | static int unshare_vm(unsigned long unshare_flags, struct mm_struct **new_mmp) | ||
1469 | { | ||
1470 | struct mm_struct *mm = current->mm; | ||
1471 | |||
1472 | if ((unshare_flags & CLONE_VM) && | ||
1473 | (mm && atomic_read(&mm->mm_users) > 1)) { | ||
1474 | *new_mmp = dup_mm(current); | ||
1475 | if (!*new_mmp) | ||
1476 | return -ENOMEM; | ||
1477 | } | ||
1478 | |||
1479 | return 0; | ||
1480 | } | ||
1481 | |||
1482 | /* | ||
1483 | * Unshare file descriptor table if it is being shared | ||
1484 | */ | ||
1485 | static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp) | ||
1486 | { | ||
1487 | struct files_struct *fd = current->files; | ||
1488 | int error = 0; | ||
1489 | |||
1490 | if ((unshare_flags & CLONE_FILES) && | ||
1491 | (fd && atomic_read(&fd->count) > 1)) { | ||
1492 | *new_fdp = dup_fd(fd, &error); | ||
1493 | if (!*new_fdp) | ||
1494 | return error; | ||
1495 | } | ||
1496 | |||
1497 | return 0; | ||
1498 | } | ||
1499 | |||
1500 | /* | ||
1501 | * Unsharing of semundo for tasks created with CLONE_SYSVSEM is not | ||
1502 | * supported yet | ||
1503 | */ | ||
1504 | static int unshare_semundo(unsigned long unshare_flags, struct sem_undo_list **new_ulistp) | ||
1505 | { | ||
1506 | if (unshare_flags & CLONE_SYSVSEM) | ||
1507 | return -EINVAL; | ||
1508 | |||
1509 | return 0; | ||
1510 | } | ||
1511 | |||
1512 | /* | ||
1513 | * unshare allows a process to 'unshare' part of the process | ||
1514 | * context which was originally shared using clone. copy_* | ||
1515 | * functions used by do_fork() cannot be used here directly | ||
1516 | * because they modify an inactive task_struct that is being | ||
1517 | * constructed. Here we are modifying the current, active, | ||
1518 | * task_struct. | ||
1519 | */ | ||
1520 | asmlinkage long sys_unshare(unsigned long unshare_flags) | ||
1521 | { | ||
1522 | int err = 0; | ||
1523 | struct fs_struct *fs, *new_fs = NULL; | ||
1524 | struct namespace *ns, *new_ns = NULL; | ||
1525 | struct sighand_struct *sigh, *new_sigh = NULL; | ||
1526 | struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL; | ||
1527 | struct files_struct *fd, *new_fd = NULL; | ||
1528 | struct sem_undo_list *new_ulist = NULL; | ||
1529 | |||
1530 | check_unshare_flags(&unshare_flags); | ||
1531 | |||
1532 | if ((err = unshare_thread(unshare_flags))) | ||
1533 | goto bad_unshare_out; | ||
1534 | if ((err = unshare_fs(unshare_flags, &new_fs))) | ||
1535 | goto bad_unshare_cleanup_thread; | ||
1536 | if ((err = unshare_namespace(unshare_flags, &new_ns, new_fs))) | ||
1537 | goto bad_unshare_cleanup_fs; | ||
1538 | if ((err = unshare_sighand(unshare_flags, &new_sigh))) | ||
1539 | goto bad_unshare_cleanup_ns; | ||
1540 | if ((err = unshare_vm(unshare_flags, &new_mm))) | ||
1541 | goto bad_unshare_cleanup_sigh; | ||
1542 | if ((err = unshare_fd(unshare_flags, &new_fd))) | ||
1543 | goto bad_unshare_cleanup_vm; | ||
1544 | if ((err = unshare_semundo(unshare_flags, &new_ulist))) | ||
1545 | goto bad_unshare_cleanup_fd; | ||
1546 | |||
1547 | if (new_fs || new_ns || new_sigh || new_mm || new_fd || new_ulist) { | ||
1548 | |||
1549 | task_lock(current); | ||
1550 | |||
1551 | if (new_fs) { | ||
1552 | fs = current->fs; | ||
1553 | current->fs = new_fs; | ||
1554 | new_fs = fs; | ||
1555 | } | ||
1556 | |||
1557 | if (new_ns) { | ||
1558 | ns = current->namespace; | ||
1559 | current->namespace = new_ns; | ||
1560 | new_ns = ns; | ||
1561 | } | ||
1562 | |||
1563 | if (new_sigh) { | ||
1564 | sigh = current->sighand; | ||
1565 | current->sighand = new_sigh; | ||
1566 | new_sigh = sigh; | ||
1567 | } | ||
1568 | |||
1569 | if (new_mm) { | ||
1570 | mm = current->mm; | ||
1571 | active_mm = current->active_mm; | ||
1572 | current->mm = new_mm; | ||
1573 | current->active_mm = new_mm; | ||
1574 | activate_mm(active_mm, new_mm); | ||
1575 | new_mm = mm; | ||
1576 | } | ||
1577 | |||
1578 | if (new_fd) { | ||
1579 | fd = current->files; | ||
1580 | current->files = new_fd; | ||
1581 | new_fd = fd; | ||
1582 | } | ||
1583 | |||
1584 | task_unlock(current); | ||
1585 | } | ||
1586 | |||
1587 | bad_unshare_cleanup_fd: | ||
1588 | if (new_fd) | ||
1589 | put_files_struct(new_fd); | ||
1590 | |||
1591 | bad_unshare_cleanup_vm: | ||
1592 | if (new_mm) | ||
1593 | mmput(new_mm); | ||
1594 | |||
1595 | bad_unshare_cleanup_sigh: | ||
1596 | if (new_sigh) | ||
1597 | if (atomic_dec_and_test(&new_sigh->count)) | ||
1598 | kmem_cache_free(sighand_cachep, new_sigh); | ||
1599 | |||
1600 | bad_unshare_cleanup_ns: | ||
1601 | if (new_ns) | ||
1602 | put_namespace(new_ns); | ||
1603 | |||
1604 | bad_unshare_cleanup_fs: | ||
1605 | if (new_fs) | ||
1606 | put_fs_struct(new_fs); | ||
1607 | |||
1608 | bad_unshare_cleanup_thread: | ||
1609 | bad_unshare_out: | ||
1610 | return err; | ||
1611 | } | ||