diff options
| author | Steve French <sfrench@us.ibm.com> | 2006-02-10 11:53:29 -0500 |
|---|---|---|
| committer | Steve French <sfrench@us.ibm.com> | 2006-02-10 11:53:29 -0500 |
| commit | b580513e841d81eebf0d7b02f412be0882c2ce5f (patch) | |
| tree | 9336a14058eed66c335db10512f33a31f9e8c938 /kernel/fork.c | |
| parent | 04fdabe17c4840a4cd84c3589f20f5d4689b1ec5 (diff) | |
| parent | 418aade459f03318defd18ef0b11981a63bd81b0 (diff) | |
Merge with /pub/scm/linux/kernel/git/torvalds/linux-2.6.git
Signed-off-by: Steve French <sfrench@us.ibm.com>
Diffstat (limited to 'kernel/fork.c')
| -rw-r--r-- | kernel/fork.c | 394 |
1 files changed, 340 insertions, 54 deletions
diff --git a/kernel/fork.c b/kernel/fork.c index 7f0ab5ee948c..8e88b374cee9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
| @@ -446,6 +446,55 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm) | |||
| 446 | } | 446 | } |
| 447 | } | 447 | } |
| 448 | 448 | ||
| 449 | /* | ||
| 450 | * Allocate a new mm structure and copy contents from the | ||
| 451 | * mm structure of the passed in task structure. | ||
| 452 | */ | ||
| 453 | static struct mm_struct *dup_mm(struct task_struct *tsk) | ||
| 454 | { | ||
| 455 | struct mm_struct *mm, *oldmm = current->mm; | ||
| 456 | int err; | ||
| 457 | |||
| 458 | if (!oldmm) | ||
| 459 | return NULL; | ||
| 460 | |||
| 461 | mm = allocate_mm(); | ||
| 462 | if (!mm) | ||
| 463 | goto fail_nomem; | ||
| 464 | |||
| 465 | memcpy(mm, oldmm, sizeof(*mm)); | ||
| 466 | |||
| 467 | if (!mm_init(mm)) | ||
| 468 | goto fail_nomem; | ||
| 469 | |||
| 470 | if (init_new_context(tsk, mm)) | ||
| 471 | goto fail_nocontext; | ||
| 472 | |||
| 473 | err = dup_mmap(mm, oldmm); | ||
| 474 | if (err) | ||
| 475 | goto free_pt; | ||
| 476 | |||
| 477 | mm->hiwater_rss = get_mm_rss(mm); | ||
| 478 | mm->hiwater_vm = mm->total_vm; | ||
| 479 | |||
| 480 | return mm; | ||
| 481 | |||
| 482 | free_pt: | ||
| 483 | mmput(mm); | ||
| 484 | |||
| 485 | fail_nomem: | ||
| 486 | return NULL; | ||
| 487 | |||
| 488 | fail_nocontext: | ||
| 489 | /* | ||
| 490 | * If init_new_context() failed, we cannot use mmput() to free the mm | ||
| 491 | * because it calls destroy_context() | ||
| 492 | */ | ||
| 493 | mm_free_pgd(mm); | ||
| 494 | free_mm(mm); | ||
| 495 | return NULL; | ||
| 496 | } | ||
| 497 | |||
| 449 | static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) | 498 | static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) |
| 450 | { | 499 | { |
| 451 | struct mm_struct * mm, *oldmm; | 500 | struct mm_struct * mm, *oldmm; |
| @@ -473,43 +522,17 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) | |||
| 473 | } | 522 | } |
| 474 | 523 | ||
| 475 | retval = -ENOMEM; | 524 | retval = -ENOMEM; |
| 476 | mm = allocate_mm(); | 525 | mm = dup_mm(tsk); |
| 477 | if (!mm) | 526 | if (!mm) |
| 478 | goto fail_nomem; | 527 | goto fail_nomem; |
| 479 | 528 | ||
| 480 | /* Copy the current MM stuff.. */ | ||
| 481 | memcpy(mm, oldmm, sizeof(*mm)); | ||
| 482 | if (!mm_init(mm)) | ||
| 483 | goto fail_nomem; | ||
| 484 | |||
| 485 | if (init_new_context(tsk,mm)) | ||
| 486 | goto fail_nocontext; | ||
| 487 | |||
| 488 | retval = dup_mmap(mm, oldmm); | ||
| 489 | if (retval) | ||
| 490 | goto free_pt; | ||
| 491 | |||
| 492 | mm->hiwater_rss = get_mm_rss(mm); | ||
| 493 | mm->hiwater_vm = mm->total_vm; | ||
| 494 | |||
| 495 | good_mm: | 529 | good_mm: |
| 496 | tsk->mm = mm; | 530 | tsk->mm = mm; |
| 497 | tsk->active_mm = mm; | 531 | tsk->active_mm = mm; |
| 498 | return 0; | 532 | return 0; |
| 499 | 533 | ||
| 500 | free_pt: | ||
| 501 | mmput(mm); | ||
| 502 | fail_nomem: | 534 | fail_nomem: |
| 503 | return retval; | 535 | return retval; |
| 504 | |||
| 505 | fail_nocontext: | ||
| 506 | /* | ||
| 507 | * If init_new_context() failed, we cannot use mmput() to free the mm | ||
| 508 | * because it calls destroy_context() | ||
| 509 | */ | ||
| 510 | mm_free_pgd(mm); | ||
| 511 | free_mm(mm); | ||
| 512 | return retval; | ||
| 513 | } | 536 | } |
| 514 | 537 | ||
| 515 | static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old) | 538 | static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old) |
| @@ -597,32 +620,17 @@ out: | |||
| 597 | return newf; | 620 | return newf; |
| 598 | } | 621 | } |
| 599 | 622 | ||
| 600 | static int copy_files(unsigned long clone_flags, struct task_struct * tsk) | 623 | /* |
| 624 | * Allocate a new files structure and copy contents from the | ||
| 625 | * passed in files structure. | ||
| 626 | */ | ||
| 627 | static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) | ||
| 601 | { | 628 | { |
| 602 | struct files_struct *oldf, *newf; | 629 | struct files_struct *newf; |
| 603 | struct file **old_fds, **new_fds; | 630 | struct file **old_fds, **new_fds; |
| 604 | int open_files, size, i, error = 0, expand; | 631 | int open_files, size, i, expand; |
| 605 | struct fdtable *old_fdt, *new_fdt; | 632 | struct fdtable *old_fdt, *new_fdt; |
| 606 | 633 | ||
| 607 | /* | ||
| 608 | * A background process may not have any files ... | ||
| 609 | */ | ||
| 610 | oldf = current->files; | ||
| 611 | if (!oldf) | ||
| 612 | goto out; | ||
| 613 | |||
| 614 | if (clone_flags & CLONE_FILES) { | ||
| 615 | atomic_inc(&oldf->count); | ||
| 616 | goto out; | ||
| 617 | } | ||
| 618 | |||
| 619 | /* | ||
| 620 | * Note: we may be using current for both targets (See exec.c) | ||
| 621 | * This works because we cache current->files (old) as oldf. Don't | ||
| 622 | * break this. | ||
| 623 | */ | ||
| 624 | tsk->files = NULL; | ||
| 625 | error = -ENOMEM; | ||
| 626 | newf = alloc_files(); | 634 | newf = alloc_files(); |
| 627 | if (!newf) | 635 | if (!newf) |
| 628 | goto out; | 636 | goto out; |
| @@ -651,9 +659,9 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk) | |||
| 651 | if (expand) { | 659 | if (expand) { |
| 652 | spin_unlock(&oldf->file_lock); | 660 | spin_unlock(&oldf->file_lock); |
| 653 | spin_lock(&newf->file_lock); | 661 | spin_lock(&newf->file_lock); |
| 654 | error = expand_files(newf, open_files-1); | 662 | *errorp = expand_files(newf, open_files-1); |
| 655 | spin_unlock(&newf->file_lock); | 663 | spin_unlock(&newf->file_lock); |
| 656 | if (error < 0) | 664 | if (*errorp < 0) |
| 657 | goto out_release; | 665 | goto out_release; |
| 658 | new_fdt = files_fdtable(newf); | 666 | new_fdt = files_fdtable(newf); |
| 659 | /* | 667 | /* |
| @@ -702,10 +710,8 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk) | |||
| 702 | memset(&new_fdt->close_on_exec->fds_bits[start], 0, left); | 710 | memset(&new_fdt->close_on_exec->fds_bits[start], 0, left); |
| 703 | } | 711 | } |
| 704 | 712 | ||
| 705 | tsk->files = newf; | ||
| 706 | error = 0; | ||
| 707 | out: | 713 | out: |
| 708 | return error; | 714 | return newf; |
| 709 | 715 | ||
| 710 | out_release: | 716 | out_release: |
| 711 | free_fdset (new_fdt->close_on_exec, new_fdt->max_fdset); | 717 | free_fdset (new_fdt->close_on_exec, new_fdt->max_fdset); |
| @@ -715,6 +721,40 @@ out_release: | |||
| 715 | goto out; | 721 | goto out; |
| 716 | } | 722 | } |
| 717 | 723 | ||
| 724 | static int copy_files(unsigned long clone_flags, struct task_struct * tsk) | ||
| 725 | { | ||
| 726 | struct files_struct *oldf, *newf; | ||
| 727 | int error = 0; | ||
| 728 | |||
| 729 | /* | ||
| 730 | * A background process may not have any files ... | ||
| 731 | */ | ||
| 732 | oldf = current->files; | ||
| 733 | if (!oldf) | ||
| 734 | goto out; | ||
| 735 | |||
| 736 | if (clone_flags & CLONE_FILES) { | ||
| 737 | atomic_inc(&oldf->count); | ||
| 738 | goto out; | ||
| 739 | } | ||
| 740 | |||
| 741 | /* | ||
| 742 | * Note: we may be using current for both targets (See exec.c) | ||
| 743 | * This works because we cache current->files (old) as oldf. Don't | ||
| 744 | * break this. | ||
| 745 | */ | ||
| 746 | tsk->files = NULL; | ||
| 747 | error = -ENOMEM; | ||
| 748 | newf = dup_fd(oldf, &error); | ||
| 749 | if (!newf) | ||
| 750 | goto out; | ||
| 751 | |||
| 752 | tsk->files = newf; | ||
| 753 | error = 0; | ||
| 754 | out: | ||
| 755 | return error; | ||
| 756 | } | ||
| 757 | |||
| 718 | /* | 758 | /* |
| 719 | * Helper to unshare the files of the current task. | 759 | * Helper to unshare the files of the current task. |
| 720 | * We don't want to expose copy_files internals to | 760 | * We don't want to expose copy_files internals to |
| @@ -1323,3 +1363,249 @@ void __init proc_caches_init(void) | |||
| 1323 | sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, | 1363 | sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, |
| 1324 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); | 1364 | SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); |
| 1325 | } | 1365 | } |
| 1366 | |||
| 1367 | |||
| 1368 | /* | ||
| 1369 | * Check constraints on flags passed to the unshare system call and | ||
| 1370 | * force unsharing of additional process context as appropriate. | ||
| 1371 | */ | ||
| 1372 | static inline void check_unshare_flags(unsigned long *flags_ptr) | ||
| 1373 | { | ||
| 1374 | /* | ||
| 1375 | * If unsharing a thread from a thread group, must also | ||
| 1376 | * unshare vm. | ||
| 1377 | */ | ||
| 1378 | if (*flags_ptr & CLONE_THREAD) | ||
| 1379 | *flags_ptr |= CLONE_VM; | ||
| 1380 | |||
| 1381 | /* | ||
| 1382 | * If unsharing vm, must also unshare signal handlers. | ||
| 1383 | */ | ||
| 1384 | if (*flags_ptr & CLONE_VM) | ||
| 1385 | *flags_ptr |= CLONE_SIGHAND; | ||
| 1386 | |||
| 1387 | /* | ||
| 1388 | * If unsharing signal handlers and the task was created | ||
| 1389 | * using CLONE_THREAD, then must unshare the thread | ||
| 1390 | */ | ||
| 1391 | if ((*flags_ptr & CLONE_SIGHAND) && | ||
| 1392 | (atomic_read(&current->signal->count) > 1)) | ||
| 1393 | *flags_ptr |= CLONE_THREAD; | ||
| 1394 | |||
| 1395 | /* | ||
| 1396 | * If unsharing namespace, must also unshare filesystem information. | ||
| 1397 | */ | ||
| 1398 | if (*flags_ptr & CLONE_NEWNS) | ||
| 1399 | *flags_ptr |= CLONE_FS; | ||
| 1400 | } | ||
| 1401 | |||
| 1402 | /* | ||
| 1403 | * Unsharing of tasks created with CLONE_THREAD is not supported yet | ||
| 1404 | */ | ||
| 1405 | static int unshare_thread(unsigned long unshare_flags) | ||
| 1406 | { | ||
| 1407 | if (unshare_flags & CLONE_THREAD) | ||
| 1408 | return -EINVAL; | ||
| 1409 | |||
| 1410 | return 0; | ||
| 1411 | } | ||
| 1412 | |||
| 1413 | /* | ||
| 1414 | * Unshare the filesystem structure if it is being shared | ||
| 1415 | */ | ||
| 1416 | static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp) | ||
| 1417 | { | ||
| 1418 | struct fs_struct *fs = current->fs; | ||
| 1419 | |||
| 1420 | if ((unshare_flags & CLONE_FS) && | ||
| 1421 | (fs && atomic_read(&fs->count) > 1)) { | ||
| 1422 | *new_fsp = __copy_fs_struct(current->fs); | ||
| 1423 | if (!*new_fsp) | ||
| 1424 | return -ENOMEM; | ||
| 1425 | } | ||
| 1426 | |||
| 1427 | return 0; | ||
| 1428 | } | ||
| 1429 | |||
| 1430 | /* | ||
| 1431 | * Unshare the namespace structure if it is being shared | ||
| 1432 | */ | ||
| 1433 | static int unshare_namespace(unsigned long unshare_flags, struct namespace **new_nsp, struct fs_struct *new_fs) | ||
| 1434 | { | ||
| 1435 | struct namespace *ns = current->namespace; | ||
| 1436 | |||
| 1437 | if ((unshare_flags & CLONE_NEWNS) && | ||
| 1438 | (ns && atomic_read(&ns->count) > 1)) { | ||
| 1439 | if (!capable(CAP_SYS_ADMIN)) | ||
| 1440 | return -EPERM; | ||
| 1441 | |||
| 1442 | *new_nsp = dup_namespace(current, new_fs ? new_fs : current->fs); | ||
| 1443 | if (!*new_nsp) | ||
| 1444 | return -ENOMEM; | ||
| 1445 | } | ||
| 1446 | |||
| 1447 | return 0; | ||
| 1448 | } | ||
| 1449 | |||
| 1450 | /* | ||
| 1451 | * Unsharing of sighand for tasks created with CLONE_SIGHAND is not | ||
| 1452 | * supported yet | ||
| 1453 | */ | ||
| 1454 | static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp) | ||
| 1455 | { | ||
| 1456 | struct sighand_struct *sigh = current->sighand; | ||
| 1457 | |||
| 1458 | if ((unshare_flags & CLONE_SIGHAND) && | ||
| 1459 | (sigh && atomic_read(&sigh->count) > 1)) | ||
| 1460 | return -EINVAL; | ||
| 1461 | else | ||
| 1462 | return 0; | ||
| 1463 | } | ||
| 1464 | |||
| 1465 | /* | ||
| 1466 | * Unshare vm if it is being shared | ||
| 1467 | */ | ||
| 1468 | static int unshare_vm(unsigned long unshare_flags, struct mm_struct **new_mmp) | ||
| 1469 | { | ||
| 1470 | struct mm_struct *mm = current->mm; | ||
| 1471 | |||
| 1472 | if ((unshare_flags & CLONE_VM) && | ||
| 1473 | (mm && atomic_read(&mm->mm_users) > 1)) { | ||
| 1474 | *new_mmp = dup_mm(current); | ||
| 1475 | if (!*new_mmp) | ||
| 1476 | return -ENOMEM; | ||
| 1477 | } | ||
| 1478 | |||
| 1479 | return 0; | ||
| 1480 | } | ||
| 1481 | |||
| 1482 | /* | ||
| 1483 | * Unshare file descriptor table if it is being shared | ||
| 1484 | */ | ||
| 1485 | static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp) | ||
| 1486 | { | ||
| 1487 | struct files_struct *fd = current->files; | ||
| 1488 | int error = 0; | ||
| 1489 | |||
| 1490 | if ((unshare_flags & CLONE_FILES) && | ||
| 1491 | (fd && atomic_read(&fd->count) > 1)) { | ||
| 1492 | *new_fdp = dup_fd(fd, &error); | ||
| 1493 | if (!*new_fdp) | ||
| 1494 | return error; | ||
| 1495 | } | ||
| 1496 | |||
| 1497 | return 0; | ||
| 1498 | } | ||
| 1499 | |||
| 1500 | /* | ||
| 1501 | * Unsharing of semundo for tasks created with CLONE_SYSVSEM is not | ||
| 1502 | * supported yet | ||
| 1503 | */ | ||
| 1504 | static int unshare_semundo(unsigned long unshare_flags, struct sem_undo_list **new_ulistp) | ||
| 1505 | { | ||
| 1506 | if (unshare_flags & CLONE_SYSVSEM) | ||
| 1507 | return -EINVAL; | ||
| 1508 | |||
| 1509 | return 0; | ||
| 1510 | } | ||
| 1511 | |||
| 1512 | /* | ||
| 1513 | * unshare allows a process to 'unshare' part of the process | ||
| 1514 | * context which was originally shared using clone. copy_* | ||
| 1515 | * functions used by do_fork() cannot be used here directly | ||
| 1516 | * because they modify an inactive task_struct that is being | ||
| 1517 | * constructed. Here we are modifying the current, active, | ||
| 1518 | * task_struct. | ||
| 1519 | */ | ||
| 1520 | asmlinkage long sys_unshare(unsigned long unshare_flags) | ||
| 1521 | { | ||
| 1522 | int err = 0; | ||
| 1523 | struct fs_struct *fs, *new_fs = NULL; | ||
| 1524 | struct namespace *ns, *new_ns = NULL; | ||
| 1525 | struct sighand_struct *sigh, *new_sigh = NULL; | ||
| 1526 | struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL; | ||
| 1527 | struct files_struct *fd, *new_fd = NULL; | ||
| 1528 | struct sem_undo_list *new_ulist = NULL; | ||
| 1529 | |||
| 1530 | check_unshare_flags(&unshare_flags); | ||
| 1531 | |||
| 1532 | if ((err = unshare_thread(unshare_flags))) | ||
| 1533 | goto bad_unshare_out; | ||
| 1534 | if ((err = unshare_fs(unshare_flags, &new_fs))) | ||
| 1535 | goto bad_unshare_cleanup_thread; | ||
| 1536 | if ((err = unshare_namespace(unshare_flags, &new_ns, new_fs))) | ||
| 1537 | goto bad_unshare_cleanup_fs; | ||
| 1538 | if ((err = unshare_sighand(unshare_flags, &new_sigh))) | ||
| 1539 | goto bad_unshare_cleanup_ns; | ||
| 1540 | if ((err = unshare_vm(unshare_flags, &new_mm))) | ||
| 1541 | goto bad_unshare_cleanup_sigh; | ||
| 1542 | if ((err = unshare_fd(unshare_flags, &new_fd))) | ||
| 1543 | goto bad_unshare_cleanup_vm; | ||
| 1544 | if ((err = unshare_semundo(unshare_flags, &new_ulist))) | ||
| 1545 | goto bad_unshare_cleanup_fd; | ||
| 1546 | |||
| 1547 | if (new_fs || new_ns || new_sigh || new_mm || new_fd || new_ulist) { | ||
| 1548 | |||
| 1549 | task_lock(current); | ||
| 1550 | |||
| 1551 | if (new_fs) { | ||
| 1552 | fs = current->fs; | ||
| 1553 | current->fs = new_fs; | ||
| 1554 | new_fs = fs; | ||
| 1555 | } | ||
| 1556 | |||
| 1557 | if (new_ns) { | ||
| 1558 | ns = current->namespace; | ||
| 1559 | current->namespace = new_ns; | ||
| 1560 | new_ns = ns; | ||
| 1561 | } | ||
| 1562 | |||
| 1563 | if (new_sigh) { | ||
| 1564 | sigh = current->sighand; | ||
| 1565 | current->sighand = new_sigh; | ||
| 1566 | new_sigh = sigh; | ||
| 1567 | } | ||
| 1568 | |||
| 1569 | if (new_mm) { | ||
| 1570 | mm = current->mm; | ||
| 1571 | active_mm = current->active_mm; | ||
| 1572 | current->mm = new_mm; | ||
| 1573 | current->active_mm = new_mm; | ||
| 1574 | activate_mm(active_mm, new_mm); | ||
| 1575 | new_mm = mm; | ||
| 1576 | } | ||
| 1577 | |||
| 1578 | if (new_fd) { | ||
| 1579 | fd = current->files; | ||
| 1580 | current->files = new_fd; | ||
| 1581 | new_fd = fd; | ||
| 1582 | } | ||
| 1583 | |||
| 1584 | task_unlock(current); | ||
| 1585 | } | ||
| 1586 | |||
| 1587 | bad_unshare_cleanup_fd: | ||
| 1588 | if (new_fd) | ||
| 1589 | put_files_struct(new_fd); | ||
| 1590 | |||
| 1591 | bad_unshare_cleanup_vm: | ||
| 1592 | if (new_mm) | ||
| 1593 | mmput(new_mm); | ||
| 1594 | |||
| 1595 | bad_unshare_cleanup_sigh: | ||
| 1596 | if (new_sigh) | ||
| 1597 | if (atomic_dec_and_test(&new_sigh->count)) | ||
| 1598 | kmem_cache_free(sighand_cachep, new_sigh); | ||
| 1599 | |||
| 1600 | bad_unshare_cleanup_ns: | ||
| 1601 | if (new_ns) | ||
| 1602 | put_namespace(new_ns); | ||
| 1603 | |||
| 1604 | bad_unshare_cleanup_fs: | ||
| 1605 | if (new_fs) | ||
| 1606 | put_fs_struct(new_fs); | ||
| 1607 | |||
| 1608 | bad_unshare_cleanup_thread: | ||
| 1609 | bad_unshare_out: | ||
| 1610 | return err; | ||
| 1611 | } | ||
