aboutsummaryrefslogtreecommitdiffstats
path: root/fs/namei.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/namei.c')
-rw-r--r--fs/namei.c589
1 files changed, 287 insertions, 302 deletions
diff --git a/fs/namei.c b/fs/namei.c
index e05c243105a0..1c0fca6e899e 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -19,7 +19,6 @@
19#include <linux/slab.h> 19#include <linux/slab.h>
20#include <linux/fs.h> 20#include <linux/fs.h>
21#include <linux/namei.h> 21#include <linux/namei.h>
22#include <linux/quotaops.h>
23#include <linux/pagemap.h> 22#include <linux/pagemap.h>
24#include <linux/fsnotify.h> 23#include <linux/fsnotify.h>
25#include <linux/personality.h> 24#include <linux/personality.h>
@@ -498,8 +497,6 @@ static int link_path_walk(const char *, struct nameidata *);
498 497
499static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link) 498static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link)
500{ 499{
501 int res = 0;
502 char *name;
503 if (IS_ERR(link)) 500 if (IS_ERR(link))
504 goto fail; 501 goto fail;
505 502
@@ -510,22 +507,7 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l
510 path_get(&nd->root); 507 path_get(&nd->root);
511 } 508 }
512 509
513 res = link_path_walk(link, nd); 510 return link_path_walk(link, nd);
514 if (nd->depth || res || nd->last_type!=LAST_NORM)
515 return res;
516 /*
517 * If it is an iterative symlinks resolution in open_namei() we
518 * have to copy the last component. And all that crap because of
519 * bloody create() on broken symlinks. Furrfu...
520 */
521 name = __getname();
522 if (unlikely(!name)) {
523 path_put(&nd->path);
524 return -ENOMEM;
525 }
526 strcpy(name, nd->last.name);
527 nd->last.name = name;
528 return 0;
529fail: 511fail:
530 path_put(&nd->path); 512 path_put(&nd->path);
531 return PTR_ERR(link); 513 return PTR_ERR(link);
@@ -547,10 +529,10 @@ static inline void path_to_nameidata(struct path *path, struct nameidata *nd)
547 nd->path.dentry = path->dentry; 529 nd->path.dentry = path->dentry;
548} 530}
549 531
550static __always_inline int __do_follow_link(struct path *path, struct nameidata *nd) 532static __always_inline int
533__do_follow_link(struct path *path, struct nameidata *nd, void **p)
551{ 534{
552 int error; 535 int error;
553 void *cookie;
554 struct dentry *dentry = path->dentry; 536 struct dentry *dentry = path->dentry;
555 537
556 touch_atime(path->mnt, dentry); 538 touch_atime(path->mnt, dentry);
@@ -562,9 +544,9 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata
562 } 544 }
563 mntget(path->mnt); 545 mntget(path->mnt);
564 nd->last_type = LAST_BIND; 546 nd->last_type = LAST_BIND;
565 cookie = dentry->d_inode->i_op->follow_link(dentry, nd); 547 *p = dentry->d_inode->i_op->follow_link(dentry, nd);
566 error = PTR_ERR(cookie); 548 error = PTR_ERR(*p);
567 if (!IS_ERR(cookie)) { 549 if (!IS_ERR(*p)) {
568 char *s = nd_get_link(nd); 550 char *s = nd_get_link(nd);
569 error = 0; 551 error = 0;
570 if (s) 552 if (s)
@@ -574,8 +556,6 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata
574 if (error) 556 if (error)
575 path_put(&nd->path); 557 path_put(&nd->path);
576 } 558 }
577 if (dentry->d_inode->i_op->put_link)
578 dentry->d_inode->i_op->put_link(dentry, nd, cookie);
579 } 559 }
580 return error; 560 return error;
581} 561}
@@ -589,6 +569,7 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata
589 */ 569 */
590static inline int do_follow_link(struct path *path, struct nameidata *nd) 570static inline int do_follow_link(struct path *path, struct nameidata *nd)
591{ 571{
572 void *cookie;
592 int err = -ELOOP; 573 int err = -ELOOP;
593 if (current->link_count >= MAX_NESTED_LINKS) 574 if (current->link_count >= MAX_NESTED_LINKS)
594 goto loop; 575 goto loop;
@@ -602,7 +583,9 @@ static inline int do_follow_link(struct path *path, struct nameidata *nd)
602 current->link_count++; 583 current->link_count++;
603 current->total_link_count++; 584 current->total_link_count++;
604 nd->depth++; 585 nd->depth++;
605 err = __do_follow_link(path, nd); 586 err = __do_follow_link(path, nd, &cookie);
587 if (!IS_ERR(cookie) && path->dentry->d_inode->i_op->put_link)
588 path->dentry->d_inode->i_op->put_link(path->dentry, nd, cookie);
606 path_put(path); 589 path_put(path);
607 current->link_count--; 590 current->link_count--;
608 nd->depth--; 591 nd->depth--;
@@ -689,33 +672,20 @@ static __always_inline void follow_dotdot(struct nameidata *nd)
689 set_root(nd); 672 set_root(nd);
690 673
691 while(1) { 674 while(1) {
692 struct vfsmount *parent;
693 struct dentry *old = nd->path.dentry; 675 struct dentry *old = nd->path.dentry;
694 676
695 if (nd->path.dentry == nd->root.dentry && 677 if (nd->path.dentry == nd->root.dentry &&
696 nd->path.mnt == nd->root.mnt) { 678 nd->path.mnt == nd->root.mnt) {
697 break; 679 break;
698 } 680 }
699 spin_lock(&dcache_lock);
700 if (nd->path.dentry != nd->path.mnt->mnt_root) { 681 if (nd->path.dentry != nd->path.mnt->mnt_root) {
701 nd->path.dentry = dget(nd->path.dentry->d_parent); 682 /* rare case of legitimate dget_parent()... */
702 spin_unlock(&dcache_lock); 683 nd->path.dentry = dget_parent(nd->path.dentry);
703 dput(old); 684 dput(old);
704 break; 685 break;
705 } 686 }
706 spin_unlock(&dcache_lock); 687 if (!follow_up(&nd->path))
707 spin_lock(&vfsmount_lock);
708 parent = nd->path.mnt->mnt_parent;
709 if (parent == nd->path.mnt) {
710 spin_unlock(&vfsmount_lock);
711 break; 688 break;
712 }
713 mntget(parent);
714 nd->path.dentry = dget(nd->path.mnt->mnt_mountpoint);
715 spin_unlock(&vfsmount_lock);
716 dput(old);
717 mntput(nd->path.mnt);
718 nd->path.mnt = parent;
719 } 689 }
720 follow_mount(&nd->path); 690 follow_mount(&nd->path);
721} 691}
@@ -823,6 +793,17 @@ fail:
823} 793}
824 794
825/* 795/*
796 * This is a temporary kludge to deal with "automount" symlinks; proper
797 * solution is to trigger them on follow_mount(), so that do_lookup()
798 * would DTRT. To be killed before 2.6.34-final.
799 */
800static inline int follow_on_final(struct inode *inode, unsigned lookup_flags)
801{
802 return inode && unlikely(inode->i_op->follow_link) &&
803 ((lookup_flags & LOOKUP_FOLLOW) || S_ISDIR(inode->i_mode));
804}
805
806/*
826 * Name resolution. 807 * Name resolution.
827 * This is the basic name resolution function, turning a pathname into 808 * This is the basic name resolution function, turning a pathname into
828 * the final dentry. We expect 'base' to be positive and a directory. 809 * the final dentry. We expect 'base' to be positive and a directory.
@@ -942,8 +923,7 @@ last_component:
942 if (err) 923 if (err)
943 break; 924 break;
944 inode = next.dentry->d_inode; 925 inode = next.dentry->d_inode;
945 if ((lookup_flags & LOOKUP_FOLLOW) 926 if (follow_on_final(inode, lookup_flags)) {
946 && inode && inode->i_op->follow_link) {
947 err = do_follow_link(&next, nd); 927 err = do_follow_link(&next, nd);
948 if (err) 928 if (err)
949 goto return_err; 929 goto return_err;
@@ -1337,7 +1317,7 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
1337 return -ENOENT; 1317 return -ENOENT;
1338 1318
1339 BUG_ON(victim->d_parent->d_inode != dir); 1319 BUG_ON(victim->d_parent->d_inode != dir);
1340 audit_inode_child(victim->d_name.name, victim, dir); 1320 audit_inode_child(victim, dir);
1341 1321
1342 error = inode_permission(dir, MAY_WRITE | MAY_EXEC); 1322 error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
1343 if (error) 1323 if (error)
@@ -1378,22 +1358,6 @@ static inline int may_create(struct inode *dir, struct dentry *child)
1378 return inode_permission(dir, MAY_WRITE | MAY_EXEC); 1358 return inode_permission(dir, MAY_WRITE | MAY_EXEC);
1379} 1359}
1380 1360
1381/*
1382 * O_DIRECTORY translates into forcing a directory lookup.
1383 */
1384static inline int lookup_flags(unsigned int f)
1385{
1386 unsigned long retval = LOOKUP_FOLLOW;
1387
1388 if (f & O_NOFOLLOW)
1389 retval &= ~LOOKUP_FOLLOW;
1390
1391 if (f & O_DIRECTORY)
1392 retval |= LOOKUP_DIRECTORY;
1393
1394 return retval;
1395}
1396
1397/* 1361/*
1398 * p1 and p2 should be directories on the same fs. 1362 * p1 and p2 should be directories on the same fs.
1399 */ 1363 */
@@ -1451,7 +1415,6 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
1451 error = security_inode_create(dir, dentry, mode); 1415 error = security_inode_create(dir, dentry, mode);
1452 if (error) 1416 if (error)
1453 return error; 1417 return error;
1454 vfs_dq_init(dir);
1455 error = dir->i_op->create(dir, dentry, mode, nd); 1418 error = dir->i_op->create(dir, dentry, mode, nd);
1456 if (!error) 1419 if (!error)
1457 fsnotify_create(dir, dentry); 1420 fsnotify_create(dir, dentry);
@@ -1493,7 +1456,7 @@ int may_open(struct path *path, int acc_mode, int flag)
1493 * An append-only file must be opened in append mode for writing. 1456 * An append-only file must be opened in append mode for writing.
1494 */ 1457 */
1495 if (IS_APPEND(inode)) { 1458 if (IS_APPEND(inode)) {
1496 if ((flag & FMODE_WRITE) && !(flag & O_APPEND)) 1459 if ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND))
1497 return -EPERM; 1460 return -EPERM;
1498 if (flag & O_TRUNC) 1461 if (flag & O_TRUNC)
1499 return -EPERM; 1462 return -EPERM;
@@ -1537,7 +1500,7 @@ static int handle_truncate(struct path *path)
1537 * what get passed to sys_open(). 1500 * what get passed to sys_open().
1538 */ 1501 */
1539static int __open_namei_create(struct nameidata *nd, struct path *path, 1502static int __open_namei_create(struct nameidata *nd, struct path *path,
1540 int flag, int mode) 1503 int open_flag, int mode)
1541{ 1504{
1542 int error; 1505 int error;
1543 struct dentry *dir = nd->path.dentry; 1506 struct dentry *dir = nd->path.dentry;
@@ -1555,7 +1518,7 @@ out_unlock:
1555 if (error) 1518 if (error)
1556 return error; 1519 return error;
1557 /* Don't check for write permission, don't truncate */ 1520 /* Don't check for write permission, don't truncate */
1558 return may_open(&nd->path, 0, flag & ~O_TRUNC); 1521 return may_open(&nd->path, 0, open_flag & ~O_TRUNC);
1559} 1522}
1560 1523
1561/* 1524/*
@@ -1593,129 +1556,132 @@ static int open_will_truncate(int flag, struct inode *inode)
1593 return (flag & O_TRUNC); 1556 return (flag & O_TRUNC);
1594} 1557}
1595 1558
1596/* 1559static struct file *finish_open(struct nameidata *nd,
1597 * Note that the low bits of the passed in "open_flag" 1560 int open_flag, int acc_mode)
1598 * are not the same as in the local variable "flag". See
1599 * open_to_namei_flags() for more details.
1600 */
1601struct file *do_filp_open(int dfd, const char *pathname,
1602 int open_flag, int mode, int acc_mode)
1603{ 1561{
1604 struct file *filp; 1562 struct file *filp;
1605 struct nameidata nd;
1606 int error;
1607 struct path path;
1608 struct dentry *dir;
1609 int count = 0;
1610 int will_truncate; 1563 int will_truncate;
1611 int flag = open_to_namei_flags(open_flag); 1564 int error;
1612 int force_reval = 0;
1613 1565
1566 will_truncate = open_will_truncate(open_flag, nd->path.dentry->d_inode);
1567 if (will_truncate) {
1568 error = mnt_want_write(nd->path.mnt);
1569 if (error)
1570 goto exit;
1571 }
1572 error = may_open(&nd->path, acc_mode, open_flag);
1573 if (error) {
1574 if (will_truncate)
1575 mnt_drop_write(nd->path.mnt);
1576 goto exit;
1577 }
1578 filp = nameidata_to_filp(nd);
1579 if (!IS_ERR(filp)) {
1580 error = ima_file_check(filp, acc_mode);
1581 if (error) {
1582 fput(filp);
1583 filp = ERR_PTR(error);
1584 }
1585 }
1586 if (!IS_ERR(filp)) {
1587 if (will_truncate) {
1588 error = handle_truncate(&nd->path);
1589 if (error) {
1590 fput(filp);
1591 filp = ERR_PTR(error);
1592 }
1593 }
1594 }
1614 /* 1595 /*
1615 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only 1596 * It is now safe to drop the mnt write
1616 * check for O_DSYNC if the need any syncing at all we enforce it's 1597 * because the filp has had a write taken
1617 * always set instead of having to deal with possibly weird behaviour 1598 * on its behalf.
1618 * for malicious applications setting only __O_SYNC.
1619 */ 1599 */
1620 if (open_flag & __O_SYNC) 1600 if (will_truncate)
1621 open_flag |= O_DSYNC; 1601 mnt_drop_write(nd->path.mnt);
1622 1602 return filp;
1623 if (!acc_mode)
1624 acc_mode = MAY_OPEN | ACC_MODE(open_flag);
1625 1603
1626 /* O_TRUNC implies we need access checks for write permissions */ 1604exit:
1627 if (flag & O_TRUNC) 1605 if (!IS_ERR(nd->intent.open.file))
1628 acc_mode |= MAY_WRITE; 1606 release_open_intent(nd);
1607 path_put(&nd->path);
1608 return ERR_PTR(error);
1609}
1629 1610
1630 /* Allow the LSM permission hook to distinguish append 1611static struct file *do_last(struct nameidata *nd, struct path *path,
1631 access from general write access. */ 1612 int open_flag, int acc_mode,
1632 if (flag & O_APPEND) 1613 int mode, const char *pathname,
1633 acc_mode |= MAY_APPEND; 1614 int *want_dir)
1615{
1616 struct dentry *dir = nd->path.dentry;
1617 struct file *filp;
1618 int error = -EISDIR;
1634 1619
1635 /* 1620 switch (nd->last_type) {
1636 * The simplest case - just a plain lookup. 1621 case LAST_DOTDOT:
1637 */ 1622 follow_dotdot(nd);
1638 if (!(flag & O_CREAT)) { 1623 dir = nd->path.dentry;
1639 filp = get_empty_filp(); 1624 if (nd->path.mnt->mnt_sb->s_type->fs_flags & FS_REVAL_DOT) {
1640 1625 if (!dir->d_op->d_revalidate(dir, nd)) {
1641 if (filp == NULL) 1626 error = -ESTALE;
1642 return ERR_PTR(-ENFILE); 1627 goto exit;
1643 nd.intent.open.file = filp;
1644 filp->f_flags = open_flag;
1645 nd.intent.open.flags = flag;
1646 nd.intent.open.create_mode = 0;
1647 error = do_path_lookup(dfd, pathname,
1648 lookup_flags(flag)|LOOKUP_OPEN, &nd);
1649 if (IS_ERR(nd.intent.open.file)) {
1650 if (error == 0) {
1651 error = PTR_ERR(nd.intent.open.file);
1652 path_put(&nd.path);
1653 } 1628 }
1654 } else if (error) 1629 }
1655 release_open_intent(&nd); 1630 /* fallthrough */
1656 if (error) 1631 case LAST_DOT:
1657 return ERR_PTR(error); 1632 case LAST_ROOT:
1633 if (open_flag & O_CREAT)
1634 goto exit;
1635 /* fallthrough */
1636 case LAST_BIND:
1637 audit_inode(pathname, dir);
1658 goto ok; 1638 goto ok;
1659 } 1639 }
1660 1640
1661 /* 1641 /* trailing slashes? */
1662 * Create - we need to know the parent. 1642 if (nd->last.name[nd->last.len]) {
1663 */ 1643 if (open_flag & O_CREAT)
1664reval: 1644 goto exit;
1665 error = path_init(dfd, pathname, LOOKUP_PARENT, &nd); 1645 *want_dir = 1;
1666 if (error)
1667 return ERR_PTR(error);
1668 if (force_reval)
1669 nd.flags |= LOOKUP_REVAL;
1670 error = path_walk(pathname, &nd);
1671 if (error) {
1672 if (nd.root.mnt)
1673 path_put(&nd.root);
1674 return ERR_PTR(error);
1675 } 1646 }
1676 if (unlikely(!audit_dummy_context()))
1677 audit_inode(pathname, nd.path.dentry);
1678 1647
1679 /* 1648 /* just plain open? */
1680 * We have the parent and last component. First of all, check 1649 if (!(open_flag & O_CREAT)) {
1681 * that we are not asked to creat(2) an obvious directory - that 1650 error = do_lookup(nd, &nd->last, path);
1682 * will not do. 1651 if (error)
1683 */ 1652 goto exit;
1684 error = -EISDIR; 1653 error = -ENOENT;
1685 if (nd.last_type != LAST_NORM || nd.last.name[nd.last.len]) 1654 if (!path->dentry->d_inode)
1686 goto exit_parent; 1655 goto exit_dput;
1656 if (path->dentry->d_inode->i_op->follow_link)
1657 return NULL;
1658 error = -ENOTDIR;
1659 if (*want_dir && !path->dentry->d_inode->i_op->lookup)
1660 goto exit_dput;
1661 path_to_nameidata(path, nd);
1662 audit_inode(pathname, nd->path.dentry);
1663 goto ok;
1664 }
1687 1665
1688 error = -ENFILE; 1666 /* OK, it's O_CREAT */
1689 filp = get_empty_filp();
1690 if (filp == NULL)
1691 goto exit_parent;
1692 nd.intent.open.file = filp;
1693 filp->f_flags = open_flag;
1694 nd.intent.open.flags = flag;
1695 nd.intent.open.create_mode = mode;
1696 dir = nd.path.dentry;
1697 nd.flags &= ~LOOKUP_PARENT;
1698 nd.flags |= LOOKUP_CREATE | LOOKUP_OPEN;
1699 if (flag & O_EXCL)
1700 nd.flags |= LOOKUP_EXCL;
1701 mutex_lock(&dir->d_inode->i_mutex); 1667 mutex_lock(&dir->d_inode->i_mutex);
1702 path.dentry = lookup_hash(&nd);
1703 path.mnt = nd.path.mnt;
1704 1668
1705do_last: 1669 path->dentry = lookup_hash(nd);
1706 error = PTR_ERR(path.dentry); 1670 path->mnt = nd->path.mnt;
1707 if (IS_ERR(path.dentry)) { 1671
1672 error = PTR_ERR(path->dentry);
1673 if (IS_ERR(path->dentry)) {
1708 mutex_unlock(&dir->d_inode->i_mutex); 1674 mutex_unlock(&dir->d_inode->i_mutex);
1709 goto exit; 1675 goto exit;
1710 } 1676 }
1711 1677
1712 if (IS_ERR(nd.intent.open.file)) { 1678 if (IS_ERR(nd->intent.open.file)) {
1713 error = PTR_ERR(nd.intent.open.file); 1679 error = PTR_ERR(nd->intent.open.file);
1714 goto exit_mutex_unlock; 1680 goto exit_mutex_unlock;
1715 } 1681 }
1716 1682
1717 /* Negative dentry, just create the file */ 1683 /* Negative dentry, just create the file */
1718 if (!path.dentry->d_inode) { 1684 if (!path->dentry->d_inode) {
1719 /* 1685 /*
1720 * This write is needed to ensure that a 1686 * This write is needed to ensure that a
1721 * ro->rw transition does not occur between 1687 * ro->rw transition does not occur between
@@ -1723,21 +1689,18 @@ do_last:
1723 * a permanent write count is taken through 1689 * a permanent write count is taken through
1724 * the 'struct file' in nameidata_to_filp(). 1690 * the 'struct file' in nameidata_to_filp().
1725 */ 1691 */
1726 error = mnt_want_write(nd.path.mnt); 1692 error = mnt_want_write(nd->path.mnt);
1727 if (error) 1693 if (error)
1728 goto exit_mutex_unlock; 1694 goto exit_mutex_unlock;
1729 error = __open_namei_create(&nd, &path, flag, mode); 1695 error = __open_namei_create(nd, path, open_flag, mode);
1730 if (error) { 1696 if (error) {
1731 mnt_drop_write(nd.path.mnt); 1697 mnt_drop_write(nd->path.mnt);
1732 goto exit; 1698 goto exit;
1733 } 1699 }
1734 filp = nameidata_to_filp(&nd); 1700 filp = nameidata_to_filp(nd);
1735 mnt_drop_write(nd.path.mnt); 1701 mnt_drop_write(nd->path.mnt);
1736 if (nd.root.mnt)
1737 path_put(&nd.root);
1738 if (!IS_ERR(filp)) { 1702 if (!IS_ERR(filp)) {
1739 error = ima_path_check(&filp->f_path, filp->f_mode & 1703 error = ima_file_check(filp, acc_mode);
1740 (MAY_READ | MAY_WRITE | MAY_EXEC));
1741 if (error) { 1704 if (error) {
1742 fput(filp); 1705 fput(filp);
1743 filp = ERR_PTR(error); 1706 filp = ERR_PTR(error);
@@ -1750,151 +1713,181 @@ do_last:
1750 * It already exists. 1713 * It already exists.
1751 */ 1714 */
1752 mutex_unlock(&dir->d_inode->i_mutex); 1715 mutex_unlock(&dir->d_inode->i_mutex);
1753 audit_inode(pathname, path.dentry); 1716 audit_inode(pathname, path->dentry);
1754 1717
1755 error = -EEXIST; 1718 error = -EEXIST;
1756 if (flag & O_EXCL) 1719 if (open_flag & O_EXCL)
1757 goto exit_dput; 1720 goto exit_dput;
1758 1721
1759 if (__follow_mount(&path)) { 1722 if (__follow_mount(path)) {
1760 error = -ELOOP; 1723 error = -ELOOP;
1761 if (flag & O_NOFOLLOW) 1724 if (open_flag & O_NOFOLLOW)
1762 goto exit_dput; 1725 goto exit_dput;
1763 } 1726 }
1764 1727
1765 error = -ENOENT; 1728 error = -ENOENT;
1766 if (!path.dentry->d_inode) 1729 if (!path->dentry->d_inode)
1767 goto exit_dput; 1730 goto exit_dput;
1768 if (path.dentry->d_inode->i_op->follow_link)
1769 goto do_link;
1770 1731
1771 path_to_nameidata(&path, &nd); 1732 if (path->dentry->d_inode->i_op->follow_link)
1733 return NULL;
1734
1735 path_to_nameidata(path, nd);
1772 error = -EISDIR; 1736 error = -EISDIR;
1773 if (S_ISDIR(path.dentry->d_inode->i_mode)) 1737 if (S_ISDIR(path->dentry->d_inode->i_mode))
1774 goto exit; 1738 goto exit;
1775ok: 1739ok:
1740 filp = finish_open(nd, open_flag, acc_mode);
1741 return filp;
1742
1743exit_mutex_unlock:
1744 mutex_unlock(&dir->d_inode->i_mutex);
1745exit_dput:
1746 path_put_conditional(path, nd);
1747exit:
1748 if (!IS_ERR(nd->intent.open.file))
1749 release_open_intent(nd);
1750 path_put(&nd->path);
1751 return ERR_PTR(error);
1752}
1753
1754/*
1755 * Note that the low bits of the passed in "open_flag"
1756 * are not the same as in the local variable "flag". See
1757 * open_to_namei_flags() for more details.
1758 */
1759struct file *do_filp_open(int dfd, const char *pathname,
1760 int open_flag, int mode, int acc_mode)
1761{
1762 struct file *filp;
1763 struct nameidata nd;
1764 int error;
1765 struct path path;
1766 int count = 0;
1767 int flag = open_to_namei_flags(open_flag);
1768 int force_reval = 0;
1769 int want_dir = open_flag & O_DIRECTORY;
1770
1771 if (!(open_flag & O_CREAT))
1772 mode = 0;
1773
1776 /* 1774 /*
1777 * Consider: 1775 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
1778 * 1. may_open() truncates a file 1776 * check for O_DSYNC if the need any syncing at all we enforce it's
1779 * 2. a rw->ro mount transition occurs 1777 * always set instead of having to deal with possibly weird behaviour
1780 * 3. nameidata_to_filp() fails due to 1778 * for malicious applications setting only __O_SYNC.
1781 * the ro mount.
1782 * That would be inconsistent, and should
1783 * be avoided. Taking this mnt write here
1784 * ensures that (2) can not occur.
1785 */ 1779 */
1786 will_truncate = open_will_truncate(flag, nd.path.dentry->d_inode); 1780 if (open_flag & __O_SYNC)
1787 if (will_truncate) { 1781 open_flag |= O_DSYNC;
1788 error = mnt_want_write(nd.path.mnt); 1782
1789 if (error) 1783 if (!acc_mode)
1790 goto exit; 1784 acc_mode = MAY_OPEN | ACC_MODE(open_flag);
1791 } 1785
1792 error = may_open(&nd.path, acc_mode, flag); 1786 /* O_TRUNC implies we need access checks for write permissions */
1787 if (open_flag & O_TRUNC)
1788 acc_mode |= MAY_WRITE;
1789
1790 /* Allow the LSM permission hook to distinguish append
1791 access from general write access. */
1792 if (open_flag & O_APPEND)
1793 acc_mode |= MAY_APPEND;
1794
1795 /* find the parent */
1796reval:
1797 error = path_init(dfd, pathname, LOOKUP_PARENT, &nd);
1798 if (error)
1799 return ERR_PTR(error);
1800 if (force_reval)
1801 nd.flags |= LOOKUP_REVAL;
1802
1803 current->total_link_count = 0;
1804 error = link_path_walk(pathname, &nd);
1793 if (error) { 1805 if (error) {
1794 if (will_truncate) 1806 filp = ERR_PTR(error);
1795 mnt_drop_write(nd.path.mnt); 1807 goto out;
1796 goto exit;
1797 }
1798 filp = nameidata_to_filp(&nd);
1799 if (!IS_ERR(filp)) {
1800 error = ima_path_check(&filp->f_path, filp->f_mode &
1801 (MAY_READ | MAY_WRITE | MAY_EXEC));
1802 if (error) {
1803 fput(filp);
1804 filp = ERR_PTR(error);
1805 }
1806 } 1808 }
1807 if (!IS_ERR(filp)) { 1809 if (unlikely(!audit_dummy_context()) && (open_flag & O_CREAT))
1808 if (acc_mode & MAY_WRITE) 1810 audit_inode(pathname, nd.path.dentry);
1809 vfs_dq_init(nd.path.dentry->d_inode);
1810 1811
1811 if (will_truncate) {
1812 error = handle_truncate(&nd.path);
1813 if (error) {
1814 fput(filp);
1815 filp = ERR_PTR(error);
1816 }
1817 }
1818 }
1819 /* 1812 /*
1820 * It is now safe to drop the mnt write 1813 * We have the parent and last component.
1821 * because the filp has had a write taken
1822 * on its behalf.
1823 */ 1814 */
1824 if (will_truncate) 1815
1825 mnt_drop_write(nd.path.mnt); 1816 error = -ENFILE;
1817 filp = get_empty_filp();
1818 if (filp == NULL)
1819 goto exit_parent;
1820 nd.intent.open.file = filp;
1821 filp->f_flags = open_flag;
1822 nd.intent.open.flags = flag;
1823 nd.intent.open.create_mode = mode;
1824 nd.flags &= ~LOOKUP_PARENT;
1825 nd.flags |= LOOKUP_OPEN;
1826 if (open_flag & O_CREAT) {
1827 nd.flags |= LOOKUP_CREATE;
1828 if (open_flag & O_EXCL)
1829 nd.flags |= LOOKUP_EXCL;
1830 }
1831 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname, &want_dir);
1832 while (unlikely(!filp)) { /* trailing symlink */
1833 struct path holder;
1834 struct inode *inode = path.dentry->d_inode;
1835 void *cookie;
1836 error = -ELOOP;
1837 /* S_ISDIR part is a temporary automount kludge */
1838 if ((open_flag & O_NOFOLLOW) && !S_ISDIR(inode->i_mode))
1839 goto exit_dput;
1840 if (count++ == 32)
1841 goto exit_dput;
1842 /*
1843 * This is subtle. Instead of calling do_follow_link() we do
1844 * the thing by hands. The reason is that this way we have zero
1845 * link_count and path_walk() (called from ->follow_link)
1846 * honoring LOOKUP_PARENT. After that we have the parent and
1847 * last component, i.e. we are in the same situation as after
1848 * the first path_walk(). Well, almost - if the last component
1849 * is normal we get its copy stored in nd->last.name and we will
1850 * have to putname() it when we are done. Procfs-like symlinks
1851 * just set LAST_BIND.
1852 */
1853 nd.flags |= LOOKUP_PARENT;
1854 error = security_inode_follow_link(path.dentry, &nd);
1855 if (error)
1856 goto exit_dput;
1857 error = __do_follow_link(&path, &nd, &cookie);
1858 if (unlikely(error)) {
1859 /* nd.path had been dropped */
1860 if (!IS_ERR(cookie) && inode->i_op->put_link)
1861 inode->i_op->put_link(path.dentry, &nd, cookie);
1862 path_put(&path);
1863 release_open_intent(&nd);
1864 filp = ERR_PTR(error);
1865 goto out;
1866 }
1867 holder = path;
1868 nd.flags &= ~LOOKUP_PARENT;
1869 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname, &want_dir);
1870 if (inode->i_op->put_link)
1871 inode->i_op->put_link(holder.dentry, &nd, cookie);
1872 path_put(&holder);
1873 }
1874out:
1826 if (nd.root.mnt) 1875 if (nd.root.mnt)
1827 path_put(&nd.root); 1876 path_put(&nd.root);
1877 if (filp == ERR_PTR(-ESTALE) && !force_reval) {
1878 force_reval = 1;
1879 goto reval;
1880 }
1828 return filp; 1881 return filp;
1829 1882
1830exit_mutex_unlock:
1831 mutex_unlock(&dir->d_inode->i_mutex);
1832exit_dput: 1883exit_dput:
1833 path_put_conditional(&path, &nd); 1884 path_put_conditional(&path, &nd);
1834exit:
1835 if (!IS_ERR(nd.intent.open.file)) 1885 if (!IS_ERR(nd.intent.open.file))
1836 release_open_intent(&nd); 1886 release_open_intent(&nd);
1837exit_parent: 1887exit_parent:
1838 if (nd.root.mnt)
1839 path_put(&nd.root);
1840 path_put(&nd.path); 1888 path_put(&nd.path);
1841 return ERR_PTR(error); 1889 filp = ERR_PTR(error);
1842 1890 goto out;
1843do_link:
1844 error = -ELOOP;
1845 if (flag & O_NOFOLLOW)
1846 goto exit_dput;
1847 /*
1848 * This is subtle. Instead of calling do_follow_link() we do the
1849 * thing by hands. The reason is that this way we have zero link_count
1850 * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.
1851 * After that we have the parent and last component, i.e.
1852 * we are in the same situation as after the first path_walk().
1853 * Well, almost - if the last component is normal we get its copy
1854 * stored in nd->last.name and we will have to putname() it when we
1855 * are done. Procfs-like symlinks just set LAST_BIND.
1856 */
1857 nd.flags |= LOOKUP_PARENT;
1858 error = security_inode_follow_link(path.dentry, &nd);
1859 if (error)
1860 goto exit_dput;
1861 error = __do_follow_link(&path, &nd);
1862 path_put(&path);
1863 if (error) {
1864 /* Does someone understand code flow here? Or it is only
1865 * me so stupid? Anathema to whoever designed this non-sense
1866 * with "intent.open".
1867 */
1868 release_open_intent(&nd);
1869 if (nd.root.mnt)
1870 path_put(&nd.root);
1871 if (error == -ESTALE && !force_reval) {
1872 force_reval = 1;
1873 goto reval;
1874 }
1875 return ERR_PTR(error);
1876 }
1877 nd.flags &= ~LOOKUP_PARENT;
1878 if (nd.last_type == LAST_BIND)
1879 goto ok;
1880 error = -EISDIR;
1881 if (nd.last_type != LAST_NORM)
1882 goto exit;
1883 if (nd.last.name[nd.last.len]) {
1884 __putname(nd.last.name);
1885 goto exit;
1886 }
1887 error = -ELOOP;
1888 if (count++==32) {
1889 __putname(nd.last.name);
1890 goto exit;
1891 }
1892 dir = nd.path.dentry;
1893 mutex_lock(&dir->d_inode->i_mutex);
1894 path.dentry = lookup_hash(&nd);
1895 path.mnt = nd.path.mnt;
1896 __putname(nd.last.name);
1897 goto do_last;
1898} 1891}
1899 1892
1900/** 1893/**
@@ -1988,7 +1981,6 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
1988 if (error) 1981 if (error)
1989 return error; 1982 return error;
1990 1983
1991 vfs_dq_init(dir);
1992 error = dir->i_op->mknod(dir, dentry, mode, dev); 1984 error = dir->i_op->mknod(dir, dentry, mode, dev);
1993 if (!error) 1985 if (!error)
1994 fsnotify_create(dir, dentry); 1986 fsnotify_create(dir, dentry);
@@ -2087,7 +2079,6 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
2087 if (error) 2079 if (error)
2088 return error; 2080 return error;
2089 2081
2090 vfs_dq_init(dir);
2091 error = dir->i_op->mkdir(dir, dentry, mode); 2082 error = dir->i_op->mkdir(dir, dentry, mode);
2092 if (!error) 2083 if (!error)
2093 fsnotify_mkdir(dir, dentry); 2084 fsnotify_mkdir(dir, dentry);
@@ -2173,8 +2164,6 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
2173 if (!dir->i_op->rmdir) 2164 if (!dir->i_op->rmdir)
2174 return -EPERM; 2165 return -EPERM;
2175 2166
2176 vfs_dq_init(dir);
2177
2178 mutex_lock(&dentry->d_inode->i_mutex); 2167 mutex_lock(&dentry->d_inode->i_mutex);
2179 dentry_unhash(dentry); 2168 dentry_unhash(dentry);
2180 if (d_mountpoint(dentry)) 2169 if (d_mountpoint(dentry))
@@ -2260,15 +2249,16 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
2260 if (!dir->i_op->unlink) 2249 if (!dir->i_op->unlink)
2261 return -EPERM; 2250 return -EPERM;
2262 2251
2263 vfs_dq_init(dir);
2264
2265 mutex_lock(&dentry->d_inode->i_mutex); 2252 mutex_lock(&dentry->d_inode->i_mutex);
2266 if (d_mountpoint(dentry)) 2253 if (d_mountpoint(dentry))
2267 error = -EBUSY; 2254 error = -EBUSY;
2268 else { 2255 else {
2269 error = security_inode_unlink(dir, dentry); 2256 error = security_inode_unlink(dir, dentry);
2270 if (!error) 2257 if (!error) {
2271 error = dir->i_op->unlink(dir, dentry); 2258 error = dir->i_op->unlink(dir, dentry);
2259 if (!error)
2260 dentry->d_inode->i_flags |= S_DEAD;
2261 }
2272 } 2262 }
2273 mutex_unlock(&dentry->d_inode->i_mutex); 2263 mutex_unlock(&dentry->d_inode->i_mutex);
2274 2264
@@ -2371,7 +2361,6 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
2371 if (error) 2361 if (error)
2372 return error; 2362 return error;
2373 2363
2374 vfs_dq_init(dir);
2375 error = dir->i_op->symlink(dir, dentry, oldname); 2364 error = dir->i_op->symlink(dir, dentry, oldname);
2376 if (!error) 2365 if (!error)
2377 fsnotify_create(dir, dentry); 2366 fsnotify_create(dir, dentry);
@@ -2455,7 +2444,6 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
2455 return error; 2444 return error;
2456 2445
2457 mutex_lock(&inode->i_mutex); 2446 mutex_lock(&inode->i_mutex);
2458 vfs_dq_init(dir);
2459 error = dir->i_op->link(old_dentry, dir, new_dentry); 2447 error = dir->i_op->link(old_dentry, dir, new_dentry);
2460 mutex_unlock(&inode->i_mutex); 2448 mutex_unlock(&inode->i_mutex);
2461 if (!error) 2449 if (!error)
@@ -2621,6 +2609,8 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
2621 else 2609 else
2622 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 2610 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
2623 if (!error) { 2611 if (!error) {
2612 if (target)
2613 target->i_flags |= S_DEAD;
2624 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) 2614 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
2625 d_move(old_dentry, new_dentry); 2615 d_move(old_dentry, new_dentry);
2626 } 2616 }
@@ -2654,20 +2644,15 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
2654 if (!old_dir->i_op->rename) 2644 if (!old_dir->i_op->rename)
2655 return -EPERM; 2645 return -EPERM;
2656 2646
2657 vfs_dq_init(old_dir);
2658 vfs_dq_init(new_dir);
2659
2660 old_name = fsnotify_oldname_init(old_dentry->d_name.name); 2647 old_name = fsnotify_oldname_init(old_dentry->d_name.name);
2661 2648
2662 if (is_dir) 2649 if (is_dir)
2663 error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry); 2650 error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
2664 else 2651 else
2665 error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry); 2652 error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
2666 if (!error) { 2653 if (!error)
2667 const char *new_name = old_dentry->d_name.name; 2654 fsnotify_move(old_dir, new_dir, old_name, is_dir,
2668 fsnotify_move(old_dir, new_dir, old_name, new_name, is_dir,
2669 new_dentry->d_inode, old_dentry); 2655 new_dentry->d_inode, old_dentry);
2670 }
2671 fsnotify_oldname_free(old_name); 2656 fsnotify_oldname_free(old_name);
2672 2657
2673 return error; 2658 return error;