about | summary | refs | log | tree | commit | diff | stats
path: root/fs/namei.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/namei.c')
-rw-r--r-- fs/namei.c 597
1 files changed, 292 insertions, 305 deletions
diff --git a/fs/namei.c b/fs/namei.c
index 68921d9b5302..a7dce91a7e42 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -19,7 +19,6 @@
19#include <linux/slab.h> 19#include <linux/slab.h>
20#include <linux/fs.h> 20#include <linux/fs.h>
21#include <linux/namei.h> 21#include <linux/namei.h>
22#include <linux/quotaops.h>
23#include <linux/pagemap.h> 22#include <linux/pagemap.h>
24#include <linux/fsnotify.h> 23#include <linux/fsnotify.h>
25#include <linux/personality.h> 24#include <linux/personality.h>
@@ -232,6 +231,7 @@ int generic_permission(struct inode *inode, int mask,
232 /* 231 /*
233 * Searching includes executable on directories, else just read. 232 * Searching includes executable on directories, else just read.
234 */ 233 */
234 mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
235 if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))) 235 if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE)))
236 if (capable(CAP_DAC_READ_SEARCH)) 236 if (capable(CAP_DAC_READ_SEARCH))
237 return 0; 237 return 0;
@@ -497,8 +497,6 @@ static int link_path_walk(const char *, struct nameidata *);
497 497
498static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link) 498static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link)
499{ 499{
500 int res = 0;
501 char *name;
502 if (IS_ERR(link)) 500 if (IS_ERR(link))
503 goto fail; 501 goto fail;
504 502
@@ -509,22 +507,7 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l
509 path_get(&nd->root); 507 path_get(&nd->root);
510 } 508 }
511 509
512 res = link_path_walk(link, nd); 510 return link_path_walk(link, nd);
513 if (nd->depth || res || nd->last_type!=LAST_NORM)
514 return res;
515 /*
516 * If it is an iterative symlinks resolution in open_namei() we
517 * have to copy the last component. And all that crap because of
518 * bloody create() on broken symlinks. Furrfu...
519 */
520 name = __getname();
521 if (unlikely(!name)) {
522 path_put(&nd->path);
523 return -ENOMEM;
524 }
525 strcpy(name, nd->last.name);
526 nd->last.name = name;
527 return 0;
528fail: 511fail:
529 path_put(&nd->path); 512 path_put(&nd->path);
530 return PTR_ERR(link); 513 return PTR_ERR(link);
@@ -546,10 +529,10 @@ static inline void path_to_nameidata(struct path *path, struct nameidata *nd)
546 nd->path.dentry = path->dentry; 529 nd->path.dentry = path->dentry;
547} 530}
548 531
549static __always_inline int __do_follow_link(struct path *path, struct nameidata *nd) 532static __always_inline int
533__do_follow_link(struct path *path, struct nameidata *nd, void **p)
550{ 534{
551 int error; 535 int error;
552 void *cookie;
553 struct dentry *dentry = path->dentry; 536 struct dentry *dentry = path->dentry;
554 537
555 touch_atime(path->mnt, dentry); 538 touch_atime(path->mnt, dentry);
@@ -560,9 +543,10 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata
560 dget(dentry); 543 dget(dentry);
561 } 544 }
562 mntget(path->mnt); 545 mntget(path->mnt);
563 cookie = dentry->d_inode->i_op->follow_link(dentry, nd); 546 nd->last_type = LAST_BIND;
564 error = PTR_ERR(cookie); 547 *p = dentry->d_inode->i_op->follow_link(dentry, nd);
565 if (!IS_ERR(cookie)) { 548 error = PTR_ERR(*p);
549 if (!IS_ERR(*p)) {
566 char *s = nd_get_link(nd); 550 char *s = nd_get_link(nd);
567 error = 0; 551 error = 0;
568 if (s) 552 if (s)
@@ -572,8 +556,6 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata
572 if (error) 556 if (error)
573 path_put(&nd->path); 557 path_put(&nd->path);
574 } 558 }
575 if (dentry->d_inode->i_op->put_link)
576 dentry->d_inode->i_op->put_link(dentry, nd, cookie);
577 } 559 }
578 return error; 560 return error;
579} 561}
@@ -587,6 +569,7 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata
587 */ 569 */
588static inline int do_follow_link(struct path *path, struct nameidata *nd) 570static inline int do_follow_link(struct path *path, struct nameidata *nd)
589{ 571{
572 void *cookie;
590 int err = -ELOOP; 573 int err = -ELOOP;
591 if (current->link_count >= MAX_NESTED_LINKS) 574 if (current->link_count >= MAX_NESTED_LINKS)
592 goto loop; 575 goto loop;
@@ -600,7 +583,9 @@ static inline int do_follow_link(struct path *path, struct nameidata *nd)
600 current->link_count++; 583 current->link_count++;
601 current->total_link_count++; 584 current->total_link_count++;
602 nd->depth++; 585 nd->depth++;
603 err = __do_follow_link(path, nd); 586 err = __do_follow_link(path, nd, &cookie);
587 if (!IS_ERR(cookie) && path->dentry->d_inode->i_op->put_link)
588 path->dentry->d_inode->i_op->put_link(path->dentry, nd, cookie);
604 path_put(path); 589 path_put(path);
605 current->link_count--; 590 current->link_count--;
606 nd->depth--; 591 nd->depth--;
@@ -687,33 +672,20 @@ static __always_inline void follow_dotdot(struct nameidata *nd)
687 set_root(nd); 672 set_root(nd);
688 673
689 while(1) { 674 while(1) {
690 struct vfsmount *parent;
691 struct dentry *old = nd->path.dentry; 675 struct dentry *old = nd->path.dentry;
692 676
693 if (nd->path.dentry == nd->root.dentry && 677 if (nd->path.dentry == nd->root.dentry &&
694 nd->path.mnt == nd->root.mnt) { 678 nd->path.mnt == nd->root.mnt) {
695 break; 679 break;
696 } 680 }
697 spin_lock(&dcache_lock);
698 if (nd->path.dentry != nd->path.mnt->mnt_root) { 681 if (nd->path.dentry != nd->path.mnt->mnt_root) {
699 nd->path.dentry = dget(nd->path.dentry->d_parent); 682 /* rare case of legitimate dget_parent()... */
700 spin_unlock(&dcache_lock); 683 nd->path.dentry = dget_parent(nd->path.dentry);
701 dput(old); 684 dput(old);
702 break; 685 break;
703 } 686 }
704 spin_unlock(&dcache_lock); 687 if (!follow_up(&nd->path))
705 spin_lock(&vfsmount_lock);
706 parent = nd->path.mnt->mnt_parent;
707 if (parent == nd->path.mnt) {
708 spin_unlock(&vfsmount_lock);
709 break; 688 break;
710 }
711 mntget(parent);
712 nd->path.dentry = dget(nd->path.mnt->mnt_mountpoint);
713 spin_unlock(&vfsmount_lock);
714 dput(old);
715 mntput(nd->path.mnt);
716 nd->path.mnt = parent;
717 } 689 }
718 follow_mount(&nd->path); 690 follow_mount(&nd->path);
719} 691}
@@ -821,6 +793,17 @@ fail:
821} 793}
822 794
823/* 795/*
796 * This is a temporary kludge to deal with "automount" symlinks; proper
797 * solution is to trigger them on follow_mount(), so that do_lookup()
798 * would DTRT. To be killed before 2.6.34-final.
799 */
800static inline int follow_on_final(struct inode *inode, unsigned lookup_flags)
801{
802 return inode && unlikely(inode->i_op->follow_link) &&
803 ((lookup_flags & LOOKUP_FOLLOW) || S_ISDIR(inode->i_mode));
804}
805
806/*
824 * Name resolution. 807 * Name resolution.
825 * This is the basic name resolution function, turning a pathname into 808 * This is the basic name resolution function, turning a pathname into
826 * the final dentry. We expect 'base' to be positive and a directory. 809 * the final dentry. We expect 'base' to be positive and a directory.
@@ -940,8 +923,7 @@ last_component:
940 if (err) 923 if (err)
941 break; 924 break;
942 inode = next.dentry->d_inode; 925 inode = next.dentry->d_inode;
943 if ((lookup_flags & LOOKUP_FOLLOW) 926 if (follow_on_final(inode, lookup_flags)) {
944 && inode && inode->i_op->follow_link) {
945 err = do_follow_link(&next, nd); 927 err = do_follow_link(&next, nd);
946 if (err) 928 if (err)
947 goto return_err; 929 goto return_err;
@@ -1335,7 +1317,7 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
1335 return -ENOENT; 1317 return -ENOENT;
1336 1318
1337 BUG_ON(victim->d_parent->d_inode != dir); 1319 BUG_ON(victim->d_parent->d_inode != dir);
1338 audit_inode_child(victim->d_name.name, victim, dir); 1320 audit_inode_child(victim, dir);
1339 1321
1340 error = inode_permission(dir, MAY_WRITE | MAY_EXEC); 1322 error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
1341 if (error) 1323 if (error)
@@ -1376,22 +1358,6 @@ static inline int may_create(struct inode *dir, struct dentry *child)
1376 return inode_permission(dir, MAY_WRITE | MAY_EXEC); 1358 return inode_permission(dir, MAY_WRITE | MAY_EXEC);
1377} 1359}
1378 1360
1379/*
1380 * O_DIRECTORY translates into forcing a directory lookup.
1381 */
1382static inline int lookup_flags(unsigned int f)
1383{
1384 unsigned long retval = LOOKUP_FOLLOW;
1385
1386 if (f & O_NOFOLLOW)
1387 retval &= ~LOOKUP_FOLLOW;
1388
1389 if (f & O_DIRECTORY)
1390 retval |= LOOKUP_DIRECTORY;
1391
1392 return retval;
1393}
1394
1395/* 1361/*
1396 * p1 and p2 should be directories on the same fs. 1362 * p1 and p2 should be directories on the same fs.
1397 */ 1363 */
@@ -1449,7 +1415,6 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
1449 error = security_inode_create(dir, dentry, mode); 1415 error = security_inode_create(dir, dentry, mode);
1450 if (error) 1416 if (error)
1451 return error; 1417 return error;
1452 vfs_dq_init(dir);
1453 error = dir->i_op->create(dir, dentry, mode, nd); 1418 error = dir->i_op->create(dir, dentry, mode, nd);
1454 if (!error) 1419 if (!error)
1455 fsnotify_create(dir, dentry); 1420 fsnotify_create(dir, dentry);
@@ -1491,7 +1456,7 @@ int may_open(struct path *path, int acc_mode, int flag)
1491 * An append-only file must be opened in append mode for writing. 1456 * An append-only file must be opened in append mode for writing.
1492 */ 1457 */
1493 if (IS_APPEND(inode)) { 1458 if (IS_APPEND(inode)) {
1494 if ((flag & FMODE_WRITE) && !(flag & O_APPEND)) 1459 if ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND))
1495 return -EPERM; 1460 return -EPERM;
1496 if (flag & O_TRUNC) 1461 if (flag & O_TRUNC)
1497 return -EPERM; 1462 return -EPERM;
@@ -1535,7 +1500,7 @@ static int handle_truncate(struct path *path)
1535 * what get passed to sys_open(). 1500 * what get passed to sys_open().
1536 */ 1501 */
1537static int __open_namei_create(struct nameidata *nd, struct path *path, 1502static int __open_namei_create(struct nameidata *nd, struct path *path,
1538 int flag, int mode) 1503 int open_flag, int mode)
1539{ 1504{
1540 int error; 1505 int error;
1541 struct dentry *dir = nd->path.dentry; 1506 struct dentry *dir = nd->path.dentry;
@@ -1553,7 +1518,7 @@ out_unlock:
1553 if (error) 1518 if (error)
1554 return error; 1519 return error;
1555 /* Don't check for write permission, don't truncate */ 1520 /* Don't check for write permission, don't truncate */
1556 return may_open(&nd->path, 0, flag & ~O_TRUNC); 1521 return may_open(&nd->path, 0, open_flag & ~O_TRUNC);
1557} 1522}
1558 1523
1559/* 1524/*
@@ -1591,125 +1556,133 @@ static int open_will_truncate(int flag, struct inode *inode)
1591 return (flag & O_TRUNC); 1556 return (flag & O_TRUNC);
1592} 1557}
1593 1558
1594/* 1559static struct file *finish_open(struct nameidata *nd,
1595 * Note that the low bits of the passed in "open_flag" 1560 int open_flag, int acc_mode)
1596 * are not the same as in the local variable "flag". See
1597 * open_to_namei_flags() for more details.
1598 */
1599struct file *do_filp_open(int dfd, const char *pathname,
1600 int open_flag, int mode, int acc_mode)
1601{ 1561{
1602 struct file *filp; 1562 struct file *filp;
1603 struct nameidata nd;
1604 int error;
1605 struct path path, save;
1606 struct dentry *dir;
1607 int count = 0;
1608 int will_truncate; 1563 int will_truncate;
1609 int flag = open_to_namei_flags(open_flag); 1564 int error;
1610 1565
1566 will_truncate = open_will_truncate(open_flag, nd->path.dentry->d_inode);
1567 if (will_truncate) {
1568 error = mnt_want_write(nd->path.mnt);
1569 if (error)
1570 goto exit;
1571 }
1572 error = may_open(&nd->path, acc_mode, open_flag);
1573 if (error) {
1574 if (will_truncate)
1575 mnt_drop_write(nd->path.mnt);
1576 goto exit;
1577 }
1578 filp = nameidata_to_filp(nd);
1579 if (!IS_ERR(filp)) {
1580 error = ima_file_check(filp, acc_mode);
1581 if (error) {
1582 fput(filp);
1583 filp = ERR_PTR(error);
1584 }
1585 }
1586 if (!IS_ERR(filp)) {
1587 if (will_truncate) {
1588 error = handle_truncate(&nd->path);
1589 if (error) {
1590 fput(filp);
1591 filp = ERR_PTR(error);
1592 }
1593 }
1594 }
1611 /* 1595 /*
1612 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only 1596 * It is now safe to drop the mnt write
1613 * check for O_DSYNC if the need any syncing at all we enforce it's 1597 * because the filp has had a write taken
1614 * always set instead of having to deal with possibly weird behaviour 1598 * on its behalf.
1615 * for malicious applications setting only __O_SYNC.
1616 */ 1599 */
1617 if (open_flag & __O_SYNC) 1600 if (will_truncate)
1618 open_flag |= O_DSYNC; 1601 mnt_drop_write(nd->path.mnt);
1619 1602 return filp;
1620 if (!acc_mode)
1621 acc_mode = MAY_OPEN | ACC_MODE(flag);
1622 1603
1623 /* O_TRUNC implies we need access checks for write permissions */ 1604exit:
1624 if (flag & O_TRUNC) 1605 if (!IS_ERR(nd->intent.open.file))
1625 acc_mode |= MAY_WRITE; 1606 release_open_intent(nd);
1607 path_put(&nd->path);
1608 return ERR_PTR(error);
1609}
1626 1610
1627 /* Allow the LSM permission hook to distinguish append 1611static struct file *do_last(struct nameidata *nd, struct path *path,
1628 access from general write access. */ 1612 int open_flag, int acc_mode,
1629 if (flag & O_APPEND) 1613 int mode, const char *pathname)
1630 acc_mode |= MAY_APPEND; 1614{
1615 struct dentry *dir = nd->path.dentry;
1616 struct file *filp;
1617 int error = -EISDIR;
1631 1618
1632 /* 1619 switch (nd->last_type) {
1633 * The simplest case - just a plain lookup. 1620 case LAST_DOTDOT:
1634 */ 1621 follow_dotdot(nd);
1635 if (!(flag & O_CREAT)) { 1622 dir = nd->path.dentry;
1636 filp = get_empty_filp(); 1623 if (nd->path.mnt->mnt_sb->s_type->fs_flags & FS_REVAL_DOT) {
1637 1624 if (!dir->d_op->d_revalidate(dir, nd)) {
1638 if (filp == NULL) 1625 error = -ESTALE;
1639 return ERR_PTR(-ENFILE); 1626 goto exit;
1640 nd.intent.open.file = filp;
1641 filp->f_flags = open_flag;
1642 nd.intent.open.flags = flag;
1643 nd.intent.open.create_mode = 0;
1644 error = do_path_lookup(dfd, pathname,
1645 lookup_flags(flag)|LOOKUP_OPEN, &nd);
1646 if (IS_ERR(nd.intent.open.file)) {
1647 if (error == 0) {
1648 error = PTR_ERR(nd.intent.open.file);
1649 path_put(&nd.path);
1650 } 1627 }
1651 } else if (error) 1628 }
1652 release_open_intent(&nd); 1629 /* fallthrough */
1653 if (error) 1630 case LAST_DOT:
1654 return ERR_PTR(error); 1631 case LAST_ROOT:
1632 if (open_flag & O_CREAT)
1633 goto exit;
1634 /* fallthrough */
1635 case LAST_BIND:
1636 audit_inode(pathname, dir);
1655 goto ok; 1637 goto ok;
1656 } 1638 }
1657 1639
1658 /* 1640 /* trailing slashes? */
1659 * Create - we need to know the parent. 1641 if (nd->last.name[nd->last.len]) {
1660 */ 1642 if (open_flag & O_CREAT)
1661 error = path_init(dfd, pathname, LOOKUP_PARENT, &nd); 1643 goto exit;
1662 if (error) 1644 nd->flags |= LOOKUP_DIRECTORY;
1663 return ERR_PTR(error);
1664 error = path_walk(pathname, &nd);
1665 if (error) {
1666 if (nd.root.mnt)
1667 path_put(&nd.root);
1668 return ERR_PTR(error);
1669 } 1645 }
1670 if (unlikely(!audit_dummy_context()))
1671 audit_inode(pathname, nd.path.dentry);
1672 1646
1673 /* 1647 /* just plain open? */
1674 * We have the parent and last component. First of all, check 1648 if (!(open_flag & O_CREAT)) {
1675 * that we are not asked to creat(2) an obvious directory - that 1649 error = do_lookup(nd, &nd->last, path);
1676 * will not do. 1650 if (error)
1677 */ 1651 goto exit;
1678 error = -EISDIR; 1652 error = -ENOENT;
1679 if (nd.last_type != LAST_NORM || nd.last.name[nd.last.len]) 1653 if (!path->dentry->d_inode)
1680 goto exit_parent; 1654 goto exit_dput;
1655 if (path->dentry->d_inode->i_op->follow_link)
1656 return NULL;
1657 error = -ENOTDIR;
1658 if (nd->flags & LOOKUP_DIRECTORY) {
1659 if (!path->dentry->d_inode->i_op->lookup)
1660 goto exit_dput;
1661 }
1662 path_to_nameidata(path, nd);
1663 audit_inode(pathname, nd->path.dentry);
1664 goto ok;
1665 }
1681 1666
1682 error = -ENFILE; 1667 /* OK, it's O_CREAT */
1683 filp = get_empty_filp();
1684 if (filp == NULL)
1685 goto exit_parent;
1686 nd.intent.open.file = filp;
1687 filp->f_flags = open_flag;
1688 nd.intent.open.flags = flag;
1689 nd.intent.open.create_mode = mode;
1690 dir = nd.path.dentry;
1691 nd.flags &= ~LOOKUP_PARENT;
1692 nd.flags |= LOOKUP_CREATE | LOOKUP_OPEN;
1693 if (flag & O_EXCL)
1694 nd.flags |= LOOKUP_EXCL;
1695 mutex_lock(&dir->d_inode->i_mutex); 1668 mutex_lock(&dir->d_inode->i_mutex);
1696 path.dentry = lookup_hash(&nd);
1697 path.mnt = nd.path.mnt;
1698 1669
1699do_last: 1670 path->dentry = lookup_hash(nd);
1700 error = PTR_ERR(path.dentry); 1671 path->mnt = nd->path.mnt;
1701 if (IS_ERR(path.dentry)) { 1672
1673 error = PTR_ERR(path->dentry);
1674 if (IS_ERR(path->dentry)) {
1702 mutex_unlock(&dir->d_inode->i_mutex); 1675 mutex_unlock(&dir->d_inode->i_mutex);
1703 goto exit; 1676 goto exit;
1704 } 1677 }
1705 1678
1706 if (IS_ERR(nd.intent.open.file)) { 1679 if (IS_ERR(nd->intent.open.file)) {
1707 error = PTR_ERR(nd.intent.open.file); 1680 error = PTR_ERR(nd->intent.open.file);
1708 goto exit_mutex_unlock; 1681 goto exit_mutex_unlock;
1709 } 1682 }
1710 1683
1711 /* Negative dentry, just create the file */ 1684 /* Negative dentry, just create the file */
1712 if (!path.dentry->d_inode) { 1685 if (!path->dentry->d_inode) {
1713 /* 1686 /*
1714 * This write is needed to ensure that a 1687 * This write is needed to ensure that a
1715 * ro->rw transition does not occur between 1688 * ro->rw transition does not occur between
@@ -1717,21 +1690,18 @@ do_last:
1717 * a permanent write count is taken through 1690 * a permanent write count is taken through
1718 * the 'struct file' in nameidata_to_filp(). 1691 * the 'struct file' in nameidata_to_filp().
1719 */ 1692 */
1720 error = mnt_want_write(nd.path.mnt); 1693 error = mnt_want_write(nd->path.mnt);
1721 if (error) 1694 if (error)
1722 goto exit_mutex_unlock; 1695 goto exit_mutex_unlock;
1723 error = __open_namei_create(&nd, &path, flag, mode); 1696 error = __open_namei_create(nd, path, open_flag, mode);
1724 if (error) { 1697 if (error) {
1725 mnt_drop_write(nd.path.mnt); 1698 mnt_drop_write(nd->path.mnt);
1726 goto exit; 1699 goto exit;
1727 } 1700 }
1728 filp = nameidata_to_filp(&nd); 1701 filp = nameidata_to_filp(nd);
1729 mnt_drop_write(nd.path.mnt); 1702 mnt_drop_write(nd->path.mnt);
1730 if (nd.root.mnt)
1731 path_put(&nd.root);
1732 if (!IS_ERR(filp)) { 1703 if (!IS_ERR(filp)) {
1733 error = ima_path_check(&filp->f_path, filp->f_mode & 1704 error = ima_file_check(filp, acc_mode);
1734 (MAY_READ | MAY_WRITE | MAY_EXEC));
1735 if (error) { 1705 if (error) {
1736 fput(filp); 1706 fput(filp);
1737 filp = ERR_PTR(error); 1707 filp = ERR_PTR(error);
@@ -1744,157 +1714,182 @@ do_last:
1744 * It already exists. 1714 * It already exists.
1745 */ 1715 */
1746 mutex_unlock(&dir->d_inode->i_mutex); 1716 mutex_unlock(&dir->d_inode->i_mutex);
1747 audit_inode(pathname, path.dentry); 1717 audit_inode(pathname, path->dentry);
1748 1718
1749 error = -EEXIST; 1719 error = -EEXIST;
1750 if (flag & O_EXCL) 1720 if (open_flag & O_EXCL)
1751 goto exit_dput; 1721 goto exit_dput;
1752 1722
1753 if (__follow_mount(&path)) { 1723 if (__follow_mount(path)) {
1754 error = -ELOOP; 1724 error = -ELOOP;
1755 if (flag & O_NOFOLLOW) 1725 if (open_flag & O_NOFOLLOW)
1756 goto exit_dput; 1726 goto exit_dput;
1757 } 1727 }
1758 1728
1759 error = -ENOENT; 1729 error = -ENOENT;
1760 if (!path.dentry->d_inode) 1730 if (!path->dentry->d_inode)
1761 goto exit_dput; 1731 goto exit_dput;
1762 if (path.dentry->d_inode->i_op->follow_link)
1763 goto do_link;
1764 1732
1765 path_to_nameidata(&path, &nd); 1733 if (path->dentry->d_inode->i_op->follow_link)
1734 return NULL;
1735
1736 path_to_nameidata(path, nd);
1766 error = -EISDIR; 1737 error = -EISDIR;
1767 if (S_ISDIR(path.dentry->d_inode->i_mode)) 1738 if (S_ISDIR(path->dentry->d_inode->i_mode))
1768 goto exit; 1739 goto exit;
1769ok: 1740ok:
1770 /* 1741 filp = finish_open(nd, open_flag, acc_mode);
1771 * Consider:
1772 * 1. may_open() truncates a file
1773 * 2. a rw->ro mount transition occurs
1774 * 3. nameidata_to_filp() fails due to
1775 * the ro mount.
1776 * That would be inconsistent, and should
1777 * be avoided. Taking this mnt write here
1778 * ensures that (2) can not occur.
1779 */
1780 will_truncate = open_will_truncate(flag, nd.path.dentry->d_inode);
1781 if (will_truncate) {
1782 error = mnt_want_write(nd.path.mnt);
1783 if (error)
1784 goto exit;
1785 }
1786 error = may_open(&nd.path, acc_mode, flag);
1787 if (error) {
1788 if (will_truncate)
1789 mnt_drop_write(nd.path.mnt);
1790 goto exit;
1791 }
1792 filp = nameidata_to_filp(&nd);
1793 if (!IS_ERR(filp)) {
1794 error = ima_path_check(&filp->f_path, filp->f_mode &
1795 (MAY_READ | MAY_WRITE | MAY_EXEC));
1796 if (error) {
1797 fput(filp);
1798 filp = ERR_PTR(error);
1799 }
1800 }
1801 if (!IS_ERR(filp)) {
1802 if (acc_mode & MAY_WRITE)
1803 vfs_dq_init(nd.path.dentry->d_inode);
1804
1805 if (will_truncate) {
1806 error = handle_truncate(&nd.path);
1807 if (error) {
1808 fput(filp);
1809 filp = ERR_PTR(error);
1810 }
1811 }
1812 }
1813 /*
1814 * It is now safe to drop the mnt write
1815 * because the filp has had a write taken
1816 * on its behalf.
1817 */
1818 if (will_truncate)
1819 mnt_drop_write(nd.path.mnt);
1820 if (nd.root.mnt)
1821 path_put(&nd.root);
1822 return filp; 1742 return filp;
1823 1743
1824exit_mutex_unlock: 1744exit_mutex_unlock:
1825 mutex_unlock(&dir->d_inode->i_mutex); 1745 mutex_unlock(&dir->d_inode->i_mutex);
1826exit_dput: 1746exit_dput:
1827 path_put_conditional(&path, &nd); 1747 path_put_conditional(path, nd);
1828exit: 1748exit:
1829 if (!IS_ERR(nd.intent.open.file)) 1749 if (!IS_ERR(nd->intent.open.file))
1830 release_open_intent(&nd); 1750 release_open_intent(nd);
1831exit_parent: 1751 path_put(&nd->path);
1832 if (nd.root.mnt)
1833 path_put(&nd.root);
1834 path_put(&nd.path);
1835 return ERR_PTR(error); 1752 return ERR_PTR(error);
1753}
1754
1755/*
1756 * Note that the low bits of the passed in "open_flag"
1757 * are not the same as in the local variable "flag". See
1758 * open_to_namei_flags() for more details.
1759 */
1760struct file *do_filp_open(int dfd, const char *pathname,
1761 int open_flag, int mode, int acc_mode)
1762{
1763 struct file *filp;
1764 struct nameidata nd;
1765 int error;
1766 struct path path;
1767 int count = 0;
1768 int flag = open_to_namei_flags(open_flag);
1769 int force_reval = 0;
1770
1771 if (!(open_flag & O_CREAT))
1772 mode = 0;
1836 1773
1837do_link:
1838 error = -ELOOP;
1839 if (flag & O_NOFOLLOW)
1840 goto exit_dput;
1841 /* 1774 /*
1842 * This is subtle. Instead of calling do_follow_link() we do the 1775 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
1843 * thing by hands. The reason is that this way we have zero link_count 1776 * check for O_DSYNC if the need any syncing at all we enforce it's
1844 * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT. 1777 * always set instead of having to deal with possibly weird behaviour
1845 * After that we have the parent and last component, i.e. 1778 * for malicious applications setting only __O_SYNC.
1846 * we are in the same situation as after the first path_walk().
1847 * Well, almost - if the last component is normal we get its copy
1848 * stored in nd->last.name and we will have to putname() it when we
1849 * are done. Procfs-like symlinks just set LAST_BIND.
1850 */ 1779 */
1851 nd.flags |= LOOKUP_PARENT; 1780 if (open_flag & __O_SYNC)
1852 error = security_inode_follow_link(path.dentry, &nd); 1781 open_flag |= O_DSYNC;
1782
1783 if (!acc_mode)
1784 acc_mode = MAY_OPEN | ACC_MODE(open_flag);
1785
1786 /* O_TRUNC implies we need access checks for write permissions */
1787 if (open_flag & O_TRUNC)
1788 acc_mode |= MAY_WRITE;
1789
1790 /* Allow the LSM permission hook to distinguish append
1791 access from general write access. */
1792 if (open_flag & O_APPEND)
1793 acc_mode |= MAY_APPEND;
1794
1795 /* find the parent */
1796reval:
1797 error = path_init(dfd, pathname, LOOKUP_PARENT, &nd);
1853 if (error) 1798 if (error)
1854 goto exit_dput; 1799 return ERR_PTR(error);
1855 save = nd.path; 1800 if (force_reval)
1856 path_get(&save);
1857 error = __do_follow_link(&path, &nd);
1858 if (error == -ESTALE) {
1859 /* nd.path had been dropped */
1860 nd.path = save;
1861 path_get(&nd.path);
1862 nd.flags |= LOOKUP_REVAL; 1801 nd.flags |= LOOKUP_REVAL;
1863 error = __do_follow_link(&path, &nd); 1802
1864 } 1803 current->total_link_count = 0;
1865 path_put(&save); 1804 error = link_path_walk(pathname, &nd);
1866 path_put(&path);
1867 if (error) { 1805 if (error) {
1868 /* Does someone understand code flow here? Or it is only 1806 filp = ERR_PTR(error);
1869 * me so stupid? Anathema to whoever designed this non-sense 1807 goto out;
1870 * with "intent.open".
1871 */
1872 release_open_intent(&nd);
1873 if (nd.root.mnt)
1874 path_put(&nd.root);
1875 return ERR_PTR(error);
1876 } 1808 }
1809 if (unlikely(!audit_dummy_context()) && (open_flag & O_CREAT))
1810 audit_inode(pathname, nd.path.dentry);
1811
1812 /*
1813 * We have the parent and last component.
1814 */
1815
1816 error = -ENFILE;
1817 filp = get_empty_filp();
1818 if (filp == NULL)
1819 goto exit_parent;
1820 nd.intent.open.file = filp;
1821 filp->f_flags = open_flag;
1822 nd.intent.open.flags = flag;
1823 nd.intent.open.create_mode = mode;
1877 nd.flags &= ~LOOKUP_PARENT; 1824 nd.flags &= ~LOOKUP_PARENT;
1878 if (nd.last_type == LAST_BIND) 1825 nd.flags |= LOOKUP_OPEN;
1879 goto ok; 1826 if (open_flag & O_CREAT) {
1880 error = -EISDIR; 1827 nd.flags |= LOOKUP_CREATE;
1881 if (nd.last_type != LAST_NORM) 1828 if (open_flag & O_EXCL)
1882 goto exit; 1829 nd.flags |= LOOKUP_EXCL;
1883 if (nd.last.name[nd.last.len]) { 1830 }
1884 __putname(nd.last.name); 1831 if (open_flag & O_DIRECTORY)
1885 goto exit; 1832 nd.flags |= LOOKUP_DIRECTORY;
1833 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
1834 while (unlikely(!filp)) { /* trailing symlink */
1835 struct path holder;
1836 struct inode *inode = path.dentry->d_inode;
1837 void *cookie;
1838 error = -ELOOP;
1839 /* S_ISDIR part is a temporary automount kludge */
1840 if ((open_flag & O_NOFOLLOW) && !S_ISDIR(inode->i_mode))
1841 goto exit_dput;
1842 if (count++ == 32)
1843 goto exit_dput;
1844 /*
1845 * This is subtle. Instead of calling do_follow_link() we do
1846 * the thing by hands. The reason is that this way we have zero
1847 * link_count and path_walk() (called from ->follow_link)
1848 * honoring LOOKUP_PARENT. After that we have the parent and
1849 * last component, i.e. we are in the same situation as after
1850 * the first path_walk(). Well, almost - if the last component
1851 * is normal we get its copy stored in nd->last.name and we will
1852 * have to putname() it when we are done. Procfs-like symlinks
1853 * just set LAST_BIND.
1854 */
1855 nd.flags |= LOOKUP_PARENT;
1856 error = security_inode_follow_link(path.dentry, &nd);
1857 if (error)
1858 goto exit_dput;
1859 error = __do_follow_link(&path, &nd, &cookie);
1860 if (unlikely(error)) {
1861 /* nd.path had been dropped */
1862 if (!IS_ERR(cookie) && inode->i_op->put_link)
1863 inode->i_op->put_link(path.dentry, &nd, cookie);
1864 path_put(&path);
1865 release_open_intent(&nd);
1866 filp = ERR_PTR(error);
1867 goto out;
1868 }
1869 holder = path;
1870 nd.flags &= ~LOOKUP_PARENT;
1871 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
1872 if (inode->i_op->put_link)
1873 inode->i_op->put_link(holder.dentry, &nd, cookie);
1874 path_put(&holder);
1886 } 1875 }
1887 error = -ELOOP; 1876out:
1888 if (count++==32) { 1877 if (nd.root.mnt)
1889 __putname(nd.last.name); 1878 path_put(&nd.root);
1890 goto exit; 1879 if (filp == ERR_PTR(-ESTALE) && !force_reval) {
1880 force_reval = 1;
1881 goto reval;
1891 } 1882 }
1892 dir = nd.path.dentry; 1883 return filp;
1893 mutex_lock(&dir->d_inode->i_mutex); 1884
1894 path.dentry = lookup_hash(&nd); 1885exit_dput:
1895 path.mnt = nd.path.mnt; 1886 path_put_conditional(&path, &nd);
1896 __putname(nd.last.name); 1887 if (!IS_ERR(nd.intent.open.file))
1897 goto do_last; 1888 release_open_intent(&nd);
1889exit_parent:
1890 path_put(&nd.path);
1891 filp = ERR_PTR(error);
1892 goto out;
1898} 1893}
1899 1894
1900/** 1895/**
@@ -1988,7 +1983,6 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
1988 if (error) 1983 if (error)
1989 return error; 1984 return error;
1990 1985
1991 vfs_dq_init(dir);
1992 error = dir->i_op->mknod(dir, dentry, mode, dev); 1986 error = dir->i_op->mknod(dir, dentry, mode, dev);
1993 if (!error) 1987 if (!error)
1994 fsnotify_create(dir, dentry); 1988 fsnotify_create(dir, dentry);
@@ -2087,7 +2081,6 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
2087 if (error) 2081 if (error)
2088 return error; 2082 return error;
2089 2083
2090 vfs_dq_init(dir);
2091 error = dir->i_op->mkdir(dir, dentry, mode); 2084 error = dir->i_op->mkdir(dir, dentry, mode);
2092 if (!error) 2085 if (!error)
2093 fsnotify_mkdir(dir, dentry); 2086 fsnotify_mkdir(dir, dentry);
@@ -2173,8 +2166,6 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
2173 if (!dir->i_op->rmdir) 2166 if (!dir->i_op->rmdir)
2174 return -EPERM; 2167 return -EPERM;
2175 2168
2176 vfs_dq_init(dir);
2177
2178 mutex_lock(&dentry->d_inode->i_mutex); 2169 mutex_lock(&dentry->d_inode->i_mutex);
2179 dentry_unhash(dentry); 2170 dentry_unhash(dentry);
2180 if (d_mountpoint(dentry)) 2171 if (d_mountpoint(dentry))
@@ -2260,15 +2251,16 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
2260 if (!dir->i_op->unlink) 2251 if (!dir->i_op->unlink)
2261 return -EPERM; 2252 return -EPERM;
2262 2253
2263 vfs_dq_init(dir);
2264
2265 mutex_lock(&dentry->d_inode->i_mutex); 2254 mutex_lock(&dentry->d_inode->i_mutex);
2266 if (d_mountpoint(dentry)) 2255 if (d_mountpoint(dentry))
2267 error = -EBUSY; 2256 error = -EBUSY;
2268 else { 2257 else {
2269 error = security_inode_unlink(dir, dentry); 2258 error = security_inode_unlink(dir, dentry);
2270 if (!error) 2259 if (!error) {
2271 error = dir->i_op->unlink(dir, dentry); 2260 error = dir->i_op->unlink(dir, dentry);
2261 if (!error)
2262 dentry->d_inode->i_flags |= S_DEAD;
2263 }
2272 } 2264 }
2273 mutex_unlock(&dentry->d_inode->i_mutex); 2265 mutex_unlock(&dentry->d_inode->i_mutex);
2274 2266
@@ -2371,7 +2363,6 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
2371 if (error) 2363 if (error)
2372 return error; 2364 return error;
2373 2365
2374 vfs_dq_init(dir);
2375 error = dir->i_op->symlink(dir, dentry, oldname); 2366 error = dir->i_op->symlink(dir, dentry, oldname);
2376 if (!error) 2367 if (!error)
2377 fsnotify_create(dir, dentry); 2368 fsnotify_create(dir, dentry);
@@ -2455,7 +2446,6 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
2455 return error; 2446 return error;
2456 2447
2457 mutex_lock(&inode->i_mutex); 2448 mutex_lock(&inode->i_mutex);
2458 vfs_dq_init(dir);
2459 error = dir->i_op->link(old_dentry, dir, new_dentry); 2449 error = dir->i_op->link(old_dentry, dir, new_dentry);
2460 mutex_unlock(&inode->i_mutex); 2450 mutex_unlock(&inode->i_mutex);
2461 if (!error) 2451 if (!error)
@@ -2556,7 +2546,7 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname
2556 * e) conversion from fhandle to dentry may come in the wrong moment - when 2546 * e) conversion from fhandle to dentry may come in the wrong moment - when
2557 * we are removing the target. Solution: we will have to grab ->i_mutex 2547 * we are removing the target. Solution: we will have to grab ->i_mutex
2558 * in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on 2548 * in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on
2559 * ->i_mutex on parents, which works but leads to some truely excessive 2549 * ->i_mutex on parents, which works but leads to some truly excessive
2560 * locking]. 2550 * locking].
2561 */ 2551 */
2562static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, 2552static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
@@ -2621,6 +2611,8 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
2621 else 2611 else
2622 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 2612 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
2623 if (!error) { 2613 if (!error) {
2614 if (target)
2615 target->i_flags |= S_DEAD;
2624 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) 2616 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
2625 d_move(old_dentry, new_dentry); 2617 d_move(old_dentry, new_dentry);
2626 } 2618 }
@@ -2654,20 +2646,15 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
2654 if (!old_dir->i_op->rename) 2646 if (!old_dir->i_op->rename)
2655 return -EPERM; 2647 return -EPERM;
2656 2648
2657 vfs_dq_init(old_dir);
2658 vfs_dq_init(new_dir);
2659
2660 old_name = fsnotify_oldname_init(old_dentry->d_name.name); 2649 old_name = fsnotify_oldname_init(old_dentry->d_name.name);
2661 2650
2662 if (is_dir) 2651 if (is_dir)
2663 error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry); 2652 error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
2664 else 2653 else
2665 error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry); 2654 error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
2666 if (!error) { 2655 if (!error)
2667 const char *new_name = old_dentry->d_name.name; 2656 fsnotify_move(old_dir, new_dir, old_name, is_dir,
2668 fsnotify_move(old_dir, new_dir, old_name, new_name, is_dir,
2669 new_dentry->d_inode, old_dentry); 2657 new_dentry->d_inode, old_dentry);
2670 }
2671 fsnotify_oldname_free(old_name); 2658 fsnotify_oldname_free(old_name);
2672 2659
2673 return error; 2660 return error;