diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/dcache.c | 203 | ||||
-rw-r--r-- | fs/filesystems.c | 3 | ||||
-rw-r--r-- | fs/namei.c | 743 | ||||
-rw-r--r-- | fs/proc/proc_sysctl.c | 4 |
4 files changed, 794 insertions, 159 deletions
diff --git a/fs/dcache.c b/fs/dcache.c index dc0551c9755d..187fea040108 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -152,9 +152,23 @@ static void d_free(struct dentry *dentry) | |||
152 | call_rcu(&dentry->d_u.d_rcu, __d_free); | 152 | call_rcu(&dentry->d_u.d_rcu, __d_free); |
153 | } | 153 | } |
154 | 154 | ||
155 | /** | ||
156 | * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups | ||
157 | * After this call, in-progress rcu-walk path lookup will fail. This | ||
158 | * should be called after unhashing, and after changing d_inode (if | ||
159 | * the dentry has not already been unhashed). | ||
160 | */ | ||
161 | static inline void dentry_rcuwalk_barrier(struct dentry *dentry) | ||
162 | { | ||
163 | assert_spin_locked(&dentry->d_lock); | ||
164 | /* Go through a barrier */ | ||
165 | write_seqcount_barrier(&dentry->d_seq); | ||
166 | } | ||
167 | |||
155 | /* | 168 | /* |
156 | * Release the dentry's inode, using the filesystem | 169 | * Release the dentry's inode, using the filesystem |
157 | * d_iput() operation if defined. | 170 | * d_iput() operation if defined. Dentry has no refcount |
171 | * and is unhashed. | ||
158 | */ | 172 | */ |
159 | static void dentry_iput(struct dentry * dentry) | 173 | static void dentry_iput(struct dentry * dentry) |
160 | __releases(dentry->d_lock) | 174 | __releases(dentry->d_lock) |
@@ -179,6 +193,28 @@ static void dentry_iput(struct dentry * dentry) | |||
179 | } | 193 | } |
180 | 194 | ||
181 | /* | 195 | /* |
196 | * Release the dentry's inode, using the filesystem | ||
197 | * d_iput() operation if defined. dentry remains in-use. | ||
198 | */ | ||
199 | static void dentry_unlink_inode(struct dentry * dentry) | ||
200 | __releases(dentry->d_lock) | ||
201 | __releases(dcache_inode_lock) | ||
202 | { | ||
203 | struct inode *inode = dentry->d_inode; | ||
204 | dentry->d_inode = NULL; | ||
205 | list_del_init(&dentry->d_alias); | ||
206 | dentry_rcuwalk_barrier(dentry); | ||
207 | spin_unlock(&dentry->d_lock); | ||
208 | spin_unlock(&dcache_inode_lock); | ||
209 | if (!inode->i_nlink) | ||
210 | fsnotify_inoderemove(inode); | ||
211 | if (dentry->d_op && dentry->d_op->d_iput) | ||
212 | dentry->d_op->d_iput(dentry, inode); | ||
213 | else | ||
214 | iput(inode); | ||
215 | } | ||
216 | |||
217 | /* | ||
182 | * dentry_lru_(add|del|move_tail) must be called with d_lock held. | 218 | * dentry_lru_(add|del|move_tail) must be called with d_lock held. |
183 | */ | 219 | */ |
184 | static void dentry_lru_add(struct dentry *dentry) | 220 | static void dentry_lru_add(struct dentry *dentry) |
@@ -272,6 +308,7 @@ void __d_drop(struct dentry *dentry) | |||
272 | spin_lock(&dcache_hash_lock); | 308 | spin_lock(&dcache_hash_lock); |
273 | hlist_del_rcu(&dentry->d_hash); | 309 | hlist_del_rcu(&dentry->d_hash); |
274 | spin_unlock(&dcache_hash_lock); | 310 | spin_unlock(&dcache_hash_lock); |
311 | dentry_rcuwalk_barrier(dentry); | ||
275 | } | 312 | } |
276 | } | 313 | } |
277 | EXPORT_SYMBOL(__d_drop); | 314 | EXPORT_SYMBOL(__d_drop); |
@@ -309,6 +346,7 @@ relock: | |||
309 | spin_unlock(&dcache_inode_lock); | 346 | spin_unlock(&dcache_inode_lock); |
310 | goto relock; | 347 | goto relock; |
311 | } | 348 | } |
349 | |||
312 | if (ref) | 350 | if (ref) |
313 | dentry->d_count--; | 351 | dentry->d_count--; |
314 | /* if dentry was on the d_lru list delete it from there */ | 352 | /* if dentry was on the d_lru list delete it from there */ |
@@ -1221,6 +1259,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) | |||
1221 | dentry->d_count = 1; | 1259 | dentry->d_count = 1; |
1222 | dentry->d_flags = DCACHE_UNHASHED; | 1260 | dentry->d_flags = DCACHE_UNHASHED; |
1223 | spin_lock_init(&dentry->d_lock); | 1261 | spin_lock_init(&dentry->d_lock); |
1262 | seqcount_init(&dentry->d_seq); | ||
1224 | dentry->d_inode = NULL; | 1263 | dentry->d_inode = NULL; |
1225 | dentry->d_parent = NULL; | 1264 | dentry->d_parent = NULL; |
1226 | dentry->d_sb = NULL; | 1265 | dentry->d_sb = NULL; |
@@ -1269,6 +1308,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) | |||
1269 | if (inode) | 1308 | if (inode) |
1270 | list_add(&dentry->d_alias, &inode->i_dentry); | 1309 | list_add(&dentry->d_alias, &inode->i_dentry); |
1271 | dentry->d_inode = inode; | 1310 | dentry->d_inode = inode; |
1311 | dentry_rcuwalk_barrier(dentry); | ||
1272 | spin_unlock(&dentry->d_lock); | 1312 | spin_unlock(&dentry->d_lock); |
1273 | fsnotify_d_instantiate(dentry, inode); | 1313 | fsnotify_d_instantiate(dentry, inode); |
1274 | } | 1314 | } |
@@ -1611,6 +1651,111 @@ err_out: | |||
1611 | EXPORT_SYMBOL(d_add_ci); | 1651 | EXPORT_SYMBOL(d_add_ci); |
1612 | 1652 | ||
1613 | /** | 1653 | /** |
1654 | * __d_lookup_rcu - search for a dentry (racy, store-free) | ||
1655 | * @parent: parent dentry | ||
1656 | * @name: qstr of name we wish to find | ||
1657 | * @seq: returns d_seq value at the point where the dentry was found | ||
1658 | * @inode: returns dentry->d_inode when the inode was found valid. | ||
1659 | * Returns: dentry, or NULL | ||
1660 | * | ||
1661 | * __d_lookup_rcu is the dcache lookup function for rcu-walk name | ||
1662 | * resolution (store-free path walking) design described in | ||
1663 | * Documentation/filesystems/path-lookup.txt. | ||
1664 | * | ||
1665 | * This is not to be used outside core vfs. | ||
1666 | * | ||
1667 | * __d_lookup_rcu must only be used in rcu-walk mode, ie. with vfsmount lock | ||
1668 | * held, and rcu_read_lock held. The returned dentry must not be stored into | ||
1669 | * without taking d_lock and checking d_seq sequence count against @seq | ||
1670 | * returned here. | ||
1671 | * | ||
1672 | * A refcount may be taken on the found dentry with the __d_rcu_to_refcount | ||
1673 | * function. | ||
1674 | * | ||
1675 | * Alternatively, __d_lookup_rcu may be called again to look up the child of | ||
1676 | * the returned dentry, so long as its parent's seqlock is checked after the | ||
1677 | * child is looked up. Thus, an interlocking stepping of sequence lock checks | ||
1678 | * is formed, giving integrity down the path walk. | ||
1679 | */ | ||
1680 | struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name, | ||
1681 | unsigned *seq, struct inode **inode) | ||
1682 | { | ||
1683 | unsigned int len = name->len; | ||
1684 | unsigned int hash = name->hash; | ||
1685 | const unsigned char *str = name->name; | ||
1686 | struct hlist_head *head = d_hash(parent, hash); | ||
1687 | struct hlist_node *node; | ||
1688 | struct dentry *dentry; | ||
1689 | |||
1690 | /* | ||
1691 | * Note: There is significant duplication with __d_lookup_rcu which is | ||
1692 | * required to prevent single threaded performance regressions | ||
1693 | * especially on architectures where smp_rmb (in seqcounts) are costly. | ||
1694 | * Keep the two functions in sync. | ||
1695 | */ | ||
1696 | |||
1697 | /* | ||
1698 | * The hash list is protected using RCU. | ||
1699 | * | ||
1700 | * Carefully use d_seq when comparing a candidate dentry, to avoid | ||
1701 | * races with d_move(). | ||
1702 | * | ||
1703 | * It is possible that concurrent renames can mess up our list | ||
1704 | * walk here and result in missing our dentry, resulting in the | ||
1705 | * false-negative result. d_lookup() protects against concurrent | ||
1706 | * renames using rename_lock seqlock. | ||
1707 | * | ||
1708 | * See Documentation/vfs/dcache-locking.txt for more details. | ||
1709 | */ | ||
1710 | hlist_for_each_entry_rcu(dentry, node, head, d_hash) { | ||
1711 | struct inode *i; | ||
1712 | const char *tname; | ||
1713 | int tlen; | ||
1714 | |||
1715 | if (dentry->d_name.hash != hash) | ||
1716 | continue; | ||
1717 | |||
1718 | seqretry: | ||
1719 | *seq = read_seqcount_begin(&dentry->d_seq); | ||
1720 | if (dentry->d_parent != parent) | ||
1721 | continue; | ||
1722 | if (d_unhashed(dentry)) | ||
1723 | continue; | ||
1724 | tlen = dentry->d_name.len; | ||
1725 | tname = dentry->d_name.name; | ||
1726 | i = dentry->d_inode; | ||
1727 | /* | ||
1728 | * This seqcount check is required to ensure name and | ||
1729 | * len are loaded atomically, so as not to walk off the | ||
1730 | * edge of memory when walking. If we could load this | ||
1731 | * atomically some other way, we could drop this check. | ||
1732 | */ | ||
1733 | if (read_seqcount_retry(&dentry->d_seq, *seq)) | ||
1734 | goto seqretry; | ||
1735 | if (parent->d_op && parent->d_op->d_compare) { | ||
1736 | if (parent->d_op->d_compare(parent, *inode, | ||
1737 | dentry, i, | ||
1738 | tlen, tname, name)) | ||
1739 | continue; | ||
1740 | } else { | ||
1741 | if (tlen != len) | ||
1742 | continue; | ||
1743 | if (memcmp(tname, str, tlen)) | ||
1744 | continue; | ||
1745 | } | ||
1746 | /* | ||
1747 | * No extra seqcount check is required after the name | ||
1748 | * compare. The caller must perform a seqcount check in | ||
1749 | * order to do anything useful with the returned dentry | ||
1750 | * anyway. | ||
1751 | */ | ||
1752 | *inode = i; | ||
1753 | return dentry; | ||
1754 | } | ||
1755 | return NULL; | ||
1756 | } | ||
1757 | |||
1758 | /** | ||
1614 | * d_lookup - search for a dentry | 1759 | * d_lookup - search for a dentry |
1615 | * @parent: parent dentry | 1760 | * @parent: parent dentry |
1616 | * @name: qstr of name we wish to find | 1761 | * @name: qstr of name we wish to find |
@@ -1621,9 +1766,9 @@ EXPORT_SYMBOL(d_add_ci); | |||
1621 | * dentry is returned. The caller must use dput to free the entry when it has | 1766 | * dentry is returned. The caller must use dput to free the entry when it has |
1622 | * finished using it. %NULL is returned if the dentry does not exist. | 1767 | * finished using it. %NULL is returned if the dentry does not exist. |
1623 | */ | 1768 | */ |
1624 | struct dentry * d_lookup(struct dentry * parent, struct qstr * name) | 1769 | struct dentry *d_lookup(struct dentry *parent, struct qstr *name) |
1625 | { | 1770 | { |
1626 | struct dentry * dentry = NULL; | 1771 | struct dentry *dentry; |
1627 | unsigned seq; | 1772 | unsigned seq; |
1628 | 1773 | ||
1629 | do { | 1774 | do { |
@@ -1636,7 +1781,7 @@ struct dentry * d_lookup(struct dentry * parent, struct qstr * name) | |||
1636 | } | 1781 | } |
1637 | EXPORT_SYMBOL(d_lookup); | 1782 | EXPORT_SYMBOL(d_lookup); |
1638 | 1783 | ||
1639 | /* | 1784 | /** |
1640 | * __d_lookup - search for a dentry (racy) | 1785 | * __d_lookup - search for a dentry (racy) |
1641 | * @parent: parent dentry | 1786 | * @parent: parent dentry |
1642 | * @name: qstr of name we wish to find | 1787 | * @name: qstr of name we wish to find |
@@ -1651,17 +1796,24 @@ EXPORT_SYMBOL(d_lookup); | |||
1651 | * | 1796 | * |
1652 | * __d_lookup callers must be commented. | 1797 | * __d_lookup callers must be commented. |
1653 | */ | 1798 | */ |
1654 | struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) | 1799 | struct dentry *__d_lookup(struct dentry *parent, struct qstr *name) |
1655 | { | 1800 | { |
1656 | unsigned int len = name->len; | 1801 | unsigned int len = name->len; |
1657 | unsigned int hash = name->hash; | 1802 | unsigned int hash = name->hash; |
1658 | const unsigned char *str = name->name; | 1803 | const unsigned char *str = name->name; |
1659 | struct hlist_head *head = d_hash(parent,hash); | 1804 | struct hlist_head *head = d_hash(parent,hash); |
1660 | struct dentry *found = NULL; | ||
1661 | struct hlist_node *node; | 1805 | struct hlist_node *node; |
1806 | struct dentry *found = NULL; | ||
1662 | struct dentry *dentry; | 1807 | struct dentry *dentry; |
1663 | 1808 | ||
1664 | /* | 1809 | /* |
1810 | * Note: There is significant duplication with __d_lookup_rcu which is | ||
1811 | * required to prevent single threaded performance regressions | ||
1812 | * especially on architectures where smp_rmb (in seqcounts) are costly. | ||
1813 | * Keep the two functions in sync. | ||
1814 | */ | ||
1815 | |||
1816 | /* | ||
1665 | * The hash list is protected using RCU. | 1817 | * The hash list is protected using RCU. |
1666 | * | 1818 | * |
1667 | * Take d_lock when comparing a candidate dentry, to avoid races | 1819 | * Take d_lock when comparing a candidate dentry, to avoid races |
@@ -1677,24 +1829,15 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) | |||
1677 | rcu_read_lock(); | 1829 | rcu_read_lock(); |
1678 | 1830 | ||
1679 | hlist_for_each_entry_rcu(dentry, node, head, d_hash) { | 1831 | hlist_for_each_entry_rcu(dentry, node, head, d_hash) { |
1680 | struct qstr *qstr; | 1832 | const char *tname; |
1833 | int tlen; | ||
1681 | 1834 | ||
1682 | if (dentry->d_name.hash != hash) | 1835 | if (dentry->d_name.hash != hash) |
1683 | continue; | 1836 | continue; |
1684 | if (dentry->d_parent != parent) | ||
1685 | continue; | ||
1686 | 1837 | ||
1687 | spin_lock(&dentry->d_lock); | 1838 | spin_lock(&dentry->d_lock); |
1688 | |||
1689 | /* | ||
1690 | * Recheck the dentry after taking the lock - d_move may have | ||
1691 | * changed things. Don't bother checking the hash because | ||
1692 | * we're about to compare the whole name anyway. | ||
1693 | */ | ||
1694 | if (dentry->d_parent != parent) | 1839 | if (dentry->d_parent != parent) |
1695 | goto next; | 1840 | goto next; |
1696 | |||
1697 | /* non-existing due to RCU? */ | ||
1698 | if (d_unhashed(dentry)) | 1841 | if (d_unhashed(dentry)) |
1699 | goto next; | 1842 | goto next; |
1700 | 1843 | ||
@@ -1702,16 +1845,17 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) | |||
1702 | * It is safe to compare names since d_move() cannot | 1845 | * It is safe to compare names since d_move() cannot |
1703 | * change the qstr (protected by d_lock). | 1846 | * change the qstr (protected by d_lock). |
1704 | */ | 1847 | */ |
1705 | qstr = &dentry->d_name; | 1848 | tlen = dentry->d_name.len; |
1849 | tname = dentry->d_name.name; | ||
1706 | if (parent->d_op && parent->d_op->d_compare) { | 1850 | if (parent->d_op && parent->d_op->d_compare) { |
1707 | if (parent->d_op->d_compare(parent, parent->d_inode, | 1851 | if (parent->d_op->d_compare(parent, parent->d_inode, |
1708 | dentry, dentry->d_inode, | 1852 | dentry, dentry->d_inode, |
1709 | qstr->len, qstr->name, name)) | 1853 | tlen, tname, name)) |
1710 | goto next; | 1854 | goto next; |
1711 | } else { | 1855 | } else { |
1712 | if (qstr->len != len) | 1856 | if (tlen != len) |
1713 | goto next; | 1857 | goto next; |
1714 | if (memcmp(qstr->name, str, len)) | 1858 | if (memcmp(tname, str, tlen)) |
1715 | goto next; | 1859 | goto next; |
1716 | } | 1860 | } |
1717 | 1861 | ||
@@ -1821,7 +1965,7 @@ again: | |||
1821 | goto again; | 1965 | goto again; |
1822 | } | 1966 | } |
1823 | dentry->d_flags &= ~DCACHE_CANT_MOUNT; | 1967 | dentry->d_flags &= ~DCACHE_CANT_MOUNT; |
1824 | dentry_iput(dentry); | 1968 | dentry_unlink_inode(dentry); |
1825 | fsnotify_nameremove(dentry, isdir); | 1969 | fsnotify_nameremove(dentry, isdir); |
1826 | return; | 1970 | return; |
1827 | } | 1971 | } |
@@ -1884,7 +2028,9 @@ void dentry_update_name_case(struct dentry *dentry, struct qstr *name) | |||
1884 | BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */ | 2028 | BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */ |
1885 | 2029 | ||
1886 | spin_lock(&dentry->d_lock); | 2030 | spin_lock(&dentry->d_lock); |
2031 | write_seqcount_begin(&dentry->d_seq); | ||
1887 | memcpy((unsigned char *)dentry->d_name.name, name->name, name->len); | 2032 | memcpy((unsigned char *)dentry->d_name.name, name->name, name->len); |
2033 | write_seqcount_end(&dentry->d_seq); | ||
1888 | spin_unlock(&dentry->d_lock); | 2034 | spin_unlock(&dentry->d_lock); |
1889 | } | 2035 | } |
1890 | EXPORT_SYMBOL(dentry_update_name_case); | 2036 | EXPORT_SYMBOL(dentry_update_name_case); |
@@ -1997,6 +2143,9 @@ void d_move(struct dentry * dentry, struct dentry * target) | |||
1997 | 2143 | ||
1998 | dentry_lock_for_move(dentry, target); | 2144 | dentry_lock_for_move(dentry, target); |
1999 | 2145 | ||
2146 | write_seqcount_begin(&dentry->d_seq); | ||
2147 | write_seqcount_begin(&target->d_seq); | ||
2148 | |||
2000 | /* Move the dentry to the target hash queue, if on different bucket */ | 2149 | /* Move the dentry to the target hash queue, if on different bucket */ |
2001 | spin_lock(&dcache_hash_lock); | 2150 | spin_lock(&dcache_hash_lock); |
2002 | if (!d_unhashed(dentry)) | 2151 | if (!d_unhashed(dentry)) |
@@ -2005,6 +2154,7 @@ void d_move(struct dentry * dentry, struct dentry * target) | |||
2005 | spin_unlock(&dcache_hash_lock); | 2154 | spin_unlock(&dcache_hash_lock); |
2006 | 2155 | ||
2007 | /* Unhash the target: dput() will then get rid of it */ | 2156 | /* Unhash the target: dput() will then get rid of it */ |
2157 | /* __d_drop does write_seqcount_barrier, but they're OK to nest. */ | ||
2008 | __d_drop(target); | 2158 | __d_drop(target); |
2009 | 2159 | ||
2010 | list_del(&dentry->d_u.d_child); | 2160 | list_del(&dentry->d_u.d_child); |
@@ -2028,6 +2178,9 @@ void d_move(struct dentry * dentry, struct dentry * target) | |||
2028 | 2178 | ||
2029 | list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); | 2179 | list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); |
2030 | 2180 | ||
2181 | write_seqcount_end(&target->d_seq); | ||
2182 | write_seqcount_end(&dentry->d_seq); | ||
2183 | |||
2031 | dentry_unlock_parents_for_move(dentry, target); | 2184 | dentry_unlock_parents_for_move(dentry, target); |
2032 | spin_unlock(&target->d_lock); | 2185 | spin_unlock(&target->d_lock); |
2033 | fsnotify_d_move(dentry); | 2186 | fsnotify_d_move(dentry); |
@@ -2110,6 +2263,9 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) | |||
2110 | 2263 | ||
2111 | dentry_lock_for_move(anon, dentry); | 2264 | dentry_lock_for_move(anon, dentry); |
2112 | 2265 | ||
2266 | write_seqcount_begin(&dentry->d_seq); | ||
2267 | write_seqcount_begin(&anon->d_seq); | ||
2268 | |||
2113 | dparent = dentry->d_parent; | 2269 | dparent = dentry->d_parent; |
2114 | aparent = anon->d_parent; | 2270 | aparent = anon->d_parent; |
2115 | 2271 | ||
@@ -2130,6 +2286,9 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) | |||
2130 | else | 2286 | else |
2131 | INIT_LIST_HEAD(&anon->d_u.d_child); | 2287 | INIT_LIST_HEAD(&anon->d_u.d_child); |
2132 | 2288 | ||
2289 | write_seqcount_end(&dentry->d_seq); | ||
2290 | write_seqcount_end(&anon->d_seq); | ||
2291 | |||
2133 | dentry_unlock_parents_for_move(anon, dentry); | 2292 | dentry_unlock_parents_for_move(anon, dentry); |
2134 | spin_unlock(&dentry->d_lock); | 2293 | spin_unlock(&dentry->d_lock); |
2135 | 2294 | ||
diff --git a/fs/filesystems.c b/fs/filesystems.c index 68ba492d8eef..751d6b255a12 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c | |||
@@ -115,6 +115,9 @@ int unregister_filesystem(struct file_system_type * fs) | |||
115 | tmp = &(*tmp)->next; | 115 | tmp = &(*tmp)->next; |
116 | } | 116 | } |
117 | write_unlock(&file_systems_lock); | 117 | write_unlock(&file_systems_lock); |
118 | |||
119 | synchronize_rcu(); | ||
120 | |||
118 | return -EINVAL; | 121 | return -EINVAL; |
119 | } | 122 | } |
120 | 123 | ||
diff --git a/fs/namei.c b/fs/namei.c index 5642bc2be418..8d3f15b3a541 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -169,8 +169,8 @@ EXPORT_SYMBOL(putname); | |||
169 | /* | 169 | /* |
170 | * This does basic POSIX ACL permission checking | 170 | * This does basic POSIX ACL permission checking |
171 | */ | 171 | */ |
172 | static int acl_permission_check(struct inode *inode, int mask, | 172 | static inline int __acl_permission_check(struct inode *inode, int mask, |
173 | int (*check_acl)(struct inode *inode, int mask)) | 173 | int (*check_acl)(struct inode *inode, int mask), int rcu) |
174 | { | 174 | { |
175 | umode_t mode = inode->i_mode; | 175 | umode_t mode = inode->i_mode; |
176 | 176 | ||
@@ -180,9 +180,13 @@ static int acl_permission_check(struct inode *inode, int mask, | |||
180 | mode >>= 6; | 180 | mode >>= 6; |
181 | else { | 181 | else { |
182 | if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) { | 182 | if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) { |
183 | int error = check_acl(inode, mask); | 183 | if (rcu) { |
184 | if (error != -EAGAIN) | 184 | return -ECHILD; |
185 | return error; | 185 | } else { |
186 | int error = check_acl(inode, mask); | ||
187 | if (error != -EAGAIN) | ||
188 | return error; | ||
189 | } | ||
186 | } | 190 | } |
187 | 191 | ||
188 | if (in_group_p(inode->i_gid)) | 192 | if (in_group_p(inode->i_gid)) |
@@ -197,6 +201,12 @@ static int acl_permission_check(struct inode *inode, int mask, | |||
197 | return -EACCES; | 201 | return -EACCES; |
198 | } | 202 | } |
199 | 203 | ||
204 | static inline int acl_permission_check(struct inode *inode, int mask, | ||
205 | int (*check_acl)(struct inode *inode, int mask)) | ||
206 | { | ||
207 | return __acl_permission_check(inode, mask, check_acl, 0); | ||
208 | } | ||
209 | |||
200 | /** | 210 | /** |
201 | * generic_permission - check for access rights on a Posix-like filesystem | 211 | * generic_permission - check for access rights on a Posix-like filesystem |
202 | * @inode: inode to check access rights for | 212 | * @inode: inode to check access rights for |
@@ -375,6 +385,173 @@ void path_put(struct path *path) | |||
375 | EXPORT_SYMBOL(path_put); | 385 | EXPORT_SYMBOL(path_put); |
376 | 386 | ||
377 | /** | 387 | /** |
388 | * nameidata_drop_rcu - drop this nameidata out of rcu-walk | ||
389 | * @nd: nameidata pathwalk data to drop | ||
390 | * @Returns: 0 on success, -ECHLID on failure | ||
391 | * | ||
392 | * Path walking has 2 modes, rcu-walk and ref-walk (see | ||
393 | * Documentation/filesystems/path-lookup.txt). __drop_rcu* functions attempt | ||
394 | * to drop out of rcu-walk mode and take normal reference counts on dentries | ||
395 | * and vfsmounts to transition to rcu-walk mode. __drop_rcu* functions take | ||
396 | * refcounts at the last known good point before rcu-walk got stuck, so | ||
397 | * ref-walk may continue from there. If this is not successful (eg. a seqcount | ||
398 | * has changed), then failure is returned and path walk restarts from the | ||
399 | * beginning in ref-walk mode. | ||
400 | * | ||
401 | * nameidata_drop_rcu attempts to drop the current nd->path and nd->root into | ||
402 | * ref-walk. Must be called from rcu-walk context. | ||
403 | */ | ||
404 | static int nameidata_drop_rcu(struct nameidata *nd) | ||
405 | { | ||
406 | struct fs_struct *fs = current->fs; | ||
407 | struct dentry *dentry = nd->path.dentry; | ||
408 | |||
409 | BUG_ON(!(nd->flags & LOOKUP_RCU)); | ||
410 | if (nd->root.mnt) { | ||
411 | spin_lock(&fs->lock); | ||
412 | if (nd->root.mnt != fs->root.mnt || | ||
413 | nd->root.dentry != fs->root.dentry) | ||
414 | goto err_root; | ||
415 | } | ||
416 | spin_lock(&dentry->d_lock); | ||
417 | if (!__d_rcu_to_refcount(dentry, nd->seq)) | ||
418 | goto err; | ||
419 | BUG_ON(nd->inode != dentry->d_inode); | ||
420 | spin_unlock(&dentry->d_lock); | ||
421 | if (nd->root.mnt) { | ||
422 | path_get(&nd->root); | ||
423 | spin_unlock(&fs->lock); | ||
424 | } | ||
425 | mntget(nd->path.mnt); | ||
426 | |||
427 | rcu_read_unlock(); | ||
428 | br_read_unlock(vfsmount_lock); | ||
429 | nd->flags &= ~LOOKUP_RCU; | ||
430 | return 0; | ||
431 | err: | ||
432 | spin_unlock(&dentry->d_lock); | ||
433 | err_root: | ||
434 | if (nd->root.mnt) | ||
435 | spin_unlock(&fs->lock); | ||
436 | return -ECHILD; | ||
437 | } | ||
438 | |||
439 | /* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */ | ||
440 | static inline int nameidata_drop_rcu_maybe(struct nameidata *nd) | ||
441 | { | ||
442 | if (nd->flags & LOOKUP_RCU) | ||
443 | return nameidata_drop_rcu(nd); | ||
444 | return 0; | ||
445 | } | ||
446 | |||
447 | /** | ||
448 | * nameidata_dentry_drop_rcu - drop nameidata and dentry out of rcu-walk | ||
449 | * @nd: nameidata pathwalk data to drop | ||
450 | * @dentry: dentry to drop | ||
451 | * @Returns: 0 on success, -ECHLID on failure | ||
452 | * | ||
453 | * nameidata_dentry_drop_rcu attempts to drop the current nd->path and nd->root, | ||
454 | * and dentry into ref-walk. @dentry must be a path found by a do_lookup call on | ||
455 | * @nd. Must be called from rcu-walk context. | ||
456 | */ | ||
457 | static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry) | ||
458 | { | ||
459 | struct fs_struct *fs = current->fs; | ||
460 | struct dentry *parent = nd->path.dentry; | ||
461 | |||
462 | BUG_ON(!(nd->flags & LOOKUP_RCU)); | ||
463 | if (nd->root.mnt) { | ||
464 | spin_lock(&fs->lock); | ||
465 | if (nd->root.mnt != fs->root.mnt || | ||
466 | nd->root.dentry != fs->root.dentry) | ||
467 | goto err_root; | ||
468 | } | ||
469 | spin_lock(&parent->d_lock); | ||
470 | spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); | ||
471 | if (!__d_rcu_to_refcount(dentry, nd->seq)) | ||
472 | goto err; | ||
473 | /* | ||
474 | * If the sequence check on the child dentry passed, then the child has | ||
475 | * not been removed from its parent. This means the parent dentry must | ||
476 | * be valid and able to take a reference at this point. | ||
477 | */ | ||
478 | BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent); | ||
479 | BUG_ON(!parent->d_count); | ||
480 | parent->d_count++; | ||
481 | spin_unlock(&dentry->d_lock); | ||
482 | spin_unlock(&parent->d_lock); | ||
483 | if (nd->root.mnt) { | ||
484 | path_get(&nd->root); | ||
485 | spin_unlock(&fs->lock); | ||
486 | } | ||
487 | mntget(nd->path.mnt); | ||
488 | |||
489 | rcu_read_unlock(); | ||
490 | br_read_unlock(vfsmount_lock); | ||
491 | nd->flags &= ~LOOKUP_RCU; | ||
492 | return 0; | ||
493 | err: | ||
494 | spin_unlock(&dentry->d_lock); | ||
495 | spin_unlock(&parent->d_lock); | ||
496 | err_root: | ||
497 | if (nd->root.mnt) | ||
498 | spin_unlock(&fs->lock); | ||
499 | return -ECHILD; | ||
500 | } | ||
501 | |||
502 | /* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */ | ||
503 | static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry) | ||
504 | { | ||
505 | if (nd->flags & LOOKUP_RCU) | ||
506 | return nameidata_dentry_drop_rcu(nd, dentry); | ||
507 | return 0; | ||
508 | } | ||
509 | |||
510 | /** | ||
511 | * nameidata_drop_rcu_last - drop nameidata ending path walk out of rcu-walk | ||
512 | * @nd: nameidata pathwalk data to drop | ||
513 | * @Returns: 0 on success, -ECHLID on failure | ||
514 | * | ||
515 | * nameidata_drop_rcu_last attempts to drop the current nd->path into ref-walk. | ||
516 | * nd->path should be the final element of the lookup, so nd->root is discarded. | ||
517 | * Must be called from rcu-walk context. | ||
518 | */ | ||
519 | static int nameidata_drop_rcu_last(struct nameidata *nd) | ||
520 | { | ||
521 | struct dentry *dentry = nd->path.dentry; | ||
522 | |||
523 | BUG_ON(!(nd->flags & LOOKUP_RCU)); | ||
524 | nd->flags &= ~LOOKUP_RCU; | ||
525 | nd->root.mnt = NULL; | ||
526 | spin_lock(&dentry->d_lock); | ||
527 | if (!__d_rcu_to_refcount(dentry, nd->seq)) | ||
528 | goto err_unlock; | ||
529 | BUG_ON(nd->inode != dentry->d_inode); | ||
530 | spin_unlock(&dentry->d_lock); | ||
531 | |||
532 | mntget(nd->path.mnt); | ||
533 | |||
534 | rcu_read_unlock(); | ||
535 | br_read_unlock(vfsmount_lock); | ||
536 | |||
537 | return 0; | ||
538 | |||
539 | err_unlock: | ||
540 | spin_unlock(&dentry->d_lock); | ||
541 | rcu_read_unlock(); | ||
542 | br_read_unlock(vfsmount_lock); | ||
543 | return -ECHILD; | ||
544 | } | ||
545 | |||
546 | /* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */ | ||
547 | static inline int nameidata_drop_rcu_last_maybe(struct nameidata *nd) | ||
548 | { | ||
549 | if (likely(nd->flags & LOOKUP_RCU)) | ||
550 | return nameidata_drop_rcu_last(nd); | ||
551 | return 0; | ||
552 | } | ||
553 | |||
554 | /** | ||
378 | * release_open_intent - free up open intent resources | 555 | * release_open_intent - free up open intent resources |
379 | * @nd: pointer to nameidata | 556 | * @nd: pointer to nameidata |
380 | */ | 557 | */ |
@@ -459,26 +636,40 @@ force_reval_path(struct path *path, struct nameidata *nd) | |||
459 | * short-cut DAC fails, then call ->permission() to do more | 636 | * short-cut DAC fails, then call ->permission() to do more |
460 | * complete permission check. | 637 | * complete permission check. |
461 | */ | 638 | */ |
462 | static int exec_permission(struct inode *inode) | 639 | static inline int __exec_permission(struct inode *inode, int rcu) |
463 | { | 640 | { |
464 | int ret; | 641 | int ret; |
465 | 642 | ||
466 | if (inode->i_op->permission) { | 643 | if (inode->i_op->permission) { |
644 | if (rcu) | ||
645 | return -ECHILD; | ||
467 | ret = inode->i_op->permission(inode, MAY_EXEC); | 646 | ret = inode->i_op->permission(inode, MAY_EXEC); |
468 | if (!ret) | 647 | if (!ret) |
469 | goto ok; | 648 | goto ok; |
470 | return ret; | 649 | return ret; |
471 | } | 650 | } |
472 | ret = acl_permission_check(inode, MAY_EXEC, inode->i_op->check_acl); | 651 | ret = __acl_permission_check(inode, MAY_EXEC, inode->i_op->check_acl, rcu); |
473 | if (!ret) | 652 | if (!ret) |
474 | goto ok; | 653 | goto ok; |
654 | if (rcu && ret == -ECHILD) | ||
655 | return ret; | ||
475 | 656 | ||
476 | if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH)) | 657 | if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH)) |
477 | goto ok; | 658 | goto ok; |
478 | 659 | ||
479 | return ret; | 660 | return ret; |
480 | ok: | 661 | ok: |
481 | return security_inode_permission(inode, MAY_EXEC); | 662 | return security_inode_exec_permission(inode, rcu); |
663 | } | ||
664 | |||
665 | static int exec_permission(struct inode *inode) | ||
666 | { | ||
667 | return __exec_permission(inode, 0); | ||
668 | } | ||
669 | |||
670 | static int exec_permission_rcu(struct inode *inode) | ||
671 | { | ||
672 | return __exec_permission(inode, 1); | ||
482 | } | 673 | } |
483 | 674 | ||
484 | static __always_inline void set_root(struct nameidata *nd) | 675 | static __always_inline void set_root(struct nameidata *nd) |
@@ -489,8 +680,20 @@ static __always_inline void set_root(struct nameidata *nd) | |||
489 | 680 | ||
490 | static int link_path_walk(const char *, struct nameidata *); | 681 | static int link_path_walk(const char *, struct nameidata *); |
491 | 682 | ||
683 | static __always_inline void set_root_rcu(struct nameidata *nd) | ||
684 | { | ||
685 | if (!nd->root.mnt) { | ||
686 | struct fs_struct *fs = current->fs; | ||
687 | spin_lock(&fs->lock); | ||
688 | nd->root = fs->root; | ||
689 | spin_unlock(&fs->lock); | ||
690 | } | ||
691 | } | ||
692 | |||
492 | static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link) | 693 | static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link) |
493 | { | 694 | { |
695 | int ret; | ||
696 | |||
494 | if (IS_ERR(link)) | 697 | if (IS_ERR(link)) |
495 | goto fail; | 698 | goto fail; |
496 | 699 | ||
@@ -500,8 +703,10 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l | |||
500 | nd->path = nd->root; | 703 | nd->path = nd->root; |
501 | path_get(&nd->root); | 704 | path_get(&nd->root); |
502 | } | 705 | } |
706 | nd->inode = nd->path.dentry->d_inode; | ||
503 | 707 | ||
504 | return link_path_walk(link, nd); | 708 | ret = link_path_walk(link, nd); |
709 | return ret; | ||
505 | fail: | 710 | fail: |
506 | path_put(&nd->path); | 711 | path_put(&nd->path); |
507 | return PTR_ERR(link); | 712 | return PTR_ERR(link); |
@@ -516,11 +721,12 @@ static void path_put_conditional(struct path *path, struct nameidata *nd) | |||
516 | 721 | ||
517 | static inline void path_to_nameidata(struct path *path, struct nameidata *nd) | 722 | static inline void path_to_nameidata(struct path *path, struct nameidata *nd) |
518 | { | 723 | { |
519 | dput(nd->path.dentry); | 724 | if (!(nd->flags & LOOKUP_RCU)) { |
520 | if (nd->path.mnt != path->mnt) { | 725 | dput(nd->path.dentry); |
521 | mntput(nd->path.mnt); | 726 | if (nd->path.mnt != path->mnt) |
522 | nd->path.mnt = path->mnt; | 727 | mntput(nd->path.mnt); |
523 | } | 728 | } |
729 | nd->path.mnt = path->mnt; | ||
524 | nd->path.dentry = path->dentry; | 730 | nd->path.dentry = path->dentry; |
525 | } | 731 | } |
526 | 732 | ||
@@ -535,9 +741,11 @@ __do_follow_link(struct path *path, struct nameidata *nd, void **p) | |||
535 | 741 | ||
536 | if (path->mnt != nd->path.mnt) { | 742 | if (path->mnt != nd->path.mnt) { |
537 | path_to_nameidata(path, nd); | 743 | path_to_nameidata(path, nd); |
744 | nd->inode = nd->path.dentry->d_inode; | ||
538 | dget(dentry); | 745 | dget(dentry); |
539 | } | 746 | } |
540 | mntget(path->mnt); | 747 | mntget(path->mnt); |
748 | |||
541 | nd->last_type = LAST_BIND; | 749 | nd->last_type = LAST_BIND; |
542 | *p = dentry->d_inode->i_op->follow_link(dentry, nd); | 750 | *p = dentry->d_inode->i_op->follow_link(dentry, nd); |
543 | error = PTR_ERR(*p); | 751 | error = PTR_ERR(*p); |
@@ -591,6 +799,20 @@ loop: | |||
591 | return err; | 799 | return err; |
592 | } | 800 | } |
593 | 801 | ||
802 | static int follow_up_rcu(struct path *path) | ||
803 | { | ||
804 | struct vfsmount *parent; | ||
805 | struct dentry *mountpoint; | ||
806 | |||
807 | parent = path->mnt->mnt_parent; | ||
808 | if (parent == path->mnt) | ||
809 | return 0; | ||
810 | mountpoint = path->mnt->mnt_mountpoint; | ||
811 | path->dentry = mountpoint; | ||
812 | path->mnt = parent; | ||
813 | return 1; | ||
814 | } | ||
815 | |||
594 | int follow_up(struct path *path) | 816 | int follow_up(struct path *path) |
595 | { | 817 | { |
596 | struct vfsmount *parent; | 818 | struct vfsmount *parent; |
@@ -615,6 +837,21 @@ int follow_up(struct path *path) | |||
615 | /* | 837 | /* |
616 | * serialization is taken care of in namespace.c | 838 | * serialization is taken care of in namespace.c |
617 | */ | 839 | */ |
840 | static void __follow_mount_rcu(struct nameidata *nd, struct path *path, | ||
841 | struct inode **inode) | ||
842 | { | ||
843 | while (d_mountpoint(path->dentry)) { | ||
844 | struct vfsmount *mounted; | ||
845 | mounted = __lookup_mnt(path->mnt, path->dentry, 1); | ||
846 | if (!mounted) | ||
847 | return; | ||
848 | path->mnt = mounted; | ||
849 | path->dentry = mounted->mnt_root; | ||
850 | nd->seq = read_seqcount_begin(&path->dentry->d_seq); | ||
851 | *inode = path->dentry->d_inode; | ||
852 | } | ||
853 | } | ||
854 | |||
618 | static int __follow_mount(struct path *path) | 855 | static int __follow_mount(struct path *path) |
619 | { | 856 | { |
620 | int res = 0; | 857 | int res = 0; |
@@ -660,7 +897,42 @@ int follow_down(struct path *path) | |||
660 | return 0; | 897 | return 0; |
661 | } | 898 | } |
662 | 899 | ||
663 | static __always_inline void follow_dotdot(struct nameidata *nd) | 900 | static int follow_dotdot_rcu(struct nameidata *nd) |
901 | { | ||
902 | struct inode *inode = nd->inode; | ||
903 | |||
904 | set_root_rcu(nd); | ||
905 | |||
906 | while(1) { | ||
907 | if (nd->path.dentry == nd->root.dentry && | ||
908 | nd->path.mnt == nd->root.mnt) { | ||
909 | break; | ||
910 | } | ||
911 | if (nd->path.dentry != nd->path.mnt->mnt_root) { | ||
912 | struct dentry *old = nd->path.dentry; | ||
913 | struct dentry *parent = old->d_parent; | ||
914 | unsigned seq; | ||
915 | |||
916 | seq = read_seqcount_begin(&parent->d_seq); | ||
917 | if (read_seqcount_retry(&old->d_seq, nd->seq)) | ||
918 | return -ECHILD; | ||
919 | inode = parent->d_inode; | ||
920 | nd->path.dentry = parent; | ||
921 | nd->seq = seq; | ||
922 | break; | ||
923 | } | ||
924 | if (!follow_up_rcu(&nd->path)) | ||
925 | break; | ||
926 | nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); | ||
927 | inode = nd->path.dentry->d_inode; | ||
928 | } | ||
929 | __follow_mount_rcu(nd, &nd->path, &inode); | ||
930 | nd->inode = inode; | ||
931 | |||
932 | return 0; | ||
933 | } | ||
934 | |||
935 | static void follow_dotdot(struct nameidata *nd) | ||
664 | { | 936 | { |
665 | set_root(nd); | 937 | set_root(nd); |
666 | 938 | ||
@@ -681,6 +953,7 @@ static __always_inline void follow_dotdot(struct nameidata *nd) | |||
681 | break; | 953 | break; |
682 | } | 954 | } |
683 | follow_mount(&nd->path); | 955 | follow_mount(&nd->path); |
956 | nd->inode = nd->path.dentry->d_inode; | ||
684 | } | 957 | } |
685 | 958 | ||
686 | /* | 959 | /* |
@@ -718,18 +991,17 @@ static struct dentry *d_alloc_and_lookup(struct dentry *parent, | |||
718 | * It _is_ time-critical. | 991 | * It _is_ time-critical. |
719 | */ | 992 | */ |
720 | static int do_lookup(struct nameidata *nd, struct qstr *name, | 993 | static int do_lookup(struct nameidata *nd, struct qstr *name, |
721 | struct path *path) | 994 | struct path *path, struct inode **inode) |
722 | { | 995 | { |
723 | struct vfsmount *mnt = nd->path.mnt; | 996 | struct vfsmount *mnt = nd->path.mnt; |
724 | struct dentry *dentry, *parent; | 997 | struct dentry *dentry, *parent = nd->path.dentry; |
725 | struct inode *dir; | 998 | struct inode *dir; |
726 | /* | 999 | /* |
727 | * See if the low-level filesystem might want | 1000 | * See if the low-level filesystem might want |
728 | * to use its own hash.. | 1001 | * to use its own hash.. |
729 | */ | 1002 | */ |
730 | if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) { | 1003 | if (parent->d_op && parent->d_op->d_hash) { |
731 | int err = nd->path.dentry->d_op->d_hash(nd->path.dentry, | 1004 | int err = parent->d_op->d_hash(parent, nd->inode, name); |
732 | nd->path.dentry->d_inode, name); | ||
733 | if (err < 0) | 1005 | if (err < 0) |
734 | return err; | 1006 | return err; |
735 | } | 1007 | } |
@@ -739,21 +1011,48 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, | |||
739 | * of a false negative due to a concurrent rename, we're going to | 1011 | * of a false negative due to a concurrent rename, we're going to |
740 | * do the non-racy lookup, below. | 1012 | * do the non-racy lookup, below. |
741 | */ | 1013 | */ |
742 | dentry = __d_lookup(nd->path.dentry, name); | 1014 | if (nd->flags & LOOKUP_RCU) { |
743 | if (!dentry) | 1015 | unsigned seq; |
744 | goto need_lookup; | 1016 | |
1017 | *inode = nd->inode; | ||
1018 | dentry = __d_lookup_rcu(parent, name, &seq, inode); | ||
1019 | if (!dentry) { | ||
1020 | if (nameidata_drop_rcu(nd)) | ||
1021 | return -ECHILD; | ||
1022 | goto need_lookup; | ||
1023 | } | ||
1024 | /* Memory barrier in read_seqcount_begin of child is enough */ | ||
1025 | if (__read_seqcount_retry(&parent->d_seq, nd->seq)) | ||
1026 | return -ECHILD; | ||
1027 | |||
1028 | nd->seq = seq; | ||
1029 | if (dentry->d_op && dentry->d_op->d_revalidate) { | ||
1030 | /* We commonly drop rcu-walk here */ | ||
1031 | if (nameidata_dentry_drop_rcu(nd, dentry)) | ||
1032 | return -ECHILD; | ||
1033 | goto need_revalidate; | ||
1034 | } | ||
1035 | path->mnt = mnt; | ||
1036 | path->dentry = dentry; | ||
1037 | __follow_mount_rcu(nd, path, inode); | ||
1038 | } else { | ||
1039 | dentry = __d_lookup(parent, name); | ||
1040 | if (!dentry) | ||
1041 | goto need_lookup; | ||
745 | found: | 1042 | found: |
746 | if (dentry->d_op && dentry->d_op->d_revalidate) | 1043 | if (dentry->d_op && dentry->d_op->d_revalidate) |
747 | goto need_revalidate; | 1044 | goto need_revalidate; |
748 | done: | 1045 | done: |
749 | path->mnt = mnt; | 1046 | path->mnt = mnt; |
750 | path->dentry = dentry; | 1047 | path->dentry = dentry; |
751 | __follow_mount(path); | 1048 | __follow_mount(path); |
1049 | *inode = path->dentry->d_inode; | ||
1050 | } | ||
752 | return 0; | 1051 | return 0; |
753 | 1052 | ||
754 | need_lookup: | 1053 | need_lookup: |
755 | parent = nd->path.dentry; | ||
756 | dir = parent->d_inode; | 1054 | dir = parent->d_inode; |
1055 | BUG_ON(nd->inode != dir); | ||
757 | 1056 | ||
758 | mutex_lock(&dir->i_mutex); | 1057 | mutex_lock(&dir->i_mutex); |
759 | /* | 1058 | /* |
@@ -815,7 +1114,6 @@ static inline int follow_on_final(struct inode *inode, unsigned lookup_flags) | |||
815 | static int link_path_walk(const char *name, struct nameidata *nd) | 1114 | static int link_path_walk(const char *name, struct nameidata *nd) |
816 | { | 1115 | { |
817 | struct path next; | 1116 | struct path next; |
818 | struct inode *inode; | ||
819 | int err; | 1117 | int err; |
820 | unsigned int lookup_flags = nd->flags; | 1118 | unsigned int lookup_flags = nd->flags; |
821 | 1119 | ||
@@ -824,18 +1122,28 @@ static int link_path_walk(const char *name, struct nameidata *nd) | |||
824 | if (!*name) | 1122 | if (!*name) |
825 | goto return_reval; | 1123 | goto return_reval; |
826 | 1124 | ||
827 | inode = nd->path.dentry->d_inode; | ||
828 | if (nd->depth) | 1125 | if (nd->depth) |
829 | lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE); | 1126 | lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE); |
830 | 1127 | ||
831 | /* At this point we know we have a real path component. */ | 1128 | /* At this point we know we have a real path component. */ |
832 | for(;;) { | 1129 | for(;;) { |
1130 | struct inode *inode; | ||
833 | unsigned long hash; | 1131 | unsigned long hash; |
834 | struct qstr this; | 1132 | struct qstr this; |
835 | unsigned int c; | 1133 | unsigned int c; |
836 | 1134 | ||
837 | nd->flags |= LOOKUP_CONTINUE; | 1135 | nd->flags |= LOOKUP_CONTINUE; |
838 | err = exec_permission(inode); | 1136 | if (nd->flags & LOOKUP_RCU) { |
1137 | err = exec_permission_rcu(nd->inode); | ||
1138 | if (err == -ECHILD) { | ||
1139 | if (nameidata_drop_rcu(nd)) | ||
1140 | return -ECHILD; | ||
1141 | goto exec_again; | ||
1142 | } | ||
1143 | } else { | ||
1144 | exec_again: | ||
1145 | err = exec_permission(nd->inode); | ||
1146 | } | ||
839 | if (err) | 1147 | if (err) |
840 | break; | 1148 | break; |
841 | 1149 | ||
@@ -866,37 +1174,44 @@ static int link_path_walk(const char *name, struct nameidata *nd) | |||
866 | if (this.name[0] == '.') switch (this.len) { | 1174 | if (this.name[0] == '.') switch (this.len) { |
867 | default: | 1175 | default: |
868 | break; | 1176 | break; |
869 | case 2: | 1177 | case 2: |
870 | if (this.name[1] != '.') | 1178 | if (this.name[1] != '.') |
871 | break; | 1179 | break; |
872 | follow_dotdot(nd); | 1180 | if (nd->flags & LOOKUP_RCU) { |
873 | inode = nd->path.dentry->d_inode; | 1181 | if (follow_dotdot_rcu(nd)) |
1182 | return -ECHILD; | ||
1183 | } else | ||
1184 | follow_dotdot(nd); | ||
874 | /* fallthrough */ | 1185 | /* fallthrough */ |
875 | case 1: | 1186 | case 1: |
876 | continue; | 1187 | continue; |
877 | } | 1188 | } |
878 | /* This does the actual lookups.. */ | 1189 | /* This does the actual lookups.. */ |
879 | err = do_lookup(nd, &this, &next); | 1190 | err = do_lookup(nd, &this, &next, &inode); |
880 | if (err) | 1191 | if (err) |
881 | break; | 1192 | break; |
882 | |||
883 | err = -ENOENT; | 1193 | err = -ENOENT; |
884 | inode = next.dentry->d_inode; | ||
885 | if (!inode) | 1194 | if (!inode) |
886 | goto out_dput; | 1195 | goto out_dput; |
887 | 1196 | ||
888 | if (inode->i_op->follow_link) { | 1197 | if (inode->i_op->follow_link) { |
1198 | /* We commonly drop rcu-walk here */ | ||
1199 | if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry)) | ||
1200 | return -ECHILD; | ||
1201 | BUG_ON(inode != next.dentry->d_inode); | ||
889 | err = do_follow_link(&next, nd); | 1202 | err = do_follow_link(&next, nd); |
890 | if (err) | 1203 | if (err) |
891 | goto return_err; | 1204 | goto return_err; |
1205 | nd->inode = nd->path.dentry->d_inode; | ||
892 | err = -ENOENT; | 1206 | err = -ENOENT; |
893 | inode = nd->path.dentry->d_inode; | 1207 | if (!nd->inode) |
894 | if (!inode) | ||
895 | break; | 1208 | break; |
896 | } else | 1209 | } else { |
897 | path_to_nameidata(&next, nd); | 1210 | path_to_nameidata(&next, nd); |
1211 | nd->inode = inode; | ||
1212 | } | ||
898 | err = -ENOTDIR; | 1213 | err = -ENOTDIR; |
899 | if (!inode->i_op->lookup) | 1214 | if (!nd->inode->i_op->lookup) |
900 | break; | 1215 | break; |
901 | continue; | 1216 | continue; |
902 | /* here ends the main loop */ | 1217 | /* here ends the main loop */ |
@@ -911,32 +1226,39 @@ last_component: | |||
911 | if (this.name[0] == '.') switch (this.len) { | 1226 | if (this.name[0] == '.') switch (this.len) { |
912 | default: | 1227 | default: |
913 | break; | 1228 | break; |
914 | case 2: | 1229 | case 2: |
915 | if (this.name[1] != '.') | 1230 | if (this.name[1] != '.') |
916 | break; | 1231 | break; |
917 | follow_dotdot(nd); | 1232 | if (nd->flags & LOOKUP_RCU) { |
918 | inode = nd->path.dentry->d_inode; | 1233 | if (follow_dotdot_rcu(nd)) |
1234 | return -ECHILD; | ||
1235 | } else | ||
1236 | follow_dotdot(nd); | ||
919 | /* fallthrough */ | 1237 | /* fallthrough */ |
920 | case 1: | 1238 | case 1: |
921 | goto return_reval; | 1239 | goto return_reval; |
922 | } | 1240 | } |
923 | err = do_lookup(nd, &this, &next); | 1241 | err = do_lookup(nd, &this, &next, &inode); |
924 | if (err) | 1242 | if (err) |
925 | break; | 1243 | break; |
926 | inode = next.dentry->d_inode; | ||
927 | if (follow_on_final(inode, lookup_flags)) { | 1244 | if (follow_on_final(inode, lookup_flags)) { |
1245 | if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry)) | ||
1246 | return -ECHILD; | ||
1247 | BUG_ON(inode != next.dentry->d_inode); | ||
928 | err = do_follow_link(&next, nd); | 1248 | err = do_follow_link(&next, nd); |
929 | if (err) | 1249 | if (err) |
930 | goto return_err; | 1250 | goto return_err; |
931 | inode = nd->path.dentry->d_inode; | 1251 | nd->inode = nd->path.dentry->d_inode; |
932 | } else | 1252 | } else { |
933 | path_to_nameidata(&next, nd); | 1253 | path_to_nameidata(&next, nd); |
1254 | nd->inode = inode; | ||
1255 | } | ||
934 | err = -ENOENT; | 1256 | err = -ENOENT; |
935 | if (!inode) | 1257 | if (!nd->inode) |
936 | break; | 1258 | break; |
937 | if (lookup_flags & LOOKUP_DIRECTORY) { | 1259 | if (lookup_flags & LOOKUP_DIRECTORY) { |
938 | err = -ENOTDIR; | 1260 | err = -ENOTDIR; |
939 | if (!inode->i_op->lookup) | 1261 | if (!nd->inode->i_op->lookup) |
940 | break; | 1262 | break; |
941 | } | 1263 | } |
942 | goto return_base; | 1264 | goto return_base; |
@@ -958,6 +1280,8 @@ return_reval: | |||
958 | */ | 1280 | */ |
959 | if (nd->path.dentry && nd->path.dentry->d_sb && | 1281 | if (nd->path.dentry && nd->path.dentry->d_sb && |
960 | (nd->path.dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) { | 1282 | (nd->path.dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) { |
1283 | if (nameidata_drop_rcu_maybe(nd)) | ||
1284 | return -ECHILD; | ||
961 | err = -ESTALE; | 1285 | err = -ESTALE; |
962 | /* Note: we do not d_invalidate() */ | 1286 | /* Note: we do not d_invalidate() */ |
963 | if (!nd->path.dentry->d_op->d_revalidate( | 1287 | if (!nd->path.dentry->d_op->d_revalidate( |
@@ -965,16 +1289,34 @@ return_reval: | |||
965 | break; | 1289 | break; |
966 | } | 1290 | } |
967 | return_base: | 1291 | return_base: |
1292 | if (nameidata_drop_rcu_last_maybe(nd)) | ||
1293 | return -ECHILD; | ||
968 | return 0; | 1294 | return 0; |
969 | out_dput: | 1295 | out_dput: |
970 | path_put_conditional(&next, nd); | 1296 | if (!(nd->flags & LOOKUP_RCU)) |
1297 | path_put_conditional(&next, nd); | ||
971 | break; | 1298 | break; |
972 | } | 1299 | } |
973 | path_put(&nd->path); | 1300 | if (!(nd->flags & LOOKUP_RCU)) |
1301 | path_put(&nd->path); | ||
974 | return_err: | 1302 | return_err: |
975 | return err; | 1303 | return err; |
976 | } | 1304 | } |
977 | 1305 | ||
1306 | static inline int path_walk_rcu(const char *name, struct nameidata *nd) | ||
1307 | { | ||
1308 | current->total_link_count = 0; | ||
1309 | |||
1310 | return link_path_walk(name, nd); | ||
1311 | } | ||
1312 | |||
1313 | static inline int path_walk_simple(const char *name, struct nameidata *nd) | ||
1314 | { | ||
1315 | current->total_link_count = 0; | ||
1316 | |||
1317 | return link_path_walk(name, nd); | ||
1318 | } | ||
1319 | |||
978 | static int path_walk(const char *name, struct nameidata *nd) | 1320 | static int path_walk(const char *name, struct nameidata *nd) |
979 | { | 1321 | { |
980 | struct path save = nd->path; | 1322 | struct path save = nd->path; |
@@ -1000,6 +1342,88 @@ static int path_walk(const char *name, struct nameidata *nd) | |||
1000 | return result; | 1342 | return result; |
1001 | } | 1343 | } |
1002 | 1344 | ||
1345 | static void path_finish_rcu(struct nameidata *nd) | ||
1346 | { | ||
1347 | if (nd->flags & LOOKUP_RCU) { | ||
1348 | /* RCU dangling. Cancel it. */ | ||
1349 | nd->flags &= ~LOOKUP_RCU; | ||
1350 | nd->root.mnt = NULL; | ||
1351 | rcu_read_unlock(); | ||
1352 | br_read_unlock(vfsmount_lock); | ||
1353 | } | ||
1354 | if (nd->file) | ||
1355 | fput(nd->file); | ||
1356 | } | ||
1357 | |||
1358 | static int path_init_rcu(int dfd, const char *name, unsigned int flags, struct nameidata *nd) | ||
1359 | { | ||
1360 | int retval = 0; | ||
1361 | int fput_needed; | ||
1362 | struct file *file; | ||
1363 | |||
1364 | nd->last_type = LAST_ROOT; /* if there are only slashes... */ | ||
1365 | nd->flags = flags | LOOKUP_RCU; | ||
1366 | nd->depth = 0; | ||
1367 | nd->root.mnt = NULL; | ||
1368 | nd->file = NULL; | ||
1369 | |||
1370 | if (*name=='/') { | ||
1371 | struct fs_struct *fs = current->fs; | ||
1372 | |||
1373 | br_read_lock(vfsmount_lock); | ||
1374 | rcu_read_lock(); | ||
1375 | |||
1376 | spin_lock(&fs->lock); | ||
1377 | nd->root = fs->root; | ||
1378 | nd->path = nd->root; | ||
1379 | nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); | ||
1380 | spin_unlock(&fs->lock); | ||
1381 | |||
1382 | } else if (dfd == AT_FDCWD) { | ||
1383 | struct fs_struct *fs = current->fs; | ||
1384 | |||
1385 | br_read_lock(vfsmount_lock); | ||
1386 | rcu_read_lock(); | ||
1387 | |||
1388 | spin_lock(&fs->lock); | ||
1389 | nd->path = fs->pwd; | ||
1390 | nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); | ||
1391 | spin_unlock(&fs->lock); | ||
1392 | } else { | ||
1393 | struct dentry *dentry; | ||
1394 | |||
1395 | file = fget_light(dfd, &fput_needed); | ||
1396 | retval = -EBADF; | ||
1397 | if (!file) | ||
1398 | goto out_fail; | ||
1399 | |||
1400 | dentry = file->f_path.dentry; | ||
1401 | |||
1402 | retval = -ENOTDIR; | ||
1403 | if (!S_ISDIR(dentry->d_inode->i_mode)) | ||
1404 | goto fput_fail; | ||
1405 | |||
1406 | retval = file_permission(file, MAY_EXEC); | ||
1407 | if (retval) | ||
1408 | goto fput_fail; | ||
1409 | |||
1410 | nd->path = file->f_path; | ||
1411 | if (fput_needed) | ||
1412 | nd->file = file; | ||
1413 | |||
1414 | nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); | ||
1415 | br_read_lock(vfsmount_lock); | ||
1416 | rcu_read_lock(); | ||
1417 | } | ||
1418 | nd->inode = nd->path.dentry->d_inode; | ||
1419 | return 0; | ||
1420 | |||
1421 | fput_fail: | ||
1422 | fput_light(file, fput_needed); | ||
1423 | out_fail: | ||
1424 | return retval; | ||
1425 | } | ||
1426 | |||
1003 | static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd) | 1427 | static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd) |
1004 | { | 1428 | { |
1005 | int retval = 0; | 1429 | int retval = 0; |
@@ -1040,6 +1464,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, struct namei | |||
1040 | 1464 | ||
1041 | fput_light(file, fput_needed); | 1465 | fput_light(file, fput_needed); |
1042 | } | 1466 | } |
1467 | nd->inode = nd->path.dentry->d_inode; | ||
1043 | return 0; | 1468 | return 0; |
1044 | 1469 | ||
1045 | fput_fail: | 1470 | fput_fail: |
@@ -1052,16 +1477,53 @@ out_fail: | |||
1052 | static int do_path_lookup(int dfd, const char *name, | 1477 | static int do_path_lookup(int dfd, const char *name, |
1053 | unsigned int flags, struct nameidata *nd) | 1478 | unsigned int flags, struct nameidata *nd) |
1054 | { | 1479 | { |
1055 | int retval = path_init(dfd, name, flags, nd); | 1480 | int retval; |
1056 | if (!retval) | 1481 | |
1057 | retval = path_walk(name, nd); | 1482 | /* |
1058 | if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && | 1483 | * Path walking is largely split up into 2 different synchronisation |
1059 | nd->path.dentry->d_inode)) | 1484 | * schemes, rcu-walk and ref-walk (explained in |
1060 | audit_inode(name, nd->path.dentry); | 1485 | * Documentation/filesystems/path-lookup.txt). These share much of the |
1486 | * path walk code, but some things particularly setup, cleanup, and | ||
1487 | * following mounts are sufficiently divergent that functions are | ||
1488 | * duplicated. Typically there is a function foo(), and its RCU | ||
1489 | * analogue, foo_rcu(). | ||
1490 | * | ||
1491 | * -ECHILD is the error number of choice (just to avoid clashes) that | ||
1492 | * is returned if some aspect of an rcu-walk fails. Such an error must | ||
1493 | * be handled by restarting a traditional ref-walk (which will always | ||
1494 | * be able to complete). | ||
1495 | */ | ||
1496 | retval = path_init_rcu(dfd, name, flags, nd); | ||
1497 | if (unlikely(retval)) | ||
1498 | return retval; | ||
1499 | retval = path_walk_rcu(name, nd); | ||
1500 | path_finish_rcu(nd); | ||
1061 | if (nd->root.mnt) { | 1501 | if (nd->root.mnt) { |
1062 | path_put(&nd->root); | 1502 | path_put(&nd->root); |
1063 | nd->root.mnt = NULL; | 1503 | nd->root.mnt = NULL; |
1064 | } | 1504 | } |
1505 | |||
1506 | if (unlikely(retval == -ECHILD || retval == -ESTALE)) { | ||
1507 | /* slower, locked walk */ | ||
1508 | if (retval == -ESTALE) | ||
1509 | flags |= LOOKUP_REVAL; | ||
1510 | retval = path_init(dfd, name, flags, nd); | ||
1511 | if (unlikely(retval)) | ||
1512 | return retval; | ||
1513 | retval = path_walk(name, nd); | ||
1514 | if (nd->root.mnt) { | ||
1515 | path_put(&nd->root); | ||
1516 | nd->root.mnt = NULL; | ||
1517 | } | ||
1518 | } | ||
1519 | |||
1520 | if (likely(!retval)) { | ||
1521 | if (unlikely(!audit_dummy_context())) { | ||
1522 | if (nd->path.dentry && nd->inode) | ||
1523 | audit_inode(name, nd->path.dentry); | ||
1524 | } | ||
1525 | } | ||
1526 | |||
1065 | return retval; | 1527 | return retval; |
1066 | } | 1528 | } |
1067 | 1529 | ||
@@ -1104,10 +1566,11 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, | |||
1104 | path_get(&nd->path); | 1566 | path_get(&nd->path); |
1105 | nd->root = nd->path; | 1567 | nd->root = nd->path; |
1106 | path_get(&nd->root); | 1568 | path_get(&nd->root); |
1569 | nd->inode = nd->path.dentry->d_inode; | ||
1107 | 1570 | ||
1108 | retval = path_walk(name, nd); | 1571 | retval = path_walk(name, nd); |
1109 | if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && | 1572 | if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && |
1110 | nd->path.dentry->d_inode)) | 1573 | nd->inode)) |
1111 | audit_inode(name, nd->path.dentry); | 1574 | audit_inode(name, nd->path.dentry); |
1112 | 1575 | ||
1113 | path_put(&nd->root); | 1576 | path_put(&nd->root); |
@@ -1488,6 +1951,7 @@ out_unlock: | |||
1488 | mutex_unlock(&dir->d_inode->i_mutex); | 1951 | mutex_unlock(&dir->d_inode->i_mutex); |
1489 | dput(nd->path.dentry); | 1952 | dput(nd->path.dentry); |
1490 | nd->path.dentry = path->dentry; | 1953 | nd->path.dentry = path->dentry; |
1954 | |||
1491 | if (error) | 1955 | if (error) |
1492 | return error; | 1956 | return error; |
1493 | /* Don't check for write permission, don't truncate */ | 1957 | /* Don't check for write permission, don't truncate */ |
@@ -1582,6 +2046,9 @@ exit: | |||
1582 | return ERR_PTR(error); | 2046 | return ERR_PTR(error); |
1583 | } | 2047 | } |
1584 | 2048 | ||
2049 | /* | ||
2050 | * Handle O_CREAT case for do_filp_open | ||
2051 | */ | ||
1585 | static struct file *do_last(struct nameidata *nd, struct path *path, | 2052 | static struct file *do_last(struct nameidata *nd, struct path *path, |
1586 | int open_flag, int acc_mode, | 2053 | int open_flag, int acc_mode, |
1587 | int mode, const char *pathname) | 2054 | int mode, const char *pathname) |
@@ -1603,42 +2070,16 @@ static struct file *do_last(struct nameidata *nd, struct path *path, | |||
1603 | } | 2070 | } |
1604 | /* fallthrough */ | 2071 | /* fallthrough */ |
1605 | case LAST_ROOT: | 2072 | case LAST_ROOT: |
1606 | if (open_flag & O_CREAT) | 2073 | goto exit; |
1607 | goto exit; | ||
1608 | /* fallthrough */ | ||
1609 | case LAST_BIND: | 2074 | case LAST_BIND: |
1610 | audit_inode(pathname, dir); | 2075 | audit_inode(pathname, dir); |
1611 | goto ok; | 2076 | goto ok; |
1612 | } | 2077 | } |
1613 | 2078 | ||
1614 | /* trailing slashes? */ | 2079 | /* trailing slashes? */ |
1615 | if (nd->last.name[nd->last.len]) { | 2080 | if (nd->last.name[nd->last.len]) |
1616 | if (open_flag & O_CREAT) | 2081 | goto exit; |
1617 | goto exit; | ||
1618 | nd->flags |= LOOKUP_DIRECTORY | LOOKUP_FOLLOW; | ||
1619 | } | ||
1620 | |||
1621 | /* just plain open? */ | ||
1622 | if (!(open_flag & O_CREAT)) { | ||
1623 | error = do_lookup(nd, &nd->last, path); | ||
1624 | if (error) | ||
1625 | goto exit; | ||
1626 | error = -ENOENT; | ||
1627 | if (!path->dentry->d_inode) | ||
1628 | goto exit_dput; | ||
1629 | if (path->dentry->d_inode->i_op->follow_link) | ||
1630 | return NULL; | ||
1631 | error = -ENOTDIR; | ||
1632 | if (nd->flags & LOOKUP_DIRECTORY) { | ||
1633 | if (!path->dentry->d_inode->i_op->lookup) | ||
1634 | goto exit_dput; | ||
1635 | } | ||
1636 | path_to_nameidata(path, nd); | ||
1637 | audit_inode(pathname, nd->path.dentry); | ||
1638 | goto ok; | ||
1639 | } | ||
1640 | 2082 | ||
1641 | /* OK, it's O_CREAT */ | ||
1642 | mutex_lock(&dir->d_inode->i_mutex); | 2083 | mutex_lock(&dir->d_inode->i_mutex); |
1643 | 2084 | ||
1644 | path->dentry = lookup_hash(nd); | 2085 | path->dentry = lookup_hash(nd); |
@@ -1709,8 +2150,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path, | |||
1709 | return NULL; | 2150 | return NULL; |
1710 | 2151 | ||
1711 | path_to_nameidata(path, nd); | 2152 | path_to_nameidata(path, nd); |
2153 | nd->inode = path->dentry->d_inode; | ||
1712 | error = -EISDIR; | 2154 | error = -EISDIR; |
1713 | if (S_ISDIR(path->dentry->d_inode->i_mode)) | 2155 | if (S_ISDIR(nd->inode->i_mode)) |
1714 | goto exit; | 2156 | goto exit; |
1715 | ok: | 2157 | ok: |
1716 | filp = finish_open(nd, open_flag, acc_mode); | 2158 | filp = finish_open(nd, open_flag, acc_mode); |
@@ -1741,7 +2183,7 @@ struct file *do_filp_open(int dfd, const char *pathname, | |||
1741 | struct path path; | 2183 | struct path path; |
1742 | int count = 0; | 2184 | int count = 0; |
1743 | int flag = open_to_namei_flags(open_flag); | 2185 | int flag = open_to_namei_flags(open_flag); |
1744 | int force_reval = 0; | 2186 | int flags; |
1745 | 2187 | ||
1746 | if (!(open_flag & O_CREAT)) | 2188 | if (!(open_flag & O_CREAT)) |
1747 | mode = 0; | 2189 | mode = 0; |
@@ -1770,54 +2212,84 @@ struct file *do_filp_open(int dfd, const char *pathname, | |||
1770 | if (open_flag & O_APPEND) | 2212 | if (open_flag & O_APPEND) |
1771 | acc_mode |= MAY_APPEND; | 2213 | acc_mode |= MAY_APPEND; |
1772 | 2214 | ||
1773 | /* find the parent */ | 2215 | flags = LOOKUP_OPEN; |
1774 | reval: | 2216 | if (open_flag & O_CREAT) { |
1775 | error = path_init(dfd, pathname, LOOKUP_PARENT, &nd); | 2217 | flags |= LOOKUP_CREATE; |
2218 | if (open_flag & O_EXCL) | ||
2219 | flags |= LOOKUP_EXCL; | ||
2220 | } | ||
2221 | if (open_flag & O_DIRECTORY) | ||
2222 | flags |= LOOKUP_DIRECTORY; | ||
2223 | if (!(open_flag & O_NOFOLLOW)) | ||
2224 | flags |= LOOKUP_FOLLOW; | ||
2225 | |||
2226 | filp = get_empty_filp(); | ||
2227 | if (!filp) | ||
2228 | return ERR_PTR(-ENFILE); | ||
2229 | |||
2230 | filp->f_flags = open_flag; | ||
2231 | nd.intent.open.file = filp; | ||
2232 | nd.intent.open.flags = flag; | ||
2233 | nd.intent.open.create_mode = mode; | ||
2234 | |||
2235 | if (open_flag & O_CREAT) | ||
2236 | goto creat; | ||
2237 | |||
2238 | /* !O_CREAT, simple open */ | ||
2239 | error = do_path_lookup(dfd, pathname, flags, &nd); | ||
2240 | if (unlikely(error)) | ||
2241 | goto out_filp; | ||
2242 | error = -ELOOP; | ||
2243 | if (!(nd.flags & LOOKUP_FOLLOW)) { | ||
2244 | if (nd.inode->i_op->follow_link) | ||
2245 | goto out_path; | ||
2246 | } | ||
2247 | error = -ENOTDIR; | ||
2248 | if (nd.flags & LOOKUP_DIRECTORY) { | ||
2249 | if (!nd.inode->i_op->lookup) | ||
2250 | goto out_path; | ||
2251 | } | ||
2252 | audit_inode(pathname, nd.path.dentry); | ||
2253 | filp = finish_open(&nd, open_flag, acc_mode); | ||
2254 | return filp; | ||
2255 | |||
2256 | creat: | ||
2257 | /* OK, have to create the file. Find the parent. */ | ||
2258 | error = path_init_rcu(dfd, pathname, | ||
2259 | LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd); | ||
1776 | if (error) | 2260 | if (error) |
1777 | return ERR_PTR(error); | 2261 | goto out_filp; |
1778 | if (force_reval) | 2262 | error = path_walk_rcu(pathname, &nd); |
1779 | nd.flags |= LOOKUP_REVAL; | 2263 | path_finish_rcu(&nd); |
2264 | if (unlikely(error == -ECHILD || error == -ESTALE)) { | ||
2265 | /* slower, locked walk */ | ||
2266 | if (error == -ESTALE) { | ||
2267 | reval: | ||
2268 | flags |= LOOKUP_REVAL; | ||
2269 | } | ||
2270 | error = path_init(dfd, pathname, | ||
2271 | LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd); | ||
2272 | if (error) | ||
2273 | goto out_filp; | ||
1780 | 2274 | ||
1781 | current->total_link_count = 0; | 2275 | error = path_walk_simple(pathname, &nd); |
1782 | error = link_path_walk(pathname, &nd); | ||
1783 | if (error) { | ||
1784 | filp = ERR_PTR(error); | ||
1785 | goto out; | ||
1786 | } | 2276 | } |
1787 | if (unlikely(!audit_dummy_context()) && (open_flag & O_CREAT)) | 2277 | if (unlikely(error)) |
2278 | goto out_filp; | ||
2279 | if (unlikely(!audit_dummy_context())) | ||
1788 | audit_inode(pathname, nd.path.dentry); | 2280 | audit_inode(pathname, nd.path.dentry); |
1789 | 2281 | ||
1790 | /* | 2282 | /* |
1791 | * We have the parent and last component. | 2283 | * We have the parent and last component. |
1792 | */ | 2284 | */ |
1793 | 2285 | nd.flags = flags; | |
1794 | error = -ENFILE; | ||
1795 | filp = get_empty_filp(); | ||
1796 | if (filp == NULL) | ||
1797 | goto exit_parent; | ||
1798 | nd.intent.open.file = filp; | ||
1799 | filp->f_flags = open_flag; | ||
1800 | nd.intent.open.flags = flag; | ||
1801 | nd.intent.open.create_mode = mode; | ||
1802 | nd.flags &= ~LOOKUP_PARENT; | ||
1803 | nd.flags |= LOOKUP_OPEN; | ||
1804 | if (open_flag & O_CREAT) { | ||
1805 | nd.flags |= LOOKUP_CREATE; | ||
1806 | if (open_flag & O_EXCL) | ||
1807 | nd.flags |= LOOKUP_EXCL; | ||
1808 | } | ||
1809 | if (open_flag & O_DIRECTORY) | ||
1810 | nd.flags |= LOOKUP_DIRECTORY; | ||
1811 | if (!(open_flag & O_NOFOLLOW)) | ||
1812 | nd.flags |= LOOKUP_FOLLOW; | ||
1813 | filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); | 2286 | filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); |
1814 | while (unlikely(!filp)) { /* trailing symlink */ | 2287 | while (unlikely(!filp)) { /* trailing symlink */ |
1815 | struct path holder; | 2288 | struct path holder; |
1816 | struct inode *inode = path.dentry->d_inode; | ||
1817 | void *cookie; | 2289 | void *cookie; |
1818 | error = -ELOOP; | 2290 | error = -ELOOP; |
1819 | /* S_ISDIR part is a temporary automount kludge */ | 2291 | /* S_ISDIR part is a temporary automount kludge */ |
1820 | if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(inode->i_mode)) | 2292 | if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(nd.inode->i_mode)) |
1821 | goto exit_dput; | 2293 | goto exit_dput; |
1822 | if (count++ == 32) | 2294 | if (count++ == 32) |
1823 | goto exit_dput; | 2295 | goto exit_dput; |
@@ -1838,36 +2310,33 @@ reval: | |||
1838 | goto exit_dput; | 2310 | goto exit_dput; |
1839 | error = __do_follow_link(&path, &nd, &cookie); | 2311 | error = __do_follow_link(&path, &nd, &cookie); |
1840 | if (unlikely(error)) { | 2312 | if (unlikely(error)) { |
2313 | if (!IS_ERR(cookie) && nd.inode->i_op->put_link) | ||
2314 | nd.inode->i_op->put_link(path.dentry, &nd, cookie); | ||
1841 | /* nd.path had been dropped */ | 2315 | /* nd.path had been dropped */ |
1842 | if (!IS_ERR(cookie) && inode->i_op->put_link) | 2316 | nd.path = path; |
1843 | inode->i_op->put_link(path.dentry, &nd, cookie); | 2317 | goto out_path; |
1844 | path_put(&path); | ||
1845 | release_open_intent(&nd); | ||
1846 | filp = ERR_PTR(error); | ||
1847 | goto out; | ||
1848 | } | 2318 | } |
1849 | holder = path; | 2319 | holder = path; |
1850 | nd.flags &= ~LOOKUP_PARENT; | 2320 | nd.flags &= ~LOOKUP_PARENT; |
1851 | filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); | 2321 | filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); |
1852 | if (inode->i_op->put_link) | 2322 | if (nd.inode->i_op->put_link) |
1853 | inode->i_op->put_link(holder.dentry, &nd, cookie); | 2323 | nd.inode->i_op->put_link(holder.dentry, &nd, cookie); |
1854 | path_put(&holder); | 2324 | path_put(&holder); |
1855 | } | 2325 | } |
1856 | out: | 2326 | out: |
1857 | if (nd.root.mnt) | 2327 | if (nd.root.mnt) |
1858 | path_put(&nd.root); | 2328 | path_put(&nd.root); |
1859 | if (filp == ERR_PTR(-ESTALE) && !force_reval) { | 2329 | if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL)) |
1860 | force_reval = 1; | ||
1861 | goto reval; | 2330 | goto reval; |
1862 | } | ||
1863 | return filp; | 2331 | return filp; |
1864 | 2332 | ||
1865 | exit_dput: | 2333 | exit_dput: |
1866 | path_put_conditional(&path, &nd); | 2334 | path_put_conditional(&path, &nd); |
2335 | out_path: | ||
2336 | path_put(&nd.path); | ||
2337 | out_filp: | ||
1867 | if (!IS_ERR(nd.intent.open.file)) | 2338 | if (!IS_ERR(nd.intent.open.file)) |
1868 | release_open_intent(&nd); | 2339 | release_open_intent(&nd); |
1869 | exit_parent: | ||
1870 | path_put(&nd.path); | ||
1871 | filp = ERR_PTR(error); | 2340 | filp = ERR_PTR(error); |
1872 | goto out; | 2341 | goto out; |
1873 | } | 2342 | } |
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index ae4b0fd9033f..998e3a715bcc 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c | |||
@@ -402,6 +402,10 @@ static int proc_sys_compare(const struct dentry *parent, | |||
402 | const struct dentry *dentry, const struct inode *inode, | 402 | const struct dentry *dentry, const struct inode *inode, |
403 | unsigned int len, const char *str, const struct qstr *name) | 403 | unsigned int len, const char *str, const struct qstr *name) |
404 | { | 404 | { |
405 | /* Although proc doesn't have negative dentries, rcu-walk means | ||
406 | * that inode here can be NULL */ | ||
407 | if (!inode) | ||
408 | return 0; | ||
405 | if (name->len != len) | 409 | if (name->len != len) |
406 | return 1; | 410 | return 1; |
407 | if (memcmp(name->name, str, len)) | 411 | if (memcmp(name->name, str, len)) |