diff options
-rw-r--r-- | fs/dcache.c | 152 | ||||
-rw-r--r-- | fs/namei.c | 19 | ||||
-rw-r--r-- | include/linux/dcache.h | 2 |
3 files changed, 121 insertions, 52 deletions
diff --git a/fs/dcache.c b/fs/dcache.c index b80531c91779..539943eb442c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -153,16 +153,33 @@ int proc_nr_dentry(ctl_table *table, int write, void __user *buffer, | |||
153 | * In contrast, 'ct' and 'tcount' can be from a pathname, and do | 153 | * In contrast, 'ct' and 'tcount' can be from a pathname, and do |
154 | * need the careful unaligned handling. | 154 | * need the careful unaligned handling. |
155 | */ | 155 | */ |
156 | static inline int dentry_cmp(const unsigned char *cs, size_t scount, | 156 | static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount) |
157 | const unsigned char *ct, size_t tcount) | ||
158 | { | 157 | { |
159 | unsigned long a,b,mask; | 158 | unsigned long a,b,mask; |
159 | const unsigned char *cs; | ||
160 | 160 | ||
161 | if (unlikely(scount != tcount)) | 161 | if (unlikely(dentry->d_name.len != tcount)) |
162 | return 1; | 162 | return 1; |
163 | /* | ||
164 | * Be careful about RCU walk racing with rename: | ||
165 | * use ACCESS_ONCE to fetch the name pointer. | ||
166 | * | ||
167 | * NOTE! Even if a rename will mean that the length | ||
168 | * was not loaded atomically, we don't care. The | ||
169 | * RCU walk will check the sequence count eventually, | ||
170 | * and catch it. And we won't overrun the buffer, | ||
171 | * because we're reading the name pointer atomically, | ||
172 | * and a dentry name is guaranteed to be properly | ||
173 | * terminated with a NUL byte. | ||
174 | * | ||
175 | * End result: even if 'len' is wrong, we'll exit | ||
176 | * early because the data cannot match (there can | ||
177 | * be no NUL in the ct/tcount data) | ||
178 | */ | ||
179 | cs = ACCESS_ONCE(dentry->d_name.name); | ||
163 | 180 | ||
164 | for (;;) { | 181 | for (;;) { |
165 | a = load_unaligned_zeropad(cs); | 182 | a = *(unsigned long *)cs; |
166 | b = load_unaligned_zeropad(ct); | 183 | b = load_unaligned_zeropad(ct); |
167 | if (tcount < sizeof(unsigned long)) | 184 | if (tcount < sizeof(unsigned long)) |
168 | break; | 185 | break; |
@@ -180,10 +197,11 @@ static inline int dentry_cmp(const unsigned char *cs, size_t scount, | |||
180 | 197 | ||
181 | #else | 198 | #else |
182 | 199 | ||
183 | static inline int dentry_cmp(const unsigned char *cs, size_t scount, | 200 | static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount) |
184 | const unsigned char *ct, size_t tcount) | ||
185 | { | 201 | { |
186 | if (scount != tcount) | 202 | const unsigned char *cs = dentry->d_name.name; |
203 | |||
204 | if (dentry->d_name.len != tcount) | ||
187 | return 1; | 205 | return 1; |
188 | 206 | ||
189 | do { | 207 | do { |
@@ -1439,18 +1457,16 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry, | |||
1439 | } | 1457 | } |
1440 | 1458 | ||
1441 | list_for_each_entry(alias, &inode->i_dentry, d_alias) { | 1459 | list_for_each_entry(alias, &inode->i_dentry, d_alias) { |
1442 | struct qstr *qstr = &alias->d_name; | ||
1443 | |||
1444 | /* | 1460 | /* |
1445 | * Don't need alias->d_lock here, because aliases with | 1461 | * Don't need alias->d_lock here, because aliases with |
1446 | * d_parent == entry->d_parent are not subject to name or | 1462 | * d_parent == entry->d_parent are not subject to name or |
1447 | * parent changes, because the parent inode i_mutex is held. | 1463 | * parent changes, because the parent inode i_mutex is held. |
1448 | */ | 1464 | */ |
1449 | if (qstr->hash != hash) | 1465 | if (alias->d_name.hash != hash) |
1450 | continue; | 1466 | continue; |
1451 | if (alias->d_parent != entry->d_parent) | 1467 | if (alias->d_parent != entry->d_parent) |
1452 | continue; | 1468 | continue; |
1453 | if (dentry_cmp(qstr->name, qstr->len, name, len)) | 1469 | if (dentry_cmp(alias, name, len)) |
1454 | continue; | 1470 | continue; |
1455 | __dget(alias); | 1471 | __dget(alias); |
1456 | return alias; | 1472 | return alias; |
@@ -1727,6 +1743,48 @@ err_out: | |||
1727 | } | 1743 | } |
1728 | EXPORT_SYMBOL(d_add_ci); | 1744 | EXPORT_SYMBOL(d_add_ci); |
1729 | 1745 | ||
1746 | /* | ||
1747 | * Do the slow-case of the dentry name compare. | ||
1748 | * | ||
1749 | * Unlike the dentry_cmp() function, we need to atomically | ||
1750 | * load the name, length and inode information, so that the | ||
1751 | * filesystem can rely on them, and can use the 'name' and | ||
1752 | * 'len' information without worrying about walking off the | ||
1753 | * end of memory etc. | ||
1754 | * | ||
1755 | * Thus the read_seqcount_retry() and the "duplicate" info | ||
1756 | * in arguments (the low-level filesystem should not look | ||
1757 | * at the dentry inode or name contents directly, since | ||
1758 | * rename can change them while we're in RCU mode). | ||
1759 | */ | ||
1760 | enum slow_d_compare { | ||
1761 | D_COMP_OK, | ||
1762 | D_COMP_NOMATCH, | ||
1763 | D_COMP_SEQRETRY, | ||
1764 | }; | ||
1765 | |||
1766 | static noinline enum slow_d_compare slow_dentry_cmp( | ||
1767 | const struct dentry *parent, | ||
1768 | struct inode *inode, | ||
1769 | struct dentry *dentry, | ||
1770 | unsigned int seq, | ||
1771 | const struct qstr *name) | ||
1772 | { | ||
1773 | int tlen = dentry->d_name.len; | ||
1774 | const char *tname = dentry->d_name.name; | ||
1775 | struct inode *i = dentry->d_inode; | ||
1776 | |||
1777 | if (read_seqcount_retry(&dentry->d_seq, seq)) { | ||
1778 | cpu_relax(); | ||
1779 | return D_COMP_SEQRETRY; | ||
1780 | } | ||
1781 | if (parent->d_op->d_compare(parent, inode, | ||
1782 | dentry, i, | ||
1783 | tlen, tname, name)) | ||
1784 | return D_COMP_NOMATCH; | ||
1785 | return D_COMP_OK; | ||
1786 | } | ||
1787 | |||
1730 | /** | 1788 | /** |
1731 | * __d_lookup_rcu - search for a dentry (racy, store-free) | 1789 | * __d_lookup_rcu - search for a dentry (racy, store-free) |
1732 | * @parent: parent dentry | 1790 | * @parent: parent dentry |
@@ -1753,10 +1811,13 @@ EXPORT_SYMBOL(d_add_ci); | |||
1753 | * the returned dentry, so long as its parent's seqlock is checked after the | 1811 | * the returned dentry, so long as its parent's seqlock is checked after the |
1754 | * child is looked up. Thus, an interlocking stepping of sequence lock checks | 1812 | * child is looked up. Thus, an interlocking stepping of sequence lock checks |
1755 | * is formed, giving integrity down the path walk. | 1813 | * is formed, giving integrity down the path walk. |
1814 | * | ||
1815 | * NOTE! The caller *has* to check the resulting dentry against the sequence | ||
1816 | * number we've returned before using any of the resulting dentry state! | ||
1756 | */ | 1817 | */ |
1757 | struct dentry *__d_lookup_rcu(const struct dentry *parent, | 1818 | struct dentry *__d_lookup_rcu(const struct dentry *parent, |
1758 | const struct qstr *name, | 1819 | const struct qstr *name, |
1759 | unsigned *seqp, struct inode **inode) | 1820 | unsigned *seqp, struct inode *inode) |
1760 | { | 1821 | { |
1761 | unsigned int len = name->len; | 1822 | unsigned int len = name->len; |
1762 | unsigned int hash = name->hash; | 1823 | unsigned int hash = name->hash; |
@@ -1787,49 +1848,46 @@ struct dentry *__d_lookup_rcu(const struct dentry *parent, | |||
1787 | */ | 1848 | */ |
1788 | hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) { | 1849 | hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) { |
1789 | unsigned seq; | 1850 | unsigned seq; |
1790 | struct inode *i; | ||
1791 | const char *tname; | ||
1792 | int tlen; | ||
1793 | 1851 | ||
1794 | if (dentry->d_name.hash != hash) | 1852 | if (dentry->d_name.hash != hash) |
1795 | continue; | 1853 | continue; |
1796 | 1854 | ||
1797 | seqretry: | 1855 | seqretry: |
1798 | seq = read_seqcount_begin(&dentry->d_seq); | 1856 | /* |
1857 | * The dentry sequence count protects us from concurrent | ||
1858 | * renames, and thus protects inode, parent and name fields. | ||
1859 | * | ||
1860 | * The caller must perform a seqcount check in order | ||
1861 | * to do anything useful with the returned dentry, | ||
1862 | * including using the 'd_inode' pointer. | ||
1863 | * | ||
1864 | * NOTE! We do a "raw" seqcount_begin here. That means that | ||
1865 | * we don't wait for the sequence count to stabilize if it | ||
1866 | * is in the middle of a sequence change. If we do the slow | ||
1867 | * dentry compare, we will do seqretries until it is stable, | ||
1868 | * and if we end up with a successful lookup, we actually | ||
1869 | * want to exit RCU lookup anyway. | ||
1870 | */ | ||
1871 | seq = raw_seqcount_begin(&dentry->d_seq); | ||
1799 | if (dentry->d_parent != parent) | 1872 | if (dentry->d_parent != parent) |
1800 | continue; | 1873 | continue; |
1801 | if (d_unhashed(dentry)) | 1874 | if (d_unhashed(dentry)) |
1802 | continue; | 1875 | continue; |
1803 | tlen = dentry->d_name.len; | 1876 | *seqp = seq; |
1804 | tname = dentry->d_name.name; | 1877 | |
1805 | i = dentry->d_inode; | ||
1806 | prefetch(tname); | ||
1807 | /* | ||
1808 | * This seqcount check is required to ensure name and | ||
1809 | * len are loaded atomically, so as not to walk off the | ||
1810 | * edge of memory when walking. If we could load this | ||
1811 | * atomically some other way, we could drop this check. | ||
1812 | */ | ||
1813 | if (read_seqcount_retry(&dentry->d_seq, seq)) | ||
1814 | goto seqretry; | ||
1815 | if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) { | 1878 | if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) { |
1816 | if (parent->d_op->d_compare(parent, *inode, | 1879 | switch (slow_dentry_cmp(parent, inode, dentry, seq, name)) { |
1817 | dentry, i, | 1880 | case D_COMP_OK: |
1818 | tlen, tname, name)) | 1881 | return dentry; |
1819 | continue; | 1882 | case D_COMP_NOMATCH: |
1820 | } else { | ||
1821 | if (dentry_cmp(tname, tlen, str, len)) | ||
1822 | continue; | 1883 | continue; |
1884 | default: | ||
1885 | goto seqretry; | ||
1886 | } | ||
1823 | } | 1887 | } |
1824 | /* | 1888 | |
1825 | * No extra seqcount check is required after the name | 1889 | if (!dentry_cmp(dentry, str, len)) |
1826 | * compare. The caller must perform a seqcount check in | 1890 | return dentry; |
1827 | * order to do anything useful with the returned dentry | ||
1828 | * anyway. | ||
1829 | */ | ||
1830 | *seqp = seq; | ||
1831 | *inode = i; | ||
1832 | return dentry; | ||
1833 | } | 1891 | } |
1834 | return NULL; | 1892 | return NULL; |
1835 | } | 1893 | } |
@@ -1908,8 +1966,6 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name) | |||
1908 | rcu_read_lock(); | 1966 | rcu_read_lock(); |
1909 | 1967 | ||
1910 | hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) { | 1968 | hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) { |
1911 | const char *tname; | ||
1912 | int tlen; | ||
1913 | 1969 | ||
1914 | if (dentry->d_name.hash != hash) | 1970 | if (dentry->d_name.hash != hash) |
1915 | continue; | 1971 | continue; |
@@ -1924,15 +1980,15 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name) | |||
1924 | * It is safe to compare names since d_move() cannot | 1980 | * It is safe to compare names since d_move() cannot |
1925 | * change the qstr (protected by d_lock). | 1981 | * change the qstr (protected by d_lock). |
1926 | */ | 1982 | */ |
1927 | tlen = dentry->d_name.len; | ||
1928 | tname = dentry->d_name.name; | ||
1929 | if (parent->d_flags & DCACHE_OP_COMPARE) { | 1983 | if (parent->d_flags & DCACHE_OP_COMPARE) { |
1984 | int tlen = dentry->d_name.len; | ||
1985 | const char *tname = dentry->d_name.name; | ||
1930 | if (parent->d_op->d_compare(parent, parent->d_inode, | 1986 | if (parent->d_op->d_compare(parent, parent->d_inode, |
1931 | dentry, dentry->d_inode, | 1987 | dentry, dentry->d_inode, |
1932 | tlen, tname, name)) | 1988 | tlen, tname, name)) |
1933 | goto next; | 1989 | goto next; |
1934 | } else { | 1990 | } else { |
1935 | if (dentry_cmp(tname, tlen, str, len)) | 1991 | if (dentry_cmp(dentry, str, len)) |
1936 | goto next; | 1992 | goto next; |
1937 | } | 1993 | } |
1938 | 1994 | ||
diff --git a/fs/namei.c b/fs/namei.c index c42791914f82..46bd0045575d 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -1154,12 +1154,25 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, | |||
1154 | */ | 1154 | */ |
1155 | if (nd->flags & LOOKUP_RCU) { | 1155 | if (nd->flags & LOOKUP_RCU) { |
1156 | unsigned seq; | 1156 | unsigned seq; |
1157 | *inode = nd->inode; | 1157 | dentry = __d_lookup_rcu(parent, name, &seq, nd->inode); |
1158 | dentry = __d_lookup_rcu(parent, name, &seq, inode); | ||
1159 | if (!dentry) | 1158 | if (!dentry) |
1160 | goto unlazy; | 1159 | goto unlazy; |
1161 | 1160 | ||
1162 | /* Memory barrier in read_seqcount_begin of child is enough */ | 1161 | /* |
1162 | * This sequence count validates that the inode matches | ||
1163 | * the dentry name information from lookup. | ||
1164 | */ | ||
1165 | *inode = dentry->d_inode; | ||
1166 | if (read_seqcount_retry(&dentry->d_seq, seq)) | ||
1167 | return -ECHILD; | ||
1168 | |||
1169 | /* | ||
1170 | * This sequence count validates that the parent had no | ||
1171 | * changes while we did the lookup of the dentry above. | ||
1172 | * | ||
1173 | * The memory barrier in read_seqcount_begin of child is | ||
1174 | * enough, we can use __read_seqcount_retry here. | ||
1175 | */ | ||
1163 | if (__read_seqcount_retry(&parent->d_seq, nd->seq)) | 1176 | if (__read_seqcount_retry(&parent->d_seq, nd->seq)) |
1164 | return -ECHILD; | 1177 | return -ECHILD; |
1165 | nd->seq = seq; | 1178 | nd->seq = seq; |
diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 7e11f1418203..8239f64d1c2e 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h | |||
@@ -282,7 +282,7 @@ extern struct dentry *d_hash_and_lookup(struct dentry *, struct qstr *); | |||
282 | extern struct dentry *__d_lookup(struct dentry *, struct qstr *); | 282 | extern struct dentry *__d_lookup(struct dentry *, struct qstr *); |
283 | extern struct dentry *__d_lookup_rcu(const struct dentry *parent, | 283 | extern struct dentry *__d_lookup_rcu(const struct dentry *parent, |
284 | const struct qstr *name, | 284 | const struct qstr *name, |
285 | unsigned *seq, struct inode **inode); | 285 | unsigned *seq, struct inode *inode); |
286 | 286 | ||
287 | /** | 287 | /** |
288 | * __d_rcu_to_refcount - take a refcount on dentry if sequence check is ok | 288 | * __d_rcu_to_refcount - take a refcount on dentry if sequence check is ok |