Diffstat (limited to 'fs/dcache.c')
-rw-r--r--	fs/dcache.c	202
1 file changed, 145 insertions, 57 deletions
diff --git a/fs/dcache.c b/fs/dcache.c
index b60ddc41d783..4435d8b32904 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -141,18 +141,25 @@ int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
  * Compare 2 name strings, return 0 if they match, otherwise non-zero.
  * The strings are both count bytes long, and count is non-zero.
  */
-static inline int dentry_cmp(const unsigned char *cs, size_t scount,
-		const unsigned char *ct, size_t tcount)
-{
 #ifdef CONFIG_DCACHE_WORD_ACCESS
-	unsigned long a,b,mask;
 
-	if (unlikely(scount != tcount))
-		return 1;
+#include <asm/word-at-a-time.h>
+/*
+ * NOTE! 'cs' and 'scount' come from a dentry, so it has a
+ * aligned allocation for this particular component. We don't
+ * strictly need the load_unaligned_zeropad() safety, but it
+ * doesn't hurt either.
+ *
+ * In contrast, 'ct' and 'tcount' can be from a pathname, and do
+ * need the careful unaligned handling.
+ */
+static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
+{
+	unsigned long a,b,mask;
 
 	for (;;) {
 		a = *(unsigned long *)cs;
-		b = *(unsigned long *)ct;
+		b = load_unaligned_zeropad(ct);
 		if (tcount < sizeof(unsigned long))
 			break;
 		if (unlikely(a != b))
@@ -165,10 +172,12 @@ static inline int dentry_cmp(const unsigned char *cs, size_t scount,
 	}
 	mask = ~(~0ul << tcount*8);
 	return unlikely(!!((a ^ b) & mask));
+}
+
 #else
-	if (scount != tcount)
-		return 1;
 
+static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
+{
 	do {
 		if (*cs != *ct)
 			return 1;
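
The masked tail compare above, mask = ~(~0ul << tcount*8), keeps only the low tcount bytes of the final word, which on a little-endian machine are the first tcount characters of the name. Below is a stand-alone user-space sketch of the same idea (the helper name is made up for the demo); it copies into zeroed words with memcpy() instead of using the kernel's raw aligned load and load_unaligned_zeropad(), so it never reads past either buffer, which the kernel can afford to do for the reasons given in the comment above.

#include <stdio.h>
#include <string.h>

/*
 * Compare two names of equal length a word at a time; returns 0 on match.
 * User-space demo only: the partial last word is copied into zeroed
 * variables, so the mask is redundant here and just mirrors the kernel's
 * ~(~0ul << tcount*8) expression (little-endian assumption).
 */
static int wordwise_cmp(const char *a, const char *b, size_t len)
{
	unsigned long x, y, mask;

	while (len >= sizeof(unsigned long)) {
		memcpy(&x, a, sizeof(x));	/* kernel: raw aligned load / load_unaligned_zeropad() */
		memcpy(&y, b, sizeof(y));
		if (x != y)
			return 1;
		a += sizeof(unsigned long);
		b += sizeof(unsigned long);
		len -= sizeof(unsigned long);
	}
	if (!len)
		return 0;
	x = y = 0;
	memcpy(&x, a, len);
	memcpy(&y, b, len);
	mask = ~(~0ul << len * 8);		/* keep only the low 'len' bytes */
	return ((x ^ y) & mask) != 0;
}

int main(void)
{
	printf("%d\n", wordwise_cmp("dcache.c", "dcache.c", 8));	/* 0: match */
	printf("%d\n", wordwise_cmp("dcache.c", "dcache.h", 8));	/* 1: differ in last byte */
	printf("%d\n", wordwise_cmp("fs", "fsx", 2));			/* 0: only 2 bytes compared */
	return 0;
}
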
@@ -177,7 +186,32 @@ static inline int dentry_cmp(const unsigned char *cs, size_t scount,
 		tcount--;
 	} while (tcount);
 	return 0;
+}
+
 #endif
+
+static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount)
+{
+	const unsigned char *cs;
+	/*
+	 * Be careful about RCU walk racing with rename:
+	 * use ACCESS_ONCE to fetch the name pointer.
+	 *
+	 * NOTE! Even if a rename will mean that the length
+	 * was not loaded atomically, we don't care. The
+	 * RCU walk will check the sequence count eventually,
+	 * and catch it. And we won't overrun the buffer,
+	 * because we're reading the name pointer atomically,
+	 * and a dentry name is guaranteed to be properly
+	 * terminated with a NUL byte.
+	 *
+	 * End result: even if 'len' is wrong, we'll exit
+	 * early because the data cannot match (there can
+	 * be no NUL in the ct/tcount data)
+	 */
+	cs = ACCESS_ONCE(dentry->d_name.name);
+	smp_read_barrier_depends();
+	return dentry_string_cmp(cs, ct, tcount);
 }
 
 static void __d_free(struct rcu_head *head)
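
The "we won't overrun the buffer" argument is easiest to see in the byte-at-a-time fallback: the stored dentry name is always NUL-terminated and a pathname component never contains a NUL, so even if a racing rename leaves dentry_cmp() with a stale, too-large tcount, the loop mismatches on the NUL before it can walk past the stored name. A small stand-alone illustration in ordinary C (names invented for the demo, not kernel code):

#include <stdio.h>

/* Byte-at-a-time compare, same shape as the !CONFIG_DCACHE_WORD_ACCESS path above. */
static int bytewise_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
{
	do {
		if (*cs != *ct)
			return 1;	/* mismatch: stop, nothing past this byte is read */
		cs++;
		ct++;
		tcount--;
	} while (tcount);
	return 0;
}

int main(void)
{
	/*
	 * Pretend a rename just shrank the stored name to "ab" but the reader
	 * picked up the old length, 8.  The stored name is NUL-terminated and
	 * the looked-up component ("absolute") contains no NUL, so the compare
	 * fails at offset 2 -- the bogus length 8 is never walked.
	 */
	const unsigned char stale_name[] = "ab";
	printf("%d\n", bytewise_cmp(stale_name, (const unsigned char *)"absolute", 8));	/* 1 */
	return 0;
}
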
@@ -1240,6 +1274,13 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
 	if (!dentry)
 		return NULL;
 
+	/*
+	 * We guarantee that the inline name is always NUL-terminated.
+	 * This way the memcpy() done by the name switching in rename
+	 * will still always have a NUL at the end, even if we might
+	 * be overwriting an internal NUL character
+	 */
+	dentry->d_iname[DNAME_INLINE_LEN-1] = 0;
 	if (name->len > DNAME_INLINE_LEN-1) {
 		dname = kmalloc(name->len + 1, GFP_KERNEL);
 		if (!dname) {
@@ -1249,13 +1290,16 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
 	} else {
 		dname = dentry->d_iname;
 	}
-	dentry->d_name.name = dname;
 
 	dentry->d_name.len = name->len;
 	dentry->d_name.hash = name->hash;
 	memcpy(dname, name->name, name->len);
 	dname[name->len] = 0;
 
+	/* Make sure we always see the terminating NUL character */
+	smp_wmb();
+	dentry->d_name.name = dname;
+
 	dentry->d_count = 1;
 	dentry->d_flags = 0;
 	spin_lock_init(&dentry->d_lock);
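
The hunk above moves the dentry->d_name.name assignment to after the memcpy() and the new smp_wmb(): initialize the buffer first, publish the pointer second, so an RCU reader that fetches the pointer (ACCESS_ONCE() plus smp_read_barrier_depends() in dentry_cmp()) always sees a NUL-terminated name. A rough user-space analogue of that publish pattern, using C11 release/acquire atomics in place of the kernel barriers (names and buffer sizes invented for the demo):

#include <stdatomic.h>
#include <stdio.h>
#include <string.h>

#define NAME_MAX_LEN 32

static char name_buf[NAME_MAX_LEN];
/* Readers only dereference what they load from this pointer. */
static _Atomic(const char *) published_name = NULL;

/*
 * Writer: fill the buffer (including the NUL) first, then publish the
 * pointer with release semantics -- the analogue of memcpy(); smp_wmb();
 * dentry->d_name.name = dname; in __d_alloc().
 */
static void publish(const char *src)
{
	size_t len = strlen(src);
	memcpy(name_buf, src, len);
	name_buf[len] = 0;
	atomic_store_explicit(&published_name, name_buf, memory_order_release);
}

/*
 * Reader: load the pointer once (the analogue of ACCESS_ONCE() +
 * smp_read_barrier_depends()); everything reachable through it is
 * already initialized, so strcmp() cannot run off the end.
 */
static int reader_matches(const char *wanted)
{
	const char *name = atomic_load_explicit(&published_name, memory_order_acquire);
	if (!name)
		return 0;
	return strcmp(name, wanted) == 0;
}

int main(void)
{
	publish("dcache.c");
	printf("%d\n", reader_matches("dcache.c"));	/* 1 */
	return 0;
}
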
@@ -1421,18 +1465,18 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry,
 	}
 
 	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
-		struct qstr *qstr = &alias->d_name;
-
 		/*
 		 * Don't need alias->d_lock here, because aliases with
 		 * d_parent == entry->d_parent are not subject to name or
 		 * parent changes, because the parent inode i_mutex is held.
 		 */
-		if (qstr->hash != hash)
+		if (alias->d_name.hash != hash)
 			continue;
 		if (alias->d_parent != entry->d_parent)
 			continue;
-		if (dentry_cmp(qstr->name, qstr->len, name, len))
+		if (alias->d_name.len != len)
+			continue;
+		if (dentry_cmp(alias, name, len))
 			continue;
 		__dget(alias);
 		return alias;
@@ -1471,7 +1515,7 @@ struct dentry *d_make_root(struct inode *root_inode)
 	struct dentry *res = NULL;
 
 	if (root_inode) {
-		static const struct qstr name = { .name = "/", .len = 1 };
+		static const struct qstr name = QSTR_INIT("/", 1);
 
 		res = __d_alloc(root_inode->i_sb, &name);
 		if (res)
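
QSTR_INIT() hides the initializer layout of struct qstr from callers, presumably because hash and len are being folded into a single hash_len word (see the __d_lookup_rcu() changes further down), which a plain designated initializer would have to spell out. A simplified sketch of the idea with a hypothetical qstr-like struct; the real definition and macro live in <linux/dcache.h> and may differ:

#include <stdio.h>

/* Hypothetical stand-in for struct qstr: hash and len overlay one 64-bit hash_len. */
struct my_qstr {
	union {
		struct {
			unsigned int hash;
			unsigned int len;
		};
		unsigned long long hash_len;
	};
	const unsigned char *name;
};

/*
 * Callers say MY_QSTR_INIT("/", 1) and never spell out the union nesting,
 * so the layout can change without touching every initializer.
 */
#define MY_QSTR_INIT(n, l)	{ .len = (l), .name = (const unsigned char *)(n) }

int main(void)
{
	static const struct my_qstr root = MY_QSTR_INIT("/", 1);
	printf("%s has length %u\n", (const char *)root.name, root.len);
	return 0;
}
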
@@ -1709,6 +1753,48 @@ err_out:
 }
 EXPORT_SYMBOL(d_add_ci);
 
+/*
+ * Do the slow-case of the dentry name compare.
+ *
+ * Unlike the dentry_cmp() function, we need to atomically
+ * load the name, length and inode information, so that the
+ * filesystem can rely on them, and can use the 'name' and
+ * 'len' information without worrying about walking off the
+ * end of memory etc.
+ *
+ * Thus the read_seqcount_retry() and the "duplicate" info
+ * in arguments (the low-level filesystem should not look
+ * at the dentry inode or name contents directly, since
+ * rename can change them while we're in RCU mode).
+ */
+enum slow_d_compare {
+	D_COMP_OK,
+	D_COMP_NOMATCH,
+	D_COMP_SEQRETRY,
+};
+
+static noinline enum slow_d_compare slow_dentry_cmp(
+		const struct dentry *parent,
+		struct inode *inode,
+		struct dentry *dentry,
+		unsigned int seq,
+		const struct qstr *name)
+{
+	int tlen = dentry->d_name.len;
+	const char *tname = dentry->d_name.name;
+	struct inode *i = dentry->d_inode;
+
+	if (read_seqcount_retry(&dentry->d_seq, seq)) {
+		cpu_relax();
+		return D_COMP_SEQRETRY;
+	}
+	if (parent->d_op->d_compare(parent, inode,
+				dentry, i,
+				tlen, tname, name))
+		return D_COMP_NOMATCH;
+	return D_COMP_OK;
+}
+
 /**
  * __d_lookup_rcu - search for a dentry (racy, store-free)
  * @parent: parent dentry
@@ -1735,15 +1821,17 @@ EXPORT_SYMBOL(d_add_ci);
  * the returned dentry, so long as its parent's seqlock is checked after the
  * child is looked up. Thus, an interlocking stepping of sequence lock checks
  * is formed, giving integrity down the path walk.
+ *
+ * NOTE! The caller *has* to check the resulting dentry against the sequence
+ * number we've returned before using any of the resulting dentry state!
  */
 struct dentry *__d_lookup_rcu(const struct dentry *parent,
 				const struct qstr *name,
-				unsigned *seqp, struct inode **inode)
+				unsigned *seqp, struct inode *inode)
 {
-	unsigned int len = name->len;
-	unsigned int hash = name->hash;
+	u64 hashlen = name->hash_len;
 	const unsigned char *str = name->name;
-	struct hlist_bl_head *b = d_hash(parent, hash);
+	struct hlist_bl_head *b = d_hash(parent, hashlen_hash(hashlen));
 	struct hlist_bl_node *node;
 	struct dentry *dentry;
 
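
__d_lookup_rcu() now takes the 64-bit name->hash_len, in which hash and length travel together: hashlen_hash() and hashlen_len() unpack the halves, and the single compare dentry->d_name.hash_len != hashlen (in the next hunk) rejects a candidate on either field at once. A stand-alone sketch of that packing; the field order and helper names below are assumptions for illustration, the real helpers live in <linux/dcache.h>:

#include <stdint.h>
#include <stdio.h>

/*
 * One plausible packing: hash in the low 32 bits, length in the high 32.
 * The kernel's real layout depends on endianness.
 */
static inline uint64_t make_hashlen(uint32_t hash, uint32_t len)
{
	return ((uint64_t)len << 32) | hash;
}

static inline uint32_t hashlen_hash_demo(uint64_t hashlen)
{
	return (uint32_t)hashlen;
}

static inline uint32_t hashlen_len_demo(uint64_t hashlen)
{
	return (uint32_t)(hashlen >> 32);
}

int main(void)
{
	uint64_t a = make_hashlen(0xdeadbeef, 8);	/* e.g. "dcache.c" */
	uint64_t b = make_hashlen(0xdeadbeef, 7);	/* same hash, different length */

	/* One comparison covers both fields: */
	printf("same hashlen: %d\n", a == b);		/* 0 */
	printf("hash 0x%x len %u\n", (unsigned)hashlen_hash_demo(a), (unsigned)hashlen_len_demo(a));
	return 0;
}
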
@@ -1769,49 +1857,47 @@ struct dentry *__d_lookup_rcu(const struct dentry *parent,
 	 */
 	hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
 		unsigned seq;
-		struct inode *i;
-		const char *tname;
-		int tlen;
-
-		if (dentry->d_name.hash != hash)
-			continue;
 
 seqretry:
-		seq = read_seqcount_begin(&dentry->d_seq);
+		/*
+		 * The dentry sequence count protects us from concurrent
+		 * renames, and thus protects inode, parent and name fields.
+		 *
+		 * The caller must perform a seqcount check in order
+		 * to do anything useful with the returned dentry,
+		 * including using the 'd_inode' pointer.
+		 *
+		 * NOTE! We do a "raw" seqcount_begin here. That means that
+		 * we don't wait for the sequence count to stabilize if it
+		 * is in the middle of a sequence change. If we do the slow
+		 * dentry compare, we will do seqretries until it is stable,
+		 * and if we end up with a successful lookup, we actually
+		 * want to exit RCU lookup anyway.
+		 */
+		seq = raw_seqcount_begin(&dentry->d_seq);
 		if (dentry->d_parent != parent)
 			continue;
 		if (d_unhashed(dentry))
 			continue;
-		tlen = dentry->d_name.len;
-		tname = dentry->d_name.name;
-		i = dentry->d_inode;
-		prefetch(tname);
-		/*
-		 * This seqcount check is required to ensure name and
-		 * len are loaded atomically, so as not to walk off the
-		 * edge of memory when walking. If we could load this
-		 * atomically some other way, we could drop this check.
-		 */
-		if (read_seqcount_retry(&dentry->d_seq, seq))
-			goto seqretry;
+		*seqp = seq;
+
 		if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) {
-			if (parent->d_op->d_compare(parent, *inode,
-						dentry, i,
-						tlen, tname, name))
+			if (dentry->d_name.hash != hashlen_hash(hashlen))
 				continue;
-		} else {
-			if (dentry_cmp(tname, tlen, str, len))
+			switch (slow_dentry_cmp(parent, inode, dentry, seq, name)) {
+			case D_COMP_OK:
+				return dentry;
+			case D_COMP_NOMATCH:
 				continue;
+			default:
+				goto seqretry;
+			}
 		}
-		/*
-		 * No extra seqcount check is required after the name
-		 * compare. The caller must perform a seqcount check in
-		 * order to do anything useful with the returned dentry
-		 * anyway.
-		 */
-		*seqp = seq;
-		*inode = i;
-		return dentry;
+
+		if (dentry->d_name.hash_len != hashlen)
+			continue;
+		if (!dentry_cmp(dentry, str, hashlen_len(hashlen)))
+			return dentry;
 	}
 	return NULL;
 }
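
slow_dentry_cmp() returns a tri-state instead of a boolean precisely so the switch above can tell "name did not match" apart from "the seqcount moved under us, take the seqretry path". A single-threaded sketch of that control flow, with a plain counter standing in for the seqcount and invented names (the real code uses raw_seqcount_begin()/read_seqcount_retry()):

#include <stdio.h>
#include <string.h>

/* Mirror of the tri-state result: the caller must be able to tell
 * "didn't match" apart from "the snapshot went stale, retry". */
enum cmp_result { CMP_OK, CMP_NOMATCH, CMP_SEQRETRY };

struct record {
	unsigned int seq;	/* bumped whenever the name changes */
	char name[16];
};

static enum cmp_result compare_snapshot(const struct record *r, unsigned int seq,
					const char *wanted)
{
	/* In the kernel this is read_seqcount_retry(); here we simply
	 * re-read the counter we snapshotted before calling in. */
	if (r->seq != seq)
		return CMP_SEQRETRY;
	return strcmp(r->name, wanted) ? CMP_NOMATCH : CMP_OK;
}

int main(void)
{
	struct record r = { .seq = 2, .name = "dcache.c" };
	const char *wanted = "dcache.c";

	for (;;) {
		unsigned int seq = r.seq;	/* snapshot, like raw_seqcount_begin() */

		/* Pretend a rename slipped in on the first pass only. */
		static int raced;
		if (!raced++)
			r.seq += 2;

		switch (compare_snapshot(&r, seq, wanted)) {
		case CMP_OK:
			printf("match\n");
			return 0;
		case CMP_NOMATCH:
			printf("no match\n");
			return 1;
		case CMP_SEQRETRY:
			printf("stale snapshot, retrying\n");
			continue;
		}
	}
}
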
@@ -1890,8 +1976,6 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
 	rcu_read_lock();
 
 	hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
-		const char *tname;
-		int tlen;
 
 		if (dentry->d_name.hash != hash)
 			continue;
@@ -1906,15 +1990,17 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
 		 * It is safe to compare names since d_move() cannot
 		 * change the qstr (protected by d_lock).
 		 */
-		tlen = dentry->d_name.len;
-		tname = dentry->d_name.name;
 		if (parent->d_flags & DCACHE_OP_COMPARE) {
+			int tlen = dentry->d_name.len;
+			const char *tname = dentry->d_name.name;
 			if (parent->d_op->d_compare(parent, parent->d_inode,
 						dentry, dentry->d_inode,
 						tlen, tname, name))
 				goto next;
 		} else {
-			if (dentry_cmp(tname, tlen, str, len))
+			if (dentry->d_name.len != len)
+				goto next;
+			if (dentry_cmp(dentry, str, len))
 				goto next;
 		}
 
@@ -3007,6 +3093,7 @@ static void __init dcache_init_early(void)
 					HASH_EARLY,
 					&d_hash_shift,
 					&d_hash_mask,
+					0,
 					0);
 
 	for (loop = 0; loop < (1U << d_hash_shift); loop++)
@@ -3037,6 +3124,7 @@ static void __init dcache_init(void)
 					0,
 					&d_hash_shift,
 					&d_hash_mask,
+					0,
 					0);
 
 	for (loop = 0; loop < (1U << d_hash_shift); loop++)