aboutsummaryrefslogtreecommitdiffstats
path: root/fs/dcache.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/dcache.c')
-rw-r--r--fs/dcache.c202
1 files changed, 145 insertions, 57 deletions
diff --git a/fs/dcache.c b/fs/dcache.c
index b60ddc41d783..4435d8b32904 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -141,18 +141,25 @@ int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
141 * Compare 2 name strings, return 0 if they match, otherwise non-zero. 141 * Compare 2 name strings, return 0 if they match, otherwise non-zero.
142 * The strings are both count bytes long, and count is non-zero. 142 * The strings are both count bytes long, and count is non-zero.
143 */ 143 */
144static inline int dentry_cmp(const unsigned char *cs, size_t scount,
145 const unsigned char *ct, size_t tcount)
146{
147#ifdef CONFIG_DCACHE_WORD_ACCESS 144#ifdef CONFIG_DCACHE_WORD_ACCESS
148 unsigned long a,b,mask;
149 145
150 if (unlikely(scount != tcount)) 146#include <asm/word-at-a-time.h>
151 return 1; 147/*
148 * NOTE! 'cs' and 'scount' come from a dentry, so it has a
149 * aligned allocation for this particular component. We don't
150 * strictly need the load_unaligned_zeropad() safety, but it
151 * doesn't hurt either.
152 *
153 * In contrast, 'ct' and 'tcount' can be from a pathname, and do
154 * need the careful unaligned handling.
155 */
156static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
157{
158 unsigned long a,b,mask;
152 159
153 for (;;) { 160 for (;;) {
154 a = *(unsigned long *)cs; 161 a = *(unsigned long *)cs;
155 b = *(unsigned long *)ct; 162 b = load_unaligned_zeropad(ct);
156 if (tcount < sizeof(unsigned long)) 163 if (tcount < sizeof(unsigned long))
157 break; 164 break;
158 if (unlikely(a != b)) 165 if (unlikely(a != b))
@@ -165,10 +172,12 @@ static inline int dentry_cmp(const unsigned char *cs, size_t scount,
165 } 172 }
166 mask = ~(~0ul << tcount*8); 173 mask = ~(~0ul << tcount*8);
167 return unlikely(!!((a ^ b) & mask)); 174 return unlikely(!!((a ^ b) & mask));
175}
176
168#else 177#else
169 if (scount != tcount)
170 return 1;
171 178
179static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
180{
172 do { 181 do {
173 if (*cs != *ct) 182 if (*cs != *ct)
174 return 1; 183 return 1;
@@ -177,7 +186,32 @@ static inline int dentry_cmp(const unsigned char *cs, size_t scount,
177 tcount--; 186 tcount--;
178 } while (tcount); 187 } while (tcount);
179 return 0; 188 return 0;
189}
190
180#endif 191#endif
192
193static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount)
194{
195 const unsigned char *cs;
196 /*
197 * Be careful about RCU walk racing with rename:
198 * use ACCESS_ONCE to fetch the name pointer.
199 *
200 * NOTE! Even if a rename will mean that the length
201 * was not loaded atomically, we don't care. The
202 * RCU walk will check the sequence count eventually,
203 * and catch it. And we won't overrun the buffer,
204 * because we're reading the name pointer atomically,
205 * and a dentry name is guaranteed to be properly
206 * terminated with a NUL byte.
207 *
208 * End result: even if 'len' is wrong, we'll exit
209 * early because the data cannot match (there can
210 * be no NUL in the ct/tcount data)
211 */
212 cs = ACCESS_ONCE(dentry->d_name.name);
213 smp_read_barrier_depends();
214 return dentry_string_cmp(cs, ct, tcount);
181} 215}
182 216
183static void __d_free(struct rcu_head *head) 217static void __d_free(struct rcu_head *head)
@@ -1240,6 +1274,13 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
1240 if (!dentry) 1274 if (!dentry)
1241 return NULL; 1275 return NULL;
1242 1276
1277 /*
1278 * We guarantee that the inline name is always NUL-terminated.
1279 * This way the memcpy() done by the name switching in rename
1280 * will still always have a NUL at the end, even if we might
1281 * be overwriting an internal NUL character
1282 */
1283 dentry->d_iname[DNAME_INLINE_LEN-1] = 0;
1243 if (name->len > DNAME_INLINE_LEN-1) { 1284 if (name->len > DNAME_INLINE_LEN-1) {
1244 dname = kmalloc(name->len + 1, GFP_KERNEL); 1285 dname = kmalloc(name->len + 1, GFP_KERNEL);
1245 if (!dname) { 1286 if (!dname) {
@@ -1249,13 +1290,16 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
1249 } else { 1290 } else {
1250 dname = dentry->d_iname; 1291 dname = dentry->d_iname;
1251 } 1292 }
1252 dentry->d_name.name = dname;
1253 1293
1254 dentry->d_name.len = name->len; 1294 dentry->d_name.len = name->len;
1255 dentry->d_name.hash = name->hash; 1295 dentry->d_name.hash = name->hash;
1256 memcpy(dname, name->name, name->len); 1296 memcpy(dname, name->name, name->len);
1257 dname[name->len] = 0; 1297 dname[name->len] = 0;
1258 1298
1299 /* Make sure we always see the terminating NUL character */
1300 smp_wmb();
1301 dentry->d_name.name = dname;
1302
1259 dentry->d_count = 1; 1303 dentry->d_count = 1;
1260 dentry->d_flags = 0; 1304 dentry->d_flags = 0;
1261 spin_lock_init(&dentry->d_lock); 1305 spin_lock_init(&dentry->d_lock);
@@ -1421,18 +1465,18 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry,
1421 } 1465 }
1422 1466
1423 list_for_each_entry(alias, &inode->i_dentry, d_alias) { 1467 list_for_each_entry(alias, &inode->i_dentry, d_alias) {
1424 struct qstr *qstr = &alias->d_name;
1425
1426 /* 1468 /*
1427 * Don't need alias->d_lock here, because aliases with 1469 * Don't need alias->d_lock here, because aliases with
1428 * d_parent == entry->d_parent are not subject to name or 1470 * d_parent == entry->d_parent are not subject to name or
1429 * parent changes, because the parent inode i_mutex is held. 1471 * parent changes, because the parent inode i_mutex is held.
1430 */ 1472 */
1431 if (qstr->hash != hash) 1473 if (alias->d_name.hash != hash)
1432 continue; 1474 continue;
1433 if (alias->d_parent != entry->d_parent) 1475 if (alias->d_parent != entry->d_parent)
1434 continue; 1476 continue;
1435 if (dentry_cmp(qstr->name, qstr->len, name, len)) 1477 if (alias->d_name.len != len)
1478 continue;
1479 if (dentry_cmp(alias, name, len))
1436 continue; 1480 continue;
1437 __dget(alias); 1481 __dget(alias);
1438 return alias; 1482 return alias;
@@ -1471,7 +1515,7 @@ struct dentry *d_make_root(struct inode *root_inode)
1471 struct dentry *res = NULL; 1515 struct dentry *res = NULL;
1472 1516
1473 if (root_inode) { 1517 if (root_inode) {
1474 static const struct qstr name = { .name = "/", .len = 1 }; 1518 static const struct qstr name = QSTR_INIT("/", 1);
1475 1519
1476 res = __d_alloc(root_inode->i_sb, &name); 1520 res = __d_alloc(root_inode->i_sb, &name);
1477 if (res) 1521 if (res)
@@ -1709,6 +1753,48 @@ err_out:
1709} 1753}
1710EXPORT_SYMBOL(d_add_ci); 1754EXPORT_SYMBOL(d_add_ci);
1711 1755
1756/*
1757 * Do the slow-case of the dentry name compare.
1758 *
1759 * Unlike the dentry_cmp() function, we need to atomically
1760 * load the name, length and inode information, so that the
1761 * filesystem can rely on them, and can use the 'name' and
1762 * 'len' information without worrying about walking off the
1763 * end of memory etc.
1764 *
1765 * Thus the read_seqcount_retry() and the "duplicate" info
1766 * in arguments (the low-level filesystem should not look
1767 * at the dentry inode or name contents directly, since
1768 * rename can change them while we're in RCU mode).
1769 */
1770enum slow_d_compare {
1771 D_COMP_OK,
1772 D_COMP_NOMATCH,
1773 D_COMP_SEQRETRY,
1774};
1775
1776static noinline enum slow_d_compare slow_dentry_cmp(
1777 const struct dentry *parent,
1778 struct inode *inode,
1779 struct dentry *dentry,
1780 unsigned int seq,
1781 const struct qstr *name)
1782{
1783 int tlen = dentry->d_name.len;
1784 const char *tname = dentry->d_name.name;
1785 struct inode *i = dentry->d_inode;
1786
1787 if (read_seqcount_retry(&dentry->d_seq, seq)) {
1788 cpu_relax();
1789 return D_COMP_SEQRETRY;
1790 }
1791 if (parent->d_op->d_compare(parent, inode,
1792 dentry, i,
1793 tlen, tname, name))
1794 return D_COMP_NOMATCH;
1795 return D_COMP_OK;
1796}
1797
1712/** 1798/**
1713 * __d_lookup_rcu - search for a dentry (racy, store-free) 1799 * __d_lookup_rcu - search for a dentry (racy, store-free)
1714 * @parent: parent dentry 1800 * @parent: parent dentry
@@ -1735,15 +1821,17 @@ EXPORT_SYMBOL(d_add_ci);
1735 * the returned dentry, so long as its parent's seqlock is checked after the 1821 * the returned dentry, so long as its parent's seqlock is checked after the
1736 * child is looked up. Thus, an interlocking stepping of sequence lock checks 1822 * child is looked up. Thus, an interlocking stepping of sequence lock checks
1737 * is formed, giving integrity down the path walk. 1823 * is formed, giving integrity down the path walk.
1824 *
1825 * NOTE! The caller *has* to check the resulting dentry against the sequence
1826 * number we've returned before using any of the resulting dentry state!
1738 */ 1827 */
1739struct dentry *__d_lookup_rcu(const struct dentry *parent, 1828struct dentry *__d_lookup_rcu(const struct dentry *parent,
1740 const struct qstr *name, 1829 const struct qstr *name,
1741 unsigned *seqp, struct inode **inode) 1830 unsigned *seqp, struct inode *inode)
1742{ 1831{
1743 unsigned int len = name->len; 1832 u64 hashlen = name->hash_len;
1744 unsigned int hash = name->hash;
1745 const unsigned char *str = name->name; 1833 const unsigned char *str = name->name;
1746 struct hlist_bl_head *b = d_hash(parent, hash); 1834 struct hlist_bl_head *b = d_hash(parent, hashlen_hash(hashlen));
1747 struct hlist_bl_node *node; 1835 struct hlist_bl_node *node;
1748 struct dentry *dentry; 1836 struct dentry *dentry;
1749 1837
@@ -1769,49 +1857,47 @@ struct dentry *__d_lookup_rcu(const struct dentry *parent,
1769 */ 1857 */
1770 hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) { 1858 hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
1771 unsigned seq; 1859 unsigned seq;
1772 struct inode *i;
1773 const char *tname;
1774 int tlen;
1775
1776 if (dentry->d_name.hash != hash)
1777 continue;
1778 1860
1779seqretry: 1861seqretry:
1780 seq = read_seqcount_begin(&dentry->d_seq); 1862 /*
1863 * The dentry sequence count protects us from concurrent
1864 * renames, and thus protects inode, parent and name fields.
1865 *
1866 * The caller must perform a seqcount check in order
1867 * to do anything useful with the returned dentry,
1868 * including using the 'd_inode' pointer.
1869 *
1870 * NOTE! We do a "raw" seqcount_begin here. That means that
1871 * we don't wait for the sequence count to stabilize if it
1872 * is in the middle of a sequence change. If we do the slow
1873 * dentry compare, we will do seqretries until it is stable,
1874 * and if we end up with a successful lookup, we actually
1875 * want to exit RCU lookup anyway.
1876 */
1877 seq = raw_seqcount_begin(&dentry->d_seq);
1781 if (dentry->d_parent != parent) 1878 if (dentry->d_parent != parent)
1782 continue; 1879 continue;
1783 if (d_unhashed(dentry)) 1880 if (d_unhashed(dentry))
1784 continue; 1881 continue;
1785 tlen = dentry->d_name.len; 1882 *seqp = seq;
1786 tname = dentry->d_name.name; 1883
1787 i = dentry->d_inode;
1788 prefetch(tname);
1789 /*
1790 * This seqcount check is required to ensure name and
1791 * len are loaded atomically, so as not to walk off the
1792 * edge of memory when walking. If we could load this
1793 * atomically some other way, we could drop this check.
1794 */
1795 if (read_seqcount_retry(&dentry->d_seq, seq))
1796 goto seqretry;
1797 if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) { 1884 if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) {
1798 if (parent->d_op->d_compare(parent, *inode, 1885 if (dentry->d_name.hash != hashlen_hash(hashlen))
1799 dentry, i,
1800 tlen, tname, name))
1801 continue; 1886 continue;
1802 } else { 1887 switch (slow_dentry_cmp(parent, inode, dentry, seq, name)) {
1803 if (dentry_cmp(tname, tlen, str, len)) 1888 case D_COMP_OK:
1889 return dentry;
1890 case D_COMP_NOMATCH:
1804 continue; 1891 continue;
1892 default:
1893 goto seqretry;
1894 }
1805 } 1895 }
1806 /* 1896
1807 * No extra seqcount check is required after the name 1897 if (dentry->d_name.hash_len != hashlen)
1808 * compare. The caller must perform a seqcount check in 1898 continue;
1809 * order to do anything useful with the returned dentry 1899 if (!dentry_cmp(dentry, str, hashlen_len(hashlen)))
1810 * anyway. 1900 return dentry;
1811 */
1812 *seqp = seq;
1813 *inode = i;
1814 return dentry;
1815 } 1901 }
1816 return NULL; 1902 return NULL;
1817} 1903}
@@ -1890,8 +1976,6 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
1890 rcu_read_lock(); 1976 rcu_read_lock();
1891 1977
1892 hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) { 1978 hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
1893 const char *tname;
1894 int tlen;
1895 1979
1896 if (dentry->d_name.hash != hash) 1980 if (dentry->d_name.hash != hash)
1897 continue; 1981 continue;
@@ -1906,15 +1990,17 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
1906 * It is safe to compare names since d_move() cannot 1990 * It is safe to compare names since d_move() cannot
1907 * change the qstr (protected by d_lock). 1991 * change the qstr (protected by d_lock).
1908 */ 1992 */
1909 tlen = dentry->d_name.len;
1910 tname = dentry->d_name.name;
1911 if (parent->d_flags & DCACHE_OP_COMPARE) { 1993 if (parent->d_flags & DCACHE_OP_COMPARE) {
1994 int tlen = dentry->d_name.len;
1995 const char *tname = dentry->d_name.name;
1912 if (parent->d_op->d_compare(parent, parent->d_inode, 1996 if (parent->d_op->d_compare(parent, parent->d_inode,
1913 dentry, dentry->d_inode, 1997 dentry, dentry->d_inode,
1914 tlen, tname, name)) 1998 tlen, tname, name))
1915 goto next; 1999 goto next;
1916 } else { 2000 } else {
1917 if (dentry_cmp(tname, tlen, str, len)) 2001 if (dentry->d_name.len != len)
2002 goto next;
2003 if (dentry_cmp(dentry, str, len))
1918 goto next; 2004 goto next;
1919 } 2005 }
1920 2006
@@ -3007,6 +3093,7 @@ static void __init dcache_init_early(void)
3007 HASH_EARLY, 3093 HASH_EARLY,
3008 &d_hash_shift, 3094 &d_hash_shift,
3009 &d_hash_mask, 3095 &d_hash_mask,
3096 0,
3010 0); 3097 0);
3011 3098
3012 for (loop = 0; loop < (1U << d_hash_shift); loop++) 3099 for (loop = 0; loop < (1U << d_hash_shift); loop++)
@@ -3037,6 +3124,7 @@ static void __init dcache_init(void)
3037 0, 3124 0,
3038 &d_hash_shift, 3125 &d_hash_shift,
3039 &d_hash_mask, 3126 &d_hash_mask,
3127 0,
3040 0); 3128 0);
3041 3129
3042 for (loop = 0; loop < (1U << d_hash_shift); loop++) 3130 for (loop = 0; loop < (1U << d_hash_shift); loop++)