aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/dcache.c152
-rw-r--r--fs/namei.c19
-rw-r--r--include/linux/dcache.h2
3 files changed, 121 insertions, 52 deletions
diff --git a/fs/dcache.c b/fs/dcache.c
index b80531c91779..539943eb442c 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -153,16 +153,33 @@ int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
153 * In contrast, 'ct' and 'tcount' can be from a pathname, and do 153 * In contrast, 'ct' and 'tcount' can be from a pathname, and do
154 * need the careful unaligned handling. 154 * need the careful unaligned handling.
155 */ 155 */
156static inline int dentry_cmp(const unsigned char *cs, size_t scount, 156static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount)
157 const unsigned char *ct, size_t tcount)
158{ 157{
159 unsigned long a,b,mask; 158 unsigned long a,b,mask;
159 const unsigned char *cs;
160 160
161 if (unlikely(scount != tcount)) 161 if (unlikely(dentry->d_name.len != tcount))
162 return 1; 162 return 1;
163 /*
164 * Be careful about RCU walk racing with rename:
165 * use ACCESS_ONCE to fetch the name pointer.
166 *
167 * NOTE! Even if a rename will mean that the length
168 * was not loaded atomically, we don't care. The
169 * RCU walk will check the sequence count eventually,
170 * and catch it. And we won't overrun the buffer,
171 * because we're reading the name pointer atomically,
172 * and a dentry name is guaranteed to be properly
173 * terminated with a NUL byte.
174 *
175 * End result: even if 'len' is wrong, we'll exit
176 * early because the data cannot match (there can
177 * be no NUL in the ct/tcount data)
178 */
179 cs = ACCESS_ONCE(dentry->d_name.name);
163 180
164 for (;;) { 181 for (;;) {
165 a = load_unaligned_zeropad(cs); 182 a = *(unsigned long *)cs;
166 b = load_unaligned_zeropad(ct); 183 b = load_unaligned_zeropad(ct);
167 if (tcount < sizeof(unsigned long)) 184 if (tcount < sizeof(unsigned long))
168 break; 185 break;
@@ -180,10 +197,11 @@ static inline int dentry_cmp(const unsigned char *cs, size_t scount,
180 197
181#else 198#else
182 199
183static inline int dentry_cmp(const unsigned char *cs, size_t scount, 200static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount)
184 const unsigned char *ct, size_t tcount)
185{ 201{
186 if (scount != tcount) 202 const unsigned char *cs = dentry->d_name.name;
203
204 if (dentry->d_name.len != tcount)
187 return 1; 205 return 1;
188 206
189 do { 207 do {
@@ -1439,18 +1457,16 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry,
1439 } 1457 }
1440 1458
1441 list_for_each_entry(alias, &inode->i_dentry, d_alias) { 1459 list_for_each_entry(alias, &inode->i_dentry, d_alias) {
1442 struct qstr *qstr = &alias->d_name;
1443
1444 /* 1460 /*
1445 * Don't need alias->d_lock here, because aliases with 1461 * Don't need alias->d_lock here, because aliases with
1446 * d_parent == entry->d_parent are not subject to name or 1462 * d_parent == entry->d_parent are not subject to name or
1447 * parent changes, because the parent inode i_mutex is held. 1463 * parent changes, because the parent inode i_mutex is held.
1448 */ 1464 */
1449 if (qstr->hash != hash) 1465 if (alias->d_name.hash != hash)
1450 continue; 1466 continue;
1451 if (alias->d_parent != entry->d_parent) 1467 if (alias->d_parent != entry->d_parent)
1452 continue; 1468 continue;
1453 if (dentry_cmp(qstr->name, qstr->len, name, len)) 1469 if (dentry_cmp(alias, name, len))
1454 continue; 1470 continue;
1455 __dget(alias); 1471 __dget(alias);
1456 return alias; 1472 return alias;
@@ -1727,6 +1743,48 @@ err_out:
1727} 1743}
1728EXPORT_SYMBOL(d_add_ci); 1744EXPORT_SYMBOL(d_add_ci);
1729 1745
1746/*
1747 * Do the slow-case of the dentry name compare.
1748 *
1749 * Unlike the dentry_cmp() function, we need to atomically
1750 * load the name, length and inode information, so that the
1751 * filesystem can rely on them, and can use the 'name' and
1752 * 'len' information without worrying about walking off the
1753 * end of memory etc.
1754 *
1755 * Thus the read_seqcount_retry() and the "duplicate" info
1756 * in arguments (the low-level filesystem should not look
1757 * at the dentry inode or name contents directly, since
1758 * rename can change them while we're in RCU mode).
1759 */
1760enum slow_d_compare {
1761 D_COMP_OK,
1762 D_COMP_NOMATCH,
1763 D_COMP_SEQRETRY,
1764};
1765
1766static noinline enum slow_d_compare slow_dentry_cmp(
1767 const struct dentry *parent,
1768 struct inode *inode,
1769 struct dentry *dentry,
1770 unsigned int seq,
1771 const struct qstr *name)
1772{
1773 int tlen = dentry->d_name.len;
1774 const char *tname = dentry->d_name.name;
1775 struct inode *i = dentry->d_inode;
1776
1777 if (read_seqcount_retry(&dentry->d_seq, seq)) {
1778 cpu_relax();
1779 return D_COMP_SEQRETRY;
1780 }
1781 if (parent->d_op->d_compare(parent, inode,
1782 dentry, i,
1783 tlen, tname, name))
1784 return D_COMP_NOMATCH;
1785 return D_COMP_OK;
1786}
1787
1730/** 1788/**
1731 * __d_lookup_rcu - search for a dentry (racy, store-free) 1789 * __d_lookup_rcu - search for a dentry (racy, store-free)
1732 * @parent: parent dentry 1790 * @parent: parent dentry
@@ -1753,10 +1811,13 @@ EXPORT_SYMBOL(d_add_ci);
1753 * the returned dentry, so long as its parent's seqlock is checked after the 1811 * the returned dentry, so long as its parent's seqlock is checked after the
1754 * child is looked up. Thus, an interlocking stepping of sequence lock checks 1812 * child is looked up. Thus, an interlocking stepping of sequence lock checks
1755 * is formed, giving integrity down the path walk. 1813 * is formed, giving integrity down the path walk.
1814 *
1815 * NOTE! The caller *has* to check the resulting dentry against the sequence
1816 * number we've returned before using any of the resulting dentry state!
1756 */ 1817 */
1757struct dentry *__d_lookup_rcu(const struct dentry *parent, 1818struct dentry *__d_lookup_rcu(const struct dentry *parent,
1758 const struct qstr *name, 1819 const struct qstr *name,
1759 unsigned *seqp, struct inode **inode) 1820 unsigned *seqp, struct inode *inode)
1760{ 1821{
1761 unsigned int len = name->len; 1822 unsigned int len = name->len;
1762 unsigned int hash = name->hash; 1823 unsigned int hash = name->hash;
@@ -1787,49 +1848,46 @@ struct dentry *__d_lookup_rcu(const struct dentry *parent,
1787 */ 1848 */
1788 hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) { 1849 hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
1789 unsigned seq; 1850 unsigned seq;
1790 struct inode *i;
1791 const char *tname;
1792 int tlen;
1793 1851
1794 if (dentry->d_name.hash != hash) 1852 if (dentry->d_name.hash != hash)
1795 continue; 1853 continue;
1796 1854
1797seqretry: 1855seqretry:
1798 seq = read_seqcount_begin(&dentry->d_seq); 1856 /*
1857 * The dentry sequence count protects us from concurrent
1858 * renames, and thus protects inode, parent and name fields.
1859 *
1860 * The caller must perform a seqcount check in order
1861 * to do anything useful with the returned dentry,
1862 * including using the 'd_inode' pointer.
1863 *
1864 * NOTE! We do a "raw" seqcount_begin here. That means that
1865 * we don't wait for the sequence count to stabilize if it
1866 * is in the middle of a sequence change. If we do the slow
1867 * dentry compare, we will do seqretries until it is stable,
1868 * and if we end up with a successful lookup, we actually
1869 * want to exit RCU lookup anyway.
1870 */
1871 seq = raw_seqcount_begin(&dentry->d_seq);
1799 if (dentry->d_parent != parent) 1872 if (dentry->d_parent != parent)
1800 continue; 1873 continue;
1801 if (d_unhashed(dentry)) 1874 if (d_unhashed(dentry))
1802 continue; 1875 continue;
1803 tlen = dentry->d_name.len; 1876 *seqp = seq;
1804 tname = dentry->d_name.name; 1877
1805 i = dentry->d_inode;
1806 prefetch(tname);
1807 /*
1808 * This seqcount check is required to ensure name and
1809 * len are loaded atomically, so as not to walk off the
1810 * edge of memory when walking. If we could load this
1811 * atomically some other way, we could drop this check.
1812 */
1813 if (read_seqcount_retry(&dentry->d_seq, seq))
1814 goto seqretry;
1815 if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) { 1878 if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) {
1816 if (parent->d_op->d_compare(parent, *inode, 1879 switch (slow_dentry_cmp(parent, inode, dentry, seq, name)) {
1817 dentry, i, 1880 case D_COMP_OK:
1818 tlen, tname, name)) 1881 return dentry;
1819 continue; 1882 case D_COMP_NOMATCH:
1820 } else {
1821 if (dentry_cmp(tname, tlen, str, len))
1822 continue; 1883 continue;
1884 default:
1885 goto seqretry;
1886 }
1823 } 1887 }
1824 /* 1888
1825 * No extra seqcount check is required after the name 1889 if (!dentry_cmp(dentry, str, len))
1826 * compare. The caller must perform a seqcount check in 1890 return dentry;
1827 * order to do anything useful with the returned dentry
1828 * anyway.
1829 */
1830 *seqp = seq;
1831 *inode = i;
1832 return dentry;
1833 } 1891 }
1834 return NULL; 1892 return NULL;
1835} 1893}
@@ -1908,8 +1966,6 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
1908 rcu_read_lock(); 1966 rcu_read_lock();
1909 1967
1910 hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) { 1968 hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
1911 const char *tname;
1912 int tlen;
1913 1969
1914 if (dentry->d_name.hash != hash) 1970 if (dentry->d_name.hash != hash)
1915 continue; 1971 continue;
@@ -1924,15 +1980,15 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
1924 * It is safe to compare names since d_move() cannot 1980 * It is safe to compare names since d_move() cannot
1925 * change the qstr (protected by d_lock). 1981 * change the qstr (protected by d_lock).
1926 */ 1982 */
1927 tlen = dentry->d_name.len;
1928 tname = dentry->d_name.name;
1929 if (parent->d_flags & DCACHE_OP_COMPARE) { 1983 if (parent->d_flags & DCACHE_OP_COMPARE) {
1984 int tlen = dentry->d_name.len;
1985 const char *tname = dentry->d_name.name;
1930 if (parent->d_op->d_compare(parent, parent->d_inode, 1986 if (parent->d_op->d_compare(parent, parent->d_inode,
1931 dentry, dentry->d_inode, 1987 dentry, dentry->d_inode,
1932 tlen, tname, name)) 1988 tlen, tname, name))
1933 goto next; 1989 goto next;
1934 } else { 1990 } else {
1935 if (dentry_cmp(tname, tlen, str, len)) 1991 if (dentry_cmp(dentry, str, len))
1936 goto next; 1992 goto next;
1937 } 1993 }
1938 1994
diff --git a/fs/namei.c b/fs/namei.c
index c42791914f82..46bd0045575d 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1154,12 +1154,25 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
1154 */ 1154 */
1155 if (nd->flags & LOOKUP_RCU) { 1155 if (nd->flags & LOOKUP_RCU) {
1156 unsigned seq; 1156 unsigned seq;
1157 *inode = nd->inode; 1157 dentry = __d_lookup_rcu(parent, name, &seq, nd->inode);
1158 dentry = __d_lookup_rcu(parent, name, &seq, inode);
1159 if (!dentry) 1158 if (!dentry)
1160 goto unlazy; 1159 goto unlazy;
1161 1160
1162 /* Memory barrier in read_seqcount_begin of child is enough */ 1161 /*
1162 * This sequence count validates that the inode matches
1163 * the dentry name information from lookup.
1164 */
1165 *inode = dentry->d_inode;
1166 if (read_seqcount_retry(&dentry->d_seq, seq))
1167 return -ECHILD;
1168
1169 /*
1170 * This sequence count validates that the parent had no
1171 * changes while we did the lookup of the dentry above.
1172 *
1173 * The memory barrier in read_seqcount_begin of child is
1174 * enough, we can use __read_seqcount_retry here.
1175 */
1163 if (__read_seqcount_retry(&parent->d_seq, nd->seq)) 1176 if (__read_seqcount_retry(&parent->d_seq, nd->seq))
1164 return -ECHILD; 1177 return -ECHILD;
1165 nd->seq = seq; 1178 nd->seq = seq;
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 7e11f1418203..8239f64d1c2e 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -282,7 +282,7 @@ extern struct dentry *d_hash_and_lookup(struct dentry *, struct qstr *);
282extern struct dentry *__d_lookup(struct dentry *, struct qstr *); 282extern struct dentry *__d_lookup(struct dentry *, struct qstr *);
283extern struct dentry *__d_lookup_rcu(const struct dentry *parent, 283extern struct dentry *__d_lookup_rcu(const struct dentry *parent,
284 const struct qstr *name, 284 const struct qstr *name,
285 unsigned *seq, struct inode **inode); 285 unsigned *seq, struct inode *inode);
286 286
287/** 287/**
288 * __d_rcu_to_refcount - take a refcount on dentry if sequence check is ok 288 * __d_rcu_to_refcount - take a refcount on dentry if sequence check is ok