aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Luis Henriques <lhenriques@suse.com> 2019-03-21 06:20:10 -0400
committer Ilya Dryomov <idryomov@gmail.com> 2019-05-07 13:22:36 -0400
commit 0c44a8e0fc55f56a70f72e67d7cc5b9341dae7d1 (patch)
tree 22ff1d529ddff80eb05f19809f37ceda342b900c
parent 3886274adf34a4e38417772e3d1c0b213380004e (diff)
ceph: quota: fix quota subdir mounts
The CephFS kernel client does not enforce quotas set in a directory that
isn't visible from the mount point. For example, given the path
'/dir1/dir2', if quotas are set in 'dir1' and the filesystem is mounted with

    mount -t ceph <server>:<port>:/dir1/ /mnt

then the client won't be able to access the 'dir1' inode, even if 'dir2'
belongs to a quota realm that points to it.

This patch fixes this issue by simply doing an MDS LOOKUPINO operation for
unknown inodes. Any inode reference obtained this way will be added to a
list in ceph_mds_client, and will only be released when the filesystem is
umounted.

Link: https://tracker.ceph.com/issues/38482
Reported-by: Hendrik Peyerl <hpeyerl@plusline.net>
Signed-off-by: Luis Henriques <lhenriques@suse.com>
Reviewed-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
-rw-r--r-- fs/ceph/mds_client.c 4
-rw-r--r-- fs/ceph/mds_client.h 18
-rw-r--r-- fs/ceph/quota.c 177
-rw-r--r-- fs/ceph/super.h 1
4 files changed, 190 insertions, 10 deletions
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 9049c2a3e972..5dee98b4cfde 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -4125,6 +4125,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
4125 mdsc->max_sessions = 0; 4125 mdsc->max_sessions = 0;
4126 mdsc->stopping = 0; 4126 mdsc->stopping = 0;
4127 atomic64_set(&mdsc->quotarealms_count, 0); 4127 atomic64_set(&mdsc->quotarealms_count, 0);
4128 mdsc->quotarealms_inodes = RB_ROOT;
4129 mutex_init(&mdsc->quotarealms_inodes_mutex);
4128 mdsc->last_snap_seq = 0; 4130 mdsc->last_snap_seq = 0;
4129 init_rwsem(&mdsc->snap_rwsem); 4131 init_rwsem(&mdsc->snap_rwsem);
4130 mdsc->snap_realms = RB_ROOT; 4132 mdsc->snap_realms = RB_ROOT;
@@ -4216,6 +4218,8 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
4216 * their inode/dcache refs 4218 * their inode/dcache refs
4217 */ 4219 */
4218 ceph_msgr_flush(); 4220 ceph_msgr_flush();
4221
4222 ceph_cleanup_quotarealms_inodes(mdsc);
4219} 4223}
4220 4224
4221/* 4225/*
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 50385a481fdb..3f0029aa8a39 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -326,6 +326,18 @@ struct ceph_snapid_map {
326}; 326};
327 327
328/* 328/*
329 * node for list of quotarealm inodes that are not visible from the filesystem
330 * mountpoint, but required to handle, e.g. quotas.
331 */
332struct ceph_quotarealm_inode {
333 struct rb_node node;
334 u64 ino;
335 unsigned long timeout; /* last time a lookup failed for this inode */
336 struct mutex mutex;
337 struct inode *inode;
338};
339
340/*
329 * mds client state 341 * mds client state
330 */ 342 */
331struct ceph_mds_client { 343struct ceph_mds_client {
@@ -344,6 +356,12 @@ struct ceph_mds_client {
344 int stopping; /* true if shutting down */ 356 int stopping; /* true if shutting down */
345 357
346 atomic64_t quotarealms_count; /* # realms with quota */ 358 atomic64_t quotarealms_count; /* # realms with quota */
359 /*
360 * We keep a list of inodes we don't see in the mountpoint but that we
361 * need to track quota realms.
362 */
363 struct rb_root quotarealms_inodes;
364 struct mutex quotarealms_inodes_mutex;
347 365
348 /* 366 /*
349 * snap_rwsem will cover cap linkage into snaprealms, and 367 * snap_rwsem will cover cap linkage into snaprealms, and
diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c
index 9455d3aef0c3..c4522212872c 100644
--- a/fs/ceph/quota.c
+++ b/fs/ceph/quota.c
@@ -22,7 +22,16 @@ void ceph_adjust_quota_realms_count(struct inode *inode, bool inc)
22static inline bool ceph_has_realms_with_quotas(struct inode *inode) 22static inline bool ceph_has_realms_with_quotas(struct inode *inode)
23{ 23{
24 struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; 24 struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
25 return atomic64_read(&mdsc->quotarealms_count) > 0; 25 struct super_block *sb = mdsc->fsc->sb;
26
27 if (atomic64_read(&mdsc->quotarealms_count) > 0)
28 return true;
29 /* if root is the real CephFS root, we don't have quota realms */
30 if (sb->s_root->d_inode &&
31 (sb->s_root->d_inode->i_ino == CEPH_INO_ROOT))
32 return false;
33 /* otherwise, we can't know for sure */
34 return true;
26} 35}
27 36
28void ceph_handle_quota(struct ceph_mds_client *mdsc, 37void ceph_handle_quota(struct ceph_mds_client *mdsc,
@@ -68,6 +77,108 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc,
68 iput(inode); 77 iput(inode);
69} 78}
70 79
80static struct ceph_quotarealm_inode *
81find_quotarealm_inode(struct ceph_mds_client *mdsc, u64 ino)
82{
83 struct ceph_quotarealm_inode *qri = NULL;
84 struct rb_node **node, *parent = NULL;
85
86 mutex_lock(&mdsc->quotarealms_inodes_mutex);
87 node = &(mdsc->quotarealms_inodes.rb_node);
88 while (*node) {
89 parent = *node;
90 qri = container_of(*node, struct ceph_quotarealm_inode, node);
91
92 if (ino < qri->ino)
93 node = &((*node)->rb_left);
94 else if (ino > qri->ino)
95 node = &((*node)->rb_right);
96 else
97 break;
98 }
99 if (!qri || (qri->ino != ino)) {
100 /* Not found, create a new one and insert it */
101 qri = kmalloc(sizeof(*qri), GFP_KERNEL);
102 if (qri) {
103 qri->ino = ino;
104 qri->inode = NULL;
105 qri->timeout = 0;
106 mutex_init(&qri->mutex);
107 rb_link_node(&qri->node, parent, node);
108 rb_insert_color(&qri->node, &mdsc->quotarealms_inodes);
109 } else
110 pr_warn("Failed to alloc quotarealms_inode\n");
111 }
112 mutex_unlock(&mdsc->quotarealms_inodes_mutex);
113
114 return qri;
115}
116
117/*
118 * This function will try to lookup a realm inode which isn't visible in the
119 * filesystem mountpoint. A list of these kind of inodes (not visible) is
120 * maintained in the mdsc and freed only when the filesystem is umounted.
121 *
122 * Note that these inodes are kept in this list even if the lookup fails, which
123 * allows to prevent useless lookup requests.
124 */
125static struct inode *lookup_quotarealm_inode(struct ceph_mds_client *mdsc,
126 struct super_block *sb,
127 struct ceph_snap_realm *realm)
128{
129 struct ceph_quotarealm_inode *qri;
130 struct inode *in;
131
132 qri = find_quotarealm_inode(mdsc, realm->ino);
133 if (!qri)
134 return NULL;
135
136 mutex_lock(&qri->mutex);
137 if (qri->inode) {
138 /* A request has already returned the inode */
139 mutex_unlock(&qri->mutex);
140 return qri->inode;
141 }
142 /* Check if this inode lookup has failed recently */
143 if (qri->timeout &&
144 time_before_eq(jiffies, qri->timeout)) {
145 mutex_unlock(&qri->mutex);
146 return NULL;
147 }
148 in = ceph_lookup_inode(sb, realm->ino);
149 if (IS_ERR(in)) {
150 pr_warn("Can't lookup inode %llx (err: %ld)\n",
151 realm->ino, PTR_ERR(in));
152 qri->timeout = jiffies + msecs_to_jiffies(60 * 1000); /* XXX */
153 } else {
154 qri->timeout = 0;
155 qri->inode = in;
156 }
157 mutex_unlock(&qri->mutex);
158
159 return in;
160}
161
162void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc)
163{
164 struct ceph_quotarealm_inode *qri;
165 struct rb_node *node;
166
167 /*
168 * It should now be safe to clean quotarealms_inode tree without holding
169 * mdsc->quotarealms_inodes_mutex...
170 */
171 mutex_lock(&mdsc->quotarealms_inodes_mutex);
172 while (!RB_EMPTY_ROOT(&mdsc->quotarealms_inodes)) {
173 node = rb_first(&mdsc->quotarealms_inodes);
174 qri = rb_entry(node, struct ceph_quotarealm_inode, node);
175 rb_erase(node, &mdsc->quotarealms_inodes);
176 iput(qri->inode);
177 kfree(qri);
178 }
179 mutex_unlock(&mdsc->quotarealms_inodes_mutex);
180}
181
71/* 182/*
72 * This function walks through the snaprealm for an inode and returns the 183 * This function walks through the snaprealm for an inode and returns the
73 * ceph_snap_realm for the first snaprealm that has quotas set (either max_files 184 * ceph_snap_realm for the first snaprealm that has quotas set (either max_files
@@ -76,9 +187,15 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc,
76 * 187 *
77 * Note that the caller is responsible for calling ceph_put_snap_realm() on the 188 * Note that the caller is responsible for calling ceph_put_snap_realm() on the
78 * returned realm. 189 * returned realm.
190 *
191 * Callers of this function need to hold mdsc->snap_rwsem. However, if there's
192 * a need to do an inode lookup, this rwsem will be temporarily dropped. Hence
193 * the 'retry' argument: if rwsem needs to be dropped and 'retry' is 'false'
194 * this function will return -EAGAIN; otherwise, the snaprealms walk-through
195 * will be restarted.
79 */ 196 */
80static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc, 197static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc,
81 struct inode *inode) 198 struct inode *inode, bool retry)
82{ 199{
83 struct ceph_inode_info *ci = NULL; 200 struct ceph_inode_info *ci = NULL;
84 struct ceph_snap_realm *realm, *next; 201 struct ceph_snap_realm *realm, *next;
@@ -88,6 +205,7 @@ static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc,
88 if (ceph_snap(inode) != CEPH_NOSNAP) 205 if (ceph_snap(inode) != CEPH_NOSNAP)
89 return NULL; 206 return NULL;
90 207
208restart:
91 realm = ceph_inode(inode)->i_snap_realm; 209 realm = ceph_inode(inode)->i_snap_realm;
92 if (realm) 210 if (realm)
93 ceph_get_snap_realm(mdsc, realm); 211 ceph_get_snap_realm(mdsc, realm);
@@ -95,11 +213,25 @@ static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc,
95 pr_err_ratelimited("get_quota_realm: ino (%llx.%llx) " 213 pr_err_ratelimited("get_quota_realm: ino (%llx.%llx) "
96 "null i_snap_realm\n", ceph_vinop(inode)); 214 "null i_snap_realm\n", ceph_vinop(inode));
97 while (realm) { 215 while (realm) {
216 bool has_inode;
217
98 spin_lock(&realm->inodes_with_caps_lock); 218 spin_lock(&realm->inodes_with_caps_lock);
99 in = realm->inode ? igrab(realm->inode) : NULL; 219 has_inode = realm->inode;
220 in = has_inode ? igrab(realm->inode) : NULL;
100 spin_unlock(&realm->inodes_with_caps_lock); 221 spin_unlock(&realm->inodes_with_caps_lock);
101 if (!in) 222 if (has_inode && !in)
102 break; 223 break;
224 if (!in) {
225 up_read(&mdsc->snap_rwsem);
226 in = lookup_quotarealm_inode(mdsc, inode->i_sb, realm);
227 down_read(&mdsc->snap_rwsem);
228 if (IS_ERR_OR_NULL(in))
229 break;
230 ceph_put_snap_realm(mdsc, realm);
231 if (!retry)
232 return ERR_PTR(-EAGAIN);
233 goto restart;
234 }
103 235
104 ci = ceph_inode(in); 236 ci = ceph_inode(in);
105 has_quota = __ceph_has_any_quota(ci); 237 has_quota = __ceph_has_any_quota(ci);
@@ -125,9 +257,22 @@ bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
125 struct ceph_snap_realm *old_realm, *new_realm; 257 struct ceph_snap_realm *old_realm, *new_realm;
126 bool is_same; 258 bool is_same;
127 259
260restart:
261 /*
262 * We need to lookup 2 quota realms atomically, i.e. with snap_rwsem.
263 * However, get_quota_realm may drop it temporarily. By setting the
264 * 'retry' parameter to 'false', we'll get -EAGAIN if the rwsem was
265 * dropped and we can then restart the whole operation.
266 */
128 down_read(&mdsc->snap_rwsem); 267 down_read(&mdsc->snap_rwsem);
129 old_realm = get_quota_realm(mdsc, old); 268 old_realm = get_quota_realm(mdsc, old, true);
130 new_realm = get_quota_realm(mdsc, new); 269 new_realm = get_quota_realm(mdsc, new, false);
270 if (PTR_ERR(new_realm) == -EAGAIN) {
271 up_read(&mdsc->snap_rwsem);
272 if (old_realm)
273 ceph_put_snap_realm(mdsc, old_realm);
274 goto restart;
275 }
131 is_same = (old_realm == new_realm); 276 is_same = (old_realm == new_realm);
132 up_read(&mdsc->snap_rwsem); 277 up_read(&mdsc->snap_rwsem);
133 278
@@ -166,6 +311,7 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op,
166 return false; 311 return false;
167 312
168 down_read(&mdsc->snap_rwsem); 313 down_read(&mdsc->snap_rwsem);
314restart:
169 realm = ceph_inode(inode)->i_snap_realm; 315 realm = ceph_inode(inode)->i_snap_realm;
170 if (realm) 316 if (realm)
171 ceph_get_snap_realm(mdsc, realm); 317 ceph_get_snap_realm(mdsc, realm);
@@ -173,12 +319,23 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op,
173 pr_err_ratelimited("check_quota_exceeded: ino (%llx.%llx) " 319 pr_err_ratelimited("check_quota_exceeded: ino (%llx.%llx) "
174 "null i_snap_realm\n", ceph_vinop(inode)); 320 "null i_snap_realm\n", ceph_vinop(inode));
175 while (realm) { 321 while (realm) {
322 bool has_inode;
323
176 spin_lock(&realm->inodes_with_caps_lock); 324 spin_lock(&realm->inodes_with_caps_lock);
177 in = realm->inode ? igrab(realm->inode) : NULL; 325 has_inode = realm->inode;
326 in = has_inode ? igrab(realm->inode) : NULL;
178 spin_unlock(&realm->inodes_with_caps_lock); 327 spin_unlock(&realm->inodes_with_caps_lock);
179 if (!in) 328 if (has_inode && !in)
180 break; 329 break;
181 330 if (!in) {
331 up_read(&mdsc->snap_rwsem);
332 in = lookup_quotarealm_inode(mdsc, inode->i_sb, realm);
333 down_read(&mdsc->snap_rwsem);
334 if (IS_ERR_OR_NULL(in))
335 break;
336 ceph_put_snap_realm(mdsc, realm);
337 goto restart;
338 }
182 ci = ceph_inode(in); 339 ci = ceph_inode(in);
183 spin_lock(&ci->i_ceph_lock); 340 spin_lock(&ci->i_ceph_lock);
184 if (op == QUOTA_CHECK_MAX_FILES_OP) { 341 if (op == QUOTA_CHECK_MAX_FILES_OP) {
@@ -314,7 +471,7 @@ bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf)
314 bool is_updated = false; 471 bool is_updated = false;
315 472
316 down_read(&mdsc->snap_rwsem); 473 down_read(&mdsc->snap_rwsem);
317 realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root)); 474 realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root), true);
318 up_read(&mdsc->snap_rwsem); 475 up_read(&mdsc->snap_rwsem);
319 if (!realm) 476 if (!realm)
320 return false; 477 return false;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 976f200164f9..a4b0da31d199 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -1133,5 +1133,6 @@ extern bool ceph_quota_is_max_bytes_approaching(struct inode *inode,
1133 loff_t newlen); 1133 loff_t newlen);
1134extern bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, 1134extern bool ceph_quota_update_statfs(struct ceph_fs_client *fsc,
1135 struct kstatfs *buf); 1135 struct kstatfs *buf);
1136extern void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc);
1136 1137
1137#endif /* _FS_CEPH_SUPER_H */ 1138#endif /* _FS_CEPH_SUPER_H */