about | summary | refs | log | tree | commit | diff | stats
path: root/fs/xfs/xfs_qm.c
diff options
context:
space:
mode:
author: Christoph Hellwig <hch@lst.de> 2012-03-14 12:53:34 -0400
committer: Ben Myers <bpm@sgi.com> 2012-03-14 12:53:34 -0400
commit: b84a3a96751f93071c1863f2962273973c8b8f5e (patch)
tree: 26409cbc65a9b4457e6c52e90879361fc42d2386 /fs/xfs/xfs_qm.c
parent: 9f920f116426806bfa34c1422742e1bf7b7a2b4b (diff)
xfs: remove the per-filesystem list of dquots
Instead of keeping a separate per-filesystem list of dquots, we can walk the radix tree for the two places where we need to iterate all quota structures.

Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ben Myers <bpm@sgi.com>
Diffstat (limited to 'fs/xfs/xfs_qm.c')
-rw-r--r-- fs/xfs/xfs_qm.c 415
1 files changed, 219 insertions, 196 deletions
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index bb884e701cd9..2f92d3b0d8a8 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -169,6 +169,187 @@ xfs_qm_rele_quotafs_ref(
169} 169}
170 170
171/* 171/*
172 * We use the batch lookup interface to iterate over the dquots as it
173 * currently is the only interface into the radix tree code that allows
174 * fuzzy lookups instead of exact matches. Holding the lock over multiple
175 * operations is fine as all callers are used either during mount/umount
176 * or quotaoff.
177 */
178#define XFS_DQ_LOOKUP_BATCH 32
179
180STATIC int
181xfs_qm_dquot_walk(
182 struct xfs_mount *mp,
183 int type,
184 int (*execute)(struct xfs_dquot *dqp))
185{
186 struct xfs_quotainfo *qi = mp->m_quotainfo;
187 struct radix_tree_root *tree = XFS_DQUOT_TREE(qi, type);
188 uint32_t next_index;
189 int last_error = 0;
190 int skipped;
191 int nr_found;
192
193restart:
194 skipped = 0;
195 next_index = 0;
196 nr_found = 0;
197
198 while (1) {
199 struct xfs_dquot *batch[XFS_DQ_LOOKUP_BATCH];
200 int error = 0;
201 int i;
202
203 mutex_lock(&qi->qi_tree_lock);
204 nr_found = radix_tree_gang_lookup(tree, (void **)batch,
205 next_index, XFS_DQ_LOOKUP_BATCH);
206 if (!nr_found) {
207 mutex_unlock(&qi->qi_tree_lock);
208 break;
209 }
210
211 for (i = 0; i < nr_found; i++) {
212 struct xfs_dquot *dqp = batch[i];
213
214 next_index = be32_to_cpu(dqp->q_core.d_id) + 1;
215
216 error = execute(batch[i]);
217 if (error == EAGAIN) {
218 skipped++;
219 continue;
220 }
221 if (error && last_error != EFSCORRUPTED)
222 last_error = error;
223 }
224
225 mutex_unlock(&qi->qi_tree_lock);
226
227 /* bail out if the filesystem is corrupted. */
228 if (last_error == EFSCORRUPTED) {
229 skipped = 0;
230 break;
231 }
232 }
233
234 if (skipped) {
235 delay(1);
236 goto restart;
237 }
238
239 return last_error;
240}
241
242
243/*
244 * Purge a dquot from all tracking data structures and free it.
245 */
246STATIC int
247xfs_qm_dqpurge(
248 struct xfs_dquot *dqp)
249{
250 struct xfs_mount *mp = dqp->q_mount;
251 struct xfs_quotainfo *qi = mp->m_quotainfo;
252 struct xfs_dquot *gdqp = NULL;
253
254 xfs_dqlock(dqp);
255 if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) {
256 xfs_dqunlock(dqp);
257 return EAGAIN;
258 }
259
260 /*
261 * If this quota has a group hint attached, prepare for releasing it
262 * now.
263 */
264 gdqp = dqp->q_gdquot;
265 if (gdqp) {
266 xfs_dqlock(gdqp);
267 dqp->q_gdquot = NULL;
268 }
269
270 dqp->dq_flags |= XFS_DQ_FREEING;
271
272 /*
273 * If we're turning off quotas, we have to make sure that, for
274 * example, we don't delete quota disk blocks while dquots are
275 * in the process of getting written to those disk blocks.
276 * This dquot might well be on AIL, and we can't leave it there
277 * if we're turning off quotas. Basically, we need this flush
278 * lock, and are willing to block on it.
279 */
280 if (!xfs_dqflock_nowait(dqp)) {
281 /*
282 * Block on the flush lock after nudging dquot buffer,
283 * if it is incore.
284 */
285 xfs_dqflock_pushbuf_wait(dqp);
286 }
287
288 /*
289 * If we are turning this type of quotas off, we don't care
290 * about the dirty metadata sitting in this dquot. OTOH, if
291 * we're unmounting, we do care, so we flush it and wait.
292 */
293 if (XFS_DQ_IS_DIRTY(dqp)) {
294 int error;
295
296 /*
297 * We don't care about getting disk errors here. We need
298 * to purge this dquot anyway, so we go ahead regardless.
299 */
300 error = xfs_qm_dqflush(dqp, SYNC_WAIT);
301 if (error)
302 xfs_warn(mp, "%s: dquot %p flush failed",
303 __func__, dqp);
304 xfs_dqflock(dqp);
305 }
306
307 ASSERT(atomic_read(&dqp->q_pincount) == 0);
308 ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
309 !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
310
311 xfs_dqfunlock(dqp);
312 xfs_dqunlock(dqp);
313
314 radix_tree_delete(XFS_DQUOT_TREE(qi, dqp->q_core.d_flags),
315 be32_to_cpu(dqp->q_core.d_id));
316 qi->qi_dquots--;
317
318 /*
319 * We move dquots to the freelist as soon as their reference count
320 * hits zero, so it really should be on the freelist here.
321 */
322 mutex_lock(&qi->qi_lru_lock);
323 ASSERT(!list_empty(&dqp->q_lru));
324 list_del_init(&dqp->q_lru);
325 qi->qi_lru_count--;
326 XFS_STATS_DEC(xs_qm_dquot_unused);
327 mutex_unlock(&qi->qi_lru_lock);
328
329 xfs_qm_dqdestroy(dqp);
330
331 if (gdqp)
332 xfs_qm_dqput(gdqp);
333 return 0;
334}
335
336/*
337 * Purge the dquot cache.
338 */
339void
340xfs_qm_dqpurge_all(
341 struct xfs_mount *mp,
342 uint flags)
343{
344 if (flags & XFS_QMOPT_UQUOTA)
345 xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge);
346 if (flags & XFS_QMOPT_GQUOTA)
347 xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_dqpurge);
348 if (flags & XFS_QMOPT_PQUOTA)
349 xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_dqpurge);
350}
351
352/*
172 * Just destroy the quotainfo structure. 353 * Just destroy the quotainfo structure.
173 */ 354 */
174void 355void
@@ -306,175 +487,6 @@ xfs_qm_unmount_quotas(
306 } 487 }
307} 488}
308 489
309/*
310 * Flush all dquots of the given file system to disk. The dquots are
311 * _not_ purged from memory here, just their data written to disk.
312 */
313STATIC int
314xfs_qm_dqflush_all(
315 struct xfs_mount *mp)
316{
317 struct xfs_quotainfo *q = mp->m_quotainfo;
318 int recl;
319 struct xfs_dquot *dqp;
320 int error;
321
322 if (!q)
323 return 0;
324again:
325 mutex_lock(&q->qi_dqlist_lock);
326 list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
327 xfs_dqlock(dqp);
328 if ((dqp->dq_flags & XFS_DQ_FREEING) ||
329 !XFS_DQ_IS_DIRTY(dqp)) {
330 xfs_dqunlock(dqp);
331 continue;
332 }
333
334 /* XXX a sentinel would be better */
335 recl = q->qi_dqreclaims;
336 if (!xfs_dqflock_nowait(dqp)) {
337 /*
338 * If we can't grab the flush lock then check
339 * to see if the dquot has been flushed delayed
340 * write. If so, grab its buffer and send it
341 * out immediately. We'll be able to acquire
342 * the flush lock when the I/O completes.
343 */
344 xfs_dqflock_pushbuf_wait(dqp);
345 }
346 /*
347 * Let go of the mplist lock. We don't want to hold it
348 * across a disk write.
349 */
350 mutex_unlock(&q->qi_dqlist_lock);
351 error = xfs_qm_dqflush(dqp, 0);
352 xfs_dqunlock(dqp);
353 if (error)
354 return error;
355
356 mutex_lock(&q->qi_dqlist_lock);
357 if (recl != q->qi_dqreclaims) {
358 mutex_unlock(&q->qi_dqlist_lock);
359 /* XXX restart limit */
360 goto again;
361 }
362 }
363
364 mutex_unlock(&q->qi_dqlist_lock);
365 /* return ! busy */
366 return 0;
367}
368
369/*
370 * Release the group dquot pointers the user dquots may be
371 * carrying around as a hint. mplist is locked on entry and exit.
372 */
373STATIC void
374xfs_qm_detach_gdquots(
375 struct xfs_mount *mp)
376{
377 struct xfs_quotainfo *q = mp->m_quotainfo;
378 struct xfs_dquot *dqp, *gdqp;
379
380 again:
381 ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
382 list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
383 xfs_dqlock(dqp);
384 if (dqp->dq_flags & XFS_DQ_FREEING) {
385 xfs_dqunlock(dqp);
386 mutex_unlock(&q->qi_dqlist_lock);
387 delay(1);
388 mutex_lock(&q->qi_dqlist_lock);
389 goto again;
390 }
391
392 gdqp = dqp->q_gdquot;
393 if (gdqp)
394 dqp->q_gdquot = NULL;
395 xfs_dqunlock(dqp);
396
397 if (gdqp)
398 xfs_qm_dqrele(gdqp);
399 }
400}
401
402/*
403 * Go through all the incore dquots of this file system and take them
404 * off the mplist and hashlist, if the dquot type matches the dqtype
405 * parameter. This is used when turning off quota accounting for
406 * users and/or groups, as well as when the filesystem is unmounting.
407 */
408STATIC int
409xfs_qm_dqpurge_int(
410 struct xfs_mount *mp,
411 uint flags)
412{
413 struct xfs_quotainfo *q = mp->m_quotainfo;
414 struct xfs_dquot *dqp, *n;
415 uint dqtype;
416 int nmisses = 0;
417 LIST_HEAD (dispose_list);
418
419 if (!q)
420 return 0;
421
422 dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0;
423 dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0;
424 dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0;
425
426 mutex_lock(&q->qi_dqlist_lock);
427
428 /*
429 * In the first pass through all incore dquots of this filesystem,
430 * we release the group dquot pointers the user dquots may be
431 * carrying around as a hint. We need to do this irrespective of
432 * what's being turned off.
433 */
434 xfs_qm_detach_gdquots(mp);
435
436 /*
437 * Try to get rid of all of the unwanted dquots.
438 */
439 list_for_each_entry_safe(dqp, n, &q->qi_dqlist, q_mplist) {
440 xfs_dqlock(dqp);
441 if ((dqp->dq_flags & dqtype) != 0 &&
442 !(dqp->dq_flags & XFS_DQ_FREEING)) {
443 if (dqp->q_nrefs == 0) {
444 dqp->dq_flags |= XFS_DQ_FREEING;
445 list_move_tail(&dqp->q_mplist, &dispose_list);
446 } else
447 nmisses++;
448 }
449 xfs_dqunlock(dqp);
450 }
451 mutex_unlock(&q->qi_dqlist_lock);
452
453 list_for_each_entry_safe(dqp, n, &dispose_list, q_mplist)
454 xfs_qm_dqpurge(dqp);
455
456 return nmisses;
457}
458
459int
460xfs_qm_dqpurge_all(
461 xfs_mount_t *mp,
462 uint flags)
463{
464 int ndquots;
465
466 /*
467 * Purge the dquot cache.
468 * None of the dquots should really be busy at this point.
469 */
470 if (mp->m_quotainfo) {
471 while ((ndquots = xfs_qm_dqpurge_int(mp, flags))) {
472 delay(ndquots * 10);
473 }
474 }
475 return 0;
476}
477
478STATIC int 490STATIC int
479xfs_qm_dqattach_one( 491xfs_qm_dqattach_one(
480 xfs_inode_t *ip, 492 xfs_inode_t *ip,
@@ -749,15 +761,10 @@ xfs_qm_init_quotainfo(
749 INIT_RADIX_TREE(&qinf->qi_gquota_tree, GFP_NOFS); 761 INIT_RADIX_TREE(&qinf->qi_gquota_tree, GFP_NOFS);
750 mutex_init(&qinf->qi_tree_lock); 762 mutex_init(&qinf->qi_tree_lock);
751 763
752 INIT_LIST_HEAD(&qinf->qi_dqlist);
753 mutex_init(&qinf->qi_dqlist_lock);
754
755 INIT_LIST_HEAD(&qinf->qi_lru_list); 764 INIT_LIST_HEAD(&qinf->qi_lru_list);
756 qinf->qi_lru_count = 0; 765 qinf->qi_lru_count = 0;
757 mutex_init(&qinf->qi_lru_lock); 766 mutex_init(&qinf->qi_lru_lock);
758 767
759 qinf->qi_dqreclaims = 0;
760
761 /* mutex used to serialize quotaoffs */ 768 /* mutex used to serialize quotaoffs */
762 mutex_init(&qinf->qi_quotaofflock); 769 mutex_init(&qinf->qi_quotaofflock);
763 770
@@ -854,9 +861,6 @@ xfs_qm_destroy_quotainfo(
854 */ 861 */
855 xfs_qm_rele_quotafs_ref(mp); 862 xfs_qm_rele_quotafs_ref(mp);
856 863
857 ASSERT(list_empty(&qi->qi_dqlist));
858 mutex_destroy(&qi->qi_dqlist_lock);
859
860 if (qi->qi_uquotaip) { 864 if (qi->qi_uquotaip) {
861 IRELE(qi->qi_uquotaip); 865 IRELE(qi->qi_uquotaip);
862 qi->qi_uquotaip = NULL; /* paranoia */ 866 qi->qi_uquotaip = NULL; /* paranoia */
@@ -1307,6 +1311,28 @@ error0:
1307 return error; 1311 return error;
1308} 1312}
1309 1313
1314STATIC int
1315xfs_qm_flush_one(
1316 struct xfs_dquot *dqp)
1317{
1318 int error = 0;
1319
1320 xfs_dqlock(dqp);
1321 if (dqp->dq_flags & XFS_DQ_FREEING)
1322 goto out_unlock;
1323 if (!XFS_DQ_IS_DIRTY(dqp))
1324 goto out_unlock;
1325
1326 if (!xfs_dqflock_nowait(dqp))
1327 xfs_dqflock_pushbuf_wait(dqp);
1328
1329 error = xfs_qm_dqflush(dqp, 0);
1330
1331out_unlock:
1332 xfs_dqunlock(dqp);
1333 return error;
1334}
1335
1310/* 1336/*
1311 * Walk thru all the filesystem inodes and construct a consistent view 1337 * Walk thru all the filesystem inodes and construct a consistent view
1312 * of the disk quota world. If the quotacheck fails, disable quotas. 1338 * of the disk quota world. If the quotacheck fails, disable quotas.
@@ -1315,7 +1341,7 @@ int
1315xfs_qm_quotacheck( 1341xfs_qm_quotacheck(
1316 xfs_mount_t *mp) 1342 xfs_mount_t *mp)
1317{ 1343{
1318 int done, count, error; 1344 int done, count, error, error2;
1319 xfs_ino_t lastino; 1345 xfs_ino_t lastino;
1320 size_t structsz; 1346 size_t structsz;
1321 xfs_inode_t *uip, *gip; 1347 xfs_inode_t *uip, *gip;
@@ -1329,12 +1355,6 @@ xfs_qm_quotacheck(
1329 ASSERT(mp->m_quotainfo->qi_uquotaip || mp->m_quotainfo->qi_gquotaip); 1355 ASSERT(mp->m_quotainfo->qi_uquotaip || mp->m_quotainfo->qi_gquotaip);
1330 ASSERT(XFS_IS_QUOTA_RUNNING(mp)); 1356 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1331 1357
1332 /*
1333 * There should be no cached dquots. The (simplistic) quotacheck
1334 * algorithm doesn't like that.
1335 */
1336 ASSERT(list_empty(&mp->m_quotainfo->qi_dqlist));
1337
1338 xfs_notice(mp, "Quotacheck needed: Please wait."); 1358 xfs_notice(mp, "Quotacheck needed: Please wait.");
1339 1359
1340 /* 1360 /*
@@ -1373,12 +1393,21 @@ xfs_qm_quotacheck(
1373 } while (!done); 1393 } while (!done);
1374 1394
1375 /* 1395 /*
1376 * We've made all the changes that we need to make incore. 1396 * We've made all the changes that we need to make incore. Flush them
1377 * Flush them down to disk buffers if everything was updated 1397 * down to disk buffers if everything was updated successfully.
1378 * successfully.
1379 */ 1398 */
1380 if (!error) 1399 if (XFS_IS_UQUOTA_ON(mp))
1381 error = xfs_qm_dqflush_all(mp); 1400 error = xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_flush_one);
1401 if (XFS_IS_GQUOTA_ON(mp)) {
1402 error2 = xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_flush_one);
1403 if (!error)
1404 error = error2;
1405 }
1406 if (XFS_IS_PQUOTA_ON(mp)) {
1407 error2 = xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_flush_one);
1408 if (!error)
1409 error = error2;
1410 }
1382 1411
1383 /* 1412 /*
1384 * We can get this error if we couldn't do a dquot allocation inside 1413 * We can get this error if we couldn't do a dquot allocation inside
@@ -1517,13 +1546,9 @@ xfs_qm_dqfree_one(
1517 mutex_lock(&qi->qi_tree_lock); 1546 mutex_lock(&qi->qi_tree_lock);
1518 radix_tree_delete(XFS_DQUOT_TREE(qi, dqp->q_core.d_flags), 1547 radix_tree_delete(XFS_DQUOT_TREE(qi, dqp->q_core.d_flags),
1519 be32_to_cpu(dqp->q_core.d_id)); 1548 be32_to_cpu(dqp->q_core.d_id));
1520 mutex_unlock(&qi->qi_tree_lock);
1521 1549
1522 mutex_lock(&qi->qi_dqlist_lock);
1523 list_del_init(&dqp->q_mplist);
1524 qi->qi_dquots--; 1550 qi->qi_dquots--;
1525 qi->qi_dqreclaims++; 1551 mutex_unlock(&qi->qi_tree_lock);
1526 mutex_unlock(&qi->qi_dqlist_lock);
1527 1552
1528 xfs_qm_dqdestroy(dqp); 1553 xfs_qm_dqdestroy(dqp);
1529} 1554}
@@ -1556,8 +1581,6 @@ xfs_qm_dqreclaim_one(
1556 return; 1581 return;
1557 } 1582 }
1558 1583
1559 ASSERT(!list_empty(&dqp->q_mplist));
1560
1561 /* 1584 /*
1562 * Try to grab the flush lock. If this dquot is in the process of 1585 * Try to grab the flush lock. If this dquot is in the process of
1563 * getting flushed to disk, we don't want to reclaim it. 1586 * getting flushed to disk, we don't want to reclaim it.