Diffstat (limited to 'fs/xfs/linux-2.6/xfs_sync.c')
-rw-r--r--	fs/xfs/linux-2.6/xfs_sync.c	279
1 file changed, 137 insertions(+), 142 deletions(-)
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index a427c638d909..dfcbd98d1599 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -24,25 +24,14 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_dmapi.h"
 #include "xfs_mount.h"
 #include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_btree.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
 #include "xfs_inode.h"
 #include "xfs_dinode.h"
 #include "xfs_error.h"
-#include "xfs_mru_cache.h"
 #include "xfs_filestream.h"
 #include "xfs_vnodeops.h"
-#include "xfs_utils.h"
-#include "xfs_buf_item.h"
 #include "xfs_inode_item.h"
-#include "xfs_rw.h"
 #include "xfs_quota.h"
 #include "xfs_trace.h"
 
@@ -144,6 +133,41 @@ restart:
 	return last_error;
 }
 
+/*
+ * Select the next per-ag structure to iterate during the walk. The reclaim
+ * walk is optimised only to walk AGs with reclaimable inodes in them.
+ */
+static struct xfs_perag *
+xfs_inode_ag_iter_next_pag(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		*first,
+	int			tag)
+{
+	struct xfs_perag	*pag = NULL;
+
+	if (tag == XFS_ICI_RECLAIM_TAG) {
+		int found;
+		int ref;
+
+		spin_lock(&mp->m_perag_lock);
+		found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
+				(void **)&pag, *first, 1, tag);
+		if (found <= 0) {
+			spin_unlock(&mp->m_perag_lock);
+			return NULL;
+		}
+		*first = pag->pag_agno + 1;
+		/* open coded pag reference increment */
+		ref = atomic_inc_return(&pag->pag_ref);
+		spin_unlock(&mp->m_perag_lock);
+		trace_xfs_perag_get_reclaim(mp, pag->pag_agno, ref, _RET_IP_);
+	} else {
+		pag = xfs_perag_get(mp, *first);
+		(*first)++;
+	}
+	return pag;
+}
+
 int
 xfs_inode_ag_iterator(
 	struct xfs_mount	*mp,
@@ -154,20 +178,15 @@ xfs_inode_ag_iterator(
 	int			exclusive,
 	int			*nr_to_scan)
 {
+	struct xfs_perag	*pag;
 	int			error = 0;
 	int			last_error = 0;
 	xfs_agnumber_t		ag;
 	int			nr;
 
 	nr = nr_to_scan ? *nr_to_scan : INT_MAX;
-	for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
-		struct xfs_perag	*pag;
-
-		pag = xfs_perag_get(mp, ag);
-		if (!pag->pag_ici_init) {
-			xfs_perag_put(pag);
-			continue;
-		}
+	ag = 0;
+	while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag, tag))) {
 		error = xfs_inode_ag_walk(mp, pag, execute, flags, tag,
 						exclusive, &nr);
 		xfs_perag_put(pag);
@@ -289,7 +308,7 @@ xfs_sync_inode_attr(
 /*
  * Write out pagecache data for the whole filesystem.
  */
-int
+STATIC int
 xfs_sync_data(
 	struct xfs_mount	*mp,
 	int			flags)
@@ -310,7 +329,7 @@ xfs_sync_data(
 /*
  * Write out inode metadata (attributes) for the whole filesystem.
  */
-int
+STATIC int
 xfs_sync_attr(
 	struct xfs_mount	*mp,
 	int			flags)
@@ -343,8 +362,7 @@ xfs_commit_dummy_trans(
 
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 
-	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
-	xfs_trans_ihold(tp, ip);
+	xfs_trans_ijoin(tp, ip);
 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 	error = xfs_trans_commit(tp, 0);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -356,68 +374,23 @@ xfs_commit_dummy_trans(
 
 STATIC int
 xfs_sync_fsdata(
-	struct xfs_mount	*mp,
-	int			flags)
+	struct xfs_mount	*mp)
 {
 	struct xfs_buf		*bp;
-	struct xfs_buf_log_item	*bip;
-	int			error = 0;
 
 	/*
-	 * If this is xfssyncd() then only sync the superblock if we can
-	 * lock it without sleeping and it is not pinned.
+	 * If the buffer is pinned then push on the log so we won't get stuck
+	 * waiting in the write for someone, maybe ourselves, to flush the log.
+	 *
+	 * Even though we just pushed the log above, we did not have the
+	 * superblock buffer locked at that point so it can become pinned in
+	 * between there and here.
 	 */
-	if (flags & SYNC_TRYLOCK) {
-		ASSERT(!(flags & SYNC_WAIT));
-
-		bp = xfs_getsb(mp, XBF_TRYLOCK);
-		if (!bp)
-			goto out;
-
-		bip = XFS_BUF_FSPRIVATE(bp, struct xfs_buf_log_item *);
-		if (!bip || !xfs_buf_item_dirty(bip) || XFS_BUF_ISPINNED(bp))
-			goto out_brelse;
-	} else {
-		bp = xfs_getsb(mp, 0);
-
-		/*
-		 * If the buffer is pinned then push on the log so we won't
-		 * get stuck waiting in the write for someone, maybe
-		 * ourselves, to flush the log.
-		 *
-		 * Even though we just pushed the log above, we did not have
-		 * the superblock buffer locked at that point so it can
-		 * become pinned in between there and here.
-		 */
-		if (XFS_BUF_ISPINNED(bp))
-			xfs_log_force(mp, 0);
-	}
-
-
-	if (flags & SYNC_WAIT)
-		XFS_BUF_UNASYNC(bp);
-	else
-		XFS_BUF_ASYNC(bp);
-
-	error = xfs_bwrite(mp, bp);
-	if (error)
-		return error;
-
-	/*
-	 * If this is a data integrity sync make sure all pending buffers
-	 * are flushed out for the log coverage check below.
-	 */
-	if (flags & SYNC_WAIT)
-		xfs_flush_buftarg(mp->m_ddev_targp, 1);
-
-	if (xfs_log_need_covered(mp))
-		error = xfs_commit_dummy_trans(mp, flags);
-	return error;
+	bp = xfs_getsb(mp, 0);
+	if (XFS_BUF_ISPINNED(bp))
+		xfs_log_force(mp, 0);
 
- out_brelse:
-	xfs_buf_relse(bp);
- out:
-	return error;
+	return xfs_bwrite(mp, bp);
 }
 
 /*
@@ -441,7 +414,7 @@ int
 xfs_quiesce_data(
 	struct xfs_mount	*mp)
 {
-	int error;
+	int error, error2 = 0;
 
 	/* push non-blocking */
 	xfs_sync_data(mp, 0);
@@ -452,13 +425,20 @@
 	xfs_qm_sync(mp, SYNC_WAIT);
 
 	/* write superblock and hoover up shutdown errors */
-	error = xfs_sync_fsdata(mp, SYNC_WAIT);
+	error = xfs_sync_fsdata(mp);
+
+	/* make sure all delwri buffers are written out */
+	xfs_flush_buftarg(mp->m_ddev_targp, 1);
+
+	/* mark the log as covered if needed */
+	if (xfs_log_need_covered(mp))
+		error2 = xfs_commit_dummy_trans(mp, SYNC_WAIT);
 
 	/* flush data-only devices */
 	if (mp->m_rtdev_targp)
 		XFS_bflush(mp->m_rtdev_targp);
 
-	return error;
+	return error ? error : error2;
 }
 
 STATIC void
@@ -581,9 +561,9 @@ xfs_flush_inodes(
 }
 
 /*
- * Every sync period we need to unpin all items, reclaim inodes, sync
- * quota and write out the superblock. We might need to cover the log
- * to indicate it is idle.
+ * Every sync period we need to unpin all items, reclaim inodes and sync
+ * disk quotas. We might need to cover the log to indicate that the
+ * filesystem is idle.
  */
 STATIC void
 xfs_sync_worker(
@@ -597,7 +577,8 @@ xfs_sync_worker(
 		xfs_reclaim_inodes(mp, 0);
 		/* dgc: errors ignored here */
 		error = xfs_qm_sync(mp, SYNC_TRYLOCK);
-		error = xfs_sync_fsdata(mp, SYNC_TRYLOCK);
+		if (xfs_log_need_covered(mp))
+			error = xfs_commit_dummy_trans(mp, 0);
 	}
 	mp->m_sync_seq++;
 	wake_up(&mp->m_wait_single_sync_task);
@@ -660,7 +641,7 @@ xfs_syncd_init(
 	mp->m_sync_work.w_syncer = xfs_sync_worker;
 	mp->m_sync_work.w_mount = mp;
 	mp->m_sync_work.w_completion = NULL;
-	mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd");
+	mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd/%s", mp->m_fsname);
 	if (IS_ERR(mp->m_sync_task))
 		return -PTR_ERR(mp->m_sync_task);
 	return 0;
@@ -681,6 +662,17 @@ __xfs_inode_set_reclaim_tag(
 	radix_tree_tag_set(&pag->pag_ici_root,
 			XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
 			XFS_ICI_RECLAIM_TAG);
+
+	if (!pag->pag_ici_reclaimable) {
+		/* propagate the reclaim tag up into the perag radix tree */
+		spin_lock(&ip->i_mount->m_perag_lock);
+		radix_tree_tag_set(&ip->i_mount->m_perag_tree,
+				XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
+				XFS_ICI_RECLAIM_TAG);
+		spin_unlock(&ip->i_mount->m_perag_lock);
+		trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
+							-1, _RET_IP_);
+	}
 	pag->pag_ici_reclaimable++;
 }
 
@@ -715,6 +707,16 @@ __xfs_inode_clear_reclaim_tag(
 	radix_tree_tag_clear(&pag->pag_ici_root,
 			XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
 	pag->pag_ici_reclaimable--;
+	if (!pag->pag_ici_reclaimable) {
+		/* clear the reclaim tag from the perag radix tree */
+		spin_lock(&ip->i_mount->m_perag_lock);
+		radix_tree_tag_clear(&ip->i_mount->m_perag_tree,
+				XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
+				XFS_ICI_RECLAIM_TAG);
+		spin_unlock(&ip->i_mount->m_perag_lock);
+		trace_xfs_perag_clear_reclaim(ip->i_mount, pag->pag_agno,
+							-1, _RET_IP_);
+	}
 }
 
 /*
@@ -853,7 +855,36 @@ out:
 reclaim:
 	xfs_ifunlock(ip);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	xfs_ireclaim(ip);
+
+	XFS_STATS_INC(xs_ig_reclaims);
+	/*
+	 * Remove the inode from the per-AG radix tree.
+	 *
+	 * Because radix_tree_delete won't complain even if the item was never
+	 * added to the tree assert that it's been there before to catch
+	 * problems with the inode life time early on.
+	 */
+	write_lock(&pag->pag_ici_lock);
+	if (!radix_tree_delete(&pag->pag_ici_root,
+				XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino)))
+		ASSERT(0);
+	write_unlock(&pag->pag_ici_lock);
+
+	/*
+	 * Here we do an (almost) spurious inode lock in order to coordinate
+	 * with inode cache radix tree lookups. This is because the lookup
+	 * can reference the inodes in the cache without taking references.
+	 *
+	 * We make that OK here by ensuring that we wait until the inode is
+	 * unlocked after the lookup before we go ahead and free it. We get
+	 * both the ilock and the iolock because the code may need to drop the
+	 * ilock one but will still hold the iolock.
+	 */
+	xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+	xfs_qm_dqdetach(ip);
+	xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+
+	xfs_inode_free(ip);
 	return error;
 
 }
@@ -869,88 +900,52 @@ xfs_reclaim_inodes(
 
 /*
  * Shrinker infrastructure.
- *
- * This is all far more complex than it needs to be. It adds a global list of
- * mounts because the shrinkers can only call a global context. We need to make
- * the shrinkers pass a context to avoid the need for global state.
  */
-static LIST_HEAD(xfs_mount_list);
-static struct rw_semaphore xfs_mount_list_lock;
-
 static int
 xfs_reclaim_inode_shrink(
+	struct shrinker	*shrink,
 	int		nr_to_scan,
 	gfp_t		gfp_mask)
 {
 	struct xfs_mount *mp;
 	struct xfs_perag *pag;
 	xfs_agnumber_t	ag;
-	int		reclaimable = 0;
+	int		reclaimable;
 
+	mp = container_of(shrink, struct xfs_mount, m_inode_shrink);
 	if (nr_to_scan) {
 		if (!(gfp_mask & __GFP_FS))
 			return -1;
 
-		down_read(&xfs_mount_list_lock);
-		list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
-			xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0,
+		xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0,
 					XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan);
-			if (nr_to_scan <= 0)
-				break;
-		}
-		up_read(&xfs_mount_list_lock);
-	}
-
-	down_read(&xfs_mount_list_lock);
-	list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
-		for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
+		/* if we don't exhaust the scan, don't bother coming back */
+		if (nr_to_scan > 0)
+			return -1;
+	}
 
-			pag = xfs_perag_get(mp, ag);
-			if (!pag->pag_ici_init) {
-				xfs_perag_put(pag);
-				continue;
-			}
-			reclaimable += pag->pag_ici_reclaimable;
-			xfs_perag_put(pag);
-		}
+	reclaimable = 0;
+	ag = 0;
+	while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag,
+					XFS_ICI_RECLAIM_TAG))) {
+		reclaimable += pag->pag_ici_reclaimable;
+		xfs_perag_put(pag);
 	}
-	up_read(&xfs_mount_list_lock);
 	return reclaimable;
 }
 
-static struct shrinker xfs_inode_shrinker = {
-	.shrink = xfs_reclaim_inode_shrink,
-	.seeks = DEFAULT_SEEKS,
-};
-
-void __init
-xfs_inode_shrinker_init(void)
-{
-	init_rwsem(&xfs_mount_list_lock);
-	register_shrinker(&xfs_inode_shrinker);
-}
-
-void
-xfs_inode_shrinker_destroy(void)
-{
-	ASSERT(list_empty(&xfs_mount_list));
-	unregister_shrinker(&xfs_inode_shrinker);
-}
-
 void
 xfs_inode_shrinker_register(
 	struct xfs_mount	*mp)
 {
-	down_write(&xfs_mount_list_lock);
-	list_add_tail(&mp->m_mplist, &xfs_mount_list);
-	up_write(&xfs_mount_list_lock);
+	mp->m_inode_shrink.shrink = xfs_reclaim_inode_shrink;
+	mp->m_inode_shrink.seeks = DEFAULT_SEEKS;
+	register_shrinker(&mp->m_inode_shrink);
 }
 
 void
 xfs_inode_shrinker_unregister(
 	struct xfs_mount	*mp)
 {
-	down_write(&xfs_mount_list_lock);
-	list_del(&mp->m_mplist);
-	up_write(&xfs_mount_list_lock);
+	unregister_shrinker(&mp->m_inode_shrink);
 }