path: root/fs/xfs/linux-2.6/xfs_sync.c
Diffstat (limited to 'fs/xfs/linux-2.6/xfs_sync.c')
-rw-r--r--   fs/xfs/linux-2.6/xfs_sync.c   329
1 file changed, 201 insertions(+), 128 deletions(-)
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 6fed97a8cd3e..a9f6d20aff41 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -65,7 +65,6 @@ xfs_inode_ag_lookup(
 	 * as the tree is sparse and a gang lookup walks to find
 	 * the number of objects requested.
 	 */
-	read_lock(&pag->pag_ici_lock);
 	if (tag == XFS_ICI_NO_TAG) {
 		nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
 				(void **)&ip, *first_index, 1);
@@ -74,7 +73,7 @@ xfs_inode_ag_lookup(
 				(void **)&ip, *first_index, 1, tag);
 	}
 	if (!nr_found)
-		goto unlock;
+		return NULL;
 
 	/*
 	 * Update the index for the next lookup. Catch overflows
@@ -84,25 +83,20 @@ xfs_inode_ag_lookup(
 	 */
 	*first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
 	if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
-		goto unlock;
-
+		return NULL;
 	return ip;
-
-unlock:
-	read_unlock(&pag->pag_ici_lock);
-	return NULL;
 }
 
 STATIC int
 xfs_inode_ag_walk(
 	struct xfs_mount	*mp,
-	xfs_agnumber_t		ag,
+	struct xfs_perag	*pag,
 	int			(*execute)(struct xfs_inode *ip,
 					   struct xfs_perag *pag, int flags),
 	int			flags,
-	int			tag)
+	int			tag,
+	int			exclusive)
 {
-	struct xfs_perag	*pag = &mp->m_perag[ag];
 	uint32_t		first_index;
 	int			last_error = 0;
 	int			skipped;
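The lookup above advances the walk cursor to ip->i_ino + 1 and bails out when the increment wraps, which is what terminates a walk whose last inode sits at the top of the AG's inode space. A minimal userspace model of that cursor pattern (all names here are illustrative, not the kernel API):

#include <stdint.h>
#include <stdio.h>

/* stand-in for radix_tree_gang_lookup(..., first_index, 1) */
static int lookup_ge(const uint32_t *keys, int nkeys, uint32_t first,
		     uint32_t *found)
{
	for (int i = 0; i < nkeys; i++) {
		if (keys[i] >= first) {
			*found = keys[i];
			return 1;
		}
	}
	return 0;
}

int main(void)
{
	uint32_t keys[] = { 3, 17, 0xffffffffu };  /* last key at the top */
	uint32_t cursor = 0, key;

	while (lookup_ge(keys, 3, cursor, &key)) {
		printf("visit %u\n", key);
		cursor = key + 1;	/* advance past this entry ... */
		if (cursor < key)	/* ... and stop if the increment wrapped */
			break;
	}
	return 0;
}

Without the wrap check, a key at the top of the index space would reset the cursor to zero and the walk would loop forever.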
@@ -114,10 +108,20 @@ restart:
 		int		error = 0;
 		xfs_inode_t	*ip;
 
+		if (exclusive)
+			write_lock(&pag->pag_ici_lock);
+		else
+			read_lock(&pag->pag_ici_lock);
 		ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag);
-		if (!ip)
+		if (!ip) {
+			if (exclusive)
+				write_unlock(&pag->pag_ici_lock);
+			else
+				read_unlock(&pag->pag_ici_lock);
 			break;
+		}
 
+		/* execute releases pag->pag_ici_lock */
 		error = execute(ip, pag, flags);
 		if (error == EAGAIN) {
 			skipped++;
@@ -125,9 +129,8 @@ restart:
 		}
 		if (error)
 			last_error = error;
-		/*
-		 * bail out if the filesystem is corrupted.
-		 */
+
+		/* bail out if the filesystem is corrupted. */
 		if (error == EFSCORRUPTED)
 			break;
 
@@ -137,8 +140,6 @@ restart:
 		delay(1);
 		goto restart;
 	}
-
-	xfs_put_perag(mp, pag);
 	return last_error;
 }
 
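Note the asymmetric contract this change creates: xfs_inode_ag_walk() takes pag_ici_lock shared or exclusive according to the new argument, drops it itself on a failed lookup, but otherwise relies on the execute callback to release it. A small pthreads sketch of that take-here/release-in-callback convention (hypothetical names, not the kernel's):

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t tree_lock = PTHREAD_RWLOCK_INITIALIZER;

/* callback contract: runs with tree_lock held, must release it */
static int execute(int item, int exclusive)
{
	printf("handling item %d (%s)\n", item, exclusive ? "excl" : "shared");
	pthread_rwlock_unlock(&tree_lock);
	return 0;
}

static int walk_one(int item, int exclusive)
{
	if (exclusive)
		pthread_rwlock_wrlock(&tree_lock);
	else
		pthread_rwlock_rdlock(&tree_lock);

	if (item < 0) {			/* lookup failed: drop the lock here */
		pthread_rwlock_unlock(&tree_lock);
		return -1;
	}
	/* lookup succeeded: execute() releases tree_lock for us */
	return execute(item, exclusive);
}

int main(void)
{
	walk_one(42, 0);	/* shared walk, e.g. sync */
	walk_one(42, 1);	/* exclusive walk, e.g. reclaim */
	walk_one(-1, 1);	/* failed lookup unlocks locally */
	return 0;
}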
@@ -148,16 +149,24 @@ xfs_inode_ag_iterator(
 	int			(*execute)(struct xfs_inode *ip,
 					   struct xfs_perag *pag, int flags),
 	int			flags,
-	int			tag)
+	int			tag,
+	int			exclusive)
 {
 	int			error = 0;
 	int			last_error = 0;
 	xfs_agnumber_t		ag;
 
 	for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
-		if (!mp->m_perag[ag].pag_ici_init)
+		struct xfs_perag	*pag;
+
+		pag = xfs_perag_get(mp, ag);
+		if (!pag->pag_ici_init) {
+			xfs_perag_put(pag);
 			continue;
-		error = xfs_inode_ag_walk(mp, ag, execute, flags, tag);
+		}
+		error = xfs_inode_ag_walk(mp, pag, execute, flags, tag,
+						exclusive);
+		xfs_perag_put(pag);
 		if (error) {
 			last_error = error;
 			if (error == EFSCORRUPTED)
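This hunk replaces direct mp->m_perag[ag] access with the reference-counted xfs_perag_get()/xfs_perag_put() pair, taking care to drop the reference on the early continue as well. A toy model of that per-iteration get/put discipline (illustrative code, not the kernel implementation):

#include <stdio.h>

struct perag {
	int refcount;
	int initialised;	/* models pag->pag_ici_init */
};

static struct perag *perag_get(struct perag *pag)
{
	pag->refcount++;
	return pag;
}

static void perag_put(struct perag *pag)
{
	pag->refcount--;
}

static int walk_ag(struct perag *pag)
{
	printf("walking AG, refcount=%d\n", pag->refcount);
	return 0;
}

int main(void)
{
	struct perag ags[2] = { { 0, 0 }, { 0, 1 } };

	for (int ag = 0; ag < 2; ag++) {
		struct perag *pag = perag_get(&ags[ag]);

		if (!pag->initialised) {	/* mirror the early continue */
			perag_put(pag);
			continue;
		}
		walk_ag(pag);
		perag_put(pag);		/* reference dropped on every path */
	}
	return 0;
}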
@@ -174,30 +183,31 @@ xfs_sync_inode_valid(
 	struct xfs_perag	*pag)
 {
 	struct inode		*inode = VFS_I(ip);
+	int			error = EFSCORRUPTED;
 
 	/* nothing to sync during shutdown */
-	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-		read_unlock(&pag->pag_ici_lock);
-		return EFSCORRUPTED;
-	}
+	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+		goto out_unlock;
 
-	/*
-	 * If we can't get a reference on the inode, it must be in reclaim.
-	 * Leave it for the reclaim code to flush. Also avoid inodes that
-	 * haven't been fully initialised.
-	 */
-	if (!igrab(inode)) {
-		read_unlock(&pag->pag_ici_lock);
-		return ENOENT;
-	}
-	read_unlock(&pag->pag_ici_lock);
+	/* avoid new or reclaimable inodes. Leave for reclaim code to flush */
+	error = ENOENT;
+	if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
+		goto out_unlock;
+
+	/* If we can't grab the inode, it must be on its way to reclaim. */
+	if (!igrab(inode))
+		goto out_unlock;
 
-	if (is_bad_inode(inode) || xfs_iflags_test(ip, XFS_INEW)) {
+	if (is_bad_inode(inode)) {
 		IRELE(ip);
-		return ENOENT;
+		goto out_unlock;
 	}
 
-	return 0;
+	/* inode is valid */
+	error = 0;
+out_unlock:
+	read_unlock(&pag->pag_ici_lock);
+	return error;
 }
 
 STATIC int
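The rewritten xfs_sync_inode_valid() trades four separate unlock/return sites for a single out_unlock label and an error variable that is re-seeded before each check. A distilled sketch of that single-unlock pattern (hypothetical names; the kernel function is entered with the lock already held, here it is taken locally to keep the example self-contained):

#include <errno.h>
#include <pthread.h>

static pthread_rwlock_t ici_lock = PTHREAD_RWLOCK_INITIALIZER;

static int validate(int shutting_down, int in_reclaim)
{
	int error = EIO;		/* default for the first failure case */

	pthread_rwlock_rdlock(&ici_lock);

	if (shutting_down)
		goto out_unlock;

	error = ENOENT;			/* re-seed before the next check */
	if (in_reclaim)
		goto out_unlock;

	error = 0;			/* object is valid */
out_unlock:
	pthread_rwlock_unlock(&ici_lock);	/* one unlock site for all paths */
	return error;
}

int main(void)
{
	return validate(0, 0);
}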
@@ -224,7 +234,7 @@ xfs_sync_inode_data(
 	}
 
 	error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ?
-				0 : XFS_B_ASYNC, FI_NONE);
+				0 : XBF_ASYNC, FI_NONE);
 	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 
  out_wait:
@@ -260,8 +270,7 @@ xfs_sync_inode_attr(
 		goto out_unlock;
 	}
 
-	error = xfs_iflush(ip, (flags & SYNC_WAIT) ?
-			   XFS_IFLUSH_SYNC : XFS_IFLUSH_DELWRI);
+	error = xfs_iflush(ip, flags);
 
  out_unlock:
 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
@@ -282,14 +291,11 @@ xfs_sync_data(
 	ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
 
 	error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
-				      XFS_ICI_NO_TAG);
+				      XFS_ICI_NO_TAG, 0);
 	if (error)
 		return XFS_ERROR(error);
 
-	xfs_log_force(mp, 0,
-		      (flags & SYNC_WAIT) ?
-		       XFS_LOG_FORCE | XFS_LOG_SYNC :
-		       XFS_LOG_FORCE);
+	xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0);
 	return 0;
 }
 
@@ -304,7 +310,7 @@ xfs_sync_attr(
 	ASSERT((flags & ~SYNC_WAIT) == 0);
 
 	return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
-				     XFS_ICI_NO_TAG);
+				     XFS_ICI_NO_TAG, 0);
 }
 
 STATIC int
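Hunks throughout this patch convert callers of the three-argument xfs_log_force(mp, lsn, flags), where XFS_LOG_FORCE had to be passed explicitly, to a two-argument form in which the force is implicit and XFS_LOG_SYNC is the only meaningful flag. A toy model of that API narrowing (names and values hypothetical):

#include <stdio.h>

#define LOG_SYNC	0x1	/* wait for the force to complete */

/* new-style entry point: forcing is implied, only "sync" is optional */
static void log_force(unsigned int flags)
{
	printf("forcing log%s\n", (flags & LOG_SYNC) ? " (sync)" : "");
}

int main(void)
{
	int sync_wait = 1;	/* models (flags & SYNC_WAIT) at a call site */

	/* old: log_force_old(0, LOG_FORCE | (sync_wait ? LOG_SYNC : 0)); */
	log_force(sync_wait ? LOG_SYNC : 0);
	return 0;
}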
@@ -315,10 +321,6 @@ xfs_commit_dummy_trans(
 	struct xfs_inode *ip = mp->m_rootip;
 	struct xfs_trans *tp;
 	int error;
-	int log_flags = XFS_LOG_FORCE;
-
-	if (flags & SYNC_WAIT)
-		log_flags |= XFS_LOG_SYNC;
 
 	/*
 	 * Put a dummy transaction in the log to tell recovery
@@ -340,11 +342,11 @@ xfs_commit_dummy_trans(
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 
 	/* the log force ensures this transaction is pushed to disk */
-	xfs_log_force(mp, 0, log_flags);
+	xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0);
 	return error;
 }
 
-int
+STATIC int
 xfs_sync_fsdata(
 	struct xfs_mount	*mp,
 	int			flags)
@@ -360,7 +362,7 @@ xfs_sync_fsdata(
 	if (flags & SYNC_TRYLOCK) {
 		ASSERT(!(flags & SYNC_WAIT));
 
-		bp = xfs_getsb(mp, XFS_BUF_TRYLOCK);
+		bp = xfs_getsb(mp, XBF_TRYLOCK);
 		if (!bp)
 			goto out;
 
@@ -380,7 +382,7 @@ xfs_sync_fsdata(
 		 * become pinned in between there and here.
 		 */
 		if (XFS_BUF_ISPINNED(bp))
-			xfs_log_force(mp, 0, XFS_LOG_FORCE);
+			xfs_log_force(mp, 0);
 	}
 
 
@@ -441,9 +443,6 @@ xfs_quiesce_data(
 	xfs_sync_data(mp, SYNC_WAIT);
 	xfs_qm_sync(mp, SYNC_WAIT);
 
-	/* drop inode references pinned by filestreams */
-	xfs_filestream_flush(mp);
-
 	/* write superblock and hoover up shutdown errors */
 	error = xfs_sync_fsdata(mp, SYNC_WAIT);
 
@@ -460,16 +459,18 @@ xfs_quiesce_fs(
 {
 	int	count = 0, pincount;
 
+	xfs_reclaim_inodes(mp, 0);
 	xfs_flush_buftarg(mp->m_ddev_targp, 0);
-	xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
 
 	/*
 	 * This loop must run at least twice. The first instance of the loop
 	 * will flush most meta data but that will generate more meta data
 	 * (typically directory updates). Which then must be flushed and
-	 * logged before we can write the unmount record.
+	 * logged before we can write the unmount record. We also do sync
+	 * reclaim of inodes to catch any that the above delwri flush skipped.
 	 */
 	do {
+		xfs_reclaim_inodes(mp, SYNC_WAIT);
 		xfs_sync_attr(mp, SYNC_WAIT);
 		pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
 		if (!pincount) {
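The comment in this hunk is the key invariant: flushing metadata can itself dirty more metadata (directory updates), so a single pass can never prove the filesystem quiet. A small model of why the loop has to run to a fixpoint (illustrative only):

#include <stdio.h>

static int dirty = 1;		/* one unit of dirty metadata to start */
static int first_pass = 1;

/* returns a "pincount": non-zero means another pass is required */
static int flush_pass(void)
{
	int flushed = dirty;

	dirty = 0;
	if (first_pass && flushed) {
		first_pass = 0;
		dirty = 1;	/* flushing logged more metadata */
	}
	return dirty;
}

int main(void)
{
	int pincount, passes = 0;

	do {
		pincount = flush_pass();
		passes++;
	} while (pincount);
	printf("quiesced after %d passes\n", passes);	/* prints 2 */
	return 0;
}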
@@ -568,7 +569,7 @@ xfs_flush_inodes(
 	igrab(inode);
 	xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion);
 	wait_for_completion(&completion);
-	xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);
+	xfs_log_force(ip->i_mount, XFS_LOG_SYNC);
 }
 
 /*
@@ -584,8 +585,8 @@ xfs_sync_worker(
 	int		error;
 
 	if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
-		xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
-		xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
+		xfs_log_force(mp, 0);
+		xfs_reclaim_inodes(mp, 0);
 		/* dgc: errors ignored here */
 		error = xfs_qm_sync(mp, SYNC_TRYLOCK);
 		error = xfs_sync_fsdata(mp, SYNC_TRYLOCK);
@@ -664,60 +665,6 @@ xfs_syncd_stop(
 	kthread_stop(mp->m_sync_task);
 }
 
-STATIC int
-xfs_reclaim_inode(
-	xfs_inode_t	*ip,
-	int		sync_mode)
-{
-	xfs_perag_t	*pag = xfs_get_perag(ip->i_mount, ip->i_ino);
-
-	/* The hash lock here protects a thread in xfs_iget_core from
-	 * racing with us on linking the inode back with a vnode.
-	 * Once we have the XFS_IRECLAIM flag set it will not touch
-	 * us.
-	 */
-	write_lock(&pag->pag_ici_lock);
-	spin_lock(&ip->i_flags_lock);
-	if (__xfs_iflags_test(ip, XFS_IRECLAIM) ||
-	    !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
-		spin_unlock(&ip->i_flags_lock);
-		write_unlock(&pag->pag_ici_lock);
-		return -EAGAIN;
-	}
-	__xfs_iflags_set(ip, XFS_IRECLAIM);
-	spin_unlock(&ip->i_flags_lock);
-	write_unlock(&pag->pag_ici_lock);
-	xfs_put_perag(ip->i_mount, pag);
-
-	/*
-	 * If the inode is still dirty, then flush it out. If the inode
-	 * is not in the AIL, then it will be OK to flush it delwri as
-	 * long as xfs_iflush() does not keep any references to the inode.
-	 * We leave that decision up to xfs_iflush() since it has the
-	 * knowledge of whether it's OK to simply do a delwri flush of
-	 * the inode or whether we need to wait until the inode is
-	 * pulled from the AIL.
-	 * We get the flush lock regardless, though, just to make sure
-	 * we don't free it while it is being flushed.
-	 */
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	xfs_iflock(ip);
-
-	/*
-	 * In the case of a forced shutdown we rely on xfs_iflush() to
-	 * wait for the inode to be unpinned before returning an error.
-	 */
-	if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) {
-		/* synchronize with xfs_iflush_done */
-		xfs_iflock(ip);
-		xfs_ifunlock(ip);
-	}
-
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	xfs_ireclaim(ip);
-	return 0;
-}
-
 void
 __xfs_inode_set_reclaim_tag(
 	struct xfs_perag	*pag,
@@ -737,16 +684,17 @@ void
 xfs_inode_set_reclaim_tag(
 	xfs_inode_t	*ip)
 {
-	xfs_mount_t	*mp = ip->i_mount;
-	xfs_perag_t	*pag = xfs_get_perag(mp, ip->i_ino);
+	struct xfs_mount *mp = ip->i_mount;
+	struct xfs_perag *pag;
 
+	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
 	read_lock(&pag->pag_ici_lock);
 	spin_lock(&ip->i_flags_lock);
 	__xfs_inode_set_reclaim_tag(pag, ip);
 	__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
 	spin_unlock(&ip->i_flags_lock);
 	read_unlock(&pag->pag_ici_lock);
-	xfs_put_perag(mp, pag);
+	xfs_perag_put(pag);
 }
 
 void
@@ -759,20 +707,145 @@ __xfs_inode_clear_reclaim_tag(
 			XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
 }
 
+/*
+ * Inodes in different states need to be treated differently, and the return
+ * value of xfs_iflush is not sufficient to get this right. The following table
+ * lists the inode states and the reclaim actions necessary for non-blocking
+ * reclaim:
+ *
+ *
+ *	inode state	     iflush ret		required action
+ *	---------------      ----------		---------------
+ *	bad			-		reclaim
+ *	shutdown		EIO		unpin and reclaim
+ *	clean, unpinned		0		reclaim
+ *	stale, unpinned		0		reclaim
+ *	clean, pinned(*)	0		requeue
+ *	stale, pinned		EAGAIN		requeue
+ *	dirty, delwri ok	0		requeue
+ *	dirty, delwri blocked	EAGAIN		requeue
+ *	dirty, sync flush	0		reclaim
+ *
+ * (*) dgc: I don't think the clean, pinned state is possible but it gets
+ * handled anyway given the order of checks implemented.
+ *
+ * As can be seen from the table, the return value of xfs_iflush() is not
+ * sufficient to correctly decide the reclaim action here. The checks in
+ * xfs_iflush() might look like duplicates, but they are not.
+ *
+ * Also, because we get the flush lock first, we know that any inode that has
+ * been flushed delwri has had the flush completed by the time we check that
+ * the inode is clean. The clean inode check needs to be done before flushing
+ * the inode delwri otherwise we would loop forever requeuing clean inodes as
+ * we cannot tell apart a successful delwri flush and a clean inode from the
+ * return value of xfs_iflush().
+ *
+ * Note that because the inode is flushed delayed write by background
+ * writeback, the flush lock may already be held here and waiting on it can
+ * result in very long latencies. Hence for sync reclaims, where we wait on the
+ * flush lock, the caller should push out delayed write inodes first before
+ * trying to reclaim them to minimise the amount of time spent waiting. For
+ * background reclaim, we just requeue the inode for the next pass.
+ *
+ * Hence the order of actions after gaining the locks should be:
+ *	bad		=> reclaim
+ *	shutdown	=> unpin and reclaim
+ *	pinned, delwri	=> requeue
+ *	pinned, sync	=> unpin
+ *	stale		=> reclaim
+ *	clean		=> reclaim
+ *	dirty, delwri	=> flush and requeue
+ *	dirty, sync	=> flush, wait and reclaim
+ */
 STATIC int
-xfs_reclaim_inode_now(
+xfs_reclaim_inode(
 	struct xfs_inode	*ip,
 	struct xfs_perag	*pag,
-	int			flags)
+	int			sync_mode)
 {
-	/* ignore if already under reclaim */
-	if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
-		read_unlock(&pag->pag_ici_lock);
+	int	error = 0;
+
+	/*
+	 * The radix tree lock here protects a thread in xfs_iget from racing
+	 * with us starting reclaim on the inode. Once we have the
+	 * XFS_IRECLAIM flag set it will not touch us.
+	 */
+	spin_lock(&ip->i_flags_lock);
+	ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE));
+	if (__xfs_iflags_test(ip, XFS_IRECLAIM)) {
+		/* ignore as it is already under reclaim */
+		spin_unlock(&ip->i_flags_lock);
+		write_unlock(&pag->pag_ici_lock);
 		return 0;
 	}
-	read_unlock(&pag->pag_ici_lock);
+	__xfs_iflags_set(ip, XFS_IRECLAIM);
+	spin_unlock(&ip->i_flags_lock);
+	write_unlock(&pag->pag_ici_lock);
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	if (!xfs_iflock_nowait(ip)) {
+		if (!(sync_mode & SYNC_WAIT))
+			goto out;
+		xfs_iflock(ip);
+	}
+
+	if (is_bad_inode(VFS_I(ip)))
+		goto reclaim;
+	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+		xfs_iunpin_wait(ip);
+		goto reclaim;
+	}
+	if (xfs_ipincount(ip)) {
+		if (!(sync_mode & SYNC_WAIT)) {
+			xfs_ifunlock(ip);
+			goto out;
+		}
+		xfs_iunpin_wait(ip);
+	}
+	if (xfs_iflags_test(ip, XFS_ISTALE))
+		goto reclaim;
+	if (xfs_inode_clean(ip))
+		goto reclaim;
+
+	/* Now we have an inode that needs flushing */
+	error = xfs_iflush(ip, sync_mode);
+	if (sync_mode & SYNC_WAIT) {
+		xfs_iflock(ip);
+		goto reclaim;
+	}
+
+	/*
+	 * When we have to flush an inode but don't have SYNC_WAIT set, we
+	 * flush the inode out using a delwri buffer and wait for the next
+	 * call into reclaim to find it in a clean state instead of waiting for
+	 * it now. We also don't return errors here - if the error is transient
+	 * then the next reclaim pass will flush the inode, and if the error
+	 * is permanent then the next sync reclaim will reclaim the inode and
+	 * pass on the error.
+	 */
+	if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+		xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+			"inode 0x%llx background reclaim flush failed with %d",
+			(long long)ip->i_ino, error);
+	}
+out:
+	xfs_iflags_clear(ip, XFS_IRECLAIM);
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	/*
+	 * We could return EAGAIN here to make reclaim rescan the inode tree in
+	 * a short while. However, this just burns CPU time scanning the tree
+	 * waiting for IO to complete and xfssyncd never goes back to the idle
+	 * state. Instead, return 0 to let the next scheduled background reclaim
+	 * attempt to reclaim the inode again.
+	 */
+	return 0;
+
+reclaim:
+	xfs_ifunlock(ip);
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	xfs_ireclaim(ip);
+	return error;
 
-	return xfs_reclaim_inode(ip, flags);
 }
 
 int
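The decision table in the new comment is worth encoding to see why the check order matters: each test only means what it does because the earlier states have been excluded. A compact userspace rendering of that ordering (stand-in types and names, not the kernel's):

#include <stdio.h>

struct inode_state {
	int bad, shutdown, pinned, stale, clean;
};

enum action { RECLAIM, REQUEUE };

static enum action decide(const struct inode_state *st, int sync_wait)
{
	if (st->bad)
		return RECLAIM;
	if (st->shutdown)
		return RECLAIM;		/* after waiting for unpin */
	if (st->pinned && !sync_wait)
		return REQUEUE;		/* background: try again later */
	if (st->stale || st->clean)
		return RECLAIM;		/* nothing left to flush */
	/* dirty: flush; sync reclaim waits and reclaims, async requeues */
	return sync_wait ? RECLAIM : REQUEUE;
}

int main(void)
{
	struct inode_state dirty = { 0, 0, 0, 0, 0 };
	struct inode_state pinned = { 0, 0, 1, 0, 0 };

	printf("dirty, async  -> %s\n",
	       decide(&dirty, 0) == RECLAIM ? "reclaim" : "requeue");
	printf("pinned, sync  -> %s\n",
	       decide(&pinned, 1) == RECLAIM ? "reclaim" : "requeue");
	return 0;
}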
@@ -780,6 +853,6 @@ xfs_reclaim_inodes(
 	xfs_mount_t	*mp,
 	int		mode)
 {
-	return xfs_inode_ag_iterator(mp, xfs_reclaim_inode_now, mode,
-					XFS_ICI_RECLAIM_TAG);
+	return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode,
+					XFS_ICI_RECLAIM_TAG, 1);
 }