Diffstat (limited to 'fs/xfs/linux-2.6/xfs_sync.c')

 fs/xfs/linux-2.6/xfs_sync.c | 329 ++++++++++++++++++++++++++++++--------------
 1 file changed, 201 insertions(+), 128 deletions(-)
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 6fed97a8cd3e..a9f6d20aff41 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -65,7 +65,6 @@ xfs_inode_ag_lookup(
 	 * as the tree is sparse and a gang lookup walks to find
 	 * the number of objects requested.
 	 */
-	read_lock(&pag->pag_ici_lock);
 	if (tag == XFS_ICI_NO_TAG) {
 		nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
 				(void **)&ip, *first_index, 1);
@@ -74,7 +73,7 @@ xfs_inode_ag_lookup(
 				(void **)&ip, *first_index, 1, tag);
 	}
 	if (!nr_found)
-		goto unlock;
+		return NULL;
 
 	/*
 	 * Update the index for the next lookup. Catch overflows
@@ -84,25 +83,20 @@ xfs_inode_ag_lookup(
 	 */
 	*first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
 	if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
-		goto unlock;
-
+		return NULL;
 	return ip;
-
-unlock:
-	read_unlock(&pag->pag_ici_lock);
-	return NULL;
 }
 
 STATIC int
 xfs_inode_ag_walk(
 	struct xfs_mount	*mp,
-	xfs_agnumber_t		ag,
+	struct xfs_perag	*pag,
 	int			(*execute)(struct xfs_inode *ip,
 					   struct xfs_perag *pag, int flags),
 	int			flags,
-	int			tag)
+	int			tag,
+	int			exclusive)
 {
-	struct xfs_perag	*pag = &mp->m_perag[ag];
 	uint32_t		first_index;
 	int			last_error = 0;
 	int			skipped;
@@ -114,10 +108,20 @@ restart:
 		int		error = 0;
 		xfs_inode_t	*ip;
 
+		if (exclusive)
+			write_lock(&pag->pag_ici_lock);
+		else
+			read_lock(&pag->pag_ici_lock);
 		ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag);
-		if (!ip)
+		if (!ip) {
+			if (exclusive)
+				write_unlock(&pag->pag_ici_lock);
+			else
+				read_unlock(&pag->pag_ici_lock);
 			break;
+		}
 
+		/* execute releases pag->pag_ici_lock */
 		error = execute(ip, pag, flags);
 		if (error == EAGAIN) {
 			skipped++;
@@ -125,9 +129,8 @@ restart:
 		}
 		if (error)
 			last_error = error;
-		/*
-		 * bail out if the filesystem is corrupted.
-		 */
+
+		/* bail out if the filesystem is corrupted. */
 		if (error == EFSCORRUPTED)
 			break;
 
@@ -137,8 +140,6 @@ restart:
 		delay(1);
 		goto restart;
 	}
-
-	xfs_put_perag(mp, pag);
 	return last_error;
 }
 
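
The hunks above move the pag_ici_lock acquisition out of xfs_inode_ag_lookup() and into the walk loop, with the new exclusive argument selecting write_lock() or read_lock(); the reclaim path needs exclusive access because its execute callback drops the lock itself. A minimal userspace sketch of this caller-selected lock-mode pattern, using POSIX rwlocks and hypothetical tree/lookup names (not XFS code):

    #include <pthread.h>
    #include <stddef.h>

    struct tree {
        pthread_rwlock_t lock;      /* stands in for pag->pag_ici_lock */
        /* ... indexed objects ... */
    };

    /* stub lookup; a real one would walk the index under t->lock */
    static void *lookup_locked(struct tree *t, unsigned *first_index)
    {
        (void)t; (void)first_index;
        return NULL;
    }

    /*
     * Walk all objects, taking the lock in the mode the caller asks for.
     * As in the patch, execute() is responsible for dropping t->lock.
     */
    static int walk(struct tree *t,
                    int (*execute)(void *obj, struct tree *t),
                    int exclusive)
    {
        unsigned first_index = 0;
        int last_error = 0;

        for (;;) {
            void *obj;

            if (exclusive)
                pthread_rwlock_wrlock(&t->lock);
            else
                pthread_rwlock_rdlock(&t->lock);
            obj = lookup_locked(t, &first_index);
            if (!obj) {
                pthread_rwlock_unlock(&t->lock);
                break;
            }
            /* execute releases t->lock, exactly as the patch comments */
            if (execute(obj, t))
                last_error = -1;
        }
        return last_error;
    }
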
@@ -148,16 +149,24 @@ xfs_inode_ag_iterator(
 	int			(*execute)(struct xfs_inode *ip,
 					   struct xfs_perag *pag, int flags),
 	int			flags,
-	int			tag)
+	int			tag,
+	int			exclusive)
 {
 	int			error = 0;
 	int			last_error = 0;
 	xfs_agnumber_t		ag;
 
 	for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
-		if (!mp->m_perag[ag].pag_ici_init)
+		struct xfs_perag	*pag;
+
+		pag = xfs_perag_get(mp, ag);
+		if (!pag->pag_ici_init) {
+			xfs_perag_put(pag);
 			continue;
-		error = xfs_inode_ag_walk(mp, ag, execute, flags, tag);
+		}
+		error = xfs_inode_ag_walk(mp, pag, execute, flags, tag,
+						exclusive);
+		xfs_perag_put(pag);
 		if (error) {
 			last_error = error;
 			if (error == EFSCORRUPTED)
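
This hunk converts the iterator from bare &mp->m_perag[ag] array access to the reference-counted xfs_perag_get()/xfs_perag_put() pair, so the per-AG structure stays pinned for the whole walk. A sketch of the same get/put discipline with stand-in types (the refcounting internals are elided):

    #include <stddef.h>

    struct mount;                   /* opaque stand-in for xfs_mount */

    struct perag {
        int ici_init;               /* does this AG have an inode cache? */
        /* ... reference count, index root, lock ... */
    };

    /* stand-ins for xfs_perag_get()/xfs_perag_put(); get takes a reference */
    extern struct perag *perag_get(struct mount *mp, unsigned agno);
    extern void perag_put(struct perag *pag);

    int iterate(struct mount *mp, unsigned agcount,
                int (*walk)(struct mount *mp, struct perag *pag))
    {
        int error, last_error = 0;
        unsigned ag;

        for (ag = 0; ag < agcount; ag++) {
            struct perag *pag = perag_get(mp, ag);

            if (!pag->ici_init) {
                perag_put(pag);     /* every get pairs with a put */
                continue;
            }
            error = walk(mp, pag);
            perag_put(pag);         /* drop the reference after the walk */
            if (error)
                last_error = error;
        }
        return last_error;
    }
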
@@ -174,30 +183,31 @@ xfs_sync_inode_valid(
 	struct xfs_perag	*pag)
 {
 	struct inode		*inode = VFS_I(ip);
+	int			error = EFSCORRUPTED;
 
 	/* nothing to sync during shutdown */
-	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-		read_unlock(&pag->pag_ici_lock);
-		return EFSCORRUPTED;
-	}
+	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+		goto out_unlock;
 
-	/*
-	 * If we can't get a reference on the inode, it must be in reclaim.
-	 * Leave it for the reclaim code to flush. Also avoid inodes that
-	 * haven't been fully initialised.
-	 */
-	if (!igrab(inode)) {
-		read_unlock(&pag->pag_ici_lock);
-		return ENOENT;
-	}
-	read_unlock(&pag->pag_ici_lock);
+	/* avoid new or reclaimable inodes. Leave for reclaim code to flush */
+	error = ENOENT;
+	if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
+		goto out_unlock;
+
+	/* If we can't grab the inode, it must be on its way to reclaim. */
+	if (!igrab(inode))
+		goto out_unlock;
 
-	if (is_bad_inode(inode) || xfs_iflags_test(ip, XFS_INEW)) {
+	if (is_bad_inode(inode)) {
 		IRELE(ip);
-		return ENOENT;
+		goto out_unlock;
 	}
 
-	return 0;
+	/* inode is valid */
+	error = 0;
+out_unlock:
+	read_unlock(&pag->pag_ici_lock);
+	return error;
 }
 
 STATIC int
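
The rewrite above collapses four separate unlock-and-return paths into one out_unlock label, priming error with the worst case (EFSCORRUPTED) and stepping it down as each check passes, so pag_ici_lock is dropped in exactly one place. The same single-exit idiom in isolation, with hypothetical predicates (not the XFS helpers):

    #include <errno.h>
    #include <pthread.h>

    struct object;

    /* hypothetical predicates standing in for the XFS checks */
    extern int shutting_down(struct object *obj);
    extern int being_reclaimed(struct object *obj);
    extern int try_get_ref(struct object *obj);

    static int validate(struct object *obj, pthread_rwlock_t *lock)
    {
        int error = EIO;            /* assume the worst case first */

        if (shutting_down(obj))
            goto out_unlock;

        error = ENOENT;             /* next-worst case */
        if (being_reclaimed(obj))
            goto out_unlock;
        if (!try_get_ref(obj))
            goto out_unlock;

        error = 0;                  /* all checks passed */
    out_unlock:
        pthread_rwlock_unlock(lock);    /* the only unlock site */
        return error;
    }
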
@@ -224,7 +234,7 @@ xfs_sync_inode_data(
 	}
 
 	error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ?
-						0 : XFS_B_ASYNC, FI_NONE);
+						0 : XBF_ASYNC, FI_NONE);
 	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 
 out_wait:
@@ -260,8 +270,7 @@ xfs_sync_inode_attr(
 		goto out_unlock;
 	}
 
-	error = xfs_iflush(ip, (flags & SYNC_WAIT) ?
-			   XFS_IFLUSH_SYNC : XFS_IFLUSH_DELWRI);
+	error = xfs_iflush(ip, flags);
 
 out_unlock:
 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
@@ -282,14 +291,11 @@ xfs_sync_data(
 	ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
 
 	error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
-				      XFS_ICI_NO_TAG);
+				      XFS_ICI_NO_TAG, 0);
 	if (error)
 		return XFS_ERROR(error);
 
-	xfs_log_force(mp, 0,
-		      (flags & SYNC_WAIT) ?
-		       XFS_LOG_FORCE | XFS_LOG_SYNC :
-		       XFS_LOG_FORCE);
+	xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0);
 	return 0;
 }
 
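
This hunk and the later log-force hunks reflect a single API change: xfs_log_force() loses its LSN argument and the always-set XFS_LOG_FORCE flag, leaving only the flags word. Assuming the two-argument signature the patch converts to, every call site follows one pattern (fragments taken from the hunks in this diff):

    /* old, asynchronous force */
    xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
    /* old, synchronous force */
    xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);

    /* new, asynchronous force */
    xfs_log_force(mp, 0);
    /* new, synchronous force */
    xfs_log_force(mp, XFS_LOG_SYNC);
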
@@ -304,7 +310,7 @@ xfs_sync_attr(
 	ASSERT((flags & ~SYNC_WAIT) == 0);
 
 	return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
-				     XFS_ICI_NO_TAG);
+				     XFS_ICI_NO_TAG, 0);
 }
 
 STATIC int
@@ -315,10 +321,6 @@ xfs_commit_dummy_trans(
 	struct xfs_inode *ip = mp->m_rootip;
 	struct xfs_trans *tp;
 	int error;
-	int log_flags = XFS_LOG_FORCE;
-
-	if (flags & SYNC_WAIT)
-		log_flags |= XFS_LOG_SYNC;
 
 	/*
 	 * Put a dummy transaction in the log to tell recovery
@@ -340,11 +342,11 @@ xfs_commit_dummy_trans(
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 
 	/* the log force ensures this transaction is pushed to disk */
-	xfs_log_force(mp, 0, log_flags);
+	xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0);
 	return error;
 }
 
-int
+STATIC int
 xfs_sync_fsdata(
 	struct xfs_mount	*mp,
 	int			flags)
@@ -360,7 +362,7 @@ xfs_sync_fsdata(
 	if (flags & SYNC_TRYLOCK) {
 		ASSERT(!(flags & SYNC_WAIT));
 
-		bp = xfs_getsb(mp, XFS_BUF_TRYLOCK);
+		bp = xfs_getsb(mp, XBF_TRYLOCK);
 		if (!bp)
 			goto out;
 
@@ -380,7 +382,7 @@ xfs_sync_fsdata(
 	 * become pinned in between there and here.
 	 */
 	if (XFS_BUF_ISPINNED(bp))
-		xfs_log_force(mp, 0, XFS_LOG_FORCE);
+		xfs_log_force(mp, 0);
 	}
 
 
@@ -441,9 +443,6 @@ xfs_quiesce_data(
 	xfs_sync_data(mp, SYNC_WAIT);
 	xfs_qm_sync(mp, SYNC_WAIT);
 
-	/* drop inode references pinned by filestreams */
-	xfs_filestream_flush(mp);
-
 	/* write superblock and hoover up shutdown errors */
 	error = xfs_sync_fsdata(mp, SYNC_WAIT);
 
@@ -460,16 +459,18 @@ xfs_quiesce_fs(
 {
 	int	count = 0, pincount;
 
+	xfs_reclaim_inodes(mp, 0);
 	xfs_flush_buftarg(mp->m_ddev_targp, 0);
-	xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
 
 	/*
 	 * This loop must run at least twice. The first instance of the loop
 	 * will flush most meta data but that will generate more meta data
 	 * (typically directory updates). Which then must be flushed and
-	 * logged before we can write the unmount record.
+	 * logged before we can write the unmount record. We also do sync
+	 * reclaim of inodes to catch any that the above delwri flush skipped.
 	 */
 	do {
+		xfs_reclaim_inodes(mp, SYNC_WAIT);
 		xfs_sync_attr(mp, SYNC_WAIT);
 		pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
 		if (!pincount) {
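
The quiesce path now reclaims what it can non-blockingly before the loop, then repeats a blocking reclaim inside it to catch inodes the delwri flush skipped; the loop must run at least twice because each flush can dirty more metadata (typically directory updates). A condensed sketch of that flush-until-stable loop, with stand-in helpers rather than the XFS functions:

    struct fs;

    /* stand-ins for xfs_reclaim_inodes(), xfs_sync_attr(), xfs_flush_buftarg() */
    extern void reclaim_inodes(struct fs *fs, int wait);
    extern void sync_metadata(struct fs *fs, int wait);
    extern int  flush_buffers(struct fs *fs, int wait); /* returns pinned count */

    #define WAIT 1

    void quiesce(struct fs *fs)
    {
        int count = 0, pincount;

        reclaim_inodes(fs, 0);      /* opportunistic first pass */
        flush_buffers(fs, 0);

        /*
         * Loop until two passes complete with nothing left pinned:
         * flushing metadata generates more metadata to flush.
         */
        do {
            reclaim_inodes(fs, WAIT);   /* catch what delwri skipped */
            sync_metadata(fs, WAIT);
            pincount = flush_buffers(fs, WAIT);
            if (!pincount)
                count++;
        } while (count < 2);
    }
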
@@ -568,7 +569,7 @@ xfs_flush_inodes(
 	igrab(inode);
 	xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion);
 	wait_for_completion(&completion);
-	xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);
+	xfs_log_force(ip->i_mount, XFS_LOG_SYNC);
 }
 
 /*
@@ -584,8 +585,8 @@ xfs_sync_worker(
 	int		error;
 
 	if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
-		xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
-		xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
+		xfs_log_force(mp, 0);
+		xfs_reclaim_inodes(mp, 0);
 		/* dgc: errors ignored here */
 		error = xfs_qm_sync(mp, SYNC_TRYLOCK);
 		error = xfs_sync_fsdata(mp, SYNC_TRYLOCK);
@@ -664,60 +665,6 @@ xfs_syncd_stop(
 	kthread_stop(mp->m_sync_task);
 }
 
-STATIC int
-xfs_reclaim_inode(
-	xfs_inode_t	*ip,
-	int		sync_mode)
-{
-	xfs_perag_t	*pag = xfs_get_perag(ip->i_mount, ip->i_ino);
-
-	/* The hash lock here protects a thread in xfs_iget_core from
-	 * racing with us on linking the inode back with a vnode.
-	 * Once we have the XFS_IRECLAIM flag set it will not touch
-	 * us.
-	 */
-	write_lock(&pag->pag_ici_lock);
-	spin_lock(&ip->i_flags_lock);
-	if (__xfs_iflags_test(ip, XFS_IRECLAIM) ||
-	    !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
-		spin_unlock(&ip->i_flags_lock);
-		write_unlock(&pag->pag_ici_lock);
-		return -EAGAIN;
-	}
-	__xfs_iflags_set(ip, XFS_IRECLAIM);
-	spin_unlock(&ip->i_flags_lock);
-	write_unlock(&pag->pag_ici_lock);
-	xfs_put_perag(ip->i_mount, pag);
-
-	/*
-	 * If the inode is still dirty, then flush it out.  If the inode
-	 * is not in the AIL, then it will be OK to flush it delwri as
-	 * long as xfs_iflush() does not keep any references to the inode.
-	 * We leave that decision up to xfs_iflush() since it has the
-	 * knowledge of whether it's OK to simply do a delwri flush of
-	 * the inode or whether we need to wait until the inode is
-	 * pulled from the AIL.
-	 * We get the flush lock regardless, though, just to make sure
-	 * we don't free it while it is being flushed.
-	 */
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	xfs_iflock(ip);
-
-	/*
-	 * In the case of a forced shutdown we rely on xfs_iflush() to
-	 * wait for the inode to be unpinned before returning an error.
-	 */
-	if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) {
-		/* synchronize with xfs_iflush_done */
-		xfs_iflock(ip);
-		xfs_ifunlock(ip);
-	}
-
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	xfs_ireclaim(ip);
-	return 0;
-}
-
 void
 __xfs_inode_set_reclaim_tag(
 	struct xfs_perag	*pag,
@@ -737,16 +684,17 @@ void
 xfs_inode_set_reclaim_tag(
 	xfs_inode_t	*ip)
 {
-	xfs_mount_t	*mp = ip->i_mount;
-	xfs_perag_t	*pag = xfs_get_perag(mp, ip->i_ino);
+	struct xfs_mount *mp = ip->i_mount;
+	struct xfs_perag *pag;
 
+	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
 	read_lock(&pag->pag_ici_lock);
 	spin_lock(&ip->i_flags_lock);
 	__xfs_inode_set_reclaim_tag(pag, ip);
 	__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
 	spin_unlock(&ip->i_flags_lock);
 	read_unlock(&pag->pag_ici_lock);
-	xfs_put_perag(mp, pag);
+	xfs_perag_put(pag);
 }
 
 void
@@ -759,20 +707,145 @@ __xfs_inode_clear_reclaim_tag(
 			XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
 }
 
+/*
+ * Inodes in different states need to be treated differently, and the return
+ * value of xfs_iflush is not sufficient to get this right. The following table
+ * lists the inode states and the reclaim actions necessary for non-blocking
+ * reclaim:
+ *
+ *
+ *	inode state		iflush ret	required action
+ *	---------------		----------	---------------
+ *	bad			-		reclaim
+ *	shutdown		EIO		unpin and reclaim
+ *	clean, unpinned		0		reclaim
+ *	stale, unpinned		0		reclaim
+ *	clean, pinned(*)	0		requeue
+ *	stale, pinned		EAGAIN		requeue
+ *	dirty, delwri ok	0		requeue
+ *	dirty, delwri blocked	EAGAIN		requeue
+ *	dirty, sync flush	0		reclaim
+ *
+ * (*) dgc: I don't think the clean, pinned state is possible but it gets
+ * handled anyway given the order of checks implemented.
+ *
+ * As can be seen from the table, the return value of xfs_iflush() is not
+ * sufficient to correctly decide the reclaim action here. The checks in
+ * xfs_iflush() might look like duplicates, but they are not.
+ *
+ * Also, because we get the flush lock first, we know that any inode that has
+ * been flushed delwri has had the flush completed by the time we check that
+ * the inode is clean. The clean inode check needs to be done before flushing
+ * the inode delwri otherwise we would loop forever requeuing clean inodes as
+ * we cannot tell apart a successful delwri flush and a clean inode from the
+ * return value of xfs_iflush().
+ *
+ * Note that because the inode is flushed delayed write by background
+ * writeback, the flush lock may already be held here and waiting on it can
+ * result in very long latencies. Hence for sync reclaims, where we wait on the
+ * flush lock, the caller should push out delayed write inodes first before
+ * trying to reclaim them to minimise the amount of time spent waiting. For
+ * background reclaim, we just requeue the inode for the next pass.
+ *
+ * Hence the order of actions after gaining the locks should be:
+ *	bad		=> reclaim
+ *	shutdown	=> unpin and reclaim
+ *	pinned, delwri	=> requeue
+ *	pinned, sync	=> unpin
+ *	stale		=> reclaim
+ *	clean		=> reclaim
+ *	dirty, delwri	=> flush and requeue
+ *	dirty, sync	=> flush, wait and reclaim
+ */
 STATIC int
-xfs_reclaim_inode_now(
+xfs_reclaim_inode(
 	struct xfs_inode	*ip,
 	struct xfs_perag	*pag,
-	int			flags)
+	int			sync_mode)
 {
-	/* ignore if already under reclaim */
-	if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
-		read_unlock(&pag->pag_ici_lock);
+	int	error = 0;
+
+	/*
+	 * The radix tree lock here protects a thread in xfs_iget from racing
+	 * with us starting reclaim on the inode. Once we have the
+	 * XFS_IRECLAIM flag set it will not touch us.
+	 */
+	spin_lock(&ip->i_flags_lock);
+	ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE));
+	if (__xfs_iflags_test(ip, XFS_IRECLAIM)) {
+		/* ignore as it is already under reclaim */
+		spin_unlock(&ip->i_flags_lock);
+		write_unlock(&pag->pag_ici_lock);
 		return 0;
 	}
-	read_unlock(&pag->pag_ici_lock);
+	__xfs_iflags_set(ip, XFS_IRECLAIM);
+	spin_unlock(&ip->i_flags_lock);
+	write_unlock(&pag->pag_ici_lock);
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	if (!xfs_iflock_nowait(ip)) {
+		if (!(sync_mode & SYNC_WAIT))
+			goto out;
+		xfs_iflock(ip);
+	}
+
+	if (is_bad_inode(VFS_I(ip)))
+		goto reclaim;
+	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+		xfs_iunpin_wait(ip);
+		goto reclaim;
+	}
+	if (xfs_ipincount(ip)) {
+		if (!(sync_mode & SYNC_WAIT)) {
+			xfs_ifunlock(ip);
+			goto out;
+		}
+		xfs_iunpin_wait(ip);
+	}
+	if (xfs_iflags_test(ip, XFS_ISTALE))
+		goto reclaim;
+	if (xfs_inode_clean(ip))
+		goto reclaim;
+
+	/* Now we have an inode that needs flushing */
+	error = xfs_iflush(ip, sync_mode);
+	if (sync_mode & SYNC_WAIT) {
+		xfs_iflock(ip);
+		goto reclaim;
+	}
+
+	/*
+	 * When we have to flush an inode but don't have SYNC_WAIT set, we
+	 * flush the inode out using a delwri buffer and wait for the next
+	 * call into reclaim to find it in a clean state instead of waiting for
+	 * it now. We also don't return errors here - if the error is transient
+	 * then the next reclaim pass will flush the inode, and if the error
+	 * is permanent then the next sync reclaim will reclaim the inode and
+	 * pass on the error.
+	 */
+	if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+		xfs_fs_cmn_err(CE_WARN, ip->i_mount,
+			"inode 0x%llx background reclaim flush failed with %d",
+			(long long)ip->i_ino, error);
+	}
+out:
+	xfs_iflags_clear(ip, XFS_IRECLAIM);
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	/*
+	 * We could return EAGAIN here to make reclaim rescan the inode tree in
+	 * a short while. However, this just burns CPU time scanning the tree
+	 * waiting for IO to complete and xfssyncd never goes back to the idle
+	 * state. Instead, return 0 to let the next scheduled background reclaim
+	 * attempt to reclaim the inode again.
+	 */
+	return 0;
+
+reclaim:
+	xfs_ifunlock(ip);
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	xfs_ireclaim(ip);
+	return error;
 
-	return xfs_reclaim_inode(ip, flags);
 }
 
 int
@@ -780,6 +853,6 @@ xfs_reclaim_inodes(
 	xfs_mount_t	*mp,
 	int		mode)
 {
-	return xfs_inode_ag_iterator(mp, xfs_reclaim_inode_now, mode,
-					XFS_ICI_RECLAIM_TAG);
+	return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode,
+					XFS_ICI_RECLAIM_TAG, 1);
 }
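
The new xfs_reclaim_inode() above implements the state table from its comment block; the ordering of checks (bad, shutdown, pinned, stale, clean, then dirty) matters precisely because the return value of xfs_iflush() alone cannot distinguish these cases. A condensed userspace rendering of that decision ladder, with all predicates and actions as stand-ins rather than XFS code:

    enum action { RECLAIM, REQUEUE };

    struct inode_state {
        int bad;
        int shutdown;
        int pinned;
        int stale;
        int clean;
    };

    /* stand-ins for xfs_iunpin_wait() and xfs_iflush() */
    extern void unpin_wait(struct inode_state *st);
    extern int  flush(struct inode_state *st, int sync_wait);

    static enum action decide(struct inode_state *st, int sync_wait)
    {
        if (st->bad)
            return RECLAIM;             /* bad            => reclaim */
        if (st->shutdown) {
            unpin_wait(st);             /* shutdown       => unpin and reclaim */
            return RECLAIM;
        }
        if (st->pinned) {
            if (!sync_wait)
                return REQUEUE;         /* pinned, delwri => requeue */
            unpin_wait(st);             /* pinned, sync   => unpin */
        }
        if (st->stale || st->clean)
            return RECLAIM;             /* stale/clean    => reclaim */

        flush(st, sync_wait);           /* dirty          => flush, then ... */
        return sync_wait ? RECLAIM : REQUEUE;   /* ... wait+reclaim, or requeue */
    }
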