author    Linus Torvalds <torvalds@woody.linux-foundation.org>  2007-09-19 14:40:13 -0400
committer Linus Torvalds <torvalds@woody.linux-foundation.org>  2007-09-19 14:40:13 -0400
commit    a78feb7c8a0687913f9869c05811054595a42a76 (patch)
tree      b89ceba977943e58475e28cf6acd6377a8d84320 /fs
parent    91fe7d7cdd7ebb0b6c01f201a23824ab5b466ada (diff)
parent    b394e43e995d08821588a22561c6a71a63b4ff27 (diff)
Merge branch 'for-linus' of git://oss.sgi.com:8090/xfs/xfs-2.6
* 'for-linus' of git://oss.sgi.com:8090/xfs/xfs-2.6:
  [XFS] Avoid replaying inode buffer initialisation log items if on-disk version is newer.
  [XFS] Ensure file size updates have been completed before writing inode to disk.
  [XFS] On-demand reaping of the MRU cache
Diffstat (limited to 'fs')
 -rw-r--r--  fs/xfs/linux-2.6/xfs_aops.c   |  1
 -rw-r--r--  fs/xfs/linux-2.6/xfs_super.c  |  4
 -rw-r--r--  fs/xfs/xfs_buf_item.h         |  5
 -rw-r--r--  fs/xfs/xfs_filestream.c       |  3
 -rw-r--r--  fs/xfs/xfs_log_recover.c      | 51
 -rw-r--r--  fs/xfs/xfs_mru_cache.c        | 72
 -rw-r--r--  fs/xfs/xfs_mru_cache.h        |  6
 -rw-r--r--  fs/xfs/xfs_trans_buf.c        |  1
 -rw-r--r--  fs/xfs/xfs_vnodeops.c         | 20
 9 files changed, 101 insertions(+), 62 deletions(-)
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index d9c40fe64195..5f152f60d74d 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -181,6 +181,7 @@ xfs_setfilesize(
 		ip->i_d.di_size = isize;
 		ip->i_update_core = 1;
 		ip->i_update_size = 1;
+		mark_inode_dirty_sync(vn_to_inode(ioend->io_vnode));
 	}
 
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 4528f9a3f304..491d1f4f202d 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -415,8 +415,10 @@ xfs_fs_write_inode(
 
 	if (vp) {
 		vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
-		if (sync)
+		if (sync) {
+			filemap_fdatawait(inode->i_mapping);
 			flags |= FLUSH_SYNC;
+		}
 		error = bhv_vop_iflush(vp, flags);
 		if (error == EAGAIN)
 			error = sync? bhv_vop_iflush(vp, flags | FLUSH_LOG) : 0;
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index d7e136143066..fa25b7dcc6c3 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -52,6 +52,11 @@ typedef struct xfs_buf_log_format_t {
 #define	XFS_BLI_UDQUOT_BUF	0x4
 #define	XFS_BLI_PDQUOT_BUF	0x8
 #define	XFS_BLI_GDQUOT_BUF	0x10
+/*
+ * This flag indicates that the buffer contains newly allocated
+ * inodes.
+ */
+#define	XFS_BLI_INODE_NEW_BUF	0x20
 
 #define	XFS_BLI_CHUNK		128
 #define	XFS_BLI_SHIFT		7
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index ce2278611bb7..16f8e175167d 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -467,8 +467,7 @@ void
 xfs_filestream_flush(
 	xfs_mount_t	*mp)
 {
-	/* point in time flush, so keep the reaper running */
-	xfs_mru_cache_flush(mp->m_filestream, 1);
+	xfs_mru_cache_flush(mp->m_filestream);
 }
 
 /*
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 8ae6e8e5f3db..dacb19739cc2 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1874,6 +1874,7 @@ xlog_recover_do_inode_buffer(
 /*ARGSUSED*/
 STATIC void
 xlog_recover_do_reg_buffer(
+	xfs_mount_t		*mp,
 	xlog_recover_item_t	*item,
 	xfs_buf_t		*bp,
 	xfs_buf_log_format_t	*buf_f)
@@ -1884,6 +1885,50 @@ xlog_recover_do_reg_buffer(
 	unsigned int		*data_map = NULL;
 	unsigned int		map_size = 0;
 	int			error;
+	int			stale_buf = 1;
+
+	/*
+	 * Scan through the on-disk inode buffer and attempt to
+	 * determine if it has been written to since it was logged.
+	 *
+	 * - If any of the magic numbers are incorrect then the buffer is stale
+	 * - If any of the modes are non-zero then the buffer is not stale
+	 * - If all of the modes are zero and at least one of the generation
+	 *   counts is non-zero then the buffer is stale
+	 *
+	 * If the end result is a stale buffer then the log buffer is replayed
+	 * otherwise it is skipped.
+	 *
+	 * This heuristic is not perfect.  It can be improved by scanning the
+	 * entire inode chunk for evidence that any of the inode clusters have
+	 * been updated.  To fix this problem completely we will need a major
+	 * architectural change to the logging system.
+	 */
+	if (buf_f->blf_flags & XFS_BLI_INODE_NEW_BUF) {
+		xfs_dinode_t	*dip;
+		int		inodes_per_buf;
+		int		mode_count = 0;
+		int		gen_count = 0;
+
+		stale_buf = 0;
+		inodes_per_buf = XFS_BUF_COUNT(bp) >> mp->m_sb.sb_inodelog;
+		for (i = 0; i < inodes_per_buf; i++) {
+			dip = (xfs_dinode_t *)xfs_buf_offset(bp,
+				i * mp->m_sb.sb_inodesize);
+			if (be16_to_cpu(dip->di_core.di_magic) !=
+					XFS_DINODE_MAGIC) {
+				stale_buf = 1;
+				break;
+			}
+			if (be16_to_cpu(dip->di_core.di_mode))
+				mode_count++;
+			if (be16_to_cpu(dip->di_core.di_gen))
+				gen_count++;
+		}
+
+		if (!mode_count && gen_count)
+			stale_buf = 1;
+	}
 
 	switch (buf_f->blf_type) {
 	case XFS_LI_BUF:
@@ -1917,7 +1962,7 @@ xlog_recover_do_reg_buffer(
 				-1, 0, XFS_QMOPT_DOWARN,
 				"dquot_buf_recover");
 		}
-		if (!error)
+		if (!error && stale_buf)
 			memcpy(xfs_buf_offset(bp,
 				(uint)bit << XFS_BLI_SHIFT),	/* dest */
 				item->ri_buf[i].i_addr,		/* source */
@@ -2089,7 +2134,7 @@ xlog_recover_do_dquot_buffer(
 	if (log->l_quotaoffs_flag & type)
 		return;
 
-	xlog_recover_do_reg_buffer(item, bp, buf_f);
+	xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
 }
 
 /*
@@ -2190,7 +2235,7 @@ xlog_recover_do_buffer_trans(
 	    (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) {
 		xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
 	} else {
-		xlog_recover_do_reg_buffer(item, bp, buf_f);
+		xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
 	}
 	if (error)
 		return XFS_ERROR(error);
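
[Editor's note] The new stale_buf block boils down to three checks per inode in the buffer. A standalone userspace model may make the decision table easier to see; it covers only the XFS_BLI_INODE_NEW_BUF path, and `struct dinode` and `DINODE_MAGIC` are simplified stand-ins for the kernel's `xfs_dinode_t` and `XFS_DINODE_MAGIC`, not the real on-disk layouts:

#include <stdio.h>

/* Simplified stand-in for the fields the heuristic reads. */
struct dinode {
	unsigned short	di_magic;	/* DINODE_MAGIC once initialised */
	unsigned short	di_mode;	/* non-zero while the inode is in use */
	unsigned short	di_gen;		/* bumped each time the inode is reused */
};

#define DINODE_MAGIC	0x494e	/* "IN" */

/*
 * Returns 1 when the on-disk buffer looks older than the logged image
 * (so the log item must be replayed), 0 when the disk copy looks newer
 * (so replay is skipped).  Same three rules as the comment in the patch.
 */
static int buffer_is_stale(const struct dinode *dip, int n)
{
	int i, mode_count = 0, gen_count = 0;

	for (i = 0; i < n; i++) {
		if (dip[i].di_magic != DINODE_MAGIC)
			return 1;		/* bad magic: never initialised */
		if (dip[i].di_mode)
			mode_count++;
		if (dip[i].di_gen)
			gen_count++;
	}
	/* All modes zero but a generation bumped: inodes were used and freed. */
	return !mode_count && gen_count;
}

int main(void)
{
	struct dinode uninit[2] = { { 0, 0, 0 }, { 0, 0, 0 } };
	struct dinode live[2]   = { { DINODE_MAGIC, 0100644, 1 },
				    { DINODE_MAGIC, 0, 0 } };
	struct dinode freed[2]  = { { DINODE_MAGIC, 0, 3 },
				    { DINODE_MAGIC, 0, 1 } };

	printf("uninitialised: stale=%d -> replay\n", buffer_is_stale(uninit, 2));
	printf("live inodes:   stale=%d -> skip\n",   buffer_is_stale(live, 2));
	printf("freed inodes:  stale=%d -> replay\n", buffer_is_stale(freed, 2));
	return 0;
}

As the patch comment itself notes, this is a heuristic: a buffer whose inodes are all clean and generation-zero is indistinguishable from one that was never rewritten, so the imperfect cases err toward skipping replay only when there is positive evidence the disk copy is newer.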
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index 7deb9e3cbbd3..e0b358c1c533 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -206,8 +206,11 @@ _xfs_mru_cache_list_insert(
 	 */
 	if (!_xfs_mru_cache_migrate(mru, now)) {
 		mru->time_zero = now;
-		if (!mru->next_reap)
-			mru->next_reap = mru->grp_count * mru->grp_time;
+		if (!mru->queued) {
+			mru->queued = 1;
+			queue_delayed_work(xfs_mru_reap_wq, &mru->work,
+					   mru->grp_count * mru->grp_time);
+		}
 	} else {
 		grp = (now - mru->time_zero) / mru->grp_time;
 		grp = (mru->lru_grp + grp) % mru->grp_count;
@@ -271,29 +274,26 @@ _xfs_mru_cache_reap(
 	struct work_struct	*work)
 {
 	xfs_mru_cache_t		*mru = container_of(work, xfs_mru_cache_t, work.work);
-	unsigned long		now;
+	unsigned long		now, next;
 
 	ASSERT(mru && mru->lists);
 	if (!mru || !mru->lists)
 		return;
 
 	mutex_spinlock(&mru->lock);
-	now = jiffies;
-	if (mru->reap_all ||
-	    (mru->next_reap && time_after(now, mru->next_reap))) {
-		if (mru->reap_all)
-			now += mru->grp_count * mru->grp_time * 2;
-		mru->next_reap = _xfs_mru_cache_migrate(mru, now);
-		_xfs_mru_cache_clear_reap_list(mru);
+	next = _xfs_mru_cache_migrate(mru, jiffies);
+	_xfs_mru_cache_clear_reap_list(mru);
+
+	mru->queued = next;
+	if ((mru->queued > 0)) {
+		now = jiffies;
+		if (next <= now)
+			next = 0;
+		else
+			next -= now;
+		queue_delayed_work(xfs_mru_reap_wq, &mru->work, next);
 	}
 
-	/*
-	 * the process that triggered the reap_all is responsible
-	 * for restating the periodic reap if it is required.
-	 */
-	if (!mru->reap_all)
-		queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time);
-	mru->reap_all = 0;
 	mutex_spinunlock(&mru->lock, 0);
 }
 
@@ -352,7 +352,7 @@ xfs_mru_cache_create(
 
 	/* An extra list is needed to avoid reaping up to a grp_time early. */
 	mru->grp_count = grp_count + 1;
-	mru->lists = kmem_alloc(mru->grp_count * sizeof(*mru->lists), KM_SLEEP);
+	mru->lists = kmem_zalloc(mru->grp_count * sizeof(*mru->lists), KM_SLEEP);
 
 	if (!mru->lists) {
 		err = ENOMEM;
@@ -374,11 +374,6 @@ xfs_mru_cache_create(
 	mru->grp_time  = grp_time;
 	mru->free_func = free_func;
 
-	/* start up the reaper event */
-	mru->next_reap = 0;
-	mru->reap_all = 0;
-	queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time);
-
 	*mrup = mru;
 
 exit:
@@ -394,35 +389,25 @@ exit:
  * Call xfs_mru_cache_flush() to flush out all cached entries, calling their
  * free functions as they're deleted. When this function returns, the caller is
  * guaranteed that all the free functions for all the elements have finished
- * executing.
- *
- * While we are flushing, we stop the periodic reaper event from triggering.
- * Normally, we want to restart this periodic event, but if we are shutting
- * down the cache we do not want it restarted. hence the restart parameter
- * where 0 = do not restart reaper and 1 = restart reaper.
+ * executing and the reaper is not running.
  */
 void
 xfs_mru_cache_flush(
-	xfs_mru_cache_t		*mru,
-	int			restart)
+	xfs_mru_cache_t		*mru)
 {
 	if (!mru || !mru->lists)
 		return;
 
-	cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work);
-
 	mutex_spinlock(&mru->lock);
-	mru->reap_all = 1;
-	mutex_spinunlock(&mru->lock, 0);
+	if (mru->queued) {
+		mutex_spinunlock(&mru->lock, 0);
+		cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work);
+		mutex_spinlock(&mru->lock);
+	}
 
-	queue_work(xfs_mru_reap_wq, &mru->work.work);
-	flush_workqueue(xfs_mru_reap_wq);
+	_xfs_mru_cache_migrate(mru, jiffies + mru->grp_count * mru->grp_time);
+	_xfs_mru_cache_clear_reap_list(mru);
 
-	mutex_spinlock(&mru->lock);
-	WARN_ON_ONCE(mru->reap_all != 0);
-	mru->reap_all = 0;
-	if (restart)
-		queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time);
 	mutex_spinunlock(&mru->lock, 0);
 }
 
@@ -433,8 +418,7 @@ xfs_mru_cache_destroy(
 	if (!mru || !mru->lists)
 		return;
 
-	/* we don't want the reaper to restart here */
-	xfs_mru_cache_flush(mru, 0);
+	xfs_mru_cache_flush(mru);
 
 	kmem_free(mru->lists, mru->grp_count * sizeof(*mru->lists));
 	kmem_free(mru, sizeof(*mru));
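
[Editor's note] Taken together, the xfs_mru_cache.c hunks replace an always-rearming reaper with one that is queued only while the cache holds entries: insert arms it on the empty-to-nonempty transition, and the reap handler rearms itself only when elements remain. A minimal single-threaded sketch of that discipline, with a fake clock in place of jiffies and a one-slot stand-in for the workqueue (all names illustrative, no locking shown):

#include <stdio.h>

/* Fake clock and a one-slot "workqueue": fires_at == 0 means idle. */
static unsigned long now;
static unsigned long fires_at;

struct mru {
	int		entries;	/* elements still cached */
	int		queued;		/* reap work is pending */
	unsigned long	grp_time;	/* reap interval */
};

static void queue_reap(struct mru *mru, unsigned long delay)
{
	mru->queued = 1;
	fires_at = now + delay;
}

/* Insert schedules the reaper only on the empty-to-nonempty transition. */
static void insert(struct mru *mru)
{
	mru->entries++;
	if (!mru->queued)
		queue_reap(mru, mru->grp_time);
}

/* The reaper expires one batch and rearms itself only if work remains. */
static void reap(struct mru *mru)
{
	fires_at = 0;
	mru->entries--;			/* model: one entry expires per run */
	mru->queued = (mru->entries > 0);
	if (mru->queued)
		queue_reap(mru, mru->grp_time);
}

int main(void)
{
	struct mru mru = { 0, 0, 10 };

	insert(&mru);			/* first entry arms the reaper */
	insert(&mru);			/* second entry does not rearm it */
	while (fires_at) {		/* run the fake clock until idle */
		now = fires_at;
		reap(&mru);
		printf("t=%lu entries=%d queued=%d\n",
		       now, mru.entries, mru.queued);
	}
	/* Once empty, nothing is queued: no periodic wakeups on an idle cache. */
	return 0;
}

This is also why the reap_all/restart machinery disappears: flush can simply cancel the pending work if queued is set, reap everything inline under the lock, and leave the cache idle, which is exactly the state destroy wants.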
diff --git a/fs/xfs/xfs_mru_cache.h b/fs/xfs/xfs_mru_cache.h
index 624fd10ee8e5..dd58ea1bbebe 100644
--- a/fs/xfs/xfs_mru_cache.h
+++ b/fs/xfs/xfs_mru_cache.h
@@ -32,11 +32,9 @@ typedef struct xfs_mru_cache
 	unsigned int		grp_time;  /* Time period spanned by grps.  */
 	unsigned int		lru_grp;   /* Group containing time zero.   */
 	unsigned long		time_zero; /* Time first element was added. */
-	unsigned long		next_reap; /* Time that the reaper should
-					      next do something. */
-	unsigned int		reap_all;  /* if set, reap all lists */
 	xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */
 	struct delayed_work	work;      /* Workqueue data for reaping.   */
+	unsigned int		queued;	   /* work has been queued */
 } xfs_mru_cache_t;
 
 int xfs_mru_cache_init(void);
@@ -44,7 +42,7 @@ void xfs_mru_cache_uninit(void);
 int xfs_mru_cache_create(struct xfs_mru_cache **mrup, unsigned int lifetime_ms,
 				unsigned int grp_count,
 				xfs_mru_cache_free_func_t free_func);
-void xfs_mru_cache_flush(xfs_mru_cache_t *mru, int restart);
+void xfs_mru_cache_flush(xfs_mru_cache_t *mru);
 void xfs_mru_cache_destroy(struct xfs_mru_cache *mru);
 int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key,
 				void *value);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 60b6b898022b..95fff6872a2f 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -966,6 +966,7 @@ xfs_trans_inode_alloc_buf(
 	ASSERT(atomic_read(&bip->bli_refcount) > 0);
 
 	bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF;
+	bip->bli_format.blf_flags |= XFS_BLI_INODE_NEW_BUF;
 }
 
 
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 1a5ad8cd97b0..603459229904 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1082,6 +1082,9 @@ xfs_fsync(
 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
 		return XFS_ERROR(EIO);
 
+	if (flag & FSYNC_DATA)
+		filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping);
+
 	/*
 	 * We always need to make sure that the required inode state
 	 * is safe on disk.  The vnode might be clean but because
@@ -3769,12 +3772,16 @@ xfs_inode_flush(
 			sync_lsn = log->l_last_sync_lsn;
 			GRANT_UNLOCK(log, s);
 
-			if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) <= 0))
-				return 0;
+			if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) > 0)) {
+				if (flags & FLUSH_SYNC)
+					log_flags |= XFS_LOG_SYNC;
+				error = xfs_log_force(mp, iip->ili_last_lsn, log_flags);
+				if (error)
+					return error;
+			}
 
-			if (flags & FLUSH_SYNC)
-				log_flags |= XFS_LOG_SYNC;
-			return xfs_log_force(mp, iip->ili_last_lsn, log_flags);
+			if (ip->i_update_core == 0)
+				return 0;
 		}
 	}
 
@@ -3788,9 +3795,6 @@ xfs_inode_flush(
 	if (flags & FLUSH_INODE) {
 		int	flush_flags;
 
-		if (xfs_ipincount(ip))
-			return EAGAIN;
-
 		if (flags & FLUSH_SYNC) {
 			xfs_ilock(ip, XFS_ILOCK_SHARED);
 			xfs_iflock(ip);
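
[Editor's note] The restructured xfs_inode_flush() path forces the log first (propagating failures) and only then short-circuits when the inode core is clean, instead of returning early on the LSN check alone. A compact model of the new control flow, with stub LSN values and a fake log_force(); everything here is hypothetical scaffolding for illustration, not the kernel API:

#include <stdio.h>

#define FLUSH_SYNC	0x1
#define LOG_SYNC	0x100

/* Stub: pretend forcing the log up to `lsn` always succeeds. */
static int log_force(long long lsn, int log_flags)
{
	printf("log_force(lsn=%lld, flags=%#x)\n", lsn, log_flags);
	return 0;
}

/*
 * New ordering: (1) push the log if the inode was logged past the last
 * sync LSN, (2) only then decide whether the inode core still needs to
 * be written.  The old code returned as soon as the LSN check passed,
 * so a dirty core (i_update_core set) could be left unflushed.
 */
static int inode_flush(long long last_lsn, long long sync_lsn,
		       int update_core, int flags)
{
	int log_flags = 0, error;

	if (last_lsn > sync_lsn) {		/* XFS_LSN_CMP(...) > 0 */
		if (flags & FLUSH_SYNC)
			log_flags |= LOG_SYNC;
		error = log_force(last_lsn, log_flags);
		if (error)
			return error;
	}
	if (update_core == 0)
		return 0;			/* nothing left to write */

	printf("flushing inode core to its backing buffer\n");
	return 0;
}

int main(void)
{
	/* Logged past the sync point and core dirty: force, then flush. */
	inode_flush(200, 100, 1, FLUSH_SYNC);
	/* Already covered by the last sync but core dirty: flush only. */
	inode_flush(50, 100, 1, 0);
	return 0;
}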