Diffstat (limited to 'fs/xfs/linux-2.6/xfs_super.c')
-rw-r--r--	fs/xfs/linux-2.6/xfs_super.c | 183
1 file changed, 142 insertions(+), 41 deletions(-)
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 09783cc444ac..25ea2408118f 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -877,12 +877,11 @@ xfsaild(
 {
 	struct xfs_ail	*ailp = data;
 	xfs_lsn_t	last_pushed_lsn = 0;
-	long		tout = 0;
+	long		tout = 0;	/* milliseconds */
 
 	while (!kthread_should_stop()) {
-		if (tout)
-			schedule_timeout_interruptible(msecs_to_jiffies(tout));
-		tout = 1000;
+		schedule_timeout_interruptible(tout ?
+				msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
 
 		/* swsusp */
 		try_to_freeze();
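The point of this hunk is the idle behaviour: with tout == 0 (nothing left to push) the AIL thread now sleeps until it is explicitly woken, instead of the old fixed one-second poll. The loop shape, as a minimal generic kthread sketch (push_work() is a hypothetical stand-in for the AIL push call, returning the next delay in milliseconds):

#include <linux/kthread.h>
#include <linux/jiffies.h>
#include <linux/freezer.h>
#include <linux/sched.h>

extern long push_work(void *state);	/* hypothetical: returns 0 when idle */

static int pusher(void *data)
{
	long tout = 0;			/* milliseconds until next poll */

	while (!kthread_should_stop()) {
		/* Idle: sleep indefinitely until wake_up_process(). */
		schedule_timeout_interruptible(tout ?
				msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
		try_to_freeze();	/* cooperate with suspend (swsusp) */
		tout = push_work(data);
	}
	return 0;
}

The indefinite sleep is safe because whoever queues new AIL work calls wake_up_process() on the thread, so there is no window where work sits unnoticed.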
@@ -954,16 +953,14 @@ xfs_fs_destroy_inode(
 	ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
 
 	/*
-	 * If we have nothing to flush with this inode then complete the
-	 * teardown now, otherwise delay the flush operation.
+	 * We always use background reclaim here because even if the
+	 * inode is clean, it still may be under IO and hence we have
+	 * to take the flush lock. The background reclaim path handles
+	 * this more efficiently than we can here, so simply let background
+	 * reclaim tear down all inodes.
 	 */
-	if (!xfs_inode_clean(ip)) {
-		xfs_inode_set_reclaim_tag(ip);
-		return;
-	}
-
 out_reclaim:
-	xfs_ireclaim(ip);
+	xfs_inode_set_reclaim_tag(ip);
 }
 
 /*
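The new comment carries the reasoning: even an apparently clean inode can still have IO in flight, so tearing it down inline may block on the flush lock, and the background reclaim walker (XFS tags the inode in a per-AG radix tree and sweeps it later) handles that wait more efficiently. A generic "tag now, tear down later" sketch of the same idea (made-up names, and a work queue in place of the radix-tree walk):

#include <linux/llist.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct victim {
	struct llist_node node;
	/* ...object state... */
};

static LLIST_HEAD(reclaim_list);

static void reclaim_worker(struct work_struct *work)
{
	struct llist_node *batch = llist_del_all(&reclaim_list);
	struct victim *v, *tmp;

	/* Worker context: free to block on flush locks or wait for IO. */
	llist_for_each_entry_safe(v, tmp, batch, node)
		kfree(v);
}
static DECLARE_WORK(reclaim_work, reclaim_worker);

/* Eviction fast path: never blocks, only queues for the walker. */
static void defer_reclaim(struct victim *v)
{
	llist_add(&v->node, &reclaim_list);
	schedule_work(&reclaim_work);
}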
@@ -1024,12 +1021,45 @@ xfs_fs_dirty_inode(
 	XFS_I(inode)->i_update_core = 1;
 }
 
-/*
- * Attempt to flush the inode, this will actually fail
- * if the inode is pinned, but we dirty the inode again
- * at the point when it is unpinned after a log write,
- * since this is when the inode itself becomes flushable.
- */
+STATIC int
+xfs_log_inode(
+	struct xfs_inode	*ip)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_trans	*tp;
+	int			error;
+
+	xfs_iunlock(ip, XFS_ILOCK_SHARED);
+	tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
+	error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
+
+	if (error) {
+		xfs_trans_cancel(tp, 0);
+		/* we need to return with the lock hold shared */
+		xfs_ilock(ip, XFS_ILOCK_SHARED);
+		return error;
+	}
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+	/*
+	 * Note - it's possible that we might have pushed ourselves out of the
+	 * way during trans_reserve which would flush the inode. But there's
+	 * no guarantee that the inode buffer has actually gone out yet (it's
+	 * delwri). Plus the buffer could be pinned anyway if it's part of
+	 * an inode in another recent transaction. So we play it safe and
+	 * fire off the transaction anyway.
+	 */
+	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+	xfs_trans_ihold(tp, ip);
+	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+	xfs_trans_set_sync(tp);
+	error = xfs_trans_commit(tp, 0);
+	xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
+
+	return error;
+}
+
 STATIC int
 xfs_fs_write_inode(
 	struct inode		*inode,
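Note the locking contract of the new xfs_log_inode(): it enters and leaves with the ilock held shared, but must reserve the transaction unlocked (reservation may sleep) and commit with the lock held exclusive. Since the lock cannot be upgraded in place, it drops shared, retakes exclusive, and relies on xfs_ilock_demote() to end up shared again. The same dance on a plain rwsem, for comparison (a generic sketch, not the ilock implementation; reserve_resources() is hypothetical):

#include <linux/errno.h>
#include <linux/rwsem.h>

extern int reserve_resources(void);	/* hypothetical, may sleep */

/* Enters and exits with sem held for read, whether or not it fails. */
static int logged_update(struct rw_semaphore *sem)
{
	up_read(sem);			/* rwsems cannot upgrade in place */

	if (reserve_resources()) {
		down_read(sem);		/* re-establish the caller's state */
		return -ENOMEM;
	}

	down_write(sem);
	/* ...modify state that needs exclusive access, commit it... */
	downgrade_write(sem);		/* exclusive -> shared, atomically */
	return 0;
}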
@@ -1037,7 +1067,7 @@ xfs_fs_write_inode(
 {
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
-	int			error = 0;
+	int			error = EAGAIN;
 
 	xfs_itrace_entry(ip);
 
@@ -1048,35 +1078,55 @@ xfs_fs_write_inode(
 		error = xfs_wait_on_pages(ip, 0, -1);
 		if (error)
 			goto out;
-	}
 
-	/*
-	 * Bypass inodes which have already been cleaned by
-	 * the inode flush clustering code inside xfs_iflush
-	 */
-	if (xfs_inode_clean(ip))
-		goto out;
-
-	/*
-	 * We make this non-blocking if the inode is contended, return
-	 * EAGAIN to indicate to the caller that they did not succeed.
-	 * This prevents the flush path from blocking on inodes inside
-	 * another operation right now, they get caught later by xfs_sync.
-	 */
-	if (sync) {
+		/*
+		 * Make sure the inode has hit stable storage. By using the
+		 * log and the fsync transactions we reduce the IOs we have
+		 * to do here from two (log and inode) to just the log.
+		 *
+		 * Note: We still need to do a delwri write of the inode after
+		 * this to flush it to the backing buffer so that bulkstat
+		 * works properly if this is the first time the inode has been
+		 * written. Because we hold the ilock atomically over the
+		 * transaction commit and the inode flush we are guaranteed
+		 * that the inode is not pinned when it returns. If the flush
+		 * lock is already held, then the inode has already been
+		 * flushed once and we don't need to flush it again. Hence
+		 * the code will only flush the inode if it isn't already
+		 * being flushed.
+		 */
 		xfs_ilock(ip, XFS_ILOCK_SHARED);
-		xfs_iflock(ip);
-
-		error = xfs_iflush(ip, XFS_IFLUSH_SYNC);
+		if (ip->i_update_core) {
+			error = xfs_log_inode(ip);
+			if (error)
+				goto out_unlock;
+		}
 	} else {
-		error = EAGAIN;
+		/*
+		 * We make this non-blocking if the inode is contended, return
+		 * EAGAIN to indicate to the caller that they did not succeed.
+		 * This prevents the flush path from blocking on inodes inside
+		 * another operation right now, they get caught later by xfs_sync.
+		 */
 		if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
 			goto out;
-		if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip))
-			goto out_unlock;
+	}
+
+	if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip))
+		goto out_unlock;
 
-		error = xfs_iflush(ip, XFS_IFLUSH_ASYNC_NOBLOCK);
+	/*
+	 * Now we have the flush lock and the inode is not pinned, we can check
+	 * if the inode is really clean as we know that there are no pending
+	 * transaction completions, it is not waiting on the delayed write
+	 * queue and there is no IO in progress.
+	 */
+	if (xfs_inode_clean(ip)) {
+		xfs_ifunlock(ip);
+		error = 0;
+		goto out_unlock;
 	}
+	error = xfs_iflush(ip, 0);
 
  out_unlock:
 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
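This rework also explains the error = EAGAIN initialisation two hunks up: on the non-blocking path every early goto out now reports EAGAIN, telling VFS writeback to redirty the inode and retry on a later pass rather than block here. A condensed sketch of that contract (the obj_* helpers are hypothetical stand-ins for the ilock/iflock calls; positive errno, XFS-style):

#include <linux/errno.h>
#include <linux/types.h>

struct obj;				/* stand-in for struct xfs_inode */
extern void obj_lock_shared(struct obj *o);
extern bool obj_trylock_shared(struct obj *o);
extern void obj_unlock_shared(struct obj *o);
extern bool obj_tryflock(struct obj *o);	/* flush-lock trylock */
extern void obj_funlock(struct obj *o);
extern bool obj_pinned(struct obj *o);
extern bool obj_clean(struct obj *o);
extern int obj_flush(struct obj *o);

static int write_obj(struct obj *o, bool sync)
{
	int error = EAGAIN;	/* default for every contended bail-out */

	if (sync)
		obj_lock_shared(o);
	else if (!obj_trylock_shared(o))
		return error;		/* contended: retry next pass */

	if (obj_pinned(o) || !obj_tryflock(o))
		goto out_unlock;	/* in flight elsewhere: retry later */

	if (obj_clean(o)) {
		obj_funlock(o);		/* nothing to write after all */
		error = 0;
	} else {
		error = obj_flush(o);	/* flush lock released on completion */
	}
out_unlock:
	obj_unlock_shared(o);
	return error;
}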
@@ -1259,6 +1309,29 @@ xfs_fs_statfs(
 	return 0;
 }
 
+STATIC void
+xfs_save_resvblks(struct xfs_mount *mp)
+{
+	__uint64_t resblks = 0;
+
+	mp->m_resblks_save = mp->m_resblks;
+	xfs_reserve_blocks(mp, &resblks, NULL);
+}
+
+STATIC void
+xfs_restore_resvblks(struct xfs_mount *mp)
+{
+	__uint64_t resblks;
+
+	if (mp->m_resblks_save) {
+		resblks = mp->m_resblks_save;
+		mp->m_resblks_save = 0;
+	} else
+		resblks = xfs_default_resblks(mp);
+
+	xfs_reserve_blocks(mp, &resblks, NULL);
+}
+
 STATIC int
 xfs_fs_remount(
 	struct super_block	*sb,
@@ -1338,11 +1411,27 @@ xfs_fs_remount(
 			}
 			mp->m_update_flags = 0;
 		}
+
+		/*
+		 * Fill out the reserve pool if it is empty. Use the stashed
+		 * value if it is non-zero, otherwise go with the default.
+		 */
+		xfs_restore_resvblks(mp);
 	}
 
 	/* rw -> ro */
 	if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) {
+		/*
+		 * After we have synced the data but before we sync the
+		 * metadata, we need to free up the reserve block pool so that
+		 * the used block count in the superblock on disk is correct at
+		 * the end of the remount. Stash the current reserve pool size
+		 * so that if we get remounted rw, we can return it to the same
+		 * size.
+		 */
+
 		xfs_quiesce_data(mp);
+		xfs_save_resvblks(mp);
 		xfs_quiesce_attr(mp);
 		mp->m_flags |= XFS_MOUNT_RDONLY;
 	}
@@ -1361,11 +1450,22 @@ xfs_fs_freeze(
 {
 	struct xfs_mount	*mp = XFS_M(sb);
 
+	xfs_save_resvblks(mp);
 	xfs_quiesce_attr(mp);
 	return -xfs_fs_log_dummy(mp);
 }
 
 STATIC int
+xfs_fs_unfreeze(
+	struct super_block	*sb)
+{
+	struct xfs_mount	*mp = XFS_M(sb);
+
+	xfs_restore_resvblks(mp);
+	return 0;
+}
+
+STATIC int
 xfs_fs_show_options(
 	struct seq_file		*m,
 	struct vfsmount		*mnt)
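With xfs_fs_unfreeze() above (wired into super_operations in the final hunk), freeze and thaw now handle the reserve pool symmetrically: freeze drains it via xfs_save_resvblks() so the on-disk free-space counters are exact, and thaw refills it via xfs_restore_resvblks(). From userspace the cycle is driven by the standard FIFREEZE/FITHAW ioctls; a minimal example (the mount point path is illustrative, and both calls need CAP_SYS_ADMIN):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>		/* FIFREEZE, FITHAW */

int main(void)
{
	int fd = open("/mnt/xfs", O_RDONLY);	/* any fd on the filesystem */

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (ioctl(fd, FIFREEZE, 0) == -1)	/* -> .freeze_fs: pool saved */
		perror("FIFREEZE");
	/* ...take a block-level snapshot while the fs is quiesced... */
	if (ioctl(fd, FITHAW, 0) == -1)		/* -> .unfreeze_fs: pool restored */
		perror("FITHAW");
	close(fd);
	return 0;
}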
@@ -1587,6 +1687,7 @@ static const struct super_operations xfs_super_operations = {
 	.put_super		= xfs_fs_put_super,
 	.sync_fs		= xfs_fs_sync_fs,
 	.freeze_fs		= xfs_fs_freeze,
+	.unfreeze_fs		= xfs_fs_unfreeze,
 	.statfs			= xfs_fs_statfs,
 	.remount_fs		= xfs_fs_remount,
 	.show_options		= xfs_fs_show_options,