aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
authorDavid Chinner <dgc@sgi.com>2007-06-18 02:50:27 -0400
committerTim Shimmin <tes@chook.melbourne.sgi.com>2007-07-14 01:35:19 -0400
commit84e1e99f112dead8f9ba036c02d24a9f5ce7f544 (patch)
treee903589be98c05b45586908171d795a1a466357d /fs/xfs
parent641c56fbfeae85d5ec87fee90a752f7b7224f236 (diff)
[XFS] Prevent ENOSPC from aborting transactions that need to succeed
During delayed allocation extent conversion or unwritten extent conversion, we need to reserve some blocks for transactions reservations. We need to reserve these blocks in case a btree split occurs and we need to allocate some blocks. Unfortunately, we've only ever reserved the number of data blocks we are allocating, so in both the unwritten and delalloc case we can get ENOSPC to the transaction reservation. This is bad because in both cases we cannot report the failure to the writing application. The fix is two-fold: 1 - leverage the reserved block infrastructure XFS already has to reserve a small pool of blocks by default to allow specially marked transactions to dip into when we are at ENOSPC. Default setting is min(5%, 1024 blocks). 2 - convert critical transaction reservations to be allowed to dip into this pool. Spots changed are delalloc conversion, unwritten extent conversion and growing a filesystem at ENOSPC. This also allows growing the filesytsem to succeed at ENOSPC. SGI-PV: 964468 SGI-Modid: xfs-linux-melb:xfs-kern:28865a Signed-off-by: David Chinner <dgc@sgi.com> Signed-off-by: Tim Shimmin <tes@sgi.com>
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/xfs_fsops.c10
-rw-r--r--fs/xfs/xfs_iomap.c22
-rw-r--r--fs/xfs/xfs_mount.c37
3 files changed, 50 insertions, 19 deletions
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index ddd45e5b938..2251a49f3e1 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -177,6 +177,7 @@ xfs_growfs_data_private(
177 up_write(&mp->m_peraglock); 177 up_write(&mp->m_peraglock);
178 } 178 }
179 tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS); 179 tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS);
180 tp->t_flags |= XFS_TRANS_RESERVE;
180 if ((error = xfs_trans_reserve(tp, XFS_GROWFS_SPACE_RES(mp), 181 if ((error = xfs_trans_reserve(tp, XFS_GROWFS_SPACE_RES(mp),
181 XFS_GROWDATA_LOG_RES(mp), 0, 0, 0))) { 182 XFS_GROWDATA_LOG_RES(mp), 0, 0, 0))) {
182 xfs_trans_cancel(tp, 0); 183 xfs_trans_cancel(tp, 0);
@@ -499,8 +500,9 @@ xfs_reserve_blocks(
499 unsigned long s; 500 unsigned long s;
500 501
501 /* If inval is null, report current values and return */ 502 /* If inval is null, report current values and return */
502
503 if (inval == (__uint64_t *)NULL) { 503 if (inval == (__uint64_t *)NULL) {
504 if (!outval)
505 return EINVAL;
504 outval->resblks = mp->m_resblks; 506 outval->resblks = mp->m_resblks;
505 outval->resblks_avail = mp->m_resblks_avail; 507 outval->resblks_avail = mp->m_resblks_avail;
506 return 0; 508 return 0;
@@ -563,8 +565,10 @@ retry:
563 } 565 }
564 } 566 }
565out: 567out:
566 outval->resblks = mp->m_resblks; 568 if (outval) {
567 outval->resblks_avail = mp->m_resblks_avail; 569 outval->resblks = mp->m_resblks;
570 outval->resblks_avail = mp->m_resblks_avail;
571 }
568 XFS_SB_UNLOCK(mp, s); 572 XFS_SB_UNLOCK(mp, s);
569 573
570 if (fdblks_delta) { 574 if (fdblks_delta) {
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 3f2b9f2a7b9..ab5062199f5 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -489,13 +489,13 @@ xfs_iomap_write_direct(
489 if (unlikely(rt)) { 489 if (unlikely(rt)) {
490 resrtextents = qblocks = resaligned; 490 resrtextents = qblocks = resaligned;
491 resrtextents /= mp->m_sb.sb_rextsize; 491 resrtextents /= mp->m_sb.sb_rextsize;
492 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); 492 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
493 quota_flag = XFS_QMOPT_RES_RTBLKS; 493 quota_flag = XFS_QMOPT_RES_RTBLKS;
494 } else { 494 } else {
495 resrtextents = 0; 495 resrtextents = 0;
496 resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned); 496 resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
497 quota_flag = XFS_QMOPT_RES_REGBLKS; 497 quota_flag = XFS_QMOPT_RES_REGBLKS;
498 } 498 }
499 499
500 /* 500 /*
501 * Allocate and setup the transaction 501 * Allocate and setup the transaction
@@ -788,18 +788,12 @@ xfs_iomap_write_allocate(
788 nimaps = 0; 788 nimaps = 0;
789 while (nimaps == 0) { 789 while (nimaps == 0) {
790 tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE); 790 tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
791 tp->t_flags |= XFS_TRANS_RESERVE;
791 nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK); 792 nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
792 error = xfs_trans_reserve(tp, nres, 793 error = xfs_trans_reserve(tp, nres,
793 XFS_WRITE_LOG_RES(mp), 794 XFS_WRITE_LOG_RES(mp),
794 0, XFS_TRANS_PERM_LOG_RES, 795 0, XFS_TRANS_PERM_LOG_RES,
795 XFS_WRITE_LOG_COUNT); 796 XFS_WRITE_LOG_COUNT);
796 if (error == ENOSPC) {
797 error = xfs_trans_reserve(tp, 0,
798 XFS_WRITE_LOG_RES(mp),
799 0,
800 XFS_TRANS_PERM_LOG_RES,
801 XFS_WRITE_LOG_COUNT);
802 }
803 if (error) { 797 if (error) {
804 xfs_trans_cancel(tp, 0); 798 xfs_trans_cancel(tp, 0);
805 return XFS_ERROR(error); 799 return XFS_ERROR(error);
@@ -917,8 +911,8 @@ xfs_iomap_write_unwritten(
917 * from unwritten to real. Do allocations in a loop until 911 * from unwritten to real. Do allocations in a loop until
918 * we have covered the range passed in. 912 * we have covered the range passed in.
919 */ 913 */
920
921 tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE); 914 tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
915 tp->t_flags |= XFS_TRANS_RESERVE;
922 error = xfs_trans_reserve(tp, resblks, 916 error = xfs_trans_reserve(tp, resblks,
923 XFS_WRITE_LOG_RES(mp), 0, 917 XFS_WRITE_LOG_RES(mp), 0,
924 XFS_TRANS_PERM_LOG_RES, 918 XFS_TRANS_PERM_LOG_RES,
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 39cf6f3267c..31453ca0f3d 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -725,7 +725,7 @@ xfs_mountfs(
725 bhv_vnode_t *rvp = NULL; 725 bhv_vnode_t *rvp = NULL;
726 int readio_log, writeio_log; 726 int readio_log, writeio_log;
727 xfs_daddr_t d; 727 xfs_daddr_t d;
728 __uint64_t ret64; 728 __uint64_t resblks;
729 __int64_t update_flags; 729 __int64_t update_flags;
730 uint quotamount, quotaflags; 730 uint quotamount, quotaflags;
731 int agno; 731 int agno;
@@ -842,6 +842,7 @@ xfs_mountfs(
842 */ 842 */
843 if ((mfsi_flags & XFS_MFSI_SECOND) == 0 && 843 if ((mfsi_flags & XFS_MFSI_SECOND) == 0 &&
844 (mp->m_flags & XFS_MOUNT_NOUUID) == 0) { 844 (mp->m_flags & XFS_MOUNT_NOUUID) == 0) {
845 __uint64_t ret64;
845 if (xfs_uuid_mount(mp)) { 846 if (xfs_uuid_mount(mp)) {
846 error = XFS_ERROR(EINVAL); 847 error = XFS_ERROR(EINVAL);
847 goto error1; 848 goto error1;
@@ -1135,13 +1136,27 @@ xfs_mountfs(
1135 goto error4; 1136 goto error4;
1136 } 1137 }
1137 1138
1138
1139 /* 1139 /*
1140 * Complete the quota initialisation, post-log-replay component. 1140 * Complete the quota initialisation, post-log-replay component.
1141 */ 1141 */
1142 if ((error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags))) 1142 if ((error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags)))
1143 goto error4; 1143 goto error4;
1144 1144
1145 /*
1146 * Now we are mounted, reserve a small amount of unused space for
1147 * privileged transactions. This is needed so that transaction
1148 * space required for critical operations can dip into this pool
1149 * when at ENOSPC. This is needed for operations like create with
1150 * attr, unwritten extent conversion at ENOSPC, etc. Data allocations
1151 * are not allowed to use this reserved space.
1152 *
1153 * We default to 5% or 1024 fsbs of space reserved, whichever is smaller.
1154 * This may drive us straight to ENOSPC on mount, but that implies
1155 * we were already there on the last unmount.
1156 */
1157 resblks = min_t(__uint64_t, mp->m_sb.sb_dblocks / 20, 1024);
1158 xfs_reserve_blocks(mp, &resblks, NULL);
1159
1145 return 0; 1160 return 0;
1146 1161
1147 error4: 1162 error4:
@@ -1181,6 +1196,7 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
1181#if defined(DEBUG) || defined(INDUCE_IO_ERROR) 1196#if defined(DEBUG) || defined(INDUCE_IO_ERROR)
1182 int64_t fsid; 1197 int64_t fsid;
1183#endif 1198#endif
1199 __uint64_t resblks;
1184 1200
1185 /* 1201 /*
1186 * We can potentially deadlock here if we have an inode cluster 1202 * We can potentially deadlock here if we have an inode cluster
@@ -1209,6 +1225,23 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
1209 xfs_binval(mp->m_rtdev_targp); 1225 xfs_binval(mp->m_rtdev_targp);
1210 } 1226 }
1211 1227
1228 /*
1229 * Unreserve any blocks we have so that when we unmount we don't account
1230 * the reserved free space as used. This is really only necessary for
1231 * lazy superblock counting because it trusts the incore superblock
1232 * counters to be aboslutely correct on clean unmount.
1233 *
1234 * We don't bother correcting this elsewhere for lazy superblock
1235 * counting because on mount of an unclean filesystem we reconstruct the
1236 * correct counter value and this is irrelevant.
1237 *
1238 * For non-lazy counter filesystems, this doesn't matter at all because
1239 * we only every apply deltas to the superblock and hence the incore
1240 * value does not matter....
1241 */
1242 resblks = 0;
1243 xfs_reserve_blocks(mp, &resblks, NULL);
1244
1212 xfs_log_sbcount(mp, 1); 1245 xfs_log_sbcount(mp, 1);
1213 xfs_unmountfs_writesb(mp); 1246 xfs_unmountfs_writesb(mp);
1214 xfs_unmountfs_wait(mp); /* wait for async bufs */ 1247 xfs_unmountfs_wait(mp); /* wait for async bufs */