aboutsummaryrefslogtreecommitdiffstats
path: root/fs/gfs2/rgrp.c
diff options
context:
space:
mode:
authorSteven Whitehouse <swhiteho@redhat.com>2010-11-03 16:01:07 -0400
committerSteven Whitehouse <swhiteho@redhat.com>2010-11-15 07:44:42 -0500
commit044b9414c7caf9a26192c73a5b88fa1a8a32a1c1 (patch)
tree9596bb669a68b04eebc40864c3b3fd71d3d1e273 /fs/gfs2/rgrp.c
parent0143832cc96d0bf78486297aad5c8fb2c2ead02a (diff)
GFS2: Fix inode deallocation race
This area of the code has always been a bit delicate due to the subtleties of lock ordering. The problem is that for "normal" alloc/dealloc, we always grab the inode locks first and the rgrp lock later. In order to ensure no races in looking up the unlinked, but still allocated inodes, we need to hold the rgrp lock when we do the lookup, which means that we can't take the inode glock. The solution is to borrow the technique already used by NFS to solve what is essentially the same problem (given an inode number, look up the inode carefully, checking that it really is in the expected state). We cannot do that directly from the allocation code (lock ordering again) so we give the job to the pre-existing delete workqueue and carry on with the allocation as normal. If we find there is no space, we do a journal flush (required anyway if space from a deallocation is to be released) which should block against the pending deallocations, so we should always get the space back. Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Diffstat (limited to 'fs/gfs2/rgrp.c')
-rw-r--r--fs/gfs2/rgrp.c91
1 files changed, 46 insertions, 45 deletions
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index bef3ab6cf5c1..33c8407b876f 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -963,17 +963,18 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al)
963 * The inode, if one has been found, in inode. 963 * The inode, if one has been found, in inode.
964 */ 964 */
965 965
966static u64 try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, 966static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip)
967 u64 skip)
968{ 967{
969 u32 goal = 0, block; 968 u32 goal = 0, block;
970 u64 no_addr; 969 u64 no_addr;
971 struct gfs2_sbd *sdp = rgd->rd_sbd; 970 struct gfs2_sbd *sdp = rgd->rd_sbd;
972 unsigned int n; 971 unsigned int n;
972 struct gfs2_glock *gl;
973 struct gfs2_inode *ip;
974 int error;
975 int found = 0;
973 976
974 for(;;) { 977 while (goal < rgd->rd_data) {
975 if (goal >= rgd->rd_data)
976 break;
977 down_write(&sdp->sd_log_flush_lock); 978 down_write(&sdp->sd_log_flush_lock);
978 n = 1; 979 n = 1;
979 block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED, 980 block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED,
@@ -990,11 +991,32 @@ static u64 try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked,
990 if (no_addr == skip) 991 if (no_addr == skip)
991 continue; 992 continue;
992 *last_unlinked = no_addr; 993 *last_unlinked = no_addr;
993 return no_addr; 994
995 error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &gl);
996 if (error)
997 continue;
998
999 /* If the inode is already in cache, we can ignore it here
1000 * because the existing inode disposal code will deal with
1001 * it when all refs have gone away. Accessing gl_object like
1002 * this is not safe in general. Here it is ok because we do
1003 * not dereference the pointer, and we only need an approx
1004 * answer to whether it is NULL or not.
1005 */
1006 ip = gl->gl_object;
1007
1008 if (ip || queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
1009 gfs2_glock_put(gl);
1010 else
1011 found++;
1012
1013 /* Limit reclaim to sensible number of tasks */
1014 if (found > 2*NR_CPUS)
1015 return;
994 } 1016 }
995 1017
996 rgd->rd_flags &= ~GFS2_RDF_CHECK; 1018 rgd->rd_flags &= ~GFS2_RDF_CHECK;
997 return 0; 1019 return;
998} 1020}
999 1021
1000/** 1022/**
@@ -1075,11 +1097,9 @@ static void forward_rgrp_set(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd)
1075 * Try to acquire rgrp in way which avoids contending with others. 1097 * Try to acquire rgrp in way which avoids contending with others.
1076 * 1098 *
1077 * Returns: errno 1099 * Returns: errno
1078 * unlinked: the block address of an unlinked block to be reclaimed
1079 */ 1100 */
1080 1101
1081static int get_local_rgrp(struct gfs2_inode *ip, u64 *unlinked, 1102static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
1082 u64 *last_unlinked)
1083{ 1103{
1084 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1104 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1085 struct gfs2_rgrpd *rgd, *begin = NULL; 1105 struct gfs2_rgrpd *rgd, *begin = NULL;
@@ -1089,7 +1109,6 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *unlinked,
1089 int loops = 0; 1109 int loops = 0;
1090 int error, rg_locked; 1110 int error, rg_locked;
1091 1111
1092 *unlinked = 0;
1093 rgd = gfs2_blk2rgrpd(sdp, ip->i_goal); 1112 rgd = gfs2_blk2rgrpd(sdp, ip->i_goal);
1094 1113
1095 while (rgd) { 1114 while (rgd) {
@@ -1106,17 +1125,10 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *unlinked,
1106 case 0: 1125 case 0:
1107 if (try_rgrp_fit(rgd, al)) 1126 if (try_rgrp_fit(rgd, al))
1108 goto out; 1127 goto out;
1109 /* If the rg came in already locked, there's no 1128 if (rgd->rd_flags & GFS2_RDF_CHECK)
1110 way we can recover from a failed try_rgrp_unlink 1129 try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr);
1111 because that would require an iput which can only
1112 happen after the rgrp is unlocked. */
1113 if (!rg_locked && rgd->rd_flags & GFS2_RDF_CHECK)
1114 *unlinked = try_rgrp_unlink(rgd, last_unlinked,
1115 ip->i_no_addr);
1116 if (!rg_locked) 1130 if (!rg_locked)
1117 gfs2_glock_dq_uninit(&al->al_rgd_gh); 1131 gfs2_glock_dq_uninit(&al->al_rgd_gh);
1118 if (*unlinked)
1119 return -EAGAIN;
1120 /* fall through */ 1132 /* fall through */
1121 case GLR_TRYFAILED: 1133 case GLR_TRYFAILED:
1122 rgd = recent_rgrp_next(rgd); 1134 rgd = recent_rgrp_next(rgd);
@@ -1145,13 +1157,10 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *unlinked,
1145 case 0: 1157 case 0:
1146 if (try_rgrp_fit(rgd, al)) 1158 if (try_rgrp_fit(rgd, al))
1147 goto out; 1159 goto out;
1148 if (!rg_locked && rgd->rd_flags & GFS2_RDF_CHECK) 1160 if (rgd->rd_flags & GFS2_RDF_CHECK)
1149 *unlinked = try_rgrp_unlink(rgd, last_unlinked, 1161 try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr);
1150 ip->i_no_addr);
1151 if (!rg_locked) 1162 if (!rg_locked)
1152 gfs2_glock_dq_uninit(&al->al_rgd_gh); 1163 gfs2_glock_dq_uninit(&al->al_rgd_gh);
1153 if (*unlinked)
1154 return -EAGAIN;
1155 break; 1164 break;
1156 1165
1157 case GLR_TRYFAILED: 1166 case GLR_TRYFAILED:
@@ -1204,12 +1213,12 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex,
1204 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1213 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1205 struct gfs2_alloc *al = ip->i_alloc; 1214 struct gfs2_alloc *al = ip->i_alloc;
1206 int error = 0; 1215 int error = 0;
1207 u64 last_unlinked = NO_BLOCK, unlinked; 1216 u64 last_unlinked = NO_BLOCK;
1217 int tries = 0;
1208 1218
1209 if (gfs2_assert_warn(sdp, al->al_requested)) 1219 if (gfs2_assert_warn(sdp, al->al_requested))
1210 return -EINVAL; 1220 return -EINVAL;
1211 1221
1212try_again:
1213 if (hold_rindex) { 1222 if (hold_rindex) {
1214 /* We need to hold the rindex unless the inode we're using is 1223 /* We need to hold the rindex unless the inode we're using is
1215 the rindex itself, in which case it's already held. */ 1224 the rindex itself, in which case it's already held. */
@@ -1218,31 +1227,23 @@ try_again:
1218 else if (!sdp->sd_rgrps) /* We may not have the rindex read 1227 else if (!sdp->sd_rgrps) /* We may not have the rindex read
1219 in, so: */ 1228 in, so: */
1220 error = gfs2_ri_update_special(ip); 1229 error = gfs2_ri_update_special(ip);
1230 if (error)
1231 return error;
1221 } 1232 }
1222 1233
1223 if (error) 1234 do {
1224 return error; 1235 error = get_local_rgrp(ip, &last_unlinked);
1236 /* If there is no space, flushing the log may release some */
1237 if (error)
1238 gfs2_log_flush(sdp, NULL);
1239 } while (error && tries++ < 3);
1225 1240
1226 /* Find an rgrp suitable for allocation. If it encounters any unlinked
1227 dinodes along the way, error will equal -EAGAIN and unlinked will
1228 contains it block address. We then need to look up that inode and
1229 try to free it, and try the allocation again. */
1230 error = get_local_rgrp(ip, &unlinked, &last_unlinked);
1231 if (error) { 1241 if (error) {
1232 if (hold_rindex && ip != GFS2_I(sdp->sd_rindex)) 1242 if (hold_rindex && ip != GFS2_I(sdp->sd_rindex))
1233 gfs2_glock_dq_uninit(&al->al_ri_gh); 1243 gfs2_glock_dq_uninit(&al->al_ri_gh);
1234 if (error != -EAGAIN) 1244 return error;
1235 return error;
1236
1237 gfs2_process_unlinked_inode(ip->i_inode.i_sb, unlinked);
1238 /* regardless of whether or not gfs2_process_unlinked_inode
1239 was successful, we don't want to repeat it again. */
1240 last_unlinked = unlinked;
1241 gfs2_log_flush(sdp, NULL);
1242 error = 0;
1243
1244 goto try_again;
1245 } 1245 }
1246
1246 /* no error, so we have the rgrp set in the inode's allocation. */ 1247 /* no error, so we have the rgrp set in the inode's allocation. */
1247 al->al_file = file; 1248 al->al_file = file;
1248 al->al_line = line; 1249 al->al_line = line;