diff options
Diffstat (limited to 'fs/xfs/xfs_icache.c')
-rw-r--r-- | fs/xfs/xfs_icache.c | 148 |
1 file changed, 120 insertions, 28 deletions
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index c48df5f25b9f..981b2cf51985 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c | |||
@@ -33,6 +33,9 @@ | |||
33 | #include "xfs_trace.h" | 33 | #include "xfs_trace.h" |
34 | #include "xfs_icache.h" | 34 | #include "xfs_icache.h" |
35 | #include "xfs_bmap_util.h" | 35 | #include "xfs_bmap_util.h" |
36 | #include "xfs_quota.h" | ||
37 | #include "xfs_dquot_item.h" | ||
38 | #include "xfs_dquot.h" | ||
36 | 39 | ||
37 | #include <linux/kthread.h> | 40 | #include <linux/kthread.h> |
38 | #include <linux/freezer.h> | 41 | #include <linux/freezer.h> |
@@ -158,7 +161,7 @@ xfs_iget_cache_hit( | |||
158 | if (ip->i_ino != ino) { | 161 | if (ip->i_ino != ino) { |
159 | trace_xfs_iget_skip(ip); | 162 | trace_xfs_iget_skip(ip); |
160 | XFS_STATS_INC(xs_ig_frecycle); | 163 | XFS_STATS_INC(xs_ig_frecycle); |
161 | error = EAGAIN; | 164 | error = -EAGAIN; |
162 | goto out_error; | 165 | goto out_error; |
163 | } | 166 | } |
164 | 167 | ||
@@ -176,7 +179,7 @@ xfs_iget_cache_hit( | |||
176 | if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) { | 179 | if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) { |
177 | trace_xfs_iget_skip(ip); | 180 | trace_xfs_iget_skip(ip); |
178 | XFS_STATS_INC(xs_ig_frecycle); | 181 | XFS_STATS_INC(xs_ig_frecycle); |
179 | error = EAGAIN; | 182 | error = -EAGAIN; |
180 | goto out_error; | 183 | goto out_error; |
181 | } | 184 | } |
182 | 185 | ||
@@ -184,7 +187,7 @@ xfs_iget_cache_hit( | |||
184 | * If lookup is racing with unlink return an error immediately. | 187 | * If lookup is racing with unlink return an error immediately. |
185 | */ | 188 | */ |
186 | if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { | 189 | if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { |
187 | error = ENOENT; | 190 | error = -ENOENT; |
188 | goto out_error; | 191 | goto out_error; |
189 | } | 192 | } |
190 | 193 | ||
@@ -206,7 +209,7 @@ xfs_iget_cache_hit( | |||
206 | spin_unlock(&ip->i_flags_lock); | 209 | spin_unlock(&ip->i_flags_lock); |
207 | rcu_read_unlock(); | 210 | rcu_read_unlock(); |
208 | 211 | ||
209 | error = -inode_init_always(mp->m_super, inode); | 212 | error = inode_init_always(mp->m_super, inode); |
210 | if (error) { | 213 | if (error) { |
211 | /* | 214 | /* |
212 | * Re-initializing the inode failed, and we are in deep | 215 | * Re-initializing the inode failed, and we are in deep |
@@ -243,7 +246,7 @@ xfs_iget_cache_hit( | |||
243 | /* If the VFS inode is being torn down, pause and try again. */ | 246 | /* If the VFS inode is being torn down, pause and try again. */ |
244 | if (!igrab(inode)) { | 247 | if (!igrab(inode)) { |
245 | trace_xfs_iget_skip(ip); | 248 | trace_xfs_iget_skip(ip); |
246 | error = EAGAIN; | 249 | error = -EAGAIN; |
247 | goto out_error; | 250 | goto out_error; |
248 | } | 251 | } |
249 | 252 | ||
@@ -285,7 +288,7 @@ xfs_iget_cache_miss( | |||
285 | 288 | ||
286 | ip = xfs_inode_alloc(mp, ino); | 289 | ip = xfs_inode_alloc(mp, ino); |
287 | if (!ip) | 290 | if (!ip) |
288 | return ENOMEM; | 291 | return -ENOMEM; |
289 | 292 | ||
290 | error = xfs_iread(mp, tp, ip, flags); | 293 | error = xfs_iread(mp, tp, ip, flags); |
291 | if (error) | 294 | if (error) |
@@ -294,7 +297,7 @@ xfs_iget_cache_miss( | |||
294 | trace_xfs_iget_miss(ip); | 297 | trace_xfs_iget_miss(ip); |
295 | 298 | ||
296 | if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { | 299 | if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { |
297 | error = ENOENT; | 300 | error = -ENOENT; |
298 | goto out_destroy; | 301 | goto out_destroy; |
299 | } | 302 | } |
300 | 303 | ||
@@ -305,7 +308,7 @@ xfs_iget_cache_miss( | |||
305 | * recurse into the file system. | 308 | * recurse into the file system. |
306 | */ | 309 | */ |
307 | if (radix_tree_preload(GFP_NOFS)) { | 310 | if (radix_tree_preload(GFP_NOFS)) { |
308 | error = EAGAIN; | 311 | error = -EAGAIN; |
309 | goto out_destroy; | 312 | goto out_destroy; |
310 | } | 313 | } |
311 | 314 | ||
@@ -341,7 +344,7 @@ xfs_iget_cache_miss( | |||
341 | if (unlikely(error)) { | 344 | if (unlikely(error)) { |
342 | WARN_ON(error != -EEXIST); | 345 | WARN_ON(error != -EEXIST); |
343 | XFS_STATS_INC(xs_ig_dup); | 346 | XFS_STATS_INC(xs_ig_dup); |
344 | error = EAGAIN; | 347 | error = -EAGAIN; |
345 | goto out_preload_end; | 348 | goto out_preload_end; |
346 | } | 349 | } |
347 | spin_unlock(&pag->pag_ici_lock); | 350 | spin_unlock(&pag->pag_ici_lock); |
@@ -408,7 +411,7 @@ xfs_iget( | |||
408 | 411 | ||
409 | /* reject inode numbers outside existing AGs */ | 412 | /* reject inode numbers outside existing AGs */ |
410 | if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) | 413 | if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) |
411 | return EINVAL; | 414 | return -EINVAL; |
412 | 415 | ||
413 | /* get the perag structure and ensure that it's inode capable */ | 416 | /* get the perag structure and ensure that it's inode capable */ |
414 | pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino)); | 417 | pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino)); |
@@ -445,7 +448,7 @@ again: | |||
445 | return 0; | 448 | return 0; |
446 | 449 | ||
447 | out_error_or_again: | 450 | out_error_or_again: |
448 | if (error == EAGAIN) { | 451 | if (error == -EAGAIN) { |
449 | delay(1); | 452 | delay(1); |
450 | goto again; | 453 | goto again; |
451 | } | 454 | } |
@@ -489,18 +492,18 @@ xfs_inode_ag_walk_grab( | |||
489 | 492 | ||
490 | /* nothing to sync during shutdown */ | 493 | /* nothing to sync during shutdown */ |
491 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | 494 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
492 | return EFSCORRUPTED; | 495 | return -EFSCORRUPTED; |
493 | 496 | ||
494 | /* If we can't grab the inode, it must on it's way to reclaim. */ | 497 | /* If we can't grab the inode, it must on it's way to reclaim. */ |
495 | if (!igrab(inode)) | 498 | if (!igrab(inode)) |
496 | return ENOENT; | 499 | return -ENOENT; |
497 | 500 | ||
498 | /* inode is valid */ | 501 | /* inode is valid */ |
499 | return 0; | 502 | return 0; |
500 | 503 | ||
501 | out_unlock_noent: | 504 | out_unlock_noent: |
502 | spin_unlock(&ip->i_flags_lock); | 505 | spin_unlock(&ip->i_flags_lock); |
503 | return ENOENT; | 506 | return -ENOENT; |
504 | } | 507 | } |
505 | 508 | ||
506 | STATIC int | 509 | STATIC int |
@@ -583,16 +586,16 @@ restart: | |||
583 | continue; | 586 | continue; |
584 | error = execute(batch[i], flags, args); | 587 | error = execute(batch[i], flags, args); |
585 | IRELE(batch[i]); | 588 | IRELE(batch[i]); |
586 | if (error == EAGAIN) { | 589 | if (error == -EAGAIN) { |
587 | skipped++; | 590 | skipped++; |
588 | continue; | 591 | continue; |
589 | } | 592 | } |
590 | if (error && last_error != EFSCORRUPTED) | 593 | if (error && last_error != -EFSCORRUPTED) |
591 | last_error = error; | 594 | last_error = error; |
592 | } | 595 | } |
593 | 596 | ||
594 | /* bail out if the filesystem is corrupted. */ | 597 | /* bail out if the filesystem is corrupted. */ |
595 | if (error == EFSCORRUPTED) | 598 | if (error == -EFSCORRUPTED) |
596 | break; | 599 | break; |
597 | 600 | ||
598 | cond_resched(); | 601 | cond_resched(); |
@@ -652,11 +655,11 @@ xfs_inode_ag_iterator( | |||
652 | xfs_perag_put(pag); | 655 | xfs_perag_put(pag); |
653 | if (error) { | 656 | if (error) { |
654 | last_error = error; | 657 | last_error = error; |
655 | if (error == EFSCORRUPTED) | 658 | if (error == -EFSCORRUPTED) |
656 | break; | 659 | break; |
657 | } | 660 | } |
658 | } | 661 | } |
659 | return XFS_ERROR(last_error); | 662 | return last_error; |
660 | } | 663 | } |
661 | 664 | ||
662 | int | 665 | int |
@@ -680,11 +683,11 @@ xfs_inode_ag_iterator_tag( | |||
680 | xfs_perag_put(pag); | 683 | xfs_perag_put(pag); |
681 | if (error) { | 684 | if (error) { |
682 | last_error = error; | 685 | last_error = error; |
683 | if (error == EFSCORRUPTED) | 686 | if (error == -EFSCORRUPTED) |
684 | break; | 687 | break; |
685 | } | 688 | } |
686 | } | 689 | } |
687 | return XFS_ERROR(last_error); | 690 | return last_error; |
688 | } | 691 | } |
689 | 692 | ||
690 | /* | 693 | /* |
@@ -944,7 +947,7 @@ restart: | |||
944 | * see the stale flag set on the inode. | 947 | * see the stale flag set on the inode. |
945 | */ | 948 | */ |
946 | error = xfs_iflush(ip, &bp); | 949 | error = xfs_iflush(ip, &bp); |
947 | if (error == EAGAIN) { | 950 | if (error == -EAGAIN) { |
948 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 951 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
949 | /* backoff longer than in xfs_ifree_cluster */ | 952 | /* backoff longer than in xfs_ifree_cluster */ |
950 | delay(2); | 953 | delay(2); |
@@ -997,7 +1000,7 @@ out: | |||
997 | xfs_iflags_clear(ip, XFS_IRECLAIM); | 1000 | xfs_iflags_clear(ip, XFS_IRECLAIM); |
998 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 1001 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
999 | /* | 1002 | /* |
1000 | * We could return EAGAIN here to make reclaim rescan the inode tree in | 1003 | * We could return -EAGAIN here to make reclaim rescan the inode tree in |
1001 | * a short while. However, this just burns CPU time scanning the tree | 1004 | * a short while. However, this just burns CPU time scanning the tree |
1002 | * waiting for IO to complete and the reclaim work never goes back to | 1005 | * waiting for IO to complete and the reclaim work never goes back to |
1003 | * the idle state. Instead, return 0 to let the next scheduled | 1006 | * the idle state. Instead, return 0 to let the next scheduled |
@@ -1100,7 +1103,7 @@ restart: | |||
1100 | if (!batch[i]) | 1103 | if (!batch[i]) |
1101 | continue; | 1104 | continue; |
1102 | error = xfs_reclaim_inode(batch[i], pag, flags); | 1105 | error = xfs_reclaim_inode(batch[i], pag, flags); |
1103 | if (error && last_error != EFSCORRUPTED) | 1106 | if (error && last_error != -EFSCORRUPTED) |
1104 | last_error = error; | 1107 | last_error = error; |
1105 | } | 1108 | } |
1106 | 1109 | ||
@@ -1129,7 +1132,7 @@ restart: | |||
1129 | trylock = 0; | 1132 | trylock = 0; |
1130 | goto restart; | 1133 | goto restart; |
1131 | } | 1134 | } |
1132 | return XFS_ERROR(last_error); | 1135 | return last_error; |
1133 | } | 1136 | } |
1134 | 1137 | ||
1135 | int | 1138 | int |
@@ -1203,6 +1206,30 @@ xfs_inode_match_id( | |||
1203 | return 1; | 1206 | return 1; |
1204 | } | 1207 | } |
1205 | 1208 | ||
1209 | /* | ||
1210 | * A union-based inode filtering algorithm. Process the inode if any of the | ||
1211 | * criteria match. This is for global/internal scans only. | ||
1212 | */ | ||
1213 | STATIC int | ||
1214 | xfs_inode_match_id_union( | ||
1215 | struct xfs_inode *ip, | ||
1216 | struct xfs_eofblocks *eofb) | ||
1217 | { | ||
1218 | if ((eofb->eof_flags & XFS_EOF_FLAGS_UID) && | ||
1219 | uid_eq(VFS_I(ip)->i_uid, eofb->eof_uid)) | ||
1220 | return 1; | ||
1221 | |||
1222 | if ((eofb->eof_flags & XFS_EOF_FLAGS_GID) && | ||
1223 | gid_eq(VFS_I(ip)->i_gid, eofb->eof_gid)) | ||
1224 | return 1; | ||
1225 | |||
1226 | if ((eofb->eof_flags & XFS_EOF_FLAGS_PRID) && | ||
1227 | xfs_get_projid(ip) == eofb->eof_prid) | ||
1228 | return 1; | ||
1229 | |||
1230 | return 0; | ||
1231 | } | ||
1232 | |||
1206 | STATIC int | 1233 | STATIC int |
1207 | xfs_inode_free_eofblocks( | 1234 | xfs_inode_free_eofblocks( |
1208 | struct xfs_inode *ip, | 1235 | struct xfs_inode *ip, |
@@ -1211,6 +1238,10 @@ xfs_inode_free_eofblocks( | |||
1211 | { | 1238 | { |
1212 | int ret; | 1239 | int ret; |
1213 | struct xfs_eofblocks *eofb = args; | 1240 | struct xfs_eofblocks *eofb = args; |
1241 | bool need_iolock = true; | ||
1242 | int match; | ||
1243 | |||
1244 | ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0)); | ||
1214 | 1245 | ||
1215 | if (!xfs_can_free_eofblocks(ip, false)) { | 1246 | if (!xfs_can_free_eofblocks(ip, false)) { |
1216 | /* inode could be preallocated or append-only */ | 1247 | /* inode could be preallocated or append-only */ |
@@ -1228,19 +1259,31 @@ xfs_inode_free_eofblocks( | |||
1228 | return 0; | 1259 | return 0; |
1229 | 1260 | ||
1230 | if (eofb) { | 1261 | if (eofb) { |
1231 | if (!xfs_inode_match_id(ip, eofb)) | 1262 | if (eofb->eof_flags & XFS_EOF_FLAGS_UNION) |
1263 | match = xfs_inode_match_id_union(ip, eofb); | ||
1264 | else | ||
1265 | match = xfs_inode_match_id(ip, eofb); | ||
1266 | if (!match) | ||
1232 | return 0; | 1267 | return 0; |
1233 | 1268 | ||
1234 | /* skip the inode if the file size is too small */ | 1269 | /* skip the inode if the file size is too small */ |
1235 | if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE && | 1270 | if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE && |
1236 | XFS_ISIZE(ip) < eofb->eof_min_file_size) | 1271 | XFS_ISIZE(ip) < eofb->eof_min_file_size) |
1237 | return 0; | 1272 | return 0; |
1273 | |||
1274 | /* | ||
1275 | * A scan owner implies we already hold the iolock. Skip it in | ||
1276 | * xfs_free_eofblocks() to avoid deadlock. This also eliminates | ||
1277 | * the possibility of EAGAIN being returned. | ||
1278 | */ | ||
1279 | if (eofb->eof_scan_owner == ip->i_ino) | ||
1280 | need_iolock = false; | ||
1238 | } | 1281 | } |
1239 | 1282 | ||
1240 | ret = xfs_free_eofblocks(ip->i_mount, ip, true); | 1283 | ret = xfs_free_eofblocks(ip->i_mount, ip, need_iolock); |
1241 | 1284 | ||
1242 | /* don't revisit the inode if we're not waiting */ | 1285 | /* don't revisit the inode if we're not waiting */ |
1243 | if (ret == EAGAIN && !(flags & SYNC_WAIT)) | 1286 | if (ret == -EAGAIN && !(flags & SYNC_WAIT)) |
1244 | ret = 0; | 1287 | ret = 0; |
1245 | 1288 | ||
1246 | return ret; | 1289 | return ret; |
@@ -1260,6 +1303,55 @@ xfs_icache_free_eofblocks( | |||
1260 | eofb, XFS_ICI_EOFBLOCKS_TAG); | 1303 | eofb, XFS_ICI_EOFBLOCKS_TAG); |
1261 | } | 1304 | } |
1262 | 1305 | ||
1306 | /* | ||
1307 | * Run eofblocks scans on the quotas applicable to the inode. For inodes with | ||
1308 | * multiple quotas, we don't know exactly which quota caused an allocation | ||
1309 | * failure. We make a best effort by including each quota under low free space | ||
1310 | * conditions (less than 1% free space) in the scan. | ||
1311 | */ | ||
1312 | int | ||
1313 | xfs_inode_free_quota_eofblocks( | ||
1314 | struct xfs_inode *ip) | ||
1315 | { | ||
1316 | int scan = 0; | ||
1317 | struct xfs_eofblocks eofb = {0}; | ||
1318 | struct xfs_dquot *dq; | ||
1319 | |||
1320 | ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); | ||
1321 | |||
1322 | /* | ||
1323 | * Set the scan owner to avoid a potential livelock. Otherwise, the scan | ||
1324 | * can repeatedly trylock on the inode we're currently processing. We | ||
1325 | * run a sync scan to increase effectiveness and use the union filter to | ||
1326 | * cover all applicable quotas in a single scan. | ||
1327 | */ | ||
1328 | eofb.eof_scan_owner = ip->i_ino; | ||
1329 | eofb.eof_flags = XFS_EOF_FLAGS_UNION|XFS_EOF_FLAGS_SYNC; | ||
1330 | |||
1331 | if (XFS_IS_UQUOTA_ENFORCED(ip->i_mount)) { | ||
1332 | dq = xfs_inode_dquot(ip, XFS_DQ_USER); | ||
1333 | if (dq && xfs_dquot_lowsp(dq)) { | ||
1334 | eofb.eof_uid = VFS_I(ip)->i_uid; | ||
1335 | eofb.eof_flags |= XFS_EOF_FLAGS_UID; | ||
1336 | scan = 1; | ||
1337 | } | ||
1338 | } | ||
1339 | |||
1340 | if (XFS_IS_GQUOTA_ENFORCED(ip->i_mount)) { | ||
1341 | dq = xfs_inode_dquot(ip, XFS_DQ_GROUP); | ||
1342 | if (dq && xfs_dquot_lowsp(dq)) { | ||
1343 | eofb.eof_gid = VFS_I(ip)->i_gid; | ||
1344 | eofb.eof_flags |= XFS_EOF_FLAGS_GID; | ||
1345 | scan = 1; | ||
1346 | } | ||
1347 | } | ||
1348 | |||
1349 | if (scan) | ||
1350 | xfs_icache_free_eofblocks(ip->i_mount, &eofb); | ||
1351 | |||
1352 | return scan; | ||
1353 | } | ||
1354 | |||
1263 | void | 1355 | void |
1264 | xfs_inode_set_eofblocks_tag( | 1356 | xfs_inode_set_eofblocks_tag( |
1265 | xfs_inode_t *ip) | 1357 | xfs_inode_t *ip) |