aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_icache.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_icache.c')
-rw-r--r--fs/xfs/xfs_icache.c148
1 files changed, 120 insertions, 28 deletions
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index c48df5f25b9f..981b2cf51985 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -33,6 +33,9 @@
33#include "xfs_trace.h" 33#include "xfs_trace.h"
34#include "xfs_icache.h" 34#include "xfs_icache.h"
35#include "xfs_bmap_util.h" 35#include "xfs_bmap_util.h"
36#include "xfs_quota.h"
37#include "xfs_dquot_item.h"
38#include "xfs_dquot.h"
36 39
37#include <linux/kthread.h> 40#include <linux/kthread.h>
38#include <linux/freezer.h> 41#include <linux/freezer.h>
@@ -158,7 +161,7 @@ xfs_iget_cache_hit(
158 if (ip->i_ino != ino) { 161 if (ip->i_ino != ino) {
159 trace_xfs_iget_skip(ip); 162 trace_xfs_iget_skip(ip);
160 XFS_STATS_INC(xs_ig_frecycle); 163 XFS_STATS_INC(xs_ig_frecycle);
161 error = EAGAIN; 164 error = -EAGAIN;
162 goto out_error; 165 goto out_error;
163 } 166 }
164 167
@@ -176,7 +179,7 @@ xfs_iget_cache_hit(
176 if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) { 179 if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) {
177 trace_xfs_iget_skip(ip); 180 trace_xfs_iget_skip(ip);
178 XFS_STATS_INC(xs_ig_frecycle); 181 XFS_STATS_INC(xs_ig_frecycle);
179 error = EAGAIN; 182 error = -EAGAIN;
180 goto out_error; 183 goto out_error;
181 } 184 }
182 185
@@ -184,7 +187,7 @@ xfs_iget_cache_hit(
184 * If lookup is racing with unlink return an error immediately. 187 * If lookup is racing with unlink return an error immediately.
185 */ 188 */
186 if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { 189 if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
187 error = ENOENT; 190 error = -ENOENT;
188 goto out_error; 191 goto out_error;
189 } 192 }
190 193
@@ -206,7 +209,7 @@ xfs_iget_cache_hit(
206 spin_unlock(&ip->i_flags_lock); 209 spin_unlock(&ip->i_flags_lock);
207 rcu_read_unlock(); 210 rcu_read_unlock();
208 211
209 error = -inode_init_always(mp->m_super, inode); 212 error = inode_init_always(mp->m_super, inode);
210 if (error) { 213 if (error) {
211 /* 214 /*
212 * Re-initializing the inode failed, and we are in deep 215 * Re-initializing the inode failed, and we are in deep
@@ -243,7 +246,7 @@ xfs_iget_cache_hit(
243 /* If the VFS inode is being torn down, pause and try again. */ 246 /* If the VFS inode is being torn down, pause and try again. */
244 if (!igrab(inode)) { 247 if (!igrab(inode)) {
245 trace_xfs_iget_skip(ip); 248 trace_xfs_iget_skip(ip);
246 error = EAGAIN; 249 error = -EAGAIN;
247 goto out_error; 250 goto out_error;
248 } 251 }
249 252
@@ -285,7 +288,7 @@ xfs_iget_cache_miss(
285 288
286 ip = xfs_inode_alloc(mp, ino); 289 ip = xfs_inode_alloc(mp, ino);
287 if (!ip) 290 if (!ip)
288 return ENOMEM; 291 return -ENOMEM;
289 292
290 error = xfs_iread(mp, tp, ip, flags); 293 error = xfs_iread(mp, tp, ip, flags);
291 if (error) 294 if (error)
@@ -294,7 +297,7 @@ xfs_iget_cache_miss(
294 trace_xfs_iget_miss(ip); 297 trace_xfs_iget_miss(ip);
295 298
296 if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { 299 if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
297 error = ENOENT; 300 error = -ENOENT;
298 goto out_destroy; 301 goto out_destroy;
299 } 302 }
300 303
@@ -305,7 +308,7 @@ xfs_iget_cache_miss(
305 * recurse into the file system. 308 * recurse into the file system.
306 */ 309 */
307 if (radix_tree_preload(GFP_NOFS)) { 310 if (radix_tree_preload(GFP_NOFS)) {
308 error = EAGAIN; 311 error = -EAGAIN;
309 goto out_destroy; 312 goto out_destroy;
310 } 313 }
311 314
@@ -341,7 +344,7 @@ xfs_iget_cache_miss(
341 if (unlikely(error)) { 344 if (unlikely(error)) {
342 WARN_ON(error != -EEXIST); 345 WARN_ON(error != -EEXIST);
343 XFS_STATS_INC(xs_ig_dup); 346 XFS_STATS_INC(xs_ig_dup);
344 error = EAGAIN; 347 error = -EAGAIN;
345 goto out_preload_end; 348 goto out_preload_end;
346 } 349 }
347 spin_unlock(&pag->pag_ici_lock); 350 spin_unlock(&pag->pag_ici_lock);
@@ -408,7 +411,7 @@ xfs_iget(
408 411
409 /* reject inode numbers outside existing AGs */ 412 /* reject inode numbers outside existing AGs */
410 if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) 413 if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
411 return EINVAL; 414 return -EINVAL;
412 415
413 /* get the perag structure and ensure that it's inode capable */ 416 /* get the perag structure and ensure that it's inode capable */
414 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino)); 417 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino));
@@ -445,7 +448,7 @@ again:
445 return 0; 448 return 0;
446 449
447out_error_or_again: 450out_error_or_again:
448 if (error == EAGAIN) { 451 if (error == -EAGAIN) {
449 delay(1); 452 delay(1);
450 goto again; 453 goto again;
451 } 454 }
@@ -489,18 +492,18 @@ xfs_inode_ag_walk_grab(
489 492
490 /* nothing to sync during shutdown */ 493 /* nothing to sync during shutdown */
491 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 494 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
492 return EFSCORRUPTED; 495 return -EFSCORRUPTED;
493 496
494 /* If we can't grab the inode, it must on it's way to reclaim. */ 497 /* If we can't grab the inode, it must on it's way to reclaim. */
495 if (!igrab(inode)) 498 if (!igrab(inode))
496 return ENOENT; 499 return -ENOENT;
497 500
498 /* inode is valid */ 501 /* inode is valid */
499 return 0; 502 return 0;
500 503
501out_unlock_noent: 504out_unlock_noent:
502 spin_unlock(&ip->i_flags_lock); 505 spin_unlock(&ip->i_flags_lock);
503 return ENOENT; 506 return -ENOENT;
504} 507}
505 508
506STATIC int 509STATIC int
@@ -583,16 +586,16 @@ restart:
583 continue; 586 continue;
584 error = execute(batch[i], flags, args); 587 error = execute(batch[i], flags, args);
585 IRELE(batch[i]); 588 IRELE(batch[i]);
586 if (error == EAGAIN) { 589 if (error == -EAGAIN) {
587 skipped++; 590 skipped++;
588 continue; 591 continue;
589 } 592 }
590 if (error && last_error != EFSCORRUPTED) 593 if (error && last_error != -EFSCORRUPTED)
591 last_error = error; 594 last_error = error;
592 } 595 }
593 596
594 /* bail out if the filesystem is corrupted. */ 597 /* bail out if the filesystem is corrupted. */
595 if (error == EFSCORRUPTED) 598 if (error == -EFSCORRUPTED)
596 break; 599 break;
597 600
598 cond_resched(); 601 cond_resched();
@@ -652,11 +655,11 @@ xfs_inode_ag_iterator(
652 xfs_perag_put(pag); 655 xfs_perag_put(pag);
653 if (error) { 656 if (error) {
654 last_error = error; 657 last_error = error;
655 if (error == EFSCORRUPTED) 658 if (error == -EFSCORRUPTED)
656 break; 659 break;
657 } 660 }
658 } 661 }
659 return XFS_ERROR(last_error); 662 return last_error;
660} 663}
661 664
662int 665int
@@ -680,11 +683,11 @@ xfs_inode_ag_iterator_tag(
680 xfs_perag_put(pag); 683 xfs_perag_put(pag);
681 if (error) { 684 if (error) {
682 last_error = error; 685 last_error = error;
683 if (error == EFSCORRUPTED) 686 if (error == -EFSCORRUPTED)
684 break; 687 break;
685 } 688 }
686 } 689 }
687 return XFS_ERROR(last_error); 690 return last_error;
688} 691}
689 692
690/* 693/*
@@ -944,7 +947,7 @@ restart:
944 * see the stale flag set on the inode. 947 * see the stale flag set on the inode.
945 */ 948 */
946 error = xfs_iflush(ip, &bp); 949 error = xfs_iflush(ip, &bp);
947 if (error == EAGAIN) { 950 if (error == -EAGAIN) {
948 xfs_iunlock(ip, XFS_ILOCK_EXCL); 951 xfs_iunlock(ip, XFS_ILOCK_EXCL);
949 /* backoff longer than in xfs_ifree_cluster */ 952 /* backoff longer than in xfs_ifree_cluster */
950 delay(2); 953 delay(2);
@@ -997,7 +1000,7 @@ out:
997 xfs_iflags_clear(ip, XFS_IRECLAIM); 1000 xfs_iflags_clear(ip, XFS_IRECLAIM);
998 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1001 xfs_iunlock(ip, XFS_ILOCK_EXCL);
999 /* 1002 /*
1000 * We could return EAGAIN here to make reclaim rescan the inode tree in 1003 * We could return -EAGAIN here to make reclaim rescan the inode tree in
1001 * a short while. However, this just burns CPU time scanning the tree 1004 * a short while. However, this just burns CPU time scanning the tree
1002 * waiting for IO to complete and the reclaim work never goes back to 1005 * waiting for IO to complete and the reclaim work never goes back to
1003 * the idle state. Instead, return 0 to let the next scheduled 1006 * the idle state. Instead, return 0 to let the next scheduled
@@ -1100,7 +1103,7 @@ restart:
1100 if (!batch[i]) 1103 if (!batch[i])
1101 continue; 1104 continue;
1102 error = xfs_reclaim_inode(batch[i], pag, flags); 1105 error = xfs_reclaim_inode(batch[i], pag, flags);
1103 if (error && last_error != EFSCORRUPTED) 1106 if (error && last_error != -EFSCORRUPTED)
1104 last_error = error; 1107 last_error = error;
1105 } 1108 }
1106 1109
@@ -1129,7 +1132,7 @@ restart:
1129 trylock = 0; 1132 trylock = 0;
1130 goto restart; 1133 goto restart;
1131 } 1134 }
1132 return XFS_ERROR(last_error); 1135 return last_error;
1133} 1136}
1134 1137
1135int 1138int
@@ -1203,6 +1206,30 @@ xfs_inode_match_id(
1203 return 1; 1206 return 1;
1204} 1207}
1205 1208
1209/*
1210 * A union-based inode filtering algorithm. Process the inode if any of the
1211 * criteria match. This is for global/internal scans only.
1212 */
1213STATIC int
1214xfs_inode_match_id_union(
1215 struct xfs_inode *ip,
1216 struct xfs_eofblocks *eofb)
1217{
1218 if ((eofb->eof_flags & XFS_EOF_FLAGS_UID) &&
1219 uid_eq(VFS_I(ip)->i_uid, eofb->eof_uid))
1220 return 1;
1221
1222 if ((eofb->eof_flags & XFS_EOF_FLAGS_GID) &&
1223 gid_eq(VFS_I(ip)->i_gid, eofb->eof_gid))
1224 return 1;
1225
1226 if ((eofb->eof_flags & XFS_EOF_FLAGS_PRID) &&
1227 xfs_get_projid(ip) == eofb->eof_prid)
1228 return 1;
1229
1230 return 0;
1231}
1232
1206STATIC int 1233STATIC int
1207xfs_inode_free_eofblocks( 1234xfs_inode_free_eofblocks(
1208 struct xfs_inode *ip, 1235 struct xfs_inode *ip,
@@ -1211,6 +1238,10 @@ xfs_inode_free_eofblocks(
1211{ 1238{
1212 int ret; 1239 int ret;
1213 struct xfs_eofblocks *eofb = args; 1240 struct xfs_eofblocks *eofb = args;
1241 bool need_iolock = true;
1242 int match;
1243
1244 ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0));
1214 1245
1215 if (!xfs_can_free_eofblocks(ip, false)) { 1246 if (!xfs_can_free_eofblocks(ip, false)) {
1216 /* inode could be preallocated or append-only */ 1247 /* inode could be preallocated or append-only */
@@ -1228,19 +1259,31 @@ xfs_inode_free_eofblocks(
1228 return 0; 1259 return 0;
1229 1260
1230 if (eofb) { 1261 if (eofb) {
1231 if (!xfs_inode_match_id(ip, eofb)) 1262 if (eofb->eof_flags & XFS_EOF_FLAGS_UNION)
1263 match = xfs_inode_match_id_union(ip, eofb);
1264 else
1265 match = xfs_inode_match_id(ip, eofb);
1266 if (!match)
1232 return 0; 1267 return 0;
1233 1268
1234 /* skip the inode if the file size is too small */ 1269 /* skip the inode if the file size is too small */
1235 if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE && 1270 if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE &&
1236 XFS_ISIZE(ip) < eofb->eof_min_file_size) 1271 XFS_ISIZE(ip) < eofb->eof_min_file_size)
1237 return 0; 1272 return 0;
1273
1274 /*
1275 * A scan owner implies we already hold the iolock. Skip it in
1276 * xfs_free_eofblocks() to avoid deadlock. This also eliminates
1277 * the possibility of EAGAIN being returned.
1278 */
1279 if (eofb->eof_scan_owner == ip->i_ino)
1280 need_iolock = false;
1238 } 1281 }
1239 1282
1240 ret = xfs_free_eofblocks(ip->i_mount, ip, true); 1283 ret = xfs_free_eofblocks(ip->i_mount, ip, need_iolock);
1241 1284
1242 /* don't revisit the inode if we're not waiting */ 1285 /* don't revisit the inode if we're not waiting */
1243 if (ret == EAGAIN && !(flags & SYNC_WAIT)) 1286 if (ret == -EAGAIN && !(flags & SYNC_WAIT))
1244 ret = 0; 1287 ret = 0;
1245 1288
1246 return ret; 1289 return ret;
@@ -1260,6 +1303,55 @@ xfs_icache_free_eofblocks(
1260 eofb, XFS_ICI_EOFBLOCKS_TAG); 1303 eofb, XFS_ICI_EOFBLOCKS_TAG);
1261} 1304}
1262 1305
1306/*
1307 * Run eofblocks scans on the quotas applicable to the inode. For inodes with
1308 * multiple quotas, we don't know exactly which quota caused an allocation
1309 * failure. We make a best effort by including each quota under low free space
1310 * conditions (less than 1% free space) in the scan.
1311 */
1312int
1313xfs_inode_free_quota_eofblocks(
1314 struct xfs_inode *ip)
1315{
1316 int scan = 0;
1317 struct xfs_eofblocks eofb = {0};
1318 struct xfs_dquot *dq;
1319
1320 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
1321
1322 /*
1323 * Set the scan owner to avoid a potential livelock. Otherwise, the scan
1324 * can repeatedly trylock on the inode we're currently processing. We
1325 * run a sync scan to increase effectiveness and use the union filter to
1326 * cover all applicable quotas in a single scan.
1327 */
1328 eofb.eof_scan_owner = ip->i_ino;
1329 eofb.eof_flags = XFS_EOF_FLAGS_UNION|XFS_EOF_FLAGS_SYNC;
1330
1331 if (XFS_IS_UQUOTA_ENFORCED(ip->i_mount)) {
1332 dq = xfs_inode_dquot(ip, XFS_DQ_USER);
1333 if (dq && xfs_dquot_lowsp(dq)) {
1334 eofb.eof_uid = VFS_I(ip)->i_uid;
1335 eofb.eof_flags |= XFS_EOF_FLAGS_UID;
1336 scan = 1;
1337 }
1338 }
1339
1340 if (XFS_IS_GQUOTA_ENFORCED(ip->i_mount)) {
1341 dq = xfs_inode_dquot(ip, XFS_DQ_GROUP);
1342 if (dq && xfs_dquot_lowsp(dq)) {
1343 eofb.eof_gid = VFS_I(ip)->i_gid;
1344 eofb.eof_flags |= XFS_EOF_FLAGS_GID;
1345 scan = 1;
1346 }
1347 }
1348
1349 if (scan)
1350 xfs_icache_free_eofblocks(ip->i_mount, &eofb);
1351
1352 return scan;
1353}
1354
1263void 1355void
1264xfs_inode_set_eofblocks_tag( 1356xfs_inode_set_eofblocks_tag(
1265 xfs_inode_t *ip) 1357 xfs_inode_t *ip)