diff options
author | Dave Chinner <david@fromorbit.com> | 2014-08-03 23:53:47 -0400 |
---|---|---|
committer | Dave Chinner <david@fromorbit.com> | 2014-08-03 23:53:47 -0400 |
commit | e0ac6d45bc52065d388b5aa153b58233af25a429 (patch) | |
tree | 05177a935cdf41f54f609da9ed1c68b42ebd154c /fs/xfs | |
parent | 6eee8972cc1799d3dde3f1e699abf286ed3adbaf (diff) | |
parent | f074051ff550f9f1f1a8ab4868277d049a7fd7aa (diff) |
Merge branch 'xfs-quota-eofblocks-scan' into for-next
Diffstat (limited to 'fs/xfs')
-rw-r--r-- | fs/xfs/xfs_dquot.h | 15 | ||||
-rw-r--r-- | fs/xfs/xfs_file.c | 23 | ||||
-rw-r--r-- | fs/xfs/xfs_fs.h | 3 | ||||
-rw-r--r-- | fs/xfs/xfs_icache.c | 96 | ||||
-rw-r--r-- | fs/xfs/xfs_icache.h | 3 | ||||
-rw-r--r-- | fs/xfs/xfs_iomap.c | 20 |
6 files changed, 148 insertions, 12 deletions
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h index 68a68f704837..c24c67e22a2a 100644 --- a/fs/xfs/xfs_dquot.h +++ b/fs/xfs/xfs_dquot.h | |||
@@ -139,6 +139,21 @@ static inline xfs_dquot_t *xfs_inode_dquot(struct xfs_inode *ip, int type) | |||
139 | } | 139 | } |
140 | } | 140 | } |
141 | 141 | ||
142 | /* | ||
143 | * Check whether a dquot is under low free space conditions. We assume the quota | ||
144 | * is enabled and enforced. | ||
145 | */ | ||
146 | static inline bool xfs_dquot_lowsp(struct xfs_dquot *dqp) | ||
147 | { | ||
148 | int64_t freesp; | ||
149 | |||
150 | freesp = be64_to_cpu(dqp->q_core.d_blk_hardlimit) - dqp->q_res_bcount; | ||
151 | if (freesp < dqp->q_low_space[XFS_QLOWSP_1_PCNT]) | ||
152 | return true; | ||
153 | |||
154 | return false; | ||
155 | } | ||
156 | |||
142 | #define XFS_DQ_IS_LOCKED(dqp) (mutex_is_locked(&((dqp)->q_qlock))) | 157 | #define XFS_DQ_IS_LOCKED(dqp) (mutex_is_locked(&((dqp)->q_qlock))) |
143 | #define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY) | 158 | #define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY) |
144 | #define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER) | 159 | #define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER) |
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 181605da08e4..fcf91a22f5d8 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c | |||
@@ -38,6 +38,7 @@ | |||
38 | #include "xfs_trace.h" | 38 | #include "xfs_trace.h" |
39 | #include "xfs_log.h" | 39 | #include "xfs_log.h" |
40 | #include "xfs_dinode.h" | 40 | #include "xfs_dinode.h" |
41 | #include "xfs_icache.h" | ||
41 | 42 | ||
42 | #include <linux/aio.h> | 43 | #include <linux/aio.h> |
43 | #include <linux/dcache.h> | 44 | #include <linux/dcache.h> |
@@ -689,14 +690,28 @@ write_retry: | |||
689 | ret = generic_perform_write(file, from, pos); | 690 | ret = generic_perform_write(file, from, pos); |
690 | if (likely(ret >= 0)) | 691 | if (likely(ret >= 0)) |
691 | iocb->ki_pos = pos + ret; | 692 | iocb->ki_pos = pos + ret; |
693 | |||
692 | /* | 694 | /* |
693 | * If we just got an ENOSPC, try to write back all dirty inodes to | 695 | * If we hit a space limit, try to free up some lingering preallocated |
694 | * convert delalloc space to free up some of the excess reserved | 696 | * space before returning an error. In the case of ENOSPC, first try to |
695 | * metadata space. | 697 | * write back all dirty inodes to free up some of the excess reserved |
698 | * metadata space. This reduces the chances that the eofblocks scan | ||
699 | * waits on dirty mappings. Since xfs_flush_inodes() is serialized, this | ||
700 | * also behaves as a filter to prevent too many eofblocks scans from | ||
701 | * running at the same time. | ||
696 | */ | 702 | */ |
697 | if (ret == -ENOSPC && !enospc) { | 703 | if (ret == -EDQUOT && !enospc) { |
704 | enospc = xfs_inode_free_quota_eofblocks(ip); | ||
705 | if (enospc) | ||
706 | goto write_retry; | ||
707 | } else if (ret == -ENOSPC && !enospc) { | ||
708 | struct xfs_eofblocks eofb = {0}; | ||
709 | |||
698 | enospc = 1; | 710 | enospc = 1; |
699 | xfs_flush_inodes(ip->i_mount); | 711 | xfs_flush_inodes(ip->i_mount); |
712 | eofb.eof_scan_owner = ip->i_ino; /* for locking */ | ||
713 | eofb.eof_flags = XFS_EOF_FLAGS_SYNC; | ||
714 | xfs_icache_free_eofblocks(ip->i_mount, &eofb); | ||
700 | goto write_retry; | 715 | goto write_retry; |
701 | } | 716 | } |
702 | 717 | ||
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index d34703dbcb42..ffdabc687852 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h | |||
@@ -375,6 +375,9 @@ struct xfs_fs_eofblocks { | |||
375 | #define XFS_EOF_FLAGS_GID (1 << 2) /* filter by gid */ | 375 | #define XFS_EOF_FLAGS_GID (1 << 2) /* filter by gid */ |
376 | #define XFS_EOF_FLAGS_PRID (1 << 3) /* filter by project id */ | 376 | #define XFS_EOF_FLAGS_PRID (1 << 3) /* filter by project id */ |
377 | #define XFS_EOF_FLAGS_MINFILESIZE (1 << 4) /* filter by min file size */ | 377 | #define XFS_EOF_FLAGS_MINFILESIZE (1 << 4) /* filter by min file size */ |
378 | #define XFS_EOF_FLAGS_UNION (1 << 5) /* union filter algorithm; | ||
379 | * kernel only, not included in | ||
380 | * valid mask */ | ||
378 | #define XFS_EOF_FLAGS_VALID \ | 381 | #define XFS_EOF_FLAGS_VALID \ |
379 | (XFS_EOF_FLAGS_SYNC | \ | 382 | (XFS_EOF_FLAGS_SYNC | \ |
380 | XFS_EOF_FLAGS_UID | \ | 383 | XFS_EOF_FLAGS_UID | \ |
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 08ba4c6e1359..981b2cf51985 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c | |||
@@ -33,6 +33,9 @@ | |||
33 | #include "xfs_trace.h" | 33 | #include "xfs_trace.h" |
34 | #include "xfs_icache.h" | 34 | #include "xfs_icache.h" |
35 | #include "xfs_bmap_util.h" | 35 | #include "xfs_bmap_util.h" |
36 | #include "xfs_quota.h" | ||
37 | #include "xfs_dquot_item.h" | ||
38 | #include "xfs_dquot.h" | ||
36 | 39 | ||
37 | #include <linux/kthread.h> | 40 | #include <linux/kthread.h> |
38 | #include <linux/freezer.h> | 41 | #include <linux/freezer.h> |
@@ -1203,6 +1206,30 @@ xfs_inode_match_id( | |||
1203 | return 1; | 1206 | return 1; |
1204 | } | 1207 | } |
1205 | 1208 | ||
1209 | /* | ||
1210 | * A union-based inode filtering algorithm. Process the inode if any of the | ||
1211 | * criteria match. This is for global/internal scans only. | ||
1212 | */ | ||
1213 | STATIC int | ||
1214 | xfs_inode_match_id_union( | ||
1215 | struct xfs_inode *ip, | ||
1216 | struct xfs_eofblocks *eofb) | ||
1217 | { | ||
1218 | if ((eofb->eof_flags & XFS_EOF_FLAGS_UID) && | ||
1219 | uid_eq(VFS_I(ip)->i_uid, eofb->eof_uid)) | ||
1220 | return 1; | ||
1221 | |||
1222 | if ((eofb->eof_flags & XFS_EOF_FLAGS_GID) && | ||
1223 | gid_eq(VFS_I(ip)->i_gid, eofb->eof_gid)) | ||
1224 | return 1; | ||
1225 | |||
1226 | if ((eofb->eof_flags & XFS_EOF_FLAGS_PRID) && | ||
1227 | xfs_get_projid(ip) == eofb->eof_prid) | ||
1228 | return 1; | ||
1229 | |||
1230 | return 0; | ||
1231 | } | ||
1232 | |||
1206 | STATIC int | 1233 | STATIC int |
1207 | xfs_inode_free_eofblocks( | 1234 | xfs_inode_free_eofblocks( |
1208 | struct xfs_inode *ip, | 1235 | struct xfs_inode *ip, |
@@ -1211,6 +1238,10 @@ xfs_inode_free_eofblocks( | |||
1211 | { | 1238 | { |
1212 | int ret; | 1239 | int ret; |
1213 | struct xfs_eofblocks *eofb = args; | 1240 | struct xfs_eofblocks *eofb = args; |
1241 | bool need_iolock = true; | ||
1242 | int match; | ||
1243 | |||
1244 | ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0)); | ||
1214 | 1245 | ||
1215 | if (!xfs_can_free_eofblocks(ip, false)) { | 1246 | if (!xfs_can_free_eofblocks(ip, false)) { |
1216 | /* inode could be preallocated or append-only */ | 1247 | /* inode could be preallocated or append-only */ |
@@ -1228,16 +1259,28 @@ xfs_inode_free_eofblocks( | |||
1228 | return 0; | 1259 | return 0; |
1229 | 1260 | ||
1230 | if (eofb) { | 1261 | if (eofb) { |
1231 | if (!xfs_inode_match_id(ip, eofb)) | 1262 | if (eofb->eof_flags & XFS_EOF_FLAGS_UNION) |
1263 | match = xfs_inode_match_id_union(ip, eofb); | ||
1264 | else | ||
1265 | match = xfs_inode_match_id(ip, eofb); | ||
1266 | if (!match) | ||
1232 | return 0; | 1267 | return 0; |
1233 | 1268 | ||
1234 | /* skip the inode if the file size is too small */ | 1269 | /* skip the inode if the file size is too small */ |
1235 | if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE && | 1270 | if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE && |
1236 | XFS_ISIZE(ip) < eofb->eof_min_file_size) | 1271 | XFS_ISIZE(ip) < eofb->eof_min_file_size) |
1237 | return 0; | 1272 | return 0; |
1273 | |||
1274 | /* | ||
1275 | * A scan owner implies we already hold the iolock. Skip it in | ||
1276 | * xfs_free_eofblocks() to avoid deadlock. This also eliminates | ||
1277 | * the possibility of EAGAIN being returned. | ||
1278 | */ | ||
1279 | if (eofb->eof_scan_owner == ip->i_ino) | ||
1280 | need_iolock = false; | ||
1238 | } | 1281 | } |
1239 | 1282 | ||
1240 | ret = xfs_free_eofblocks(ip->i_mount, ip, true); | 1283 | ret = xfs_free_eofblocks(ip->i_mount, ip, need_iolock); |
1241 | 1284 | ||
1242 | /* don't revisit the inode if we're not waiting */ | 1285 | /* don't revisit the inode if we're not waiting */ |
1243 | if (ret == -EAGAIN && !(flags & SYNC_WAIT)) | 1286 | if (ret == -EAGAIN && !(flags & SYNC_WAIT)) |
@@ -1260,6 +1303,55 @@ xfs_icache_free_eofblocks( | |||
1260 | eofb, XFS_ICI_EOFBLOCKS_TAG); | 1303 | eofb, XFS_ICI_EOFBLOCKS_TAG); |
1261 | } | 1304 | } |
1262 | 1305 | ||
1306 | /* | ||
1307 | * Run eofblocks scans on the quotas applicable to the inode. For inodes with | ||
1308 | * multiple quotas, we don't know exactly which quota caused an allocation | ||
1309 | * failure. We make a best effort by including each quota under low free space | ||
1310 | * conditions (less than 1% free space) in the scan. | ||
1311 | */ | ||
1312 | int | ||
1313 | xfs_inode_free_quota_eofblocks( | ||
1314 | struct xfs_inode *ip) | ||
1315 | { | ||
1316 | int scan = 0; | ||
1317 | struct xfs_eofblocks eofb = {0}; | ||
1318 | struct xfs_dquot *dq; | ||
1319 | |||
1320 | ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); | ||
1321 | |||
1322 | /* | ||
1323 | * Set the scan owner to avoid a potential livelock. Otherwise, the scan | ||
1324 | * can repeatedly trylock on the inode we're currently processing. We | ||
1325 | * run a sync scan to increase effectiveness and use the union filter to | ||
1326 | * cover all applicable quotas in a single scan. | ||
1327 | */ | ||
1328 | eofb.eof_scan_owner = ip->i_ino; | ||
1329 | eofb.eof_flags = XFS_EOF_FLAGS_UNION|XFS_EOF_FLAGS_SYNC; | ||
1330 | |||
1331 | if (XFS_IS_UQUOTA_ENFORCED(ip->i_mount)) { | ||
1332 | dq = xfs_inode_dquot(ip, XFS_DQ_USER); | ||
1333 | if (dq && xfs_dquot_lowsp(dq)) { | ||
1334 | eofb.eof_uid = VFS_I(ip)->i_uid; | ||
1335 | eofb.eof_flags |= XFS_EOF_FLAGS_UID; | ||
1336 | scan = 1; | ||
1337 | } | ||
1338 | } | ||
1339 | |||
1340 | if (XFS_IS_GQUOTA_ENFORCED(ip->i_mount)) { | ||
1341 | dq = xfs_inode_dquot(ip, XFS_DQ_GROUP); | ||
1342 | if (dq && xfs_dquot_lowsp(dq)) { | ||
1343 | eofb.eof_gid = VFS_I(ip)->i_gid; | ||
1344 | eofb.eof_flags |= XFS_EOF_FLAGS_GID; | ||
1345 | scan = 1; | ||
1346 | } | ||
1347 | } | ||
1348 | |||
1349 | if (scan) | ||
1350 | xfs_icache_free_eofblocks(ip->i_mount, &eofb); | ||
1351 | |||
1352 | return scan; | ||
1353 | } | ||
1354 | |||
1263 | void | 1355 | void |
1264 | xfs_inode_set_eofblocks_tag( | 1356 | xfs_inode_set_eofblocks_tag( |
1265 | xfs_inode_t *ip) | 1357 | xfs_inode_t *ip) |
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h index 6250430d609c..46748b86b12f 100644 --- a/fs/xfs/xfs_icache.h +++ b/fs/xfs/xfs_icache.h | |||
@@ -27,6 +27,7 @@ struct xfs_eofblocks { | |||
27 | kgid_t eof_gid; | 27 | kgid_t eof_gid; |
28 | prid_t eof_prid; | 28 | prid_t eof_prid; |
29 | __u64 eof_min_file_size; | 29 | __u64 eof_min_file_size; |
30 | xfs_ino_t eof_scan_owner; | ||
30 | }; | 31 | }; |
31 | 32 | ||
32 | #define SYNC_WAIT 0x0001 /* wait for i/o to complete */ | 33 | #define SYNC_WAIT 0x0001 /* wait for i/o to complete */ |
@@ -57,6 +58,7 @@ void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); | |||
57 | void xfs_inode_set_eofblocks_tag(struct xfs_inode *ip); | 58 | void xfs_inode_set_eofblocks_tag(struct xfs_inode *ip); |
58 | void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip); | 59 | void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip); |
59 | int xfs_icache_free_eofblocks(struct xfs_mount *, struct xfs_eofblocks *); | 60 | int xfs_icache_free_eofblocks(struct xfs_mount *, struct xfs_eofblocks *); |
61 | int xfs_inode_free_quota_eofblocks(struct xfs_inode *ip); | ||
60 | void xfs_eofblocks_worker(struct work_struct *); | 62 | void xfs_eofblocks_worker(struct work_struct *); |
61 | 63 | ||
62 | int xfs_inode_ag_iterator(struct xfs_mount *mp, | 64 | int xfs_inode_ag_iterator(struct xfs_mount *mp, |
@@ -84,6 +86,7 @@ xfs_fs_eofblocks_from_user( | |||
84 | dst->eof_flags = src->eof_flags; | 86 | dst->eof_flags = src->eof_flags; |
85 | dst->eof_prid = src->eof_prid; | 87 | dst->eof_prid = src->eof_prid; |
86 | dst->eof_min_file_size = src->eof_min_file_size; | 88 | dst->eof_min_file_size = src->eof_min_file_size; |
89 | dst->eof_scan_owner = NULLFSINO; | ||
87 | 90 | ||
88 | dst->eof_uid = INVALID_UID; | 91 | dst->eof_uid = INVALID_UID; |
89 | if (src->eof_flags & XFS_EOF_FLAGS_UID) { | 92 | if (src->eof_flags & XFS_EOF_FLAGS_UID) { |
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 17400f0f076c..e9c47b6f5e5a 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c | |||
@@ -397,7 +397,8 @@ xfs_quota_calc_throttle( | |||
397 | struct xfs_inode *ip, | 397 | struct xfs_inode *ip, |
398 | int type, | 398 | int type, |
399 | xfs_fsblock_t *qblocks, | 399 | xfs_fsblock_t *qblocks, |
400 | int *qshift) | 400 | int *qshift, |
401 | int64_t *qfreesp) | ||
401 | { | 402 | { |
402 | int64_t freesp; | 403 | int64_t freesp; |
403 | int shift = 0; | 404 | int shift = 0; |
@@ -406,6 +407,7 @@ xfs_quota_calc_throttle( | |||
406 | /* over hi wmark, squash the prealloc completely */ | 407 | /* over hi wmark, squash the prealloc completely */ |
407 | if (dq->q_res_bcount >= dq->q_prealloc_hi_wmark) { | 408 | if (dq->q_res_bcount >= dq->q_prealloc_hi_wmark) { |
408 | *qblocks = 0; | 409 | *qblocks = 0; |
410 | *qfreesp = 0; | ||
409 | return; | 411 | return; |
410 | } | 412 | } |
411 | 413 | ||
@@ -418,6 +420,9 @@ xfs_quota_calc_throttle( | |||
418 | shift += 2; | 420 | shift += 2; |
419 | } | 421 | } |
420 | 422 | ||
423 | if (freesp < *qfreesp) | ||
424 | *qfreesp = freesp; | ||
425 | |||
421 | /* only overwrite the throttle values if we are more aggressive */ | 426 | /* only overwrite the throttle values if we are more aggressive */ |
422 | if ((freesp >> shift) < (*qblocks >> *qshift)) { | 427 | if ((freesp >> shift) < (*qblocks >> *qshift)) { |
423 | *qblocks = freesp; | 428 | *qblocks = freesp; |
@@ -476,15 +481,18 @@ xfs_iomap_prealloc_size( | |||
476 | } | 481 | } |
477 | 482 | ||
478 | /* | 483 | /* |
479 | * Check each quota to cap the prealloc size and provide a shift | 484 | * Check each quota to cap the prealloc size, provide a shift value to |
480 | * value to throttle with. | 485 | * throttle with and adjust amount of available space. |
481 | */ | 486 | */ |
482 | if (xfs_quota_need_throttle(ip, XFS_DQ_USER, alloc_blocks)) | 487 | if (xfs_quota_need_throttle(ip, XFS_DQ_USER, alloc_blocks)) |
483 | xfs_quota_calc_throttle(ip, XFS_DQ_USER, &qblocks, &qshift); | 488 | xfs_quota_calc_throttle(ip, XFS_DQ_USER, &qblocks, &qshift, |
489 | &freesp); | ||
484 | if (xfs_quota_need_throttle(ip, XFS_DQ_GROUP, alloc_blocks)) | 490 | if (xfs_quota_need_throttle(ip, XFS_DQ_GROUP, alloc_blocks)) |
485 | xfs_quota_calc_throttle(ip, XFS_DQ_GROUP, &qblocks, &qshift); | 491 | xfs_quota_calc_throttle(ip, XFS_DQ_GROUP, &qblocks, &qshift, |
492 | &freesp); | ||
486 | if (xfs_quota_need_throttle(ip, XFS_DQ_PROJ, alloc_blocks)) | 493 | if (xfs_quota_need_throttle(ip, XFS_DQ_PROJ, alloc_blocks)) |
487 | xfs_quota_calc_throttle(ip, XFS_DQ_PROJ, &qblocks, &qshift); | 494 | xfs_quota_calc_throttle(ip, XFS_DQ_PROJ, &qblocks, &qshift, |
495 | &freesp); | ||
488 | 496 | ||
489 | /* | 497 | /* |
490 | * The final prealloc size is set to the minimum of free space available | 498 | * The final prealloc size is set to the minimum of free space available |