author	Dave Chinner <david@fromorbit.com>	2014-08-03 23:53:47 -0400
committer	Dave Chinner <david@fromorbit.com>	2014-08-03 23:53:47 -0400
commit	e0ac6d45bc52065d388b5aa153b58233af25a429 (patch)
tree	05177a935cdf41f54f609da9ed1c68b42ebd154c /fs/xfs
parent	6eee8972cc1799d3dde3f1e699abf286ed3adbaf (diff)
parent	f074051ff550f9f1f1a8ab4868277d049a7fd7aa (diff)
Merge branch 'xfs-quota-eofblocks-scan' into for-next
Diffstat (limited to 'fs/xfs')
 fs/xfs/xfs_dquot.h  | 15
 fs/xfs/xfs_file.c   | 23
 fs/xfs/xfs_fs.h     |  3
 fs/xfs/xfs_icache.c | 96
 fs/xfs/xfs_icache.h |  3
 fs/xfs/xfs_iomap.c  | 20
 6 files changed, 148 insertions(+), 12 deletions(-)
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index 68a68f704837..c24c67e22a2a 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -139,6 +139,21 @@ static inline xfs_dquot_t *xfs_inode_dquot(struct xfs_inode *ip, int type)
 	}
 }
 
+/*
+ * Check whether a dquot is under low free space conditions. We assume the quota
+ * is enabled and enforced.
+ */
+static inline bool xfs_dquot_lowsp(struct xfs_dquot *dqp)
+{
+	int64_t freesp;
+
+	freesp = be64_to_cpu(dqp->q_core.d_blk_hardlimit) - dqp->q_res_bcount;
+	if (freesp < dqp->q_low_space[XFS_QLOWSP_1_PCNT])
+		return true;
+
+	return false;
+}
+
 #define XFS_DQ_IS_LOCKED(dqp)	(mutex_is_locked(&((dqp)->q_qlock)))
 #define XFS_DQ_IS_DIRTY(dqp)	((dqp)->dq_flags & XFS_DQ_DIRTY)
 #define XFS_QM_ISUDQ(dqp)	((dqp)->dq_flags & XFS_DQ_USER)
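
For illustration, a minimal sketch of how a caller can consult the new helper. The wrapper function name is hypothetical; xfs_inode_dquot() and XFS_DQ_USER are the existing lookup used later in this series:

	/* Hypothetical caller: is the user quota close to its hard limit? */
	static bool example_uquota_is_tight(struct xfs_inode *ip)
	{
		struct xfs_dquot *dq = xfs_inode_dquot(ip, XFS_DQ_USER);

		/* true once less than 1% of the block hard limit remains */
		return dq && xfs_dquot_lowsp(dq);
	}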
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 181605da08e4..fcf91a22f5d8 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -38,6 +38,7 @@
 #include "xfs_trace.h"
 #include "xfs_log.h"
 #include "xfs_dinode.h"
+#include "xfs_icache.h"
 
 #include <linux/aio.h>
 #include <linux/dcache.h>
@@ -689,14 +690,28 @@ write_retry:
 	ret = generic_perform_write(file, from, pos);
 	if (likely(ret >= 0))
 		iocb->ki_pos = pos + ret;
+
 	/*
-	 * If we just got an ENOSPC, try to write back all dirty inodes to
-	 * convert delalloc space to free up some of the excess reserved
-	 * metadata space.
+	 * If we hit a space limit, try to free up some lingering preallocated
+	 * space before returning an error. In the case of ENOSPC, first try to
+	 * write back all dirty inodes to free up some of the excess reserved
+	 * metadata space. This reduces the chances that the eofblocks scan
+	 * waits on dirty mappings. Since xfs_flush_inodes() is serialized, this
+	 * also behaves as a filter to prevent too many eofblocks scans from
+	 * running at the same time.
 	 */
-	if (ret == -ENOSPC && !enospc) {
+	if (ret == -EDQUOT && !enospc) {
+		enospc = xfs_inode_free_quota_eofblocks(ip);
+		if (enospc)
+			goto write_retry;
+	} else if (ret == -ENOSPC && !enospc) {
+		struct xfs_eofblocks eofb = {0};
+
 		enospc = 1;
 		xfs_flush_inodes(ip->i_mount);
+		eofb.eof_scan_owner = ip->i_ino; /* for locking */
+		eofb.eof_flags = XFS_EOF_FLAGS_SYNC;
+		xfs_icache_free_eofblocks(ip->i_mount, &eofb);
 		goto write_retry;
 	}
 
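
As a usage note, the ENOSPC branch above is equivalent to this standalone sketch (the helper name is hypothetical): it marks the writing inode as scan owner so the scan will not trylock against the iolock the writer already holds, and runs synchronously so freed post-EOF blocks are available on the retry:

	/* Hypothetical helper mirroring the ENOSPC fallback above. */
	static void example_sync_eofblocks_scan(struct xfs_inode *ip)
	{
		struct xfs_eofblocks eofb = {0};

		eofb.eof_scan_owner = ip->i_ino;	/* we hold the iolock */
		eofb.eof_flags = XFS_EOF_FLAGS_SYNC;	/* wait, don't skip */
		xfs_icache_free_eofblocks(ip->i_mount, &eofb);
	}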
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index d34703dbcb42..ffdabc687852 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -375,6 +375,9 @@ struct xfs_fs_eofblocks {
 #define XFS_EOF_FLAGS_GID		(1 << 2) /* filter by gid */
 #define XFS_EOF_FLAGS_PRID		(1 << 3) /* filter by project id */
 #define XFS_EOF_FLAGS_MINFILESIZE	(1 << 4) /* filter by min file size */
+#define XFS_EOF_FLAGS_UNION		(1 << 5) /* union filter algorithm;
+						  * kernel only, not included in
+						  * valid mask */
 #define XFS_EOF_FLAGS_VALID	\
 	(XFS_EOF_FLAGS_SYNC |	\
 	 XFS_EOF_FLAGS_UID |	\
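
Because XFS_EOF_FLAGS_UNION is deliberately excluded from XFS_EOF_FLAGS_VALID, the usual mask check on user-supplied flags (a sketch of the pattern, not the exact ioctl code) rejects it, keeping union-mode scans kernel-internal:

	/* Sketch: the valid mask keeps XFS_EOF_FLAGS_UNION kernel-only. */
	if (src->eof_flags & ~XFS_EOF_FLAGS_VALID)
		return -EINVAL;		/* UNION from userspace fails here */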
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 08ba4c6e1359..981b2cf51985 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -33,6 +33,9 @@
 #include "xfs_trace.h"
 #include "xfs_icache.h"
 #include "xfs_bmap_util.h"
+#include "xfs_quota.h"
+#include "xfs_dquot_item.h"
+#include "xfs_dquot.h"
 
 #include <linux/kthread.h>
 #include <linux/freezer.h>
@@ -1203,6 +1206,30 @@ xfs_inode_match_id(
 	return 1;
 }
 
+/*
+ * A union-based inode filtering algorithm. Process the inode if any of the
+ * criteria match. This is for global/internal scans only.
+ */
+STATIC int
+xfs_inode_match_id_union(
+	struct xfs_inode	*ip,
+	struct xfs_eofblocks	*eofb)
+{
+	if ((eofb->eof_flags & XFS_EOF_FLAGS_UID) &&
+	    uid_eq(VFS_I(ip)->i_uid, eofb->eof_uid))
+		return 1;
+
+	if ((eofb->eof_flags & XFS_EOF_FLAGS_GID) &&
+	    gid_eq(VFS_I(ip)->i_gid, eofb->eof_gid))
+		return 1;
+
+	if ((eofb->eof_flags & XFS_EOF_FLAGS_PRID) &&
+	    xfs_get_projid(ip) == eofb->eof_prid)
+		return 1;
+
+	return 0;
+}
+
 STATIC int
 xfs_inode_free_eofblocks(
 	struct xfs_inode	*ip,
@@ -1211,6 +1238,10 @@ xfs_inode_free_eofblocks(
 {
 	int ret;
 	struct xfs_eofblocks *eofb = args;
+	bool need_iolock = true;
+	int match;
+
+	ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0));
 
 	if (!xfs_can_free_eofblocks(ip, false)) {
 		/* inode could be preallocated or append-only */
@@ -1228,16 +1259,28 @@ xfs_inode_free_eofblocks(
 		return 0;
 
 	if (eofb) {
-		if (!xfs_inode_match_id(ip, eofb))
+		if (eofb->eof_flags & XFS_EOF_FLAGS_UNION)
+			match = xfs_inode_match_id_union(ip, eofb);
+		else
+			match = xfs_inode_match_id(ip, eofb);
+		if (!match)
 			return 0;
 
 		/* skip the inode if the file size is too small */
 		if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE &&
 		    XFS_ISIZE(ip) < eofb->eof_min_file_size)
 			return 0;
+
+		/*
+		 * A scan owner implies we already hold the iolock. Skip it in
+		 * xfs_free_eofblocks() to avoid deadlock. This also eliminates
+		 * the possibility of EAGAIN being returned.
+		 */
+		if (eofb->eof_scan_owner == ip->i_ino)
+			need_iolock = false;
 	}
 
-	ret = xfs_free_eofblocks(ip->i_mount, ip, true);
+	ret = xfs_free_eofblocks(ip->i_mount, ip, need_iolock);
 
 	/* don't revisit the inode if we're not waiting */
 	if (ret == -EAGAIN && !(flags & SYNC_WAIT))
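
For context, need_iolock feeds the lock handling in xfs_free_eofblocks(), which sits outside this diff; paraphrased under the assumption it still uses a trylock for scan callers:

	/* Paraphrase of xfs_free_eofblocks() (not part of this diff). */
	if (need_iolock) {
		if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL))
			return -EAGAIN;	/* skip busy inodes, don't block */
	}

Passing need_iolock = false for the scan owner therefore avoids a self-deadlock and, as the comment notes, removes the EAGAIN case for that inode.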
@@ -1260,6 +1303,55 @@ xfs_icache_free_eofblocks(
 					eofb, XFS_ICI_EOFBLOCKS_TAG);
 }
 
+/*
+ * Run eofblocks scans on the quotas applicable to the inode. For inodes with
+ * multiple quotas, we don't know exactly which quota caused an allocation
+ * failure. We make a best effort by including each quota under low free space
+ * conditions (less than 1% free space) in the scan.
+ */
+int
+xfs_inode_free_quota_eofblocks(
+	struct xfs_inode *ip)
+{
+	int scan = 0;
+	struct xfs_eofblocks eofb = {0};
+	struct xfs_dquot *dq;
+
+	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+
+	/*
+	 * Set the scan owner to avoid a potential livelock. Otherwise, the scan
+	 * can repeatedly trylock on the inode we're currently processing. We
+	 * run a sync scan to increase effectiveness and use the union filter to
+	 * cover all applicable quotas in a single scan.
+	 */
+	eofb.eof_scan_owner = ip->i_ino;
+	eofb.eof_flags = XFS_EOF_FLAGS_UNION|XFS_EOF_FLAGS_SYNC;
+
+	if (XFS_IS_UQUOTA_ENFORCED(ip->i_mount)) {
+		dq = xfs_inode_dquot(ip, XFS_DQ_USER);
+		if (dq && xfs_dquot_lowsp(dq)) {
+			eofb.eof_uid = VFS_I(ip)->i_uid;
+			eofb.eof_flags |= XFS_EOF_FLAGS_UID;
+			scan = 1;
+		}
+	}
+
+	if (XFS_IS_GQUOTA_ENFORCED(ip->i_mount)) {
+		dq = xfs_inode_dquot(ip, XFS_DQ_GROUP);
+		if (dq && xfs_dquot_lowsp(dq)) {
+			eofb.eof_gid = VFS_I(ip)->i_gid;
+			eofb.eof_flags |= XFS_EOF_FLAGS_GID;
+			scan = 1;
+		}
+	}
+
+	if (scan)
+		xfs_icache_free_eofblocks(ip->i_mount, &eofb);
+
+	return scan;
+}
+
 void
 xfs_inode_set_eofblocks_tag(
 	xfs_inode_t	*ip)
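
To make the union semantics concrete: when both user and group quota are low, the function above effectively builds the filter below, and an inode is scanned if its uid or its gid matches, rather than requiring both as xfs_inode_match_id() would:

	/* Equivalent filter when both quotas are under low free space. */
	struct xfs_eofblocks eofb = {0};

	eofb.eof_scan_owner = ip->i_ino;
	eofb.eof_flags = XFS_EOF_FLAGS_UNION | XFS_EOF_FLAGS_SYNC |
			 XFS_EOF_FLAGS_UID | XFS_EOF_FLAGS_GID;
	eofb.eof_uid = VFS_I(ip)->i_uid;
	eofb.eof_gid = VFS_I(ip)->i_gid;
	xfs_icache_free_eofblocks(ip->i_mount, &eofb);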
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index 6250430d609c..46748b86b12f 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -27,6 +27,7 @@ struct xfs_eofblocks {
 	kgid_t		eof_gid;
 	prid_t		eof_prid;
 	__u64		eof_min_file_size;
+	xfs_ino_t	eof_scan_owner;
 };
 
 #define SYNC_WAIT		0x0001	/* wait for i/o to complete */
@@ -57,6 +58,7 @@ void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
 void xfs_inode_set_eofblocks_tag(struct xfs_inode *ip);
 void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip);
 int xfs_icache_free_eofblocks(struct xfs_mount *, struct xfs_eofblocks *);
+int xfs_inode_free_quota_eofblocks(struct xfs_inode *ip);
 void xfs_eofblocks_worker(struct work_struct *);
 
 int xfs_inode_ag_iterator(struct xfs_mount *mp,
@@ -84,6 +86,7 @@ xfs_fs_eofblocks_from_user(
 	dst->eof_flags = src->eof_flags;
 	dst->eof_prid = src->eof_prid;
 	dst->eof_min_file_size = src->eof_min_file_size;
+	dst->eof_scan_owner = NULLFSINO;
 
 	dst->eof_uid = INVALID_UID;
 	if (src->eof_flags & XFS_EOF_FLAGS_UID) {
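
A note on the NULLFSINO default: it is a nonzero sentinel, so it passes the eof_scan_owner != 0 assertion in xfs_inode_free_eofblocks() while never comparing equal to a real inode number. The net effect for userspace-initiated scans:

	dst->eof_scan_owner = NULLFSINO;	/* nonzero, matches no inode */
	/* ... later, in xfs_inode_free_eofblocks(): */
	if (eofb->eof_scan_owner == ip->i_ino)	/* never true here */
		need_iolock = false;		/* so user scans always lock */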
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 17400f0f076c..e9c47b6f5e5a 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -397,7 +397,8 @@ xfs_quota_calc_throttle(
 	struct xfs_inode *ip,
 	int type,
 	xfs_fsblock_t *qblocks,
-	int *qshift)
+	int *qshift,
+	int64_t	*qfreesp)
 {
 	int64_t freesp;
 	int shift = 0;
@@ -406,6 +407,7 @@ xfs_quota_calc_throttle(
 	/* over hi wmark, squash the prealloc completely */
 	if (dq->q_res_bcount >= dq->q_prealloc_hi_wmark) {
 		*qblocks = 0;
+		*qfreesp = 0;
 		return;
 	}
 
@@ -418,6 +420,9 @@ xfs_quota_calc_throttle(
 		shift += 2;
 	}
 
+	if (freesp < *qfreesp)
+		*qfreesp = freesp;
+
 	/* only overwrite the throttle values if we are more aggressive */
 	if ((freesp >> shift) < (*qblocks >> *qshift)) {
 		*qblocks = freesp;
@@ -476,15 +481,18 @@ xfs_iomap_prealloc_size(
 	}
 
 	/*
-	 * Check each quota to cap the prealloc size and provide a shift
-	 * value to throttle with.
+	 * Check each quota to cap the prealloc size, provide a shift value to
+	 * throttle with and adjust amount of available space.
 	 */
 	if (xfs_quota_need_throttle(ip, XFS_DQ_USER, alloc_blocks))
-		xfs_quota_calc_throttle(ip, XFS_DQ_USER, &qblocks, &qshift);
+		xfs_quota_calc_throttle(ip, XFS_DQ_USER, &qblocks, &qshift,
+					&freesp);
 	if (xfs_quota_need_throttle(ip, XFS_DQ_GROUP, alloc_blocks))
-		xfs_quota_calc_throttle(ip, XFS_DQ_GROUP, &qblocks, &qshift);
+		xfs_quota_calc_throttle(ip, XFS_DQ_GROUP, &qblocks, &qshift,
+					&freesp);
 	if (xfs_quota_need_throttle(ip, XFS_DQ_PROJ, alloc_blocks))
-		xfs_quota_calc_throttle(ip, XFS_DQ_PROJ, &qblocks, &qshift);
+		xfs_quota_calc_throttle(ip, XFS_DQ_PROJ, &qblocks, &qshift,
+					&freesp);
 
 	/*
 	 * The final prealloc size is set to the minimum of free space available
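
The effect is that freesp ends up as the minimum of the filesystem free space and the remaining space of every throttled quota, so the final clamp just past the end of this hunk cannot preallocate beyond the tightest quota. A numeric sketch with hypothetical values:

	int64_t freesp = 10000;	/* filesystem free blocks */
	/* user dquot has 50 blocks left below its hard limit:      */
	/*   xfs_quota_calc_throttle(..., &freesp) -> freesp = 50   */
	/* group dquot has 400 left: min(50, 400), freesp stays 50  */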