aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author: Steven Whitehouse <swhiteho@redhat.com> 2012-02-08 07:58:32 -0500
committer: Steven Whitehouse <swhiteho@redhat.com> 2012-02-28 12:10:21 -0500
commit: 66fc061bda3526650328b73f69985da3518c4256 (patch)
tree: 350471fda86ebf60fee5436350d36ce9cedbea75 /fs
parent: 47ac5537a794fc71f89d51af492a945bd233f70c (diff)
GFS2: FITRIM ioctl support
The FITRIM ioctl provides an alternative way to send discard requests to the underlying device. Using the discard mount option results in every freed block generating a discard request to the block device. This can be slow, since many block devices can only process discard requests of larger sizes, and such operations can also be time consuming. Rather than using the discard mount option, FITRIM allows the filesystem to be swept on an occasional basis, and optionally allows discard requests for smaller regions to be skipped. In GFS2, FITRIM works at resource group granularity. Each resource group has a flag which records whether that resource group has been trimmed. This flag is reset whenever a deallocation occurs in the resource group, and set whenever a successful FITRIM of that resource group has taken place. This helps to reduce repeated discard requests for the same block ranges, again improving performance. Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Diffstat (limited to 'fs')
-rw-r--r-- fs/gfs2/file.c 2
-rw-r--r-- fs/gfs2/inode.c 4
-rw-r--r-- fs/gfs2/lops.c 2
-rw-r--r-- fs/gfs2/rgrp.c 164
-rw-r--r-- fs/gfs2/rgrp.h 10
-rw-r--r-- fs/gfs2/super.c 2
-rw-r--r-- fs/gfs2/xattr.c 4
7 files changed, 152 insertions, 36 deletions
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index c5fb3597f696..310f2fb6f7ea 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -313,6 +313,8 @@ static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
313 return gfs2_get_flags(filp, (u32 __user *)arg); 313 return gfs2_get_flags(filp, (u32 __user *)arg);
314 case FS_IOC_SETFLAGS: 314 case FS_IOC_SETFLAGS:
315 return gfs2_set_flags(filp, (u32 __user *)arg); 315 return gfs2_set_flags(filp, (u32 __user *)arg);
316 case FITRIM:
317 return gfs2_fitrim(filp, (void __user *)arg);
316 } 318 }
317 return -ENOTTY; 319 return -ENOTTY;
318} 320}
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 56987460cdae..c98a60ee6dfd 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1036,7 +1036,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
1036 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); 1036 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
1037 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); 1037 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
1038 1038
1039 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); 1039 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1);
1040 if (!rgd) 1040 if (!rgd)
1041 goto out_inodes; 1041 goto out_inodes;
1042 1042
@@ -1255,7 +1255,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
1255 * this is the case of the target file already existing 1255 * this is the case of the target file already existing
1256 * so we unlink before doing the rename 1256 * so we unlink before doing the rename
1257 */ 1257 */
1258 nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr); 1258 nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr, 1);
1259 if (nrgd) 1259 if (nrgd)
1260 gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++); 1260 gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
1261 } 1261 }
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 8e323c4b7983..fe369bd9e10c 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -76,7 +76,7 @@ static void maybe_release_space(struct gfs2_bufdata *bd)
76 if (bi->bi_clone == 0) 76 if (bi->bi_clone == 0)
77 return; 77 return;
78 if (sdp->sd_args.ar_discard) 78 if (sdp->sd_args.ar_discard)
79 gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi); 79 gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi, 1, NULL);
80 memcpy(bi->bi_clone + bi->bi_offset, 80 memcpy(bi->bi_clone + bi->bi_offset,
81 bd->bd_bh->b_data + bi->bi_offset, bi->bi_len); 81 bd->bd_bh->b_data + bi->bi_offset, bi->bi_len);
82 clear_bit(GBF_FULL, &bi->bi_flags); 82 clear_bit(GBF_FULL, &bi->bi_flags);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 49ada95209d0..1446b4e0ac73 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -327,23 +327,31 @@ static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
327 * Returns: The resource group, or NULL if not found 327 * Returns: The resource group, or NULL if not found
328 */ 328 */
329 329
330struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk) 330struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact)
331{ 331{
332 struct rb_node **newn; 332 struct rb_node *n, *next;
333 struct gfs2_rgrpd *cur; 333 struct gfs2_rgrpd *cur;
334 334
335 spin_lock(&sdp->sd_rindex_spin); 335 spin_lock(&sdp->sd_rindex_spin);
336 newn = &sdp->sd_rindex_tree.rb_node; 336 n = sdp->sd_rindex_tree.rb_node;
337 while (*newn) { 337 while (n) {
338 cur = rb_entry(*newn, struct gfs2_rgrpd, rd_node); 338 cur = rb_entry(n, struct gfs2_rgrpd, rd_node);
339 next = NULL;
339 if (blk < cur->rd_addr) 340 if (blk < cur->rd_addr)
340 newn = &((*newn)->rb_left); 341 next = n->rb_left;
341 else if (blk >= cur->rd_data0 + cur->rd_data) 342 else if (blk >= cur->rd_data0 + cur->rd_data)
342 newn = &((*newn)->rb_right); 343 next = n->rb_right;
343 else { 344 if (next == NULL) {
344 spin_unlock(&sdp->sd_rindex_spin); 345 spin_unlock(&sdp->sd_rindex_spin);
346 if (exact) {
347 if (blk < cur->rd_addr)
348 return NULL;
349 if (blk >= cur->rd_data0 + cur->rd_data)
350 return NULL;
351 }
345 return cur; 352 return cur;
346 } 353 }
354 n = next;
347 } 355 }
348 spin_unlock(&sdp->sd_rindex_spin); 356 spin_unlock(&sdp->sd_rindex_spin);
349 357
@@ -810,9 +818,9 @@ void gfs2_rgrp_go_unlock(struct gfs2_holder *gh)
810 818
811} 819}
812 820
813void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, 821int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
814 struct buffer_head *bh, 822 struct buffer_head *bh,
815 const struct gfs2_bitmap *bi) 823 const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed)
816{ 824{
817 struct super_block *sb = sdp->sd_vfs; 825 struct super_block *sb = sdp->sd_vfs;
818 struct block_device *bdev = sb->s_bdev; 826 struct block_device *bdev = sb->s_bdev;
@@ -823,11 +831,19 @@ void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
823 sector_t nr_sects = 0; 831 sector_t nr_sects = 0;
824 int rv; 832 int rv;
825 unsigned int x; 833 unsigned int x;
834 u32 trimmed = 0;
835 u8 diff;
826 836
827 for (x = 0; x < bi->bi_len; x++) { 837 for (x = 0; x < bi->bi_len; x++) {
828 const u8 *orig = bh->b_data + bi->bi_offset + x; 838 const u8 *clone = bi->bi_clone ? bi->bi_clone : bi->bi_bh->b_data;
829 const u8 *clone = bi->bi_clone + bi->bi_offset + x; 839 clone += bi->bi_offset;
830 u8 diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1)); 840 clone += x;
841 if (bh) {
842 const u8 *orig = bh->b_data + bi->bi_offset + x;
843 diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1));
844 } else {
845 diff = ~(*clone | (*clone >> 1));
846 }
831 diff &= 0x55; 847 diff &= 0x55;
832 if (diff == 0) 848 if (diff == 0)
833 continue; 849 continue;
@@ -838,11 +854,14 @@ void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
838 if (nr_sects == 0) 854 if (nr_sects == 0)
839 goto start_new_extent; 855 goto start_new_extent;
840 if ((start + nr_sects) != blk) { 856 if ((start + nr_sects) != blk) {
841 rv = blkdev_issue_discard(bdev, start, 857 if (nr_sects >= minlen) {
842 nr_sects, GFP_NOFS, 858 rv = blkdev_issue_discard(bdev,
843 0); 859 start, nr_sects,
844 if (rv) 860 GFP_NOFS, 0);
845 goto fail; 861 if (rv)
862 goto fail;
863 trimmed += nr_sects;
864 }
846 nr_sects = 0; 865 nr_sects = 0;
847start_new_extent: 866start_new_extent:
848 start = blk; 867 start = blk;
@@ -853,15 +872,108 @@ start_new_extent:
853 blk += sects_per_blk; 872 blk += sects_per_blk;
854 } 873 }
855 } 874 }
856 if (nr_sects) { 875 if (nr_sects >= minlen) {
857 rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, 0); 876 rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, 0);
858 if (rv) 877 if (rv)
859 goto fail; 878 goto fail;
879 trimmed += nr_sects;
860 } 880 }
861 return; 881 if (ptrimmed)
882 *ptrimmed = trimmed;
883 return 0;
884
862fail: 885fail:
863 fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem", rv); 886 if (sdp->sd_args.ar_discard)
887 fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem", rv);
864 sdp->sd_args.ar_discard = 0; 888 sdp->sd_args.ar_discard = 0;
889 return -EIO;
890}
891
892/**
893 * gfs2_fitrim - Generate discard requests for unused bits of the filesystem
894 * @filp: Any file on the filesystem
895 * @argp: Pointer to the arguments (also used to pass result)
896 *
897 * Returns: 0 on success, otherwise error code
898 */
899
900int gfs2_fitrim(struct file *filp, void __user *argp)
901{
902 struct inode *inode = filp->f_dentry->d_inode;
903 struct gfs2_sbd *sdp = GFS2_SB(inode);
904 struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev);
905 struct buffer_head *bh;
906 struct gfs2_rgrpd *rgd;
907 struct gfs2_rgrpd *rgd_end;
908 struct gfs2_holder gh;
909 struct fstrim_range r;
910 int ret = 0;
911 u64 amt;
912 u64 trimmed = 0;
913 unsigned int x;
914
915 if (!capable(CAP_SYS_ADMIN))
916 return -EPERM;
917
918 if (!blk_queue_discard(q))
919 return -EOPNOTSUPP;
920
921 ret = gfs2_rindex_update(sdp);
922 if (ret)
923 return ret;
924
925 if (argp == NULL) {
926 r.start = 0;
927 r.len = ULLONG_MAX;
928 r.minlen = 0;
929 } else if (copy_from_user(&r, argp, sizeof(r)))
930 return -EFAULT;
931
932 rgd = gfs2_blk2rgrpd(sdp, r.start, 0);
933 rgd_end = gfs2_blk2rgrpd(sdp, r.start + r.len, 0);
934
935 while (1) {
936
937 ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh);
938 if (ret)
939 goto out;
940
941 if (!(rgd->rd_flags & GFS2_RGF_TRIMMED)) {
942 /* Trim each bitmap in the rgrp */
943 for (x = 0; x < rgd->rd_length; x++) {
944 struct gfs2_bitmap *bi = rgd->rd_bits + x;
945 ret = gfs2_rgrp_send_discards(sdp, rgd->rd_data0, NULL, bi, r.minlen, &amt);
946 if (ret) {
947 gfs2_glock_dq_uninit(&gh);
948 goto out;
949 }
950 trimmed += amt;
951 }
952
953 /* Mark rgrp as having been trimmed */
954 ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0);
955 if (ret == 0) {
956 bh = rgd->rd_bits[0].bi_bh;
957 rgd->rd_flags |= GFS2_RGF_TRIMMED;
958 gfs2_trans_add_bh(rgd->rd_gl, bh, 1);
959 gfs2_rgrp_out(rgd, bh->b_data);
960 gfs2_trans_end(sdp);
961 }
962 }
963 gfs2_glock_dq_uninit(&gh);
964
965 if (rgd == rgd_end)
966 break;
967
968 rgd = gfs2_rgrpd_get_next(rgd);
969 }
970
971out:
972 r.len = trimmed << 9;
973 if (argp && copy_to_user(argp, &r, sizeof(r)))
974 return -EFAULT;
975
976 return ret;
865} 977}
866 978
867/** 979/**
@@ -1008,7 +1120,7 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
1008 if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) 1120 if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal))
1009 rgd = begin = ip->i_rgd; 1121 rgd = begin = ip->i_rgd;
1010 else 1122 else
1011 rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal); 1123 rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
1012 1124
1013 if (rgd == NULL) 1125 if (rgd == NULL)
1014 return -EBADSLT; 1126 return -EBADSLT;
@@ -1293,7 +1405,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
1293 u32 length, rgrp_blk, buf_blk; 1405 u32 length, rgrp_blk, buf_blk;
1294 unsigned int buf; 1406 unsigned int buf;
1295 1407
1296 rgd = gfs2_blk2rgrpd(sdp, bstart); 1408 rgd = gfs2_blk2rgrpd(sdp, bstart, 1);
1297 if (!rgd) { 1409 if (!rgd) {
1298 if (gfs2_consist(sdp)) 1410 if (gfs2_consist(sdp))
1299 fs_err(sdp, "block = %llu\n", (unsigned long long)bstart); 1411 fs_err(sdp, "block = %llu\n", (unsigned long long)bstart);
@@ -1474,7 +1586,7 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
1474 return; 1586 return;
1475 trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE); 1587 trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE);
1476 rgd->rd_free += blen; 1588 rgd->rd_free += blen;
1477 1589 rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
1478 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 1590 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1479 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 1591 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1480 1592
@@ -1567,7 +1679,7 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
1567 return error; 1679 return error;
1568 1680
1569 error = -EINVAL; 1681 error = -EINVAL;
1570 rgd = gfs2_blk2rgrpd(sdp, no_addr); 1682 rgd = gfs2_blk2rgrpd(sdp, no_addr, 1);
1571 if (!rgd) 1683 if (!rgd)
1572 goto fail; 1684 goto fail;
1573 1685
@@ -1610,7 +1722,7 @@ void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
1610 if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, block)) 1722 if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, block))
1611 rgd = ip->i_rgd; 1723 rgd = ip->i_rgd;
1612 else 1724 else
1613 rgd = gfs2_blk2rgrpd(sdp, block); 1725 rgd = gfs2_blk2rgrpd(sdp, block, 1);
1614 if (!rgd) { 1726 if (!rgd) {
1615 fs_err(sdp, "rlist_add: no rgrp for block %llu\n", (unsigned long long)block); 1727 fs_err(sdp, "rlist_add: no rgrp for block %llu\n", (unsigned long long)block);
1616 return; 1728 return;
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index ceec9106cdf4..b4b10f4de25f 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -11,6 +11,7 @@
11#define __RGRP_DOT_H__ 11#define __RGRP_DOT_H__
12 12
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include <linux/uaccess.h>
14 15
15struct gfs2_rgrpd; 16struct gfs2_rgrpd;
16struct gfs2_sbd; 17struct gfs2_sbd;
@@ -18,7 +19,7 @@ struct gfs2_holder;
18 19
19extern void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd); 20extern void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd);
20 21
21extern struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk); 22extern struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact);
22extern struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp); 23extern struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp);
23extern struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd); 24extern struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd);
24 25
@@ -62,8 +63,9 @@ extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state);
62extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist); 63extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist);
63extern u64 gfs2_ri_total(struct gfs2_sbd *sdp); 64extern u64 gfs2_ri_total(struct gfs2_sbd *sdp);
64extern int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl); 65extern int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl);
65extern void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, 66extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
66 struct buffer_head *bh, 67 struct buffer_head *bh,
67 const struct gfs2_bitmap *bi); 68 const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed);
69extern int gfs2_fitrim(struct file *filp, void __user *argp);
68 70
69#endif /* __RGRP_DOT_H__ */ 71#endif /* __RGRP_DOT_H__ */
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 4553ce515f62..f3faf72fa7ae 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1417,7 +1417,7 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
1417 if (error) 1417 if (error)
1418 goto out; 1418 goto out;
1419 1419
1420 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); 1420 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1);
1421 if (!rgd) { 1421 if (!rgd) {
1422 gfs2_consist_inode(ip); 1422 gfs2_consist_inode(ip);
1423 error = -EIO; 1423 error = -EIO;
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index e9636591b5d5..2e5ba425cae7 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -251,7 +251,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
251 if (!blks) 251 if (!blks)
252 return 0; 252 return 0;
253 253
254 rgd = gfs2_blk2rgrpd(sdp, bn); 254 rgd = gfs2_blk2rgrpd(sdp, bn, 1);
255 if (!rgd) { 255 if (!rgd) {
256 gfs2_consist_inode(ip); 256 gfs2_consist_inode(ip);
257 return -EIO; 257 return -EIO;
@@ -1439,7 +1439,7 @@ static int ea_dealloc_block(struct gfs2_inode *ip)
1439 struct gfs2_holder gh; 1439 struct gfs2_holder gh;
1440 int error; 1440 int error;
1441 1441
1442 rgd = gfs2_blk2rgrpd(sdp, ip->i_eattr); 1442 rgd = gfs2_blk2rgrpd(sdp, ip->i_eattr, 1);
1443 if (!rgd) { 1443 if (!rgd) {
1444 gfs2_consist_inode(ip); 1444 gfs2_consist_inode(ip);
1445 return -EIO; 1445 return -EIO;