aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2012-03-22 01:15:12 -0400
committerBen Myers <bpm@sgi.com>2012-03-27 17:07:03 -0400
commita66d636385d621e98a915233250356c394a437de (patch)
tree9dda534c10fed016503f77a5f7ce893f9c4db7f7 /fs/xfs
parent3948659e30808fbaa7673bbe89de2ae9769e20a7 (diff)
xfs: fix fstrim offset calculations
xfs_ioc_fstrim() doesn't treat the incoming offset and length correctly. It treats them as a filesystem block address, rather than a disk address. This is wrong because the range passed in is a linear representation, while the filesystem block address notation is a sparse representation. Hence we cannot convert the range direct to filesystem block units and then use that for calculating the range to trim. While this sounds dangerous, the problem is limited to calculating what AGs need to be trimmed. The code that calcuates the actual ranges to trim gets the right result (i.e. only ever discards free space), even though it uses the wrong ranges to limit what is trimmed. Hence this is not a bug that endangers user data. Fix this by treating the range as a disk address range and use the appropriate functions to convert the range into the desired formats for calculations. Further, fix the first free extent lookup (the longest) to actually find the largest free extent. Currently this lookup uses a <= lookup, which results in finding the extent to the left of the largest because we can never get an exact match on the largest extent. This is due to the fact that while we know it's size, we don't know it's location and so the exact match fails and we move one record to the left to get the next largest extent. Instead, use a >= search so that the lookup returns the largest extent regardless of the fact we don't get an exact match on it. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Ben Myers <bpm@sgi.com>
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/xfs_alloc.c2
-rw-r--r--fs/xfs/xfs_alloc.h7
-rw-r--r--fs/xfs/xfs_discard.c61
3 files changed, 46 insertions, 24 deletions
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 31e90335b83d..0f0df2759b09 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -69,7 +69,7 @@ xfs_alloc_lookup_eq(
69 * Lookup the first record greater than or equal to [bno, len] 69 * Lookup the first record greater than or equal to [bno, len]
70 * in the btree given by cur. 70 * in the btree given by cur.
71 */ 71 */
72STATIC int /* error */ 72int /* error */
73xfs_alloc_lookup_ge( 73xfs_alloc_lookup_ge(
74 struct xfs_btree_cur *cur, /* btree cursor */ 74 struct xfs_btree_cur *cur, /* btree cursor */
75 xfs_agblock_t bno, /* starting block of extent */ 75 xfs_agblock_t bno, /* starting block of extent */
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index ab5d0fd2f535..3a7e7d8f8ded 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -248,6 +248,13 @@ xfs_alloc_lookup_le(
248 xfs_extlen_t len, /* length of extent */ 248 xfs_extlen_t len, /* length of extent */
249 int *stat); /* success/failure */ 249 int *stat); /* success/failure */
250 250
251int /* error */
252xfs_alloc_lookup_ge(
253 struct xfs_btree_cur *cur, /* btree cursor */
254 xfs_agblock_t bno, /* starting block of extent */
255 xfs_extlen_t len, /* length of extent */
256 int *stat); /* success/failure */
257
251int /* error */ 258int /* error */
252xfs_alloc_get_rec( 259xfs_alloc_get_rec(
253 struct xfs_btree_cur *cur, /* btree cursor */ 260 struct xfs_btree_cur *cur, /* btree cursor */
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index 286a051f12cf..1ad3a4b8ca40 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -37,9 +37,9 @@ STATIC int
37xfs_trim_extents( 37xfs_trim_extents(
38 struct xfs_mount *mp, 38 struct xfs_mount *mp,
39 xfs_agnumber_t agno, 39 xfs_agnumber_t agno,
40 xfs_fsblock_t start, 40 xfs_daddr_t start,
41 xfs_fsblock_t end, 41 xfs_daddr_t end,
42 xfs_fsblock_t minlen, 42 xfs_daddr_t minlen,
43 __uint64_t *blocks_trimmed) 43 __uint64_t *blocks_trimmed)
44{ 44{
45 struct block_device *bdev = mp->m_ddev_targp->bt_bdev; 45 struct block_device *bdev = mp->m_ddev_targp->bt_bdev;
@@ -67,7 +67,7 @@ xfs_trim_extents(
67 /* 67 /*
68 * Look up the longest btree in the AGF and start with it. 68 * Look up the longest btree in the AGF and start with it.
69 */ 69 */
70 error = xfs_alloc_lookup_le(cur, 0, 70 error = xfs_alloc_lookup_ge(cur, 0,
71 be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest), &i); 71 be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest), &i);
72 if (error) 72 if (error)
73 goto out_del_cursor; 73 goto out_del_cursor;
@@ -77,8 +77,10 @@ xfs_trim_extents(
77 * enough to be worth discarding. 77 * enough to be worth discarding.
78 */ 78 */
79 while (i) { 79 while (i) {
80 xfs_agblock_t fbno; 80 xfs_agblock_t fbno;
81 xfs_extlen_t flen; 81 xfs_extlen_t flen;
82 xfs_daddr_t dbno;
83 xfs_extlen_t dlen;
82 84
83 error = xfs_alloc_get_rec(cur, &fbno, &flen, &i); 85 error = xfs_alloc_get_rec(cur, &fbno, &flen, &i);
84 if (error) 86 if (error)
@@ -87,9 +89,17 @@ xfs_trim_extents(
87 ASSERT(flen <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest)); 89 ASSERT(flen <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest));
88 90
89 /* 91 /*
92 * use daddr format for all range/len calculations as that is
93 * the format the range/len variables are supplied in by
94 * userspace.
95 */
96 dbno = XFS_AGB_TO_DADDR(mp, agno, fbno);
97 dlen = XFS_FSB_TO_BB(mp, flen);
98
99 /*
90 * Too small? Give up. 100 * Too small? Give up.
91 */ 101 */
92 if (flen < minlen) { 102 if (dlen < minlen) {
93 trace_xfs_discard_toosmall(mp, agno, fbno, flen); 103 trace_xfs_discard_toosmall(mp, agno, fbno, flen);
94 goto out_del_cursor; 104 goto out_del_cursor;
95 } 105 }
@@ -99,8 +109,7 @@ xfs_trim_extents(
99 * supposed to discard skip it. Do not bother to trim 109 * supposed to discard skip it. Do not bother to trim
100 * down partially overlapping ranges for now. 110 * down partially overlapping ranges for now.
101 */ 111 */
102 if (XFS_AGB_TO_FSB(mp, agno, fbno) + flen < start || 112 if (dbno + dlen < start || dbno > end) {
103 XFS_AGB_TO_FSB(mp, agno, fbno) > end) {
104 trace_xfs_discard_exclude(mp, agno, fbno, flen); 113 trace_xfs_discard_exclude(mp, agno, fbno, flen);
105 goto next_extent; 114 goto next_extent;
106 } 115 }
@@ -115,10 +124,7 @@ xfs_trim_extents(
115 } 124 }
116 125
117 trace_xfs_discard_extent(mp, agno, fbno, flen); 126 trace_xfs_discard_extent(mp, agno, fbno, flen);
118 error = -blkdev_issue_discard(bdev, 127 error = -blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS, 0);
119 XFS_AGB_TO_DADDR(mp, agno, fbno),
120 XFS_FSB_TO_BB(mp, flen),
121 GFP_NOFS, 0);
122 if (error) 128 if (error)
123 goto out_del_cursor; 129 goto out_del_cursor;
124 *blocks_trimmed += flen; 130 *blocks_trimmed += flen;
@@ -137,6 +143,15 @@ out_put_perag:
137 return error; 143 return error;
138} 144}
139 145
146/*
147 * trim a range of the filesystem.
148 *
149 * Note: the parameters passed from userspace are byte ranges into the
150 * filesystem which does not match to the format we use for filesystem block
151 * addressing. FSB addressing is sparse (AGNO|AGBNO), while the incoming format
152 * is a linear address range. Hence we need to use DADDR based conversions and
153 * comparisons for determining the correct offset and regions to trim.
154 */
140int 155int
141xfs_ioc_trim( 156xfs_ioc_trim(
142 struct xfs_mount *mp, 157 struct xfs_mount *mp,
@@ -145,7 +160,7 @@ xfs_ioc_trim(
145 struct request_queue *q = mp->m_ddev_targp->bt_bdev->bd_disk->queue; 160 struct request_queue *q = mp->m_ddev_targp->bt_bdev->bd_disk->queue;
146 unsigned int granularity = q->limits.discard_granularity; 161 unsigned int granularity = q->limits.discard_granularity;
147 struct fstrim_range range; 162 struct fstrim_range range;
148 xfs_fsblock_t start, end, minlen; 163 xfs_daddr_t start, end, minlen;
149 xfs_agnumber_t start_agno, end_agno, agno; 164 xfs_agnumber_t start_agno, end_agno, agno;
150 __uint64_t blocks_trimmed = 0; 165 __uint64_t blocks_trimmed = 0;
151 int error, last_error = 0; 166 int error, last_error = 0;
@@ -159,22 +174,22 @@ xfs_ioc_trim(
159 174
160 /* 175 /*
161 * Truncating down the len isn't actually quite correct, but using 176 * Truncating down the len isn't actually quite correct, but using
162 * XFS_B_TO_FSB would mean we trivially get overflows for values 177 * BBTOB would mean we trivially get overflows for values
163 * of ULLONG_MAX or slightly lower. And ULLONG_MAX is the default 178 * of ULLONG_MAX or slightly lower. And ULLONG_MAX is the default
164 * used by the fstrim application. In the end it really doesn't 179 * used by the fstrim application. In the end it really doesn't
165 * matter as trimming blocks is an advisory interface. 180 * matter as trimming blocks is an advisory interface.
166 */ 181 */
167 start = XFS_B_TO_FSBT(mp, range.start); 182 start = BTOBB(range.start);
168 end = start + XFS_B_TO_FSBT(mp, range.len) - 1; 183 end = start + BTOBBT(range.len) - 1;
169 minlen = XFS_B_TO_FSB(mp, max_t(u64, granularity, range.minlen)); 184 minlen = BTOBB(max_t(u64, granularity, range.minlen));
170 185
171 if (start >= mp->m_sb.sb_dblocks) 186 if (XFS_BB_TO_FSB(mp, start) >= mp->m_sb.sb_dblocks)
172 return -XFS_ERROR(EINVAL); 187 return -XFS_ERROR(EINVAL);
173 if (end > mp->m_sb.sb_dblocks - 1) 188 if (end > XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1)
174 end = mp->m_sb.sb_dblocks - 1; 189 end = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)- 1;
175 190
176 start_agno = XFS_FSB_TO_AGNO(mp, start); 191 start_agno = xfs_daddr_to_agno(mp, start);
177 end_agno = XFS_FSB_TO_AGNO(mp, end); 192 end_agno = xfs_daddr_to_agno(mp, end);
178 193
179 for (agno = start_agno; agno <= end_agno; agno++) { 194 for (agno = start_agno; agno <= end_agno; agno++) {
180 error = -xfs_trim_extents(mp, agno, start, end, minlen, 195 error = -xfs_trim_extents(mp, agno, start, end, minlen,