author    Linus Torvalds <torvalds@linux-foundation.org>  2017-05-06 14:46:16 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2017-05-06 14:46:16 -0400
commit    d484467c860dab3e17893d23b2238e1f581460fa (patch)
tree      b4729ec7c8dc03354a7ac23377b8d72c661b3fc6
parent    044f1daaaaf7c86bc4fcf433848b7baae236946b (diff)
parent    161f55efba5ddccc690139fae9373cafc3447a97 (diff)
Merge tag 'xfs-4.12-merge-7' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull xfs updates from Darrick Wong:
 "Here are the XFS changes for 4.12. The big new feature for this
  release is the new space mapping ioctl that we've been discussing
  since LSF2016, but other than that most of the patches are larger bug
  fixes, memory corruption prevention, and other cleanups.

  Summary:

   - various code cleanups
   - introduce GETFSMAP ioctl
   - various refactoring
   - avoid dio reads past eof
   - fix memory corruption and other errors with fragmented directory blocks
   - fix accidental userspace memory corruptions
   - publish fs uuid in superblock
   - make fstrim terminatable
   - fix race between quotaoff and in-core inode creation
   - avoid use-after-free when finishing up w/ buffer heads
   - reserve enough space to handle bmap tree resizing during cow remap"

* tag 'xfs-4.12-merge-7' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (53 commits)
  xfs: fix use-after-free in xfs_finish_page_writeback
  xfs: reserve enough blocks to handle btree splits when remapping
  xfs: wait on new inodes during quotaoff dquot release
  xfs: update ag iterator to support wait on new inodes
  xfs: support ability to wait on new inodes
  xfs: publish UUID in struct super_block
  xfs: Allow user to kill fstrim process
  xfs: better log intent item refcount checking
  xfs: fix up quotacheck buffer list error handling
  xfs: remove xfs_trans_ail_delete_bulk
  xfs: don't use bool values in trace buffers
  xfs: fix getfsmap userspace memory corruption while setting OF_LAST
  xfs: fix __user annotations for xfs_ioc_getfsmap
  xfs: corruption needs to respect endianess too!
  xfs: use NULL instead of 0 to initialize a pointer in xfs_ioc_getfsmap
  xfs: use NULL instead of 0 to initialize a pointer in xfs_getfsmap
  xfs: simplify validation of the unwritten extent bit
  xfs: remove unused values from xfs_exntst_t
  xfs: remove the unused XFS_MAXLINK_1 define
  xfs: more do_div cleanups
  ...
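For context before the diff itself: the headline feature of this pull is the GETFSMAP ioctl, exposed to userspace through the new include/uapi/linux/fsmap.h listed in the diffstat below. What follows is a minimal sketch of how a userspace program might drive it; the struct fsmap_head layout, fsmap_sizeof(), FS_IOC_GETFSMAP and FMR_OF_LAST names are assumed to match that header as merged, so check the installed <linux/fsmap.h> rather than treating this as authoritative.

/*
 * Hedged sketch: dump every space mapping of the filesystem backing the
 * given path via FS_IOC_GETFSMAP.  Assumes uapi headers from a kernel
 * that contains this merge.
 */
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fsmap.h>

#define NR_RECS	128

int
main(int argc, char **argv)
{
	struct fsmap_head *head;
	struct fsmap *rec;
	unsigned int i;
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <path-on-xfs>\n", argv[0]);
		return 1;
	}

	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror(argv[1]);
		return 1;
	}

	/* Room for the fixed header, the two keys, and NR_RECS records. */
	head = calloc(1, fsmap_sizeof(NR_RECS));
	if (!head) {
		close(fd);
		return 1;
	}

	/*
	 * Query the whole filesystem: the low key stays zero, the high key
	 * covers every device/physical/owner/offset.  The reserved fields
	 * must stay zero or the kernel rejects the call with EINVAL.
	 */
	head->fmh_count = NR_RECS;
	head->fmh_keys[1].fmr_device = UINT_MAX;
	head->fmh_keys[1].fmr_flags = UINT_MAX;
	head->fmh_keys[1].fmr_physical = ULLONG_MAX;
	head->fmh_keys[1].fmr_owner = ULLONG_MAX;
	head->fmh_keys[1].fmr_offset = ULLONG_MAX;

	for (;;) {
		if (ioctl(fd, FS_IOC_GETFSMAP, head) < 0) {
			perror("FS_IOC_GETFSMAP");
			break;
		}
		if (!head->fmh_entries)
			break;
		for (i = 0; i < head->fmh_entries; i++) {
			rec = &head->fmh_recs[i];
			printf("dev 0x%x phys %llu len %llu owner %lld off %llu\n",
			       rec->fmr_device,
			       (unsigned long long)rec->fmr_physical,
			       (unsigned long long)rec->fmr_length,
			       (long long)rec->fmr_owner,
			       (unsigned long long)rec->fmr_offset);
		}
		rec = &head->fmh_recs[head->fmh_entries - 1];
		if (rec->fmr_flags & FMR_OF_LAST)
			break;
		/* Restart the query just past the last record we received. */
		head->fmh_keys[0] = *rec;
	}

	free(head);
	close(fd);
	return 0;
}

xfsprogs' xfs_io grew a matching "fsmap" command around the same release, which is the easier way to poke at the interface interactively.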
-rw-r--r--fs/iomap.c3
-rw-r--r--fs/xfs/Makefile1
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c57
-rw-r--r--fs/xfs/libxfs/xfs_alloc.h12
-rw-r--r--fs/xfs/libxfs/xfs_alloc_btree.c172
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c354
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h14
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.c43
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.h22
-rw-r--r--fs/xfs/libxfs/xfs_btree.c15
-rw-r--r--fs/xfs/libxfs/xfs_btree.h2
-rw-r--r--fs/xfs/libxfs/xfs_dquot_buf.c7
-rw-r--r--fs/xfs/libxfs/xfs_format.h11
-rw-r--r--fs/xfs/libxfs/xfs_fs.h13
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c2
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.c90
-rw-r--r--fs/xfs/libxfs/xfs_rmap.c56
-rw-r--r--fs/xfs/libxfs/xfs_rmap.h4
-rw-r--r--fs/xfs/libxfs/xfs_rtbitmap.c70
-rw-r--r--fs/xfs/libxfs/xfs_trans_space.h23
-rw-r--r--fs/xfs/xfs_aops.c12
-rw-r--r--fs/xfs/xfs_bmap_item.c6
-rw-r--r--fs/xfs/xfs_bmap_util.c20
-rw-r--r--fs/xfs/xfs_buf.c24
-rw-r--r--fs/xfs/xfs_buf.h2
-rw-r--r--fs/xfs/xfs_dir2_readdir.c15
-rw-r--r--fs/xfs/xfs_discard.c10
-rw-r--r--fs/xfs/xfs_extfree_item.c1
-rw-r--r--fs/xfs/xfs_fsmap.c940
-rw-r--r--fs/xfs/xfs_fsmap.h53
-rw-r--r--fs/xfs/xfs_icache.c58
-rw-r--r--fs/xfs/xfs_icache.h8
-rw-r--r--fs/xfs/xfs_inode.c9
-rw-r--r--fs/xfs/xfs_inode.h4
-rw-r--r--fs/xfs/xfs_inode_item.c29
-rw-r--r--fs/xfs/xfs_ioctl.c89
-rw-r--r--fs/xfs/xfs_ioctl32.c2
-rw-r--r--fs/xfs/xfs_iomap.c8
-rw-r--r--fs/xfs/xfs_linux.h85
-rw-r--r--fs/xfs/xfs_log.c4
-rw-r--r--fs/xfs/xfs_mount.c4
-rw-r--r--fs/xfs/xfs_mount.h5
-rw-r--r--fs/xfs/xfs_qm.c11
-rw-r--r--fs/xfs/xfs_qm_syscalls.c3
-rw-r--r--fs/xfs/xfs_refcount_item.c1
-rw-r--r--fs/xfs/xfs_reflink.c39
-rw-r--r--fs/xfs/xfs_rmap_item.c1
-rw-r--r--fs/xfs/xfs_rtalloc.h22
-rw-r--r--fs/xfs/xfs_super.c8
-rw-r--r--fs/xfs/xfs_trace.c1
-rw-r--r--fs/xfs/xfs_trace.h144
-rw-r--r--fs/xfs/xfs_trans.c39
-rw-r--r--fs/xfs/xfs_trans.h3
-rw-r--r--fs/xfs/xfs_trans_ail.c71
-rw-r--r--fs/xfs/xfs_trans_priv.h15
-rw-r--r--include/uapi/linux/fsmap.h112
56 files changed, 2162 insertions, 667 deletions
diff --git a/fs/iomap.c b/fs/iomap.c
index 4add7d4ad006..1faabe09b8fd 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -910,6 +910,9 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
910 break; 910 break;
911 } 911 }
912 pos += ret; 912 pos += ret;
913
914 if (iov_iter_rw(iter) == READ && pos >= dio->i_size)
915 break;
913 } while ((count = iov_iter_count(iter)) > 0); 916 } while ((count = iov_iter_count(iter)) > 0);
914 blk_finish_plug(&plug); 917 blk_finish_plug(&plug);
915 918
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 26ef1958b65b..5c90f82b8f6b 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -79,6 +79,7 @@ xfs-y += xfs_aops.o \
79 xfs_extent_busy.o \ 79 xfs_extent_busy.o \
80 xfs_file.o \ 80 xfs_file.o \
81 xfs_filestream.o \ 81 xfs_filestream.o \
82 xfs_fsmap.o \
82 xfs_fsops.o \ 83 xfs_fsops.o \
83 xfs_globals.o \ 84 xfs_globals.o \
84 xfs_icache.o \ 85 xfs_icache.o \
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 369adcc18c02..7486401ccbd3 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -2868,3 +2868,60 @@ err:
2868 xfs_trans_brelse(tp, agbp); 2868 xfs_trans_brelse(tp, agbp);
2869 return error; 2869 return error;
2870} 2870}
2871
2872struct xfs_alloc_query_range_info {
2873 xfs_alloc_query_range_fn fn;
2874 void *priv;
2875};
2876
2877/* Format btree record and pass to our callback. */
2878STATIC int
2879xfs_alloc_query_range_helper(
2880 struct xfs_btree_cur *cur,
2881 union xfs_btree_rec *rec,
2882 void *priv)
2883{
2884 struct xfs_alloc_query_range_info *query = priv;
2885 struct xfs_alloc_rec_incore irec;
2886
2887 irec.ar_startblock = be32_to_cpu(rec->alloc.ar_startblock);
2888 irec.ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount);
2889 return query->fn(cur, &irec, query->priv);
2890}
2891
2892/* Find all free space within a given range of blocks. */
2893int
2894xfs_alloc_query_range(
2895 struct xfs_btree_cur *cur,
2896 struct xfs_alloc_rec_incore *low_rec,
2897 struct xfs_alloc_rec_incore *high_rec,
2898 xfs_alloc_query_range_fn fn,
2899 void *priv)
2900{
2901 union xfs_btree_irec low_brec;
2902 union xfs_btree_irec high_brec;
2903 struct xfs_alloc_query_range_info query;
2904
2905 ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
2906 low_brec.a = *low_rec;
2907 high_brec.a = *high_rec;
2908 query.priv = priv;
2909 query.fn = fn;
2910 return xfs_btree_query_range(cur, &low_brec, &high_brec,
2911 xfs_alloc_query_range_helper, &query);
2912}
2913
2914/* Find all free space records. */
2915int
2916xfs_alloc_query_all(
2917 struct xfs_btree_cur *cur,
2918 xfs_alloc_query_range_fn fn,
2919 void *priv)
2920{
2921 struct xfs_alloc_query_range_info query;
2922
2923 ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
2924 query.priv = priv;
2925 query.fn = fn;
2926 return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
2927}
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index 2a8d0fa6fbbe..77d9c27330ab 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -219,4 +219,16 @@ int xfs_free_extent_fix_freelist(struct xfs_trans *tp, xfs_agnumber_t agno,
219 219
220xfs_extlen_t xfs_prealloc_blocks(struct xfs_mount *mp); 220xfs_extlen_t xfs_prealloc_blocks(struct xfs_mount *mp);
221 221
222typedef int (*xfs_alloc_query_range_fn)(
223 struct xfs_btree_cur *cur,
224 struct xfs_alloc_rec_incore *rec,
225 void *priv);
226
227int xfs_alloc_query_range(struct xfs_btree_cur *cur,
228 struct xfs_alloc_rec_incore *low_rec,
229 struct xfs_alloc_rec_incore *high_rec,
230 xfs_alloc_query_range_fn fn, void *priv);
231int xfs_alloc_query_all(struct xfs_btree_cur *cur, xfs_alloc_query_range_fn fn,
232 void *priv);
233
222#endif /* __XFS_ALLOC_H__ */ 234#endif /* __XFS_ALLOC_H__ */
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index efb467b10a71..e1fcfe7f0a9a 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -205,19 +205,37 @@ xfs_allocbt_init_key_from_rec(
205 union xfs_btree_key *key, 205 union xfs_btree_key *key,
206 union xfs_btree_rec *rec) 206 union xfs_btree_rec *rec)
207{ 207{
208 ASSERT(rec->alloc.ar_startblock != 0);
209
210 key->alloc.ar_startblock = rec->alloc.ar_startblock; 208 key->alloc.ar_startblock = rec->alloc.ar_startblock;
211 key->alloc.ar_blockcount = rec->alloc.ar_blockcount; 209 key->alloc.ar_blockcount = rec->alloc.ar_blockcount;
212} 210}
213 211
214STATIC void 212STATIC void
213xfs_bnobt_init_high_key_from_rec(
214 union xfs_btree_key *key,
215 union xfs_btree_rec *rec)
216{
217 __u32 x;
218
219 x = be32_to_cpu(rec->alloc.ar_startblock);
220 x += be32_to_cpu(rec->alloc.ar_blockcount) - 1;
221 key->alloc.ar_startblock = cpu_to_be32(x);
222 key->alloc.ar_blockcount = 0;
223}
224
225STATIC void
226xfs_cntbt_init_high_key_from_rec(
227 union xfs_btree_key *key,
228 union xfs_btree_rec *rec)
229{
230 key->alloc.ar_blockcount = rec->alloc.ar_blockcount;
231 key->alloc.ar_startblock = 0;
232}
233
234STATIC void
215xfs_allocbt_init_rec_from_cur( 235xfs_allocbt_init_rec_from_cur(
216 struct xfs_btree_cur *cur, 236 struct xfs_btree_cur *cur,
217 union xfs_btree_rec *rec) 237 union xfs_btree_rec *rec)
218{ 238{
219 ASSERT(cur->bc_rec.a.ar_startblock != 0);
220
221 rec->alloc.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock); 239 rec->alloc.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock);
222 rec->alloc.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount); 240 rec->alloc.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount);
223} 241}
@@ -236,18 +254,24 @@ xfs_allocbt_init_ptr_from_cur(
236} 254}
237 255
238STATIC __int64_t 256STATIC __int64_t
239xfs_allocbt_key_diff( 257xfs_bnobt_key_diff(
240 struct xfs_btree_cur *cur, 258 struct xfs_btree_cur *cur,
241 union xfs_btree_key *key) 259 union xfs_btree_key *key)
242{ 260{
243 xfs_alloc_rec_incore_t *rec = &cur->bc_rec.a; 261 xfs_alloc_rec_incore_t *rec = &cur->bc_rec.a;
244 xfs_alloc_key_t *kp = &key->alloc; 262 xfs_alloc_key_t *kp = &key->alloc;
245 __int64_t diff;
246 263
247 if (cur->bc_btnum == XFS_BTNUM_BNO) { 264 return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
248 return (__int64_t)be32_to_cpu(kp->ar_startblock) - 265}
249 rec->ar_startblock; 266
250 } 267STATIC __int64_t
268xfs_cntbt_key_diff(
269 struct xfs_btree_cur *cur,
270 union xfs_btree_key *key)
271{
272 xfs_alloc_rec_incore_t *rec = &cur->bc_rec.a;
273 xfs_alloc_key_t *kp = &key->alloc;
274 __int64_t diff;
251 275
252 diff = (__int64_t)be32_to_cpu(kp->ar_blockcount) - rec->ar_blockcount; 276 diff = (__int64_t)be32_to_cpu(kp->ar_blockcount) - rec->ar_blockcount;
253 if (diff) 277 if (diff)
@@ -256,6 +280,33 @@ xfs_allocbt_key_diff(
256 return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock; 280 return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
257} 281}
258 282
283STATIC __int64_t
284xfs_bnobt_diff_two_keys(
285 struct xfs_btree_cur *cur,
286 union xfs_btree_key *k1,
287 union xfs_btree_key *k2)
288{
289 return (__int64_t)be32_to_cpu(k1->alloc.ar_startblock) -
290 be32_to_cpu(k2->alloc.ar_startblock);
291}
292
293STATIC __int64_t
294xfs_cntbt_diff_two_keys(
295 struct xfs_btree_cur *cur,
296 union xfs_btree_key *k1,
297 union xfs_btree_key *k2)
298{
299 __int64_t diff;
300
301 diff = be32_to_cpu(k1->alloc.ar_blockcount) -
302 be32_to_cpu(k2->alloc.ar_blockcount);
303 if (diff)
304 return diff;
305
306 return be32_to_cpu(k1->alloc.ar_startblock) -
307 be32_to_cpu(k2->alloc.ar_startblock);
308}
309
259static bool 310static bool
260xfs_allocbt_verify( 311xfs_allocbt_verify(
261 struct xfs_buf *bp) 312 struct xfs_buf *bp)
@@ -346,44 +397,54 @@ const struct xfs_buf_ops xfs_allocbt_buf_ops = {
346 397
347#if defined(DEBUG) || defined(XFS_WARN) 398#if defined(DEBUG) || defined(XFS_WARN)
348STATIC int 399STATIC int
349xfs_allocbt_keys_inorder( 400xfs_bnobt_keys_inorder(
350 struct xfs_btree_cur *cur, 401 struct xfs_btree_cur *cur,
351 union xfs_btree_key *k1, 402 union xfs_btree_key *k1,
352 union xfs_btree_key *k2) 403 union xfs_btree_key *k2)
353{ 404{
354 if (cur->bc_btnum == XFS_BTNUM_BNO) { 405 return be32_to_cpu(k1->alloc.ar_startblock) <
355 return be32_to_cpu(k1->alloc.ar_startblock) < 406 be32_to_cpu(k2->alloc.ar_startblock);
356 be32_to_cpu(k2->alloc.ar_startblock);
357 } else {
358 return be32_to_cpu(k1->alloc.ar_blockcount) <
359 be32_to_cpu(k2->alloc.ar_blockcount) ||
360 (k1->alloc.ar_blockcount == k2->alloc.ar_blockcount &&
361 be32_to_cpu(k1->alloc.ar_startblock) <
362 be32_to_cpu(k2->alloc.ar_startblock));
363 }
364} 407}
365 408
366STATIC int 409STATIC int
367xfs_allocbt_recs_inorder( 410xfs_bnobt_recs_inorder(
368 struct xfs_btree_cur *cur, 411 struct xfs_btree_cur *cur,
369 union xfs_btree_rec *r1, 412 union xfs_btree_rec *r1,
370 union xfs_btree_rec *r2) 413 union xfs_btree_rec *r2)
371{ 414{
372 if (cur->bc_btnum == XFS_BTNUM_BNO) { 415 return be32_to_cpu(r1->alloc.ar_startblock) +
373 return be32_to_cpu(r1->alloc.ar_startblock) + 416 be32_to_cpu(r1->alloc.ar_blockcount) <=
374 be32_to_cpu(r1->alloc.ar_blockcount) <= 417 be32_to_cpu(r2->alloc.ar_startblock);
375 be32_to_cpu(r2->alloc.ar_startblock); 418}
376 } else { 419
377 return be32_to_cpu(r1->alloc.ar_blockcount) < 420STATIC int
378 be32_to_cpu(r2->alloc.ar_blockcount) || 421xfs_cntbt_keys_inorder(
379 (r1->alloc.ar_blockcount == r2->alloc.ar_blockcount && 422 struct xfs_btree_cur *cur,
380 be32_to_cpu(r1->alloc.ar_startblock) < 423 union xfs_btree_key *k1,
381 be32_to_cpu(r2->alloc.ar_startblock)); 424 union xfs_btree_key *k2)
382 } 425{
426 return be32_to_cpu(k1->alloc.ar_blockcount) <
427 be32_to_cpu(k2->alloc.ar_blockcount) ||
428 (k1->alloc.ar_blockcount == k2->alloc.ar_blockcount &&
429 be32_to_cpu(k1->alloc.ar_startblock) <
430 be32_to_cpu(k2->alloc.ar_startblock));
383} 431}
384#endif /* DEBUG */
385 432
386static const struct xfs_btree_ops xfs_allocbt_ops = { 433STATIC int
434xfs_cntbt_recs_inorder(
435 struct xfs_btree_cur *cur,
436 union xfs_btree_rec *r1,
437 union xfs_btree_rec *r2)
438{
439 return be32_to_cpu(r1->alloc.ar_blockcount) <
440 be32_to_cpu(r2->alloc.ar_blockcount) ||
441 (r1->alloc.ar_blockcount == r2->alloc.ar_blockcount &&
442 be32_to_cpu(r1->alloc.ar_startblock) <
443 be32_to_cpu(r2->alloc.ar_startblock));
444}
445#endif /* DEBUG */
446
447static const struct xfs_btree_ops xfs_bnobt_ops = {
387 .rec_len = sizeof(xfs_alloc_rec_t), 448 .rec_len = sizeof(xfs_alloc_rec_t),
388 .key_len = sizeof(xfs_alloc_key_t), 449 .key_len = sizeof(xfs_alloc_key_t),
389 450
@@ -395,13 +456,39 @@ static const struct xfs_btree_ops xfs_allocbt_ops = {
395 .get_minrecs = xfs_allocbt_get_minrecs, 456 .get_minrecs = xfs_allocbt_get_minrecs,
396 .get_maxrecs = xfs_allocbt_get_maxrecs, 457 .get_maxrecs = xfs_allocbt_get_maxrecs,
397 .init_key_from_rec = xfs_allocbt_init_key_from_rec, 458 .init_key_from_rec = xfs_allocbt_init_key_from_rec,
459 .init_high_key_from_rec = xfs_bnobt_init_high_key_from_rec,
398 .init_rec_from_cur = xfs_allocbt_init_rec_from_cur, 460 .init_rec_from_cur = xfs_allocbt_init_rec_from_cur,
399 .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur, 461 .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur,
400 .key_diff = xfs_allocbt_key_diff, 462 .key_diff = xfs_bnobt_key_diff,
401 .buf_ops = &xfs_allocbt_buf_ops, 463 .buf_ops = &xfs_allocbt_buf_ops,
464 .diff_two_keys = xfs_bnobt_diff_two_keys,
402#if defined(DEBUG) || defined(XFS_WARN) 465#if defined(DEBUG) || defined(XFS_WARN)
403 .keys_inorder = xfs_allocbt_keys_inorder, 466 .keys_inorder = xfs_bnobt_keys_inorder,
404 .recs_inorder = xfs_allocbt_recs_inorder, 467 .recs_inorder = xfs_bnobt_recs_inorder,
468#endif
469};
470
471static const struct xfs_btree_ops xfs_cntbt_ops = {
472 .rec_len = sizeof(xfs_alloc_rec_t),
473 .key_len = sizeof(xfs_alloc_key_t),
474
475 .dup_cursor = xfs_allocbt_dup_cursor,
476 .set_root = xfs_allocbt_set_root,
477 .alloc_block = xfs_allocbt_alloc_block,
478 .free_block = xfs_allocbt_free_block,
479 .update_lastrec = xfs_allocbt_update_lastrec,
480 .get_minrecs = xfs_allocbt_get_minrecs,
481 .get_maxrecs = xfs_allocbt_get_maxrecs,
482 .init_key_from_rec = xfs_allocbt_init_key_from_rec,
483 .init_high_key_from_rec = xfs_cntbt_init_high_key_from_rec,
484 .init_rec_from_cur = xfs_allocbt_init_rec_from_cur,
485 .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur,
486 .key_diff = xfs_cntbt_key_diff,
487 .buf_ops = &xfs_allocbt_buf_ops,
488 .diff_two_keys = xfs_cntbt_diff_two_keys,
489#if defined(DEBUG) || defined(XFS_WARN)
490 .keys_inorder = xfs_cntbt_keys_inorder,
491 .recs_inorder = xfs_cntbt_recs_inorder,
405#endif 492#endif
406}; 493};
407 494
@@ -427,16 +514,15 @@ xfs_allocbt_init_cursor(
427 cur->bc_mp = mp; 514 cur->bc_mp = mp;
428 cur->bc_btnum = btnum; 515 cur->bc_btnum = btnum;
429 cur->bc_blocklog = mp->m_sb.sb_blocklog; 516 cur->bc_blocklog = mp->m_sb.sb_blocklog;
430 cur->bc_ops = &xfs_allocbt_ops;
431 if (btnum == XFS_BTNUM_BNO)
432 cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtb_2);
433 else
434 cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtc_2);
435 517
436 if (btnum == XFS_BTNUM_CNT) { 518 if (btnum == XFS_BTNUM_CNT) {
519 cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtc_2);
520 cur->bc_ops = &xfs_cntbt_ops;
437 cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]); 521 cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]);
438 cur->bc_flags = XFS_BTREE_LASTREC_UPDATE; 522 cur->bc_flags = XFS_BTREE_LASTREC_UPDATE;
439 } else { 523 } else {
524 cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtb_2);
525 cur->bc_ops = &xfs_bnobt_ops;
440 cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]); 526 cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]);
441 } 527 }
442 528
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 9bd104f32908..f02eb7673392 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -764,7 +764,6 @@ xfs_bmap_extents_to_btree(
764 args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino); 764 args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
765 } else if (dfops->dop_low) { 765 } else if (dfops->dop_low) {
766 args.type = XFS_ALLOCTYPE_START_BNO; 766 args.type = XFS_ALLOCTYPE_START_BNO;
767try_another_ag:
768 args.fsbno = *firstblock; 767 args.fsbno = *firstblock;
769 } else { 768 } else {
770 args.type = XFS_ALLOCTYPE_NEAR_BNO; 769 args.type = XFS_ALLOCTYPE_NEAR_BNO;
@@ -779,20 +778,6 @@ try_another_ag:
779 return error; 778 return error;
780 } 779 }
781 780
782 /*
783 * During a CoW operation, the allocation and bmbt updates occur in
784 * different transactions. The mapping code tries to put new bmbt
785 * blocks near extents being mapped, but the only way to guarantee this
786 * is if the alloc and the mapping happen in a single transaction that
787 * has a block reservation. That isn't the case here, so if we run out
788 * of space we'll try again with another AG.
789 */
790 if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) &&
791 args.fsbno == NULLFSBLOCK &&
792 args.type == XFS_ALLOCTYPE_NEAR_BNO) {
793 args.type = XFS_ALLOCTYPE_FIRST_AG;
794 goto try_another_ag;
795 }
796 if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) { 781 if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
797 xfs_iroot_realloc(ip, -1, whichfork); 782 xfs_iroot_realloc(ip, -1, whichfork);
798 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 783 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
@@ -925,7 +910,6 @@ xfs_bmap_local_to_extents(
925 * file currently fits in an inode. 910 * file currently fits in an inode.
926 */ 911 */
927 if (*firstblock == NULLFSBLOCK) { 912 if (*firstblock == NULLFSBLOCK) {
928try_another_ag:
929 args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino); 913 args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino);
930 args.type = XFS_ALLOCTYPE_START_BNO; 914 args.type = XFS_ALLOCTYPE_START_BNO;
931 } else { 915 } else {
@@ -938,19 +922,6 @@ try_another_ag:
938 if (error) 922 if (error)
939 goto done; 923 goto done;
940 924
941 /*
942 * During a CoW operation, the allocation and bmbt updates occur in
943 * different transactions. The mapping code tries to put new bmbt
944 * blocks near extents being mapped, but the only way to guarantee this
945 * is if the alloc and the mapping happen in a single transaction that
946 * has a block reservation. That isn't the case here, so if we run out
947 * of space we'll try again with another AG.
948 */
949 if (xfs_sb_version_hasreflink(&ip->i_mount->m_sb) &&
950 args.fsbno == NULLFSBLOCK &&
951 args.type == XFS_ALLOCTYPE_NEAR_BNO) {
952 goto try_another_ag;
953 }
954 /* Can't fail, the space was reserved. */ 925 /* Can't fail, the space was reserved. */
955 ASSERT(args.fsbno != NULLFSBLOCK); 926 ASSERT(args.fsbno != NULLFSBLOCK);
956 ASSERT(args.len == 1); 927 ASSERT(args.len == 1);
@@ -1260,7 +1231,6 @@ xfs_bmap_read_extents(
1260 xfs_fsblock_t bno; /* block # of "block" */ 1231 xfs_fsblock_t bno; /* block # of "block" */
1261 xfs_buf_t *bp; /* buffer for "block" */ 1232 xfs_buf_t *bp; /* buffer for "block" */
1262 int error; /* error return value */ 1233 int error; /* error return value */
1263 xfs_exntfmt_t exntf; /* XFS_EXTFMT_NOSTATE, if checking */
1264 xfs_extnum_t i, j; /* index into the extents list */ 1234 xfs_extnum_t i, j; /* index into the extents list */
1265 xfs_ifork_t *ifp; /* fork structure */ 1235 xfs_ifork_t *ifp; /* fork structure */
1266 int level; /* btree level, for checking */ 1236 int level; /* btree level, for checking */
@@ -1271,8 +1241,6 @@ xfs_bmap_read_extents(
1271 1241
1272 mp = ip->i_mount; 1242 mp = ip->i_mount;
1273 ifp = XFS_IFORK_PTR(ip, whichfork); 1243 ifp = XFS_IFORK_PTR(ip, whichfork);
1274 exntf = (whichfork != XFS_DATA_FORK) ? XFS_EXTFMT_NOSTATE :
1275 XFS_EXTFMT_INODE(ip);
1276 block = ifp->if_broot; 1244 block = ifp->if_broot;
1277 /* 1245 /*
1278 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out. 1246 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
@@ -1340,18 +1308,9 @@ xfs_bmap_read_extents(
1340 xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i); 1308 xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i);
1341 trp->l0 = be64_to_cpu(frp->l0); 1309 trp->l0 = be64_to_cpu(frp->l0);
1342 trp->l1 = be64_to_cpu(frp->l1); 1310 trp->l1 = be64_to_cpu(frp->l1);
1343 } 1311 if (!xfs_bmbt_validate_extent(mp, whichfork, trp)) {
1344 if (exntf == XFS_EXTFMT_NOSTATE) {
1345 /*
1346 * Check all attribute bmap btree records and
1347 * any "older" data bmap btree records for a
1348 * set bit in the "extent flag" position.
1349 */
1350 if (unlikely(xfs_check_nostate_extents(ifp,
1351 start, num_recs))) {
1352 XFS_ERROR_REPORT("xfs_bmap_read_extents(2)", 1312 XFS_ERROR_REPORT("xfs_bmap_read_extents(2)",
1353 XFS_ERRLEVEL_LOW, 1313 XFS_ERRLEVEL_LOW, mp);
1354 ip->i_mount);
1355 goto error0; 1314 goto error0;
1356 } 1315 }
1357 } 1316 }
@@ -2879,27 +2838,30 @@ xfs_bmap_add_extent_hole_delay(
2879 */ 2838 */
2880STATIC int /* error */ 2839STATIC int /* error */
2881xfs_bmap_add_extent_hole_real( 2840xfs_bmap_add_extent_hole_real(
2882 struct xfs_bmalloca *bma, 2841 struct xfs_trans *tp,
2883 int whichfork) 2842 struct xfs_inode *ip,
2843 int whichfork,
2844 xfs_extnum_t *idx,
2845 struct xfs_btree_cur **curp,
2846 struct xfs_bmbt_irec *new,
2847 xfs_fsblock_t *first,
2848 struct xfs_defer_ops *dfops,
2849 int *logflagsp)
2884{ 2850{
2885 struct xfs_bmbt_irec *new = &bma->got; 2851 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
2852 struct xfs_mount *mp = ip->i_mount;
2853 struct xfs_btree_cur *cur = *curp;
2886 int error; /* error return value */ 2854 int error; /* error return value */
2887 int i; /* temp state */ 2855 int i; /* temp state */
2888 xfs_ifork_t *ifp; /* inode fork pointer */
2889 xfs_bmbt_irec_t left; /* left neighbor extent entry */ 2856 xfs_bmbt_irec_t left; /* left neighbor extent entry */
2890 xfs_bmbt_irec_t right; /* right neighbor extent entry */ 2857 xfs_bmbt_irec_t right; /* right neighbor extent entry */
2891 int rval=0; /* return value (logging flags) */ 2858 int rval=0; /* return value (logging flags) */
2892 int state; /* state bits, accessed thru macros */ 2859 int state; /* state bits, accessed thru macros */
2893 struct xfs_mount *mp;
2894 2860
2895 mp = bma->ip->i_mount; 2861 ASSERT(*idx >= 0);
2896 ifp = XFS_IFORK_PTR(bma->ip, whichfork); 2862 ASSERT(*idx <= xfs_iext_count(ifp));
2897
2898 ASSERT(bma->idx >= 0);
2899 ASSERT(bma->idx <= xfs_iext_count(ifp));
2900 ASSERT(!isnullstartblock(new->br_startblock)); 2863 ASSERT(!isnullstartblock(new->br_startblock));
2901 ASSERT(!bma->cur || 2864 ASSERT(!cur || !(cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
2902 !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
2903 2865
2904 XFS_STATS_INC(mp, xs_add_exlist); 2866 XFS_STATS_INC(mp, xs_add_exlist);
2905 2867
@@ -2912,9 +2874,9 @@ xfs_bmap_add_extent_hole_real(
2912 /* 2874 /*
2913 * Check and set flags if this segment has a left neighbor. 2875 * Check and set flags if this segment has a left neighbor.
2914 */ 2876 */
2915 if (bma->idx > 0) { 2877 if (*idx > 0) {
2916 state |= BMAP_LEFT_VALID; 2878 state |= BMAP_LEFT_VALID;
2917 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &left); 2879 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &left);
2918 if (isnullstartblock(left.br_startblock)) 2880 if (isnullstartblock(left.br_startblock))
2919 state |= BMAP_LEFT_DELAY; 2881 state |= BMAP_LEFT_DELAY;
2920 } 2882 }
@@ -2923,9 +2885,9 @@ xfs_bmap_add_extent_hole_real(
2923 * Check and set flags if this segment has a current value. 2885 * Check and set flags if this segment has a current value.
2924 * Not true if we're inserting into the "hole" at eof. 2886 * Not true if we're inserting into the "hole" at eof.
2925 */ 2887 */
2926 if (bma->idx < xfs_iext_count(ifp)) { 2888 if (*idx < xfs_iext_count(ifp)) {
2927 state |= BMAP_RIGHT_VALID; 2889 state |= BMAP_RIGHT_VALID;
2928 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &right); 2890 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right);
2929 if (isnullstartblock(right.br_startblock)) 2891 if (isnullstartblock(right.br_startblock))
2930 state |= BMAP_RIGHT_DELAY; 2892 state |= BMAP_RIGHT_DELAY;
2931 } 2893 }
@@ -2962,36 +2924,36 @@ xfs_bmap_add_extent_hole_real(
2962 * left and on the right. 2924 * left and on the right.
2963 * Merge all three into a single extent record. 2925 * Merge all three into a single extent record.
2964 */ 2926 */
2965 --bma->idx; 2927 --*idx;
2966 trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); 2928 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2967 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx), 2929 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
2968 left.br_blockcount + new->br_blockcount + 2930 left.br_blockcount + new->br_blockcount +
2969 right.br_blockcount); 2931 right.br_blockcount);
2970 trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); 2932 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2971 2933
2972 xfs_iext_remove(bma->ip, bma->idx + 1, 1, state); 2934 xfs_iext_remove(ip, *idx + 1, 1, state);
2973 2935
2974 XFS_IFORK_NEXT_SET(bma->ip, whichfork, 2936 XFS_IFORK_NEXT_SET(ip, whichfork,
2975 XFS_IFORK_NEXTENTS(bma->ip, whichfork) - 1); 2937 XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
2976 if (bma->cur == NULL) { 2938 if (cur == NULL) {
2977 rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); 2939 rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
2978 } else { 2940 } else {
2979 rval = XFS_ILOG_CORE; 2941 rval = XFS_ILOG_CORE;
2980 error = xfs_bmbt_lookup_eq(bma->cur, right.br_startoff, 2942 error = xfs_bmbt_lookup_eq(cur, right.br_startoff,
2981 right.br_startblock, right.br_blockcount, 2943 right.br_startblock, right.br_blockcount,
2982 &i); 2944 &i);
2983 if (error) 2945 if (error)
2984 goto done; 2946 goto done;
2985 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2947 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2986 error = xfs_btree_delete(bma->cur, &i); 2948 error = xfs_btree_delete(cur, &i);
2987 if (error) 2949 if (error)
2988 goto done; 2950 goto done;
2989 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2951 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2990 error = xfs_btree_decrement(bma->cur, 0, &i); 2952 error = xfs_btree_decrement(cur, 0, &i);
2991 if (error) 2953 if (error)
2992 goto done; 2954 goto done;
2993 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2955 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2994 error = xfs_bmbt_update(bma->cur, left.br_startoff, 2956 error = xfs_bmbt_update(cur, left.br_startoff,
2995 left.br_startblock, 2957 left.br_startblock,
2996 left.br_blockcount + 2958 left.br_blockcount +
2997 new->br_blockcount + 2959 new->br_blockcount +
@@ -3008,23 +2970,23 @@ xfs_bmap_add_extent_hole_real(
3008 * on the left. 2970 * on the left.
3009 * Merge the new allocation with the left neighbor. 2971 * Merge the new allocation with the left neighbor.
3010 */ 2972 */
3011 --bma->idx; 2973 --*idx;
3012 trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); 2974 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
3013 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx), 2975 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
3014 left.br_blockcount + new->br_blockcount); 2976 left.br_blockcount + new->br_blockcount);
3015 trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); 2977 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
3016 2978
3017 if (bma->cur == NULL) { 2979 if (cur == NULL) {
3018 rval = xfs_ilog_fext(whichfork); 2980 rval = xfs_ilog_fext(whichfork);
3019 } else { 2981 } else {
3020 rval = 0; 2982 rval = 0;
3021 error = xfs_bmbt_lookup_eq(bma->cur, left.br_startoff, 2983 error = xfs_bmbt_lookup_eq(cur, left.br_startoff,
3022 left.br_startblock, left.br_blockcount, 2984 left.br_startblock, left.br_blockcount,
3023 &i); 2985 &i);
3024 if (error) 2986 if (error)
3025 goto done; 2987 goto done;
3026 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2988 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
3027 error = xfs_bmbt_update(bma->cur, left.br_startoff, 2989 error = xfs_bmbt_update(cur, left.br_startoff,
3028 left.br_startblock, 2990 left.br_startblock,
3029 left.br_blockcount + 2991 left.br_blockcount +
3030 new->br_blockcount, 2992 new->br_blockcount,
@@ -3040,25 +3002,25 @@ xfs_bmap_add_extent_hole_real(
3040 * on the right. 3002 * on the right.
3041 * Merge the new allocation with the right neighbor. 3003 * Merge the new allocation with the right neighbor.
3042 */ 3004 */
3043 trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); 3005 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
3044 xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx), 3006 xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx),
3045 new->br_startoff, new->br_startblock, 3007 new->br_startoff, new->br_startblock,
3046 new->br_blockcount + right.br_blockcount, 3008 new->br_blockcount + right.br_blockcount,
3047 right.br_state); 3009 right.br_state);
3048 trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); 3010 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
3049 3011
3050 if (bma->cur == NULL) { 3012 if (cur == NULL) {
3051 rval = xfs_ilog_fext(whichfork); 3013 rval = xfs_ilog_fext(whichfork);
3052 } else { 3014 } else {
3053 rval = 0; 3015 rval = 0;
3054 error = xfs_bmbt_lookup_eq(bma->cur, 3016 error = xfs_bmbt_lookup_eq(cur,
3055 right.br_startoff, 3017 right.br_startoff,
3056 right.br_startblock, 3018 right.br_startblock,
3057 right.br_blockcount, &i); 3019 right.br_blockcount, &i);
3058 if (error) 3020 if (error)
3059 goto done; 3021 goto done;
3060 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 3022 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
3061 error = xfs_bmbt_update(bma->cur, new->br_startoff, 3023 error = xfs_bmbt_update(cur, new->br_startoff,
3062 new->br_startblock, 3024 new->br_startblock,
3063 new->br_blockcount + 3025 new->br_blockcount +
3064 right.br_blockcount, 3026 right.br_blockcount,
@@ -3074,22 +3036,22 @@ xfs_bmap_add_extent_hole_real(
3074 * real allocation. 3036 * real allocation.
3075 * Insert a new entry. 3037 * Insert a new entry.
3076 */ 3038 */
3077 xfs_iext_insert(bma->ip, bma->idx, 1, new, state); 3039 xfs_iext_insert(ip, *idx, 1, new, state);
3078 XFS_IFORK_NEXT_SET(bma->ip, whichfork, 3040 XFS_IFORK_NEXT_SET(ip, whichfork,
3079 XFS_IFORK_NEXTENTS(bma->ip, whichfork) + 1); 3041 XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
3080 if (bma->cur == NULL) { 3042 if (cur == NULL) {
3081 rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); 3043 rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
3082 } else { 3044 } else {
3083 rval = XFS_ILOG_CORE; 3045 rval = XFS_ILOG_CORE;
3084 error = xfs_bmbt_lookup_eq(bma->cur, 3046 error = xfs_bmbt_lookup_eq(cur,
3085 new->br_startoff, 3047 new->br_startoff,
3086 new->br_startblock, 3048 new->br_startblock,
3087 new->br_blockcount, &i); 3049 new->br_blockcount, &i);
3088 if (error) 3050 if (error)
3089 goto done; 3051 goto done;
3090 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); 3052 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
3091 bma->cur->bc_rec.b.br_state = new->br_state; 3053 cur->bc_rec.b.br_state = new->br_state;
3092 error = xfs_btree_insert(bma->cur, &i); 3054 error = xfs_btree_insert(cur, &i);
3093 if (error) 3055 if (error)
3094 goto done; 3056 goto done;
3095 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 3057 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
@@ -3098,30 +3060,30 @@ xfs_bmap_add_extent_hole_real(
3098 } 3060 }
3099 3061
3100 /* add reverse mapping */ 3062 /* add reverse mapping */
3101 error = xfs_rmap_map_extent(mp, bma->dfops, bma->ip, whichfork, new); 3063 error = xfs_rmap_map_extent(mp, dfops, ip, whichfork, new);
3102 if (error) 3064 if (error)
3103 goto done; 3065 goto done;
3104 3066
3105 /* convert to a btree if necessary */ 3067 /* convert to a btree if necessary */
3106 if (xfs_bmap_needs_btree(bma->ip, whichfork)) { 3068 if (xfs_bmap_needs_btree(ip, whichfork)) {
3107 int tmp_logflags; /* partial log flag return val */ 3069 int tmp_logflags; /* partial log flag return val */
3108 3070
3109 ASSERT(bma->cur == NULL); 3071 ASSERT(cur == NULL);
3110 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, 3072 error = xfs_bmap_extents_to_btree(tp, ip, first, dfops, curp,
3111 bma->firstblock, bma->dfops, &bma->cur,
3112 0, &tmp_logflags, whichfork); 3073 0, &tmp_logflags, whichfork);
3113 bma->logflags |= tmp_logflags; 3074 *logflagsp |= tmp_logflags;
3075 cur = *curp;
3114 if (error) 3076 if (error)
3115 goto done; 3077 goto done;
3116 } 3078 }
3117 3079
3118 /* clear out the allocated field, done with it now in any case. */ 3080 /* clear out the allocated field, done with it now in any case. */
3119 if (bma->cur) 3081 if (cur)
3120 bma->cur->bc_private.b.allocated = 0; 3082 cur->bc_private.b.allocated = 0;
3121 3083
3122 xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork); 3084 xfs_bmap_check_leaf_extents(cur, ip, whichfork);
3123done: 3085done:
3124 bma->logflags |= rval; 3086 *logflagsp |= rval;
3125 return error; 3087 return error;
3126} 3088}
3127 3089
@@ -3853,60 +3815,6 @@ xfs_bmap_btalloc(
3853} 3815}
3854 3816
3855/* 3817/*
3856 * For a remap operation, just "allocate" an extent at the address that the
3857 * caller passed in, and ensure that the AGFL is the right size. The caller
3858 * will then map the "allocated" extent into the file somewhere.
3859 */
3860STATIC int
3861xfs_bmap_remap_alloc(
3862 struct xfs_bmalloca *ap)
3863{
3864 struct xfs_trans *tp = ap->tp;
3865 struct xfs_mount *mp = tp->t_mountp;
3866 xfs_agblock_t bno;
3867 struct xfs_alloc_arg args;
3868 int error;
3869
3870 /*
3871 * validate that the block number is legal - the enables us to detect
3872 * and handle a silent filesystem corruption rather than crashing.
3873 */
3874 memset(&args, 0, sizeof(struct xfs_alloc_arg));
3875 args.tp = ap->tp;
3876 args.mp = ap->tp->t_mountp;
3877 bno = *ap->firstblock;
3878 args.agno = XFS_FSB_TO_AGNO(mp, bno);
3879 args.agbno = XFS_FSB_TO_AGBNO(mp, bno);
3880 if (args.agno >= mp->m_sb.sb_agcount ||
3881 args.agbno >= mp->m_sb.sb_agblocks)
3882 return -EFSCORRUPTED;
3883
3884 /* "Allocate" the extent from the range we passed in. */
3885 trace_xfs_bmap_remap_alloc(ap->ip, *ap->firstblock, ap->length);
3886 ap->blkno = bno;
3887 ap->ip->i_d.di_nblocks += ap->length;
3888 xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
3889
3890 /* Fix the freelist, like a real allocator does. */
3891 args.datatype = ap->datatype;
3892 args.pag = xfs_perag_get(args.mp, args.agno);
3893 ASSERT(args.pag);
3894
3895 /*
3896 * The freelist fixing code will decline the allocation if
3897 * the size and shape of the free space doesn't allow for
3898 * allocating the extent and updating all the metadata that
3899 * happens during an allocation. We're remapping, not
3900 * allocating, so skip that check by pretending to be freeing.
3901 */
3902 error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
3903 xfs_perag_put(args.pag);
3904 if (error)
3905 trace_xfs_bmap_remap_alloc_error(ap->ip, error, _RET_IP_);
3906 return error;
3907}
3908
3909/*
3910 * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file. 3818 * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
3911 * It figures out where to ask the underlying allocator to put the new extent. 3819 * It figures out where to ask the underlying allocator to put the new extent.
3912 */ 3820 */
@@ -3914,8 +3822,6 @@ STATIC int
3914xfs_bmap_alloc( 3822xfs_bmap_alloc(
3915 struct xfs_bmalloca *ap) /* bmap alloc argument struct */ 3823 struct xfs_bmalloca *ap) /* bmap alloc argument struct */
3916{ 3824{
3917 if (ap->flags & XFS_BMAPI_REMAP)
3918 return xfs_bmap_remap_alloc(ap);
3919 if (XFS_IS_REALTIME_INODE(ap->ip) && 3825 if (XFS_IS_REALTIME_INODE(ap->ip) &&
3920 xfs_alloc_is_userdata(ap->datatype)) 3826 xfs_alloc_is_userdata(ap->datatype))
3921 return xfs_bmap_rtalloc(ap); 3827 return xfs_bmap_rtalloc(ap);
@@ -4386,7 +4292,9 @@ xfs_bmapi_allocate(
4386 if (bma->wasdel) 4292 if (bma->wasdel)
4387 error = xfs_bmap_add_extent_delay_real(bma, whichfork); 4293 error = xfs_bmap_add_extent_delay_real(bma, whichfork);
4388 else 4294 else
4389 error = xfs_bmap_add_extent_hole_real(bma, whichfork); 4295 error = xfs_bmap_add_extent_hole_real(bma->tp, bma->ip,
4296 whichfork, &bma->idx, &bma->cur, &bma->got,
4297 bma->firstblock, bma->dfops, &bma->logflags);
4390 4298
4391 bma->logflags |= tmp_logflags; 4299 bma->logflags |= tmp_logflags;
4392 if (error) 4300 if (error)
@@ -4549,9 +4457,7 @@ xfs_bmapi_write(
4549 ASSERT(len > 0); 4457 ASSERT(len > 0);
4550 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL); 4458 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL);
4551 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 4459 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
4552 ASSERT(!(flags & XFS_BMAPI_REMAP) || whichfork == XFS_DATA_FORK); 4460 ASSERT(!(flags & XFS_BMAPI_REMAP));
4553 ASSERT(!(flags & XFS_BMAPI_PREALLOC) || !(flags & XFS_BMAPI_REMAP));
4554 ASSERT(!(flags & XFS_BMAPI_CONVERT) || !(flags & XFS_BMAPI_REMAP));
4555 4461
4556 /* zeroing is for currently only for data extents, not metadata */ 4462 /* zeroing is for currently only for data extents, not metadata */
4557 ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) != 4463 ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
@@ -4635,13 +4541,8 @@ xfs_bmapi_write(
4635 } else { 4541 } else {
4636 need_alloc = true; 4542 need_alloc = true;
4637 } 4543 }
4638 } else { 4544 } else if (isnullstartblock(bma.got.br_startblock)) {
4639 /* 4545 wasdelay = true;
4640 * Make sure we only reflink into a hole.
4641 */
4642 ASSERT(!(flags & XFS_BMAPI_REMAP));
4643 if (isnullstartblock(bma.got.br_startblock))
4644 wasdelay = true;
4645 } 4546 }
4646 4547
4647 /* 4548 /*
@@ -4770,6 +4671,93 @@ error0:
4770 return error; 4671 return error;
4771} 4672}
4772 4673
4674static int
4675xfs_bmapi_remap(
4676 struct xfs_trans *tp,
4677 struct xfs_inode *ip,
4678 xfs_fileoff_t bno,
4679 xfs_filblks_t len,
4680 xfs_fsblock_t startblock,
4681 struct xfs_defer_ops *dfops)
4682{
4683 struct xfs_mount *mp = ip->i_mount;
4684 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
4685 struct xfs_btree_cur *cur = NULL;
4686 xfs_fsblock_t firstblock = NULLFSBLOCK;
4687 struct xfs_bmbt_irec got;
4688 xfs_extnum_t idx;
4689 int logflags = 0, error;
4690
4691 ASSERT(len > 0);
4692 ASSERT(len <= (xfs_filblks_t)MAXEXTLEN);
4693 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
4694
4695 if (unlikely(XFS_TEST_ERROR(
4696 (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS &&
4697 XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE),
4698 mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
4699 XFS_ERROR_REPORT("xfs_bmapi_remap", XFS_ERRLEVEL_LOW, mp);
4700 return -EFSCORRUPTED;
4701 }
4702
4703 if (XFS_FORCED_SHUTDOWN(mp))
4704 return -EIO;
4705
4706 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
4707 error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
4708 if (error)
4709 return error;
4710 }
4711
4712 if (xfs_iext_lookup_extent(ip, ifp, bno, &idx, &got)) {
4713 /* make sure we only reflink into a hole. */
4714 ASSERT(got.br_startoff > bno);
4715 ASSERT(got.br_startoff - bno >= len);
4716 }
4717
4718 ip->i_d.di_nblocks += len;
4719 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
4720
4721 if (ifp->if_flags & XFS_IFBROOT) {
4722 cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
4723 cur->bc_private.b.firstblock = firstblock;
4724 cur->bc_private.b.dfops = dfops;
4725 cur->bc_private.b.flags = 0;
4726 }
4727
4728 got.br_startoff = bno;
4729 got.br_startblock = startblock;
4730 got.br_blockcount = len;
4731 got.br_state = XFS_EXT_NORM;
4732
4733 error = xfs_bmap_add_extent_hole_real(tp, ip, XFS_DATA_FORK, &idx, &cur,
4734 &got, &firstblock, dfops, &logflags);
4735 if (error)
4736 goto error0;
4737
4738 if (xfs_bmap_wants_extents(ip, XFS_DATA_FORK)) {
4739 int tmp_logflags = 0;
4740
4741 error = xfs_bmap_btree_to_extents(tp, ip, cur,
4742 &tmp_logflags, XFS_DATA_FORK);
4743 logflags |= tmp_logflags;
4744 }
4745
4746error0:
4747 if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS)
4748 logflags &= ~XFS_ILOG_DEXT;
4749 else if (ip->i_d.di_format != XFS_DINODE_FMT_BTREE)
4750 logflags &= ~XFS_ILOG_DBROOT;
4751
4752 if (logflags)
4753 xfs_trans_log_inode(tp, ip, logflags);
4754 if (cur) {
4755 xfs_btree_del_cursor(cur,
4756 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
4757 }
4758 return error;
4759}
4760
4773/* 4761/*
4774 * When a delalloc extent is split (e.g., due to a hole punch), the original 4762 * When a delalloc extent is split (e.g., due to a hole punch), the original
4775 * indlen reservation must be shared across the two new extents that are left 4763 * indlen reservation must be shared across the two new extents that are left
@@ -4887,7 +4875,7 @@ xfs_bmap_del_extent_delay(
4887 ASSERT(got_endoff >= del_endoff); 4875 ASSERT(got_endoff >= del_endoff);
4888 4876
4889 if (isrt) { 4877 if (isrt) {
4890 int64_t rtexts = XFS_FSB_TO_B(mp, del->br_blockcount); 4878 uint64_t rtexts = XFS_FSB_TO_B(mp, del->br_blockcount);
4891 4879
4892 do_div(rtexts, mp->m_sb.sb_rextsize); 4880 do_div(rtexts, mp->m_sb.sb_rextsize);
4893 xfs_mod_frextents(mp, rtexts); 4881 xfs_mod_frextents(mp, rtexts);
@@ -6488,27 +6476,15 @@ xfs_bmap_finish_one(
6488 xfs_filblks_t blockcount, 6476 xfs_filblks_t blockcount,
6489 xfs_exntst_t state) 6477 xfs_exntst_t state)
6490{ 6478{
6491 struct xfs_bmbt_irec bmap; 6479 int error = 0, done;
6492 int nimaps = 1;
6493 xfs_fsblock_t firstfsb;
6494 int flags = XFS_BMAPI_REMAP;
6495 int done;
6496 int error = 0;
6497
6498 bmap.br_startblock = startblock;
6499 bmap.br_startoff = startoff;
6500 bmap.br_blockcount = blockcount;
6501 bmap.br_state = state;
6502 6480
6503 trace_xfs_bmap_deferred(tp->t_mountp, 6481 trace_xfs_bmap_deferred(tp->t_mountp,
6504 XFS_FSB_TO_AGNO(tp->t_mountp, startblock), type, 6482 XFS_FSB_TO_AGNO(tp->t_mountp, startblock), type,
6505 XFS_FSB_TO_AGBNO(tp->t_mountp, startblock), 6483 XFS_FSB_TO_AGBNO(tp->t_mountp, startblock),
6506 ip->i_ino, whichfork, startoff, blockcount, state); 6484 ip->i_ino, whichfork, startoff, blockcount, state);
6507 6485
6508 if (whichfork != XFS_DATA_FORK && whichfork != XFS_ATTR_FORK) 6486 if (WARN_ON_ONCE(whichfork != XFS_DATA_FORK))
6509 return -EFSCORRUPTED; 6487 return -EFSCORRUPTED;
6510 if (whichfork == XFS_ATTR_FORK)
6511 flags |= XFS_BMAPI_ATTRFORK;
6512 6488
6513 if (XFS_TEST_ERROR(false, tp->t_mountp, 6489 if (XFS_TEST_ERROR(false, tp->t_mountp,
6514 XFS_ERRTAG_BMAP_FINISH_ONE, 6490 XFS_ERRTAG_BMAP_FINISH_ONE,
@@ -6517,16 +6493,12 @@ xfs_bmap_finish_one(
6517 6493
6518 switch (type) { 6494 switch (type) {
6519 case XFS_BMAP_MAP: 6495 case XFS_BMAP_MAP:
6520 firstfsb = bmap.br_startblock; 6496 error = xfs_bmapi_remap(tp, ip, startoff, blockcount,
6521 error = xfs_bmapi_write(tp, ip, bmap.br_startoff, 6497 startblock, dfops);
6522 bmap.br_blockcount, flags, &firstfsb,
6523 bmap.br_blockcount, &bmap, &nimaps,
6524 dfops);
6525 break; 6498 break;
6526 case XFS_BMAP_UNMAP: 6499 case XFS_BMAP_UNMAP:
6527 error = xfs_bunmapi(tp, ip, bmap.br_startoff, 6500 error = xfs_bunmapi(tp, ip, startoff, blockcount,
6528 bmap.br_blockcount, flags, 1, &firstfsb, 6501 XFS_BMAPI_REMAP, 1, &startblock, dfops, &done);
6529 dfops, &done);
6530 ASSERT(done); 6502 ASSERT(done);
6531 break; 6503 break;
6532 default: 6504 default:
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index cdef87db5262..c35a14fa1527 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -172,6 +172,18 @@ static inline int xfs_bmapi_whichfork(int bmapi_flags)
172 172
173 173
174/* 174/*
175 * Return true if the extent is a real, allocated extent, or false if it is a
176 * delayed allocation, and unwritten extent or a hole.
177 */
178static inline bool xfs_bmap_is_real_extent(struct xfs_bmbt_irec *irec)
179{
180 return irec->br_state != XFS_EXT_UNWRITTEN &&
181 irec->br_startblock != HOLESTARTBLOCK &&
182 irec->br_startblock != DELAYSTARTBLOCK &&
183 !isnullstartblock(irec->br_startblock);
184}
185
186/*
175 * This macro is used to determine how many extents will be shifted 187 * This macro is used to determine how many extents will be shifted
176 * in one write transaction. We could require two splits, 188 * in one write transaction. We could require two splits,
177 * an extent move on the first and an extent merge on the second, 189 * an extent move on the first and an extent merge on the second,
@@ -232,8 +244,6 @@ int xfs_bmap_del_extent_delay(struct xfs_inode *ip, int whichfork,
232 struct xfs_bmbt_irec *del); 244 struct xfs_bmbt_irec *del);
233void xfs_bmap_del_extent_cow(struct xfs_inode *ip, xfs_extnum_t *idx, 245void xfs_bmap_del_extent_cow(struct xfs_inode *ip, xfs_extnum_t *idx,
234 struct xfs_bmbt_irec *got, struct xfs_bmbt_irec *del); 246 struct xfs_bmbt_irec *got, struct xfs_bmbt_irec *del);
235int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
236 xfs_extnum_t num);
237uint xfs_default_attroffset(struct xfs_inode *ip); 247uint xfs_default_attroffset(struct xfs_inode *ip);
238int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip, 248int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip,
239 xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb, 249 xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb,
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index fd55db479385..6cba69aff077 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -366,32 +366,6 @@ xfs_bmbt_to_bmdr(
366 memcpy(tpp, fpp, sizeof(*fpp) * dmxr); 366 memcpy(tpp, fpp, sizeof(*fpp) * dmxr);
367} 367}
368 368
369/*
370 * Check extent records, which have just been read, for
371 * any bit in the extent flag field. ASSERT on debug
372 * kernels, as this condition should not occur.
373 * Return an error condition (1) if any flags found,
374 * otherwise return 0.
375 */
376
377int
378xfs_check_nostate_extents(
379 xfs_ifork_t *ifp,
380 xfs_extnum_t idx,
381 xfs_extnum_t num)
382{
383 for (; num > 0; num--, idx++) {
384 xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx);
385 if ((ep->l0 >>
386 (64 - BMBT_EXNTFLAG_BITLEN)) != 0) {
387 ASSERT(0);
388 return 1;
389 }
390 }
391 return 0;
392}
393
394
395STATIC struct xfs_btree_cur * 369STATIC struct xfs_btree_cur *
396xfs_bmbt_dup_cursor( 370xfs_bmbt_dup_cursor(
397 struct xfs_btree_cur *cur) 371 struct xfs_btree_cur *cur)
@@ -448,7 +422,6 @@ xfs_bmbt_alloc_block(
448 if (args.fsbno == NULLFSBLOCK) { 422 if (args.fsbno == NULLFSBLOCK) {
449 args.fsbno = be64_to_cpu(start->l); 423 args.fsbno = be64_to_cpu(start->l);
450 args.type = XFS_ALLOCTYPE_START_BNO; 424 args.type = XFS_ALLOCTYPE_START_BNO;
451try_another_ag:
452 /* 425 /*
453 * Make sure there is sufficient room left in the AG to 426 * Make sure there is sufficient room left in the AG to
454 * complete a full tree split for an extent insert. If 427 * complete a full tree split for an extent insert. If
@@ -477,22 +450,6 @@ try_another_ag:
477 if (error) 450 if (error)
478 goto error0; 451 goto error0;
479 452
480 /*
481 * During a CoW operation, the allocation and bmbt updates occur in
482 * different transactions. The mapping code tries to put new bmbt
483 * blocks near extents being mapped, but the only way to guarantee this
484 * is if the alloc and the mapping happen in a single transaction that
485 * has a block reservation. That isn't the case here, so if we run out
486 * of space we'll try again with another AG.
487 */
488 if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) &&
489 args.fsbno == NULLFSBLOCK &&
490 args.type == XFS_ALLOCTYPE_NEAR_BNO) {
491 args.fsbno = cur->bc_private.b.firstblock;
492 args.type = XFS_ALLOCTYPE_FIRST_AG;
493 goto try_another_ag;
494 }
495
496 if (args.fsbno == NULLFSBLOCK && args.minleft) { 453 if (args.fsbno == NULLFSBLOCK && args.minleft) {
497 /* 454 /*
498 * Could not find an AG with enough free space to satisfy 455 * Could not find an AG with enough free space to satisfy
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.h b/fs/xfs/libxfs/xfs_bmap_btree.h
index 819a8a4dee95..9da5a8d4f184 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.h
+++ b/fs/xfs/libxfs/xfs_bmap_btree.h
@@ -25,14 +25,6 @@ struct xfs_inode;
25struct xfs_trans; 25struct xfs_trans;
26 26
27/* 27/*
28 * Extent state and extent format macros.
29 */
30#define XFS_EXTFMT_INODE(x) \
31 (xfs_sb_version_hasextflgbit(&((x)->i_mount->m_sb)) ? \
32 XFS_EXTFMT_HASSTATE : XFS_EXTFMT_NOSTATE)
33#define ISUNWRITTEN(x) ((x)->br_state == XFS_EXT_UNWRITTEN)
34
35/*
36 * Btree block header size depends on a superblock flag. 28 * Btree block header size depends on a superblock flag.
37 */ 29 */
38#define XFS_BMBT_BLOCK_LEN(mp) \ 30#define XFS_BMBT_BLOCK_LEN(mp) \
@@ -140,4 +132,18 @@ extern int xfs_bmbt_change_owner(struct xfs_trans *tp, struct xfs_inode *ip,
140extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *, 132extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *,
141 struct xfs_trans *, struct xfs_inode *, int); 133 struct xfs_trans *, struct xfs_inode *, int);
142 134
135/*
136 * Check that the extent does not contain an invalid unwritten extent flag.
137 */
138static inline bool xfs_bmbt_validate_extent(struct xfs_mount *mp, int whichfork,
139 struct xfs_bmbt_rec_host *ep)
140{
141 if (ep->l0 >> (64 - BMBT_EXNTFLAG_BITLEN) == 0)
142 return true;
143 if (whichfork == XFS_DATA_FORK &&
144 xfs_sb_version_hasextflgbit(&mp->m_sb))
145 return true;
146 return false;
147}
148
143#endif /* __XFS_BMAP_BTREE_H__ */ 149#endif /* __XFS_BMAP_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 3059a3ec7ecb..5392674bf893 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -4842,6 +4842,21 @@ xfs_btree_query_range(
4842 fn, priv); 4842 fn, priv);
4843} 4843}
4844 4844
4845/* Query a btree for all records. */
4846int
4847xfs_btree_query_all(
4848 struct xfs_btree_cur *cur,
4849 xfs_btree_query_range_fn fn,
4850 void *priv)
4851{
4852 union xfs_btree_irec low_rec;
4853 union xfs_btree_irec high_rec;
4854
4855 memset(&low_rec, 0, sizeof(low_rec));
4856 memset(&high_rec, 0xFF, sizeof(high_rec));
4857 return xfs_btree_query_range(cur, &low_rec, &high_rec, fn, priv);
4858}
4859
4845/* 4860/*
4846 * Calculate the number of blocks needed to store a given number of records 4861 * Calculate the number of blocks needed to store a given number of records
4847 * in a short-format (per-AG metadata) btree. 4862 * in a short-format (per-AG metadata) btree.
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index 4bb62580a7fd..27bed08261c5 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -496,6 +496,8 @@ typedef int (*xfs_btree_query_range_fn)(struct xfs_btree_cur *cur,
496int xfs_btree_query_range(struct xfs_btree_cur *cur, 496int xfs_btree_query_range(struct xfs_btree_cur *cur,
497 union xfs_btree_irec *low_rec, union xfs_btree_irec *high_rec, 497 union xfs_btree_irec *low_rec, union xfs_btree_irec *high_rec,
498 xfs_btree_query_range_fn fn, void *priv); 498 xfs_btree_query_range_fn fn, void *priv);
499int xfs_btree_query_all(struct xfs_btree_cur *cur, xfs_btree_query_range_fn fn,
500 void *priv);
499 501
500typedef int (*xfs_btree_visit_blocks_fn)(struct xfs_btree_cur *cur, int level, 502typedef int (*xfs_btree_visit_blocks_fn)(struct xfs_btree_cur *cur, int level,
501 void *data); 503 void *data);
diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c
index ac9a003dd29a..747085b4ef44 100644
--- a/fs/xfs/libxfs/xfs_dquot_buf.c
+++ b/fs/xfs/libxfs/xfs_dquot_buf.c
@@ -35,13 +35,8 @@ int
35xfs_calc_dquots_per_chunk( 35xfs_calc_dquots_per_chunk(
36 unsigned int nbblks) /* basic block units */ 36 unsigned int nbblks) /* basic block units */
37{ 37{
38 unsigned int ndquots;
39
40 ASSERT(nbblks > 0); 38 ASSERT(nbblks > 0);
41 ndquots = BBTOB(nbblks); 39 return BBTOB(nbblks) / sizeof(xfs_dqblk_t);
42 do_div(ndquots, sizeof(xfs_dqblk_t));
43
44 return ndquots;
45} 40}
46 41
47/* 42/*
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 6b7579e7b60a..a1dccd8d96bc 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -930,10 +930,8 @@ static inline uint xfs_dinode_size(int version)
930/* 930/*
931 * The 32 bit link count in the inode theoretically maxes out at UINT_MAX. 931 * The 32 bit link count in the inode theoretically maxes out at UINT_MAX.
932 * Since the pathconf interface is signed, we use 2^31 - 1 instead. 932 * Since the pathconf interface is signed, we use 2^31 - 1 instead.
933 * The old inode format had a 16 bit link count, so its maximum is USHRT_MAX.
934 */ 933 */
935#define XFS_MAXLINK ((1U << 31) - 1U) 934#define XFS_MAXLINK ((1U << 31) - 1U)
936#define XFS_MAXLINK_1 65535U
937 935
938/* 936/*
939 * Values for di_format 937 * Values for di_format
@@ -1578,19 +1576,10 @@ static inline xfs_filblks_t startblockval(xfs_fsblock_t x)
1578} 1576}
1579 1577
1580/* 1578/*
1581 * Possible extent formats.
1582 */
1583typedef enum {
1584 XFS_EXTFMT_NOSTATE = 0,
1585 XFS_EXTFMT_HASSTATE
1586} xfs_exntfmt_t;
1587
1588/*
1589 * Possible extent states. 1579 * Possible extent states.
1590 */ 1580 */
1591typedef enum { 1581typedef enum {
1592 XFS_EXT_NORM, XFS_EXT_UNWRITTEN, 1582 XFS_EXT_NORM, XFS_EXT_UNWRITTEN,
1593 XFS_EXT_DMAPI_OFFLINE, XFS_EXT_INVALID
1594} xfs_exntst_t; 1583} xfs_exntst_t;
1595 1584
1596/* 1585/*
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index b72dc821d78b..095bdf049a3f 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -92,6 +92,18 @@ struct getbmapx {
92#define BMV_OF_LAST 0x4 /* segment is the last in the file */ 92#define BMV_OF_LAST 0x4 /* segment is the last in the file */
93#define BMV_OF_SHARED 0x8 /* segment shared with another file */ 93#define BMV_OF_SHARED 0x8 /* segment shared with another file */
94 94
95/* fmr_owner special values for FS_IOC_GETFSMAP */
96#define XFS_FMR_OWN_FREE FMR_OWN_FREE /* free space */
97#define XFS_FMR_OWN_UNKNOWN FMR_OWN_UNKNOWN /* unknown owner */
98#define XFS_FMR_OWN_FS FMR_OWNER('X', 1) /* static fs metadata */
99#define XFS_FMR_OWN_LOG FMR_OWNER('X', 2) /* journalling log */
100#define XFS_FMR_OWN_AG FMR_OWNER('X', 3) /* per-AG metadata */
101#define XFS_FMR_OWN_INOBT FMR_OWNER('X', 4) /* inode btree blocks */
102#define XFS_FMR_OWN_INODES FMR_OWNER('X', 5) /* inodes */
103#define XFS_FMR_OWN_REFC FMR_OWNER('X', 6) /* refcount tree */
104#define XFS_FMR_OWN_COW FMR_OWNER('X', 7) /* cow staging */
105#define XFS_FMR_OWN_DEFECTIVE FMR_OWNER('X', 8) /* bad blocks */
106
95/* 107/*
96 * Structure for XFS_IOC_FSSETDM. 108 * Structure for XFS_IOC_FSSETDM.
97 * For use by backup and restore programs to set the XFS on-disk inode 109 * For use by backup and restore programs to set the XFS on-disk inode
@@ -502,6 +514,7 @@ typedef struct xfs_swapext
502#define XFS_IOC_GETBMAPX _IOWR('X', 56, struct getbmap) 514#define XFS_IOC_GETBMAPX _IOWR('X', 56, struct getbmap)
503#define XFS_IOC_ZERO_RANGE _IOW ('X', 57, struct xfs_flock64) 515#define XFS_IOC_ZERO_RANGE _IOW ('X', 57, struct xfs_flock64)
504#define XFS_IOC_FREE_EOFBLOCKS _IOR ('X', 58, struct xfs_fs_eofblocks) 516#define XFS_IOC_FREE_EOFBLOCKS _IOR ('X', 58, struct xfs_fs_eofblocks)
517/* XFS_IOC_GETFSMAP ------ hoisted 59 */
505 518
506/* 519/*
507 * ioctl commands that replace IRIX syssgi()'s 520 * ioctl commands that replace IRIX syssgi()'s
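These XFS_FMR_OWN_* codes are what shows up in fmr_owner when a returned record has FMR_OF_SPECIAL_OWNER set in fmr_flags; without that flag, fmr_owner is simply an inode number. A hedged userspace sketch of decoding them, assuming the FMR_OF_SPECIAL_OWNER flag from <linux/fsmap.h> and that the caller can see the XFS_FMR_OWN_* values above (the helper name is illustrative):

	/* Hypothetical helper: name the owner of one returned fsmap record. */
	static const char *
	fmr_owner_name(const struct fsmap *p)
	{
		if (!(p->fmr_flags & FMR_OF_SPECIAL_OWNER))
			return "inode";		/* fmr_owner is an inode number */

		switch (p->fmr_owner) {
		case XFS_FMR_OWN_FREE:		return "free space";
		case XFS_FMR_OWN_FS:		return "static fs metadata";
		case XFS_FMR_OWN_LOG:		return "journalling log";
		case XFS_FMR_OWN_AG:		return "per-AG metadata";
		case XFS_FMR_OWN_INOBT:		return "inode btree";
		case XFS_FMR_OWN_INODES:	return "inodes";
		case XFS_FMR_OWN_REFC:		return "refcount tree";
		case XFS_FMR_OWN_COW:		return "CoW staging";
		case XFS_FMR_OWN_DEFECTIVE:	return "bad blocks";
		default:			return "unknown";
		}
	}
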
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index d93f9d918cfc..09c3d1aecef2 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -508,7 +508,7 @@ xfs_iread(
508 508
509 /* even unallocated inodes are verified */ 509 /* even unallocated inodes are verified */
510 if (!xfs_dinode_verify(mp, ip->i_ino, dip)) { 510 if (!xfs_dinode_verify(mp, ip->i_ino, dip)) {
511 xfs_alert(mp, "%s: validation failed for inode %lld failed", 511 xfs_alert(mp, "%s: validation failed for inode %lld",
512 __func__, ip->i_ino); 512 __func__, ip->i_ino);
513 513
514 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip); 514 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip);
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index 8a37efe04de3..0e80f34fe97c 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -42,35 +42,6 @@ STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int);
42STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int); 42STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int);
43STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int); 43STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int);
44 44
45#ifdef DEBUG
46/*
47 * Make sure that the extents in the given memory buffer
48 * are valid.
49 */
50void
51xfs_validate_extents(
52 xfs_ifork_t *ifp,
53 int nrecs,
54 xfs_exntfmt_t fmt)
55{
56 xfs_bmbt_irec_t irec;
57 xfs_bmbt_rec_host_t rec;
58 int i;
59
60 for (i = 0; i < nrecs; i++) {
61 xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
62 rec.l0 = get_unaligned(&ep->l0);
63 rec.l1 = get_unaligned(&ep->l1);
64 xfs_bmbt_get_all(&rec, &irec);
65 if (fmt == XFS_EXTFMT_NOSTATE)
66 ASSERT(irec.br_state == XFS_EXT_NORM);
67 }
68}
69#else /* DEBUG */
70#define xfs_validate_extents(ifp, nrecs, fmt)
71#endif /* DEBUG */
72
73
74/* 45/*
75 * Move inode type and inode format specific information from the 46 * Move inode type and inode format specific information from the
76 * on-disk inode to the in-core inode. For fifos, devs, and sockets 47 * on-disk inode to the in-core inode. For fifos, devs, and sockets
@@ -352,40 +323,33 @@ xfs_iformat_local(
352} 323}
353 324
354/* 325/*
355 * The file consists of a set of extents all 326 * The file consists of a set of extents all of which fit into the on-disk
356 * of which fit into the on-disk inode. 327 * inode. If there are few enough extents to fit into the if_inline_ext, then
357 * If there are few enough extents to fit into 328 * copy them there. Otherwise allocate a buffer for them and copy them into it.
358 * the if_inline_ext, then copy them there. 329 * Either way, set if_extents to point at the extents.
359 * Otherwise allocate a buffer for them and copy
360 * them into it. Either way, set if_extents
361 * to point at the extents.
362 */ 330 */
363STATIC int 331STATIC int
364xfs_iformat_extents( 332xfs_iformat_extents(
365 xfs_inode_t *ip, 333 struct xfs_inode *ip,
366 xfs_dinode_t *dip, 334 struct xfs_dinode *dip,
367 int whichfork) 335 int whichfork)
368{ 336{
369 xfs_bmbt_rec_t *dp; 337 struct xfs_mount *mp = ip->i_mount;
370 xfs_ifork_t *ifp; 338 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
371 int nex; 339 int nex = XFS_DFORK_NEXTENTS(dip, whichfork);
372 int size; 340 int size = nex * sizeof(xfs_bmbt_rec_t);
373 int i; 341 struct xfs_bmbt_rec *dp;
374 342 int i;
375 ifp = XFS_IFORK_PTR(ip, whichfork);
376 nex = XFS_DFORK_NEXTENTS(dip, whichfork);
377 size = nex * (uint)sizeof(xfs_bmbt_rec_t);
378 343
379 /* 344 /*
380 * If the number of extents is unreasonable, then something 345 * If the number of extents is unreasonable, then something is wrong and
381 * is wrong and we just bail out rather than crash in 346 * we just bail out rather than crash in kmem_alloc() or memcpy() below.
382 * kmem_alloc() or memcpy() below.
383 */ 347 */
384 if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { 348 if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, mp, whichfork))) {
385 xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).", 349 xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).",
386 (unsigned long long) ip->i_ino, nex); 350 (unsigned long long) ip->i_ino, nex);
387 XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW, 351 XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
388 ip->i_mount, dip); 352 mp, dip);
389 return -EFSCORRUPTED; 353 return -EFSCORRUPTED;
390 } 354 }
391 355
@@ -400,22 +364,17 @@ xfs_iformat_extents(
400 ifp->if_bytes = size; 364 ifp->if_bytes = size;
401 if (size) { 365 if (size) {
402 dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork); 366 dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork);
403 xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip));
404 for (i = 0; i < nex; i++, dp++) { 367 for (i = 0; i < nex; i++, dp++) {
405 xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); 368 xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
406 ep->l0 = get_unaligned_be64(&dp->l0); 369 ep->l0 = get_unaligned_be64(&dp->l0);
407 ep->l1 = get_unaligned_be64(&dp->l1); 370 ep->l1 = get_unaligned_be64(&dp->l1);
371 if (!xfs_bmbt_validate_extent(mp, whichfork, ep)) {
372 XFS_ERROR_REPORT("xfs_iformat_extents(2)",
373 XFS_ERRLEVEL_LOW, mp);
374 return -EFSCORRUPTED;
375 }
408 } 376 }
409 XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork); 377 XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork);
410 if (whichfork != XFS_DATA_FORK ||
411 XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE)
412 if (unlikely(xfs_check_nostate_extents(
413 ifp, 0, nex))) {
414 XFS_ERROR_REPORT("xfs_iformat_extents(2)",
415 XFS_ERRLEVEL_LOW,
416 ip->i_mount);
417 return -EFSCORRUPTED;
418 }
419 } 378 }
420 ifp->if_flags |= XFS_IFEXTENTS; 379 ifp->if_flags |= XFS_IFEXTENTS;
421 return 0; 380 return 0;
@@ -518,7 +477,6 @@ xfs_iread_extents(
518 xfs_iext_destroy(ifp); 477 xfs_iext_destroy(ifp);
519 return error; 478 return error;
520 } 479 }
521 xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip));
522 ifp->if_flags |= XFS_IFEXTENTS; 480 ifp->if_flags |= XFS_IFEXTENTS;
523 return 0; 481 return 0;
524} 482}
@@ -837,6 +795,9 @@ xfs_iextents_copy(
837 copied = 0; 795 copied = 0;
838 for (i = 0; i < nrecs; i++) { 796 for (i = 0; i < nrecs; i++) {
839 xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); 797 xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
798
799 ASSERT(xfs_bmbt_validate_extent(ip->i_mount, whichfork, ep));
800
840 start_block = xfs_bmbt_get_startblock(ep); 801 start_block = xfs_bmbt_get_startblock(ep);
841 if (isnullstartblock(start_block)) { 802 if (isnullstartblock(start_block)) {
842 /* 803 /*
@@ -852,7 +813,6 @@ xfs_iextents_copy(
852 copied++; 813 copied++;
853 } 814 }
854 ASSERT(copied != 0); 815 ASSERT(copied != 0);
855 xfs_validate_extents(ifp, copied, XFS_EXTFMT_INODE(ip));
856 816
857 return (copied * (uint)sizeof(xfs_bmbt_rec_t)); 817 return (copied * (uint)sizeof(xfs_bmbt_rec_t));
858} 818}
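The per-record check replaces the old DEBUG-only bulk scan: each extent is now validated as it is copied in from disk, and again (under ASSERT) as it is copied back out in xfs_iextents_copy(). The body of xfs_bmbt_validate_extent() is not part of this hunk; one plausible shape for it, assuming the unwritten flag is the top bit of l0 and is only legal on data fork extents (a speculative sketch, not the helper's verbatim body):

	/* Speculative sketch only; the real helper lives in xfs_bmap_btree.h. */
	static inline bool
	xfs_bmbt_validate_extent_sketch(
		struct xfs_mount		*mp,
		int				whichfork,
		struct xfs_bmbt_rec_host	*ep)
	{
		/* No unwritten flag set: nothing to object to. */
		if (!(ep->l0 >> 63))
			return true;
		/* An unwritten extent only makes sense in the data fork. */
		return whichfork == XFS_DATA_FORK;
	}
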
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 3a8cc7139912..06cfb93c2ef9 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -2001,14 +2001,14 @@ xfs_rmap_query_range_helper(
2001/* Find all rmaps between two keys. */ 2001/* Find all rmaps between two keys. */
2002int 2002int
2003xfs_rmap_query_range( 2003xfs_rmap_query_range(
2004 struct xfs_btree_cur *cur, 2004 struct xfs_btree_cur *cur,
2005 struct xfs_rmap_irec *low_rec, 2005 struct xfs_rmap_irec *low_rec,
2006 struct xfs_rmap_irec *high_rec, 2006 struct xfs_rmap_irec *high_rec,
2007 xfs_rmap_query_range_fn fn, 2007 xfs_rmap_query_range_fn fn,
2008 void *priv) 2008 void *priv)
2009{ 2009{
2010 union xfs_btree_irec low_brec; 2010 union xfs_btree_irec low_brec;
2011 union xfs_btree_irec high_brec; 2011 union xfs_btree_irec high_brec;
2012 struct xfs_rmap_query_range_info query; 2012 struct xfs_rmap_query_range_info query;
2013 2013
2014 low_brec.r = *low_rec; 2014 low_brec.r = *low_rec;
@@ -2019,6 +2019,20 @@ xfs_rmap_query_range(
2019 xfs_rmap_query_range_helper, &query); 2019 xfs_rmap_query_range_helper, &query);
2020} 2020}
2021 2021
2022/* Find all rmaps. */
2023int
2024xfs_rmap_query_all(
2025 struct xfs_btree_cur *cur,
2026 xfs_rmap_query_range_fn fn,
2027 void *priv)
2028{
2029 struct xfs_rmap_query_range_info query;
2030
2031 query.priv = priv;
2032 query.fn = fn;
2033 return xfs_btree_query_all(cur, xfs_rmap_query_range_helper, &query);
2034}
2035
2022/* Clean up after calling xfs_rmap_finish_one. */ 2036/* Clean up after calling xfs_rmap_finish_one. */
2023void 2037void
2024xfs_rmap_finish_one_cleanup( 2038xfs_rmap_finish_one_cleanup(
@@ -2291,3 +2305,31 @@ xfs_rmap_free_extent(
2291 return __xfs_rmap_add(mp, dfops, XFS_RMAP_FREE, owner, 2305 return __xfs_rmap_add(mp, dfops, XFS_RMAP_FREE, owner,
2292 XFS_DATA_FORK, &bmap); 2306 XFS_DATA_FORK, &bmap);
2293} 2307}
2308
2309/* Compare rmap records. Returns -1 if a < b, 1 if a > b, and 0 if equal. */
2310int
2311xfs_rmap_compare(
2312 const struct xfs_rmap_irec *a,
2313 const struct xfs_rmap_irec *b)
2314{
2315 __u64 oa;
2316 __u64 ob;
2317
2318 oa = xfs_rmap_irec_offset_pack(a);
2319 ob = xfs_rmap_irec_offset_pack(b);
2320
2321 if (a->rm_startblock < b->rm_startblock)
2322 return -1;
2323 else if (a->rm_startblock > b->rm_startblock)
2324 return 1;
2325 else if (a->rm_owner < b->rm_owner)
2326 return -1;
2327 else if (a->rm_owner > b->rm_owner)
2328 return 1;
2329 else if (oa < ob)
2330 return -1;
2331 else if (oa > ob)
2332 return 1;
2333 else
2334 return 0;
2335}
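xfs_rmap_compare() fixes the record ordering (startblock first, then owner, then the packed offset/flags key) as a conventional three-way comparator. Its first consumer is the fsmap code added later in this series, which uses it to skip records that sort below the caller's low key, roughly as below; low_key here stands for whatever low bound the caller set up:

	/* Inside a query callback: ignore anything below the requested low key. */
	if (xfs_rmap_compare(rec, &low_key) < 0)
		return XFS_BTREE_QUERY_RANGE_CONTINUE;	/* keep scanning */
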
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
index 789930599339..98f908fea103 100644
--- a/fs/xfs/libxfs/xfs_rmap.h
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -162,6 +162,8 @@ typedef int (*xfs_rmap_query_range_fn)(
162int xfs_rmap_query_range(struct xfs_btree_cur *cur, 162int xfs_rmap_query_range(struct xfs_btree_cur *cur,
163 struct xfs_rmap_irec *low_rec, struct xfs_rmap_irec *high_rec, 163 struct xfs_rmap_irec *low_rec, struct xfs_rmap_irec *high_rec,
164 xfs_rmap_query_range_fn fn, void *priv); 164 xfs_rmap_query_range_fn fn, void *priv);
165int xfs_rmap_query_all(struct xfs_btree_cur *cur, xfs_rmap_query_range_fn fn,
166 void *priv);
165 167
166enum xfs_rmap_intent_type { 168enum xfs_rmap_intent_type {
167 XFS_RMAP_MAP, 169 XFS_RMAP_MAP,
@@ -212,5 +214,7 @@ int xfs_rmap_find_left_neighbor(struct xfs_btree_cur *cur, xfs_agblock_t bno,
212int xfs_rmap_lookup_le_range(struct xfs_btree_cur *cur, xfs_agblock_t bno, 214int xfs_rmap_lookup_le_range(struct xfs_btree_cur *cur, xfs_agblock_t bno,
213 uint64_t owner, uint64_t offset, unsigned int flags, 215 uint64_t owner, uint64_t offset, unsigned int flags,
214 struct xfs_rmap_irec *irec, int *stat); 216 struct xfs_rmap_irec *irec, int *stat);
217int xfs_rmap_compare(const struct xfs_rmap_irec *a,
218 const struct xfs_rmap_irec *b);
215 219
216#endif /* __XFS_RMAP_H__ */ 220#endif /* __XFS_RMAP_H__ */
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index ea45584a9913..e47b99e59f60 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -1016,3 +1016,73 @@ xfs_rtfree_extent(
1016 } 1016 }
1017 return 0; 1017 return 0;
1018} 1018}
1019
1020/* Find all the free records within a given range. */
1021int
1022xfs_rtalloc_query_range(
1023 struct xfs_trans *tp,
1024 struct xfs_rtalloc_rec *low_rec,
1025 struct xfs_rtalloc_rec *high_rec,
1026 xfs_rtalloc_query_range_fn fn,
1027 void *priv)
1028{
1029 struct xfs_rtalloc_rec rec;
1030 struct xfs_mount *mp = tp->t_mountp;
1031 xfs_rtblock_t rtstart;
1032 xfs_rtblock_t rtend;
1033 xfs_rtblock_t rem;
1034 int is_free;
1035 int error = 0;
1036
1037 if (low_rec->ar_startblock > high_rec->ar_startblock)
1038 return -EINVAL;
1039 else if (low_rec->ar_startblock == high_rec->ar_startblock)
1040 return 0;
1041
1042 /* Iterate the bitmap, looking for discrepancies. */
1043 rtstart = low_rec->ar_startblock;
1044 rem = high_rec->ar_startblock - rtstart;
1045 while (rem) {
1046 /* Is the first block free? */
1047 error = xfs_rtcheck_range(mp, tp, rtstart, 1, 1, &rtend,
1048 &is_free);
1049 if (error)
1050 break;
1051
1052 /* How long does the extent go for? */
1053 error = xfs_rtfind_forw(mp, tp, rtstart,
1054 high_rec->ar_startblock - 1, &rtend);
1055 if (error)
1056 break;
1057
1058 if (is_free) {
1059 rec.ar_startblock = rtstart;
1060 rec.ar_blockcount = rtend - rtstart + 1;
1061
1062 error = fn(tp, &rec, priv);
1063 if (error)
1064 break;
1065 }
1066
1067 rem -= rtend - rtstart + 1;
1068 rtstart = rtend + 1;
1069 }
1070
1071 return error;
1072}
1073
1074/* Find all the free records. */
1075int
1076xfs_rtalloc_query_all(
1077 struct xfs_trans *tp,
1078 xfs_rtalloc_query_range_fn fn,
1079 void *priv)
1080{
1081 struct xfs_rtalloc_rec keys[2];
1082
1083 keys[0].ar_startblock = 0;
1084 keys[1].ar_startblock = tp->t_mountp->m_sb.sb_rblocks;
1085 keys[0].ar_blockcount = keys[1].ar_blockcount = 0;
1086
1087 return xfs_rtalloc_query_range(tp, &keys[0], &keys[1], fn, priv);
1088}
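xfs_rtalloc_query_range() walks the realtime bitmap and invokes the callback once per free extent in the requested range; xfs_rtalloc_query_all() is the whole-device convenience wrapper. A minimal sketch of a caller with a hypothetical callback that just tallies free extents (the names below are illustrative, not part of the patch):

	struct rtfree_tally {
		uint64_t		extents;
		uint64_t		blocks;
	};

	/* Hypothetical callback: count each free realtime extent reported. */
	STATIC int
	xfs_rtfree_tally_helper(
		struct xfs_trans	*tp,
		struct xfs_rtalloc_rec	*rec,
		void			*priv)
	{
		struct rtfree_tally	*t = priv;

		t->extents++;
		t->blocks += rec->ar_blockcount;
		return 0;
	}

	/* ... in the caller, with a transaction already in hand: */
	struct rtfree_tally	tally = { 0 };
	int			error;

	error = xfs_rtalloc_query_all(tp, xfs_rtfree_tally_helper, &tally);
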
diff --git a/fs/xfs/libxfs/xfs_trans_space.h b/fs/xfs/libxfs/xfs_trans_space.h
index 7917f6e44286..d787c677d2a3 100644
--- a/fs/xfs/libxfs/xfs_trans_space.h
+++ b/fs/xfs/libxfs/xfs_trans_space.h
@@ -21,8 +21,20 @@
21/* 21/*
22 * Components of space reservations. 22 * Components of space reservations.
23 */ 23 */
24
25/* Worst case number of rmaps that can be held in a block. */
24#define XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp) \ 26#define XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp) \
25 (((mp)->m_rmap_mxr[0]) - ((mp)->m_rmap_mnr[0])) 27 (((mp)->m_rmap_mxr[0]) - ((mp)->m_rmap_mnr[0]))
28
29/* Adding one rmap could split every level up to the top of the tree. */
30#define XFS_RMAPADD_SPACE_RES(mp) ((mp)->m_rmap_maxlevels)
31
32/* Blocks we might need to add "b" rmaps to a tree. */
33#define XFS_NRMAPADD_SPACE_RES(mp, b)\
34 (((b + XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp) - 1) / \
35 XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp)) * \
36 XFS_RMAPADD_SPACE_RES(mp))
37
26#define XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp) \ 38#define XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp) \
27 (((mp)->m_alloc_mxr[0]) - ((mp)->m_alloc_mnr[0])) 39 (((mp)->m_alloc_mxr[0]) - ((mp)->m_alloc_mnr[0]))
28#define XFS_EXTENTADD_SPACE_RES(mp,w) (XFS_BM_MAXLEVELS(mp,w) - 1) 40#define XFS_EXTENTADD_SPACE_RES(mp,w) (XFS_BM_MAXLEVELS(mp,w) - 1)
@@ -30,13 +42,12 @@
30 (((b + XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp) - 1) / \ 42 (((b + XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp) - 1) / \
31 XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp)) * \ 43 XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp)) * \
32 XFS_EXTENTADD_SPACE_RES(mp,w)) 44 XFS_EXTENTADD_SPACE_RES(mp,w))
45
46/* Blocks we might need to add "b" mappings & rmappings to a file. */
33#define XFS_SWAP_RMAP_SPACE_RES(mp,b,w)\ 47#define XFS_SWAP_RMAP_SPACE_RES(mp,b,w)\
34 (((b + XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp) - 1) / \ 48 (XFS_NEXTENTADD_SPACE_RES((mp), (b), (w)) + \
35 XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp)) * \ 49 XFS_NRMAPADD_SPACE_RES((mp), (b)))
36 XFS_EXTENTADD_SPACE_RES(mp,w) + \ 50
37 ((b + XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp) - 1) / \
38 XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp)) * \
39 (mp)->m_rmap_maxlevels)
40#define XFS_DAENTER_1B(mp,w) \ 51#define XFS_DAENTER_1B(mp,w) \
41 ((w) == XFS_DATA_FORK ? (mp)->m_dir_geo->fsbcount : 1) 52 ((w) == XFS_DATA_FORK ? (mp)->m_dir_geo->fsbcount : 1)
42#define XFS_DAENTER_DBS(mp,w) \ 53#define XFS_DAENTER_DBS(mp,w) \
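XFS_NRMAPADD_SPACE_RES() rounds the number of new rmaps up to whole blocks of XFS_MAX_CONTIG_RMAPS_PER_BLOCK() and charges a worst-case split (one block per rmapbt level) for each such block; XFS_SWAP_RMAP_SPACE_RES() then collapses into the bmbt reservation plus that rmapbt reservation. A worked example with made-up geometry:

	/*
	 * Hypothetical geometry: XFS_MAX_CONTIG_RMAPS_PER_BLOCK() == 84 and
	 * m_rmap_maxlevels == 5.  Adding b = 200 rmaps then reserves
	 *
	 *	DIV_ROUND_UP(200, 84) * 5 = 3 * 5 = 15 blocks
	 *
	 * which is what XFS_NRMAPADD_SPACE_RES(mp, 200) expands to.
	 */
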
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 05eca126c688..09af0f7cd55e 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -111,11 +111,11 @@ xfs_finish_page_writeback(
111 111
112 bsize = bh->b_size; 112 bsize = bh->b_size;
113 do { 113 do {
114 if (off > end)
115 break;
114 next = bh->b_this_page; 116 next = bh->b_this_page;
115 if (off < bvec->bv_offset) 117 if (off < bvec->bv_offset)
116 goto next_bh; 118 goto next_bh;
117 if (off > end)
118 break;
119 bh->b_end_io(bh, !error); 119 bh->b_end_io(bh, !error);
120next_bh: 120next_bh:
121 off += bsize; 121 off += bsize;
@@ -1261,8 +1261,8 @@ xfs_get_blocks(
1261 1261
1262 if (nimaps) { 1262 if (nimaps) {
1263 trace_xfs_get_blocks_found(ip, offset, size, 1263 trace_xfs_get_blocks_found(ip, offset, size,
1264 ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN 1264 imap.br_state == XFS_EXT_UNWRITTEN ?
1265 : XFS_IO_OVERWRITE, &imap); 1265 XFS_IO_UNWRITTEN : XFS_IO_OVERWRITE, &imap);
1266 xfs_iunlock(ip, lockmode); 1266 xfs_iunlock(ip, lockmode);
1267 } else { 1267 } else {
1268 trace_xfs_get_blocks_notfound(ip, offset, size); 1268 trace_xfs_get_blocks_notfound(ip, offset, size);
@@ -1276,9 +1276,7 @@ xfs_get_blocks(
1276 * For unwritten extents do not report a disk address in the buffered 1276 * For unwritten extents do not report a disk address in the buffered
1277 * read case (treat as if we're reading into a hole). 1277 * read case (treat as if we're reading into a hole).
1278 */ 1278 */
1279 if (imap.br_startblock != HOLESTARTBLOCK && 1279 if (xfs_bmap_is_real_extent(&imap))
1280 imap.br_startblock != DELAYSTARTBLOCK &&
1281 !ISUNWRITTEN(&imap))
1282 xfs_map_buffer(inode, bh_result, &imap, offset); 1280 xfs_map_buffer(inode, bh_result, &imap, offset);
1283 1281
1284 /* 1282 /*
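Both hunks above fold open-coded "real, allocated block" tests into the new xfs_bmap_is_real_extent() helper, which is added elsewhere in this series and not shown here. Judging purely from the conditions it replaces, the predicate presumably reads roughly like this sketch (an assumption, not the verbatim helper):

	/* Speculative sketch of the predicate the callers above now rely on. */
	static inline bool
	xfs_bmap_is_real_extent_sketch(
		struct xfs_bmbt_irec	*irec)
	{
		return irec->br_startblock != HOLESTARTBLOCK &&
		       irec->br_startblock != DELAYSTARTBLOCK &&
		       irec->br_state != XFS_EXT_UNWRITTEN;
	}
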
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index 9bf57c76623b..d419d23fa214 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -34,6 +34,8 @@
34#include "xfs_bmap.h" 34#include "xfs_bmap.h"
35#include "xfs_icache.h" 35#include "xfs_icache.h"
36#include "xfs_trace.h" 36#include "xfs_trace.h"
37#include "xfs_bmap_btree.h"
38#include "xfs_trans_space.h"
37 39
38 40
39kmem_zone_t *xfs_bui_zone; 41kmem_zone_t *xfs_bui_zone;
@@ -215,6 +217,7 @@ void
215xfs_bui_release( 217xfs_bui_release(
216 struct xfs_bui_log_item *buip) 218 struct xfs_bui_log_item *buip)
217{ 219{
220 ASSERT(atomic_read(&buip->bui_refcount) > 0);
218 if (atomic_dec_and_test(&buip->bui_refcount)) { 221 if (atomic_dec_and_test(&buip->bui_refcount)) {
219 xfs_trans_ail_remove(&buip->bui_item, SHUTDOWN_LOG_IO_ERROR); 222 xfs_trans_ail_remove(&buip->bui_item, SHUTDOWN_LOG_IO_ERROR);
220 xfs_bui_item_free(buip); 223 xfs_bui_item_free(buip);
@@ -446,7 +449,8 @@ xfs_bui_recover(
446 return -EIO; 449 return -EIO;
447 } 450 }
448 451
449 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); 452 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
453 XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 0, 0, &tp);
450 if (error) 454 if (error)
451 return error; 455 return error;
452 budp = xfs_trans_get_bud(tp, buip); 456 budp = xfs_trans_get_bud(tp, buip);
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 8795e9cd867c..2b954308a1d6 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -448,10 +448,9 @@ xfs_getbmap_adjust_shared(
448 next_map->br_blockcount = 0; 448 next_map->br_blockcount = 0;
449 449
450 /* Only written data blocks can be shared. */ 450 /* Only written data blocks can be shared. */
451 if (!xfs_is_reflink_inode(ip) || whichfork != XFS_DATA_FORK || 451 if (!xfs_is_reflink_inode(ip) ||
452 map->br_startblock == DELAYSTARTBLOCK || 452 whichfork != XFS_DATA_FORK ||
453 map->br_startblock == HOLESTARTBLOCK || 453 !xfs_bmap_is_real_extent(map))
454 ISUNWRITTEN(map))
455 return 0; 454 return 0;
456 455
457 agno = XFS_FSB_TO_AGNO(mp, map->br_startblock); 456 agno = XFS_FSB_TO_AGNO(mp, map->br_startblock);
@@ -904,9 +903,9 @@ xfs_can_free_eofblocks(struct xfs_inode *ip, bool force)
904} 903}
905 904
906/* 905/*
907 * This is called by xfs_inactive to free any blocks beyond eof 906 * This is called to free any blocks beyond eof. The caller must hold
908 * when the link count isn't zero and by xfs_dm_punch_hole() when 907 * IOLOCK_EXCL unless we are in the inode reclaim path and have the only
909 * punching a hole to EOF. 908 * reference to the inode.
910 */ 909 */
911int 910int
912xfs_free_eofblocks( 911xfs_free_eofblocks(
@@ -921,8 +920,6 @@ xfs_free_eofblocks(
921 struct xfs_bmbt_irec imap; 920 struct xfs_bmbt_irec imap;
922 struct xfs_mount *mp = ip->i_mount; 921 struct xfs_mount *mp = ip->i_mount;
923 922
924 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
925
926 /* 923 /*
927 * Figure out if there are any blocks beyond the end 924 * Figure out if there are any blocks beyond the end
928 * of the file. If not, then there is nothing to do. 925 * of the file. If not, then there is nothing to do.
@@ -1209,11 +1206,8 @@ xfs_adjust_extent_unmap_boundaries(
1209 return error; 1206 return error;
1210 1207
1211 if (nimap && imap.br_startblock != HOLESTARTBLOCK) { 1208 if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
1212 xfs_daddr_t block;
1213
1214 ASSERT(imap.br_startblock != DELAYSTARTBLOCK); 1209 ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
1215 block = imap.br_startblock; 1210 mod = do_mod(imap.br_startblock, mp->m_sb.sb_rextsize);
1216 mod = do_div(block, mp->m_sb.sb_rextsize);
1217 if (mod) 1211 if (mod)
1218 *startoffset_fsb += mp->m_sb.sb_rextsize - mod; 1212 *startoffset_fsb += mp->m_sb.sb_rextsize - mod;
1219 } 1213 }
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index ca09061369cb..62fa39276a24 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1079,6 +1079,8 @@ void
1079xfs_buf_unlock( 1079xfs_buf_unlock(
1080 struct xfs_buf *bp) 1080 struct xfs_buf *bp)
1081{ 1081{
1082 ASSERT(xfs_buf_islocked(bp));
1083
1082 XB_CLEAR_OWNER(bp); 1084 XB_CLEAR_OWNER(bp);
1083 up(&bp->b_sema); 1085 up(&bp->b_sema);
1084 1086
@@ -1815,6 +1817,28 @@ error:
1815} 1817}
1816 1818
1817/* 1819/*
1820 * Cancel a delayed write list.
1821 *
1822 * Remove each buffer from the list, clear the delwri queue flag and drop the
1823 * associated buffer reference.
1824 */
1825void
1826xfs_buf_delwri_cancel(
1827 struct list_head *list)
1828{
1829 struct xfs_buf *bp;
1830
1831 while (!list_empty(list)) {
1832 bp = list_first_entry(list, struct xfs_buf, b_list);
1833
1834 xfs_buf_lock(bp);
1835 bp->b_flags &= ~_XBF_DELWRI_Q;
1836 list_del_init(&bp->b_list);
1837 xfs_buf_relse(bp);
1838 }
1839}
1840
1841/*
1818 * Add a buffer to the delayed write list. 1842 * Add a buffer to the delayed write list.
1819 * 1843 *
1820 * This queues a buffer for writeout if it hasn't already been. Note that 1844 * This queues a buffer for writeout if it hasn't already been. Note that
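xfs_buf_delwri_cancel() is the error-path counterpart to xfs_buf_delwri_submit(): it unwinds a delwri list without issuing any I/O, clearing the queue flag and dropping the reference each queued buffer holds. A hedged sketch of the intended call pattern; the queueing helper below is hypothetical:

	LIST_HEAD(buffer_list);
	int		error;

	/* Queue a batch of buffers for delayed write (hypothetical helper). */
	error = queue_some_buffers(mp, &buffer_list);
	if (error)
		xfs_buf_delwri_cancel(&buffer_list);	/* drop queued refs, no I/O */
	else
		error = xfs_buf_delwri_submit(&buffer_list);
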
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 3c867e5a63e1..8d1d44f87ce9 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -291,7 +291,6 @@ xfs_buf_readahead(
291 return xfs_buf_readahead_map(target, &map, 1, ops); 291 return xfs_buf_readahead_map(target, &map, 1, ops);
292} 292}
293 293
294struct xfs_buf *xfs_buf_get_empty(struct xfs_buftarg *target, size_t numblks);
295void xfs_buf_set_empty(struct xfs_buf *bp, size_t numblks); 294void xfs_buf_set_empty(struct xfs_buf *bp, size_t numblks);
296int xfs_buf_associate_memory(struct xfs_buf *bp, void *mem, size_t length); 295int xfs_buf_associate_memory(struct xfs_buf *bp, void *mem, size_t length);
297 296
@@ -330,6 +329,7 @@ extern void *xfs_buf_offset(struct xfs_buf *, size_t);
330extern void xfs_buf_stale(struct xfs_buf *bp); 329extern void xfs_buf_stale(struct xfs_buf *bp);
331 330
332/* Delayed Write Buffer Routines */ 331/* Delayed Write Buffer Routines */
332extern void xfs_buf_delwri_cancel(struct list_head *);
333extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *); 333extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *);
334extern int xfs_buf_delwri_submit(struct list_head *); 334extern int xfs_buf_delwri_submit(struct list_head *);
335extern int xfs_buf_delwri_submit_nowait(struct list_head *); 335extern int xfs_buf_delwri_submit_nowait(struct list_head *);
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index ad9396e516f6..20b7a5c6eb2f 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -394,6 +394,7 @@ xfs_dir2_leaf_readbuf(
394 394
395 /* 395 /*
396 * Do we need more readahead? 396 * Do we need more readahead?
397 * Each loop tries to process 1 full dir blk; last may be partial.
397 */ 398 */
398 blk_start_plug(&plug); 399 blk_start_plug(&plug);
399 for (mip->ra_index = mip->ra_offset = i = 0; 400 for (mip->ra_index = mip->ra_offset = i = 0;
@@ -404,7 +405,8 @@ xfs_dir2_leaf_readbuf(
404 * Read-ahead a contiguous directory block. 405 * Read-ahead a contiguous directory block.
405 */ 406 */
406 if (i > mip->ra_current && 407 if (i > mip->ra_current &&
407 map[mip->ra_index].br_blockcount >= geo->fsbcount) { 408 (map[mip->ra_index].br_blockcount - mip->ra_offset) >=
409 geo->fsbcount) {
408 xfs_dir3_data_readahead(dp, 410 xfs_dir3_data_readahead(dp,
409 map[mip->ra_index].br_startoff + mip->ra_offset, 411 map[mip->ra_index].br_startoff + mip->ra_offset,
410 XFS_FSB_TO_DADDR(dp->i_mount, 412 XFS_FSB_TO_DADDR(dp->i_mount,
@@ -425,14 +427,19 @@ xfs_dir2_leaf_readbuf(
425 } 427 }
426 428
427 /* 429 /*
428 * Advance offset through the mapping table. 430 * Advance offset through the mapping table, processing a full
431 * dir block even if it is fragmented into several extents.
432 * But stop if we have consumed all valid mappings, even if
433 * it's not yet a full directory block.
429 */ 434 */
430 for (j = 0; j < geo->fsbcount; j += length ) { 435 for (j = 0;
436 j < geo->fsbcount && mip->ra_index < mip->map_valid;
437 j += length ) {
431 /* 438 /*
432 * The rest of this extent but not more than a dir 439 * The rest of this extent but not more than a dir
433 * block. 440 * block.
434 */ 441 */
435 length = min_t(int, geo->fsbcount, 442 length = min_t(int, geo->fsbcount - j,
436 map[mip->ra_index].br_blockcount - 443 map[mip->ra_index].br_blockcount -
437 mip->ra_offset); 444 mip->ra_offset);
438 mip->ra_offset += length; 445 mip->ra_offset += length;
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index d796ffac7296..6a05d278da64 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -132,6 +132,11 @@ next_extent:
132 error = xfs_btree_decrement(cur, 0, &i); 132 error = xfs_btree_decrement(cur, 0, &i);
133 if (error) 133 if (error)
134 goto out_del_cursor; 134 goto out_del_cursor;
135
136 if (fatal_signal_pending(current)) {
137 error = -ERESTARTSYS;
138 goto out_del_cursor;
139 }
135 } 140 }
136 141
137out_del_cursor: 142out_del_cursor:
@@ -196,8 +201,11 @@ xfs_ioc_trim(
196 for (agno = start_agno; agno <= end_agno; agno++) { 201 for (agno = start_agno; agno <= end_agno; agno++) {
197 error = xfs_trim_extents(mp, agno, start, end, minlen, 202 error = xfs_trim_extents(mp, agno, start, end, minlen,
198 &blocks_trimmed); 203 &blocks_trimmed);
199 if (error) 204 if (error) {
200 last_error = error; 205 last_error = error;
206 if (error == -ERESTARTSYS)
207 break;
208 }
201 } 209 }
202 210
203 if (last_error) 211 if (last_error)
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index d7bc14906af8..44f8c5451210 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -290,6 +290,7 @@ void
290xfs_efi_release( 290xfs_efi_release(
291 struct xfs_efi_log_item *efip) 291 struct xfs_efi_log_item *efip)
292{ 292{
293 ASSERT(atomic_read(&efip->efi_refcount) > 0);
293 if (atomic_dec_and_test(&efip->efi_refcount)) { 294 if (atomic_dec_and_test(&efip->efi_refcount)) {
294 xfs_trans_ail_remove(&efip->efi_item, SHUTDOWN_LOG_IO_ERROR); 295 xfs_trans_ail_remove(&efip->efi_item, SHUTDOWN_LOG_IO_ERROR);
295 xfs_efi_item_free(efip); 296 xfs_efi_item_free(efip);
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
new file mode 100644
index 000000000000..3683819887a5
--- /dev/null
+++ b/fs/xfs/xfs_fsmap.c
@@ -0,0 +1,940 @@
1/*
2 * Copyright (C) 2017 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#include "xfs.h"
21#include "xfs_fs.h"
22#include "xfs_shared.h"
23#include "xfs_format.h"
24#include "xfs_log_format.h"
25#include "xfs_trans_resv.h"
26#include "xfs_sb.h"
27#include "xfs_mount.h"
28#include "xfs_defer.h"
29#include "xfs_inode.h"
30#include "xfs_trans.h"
31#include "xfs_error.h"
32#include "xfs_btree.h"
33#include "xfs_rmap_btree.h"
34#include "xfs_trace.h"
35#include "xfs_log.h"
36#include "xfs_rmap.h"
37#include "xfs_alloc.h"
38#include "xfs_bit.h"
39#include <linux/fsmap.h>
40#include "xfs_fsmap.h"
41#include "xfs_refcount.h"
42#include "xfs_refcount_btree.h"
43#include "xfs_alloc_btree.h"
44#include "xfs_rtalloc.h"
45
46/* Convert an xfs_fsmap to an fsmap. */
47void
48xfs_fsmap_from_internal(
49 struct fsmap *dest,
50 struct xfs_fsmap *src)
51{
52 dest->fmr_device = src->fmr_device;
53 dest->fmr_flags = src->fmr_flags;
54 dest->fmr_physical = BBTOB(src->fmr_physical);
55 dest->fmr_owner = src->fmr_owner;
56 dest->fmr_offset = BBTOB(src->fmr_offset);
57 dest->fmr_length = BBTOB(src->fmr_length);
58 dest->fmr_reserved[0] = 0;
59 dest->fmr_reserved[1] = 0;
60 dest->fmr_reserved[2] = 0;
61}
62
63/* Convert an fsmap to an xfs_fsmap. */
64void
65xfs_fsmap_to_internal(
66 struct xfs_fsmap *dest,
67 struct fsmap *src)
68{
69 dest->fmr_device = src->fmr_device;
70 dest->fmr_flags = src->fmr_flags;
71 dest->fmr_physical = BTOBBT(src->fmr_physical);
72 dest->fmr_owner = src->fmr_owner;
73 dest->fmr_offset = BTOBBT(src->fmr_offset);
74 dest->fmr_length = BTOBBT(src->fmr_length);
75}
76
77/* Convert an fsmap owner into an rmapbt owner. */
78static int
79xfs_fsmap_owner_to_rmap(
80 struct xfs_rmap_irec *dest,
81 struct xfs_fsmap *src)
82{
83 if (!(src->fmr_flags & FMR_OF_SPECIAL_OWNER)) {
84 dest->rm_owner = src->fmr_owner;
85 return 0;
86 }
87
88 switch (src->fmr_owner) {
89 case 0: /* "lowest owner id possible" */
90 case -1ULL: /* "highest owner id possible" */
91 dest->rm_owner = 0;
92 break;
93 case XFS_FMR_OWN_FREE:
94 dest->rm_owner = XFS_RMAP_OWN_NULL;
95 break;
96 case XFS_FMR_OWN_UNKNOWN:
97 dest->rm_owner = XFS_RMAP_OWN_UNKNOWN;
98 break;
99 case XFS_FMR_OWN_FS:
100 dest->rm_owner = XFS_RMAP_OWN_FS;
101 break;
102 case XFS_FMR_OWN_LOG:
103 dest->rm_owner = XFS_RMAP_OWN_LOG;
104 break;
105 case XFS_FMR_OWN_AG:
106 dest->rm_owner = XFS_RMAP_OWN_AG;
107 break;
108 case XFS_FMR_OWN_INOBT:
109 dest->rm_owner = XFS_RMAP_OWN_INOBT;
110 break;
111 case XFS_FMR_OWN_INODES:
112 dest->rm_owner = XFS_RMAP_OWN_INODES;
113 break;
114 case XFS_FMR_OWN_REFC:
115 dest->rm_owner = XFS_RMAP_OWN_REFC;
116 break;
117 case XFS_FMR_OWN_COW:
118 dest->rm_owner = XFS_RMAP_OWN_COW;
119 break;
120 case XFS_FMR_OWN_DEFECTIVE: /* not implemented */
121 /* fall through */
122 default:
123 return -EINVAL;
124 }
125 return 0;
126}
127
128/* Convert an rmapbt owner into an fsmap owner. */
129static int
130xfs_fsmap_owner_from_rmap(
131 struct xfs_fsmap *dest,
132 struct xfs_rmap_irec *src)
133{
134 dest->fmr_flags = 0;
135 if (!XFS_RMAP_NON_INODE_OWNER(src->rm_owner)) {
136 dest->fmr_owner = src->rm_owner;
137 return 0;
138 }
139 dest->fmr_flags |= FMR_OF_SPECIAL_OWNER;
140
141 switch (src->rm_owner) {
142 case XFS_RMAP_OWN_FS:
143 dest->fmr_owner = XFS_FMR_OWN_FS;
144 break;
145 case XFS_RMAP_OWN_LOG:
146 dest->fmr_owner = XFS_FMR_OWN_LOG;
147 break;
148 case XFS_RMAP_OWN_AG:
149 dest->fmr_owner = XFS_FMR_OWN_AG;
150 break;
151 case XFS_RMAP_OWN_INOBT:
152 dest->fmr_owner = XFS_FMR_OWN_INOBT;
153 break;
154 case XFS_RMAP_OWN_INODES:
155 dest->fmr_owner = XFS_FMR_OWN_INODES;
156 break;
157 case XFS_RMAP_OWN_REFC:
158 dest->fmr_owner = XFS_FMR_OWN_REFC;
159 break;
160 case XFS_RMAP_OWN_COW:
161 dest->fmr_owner = XFS_FMR_OWN_COW;
162 break;
163 case XFS_RMAP_OWN_NULL: /* "free" */
164 dest->fmr_owner = XFS_FMR_OWN_FREE;
165 break;
166 default:
167 return -EFSCORRUPTED;
168 }
169 return 0;
170}
171
172/* getfsmap query state */
173struct xfs_getfsmap_info {
174 struct xfs_fsmap_head *head;
175 xfs_fsmap_format_t formatter; /* formatting fn */
176 void *format_arg; /* format buffer */
177 struct xfs_buf *agf_bp; /* AGF, for refcount queries */
178 xfs_daddr_t next_daddr; /* next daddr we expect */
179 u64 missing_owner; /* owner of holes */
180 u32 dev; /* device id */
181 xfs_agnumber_t agno; /* AG number, if applicable */
182 struct xfs_rmap_irec low; /* low rmap key */
183 struct xfs_rmap_irec high; /* high rmap key */
184 bool last; /* last extent? */
185};
186
187/* Associate a device with a getfsmap handler. */
188struct xfs_getfsmap_dev {
189 u32 dev;
190 int (*fn)(struct xfs_trans *tp,
191 struct xfs_fsmap *keys,
192 struct xfs_getfsmap_info *info);
193};
194
195/* Compare two getfsmap device handlers. */
196static int
197xfs_getfsmap_dev_compare(
198 const void *p1,
199 const void *p2)
200{
201 const struct xfs_getfsmap_dev *d1 = p1;
202 const struct xfs_getfsmap_dev *d2 = p2;
203
204 return d1->dev - d2->dev;
205}
206
207/* Decide if this mapping is shared. */
208STATIC int
209xfs_getfsmap_is_shared(
210 struct xfs_trans *tp,
211 struct xfs_getfsmap_info *info,
212 struct xfs_rmap_irec *rec,
213 bool *stat)
214{
215 struct xfs_mount *mp = tp->t_mountp;
216 struct xfs_btree_cur *cur;
217 xfs_agblock_t fbno;
218 xfs_extlen_t flen;
219 int error;
220
221 *stat = false;
222 if (!xfs_sb_version_hasreflink(&mp->m_sb))
223 return 0;
224 /* rt files will have agno set to NULLAGNUMBER */
225 if (info->agno == NULLAGNUMBER)
226 return 0;
227
228 /* Are there any shared blocks here? */
229 flen = 0;
230 cur = xfs_refcountbt_init_cursor(mp, tp, info->agf_bp,
231 info->agno, NULL);
232
233 error = xfs_refcount_find_shared(cur, rec->rm_startblock,
234 rec->rm_blockcount, &fbno, &flen, false);
235
236 xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
237 if (error)
238 return error;
239
240 *stat = flen > 0;
241 return 0;
242}
243
244/*
245 * Format a reverse mapping for getfsmap, having translated rm_startblock
246 * into the appropriate daddr units.
247 */
248STATIC int
249xfs_getfsmap_helper(
250 struct xfs_trans *tp,
251 struct xfs_getfsmap_info *info,
252 struct xfs_rmap_irec *rec,
253 xfs_daddr_t rec_daddr)
254{
255 struct xfs_fsmap fmr;
256 struct xfs_mount *mp = tp->t_mountp;
257 bool shared;
258 int error;
259
260 if (fatal_signal_pending(current))
261 return -EINTR;
262
263 /*
264 * Filter out records that start before our startpoint, if the
265 * caller requested that.
266 */
267 if (xfs_rmap_compare(rec, &info->low) < 0) {
268 rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount);
269 if (info->next_daddr < rec_daddr)
270 info->next_daddr = rec_daddr;
271 return XFS_BTREE_QUERY_RANGE_CONTINUE;
272 }
273
274 /* Are we just counting mappings? */
275 if (info->head->fmh_count == 0) {
276 if (rec_daddr > info->next_daddr)
277 info->head->fmh_entries++;
278
279 if (info->last)
280 return XFS_BTREE_QUERY_RANGE_CONTINUE;
281
282 info->head->fmh_entries++;
283
284 rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount);
285 if (info->next_daddr < rec_daddr)
286 info->next_daddr = rec_daddr;
287 return XFS_BTREE_QUERY_RANGE_CONTINUE;
288 }
289
290 /*
291 * If the record starts past the last physical block we saw,
292 * then we've found a gap. Report the gap as being owned by
293 * whatever the caller specified is the missing owner.
294 */
295 if (rec_daddr > info->next_daddr) {
296 if (info->head->fmh_entries >= info->head->fmh_count)
297 return XFS_BTREE_QUERY_RANGE_ABORT;
298
299 fmr.fmr_device = info->dev;
300 fmr.fmr_physical = info->next_daddr;
301 fmr.fmr_owner = info->missing_owner;
302 fmr.fmr_offset = 0;
303 fmr.fmr_length = rec_daddr - info->next_daddr;
304 fmr.fmr_flags = FMR_OF_SPECIAL_OWNER;
305 error = info->formatter(&fmr, info->format_arg);
306 if (error)
307 return error;
308 info->head->fmh_entries++;
309 }
310
311 if (info->last)
312 goto out;
313
314 /* Fill out the extent we found */
315 if (info->head->fmh_entries >= info->head->fmh_count)
316 return XFS_BTREE_QUERY_RANGE_ABORT;
317
318 trace_xfs_fsmap_mapping(mp, info->dev, info->agno, rec);
319
320 fmr.fmr_device = info->dev;
321 fmr.fmr_physical = rec_daddr;
322 error = xfs_fsmap_owner_from_rmap(&fmr, rec);
323 if (error)
324 return error;
325 fmr.fmr_offset = XFS_FSB_TO_BB(mp, rec->rm_offset);
326 fmr.fmr_length = XFS_FSB_TO_BB(mp, rec->rm_blockcount);
327 if (rec->rm_flags & XFS_RMAP_UNWRITTEN)
328 fmr.fmr_flags |= FMR_OF_PREALLOC;
329 if (rec->rm_flags & XFS_RMAP_ATTR_FORK)
330 fmr.fmr_flags |= FMR_OF_ATTR_FORK;
331 if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK)
332 fmr.fmr_flags |= FMR_OF_EXTENT_MAP;
333 if (fmr.fmr_flags == 0) {
334 error = xfs_getfsmap_is_shared(tp, info, rec, &shared);
335 if (error)
336 return error;
337 if (shared)
338 fmr.fmr_flags |= FMR_OF_SHARED;
339 }
340 error = info->formatter(&fmr, info->format_arg);
341 if (error)
342 return error;
343 info->head->fmh_entries++;
344
345out:
346 rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount);
347 if (info->next_daddr < rec_daddr)
348 info->next_daddr = rec_daddr;
349 return XFS_BTREE_QUERY_RANGE_CONTINUE;
350}
351
352/* Transform a rmapbt irec into a fsmap */
353STATIC int
354xfs_getfsmap_datadev_helper(
355 struct xfs_btree_cur *cur,
356 struct xfs_rmap_irec *rec,
357 void *priv)
358{
359 struct xfs_mount *mp = cur->bc_mp;
360 struct xfs_getfsmap_info *info = priv;
361 xfs_fsblock_t fsb;
362 xfs_daddr_t rec_daddr;
363
364 fsb = XFS_AGB_TO_FSB(mp, cur->bc_private.a.agno, rec->rm_startblock);
365 rec_daddr = XFS_FSB_TO_DADDR(mp, fsb);
366
367 return xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr);
368}
369
370/* Transform a rtbitmap "record" into a fsmap */
371STATIC int
372xfs_getfsmap_rtdev_rtbitmap_helper(
373 struct xfs_trans *tp,
374 struct xfs_rtalloc_rec *rec,
375 void *priv)
376{
377 struct xfs_mount *mp = tp->t_mountp;
378 struct xfs_getfsmap_info *info = priv;
379 struct xfs_rmap_irec irec;
380 xfs_daddr_t rec_daddr;
381
382 rec_daddr = XFS_FSB_TO_BB(mp, rec->ar_startblock);
383
384 irec.rm_startblock = rec->ar_startblock;
385 irec.rm_blockcount = rec->ar_blockcount;
386 irec.rm_owner = XFS_RMAP_OWN_NULL; /* "free" */
387 irec.rm_offset = 0;
388 irec.rm_flags = 0;
389
390 return xfs_getfsmap_helper(tp, info, &irec, rec_daddr);
391}
392
393/* Transform a bnobt irec into a fsmap */
394STATIC int
395xfs_getfsmap_datadev_bnobt_helper(
396 struct xfs_btree_cur *cur,
397 struct xfs_alloc_rec_incore *rec,
398 void *priv)
399{
400 struct xfs_mount *mp = cur->bc_mp;
401 struct xfs_getfsmap_info *info = priv;
402 struct xfs_rmap_irec irec;
403 xfs_daddr_t rec_daddr;
404
405 rec_daddr = XFS_AGB_TO_DADDR(mp, cur->bc_private.a.agno,
406 rec->ar_startblock);
407
408 irec.rm_startblock = rec->ar_startblock;
409 irec.rm_blockcount = rec->ar_blockcount;
410 irec.rm_owner = XFS_RMAP_OWN_NULL; /* "free" */
411 irec.rm_offset = 0;
412 irec.rm_flags = 0;
413
414 return xfs_getfsmap_helper(cur->bc_tp, info, &irec, rec_daddr);
415}
416
417/* Set rmap flags based on the getfsmap flags */
418static void
419xfs_getfsmap_set_irec_flags(
420 struct xfs_rmap_irec *irec,
421 struct xfs_fsmap *fmr)
422{
423 irec->rm_flags = 0;
424 if (fmr->fmr_flags & FMR_OF_ATTR_FORK)
425 irec->rm_flags |= XFS_RMAP_ATTR_FORK;
426 if (fmr->fmr_flags & FMR_OF_EXTENT_MAP)
427 irec->rm_flags |= XFS_RMAP_BMBT_BLOCK;
428 if (fmr->fmr_flags & FMR_OF_PREALLOC)
429 irec->rm_flags |= XFS_RMAP_UNWRITTEN;
430}
431
432/* Execute a getfsmap query against the log device. */
433STATIC int
434xfs_getfsmap_logdev(
435 struct xfs_trans *tp,
436 struct xfs_fsmap *keys,
437 struct xfs_getfsmap_info *info)
438{
439 struct xfs_mount *mp = tp->t_mountp;
440 struct xfs_rmap_irec rmap;
441 int error;
442
443 /* Set up search keys */
444 info->low.rm_startblock = XFS_BB_TO_FSBT(mp, keys[0].fmr_physical);
445 info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset);
446 error = xfs_fsmap_owner_to_rmap(&info->low, keys);
447 if (error)
448 return error;
449 info->low.rm_blockcount = 0;
450 xfs_getfsmap_set_irec_flags(&info->low, &keys[0]);
451
452 error = xfs_fsmap_owner_to_rmap(&info->high, keys + 1);
453 if (error)
454 return error;
455 info->high.rm_startblock = -1U;
456 info->high.rm_owner = ULLONG_MAX;
457 info->high.rm_offset = ULLONG_MAX;
458 info->high.rm_blockcount = 0;
459 info->high.rm_flags = XFS_RMAP_KEY_FLAGS | XFS_RMAP_REC_FLAGS;
460 info->missing_owner = XFS_FMR_OWN_FREE;
461
462 trace_xfs_fsmap_low_key(mp, info->dev, info->agno, &info->low);
463 trace_xfs_fsmap_high_key(mp, info->dev, info->agno, &info->high);
464
465 if (keys[0].fmr_physical > 0)
466 return 0;
467
468 /* Fabricate an rmap entry for the external log device. */
469 rmap.rm_startblock = 0;
470 rmap.rm_blockcount = mp->m_sb.sb_logblocks;
471 rmap.rm_owner = XFS_RMAP_OWN_LOG;
472 rmap.rm_offset = 0;
473 rmap.rm_flags = 0;
474
475 return xfs_getfsmap_helper(tp, info, &rmap, 0);
476}
477
478/* Execute a getfsmap query against the realtime device. */
479STATIC int
480__xfs_getfsmap_rtdev(
481 struct xfs_trans *tp,
482 struct xfs_fsmap *keys,
483 int (*query_fn)(struct xfs_trans *,
484 struct xfs_getfsmap_info *),
485 struct xfs_getfsmap_info *info)
486{
487 struct xfs_mount *mp = tp->t_mountp;
488 xfs_fsblock_t start_fsb;
489 xfs_fsblock_t end_fsb;
490 xfs_daddr_t eofs;
491 int error = 0;
492
493 eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks);
494 if (keys[0].fmr_physical >= eofs)
495 return 0;
496 if (keys[1].fmr_physical >= eofs)
497 keys[1].fmr_physical = eofs - 1;
498 start_fsb = XFS_BB_TO_FSBT(mp, keys[0].fmr_physical);
499 end_fsb = XFS_BB_TO_FSB(mp, keys[1].fmr_physical);
500
501 /* Set up search keys */
502 info->low.rm_startblock = start_fsb;
503 error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]);
504 if (error)
505 return error;
506 info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset);
507 info->low.rm_blockcount = 0;
508 xfs_getfsmap_set_irec_flags(&info->low, &keys[0]);
509
510 info->high.rm_startblock = end_fsb;
511 error = xfs_fsmap_owner_to_rmap(&info->high, &keys[1]);
512 if (error)
513 return error;
514 info->high.rm_offset = XFS_BB_TO_FSBT(mp, keys[1].fmr_offset);
515 info->high.rm_blockcount = 0;
516 xfs_getfsmap_set_irec_flags(&info->high, &keys[1]);
517
518 trace_xfs_fsmap_low_key(mp, info->dev, info->agno, &info->low);
519 trace_xfs_fsmap_high_key(mp, info->dev, info->agno, &info->high);
520
521 return query_fn(tp, info);
522}
523
524/* Actually query the realtime bitmap. */
525STATIC int
526xfs_getfsmap_rtdev_rtbitmap_query(
527 struct xfs_trans *tp,
528 struct xfs_getfsmap_info *info)
529{
530 struct xfs_rtalloc_rec alow;
531 struct xfs_rtalloc_rec ahigh;
532 int error;
533
534 xfs_ilock(tp->t_mountp->m_rbmip, XFS_ILOCK_SHARED);
535
536 alow.ar_startblock = info->low.rm_startblock;
537 ahigh.ar_startblock = info->high.rm_startblock;
538 error = xfs_rtalloc_query_range(tp, &alow, &ahigh,
539 xfs_getfsmap_rtdev_rtbitmap_helper, info);
540 if (error)
541 goto err;
542
543 /* Report any gaps at the end of the rtbitmap */
544 info->last = true;
545 error = xfs_getfsmap_rtdev_rtbitmap_helper(tp, &ahigh, info);
546 if (error)
547 goto err;
548err:
549 xfs_iunlock(tp->t_mountp->m_rbmip, XFS_ILOCK_SHARED);
550 return error;
551}
552
553/* Execute a getfsmap query against the realtime device rtbitmap. */
554STATIC int
555xfs_getfsmap_rtdev_rtbitmap(
556 struct xfs_trans *tp,
557 struct xfs_fsmap *keys,
558 struct xfs_getfsmap_info *info)
559{
560 info->missing_owner = XFS_FMR_OWN_UNKNOWN;
561 return __xfs_getfsmap_rtdev(tp, keys, xfs_getfsmap_rtdev_rtbitmap_query,
562 info);
563}
564
565/* Execute a getfsmap query against the regular data device. */
566STATIC int
567__xfs_getfsmap_datadev(
568 struct xfs_trans *tp,
569 struct xfs_fsmap *keys,
570 struct xfs_getfsmap_info *info,
571 int (*query_fn)(struct xfs_trans *,
572 struct xfs_getfsmap_info *,
573 struct xfs_btree_cur **,
574 void *),
575 void *priv)
576{
577 struct xfs_mount *mp = tp->t_mountp;
578 struct xfs_btree_cur *bt_cur = NULL;
579 xfs_fsblock_t start_fsb;
580 xfs_fsblock_t end_fsb;
581 xfs_agnumber_t start_ag;
582 xfs_agnumber_t end_ag;
583 xfs_daddr_t eofs;
584 int error = 0;
585
586 eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
587 if (keys[0].fmr_physical >= eofs)
588 return 0;
589 if (keys[1].fmr_physical >= eofs)
590 keys[1].fmr_physical = eofs - 1;
591 start_fsb = XFS_DADDR_TO_FSB(mp, keys[0].fmr_physical);
592 end_fsb = XFS_DADDR_TO_FSB(mp, keys[1].fmr_physical);
593
594 /*
595 * Convert the fsmap low/high keys to AG based keys. Initialize
596 * low to the fsmap low key and max out the high key to the end
597 * of the AG.
598 */
599 info->low.rm_startblock = XFS_FSB_TO_AGBNO(mp, start_fsb);
600 info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset);
601 error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]);
602 if (error)
603 return error;
604 info->low.rm_blockcount = 0;
605 xfs_getfsmap_set_irec_flags(&info->low, &keys[0]);
606
607 info->high.rm_startblock = -1U;
608 info->high.rm_owner = ULLONG_MAX;
609 info->high.rm_offset = ULLONG_MAX;
610 info->high.rm_blockcount = 0;
611 info->high.rm_flags = XFS_RMAP_KEY_FLAGS | XFS_RMAP_REC_FLAGS;
612
613 start_ag = XFS_FSB_TO_AGNO(mp, start_fsb);
614 end_ag = XFS_FSB_TO_AGNO(mp, end_fsb);
615
616 /* Query each AG */
617 for (info->agno = start_ag; info->agno <= end_ag; info->agno++) {
618 /*
619 * Set the AG high key from the fsmap high key if this
620 * is the last AG that we're querying.
621 */
622 if (info->agno == end_ag) {
623 info->high.rm_startblock = XFS_FSB_TO_AGBNO(mp,
624 end_fsb);
625 info->high.rm_offset = XFS_BB_TO_FSBT(mp,
626 keys[1].fmr_offset);
627 error = xfs_fsmap_owner_to_rmap(&info->high, &keys[1]);
628 if (error)
629 goto err;
630 xfs_getfsmap_set_irec_flags(&info->high, &keys[1]);
631 }
632
633 if (bt_cur) {
634 xfs_btree_del_cursor(bt_cur, XFS_BTREE_NOERROR);
635 bt_cur = NULL;
636 xfs_trans_brelse(tp, info->agf_bp);
637 info->agf_bp = NULL;
638 }
639
640 error = xfs_alloc_read_agf(mp, tp, info->agno, 0,
641 &info->agf_bp);
642 if (error)
643 goto err;
644
645 trace_xfs_fsmap_low_key(mp, info->dev, info->agno, &info->low);
646 trace_xfs_fsmap_high_key(mp, info->dev, info->agno,
647 &info->high);
648
649 error = query_fn(tp, info, &bt_cur, priv);
650 if (error)
651 goto err;
652
653 /*
654 * Set the AG low key to the start of the AG prior to
655 * moving on to the next AG.
656 */
657 if (info->agno == start_ag) {
658 info->low.rm_startblock = 0;
659 info->low.rm_owner = 0;
660 info->low.rm_offset = 0;
661 info->low.rm_flags = 0;
662 }
663 }
664
665 /* Report any gap at the end of the AG */
666 info->last = true;
667 error = query_fn(tp, info, &bt_cur, priv);
668 if (error)
669 goto err;
670
671err:
672 if (bt_cur)
673 xfs_btree_del_cursor(bt_cur, error < 0 ? XFS_BTREE_ERROR :
674 XFS_BTREE_NOERROR);
675 if (info->agf_bp) {
676 xfs_trans_brelse(tp, info->agf_bp);
677 info->agf_bp = NULL;
678 }
679
680 return error;
681}
682
683/* Actually query the rmap btree. */
684STATIC int
685xfs_getfsmap_datadev_rmapbt_query(
686 struct xfs_trans *tp,
687 struct xfs_getfsmap_info *info,
688 struct xfs_btree_cur **curpp,
689 void *priv)
690{
691 /* Report any gap at the end of the last AG. */
692 if (info->last)
693 return xfs_getfsmap_datadev_helper(*curpp, &info->high, info);
694
695 /* Allocate cursor for this AG and query_range it. */
696 *curpp = xfs_rmapbt_init_cursor(tp->t_mountp, tp, info->agf_bp,
697 info->agno);
698 return xfs_rmap_query_range(*curpp, &info->low, &info->high,
699 xfs_getfsmap_datadev_helper, info);
700}
701
702/* Execute a getfsmap query against the regular data device rmapbt. */
703STATIC int
704xfs_getfsmap_datadev_rmapbt(
705 struct xfs_trans *tp,
706 struct xfs_fsmap *keys,
707 struct xfs_getfsmap_info *info)
708{
709 info->missing_owner = XFS_FMR_OWN_FREE;
710 return __xfs_getfsmap_datadev(tp, keys, info,
711 xfs_getfsmap_datadev_rmapbt_query, NULL);
712}
713
714/* Actually query the bno btree. */
715STATIC int
716xfs_getfsmap_datadev_bnobt_query(
717 struct xfs_trans *tp,
718 struct xfs_getfsmap_info *info,
719 struct xfs_btree_cur **curpp,
720 void *priv)
721{
722 struct xfs_alloc_rec_incore *key = priv;
723
724 /* Report any gap at the end of the last AG. */
725 if (info->last)
726 return xfs_getfsmap_datadev_bnobt_helper(*curpp, &key[1], info);
727
728 /* Allocate cursor for this AG and query_range it. */
729 *curpp = xfs_allocbt_init_cursor(tp->t_mountp, tp, info->agf_bp,
730 info->agno, XFS_BTNUM_BNO);
731 key->ar_startblock = info->low.rm_startblock;
732 key[1].ar_startblock = info->high.rm_startblock;
733 return xfs_alloc_query_range(*curpp, key, &key[1],
734 xfs_getfsmap_datadev_bnobt_helper, info);
735}
736
737/* Execute a getfsmap query against the regular data device's bnobt. */
738STATIC int
739xfs_getfsmap_datadev_bnobt(
740 struct xfs_trans *tp,
741 struct xfs_fsmap *keys,
742 struct xfs_getfsmap_info *info)
743{
744 struct xfs_alloc_rec_incore akeys[2];
745
746 info->missing_owner = XFS_FMR_OWN_UNKNOWN;
747 return __xfs_getfsmap_datadev(tp, keys, info,
748 xfs_getfsmap_datadev_bnobt_query, &akeys[0]);
749}
750
751/* Do we recognize the device? */
752STATIC bool
753xfs_getfsmap_is_valid_device(
754 struct xfs_mount *mp,
755 struct xfs_fsmap *fm)
756{
757 if (fm->fmr_device == 0 || fm->fmr_device == UINT_MAX ||
758 fm->fmr_device == new_encode_dev(mp->m_ddev_targp->bt_dev))
759 return true;
760 if (mp->m_logdev_targp &&
761 fm->fmr_device == new_encode_dev(mp->m_logdev_targp->bt_dev))
762 return true;
763 if (mp->m_rtdev_targp &&
764 fm->fmr_device == new_encode_dev(mp->m_rtdev_targp->bt_dev))
765 return true;
766 return false;
767}
768
769/* Ensure that the low key is less than the high key. */
770STATIC bool
771xfs_getfsmap_check_keys(
772 struct xfs_fsmap *low_key,
773 struct xfs_fsmap *high_key)
774{
775 if (low_key->fmr_device > high_key->fmr_device)
776 return false;
777 if (low_key->fmr_device < high_key->fmr_device)
778 return true;
779
780 if (low_key->fmr_physical > high_key->fmr_physical)
781 return false;
782 if (low_key->fmr_physical < high_key->fmr_physical)
783 return true;
784
785 if (low_key->fmr_owner > high_key->fmr_owner)
786 return false;
787 if (low_key->fmr_owner < high_key->fmr_owner)
788 return true;
789
790 if (low_key->fmr_offset > high_key->fmr_offset)
791 return false;
792 if (low_key->fmr_offset < high_key->fmr_offset)
793 return true;
794
795 return false;
796}
797
798#define XFS_GETFSMAP_DEVS 3
799/*
800 * Get filesystem's extents as described in head, and format for
801 * output. Calls formatter to fill the user's buffer until all
802 * extents are mapped, until the passed-in head->fmh_count slots have
803 * been filled, or until the formatter short-circuits the loop, if it
804 * is tracking filled-in extents on its own.
805 *
806 * Key to Confusion
807 * ----------------
808 * There are multiple levels of keys and counters at work here:
809 * xfs_fsmap_head.fmh_keys -- low and high fsmap keys passed in;
810 * these reflect fs-wide sector addrs.
811 * dkeys -- fmh_keys used to query each device;
812 * these are fmh_keys but w/ the low key
813 * bumped up by fmr_length.
814 * xfs_getfsmap_info.next_daddr -- next disk addr we expect to see; this
815 * is how we detect gaps in the fsmap
 816 *				   records and report them.
817 * xfs_getfsmap_info.low/high -- per-AG low/high keys computed from
818 * dkeys; used to query the metadata.
819 */
820int
821xfs_getfsmap(
822 struct xfs_mount *mp,
823 struct xfs_fsmap_head *head,
824 xfs_fsmap_format_t formatter,
825 void *arg)
826{
827 struct xfs_trans *tp = NULL;
828 struct xfs_fsmap dkeys[2]; /* per-dev keys */
829 struct xfs_getfsmap_dev handlers[XFS_GETFSMAP_DEVS];
830 struct xfs_getfsmap_info info = { NULL };
831 int i;
832 int error = 0;
833
834 if (head->fmh_iflags & ~FMH_IF_VALID)
835 return -EINVAL;
836 if (!xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[0]) ||
837 !xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[1]))
838 return -EINVAL;
839
840 head->fmh_entries = 0;
841
842 /* Set up our device handlers. */
843 memset(handlers, 0, sizeof(handlers));
844 handlers[0].dev = new_encode_dev(mp->m_ddev_targp->bt_dev);
845 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
846 handlers[0].fn = xfs_getfsmap_datadev_rmapbt;
847 else
848 handlers[0].fn = xfs_getfsmap_datadev_bnobt;
849 if (mp->m_logdev_targp != mp->m_ddev_targp) {
850 handlers[1].dev = new_encode_dev(mp->m_logdev_targp->bt_dev);
851 handlers[1].fn = xfs_getfsmap_logdev;
852 }
853 if (mp->m_rtdev_targp) {
854 handlers[2].dev = new_encode_dev(mp->m_rtdev_targp->bt_dev);
855 handlers[2].fn = xfs_getfsmap_rtdev_rtbitmap;
856 }
857
858 xfs_sort(handlers, XFS_GETFSMAP_DEVS, sizeof(struct xfs_getfsmap_dev),
859 xfs_getfsmap_dev_compare);
860
861 /*
862 * To continue where we left off, we allow userspace to use the
863 * last mapping from a previous call as the low key of the next.
864 * This is identified by a non-zero length in the low key. We
865 * have to increment the low key in this scenario to ensure we
866 * don't return the same mapping again, and instead return the
867 * very next mapping.
868 *
869 * If the low key mapping refers to file data, the same physical
870 * blocks could be mapped to several other files/offsets.
871 * According to rmapbt record ordering, the minimal next
872 * possible record for the block range is the next starting
873 * offset in the same inode. Therefore, bump the file offset to
874 * continue the search appropriately. For all other low key
875 * mapping types (attr blocks, metadata), bump the physical
876 * offset as there can be no other mapping for the same physical
877 * block range.
878 */
879 dkeys[0] = head->fmh_keys[0];
880 if (dkeys[0].fmr_flags & (FMR_OF_SPECIAL_OWNER | FMR_OF_EXTENT_MAP)) {
881 dkeys[0].fmr_physical += dkeys[0].fmr_length;
882 dkeys[0].fmr_owner = 0;
883 if (dkeys[0].fmr_offset)
884 return -EINVAL;
885 } else
886 dkeys[0].fmr_offset += dkeys[0].fmr_length;
887 dkeys[0].fmr_length = 0;
888 memset(&dkeys[1], 0xFF, sizeof(struct xfs_fsmap));
889
890 if (!xfs_getfsmap_check_keys(dkeys, &head->fmh_keys[1]))
891 return -EINVAL;
892
893 info.next_daddr = head->fmh_keys[0].fmr_physical +
894 head->fmh_keys[0].fmr_length;
895 info.formatter = formatter;
896 info.format_arg = arg;
897 info.head = head;
898
899 /* For each device we support... */
900 for (i = 0; i < XFS_GETFSMAP_DEVS; i++) {
901 /* Is this device within the range the user asked for? */
902 if (!handlers[i].fn)
903 continue;
904 if (head->fmh_keys[0].fmr_device > handlers[i].dev)
905 continue;
906 if (head->fmh_keys[1].fmr_device < handlers[i].dev)
907 break;
908
909 /*
910 * If this device number matches the high key, we have
911 * to pass the high key to the handler to limit the
912 * query results. If the device number exceeds the
913 * low key, zero out the low key so that we get
914 * everything from the beginning.
915 */
916 if (handlers[i].dev == head->fmh_keys[1].fmr_device)
917 dkeys[1] = head->fmh_keys[1];
918 if (handlers[i].dev > head->fmh_keys[0].fmr_device)
919 memset(&dkeys[0], 0, sizeof(struct xfs_fsmap));
920
921 error = xfs_trans_alloc_empty(mp, &tp);
922 if (error)
923 break;
924
925 info.dev = handlers[i].dev;
926 info.last = false;
927 info.agno = NULLAGNUMBER;
928 error = handlers[i].fn(tp, dkeys, &info);
929 if (error)
930 break;
931 xfs_trans_cancel(tp);
932 tp = NULL;
933 info.next_daddr = 0;
934 }
935
936 if (tp)
937 xfs_trans_cancel(tp);
938 head->fmh_oflags = FMH_OF_DEV_T;
939 return error;
940}
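xfs_getfsmap() above is the kernel half of the feature; userspace reaches it through the FS_IOC_GETFSMAP ioctl wired up elsewhere in this series. A minimal sketch of a caller that fetches one batch of mappings and prints them, assuming the <linux/fsmap.h> uapi header (struct fsmap_head, fsmap_sizeof(), FS_IOC_GETFSMAP) that accompanies the feature:

	/* Hedged userspace sketch: query one batch of mappings and print them. */
	#include <linux/fsmap.h>
	#include <sys/ioctl.h>
	#include <fcntl.h>
	#include <limits.h>
	#include <stdio.h>
	#include <stdlib.h>

	int main(int argc, char **argv)
	{
		struct fsmap_head	*head;
		struct fsmap		*p;
		unsigned int		i;
		int			fd;

		if (argc < 2)
			return 1;
		head = calloc(1, fsmap_sizeof(128));	/* header + 128 record slots */
		if (!head)
			return 1;

		/* Low key stays all zeroes; build an all-ones high key field by field. */
		head->fmh_count = 128;
		head->fmh_keys[1].fmr_device = UINT_MAX;
		head->fmh_keys[1].fmr_physical = ULLONG_MAX;
		head->fmh_keys[1].fmr_owner = ULLONG_MAX;
		head->fmh_keys[1].fmr_offset = ULLONG_MAX;
		head->fmh_keys[1].fmr_flags = UINT_MAX;

		fd = open(argv[1], O_RDONLY);
		if (fd < 0 || ioctl(fd, FS_IOC_GETFSMAP, head) < 0) {
			perror("FS_IOC_GETFSMAP");
			return 1;
		}

		for (i = 0, p = head->fmh_recs; i < head->fmh_entries; i++, p++)
			printf("dev 0x%x phys %llu len %llu owner 0x%llx off %llu\n",
			       p->fmr_device,
			       (unsigned long long)p->fmr_physical,
			       (unsigned long long)p->fmr_length,
			       (unsigned long long)p->fmr_owner,
			       (unsigned long long)p->fmr_offset);
		return 0;
	}

A real caller loops, feeding the last returned record back in as the next low key, which is the continuation case the long comment in xfs_getfsmap() describes; the reserved fields must stay zero, which is why the all-ones high key is built field by field rather than with a blanket memset().
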
diff --git a/fs/xfs/xfs_fsmap.h b/fs/xfs/xfs_fsmap.h
new file mode 100644
index 000000000000..0b9bf822595c
--- /dev/null
+++ b/fs/xfs/xfs_fsmap.h
@@ -0,0 +1,53 @@
1/*
2 * Copyright (C) 2017 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#ifndef __XFS_FSMAP_H__
21#define __XFS_FSMAP_H__
22
23struct fsmap;
24
25/* internal fsmap representation */
26struct xfs_fsmap {
27 dev_t fmr_device; /* device id */
28 uint32_t fmr_flags; /* mapping flags */
29 uint64_t fmr_physical; /* device offset of segment */
30 uint64_t fmr_owner; /* owner id */
31 xfs_fileoff_t fmr_offset; /* file offset of segment */
32 xfs_filblks_t fmr_length; /* length of segment, blocks */
33};
34
35struct xfs_fsmap_head {
36 uint32_t fmh_iflags; /* control flags */
37 uint32_t fmh_oflags; /* output flags */
38 unsigned int fmh_count; /* # of entries in array incl. input */
39 unsigned int fmh_entries; /* # of entries filled in (output). */
40
41 struct xfs_fsmap fmh_keys[2]; /* low and high keys */
42};
43
44void xfs_fsmap_from_internal(struct fsmap *dest, struct xfs_fsmap *src);
45void xfs_fsmap_to_internal(struct xfs_fsmap *dest, struct fsmap *src);
46
47/* fsmap to userspace formatter - copy to user & advance pointer */
48typedef int (*xfs_fsmap_format_t)(struct xfs_fsmap *, void *);
49
50int xfs_getfsmap(struct xfs_mount *mp, struct xfs_fsmap_head *head,
51 xfs_fsmap_format_t formatter, void *arg);
52
53#endif /* __XFS_FSMAP_H__ */
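xfs_getfsmap() drives whatever xfs_fsmap_format_t callback the caller passes in; the only in-tree consumer is xfs_getfsmap_format() in the xfs_ioctl.c hunk below, which copies each record to userspace. As a rough illustration of the contract, a hypothetical in-kernel caller that merely counts records could look like this (count_fsmap_rec and struct fsmap_counter are made-up names, and a nonzero return is treated as an error that ends the query):

/* Hypothetical formatter matching xfs_fsmap_format_t: count records
 * instead of copying them anywhere.
 */
struct fsmap_counter {
	uint64_t		nr;
};

static int
count_fsmap_rec(
	struct xfs_fsmap	*rec,
	void			*priv)
{
	struct fsmap_counter	*c = priv;

	c->nr++;
	return 0;
}

/* usage sketch: error = xfs_getfsmap(mp, &head, count_fsmap_rec, &c); */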
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 3531f8f72fa5..f61c84f8e31a 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -262,6 +262,22 @@ xfs_inode_clear_reclaim_tag(
262 xfs_perag_clear_reclaim_tag(pag); 262 xfs_perag_clear_reclaim_tag(pag);
263} 263}
264 264
265static void
266xfs_inew_wait(
267 struct xfs_inode *ip)
268{
269 wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_INEW_BIT);
270 DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_INEW_BIT);
271
272 do {
273 prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
274 if (!xfs_iflags_test(ip, XFS_INEW))
275 break;
276 schedule();
277 } while (true);
278 finish_wait(wq, &wait.wait);
279}
280
265/* 281/*
266 * When we recycle a reclaimable inode, we need to re-initialise the VFS inode 282 * When we recycle a reclaimable inode, we need to re-initialise the VFS inode
267 * part of the structure. This is made more complex by the fact we store 283 * part of the structure. This is made more complex by the fact we store
@@ -366,14 +382,17 @@ xfs_iget_cache_hit(
366 382
367 error = xfs_reinit_inode(mp, inode); 383 error = xfs_reinit_inode(mp, inode);
368 if (error) { 384 if (error) {
385 bool wake;
369 /* 386 /*
370 * Re-initializing the inode failed, and we are in deep 387 * Re-initializing the inode failed, and we are in deep
371 * trouble. Try to re-add it to the reclaim list. 388 * trouble. Try to re-add it to the reclaim list.
372 */ 389 */
373 rcu_read_lock(); 390 rcu_read_lock();
374 spin_lock(&ip->i_flags_lock); 391 spin_lock(&ip->i_flags_lock);
375 392 wake = !!__xfs_iflags_test(ip, XFS_INEW);
376 ip->i_flags &= ~(XFS_INEW | XFS_IRECLAIM); 393 ip->i_flags &= ~(XFS_INEW | XFS_IRECLAIM);
394 if (wake)
395 wake_up_bit(&ip->i_flags, __XFS_INEW_BIT);
377 ASSERT(ip->i_flags & XFS_IRECLAIMABLE); 396 ASSERT(ip->i_flags & XFS_IRECLAIMABLE);
378 trace_xfs_iget_reclaim_fail(ip); 397 trace_xfs_iget_reclaim_fail(ip);
379 goto out_error; 398 goto out_error;
@@ -623,9 +642,11 @@ out_error_or_again:
623 642
624STATIC int 643STATIC int
625xfs_inode_ag_walk_grab( 644xfs_inode_ag_walk_grab(
626 struct xfs_inode *ip) 645 struct xfs_inode *ip,
646 int flags)
627{ 647{
628 struct inode *inode = VFS_I(ip); 648 struct inode *inode = VFS_I(ip);
649 bool newinos = !!(flags & XFS_AGITER_INEW_WAIT);
629 650
630 ASSERT(rcu_read_lock_held()); 651 ASSERT(rcu_read_lock_held());
631 652
@@ -643,7 +664,8 @@ xfs_inode_ag_walk_grab(
643 goto out_unlock_noent; 664 goto out_unlock_noent;
644 665
645 /* avoid new or reclaimable inodes. Leave for reclaim code to flush */ 666 /* avoid new or reclaimable inodes. Leave for reclaim code to flush */
646 if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) 667 if ((!newinos && __xfs_iflags_test(ip, XFS_INEW)) ||
668 __xfs_iflags_test(ip, XFS_IRECLAIMABLE | XFS_IRECLAIM))
647 goto out_unlock_noent; 669 goto out_unlock_noent;
648 spin_unlock(&ip->i_flags_lock); 670 spin_unlock(&ip->i_flags_lock);
649 671
@@ -671,7 +693,8 @@ xfs_inode_ag_walk(
671 void *args), 693 void *args),
672 int flags, 694 int flags,
673 void *args, 695 void *args,
674 int tag) 696 int tag,
697 int iter_flags)
675{ 698{
676 uint32_t first_index; 699 uint32_t first_index;
677 int last_error = 0; 700 int last_error = 0;
@@ -713,7 +736,7 @@ restart:
713 for (i = 0; i < nr_found; i++) { 736 for (i = 0; i < nr_found; i++) {
714 struct xfs_inode *ip = batch[i]; 737 struct xfs_inode *ip = batch[i];
715 738
716 if (done || xfs_inode_ag_walk_grab(ip)) 739 if (done || xfs_inode_ag_walk_grab(ip, iter_flags))
717 batch[i] = NULL; 740 batch[i] = NULL;
718 741
719 /* 742 /*
@@ -741,6 +764,9 @@ restart:
741 for (i = 0; i < nr_found; i++) { 764 for (i = 0; i < nr_found; i++) {
742 if (!batch[i]) 765 if (!batch[i])
743 continue; 766 continue;
767 if ((iter_flags & XFS_AGITER_INEW_WAIT) &&
768 xfs_iflags_test(batch[i], XFS_INEW))
769 xfs_inew_wait(batch[i]);
744 error = execute(batch[i], flags, args); 770 error = execute(batch[i], flags, args);
745 IRELE(batch[i]); 771 IRELE(batch[i]);
746 if (error == -EAGAIN) { 772 if (error == -EAGAIN) {
@@ -820,12 +846,13 @@ xfs_cowblocks_worker(
820} 846}
821 847
822int 848int
823xfs_inode_ag_iterator( 849xfs_inode_ag_iterator_flags(
824 struct xfs_mount *mp, 850 struct xfs_mount *mp,
825 int (*execute)(struct xfs_inode *ip, int flags, 851 int (*execute)(struct xfs_inode *ip, int flags,
826 void *args), 852 void *args),
827 int flags, 853 int flags,
828 void *args) 854 void *args,
855 int iter_flags)
829{ 856{
830 struct xfs_perag *pag; 857 struct xfs_perag *pag;
831 int error = 0; 858 int error = 0;
@@ -835,7 +862,8 @@ xfs_inode_ag_iterator(
835 ag = 0; 862 ag = 0;
836 while ((pag = xfs_perag_get(mp, ag))) { 863 while ((pag = xfs_perag_get(mp, ag))) {
837 ag = pag->pag_agno + 1; 864 ag = pag->pag_agno + 1;
838 error = xfs_inode_ag_walk(mp, pag, execute, flags, args, -1); 865 error = xfs_inode_ag_walk(mp, pag, execute, flags, args, -1,
866 iter_flags);
839 xfs_perag_put(pag); 867 xfs_perag_put(pag);
840 if (error) { 868 if (error) {
841 last_error = error; 869 last_error = error;
@@ -847,6 +875,17 @@ xfs_inode_ag_iterator(
847} 875}
848 876
849int 877int
878xfs_inode_ag_iterator(
879 struct xfs_mount *mp,
880 int (*execute)(struct xfs_inode *ip, int flags,
881 void *args),
882 int flags,
883 void *args)
884{
885 return xfs_inode_ag_iterator_flags(mp, execute, flags, args, 0);
886}
887
888int
850xfs_inode_ag_iterator_tag( 889xfs_inode_ag_iterator_tag(
851 struct xfs_mount *mp, 890 struct xfs_mount *mp,
852 int (*execute)(struct xfs_inode *ip, int flags, 891 int (*execute)(struct xfs_inode *ip, int flags,
@@ -863,7 +902,8 @@ xfs_inode_ag_iterator_tag(
863 ag = 0; 902 ag = 0;
864 while ((pag = xfs_perag_get_tag(mp, ag, tag))) { 903 while ((pag = xfs_perag_get_tag(mp, ag, tag))) {
865 ag = pag->pag_agno + 1; 904 ag = pag->pag_agno + 1;
866 error = xfs_inode_ag_walk(mp, pag, execute, flags, args, tag); 905 error = xfs_inode_ag_walk(mp, pag, execute, flags, args, tag,
906 0);
867 xfs_perag_put(pag); 907 xfs_perag_put(pag);
868 if (error) { 908 if (error) {
869 last_error = error; 909 last_error = error;
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index 8a7c849b4dea..9183f77958ef 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -48,6 +48,11 @@ struct xfs_eofblocks {
48#define XFS_IGET_UNTRUSTED 0x2 48#define XFS_IGET_UNTRUSTED 0x2
49#define XFS_IGET_DONTCACHE 0x4 49#define XFS_IGET_DONTCACHE 0x4
50 50
51/*
52 * flags for AG inode iterator
53 */
54#define XFS_AGITER_INEW_WAIT 0x1 /* wait on new inodes */
55
51int xfs_iget(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t ino, 56int xfs_iget(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t ino,
52 uint flags, uint lock_flags, xfs_inode_t **ipp); 57 uint flags, uint lock_flags, xfs_inode_t **ipp);
53 58
@@ -79,6 +84,9 @@ void xfs_cowblocks_worker(struct work_struct *);
79int xfs_inode_ag_iterator(struct xfs_mount *mp, 84int xfs_inode_ag_iterator(struct xfs_mount *mp,
80 int (*execute)(struct xfs_inode *ip, int flags, void *args), 85 int (*execute)(struct xfs_inode *ip, int flags, void *args),
81 int flags, void *args); 86 int flags, void *args);
87int xfs_inode_ag_iterator_flags(struct xfs_mount *mp,
88 int (*execute)(struct xfs_inode *ip, int flags, void *args),
89 int flags, void *args, int iter_flags);
82int xfs_inode_ag_iterator_tag(struct xfs_mount *mp, 90int xfs_inode_ag_iterator_tag(struct xfs_mount *mp,
83 int (*execute)(struct xfs_inode *ip, int flags, void *args), 91 int (*execute)(struct xfs_inode *ip, int flags, void *args),
84 int flags, void *args, int tag); 92 int flags, void *args, int tag);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 7605d8396596..ec9826c56500 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1906,12 +1906,13 @@ xfs_inactive(
1906 * force is true because we are evicting an inode from the 1906 * force is true because we are evicting an inode from the
1907 * cache. Post-eof blocks must be freed, lest we end up with 1907 * cache. Post-eof blocks must be freed, lest we end up with
1908 * broken free space accounting. 1908 * broken free space accounting.
1909 *
1910 * Note: don't bother with iolock here since lockdep complains
1911 * about acquiring it in reclaim context. We have the only
1912 * reference to the inode at this point anyways.
1909 */ 1913 */
1910 if (xfs_can_free_eofblocks(ip, true)) { 1914 if (xfs_can_free_eofblocks(ip, true))
1911 xfs_ilock(ip, XFS_IOLOCK_EXCL);
1912 xfs_free_eofblocks(ip); 1915 xfs_free_eofblocks(ip);
1913 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
1914 }
1915 1916
1916 return; 1917 return;
1917 } 1918 }
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 10dcf27b4c85..10e89fcb49d7 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -216,7 +216,8 @@ static inline bool xfs_is_reflink_inode(struct xfs_inode *ip)
216#define XFS_IRECLAIM (1 << 0) /* started reclaiming this inode */ 216#define XFS_IRECLAIM (1 << 0) /* started reclaiming this inode */
217#define XFS_ISTALE (1 << 1) /* inode has been staled */ 217#define XFS_ISTALE (1 << 1) /* inode has been staled */
218#define XFS_IRECLAIMABLE (1 << 2) /* inode can be reclaimed */ 218#define XFS_IRECLAIMABLE (1 << 2) /* inode can be reclaimed */
219#define XFS_INEW (1 << 3) /* inode has just been allocated */ 219#define __XFS_INEW_BIT 3 /* inode has just been allocated */
220#define XFS_INEW (1 << __XFS_INEW_BIT)
220#define XFS_ITRUNCATED (1 << 5) /* truncated down so flush-on-close */ 221#define XFS_ITRUNCATED (1 << 5) /* truncated down so flush-on-close */
221#define XFS_IDIRTY_RELEASE (1 << 6) /* dirty release already seen */ 222#define XFS_IDIRTY_RELEASE (1 << 6) /* dirty release already seen */
222#define __XFS_IFLOCK_BIT 7 /* inode is being flushed right now */ 223#define __XFS_IFLOCK_BIT 7 /* inode is being flushed right now */
@@ -464,6 +465,7 @@ static inline void xfs_finish_inode_setup(struct xfs_inode *ip)
464 xfs_iflags_clear(ip, XFS_INEW); 465 xfs_iflags_clear(ip, XFS_INEW);
465 barrier(); 466 barrier();
466 unlock_new_inode(VFS_I(ip)); 467 unlock_new_inode(VFS_I(ip));
468 wake_up_bit(&ip->i_flags, __XFS_INEW_BIT);
467} 469}
468 470
469static inline void xfs_setup_existing_inode(struct xfs_inode *ip) 471static inline void xfs_setup_existing_inode(struct xfs_inode *ip)
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index d90e7811ccdd..08cb7d1a4a3a 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -731,22 +731,27 @@ xfs_iflush_done(
731 * holding the lock before removing the inode from the AIL. 731 * holding the lock before removing the inode from the AIL.
732 */ 732 */
733 if (need_ail) { 733 if (need_ail) {
734 struct xfs_log_item *log_items[need_ail]; 734 bool mlip_changed = false;
735 int i = 0; 735
736 /* this is an opencoded batch version of xfs_trans_ail_delete */
736 spin_lock(&ailp->xa_lock); 737 spin_lock(&ailp->xa_lock);
737 for (blip = lip; blip; blip = blip->li_bio_list) { 738 for (blip = lip; blip; blip = blip->li_bio_list) {
738 iip = INODE_ITEM(blip); 739 if (INODE_ITEM(blip)->ili_logged &&
739 if (iip->ili_logged && 740 blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn)
740 blip->li_lsn == iip->ili_flush_lsn) { 741 mlip_changed |= xfs_ail_delete_one(ailp, blip);
741 log_items[i++] = blip;
742 }
743 ASSERT(i <= need_ail);
744 } 742 }
745 /* xfs_trans_ail_delete_bulk() drops the AIL lock. */
746 xfs_trans_ail_delete_bulk(ailp, log_items, i,
747 SHUTDOWN_CORRUPT_INCORE);
748 }
749 743
744 if (mlip_changed) {
745 if (!XFS_FORCED_SHUTDOWN(ailp->xa_mount))
746 xlog_assign_tail_lsn_locked(ailp->xa_mount);
747 if (list_empty(&ailp->xa_ail))
748 wake_up_all(&ailp->xa_empty);
749 }
750 spin_unlock(&ailp->xa_lock);
751
752 if (mlip_changed)
753 xfs_log_space_wake(ailp->xa_mount);
754 }
750 755
751 /* 756 /*
752 * clean up and unlock the flush lock now we are done. We can clear the 757 * clean up and unlock the flush lock now we are done. We can clear the
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 2fd7fdf5438f..6190697603c9 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -41,6 +41,9 @@
41#include "xfs_trans.h" 41#include "xfs_trans.h"
42#include "xfs_pnfs.h" 42#include "xfs_pnfs.h"
43#include "xfs_acl.h" 43#include "xfs_acl.h"
44#include "xfs_btree.h"
45#include <linux/fsmap.h>
46#include "xfs_fsmap.h"
44 47
45#include <linux/capability.h> 48#include <linux/capability.h>
46#include <linux/cred.h> 49#include <linux/cred.h>
@@ -1543,10 +1546,11 @@ xfs_ioc_getbmap(
1543 unsigned int cmd, 1546 unsigned int cmd,
1544 void __user *arg) 1547 void __user *arg)
1545{ 1548{
1546 struct getbmapx bmx; 1549 struct getbmapx bmx = { 0 };
1547 int error; 1550 int error;
1548 1551
1549 if (copy_from_user(&bmx, arg, sizeof(struct getbmapx))) 1552 /* struct getbmap is a strict subset of struct getbmapx. */
1553 if (copy_from_user(&bmx, arg, offsetof(struct getbmapx, bmv_iflags)))
1550 return -EFAULT; 1554 return -EFAULT;
1551 1555
1552 if (bmx.bmv_count < 2) 1556 if (bmx.bmv_count < 2)
@@ -1608,6 +1612,84 @@ xfs_ioc_getbmapx(
1608 return 0; 1612 return 0;
1609} 1613}
1610 1614
1615struct getfsmap_info {
1616 struct xfs_mount *mp;
1617 struct fsmap_head __user *data;
1618 unsigned int idx;
1619 __u32 last_flags;
1620};
1621
1622STATIC int
1623xfs_getfsmap_format(struct xfs_fsmap *xfm, void *priv)
1624{
1625 struct getfsmap_info *info = priv;
1626 struct fsmap fm;
1627
1628 trace_xfs_getfsmap_mapping(info->mp, xfm);
1629
1630 info->last_flags = xfm->fmr_flags;
1631 xfs_fsmap_from_internal(&fm, xfm);
1632 if (copy_to_user(&info->data->fmh_recs[info->idx++], &fm,
1633 sizeof(struct fsmap)))
1634 return -EFAULT;
1635
1636 return 0;
1637}
1638
1639STATIC int
1640xfs_ioc_getfsmap(
1641 struct xfs_inode *ip,
1642 struct fsmap_head __user *arg)
1643{
1644 struct getfsmap_info info = { NULL };
1645 struct xfs_fsmap_head xhead = {0};
1646 struct fsmap_head head;
1647 bool aborted = false;
1648 int error;
1649
1650 if (copy_from_user(&head, arg, sizeof(struct fsmap_head)))
1651 return -EFAULT;
1652 if (memchr_inv(head.fmh_reserved, 0, sizeof(head.fmh_reserved)) ||
1653 memchr_inv(head.fmh_keys[0].fmr_reserved, 0,
1654 sizeof(head.fmh_keys[0].fmr_reserved)) ||
1655 memchr_inv(head.fmh_keys[1].fmr_reserved, 0,
1656 sizeof(head.fmh_keys[1].fmr_reserved)))
1657 return -EINVAL;
1658
1659 xhead.fmh_iflags = head.fmh_iflags;
1660 xhead.fmh_count = head.fmh_count;
1661 xfs_fsmap_to_internal(&xhead.fmh_keys[0], &head.fmh_keys[0]);
1662 xfs_fsmap_to_internal(&xhead.fmh_keys[1], &head.fmh_keys[1]);
1663
1664 trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]);
1665 trace_xfs_getfsmap_high_key(ip->i_mount, &xhead.fmh_keys[1]);
1666
1667 info.mp = ip->i_mount;
1668 info.data = arg;
1669 error = xfs_getfsmap(ip->i_mount, &xhead, xfs_getfsmap_format, &info);
1670 if (error == XFS_BTREE_QUERY_RANGE_ABORT) {
1671 error = 0;
1672 aborted = true;
1673 } else if (error)
1674 return error;
1675
1676 /* If we didn't abort, set the "last" flag in the last fmx */
1677 if (!aborted && info.idx) {
1678 info.last_flags |= FMR_OF_LAST;
1679 if (copy_to_user(&info.data->fmh_recs[info.idx - 1].fmr_flags,
1680 &info.last_flags, sizeof(info.last_flags)))
1681 return -EFAULT;
1682 }
1683
1684 /* copy back header */
1685 head.fmh_entries = xhead.fmh_entries;
1686 head.fmh_oflags = xhead.fmh_oflags;
1687 if (copy_to_user(arg, &head, sizeof(struct fsmap_head)))
1688 return -EFAULT;
1689
1690 return 0;
1691}
1692
1611int 1693int
1612xfs_ioc_swapext( 1694xfs_ioc_swapext(
1613 xfs_swapext_t *sxp) 1695 xfs_swapext_t *sxp)
@@ -1788,6 +1870,9 @@ xfs_file_ioctl(
1788 case XFS_IOC_GETBMAPX: 1870 case XFS_IOC_GETBMAPX:
1789 return xfs_ioc_getbmapx(ip, arg); 1871 return xfs_ioc_getbmapx(ip, arg);
1790 1872
1873 case FS_IOC_GETFSMAP:
1874 return xfs_ioc_getfsmap(ip, arg);
1875
1791 case XFS_IOC_FD_TO_HANDLE: 1876 case XFS_IOC_FD_TO_HANDLE:
1792 case XFS_IOC_PATH_TO_HANDLE: 1877 case XFS_IOC_PATH_TO_HANDLE:
1793 case XFS_IOC_PATH_TO_FSHANDLE: { 1878 case XFS_IOC_PATH_TO_FSHANDLE: {
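As the new include/uapi/linux/fsmap.h comment at the end of this patch notes, calling FS_IOC_GETFSMAP with fmh_count set to zero fills in fmh_entries with the number of mappings without returning any records, so userspace can size its buffer before the real query. A sketch of that two-step pattern, reusing the high-key setup and the fd assumption from the earlier dump_fsmap() example and glossing over the race that new mappings may appear between the two calls:

/* Sketch: probe for the record count, then fetch everything at once. */
struct fsmap_head	probe = { 0 };
struct fsmap_head	*head;
size_t			sz;

/* fmh_count == 0: only report the number of mappings in fmh_entries */
probe.fmh_keys[1].fmr_device = UINT_MAX;
probe.fmh_keys[1].fmr_physical = ULLONG_MAX;
probe.fmh_keys[1].fmr_owner = ULLONG_MAX;
probe.fmh_keys[1].fmr_offset = ULLONG_MAX;
probe.fmh_keys[1].fmr_flags = UINT_MAX;
if (ioctl(fd, FS_IOC_GETFSMAP, &probe) < 0)
	return -1;

sz = sizeof(*head) + probe.fmh_entries * sizeof(struct fsmap);
head = calloc(1, sz);
if (!head)
	return -1;
head->fmh_keys[1] = probe.fmh_keys[1];
head->fmh_count = probe.fmh_entries;
if (ioctl(fd, FS_IOC_GETFSMAP, head) < 0)
	return -1;
/* head->fmh_recs[0..fmh_entries - 1] now holds the mappings */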
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index 7c49938c5aed..fa0bc4d46065 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -20,6 +20,7 @@
20#include <linux/mount.h> 20#include <linux/mount.h>
21#include <linux/slab.h> 21#include <linux/slab.h>
22#include <linux/uaccess.h> 22#include <linux/uaccess.h>
23#include <linux/fsmap.h>
23#include "xfs.h" 24#include "xfs.h"
24#include "xfs_fs.h" 25#include "xfs_fs.h"
25#include "xfs_format.h" 26#include "xfs_format.h"
@@ -554,6 +555,7 @@ xfs_file_compat_ioctl(
554 case XFS_IOC_GOINGDOWN: 555 case XFS_IOC_GOINGDOWN:
555 case XFS_IOC_ERROR_INJECTION: 556 case XFS_IOC_ERROR_INJECTION:
556 case XFS_IOC_ERROR_CLEARALL: 557 case XFS_IOC_ERROR_CLEARALL:
558 case FS_IOC_GETFSMAP:
557 return xfs_file_ioctl(filp, cmd, p); 559 return xfs_file_ioctl(filp, cmd, p);
558#ifndef BROKEN_X86_ALIGNMENT 560#ifndef BROKEN_X86_ALIGNMENT
559 /* These are handled fine if no alignment issues */ 561 /* These are handled fine if no alignment issues */
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 4b47403f8089..a63f61c256bd 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -240,7 +240,7 @@ xfs_iomap_write_direct(
240 */ 240 */
241 if (IS_DAX(VFS_I(ip))) { 241 if (IS_DAX(VFS_I(ip))) {
242 bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO; 242 bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO;
243 if (ISUNWRITTEN(imap)) { 243 if (imap->br_state == XFS_EXT_UNWRITTEN) {
244 tflags |= XFS_TRANS_RESERVE; 244 tflags |= XFS_TRANS_RESERVE;
245 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1; 245 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
246 } 246 }
@@ -945,7 +945,7 @@ static inline bool imap_needs_alloc(struct inode *inode,
945 return !nimaps || 945 return !nimaps ||
946 imap->br_startblock == HOLESTARTBLOCK || 946 imap->br_startblock == HOLESTARTBLOCK ||
947 imap->br_startblock == DELAYSTARTBLOCK || 947 imap->br_startblock == DELAYSTARTBLOCK ||
948 (IS_DAX(inode) && ISUNWRITTEN(imap)); 948 (IS_DAX(inode) && imap->br_state == XFS_EXT_UNWRITTEN);
949} 949}
950 950
951static inline bool need_excl_ilock(struct xfs_inode *ip, unsigned flags) 951static inline bool need_excl_ilock(struct xfs_inode *ip, unsigned flags)
@@ -1180,10 +1180,10 @@ xfs_xattr_iomap_begin(
1180 if (XFS_FORCED_SHUTDOWN(mp)) 1180 if (XFS_FORCED_SHUTDOWN(mp))
1181 return -EIO; 1181 return -EIO;
1182 1182
1183 lockmode = xfs_ilock_data_map_shared(ip); 1183 lockmode = xfs_ilock_attr_map_shared(ip);
1184 1184
1185 /* if there are no attribute fork or extents, return ENOENT */ 1185 /* if there are no attribute fork or extents, return ENOENT */
1186 if (XFS_IFORK_Q(ip) || !ip->i_d.di_anextents) { 1186 if (!XFS_IFORK_Q(ip) || !ip->i_d.di_anextents) {
1187 error = -ENOENT; 1187 error = -ENOENT;
1188 goto out_unlock; 1188 goto out_unlock;
1189 } 1189 }
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index 592fdf7111cb..044fb0e15390 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -212,88 +212,6 @@ static inline kgid_t xfs_gid_to_kgid(__uint32_t gid)
212#define xfs_sort(a,n,s,fn) sort(a,n,s,fn,NULL) 212#define xfs_sort(a,n,s,fn) sort(a,n,s,fn,NULL)
213#define xfs_stack_trace() dump_stack() 213#define xfs_stack_trace() dump_stack()
214 214
215
216/* Move the kernel do_div definition off to one side */
217
218#if defined __i386__
219/* For ia32 we need to pull some tricks to get past various versions
220 * of the compiler which do not like us using do_div in the middle
221 * of large functions.
222 */
223static inline __u32 xfs_do_div(void *a, __u32 b, int n)
224{
225 __u32 mod;
226
227 switch (n) {
228 case 4:
229 mod = *(__u32 *)a % b;
230 *(__u32 *)a = *(__u32 *)a / b;
231 return mod;
232 case 8:
233 {
234 unsigned long __upper, __low, __high, __mod;
235 __u64 c = *(__u64 *)a;
236 __upper = __high = c >> 32;
237 __low = c;
238 if (__high) {
239 __upper = __high % (b);
240 __high = __high / (b);
241 }
242 asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper));
243 asm("":"=A" (c):"a" (__low),"d" (__high));
244 *(__u64 *)a = c;
245 return __mod;
246 }
247 }
248
249 /* NOTREACHED */
250 return 0;
251}
252
253/* Side effect free 64 bit mod operation */
254static inline __u32 xfs_do_mod(void *a, __u32 b, int n)
255{
256 switch (n) {
257 case 4:
258 return *(__u32 *)a % b;
259 case 8:
260 {
261 unsigned long __upper, __low, __high, __mod;
262 __u64 c = *(__u64 *)a;
263 __upper = __high = c >> 32;
264 __low = c;
265 if (__high) {
266 __upper = __high % (b);
267 __high = __high / (b);
268 }
269 asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper));
270 asm("":"=A" (c):"a" (__low),"d" (__high));
271 return __mod;
272 }
273 }
274
275 /* NOTREACHED */
276 return 0;
277}
278#else
279static inline __u32 xfs_do_div(void *a, __u32 b, int n)
280{
281 __u32 mod;
282
283 switch (n) {
284 case 4:
285 mod = *(__u32 *)a % b;
286 *(__u32 *)a = *(__u32 *)a / b;
287 return mod;
288 case 8:
289 mod = do_div(*(__u64 *)a, b);
290 return mod;
291 }
292
293 /* NOTREACHED */
294 return 0;
295}
296
297/* Side effect free 64 bit mod operation */ 215/* Side effect free 64 bit mod operation */
298static inline __u32 xfs_do_mod(void *a, __u32 b, int n) 216static inline __u32 xfs_do_mod(void *a, __u32 b, int n)
299{ 217{
@@ -310,10 +228,7 @@ static inline __u32 xfs_do_mod(void *a, __u32 b, int n)
310 /* NOTREACHED */ 228 /* NOTREACHED */
311 return 0; 229 return 0;
312} 230}
313#endif
314 231
315#undef do_div
316#define do_div(a, b) xfs_do_div(&(a), (b), sizeof(a))
317#define do_mod(a, b) xfs_do_mod(&(a), (b), sizeof(a)) 232#define do_mod(a, b) xfs_do_mod(&(a), (b), sizeof(a))
318 233
319static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y) 234static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y)
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index b1469f0a91a6..3731f13f63e9 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1293,7 +1293,7 @@ void
1293xfs_log_work_queue( 1293xfs_log_work_queue(
1294 struct xfs_mount *mp) 1294 struct xfs_mount *mp)
1295{ 1295{
1296 queue_delayed_work(mp->m_log_workqueue, &mp->m_log->l_work, 1296 queue_delayed_work(mp->m_sync_workqueue, &mp->m_log->l_work,
1297 msecs_to_jiffies(xfs_syncd_centisecs * 10)); 1297 msecs_to_jiffies(xfs_syncd_centisecs * 10));
1298} 1298}
1299 1299
@@ -1852,7 +1852,7 @@ xlog_sync(
1852 */ 1852 */
1853 if (log->l_badcrc_factor && 1853 if (log->l_badcrc_factor &&
1854 (prandom_u32() % log->l_badcrc_factor == 0)) { 1854 (prandom_u32() % log->l_badcrc_factor == 0)) {
1855 iclog->ic_header.h_crc &= 0xAAAAAAAA; 1855 iclog->ic_header.h_crc &= cpu_to_le32(0xAAAAAAAA);
1856 iclog->ic_state |= XLOG_STATE_IOABORT; 1856 iclog->ic_state |= XLOG_STATE_IOABORT;
1857 xfs_warn(log->l_mp, 1857 xfs_warn(log->l_mp,
1858 "Intentionally corrupted log record at LSN 0x%llx. Shutdown imminent.", 1858 "Intentionally corrupted log record at LSN 0x%llx. Shutdown imminent.",
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 688ebff1f663..2eaf81859166 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -73,6 +73,10 @@ xfs_uuid_mount(
73 uuid_t *uuid = &mp->m_sb.sb_uuid; 73 uuid_t *uuid = &mp->m_sb.sb_uuid;
74 int hole, i; 74 int hole, i;
75 75
76 /* Publish UUID in struct super_block */
77 BUILD_BUG_ON(sizeof(mp->m_super->s_uuid) != sizeof(uuid_t));
78 memcpy(&mp->m_super->s_uuid, uuid, sizeof(uuid_t));
79
76 if (mp->m_flags & XFS_MOUNT_NOUUID) 80 if (mp->m_flags & XFS_MOUNT_NOUUID)
77 return 0; 81 return 0;
78 82
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 6db6fd6b82b0..9fa312a41c93 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -183,6 +183,7 @@ typedef struct xfs_mount {
183 struct workqueue_struct *m_reclaim_workqueue; 183 struct workqueue_struct *m_reclaim_workqueue;
184 struct workqueue_struct *m_log_workqueue; 184 struct workqueue_struct *m_log_workqueue;
185 struct workqueue_struct *m_eofblocks_workqueue; 185 struct workqueue_struct *m_eofblocks_workqueue;
186 struct workqueue_struct *m_sync_workqueue;
186 187
187 /* 188 /*
 188	 * Generation of the filesystem layout.  This is incremented by each	 189
@@ -312,7 +313,7 @@ void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname,
312static inline xfs_agnumber_t 313static inline xfs_agnumber_t
313xfs_daddr_to_agno(struct xfs_mount *mp, xfs_daddr_t d) 314xfs_daddr_to_agno(struct xfs_mount *mp, xfs_daddr_t d)
314{ 315{
315 xfs_daddr_t ld = XFS_BB_TO_FSBT(mp, d); 316 xfs_rfsblock_t ld = XFS_BB_TO_FSBT(mp, d);
316 do_div(ld, mp->m_sb.sb_agblocks); 317 do_div(ld, mp->m_sb.sb_agblocks);
317 return (xfs_agnumber_t) ld; 318 return (xfs_agnumber_t) ld;
318} 319}
@@ -320,7 +321,7 @@ xfs_daddr_to_agno(struct xfs_mount *mp, xfs_daddr_t d)
320static inline xfs_agblock_t 321static inline xfs_agblock_t
321xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d) 322xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
322{ 323{
323 xfs_daddr_t ld = XFS_BB_TO_FSBT(mp, d); 324 xfs_rfsblock_t ld = XFS_BB_TO_FSBT(mp, d);
324 return (xfs_agblock_t) do_div(ld, mp->m_sb.sb_agblocks); 325 return (xfs_agblock_t) do_div(ld, mp->m_sb.sb_agblocks);
325} 326}
326 327
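These two helpers keep calling do_div(), but with the private xfs_do_div() wrappers and the do_div override removed from xfs_linux.h earlier in this diff, that is now the kernel's own do_div(), which wants an unsigned 64-bit lvalue; hence the switch of the local from xfs_daddr_t to xfs_rfsblock_t. For reference, the calling convention of the stock macro:

/* do_div(n, base): n must be a 64-bit unsigned lvalue; it is replaced
 * by the quotient and the 32-bit remainder is returned.
 */
u64 blocks = 1000000;
u32 rem = do_div(blocks, 4096);	/* blocks == 244, rem == 576 */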
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index b669b123287b..5fe6e70b88ef 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -851,8 +851,8 @@ xfs_qm_reset_dqcounts(
851 * started afresh by xfs_qm_quotacheck. 851 * started afresh by xfs_qm_quotacheck.
852 */ 852 */
853#ifdef DEBUG 853#ifdef DEBUG
854 j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB); 854 j = (int)XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) /
855 do_div(j, sizeof(xfs_dqblk_t)); 855 sizeof(xfs_dqblk_t);
856 ASSERT(mp->m_quotainfo->qi_dqperchunk == j); 856 ASSERT(mp->m_quotainfo->qi_dqperchunk == j);
857#endif 857#endif
858 dqb = bp->b_addr; 858 dqb = bp->b_addr;
@@ -1384,12 +1384,7 @@ xfs_qm_quotacheck(
1384 mp->m_qflags |= flags; 1384 mp->m_qflags |= flags;
1385 1385
1386 error_return: 1386 error_return:
1387 while (!list_empty(&buffer_list)) { 1387 xfs_buf_delwri_cancel(&buffer_list);
1388 struct xfs_buf *bp =
1389 list_first_entry(&buffer_list, struct xfs_buf, b_list);
1390 list_del_init(&bp->b_list);
1391 xfs_buf_relse(bp);
1392 }
1393 1388
1394 if (error) { 1389 if (error) {
1395 xfs_warn(mp, 1390 xfs_warn(mp,
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 475a3882a81f..9cb5c381b01c 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -759,5 +759,6 @@ xfs_qm_dqrele_all_inodes(
759 uint flags) 759 uint flags)
760{ 760{
761 ASSERT(mp->m_quotainfo); 761 ASSERT(mp->m_quotainfo);
762 xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, NULL); 762 xfs_inode_ag_iterator_flags(mp, xfs_dqrele_inode, flags, NULL,
763 XFS_AGITER_INEW_WAIT);
763} 764}
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index 6e4c7446c3d4..96fe209b5eb6 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -221,6 +221,7 @@ void
221xfs_cui_release( 221xfs_cui_release(
222 struct xfs_cui_log_item *cuip) 222 struct xfs_cui_log_item *cuip)
223{ 223{
224 ASSERT(atomic_read(&cuip->cui_refcount) > 0);
224 if (atomic_dec_and_test(&cuip->cui_refcount)) { 225 if (atomic_dec_and_test(&cuip->cui_refcount)) {
225 xfs_trans_ail_remove(&cuip->cui_item, SHUTDOWN_LOG_IO_ERROR); 226 xfs_trans_ail_remove(&cuip->cui_item, SHUTDOWN_LOG_IO_ERROR);
226 xfs_cui_item_free(cuip); 227 xfs_cui_item_free(cuip);
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 4a84c5ea266d..ffe6fe7a7eb5 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -206,11 +206,7 @@ xfs_reflink_trim_around_shared(
206 int error = 0; 206 int error = 0;
207 207
208 /* Holes, unwritten, and delalloc extents cannot be shared */ 208 /* Holes, unwritten, and delalloc extents cannot be shared */
209 if (!xfs_is_reflink_inode(ip) || 209 if (!xfs_is_reflink_inode(ip) || !xfs_bmap_is_real_extent(irec)) {
210 ISUNWRITTEN(irec) ||
211 irec->br_startblock == HOLESTARTBLOCK ||
212 irec->br_startblock == DELAYSTARTBLOCK ||
213 isnullstartblock(irec->br_startblock)) {
214 *shared = false; 210 *shared = false;
215 return 0; 211 return 0;
216 } 212 }
@@ -709,8 +705,22 @@ xfs_reflink_end_cow(
709 offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); 705 offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
710 end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count); 706 end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count);
711 707
712 /* Start a rolling transaction to switch the mappings */ 708 /*
713 resblks = XFS_EXTENTADD_SPACE_RES(ip->i_mount, XFS_DATA_FORK); 709 * Start a rolling transaction to switch the mappings. We're
710 * unlikely ever to have to remap 16T worth of single-block
711 * extents, so just cap the worst case extent count to 2^32-1.
712 * Stick a warning in just in case, and avoid 64-bit division.
713 */
714 BUILD_BUG_ON(MAX_RW_COUNT > UINT_MAX);
715 if (end_fsb - offset_fsb > UINT_MAX) {
716 error = -EFSCORRUPTED;
717 xfs_force_shutdown(ip->i_mount, SHUTDOWN_CORRUPT_INCORE);
718 ASSERT(0);
719 goto out;
720 }
721 resblks = XFS_NEXTENTADD_SPACE_RES(ip->i_mount,
722 (unsigned int)(end_fsb - offset_fsb),
723 XFS_DATA_FORK);
714 error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write, 724 error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write,
715 resblks, 0, 0, &tp); 725 resblks, 0, 0, &tp);
716 if (error) 726 if (error)
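The 16T figure in the comment follows directly from the cap: assuming the common 4 KiB filesystem block size, 2^32 - 1 single-block extents span

	(2^32 - 1) blocks * 4096 bytes/block, which is just under 2^44 bytes = 16 TiB,

so clamping the extent count to UINT_MAX only bites on a single remap larger than that, and the XFS_NEXTENTADD_SPACE_RES() reservation below stays within 32-bit arithmetic.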
@@ -1045,12 +1055,12 @@ xfs_reflink_remap_extent(
1045 xfs_off_t new_isize) 1055 xfs_off_t new_isize)
1046{ 1056{
1047 struct xfs_mount *mp = ip->i_mount; 1057 struct xfs_mount *mp = ip->i_mount;
1058 bool real_extent = xfs_bmap_is_real_extent(irec);
1048 struct xfs_trans *tp; 1059 struct xfs_trans *tp;
1049 xfs_fsblock_t firstfsb; 1060 xfs_fsblock_t firstfsb;
1050 unsigned int resblks; 1061 unsigned int resblks;
1051 struct xfs_defer_ops dfops; 1062 struct xfs_defer_ops dfops;
1052 struct xfs_bmbt_irec uirec; 1063 struct xfs_bmbt_irec uirec;
1053 bool real_extent;
1054 xfs_filblks_t rlen; 1064 xfs_filblks_t rlen;
1055 xfs_filblks_t unmap_len; 1065 xfs_filblks_t unmap_len;
1056 xfs_off_t newlen; 1066 xfs_off_t newlen;
@@ -1059,11 +1069,6 @@ xfs_reflink_remap_extent(
1059 unmap_len = irec->br_startoff + irec->br_blockcount - destoff; 1069 unmap_len = irec->br_startoff + irec->br_blockcount - destoff;
1060 trace_xfs_reflink_punch_range(ip, destoff, unmap_len); 1070 trace_xfs_reflink_punch_range(ip, destoff, unmap_len);
1061 1071
1062 /* Only remap normal extents. */
1063 real_extent = (irec->br_startblock != HOLESTARTBLOCK &&
1064 irec->br_startblock != DELAYSTARTBLOCK &&
1065 !ISUNWRITTEN(irec));
1066
1067 /* No reflinking if we're low on space */ 1072 /* No reflinking if we're low on space */
1068 if (real_extent) { 1073 if (real_extent) {
1069 error = xfs_reflink_ag_has_free_space(mp, 1074 error = xfs_reflink_ag_has_free_space(mp,
@@ -1359,9 +1364,7 @@ xfs_reflink_dirty_extents(
1359 goto out; 1364 goto out;
1360 if (nmaps == 0) 1365 if (nmaps == 0)
1361 break; 1366 break;
1362 if (map[0].br_startblock == HOLESTARTBLOCK || 1367 if (!xfs_bmap_is_real_extent(&map[0]))
1363 map[0].br_startblock == DELAYSTARTBLOCK ||
1364 ISUNWRITTEN(&map[0]))
1365 goto next; 1368 goto next;
1366 1369
1367 map[1] = map[0]; 1370 map[1] = map[0];
@@ -1435,9 +1438,7 @@ xfs_reflink_clear_inode_flag(
1435 return error; 1438 return error;
1436 if (nmaps == 0) 1439 if (nmaps == 0)
1437 break; 1440 break;
1438 if (map.br_startblock == HOLESTARTBLOCK || 1441 if (!xfs_bmap_is_real_extent(&map))
1439 map.br_startblock == DELAYSTARTBLOCK ||
1440 ISUNWRITTEN(&map))
1441 goto next; 1442 goto next;
1442 1443
1443 agno = XFS_FSB_TO_AGNO(mp, map.br_startblock); 1444 agno = XFS_FSB_TO_AGNO(mp, map.br_startblock);
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
index 73c827831551..f3b139c9aa16 100644
--- a/fs/xfs/xfs_rmap_item.c
+++ b/fs/xfs/xfs_rmap_item.c
@@ -243,6 +243,7 @@ void
243xfs_rui_release( 243xfs_rui_release(
244 struct xfs_rui_log_item *ruip) 244 struct xfs_rui_log_item *ruip)
245{ 245{
246 ASSERT(atomic_read(&ruip->rui_refcount) > 0);
246 if (atomic_dec_and_test(&ruip->rui_refcount)) { 247 if (atomic_dec_and_test(&ruip->rui_refcount)) {
247 xfs_trans_ail_remove(&ruip->rui_item, SHUTDOWN_LOG_IO_ERROR); 248 xfs_trans_ail_remove(&ruip->rui_item, SHUTDOWN_LOG_IO_ERROR);
248 xfs_rui_item_free(ruip); 249 xfs_rui_item_free(ruip);
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index 51dd3c726608..f13133e6f19f 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -23,6 +23,16 @@
23struct xfs_mount; 23struct xfs_mount;
24struct xfs_trans; 24struct xfs_trans;
25 25
26struct xfs_rtalloc_rec {
27 xfs_rtblock_t ar_startblock;
28 xfs_rtblock_t ar_blockcount;
29};
30
31typedef int (*xfs_rtalloc_query_range_fn)(
32 struct xfs_trans *tp,
33 struct xfs_rtalloc_rec *rec,
34 void *priv);
35
26#ifdef CONFIG_XFS_RT 36#ifdef CONFIG_XFS_RT
27/* 37/*
28 * Function prototypes for exported functions. 38 * Function prototypes for exported functions.
@@ -118,13 +128,21 @@ int xfs_rtmodify_summary(struct xfs_mount *mp, struct xfs_trans *tp, int log,
118int xfs_rtfree_range(struct xfs_mount *mp, struct xfs_trans *tp, 128int xfs_rtfree_range(struct xfs_mount *mp, struct xfs_trans *tp,
119 xfs_rtblock_t start, xfs_extlen_t len, 129 xfs_rtblock_t start, xfs_extlen_t len,
120 struct xfs_buf **rbpp, xfs_fsblock_t *rsb); 130 struct xfs_buf **rbpp, xfs_fsblock_t *rsb);
121 131int xfs_rtalloc_query_range(struct xfs_trans *tp,
122 132 struct xfs_rtalloc_rec *low_rec,
133 struct xfs_rtalloc_rec *high_rec,
134 xfs_rtalloc_query_range_fn fn,
135 void *priv);
136int xfs_rtalloc_query_all(struct xfs_trans *tp,
137 xfs_rtalloc_query_range_fn fn,
138 void *priv);
123#else 139#else
124# define xfs_rtallocate_extent(t,b,min,max,l,f,p,rb) (ENOSYS) 140# define xfs_rtallocate_extent(t,b,min,max,l,f,p,rb) (ENOSYS)
125# define xfs_rtfree_extent(t,b,l) (ENOSYS) 141# define xfs_rtfree_extent(t,b,l) (ENOSYS)
126# define xfs_rtpick_extent(m,t,l,rb) (ENOSYS) 142# define xfs_rtpick_extent(m,t,l,rb) (ENOSYS)
127# define xfs_growfs_rt(mp,in) (ENOSYS) 143# define xfs_growfs_rt(mp,in) (ENOSYS)
144# define xfs_rtalloc_query_range(t,l,h,f,p) (ENOSYS)
145# define xfs_rtalloc_query_all(t,f,p) (ENOSYS)
128static inline int /* error */ 146static inline int /* error */
129xfs_rtmount_init( 147xfs_rtmount_init(
130 xfs_mount_t *mp) /* file system mount structure */ 148 xfs_mount_t *mp) /* file system mount structure */
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 685c042a120f..47d239dcf3f4 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -877,8 +877,15 @@ xfs_init_mount_workqueues(
877 if (!mp->m_eofblocks_workqueue) 877 if (!mp->m_eofblocks_workqueue)
878 goto out_destroy_log; 878 goto out_destroy_log;
879 879
880 mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s", WQ_FREEZABLE, 0,
881 mp->m_fsname);
882 if (!mp->m_sync_workqueue)
883 goto out_destroy_eofb;
884
880 return 0; 885 return 0;
881 886
887out_destroy_eofb:
888 destroy_workqueue(mp->m_eofblocks_workqueue);
882out_destroy_log: 889out_destroy_log:
883 destroy_workqueue(mp->m_log_workqueue); 890 destroy_workqueue(mp->m_log_workqueue);
884out_destroy_reclaim: 891out_destroy_reclaim:
@@ -899,6 +906,7 @@ STATIC void
899xfs_destroy_mount_workqueues( 906xfs_destroy_mount_workqueues(
900 struct xfs_mount *mp) 907 struct xfs_mount *mp)
901{ 908{
909 destroy_workqueue(mp->m_sync_workqueue);
902 destroy_workqueue(mp->m_eofblocks_workqueue); 910 destroy_workqueue(mp->m_eofblocks_workqueue);
903 destroy_workqueue(mp->m_log_workqueue); 911 destroy_workqueue(mp->m_log_workqueue);
904 destroy_workqueue(mp->m_reclaim_workqueue); 912 destroy_workqueue(mp->m_reclaim_workqueue);
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c
index 7f17ae6d709a..5d95fe348294 100644
--- a/fs/xfs/xfs_trace.c
+++ b/fs/xfs/xfs_trace.c
@@ -47,6 +47,7 @@
47#include "xfs_inode_item.h" 47#include "xfs_inode_item.h"
48#include "xfs_bmap_btree.h" 48#include "xfs_bmap_btree.h"
49#include "xfs_filestream.h" 49#include "xfs_filestream.h"
50#include "xfs_fsmap.h"
50 51
51/* 52/*
52 * We include this last to have the helpers above available for the trace 53 * We include this last to have the helpers above available for the trace
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 383ac227ce2c..7c5a16528d8b 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -40,6 +40,8 @@ struct xfs_inode_log_format;
40struct xfs_bmbt_irec; 40struct xfs_bmbt_irec;
41struct xfs_btree_cur; 41struct xfs_btree_cur;
42struct xfs_refcount_irec; 42struct xfs_refcount_irec;
43struct xfs_fsmap;
44struct xfs_rmap_irec;
43 45
44DECLARE_EVENT_CLASS(xfs_attr_list_class, 46DECLARE_EVENT_CLASS(xfs_attr_list_class,
45 TP_PROTO(struct xfs_attr_list_context *ctx), 47 TP_PROTO(struct xfs_attr_list_context *ctx),
@@ -2190,7 +2192,7 @@ DECLARE_EVENT_CLASS(xfs_discard_class,
2190 __entry->agbno = agbno; 2192 __entry->agbno = agbno;
2191 __entry->len = len; 2193 __entry->len = len;
2192 ), 2194 ),
2193 TP_printk("dev %d:%d agno %u agbno %u len %u\n", 2195 TP_printk("dev %d:%d agno %u agbno %u len %u",
2194 MAJOR(__entry->dev), MINOR(__entry->dev), 2196 MAJOR(__entry->dev), MINOR(__entry->dev),
2195 __entry->agno, 2197 __entry->agno,
2196 __entry->agbno, 2198 __entry->agbno,
@@ -2253,8 +2255,8 @@ DECLARE_EVENT_CLASS(xfs_defer_class,
2253 TP_STRUCT__entry( 2255 TP_STRUCT__entry(
2254 __field(dev_t, dev) 2256 __field(dev_t, dev)
2255 __field(void *, dop) 2257 __field(void *, dop)
2256 __field(bool, committed) 2258 __field(char, committed)
2257 __field(bool, low) 2259 __field(char, low)
2258 ), 2260 ),
2259 TP_fast_assign( 2261 TP_fast_assign(
2260 __entry->dev = mp ? mp->m_super->s_dev : 0; 2262 __entry->dev = mp ? mp->m_super->s_dev : 0;
@@ -2262,7 +2264,7 @@ DECLARE_EVENT_CLASS(xfs_defer_class,
2262 __entry->committed = dop->dop_committed; 2264 __entry->committed = dop->dop_committed;
2263 __entry->low = dop->dop_low; 2265 __entry->low = dop->dop_low;
2264 ), 2266 ),
2265 TP_printk("dev %d:%d ops %p committed %d low %d\n", 2267 TP_printk("dev %d:%d ops %p committed %d low %d",
2266 MAJOR(__entry->dev), MINOR(__entry->dev), 2268 MAJOR(__entry->dev), MINOR(__entry->dev),
2267 __entry->dop, 2269 __entry->dop,
2268 __entry->committed, 2270 __entry->committed,
@@ -2279,8 +2281,8 @@ DECLARE_EVENT_CLASS(xfs_defer_error_class,
2279 TP_STRUCT__entry( 2281 TP_STRUCT__entry(
2280 __field(dev_t, dev) 2282 __field(dev_t, dev)
2281 __field(void *, dop) 2283 __field(void *, dop)
2282 __field(bool, committed) 2284 __field(char, committed)
2283 __field(bool, low) 2285 __field(char, low)
2284 __field(int, error) 2286 __field(int, error)
2285 ), 2287 ),
2286 TP_fast_assign( 2288 TP_fast_assign(
@@ -2290,7 +2292,7 @@ DECLARE_EVENT_CLASS(xfs_defer_error_class,
2290 __entry->low = dop->dop_low; 2292 __entry->low = dop->dop_low;
2291 __entry->error = error; 2293 __entry->error = error;
2292 ), 2294 ),
2293 TP_printk("dev %d:%d ops %p committed %d low %d err %d\n", 2295 TP_printk("dev %d:%d ops %p committed %d low %d err %d",
2294 MAJOR(__entry->dev), MINOR(__entry->dev), 2296 MAJOR(__entry->dev), MINOR(__entry->dev),
2295 __entry->dop, 2297 __entry->dop,
2296 __entry->committed, 2298 __entry->committed,
@@ -2309,7 +2311,7 @@ DECLARE_EVENT_CLASS(xfs_defer_pending_class,
2309 __field(dev_t, dev) 2311 __field(dev_t, dev)
2310 __field(int, type) 2312 __field(int, type)
2311 __field(void *, intent) 2313 __field(void *, intent)
2312 __field(bool, committed) 2314 __field(char, committed)
2313 __field(int, nr) 2315 __field(int, nr)
2314 ), 2316 ),
2315 TP_fast_assign( 2317 TP_fast_assign(
@@ -2319,7 +2321,7 @@ DECLARE_EVENT_CLASS(xfs_defer_pending_class,
2319 __entry->committed = dfp->dfp_done != NULL; 2321 __entry->committed = dfp->dfp_done != NULL;
2320 __entry->nr = dfp->dfp_count; 2322 __entry->nr = dfp->dfp_count;
2321 ), 2323 ),
2322 TP_printk("dev %d:%d optype %d intent %p committed %d nr %d\n", 2324 TP_printk("dev %d:%d optype %d intent %p committed %d nr %d",
2323 MAJOR(__entry->dev), MINOR(__entry->dev), 2325 MAJOR(__entry->dev), MINOR(__entry->dev),
2324 __entry->type, 2326 __entry->type,
2325 __entry->intent, 2327 __entry->intent,
@@ -2614,7 +2616,8 @@ DECLARE_EVENT_CLASS(xfs_ag_resv_class,
2614 __entry->asked = r ? r->ar_asked : 0; 2616 __entry->asked = r ? r->ar_asked : 0;
2615 __entry->len = len; 2617 __entry->len = len;
2616 ), 2618 ),
2617 TP_printk("dev %d:%d agno %u resv %d freeblks %u flcount %u resv %u ask %u len %u\n", 2619 TP_printk("dev %d:%d agno %u resv %d freeblks %u flcount %u "
2620 "resv %u ask %u len %u",
2618 MAJOR(__entry->dev), MINOR(__entry->dev), 2621 MAJOR(__entry->dev), MINOR(__entry->dev),
2619 __entry->agno, 2622 __entry->agno,
2620 __entry->resv, 2623 __entry->resv,
@@ -2667,7 +2670,7 @@ DECLARE_EVENT_CLASS(xfs_ag_btree_lookup_class,
2667 __entry->agbno = agbno; 2670 __entry->agbno = agbno;
2668 __entry->dir = dir; 2671 __entry->dir = dir;
2669 ), 2672 ),
2670 TP_printk("dev %d:%d agno %u agbno %u cmp %s(%d)\n", 2673 TP_printk("dev %d:%d agno %u agbno %u cmp %s(%d)",
2671 MAJOR(__entry->dev), MINOR(__entry->dev), 2674 MAJOR(__entry->dev), MINOR(__entry->dev),
2672 __entry->agno, 2675 __entry->agno,
2673 __entry->agbno, 2676 __entry->agbno,
@@ -2700,7 +2703,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_class,
2700 __entry->blockcount = irec->rc_blockcount; 2703 __entry->blockcount = irec->rc_blockcount;
2701 __entry->refcount = irec->rc_refcount; 2704 __entry->refcount = irec->rc_refcount;
2702 ), 2705 ),
2703 TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u\n", 2706 TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u",
2704 MAJOR(__entry->dev), MINOR(__entry->dev), 2707 MAJOR(__entry->dev), MINOR(__entry->dev),
2705 __entry->agno, 2708 __entry->agno,
2706 __entry->startblock, 2709 __entry->startblock,
@@ -2735,7 +2738,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_at_class,
2735 __entry->refcount = irec->rc_refcount; 2738 __entry->refcount = irec->rc_refcount;
2736 __entry->agbno = agbno; 2739 __entry->agbno = agbno;
2737 ), 2740 ),
2738 TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u @ agbno %u\n", 2741 TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u @ agbno %u",
2739 MAJOR(__entry->dev), MINOR(__entry->dev), 2742 MAJOR(__entry->dev), MINOR(__entry->dev),
2740 __entry->agno, 2743 __entry->agno,
2741 __entry->startblock, 2744 __entry->startblock,
@@ -2776,7 +2779,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_class,
2776 __entry->i2_refcount = i2->rc_refcount; 2779 __entry->i2_refcount = i2->rc_refcount;
2777 ), 2780 ),
2778 TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u -- " 2781 TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u -- "
2779 "agbno %u len %u refcount %u\n", 2782 "agbno %u len %u refcount %u",
2780 MAJOR(__entry->dev), MINOR(__entry->dev), 2783 MAJOR(__entry->dev), MINOR(__entry->dev),
2781 __entry->agno, 2784 __entry->agno,
2782 __entry->i1_startblock, 2785 __entry->i1_startblock,
@@ -2822,7 +2825,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_at_class,
2822 __entry->agbno = agbno; 2825 __entry->agbno = agbno;
2823 ), 2826 ),
2824 TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u -- " 2827 TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u -- "
2825 "agbno %u len %u refcount %u @ agbno %u\n", 2828 "agbno %u len %u refcount %u @ agbno %u",
2826 MAJOR(__entry->dev), MINOR(__entry->dev), 2829 MAJOR(__entry->dev), MINOR(__entry->dev),
2827 __entry->agno, 2830 __entry->agno,
2828 __entry->i1_startblock, 2831 __entry->i1_startblock,
@@ -2875,7 +2878,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_triple_extent_class,
2875 ), 2878 ),
2876 TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u -- " 2879 TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u -- "
2877 "agbno %u len %u refcount %u -- " 2880 "agbno %u len %u refcount %u -- "
2878 "agbno %u len %u refcount %u\n", 2881 "agbno %u len %u refcount %u",
2879 MAJOR(__entry->dev), MINOR(__entry->dev), 2882 MAJOR(__entry->dev), MINOR(__entry->dev),
2880 __entry->agno, 2883 __entry->agno,
2881 __entry->i1_startblock, 2884 __entry->i1_startblock,
@@ -3001,31 +3004,6 @@ DEFINE_EVENT(xfs_inode_error_class, name, \
3001 unsigned long caller_ip), \ 3004 unsigned long caller_ip), \
3002 TP_ARGS(ip, error, caller_ip)) 3005 TP_ARGS(ip, error, caller_ip))
3003 3006
3004/* reflink allocator */
3005TRACE_EVENT(xfs_bmap_remap_alloc,
3006 TP_PROTO(struct xfs_inode *ip, xfs_fsblock_t fsbno,
3007 xfs_extlen_t len),
3008 TP_ARGS(ip, fsbno, len),
3009 TP_STRUCT__entry(
3010 __field(dev_t, dev)
3011 __field(xfs_ino_t, ino)
3012 __field(xfs_fsblock_t, fsbno)
3013 __field(xfs_extlen_t, len)
3014 ),
3015 TP_fast_assign(
3016 __entry->dev = VFS_I(ip)->i_sb->s_dev;
3017 __entry->ino = ip->i_ino;
3018 __entry->fsbno = fsbno;
3019 __entry->len = len;
3020 ),
3021 TP_printk("dev %d:%d ino 0x%llx fsbno 0x%llx len %x",
3022 MAJOR(__entry->dev), MINOR(__entry->dev),
3023 __entry->ino,
3024 __entry->fsbno,
3025 __entry->len)
3026);
3027DEFINE_INODE_ERROR_EVENT(xfs_bmap_remap_alloc_error);
3028
3029/* reflink tracepoint classes */ 3007/* reflink tracepoint classes */
3030 3008
3031/* two-file io tracepoint class */ 3009/* two-file io tracepoint class */
@@ -3227,7 +3205,7 @@ TRACE_EVENT(xfs_ioctl_clone,
3227 ), 3205 ),
3228 TP_printk("dev %d:%d " 3206 TP_printk("dev %d:%d "
3229 "ino 0x%lx isize 0x%llx -> " 3207 "ino 0x%lx isize 0x%llx -> "
3230 "ino 0x%lx isize 0x%llx\n", 3208 "ino 0x%lx isize 0x%llx",
3231 MAJOR(__entry->dev), MINOR(__entry->dev), 3209 MAJOR(__entry->dev), MINOR(__entry->dev),
3232 __entry->src_ino, 3210 __entry->src_ino,
3233 __entry->src_isize, 3211 __entry->src_isize,
@@ -3267,6 +3245,88 @@ DEFINE_INODE_IREC_EVENT(xfs_swap_extent_rmap_remap);
3267DEFINE_INODE_IREC_EVENT(xfs_swap_extent_rmap_remap_piece); 3245DEFINE_INODE_IREC_EVENT(xfs_swap_extent_rmap_remap_piece);
3268DEFINE_INODE_ERROR_EVENT(xfs_swap_extent_rmap_error); 3246DEFINE_INODE_ERROR_EVENT(xfs_swap_extent_rmap_error);
3269 3247
3248/* fsmap traces */
3249DECLARE_EVENT_CLASS(xfs_fsmap_class,
3250 TP_PROTO(struct xfs_mount *mp, u32 keydev, xfs_agnumber_t agno,
3251 struct xfs_rmap_irec *rmap),
3252 TP_ARGS(mp, keydev, agno, rmap),
3253 TP_STRUCT__entry(
3254 __field(dev_t, dev)
3255 __field(dev_t, keydev)
3256 __field(xfs_agnumber_t, agno)
3257 __field(xfs_fsblock_t, bno)
3258 __field(xfs_filblks_t, len)
3259 __field(__uint64_t, owner)
3260 __field(__uint64_t, offset)
3261 __field(unsigned int, flags)
3262 ),
3263 TP_fast_assign(
3264 __entry->dev = mp->m_super->s_dev;
3265 __entry->keydev = new_decode_dev(keydev);
3266 __entry->agno = agno;
3267 __entry->bno = rmap->rm_startblock;
3268 __entry->len = rmap->rm_blockcount;
3269 __entry->owner = rmap->rm_owner;
3270 __entry->offset = rmap->rm_offset;
3271 __entry->flags = rmap->rm_flags;
3272 ),
3273 TP_printk("dev %d:%d keydev %d:%d agno %u bno %llu len %llu owner %lld offset %llu flags 0x%x",
3274 MAJOR(__entry->dev), MINOR(__entry->dev),
3275 MAJOR(__entry->keydev), MINOR(__entry->keydev),
3276 __entry->agno,
3277 __entry->bno,
3278 __entry->len,
3279 __entry->owner,
3280 __entry->offset,
3281 __entry->flags)
3282)
3283#define DEFINE_FSMAP_EVENT(name) \
3284DEFINE_EVENT(xfs_fsmap_class, name, \
3285 TP_PROTO(struct xfs_mount *mp, u32 keydev, xfs_agnumber_t agno, \
3286 struct xfs_rmap_irec *rmap), \
3287 TP_ARGS(mp, keydev, agno, rmap))
3288DEFINE_FSMAP_EVENT(xfs_fsmap_low_key);
3289DEFINE_FSMAP_EVENT(xfs_fsmap_high_key);
3290DEFINE_FSMAP_EVENT(xfs_fsmap_mapping);
3291
3292DECLARE_EVENT_CLASS(xfs_getfsmap_class,
3293 TP_PROTO(struct xfs_mount *mp, struct xfs_fsmap *fsmap),
3294 TP_ARGS(mp, fsmap),
3295 TP_STRUCT__entry(
3296 __field(dev_t, dev)
3297 __field(dev_t, keydev)
3298 __field(xfs_daddr_t, block)
3299 __field(xfs_daddr_t, len)
3300 __field(__uint64_t, owner)
3301 __field(__uint64_t, offset)
3302 __field(__uint64_t, flags)
3303 ),
3304 TP_fast_assign(
3305 __entry->dev = mp->m_super->s_dev;
3306 __entry->keydev = new_decode_dev(fsmap->fmr_device);
3307 __entry->block = fsmap->fmr_physical;
3308 __entry->len = fsmap->fmr_length;
3309 __entry->owner = fsmap->fmr_owner;
3310 __entry->offset = fsmap->fmr_offset;
3311 __entry->flags = fsmap->fmr_flags;
3312 ),
3313 TP_printk("dev %d:%d keydev %d:%d block %llu len %llu owner %lld offset %llu flags 0x%llx",
3314 MAJOR(__entry->dev), MINOR(__entry->dev),
3315 MAJOR(__entry->keydev), MINOR(__entry->keydev),
3316 __entry->block,
3317 __entry->len,
3318 __entry->owner,
3319 __entry->offset,
3320 __entry->flags)
3321)
3322#define DEFINE_GETFSMAP_EVENT(name) \
3323DEFINE_EVENT(xfs_getfsmap_class, name, \
3324 TP_PROTO(struct xfs_mount *mp, struct xfs_fsmap *fsmap), \
3325 TP_ARGS(mp, fsmap))
3326DEFINE_GETFSMAP_EVENT(xfs_getfsmap_low_key);
3327DEFINE_GETFSMAP_EVENT(xfs_getfsmap_high_key);
3328DEFINE_GETFSMAP_EVENT(xfs_getfsmap_mapping);
3329
3270#endif /* _TRACE_XFS_H */ 3330#endif /* _TRACE_XFS_H */
3271 3331
3272#undef TRACE_INCLUDE_PATH 3332#undef TRACE_INCLUDE_PATH
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index f5969c8274fc..2011620008de 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -263,6 +263,28 @@ xfs_trans_alloc(
263} 263}
264 264
265/* 265/*
266 * Create an empty transaction with no reservation. This is a defensive
267 * mechanism for routines that query metadata without actually modifying
 268	 * it -- if the metadata being queried is somehow cross-linked (think a
269 * btree block pointer that points higher in the tree), we risk deadlock.
270 * However, blocks grabbed as part of a transaction can be re-grabbed.
271 * The verifiers will notice the corrupt block and the operation will fail
272 * back to userspace without deadlocking.
273 *
274 * Note the zero-length reservation; this transaction MUST be cancelled
275 * without any dirty data.
276 */
277int
278xfs_trans_alloc_empty(
279 struct xfs_mount *mp,
280 struct xfs_trans **tpp)
281{
282 struct xfs_trans_res resv = {0};
283
284 return xfs_trans_alloc(mp, &resv, 0, 0, XFS_TRANS_NO_WRITECOUNT, tpp);
285}
286
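The GETFSMAP dispatcher in the xfs_fsmap.c hunk earlier in this patch is the first user of this helper: it allocates one empty transaction per device handler, runs the read-only query, and cancels the transaction. Boiled down, the pattern looks like this (my_readonly_query is a stand-in for the real handler):

/* Sketch: read-only metadata walk bracketed by an empty transaction. */
struct xfs_trans	*tp;
int			error;

error = xfs_trans_alloc_empty(mp, &tp);
if (error)
	return error;

error = my_readonly_query(tp, priv);	/* must not dirty the transaction */

xfs_trans_cancel(tp);			/* nothing to commit; just drop it */
return error;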
287/*
266 * Record the indicated change to the given field for application 288 * Record the indicated change to the given field for application
267 * to the file system's superblock when the transaction commits. 289 * to the file system's superblock when the transaction commits.
268 * For now, just store the change in the transaction structure. 290 * For now, just store the change in the transaction structure.
@@ -1012,17 +1034,14 @@ xfs_trans_cancel(
1012 * chunk we've been working on and get a new transaction to continue. 1034 * chunk we've been working on and get a new transaction to continue.
1013 */ 1035 */
1014int 1036int
1015__xfs_trans_roll( 1037xfs_trans_roll(
1016 struct xfs_trans **tpp, 1038 struct xfs_trans **tpp,
1017 struct xfs_inode *dp, 1039 struct xfs_inode *dp)
1018 int *committed)
1019{ 1040{
1020 struct xfs_trans *trans; 1041 struct xfs_trans *trans;
1021 struct xfs_trans_res tres; 1042 struct xfs_trans_res tres;
1022 int error; 1043 int error;
1023 1044
1024 *committed = 0;
1025
1026 /* 1045 /*
1027 * Ensure that the inode is always logged. 1046 * Ensure that the inode is always logged.
1028 */ 1047 */
@@ -1048,7 +1067,6 @@ __xfs_trans_roll(
1048 if (error) 1067 if (error)
1049 return error; 1068 return error;
1050 1069
1051 *committed = 1;
1052 trans = *tpp; 1070 trans = *tpp;
1053 1071
1054 /* 1072 /*
@@ -1071,12 +1089,3 @@ __xfs_trans_roll(
1071 xfs_trans_ijoin(trans, dp, 0); 1089 xfs_trans_ijoin(trans, dp, 0);
1072 return 0; 1090 return 0;
1073} 1091}
1074
1075int
1076xfs_trans_roll(
1077 struct xfs_trans **tpp,
1078 struct xfs_inode *dp)
1079{
1080 int committed;
1081 return __xfs_trans_roll(tpp, dp, &committed);
1082}
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 1646f659b60f..a07acbf0bd8a 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -158,6 +158,8 @@ typedef struct xfs_trans {
158int xfs_trans_alloc(struct xfs_mount *mp, struct xfs_trans_res *resp, 158int xfs_trans_alloc(struct xfs_mount *mp, struct xfs_trans_res *resp,
159 uint blocks, uint rtextents, uint flags, 159 uint blocks, uint rtextents, uint flags,
160 struct xfs_trans **tpp); 160 struct xfs_trans **tpp);
161int xfs_trans_alloc_empty(struct xfs_mount *mp,
162 struct xfs_trans **tpp);
161void xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t); 163void xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t);
162 164
163struct xfs_buf *xfs_trans_get_buf_map(struct xfs_trans *tp, 165struct xfs_buf *xfs_trans_get_buf_map(struct xfs_trans *tp,
@@ -226,7 +228,6 @@ int xfs_trans_free_extent(struct xfs_trans *,
226 struct xfs_efd_log_item *, xfs_fsblock_t, 228 struct xfs_efd_log_item *, xfs_fsblock_t,
227 xfs_extlen_t, struct xfs_owner_info *); 229 xfs_extlen_t, struct xfs_owner_info *);
228int xfs_trans_commit(struct xfs_trans *); 230int xfs_trans_commit(struct xfs_trans *);
229int __xfs_trans_roll(struct xfs_trans **, struct xfs_inode *, int *);
230int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *); 231int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *);
231void xfs_trans_cancel(xfs_trans_t *); 232void xfs_trans_cancel(xfs_trans_t *);
232int xfs_trans_ail_init(struct xfs_mount *); 233int xfs_trans_ail_init(struct xfs_mount *);
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index d6c9c3e9e02b..9056c0f34a3c 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -684,8 +684,23 @@ xfs_trans_ail_update_bulk(
684 } 684 }
685} 685}
686 686
687/* 687bool
688 * xfs_trans_ail_delete_bulk - remove multiple log items from the AIL 688xfs_ail_delete_one(
689 struct xfs_ail *ailp,
690 struct xfs_log_item *lip)
691{
692 struct xfs_log_item *mlip = xfs_ail_min(ailp);
693
694 trace_xfs_ail_delete(lip, mlip->li_lsn, lip->li_lsn);
695 xfs_ail_delete(ailp, lip);
696 lip->li_flags &= ~XFS_LI_IN_AIL;
697 lip->li_lsn = 0;
698
699 return mlip == lip;
700}
701
702/**
 703	 * Remove a log item from the AIL
689 * 704 *
690 * @xfs_trans_ail_delete_bulk takes an array of log items that all need to 705 * @xfs_trans_ail_delete_bulk takes an array of log items that all need to
 691	 * be removed from the AIL. The caller is already holding the AIL lock, and done	 706
@@ -706,52 +721,36 @@ xfs_trans_ail_update_bulk(
  * before returning.
  */
 void
-xfs_trans_ail_delete_bulk(
+xfs_trans_ail_delete(
 	struct xfs_ail		*ailp,
-	struct xfs_log_item	**log_items,
-	int			nr_items,
+	struct xfs_log_item	*lip,
 	int			shutdown_type) __releases(ailp->xa_lock)
 {
-	xfs_log_item_t		*mlip;
-	int			mlip_changed = 0;
-	int			i;
+	struct xfs_mount	*mp = ailp->xa_mount;
+	bool			mlip_changed;
 
-	mlip = xfs_ail_min(ailp);
-
-	for (i = 0; i < nr_items; i++) {
-		struct xfs_log_item *lip = log_items[i];
-		if (!(lip->li_flags & XFS_LI_IN_AIL)) {
-			struct xfs_mount	*mp = ailp->xa_mount;
-
-			spin_unlock(&ailp->xa_lock);
-			if (!XFS_FORCED_SHUTDOWN(mp)) {
-				xfs_alert_tag(mp, XFS_PTAG_AILDELETE,
-					"%s: attempting to delete a log item that is not in the AIL",
-					__func__);
-				xfs_force_shutdown(mp, shutdown_type);
-			}
-			return;
+	if (!(lip->li_flags & XFS_LI_IN_AIL)) {
+		spin_unlock(&ailp->xa_lock);
+		if (!XFS_FORCED_SHUTDOWN(mp)) {
+			xfs_alert_tag(mp, XFS_PTAG_AILDELETE,
+				"%s: attempting to delete a log item that is not in the AIL",
+				__func__);
+			xfs_force_shutdown(mp, shutdown_type);
 		}
-
-		trace_xfs_ail_delete(lip, mlip->li_lsn, lip->li_lsn);
-		xfs_ail_delete(ailp, lip);
-		lip->li_flags &= ~XFS_LI_IN_AIL;
-		lip->li_lsn = 0;
-		if (mlip == lip)
-			mlip_changed = 1;
+		return;
 	}
 
+	mlip_changed = xfs_ail_delete_one(ailp, lip);
 	if (mlip_changed) {
-		if (!XFS_FORCED_SHUTDOWN(ailp->xa_mount))
-			xlog_assign_tail_lsn_locked(ailp->xa_mount);
+		if (!XFS_FORCED_SHUTDOWN(mp))
+			xlog_assign_tail_lsn_locked(mp);
 		if (list_empty(&ailp->xa_ail))
 			wake_up_all(&ailp->xa_empty);
-		spin_unlock(&ailp->xa_lock);
+	}
 
+	spin_unlock(&ailp->xa_lock);
+	if (mlip_changed)
 		xfs_log_space_wake(ailp->xa_mount);
-	} else {
-		spin_unlock(&ailp->xa_lock);
-	}
 }
 
 int
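
Note on the calling convention documented above: the caller enters xfs_trans_ail_delete() holding the AIL lock, and the function drops it before returning, forcing a shutdown if the item was not actually resident in the AIL. A minimal caller sketch, modeled on the xfs_trans_ail_remove() helper visible in the xfs_trans_priv.h hunk below; the SHUTDOWN_CORRUPT_INCORE reason is only an illustrative choice, not taken from this patch:

	struct xfs_ail	*ailp = lip->li_ailp;

	spin_lock(&ailp->xa_lock);
	if (lip->li_flags & XFS_LI_IN_AIL)
		/* xfs_trans_ail_delete() drops ailp->xa_lock before returning */
		xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE);
	else
		spin_unlock(&ailp->xa_lock);

xfs_ail_delete_one() itself leaves the lock held: it only unlinks the item and reports whether the AIL minimum changed, which is why xfs_trans_ail_delete() checks its return value before calling xlog_assign_tail_lsn_locked().
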
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 49931b72da8a..d91706c56c63 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -106,18 +106,9 @@ xfs_trans_ail_update(
 	xfs_trans_ail_update_bulk(ailp, NULL, &lip, 1, lsn);
 }
 
-void xfs_trans_ail_delete_bulk(struct xfs_ail *ailp,
-		struct xfs_log_item **log_items, int nr_items,
-		int shutdown_type)
-		__releases(ailp->xa_lock);
-static inline void
-xfs_trans_ail_delete(
-	struct xfs_ail	*ailp,
-	xfs_log_item_t	*lip,
-	int		shutdown_type) __releases(ailp->xa_lock)
-{
-	xfs_trans_ail_delete_bulk(ailp, &lip, 1, shutdown_type);
-}
+bool xfs_ail_delete_one(struct xfs_ail *ailp, struct xfs_log_item *lip);
+void xfs_trans_ail_delete(struct xfs_ail *ailp, struct xfs_log_item *lip,
+		int shutdown_type) __releases(ailp->xa_lock);
 
 static inline void
 xfs_trans_ail_remove(
diff --git a/include/uapi/linux/fsmap.h b/include/uapi/linux/fsmap.h
new file mode 100644
index 000000000000..7e8e5f0bd6d2
--- /dev/null
+++ b/include/uapi/linux/fsmap.h
@@ -0,0 +1,112 @@
+/*
+ * FS_IOC_GETFSMAP ioctl infrastructure.
+ *
+ * Copyright (C) 2017 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#ifndef _LINUX_FSMAP_H
+#define _LINUX_FSMAP_H
+
+#include <linux/types.h>
+
+/*
+ *	Structure for FS_IOC_GETFSMAP.
+ *
+ *	The memory layout for this call are the scalar values defined in
+ *	struct fsmap_head, followed by two struct fsmap that describe
+ *	the lower and upper bound of mappings to return, followed by an
+ *	array of struct fsmap mappings.
+ *
+ *	fmh_iflags control the output of the call, whereas fmh_oflags report
+ *	on the overall record output. fmh_count should be set to the
+ *	length of the fmh_recs array, and fmh_entries will be set to the
+ *	number of entries filled out during each call. If fmh_count is
+ *	zero, the number of reverse mappings will be returned in
+ *	fmh_entries, though no mappings will be returned. fmh_reserved
+ *	must be set to zero.
+ *
+ *	The two elements in the fmh_keys array are used to constrain the
+ *	output. The first element in the array should represent the
+ *	lowest disk mapping ("low key") that the user wants to learn
+ *	about. If this value is all zeroes, the filesystem will return
+ *	the first entry it knows about. For a subsequent call, the
+ *	contents of fsmap_head.fmh_recs[fsmap_head.fmh_count - 1] should be
+ *	copied into fmh_keys[0] to have the kernel start where it left off.
+ *
+ *	The second element in the fmh_keys array should represent the
+ *	highest disk mapping ("high key") that the user wants to learn
+ *	about. If this value is all ones, the filesystem will not stop
+ *	until it runs out of mapping to return or runs out of space in
+ *	fmh_recs.
+ *
+ *	fmr_device can be either a 32-bit cookie representing a device, or
+ *	a 32-bit dev_t if the FMH_OF_DEV_T flag is set. fmr_physical,
+ *	fmr_offset, and fmr_length are expressed in units of bytes.
+ *	fmr_owner is either an inode number, or a special value if
+ *	FMR_OF_SPECIAL_OWNER is set in fmr_flags.
+ */
+struct fsmap {
+	__u32		fmr_device;	/* device id */
+	__u32		fmr_flags;	/* mapping flags */
+	__u64		fmr_physical;	/* device offset of segment */
+	__u64		fmr_owner;	/* owner id */
+	__u64		fmr_offset;	/* file offset of segment */
+	__u64		fmr_length;	/* length of segment */
+	__u64		fmr_reserved[3];	/* must be zero */
+};
+
+struct fsmap_head {
+	__u32		fmh_iflags;	/* control flags */
+	__u32		fmh_oflags;	/* output flags */
+	__u32		fmh_count;	/* # of entries in array incl. input */
+	__u32		fmh_entries;	/* # of entries filled in (output). */
+	__u64		fmh_reserved[6];	/* must be zero */
+
+	struct fsmap	fmh_keys[2];	/* low and high keys for the mapping search */
+	struct fsmap	fmh_recs[];	/* returned records */
+};
+
+/* Size of an fsmap_head with room for nr records. */
+static inline size_t
+fsmap_sizeof(
+	unsigned int	nr)
+{
+	return sizeof(struct fsmap_head) + nr * sizeof(struct fsmap);
+}
+
+/* Start the next fsmap query at the end of the current query results. */
+static inline void
+fsmap_advance(
+	struct fsmap_head	*head)
+{
+	head->fmh_keys[0] = head->fmh_recs[head->fmh_entries - 1];
+}
+
+/*	fmh_iflags values - set by FS_IOC_GETFSMAP caller in the header. */
+/* no flags defined yet */
+#define FMH_IF_VALID		0
+
+/* fmh_oflags values - returned in the header segment only. */
+#define FMH_OF_DEV_T		0x1	/* fmr_device values will be dev_t */
+
+/* fmr_flags values - returned for each non-header segment */
+#define FMR_OF_PREALLOC		0x1	/* segment = unwritten pre-allocation */
+#define FMR_OF_ATTR_FORK	0x2	/* segment = attribute fork */
+#define FMR_OF_EXTENT_MAP	0x4	/* segment = extent map */
+#define FMR_OF_SHARED		0x8	/* segment = shared with another file */
+#define FMR_OF_SPECIAL_OWNER	0x10	/* owner is a special value */
+#define FMR_OF_LAST		0x20	/* segment is the last in the FS */
+
+/* Each FS gets to define its own special owner codes. */
+#define FMR_OWNER(type, code)	(((__u64)type << 32) | \
+				 ((__u64)code & 0xFFFFFFFFULL))
+#define FMR_OWNER_TYPE(owner)	((__u32)((__u64)owner >> 32))
+#define FMR_OWNER_CODE(owner)	((__u32)(((__u64)owner & 0xFFFFFFFFULL)))
+#define FMR_OWN_FREE		FMR_OWNER(0, 1)	/* free space */
+#define FMR_OWN_UNKNOWN		FMR_OWNER(0, 2)	/* unknown owner */
+#define FMR_OWN_METADATA	FMR_OWNER(0, 3)	/* metadata */
+
+#define FS_IOC_GETFSMAP		_IOWR('X', 59, struct fsmap_head)
+
+#endif /* _LINUX_FSMAP_H */
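
The header comment above spells out the query protocol: zero the low key, saturate the high key, set fmh_count to the size of the fmh_recs array, and copy the last returned record into fmh_keys[0] (what fsmap_advance() does) before the next call. A minimal userspace sketch following those rules; the mount point path, the 128-record buffer, and stopping on FMR_OF_LAST are illustrative choices, not part of the patch:

	#include <stdio.h>
	#include <stdlib.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/fsmap.h>

	int main(void)
	{
		struct fsmap_head *head;
		unsigned int i;
		int fd;

		/* Room for 128 records per call; any nonzero count works. */
		head = calloc(1, fsmap_sizeof(128));
		if (!head)
			return 1;

		fd = open("/mnt", O_RDONLY);	/* hypothetical mount point */
		if (fd < 0) {
			perror("open");
			return 1;
		}

		head->fmh_count = 128;
		/*
		 * calloc left the low key all zeroes (start at the beginning);
		 * push the high key to its maximum so the query only stops when
		 * it runs out of mappings or buffer space.  The reserved fields
		 * stay zero, as the header requires.
		 */
		head->fmh_keys[1].fmr_device = ~0U;
		head->fmh_keys[1].fmr_flags = ~0U;
		head->fmh_keys[1].fmr_physical = ~0ULL;
		head->fmh_keys[1].fmr_owner = ~0ULL;
		head->fmh_keys[1].fmr_offset = ~0ULL;

		for (;;) {
			if (ioctl(fd, FS_IOC_GETFSMAP, head) < 0) {
				perror("FS_IOC_GETFSMAP");
				break;
			}
			if (head->fmh_entries == 0)
				break;

			for (i = 0; i < head->fmh_entries; i++) {
				struct fsmap *p = &head->fmh_recs[i];

				printf("dev 0x%x phys %llu len %llu owner %llu offset %llu flags 0x%x\n",
				       (unsigned int)p->fmr_device,
				       (unsigned long long)p->fmr_physical,
				       (unsigned long long)p->fmr_length,
				       (unsigned long long)p->fmr_owner,
				       (unsigned long long)p->fmr_offset,
				       (unsigned int)p->fmr_flags);
			}

			/* FMR_OF_LAST marks the final mapping in the filesystem. */
			if (head->fmh_recs[head->fmh_entries - 1].fmr_flags & FMR_OF_LAST)
				break;

			/* Next call picks up where this one stopped. */
			fsmap_advance(head);
		}

		close(fd);
		free(head);
		return 0;
	}
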