-rw-r--r--  Documentation/filesystems/xfs.txt    29
-rw-r--r--  fs/open.c                             8
-rw-r--r--  fs/xfs/libxfs/xfs_alloc.c           104
-rw-r--r--  fs/xfs/libxfs/xfs_attr_leaf.c       150
-rw-r--r--  fs/xfs/libxfs/xfs_attr_leaf.h         6
-rw-r--r--  fs/xfs/libxfs/xfs_bmap.c            554
-rw-r--r--  fs/xfs/libxfs/xfs_bmap.h             13
-rw-r--r--  fs/xfs/libxfs/xfs_btree.c            24
-rw-r--r--  fs/xfs/libxfs/xfs_da_btree.c          8
-rw-r--r--  fs/xfs/libxfs/xfs_da_format.h        14
-rw-r--r--  fs/xfs/libxfs/xfs_dir2_data.c        39
-rw-r--r--  fs/xfs/libxfs/xfs_format.h           62
-rw-r--r--  fs/xfs/libxfs/xfs_ialloc.c           48
-rw-r--r--  fs/xfs/libxfs/xfs_sb.c               20
-rw-r--r--  fs/xfs/xfs_aops.c                   270
-rw-r--r--  fs/xfs/xfs_attr_inactive.c            3
-rw-r--r--  fs/xfs/xfs_attr_list.c                9
-rw-r--r--  fs/xfs/xfs_bmap_util.c              164
-rw-r--r--  fs/xfs/xfs_bmap_util.h                2
-rw-r--r--  fs/xfs/xfs_buf_item.c                 4
-rw-r--r--  fs/xfs/xfs_discard.c                  2
-rw-r--r--  fs/xfs/xfs_error.c                    2
-rw-r--r--  fs/xfs/xfs_error.h                    8
-rw-r--r--  fs/xfs/xfs_file.c                   161
-rw-r--r--  fs/xfs/xfs_filestream.c               2
-rw-r--r--  fs/xfs/xfs_fsops.c                   20
-rw-r--r--  fs/xfs/xfs_icache.c                   4
-rw-r--r--  fs/xfs/xfs_inode.c                  558
-rw-r--r--  fs/xfs/xfs_inode.h                   49
-rw-r--r--  fs/xfs/xfs_ioctl.c                    7
-rw-r--r--  fs/xfs/xfs_iomap.c                    3
-rw-r--r--  fs/xfs/xfs_iops.c                    91
-rw-r--r--  fs/xfs/xfs_iops.h                     2
-rw-r--r--  fs/xfs/xfs_itable.c                   2
-rw-r--r--  fs/xfs/xfs_linux.h                    9
-rw-r--r--  fs/xfs/xfs_log_recover.c              4
-rw-r--r--  fs/xfs/xfs_mount.c                  918
-rw-r--r--  fs/xfs/xfs_mount.h                   95
-rw-r--r--  fs/xfs/xfs_mru_cache.c                2
-rw-r--r--  fs/xfs/xfs_pnfs.c                     7
-rw-r--r--  fs/xfs/xfs_pnfs.h                     5
-rw-r--r--  fs/xfs/xfs_qm.c                      13
-rw-r--r--  fs/xfs/xfs_super.c                  132
-rw-r--r--  fs/xfs/xfs_super.h                    2
-rw-r--r--  fs/xfs/xfs_symlink.c                 58
-rw-r--r--  fs/xfs/xfs_trace.h                   29
-rw-r--r--  fs/xfs/xfs_trans.c                  234
-rw-r--r--  include/linux/falloc.h                6
-rw-r--r--  include/uapi/linux/falloc.h          17
49 files changed, 2024 insertions(+), 1949 deletions(-)
diff --git a/Documentation/filesystems/xfs.txt b/Documentation/filesystems/xfs.txt
index 0bfafe108357..5a5a05582b58 100644
--- a/Documentation/filesystems/xfs.txt
+++ b/Documentation/filesystems/xfs.txt
@@ -228,30 +228,19 @@ default behaviour.
 Deprecated Mount Options
 ========================
 
-  delaylog/nodelaylog
-	Delayed logging is the only logging method that XFS supports
-	now, so these mount options are now ignored.
-
-	Due for removal in 3.12.
-
-  ihashsize=value
-	In memory inode hashes have been removed, so this option has
-	no function as of August 2007. Option is deprecated.
-
-	Due for removal in 3.12.
+None at present.
 
-  irixsgid
-	This behaviour is now controlled by a sysctl, so the mount
-	option is ignored.
 
-	Due for removal in 3.12.
+Removed Mount Options
+=====================
 
-  osyncisdsync
-  osyncisosync
-	O_SYNC and O_DSYNC are fully supported, so there is no need
-	for these options any more.
+  Name				Removed
+  ----				-------
+  delaylog/nodelaylog		v3.20
+  ihashsize			v3.20
+  irixsgid			v3.20
+  osyncisdsync/osyncisosync	v3.20
 
-	Due for removal in 3.12.
 
 sysctls
 =======
diff --git a/fs/open.c b/fs/open.c
index 6796f04d6032..98e5a52dc68c 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -231,8 +231,7 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 		return -EINVAL;
 
 	/* Return error if mode is not supported */
-	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
-		     FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
+	if (mode & ~FALLOC_FL_SUPPORTED_MASK)
 		return -EOPNOTSUPP;
 
 	/* Punch hole and zero range are mutually exclusive */
@@ -250,6 +249,11 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 	    (mode & ~FALLOC_FL_COLLAPSE_RANGE))
 		return -EINVAL;
 
+	/* Insert range should only be used exclusively. */
+	if ((mode & FALLOC_FL_INSERT_RANGE) &&
+	    (mode & ~FALLOC_FL_INSERT_RANGE))
+		return -EINVAL;
+
 	if (!(file->f_mode & FMODE_WRITE))
 		return -EBADF;
 
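For context, the new FALLOC_FL_INSERT_RANGE mode is driven from userspace
through fallocate(2): it shifts existing file data from the given offset
rightwards by len bytes, the inverse of collapse range. A minimal caller
sketch follows (not part of the patch; the file name and offsets are made
up, and offset/len must be aligned to the filesystem block size or the
filesystem returns -EINVAL):

/* hypothetical userspace illustration, not kernel code */
#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>
#include <stdio.h>

int main(void)
{
	int fd = open("testfile", O_RDWR);	/* example file */

	if (fd < 0)
		return 1;
	/* shift everything from offset 64k onwards out by another 64k */
	if (fallocate(fd, FALLOC_FL_INSERT_RANGE, 65536, 65536) < 0)
		perror("fallocate");
	return 0;
}

Note that the mode must be used on its own: the validation hunk above
rejects any combination of FALLOC_FL_INSERT_RANGE with other flags.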
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index a6fbf4472017..516162be1398 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -260,6 +260,7 @@ xfs_alloc_fix_len(
 			rlen = rlen - (k - args->mod);
 		else
 			rlen = rlen - args->prod + (args->mod - k);
+	/* casts to (int) catch length underflows */
 	if ((int)rlen < (int)args->minlen)
 		return;
 	ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
@@ -286,7 +287,8 @@ xfs_alloc_fix_minleft(
 	if (diff >= 0)
 		return 1;
 	args->len += diff;		/* shrink the allocated space */
-	if (args->len >= args->minlen)
+	/* casts to (int) catch length underflows */
+	if ((int)args->len >= (int)args->minlen)
 		return 1;
 	args->agbno = NULLAGBLOCK;
 	return 0;
@@ -315,6 +317,9 @@ xfs_alloc_fixup_trees(
 	xfs_agblock_t	nfbno2;		/* second new free startblock */
 	xfs_extlen_t	nflen1=0;	/* first new free length */
 	xfs_extlen_t	nflen2=0;	/* second new free length */
+	struct xfs_mount *mp;
+
+	mp = cnt_cur->bc_mp;
 
 	/*
 	 * Look up the record in the by-size tree if necessary.
@@ -323,13 +328,13 @@ xfs_alloc_fixup_trees(
 #ifdef DEBUG
 		if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
 			return error;
-		XFS_WANT_CORRUPTED_RETURN(
+		XFS_WANT_CORRUPTED_RETURN(mp,
 			i == 1 && nfbno1 == fbno && nflen1 == flen);
 #endif
 	} else {
 		if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
 			return error;
-		XFS_WANT_CORRUPTED_RETURN(i == 1);
+		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 	}
 	/*
 	 * Look up the record in the by-block tree if necessary.
@@ -338,13 +343,13 @@ xfs_alloc_fixup_trees(
 #ifdef DEBUG
 		if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
 			return error;
-		XFS_WANT_CORRUPTED_RETURN(
+		XFS_WANT_CORRUPTED_RETURN(mp,
 			i == 1 && nfbno1 == fbno && nflen1 == flen);
 #endif
 	} else {
 		if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
 			return error;
-		XFS_WANT_CORRUPTED_RETURN(i == 1);
+		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 	}
 
 #ifdef DEBUG
@@ -355,7 +360,7 @@ xfs_alloc_fixup_trees(
 		bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
 		cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
 
-		XFS_WANT_CORRUPTED_RETURN(
+		XFS_WANT_CORRUPTED_RETURN(mp,
 			bnoblock->bb_numrecs == cntblock->bb_numrecs);
 	}
 #endif
@@ -386,25 +391,25 @@ xfs_alloc_fixup_trees(
 	 */
 	if ((error = xfs_btree_delete(cnt_cur, &i)))
 		return error;
-	XFS_WANT_CORRUPTED_RETURN(i == 1);
+	XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 	/*
 	 * Add new by-size btree entry(s).
 	 */
 	if (nfbno1 != NULLAGBLOCK) {
 		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
 			return error;
-		XFS_WANT_CORRUPTED_RETURN(i == 0);
+		XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
 		if ((error = xfs_btree_insert(cnt_cur, &i)))
 			return error;
-		XFS_WANT_CORRUPTED_RETURN(i == 1);
+		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 	}
 	if (nfbno2 != NULLAGBLOCK) {
 		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
 			return error;
-		XFS_WANT_CORRUPTED_RETURN(i == 0);
+		XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
 		if ((error = xfs_btree_insert(cnt_cur, &i)))
 			return error;
-		XFS_WANT_CORRUPTED_RETURN(i == 1);
+		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 	}
 	/*
 	 * Fix up the by-block btree entry(s).
@@ -415,7 +420,7 @@ xfs_alloc_fixup_trees(
 		 */
 		if ((error = xfs_btree_delete(bno_cur, &i)))
 			return error;
-		XFS_WANT_CORRUPTED_RETURN(i == 1);
+		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 	} else {
 		/*
 		 * Update the by-block entry to start later|be shorter.
@@ -429,10 +434,10 @@ xfs_alloc_fixup_trees(
 		 */
 		if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
 			return error;
-		XFS_WANT_CORRUPTED_RETURN(i == 0);
+		XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
 		if ((error = xfs_btree_insert(bno_cur, &i)))
 			return error;
-		XFS_WANT_CORRUPTED_RETURN(i == 1);
+		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 	}
 	return 0;
 }
@@ -682,7 +687,7 @@ xfs_alloc_ag_vextent_exact(
 	error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
 	if (error)
 		goto error0;
-	XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+	XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 	ASSERT(fbno <= args->agbno);
 
 	/*
@@ -783,7 +788,7 @@ xfs_alloc_find_best_extent(
 		error = xfs_alloc_get_rec(*scur, sbno, slen, &i);
 		if (error)
 			goto error0;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 		xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena);
 
 		/*
@@ -946,7 +951,7 @@ restart:
 			if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno,
 					&ltlen, &i)))
 				goto error0;
-			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+			XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 			if (ltlen >= args->minlen)
 				break;
 			if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
@@ -966,7 +971,7 @@ restart:
 		 */
 		if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
 			goto error0;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 		xfs_alloc_compute_aligned(args, ltbno, ltlen,
 						&ltbnoa, &ltlena);
 		if (ltlena < args->minlen)
@@ -999,7 +1004,7 @@ restart:
 		cnt_cur->bc_ptrs[0] = besti;
 		if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
 			goto error0;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 		ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
 		args->len = blen;
 		if (!xfs_alloc_fix_minleft(args)) {
@@ -1088,7 +1093,7 @@ restart:
 	if (bno_cur_lt) {
 		if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
 			goto error0;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 		xfs_alloc_compute_aligned(args, ltbno, ltlen,
 						&ltbnoa, &ltlena);
 		if (ltlena >= args->minlen)
@@ -1104,7 +1109,7 @@ restart:
 	if (bno_cur_gt) {
 		if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
 			goto error0;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 		xfs_alloc_compute_aligned(args, gtbno, gtlen,
 						&gtbnoa, &gtlena);
 		if (gtlena >= args->minlen)
@@ -1303,7 +1308,7 @@ restart:
 		error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
 		if (error)
 			goto error0;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 
 		xfs_alloc_compute_aligned(args, fbno, flen,
 					  &rbno, &rlen);
@@ -1342,7 +1347,7 @@ restart:
 	 * This can't happen in the second case above.
 	 */
 	rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
-	XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
+	XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
 		(rlen <= flen && rbno + rlen <= fbno + flen), error0);
 	if (rlen < args->maxlen) {
 		xfs_agblock_t	bestfbno;
@@ -1362,13 +1367,13 @@ restart:
 			if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
 					&i)))
 				goto error0;
-			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+			XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 			if (flen < bestrlen)
 				break;
 			xfs_alloc_compute_aligned(args, fbno, flen,
 						  &rbno, &rlen);
 			rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
-			XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
+			XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
 				(rlen <= flen && rbno + rlen <= fbno + flen),
 				error0);
 			if (rlen > bestrlen) {
@@ -1383,7 +1388,7 @@ restart:
 		if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
 				&i)))
 			goto error0;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 		rlen = bestrlen;
 		rbno = bestrbno;
 		flen = bestflen;
@@ -1408,7 +1413,7 @@ restart:
 	if (!xfs_alloc_fix_minleft(args))
 		goto out_nominleft;
 	rlen = args->len;
-	XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0);
+	XFS_WANT_CORRUPTED_GOTO(args->mp, rlen <= flen, error0);
 	/*
 	 * Allocate and initialize a cursor for the by-block tree.
 	 */
@@ -1422,7 +1427,7 @@ restart:
 	cnt_cur = bno_cur = NULL;
 	args->len = rlen;
 	args->agbno = rbno;
-	XFS_WANT_CORRUPTED_GOTO(
+	XFS_WANT_CORRUPTED_GOTO(args->mp,
 		args->agbno + args->len <=
 			be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
 		error0);
@@ -1467,7 +1472,7 @@ xfs_alloc_ag_vextent_small(
 	if (i) {
 		if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
 			goto error0;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 	}
 	/*
 	 * Nothing in the btree, try the freelist.  Make sure
@@ -1493,7 +1498,7 @@ xfs_alloc_ag_vextent_small(
 		}
 		args->len = 1;
 		args->agbno = fbno;
-		XFS_WANT_CORRUPTED_GOTO(
+		XFS_WANT_CORRUPTED_GOTO(args->mp,
 			args->agbno + args->len <=
 				be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
 			error0);
@@ -1579,7 +1584,7 @@ xfs_free_ag_extent(
 		 */
 		if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
 			goto error0;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
 		/*
 		 * It's not contiguous, though.
 		 */
@@ -1591,7 +1596,8 @@ xfs_free_ag_extent(
 			 * space was invalid, it's (partly) already free.
 			 * Very bad.
 			 */
-			XFS_WANT_CORRUPTED_GOTO(ltbno + ltlen <= bno, error0);
+			XFS_WANT_CORRUPTED_GOTO(mp,
+						ltbno + ltlen <= bno, error0);
 		}
 	}
 	/*
@@ -1606,7 +1612,7 @@ xfs_free_ag_extent(
 		 */
 		if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
 			goto error0;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
 		/*
 		 * It's not contiguous, though.
 		 */
@@ -1618,7 +1624,7 @@ xfs_free_ag_extent(
 			 * space was invalid, it's (partly) already free.
 			 * Very bad.
 			 */
-			XFS_WANT_CORRUPTED_GOTO(gtbno >= bno + len, error0);
+			XFS_WANT_CORRUPTED_GOTO(mp, gtbno >= bno + len, error0);
 		}
 	}
 	/*
@@ -1635,31 +1641,31 @@ xfs_free_ag_extent(
 		 */
 		if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
 			goto error0;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
 		if ((error = xfs_btree_delete(cnt_cur, &i)))
 			goto error0;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
 		/*
 		 * Delete the old by-size entry on the right.
 		 */
 		if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
 			goto error0;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
 		if ((error = xfs_btree_delete(cnt_cur, &i)))
 			goto error0;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
 		/*
 		 * Delete the old by-block entry for the right block.
 		 */
 		if ((error = xfs_btree_delete(bno_cur, &i)))
 			goto error0;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
 		/*
 		 * Move the by-block cursor back to the left neighbor.
 		 */
 		if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
 			goto error0;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
 #ifdef DEBUG
 		/*
 		 * Check that this is the right record: delete didn't
@@ -1672,7 +1678,7 @@ xfs_free_ag_extent(
 			if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
 					&i)))
 				goto error0;
-			XFS_WANT_CORRUPTED_GOTO(
+			XFS_WANT_CORRUPTED_GOTO(mp,
 				i == 1 && xxbno == ltbno && xxlen == ltlen,
 				error0);
 		}
@@ -1695,17 +1701,17 @@ xfs_free_ag_extent(
 		 */
 		if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
 			goto error0;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
 		if ((error = xfs_btree_delete(cnt_cur, &i)))
 			goto error0;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
 		/*
 		 * Back up the by-block cursor to the left neighbor, and
 		 * update its length.
 		 */
		if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
 			goto error0;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
 		nbno = ltbno;
 		nlen = len + ltlen;
 		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
@@ -1721,10 +1727,10 @@ xfs_free_ag_extent(
 		 */
 		if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
 			goto error0;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
 		if ((error = xfs_btree_delete(cnt_cur, &i)))
 			goto error0;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
 		/*
 		 * Update the starting block and length of the right
 		 * neighbor in the by-block tree.
@@ -1743,7 +1749,7 @@ xfs_free_ag_extent(
 		nlen = len;
 		if ((error = xfs_btree_insert(bno_cur, &i)))
 			goto error0;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
 	}
 	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
 	bno_cur = NULL;
@@ -1752,10 +1758,10 @@ xfs_free_ag_extent(
 	 */
 	if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
 		goto error0;
-	XFS_WANT_CORRUPTED_GOTO(i == 0, error0);
+	XFS_WANT_CORRUPTED_GOTO(mp, i == 0, error0);
 	if ((error = xfs_btree_insert(cnt_cur, &i)))
 		goto error0;
-	XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+	XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
 	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
 	cnt_cur = NULL;
 
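The diffstat lists fs/xfs/xfs_error.h among the changed files, but its hunk
falls outside this excerpt. Based on the call sites above, the reworked
macros plausibly take the following shape — a sketch only, assuming the
existing XFS_ERROR_REPORT helper keeps its signature — with the new mount
pointer letting the corruption report identify the affected filesystem:

/* sketch of the assumed fs/xfs/xfs_error.h change; not copied from the patch */
#define XFS_WANT_CORRUPTED_GOTO(mp, x, l)	\
	{ \
		int fs_is_ok = (x); \
		ASSERT(fs_is_ok); \
		if (unlikely(!fs_is_ok)) { \
			XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_GOTO", \
					 XFS_ERRLEVEL_LOW, mp); \
			error = -EFSCORRUPTED; \
			goto l; \
		} \
	}

#define XFS_WANT_CORRUPTED_RETURN(mp, x)	\
	{ \
		int fs_is_ok = (x); \
		ASSERT(fs_is_ok); \
		if (unlikely(!fs_is_ok)) { \
			XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_RETURN", \
					 XFS_ERRLEVEL_LOW, mp); \
			return -EFSCORRUPTED; \
		} \
	}

This explains why every call site in this series mechanically gains an mp
(or args->mp) first argument.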
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 15105dbc9e28..04e79d57bca6 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -86,8 +86,83 @@ STATIC void xfs_attr3_leaf_moveents(struct xfs_da_args *args,
86 int move_count); 86 int move_count);
87STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index); 87STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index);
88 88
89/*
90 * attr3 block 'firstused' conversion helpers.
91 *
92 * firstused refers to the offset of the first used byte of the nameval region
93 * of an attr leaf block. The region starts at the tail of the block and expands
94 * backwards towards the middle. As such, firstused is initialized to the block
95 * size for an empty leaf block and is reduced from there.
96 *
97 * The attr3 block size is pegged to the fsb size and the maximum fsb is 64k.
98 * The in-core firstused field is 32-bit and thus supports the maximum fsb size.
99 * The on-disk field is only 16-bit, however, and overflows at 64k. Since this
100 * only occurs at exactly 64k, we use zero as a magic on-disk value to represent
101 * the attr block size. The following helpers manage the conversion between the
102 * in-core and on-disk formats.
103 */
104
105static void
106xfs_attr3_leaf_firstused_from_disk(
107 struct xfs_da_geometry *geo,
108 struct xfs_attr3_icleaf_hdr *to,
109 struct xfs_attr_leafblock *from)
110{
111 struct xfs_attr3_leaf_hdr *hdr3;
112
113 if (from->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)) {
114 hdr3 = (struct xfs_attr3_leaf_hdr *) from;
115 to->firstused = be16_to_cpu(hdr3->firstused);
116 } else {
117 to->firstused = be16_to_cpu(from->hdr.firstused);
118 }
119
120 /*
121 * Convert from the magic fsb size value to actual blocksize. This
122 * should only occur for empty blocks when the block size overflows
123 * 16-bits.
124 */
125 if (to->firstused == XFS_ATTR3_LEAF_NULLOFF) {
126 ASSERT(!to->count && !to->usedbytes);
127 ASSERT(geo->blksize > USHRT_MAX);
128 to->firstused = geo->blksize;
129 }
130}
131
132static void
133xfs_attr3_leaf_firstused_to_disk(
134 struct xfs_da_geometry *geo,
135 struct xfs_attr_leafblock *to,
136 struct xfs_attr3_icleaf_hdr *from)
137{
138 struct xfs_attr3_leaf_hdr *hdr3;
139 uint32_t firstused;
140
141 /* magic value should only be seen on disk */
142 ASSERT(from->firstused != XFS_ATTR3_LEAF_NULLOFF);
143
144 /*
145 * Scale down the 32-bit in-core firstused value to the 16-bit on-disk
146 * value. This only overflows at the max supported value of 64k. Use the
147 * magic on-disk value to represent block size in this case.
148 */
149 firstused = from->firstused;
150 if (firstused > USHRT_MAX) {
151 ASSERT(from->firstused == geo->blksize);
152 firstused = XFS_ATTR3_LEAF_NULLOFF;
153 }
154
155 if (from->magic == XFS_ATTR3_LEAF_MAGIC) {
156 hdr3 = (struct xfs_attr3_leaf_hdr *) to;
157 hdr3->firstused = cpu_to_be16(firstused);
158 } else {
159 to->hdr.firstused = cpu_to_be16(firstused);
160 }
161}
162
89void 163void
90xfs_attr3_leaf_hdr_from_disk( 164xfs_attr3_leaf_hdr_from_disk(
165 struct xfs_da_geometry *geo,
91 struct xfs_attr3_icleaf_hdr *to, 166 struct xfs_attr3_icleaf_hdr *to,
92 struct xfs_attr_leafblock *from) 167 struct xfs_attr_leafblock *from)
93{ 168{
@@ -104,7 +179,7 @@ xfs_attr3_leaf_hdr_from_disk(
 		to->magic = be16_to_cpu(hdr3->info.hdr.magic);
 		to->count = be16_to_cpu(hdr3->count);
 		to->usedbytes = be16_to_cpu(hdr3->usedbytes);
-		to->firstused = be16_to_cpu(hdr3->firstused);
+		xfs_attr3_leaf_firstused_from_disk(geo, to, from);
 		to->holes = hdr3->holes;
 
 		for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
@@ -118,7 +193,7 @@ xfs_attr3_leaf_hdr_from_disk(
 	to->magic = be16_to_cpu(from->hdr.info.magic);
 	to->count = be16_to_cpu(from->hdr.count);
 	to->usedbytes = be16_to_cpu(from->hdr.usedbytes);
-	to->firstused = be16_to_cpu(from->hdr.firstused);
+	xfs_attr3_leaf_firstused_from_disk(geo, to, from);
 	to->holes = from->hdr.holes;
 
 	for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
@@ -129,10 +204,11 @@ xfs_attr3_leaf_hdr_from_disk(
 
 void
 xfs_attr3_leaf_hdr_to_disk(
+	struct xfs_da_geometry		*geo,
 	struct xfs_attr_leafblock	*to,
 	struct xfs_attr3_icleaf_hdr	*from)
 {
 	int	i;
 
 	ASSERT(from->magic == XFS_ATTR_LEAF_MAGIC ||
 		from->magic == XFS_ATTR3_LEAF_MAGIC);
@@ -145,7 +221,7 @@ xfs_attr3_leaf_hdr_to_disk(
 		hdr3->info.hdr.magic = cpu_to_be16(from->magic);
 		hdr3->count = cpu_to_be16(from->count);
 		hdr3->usedbytes = cpu_to_be16(from->usedbytes);
-		hdr3->firstused = cpu_to_be16(from->firstused);
+		xfs_attr3_leaf_firstused_to_disk(geo, to, from);
 		hdr3->holes = from->holes;
 		hdr3->pad1 = 0;
 
@@ -160,7 +236,7 @@ xfs_attr3_leaf_hdr_to_disk(
 	to->hdr.info.magic = cpu_to_be16(from->magic);
 	to->hdr.count = cpu_to_be16(from->count);
 	to->hdr.usedbytes = cpu_to_be16(from->usedbytes);
-	to->hdr.firstused = cpu_to_be16(from->firstused);
+	xfs_attr3_leaf_firstused_to_disk(geo, to, from);
 	to->hdr.holes = from->holes;
 	to->hdr.pad1 = 0;
 
@@ -178,7 +254,7 @@ xfs_attr3_leaf_verify(
 	struct xfs_attr_leafblock *leaf = bp->b_addr;
 	struct xfs_attr3_icleaf_hdr ichdr;
 
-	xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+	xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf);
 
 	if (xfs_sb_version_hascrc(&mp->m_sb)) {
 		struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
@@ -757,9 +833,10 @@ xfs_attr_shortform_allfit(
 	struct xfs_attr3_icleaf_hdr leafhdr;
 	int			bytes;
 	int			i;
+	struct xfs_mount	*mp = bp->b_target->bt_mount;
 
 	leaf = bp->b_addr;
-	xfs_attr3_leaf_hdr_from_disk(&leafhdr, leaf);
+	xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf);
 	entry = xfs_attr3_leaf_entryp(leaf);
 
 	bytes = sizeof(struct xfs_attr_sf_hdr);
@@ -812,7 +889,7 @@ xfs_attr3_leaf_to_shortform(
 	memcpy(tmpbuffer, bp->b_addr, args->geo->blksize);
 
 	leaf = (xfs_attr_leafblock_t *)tmpbuffer;
-	xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+	xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf);
 	entry = xfs_attr3_leaf_entryp(leaf);
 
 	/* XXX (dgc): buffer is about to be marked stale - why zero it? */
@@ -923,7 +1000,7 @@ xfs_attr3_leaf_to_node(
 	btree = dp->d_ops->node_tree_p(node);
 
 	leaf = bp2->b_addr;
-	xfs_attr3_leaf_hdr_from_disk(&icleafhdr, leaf);
+	xfs_attr3_leaf_hdr_from_disk(args->geo, &icleafhdr, leaf);
 	entries = xfs_attr3_leaf_entryp(leaf);
 
 	/* both on-disk, don't endian-flip twice */
@@ -988,7 +1065,7 @@ xfs_attr3_leaf_create(
 	}
 	ichdr.freemap[0].size = ichdr.firstused - ichdr.freemap[0].base;
 
-	xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr);
+	xfs_attr3_leaf_hdr_to_disk(args->geo, leaf, &ichdr);
 	xfs_trans_log_buf(args->trans, bp, 0, args->geo->blksize - 1);
 
 	*bpp = bp;
@@ -1073,7 +1150,7 @@ xfs_attr3_leaf_add(
 	trace_xfs_attr_leaf_add(args);
 
 	leaf = bp->b_addr;
-	xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+	xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf);
 	ASSERT(args->index >= 0 && args->index <= ichdr.count);
 	entsize = xfs_attr_leaf_newentsize(args, NULL);
 
@@ -1126,7 +1203,7 @@ xfs_attr3_leaf_add(
 	tmp = xfs_attr3_leaf_add_work(bp, &ichdr, args, 0);
 
 out_log_hdr:
-	xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr);
+	xfs_attr3_leaf_hdr_to_disk(args->geo, leaf, &ichdr);
 	xfs_trans_log_buf(args->trans, bp,
 		XFS_DA_LOGRANGE(leaf, &leaf->hdr,
 				xfs_attr3_leaf_hdr_size(leaf)));
@@ -1294,7 +1371,7 @@ xfs_attr3_leaf_compact(
 					ichdr_dst->freemap[0].base;
 
 	/* write the header back to initialise the underlying buffer */
-	xfs_attr3_leaf_hdr_to_disk(leaf_dst, ichdr_dst);
+	xfs_attr3_leaf_hdr_to_disk(args->geo, leaf_dst, ichdr_dst);
 
 	/*
 	 * Copy all entry's in the same (sorted) order,
@@ -1344,9 +1421,10 @@ xfs_attr_leaf_order(
 {
 	struct xfs_attr3_icleaf_hdr ichdr1;
 	struct xfs_attr3_icleaf_hdr ichdr2;
+	struct xfs_mount *mp = leaf1_bp->b_target->bt_mount;
 
-	xfs_attr3_leaf_hdr_from_disk(&ichdr1, leaf1_bp->b_addr);
-	xfs_attr3_leaf_hdr_from_disk(&ichdr2, leaf2_bp->b_addr);
+	xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr1, leaf1_bp->b_addr);
+	xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr2, leaf2_bp->b_addr);
 	return xfs_attr3_leaf_order(leaf1_bp, &ichdr1, leaf2_bp, &ichdr2);
 }
 
@@ -1388,8 +1466,8 @@ xfs_attr3_leaf_rebalance(
 	ASSERT(blk2->magic == XFS_ATTR_LEAF_MAGIC);
 	leaf1 = blk1->bp->b_addr;
 	leaf2 = blk2->bp->b_addr;
-	xfs_attr3_leaf_hdr_from_disk(&ichdr1, leaf1);
-	xfs_attr3_leaf_hdr_from_disk(&ichdr2, leaf2);
+	xfs_attr3_leaf_hdr_from_disk(state->args->geo, &ichdr1, leaf1);
+	xfs_attr3_leaf_hdr_from_disk(state->args->geo, &ichdr2, leaf2);
 	ASSERT(ichdr2.count == 0);
 	args = state->args;
 
@@ -1490,8 +1568,8 @@ xfs_attr3_leaf_rebalance(
 						ichdr1.count, count);
 	}
 
-	xfs_attr3_leaf_hdr_to_disk(leaf1, &ichdr1);
-	xfs_attr3_leaf_hdr_to_disk(leaf2, &ichdr2);
+	xfs_attr3_leaf_hdr_to_disk(state->args->geo, leaf1, &ichdr1);
+	xfs_attr3_leaf_hdr_to_disk(state->args->geo, leaf2, &ichdr2);
 	xfs_trans_log_buf(args->trans, blk1->bp, 0, args->geo->blksize - 1);
 	xfs_trans_log_buf(args->trans, blk2->bp, 0, args->geo->blksize - 1);
 
@@ -1684,7 +1762,7 @@ xfs_attr3_leaf_toosmall(
 	 */
 	blk = &state->path.blk[ state->path.active-1 ];
 	leaf = blk->bp->b_addr;
-	xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+	xfs_attr3_leaf_hdr_from_disk(state->args->geo, &ichdr, leaf);
 	bytes = xfs_attr3_leaf_hdr_size(leaf) +
 		ichdr.count * sizeof(xfs_attr_leaf_entry_t) +
 		ichdr.usedbytes;
@@ -1740,7 +1818,7 @@ xfs_attr3_leaf_toosmall(
 		if (error)
 			return error;
 
-		xfs_attr3_leaf_hdr_from_disk(&ichdr2, bp->b_addr);
+		xfs_attr3_leaf_hdr_from_disk(state->args->geo, &ichdr2, bp->b_addr);
 
 		bytes = state->args->geo->blksize -
 			(state->args->geo->blksize >> 2) -
@@ -1805,7 +1883,7 @@ xfs_attr3_leaf_remove(
 	trace_xfs_attr_leaf_remove(args);
 
 	leaf = bp->b_addr;
-	xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+	xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf);
 
 	ASSERT(ichdr.count > 0 && ichdr.count < args->geo->blksize / 8);
 	ASSERT(args->index >= 0 && args->index < ichdr.count);
@@ -1918,12 +1996,11 @@ xfs_attr3_leaf_remove(
 			tmp = be16_to_cpu(entry->nameidx);
 		}
 		ichdr.firstused = tmp;
-		if (!ichdr.firstused)
-			ichdr.firstused = tmp - XFS_ATTR_LEAF_NAME_ALIGN;
+		ASSERT(ichdr.firstused != 0);
 	} else {
 		ichdr.holes = 1;	/* mark as needing compaction */
 	}
-	xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr);
+	xfs_attr3_leaf_hdr_to_disk(args->geo, leaf, &ichdr);
 	xfs_trans_log_buf(args->trans, bp,
 			  XFS_DA_LOGRANGE(leaf, &leaf->hdr,
 					  xfs_attr3_leaf_hdr_size(leaf)));
@@ -1957,8 +2034,8 @@ xfs_attr3_leaf_unbalance(
 
 	drop_leaf = drop_blk->bp->b_addr;
 	save_leaf = save_blk->bp->b_addr;
-	xfs_attr3_leaf_hdr_from_disk(&drophdr, drop_leaf);
-	xfs_attr3_leaf_hdr_from_disk(&savehdr, save_leaf);
+	xfs_attr3_leaf_hdr_from_disk(state->args->geo, &drophdr, drop_leaf);
+	xfs_attr3_leaf_hdr_from_disk(state->args->geo, &savehdr, save_leaf);
 	entry = xfs_attr3_leaf_entryp(drop_leaf);
 
 	/*
@@ -2012,7 +2089,7 @@ xfs_attr3_leaf_unbalance(
 		tmphdr.firstused = state->args->geo->blksize;
 
 		/* write the header to the temp buffer to initialise it */
-		xfs_attr3_leaf_hdr_to_disk(tmp_leaf, &tmphdr);
+		xfs_attr3_leaf_hdr_to_disk(state->args->geo, tmp_leaf, &tmphdr);
 
 		if (xfs_attr3_leaf_order(save_blk->bp, &savehdr,
 					 drop_blk->bp, &drophdr)) {
@@ -2039,7 +2116,7 @@ xfs_attr3_leaf_unbalance(
 		kmem_free(tmp_leaf);
 	}
 
-	xfs_attr3_leaf_hdr_to_disk(save_leaf, &savehdr);
+	xfs_attr3_leaf_hdr_to_disk(state->args->geo, save_leaf, &savehdr);
 	xfs_trans_log_buf(state->args->trans, save_blk->bp, 0,
 			  state->args->geo->blksize - 1);
 
@@ -2085,7 +2162,7 @@ xfs_attr3_leaf_lookup_int(
 	trace_xfs_attr_leaf_lookup(args);
 
 	leaf = bp->b_addr;
-	xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+	xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf);
 	entries = xfs_attr3_leaf_entryp(leaf);
 	ASSERT(ichdr.count < args->geo->blksize / 8);
 
@@ -2190,7 +2267,7 @@ xfs_attr3_leaf_getvalue(
 	int			valuelen;
 
 	leaf = bp->b_addr;
-	xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+	xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf);
 	ASSERT(ichdr.count < args->geo->blksize / 8);
 	ASSERT(args->index < ichdr.count);
 
@@ -2391,8 +2468,9 @@ xfs_attr_leaf_lasthash(
 {
 	struct xfs_attr3_icleaf_hdr ichdr;
 	struct xfs_attr_leaf_entry *entries;
+	struct xfs_mount *mp = bp->b_target->bt_mount;
 
-	xfs_attr3_leaf_hdr_from_disk(&ichdr, bp->b_addr);
+	xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, bp->b_addr);
 	entries = xfs_attr3_leaf_entryp(bp->b_addr);
 	if (count)
 		*count = ichdr.count;
@@ -2486,7 +2564,7 @@ xfs_attr3_leaf_clearflag(
 	ASSERT(entry->flags & XFS_ATTR_INCOMPLETE);
 
 #ifdef DEBUG
-	xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+	xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf);
 	ASSERT(args->index < ichdr.count);
 	ASSERT(args->index >= 0);
 
@@ -2550,7 +2628,7 @@ xfs_attr3_leaf_setflag(
 
 	leaf = bp->b_addr;
 #ifdef DEBUG
-	xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+	xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf);
 	ASSERT(args->index < ichdr.count);
 	ASSERT(args->index >= 0);
 #endif
@@ -2629,11 +2707,11 @@ xfs_attr3_leaf_flipflags(
 	entry2 = &xfs_attr3_leaf_entryp(leaf2)[args->index2];
 
 #ifdef DEBUG
-	xfs_attr3_leaf_hdr_from_disk(&ichdr1, leaf1);
+	xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr1, leaf1);
 	ASSERT(args->index < ichdr1.count);
 	ASSERT(args->index >= 0);
 
-	xfs_attr3_leaf_hdr_from_disk(&ichdr2, leaf2);
+	xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr2, leaf2);
 	ASSERT(args->index2 < ichdr2.count);
 	ASSERT(args->index2 >= 0);
 
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h
index e2929da7c3ba..025c4b820c03 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.h
+++ b/fs/xfs/libxfs/xfs_attr_leaf.h
@@ -100,9 +100,11 @@ int xfs_attr_leaf_newentsize(struct xfs_da_args *args, int *local);
 int	xfs_attr3_leaf_read(struct xfs_trans *tp, struct xfs_inode *dp,
 			xfs_dablk_t bno, xfs_daddr_t mappedbno,
 			struct xfs_buf **bpp);
-void	xfs_attr3_leaf_hdr_from_disk(struct xfs_attr3_icleaf_hdr *to,
+void	xfs_attr3_leaf_hdr_from_disk(struct xfs_da_geometry *geo,
+				     struct xfs_attr3_icleaf_hdr *to,
 				     struct xfs_attr_leafblock *from);
-void	xfs_attr3_leaf_hdr_to_disk(struct xfs_attr_leafblock *to,
+void	xfs_attr3_leaf_hdr_to_disk(struct xfs_da_geometry *geo,
+				   struct xfs_attr_leafblock *to,
 				   struct xfs_attr3_icleaf_hdr *from);
 
 #endif	/* __XFS_ATTR_LEAF_H__ */
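The firstused conversion helpers added to xfs_attr_leaf.c above hinge on one
boundary case: a 64k attr block size does not fit in the 16-bit on-disk
field, so zero stands in for "block size" on disk. A standalone sketch of
just that encode/decode rule (illustrative only; NULLOFF and BLKSIZE are
stand-ins for the kernel's XFS_ATTR3_LEAF_NULLOFF and geometry block size):

/* hypothetical standalone illustration, not kernel code */
#include <assert.h>
#include <stdint.h>

#define NULLOFF		0	/* stand-in for XFS_ATTR3_LEAF_NULLOFF */
#define BLKSIZE		65536	/* maximum attr3 block size (64k) */

static uint16_t firstused_to_disk(uint32_t incore)
{
	/* only 64k overflows 16 bits; encode it as the magic zero */
	return (incore > UINT16_MAX) ? NULLOFF : (uint16_t)incore;
}

static uint32_t firstused_from_disk(uint16_t ondisk)
{
	/* decode the magic zero back to the full block size */
	return (ondisk == NULLOFF) ? BLKSIZE : ondisk;
}

int main(void)
{
	/* round trip of an empty 64k leaf block survives the 16-bit field */
	assert(firstused_from_disk(firstused_to_disk(BLKSIZE)) == BLKSIZE);
	return 0;
}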
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 61ec015dca16..aeffeaaac0ec 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -244,30 +244,6 @@ xfs_bmap_forkoff_reset(
 	}
 }
 
-/*
- * Debug/sanity checking code
- */
-
-STATIC int
-xfs_bmap_sanity_check(
-	struct xfs_mount	*mp,
-	struct xfs_buf		*bp,
-	int			level)
-{
-	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
-
-	if (block->bb_magic != cpu_to_be32(XFS_BMAP_CRC_MAGIC) &&
-	    block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC))
-		return 0;
-
-	if (be16_to_cpu(block->bb_level) != level ||
-	    be16_to_cpu(block->bb_numrecs) == 0 ||
-	    be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
-		return 0;
-
-	return 1;
-}
-
 #ifdef DEBUG
 STATIC struct xfs_buf *
 xfs_bmap_get_bp(
@@ -410,9 +386,6 @@ xfs_bmap_check_leaf_extents(
 				goto error_norelse;
 		}
 		block = XFS_BUF_TO_BLOCK(bp);
-		XFS_WANT_CORRUPTED_GOTO(
-			xfs_bmap_sanity_check(mp, bp, level),
-			error0);
 		if (level == 0)
 			break;
 
@@ -424,7 +397,8 @@ xfs_bmap_check_leaf_extents(
 		xfs_check_block(block, mp, 0, 0);
 		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
 		bno = be64_to_cpu(*pp);
-		XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
+		XFS_WANT_CORRUPTED_GOTO(mp,
+			XFS_FSB_SANITY_CHECK(mp, bno), error0);
 		if (bp_release) {
 			bp_release = 0;
 			xfs_trans_brelse(NULL, bp);
@@ -1029,7 +1003,7 @@ xfs_bmap_add_attrfork_btree(
 		if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat)))
 			goto error0;
 		/* must be at least one entry */
-		XFS_WANT_CORRUPTED_GOTO(stat == 1, error0);
+		XFS_WANT_CORRUPTED_GOTO(mp, stat == 1, error0);
 		if ((error = xfs_btree_new_iroot(cur, flags, &stat)))
 			goto error0;
 		if (stat == 0) {
@@ -1311,14 +1285,12 @@ xfs_bmap_read_extents(
 		if (error)
 			return error;
 		block = XFS_BUF_TO_BLOCK(bp);
-		XFS_WANT_CORRUPTED_GOTO(
-			xfs_bmap_sanity_check(mp, bp, level),
-			error0);
 		if (level == 0)
 			break;
 		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
 		bno = be64_to_cpu(*pp);
-		XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
+		XFS_WANT_CORRUPTED_GOTO(mp,
+			XFS_FSB_SANITY_CHECK(mp, bno), error0);
 		xfs_trans_brelse(tp, bp);
 	}
 	/*
@@ -1345,9 +1317,6 @@ xfs_bmap_read_extents(
 					 XFS_ERRLEVEL_LOW, ip->i_mount, block);
 			goto error0;
 		}
-		XFS_WANT_CORRUPTED_GOTO(
-			xfs_bmap_sanity_check(mp, bp, 0),
-			error0);
 		/*
 		 * Read-ahead the next leaf block, if any.
 		 */
@@ -1755,7 +1724,9 @@ xfs_bmap_add_extent_delay_real(
 	xfs_filblks_t		temp=0;	/* value for da_new calculations */
 	xfs_filblks_t		temp2=0;/* value for da_new calculations */
 	int			tmp_rval;	/* partial logging flags */
+	struct xfs_mount	*mp;
 
+	mp = bma->tp ? bma->tp->t_mountp : NULL;
 	ifp = XFS_IFORK_PTR(bma->ip, XFS_DATA_FORK);
 
 	ASSERT(bma->idx >= 0);
@@ -1866,15 +1837,15 @@ xfs_bmap_add_extent_delay_real(
 				RIGHT.br_blockcount, &i);
 		if (error)
 			goto done;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 		error = xfs_btree_delete(bma->cur, &i);
 		if (error)
 			goto done;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 		error = xfs_btree_decrement(bma->cur, 0, &i);
 		if (error)
 			goto done;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 		error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
 				LEFT.br_startblock,
 				LEFT.br_blockcount +
@@ -1907,7 +1878,7 @@ xfs_bmap_add_extent_delay_real(
 					&i);
 			if (error)
 				goto done;
-			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 			error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
 					LEFT.br_startblock,
 					LEFT.br_blockcount +
@@ -1938,7 +1909,7 @@ xfs_bmap_add_extent_delay_real(
 					RIGHT.br_blockcount, &i);
 			if (error)
 				goto done;
-			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 			error = xfs_bmbt_update(bma->cur, PREV.br_startoff,
 					new->br_startblock,
 					PREV.br_blockcount +
@@ -1968,12 +1939,12 @@ xfs_bmap_add_extent_delay_real(
 					&i);
 			if (error)
 				goto done;
-			XFS_WANT_CORRUPTED_GOTO(i == 0, done);
+			XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
 			bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
 			error = xfs_btree_insert(bma->cur, &i);
 			if (error)
 				goto done;
-			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 		}
 		break;
 
@@ -2001,7 +1972,7 @@ xfs_bmap_add_extent_delay_real(
 					&i);
 			if (error)
 				goto done;
-			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 			error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
 					LEFT.br_startblock,
 					LEFT.br_blockcount +
@@ -2038,12 +2009,12 @@ xfs_bmap_add_extent_delay_real(
 					&i);
 			if (error)
 				goto done;
-			XFS_WANT_CORRUPTED_GOTO(i == 0, done);
+			XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
 			bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
 			error = xfs_btree_insert(bma->cur, &i);
 			if (error)
 				goto done;
-			XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 		}
 
 		if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
@@ -2084,7 +2055,7 @@ xfs_bmap_add_extent_delay_real(
2084 RIGHT.br_blockcount, &i); 2055 RIGHT.br_blockcount, &i);
2085 if (error) 2056 if (error)
2086 goto done; 2057 goto done;
2087 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 2058 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2088 error = xfs_bmbt_update(bma->cur, new->br_startoff, 2059 error = xfs_bmbt_update(bma->cur, new->br_startoff,
2089 new->br_startblock, 2060 new->br_startblock,
2090 new->br_blockcount + 2061 new->br_blockcount +
@@ -2122,12 +2093,12 @@ xfs_bmap_add_extent_delay_real(
2122 &i); 2093 &i);
2123 if (error) 2094 if (error)
2124 goto done; 2095 goto done;
2125 XFS_WANT_CORRUPTED_GOTO(i == 0, done); 2096 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
2126 bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; 2097 bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
2127 error = xfs_btree_insert(bma->cur, &i); 2098 error = xfs_btree_insert(bma->cur, &i);
2128 if (error) 2099 if (error)
2129 goto done; 2100 goto done;
2130 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 2101 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2131 } 2102 }
2132 2103
2133 if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { 2104 if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
@@ -2191,12 +2162,12 @@ xfs_bmap_add_extent_delay_real(
2191 &i); 2162 &i);
2192 if (error) 2163 if (error)
2193 goto done; 2164 goto done;
2194 XFS_WANT_CORRUPTED_GOTO(i == 0, done); 2165 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
2195 bma->cur->bc_rec.b.br_state = XFS_EXT_NORM; 2166 bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
2196 error = xfs_btree_insert(bma->cur, &i); 2167 error = xfs_btree_insert(bma->cur, &i);
2197 if (error) 2168 if (error)
2198 goto done; 2169 goto done;
2199 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 2170 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2200 } 2171 }
2201 2172
2202 if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { 2173 if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
@@ -2212,9 +2183,8 @@ xfs_bmap_add_extent_delay_real(
2212 diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) - 2183 diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) -
2213 (bma->cur ? bma->cur->bc_private.b.allocated : 0)); 2184 (bma->cur ? bma->cur->bc_private.b.allocated : 0));
2214 if (diff > 0) { 2185 if (diff > 0) {
2215 error = xfs_icsb_modify_counters(bma->ip->i_mount, 2186 error = xfs_mod_fdblocks(bma->ip->i_mount,
2216 XFS_SBS_FDBLOCKS, 2187 -((int64_t)diff), false);
2217 -((int64_t)diff), 0);
2218 ASSERT(!error); 2188 ASSERT(!error);
2219 if (error) 2189 if (error)
2220 goto done; 2190 goto done;
@@ -2265,9 +2235,8 @@ xfs_bmap_add_extent_delay_real(
2265 temp += bma->cur->bc_private.b.allocated; 2235 temp += bma->cur->bc_private.b.allocated;
2266 ASSERT(temp <= da_old); 2236 ASSERT(temp <= da_old);
2267 if (temp < da_old) 2237 if (temp < da_old)
2268 xfs_icsb_modify_counters(bma->ip->i_mount, 2238 xfs_mod_fdblocks(bma->ip->i_mount,
2269 XFS_SBS_FDBLOCKS, 2239 (int64_t)(da_old - temp), false);
2270 (int64_t)(da_old - temp), 0);
2271 } 2240 }
2272 2241
2273 /* clear out the allocated field, done with it now in any case. */ 2242 /* clear out the allocated field, done with it now in any case. */
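The other recurring conversion swaps xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, delta, rsvd) for xfs_mod_fdblocks(mp, delta, rsvd), matching the move of the free-block count onto a generic per-cpu counter. A minimal sketch of such a helper, assuming an m_fdblocks percpu_counter in struct xfs_mount; the real version in xfs_mount.c additionally handles the reserve pool and batching near ENOSPC:

int
xfs_mod_fdblocks(
	struct xfs_mount	*mp,
	int64_t			delta,
	bool			rsvd)
{
	/* additions cannot fail, just fold them in */
	if (delta > 0) {
		percpu_counter_add(&mp->m_fdblocks, delta);
		return 0;
	}

	/* subtractions must not take the count below zero */
	percpu_counter_add(&mp->m_fdblocks, delta);
	if (percpu_counter_compare(&mp->m_fdblocks, 0) >= 0)
		return 0;

	/* back out; an rsvd caller would try the reserve pool first */
	percpu_counter_add(&mp->m_fdblocks, -delta);
	return -ENOSPC;
}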
@@ -2309,6 +2278,7 @@ xfs_bmap_add_extent_unwritten_real(
2309 /* left is 0, right is 1, prev is 2 */ 2278 /* left is 0, right is 1, prev is 2 */
2310 int rval=0; /* return value (logging flags) */ 2279 int rval=0; /* return value (logging flags) */
2311 int state = 0;/* state bits, accessed thru macros */ 2280 int state = 0;/* state bits, accessed thru macros */
2281 struct xfs_mount *mp = tp->t_mountp;
2312 2282
2313 *logflagsp = 0; 2283 *logflagsp = 0;
2314 2284
@@ -2421,19 +2391,19 @@ xfs_bmap_add_extent_unwritten_real(
2421 RIGHT.br_startblock, 2391 RIGHT.br_startblock,
2422 RIGHT.br_blockcount, &i))) 2392 RIGHT.br_blockcount, &i)))
2423 goto done; 2393 goto done;
2424 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 2394 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2425 if ((error = xfs_btree_delete(cur, &i))) 2395 if ((error = xfs_btree_delete(cur, &i)))
2426 goto done; 2396 goto done;
2427 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 2397 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2428 if ((error = xfs_btree_decrement(cur, 0, &i))) 2398 if ((error = xfs_btree_decrement(cur, 0, &i)))
2429 goto done; 2399 goto done;
2430 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 2400 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2431 if ((error = xfs_btree_delete(cur, &i))) 2401 if ((error = xfs_btree_delete(cur, &i)))
2432 goto done; 2402 goto done;
2433 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 2403 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2434 if ((error = xfs_btree_decrement(cur, 0, &i))) 2404 if ((error = xfs_btree_decrement(cur, 0, &i)))
2435 goto done; 2405 goto done;
2436 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 2406 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2437 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, 2407 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
2438 LEFT.br_startblock, 2408 LEFT.br_startblock,
2439 LEFT.br_blockcount + PREV.br_blockcount + 2409 LEFT.br_blockcount + PREV.br_blockcount +
@@ -2464,13 +2434,13 @@ xfs_bmap_add_extent_unwritten_real(
2464 PREV.br_startblock, PREV.br_blockcount, 2434 PREV.br_startblock, PREV.br_blockcount,
2465 &i))) 2435 &i)))
2466 goto done; 2436 goto done;
2467 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 2437 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2468 if ((error = xfs_btree_delete(cur, &i))) 2438 if ((error = xfs_btree_delete(cur, &i)))
2469 goto done; 2439 goto done;
2470 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 2440 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2471 if ((error = xfs_btree_decrement(cur, 0, &i))) 2441 if ((error = xfs_btree_decrement(cur, 0, &i)))
2472 goto done; 2442 goto done;
2473 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 2443 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2474 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, 2444 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
2475 LEFT.br_startblock, 2445 LEFT.br_startblock,
2476 LEFT.br_blockcount + PREV.br_blockcount, 2446 LEFT.br_blockcount + PREV.br_blockcount,
@@ -2499,13 +2469,13 @@ xfs_bmap_add_extent_unwritten_real(
2499 RIGHT.br_startblock, 2469 RIGHT.br_startblock,
2500 RIGHT.br_blockcount, &i))) 2470 RIGHT.br_blockcount, &i)))
2501 goto done; 2471 goto done;
2502 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 2472 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2503 if ((error = xfs_btree_delete(cur, &i))) 2473 if ((error = xfs_btree_delete(cur, &i)))
2504 goto done; 2474 goto done;
2505 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 2475 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2506 if ((error = xfs_btree_decrement(cur, 0, &i))) 2476 if ((error = xfs_btree_decrement(cur, 0, &i)))
2507 goto done; 2477 goto done;
2508 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 2478 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2509 if ((error = xfs_bmbt_update(cur, new->br_startoff, 2479 if ((error = xfs_bmbt_update(cur, new->br_startoff,
2510 new->br_startblock, 2480 new->br_startblock,
2511 new->br_blockcount + RIGHT.br_blockcount, 2481 new->br_blockcount + RIGHT.br_blockcount,
@@ -2532,7 +2502,7 @@ xfs_bmap_add_extent_unwritten_real(
2532 new->br_startblock, new->br_blockcount, 2502 new->br_startblock, new->br_blockcount,
2533 &i))) 2503 &i)))
2534 goto done; 2504 goto done;
2535 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 2505 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2536 if ((error = xfs_bmbt_update(cur, new->br_startoff, 2506 if ((error = xfs_bmbt_update(cur, new->br_startoff,
2537 new->br_startblock, new->br_blockcount, 2507 new->br_startblock, new->br_blockcount,
2538 newext))) 2508 newext)))
@@ -2569,7 +2539,7 @@ xfs_bmap_add_extent_unwritten_real(
2569 PREV.br_startblock, PREV.br_blockcount, 2539 PREV.br_startblock, PREV.br_blockcount,
2570 &i))) 2540 &i)))
2571 goto done; 2541 goto done;
2572 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 2542 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2573 if ((error = xfs_bmbt_update(cur, 2543 if ((error = xfs_bmbt_update(cur,
2574 PREV.br_startoff + new->br_blockcount, 2544 PREV.br_startoff + new->br_blockcount,
2575 PREV.br_startblock + new->br_blockcount, 2545 PREV.br_startblock + new->br_blockcount,
@@ -2611,7 +2581,7 @@ xfs_bmap_add_extent_unwritten_real(
2611 PREV.br_startblock, PREV.br_blockcount, 2581 PREV.br_startblock, PREV.br_blockcount,
2612 &i))) 2582 &i)))
2613 goto done; 2583 goto done;
2614 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 2584 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2615 if ((error = xfs_bmbt_update(cur, 2585 if ((error = xfs_bmbt_update(cur,
2616 PREV.br_startoff + new->br_blockcount, 2586 PREV.br_startoff + new->br_blockcount,
2617 PREV.br_startblock + new->br_blockcount, 2587 PREV.br_startblock + new->br_blockcount,
@@ -2621,7 +2591,7 @@ xfs_bmap_add_extent_unwritten_real(
2621 cur->bc_rec.b = *new; 2591 cur->bc_rec.b = *new;
2622 if ((error = xfs_btree_insert(cur, &i))) 2592 if ((error = xfs_btree_insert(cur, &i)))
2623 goto done; 2593 goto done;
2624 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 2594 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2625 } 2595 }
2626 break; 2596 break;
2627 2597
@@ -2651,7 +2621,7 @@ xfs_bmap_add_extent_unwritten_real(
2651 PREV.br_startblock, 2621 PREV.br_startblock,
2652 PREV.br_blockcount, &i))) 2622 PREV.br_blockcount, &i)))
2653 goto done; 2623 goto done;
2654 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 2624 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2655 if ((error = xfs_bmbt_update(cur, PREV.br_startoff, 2625 if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
2656 PREV.br_startblock, 2626 PREV.br_startblock,
2657 PREV.br_blockcount - new->br_blockcount, 2627 PREV.br_blockcount - new->br_blockcount,
@@ -2689,7 +2659,7 @@ xfs_bmap_add_extent_unwritten_real(
2689 PREV.br_startblock, PREV.br_blockcount, 2659 PREV.br_startblock, PREV.br_blockcount,
2690 &i))) 2660 &i)))
2691 goto done; 2661 goto done;
2692 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 2662 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2693 if ((error = xfs_bmbt_update(cur, PREV.br_startoff, 2663 if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
2694 PREV.br_startblock, 2664 PREV.br_startblock,
2695 PREV.br_blockcount - new->br_blockcount, 2665 PREV.br_blockcount - new->br_blockcount,
@@ -2699,11 +2669,11 @@ xfs_bmap_add_extent_unwritten_real(
2699 new->br_startblock, new->br_blockcount, 2669 new->br_startblock, new->br_blockcount,
2700 &i))) 2670 &i)))
2701 goto done; 2671 goto done;
2702 XFS_WANT_CORRUPTED_GOTO(i == 0, done); 2672 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
2703 cur->bc_rec.b.br_state = XFS_EXT_NORM; 2673 cur->bc_rec.b.br_state = XFS_EXT_NORM;
2704 if ((error = xfs_btree_insert(cur, &i))) 2674 if ((error = xfs_btree_insert(cur, &i)))
2705 goto done; 2675 goto done;
2706 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 2676 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2707 } 2677 }
2708 break; 2678 break;
2709 2679
@@ -2737,7 +2707,7 @@ xfs_bmap_add_extent_unwritten_real(
2737 PREV.br_startblock, PREV.br_blockcount, 2707 PREV.br_startblock, PREV.br_blockcount,
2738 &i))) 2708 &i)))
2739 goto done; 2709 goto done;
2740 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 2710 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2741 /* new right extent - oldext */ 2711 /* new right extent - oldext */
2742 if ((error = xfs_bmbt_update(cur, r[1].br_startoff, 2712 if ((error = xfs_bmbt_update(cur, r[1].br_startoff,
2743 r[1].br_startblock, r[1].br_blockcount, 2713 r[1].br_startblock, r[1].br_blockcount,
@@ -2749,7 +2719,7 @@ xfs_bmap_add_extent_unwritten_real(
2749 new->br_startoff - PREV.br_startoff; 2719 new->br_startoff - PREV.br_startoff;
2750 if ((error = xfs_btree_insert(cur, &i))) 2720 if ((error = xfs_btree_insert(cur, &i)))
2751 goto done; 2721 goto done;
2752 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 2722 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2753 /* 2723 /*
2754 * Reset the cursor to the position of the new extent 2724 * Reset the cursor to the position of the new extent
2755 * we are about to insert as we can't trust it after 2725 * we are about to insert as we can't trust it after
@@ -2759,12 +2729,12 @@ xfs_bmap_add_extent_unwritten_real(
2759 new->br_startblock, new->br_blockcount, 2729 new->br_startblock, new->br_blockcount,
2760 &i))) 2730 &i)))
2761 goto done; 2731 goto done;
2762 XFS_WANT_CORRUPTED_GOTO(i == 0, done); 2732 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
2763 /* new middle extent - newext */ 2733 /* new middle extent - newext */
2764 cur->bc_rec.b.br_state = new->br_state; 2734 cur->bc_rec.b.br_state = new->br_state;
2765 if ((error = xfs_btree_insert(cur, &i))) 2735 if ((error = xfs_btree_insert(cur, &i)))
2766 goto done; 2736 goto done;
2767 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 2737 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2768 } 2738 }
2769 break; 2739 break;
2770 2740
@@ -2944,8 +2914,8 @@ xfs_bmap_add_extent_hole_delay(
2944 } 2914 }
2945 if (oldlen != newlen) { 2915 if (oldlen != newlen) {
2946 ASSERT(oldlen > newlen); 2916 ASSERT(oldlen > newlen);
2947 xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, 2917 xfs_mod_fdblocks(ip->i_mount, (int64_t)(oldlen - newlen),
2948 (int64_t)(oldlen - newlen), 0); 2918 false);
2949 /* 2919 /*
2950 * Nothing to do for disk quota accounting here. 2920 * Nothing to do for disk quota accounting here.
2951 */ 2921 */
@@ -2968,7 +2938,9 @@ xfs_bmap_add_extent_hole_real(
2968 xfs_bmbt_irec_t right; /* right neighbor extent entry */ 2938 xfs_bmbt_irec_t right; /* right neighbor extent entry */
2969 int rval=0; /* return value (logging flags) */ 2939 int rval=0; /* return value (logging flags) */
2970 int state; /* state bits, accessed thru macros */ 2940 int state; /* state bits, accessed thru macros */
2941 struct xfs_mount *mp;
2971 2942
2943 mp = bma->tp ? bma->tp->t_mountp : NULL;
2972 ifp = XFS_IFORK_PTR(bma->ip, whichfork); 2944 ifp = XFS_IFORK_PTR(bma->ip, whichfork);
2973 2945
2974 ASSERT(bma->idx >= 0); 2946 ASSERT(bma->idx >= 0);
@@ -3056,15 +3028,15 @@ xfs_bmap_add_extent_hole_real(
3056 &i); 3028 &i);
3057 if (error) 3029 if (error)
3058 goto done; 3030 goto done;
3059 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 3031 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
3060 error = xfs_btree_delete(bma->cur, &i); 3032 error = xfs_btree_delete(bma->cur, &i);
3061 if (error) 3033 if (error)
3062 goto done; 3034 goto done;
3063 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 3035 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
3064 error = xfs_btree_decrement(bma->cur, 0, &i); 3036 error = xfs_btree_decrement(bma->cur, 0, &i);
3065 if (error) 3037 if (error)
3066 goto done; 3038 goto done;
3067 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 3039 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
3068 error = xfs_bmbt_update(bma->cur, left.br_startoff, 3040 error = xfs_bmbt_update(bma->cur, left.br_startoff,
3069 left.br_startblock, 3041 left.br_startblock,
3070 left.br_blockcount + 3042 left.br_blockcount +
@@ -3097,7 +3069,7 @@ xfs_bmap_add_extent_hole_real(
3097 &i); 3069 &i);
3098 if (error) 3070 if (error)
3099 goto done; 3071 goto done;
3100 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 3072 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
3101 error = xfs_bmbt_update(bma->cur, left.br_startoff, 3073 error = xfs_bmbt_update(bma->cur, left.br_startoff,
3102 left.br_startblock, 3074 left.br_startblock,
3103 left.br_blockcount + 3075 left.br_blockcount +
@@ -3131,7 +3103,7 @@ xfs_bmap_add_extent_hole_real(
3131 right.br_blockcount, &i); 3103 right.br_blockcount, &i);
3132 if (error) 3104 if (error)
3133 goto done; 3105 goto done;
3134 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 3106 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
3135 error = xfs_bmbt_update(bma->cur, new->br_startoff, 3107 error = xfs_bmbt_update(bma->cur, new->br_startoff,
3136 new->br_startblock, 3108 new->br_startblock,
3137 new->br_blockcount + 3109 new->br_blockcount +
@@ -3161,12 +3133,12 @@ xfs_bmap_add_extent_hole_real(
3161 new->br_blockcount, &i); 3133 new->br_blockcount, &i);
3162 if (error) 3134 if (error)
3163 goto done; 3135 goto done;
3164 XFS_WANT_CORRUPTED_GOTO(i == 0, done); 3136 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
3165 bma->cur->bc_rec.b.br_state = new->br_state; 3137 bma->cur->bc_rec.b.br_state = new->br_state;
3166 error = xfs_btree_insert(bma->cur, &i); 3138 error = xfs_btree_insert(bma->cur, &i);
3167 if (error) 3139 if (error)
3168 goto done; 3140 goto done;
3169 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 3141 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
3170 } 3142 }
3171 break; 3143 break;
3172 } 3144 }
@@ -4160,18 +4132,15 @@ xfs_bmapi_reserve_delalloc(
4160 ASSERT(indlen > 0); 4132 ASSERT(indlen > 0);
4161 4133
4162 if (rt) { 4134 if (rt) {
4163 error = xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, 4135 error = xfs_mod_frextents(mp, -((int64_t)extsz));
4164 -((int64_t)extsz), 0);
4165 } else { 4136 } else {
4166 error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, 4137 error = xfs_mod_fdblocks(mp, -((int64_t)alen), false);
4167 -((int64_t)alen), 0);
4168 } 4138 }
4169 4139
4170 if (error) 4140 if (error)
4171 goto out_unreserve_quota; 4141 goto out_unreserve_quota;
4172 4142
4173 error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, 4143 error = xfs_mod_fdblocks(mp, -((int64_t)indlen), false);
4174 -((int64_t)indlen), 0);
4175 if (error) 4144 if (error)
4176 goto out_unreserve_blocks; 4145 goto out_unreserve_blocks;
4177 4146
@@ -4198,9 +4167,9 @@ xfs_bmapi_reserve_delalloc(
4198 4167
4199out_unreserve_blocks: 4168out_unreserve_blocks:
4200 if (rt) 4169 if (rt)
4201 xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, extsz, 0); 4170 xfs_mod_frextents(mp, extsz);
4202 else 4171 else
4203 xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, alen, 0); 4172 xfs_mod_fdblocks(mp, alen, false);
4204out_unreserve_quota: 4173out_unreserve_quota:
4205 if (XFS_IS_QUOTA_ON(mp)) 4174 if (XFS_IS_QUOTA_ON(mp))
4206 xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, rt ? 4175 xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, rt ?
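Taken together, the reservation path in xfs_bmapi_reserve_delalloc keeps its strict ordering: quota first, then the data space (realtime extents or free blocks), then the worst-case indirect blocks, with each failure unwinding exactly what was already taken. Schematically, a sketch of the control flow using only the calls visible above:

	error = xfs_trans_reserve_quota_nblks(NULL, ip, (long)alen, 0,
			rt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
	if (error)
		return error;

	/* data space: realtime extents or ordinary free blocks */
	if (rt)
		error = xfs_mod_frextents(mp, -((int64_t)extsz));
	else
		error = xfs_mod_fdblocks(mp, -((int64_t)alen), false);
	if (error)
		goto out_unreserve_quota;

	/* worst-case bmbt blocks needed to map the new extent */
	error = xfs_mod_fdblocks(mp, -((int64_t)indlen), false);
	if (error)
		goto out_unreserve_blocks;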
@@ -4801,7 +4770,7 @@ xfs_bmap_del_extent(
4801 got.br_startblock, got.br_blockcount, 4770 got.br_startblock, got.br_blockcount,
4802 &i))) 4771 &i)))
4803 goto done; 4772 goto done;
4804 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 4773 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
4805 } 4774 }
4806 da_old = da_new = 0; 4775 da_old = da_new = 0;
4807 } else { 4776 } else {
@@ -4835,7 +4804,7 @@ xfs_bmap_del_extent(
4835 } 4804 }
4836 if ((error = xfs_btree_delete(cur, &i))) 4805 if ((error = xfs_btree_delete(cur, &i)))
4837 goto done; 4806 goto done;
4838 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 4807 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
4839 break; 4808 break;
4840 4809
4841 case 2: 4810 case 2:
@@ -4935,7 +4904,8 @@ xfs_bmap_del_extent(
4935 got.br_startblock, 4904 got.br_startblock,
4936 temp, &i))) 4905 temp, &i)))
4937 goto done; 4906 goto done;
4938 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 4907 XFS_WANT_CORRUPTED_GOTO(mp,
4908 i == 1, done);
4939 /* 4909 /*
4940 * Update the btree record back 4910 * Update the btree record back
4941 * to the original value. 4911 * to the original value.
@@ -4956,7 +4926,7 @@ xfs_bmap_del_extent(
4956 error = -ENOSPC; 4926 error = -ENOSPC;
4957 goto done; 4927 goto done;
4958 } 4928 }
4959 XFS_WANT_CORRUPTED_GOTO(i == 1, done); 4929 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
4960 } else 4930 } else
4961 flags |= xfs_ilog_fext(whichfork); 4931 flags |= xfs_ilog_fext(whichfork);
4962 XFS_IFORK_NEXT_SET(ip, whichfork, 4932 XFS_IFORK_NEXT_SET(ip, whichfork,
@@ -5012,10 +4982,8 @@ xfs_bmap_del_extent(
5012 * Nothing to do for disk quota accounting here. 4982 * Nothing to do for disk quota accounting here.
5013 */ 4983 */
5014 ASSERT(da_old >= da_new); 4984 ASSERT(da_old >= da_new);
5015 if (da_old > da_new) { 4985 if (da_old > da_new)
5016 xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, 4986 xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new), false);
5017 (int64_t)(da_old - da_new), 0);
5018 }
5019done: 4987done:
5020 *logflagsp = flags; 4988 *logflagsp = flags;
5021 return error; 4989 return error;
@@ -5284,14 +5252,13 @@ xfs_bunmapi(
5284 5252
5285 rtexts = XFS_FSB_TO_B(mp, del.br_blockcount); 5253 rtexts = XFS_FSB_TO_B(mp, del.br_blockcount);
5286 do_div(rtexts, mp->m_sb.sb_rextsize); 5254 do_div(rtexts, mp->m_sb.sb_rextsize);
5287 xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, 5255 xfs_mod_frextents(mp, (int64_t)rtexts);
5288 (int64_t)rtexts, 0);
5289 (void)xfs_trans_reserve_quota_nblks(NULL, 5256 (void)xfs_trans_reserve_quota_nblks(NULL,
5290 ip, -((long)del.br_blockcount), 0, 5257 ip, -((long)del.br_blockcount), 0,
5291 XFS_QMOPT_RES_RTBLKS); 5258 XFS_QMOPT_RES_RTBLKS);
5292 } else { 5259 } else {
5293 xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, 5260 xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount,
5294 (int64_t)del.br_blockcount, 0); 5261 false);
5295 (void)xfs_trans_reserve_quota_nblks(NULL, 5262 (void)xfs_trans_reserve_quota_nblks(NULL,
5296 ip, -((long)del.br_blockcount), 0, 5263 ip, -((long)del.br_blockcount), 0,
5297 XFS_QMOPT_RES_REGBLKS); 5264 XFS_QMOPT_RES_REGBLKS);
@@ -5453,6 +5420,7 @@ xfs_bmse_merge(
5453 struct xfs_bmbt_irec left; 5420 struct xfs_bmbt_irec left;
5454 xfs_filblks_t blockcount; 5421 xfs_filblks_t blockcount;
5455 int error, i; 5422 int error, i;
5423 struct xfs_mount *mp = ip->i_mount;
5456 5424
5457 xfs_bmbt_get_all(gotp, &got); 5425 xfs_bmbt_get_all(gotp, &got);
5458 xfs_bmbt_get_all(leftp, &left); 5426 xfs_bmbt_get_all(leftp, &left);
@@ -5487,19 +5455,19 @@ xfs_bmse_merge(
5487 got.br_blockcount, &i); 5455 got.br_blockcount, &i);
5488 if (error) 5456 if (error)
5489 return error; 5457 return error;
5490 XFS_WANT_CORRUPTED_RETURN(i == 1); 5458 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
5491 5459
5492 error = xfs_btree_delete(cur, &i); 5460 error = xfs_btree_delete(cur, &i);
5493 if (error) 5461 if (error)
5494 return error; 5462 return error;
5495 XFS_WANT_CORRUPTED_RETURN(i == 1); 5463 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
5496 5464
5497 /* lookup and update size of the previous extent */ 5465 /* lookup and update size of the previous extent */
5498 error = xfs_bmbt_lookup_eq(cur, left.br_startoff, left.br_startblock, 5466 error = xfs_bmbt_lookup_eq(cur, left.br_startoff, left.br_startblock,
5499 left.br_blockcount, &i); 5467 left.br_blockcount, &i);
5500 if (error) 5468 if (error)
5501 return error; 5469 return error;
5502 XFS_WANT_CORRUPTED_RETURN(i == 1); 5470 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
5503 5471
5504 left.br_blockcount = blockcount; 5472 left.br_blockcount = blockcount;
5505 5473
@@ -5518,50 +5486,92 @@ xfs_bmse_shift_one(
5518 int *current_ext, 5486 int *current_ext,
5519 struct xfs_bmbt_rec_host *gotp, 5487 struct xfs_bmbt_rec_host *gotp,
5520 struct xfs_btree_cur *cur, 5488 struct xfs_btree_cur *cur,
5521 int *logflags) 5489 int *logflags,
5490 enum shift_direction direction)
5522{ 5491{
5523 struct xfs_ifork *ifp; 5492 struct xfs_ifork *ifp;
5493 struct xfs_mount *mp;
5524 xfs_fileoff_t startoff; 5494 xfs_fileoff_t startoff;
5525 struct xfs_bmbt_rec_host *leftp; 5495 struct xfs_bmbt_rec_host *adj_irecp;
5526 struct xfs_bmbt_irec got; 5496 struct xfs_bmbt_irec got;
5527 struct xfs_bmbt_irec left; 5497 struct xfs_bmbt_irec adj_irec;
5528 int error; 5498 int error;
5529 int i; 5499 int i;
5500 int total_extents;
5530 5501
5502 mp = ip->i_mount;
5531 ifp = XFS_IFORK_PTR(ip, whichfork); 5503 ifp = XFS_IFORK_PTR(ip, whichfork);
5504 total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
5532 5505
5533 xfs_bmbt_get_all(gotp, &got); 5506 xfs_bmbt_get_all(gotp, &got);
5534 startoff = got.br_startoff - offset_shift_fsb;
5535 5507
5536 /* delalloc extents should be prevented by caller */ 5508 /* delalloc extents should be prevented by caller */
5537 XFS_WANT_CORRUPTED_RETURN(!isnullstartblock(got.br_startblock)); 5509 XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock));
5538 5510
5539 /* 5511 if (direction == SHIFT_LEFT) {
5540 * Check for merge if we've got an extent to the left, otherwise make 5512 startoff = got.br_startoff - offset_shift_fsb;
5541 * sure there's enough room at the start of the file for the shift. 5513
5542 */ 5514 /*
5543 if (*current_ext) { 5515 * Check for merge if we've got an extent to the left,
5544 /* grab the left extent and check for a large enough hole */ 5516 * otherwise make sure there's enough room at the start
5545 leftp = xfs_iext_get_ext(ifp, *current_ext - 1); 5517 * of the file for the shift.
5546 xfs_bmbt_get_all(leftp, &left); 5518 */
5519 if (!*current_ext) {
5520 if (got.br_startoff < offset_shift_fsb)
5521 return -EINVAL;
5522 goto update_current_ext;
5523 }
5524 /*
5525 * grab the left extent and check for a large
5526 * enough hole.
5527 */
5528 adj_irecp = xfs_iext_get_ext(ifp, *current_ext - 1);
5529 xfs_bmbt_get_all(adj_irecp, &adj_irec);
5547 5530
5548 if (startoff < left.br_startoff + left.br_blockcount) 5531 if (startoff <
5532 adj_irec.br_startoff + adj_irec.br_blockcount)
5549 return -EINVAL; 5533 return -EINVAL;
5550 5534
5551 /* check whether to merge the extent or shift it down */ 5535 /* check whether to merge the extent or shift it down */
5552 if (xfs_bmse_can_merge(&left, &got, offset_shift_fsb)) { 5536 if (xfs_bmse_can_merge(&adj_irec, &got,
5537 offset_shift_fsb)) {
5553 return xfs_bmse_merge(ip, whichfork, offset_shift_fsb, 5538 return xfs_bmse_merge(ip, whichfork, offset_shift_fsb,
5554 *current_ext, gotp, leftp, cur, 5539 *current_ext, gotp, adj_irecp,
5555 logflags); 5540 cur, logflags);
5556 } 5541 }
5557 } else if (got.br_startoff < offset_shift_fsb) 5542 } else {
5558 return -EINVAL; 5543 startoff = got.br_startoff + offset_shift_fsb;
5559 5544 /* nothing to move if this is the last extent */
5545 if (*current_ext >= (total_extents - 1))
5546 goto update_current_ext;
5547 /*
5548 * If this is not the last extent in the file, make sure there
 5549 * is enough room between the current extent and the next
 5550 * extent to accommodate the shift.
5551 */
5552 adj_irecp = xfs_iext_get_ext(ifp, *current_ext + 1);
5553 xfs_bmbt_get_all(adj_irecp, &adj_irec);
5554 if (startoff + got.br_blockcount > adj_irec.br_startoff)
5555 return -EINVAL;
5556 /*
5557 * Unlike a left shift (which involves a hole punch),
5558 * a right shift does not modify extent neighbors
5559 * in any way. We should never find mergeable extents
 5560 * in this scenario. Check anyway and warn if we
5561 * encounter two extents that could be one.
5562 */
5563 if (xfs_bmse_can_merge(&got, &adj_irec, offset_shift_fsb))
5564 WARN_ON_ONCE(1);
5565 }
5560 /* 5566 /*
5561 * Increment the extent index for the next iteration, update the start 5567 * Increment the extent index for the next iteration, update the start
5562 * offset of the in-core extent and update the btree if applicable. 5568 * offset of the in-core extent and update the btree if applicable.
5563 */ 5569 */
5564 (*current_ext)++; 5570update_current_ext:
5571 if (direction == SHIFT_LEFT)
5572 (*current_ext)++;
5573 else
5574 (*current_ext)--;
5565 xfs_bmbt_set_startoff(gotp, startoff); 5575 xfs_bmbt_set_startoff(gotp, startoff);
5566 *logflags |= XFS_ILOG_CORE; 5576 *logflags |= XFS_ILOG_CORE;
5567 if (!cur) { 5577 if (!cur) {
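The new right-shift branch only has to check that the shifted extent does not run into its right-hand neighbour. Pulled out of context, the test reduces to one comparison; a hypothetical helper (not part of the patch) using the usual xfs_bmbt_irec fields:

static bool
xfs_bmse_right_shift_overlaps(
	struct xfs_bmbt_irec	*got,	/* extent being shifted */
	struct xfs_bmbt_irec	*next,	/* its right-hand neighbour */
	xfs_fileoff_t		shift)	/* offset_shift_fsb */
{
	xfs_fileoff_t	new_startoff = got->br_startoff + shift;

	/* the shifted extent must end at or before the next one begins */
	return new_startoff + got->br_blockcount > next->br_startoff;
}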
@@ -5573,18 +5583,18 @@ xfs_bmse_shift_one(
5573 got.br_blockcount, &i); 5583 got.br_blockcount, &i);
5574 if (error) 5584 if (error)
5575 return error; 5585 return error;
5576 XFS_WANT_CORRUPTED_RETURN(i == 1); 5586 XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
5577 5587
5578 got.br_startoff = startoff; 5588 got.br_startoff = startoff;
5579 return xfs_bmbt_update(cur, got.br_startoff, got.br_startblock, 5589 return xfs_bmbt_update(cur, got.br_startoff, got.br_startblock,
5580 got.br_blockcount, got.br_state); 5590 got.br_blockcount, got.br_state);
5581} 5591}
5582 5592
5583/* 5593/*
5584 * Shift extent records to the left to cover a hole. 5594 * Shift extent records to the left/right to cover/create a hole.
5585 * 5595 *
5586 * The maximum number of extents to be shifted in a single operation is 5596 * The maximum number of extents to be shifted in a single operation is
 5587 * @num_exts. @start_fsb specifies the file offset to start the shift and the 5597 * @num_exts. @stop_fsb specifies the file offset at which to stop the shift and the
5588 * file offset where we've left off is returned in @next_fsb. @offset_shift_fsb 5598 * file offset where we've left off is returned in @next_fsb. @offset_shift_fsb
5589 * is the length by which each extent is shifted. If there is no hole to shift 5599 * is the length by which each extent is shifted. If there is no hole to shift
5590 * the extents into, this will be considered invalid operation and we abort 5600 * the extents into, this will be considered invalid operation and we abort
@@ -5594,12 +5604,13 @@ int
5594xfs_bmap_shift_extents( 5604xfs_bmap_shift_extents(
5595 struct xfs_trans *tp, 5605 struct xfs_trans *tp,
5596 struct xfs_inode *ip, 5606 struct xfs_inode *ip,
5597 xfs_fileoff_t start_fsb, 5607 xfs_fileoff_t *next_fsb,
5598 xfs_fileoff_t offset_shift_fsb, 5608 xfs_fileoff_t offset_shift_fsb,
5599 int *done, 5609 int *done,
5600 xfs_fileoff_t *next_fsb, 5610 xfs_fileoff_t stop_fsb,
5601 xfs_fsblock_t *firstblock, 5611 xfs_fsblock_t *firstblock,
5602 struct xfs_bmap_free *flist, 5612 struct xfs_bmap_free *flist,
5613 enum shift_direction direction,
5603 int num_exts) 5614 int num_exts)
5604{ 5615{
5605 struct xfs_btree_cur *cur = NULL; 5616 struct xfs_btree_cur *cur = NULL;
@@ -5609,10 +5620,11 @@ xfs_bmap_shift_extents(
5609 struct xfs_ifork *ifp; 5620 struct xfs_ifork *ifp;
5610 xfs_extnum_t nexts = 0; 5621 xfs_extnum_t nexts = 0;
5611 xfs_extnum_t current_ext; 5622 xfs_extnum_t current_ext;
5623 xfs_extnum_t total_extents;
5624 xfs_extnum_t stop_extent;
5612 int error = 0; 5625 int error = 0;
5613 int whichfork = XFS_DATA_FORK; 5626 int whichfork = XFS_DATA_FORK;
5614 int logflags = 0; 5627 int logflags = 0;
5615 int total_extents;
5616 5628
5617 if (unlikely(XFS_TEST_ERROR( 5629 if (unlikely(XFS_TEST_ERROR(
5618 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && 5630 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
@@ -5628,6 +5640,8 @@ xfs_bmap_shift_extents(
5628 5640
5629 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); 5641 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
5630 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 5642 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
5643 ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT);
5644 ASSERT(*next_fsb != NULLFSBLOCK || direction == SHIFT_RIGHT);
5631 5645
5632 ifp = XFS_IFORK_PTR(ip, whichfork); 5646 ifp = XFS_IFORK_PTR(ip, whichfork);
5633 if (!(ifp->if_flags & XFS_IFEXTENTS)) { 5647 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
@@ -5645,43 +5659,83 @@ xfs_bmap_shift_extents(
5645 } 5659 }
5646 5660
5647 /* 5661 /*
5662 * There may be delalloc extents in the data fork before the range we
5663 * are collapsing out, so we cannot use the count of real extents here.
5664 * Instead we have to calculate it from the incore fork.
5665 */
5666 total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
5667 if (total_extents == 0) {
5668 *done = 1;
5669 goto del_cursor;
5670 }
5671
5672 /*
 5673 * On the first right-shift call, we need to initialize next_fsb
5674 */
5675 if (*next_fsb == NULLFSBLOCK) {
5676 gotp = xfs_iext_get_ext(ifp, total_extents - 1);
5677 xfs_bmbt_get_all(gotp, &got);
5678 *next_fsb = got.br_startoff;
5679 if (stop_fsb > *next_fsb) {
5680 *done = 1;
5681 goto del_cursor;
5682 }
5683 }
5684
 5685 /* Look up the extent index at which we have to stop */
5686 if (direction == SHIFT_RIGHT) {
5687 gotp = xfs_iext_bno_to_ext(ifp, stop_fsb, &stop_extent);
5688 /* Make stop_extent exclusive of shift range */
5689 stop_extent--;
5690 } else
5691 stop_extent = total_extents;
5692
5693 /*
5648 * Look up the extent index for the fsb where we start shifting. We can 5694 * Look up the extent index for the fsb where we start shifting. We can
5649 * henceforth iterate with current_ext as extent list changes are locked 5695 * henceforth iterate with current_ext as extent list changes are locked
5650 * out via ilock. 5696 * out via ilock.
5651 * 5697 *
5652 * gotp can be null in 2 cases: 1) if there are no extents or 2) 5698 * gotp can be null in 2 cases: 1) if there are no extents or 2)
5653 * start_fsb lies in a hole beyond which there are no extents. Either 5699 * *next_fsb lies in a hole beyond which there are no extents. Either
5654 * way, we are done. 5700 * way, we are done.
5655 */ 5701 */
5656 gotp = xfs_iext_bno_to_ext(ifp, start_fsb, &current_ext); 5702 gotp = xfs_iext_bno_to_ext(ifp, *next_fsb, &current_ext);
5657 if (!gotp) { 5703 if (!gotp) {
5658 *done = 1; 5704 *done = 1;
5659 goto del_cursor; 5705 goto del_cursor;
5660 } 5706 }
5661 5707
5662 /* 5708 /* some sanity checking before we finally start shifting extents */
5663 * There may be delalloc extents in the data fork before the range we 5709 if ((direction == SHIFT_LEFT && current_ext >= stop_extent) ||
5664 * are collapsing out, so we cannot use the count of real extents here. 5710 (direction == SHIFT_RIGHT && current_ext <= stop_extent)) {
5665 * Instead we have to calculate it from the incore fork. 5711 error = -EIO;
5666 */ 5712 goto del_cursor;
5667 total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); 5713 }
5668 while (nexts++ < num_exts && current_ext < total_extents) { 5714
5715 while (nexts++ < num_exts) {
5669 error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb, 5716 error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb,
5670 &current_ext, gotp, cur, &logflags); 5717 &current_ext, gotp, cur, &logflags,
5718 direction);
5671 if (error) 5719 if (error)
5672 goto del_cursor; 5720 goto del_cursor;
5721 /*
5722 * If there was an extent merge during the shift, the extent
 5723 * count can change. Update the total and grab the next record.
5724 */
5725 if (direction == SHIFT_LEFT) {
5726 total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
5727 stop_extent = total_extents;
5728 }
5673 5729
5674 /* update total extent count and grab the next record */ 5730 if (current_ext == stop_extent) {
5675 total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); 5731 *done = 1;
5676 if (current_ext >= total_extents) 5732 *next_fsb = NULLFSBLOCK;
5677 break; 5733 break;
5734 }
5678 gotp = xfs_iext_get_ext(ifp, current_ext); 5735 gotp = xfs_iext_get_ext(ifp, current_ext);
5679 } 5736 }
5680 5737
5681 /* Check if we are done */ 5738 if (!*done) {
5682 if (current_ext == total_extents) {
5683 *done = 1;
5684 } else if (next_fsb) {
5685 xfs_bmbt_get_all(gotp, &got); 5739 xfs_bmbt_get_all(gotp, &got);
5686 *next_fsb = got.br_startoff; 5740 *next_fsb = got.br_startoff;
5687 } 5741 }
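The two shift directions map onto the two range operations fallocate(2) builds on this helper: FALLOC_FL_COLLAPSE_RANGE shifts the tail of the file left over a punched hole, and the new FALLOC_FL_INSERT_RANGE shifts it right to open a hole up. A userspace sketch, assuming the new flag is present in the installed uapi headers:

#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>

/*
 * Open a gap of len bytes at offset: everything from offset onwards is
 * shifted right (the SHIFT_RIGHT path above). Both offset and len must
 * be aligned to the filesystem block size.
 */
static int insert_range(int fd, off_t offset, off_t len)
{
	return fallocate(fd, FALLOC_FL_INSERT_RANGE, offset, len);
}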
@@ -5696,3 +5750,189 @@ del_cursor:
5696 5750
5697 return error; 5751 return error;
5698} 5752}
5753
5754/*
 5755 * Splits an extent into two extents at the split_fsb block such that
 5756 * split_fsb becomes the first block of the new extent. @current_ext is
 5757 * the target extent to be split. If split_fsb lies in a hole or on the
 5758 * first block of an extent, just return 0.
5759 */
5760STATIC int
5761xfs_bmap_split_extent_at(
5762 struct xfs_trans *tp,
5763 struct xfs_inode *ip,
5764 xfs_fileoff_t split_fsb,
5765 xfs_fsblock_t *firstfsb,
5766 struct xfs_bmap_free *free_list)
5767{
5768 int whichfork = XFS_DATA_FORK;
5769 struct xfs_btree_cur *cur = NULL;
5770 struct xfs_bmbt_rec_host *gotp;
5771 struct xfs_bmbt_irec got;
5772 struct xfs_bmbt_irec new; /* split extent */
5773 struct xfs_mount *mp = ip->i_mount;
5774 struct xfs_ifork *ifp;
5775 xfs_fsblock_t gotblkcnt; /* new block count for got */
5776 xfs_extnum_t current_ext;
5777 int error = 0;
5778 int logflags = 0;
5779 int i = 0;
5780
5781 if (unlikely(XFS_TEST_ERROR(
5782 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
5783 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
5784 mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
5785 XFS_ERROR_REPORT("xfs_bmap_split_extent_at",
5786 XFS_ERRLEVEL_LOW, mp);
5787 return -EFSCORRUPTED;
5788 }
5789
5790 if (XFS_FORCED_SHUTDOWN(mp))
5791 return -EIO;
5792
5793 ifp = XFS_IFORK_PTR(ip, whichfork);
5794 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
5795 /* Read in all the extents */
5796 error = xfs_iread_extents(tp, ip, whichfork);
5797 if (error)
5798 return error;
5799 }
5800
5801 /*
5802 * gotp can be null in 2 cases: 1) if there are no extents
5803 * or 2) split_fsb lies in a hole beyond which there are
5804 * no extents. Either way, we are done.
5805 */
5806 gotp = xfs_iext_bno_to_ext(ifp, split_fsb, &current_ext);
5807 if (!gotp)
5808 return 0;
5809
5810 xfs_bmbt_get_all(gotp, &got);
5811
5812 /*
 5813 * Check whether split_fsb lies in a hole or at the start boundary
 5814 * offset of the extent.
5815 */
5816 if (got.br_startoff >= split_fsb)
5817 return 0;
5818
5819 gotblkcnt = split_fsb - got.br_startoff;
5820 new.br_startoff = split_fsb;
5821 new.br_startblock = got.br_startblock + gotblkcnt;
5822 new.br_blockcount = got.br_blockcount - gotblkcnt;
5823 new.br_state = got.br_state;
5824
5825 if (ifp->if_flags & XFS_IFBROOT) {
5826 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5827 cur->bc_private.b.firstblock = *firstfsb;
5828 cur->bc_private.b.flist = free_list;
5829 cur->bc_private.b.flags = 0;
5830 error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
5831 got.br_startblock,
5832 got.br_blockcount,
5833 &i);
5834 if (error)
5835 goto del_cursor;
5836 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor);
5837 }
5838
5839 xfs_bmbt_set_blockcount(gotp, gotblkcnt);
5840 got.br_blockcount = gotblkcnt;
5841
5842 logflags = XFS_ILOG_CORE;
5843 if (cur) {
5844 error = xfs_bmbt_update(cur, got.br_startoff,
5845 got.br_startblock,
5846 got.br_blockcount,
5847 got.br_state);
5848 if (error)
5849 goto del_cursor;
5850 } else
5851 logflags |= XFS_ILOG_DEXT;
5852
5853 /* Add new extent */
5854 current_ext++;
5855 xfs_iext_insert(ip, current_ext, 1, &new, 0);
5856 XFS_IFORK_NEXT_SET(ip, whichfork,
5857 XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
5858
5859 if (cur) {
5860 error = xfs_bmbt_lookup_eq(cur, new.br_startoff,
5861 new.br_startblock, new.br_blockcount,
5862 &i);
5863 if (error)
5864 goto del_cursor;
5865 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, del_cursor);
5866 cur->bc_rec.b.br_state = new.br_state;
5867
5868 error = xfs_btree_insert(cur, &i);
5869 if (error)
5870 goto del_cursor;
5871 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor);
5872 }
5873
5874 /*
5875 * Convert to a btree if necessary.
5876 */
5877 if (xfs_bmap_needs_btree(ip, whichfork)) {
5878 int tmp_logflags; /* partial log flag return val */
5879
5880 ASSERT(cur == NULL);
5881 error = xfs_bmap_extents_to_btree(tp, ip, firstfsb, free_list,
5882 &cur, 0, &tmp_logflags, whichfork);
5883 logflags |= tmp_logflags;
5884 }
5885
5886del_cursor:
5887 if (cur) {
5888 cur->bc_private.b.allocated = 0;
5889 xfs_btree_del_cursor(cur,
5890 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
5891 }
5892
5893 if (logflags)
5894 xfs_trans_log_inode(tp, ip, logflags);
5895 return error;
5896}
5897
5898int
5899xfs_bmap_split_extent(
5900 struct xfs_inode *ip,
5901 xfs_fileoff_t split_fsb)
5902{
5903 struct xfs_mount *mp = ip->i_mount;
5904 struct xfs_trans *tp;
5905 struct xfs_bmap_free free_list;
5906 xfs_fsblock_t firstfsb;
5907 int committed;
5908 int error;
5909
5910 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
5911 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
5912 XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
5913 if (error) {
5914 xfs_trans_cancel(tp, 0);
5915 return error;
5916 }
5917
5918 xfs_ilock(ip, XFS_ILOCK_EXCL);
5919 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
5920
5921 xfs_bmap_init(&free_list, &firstfsb);
5922
5923 error = xfs_bmap_split_extent_at(tp, ip, split_fsb,
5924 &firstfsb, &free_list);
5925 if (error)
5926 goto out;
5927
5928 error = xfs_bmap_finish(&tp, &free_list, &committed);
5929 if (error)
5930 goto out;
5931
5932 return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
5933
5934
5935out:
5936 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
5937 return error;
5938}
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index b9d8a499d2c4..6aaa0c1c7200 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -166,6 +166,11 @@ static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp)
166 */ 166 */
167#define XFS_BMAP_MAX_SHIFT_EXTENTS 1 167#define XFS_BMAP_MAX_SHIFT_EXTENTS 1
168 168
169enum shift_direction {
170 SHIFT_LEFT = 0,
171 SHIFT_RIGHT,
172};
173
169#ifdef DEBUG 174#ifdef DEBUG
170void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt, 175void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
171 int whichfork, unsigned long caller_ip); 176 int whichfork, unsigned long caller_ip);
@@ -211,8 +216,10 @@ int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
211 xfs_extnum_t num); 216 xfs_extnum_t num);
212uint xfs_default_attroffset(struct xfs_inode *ip); 217uint xfs_default_attroffset(struct xfs_inode *ip);
213int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip, 218int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip,
214 xfs_fileoff_t start_fsb, xfs_fileoff_t offset_shift_fsb, 219 xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb,
215 int *done, xfs_fileoff_t *next_fsb, xfs_fsblock_t *firstblock, 220 int *done, xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock,
216 struct xfs_bmap_free *flist, int num_exts); 221 struct xfs_bmap_free *flist, enum shift_direction direction,
222 int num_exts);
223int xfs_bmap_split_extent(struct xfs_inode *ip, xfs_fileoff_t split_offset);
217 224
218#endif /* __XFS_BMAP_H__ */ 225#endif /* __XFS_BMAP_H__ */
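With the reworked prototype, callers drive the shift incrementally: next_fsb is in/out state, stop_fsb bounds the walk and direction selects collapse versus insert semantics. A hedged sketch of the calling convention, loosely modelled on the xfs_bmap_util.c callers, with transaction setup and teardown elided:

	int		done = 0;
	xfs_fileoff_t	next_fsb = NULLFSBLOCK;	/* right shift: start at the last extent */
	xfs_fileoff_t	stop_fsb = XFS_B_TO_FSB(mp, offset);

	while (!done) {
		/* ... allocate and reserve a transaction, join the inode ... */
		error = xfs_bmap_shift_extents(tp, ip, &next_fsb, shift_fsb,
				&done, stop_fsb, &first_block, &free_list,
				SHIFT_RIGHT, XFS_BMAP_MAX_SHIFT_EXTENTS);
		if (error)
			break;
		/* ... finish the free list and commit the transaction ... */
	}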
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 81cad433df85..c72283dd8d44 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -168,7 +168,7 @@ xfs_btree_check_lptr(
168 xfs_fsblock_t bno, /* btree block disk address */ 168 xfs_fsblock_t bno, /* btree block disk address */
169 int level) /* btree block level */ 169 int level) /* btree block level */
170{ 170{
171 XFS_WANT_CORRUPTED_RETURN( 171 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
172 level > 0 && 172 level > 0 &&
173 bno != NULLFSBLOCK && 173 bno != NULLFSBLOCK &&
174 XFS_FSB_SANITY_CHECK(cur->bc_mp, bno)); 174 XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
@@ -187,7 +187,7 @@ xfs_btree_check_sptr(
187{ 187{
188 xfs_agblock_t agblocks = cur->bc_mp->m_sb.sb_agblocks; 188 xfs_agblock_t agblocks = cur->bc_mp->m_sb.sb_agblocks;
189 189
190 XFS_WANT_CORRUPTED_RETURN( 190 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
191 level > 0 && 191 level > 0 &&
192 bno != NULLAGBLOCK && 192 bno != NULLAGBLOCK &&
193 bno != 0 && 193 bno != 0 &&
@@ -1825,7 +1825,7 @@ xfs_btree_lookup(
1825 error = xfs_btree_increment(cur, 0, &i); 1825 error = xfs_btree_increment(cur, 0, &i);
1826 if (error) 1826 if (error)
1827 goto error0; 1827 goto error0;
1828 XFS_WANT_CORRUPTED_RETURN(i == 1); 1828 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
1829 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 1829 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1830 *stat = 1; 1830 *stat = 1;
1831 return 0; 1831 return 0;
@@ -2285,7 +2285,7 @@ xfs_btree_rshift(
2285 if (error) 2285 if (error)
2286 goto error0; 2286 goto error0;
2287 i = xfs_btree_lastrec(tcur, level); 2287 i = xfs_btree_lastrec(tcur, level);
2288 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 2288 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
2289 2289
2290 error = xfs_btree_increment(tcur, level, &i); 2290 error = xfs_btree_increment(tcur, level, &i);
2291 if (error) 2291 if (error)
@@ -3138,7 +3138,7 @@ xfs_btree_insert(
3138 goto error0; 3138 goto error0;
3139 } 3139 }
3140 3140
3141 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 3141 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
3142 level++; 3142 level++;
3143 3143
3144 /* 3144 /*
@@ -3582,15 +3582,15 @@ xfs_btree_delrec(
3582 * Actually any entry but the first would suffice. 3582 * Actually any entry but the first would suffice.
3583 */ 3583 */
3584 i = xfs_btree_lastrec(tcur, level); 3584 i = xfs_btree_lastrec(tcur, level);
3585 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 3585 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
3586 3586
3587 error = xfs_btree_increment(tcur, level, &i); 3587 error = xfs_btree_increment(tcur, level, &i);
3588 if (error) 3588 if (error)
3589 goto error0; 3589 goto error0;
3590 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 3590 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
3591 3591
3592 i = xfs_btree_lastrec(tcur, level); 3592 i = xfs_btree_lastrec(tcur, level);
3593 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 3593 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
3594 3594
3595 /* Grab a pointer to the block. */ 3595 /* Grab a pointer to the block. */
3596 right = xfs_btree_get_block(tcur, level, &rbp); 3596 right = xfs_btree_get_block(tcur, level, &rbp);
@@ -3634,12 +3634,12 @@ xfs_btree_delrec(
3634 rrecs = xfs_btree_get_numrecs(right); 3634 rrecs = xfs_btree_get_numrecs(right);
3635 if (!xfs_btree_ptr_is_null(cur, &lptr)) { 3635 if (!xfs_btree_ptr_is_null(cur, &lptr)) {
3636 i = xfs_btree_firstrec(tcur, level); 3636 i = xfs_btree_firstrec(tcur, level);
3637 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 3637 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
3638 3638
3639 error = xfs_btree_decrement(tcur, level, &i); 3639 error = xfs_btree_decrement(tcur, level, &i);
3640 if (error) 3640 if (error)
3641 goto error0; 3641 goto error0;
3642 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 3642 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
3643 } 3643 }
3644 } 3644 }
3645 3645
@@ -3653,13 +3653,13 @@ xfs_btree_delrec(
3653 * previous block. 3653 * previous block.
3654 */ 3654 */
3655 i = xfs_btree_firstrec(tcur, level); 3655 i = xfs_btree_firstrec(tcur, level);
3656 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 3656 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
3657 3657
3658 error = xfs_btree_decrement(tcur, level, &i); 3658 error = xfs_btree_decrement(tcur, level, &i);
3659 if (error) 3659 if (error)
3660 goto error0; 3660 goto error0;
3661 i = xfs_btree_firstrec(tcur, level); 3661 i = xfs_btree_firstrec(tcur, level);
3662 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 3662 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
3663 3663
3664 /* Grab a pointer to the block. */ 3664 /* Grab a pointer to the block. */
3665 left = xfs_btree_get_block(tcur, level, &lbp); 3665 left = xfs_btree_get_block(tcur, level, &lbp);
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index 9cb0115c6bd1..2385f8cd08ab 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -538,12 +538,12 @@ xfs_da3_root_split(
538 oldroot = blk1->bp->b_addr; 538 oldroot = blk1->bp->b_addr;
539 if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) || 539 if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
540 oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) { 540 oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) {
541 struct xfs_da3_icnode_hdr nodehdr; 541 struct xfs_da3_icnode_hdr icnodehdr;
542 542
543 dp->d_ops->node_hdr_from_disk(&nodehdr, oldroot); 543 dp->d_ops->node_hdr_from_disk(&icnodehdr, oldroot);
544 btree = dp->d_ops->node_tree_p(oldroot); 544 btree = dp->d_ops->node_tree_p(oldroot);
545 size = (int)((char *)&btree[nodehdr.count] - (char *)oldroot); 545 size = (int)((char *)&btree[icnodehdr.count] - (char *)oldroot);
546 level = nodehdr.level; 546 level = icnodehdr.level;
547 547
548 /* 548 /*
549 * we are about to copy oldroot to bp, so set up the type 549 * we are about to copy oldroot to bp, so set up the type
diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h
index 0a49b0286372..74bcbabfa523 100644
--- a/fs/xfs/libxfs/xfs_da_format.h
+++ b/fs/xfs/libxfs/xfs_da_format.h
@@ -725,7 +725,13 @@ struct xfs_attr3_icleaf_hdr {
725 __uint16_t magic; 725 __uint16_t magic;
726 __uint16_t count; 726 __uint16_t count;
727 __uint16_t usedbytes; 727 __uint16_t usedbytes;
728 __uint16_t firstused; 728 /*
729 * firstused is 32-bit here instead of 16-bit like the on-disk variant
730 * to support maximum fsb size of 64k without overflow issues throughout
731 * the attr code. Instead, the overflow condition is handled on
732 * conversion to/from disk.
733 */
734 __uint32_t firstused;
729 __u8 holes; 735 __u8 holes;
730 struct { 736 struct {
731 __uint16_t base; 737 __uint16_t base;
@@ -734,6 +740,12 @@ struct xfs_attr3_icleaf_hdr {
734}; 740};
735 741
736/* 742/*
 743 * Special value to represent the fs block size in the leaf header firstused field.
 744 * Only used when the block size overflows the 2 bytes available on disk.
745 */
746#define XFS_ATTR3_LEAF_NULLOFF 0
747
748/*
737 * Flags used in the leaf_entry[i].flags field. 749 * Flags used in the leaf_entry[i].flags field.
738 * NOTE: the INCOMPLETE bit must not collide with the flags bits specified 750 * NOTE: the INCOMPLETE bit must not collide with the flags bits specified
739 * on the system call, they are "or"ed together for various operations. 751 * on the system call, they are "or"ed together for various operations.
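Because the incore field is now wider than the on-disk one, the header to/from-disk conversions must special-case an empty leaf on a 64k-block filesystem, where firstused equals the block size and no longer fits in 16 bits; XFS_ATTR3_LEAF_NULLOFF stands in for it on disk. A sketch of the conversion rule with hypothetical helper names; the real logic sits in the xfs_attr3_leaf header conversion routines in xfs_attr_leaf.c:

/* incore (32-bit) -> disk (16-bit) */
static __uint16_t
firstused_to_disk(struct xfs_mount *mp, __uint32_t firstused)
{
	if (firstused > USHRT_MAX) {
		/* only possible for an empty 64k block */
		ASSERT(firstused == mp->m_attr_geo->blksize);
		return XFS_ATTR3_LEAF_NULLOFF;
	}
	return firstused;
}

/* disk (16-bit) -> incore (32-bit) */
static __uint32_t
firstused_from_disk(struct xfs_mount *mp, __uint16_t firstused)
{
	if (firstused == XFS_ATTR3_LEAF_NULLOFF)
		return mp->m_attr_geo->blksize;
	return firstused;
}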
diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c
index 5ff31be9b1cd..de1ea16f5748 100644
--- a/fs/xfs/libxfs/xfs_dir2_data.c
+++ b/fs/xfs/libxfs/xfs_dir2_data.c
@@ -89,7 +89,7 @@ __xfs_dir3_data_check(
89 * so just ensure that the count falls somewhere inside the 89 * so just ensure that the count falls somewhere inside the
90 * block right now. 90 * block right now.
91 */ 91 */
92 XFS_WANT_CORRUPTED_RETURN(be32_to_cpu(btp->count) < 92 XFS_WANT_CORRUPTED_RETURN(mp, be32_to_cpu(btp->count) <
93 ((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry)); 93 ((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry));
94 break; 94 break;
95 case cpu_to_be32(XFS_DIR3_DATA_MAGIC): 95 case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
@@ -107,21 +107,21 @@ __xfs_dir3_data_check(
107 bf = ops->data_bestfree_p(hdr); 107 bf = ops->data_bestfree_p(hdr);
108 count = lastfree = freeseen = 0; 108 count = lastfree = freeseen = 0;
109 if (!bf[0].length) { 109 if (!bf[0].length) {
110 XFS_WANT_CORRUPTED_RETURN(!bf[0].offset); 110 XFS_WANT_CORRUPTED_RETURN(mp, !bf[0].offset);
111 freeseen |= 1 << 0; 111 freeseen |= 1 << 0;
112 } 112 }
113 if (!bf[1].length) { 113 if (!bf[1].length) {
114 XFS_WANT_CORRUPTED_RETURN(!bf[1].offset); 114 XFS_WANT_CORRUPTED_RETURN(mp, !bf[1].offset);
115 freeseen |= 1 << 1; 115 freeseen |= 1 << 1;
116 } 116 }
117 if (!bf[2].length) { 117 if (!bf[2].length) {
118 XFS_WANT_CORRUPTED_RETURN(!bf[2].offset); 118 XFS_WANT_CORRUPTED_RETURN(mp, !bf[2].offset);
119 freeseen |= 1 << 2; 119 freeseen |= 1 << 2;
120 } 120 }
121 121
122 XFS_WANT_CORRUPTED_RETURN(be16_to_cpu(bf[0].length) >= 122 XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(bf[0].length) >=
123 be16_to_cpu(bf[1].length)); 123 be16_to_cpu(bf[1].length));
124 XFS_WANT_CORRUPTED_RETURN(be16_to_cpu(bf[1].length) >= 124 XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(bf[1].length) >=
125 be16_to_cpu(bf[2].length)); 125 be16_to_cpu(bf[2].length));
126 /* 126 /*
127 * Loop over the data/unused entries. 127 * Loop over the data/unused entries.
@@ -134,18 +134,18 @@ __xfs_dir3_data_check(
134 * doesn't need to be there. 134 * doesn't need to be there.
135 */ 135 */
136 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { 136 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
137 XFS_WANT_CORRUPTED_RETURN(lastfree == 0); 137 XFS_WANT_CORRUPTED_RETURN(mp, lastfree == 0);
138 XFS_WANT_CORRUPTED_RETURN( 138 XFS_WANT_CORRUPTED_RETURN(mp,
139 be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) == 139 be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) ==
140 (char *)dup - (char *)hdr); 140 (char *)dup - (char *)hdr);
141 dfp = xfs_dir2_data_freefind(hdr, bf, dup); 141 dfp = xfs_dir2_data_freefind(hdr, bf, dup);
142 if (dfp) { 142 if (dfp) {
143 i = (int)(dfp - bf); 143 i = (int)(dfp - bf);
144 XFS_WANT_CORRUPTED_RETURN( 144 XFS_WANT_CORRUPTED_RETURN(mp,
145 (freeseen & (1 << i)) == 0); 145 (freeseen & (1 << i)) == 0);
146 freeseen |= 1 << i; 146 freeseen |= 1 << i;
147 } else { 147 } else {
148 XFS_WANT_CORRUPTED_RETURN( 148 XFS_WANT_CORRUPTED_RETURN(mp,
149 be16_to_cpu(dup->length) <= 149 be16_to_cpu(dup->length) <=
150 be16_to_cpu(bf[2].length)); 150 be16_to_cpu(bf[2].length));
151 } 151 }
@@ -160,13 +160,13 @@ __xfs_dir3_data_check(
160 * The linear search is crude but this is DEBUG code. 160 * The linear search is crude but this is DEBUG code.
161 */ 161 */
162 dep = (xfs_dir2_data_entry_t *)p; 162 dep = (xfs_dir2_data_entry_t *)p;
163 XFS_WANT_CORRUPTED_RETURN(dep->namelen != 0); 163 XFS_WANT_CORRUPTED_RETURN(mp, dep->namelen != 0);
164 XFS_WANT_CORRUPTED_RETURN( 164 XFS_WANT_CORRUPTED_RETURN(mp,
165 !xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber))); 165 !xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)));
166 XFS_WANT_CORRUPTED_RETURN( 166 XFS_WANT_CORRUPTED_RETURN(mp,
167 be16_to_cpu(*ops->data_entry_tag_p(dep)) == 167 be16_to_cpu(*ops->data_entry_tag_p(dep)) ==
168 (char *)dep - (char *)hdr); 168 (char *)dep - (char *)hdr);
169 XFS_WANT_CORRUPTED_RETURN( 169 XFS_WANT_CORRUPTED_RETURN(mp,
170 ops->data_get_ftype(dep) < XFS_DIR3_FT_MAX); 170 ops->data_get_ftype(dep) < XFS_DIR3_FT_MAX);
171 count++; 171 count++;
172 lastfree = 0; 172 lastfree = 0;
@@ -183,14 +183,15 @@ __xfs_dir3_data_check(
183 be32_to_cpu(lep[i].hashval) == hash) 183 be32_to_cpu(lep[i].hashval) == hash)
184 break; 184 break;
185 } 185 }
186 XFS_WANT_CORRUPTED_RETURN(i < be32_to_cpu(btp->count)); 186 XFS_WANT_CORRUPTED_RETURN(mp,
187 i < be32_to_cpu(btp->count));
187 } 188 }
188 p += ops->data_entsize(dep->namelen); 189 p += ops->data_entsize(dep->namelen);
189 } 190 }
190 /* 191 /*
191 * Need to have seen all the entries and all the bestfree slots. 192 * Need to have seen all the entries and all the bestfree slots.
192 */ 193 */
193 XFS_WANT_CORRUPTED_RETURN(freeseen == 7); 194 XFS_WANT_CORRUPTED_RETURN(mp, freeseen == 7);
194 if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || 195 if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
195 hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { 196 hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
196 for (i = stale = 0; i < be32_to_cpu(btp->count); i++) { 197 for (i = stale = 0; i < be32_to_cpu(btp->count); i++) {
@@ -198,13 +199,13 @@ __xfs_dir3_data_check(
198 cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) 199 cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
199 stale++; 200 stale++;
200 if (i > 0) 201 if (i > 0)
201 XFS_WANT_CORRUPTED_RETURN( 202 XFS_WANT_CORRUPTED_RETURN(mp,
202 be32_to_cpu(lep[i].hashval) >= 203 be32_to_cpu(lep[i].hashval) >=
203 be32_to_cpu(lep[i - 1].hashval)); 204 be32_to_cpu(lep[i - 1].hashval));
204 } 205 }
205 XFS_WANT_CORRUPTED_RETURN(count == 206 XFS_WANT_CORRUPTED_RETURN(mp, count ==
206 be32_to_cpu(btp->count) - be32_to_cpu(btp->stale)); 207 be32_to_cpu(btp->count) - be32_to_cpu(btp->stale));
207 XFS_WANT_CORRUPTED_RETURN(stale == be32_to_cpu(btp->stale)); 208 XFS_WANT_CORRUPTED_RETURN(mp, stale == be32_to_cpu(btp->stale));
208 } 209 }
209 return 0; 210 return 0;
210} 211}
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 8eb718979383..4daaa662337b 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -264,68 +264,6 @@ typedef struct xfs_dsb {
264 /* must be padded to 64 bit alignment */ 264 /* must be padded to 64 bit alignment */
265} xfs_dsb_t; 265} xfs_dsb_t;
266 266
267/*
268 * Sequence number values for the fields.
269 */
270typedef enum {
271 XFS_SBS_MAGICNUM, XFS_SBS_BLOCKSIZE, XFS_SBS_DBLOCKS, XFS_SBS_RBLOCKS,
272 XFS_SBS_REXTENTS, XFS_SBS_UUID, XFS_SBS_LOGSTART, XFS_SBS_ROOTINO,
273 XFS_SBS_RBMINO, XFS_SBS_RSUMINO, XFS_SBS_REXTSIZE, XFS_SBS_AGBLOCKS,
274 XFS_SBS_AGCOUNT, XFS_SBS_RBMBLOCKS, XFS_SBS_LOGBLOCKS,
275 XFS_SBS_VERSIONNUM, XFS_SBS_SECTSIZE, XFS_SBS_INODESIZE,
276 XFS_SBS_INOPBLOCK, XFS_SBS_FNAME, XFS_SBS_BLOCKLOG,
277 XFS_SBS_SECTLOG, XFS_SBS_INODELOG, XFS_SBS_INOPBLOG, XFS_SBS_AGBLKLOG,
278 XFS_SBS_REXTSLOG, XFS_SBS_INPROGRESS, XFS_SBS_IMAX_PCT, XFS_SBS_ICOUNT,
279 XFS_SBS_IFREE, XFS_SBS_FDBLOCKS, XFS_SBS_FREXTENTS, XFS_SBS_UQUOTINO,
280 XFS_SBS_GQUOTINO, XFS_SBS_QFLAGS, XFS_SBS_FLAGS, XFS_SBS_SHARED_VN,
281 XFS_SBS_INOALIGNMT, XFS_SBS_UNIT, XFS_SBS_WIDTH, XFS_SBS_DIRBLKLOG,
282 XFS_SBS_LOGSECTLOG, XFS_SBS_LOGSECTSIZE, XFS_SBS_LOGSUNIT,
283 XFS_SBS_FEATURES2, XFS_SBS_BAD_FEATURES2, XFS_SBS_FEATURES_COMPAT,
284 XFS_SBS_FEATURES_RO_COMPAT, XFS_SBS_FEATURES_INCOMPAT,
285 XFS_SBS_FEATURES_LOG_INCOMPAT, XFS_SBS_CRC, XFS_SBS_PAD,
286 XFS_SBS_PQUOTINO, XFS_SBS_LSN,
287 XFS_SBS_FIELDCOUNT
288} xfs_sb_field_t;
289
290/*
291 * Mask values, defined based on the xfs_sb_field_t values.
292 * Only define the ones we're using.
293 */
294#define XFS_SB_MVAL(x) (1LL << XFS_SBS_ ## x)
295#define XFS_SB_UUID XFS_SB_MVAL(UUID)
296#define XFS_SB_FNAME XFS_SB_MVAL(FNAME)
297#define XFS_SB_ROOTINO XFS_SB_MVAL(ROOTINO)
298#define XFS_SB_RBMINO XFS_SB_MVAL(RBMINO)
299#define XFS_SB_RSUMINO XFS_SB_MVAL(RSUMINO)
300#define XFS_SB_VERSIONNUM XFS_SB_MVAL(VERSIONNUM)
301#define XFS_SB_UQUOTINO XFS_SB_MVAL(UQUOTINO)
302#define XFS_SB_GQUOTINO XFS_SB_MVAL(GQUOTINO)
303#define XFS_SB_QFLAGS XFS_SB_MVAL(QFLAGS)
304#define XFS_SB_SHARED_VN XFS_SB_MVAL(SHARED_VN)
305#define XFS_SB_UNIT XFS_SB_MVAL(UNIT)
306#define XFS_SB_WIDTH XFS_SB_MVAL(WIDTH)
307#define XFS_SB_ICOUNT XFS_SB_MVAL(ICOUNT)
308#define XFS_SB_IFREE XFS_SB_MVAL(IFREE)
309#define XFS_SB_FDBLOCKS XFS_SB_MVAL(FDBLOCKS)
310#define XFS_SB_FEATURES2 (XFS_SB_MVAL(FEATURES2) | \
311 XFS_SB_MVAL(BAD_FEATURES2))
312#define XFS_SB_FEATURES_COMPAT XFS_SB_MVAL(FEATURES_COMPAT)
313#define XFS_SB_FEATURES_RO_COMPAT XFS_SB_MVAL(FEATURES_RO_COMPAT)
314#define XFS_SB_FEATURES_INCOMPAT XFS_SB_MVAL(FEATURES_INCOMPAT)
315#define XFS_SB_FEATURES_LOG_INCOMPAT XFS_SB_MVAL(FEATURES_LOG_INCOMPAT)
316#define XFS_SB_CRC XFS_SB_MVAL(CRC)
317#define XFS_SB_PQUOTINO XFS_SB_MVAL(PQUOTINO)
318#define XFS_SB_NUM_BITS ((int)XFS_SBS_FIELDCOUNT)
319#define XFS_SB_ALL_BITS ((1LL << XFS_SB_NUM_BITS) - 1)
320#define XFS_SB_MOD_BITS \
321 (XFS_SB_UUID | XFS_SB_ROOTINO | XFS_SB_RBMINO | XFS_SB_RSUMINO | \
322 XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \
323 XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \
324 XFS_SB_ICOUNT | XFS_SB_IFREE | XFS_SB_FDBLOCKS | XFS_SB_FEATURES2 | \
325 XFS_SB_FEATURES_COMPAT | XFS_SB_FEATURES_RO_COMPAT | \
326 XFS_SB_FEATURES_INCOMPAT | XFS_SB_FEATURES_LOG_INCOMPAT | \
327 XFS_SB_PQUOTINO)
328
329 267
330/* 268/*
331 * Misc. Flags - warning - these will be cleared by xfs_repair unless 269 * Misc. Flags - warning - these will be cleared by xfs_repair unless
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 116ef1ddb3e3..07349a183a11 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -376,7 +376,8 @@ xfs_ialloc_ag_alloc(
376 */ 376 */
377 newlen = args.mp->m_ialloc_inos; 377 newlen = args.mp->m_ialloc_inos;
378 if (args.mp->m_maxicount && 378 if (args.mp->m_maxicount &&
379 args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount) 379 percpu_counter_read(&args.mp->m_icount) + newlen >
380 args.mp->m_maxicount)
380 return -ENOSPC; 381 return -ENOSPC;
381 args.minlen = args.maxlen = args.mp->m_ialloc_blks; 382 args.minlen = args.maxlen = args.mp->m_ialloc_blks;
382 /* 383 /*
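
The replacement of mp->m_sb.sb_icount with percpu_counter_read() here is deliberate: the generic per-CPU counter gives a cheap but possibly slightly stale value, which is fine for this heuristic maxicount check, while percpu_counter_sum() (used when the superblock is logged, later in this series) folds in every CPU's delta for an exact answer. A rough user-space sketch of that trade-off, assuming a fixed CPU count and no real concurrency:

/* pcpu.c - sketch of the approximate-read vs exact-sum trade-off. */
#include <stdio.h>

#define NR_CPUS	4
#define BATCH	32	/* deltas smaller than this stay CPU-local */

struct pcpu_counter {
	long long count;		/* global total, updated in batches */
	long long local[NR_CPUS];	/* unsynchronised per-CPU deltas */
};

/* Cheap read: may be stale by up to NR_CPUS * BATCH. */
static long long pcpu_read(struct pcpu_counter *c)
{
	return c->count;
}

/* Exact read: folds in every per-CPU delta (costly on many CPUs). */
static long long pcpu_sum(struct pcpu_counter *c)
{
	long long sum = c->count;
	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		sum += c->local[cpu];
	return sum;
}

static void pcpu_add(struct pcpu_counter *c, int cpu, long long delta)
{
	c->local[cpu] += delta;
	if (c->local[cpu] >= BATCH || c->local[cpu] <= -BATCH) {
		c->count += c->local[cpu];	/* the kernel locks here */
		c->local[cpu] = 0;
	}
}

int main(void)
{
	struct pcpu_counter icount = { 0 };

	for (int i = 0; i < 10; i++)
		pcpu_add(&icount, i % NR_CPUS, 3);
	/* approx=0, exact=30: all deltas are still CPU-local. */
	printf("approx=%lld exact=%lld\n", pcpu_read(&icount), pcpu_sum(&icount));
	return 0;
}
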
@@ -700,7 +701,7 @@ xfs_ialloc_next_rec(
700 error = xfs_inobt_get_rec(cur, rec, &i); 701 error = xfs_inobt_get_rec(cur, rec, &i);
701 if (error) 702 if (error)
702 return error; 703 return error;
703 XFS_WANT_CORRUPTED_RETURN(i == 1); 704 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
704 } 705 }
705 706
706 return 0; 707 return 0;
@@ -724,7 +725,7 @@ xfs_ialloc_get_rec(
724 error = xfs_inobt_get_rec(cur, rec, &i); 725 error = xfs_inobt_get_rec(cur, rec, &i);
725 if (error) 726 if (error)
726 return error; 727 return error;
727 XFS_WANT_CORRUPTED_RETURN(i == 1); 728 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
728 } 729 }
729 730
730 return 0; 731 return 0;
@@ -783,12 +784,12 @@ xfs_dialloc_ag_inobt(
783 error = xfs_inobt_lookup(cur, pagino, XFS_LOOKUP_LE, &i); 784 error = xfs_inobt_lookup(cur, pagino, XFS_LOOKUP_LE, &i);
784 if (error) 785 if (error)
785 goto error0; 786 goto error0;
786 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 787 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
787 788
788 error = xfs_inobt_get_rec(cur, &rec, &j); 789 error = xfs_inobt_get_rec(cur, &rec, &j);
789 if (error) 790 if (error)
790 goto error0; 791 goto error0;
791 XFS_WANT_CORRUPTED_GOTO(j == 1, error0); 792 XFS_WANT_CORRUPTED_GOTO(mp, j == 1, error0);
792 793
793 if (rec.ir_freecount > 0) { 794 if (rec.ir_freecount > 0) {
794 /* 795 /*
@@ -944,19 +945,19 @@ newino:
944 error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); 945 error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
945 if (error) 946 if (error)
946 goto error0; 947 goto error0;
947 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 948 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
948 949
949 for (;;) { 950 for (;;) {
950 error = xfs_inobt_get_rec(cur, &rec, &i); 951 error = xfs_inobt_get_rec(cur, &rec, &i);
951 if (error) 952 if (error)
952 goto error0; 953 goto error0;
953 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 954 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
954 if (rec.ir_freecount > 0) 955 if (rec.ir_freecount > 0)
955 break; 956 break;
956 error = xfs_btree_increment(cur, 0, &i); 957 error = xfs_btree_increment(cur, 0, &i);
957 if (error) 958 if (error)
958 goto error0; 959 goto error0;
959 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 960 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
960 } 961 }
961 962
962alloc_inode: 963alloc_inode:
@@ -1016,7 +1017,7 @@ xfs_dialloc_ag_finobt_near(
1016 error = xfs_inobt_get_rec(lcur, rec, &i); 1017 error = xfs_inobt_get_rec(lcur, rec, &i);
1017 if (error) 1018 if (error)
1018 return error; 1019 return error;
1019 XFS_WANT_CORRUPTED_RETURN(i == 1); 1020 XFS_WANT_CORRUPTED_RETURN(lcur->bc_mp, i == 1);
1020 1021
1021 /* 1022 /*
1022 * See if we've landed in the parent inode record. The finobt 1023 * See if we've landed in the parent inode record. The finobt
@@ -1039,10 +1040,10 @@ xfs_dialloc_ag_finobt_near(
1039 error = xfs_inobt_get_rec(rcur, &rrec, &j); 1040 error = xfs_inobt_get_rec(rcur, &rrec, &j);
1040 if (error) 1041 if (error)
1041 goto error_rcur; 1042 goto error_rcur;
1042 XFS_WANT_CORRUPTED_GOTO(j == 1, error_rcur); 1043 XFS_WANT_CORRUPTED_GOTO(lcur->bc_mp, j == 1, error_rcur);
1043 } 1044 }
1044 1045
1045 XFS_WANT_CORRUPTED_GOTO(i == 1 || j == 1, error_rcur); 1046 XFS_WANT_CORRUPTED_GOTO(lcur->bc_mp, i == 1 || j == 1, error_rcur);
1046 if (i == 1 && j == 1) { 1047 if (i == 1 && j == 1) {
1047 /* 1048 /*
1048 * Both the left and right records are valid. Choose the closer 1049 * Both the left and right records are valid. Choose the closer
@@ -1095,7 +1096,7 @@ xfs_dialloc_ag_finobt_newino(
1095 error = xfs_inobt_get_rec(cur, rec, &i); 1096 error = xfs_inobt_get_rec(cur, rec, &i);
1096 if (error) 1097 if (error)
1097 return error; 1098 return error;
1098 XFS_WANT_CORRUPTED_RETURN(i == 1); 1099 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
1099 return 0; 1100 return 0;
1100 } 1101 }
1101 } 1102 }
@@ -1106,12 +1107,12 @@ xfs_dialloc_ag_finobt_newino(
1106 error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); 1107 error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
1107 if (error) 1108 if (error)
1108 return error; 1109 return error;
1109 XFS_WANT_CORRUPTED_RETURN(i == 1); 1110 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
1110 1111
1111 error = xfs_inobt_get_rec(cur, rec, &i); 1112 error = xfs_inobt_get_rec(cur, rec, &i);
1112 if (error) 1113 if (error)
1113 return error; 1114 return error;
1114 XFS_WANT_CORRUPTED_RETURN(i == 1); 1115 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
1115 1116
1116 return 0; 1117 return 0;
1117} 1118}
@@ -1133,19 +1134,19 @@ xfs_dialloc_ag_update_inobt(
1133 error = xfs_inobt_lookup(cur, frec->ir_startino, XFS_LOOKUP_EQ, &i); 1134 error = xfs_inobt_lookup(cur, frec->ir_startino, XFS_LOOKUP_EQ, &i);
1134 if (error) 1135 if (error)
1135 return error; 1136 return error;
1136 XFS_WANT_CORRUPTED_RETURN(i == 1); 1137 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
1137 1138
1138 error = xfs_inobt_get_rec(cur, &rec, &i); 1139 error = xfs_inobt_get_rec(cur, &rec, &i);
1139 if (error) 1140 if (error)
1140 return error; 1141 return error;
1141 XFS_WANT_CORRUPTED_RETURN(i == 1); 1142 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
1142 ASSERT((XFS_AGINO_TO_OFFSET(cur->bc_mp, rec.ir_startino) % 1143 ASSERT((XFS_AGINO_TO_OFFSET(cur->bc_mp, rec.ir_startino) %
1143 XFS_INODES_PER_CHUNK) == 0); 1144 XFS_INODES_PER_CHUNK) == 0);
1144 1145
1145 rec.ir_free &= ~XFS_INOBT_MASK(offset); 1146 rec.ir_free &= ~XFS_INOBT_MASK(offset);
1146 rec.ir_freecount--; 1147 rec.ir_freecount--;
1147 1148
1148 XFS_WANT_CORRUPTED_RETURN((rec.ir_free == frec->ir_free) && 1149 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, (rec.ir_free == frec->ir_free) &&
1149 (rec.ir_freecount == frec->ir_freecount)); 1150 (rec.ir_freecount == frec->ir_freecount));
1150 1151
1151 return xfs_inobt_update(cur, &rec); 1152 return xfs_inobt_update(cur, &rec);
@@ -1340,7 +1341,8 @@ xfs_dialloc(
1340 * inode. 1341 * inode.
1341 */ 1342 */
1342 if (mp->m_maxicount && 1343 if (mp->m_maxicount &&
1343 mp->m_sb.sb_icount + mp->m_ialloc_inos > mp->m_maxicount) { 1344 percpu_counter_read(&mp->m_icount) + mp->m_ialloc_inos >
1345 mp->m_maxicount) {
1344 noroom = 1; 1346 noroom = 1;
1345 okalloc = 0; 1347 okalloc = 0;
1346 } 1348 }
@@ -1475,14 +1477,14 @@ xfs_difree_inobt(
1475 __func__, error); 1477 __func__, error);
1476 goto error0; 1478 goto error0;
1477 } 1479 }
1478 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1480 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1479 error = xfs_inobt_get_rec(cur, &rec, &i); 1481 error = xfs_inobt_get_rec(cur, &rec, &i);
1480 if (error) { 1482 if (error) {
1481 xfs_warn(mp, "%s: xfs_inobt_get_rec() returned error %d.", 1483 xfs_warn(mp, "%s: xfs_inobt_get_rec() returned error %d.",
1482 __func__, error); 1484 __func__, error);
1483 goto error0; 1485 goto error0;
1484 } 1486 }
1485 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1487 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
1486 /* 1488 /*
1487 * Get the offset in the inode chunk. 1489 * Get the offset in the inode chunk.
1488 */ 1490 */
@@ -1592,7 +1594,7 @@ xfs_difree_finobt(
1592 * freed an inode in a previously fully allocated chunk. If not, 1594 * freed an inode in a previously fully allocated chunk. If not,
1593 * something is out of sync. 1595 * something is out of sync.
1594 */ 1596 */
1595 XFS_WANT_CORRUPTED_GOTO(ibtrec->ir_freecount == 1, error); 1597 XFS_WANT_CORRUPTED_GOTO(mp, ibtrec->ir_freecount == 1, error);
1596 1598
1597 error = xfs_inobt_insert_rec(cur, ibtrec->ir_freecount, 1599 error = xfs_inobt_insert_rec(cur, ibtrec->ir_freecount,
1598 ibtrec->ir_free, &i); 1600 ibtrec->ir_free, &i);
@@ -1613,12 +1615,12 @@ xfs_difree_finobt(
1613 error = xfs_inobt_get_rec(cur, &rec, &i); 1615 error = xfs_inobt_get_rec(cur, &rec, &i);
1614 if (error) 1616 if (error)
1615 goto error; 1617 goto error;
1616 XFS_WANT_CORRUPTED_GOTO(i == 1, error); 1618 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error);
1617 1619
1618 rec.ir_free |= XFS_INOBT_MASK(offset); 1620 rec.ir_free |= XFS_INOBT_MASK(offset);
1619 rec.ir_freecount++; 1621 rec.ir_freecount++;
1620 1622
1621 XFS_WANT_CORRUPTED_GOTO((rec.ir_free == ibtrec->ir_free) && 1623 XFS_WANT_CORRUPTED_GOTO(mp, (rec.ir_free == ibtrec->ir_free) &&
1622 (rec.ir_freecount == ibtrec->ir_freecount), 1624 (rec.ir_freecount == ibtrec->ir_freecount),
1623 error); 1625 error);
1624 1626
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index b0a5fe95a3e2..dc4bfc5d88fc 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -111,14 +111,6 @@ xfs_mount_validate_sb(
111 bool check_inprogress, 111 bool check_inprogress,
112 bool check_version) 112 bool check_version)
113{ 113{
114
115 /*
116 * If the log device and data device have the
117 * same device number, the log is internal.
118 * Consequently, the sb_logstart should be non-zero. If
119 * we have a zero sb_logstart in this case, we may be trying to mount
120 * a volume filesystem in a non-volume manner.
121 */
122 if (sbp->sb_magicnum != XFS_SB_MAGIC) { 114 if (sbp->sb_magicnum != XFS_SB_MAGIC) {
123 xfs_warn(mp, "bad magic number"); 115 xfs_warn(mp, "bad magic number");
124 return -EWRONGFS; 116 return -EWRONGFS;
@@ -743,17 +735,15 @@ xfs_initialize_perag_data(
743 btree += pag->pagf_btreeblks; 735 btree += pag->pagf_btreeblks;
744 xfs_perag_put(pag); 736 xfs_perag_put(pag);
745 } 737 }
746 /* 738
747 * Overwrite incore superblock counters with just-read data 739 /* Overwrite incore superblock counters with just-read data */
748 */
749 spin_lock(&mp->m_sb_lock); 740 spin_lock(&mp->m_sb_lock);
750 sbp->sb_ifree = ifree; 741 sbp->sb_ifree = ifree;
751 sbp->sb_icount = ialloc; 742 sbp->sb_icount = ialloc;
752 sbp->sb_fdblocks = bfree + bfreelst + btree; 743 sbp->sb_fdblocks = bfree + bfreelst + btree;
753 spin_unlock(&mp->m_sb_lock); 744 spin_unlock(&mp->m_sb_lock);
754 745
755 /* Fixup the per-cpu counters as well. */ 746 xfs_reinit_percpu_counters(mp);
756 xfs_icsb_reinit_counters(mp);
757 747
758 return 0; 748 return 0;
759} 749}
@@ -771,6 +761,10 @@ xfs_log_sb(
771 struct xfs_mount *mp = tp->t_mountp; 761 struct xfs_mount *mp = tp->t_mountp;
772 struct xfs_buf *bp = xfs_trans_getsb(tp, mp, 0); 762 struct xfs_buf *bp = xfs_trans_getsb(tp, mp, 0);
773 763
764 mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount);
765 mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree);
766 mp->m_sb.sb_fdblocks = percpu_counter_sum(&mp->m_fdblocks);
767
774 xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb); 768 xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb);
775 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF); 769 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF);
776 xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb)); 770 xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb));
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 1d8eef9cf0f5..a56960dd1684 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1232,6 +1232,117 @@ xfs_vm_releasepage(
1232 return try_to_free_buffers(page); 1232 return try_to_free_buffers(page);
1233} 1233}
1234 1234
1235/*
1236 * When we map a DIO buffer, we may need to attach an ioend that describes the
1237 * type of write IO we are doing. This passes to the completion function the
1238 * operations it needs to perform. If the mapping is for an overwrite wholly
1239 * within the EOF then we don't need an ioend and so we don't allocate one.
1240 * This avoids the unnecessary overhead of allocating and freeing ioends for
1241 * workloads that don't require transactions on IO completion.
1242 *
1243 * If we get multiple mappings in a single IO, we might be mapping different
1244 * types. But because the direct IO can only have a single private pointer, we
1245 * need to ensure that:
1246 *
1247 * a) i) the ioend spans the entire region of unwritten mappings; or
1248 * ii) the ioend spans all the mappings that cross or are beyond EOF; and
1249 * b) if it contains unwritten extents, it is *permanently* marked as such
1250 *
1251 * We could do this by chaining ioends like buffered IO does, but we only
1252 * actually get one IO completion callback from the direct IO, and that spans
1253 * the entire IO regardless of how many mappings and IOs are needed to complete
1254 * the DIO. There is only going to be one reference to the ioend and its life
 1255 * cycle is constrained by the DIO completion code. Hence we don't need
1256 * reference counting here.
1257 */
1258static void
1259xfs_map_direct(
1260 struct inode *inode,
1261 struct buffer_head *bh_result,
1262 struct xfs_bmbt_irec *imap,
1263 xfs_off_t offset)
1264{
1265 struct xfs_ioend *ioend;
1266 xfs_off_t size = bh_result->b_size;
1267 int type;
1268
1269 if (ISUNWRITTEN(imap))
1270 type = XFS_IO_UNWRITTEN;
1271 else
1272 type = XFS_IO_OVERWRITE;
1273
1274 trace_xfs_gbmap_direct(XFS_I(inode), offset, size, type, imap);
1275
1276 if (bh_result->b_private) {
1277 ioend = bh_result->b_private;
1278 ASSERT(ioend->io_size > 0);
1279 ASSERT(offset >= ioend->io_offset);
1280 if (offset + size > ioend->io_offset + ioend->io_size)
1281 ioend->io_size = offset - ioend->io_offset + size;
1282
1283 if (type == XFS_IO_UNWRITTEN && type != ioend->io_type)
1284 ioend->io_type = XFS_IO_UNWRITTEN;
1285
1286 trace_xfs_gbmap_direct_update(XFS_I(inode), ioend->io_offset,
1287 ioend->io_size, ioend->io_type,
1288 imap);
1289 } else if (type == XFS_IO_UNWRITTEN ||
1290 offset + size > i_size_read(inode)) {
1291 ioend = xfs_alloc_ioend(inode, type);
1292 ioend->io_offset = offset;
1293 ioend->io_size = size;
1294
1295 bh_result->b_private = ioend;
1296 set_buffer_defer_completion(bh_result);
1297
1298 trace_xfs_gbmap_direct_new(XFS_I(inode), offset, size, type,
1299 imap);
1300 } else {
1301 trace_xfs_gbmap_direct_none(XFS_I(inode), offset, size, type,
1302 imap);
1303 }
1304}
1305
1306/*
 1307 * If this is O_DIRECT or the mpage code calling, tell them how large the mapping
1308 * is, so that we can avoid repeated get_blocks calls.
1309 *
1310 * If the mapping spans EOF, then we have to break the mapping up as the mapping
1311 * for blocks beyond EOF must be marked new so that sub block regions can be
1312 * correctly zeroed. We can't do this for mappings within EOF unless the mapping
1313 * was just allocated or is unwritten, otherwise the callers would overwrite
1314 * existing data with zeros. Hence we have to split the mapping into a range up
1315 * to and including EOF, and a second mapping for beyond EOF.
1316 */
1317static void
1318xfs_map_trim_size(
1319 struct inode *inode,
1320 sector_t iblock,
1321 struct buffer_head *bh_result,
1322 struct xfs_bmbt_irec *imap,
1323 xfs_off_t offset,
1324 ssize_t size)
1325{
1326 xfs_off_t mapping_size;
1327
1328 mapping_size = imap->br_startoff + imap->br_blockcount - iblock;
1329 mapping_size <<= inode->i_blkbits;
1330
1331 ASSERT(mapping_size > 0);
1332 if (mapping_size > size)
1333 mapping_size = size;
1334 if (offset < i_size_read(inode) &&
1335 offset + mapping_size >= i_size_read(inode)) {
1336 /* limit mapping to block that spans EOF */
1337 mapping_size = roundup_64(i_size_read(inode) - offset,
1338 1 << inode->i_blkbits);
1339 }
1340 if (mapping_size > LONG_MAX)
1341 mapping_size = LONG_MAX;
1342
1343 bh_result->b_size = mapping_size;
1344}
1345
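
A worked instance of the trimming above may help; the numbers are invented. With 4096-byte blocks, i_size = 10000 and a mapping starting at byte 8192, the mapping must be cut back to the single block that spans EOF so the tail can be marked new and zeroed correctly:

/* trim.c - worked model of the xfs_map_trim_size() arithmetic above. */
#include <stdio.h>

int main(void)
{
	int blkbits = 12;			/* 4096-byte blocks */
	long long blksize = 1LL << blkbits;
	long long i_size = 10000;		/* file size in bytes */
	long long offset = 8192;		/* IO offset in bytes */
	long long mapping_size = 4 * blksize;	/* extent-derived length */
	long long requested = 2 * blksize;	/* bh_result->b_size on entry */

	if (mapping_size > requested)
		mapping_size = requested;
	if (offset < i_size && offset + mapping_size >= i_size) {
		/* limit mapping to the block that spans EOF */
		mapping_size = ((i_size - offset) + blksize - 1)
				/ blksize * blksize;
	}
	printf("trimmed mapping: %lld bytes\n", mapping_size);	/* 4096 */
	return 0;
}
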
1235STATIC int 1346STATIC int
1236__xfs_get_blocks( 1347__xfs_get_blocks(
1237 struct inode *inode, 1348 struct inode *inode,
@@ -1320,31 +1431,37 @@ __xfs_get_blocks(
1320 1431
1321 xfs_iunlock(ip, lockmode); 1432 xfs_iunlock(ip, lockmode);
1322 } 1433 }
1323 1434 trace_xfs_get_blocks_alloc(ip, offset, size,
1324 trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap); 1435 ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN
1436 : XFS_IO_DELALLOC, &imap);
1325 } else if (nimaps) { 1437 } else if (nimaps) {
1326 trace_xfs_get_blocks_found(ip, offset, size, 0, &imap); 1438 trace_xfs_get_blocks_found(ip, offset, size,
1439 ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN
1440 : XFS_IO_OVERWRITE, &imap);
1327 xfs_iunlock(ip, lockmode); 1441 xfs_iunlock(ip, lockmode);
1328 } else { 1442 } else {
1329 trace_xfs_get_blocks_notfound(ip, offset, size); 1443 trace_xfs_get_blocks_notfound(ip, offset, size);
1330 goto out_unlock; 1444 goto out_unlock;
1331 } 1445 }
1332 1446
1447 /* trim mapping down to size requested */
1448 if (direct || size > (1 << inode->i_blkbits))
1449 xfs_map_trim_size(inode, iblock, bh_result,
1450 &imap, offset, size);
1451
1452 /*
1453 * For unwritten extents do not report a disk address in the buffered
1454 * read case (treat as if we're reading into a hole).
1455 */
1333 if (imap.br_startblock != HOLESTARTBLOCK && 1456 if (imap.br_startblock != HOLESTARTBLOCK &&
1334 imap.br_startblock != DELAYSTARTBLOCK) { 1457 imap.br_startblock != DELAYSTARTBLOCK &&
1335 /* 1458 (create || !ISUNWRITTEN(&imap))) {
1336 * For unwritten extents do not report a disk address on 1459 xfs_map_buffer(inode, bh_result, &imap, offset);
1337 * the read case (treat as if we're reading into a hole). 1460 if (ISUNWRITTEN(&imap))
1338 */
1339 if (create || !ISUNWRITTEN(&imap))
1340 xfs_map_buffer(inode, bh_result, &imap, offset);
1341 if (create && ISUNWRITTEN(&imap)) {
1342 if (direct) {
1343 bh_result->b_private = inode;
1344 set_buffer_defer_completion(bh_result);
1345 }
1346 set_buffer_unwritten(bh_result); 1461 set_buffer_unwritten(bh_result);
1347 } 1462 /* direct IO needs special help */
1463 if (create && direct)
1464 xfs_map_direct(inode, bh_result, &imap, offset);
1348 } 1465 }
1349 1466
1350 /* 1467 /*
@@ -1377,39 +1494,6 @@ __xfs_get_blocks(
1377 } 1494 }
1378 } 1495 }
1379 1496
1380 /*
1381 * If this is O_DIRECT or the mpage code calling tell them how large
1382 * the mapping is, so that we can avoid repeated get_blocks calls.
1383 *
1384 * If the mapping spans EOF, then we have to break the mapping up as the
1385 * mapping for blocks beyond EOF must be marked new so that sub block
1386 * regions can be correctly zeroed. We can't do this for mappings within
1387 * EOF unless the mapping was just allocated or is unwritten, otherwise
1388 * the callers would overwrite existing data with zeros. Hence we have
1389 * to split the mapping into a range up to and including EOF, and a
1390 * second mapping for beyond EOF.
1391 */
1392 if (direct || size > (1 << inode->i_blkbits)) {
1393 xfs_off_t mapping_size;
1394
1395 mapping_size = imap.br_startoff + imap.br_blockcount - iblock;
1396 mapping_size <<= inode->i_blkbits;
1397
1398 ASSERT(mapping_size > 0);
1399 if (mapping_size > size)
1400 mapping_size = size;
1401 if (offset < i_size_read(inode) &&
1402 offset + mapping_size >= i_size_read(inode)) {
1403 /* limit mapping to block that spans EOF */
1404 mapping_size = roundup_64(i_size_read(inode) - offset,
1405 1 << inode->i_blkbits);
1406 }
1407 if (mapping_size > LONG_MAX)
1408 mapping_size = LONG_MAX;
1409
1410 bh_result->b_size = mapping_size;
1411 }
1412
1413 return 0; 1497 return 0;
1414 1498
1415out_unlock: 1499out_unlock:
@@ -1440,9 +1524,11 @@ xfs_get_blocks_direct(
1440/* 1524/*
1441 * Complete a direct I/O write request. 1525 * Complete a direct I/O write request.
1442 * 1526 *
1443 * If the private argument is non-NULL __xfs_get_blocks signals us that we 1527 * The ioend structure is passed from __xfs_get_blocks() to tell us what to do.
1444 * need to issue a transaction to convert the range from unwritten to written 1528 * If no ioend exists (i.e. @private == NULL) then the write IO is an overwrite
1445 * extents. 1529 * wholly within the EOF and so there is nothing for us to do. Note that in this
1530 * case the completion can be called in interrupt context, whereas if we have an
1531 * ioend we will always be called in task context (i.e. from a workqueue).
1446 */ 1532 */
1447STATIC void 1533STATIC void
1448xfs_end_io_direct_write( 1534xfs_end_io_direct_write(
@@ -1454,43 +1540,71 @@ xfs_end_io_direct_write(
1454 struct inode *inode = file_inode(iocb->ki_filp); 1540 struct inode *inode = file_inode(iocb->ki_filp);
1455 struct xfs_inode *ip = XFS_I(inode); 1541 struct xfs_inode *ip = XFS_I(inode);
1456 struct xfs_mount *mp = ip->i_mount; 1542 struct xfs_mount *mp = ip->i_mount;
1543 struct xfs_ioend *ioend = private;
1457 1544
1458 if (XFS_FORCED_SHUTDOWN(mp)) 1545 trace_xfs_gbmap_direct_endio(ip, offset, size,
1546 ioend ? ioend->io_type : 0, NULL);
1547
1548 if (!ioend) {
1549 ASSERT(offset + size <= i_size_read(inode));
1459 return; 1550 return;
1551 }
1552
1553 if (XFS_FORCED_SHUTDOWN(mp))
1554 goto out_end_io;
1460 1555
1461 /* 1556 /*
1462 * While the generic direct I/O code updates the inode size, it does 1557 * dio completion end_io functions are only called on writes if more
 1463 * so only after the end_io handler is called, which means our 1558 * than 0 bytes were written.
1464 * end_io handler thinks the on-disk size is outside the in-core
1465 * size. To prevent this just update it a little bit earlier here.
1466 */ 1559 */
1560 ASSERT(size > 0);
1561
1562 /*
1563 * The ioend only maps whole blocks, while the IO may be sector aligned.
1564 * Hence the ioend offset/size may not match the IO offset/size exactly.
1565 * Because we don't map overwrites within EOF into the ioend, the offset
1566 * may not match, but only if the endio spans EOF. Either way, write
1567 * the IO sizes into the ioend so that completion processing does the
1568 * right thing.
1569 */
1570 ASSERT(offset + size <= ioend->io_offset + ioend->io_size);
1571 ioend->io_size = size;
1572 ioend->io_offset = offset;
1573
1574 /*
1575 * The ioend tells us whether we are doing unwritten extent conversion
1576 * or an append transaction that updates the on-disk file size. These
 1577 * cases are the only ones where we might *potentially* need
1578 * to update the VFS inode size.
1579 *
1580 * We need to update the in-core inode size here so that we don't end up
1581 * with the on-disk inode size being outside the in-core inode size. We
1582 * have no other method of updating EOF for AIO, so always do it here
1583 * if necessary.
1584 *
1585 * We need to lock the test/set EOF update as we can be racing with
1586 * other IO completions here to update the EOF. Failing to serialise
1587 * here can result in EOF moving backwards and Bad Things Happen when
1588 * that occurs.
1589 */
1590 spin_lock(&ip->i_flags_lock);
1467 if (offset + size > i_size_read(inode)) 1591 if (offset + size > i_size_read(inode))
1468 i_size_write(inode, offset + size); 1592 i_size_write(inode, offset + size);
1593 spin_unlock(&ip->i_flags_lock);
1469 1594
1470 /* 1595 /*
1471 * For direct I/O we do not know if we need to allocate blocks or not, 1596 * If we are doing an append IO that needs to update the EOF on disk,
1472 * so we can't preallocate an append transaction, as that results in 1597 * do the transaction reserve now so we can use common end io
1473 * nested reservations and log space deadlocks. Hence allocate the 1598 * processing. Stashing the error (if there is one) in the ioend will
1474 * transaction here. While this is sub-optimal and can block IO 1599 * result in the ioend processing passing on the error if it is
 1475 * completion for some time, we're stuck with doing it this way until 1600 * possible, as we can't return it from here.
1476 * we can pass the ioend to the direct IO allocation callbacks and
1477 * avoid nesting that way.
1478 */ 1601 */
1479 if (private && size > 0) { 1602 if (ioend->io_type == XFS_IO_OVERWRITE)
1480 xfs_iomap_write_unwritten(ip, offset, size); 1603 ioend->io_error = xfs_setfilesize_trans_alloc(ioend);
1481 } else if (offset + size > ip->i_d.di_size) {
1482 struct xfs_trans *tp;
1483 int error;
1484
1485 tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
1486 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
1487 if (error) {
1488 xfs_trans_cancel(tp, 0);
1489 return;
1490 }
1491 1604
1492 xfs_setfilesize(ip, tp, offset, size); 1605out_end_io:
1493 } 1606 xfs_end_io(&ioend->io_work);
1607 return;
1494} 1608}
1495 1609
1496STATIC ssize_t 1610STATIC ssize_t
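
The i_flags_lock usage above guards a classic test-and-set pair: if the comparison and the store are not atomic with respect to other completions, a completion carrying a smaller end offset can lose the race yet write last, moving EOF backwards. A minimal user-space model of the serialised update, with a pthread mutex standing in for the spinlock:

/* eof_race.c - model of the serialised EOF update described above. */
#include <pthread.h>
#include <stdio.h>

static long long i_size;
static pthread_mutex_t size_lock = PTHREAD_MUTEX_INITIALIZER;

/* Hold the lock across both the comparison and the store, exactly
 * as the i_size_read()/i_size_write() pair is bracketed above. */
static void *completion(void *arg)
{
	long long end = (long long)(size_t)arg;

	pthread_mutex_lock(&size_lock);
	if (end > i_size)
		i_size = end;
	pthread_mutex_unlock(&size_lock);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	/* Two IO completions race to extend EOF. */
	pthread_create(&a, NULL, completion, (void *)(size_t)8192);
	pthread_create(&b, NULL, completion, (void *)(size_t)4096);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	printf("i_size = %lld\n", i_size);	/* always 8192 when serialised */
	return 0;
}
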
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c
index 83af4c149635..f9c1c64782d3 100644
--- a/fs/xfs/xfs_attr_inactive.c
+++ b/fs/xfs/xfs_attr_inactive.c
@@ -132,9 +132,10 @@ xfs_attr3_leaf_inactive(
132 int size; 132 int size;
133 int tmp; 133 int tmp;
134 int i; 134 int i;
135 struct xfs_mount *mp = bp->b_target->bt_mount;
135 136
136 leaf = bp->b_addr; 137 leaf = bp->b_addr;
137 xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); 138 xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf);
138 139
139 /* 140 /*
140 * Count the number of "remote" value extents. 141 * Count the number of "remote" value extents.
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index a43d370d2c58..65fb37a18e92 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -225,6 +225,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
225 int error, i; 225 int error, i;
226 struct xfs_buf *bp; 226 struct xfs_buf *bp;
227 struct xfs_inode *dp = context->dp; 227 struct xfs_inode *dp = context->dp;
228 struct xfs_mount *mp = dp->i_mount;
228 229
229 trace_xfs_attr_node_list(context); 230 trace_xfs_attr_node_list(context);
230 231
@@ -256,7 +257,8 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
256 case XFS_ATTR_LEAF_MAGIC: 257 case XFS_ATTR_LEAF_MAGIC:
257 case XFS_ATTR3_LEAF_MAGIC: 258 case XFS_ATTR3_LEAF_MAGIC:
258 leaf = bp->b_addr; 259 leaf = bp->b_addr;
259 xfs_attr3_leaf_hdr_from_disk(&leafhdr, leaf); 260 xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo,
261 &leafhdr, leaf);
260 entries = xfs_attr3_leaf_entryp(leaf); 262 entries = xfs_attr3_leaf_entryp(leaf);
261 if (cursor->hashval > be32_to_cpu( 263 if (cursor->hashval > be32_to_cpu(
262 entries[leafhdr.count - 1].hashval)) { 264 entries[leafhdr.count - 1].hashval)) {
@@ -340,7 +342,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
340 xfs_trans_brelse(NULL, bp); 342 xfs_trans_brelse(NULL, bp);
341 return error; 343 return error;
342 } 344 }
343 xfs_attr3_leaf_hdr_from_disk(&leafhdr, leaf); 345 xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf);
344 if (context->seen_enough || leafhdr.forw == 0) 346 if (context->seen_enough || leafhdr.forw == 0)
345 break; 347 break;
346 cursor->blkno = leafhdr.forw; 348 cursor->blkno = leafhdr.forw;
@@ -368,11 +370,12 @@ xfs_attr3_leaf_list_int(
368 struct xfs_attr_leaf_entry *entry; 370 struct xfs_attr_leaf_entry *entry;
369 int retval; 371 int retval;
370 int i; 372 int i;
373 struct xfs_mount *mp = context->dp->i_mount;
371 374
372 trace_xfs_attr_list_leaf(context); 375 trace_xfs_attr_list_leaf(context);
373 376
374 leaf = bp->b_addr; 377 leaf = bp->b_addr;
375 xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); 378 xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf);
376 entries = xfs_attr3_leaf_entryp(leaf); 379 entries = xfs_attr3_leaf_entryp(leaf);
377 380
378 cursor = context->cursor; 381 cursor = context->cursor;
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 22a5dcb70b32..a52bbd3abc7d 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1376,22 +1376,19 @@ out:
1376} 1376}
1377 1377
1378/* 1378/*
1379 * xfs_collapse_file_space() 1379 * @next_fsb will keep track of the extent currently undergoing shift.
1380 * This routine frees disk space and shift extent for the given file. 1380 * @stop_fsb will keep track of the extent at which we have to stop.
1381 * The first thing we do is to free data blocks in the specified range 1381 * If we are shifting left, we will start with block (offset + len) and
 1382 * by calling xfs_free_file_space(). It would also sync dirty data 1382 * shift each extent up to the last extent.
1383 * and invalidate page cache over the region on which collapse range 1383 * If we are shifting right, we will start with last extent inside file space
1384 * is working. And Shift extent records to the left to cover a hole. 1384 * and continue until we reach the block corresponding to offset.
1385 * RETURNS:
1386 * 0 on success
1387 * errno on error
1388 *
1389 */ 1385 */
1390int 1386static int
1391xfs_collapse_file_space( 1387xfs_shift_file_space(
1392 struct xfs_inode *ip, 1388 struct xfs_inode *ip,
1393 xfs_off_t offset, 1389 xfs_off_t offset,
1394 xfs_off_t len) 1390 xfs_off_t len,
1391 enum shift_direction direction)
1395{ 1392{
1396 int done = 0; 1393 int done = 0;
1397 struct xfs_mount *mp = ip->i_mount; 1394 struct xfs_mount *mp = ip->i_mount;
@@ -1400,21 +1397,26 @@ xfs_collapse_file_space(
1400 struct xfs_bmap_free free_list; 1397 struct xfs_bmap_free free_list;
1401 xfs_fsblock_t first_block; 1398 xfs_fsblock_t first_block;
1402 int committed; 1399 int committed;
1403 xfs_fileoff_t start_fsb; 1400 xfs_fileoff_t stop_fsb;
1404 xfs_fileoff_t next_fsb; 1401 xfs_fileoff_t next_fsb;
1405 xfs_fileoff_t shift_fsb; 1402 xfs_fileoff_t shift_fsb;
1406 1403
1407 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); 1404 ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT);
1408 1405
1409 trace_xfs_collapse_file_space(ip); 1406 if (direction == SHIFT_LEFT) {
1407 next_fsb = XFS_B_TO_FSB(mp, offset + len);
1408 stop_fsb = XFS_B_TO_FSB(mp, VFS_I(ip)->i_size);
1409 } else {
1410 /*
1411 * If right shift, delegate the work of initialization of
1412 * next_fsb to xfs_bmap_shift_extent as it has ilock held.
1413 */
1414 next_fsb = NULLFSBLOCK;
1415 stop_fsb = XFS_B_TO_FSB(mp, offset);
1416 }
1410 1417
1411 next_fsb = XFS_B_TO_FSB(mp, offset + len);
1412 shift_fsb = XFS_B_TO_FSB(mp, len); 1418 shift_fsb = XFS_B_TO_FSB(mp, len);
1413 1419
1414 error = xfs_free_file_space(ip, offset, len);
1415 if (error)
1416 return error;
1417
1418 /* 1420 /*
1419 * Trim eofblocks to avoid shifting uninitialized post-eof preallocation 1421 * Trim eofblocks to avoid shifting uninitialized post-eof preallocation
1420 * into the accessible region of the file. 1422 * into the accessible region of the file.
@@ -1427,20 +1429,28 @@ xfs_collapse_file_space(
1427 1429
1428 /* 1430 /*
1429 * Writeback and invalidate cache for the remainder of the file as we're 1431 * Writeback and invalidate cache for the remainder of the file as we're
1430 * about to shift down every extent from the collapse range to EOF. The 1432 * about to shift down every extent from offset to EOF.
1431 * free of the collapse range above might have already done some of
1432 * this, but we shouldn't rely on it to do anything outside of the range
1433 * that was freed.
1434 */ 1433 */
1435 error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, 1434 error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
1436 offset + len, -1); 1435 offset, -1);
1437 if (error) 1436 if (error)
1438 return error; 1437 return error;
1439 error = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, 1438 error = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
1440 (offset + len) >> PAGE_CACHE_SHIFT, -1); 1439 offset >> PAGE_CACHE_SHIFT, -1);
1441 if (error) 1440 if (error)
1442 return error; 1441 return error;
1443 1442
1443 /*
 1444 * The extent shifting code works on extent granularity. So, if
 1445 * stop_fsb is not the starting block of an extent, we need to split
1446 * the extent at stop_fsb.
1447 */
1448 if (direction == SHIFT_RIGHT) {
1449 error = xfs_bmap_split_extent(ip, stop_fsb);
1450 if (error)
1451 return error;
1452 }
1453
1444 while (!error && !done) { 1454 while (!error && !done) {
1445 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); 1455 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
1446 /* 1456 /*
@@ -1464,7 +1474,7 @@ xfs_collapse_file_space(
1464 if (error) 1474 if (error)
1465 goto out; 1475 goto out;
1466 1476
1467 xfs_trans_ijoin(tp, ip, 0); 1477 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
1468 1478
1469 xfs_bmap_init(&free_list, &first_block); 1479 xfs_bmap_init(&free_list, &first_block);
1470 1480
@@ -1472,10 +1482,9 @@ xfs_collapse_file_space(
1472 * We are using the write transaction in which max 2 bmbt 1482 * We are using the write transaction in which max 2 bmbt
1473 * updates are allowed 1483 * updates are allowed
1474 */ 1484 */
1475 start_fsb = next_fsb; 1485 error = xfs_bmap_shift_extents(tp, ip, &next_fsb, shift_fsb,
1476 error = xfs_bmap_shift_extents(tp, ip, start_fsb, shift_fsb, 1486 &done, stop_fsb, &first_block, &free_list,
1477 &done, &next_fsb, &first_block, &free_list, 1487 direction, XFS_BMAP_MAX_SHIFT_EXTENTS);
1478 XFS_BMAP_MAX_SHIFT_EXTENTS);
1479 if (error) 1488 if (error)
1480 goto out; 1489 goto out;
1481 1490
@@ -1484,18 +1493,70 @@ xfs_collapse_file_space(
1484 goto out; 1493 goto out;
1485 1494
1486 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1495 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
1487 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1488 } 1496 }
1489 1497
1490 return error; 1498 return error;
1491 1499
1492out: 1500out:
1493 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 1501 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
1494 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1495 return error; 1502 return error;
1496} 1503}
1497 1504
1498/* 1505/*
1506 * xfs_collapse_file_space()
 1507 * This routine frees disk space and shifts extents for the given file.
 1508 * The first thing we do is free the data blocks in the specified range
 1509 * by calling xfs_free_file_space(), which also syncs dirty data
 1510 * and invalidates the page cache over the region on which the collapse
 1511 * range is working. Then we shift extent records left to cover the hole.
1512 * RETURNS:
1513 * 0 on success
1514 * errno on error
1515 *
1516 */
1517int
1518xfs_collapse_file_space(
1519 struct xfs_inode *ip,
1520 xfs_off_t offset,
1521 xfs_off_t len)
1522{
1523 int error;
1524
1525 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
1526 trace_xfs_collapse_file_space(ip);
1527
1528 error = xfs_free_file_space(ip, offset, len);
1529 if (error)
1530 return error;
1531
1532 return xfs_shift_file_space(ip, offset, len, SHIFT_LEFT);
1533}
1534
1535/*
1536 * xfs_insert_file_space()
 1537 * This routine creates hole space by shifting extents for the given file.
 1538 * The first thing we do is sync dirty data and invalidate the page cache
 1539 * over the region on which the insert range is working. Then we split an
 1540 * extent into two at the given offset by calling xfs_bmap_split_extent,
 1541 * and shift all extent records lying between [offset,
 1542 * last allocated extent] to the right to make room for the hole.
1543 * RETURNS:
1544 * 0 on success
1545 * errno on error
1546 */
1547int
1548xfs_insert_file_space(
1549 struct xfs_inode *ip,
1550 loff_t offset,
1551 loff_t len)
1552{
1553 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
1554 trace_xfs_insert_file_space(ip);
1555
1556 return xfs_shift_file_space(ip, offset, len, SHIFT_RIGHT);
1557}
1558
1559/*
1499 * We need to check that the format of the data fork in the temporary inode is 1560 * We need to check that the format of the data fork in the temporary inode is
1500 * valid for the target inode before doing the swap. This is not a problem with 1561 * valid for the target inode before doing the swap. This is not a problem with
1501 * attr1 because of the fixed fork offset, but attr2 has a dynamically sized 1562 * attr1 because of the fixed fork offset, but attr2 has a dynamically sized
@@ -1599,13 +1660,6 @@ xfs_swap_extent_flush(
1599 /* Verify O_DIRECT for ftmp */ 1660 /* Verify O_DIRECT for ftmp */
1600 if (VFS_I(ip)->i_mapping->nrpages) 1661 if (VFS_I(ip)->i_mapping->nrpages)
1601 return -EINVAL; 1662 return -EINVAL;
1602
1603 /*
1604 * Don't try to swap extents on mmap()d files because we can't lock
1605 * out races against page faults safely.
1606 */
1607 if (mapping_mapped(VFS_I(ip)->i_mapping))
1608 return -EBUSY;
1609 return 0; 1663 return 0;
1610} 1664}
1611 1665
@@ -1633,13 +1687,14 @@ xfs_swap_extents(
1633 } 1687 }
1634 1688
1635 /* 1689 /*
1636 * Lock up the inodes against other IO and truncate to begin with. 1690 * Lock the inodes against other IO, page faults and truncate to
1637 * Then we can ensure the inodes are flushed and have no page cache 1691 * begin with. Then we can ensure the inodes are flushed and have no
1638 * safely. Once we have done this we can take the ilocks and do the rest 1692 * page cache safely. Once we have done this we can take the ilocks and
1639 * of the checks. 1693 * do the rest of the checks.
1640 */ 1694 */
1641 lock_flags = XFS_IOLOCK_EXCL; 1695 lock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
1642 xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL); 1696 xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL);
1697 xfs_lock_two_inodes(ip, tip, XFS_MMAPLOCK_EXCL);
1643 1698
1644 /* Verify that both files have the same format */ 1699 /* Verify that both files have the same format */
1645 if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) { 1700 if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) {
@@ -1666,8 +1721,16 @@ xfs_swap_extents(
1666 xfs_trans_cancel(tp, 0); 1721 xfs_trans_cancel(tp, 0);
1667 goto out_unlock; 1722 goto out_unlock;
1668 } 1723 }
1724
1725 /*
 1726 * Lock and join the inodes to the transaction so that transaction commit
1727 * or cancel will unlock the inodes from this point onwards.
1728 */
1669 xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); 1729 xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
1670 lock_flags |= XFS_ILOCK_EXCL; 1730 lock_flags |= XFS_ILOCK_EXCL;
1731 xfs_trans_ijoin(tp, ip, lock_flags);
1732 xfs_trans_ijoin(tp, tip, lock_flags);
1733
1671 1734
1672 /* Verify all data are being swapped */ 1735 /* Verify all data are being swapped */
1673 if (sxp->sx_offset != 0 || 1736 if (sxp->sx_offset != 0 ||
@@ -1720,9 +1783,6 @@ xfs_swap_extents(
1720 goto out_trans_cancel; 1783 goto out_trans_cancel;
1721 } 1784 }
1722 1785
1723 xfs_trans_ijoin(tp, ip, lock_flags);
1724 xfs_trans_ijoin(tp, tip, lock_flags);
1725
1726 /* 1786 /*
1727 * Before we've swapped the forks, lets set the owners of the forks 1787 * Before we've swapped the forks, lets set the owners of the forks
1728 * appropriately. We have to do this as we are demand paging the btree 1788 * appropriately. We have to do this as we are demand paging the btree
@@ -1856,5 +1916,5 @@ out_unlock:
1856 1916
1857out_trans_cancel: 1917out_trans_cancel:
1858 xfs_trans_cancel(tp, 0); 1918 xfs_trans_cancel(tp, 0);
1859 goto out_unlock; 1919 goto out;
1860} 1920}
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 736429a72a12..af97d9a1dfb4 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -63,6 +63,8 @@ int xfs_zero_file_space(struct xfs_inode *ip, xfs_off_t offset,
63 xfs_off_t len); 63 xfs_off_t len);
64int xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset, 64int xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset,
65 xfs_off_t len); 65 xfs_off_t len);
66int xfs_insert_file_space(struct xfs_inode *, xfs_off_t offset,
67 xfs_off_t len);
66 68
67/* EOF block manipulation functions */ 69/* EOF block manipulation functions */
68bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force); 70bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force);
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 507d96a57ac7..092d652bc03d 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -537,9 +537,9 @@ xfs_buf_item_push(
537 537
538 /* has a previous flush failed due to IO errors? */ 538 /* has a previous flush failed due to IO errors? */
539 if ((bp->b_flags & XBF_WRITE_FAIL) && 539 if ((bp->b_flags & XBF_WRITE_FAIL) &&
540 ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS:")) { 540 ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS: Failing async write")) {
541 xfs_warn(bp->b_target->bt_mount, 541 xfs_warn(bp->b_target->bt_mount,
542"Detected failing async write on buffer block 0x%llx. Retrying async write.", 542"Failing async write on buffer block 0x%llx. Retrying async write.",
543 (long long)bp->b_bn); 543 (long long)bp->b_bn);
544 } 544 }
545 545
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index 799e5a2d334d..e85a9519a5ae 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -84,7 +84,7 @@ xfs_trim_extents(
84 error = xfs_alloc_get_rec(cur, &fbno, &flen, &i); 84 error = xfs_alloc_get_rec(cur, &fbno, &flen, &i);
85 if (error) 85 if (error)
86 goto out_del_cursor; 86 goto out_del_cursor;
87 XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor); 87 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_del_cursor);
88 ASSERT(flen <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest)); 88 ASSERT(flen <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest));
89 89
90 /* 90 /*
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 3ee186ac1093..338e50bbfd1e 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -131,7 +131,7 @@ xfs_error_report(
131{ 131{
132 if (level <= xfs_error_level) { 132 if (level <= xfs_error_level) {
133 xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT, 133 xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT,
134 "Internal error %s at line %d of file %s. Caller %pF", 134 "Internal error %s at line %d of file %s. Caller %pS",
135 tag, linenum, filename, ra); 135 tag, linenum, filename, ra);
136 136
137 xfs_stack_trace(); 137 xfs_stack_trace();
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 279a76e52791..c0394ed126fc 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -40,25 +40,25 @@ extern void xfs_verifier_error(struct xfs_buf *bp);
40/* 40/*
41 * Macros to set EFSCORRUPTED & return/branch. 41 * Macros to set EFSCORRUPTED & return/branch.
42 */ 42 */
43#define XFS_WANT_CORRUPTED_GOTO(x,l) \ 43#define XFS_WANT_CORRUPTED_GOTO(mp, x, l) \
44 { \ 44 { \
45 int fs_is_ok = (x); \ 45 int fs_is_ok = (x); \
46 ASSERT(fs_is_ok); \ 46 ASSERT(fs_is_ok); \
47 if (unlikely(!fs_is_ok)) { \ 47 if (unlikely(!fs_is_ok)) { \
48 XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_GOTO", \ 48 XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_GOTO", \
49 XFS_ERRLEVEL_LOW, NULL); \ 49 XFS_ERRLEVEL_LOW, mp); \
50 error = -EFSCORRUPTED; \ 50 error = -EFSCORRUPTED; \
51 goto l; \ 51 goto l; \
52 } \ 52 } \
53 } 53 }
54 54
55#define XFS_WANT_CORRUPTED_RETURN(x) \ 55#define XFS_WANT_CORRUPTED_RETURN(mp, x) \
56 { \ 56 { \
57 int fs_is_ok = (x); \ 57 int fs_is_ok = (x); \
58 ASSERT(fs_is_ok); \ 58 ASSERT(fs_is_ok); \
59 if (unlikely(!fs_is_ok)) { \ 59 if (unlikely(!fs_is_ok)) { \
60 XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_RETURN", \ 60 XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_RETURN", \
61 XFS_ERRLEVEL_LOW, NULL); \ 61 XFS_ERRLEVEL_LOW, mp); \
62 return -EFSCORRUPTED; \ 62 return -EFSCORRUPTED; \
63 } \ 63 } \
64 } 64 }
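
The only functional change to these macros is threading the mount pointer through to XFS_ERROR_REPORT, so corruption reports identify the filesystem instead of passing NULL. A simplified user-space sketch of the macro pattern itself, with a string standing in for struct xfs_mount and EFSCORRUPTED hard-coded to its Linux value (117, EUCLEAN); this mirrors the btree-record lookup callers shown in the xfs_ialloc.c hunks above:

/* corrupt.c - simplified stand-ins for the kernel macros above. */
#include <stdio.h>

#define EFSCORRUPTED 117	/* EUCLEAN on Linux */

#define WANT_CORRUPTED_RETURN(mp, x)				\
	do {							\
		if (!(x)) {					\
			fprintf(stderr, "%s: corruption\n", (mp)); \
			return -EFSCORRUPTED;			\
		}						\
	} while (0)

#define WANT_CORRUPTED_GOTO(mp, x, l)				\
	do {							\
		if (!(x)) {					\
			fprintf(stderr, "%s: corruption\n", (mp)); \
			error = -EFSCORRUPTED;			\
			goto l;					\
		}						\
	} while (0)

/* Mirrors the "lookup, then assert exactly one record found" shape. */
static int lookup_record(const char *mp, int found)
{
	int error = 0;

	WANT_CORRUPTED_RETURN(mp, found == 1);
	WANT_CORRUPTED_GOTO(mp, found == 1, out);
out:
	return error;
}

int main(void)
{
	printf("%d\n", lookup_record("sda1", 0));	/* -117 */
	return 0;
}
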
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 1f12ad0a8585..8121e75352ee 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -559,7 +559,7 @@ restart:
559 if (error <= 0) 559 if (error <= 0)
560 return error; 560 return error;
561 561
562 error = xfs_break_layouts(inode, iolock); 562 error = xfs_break_layouts(inode, iolock, true);
563 if (error) 563 if (error)
564 return error; 564 return error;
565 565
@@ -569,21 +569,42 @@ restart:
569 * write. If zeroing is needed and we are currently holding the 569 * write. If zeroing is needed and we are currently holding the
570 * iolock shared, we need to update it to exclusive which implies 570 * iolock shared, we need to update it to exclusive which implies
571 * having to redo all checks before. 571 * having to redo all checks before.
572 *
573 * We need to serialise against EOF updates that occur in IO
574 * completions here. We want to make sure that nobody is changing the
575 * size while we do this check until we have placed an IO barrier (i.e.
576 * hold the XFS_IOLOCK_EXCL) that prevents new IO from being dispatched.
577 * The spinlock effectively forms a memory barrier once we have the
578 * XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value
579 * and hence be able to correctly determine if we need to run zeroing.
572 */ 580 */
581 spin_lock(&ip->i_flags_lock);
573 if (iocb->ki_pos > i_size_read(inode)) { 582 if (iocb->ki_pos > i_size_read(inode)) {
574 bool zero = false; 583 bool zero = false;
575 584
585 spin_unlock(&ip->i_flags_lock);
576 if (*iolock == XFS_IOLOCK_SHARED) { 586 if (*iolock == XFS_IOLOCK_SHARED) {
577 xfs_rw_iunlock(ip, *iolock); 587 xfs_rw_iunlock(ip, *iolock);
578 *iolock = XFS_IOLOCK_EXCL; 588 *iolock = XFS_IOLOCK_EXCL;
579 xfs_rw_ilock(ip, *iolock); 589 xfs_rw_ilock(ip, *iolock);
580 iov_iter_reexpand(from, count); 590 iov_iter_reexpand(from, count);
591
592 /*
593 * We now have an IO submission barrier in place, but
594 * AIO can do EOF updates during IO completion and hence
595 * we now need to wait for all of them to drain. Non-AIO
596 * DIO will have drained before we are given the
597 * XFS_IOLOCK_EXCL, and so for most cases this wait is a
598 * no-op.
599 */
600 inode_dio_wait(inode);
581 goto restart; 601 goto restart;
582 } 602 }
583 error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero); 603 error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero);
584 if (error) 604 if (error)
585 return error; 605 return error;
586 } 606 } else
607 spin_unlock(&ip->i_flags_lock);
587 608
588 /* 609 /*
589 * Updating the timestamps will grab the ilock again from 610 * Updating the timestamps will grab the ilock again from
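
Note the shape of the upgrade path above: a reader-writer lock cannot be upgraded in place, so the code drops the shared iolock, retakes it exclusive and jumps back to restart to redo every check under the stronger lock (and, with this patch, drains in-flight AIO first). A generic sketch of that drop-upgrade-retry pattern using a pthread rwlock, illustrative only:

/* upgrade.c - sketch of the drop/retake-exclusive/recheck pattern. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_rwlock_t iolock = PTHREAD_RWLOCK_INITIALIZER;
static long long i_size = 4096;

static int write_checks(long long pos, bool *exclusive)
{
restart:
	/* ... all precondition checks are redone from here ... */
	if (pos > i_size && !*exclusive) {
		/* Can't upgrade in place: drop, retake exclusive, redo. */
		pthread_rwlock_unlock(&iolock);
		pthread_rwlock_wrlock(&iolock);
		*exclusive = true;
		goto restart;
	}
	return 0;
}

int main(void)
{
	bool exclusive = false;

	pthread_rwlock_rdlock(&iolock);
	write_checks(8192, &exclusive);	/* ends up holding it exclusive */
	pthread_rwlock_unlock(&iolock);
	printf("exclusive=%d\n", exclusive);
	return 0;
}
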
@@ -645,6 +666,8 @@ xfs_file_dio_aio_write(
645 int iolock; 666 int iolock;
646 size_t count = iov_iter_count(from); 667 size_t count = iov_iter_count(from);
647 loff_t pos = iocb->ki_pos; 668 loff_t pos = iocb->ki_pos;
669 loff_t end;
670 struct iov_iter data;
648 struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? 671 struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ?
649 mp->m_rtdev_targp : mp->m_ddev_targp; 672 mp->m_rtdev_targp : mp->m_ddev_targp;
650 673
@@ -685,10 +708,11 @@ xfs_file_dio_aio_write(
685 goto out; 708 goto out;
686 count = iov_iter_count(from); 709 count = iov_iter_count(from);
687 pos = iocb->ki_pos; 710 pos = iocb->ki_pos;
711 end = pos + count - 1;
688 712
689 if (mapping->nrpages) { 713 if (mapping->nrpages) {
690 ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, 714 ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
691 pos, pos + count - 1); 715 pos, end);
692 if (ret) 716 if (ret)
693 goto out; 717 goto out;
694 /* 718 /*
@@ -698,7 +722,7 @@ xfs_file_dio_aio_write(
698 */ 722 */
699 ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, 723 ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
700 pos >> PAGE_CACHE_SHIFT, 724 pos >> PAGE_CACHE_SHIFT,
701 (pos + count - 1) >> PAGE_CACHE_SHIFT); 725 end >> PAGE_CACHE_SHIFT);
702 WARN_ON_ONCE(ret); 726 WARN_ON_ONCE(ret);
703 ret = 0; 727 ret = 0;
704 } 728 }
@@ -715,8 +739,22 @@ xfs_file_dio_aio_write(
715 } 739 }
716 740
717 trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); 741 trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
718 ret = generic_file_direct_write(iocb, from, pos);
719 742
743 data = *from;
744 ret = mapping->a_ops->direct_IO(iocb, &data, pos);
745
746 /* see generic_file_direct_write() for why this is necessary */
747 if (mapping->nrpages) {
748 invalidate_inode_pages2_range(mapping,
749 pos >> PAGE_CACHE_SHIFT,
750 end >> PAGE_CACHE_SHIFT);
751 }
752
753 if (ret > 0) {
754 pos += ret;
755 iov_iter_advance(from, ret);
756 iocb->ki_pos = pos;
757 }
720out: 758out:
721 xfs_rw_iunlock(ip, iolock); 759 xfs_rw_iunlock(ip, iolock);
722 760
@@ -822,6 +860,11 @@ xfs_file_write_iter(
822 return ret; 860 return ret;
823} 861}
824 862
863#define XFS_FALLOC_FL_SUPPORTED \
864 (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \
865 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | \
866 FALLOC_FL_INSERT_RANGE)
867
825STATIC long 868STATIC long
826xfs_file_fallocate( 869xfs_file_fallocate(
827 struct file *file, 870 struct file *file,
@@ -835,18 +878,21 @@ xfs_file_fallocate(
835 enum xfs_prealloc_flags flags = 0; 878 enum xfs_prealloc_flags flags = 0;
836 uint iolock = XFS_IOLOCK_EXCL; 879 uint iolock = XFS_IOLOCK_EXCL;
837 loff_t new_size = 0; 880 loff_t new_size = 0;
881 bool do_file_insert = 0;
838 882
839 if (!S_ISREG(inode->i_mode)) 883 if (!S_ISREG(inode->i_mode))
840 return -EINVAL; 884 return -EINVAL;
841 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | 885 if (mode & ~XFS_FALLOC_FL_SUPPORTED)
842 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
843 return -EOPNOTSUPP; 886 return -EOPNOTSUPP;
844 887
845 xfs_ilock(ip, iolock); 888 xfs_ilock(ip, iolock);
846 error = xfs_break_layouts(inode, &iolock); 889 error = xfs_break_layouts(inode, &iolock, false);
847 if (error) 890 if (error)
848 goto out_unlock; 891 goto out_unlock;
849 892
893 xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
894 iolock |= XFS_MMAPLOCK_EXCL;
895
850 if (mode & FALLOC_FL_PUNCH_HOLE) { 896 if (mode & FALLOC_FL_PUNCH_HOLE) {
851 error = xfs_free_file_space(ip, offset, len); 897 error = xfs_free_file_space(ip, offset, len);
852 if (error) 898 if (error)
@@ -873,6 +919,27 @@ xfs_file_fallocate(
873 error = xfs_collapse_file_space(ip, offset, len); 919 error = xfs_collapse_file_space(ip, offset, len);
874 if (error) 920 if (error)
875 goto out_unlock; 921 goto out_unlock;
922 } else if (mode & FALLOC_FL_INSERT_RANGE) {
923 unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
924
925 new_size = i_size_read(inode) + len;
926 if (offset & blksize_mask || len & blksize_mask) {
927 error = -EINVAL;
928 goto out_unlock;
929 }
930
931 /* check the new inode size does not wrap through zero */
932 if (new_size > inode->i_sb->s_maxbytes) {
933 error = -EFBIG;
934 goto out_unlock;
935 }
936
937 /* Offset should be less than i_size */
938 if (offset >= i_size_read(inode)) {
939 error = -EINVAL;
940 goto out_unlock;
941 }
942 do_file_insert = 1;
876 } else { 943 } else {
877 flags |= XFS_PREALLOC_SET; 944 flags |= XFS_PREALLOC_SET;
878 945
@@ -907,8 +974,19 @@ xfs_file_fallocate(
907 iattr.ia_valid = ATTR_SIZE; 974 iattr.ia_valid = ATTR_SIZE;
908 iattr.ia_size = new_size; 975 iattr.ia_size = new_size;
909 error = xfs_setattr_size(ip, &iattr); 976 error = xfs_setattr_size(ip, &iattr);
977 if (error)
978 goto out_unlock;
910 } 979 }
911 980
981 /*
982 * Perform hole insertion now that the file size has been
983 * updated so that if we crash during the operation we don't
 984 * leave shifted extents past EOF and hence lose access to
985 * the data that is contained within them.
986 */
987 if (do_file_insert)
988 error = xfs_insert_file_space(ip, offset, len);
989
912out_unlock: 990out_unlock:
913 xfs_iunlock(ip, iolock); 991 xfs_iunlock(ip, iolock);
914 return error; 992 return error;
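
From user space the new functionality is just another fallocate(2) mode; per the checks added above, offset and len must be block aligned and offset must lie below i_size. A hedged usage sketch, assuming kernel and libc headers that define FALLOC_FL_INSERT_RANGE:

/* insert_range.c - shift data at and beyond 64KiB right by 64KiB. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <linux/falloc.h>

int main(int argc, char **argv)
{
	int fd;

	if (argc < 2)
		return 1;
	fd = open(argv[1], O_RDWR);
	/* offset and len must both be multiples of the fs block size,
	 * and offset must be strictly less than the file size. */
	if (fd < 0 || fallocate(fd, FALLOC_FL_INSERT_RANGE, 65536, 65536) < 0) {
		perror("insert range");
		return 1;
	}
	close(fd);
	return 0;
}
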
@@ -997,20 +1075,6 @@ xfs_file_mmap(
997} 1075}
998 1076
999/* 1077/*
1000 * mmap()d file has taken write protection fault and is being made
1001 * writable. We can set the page state up correctly for a writable
1002 * page, which means we can do correct delalloc accounting (ENOSPC
1003 * checking!) and unwritten extent mapping.
1004 */
1005STATIC int
1006xfs_vm_page_mkwrite(
1007 struct vm_area_struct *vma,
1008 struct vm_fault *vmf)
1009{
1010 return block_page_mkwrite(vma, vmf, xfs_get_blocks);
1011}
1012
1013/*
1014 * This type is designed to indicate the type of offset we would like 1078 * This type is designed to indicate the type of offset we would like
1015 * to search from page cache for xfs_seek_hole_data(). 1079 * to search from page cache for xfs_seek_hole_data().
1016 */ 1080 */
@@ -1385,6 +1449,55 @@ xfs_file_llseek(
1385 } 1449 }
1386} 1450}
1387 1451
1452/*
1453 * Locking for serialisation of IO during page faults. This results in a lock
1454 * ordering of:
1455 *
1456 * mmap_sem (MM)
1457 * i_mmap_lock (XFS - truncate serialisation)
1458 * page_lock (MM)
1459 * i_lock (XFS - extent map serialisation)
1460 */
1461STATIC int
1462xfs_filemap_fault(
1463 struct vm_area_struct *vma,
1464 struct vm_fault *vmf)
1465{
1466 struct xfs_inode *ip = XFS_I(vma->vm_file->f_mapping->host);
1467 int error;
1468
1469 trace_xfs_filemap_fault(ip);
1470
1471 xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
1472 error = filemap_fault(vma, vmf);
1473 xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
1474
1475 return error;
1476}
1477
1478/*
1479 * mmap()d file has taken write protection fault and is being made writable. We
1480 * can set the page state up correctly for a writable page, which means we can
1481 * do correct delalloc accounting (ENOSPC checking!) and unwritten extent
1482 * mapping.
1483 */
1484STATIC int
1485xfs_filemap_page_mkwrite(
1486 struct vm_area_struct *vma,
1487 struct vm_fault *vmf)
1488{
1489 struct xfs_inode *ip = XFS_I(vma->vm_file->f_mapping->host);
1490 int error;
1491
1492 trace_xfs_filemap_page_mkwrite(ip);
1493
1494 xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
1495 error = block_page_mkwrite(vma, vmf, xfs_get_blocks);
1496 xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
1497
1498 return error;
1499}
1500
1388const struct file_operations xfs_file_operations = { 1501const struct file_operations xfs_file_operations = {
1389 .llseek = xfs_file_llseek, 1502 .llseek = xfs_file_llseek,
1390 .read_iter = xfs_file_read_iter, 1503 .read_iter = xfs_file_read_iter,
@@ -1415,7 +1528,7 @@ const struct file_operations xfs_dir_file_operations = {
1415}; 1528};
1416 1529
1417static const struct vm_operations_struct xfs_file_vm_ops = { 1530static const struct vm_operations_struct xfs_file_vm_ops = {
1418 .fault = filemap_fault, 1531 .fault = xfs_filemap_fault,
1419 .map_pages = filemap_map_pages, 1532 .map_pages = filemap_map_pages,
1420 .page_mkwrite = xfs_vm_page_mkwrite, 1533 .page_mkwrite = xfs_filemap_page_mkwrite,
1421}; 1534};
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index a2e86e8a0fea..8f9f854376c6 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -322,7 +322,7 @@ xfs_filestream_lookup_ag(
322 322
323 pip = xfs_filestream_get_parent(ip); 323 pip = xfs_filestream_get_parent(ip);
324 if (!pip) 324 if (!pip)
325 goto out; 325 return NULLAGNUMBER;
326 326
327 mru = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino); 327 mru = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino);
328 if (mru) { 328 if (mru) {
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 74efe5b760dc..cb7e8a29dfb6 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -637,12 +637,13 @@ xfs_fs_counts(
637 xfs_mount_t *mp, 637 xfs_mount_t *mp,
638 xfs_fsop_counts_t *cnt) 638 xfs_fsop_counts_t *cnt)
639{ 639{
640 xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT); 640 cnt->allocino = percpu_counter_read_positive(&mp->m_icount);
641 cnt->freeino = percpu_counter_read_positive(&mp->m_ifree);
642 cnt->freedata = percpu_counter_read_positive(&mp->m_fdblocks) -
643 XFS_ALLOC_SET_ASIDE(mp);
644
641 spin_lock(&mp->m_sb_lock); 645 spin_lock(&mp->m_sb_lock);
642 cnt->freedata = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
643 cnt->freertx = mp->m_sb.sb_frextents; 646 cnt->freertx = mp->m_sb.sb_frextents;
644 cnt->freeino = mp->m_sb.sb_ifree;
645 cnt->allocino = mp->m_sb.sb_icount;
646 spin_unlock(&mp->m_sb_lock); 647 spin_unlock(&mp->m_sb_lock);
647 return 0; 648 return 0;
648} 649}
@@ -692,14 +693,9 @@ xfs_reserve_blocks(
692 * what to do. This means that the amount of free space can 693 * what to do. This means that the amount of free space can
693 * change while we do this, so we need to retry if we end up 694 * change while we do this, so we need to retry if we end up
694 * trying to reserve more space than is available. 695 * trying to reserve more space than is available.
695 *
696 * We also use the xfs_mod_incore_sb() interface so that we
697 * don't have to care about whether per cpu counter are
698 * enabled, disabled or even compiled in....
699 */ 696 */
700retry: 697retry:
701 spin_lock(&mp->m_sb_lock); 698 spin_lock(&mp->m_sb_lock);
702 xfs_icsb_sync_counters_locked(mp, 0);
703 699
704 /* 700 /*
705 * If our previous reservation was larger than the current value, 701 * If our previous reservation was larger than the current value,
@@ -716,7 +712,8 @@ retry:
716 } else { 712 } else {
717 __int64_t free; 713 __int64_t free;
718 714
719 free = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); 715 free = percpu_counter_sum(&mp->m_fdblocks) -
716 XFS_ALLOC_SET_ASIDE(mp);
720 if (!free) 717 if (!free)
721 goto out; /* ENOSPC and fdblks_delta = 0 */ 718 goto out; /* ENOSPC and fdblks_delta = 0 */
722 719
@@ -755,8 +752,7 @@ out:
755 * the extra reserve blocks from the reserve..... 752 * the extra reserve blocks from the reserve.....
756 */ 753 */
757 int error; 754 int error;
758 error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, 755 error = xfs_mod_fdblocks(mp, fdblks_delta, 0);
759 fdblks_delta, 0);
760 if (error == -ENOSPC) 756 if (error == -ENOSPC)
761 goto retry; 757 goto retry;
762 } 758 }
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 9771b7ef62ed..76a9f2783282 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -439,11 +439,11 @@ again:
439 *ipp = ip; 439 *ipp = ip;
440 440
441 /* 441 /*
 442 * If we have a real type for an on-disk inode, we can set ops(&unlock) 442 * If we have a real type for an on-disk inode, we can set up the inode
443 * now. If it's a new inode being created, xfs_ialloc will handle it. 443 * now. If it's a new inode being created, xfs_ialloc will handle it.
444 */ 444 */
445 if (xfs_iflags_test(ip, XFS_INEW) && ip->i_d.di_mode != 0) 445 if (xfs_iflags_test(ip, XFS_INEW) && ip->i_d.di_mode != 0)
446 xfs_setup_inode(ip); 446 xfs_setup_existing_inode(ip);
447 return 0; 447 return 0;
448 448
449out_error_or_again: 449out_error_or_again:
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 6163767aa856..d6ebc85192b7 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -117,24 +117,34 @@ xfs_ilock_attr_map_shared(
117} 117}
118 118
119/* 119/*
120 * The xfs inode contains 2 locks: a multi-reader lock called the 120 * The xfs inode contains 3 multi-reader locks: the i_iolock, the i_mmap_lock and
121 * i_iolock and a multi-reader lock called the i_lock. This routine 121 * the i_lock. This routine allows various combinations of the locks to be
122 * allows either or both of the locks to be obtained. 122 * obtained.
123 * 123 *
124 * The 2 locks should always be ordered so that the IO lock is 124 * The 3 locks should always be ordered so that the IO lock is obtained first,
125 * obtained first in order to prevent deadlock. 125 * the mmap lock second and the ilock last in order to prevent deadlock.
126 * 126 *
127 * ip -- the inode being locked 127 * Basic locking order:
128 * lock_flags -- this parameter indicates the inode's locks 128 *
129 * to be locked. It can be: 129 * i_iolock -> i_mmap_lock -> page_lock -> i_lock
130 * XFS_IOLOCK_SHARED, 130 *
131 * XFS_IOLOCK_EXCL, 131 * mmap_sem locking order:
132 * XFS_ILOCK_SHARED, 132 *
133 * XFS_ILOCK_EXCL, 133 * i_iolock -> page lock -> mmap_sem
134 * XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED, 134 * mmap_sem -> i_mmap_lock -> page_lock
135 * XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL, 135 *
136 XFS_IOLOCK_EXCL | XFS_ILOCK_SHARED, 136 * The difference in mmap_sem locking order means that we cannot hold the
137 XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL 137 * i_mmap_lock over syscall-based read(2)/write(2) IO. These IO paths can
138 * fault in pages during copy in/out (for buffered IO) or require the mmap_sem
139 * in get_user_pages() to map the user pages into the kernel address space for
140 * direct IO. Similarly the i_iolock cannot be taken inside a page fault because
141 * page faults already hold the mmap_sem.
142 *
143 * Hence to serialise fully against both syscall and mmap based IO, we need to
144 * take both the i_iolock and the i_mmap_lock. Both locks should *only* be
145 * taken in places where we need to invalidate the page cache in a race
146 * free manner (e.g. truncate, hole punch and other extent manipulation
147 * functions).
138 */ 148 */
139void 149void
140xfs_ilock( 150xfs_ilock(
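
The reworked comment above is the heart of this series: three rwsem-style locks with a fixed order, where syscall IO enters at the iolock level and page faults enter at the mmaplock level. A compilable userspace sketch of those rules, using POSIX rwlocks as stand-ins for mrlock_t (all names here are illustrative):

#include <pthread.h>

struct ilocks {
	pthread_rwlock_t iolock;	/* serialises syscall IO */
	pthread_rwlock_t mmaplock;	/* serialises page faults */
	pthread_rwlock_t ilock;		/* protects inode metadata */
};

/* Extent manipulation (truncate, hole punch): exclude both syscall IO and
 * page faults, so take iolock then mmaplock, and only then the ilock for
 * the metadata change itself. */
static void extent_manipulation(struct ilocks *l)
{
	pthread_rwlock_wrlock(&l->iolock);
	pthread_rwlock_wrlock(&l->mmaplock);
	/* ... wait for dio, invalidate the page cache ... */
	pthread_rwlock_wrlock(&l->ilock);
	/* ... modify the extent map ... */
	pthread_rwlock_unlock(&l->ilock);
	pthread_rwlock_unlock(&l->mmaplock);
	pthread_rwlock_unlock(&l->iolock);
}

/* Page fault: the caller already holds mmap_sem, so only the mmaplock may
 * be taken here; touching the iolock would invert the documented order. */
static void page_fault(struct ilocks *l)
{
	pthread_rwlock_rdlock(&l->mmaplock);
	/* ... fault the page in ... */
	pthread_rwlock_unlock(&l->mmaplock);
}

int main(void)
{
	struct ilocks l = {
		PTHREAD_RWLOCK_INITIALIZER,
		PTHREAD_RWLOCK_INITIALIZER,
		PTHREAD_RWLOCK_INITIALIZER,
	};

	extent_manipulation(&l);
	page_fault(&l);
	return 0;
}
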
@@ -150,6 +160,8 @@ xfs_ilock(
150 */ 160 */
151 ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != 161 ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
152 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); 162 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
163 ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
164 (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
153 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != 165 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
154 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); 166 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
155 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); 167 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
@@ -159,6 +171,11 @@ xfs_ilock(
159 else if (lock_flags & XFS_IOLOCK_SHARED) 171 else if (lock_flags & XFS_IOLOCK_SHARED)
160 mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); 172 mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
161 173
174 if (lock_flags & XFS_MMAPLOCK_EXCL)
175 mrupdate_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
176 else if (lock_flags & XFS_MMAPLOCK_SHARED)
177 mraccess_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
178
162 if (lock_flags & XFS_ILOCK_EXCL) 179 if (lock_flags & XFS_ILOCK_EXCL)
163 mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); 180 mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
164 else if (lock_flags & XFS_ILOCK_SHARED) 181 else if (lock_flags & XFS_ILOCK_SHARED)
@@ -191,6 +208,8 @@ xfs_ilock_nowait(
191 */ 208 */
192 ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != 209 ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
193 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); 210 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
211 ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
212 (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
194 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != 213 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
195 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); 214 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
196 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); 215 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
@@ -202,21 +221,35 @@ xfs_ilock_nowait(
202 if (!mrtryaccess(&ip->i_iolock)) 221 if (!mrtryaccess(&ip->i_iolock))
203 goto out; 222 goto out;
204 } 223 }
224
225 if (lock_flags & XFS_MMAPLOCK_EXCL) {
226 if (!mrtryupdate(&ip->i_mmaplock))
227 goto out_undo_iolock;
228 } else if (lock_flags & XFS_MMAPLOCK_SHARED) {
229 if (!mrtryaccess(&ip->i_mmaplock))
230 goto out_undo_iolock;
231 }
232
205 if (lock_flags & XFS_ILOCK_EXCL) { 233 if (lock_flags & XFS_ILOCK_EXCL) {
206 if (!mrtryupdate(&ip->i_lock)) 234 if (!mrtryupdate(&ip->i_lock))
207 goto out_undo_iolock; 235 goto out_undo_mmaplock;
208 } else if (lock_flags & XFS_ILOCK_SHARED) { 236 } else if (lock_flags & XFS_ILOCK_SHARED) {
209 if (!mrtryaccess(&ip->i_lock)) 237 if (!mrtryaccess(&ip->i_lock))
210 goto out_undo_iolock; 238 goto out_undo_mmaplock;
211 } 239 }
212 return 1; 240 return 1;
213 241
214 out_undo_iolock: 242out_undo_mmaplock:
243 if (lock_flags & XFS_MMAPLOCK_EXCL)
244 mrunlock_excl(&ip->i_mmaplock);
245 else if (lock_flags & XFS_MMAPLOCK_SHARED)
246 mrunlock_shared(&ip->i_mmaplock);
247out_undo_iolock:
215 if (lock_flags & XFS_IOLOCK_EXCL) 248 if (lock_flags & XFS_IOLOCK_EXCL)
216 mrunlock_excl(&ip->i_iolock); 249 mrunlock_excl(&ip->i_iolock);
217 else if (lock_flags & XFS_IOLOCK_SHARED) 250 else if (lock_flags & XFS_IOLOCK_SHARED)
218 mrunlock_shared(&ip->i_iolock); 251 mrunlock_shared(&ip->i_iolock);
219 out: 252out:
220 return 0; 253 return 0;
221} 254}
222 255
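
xfs_ilock_nowait() above acquires in lock order and unwinds in reverse when any trylock fails, so the caller ends up holding either all of the requested locks or none of them. The same shape in userspace (illustrative names):

#include <pthread.h>
#include <stdbool.h>

static bool ilock_nowait(pthread_rwlock_t *iolock,
			 pthread_rwlock_t *mmaplock,
			 pthread_rwlock_t *ilock)
{
	if (pthread_rwlock_trywrlock(iolock) != 0)
		return false;
	if (pthread_rwlock_trywrlock(mmaplock) != 0)
		goto out_undo_iolock;
	if (pthread_rwlock_trywrlock(ilock) != 0)
		goto out_undo_mmaplock;
	return true;			/* all three held */

out_undo_mmaplock:
	pthread_rwlock_unlock(mmaplock);
out_undo_iolock:
	pthread_rwlock_unlock(iolock);
	return false;			/* nothing held */
}

int main(void)
{
	pthread_rwlock_t io = PTHREAD_RWLOCK_INITIALIZER;
	pthread_rwlock_t mm = PTHREAD_RWLOCK_INITIALIZER;
	pthread_rwlock_t i = PTHREAD_RWLOCK_INITIALIZER;

	return ilock_nowait(&io, &mm, &i) ? 0 : 1;
}
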
@@ -244,6 +277,8 @@ xfs_iunlock(
244 */ 277 */
245 ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != 278 ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
246 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); 279 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
280 ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
281 (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
247 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != 282 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
248 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); 283 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
249 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); 284 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
@@ -254,6 +289,11 @@ xfs_iunlock(
254 else if (lock_flags & XFS_IOLOCK_SHARED) 289 else if (lock_flags & XFS_IOLOCK_SHARED)
255 mrunlock_shared(&ip->i_iolock); 290 mrunlock_shared(&ip->i_iolock);
256 291
292 if (lock_flags & XFS_MMAPLOCK_EXCL)
293 mrunlock_excl(&ip->i_mmaplock);
294 else if (lock_flags & XFS_MMAPLOCK_SHARED)
295 mrunlock_shared(&ip->i_mmaplock);
296
257 if (lock_flags & XFS_ILOCK_EXCL) 297 if (lock_flags & XFS_ILOCK_EXCL)
258 mrunlock_excl(&ip->i_lock); 298 mrunlock_excl(&ip->i_lock);
259 else if (lock_flags & XFS_ILOCK_SHARED) 299 else if (lock_flags & XFS_ILOCK_SHARED)
@@ -271,11 +311,14 @@ xfs_ilock_demote(
271 xfs_inode_t *ip, 311 xfs_inode_t *ip,
272 uint lock_flags) 312 uint lock_flags)
273{ 313{
274 ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)); 314 ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL));
275 ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0); 315 ASSERT((lock_flags &
316 ~(XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);
276 317
277 if (lock_flags & XFS_ILOCK_EXCL) 318 if (lock_flags & XFS_ILOCK_EXCL)
278 mrdemote(&ip->i_lock); 319 mrdemote(&ip->i_lock);
320 if (lock_flags & XFS_MMAPLOCK_EXCL)
321 mrdemote(&ip->i_mmaplock);
279 if (lock_flags & XFS_IOLOCK_EXCL) 322 if (lock_flags & XFS_IOLOCK_EXCL)
280 mrdemote(&ip->i_iolock); 323 mrdemote(&ip->i_iolock);
281 324
@@ -294,6 +337,12 @@ xfs_isilocked(
294 return rwsem_is_locked(&ip->i_lock.mr_lock); 337 return rwsem_is_locked(&ip->i_lock.mr_lock);
295 } 338 }
296 339
340 if (lock_flags & (XFS_MMAPLOCK_EXCL|XFS_MMAPLOCK_SHARED)) {
341 if (!(lock_flags & XFS_MMAPLOCK_SHARED))
342 return !!ip->i_mmaplock.mr_writer;
343 return rwsem_is_locked(&ip->i_mmaplock.mr_lock);
344 }
345
297 if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) { 346 if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) {
298 if (!(lock_flags & XFS_IOLOCK_SHARED)) 347 if (!(lock_flags & XFS_IOLOCK_SHARED))
299 return !!ip->i_iolock.mr_writer; 348 return !!ip->i_iolock.mr_writer;
@@ -314,14 +363,27 @@ int xfs_lock_delays;
314#endif 363#endif
315 364
316/* 365/*
317 * Bump the subclass so xfs_lock_inodes() acquires each lock with 366 * Bump the subclass so xfs_lock_inodes() acquires each lock with a different
318 * a different value 367 * value. This shouldn't be called for page fault locking, but we also need to
368 * ensure we don't overrun the number of lockdep subclasses for the iolock or
369 * mmaplock as that is limited to 12 by the mmap lock lockdep annotations.
319 */ 370 */
320static inline int 371static inline int
321xfs_lock_inumorder(int lock_mode, int subclass) 372xfs_lock_inumorder(int lock_mode, int subclass)
322{ 373{
323 if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) 374 if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
375 ASSERT(subclass + XFS_LOCK_INUMORDER <
376 (1 << (XFS_MMAPLOCK_SHIFT - XFS_IOLOCK_SHIFT)));
324 lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT; 377 lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
378 }
379
380 if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) {
381 ASSERT(subclass + XFS_LOCK_INUMORDER <
382 (1 << (XFS_ILOCK_SHIFT - XFS_MMAPLOCK_SHIFT)));
383 lock_mode |= (subclass + XFS_LOCK_INUMORDER) <<
384 XFS_MMAPLOCK_SHIFT;
385 }
386
325 if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) 387 if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
326 lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT; 388 lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;
327 389
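
The new assertions above guard the bit-packing: each lock class owns a small field in the flag word starting at its shift, so a subclass is only valid if it fits the gap before the next class's shift. A self-contained sketch of the encode/decode arithmetic (values mirror the xfs_inode.h changes later in this patch; the base XFS_LOCK_INUMORDER offset is omitted for brevity):

#include <assert.h>
#include <stdio.h>

#define IOLOCK_SHIFT		16
#define MMAPLOCK_SHIFT		20
#define ILOCK_SHIFT		24

#define IOLOCK_DEP_MASK		0x000f0000u
#define MMAPLOCK_DEP_MASK	0x00f00000u

static unsigned int lock_inumorder(unsigned int lock_mode,
				   unsigned int subclass)
{
	/* iolock subclass must fit in bits 16-19, i.e. 0..15 */
	assert(subclass < (1u << (MMAPLOCK_SHIFT - IOLOCK_SHIFT)));
	lock_mode |= subclass << IOLOCK_SHIFT;

	/* mmaplock subclass must fit in bits 20-23 */
	assert(subclass < (1u << (ILOCK_SHIFT - MMAPLOCK_SHIFT)));
	lock_mode |= subclass << MMAPLOCK_SHIFT;

	return lock_mode;
}

int main(void)
{
	unsigned int m = lock_inumorder(0, 3);

	printf("iolock dep %u, mmaplock dep %u\n",
	       (m & IOLOCK_DEP_MASK) >> IOLOCK_SHIFT,
	       (m & MMAPLOCK_DEP_MASK) >> MMAPLOCK_SHIFT);	/* 3, 3 */
	return 0;
}
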
@@ -329,15 +391,14 @@ xfs_lock_inumorder(int lock_mode, int subclass)
329} 391}
330 392
331/* 393/*
332 * The following routine will lock n inodes in exclusive mode. 394 * The following routine will lock n inodes in exclusive mode. We assume the
333 * We assume the caller calls us with the inodes in i_ino order. 395 * caller calls us with the inodes in i_ino order.
334 * 396 *
335 * We need to detect deadlock where an inode that we lock 397 * We need to detect deadlock where an inode that we lock is in the AIL and we
336 * is in the AIL and we start waiting for another inode that is locked 398 * start waiting for another inode that is locked by a thread in a long running
337 * by a thread in a long running transaction (such as truncate). This can 399 * transaction (such as truncate). This can result in deadlock since the long
338 * result in deadlock since the long running trans might need to wait 400 * running trans might need to wait for the inode we just locked in order to
339 * for the inode we just locked in order to push the tail and free space 401 * push the tail and free space in the log.
340 * in the log.
341 */ 402 */
342void 403void
343xfs_lock_inodes( 404xfs_lock_inodes(
@@ -348,30 +409,27 @@ xfs_lock_inodes(
348 int attempts = 0, i, j, try_lock; 409 int attempts = 0, i, j, try_lock;
349 xfs_log_item_t *lp; 410 xfs_log_item_t *lp;
350 411
351 ASSERT(ips && (inodes >= 2)); /* we need at least two */ 412 /* currently supports between 2 and 5 inodes */
413 ASSERT(ips && inodes >= 2 && inodes <= 5);
352 414
353 try_lock = 0; 415 try_lock = 0;
354 i = 0; 416 i = 0;
355
356again: 417again:
357 for (; i < inodes; i++) { 418 for (; i < inodes; i++) {
358 ASSERT(ips[i]); 419 ASSERT(ips[i]);
359 420
360 if (i && (ips[i] == ips[i-1])) /* Already locked */ 421 if (i && (ips[i] == ips[i - 1])) /* Already locked */
361 continue; 422 continue;
362 423
363 /* 424 /*
364 * If try_lock is not set yet, make sure all locked inodes 425 * If try_lock is not set yet, make sure all locked inodes are
365 * are not in the AIL. 426 * not in the AIL. If any are, set try_lock to be used later.
366 * If any are, set try_lock to be used later.
367 */ 427 */
368
369 if (!try_lock) { 428 if (!try_lock) {
370 for (j = (i - 1); j >= 0 && !try_lock; j--) { 429 for (j = (i - 1); j >= 0 && !try_lock; j--) {
371 lp = (xfs_log_item_t *)ips[j]->i_itemp; 430 lp = (xfs_log_item_t *)ips[j]->i_itemp;
372 if (lp && (lp->li_flags & XFS_LI_IN_AIL)) { 431 if (lp && (lp->li_flags & XFS_LI_IN_AIL))
373 try_lock++; 432 try_lock++;
374 }
375 } 433 }
376 } 434 }
377 435
@@ -381,51 +439,42 @@ again:
381 * we can't get any, we must release all we have 439 * we can't get any, we must release all we have
382 * and try again. 440 * and try again.
383 */ 441 */
442 if (!try_lock) {
443 xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i));
444 continue;
445 }
446
447 /* try_lock means we have an inode locked that is in the AIL. */
448 ASSERT(i != 0);
449 if (xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i)))
450 continue;
384 451
385 if (try_lock) { 452 /*
386 /* try_lock must be 0 if i is 0. */ 453 * Unlock all previous guys and try again. xfs_iunlock will try
454 * to push the tail if the inode is in the AIL.
455 */
456 attempts++;
457 for (j = i - 1; j >= 0; j--) {
387 /* 458 /*
388 * try_lock means we have an inode locked 459 * Check to see if we've already unlocked this one. Not
389 * that is in the AIL. 460 * the first one going back, and the inode ptr is the
461 * same.
390 */ 462 */
391 ASSERT(i != 0); 463 if (j != (i - 1) && ips[j] == ips[j + 1])
392 if (!xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i))) { 464 continue;
393 attempts++; 465
394 466 xfs_iunlock(ips[j], lock_mode);
395 /* 467 }
396 * Unlock all previous guys and try again.
397 * xfs_iunlock will try to push the tail
398 * if the inode is in the AIL.
399 */
400
401 for(j = i - 1; j >= 0; j--) {
402
403 /*
404 * Check to see if we've already
405 * unlocked this one.
406 * Not the first one going back,
407 * and the inode ptr is the same.
408 */
409 if ((j != (i - 1)) && ips[j] ==
410 ips[j+1])
411 continue;
412
413 xfs_iunlock(ips[j], lock_mode);
414 }
415 468
416 if ((attempts % 5) == 0) { 469 if ((attempts % 5) == 0) {
417 delay(1); /* Don't just spin the CPU */ 470 delay(1); /* Don't just spin the CPU */
418#ifdef DEBUG 471#ifdef DEBUG
419 xfs_lock_delays++; 472 xfs_lock_delays++;
420#endif 473#endif
421 }
422 i = 0;
423 try_lock = 0;
424 goto again;
425 }
426 } else {
427 xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i));
428 } 474 }
475 i = 0;
476 try_lock = 0;
477 goto again;
429 } 478 }
430 479
431#ifdef DEBUG 480#ifdef DEBUG
@@ -440,10 +489,10 @@ again:
440} 489}
441 490
442/* 491/*
443 * xfs_lock_two_inodes() can only be used to lock one type of lock 492 * xfs_lock_two_inodes() can only be used to lock one type of lock at a time -
444 * at a time - the iolock or the ilock, but not both at once. If 493 * the iolock, the mmaplock or the ilock, but never more than one class at once.
445 * we lock both at once, lockdep will report false positives saying 494 * If we lock two classes together, lockdep will report false positives saying we
446 * we have violated locking orders. 495 * have violated locking orders.
447 */ 496 */
448void 497void
449xfs_lock_two_inodes( 498xfs_lock_two_inodes(
@@ -455,8 +504,12 @@ xfs_lock_two_inodes(
455 int attempts = 0; 504 int attempts = 0;
456 xfs_log_item_t *lp; 505 xfs_log_item_t *lp;
457 506
458 if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) 507 if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
459 ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0); 508 ASSERT(!(lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)));
509 ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
510 } else if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL))
511 ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
512
460 ASSERT(ip0->i_ino != ip1->i_ino); 513 ASSERT(ip0->i_ino != ip1->i_ino);
461 514
462 if (ip0->i_ino > ip1->i_ino) { 515 if (ip0->i_ino > ip1->i_ino) {
@@ -818,7 +871,7 @@ xfs_ialloc(
818 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 871 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
819 xfs_trans_log_inode(tp, ip, flags); 872 xfs_trans_log_inode(tp, ip, flags);
820 873
821 /* now that we have an i_mode we can setup inode ops and unlock */ 874 /* now that we have an i_mode we can setup the inode structure */
822 xfs_setup_inode(ip); 875 xfs_setup_inode(ip);
823 876
824 *ipp = ip; 877 *ipp = ip;
@@ -1235,12 +1288,14 @@ xfs_create(
1235 xfs_trans_cancel(tp, cancel_flags); 1288 xfs_trans_cancel(tp, cancel_flags);
1236 out_release_inode: 1289 out_release_inode:
1237 /* 1290 /*
1238 * Wait until after the current transaction is aborted to 1291 * Wait until after the current transaction is aborted to finish the
1239 * release the inode. This prevents recursive transactions 1292 * setup of the inode and release the inode. This prevents recursive
1240 * and deadlocks from xfs_inactive. 1293 * transactions and deadlocks from xfs_inactive.
1241 */ 1294 */
1242 if (ip) 1295 if (ip) {
1296 xfs_finish_inode_setup(ip);
1243 IRELE(ip); 1297 IRELE(ip);
1298 }
1244 1299
1245 xfs_qm_dqrele(udqp); 1300 xfs_qm_dqrele(udqp);
1246 xfs_qm_dqrele(gdqp); 1301 xfs_qm_dqrele(gdqp);
@@ -1345,12 +1400,14 @@ xfs_create_tmpfile(
1345 xfs_trans_cancel(tp, cancel_flags); 1400 xfs_trans_cancel(tp, cancel_flags);
1346 out_release_inode: 1401 out_release_inode:
1347 /* 1402 /*
1348 * Wait until after the current transaction is aborted to 1403 * Wait until after the current transaction is aborted to finish the
1349 * release the inode. This prevents recursive transactions 1404 * setup of the inode and release the inode. This prevents recursive
1350 * and deadlocks from xfs_inactive. 1405 * transactions and deadlocks from xfs_inactive.
1351 */ 1406 */
1352 if (ip) 1407 if (ip) {
1408 xfs_finish_inode_setup(ip);
1353 IRELE(ip); 1409 IRELE(ip);
1410 }
1354 1411
1355 xfs_qm_dqrele(udqp); 1412 xfs_qm_dqrele(udqp);
1356 xfs_qm_dqrele(gdqp); 1413 xfs_qm_dqrele(gdqp);
@@ -2611,19 +2668,22 @@ xfs_remove(
2611/* 2668/*
2612 * Enter all inodes for a rename transaction into a sorted array. 2669 * Enter all inodes for a rename transaction into a sorted array.
2613 */ 2670 */
2671#define __XFS_SORT_INODES 5
2614STATIC void 2672STATIC void
2615xfs_sort_for_rename( 2673xfs_sort_for_rename(
2616 xfs_inode_t *dp1, /* in: old (source) directory inode */ 2674 struct xfs_inode *dp1, /* in: old (source) directory inode */
2617 xfs_inode_t *dp2, /* in: new (target) directory inode */ 2675 struct xfs_inode *dp2, /* in: new (target) directory inode */
2618 xfs_inode_t *ip1, /* in: inode of old entry */ 2676 struct xfs_inode *ip1, /* in: inode of old entry */
2619 xfs_inode_t *ip2, /* in: inode of new entry, if it 2677 struct xfs_inode *ip2, /* in: inode of new entry */
2620 already exists, NULL otherwise. */ 2678 struct xfs_inode *wip, /* in: whiteout inode */
2621 xfs_inode_t **i_tab,/* out: array of inode returned, sorted */ 2679 struct xfs_inode **i_tab,/* out: sorted array of inodes */
2622 int *num_inodes) /* out: number of inodes in array */ 2680 int *num_inodes) /* in/out: inodes in array */
2623{ 2681{
2624 xfs_inode_t *temp;
2625 int i, j; 2682 int i, j;
2626 2683
2684 ASSERT(*num_inodes == __XFS_SORT_INODES);
2685 memset(i_tab, 0, *num_inodes * sizeof(struct xfs_inode *));
2686
2627 /* 2687 /*
2628 * i_tab contains a list of pointers to inodes. We initialize 2688 * i_tab contains a list of pointers to inodes. We initialize
2629 * the table here & we'll sort it. We will then use it to 2689 * the table here & we'll sort it. We will then use it to
@@ -2631,25 +2691,24 @@ xfs_sort_for_rename(
2631 * 2691 *
2632 * Note that the table may contain duplicates. e.g., dp1 == dp2. 2692 * Note that the table may contain duplicates. e.g., dp1 == dp2.
2633 */ 2693 */
2634 i_tab[0] = dp1; 2694 i = 0;
2635 i_tab[1] = dp2; 2695 i_tab[i++] = dp1;
2636 i_tab[2] = ip1; 2696 i_tab[i++] = dp2;
2637 if (ip2) { 2697 i_tab[i++] = ip1;
2638 *num_inodes = 4; 2698 if (ip2)
2639 i_tab[3] = ip2; 2699 i_tab[i++] = ip2;
2640 } else { 2700 if (wip)
2641 *num_inodes = 3; 2701 i_tab[i++] = wip;
2642 i_tab[3] = NULL; 2702 *num_inodes = i;
2643 }
2644 2703
2645 /* 2704 /*
2646 * Sort the elements via bubble sort. (Remember, there are at 2705 * Sort the elements via bubble sort. (Remember, there are at
2647 * most 4 elements to sort, so this is adequate.) 2706 * most 5 elements to sort, so this is adequate.)
2648 */ 2707 */
2649 for (i = 0; i < *num_inodes; i++) { 2708 for (i = 0; i < *num_inodes; i++) {
2650 for (j = 1; j < *num_inodes; j++) { 2709 for (j = 1; j < *num_inodes; j++) {
2651 if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) { 2710 if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) {
2652 temp = i_tab[j]; 2711 struct xfs_inode *temp = i_tab[j];
2653 i_tab[j] = i_tab[j-1]; 2712 i_tab[j] = i_tab[j-1];
2654 i_tab[j-1] = temp; 2713 i_tab[j-1] = temp;
2655 } 2714 }
@@ -2657,6 +2716,31 @@ xfs_sort_for_rename(
2657 } 2716 }
2658} 2717}
2659 2718
2719static int
2720xfs_finish_rename(
2721 struct xfs_trans *tp,
2722 struct xfs_bmap_free *free_list)
2723{
2724 int committed = 0;
2725 int error;
2726
2727 /*
2728 * If this is a synchronous mount, make sure that the rename transaction
2729 * goes to disk before returning to the user.
2730 */
2731 if (tp->t_mountp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
2732 xfs_trans_set_sync(tp);
2733
2734 error = xfs_bmap_finish(&tp, free_list, &committed);
2735 if (error) {
2736 xfs_bmap_cancel(free_list);
2737 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
2738 return error;
2739 }
2740
2741 return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
2742}
2743
2660/* 2744/*
2661 * xfs_cross_rename() 2745 * xfs_cross_rename()
2662 * 2746 *
@@ -2685,14 +2769,14 @@ xfs_cross_rename(
2685 ip2->i_ino, 2769 ip2->i_ino,
2686 first_block, free_list, spaceres); 2770 first_block, free_list, spaceres);
2687 if (error) 2771 if (error)
2688 goto out; 2772 goto out_trans_abort;
2689 2773
2690 /* Swap inode number for dirent in second parent */ 2774 /* Swap inode number for dirent in second parent */
2691 error = xfs_dir_replace(tp, dp2, name2, 2775 error = xfs_dir_replace(tp, dp2, name2,
2692 ip1->i_ino, 2776 ip1->i_ino,
2693 first_block, free_list, spaceres); 2777 first_block, free_list, spaceres);
2694 if (error) 2778 if (error)
2695 goto out; 2779 goto out_trans_abort;
2696 2780
2697 /* 2781 /*
2698 * If we're renaming one or more directories across different parents, 2782 * If we're renaming one or more directories across different parents,
@@ -2707,16 +2791,16 @@ xfs_cross_rename(
2707 dp1->i_ino, first_block, 2791 dp1->i_ino, first_block,
2708 free_list, spaceres); 2792 free_list, spaceres);
2709 if (error) 2793 if (error)
2710 goto out; 2794 goto out_trans_abort;
2711 2795
2712 /* transfer ip2 ".." reference to dp1 */ 2796 /* transfer ip2 ".." reference to dp1 */
2713 if (!S_ISDIR(ip1->i_d.di_mode)) { 2797 if (!S_ISDIR(ip1->i_d.di_mode)) {
2714 error = xfs_droplink(tp, dp2); 2798 error = xfs_droplink(tp, dp2);
2715 if (error) 2799 if (error)
2716 goto out; 2800 goto out_trans_abort;
2717 error = xfs_bumplink(tp, dp1); 2801 error = xfs_bumplink(tp, dp1);
2718 if (error) 2802 if (error)
2719 goto out; 2803 goto out_trans_abort;
2720 } 2804 }
2721 2805
2722 /* 2806 /*
@@ -2734,16 +2818,16 @@ xfs_cross_rename(
2734 dp2->i_ino, first_block, 2818 dp2->i_ino, first_block,
2735 free_list, spaceres); 2819 free_list, spaceres);
2736 if (error) 2820 if (error)
2737 goto out; 2821 goto out_trans_abort;
2738 2822
2739 /* transfer ip1 ".." reference to dp2 */ 2823 /* transfer ip1 ".." reference to dp2 */
2740 if (!S_ISDIR(ip2->i_d.di_mode)) { 2824 if (!S_ISDIR(ip2->i_d.di_mode)) {
2741 error = xfs_droplink(tp, dp1); 2825 error = xfs_droplink(tp, dp1);
2742 if (error) 2826 if (error)
2743 goto out; 2827 goto out_trans_abort;
2744 error = xfs_bumplink(tp, dp2); 2828 error = xfs_bumplink(tp, dp2);
2745 if (error) 2829 if (error)
2746 goto out; 2830 goto out_trans_abort;
2747 } 2831 }
2748 2832
2749 /* 2833 /*
@@ -2771,66 +2855,108 @@ xfs_cross_rename(
2771 } 2855 }
2772 xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2856 xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
2773 xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE); 2857 xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE);
2774out: 2858 return xfs_finish_rename(tp, free_list);
2859
2860out_trans_abort:
2861 xfs_bmap_cancel(free_list);
2862 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
2775 return error; 2863 return error;
2776} 2864}
2777 2865
2778/* 2866/*
2867 * xfs_rename_alloc_whiteout()
2868 *
2869 * Return a referenced, unlinked, unlocked inode that can be used as a
2870 * whiteout in a rename transaction. We use a tmpfile inode here so that if we
2871 * crash between allocating the inode and linking it into the rename transaction
2872 * recovery will free the inode and we won't leak it.
2873 */
2874static int
2875xfs_rename_alloc_whiteout(
2876 struct xfs_inode *dp,
2877 struct xfs_inode **wip)
2878{
2879 struct xfs_inode *tmpfile;
2880 int error;
2881
2882 error = xfs_create_tmpfile(dp, NULL, S_IFCHR | WHITEOUT_MODE, &tmpfile);
2883 if (error)
2884 return error;
2885
2886 /* Satisfy xfs_bumplink that this is a real tmpfile */
2887 xfs_finish_inode_setup(tmpfile);
2888 VFS_I(tmpfile)->i_state |= I_LINKABLE;
2889
2890 *wip = tmpfile;
2891 return 0;
2892}
2893
2894/*
2779 * xfs_rename 2895 * xfs_rename
2780 */ 2896 */
2781int 2897int
2782xfs_rename( 2898xfs_rename(
2783 xfs_inode_t *src_dp, 2899 struct xfs_inode *src_dp,
2784 struct xfs_name *src_name, 2900 struct xfs_name *src_name,
2785 xfs_inode_t *src_ip, 2901 struct xfs_inode *src_ip,
2786 xfs_inode_t *target_dp, 2902 struct xfs_inode *target_dp,
2787 struct xfs_name *target_name, 2903 struct xfs_name *target_name,
2788 xfs_inode_t *target_ip, 2904 struct xfs_inode *target_ip,
2789 unsigned int flags) 2905 unsigned int flags)
2790{ 2906{
2791 xfs_trans_t *tp = NULL; 2907 struct xfs_mount *mp = src_dp->i_mount;
2792 xfs_mount_t *mp = src_dp->i_mount; 2908 struct xfs_trans *tp;
2793 int new_parent; /* moving to a new dir */ 2909 struct xfs_bmap_free free_list;
2794 int src_is_directory; /* src_name is a directory */ 2910 xfs_fsblock_t first_block;
2795 int error; 2911 struct xfs_inode *wip = NULL; /* whiteout inode */
2796 xfs_bmap_free_t free_list; 2912 struct xfs_inode *inodes[__XFS_SORT_INODES];
2797 xfs_fsblock_t first_block; 2913 int num_inodes = __XFS_SORT_INODES;
2798 int cancel_flags; 2914 bool new_parent = (src_dp != target_dp);
2799 int committed; 2915 bool src_is_directory = S_ISDIR(src_ip->i_d.di_mode);
2800 xfs_inode_t *inodes[4]; 2916 int cancel_flags = 0;
2801 int spaceres; 2917 int spaceres;
2802 int num_inodes; 2918 int error;
2803 2919
2804 trace_xfs_rename(src_dp, target_dp, src_name, target_name); 2920 trace_xfs_rename(src_dp, target_dp, src_name, target_name);
2805 2921
2806 new_parent = (src_dp != target_dp); 2922 if ((flags & RENAME_EXCHANGE) && !target_ip)
2807 src_is_directory = S_ISDIR(src_ip->i_d.di_mode); 2923 return -EINVAL;
2808 2924
2809 xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, 2925 /*
2926 * If we are doing a whiteout operation, allocate the whiteout inode
2927 * we will be placing at the target and ensure the type is set
2928 * appropriately.
2929 */
2930 if (flags & RENAME_WHITEOUT) {
2931 ASSERT(!(flags & (RENAME_NOREPLACE | RENAME_EXCHANGE)));
2932 error = xfs_rename_alloc_whiteout(target_dp, &wip);
2933 if (error)
2934 return error;
2935
2936 /* setup target dirent info as whiteout */
2937 src_name->type = XFS_DIR3_FT_CHRDEV;
2938 }
2939
2940 xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, wip,
2810 inodes, &num_inodes); 2941 inodes, &num_inodes);
2811 2942
2812 xfs_bmap_init(&free_list, &first_block);
2813 tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME); 2943 tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME);
2814 cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
2815 spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len); 2944 spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len);
2816 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, spaceres, 0); 2945 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, spaceres, 0);
2817 if (error == -ENOSPC) { 2946 if (error == -ENOSPC) {
2818 spaceres = 0; 2947 spaceres = 0;
2819 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, 0, 0); 2948 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, 0, 0);
2820 } 2949 }
2821 if (error) { 2950 if (error)
2822 xfs_trans_cancel(tp, 0); 2951 goto out_trans_cancel;
2823 goto std_return; 2952 cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
2824 }
2825 2953
2826 /* 2954 /*
2827 * Attach the dquots to the inodes 2955 * Attach the dquots to the inodes
2828 */ 2956 */
2829 error = xfs_qm_vop_rename_dqattach(inodes); 2957 error = xfs_qm_vop_rename_dqattach(inodes);
2830 if (error) { 2958 if (error)
2831 xfs_trans_cancel(tp, cancel_flags); 2959 goto out_trans_cancel;
2832 goto std_return;
2833 }
2834 2960
2835 /* 2961 /*
2836 * Lock all the participating inodes. Depending upon whether 2962 * Lock all the participating inodes. Depending upon whether
@@ -2851,6 +2977,8 @@ xfs_rename(
2851 xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL); 2977 xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL);
2852 if (target_ip) 2978 if (target_ip)
2853 xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL); 2979 xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL);
2980 if (wip)
2981 xfs_trans_ijoin(tp, wip, XFS_ILOCK_EXCL);
2854 2982
2855 /* 2983 /*
2856 * If we are using project inheritance, we only allow renames 2984 * If we are using project inheritance, we only allow renames
@@ -2860,24 +2988,16 @@ xfs_rename(
2860 if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && 2988 if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
2861 (xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) { 2989 (xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) {
2862 error = -EXDEV; 2990 error = -EXDEV;
2863 goto error_return; 2991 goto out_trans_cancel;
2864 } 2992 }
2865 2993
2866 /* 2994 xfs_bmap_init(&free_list, &first_block);
2867 * Handle RENAME_EXCHANGE flags 2995
2868 */ 2996 /* RENAME_EXCHANGE is unique from here on. */
2869 if (flags & RENAME_EXCHANGE) { 2997 if (flags & RENAME_EXCHANGE)
2870 if (target_ip == NULL) { 2998 return xfs_cross_rename(tp, src_dp, src_name, src_ip,
2871 error = -EINVAL; 2999 target_dp, target_name, target_ip,
2872 goto error_return; 3000 &free_list, &first_block, spaceres);
2873 }
2874 error = xfs_cross_rename(tp, src_dp, src_name, src_ip,
2875 target_dp, target_name, target_ip,
2876 &free_list, &first_block, spaceres);
2877 if (error)
2878 goto abort_return;
2879 goto finish_rename;
2880 }
2881 3001
2882 /* 3002 /*
2883 * Set up the target. 3003 * Set up the target.
@@ -2890,7 +3010,7 @@ xfs_rename(
2890 if (!spaceres) { 3010 if (!spaceres) {
2891 error = xfs_dir_canenter(tp, target_dp, target_name); 3011 error = xfs_dir_canenter(tp, target_dp, target_name);
2892 if (error) 3012 if (error)
2893 goto error_return; 3013 goto out_trans_cancel;
2894 } 3014 }
2895 /* 3015 /*
2896 * If target does not exist and the rename crosses 3016 * If target does not exist and the rename crosses
@@ -2901,9 +3021,9 @@ xfs_rename(
2901 src_ip->i_ino, &first_block, 3021 src_ip->i_ino, &first_block,
2902 &free_list, spaceres); 3022 &free_list, spaceres);
2903 if (error == -ENOSPC) 3023 if (error == -ENOSPC)
2904 goto error_return; 3024 goto out_bmap_cancel;
2905 if (error) 3025 if (error)
2906 goto abort_return; 3026 goto out_trans_abort;
2907 3027
2908 xfs_trans_ichgtime(tp, target_dp, 3028 xfs_trans_ichgtime(tp, target_dp,
2909 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 3029 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -2911,7 +3031,7 @@ xfs_rename(
2911 if (new_parent && src_is_directory) { 3031 if (new_parent && src_is_directory) {
2912 error = xfs_bumplink(tp, target_dp); 3032 error = xfs_bumplink(tp, target_dp);
2913 if (error) 3033 if (error)
2914 goto abort_return; 3034 goto out_trans_abort;
2915 } 3035 }
2916 } else { /* target_ip != NULL */ 3036 } else { /* target_ip != NULL */
2917 /* 3037 /*
@@ -2926,7 +3046,7 @@ xfs_rename(
2926 if (!(xfs_dir_isempty(target_ip)) || 3046 if (!(xfs_dir_isempty(target_ip)) ||
2927 (target_ip->i_d.di_nlink > 2)) { 3047 (target_ip->i_d.di_nlink > 2)) {
2928 error = -EEXIST; 3048 error = -EEXIST;
2929 goto error_return; 3049 goto out_trans_cancel;
2930 } 3050 }
2931 } 3051 }
2932 3052
@@ -2943,7 +3063,7 @@ xfs_rename(
2943 src_ip->i_ino, 3063 src_ip->i_ino,
2944 &first_block, &free_list, spaceres); 3064 &first_block, &free_list, spaceres);
2945 if (error) 3065 if (error)
2946 goto abort_return; 3066 goto out_trans_abort;
2947 3067
2948 xfs_trans_ichgtime(tp, target_dp, 3068 xfs_trans_ichgtime(tp, target_dp,
2949 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 3069 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -2954,7 +3074,7 @@ xfs_rename(
2954 */ 3074 */
2955 error = xfs_droplink(tp, target_ip); 3075 error = xfs_droplink(tp, target_ip);
2956 if (error) 3076 if (error)
2957 goto abort_return; 3077 goto out_trans_abort;
2958 3078
2959 if (src_is_directory) { 3079 if (src_is_directory) {
2960 /* 3080 /*
@@ -2962,7 +3082,7 @@ xfs_rename(
2962 */ 3082 */
2963 error = xfs_droplink(tp, target_ip); 3083 error = xfs_droplink(tp, target_ip);
2964 if (error) 3084 if (error)
2965 goto abort_return; 3085 goto out_trans_abort;
2966 } 3086 }
2967 } /* target_ip != NULL */ 3087 } /* target_ip != NULL */
2968 3088
@@ -2979,7 +3099,7 @@ xfs_rename(
2979 &first_block, &free_list, spaceres); 3099 &first_block, &free_list, spaceres);
2980 ASSERT(error != -EEXIST); 3100 ASSERT(error != -EEXIST);
2981 if (error) 3101 if (error)
2982 goto abort_return; 3102 goto out_trans_abort;
2983 } 3103 }
2984 3104
2985 /* 3105 /*
@@ -3005,49 +3125,67 @@ xfs_rename(
3005 */ 3125 */
3006 error = xfs_droplink(tp, src_dp); 3126 error = xfs_droplink(tp, src_dp);
3007 if (error) 3127 if (error)
3008 goto abort_return; 3128 goto out_trans_abort;
3009 } 3129 }
3010 3130
3011 error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino, 3131 /*
3132 * For whiteouts, we only need to update the source dirent with the
3133 * inode number of the whiteout inode rather than removing it
3134 * altogether.
3135 */
3136 if (wip) {
3137 error = xfs_dir_replace(tp, src_dp, src_name, wip->i_ino,
3012 &first_block, &free_list, spaceres); 3138 &first_block, &free_list, spaceres);
3139 } else
3140 error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
3141 &first_block, &free_list, spaceres);
3013 if (error) 3142 if (error)
3014 goto abort_return; 3143 goto out_trans_abort;
3015
3016 xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
3017 xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
3018 if (new_parent)
3019 xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
3020 3144
3021finish_rename:
3022 /* 3145 /*
3023 * If this is a synchronous mount, make sure that the 3146 * For whiteouts, we need to bump the link count on the whiteout inode.
3024 * rename transaction goes to disk before returning to 3147 * This means that failures all the way up to this point leave the inode
3025 * the user. 3148 * on the unlinked list and so cleanup is a simple matter of dropping
3149 * the remaining reference to it. If we fail here after bumping the link
3150 * count, we're shutting down the filesystem so we'll never see the
3151 * intermediate state on disk.
3026 */ 3152 */
3027 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 3153 if (wip) {
3028 xfs_trans_set_sync(tp); 3154 ASSERT(wip->i_d.di_nlink == 0);
3029 } 3155 error = xfs_bumplink(tp, wip);
3156 if (error)
3157 goto out_trans_abort;
3158 error = xfs_iunlink_remove(tp, wip);
3159 if (error)
3160 goto out_trans_abort;
3161 xfs_trans_log_inode(tp, wip, XFS_ILOG_CORE);
3030 3162
3031 error = xfs_bmap_finish(&tp, &free_list, &committed); 3163 /*
3032 if (error) { 3164 * Now we have a real link, clear the "I'm a tmpfile" state
3033 xfs_bmap_cancel(&free_list); 3165 * flag from the inode so it doesn't accidentally get misused in
3034 xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | 3166 * future.
3035 XFS_TRANS_ABORT)); 3167 */
3036 goto std_return; 3168 VFS_I(wip)->i_state &= ~I_LINKABLE;
3037 } 3169 }
3038 3170
3039 /* 3171 xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
3040 * trans_commit will unlock src_ip, target_ip & decrement 3172 xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
3041 * the vnode references. 3173 if (new_parent)
3042 */ 3174 xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
3043 return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
3044 3175
3045 abort_return: 3176 error = xfs_finish_rename(tp, &free_list);
3177 if (wip)
3178 IRELE(wip);
3179 return error;
3180
3181out_trans_abort:
3046 cancel_flags |= XFS_TRANS_ABORT; 3182 cancel_flags |= XFS_TRANS_ABORT;
3047 error_return: 3183out_bmap_cancel:
3048 xfs_bmap_cancel(&free_list); 3184 xfs_bmap_cancel(&free_list);
3185out_trans_cancel:
3049 xfs_trans_cancel(tp, cancel_flags); 3186 xfs_trans_cancel(tp, cancel_flags);
3050 std_return: 3187 if (wip)
3188 IRELE(wip);
3051 return error; 3189 return error;
3052} 3190}
3053 3191
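
From userspace, the path added above is reached via renameat2() with RENAME_WHITEOUT: the source is atomically renamed to the target and a 0:0 character-device "whiteout" dirent is left at the source, which overlay filesystems use to mask lower-layer entries. A sketch assuming glibc's renameat2() wrapper (glibc 2.28 and later; older systems need syscall(SYS_renameat2, ...)) and a kernel/filesystem supporting the flag:

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>

#ifndef RENAME_WHITEOUT
#define RENAME_WHITEOUT (1 << 2)	/* from <linux/fs.h> */
#endif

int main(int argc, char **argv)
{
	if (argc != 3) {
		fprintf(stderr, "usage: %s <src> <dst>\n", argv[0]);
		return 2;
	}

	if (renameat2(AT_FDCWD, argv[1], AT_FDCWD, argv[2],
		      RENAME_WHITEOUT) != 0) {
		fprintf(stderr, "renameat2: %s\n", strerror(errno));
		return 1;
	}
	return 0;	/* argv[1] is now a whiteout, argv[2] the renamed file */
}
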
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index a1cd55f3f351..8f22d20368d8 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -56,6 +56,7 @@ typedef struct xfs_inode {
56 struct xfs_inode_log_item *i_itemp; /* logging information */ 56 struct xfs_inode_log_item *i_itemp; /* logging information */
57 mrlock_t i_lock; /* inode lock */ 57 mrlock_t i_lock; /* inode lock */
58 mrlock_t i_iolock; /* inode IO lock */ 58 mrlock_t i_iolock; /* inode IO lock */
59 mrlock_t i_mmaplock; /* inode mmap IO lock */
59 atomic_t i_pincount; /* inode pin count */ 60 atomic_t i_pincount; /* inode pin count */
60 spinlock_t i_flags_lock; /* inode i_flags lock */ 61 spinlock_t i_flags_lock; /* inode i_flags lock */
61 /* Miscellaneous state. */ 62 /* Miscellaneous state. */
@@ -263,15 +264,20 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
263#define XFS_IOLOCK_SHARED (1<<1) 264#define XFS_IOLOCK_SHARED (1<<1)
264#define XFS_ILOCK_EXCL (1<<2) 265#define XFS_ILOCK_EXCL (1<<2)
265#define XFS_ILOCK_SHARED (1<<3) 266#define XFS_ILOCK_SHARED (1<<3)
267#define XFS_MMAPLOCK_EXCL (1<<4)
268#define XFS_MMAPLOCK_SHARED (1<<5)
266 269
267#define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \ 270#define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \
268 | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED) 271 | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED \
272 | XFS_MMAPLOCK_EXCL | XFS_MMAPLOCK_SHARED)
269 273
270#define XFS_LOCK_FLAGS \ 274#define XFS_LOCK_FLAGS \
271 { XFS_IOLOCK_EXCL, "IOLOCK_EXCL" }, \ 275 { XFS_IOLOCK_EXCL, "IOLOCK_EXCL" }, \
272 { XFS_IOLOCK_SHARED, "IOLOCK_SHARED" }, \ 276 { XFS_IOLOCK_SHARED, "IOLOCK_SHARED" }, \
273 { XFS_ILOCK_EXCL, "ILOCK_EXCL" }, \ 277 { XFS_ILOCK_EXCL, "ILOCK_EXCL" }, \
274 { XFS_ILOCK_SHARED, "ILOCK_SHARED" } 278 { XFS_ILOCK_SHARED, "ILOCK_SHARED" }, \
279 { XFS_MMAPLOCK_EXCL, "MMAPLOCK_EXCL" }, \
280 { XFS_MMAPLOCK_SHARED, "MMAPLOCK_SHARED" }
275 281
276 282
277/* 283/*
@@ -302,17 +308,26 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
302#define XFS_IOLOCK_SHIFT 16 308#define XFS_IOLOCK_SHIFT 16
303#define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT) 309#define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT)
304 310
311#define XFS_MMAPLOCK_SHIFT 20
312
305#define XFS_ILOCK_SHIFT 24 313#define XFS_ILOCK_SHIFT 24
306#define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT) 314#define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT)
307#define XFS_ILOCK_RTBITMAP (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT) 315#define XFS_ILOCK_RTBITMAP (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT)
308#define XFS_ILOCK_RTSUM (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT) 316#define XFS_ILOCK_RTSUM (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT)
309 317
310#define XFS_IOLOCK_DEP_MASK 0x00ff0000 318#define XFS_IOLOCK_DEP_MASK 0x000f0000
319#define XFS_MMAPLOCK_DEP_MASK 0x00f00000
311#define XFS_ILOCK_DEP_MASK 0xff000000 320#define XFS_ILOCK_DEP_MASK 0xff000000
312#define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | XFS_ILOCK_DEP_MASK) 321#define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | \
322 XFS_MMAPLOCK_DEP_MASK | \
323 XFS_ILOCK_DEP_MASK)
313 324
314#define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT) 325#define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) \
315#define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT) 326 >> XFS_IOLOCK_SHIFT)
327#define XFS_MMAPLOCK_DEP(flags) (((flags) & XFS_MMAPLOCK_DEP_MASK) \
328 >> XFS_MMAPLOCK_SHIFT)
329#define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) \
330 >> XFS_ILOCK_SHIFT)
316 331
317/* 332/*
318 * For multiple groups support: if S_ISGID bit is set in the parent 333 * For multiple groups support: if S_ISGID bit is set in the parent
@@ -391,6 +406,28 @@ int xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset,
391int xfs_iozero(struct xfs_inode *ip, loff_t pos, size_t count); 406int xfs_iozero(struct xfs_inode *ip, loff_t pos, size_t count);
392 407
393 408
409/* from xfs_iops.c */
410/*
411 * When setting up a newly allocated inode, we need to call
412 * xfs_finish_inode_setup() once the inode is fully instantiated at
413 * the VFS level to prevent the rest of the world seeing the inode
414 * before we've completed instantiation. Otherwise we can do it
415 * the moment the inode lookup is complete.
416 */
417extern void xfs_setup_inode(struct xfs_inode *ip);
418static inline void xfs_finish_inode_setup(struct xfs_inode *ip)
419{
420 xfs_iflags_clear(ip, XFS_INEW);
421 barrier();
422 unlock_new_inode(VFS_I(ip));
423}
424
425static inline void xfs_setup_existing_inode(struct xfs_inode *ip)
426{
427 xfs_setup_inode(ip);
428 xfs_finish_inode_setup(ip);
429}
430
394#define IHOLD(ip) \ 431#define IHOLD(ip) \
395do { \ 432do { \
396 ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ 433 ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \
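
The split above is a publish-after-init pattern: xfs_finish_inode_setup() clears XFS_INEW only once the inode is fully instantiated, with barrier() keeping the flag clear ordered before unlock_new_inode() wakes any waiters. A userspace analogue using C11 release/acquire ordering (illustrative names):

#include <stdatomic.h>
#include <stdbool.h>

struct obj {
	int payload;		/* plain data, initialised first */
	atomic_bool published;	/* the "no longer INEW" flag */
};

static void finish_setup(struct obj *o, int value)
{
	o->payload = value;
	/* release: all prior stores are visible before the flag flips */
	atomic_store_explicit(&o->published, true, memory_order_release);
}

static bool try_use(struct obj *o, int *out)
{
	/* acquire: pairs with the release store in finish_setup() */
	if (!atomic_load_explicit(&o->published, memory_order_acquire))
		return false;	/* still being set up; don't touch it */
	*out = o->payload;
	return true;
}

int main(void)
{
	struct obj o = { 0 };
	int v;

	finish_setup(&o, 42);
	return (try_use(&o, &v) && v == 42) ? 0 : 1;
}
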
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index ac4feae45eb3..5f4a396f5186 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -631,7 +631,7 @@ xfs_ioc_space(
631 631
632 if (filp->f_flags & O_DSYNC) 632 if (filp->f_flags & O_DSYNC)
633 flags |= XFS_PREALLOC_SYNC; 633 flags |= XFS_PREALLOC_SYNC;
634 if (ioflags & XFS_IO_INVIS) 634 if (ioflags & XFS_IO_INVIS)
635 flags |= XFS_PREALLOC_INVISIBLE; 635 flags |= XFS_PREALLOC_INVISIBLE;
636 636
637 error = mnt_want_write_file(filp); 637 error = mnt_want_write_file(filp);
@@ -639,10 +639,13 @@ xfs_ioc_space(
639 return error; 639 return error;
640 640
641 xfs_ilock(ip, iolock); 641 xfs_ilock(ip, iolock);
642 error = xfs_break_layouts(inode, &iolock); 642 error = xfs_break_layouts(inode, &iolock, false);
643 if (error) 643 if (error)
644 goto out_unlock; 644 goto out_unlock;
645 645
646 xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
647 iolock |= XFS_MMAPLOCK_EXCL;
648
646 switch (bf->l_whence) { 649 switch (bf->l_whence) {
647 case 0: /*SEEK_SET*/ 650 case 0: /*SEEK_SET*/
648 break; 651 break;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index ccb1dd0d509e..38e633bad8c2 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -460,8 +460,7 @@ xfs_iomap_prealloc_size(
460 alloc_blocks = XFS_FILEOFF_MIN(roundup_pow_of_two(MAXEXTLEN), 460 alloc_blocks = XFS_FILEOFF_MIN(roundup_pow_of_two(MAXEXTLEN),
461 alloc_blocks); 461 alloc_blocks);
462 462
463 xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT); 463 freesp = percpu_counter_read_positive(&mp->m_fdblocks);
464 freesp = mp->m_sb.sb_fdblocks;
465 if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) { 464 if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) {
466 shift = 2; 465 shift = 2;
467 if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT]) 466 if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT])
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index e53a90331422..2f1839e4dd1b 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -187,6 +187,8 @@ xfs_generic_create(
187 else 187 else
188 d_instantiate(dentry, inode); 188 d_instantiate(dentry, inode);
189 189
190 xfs_finish_inode_setup(ip);
191
190 out_free_acl: 192 out_free_acl:
191 if (default_acl) 193 if (default_acl)
192 posix_acl_release(default_acl); 194 posix_acl_release(default_acl);
@@ -195,6 +197,7 @@ xfs_generic_create(
195 return error; 197 return error;
196 198
197 out_cleanup_inode: 199 out_cleanup_inode:
200 xfs_finish_inode_setup(ip);
198 if (!tmpfile) 201 if (!tmpfile)
199 xfs_cleanup_inode(dir, inode, dentry); 202 xfs_cleanup_inode(dir, inode, dentry);
200 iput(inode); 203 iput(inode);
@@ -367,9 +370,11 @@ xfs_vn_symlink(
367 goto out_cleanup_inode; 370 goto out_cleanup_inode;
368 371
369 d_instantiate(dentry, inode); 372 d_instantiate(dentry, inode);
373 xfs_finish_inode_setup(cip);
370 return 0; 374 return 0;
371 375
372 out_cleanup_inode: 376 out_cleanup_inode:
377 xfs_finish_inode_setup(cip);
373 xfs_cleanup_inode(dir, inode, dentry); 378 xfs_cleanup_inode(dir, inode, dentry);
374 iput(inode); 379 iput(inode);
375 out: 380 out:
@@ -389,7 +394,7 @@ xfs_vn_rename(
389 struct xfs_name oname; 394 struct xfs_name oname;
390 struct xfs_name nname; 395 struct xfs_name nname;
391 396
392 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) 397 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
393 return -EINVAL; 398 return -EINVAL;
394 399
395 /* if we are exchanging files, we need to set i_mode of both files */ 400 /* if we are exchanging files, we need to set i_mode of both files */
@@ -766,6 +771,7 @@ xfs_setattr_size(
766 return error; 771 return error;
767 772
768 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); 773 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
774 ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL));
769 ASSERT(S_ISREG(ip->i_d.di_mode)); 775 ASSERT(S_ISREG(ip->i_d.di_mode));
770 ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| 776 ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
771 ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0); 777 ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
@@ -829,55 +835,27 @@ xfs_setattr_size(
829 inode_dio_wait(inode); 835 inode_dio_wait(inode);
830 836
831 /* 837 /*
832 * Do all the page cache truncate work outside the transaction context 838 * We've already locked out new page faults, so now we can safely remove
833 * as the "lock" order is page lock->log space reservation. i.e. 839 * pages from the page cache knowing they won't get refaulted until we
834 * locking pages inside the transaction can ABBA deadlock with 840 * drop the XFS_MMAPLOCK_EXCL lock after the extent manipulations are
835 * writeback. We have to do the VFS inode size update before we truncate 841 * complete. The truncate_setsize() call also cleans partial EOF page
836 * the pagecache, however, to avoid racing with page faults beyond the 842 * PTEs on extending truncates and hence ensures sub-page block size
837 * new EOF they are not serialised against truncate operations except by 843 * filesystems are correctly handled, too.
838 * page locks and size updates.
839 * 844 *
840 * Hence we are in a situation where a truncate can fail with ENOMEM 845 * We have to do all the page cache truncate work outside the
841 * from xfs_trans_reserve(), but having already truncated the in-memory 846 * transaction context as the "lock" order is page lock->log space
842 * version of the file (i.e. made user visible changes). There's not 847 * reservation as defined by extent allocation in the writeback path.
843 * much we can do about this, except to hope that the caller sees ENOMEM 848 * Hence a truncate can fail with ENOMEM from xfs_trans_reserve(), but
844 * and retries the truncate operation. 849 * having already truncated the in-memory version of the file (i.e. made
850 * user visible changes). There's not much we can do about this, except
851 * to hope that the caller sees ENOMEM and retries the truncate
852 * operation.
845 */ 853 */
846 error = block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks); 854 error = block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks);
847 if (error) 855 if (error)
848 return error; 856 return error;
849 truncate_setsize(inode, newsize); 857 truncate_setsize(inode, newsize);
850 858
851 /*
852 * The "we can't serialise against page faults" pain gets worse.
853 *
854 * If the file is mapped then we have to clean the page at the old EOF
855 * when extending the file. Extending the file can expose changes the
856 * underlying page mapping (e.g. from beyond EOF to a hole or
857 * unwritten), and so on the next attempt to write to that page we need
858 * to remap it for write. i.e. we need .page_mkwrite() to be called.
859 * Hence we need to clean the page to clean the pte and so a new write
860 * fault will be triggered appropriately.
861 *
862 * If we do it before we change the inode size, then we can race with a
863 * page fault that maps the page with exactly the same problem. If we do
864 * it after we change the file size, then a new page fault can come in
865 * and allocate space before we've run the rest of the truncate
866 * transaction. That's kinda grotesque, but it's better than have data
867 * over a hole, and so that's the lesser evil that has been chosen here.
868 *
869 * The real solution, however, is to have some mechanism for locking out
870 * page faults while a truncate is in progress.
871 */
872 if (newsize > oldsize && mapping_mapped(VFS_I(ip)->i_mapping)) {
873 error = filemap_write_and_wait_range(
874 VFS_I(ip)->i_mapping,
875 round_down(oldsize, PAGE_CACHE_SIZE),
876 round_up(oldsize, PAGE_CACHE_SIZE) - 1);
877 if (error)
878 return error;
879 }
880
881 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); 859 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
882 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); 860 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
883 if (error) 861 if (error)
@@ -975,9 +953,13 @@ xfs_vn_setattr(
975 uint iolock = XFS_IOLOCK_EXCL; 953 uint iolock = XFS_IOLOCK_EXCL;
976 954
977 xfs_ilock(ip, iolock); 955 xfs_ilock(ip, iolock);
978 error = xfs_break_layouts(dentry->d_inode, &iolock); 956 error = xfs_break_layouts(dentry->d_inode, &iolock, true);
979 if (!error) 957 if (!error) {
958 xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
959 iolock |= XFS_MMAPLOCK_EXCL;
960
980 error = xfs_setattr_size(ip, iattr); 961 error = xfs_setattr_size(ip, iattr);
962 }
981 xfs_iunlock(ip, iolock); 963 xfs_iunlock(ip, iolock);
982 } else { 964 } else {
983 error = xfs_setattr_nonsize(ip, iattr, 0); 965 error = xfs_setattr_nonsize(ip, iattr, 0);
@@ -1228,16 +1210,12 @@ xfs_diflags_to_iflags(
1228} 1210}
1229 1211
1230/* 1212/*
1231 * Initialize the Linux inode, set up the operation vectors and 1213 * Initialize the Linux inode and set up the operation vectors.
1232 * unlock the inode.
1233 * 1214 *
1234 * When reading existing inodes from disk this is called directly 1215 * When reading existing inodes from disk this is called directly from xfs_iget;
1235 * from xfs_iget, when creating a new inode it is called from 1216 * when creating a new inode it is called from xfs_ialloc after setting up the
1236 * xfs_ialloc after setting up the inode. 1217 * inode. These callers have different criteria for clearing XFS_INEW, so leave
1237 * 1218 * it up to the caller to deal with unlocking the inode appropriately.
1238 * We are always called with an uninitialised linux inode here.
1239 * We need to initialise the necessary fields and take a reference
1240 * on it.
1241 */ 1219 */
1242void 1220void
1243xfs_setup_inode( 1221xfs_setup_inode(
@@ -1324,9 +1302,4 @@ xfs_setup_inode(
1324 inode_has_no_xattr(inode); 1302 inode_has_no_xattr(inode);
1325 cache_no_acl(inode); 1303 cache_no_acl(inode);
1326 } 1304 }
1327
1328 xfs_iflags_clear(ip, XFS_INEW);
1329 barrier();
1330
1331 unlock_new_inode(inode);
1332} 1305}
diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h
index ea7a98e9cb70..a0f84abb0d09 100644
--- a/fs/xfs/xfs_iops.h
+++ b/fs/xfs/xfs_iops.h
@@ -25,8 +25,6 @@ extern const struct file_operations xfs_dir_file_operations;
25 25
26extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size); 26extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size);
27 27
28extern void xfs_setup_inode(struct xfs_inode *);
29
30/* 28/*
31 * Internal setattr interfaces. 29 * Internal setattr interfaces.
32 */ 30 */
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 82e314258f73..80429891dc9b 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -229,7 +229,7 @@ xfs_bulkstat_grab_ichunk(
229 error = xfs_inobt_get_rec(cur, irec, &stat); 229 error = xfs_inobt_get_rec(cur, irec, &stat);
230 if (error) 230 if (error)
231 return error; 231 return error;
232 XFS_WANT_CORRUPTED_RETURN(stat == 1); 232 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, stat == 1);
233 233
234 /* Check if the record contains the inode in request */ 234 /* Check if the record contains the inode in request */
235 if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino) { 235 if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino) {
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index c31d2c2eadc4..7c7842c85a08 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -116,15 +116,6 @@ typedef __uint64_t __psunsigned_t;
116#undef XFS_NATIVE_HOST 116#undef XFS_NATIVE_HOST
117#endif 117#endif
118 118
119/*
120 * Feature macros (disable/enable)
121 */
122#ifdef CONFIG_SMP
123#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
124#else
125#undef HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
126#endif
127
128#define irix_sgid_inherit xfs_params.sgid_inherit.val 119#define irix_sgid_inherit xfs_params.sgid_inherit.val
129#define irix_symlink_mode xfs_params.symlink_mode.val 120#define irix_symlink_mode xfs_params.symlink_mode.val
130#define xfs_panic_mask xfs_params.panic_mask.val 121#define xfs_panic_mask xfs_params.panic_mask.val
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index a5a945fc3bdc..4f5784f85a5b 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -4463,10 +4463,10 @@ xlog_do_recover(
4463 xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp)); 4463 xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp));
4464 ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC); 4464 ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC);
4465 ASSERT(xfs_sb_good_version(sbp)); 4465 ASSERT(xfs_sb_good_version(sbp));
4466 xfs_reinit_percpu_counters(log->l_mp);
4467
4466 xfs_buf_relse(bp); 4468 xfs_buf_relse(bp);
4467 4469
4468 /* We've re-read the superblock so re-initialize per-cpu counters */
4469 xfs_icsb_reinit_counters(log->l_mp);
4470 4470
4471 xlog_recover_check_summary(log); 4471 xlog_recover_check_summary(log);
4472 4472
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 4fa80e63eea2..2ce7ee3b4ec1 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -43,18 +43,6 @@
43#include "xfs_sysfs.h" 43#include "xfs_sysfs.h"
44 44
45 45
46#ifdef HAVE_PERCPU_SB
47STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t,
48 int);
49STATIC void xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t,
50 int);
51STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
52#else
53
54#define xfs_icsb_balance_counter(mp, a, b) do { } while (0)
55#define xfs_icsb_balance_counter_locked(mp, a, b) do { } while (0)
56#endif
57
58static DEFINE_MUTEX(xfs_uuid_table_mutex); 46static DEFINE_MUTEX(xfs_uuid_table_mutex);
59static int xfs_uuid_table_size; 47static int xfs_uuid_table_size;
60static uuid_t *xfs_uuid_table; 48static uuid_t *xfs_uuid_table;
@@ -347,8 +335,7 @@ reread:
347 goto reread; 335 goto reread;
348 } 336 }
349 337
350 /* Initialize per-cpu counters */ 338 xfs_reinit_percpu_counters(mp);
351 xfs_icsb_reinit_counters(mp);
352 339
353 /* no need to be quiet anymore, so reset the buf ops */ 340 /* no need to be quiet anymore, so reset the buf ops */
354 bp->b_ops = &xfs_sb_buf_ops; 341 bp->b_ops = &xfs_sb_buf_ops;
@@ -1087,8 +1074,6 @@ xfs_log_sbcount(xfs_mount_t *mp)
1087 if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE)) 1074 if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE))
1088 return 0; 1075 return 0;
1089 1076
1090 xfs_icsb_sync_counters(mp, 0);
1091
1092 /* 1077 /*
1093 * we don't need to do this if we are updating the superblock 1078 * we don't need to do this if we are updating the superblock
1094 * counters on every modification. 1079 * counters on every modification.
@@ -1099,253 +1084,136 @@ xfs_log_sbcount(xfs_mount_t *mp)
1099 return xfs_sync_sb(mp, true); 1084 return xfs_sync_sb(mp, true);
1100} 1085}
1101 1086
1102/* 1087int
1103 * xfs_mod_incore_sb_unlocked() is a utility routine commonly used to apply 1088xfs_mod_icount(
1104 * a delta to a specified field in the in-core superblock. Simply 1089 struct xfs_mount *mp,
1105 * switch on the field indicated and apply the delta to that field. 1090 int64_t delta)
1106 * Fields are not allowed to dip below zero, so if the delta would
1107 * do this do not apply it and return EINVAL.
1108 *
1109 * The m_sb_lock must be held when this routine is called.
1110 */
1111STATIC int
1112xfs_mod_incore_sb_unlocked(
1113 xfs_mount_t *mp,
1114 xfs_sb_field_t field,
1115 int64_t delta,
1116 int rsvd)
1117{ 1091{
1118 int scounter; /* short counter for 32 bit fields */ 1092 /* deltas are +/-64, hence the large batch size of 128. */
1119 long long lcounter; /* long counter for 64 bit fields */ 1093 __percpu_counter_add(&mp->m_icount, delta, 128);
1120 long long res_used, rem; 1094 if (percpu_counter_compare(&mp->m_icount, 0) < 0) {
1121
1122 /*
1123 * With the in-core superblock spin lock held, switch
1124 * on the indicated field. Apply the delta to the
1125 * proper field. If the fields value would dip below
1126 * 0, then do not apply the delta and return EINVAL.
1127 */
1128 switch (field) {
1129 case XFS_SBS_ICOUNT:
1130 lcounter = (long long)mp->m_sb.sb_icount;
1131 lcounter += delta;
1132 if (lcounter < 0) {
1133 ASSERT(0);
1134 return -EINVAL;
1135 }
1136 mp->m_sb.sb_icount = lcounter;
1137 return 0;
1138 case XFS_SBS_IFREE:
1139 lcounter = (long long)mp->m_sb.sb_ifree;
1140 lcounter += delta;
1141 if (lcounter < 0) {
1142 ASSERT(0);
1143 return -EINVAL;
1144 }
1145 mp->m_sb.sb_ifree = lcounter;
1146 return 0;
1147 case XFS_SBS_FDBLOCKS:
1148 lcounter = (long long)
1149 mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
1150 res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);
1151
1152 if (delta > 0) { /* Putting blocks back */
1153 if (res_used > delta) {
1154 mp->m_resblks_avail += delta;
1155 } else {
1156 rem = delta - res_used;
1157 mp->m_resblks_avail = mp->m_resblks;
1158 lcounter += rem;
1159 }
1160 } else { /* Taking blocks away */
1161 lcounter += delta;
1162 if (lcounter >= 0) {
1163 mp->m_sb.sb_fdblocks = lcounter +
1164 XFS_ALLOC_SET_ASIDE(mp);
1165 return 0;
1166 }
1167
1168 /*
1169 * We are out of blocks, use any available reserved
1170 * blocks if were allowed to.
1171 */
1172 if (!rsvd)
1173 return -ENOSPC;
1174
1175 lcounter = (long long)mp->m_resblks_avail + delta;
1176 if (lcounter >= 0) {
1177 mp->m_resblks_avail = lcounter;
1178 return 0;
1179 }
1180 printk_once(KERN_WARNING
1181 "Filesystem \"%s\": reserve blocks depleted! "
1182 "Consider increasing reserve pool size.",
1183 mp->m_fsname);
1184 return -ENOSPC;
1185 }
1186
1187 mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
1188 return 0;
1189 case XFS_SBS_FREXTENTS:
1190 lcounter = (long long)mp->m_sb.sb_frextents;
1191 lcounter += delta;
1192 if (lcounter < 0) {
1193 return -ENOSPC;
1194 }
1195 mp->m_sb.sb_frextents = lcounter;
1196 return 0;
1197 case XFS_SBS_DBLOCKS:
1198 lcounter = (long long)mp->m_sb.sb_dblocks;
1199 lcounter += delta;
1200 if (lcounter < 0) {
1201 ASSERT(0);
1202 return -EINVAL;
1203 }
1204 mp->m_sb.sb_dblocks = lcounter;
1205 return 0;
1206 case XFS_SBS_AGCOUNT:
1207 scounter = mp->m_sb.sb_agcount;
1208 scounter += delta;
1209 if (scounter < 0) {
1210 ASSERT(0);
1211 return -EINVAL;
1212 }
1213 mp->m_sb.sb_agcount = scounter;
1214 return 0;
1215 case XFS_SBS_IMAX_PCT:
1216 scounter = mp->m_sb.sb_imax_pct;
1217 scounter += delta;
1218 if (scounter < 0) {
1219 ASSERT(0);
1220 return -EINVAL;
1221 }
1222 mp->m_sb.sb_imax_pct = scounter;
1223 return 0;
1224 case XFS_SBS_REXTSIZE:
1225 scounter = mp->m_sb.sb_rextsize;
1226 scounter += delta;
1227 if (scounter < 0) {
1228 ASSERT(0);
1229 return -EINVAL;
1230 }
1231 mp->m_sb.sb_rextsize = scounter;
1232 return 0;
1233 case XFS_SBS_RBMBLOCKS:
1234 scounter = mp->m_sb.sb_rbmblocks;
1235 scounter += delta;
1236 if (scounter < 0) {
1237 ASSERT(0);
1238 return -EINVAL;
1239 }
1240 mp->m_sb.sb_rbmblocks = scounter;
1241 return 0;
1242 case XFS_SBS_RBLOCKS:
1243 lcounter = (long long)mp->m_sb.sb_rblocks;
1244 lcounter += delta;
1245 if (lcounter < 0) {
1246 ASSERT(0);
1247 return -EINVAL;
1248 }
1249 mp->m_sb.sb_rblocks = lcounter;
1250 return 0;
1251 case XFS_SBS_REXTENTS:
1252 lcounter = (long long)mp->m_sb.sb_rextents;
1253 lcounter += delta;
1254 if (lcounter < 0) {
1255 ASSERT(0);
1256 return -EINVAL;
1257 }
1258 mp->m_sb.sb_rextents = lcounter;
1259 return 0;
1260 case XFS_SBS_REXTSLOG:
1261 scounter = mp->m_sb.sb_rextslog;
1262 scounter += delta;
1263 if (scounter < 0) {
1264 ASSERT(0);
1265 return -EINVAL;
1266 }
1267 mp->m_sb.sb_rextslog = scounter;
1268 return 0;
1269 default:
1270 ASSERT(0); 1095 ASSERT(0);
1096 percpu_counter_add(&mp->m_icount, -delta);
1271 return -EINVAL; 1097 return -EINVAL;
1272 } 1098 }
1099 return 0;
1273} 1100}
1274 1101
1275/*
1276 * xfs_mod_incore_sb() is used to change a field in the in-core
1277 * superblock structure by the specified delta. This modification
1278 * is protected by the m_sb_lock. Just use the xfs_mod_incore_sb_unlocked()
1279 * routine to do the work.
1280 */
1281int 1102int
1282xfs_mod_incore_sb( 1103xfs_mod_ifree(
1283 struct xfs_mount *mp, 1104 struct xfs_mount *mp,
1284 xfs_sb_field_t field, 1105 int64_t delta)
1285 int64_t delta,
1286 int rsvd)
1287{ 1106{
1288 int status; 1107 percpu_counter_add(&mp->m_ifree, delta);
1289 1108 if (percpu_counter_compare(&mp->m_ifree, 0) < 0) {
1290#ifdef HAVE_PERCPU_SB 1109 ASSERT(0);
1291 ASSERT(field < XFS_SBS_ICOUNT || field > XFS_SBS_FDBLOCKS); 1110 percpu_counter_add(&mp->m_ifree, -delta);
1292#endif 1111 return -EINVAL;
1293 spin_lock(&mp->m_sb_lock); 1112 }
1294 status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd); 1113 return 0;
1295 spin_unlock(&mp->m_sb_lock);
1296
1297 return status;
1298} 1114}
1299 1115
1300/*
1301 * Change more than one field in the in-core superblock structure at a time.
1302 *
1303 * The fields and changes to those fields are specified in the array of
1304 * xfs_mod_sb structures passed in. Either all of the specified deltas
1305 * will be applied or none of them will. If any modified field dips below 0,
1306 * then all modifications will be backed out and EINVAL will be returned.
1307 *
1308 * Note that this function may not be used for the superblock values that
1309 * are tracked with the in-memory per-cpu counters - a direct call to
1310 * xfs_icsb_modify_counters is required for these.
1311 */
1312int 1116int
1313xfs_mod_incore_sb_batch( 1117xfs_mod_fdblocks(
1314 struct xfs_mount *mp, 1118 struct xfs_mount *mp,
1315 xfs_mod_sb_t *msb, 1119 int64_t delta,
1316 uint nmsb, 1120 bool rsvd)
1317 int rsvd)
1318{ 1121{
1319 xfs_mod_sb_t *msbp; 1122 int64_t lcounter;
1320 int error = 0; 1123 long long res_used;
1124 s32 batch;
1125
1126 if (delta > 0) {
1127 /*
1128 * If the reserve pool is depleted, put blocks back into it
1129 * first. Most of the time the pool is full.
1130 */
1131 if (likely(mp->m_resblks == mp->m_resblks_avail)) {
1132 percpu_counter_add(&mp->m_fdblocks, delta);
1133 return 0;
1134 }
1135
1136 spin_lock(&mp->m_sb_lock);
1137 res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);
1138
1139 if (res_used > delta) {
1140 mp->m_resblks_avail += delta;
1141 } else {
1142 delta -= res_used;
1143 mp->m_resblks_avail = mp->m_resblks;
1144 percpu_counter_add(&mp->m_fdblocks, delta);
1145 }
1146 spin_unlock(&mp->m_sb_lock);
1147 return 0;
1148 }
1321 1149
1322 /* 1150 /*
1323 * Loop through the array of mod structures and apply each individually. 1151 * Taking blocks away, need to be more accurate the closer we
1324 * If any fail, then back out all those which have already been applied. 1152 * are to zero.
1325 * Do all of this within the scope of the m_sb_lock so that all of the 1153 *
1326 * changes will be atomic. 1154 * batch size is set to a maximum of 1024 blocks - if we are
1155 * allocating or freeing extents larger than this then we aren't
1156 * going to be hammering the counter lock so a lock per update
1157 * is not a problem.
1158 *
1159 * If the counter has a value of less than 2 * max batch size,
1160 * then make everything serialise as we are really close to
1161 * ENOSPC.
1162 */
1163#define __BATCH 1024
1164 if (percpu_counter_compare(&mp->m_fdblocks, 2 * __BATCH) < 0)
1165 batch = 1;
1166 else
1167 batch = __BATCH;
1168#undef __BATCH
1169
1170 __percpu_counter_add(&mp->m_fdblocks, delta, batch);
1171 if (percpu_counter_compare(&mp->m_fdblocks,
1172 XFS_ALLOC_SET_ASIDE(mp)) >= 0) {
1173 /* we had space! */
1174 return 0;
1175 }
1176
1177 /*
1178 * lock up the sb for dipping into reserves before releasing the space
1179 * that took us to ENOSPC.
1327 */ 1180 */
1328 spin_lock(&mp->m_sb_lock); 1181 spin_lock(&mp->m_sb_lock);
1329 for (msbp = msb; msbp < (msb + nmsb); msbp++) { 1182 percpu_counter_add(&mp->m_fdblocks, -delta);
1330 ASSERT(msbp->msb_field < XFS_SBS_ICOUNT || 1183 if (!rsvd)
1331 msbp->msb_field > XFS_SBS_FDBLOCKS); 1184 goto fdblocks_enospc;
1332 1185
1333 error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field, 1186 lcounter = (long long)mp->m_resblks_avail + delta;
1334 msbp->msb_delta, rsvd); 1187 if (lcounter >= 0) {
1335 if (error) 1188 mp->m_resblks_avail = lcounter;
1336 goto unwind; 1189 spin_unlock(&mp->m_sb_lock);
1190 return 0;
1337 } 1191 }
1192 printk_once(KERN_WARNING
1193 "Filesystem \"%s\": reserve blocks depleted! "
1194 "Consider increasing reserve pool size.",
1195 mp->m_fsname);
1196fdblocks_enospc:
1338 spin_unlock(&mp->m_sb_lock); 1197 spin_unlock(&mp->m_sb_lock);
1339 return 0; 1198 return -ENOSPC;
1199}
1340 1200
1341unwind: 1201int
1342 while (--msbp >= msb) { 1202xfs_mod_frextents(
1343 error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field, 1203 struct xfs_mount *mp,
1344 -msbp->msb_delta, rsvd); 1204 int64_t delta)
1345 ASSERT(error == 0); 1205{
1346 } 1206 int64_t lcounter;
1207 int ret = 0;
1208
1209 spin_lock(&mp->m_sb_lock);
1210 lcounter = mp->m_sb.sb_frextents + delta;
1211 if (lcounter < 0)
1212 ret = -ENOSPC;
1213 else
1214 mp->m_sb.sb_frextents = lcounter;
1347 spin_unlock(&mp->m_sb_lock); 1215 spin_unlock(&mp->m_sb_lock);
1348 return error; 1216 return ret;
1349} 1217}
1350 1218
1351/* 1219/*
@@ -1407,573 +1275,3 @@ xfs_dev_is_read_only(
1407 } 1275 }
1408 return 0; 1276 return 0;
1409} 1277}
1410
1411#ifdef HAVE_PERCPU_SB
1412/*
1413 * Per-cpu incore superblock counters
1414 *
1415 * Simple concept, difficult implementation
1416 *
1417 * Basically, replace the incore superblock counters with a distributed per cpu
1418 * counter for contended fields (e.g. free block count).
1419 *
1420 * Difficulties arise in that the incore sb is used for ENOSPC checking, and
1421 * hence needs to be accurately read when we are running low on space. Hence
1422 * there is a method to enable and disable the per-cpu counters based on how
1423 * much "stuff" is available in them.
1424 *
1425 * Basically, a counter is enabled if there is enough free resource to justify
1426 * running a per-cpu fast-path. If the per-cpu counter runs out (i.e. a local
1427 * ENOSPC), then we disable the counters to synchronise all callers and
1428 * re-distribute the available resources.
1429 *
1430 * If, once we redistributed the available resources, we still get a failure,
1431 * we disable the per-cpu counter and go through the slow path.
1432 *
1433 * The slow path is the current xfs_mod_incore_sb() function. This means that
1434 * when we disable a per-cpu counter, we need to drain its resources back to
1435 * the global superblock. We do this after disabling the counter to prevent
1436 * more threads from queueing up on the counter.
1437 *
1438 * Essentially, this means that we still need a lock in the fast path to enable
1439 * synchronisation between the global counters and the per-cpu counters. This
1440 * is not a problem because the lock will be local to a CPU almost all the time
1441 * and have little contention except when we get to ENOSPC conditions.
1442 *
1443 * Basically, this lock becomes a barrier that enables us to lock out the fast
1444 * path while we do things like enabling and disabling counters and
1445 * synchronising the counters.
1446 *
1447 * Locking rules:
1448 *
1449 * 1. m_sb_lock before picking up per-cpu locks
1450 * 2. per-cpu locks always picked up via for_each_online_cpu() order
1451 * 3. accurate counter sync requires m_sb_lock + per cpu locks
1452 * 4. modifying per-cpu counters requires holding per-cpu lock
1453 * 5. modifying global counters requires holding m_sb_lock
1454 * 6. enabling or disabling a counter requires holding the m_sb_lock
1455 * and _none_ of the per-cpu locks.
1456 *
1457 * Disabled counters are only ever re-enabled by a balance operation
1458 * that results in more free resources per CPU than a given threshold.
1459 * To ensure counters don't remain disabled, they are rebalanced when
1460 * the global resource goes above a higher threshold (i.e. some hysteresis
1461 * is present to prevent thrashing).
1462 */
1463
1464#ifdef CONFIG_HOTPLUG_CPU
1465/*
1466 * hot-plug CPU notifier support.
1467 *
1468 * We need a notifier per filesystem as we need to be able to identify
1469 * the filesystem to balance the counters out. This is achieved by
1470 * having a notifier block embedded in the xfs_mount_t and doing pointer
1471 * magic to get the mount pointer from the notifier block address.
1472 */
1473STATIC int
1474xfs_icsb_cpu_notify(
1475 struct notifier_block *nfb,
1476 unsigned long action,
1477 void *hcpu)
1478{
1479 xfs_icsb_cnts_t *cntp;
1480 xfs_mount_t *mp;
1481
1482 mp = (xfs_mount_t *)container_of(nfb, xfs_mount_t, m_icsb_notifier);
1483 cntp = (xfs_icsb_cnts_t *)
1484 per_cpu_ptr(mp->m_sb_cnts, (unsigned long)hcpu);
1485 switch (action) {
1486 case CPU_UP_PREPARE:
1487 case CPU_UP_PREPARE_FROZEN:
1488 /* Easy Case - initialize the area and locks, and
1489 * then rebalance when online does everything else for us. */
1490 memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
1491 break;
1492 case CPU_ONLINE:
1493 case CPU_ONLINE_FROZEN:
1494 xfs_icsb_lock(mp);
1495 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
1496 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
1497 xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
1498 xfs_icsb_unlock(mp);
1499 break;
1500 case CPU_DEAD:
1501 case CPU_DEAD_FROZEN:
1502 /* Disable all the counters, then fold the dead cpu's
1503 * count into the total on the global superblock and
1504 * re-enable the counters. */
1505 xfs_icsb_lock(mp);
1506 spin_lock(&mp->m_sb_lock);
1507 xfs_icsb_disable_counter(mp, XFS_SBS_ICOUNT);
1508 xfs_icsb_disable_counter(mp, XFS_SBS_IFREE);
1509 xfs_icsb_disable_counter(mp, XFS_SBS_FDBLOCKS);
1510
1511 mp->m_sb.sb_icount += cntp->icsb_icount;
1512 mp->m_sb.sb_ifree += cntp->icsb_ifree;
1513 mp->m_sb.sb_fdblocks += cntp->icsb_fdblocks;
1514
1515 memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
1516
1517 xfs_icsb_balance_counter_locked(mp, XFS_SBS_ICOUNT, 0);
1518 xfs_icsb_balance_counter_locked(mp, XFS_SBS_IFREE, 0);
1519 xfs_icsb_balance_counter_locked(mp, XFS_SBS_FDBLOCKS, 0);
1520 spin_unlock(&mp->m_sb_lock);
1521 xfs_icsb_unlock(mp);
1522 break;
1523 }
1524
1525 return NOTIFY_OK;
1526}
1527#endif /* CONFIG_HOTPLUG_CPU */
1528
1529int
1530xfs_icsb_init_counters(
1531 xfs_mount_t *mp)
1532{
1533 xfs_icsb_cnts_t *cntp;
1534 int i;
1535
1536 mp->m_sb_cnts = alloc_percpu(xfs_icsb_cnts_t);
1537 if (mp->m_sb_cnts == NULL)
1538 return -ENOMEM;
1539
1540 for_each_online_cpu(i) {
1541 cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
1542 memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
1543 }
1544
1545 mutex_init(&mp->m_icsb_mutex);
1546
1547 /*
1548 * start with all counters disabled so that the
1549 * initial balance kicks us off correctly
1550 */
1551 mp->m_icsb_counters = -1;
1552
1553#ifdef CONFIG_HOTPLUG_CPU
1554 mp->m_icsb_notifier.notifier_call = xfs_icsb_cpu_notify;
1555 mp->m_icsb_notifier.priority = 0;
1556 register_hotcpu_notifier(&mp->m_icsb_notifier);
1557#endif /* CONFIG_HOTPLUG_CPU */
1558
1559 return 0;
1560}
1561
1562void
1563xfs_icsb_reinit_counters(
1564 xfs_mount_t *mp)
1565{
1566 xfs_icsb_lock(mp);
1567 /*
1568 * start with all counters disabled so that the
1569 * initial balance kicks us off correctly
1570 */
1571 mp->m_icsb_counters = -1;
1572 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
1573 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
1574 xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
1575 xfs_icsb_unlock(mp);
1576}
1577
1578void
1579xfs_icsb_destroy_counters(
1580 xfs_mount_t *mp)
1581{
1582 if (mp->m_sb_cnts) {
1583 unregister_hotcpu_notifier(&mp->m_icsb_notifier);
1584 free_percpu(mp->m_sb_cnts);
1585 }
1586 mutex_destroy(&mp->m_icsb_mutex);
1587}
1588
1589STATIC void
1590xfs_icsb_lock_cntr(
1591 xfs_icsb_cnts_t *icsbp)
1592{
1593 while (test_and_set_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags)) {
1594 ndelay(1000);
1595 }
1596}
1597
1598STATIC void
1599xfs_icsb_unlock_cntr(
1600 xfs_icsb_cnts_t *icsbp)
1601{
1602 clear_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags);
1603}
1604
1605
1606STATIC void
1607xfs_icsb_lock_all_counters(
1608 xfs_mount_t *mp)
1609{
1610 xfs_icsb_cnts_t *cntp;
1611 int i;
1612
1613 for_each_online_cpu(i) {
1614 cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
1615 xfs_icsb_lock_cntr(cntp);
1616 }
1617}
1618
1619STATIC void
1620xfs_icsb_unlock_all_counters(
1621 xfs_mount_t *mp)
1622{
1623 xfs_icsb_cnts_t *cntp;
1624 int i;
1625
1626 for_each_online_cpu(i) {
1627 cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
1628 xfs_icsb_unlock_cntr(cntp);
1629 }
1630}
1631
1632STATIC void
1633xfs_icsb_count(
1634 xfs_mount_t *mp,
1635 xfs_icsb_cnts_t *cnt,
1636 int flags)
1637{
1638 xfs_icsb_cnts_t *cntp;
1639 int i;
1640
1641 memset(cnt, 0, sizeof(xfs_icsb_cnts_t));
1642
1643 if (!(flags & XFS_ICSB_LAZY_COUNT))
1644 xfs_icsb_lock_all_counters(mp);
1645
1646 for_each_online_cpu(i) {
1647 cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
1648 cnt->icsb_icount += cntp->icsb_icount;
1649 cnt->icsb_ifree += cntp->icsb_ifree;
1650 cnt->icsb_fdblocks += cntp->icsb_fdblocks;
1651 }
1652
1653 if (!(flags & XFS_ICSB_LAZY_COUNT))
1654 xfs_icsb_unlock_all_counters(mp);
1655}
1656
1657STATIC int
1658xfs_icsb_counter_disabled(
1659 xfs_mount_t *mp,
1660 xfs_sb_field_t field)
1661{
1662 ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
1663 return test_bit(field, &mp->m_icsb_counters);
1664}
1665
1666STATIC void
1667xfs_icsb_disable_counter(
1668 xfs_mount_t *mp,
1669 xfs_sb_field_t field)
1670{
1671 xfs_icsb_cnts_t cnt;
1672
1673 ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
1674
1675 /*
1676 * If we are already disabled, then there is nothing to do
1677 * here. We check before locking all the counters to avoid
1678 * the expensive lock operation when being called in the
1679 * slow path and the counter is already disabled. This is
1680 * safe because the only time we set or clear this state is under
1681 * the m_icsb_mutex.
1682 */
1683 if (xfs_icsb_counter_disabled(mp, field))
1684 return;
1685
1686 xfs_icsb_lock_all_counters(mp);
1687 if (!test_and_set_bit(field, &mp->m_icsb_counters)) {
1688 /* drain back to superblock */
1689
1690 xfs_icsb_count(mp, &cnt, XFS_ICSB_LAZY_COUNT);
1691 switch(field) {
1692 case XFS_SBS_ICOUNT:
1693 mp->m_sb.sb_icount = cnt.icsb_icount;
1694 break;
1695 case XFS_SBS_IFREE:
1696 mp->m_sb.sb_ifree = cnt.icsb_ifree;
1697 break;
1698 case XFS_SBS_FDBLOCKS:
1699 mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
1700 break;
1701 default:
1702 BUG();
1703 }
1704 }
1705
1706 xfs_icsb_unlock_all_counters(mp);
1707}
1708
1709STATIC void
1710xfs_icsb_enable_counter(
1711 xfs_mount_t *mp,
1712 xfs_sb_field_t field,
1713 uint64_t count,
1714 uint64_t resid)
1715{
1716 xfs_icsb_cnts_t *cntp;
1717 int i;
1718
1719 ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
1720
1721 xfs_icsb_lock_all_counters(mp);
1722 for_each_online_cpu(i) {
1723 cntp = per_cpu_ptr(mp->m_sb_cnts, i);
1724 switch (field) {
1725 case XFS_SBS_ICOUNT:
1726 cntp->icsb_icount = count + resid;
1727 break;
1728 case XFS_SBS_IFREE:
1729 cntp->icsb_ifree = count + resid;
1730 break;
1731 case XFS_SBS_FDBLOCKS:
1732 cntp->icsb_fdblocks = count + resid;
1733 break;
1734 default:
1735 BUG();
1736 break;
1737 }
1738 resid = 0;
1739 }
1740 clear_bit(field, &mp->m_icsb_counters);
1741 xfs_icsb_unlock_all_counters(mp);
1742}
1743
1744void
1745xfs_icsb_sync_counters_locked(
1746 xfs_mount_t *mp,
1747 int flags)
1748{
1749 xfs_icsb_cnts_t cnt;
1750
1751 xfs_icsb_count(mp, &cnt, flags);
1752
1753 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT))
1754 mp->m_sb.sb_icount = cnt.icsb_icount;
1755 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE))
1756 mp->m_sb.sb_ifree = cnt.icsb_ifree;
1757 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS))
1758 mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
1759}
1760
1761/*
1762 * Accurate update of per-cpu counters to incore superblock
1763 */
1764void
1765xfs_icsb_sync_counters(
1766 xfs_mount_t *mp,
1767 int flags)
1768{
1769 spin_lock(&mp->m_sb_lock);
1770 xfs_icsb_sync_counters_locked(mp, flags);
1771 spin_unlock(&mp->m_sb_lock);
1772}
1773
1774/*
1775 * Balance and enable/disable counters as necessary.
1776 *
1777 * Thresholds for re-enabling counters are somewhat magic. inode counts are
1778 * chosen to be the same number as single on disk allocation chunk per CPU, and
1779 * free blocks is something far enough zero that we aren't going thrash when we
1780 * get near ENOSPC. We also need to supply a minimum we require per cpu to
1781 * prevent looping endlessly when xfs_alloc_space asks for more than will
1782 * be distributed to a single CPU but each CPU has enough blocks to be
1783 * reenabled.
1784 *
1785 * Note that we can be called when counters are already disabled.
1786 * xfs_icsb_disable_counter() optimises the counter locking in this case to
1787 * prevent locking every per-cpu counter needlessly.
1788 */
1789
1790#define XFS_ICSB_INO_CNTR_REENABLE (uint64_t)64
1791#define XFS_ICSB_FDBLK_CNTR_REENABLE(mp) \
1792 (uint64_t)(512 + XFS_ALLOC_SET_ASIDE(mp))
1793STATIC void
1794xfs_icsb_balance_counter_locked(
1795 xfs_mount_t *mp,
1796 xfs_sb_field_t field,
1797 int min_per_cpu)
1798{
1799 uint64_t count, resid;
1800 int weight = num_online_cpus();
1801 uint64_t min = (uint64_t)min_per_cpu;
1802
1803 /* disable counter and sync counter */
1804 xfs_icsb_disable_counter(mp, field);
1805
1806 /* update counters - first CPU gets residual*/
1807 switch (field) {
1808 case XFS_SBS_ICOUNT:
1809 count = mp->m_sb.sb_icount;
1810 resid = do_div(count, weight);
1811 if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
1812 return;
1813 break;
1814 case XFS_SBS_IFREE:
1815 count = mp->m_sb.sb_ifree;
1816 resid = do_div(count, weight);
1817 if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
1818 return;
1819 break;
1820 case XFS_SBS_FDBLOCKS:
1821 count = mp->m_sb.sb_fdblocks;
1822 resid = do_div(count, weight);
1823 if (count < max(min, XFS_ICSB_FDBLK_CNTR_REENABLE(mp)))
1824 return;
1825 break;
1826 default:
1827 BUG();
1828 count = resid = 0; /* quiet, gcc */
1829 break;
1830 }
1831
1832 xfs_icsb_enable_counter(mp, field, count, resid);
1833}
1834
1835STATIC void
1836xfs_icsb_balance_counter(
1837 xfs_mount_t *mp,
1838 xfs_sb_field_t fields,
1839 int min_per_cpu)
1840{
1841 spin_lock(&mp->m_sb_lock);
1842 xfs_icsb_balance_counter_locked(mp, fields, min_per_cpu);
1843 spin_unlock(&mp->m_sb_lock);
1844}
1845
1846int
1847xfs_icsb_modify_counters(
1848 xfs_mount_t *mp,
1849 xfs_sb_field_t field,
1850 int64_t delta,
1851 int rsvd)
1852{
1853 xfs_icsb_cnts_t *icsbp;
1854 long long lcounter; /* long counter for 64 bit fields */
1855 int ret = 0;
1856
1857 might_sleep();
1858again:
1859 preempt_disable();
1860 icsbp = this_cpu_ptr(mp->m_sb_cnts);
1861
1862 /*
1863 * if the counter is disabled, go to slow path
1864 */
1865 if (unlikely(xfs_icsb_counter_disabled(mp, field)))
1866 goto slow_path;
1867 xfs_icsb_lock_cntr(icsbp);
1868 if (unlikely(xfs_icsb_counter_disabled(mp, field))) {
1869 xfs_icsb_unlock_cntr(icsbp);
1870 goto slow_path;
1871 }
1872
1873 switch (field) {
1874 case XFS_SBS_ICOUNT:
1875 lcounter = icsbp->icsb_icount;
1876 lcounter += delta;
1877 if (unlikely(lcounter < 0))
1878 goto balance_counter;
1879 icsbp->icsb_icount = lcounter;
1880 break;
1881
1882 case XFS_SBS_IFREE:
1883 lcounter = icsbp->icsb_ifree;
1884 lcounter += delta;
1885 if (unlikely(lcounter < 0))
1886 goto balance_counter;
1887 icsbp->icsb_ifree = lcounter;
1888 break;
1889
1890 case XFS_SBS_FDBLOCKS:
1891 BUG_ON((mp->m_resblks - mp->m_resblks_avail) != 0);
1892
1893 lcounter = icsbp->icsb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
1894 lcounter += delta;
1895 if (unlikely(lcounter < 0))
1896 goto balance_counter;
1897 icsbp->icsb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
1898 break;
1899 default:
1900 BUG();
1901 break;
1902 }
1903 xfs_icsb_unlock_cntr(icsbp);
1904 preempt_enable();
1905 return 0;
1906
1907slow_path:
1908 preempt_enable();
1909
1910 /*
1911 * serialise with a mutex so we don't burn lots of cpu on
1912 * the superblock lock. We still need to hold the superblock
1913 * lock, however, when we modify the global structures.
1914 */
1915 xfs_icsb_lock(mp);
1916
1917 /*
1918 * Now running atomically.
1919 *
1920 * If the counter is enabled, someone has beaten us to rebalancing.
1921 * Drop the lock and try again in the fast path....
1922 */
1923 if (!(xfs_icsb_counter_disabled(mp, field))) {
1924 xfs_icsb_unlock(mp);
1925 goto again;
1926 }
1927
1928 /*
1929 * The counter is currently disabled. Because we are
1930 * running atomically here, we know a rebalance cannot
1931 * be in progress. Hence we can go straight to operating
1932 * on the global superblock. We do not call xfs_mod_incore_sb()
1933 * here even though we need to get the m_sb_lock. Doing so
1934 * will cause us to re-enter this function and deadlock.
1935 * Hence we get the m_sb_lock ourselves and then call
1936 * xfs_mod_incore_sb_unlocked() as the unlocked path operates
1937 * directly on the global counters.
1938 */
1939 spin_lock(&mp->m_sb_lock);
1940 ret = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
1941 spin_unlock(&mp->m_sb_lock);
1942
1943 /*
1944 * Now that we've modified the global superblock, we
1945 * may be able to re-enable the distributed counters
1946 * (e.g. lots of space just got freed). After that
1947 * we are done.
1948 */
1949 if (ret != -ENOSPC)
1950 xfs_icsb_balance_counter(mp, field, 0);
1951 xfs_icsb_unlock(mp);
1952 return ret;
1953
1954balance_counter:
1955 xfs_icsb_unlock_cntr(icsbp);
1956 preempt_enable();
1957
1958 /*
1959 * We may have multiple threads here if multiple per-cpu
1960 * counters run dry at the same time. This will mean we can
1961 * do more balances than strictly necessary but it is not
1962 * the common slowpath case.
1963 */
1964 xfs_icsb_lock(mp);
1965
1966 /*
1967 * running atomically.
1968 *
1969 * This will leave the counter in the correct state for future
1970 * accesses. After the rebalance, we simply try again and our retry
1971 * will either succeed through the fast path or slow path without
1972 * another balance operation being required.
1973 */
1974 xfs_icsb_balance_counter(mp, field, delta);
1975 xfs_icsb_unlock(mp);
1976 goto again;
1977}
1978
1979#endif
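
The replacement code above swaps the hand-rolled icsb machinery for the kernel's generic percpu_counter API; the interesting detail is the variable batch size in xfs_mod_fdblocks(), where updates stay cheap and per-cpu while the counter is far from zero, then collapse to batch = 1 so every change hits the global count once precision matters for ENOSPC detection. A self-contained sketch of that pattern, with illustrative names and without the XFS reserve-pool handling:

	#include <linux/errno.h>
	#include <linux/percpu_counter.h>

	#define EX_BATCH	1024	/* large batch: cheap, imprecise updates */

	/* Hypothetical free-space counter update following the same scheme. */
	static int ex_mod_free(struct percpu_counter *free, s64 delta)
	{
		s32 batch = EX_BATCH;

		/* near zero, serialise every update so the count stays exact */
		if (percpu_counter_compare(free, 2 * EX_BATCH) < 0)
			batch = 1;

		__percpu_counter_add(free, delta, batch);
		if (percpu_counter_compare(free, 0) >= 0)
			return 0;	/* we had space */

		/* undo the update and fail; XFS would try the reserve pool here */
		percpu_counter_add(free, -delta);
		return -ENOSPC;
	}
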
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 0d8abd6364d9..8c995a2ccb6f 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -18,8 +18,6 @@
18#ifndef __XFS_MOUNT_H__ 18#ifndef __XFS_MOUNT_H__
19#define __XFS_MOUNT_H__ 19#define __XFS_MOUNT_H__
20 20
21#ifdef __KERNEL__
22
23struct xlog; 21struct xlog;
24struct xfs_inode; 22struct xfs_inode;
25struct xfs_mru_cache; 23struct xfs_mru_cache;
@@ -29,44 +27,6 @@ struct xfs_quotainfo;
29struct xfs_dir_ops; 27struct xfs_dir_ops;
30struct xfs_da_geometry; 28struct xfs_da_geometry;
31 29
32#ifdef HAVE_PERCPU_SB
33
34/*
35 * Valid per-cpu incore superblock counters. Note that if you add new counters,
36 * you may need to define new counter disabled bit field descriptors as there
37 * are more possible fields in the superblock that can fit in a bitfield on a
38 * 32 bit platform. The XFS_SBS_* values for the current current counters just
39 * fit.
40 */
41typedef struct xfs_icsb_cnts {
42 uint64_t icsb_fdblocks;
43 uint64_t icsb_ifree;
44 uint64_t icsb_icount;
45 unsigned long icsb_flags;
46} xfs_icsb_cnts_t;
47
48#define XFS_ICSB_FLAG_LOCK (1 << 0) /* counter lock bit */
49
50#define XFS_ICSB_LAZY_COUNT (1 << 1) /* accuracy not needed */
51
52extern int xfs_icsb_init_counters(struct xfs_mount *);
53extern void xfs_icsb_reinit_counters(struct xfs_mount *);
54extern void xfs_icsb_destroy_counters(struct xfs_mount *);
55extern void xfs_icsb_sync_counters(struct xfs_mount *, int);
56extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int);
57extern int xfs_icsb_modify_counters(struct xfs_mount *, xfs_sb_field_t,
58 int64_t, int);
59
60#else
61#define xfs_icsb_init_counters(mp) (0)
62#define xfs_icsb_destroy_counters(mp) do { } while (0)
63#define xfs_icsb_reinit_counters(mp) do { } while (0)
64#define xfs_icsb_sync_counters(mp, flags) do { } while (0)
65#define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0)
66#define xfs_icsb_modify_counters(mp, field, delta, rsvd) \
67 xfs_mod_incore_sb(mp, field, delta, rsvd)
68#endif
69
70/* dynamic preallocation free space thresholds, 5% down to 1% */ 30/* dynamic preallocation free space thresholds, 5% down to 1% */
71enum { 31enum {
72 XFS_LOWSP_1_PCNT = 0, 32 XFS_LOWSP_1_PCNT = 0,
@@ -81,8 +41,13 @@ typedef struct xfs_mount {
81 struct super_block *m_super; 41 struct super_block *m_super;
82 xfs_tid_t m_tid; /* next unused tid for fs */ 42 xfs_tid_t m_tid; /* next unused tid for fs */
83 struct xfs_ail *m_ail; /* fs active log item list */ 43 struct xfs_ail *m_ail; /* fs active log item list */
84 xfs_sb_t m_sb; /* copy of fs superblock */ 44
45 struct xfs_sb m_sb; /* copy of fs superblock */
85 spinlock_t m_sb_lock; /* sb counter lock */ 46 spinlock_t m_sb_lock; /* sb counter lock */
47 struct percpu_counter m_icount; /* allocated inodes counter */
48 struct percpu_counter m_ifree; /* free inodes counter */
49 struct percpu_counter m_fdblocks; /* free block counter */
50
86 struct xfs_buf *m_sb_bp; /* buffer for superblock */ 51 struct xfs_buf *m_sb_bp; /* buffer for superblock */
87 char *m_fsname; /* filesystem name */ 52 char *m_fsname; /* filesystem name */
88 int m_fsname_len; /* strlen of fs name */ 53 int m_fsname_len; /* strlen of fs name */
@@ -152,12 +117,6 @@ typedef struct xfs_mount {
152 const struct xfs_dir_ops *m_nondir_inode_ops; /* !dir inode ops */ 117 const struct xfs_dir_ops *m_nondir_inode_ops; /* !dir inode ops */
153 uint m_chsize; /* size of next field */ 118 uint m_chsize; /* size of next field */
154 atomic_t m_active_trans; /* number trans frozen */ 119 atomic_t m_active_trans; /* number trans frozen */
155#ifdef HAVE_PERCPU_SB
156 xfs_icsb_cnts_t __percpu *m_sb_cnts; /* per-cpu superblock counters */
157 unsigned long m_icsb_counters; /* disabled per-cpu counters */
158 struct notifier_block m_icsb_notifier; /* hotplug cpu notifier */
159 struct mutex m_icsb_mutex; /* balancer sync lock */
160#endif
161 struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ 120 struct xfs_mru_cache *m_filestream; /* per-mount filestream data */
162 struct delayed_work m_reclaim_work; /* background inode reclaim */ 121 struct delayed_work m_reclaim_work; /* background inode reclaim */
163 struct delayed_work m_eofblocks_work; /* background eof blocks 122 struct delayed_work m_eofblocks_work; /* background eof blocks
@@ -301,35 +260,6 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
301} 260}
302 261
303/* 262/*
304 * Per-cpu superblock locking functions
305 */
306#ifdef HAVE_PERCPU_SB
307static inline void
308xfs_icsb_lock(xfs_mount_t *mp)
309{
310 mutex_lock(&mp->m_icsb_mutex);
311}
312
313static inline void
314xfs_icsb_unlock(xfs_mount_t *mp)
315{
316 mutex_unlock(&mp->m_icsb_mutex);
317}
318#else
319#define xfs_icsb_lock(mp)
320#define xfs_icsb_unlock(mp)
321#endif
322
323/*
324 * This structure is for use by the xfs_mod_incore_sb_batch() routine.
325 * xfs_growfs can specify a few fields which are more than int limit
326 */
327typedef struct xfs_mod_sb {
328 xfs_sb_field_t msb_field; /* Field to modify, see below */
329 int64_t msb_delta; /* Change to make to specified field */
330} xfs_mod_sb_t;
331
332/*
333 * Per-ag incore structure, copies of information in agf and agi, to improve the 263 * Per-ag incore structure, copies of information in agf and agi, to improve the
334 * performance of allocation group selection. 264 * performance of allocation group selection.
335 */ 265 */
@@ -383,11 +313,14 @@ extern __uint64_t xfs_default_resblks(xfs_mount_t *mp);
383extern int xfs_mountfs(xfs_mount_t *mp); 313extern int xfs_mountfs(xfs_mount_t *mp);
384extern int xfs_initialize_perag(xfs_mount_t *mp, xfs_agnumber_t agcount, 314extern int xfs_initialize_perag(xfs_mount_t *mp, xfs_agnumber_t agcount,
385 xfs_agnumber_t *maxagi); 315 xfs_agnumber_t *maxagi);
386
387extern void xfs_unmountfs(xfs_mount_t *); 316extern void xfs_unmountfs(xfs_mount_t *);
388extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); 317
389extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *, 318extern int xfs_mod_icount(struct xfs_mount *mp, int64_t delta);
390 uint, int); 319extern int xfs_mod_ifree(struct xfs_mount *mp, int64_t delta);
320extern int xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta,
321 bool reserved);
322extern int xfs_mod_frextents(struct xfs_mount *mp, int64_t delta);
323
391extern int xfs_mount_log_sb(xfs_mount_t *); 324extern int xfs_mount_log_sb(xfs_mount_t *);
392extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int); 325extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
393extern int xfs_readsb(xfs_mount_t *, int); 326extern int xfs_readsb(xfs_mount_t *, int);
@@ -399,6 +332,4 @@ extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
399 332
400extern void xfs_set_low_space_thresholds(struct xfs_mount *); 333extern void xfs_set_low_space_thresholds(struct xfs_mount *);
401 334
402#endif /* __KERNEL__ */
403
404#endif /* __XFS_MOUNT_H__ */ 335#endif /* __XFS_MOUNT_H__ */
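
With xfs_mod_incore_sb() and the xfs_mod_sb batch interface gone, each counter gets a dedicated helper with an explicit prototype, as declared above. A hypothetical call site as a sketch (the helper is from the patch, the wrapper is not): consuming blocks means passing a negative delta, and reserved == true permits dipping into the reserve pool:

	/* Sketch: consume nblocks from free space, reserve pool allowed. */
	static int ex_reserve_blocks(struct xfs_mount *mp, int64_t nblocks)
	{
		return xfs_mod_fdblocks(mp, -nblocks, true);
	}
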
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index 30ecca3037e3..f8a674d7f092 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -437,7 +437,7 @@ xfs_mru_cache_insert(
437 if (!mru || !mru->lists) 437 if (!mru || !mru->lists)
438 return -EINVAL; 438 return -EINVAL;
439 439
440 if (radix_tree_preload(GFP_KERNEL)) 440 if (radix_tree_preload(GFP_NOFS))
441 return -ENOMEM; 441 return -ENOMEM;
442 442
443 INIT_LIST_HEAD(&elem->list_node); 443 INIT_LIST_HEAD(&elem->list_node);
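
The GFP_KERNEL to GFP_NOFS change above matters because this insert can run in filesystem context, where a GFP_KERNEL allocation could recurse into filesystem reclaim. The standard preload/insert pairing looks like this (a sketch with illustrative names; a real caller such as xfs_mru_cache additionally serialises inserts with its own lock):

	#include <linux/errno.h>
	#include <linux/radix-tree.h>

	static RADIX_TREE(ex_tree, GFP_NOFS);	/* node allocations avoid fs reclaim */

	static int ex_insert(unsigned long index, void *item)
	{
		int error;

		/* preallocate nodes so the insert itself needn't allocate */
		if (radix_tree_preload(GFP_NOFS))
			return -ENOMEM;

		error = radix_tree_insert(&ex_tree, index, item);
		radix_tree_preload_end();	/* re-enables preemption */
		return error;
	}
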
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index 365dd57ea760..981a657eca39 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -31,7 +31,8 @@
31int 31int
32xfs_break_layouts( 32xfs_break_layouts(
33 struct inode *inode, 33 struct inode *inode,
34 uint *iolock) 34 uint *iolock,
35 bool with_imutex)
35{ 36{
36 struct xfs_inode *ip = XFS_I(inode); 37 struct xfs_inode *ip = XFS_I(inode);
37 int error; 38 int error;
@@ -40,8 +41,12 @@ xfs_break_layouts(
40 41
41 while ((error = break_layout(inode, false) == -EWOULDBLOCK)) { 42 while ((error = break_layout(inode, false) == -EWOULDBLOCK)) {
42 xfs_iunlock(ip, *iolock); 43 xfs_iunlock(ip, *iolock);
44 if (with_imutex && (*iolock & XFS_IOLOCK_EXCL))
45 mutex_unlock(&inode->i_mutex);
43 error = break_layout(inode, true); 46 error = break_layout(inode, true);
44 *iolock = XFS_IOLOCK_EXCL; 47 *iolock = XFS_IOLOCK_EXCL;
48 if (with_imutex)
49 mutex_lock(&inode->i_mutex);
45 xfs_ilock(ip, *iolock); 50 xfs_ilock(ip, *iolock);
46 } 51 }
47 52
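
The new with_imutex argument encodes a lock-ordering rule: break_layout(inode, true) can sleep waiting for the client to return its layout, so a caller that holds inode->i_mutex must drop it together with the iolock and retake both in i_mutex -> iolock order. A hypothetical caller, sketched under the assumption that it already follows that ordering:

	/* Sketch: fallocate-style prologue holding i_mutex plus the XFS iolock. */
	static int ex_fallocate_prologue(struct inode *inode, struct xfs_inode *ip)
	{
		uint iolock = XFS_IOLOCK_EXCL;
		int error;

		mutex_lock(&inode->i_mutex);
		xfs_ilock(ip, iolock);

		/* may drop and retake both locks, preserving the order above */
		error = xfs_break_layouts(inode, &iolock, true);
		if (error) {
			xfs_iunlock(ip, iolock);
			mutex_unlock(&inode->i_mutex);
		}
		return error;
	}
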
diff --git a/fs/xfs/xfs_pnfs.h b/fs/xfs/xfs_pnfs.h
index b7fbfce660f6..8147ac108820 100644
--- a/fs/xfs/xfs_pnfs.h
+++ b/fs/xfs/xfs_pnfs.h
@@ -8,9 +8,10 @@ int xfs_fs_map_blocks(struct inode *inode, loff_t offset, u64 length,
8int xfs_fs_commit_blocks(struct inode *inode, struct iomap *maps, int nr_maps, 8int xfs_fs_commit_blocks(struct inode *inode, struct iomap *maps, int nr_maps,
9 struct iattr *iattr); 9 struct iattr *iattr);
10 10
11int xfs_break_layouts(struct inode *inode, uint *iolock); 11int xfs_break_layouts(struct inode *inode, uint *iolock, bool with_imutex);
12#else 12#else
13static inline int xfs_break_layouts(struct inode *inode, uint *iolock) 13static inline int
14xfs_break_layouts(struct inode *inode, uint *iolock, bool with_imutex)
14{ 15{
15 return 0; 16 return 0;
16} 17}
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index fbbb9e62e274..5538468c7f63 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -719,6 +719,7 @@ xfs_qm_qino_alloc(
719 xfs_trans_t *tp; 719 xfs_trans_t *tp;
720 int error; 720 int error;
721 int committed; 721 int committed;
722 bool need_alloc = true;
722 723
723 *ip = NULL; 724 *ip = NULL;
724 /* 725 /*
@@ -747,6 +748,7 @@ xfs_qm_qino_alloc(
747 return error; 748 return error;
748 mp->m_sb.sb_gquotino = NULLFSINO; 749 mp->m_sb.sb_gquotino = NULLFSINO;
749 mp->m_sb.sb_pquotino = NULLFSINO; 750 mp->m_sb.sb_pquotino = NULLFSINO;
751 need_alloc = false;
750 } 752 }
751 } 753 }
752 754
@@ -758,7 +760,7 @@ xfs_qm_qino_alloc(
758 return error; 760 return error;
759 } 761 }
760 762
761 if (!*ip) { 763 if (need_alloc) {
762 error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, 764 error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip,
763 &committed); 765 &committed);
764 if (error) { 766 if (error) {
@@ -794,11 +796,14 @@ xfs_qm_qino_alloc(
794 spin_unlock(&mp->m_sb_lock); 796 spin_unlock(&mp->m_sb_lock);
795 xfs_log_sb(tp); 797 xfs_log_sb(tp);
796 798
797 if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) { 799 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
800 if (error) {
801 ASSERT(XFS_FORCED_SHUTDOWN(mp));
798 xfs_alert(mp, "%s failed (error %d)!", __func__, error); 802 xfs_alert(mp, "%s failed (error %d)!", __func__, error);
799 return error;
800 } 803 }
801 return 0; 804 if (need_alloc)
805 xfs_finish_inode_setup(*ip);
806 return error;
802} 807}
803 808
804 809
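
The need_alloc flag supports a two-phase inode setup: a freshly allocated quota inode is only made fully visible via xfs_finish_inode_setup() once the transaction has committed (or failed), so an error in the commit path cannot recurse into inode teardown on a half-constructed inode. The shape of the pattern, with hypothetical helpers standing in for the transaction plumbing:

	struct ex_ctx;				/* hypothetical transaction context */
	int ex_ialloc(struct ex_ctx *ctx, struct xfs_inode **ipp);
	int ex_commit(struct ex_ctx *ctx);

	/* Sketch: finish VFS-visible setup only after the transaction ends. */
	static int ex_create(struct ex_ctx *ctx)
	{
		struct xfs_inode *ip = NULL;
		int error;

		error = ex_ialloc(ctx, &ip);		/* phase 1: in transaction */
		if (!error)
			error = ex_commit(ctx);
		if (ip)
			xfs_finish_inode_setup(ip);	/* phase 2: always runs */
		return error;
	}
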
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 8fcc4ccc5c79..5f357ca97e76 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -109,8 +109,6 @@ static struct xfs_kobj xfs_dbg_kobj; /* global debug sysfs attrs */
109#define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */ 109#define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */
110#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */ 110#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */
111#define MNTOPT_QUOTANOENF "qnoenforce" /* same as uqnoenforce */ 111#define MNTOPT_QUOTANOENF "qnoenforce" /* same as uqnoenforce */
112#define MNTOPT_DELAYLOG "delaylog" /* Delayed logging enabled */
113#define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed logging disabled */
114#define MNTOPT_DISCARD "discard" /* Discard unused blocks */ 112#define MNTOPT_DISCARD "discard" /* Discard unused blocks */
115#define MNTOPT_NODISCARD "nodiscard" /* Do not discard unused blocks */ 113#define MNTOPT_NODISCARD "nodiscard" /* Do not discard unused blocks */
116 114
@@ -361,28 +359,10 @@ xfs_parseargs(
361 } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) { 359 } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) {
362 mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE); 360 mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
363 mp->m_qflags &= ~XFS_GQUOTA_ENFD; 361 mp->m_qflags &= ~XFS_GQUOTA_ENFD;
364 } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) {
365 xfs_warn(mp,
366 "delaylog is the default now, option is deprecated.");
367 } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) {
368 xfs_warn(mp,
369 "nodelaylog support has been removed, option is deprecated.");
370 } else if (!strcmp(this_char, MNTOPT_DISCARD)) { 362 } else if (!strcmp(this_char, MNTOPT_DISCARD)) {
371 mp->m_flags |= XFS_MOUNT_DISCARD; 363 mp->m_flags |= XFS_MOUNT_DISCARD;
372 } else if (!strcmp(this_char, MNTOPT_NODISCARD)) { 364 } else if (!strcmp(this_char, MNTOPT_NODISCARD)) {
373 mp->m_flags &= ~XFS_MOUNT_DISCARD; 365 mp->m_flags &= ~XFS_MOUNT_DISCARD;
374 } else if (!strcmp(this_char, "ihashsize")) {
375 xfs_warn(mp,
376 "ihashsize no longer used, option is deprecated.");
377 } else if (!strcmp(this_char, "osyncisdsync")) {
378 xfs_warn(mp,
379 "osyncisdsync has no effect, option is deprecated.");
380 } else if (!strcmp(this_char, "osyncisosync")) {
381 xfs_warn(mp,
382 "osyncisosync has no effect, option is deprecated.");
383 } else if (!strcmp(this_char, "irixsgid")) {
384 xfs_warn(mp,
385 "irixsgid is now a sysctl(2) variable, option is deprecated.");
386 } else { 366 } else {
387 xfs_warn(mp, "unknown mount option [%s].", this_char); 367 xfs_warn(mp, "unknown mount option [%s].", this_char);
388 return -EINVAL; 368 return -EINVAL;
@@ -986,6 +966,8 @@ xfs_fs_inode_init_once(
986 atomic_set(&ip->i_pincount, 0); 966 atomic_set(&ip->i_pincount, 0);
987 spin_lock_init(&ip->i_flags_lock); 967 spin_lock_init(&ip->i_flags_lock);
988 968
969 mrlock_init(&ip->i_mmaplock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
970 "xfsino", ip->i_ino);
989 mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, 971 mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
990 "xfsino", ip->i_ino); 972 "xfsino", ip->i_ino);
991} 973}
@@ -1033,23 +1015,6 @@ xfs_free_fsname(
1033 kfree(mp->m_logname); 1015 kfree(mp->m_logname);
1034} 1016}
1035 1017
1036STATIC void
1037xfs_fs_put_super(
1038 struct super_block *sb)
1039{
1040 struct xfs_mount *mp = XFS_M(sb);
1041
1042 xfs_filestream_unmount(mp);
1043 xfs_unmountfs(mp);
1044
1045 xfs_freesb(mp);
1046 xfs_icsb_destroy_counters(mp);
1047 xfs_destroy_mount_workqueues(mp);
1048 xfs_close_devices(mp);
1049 xfs_free_fsname(mp);
1050 kfree(mp);
1051}
1052
1053STATIC int 1018STATIC int
1054xfs_fs_sync_fs( 1019xfs_fs_sync_fs(
1055 struct super_block *sb, 1020 struct super_block *sb,
@@ -1085,6 +1050,9 @@ xfs_fs_statfs(
1085 xfs_sb_t *sbp = &mp->m_sb; 1050 xfs_sb_t *sbp = &mp->m_sb;
1086 struct xfs_inode *ip = XFS_I(dentry->d_inode); 1051 struct xfs_inode *ip = XFS_I(dentry->d_inode);
1087 __uint64_t fakeinos, id; 1052 __uint64_t fakeinos, id;
1053 __uint64_t icount;
1054 __uint64_t ifree;
1055 __uint64_t fdblocks;
1088 xfs_extlen_t lsize; 1056 xfs_extlen_t lsize;
1089 __int64_t ffree; 1057 __int64_t ffree;
1090 1058
@@ -1095,17 +1063,21 @@ xfs_fs_statfs(
1095 statp->f_fsid.val[0] = (u32)id; 1063 statp->f_fsid.val[0] = (u32)id;
1096 statp->f_fsid.val[1] = (u32)(id >> 32); 1064 statp->f_fsid.val[1] = (u32)(id >> 32);
1097 1065
1098 xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT); 1066 icount = percpu_counter_sum(&mp->m_icount);
1067 ifree = percpu_counter_sum(&mp->m_ifree);
1068 fdblocks = percpu_counter_sum(&mp->m_fdblocks);
1099 1069
1100 spin_lock(&mp->m_sb_lock); 1070 spin_lock(&mp->m_sb_lock);
1101 statp->f_bsize = sbp->sb_blocksize; 1071 statp->f_bsize = sbp->sb_blocksize;
1102 lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0; 1072 lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
1103 statp->f_blocks = sbp->sb_dblocks - lsize; 1073 statp->f_blocks = sbp->sb_dblocks - lsize;
1104 statp->f_bfree = statp->f_bavail = 1074 spin_unlock(&mp->m_sb_lock);
1105 sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); 1075
1076 statp->f_bfree = fdblocks - XFS_ALLOC_SET_ASIDE(mp);
1077 statp->f_bavail = statp->f_bfree;
1078
1106 fakeinos = statp->f_bfree << sbp->sb_inopblog; 1079 fakeinos = statp->f_bfree << sbp->sb_inopblog;
1107 statp->f_files = 1080 statp->f_files = MIN(icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
1108 MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
1109 if (mp->m_maxicount) 1081 if (mp->m_maxicount)
1110 statp->f_files = min_t(typeof(statp->f_files), 1082 statp->f_files = min_t(typeof(statp->f_files),
1111 statp->f_files, 1083 statp->f_files,
@@ -1117,10 +1089,9 @@ xfs_fs_statfs(
1117 sbp->sb_icount); 1089 sbp->sb_icount);
1118 1090
1119 /* make sure statp->f_ffree does not underflow */ 1091 /* make sure statp->f_ffree does not underflow */
1120 ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); 1092 ffree = statp->f_files - (icount - ifree);
1121 statp->f_ffree = max_t(__int64_t, ffree, 0); 1093 statp->f_ffree = max_t(__int64_t, ffree, 0);
1122 1094
1123 spin_unlock(&mp->m_sb_lock);
1124 1095
1125 if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && 1096 if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
1126 ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) == 1097 ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) ==
@@ -1256,6 +1227,12 @@ xfs_fs_remount(
1256 1227
1257 /* ro -> rw */ 1228 /* ro -> rw */
1258 if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) { 1229 if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) {
1230 if (mp->m_flags & XFS_MOUNT_NORECOVERY) {
1231 xfs_warn(mp,
1232 "ro->rw transition prohibited on norecovery mount");
1233 return -EINVAL;
1234 }
1235
1259 mp->m_flags &= ~XFS_MOUNT_RDONLY; 1236 mp->m_flags &= ~XFS_MOUNT_RDONLY;
1260 1237
1261 /* 1238 /*
@@ -1401,6 +1378,51 @@ xfs_finish_flags(
1401 return 0; 1378 return 0;
1402} 1379}
1403 1380
1381static int
1382xfs_init_percpu_counters(
1383 struct xfs_mount *mp)
1384{
1385 int error;
1386
1387 error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL);
1388 if (error)
1389 return -ENOMEM;
1390
1391 error = percpu_counter_init(&mp->m_ifree, 0, GFP_KERNEL);
1392 if (error)
1393 goto free_icount;
1394
1395 error = percpu_counter_init(&mp->m_fdblocks, 0, GFP_KERNEL);
1396 if (error)
1397 goto free_ifree;
1398
1399 return 0;
1400
1401free_ifree:
1402 percpu_counter_destroy(&mp->m_ifree);
1403free_icount:
1404 percpu_counter_destroy(&mp->m_icount);
1405 return -ENOMEM;
1406}
1407
1408void
1409xfs_reinit_percpu_counters(
1410 struct xfs_mount *mp)
1411{
1412 percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount);
1413 percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree);
1414 percpu_counter_set(&mp->m_fdblocks, mp->m_sb.sb_fdblocks);
1415}
1416
1417static void
1418xfs_destroy_percpu_counters(
1419 struct xfs_mount *mp)
1420{
1421 percpu_counter_destroy(&mp->m_icount);
1422 percpu_counter_destroy(&mp->m_ifree);
1423 percpu_counter_destroy(&mp->m_fdblocks);
1424}
1425
1404STATIC int 1426STATIC int
1405xfs_fs_fill_super( 1427xfs_fs_fill_super(
1406 struct super_block *sb, 1428 struct super_block *sb,
@@ -1449,7 +1471,7 @@ xfs_fs_fill_super(
1449 if (error) 1471 if (error)
1450 goto out_close_devices; 1472 goto out_close_devices;
1451 1473
1452 error = xfs_icsb_init_counters(mp); 1474 error = xfs_init_percpu_counters(mp);
1453 if (error) 1475 if (error)
1454 goto out_destroy_workqueues; 1476 goto out_destroy_workqueues;
1455 1477
@@ -1507,7 +1529,7 @@ xfs_fs_fill_super(
1507 out_free_sb: 1529 out_free_sb:
1508 xfs_freesb(mp); 1530 xfs_freesb(mp);
1509 out_destroy_counters: 1531 out_destroy_counters:
1510 xfs_icsb_destroy_counters(mp); 1532 xfs_destroy_percpu_counters(mp);
1511out_destroy_workqueues: 1533out_destroy_workqueues:
1512 xfs_destroy_mount_workqueues(mp); 1534 xfs_destroy_mount_workqueues(mp);
1513 out_close_devices: 1535 out_close_devices:
@@ -1524,6 +1546,24 @@ out_destroy_workqueues:
1524 goto out_free_sb; 1546 goto out_free_sb;
1525} 1547}
1526 1548
1549STATIC void
1550xfs_fs_put_super(
1551 struct super_block *sb)
1552{
1553 struct xfs_mount *mp = XFS_M(sb);
1554
1555 xfs_notice(mp, "Unmounting Filesystem");
1556 xfs_filestream_unmount(mp);
1557 xfs_unmountfs(mp);
1558
1559 xfs_freesb(mp);
1560 xfs_destroy_percpu_counters(mp);
1561 xfs_destroy_mount_workqueues(mp);
1562 xfs_close_devices(mp);
1563 xfs_free_fsname(mp);
1564 kfree(mp);
1565}
1566
1527STATIC struct dentry * 1567STATIC struct dentry *
1528xfs_fs_mount( 1568xfs_fs_mount(
1529 struct file_system_type *fs_type, 1569 struct file_system_type *fs_type,
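
Two details in the xfs_super.c hunks above are worth noting: statfs now derives exact values with percpu_counter_sum() instead of syncing the icsb counters under m_sb_lock, and the lock is held only for fields that still live in the superblock proper. The accuracy/cost trade-off between the two percpu_counter read interfaces, sketched:

	#include <linux/percpu_counter.h>

	/* Exact but O(nr_cpus): folds in every cpu's delta; right for statfs. */
	static inline s64 ex_exact(struct percpu_counter *c)
	{
		return percpu_counter_sum(c);
	}

	/* Cheap but approximate (clamped at zero): fine for heuristics. */
	static inline s64 ex_approx(struct percpu_counter *c)
	{
		return percpu_counter_read_positive(c);
	}
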
diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h
index 2b830c2f322e..499058fea303 100644
--- a/fs/xfs/xfs_super.h
+++ b/fs/xfs/xfs_super.h
@@ -72,6 +72,8 @@ extern const struct export_operations xfs_export_operations;
72extern const struct xattr_handler *xfs_xattr_handlers[]; 72extern const struct xattr_handler *xfs_xattr_handlers[];
73extern const struct quotactl_ops xfs_quotactl_operations; 73extern const struct quotactl_ops xfs_quotactl_operations;
74 74
75extern void xfs_reinit_percpu_counters(struct xfs_mount *mp);
76
75#define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info)) 77#define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info))
76 78
77#endif /* __XFS_SUPER_H__ */ 79#endif /* __XFS_SUPER_H__ */
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 25791df6f638..3df411eadb86 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -177,7 +177,7 @@ xfs_symlink(
177 int pathlen; 177 int pathlen;
178 struct xfs_bmap_free free_list; 178 struct xfs_bmap_free free_list;
179 xfs_fsblock_t first_block; 179 xfs_fsblock_t first_block;
180 bool unlock_dp_on_error = false; 180 bool unlock_dp_on_error = false;
181 uint cancel_flags; 181 uint cancel_flags;
182 int committed; 182 int committed;
183 xfs_fileoff_t first_fsb; 183 xfs_fileoff_t first_fsb;
@@ -221,7 +221,7 @@ xfs_symlink(
221 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, 221 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
222 &udqp, &gdqp, &pdqp); 222 &udqp, &gdqp, &pdqp);
223 if (error) 223 if (error)
224 goto std_return; 224 return error;
225 225
226 tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK); 226 tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK);
227 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 227 cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
@@ -241,7 +241,7 @@ xfs_symlink(
241 } 241 }
242 if (error) { 242 if (error) {
243 cancel_flags = 0; 243 cancel_flags = 0;
244 goto error_return; 244 goto out_trans_cancel;
245 } 245 }
246 246
247 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); 247 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
@@ -252,7 +252,7 @@ xfs_symlink(
252 */ 252 */
253 if (dp->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) { 253 if (dp->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) {
254 error = -EPERM; 254 error = -EPERM;
255 goto error_return; 255 goto out_trans_cancel;
256 } 256 }
257 257
258 /* 258 /*
@@ -261,7 +261,7 @@ xfs_symlink(
261 error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, 261 error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp,
262 pdqp, resblks, 1, 0); 262 pdqp, resblks, 1, 0);
263 if (error) 263 if (error)
264 goto error_return; 264 goto out_trans_cancel;
265 265
266 /* 266 /*
267 * Check for ability to enter directory entry, if no space reserved. 267 * Check for ability to enter directory entry, if no space reserved.
@@ -269,7 +269,7 @@ xfs_symlink(
269 if (!resblks) { 269 if (!resblks) {
270 error = xfs_dir_canenter(tp, dp, link_name); 270 error = xfs_dir_canenter(tp, dp, link_name);
271 if (error) 271 if (error)
272 goto error_return; 272 goto out_trans_cancel;
273 } 273 }
274 /* 274 /*
275 * Initialize the bmap freelist prior to calling either 275 * Initialize the bmap freelist prior to calling either
@@ -282,15 +282,14 @@ xfs_symlink(
282 */ 282 */
283 error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0, 283 error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0,
284 prid, resblks > 0, &ip, NULL); 284 prid, resblks > 0, &ip, NULL);
285 if (error) { 285 if (error)
286 if (error == -ENOSPC) 286 goto out_trans_cancel;
287 goto error_return;
288 goto error1;
289 }
290 287
291 /* 288 /*
292 * An error after we've joined dp to the transaction will result in the 289 * Now we join the directory inode to the transaction. We do not do it
293 * transaction cancel unlocking dp so don't do it explicitly in the 290 * earlier because xfs_dir_ialloc might commit the previous transaction
291 * (and release all the locks). An error from here on will result in
292 * the transaction cancel unlocking dp so don't do it explicitly in the
294 * error path. 293 * error path.
295 */ 294 */
296 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 295 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
@@ -330,7 +329,7 @@ xfs_symlink(
330 XFS_BMAPI_METADATA, &first_block, resblks, 329 XFS_BMAPI_METADATA, &first_block, resblks,
331 mval, &nmaps, &free_list); 330 mval, &nmaps, &free_list);
332 if (error) 331 if (error)
333 goto error2; 332 goto out_bmap_cancel;
334 333
335 if (resblks) 334 if (resblks)
336 resblks -= fs_blocks; 335 resblks -= fs_blocks;
@@ -348,7 +347,7 @@ xfs_symlink(
348 BTOBB(byte_cnt), 0); 347 BTOBB(byte_cnt), 0);
349 if (!bp) { 348 if (!bp) {
350 error = -ENOMEM; 349 error = -ENOMEM;
351 goto error2; 350 goto out_bmap_cancel;
352 } 351 }
353 bp->b_ops = &xfs_symlink_buf_ops; 352 bp->b_ops = &xfs_symlink_buf_ops;
354 353
@@ -378,7 +377,7 @@ xfs_symlink(
378 error = xfs_dir_createname(tp, dp, link_name, ip->i_ino, 377 error = xfs_dir_createname(tp, dp, link_name, ip->i_ino,
379 &first_block, &free_list, resblks); 378 &first_block, &free_list, resblks);
380 if (error) 379 if (error)
381 goto error2; 380 goto out_bmap_cancel;
382 xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 381 xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
383 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 382 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
384 383
@@ -392,10 +391,13 @@ xfs_symlink(
392 } 391 }
393 392
394 error = xfs_bmap_finish(&tp, &free_list, &committed); 393 error = xfs_bmap_finish(&tp, &free_list, &committed);
395 if (error) { 394 if (error)
396 goto error2; 395 goto out_bmap_cancel;
397 } 396
398 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 397 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
398 if (error)
399 goto out_release_inode;
400
399 xfs_qm_dqrele(udqp); 401 xfs_qm_dqrele(udqp);
400 xfs_qm_dqrele(gdqp); 402 xfs_qm_dqrele(gdqp);
401 xfs_qm_dqrele(pdqp); 403 xfs_qm_dqrele(pdqp);
@@ -403,20 +405,28 @@ xfs_symlink(
403 *ipp = ip; 405 *ipp = ip;
404 return 0; 406 return 0;
405 407
406 error2: 408out_bmap_cancel:
407 IRELE(ip);
408 error1:
409 xfs_bmap_cancel(&free_list); 409 xfs_bmap_cancel(&free_list);
410 cancel_flags |= XFS_TRANS_ABORT; 410 cancel_flags |= XFS_TRANS_ABORT;
411 error_return: 411out_trans_cancel:
412 xfs_trans_cancel(tp, cancel_flags); 412 xfs_trans_cancel(tp, cancel_flags);
413out_release_inode:
414 /*
415 * Wait until after the current transaction is aborted to finish the
416 * setup of the inode and release the inode. This prevents recursive
417 * transactions and deadlocks from xfs_inactive.
418 */
419 if (ip) {
420 xfs_finish_inode_setup(ip);
421 IRELE(ip);
422 }
423
413 xfs_qm_dqrele(udqp); 424 xfs_qm_dqrele(udqp);
414 xfs_qm_dqrele(gdqp); 425 xfs_qm_dqrele(gdqp);
415 xfs_qm_dqrele(pdqp); 426 xfs_qm_dqrele(pdqp);
416 427
417 if (unlock_dp_on_error) 428 if (unlock_dp_on_error)
418 xfs_iunlock(dp, XFS_ILOCK_EXCL); 429 xfs_iunlock(dp, XFS_ILOCK_EXCL);
419 std_return:
420 return error; 430 return error;
421} 431}
422 432
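
The relabelled error path above (error1/error2/error_return becoming out_bmap_cancel/out_trans_cancel/out_release_inode) follows the usual kernel unwind idiom: one label per teardown step, placed in reverse order of setup, so each failure point jumps to exactly the cleanup it needs. In miniature, with hypothetical steps:

	static int ex_step_a(void) { return 0; }	/* hypothetical setup */
	static void ex_undo_a(void) { }
	static int ex_step_b(void) { return 0; }

	static int ex_setup(void)
	{
		int error;

		error = ex_step_a();
		if (error)
			goto out;
		error = ex_step_b();
		if (error)
			goto out_undo_a;
		return 0;

	out_undo_a:
		ex_undo_a();
	out:
		return error;
	}
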
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 51372e34d988..615781bf4ee5 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -115,7 +115,7 @@ DECLARE_EVENT_CLASS(xfs_perag_class,
115 __entry->refcount = refcount; 115 __entry->refcount = refcount;
116 __entry->caller_ip = caller_ip; 116 __entry->caller_ip = caller_ip;
117 ), 117 ),
118 TP_printk("dev %d:%d agno %u refcount %d caller %pf", 118 TP_printk("dev %d:%d agno %u refcount %d caller %ps",
119 MAJOR(__entry->dev), MINOR(__entry->dev), 119 MAJOR(__entry->dev), MINOR(__entry->dev),
120 __entry->agno, 120 __entry->agno,
121 __entry->refcount, 121 __entry->refcount,
@@ -239,7 +239,7 @@ TRACE_EVENT(xfs_iext_insert,
239 __entry->caller_ip = caller_ip; 239 __entry->caller_ip = caller_ip;
240 ), 240 ),
241 TP_printk("dev %d:%d ino 0x%llx state %s idx %ld " 241 TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
242 "offset %lld block %lld count %lld flag %d caller %pf", 242 "offset %lld block %lld count %lld flag %d caller %ps",
243 MAJOR(__entry->dev), MINOR(__entry->dev), 243 MAJOR(__entry->dev), MINOR(__entry->dev),
244 __entry->ino, 244 __entry->ino,
245 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), 245 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
@@ -283,7 +283,7 @@ DECLARE_EVENT_CLASS(xfs_bmap_class,
283 __entry->caller_ip = caller_ip; 283 __entry->caller_ip = caller_ip;
284 ), 284 ),
285 TP_printk("dev %d:%d ino 0x%llx state %s idx %ld " 285 TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
286 "offset %lld block %lld count %lld flag %d caller %pf", 286 "offset %lld block %lld count %lld flag %d caller %ps",
287 MAJOR(__entry->dev), MINOR(__entry->dev), 287 MAJOR(__entry->dev), MINOR(__entry->dev),
288 __entry->ino, 288 __entry->ino,
289 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), 289 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
@@ -329,7 +329,7 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
329 __entry->caller_ip = caller_ip; 329 __entry->caller_ip = caller_ip;
330 ), 330 ),
331 TP_printk("dev %d:%d bno 0x%llx nblks 0x%x hold %d pincount %d " 331 TP_printk("dev %d:%d bno 0x%llx nblks 0x%x hold %d pincount %d "
332 "lock %d flags %s caller %pf", 332 "lock %d flags %s caller %ps",
333 MAJOR(__entry->dev), MINOR(__entry->dev), 333 MAJOR(__entry->dev), MINOR(__entry->dev),
334 (unsigned long long)__entry->bno, 334 (unsigned long long)__entry->bno,
335 __entry->nblks, 335 __entry->nblks,
@@ -402,7 +402,7 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class,
402 __entry->caller_ip = caller_ip; 402 __entry->caller_ip = caller_ip;
403 ), 403 ),
404 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " 404 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
405 "lock %d flags %s caller %pf", 405 "lock %d flags %s caller %ps",
406 MAJOR(__entry->dev), MINOR(__entry->dev), 406 MAJOR(__entry->dev), MINOR(__entry->dev),
407 (unsigned long long)__entry->bno, 407 (unsigned long long)__entry->bno,
408 __entry->buffer_length, 408 __entry->buffer_length,
@@ -447,7 +447,7 @@ TRACE_EVENT(xfs_buf_ioerror,
447 __entry->caller_ip = caller_ip; 447 __entry->caller_ip = caller_ip;
448 ), 448 ),
449 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " 449 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
450 "lock %d error %d flags %s caller %pf", 450 "lock %d error %d flags %s caller %ps",
451 MAJOR(__entry->dev), MINOR(__entry->dev), 451 MAJOR(__entry->dev), MINOR(__entry->dev),
452 (unsigned long long)__entry->bno, 452 (unsigned long long)__entry->bno,
453 __entry->buffer_length, 453 __entry->buffer_length,
@@ -613,7 +613,7 @@ DECLARE_EVENT_CLASS(xfs_lock_class,
613 __entry->lock_flags = lock_flags; 613 __entry->lock_flags = lock_flags;
614 __entry->caller_ip = caller_ip; 614 __entry->caller_ip = caller_ip;
615 ), 615 ),
616 TP_printk("dev %d:%d ino 0x%llx flags %s caller %pf", 616 TP_printk("dev %d:%d ino 0x%llx flags %s caller %ps",
617 MAJOR(__entry->dev), MINOR(__entry->dev), 617 MAJOR(__entry->dev), MINOR(__entry->dev),
618 __entry->ino, 618 __entry->ino,
619 __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS), 619 __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS),
@@ -664,6 +664,7 @@ DEFINE_INODE_EVENT(xfs_alloc_file_space);
664DEFINE_INODE_EVENT(xfs_free_file_space); 664DEFINE_INODE_EVENT(xfs_free_file_space);
665DEFINE_INODE_EVENT(xfs_zero_file_space); 665DEFINE_INODE_EVENT(xfs_zero_file_space);
666DEFINE_INODE_EVENT(xfs_collapse_file_space); 666DEFINE_INODE_EVENT(xfs_collapse_file_space);
667DEFINE_INODE_EVENT(xfs_insert_file_space);
667DEFINE_INODE_EVENT(xfs_readdir); 668DEFINE_INODE_EVENT(xfs_readdir);
668#ifdef CONFIG_XFS_POSIX_ACL 669#ifdef CONFIG_XFS_POSIX_ACL
669DEFINE_INODE_EVENT(xfs_get_acl); 670DEFINE_INODE_EVENT(xfs_get_acl);
@@ -685,6 +686,9 @@ DEFINE_INODE_EVENT(xfs_inode_set_eofblocks_tag);
685DEFINE_INODE_EVENT(xfs_inode_clear_eofblocks_tag); 686DEFINE_INODE_EVENT(xfs_inode_clear_eofblocks_tag);
686DEFINE_INODE_EVENT(xfs_inode_free_eofblocks_invalid); 687DEFINE_INODE_EVENT(xfs_inode_free_eofblocks_invalid);
687 688
689DEFINE_INODE_EVENT(xfs_filemap_fault);
690DEFINE_INODE_EVENT(xfs_filemap_page_mkwrite);
691
688DECLARE_EVENT_CLASS(xfs_iref_class, 692DECLARE_EVENT_CLASS(xfs_iref_class,
689 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), 693 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
690 TP_ARGS(ip, caller_ip), 694 TP_ARGS(ip, caller_ip),
@@ -702,7 +706,7 @@ DECLARE_EVENT_CLASS(xfs_iref_class,
702 __entry->pincount = atomic_read(&ip->i_pincount); 706 __entry->pincount = atomic_read(&ip->i_pincount);
703 __entry->caller_ip = caller_ip; 707 __entry->caller_ip = caller_ip;
704 ), 708 ),
705 TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pf", 709 TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %ps",
706 MAJOR(__entry->dev), MINOR(__entry->dev), 710 MAJOR(__entry->dev), MINOR(__entry->dev),
707 __entry->ino, 711 __entry->ino,
708 __entry->count, 712 __entry->count,
@@ -1217,6 +1221,11 @@ DEFINE_IOMAP_EVENT(xfs_map_blocks_found);
1217DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc); 1221DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc);
1218DEFINE_IOMAP_EVENT(xfs_get_blocks_found); 1222DEFINE_IOMAP_EVENT(xfs_get_blocks_found);
1219DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc); 1223DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
1224DEFINE_IOMAP_EVENT(xfs_gbmap_direct);
1225DEFINE_IOMAP_EVENT(xfs_gbmap_direct_new);
1226DEFINE_IOMAP_EVENT(xfs_gbmap_direct_update);
1227DEFINE_IOMAP_EVENT(xfs_gbmap_direct_none);
1228DEFINE_IOMAP_EVENT(xfs_gbmap_direct_endio);
1220 1229
1221DECLARE_EVENT_CLASS(xfs_simple_io_class, 1230DECLARE_EVENT_CLASS(xfs_simple_io_class,
1222 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), 1231 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
@@ -1333,7 +1342,7 @@ TRACE_EVENT(xfs_bunmap,
1333 __entry->flags = flags; 1342 __entry->flags = flags;
1334 ), 1343 ),
1335 TP_printk("dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx" 1344 TP_printk("dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx"
1336 "flags %s caller %pf", 1345 "flags %s caller %ps",
1337 MAJOR(__entry->dev), MINOR(__entry->dev), 1346 MAJOR(__entry->dev), MINOR(__entry->dev),
1338 __entry->ino, 1347 __entry->ino,
1339 __entry->size, 1348 __entry->size,
@@ -1466,7 +1475,7 @@ TRACE_EVENT(xfs_agf,
1466 ), 1475 ),
1467 TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u " 1476 TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u "
1468 "levels b %u c %u flfirst %u fllast %u flcount %u " 1477 "levels b %u c %u flfirst %u fllast %u flcount %u "
1469 "freeblks %u longest %u caller %pf", 1478 "freeblks %u longest %u caller %ps",
1470 MAJOR(__entry->dev), MINOR(__entry->dev), 1479 MAJOR(__entry->dev), MINOR(__entry->dev),
1471 __entry->agno, 1480 __entry->agno,
1472 __print_flags(__entry->flags, "|", XFS_AGF_FLAGS), 1481 __print_flags(__entry->flags, "|", XFS_AGF_FLAGS),
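Note on the %pf to %ps conversions throughout this file: these trace points record a raw return address (caller_ip), and %ps is the vsprintf specifier that symbolizes a plain code address, while %pf expects a function descriptor, which most architectures do not use. A minimal sketch of the same idea (the function name is illustrative, not part of the patch):

	#include <linux/kernel.h>

	/* Hypothetical illustration: %ps resolves a plain code address to
	 * its symbol name (e.g. "xfs_symlink"), matching what the trace
	 * points above store in caller_ip via _RET_IP_.
	 */
	static noinline void example_report_caller(void)
	{
		printk(KERN_DEBUG "called from %ps\n", (void *)_RET_IP_);
	}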
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index eb90cd59a0ec..220ef2c906b2 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -173,7 +173,7 @@ xfs_trans_reserve(
173 uint rtextents) 173 uint rtextents)
174{ 174{
175 int error = 0; 175 int error = 0;
176 int rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; 176 bool rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
177 177
178 /* Mark this thread as being in a transaction */ 178 /* Mark this thread as being in a transaction */
179 current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); 179 current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
@@ -184,8 +184,7 @@ xfs_trans_reserve(
184 * fail if the count would go below zero. 184 * fail if the count would go below zero.
185 */ 185 */
186 if (blocks > 0) { 186 if (blocks > 0) {
187 error = xfs_icsb_modify_counters(tp->t_mountp, XFS_SBS_FDBLOCKS, 187 error = xfs_mod_fdblocks(tp->t_mountp, -((int64_t)blocks), rsvd);
188 -((int64_t)blocks), rsvd);
189 if (error != 0) { 188 if (error != 0) {
190 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); 189 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
191 return -ENOSPC; 190 return -ENOSPC;
@@ -236,8 +235,7 @@ xfs_trans_reserve(
236 * fail if the count would go below zero. 235 * fail if the count would go below zero.
237 */ 236 */
238 if (rtextents > 0) { 237 if (rtextents > 0) {
239 error = xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FREXTENTS, 238 error = xfs_mod_frextents(tp->t_mountp, -((int64_t)rtextents));
240 -((int64_t)rtextents), rsvd);
241 if (error) { 239 if (error) {
242 error = -ENOSPC; 240 error = -ENOSPC;
243 goto undo_log; 241 goto undo_log;
@@ -268,8 +266,7 @@ undo_log:
268 266
269undo_blocks: 267undo_blocks:
270 if (blocks > 0) { 268 if (blocks > 0) {
271 xfs_icsb_modify_counters(tp->t_mountp, XFS_SBS_FDBLOCKS, 269 xfs_mod_fdblocks(tp->t_mountp, -((int64_t)blocks), rsvd);
272 (int64_t)blocks, rsvd);
273 tp->t_blk_res = 0; 270 tp->t_blk_res = 0;
274 } 271 }
275 272
@@ -488,6 +485,54 @@ xfs_trans_apply_sb_deltas(
488 sizeof(sbp->sb_frextents) - 1); 485 sizeof(sbp->sb_frextents) - 1);
489} 486}
490 487
488STATIC int
489xfs_sb_mod8(
490 uint8_t *field,
491 int8_t delta)
492{
493 int8_t counter = *field;
494
495 counter += delta;
496 if (counter < 0) {
497 ASSERT(0);
498 return -EINVAL;
499 }
500 *field = counter;
501 return 0;
502}
503
504STATIC int
505xfs_sb_mod32(
506 uint32_t *field,
507 int32_t delta)
508{
509 int32_t counter = *field;
510
511 counter += delta;
512 if (counter < 0) {
513 ASSERT(0);
514 return -EINVAL;
515 }
516 *field = counter;
517 return 0;
518}
519
520STATIC int
521xfs_sb_mod64(
522 uint64_t *field,
523 int64_t delta)
524{
525 int64_t counter = *field;
526
527 counter += delta;
528 if (counter < 0) {
529 ASSERT(0);
530 return -EINVAL;
531 }
532 *field = counter;
533 return 0;
534}
535
491/* 536/*
492 * xfs_trans_unreserve_and_mod_sb() is called to release unused reservations 537 * xfs_trans_unreserve_and_mod_sb() is called to release unused reservations
493 * and apply superblock counter changes to the in-core superblock. The 538 * and apply superblock counter changes to the in-core superblock. The
@@ -495,13 +540,6 @@ xfs_trans_apply_sb_deltas(
495 * applied to the in-core superblock. The idea is that that has already been 540 * applied to the in-core superblock. The idea is that that has already been
496 * done. 541 * done.
497 * 542 *
498 * This is done efficiently with a single call to xfs_mod_incore_sb_batch().
 499 * However, we have to ensure that we modify each superblock field only
 500 * once because the application of the delta values may not be atomic. That can
 501 * lead to ENOSPC races occurring if we have two separate modifications of the
502 * free space counter to put back the entire reservation and then take away
503 * what we used.
504 *
505 * If we are not logging superblock counters, then the inode allocated/free and 543 * If we are not logging superblock counters, then the inode allocated/free and
506 * used block counts are not updated in the on disk superblock. In this case, 544 * used block counts are not updated in the on disk superblock. In this case,
507 * XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we 545 * XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we
@@ -509,21 +547,15 @@ xfs_trans_apply_sb_deltas(
509 */ 547 */
510void 548void
511xfs_trans_unreserve_and_mod_sb( 549xfs_trans_unreserve_and_mod_sb(
512 xfs_trans_t *tp) 550 struct xfs_trans *tp)
513{ 551{
514 xfs_mod_sb_t msb[9]; /* If you add cases, add entries */ 552 struct xfs_mount *mp = tp->t_mountp;
515 xfs_mod_sb_t *msbp; 553 bool rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
516 xfs_mount_t *mp = tp->t_mountp; 554 int64_t blkdelta = 0;
517 /* REFERENCED */ 555 int64_t rtxdelta = 0;
518 int error; 556 int64_t idelta = 0;
519 int rsvd; 557 int64_t ifreedelta = 0;
520 int64_t blkdelta = 0; 558 int error;
521 int64_t rtxdelta = 0;
522 int64_t idelta = 0;
523 int64_t ifreedelta = 0;
524
525 msbp = msb;
526 rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
527 559
528 /* calculate deltas */ 560 /* calculate deltas */
529 if (tp->t_blk_res > 0) 561 if (tp->t_blk_res > 0)
@@ -547,97 +579,115 @@ xfs_trans_unreserve_and_mod_sb(
547 579
548 /* apply the per-cpu counters */ 580 /* apply the per-cpu counters */
549 if (blkdelta) { 581 if (blkdelta) {
550 error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, 582 error = xfs_mod_fdblocks(mp, blkdelta, rsvd);
551 blkdelta, rsvd);
552 if (error) 583 if (error)
553 goto out; 584 goto out;
554 } 585 }
555 586
556 if (idelta) { 587 if (idelta) {
557 error = xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT, 588 error = xfs_mod_icount(mp, idelta);
558 idelta, rsvd);
559 if (error) 589 if (error)
560 goto out_undo_fdblocks; 590 goto out_undo_fdblocks;
561 } 591 }
562 592
563 if (ifreedelta) { 593 if (ifreedelta) {
564 error = xfs_icsb_modify_counters(mp, XFS_SBS_IFREE, 594 error = xfs_mod_ifree(mp, ifreedelta);
565 ifreedelta, rsvd);
566 if (error) 595 if (error)
567 goto out_undo_icount; 596 goto out_undo_icount;
568 } 597 }
569 598
599 if (rtxdelta == 0 && !(tp->t_flags & XFS_TRANS_SB_DIRTY))
600 return;
601
570 /* apply remaining deltas */ 602 /* apply remaining deltas */
571 if (rtxdelta != 0) { 603 spin_lock(&mp->m_sb_lock);
572 msbp->msb_field = XFS_SBS_FREXTENTS; 604 if (rtxdelta) {
573 msbp->msb_delta = rtxdelta; 605 error = xfs_sb_mod64(&mp->m_sb.sb_frextents, rtxdelta);
574 msbp++; 606 if (error)
607 goto out_undo_ifree;
575 } 608 }
576 609
577 if (tp->t_flags & XFS_TRANS_SB_DIRTY) { 610 if (tp->t_dblocks_delta != 0) {
578 if (tp->t_dblocks_delta != 0) { 611 error = xfs_sb_mod64(&mp->m_sb.sb_dblocks, tp->t_dblocks_delta);
579 msbp->msb_field = XFS_SBS_DBLOCKS; 612 if (error)
580 msbp->msb_delta = tp->t_dblocks_delta; 613 goto out_undo_frextents;
581 msbp++;
582 }
583 if (tp->t_agcount_delta != 0) {
584 msbp->msb_field = XFS_SBS_AGCOUNT;
585 msbp->msb_delta = tp->t_agcount_delta;
586 msbp++;
587 }
588 if (tp->t_imaxpct_delta != 0) {
589 msbp->msb_field = XFS_SBS_IMAX_PCT;
590 msbp->msb_delta = tp->t_imaxpct_delta;
591 msbp++;
592 }
593 if (tp->t_rextsize_delta != 0) {
594 msbp->msb_field = XFS_SBS_REXTSIZE;
595 msbp->msb_delta = tp->t_rextsize_delta;
596 msbp++;
597 }
598 if (tp->t_rbmblocks_delta != 0) {
599 msbp->msb_field = XFS_SBS_RBMBLOCKS;
600 msbp->msb_delta = tp->t_rbmblocks_delta;
601 msbp++;
602 }
603 if (tp->t_rblocks_delta != 0) {
604 msbp->msb_field = XFS_SBS_RBLOCKS;
605 msbp->msb_delta = tp->t_rblocks_delta;
606 msbp++;
607 }
608 if (tp->t_rextents_delta != 0) {
609 msbp->msb_field = XFS_SBS_REXTENTS;
610 msbp->msb_delta = tp->t_rextents_delta;
611 msbp++;
612 }
613 if (tp->t_rextslog_delta != 0) {
614 msbp->msb_field = XFS_SBS_REXTSLOG;
615 msbp->msb_delta = tp->t_rextslog_delta;
616 msbp++;
617 }
618 } 614 }
619 615 if (tp->t_agcount_delta != 0) {
620 /* 616 error = xfs_sb_mod32(&mp->m_sb.sb_agcount, tp->t_agcount_delta);
621 * If we need to change anything, do it.
622 */
623 if (msbp > msb) {
624 error = xfs_mod_incore_sb_batch(tp->t_mountp, msb,
625 (uint)(msbp - msb), rsvd);
626 if (error) 617 if (error)
627 goto out_undo_ifreecount; 618 goto out_undo_dblocks;
628 } 619 }
629 620 if (tp->t_imaxpct_delta != 0) {
621 error = xfs_sb_mod8(&mp->m_sb.sb_imax_pct, tp->t_imaxpct_delta);
622 if (error)
623 goto out_undo_agcount;
624 }
625 if (tp->t_rextsize_delta != 0) {
626 error = xfs_sb_mod32(&mp->m_sb.sb_rextsize,
627 tp->t_rextsize_delta);
628 if (error)
629 goto out_undo_imaxpct;
630 }
631 if (tp->t_rbmblocks_delta != 0) {
632 error = xfs_sb_mod32(&mp->m_sb.sb_rbmblocks,
633 tp->t_rbmblocks_delta);
634 if (error)
635 goto out_undo_rextsize;
636 }
637 if (tp->t_rblocks_delta != 0) {
638 error = xfs_sb_mod64(&mp->m_sb.sb_rblocks, tp->t_rblocks_delta);
639 if (error)
640 goto out_undo_rbmblocks;
641 }
642 if (tp->t_rextents_delta != 0) {
643 error = xfs_sb_mod64(&mp->m_sb.sb_rextents,
644 tp->t_rextents_delta);
645 if (error)
646 goto out_undo_rblocks;
647 }
648 if (tp->t_rextslog_delta != 0) {
649 error = xfs_sb_mod8(&mp->m_sb.sb_rextslog,
650 tp->t_rextslog_delta);
651 if (error)
652 goto out_undo_rextents;
653 }
654 spin_unlock(&mp->m_sb_lock);
630 return; 655 return;
631 656
632out_undo_ifreecount: 657out_undo_rextents:
658 if (tp->t_rextents_delta)
659 xfs_sb_mod64(&mp->m_sb.sb_rextents, -tp->t_rextents_delta);
660out_undo_rblocks:
661 if (tp->t_rblocks_delta)
662 xfs_sb_mod64(&mp->m_sb.sb_rblocks, -tp->t_rblocks_delta);
663out_undo_rbmblocks:
664 if (tp->t_rbmblocks_delta)
665 xfs_sb_mod32(&mp->m_sb.sb_rbmblocks, -tp->t_rbmblocks_delta);
666out_undo_rextsize:
667 if (tp->t_rextsize_delta)
668 xfs_sb_mod32(&mp->m_sb.sb_rextsize, -tp->t_rextsize_delta);
669out_undo_imaxpct:
670	if (tp->t_imaxpct_delta)
671 xfs_sb_mod8(&mp->m_sb.sb_imax_pct, -tp->t_imaxpct_delta);
672out_undo_agcount:
673 if (tp->t_agcount_delta)
674 xfs_sb_mod32(&mp->m_sb.sb_agcount, -tp->t_agcount_delta);
675out_undo_dblocks:
676 if (tp->t_dblocks_delta)
677 xfs_sb_mod64(&mp->m_sb.sb_dblocks, -tp->t_dblocks_delta);
678out_undo_frextents:
679 if (rtxdelta)
680 xfs_sb_mod64(&mp->m_sb.sb_frextents, -rtxdelta);
681out_undo_ifree:
682 spin_unlock(&mp->m_sb_lock);
633 if (ifreedelta) 683 if (ifreedelta)
634 xfs_icsb_modify_counters(mp, XFS_SBS_IFREE, -ifreedelta, rsvd); 684 xfs_mod_ifree(mp, -ifreedelta);
635out_undo_icount: 685out_undo_icount:
636 if (idelta) 686 if (idelta)
637 xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT, -idelta, rsvd); 687 xfs_mod_icount(mp, -idelta);
638out_undo_fdblocks: 688out_undo_fdblocks:
639 if (blkdelta) 689 if (blkdelta)
640 xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, -blkdelta, rsvd); 690 xfs_mod_fdblocks(mp, -blkdelta, rsvd);
641out: 691out:
642 ASSERT(error == 0); 692 ASSERT(error == 0);
643 return; 693 return;
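The rework above replaces the xfs_mod_sb_t batch array with direct per-field updates under m_sb_lock, each paired with an undo label so that a failed update unwinds every earlier one in reverse order and no delta is ever applied twice. A standalone sketch of that validate-and-unwind idiom (user-space C, all names illustrative, not from the patch):

	#include <errno.h>
	#include <stdint.h>

	/* Modify a counter only if the result stays non-negative,
	 * mirroring the xfs_sb_mod64() helpers above.
	 */
	static int mod_counter(int64_t *field, int64_t delta)
	{
		int64_t v = *field + delta;

		if (v < 0)		/* would underflow: reject */
			return -EINVAL;
		*field = v;
		return 0;
	}

	struct counters { int64_t a, b, c; };

	static int apply_deltas(struct counters *s,
				int64_t da, int64_t db, int64_t dc)
	{
		int error;

		error = mod_counter(&s->a, da);
		if (error)
			goto out;
		error = mod_counter(&s->b, db);
		if (error)
			goto undo_a;
		error = mod_counter(&s->c, dc);
		if (error)
			goto undo_b;
		return 0;

	undo_b:
		mod_counter(&s->b, -db);	/* restores a known-valid value */
	undo_a:
		mod_counter(&s->a, -da);
	out:
		return error;
	}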
diff --git a/include/linux/falloc.h b/include/linux/falloc.h
index 31591686ac2d..996111000a8c 100644
--- a/include/linux/falloc.h
+++ b/include/linux/falloc.h
@@ -21,4 +21,10 @@ struct space_resv {
21#define FS_IOC_RESVSP _IOW('X', 40, struct space_resv) 21#define FS_IOC_RESVSP _IOW('X', 40, struct space_resv)
22#define FS_IOC_RESVSP64 _IOW('X', 42, struct space_resv) 22#define FS_IOC_RESVSP64 _IOW('X', 42, struct space_resv)
23 23
24#define FALLOC_FL_SUPPORTED_MASK (FALLOC_FL_KEEP_SIZE | \
25 FALLOC_FL_PUNCH_HOLE | \
26 FALLOC_FL_COLLAPSE_RANGE | \
27 FALLOC_FL_ZERO_RANGE | \
28 FALLOC_FL_INSERT_RANGE)
29
24#endif /* _FALLOC_H_ */ 30#endif /* _FALLOC_H_ */
diff --git a/include/uapi/linux/falloc.h b/include/uapi/linux/falloc.h
index d1197ae3723c..3e445a760f14 100644
--- a/include/uapi/linux/falloc.h
+++ b/include/uapi/linux/falloc.h
@@ -41,4 +41,21 @@
41 */ 41 */
42#define FALLOC_FL_ZERO_RANGE 0x10 42#define FALLOC_FL_ZERO_RANGE 0x10
43 43
44/*
 45 * FALLOC_FL_INSERT_RANGE is used to insert space within the file size without
 46 * overwriting any existing data. The contents of the file beyond offset are
 47 * shifted to the right by len bytes to create a hole. As such, this
48 * operation will increase the size of the file by len bytes.
49 *
50 * Different filesystems may implement different limitations on the granularity
51 * of the operation. Most will limit operations to filesystem block size
52 * boundaries, but this boundary may be larger or smaller depending on
53 * the filesystem and/or the configuration of the filesystem or file.
54 *
55 * Attempting to insert space using this flag at OR beyond the end of
56 * the file is considered an illegal operation - just use ftruncate(2) or
 57 * fallocate(2) with mode 0 for such operations.
58 */
59#define FALLOC_FL_INSERT_RANGE 0x20
60
44#endif /* _UAPI_FALLOC_H_ */ 61#endif /* _UAPI_FALLOC_H_ */
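For context, the new flag is exercised from user space through fallocate(2). A minimal sketch, assuming an existing file named testfile and a block-aligned offset and length (both illustrative):

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <linux/falloc.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("testfile", O_RDWR);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		/* Shift everything from offset 4096 onwards right by 65536
		 * bytes, growing the file by that amount. Per the comment
		 * above, offset and len normally must be aligned to the
		 * filesystem block size and the range must start before EOF.
		 */
		if (fallocate(fd, FALLOC_FL_INSERT_RANGE, 4096, 65536) < 0)
			perror("fallocate(FALLOC_FL_INSERT_RANGE)");
		close(fd);
		return 0;
	}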