-rw-r--r--   fs/xfs/libxfs/xfs_ag_resv.c      31
-rw-r--r--   fs/xfs/libxfs/xfs_bmap.c         26
-rw-r--r--   fs/xfs/libxfs/xfs_bmap.h          2
-rw-r--r--   fs/xfs/libxfs/xfs_format.h        5
-rw-r--r--   fs/xfs/libxfs/xfs_inode_buf.c    76
-rw-r--r--   fs/xfs/libxfs/xfs_rtbitmap.c      4
-rw-r--r--   fs/xfs/xfs_bmap_util.c          106
-rw-r--r--   fs/xfs/xfs_fsmap.c                4
-rw-r--r--   fs/xfs/xfs_fsops.c                2
-rw-r--r--   fs/xfs/xfs_inode.c               57
-rw-r--r--   fs/xfs/xfs_iomap.c               15
-rw-r--r--   fs/xfs/xfs_trans.c                7
12 files changed, 205 insertions, 130 deletions
diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c
index 84db76e0e3e3..fecd187fcf2c 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -157,6 +157,7 @@ __xfs_ag_resv_free(
 	error = xfs_mod_fdblocks(pag->pag_mount, oldresv, true);
 	resv->ar_reserved = 0;
 	resv->ar_asked = 0;
+	resv->ar_orig_reserved = 0;
 
 	if (error)
 		trace_xfs_ag_resv_free_error(pag->pag_mount, pag->pag_agno,
@@ -189,13 +190,34 @@ __xfs_ag_resv_init(
 	struct xfs_mount		*mp = pag->pag_mount;
 	struct xfs_ag_resv		*resv;
 	int				error;
-	xfs_extlen_t			reserved;
+	xfs_extlen_t			hidden_space;
 
 	if (used > ask)
 		ask = used;
-	reserved = ask - used;
 
-	error = xfs_mod_fdblocks(mp, -(int64_t)reserved, true);
+	switch (type) {
+	case XFS_AG_RESV_RMAPBT:
+		/*
+		 * Space taken by the rmapbt is not subtracted from fdblocks
+		 * because the rmapbt lives in the free space. Here we must
+		 * subtract the entire reservation from fdblocks so that we
+		 * always have blocks available for rmapbt expansion.
+		 */
+		hidden_space = ask;
+		break;
+	case XFS_AG_RESV_METADATA:
+		/*
+		 * Space taken by all other metadata btrees are accounted
+		 * on-disk as used space. We therefore only hide the space
+		 * that is reserved but not used by the trees.
+		 */
+		hidden_space = ask - used;
+		break;
+	default:
+		ASSERT(0);
+		return -EINVAL;
+	}
+	error = xfs_mod_fdblocks(mp, -(int64_t)hidden_space, true);
 	if (error) {
 		trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno,
 				error, _RET_IP_);
@@ -216,7 +238,8 @@ __xfs_ag_resv_init(
 
 	resv = xfs_perag_resv(pag, type);
 	resv->ar_asked = ask;
-	resv->ar_reserved = resv->ar_orig_reserved = reserved;
+	resv->ar_orig_reserved = hidden_space;
+	resv->ar_reserved = ask - used;
 
 	trace_xfs_ag_resv_init(pag, type, ask);
 	return 0;
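A rough worked example of the accounting change above (the numbers and the helper below are illustrative, not part of the patch): with ask = 100 and used = 40, an XFS_AG_RESV_RMAPBT reservation now hides the full 100 blocks from fdblocks, while XFS_AG_RESV_METADATA hides only the 60 unused blocks; both cases still record ar_reserved = 60.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for the two per-AG reservation types above. */
enum resv_type { RESV_METADATA, RESV_RMAPBT };

/* Mirrors the hidden_space selection added to __xfs_ag_resv_init(). */
static uint64_t hidden_space(enum resv_type type, uint64_t ask, uint64_t used)
{
	if (used > ask)
		ask = used;
	/* rmapbt blocks live in free space, so the whole ask is hidden */
	return type == RESV_RMAPBT ? ask : ask - used;
}

int main(void)
{
	printf("rmapbt:   %llu\n",
	       (unsigned long long)hidden_space(RESV_RMAPBT, 100, 40));   /* 100 */
	printf("metadata: %llu\n",
	       (unsigned long long)hidden_space(RESV_METADATA, 100, 40)); /* 60 */
	return 0;
}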
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 01628f0c9a0c..7205268b30bc 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -5780,6 +5780,32 @@ del_cursor:
 	return error;
 }
 
+/* Make sure we won't be right-shifting an extent past the maximum bound. */
+int
+xfs_bmap_can_insert_extents(
+	struct xfs_inode	*ip,
+	xfs_fileoff_t		off,
+	xfs_fileoff_t		shift)
+{
+	struct xfs_bmbt_irec	got;
+	int			is_empty;
+	int			error = 0;
+
+	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+
+	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+		return -EIO;
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	error = xfs_bmap_last_extent(NULL, ip, XFS_DATA_FORK, &got, &is_empty);
+	if (!error && !is_empty && got.br_startoff >= off &&
+	    ((got.br_startoff + shift) & BMBT_STARTOFF_MASK) < got.br_startoff)
+		error = -EINVAL;
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+	return error;
+}
+
 int
 xfs_bmap_insert_extents(
 	struct xfs_trans	*tp,
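The check added above rejects a right shift that would push the last data-fork extent's start offset past the width of the on-disk bmbt startoff field. A minimal sketch of the wraparound test, assuming BMBT_STARTOFF_BITLEN is 54 as defined in xfs_format.h (the helper name here is made up for illustration):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define STARTOFF_BITLEN	54				/* BMBT_STARTOFF_BITLEN */
#define STARTOFF_MASK	((1ULL << STARTOFF_BITLEN) - 1)	/* BMBT_STARTOFF_MASK */

/* True if startoff + shift no longer fits in the 54-bit field: the masked
 * sum wraps around and compares lower than the original offset. */
static bool shift_wraps(uint64_t startoff, uint64_t shift)
{
	return ((startoff + shift) & STARTOFF_MASK) < startoff;
}

int main(void)
{
	printf("%d\n", shift_wraps(STARTOFF_MASK - 10, 5));	/* 0: still in range */
	printf("%d\n", shift_wraps(STARTOFF_MASK - 10, 20));	/* 1: past the maximum bound */
	return 0;
}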
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 99dddbd0fcc6..9b49ddf99c41 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -227,6 +227,8 @@ int xfs_bmap_collapse_extents(struct xfs_trans *tp, struct xfs_inode *ip,
 		xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb,
 		bool *done, xfs_fsblock_t *firstblock,
 		struct xfs_defer_ops *dfops);
+int	xfs_bmap_can_insert_extents(struct xfs_inode *ip, xfs_fileoff_t off,
+		xfs_fileoff_t shift);
 int	xfs_bmap_insert_extents(struct xfs_trans *tp, struct xfs_inode *ip,
 		xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb,
 		bool *done, xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock,
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 1c5a8aaf2bfc..059bc44c27e8 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -962,6 +962,9 @@ typedef enum xfs_dinode_fmt {
 		XFS_DFORK_DSIZE(dip, mp) : \
 		XFS_DFORK_ASIZE(dip, mp))
 
+#define XFS_DFORK_MAXEXT(dip, mp, w) \
+	(XFS_DFORK_SIZE(dip, mp, w) / sizeof(struct xfs_bmbt_rec))
+
 /*
  * Return pointers to the data or attribute forks.
  */
@@ -1526,6 +1529,8 @@ typedef struct xfs_bmdr_block {
 #define BMBT_STARTBLOCK_BITLEN	52
 #define BMBT_BLOCKCOUNT_BITLEN	21
 
+#define BMBT_STARTOFF_MASK	((1ULL << BMBT_STARTOFF_BITLEN) - 1)
+
 typedef struct xfs_bmbt_rec {
 	__be64			l0, l1;
 } xfs_bmbt_rec_t;
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index d38d724534c4..33dc34655ac3 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -374,6 +374,47 @@ xfs_log_dinode_to_disk(
 	}
 }
 
+static xfs_failaddr_t
+xfs_dinode_verify_fork(
+	struct xfs_dinode	*dip,
+	struct xfs_mount	*mp,
+	int			whichfork)
+{
+	uint32_t		di_nextents = XFS_DFORK_NEXTENTS(dip, whichfork);
+
+	switch (XFS_DFORK_FORMAT(dip, whichfork)) {
+	case XFS_DINODE_FMT_LOCAL:
+		/*
+		 * no local regular files yet
+		 */
+		if (whichfork == XFS_DATA_FORK) {
+			if (S_ISREG(be16_to_cpu(dip->di_mode)))
+				return __this_address;
+			if (be64_to_cpu(dip->di_size) >
+					XFS_DFORK_SIZE(dip, mp, whichfork))
+				return __this_address;
+		}
+		if (di_nextents)
+			return __this_address;
+		break;
+	case XFS_DINODE_FMT_EXTENTS:
+		if (di_nextents > XFS_DFORK_MAXEXT(dip, mp, whichfork))
+			return __this_address;
+		break;
+	case XFS_DINODE_FMT_BTREE:
+		if (whichfork == XFS_ATTR_FORK) {
+			if (di_nextents > MAXAEXTNUM)
+				return __this_address;
+		} else if (di_nextents > MAXEXTNUM) {
+			return __this_address;
+		}
+		break;
+	default:
+		return __this_address;
+	}
+	return NULL;
+}
+
 xfs_failaddr_t
 xfs_dinode_verify(
 	struct xfs_mount	*mp,
@@ -441,24 +482,9 @@ xfs_dinode_verify(
 	case S_IFREG:
 	case S_IFLNK:
 	case S_IFDIR:
-		switch (dip->di_format) {
-		case XFS_DINODE_FMT_LOCAL:
-			/*
-			 * no local regular files yet
-			 */
-			if (S_ISREG(mode))
-				return __this_address;
-			if (di_size > XFS_DFORK_DSIZE(dip, mp))
-				return __this_address;
-			if (dip->di_nextents)
-				return __this_address;
-			/* fall through */
-		case XFS_DINODE_FMT_EXTENTS:
-		case XFS_DINODE_FMT_BTREE:
-			break;
-		default:
-			return __this_address;
-		}
+		fa = xfs_dinode_verify_fork(dip, mp, XFS_DATA_FORK);
+		if (fa)
+			return fa;
 		break;
 	case 0:
 		/* Uninitialized inode ok. */
@@ -468,17 +494,9 @@ xfs_dinode_verify(
 	}
 
 	if (XFS_DFORK_Q(dip)) {
-		switch (dip->di_aformat) {
-		case XFS_DINODE_FMT_LOCAL:
-			if (dip->di_anextents)
-				return __this_address;
-			/* fall through */
-		case XFS_DINODE_FMT_EXTENTS:
-		case XFS_DINODE_FMT_BTREE:
-			break;
-		default:
-			return __this_address;
-		}
+		fa = xfs_dinode_verify_fork(dip, mp, XFS_ATTR_FORK);
+		if (fa)
+			return fa;
 	} else {
 		/*
 		 * If there is no fork offset, this may be a freshly-made inode
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index 65fc4ed2e9a1..b228c821bae6 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -1029,8 +1029,8 @@ xfs_rtalloc_query_range(
 	if (low_rec->ar_startext >= mp->m_sb.sb_rextents ||
 	    low_rec->ar_startext == high_rec->ar_startext)
 		return 0;
-	if (high_rec->ar_startext >= mp->m_sb.sb_rextents)
-		high_rec->ar_startext = mp->m_sb.sb_rextents - 1;
+	if (high_rec->ar_startext > mp->m_sb.sb_rextents)
+		high_rec->ar_startext = mp->m_sb.sb_rextents;
 
 	/* Iterate the bitmap, looking for discrepancies. */
 	rtstart = low_rec->ar_startext;
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index c35009a86699..83b1e8c6c18f 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -685,12 +685,10 @@ out_unlock_iolock:
 }
 
 /*
- * dead simple method of punching delalyed allocation blocks from a range in
- * the inode. Walks a block at a time so will be slow, but is only executed in
- * rare error cases so the overhead is not critical. This will always punch out
- * both the start and end blocks, even if the ranges only partially overlap
- * them, so it is up to the caller to ensure that partial blocks are not
- * passed in.
+ * Dead simple method of punching delalyed allocation blocks from a range in
+ * the inode. This will always punch out both the start and end blocks, even
+ * if the ranges only partially overlap them, so it is up to the caller to
+ * ensure that partial blocks are not passed in.
  */
 int
 xfs_bmap_punch_delalloc_range(
@@ -698,63 +696,44 @@ xfs_bmap_punch_delalloc_range(
 	xfs_fileoff_t		start_fsb,
 	xfs_fileoff_t		length)
 {
-	xfs_fileoff_t		remaining = length;
+	struct xfs_ifork	*ifp = &ip->i_df;
+	xfs_fileoff_t		end_fsb = start_fsb + length;
+	struct xfs_bmbt_irec	got, del;
+	struct xfs_iext_cursor	icur;
 	int			error = 0;
 
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 
-	do {
-		int		done;
-		xfs_bmbt_irec_t	imap;
-		int		nimaps = 1;
-		xfs_fsblock_t	firstblock;
-		struct xfs_defer_ops dfops;
+	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+		error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
+		if (error)
+			return error;
+	}
 
-		/*
-		 * Map the range first and check that it is a delalloc extent
-		 * before trying to unmap the range. Otherwise we will be
-		 * trying to remove a real extent (which requires a
-		 * transaction) or a hole, which is probably a bad idea...
-		 */
-		error = xfs_bmapi_read(ip, start_fsb, 1, &imap, &nimaps,
-				       XFS_BMAPI_ENTIRE);
+	if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got))
+		return 0;
 
-		if (error) {
-			/* something screwed, just bail */
-			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-				xfs_alert(ip->i_mount,
-			"Failed delalloc mapping lookup ino %lld fsb %lld.",
-						ip->i_ino, start_fsb);
-			}
-			break;
-		}
-		if (!nimaps) {
-			/* nothing there */
-			goto next_block;
-		}
-		if (imap.br_startblock != DELAYSTARTBLOCK) {
-			/* been converted, ignore */
-			goto next_block;
-		}
-		WARN_ON(imap.br_blockcount == 0);
+	while (got.br_startoff + got.br_blockcount > start_fsb) {
+		del = got;
+		xfs_trim_extent(&del, start_fsb, length);
 
 		/*
-		 * Note: while we initialise the firstblock/dfops pair, they
-		 * should never be used because blocks should never be
-		 * allocated or freed for a delalloc extent and hence we need
-		 * don't cancel or finish them after the xfs_bunmapi() call.
+		 * A delete can push the cursor forward. Step back to the
+		 * previous extent on non-delalloc or extents outside the
+		 * target range.
 		 */
-		xfs_defer_init(&dfops, &firstblock);
-		error = xfs_bunmapi(NULL, ip, start_fsb, 1, 0, 1, &firstblock,
-					&dfops, &done);
-		if (error)
-			break;
+		if (!del.br_blockcount ||
+		    !isnullstartblock(del.br_startblock)) {
+			if (!xfs_iext_prev_extent(ifp, &icur, &got))
+				break;
+			continue;
+		}
 
-		ASSERT(!xfs_defer_has_unfinished_work(&dfops));
-next_block:
-		start_fsb++;
-		remaining--;
-	} while(remaining > 0);
+		error = xfs_bmap_del_extent_delay(ip, XFS_DATA_FORK, &icur,
+						  &got, &del);
+		if (error || !xfs_iext_get_extent(ifp, &icur, &got))
+			break;
+	}
 
 	return error;
 }
@@ -1208,7 +1187,22 @@ xfs_free_file_space(
 		return 0;
 	if (offset + len > XFS_ISIZE(ip))
 		len = XFS_ISIZE(ip) - offset;
-	return iomap_zero_range(VFS_I(ip), offset, len, NULL, &xfs_iomap_ops);
+	error = iomap_zero_range(VFS_I(ip), offset, len, NULL, &xfs_iomap_ops);
+	if (error)
+		return error;
+
+	/*
+	 * If we zeroed right up to EOF and EOF straddles a page boundary we
+	 * must make sure that the post-EOF area is also zeroed because the
+	 * page could be mmap'd and iomap_zero_range doesn't do that for us.
+	 * Writeback of the eof page will do this, albeit clumsily.
+	 */
+	if (offset + len >= XFS_ISIZE(ip) && ((offset + len) & PAGE_MASK)) {
+		error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
+				(offset + len) & ~PAGE_MASK, LLONG_MAX);
+	}
+
+	return error;
 }
 
 /*
@@ -1404,6 +1398,10 @@ xfs_insert_file_space(
 
 	trace_xfs_insert_file_space(ip);
 
+	error = xfs_bmap_can_insert_extents(ip, stop_fsb, shift_fsb);
+	if (error)
+		return error;
+
 	error = xfs_prepare_shift(ip, offset);
 	if (error)
 		return error;
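For the rewritten xfs_bmap_punch_delalloc_range() above: the loop walks the in-core extent list backwards from the end of the range and trims each record to the punch range before deleting the delalloc blocks. A simplified sketch of that trimming step (the struct and helper are stand-ins for struct xfs_bmbt_irec and xfs_trim_extent, and it assumes the record overlaps the range, which the surrounding loop arranges):

#include <stdint.h>
#include <stdio.h>

/* Simplified extent record, standing in for struct xfs_bmbt_irec. */
struct irec {
	uint64_t	startoff;
	uint64_t	blockcount;
};

/* Clamp *irec to [start, start + len), assuming the two ranges overlap. */
static void trim_extent(struct irec *irec, uint64_t start, uint64_t len)
{
	uint64_t end = irec->startoff + irec->blockcount;
	uint64_t limit = start + len;

	if (irec->startoff < start) {
		irec->blockcount = end - start;
		irec->startoff = start;
	}
	if (end > limit)
		irec->blockcount -= end - limit;
}

int main(void)
{
	struct irec got = { .startoff = 8, .blockcount = 16 };	/* blocks 8..23 */

	trim_extent(&got, 12, 4);				/* punch blocks 12..15 */
	printf("del = [%llu, %llu)\n",
	       (unsigned long long)got.startoff,
	       (unsigned long long)(got.startoff + got.blockcount));	/* [12, 16) */
	return 0;
}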
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
index c34fa9c342f2..c7157bc48bd1 100644
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -513,8 +513,8 @@ xfs_getfsmap_rtdev_rtbitmap_query(
 	struct xfs_trans		*tp,
 	struct xfs_getfsmap_info	*info)
 {
-	struct xfs_rtalloc_rec		alow;
-	struct xfs_rtalloc_rec		ahigh;
+	struct xfs_rtalloc_rec		alow = { 0 };
+	struct xfs_rtalloc_rec		ahigh = { 0 };
 	int				error;
 
 	xfs_ilock(tp->t_mountp->m_rbmip, XFS_ILOCK_SHARED);
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index a7afcad6b711..3f2bd6032cf8 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -387,7 +387,7 @@ xfs_reserve_blocks(
 	do {
 		free = percpu_counter_sum(&mp->m_fdblocks) -
 						mp->m_alloc_set_aside;
-		if (!free)
+		if (free <= 0)
 			break;
 
 		delta = request - mp->m_resblks;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 7a96c4e0ab5c..5df4de666cc1 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -3236,7 +3236,6 @@ xfs_iflush_cluster(
 	struct xfs_inode	*cip;
 	int			nr_found;
 	int			clcount = 0;
-	int			bufwasdelwri;
 	int			i;
 
 	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
@@ -3360,37 +3359,22 @@ cluster_corrupt_out:
 	 * inode buffer and shut down the filesystem.
 	 */
 	rcu_read_unlock();
-	/*
-	 * Clean up the buffer. If it was delwri, just release it --
-	 * brelse can handle it with no problems. If not, shut down the
-	 * filesystem before releasing the buffer.
-	 */
-	bufwasdelwri = (bp->b_flags & _XBF_DELWRI_Q);
-	if (bufwasdelwri)
-		xfs_buf_relse(bp);
-
 	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
 
-	if (!bufwasdelwri) {
-		/*
-		 * Just like incore_relse: if we have b_iodone functions,
-		 * mark the buffer as an error and call them. Otherwise
-		 * mark it as stale and brelse.
-		 */
-		if (bp->b_iodone) {
-			bp->b_flags &= ~XBF_DONE;
-			xfs_buf_stale(bp);
-			xfs_buf_ioerror(bp, -EIO);
-			xfs_buf_ioend(bp);
-		} else {
-			xfs_buf_stale(bp);
-			xfs_buf_relse(bp);
-		}
-	}
-
 	/*
-	 * Unlocks the flush lock
+	 * We'll always have an inode attached to the buffer for completion
+	 * process by the time we are called from xfs_iflush(). Hence we have
+	 * always need to do IO completion processing to abort the inodes
+	 * attached to the buffer. handle them just like the shutdown case in
+	 * xfs_buf_submit().
 	 */
+	ASSERT(bp->b_iodone);
+	bp->b_flags &= ~XBF_DONE;
+	xfs_buf_stale(bp);
+	xfs_buf_ioerror(bp, -EIO);
+	xfs_buf_ioend(bp);
+
+	/* abort the corrupt inode, as it was not attached to the buffer */
 	xfs_iflush_abort(cip, false);
 	kmem_free(cilist);
 	xfs_perag_put(pag);
@@ -3486,12 +3470,17 @@ xfs_iflush(
 	xfs_log_force(mp, 0);
 
 	/*
-	 * inode clustering:
-	 * see if other inodes can be gathered into this write
+	 * inode clustering: try to gather other inodes into this write
+	 *
+	 * Note: Any error during clustering will result in the filesystem
+	 * being shut down and completion callbacks run on the cluster buffer.
+	 * As we have already flushed and attached this inode to the buffer,
+	 * it has already been aborted and released by xfs_iflush_cluster() and
+	 * so we have no further error handling to do here.
 	 */
 	error = xfs_iflush_cluster(ip, bp);
 	if (error)
-		goto cluster_corrupt_out;
+		return error;
 
 	*bpp = bp;
 	return 0;
@@ -3500,12 +3489,8 @@ corrupt_out:
 	if (bp)
 		xfs_buf_relse(bp);
 	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
-cluster_corrupt_out:
-	error = -EFSCORRUPTED;
 abort_out:
-	/*
-	 * Unlocks the flush lock
-	 */
+	/* abort the corrupt inode, as it was not attached to the buffer */
 	xfs_iflush_abort(ip, false);
 	return error;
 }
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 49f5492eed3b..55876dd02f0c 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -963,12 +963,13 @@ xfs_ilock_for_iomap(
 	unsigned		*lockmode)
 {
 	unsigned		mode = XFS_ILOCK_SHARED;
+	bool			is_write = flags & (IOMAP_WRITE | IOMAP_ZERO);
 
 	/*
 	 * COW writes may allocate delalloc space or convert unwritten COW
 	 * extents, so we need to make sure to take the lock exclusively here.
 	 */
-	if (xfs_is_reflink_inode(ip) && (flags & (IOMAP_WRITE | IOMAP_ZERO))) {
+	if (xfs_is_reflink_inode(ip) && is_write) {
 		/*
 		 * FIXME: It could still overwrite on unshared extents and not
 		 * need allocation.
@@ -989,6 +990,7 @@ xfs_ilock_for_iomap(
 		mode = XFS_ILOCK_EXCL;
 	}
 
+relock:
 	if (flags & IOMAP_NOWAIT) {
 		if (!xfs_ilock_nowait(ip, mode))
 			return -EAGAIN;
@@ -996,6 +998,17 @@ xfs_ilock_for_iomap(
 		xfs_ilock(ip, mode);
 	}
 
+	/*
+	 * The reflink iflag could have changed since the earlier unlocked
+	 * check, so if we got ILOCK_SHARED for a write and but we're now a
+	 * reflink inode we have to switch to ILOCK_EXCL and relock.
+	 */
+	if (mode == XFS_ILOCK_SHARED && is_write && xfs_is_reflink_inode(ip)) {
+		xfs_iunlock(ip, mode);
+		mode = XFS_ILOCK_EXCL;
+		goto relock;
+	}
+
 	*lockmode = mode;
 	return 0;
 }
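The relock loop added above handles the inode gaining the reflink flag between the unlocked check and actually taking the lock. A generic sketch of the same check-lock-recheck pattern, using pthreads rather than the kernel's locking primitives (all names here are illustrative):

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

static pthread_rwlock_t ilock = PTHREAD_RWLOCK_INITIALIZER;
static atomic_bool is_reflink;

/* Take the lock in the cheapest mode that is still safe for a write,
 * re-checking the flag once the lock is actually held. */
static void lock_for_write(bool *exclusive)
{
	bool excl = atomic_load(&is_reflink);

relock:
	if (excl)
		pthread_rwlock_wrlock(&ilock);
	else
		pthread_rwlock_rdlock(&ilock);

	/* The flag may have changed since the unlocked check: upgrade. */
	if (!excl && atomic_load(&is_reflink)) {
		pthread_rwlock_unlock(&ilock);
		excl = true;
		goto relock;
	}

	*exclusive = excl;
}

int main(void)
{
	bool excl;

	atomic_store(&is_reflink, true);
	lock_for_write(&excl);		/* takes the write lock: excl == true */
	pthread_rwlock_unlock(&ilock);
	return 0;
}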
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index e040af120b69..524f543c5b82 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -258,7 +258,12 @@ xfs_trans_alloc(
 	if (!(flags & XFS_TRANS_NO_WRITECOUNT))
 		sb_start_intwrite(mp->m_super);
 
-	WARN_ON(mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE);
+	/*
+	 * Zero-reservation ("empty") transactions can't modify anything, so
+	 * they're allowed to run while we're frozen.
+	 */
+	WARN_ON(resp->tr_logres > 0 &&
+		mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE);
 	atomic_inc(&mp->m_active_trans);
 
 	tp = kmem_zone_zalloc(xfs_trans_zone,