-rw-r--r--  fs/direct-io.c | 18
-rw-r--r--  fs/open.c | 29
-rw-r--r--  fs/xfs/kmem.c | 21
-rw-r--r--  fs/xfs/xfs_acl.c | 2
-rw-r--r--  fs/xfs/xfs_ag.h | 6
-rw-r--r--  fs/xfs/xfs_alloc.c | 45
-rw-r--r--  fs/xfs/xfs_alloc_btree.c | 16
-rw-r--r--  fs/xfs/xfs_aops.c | 84
-rw-r--r--  fs/xfs/xfs_attr_leaf.c | 17
-rw-r--r--  fs/xfs/xfs_attr_remote.c | 15
-rw-r--r--  fs/xfs/xfs_bmap.c | 193
-rw-r--r--  fs/xfs/xfs_bmap.h | 15
-rw-r--r--  fs/xfs/xfs_bmap_btree.c | 16
-rw-r--r--  fs/xfs/xfs_bmap_util.c | 97
-rw-r--r--  fs/xfs/xfs_bmap_util.h | 2
-rw-r--r--  fs/xfs/xfs_btree.c | 14
-rw-r--r--  fs/xfs/xfs_buf.c | 11
-rw-r--r--  fs/xfs/xfs_buf.h | 14
-rw-r--r--  fs/xfs/xfs_buf_item.c | 19
-rw-r--r--  fs/xfs/xfs_da_btree.c | 19
-rw-r--r--  fs/xfs/xfs_dinode.h | 2
-rw-r--r--  fs/xfs/xfs_dir2.c | 342
-rw-r--r--  fs/xfs/xfs_dir2_block.c | 17
-rw-r--r--  fs/xfs/xfs_dir2_data.c | 20
-rw-r--r--  fs/xfs/xfs_dir2_leaf.c | 17
-rw-r--r--  fs/xfs/xfs_dir2_node.c | 17
-rw-r--r--  fs/xfs/xfs_dquot.c | 2
-rw-r--r--  fs/xfs/xfs_dquot_buf.c | 11
-rw-r--r--  fs/xfs/xfs_error.c | 27
-rw-r--r--  fs/xfs/xfs_error.h | 1
-rw-r--r--  fs/xfs/xfs_file.c | 26
-rw-r--r--  fs/xfs/xfs_format.h | 2
-rw-r--r--  fs/xfs/xfs_ialloc.c | 36
-rw-r--r--  fs/xfs/xfs_ialloc_btree.c | 16
-rw-r--r--  fs/xfs/xfs_inode.c | 123
-rw-r--r--  fs/xfs/xfs_inode.h | 12
-rw-r--r--  fs/xfs/xfs_inode_buf.c | 7
-rw-r--r--  fs/xfs/xfs_iomap.c | 10
-rw-r--r--  fs/xfs/xfs_iops.c | 30
-rw-r--r--  fs/xfs/xfs_linux.h | 2
-rw-r--r--  fs/xfs/xfs_log.h | 2
-rw-r--r--  fs/xfs/xfs_log_cil.c | 55
-rw-r--r--  fs/xfs/xfs_mount.c | 3
-rw-r--r--  fs/xfs/xfs_rtalloc.c | 2
-rw-r--r--  fs/xfs/xfs_sb.c | 17
-rw-r--r--  fs/xfs/xfs_sb.h | 2
-rw-r--r--  fs/xfs/xfs_shared.h | 4
-rw-r--r--  fs/xfs/xfs_symlink.c | 9
-rw-r--r--  fs/xfs/xfs_symlink_remote.c | 16
-rw-r--r--  fs/xfs/xfs_trace.h | 1
-rw-r--r--  fs/xfs/xfs_trans.c | 12
-rw-r--r--  fs/xfs/xfs_trans_buf.c | 11
-rw-r--r--  fs/xfs/xfs_trans_resv.c | 82
-rw-r--r--  fs/xfs/xfs_trans_resv.h | 3
-rw-r--r--  include/linux/fs.h | 3
-rw-r--r--  include/uapi/linux/falloc.h | 35
56 files changed, 1215 insertions(+), 415 deletions(-)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 6e6bff375244..31ba0935e32e 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1193,13 +1193,19 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1193 } 1193 }
1194 1194
1195 /* 1195 /*
1196 * For file extending writes updating i_size before data 1196 * For file extending writes updating i_size before data writeouts
1197 * writeouts complete can expose uninitialized blocks. So 1197 * complete can expose uninitialized blocks in dumb filesystems.
1198 * even for AIO, we need to wait for i/o to complete before 1198 * In that case we need to wait for I/O completion even if asked
1199 * returning in this case. 1199 * for an asynchronous write.
1200 */ 1200 */
1201 dio->is_async = !is_sync_kiocb(iocb) && !((rw & WRITE) && 1201 if (is_sync_kiocb(iocb))
1202 (end > i_size_read(inode))); 1202 dio->is_async = false;
1203 else if (!(dio->flags & DIO_ASYNC_EXTEND) &&
1204 (rw & WRITE) && end > i_size_read(inode))
1205 dio->is_async = false;
1206 else
1207 dio->is_async = true;
1208
1203 dio->inode = inode; 1209 dio->inode = inode;
1204 dio->rw = rw; 1210 dio->rw = rw;
1205 1211
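The hunk above turns the single is_async expression into a three-way decision keyed on the new DIO_ASYNC_EXTEND flag. A rough sketch of the resulting policy (the helper and its parameters are illustrative only, not part of the patch):

        /* Sketch of the dio->is_async decision after this change. */
        static bool dio_wants_async(bool sync_kiocb, bool fs_allows_async_extend,
                                    bool is_write, bool extends_i_size)
        {
                if (sync_kiocb)
                        return false;   /* synchronous kiocb: caller waits anyway */
                if (!fs_allows_async_extend && is_write && extends_i_size)
                        return false;   /* i_size update before writeout could expose blocks */
                return true;            /* otherwise keep AIO semantics */
        }

Filesystems that pass DIO_ASYNC_EXTEND, as the xfs_vm_direct_IO hunk later in this diff does, keep asynchronous completion even for size-extending writes.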
diff --git a/fs/open.c b/fs/open.c
index b9ed8b25c108..631aea815def 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -231,7 +231,13 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
231 return -EINVAL; 231 return -EINVAL;
232 232
233 /* Return error if mode is not supported */ 233 /* Return error if mode is not supported */
 234 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) 234 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
 235 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
 236 return -EOPNOTSUPP;
 237
237
238 /* Punch hole and zero range are mutually exclusive */
239 if ((mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) ==
240 (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE))
235 return -EOPNOTSUPP; 241 return -EOPNOTSUPP;
236 242
237 /* Punch hole must have keep size set */ 243 /* Punch hole must have keep size set */
@@ -239,11 +245,20 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
239 !(mode & FALLOC_FL_KEEP_SIZE)) 245 !(mode & FALLOC_FL_KEEP_SIZE))
240 return -EOPNOTSUPP; 246 return -EOPNOTSUPP;
241 247
 248 /* Collapse range must be used exclusively; no other flags allowed. */
249 if ((mode & FALLOC_FL_COLLAPSE_RANGE) &&
250 (mode & ~FALLOC_FL_COLLAPSE_RANGE))
251 return -EINVAL;
252
242 if (!(file->f_mode & FMODE_WRITE)) 253 if (!(file->f_mode & FMODE_WRITE))
243 return -EBADF; 254 return -EBADF;
244 255
245 /* It's not possible punch hole on append only file */ 256 /*
 246 if (mode & FALLOC_FL_PUNCH_HOLE && IS_APPEND(inode)) 257 * It's not possible to punch a hole or collapse a range
 258 * on an append-only file
259 */
260 if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE)
261 && IS_APPEND(inode))
247 return -EPERM; 262 return -EPERM;
248 263
249 if (IS_IMMUTABLE(inode)) 264 if (IS_IMMUTABLE(inode))
@@ -271,6 +286,14 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
271 if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0)) 286 if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
272 return -EFBIG; 287 return -EFBIG;
273 288
289 /*
 290 * A collapse range must not reach or cross EOF; in that case it
 291 * is effectively a truncate operation
292 */
293 if ((mode & FALLOC_FL_COLLAPSE_RANGE) &&
294 (offset + len >= i_size_read(inode)))
295 return -EINVAL;
296
274 if (!file->f_op->fallocate) 297 if (!file->f_op->fallocate)
275 return -EOPNOTSUPP; 298 return -EOPNOTSUPP;
276 299
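The fs/open.c checks above define the userspace contract for the new mode: FALLOC_FL_COLLAPSE_RANGE must be used with no other flags, the file must be writable and neither append-only nor immutable, and the range must end before EOF. A minimal user-space sketch exercising it (assumes headers that already carry the FALLOC_FL_COLLAPSE_RANGE definition added by the include/uapi/linux/falloc.h change in this series):

        #define _GNU_SOURCE
        #include <fcntl.h>
        #include <linux/falloc.h>
        #include <stdio.h>
        #include <unistd.h>

        int main(int argc, char **argv)
        {
                int fd;

                if (argc < 2)
                        return 1;
                fd = open(argv[1], O_RDWR);
                if (fd < 0)
                        return 1;
                /*
                 * Collapse 1 MiB starting at offset 4 MiB. Combining this mode
                 * with any other flag, or letting offset + len reach EOF, makes
                 * do_fallocate() return EINVAL per the checks above.
                 */
                if (fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, 4 << 20, 1 << 20) < 0)
                        perror("fallocate");
                close(fd);
                return 0;
        }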
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c
index 66a36befc5c0..844e288b9576 100644
--- a/fs/xfs/kmem.c
+++ b/fs/xfs/kmem.c
@@ -65,12 +65,31 @@ kmem_alloc(size_t size, xfs_km_flags_t flags)
65void * 65void *
66kmem_zalloc_large(size_t size, xfs_km_flags_t flags) 66kmem_zalloc_large(size_t size, xfs_km_flags_t flags)
67{ 67{
68 unsigned noio_flag = 0;
68 void *ptr; 69 void *ptr;
70 gfp_t lflags;
69 71
70 ptr = kmem_zalloc(size, flags | KM_MAYFAIL); 72 ptr = kmem_zalloc(size, flags | KM_MAYFAIL);
71 if (ptr) 73 if (ptr)
72 return ptr; 74 return ptr;
73 return vzalloc(size); 75
76 /*
 77 * __vmalloc() will allocate data pages and auxiliary structures (e.g.
78 * pagetables) with GFP_KERNEL, yet we may be under GFP_NOFS context
79 * here. Hence we need to tell memory reclaim that we are in such a
80 * context via PF_MEMALLOC_NOIO to prevent memory reclaim re-entering
81 * the filesystem here and potentially deadlocking.
82 */
83 if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS))
84 noio_flag = memalloc_noio_save();
85
86 lflags = kmem_flags_convert(flags);
87 ptr = __vmalloc(size, lflags | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL);
88
89 if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS))
90 memalloc_noio_restore(noio_flag);
91
92 return ptr;
74} 93}
75 94
76void 95void
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 0ecec1896f25..6888ad886ff6 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -281,7 +281,7 @@ xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
281 if (!acl) 281 if (!acl)
282 goto set_acl; 282 goto set_acl;
283 283
284 error = -EINVAL; 284 error = -E2BIG;
285 if (acl->a_count > XFS_ACL_MAX_ENTRIES(XFS_M(inode->i_sb))) 285 if (acl->a_count > XFS_ACL_MAX_ENTRIES(XFS_M(inode->i_sb)))
286 return error; 286 return error;
287 287
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 3fc109819c34..0fdd4109c624 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -89,6 +89,8 @@ typedef struct xfs_agf {
89 /* structure must be padded to 64 bit alignment */ 89 /* structure must be padded to 64 bit alignment */
90} xfs_agf_t; 90} xfs_agf_t;
91 91
92#define XFS_AGF_CRC_OFF offsetof(struct xfs_agf, agf_crc)
93
92#define XFS_AGF_MAGICNUM 0x00000001 94#define XFS_AGF_MAGICNUM 0x00000001
93#define XFS_AGF_VERSIONNUM 0x00000002 95#define XFS_AGF_VERSIONNUM 0x00000002
94#define XFS_AGF_SEQNO 0x00000004 96#define XFS_AGF_SEQNO 0x00000004
@@ -167,6 +169,8 @@ typedef struct xfs_agi {
167 /* structure must be padded to 64 bit alignment */ 169 /* structure must be padded to 64 bit alignment */
168} xfs_agi_t; 170} xfs_agi_t;
169 171
172#define XFS_AGI_CRC_OFF offsetof(struct xfs_agi, agi_crc)
173
170#define XFS_AGI_MAGICNUM 0x00000001 174#define XFS_AGI_MAGICNUM 0x00000001
171#define XFS_AGI_VERSIONNUM 0x00000002 175#define XFS_AGI_VERSIONNUM 0x00000002
172#define XFS_AGI_SEQNO 0x00000004 176#define XFS_AGI_SEQNO 0x00000004
@@ -222,6 +226,8 @@ typedef struct xfs_agfl {
222 __be32 agfl_bno[]; /* actually XFS_AGFL_SIZE(mp) */ 226 __be32 agfl_bno[]; /* actually XFS_AGFL_SIZE(mp) */
223} xfs_agfl_t; 227} xfs_agfl_t;
224 228
229#define XFS_AGFL_CRC_OFF offsetof(struct xfs_agfl, agfl_crc)
230
225/* 231/*
226 * tags for inode radix tree 232 * tags for inode radix tree
227 */ 233 */
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 9eab2dfdcbb5..c1cf6a336a72 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -474,7 +474,6 @@ xfs_agfl_read_verify(
474 struct xfs_buf *bp) 474 struct xfs_buf *bp)
475{ 475{
476 struct xfs_mount *mp = bp->b_target->bt_mount; 476 struct xfs_mount *mp = bp->b_target->bt_mount;
477 int agfl_ok = 1;
478 477
479 /* 478 /*
480 * There is no verification of non-crc AGFLs because mkfs does not 479 * There is no verification of non-crc AGFLs because mkfs does not
@@ -485,15 +484,13 @@ xfs_agfl_read_verify(
485 if (!xfs_sb_version_hascrc(&mp->m_sb)) 484 if (!xfs_sb_version_hascrc(&mp->m_sb))
486 return; 485 return;
487 486
488 agfl_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), 487 if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
489 offsetof(struct xfs_agfl, agfl_crc)); 488 xfs_buf_ioerror(bp, EFSBADCRC);
490 489 else if (!xfs_agfl_verify(bp))
491 agfl_ok = agfl_ok && xfs_agfl_verify(bp);
492
493 if (!agfl_ok) {
494 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
495 xfs_buf_ioerror(bp, EFSCORRUPTED); 490 xfs_buf_ioerror(bp, EFSCORRUPTED);
496 } 491
492 if (bp->b_error)
493 xfs_verifier_error(bp);
497} 494}
498 495
499static void 496static void
@@ -508,16 +505,15 @@ xfs_agfl_write_verify(
508 return; 505 return;
509 506
510 if (!xfs_agfl_verify(bp)) { 507 if (!xfs_agfl_verify(bp)) {
511 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
512 xfs_buf_ioerror(bp, EFSCORRUPTED); 508 xfs_buf_ioerror(bp, EFSCORRUPTED);
509 xfs_verifier_error(bp);
513 return; 510 return;
514 } 511 }
515 512
516 if (bip) 513 if (bip)
517 XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn); 514 XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
518 515
519 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), 516 xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
520 offsetof(struct xfs_agfl, agfl_crc));
521} 517}
522 518
523const struct xfs_buf_ops xfs_agfl_buf_ops = { 519const struct xfs_buf_ops xfs_agfl_buf_ops = {
@@ -2238,19 +2234,17 @@ xfs_agf_read_verify(
2238 struct xfs_buf *bp) 2234 struct xfs_buf *bp)
2239{ 2235{
2240 struct xfs_mount *mp = bp->b_target->bt_mount; 2236 struct xfs_mount *mp = bp->b_target->bt_mount;
2241 int agf_ok = 1;
2242
2243 if (xfs_sb_version_hascrc(&mp->m_sb))
2244 agf_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
2245 offsetof(struct xfs_agf, agf_crc));
2246 2237
2247 agf_ok = agf_ok && xfs_agf_verify(mp, bp); 2238 if (xfs_sb_version_hascrc(&mp->m_sb) &&
2248 2239 !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
2249 if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF, 2240 xfs_buf_ioerror(bp, EFSBADCRC);
2250 XFS_RANDOM_ALLOC_READ_AGF))) { 2241 else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp,
2251 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); 2242 XFS_ERRTAG_ALLOC_READ_AGF,
2243 XFS_RANDOM_ALLOC_READ_AGF))
2252 xfs_buf_ioerror(bp, EFSCORRUPTED); 2244 xfs_buf_ioerror(bp, EFSCORRUPTED);
2253 } 2245
2246 if (bp->b_error)
2247 xfs_verifier_error(bp);
2254} 2248}
2255 2249
2256static void 2250static void
@@ -2261,8 +2255,8 @@ xfs_agf_write_verify(
2261 struct xfs_buf_log_item *bip = bp->b_fspriv; 2255 struct xfs_buf_log_item *bip = bp->b_fspriv;
2262 2256
2263 if (!xfs_agf_verify(mp, bp)) { 2257 if (!xfs_agf_verify(mp, bp)) {
2264 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
2265 xfs_buf_ioerror(bp, EFSCORRUPTED); 2258 xfs_buf_ioerror(bp, EFSCORRUPTED);
2259 xfs_verifier_error(bp);
2266 return; 2260 return;
2267 } 2261 }
2268 2262
@@ -2272,8 +2266,7 @@ xfs_agf_write_verify(
2272 if (bip) 2266 if (bip)
2273 XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn); 2267 XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
2274 2268
2275 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), 2269 xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
2276 offsetof(struct xfs_agf, agf_crc));
2277} 2270}
2278 2271
2279const struct xfs_buf_ops xfs_agf_buf_ops = { 2272const struct xfs_buf_ops xfs_agf_buf_ops = {
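Most read verifiers in this diff converge on the same shape: a CRC failure sets the new EFSBADCRC error, a structural check failure sets EFSCORRUPTED, and reporting is centralized in xfs_verifier_error(). A condensed sketch of that shape, with xfs_foo_verify() and XFS_FOO_CRC_OFF standing in for the per-structure verifier and CRC offset:

        static void
        xfs_foo_read_verify(
                struct xfs_buf          *bp)
        {
                struct xfs_mount        *mp = bp->b_target->bt_mount;

                if (xfs_sb_version_hascrc(&mp->m_sb) &&
                    !xfs_buf_verify_cksum(bp, XFS_FOO_CRC_OFF))
                        xfs_buf_ioerror(bp, EFSBADCRC);         /* CRC/media problem */
                else if (!xfs_foo_verify(bp))
                        xfs_buf_ioerror(bp, EFSCORRUPTED);      /* structural problem */

                if (bp->b_error)
                        xfs_verifier_error(bp);                 /* single reporting point */
        }

The per-callsite XFS_CORRUPTION_ERROR() calls removed above become redundant once xfs_verifier_error() owns the reporting.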
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index 13085429e523..cc1eadcbb049 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -355,12 +355,14 @@ static void
355xfs_allocbt_read_verify( 355xfs_allocbt_read_verify(
356 struct xfs_buf *bp) 356 struct xfs_buf *bp)
357{ 357{
358 if (!(xfs_btree_sblock_verify_crc(bp) && 358 if (!xfs_btree_sblock_verify_crc(bp))
359 xfs_allocbt_verify(bp))) { 359 xfs_buf_ioerror(bp, EFSBADCRC);
360 trace_xfs_btree_corrupt(bp, _RET_IP_); 360 else if (!xfs_allocbt_verify(bp))
361 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
362 bp->b_target->bt_mount, bp->b_addr);
363 xfs_buf_ioerror(bp, EFSCORRUPTED); 361 xfs_buf_ioerror(bp, EFSCORRUPTED);
362
363 if (bp->b_error) {
364 trace_xfs_btree_corrupt(bp, _RET_IP_);
365 xfs_verifier_error(bp);
364 } 366 }
365} 367}
366 368
@@ -370,9 +372,9 @@ xfs_allocbt_write_verify(
370{ 372{
371 if (!xfs_allocbt_verify(bp)) { 373 if (!xfs_allocbt_verify(bp)) {
372 trace_xfs_btree_corrupt(bp, _RET_IP_); 374 trace_xfs_btree_corrupt(bp, _RET_IP_);
373 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
374 bp->b_target->bt_mount, bp->b_addr);
375 xfs_buf_ioerror(bp, EFSCORRUPTED); 375 xfs_buf_ioerror(bp, EFSCORRUPTED);
376 xfs_verifier_error(bp);
377 return;
376 } 378 }
377 xfs_btree_sblock_calc_crc(bp); 379 xfs_btree_sblock_calc_crc(bp);
378 380
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index db2cfb067d0b..75df77d09f75 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -632,38 +632,46 @@ xfs_map_at_offset(
632} 632}
633 633
634/* 634/*
635 * Test if a given page is suitable for writing as part of an unwritten 635 * Test if a given page contains at least one buffer of a given @type.
636 * or delayed allocate extent. 636 * If @check_all_buffers is true, then we walk all the buffers in the page to
637 * try to find one of the type passed in. If it is not set, then the caller only
638 * needs to check the first buffer on the page for a match.
637 */ 639 */
638STATIC int 640STATIC bool
639xfs_check_page_type( 641xfs_check_page_type(
640 struct page *page, 642 struct page *page,
641 unsigned int type) 643 unsigned int type,
644 bool check_all_buffers)
642{ 645{
643 if (PageWriteback(page)) 646 struct buffer_head *bh;
644 return 0; 647 struct buffer_head *head;
645 648
646 if (page->mapping && page_has_buffers(page)) { 649 if (PageWriteback(page))
647 struct buffer_head *bh, *head; 650 return false;
648 int acceptable = 0; 651 if (!page->mapping)
652 return false;
653 if (!page_has_buffers(page))
654 return false;
649 655
650 bh = head = page_buffers(page); 656 bh = head = page_buffers(page);
651 do { 657 do {
652 if (buffer_unwritten(bh)) 658 if (buffer_unwritten(bh)) {
653 acceptable += (type == XFS_IO_UNWRITTEN); 659 if (type == XFS_IO_UNWRITTEN)
654 else if (buffer_delay(bh)) 660 return true;
655 acceptable += (type == XFS_IO_DELALLOC); 661 } else if (buffer_delay(bh)) {
656 else if (buffer_dirty(bh) && buffer_mapped(bh)) 662 if (type == XFS_IO_DELALLOC)
657 acceptable += (type == XFS_IO_OVERWRITE); 663 return true;
658 else 664 } else if (buffer_dirty(bh) && buffer_mapped(bh)) {
659 break; 665 if (type == XFS_IO_OVERWRITE)
660 } while ((bh = bh->b_this_page) != head); 666 return true;
667 }
661 668
662 if (acceptable) 669 /* If we are only checking the first buffer, we are done now. */
663 return 1; 670 if (!check_all_buffers)
664 } 671 break;
672 } while ((bh = bh->b_this_page) != head);
665 673
666 return 0; 674 return false;
667} 675}
668 676
669/* 677/*
@@ -697,7 +705,7 @@ xfs_convert_page(
697 goto fail_unlock_page; 705 goto fail_unlock_page;
698 if (page->mapping != inode->i_mapping) 706 if (page->mapping != inode->i_mapping)
699 goto fail_unlock_page; 707 goto fail_unlock_page;
700 if (!xfs_check_page_type(page, (*ioendp)->io_type)) 708 if (!xfs_check_page_type(page, (*ioendp)->io_type, false))
701 goto fail_unlock_page; 709 goto fail_unlock_page;
702 710
703 /* 711 /*
@@ -742,6 +750,15 @@ xfs_convert_page(
742 p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE; 750 p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
743 page_dirty = p_offset / len; 751 page_dirty = p_offset / len;
744 752
753 /*
754 * The moment we find a buffer that doesn't match our current type
755 * specification or can't be written, abort the loop and start
756 * writeback. As per the above xfs_imap_valid() check, only
757 * xfs_vm_writepage() can handle partial page writeback fully - we are
758 * limited here to the buffers that are contiguous with the current
759 * ioend, and hence a buffer we can't write breaks that contiguity and
760 * we have to defer the rest of the IO to xfs_vm_writepage().
761 */
745 bh = head = page_buffers(page); 762 bh = head = page_buffers(page);
746 do { 763 do {
747 if (offset >= end_offset) 764 if (offset >= end_offset)
@@ -750,7 +767,7 @@ xfs_convert_page(
750 uptodate = 0; 767 uptodate = 0;
751 if (!(PageUptodate(page) || buffer_uptodate(bh))) { 768 if (!(PageUptodate(page) || buffer_uptodate(bh))) {
752 done = 1; 769 done = 1;
753 continue; 770 break;
754 } 771 }
755 772
756 if (buffer_unwritten(bh) || buffer_delay(bh) || 773 if (buffer_unwritten(bh) || buffer_delay(bh) ||
@@ -762,10 +779,11 @@ xfs_convert_page(
762 else 779 else
763 type = XFS_IO_OVERWRITE; 780 type = XFS_IO_OVERWRITE;
764 781
765 if (!xfs_imap_valid(inode, imap, offset)) { 782 /*
766 done = 1; 783 * imap should always be valid because of the above
767 continue; 784 * partial page end_offset check on the imap.
768 } 785 */
786 ASSERT(xfs_imap_valid(inode, imap, offset));
769 787
770 lock_buffer(bh); 788 lock_buffer(bh);
771 if (type != XFS_IO_OVERWRITE) 789 if (type != XFS_IO_OVERWRITE)
@@ -777,6 +795,7 @@ xfs_convert_page(
777 count++; 795 count++;
778 } else { 796 } else {
779 done = 1; 797 done = 1;
798 break;
780 } 799 }
781 } while (offset += len, (bh = bh->b_this_page) != head); 800 } while (offset += len, (bh = bh->b_this_page) != head);
782 801
@@ -868,7 +887,7 @@ xfs_aops_discard_page(
868 struct buffer_head *bh, *head; 887 struct buffer_head *bh, *head;
869 loff_t offset = page_offset(page); 888 loff_t offset = page_offset(page);
870 889
871 if (!xfs_check_page_type(page, XFS_IO_DELALLOC)) 890 if (!xfs_check_page_type(page, XFS_IO_DELALLOC, true))
872 goto out_invalidate; 891 goto out_invalidate;
873 892
874 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 893 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
@@ -1441,7 +1460,8 @@ xfs_vm_direct_IO(
1441 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, 1460 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
1442 offset, nr_segs, 1461 offset, nr_segs,
1443 xfs_get_blocks_direct, 1462 xfs_get_blocks_direct,
1444 xfs_end_io_direct_write, NULL, 0); 1463 xfs_end_io_direct_write, NULL,
1464 DIO_ASYNC_EXTEND);
1445 if (ret != -EIOCBQUEUED && iocb->private) 1465 if (ret != -EIOCBQUEUED && iocb->private)
1446 goto out_destroy_ioend; 1466 goto out_destroy_ioend;
1447 } else { 1467 } else {
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 7b126f46a2f9..fe9587fab17a 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -213,8 +213,8 @@ xfs_attr3_leaf_write_verify(
213 struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr; 213 struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr;
214 214
215 if (!xfs_attr3_leaf_verify(bp)) { 215 if (!xfs_attr3_leaf_verify(bp)) {
216 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
217 xfs_buf_ioerror(bp, EFSCORRUPTED); 216 xfs_buf_ioerror(bp, EFSCORRUPTED);
217 xfs_verifier_error(bp);
218 return; 218 return;
219 } 219 }
220 220
@@ -224,7 +224,7 @@ xfs_attr3_leaf_write_verify(
224 if (bip) 224 if (bip)
225 hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); 225 hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
226 226
227 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_ATTR3_LEAF_CRC_OFF); 227 xfs_buf_update_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF);
228} 228}
229 229
230/* 230/*
@@ -239,13 +239,14 @@ xfs_attr3_leaf_read_verify(
239{ 239{
240 struct xfs_mount *mp = bp->b_target->bt_mount; 240 struct xfs_mount *mp = bp->b_target->bt_mount;
241 241
242 if ((xfs_sb_version_hascrc(&mp->m_sb) && 242 if (xfs_sb_version_hascrc(&mp->m_sb) &&
243 !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), 243 !xfs_buf_verify_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF))
244 XFS_ATTR3_LEAF_CRC_OFF)) || 244 xfs_buf_ioerror(bp, EFSBADCRC);
245 !xfs_attr3_leaf_verify(bp)) { 245 else if (!xfs_attr3_leaf_verify(bp))
246 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
247 xfs_buf_ioerror(bp, EFSCORRUPTED); 246 xfs_buf_ioerror(bp, EFSCORRUPTED);
248 } 247
248 if (bp->b_error)
249 xfs_verifier_error(bp);
249} 250}
250 251
251const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = { 252const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = {
diff --git a/fs/xfs/xfs_attr_remote.c b/fs/xfs/xfs_attr_remote.c
index 5549d69ddb45..6e37823e2932 100644
--- a/fs/xfs/xfs_attr_remote.c
+++ b/fs/xfs/xfs_attr_remote.c
@@ -125,7 +125,6 @@ xfs_attr3_rmt_read_verify(
125 struct xfs_mount *mp = bp->b_target->bt_mount; 125 struct xfs_mount *mp = bp->b_target->bt_mount;
126 char *ptr; 126 char *ptr;
127 int len; 127 int len;
128 bool corrupt = false;
129 xfs_daddr_t bno; 128 xfs_daddr_t bno;
130 129
131 /* no verification of non-crc buffers */ 130 /* no verification of non-crc buffers */
@@ -140,11 +139,11 @@ xfs_attr3_rmt_read_verify(
140 while (len > 0) { 139 while (len > 0) {
141 if (!xfs_verify_cksum(ptr, XFS_LBSIZE(mp), 140 if (!xfs_verify_cksum(ptr, XFS_LBSIZE(mp),
142 XFS_ATTR3_RMT_CRC_OFF)) { 141 XFS_ATTR3_RMT_CRC_OFF)) {
143 corrupt = true; 142 xfs_buf_ioerror(bp, EFSBADCRC);
144 break; 143 break;
145 } 144 }
146 if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) { 145 if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) {
147 corrupt = true; 146 xfs_buf_ioerror(bp, EFSCORRUPTED);
148 break; 147 break;
149 } 148 }
150 len -= XFS_LBSIZE(mp); 149 len -= XFS_LBSIZE(mp);
@@ -152,10 +151,9 @@ xfs_attr3_rmt_read_verify(
152 bno += mp->m_bsize; 151 bno += mp->m_bsize;
153 } 152 }
154 153
155 if (corrupt) { 154 if (bp->b_error)
156 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); 155 xfs_verifier_error(bp);
157 xfs_buf_ioerror(bp, EFSCORRUPTED); 156 else
158 } else
159 ASSERT(len == 0); 157 ASSERT(len == 0);
160} 158}
161 159
@@ -180,9 +178,8 @@ xfs_attr3_rmt_write_verify(
180 178
181 while (len > 0) { 179 while (len > 0) {
182 if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) { 180 if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) {
183 XFS_CORRUPTION_ERROR(__func__,
184 XFS_ERRLEVEL_LOW, mp, bp->b_addr);
185 xfs_buf_ioerror(bp, EFSCORRUPTED); 181 xfs_buf_ioerror(bp, EFSCORRUPTED);
182 xfs_verifier_error(bp);
186 return; 183 return;
187 } 184 }
188 if (bip) { 185 if (bip) {
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 152543c4ca70..5b6092ef51ef 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5378,3 +5378,196 @@ error0:
5378 } 5378 }
5379 return error; 5379 return error;
5380} 5380}
5381
5382/*
5383 * Shift extent records to the left to cover a hole.
5384 *
5385 * The maximum number of extents to be shifted in a single operation
5386 * is @num_exts, and @current_ext keeps track of the current extent
5387 * index we have shifted. @offset_shift_fsb is the length by which each
5388 * extent is shifted. If there is no hole to shift the extents
 5389 * into, this is considered an invalid operation and we abort immediately.
5390 */
5391int
5392xfs_bmap_shift_extents(
5393 struct xfs_trans *tp,
5394 struct xfs_inode *ip,
5395 int *done,
5396 xfs_fileoff_t start_fsb,
5397 xfs_fileoff_t offset_shift_fsb,
5398 xfs_extnum_t *current_ext,
5399 xfs_fsblock_t *firstblock,
5400 struct xfs_bmap_free *flist,
5401 int num_exts)
5402{
5403 struct xfs_btree_cur *cur;
5404 struct xfs_bmbt_rec_host *gotp;
5405 struct xfs_bmbt_irec got;
5406 struct xfs_bmbt_irec left;
5407 struct xfs_mount *mp = ip->i_mount;
5408 struct xfs_ifork *ifp;
5409 xfs_extnum_t nexts = 0;
5410 xfs_fileoff_t startoff;
5411 int error = 0;
5412 int i;
5413 int whichfork = XFS_DATA_FORK;
5414 int logflags;
5415 xfs_filblks_t blockcount = 0;
5416
5417 if (unlikely(XFS_TEST_ERROR(
5418 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
5419 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
5420 mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
5421 XFS_ERROR_REPORT("xfs_bmap_shift_extents",
5422 XFS_ERRLEVEL_LOW, mp);
5423 return XFS_ERROR(EFSCORRUPTED);
5424 }
5425
5426 if (XFS_FORCED_SHUTDOWN(mp))
5427 return XFS_ERROR(EIO);
5428
5429 ASSERT(current_ext != NULL);
5430
5431 ifp = XFS_IFORK_PTR(ip, whichfork);
5432
5433 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
5434 /* Read in all the extents */
5435 error = xfs_iread_extents(tp, ip, whichfork);
5436 if (error)
5437 return error;
5438 }
5439
5440 /*
 5441 * If *current_ext is 0, we need to look up the extent
 5442 * from which we start shifting and store it in gotp.
5443 */
5444 if (!*current_ext) {
5445 gotp = xfs_iext_bno_to_ext(ifp, start_fsb, current_ext);
5446 /*
5447 * gotp can be null in 2 cases: 1) if there are no extents
5448 * or 2) start_fsb lies in a hole beyond which there are
5449 * no extents. Either way, we are done.
5450 */
5451 if (!gotp) {
5452 *done = 1;
5453 return 0;
5454 }
5455 }
5456
5457 /* We are going to change core inode */
5458 logflags = XFS_ILOG_CORE;
5459
5460 if (ifp->if_flags & XFS_IFBROOT) {
5461 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5462 cur->bc_private.b.firstblock = *firstblock;
5463 cur->bc_private.b.flist = flist;
5464 cur->bc_private.b.flags = 0;
5465 } else {
5466 cur = NULL;
5467 logflags |= XFS_ILOG_DEXT;
5468 }
5469
5470 while (nexts++ < num_exts &&
5471 *current_ext < XFS_IFORK_NEXTENTS(ip, whichfork)) {
5472
5473 gotp = xfs_iext_get_ext(ifp, *current_ext);
5474 xfs_bmbt_get_all(gotp, &got);
5475 startoff = got.br_startoff - offset_shift_fsb;
5476
5477 /*
5478 * Before shifting extent into hole, make sure that the hole
5479 * is large enough to accomodate the shift.
5480 */
5481 if (*current_ext) {
5482 xfs_bmbt_get_all(xfs_iext_get_ext(ifp,
5483 *current_ext - 1), &left);
5484
5485 if (startoff < left.br_startoff + left.br_blockcount)
5486 error = XFS_ERROR(EINVAL);
5487 } else if (offset_shift_fsb > got.br_startoff) {
5488 /*
 5489 * When the first extent is shifted, offset_shift_fsb
 5490 * should be less than the starting offset of
5491 * the first extent.
5492 */
5493 error = XFS_ERROR(EINVAL);
5494 }
5495
5496 if (error)
5497 goto del_cursor;
5498
5499 if (cur) {
5500 error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
5501 got.br_startblock,
5502 got.br_blockcount,
5503 &i);
5504 if (error)
5505 goto del_cursor;
5506 XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
5507 }
5508
5509 /* Check if we can merge 2 adjacent extents */
5510 if (*current_ext &&
5511 left.br_startoff + left.br_blockcount == startoff &&
5512 left.br_startblock + left.br_blockcount ==
5513 got.br_startblock &&
5514 left.br_state == got.br_state &&
5515 left.br_blockcount + got.br_blockcount <= MAXEXTLEN) {
5516 blockcount = left.br_blockcount +
5517 got.br_blockcount;
5518 xfs_iext_remove(ip, *current_ext, 1, 0);
5519 if (cur) {
5520 error = xfs_btree_delete(cur, &i);
5521 if (error)
5522 goto del_cursor;
5523 XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
5524 }
5525 XFS_IFORK_NEXT_SET(ip, whichfork,
5526 XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
5527 gotp = xfs_iext_get_ext(ifp, --*current_ext);
5528 xfs_bmbt_get_all(gotp, &got);
5529
5530 /* Make cursor point to the extent we will update */
5531 if (cur) {
5532 error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
5533 got.br_startblock,
5534 got.br_blockcount,
5535 &i);
5536 if (error)
5537 goto del_cursor;
5538 XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
5539 }
5540
5541 xfs_bmbt_set_blockcount(gotp, blockcount);
5542 got.br_blockcount = blockcount;
5543 } else {
5544 /* We have to update the startoff */
5545 xfs_bmbt_set_startoff(gotp, startoff);
5546 got.br_startoff = startoff;
5547 }
5548
5549 if (cur) {
5550 error = xfs_bmbt_update(cur, got.br_startoff,
5551 got.br_startblock,
5552 got.br_blockcount,
5553 got.br_state);
5554 if (error)
5555 goto del_cursor;
5556 }
5557
5558 (*current_ext)++;
5559 }
5560
5561 /* Check if we are done */
5562 if (*current_ext == XFS_IFORK_NEXTENTS(ip, whichfork))
5563 *done = 1;
5564
5565del_cursor:
5566 if (cur)
5567 xfs_btree_del_cursor(cur,
5568 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
5569
5570 xfs_trans_log_inode(tp, ip, logflags);
5571
5572 return error;
5573}
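The merge branch in the loop above is guarded by four conditions. A standalone sketch of that predicate (a hypothetical helper mirroring the in-loop checks, where startoff is got.br_startoff minus the shift):

        /* Can the shifted extent 'got' be merged into its left neighbour? */
        static bool
        xfs_shift_can_merge(
                struct xfs_bmbt_irec    *left,
                struct xfs_bmbt_irec    *got,
                xfs_fileoff_t           startoff)
        {
                return left->br_startoff + left->br_blockcount == startoff &&
                       left->br_startblock + left->br_blockcount == got->br_startblock &&
                       left->br_state == got->br_state &&
                       left->br_blockcount + got->br_blockcount <= MAXEXTLEN;
        }

In words: the shifted extent must land logically adjacent to the left extent, be physically contiguous with it, share the same written/unwritten state, and not push the combined length past MAXEXTLEN.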
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 33b41f351225..f84bd7af43be 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -127,6 +127,16 @@ static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp)
127 { BMAP_RIGHT_FILLING, "RF" }, \ 127 { BMAP_RIGHT_FILLING, "RF" }, \
128 { BMAP_ATTRFORK, "ATTR" } 128 { BMAP_ATTRFORK, "ATTR" }
129 129
130
131/*
132 * This macro is used to determine how many extents will be shifted
133 * in one write transaction. We could require two splits,
134 * an extent move on the first and an extent merge on the second,
135 * So it is proper that one extent is shifted inside write transaction
136 * at a time.
137 */
138#define XFS_BMAP_MAX_SHIFT_EXTENTS 1
139
130#ifdef DEBUG 140#ifdef DEBUG
131void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt, 141void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
132 int whichfork, unsigned long caller_ip); 142 int whichfork, unsigned long caller_ip);
@@ -169,5 +179,10 @@ int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
169int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx, 179int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
170 xfs_extnum_t num); 180 xfs_extnum_t num);
171uint xfs_default_attroffset(struct xfs_inode *ip); 181uint xfs_default_attroffset(struct xfs_inode *ip);
182int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip,
183 int *done, xfs_fileoff_t start_fsb,
184 xfs_fileoff_t offset_shift_fsb, xfs_extnum_t *current_ext,
185 xfs_fsblock_t *firstblock, struct xfs_bmap_free *flist,
186 int num_exts);
172 187
173#endif /* __XFS_BMAP_H__ */ 188#endif /* __XFS_BMAP_H__ */
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 706bc3f777cb..818d546664e7 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -780,12 +780,14 @@ static void
780xfs_bmbt_read_verify( 780xfs_bmbt_read_verify(
781 struct xfs_buf *bp) 781 struct xfs_buf *bp)
782{ 782{
783 if (!(xfs_btree_lblock_verify_crc(bp) && 783 if (!xfs_btree_lblock_verify_crc(bp))
784 xfs_bmbt_verify(bp))) { 784 xfs_buf_ioerror(bp, EFSBADCRC);
785 trace_xfs_btree_corrupt(bp, _RET_IP_); 785 else if (!xfs_bmbt_verify(bp))
786 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
787 bp->b_target->bt_mount, bp->b_addr);
788 xfs_buf_ioerror(bp, EFSCORRUPTED); 786 xfs_buf_ioerror(bp, EFSCORRUPTED);
787
788 if (bp->b_error) {
789 trace_xfs_btree_corrupt(bp, _RET_IP_);
790 xfs_verifier_error(bp);
789 } 791 }
790} 792}
791 793
@@ -794,11 +796,9 @@ xfs_bmbt_write_verify(
794 struct xfs_buf *bp) 796 struct xfs_buf *bp)
795{ 797{
796 if (!xfs_bmbt_verify(bp)) { 798 if (!xfs_bmbt_verify(bp)) {
797 xfs_warn(bp->b_target->bt_mount, "bmbt daddr 0x%llx failed", bp->b_bn);
798 trace_xfs_btree_corrupt(bp, _RET_IP_); 799 trace_xfs_btree_corrupt(bp, _RET_IP_);
799 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
800 bp->b_target->bt_mount, bp->b_addr);
801 xfs_buf_ioerror(bp, EFSCORRUPTED); 800 xfs_buf_ioerror(bp, EFSCORRUPTED);
801 xfs_verifier_error(bp);
802 return; 802 return;
803 } 803 }
804 xfs_btree_lblock_calc_crc(bp); 804 xfs_btree_lblock_calc_crc(bp);
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index f264616080ca..01f6a646caa1 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1349,7 +1349,6 @@ xfs_free_file_space(
1349 * the freeing of the space succeeds at ENOSPC. 1349 * the freeing of the space succeeds at ENOSPC.
1350 */ 1350 */
1351 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); 1351 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
1352 tp->t_flags |= XFS_TRANS_RESERVE;
1353 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0); 1352 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0);
1354 1353
1355 /* 1354 /*
@@ -1468,6 +1467,102 @@ out:
1468} 1467}
1469 1468
1470/* 1469/*
1470 * xfs_collapse_file_space()
 1471 * This routine frees disk space and shifts extents for the given file.
 1472 * The first thing we do is free data blocks in the specified range by
 1473 * calling xfs_free_file_space(), which also syncs dirty data and
 1474 * invalidates the page cache over the region on which the collapse
 1475 * range operates. Then extent records are shifted left to cover the hole.
1476 * RETURNS:
1477 * 0 on success
1478 * errno on error
1479 *
1480 */
1481int
1482xfs_collapse_file_space(
1483 struct xfs_inode *ip,
1484 xfs_off_t offset,
1485 xfs_off_t len)
1486{
1487 int done = 0;
1488 struct xfs_mount *mp = ip->i_mount;
1489 struct xfs_trans *tp;
1490 int error;
1491 xfs_extnum_t current_ext = 0;
1492 struct xfs_bmap_free free_list;
1493 xfs_fsblock_t first_block;
1494 int committed;
1495 xfs_fileoff_t start_fsb;
1496 xfs_fileoff_t shift_fsb;
1497
1498 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
1499
1500 trace_xfs_collapse_file_space(ip);
1501
1502 start_fsb = XFS_B_TO_FSB(mp, offset + len);
1503 shift_fsb = XFS_B_TO_FSB(mp, len);
1504
1505 error = xfs_free_file_space(ip, offset, len);
1506 if (error)
1507 return error;
1508
1509 while (!error && !done) {
1510 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
1511 tp->t_flags |= XFS_TRANS_RESERVE;
1512 /*
 1513 * We need to reserve a permanent block for the transaction.
 1514 * This comes into play when, after shifting an extent into the
 1515 * hole, we find that adjacent extents can be merged, which
 1516 * may lead to freeing of a block during the record update.
1517 */
1518 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
1519 XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
1520 if (error) {
1521 ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
1522 xfs_trans_cancel(tp, 0);
1523 break;
1524 }
1525
1526 xfs_ilock(ip, XFS_ILOCK_EXCL);
1527 error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot,
1528 ip->i_gdquot, ip->i_pdquot,
1529 XFS_DIOSTRAT_SPACE_RES(mp, 0), 0,
1530 XFS_QMOPT_RES_REGBLKS);
1531 if (error)
1532 goto out;
1533
1534 xfs_trans_ijoin(tp, ip, 0);
1535
1536 xfs_bmap_init(&free_list, &first_block);
1537
1538 /*
 1539 * We are using the write transaction, in which a maximum of 2 bmbt
 1540 * updates are allowed
1541 */
1542 error = xfs_bmap_shift_extents(tp, ip, &done, start_fsb,
1543 shift_fsb, &current_ext,
1544 &first_block, &free_list,
1545 XFS_BMAP_MAX_SHIFT_EXTENTS);
1546 if (error)
1547 goto out;
1548
1549 error = xfs_bmap_finish(&tp, &free_list, &committed);
1550 if (error)
1551 goto out;
1552
1553 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
1554 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1555 }
1556
1557 return error;
1558
1559out:
1560 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
1561 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1562 return error;
1563}
1564
1565/*
1471 * We need to check that the format of the data fork in the temporary inode is 1566 * We need to check that the format of the data fork in the temporary inode is
1472 * valid for the target inode before doing the swap. This is not a problem with 1567 * valid for the target inode before doing the swap. This is not a problem with
1473 * attr1 because of the fixed fork offset, but attr2 has a dynamically sized 1568 * attr1 because of the fixed fork offset, but attr2 has a dynamically sized
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 900747b25772..935ed2b24edf 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -99,6 +99,8 @@ int xfs_free_file_space(struct xfs_inode *ip, xfs_off_t offset,
99 xfs_off_t len); 99 xfs_off_t len);
100int xfs_zero_file_space(struct xfs_inode *ip, xfs_off_t offset, 100int xfs_zero_file_space(struct xfs_inode *ip, xfs_off_t offset,
101 xfs_off_t len); 101 xfs_off_t len);
102int xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset,
103 xfs_off_t len);
102 104
103/* EOF block manipulation functions */ 105/* EOF block manipulation functions */
104bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force); 106bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force);
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index 9adaae4f3e2f..e80d59fdf89a 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -234,8 +234,7 @@ xfs_btree_lblock_calc_crc(
234 return; 234 return;
235 if (bip) 235 if (bip)
236 block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); 236 block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
237 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), 237 xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
238 XFS_BTREE_LBLOCK_CRC_OFF);
239} 238}
240 239
241bool 240bool
@@ -243,8 +242,8 @@ xfs_btree_lblock_verify_crc(
243 struct xfs_buf *bp) 242 struct xfs_buf *bp)
244{ 243{
245 if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) 244 if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
246 return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), 245 return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
247 XFS_BTREE_LBLOCK_CRC_OFF); 246
248 return true; 247 return true;
249} 248}
250 249
@@ -267,8 +266,7 @@ xfs_btree_sblock_calc_crc(
267 return; 266 return;
268 if (bip) 267 if (bip)
269 block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); 268 block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
270 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), 269 xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
271 XFS_BTREE_SBLOCK_CRC_OFF);
272} 270}
273 271
274bool 272bool
@@ -276,8 +274,8 @@ xfs_btree_sblock_verify_crc(
276 struct xfs_buf *bp) 274 struct xfs_buf *bp)
277{ 275{
278 if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) 276 if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
279 return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), 277 return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
280 XFS_BTREE_SBLOCK_CRC_OFF); 278
281 return true; 279 return true;
282} 280}
283 281
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 9c061ef2b0d9..107f2fdfe41f 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -396,7 +396,17 @@ _xfs_buf_map_pages(
396 bp->b_addr = NULL; 396 bp->b_addr = NULL;
397 } else { 397 } else {
398 int retried = 0; 398 int retried = 0;
399 unsigned noio_flag;
399 400
401 /*
 402 * vm_map_ram() will allocate auxiliary structures (e.g.
403 * pagetables) with GFP_KERNEL, yet we are likely to be under
404 * GFP_NOFS context here. Hence we need to tell memory reclaim
405 * that we are in such a context via PF_MEMALLOC_NOIO to prevent
406 * memory reclaim re-entering the filesystem here and
407 * potentially deadlocking.
408 */
409 noio_flag = memalloc_noio_save();
400 do { 410 do {
401 bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count, 411 bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
402 -1, PAGE_KERNEL); 412 -1, PAGE_KERNEL);
@@ -404,6 +414,7 @@ _xfs_buf_map_pages(
404 break; 414 break;
405 vm_unmap_aliases(); 415 vm_unmap_aliases();
406 } while (retried++ <= 1); 416 } while (retried++ <= 1);
417 memalloc_noio_restore(noio_flag);
407 418
408 if (!bp->b_addr) 419 if (!bp->b_addr)
409 return -ENOMEM; 420 return -ENOMEM;
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 995339534db6..b8a3abf6cf47 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -369,6 +369,20 @@ static inline void xfs_buf_relse(xfs_buf_t *bp)
369 xfs_buf_rele(bp); 369 xfs_buf_rele(bp);
370} 370}
371 371
372static inline int
373xfs_buf_verify_cksum(struct xfs_buf *bp, unsigned long cksum_offset)
374{
375 return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
376 cksum_offset);
377}
378
379static inline void
380xfs_buf_update_cksum(struct xfs_buf *bp, unsigned long cksum_offset)
381{
382 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
383 cksum_offset);
384}
385
372/* 386/*
373 * Handling of buftargs. 387 * Handling of buftargs.
374 */ 388 */
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 33149113e333..8752821443be 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -796,20 +796,6 @@ xfs_buf_item_init(
796 bip->bli_formats[i].blf_map_size = map_size; 796 bip->bli_formats[i].blf_map_size = map_size;
797 } 797 }
798 798
799#ifdef XFS_TRANS_DEBUG
800 /*
801 * Allocate the arrays for tracking what needs to be logged
802 * and what our callers request to be logged. bli_orig
803 * holds a copy of the original, clean buffer for comparison
804 * against, and bli_logged keeps a 1 bit flag per byte in
805 * the buffer to indicate which bytes the callers have asked
806 * to have logged.
807 */
808 bip->bli_orig = kmem_alloc(BBTOB(bp->b_length), KM_SLEEP);
809 memcpy(bip->bli_orig, bp->b_addr, BBTOB(bp->b_length));
810 bip->bli_logged = kmem_zalloc(BBTOB(bp->b_length) / NBBY, KM_SLEEP);
811#endif
812
813 /* 799 /*
814 * Put the buf item into the list of items attached to the 800 * Put the buf item into the list of items attached to the
815 * buffer at the front. 801 * buffer at the front.
@@ -957,11 +943,6 @@ STATIC void
957xfs_buf_item_free( 943xfs_buf_item_free(
958 xfs_buf_log_item_t *bip) 944 xfs_buf_log_item_t *bip)
959{ 945{
960#ifdef XFS_TRANS_DEBUG
961 kmem_free(bip->bli_orig);
962 kmem_free(bip->bli_logged);
963#endif /* XFS_TRANS_DEBUG */
964
965 xfs_buf_item_free_format(bip); 946 xfs_buf_item_free_format(bip);
966 kmem_zone_free(xfs_buf_item_zone, bip); 947 kmem_zone_free(xfs_buf_item_zone, bip);
967} 948}
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 796272a2e129..6cc5f6785a77 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -185,8 +185,8 @@ xfs_da3_node_write_verify(
185 struct xfs_da3_node_hdr *hdr3 = bp->b_addr; 185 struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
186 186
187 if (!xfs_da3_node_verify(bp)) { 187 if (!xfs_da3_node_verify(bp)) {
188 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
189 xfs_buf_ioerror(bp, EFSCORRUPTED); 188 xfs_buf_ioerror(bp, EFSCORRUPTED);
189 xfs_verifier_error(bp);
190 return; 190 return;
191 } 191 }
192 192
@@ -196,7 +196,7 @@ xfs_da3_node_write_verify(
196 if (bip) 196 if (bip)
197 hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); 197 hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
198 198
199 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DA3_NODE_CRC_OFF); 199 xfs_buf_update_cksum(bp, XFS_DA3_NODE_CRC_OFF);
200} 200}
201 201
202/* 202/*
@@ -209,18 +209,20 @@ static void
209xfs_da3_node_read_verify( 209xfs_da3_node_read_verify(
210 struct xfs_buf *bp) 210 struct xfs_buf *bp)
211{ 211{
212 struct xfs_mount *mp = bp->b_target->bt_mount;
213 struct xfs_da_blkinfo *info = bp->b_addr; 212 struct xfs_da_blkinfo *info = bp->b_addr;
214 213
215 switch (be16_to_cpu(info->magic)) { 214 switch (be16_to_cpu(info->magic)) {
216 case XFS_DA3_NODE_MAGIC: 215 case XFS_DA3_NODE_MAGIC:
217 if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), 216 if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) {
218 XFS_DA3_NODE_CRC_OFF)) 217 xfs_buf_ioerror(bp, EFSBADCRC);
219 break; 218 break;
219 }
220 /* fall through */ 220 /* fall through */
221 case XFS_DA_NODE_MAGIC: 221 case XFS_DA_NODE_MAGIC:
222 if (!xfs_da3_node_verify(bp)) 222 if (!xfs_da3_node_verify(bp)) {
223 xfs_buf_ioerror(bp, EFSCORRUPTED);
223 break; 224 break;
225 }
224 return; 226 return;
225 case XFS_ATTR_LEAF_MAGIC: 227 case XFS_ATTR_LEAF_MAGIC:
226 case XFS_ATTR3_LEAF_MAGIC: 228 case XFS_ATTR3_LEAF_MAGIC:
@@ -237,8 +239,7 @@ xfs_da3_node_read_verify(
237 } 239 }
238 240
239 /* corrupt block */ 241 /* corrupt block */
240 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); 242 xfs_verifier_error(bp);
241 xfs_buf_ioerror(bp, EFSCORRUPTED);
242} 243}
243 244
244const struct xfs_buf_ops xfs_da3_node_buf_ops = { 245const struct xfs_buf_ops xfs_da3_node_buf_ops = {
@@ -1295,7 +1296,7 @@ xfs_da3_fixhashpath(
1295 node = blk->bp->b_addr; 1296 node = blk->bp->b_addr;
1296 dp->d_ops->node_hdr_from_disk(&nodehdr, node); 1297 dp->d_ops->node_hdr_from_disk(&nodehdr, node);
1297 btree = dp->d_ops->node_tree_p(node); 1298 btree = dp->d_ops->node_tree_p(node);
1298 if (be32_to_cpu(btree->hashval) == lasthash) 1299 if (be32_to_cpu(btree[blk->index].hashval) == lasthash)
1299 break; 1300 break;
1300 blk->hashval = lasthash; 1301 blk->hashval = lasthash;
1301 btree[blk->index].hashval = cpu_to_be32(lasthash); 1302 btree[blk->index].hashval = cpu_to_be32(lasthash);
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
index e5869b50dc41..623bbe8fd921 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -89,6 +89,8 @@ typedef struct xfs_dinode {
89 /* structure must be padded to 64 bit alignment */ 89 /* structure must be padded to 64 bit alignment */
90} xfs_dinode_t; 90} xfs_dinode_t;
91 91
92#define XFS_DINODE_CRC_OFF offsetof(struct xfs_dinode, di_crc)
93
92#define DI_MAX_FLUSH 0xffff 94#define DI_MAX_FLUSH 0xffff
93 95
94/* 96/*
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index ce16ef02997a..fda46253966a 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -180,16 +180,23 @@ xfs_dir_init(
180 xfs_inode_t *dp, 180 xfs_inode_t *dp,
181 xfs_inode_t *pdp) 181 xfs_inode_t *pdp)
182{ 182{
183 xfs_da_args_t args; 183 struct xfs_da_args *args;
184 int error; 184 int error;
185 185
186 memset((char *)&args, 0, sizeof(args));
187 args.dp = dp;
188 args.trans = tp;
189 ASSERT(S_ISDIR(dp->i_d.di_mode)); 186 ASSERT(S_ISDIR(dp->i_d.di_mode));
190 if ((error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino))) 187 error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino);
188 if (error)
191 return error; 189 return error;
192 return xfs_dir2_sf_create(&args, pdp->i_ino); 190
191 args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
192 if (!args)
193 return ENOMEM;
194
195 args->dp = dp;
196 args->trans = tp;
197 error = xfs_dir2_sf_create(args, pdp->i_ino);
198 kmem_free(args);
199 return error;
193} 200}
194 201
195/* 202/*
@@ -205,41 +212,56 @@ xfs_dir_createname(
205 xfs_bmap_free_t *flist, /* bmap's freeblock list */ 212 xfs_bmap_free_t *flist, /* bmap's freeblock list */
206 xfs_extlen_t total) /* bmap's total block count */ 213 xfs_extlen_t total) /* bmap's total block count */
207{ 214{
208 xfs_da_args_t args; 215 struct xfs_da_args *args;
209 int rval; 216 int rval;
210 int v; /* type-checking value */ 217 int v; /* type-checking value */
211 218
212 ASSERT(S_ISDIR(dp->i_d.di_mode)); 219 ASSERT(S_ISDIR(dp->i_d.di_mode));
213 if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) 220 rval = xfs_dir_ino_validate(tp->t_mountp, inum);
221 if (rval)
214 return rval; 222 return rval;
215 XFS_STATS_INC(xs_dir_create); 223 XFS_STATS_INC(xs_dir_create);
216 224
217 memset(&args, 0, sizeof(xfs_da_args_t)); 225 args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
218 args.name = name->name; 226 if (!args)
219 args.namelen = name->len; 227 return ENOMEM;
220 args.filetype = name->type; 228
221 args.hashval = dp->i_mount->m_dirnameops->hashname(name); 229 args->name = name->name;
222 args.inumber = inum; 230 args->namelen = name->len;
223 args.dp = dp; 231 args->filetype = name->type;
224 args.firstblock = first; 232 args->hashval = dp->i_mount->m_dirnameops->hashname(name);
225 args.flist = flist; 233 args->inumber = inum;
226 args.total = total; 234 args->dp = dp;
227 args.whichfork = XFS_DATA_FORK; 235 args->firstblock = first;
228 args.trans = tp; 236 args->flist = flist;
229 args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; 237 args->total = total;
230 238 args->whichfork = XFS_DATA_FORK;
231 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 239 args->trans = tp;
232 rval = xfs_dir2_sf_addname(&args); 240 args->op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
233 else if ((rval = xfs_dir2_isblock(tp, dp, &v))) 241
234 return rval; 242 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
235 else if (v) 243 rval = xfs_dir2_sf_addname(args);
236 rval = xfs_dir2_block_addname(&args); 244 goto out_free;
237 else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) 245 }
238 return rval; 246
239 else if (v) 247 rval = xfs_dir2_isblock(tp, dp, &v);
240 rval = xfs_dir2_leaf_addname(&args); 248 if (rval)
249 goto out_free;
250 if (v) {
251 rval = xfs_dir2_block_addname(args);
252 goto out_free;
253 }
254
255 rval = xfs_dir2_isleaf(tp, dp, &v);
256 if (rval)
257 goto out_free;
258 if (v)
259 rval = xfs_dir2_leaf_addname(args);
241 else 260 else
242 rval = xfs_dir2_node_addname(&args); 261 rval = xfs_dir2_node_addname(args);
262
263out_free:
264 kmem_free(args);
243 return rval; 265 return rval;
244} 266}
245 267
@@ -282,46 +304,66 @@ xfs_dir_lookup(
282 xfs_ino_t *inum, /* out: inode number */ 304 xfs_ino_t *inum, /* out: inode number */
283 struct xfs_name *ci_name) /* out: actual name if CI match */ 305 struct xfs_name *ci_name) /* out: actual name if CI match */
284{ 306{
285 xfs_da_args_t args; 307 struct xfs_da_args *args;
286 int rval; 308 int rval;
287 int v; /* type-checking value */ 309 int v; /* type-checking value */
288 310
289 ASSERT(S_ISDIR(dp->i_d.di_mode)); 311 ASSERT(S_ISDIR(dp->i_d.di_mode));
290 XFS_STATS_INC(xs_dir_lookup); 312 XFS_STATS_INC(xs_dir_lookup);
291 313
292 memset(&args, 0, sizeof(xfs_da_args_t)); 314 /*
293 args.name = name->name; 315 * We need to use KM_NOFS here so that lockdep will not throw false
294 args.namelen = name->len; 316 * positive deadlock warnings on a non-transactional lookup path. It is
295 args.filetype = name->type; 317 * safe to recurse into inode recalim in that case, but lockdep can't
296 args.hashval = dp->i_mount->m_dirnameops->hashname(name); 318 * easily be taught about it. Hence KM_NOFS avoids having to add more
297 args.dp = dp; 319 * lockdep Doing this avoids having to add a bunch of lockdep class
298 args.whichfork = XFS_DATA_FORK; 320 * annotations into the reclaim path for the ilock.
299 args.trans = tp; 321 */
300 args.op_flags = XFS_DA_OP_OKNOENT; 322 args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
323 args->name = name->name;
324 args->namelen = name->len;
325 args->filetype = name->type;
326 args->hashval = dp->i_mount->m_dirnameops->hashname(name);
327 args->dp = dp;
328 args->whichfork = XFS_DATA_FORK;
329 args->trans = tp;
330 args->op_flags = XFS_DA_OP_OKNOENT;
301 if (ci_name) 331 if (ci_name)
302 args.op_flags |= XFS_DA_OP_CILOOKUP; 332 args->op_flags |= XFS_DA_OP_CILOOKUP;
303 333
304 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 334 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
305 rval = xfs_dir2_sf_lookup(&args); 335 rval = xfs_dir2_sf_lookup(args);
306 else if ((rval = xfs_dir2_isblock(tp, dp, &v))) 336 goto out_check_rval;
307 return rval; 337 }
308 else if (v) 338
309 rval = xfs_dir2_block_lookup(&args); 339 rval = xfs_dir2_isblock(tp, dp, &v);
310 else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) 340 if (rval)
311 return rval; 341 goto out_free;
312 else if (v) 342 if (v) {
313 rval = xfs_dir2_leaf_lookup(&args); 343 rval = xfs_dir2_block_lookup(args);
344 goto out_check_rval;
345 }
346
347 rval = xfs_dir2_isleaf(tp, dp, &v);
348 if (rval)
349 goto out_free;
350 if (v)
351 rval = xfs_dir2_leaf_lookup(args);
314 else 352 else
315 rval = xfs_dir2_node_lookup(&args); 353 rval = xfs_dir2_node_lookup(args);
354
355out_check_rval:
316 if (rval == EEXIST) 356 if (rval == EEXIST)
317 rval = 0; 357 rval = 0;
318 if (!rval) { 358 if (!rval) {
319 *inum = args.inumber; 359 *inum = args->inumber;
320 if (ci_name) { 360 if (ci_name) {
321 ci_name->name = args.value; 361 ci_name->name = args->value;
322 ci_name->len = args.valuelen; 362 ci_name->len = args->valuelen;
323 } 363 }
324 } 364 }
365out_free:
366 kmem_free(args);
325 return rval; 367 return rval;
326} 368}
327 369
@@ -338,38 +380,51 @@ xfs_dir_removename(
338 xfs_bmap_free_t *flist, /* bmap's freeblock list */ 380 xfs_bmap_free_t *flist, /* bmap's freeblock list */
339 xfs_extlen_t total) /* bmap's total block count */ 381 xfs_extlen_t total) /* bmap's total block count */
340{ 382{
341 xfs_da_args_t args; 383 struct xfs_da_args *args;
342 int rval; 384 int rval;
343 int v; /* type-checking value */ 385 int v; /* type-checking value */
344 386
345 ASSERT(S_ISDIR(dp->i_d.di_mode)); 387 ASSERT(S_ISDIR(dp->i_d.di_mode));
346 XFS_STATS_INC(xs_dir_remove); 388 XFS_STATS_INC(xs_dir_remove);
347 389
348 memset(&args, 0, sizeof(xfs_da_args_t)); 390 args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
349 args.name = name->name; 391 if (!args)
350 args.namelen = name->len; 392 return ENOMEM;
351 args.filetype = name->type; 393
352 args.hashval = dp->i_mount->m_dirnameops->hashname(name); 394 args->name = name->name;
353 args.inumber = ino; 395 args->namelen = name->len;
354 args.dp = dp; 396 args->filetype = name->type;
355 args.firstblock = first; 397 args->hashval = dp->i_mount->m_dirnameops->hashname(name);
356 args.flist = flist; 398 args->inumber = ino;
357 args.total = total; 399 args->dp = dp;
358 args.whichfork = XFS_DATA_FORK; 400 args->firstblock = first;
359 args.trans = tp; 401 args->flist = flist;
360 402 args->total = total;
361 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 403 args->whichfork = XFS_DATA_FORK;
362 rval = xfs_dir2_sf_removename(&args); 404 args->trans = tp;
363 else if ((rval = xfs_dir2_isblock(tp, dp, &v))) 405
364 return rval; 406 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
365 else if (v) 407 rval = xfs_dir2_sf_removename(args);
366 rval = xfs_dir2_block_removename(&args); 408 goto out_free;
367 else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) 409 }
368 return rval; 410
369 else if (v) 411 rval = xfs_dir2_isblock(tp, dp, &v);
370 rval = xfs_dir2_leaf_removename(&args); 412 if (rval)
413 goto out_free;
414 if (v) {
415 rval = xfs_dir2_block_removename(args);
416 goto out_free;
417 }
418
419 rval = xfs_dir2_isleaf(tp, dp, &v);
420 if (rval)
421 goto out_free;
422 if (v)
423 rval = xfs_dir2_leaf_removename(args);
371 else 424 else
372 rval = xfs_dir2_node_removename(&args); 425 rval = xfs_dir2_node_removename(args);
426out_free:
427 kmem_free(args);
373 return rval; 428 return rval;
374} 429}
375 430
@@ -386,40 +441,54 @@ xfs_dir_replace(
386 xfs_bmap_free_t *flist, /* bmap's freeblock list */ 441 xfs_bmap_free_t *flist, /* bmap's freeblock list */
387 xfs_extlen_t total) /* bmap's total block count */ 442 xfs_extlen_t total) /* bmap's total block count */
388{ 443{
389 xfs_da_args_t args; 444 struct xfs_da_args *args;
390 int rval; 445 int rval;
391 int v; /* type-checking value */ 446 int v; /* type-checking value */
392 447
393 ASSERT(S_ISDIR(dp->i_d.di_mode)); 448 ASSERT(S_ISDIR(dp->i_d.di_mode));
394 449
395 if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) 450 rval = xfs_dir_ino_validate(tp->t_mountp, inum);
451 if (rval)
396 return rval; 452 return rval;
397 453
398 memset(&args, 0, sizeof(xfs_da_args_t)); 454 args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
399 args.name = name->name; 455 if (!args)
400 args.namelen = name->len; 456 return ENOMEM;
401 args.filetype = name->type; 457
402 args.hashval = dp->i_mount->m_dirnameops->hashname(name); 458 args->name = name->name;
403 args.inumber = inum; 459 args->namelen = name->len;
404 args.dp = dp; 460 args->filetype = name->type;
405 args.firstblock = first; 461 args->hashval = dp->i_mount->m_dirnameops->hashname(name);
406 args.flist = flist; 462 args->inumber = inum;
407 args.total = total; 463 args->dp = dp;
408 args.whichfork = XFS_DATA_FORK; 464 args->firstblock = first;
409 args.trans = tp; 465 args->flist = flist;
410 466 args->total = total;
411 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 467 args->whichfork = XFS_DATA_FORK;
412 rval = xfs_dir2_sf_replace(&args); 468 args->trans = tp;
413 else if ((rval = xfs_dir2_isblock(tp, dp, &v))) 469
414 return rval; 470 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
415 else if (v) 471 rval = xfs_dir2_sf_replace(args);
416 rval = xfs_dir2_block_replace(&args); 472 goto out_free;
417 else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) 473 }
418 return rval; 474
419 else if (v) 475 rval = xfs_dir2_isblock(tp, dp, &v);
420 rval = xfs_dir2_leaf_replace(&args); 476 if (rval)
477 goto out_free;
478 if (v) {
479 rval = xfs_dir2_block_replace(args);
480 goto out_free;
481 }
482
483 rval = xfs_dir2_isleaf(tp, dp, &v);
484 if (rval)
485 goto out_free;
486 if (v)
487 rval = xfs_dir2_leaf_replace(args);
421 else 488 else
422 rval = xfs_dir2_node_replace(&args); 489 rval = xfs_dir2_node_replace(args);
490out_free:
491 kmem_free(args);
423 return rval; 492 return rval;
424} 493}
425 494
@@ -434,7 +503,7 @@ xfs_dir_canenter(
434 struct xfs_name *name, /* name of entry to add */ 503 struct xfs_name *name, /* name of entry to add */
435 uint resblks) 504 uint resblks)
436{ 505{
437 xfs_da_args_t args; 506 struct xfs_da_args *args;
438 int rval; 507 int rval;
439 int v; /* type-checking value */ 508 int v; /* type-checking value */
440 509
@@ -443,29 +512,42 @@ xfs_dir_canenter(
443 512
444 ASSERT(S_ISDIR(dp->i_d.di_mode)); 513 ASSERT(S_ISDIR(dp->i_d.di_mode));
445 514
446 memset(&args, 0, sizeof(xfs_da_args_t)); 515 args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
447 args.name = name->name; 516 if (!args)
448 args.namelen = name->len; 517 return ENOMEM;
449 args.filetype = name->type; 518
450 args.hashval = dp->i_mount->m_dirnameops->hashname(name); 519 args->name = name->name;
451 args.dp = dp; 520 args->namelen = name->len;
452 args.whichfork = XFS_DATA_FORK; 521 args->filetype = name->type;
453 args.trans = tp; 522 args->hashval = dp->i_mount->m_dirnameops->hashname(name);
454 args.op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME | 523 args->dp = dp;
524 args->whichfork = XFS_DATA_FORK;
525 args->trans = tp;
526 args->op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME |
455 XFS_DA_OP_OKNOENT; 527 XFS_DA_OP_OKNOENT;
456 528
457 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 529 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
458 rval = xfs_dir2_sf_addname(&args); 530 rval = xfs_dir2_sf_addname(args);
459 else if ((rval = xfs_dir2_isblock(tp, dp, &v))) 531 goto out_free;
460 return rval; 532 }
461 else if (v) 533
462 rval = xfs_dir2_block_addname(&args); 534 rval = xfs_dir2_isblock(tp, dp, &v);
463 else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) 535 if (rval)
464 return rval; 536 goto out_free;
465 else if (v) 537 if (v) {
466 rval = xfs_dir2_leaf_addname(&args); 538 rval = xfs_dir2_block_addname(args);
539 goto out_free;
540 }
541
542 rval = xfs_dir2_isleaf(tp, dp, &v);
543 if (rval)
544 goto out_free;
545 if (v)
546 rval = xfs_dir2_leaf_addname(args);
467 else 547 else
468 rval = xfs_dir2_node_addname(&args); 548 rval = xfs_dir2_node_addname(args);
549out_free:
550 kmem_free(args);
469 return rval; 551 return rval;
470} 552}
471 553
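
The four xfs_dir_* wrappers above all converge on the same shape: the xfs_da_args structure moves from the stack to a kmem_zalloc() allocation, and every exit path funnels through an out_free label so the memory is always released. A condensed sketch of that pattern; xfs_dir2_sf_example_op() and xfs_dir2_node_example_op() are hypothetical stand-ins for whichever shortform/block/leaf/node operation ends up selected:

/* Heap-allocated xfs_da_args with a single cleanup path, mirroring the
 * conversions in the hunks above. */
int
xfs_dir2_example(
        struct xfs_trans        *tp,
        struct xfs_inode        *dp,
        struct xfs_name         *name)
{
        struct xfs_da_args      *args;
        int                     rval;

        args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
        if (!args)
                return ENOMEM;          /* positive errno, as above */

        args->name = name->name;
        args->namelen = name->len;
        args->hashval = dp->i_mount->m_dirnameops->hashname(name);
        args->dp = dp;
        args->whichfork = XFS_DATA_FORK;
        args->trans = tp;

        if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
                rval = xfs_dir2_sf_example_op(args);    /* hypothetical */
                goto out_free;
        }
        rval = xfs_dir2_node_example_op(args);          /* hypothetical */
out_free:
        kmem_free(args);
        return rval;
}
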
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 90cdbf4b5f19..4f6a38cb83a4 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -89,13 +89,14 @@ xfs_dir3_block_read_verify(
89{ 89{
90 struct xfs_mount *mp = bp->b_target->bt_mount; 90 struct xfs_mount *mp = bp->b_target->bt_mount;
91 91
92 if ((xfs_sb_version_hascrc(&mp->m_sb) && 92 if (xfs_sb_version_hascrc(&mp->m_sb) &&
93 !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), 93 !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF))
94 XFS_DIR3_DATA_CRC_OFF)) || 94 xfs_buf_ioerror(bp, EFSBADCRC);
95 !xfs_dir3_block_verify(bp)) { 95 else if (!xfs_dir3_block_verify(bp))
96 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
97 xfs_buf_ioerror(bp, EFSCORRUPTED); 96 xfs_buf_ioerror(bp, EFSCORRUPTED);
98 } 97
98 if (bp->b_error)
99 xfs_verifier_error(bp);
99} 100}
100 101
101static void 102static void
@@ -107,8 +108,8 @@ xfs_dir3_block_write_verify(
107 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; 108 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
108 109
109 if (!xfs_dir3_block_verify(bp)) { 110 if (!xfs_dir3_block_verify(bp)) {
110 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
111 xfs_buf_ioerror(bp, EFSCORRUPTED); 111 xfs_buf_ioerror(bp, EFSCORRUPTED);
112 xfs_verifier_error(bp);
112 return; 113 return;
113 } 114 }
114 115
@@ -118,7 +119,7 @@ xfs_dir3_block_write_verify(
118 if (bip) 119 if (bip)
119 hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); 120 hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn);
120 121
121 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_DATA_CRC_OFF); 122 xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF);
122} 123}
123 124
124const struct xfs_buf_ops xfs_dir3_block_buf_ops = { 125const struct xfs_buf_ops xfs_dir3_block_buf_ops = {
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index 70acff4ee173..afa4ad523f3f 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -241,7 +241,6 @@ static void
241xfs_dir3_data_reada_verify( 241xfs_dir3_data_reada_verify(
242 struct xfs_buf *bp) 242 struct xfs_buf *bp)
243{ 243{
244 struct xfs_mount *mp = bp->b_target->bt_mount;
245 struct xfs_dir2_data_hdr *hdr = bp->b_addr; 244 struct xfs_dir2_data_hdr *hdr = bp->b_addr;
246 245
247 switch (hdr->magic) { 246 switch (hdr->magic) {
@@ -255,8 +254,8 @@ xfs_dir3_data_reada_verify(
255 xfs_dir3_data_verify(bp); 254 xfs_dir3_data_verify(bp);
256 return; 255 return;
257 default: 256 default:
258 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr);
259 xfs_buf_ioerror(bp, EFSCORRUPTED); 257 xfs_buf_ioerror(bp, EFSCORRUPTED);
258 xfs_verifier_error(bp);
260 break; 259 break;
261 } 260 }
262} 261}
@@ -267,13 +266,14 @@ xfs_dir3_data_read_verify(
267{ 266{
268 struct xfs_mount *mp = bp->b_target->bt_mount; 267 struct xfs_mount *mp = bp->b_target->bt_mount;
269 268
270 if ((xfs_sb_version_hascrc(&mp->m_sb) && 269 if (xfs_sb_version_hascrc(&mp->m_sb) &&
271 !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), 270 !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF))
272 XFS_DIR3_DATA_CRC_OFF)) || 271 xfs_buf_ioerror(bp, EFSBADCRC);
273 !xfs_dir3_data_verify(bp)) { 272 else if (!xfs_dir3_data_verify(bp))
274 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
275 xfs_buf_ioerror(bp, EFSCORRUPTED); 273 xfs_buf_ioerror(bp, EFSCORRUPTED);
276 } 274
275 if (bp->b_error)
276 xfs_verifier_error(bp);
277} 277}
278 278
279static void 279static void
@@ -285,8 +285,8 @@ xfs_dir3_data_write_verify(
285 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; 285 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
286 286
287 if (!xfs_dir3_data_verify(bp)) { 287 if (!xfs_dir3_data_verify(bp)) {
288 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
289 xfs_buf_ioerror(bp, EFSCORRUPTED); 288 xfs_buf_ioerror(bp, EFSCORRUPTED);
289 xfs_verifier_error(bp);
290 return; 290 return;
291 } 291 }
292 292
@@ -296,7 +296,7 @@ xfs_dir3_data_write_verify(
296 if (bip) 296 if (bip)
297 hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); 297 hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn);
298 298
299 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_DATA_CRC_OFF); 299 xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF);
300} 300}
301 301
302const struct xfs_buf_ops xfs_dir3_data_buf_ops = { 302const struct xfs_buf_ops xfs_dir3_data_buf_ops = {
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index ae47ec6e16c4..d36e97df1187 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -179,13 +179,14 @@ __read_verify(
179{ 179{
180 struct xfs_mount *mp = bp->b_target->bt_mount; 180 struct xfs_mount *mp = bp->b_target->bt_mount;
181 181
182 if ((xfs_sb_version_hascrc(&mp->m_sb) && 182 if (xfs_sb_version_hascrc(&mp->m_sb) &&
183 !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), 183 !xfs_buf_verify_cksum(bp, XFS_DIR3_LEAF_CRC_OFF))
184 XFS_DIR3_LEAF_CRC_OFF)) || 184 xfs_buf_ioerror(bp, EFSBADCRC);
185 !xfs_dir3_leaf_verify(bp, magic)) { 185 else if (!xfs_dir3_leaf_verify(bp, magic))
186 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
187 xfs_buf_ioerror(bp, EFSCORRUPTED); 186 xfs_buf_ioerror(bp, EFSCORRUPTED);
188 } 187
188 if (bp->b_error)
189 xfs_verifier_error(bp);
189} 190}
190 191
191static void 192static void
@@ -198,8 +199,8 @@ __write_verify(
198 struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr; 199 struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr;
199 200
200 if (!xfs_dir3_leaf_verify(bp, magic)) { 201 if (!xfs_dir3_leaf_verify(bp, magic)) {
201 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
202 xfs_buf_ioerror(bp, EFSCORRUPTED); 202 xfs_buf_ioerror(bp, EFSCORRUPTED);
203 xfs_verifier_error(bp);
203 return; 204 return;
204 } 205 }
205 206
@@ -209,7 +210,7 @@ __write_verify(
209 if (bip) 210 if (bip)
210 hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); 211 hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
211 212
212 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_LEAF_CRC_OFF); 213 xfs_buf_update_cksum(bp, XFS_DIR3_LEAF_CRC_OFF);
213} 214}
214 215
215static void 216static void
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index 48c7d18f68c3..cb434d732681 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -115,13 +115,14 @@ xfs_dir3_free_read_verify(
115{ 115{
116 struct xfs_mount *mp = bp->b_target->bt_mount; 116 struct xfs_mount *mp = bp->b_target->bt_mount;
117 117
118 if ((xfs_sb_version_hascrc(&mp->m_sb) && 118 if (xfs_sb_version_hascrc(&mp->m_sb) &&
119 !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), 119 !xfs_buf_verify_cksum(bp, XFS_DIR3_FREE_CRC_OFF))
120 XFS_DIR3_FREE_CRC_OFF)) || 120 xfs_buf_ioerror(bp, EFSBADCRC);
121 !xfs_dir3_free_verify(bp)) { 121 else if (!xfs_dir3_free_verify(bp))
122 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
123 xfs_buf_ioerror(bp, EFSCORRUPTED); 122 xfs_buf_ioerror(bp, EFSCORRUPTED);
124 } 123
124 if (bp->b_error)
125 xfs_verifier_error(bp);
125} 126}
126 127
127static void 128static void
@@ -133,8 +134,8 @@ xfs_dir3_free_write_verify(
133 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; 134 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
134 135
135 if (!xfs_dir3_free_verify(bp)) { 136 if (!xfs_dir3_free_verify(bp)) {
136 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
137 xfs_buf_ioerror(bp, EFSCORRUPTED); 137 xfs_buf_ioerror(bp, EFSCORRUPTED);
138 xfs_verifier_error(bp);
138 return; 139 return;
139 } 140 }
140 141
@@ -144,7 +145,7 @@ xfs_dir3_free_write_verify(
144 if (bip) 145 if (bip)
145 hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); 146 hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn);
146 147
147 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_FREE_CRC_OFF); 148 xfs_buf_update_cksum(bp, XFS_DIR3_FREE_CRC_OFF);
148} 149}
149 150
150const struct xfs_buf_ops xfs_dir3_free_buf_ops = { 151const struct xfs_buf_ops xfs_dir3_free_buf_ops = {
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 7aeb4c895b32..868b19f096bf 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -615,7 +615,7 @@ xfs_qm_dqread(
615 615
616 if (flags & XFS_QMOPT_DQALLOC) { 616 if (flags & XFS_QMOPT_DQALLOC) {
617 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC); 617 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
618 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_attrsetm, 618 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_dqalloc,
619 XFS_QM_DQALLOC_SPACE_RES(mp), 0); 619 XFS_QM_DQALLOC_SPACE_RES(mp), 0);
620 if (error) 620 if (error)
621 goto error1; 621 goto error1;
diff --git a/fs/xfs/xfs_dquot_buf.c b/fs/xfs/xfs_dquot_buf.c
index d401457d2f25..610da8177737 100644
--- a/fs/xfs/xfs_dquot_buf.c
+++ b/fs/xfs/xfs_dquot_buf.c
@@ -257,10 +257,13 @@ xfs_dquot_buf_read_verify(
257{ 257{
258 struct xfs_mount *mp = bp->b_target->bt_mount; 258 struct xfs_mount *mp = bp->b_target->bt_mount;
259 259
260 if (!xfs_dquot_buf_verify_crc(mp, bp) || !xfs_dquot_buf_verify(mp, bp)) { 260 if (!xfs_dquot_buf_verify_crc(mp, bp))
261 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); 261 xfs_buf_ioerror(bp, EFSBADCRC);
262 else if (!xfs_dquot_buf_verify(mp, bp))
262 xfs_buf_ioerror(bp, EFSCORRUPTED); 263 xfs_buf_ioerror(bp, EFSCORRUPTED);
263 } 264
265 if (bp->b_error)
266 xfs_verifier_error(bp);
264} 267}
265 268
266/* 269/*
@@ -275,8 +278,8 @@ xfs_dquot_buf_write_verify(
275 struct xfs_mount *mp = bp->b_target->bt_mount; 278 struct xfs_mount *mp = bp->b_target->bt_mount;
276 279
277 if (!xfs_dquot_buf_verify(mp, bp)) { 280 if (!xfs_dquot_buf_verify(mp, bp)) {
278 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
279 xfs_buf_ioerror(bp, EFSCORRUPTED); 281 xfs_buf_ioerror(bp, EFSCORRUPTED);
282 xfs_verifier_error(bp);
280 return; 283 return;
281 } 284 }
282} 285}
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 9995b807d627..edac5b057d28 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -156,7 +156,7 @@ xfs_error_report(
156{ 156{
157 if (level <= xfs_error_level) { 157 if (level <= xfs_error_level) {
158 xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT, 158 xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT,
159 "Internal error %s at line %d of file %s. Caller 0x%p", 159 "Internal error %s at line %d of file %s. Caller %pF",
160 tag, linenum, filename, ra); 160 tag, linenum, filename, ra);
161 161
162 xfs_stack_trace(); 162 xfs_stack_trace();
@@ -178,3 +178,28 @@ xfs_corruption_error(
178 xfs_error_report(tag, level, mp, filename, linenum, ra); 178 xfs_error_report(tag, level, mp, filename, linenum, ra);
179 xfs_alert(mp, "Corruption detected. Unmount and run xfs_repair"); 179 xfs_alert(mp, "Corruption detected. Unmount and run xfs_repair");
180} 180}
181
182/*
183 * Warnings specifically for verifier errors. Differentiate CRC vs. invalid
184 * values, and omit the stack trace unless the error level is tuned high.
185 */
186void
187xfs_verifier_error(
188 struct xfs_buf *bp)
189{
190 struct xfs_mount *mp = bp->b_target->bt_mount;
191
192 xfs_alert(mp, "Metadata %s detected at %pF, block 0x%llx",
193 bp->b_error == EFSBADCRC ? "CRC error" : "corruption",
194 __return_address, bp->b_bn);
195
196 xfs_alert(mp, "Unmount and run xfs_repair");
197
198 if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
199 xfs_alert(mp, "First 64 bytes of corrupted metadata buffer:");
200 xfs_hex_dump(xfs_buf_offset(bp, 0), 64);
201 }
202
203 if (xfs_error_level >= XFS_ERRLEVEL_HIGH)
204 xfs_stack_trace();
205}
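
For reference, every read verifier converted in this series ends up with the same two-step shape: record the failure class on the buffer with xfs_buf_ioerror() (EFSBADCRC for a checksum mismatch, EFSCORRUPTED for bad contents), then let the new helper decide how to report it. A condensed sketch; xfs_foo_verify() and XFS_FOO_CRC_OFF are hypothetical stand-ins for a structure-specific check and CRC field offset:

static void
xfs_foo_read_verify(
        struct xfs_buf          *bp)
{
        struct xfs_mount        *mp = bp->b_target->bt_mount;

        /* CRC failure and structural corruption get distinct error codes... */
        if (xfs_sb_version_hascrc(&mp->m_sb) &&
            !xfs_buf_verify_cksum(bp, XFS_FOO_CRC_OFF))
                xfs_buf_ioerror(bp, EFSBADCRC);
        else if (!xfs_foo_verify(bp))
                xfs_buf_ioerror(bp, EFSCORRUPTED);

        /* ...but a single helper now owns the reporting policy. */
        if (bp->b_error)
                xfs_verifier_error(bp);
}
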
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 079a367f44ee..c1c57d4a4b5d 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -34,6 +34,7 @@ extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp,
34extern void xfs_corruption_error(const char *tag, int level, 34extern void xfs_corruption_error(const char *tag, int level,
35 struct xfs_mount *mp, void *p, const char *filename, 35 struct xfs_mount *mp, void *p, const char *filename,
36 int linenum, inst_t *ra); 36 int linenum, inst_t *ra);
37extern void xfs_verifier_error(struct xfs_buf *bp);
37 38
38#define XFS_ERROR_REPORT(e, lvl, mp) \ 39#define XFS_ERROR_REPORT(e, lvl, mp) \
39 xfs_error_report(e, lvl, mp, __FILE__, __LINE__, __return_address) 40 xfs_error_report(e, lvl, mp, __FILE__, __LINE__, __return_address)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 64b48eade91d..f7abff8c16ca 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -823,7 +823,8 @@ xfs_file_fallocate(
823 823
824 if (!S_ISREG(inode->i_mode)) 824 if (!S_ISREG(inode->i_mode))
825 return -EINVAL; 825 return -EINVAL;
826 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) 826 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
827 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
827 return -EOPNOTSUPP; 828 return -EOPNOTSUPP;
828 829
829 xfs_ilock(ip, XFS_IOLOCK_EXCL); 830 xfs_ilock(ip, XFS_IOLOCK_EXCL);
@@ -831,6 +832,20 @@ xfs_file_fallocate(
831 error = xfs_free_file_space(ip, offset, len); 832 error = xfs_free_file_space(ip, offset, len);
832 if (error) 833 if (error)
833 goto out_unlock; 834 goto out_unlock;
835 } else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
836 unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
837
838 if (offset & blksize_mask || len & blksize_mask) {
839 error = -EINVAL;
840 goto out_unlock;
841 }
842
843 ASSERT(offset + len < i_size_read(inode));
844 new_size = i_size_read(inode) - len;
845
846 error = xfs_collapse_file_space(ip, offset, len);
847 if (error)
848 goto out_unlock;
834 } else { 849 } else {
835 if (!(mode & FALLOC_FL_KEEP_SIZE) && 850 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
836 offset + len > i_size_read(inode)) { 851 offset + len > i_size_read(inode)) {
@@ -840,8 +855,11 @@ xfs_file_fallocate(
840 goto out_unlock; 855 goto out_unlock;
841 } 856 }
842 857
843 error = xfs_alloc_file_space(ip, offset, len, 858 if (mode & FALLOC_FL_ZERO_RANGE)
844 XFS_BMAPI_PREALLOC); 859 error = xfs_zero_file_space(ip, offset, len);
860 else
861 error = xfs_alloc_file_space(ip, offset, len,
862 XFS_BMAPI_PREALLOC);
845 if (error) 863 if (error)
846 goto out_unlock; 864 goto out_unlock;
847 } 865 }
@@ -859,7 +877,7 @@ xfs_file_fallocate(
859 if (ip->i_d.di_mode & S_IXGRP) 877 if (ip->i_d.di_mode & S_IXGRP)
860 ip->i_d.di_mode &= ~S_ISGID; 878 ip->i_d.di_mode &= ~S_ISGID;
861 879
862 if (!(mode & FALLOC_FL_PUNCH_HOLE)) 880 if (!(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE)))
863 ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; 881 ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
864 882
865 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 883 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
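
From userspace the two new xfs_file_fallocate() branches are reached through fallocate(2). A minimal sketch, assuming a kernel and glibc new enough to expose both flags; as the checks above require, FALLOC_FL_COLLAPSE_RANGE needs a block-aligned offset and length and a range that ends before EOF:

#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>
#include <stdio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
        if (argc != 2) {
                fprintf(stderr, "usage: %s <file>\n", argv[0]);
                return 1;
        }

        int fd = open(argv[1], O_RDWR);
        if (fd < 0) {
                perror("open");
                return 1;
        }

        /* Zero 1 MiB at offset 1 MiB; blocks stay allocated but read back as zeroes. */
        if (fallocate(fd, FALLOC_FL_ZERO_RANGE, 1 << 20, 1 << 20) < 0)
                perror("FALLOC_FL_ZERO_RANGE");

        /* Remove the same 1 MiB range and shift the rest of the file down. */
        if (fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, 1 << 20, 1 << 20) < 0)
                perror("FALLOC_FL_COLLAPSE_RANGE");

        close(fd);
        return 0;
}
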
diff --git a/fs/xfs/xfs_format.h b/fs/xfs/xfs_format.h
index b6ab5a3cfa12..9898f31d05d8 100644
--- a/fs/xfs/xfs_format.h
+++ b/fs/xfs/xfs_format.h
@@ -145,6 +145,8 @@ struct xfs_dsymlink_hdr {
145 __be64 sl_lsn; 145 __be64 sl_lsn;
146}; 146};
147 147
148#define XFS_SYMLINK_CRC_OFF offsetof(struct xfs_dsymlink_hdr, sl_crc)
149
148/* 150/*
149 * The maximum pathlen is 1024 bytes. Since the minimum file system 151 * The maximum pathlen is 1024 bytes. Since the minimum file system
150 * blocksize is 512 bytes, we can get a max of 3 extents back from 152 * blocksize is 512 bytes, we can get a max of 3 extents back from
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 5d7f105a1c82..8f711db61a0c 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -363,6 +363,18 @@ xfs_ialloc_ag_alloc(
363 args.minleft = args.mp->m_in_maxlevels - 1; 363 args.minleft = args.mp->m_in_maxlevels - 1;
364 if ((error = xfs_alloc_vextent(&args))) 364 if ((error = xfs_alloc_vextent(&args)))
365 return error; 365 return error;
366
367 /*
368 * This request might have dirtied the transaction if the AG can
369 * satisfy the request, but the exact block was not available.
370 * If the allocation did fail, subsequent requests will relax
371 * the exact agbno requirement and increase the alignment
372 * instead. It is critical that the total size of the request
373 * (len + alignment + slop) does not increase from this point
374 * on, so reset minalignslop to ensure it is not included in
375 * subsequent requests.
376 */
377 args.minalignslop = 0;
366 } else 378 } else
367 args.fsbno = NULLFSBLOCK; 379 args.fsbno = NULLFSBLOCK;
368 380
@@ -1568,18 +1580,17 @@ xfs_agi_read_verify(
1568 struct xfs_buf *bp) 1580 struct xfs_buf *bp)
1569{ 1581{
1570 struct xfs_mount *mp = bp->b_target->bt_mount; 1582 struct xfs_mount *mp = bp->b_target->bt_mount;
1571 int agi_ok = 1;
1572
1573 if (xfs_sb_version_hascrc(&mp->m_sb))
1574 agi_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
1575 offsetof(struct xfs_agi, agi_crc));
1576 agi_ok = agi_ok && xfs_agi_verify(bp);
1577 1583
1578 if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI, 1584 if (xfs_sb_version_hascrc(&mp->m_sb) &&
1579 XFS_RANDOM_IALLOC_READ_AGI))) { 1585 !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF))
1580 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); 1586 xfs_buf_ioerror(bp, EFSBADCRC);
1587 else if (XFS_TEST_ERROR(!xfs_agi_verify(bp), mp,
1588 XFS_ERRTAG_IALLOC_READ_AGI,
1589 XFS_RANDOM_IALLOC_READ_AGI))
1581 xfs_buf_ioerror(bp, EFSCORRUPTED); 1590 xfs_buf_ioerror(bp, EFSCORRUPTED);
1582 } 1591
1592 if (bp->b_error)
1593 xfs_verifier_error(bp);
1583} 1594}
1584 1595
1585static void 1596static void
@@ -1590,8 +1601,8 @@ xfs_agi_write_verify(
1590 struct xfs_buf_log_item *bip = bp->b_fspriv; 1601 struct xfs_buf_log_item *bip = bp->b_fspriv;
1591 1602
1592 if (!xfs_agi_verify(bp)) { 1603 if (!xfs_agi_verify(bp)) {
1593 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
1594 xfs_buf_ioerror(bp, EFSCORRUPTED); 1604 xfs_buf_ioerror(bp, EFSCORRUPTED);
1605 xfs_verifier_error(bp);
1595 return; 1606 return;
1596 } 1607 }
1597 1608
@@ -1600,8 +1611,7 @@ xfs_agi_write_verify(
1600 1611
1601 if (bip) 1612 if (bip)
1602 XFS_BUF_TO_AGI(bp)->agi_lsn = cpu_to_be64(bip->bli_item.li_lsn); 1613 XFS_BUF_TO_AGI(bp)->agi_lsn = cpu_to_be64(bip->bli_item.li_lsn);
1603 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), 1614 xfs_buf_update_cksum(bp, XFS_AGI_CRC_OFF);
1604 offsetof(struct xfs_agi, agi_crc));
1605} 1615}
1606 1616
1607const struct xfs_buf_ops xfs_agi_buf_ops = { 1617const struct xfs_buf_ops xfs_agi_buf_ops = {
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
index c8fa5bbb36de..7e309b11e87d 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c
@@ -243,12 +243,14 @@ static void
243xfs_inobt_read_verify( 243xfs_inobt_read_verify(
244 struct xfs_buf *bp) 244 struct xfs_buf *bp)
245{ 245{
246 if (!(xfs_btree_sblock_verify_crc(bp) && 246 if (!xfs_btree_sblock_verify_crc(bp))
247 xfs_inobt_verify(bp))) { 247 xfs_buf_ioerror(bp, EFSBADCRC);
248 trace_xfs_btree_corrupt(bp, _RET_IP_); 248 else if (!xfs_inobt_verify(bp))
249 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
250 bp->b_target->bt_mount, bp->b_addr);
251 xfs_buf_ioerror(bp, EFSCORRUPTED); 249 xfs_buf_ioerror(bp, EFSCORRUPTED);
250
251 if (bp->b_error) {
252 trace_xfs_btree_corrupt(bp, _RET_IP_);
253 xfs_verifier_error(bp);
252 } 254 }
253} 255}
254 256
@@ -258,9 +260,9 @@ xfs_inobt_write_verify(
258{ 260{
259 if (!xfs_inobt_verify(bp)) { 261 if (!xfs_inobt_verify(bp)) {
260 trace_xfs_btree_corrupt(bp, _RET_IP_); 262 trace_xfs_btree_corrupt(bp, _RET_IP_);
261 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
262 bp->b_target->bt_mount, bp->b_addr);
263 xfs_buf_ioerror(bp, EFSCORRUPTED); 263 xfs_buf_ioerror(bp, EFSCORRUPTED);
264 xfs_verifier_error(bp);
265 return;
264 } 266 }
265 xfs_btree_sblock_calc_crc(bp); 267 xfs_btree_sblock_calc_crc(bp);
266 268
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 3a137e9f9a7d..5e7a38fa6ee6 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -42,7 +42,6 @@
42#include "xfs_bmap_util.h" 42#include "xfs_bmap_util.h"
43#include "xfs_error.h" 43#include "xfs_error.h"
44#include "xfs_quota.h" 44#include "xfs_quota.h"
45#include "xfs_dinode.h"
46#include "xfs_filestream.h" 45#include "xfs_filestream.h"
47#include "xfs_cksum.h" 46#include "xfs_cksum.h"
48#include "xfs_trace.h" 47#include "xfs_trace.h"
@@ -62,6 +61,8 @@ kmem_zone_t *xfs_inode_zone;
62 61
63STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *); 62STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *);
64 63
64STATIC int xfs_iunlink_remove(xfs_trans_t *, xfs_inode_t *);
65
65/* 66/*
66 * helper function to extract extent size hint from inode 67 * helper function to extract extent size hint from inode
67 */ 68 */
@@ -1115,7 +1116,7 @@ xfs_bumplink(
1115{ 1116{
1116 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); 1117 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
1117 1118
1118 ASSERT(ip->i_d.di_nlink > 0); 1119 ASSERT(ip->i_d.di_nlink > 0 || (VFS_I(ip)->i_state & I_LINKABLE));
1119 ip->i_d.di_nlink++; 1120 ip->i_d.di_nlink++;
1120 inc_nlink(VFS_I(ip)); 1121 inc_nlink(VFS_I(ip));
1121 if ((ip->i_d.di_version == 1) && 1122 if ((ip->i_d.di_version == 1) &&
@@ -1165,10 +1166,7 @@ xfs_create(
1165 if (XFS_FORCED_SHUTDOWN(mp)) 1166 if (XFS_FORCED_SHUTDOWN(mp))
1166 return XFS_ERROR(EIO); 1167 return XFS_ERROR(EIO);
1167 1168
1168 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) 1169 prid = xfs_get_initial_prid(dp);
1169 prid = xfs_get_projid(dp);
1170 else
1171 prid = XFS_PROJID_DEFAULT;
1172 1170
1173 /* 1171 /*
1174 * Make sure that we have allocated dquot(s) on disk. 1172 * Make sure that we have allocated dquot(s) on disk.
@@ -1333,6 +1331,113 @@ xfs_create(
1333} 1331}
1334 1332
1335int 1333int
1334xfs_create_tmpfile(
1335 struct xfs_inode *dp,
1336 struct dentry *dentry,
1337 umode_t mode)
1338{
1339 struct xfs_mount *mp = dp->i_mount;
1340 struct xfs_inode *ip = NULL;
1341 struct xfs_trans *tp = NULL;
1342 int error;
1343 uint cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
1344 prid_t prid;
1345 struct xfs_dquot *udqp = NULL;
1346 struct xfs_dquot *gdqp = NULL;
1347 struct xfs_dquot *pdqp = NULL;
1348 struct xfs_trans_res *tres;
1349 uint resblks;
1350
1351 if (XFS_FORCED_SHUTDOWN(mp))
1352 return XFS_ERROR(EIO);
1353
1354 prid = xfs_get_initial_prid(dp);
1355
1356 /*
1357 * Make sure that we have allocated dquot(s) on disk.
1358 */
1359 error = xfs_qm_vop_dqalloc(dp, xfs_kuid_to_uid(current_fsuid()),
1360 xfs_kgid_to_gid(current_fsgid()), prid,
1361 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
1362 &udqp, &gdqp, &pdqp);
1363 if (error)
1364 return error;
1365
1366 resblks = XFS_IALLOC_SPACE_RES(mp);
1367 tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE_TMPFILE);
1368
1369 tres = &M_RES(mp)->tr_create_tmpfile;
1370 error = xfs_trans_reserve(tp, tres, resblks, 0);
1371 if (error == ENOSPC) {
1372 /* No space at all so try a "no-allocation" reservation */
1373 resblks = 0;
1374 error = xfs_trans_reserve(tp, tres, 0, 0);
1375 }
1376 if (error) {
1377 cancel_flags = 0;
1378 goto out_trans_cancel;
1379 }
1380
1381 error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp,
1382 pdqp, resblks, 1, 0);
1383 if (error)
1384 goto out_trans_cancel;
1385
1386 error = xfs_dir_ialloc(&tp, dp, mode, 1, 0,
1387 prid, resblks > 0, &ip, NULL);
1388 if (error) {
1389 if (error == ENOSPC)
1390 goto out_trans_cancel;
1391 goto out_trans_abort;
1392 }
1393
1394 if (mp->m_flags & XFS_MOUNT_WSYNC)
1395 xfs_trans_set_sync(tp);
1396
1397 /*
1398 * Attach the dquot(s) to the inodes and modify them incore.
1399 * These ids of the inode couldn't have changed since the new
1400 * inode has been locked ever since it was created.
1401 */
1402 xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
1403
1404 ip->i_d.di_nlink--;
1405 d_tmpfile(dentry, VFS_I(ip));
1406 error = xfs_iunlink(tp, ip);
1407 if (error)
1408 goto out_trans_abort;
1409
1410 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
1411 if (error)
1412 goto out_release_inode;
1413
1414 xfs_qm_dqrele(udqp);
1415 xfs_qm_dqrele(gdqp);
1416 xfs_qm_dqrele(pdqp);
1417
1418 return 0;
1419
1420 out_trans_abort:
1421 cancel_flags |= XFS_TRANS_ABORT;
1422 out_trans_cancel:
1423 xfs_trans_cancel(tp, cancel_flags);
1424 out_release_inode:
1425 /*
1426 * Wait until after the current transaction is aborted to
1427 * release the inode. This prevents recursive transactions
1428 * and deadlocks from xfs_inactive.
1429 */
1430 if (ip)
1431 IRELE(ip);
1432
1433 xfs_qm_dqrele(udqp);
1434 xfs_qm_dqrele(gdqp);
1435 xfs_qm_dqrele(pdqp);
1436
1437 return error;
1438}
1439
1440int
1336xfs_link( 1441xfs_link(
1337 xfs_inode_t *tdp, 1442 xfs_inode_t *tdp,
1338 xfs_inode_t *sip, 1443 xfs_inode_t *sip,
@@ -1397,6 +1502,12 @@ xfs_link(
1397 1502
1398 xfs_bmap_init(&free_list, &first_block); 1503 xfs_bmap_init(&free_list, &first_block);
1399 1504
1505 if (sip->i_d.di_nlink == 0) {
1506 error = xfs_iunlink_remove(tp, sip);
1507 if (error)
1508 goto abort_return;
1509 }
1510
1400 error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino, 1511 error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino,
1401 &first_block, &free_list, resblks); 1512 &first_block, &free_list, resblks);
1402 if (error) 1513 if (error)
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 65e2350f449c..396cc1fafd0d 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -20,6 +20,7 @@
20 20
21#include "xfs_inode_buf.h" 21#include "xfs_inode_buf.h"
22#include "xfs_inode_fork.h" 22#include "xfs_inode_fork.h"
23#include "xfs_dinode.h"
23 24
24/* 25/*
25 * Kernel only inode definitions 26 * Kernel only inode definitions
@@ -192,6 +193,15 @@ xfs_set_projid(struct xfs_inode *ip,
192 ip->i_d.di_projid_lo = (__uint16_t) (projid & 0xffff); 193 ip->i_d.di_projid_lo = (__uint16_t) (projid & 0xffff);
193} 194}
194 195
196static inline prid_t
197xfs_get_initial_prid(struct xfs_inode *dp)
198{
199 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
200 return xfs_get_projid(dp);
201
202 return XFS_PROJID_DEFAULT;
203}
204
195/* 205/*
196 * In-core inode flags. 206 * In-core inode flags.
197 */ 207 */
@@ -323,6 +333,8 @@ int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
323 struct xfs_inode **ipp, struct xfs_name *ci_name); 333 struct xfs_inode **ipp, struct xfs_name *ci_name);
324int xfs_create(struct xfs_inode *dp, struct xfs_name *name, 334int xfs_create(struct xfs_inode *dp, struct xfs_name *name,
325 umode_t mode, xfs_dev_t rdev, struct xfs_inode **ipp); 335 umode_t mode, xfs_dev_t rdev, struct xfs_inode **ipp);
336int xfs_create_tmpfile(struct xfs_inode *dp, struct dentry *dentry,
337 umode_t mode);
326int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, 338int xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
327 struct xfs_inode *ip); 339 struct xfs_inode *ip);
328int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, 340int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
diff --git a/fs/xfs/xfs_inode_buf.c b/fs/xfs/xfs_inode_buf.c
index 4fc9f39dd89e..24e993996bdc 100644
--- a/fs/xfs/xfs_inode_buf.c
+++ b/fs/xfs/xfs_inode_buf.c
@@ -102,8 +102,7 @@ xfs_inode_buf_verify(
102 } 102 }
103 103
104 xfs_buf_ioerror(bp, EFSCORRUPTED); 104 xfs_buf_ioerror(bp, EFSCORRUPTED);
105 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_HIGH, 105 xfs_verifier_error(bp);
106 mp, dip);
107#ifdef DEBUG 106#ifdef DEBUG
108 xfs_alert(mp, 107 xfs_alert(mp,
109 "bad inode magic/vsn daddr %lld #%d (magic=%x)", 108 "bad inode magic/vsn daddr %lld #%d (magic=%x)",
@@ -306,7 +305,7 @@ xfs_dinode_verify(
306 if (!xfs_sb_version_hascrc(&mp->m_sb)) 305 if (!xfs_sb_version_hascrc(&mp->m_sb))
307 return false; 306 return false;
308 if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, 307 if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
309 offsetof(struct xfs_dinode, di_crc))) 308 XFS_DINODE_CRC_OFF))
310 return false; 309 return false;
311 if (be64_to_cpu(dip->di_ino) != ip->i_ino) 310 if (be64_to_cpu(dip->di_ino) != ip->i_ino)
312 return false; 311 return false;
@@ -327,7 +326,7 @@ xfs_dinode_calc_crc(
327 326
328 ASSERT(xfs_sb_version_hascrc(&mp->m_sb)); 327 ASSERT(xfs_sb_version_hascrc(&mp->m_sb));
329 crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize, 328 crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize,
330 offsetof(struct xfs_dinode, di_crc)); 329 XFS_DINODE_CRC_OFF);
331 dip->di_crc = xfs_end_cksum(crc); 330 dip->di_crc = xfs_end_cksum(crc);
332} 331}
333 332
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 22d1cbea283d..3b80ebae05f5 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -128,7 +128,6 @@ xfs_iomap_write_direct(
128 xfs_fsblock_t firstfsb; 128 xfs_fsblock_t firstfsb;
129 xfs_extlen_t extsz, temp; 129 xfs_extlen_t extsz, temp;
130 int nimaps; 130 int nimaps;
131 int bmapi_flag;
132 int quota_flag; 131 int quota_flag;
133 int rt; 132 int rt;
134 xfs_trans_t *tp; 133 xfs_trans_t *tp;
@@ -200,18 +199,15 @@ xfs_iomap_write_direct(
200 199
201 xfs_trans_ijoin(tp, ip, 0); 200 xfs_trans_ijoin(tp, ip, 0);
202 201
203 bmapi_flag = 0;
204 if (offset < XFS_ISIZE(ip) || extsz)
205 bmapi_flag |= XFS_BMAPI_PREALLOC;
206
207 /* 202 /*
208 * From this point onwards we overwrite the imap pointer that the 203 * From this point onwards we overwrite the imap pointer that the
209 * caller gave to us. 204 * caller gave to us.
210 */ 205 */
211 xfs_bmap_init(&free_list, &firstfsb); 206 xfs_bmap_init(&free_list, &firstfsb);
212 nimaps = 1; 207 nimaps = 1;
213 error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, bmapi_flag, 208 error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
214 &firstfsb, 0, imap, &nimaps, &free_list); 209 XFS_BMAPI_PREALLOC, &firstfsb, 0,
210 imap, &nimaps, &free_list);
215 if (error) 211 if (error)
216 goto out_bmap_cancel; 212 goto out_bmap_cancel;
217 213
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 9ddfb8190ca1..89b07e43ca28 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -39,6 +39,7 @@
39#include "xfs_da_btree.h" 39#include "xfs_da_btree.h"
40#include "xfs_dir2_priv.h" 40#include "xfs_dir2_priv.h"
41#include "xfs_dinode.h" 41#include "xfs_dinode.h"
42#include "xfs_trans_space.h"
42 43
43#include <linux/capability.h> 44#include <linux/capability.h>
44#include <linux/xattr.h> 45#include <linux/xattr.h>
@@ -48,6 +49,18 @@
48#include <linux/fiemap.h> 49#include <linux/fiemap.h>
49#include <linux/slab.h> 50#include <linux/slab.h>
50 51
52/*
53 * Directories have different lock order w.r.t. mmap_sem compared to regular
54 * files. This is due to readdir potentially triggering page faults on a user
55 * buffer inside filldir(), and this happens with the ilock on the directory
56 * held. For regular files, the lock order is the other way around - the
57 * mmap_sem is taken during the page fault, and then we lock the ilock to do
58 * block mapping. Hence we need a different class for the directory ilock so
59 * that lockdep can tell them apart.
60 */
61static struct lock_class_key xfs_nondir_ilock_class;
62static struct lock_class_key xfs_dir_ilock_class;
63
51static int 64static int
52xfs_initxattrs( 65xfs_initxattrs(
53 struct inode *inode, 66 struct inode *inode,
@@ -1034,6 +1047,19 @@ xfs_vn_fiemap(
1034 return 0; 1047 return 0;
1035} 1048}
1036 1049
1050STATIC int
1051xfs_vn_tmpfile(
1052 struct inode *dir,
1053 struct dentry *dentry,
1054 umode_t mode)
1055{
1056 int error;
1057
1058 error = xfs_create_tmpfile(XFS_I(dir), dentry, mode);
1059
1060 return -error;
1061}
1062
1037static const struct inode_operations xfs_inode_operations = { 1063static const struct inode_operations xfs_inode_operations = {
1038 .get_acl = xfs_get_acl, 1064 .get_acl = xfs_get_acl,
1039 .set_acl = xfs_set_acl, 1065 .set_acl = xfs_set_acl,
@@ -1072,6 +1098,7 @@ static const struct inode_operations xfs_dir_inode_operations = {
1072 .removexattr = generic_removexattr, 1098 .removexattr = generic_removexattr,
1073 .listxattr = xfs_vn_listxattr, 1099 .listxattr = xfs_vn_listxattr,
1074 .update_time = xfs_vn_update_time, 1100 .update_time = xfs_vn_update_time,
1101 .tmpfile = xfs_vn_tmpfile,
1075}; 1102};
1076 1103
1077static const struct inode_operations xfs_dir_ci_inode_operations = { 1104static const struct inode_operations xfs_dir_ci_inode_operations = {
@@ -1099,6 +1126,7 @@ static const struct inode_operations xfs_dir_ci_inode_operations = {
1099 .removexattr = generic_removexattr, 1126 .removexattr = generic_removexattr,
1100 .listxattr = xfs_vn_listxattr, 1127 .listxattr = xfs_vn_listxattr,
1101 .update_time = xfs_vn_update_time, 1128 .update_time = xfs_vn_update_time,
1129 .tmpfile = xfs_vn_tmpfile,
1102}; 1130};
1103 1131
1104static const struct inode_operations xfs_symlink_inode_operations = { 1132static const struct inode_operations xfs_symlink_inode_operations = {
@@ -1191,6 +1219,7 @@ xfs_setup_inode(
1191 xfs_diflags_to_iflags(inode, ip); 1219 xfs_diflags_to_iflags(inode, ip);
1192 1220
1193 ip->d_ops = ip->i_mount->m_nondir_inode_ops; 1221 ip->d_ops = ip->i_mount->m_nondir_inode_ops;
1222 lockdep_set_class(&ip->i_lock.mr_lock, &xfs_nondir_ilock_class);
1194 switch (inode->i_mode & S_IFMT) { 1223 switch (inode->i_mode & S_IFMT) {
1195 case S_IFREG: 1224 case S_IFREG:
1196 inode->i_op = &xfs_inode_operations; 1225 inode->i_op = &xfs_inode_operations;
@@ -1198,6 +1227,7 @@ xfs_setup_inode(
1198 inode->i_mapping->a_ops = &xfs_address_space_operations; 1227 inode->i_mapping->a_ops = &xfs_address_space_operations;
1199 break; 1228 break;
1200 case S_IFDIR: 1229 case S_IFDIR:
1230 lockdep_set_class(&ip->i_lock.mr_lock, &xfs_dir_ilock_class);
1201 if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb)) 1231 if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
1202 inode->i_op = &xfs_dir_ci_inode_operations; 1232 inode->i_op = &xfs_dir_ci_inode_operations;
1203 else 1233 else
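
The new .tmpfile operation is what backs O_TMPFILE on XFS: xfs_create_tmpfile() creates the inode directly on the unlinked list, and giving it a name later goes through xfs_link(), which is why xfs_iunlink_remove() is now called there when di_nlink is zero. A minimal userspace sketch; /mnt/xfs is a hypothetical XFS mount point:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        /* Unnamed file: it lives on the AGI unlinked list until linked or closed. */
        int fd = open("/mnt/xfs", O_TMPFILE | O_RDWR, 0600);
        if (fd < 0) {
                perror("O_TMPFILE");
                return 1;
        }

        if (write(fd, "data\n", 5) != 5)
                perror("write");

        /* Materialise the file under a name; this exercises the new
         * xfs_iunlink_remove() call in xfs_link(). */
        char path[64];
        snprintf(path, sizeof(path), "/proc/self/fd/%d", fd);
        if (linkat(AT_FDCWD, path, AT_FDCWD, "/mnt/xfs/now-visible",
                   AT_SYMLINK_FOLLOW) < 0)
                perror("linkat");

        close(fd);
        return 0;
}
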
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index f9bb590acc0e..825249d2dfc1 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -119,6 +119,7 @@ typedef __uint64_t __psunsigned_t;
119#include "xfs_iops.h" 119#include "xfs_iops.h"
120#include "xfs_aops.h" 120#include "xfs_aops.h"
121#include "xfs_super.h" 121#include "xfs_super.h"
122#include "xfs_cksum.h"
122#include "xfs_buf.h" 123#include "xfs_buf.h"
123#include "xfs_message.h" 124#include "xfs_message.h"
124 125
@@ -178,6 +179,7 @@ typedef __uint64_t __psunsigned_t;
178#define ENOATTR ENODATA /* Attribute not found */ 179#define ENOATTR ENODATA /* Attribute not found */
179#define EWRONGFS EINVAL /* Mount with wrong filesystem type */ 180#define EWRONGFS EINVAL /* Mount with wrong filesystem type */
180#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ 181#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */
182#define EFSBADCRC EBADMSG /* Bad CRC detected */
181 183
182#define SYNCHRONIZE() barrier() 184#define SYNCHRONIZE() barrier()
183#define __return_address __builtin_return_address(0) 185#define __return_address __builtin_return_address(0)
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index b0f4ef77fa70..2c4004475e71 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -175,7 +175,7 @@ void xlog_iodone(struct xfs_buf *);
175struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket); 175struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
176void xfs_log_ticket_put(struct xlog_ticket *ticket); 176void xfs_log_ticket_put(struct xlog_ticket *ticket);
177 177
178int xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, 178void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
179 xfs_lsn_t *commit_lsn, int flags); 179 xfs_lsn_t *commit_lsn, int flags);
180bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); 180bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
181 181
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 4ef6fdbced78..7e5455391176 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -499,13 +499,6 @@ xlog_cil_push(
499 cil->xc_ctx = new_ctx; 499 cil->xc_ctx = new_ctx;
500 500
501 /* 501 /*
502 * mirror the new sequence into the cil structure so that we can do
503 * unlocked checks against the current sequence in log forces without
504 * risking deferencing a freed context pointer.
505 */
506 cil->xc_current_sequence = new_ctx->sequence;
507
508 /*
509 * The switch is now done, so we can drop the context lock and move out 502 * The switch is now done, so we can drop the context lock and move out
510 * of a shared context. We can't just go straight to the commit record, 503 * of a shared context. We can't just go straight to the commit record,
511 * though - we need to synchronise with previous and future commits so 504 * though - we need to synchronise with previous and future commits so
@@ -523,8 +516,15 @@ xlog_cil_push(
523 * Hence we need to add this context to the committing context list so 516 * Hence we need to add this context to the committing context list so
524 * that higher sequences will wait for us to write out a commit record 517 * that higher sequences will wait for us to write out a commit record
525 * before they do. 518 * before they do.
519 *
520 * xfs_log_force_lsn requires us to mirror the new sequence into the cil
521 * structure atomically with the addition of this sequence to the
522 * committing list. This also ensures that we can do unlocked checks
523 * against the current sequence in log forces without risking
 524 * dereferencing a freed context pointer.
526 */ 525 */
527 spin_lock(&cil->xc_push_lock); 526 spin_lock(&cil->xc_push_lock);
527 cil->xc_current_sequence = new_ctx->sequence;
528 list_add(&ctx->committing, &cil->xc_committing); 528 list_add(&ctx->committing, &cil->xc_committing);
529 spin_unlock(&cil->xc_push_lock); 529 spin_unlock(&cil->xc_push_lock);
530 up_write(&cil->xc_ctx_lock); 530 up_write(&cil->xc_ctx_lock);
@@ -662,8 +662,14 @@ xlog_cil_push_background(
662 662
663} 663}
664 664
665/*
666 * xlog_cil_push_now() is used to trigger an immediate CIL push to the sequence
667 * number that is passed. When it returns, the work will be queued for
668 * @push_seq, but it won't be completed. The caller is expected to do any
669 * waiting for push_seq to complete if it is required.
670 */
665static void 671static void
666xlog_cil_push_foreground( 672xlog_cil_push_now(
667 struct xlog *log, 673 struct xlog *log,
668 xfs_lsn_t push_seq) 674 xfs_lsn_t push_seq)
669{ 675{
@@ -688,10 +694,8 @@ xlog_cil_push_foreground(
688 } 694 }
689 695
690 cil->xc_push_seq = push_seq; 696 cil->xc_push_seq = push_seq;
697 queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work);
691 spin_unlock(&cil->xc_push_lock); 698 spin_unlock(&cil->xc_push_lock);
692
693 /* do the push now */
694 xlog_cil_push(log);
695} 699}
696 700
697bool 701bool
@@ -721,7 +725,7 @@ xlog_cil_empty(
721 * background commit, returns without it held once background commits are 725 * background commit, returns without it held once background commits are
722 * allowed again. 726 * allowed again.
723 */ 727 */
724int 728void
725xfs_log_commit_cil( 729xfs_log_commit_cil(
726 struct xfs_mount *mp, 730 struct xfs_mount *mp,
727 struct xfs_trans *tp, 731 struct xfs_trans *tp,
@@ -767,7 +771,6 @@ xfs_log_commit_cil(
767 xlog_cil_push_background(log); 771 xlog_cil_push_background(log);
768 772
769 up_read(&cil->xc_ctx_lock); 773 up_read(&cil->xc_ctx_lock);
770 return 0;
771} 774}
772 775
773/* 776/*
@@ -796,7 +799,8 @@ xlog_cil_force_lsn(
796 * xlog_cil_push() handles racing pushes for the same sequence, 799 * xlog_cil_push() handles racing pushes for the same sequence,
797 * so no need to deal with it here. 800 * so no need to deal with it here.
798 */ 801 */
799 xlog_cil_push_foreground(log, sequence); 802restart:
803 xlog_cil_push_now(log, sequence);
800 804
801 /* 805 /*
802 * See if we can find a previous sequence still committing. 806 * See if we can find a previous sequence still committing.
@@ -804,7 +808,6 @@ xlog_cil_force_lsn(
804 * before allowing the force of push_seq to go ahead. Hence block 808 * before allowing the force of push_seq to go ahead. Hence block
805 * on commits for those as well. 809 * on commits for those as well.
806 */ 810 */
807restart:
808 spin_lock(&cil->xc_push_lock); 811 spin_lock(&cil->xc_push_lock);
809 list_for_each_entry(ctx, &cil->xc_committing, committing) { 812 list_for_each_entry(ctx, &cil->xc_committing, committing) {
810 if (ctx->sequence > sequence) 813 if (ctx->sequence > sequence)
@@ -822,6 +825,28 @@ restart:
822 /* found it! */ 825 /* found it! */
823 commit_lsn = ctx->commit_lsn; 826 commit_lsn = ctx->commit_lsn;
824 } 827 }
828
829 /*
830 * The call to xlog_cil_push_now() executes the push in the background.
 831 * Hence by the time we have got here, our sequence may not have been
832 * pushed yet. This is true if the current sequence still matches the
833 * push sequence after the above wait loop and the CIL still contains
834 * dirty objects.
835 *
836 * When the push occurs, it will empty the CIL and
 837 * atomically increment the current sequence past the push sequence and
838 * move it into the committing list. Of course, if the CIL is clean at
839 * the time of the push, it won't have pushed the CIL at all, so in that
840 * case we should try the push for this sequence again from the start
841 * just in case.
842 */
843
844 if (sequence == cil->xc_current_sequence &&
845 !list_empty(&cil->xc_cil)) {
846 spin_unlock(&cil->xc_push_lock);
847 goto restart;
848 }
849
825 spin_unlock(&cil->xc_push_lock); 850 spin_unlock(&cil->xc_push_lock);
826 return commit_lsn; 851 return commit_lsn;
827} 852}
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index f96c05669a9e..993cb19e7d39 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -314,6 +314,9 @@ reread:
314 error = bp->b_error; 314 error = bp->b_error;
315 if (loud) 315 if (loud)
316 xfs_warn(mp, "SB validate failed with error %d.", error); 316 xfs_warn(mp, "SB validate failed with error %d.", error);
317 /* bad CRC means corrupted metadata */
318 if (error == EFSBADCRC)
319 error = EFSCORRUPTED;
317 goto release_buf; 320 goto release_buf;
318 } 321 }
319 322
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index a6a76b2b6a85..ec5ca65c6211 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -842,7 +842,7 @@ xfs_growfs_rt_alloc(
842 /* 842 /*
843 * Reserve space & log for one extent added to the file. 843 * Reserve space & log for one extent added to the file.
844 */ 844 */
845 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growdata, 845 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growrtalloc,
846 resblks, 0); 846 resblks, 0);
847 if (error) 847 if (error)
848 goto error_cancel; 848 goto error_cancel;
diff --git a/fs/xfs/xfs_sb.c b/fs/xfs/xfs_sb.c
index 1e116794bb66..0c0e41bbe4e3 100644
--- a/fs/xfs/xfs_sb.c
+++ b/fs/xfs/xfs_sb.c
@@ -288,6 +288,7 @@ xfs_mount_validate_sb(
288 sbp->sb_inodelog < XFS_DINODE_MIN_LOG || 288 sbp->sb_inodelog < XFS_DINODE_MIN_LOG ||
289 sbp->sb_inodelog > XFS_DINODE_MAX_LOG || 289 sbp->sb_inodelog > XFS_DINODE_MAX_LOG ||
290 sbp->sb_inodesize != (1 << sbp->sb_inodelog) || 290 sbp->sb_inodesize != (1 << sbp->sb_inodelog) ||
291 sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) ||
291 (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || 292 (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) ||
292 (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || 293 (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) ||
293 (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || 294 (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) ||
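
The new sb_inopblock check is plain arithmetic: inodes per block must match the block size divided by the inode size (both are powers of two, so the howmany() ceiling division is exact on a valid superblock). A small standalone illustration with hypothetical geometry:

#include <stdio.h>

/* howmany() as used by the superblock check: ceiling division. */
#define howmany(x, y)   (((x) + ((y) - 1)) / (y))

int main(void)
{
        unsigned int blocksize = 4096;  /* hypothetical sb_blocksize */
        unsigned int inodesize = 256;   /* hypothetical sb_inodesize */

        /* A superblock claiming anything else for sb_inopblock is now rejected. */
        printf("expected sb_inopblock = %u\n", howmany(blocksize, inodesize));  /* 16 */
        return 0;
}
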
@@ -610,12 +611,11 @@ xfs_sb_read_verify(
610 XFS_SB_VERSION_5) || 611 XFS_SB_VERSION_5) ||
611 dsb->sb_crc != 0)) { 612 dsb->sb_crc != 0)) {
612 613
613 if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), 614 if (!xfs_buf_verify_cksum(bp, XFS_SB_CRC_OFF)) {
614 offsetof(struct xfs_sb, sb_crc))) {
615 /* Only fail bad secondaries on a known V5 filesystem */ 615 /* Only fail bad secondaries on a known V5 filesystem */
616 if (bp->b_bn == XFS_SB_DADDR || 616 if (bp->b_bn == XFS_SB_DADDR ||
617 xfs_sb_version_hascrc(&mp->m_sb)) { 617 xfs_sb_version_hascrc(&mp->m_sb)) {
618 error = EFSCORRUPTED; 618 error = EFSBADCRC;
619 goto out_error; 619 goto out_error;
620 } 620 }
621 } 621 }
@@ -624,10 +624,9 @@ xfs_sb_read_verify(
624 624
625out_error: 625out_error:
626 if (error) { 626 if (error) {
627 if (error == EFSCORRUPTED)
628 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
629 mp, bp->b_addr);
630 xfs_buf_ioerror(bp, error); 627 xfs_buf_ioerror(bp, error);
628 if (error == EFSCORRUPTED || error == EFSBADCRC)
629 xfs_verifier_error(bp);
631 } 630 }
632} 631}
633 632
@@ -662,9 +661,8 @@ xfs_sb_write_verify(
662 661
663 error = xfs_sb_verify(bp, false); 662 error = xfs_sb_verify(bp, false);
664 if (error) { 663 if (error) {
665 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
666 mp, bp->b_addr);
667 xfs_buf_ioerror(bp, error); 664 xfs_buf_ioerror(bp, error);
665 xfs_verifier_error(bp);
668 return; 666 return;
669 } 667 }
670 668
@@ -674,8 +672,7 @@ xfs_sb_write_verify(
674 if (bip) 672 if (bip)
675 XFS_BUF_TO_SBP(bp)->sb_lsn = cpu_to_be64(bip->bli_item.li_lsn); 673 XFS_BUF_TO_SBP(bp)->sb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
676 674
677 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), 675 xfs_buf_update_cksum(bp, XFS_SB_CRC_OFF);
678 offsetof(struct xfs_sb, sb_crc));
679} 676}
680 677
681const struct xfs_buf_ops xfs_sb_buf_ops = { 678const struct xfs_buf_ops xfs_sb_buf_ops = {
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index 35061d4b614c..f7b2fe77c5a5 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -182,6 +182,8 @@ typedef struct xfs_sb {
182 /* must be padded to 64 bit alignment */ 182 /* must be padded to 64 bit alignment */
183} xfs_sb_t; 183} xfs_sb_t;
184 184
185#define XFS_SB_CRC_OFF offsetof(struct xfs_sb, sb_crc)
186
185/* 187/*
186 * Superblock - on disk version. Must match the in core version above. 188 * Superblock - on disk version. Must match the in core version above.
187 * Must be padded to 64 bit alignment. 189 * Must be padded to 64 bit alignment.
diff --git a/fs/xfs/xfs_shared.h b/fs/xfs/xfs_shared.h
index 8c5035a13df1..4484e5151395 100644
--- a/fs/xfs/xfs_shared.h
+++ b/fs/xfs/xfs_shared.h
@@ -104,7 +104,8 @@ extern const struct xfs_buf_ops xfs_symlink_buf_ops;
104#define XFS_TRANS_SB_COUNT 41 104#define XFS_TRANS_SB_COUNT 41
105#define XFS_TRANS_CHECKPOINT 42 105#define XFS_TRANS_CHECKPOINT 42
106#define XFS_TRANS_ICREATE 43 106#define XFS_TRANS_ICREATE 43
107#define XFS_TRANS_TYPE_MAX 43 107#define XFS_TRANS_CREATE_TMPFILE 44
108#define XFS_TRANS_TYPE_MAX 44
108/* new transaction types need to be reflected in xfs_logprint(8) */ 109/* new transaction types need to be reflected in xfs_logprint(8) */
109 110
110#define XFS_TRANS_TYPES \ 111#define XFS_TRANS_TYPES \
@@ -112,6 +113,7 @@ extern const struct xfs_buf_ops xfs_symlink_buf_ops;
112 { XFS_TRANS_SETATTR_SIZE, "SETATTR_SIZE" }, \ 113 { XFS_TRANS_SETATTR_SIZE, "SETATTR_SIZE" }, \
113 { XFS_TRANS_INACTIVE, "INACTIVE" }, \ 114 { XFS_TRANS_INACTIVE, "INACTIVE" }, \
114 { XFS_TRANS_CREATE, "CREATE" }, \ 115 { XFS_TRANS_CREATE, "CREATE" }, \
116 { XFS_TRANS_CREATE_TMPFILE, "CREATE_TMPFILE" }, \
115 { XFS_TRANS_CREATE_TRUNC, "CREATE_TRUNC" }, \ 117 { XFS_TRANS_CREATE_TRUNC, "CREATE_TRUNC" }, \
116 { XFS_TRANS_TRUNCATE_FILE, "TRUNCATE_FILE" }, \ 118 { XFS_TRANS_TRUNCATE_FILE, "TRUNCATE_FILE" }, \
117 { XFS_TRANS_REMOVE, "REMOVE" }, \ 119 { XFS_TRANS_REMOVE, "REMOVE" }, \
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 14e58f2c96bd..52979aa90986 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -80,6 +80,10 @@ xfs_readlink_bmap(
80 if (error) { 80 if (error) {
81 xfs_buf_ioerror_alert(bp, __func__); 81 xfs_buf_ioerror_alert(bp, __func__);
82 xfs_buf_relse(bp); 82 xfs_buf_relse(bp);
83
84 /* bad CRC means corrupted metadata */
85 if (error == EFSBADCRC)
86 error = EFSCORRUPTED;
83 goto out; 87 goto out;
84 } 88 }
85 byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt); 89 byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt);
@@ -208,10 +212,7 @@ xfs_symlink(
208 return XFS_ERROR(ENAMETOOLONG); 212 return XFS_ERROR(ENAMETOOLONG);
209 213
210 udqp = gdqp = NULL; 214 udqp = gdqp = NULL;
211 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) 215 prid = xfs_get_initial_prid(dp);
212 prid = xfs_get_projid(dp);
213 else
214 prid = XFS_PROJID_DEFAULT;
215 216
216 /* 217 /*
217 * Make sure that we have allocated dquot(s) on disk. 218 * Make sure that we have allocated dquot(s) on disk.
diff --git a/fs/xfs/xfs_symlink_remote.c b/fs/xfs/xfs_symlink_remote.c
index bf59a2b45f8c..9b32052ff65e 100644
--- a/fs/xfs/xfs_symlink_remote.c
+++ b/fs/xfs/xfs_symlink_remote.c
@@ -133,12 +133,13 @@ xfs_symlink_read_verify(
133 if (!xfs_sb_version_hascrc(&mp->m_sb)) 133 if (!xfs_sb_version_hascrc(&mp->m_sb))
134 return; 134 return;
135 135
136 if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), 136 if (!xfs_buf_verify_cksum(bp, XFS_SYMLINK_CRC_OFF))
137 offsetof(struct xfs_dsymlink_hdr, sl_crc)) || 137 xfs_buf_ioerror(bp, EFSBADCRC);
138 !xfs_symlink_verify(bp)) { 138 else if (!xfs_symlink_verify(bp))
139 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
140 xfs_buf_ioerror(bp, EFSCORRUPTED); 139 xfs_buf_ioerror(bp, EFSCORRUPTED);
141 } 140
141 if (bp->b_error)
142 xfs_verifier_error(bp);
142} 143}
143 144
144static void 145static void
@@ -153,8 +154,8 @@ xfs_symlink_write_verify(
153 return; 154 return;
154 155
155 if (!xfs_symlink_verify(bp)) { 156 if (!xfs_symlink_verify(bp)) {
156 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
157 xfs_buf_ioerror(bp, EFSCORRUPTED); 157 xfs_buf_ioerror(bp, EFSCORRUPTED);
158 xfs_verifier_error(bp);
158 return; 159 return;
159 } 160 }
160 161
@@ -162,8 +163,7 @@ xfs_symlink_write_verify(
162 struct xfs_dsymlink_hdr *dsl = bp->b_addr; 163 struct xfs_dsymlink_hdr *dsl = bp->b_addr;
163 dsl->sl_lsn = cpu_to_be64(bip->bli_item.li_lsn); 164 dsl->sl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
164 } 165 }
165 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), 166 xfs_buf_update_cksum(bp, XFS_SYMLINK_CRC_OFF);
166 offsetof(struct xfs_dsymlink_hdr, sl_crc));
167} 167}
168 168
169const struct xfs_buf_ops xfs_symlink_buf_ops = { 169const struct xfs_buf_ops xfs_symlink_buf_ops = {
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 425dfa45b9a0..a4ae41c179a8 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -603,6 +603,7 @@ DEFINE_INODE_EVENT(xfs_readlink);
603DEFINE_INODE_EVENT(xfs_inactive_symlink); 603DEFINE_INODE_EVENT(xfs_inactive_symlink);
604DEFINE_INODE_EVENT(xfs_alloc_file_space); 604DEFINE_INODE_EVENT(xfs_alloc_file_space);
605DEFINE_INODE_EVENT(xfs_free_file_space); 605DEFINE_INODE_EVENT(xfs_free_file_space);
606DEFINE_INODE_EVENT(xfs_collapse_file_space);
606DEFINE_INODE_EVENT(xfs_readdir); 607DEFINE_INODE_EVENT(xfs_readdir);
607#ifdef CONFIG_XFS_POSIX_ACL 608#ifdef CONFIG_XFS_POSIX_ACL
608DEFINE_INODE_EVENT(xfs_get_acl); 609DEFINE_INODE_EVENT(xfs_get_acl);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index c812c5c060de..54a57326d85b 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -887,12 +887,7 @@ xfs_trans_commit(
887 xfs_trans_apply_sb_deltas(tp); 887 xfs_trans_apply_sb_deltas(tp);
888 xfs_trans_apply_dquot_deltas(tp); 888 xfs_trans_apply_dquot_deltas(tp);
889 889
890 error = xfs_log_commit_cil(mp, tp, &commit_lsn, flags); 890 xfs_log_commit_cil(mp, tp, &commit_lsn, flags);
891 if (error == ENOMEM) {
892 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
893 error = XFS_ERROR(EIO);
894 goto out_unreserve;
895 }
896 891
897 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); 892 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
898 xfs_trans_free(tp); 893 xfs_trans_free(tp);
@@ -902,10 +897,7 @@ xfs_trans_commit(
902 * log out now and wait for it. 897 * log out now and wait for it.
903 */ 898 */
904 if (sync) { 899 if (sync) {
905 if (!error) { 900 error = _xfs_log_force_lsn(mp, commit_lsn, XFS_LOG_SYNC, NULL);
906 error = _xfs_log_force_lsn(mp, commit_lsn,
907 XFS_LOG_SYNC, NULL);
908 }
909 XFS_STATS_INC(xs_trans_sync); 901 XFS_STATS_INC(xs_trans_sync);
910 } else { 902 } else {
911 XFS_STATS_INC(xs_trans_async); 903 XFS_STATS_INC(xs_trans_async);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 647b6f1d8923..b8eef0549f3f 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -275,6 +275,10 @@ xfs_trans_read_buf_map(
275 XFS_BUF_UNDONE(bp); 275 XFS_BUF_UNDONE(bp);
276 xfs_buf_stale(bp); 276 xfs_buf_stale(bp);
277 xfs_buf_relse(bp); 277 xfs_buf_relse(bp);
278
279 /* bad CRC means corrupted metadata */
280 if (error == EFSBADCRC)
281 error = EFSCORRUPTED;
278 return error; 282 return error;
279 } 283 }
280#ifdef DEBUG 284#ifdef DEBUG
@@ -338,6 +342,9 @@ xfs_trans_read_buf_map(
338 if (tp->t_flags & XFS_TRANS_DIRTY) 342 if (tp->t_flags & XFS_TRANS_DIRTY)
339 xfs_force_shutdown(tp->t_mountp, 343 xfs_force_shutdown(tp->t_mountp,
340 SHUTDOWN_META_IO_ERROR); 344 SHUTDOWN_META_IO_ERROR);
345 /* bad CRC means corrupted metadata */
346 if (error == EFSBADCRC)
347 error = EFSCORRUPTED;
341 return error; 348 return error;
342 } 349 }
343 } 350 }
@@ -375,6 +382,10 @@ xfs_trans_read_buf_map(
375 if (tp->t_flags & XFS_TRANS_DIRTY) 382 if (tp->t_flags & XFS_TRANS_DIRTY)
376 xfs_force_shutdown(tp->t_mountp, SHUTDOWN_META_IO_ERROR); 383 xfs_force_shutdown(tp->t_mountp, SHUTDOWN_META_IO_ERROR);
377 xfs_buf_relse(bp); 384 xfs_buf_relse(bp);
385
386 /* bad CRC means corrupted metadata */
387 if (error == EFSBADCRC)
388 error = EFSCORRUPTED;
378 return error; 389 return error;
379 } 390 }
380#ifdef DEBUG 391#ifdef DEBUG
diff --git a/fs/xfs/xfs_trans_resv.c b/fs/xfs/xfs_trans_resv.c
index 2ffd3e331b49..ae368165244d 100644
--- a/fs/xfs/xfs_trans_resv.c
+++ b/fs/xfs/xfs_trans_resv.c
@@ -81,20 +81,28 @@ xfs_calc_buf_res(
  * on disk. Hence we need an inode reservation function that calculates all this
  * correctly. So, we log:
  *
- * - log op headers for object
+ * - 4 log op headers for object
+ *    - for the ilf, the inode core and 2 forks
  * - inode log format object
- * - the entire inode contents (core + 2 forks)
- * - two bmap btree block headers
+ * - the inode core
+ * - two inode forks containing bmap btree root blocks.
+ *    - the btree data contained by both forks will fit into the inode size,
+ *      hence when combined with the inode core above, we have a total of the
+ *      actual inode size.
+ *    - the BMBT headers need to be accounted separately, as they are
+ *      additional to the records and pointers that fit inside the inode
+ *      forks.
  */
 STATIC uint
 xfs_calc_inode_res(
 	struct xfs_mount	*mp,
 	uint			ninodes)
 {
-	return ninodes * (sizeof(struct xlog_op_header) +
-			  sizeof(struct xfs_inode_log_format) +
-			  mp->m_sb.sb_inodesize +
-			  2 * XFS_BMBT_BLOCK_LEN(mp));
+	return ninodes *
+		(4 * sizeof(struct xlog_op_header) +
+		 sizeof(struct xfs_inode_log_format) +
+		 mp->m_sb.sb_inodesize +
+		 2 * XFS_BMBT_BLOCK_LEN(mp));
 }
 
 /*
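
To see what the reworked per-inode term amounts to, here is a hedged back-of-the-envelope calculation in plain C. The sizes below are stand-in assumptions (in the kernel they come from sizeof() on the log structures, sb_inodesize and XFS_BMBT_BLOCK_LEN(mp)), so only the shape of the formula, not the numbers, is authoritative.

#include <stdio.h>

/* Stand-in sizes, chosen only to make the arithmetic concrete. */
#define OP_HDR_LEN	12	/* assumed log op header size */
#define ILF_LEN		56	/* assumed inode log format size */
#define INODE_SIZE	256	/* assumed sb_inodesize */
#define BMBT_HDR_LEN	72	/* assumed bmap btree block header size */

/* Mirrors the reworked xfs_calc_inode_res(): 4 op headers, the inode
 * log format item, the full on-disk inode (core plus both forks), and
 * two BMBT block headers that are not covered by the inode size. */
static unsigned int calc_inode_res(unsigned int ninodes)
{
	return ninodes * (4 * OP_HDR_LEN + ILF_LEN + INODE_SIZE +
			  2 * BMBT_HDR_LEN);
}

int main(void)
{
	/* e.g. the two inodes touched by a link-style transaction */
	printf("%u bytes\n", calc_inode_res(2));
	return 0;
}
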
@@ -204,6 +212,19 @@ xfs_calc_rename_reservation(
 }
 
 /*
+ * For removing an inode from unlinked list at first, we can modify:
+ *    the agi hash list and counters: sector size
+ *    the on disk inode before ours in the agi hash list: inode cluster size
+ */
+STATIC uint
+xfs_calc_iunlink_remove_reservation(
+	struct xfs_mount	*mp)
+{
+	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
+	       max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size);
+}
+
+/*
  * For creating a link to an inode:
  *    the parent directory inode: inode size
  *    the linked inode: inode size
@@ -220,6 +241,7 @@ xfs_calc_link_reservation(
 	struct xfs_mount	*mp)
 {
 	return XFS_DQUOT_LOGRES(mp) +
+		xfs_calc_iunlink_remove_reservation(mp) +
 		MAX((xfs_calc_inode_res(mp, 2) +
 		     xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
 				      XFS_FSB_TO_B(mp, 1))),
@@ -229,6 +251,18 @@ xfs_calc_link_reservation(
 }
 
 /*
+ * For adding an inode to unlinked list we can modify:
+ *    the agi hash list: sector size
+ *    the unlinked inode: inode size
+ */
+STATIC uint
+xfs_calc_iunlink_add_reservation(xfs_mount_t *mp)
+{
+	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
+		xfs_calc_inode_res(mp, 1);
+}
+
+/*
  * For removing a directory entry we can modify:
  *    the parent directory inode: inode size
  *    the removed inode: inode size
@@ -245,10 +279,11 @@ xfs_calc_remove_reservation(
 	struct xfs_mount	*mp)
 {
 	return XFS_DQUOT_LOGRES(mp) +
-		MAX((xfs_calc_inode_res(mp, 2) +
+		xfs_calc_iunlink_add_reservation(mp) +
+		MAX((xfs_calc_inode_res(mp, 1) +
 		     xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
 				      XFS_FSB_TO_B(mp, 1))),
-		    (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
+		    (xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) +
 		     xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
 				      XFS_FSB_TO_B(mp, 1))));
 }
@@ -343,6 +378,20 @@ xfs_calc_create_reservation(
 
 }
 
+STATIC uint
+xfs_calc_create_tmpfile_reservation(
+	struct xfs_mount	*mp)
+{
+	uint	res = XFS_DQUOT_LOGRES(mp);
+
+	if (xfs_sb_version_hascrc(&mp->m_sb))
+		res += xfs_calc_icreate_resv_alloc(mp);
+	else
+		res += xfs_calc_create_resv_alloc(mp);
+
+	return res + xfs_calc_iunlink_add_reservation(mp);
+}
+
 /*
  * Making a new directory is the same as creating a new file.
  */
@@ -383,9 +432,9 @@ xfs_calc_ifree_reservation(
 {
 	return XFS_DQUOT_LOGRES(mp) +
 		xfs_calc_inode_res(mp, 1) +
-		xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
+		xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
 		xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) +
-		max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size) +
+		xfs_calc_iunlink_remove_reservation(mp) +
 		xfs_calc_buf_res(1, 0) +
 		xfs_calc_buf_res(2 + mp->m_ialloc_blks +
 				 mp->m_in_maxlevels, 0) +
@@ -644,15 +693,14 @@ xfs_calc_qm_setqlim_reservation(
 
 /*
  * Allocating quota on disk if needed.
- * the write transaction log space: M_RES(mp)->tr_write.tr_logres
+ * the write transaction log space for quota file extent allocation
  * the unit of quota allocation: one system block size
  */
 STATIC uint
 xfs_calc_qm_dqalloc_reservation(
 	struct xfs_mount	*mp)
 {
-	ASSERT(M_RES(mp)->tr_write.tr_logres);
-	return M_RES(mp)->tr_write.tr_logres +
+	return xfs_calc_write_reservation(mp) +
 		xfs_calc_buf_res(1,
 			XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1);
 }
@@ -729,6 +777,11 @@ xfs_trans_resv_calc(
 	resp->tr_create.tr_logcount = XFS_CREATE_LOG_COUNT;
 	resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
 
+	resp->tr_create_tmpfile.tr_logres =
+			xfs_calc_create_tmpfile_reservation(mp);
+	resp->tr_create_tmpfile.tr_logcount = XFS_CREATE_TMPFILE_LOG_COUNT;
+	resp->tr_create_tmpfile.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
 	resp->tr_mkdir.tr_logres = xfs_calc_mkdir_reservation(mp);
 	resp->tr_mkdir.tr_logcount = XFS_MKDIR_LOG_COUNT;
 	resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
@@ -784,7 +837,6 @@ xfs_trans_resv_calc(
 	/* The following transaction are logged in logical format */
 	resp->tr_ichange.tr_logres = xfs_calc_ichange_reservation(mp);
 	resp->tr_growdata.tr_logres = xfs_calc_growdata_reservation(mp);
-	resp->tr_swrite.tr_logres = xfs_calc_swrite_reservation(mp);
 	resp->tr_fsyncts.tr_logres = xfs_calc_swrite_reservation(mp);
 	resp->tr_writeid.tr_logres = xfs_calc_writeid_reservation(mp);
 	resp->tr_attrsetrt.tr_logres = xfs_calc_attrsetrt_reservation(mp);
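
Once xfs_trans_resv_calc() has filled in the new entry, a tmpfile creation would reserve log space against it like any other transaction type. The sketch below is illustrative only: the helper name is hypothetical, XFS_TRANS_CREATE_TMPFILE is assumed (the transaction type is added elsewhere in the series and is not shown in these hunks), and the xfs_trans_reserve() call uses the post-3.13 signature that takes a struct xfs_trans_res pointer plus block and rt-extent counts.

/* Hypothetical helper, for illustration of the reservation only. */
STATIC int
xfs_example_create_tmpfile_trans(
	struct xfs_mount	*mp,
	uint			resblks)
{
	struct xfs_trans	*tp;
	int			error;

	tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE_TMPFILE);	/* assumed type */
	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_create_tmpfile,
				  resblks, 0);
	if (error) {
		xfs_trans_cancel(tp, 0);
		return error;
	}

	/*
	 * ... inode allocation, placement on the AGI unlinked list, and
	 * xfs_trans_commit() would follow here ...
	 */
	return 0;
}
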
diff --git a/fs/xfs/xfs_trans_resv.h b/fs/xfs/xfs_trans_resv.h
index de7de9aaad8a..1097d14cd583 100644
--- a/fs/xfs/xfs_trans_resv.h
+++ b/fs/xfs/xfs_trans_resv.h
@@ -38,11 +38,11 @@ struct xfs_trans_resv {
 	struct xfs_trans_res	tr_remove;	/* unlink trans */
 	struct xfs_trans_res	tr_symlink;	/* symlink trans */
 	struct xfs_trans_res	tr_create;	/* create trans */
+	struct xfs_trans_res	tr_create_tmpfile; /* create O_TMPFILE trans */
 	struct xfs_trans_res	tr_mkdir;	/* mkdir trans */
 	struct xfs_trans_res	tr_ifree;	/* inode free trans */
 	struct xfs_trans_res	tr_ichange;	/* inode update trans */
 	struct xfs_trans_res	tr_growdata;	/* fs data section grow trans */
-	struct xfs_trans_res	tr_swrite;	/* sync write inode trans */
 	struct xfs_trans_res	tr_addafork;	/* add inode attr fork trans */
 	struct xfs_trans_res	tr_writeid;	/* write setuid/setgid file */
 	struct xfs_trans_res	tr_attrinval;	/* attr fork buffer
@@ -100,6 +100,7 @@ struct xfs_trans_resv {
 #define	XFS_ITRUNCATE_LOG_COUNT		2
 #define	XFS_INACTIVE_LOG_COUNT		2
 #define	XFS_CREATE_LOG_COUNT		2
+#define	XFS_CREATE_TMPFILE_LOG_COUNT	2
 #define	XFS_MKDIR_LOG_COUNT		3
 #define	XFS_SYMLINK_LOG_COUNT		3
 #define	XFS_REMOVE_LOG_COUNT		2
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ea80f1cdff06..81048f9bc783 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2550,6 +2550,9 @@ enum {
 
 	/* filesystem does not support filling holes */
 	DIO_SKIP_HOLES	= 0x02,
+
+	/* filesystem can handle aio writes beyond i_size */
+	DIO_ASYNC_EXTEND = 0x04,
 };
 
 void dio_end_io(struct bio *bio, int error);
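
DIO_ASYNC_EXTEND lets a filesystem tell the generic direct I/O code that it can safely complete AIO writes which extend i_size, so such writes no longer need to be forced synchronous. A hedged sketch of a ->direct_IO() method opting in follows; the myfs_* names are hypothetical, the callbacks are assumed to be defined elsewhere, and the iovec-based __blockdev_direct_IO() prototype is assumed to match the 3.14-era one in fs/direct-io.c.

/* Hypothetical filesystem glue, declared via the generic typedefs so the
 * exact callback signatures are left to fs.h; definitions not shown. */
static get_block_t	myfs_get_block;
static dio_iodone_t	myfs_end_io;

static ssize_t
myfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
	       loff_t offset, unsigned long nr_segs)
{
	struct inode	*inode = file_inode(iocb->ki_filp);

	/*
	 * DIO_ASYNC_EXTEND: this filesystem updates i_size from its own
	 * I/O completion handler, so extending AIO writes need not wait.
	 */
	return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
				    iov, offset, nr_segs, myfs_get_block,
				    myfs_end_io, NULL, DIO_ASYNC_EXTEND);
}
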
diff --git a/include/uapi/linux/falloc.h b/include/uapi/linux/falloc.h
index 990c4ccf8b61..d1197ae3723c 100644
--- a/include/uapi/linux/falloc.h
+++ b/include/uapi/linux/falloc.h
@@ -5,5 +5,40 @@
 #define FALLOC_FL_PUNCH_HOLE	0x02 /* de-allocates range */
 #define FALLOC_FL_NO_HIDE_STALE	0x04 /* reserved codepoint */
 
+/*
+ * FALLOC_FL_COLLAPSE_RANGE is used to remove a range of a file
+ * without leaving a hole in the file. The contents of the file beyond
+ * the range being removed is appended to the start offset of the range
+ * being removed (i.e. the hole that was punched is "collapsed"),
+ * resulting in a file layout that looks like the range that was
+ * removed never existed. As such collapsing a range of a file changes
+ * the size of the file, reducing it by the same length of the range
+ * that has been removed by the operation.
+ *
+ * Different filesystems may implement different limitations on the
+ * granularity of the operation. Most will limit operations to
+ * filesystem block size boundaries, but this boundary may be larger or
+ * smaller depending on the filesystem and/or the configuration of the
+ * filesystem or file.
+ *
+ * Attempting to collapse a range that crosses the end of the file is
+ * considered an illegal operation - just use ftruncate(2) if you need
+ * to collapse a range that crosses EOF.
+ */
+#define FALLOC_FL_COLLAPSE_RANGE	0x08
+
+/*
+ * FALLOC_FL_ZERO_RANGE is used to convert a range of file to zeros preferably
+ * without issuing data IO. Blocks should be preallocated for the regions that
+ * span holes in the file, and the entire range is preferably converted to
+ * unwritten extents - even though the file system may choose to zero out the
+ * extent or do whatever else results in reading zeros from the range
+ * while the range remains allocated for the file.
+ *
+ * This can also be used to preallocate blocks past EOF in the same way as
+ * with fallocate. Flag FALLOC_FL_KEEP_SIZE should cause the inode
+ * size to remain the same.
+ */
+#define FALLOC_FL_ZERO_RANGE		0x10
 
 #endif	/* _UAPI_FALLOC_H_ */
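
Both new flags are consumed through the ordinary fallocate(2) syscall. A small user-space example follows, assuming a filesystem that supports both flags and a file whose size is a multiple of a 4 KiB block size; the offsets and lengths are illustrative only.

#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>
#include <stdio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}

	fd = open(argv[1], O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Drop one 4 KiB block at offset 4096 and shift the rest of the
	 * file down, shrinking it by 4096 bytes. Offset and length must
	 * be block aligned and the range must not reach EOF. */
	if (fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, 4096, 4096) < 0)
		perror("FALLOC_FL_COLLAPSE_RANGE");

	/* Convert the first 8 KiB to zeros, preferably as unwritten
	 * extents, without changing the file size. */
	if (fallocate(fd, FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE,
		      0, 8192) < 0)
		perror("FALLOC_FL_ZERO_RANGE");

	close(fd);
	return 0;
}
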