author    Linus Torvalds <torvalds@linux-foundation.org>  2014-04-04 18:50:08 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2014-04-04 18:50:08 -0400
commit    d15e03104eb9a4f8e244ab6ed3ca5a107e46db13 (patch)
tree      31971cfaa50ea4da3f887f2eb33cb513d09b32bc
parent    24e7ea3bea94fe05eae5019f5f12bcdc98fc5157 (diff)
parent    a6cf33bc566c5fe888bfa9cf6448037c90661f67 (diff)
Merge tag 'xfs-for-linus-3.15-rc1' of git://oss.sgi.com/xfs/xfs
Pull xfs update from Dave Chinner:
 "There are a couple of new fallocate features in this request - it was
  decided that it was easiest to push them through the XFS tree using
  topic branches and have the ext4 support be based on those branches.
  Hence you may see some overlap with the ext4 tree merge depending on
  how they include those topic branches into their tree.  Other than
  that, there is O_TMPFILE support, some cleanups and bug fixes.

  The main changes in the XFS tree for 3.15-rc1 are:

   - O_TMPFILE support
   - allowing AIO+DIO writes beyond EOF
   - FALLOC_FL_COLLAPSE_RANGE support for fallocate syscall and XFS
     implementation
   - FALLOC_FL_ZERO_RANGE support for fallocate syscall and XFS
     implementation
   - IO verifier cleanup and rework
   - stack usage reduction changes
   - vm_map_ram NOIO context fixes to remove lockdep warnings
   - various bug fixes and cleanups"

* tag 'xfs-for-linus-3.15-rc1' of git://oss.sgi.com/xfs/xfs: (34 commits)
  xfs: fix directory hash ordering bug
  xfs: extra semi-colon breaks a condition
  xfs: Add support for FALLOC_FL_ZERO_RANGE
  fs: Introduce FALLOC_FL_ZERO_RANGE flag for fallocate
  xfs: inode log reservations are still too small
  xfs: xfs_check_page_type buffer checks need help
  xfs: avoid AGI/AGF deadlock scenario for inode chunk allocation
  xfs: use NOIO contexts for vm_map_ram
  xfs: don't leak EFSBADCRC to userspace
  xfs: fix directory inode iolock lockdep false positive
  xfs: allocate xfs_da_args to reduce stack footprint
  xfs: always do log forces via the workqueue
  xfs: modify verifiers to differentiate CRC from other errors
  xfs: print useful caller information in xfs_error_report
  xfs: add xfs_verifier_error()
  xfs: add helper for updating checksums on xfs_bufs
  xfs: add helper for verifying checksums on xfs_bufs
  xfs: Use defines for CRC offsets in all cases
  xfs: skip pointless CRC updates after verifier failures
  xfs: Add support FALLOC_FL_COLLAPSE_RANGE for fallocate
  ...
-rw-r--r--  fs/direct-io.c | 18
-rw-r--r--  fs/open.c | 29
-rw-r--r--  fs/xfs/kmem.c | 21
-rw-r--r--  fs/xfs/xfs_acl.c | 2
-rw-r--r--  fs/xfs/xfs_ag.h | 6
-rw-r--r--  fs/xfs/xfs_alloc.c | 45
-rw-r--r--  fs/xfs/xfs_alloc_btree.c | 16
-rw-r--r--  fs/xfs/xfs_aops.c | 84
-rw-r--r--  fs/xfs/xfs_attr_leaf.c | 17
-rw-r--r--  fs/xfs/xfs_attr_remote.c | 15
-rw-r--r--  fs/xfs/xfs_bmap.c | 193
-rw-r--r--  fs/xfs/xfs_bmap.h | 15
-rw-r--r--  fs/xfs/xfs_bmap_btree.c | 16
-rw-r--r--  fs/xfs/xfs_bmap_util.c | 97
-rw-r--r--  fs/xfs/xfs_bmap_util.h | 2
-rw-r--r--  fs/xfs/xfs_btree.c | 14
-rw-r--r--  fs/xfs/xfs_buf.c | 11
-rw-r--r--  fs/xfs/xfs_buf.h | 14
-rw-r--r--  fs/xfs/xfs_buf_item.c | 19
-rw-r--r--  fs/xfs/xfs_da_btree.c | 19
-rw-r--r--  fs/xfs/xfs_dinode.h | 2
-rw-r--r--  fs/xfs/xfs_dir2.c | 342
-rw-r--r--  fs/xfs/xfs_dir2_block.c | 17
-rw-r--r--  fs/xfs/xfs_dir2_data.c | 20
-rw-r--r--  fs/xfs/xfs_dir2_leaf.c | 17
-rw-r--r--  fs/xfs/xfs_dir2_node.c | 17
-rw-r--r--  fs/xfs/xfs_dquot.c | 2
-rw-r--r--  fs/xfs/xfs_dquot_buf.c | 11
-rw-r--r--  fs/xfs/xfs_error.c | 27
-rw-r--r--  fs/xfs/xfs_error.h | 1
-rw-r--r--  fs/xfs/xfs_file.c | 26
-rw-r--r--  fs/xfs/xfs_format.h | 2
-rw-r--r--  fs/xfs/xfs_ialloc.c | 36
-rw-r--r--  fs/xfs/xfs_ialloc_btree.c | 16
-rw-r--r--  fs/xfs/xfs_inode.c | 123
-rw-r--r--  fs/xfs/xfs_inode.h | 12
-rw-r--r--  fs/xfs/xfs_inode_buf.c | 7
-rw-r--r--  fs/xfs/xfs_iomap.c | 10
-rw-r--r--  fs/xfs/xfs_iops.c | 30
-rw-r--r--  fs/xfs/xfs_linux.h | 2
-rw-r--r--  fs/xfs/xfs_log.h | 2
-rw-r--r--  fs/xfs/xfs_log_cil.c | 55
-rw-r--r--  fs/xfs/xfs_mount.c | 3
-rw-r--r--  fs/xfs/xfs_rtalloc.c | 2
-rw-r--r--  fs/xfs/xfs_sb.c | 17
-rw-r--r--  fs/xfs/xfs_sb.h | 2
-rw-r--r--  fs/xfs/xfs_shared.h | 4
-rw-r--r--  fs/xfs/xfs_symlink.c | 9
-rw-r--r--  fs/xfs/xfs_symlink_remote.c | 16
-rw-r--r--  fs/xfs/xfs_trace.h | 1
-rw-r--r--  fs/xfs/xfs_trans.c | 12
-rw-r--r--  fs/xfs/xfs_trans_buf.c | 11
-rw-r--r--  fs/xfs/xfs_trans_resv.c | 82
-rw-r--r--  fs/xfs/xfs_trans_resv.h | 3
-rw-r--r--  include/linux/fs.h | 3
-rw-r--r--  include/uapi/linux/falloc.h | 35
56 files changed, 1215 insertions, 415 deletions
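
The two new fallocate(2) modes in this pull, FALLOC_FL_ZERO_RANGE and FALLOC_FL_COLLAPSE_RANGE, are driven from userspace roughly as in the sketch below. This is illustrative only, not part of the patch: the file path and sizes are hypothetical, and it assumes uapi headers new enough to define the new flags.

#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/mnt/xfs/testfile", O_RDWR);	/* hypothetical path */

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Convert 1MB at offset 0 to zeroed, allocated space without
	 * punching it out of the file. */
	if (fallocate(fd, FALLOC_FL_ZERO_RANGE, 0, 1 << 20) < 0)
		perror("FALLOC_FL_ZERO_RANGE");

	/* Remove the 1MB range starting at offset 1MB and shift everything
	 * after it down; i_size shrinks by 1MB.  The offset and length
	 * generally must be filesystem-block aligned, and the range must
	 * end before EOF (see the do_fallocate() checks in the fs/open.c
	 * hunk below). */
	if (fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, 1 << 20, 1 << 20) < 0)
		perror("FALLOC_FL_COLLAPSE_RANGE");

	close(fd);
	return 0;
}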
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 6e6bff375244..31ba0935e32e 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1193,13 +1193,19 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1193 } 1193 }
1194 1194
1195 /* 1195 /*
1196 * For file extending writes updating i_size before data 1196 * For file extending writes updating i_size before data writeouts
1197 * writeouts complete can expose uninitialized blocks. So 1197 * complete can expose uninitialized blocks in dumb filesystems.
1198 * even for AIO, we need to wait for i/o to complete before 1198 * In that case we need to wait for I/O completion even if asked
1199 * returning in this case. 1199 * for an asynchronous write.
1200 */ 1200 */
1201 dio->is_async = !is_sync_kiocb(iocb) && !((rw & WRITE) && 1201 if (is_sync_kiocb(iocb))
1202 (end > i_size_read(inode))); 1202 dio->is_async = false;
1203 else if (!(dio->flags & DIO_ASYNC_EXTEND) &&
1204 (rw & WRITE) && end > i_size_read(inode))
1205 dio->is_async = false;
1206 else
1207 dio->is_async = true;
1208
1203 dio->inode = inode; 1209 dio->inode = inode;
1204 dio->rw = rw; 1210 dio->rw = rw;
1205 1211
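
The fs/direct-io.c change above turns the old one-line boolean into an explicit decision chain so that a filesystem can opt in to asynchronous size-extending direct writes via the new DIO_ASYNC_EXTEND flag (which XFS sets in xfs_vm_direct_IO() further down). Restated as a standalone predicate, purely as a sketch with illustrative parameter names and an illustrative flag value, not kernel code:

#include <stdbool.h>

#define DIO_ASYNC_EXTEND 0x04	/* illustrative value; see include/linux/fs.h */

/* Mirrors the new dio->is_async decision in do_blockdev_direct_IO(). */
static bool dio_should_be_async(bool sync_kiocb, bool is_write,
				bool extends_isize, unsigned int dio_flags)
{
	if (sync_kiocb)
		return false;		/* synchronous kiocbs always wait */
	if (!(dio_flags & DIO_ASYNC_EXTEND) && is_write && extends_isize)
		return false;		/* extending write, fs did not opt in */
	return true;			/* everything else stays async */
}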
diff --git a/fs/open.c b/fs/open.c
index b9ed8b25c108..631aea815def 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -231,7 +231,13 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
231 return -EINVAL; 231 return -EINVAL;
232 232
233 /* Return error if mode is not supported */ 233 /* Return error if mode is not supported */
234 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) 234 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
235 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
236 return -EOPNOTSUPP;
237
238 /* Punch hole and zero range are mutually exclusive */
239 if ((mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) ==
240 (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE))
235 return -EOPNOTSUPP; 241 return -EOPNOTSUPP;
236 242
237 /* Punch hole must have keep size set */ 243 /* Punch hole must have keep size set */
@@ -239,11 +245,20 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
239 !(mode & FALLOC_FL_KEEP_SIZE)) 245 !(mode & FALLOC_FL_KEEP_SIZE))
240 return -EOPNOTSUPP; 246 return -EOPNOTSUPP;
241 247
248 /* Collapse range should only be used exclusively. */
249 if ((mode & FALLOC_FL_COLLAPSE_RANGE) &&
250 (mode & ~FALLOC_FL_COLLAPSE_RANGE))
251 return -EINVAL;
252
242 if (!(file->f_mode & FMODE_WRITE)) 253 if (!(file->f_mode & FMODE_WRITE))
243 return -EBADF; 254 return -EBADF;
244 255
245 /* It's not possible punch hole on append only file */ 256 /*
246 if (mode & FALLOC_FL_PUNCH_HOLE && IS_APPEND(inode)) 257 * It's not possible to punch hole or perform collapse range
258 * on append only file
259 */
260 if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE)
261 && IS_APPEND(inode))
247 return -EPERM; 262 return -EPERM;
248 263
249 if (IS_IMMUTABLE(inode)) 264 if (IS_IMMUTABLE(inode))
@@ -271,6 +286,14 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
271 if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0)) 286 if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
272 return -EFBIG; 287 return -EFBIG;
273 288
289 /*
290 * There is no need to overlap collapse range with EOF, in which case
291 * it is effectively a truncate operation
292 */
293 if ((mode & FALLOC_FL_COLLAPSE_RANGE) &&
294 (offset + len >= i_size_read(inode)))
295 return -EINVAL;
296
274 if (!file->f_op->fallocate) 297 if (!file->f_op->fallocate)
275 return -EOPNOTSUPP; 298 return -EOPNOTSUPP;
276 299
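
The fs/open.c hunks above extend do_fallocate()'s mode validation for the two new flags. The combined rules can be restated as a small predicate; this is an illustrative sketch using the uapi flag names (assuming headers that define them), the kernel performs these checks inline as shown above:

#include <stdbool.h>
#include <linux/falloc.h>

/*
 * Restates the mode rules do_fallocate() now enforces:
 *  - only the four known flags are accepted,
 *  - PUNCH_HOLE and ZERO_RANGE are mutually exclusive,
 *  - PUNCH_HOLE requires KEEP_SIZE,
 *  - COLLAPSE_RANGE must be used on its own.
 */
static bool fallocate_mode_ok(int mode)
{
	const int known = FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
			  FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE;

	if (mode & ~known)
		return false;
	if ((mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) ==
	    (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE))
		return false;
	if ((mode & FALLOC_FL_PUNCH_HOLE) && !(mode & FALLOC_FL_KEEP_SIZE))
		return false;
	if ((mode & FALLOC_FL_COLLAPSE_RANGE) &&
	    (mode & ~FALLOC_FL_COLLAPSE_RANGE))
		return false;
	return true;
}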
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c
index 66a36befc5c0..844e288b9576 100644
--- a/fs/xfs/kmem.c
+++ b/fs/xfs/kmem.c
@@ -65,12 +65,31 @@ kmem_alloc(size_t size, xfs_km_flags_t flags)
65void * 65void *
66kmem_zalloc_large(size_t size, xfs_km_flags_t flags) 66kmem_zalloc_large(size_t size, xfs_km_flags_t flags)
67{ 67{
68 unsigned noio_flag = 0;
68 void *ptr; 69 void *ptr;
70 gfp_t lflags;
69 71
70 ptr = kmem_zalloc(size, flags | KM_MAYFAIL); 72 ptr = kmem_zalloc(size, flags | KM_MAYFAIL);
71 if (ptr) 73 if (ptr)
72 return ptr; 74 return ptr;
73 return vzalloc(size); 75
76 /*
77 * __vmalloc() will allocate data pages and auxillary structures (e.g.
78 * pagetables) with GFP_KERNEL, yet we may be under GFP_NOFS context
79 * here. Hence we need to tell memory reclaim that we are in such a
80 * context via PF_MEMALLOC_NOIO to prevent memory reclaim re-entering
81 * the filesystem here and potentially deadlocking.
82 */
83 if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS))
84 noio_flag = memalloc_noio_save();
85
86 lflags = kmem_flags_convert(flags);
87 ptr = __vmalloc(size, lflags | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL);
88
89 if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS))
90 memalloc_noio_restore(noio_flag);
91
92 return ptr;
74} 93}
75 94
76void 95void
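
The fs/xfs/kmem.c hunk above exists because __vmalloc() only honours the caller's gfp mask for the data pages; auxiliary allocations such as page tables still use GFP_KERNEL, so reclaim could re-enter the filesystem and deadlock. The same PF_MEMALLOC_NOIO bracket appears again around vm_map_ram() in fs/xfs/xfs_buf.c below. The general kernel-side pattern looks like this (a sketch of a hypothetical helper, not the patch itself):

#include <linux/sched.h>	/* memalloc_noio_save/restore */
#include <linux/vmalloc.h>
#include <linux/gfp.h>

/* Hypothetical helper: zeroed vmalloc that is safe to call under GFP_NOFS. */
static void *nofs_safe_vzalloc(unsigned long size)
{
	unsigned int noio_flag;
	void *ptr;

	/*
	 * Set PF_MEMALLOC_NOIO so that any GFP_KERNEL allocations made
	 * internally by __vmalloc() cannot trigger reclaim that recurses
	 * into the filesystem, then restore the previous task state.
	 */
	noio_flag = memalloc_noio_save();
	ptr = __vmalloc(size, GFP_NOFS | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL);
	memalloc_noio_restore(noio_flag);

	return ptr;
}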
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 0ecec1896f25..6888ad886ff6 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -281,7 +281,7 @@ xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
281 if (!acl) 281 if (!acl)
282 goto set_acl; 282 goto set_acl;
283 283
284 error = -EINVAL; 284 error = -E2BIG;
285 if (acl->a_count > XFS_ACL_MAX_ENTRIES(XFS_M(inode->i_sb))) 285 if (acl->a_count > XFS_ACL_MAX_ENTRIES(XFS_M(inode->i_sb)))
286 return error; 286 return error;
287 287
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 3fc109819c34..0fdd4109c624 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -89,6 +89,8 @@ typedef struct xfs_agf {
89 /* structure must be padded to 64 bit alignment */ 89 /* structure must be padded to 64 bit alignment */
90} xfs_agf_t; 90} xfs_agf_t;
91 91
92#define XFS_AGF_CRC_OFF offsetof(struct xfs_agf, agf_crc)
93
92#define XFS_AGF_MAGICNUM 0x00000001 94#define XFS_AGF_MAGICNUM 0x00000001
93#define XFS_AGF_VERSIONNUM 0x00000002 95#define XFS_AGF_VERSIONNUM 0x00000002
94#define XFS_AGF_SEQNO 0x00000004 96#define XFS_AGF_SEQNO 0x00000004
@@ -167,6 +169,8 @@ typedef struct xfs_agi {
167 /* structure must be padded to 64 bit alignment */ 169 /* structure must be padded to 64 bit alignment */
168} xfs_agi_t; 170} xfs_agi_t;
169 171
172#define XFS_AGI_CRC_OFF offsetof(struct xfs_agi, agi_crc)
173
170#define XFS_AGI_MAGICNUM 0x00000001 174#define XFS_AGI_MAGICNUM 0x00000001
171#define XFS_AGI_VERSIONNUM 0x00000002 175#define XFS_AGI_VERSIONNUM 0x00000002
172#define XFS_AGI_SEQNO 0x00000004 176#define XFS_AGI_SEQNO 0x00000004
@@ -222,6 +226,8 @@ typedef struct xfs_agfl {
222 __be32 agfl_bno[]; /* actually XFS_AGFL_SIZE(mp) */ 226 __be32 agfl_bno[]; /* actually XFS_AGFL_SIZE(mp) */
223} xfs_agfl_t; 227} xfs_agfl_t;
224 228
229#define XFS_AGFL_CRC_OFF offsetof(struct xfs_agfl, agfl_crc)
230
225/* 231/*
226 * tags for inode radix tree 232 * tags for inode radix tree
227 */ 233 */
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 9eab2dfdcbb5..c1cf6a336a72 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -474,7 +474,6 @@ xfs_agfl_read_verify(
474 struct xfs_buf *bp) 474 struct xfs_buf *bp)
475{ 475{
476 struct xfs_mount *mp = bp->b_target->bt_mount; 476 struct xfs_mount *mp = bp->b_target->bt_mount;
477 int agfl_ok = 1;
478 477
479 /* 478 /*
480 * There is no verification of non-crc AGFLs because mkfs does not 479 * There is no verification of non-crc AGFLs because mkfs does not
@@ -485,15 +484,13 @@ xfs_agfl_read_verify(
485 if (!xfs_sb_version_hascrc(&mp->m_sb)) 484 if (!xfs_sb_version_hascrc(&mp->m_sb))
486 return; 485 return;
487 486
488 agfl_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), 487 if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
489 offsetof(struct xfs_agfl, agfl_crc)); 488 xfs_buf_ioerror(bp, EFSBADCRC);
490 489 else if (!xfs_agfl_verify(bp))
491 agfl_ok = agfl_ok && xfs_agfl_verify(bp);
492
493 if (!agfl_ok) {
494 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
495 xfs_buf_ioerror(bp, EFSCORRUPTED); 490 xfs_buf_ioerror(bp, EFSCORRUPTED);
496 } 491
492 if (bp->b_error)
493 xfs_verifier_error(bp);
497} 494}
498 495
499static void 496static void
@@ -508,16 +505,15 @@ xfs_agfl_write_verify(
508 return; 505 return;
509 506
510 if (!xfs_agfl_verify(bp)) { 507 if (!xfs_agfl_verify(bp)) {
511 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
512 xfs_buf_ioerror(bp, EFSCORRUPTED); 508 xfs_buf_ioerror(bp, EFSCORRUPTED);
509 xfs_verifier_error(bp);
513 return; 510 return;
514 } 511 }
515 512
516 if (bip) 513 if (bip)
517 XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn); 514 XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
518 515
519 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), 516 xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
520 offsetof(struct xfs_agfl, agfl_crc));
521} 517}
522 518
523const struct xfs_buf_ops xfs_agfl_buf_ops = { 519const struct xfs_buf_ops xfs_agfl_buf_ops = {
@@ -2238,19 +2234,17 @@ xfs_agf_read_verify(
2238 struct xfs_buf *bp) 2234 struct xfs_buf *bp)
2239{ 2235{
2240 struct xfs_mount *mp = bp->b_target->bt_mount; 2236 struct xfs_mount *mp = bp->b_target->bt_mount;
2241 int agf_ok = 1;
2242
2243 if (xfs_sb_version_hascrc(&mp->m_sb))
2244 agf_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
2245 offsetof(struct xfs_agf, agf_crc));
2246 2237
2247 agf_ok = agf_ok && xfs_agf_verify(mp, bp); 2238 if (xfs_sb_version_hascrc(&mp->m_sb) &&
2248 2239 !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
2249 if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF, 2240 xfs_buf_ioerror(bp, EFSBADCRC);
2250 XFS_RANDOM_ALLOC_READ_AGF))) { 2241 else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp,
2251 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); 2242 XFS_ERRTAG_ALLOC_READ_AGF,
2243 XFS_RANDOM_ALLOC_READ_AGF))
2252 xfs_buf_ioerror(bp, EFSCORRUPTED); 2244 xfs_buf_ioerror(bp, EFSCORRUPTED);
2253 } 2245
2246 if (bp->b_error)
2247 xfs_verifier_error(bp);
2254} 2248}
2255 2249
2256static void 2250static void
@@ -2261,8 +2255,8 @@ xfs_agf_write_verify(
2261 struct xfs_buf_log_item *bip = bp->b_fspriv; 2255 struct xfs_buf_log_item *bip = bp->b_fspriv;
2262 2256
2263 if (!xfs_agf_verify(mp, bp)) { 2257 if (!xfs_agf_verify(mp, bp)) {
2264 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
2265 xfs_buf_ioerror(bp, EFSCORRUPTED); 2258 xfs_buf_ioerror(bp, EFSCORRUPTED);
2259 xfs_verifier_error(bp);
2266 return; 2260 return;
2267 } 2261 }
2268 2262
@@ -2272,8 +2266,7 @@ xfs_agf_write_verify(
2272 if (bip) 2266 if (bip)
2273 XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn); 2267 XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
2274 2268
2275 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), 2269 xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
2276 offsetof(struct xfs_agf, agf_crc));
2277} 2270}
2278 2271
2279const struct xfs_buf_ops xfs_agf_buf_ops = { 2272const struct xfs_buf_ops xfs_agf_buf_ops = {
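
The AGFL and AGF hunks above show the verifier rework that runs through the rest of this merge: a checksum mismatch is now reported as EFSBADCRC, a structural check failure as EFSCORRUPTED, and the corruption reporting is centralised in the new xfs_verifier_error() helper instead of open-coded XFS_CORRUPTION_ERROR calls. The reworked read-verifier shape, schematically (placeholder xfs_foo_* names, not an actual verifier in the patch):

static void
xfs_foo_read_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_target->bt_mount;

	/* CRC damage (torn write, media error) is reported distinctly... */
	if (xfs_sb_version_hascrc(&mp->m_sb) &&
	    !xfs_buf_verify_cksum(bp, XFS_FOO_CRC_OFF))
		xfs_buf_ioerror(bp, EFSBADCRC);
	/* ...from a structurally invalid block. */
	else if (!xfs_foo_verify(bp))
		xfs_buf_ioerror(bp, EFSCORRUPTED);

	/* One place logs the buffer address, caller and error type. */
	if (bp->b_error)
		xfs_verifier_error(bp);
}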
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index 13085429e523..cc1eadcbb049 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -355,12 +355,14 @@ static void
355xfs_allocbt_read_verify( 355xfs_allocbt_read_verify(
356 struct xfs_buf *bp) 356 struct xfs_buf *bp)
357{ 357{
358 if (!(xfs_btree_sblock_verify_crc(bp) && 358 if (!xfs_btree_sblock_verify_crc(bp))
359 xfs_allocbt_verify(bp))) { 359 xfs_buf_ioerror(bp, EFSBADCRC);
360 trace_xfs_btree_corrupt(bp, _RET_IP_); 360 else if (!xfs_allocbt_verify(bp))
361 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
362 bp->b_target->bt_mount, bp->b_addr);
363 xfs_buf_ioerror(bp, EFSCORRUPTED); 361 xfs_buf_ioerror(bp, EFSCORRUPTED);
362
363 if (bp->b_error) {
364 trace_xfs_btree_corrupt(bp, _RET_IP_);
365 xfs_verifier_error(bp);
364 } 366 }
365} 367}
366 368
@@ -370,9 +372,9 @@ xfs_allocbt_write_verify(
370{ 372{
371 if (!xfs_allocbt_verify(bp)) { 373 if (!xfs_allocbt_verify(bp)) {
372 trace_xfs_btree_corrupt(bp, _RET_IP_); 374 trace_xfs_btree_corrupt(bp, _RET_IP_);
373 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
374 bp->b_target->bt_mount, bp->b_addr);
375 xfs_buf_ioerror(bp, EFSCORRUPTED); 375 xfs_buf_ioerror(bp, EFSCORRUPTED);
376 xfs_verifier_error(bp);
377 return;
376 } 378 }
377 xfs_btree_sblock_calc_crc(bp); 379 xfs_btree_sblock_calc_crc(bp);
378 380
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index db2cfb067d0b..75df77d09f75 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -632,38 +632,46 @@ xfs_map_at_offset(
632} 632}
633 633
634/* 634/*
635 * Test if a given page is suitable for writing as part of an unwritten 635 * Test if a given page contains at least one buffer of a given @type.
636 * or delayed allocate extent. 636 * If @check_all_buffers is true, then we walk all the buffers in the page to
637 * try to find one of the type passed in. If it is not set, then the caller only
638 * needs to check the first buffer on the page for a match.
637 */ 639 */
638STATIC int 640STATIC bool
639xfs_check_page_type( 641xfs_check_page_type(
640 struct page *page, 642 struct page *page,
641 unsigned int type) 643 unsigned int type,
644 bool check_all_buffers)
642{ 645{
643 if (PageWriteback(page)) 646 struct buffer_head *bh;
644 return 0; 647 struct buffer_head *head;
645 648
646 if (page->mapping && page_has_buffers(page)) { 649 if (PageWriteback(page))
647 struct buffer_head *bh, *head; 650 return false;
648 int acceptable = 0; 651 if (!page->mapping)
652 return false;
653 if (!page_has_buffers(page))
654 return false;
649 655
650 bh = head = page_buffers(page); 656 bh = head = page_buffers(page);
651 do { 657 do {
652 if (buffer_unwritten(bh)) 658 if (buffer_unwritten(bh)) {
653 acceptable += (type == XFS_IO_UNWRITTEN); 659 if (type == XFS_IO_UNWRITTEN)
654 else if (buffer_delay(bh)) 660 return true;
655 acceptable += (type == XFS_IO_DELALLOC); 661 } else if (buffer_delay(bh)) {
656 else if (buffer_dirty(bh) && buffer_mapped(bh)) 662 if (type == XFS_IO_DELALLOC)
657 acceptable += (type == XFS_IO_OVERWRITE); 663 return true;
658 else 664 } else if (buffer_dirty(bh) && buffer_mapped(bh)) {
659 break; 665 if (type == XFS_IO_OVERWRITE)
660 } while ((bh = bh->b_this_page) != head); 666 return true;
667 }
661 668
662 if (acceptable) 669 /* If we are only checking the first buffer, we are done now. */
663 return 1; 670 if (!check_all_buffers)
664 } 671 break;
672 } while ((bh = bh->b_this_page) != head);
665 673
666 return 0; 674 return false;
667} 675}
668 676
669/* 677/*
@@ -697,7 +705,7 @@ xfs_convert_page(
697 goto fail_unlock_page; 705 goto fail_unlock_page;
698 if (page->mapping != inode->i_mapping) 706 if (page->mapping != inode->i_mapping)
699 goto fail_unlock_page; 707 goto fail_unlock_page;
700 if (!xfs_check_page_type(page, (*ioendp)->io_type)) 708 if (!xfs_check_page_type(page, (*ioendp)->io_type, false))
701 goto fail_unlock_page; 709 goto fail_unlock_page;
702 710
703 /* 711 /*
@@ -742,6 +750,15 @@ xfs_convert_page(
742 p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE; 750 p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
743 page_dirty = p_offset / len; 751 page_dirty = p_offset / len;
744 752
753 /*
754 * The moment we find a buffer that doesn't match our current type
755 * specification or can't be written, abort the loop and start
756 * writeback. As per the above xfs_imap_valid() check, only
757 * xfs_vm_writepage() can handle partial page writeback fully - we are
758 * limited here to the buffers that are contiguous with the current
759 * ioend, and hence a buffer we can't write breaks that contiguity and
760 * we have to defer the rest of the IO to xfs_vm_writepage().
761 */
745 bh = head = page_buffers(page); 762 bh = head = page_buffers(page);
746 do { 763 do {
747 if (offset >= end_offset) 764 if (offset >= end_offset)
@@ -750,7 +767,7 @@ xfs_convert_page(
750 uptodate = 0; 767 uptodate = 0;
751 if (!(PageUptodate(page) || buffer_uptodate(bh))) { 768 if (!(PageUptodate(page) || buffer_uptodate(bh))) {
752 done = 1; 769 done = 1;
753 continue; 770 break;
754 } 771 }
755 772
756 if (buffer_unwritten(bh) || buffer_delay(bh) || 773 if (buffer_unwritten(bh) || buffer_delay(bh) ||
@@ -762,10 +779,11 @@ xfs_convert_page(
762 else 779 else
763 type = XFS_IO_OVERWRITE; 780 type = XFS_IO_OVERWRITE;
764 781
765 if (!xfs_imap_valid(inode, imap, offset)) { 782 /*
766 done = 1; 783 * imap should always be valid because of the above
767 continue; 784 * partial page end_offset check on the imap.
768 } 785 */
786 ASSERT(xfs_imap_valid(inode, imap, offset));
769 787
770 lock_buffer(bh); 788 lock_buffer(bh);
771 if (type != XFS_IO_OVERWRITE) 789 if (type != XFS_IO_OVERWRITE)
@@ -777,6 +795,7 @@ xfs_convert_page(
777 count++; 795 count++;
778 } else { 796 } else {
779 done = 1; 797 done = 1;
798 break;
780 } 799 }
781 } while (offset += len, (bh = bh->b_this_page) != head); 800 } while (offset += len, (bh = bh->b_this_page) != head);
782 801
@@ -868,7 +887,7 @@ xfs_aops_discard_page(
868 struct buffer_head *bh, *head; 887 struct buffer_head *bh, *head;
869 loff_t offset = page_offset(page); 888 loff_t offset = page_offset(page);
870 889
871 if (!xfs_check_page_type(page, XFS_IO_DELALLOC)) 890 if (!xfs_check_page_type(page, XFS_IO_DELALLOC, true))
872 goto out_invalidate; 891 goto out_invalidate;
873 892
874 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 893 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
@@ -1441,7 +1460,8 @@ xfs_vm_direct_IO(
1441 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, 1460 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
1442 offset, nr_segs, 1461 offset, nr_segs,
1443 xfs_get_blocks_direct, 1462 xfs_get_blocks_direct,
1444 xfs_end_io_direct_write, NULL, 0); 1463 xfs_end_io_direct_write, NULL,
1464 DIO_ASYNC_EXTEND);
1445 if (ret != -EIOCBQUEUED && iocb->private) 1465 if (ret != -EIOCBQUEUED && iocb->private)
1446 goto out_destroy_ioend; 1466 goto out_destroy_ioend;
1447 } else { 1467 } else {
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 7b126f46a2f9..fe9587fab17a 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -213,8 +213,8 @@ xfs_attr3_leaf_write_verify(
213 struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr; 213 struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr;
214 214
215 if (!xfs_attr3_leaf_verify(bp)) { 215 if (!xfs_attr3_leaf_verify(bp)) {
216 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
217 xfs_buf_ioerror(bp, EFSCORRUPTED); 216 xfs_buf_ioerror(bp, EFSCORRUPTED);
217 xfs_verifier_error(bp);
218 return; 218 return;
219 } 219 }
220 220
@@ -224,7 +224,7 @@ xfs_attr3_leaf_write_verify(
224 if (bip) 224 if (bip)
225 hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); 225 hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
226 226
227 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_ATTR3_LEAF_CRC_OFF); 227 xfs_buf_update_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF);
228} 228}
229 229
230/* 230/*
@@ -239,13 +239,14 @@ xfs_attr3_leaf_read_verify(
239{ 239{
240 struct xfs_mount *mp = bp->b_target->bt_mount; 240 struct xfs_mount *mp = bp->b_target->bt_mount;
241 241
242 if ((xfs_sb_version_hascrc(&mp->m_sb) && 242 if (xfs_sb_version_hascrc(&mp->m_sb) &&
243 !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), 243 !xfs_buf_verify_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF))
244 XFS_ATTR3_LEAF_CRC_OFF)) || 244 xfs_buf_ioerror(bp, EFSBADCRC);
245 !xfs_attr3_leaf_verify(bp)) { 245 else if (!xfs_attr3_leaf_verify(bp))
246 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
247 xfs_buf_ioerror(bp, EFSCORRUPTED); 246 xfs_buf_ioerror(bp, EFSCORRUPTED);
248 } 247
248 if (bp->b_error)
249 xfs_verifier_error(bp);
249} 250}
250 251
251const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = { 252const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = {
diff --git a/fs/xfs/xfs_attr_remote.c b/fs/xfs/xfs_attr_remote.c
index 5549d69ddb45..6e37823e2932 100644
--- a/fs/xfs/xfs_attr_remote.c
+++ b/fs/xfs/xfs_attr_remote.c
@@ -125,7 +125,6 @@ xfs_attr3_rmt_read_verify(
125 struct xfs_mount *mp = bp->b_target->bt_mount; 125 struct xfs_mount *mp = bp->b_target->bt_mount;
126 char *ptr; 126 char *ptr;
127 int len; 127 int len;
128 bool corrupt = false;
129 xfs_daddr_t bno; 128 xfs_daddr_t bno;
130 129
131 /* no verification of non-crc buffers */ 130 /* no verification of non-crc buffers */
@@ -140,11 +139,11 @@ xfs_attr3_rmt_read_verify(
140 while (len > 0) { 139 while (len > 0) {
141 if (!xfs_verify_cksum(ptr, XFS_LBSIZE(mp), 140 if (!xfs_verify_cksum(ptr, XFS_LBSIZE(mp),
142 XFS_ATTR3_RMT_CRC_OFF)) { 141 XFS_ATTR3_RMT_CRC_OFF)) {
143 corrupt = true; 142 xfs_buf_ioerror(bp, EFSBADCRC);
144 break; 143 break;
145 } 144 }
146 if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) { 145 if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) {
147 corrupt = true; 146 xfs_buf_ioerror(bp, EFSCORRUPTED);
148 break; 147 break;
149 } 148 }
150 len -= XFS_LBSIZE(mp); 149 len -= XFS_LBSIZE(mp);
@@ -152,10 +151,9 @@ xfs_attr3_rmt_read_verify(
152 bno += mp->m_bsize; 151 bno += mp->m_bsize;
153 } 152 }
154 153
155 if (corrupt) { 154 if (bp->b_error)
156 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); 155 xfs_verifier_error(bp);
157 xfs_buf_ioerror(bp, EFSCORRUPTED); 156 else
158 } else
159 ASSERT(len == 0); 157 ASSERT(len == 0);
160} 158}
161 159
@@ -180,9 +178,8 @@ xfs_attr3_rmt_write_verify(
180 178
181 while (len > 0) { 179 while (len > 0) {
182 if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) { 180 if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) {
183 XFS_CORRUPTION_ERROR(__func__,
184 XFS_ERRLEVEL_LOW, mp, bp->b_addr);
185 xfs_buf_ioerror(bp, EFSCORRUPTED); 181 xfs_buf_ioerror(bp, EFSCORRUPTED);
182 xfs_verifier_error(bp);
186 return; 183 return;
187 } 184 }
188 if (bip) { 185 if (bip) {
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 152543c4ca70..5b6092ef51ef 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5378,3 +5378,196 @@ error0:
5378 } 5378 }
5379 return error; 5379 return error;
5380} 5380}
5381
5382/*
5383 * Shift extent records to the left to cover a hole.
5384 *
5385 * The maximum number of extents to be shifted in a single operation
5386 * is @num_exts, and @current_ext keeps track of the current extent
5387 * index we have shifted. @offset_shift_fsb is the length by which each
5388 * extent is shifted. If there is no hole to shift the extents
5389 * into, this will be considered invalid operation and we abort immediately.
5390 */
5391int
5392xfs_bmap_shift_extents(
5393 struct xfs_trans *tp,
5394 struct xfs_inode *ip,
5395 int *done,
5396 xfs_fileoff_t start_fsb,
5397 xfs_fileoff_t offset_shift_fsb,
5398 xfs_extnum_t *current_ext,
5399 xfs_fsblock_t *firstblock,
5400 struct xfs_bmap_free *flist,
5401 int num_exts)
5402{
5403 struct xfs_btree_cur *cur;
5404 struct xfs_bmbt_rec_host *gotp;
5405 struct xfs_bmbt_irec got;
5406 struct xfs_bmbt_irec left;
5407 struct xfs_mount *mp = ip->i_mount;
5408 struct xfs_ifork *ifp;
5409 xfs_extnum_t nexts = 0;
5410 xfs_fileoff_t startoff;
5411 int error = 0;
5412 int i;
5413 int whichfork = XFS_DATA_FORK;
5414 int logflags;
5415 xfs_filblks_t blockcount = 0;
5416
5417 if (unlikely(XFS_TEST_ERROR(
5418 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
5419 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
5420 mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
5421 XFS_ERROR_REPORT("xfs_bmap_shift_extents",
5422 XFS_ERRLEVEL_LOW, mp);
5423 return XFS_ERROR(EFSCORRUPTED);
5424 }
5425
5426 if (XFS_FORCED_SHUTDOWN(mp))
5427 return XFS_ERROR(EIO);
5428
5429 ASSERT(current_ext != NULL);
5430
5431 ifp = XFS_IFORK_PTR(ip, whichfork);
5432
5433 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
5434 /* Read in all the extents */
5435 error = xfs_iread_extents(tp, ip, whichfork);
5436 if (error)
5437 return error;
5438 }
5439
5440 /*
5441 * If *current_ext is 0, we would need to lookup the extent
5442 * from where we would start shifting and store it in gotp.
5443 */
5444 if (!*current_ext) {
5445 gotp = xfs_iext_bno_to_ext(ifp, start_fsb, current_ext);
5446 /*
5447 * gotp can be null in 2 cases: 1) if there are no extents
5448 * or 2) start_fsb lies in a hole beyond which there are
5449 * no extents. Either way, we are done.
5450 */
5451 if (!gotp) {
5452 *done = 1;
5453 return 0;
5454 }
5455 }
5456
5457 /* We are going to change core inode */
5458 logflags = XFS_ILOG_CORE;
5459
5460 if (ifp->if_flags & XFS_IFBROOT) {
5461 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5462 cur->bc_private.b.firstblock = *firstblock;
5463 cur->bc_private.b.flist = flist;
5464 cur->bc_private.b.flags = 0;
5465 } else {
5466 cur = NULL;
5467 logflags |= XFS_ILOG_DEXT;
5468 }
5469
5470 while (nexts++ < num_exts &&
5471 *current_ext < XFS_IFORK_NEXTENTS(ip, whichfork)) {
5472
5473 gotp = xfs_iext_get_ext(ifp, *current_ext);
5474 xfs_bmbt_get_all(gotp, &got);
5475 startoff = got.br_startoff - offset_shift_fsb;
5476
5477 /*
5478 * Before shifting extent into hole, make sure that the hole
5479 * is large enough to accomodate the shift.
5480 */
5481 if (*current_ext) {
5482 xfs_bmbt_get_all(xfs_iext_get_ext(ifp,
5483 *current_ext - 1), &left);
5484
5485 if (startoff < left.br_startoff + left.br_blockcount)
5486 error = XFS_ERROR(EINVAL);
5487 } else if (offset_shift_fsb > got.br_startoff) {
5488 /*
5489 * When first extent is shifted, offset_shift_fsb
5490 * should be less than the stating offset of
5491 * the first extent.
5492 */
5493 error = XFS_ERROR(EINVAL);
5494 }
5495
5496 if (error)
5497 goto del_cursor;
5498
5499 if (cur) {
5500 error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
5501 got.br_startblock,
5502 got.br_blockcount,
5503 &i);
5504 if (error)
5505 goto del_cursor;
5506 XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
5507 }
5508
5509 /* Check if we can merge 2 adjacent extents */
5510 if (*current_ext &&
5511 left.br_startoff + left.br_blockcount == startoff &&
5512 left.br_startblock + left.br_blockcount ==
5513 got.br_startblock &&
5514 left.br_state == got.br_state &&
5515 left.br_blockcount + got.br_blockcount <= MAXEXTLEN) {
5516 blockcount = left.br_blockcount +
5517 got.br_blockcount;
5518 xfs_iext_remove(ip, *current_ext, 1, 0);
5519 if (cur) {
5520 error = xfs_btree_delete(cur, &i);
5521 if (error)
5522 goto del_cursor;
5523 XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
5524 }
5525 XFS_IFORK_NEXT_SET(ip, whichfork,
5526 XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
5527 gotp = xfs_iext_get_ext(ifp, --*current_ext);
5528 xfs_bmbt_get_all(gotp, &got);
5529
5530 /* Make cursor point to the extent we will update */
5531 if (cur) {
5532 error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
5533 got.br_startblock,
5534 got.br_blockcount,
5535 &i);
5536 if (error)
5537 goto del_cursor;
5538 XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
5539 }
5540
5541 xfs_bmbt_set_blockcount(gotp, blockcount);
5542 got.br_blockcount = blockcount;
5543 } else {
5544 /* We have to update the startoff */
5545 xfs_bmbt_set_startoff(gotp, startoff);
5546 got.br_startoff = startoff;
5547 }
5548
5549 if (cur) {
5550 error = xfs_bmbt_update(cur, got.br_startoff,
5551 got.br_startblock,
5552 got.br_blockcount,
5553 got.br_state);
5554 if (error)
5555 goto del_cursor;
5556 }
5557
5558 (*current_ext)++;
5559 }
5560
5561 /* Check if we are done */
5562 if (*current_ext == XFS_IFORK_NEXTENTS(ip, whichfork))
5563 *done = 1;
5564
5565del_cursor:
5566 if (cur)
5567 xfs_btree_del_cursor(cur,
5568 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
5569
5570 xfs_trans_log_inode(tp, ip, logflags);
5571
5572 return error;
5573}
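
xfs_bmap_shift_extents() above is the mechanical core of collapse range: each extent past the collapsed region has its start offset reduced by the shift, and an extent that becomes contiguous with its left neighbour in both file offset and disk blocks is merged. A simplified model of that per-extent step in plain C (it ignores the extent state and MAXEXTLEN merge checks the kernel also performs):

#include <stdint.h>

struct ext {
	uint64_t startoff;	/* file offset, in filesystem blocks */
	uint64_t startblock;	/* starting disk block */
	uint64_t blockcount;	/* length in blocks */
};

/*
 * Shift 'cur' left by 'shift' blocks.  Returns -1 if the hole to the left
 * is too small (EINVAL in the kernel), 1 if 'cur' merged into 'left',
 * 0 if it was simply moved.
 */
static int shift_one_extent(struct ext *left, struct ext *cur, uint64_t shift)
{
	if (left) {
		if (cur->startoff < left->startoff + left->blockcount + shift)
			return -1;
	} else if (shift > cur->startoff) {
		return -1;
	}

	cur->startoff -= shift;

	if (left &&
	    left->startoff + left->blockcount == cur->startoff &&
	    left->startblock + left->blockcount == cur->startblock) {
		left->blockcount += cur->blockcount;	/* merge the two */
		return 1;
	}
	return 0;
}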
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 33b41f351225..f84bd7af43be 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -127,6 +127,16 @@ static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp)
127 { BMAP_RIGHT_FILLING, "RF" }, \ 127 { BMAP_RIGHT_FILLING, "RF" }, \
128 { BMAP_ATTRFORK, "ATTR" } 128 { BMAP_ATTRFORK, "ATTR" }
129 129
130
131/*
132 * This macro is used to determine how many extents will be shifted
133 * in one write transaction. We could require two splits,
134 * an extent move on the first and an extent merge on the second,
135 * So it is proper that one extent is shifted inside write transaction
136 * at a time.
137 */
138#define XFS_BMAP_MAX_SHIFT_EXTENTS 1
139
130#ifdef DEBUG 140#ifdef DEBUG
131void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt, 141void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
132 int whichfork, unsigned long caller_ip); 142 int whichfork, unsigned long caller_ip);
@@ -169,5 +179,10 @@ int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
169int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx, 179int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
170 xfs_extnum_t num); 180 xfs_extnum_t num);
171uint xfs_default_attroffset(struct xfs_inode *ip); 181uint xfs_default_attroffset(struct xfs_inode *ip);
182int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip,
183 int *done, xfs_fileoff_t start_fsb,
184 xfs_fileoff_t offset_shift_fsb, xfs_extnum_t *current_ext,
185 xfs_fsblock_t *firstblock, struct xfs_bmap_free *flist,
186 int num_exts);
172 187
173#endif /* __XFS_BMAP_H__ */ 188#endif /* __XFS_BMAP_H__ */
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 706bc3f777cb..818d546664e7 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -780,12 +780,14 @@ static void
780xfs_bmbt_read_verify( 780xfs_bmbt_read_verify(
781 struct xfs_buf *bp) 781 struct xfs_buf *bp)
782{ 782{
783 if (!(xfs_btree_lblock_verify_crc(bp) && 783 if (!xfs_btree_lblock_verify_crc(bp))
784 xfs_bmbt_verify(bp))) { 784 xfs_buf_ioerror(bp, EFSBADCRC);
785 trace_xfs_btree_corrupt(bp, _RET_IP_); 785 else if (!xfs_bmbt_verify(bp))
786 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
787 bp->b_target->bt_mount, bp->b_addr);
788 xfs_buf_ioerror(bp, EFSCORRUPTED); 786 xfs_buf_ioerror(bp, EFSCORRUPTED);
787
788 if (bp->b_error) {
789 trace_xfs_btree_corrupt(bp, _RET_IP_);
790 xfs_verifier_error(bp);
789 } 791 }
790} 792}
791 793
@@ -794,11 +796,9 @@ xfs_bmbt_write_verify(
794 struct xfs_buf *bp) 796 struct xfs_buf *bp)
795{ 797{
796 if (!xfs_bmbt_verify(bp)) { 798 if (!xfs_bmbt_verify(bp)) {
797 xfs_warn(bp->b_target->bt_mount, "bmbt daddr 0x%llx failed", bp->b_bn);
798 trace_xfs_btree_corrupt(bp, _RET_IP_); 799 trace_xfs_btree_corrupt(bp, _RET_IP_);
799 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
800 bp->b_target->bt_mount, bp->b_addr);
801 xfs_buf_ioerror(bp, EFSCORRUPTED); 800 xfs_buf_ioerror(bp, EFSCORRUPTED);
801 xfs_verifier_error(bp);
802 return; 802 return;
803 } 803 }
804 xfs_btree_lblock_calc_crc(bp); 804 xfs_btree_lblock_calc_crc(bp);
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index f264616080ca..01f6a646caa1 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1349,7 +1349,6 @@ xfs_free_file_space(
1349 * the freeing of the space succeeds at ENOSPC. 1349 * the freeing of the space succeeds at ENOSPC.
1350 */ 1350 */
1351 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); 1351 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
1352 tp->t_flags |= XFS_TRANS_RESERVE;
1353 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0); 1352 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0);
1354 1353
1355 /* 1354 /*
@@ -1468,6 +1467,102 @@ out:
1468} 1467}
1469 1468
1470/* 1469/*
1470 * xfs_collapse_file_space()
1471 * This routine frees disk space and shift extent for the given file.
1472 * The first thing we do is to free data blocks in the specified range
1473 * by calling xfs_free_file_space(). It would also sync dirty data
1474 * and invalidate page cache over the region on which collapse range
1475 * is working. And Shift extent records to the left to cover a hole.
1476 * RETURNS:
1477 * 0 on success
1478 * errno on error
1479 *
1480 */
1481int
1482xfs_collapse_file_space(
1483 struct xfs_inode *ip,
1484 xfs_off_t offset,
1485 xfs_off_t len)
1486{
1487 int done = 0;
1488 struct xfs_mount *mp = ip->i_mount;
1489 struct xfs_trans *tp;
1490 int error;
1491 xfs_extnum_t current_ext = 0;
1492 struct xfs_bmap_free free_list;
1493 xfs_fsblock_t first_block;
1494 int committed;
1495 xfs_fileoff_t start_fsb;
1496 xfs_fileoff_t shift_fsb;
1497
1498 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
1499
1500 trace_xfs_collapse_file_space(ip);
1501
1502 start_fsb = XFS_B_TO_FSB(mp, offset + len);
1503 shift_fsb = XFS_B_TO_FSB(mp, len);
1504
1505 error = xfs_free_file_space(ip, offset, len);
1506 if (error)
1507 return error;
1508
1509 while (!error && !done) {
1510 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
1511 tp->t_flags |= XFS_TRANS_RESERVE;
1512 /*
1513 * We would need to reserve permanent block for transaction.
1514 * This will come into picture when after shifting extent into
1515 * hole we found that adjacent extents can be merged which
1516 * may lead to freeing of a block during record update.
1517 */
1518 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
1519 XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
1520 if (error) {
1521 ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
1522 xfs_trans_cancel(tp, 0);
1523 break;
1524 }
1525
1526 xfs_ilock(ip, XFS_ILOCK_EXCL);
1527 error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot,
1528 ip->i_gdquot, ip->i_pdquot,
1529 XFS_DIOSTRAT_SPACE_RES(mp, 0), 0,
1530 XFS_QMOPT_RES_REGBLKS);
1531 if (error)
1532 goto out;
1533
1534 xfs_trans_ijoin(tp, ip, 0);
1535
1536 xfs_bmap_init(&free_list, &first_block);
1537
1538 /*
1539 * We are using the write transaction in which max 2 bmbt
1540 * updates are allowed
1541 */
1542 error = xfs_bmap_shift_extents(tp, ip, &done, start_fsb,
1543 shift_fsb, &current_ext,
1544 &first_block, &free_list,
1545 XFS_BMAP_MAX_SHIFT_EXTENTS);
1546 if (error)
1547 goto out;
1548
1549 error = xfs_bmap_finish(&tp, &free_list, &committed);
1550 if (error)
1551 goto out;
1552
1553 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
1554 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1555 }
1556
1557 return error;
1558
1559out:
1560 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
1561 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1562 return error;
1563}
1564
1565/*
1471 * We need to check that the format of the data fork in the temporary inode is 1566 * We need to check that the format of the data fork in the temporary inode is
1472 * valid for the target inode before doing the swap. This is not a problem with 1567 * valid for the target inode before doing the swap. This is not a problem with
1473 * attr1 because of the fixed fork offset, but attr2 has a dynamically sized 1568 * attr1 because of the fixed fork offset, but attr2 has a dynamically sized
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 900747b25772..935ed2b24edf 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -99,6 +99,8 @@ int xfs_free_file_space(struct xfs_inode *ip, xfs_off_t offset,
99 xfs_off_t len); 99 xfs_off_t len);
100int xfs_zero_file_space(struct xfs_inode *ip, xfs_off_t offset, 100int xfs_zero_file_space(struct xfs_inode *ip, xfs_off_t offset,
101 xfs_off_t len); 101 xfs_off_t len);
102int xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset,
103 xfs_off_t len);
102 104
103/* EOF block manipulation functions */ 105/* EOF block manipulation functions */
104bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force); 106bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force);
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index 9adaae4f3e2f..e80d59fdf89a 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -234,8 +234,7 @@ xfs_btree_lblock_calc_crc(
234 return; 234 return;
235 if (bip) 235 if (bip)
236 block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); 236 block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
237 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), 237 xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
238 XFS_BTREE_LBLOCK_CRC_OFF);
239} 238}
240 239
241bool 240bool
@@ -243,8 +242,8 @@ xfs_btree_lblock_verify_crc(
243 struct xfs_buf *bp) 242 struct xfs_buf *bp)
244{ 243{
245 if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) 244 if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
246 return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), 245 return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
247 XFS_BTREE_LBLOCK_CRC_OFF); 246
248 return true; 247 return true;
249} 248}
250 249
@@ -267,8 +266,7 @@ xfs_btree_sblock_calc_crc(
267 return; 266 return;
268 if (bip) 267 if (bip)
269 block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); 268 block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
270 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), 269 xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
271 XFS_BTREE_SBLOCK_CRC_OFF);
272} 270}
273 271
274bool 272bool
@@ -276,8 +274,8 @@ xfs_btree_sblock_verify_crc(
276 struct xfs_buf *bp) 274 struct xfs_buf *bp)
277{ 275{
278 if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) 276 if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
279 return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), 277 return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
280 XFS_BTREE_SBLOCK_CRC_OFF); 278
281 return true; 279 return true;
282} 280}
283 281
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 9c061ef2b0d9..107f2fdfe41f 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -396,7 +396,17 @@ _xfs_buf_map_pages(
396 bp->b_addr = NULL; 396 bp->b_addr = NULL;
397 } else { 397 } else {
398 int retried = 0; 398 int retried = 0;
399 unsigned noio_flag;
399 400
401 /*
402 * vm_map_ram() will allocate auxillary structures (e.g.
403 * pagetables) with GFP_KERNEL, yet we are likely to be under
404 * GFP_NOFS context here. Hence we need to tell memory reclaim
405 * that we are in such a context via PF_MEMALLOC_NOIO to prevent
406 * memory reclaim re-entering the filesystem here and
407 * potentially deadlocking.
408 */
409 noio_flag = memalloc_noio_save();
400 do { 410 do {
401 bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count, 411 bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
402 -1, PAGE_KERNEL); 412 -1, PAGE_KERNEL);
@@ -404,6 +414,7 @@ _xfs_buf_map_pages(
404 break; 414 break;
405 vm_unmap_aliases(); 415 vm_unmap_aliases();
406 } while (retried++ <= 1); 416 } while (retried++ <= 1);
417 memalloc_noio_restore(noio_flag);
407 418
408 if (!bp->b_addr) 419 if (!bp->b_addr)
409 return -ENOMEM; 420 return -ENOMEM;
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 995339534db6..b8a3abf6cf47 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -369,6 +369,20 @@ static inline void xfs_buf_relse(xfs_buf_t *bp)
369 xfs_buf_rele(bp); 369 xfs_buf_rele(bp);
370} 370}
371 371
372static inline int
373xfs_buf_verify_cksum(struct xfs_buf *bp, unsigned long cksum_offset)
374{
375 return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
376 cksum_offset);
377}
378
379static inline void
380xfs_buf_update_cksum(struct xfs_buf *bp, unsigned long cksum_offset)
381{
382 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
383 cksum_offset);
384}
385
372/* 386/*
373 * Handling of buftargs. 387 * Handling of buftargs.
374 */ 388 */
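
The two inline helpers added to xfs_buf.h above, xfs_buf_verify_cksum() and xfs_buf_update_cksum(), fold the repeated b_addr/BBTOB(b_length) boilerplate out of the individual verifiers. Paired with xfs_verifier_error(), the reworked write-verifier shape is roughly as follows (schematic, placeholder xfs_foo_* names; note the CRC is only stamped after the structure check passes, per "skip pointless CRC updates after verifier failures"):

static void
xfs_foo_write_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_target->bt_mount;
	struct xfs_buf_log_item	*bip = bp->b_fspriv;

	if (!xfs_foo_verify(bp)) {
		xfs_buf_ioerror(bp, EFSCORRUPTED);
		xfs_verifier_error(bp);
		return;			/* no point computing a CRC now */
	}

	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return;

	if (bip)			/* stamp the LSN of the last change */
		XFS_BUF_TO_FOO(bp)->foo_lsn = cpu_to_be64(bip->bli_item.li_lsn);

	xfs_buf_update_cksum(bp, XFS_FOO_CRC_OFF);
}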
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 33149113e333..8752821443be 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -796,20 +796,6 @@ xfs_buf_item_init(
796 bip->bli_formats[i].blf_map_size = map_size; 796 bip->bli_formats[i].blf_map_size = map_size;
797 } 797 }
798 798
799#ifdef XFS_TRANS_DEBUG
800 /*
801 * Allocate the arrays for tracking what needs to be logged
802 * and what our callers request to be logged. bli_orig
803 * holds a copy of the original, clean buffer for comparison
804 * against, and bli_logged keeps a 1 bit flag per byte in
805 * the buffer to indicate which bytes the callers have asked
806 * to have logged.
807 */
808 bip->bli_orig = kmem_alloc(BBTOB(bp->b_length), KM_SLEEP);
809 memcpy(bip->bli_orig, bp->b_addr, BBTOB(bp->b_length));
810 bip->bli_logged = kmem_zalloc(BBTOB(bp->b_length) / NBBY, KM_SLEEP);
811#endif
812
813 /* 799 /*
814 * Put the buf item into the list of items attached to the 800 * Put the buf item into the list of items attached to the
815 * buffer at the front. 801 * buffer at the front.
@@ -957,11 +943,6 @@ STATIC void
957xfs_buf_item_free( 943xfs_buf_item_free(
958 xfs_buf_log_item_t *bip) 944 xfs_buf_log_item_t *bip)
959{ 945{
960#ifdef XFS_TRANS_DEBUG
961 kmem_free(bip->bli_orig);
962 kmem_free(bip->bli_logged);
963#endif /* XFS_TRANS_DEBUG */
964
965 xfs_buf_item_free_format(bip); 946 xfs_buf_item_free_format(bip);
966 kmem_zone_free(xfs_buf_item_zone, bip); 947 kmem_zone_free(xfs_buf_item_zone, bip);
967} 948}
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 796272a2e129..6cc5f6785a77 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -185,8 +185,8 @@ xfs_da3_node_write_verify(
185 struct xfs_da3_node_hdr *hdr3 = bp->b_addr; 185 struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
186 186
187 if (!xfs_da3_node_verify(bp)) { 187 if (!xfs_da3_node_verify(bp)) {
188 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
189 xfs_buf_ioerror(bp, EFSCORRUPTED); 188 xfs_buf_ioerror(bp, EFSCORRUPTED);
189 xfs_verifier_error(bp);
190 return; 190 return;
191 } 191 }
192 192
@@ -196,7 +196,7 @@ xfs_da3_node_write_verify(
196 if (bip) 196 if (bip)
197 hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); 197 hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
198 198
199 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DA3_NODE_CRC_OFF); 199 xfs_buf_update_cksum(bp, XFS_DA3_NODE_CRC_OFF);
200} 200}
201 201
202/* 202/*
@@ -209,18 +209,20 @@ static void
209xfs_da3_node_read_verify( 209xfs_da3_node_read_verify(
210 struct xfs_buf *bp) 210 struct xfs_buf *bp)
211{ 211{
212 struct xfs_mount *mp = bp->b_target->bt_mount;
213 struct xfs_da_blkinfo *info = bp->b_addr; 212 struct xfs_da_blkinfo *info = bp->b_addr;
214 213
215 switch (be16_to_cpu(info->magic)) { 214 switch (be16_to_cpu(info->magic)) {
216 case XFS_DA3_NODE_MAGIC: 215 case XFS_DA3_NODE_MAGIC:
217 if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), 216 if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) {
218 XFS_DA3_NODE_CRC_OFF)) 217 xfs_buf_ioerror(bp, EFSBADCRC);
219 break; 218 break;
219 }
220 /* fall through */ 220 /* fall through */
221 case XFS_DA_NODE_MAGIC: 221 case XFS_DA_NODE_MAGIC:
222 if (!xfs_da3_node_verify(bp)) 222 if (!xfs_da3_node_verify(bp)) {
223 xfs_buf_ioerror(bp, EFSCORRUPTED);
223 break; 224 break;
225 }
224 return; 226 return;
225 case XFS_ATTR_LEAF_MAGIC: 227 case XFS_ATTR_LEAF_MAGIC:
226 case XFS_ATTR3_LEAF_MAGIC: 228 case XFS_ATTR3_LEAF_MAGIC:
@@ -237,8 +239,7 @@ xfs_da3_node_read_verify(
237 } 239 }
238 240
239 /* corrupt block */ 241 /* corrupt block */
240 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); 242 xfs_verifier_error(bp);
241 xfs_buf_ioerror(bp, EFSCORRUPTED);
242} 243}
243 244
244const struct xfs_buf_ops xfs_da3_node_buf_ops = { 245const struct xfs_buf_ops xfs_da3_node_buf_ops = {
@@ -1295,7 +1296,7 @@ xfs_da3_fixhashpath(
1295 node = blk->bp->b_addr; 1296 node = blk->bp->b_addr;
1296 dp->d_ops->node_hdr_from_disk(&nodehdr, node); 1297 dp->d_ops->node_hdr_from_disk(&nodehdr, node);
1297 btree = dp->d_ops->node_tree_p(node); 1298 btree = dp->d_ops->node_tree_p(node);
1298 if (be32_to_cpu(btree->hashval) == lasthash) 1299 if (be32_to_cpu(btree[blk->index].hashval) == lasthash)
1299 break; 1300 break;
1300 blk->hashval = lasthash; 1301 blk->hashval = lasthash;
1301 btree[blk->index].hashval = cpu_to_be32(lasthash); 1302 btree[blk->index].hashval = cpu_to_be32(lasthash);
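
The final xfs_da_btree.c hunk is the "xfs: fix directory hash ordering bug" commit from the shortlog: the early-exit test in xfs_da3_fixhashpath() compared the first btree entry's hash (btree->hashval) rather than the entry actually being updated (btree[blk->index].hashval), so a changed hash could stop propagating up the tree too early. The pitfall in miniature (plain C, purely illustrative):

#include <stdio.h>

struct entry { unsigned int hashval; };

int main(void)
{
	struct entry btree[4] = { {10}, {20}, {30}, {40} };
	int index = 2;
	unsigned int lasthash = 30;

	/* Buggy: 'btree->hashval' is btree[0].hashval, not entry 'index'. */
	printf("compare against first entry:  %d\n", btree->hashval == lasthash);
	/* Fixed: compare the entry the caller is actually updating. */
	printf("compare against btree[index]: %d\n", btree[index].hashval == lasthash);
	return 0;
}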
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
index e5869b50dc41..623bbe8fd921 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -89,6 +89,8 @@ typedef struct xfs_dinode {
89 /* structure must be padded to 64 bit alignment */ 89 /* structure must be padded to 64 bit alignment */
90} xfs_dinode_t; 90} xfs_dinode_t;
91 91
92#define XFS_DINODE_CRC_OFF offsetof(struct xfs_dinode, di_crc)
93
92#define DI_MAX_FLUSH 0xffff 94#define DI_MAX_FLUSH 0xffff
93 95
94/* 96/*
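
The fs/xfs/xfs_dir2.c changes that follow are part of the "allocate xfs_da_args to reduce stack footprint" work: the large struct xfs_da_args moves from each directory operation's stack frame to a kmem_zalloc() allocation (KM_NOFS, for the lockdep reason spelled out in the xfs_dir_lookup() comment below). The shape of the transformation, shown on a hypothetical helper rather than the real functions:

/* Before: struct xfs_da_args lives on an already deep stack. */
int
xfs_dir_frobnicate_stack(
	struct xfs_inode	*dp)
{
	xfs_da_args_t		args;

	memset(&args, 0, sizeof(args));
	args.dp = dp;
	return xfs_dir2_frobnicate(&args);	/* hypothetical callee */
}

/* After: the args are heap allocated for the duration of the call. */
int
xfs_dir_frobnicate_heap(
	struct xfs_inode	*dp)
{
	struct xfs_da_args	*args;
	int			error;

	args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
	if (!args)
		return ENOMEM;		/* XFS uses positive errnos internally */

	args->dp = dp;
	error = xfs_dir2_frobnicate(args);	/* hypothetical callee */
	kmem_free(args);
	return error;
}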
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index ce16ef02997a..fda46253966a 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -180,16 +180,23 @@ xfs_dir_init(
180 xfs_inode_t *dp, 180 xfs_inode_t *dp,
181 xfs_inode_t *pdp) 181 xfs_inode_t *pdp)
182{ 182{
183 xfs_da_args_t args; 183 struct xfs_da_args *args;
184 int error; 184 int error;
185 185
186 memset((char *)&args, 0, sizeof(args));
187 args.dp = dp;
188 args.trans = tp;
189 ASSERT(S_ISDIR(dp->i_d.di_mode)); 186 ASSERT(S_ISDIR(dp->i_d.di_mode));
190 if ((error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino))) 187 error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino);
188 if (error)
191 return error; 189 return error;
192 return xfs_dir2_sf_create(&args, pdp->i_ino); 190
191 args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
192 if (!args)
193 return ENOMEM;
194
195 args->dp = dp;
196 args->trans = tp;
197 error = xfs_dir2_sf_create(args, pdp->i_ino);
198 kmem_free(args);
199 return error;
193} 200}
194 201
195/* 202/*
@@ -205,41 +212,56 @@ xfs_dir_createname(
205 xfs_bmap_free_t *flist, /* bmap's freeblock list */ 212 xfs_bmap_free_t *flist, /* bmap's freeblock list */
206 xfs_extlen_t total) /* bmap's total block count */ 213 xfs_extlen_t total) /* bmap's total block count */
207{ 214{
208 xfs_da_args_t args; 215 struct xfs_da_args *args;
209 int rval; 216 int rval;
210 int v; /* type-checking value */ 217 int v; /* type-checking value */
211 218
212 ASSERT(S_ISDIR(dp->i_d.di_mode)); 219 ASSERT(S_ISDIR(dp->i_d.di_mode));
213 if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) 220 rval = xfs_dir_ino_validate(tp->t_mountp, inum);
221 if (rval)
214 return rval; 222 return rval;
215 XFS_STATS_INC(xs_dir_create); 223 XFS_STATS_INC(xs_dir_create);
216 224
217 memset(&args, 0, sizeof(xfs_da_args_t)); 225 args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
218 args.name = name->name; 226 if (!args)
219 args.namelen = name->len; 227 return ENOMEM;
220 args.filetype = name->type; 228
221 args.hashval = dp->i_mount->m_dirnameops->hashname(name); 229 args->name = name->name;
222 args.inumber = inum; 230 args->namelen = name->len;
223 args.dp = dp; 231 args->filetype = name->type;
224 args.firstblock = first; 232 args->hashval = dp->i_mount->m_dirnameops->hashname(name);
225 args.flist = flist; 233 args->inumber = inum;
226 args.total = total; 234 args->dp = dp;
227 args.whichfork = XFS_DATA_FORK; 235 args->firstblock = first;
228 args.trans = tp; 236 args->flist = flist;
229 args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; 237 args->total = total;
230 238 args->whichfork = XFS_DATA_FORK;
231 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 239 args->trans = tp;
232 rval = xfs_dir2_sf_addname(&args); 240 args->op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
233 else if ((rval = xfs_dir2_isblock(tp, dp, &v))) 241
234 return rval; 242 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
235 else if (v) 243 rval = xfs_dir2_sf_addname(args);
236 rval = xfs_dir2_block_addname(&args); 244 goto out_free;
237 else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) 245 }
238 return rval; 246
239 else if (v) 247 rval = xfs_dir2_isblock(tp, dp, &v);
240 rval = xfs_dir2_leaf_addname(&args); 248 if (rval)
249 goto out_free;
250 if (v) {
251 rval = xfs_dir2_block_addname(args);
252 goto out_free;
253 }
254
255 rval = xfs_dir2_isleaf(tp, dp, &v);
256 if (rval)
257 goto out_free;
258 if (v)
259 rval = xfs_dir2_leaf_addname(args);
241 else 260 else
242 rval = xfs_dir2_node_addname(&args); 261 rval = xfs_dir2_node_addname(args);
262
263out_free:
264 kmem_free(args);
243 return rval; 265 return rval;
244} 266}
245 267
@@ -282,46 +304,66 @@ xfs_dir_lookup(
282 xfs_ino_t *inum, /* out: inode number */ 304 xfs_ino_t *inum, /* out: inode number */
283 struct xfs_name *ci_name) /* out: actual name if CI match */ 305 struct xfs_name *ci_name) /* out: actual name if CI match */
284{ 306{
285 xfs_da_args_t args; 307 struct xfs_da_args *args;
286 int rval; 308 int rval;
287 int v; /* type-checking value */ 309 int v; /* type-checking value */
288 310
289 ASSERT(S_ISDIR(dp->i_d.di_mode)); 311 ASSERT(S_ISDIR(dp->i_d.di_mode));
290 XFS_STATS_INC(xs_dir_lookup); 312 XFS_STATS_INC(xs_dir_lookup);
291 313
292 memset(&args, 0, sizeof(xfs_da_args_t)); 314 /*
293 args.name = name->name; 315 * We need to use KM_NOFS here so that lockdep will not throw false
294 args.namelen = name->len; 316 * positive deadlock warnings on a non-transactional lookup path. It is
295 args.filetype = name->type; 317 * safe to recurse into inode recalim in that case, but lockdep can't
296 args.hashval = dp->i_mount->m_dirnameops->hashname(name); 318 * easily be taught about it. Hence KM_NOFS avoids having to add more
297 args.dp = dp; 319 * lockdep Doing this avoids having to add a bunch of lockdep class
298 args.whichfork = XFS_DATA_FORK; 320 * annotations into the reclaim path for the ilock.
299 args.trans = tp; 321 */
300 args.op_flags = XFS_DA_OP_OKNOENT; 322 args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
323 args->name = name->name;
324 args->namelen = name->len;
325 args->filetype = name->type;
326 args->hashval = dp->i_mount->m_dirnameops->hashname(name);
327 args->dp = dp;
328 args->whichfork = XFS_DATA_FORK;
329 args->trans = tp;
330 args->op_flags = XFS_DA_OP_OKNOENT;
301 if (ci_name) 331 if (ci_name)
302 args.op_flags |= XFS_DA_OP_CILOOKUP; 332 args->op_flags |= XFS_DA_OP_CILOOKUP;
303 333
304 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 334 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
305 rval = xfs_dir2_sf_lookup(&args); 335 rval = xfs_dir2_sf_lookup(args);
306 else if ((rval = xfs_dir2_isblock(tp, dp, &v))) 336 goto out_check_rval;
307 return rval; 337 }
308 else if (v) 338
309 rval = xfs_dir2_block_lookup(&args); 339 rval = xfs_dir2_isblock(tp, dp, &v);
310 else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) 340 if (rval)
311 return rval; 341 goto out_free;
312 else if (v) 342 if (v) {
313 rval = xfs_dir2_leaf_lookup(&args); 343 rval = xfs_dir2_block_lookup(args);
344 goto out_check_rval;
345 }
346
347 rval = xfs_dir2_isleaf(tp, dp, &v);
348 if (rval)
349 goto out_free;
350 if (v)
351 rval = xfs_dir2_leaf_lookup(args);
314 else 352 else
315 rval = xfs_dir2_node_lookup(&args); 353 rval = xfs_dir2_node_lookup(args);
354
355out_check_rval:
316 if (rval == EEXIST) 356 if (rval == EEXIST)
317 rval = 0; 357 rval = 0;
318 if (!rval) { 358 if (!rval) {
319 *inum = args.inumber; 359 *inum = args->inumber;
320 if (ci_name) { 360 if (ci_name) {
321 ci_name->name = args.value; 361 ci_name->name = args->value;
322 ci_name->len = args.valuelen; 362 ci_name->len = args->valuelen;
323 } 363 }
324 } 364 }
365out_free:
366 kmem_free(args);
325 return rval; 367 return rval;
326} 368}
327 369
@@ -338,38 +380,51 @@ xfs_dir_removename(
338 xfs_bmap_free_t *flist, /* bmap's freeblock list */ 380 xfs_bmap_free_t *flist, /* bmap's freeblock list */
339 xfs_extlen_t total) /* bmap's total block count */ 381 xfs_extlen_t total) /* bmap's total block count */
340{ 382{
341 xfs_da_args_t args; 383 struct xfs_da_args *args;
342 int rval; 384 int rval;
343 int v; /* type-checking value */ 385 int v; /* type-checking value */
344 386
345 ASSERT(S_ISDIR(dp->i_d.di_mode)); 387 ASSERT(S_ISDIR(dp->i_d.di_mode));
346 XFS_STATS_INC(xs_dir_remove); 388 XFS_STATS_INC(xs_dir_remove);
347 389
348 memset(&args, 0, sizeof(xfs_da_args_t)); 390 args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
349 args.name = name->name; 391 if (!args)
350 args.namelen = name->len; 392 return ENOMEM;
351 args.filetype = name->type; 393
352 args.hashval = dp->i_mount->m_dirnameops->hashname(name); 394 args->name = name->name;
353 args.inumber = ino; 395 args->namelen = name->len;
354 args.dp = dp; 396 args->filetype = name->type;
355 args.firstblock = first; 397 args->hashval = dp->i_mount->m_dirnameops->hashname(name);
356 args.flist = flist; 398 args->inumber = ino;
357 args.total = total; 399 args->dp = dp;
358 args.whichfork = XFS_DATA_FORK; 400 args->firstblock = first;
359 args.trans = tp; 401 args->flist = flist;
360 402 args->total = total;
361 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 403 args->whichfork = XFS_DATA_FORK;
362 rval = xfs_dir2_sf_removename(&args); 404 args->trans = tp;
363 else if ((rval = xfs_dir2_isblock(tp, dp, &v))) 405
364 return rval; 406 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
365 else if (v) 407 rval = xfs_dir2_sf_removename(args);
366 rval = xfs_dir2_block_removename(&args); 408 goto out_free;
367 else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) 409 }
368 return rval; 410
369 else if (v) 411 rval = xfs_dir2_isblock(tp, dp, &v);
370 rval = xfs_dir2_leaf_removename(&args); 412 if (rval)
413 goto out_free;
414 if (v) {
415 rval = xfs_dir2_block_removename(args);
416 goto out_free;
417 }
418
419 rval = xfs_dir2_isleaf(tp, dp, &v);
420 if (rval)
421 goto out_free;
422 if (v)
423 rval = xfs_dir2_leaf_removename(args);
371 else 424 else
372 rval = xfs_dir2_node_removename(&args); 425 rval = xfs_dir2_node_removename(args);
426out_free:
427 kmem_free(args);
373 return rval; 428 return rval;
374} 429}
375 430
@@ -386,40 +441,54 @@ xfs_dir_replace(
386 xfs_bmap_free_t *flist, /* bmap's freeblock list */ 441 xfs_bmap_free_t *flist, /* bmap's freeblock list */
387 xfs_extlen_t total) /* bmap's total block count */ 442 xfs_extlen_t total) /* bmap's total block count */
388{ 443{
389 xfs_da_args_t args; 444 struct xfs_da_args *args;
390 int rval; 445 int rval;
391 int v; /* type-checking value */ 446 int v; /* type-checking value */
392 447
393 ASSERT(S_ISDIR(dp->i_d.di_mode)); 448 ASSERT(S_ISDIR(dp->i_d.di_mode));
394 449
395 if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) 450 rval = xfs_dir_ino_validate(tp->t_mountp, inum);
451 if (rval)
396 return rval; 452 return rval;
397 453
398 memset(&args, 0, sizeof(xfs_da_args_t)); 454 args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
399 args.name = name->name; 455 if (!args)
400 args.namelen = name->len; 456 return ENOMEM;
401 args.filetype = name->type; 457
402 args.hashval = dp->i_mount->m_dirnameops->hashname(name); 458 args->name = name->name;
403 args.inumber = inum; 459 args->namelen = name->len;
404 args.dp = dp; 460 args->filetype = name->type;
405 args.firstblock = first; 461 args->hashval = dp->i_mount->m_dirnameops->hashname(name);
406 args.flist = flist; 462 args->inumber = inum;
407 args.total = total; 463 args->dp = dp;
408 args.whichfork = XFS_DATA_FORK; 464 args->firstblock = first;
409 args.trans = tp; 465 args->flist = flist;
410 466 args->total = total;
411 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 467 args->whichfork = XFS_DATA_FORK;
412 rval = xfs_dir2_sf_replace(&args); 468 args->trans = tp;
413 else if ((rval = xfs_dir2_isblock(tp, dp, &v))) 469
414 return rval; 470 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
415 else if (v) 471 rval = xfs_dir2_sf_replace(args);
416 rval = xfs_dir2_block_replace(&args); 472 goto out_free;
417 else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) 473 }
418 return rval; 474
419 else if (v) 475 rval = xfs_dir2_isblock(tp, dp, &v);
420 rval = xfs_dir2_leaf_replace(&args); 476 if (rval)
477 goto out_free;
478 if (v) {
479 rval = xfs_dir2_block_replace(args);
480 goto out_free;
481 }
482
483 rval = xfs_dir2_isleaf(tp, dp, &v);
484 if (rval)
485 goto out_free;
486 if (v)
487 rval = xfs_dir2_leaf_replace(args);
421 else 488 else
422 rval = xfs_dir2_node_replace(&args); 489 rval = xfs_dir2_node_replace(args);
490out_free:
491 kmem_free(args);
423 return rval; 492 return rval;
424} 493}
425 494
@@ -434,7 +503,7 @@ xfs_dir_canenter(
434 struct xfs_name *name, /* name of entry to add */ 503 struct xfs_name *name, /* name of entry to add */
435 uint resblks) 504 uint resblks)
436{ 505{
437 xfs_da_args_t args; 506 struct xfs_da_args *args;
438 int rval; 507 int rval;
439 int v; /* type-checking value */ 508 int v; /* type-checking value */
440 509
@@ -443,29 +512,42 @@ xfs_dir_canenter(
443 512
444 ASSERT(S_ISDIR(dp->i_d.di_mode)); 513 ASSERT(S_ISDIR(dp->i_d.di_mode));
445 514
446 memset(&args, 0, sizeof(xfs_da_args_t)); 515 args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
447 args.name = name->name; 516 if (!args)
448 args.namelen = name->len; 517 return ENOMEM;
449 args.filetype = name->type; 518
450 args.hashval = dp->i_mount->m_dirnameops->hashname(name); 519 args->name = name->name;
451 args.dp = dp; 520 args->namelen = name->len;
452 args.whichfork = XFS_DATA_FORK; 521 args->filetype = name->type;
453 args.trans = tp; 522 args->hashval = dp->i_mount->m_dirnameops->hashname(name);
454 args.op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME | 523 args->dp = dp;
524 args->whichfork = XFS_DATA_FORK;
525 args->trans = tp;
526 args->op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME |
455 XFS_DA_OP_OKNOENT; 527 XFS_DA_OP_OKNOENT;
456 528
457 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 529 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
458 rval = xfs_dir2_sf_addname(&args); 530 rval = xfs_dir2_sf_addname(args);
459 else if ((rval = xfs_dir2_isblock(tp, dp, &v))) 531 goto out_free;
460 return rval; 532 }
461 else if (v) 533
462 rval = xfs_dir2_block_addname(&args); 534 rval = xfs_dir2_isblock(tp, dp, &v);
463 else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) 535 if (rval)
464 return rval; 536 goto out_free;
465 else if (v) 537 if (v) {
466 rval = xfs_dir2_leaf_addname(&args); 538 rval = xfs_dir2_block_addname(args);
539 goto out_free;
540 }
541
542 rval = xfs_dir2_isleaf(tp, dp, &v);
543 if (rval)
544 goto out_free;
545 if (v)
546 rval = xfs_dir2_leaf_addname(args);
467 else 547 else
468 rval = xfs_dir2_node_addname(&args); 548 rval = xfs_dir2_node_addname(args);
549out_free:
550 kmem_free(args);
469 return rval; 551 return rval;
470} 552}
471 553
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 90cdbf4b5f19..4f6a38cb83a4 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -89,13 +89,14 @@ xfs_dir3_block_read_verify(
89{ 89{
90 struct xfs_mount *mp = bp->b_target->bt_mount; 90 struct xfs_mount *mp = bp->b_target->bt_mount;
91 91
92 if ((xfs_sb_version_hascrc(&mp->m_sb) && 92 if (xfs_sb_version_hascrc(&mp->m_sb) &&
93 !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), 93 !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF))
94 XFS_DIR3_DATA_CRC_OFF)) || 94 xfs_buf_ioerror(bp, EFSBADCRC);
95 !xfs_dir3_block_verify(bp)) { 95 else if (!xfs_dir3_block_verify(bp))
96 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
97 xfs_buf_ioerror(bp, EFSCORRUPTED); 96 xfs_buf_ioerror(bp, EFSCORRUPTED);
98 } 97
98 if (bp->b_error)
99 xfs_verifier_error(bp);
99} 100}
100 101
101static void 102static void
@@ -107,8 +108,8 @@ xfs_dir3_block_write_verify(
107 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; 108 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
108 109
109 if (!xfs_dir3_block_verify(bp)) { 110 if (!xfs_dir3_block_verify(bp)) {
110 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
111 xfs_buf_ioerror(bp, EFSCORRUPTED); 111 xfs_buf_ioerror(bp, EFSCORRUPTED);
112 xfs_verifier_error(bp);
112 return; 113 return;
113 } 114 }
114 115
@@ -118,7 +119,7 @@ xfs_dir3_block_write_verify(
118 if (bip) 119 if (bip)
119 hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); 120 hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn);
120 121
121 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_DATA_CRC_OFF); 122 xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF);
122} 123}
123 124
124const struct xfs_buf_ops xfs_dir3_block_buf_ops = { 125const struct xfs_buf_ops xfs_dir3_block_buf_ops = {
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index 70acff4ee173..afa4ad523f3f 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -241,7 +241,6 @@ static void
241xfs_dir3_data_reada_verify( 241xfs_dir3_data_reada_verify(
242 struct xfs_buf *bp) 242 struct xfs_buf *bp)
243{ 243{
244 struct xfs_mount *mp = bp->b_target->bt_mount;
245 struct xfs_dir2_data_hdr *hdr = bp->b_addr; 244 struct xfs_dir2_data_hdr *hdr = bp->b_addr;
246 245
247 switch (hdr->magic) { 246 switch (hdr->magic) {
@@ -255,8 +254,8 @@ xfs_dir3_data_reada_verify(
255 xfs_dir3_data_verify(bp); 254 xfs_dir3_data_verify(bp);
256 return; 255 return;
257 default: 256 default:
258 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr);
259 xfs_buf_ioerror(bp, EFSCORRUPTED); 257 xfs_buf_ioerror(bp, EFSCORRUPTED);
258 xfs_verifier_error(bp);
260 break; 259 break;
261 } 260 }
262} 261}
@@ -267,13 +266,14 @@ xfs_dir3_data_read_verify(
267{ 266{
268 struct xfs_mount *mp = bp->b_target->bt_mount; 267 struct xfs_mount *mp = bp->b_target->bt_mount;
269 268
270 if ((xfs_sb_version_hascrc(&mp->m_sb) && 269 if (xfs_sb_version_hascrc(&mp->m_sb) &&
271 !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), 270 !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF))
272 XFS_DIR3_DATA_CRC_OFF)) || 271 xfs_buf_ioerror(bp, EFSBADCRC);
273 !xfs_dir3_data_verify(bp)) { 272 else if (!xfs_dir3_data_verify(bp))
274 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
275 xfs_buf_ioerror(bp, EFSCORRUPTED); 273 xfs_buf_ioerror(bp, EFSCORRUPTED);
276 } 274
275 if (bp->b_error)
276 xfs_verifier_error(bp);
277} 277}
278 278
279static void 279static void
@@ -285,8 +285,8 @@ xfs_dir3_data_write_verify(
285 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; 285 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
286 286
287 if (!xfs_dir3_data_verify(bp)) { 287 if (!xfs_dir3_data_verify(bp)) {
288 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
289 xfs_buf_ioerror(bp, EFSCORRUPTED); 288 xfs_buf_ioerror(bp, EFSCORRUPTED);
289 xfs_verifier_error(bp);
290 return; 290 return;
291 } 291 }
292 292
@@ -296,7 +296,7 @@ xfs_dir3_data_write_verify(
296 if (bip) 296 if (bip)
297 hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); 297 hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn);
298 298
299 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_DATA_CRC_OFF); 299 xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF);
300} 300}
301 301
302const struct xfs_buf_ops xfs_dir3_data_buf_ops = { 302const struct xfs_buf_ops xfs_dir3_data_buf_ops = {
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index ae47ec6e16c4..d36e97df1187 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -179,13 +179,14 @@ __read_verify(
179{ 179{
180 struct xfs_mount *mp = bp->b_target->bt_mount; 180 struct xfs_mount *mp = bp->b_target->bt_mount;
181 181
182 if ((xfs_sb_version_hascrc(&mp->m_sb) && 182 if (xfs_sb_version_hascrc(&mp->m_sb) &&
183 !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), 183 !xfs_buf_verify_cksum(bp, XFS_DIR3_LEAF_CRC_OFF))
184 XFS_DIR3_LEAF_CRC_OFF)) || 184 xfs_buf_ioerror(bp, EFSBADCRC);
185 !xfs_dir3_leaf_verify(bp, magic)) { 185 else if (!xfs_dir3_leaf_verify(bp, magic))
186 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
187 xfs_buf_ioerror(bp, EFSCORRUPTED); 186 xfs_buf_ioerror(bp, EFSCORRUPTED);
188 } 187
188 if (bp->b_error)
189 xfs_verifier_error(bp);
189} 190}
190 191
191static void 192static void
@@ -198,8 +199,8 @@ __write_verify(
198 struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr; 199 struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr;
199 200
200 if (!xfs_dir3_leaf_verify(bp, magic)) { 201 if (!xfs_dir3_leaf_verify(bp, magic)) {
201 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
202 xfs_buf_ioerror(bp, EFSCORRUPTED); 202 xfs_buf_ioerror(bp, EFSCORRUPTED);
203 xfs_verifier_error(bp);
203 return; 204 return;
204 } 205 }
205 206
@@ -209,7 +210,7 @@ __write_verify(
209 if (bip) 210 if (bip)
210 hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); 211 hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
211 212
212 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_LEAF_CRC_OFF); 213 xfs_buf_update_cksum(bp, XFS_DIR3_LEAF_CRC_OFF);
213} 214}
214 215
215static void 216static void
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index 48c7d18f68c3..cb434d732681 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -115,13 +115,14 @@ xfs_dir3_free_read_verify(
115{ 115{
116 struct xfs_mount *mp = bp->b_target->bt_mount; 116 struct xfs_mount *mp = bp->b_target->bt_mount;
117 117
118 if ((xfs_sb_version_hascrc(&mp->m_sb) && 118 if (xfs_sb_version_hascrc(&mp->m_sb) &&
119 !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), 119 !xfs_buf_verify_cksum(bp, XFS_DIR3_FREE_CRC_OFF))
120 XFS_DIR3_FREE_CRC_OFF)) || 120 xfs_buf_ioerror(bp, EFSBADCRC);
121 !xfs_dir3_free_verify(bp)) { 121 else if (!xfs_dir3_free_verify(bp))
122 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
123 xfs_buf_ioerror(bp, EFSCORRUPTED); 122 xfs_buf_ioerror(bp, EFSCORRUPTED);
124 } 123
124 if (bp->b_error)
125 xfs_verifier_error(bp);
125} 126}
126 127
127static void 128static void
@@ -133,8 +134,8 @@ xfs_dir3_free_write_verify(
133 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; 134 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
134 135
135 if (!xfs_dir3_free_verify(bp)) { 136 if (!xfs_dir3_free_verify(bp)) {
136 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
137 xfs_buf_ioerror(bp, EFSCORRUPTED); 137 xfs_buf_ioerror(bp, EFSCORRUPTED);
138 xfs_verifier_error(bp);
138 return; 139 return;
139 } 140 }
140 141
@@ -144,7 +145,7 @@ xfs_dir3_free_write_verify(
144 if (bip) 145 if (bip)
145 hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); 146 hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn);
146 147
147 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_FREE_CRC_OFF); 148 xfs_buf_update_cksum(bp, XFS_DIR3_FREE_CRC_OFF);
148} 149}
149 150
150const struct xfs_buf_ops xfs_dir3_free_buf_ops = { 151const struct xfs_buf_ops xfs_dir3_free_buf_ops = {
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 7aeb4c895b32..868b19f096bf 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -615,7 +615,7 @@ xfs_qm_dqread(
615 615
616 if (flags & XFS_QMOPT_DQALLOC) { 616 if (flags & XFS_QMOPT_DQALLOC) {
617 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC); 617 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
618 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_attrsetm, 618 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_dqalloc,
619 XFS_QM_DQALLOC_SPACE_RES(mp), 0); 619 XFS_QM_DQALLOC_SPACE_RES(mp), 0);
620 if (error) 620 if (error)
621 goto error1; 621 goto error1;
diff --git a/fs/xfs/xfs_dquot_buf.c b/fs/xfs/xfs_dquot_buf.c
index d401457d2f25..610da8177737 100644
--- a/fs/xfs/xfs_dquot_buf.c
+++ b/fs/xfs/xfs_dquot_buf.c
@@ -257,10 +257,13 @@ xfs_dquot_buf_read_verify(
257{ 257{
258 struct xfs_mount *mp = bp->b_target->bt_mount; 258 struct xfs_mount *mp = bp->b_target->bt_mount;
259 259
260 if (!xfs_dquot_buf_verify_crc(mp, bp) || !xfs_dquot_buf_verify(mp, bp)) { 260 if (!xfs_dquot_buf_verify_crc(mp, bp))
261 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); 261 xfs_buf_ioerror(bp, EFSBADCRC);
262 else if (!xfs_dquot_buf_verify(mp, bp))
262 xfs_buf_ioerror(bp, EFSCORRUPTED); 263 xfs_buf_ioerror(bp, EFSCORRUPTED);
263 } 264
265 if (bp->b_error)
266 xfs_verifier_error(bp);
264} 267}
265 268
266/* 269/*
@@ -275,8 +278,8 @@ xfs_dquot_buf_write_verify(
275 struct xfs_mount *mp = bp->b_target->bt_mount; 278 struct xfs_mount *mp = bp->b_target->bt_mount;
276 279
277 if (!xfs_dquot_buf_verify(mp, bp)) { 280 if (!xfs_dquot_buf_verify(mp, bp)) {
278 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
279 xfs_buf_ioerror(bp, EFSCORRUPTED); 281 xfs_buf_ioerror(bp, EFSCORRUPTED);
282 xfs_verifier_error(bp);
280 return; 283 return;
281 } 284 }
282} 285}
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 9995b807d627..edac5b057d28 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -156,7 +156,7 @@ xfs_error_report(
156{ 156{
157 if (level <= xfs_error_level) { 157 if (level <= xfs_error_level) {
158 xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT, 158 xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT,
159 "Internal error %s at line %d of file %s. Caller 0x%p", 159 "Internal error %s at line %d of file %s. Caller %pF",
160 tag, linenum, filename, ra); 160 tag, linenum, filename, ra);
161 161
162 xfs_stack_trace(); 162 xfs_stack_trace();
@@ -178,3 +178,28 @@ xfs_corruption_error(
178 xfs_error_report(tag, level, mp, filename, linenum, ra); 178 xfs_error_report(tag, level, mp, filename, linenum, ra);
179 xfs_alert(mp, "Corruption detected. Unmount and run xfs_repair"); 179 xfs_alert(mp, "Corruption detected. Unmount and run xfs_repair");
180} 180}
181
182/*
183 * Warnings specifically for verifier errors. Differentiate CRC vs. invalid
184 * values, and omit the stack trace unless the error level is tuned high.
185 */
186void
187xfs_verifier_error(
188 struct xfs_buf *bp)
189{
190 struct xfs_mount *mp = bp->b_target->bt_mount;
191
192 xfs_alert(mp, "Metadata %s detected at %pF, block 0x%llx",
193 bp->b_error == EFSBADCRC ? "CRC error" : "corruption",
194 __return_address, bp->b_bn);
195
196 xfs_alert(mp, "Unmount and run xfs_repair");
197
198 if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
199 xfs_alert(mp, "First 64 bytes of corrupted metadata buffer:");
200 xfs_hex_dump(xfs_buf_offset(bp, 0), 64);
201 }
202
203 if (xfs_error_level >= XFS_ERRLEVEL_HIGH)
204 xfs_stack_trace();
205}
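For reference, every buffer verifier touched by this series ends up with the same shape: a checksum failure is recorded as EFSBADCRC, a structural failure as EFSCORRUPTED, and either one is reported once through the new xfs_verifier_error() helper. The sketch below is a schematic template of that read-verifier shape, not code from the patch; xfs_foo_verify() and XFS_FOO_CRC_OFF are placeholders, and kernel context is assumed rather than a standalone program.

/*
 * Schematic read verifier (placeholders: xfs_foo_verify() for the
 * per-structure check, XFS_FOO_CRC_OFF for the on-disk CRC offset).
 */
static void
xfs_foo_read_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_target->bt_mount;

	/* Distinguish a bad checksum from structurally invalid contents. */
	if (xfs_sb_version_hascrc(&mp->m_sb) &&
	    !xfs_buf_verify_cksum(bp, XFS_FOO_CRC_OFF))
		xfs_buf_ioerror(bp, EFSBADCRC);
	else if (!xfs_foo_verify(bp))
		xfs_buf_ioerror(bp, EFSCORRUPTED);

	/* Either failure is reported through the common helper. */
	if (bp->b_error)
		xfs_verifier_error(bp);
}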
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 079a367f44ee..c1c57d4a4b5d 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -34,6 +34,7 @@ extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp,
34extern void xfs_corruption_error(const char *tag, int level, 34extern void xfs_corruption_error(const char *tag, int level,
35 struct xfs_mount *mp, void *p, const char *filename, 35 struct xfs_mount *mp, void *p, const char *filename,
36 int linenum, inst_t *ra); 36 int linenum, inst_t *ra);
37extern void xfs_verifier_error(struct xfs_buf *bp);
37 38
38#define XFS_ERROR_REPORT(e, lvl, mp) \ 39#define XFS_ERROR_REPORT(e, lvl, mp) \
39 xfs_error_report(e, lvl, mp, __FILE__, __LINE__, __return_address) 40 xfs_error_report(e, lvl, mp, __FILE__, __LINE__, __return_address)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 64b48eade91d..f7abff8c16ca 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -823,7 +823,8 @@ xfs_file_fallocate(
823 823
824 if (!S_ISREG(inode->i_mode)) 824 if (!S_ISREG(inode->i_mode))
825 return -EINVAL; 825 return -EINVAL;
826 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) 826 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
827 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
827 return -EOPNOTSUPP; 828 return -EOPNOTSUPP;
828 829
829 xfs_ilock(ip, XFS_IOLOCK_EXCL); 830 xfs_ilock(ip, XFS_IOLOCK_EXCL);
@@ -831,6 +832,20 @@ xfs_file_fallocate(
831 error = xfs_free_file_space(ip, offset, len); 832 error = xfs_free_file_space(ip, offset, len);
832 if (error) 833 if (error)
833 goto out_unlock; 834 goto out_unlock;
835 } else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
836 unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
837
838 if (offset & blksize_mask || len & blksize_mask) {
839 error = -EINVAL;
840 goto out_unlock;
841 }
842
843 ASSERT(offset + len < i_size_read(inode));
844 new_size = i_size_read(inode) - len;
845
846 error = xfs_collapse_file_space(ip, offset, len);
847 if (error)
848 goto out_unlock;
834 } else { 849 } else {
835 if (!(mode & FALLOC_FL_KEEP_SIZE) && 850 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
836 offset + len > i_size_read(inode)) { 851 offset + len > i_size_read(inode)) {
@@ -840,8 +855,11 @@ xfs_file_fallocate(
840 goto out_unlock; 855 goto out_unlock;
841 } 856 }
842 857
843 error = xfs_alloc_file_space(ip, offset, len, 858 if (mode & FALLOC_FL_ZERO_RANGE)
844 XFS_BMAPI_PREALLOC); 859 error = xfs_zero_file_space(ip, offset, len);
860 else
861 error = xfs_alloc_file_space(ip, offset, len,
862 XFS_BMAPI_PREALLOC);
845 if (error) 863 if (error)
846 goto out_unlock; 864 goto out_unlock;
847 } 865 }
@@ -859,7 +877,7 @@ xfs_file_fallocate(
859 if (ip->i_d.di_mode & S_IXGRP) 877 if (ip->i_d.di_mode & S_IXGRP)
860 ip->i_d.di_mode &= ~S_ISGID; 878 ip->i_d.di_mode &= ~S_ISGID;
861 879
862 if (!(mode & FALLOC_FL_PUNCH_HOLE)) 880 if (!(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE)))
863 ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; 881 ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
864 882
865 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 883 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
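With these hooks in place, xfs_file_fallocate() accepts two new modes: FALLOC_FL_COLLAPSE_RANGE removes a block-aligned range and shifts the rest of the file down, shrinking i_size, while FALLOC_FL_ZERO_RANGE zeroes a range by converting it to unwritten extents via xfs_zero_file_space(). The userspace sketch below is illustrative only and not part of the patch; it assumes a 3.15-or-later kernel and headers, and the file name and offsets are made up.

/*
 * Illustrative userspace use of the two new fallocate modes. Both
 * offset and length must be aligned to the filesystem block size for
 * FALLOC_FL_COLLAPSE_RANGE, or the call fails with EINVAL.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("testfile", O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Zero 1MB at offset 4MB; blocks stay allocated as unwritten extents. */
	if (fallocate(fd, FALLOC_FL_ZERO_RANGE, 4 << 20, 1 << 20) < 0)
		perror("FALLOC_FL_ZERO_RANGE");

	/* Remove 1MB at offset 4MB and shift the tail down, shrinking the file. */
	if (fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, 4 << 20, 1 << 20) < 0)
		perror("FALLOC_FL_COLLAPSE_RANGE");

	close(fd);
	return 0;
}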
diff --git a/fs/xfs/xfs_format.h b/fs/xfs/xfs_format.h
index b6ab5a3cfa12..9898f31d05d8 100644
--- a/fs/xfs/xfs_format.h
+++ b/fs/xfs/xfs_format.h
@@ -145,6 +145,8 @@ struct xfs_dsymlink_hdr {
145 __be64 sl_lsn; 145 __be64 sl_lsn;
146}; 146};
147 147
148#define XFS_SYMLINK_CRC_OFF offsetof(struct xfs_dsymlink_hdr, sl_crc)
149
148/* 150/*
149 * The maximum pathlen is 1024 bytes. Since the minimum file system 151 * The maximum pathlen is 1024 bytes. Since the minimum file system
150 * blocksize is 512 bytes, we can get a max of 3 extents back from 152 * blocksize is 512 bytes, we can get a max of 3 extents back from
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 5d7f105a1c82..8f711db61a0c 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -363,6 +363,18 @@ xfs_ialloc_ag_alloc(
363 args.minleft = args.mp->m_in_maxlevels - 1; 363 args.minleft = args.mp->m_in_maxlevels - 1;
364 if ((error = xfs_alloc_vextent(&args))) 364 if ((error = xfs_alloc_vextent(&args)))
365 return error; 365 return error;
366
367 /*
368 * This request might have dirtied the transaction if the AG can
369 * satisfy the request, but the exact block was not available.
370 * If the allocation did fail, subsequent requests will relax
371 * the exact agbno requirement and increase the alignment
372 * instead. It is critical that the total size of the request
373 * (len + alignment + slop) does not increase from this point
374 * on, so reset minalignslop to ensure it is not included in
375 * subsequent requests.
376 */
377 args.minalignslop = 0;
366 } else 378 } else
367 args.fsbno = NULLFSBLOCK; 379 args.fsbno = NULLFSBLOCK;
368 380
@@ -1568,18 +1580,17 @@ xfs_agi_read_verify(
1568 struct xfs_buf *bp) 1580 struct xfs_buf *bp)
1569{ 1581{
1570 struct xfs_mount *mp = bp->b_target->bt_mount; 1582 struct xfs_mount *mp = bp->b_target->bt_mount;
1571 int agi_ok = 1;
1572
1573 if (xfs_sb_version_hascrc(&mp->m_sb))
1574 agi_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
1575 offsetof(struct xfs_agi, agi_crc));
1576 agi_ok = agi_ok && xfs_agi_verify(bp);
1577 1583
1578 if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI, 1584 if (xfs_sb_version_hascrc(&mp->m_sb) &&
1579 XFS_RANDOM_IALLOC_READ_AGI))) { 1585 !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF))
1580 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); 1586 xfs_buf_ioerror(bp, EFSBADCRC);
1587 else if (XFS_TEST_ERROR(!xfs_agi_verify(bp), mp,
1588 XFS_ERRTAG_IALLOC_READ_AGI,
1589 XFS_RANDOM_IALLOC_READ_AGI))
1581 xfs_buf_ioerror(bp, EFSCORRUPTED); 1590 xfs_buf_ioerror(bp, EFSCORRUPTED);
1582 } 1591
1592 if (bp->b_error)
1593 xfs_verifier_error(bp);
1583} 1594}
1584 1595
1585static void 1596static void
@@ -1590,8 +1601,8 @@ xfs_agi_write_verify(
1590 struct xfs_buf_log_item *bip = bp->b_fspriv; 1601 struct xfs_buf_log_item *bip = bp->b_fspriv;
1591 1602
1592 if (!xfs_agi_verify(bp)) { 1603 if (!xfs_agi_verify(bp)) {
1593 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
1594 xfs_buf_ioerror(bp, EFSCORRUPTED); 1604 xfs_buf_ioerror(bp, EFSCORRUPTED);
1605 xfs_verifier_error(bp);
1595 return; 1606 return;
1596 } 1607 }
1597 1608
@@ -1600,8 +1611,7 @@ xfs_agi_write_verify(
1600 1611
1601 if (bip) 1612 if (bip)
1602 XFS_BUF_TO_AGI(bp)->agi_lsn = cpu_to_be64(bip->bli_item.li_lsn); 1613 XFS_BUF_TO_AGI(bp)->agi_lsn = cpu_to_be64(bip->bli_item.li_lsn);
1603 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), 1614 xfs_buf_update_cksum(bp, XFS_AGI_CRC_OFF);
1604 offsetof(struct xfs_agi, agi_crc));
1605} 1615}
1606 1616
1607const struct xfs_buf_ops xfs_agi_buf_ops = { 1617const struct xfs_buf_ops xfs_agi_buf_ops = {
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
index c8fa5bbb36de..7e309b11e87d 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c
@@ -243,12 +243,14 @@ static void
243xfs_inobt_read_verify( 243xfs_inobt_read_verify(
244 struct xfs_buf *bp) 244 struct xfs_buf *bp)
245{ 245{
246 if (!(xfs_btree_sblock_verify_crc(bp) && 246 if (!xfs_btree_sblock_verify_crc(bp))
247 xfs_inobt_verify(bp))) { 247 xfs_buf_ioerror(bp, EFSBADCRC);
248 trace_xfs_btree_corrupt(bp, _RET_IP_); 248 else if (!xfs_inobt_verify(bp))
249 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
250 bp->b_target->bt_mount, bp->b_addr);
251 xfs_buf_ioerror(bp, EFSCORRUPTED); 249 xfs_buf_ioerror(bp, EFSCORRUPTED);
250
251 if (bp->b_error) {
252 trace_xfs_btree_corrupt(bp, _RET_IP_);
253 xfs_verifier_error(bp);
252 } 254 }
253} 255}
254 256
@@ -258,9 +260,9 @@ xfs_inobt_write_verify(
258{ 260{
259 if (!xfs_inobt_verify(bp)) { 261 if (!xfs_inobt_verify(bp)) {
260 trace_xfs_btree_corrupt(bp, _RET_IP_); 262 trace_xfs_btree_corrupt(bp, _RET_IP_);
261 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
262 bp->b_target->bt_mount, bp->b_addr);
263 xfs_buf_ioerror(bp, EFSCORRUPTED); 263 xfs_buf_ioerror(bp, EFSCORRUPTED);
264 xfs_verifier_error(bp);
265 return;
264 } 266 }
265 xfs_btree_sblock_calc_crc(bp); 267 xfs_btree_sblock_calc_crc(bp);
266 268
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 3a137e9f9a7d..5e7a38fa6ee6 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -42,7 +42,6 @@
42#include "xfs_bmap_util.h" 42#include "xfs_bmap_util.h"
43#include "xfs_error.h" 43#include "xfs_error.h"
44#include "xfs_quota.h" 44#include "xfs_quota.h"
45#include "xfs_dinode.h"
46#include "xfs_filestream.h" 45#include "xfs_filestream.h"
47#include "xfs_cksum.h" 46#include "xfs_cksum.h"
48#include "xfs_trace.h" 47#include "xfs_trace.h"
@@ -62,6 +61,8 @@ kmem_zone_t *xfs_inode_zone;
62 61
63STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *); 62STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *);
64 63
64STATIC int xfs_iunlink_remove(xfs_trans_t *, xfs_inode_t *);
65
65/* 66/*
66 * helper function to extract extent size hint from inode 67 * helper function to extract extent size hint from inode
67 */ 68 */
@@ -1115,7 +1116,7 @@ xfs_bumplink(
1115{ 1116{
1116 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); 1117 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
1117 1118
1118 ASSERT(ip->i_d.di_nlink > 0); 1119 ASSERT(ip->i_d.di_nlink > 0 || (VFS_I(ip)->i_state & I_LINKABLE));
1119 ip->i_d.di_nlink++; 1120 ip->i_d.di_nlink++;
1120 inc_nlink(VFS_I(ip)); 1121 inc_nlink(VFS_I(ip));
1121 if ((ip->i_d.di_version == 1) && 1122 if ((ip->i_d.di_version == 1) &&
@@ -1165,10 +1166,7 @@ xfs_create(
1165 if (XFS_FORCED_SHUTDOWN(mp)) 1166 if (XFS_FORCED_SHUTDOWN(mp))
1166 return XFS_ERROR(EIO); 1167 return XFS_ERROR(EIO);
1167 1168
1168 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) 1169 prid = xfs_get_initial_prid(dp);
1169 prid = xfs_get_projid(dp);
1170 else
1171 prid = XFS_PROJID_DEFAULT;
1172 1170
1173 /* 1171 /*
1174 * Make sure that we have allocated dquot(s) on disk. 1172 * Make sure that we have allocated dquot(s) on disk.
@@ -1333,6 +1331,113 @@ xfs_create(
1333} 1331}
1334 1332
1335int 1333int
1334xfs_create_tmpfile(
1335 struct xfs_inode *dp,
1336 struct dentry *dentry,
1337 umode_t mode)
1338{
1339 struct xfs_mount *mp = dp->i_mount;
1340 struct xfs_inode *ip = NULL;
1341 struct xfs_trans *tp = NULL;
1342 int error;
1343 uint cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
1344 prid_t prid;
1345 struct xfs_dquot *udqp = NULL;
1346 struct xfs_dquot *gdqp = NULL;
1347 struct xfs_dquot *pdqp = NULL;
1348 struct xfs_trans_res *tres;
1349 uint resblks;
1350
1351 if (XFS_FORCED_SHUTDOWN(mp))
1352 return XFS_ERROR(EIO);
1353
1354 prid = xfs_get_initial_prid(dp);
1355
1356 /*
1357 * Make sure that we have allocated dquot(s) on disk.
1358 */
1359 error = xfs_qm_vop_dqalloc(dp, xfs_kuid_to_uid(current_fsuid()),
1360 xfs_kgid_to_gid(current_fsgid()), prid,
1361 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
1362 &udqp, &gdqp, &pdqp);
1363 if (error)
1364 return error;
1365
1366 resblks = XFS_IALLOC_SPACE_RES(mp);
1367 tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE_TMPFILE);
1368
1369 tres = &M_RES(mp)->tr_create_tmpfile;
1370 error = xfs_trans_reserve(tp, tres, resblks, 0);
1371 if (error == ENOSPC) {
1372 /* No space at all so try a "no-allocation" reservation */
1373 resblks = 0;
1374 error = xfs_trans_reserve(tp, tres, 0, 0);
1375 }
1376 if (error) {
1377 cancel_flags = 0;
1378 goto out_trans_cancel;
1379 }
1380
1381 error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp,
1382 pdqp, resblks, 1, 0);
1383 if (error)
1384 goto out_trans_cancel;
1385
1386 error = xfs_dir_ialloc(&tp, dp, mode, 1, 0,
1387 prid, resblks > 0, &ip, NULL);
1388 if (error) {
1389 if (error == ENOSPC)
1390 goto out_trans_cancel;
1391 goto out_trans_abort;
1392 }
1393
1394 if (mp->m_flags & XFS_MOUNT_WSYNC)
1395 xfs_trans_set_sync(tp);
1396
1397 /*
1398 * Attach the dquot(s) to the inodes and modify them incore.
1399 * These ids of the inode couldn't have changed since the new
1400 * inode has been locked ever since it was created.
1401 */
1402 xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
1403
1404 ip->i_d.di_nlink--;
1405 d_tmpfile(dentry, VFS_I(ip));
1406 error = xfs_iunlink(tp, ip);
1407 if (error)
1408 goto out_trans_abort;
1409
1410 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
1411 if (error)
1412 goto out_release_inode;
1413
1414 xfs_qm_dqrele(udqp);
1415 xfs_qm_dqrele(gdqp);
1416 xfs_qm_dqrele(pdqp);
1417
1418 return 0;
1419
1420 out_trans_abort:
1421 cancel_flags |= XFS_TRANS_ABORT;
1422 out_trans_cancel:
1423 xfs_trans_cancel(tp, cancel_flags);
1424 out_release_inode:
1425 /*
1426 * Wait until after the current transaction is aborted to
1427 * release the inode. This prevents recursive transactions
1428 * and deadlocks from xfs_inactive.
1429 */
1430 if (ip)
1431 IRELE(ip);
1432
1433 xfs_qm_dqrele(udqp);
1434 xfs_qm_dqrele(gdqp);
1435 xfs_qm_dqrele(pdqp);
1436
1437 return error;
1438}
1439
1440int
1336xfs_link( 1441xfs_link(
1337 xfs_inode_t *tdp, 1442 xfs_inode_t *tdp,
1338 xfs_inode_t *sip, 1443 xfs_inode_t *sip,
@@ -1397,6 +1502,12 @@ xfs_link(
1397 1502
1398 xfs_bmap_init(&free_list, &first_block); 1503 xfs_bmap_init(&free_list, &first_block);
1399 1504
1505 if (sip->i_d.di_nlink == 0) {
1506 error = xfs_iunlink_remove(tp, sip);
1507 if (error)
1508 goto abort_return;
1509 }
1510
1400 error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino, 1511 error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino,
1401 &first_block, &free_list, resblks); 1512 &first_block, &free_list, resblks);
1402 if (error) 1513 if (error)
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 65e2350f449c..396cc1fafd0d 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -20,6 +20,7 @@
20 20
21#include "xfs_inode_buf.h" 21#include "xfs_inode_buf.h"
22#include "xfs_inode_fork.h" 22#include "xfs_inode_fork.h"
23#include "xfs_dinode.h"
23 24
24/* 25/*
25 * Kernel only inode definitions 26 * Kernel only inode definitions
@@ -192,6 +193,15 @@ xfs_set_projid(struct xfs_inode *ip,
192 ip->i_d.di_projid_lo = (__uint16_t) (projid & 0xffff); 193 ip->i_d.di_projid_lo = (__uint16_t) (projid & 0xffff);
193} 194}
194 195
196static inline prid_t
197xfs_get_initial_prid(struct xfs_inode *dp)
198{
199 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
200 return xfs_get_projid(dp);
201
202 return XFS_PROJID_DEFAULT;
203}
204
195/* 205/*
196 * In-core inode flags. 206 * In-core inode flags.
197 */ 207 */
@@ -323,6 +333,8 @@ int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
323 struct xfs_inode **ipp, struct xfs_name *ci_name); 333 struct xfs_inode **ipp, struct xfs_name *ci_name);
324int xfs_create(struct xfs_inode *dp, struct xfs_name *name, 334int xfs_create(struct xfs_inode *dp, struct xfs_name *name,
325 umode_t mode, xfs_dev_t rdev, struct xfs_inode **ipp); 335 umode_t mode, xfs_dev_t rdev, struct xfs_inode **ipp);
336int xfs_create_tmpfile(struct xfs_inode *dp, struct dentry *dentry,
337 umode_t mode);
326int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, 338int xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
327 struct xfs_inode *ip); 339 struct xfs_inode *ip);
328int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, 340int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
diff --git a/fs/xfs/xfs_inode_buf.c b/fs/xfs/xfs_inode_buf.c
index 4fc9f39dd89e..24e993996bdc 100644
--- a/fs/xfs/xfs_inode_buf.c
+++ b/fs/xfs/xfs_inode_buf.c
@@ -102,8 +102,7 @@ xfs_inode_buf_verify(
102 } 102 }
103 103
104 xfs_buf_ioerror(bp, EFSCORRUPTED); 104 xfs_buf_ioerror(bp, EFSCORRUPTED);
105 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_HIGH, 105 xfs_verifier_error(bp);
106 mp, dip);
107#ifdef DEBUG 106#ifdef DEBUG
108 xfs_alert(mp, 107 xfs_alert(mp,
109 "bad inode magic/vsn daddr %lld #%d (magic=%x)", 108 "bad inode magic/vsn daddr %lld #%d (magic=%x)",
@@ -306,7 +305,7 @@ xfs_dinode_verify(
306 if (!xfs_sb_version_hascrc(&mp->m_sb)) 305 if (!xfs_sb_version_hascrc(&mp->m_sb))
307 return false; 306 return false;
308 if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, 307 if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
309 offsetof(struct xfs_dinode, di_crc))) 308 XFS_DINODE_CRC_OFF))
310 return false; 309 return false;
311 if (be64_to_cpu(dip->di_ino) != ip->i_ino) 310 if (be64_to_cpu(dip->di_ino) != ip->i_ino)
312 return false; 311 return false;
@@ -327,7 +326,7 @@ xfs_dinode_calc_crc(
327 326
328 ASSERT(xfs_sb_version_hascrc(&mp->m_sb)); 327 ASSERT(xfs_sb_version_hascrc(&mp->m_sb));
329 crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize, 328 crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize,
330 offsetof(struct xfs_dinode, di_crc)); 329 XFS_DINODE_CRC_OFF);
331 dip->di_crc = xfs_end_cksum(crc); 330 dip->di_crc = xfs_end_cksum(crc);
332} 331}
333 332
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 22d1cbea283d..3b80ebae05f5 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -128,7 +128,6 @@ xfs_iomap_write_direct(
128 xfs_fsblock_t firstfsb; 128 xfs_fsblock_t firstfsb;
129 xfs_extlen_t extsz, temp; 129 xfs_extlen_t extsz, temp;
130 int nimaps; 130 int nimaps;
131 int bmapi_flag;
132 int quota_flag; 131 int quota_flag;
133 int rt; 132 int rt;
134 xfs_trans_t *tp; 133 xfs_trans_t *tp;
@@ -200,18 +199,15 @@ xfs_iomap_write_direct(
200 199
201 xfs_trans_ijoin(tp, ip, 0); 200 xfs_trans_ijoin(tp, ip, 0);
202 201
203 bmapi_flag = 0;
204 if (offset < XFS_ISIZE(ip) || extsz)
205 bmapi_flag |= XFS_BMAPI_PREALLOC;
206
207 /* 202 /*
208 * From this point onwards we overwrite the imap pointer that the 203 * From this point onwards we overwrite the imap pointer that the
209 * caller gave to us. 204 * caller gave to us.
210 */ 205 */
211 xfs_bmap_init(&free_list, &firstfsb); 206 xfs_bmap_init(&free_list, &firstfsb);
212 nimaps = 1; 207 nimaps = 1;
213 error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, bmapi_flag, 208 error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
214 &firstfsb, 0, imap, &nimaps, &free_list); 209 XFS_BMAPI_PREALLOC, &firstfsb, 0,
210 imap, &nimaps, &free_list);
215 if (error) 211 if (error)
216 goto out_bmap_cancel; 212 goto out_bmap_cancel;
217 213
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 9ddfb8190ca1..89b07e43ca28 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -39,6 +39,7 @@
39#include "xfs_da_btree.h" 39#include "xfs_da_btree.h"
40#include "xfs_dir2_priv.h" 40#include "xfs_dir2_priv.h"
41#include "xfs_dinode.h" 41#include "xfs_dinode.h"
42#include "xfs_trans_space.h"
42 43
43#include <linux/capability.h> 44#include <linux/capability.h>
44#include <linux/xattr.h> 45#include <linux/xattr.h>
@@ -48,6 +49,18 @@
48#include <linux/fiemap.h> 49#include <linux/fiemap.h>
49#include <linux/slab.h> 50#include <linux/slab.h>
50 51
52/*
53 * Directories have different lock order w.r.t. mmap_sem compared to regular
54 * files. This is due to readdir potentially triggering page faults on a user
55 * buffer inside filldir(), and this happens with the ilock on the directory
56 * held. For regular files, the lock order is the other way around - the
57 * mmap_sem is taken during the page fault, and then we lock the ilock to do
58 * block mapping. Hence we need a different class for the directory ilock so
59 * that lockdep can tell them apart.
60 */
61static struct lock_class_key xfs_nondir_ilock_class;
62static struct lock_class_key xfs_dir_ilock_class;
63
51static int 64static int
52xfs_initxattrs( 65xfs_initxattrs(
53 struct inode *inode, 66 struct inode *inode,
@@ -1034,6 +1047,19 @@ xfs_vn_fiemap(
1034 return 0; 1047 return 0;
1035} 1048}
1036 1049
1050STATIC int
1051xfs_vn_tmpfile(
1052 struct inode *dir,
1053 struct dentry *dentry,
1054 umode_t mode)
1055{
1056 int error;
1057
1058 error = xfs_create_tmpfile(XFS_I(dir), dentry, mode);
1059
1060 return -error;
1061}
1062
1037static const struct inode_operations xfs_inode_operations = { 1063static const struct inode_operations xfs_inode_operations = {
1038 .get_acl = xfs_get_acl, 1064 .get_acl = xfs_get_acl,
1039 .set_acl = xfs_set_acl, 1065 .set_acl = xfs_set_acl,
@@ -1072,6 +1098,7 @@ static const struct inode_operations xfs_dir_inode_operations = {
1072 .removexattr = generic_removexattr, 1098 .removexattr = generic_removexattr,
1073 .listxattr = xfs_vn_listxattr, 1099 .listxattr = xfs_vn_listxattr,
1074 .update_time = xfs_vn_update_time, 1100 .update_time = xfs_vn_update_time,
1101 .tmpfile = xfs_vn_tmpfile,
1075}; 1102};
1076 1103
1077static const struct inode_operations xfs_dir_ci_inode_operations = { 1104static const struct inode_operations xfs_dir_ci_inode_operations = {
@@ -1099,6 +1126,7 @@ static const struct inode_operations xfs_dir_ci_inode_operations = {
1099 .removexattr = generic_removexattr, 1126 .removexattr = generic_removexattr,
1100 .listxattr = xfs_vn_listxattr, 1127 .listxattr = xfs_vn_listxattr,
1101 .update_time = xfs_vn_update_time, 1128 .update_time = xfs_vn_update_time,
1129 .tmpfile = xfs_vn_tmpfile,
1102}; 1130};
1103 1131
1104static const struct inode_operations xfs_symlink_inode_operations = { 1132static const struct inode_operations xfs_symlink_inode_operations = {
@@ -1191,6 +1219,7 @@ xfs_setup_inode(
1191 xfs_diflags_to_iflags(inode, ip); 1219 xfs_diflags_to_iflags(inode, ip);
1192 1220
1193 ip->d_ops = ip->i_mount->m_nondir_inode_ops; 1221 ip->d_ops = ip->i_mount->m_nondir_inode_ops;
1222 lockdep_set_class(&ip->i_lock.mr_lock, &xfs_nondir_ilock_class);
1194 switch (inode->i_mode & S_IFMT) { 1223 switch (inode->i_mode & S_IFMT) {
1195 case S_IFREG: 1224 case S_IFREG:
1196 inode->i_op = &xfs_inode_operations; 1225 inode->i_op = &xfs_inode_operations;
@@ -1198,6 +1227,7 @@ xfs_setup_inode(
1198 inode->i_mapping->a_ops = &xfs_address_space_operations; 1227 inode->i_mapping->a_ops = &xfs_address_space_operations;
1199 break; 1228 break;
1200 case S_IFDIR: 1229 case S_IFDIR:
1230 lockdep_set_class(&ip->i_lock.mr_lock, &xfs_dir_ilock_class);
1201 if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb)) 1231 if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
1202 inode->i_op = &xfs_dir_ci_inode_operations; 1232 inode->i_op = &xfs_dir_ci_inode_operations;
1203 else 1233 else
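With .tmpfile wired into both directory inode_operations, userspace can create an unnamed XFS inode with O_TMPFILE; xfs_create_tmpfile() puts it on the unlinked list, and a later link (see the xfs_link() change above, which calls xfs_iunlink_remove() for a zero-nlink inode) gives it a directory entry. The sketch below is an illustrative userspace program, not part of the patch; the mount point and file names are made up, and it uses the unprivileged /proc/self/fd linkat path described in open(2).

/*
 * Illustrative O_TMPFILE usage: create an anonymous file on an XFS
 * directory, write to it, then give it a name with linkat().
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char path[64];
	int fd = open("/mnt/xfs", O_TMPFILE | O_RDWR, 0600);

	if (fd < 0) {
		perror("O_TMPFILE");
		return 1;
	}

	/* The inode exists only on the AGI unlinked list; no directory entry yet. */
	if (write(fd, "hello\n", 6) != 6)
		perror("write");

	/* Linking it in removes it from the unlinked list and makes it visible. */
	snprintf(path, sizeof(path), "/proc/self/fd/%d", fd);
	if (linkat(AT_FDCWD, path, AT_FDCWD, "/mnt/xfs/visible",
		   AT_SYMLINK_FOLLOW) < 0)
		perror("linkat");

	close(fd);
	return 0;
}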
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index f9bb590acc0e..825249d2dfc1 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -119,6 +119,7 @@ typedef __uint64_t __psunsigned_t;
119#include "xfs_iops.h" 119#include "xfs_iops.h"
120#include "xfs_aops.h" 120#include "xfs_aops.h"
121#include "xfs_super.h" 121#include "xfs_super.h"
122#include "xfs_cksum.h"
122#include "xfs_buf.h" 123#include "xfs_buf.h"
123#include "xfs_message.h" 124#include "xfs_message.h"
124 125
@@ -178,6 +179,7 @@ typedef __uint64_t __psunsigned_t;
178#define ENOATTR ENODATA /* Attribute not found */ 179#define ENOATTR ENODATA /* Attribute not found */
179#define EWRONGFS EINVAL /* Mount with wrong filesystem type */ 180#define EWRONGFS EINVAL /* Mount with wrong filesystem type */
180#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ 181#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */
182#define EFSBADCRC EBADMSG /* Bad CRC detected */
181 183
182#define SYNCHRONIZE() barrier() 184#define SYNCHRONIZE() barrier()
183#define __return_address __builtin_return_address(0) 185#define __return_address __builtin_return_address(0)
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index b0f4ef77fa70..2c4004475e71 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -175,7 +175,7 @@ void xlog_iodone(struct xfs_buf *);
175struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket); 175struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
176void xfs_log_ticket_put(struct xlog_ticket *ticket); 176void xfs_log_ticket_put(struct xlog_ticket *ticket);
177 177
178int xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, 178void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
179 xfs_lsn_t *commit_lsn, int flags); 179 xfs_lsn_t *commit_lsn, int flags);
180bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); 180bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
181 181
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 4ef6fdbced78..7e5455391176 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -499,13 +499,6 @@ xlog_cil_push(
499 cil->xc_ctx = new_ctx; 499 cil->xc_ctx = new_ctx;
500 500
501 /* 501 /*
502 * mirror the new sequence into the cil structure so that we can do
503 * unlocked checks against the current sequence in log forces without
504 * risking deferencing a freed context pointer.
505 */
506 cil->xc_current_sequence = new_ctx->sequence;
507
508 /*
509 * The switch is now done, so we can drop the context lock and move out 502 * The switch is now done, so we can drop the context lock and move out
510 * of a shared context. We can't just go straight to the commit record, 503 * of a shared context. We can't just go straight to the commit record,
511 * though - we need to synchronise with previous and future commits so 504 * though - we need to synchronise with previous and future commits so
@@ -523,8 +516,15 @@ xlog_cil_push(
523 * Hence we need to add this context to the committing context list so 516 * Hence we need to add this context to the committing context list so
524 * that higher sequences will wait for us to write out a commit record 517 * that higher sequences will wait for us to write out a commit record
525 * before they do. 518 * before they do.
519 *
520 * xfs_log_force_lsn requires us to mirror the new sequence into the cil
521 * structure atomically with the addition of this sequence to the
522 * committing list. This also ensures that we can do unlocked checks
523 * against the current sequence in log forces without risking
 524	 * dereferencing a freed context pointer.
526 */ 525 */
527 spin_lock(&cil->xc_push_lock); 526 spin_lock(&cil->xc_push_lock);
527 cil->xc_current_sequence = new_ctx->sequence;
528 list_add(&ctx->committing, &cil->xc_committing); 528 list_add(&ctx->committing, &cil->xc_committing);
529 spin_unlock(&cil->xc_push_lock); 529 spin_unlock(&cil->xc_push_lock);
530 up_write(&cil->xc_ctx_lock); 530 up_write(&cil->xc_ctx_lock);
@@ -662,8 +662,14 @@ xlog_cil_push_background(
662 662
663} 663}
664 664
665/*
666 * xlog_cil_push_now() is used to trigger an immediate CIL push to the sequence
667 * number that is passed. When it returns, the work will be queued for
668 * @push_seq, but it won't be completed. The caller is expected to do any
669 * waiting for push_seq to complete if it is required.
670 */
665static void 671static void
666xlog_cil_push_foreground( 672xlog_cil_push_now(
667 struct xlog *log, 673 struct xlog *log,
668 xfs_lsn_t push_seq) 674 xfs_lsn_t push_seq)
669{ 675{
@@ -688,10 +694,8 @@ xlog_cil_push_foreground(
688 } 694 }
689 695
690 cil->xc_push_seq = push_seq; 696 cil->xc_push_seq = push_seq;
697 queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work);
691 spin_unlock(&cil->xc_push_lock); 698 spin_unlock(&cil->xc_push_lock);
692
693 /* do the push now */
694 xlog_cil_push(log);
695} 699}
696 700
697bool 701bool
@@ -721,7 +725,7 @@ xlog_cil_empty(
721 * background commit, returns without it held once background commits are 725 * background commit, returns without it held once background commits are
722 * allowed again. 726 * allowed again.
723 */ 727 */
724int 728void
725xfs_log_commit_cil( 729xfs_log_commit_cil(
726 struct xfs_mount *mp, 730 struct xfs_mount *mp,
727 struct xfs_trans *tp, 731 struct xfs_trans *tp,
@@ -767,7 +771,6 @@ xfs_log_commit_cil(
767 xlog_cil_push_background(log); 771 xlog_cil_push_background(log);
768 772
769 up_read(&cil->xc_ctx_lock); 773 up_read(&cil->xc_ctx_lock);
770 return 0;
771} 774}
772 775
773/* 776/*
@@ -796,7 +799,8 @@ xlog_cil_force_lsn(
796 * xlog_cil_push() handles racing pushes for the same sequence, 799 * xlog_cil_push() handles racing pushes for the same sequence,
797 * so no need to deal with it here. 800 * so no need to deal with it here.
798 */ 801 */
799 xlog_cil_push_foreground(log, sequence); 802restart:
803 xlog_cil_push_now(log, sequence);
800 804
801 /* 805 /*
802 * See if we can find a previous sequence still committing. 806 * See if we can find a previous sequence still committing.
@@ -804,7 +808,6 @@ xlog_cil_force_lsn(
804 * before allowing the force of push_seq to go ahead. Hence block 808 * before allowing the force of push_seq to go ahead. Hence block
805 * on commits for those as well. 809 * on commits for those as well.
806 */ 810 */
807restart:
808 spin_lock(&cil->xc_push_lock); 811 spin_lock(&cil->xc_push_lock);
809 list_for_each_entry(ctx, &cil->xc_committing, committing) { 812 list_for_each_entry(ctx, &cil->xc_committing, committing) {
810 if (ctx->sequence > sequence) 813 if (ctx->sequence > sequence)
@@ -822,6 +825,28 @@ restart:
822 /* found it! */ 825 /* found it! */
823 commit_lsn = ctx->commit_lsn; 826 commit_lsn = ctx->commit_lsn;
824 } 827 }
828
829 /*
830 * The call to xlog_cil_push_now() executes the push in the background.
 831	 * Hence by the time we have got here, our sequence may not have been
832 * pushed yet. This is true if the current sequence still matches the
833 * push sequence after the above wait loop and the CIL still contains
834 * dirty objects.
835 *
836 * When the push occurs, it will empty the CIL and
 837	 * atomically increment the current sequence past the push sequence and
838 * move it into the committing list. Of course, if the CIL is clean at
839 * the time of the push, it won't have pushed the CIL at all, so in that
840 * case we should try the push for this sequence again from the start
841 * just in case.
842 */
843
844 if (sequence == cil->xc_current_sequence &&
845 !list_empty(&cil->xc_cil)) {
846 spin_unlock(&cil->xc_push_lock);
847 goto restart;
848 }
849
825 spin_unlock(&cil->xc_push_lock); 850 spin_unlock(&cil->xc_push_lock);
826 return commit_lsn; 851 return commit_lsn;
827} 852}
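Taken together, these hunks change xlog_cil_force_lsn() from running the CIL push inline to queueing it on the CIL workqueue and then rechecking under xc_push_lock. The fragment below is a condensed schematic of the resulting flow, assembled from the hunks above; the xc_commit_wait sleep and shutdown handling of the real function are elided and kernel context is assumed.

restart:
	/* Queue the push for 'sequence'; the worker runs it asynchronously. */
	xlog_cil_push_now(log, sequence);

	spin_lock(&cil->xc_push_lock);
	/* ... wait here for earlier committing contexts, collecting commit_lsn ... */

	/*
	 * If the worker has not yet swapped in a new context, the current
	 * sequence still matches and the CIL still holds dirty objects, so
	 * drop the lock and kick the push again.
	 */
	if (sequence == cil->xc_current_sequence &&
	    !list_empty(&cil->xc_cil)) {
		spin_unlock(&cil->xc_push_lock);
		goto restart;
	}

	spin_unlock(&cil->xc_push_lock);
	return commit_lsn;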
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index f96c05669a9e..993cb19e7d39 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -314,6 +314,9 @@ reread:
314 error = bp->b_error; 314 error = bp->b_error;
315 if (loud) 315 if (loud)
316 xfs_warn(mp, "SB validate failed with error %d.", error); 316 xfs_warn(mp, "SB validate failed with error %d.", error);
317 /* bad CRC means corrupted metadata */
318 if (error == EFSBADCRC)
319 error = EFSCORRUPTED;
317 goto release_buf; 320 goto release_buf;
318 } 321 }
319 322
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index a6a76b2b6a85..ec5ca65c6211 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -842,7 +842,7 @@ xfs_growfs_rt_alloc(
842 /* 842 /*
843 * Reserve space & log for one extent added to the file. 843 * Reserve space & log for one extent added to the file.
844 */ 844 */
845 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growdata, 845 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growrtalloc,
846 resblks, 0); 846 resblks, 0);
847 if (error) 847 if (error)
848 goto error_cancel; 848 goto error_cancel;
diff --git a/fs/xfs/xfs_sb.c b/fs/xfs/xfs_sb.c
index 1e116794bb66..0c0e41bbe4e3 100644
--- a/fs/xfs/xfs_sb.c
+++ b/fs/xfs/xfs_sb.c
@@ -288,6 +288,7 @@ xfs_mount_validate_sb(
288 sbp->sb_inodelog < XFS_DINODE_MIN_LOG || 288 sbp->sb_inodelog < XFS_DINODE_MIN_LOG ||
289 sbp->sb_inodelog > XFS_DINODE_MAX_LOG || 289 sbp->sb_inodelog > XFS_DINODE_MAX_LOG ||
290 sbp->sb_inodesize != (1 << sbp->sb_inodelog) || 290 sbp->sb_inodesize != (1 << sbp->sb_inodelog) ||
291 sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) ||
291 (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || 292 (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) ||
292 (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || 293 (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) ||
293 (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || 294 (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) ||
@@ -610,12 +611,11 @@ xfs_sb_read_verify(
610 XFS_SB_VERSION_5) || 611 XFS_SB_VERSION_5) ||
611 dsb->sb_crc != 0)) { 612 dsb->sb_crc != 0)) {
612 613
613 if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), 614 if (!xfs_buf_verify_cksum(bp, XFS_SB_CRC_OFF)) {
614 offsetof(struct xfs_sb, sb_crc))) {
615 /* Only fail bad secondaries on a known V5 filesystem */ 615 /* Only fail bad secondaries on a known V5 filesystem */
616 if (bp->b_bn == XFS_SB_DADDR || 616 if (bp->b_bn == XFS_SB_DADDR ||
617 xfs_sb_version_hascrc(&mp->m_sb)) { 617 xfs_sb_version_hascrc(&mp->m_sb)) {
618 error = EFSCORRUPTED; 618 error = EFSBADCRC;
619 goto out_error; 619 goto out_error;
620 } 620 }
621 } 621 }
@@ -624,10 +624,9 @@ xfs_sb_read_verify(
624 624
625out_error: 625out_error:
626 if (error) { 626 if (error) {
627 if (error == EFSCORRUPTED)
628 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
629 mp, bp->b_addr);
630 xfs_buf_ioerror(bp, error); 627 xfs_buf_ioerror(bp, error);
628 if (error == EFSCORRUPTED || error == EFSBADCRC)
629 xfs_verifier_error(bp);
631 } 630 }
632} 631}
633 632
@@ -662,9 +661,8 @@ xfs_sb_write_verify(
662 661
663 error = xfs_sb_verify(bp, false); 662 error = xfs_sb_verify(bp, false);
664 if (error) { 663 if (error) {
665 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
666 mp, bp->b_addr);
667 xfs_buf_ioerror(bp, error); 664 xfs_buf_ioerror(bp, error);
665 xfs_verifier_error(bp);
668 return; 666 return;
669 } 667 }
670 668
@@ -674,8 +672,7 @@ xfs_sb_write_verify(
674 if (bip) 672 if (bip)
675 XFS_BUF_TO_SBP(bp)->sb_lsn = cpu_to_be64(bip->bli_item.li_lsn); 673 XFS_BUF_TO_SBP(bp)->sb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
676 674
677 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), 675 xfs_buf_update_cksum(bp, XFS_SB_CRC_OFF);
678 offsetof(struct xfs_sb, sb_crc));
679} 676}
680 677
681const struct xfs_buf_ops xfs_sb_buf_ops = { 678const struct xfs_buf_ops xfs_sb_buf_ops = {
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index 35061d4b614c..f7b2fe77c5a5 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -182,6 +182,8 @@ typedef struct xfs_sb {
182 /* must be padded to 64 bit alignment */ 182 /* must be padded to 64 bit alignment */
183} xfs_sb_t; 183} xfs_sb_t;
184 184
185#define XFS_SB_CRC_OFF offsetof(struct xfs_sb, sb_crc)
186
185/* 187/*
186 * Superblock - on disk version. Must match the in core version above. 188 * Superblock - on disk version. Must match the in core version above.
187 * Must be padded to 64 bit alignment. 189 * Must be padded to 64 bit alignment.
diff --git a/fs/xfs/xfs_shared.h b/fs/xfs/xfs_shared.h
index 8c5035a13df1..4484e5151395 100644
--- a/fs/xfs/xfs_shared.h
+++ b/fs/xfs/xfs_shared.h
@@ -104,7 +104,8 @@ extern const struct xfs_buf_ops xfs_symlink_buf_ops;
104#define XFS_TRANS_SB_COUNT 41 104#define XFS_TRANS_SB_COUNT 41
105#define XFS_TRANS_CHECKPOINT 42 105#define XFS_TRANS_CHECKPOINT 42
106#define XFS_TRANS_ICREATE 43 106#define XFS_TRANS_ICREATE 43
107#define XFS_TRANS_TYPE_MAX 43 107#define XFS_TRANS_CREATE_TMPFILE 44
108#define XFS_TRANS_TYPE_MAX 44
108/* new transaction types need to be reflected in xfs_logprint(8) */ 109/* new transaction types need to be reflected in xfs_logprint(8) */
109 110
110#define XFS_TRANS_TYPES \ 111#define XFS_TRANS_TYPES \
@@ -112,6 +113,7 @@ extern const struct xfs_buf_ops xfs_symlink_buf_ops;
112 { XFS_TRANS_SETATTR_SIZE, "SETATTR_SIZE" }, \ 113 { XFS_TRANS_SETATTR_SIZE, "SETATTR_SIZE" }, \
113 { XFS_TRANS_INACTIVE, "INACTIVE" }, \ 114 { XFS_TRANS_INACTIVE, "INACTIVE" }, \
114 { XFS_TRANS_CREATE, "CREATE" }, \ 115 { XFS_TRANS_CREATE, "CREATE" }, \
116 { XFS_TRANS_CREATE_TMPFILE, "CREATE_TMPFILE" }, \
115 { XFS_TRANS_CREATE_TRUNC, "CREATE_TRUNC" }, \ 117 { XFS_TRANS_CREATE_TRUNC, "CREATE_TRUNC" }, \
116 { XFS_TRANS_TRUNCATE_FILE, "TRUNCATE_FILE" }, \ 118 { XFS_TRANS_TRUNCATE_FILE, "TRUNCATE_FILE" }, \
117 { XFS_TRANS_REMOVE, "REMOVE" }, \ 119 { XFS_TRANS_REMOVE, "REMOVE" }, \
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 14e58f2c96bd..52979aa90986 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -80,6 +80,10 @@ xfs_readlink_bmap(
80 if (error) { 80 if (error) {
81 xfs_buf_ioerror_alert(bp, __func__); 81 xfs_buf_ioerror_alert(bp, __func__);
82 xfs_buf_relse(bp); 82 xfs_buf_relse(bp);
83
84 /* bad CRC means corrupted metadata */
85 if (error == EFSBADCRC)
86 error = EFSCORRUPTED;
83 goto out; 87 goto out;
84 } 88 }
85 byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt); 89 byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt);
@@ -208,10 +212,7 @@ xfs_symlink(
208 return XFS_ERROR(ENAMETOOLONG); 212 return XFS_ERROR(ENAMETOOLONG);
209 213
210 udqp = gdqp = NULL; 214 udqp = gdqp = NULL;
211 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) 215 prid = xfs_get_initial_prid(dp);
212 prid = xfs_get_projid(dp);
213 else
214 prid = XFS_PROJID_DEFAULT;
215 216
216 /* 217 /*
217 * Make sure that we have allocated dquot(s) on disk. 218 * Make sure that we have allocated dquot(s) on disk.
diff --git a/fs/xfs/xfs_symlink_remote.c b/fs/xfs/xfs_symlink_remote.c
index bf59a2b45f8c..9b32052ff65e 100644
--- a/fs/xfs/xfs_symlink_remote.c
+++ b/fs/xfs/xfs_symlink_remote.c
@@ -133,12 +133,13 @@ xfs_symlink_read_verify(
133 if (!xfs_sb_version_hascrc(&mp->m_sb)) 133 if (!xfs_sb_version_hascrc(&mp->m_sb))
134 return; 134 return;
135 135
136 if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), 136 if (!xfs_buf_verify_cksum(bp, XFS_SYMLINK_CRC_OFF))
137 offsetof(struct xfs_dsymlink_hdr, sl_crc)) || 137 xfs_buf_ioerror(bp, EFSBADCRC);
138 !xfs_symlink_verify(bp)) { 138 else if (!xfs_symlink_verify(bp))
139 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
140 xfs_buf_ioerror(bp, EFSCORRUPTED); 139 xfs_buf_ioerror(bp, EFSCORRUPTED);
141 } 140
141 if (bp->b_error)
142 xfs_verifier_error(bp);
142} 143}
143 144
144static void 145static void
@@ -153,8 +154,8 @@ xfs_symlink_write_verify(
153 return; 154 return;
154 155
155 if (!xfs_symlink_verify(bp)) { 156 if (!xfs_symlink_verify(bp)) {
156 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
157 xfs_buf_ioerror(bp, EFSCORRUPTED); 157 xfs_buf_ioerror(bp, EFSCORRUPTED);
158 xfs_verifier_error(bp);
158 return; 159 return;
159 } 160 }
160 161
@@ -162,8 +163,7 @@ xfs_symlink_write_verify(
162 struct xfs_dsymlink_hdr *dsl = bp->b_addr; 163 struct xfs_dsymlink_hdr *dsl = bp->b_addr;
163 dsl->sl_lsn = cpu_to_be64(bip->bli_item.li_lsn); 164 dsl->sl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
164 } 165 }
165 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), 166 xfs_buf_update_cksum(bp, XFS_SYMLINK_CRC_OFF);
166 offsetof(struct xfs_dsymlink_hdr, sl_crc));
167} 167}
168 168
169const struct xfs_buf_ops xfs_symlink_buf_ops = { 169const struct xfs_buf_ops xfs_symlink_buf_ops = {
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 425dfa45b9a0..a4ae41c179a8 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -603,6 +603,7 @@ DEFINE_INODE_EVENT(xfs_readlink);
603DEFINE_INODE_EVENT(xfs_inactive_symlink); 603DEFINE_INODE_EVENT(xfs_inactive_symlink);
604DEFINE_INODE_EVENT(xfs_alloc_file_space); 604DEFINE_INODE_EVENT(xfs_alloc_file_space);
605DEFINE_INODE_EVENT(xfs_free_file_space); 605DEFINE_INODE_EVENT(xfs_free_file_space);
606DEFINE_INODE_EVENT(xfs_collapse_file_space);
606DEFINE_INODE_EVENT(xfs_readdir); 607DEFINE_INODE_EVENT(xfs_readdir);
607#ifdef CONFIG_XFS_POSIX_ACL 608#ifdef CONFIG_XFS_POSIX_ACL
608DEFINE_INODE_EVENT(xfs_get_acl); 609DEFINE_INODE_EVENT(xfs_get_acl);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index c812c5c060de..54a57326d85b 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -887,12 +887,7 @@ xfs_trans_commit(
887 xfs_trans_apply_sb_deltas(tp); 887 xfs_trans_apply_sb_deltas(tp);
888 xfs_trans_apply_dquot_deltas(tp); 888 xfs_trans_apply_dquot_deltas(tp);
889 889
890 error = xfs_log_commit_cil(mp, tp, &commit_lsn, flags); 890 xfs_log_commit_cil(mp, tp, &commit_lsn, flags);
891 if (error == ENOMEM) {
892 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
893 error = XFS_ERROR(EIO);
894 goto out_unreserve;
895 }
896 891
897 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); 892 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
898 xfs_trans_free(tp); 893 xfs_trans_free(tp);
@@ -902,10 +897,7 @@ xfs_trans_commit(
          * log out now and wait for it.
          */
         if (sync) {
-                if (!error) {
-                        error = _xfs_log_force_lsn(mp, commit_lsn,
-                                      XFS_LOG_SYNC, NULL);
-                }
+                error = _xfs_log_force_lsn(mp, commit_lsn, XFS_LOG_SYNC, NULL);
                 XFS_STATS_INC(xs_trans_sync);
         } else {
                 XFS_STATS_INC(xs_trans_async);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 647b6f1d8923..b8eef0549f3f 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -275,6 +275,10 @@ xfs_trans_read_buf_map(
                         XFS_BUF_UNDONE(bp);
                         xfs_buf_stale(bp);
                         xfs_buf_relse(bp);
+
+                        /* bad CRC means corrupted metadata */
+                        if (error == EFSBADCRC)
+                                error = EFSCORRUPTED;
                         return error;
                 }
 #ifdef DEBUG
@@ -338,6 +342,9 @@ xfs_trans_read_buf_map(
                         if (tp->t_flags & XFS_TRANS_DIRTY)
                                 xfs_force_shutdown(tp->t_mountp,
                                                 SHUTDOWN_META_IO_ERROR);
+                        /* bad CRC means corrupted metadata */
+                        if (error == EFSBADCRC)
+                                error = EFSCORRUPTED;
                         return error;
                 }
         }
@@ -375,6 +382,10 @@ xfs_trans_read_buf_map(
                 if (tp->t_flags & XFS_TRANS_DIRTY)
                         xfs_force_shutdown(tp->t_mountp, SHUTDOWN_META_IO_ERROR);
                 xfs_buf_relse(bp);
+
+                /* bad CRC means corrupted metadata */
+                if (error == EFSBADCRC)
+                        error = EFSCORRUPTED;
                 return error;
         }
 #ifdef DEBUG
diff --git a/fs/xfs/xfs_trans_resv.c b/fs/xfs/xfs_trans_resv.c
index 2ffd3e331b49..ae368165244d 100644
--- a/fs/xfs/xfs_trans_resv.c
+++ b/fs/xfs/xfs_trans_resv.c
@@ -81,20 +81,28 @@ xfs_calc_buf_res(
  * on disk. Hence we need an inode reservation function that calculates all this
  * correctly. So, we log:
  *
- * - log op headers for object
+ * - 4 log op headers for object
+ *    - for the ilf, the inode core and 2 forks
  * - inode log format object
- * - the entire inode contents (core + 2 forks)
- * - two bmap btree block headers
+ * - the inode core
+ * - two inode forks containing bmap btree root blocks.
+ *    - the btree data contained by both forks will fit into the inode size,
+ *      hence when combined with the inode core above, we have a total of the
+ *      actual inode size.
+ *    - the BMBT headers need to be accounted separately, as they are
+ *      additional to the records and pointers that fit inside the inode
+ *      forks.
  */
 STATIC uint
 xfs_calc_inode_res(
         struct xfs_mount        *mp,
         uint                    ninodes)
 {
-        return ninodes * (sizeof(struct xlog_op_header) +
-                          sizeof(struct xfs_inode_log_format) +
-                          mp->m_sb.sb_inodesize +
-                          2 * XFS_BMBT_BLOCK_LEN(mp));
+        return ninodes *
+                (4 * sizeof(struct xlog_op_header) +
+                 sizeof(struct xfs_inode_log_format) +
+                 mp->m_sb.sb_inodesize +
+                 2 * XFS_BMBT_BLOCK_LEN(mp));
 }
 
 /*
@@ -204,6 +212,19 @@ xfs_calc_rename_reservation(
 }
 
 /*
+ * For removing an inode from unlinked list at first, we can modify:
+ *    the agi hash list and counters: sector size
+ *    the on disk inode before ours in the agi hash list: inode cluster size
+ */
+STATIC uint
+xfs_calc_iunlink_remove_reservation(
+        struct xfs_mount        *mp)
+{
+        return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
+               max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size);
+}
+
+/*
  * For creating a link to an inode:
  *    the parent directory inode: inode size
  *    the linked inode: inode size
@@ -220,6 +241,7 @@ xfs_calc_link_reservation(
         struct xfs_mount        *mp)
 {
         return XFS_DQUOT_LOGRES(mp) +
+                xfs_calc_iunlink_remove_reservation(mp) +
                 MAX((xfs_calc_inode_res(mp, 2) +
                      xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
                                       XFS_FSB_TO_B(mp, 1))),
@@ -229,6 +251,18 @@ xfs_calc_link_reservation(
 }
 
 /*
+ * For adding an inode to unlinked list we can modify:
+ *    the agi hash list: sector size
+ *    the unlinked inode: inode size
+ */
+STATIC uint
+xfs_calc_iunlink_add_reservation(xfs_mount_t *mp)
+{
+        return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
+                xfs_calc_inode_res(mp, 1);
+}
+
+/*
  * For removing a directory entry we can modify:
  *    the parent directory inode: inode size
  *    the removed inode: inode size
@@ -245,10 +279,11 @@ xfs_calc_remove_reservation(
         struct xfs_mount        *mp)
 {
         return XFS_DQUOT_LOGRES(mp) +
-                MAX((xfs_calc_inode_res(mp, 2) +
+                xfs_calc_iunlink_add_reservation(mp) +
+                MAX((xfs_calc_inode_res(mp, 1) +
                      xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
                                       XFS_FSB_TO_B(mp, 1))),
-                    (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
+                    (xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) +
                      xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
                                       XFS_FSB_TO_B(mp, 1))));
 }
@@ -343,6 +378,20 @@ xfs_calc_create_reservation(
 
 }
 
+STATIC uint
+xfs_calc_create_tmpfile_reservation(
+        struct xfs_mount        *mp)
+{
+        uint    res = XFS_DQUOT_LOGRES(mp);
+
+        if (xfs_sb_version_hascrc(&mp->m_sb))
+                res += xfs_calc_icreate_resv_alloc(mp);
+        else
+                res += xfs_calc_create_resv_alloc(mp);
+
+        return res + xfs_calc_iunlink_add_reservation(mp);
+}
+
 /*
  * Making a new directory is the same as creating a new file.
  */
@@ -383,9 +432,9 @@ xfs_calc_ifree_reservation(
 {
         return XFS_DQUOT_LOGRES(mp) +
                 xfs_calc_inode_res(mp, 1) +
-                xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
+                xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
                 xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) +
-                max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size) +
+                xfs_calc_iunlink_remove_reservation(mp) +
                 xfs_calc_buf_res(1, 0) +
                 xfs_calc_buf_res(2 + mp->m_ialloc_blks +
                                  mp->m_in_maxlevels, 0) +
@@ -644,15 +693,14 @@ xfs_calc_qm_setqlim_reservation(
 
 /*
  * Allocating quota on disk if needed.
- *        the write transaction log space: M_RES(mp)->tr_write.tr_logres
+ *        the write transaction log space for quota file extent allocation
  *        the unit of quota allocation: one system block size
  */
 STATIC uint
 xfs_calc_qm_dqalloc_reservation(
         struct xfs_mount        *mp)
 {
-        ASSERT(M_RES(mp)->tr_write.tr_logres);
-        return M_RES(mp)->tr_write.tr_logres +
+        return xfs_calc_write_reservation(mp) +
                 xfs_calc_buf_res(1,
                         XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1);
 }
@@ -729,6 +777,11 @@ xfs_trans_resv_calc(
         resp->tr_create.tr_logcount = XFS_CREATE_LOG_COUNT;
         resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
 
+        resp->tr_create_tmpfile.tr_logres =
+                        xfs_calc_create_tmpfile_reservation(mp);
+        resp->tr_create_tmpfile.tr_logcount = XFS_CREATE_TMPFILE_LOG_COUNT;
+        resp->tr_create_tmpfile.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
         resp->tr_mkdir.tr_logres = xfs_calc_mkdir_reservation(mp);
         resp->tr_mkdir.tr_logcount = XFS_MKDIR_LOG_COUNT;
         resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
@@ -784,7 +837,6 @@ xfs_trans_resv_calc(
         /* The following transaction are logged in logical format */
         resp->tr_ichange.tr_logres = xfs_calc_ichange_reservation(mp);
         resp->tr_growdata.tr_logres = xfs_calc_growdata_reservation(mp);
-        resp->tr_swrite.tr_logres = xfs_calc_swrite_reservation(mp);
         resp->tr_fsyncts.tr_logres = xfs_calc_swrite_reservation(mp);
         resp->tr_writeid.tr_logres = xfs_calc_writeid_reservation(mp);
         resp->tr_attrsetrt.tr_logres = xfs_calc_attrsetrt_reservation(mp);
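The expanded comment above xfs_calc_inode_res() in this file enumerates what gets logged per inode: four log op headers, the inode log format item, the inode itself (core plus both forks), and two BMBT block headers. As a purely illustrative back-of-envelope of that sum - the structure sizes below are assumptions chosen for the example, not values taken from the kernel headers - the per-inode reservation works out roughly like this:

/* Illustrative only: the constants stand in for sizeof(struct xlog_op_header),
 * sizeof(struct xfs_inode_log_format), sb_inodesize and XFS_BMBT_BLOCK_LEN(mp);
 * they are assumptions, not authoritative kernel values. */
#include <stdio.h>

int main(void)
{
        unsigned int op_hdr_size   = 12;   /* assumed xlog_op_header size */
        unsigned int ilf_size      = 56;   /* assumed inode log format size */
        unsigned int inode_size    = 512;  /* sb_inodesize for this example */
        unsigned int bmbt_hdr_size = 72;   /* assumed BMBT block header size */
        unsigned int ninodes       = 2;    /* e.g. a link-style transaction */

        unsigned int per_inode = 4 * op_hdr_size + ilf_size + inode_size +
                                 2 * bmbt_hdr_size;

        printf("per-inode reservation: %u bytes\n", per_inode);
        printf("for %u inodes: %u bytes\n", ninodes, ninodes * per_inode);
        return 0;
}

With those assumed sizes, a two-inode transaction reserves about 1520 bytes for the inodes alone, before any of the directory or free-space buffers accounted elsewhere in this file.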
diff --git a/fs/xfs/xfs_trans_resv.h b/fs/xfs/xfs_trans_resv.h
index de7de9aaad8a..1097d14cd583 100644
--- a/fs/xfs/xfs_trans_resv.h
+++ b/fs/xfs/xfs_trans_resv.h
@@ -38,11 +38,11 @@ struct xfs_trans_resv {
         struct xfs_trans_res    tr_remove;      /* unlink trans */
         struct xfs_trans_res    tr_symlink;     /* symlink trans */
         struct xfs_trans_res    tr_create;      /* create trans */
+        struct xfs_trans_res    tr_create_tmpfile; /* create O_TMPFILE trans */
         struct xfs_trans_res    tr_mkdir;       /* mkdir trans */
         struct xfs_trans_res    tr_ifree;       /* inode free trans */
         struct xfs_trans_res    tr_ichange;     /* inode update trans */
         struct xfs_trans_res    tr_growdata;    /* fs data section grow trans */
-        struct xfs_trans_res    tr_swrite;      /* sync write inode trans */
         struct xfs_trans_res    tr_addafork;    /* add inode attr fork trans */
         struct xfs_trans_res    tr_writeid;     /* write setuid/setgid file */
         struct xfs_trans_res    tr_attrinval;   /* attr fork buffer
@@ -100,6 +100,7 @@ struct xfs_trans_resv {
 #define XFS_ITRUNCATE_LOG_COUNT         2
 #define XFS_INACTIVE_LOG_COUNT          2
 #define XFS_CREATE_LOG_COUNT            2
+#define XFS_CREATE_TMPFILE_LOG_COUNT    2
 #define XFS_MKDIR_LOG_COUNT             3
 #define XFS_SYMLINK_LOG_COUNT           3
 #define XFS_REMOVE_LOG_COUNT            2
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ea80f1cdff06..81048f9bc783 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2550,6 +2550,9 @@ enum {
 
         /* filesystem does not support filling holes */
         DIO_SKIP_HOLES  = 0x02,
+
+        /* filesystem can handle aio writes beyond i_size */
+        DIO_ASYNC_EXTEND = 0x04,
 };
 
 void dio_end_io(struct bio *bio, int error);
diff --git a/include/uapi/linux/falloc.h b/include/uapi/linux/falloc.h
index 990c4ccf8b61..d1197ae3723c 100644
--- a/include/uapi/linux/falloc.h
+++ b/include/uapi/linux/falloc.h
@@ -5,5 +5,40 @@
 #define FALLOC_FL_PUNCH_HOLE    0x02 /* de-allocates range */
 #define FALLOC_FL_NO_HIDE_STALE 0x04 /* reserved codepoint */
 
+/*
+ * FALLOC_FL_COLLAPSE_RANGE is used to remove a range of a file
+ * without leaving a hole in the file. The contents of the file beyond
+ * the range being removed are appended to the start offset of the range
+ * being removed (i.e. the hole that was punched is "collapsed"),
+ * resulting in a file layout that looks like the range that was
+ * removed never existed. As such, collapsing a range of a file changes
+ * the size of the file, reducing it by the length of the range that
+ * has been removed by the operation.
+ *
+ * Different filesystems may implement different limitations on the
+ * granularity of the operation. Most will limit operations to
+ * filesystem block size boundaries, but this boundary may be larger or
+ * smaller depending on the filesystem and/or the configuration of the
+ * filesystem or file.
+ *
+ * Attempting to collapse a range that crosses the end of the file is
+ * considered an illegal operation - just use ftruncate(2) if you need
+ * to collapse a range that crosses EOF.
+ */
+#define FALLOC_FL_COLLAPSE_RANGE        0x08
+
+/*
+ * FALLOC_FL_ZERO_RANGE is used to convert a range of a file to zeros,
+ * preferably without issuing data IO. Blocks should be preallocated for the
+ * regions that span holes in the file, and the entire range is preferably
+ * converted to unwritten extents - even though the filesystem may choose to
+ * zero out the extents or do anything else that results in zeros being read
+ * from the range while the range remains allocated for the file.
+ *
+ * This can also be used to preallocate blocks past EOF in the same way as
+ * with fallocate. The FALLOC_FL_KEEP_SIZE flag should cause the inode
+ * size to remain the same.
+ */
+#define FALLOC_FL_ZERO_RANGE            0x10
 
 #endif /* _UAPI_FALLOC_H_ */
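The FALLOC_FL_COLLAPSE_RANGE comment above describes the semantics visible to applications. A minimal userspace sketch of the call follows; the path, offset and length are arbitrary example values, the file must already contain data beyond the collapsed range, both offset and length must be aligned to the filesystem block size, and the filesystem must implement the flag (as of this merge, XFS here and ext4 via its own tree):

/* Sketch: collapse a block-aligned range out of an existing file with
 * fallocate(2). "/tmp/collapse-demo.dat" and the 1MiB/64KiB values are
 * arbitrary example choices, not anything mandated by the interface. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#ifndef FALLOC_FL_COLLAPSE_RANGE
#define FALLOC_FL_COLLAPSE_RANGE 0x08   /* value from the uapi header above */
#endif

int main(void)
{
        int fd = open("/tmp/collapse-demo.dat", O_RDWR);
        if (fd < 0) {
                perror("open");
                return EXIT_FAILURE;
        }

        /* remove 64KiB starting at offset 1MiB; the data beyond the range
         * shifts down and the file shrinks by 64KiB */
        if (fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, 1 << 20, 64 << 10) < 0)
                perror("fallocate(FALLOC_FL_COLLAPSE_RANGE)");

        close(fd);
        return 0;
}

Note the local fallback define: at the time of this merge the flag was brand new, so C library headers may not yet carry it.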
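FALLOC_FL_ZERO_RANGE can be exercised the same way. The sketch below zeroes an in-file range and then preallocates a range past EOF with FALLOC_FL_KEEP_SIZE so the visible file size does not change; again the path and offsets are arbitrary example values and the flag values fall back to local defines in case the C library headers lack them:

/* Sketch: zero a range of an existing file without writing zeros, then
 * preallocate/zero past EOF without growing i_size. Example values only. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#ifndef FALLOC_FL_KEEP_SIZE
#define FALLOC_FL_KEEP_SIZE     0x01    /* value from the uapi header */
#endif
#ifndef FALLOC_FL_ZERO_RANGE
#define FALLOC_FL_ZERO_RANGE    0x10    /* value from the uapi header above */
#endif

int main(void)
{
        int fd = open("/tmp/zero-demo.dat", O_RDWR);
        if (fd < 0) {
                perror("open");
                return EXIT_FAILURE;
        }

        /* convert 1MiB at offset 0 to zeros, preferably as unwritten extents */
        if (fallocate(fd, FALLOC_FL_ZERO_RANGE, 0, 1 << 20) < 0)
                perror("fallocate(FALLOC_FL_ZERO_RANGE)");

        /* zero/preallocate a range far past EOF without changing the
         * visible file size */
        if (fallocate(fd, FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE,
                      1 << 30, 1 << 20) < 0)
                perror("fallocate(FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE)");

        close(fd);
        return 0;
}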