aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2016-05-26 13:13:40 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2016-05-26 13:13:40 -0400
commit 0b9210c9c86e46a7a62bbc7b69b84001315072ff (patch)
tree 0a0872c6b998c6fa3de29f1929be025f6060e749
parent c5436731de860b3a3cff70c62d99242418aab1d1 (diff)
parent 555b67e4e729ca544bb4028ab12e532c68b70ddb (diff)
Merge tag 'xfs-for-linus-4.7-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs
Pull xfs updates from Dave Chinner:
 "A pretty average collection of fixes, cleanups and improvements in this request.

  Summary:
   - fixes for mount line parsing, sparse warnings, read-only compat feature remount behaviour
   - allow fast path symlink lookups for inline symlinks.
   - attribute listing cleanups
   - writeback goes direct to bios rather than indirecting through bufferheads
   - transaction allocation cleanup
   - optimised kmem_realloc
   - added configurable error handling for metadata write errors, changed default error handling behaviour from "retry forever" to "retry until unmount then fail"
   - fixed several inode cluster writeback lookup vs reclaim race conditions
   - fixed inode cluster writeback checking wrong inode after lookup
   - fixed bugs where struct xfs_inode freeing wasn't actually RCU safe
   - cleaned up inode reclaim tagging"

* tag 'xfs-for-linus-4.7-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs: (39 commits)
  xfs: fix warning in xfs_finish_page_writeback for non-debug builds
  xfs: move reclaim tagging functions
  xfs: simplify inode reclaim tagging interfaces
  xfs: rename variables in xfs_iflush_cluster for clarity
  xfs: xfs_iflush_cluster has range issues
  xfs: mark reclaimed inodes invalid earlier
  xfs: xfs_inode_free() isn't RCU safe
  xfs: optimise xfs_iext_destroy
  xfs: skip stale inodes in xfs_iflush_cluster
  xfs: fix inode validity check in xfs_iflush_cluster
  xfs: xfs_iflush_cluster fails to abort on error
  xfs: remove xfs_fs_evict_inode()
  xfs: add "fail at unmount" error handling configuration
  xfs: add configuration handlers for specific errors
  xfs: add configuration of error failure speed
  xfs: introduce table-based init for error behaviors
  xfs: add configurable error support to metadata buffers
  xfs: introduce metadata IO error class
  xfs: configurable error behavior via sysfs
  xfs: buffer ->bi_end_io function requires irq-safe lock
  ...
-rw-r--r-- fs/namei.c | 1
-rw-r--r-- fs/xfs/kmem.c | 26
-rw-r--r-- fs/xfs/kmem.h | 2
-rw-r--r-- fs/xfs/libxfs/xfs_attr.c | 58
-rw-r--r-- fs/xfs/libxfs/xfs_bmap.c | 22
-rw-r--r-- fs/xfs/libxfs/xfs_dir2_sf.c | 9
-rw-r--r-- fs/xfs/libxfs/xfs_inode_fork.c | 99
-rw-r--r-- fs/xfs/libxfs/xfs_inode_fork.h | 1
-rw-r--r-- fs/xfs/libxfs/xfs_log_format.h | 5
-rw-r--r-- fs/xfs/libxfs/xfs_sb.c | 8
-rw-r--r-- fs/xfs/libxfs/xfs_shared.h | 102
-rw-r--r-- fs/xfs/xfs_aops.c | 353
-rw-r--r-- fs/xfs/xfs_aops.h | 15
-rw-r--r-- fs/xfs/xfs_attr.h | 4
-rw-r--r-- fs/xfs/xfs_attr_inactive.c | 16
-rw-r--r-- fs/xfs/xfs_attr_list.c | 85
-rw-r--r-- fs/xfs/xfs_bmap_util.c | 45
-rw-r--r-- fs/xfs/xfs_buf.c | 12
-rw-r--r-- fs/xfs/xfs_buf.h | 20
-rw-r--r-- fs/xfs/xfs_buf_item.c | 121
-rw-r--r-- fs/xfs/xfs_dquot.c | 9
-rw-r--r-- fs/xfs/xfs_file.c | 8
-rw-r--r-- fs/xfs/xfs_fsops.c | 14
-rw-r--r-- fs/xfs/xfs_icache.c | 290
-rw-r--r-- fs/xfs/xfs_inode.c | 167
-rw-r--r-- fs/xfs/xfs_inode.h | 5
-rw-r--r-- fs/xfs/xfs_inode_item.c | 6
-rw-r--r-- fs/xfs/xfs_ioctl.c | 31
-rw-r--r-- fs/xfs/xfs_iomap.c | 53
-rw-r--r-- fs/xfs/xfs_iops.c | 117
-rw-r--r-- fs/xfs/xfs_log.c | 62
-rw-r--r-- fs/xfs/xfs_log.h | 3
-rw-r--r-- fs/xfs/xfs_log_cil.c | 1
-rw-r--r-- fs/xfs/xfs_log_priv.h | 1
-rw-r--r-- fs/xfs/xfs_log_recover.c | 12
-rw-r--r-- fs/xfs/xfs_mount.c | 23
-rw-r--r-- fs/xfs/xfs_mount.h | 34
-rw-r--r-- fs/xfs/xfs_pnfs.c | 7
-rw-r--r-- fs/xfs/xfs_qm.c | 9
-rw-r--r-- fs/xfs/xfs_qm_syscalls.c | 26
-rw-r--r-- fs/xfs/xfs_rtalloc.c | 21
-rw-r--r-- fs/xfs/xfs_super.c | 65
-rw-r--r-- fs/xfs/xfs_symlink.c | 37
-rw-r--r-- fs/xfs/xfs_sysfs.c | 291
-rw-r--r-- fs/xfs/xfs_sysfs.h | 3
-rw-r--r-- fs/xfs/xfs_trace.h | 16
-rw-r--r-- fs/xfs/xfs_trans.c | 88
-rw-r--r-- fs/xfs/xfs_trans.h | 8
-rw-r--r-- fs/xfs/xfs_xattr.c | 17
49 files changed, 1293 insertions, 1135 deletions
diff --git a/fs/namei.c b/fs/namei.c
index 5375571cf6e1..15b124c18ed8 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -4542,7 +4542,6 @@ int readlink_copy(char __user *buffer, int buflen, const char *link)
4542out: 4542out:
4543 return len; 4543 return len;
4544} 4544}
4545EXPORT_SYMBOL(readlink_copy);
4546 4545
4547/* 4546/*
4548 * A helper for ->readlink(). This should be used *ONLY* for symlinks that 4547 * A helper for ->readlink(). This should be used *ONLY* for symlinks that
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c
index 686ba6fb20dd..339c696bbc01 100644
--- a/fs/xfs/kmem.c
+++ b/fs/xfs/kmem.c
@@ -93,19 +93,23 @@ kmem_zalloc_large(size_t size, xfs_km_flags_t flags)
93} 93}
94 94
95void * 95void *
96kmem_realloc(const void *ptr, size_t newsize, size_t oldsize, 96kmem_realloc(const void *old, size_t newsize, xfs_km_flags_t flags)
97 xfs_km_flags_t flags)
98{ 97{
99 void *new; 98 int retries = 0;
99 gfp_t lflags = kmem_flags_convert(flags);
100 void *ptr;
100 101
101 new = kmem_alloc(newsize, flags); 102 do {
102 if (ptr) { 103 ptr = krealloc(old, newsize, lflags);
103 if (new) 104 if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
104 memcpy(new, ptr, 105 return ptr;
105 ((oldsize < newsize) ? oldsize : newsize)); 106 if (!(++retries % 100))
106 kmem_free(ptr); 107 xfs_err(NULL,
107 } 108 "%s(%u) possible memory allocation deadlock size %zu in %s (mode:0x%x)",
108 return new; 109 current->comm, current->pid,
110 newsize, __func__, lflags);
111 congestion_wait(BLK_RW_ASYNC, HZ/50);
112 } while (1);
109} 113}
110 114
111void * 115void *
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h
index d1c66e465ca5..689f746224e7 100644
--- a/fs/xfs/kmem.h
+++ b/fs/xfs/kmem.h
@@ -62,7 +62,7 @@ kmem_flags_convert(xfs_km_flags_t flags)
62 62
63extern void *kmem_alloc(size_t, xfs_km_flags_t); 63extern void *kmem_alloc(size_t, xfs_km_flags_t);
64extern void *kmem_zalloc_large(size_t size, xfs_km_flags_t); 64extern void *kmem_zalloc_large(size_t size, xfs_km_flags_t);
65extern void *kmem_realloc(const void *, size_t, size_t, xfs_km_flags_t); 65extern void *kmem_realloc(const void *, size_t, xfs_km_flags_t);
66static inline void kmem_free(const void *ptr) 66static inline void kmem_free(const void *ptr)
67{ 67{
68 kvfree(ptr); 68 kvfree(ptr);
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index fa3b948ef9c2..4e126f41a0aa 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -242,37 +242,21 @@ xfs_attr_set(
242 return error; 242 return error;
243 } 243 }
244 244
245 /* 245 tres.tr_logres = M_RES(mp)->tr_attrsetm.tr_logres +
246 * Start our first transaction of the day. 246 M_RES(mp)->tr_attrsetrt.tr_logres * args.total;
247 * 247 tres.tr_logcount = XFS_ATTRSET_LOG_COUNT;
248 * All future transactions during this code must be "chained" off 248 tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
249 * this one via the trans_dup() call. All transactions will contain
250 * the inode, and the inode will always be marked with trans_ihold().
251 * Since the inode will be locked in all transactions, we must log
252 * the inode in every transaction to let it float upward through
253 * the log.
254 */
255 args.trans = xfs_trans_alloc(mp, XFS_TRANS_ATTR_SET);
256 249
257 /* 250 /*
258 * Root fork attributes can use reserved data blocks for this 251 * Root fork attributes can use reserved data blocks for this
259 * operation if necessary 252 * operation if necessary
260 */ 253 */
261 254 error = xfs_trans_alloc(mp, &tres, args.total, 0,
262 if (rsvd) 255 rsvd ? XFS_TRANS_RESERVE : 0, &args.trans);
263 args.trans->t_flags |= XFS_TRANS_RESERVE; 256 if (error)
264
265 tres.tr_logres = M_RES(mp)->tr_attrsetm.tr_logres +
266 M_RES(mp)->tr_attrsetrt.tr_logres * args.total;
267 tres.tr_logcount = XFS_ATTRSET_LOG_COUNT;
268 tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
269 error = xfs_trans_reserve(args.trans, &tres, args.total, 0);
270 if (error) {
271 xfs_trans_cancel(args.trans);
272 return error; 257 return error;
273 }
274 xfs_ilock(dp, XFS_ILOCK_EXCL);
275 258
259 xfs_ilock(dp, XFS_ILOCK_EXCL);
276 error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0, 260 error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0,
277 rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : 261 rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
278 XFS_QMOPT_RES_REGBLKS); 262 XFS_QMOPT_RES_REGBLKS);
@@ -429,31 +413,15 @@ xfs_attr_remove(
429 return error; 413 return error;
430 414
431 /* 415 /*
432 * Start our first transaction of the day.
433 *
434 * All future transactions during this code must be "chained" off
435 * this one via the trans_dup() call. All transactions will contain
436 * the inode, and the inode will always be marked with trans_ihold().
437 * Since the inode will be locked in all transactions, we must log
438 * the inode in every transaction to let it float upward through
439 * the log.
440 */
441 args.trans = xfs_trans_alloc(mp, XFS_TRANS_ATTR_RM);
442
443 /*
444 * Root fork attributes can use reserved data blocks for this 416 * Root fork attributes can use reserved data blocks for this
445 * operation if necessary 417 * operation if necessary
446 */ 418 */
447 419 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_attrrm,
448 if (flags & ATTR_ROOT) 420 XFS_ATTRRM_SPACE_RES(mp), 0,
449 args.trans->t_flags |= XFS_TRANS_RESERVE; 421 (flags & ATTR_ROOT) ? XFS_TRANS_RESERVE : 0,
450 422 &args.trans);
451 error = xfs_trans_reserve(args.trans, &M_RES(mp)->tr_attrrm, 423 if (error)
452 XFS_ATTRRM_SPACE_RES(mp), 0);
453 if (error) {
454 xfs_trans_cancel(args.trans);
455 return error; 424 return error;
456 }
457 425
458 xfs_ilock(dp, XFS_ILOCK_EXCL); 426 xfs_ilock(dp, XFS_ILOCK_EXCL);
459 /* 427 /*
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index ce41d7fe753c..932381caef1b 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -1121,15 +1121,14 @@ xfs_bmap_add_attrfork(
1121 1121
1122 mp = ip->i_mount; 1122 mp = ip->i_mount;
1123 ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); 1123 ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
1124 tp = xfs_trans_alloc(mp, XFS_TRANS_ADDAFORK); 1124
1125 blks = XFS_ADDAFORK_SPACE_RES(mp); 1125 blks = XFS_ADDAFORK_SPACE_RES(mp);
1126 if (rsvd) 1126
1127 tp->t_flags |= XFS_TRANS_RESERVE; 1127 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_addafork, blks, 0,
1128 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_addafork, blks, 0); 1128 rsvd ? XFS_TRANS_RESERVE : 0, &tp);
1129 if (error) { 1129 if (error)
1130 xfs_trans_cancel(tp);
1131 return error; 1130 return error;
1132 } 1131
1133 xfs_ilock(ip, XFS_ILOCK_EXCL); 1132 xfs_ilock(ip, XFS_ILOCK_EXCL);
1134 error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ? 1133 error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
1135 XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : 1134 XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
@@ -6026,13 +6025,10 @@ xfs_bmap_split_extent(
6026 xfs_fsblock_t firstfsb; 6025 xfs_fsblock_t firstfsb;
6027 int error; 6026 int error;
6028 6027
6029 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); 6028 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write,
6030 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, 6029 XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp);
6031 XFS_DIOSTRAT_SPACE_RES(mp, 0), 0); 6030 if (error)
6032 if (error) {
6033 xfs_trans_cancel(tp);
6034 return error; 6031 return error;
6035 }
6036 6032
6037 xfs_ilock(ip, XFS_ILOCK_EXCL); 6033 xfs_ilock(ip, XFS_ILOCK_EXCL);
6038 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 6034 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
index 974d62e677f4..e5bb9cc3b243 100644
--- a/fs/xfs/libxfs/xfs_dir2_sf.c
+++ b/fs/xfs/libxfs/xfs_dir2_sf.c
@@ -257,15 +257,12 @@ xfs_dir2_block_to_sf(
257 * 257 *
258 * Convert the inode to local format and copy the data in. 258 * Convert the inode to local format and copy the data in.
259 */ 259 */
260 dp->i_df.if_flags &= ~XFS_IFEXTENTS;
261 dp->i_df.if_flags |= XFS_IFINLINE;
262 dp->i_d.di_format = XFS_DINODE_FMT_LOCAL;
263 ASSERT(dp->i_df.if_bytes == 0); 260 ASSERT(dp->i_df.if_bytes == 0);
264 xfs_idata_realloc(dp, size, XFS_DATA_FORK); 261 xfs_init_local_fork(dp, XFS_DATA_FORK, dst, size);
262 dp->i_d.di_format = XFS_DINODE_FMT_LOCAL;
263 dp->i_d.di_size = size;
265 264
266 logflags |= XFS_ILOG_DDATA; 265 logflags |= XFS_ILOG_DDATA;
267 memcpy(dp->i_df.if_u1.if_data, dst, size);
268 dp->i_d.di_size = size;
269 xfs_dir2_sf_check(args); 266 xfs_dir2_sf_check(args);
270out: 267out:
271 xfs_trans_log_inode(args->trans, dp, logflags); 268 xfs_trans_log_inode(args->trans, dp, logflags);
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index 11faf7df14c8..bbcc8c7a44b3 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -231,6 +231,48 @@ xfs_iformat_fork(
231 return error; 231 return error;
232} 232}
233 233
234void
235xfs_init_local_fork(
236 struct xfs_inode *ip,
237 int whichfork,
238 const void *data,
239 int size)
240{
241 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
242 int mem_size = size, real_size = 0;
243 bool zero_terminate;
244
245 /*
246 * If we are using the local fork to store a symlink body we need to
247 * zero-terminate it so that we can pass it back to the VFS directly.
248 * Overallocate the in-memory fork by one for that and add a zero
249 * to terminate it below.
250 */
251 zero_terminate = S_ISLNK(VFS_I(ip)->i_mode);
252 if (zero_terminate)
253 mem_size++;
254
255 if (size == 0)
256 ifp->if_u1.if_data = NULL;
257 else if (mem_size <= sizeof(ifp->if_u2.if_inline_data))
258 ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
259 else {
260 real_size = roundup(mem_size, 4);
261 ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS);
262 }
263
264 if (size) {
265 memcpy(ifp->if_u1.if_data, data, size);
266 if (zero_terminate)
267 ifp->if_u1.if_data[size] = '\0';
268 }
269
270 ifp->if_bytes = size;
271 ifp->if_real_bytes = real_size;
272 ifp->if_flags &= ~(XFS_IFEXTENTS | XFS_IFBROOT);
273 ifp->if_flags |= XFS_IFINLINE;
274}
275
234/* 276/*
235 * The file is in-lined in the on-disk inode. 277 * The file is in-lined in the on-disk inode.
236 * If it fits into if_inline_data, then copy 278 * If it fits into if_inline_data, then copy
@@ -248,8 +290,6 @@ xfs_iformat_local(
248 int whichfork, 290 int whichfork,
249 int size) 291 int size)
250{ 292{
251 xfs_ifork_t *ifp;
252 int real_size;
253 293
254 /* 294 /*
255 * If the size is unreasonable, then something 295 * If the size is unreasonable, then something
@@ -265,22 +305,8 @@ xfs_iformat_local(
265 ip->i_mount, dip); 305 ip->i_mount, dip);
266 return -EFSCORRUPTED; 306 return -EFSCORRUPTED;
267 } 307 }
268 ifp = XFS_IFORK_PTR(ip, whichfork); 308
269 real_size = 0; 309 xfs_init_local_fork(ip, whichfork, XFS_DFORK_PTR(dip, whichfork), size);
270 if (size == 0)
271 ifp->if_u1.if_data = NULL;
272 else if (size <= sizeof(ifp->if_u2.if_inline_data))
273 ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
274 else {
275 real_size = roundup(size, 4);
276 ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS);
277 }
278 ifp->if_bytes = size;
279 ifp->if_real_bytes = real_size;
280 if (size)
281 memcpy(ifp->if_u1.if_data, XFS_DFORK_PTR(dip, whichfork), size);
282 ifp->if_flags &= ~XFS_IFEXTENTS;
283 ifp->if_flags |= XFS_IFINLINE;
284 return 0; 310 return 0;
285} 311}
286 312
@@ -516,7 +542,6 @@ xfs_iroot_realloc(
516 new_max = cur_max + rec_diff; 542 new_max = cur_max + rec_diff;
517 new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max); 543 new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
518 ifp->if_broot = kmem_realloc(ifp->if_broot, new_size, 544 ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
519 XFS_BMAP_BROOT_SPACE_CALC(mp, cur_max),
520 KM_SLEEP | KM_NOFS); 545 KM_SLEEP | KM_NOFS);
521 op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, 546 op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
522 ifp->if_broot_bytes); 547 ifp->if_broot_bytes);
@@ -660,7 +685,6 @@ xfs_idata_realloc(
660 ifp->if_u1.if_data = 685 ifp->if_u1.if_data =
661 kmem_realloc(ifp->if_u1.if_data, 686 kmem_realloc(ifp->if_u1.if_data,
662 real_size, 687 real_size,
663 ifp->if_real_bytes,
664 KM_SLEEP | KM_NOFS); 688 KM_SLEEP | KM_NOFS);
665 } 689 }
666 } else { 690 } else {
@@ -1376,8 +1400,7 @@ xfs_iext_realloc_direct(
1376 if (rnew_size != ifp->if_real_bytes) { 1400 if (rnew_size != ifp->if_real_bytes) {
1377 ifp->if_u1.if_extents = 1401 ifp->if_u1.if_extents =
1378 kmem_realloc(ifp->if_u1.if_extents, 1402 kmem_realloc(ifp->if_u1.if_extents,
1379 rnew_size, 1403 rnew_size, KM_NOFS);
1380 ifp->if_real_bytes, KM_NOFS);
1381 } 1404 }
1382 if (rnew_size > ifp->if_real_bytes) { 1405 if (rnew_size > ifp->if_real_bytes) {
1383 memset(&ifp->if_u1.if_extents[ifp->if_bytes / 1406 memset(&ifp->if_u1.if_extents[ifp->if_bytes /
@@ -1461,9 +1484,8 @@ xfs_iext_realloc_indirect(
1461 if (new_size == 0) { 1484 if (new_size == 0) {
1462 xfs_iext_destroy(ifp); 1485 xfs_iext_destroy(ifp);
1463 } else { 1486 } else {
1464 ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *) 1487 ifp->if_u1.if_ext_irec =
1465 kmem_realloc(ifp->if_u1.if_ext_irec, 1488 kmem_realloc(ifp->if_u1.if_ext_irec, new_size, KM_NOFS);
1466 new_size, size, KM_NOFS);
1467 } 1489 }
1468} 1490}
1469 1491
@@ -1497,6 +1519,24 @@ xfs_iext_indirect_to_direct(
1497} 1519}
1498 1520
1499/* 1521/*
1522 * Remove all records from the indirection array.
1523 */
1524STATIC void
1525xfs_iext_irec_remove_all(
1526 struct xfs_ifork *ifp)
1527{
1528 int nlists;
1529 int i;
1530
1531 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1532 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1533 for (i = 0; i < nlists; i++)
1534 kmem_free(ifp->if_u1.if_ext_irec[i].er_extbuf);
1535 kmem_free(ifp->if_u1.if_ext_irec);
1536 ifp->if_flags &= ~XFS_IFEXTIREC;
1537}
1538
1539/*
1500 * Free incore file extents. 1540 * Free incore file extents.
1501 */ 1541 */
1502void 1542void
@@ -1504,14 +1544,7 @@ xfs_iext_destroy(
1504 xfs_ifork_t *ifp) /* inode fork pointer */ 1544 xfs_ifork_t *ifp) /* inode fork pointer */
1505{ 1545{
1506 if (ifp->if_flags & XFS_IFEXTIREC) { 1546 if (ifp->if_flags & XFS_IFEXTIREC) {
1507 int erp_idx; 1547 xfs_iext_irec_remove_all(ifp);
1508 int nlists;
1509
1510 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1511 for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) {
1512 xfs_iext_irec_remove(ifp, erp_idx);
1513 }
1514 ifp->if_flags &= ~XFS_IFEXTIREC;
1515 } else if (ifp->if_real_bytes) { 1548 } else if (ifp->if_real_bytes) {
1516 kmem_free(ifp->if_u1.if_extents); 1549 kmem_free(ifp->if_u1.if_extents);
1517 } else if (ifp->if_bytes) { 1550 } else if (ifp->if_bytes) {
diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h
index 7d3b1ed6dcbe..f95e072ae646 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.h
+++ b/fs/xfs/libxfs/xfs_inode_fork.h
@@ -134,6 +134,7 @@ void xfs_iroot_realloc(struct xfs_inode *, int, int);
134int xfs_iread_extents(struct xfs_trans *, struct xfs_inode *, int); 134int xfs_iread_extents(struct xfs_trans *, struct xfs_inode *, int);
135int xfs_iextents_copy(struct xfs_inode *, struct xfs_bmbt_rec *, 135int xfs_iextents_copy(struct xfs_inode *, struct xfs_bmbt_rec *,
136 int); 136 int);
137void xfs_init_local_fork(struct xfs_inode *, int, const void *, int);
137 138
138struct xfs_bmbt_rec_host * 139struct xfs_bmbt_rec_host *
139 xfs_iext_get_ext(struct xfs_ifork *, xfs_extnum_t); 140 xfs_iext_get_ext(struct xfs_ifork *, xfs_extnum_t);
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index d54a8018b079..e8f49c029ff0 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -212,6 +212,11 @@ typedef struct xfs_trans_header {
212#define XFS_TRANS_HEADER_MAGIC 0x5452414e /* TRAN */ 212#define XFS_TRANS_HEADER_MAGIC 0x5452414e /* TRAN */
213 213
214/* 214/*
215 * The only type valid for th_type in CIL-enabled file system logs:
216 */
217#define XFS_TRANS_CHECKPOINT 40
218
219/*
215 * Log item types. 220 * Log item types.
216 */ 221 */
217#define XFS_LI_EFI 0x1236 222#define XFS_LI_EFI 0x1236
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 8a53eaa349f4..12ca86778e02 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -838,12 +838,10 @@ xfs_sync_sb(
838 struct xfs_trans *tp; 838 struct xfs_trans *tp;
839 int error; 839 int error;
840 840
841 tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_CHANGE, KM_SLEEP); 841 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_sb, 0, 0,
842 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_sb, 0, 0); 842 XFS_TRANS_NO_WRITECOUNT, &tp);
843 if (error) { 843 if (error)
844 xfs_trans_cancel(tp);
845 return error; 844 return error;
846 }
847 845
848 xfs_log_sb(tp); 846 xfs_log_sb(tp);
849 if (wait) 847 if (wait)
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index 81ac870834da..16002b5ec4eb 100644
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -56,103 +56,6 @@ extern const struct xfs_buf_ops xfs_symlink_buf_ops;
56extern const struct xfs_buf_ops xfs_rtbuf_ops; 56extern const struct xfs_buf_ops xfs_rtbuf_ops;
57 57
58/* 58/*
59 * Transaction types. Used to distinguish types of buffers. These never reach
60 * the log.
61 */
62#define XFS_TRANS_SETATTR_NOT_SIZE 1
63#define XFS_TRANS_SETATTR_SIZE 2
64#define XFS_TRANS_INACTIVE 3
65#define XFS_TRANS_CREATE 4
66#define XFS_TRANS_CREATE_TRUNC 5
67#define XFS_TRANS_TRUNCATE_FILE 6
68#define XFS_TRANS_REMOVE 7
69#define XFS_TRANS_LINK 8
70#define XFS_TRANS_RENAME 9
71#define XFS_TRANS_MKDIR 10
72#define XFS_TRANS_RMDIR 11
73#define XFS_TRANS_SYMLINK 12
74#define XFS_TRANS_SET_DMATTRS 13
75#define XFS_TRANS_GROWFS 14
76#define XFS_TRANS_STRAT_WRITE 15
77#define XFS_TRANS_DIOSTRAT 16
78/* 17 was XFS_TRANS_WRITE_SYNC */
79#define XFS_TRANS_WRITEID 18
80#define XFS_TRANS_ADDAFORK 19
81#define XFS_TRANS_ATTRINVAL 20
82#define XFS_TRANS_ATRUNCATE 21
83#define XFS_TRANS_ATTR_SET 22
84#define XFS_TRANS_ATTR_RM 23
85#define XFS_TRANS_ATTR_FLAG 24
86#define XFS_TRANS_CLEAR_AGI_BUCKET 25
87#define XFS_TRANS_SB_CHANGE 26
88/*
89 * Dummy entries since we use the transaction type to index into the
90 * trans_type[] in xlog_recover_print_trans_head()
91 */
92#define XFS_TRANS_DUMMY1 27
93#define XFS_TRANS_DUMMY2 28
94#define XFS_TRANS_QM_QUOTAOFF 29
95#define XFS_TRANS_QM_DQALLOC 30
96#define XFS_TRANS_QM_SETQLIM 31
97#define XFS_TRANS_QM_DQCLUSTER 32
98#define XFS_TRANS_QM_QINOCREATE 33
99#define XFS_TRANS_QM_QUOTAOFF_END 34
100#define XFS_TRANS_FSYNC_TS 35
101#define XFS_TRANS_GROWFSRT_ALLOC 36
102#define XFS_TRANS_GROWFSRT_ZERO 37
103#define XFS_TRANS_GROWFSRT_FREE 38
104#define XFS_TRANS_SWAPEXT 39
105#define XFS_TRANS_CHECKPOINT 40
106#define XFS_TRANS_ICREATE 41
107#define XFS_TRANS_CREATE_TMPFILE 42
108#define XFS_TRANS_TYPE_MAX 43
109/* new transaction types need to be reflected in xfs_logprint(8) */
110
111#define XFS_TRANS_TYPES \
112 { XFS_TRANS_SETATTR_NOT_SIZE, "SETATTR_NOT_SIZE" }, \
113 { XFS_TRANS_SETATTR_SIZE, "SETATTR_SIZE" }, \
114 { XFS_TRANS_INACTIVE, "INACTIVE" }, \
115 { XFS_TRANS_CREATE, "CREATE" }, \
116 { XFS_TRANS_CREATE_TRUNC, "CREATE_TRUNC" }, \
117 { XFS_TRANS_TRUNCATE_FILE, "TRUNCATE_FILE" }, \
118 { XFS_TRANS_REMOVE, "REMOVE" }, \
119 { XFS_TRANS_LINK, "LINK" }, \
120 { XFS_TRANS_RENAME, "RENAME" }, \
121 { XFS_TRANS_MKDIR, "MKDIR" }, \
122 { XFS_TRANS_RMDIR, "RMDIR" }, \
123 { XFS_TRANS_SYMLINK, "SYMLINK" }, \
124 { XFS_TRANS_SET_DMATTRS, "SET_DMATTRS" }, \
125 { XFS_TRANS_GROWFS, "GROWFS" }, \
126 { XFS_TRANS_STRAT_WRITE, "STRAT_WRITE" }, \
127 { XFS_TRANS_DIOSTRAT, "DIOSTRAT" }, \
128 { XFS_TRANS_WRITEID, "WRITEID" }, \
129 { XFS_TRANS_ADDAFORK, "ADDAFORK" }, \
130 { XFS_TRANS_ATTRINVAL, "ATTRINVAL" }, \
131 { XFS_TRANS_ATRUNCATE, "ATRUNCATE" }, \
132 { XFS_TRANS_ATTR_SET, "ATTR_SET" }, \
133 { XFS_TRANS_ATTR_RM, "ATTR_RM" }, \
134 { XFS_TRANS_ATTR_FLAG, "ATTR_FLAG" }, \
135 { XFS_TRANS_CLEAR_AGI_BUCKET, "CLEAR_AGI_BUCKET" }, \
136 { XFS_TRANS_SB_CHANGE, "SBCHANGE" }, \
137 { XFS_TRANS_DUMMY1, "DUMMY1" }, \
138 { XFS_TRANS_DUMMY2, "DUMMY2" }, \
139 { XFS_TRANS_QM_QUOTAOFF, "QM_QUOTAOFF" }, \
140 { XFS_TRANS_QM_DQALLOC, "QM_DQALLOC" }, \
141 { XFS_TRANS_QM_SETQLIM, "QM_SETQLIM" }, \
142 { XFS_TRANS_QM_DQCLUSTER, "QM_DQCLUSTER" }, \
143 { XFS_TRANS_QM_QINOCREATE, "QM_QINOCREATE" }, \
144 { XFS_TRANS_QM_QUOTAOFF_END, "QM_QOFF_END" }, \
145 { XFS_TRANS_FSYNC_TS, "FSYNC_TS" }, \
146 { XFS_TRANS_GROWFSRT_ALLOC, "GROWFSRT_ALLOC" }, \
147 { XFS_TRANS_GROWFSRT_ZERO, "GROWFSRT_ZERO" }, \
148 { XFS_TRANS_GROWFSRT_FREE, "GROWFSRT_FREE" }, \
149 { XFS_TRANS_SWAPEXT, "SWAPEXT" }, \
150 { XFS_TRANS_CHECKPOINT, "CHECKPOINT" }, \
151 { XFS_TRANS_ICREATE, "ICREATE" }, \
152 { XFS_TRANS_CREATE_TMPFILE, "CREATE_TMPFILE" }, \
153 { XLOG_UNMOUNT_REC_TYPE, "UNMOUNT" }
154
155/*
156 * This structure is used to track log items associated with 59 * This structure is used to track log items associated with
157 * a transaction. It points to the log item and keeps some 60 * a transaction. It points to the log item and keeps some
158 * flags to track the state of the log item. It also tracks 61 * flags to track the state of the log item. It also tracks
@@ -181,8 +84,9 @@ int xfs_log_calc_minimum_size(struct xfs_mount *);
181#define XFS_TRANS_SYNC 0x08 /* make commit synchronous */ 84#define XFS_TRANS_SYNC 0x08 /* make commit synchronous */
182#define XFS_TRANS_DQ_DIRTY 0x10 /* at least one dquot in trx dirty */ 85#define XFS_TRANS_DQ_DIRTY 0x10 /* at least one dquot in trx dirty */
183#define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */ 86#define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */
184#define XFS_TRANS_FREEZE_PROT 0x40 /* Transaction has elevated writer 87#define XFS_TRANS_NO_WRITECOUNT 0x40 /* do not elevate SB writecount */
185 count in superblock */ 88#define XFS_TRANS_NOFS 0x80 /* pass KM_NOFS to kmem_alloc */
89
186/* 90/*
187 * Field values for xfs_trans_mod_sb. 91 * Field values for xfs_trans_mod_sb.
188 */ 92 */
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index c535887c60a8..4c463b99fe57 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -84,23 +84,71 @@ xfs_find_bdev_for_inode(
84} 84}
85 85
86/* 86/*
87 * We're now finished for good with this ioend structure. 87 * We're now finished for good with this page. Update the page state via the
88 * Update the page state via the associated buffer_heads, 88 * associated buffer_heads, paying attention to the start and end offsets that
89 * release holds on the inode and bio, and finally free 89 * we need to process on the page.
90 * up memory. Do not use the ioend after this. 90 */
91static void
92xfs_finish_page_writeback(
93 struct inode *inode,
94 struct bio_vec *bvec,
95 int error)
96{
97 unsigned int end = bvec->bv_offset + bvec->bv_len - 1;
98 struct buffer_head *head, *bh;
99 unsigned int off = 0;
100
101 ASSERT(bvec->bv_offset < PAGE_SIZE);
102 ASSERT((bvec->bv_offset & ((1 << inode->i_blkbits) - 1)) == 0);
103 ASSERT(end < PAGE_SIZE);
104 ASSERT((bvec->bv_len & ((1 << inode->i_blkbits) - 1)) == 0);
105
106 bh = head = page_buffers(bvec->bv_page);
107
108 do {
109 if (off < bvec->bv_offset)
110 goto next_bh;
111 if (off > end)
112 break;
113 bh->b_end_io(bh, !error);
114next_bh:
115 off += bh->b_size;
116 } while ((bh = bh->b_this_page) != head);
117}
118
119/*
120 * We're now finished for good with this ioend structure. Update the page
121 * state, release holds on bios, and finally free up memory. Do not use the
122 * ioend after this.
91 */ 123 */
92STATIC void 124STATIC void
93xfs_destroy_ioend( 125xfs_destroy_ioend(
94 xfs_ioend_t *ioend) 126 struct xfs_ioend *ioend,
127 int error)
95{ 128{
96 struct buffer_head *bh, *next; 129 struct inode *inode = ioend->io_inode;
130 struct bio *last = ioend->io_bio;
131 struct bio *bio, *next;
97 132
98 for (bh = ioend->io_buffer_head; bh; bh = next) { 133 for (bio = &ioend->io_inline_bio; bio; bio = next) {
99 next = bh->b_private; 134 struct bio_vec *bvec;
100 bh->b_end_io(bh, !ioend->io_error); 135 int i;
101 } 136
137 /*
138 * For the last bio, bi_private points to the ioend, so we
139 * need to explicitly end the iteration here.
140 */
141 if (bio == last)
142 next = NULL;
143 else
144 next = bio->bi_private;
102 145
103 mempool_free(ioend, xfs_ioend_pool); 146 /* walk each page on bio, ending page IO on them */
147 bio_for_each_segment_all(bvec, bio, i)
148 xfs_finish_page_writeback(inode, bvec, error);
149
150 bio_put(bio);
151 }
104} 152}
105 153
106/* 154/*
@@ -120,13 +168,9 @@ xfs_setfilesize_trans_alloc(
120 struct xfs_trans *tp; 168 struct xfs_trans *tp;
121 int error; 169 int error;
122 170
123 tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); 171 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
124 172 if (error)
125 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
126 if (error) {
127 xfs_trans_cancel(tp);
128 return error; 173 return error;
129 }
130 174
131 ioend->io_append_trans = tp; 175 ioend->io_append_trans = tp;
132 176
@@ -174,7 +218,8 @@ xfs_setfilesize(
174 218
175STATIC int 219STATIC int
176xfs_setfilesize_ioend( 220xfs_setfilesize_ioend(
177 struct xfs_ioend *ioend) 221 struct xfs_ioend *ioend,
222 int error)
178{ 223{
179 struct xfs_inode *ip = XFS_I(ioend->io_inode); 224 struct xfs_inode *ip = XFS_I(ioend->io_inode);
180 struct xfs_trans *tp = ioend->io_append_trans; 225 struct xfs_trans *tp = ioend->io_append_trans;
@@ -188,53 +233,32 @@ xfs_setfilesize_ioend(
188 __sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS); 233 __sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS);
189 234
190 /* we abort the update if there was an IO error */ 235 /* we abort the update if there was an IO error */
191 if (ioend->io_error) { 236 if (error) {
192 xfs_trans_cancel(tp); 237 xfs_trans_cancel(tp);
193 return ioend->io_error; 238 return error;
194 } 239 }
195 240
196 return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size); 241 return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);
197} 242}
198 243
199/* 244/*
200 * Schedule IO completion handling on the final put of an ioend.
201 *
202 * If there is no work to do we might as well call it a day and free the
203 * ioend right now.
204 */
205STATIC void
206xfs_finish_ioend(
207 struct xfs_ioend *ioend)
208{
209 if (atomic_dec_and_test(&ioend->io_remaining)) {
210 struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount;
211
212 if (ioend->io_type == XFS_IO_UNWRITTEN)
213 queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
214 else if (ioend->io_append_trans)
215 queue_work(mp->m_data_workqueue, &ioend->io_work);
216 else
217 xfs_destroy_ioend(ioend);
218 }
219}
220
221/*
222 * IO write completion. 245 * IO write completion.
223 */ 246 */
224STATIC void 247STATIC void
225xfs_end_io( 248xfs_end_io(
226 struct work_struct *work) 249 struct work_struct *work)
227{ 250{
228 xfs_ioend_t *ioend = container_of(work, xfs_ioend_t, io_work); 251 struct xfs_ioend *ioend =
229 struct xfs_inode *ip = XFS_I(ioend->io_inode); 252 container_of(work, struct xfs_ioend, io_work);
230 int error = 0; 253 struct xfs_inode *ip = XFS_I(ioend->io_inode);
254 int error = ioend->io_bio->bi_error;
231 255
232 /* 256 /*
233 * Set an error if the mount has shut down and proceed with end I/O 257 * Set an error if the mount has shut down and proceed with end I/O
234 * processing so it can perform whatever cleanups are necessary. 258 * processing so it can perform whatever cleanups are necessary.
235 */ 259 */
236 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 260 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
237 ioend->io_error = -EIO; 261 error = -EIO;
238 262
239 /* 263 /*
240 * For unwritten extents we need to issue transactions to convert a 264 * For unwritten extents we need to issue transactions to convert a
@@ -244,55 +268,33 @@ xfs_end_io(
244 * on error. 268 * on error.
245 */ 269 */
246 if (ioend->io_type == XFS_IO_UNWRITTEN) { 270 if (ioend->io_type == XFS_IO_UNWRITTEN) {
247 if (ioend->io_error) 271 if (error)
248 goto done; 272 goto done;
249 error = xfs_iomap_write_unwritten(ip, ioend->io_offset, 273 error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
250 ioend->io_size); 274 ioend->io_size);
251 } else if (ioend->io_append_trans) { 275 } else if (ioend->io_append_trans) {
252 error = xfs_setfilesize_ioend(ioend); 276 error = xfs_setfilesize_ioend(ioend, error);
253 } else { 277 } else {
254 ASSERT(!xfs_ioend_is_append(ioend)); 278 ASSERT(!xfs_ioend_is_append(ioend));
255 } 279 }
256 280
257done: 281done:
258 if (error) 282 xfs_destroy_ioend(ioend, error);
259 ioend->io_error = error;
260 xfs_destroy_ioend(ioend);
261} 283}
262 284
263/* 285STATIC void
264 * Allocate and initialise an IO completion structure. 286xfs_end_bio(
265 * We need to track unwritten extent write completion here initially. 287 struct bio *bio)
266 * We'll need to extend this for updating the ondisk inode size later
267 * (vs. incore size).
268 */
269STATIC xfs_ioend_t *
270xfs_alloc_ioend(
271 struct inode *inode,
272 unsigned int type)
273{ 288{
274 xfs_ioend_t *ioend; 289 struct xfs_ioend *ioend = bio->bi_private;
275 290 struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount;
276 ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS);
277
278 /*
279 * Set the count to 1 initially, which will prevent an I/O
280 * completion callback from happening before we have started
281 * all the I/O from calling the completion routine too early.
282 */
283 atomic_set(&ioend->io_remaining, 1);
284 ioend->io_error = 0;
285 INIT_LIST_HEAD(&ioend->io_list);
286 ioend->io_type = type;
287 ioend->io_inode = inode;
288 ioend->io_buffer_head = NULL;
289 ioend->io_buffer_tail = NULL;
290 ioend->io_offset = 0;
291 ioend->io_size = 0;
292 ioend->io_append_trans = NULL;
293 291
294 INIT_WORK(&ioend->io_work, xfs_end_io); 292 if (ioend->io_type == XFS_IO_UNWRITTEN)
295 return ioend; 293 queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
294 else if (ioend->io_append_trans)
295 queue_work(mp->m_data_workqueue, &ioend->io_work);
296 else
297 xfs_destroy_ioend(ioend, bio->bi_error);
296} 298}
297 299
298STATIC int 300STATIC int
@@ -364,50 +366,6 @@ xfs_imap_valid(
364 offset < imap->br_startoff + imap->br_blockcount; 366 offset < imap->br_startoff + imap->br_blockcount;
365} 367}
366 368
367/*
368 * BIO completion handler for buffered IO.
369 */
370STATIC void
371xfs_end_bio(
372 struct bio *bio)
373{
374 xfs_ioend_t *ioend = bio->bi_private;
375
376 if (!ioend->io_error)
377 ioend->io_error = bio->bi_error;
378
379 /* Toss bio and pass work off to an xfsdatad thread */
380 bio->bi_private = NULL;
381 bio->bi_end_io = NULL;
382 bio_put(bio);
383
384 xfs_finish_ioend(ioend);
385}
386
387STATIC void
388xfs_submit_ioend_bio(
389 struct writeback_control *wbc,
390 xfs_ioend_t *ioend,
391 struct bio *bio)
392{
393 atomic_inc(&ioend->io_remaining);
394 bio->bi_private = ioend;
395 bio->bi_end_io = xfs_end_bio;
396 submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
397}
398
399STATIC struct bio *
400xfs_alloc_ioend_bio(
401 struct buffer_head *bh)
402{
403 struct bio *bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
404
405 ASSERT(bio->bi_private == NULL);
406 bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
407 bio->bi_bdev = bh->b_bdev;
408 return bio;
409}
410
411STATIC void 369STATIC void
412xfs_start_buffer_writeback( 370xfs_start_buffer_writeback(
413 struct buffer_head *bh) 371 struct buffer_head *bh)
@@ -452,28 +410,35 @@ static inline int xfs_bio_add_buffer(struct bio *bio, struct buffer_head *bh)
452} 410}
453 411
454/* 412/*
455 * Submit all of the bios for an ioend. We are only passed a single ioend at a 413 * Submit the bio for an ioend. We are passed an ioend with a bio attached to
456 * time; the caller is responsible for chaining prior to submission. 414 * it, and we submit that bio. The ioend may be used for multiple bio
415 * submissions, so we only want to allocate an append transaction for the ioend
416 * once. In the case of multiple bio submission, each bio will take an IO
417 * reference to the ioend to ensure that the ioend completion is only done once
418 * all bios have been submitted and the ioend is really done.
457 * 419 *
458 * If @fail is non-zero, it means that we have a situation where some part of 420 * If @fail is non-zero, it means that we have a situation where some part of
459 * the submission process has failed after we have marked paged for writeback 421 * the submission process has failed after we have marked paged for writeback
460 * and unlocked them. In this situation, we need to fail the ioend chain rather 422 * and unlocked them. In this situation, we need to fail the bio and ioend
461 * than submit it to IO. This typically only happens on a filesystem shutdown. 423 * rather than submit it to IO. This typically only happens on a filesystem
424 * shutdown.
462 */ 425 */
463STATIC int 426STATIC int
464xfs_submit_ioend( 427xfs_submit_ioend(
465 struct writeback_control *wbc, 428 struct writeback_control *wbc,
466 xfs_ioend_t *ioend, 429 struct xfs_ioend *ioend,
467 int status) 430 int status)
468{ 431{
469 struct buffer_head *bh;
470 struct bio *bio;
471 sector_t lastblock = 0;
472
473 /* Reserve log space if we might write beyond the on-disk inode size. */ 432 /* Reserve log space if we might write beyond the on-disk inode size. */
474 if (!status && 433 if (!status &&
475 ioend->io_type != XFS_IO_UNWRITTEN && xfs_ioend_is_append(ioend)) 434 ioend->io_type != XFS_IO_UNWRITTEN &&
435 xfs_ioend_is_append(ioend) &&
436 !ioend->io_append_trans)
476 status = xfs_setfilesize_trans_alloc(ioend); 437 status = xfs_setfilesize_trans_alloc(ioend);
438
439 ioend->io_bio->bi_private = ioend;
440 ioend->io_bio->bi_end_io = xfs_end_bio;
441
477 /* 442 /*
478 * If we are failing the IO now, just mark the ioend with an 443 * If we are failing the IO now, just mark the ioend with an
479 * error and finish it. This will run IO completion immediately 444 * error and finish it. This will run IO completion immediately
@@ -481,33 +446,73 @@ xfs_submit_ioend(
481 * time. 446 * time.
482 */ 447 */
483 if (status) { 448 if (status) {
484 ioend->io_error = status; 449 ioend->io_bio->bi_error = status;
485 xfs_finish_ioend(ioend); 450 bio_endio(ioend->io_bio);
486 return status; 451 return status;
487 } 452 }
488 453
489 bio = NULL; 454 submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE,
490 for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) { 455 ioend->io_bio);
456 return 0;
457}
491 458
492 if (!bio) { 459static void
493retry: 460xfs_init_bio_from_bh(
494 bio = xfs_alloc_ioend_bio(bh); 461 struct bio *bio,
495 } else if (bh->b_blocknr != lastblock + 1) { 462 struct buffer_head *bh)
496 xfs_submit_ioend_bio(wbc, ioend, bio); 463{
497 goto retry; 464 bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
498 } 465 bio->bi_bdev = bh->b_bdev;
466}
499 467
500 if (xfs_bio_add_buffer(bio, bh) != bh->b_size) { 468static struct xfs_ioend *
501 xfs_submit_ioend_bio(wbc, ioend, bio); 469xfs_alloc_ioend(
502 goto retry; 470 struct inode *inode,
503 } 471 unsigned int type,
472 xfs_off_t offset,
473 struct buffer_head *bh)
474{
475 struct xfs_ioend *ioend;
476 struct bio *bio;
504 477
505 lastblock = bh->b_blocknr; 478 bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, xfs_ioend_bioset);
506 } 479 xfs_init_bio_from_bh(bio, bh);
507 if (bio) 480
508 xfs_submit_ioend_bio(wbc, ioend, bio); 481 ioend = container_of(bio, struct xfs_ioend, io_inline_bio);
509 xfs_finish_ioend(ioend); 482 INIT_LIST_HEAD(&ioend->io_list);
510 return 0; 483 ioend->io_type = type;
484 ioend->io_inode = inode;
485 ioend->io_size = 0;
486 ioend->io_offset = offset;
487 INIT_WORK(&ioend->io_work, xfs_end_io);
488 ioend->io_append_trans = NULL;
489 ioend->io_bio = bio;
490 return ioend;
491}
492
493/*
494 * Allocate a new bio, and chain the old bio to the new one.
495 *
496 * Note that we have to do perform the chaining in this unintuitive order
497 * so that the bi_private linkage is set up in the right direction for the
498 * traversal in xfs_destroy_ioend().
499 */
500static void
501xfs_chain_bio(
502 struct xfs_ioend *ioend,
503 struct writeback_control *wbc,
504 struct buffer_head *bh)
505{
506 struct bio *new;
507
508 new = bio_alloc(GFP_NOFS, BIO_MAX_PAGES);
509 xfs_init_bio_from_bh(new, bh);
510
511 bio_chain(ioend->io_bio, new);
512 bio_get(ioend->io_bio); /* for xfs_destroy_ioend */
513 submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE,
514 ioend->io_bio);
515 ioend->io_bio = new;
511} 516}
512 517
513/* 518/*
@@ -523,27 +528,24 @@ xfs_add_to_ioend(
523 struct buffer_head *bh, 528 struct buffer_head *bh,
524 xfs_off_t offset, 529 xfs_off_t offset,
525 struct xfs_writepage_ctx *wpc, 530 struct xfs_writepage_ctx *wpc,
531 struct writeback_control *wbc,
526 struct list_head *iolist) 532 struct list_head *iolist)
527{ 533{
528 if (!wpc->ioend || wpc->io_type != wpc->ioend->io_type || 534 if (!wpc->ioend || wpc->io_type != wpc->ioend->io_type ||
529 bh->b_blocknr != wpc->last_block + 1 || 535 bh->b_blocknr != wpc->last_block + 1 ||
530 offset != wpc->ioend->io_offset + wpc->ioend->io_size) { 536 offset != wpc->ioend->io_offset + wpc->ioend->io_size) {
531 struct xfs_ioend *new;
532
533 if (wpc->ioend) 537 if (wpc->ioend)
534 list_add(&wpc->ioend->io_list, iolist); 538 list_add(&wpc->ioend->io_list, iolist);
535 539 wpc->ioend = xfs_alloc_ioend(inode, wpc->io_type, offset, bh);
536 new = xfs_alloc_ioend(inode, wpc->io_type);
537 new->io_offset = offset;
538 new->io_buffer_head = bh;
539 new->io_buffer_tail = bh;
540 wpc->ioend = new;
541 } else {
542 wpc->ioend->io_buffer_tail->b_private = bh;
543 wpc->ioend->io_buffer_tail = bh;
544 } 540 }
545 541
546 bh->b_private = NULL; 542 /*
543 * If the buffer doesn't fit into the bio we need to allocate a new
544 * one. This shouldn't happen more than once for a given buffer.
545 */
546 while (xfs_bio_add_buffer(wpc->ioend->io_bio, bh) != bh->b_size)
547 xfs_chain_bio(wpc->ioend, wbc, bh);
548
547 wpc->ioend->io_size += bh->b_size; 549 wpc->ioend->io_size += bh->b_size;
548 wpc->last_block = bh->b_blocknr; 550 wpc->last_block = bh->b_blocknr;
549 xfs_start_buffer_writeback(bh); 551 xfs_start_buffer_writeback(bh);
@@ -803,7 +805,7 @@ xfs_writepage_map(
803 lock_buffer(bh); 805 lock_buffer(bh);
804 if (wpc->io_type != XFS_IO_OVERWRITE) 806 if (wpc->io_type != XFS_IO_OVERWRITE)
805 xfs_map_at_offset(inode, bh, &wpc->imap, offset); 807 xfs_map_at_offset(inode, bh, &wpc->imap, offset);
806 xfs_add_to_ioend(inode, bh, offset, wpc, &submit_list); 808 xfs_add_to_ioend(inode, bh, offset, wpc, wbc, &submit_list);
807 count++; 809 count++;
808 } 810 }
809 811
@@ -1391,13 +1393,10 @@ xfs_end_io_direct_write(
1391 1393
1392 trace_xfs_end_io_direct_write_append(ip, offset, size); 1394 trace_xfs_end_io_direct_write_append(ip, offset, size);
1393 1395
1394 tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); 1396 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0,
1395 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0); 1397 &tp);
1396 if (error) { 1398 if (!error)
1397 xfs_trans_cancel(tp); 1399 error = xfs_setfilesize(ip, tp, offset, size);
1398 return error;
1399 }
1400 error = xfs_setfilesize(ip, tp, offset, size);
1401 } 1400 }
1402 1401
1403 return error; 1402 return error;
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index b4421177b68d..814aab790713 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -18,7 +18,7 @@
18#ifndef __XFS_AOPS_H__ 18#ifndef __XFS_AOPS_H__
19#define __XFS_AOPS_H__ 19#define __XFS_AOPS_H__
20 20
21extern mempool_t *xfs_ioend_pool; 21extern struct bio_set *xfs_ioend_bioset;
22 22
23/* 23/*
24 * Types of I/O for bmap clustering and I/O completion tracking. 24 * Types of I/O for bmap clustering and I/O completion tracking.
@@ -37,22 +37,19 @@ enum {
37 { XFS_IO_OVERWRITE, "overwrite" } 37 { XFS_IO_OVERWRITE, "overwrite" }
38 38
39/* 39/*
40 * xfs_ioend struct manages large extent writes for XFS. 40 * Structure for buffered I/O completions.
41 * It can manage several multi-page bio's at once.
42 */ 41 */
43typedef struct xfs_ioend { 42struct xfs_ioend {
44 struct list_head io_list; /* next ioend in chain */ 43 struct list_head io_list; /* next ioend in chain */
45 unsigned int io_type; /* delalloc / unwritten */ 44 unsigned int io_type; /* delalloc / unwritten */
46 int io_error; /* I/O error code */
47 atomic_t io_remaining; /* hold count */
48 struct inode *io_inode; /* file being written to */ 45 struct inode *io_inode; /* file being written to */
49 struct buffer_head *io_buffer_head;/* buffer linked list head */
50 struct buffer_head *io_buffer_tail;/* buffer linked list tail */
51 size_t io_size; /* size of the extent */ 46 size_t io_size; /* size of the extent */
52 xfs_off_t io_offset; /* offset in the file */ 47 xfs_off_t io_offset; /* offset in the file */
53 struct work_struct io_work; /* xfsdatad work queue */ 48 struct work_struct io_work; /* xfsdatad work queue */
54 struct xfs_trans *io_append_trans;/* xact. for size update */ 49 struct xfs_trans *io_append_trans;/* xact. for size update */
55} xfs_ioend_t; 50 struct bio *io_bio; /* bio being built */
51 struct bio io_inline_bio; /* MUST BE LAST! */
52};
56 53
57extern const struct address_space_operations xfs_address_space_operations; 54extern const struct address_space_operations xfs_address_space_operations;
58 55
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index dd4824589470..e3da5d448bcf 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -112,8 +112,9 @@ typedef struct attrlist_cursor_kern {
112 *========================================================================*/ 112 *========================================================================*/
113 113
114 114
115/* Return 0 on success, or -errno; other state communicated via *context */
115typedef int (*put_listent_func_t)(struct xfs_attr_list_context *, int, 116typedef int (*put_listent_func_t)(struct xfs_attr_list_context *, int,
116 unsigned char *, int, int, unsigned char *); 117 unsigned char *, int, int);
117 118
118typedef struct xfs_attr_list_context { 119typedef struct xfs_attr_list_context {
119 struct xfs_inode *dp; /* inode */ 120 struct xfs_inode *dp; /* inode */
@@ -126,7 +127,6 @@ typedef struct xfs_attr_list_context {
126 int firstu; /* first used byte in buffer */ 127 int firstu; /* first used byte in buffer */
127 int flags; /* from VOP call */ 128 int flags; /* from VOP call */
128 int resynch; /* T/F: resynch with cursor */ 129 int resynch; /* T/F: resynch with cursor */
129 int put_value; /* T/F: need value for listent */
130 put_listent_func_t put_listent; /* list output fmt function */ 130 put_listent_func_t put_listent; /* list output fmt function */
131 int index; /* index into output buffer */ 131 int index; /* index into output buffer */
132} xfs_attr_list_context_t; 132} xfs_attr_list_context_t;
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c
index 2bb959ada45b..55d214981ed2 100644
--- a/fs/xfs/xfs_attr_inactive.c
+++ b/fs/xfs/xfs_attr_inactive.c
@@ -405,21 +405,11 @@ xfs_attr_inactive(
405 goto out_destroy_fork; 405 goto out_destroy_fork;
406 xfs_iunlock(dp, lock_mode); 406 xfs_iunlock(dp, lock_mode);
407 407
408 /*
409 * Start our first transaction of the day.
410 *
411 * All future transactions during this code must be "chained" off
412 * this one via the trans_dup() call. All transactions will contain
413 * the inode, and the inode will always be marked with trans_ihold().
414 * Since the inode will be locked in all transactions, we must log
415 * the inode in every transaction to let it float upward through
416 * the log.
417 */
418 lock_mode = 0; 408 lock_mode = 0;
419 trans = xfs_trans_alloc(mp, XFS_TRANS_ATTRINVAL); 409
420 error = xfs_trans_reserve(trans, &M_RES(mp)->tr_attrinval, 0, 0); 410 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_attrinval, 0, 0, 0, &trans);
421 if (error) 411 if (error)
422 goto out_cancel; 412 goto out_destroy_fork;
423 413
424 lock_mode = XFS_ILOCK_EXCL; 414 lock_mode = XFS_ILOCK_EXCL;
425 xfs_ilock(dp, lock_mode); 415 xfs_ilock(dp, lock_mode);
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index 4fa14820e2e2..d25f26b22ac9 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -106,18 +106,15 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
106 sfe->flags, 106 sfe->flags,
107 sfe->nameval, 107 sfe->nameval,
108 (int)sfe->namelen, 108 (int)sfe->namelen,
109 (int)sfe->valuelen, 109 (int)sfe->valuelen);
110 &sfe->nameval[sfe->namelen]); 110 if (error)
111 111 return error;
112 /* 112 /*
113 * Either search callback finished early or 113 * Either search callback finished early or
114 * didn't fit it all in the buffer after all. 114 * didn't fit it all in the buffer after all.
115 */ 115 */
116 if (context->seen_enough) 116 if (context->seen_enough)
117 break; 117 break;
118
119 if (error)
120 return error;
121 sfe = XFS_ATTR_SF_NEXTENTRY(sfe); 118 sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
122 } 119 }
123 trace_xfs_attr_list_sf_all(context); 120 trace_xfs_attr_list_sf_all(context);
@@ -200,8 +197,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
200 sbp->flags, 197 sbp->flags,
201 sbp->name, 198 sbp->name,
202 sbp->namelen, 199 sbp->namelen,
203 sbp->valuelen, 200 sbp->valuelen);
204 &sbp->name[sbp->namelen]);
205 if (error) { 201 if (error) {
206 kmem_free(sbuf); 202 kmem_free(sbuf);
207 return error; 203 return error;
@@ -416,6 +412,9 @@ xfs_attr3_leaf_list_int(
416 */ 412 */
417 retval = 0; 413 retval = 0;
418 for (; i < ichdr.count; entry++, i++) { 414 for (; i < ichdr.count; entry++, i++) {
415 char *name;
416 int namelen, valuelen;
417
419 if (be32_to_cpu(entry->hashval) != cursor->hashval) { 418 if (be32_to_cpu(entry->hashval) != cursor->hashval) {
420 cursor->hashval = be32_to_cpu(entry->hashval); 419 cursor->hashval = be32_to_cpu(entry->hashval);
421 cursor->offset = 0; 420 cursor->offset = 0;
@@ -425,56 +424,25 @@ xfs_attr3_leaf_list_int(
425 continue; /* skip incomplete entries */ 424 continue; /* skip incomplete entries */
426 425
427 if (entry->flags & XFS_ATTR_LOCAL) { 426 if (entry->flags & XFS_ATTR_LOCAL) {
428 xfs_attr_leaf_name_local_t *name_loc = 427 xfs_attr_leaf_name_local_t *name_loc;
429 xfs_attr3_leaf_name_local(leaf, i); 428
430 429 name_loc = xfs_attr3_leaf_name_local(leaf, i);
431 retval = context->put_listent(context, 430 name = name_loc->nameval;
432 entry->flags, 431 namelen = name_loc->namelen;
433 name_loc->nameval, 432 valuelen = be16_to_cpu(name_loc->valuelen);
434 (int)name_loc->namelen,
435 be16_to_cpu(name_loc->valuelen),
436 &name_loc->nameval[name_loc->namelen]);
437 if (retval)
438 return retval;
439 } else { 433 } else {
440 xfs_attr_leaf_name_remote_t *name_rmt = 434 xfs_attr_leaf_name_remote_t *name_rmt;
441 xfs_attr3_leaf_name_remote(leaf, i); 435
442 436 name_rmt = xfs_attr3_leaf_name_remote(leaf, i);
443 int valuelen = be32_to_cpu(name_rmt->valuelen); 437 name = name_rmt->name;
444 438 namelen = name_rmt->namelen;
445 if (context->put_value) { 439 valuelen = be32_to_cpu(name_rmt->valuelen);
446 xfs_da_args_t args;
447
448 memset((char *)&args, 0, sizeof(args));
449 args.geo = context->dp->i_mount->m_attr_geo;
450 args.dp = context->dp;
451 args.whichfork = XFS_ATTR_FORK;
452 args.valuelen = valuelen;
453 args.rmtvaluelen = valuelen;
454 args.value = kmem_alloc(valuelen, KM_SLEEP | KM_NOFS);
455 args.rmtblkno = be32_to_cpu(name_rmt->valueblk);
456 args.rmtblkcnt = xfs_attr3_rmt_blocks(
457 args.dp->i_mount, valuelen);
458 retval = xfs_attr_rmtval_get(&args);
459 if (!retval)
460 retval = context->put_listent(context,
461 entry->flags,
462 name_rmt->name,
463 (int)name_rmt->namelen,
464 valuelen,
465 args.value);
466 kmem_free(args.value);
467 } else {
468 retval = context->put_listent(context,
469 entry->flags,
470 name_rmt->name,
471 (int)name_rmt->namelen,
472 valuelen,
473 NULL);
474 }
475 if (retval)
476 return retval;
477 } 440 }
441
442 retval = context->put_listent(context, entry->flags,
443 name, namelen, valuelen);
444 if (retval)
445 break;
478 if (context->seen_enough) 446 if (context->seen_enough)
479 break; 447 break;
480 cursor->offset++; 448 cursor->offset++;
@@ -551,8 +519,7 @@ xfs_attr_put_listent(
551 int flags, 519 int flags,
552 unsigned char *name, 520 unsigned char *name,
553 int namelen, 521 int namelen,
554 int valuelen, 522 int valuelen)
555 unsigned char *value)
556{ 523{
557 struct attrlist *alist = (struct attrlist *)context->alist; 524 struct attrlist *alist = (struct attrlist *)context->alist;
558 attrlist_ent_t *aep; 525 attrlist_ent_t *aep;
@@ -581,7 +548,7 @@ xfs_attr_put_listent(
581 trace_xfs_attr_list_full(context); 548 trace_xfs_attr_list_full(context);
582 alist->al_more = 1; 549 alist->al_more = 1;
583 context->seen_enough = 1; 550 context->seen_enough = 1;
584 return 1; 551 return 0;
585 } 552 }
586 553
587 aep = (attrlist_ent_t *)&context->alist[context->firstu]; 554 aep = (attrlist_ent_t *)&context->alist[context->firstu];
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 3b6309865c65..613ea2d7ac19 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -900,19 +900,15 @@ xfs_free_eofblocks(
900 * Free them up now by truncating the file to 900 * Free them up now by truncating the file to
901 * its current size. 901 * its current size.
902 */ 902 */
903 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
904
905 if (need_iolock) { 903 if (need_iolock) {
906 if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { 904 if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL))
907 xfs_trans_cancel(tp);
908 return -EAGAIN; 905 return -EAGAIN;
909 }
910 } 906 }
911 907
912 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); 908 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0,
909 &tp);
913 if (error) { 910 if (error) {
914 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 911 ASSERT(XFS_FORCED_SHUTDOWN(mp));
915 xfs_trans_cancel(tp);
916 if (need_iolock) 912 if (need_iolock)
917 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 913 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
918 return error; 914 return error;
@@ -1037,9 +1033,9 @@ xfs_alloc_file_space(
1037 /* 1033 /*
1038 * Allocate and setup the transaction. 1034 * Allocate and setup the transaction.
1039 */ 1035 */
1040 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); 1036 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks,
1041 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, 1037 resrtextents, 0, &tp);
1042 resblks, resrtextents); 1038
1043 /* 1039 /*
1044 * Check for running out of space 1040 * Check for running out of space
1045 */ 1041 */
@@ -1048,7 +1044,6 @@ xfs_alloc_file_space(
1048 * Free the transaction structure. 1044 * Free the transaction structure.
1049 */ 1045 */
1050 ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp)); 1046 ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
1051 xfs_trans_cancel(tp);
1052 break; 1047 break;
1053 } 1048 }
1054 xfs_ilock(ip, XFS_ILOCK_EXCL); 1049 xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -1311,18 +1306,10 @@ xfs_free_file_space(
1311 * transaction to dip into the reserve blocks to ensure 1306 * transaction to dip into the reserve blocks to ensure
1312 * the freeing of the space succeeds at ENOSPC. 1307 * the freeing of the space succeeds at ENOSPC.
1313 */ 1308 */
1314 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); 1309 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0,
1315 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0); 1310 &tp);
1316
1317 /*
1318 * check for running out of space
1319 */
1320 if (error) { 1311 if (error) {
1321 /*
1322 * Free the transaction structure.
1323 */
1324 ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp)); 1312 ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
1325 xfs_trans_cancel(tp);
1326 break; 1313 break;
1327 } 1314 }
1328 xfs_ilock(ip, XFS_ILOCK_EXCL); 1315 xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -1482,19 +1469,16 @@ xfs_shift_file_space(
1482 } 1469 }
1483 1470
1484 while (!error && !done) { 1471 while (!error && !done) {
1485 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
1486 /* 1472 /*
1487 * We would need to reserve permanent block for transaction. 1473 * We would need to reserve permanent block for transaction.
1488 * This will come into picture when after shifting extent into 1474 * This will come into picture when after shifting extent into
1489 * hole we found that adjacent extents can be merged which 1475 * hole we found that adjacent extents can be merged which
1490 * may lead to freeing of a block during record update. 1476 * may lead to freeing of a block during record update.
1491 */ 1477 */
1492 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, 1478 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write,
1493 XFS_DIOSTRAT_SPACE_RES(mp, 0), 0); 1479 XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp);
1494 if (error) { 1480 if (error)
1495 xfs_trans_cancel(tp);
1496 break; 1481 break;
1497 }
1498 1482
1499 xfs_ilock(ip, XFS_ILOCK_EXCL); 1483 xfs_ilock(ip, XFS_ILOCK_EXCL);
1500 error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot, 1484 error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot,
@@ -1747,12 +1731,9 @@ xfs_swap_extents(
1747 if (error) 1731 if (error)
1748 goto out_unlock; 1732 goto out_unlock;
1749 1733
1750 tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT); 1734 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
1751 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); 1735 if (error)
1752 if (error) {
1753 xfs_trans_cancel(tp);
1754 goto out_unlock; 1736 goto out_unlock;
1755 }
1756 1737
1757 /* 1738 /*
1758 * Lock and join the inodes to the tansaction so that transaction commit 1739 * Lock and join the inodes to the tansaction so that transaction commit
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 9a2191b91137..e71cfbd5acb3 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1100,22 +1100,18 @@ xfs_bwrite(
1100 return error; 1100 return error;
1101} 1101}
1102 1102
1103STATIC void 1103static void
1104xfs_buf_bio_end_io( 1104xfs_buf_bio_end_io(
1105 struct bio *bio) 1105 struct bio *bio)
1106{ 1106{
1107 xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private; 1107 struct xfs_buf *bp = (struct xfs_buf *)bio->bi_private;
1108 1108
1109 /* 1109 /*
1110 * don't overwrite existing errors - otherwise we can lose errors on 1110 * don't overwrite existing errors - otherwise we can lose errors on
1111 * buffers that require multiple bios to complete. 1111 * buffers that require multiple bios to complete.
1112 */ 1112 */
1113 if (bio->bi_error) { 1113 if (bio->bi_error)
1114 spin_lock(&bp->b_lock); 1114 cmpxchg(&bp->b_io_error, 0, bio->bi_error);
1115 if (!bp->b_io_error)
1116 bp->b_io_error = bio->bi_error;
1117 spin_unlock(&bp->b_lock);
1118 }
1119 1115
1120 if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) 1116 if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
1121 invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); 1117 invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 4eb89bd4ee73..8bfb974f0772 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -183,6 +183,26 @@ typedef struct xfs_buf {
183 unsigned int b_page_count; /* size of page array */ 183 unsigned int b_page_count; /* size of page array */
184 unsigned int b_offset; /* page offset in first page */ 184 unsigned int b_offset; /* page offset in first page */
185 int b_error; /* error code on I/O */ 185 int b_error; /* error code on I/O */
186
187 /*
188 * async write failure retry count. Initialised to zero on the first
189 * failure, then when it exceeds the maximum configured without a
190 * success the write is considered to be failed permanently and the
191 * iodone handler will take appropriate action.
192 *
193 * For retry timeouts, we record the jiffie of the first failure. This
194 * means that we can change the retry timeout for buffers already under
195 * I/O and thus avoid getting stuck in a retry loop with a long timeout.
196 *
197 * last_error is used to ensure that we are getting repeated errors, not
198 * different errors. e.g. a block device might change ENOSPC to EIO when
199 * a failure timeout occurs, so we want to re-initialise the error
200 * retry behaviour appropriately when that happens.
201 */
202 int b_retries;
203 unsigned long b_first_retry_time; /* in jiffies */
204 int b_last_error;
205
186 const struct xfs_buf_ops *b_ops; 206 const struct xfs_buf_ops *b_ops;
187 207
188#ifdef XFS_BUF_LOCK_TRACKING 208#ifdef XFS_BUF_LOCK_TRACKING
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 99e91a0e554e..34257992934c 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -1042,35 +1042,22 @@ xfs_buf_do_callbacks(
1042 } 1042 }
1043} 1043}
1044 1044
1045/* 1045static bool
1046 * This is the iodone() function for buffers which have had callbacks 1046xfs_buf_iodone_callback_error(
1047 * attached to them by xfs_buf_attach_iodone(). It should remove each
1048 * log item from the buffer's list and call the callback of each in turn.
1049 * When done, the buffer's fsprivate field is set to NULL and the buffer
1050 * is unlocked with a call to iodone().
1051 */
1052void
1053xfs_buf_iodone_callbacks(
1054 struct xfs_buf *bp) 1047 struct xfs_buf *bp)
1055{ 1048{
1056 struct xfs_log_item *lip = bp->b_fspriv; 1049 struct xfs_log_item *lip = bp->b_fspriv;
1057 struct xfs_mount *mp = lip->li_mountp; 1050 struct xfs_mount *mp = lip->li_mountp;
1058 static ulong lasttime; 1051 static ulong lasttime;
1059 static xfs_buftarg_t *lasttarg; 1052 static xfs_buftarg_t *lasttarg;
1060 1053 struct xfs_error_cfg *cfg;
1061 if (likely(!bp->b_error))
1062 goto do_callbacks;
1063 1054
1064 /* 1055 /*
1065 * If we've already decided to shutdown the filesystem because of 1056 * If we've already decided to shutdown the filesystem because of
1066 * I/O errors, there's no point in giving this a retry. 1057 * I/O errors, there's no point in giving this a retry.
1067 */ 1058 */
1068 if (XFS_FORCED_SHUTDOWN(mp)) { 1059 if (XFS_FORCED_SHUTDOWN(mp))
1069 xfs_buf_stale(bp); 1060 goto out_stale;
1070 bp->b_flags |= XBF_DONE;
1071 trace_xfs_buf_item_iodone(bp, _RET_IP_);
1072 goto do_callbacks;
1073 }
1074 1061
1075 if (bp->b_target != lasttarg || 1062 if (bp->b_target != lasttarg ||
1076 time_after(jiffies, (lasttime + 5*HZ))) { 1063 time_after(jiffies, (lasttime + 5*HZ))) {
@@ -1079,45 +1066,93 @@ xfs_buf_iodone_callbacks(
1079 } 1066 }
1080 lasttarg = bp->b_target; 1067 lasttarg = bp->b_target;
1081 1068
1069 /* synchronous writes will have callers process the error */
1070 if (!(bp->b_flags & XBF_ASYNC))
1071 goto out_stale;
1072
1073 trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
1074 ASSERT(bp->b_iodone != NULL);
1075
1082 /* 1076 /*
1083 * If the write was asynchronous then no one will be looking for the 1077 * If the write was asynchronous then no one will be looking for the
1084 * error. Clear the error state and write the buffer out again. 1078 * error. If this is the first failure of this type, clear the error
1085 * 1079 * state and write the buffer out again. This means we always retry an
1086 * XXX: This helps against transient write errors, but we need to find 1080 * async write failure at least once, but we also need to set the buffer
1087 * a way to shut the filesystem down if the writes keep failing. 1081 * up to behave correctly now for repeated failures.
1088 *
1089 * In practice we'll shut the filesystem down soon as non-transient
1090 * errors tend to affect the whole device and a failing log write
1091 * will make us give up. But we really ought to do better here.
1092 */ 1082 */
1093 if (bp->b_flags & XBF_ASYNC) { 1083 if (!(bp->b_flags & (XBF_STALE|XBF_WRITE_FAIL)) ||
1094 ASSERT(bp->b_iodone != NULL); 1084 bp->b_last_error != bp->b_error) {
1085 bp->b_flags |= (XBF_WRITE | XBF_ASYNC |
1086 XBF_DONE | XBF_WRITE_FAIL);
1087 bp->b_last_error = bp->b_error;
1088 bp->b_retries = 0;
1089 bp->b_first_retry_time = jiffies;
1090
1091 xfs_buf_ioerror(bp, 0);
1092 xfs_buf_submit(bp);
1093 return true;
1094 }
1095 1095
1096 trace_xfs_buf_item_iodone_async(bp, _RET_IP_); 1096 /*
1097 * Repeated failure on an async write. Take action according to the
1098 * error configuration we have been set up to use.
1099 */
1100 cfg = xfs_error_get_cfg(mp, XFS_ERR_METADATA, bp->b_error);
1097 1101
1098 xfs_buf_ioerror(bp, 0); /* errno of 0 unsets the flag */ 1102 if (cfg->max_retries != XFS_ERR_RETRY_FOREVER &&
1103 ++bp->b_retries > cfg->max_retries)
1104 goto permanent_error;
1105 if (cfg->retry_timeout &&
1106 time_after(jiffies, cfg->retry_timeout + bp->b_first_retry_time))
1107 goto permanent_error;
1099 1108
1100 if (!(bp->b_flags & (XBF_STALE|XBF_WRITE_FAIL))) { 1109 /* At unmount we may treat errors differently */
1101 bp->b_flags |= XBF_WRITE | XBF_ASYNC | 1110 if ((mp->m_flags & XFS_MOUNT_UNMOUNTING) && mp->m_fail_unmount)
1102 XBF_DONE | XBF_WRITE_FAIL; 1111 goto permanent_error;
1103 xfs_buf_submit(bp);
1104 } else {
1105 xfs_buf_relse(bp);
1106 }
1107 1112
1108 return; 1113 /* still a transient error, higher layers will retry */
1109 } 1114 xfs_buf_ioerror(bp, 0);
1115 xfs_buf_relse(bp);
1116 return true;
1110 1117
1111 /* 1118 /*
1112 * If the write of the buffer was synchronous, we want to make 1119 * Permanent error - we need to trigger a shutdown if we haven't already
1113 * sure to return the error to the caller of xfs_bwrite(). 1120 * to indicate that inconsistency will result from this action.
1114 */ 1121 */
1122permanent_error:
1123 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
1124out_stale:
1115 xfs_buf_stale(bp); 1125 xfs_buf_stale(bp);
1116 bp->b_flags |= XBF_DONE; 1126 bp->b_flags |= XBF_DONE;
1117
1118 trace_xfs_buf_error_relse(bp, _RET_IP_); 1127 trace_xfs_buf_error_relse(bp, _RET_IP_);
1128 return false;
1129}
1130
1131/*
1132 * This is the iodone() function for buffers which have had callbacks attached
1133 * to them by xfs_buf_attach_iodone(). We need to iterate the items on the
1134 * callback list, mark the buffer as having no more callbacks and then push the
1135 * buffer through IO completion processing.
1136 */
1137void
1138xfs_buf_iodone_callbacks(
1139 struct xfs_buf *bp)
1140{
1141 /*
1142 * If there is an error, process it. Some errors require us
1143 * to run callbacks after failure processing is done so we
1144 * detect that and take appropriate action.
1145 */
1146 if (bp->b_error && xfs_buf_iodone_callback_error(bp))
1147 return;
1148
1149 /*
1150 * Successful IO or permanent error. Either way, we can clear the
1151 * retry state here in preparation for the next error that may occur.
1152 */
1153 bp->b_last_error = 0;
1154 bp->b_retries = 0;
1119 1155
1120do_callbacks:
1121 xfs_buf_do_callbacks(bp); 1156 xfs_buf_do_callbacks(bp);
1122 bp->b_fspriv = NULL; 1157 bp->b_fspriv = NULL;
1123 bp->b_iodone = NULL; 1158 bp->b_iodone = NULL;
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 316b2a1bdba5..e0646659ce16 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -614,11 +614,10 @@ xfs_qm_dqread(
614 trace_xfs_dqread(dqp); 614 trace_xfs_dqread(dqp);
615 615
616 if (flags & XFS_QMOPT_DQALLOC) { 616 if (flags & XFS_QMOPT_DQALLOC) {
617 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC); 617 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_dqalloc,
618 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_dqalloc, 618 XFS_QM_DQALLOC_SPACE_RES(mp), 0, 0, &tp);
619 XFS_QM_DQALLOC_SPACE_RES(mp), 0);
620 if (error) 619 if (error)
621 goto error1; 620 goto error0;
622 } 621 }
623 622
624 /* 623 /*
@@ -692,7 +691,7 @@ error0:
692 * end of the chunk, skip ahead to first id in next allocated chunk 691 * end of the chunk, skip ahead to first id in next allocated chunk
693 * using the SEEK_DATA interface. 692 * using the SEEK_DATA interface.
694 */ 693 */
695int 694static int
696xfs_dq_get_next_id( 695xfs_dq_get_next_id(
697 xfs_mount_t *mp, 696 xfs_mount_t *mp,
698 uint type, 697 uint type,
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 85ce3032f815..44af22897c8b 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -145,12 +145,10 @@ xfs_update_prealloc_flags(
145 struct xfs_trans *tp; 145 struct xfs_trans *tp;
146 int error; 146 int error;
147 147
148 tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_WRITEID); 148 error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_writeid,
149 error = xfs_trans_reserve(tp, &M_RES(ip->i_mount)->tr_writeid, 0, 0); 149 0, 0, 0, &tp);
150 if (error) { 150 if (error)
151 xfs_trans_cancel(tp);
152 return error; 151 return error;
153 }
154 152
155 xfs_ilock(ip, XFS_ILOCK_EXCL); 153 xfs_ilock(ip, XFS_ILOCK_EXCL);
156 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 154 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index ee3aaa0a5317..b4d75825ae37 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -198,14 +198,10 @@ xfs_growfs_data_private(
198 return error; 198 return error;
199 } 199 }
200 200
201 tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS); 201 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata,
202 tp->t_flags |= XFS_TRANS_RESERVE; 202 XFS_GROWFS_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, &tp);
203 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growdata, 203 if (error)
204 XFS_GROWFS_SPACE_RES(mp), 0);
205 if (error) {
206 xfs_trans_cancel(tp);
207 return error; 204 return error;
208 }
209 205
210 /* 206 /*
211 * Write new AG headers to disk. Non-transactional, but written 207 * Write new AG headers to disk. Non-transactional, but written
@@ -243,8 +239,8 @@ xfs_growfs_data_private(
243 agf->agf_roots[XFS_BTNUM_CNTi] = cpu_to_be32(XFS_CNT_BLOCK(mp)); 239 agf->agf_roots[XFS_BTNUM_CNTi] = cpu_to_be32(XFS_CNT_BLOCK(mp));
244 agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1); 240 agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1);
245 agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1); 241 agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1);
246 agf->agf_flfirst = 0; 242 agf->agf_flfirst = cpu_to_be32(1);
247 agf->agf_fllast = cpu_to_be32(XFS_AGFL_SIZE(mp) - 1); 243 agf->agf_fllast = 0;
248 agf->agf_flcount = 0; 244 agf->agf_flcount = 0;
249 tmpsize = agsize - XFS_PREALLOC_BLOCKS(mp); 245 tmpsize = agsize - XFS_PREALLOC_BLOCKS(mp);
250 agf->agf_freeblks = cpu_to_be32(tmpsize); 246 agf->agf_freeblks = cpu_to_be32(tmpsize);
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index bf2d60749278..99ee6eee5e0b 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -37,9 +37,6 @@
37#include <linux/kthread.h> 37#include <linux/kthread.h>
38#include <linux/freezer.h> 38#include <linux/freezer.h>
39 39
40STATIC void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp,
41 struct xfs_perag *pag, struct xfs_inode *ip);
42
43/* 40/*
44 * Allocate and initialise an xfs_inode. 41 * Allocate and initialise an xfs_inode.
45 */ 42 */
@@ -94,13 +91,6 @@ xfs_inode_free_callback(
94 struct inode *inode = container_of(head, struct inode, i_rcu); 91 struct inode *inode = container_of(head, struct inode, i_rcu);
95 struct xfs_inode *ip = XFS_I(inode); 92 struct xfs_inode *ip = XFS_I(inode);
96 93
97 kmem_zone_free(xfs_inode_zone, ip);
98}
99
100void
101xfs_inode_free(
102 struct xfs_inode *ip)
103{
104 switch (VFS_I(ip)->i_mode & S_IFMT) { 94 switch (VFS_I(ip)->i_mode & S_IFMT) {
105 case S_IFREG: 95 case S_IFREG:
106 case S_IFDIR: 96 case S_IFDIR:
@@ -118,6 +108,25 @@ xfs_inode_free(
118 ip->i_itemp = NULL; 108 ip->i_itemp = NULL;
119 } 109 }
120 110
111 kmem_zone_free(xfs_inode_zone, ip);
112}
113
114static void
115__xfs_inode_free(
116 struct xfs_inode *ip)
117{
118 /* asserts to verify all state is correct here */
119 ASSERT(atomic_read(&ip->i_pincount) == 0);
120 ASSERT(!xfs_isiflocked(ip));
121 XFS_STATS_DEC(ip->i_mount, vn_active);
122
123 call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
124}
125
126void
127xfs_inode_free(
128 struct xfs_inode *ip)
129{
121 /* 130 /*
122 * Because we use RCU freeing we need to ensure the inode always 131 * Because we use RCU freeing we need to ensure the inode always
123 * appears to be reclaimed with an invalid inode number when in the 132 * appears to be reclaimed with an invalid inode number when in the
@@ -129,12 +138,123 @@ xfs_inode_free(
129 ip->i_ino = 0; 138 ip->i_ino = 0;
130 spin_unlock(&ip->i_flags_lock); 139 spin_unlock(&ip->i_flags_lock);
131 140
132 /* asserts to verify all state is correct here */ 141 __xfs_inode_free(ip);
133 ASSERT(atomic_read(&ip->i_pincount) == 0); 142}
134 ASSERT(!xfs_isiflocked(ip));
135 XFS_STATS_DEC(ip->i_mount, vn_active);
136 143
137 call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback); 144/*
145 * Queue a new inode reclaim pass if there are reclaimable inodes and there
146 * isn't a reclaim pass already in progress. By default it runs every 5s based
147 * on the xfs periodic sync default of 30s. Perhaps this should have its own
148 * tunable, but that can be done if this method proves to be ineffective or too
149 * aggressive.
150 */
151static void
152xfs_reclaim_work_queue(
153 struct xfs_mount *mp)
154{
155
156 rcu_read_lock();
157 if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
158 queue_delayed_work(mp->m_reclaim_workqueue, &mp->m_reclaim_work,
159 msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
160 }
161 rcu_read_unlock();
162}
163
164/*
165 * This is a fast pass over the inode cache to try to get reclaim moving on as
166 * many inodes as possible in a short period of time. It kicks itself every few
167 * seconds, as well as being kicked by the inode cache shrinker when memory
168 * goes low. It scans as quickly as possible avoiding locked inodes or those
169 * already being flushed, and once done schedules a future pass.
170 */
171void
172xfs_reclaim_worker(
173 struct work_struct *work)
174{
175 struct xfs_mount *mp = container_of(to_delayed_work(work),
176 struct xfs_mount, m_reclaim_work);
177
178 xfs_reclaim_inodes(mp, SYNC_TRYLOCK);
179 xfs_reclaim_work_queue(mp);
180}
181
182static void
183xfs_perag_set_reclaim_tag(
184 struct xfs_perag *pag)
185{
186 struct xfs_mount *mp = pag->pag_mount;
187
188 ASSERT(spin_is_locked(&pag->pag_ici_lock));
189 if (pag->pag_ici_reclaimable++)
190 return;
191
192 /* propagate the reclaim tag up into the perag radix tree */
193 spin_lock(&mp->m_perag_lock);
194 radix_tree_tag_set(&mp->m_perag_tree, pag->pag_agno,
195 XFS_ICI_RECLAIM_TAG);
196 spin_unlock(&mp->m_perag_lock);
197
198 /* schedule periodic background inode reclaim */
199 xfs_reclaim_work_queue(mp);
200
201 trace_xfs_perag_set_reclaim(mp, pag->pag_agno, -1, _RET_IP_);
202}
203
204static void
205xfs_perag_clear_reclaim_tag(
206 struct xfs_perag *pag)
207{
208 struct xfs_mount *mp = pag->pag_mount;
209
210 ASSERT(spin_is_locked(&pag->pag_ici_lock));
211 if (--pag->pag_ici_reclaimable)
212 return;
213
214 /* clear the reclaim tag from the perag radix tree */
215 spin_lock(&mp->m_perag_lock);
216 radix_tree_tag_clear(&mp->m_perag_tree, pag->pag_agno,
217 XFS_ICI_RECLAIM_TAG);
218 spin_unlock(&mp->m_perag_lock);
219 trace_xfs_perag_clear_reclaim(mp, pag->pag_agno, -1, _RET_IP_);
220}
221
222
223/*
224 * We set the inode flag atomically with the radix tree tag.
225 * Once we get tag lookups on the radix tree, this inode flag
226 * can go away.
227 */
228void
229xfs_inode_set_reclaim_tag(
230 struct xfs_inode *ip)
231{
232 struct xfs_mount *mp = ip->i_mount;
233 struct xfs_perag *pag;
234
235 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
236 spin_lock(&pag->pag_ici_lock);
237 spin_lock(&ip->i_flags_lock);
238
239 radix_tree_tag_set(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino),
240 XFS_ICI_RECLAIM_TAG);
241 xfs_perag_set_reclaim_tag(pag);
242 __xfs_iflags_set(ip, XFS_IRECLAIMABLE);
243
244 spin_unlock(&ip->i_flags_lock);
245 spin_unlock(&pag->pag_ici_lock);
246 xfs_perag_put(pag);
247}
248
249STATIC void
250xfs_inode_clear_reclaim_tag(
251 struct xfs_perag *pag,
252 xfs_ino_t ino)
253{
254 radix_tree_tag_clear(&pag->pag_ici_root,
255 XFS_INO_TO_AGINO(pag->pag_mount, ino),
256 XFS_ICI_RECLAIM_TAG);
257 xfs_perag_clear_reclaim_tag(pag);
138} 258}
139 259
140/* 260/*
@@ -264,7 +384,7 @@ xfs_iget_cache_hit(
264 */ 384 */
265 ip->i_flags &= ~XFS_IRECLAIM_RESET_FLAGS; 385 ip->i_flags &= ~XFS_IRECLAIM_RESET_FLAGS;
266 ip->i_flags |= XFS_INEW; 386 ip->i_flags |= XFS_INEW;
267 __xfs_inode_clear_reclaim_tag(mp, pag, ip); 387 xfs_inode_clear_reclaim_tag(pag, ip->i_ino);
268 inode->i_state = I_NEW; 388 inode->i_state = I_NEW;
269 389
270 ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); 390 ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
@@ -723,121 +843,6 @@ xfs_inode_ag_iterator_tag(
723} 843}
724 844
725/* 845/*
726 * Queue a new inode reclaim pass if there are reclaimable inodes and there
727 * isn't a reclaim pass already in progress. By default it runs every 5s based
728 * on the xfs periodic sync default of 30s. Perhaps this should have its own
729 * tunable, but that can be done if this method proves to be ineffective or too
730 * aggressive.
731 */
732static void
733xfs_reclaim_work_queue(
734 struct xfs_mount *mp)
735{
736
737 rcu_read_lock();
738 if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
739 queue_delayed_work(mp->m_reclaim_workqueue, &mp->m_reclaim_work,
740 msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
741 }
742 rcu_read_unlock();
743}
744
745/*
746 * This is a fast pass over the inode cache to try to get reclaim moving on as
747 * many inodes as possible in a short period of time. It kicks itself every few
748 * seconds, as well as being kicked by the inode cache shrinker when memory
749 * goes low. It scans as quickly as possible avoiding locked inodes or those
750 * already being flushed, and once done schedules a future pass.
751 */
752void
753xfs_reclaim_worker(
754 struct work_struct *work)
755{
756 struct xfs_mount *mp = container_of(to_delayed_work(work),
757 struct xfs_mount, m_reclaim_work);
758
759 xfs_reclaim_inodes(mp, SYNC_TRYLOCK);
760 xfs_reclaim_work_queue(mp);
761}
762
763static void
764__xfs_inode_set_reclaim_tag(
765 struct xfs_perag *pag,
766 struct xfs_inode *ip)
767{
768 radix_tree_tag_set(&pag->pag_ici_root,
769 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
770 XFS_ICI_RECLAIM_TAG);
771
772 if (!pag->pag_ici_reclaimable) {
773 /* propagate the reclaim tag up into the perag radix tree */
774 spin_lock(&ip->i_mount->m_perag_lock);
775 radix_tree_tag_set(&ip->i_mount->m_perag_tree,
776 XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
777 XFS_ICI_RECLAIM_TAG);
778 spin_unlock(&ip->i_mount->m_perag_lock);
779
780 /* schedule periodic background inode reclaim */
781 xfs_reclaim_work_queue(ip->i_mount);
782
783 trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
784 -1, _RET_IP_);
785 }
786 pag->pag_ici_reclaimable++;
787}
788
789/*
790 * We set the inode flag atomically with the radix tree tag.
791 * Once we get tag lookups on the radix tree, this inode flag
792 * can go away.
793 */
794void
795xfs_inode_set_reclaim_tag(
796 xfs_inode_t *ip)
797{
798 struct xfs_mount *mp = ip->i_mount;
799 struct xfs_perag *pag;
800
801 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
802 spin_lock(&pag->pag_ici_lock);
803 spin_lock(&ip->i_flags_lock);
804 __xfs_inode_set_reclaim_tag(pag, ip);
805 __xfs_iflags_set(ip, XFS_IRECLAIMABLE);
806 spin_unlock(&ip->i_flags_lock);
807 spin_unlock(&pag->pag_ici_lock);
808 xfs_perag_put(pag);
809}
810
811STATIC void
812__xfs_inode_clear_reclaim(
813 xfs_perag_t *pag,
814 xfs_inode_t *ip)
815{
816 pag->pag_ici_reclaimable--;
817 if (!pag->pag_ici_reclaimable) {
818 /* clear the reclaim tag from the perag radix tree */
819 spin_lock(&ip->i_mount->m_perag_lock);
820 radix_tree_tag_clear(&ip->i_mount->m_perag_tree,
821 XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
822 XFS_ICI_RECLAIM_TAG);
823 spin_unlock(&ip->i_mount->m_perag_lock);
824 trace_xfs_perag_clear_reclaim(ip->i_mount, pag->pag_agno,
825 -1, _RET_IP_);
826 }
827}
828
829STATIC void
830__xfs_inode_clear_reclaim_tag(
831 xfs_mount_t *mp,
832 xfs_perag_t *pag,
833 xfs_inode_t *ip)
834{
835 radix_tree_tag_clear(&pag->pag_ici_root,
836 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
837 __xfs_inode_clear_reclaim(pag, ip);
838}
839
840/*
841 * Grab the inode for reclaim exclusively. 846 * Grab the inode for reclaim exclusively.
842 * Return 0 if we grabbed it, non-zero otherwise. 847 * Return 0 if we grabbed it, non-zero otherwise.
843 */ 848 */
@@ -929,6 +934,7 @@ xfs_reclaim_inode(
929 int sync_mode) 934 int sync_mode)
930{ 935{
931 struct xfs_buf *bp = NULL; 936 struct xfs_buf *bp = NULL;
937 xfs_ino_t ino = ip->i_ino; /* for radix_tree_delete */
932 int error; 938 int error;
933 939
934restart: 940restart:
@@ -993,6 +999,22 @@ restart:
993 999
994 xfs_iflock(ip); 1000 xfs_iflock(ip);
995reclaim: 1001reclaim:
1002 /*
1003 * Because we use RCU freeing we need to ensure the inode always appears
1004 * to be reclaimed with an invalid inode number when in the free state.
1005 * We do this as early as possible under the ILOCK and flush lock so
1006 * that xfs_iflush_cluster() can be guaranteed to detect races with us
1007 * here. By doing this, we guarantee that once xfs_iflush_cluster has
1008 * locked both the XFS_ILOCK and the flush lock that it will see either
1009 * a valid, flushable inode that will serialise correctly against the
1010 * locks below, or it will see a clean (and invalid) inode that it can
1011 * skip.
1012 */
1013 spin_lock(&ip->i_flags_lock);
1014 ip->i_flags = XFS_IRECLAIM;
1015 ip->i_ino = 0;
1016 spin_unlock(&ip->i_flags_lock);
1017
996 xfs_ifunlock(ip); 1018 xfs_ifunlock(ip);
997 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1019 xfs_iunlock(ip, XFS_ILOCK_EXCL);
998 1020
@@ -1006,9 +1028,9 @@ reclaim:
1006 */ 1028 */
1007 spin_lock(&pag->pag_ici_lock); 1029 spin_lock(&pag->pag_ici_lock);
1008 if (!radix_tree_delete(&pag->pag_ici_root, 1030 if (!radix_tree_delete(&pag->pag_ici_root,
1009 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino))) 1031 XFS_INO_TO_AGINO(ip->i_mount, ino)))
1010 ASSERT(0); 1032 ASSERT(0);
1011 __xfs_inode_clear_reclaim(pag, ip); 1033 xfs_perag_clear_reclaim_tag(pag);
1012 spin_unlock(&pag->pag_ici_lock); 1034 spin_unlock(&pag->pag_ici_lock);
1013 1035
1014 /* 1036 /*
@@ -1023,7 +1045,7 @@ reclaim:
1023 xfs_qm_dqdetach(ip); 1045 xfs_qm_dqdetach(ip);
1024 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1046 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1025 1047
1026 xfs_inode_free(ip); 1048 __xfs_inode_free(ip);
1027 return error; 1049 return error;
1028 1050
1029out_ifunlock: 1051out_ifunlock:
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 96f606deee31..ee6799e0476f 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1030,7 +1030,7 @@ xfs_dir_ialloc(
1030 tp->t_flags &= ~(XFS_TRANS_DQ_DIRTY); 1030 tp->t_flags &= ~(XFS_TRANS_DQ_DIRTY);
1031 } 1031 }
1032 1032
1033 code = xfs_trans_roll(&tp, 0); 1033 code = xfs_trans_roll(&tp, NULL);
1034 if (committed != NULL) 1034 if (committed != NULL)
1035 *committed = 1; 1035 *committed = 1;
1036 1036
@@ -1161,11 +1161,9 @@ xfs_create(
1161 rdev = 0; 1161 rdev = 0;
1162 resblks = XFS_MKDIR_SPACE_RES(mp, name->len); 1162 resblks = XFS_MKDIR_SPACE_RES(mp, name->len);
1163 tres = &M_RES(mp)->tr_mkdir; 1163 tres = &M_RES(mp)->tr_mkdir;
1164 tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR);
1165 } else { 1164 } else {
1166 resblks = XFS_CREATE_SPACE_RES(mp, name->len); 1165 resblks = XFS_CREATE_SPACE_RES(mp, name->len);
1167 tres = &M_RES(mp)->tr_create; 1166 tres = &M_RES(mp)->tr_create;
1168 tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE);
1169 } 1167 }
1170 1168
1171 /* 1169 /*
@@ -1174,20 +1172,19 @@ xfs_create(
1174 * the case we'll drop the one we have and get a more 1172 * the case we'll drop the one we have and get a more
1175 * appropriate transaction later. 1173 * appropriate transaction later.
1176 */ 1174 */
1177 error = xfs_trans_reserve(tp, tres, resblks, 0); 1175 error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp);
1178 if (error == -ENOSPC) { 1176 if (error == -ENOSPC) {
1179 /* flush outstanding delalloc blocks and retry */ 1177 /* flush outstanding delalloc blocks and retry */
1180 xfs_flush_inodes(mp); 1178 xfs_flush_inodes(mp);
1181 error = xfs_trans_reserve(tp, tres, resblks, 0); 1179 error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp);
1182 } 1180 }
1183 if (error == -ENOSPC) { 1181 if (error == -ENOSPC) {
1184 /* No space at all so try a "no-allocation" reservation */ 1182 /* No space at all so try a "no-allocation" reservation */
1185 resblks = 0; 1183 resblks = 0;
1186 error = xfs_trans_reserve(tp, tres, 0, 0); 1184 error = xfs_trans_alloc(mp, tres, 0, 0, 0, &tp);
1187 } 1185 }
1188 if (error) 1186 if (error)
1189 goto out_trans_cancel; 1187 goto out_release_inode;
1190
1191 1188
1192 xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL | 1189 xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL |
1193 XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT); 1190 XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT);
@@ -1337,17 +1334,16 @@ xfs_create_tmpfile(
1337 return error; 1334 return error;
1338 1335
1339 resblks = XFS_IALLOC_SPACE_RES(mp); 1336 resblks = XFS_IALLOC_SPACE_RES(mp);
1340 tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE_TMPFILE);
1341
1342 tres = &M_RES(mp)->tr_create_tmpfile; 1337 tres = &M_RES(mp)->tr_create_tmpfile;
1343 error = xfs_trans_reserve(tp, tres, resblks, 0); 1338
1339 error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp);
1344 if (error == -ENOSPC) { 1340 if (error == -ENOSPC) {
1345 /* No space at all so try a "no-allocation" reservation */ 1341 /* No space at all so try a "no-allocation" reservation */
1346 resblks = 0; 1342 resblks = 0;
1347 error = xfs_trans_reserve(tp, tres, 0, 0); 1343 error = xfs_trans_alloc(mp, tres, 0, 0, 0, &tp);
1348 } 1344 }
1349 if (error) 1345 if (error)
1350 goto out_trans_cancel; 1346 goto out_release_inode;
1351 1347
1352 error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, 1348 error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp,
1353 pdqp, resblks, 1, 0); 1349 pdqp, resblks, 1, 0);
@@ -1432,15 +1428,14 @@ xfs_link(
1432 if (error) 1428 if (error)
1433 goto std_return; 1429 goto std_return;
1434 1430
1435 tp = xfs_trans_alloc(mp, XFS_TRANS_LINK);
1436 resblks = XFS_LINK_SPACE_RES(mp, target_name->len); 1431 resblks = XFS_LINK_SPACE_RES(mp, target_name->len);
1437 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, resblks, 0); 1432 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link, resblks, 0, 0, &tp);
1438 if (error == -ENOSPC) { 1433 if (error == -ENOSPC) {
1439 resblks = 0; 1434 resblks = 0;
1440 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, 0, 0); 1435 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link, 0, 0, 0, &tp);
1441 } 1436 }
1442 if (error) 1437 if (error)
1443 goto error_return; 1438 goto std_return;
1444 1439
1445 xfs_ilock(tdp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT); 1440 xfs_ilock(tdp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
1446 xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL); 1441 xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);
@@ -1710,11 +1705,9 @@ xfs_inactive_truncate(
1710 struct xfs_trans *tp; 1705 struct xfs_trans *tp;
1711 int error; 1706 int error;
1712 1707
1713 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); 1708 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
1714 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
1715 if (error) { 1709 if (error) {
1716 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 1710 ASSERT(XFS_FORCED_SHUTDOWN(mp));
1717 xfs_trans_cancel(tp);
1718 return error; 1711 return error;
1719 } 1712 }
1720 1713
@@ -1764,8 +1757,6 @@ xfs_inactive_ifree(
1764 struct xfs_trans *tp; 1757 struct xfs_trans *tp;
1765 int error; 1758 int error;
1766 1759
1767 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
1768
1769 /* 1760 /*
1770 * The ifree transaction might need to allocate blocks for record 1761 * The ifree transaction might need to allocate blocks for record
1771 * insertion to the finobt. We don't want to fail here at ENOSPC, so 1762 * insertion to the finobt. We don't want to fail here at ENOSPC, so
@@ -1781,9 +1772,8 @@ xfs_inactive_ifree(
1781 * now remains allocated and sits on the unlinked list until the fs is 1772 * now remains allocated and sits on the unlinked list until the fs is
1782 * repaired. 1773 * repaired.
1783 */ 1774 */
1784 tp->t_flags |= XFS_TRANS_RESERVE; 1775 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree,
1785 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, 1776 XFS_IFREE_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, &tp);
1786 XFS_IFREE_SPACE_RES(mp), 0);
1787 if (error) { 1777 if (error) {
1788 if (error == -ENOSPC) { 1778 if (error == -ENOSPC) {
1789 xfs_warn_ratelimited(mp, 1779 xfs_warn_ratelimited(mp,
@@ -1792,7 +1782,6 @@ xfs_inactive_ifree(
1792 } else { 1782 } else {
1793 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 1783 ASSERT(XFS_FORCED_SHUTDOWN(mp));
1794 } 1784 }
1795 xfs_trans_cancel(tp);
1796 return error; 1785 return error;
1797 } 1786 }
1798 1787
@@ -2525,11 +2514,6 @@ xfs_remove(
2525 if (error) 2514 if (error)
2526 goto std_return; 2515 goto std_return;
2527 2516
2528 if (is_dir)
2529 tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR);
2530 else
2531 tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE);
2532
2533 /* 2517 /*
2534 * We try to get the real space reservation first, 2518 * We try to get the real space reservation first,
2535 * allowing for directory btree deletion(s) implying 2519 * allowing for directory btree deletion(s) implying
@@ -2540,14 +2524,15 @@ xfs_remove(
2540 * block from the directory. 2524 * block from the directory.
2541 */ 2525 */
2542 resblks = XFS_REMOVE_SPACE_RES(mp); 2526 resblks = XFS_REMOVE_SPACE_RES(mp);
2543 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_remove, resblks, 0); 2527 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_remove, resblks, 0, 0, &tp);
2544 if (error == -ENOSPC) { 2528 if (error == -ENOSPC) {
2545 resblks = 0; 2529 resblks = 0;
2546 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_remove, 0, 0); 2530 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_remove, 0, 0, 0,
2531 &tp);
2547 } 2532 }
2548 if (error) { 2533 if (error) {
2549 ASSERT(error != -ENOSPC); 2534 ASSERT(error != -ENOSPC);
2550 goto out_trans_cancel; 2535 goto std_return;
2551 } 2536 }
2552 2537
2553 xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT); 2538 xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
@@ -2855,6 +2840,7 @@ xfs_rename_alloc_whiteout(
2855 * and flag it as linkable. 2840 * and flag it as linkable.
2856 */ 2841 */
2857 drop_nlink(VFS_I(tmpfile)); 2842 drop_nlink(VFS_I(tmpfile));
2843 xfs_setup_iops(tmpfile);
2858 xfs_finish_inode_setup(tmpfile); 2844 xfs_finish_inode_setup(tmpfile);
2859 VFS_I(tmpfile)->i_state |= I_LINKABLE; 2845 VFS_I(tmpfile)->i_state |= I_LINKABLE;
2860 2846
@@ -2910,15 +2896,15 @@ xfs_rename(
2910 xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, wip, 2896 xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, wip,
2911 inodes, &num_inodes); 2897 inodes, &num_inodes);
2912 2898
2913 tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME);
2914 spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len); 2899 spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len);
2915 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, spaceres, 0); 2900 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_rename, spaceres, 0, 0, &tp);
2916 if (error == -ENOSPC) { 2901 if (error == -ENOSPC) {
2917 spaceres = 0; 2902 spaceres = 0;
2918 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, 0, 0); 2903 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_rename, 0, 0, 0,
2904 &tp);
2919 } 2905 }
2920 if (error) 2906 if (error)
2921 goto out_trans_cancel; 2907 goto out_release_wip;
2922 2908
2923 /* 2909 /*
2924 * Attach the dquots to the inodes 2910 * Attach the dquots to the inodes
@@ -3155,6 +3141,7 @@ out_bmap_cancel:
3155 xfs_bmap_cancel(&free_list); 3141 xfs_bmap_cancel(&free_list);
3156out_trans_cancel: 3142out_trans_cancel:
3157 xfs_trans_cancel(tp); 3143 xfs_trans_cancel(tp);
3144out_release_wip:
3158 if (wip) 3145 if (wip)
3159 IRELE(wip); 3146 IRELE(wip);
3160 return error; 3147 return error;
@@ -3162,16 +3149,16 @@ out_trans_cancel:
3162 3149
3163STATIC int 3150STATIC int
3164xfs_iflush_cluster( 3151xfs_iflush_cluster(
3165 xfs_inode_t *ip, 3152 struct xfs_inode *ip,
3166 xfs_buf_t *bp) 3153 struct xfs_buf *bp)
3167{ 3154{
3168 xfs_mount_t *mp = ip->i_mount; 3155 struct xfs_mount *mp = ip->i_mount;
3169 struct xfs_perag *pag; 3156 struct xfs_perag *pag;
3170 unsigned long first_index, mask; 3157 unsigned long first_index, mask;
3171 unsigned long inodes_per_cluster; 3158 unsigned long inodes_per_cluster;
3172 int ilist_size; 3159 int cilist_size;
3173 xfs_inode_t **ilist; 3160 struct xfs_inode **cilist;
3174 xfs_inode_t *iq; 3161 struct xfs_inode *cip;
3175 int nr_found; 3162 int nr_found;
3176 int clcount = 0; 3163 int clcount = 0;
3177 int bufwasdelwri; 3164 int bufwasdelwri;
@@ -3180,23 +3167,23 @@ xfs_iflush_cluster(
3180 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); 3167 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
3181 3168
3182 inodes_per_cluster = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog; 3169 inodes_per_cluster = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
3183 ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *); 3170 cilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
3184 ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS); 3171 cilist = kmem_alloc(cilist_size, KM_MAYFAIL|KM_NOFS);
3185 if (!ilist) 3172 if (!cilist)
3186 goto out_put; 3173 goto out_put;
3187 3174
3188 mask = ~(((mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog)) - 1); 3175 mask = ~(((mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog)) - 1);
3189 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; 3176 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
3190 rcu_read_lock(); 3177 rcu_read_lock();
3191 /* really need a gang lookup range call here */ 3178 /* really need a gang lookup range call here */
3192 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist, 3179 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)cilist,
3193 first_index, inodes_per_cluster); 3180 first_index, inodes_per_cluster);
3194 if (nr_found == 0) 3181 if (nr_found == 0)
3195 goto out_free; 3182 goto out_free;
3196 3183
3197 for (i = 0; i < nr_found; i++) { 3184 for (i = 0; i < nr_found; i++) {
3198 iq = ilist[i]; 3185 cip = cilist[i];
3199 if (iq == ip) 3186 if (cip == ip)
3200 continue; 3187 continue;
3201 3188
3202 /* 3189 /*
@@ -3205,20 +3192,30 @@ xfs_iflush_cluster(
3205 * We need to check under the i_flags_lock for a valid inode 3192 * We need to check under the i_flags_lock for a valid inode
3206 * here. Skip it if it is not valid or the wrong inode. 3193 * here. Skip it if it is not valid or the wrong inode.
3207 */ 3194 */
3208 spin_lock(&ip->i_flags_lock); 3195 spin_lock(&cip->i_flags_lock);
3209 if (!ip->i_ino || 3196 if (!cip->i_ino ||
3210 (XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) { 3197 __xfs_iflags_test(cip, XFS_ISTALE)) {
3211 spin_unlock(&ip->i_flags_lock); 3198 spin_unlock(&cip->i_flags_lock);
3212 continue; 3199 continue;
3213 } 3200 }
3214 spin_unlock(&ip->i_flags_lock); 3201
3202 /*
3203 * Once we fall off the end of the cluster, no point checking
3204 * any more inodes in the list because they will also all be
3205 * outside the cluster.
3206 */
3207 if ((XFS_INO_TO_AGINO(mp, cip->i_ino) & mask) != first_index) {
3208 spin_unlock(&cip->i_flags_lock);
3209 break;
3210 }
3211 spin_unlock(&cip->i_flags_lock);
3215 3212
3216 /* 3213 /*
3217 * Do an un-protected check to see if the inode is dirty and 3214 * Do an un-protected check to see if the inode is dirty and
3218 * is a candidate for flushing. These checks will be repeated 3215 * is a candidate for flushing. These checks will be repeated
3219 * later after the appropriate locks are acquired. 3216 * later after the appropriate locks are acquired.
3220 */ 3217 */
3221 if (xfs_inode_clean(iq) && xfs_ipincount(iq) == 0) 3218 if (xfs_inode_clean(cip) && xfs_ipincount(cip) == 0)
3222 continue; 3219 continue;
3223 3220
3224 /* 3221 /*
@@ -3226,15 +3223,28 @@ xfs_iflush_cluster(
3226 * then this inode cannot be flushed and is skipped. 3223 * then this inode cannot be flushed and is skipped.
3227 */ 3224 */
3228 3225
3229 if (!xfs_ilock_nowait(iq, XFS_ILOCK_SHARED)) 3226 if (!xfs_ilock_nowait(cip, XFS_ILOCK_SHARED))
3227 continue;
3228 if (!xfs_iflock_nowait(cip)) {
3229 xfs_iunlock(cip, XFS_ILOCK_SHARED);
3230 continue; 3230 continue;
3231 if (!xfs_iflock_nowait(iq)) { 3231 }
3232 xfs_iunlock(iq, XFS_ILOCK_SHARED); 3232 if (xfs_ipincount(cip)) {
3233 xfs_ifunlock(cip);
3234 xfs_iunlock(cip, XFS_ILOCK_SHARED);
3233 continue; 3235 continue;
3234 } 3236 }
3235 if (xfs_ipincount(iq)) { 3237
3236 xfs_ifunlock(iq); 3238
3237 xfs_iunlock(iq, XFS_ILOCK_SHARED); 3239 /*
3240 * Check the inode number again, just to be certain we are not
3241 * racing with freeing in xfs_reclaim_inode(). See the comments
3242 * in that function for more information as to why the initial
3243 * check is not sufficient.
3244 */
3245 if (!cip->i_ino) {
3246 xfs_ifunlock(cip);
3247 xfs_iunlock(cip, XFS_ILOCK_SHARED);
3238 continue; 3248 continue;
3239 } 3249 }
3240 3250
@@ -3242,18 +3252,18 @@ xfs_iflush_cluster(
3242 * arriving here means that this inode can be flushed. First 3252 * arriving here means that this inode can be flushed. First
3243 * re-check that it's dirty before flushing. 3253 * re-check that it's dirty before flushing.
3244 */ 3254 */
3245 if (!xfs_inode_clean(iq)) { 3255 if (!xfs_inode_clean(cip)) {
3246 int error; 3256 int error;
3247 error = xfs_iflush_int(iq, bp); 3257 error = xfs_iflush_int(cip, bp);
3248 if (error) { 3258 if (error) {
3249 xfs_iunlock(iq, XFS_ILOCK_SHARED); 3259 xfs_iunlock(cip, XFS_ILOCK_SHARED);
3250 goto cluster_corrupt_out; 3260 goto cluster_corrupt_out;
3251 } 3261 }
3252 clcount++; 3262 clcount++;
3253 } else { 3263 } else {
3254 xfs_ifunlock(iq); 3264 xfs_ifunlock(cip);
3255 } 3265 }
3256 xfs_iunlock(iq, XFS_ILOCK_SHARED); 3266 xfs_iunlock(cip, XFS_ILOCK_SHARED);
3257 } 3267 }
3258 3268
3259 if (clcount) { 3269 if (clcount) {
@@ -3263,7 +3273,7 @@ xfs_iflush_cluster(
3263 3273
3264out_free: 3274out_free:
3265 rcu_read_unlock(); 3275 rcu_read_unlock();
3266 kmem_free(ilist); 3276 kmem_free(cilist);
3267out_put: 3277out_put:
3268 xfs_perag_put(pag); 3278 xfs_perag_put(pag);
3269 return 0; 3279 return 0;
@@ -3306,8 +3316,8 @@ cluster_corrupt_out:
3306 /* 3316 /*
3307 * Unlocks the flush lock 3317 * Unlocks the flush lock
3308 */ 3318 */
3309 xfs_iflush_abort(iq, false); 3319 xfs_iflush_abort(cip, false);
3310 kmem_free(ilist); 3320 kmem_free(cilist);
3311 xfs_perag_put(pag); 3321 xfs_perag_put(pag);
3312 return -EFSCORRUPTED; 3322 return -EFSCORRUPTED;
3313} 3323}
@@ -3327,7 +3337,7 @@ xfs_iflush(
3327 struct xfs_buf **bpp) 3337 struct xfs_buf **bpp)
3328{ 3338{
3329 struct xfs_mount *mp = ip->i_mount; 3339 struct xfs_mount *mp = ip->i_mount;
3330 struct xfs_buf *bp; 3340 struct xfs_buf *bp = NULL;
3331 struct xfs_dinode *dip; 3341 struct xfs_dinode *dip;
3332 int error; 3342 int error;
3333 3343
@@ -3369,14 +3379,22 @@ xfs_iflush(
3369 } 3379 }
3370 3380
3371 /* 3381 /*
3372 * Get the buffer containing the on-disk inode. 3382 * Get the buffer containing the on-disk inode. We are doing a try-lock
3383 * operation here, so we may get an EAGAIN error. In that case, we
3384 * simply want to return with the inode still dirty.
3385 *
3386 * If we get any other error, we effectively have a corruption situation
3387 * and we cannot flush the inode, so we treat it the same as failing
3388 * xfs_iflush_int().
3373 */ 3389 */
3374 error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &bp, XBF_TRYLOCK, 3390 error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &bp, XBF_TRYLOCK,
3375 0); 3391 0);
3376 if (error || !bp) { 3392 if (error == -EAGAIN) {
3377 xfs_ifunlock(ip); 3393 xfs_ifunlock(ip);
3378 return error; 3394 return error;
3379 } 3395 }
3396 if (error)
3397 goto corrupt_out;
3380 3398
3381 /* 3399 /*
3382 * First flush out the inode that xfs_iflush was called with. 3400 * First flush out the inode that xfs_iflush was called with.
@@ -3404,7 +3422,8 @@ xfs_iflush(
3404 return 0; 3422 return 0;
3405 3423
3406corrupt_out: 3424corrupt_out:
3407 xfs_buf_relse(bp); 3425 if (bp)
3426 xfs_buf_relse(bp);
3408 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 3427 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
3409cluster_corrupt_out: 3428cluster_corrupt_out:
3410 error = -EFSCORRUPTED; 3429 error = -EFSCORRUPTED;
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 43e1d51b15eb..e52d7c7aeb5b 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -440,6 +440,9 @@ loff_t __xfs_seek_hole_data(struct inode *inode, loff_t start,
440 440
441 441
442/* from xfs_iops.c */ 442/* from xfs_iops.c */
443extern void xfs_setup_inode(struct xfs_inode *ip);
444extern void xfs_setup_iops(struct xfs_inode *ip);
445
443/* 446/*
444 * When setting up a newly allocated inode, we need to call 447 * When setting up a newly allocated inode, we need to call
445 * xfs_finish_inode_setup() once the inode is fully instantiated at 448 * xfs_finish_inode_setup() once the inode is fully instantiated at
@@ -447,7 +450,6 @@ loff_t __xfs_seek_hole_data(struct inode *inode, loff_t start,
447 * before we've completed instantiation. Otherwise we can do it 450 * before we've completed instantiation. Otherwise we can do it
448 * the moment the inode lookup is complete. 451 * the moment the inode lookup is complete.
449 */ 452 */
450extern void xfs_setup_inode(struct xfs_inode *ip);
451static inline void xfs_finish_inode_setup(struct xfs_inode *ip) 453static inline void xfs_finish_inode_setup(struct xfs_inode *ip)
452{ 454{
453 xfs_iflags_clear(ip, XFS_INEW); 455 xfs_iflags_clear(ip, XFS_INEW);
@@ -458,6 +460,7 @@ static inline void xfs_finish_inode_setup(struct xfs_inode *ip)
458static inline void xfs_setup_existing_inode(struct xfs_inode *ip) 460static inline void xfs_setup_existing_inode(struct xfs_inode *ip)
459{ 461{
460 xfs_setup_inode(ip); 462 xfs_setup_inode(ip);
463 xfs_setup_iops(ip);
461 xfs_finish_inode_setup(ip); 464 xfs_finish_inode_setup(ip);
462} 465}
463 466
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index c48b5b18d771..a1b07612224c 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -210,7 +210,7 @@ xfs_inode_item_format_data_fork(
210 */ 210 */
211 data_bytes = roundup(ip->i_df.if_bytes, 4); 211 data_bytes = roundup(ip->i_df.if_bytes, 4);
212 ASSERT(ip->i_df.if_real_bytes == 0 || 212 ASSERT(ip->i_df.if_real_bytes == 0 ||
213 ip->i_df.if_real_bytes == data_bytes); 213 ip->i_df.if_real_bytes >= data_bytes);
214 ASSERT(ip->i_df.if_u1.if_data != NULL); 214 ASSERT(ip->i_df.if_u1.if_data != NULL);
215 ASSERT(ip->i_d.di_size > 0); 215 ASSERT(ip->i_d.di_size > 0);
216 xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_ILOCAL, 216 xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_ILOCAL,
@@ -305,7 +305,7 @@ xfs_inode_item_format_attr_fork(
305 */ 305 */
306 data_bytes = roundup(ip->i_afp->if_bytes, 4); 306 data_bytes = roundup(ip->i_afp->if_bytes, 4);
307 ASSERT(ip->i_afp->if_real_bytes == 0 || 307 ASSERT(ip->i_afp->if_real_bytes == 0 ||
308 ip->i_afp->if_real_bytes == data_bytes); 308 ip->i_afp->if_real_bytes >= data_bytes);
309 ASSERT(ip->i_afp->if_u1.if_data != NULL); 309 ASSERT(ip->i_afp->if_u1.if_data != NULL);
310 xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_LOCAL, 310 xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_LOCAL,
311 ip->i_afp->if_u1.if_data, 311 ip->i_afp->if_u1.if_data,
@@ -479,6 +479,8 @@ STATIC uint
479xfs_inode_item_push( 479xfs_inode_item_push(
480 struct xfs_log_item *lip, 480 struct xfs_log_item *lip,
481 struct list_head *buffer_list) 481 struct list_head *buffer_list)
482 __releases(&lip->li_ailp->xa_lock)
483 __acquires(&lip->li_ailp->xa_lock)
482{ 484{
483 struct xfs_inode_log_item *iip = INODE_ITEM(lip); 485 struct xfs_inode_log_item *iip = INODE_ITEM(lip);
484 struct xfs_inode *ip = iip->ili_inode; 486 struct xfs_inode *ip = iip->ili_inode;
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index bcb6c19ce3ea..dbca7375deef 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -277,7 +277,6 @@ xfs_readlink_by_handle(
277{ 277{
278 struct dentry *dentry; 278 struct dentry *dentry;
279 __u32 olen; 279 __u32 olen;
280 void *link;
281 int error; 280 int error;
282 281
283 if (!capable(CAP_SYS_ADMIN)) 282 if (!capable(CAP_SYS_ADMIN))
@@ -288,7 +287,7 @@ xfs_readlink_by_handle(
288 return PTR_ERR(dentry); 287 return PTR_ERR(dentry);
289 288
290 /* Restrict this handle operation to symlinks only. */ 289 /* Restrict this handle operation to symlinks only. */
291 if (!d_is_symlink(dentry)) { 290 if (!d_inode(dentry)->i_op->readlink) {
292 error = -EINVAL; 291 error = -EINVAL;
293 goto out_dput; 292 goto out_dput;
294 } 293 }
@@ -298,21 +297,8 @@ xfs_readlink_by_handle(
298 goto out_dput; 297 goto out_dput;
299 } 298 }
300 299
301 link = kmalloc(MAXPATHLEN+1, GFP_KERNEL); 300 error = d_inode(dentry)->i_op->readlink(dentry, hreq->ohandle, olen);
302 if (!link) {
303 error = -ENOMEM;
304 goto out_dput;
305 }
306
307 error = xfs_readlink(XFS_I(d_inode(dentry)), link);
308 if (error)
309 goto out_kfree;
310 error = readlink_copy(hreq->ohandle, olen, link);
311 if (error)
312 goto out_kfree;
313 301
314 out_kfree:
315 kfree(link);
316 out_dput: 302 out_dput:
317 dput(dentry); 303 dput(dentry);
318 return error; 304 return error;
@@ -334,12 +320,10 @@ xfs_set_dmattrs(
334 if (XFS_FORCED_SHUTDOWN(mp)) 320 if (XFS_FORCED_SHUTDOWN(mp))
335 return -EIO; 321 return -EIO;
336 322
337 tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS); 323 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
338 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); 324 if (error)
339 if (error) {
340 xfs_trans_cancel(tp);
341 return error; 325 return error;
342 } 326
343 xfs_ilock(ip, XFS_ILOCK_EXCL); 327 xfs_ilock(ip, XFS_ILOCK_EXCL);
344 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 328 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
345 329
@@ -1141,10 +1125,9 @@ xfs_ioctl_setattr_get_trans(
1141 if (XFS_FORCED_SHUTDOWN(mp)) 1125 if (XFS_FORCED_SHUTDOWN(mp))
1142 goto out_unlock; 1126 goto out_unlock;
1143 1127
1144 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); 1128 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
1145 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
1146 if (error) 1129 if (error)
1147 goto out_cancel; 1130 return ERR_PTR(error);
1148 1131
1149 xfs_ilock(ip, XFS_ILOCK_EXCL); 1132 xfs_ilock(ip, XFS_ILOCK_EXCL);
1150 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | join_flags); 1133 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | join_flags);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index d81bdc080370..58391355a44d 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -132,6 +132,7 @@ xfs_iomap_write_direct(
132 int error; 132 int error;
133 int lockmode; 133 int lockmode;
134 int bmapi_flags = XFS_BMAPI_PREALLOC; 134 int bmapi_flags = XFS_BMAPI_PREALLOC;
135 uint tflags = 0;
135 136
136 rt = XFS_IS_REALTIME_INODE(ip); 137 rt = XFS_IS_REALTIME_INODE(ip);
137 extsz = xfs_get_extsz_hint(ip); 138 extsz = xfs_get_extsz_hint(ip);
@@ -192,11 +193,6 @@ xfs_iomap_write_direct(
192 return error; 193 return error;
193 194
194 /* 195 /*
195 * Allocate and setup the transaction
196 */
197 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
198
199 /*
200 * For DAX, we do not allocate unwritten extents, but instead we zero 196 * For DAX, we do not allocate unwritten extents, but instead we zero
201 * the block before we commit the transaction. Ideally we'd like to do 197 * the block before we commit the transaction. Ideally we'd like to do
202 * this outside the transaction context, but if we commit and then crash 198 * this outside the transaction context, but if we commit and then crash
@@ -209,23 +205,17 @@ xfs_iomap_write_direct(
209 * the reserve block pool for bmbt block allocation if there is no space 205 * the reserve block pool for bmbt block allocation if there is no space
210 * left but we need to do unwritten extent conversion. 206 * left but we need to do unwritten extent conversion.
211 */ 207 */
212
213 if (IS_DAX(VFS_I(ip))) { 208 if (IS_DAX(VFS_I(ip))) {
214 bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO; 209 bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO;
215 if (ISUNWRITTEN(imap)) { 210 if (ISUNWRITTEN(imap)) {
216 tp->t_flags |= XFS_TRANS_RESERVE; 211 tflags |= XFS_TRANS_RESERVE;
217 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1; 212 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
218 } 213 }
219 } 214 }
220 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, 215 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, resrtextents,
221 resblks, resrtextents); 216 tflags, &tp);
222 /* 217 if (error)
223 * Check for running out of space, note: need lock to return
224 */
225 if (error) {
226 xfs_trans_cancel(tp);
227 return error; 218 return error;
228 }
229 219
230 lockmode = XFS_ILOCK_EXCL; 220 lockmode = XFS_ILOCK_EXCL;
231 xfs_ilock(ip, lockmode); 221 xfs_ilock(ip, lockmode);
@@ -726,15 +716,13 @@ xfs_iomap_write_allocate(
726 716
727 nimaps = 0; 717 nimaps = 0;
728 while (nimaps == 0) { 718 while (nimaps == 0) {
729 tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
730 tp->t_flags |= XFS_TRANS_RESERVE;
731 nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK); 719 nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
732 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, 720
733 nres, 0); 721 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, nres,
734 if (error) { 722 0, XFS_TRANS_RESERVE, &tp);
735 xfs_trans_cancel(tp); 723 if (error)
736 return error; 724 return error;
737 } 725
738 xfs_ilock(ip, XFS_ILOCK_EXCL); 726 xfs_ilock(ip, XFS_ILOCK_EXCL);
739 xfs_trans_ijoin(tp, ip, 0); 727 xfs_trans_ijoin(tp, ip, 0);
740 728
@@ -878,25 +866,18 @@ xfs_iomap_write_unwritten(
878 866
879 do { 867 do {
880 /* 868 /*
881 * set up a transaction to convert the range of extents 869 * Set up a transaction to convert the range of extents
882 * from unwritten to real. Do allocations in a loop until 870 * from unwritten to real. Do allocations in a loop until
883 * we have covered the range passed in. 871 * we have covered the range passed in.
884 * 872 *
885 * Note that we open code the transaction allocation here 873 * Note that we can't risk to recursing back into the filesystem
886 * to pass KM_NOFS--we can't risk to recursing back into 874 * here as we might be asked to write out the same inode that we
887 * the filesystem here as we might be asked to write out 875 * complete here and might deadlock on the iolock.
888 * the same inode that we complete here and might deadlock
889 * on the iolock.
890 */ 876 */
891 sb_start_intwrite(mp->m_super); 877 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0,
892 tp = _xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE, KM_NOFS); 878 XFS_TRANS_RESERVE | XFS_TRANS_NOFS, &tp);
893 tp->t_flags |= XFS_TRANS_RESERVE | XFS_TRANS_FREEZE_PROT; 879 if (error)
894 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
895 resblks, 0);
896 if (error) {
897 xfs_trans_cancel(tp);
898 return error; 880 return error;
899 }
900 881
901 xfs_ilock(ip, XFS_ILOCK_EXCL); 882 xfs_ilock(ip, XFS_ILOCK_EXCL);
902 xfs_trans_ijoin(tp, ip, 0); 883 xfs_trans_ijoin(tp, ip, 0);
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index fb7dc61f4a29..c5d4eba6972e 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -181,6 +181,8 @@ xfs_generic_create(
181 } 181 }
182#endif 182#endif
183 183
184 xfs_setup_iops(ip);
185
184 if (tmpfile) 186 if (tmpfile)
185 d_tmpfile(dentry, inode); 187 d_tmpfile(dentry, inode);
186 else 188 else
@@ -368,6 +370,8 @@ xfs_vn_symlink(
368 if (unlikely(error)) 370 if (unlikely(error))
369 goto out_cleanup_inode; 371 goto out_cleanup_inode;
370 372
373 xfs_setup_iops(cip);
374
371 d_instantiate(dentry, inode); 375 d_instantiate(dentry, inode);
372 xfs_finish_inode_setup(cip); 376 xfs_finish_inode_setup(cip);
373 return 0; 377 return 0;
@@ -442,6 +446,16 @@ xfs_vn_get_link(
442 return ERR_PTR(error); 446 return ERR_PTR(error);
443} 447}
444 448
449STATIC const char *
450xfs_vn_get_link_inline(
451 struct dentry *dentry,
452 struct inode *inode,
453 struct delayed_call *done)
454{
455 ASSERT(XFS_I(inode)->i_df.if_flags & XFS_IFINLINE);
456 return XFS_I(inode)->i_df.if_u1.if_data;
457}
458
445STATIC int 459STATIC int
446xfs_vn_getattr( 460xfs_vn_getattr(
447 struct vfsmount *mnt, 461 struct vfsmount *mnt,
@@ -599,12 +613,12 @@ xfs_setattr_nonsize(
599 return error; 613 return error;
600 } 614 }
601 615
602 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); 616 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
603 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
604 if (error) 617 if (error)
605 goto out_trans_cancel; 618 goto out_dqrele;
606 619
607 xfs_ilock(ip, XFS_ILOCK_EXCL); 620 xfs_ilock(ip, XFS_ILOCK_EXCL);
621 xfs_trans_ijoin(tp, ip, 0);
608 622
609 /* 623 /*
610 * Change file ownership. Must be the owner or privileged. 624 * Change file ownership. Must be the owner or privileged.
@@ -633,12 +647,10 @@ xfs_setattr_nonsize(
633 NULL, capable(CAP_FOWNER) ? 647 NULL, capable(CAP_FOWNER) ?
634 XFS_QMOPT_FORCE_RES : 0); 648 XFS_QMOPT_FORCE_RES : 0);
635 if (error) /* out of quota */ 649 if (error) /* out of quota */
636 goto out_unlock; 650 goto out_cancel;
637 } 651 }
638 } 652 }
639 653
640 xfs_trans_ijoin(tp, ip, 0);
641
642 /* 654 /*
643 * Change file ownership. Must be the owner or privileged. 655 * Change file ownership. Must be the owner or privileged.
644 */ 656 */
@@ -722,10 +734,9 @@ xfs_setattr_nonsize(
722 734
723 return 0; 735 return 0;
724 736
725out_unlock: 737out_cancel:
726 xfs_iunlock(ip, XFS_ILOCK_EXCL);
727out_trans_cancel:
728 xfs_trans_cancel(tp); 738 xfs_trans_cancel(tp);
739out_dqrele:
729 xfs_qm_dqrele(udqp); 740 xfs_qm_dqrele(udqp);
730 xfs_qm_dqrele(gdqp); 741 xfs_qm_dqrele(gdqp);
731 return error; 742 return error;
@@ -834,7 +845,7 @@ xfs_setattr_size(
834 * We have to do all the page cache truncate work outside the 845 * We have to do all the page cache truncate work outside the
835 * transaction context as the "lock" order is page lock->log space 846 * transaction context as the "lock" order is page lock->log space
836 * reservation as defined by extent allocation in the writeback path. 847 * reservation as defined by extent allocation in the writeback path.
837 * Hence a truncate can fail with ENOMEM from xfs_trans_reserve(), but 848 * Hence a truncate can fail with ENOMEM from xfs_trans_alloc(), but
838 * having already truncated the in-memory version of the file (i.e. made 849 * having already truncated the in-memory version of the file (i.e. made
839 * user visible changes). There's not much we can do about this, except 850 * user visible changes). There's not much we can do about this, except
840 * to hope that the caller sees ENOMEM and retries the truncate 851 * to hope that the caller sees ENOMEM and retries the truncate
@@ -849,10 +860,9 @@ xfs_setattr_size(
849 return error; 860 return error;
850 truncate_setsize(inode, newsize); 861 truncate_setsize(inode, newsize);
851 862
852 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); 863 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
853 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
854 if (error) 864 if (error)
855 goto out_trans_cancel; 865 return error;
856 866
857 lock_flags |= XFS_ILOCK_EXCL; 867 lock_flags |= XFS_ILOCK_EXCL;
858 xfs_ilock(ip, XFS_ILOCK_EXCL); 868 xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -971,12 +981,9 @@ xfs_vn_update_time(
971 981
972 trace_xfs_update_time(ip); 982 trace_xfs_update_time(ip);
973 983
974 tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); 984 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
975 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0); 985 if (error)
976 if (error) {
977 xfs_trans_cancel(tp);
978 return error; 986 return error;
979 }
980 987
981 xfs_ilock(ip, XFS_ILOCK_EXCL); 988 xfs_ilock(ip, XFS_ILOCK_EXCL);
982 if (flags & S_CTIME) 989 if (flags & S_CTIME)
@@ -1167,6 +1174,18 @@ static const struct inode_operations xfs_symlink_inode_operations = {
1167 .update_time = xfs_vn_update_time, 1174 .update_time = xfs_vn_update_time,
1168}; 1175};
1169 1176
1177static const struct inode_operations xfs_inline_symlink_inode_operations = {
1178 .readlink = generic_readlink,
1179 .get_link = xfs_vn_get_link_inline,
1180 .getattr = xfs_vn_getattr,
1181 .setattr = xfs_vn_setattr,
1182 .setxattr = generic_setxattr,
1183 .getxattr = generic_getxattr,
1184 .removexattr = generic_removexattr,
1185 .listxattr = xfs_vn_listxattr,
1186 .update_time = xfs_vn_update_time,
1187};
1188
1170STATIC void 1189STATIC void
1171xfs_diflags_to_iflags( 1190xfs_diflags_to_iflags(
1172 struct inode *inode, 1191 struct inode *inode,
@@ -1193,7 +1212,7 @@ xfs_diflags_to_iflags(
1193} 1212}
1194 1213
1195/* 1214/*
1196 * Initialize the Linux inode and set up the operation vectors. 1215 * Initialize the Linux inode.
1197 * 1216 *
1198 * When reading existing inodes from disk this is called directly from xfs_iget, 1217 * When reading existing inodes from disk this is called directly from xfs_iget,
1199 * when creating a new inode it is called from xfs_ialloc after setting up the 1218 * when creating a new inode it is called from xfs_ialloc after setting up the
@@ -1232,32 +1251,12 @@ xfs_setup_inode(
1232 i_size_write(inode, ip->i_d.di_size); 1251 i_size_write(inode, ip->i_d.di_size);
1233 xfs_diflags_to_iflags(inode, ip); 1252 xfs_diflags_to_iflags(inode, ip);
1234 1253
1235 ip->d_ops = ip->i_mount->m_nondir_inode_ops; 1254 if (S_ISDIR(inode->i_mode)) {
1236 lockdep_set_class(&ip->i_lock.mr_lock, &xfs_nondir_ilock_class);
1237 switch (inode->i_mode & S_IFMT) {
1238 case S_IFREG:
1239 inode->i_op = &xfs_inode_operations;
1240 inode->i_fop = &xfs_file_operations;
1241 inode->i_mapping->a_ops = &xfs_address_space_operations;
1242 break;
1243 case S_IFDIR:
1244 lockdep_set_class(&ip->i_lock.mr_lock, &xfs_dir_ilock_class); 1255 lockdep_set_class(&ip->i_lock.mr_lock, &xfs_dir_ilock_class);
1245 if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
1246 inode->i_op = &xfs_dir_ci_inode_operations;
1247 else
1248 inode->i_op = &xfs_dir_inode_operations;
1249 inode->i_fop = &xfs_dir_file_operations;
1250 ip->d_ops = ip->i_mount->m_dir_inode_ops; 1256 ip->d_ops = ip->i_mount->m_dir_inode_ops;
1251 break; 1257 } else {
1252 case S_IFLNK: 1258 ip->d_ops = ip->i_mount->m_nondir_inode_ops;
1253 inode->i_op = &xfs_symlink_inode_operations; 1259 lockdep_set_class(&ip->i_lock.mr_lock, &xfs_nondir_ilock_class);
1254 if (!(ip->i_df.if_flags & XFS_IFINLINE))
1255 inode->i_mapping->a_ops = &xfs_address_space_operations;
1256 break;
1257 default:
1258 inode->i_op = &xfs_inode_operations;
1259 init_special_inode(inode, inode->i_mode, inode->i_rdev);
1260 break;
1261 } 1260 }
1262 1261
1263 /* 1262 /*
@@ -1277,3 +1276,35 @@ xfs_setup_inode(
1277 cache_no_acl(inode); 1276 cache_no_acl(inode);
1278 } 1277 }
1279} 1278}
1279
1280void
1281xfs_setup_iops(
1282 struct xfs_inode *ip)
1283{
1284 struct inode *inode = &ip->i_vnode;
1285
1286 switch (inode->i_mode & S_IFMT) {
1287 case S_IFREG:
1288 inode->i_op = &xfs_inode_operations;
1289 inode->i_fop = &xfs_file_operations;
1290 inode->i_mapping->a_ops = &xfs_address_space_operations;
1291 break;
1292 case S_IFDIR:
1293 if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
1294 inode->i_op = &xfs_dir_ci_inode_operations;
1295 else
1296 inode->i_op = &xfs_dir_inode_operations;
1297 inode->i_fop = &xfs_dir_file_operations;
1298 break;
1299 case S_IFLNK:
1300 if (ip->i_df.if_flags & XFS_IFINLINE)
1301 inode->i_op = &xfs_inline_symlink_inode_operations;
1302 else
1303 inode->i_op = &xfs_symlink_inode_operations;
1304 break;
1305 default:
1306 inode->i_op = &xfs_inode_operations;
1307 init_special_inode(inode, inode->i_mode, inode->i_rdev);
1308 break;
1309 }
1310}
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index b49ccf5c1d75..bde02f1fba73 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -435,8 +435,7 @@ xfs_log_reserve(
435 int cnt, 435 int cnt,
436 struct xlog_ticket **ticp, 436 struct xlog_ticket **ticp,
437 __uint8_t client, 437 __uint8_t client,
438 bool permanent, 438 bool permanent)
439 uint t_type)
440{ 439{
441 struct xlog *log = mp->m_log; 440 struct xlog *log = mp->m_log;
442 struct xlog_ticket *tic; 441 struct xlog_ticket *tic;
@@ -456,7 +455,6 @@ xfs_log_reserve(
456 if (!tic) 455 if (!tic)
457 return -ENOMEM; 456 return -ENOMEM;
458 457
459 tic->t_trans_type = t_type;
460 *ticp = tic; 458 *ticp = tic;
461 459
462 xlog_grant_push_ail(log, tic->t_cnt ? tic->t_unit_res * tic->t_cnt 460 xlog_grant_push_ail(log, tic->t_cnt ? tic->t_unit_res * tic->t_cnt
@@ -823,8 +821,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
823 } while (iclog != first_iclog); 821 } while (iclog != first_iclog);
824#endif 822#endif
825 if (! (XLOG_FORCED_SHUTDOWN(log))) { 823 if (! (XLOG_FORCED_SHUTDOWN(log))) {
826 error = xfs_log_reserve(mp, 600, 1, &tic, 824 error = xfs_log_reserve(mp, 600, 1, &tic, XFS_LOG, 0);
827 XFS_LOG, 0, XLOG_UNMOUNT_REC_TYPE);
828 if (!error) { 825 if (!error) {
829 /* the data section must be 32 bit size aligned */ 826 /* the data section must be 32 bit size aligned */
830 struct { 827 struct {
@@ -2032,58 +2029,8 @@ xlog_print_tic_res(
2032 REG_TYPE_STR(ICREATE, "inode create") 2029 REG_TYPE_STR(ICREATE, "inode create")
2033 }; 2030 };
2034#undef REG_TYPE_STR 2031#undef REG_TYPE_STR
2035#define TRANS_TYPE_STR(type) [XFS_TRANS_##type] = #type
2036 static char *trans_type_str[XFS_TRANS_TYPE_MAX] = {
2037 TRANS_TYPE_STR(SETATTR_NOT_SIZE),
2038 TRANS_TYPE_STR(SETATTR_SIZE),
2039 TRANS_TYPE_STR(INACTIVE),
2040 TRANS_TYPE_STR(CREATE),
2041 TRANS_TYPE_STR(CREATE_TRUNC),
2042 TRANS_TYPE_STR(TRUNCATE_FILE),
2043 TRANS_TYPE_STR(REMOVE),
2044 TRANS_TYPE_STR(LINK),
2045 TRANS_TYPE_STR(RENAME),
2046 TRANS_TYPE_STR(MKDIR),
2047 TRANS_TYPE_STR(RMDIR),
2048 TRANS_TYPE_STR(SYMLINK),
2049 TRANS_TYPE_STR(SET_DMATTRS),
2050 TRANS_TYPE_STR(GROWFS),
2051 TRANS_TYPE_STR(STRAT_WRITE),
2052 TRANS_TYPE_STR(DIOSTRAT),
2053 TRANS_TYPE_STR(WRITEID),
2054 TRANS_TYPE_STR(ADDAFORK),
2055 TRANS_TYPE_STR(ATTRINVAL),
2056 TRANS_TYPE_STR(ATRUNCATE),
2057 TRANS_TYPE_STR(ATTR_SET),
2058 TRANS_TYPE_STR(ATTR_RM),
2059 TRANS_TYPE_STR(ATTR_FLAG),
2060 TRANS_TYPE_STR(CLEAR_AGI_BUCKET),
2061 TRANS_TYPE_STR(SB_CHANGE),
2062 TRANS_TYPE_STR(DUMMY1),
2063 TRANS_TYPE_STR(DUMMY2),
2064 TRANS_TYPE_STR(QM_QUOTAOFF),
2065 TRANS_TYPE_STR(QM_DQALLOC),
2066 TRANS_TYPE_STR(QM_SETQLIM),
2067 TRANS_TYPE_STR(QM_DQCLUSTER),
2068 TRANS_TYPE_STR(QM_QINOCREATE),
2069 TRANS_TYPE_STR(QM_QUOTAOFF_END),
2070 TRANS_TYPE_STR(FSYNC_TS),
2071 TRANS_TYPE_STR(GROWFSRT_ALLOC),
2072 TRANS_TYPE_STR(GROWFSRT_ZERO),
2073 TRANS_TYPE_STR(GROWFSRT_FREE),
2074 TRANS_TYPE_STR(SWAPEXT),
2075 TRANS_TYPE_STR(CHECKPOINT),
2076 TRANS_TYPE_STR(ICREATE),
2077 TRANS_TYPE_STR(CREATE_TMPFILE)
2078 };
2079#undef TRANS_TYPE_STR
2080 2032
2081 xfs_warn(mp, "xlog_write: reservation summary:"); 2033 xfs_warn(mp, "xlog_write: reservation summary:");
2082 xfs_warn(mp, " trans type = %s (%u)",
2083 ((ticket->t_trans_type <= 0 ||
2084 ticket->t_trans_type > XFS_TRANS_TYPE_MAX) ?
2085 "bad-trans-type" : trans_type_str[ticket->t_trans_type]),
2086 ticket->t_trans_type);
2087 xfs_warn(mp, " unit res = %d bytes", 2034 xfs_warn(mp, " unit res = %d bytes",
2088 ticket->t_unit_res); 2035 ticket->t_unit_res);
2089 xfs_warn(mp, " current res = %d bytes", 2036 xfs_warn(mp, " current res = %d bytes",
@@ -3378,7 +3325,7 @@ xfs_log_force(
3378{ 3325{
3379 int error; 3326 int error;
3380 3327
3381 trace_xfs_log_force(mp, 0); 3328 trace_xfs_log_force(mp, 0, _RET_IP_);
3382 error = _xfs_log_force(mp, flags, NULL); 3329 error = _xfs_log_force(mp, flags, NULL);
3383 if (error) 3330 if (error)
3384 xfs_warn(mp, "%s: error %d returned.", __func__, error); 3331 xfs_warn(mp, "%s: error %d returned.", __func__, error);
@@ -3527,7 +3474,7 @@ xfs_log_force_lsn(
3527{ 3474{
3528 int error; 3475 int error;
3529 3476
3530 trace_xfs_log_force(mp, lsn); 3477 trace_xfs_log_force(mp, lsn, _RET_IP_);
3531 error = _xfs_log_force_lsn(mp, lsn, flags, NULL); 3478 error = _xfs_log_force_lsn(mp, lsn, flags, NULL);
3532 if (error) 3479 if (error)
3533 xfs_warn(mp, "%s: error %d returned.", __func__, error); 3480 xfs_warn(mp, "%s: error %d returned.", __func__, error);
@@ -3709,7 +3656,6 @@ xlog_ticket_alloc(
3709 tic->t_tid = prandom_u32(); 3656 tic->t_tid = prandom_u32();
3710 tic->t_clientid = client; 3657 tic->t_clientid = client;
3711 tic->t_flags = XLOG_TIC_INITED; 3658 tic->t_flags = XLOG_TIC_INITED;
3712 tic->t_trans_type = 0;
3713 if (permanent) 3659 if (permanent)
3714 tic->t_flags |= XLOG_TIC_PERM_RESERV; 3660 tic->t_flags |= XLOG_TIC_PERM_RESERV;
3715 3661
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index aa533a7d50f2..80ba0c047090 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -161,8 +161,7 @@ int xfs_log_reserve(struct xfs_mount *mp,
161 int count, 161 int count,
162 struct xlog_ticket **ticket, 162 struct xlog_ticket **ticket,
163 __uint8_t clientid, 163 __uint8_t clientid,
164 bool permanent, 164 bool permanent);
165 uint t_type);
166int xfs_log_regrant(struct xfs_mount *mp, struct xlog_ticket *tic); 165int xfs_log_regrant(struct xfs_mount *mp, struct xlog_ticket *tic);
167int xfs_log_unmount_write(struct xfs_mount *mp); 166int xfs_log_unmount_write(struct xfs_mount *mp);
168void xfs_log_unmount(struct xfs_mount *mp); 167void xfs_log_unmount(struct xfs_mount *mp);
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 4e7649351f5a..5e54e7955ea6 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -51,7 +51,6 @@ xlog_cil_ticket_alloc(
51 51
52 tic = xlog_ticket_alloc(log, 0, 1, XFS_TRANSACTION, 0, 52 tic = xlog_ticket_alloc(log, 0, 1, XFS_TRANSACTION, 0,
53 KM_SLEEP|KM_NOFS); 53 KM_SLEEP|KM_NOFS);
54 tic->t_trans_type = XFS_TRANS_CHECKPOINT;
55 54
56 /* 55 /*
57 * set the current reservation to zero so we know to steal the basic 56 * set the current reservation to zero so we know to steal the basic
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index ed8896310c00..765f084759b5 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -175,7 +175,6 @@ typedef struct xlog_ticket {
175 char t_cnt; /* current count : 1 */ 175 char t_cnt; /* current count : 1 */
176 char t_clientid; /* who does this belong to; : 1 */ 176 char t_clientid; /* who does this belong to; : 1 */
177 char t_flags; /* properties of reservation : 1 */ 177 char t_flags; /* properties of reservation : 1 */
178 uint t_trans_type; /* transaction type : 4 */
179 178
180 /* reservation array fields */ 179 /* reservation array fields */
181 uint t_res_num; /* num in array : 4 */ 180 uint t_res_num; /* num in array : 4 */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 396565f43247..835997843846 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3843,7 +3843,7 @@ xlog_recover_add_to_cont_trans(
3843 old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; 3843 old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
3844 old_len = item->ri_buf[item->ri_cnt-1].i_len; 3844 old_len = item->ri_buf[item->ri_cnt-1].i_len;
3845 3845
3846 ptr = kmem_realloc(old_ptr, len+old_len, old_len, KM_SLEEP); 3846 ptr = kmem_realloc(old_ptr, len + old_len, KM_SLEEP);
3847 memcpy(&ptr[old_len], dp, len); 3847 memcpy(&ptr[old_len], dp, len);
3848 item->ri_buf[item->ri_cnt-1].i_len += len; 3848 item->ri_buf[item->ri_cnt-1].i_len += len;
3849 item->ri_buf[item->ri_cnt-1].i_addr = ptr; 3849 item->ri_buf[item->ri_cnt-1].i_addr = ptr;
@@ -4205,10 +4205,9 @@ xlog_recover_process_efi(
4205 } 4205 }
4206 } 4206 }
4207 4207
4208 tp = xfs_trans_alloc(mp, 0); 4208 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
4209 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
4210 if (error) 4209 if (error)
4211 goto abort_error; 4210 return error;
4212 efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents); 4211 efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
4213 4212
4214 for (i = 0; i < efip->efi_format.efi_nextents; i++) { 4213 for (i = 0; i < efip->efi_format.efi_nextents; i++) {
@@ -4355,10 +4354,9 @@ xlog_recover_clear_agi_bucket(
4355 int offset; 4354 int offset;
4356 int error; 4355 int error;
4357 4356
4358 tp = xfs_trans_alloc(mp, XFS_TRANS_CLEAR_AGI_BUCKET); 4357 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_clearagi, 0, 0, 0, &tp);
4359 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_clearagi, 0, 0);
4360 if (error) 4358 if (error)
4361 goto out_abort; 4359 goto out_error;
4362 4360
4363 error = xfs_read_agi(mp, tp, agno, &agibp); 4361 error = xfs_read_agi(mp, tp, agno, &agibp);
4364 if (error) 4362 if (error)
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index cfd4210dd015..e39b02351b4a 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -89,7 +89,6 @@ xfs_uuid_mount(
89 if (hole < 0) { 89 if (hole < 0) {
90 xfs_uuid_table = kmem_realloc(xfs_uuid_table, 90 xfs_uuid_table = kmem_realloc(xfs_uuid_table,
91 (xfs_uuid_table_size + 1) * sizeof(*xfs_uuid_table), 91 (xfs_uuid_table_size + 1) * sizeof(*xfs_uuid_table),
92 xfs_uuid_table_size * sizeof(*xfs_uuid_table),
93 KM_SLEEP); 92 KM_SLEEP);
94 hole = xfs_uuid_table_size++; 93 hole = xfs_uuid_table_size++;
95 } 94 }
@@ -681,6 +680,9 @@ xfs_mountfs(
681 680
682 xfs_set_maxicount(mp); 681 xfs_set_maxicount(mp);
683 682
683 /* enable fail_at_unmount as default */
684 mp->m_fail_unmount = 1;
685
684 error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype, NULL, mp->m_fsname); 686 error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype, NULL, mp->m_fsname);
685 if (error) 687 if (error)
686 goto out; 688 goto out;
@@ -690,10 +692,15 @@ xfs_mountfs(
690 if (error) 692 if (error)
691 goto out_remove_sysfs; 693 goto out_remove_sysfs;
692 694
693 error = xfs_uuid_mount(mp); 695 error = xfs_error_sysfs_init(mp);
694 if (error) 696 if (error)
695 goto out_del_stats; 697 goto out_del_stats;
696 698
699
700 error = xfs_uuid_mount(mp);
701 if (error)
702 goto out_remove_error_sysfs;
703
697 /* 704 /*
698 * Set the minimum read and write sizes 705 * Set the minimum read and write sizes
699 */ 706 */
@@ -957,6 +964,7 @@ xfs_mountfs(
957 cancel_delayed_work_sync(&mp->m_reclaim_work); 964 cancel_delayed_work_sync(&mp->m_reclaim_work);
958 xfs_reclaim_inodes(mp, SYNC_WAIT); 965 xfs_reclaim_inodes(mp, SYNC_WAIT);
959 out_log_dealloc: 966 out_log_dealloc:
967 mp->m_flags |= XFS_MOUNT_UNMOUNTING;
960 xfs_log_mount_cancel(mp); 968 xfs_log_mount_cancel(mp);
961 out_fail_wait: 969 out_fail_wait:
962 if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) 970 if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
@@ -968,6 +976,8 @@ xfs_mountfs(
968 xfs_da_unmount(mp); 976 xfs_da_unmount(mp);
969 out_remove_uuid: 977 out_remove_uuid:
970 xfs_uuid_unmount(mp); 978 xfs_uuid_unmount(mp);
979 out_remove_error_sysfs:
980 xfs_error_sysfs_del(mp);
971 out_del_stats: 981 out_del_stats:
972 xfs_sysfs_del(&mp->m_stats.xs_kobj); 982 xfs_sysfs_del(&mp->m_stats.xs_kobj);
973 out_remove_sysfs: 983 out_remove_sysfs:
@@ -1006,6 +1016,14 @@ xfs_unmountfs(
1006 xfs_log_force(mp, XFS_LOG_SYNC); 1016 xfs_log_force(mp, XFS_LOG_SYNC);
1007 1017
1008 /* 1018 /*
1019 * We now need to tell the world we are unmounting. This will allow
1020 * us to detect that the filesystem is going away and we should error
1021 * out anything that we have been retrying in the background. This will
1022 * prevent neverending retries in AIL pushing from hanging the unmount.
1023 */
1024 mp->m_flags |= XFS_MOUNT_UNMOUNTING;
1025
1026 /*
1009 * Flush all pending changes from the AIL. 1027 * Flush all pending changes from the AIL.
1010 */ 1028 */
1011 xfs_ail_push_all_sync(mp->m_ail); 1029 xfs_ail_push_all_sync(mp->m_ail);
@@ -1056,6 +1074,7 @@ xfs_unmountfs(
1056#endif 1074#endif
1057 xfs_free_perag(mp); 1075 xfs_free_perag(mp);
1058 1076
1077 xfs_error_sysfs_del(mp);
1059 xfs_sysfs_del(&mp->m_stats.xs_kobj); 1078 xfs_sysfs_del(&mp->m_stats.xs_kobj);
1060 xfs_sysfs_del(&mp->m_kobj); 1079 xfs_sysfs_del(&mp->m_kobj);
1061} 1080}
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index eafe257b357a..c1b798c72126 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -37,6 +37,32 @@ enum {
37 XFS_LOWSP_MAX, 37 XFS_LOWSP_MAX,
38}; 38};
39 39
40/*
41 * Error Configuration
42 *
43 * Error classes define the subsystem the configuration belongs to.
44 * Error numbers define the errors that are configurable.
45 */
46enum {
47 XFS_ERR_METADATA,
48 XFS_ERR_CLASS_MAX,
49};
50enum {
51 XFS_ERR_DEFAULT,
52 XFS_ERR_EIO,
53 XFS_ERR_ENOSPC,
54 XFS_ERR_ENODEV,
55 XFS_ERR_ERRNO_MAX,
56};
57
58#define XFS_ERR_RETRY_FOREVER -1
59
60struct xfs_error_cfg {
61 struct xfs_kobj kobj;
62 int max_retries;
63 unsigned long retry_timeout; /* in jiffies, 0 = no timeout */
64};
65
40typedef struct xfs_mount { 66typedef struct xfs_mount {
41 struct super_block *m_super; 67 struct super_block *m_super;
42 xfs_tid_t m_tid; /* next unused tid for fs */ 68 xfs_tid_t m_tid; /* next unused tid for fs */
@@ -127,6 +153,9 @@ typedef struct xfs_mount {
127 int64_t m_low_space[XFS_LOWSP_MAX]; 153 int64_t m_low_space[XFS_LOWSP_MAX];
128 /* low free space thresholds */ 154 /* low free space thresholds */
129 struct xfs_kobj m_kobj; 155 struct xfs_kobj m_kobj;
156 struct xfs_kobj m_error_kobj;
157 struct xfs_kobj m_error_meta_kobj;
158 struct xfs_error_cfg m_error_cfg[XFS_ERR_CLASS_MAX][XFS_ERR_ERRNO_MAX];
130 struct xstats m_stats; /* per-fs stats */ 159 struct xstats m_stats; /* per-fs stats */
131 160
132 struct workqueue_struct *m_buf_workqueue; 161 struct workqueue_struct *m_buf_workqueue;
@@ -148,6 +177,7 @@ typedef struct xfs_mount {
148 */ 177 */
149 __uint32_t m_generation; 178 __uint32_t m_generation;
150 179
180 bool m_fail_unmount;
151#ifdef DEBUG 181#ifdef DEBUG
152 /* 182 /*
153 * DEBUG mode instrumentation to test and/or trigger delayed allocation 183 * DEBUG mode instrumentation to test and/or trigger delayed allocation
@@ -166,6 +196,7 @@ typedef struct xfs_mount {
166#define XFS_MOUNT_WSYNC (1ULL << 0) /* for nfs - all metadata ops 196#define XFS_MOUNT_WSYNC (1ULL << 0) /* for nfs - all metadata ops
167 must be synchronous except 197 must be synchronous except
168 for space allocations */ 198 for space allocations */
199#define XFS_MOUNT_UNMOUNTING (1ULL << 1) /* filesystem is unmounting */
169#define XFS_MOUNT_WAS_CLEAN (1ULL << 3) 200#define XFS_MOUNT_WAS_CLEAN (1ULL << 3)
170#define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem 201#define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem
171 operations, typically for 202 operations, typically for
@@ -364,4 +395,7 @@ extern void xfs_set_low_space_thresholds(struct xfs_mount *);
364int xfs_zero_extent(struct xfs_inode *ip, xfs_fsblock_t start_fsb, 395int xfs_zero_extent(struct xfs_inode *ip, xfs_fsblock_t start_fsb,
365 xfs_off_t count_fsb); 396 xfs_off_t count_fsb);
366 397
398struct xfs_error_cfg * xfs_error_get_cfg(struct xfs_mount *mp,
399 int error_class, int error);
400
367#endif /* __XFS_MOUNT_H__ */ 401#endif /* __XFS_MOUNT_H__ */
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index 51ddaf2c2b8c..d5b756669fb5 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -308,12 +308,9 @@ xfs_fs_commit_blocks(
308 goto out_drop_iolock; 308 goto out_drop_iolock;
309 } 309 }
310 310
311 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); 311 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
312 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); 312 if (error)
313 if (error) {
314 xfs_trans_cancel(tp);
315 goto out_drop_iolock; 313 goto out_drop_iolock;
316 }
317 314
318 xfs_ilock(ip, XFS_ILOCK_EXCL); 315 xfs_ilock(ip, XFS_ILOCK_EXCL);
319 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 316 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index be125e1758c1..a60d9e2739d1 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -783,13 +783,10 @@ xfs_qm_qino_alloc(
783 } 783 }
784 } 784 }
785 785
786 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE); 786 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_create,
787 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_create, 787 XFS_QM_QINOCREATE_SPACE_RES(mp), 0, 0, &tp);
788 XFS_QM_QINOCREATE_SPACE_RES(mp), 0); 788 if (error)
789 if (error) {
790 xfs_trans_cancel(tp);
791 return error; 789 return error;
792 }
793 790
794 if (need_alloc) { 791 if (need_alloc) {
795 error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, 792 error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip,
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index f4d0e0a8f517..475a3882a81f 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -236,10 +236,8 @@ xfs_qm_scall_trunc_qfile(
236 236
237 xfs_ilock(ip, XFS_IOLOCK_EXCL); 237 xfs_ilock(ip, XFS_IOLOCK_EXCL);
238 238
239 tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE); 239 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
240 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
241 if (error) { 240 if (error) {
242 xfs_trans_cancel(tp);
243 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 241 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
244 goto out_put; 242 goto out_put;
245 } 243 }
@@ -436,12 +434,9 @@ xfs_qm_scall_setqlim(
436 defq = xfs_get_defquota(dqp, q); 434 defq = xfs_get_defquota(dqp, q);
437 xfs_dqunlock(dqp); 435 xfs_dqunlock(dqp);
438 436
439 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM); 437 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_setqlim, 0, 0, 0, &tp);
440 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_setqlim, 0, 0); 438 if (error)
441 if (error) {
442 xfs_trans_cancel(tp);
443 goto out_rele; 439 goto out_rele;
444 }
445 440
446 xfs_dqlock(dqp); 441 xfs_dqlock(dqp);
447 xfs_trans_dqjoin(tp, dqp); 442 xfs_trans_dqjoin(tp, dqp);
@@ -569,13 +564,9 @@ xfs_qm_log_quotaoff_end(
569 int error; 564 int error;
570 xfs_qoff_logitem_t *qoffi; 565 xfs_qoff_logitem_t *qoffi;
571 566
572 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF_END); 567 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_equotaoff, 0, 0, 0, &tp);
573 568 if (error)
574 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_equotaoff, 0, 0);
575 if (error) {
576 xfs_trans_cancel(tp);
577 return error; 569 return error;
578 }
579 570
580 qoffi = xfs_trans_get_qoff_item(tp, startqoff, 571 qoffi = xfs_trans_get_qoff_item(tp, startqoff,
581 flags & XFS_ALL_QUOTA_ACCT); 572 flags & XFS_ALL_QUOTA_ACCT);
@@ -603,12 +594,9 @@ xfs_qm_log_quotaoff(
603 594
604 *qoffstartp = NULL; 595 *qoffstartp = NULL;
605 596
606 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF); 597 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_quotaoff, 0, 0, 0, &tp);
607 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_quotaoff, 0, 0); 598 if (error)
608 if (error) {
609 xfs_trans_cancel(tp);
610 goto out; 599 goto out;
611 }
612 600
613 qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT); 601 qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT);
614 xfs_trans_log_quotaoff_item(tp, qoffi); 602 xfs_trans_log_quotaoff_item(tp, qoffi);
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index abf44435d04a..3938b37d1043 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -780,15 +780,14 @@ xfs_growfs_rt_alloc(
780 * Allocate space to the file, as necessary. 780 * Allocate space to the file, as necessary.
781 */ 781 */
782 while (oblocks < nblocks) { 782 while (oblocks < nblocks) {
783 tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_ALLOC);
784 resblks = XFS_GROWFSRT_SPACE_RES(mp, nblocks - oblocks); 783 resblks = XFS_GROWFSRT_SPACE_RES(mp, nblocks - oblocks);
785 /* 784 /*
786 * Reserve space & log for one extent added to the file. 785 * Reserve space & log for one extent added to the file.
787 */ 786 */
788 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growrtalloc, 787 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growrtalloc, resblks,
789 resblks, 0); 788 0, 0, &tp);
790 if (error) 789 if (error)
791 goto out_trans_cancel; 790 return error;
792 /* 791 /*
793 * Lock the inode. 792 * Lock the inode.
794 */ 793 */
@@ -823,14 +822,13 @@ xfs_growfs_rt_alloc(
823 for (bno = map.br_startoff, fsbno = map.br_startblock; 822 for (bno = map.br_startoff, fsbno = map.br_startblock;
824 bno < map.br_startoff + map.br_blockcount; 823 bno < map.br_startoff + map.br_blockcount;
825 bno++, fsbno++) { 824 bno++, fsbno++) {
826 tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_ZERO);
827 /* 825 /*
828 * Reserve log for one block zeroing. 826 * Reserve log for one block zeroing.
829 */ 827 */
830 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growrtzero, 828 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growrtzero,
831 0, 0); 829 0, 0, 0, &tp);
832 if (error) 830 if (error)
833 goto out_trans_cancel; 831 return error;
834 /* 832 /*
835 * Lock the bitmap inode. 833 * Lock the bitmap inode.
836 */ 834 */
@@ -994,11 +992,10 @@ xfs_growfs_rt(
994 /* 992 /*
995 * Start a transaction, get the log reservation. 993 * Start a transaction, get the log reservation.
996 */ 994 */
997 tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_FREE); 995 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growrtfree, 0, 0, 0,
998 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growrtfree, 996 &tp);
999 0, 0);
1000 if (error) 997 if (error)
1001 goto error_cancel; 998 break;
1002 /* 999 /*
1003 * Lock out other callers by grabbing the bitmap inode lock. 1000 * Lock out other callers by grabbing the bitmap inode lock.
1004 */ 1001 */
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 187e14b696c2..416421d7ff10 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -58,8 +58,7 @@
58#include <linux/parser.h> 58#include <linux/parser.h>
59 59
60static const struct super_operations xfs_super_operations; 60static const struct super_operations xfs_super_operations;
61static kmem_zone_t *xfs_ioend_zone; 61struct bio_set *xfs_ioend_bioset;
62mempool_t *xfs_ioend_pool;
63 62
64static struct kset *xfs_kset; /* top-level xfs sysfs dir */ 63static struct kset *xfs_kset; /* top-level xfs sysfs dir */
65#ifdef DEBUG 64#ifdef DEBUG
@@ -350,6 +349,7 @@ xfs_parseargs(
350 case Opt_pqnoenforce: 349 case Opt_pqnoenforce:
351 mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE); 350 mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
352 mp->m_qflags &= ~XFS_PQUOTA_ENFD; 351 mp->m_qflags &= ~XFS_PQUOTA_ENFD;
352 break;
353 case Opt_gquota: 353 case Opt_gquota:
354 case Opt_grpquota: 354 case Opt_grpquota:
355 mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE | 355 mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
@@ -928,7 +928,7 @@ xfs_fs_alloc_inode(
928 928
929/* 929/*
930 * Now that the generic code is guaranteed not to be accessing 930 * Now that the generic code is guaranteed not to be accessing
931 * the linux inode, we can reclaim the inode. 931 * the linux inode, we can inactivate and reclaim the inode.
932 */ 932 */
933STATIC void 933STATIC void
934xfs_fs_destroy_inode( 934xfs_fs_destroy_inode(
@@ -938,9 +938,14 @@ xfs_fs_destroy_inode(
938 938
939 trace_xfs_destroy_inode(ip); 939 trace_xfs_destroy_inode(ip);
940 940
941 XFS_STATS_INC(ip->i_mount, vn_reclaim); 941 ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
942 XFS_STATS_INC(ip->i_mount, vn_rele);
943 XFS_STATS_INC(ip->i_mount, vn_remove);
944
945 xfs_inactive(ip);
942 946
943 ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); 947 ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
948 XFS_STATS_INC(ip->i_mount, vn_reclaim);
944 949
945 /* 950 /*
946 * We should never get here with one of the reclaim flags already set. 951 * We should never get here with one of the reclaim flags already set.
@@ -987,24 +992,6 @@ xfs_fs_inode_init_once(
987 "xfsino", ip->i_ino); 992 "xfsino", ip->i_ino);
988} 993}
989 994
990STATIC void
991xfs_fs_evict_inode(
992 struct inode *inode)
993{
994 xfs_inode_t *ip = XFS_I(inode);
995
996 ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
997
998 trace_xfs_evict_inode(ip);
999
1000 truncate_inode_pages_final(&inode->i_data);
1001 clear_inode(inode);
1002 XFS_STATS_INC(ip->i_mount, vn_rele);
1003 XFS_STATS_INC(ip->i_mount, vn_remove);
1004
1005 xfs_inactive(ip);
1006}
1007
1008/* 995/*
1009 * We do an unlocked check for XFS_IDONTCACHE here because we are already 996 * We do an unlocked check for XFS_IDONTCACHE here because we are already
1010 * serialised against cache hits here via the inode->i_lock and igrab() in 997 * serialised against cache hits here via the inode->i_lock and igrab() in
@@ -1276,6 +1263,16 @@ xfs_fs_remount(
1276 return -EINVAL; 1263 return -EINVAL;
1277 } 1264 }
1278 1265
1266 if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
1267 xfs_sb_has_ro_compat_feature(sbp,
1268 XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
1269 xfs_warn(mp,
1270"ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem",
1271 (sbp->sb_features_ro_compat &
1272 XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
1273 return -EINVAL;
1274 }
1275
1279 mp->m_flags &= ~XFS_MOUNT_RDONLY; 1276 mp->m_flags &= ~XFS_MOUNT_RDONLY;
1280 1277
1281 /* 1278 /*
@@ -1663,7 +1660,6 @@ xfs_fs_free_cached_objects(
1663static const struct super_operations xfs_super_operations = { 1660static const struct super_operations xfs_super_operations = {
1664 .alloc_inode = xfs_fs_alloc_inode, 1661 .alloc_inode = xfs_fs_alloc_inode,
1665 .destroy_inode = xfs_fs_destroy_inode, 1662 .destroy_inode = xfs_fs_destroy_inode,
1666 .evict_inode = xfs_fs_evict_inode,
1667 .drop_inode = xfs_fs_drop_inode, 1663 .drop_inode = xfs_fs_drop_inode,
1668 .put_super = xfs_fs_put_super, 1664 .put_super = xfs_fs_put_super,
1669 .sync_fs = xfs_fs_sync_fs, 1665 .sync_fs = xfs_fs_sync_fs,
@@ -1688,20 +1684,15 @@ MODULE_ALIAS_FS("xfs");
1688STATIC int __init 1684STATIC int __init
1689xfs_init_zones(void) 1685xfs_init_zones(void)
1690{ 1686{
1691 1687 xfs_ioend_bioset = bioset_create(4 * MAX_BUF_PER_PAGE,
1692 xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend"); 1688 offsetof(struct xfs_ioend, io_inline_bio));
1693 if (!xfs_ioend_zone) 1689 if (!xfs_ioend_bioset)
1694 goto out; 1690 goto out;
1695 1691
1696 xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE,
1697 xfs_ioend_zone);
1698 if (!xfs_ioend_pool)
1699 goto out_destroy_ioend_zone;
1700
1701 xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t), 1692 xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
1702 "xfs_log_ticket"); 1693 "xfs_log_ticket");
1703 if (!xfs_log_ticket_zone) 1694 if (!xfs_log_ticket_zone)
1704 goto out_destroy_ioend_pool; 1695 goto out_free_ioend_bioset;
1705 1696
1706 xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t), 1697 xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t),
1707 "xfs_bmap_free_item"); 1698 "xfs_bmap_free_item");
@@ -1797,10 +1788,8 @@ xfs_init_zones(void)
1797 kmem_zone_destroy(xfs_bmap_free_item_zone); 1788 kmem_zone_destroy(xfs_bmap_free_item_zone);
1798 out_destroy_log_ticket_zone: 1789 out_destroy_log_ticket_zone:
1799 kmem_zone_destroy(xfs_log_ticket_zone); 1790 kmem_zone_destroy(xfs_log_ticket_zone);
1800 out_destroy_ioend_pool: 1791 out_free_ioend_bioset:
1801 mempool_destroy(xfs_ioend_pool); 1792 bioset_free(xfs_ioend_bioset);
1802 out_destroy_ioend_zone:
1803 kmem_zone_destroy(xfs_ioend_zone);
1804 out: 1793 out:
1805 return -ENOMEM; 1794 return -ENOMEM;
1806} 1795}
@@ -1826,9 +1815,7 @@ xfs_destroy_zones(void)
1826 kmem_zone_destroy(xfs_btree_cur_zone); 1815 kmem_zone_destroy(xfs_btree_cur_zone);
1827 kmem_zone_destroy(xfs_bmap_free_item_zone); 1816 kmem_zone_destroy(xfs_bmap_free_item_zone);
1828 kmem_zone_destroy(xfs_log_ticket_zone); 1817 kmem_zone_destroy(xfs_log_ticket_zone);
1829 mempool_destroy(xfs_ioend_pool); 1818 bioset_free(xfs_ioend_bioset);
1830 kmem_zone_destroy(xfs_ioend_zone);
1831
1832} 1819}
1833 1820
1834STATIC int __init 1821STATIC int __init
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index b44284c1adda..08a46c6181fd 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -131,6 +131,8 @@ xfs_readlink(
131 131
132 trace_xfs_readlink(ip); 132 trace_xfs_readlink(ip);
133 133
134 ASSERT(!(ip->i_df.if_flags & XFS_IFINLINE));
135
134 if (XFS_FORCED_SHUTDOWN(mp)) 136 if (XFS_FORCED_SHUTDOWN(mp))
135 return -EIO; 137 return -EIO;
136 138
@@ -150,12 +152,7 @@ xfs_readlink(
150 } 152 }
151 153
152 154
153 if (ip->i_df.if_flags & XFS_IFINLINE) { 155 error = xfs_readlink_bmap(ip, link);
154 memcpy(link, ip->i_df.if_u1.if_data, pathlen);
155 link[pathlen] = '\0';
156 } else {
157 error = xfs_readlink_bmap(ip, link);
158 }
159 156
160 out: 157 out:
161 xfs_iunlock(ip, XFS_ILOCK_SHARED); 158 xfs_iunlock(ip, XFS_ILOCK_SHARED);
@@ -221,7 +218,6 @@ xfs_symlink(
221 if (error) 218 if (error)
222 return error; 219 return error;
223 220
224 tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK);
225 /* 221 /*
226 * The symlink will fit into the inode data fork? 222 * The symlink will fit into the inode data fork?
227 * There can't be any attributes so we get the whole variable part. 223 * There can't be any attributes so we get the whole variable part.
@@ -231,13 +227,15 @@ xfs_symlink(
231 else 227 else
232 fs_blocks = xfs_symlink_blocks(mp, pathlen); 228 fs_blocks = xfs_symlink_blocks(mp, pathlen);
233 resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks); 229 resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks);
234 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_symlink, resblks, 0); 230
231 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_symlink, resblks, 0, 0, &tp);
235 if (error == -ENOSPC && fs_blocks == 0) { 232 if (error == -ENOSPC && fs_blocks == 0) {
236 resblks = 0; 233 resblks = 0;
237 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_symlink, 0, 0); 234 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_symlink, 0, 0, 0,
235 &tp);
238 } 236 }
239 if (error) 237 if (error)
240 goto out_trans_cancel; 238 goto out_release_inode;
241 239
242 xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL | 240 xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL |
243 XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT); 241 XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT);
@@ -302,19 +300,11 @@ xfs_symlink(
302 * If the symlink will fit into the inode, write it inline. 300 * If the symlink will fit into the inode, write it inline.
303 */ 301 */
304 if (pathlen <= XFS_IFORK_DSIZE(ip)) { 302 if (pathlen <= XFS_IFORK_DSIZE(ip)) {
305 xfs_idata_realloc(ip, pathlen, XFS_DATA_FORK); 303 xfs_init_local_fork(ip, XFS_DATA_FORK, target_path, pathlen);
306 memcpy(ip->i_df.if_u1.if_data, target_path, pathlen);
307 ip->i_d.di_size = pathlen;
308
309 /*
310 * The inode was initially created in extent format.
311 */
312 ip->i_df.if_flags &= ~(XFS_IFEXTENTS | XFS_IFBROOT);
313 ip->i_df.if_flags |= XFS_IFINLINE;
314 304
305 ip->i_d.di_size = pathlen;
315 ip->i_d.di_format = XFS_DINODE_FMT_LOCAL; 306 ip->i_d.di_format = XFS_DINODE_FMT_LOCAL;
316 xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE); 307 xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE);
317
318 } else { 308 } else {
319 int offset; 309 int offset;
320 310
@@ -455,12 +445,9 @@ xfs_inactive_symlink_rmt(
455 */ 445 */
456 ASSERT(ip->i_d.di_nextents > 0 && ip->i_d.di_nextents <= 2); 446 ASSERT(ip->i_d.di_nextents > 0 && ip->i_d.di_nextents <= 2);
457 447
458 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); 448 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
459 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); 449 if (error)
460 if (error) {
461 xfs_trans_cancel(tp);
462 return error; 450 return error;
463 }
464 451
465 xfs_ilock(ip, XFS_ILOCK_EXCL); 452 xfs_ilock(ip, XFS_ILOCK_EXCL);
466 xfs_trans_ijoin(tp, ip, 0); 453 xfs_trans_ijoin(tp, ip, 0);
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index 6ced4f143494..4c2c55086208 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -17,10 +17,11 @@
17 */ 17 */
18 18
19#include "xfs.h" 19#include "xfs.h"
20#include "xfs_sysfs.h" 20#include "xfs_shared.h"
21#include "xfs_format.h" 21#include "xfs_format.h"
22#include "xfs_log_format.h" 22#include "xfs_log_format.h"
23#include "xfs_trans_resv.h" 23#include "xfs_trans_resv.h"
24#include "xfs_sysfs.h"
24#include "xfs_log.h" 25#include "xfs_log.h"
25#include "xfs_log_priv.h" 26#include "xfs_log_priv.h"
26#include "xfs_stats.h" 27#include "xfs_stats.h"
@@ -362,3 +363,291 @@ struct kobj_type xfs_log_ktype = {
362 .sysfs_ops = &xfs_sysfs_ops, 363 .sysfs_ops = &xfs_sysfs_ops,
363 .default_attrs = xfs_log_attrs, 364 .default_attrs = xfs_log_attrs,
364}; 365};
366
367/*
368 * Metadata IO error configuration
369 *
370 * The sysfs structure here is:
371 * ...xfs/<dev>/error/<class>/<errno>/<error_attrs>
372 *
373 * where <class> allows us to discriminate between data IO and metadata IO,
374 * and any other future type of IO (e.g. special inode or directory error
375 * handling) we care to support.
376 */
377static inline struct xfs_error_cfg *
378to_error_cfg(struct kobject *kobject)
379{
380 struct xfs_kobj *kobj = to_kobj(kobject);
381 return container_of(kobj, struct xfs_error_cfg, kobj);
382}
383
384static inline struct xfs_mount *
385err_to_mp(struct kobject *kobject)
386{
387 struct xfs_kobj *kobj = to_kobj(kobject);
388 return container_of(kobj, struct xfs_mount, m_error_kobj);
389}
390
391static ssize_t
392max_retries_show(
393 struct kobject *kobject,
394 char *buf)
395{
396 struct xfs_error_cfg *cfg = to_error_cfg(kobject);
397
398 return snprintf(buf, PAGE_SIZE, "%d\n", cfg->max_retries);
399}
400
401static ssize_t
402max_retries_store(
403 struct kobject *kobject,
404 const char *buf,
405 size_t count)
406{
407 struct xfs_error_cfg *cfg = to_error_cfg(kobject);
408 int ret;
409 int val;
410
411 ret = kstrtoint(buf, 0, &val);
412 if (ret)
413 return ret;
414
415 if (val < -1)
416 return -EINVAL;
417
418 cfg->max_retries = val;
419 return count;
420}
421XFS_SYSFS_ATTR_RW(max_retries);
422
423static ssize_t
424retry_timeout_seconds_show(
425 struct kobject *kobject,
426 char *buf)
427{
428 struct xfs_error_cfg *cfg = to_error_cfg(kobject);
429
430 return snprintf(buf, PAGE_SIZE, "%ld\n",
431 jiffies_to_msecs(cfg->retry_timeout) / MSEC_PER_SEC);
432}
433
434static ssize_t
435retry_timeout_seconds_store(
436 struct kobject *kobject,
437 const char *buf,
438 size_t count)
439{
440 struct xfs_error_cfg *cfg = to_error_cfg(kobject);
441 int ret;
442 int val;
443
444 ret = kstrtoint(buf, 0, &val);
445 if (ret)
446 return ret;
447
448 /* 1 day timeout maximum */
449 if (val < 0 || val > 86400)
450 return -EINVAL;
451
452 cfg->retry_timeout = msecs_to_jiffies(val * MSEC_PER_SEC);
453 return count;
454}
455XFS_SYSFS_ATTR_RW(retry_timeout_seconds);
456
457static ssize_t
458fail_at_unmount_show(
459 struct kobject *kobject,
460 char *buf)
461{
462 struct xfs_mount *mp = err_to_mp(kobject);
463
464 return snprintf(buf, PAGE_SIZE, "%d\n", mp->m_fail_unmount);
465}
466
467static ssize_t
468fail_at_unmount_store(
469 struct kobject *kobject,
470 const char *buf,
471 size_t count)
472{
473 struct xfs_mount *mp = err_to_mp(kobject);
474 int ret;
475 int val;
476
477 ret = kstrtoint(buf, 0, &val);
478 if (ret)
479 return ret;
480
481 if (val < 0 || val > 1)
482 return -EINVAL;
483
484 mp->m_fail_unmount = val;
485 return count;
486}
487XFS_SYSFS_ATTR_RW(fail_at_unmount);
488
489static struct attribute *xfs_error_attrs[] = {
490 ATTR_LIST(max_retries),
491 ATTR_LIST(retry_timeout_seconds),
492 NULL,
493};
494
495
496struct kobj_type xfs_error_cfg_ktype = {
497 .release = xfs_sysfs_release,
498 .sysfs_ops = &xfs_sysfs_ops,
499 .default_attrs = xfs_error_attrs,
500};
501
502struct kobj_type xfs_error_ktype = {
503 .release = xfs_sysfs_release,
504 .sysfs_ops = &xfs_sysfs_ops,
505};
506
507/*
508 * Error initialization tables. These need to be ordered in the same
509 * order as the enums used to index the array. All class init tables need to
510 * define a "default" behaviour as the first entry, all other entries can be
511 * empty.
512 */
513struct xfs_error_init {
514 char *name;
515 int max_retries;
516 int retry_timeout; /* in seconds */
517};
518
519static const struct xfs_error_init xfs_error_meta_init[XFS_ERR_ERRNO_MAX] = {
520 { .name = "default",
521 .max_retries = XFS_ERR_RETRY_FOREVER,
522 .retry_timeout = 0,
523 },
524 { .name = "EIO",
525 .max_retries = XFS_ERR_RETRY_FOREVER,
526 .retry_timeout = 0,
527 },
528 { .name = "ENOSPC",
529 .max_retries = XFS_ERR_RETRY_FOREVER,
530 .retry_timeout = 0,
531 },
532 { .name = "ENODEV",
533 .max_retries = 0,
534 },
535};
536
537static int
538xfs_error_sysfs_init_class(
539 struct xfs_mount *mp,
540 int class,
541 const char *parent_name,
542 struct xfs_kobj *parent_kobj,
543 const struct xfs_error_init init[])
544{
545 struct xfs_error_cfg *cfg;
546 int error;
547 int i;
548
549 ASSERT(class < XFS_ERR_CLASS_MAX);
550
551 error = xfs_sysfs_init(parent_kobj, &xfs_error_ktype,
552 &mp->m_error_kobj, parent_name);
553 if (error)
554 return error;
555
556 for (i = 0; i < XFS_ERR_ERRNO_MAX; i++) {
557 cfg = &mp->m_error_cfg[class][i];
558 error = xfs_sysfs_init(&cfg->kobj, &xfs_error_cfg_ktype,
559 parent_kobj, init[i].name);
560 if (error)
561 goto out_error;
562
563 cfg->max_retries = init[i].max_retries;
564 cfg->retry_timeout = msecs_to_jiffies(
565 init[i].retry_timeout * MSEC_PER_SEC);
566 }
567 return 0;
568
569out_error:
570 /* unwind the entries that succeeded */
571 for (i--; i >= 0; i--) {
572 cfg = &mp->m_error_cfg[class][i];
573 xfs_sysfs_del(&cfg->kobj);
574 }
575 xfs_sysfs_del(parent_kobj);
576 return error;
577}
578
579int
580xfs_error_sysfs_init(
581 struct xfs_mount *mp)
582{
583 int error;
584
585 /* .../xfs/<dev>/error/ */
586 error = xfs_sysfs_init(&mp->m_error_kobj, &xfs_error_ktype,
587 &mp->m_kobj, "error");
588 if (error)
589 return error;
590
591 error = sysfs_create_file(&mp->m_error_kobj.kobject,
592 ATTR_LIST(fail_at_unmount));
593
594 if (error)
595 goto out_error;
596
597 /* .../xfs/<dev>/error/metadata/ */
598 error = xfs_error_sysfs_init_class(mp, XFS_ERR_METADATA,
599 "metadata", &mp->m_error_meta_kobj,
600 xfs_error_meta_init);
601 if (error)
602 goto out_error;
603
604 return 0;
605
606out_error:
607 xfs_sysfs_del(&mp->m_error_kobj);
608 return error;
609}
610
611void
612xfs_error_sysfs_del(
613 struct xfs_mount *mp)
614{
615 struct xfs_error_cfg *cfg;
616 int i, j;
617
618 for (i = 0; i < XFS_ERR_CLASS_MAX; i++) {
619 for (j = 0; j < XFS_ERR_ERRNO_MAX; j++) {
620 cfg = &mp->m_error_cfg[i][j];
621
622 xfs_sysfs_del(&cfg->kobj);
623 }
624 }
625 xfs_sysfs_del(&mp->m_error_meta_kobj);
626 xfs_sysfs_del(&mp->m_error_kobj);
627}
628
629struct xfs_error_cfg *
630xfs_error_get_cfg(
631 struct xfs_mount *mp,
632 int error_class,
633 int error)
634{
635 struct xfs_error_cfg *cfg;
636
637 switch (error) {
638 case EIO:
639 cfg = &mp->m_error_cfg[error_class][XFS_ERR_EIO];
640 break;
641 case ENOSPC:
642 cfg = &mp->m_error_cfg[error_class][XFS_ERR_ENOSPC];
643 break;
644 case ENODEV:
645 cfg = &mp->m_error_cfg[error_class][XFS_ERR_ENODEV];
646 break;
647 default:
648 cfg = &mp->m_error_cfg[error_class][XFS_ERR_DEFAULT];
649 break;
650 }
651
652 return cfg;
653}
diff --git a/fs/xfs/xfs_sysfs.h b/fs/xfs/xfs_sysfs.h
index be692e59938d..d04637181ef2 100644
--- a/fs/xfs/xfs_sysfs.h
+++ b/fs/xfs/xfs_sysfs.h
@@ -58,4 +58,7 @@ xfs_sysfs_del(
58 wait_for_completion(&kobj->complete); 58 wait_for_completion(&kobj->complete);
59} 59}
60 60
61int xfs_error_sysfs_init(struct xfs_mount *mp);
62void xfs_error_sysfs_del(struct xfs_mount *mp);
63
61#endif /* __XFS_SYSFS_H__ */ 64#endif /* __XFS_SYSFS_H__ */
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index c8d58426008e..ea94ee0fe5ea 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -364,7 +364,6 @@ DEFINE_BUF_EVENT(xfs_buf_delwri_split);
364DEFINE_BUF_EVENT(xfs_buf_get_uncached); 364DEFINE_BUF_EVENT(xfs_buf_get_uncached);
365DEFINE_BUF_EVENT(xfs_bdstrat_shut); 365DEFINE_BUF_EVENT(xfs_bdstrat_shut);
366DEFINE_BUF_EVENT(xfs_buf_item_relse); 366DEFINE_BUF_EVENT(xfs_buf_item_relse);
367DEFINE_BUF_EVENT(xfs_buf_item_iodone);
368DEFINE_BUF_EVENT(xfs_buf_item_iodone_async); 367DEFINE_BUF_EVENT(xfs_buf_item_iodone_async);
369DEFINE_BUF_EVENT(xfs_buf_error_relse); 368DEFINE_BUF_EVENT(xfs_buf_error_relse);
370DEFINE_BUF_EVENT(xfs_buf_wait_buftarg); 369DEFINE_BUF_EVENT(xfs_buf_wait_buftarg);
@@ -944,7 +943,6 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
944 TP_ARGS(log, tic), 943 TP_ARGS(log, tic),
945 TP_STRUCT__entry( 944 TP_STRUCT__entry(
946 __field(dev_t, dev) 945 __field(dev_t, dev)
947 __field(unsigned, trans_type)
948 __field(char, ocnt) 946 __field(char, ocnt)
949 __field(char, cnt) 947 __field(char, cnt)
950 __field(int, curr_res) 948 __field(int, curr_res)
@@ -962,7 +960,6 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
962 ), 960 ),
963 TP_fast_assign( 961 TP_fast_assign(
964 __entry->dev = log->l_mp->m_super->s_dev; 962 __entry->dev = log->l_mp->m_super->s_dev;
965 __entry->trans_type = tic->t_trans_type;
966 __entry->ocnt = tic->t_ocnt; 963 __entry->ocnt = tic->t_ocnt;
967 __entry->cnt = tic->t_cnt; 964 __entry->cnt = tic->t_cnt;
968 __entry->curr_res = tic->t_curr_res; 965 __entry->curr_res = tic->t_curr_res;
@@ -980,14 +977,13 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
980 __entry->curr_block = log->l_curr_block; 977 __entry->curr_block = log->l_curr_block;
981 __entry->tail_lsn = atomic64_read(&log->l_tail_lsn); 978 __entry->tail_lsn = atomic64_read(&log->l_tail_lsn);
982 ), 979 ),
983 TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u " 980 TP_printk("dev %d:%d t_ocnt %u t_cnt %u t_curr_res %u "
984 "t_unit_res %u t_flags %s reserveq %s " 981 "t_unit_res %u t_flags %s reserveq %s "
985 "writeq %s grant_reserve_cycle %d " 982 "writeq %s grant_reserve_cycle %d "
986 "grant_reserve_bytes %d grant_write_cycle %d " 983 "grant_reserve_bytes %d grant_write_cycle %d "
987 "grant_write_bytes %d curr_cycle %d curr_block %d " 984 "grant_write_bytes %d curr_cycle %d curr_block %d "
988 "tail_cycle %d tail_block %d", 985 "tail_cycle %d tail_block %d",
989 MAJOR(__entry->dev), MINOR(__entry->dev), 986 MAJOR(__entry->dev), MINOR(__entry->dev),
990 __print_symbolic(__entry->trans_type, XFS_TRANS_TYPES),
991 __entry->ocnt, 987 __entry->ocnt,
992 __entry->cnt, 988 __entry->cnt,
993 __entry->curr_res, 989 __entry->curr_res,
@@ -1053,19 +1049,21 @@ DECLARE_EVENT_CLASS(xfs_log_item_class,
1053) 1049)
1054 1050
1055TRACE_EVENT(xfs_log_force, 1051TRACE_EVENT(xfs_log_force,
1056 TP_PROTO(struct xfs_mount *mp, xfs_lsn_t lsn), 1052 TP_PROTO(struct xfs_mount *mp, xfs_lsn_t lsn, unsigned long caller_ip),
1057 TP_ARGS(mp, lsn), 1053 TP_ARGS(mp, lsn, caller_ip),
1058 TP_STRUCT__entry( 1054 TP_STRUCT__entry(
1059 __field(dev_t, dev) 1055 __field(dev_t, dev)
1060 __field(xfs_lsn_t, lsn) 1056 __field(xfs_lsn_t, lsn)
1057 __field(unsigned long, caller_ip)
1061 ), 1058 ),
1062 TP_fast_assign( 1059 TP_fast_assign(
1063 __entry->dev = mp->m_super->s_dev; 1060 __entry->dev = mp->m_super->s_dev;
1064 __entry->lsn = lsn; 1061 __entry->lsn = lsn;
1062 __entry->caller_ip = caller_ip;
1065 ), 1063 ),
1066 TP_printk("dev %d:%d lsn 0x%llx", 1064 TP_printk("dev %d:%d lsn 0x%llx caller %ps",
1067 MAJOR(__entry->dev), MINOR(__entry->dev), 1065 MAJOR(__entry->dev), MINOR(__entry->dev),
1068 __entry->lsn) 1066 __entry->lsn, (void *)__entry->caller_ip)
1069) 1067)
1070 1068
1071#define DEFINE_LOG_ITEM_EVENT(name) \ 1069#define DEFINE_LOG_ITEM_EVENT(name) \
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 20c53666cb4b..5f3d33d16e67 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -47,47 +47,6 @@ xfs_trans_init(
47} 47}
48 48
49/* 49/*
50 * This routine is called to allocate a transaction structure.
51 * The type parameter indicates the type of the transaction. These
52 * are enumerated in xfs_trans.h.
53 *
54 * Dynamically allocate the transaction structure from the transaction
55 * zone, initialize it, and return it to the caller.
56 */
57xfs_trans_t *
58xfs_trans_alloc(
59 xfs_mount_t *mp,
60 uint type)
61{
62 xfs_trans_t *tp;
63
64 sb_start_intwrite(mp->m_super);
65 tp = _xfs_trans_alloc(mp, type, KM_SLEEP);
66 tp->t_flags |= XFS_TRANS_FREEZE_PROT;
67 return tp;
68}
69
70xfs_trans_t *
71_xfs_trans_alloc(
72 xfs_mount_t *mp,
73 uint type,
74 xfs_km_flags_t memflags)
75{
76 xfs_trans_t *tp;
77
78 WARN_ON(mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE);
79 atomic_inc(&mp->m_active_trans);
80
81 tp = kmem_zone_zalloc(xfs_trans_zone, memflags);
82 tp->t_magic = XFS_TRANS_HEADER_MAGIC;
83 tp->t_type = type;
84 tp->t_mountp = mp;
85 INIT_LIST_HEAD(&tp->t_items);
86 INIT_LIST_HEAD(&tp->t_busy);
87 return tp;
88}
89
90/*
91 * Free the transaction structure. If there is more clean up 50 * Free the transaction structure. If there is more clean up
92 * to do when the structure is freed, add it here. 51 * to do when the structure is freed, add it here.
93 */ 52 */
@@ -99,7 +58,7 @@ xfs_trans_free(
99 xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false); 58 xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false);
100 59
101 atomic_dec(&tp->t_mountp->m_active_trans); 60 atomic_dec(&tp->t_mountp->m_active_trans);
102 if (tp->t_flags & XFS_TRANS_FREEZE_PROT) 61 if (!(tp->t_flags & XFS_TRANS_NO_WRITECOUNT))
103 sb_end_intwrite(tp->t_mountp->m_super); 62 sb_end_intwrite(tp->t_mountp->m_super);
104 xfs_trans_free_dqinfo(tp); 63 xfs_trans_free_dqinfo(tp);
105 kmem_zone_free(xfs_trans_zone, tp); 64 kmem_zone_free(xfs_trans_zone, tp);
@@ -125,7 +84,6 @@ xfs_trans_dup(
125 * Initialize the new transaction structure. 84 * Initialize the new transaction structure.
126 */ 85 */
127 ntp->t_magic = XFS_TRANS_HEADER_MAGIC; 86 ntp->t_magic = XFS_TRANS_HEADER_MAGIC;
128 ntp->t_type = tp->t_type;
129 ntp->t_mountp = tp->t_mountp; 87 ntp->t_mountp = tp->t_mountp;
130 INIT_LIST_HEAD(&ntp->t_items); 88 INIT_LIST_HEAD(&ntp->t_items);
131 INIT_LIST_HEAD(&ntp->t_busy); 89 INIT_LIST_HEAD(&ntp->t_busy);
@@ -135,9 +93,9 @@ xfs_trans_dup(
135 93
136 ntp->t_flags = XFS_TRANS_PERM_LOG_RES | 94 ntp->t_flags = XFS_TRANS_PERM_LOG_RES |
137 (tp->t_flags & XFS_TRANS_RESERVE) | 95 (tp->t_flags & XFS_TRANS_RESERVE) |
138 (tp->t_flags & XFS_TRANS_FREEZE_PROT); 96 (tp->t_flags & XFS_TRANS_NO_WRITECOUNT);
139 /* We gave our writer reference to the new transaction */ 97 /* We gave our writer reference to the new transaction */
140 tp->t_flags &= ~XFS_TRANS_FREEZE_PROT; 98 tp->t_flags |= XFS_TRANS_NO_WRITECOUNT;
141 ntp->t_ticket = xfs_log_ticket_get(tp->t_ticket); 99 ntp->t_ticket = xfs_log_ticket_get(tp->t_ticket);
142 ntp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used; 100 ntp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used;
143 tp->t_blk_res = tp->t_blk_res_used; 101 tp->t_blk_res = tp->t_blk_res_used;
@@ -165,7 +123,7 @@ xfs_trans_dup(
165 * This does not do quota reservations. That typically is done by the 123 * This does not do quota reservations. That typically is done by the
166 * caller afterwards. 124 * caller afterwards.
167 */ 125 */
168int 126static int
169xfs_trans_reserve( 127xfs_trans_reserve(
170 struct xfs_trans *tp, 128 struct xfs_trans *tp,
171 struct xfs_trans_res *resp, 129 struct xfs_trans_res *resp,
@@ -219,7 +177,7 @@ xfs_trans_reserve(
219 resp->tr_logres, 177 resp->tr_logres,
220 resp->tr_logcount, 178 resp->tr_logcount,
221 &tp->t_ticket, XFS_TRANSACTION, 179 &tp->t_ticket, XFS_TRANSACTION,
222 permanent, tp->t_type); 180 permanent);
223 } 181 }
224 182
225 if (error) 183 if (error)
@@ -268,6 +226,42 @@ undo_blocks:
268 return error; 226 return error;
269} 227}
270 228
229int
230xfs_trans_alloc(
231 struct xfs_mount *mp,
232 struct xfs_trans_res *resp,
233 uint blocks,
234 uint rtextents,
235 uint flags,
236 struct xfs_trans **tpp)
237{
238 struct xfs_trans *tp;
239 int error;
240
241 if (!(flags & XFS_TRANS_NO_WRITECOUNT))
242 sb_start_intwrite(mp->m_super);
243
244 WARN_ON(mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE);
245 atomic_inc(&mp->m_active_trans);
246
247 tp = kmem_zone_zalloc(xfs_trans_zone,
248 (flags & XFS_TRANS_NOFS) ? KM_NOFS : KM_SLEEP);
249 tp->t_magic = XFS_TRANS_HEADER_MAGIC;
250 tp->t_flags = flags;
251 tp->t_mountp = mp;
252 INIT_LIST_HEAD(&tp->t_items);
253 INIT_LIST_HEAD(&tp->t_busy);
254
255 error = xfs_trans_reserve(tp, resp, blocks, rtextents);
256 if (error) {
257 xfs_trans_cancel(tp);
258 return error;
259 }
260
261 *tpp = tp;
262 return 0;
263}
264
271/* 265/*
272 * Record the indicated change to the given field for application 266 * Record the indicated change to the given field for application
273 * to the file system's superblock when the transaction commits. 267 * to the file system's superblock when the transaction commits.
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index e7c49cf43fbc..9a462e892e4f 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -90,7 +90,6 @@ void xfs_log_item_init(struct xfs_mount *mp, struct xfs_log_item *item,
90 */ 90 */
91typedef struct xfs_trans { 91typedef struct xfs_trans {
92 unsigned int t_magic; /* magic number */ 92 unsigned int t_magic; /* magic number */
93 unsigned int t_type; /* transaction type */
94 unsigned int t_log_res; /* amt of log space resvd */ 93 unsigned int t_log_res; /* amt of log space resvd */
95 unsigned int t_log_count; /* count for perm log res */ 94 unsigned int t_log_count; /* count for perm log res */
96 unsigned int t_blk_res; /* # of blocks resvd */ 95 unsigned int t_blk_res; /* # of blocks resvd */
@@ -148,10 +147,9 @@ typedef struct xfs_trans {
148/* 147/*
149 * XFS transaction mechanism exported interfaces. 148 * XFS transaction mechanism exported interfaces.
150 */ 149 */
151xfs_trans_t *xfs_trans_alloc(struct xfs_mount *, uint); 150int xfs_trans_alloc(struct xfs_mount *mp, struct xfs_trans_res *resp,
152xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint, xfs_km_flags_t); 151 uint blocks, uint rtextents, uint flags,
153int xfs_trans_reserve(struct xfs_trans *, struct xfs_trans_res *, 152 struct xfs_trans **tpp);
154 uint, uint);
155void xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t); 153void xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t);
156 154
157struct xfs_buf *xfs_trans_get_buf_map(struct xfs_trans *tp, 155struct xfs_buf *xfs_trans_get_buf_map(struct xfs_trans *tp,
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index d111f691f313..ec58ff094b1d 100644
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -146,7 +146,7 @@ __xfs_xattr_put_listent(
146 arraytop = context->count + prefix_len + namelen + 1; 146 arraytop = context->count + prefix_len + namelen + 1;
147 if (arraytop > context->firstu) { 147 if (arraytop > context->firstu) {
148 context->count = -1; /* insufficient space */ 148 context->count = -1; /* insufficient space */
149 return 1; 149 return 0;
150 } 150 }
151 offset = (char *)context->alist + context->count; 151 offset = (char *)context->alist + context->count;
152 strncpy(offset, prefix, prefix_len); 152 strncpy(offset, prefix, prefix_len);
@@ -166,8 +166,7 @@ xfs_xattr_put_listent(
166 int flags, 166 int flags,
167 unsigned char *name, 167 unsigned char *name,
168 int namelen, 168 int namelen,
169 int valuelen, 169 int valuelen)
170 unsigned char *value)
171{ 170{
172 char *prefix; 171 char *prefix;
173 int prefix_len; 172 int prefix_len;
@@ -221,11 +220,15 @@ xfs_xattr_put_listent(
221} 220}
222 221
223ssize_t 222ssize_t
224xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size) 223xfs_vn_listxattr(
224 struct dentry *dentry,
225 char *data,
226 size_t size)
225{ 227{
226 struct xfs_attr_list_context context; 228 struct xfs_attr_list_context context;
227 struct attrlist_cursor_kern cursor = { 0 }; 229 struct attrlist_cursor_kern cursor = { 0 };
228 struct inode *inode = d_inode(dentry); 230 struct inode *inode = d_inode(dentry);
231 int error;
229 232
230 /* 233 /*
231 * First read the regular on-disk attributes. 234 * First read the regular on-disk attributes.
@@ -239,7 +242,9 @@ xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
239 context.firstu = context.bufsize; 242 context.firstu = context.bufsize;
240 context.put_listent = xfs_xattr_put_listent; 243 context.put_listent = xfs_xattr_put_listent;
241 244
242 xfs_attr_list_int(&context); 245 error = xfs_attr_list_int(&context);
246 if (error)
247 return error;
243 if (context.count < 0) 248 if (context.count < 0)
244 return -ERANGE; 249 return -ERANGE;
245 250