author	Linus Torvalds <torvalds@linux-foundation.org>	2015-06-30 23:16:08 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-06-30 23:16:08 -0400
commit	68b4449d79f00dc3cb6ea1f4bf54cf8e453a53c8 (patch)
tree	8a0c55c7f2fcd2ec495c9a20e665561a1eba2b00
parent	043cd04950431f206f784d1ed9b3fcc5993045f2 (diff)
parent	de50e16ffabf64d30ca9372f39dc855a7553e305 (diff)
Merge tag 'xfs-for-linus-4.2-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs
Pull xfs updates from Dave Chinner:
 "There's a couple of small API changes to the core DAX code which
  required small changes to the ext2 and ext4 code bases, but otherwise
  everything is within the XFS codebase.

  This update contains:

   - A new sparse on-disk inode record format to allow small extents to
     be used for inode allocation when free space is fragmented.

   - DAX support. This includes minor changes to the DAX core code to
     fix problems with lock ordering and bufferhead mapping abuse.

   - transaction commit interface cleanup

   - removal of various unnecessary XFS specific type definitions

   - cleanup and optimisation of freelist preparation before allocation

   - various minor cleanups

   - bug fixes for
      - transaction reservation leaks
      - incorrect inode logging in unwritten extent conversion
      - mmap lock vs freeze ordering
      - remote symlink mishandling
      - attribute fork removal issues"

* tag 'xfs-for-linus-4.2-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs: (49 commits)
  xfs: don't truncate attribute extents if no extents exist
  xfs: clean up XFS_MIN_FREELIST macros
  xfs: sanitise error handling in xfs_alloc_fix_freelist
  xfs: factor out free space extent length check
  xfs: xfs_alloc_fix_freelist() can use incore perag structures
  xfs: remove xfs_caddr_t
  xfs: use void pointers in log validation helpers
  xfs: return a void pointer from xfs_buf_offset
  xfs: remove inst_t
  xfs: remove __psint_t and __psunsigned_t
  xfs: fix remote symlinks on V5/CRC filesystems
  xfs: fix xfs_log_done interface
  xfs: saner xfs_trans_commit interface
  xfs: remove the flags argument to xfs_trans_cancel
  xfs: pass a boolean flag to xfs_trans_free_items
  xfs: switch remaining xfs_trans_dup users to xfs_trans_roll
  xfs: check min blks for random debug mode sparse allocations
  xfs: fix sparse inodes 32-bit compile failure
  xfs: add initial DAX support
  xfs: add DAX IO path support
  ...
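The transaction commit interface cleanup listed above is easiest to see as a calling pattern. The sketch below condenses what the xfs_attr.c and xfs_bmap.c hunks in this diff do after the change: callers no longer pass XFS_TRANS_RELEASE_LOG_RES / XFS_TRANS_ABORT flags, because xfs_trans_commit() and xfs_trans_cancel() now work that out internally. This is a minimal illustration of the calling convention, not code from this merge; the allocation/ijoin glue around it is schematic.

/*
 * Illustrative only: the post-cleanup commit/cancel pattern, modelled on
 * the xfs_attr.c hunks below.  Not part of this merge.
 */
static int example_trans_pattern(struct xfs_mount *mp, struct xfs_inode *dp)
{
	struct xfs_trans	*tp;
	int			error;

	tp = xfs_trans_alloc(mp, XFS_TRANS_ATTR_RM);
	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_attrrm, 0, 0);
	if (error) {
		xfs_trans_cancel(tp);	/* was: xfs_trans_cancel(tp, 0) */
		return error;
	}

	xfs_ilock(dp, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, dp, 0);

	/* ... metadata updates would go here ... */

	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
	/* was: xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES) */
	error = xfs_trans_commit(tp);
	xfs_iunlock(dp, XFS_ILOCK_EXCL);
	return error;
}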
-rw-r--r--  Documentation/filesystems/xfs.txt | 12
-rw-r--r--  fs/dax.c | 34
-rw-r--r--  fs/ext2/file.c | 4
-rw-r--r--  fs/ext4/file.c | 16
-rw-r--r--  fs/ext4/inode.c | 21
-rw-r--r--  fs/xfs/libxfs/xfs_alloc.c | 281
-rw-r--r--  fs/xfs/libxfs/xfs_alloc.h | 10
-rw-r--r--  fs/xfs/libxfs/xfs_attr.c | 25
-rw-r--r--  fs/xfs/libxfs/xfs_bmap.c | 29
-rw-r--r--  fs/xfs/libxfs/xfs_format.h | 65
-rw-r--r--  fs/xfs/libxfs/xfs_fs.h | 1
-rw-r--r--  fs/xfs/libxfs/xfs_ialloc.c | 542
-rw-r--r--  fs/xfs/libxfs/xfs_ialloc.h | 15
-rw-r--r--  fs/xfs/libxfs/xfs_ialloc_btree.c | 93
-rw-r--r--  fs/xfs/libxfs/xfs_ialloc_btree.h | 10
-rw-r--r--  fs/xfs/libxfs/xfs_inode_buf.c | 8
-rw-r--r--  fs/xfs/libxfs/xfs_sb.c | 34
-rw-r--r--  fs/xfs/libxfs/xfs_shared.h | 6
-rw-r--r--  fs/xfs/libxfs/xfs_trans_resv.h | 4
-rw-r--r--  fs/xfs/libxfs/xfs_trans_space.h | 2
-rw-r--r--  fs/xfs/xfs_aops.c | 158
-rw-r--r--  fs/xfs/xfs_aops.h | 7
-rw-r--r--  fs/xfs/xfs_attr_inactive.c | 16
-rw-r--r--  fs/xfs/xfs_bmap_util.c | 89
-rw-r--r--  fs/xfs/xfs_buf.c | 6
-rw-r--r--  fs/xfs/xfs_buf.h | 2
-rw-r--r--  fs/xfs/xfs_dquot.c | 8
-rw-r--r--  fs/xfs/xfs_error.c | 4
-rw-r--r--  fs/xfs/xfs_error.h | 4
-rw-r--r--  fs/xfs/xfs_extfree_item.c | 2
-rw-r--r--  fs/xfs/xfs_file.c | 166
-rw-r--r--  fs/xfs/xfs_filestream.c | 3
-rw-r--r--  fs/xfs/xfs_fsops.c | 10
-rw-r--r--  fs/xfs/xfs_inode.c | 204
-rw-r--r--  fs/xfs/xfs_ioctl.c | 14
-rw-r--r--  fs/xfs/xfs_iomap.c | 18
-rw-r--r--  fs/xfs/xfs_iops.c | 48
-rw-r--r--  fs/xfs/xfs_itable.c | 13
-rw-r--r--  fs/xfs/xfs_linux.h | 14
-rw-r--r--  fs/xfs/xfs_log.c | 51
-rw-r--r--  fs/xfs/xfs_log.h | 13
-rw-r--r--  fs/xfs/xfs_log_cil.c | 12
-rw-r--r--  fs/xfs/xfs_log_priv.h | 2
-rw-r--r--  fs/xfs/xfs_log_recover.c | 97
-rw-r--r--  fs/xfs/xfs_mount.c | 16
-rw-r--r--  fs/xfs/xfs_mount.h | 4
-rw-r--r--  fs/xfs/xfs_pnfs.c | 4
-rw-r--r--  fs/xfs/xfs_qm.c | 7
-rw-r--r--  fs/xfs/xfs_qm_syscalls.c | 20
-rw-r--r--  fs/xfs/xfs_quota.h | 1
-rw-r--r--  fs/xfs/xfs_rtalloc.c | 16
-rw-r--r--  fs/xfs/xfs_super.c | 25
-rw-r--r--  fs/xfs/xfs_symlink.c | 19
-rw-r--r--  fs/xfs/xfs_trace.h | 47
-rw-r--r--  fs/xfs/xfs_trans.c | 91
-rw-r--r--  fs/xfs/xfs_trans.h | 7
-rw-r--r--  fs/xfs/xfs_trans_ail.c | 6
-rw-r--r--  fs/xfs/xfs_trans_dquot.c | 32
-rw-r--r--  fs/xfs/xfs_trans_priv.h | 2
-rw-r--r--  include/linux/fs.h | 9
60 files changed, 1611 insertions(+), 868 deletions(-)
diff --git a/Documentation/filesystems/xfs.txt b/Documentation/filesystems/xfs.txt
index 5a5a05582b58..8146e9fd5ffc 100644
--- a/Documentation/filesystems/xfs.txt
+++ b/Documentation/filesystems/xfs.txt
@@ -236,10 +236,10 @@ Removed Mount Options
236 236
237 Name Removed 237 Name Removed
238 ---- ------- 238 ---- -------
239 delaylog/nodelaylog v3.20 239 delaylog/nodelaylog v4.0
240 ihashsize v3.20 240 ihashsize v4.0
241 irixsgid v3.20 241 irixsgid v4.0
242 osyncisdsync/osyncisosync v3.20 242 osyncisdsync/osyncisosync v4.0
243 243
244 244
245sysctls 245sysctls
@@ -346,5 +346,5 @@ Removed Sysctls
346 346
347 Name Removed 347 Name Removed
348 ---- ------- 348 ---- -------
349 fs.xfs.xfsbufd_centisec v3.20 349 fs.xfs.xfsbufd_centisec v4.0
350 fs.xfs.age_buffer_centisecs v3.20 350 fs.xfs.age_buffer_centisecs v4.0
diff --git a/fs/dax.c b/fs/dax.c
index 6f65f00e58ec..99b5fbc38992 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -309,14 +309,21 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
309 out: 309 out:
310 i_mmap_unlock_read(mapping); 310 i_mmap_unlock_read(mapping);
311 311
312 if (bh->b_end_io)
313 bh->b_end_io(bh, 1);
314
315 return error; 312 return error;
316} 313}
317 314
318static int do_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, 315/**
319 get_block_t get_block) 316 * __dax_fault - handle a page fault on a DAX file
317 * @vma: The virtual memory area where the fault occurred
318 * @vmf: The description of the fault
319 * @get_block: The filesystem method used to translate file offsets to blocks
320 *
321 * When a page fault occurs, filesystems may call this helper in their
322 * fault handler for DAX files. __dax_fault() assumes the caller has done all
323 * the necessary locking for the page fault to proceed successfully.
324 */
325int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
326 get_block_t get_block, dax_iodone_t complete_unwritten)
320{ 327{
321 struct file *file = vma->vm_file; 328 struct file *file = vma->vm_file;
322 struct address_space *mapping = file->f_mapping; 329 struct address_space *mapping = file->f_mapping;
@@ -417,7 +424,19 @@ static int do_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
417 page_cache_release(page); 424 page_cache_release(page);
418 } 425 }
419 426
427 /*
428 * If we successfully insert the new mapping over an unwritten extent,
429 * we need to ensure we convert the unwritten extent. If there is an
430 * error inserting the mapping, the filesystem needs to leave it as
431 * unwritten to prevent exposure of the stale underlying data to
432 * userspace, but we still need to call the completion function so
433 * the private resources on the mapping buffer can be released. We
434 * indicate what the callback should do via the uptodate variable, same
435 * as for normal BH based IO completions.
436 */
420 error = dax_insert_mapping(inode, &bh, vma, vmf); 437 error = dax_insert_mapping(inode, &bh, vma, vmf);
438 if (buffer_unwritten(&bh))
439 complete_unwritten(&bh, !error);
421 440
422 out: 441 out:
423 if (error == -ENOMEM) 442 if (error == -ENOMEM)
@@ -434,6 +453,7 @@ static int do_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
434 } 453 }
435 goto out; 454 goto out;
436} 455}
456EXPORT_SYMBOL(__dax_fault);
437 457
438/** 458/**
439 * dax_fault - handle a page fault on a DAX file 459 * dax_fault - handle a page fault on a DAX file
@@ -445,7 +465,7 @@ static int do_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
445 * fault handler for DAX files. 465 * fault handler for DAX files.
446 */ 466 */
447int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, 467int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
448 get_block_t get_block) 468 get_block_t get_block, dax_iodone_t complete_unwritten)
449{ 469{
450 int result; 470 int result;
451 struct super_block *sb = file_inode(vma->vm_file)->i_sb; 471 struct super_block *sb = file_inode(vma->vm_file)->i_sb;
@@ -454,7 +474,7 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
454 sb_start_pagefault(sb); 474 sb_start_pagefault(sb);
455 file_update_time(vma->vm_file); 475 file_update_time(vma->vm_file);
456 } 476 }
457 result = do_dax_fault(vma, vmf, get_block); 477 result = __dax_fault(vma, vmf, get_block, complete_unwritten);
458 if (vmf->flags & FAULT_FLAG_WRITE) 478 if (vmf->flags & FAULT_FLAG_WRITE)
459 sb_end_pagefault(sb); 479 sb_end_pagefault(sb);
460 480
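The fs/dax.c hunks above change the core DAX fault API: do_dax_fault() becomes the exported __dax_fault(), and both it and dax_fault() now take a dax_iodone_t completion callback so unwritten extents can be converted (or their private resources released) after the mapping is inserted. The sketch below is a hypothetical filesystem ->fault handler, not part of this patch; example_get_block and example_end_io_unwritten are stand-in names. The real in-tree callers are in the ext2/ext4 hunks that follow (ext2 passes NULL, ext4 passes ext4_end_io_unwritten).

/*
 * Illustrative sketch only -- how a filesystem's DAX ->fault handler
 * would use the new dax_fault() signature.  example_get_block and
 * example_end_io_unwritten are hypothetical.
 */
static int example_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	/* dax_fault() handles sb_start_pagefault() itself for write faults */
	return dax_fault(vma, vmf, example_get_block, example_end_io_unwritten);
}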
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 3a0a6c6406d0..3b57c9f83c9b 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -28,12 +28,12 @@
28#ifdef CONFIG_FS_DAX 28#ifdef CONFIG_FS_DAX
29static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 29static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
30{ 30{
31 return dax_fault(vma, vmf, ext2_get_block); 31 return dax_fault(vma, vmf, ext2_get_block, NULL);
32} 32}
33 33
34static int ext2_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) 34static int ext2_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
35{ 35{
36 return dax_mkwrite(vma, vmf, ext2_get_block); 36 return dax_mkwrite(vma, vmf, ext2_get_block, NULL);
37} 37}
38 38
39static const struct vm_operations_struct ext2_dax_vm_ops = { 39static const struct vm_operations_struct ext2_dax_vm_ops = {
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index ac517f15741c..bc313ac5d3fa 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -192,15 +192,27 @@ out:
192} 192}
193 193
194#ifdef CONFIG_FS_DAX 194#ifdef CONFIG_FS_DAX
195static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate)
196{
197 struct inode *inode = bh->b_assoc_map->host;
198 /* XXX: breaks on 32-bit > 16GB. Is that even supported? */
199 loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits;
200 int err;
201 if (!uptodate)
202 return;
203 WARN_ON(!buffer_unwritten(bh));
204 err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size);
205}
206
195static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 207static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
196{ 208{
197 return dax_fault(vma, vmf, ext4_get_block); 209 return dax_fault(vma, vmf, ext4_get_block, ext4_end_io_unwritten);
198 /* Is this the right get_block? */ 210 /* Is this the right get_block? */
199} 211}
200 212
201static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) 213static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
202{ 214{
203 return dax_mkwrite(vma, vmf, ext4_get_block); 215 return dax_mkwrite(vma, vmf, ext4_get_block, ext4_end_io_unwritten);
204} 216}
205 217
206static const struct vm_operations_struct ext4_dax_vm_ops = { 218static const struct vm_operations_struct ext4_dax_vm_ops = {
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index f8a8d4ee7459..41f8e55afcd1 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -656,18 +656,6 @@ has_zeroout:
656 return retval; 656 return retval;
657} 657}
658 658
659static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate)
660{
661 struct inode *inode = bh->b_assoc_map->host;
662 /* XXX: breaks on 32-bit > 16GB. Is that even supported? */
663 loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits;
664 int err;
665 if (!uptodate)
666 return;
667 WARN_ON(!buffer_unwritten(bh));
668 err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size);
669}
670
671/* Maximum number of blocks we map for direct IO at once. */ 659/* Maximum number of blocks we map for direct IO at once. */
672#define DIO_MAX_BLOCKS 4096 660#define DIO_MAX_BLOCKS 4096
673 661
@@ -705,10 +693,15 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
705 693
706 map_bh(bh, inode->i_sb, map.m_pblk); 694 map_bh(bh, inode->i_sb, map.m_pblk);
707 bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; 695 bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
708 if (IS_DAX(inode) && buffer_unwritten(bh) && !io_end) { 696 if (IS_DAX(inode) && buffer_unwritten(bh)) {
697 /*
698 * dgc: I suspect unwritten conversion on ext4+DAX is
699 * fundamentally broken here when there are concurrent
700 * read/write in progress on this inode.
701 */
702 WARN_ON_ONCE(io_end);
709 bh->b_assoc_map = inode->i_mapping; 703 bh->b_assoc_map = inode->i_mapping;
710 bh->b_private = (void *)(unsigned long)iblock; 704 bh->b_private = (void *)(unsigned long)iblock;
711 bh->b_end_io = ext4_end_io_unwritten;
712 } 705 }
713 if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN) 706 if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN)
714 set_buffer_defer_completion(bh); 707 set_buffer_defer_completion(bh);
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 516162be1398..f9e9ffe6fb46 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -149,13 +149,27 @@ xfs_alloc_compute_aligned(
149{ 149{
150 xfs_agblock_t bno; 150 xfs_agblock_t bno;
151 xfs_extlen_t len; 151 xfs_extlen_t len;
152 xfs_extlen_t diff;
152 153
153 /* Trim busy sections out of found extent */ 154 /* Trim busy sections out of found extent */
154 xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len); 155 xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len);
155 156
157 /*
158 * If we have a largish extent that happens to start before min_agbno,
159 * see if we can shift it into range...
160 */
161 if (bno < args->min_agbno && bno + len > args->min_agbno) {
162 diff = args->min_agbno - bno;
163 if (len > diff) {
164 bno += diff;
165 len -= diff;
166 }
167 }
168
156 if (args->alignment > 1 && len >= args->minlen) { 169 if (args->alignment > 1 && len >= args->minlen) {
157 xfs_agblock_t aligned_bno = roundup(bno, args->alignment); 170 xfs_agblock_t aligned_bno = roundup(bno, args->alignment);
158 xfs_extlen_t diff = aligned_bno - bno; 171
172 diff = aligned_bno - bno;
159 173
160 *resbno = aligned_bno; 174 *resbno = aligned_bno;
161 *reslen = diff >= len ? 0 : len - diff; 175 *reslen = diff >= len ? 0 : len - diff;
@@ -795,9 +809,13 @@ xfs_alloc_find_best_extent(
795 * The good extent is closer than this one. 809 * The good extent is closer than this one.
796 */ 810 */
797 if (!dir) { 811 if (!dir) {
812 if (*sbnoa > args->max_agbno)
813 goto out_use_good;
798 if (*sbnoa >= args->agbno + gdiff) 814 if (*sbnoa >= args->agbno + gdiff)
799 goto out_use_good; 815 goto out_use_good;
800 } else { 816 } else {
817 if (*sbnoa < args->min_agbno)
818 goto out_use_good;
801 if (*sbnoa <= args->agbno - gdiff) 819 if (*sbnoa <= args->agbno - gdiff)
802 goto out_use_good; 820 goto out_use_good;
803 } 821 }
@@ -884,6 +902,17 @@ xfs_alloc_ag_vextent_near(
884 dofirst = prandom_u32() & 1; 902 dofirst = prandom_u32() & 1;
885#endif 903#endif
886 904
905 /* handle unitialized agbno range so caller doesn't have to */
906 if (!args->min_agbno && !args->max_agbno)
907 args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
908 ASSERT(args->min_agbno <= args->max_agbno);
909
910 /* clamp agbno to the range if it's outside */
911 if (args->agbno < args->min_agbno)
912 args->agbno = args->min_agbno;
913 if (args->agbno > args->max_agbno)
914 args->agbno = args->max_agbno;
915
887restart: 916restart:
888 bno_cur_lt = NULL; 917 bno_cur_lt = NULL;
889 bno_cur_gt = NULL; 918 bno_cur_gt = NULL;
@@ -976,6 +1005,8 @@ restart:
976 &ltbnoa, &ltlena); 1005 &ltbnoa, &ltlena);
977 if (ltlena < args->minlen) 1006 if (ltlena < args->minlen)
978 continue; 1007 continue;
1008 if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno)
1009 continue;
979 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); 1010 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
980 xfs_alloc_fix_len(args); 1011 xfs_alloc_fix_len(args);
981 ASSERT(args->len >= args->minlen); 1012 ASSERT(args->len >= args->minlen);
@@ -1096,11 +1127,11 @@ restart:
1096 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); 1127 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1097 xfs_alloc_compute_aligned(args, ltbno, ltlen, 1128 xfs_alloc_compute_aligned(args, ltbno, ltlen,
1098 &ltbnoa, &ltlena); 1129 &ltbnoa, &ltlena);
1099 if (ltlena >= args->minlen) 1130 if (ltlena >= args->minlen && ltbnoa >= args->min_agbno)
1100 break; 1131 break;
1101 if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i))) 1132 if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
1102 goto error0; 1133 goto error0;
1103 if (!i) { 1134 if (!i || ltbnoa < args->min_agbno) {
1104 xfs_btree_del_cursor(bno_cur_lt, 1135 xfs_btree_del_cursor(bno_cur_lt,
1105 XFS_BTREE_NOERROR); 1136 XFS_BTREE_NOERROR);
1106 bno_cur_lt = NULL; 1137 bno_cur_lt = NULL;
@@ -1112,11 +1143,11 @@ restart:
1112 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); 1143 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1113 xfs_alloc_compute_aligned(args, gtbno, gtlen, 1144 xfs_alloc_compute_aligned(args, gtbno, gtlen,
1114 &gtbnoa, &gtlena); 1145 &gtbnoa, &gtlena);
1115 if (gtlena >= args->minlen) 1146 if (gtlena >= args->minlen && gtbnoa <= args->max_agbno)
1116 break; 1147 break;
1117 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) 1148 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
1118 goto error0; 1149 goto error0;
1119 if (!i) { 1150 if (!i || gtbnoa > args->max_agbno) {
1120 xfs_btree_del_cursor(bno_cur_gt, 1151 xfs_btree_del_cursor(bno_cur_gt,
1121 XFS_BTREE_NOERROR); 1152 XFS_BTREE_NOERROR);
1122 bno_cur_gt = NULL; 1153 bno_cur_gt = NULL;
@@ -1216,6 +1247,7 @@ restart:
1216 ASSERT(ltnew >= ltbno); 1247 ASSERT(ltnew >= ltbno);
1217 ASSERT(ltnew + rlen <= ltbnoa + ltlena); 1248 ASSERT(ltnew + rlen <= ltbnoa + ltlena);
1218 ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); 1249 ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
1250 ASSERT(ltnew >= args->min_agbno && ltnew <= args->max_agbno);
1219 args->agbno = ltnew; 1251 args->agbno = ltnew;
1220 1252
1221 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen, 1253 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
@@ -1825,11 +1857,11 @@ xfs_alloc_compute_maxlevels(
1825xfs_extlen_t 1857xfs_extlen_t
1826xfs_alloc_longest_free_extent( 1858xfs_alloc_longest_free_extent(
1827 struct xfs_mount *mp, 1859 struct xfs_mount *mp,
1828 struct xfs_perag *pag) 1860 struct xfs_perag *pag,
1861 xfs_extlen_t need)
1829{ 1862{
1830 xfs_extlen_t need, delta = 0; 1863 xfs_extlen_t delta = 0;
1831 1864
1832 need = XFS_MIN_FREELIST_PAG(pag, mp);
1833 if (need > pag->pagf_flcount) 1865 if (need > pag->pagf_flcount)
1834 delta = need - pag->pagf_flcount; 1866 delta = need - pag->pagf_flcount;
1835 1867
@@ -1838,131 +1870,150 @@ xfs_alloc_longest_free_extent(
1838 return pag->pagf_flcount > 0 || pag->pagf_longest > 0; 1870 return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
1839} 1871}
1840 1872
1873unsigned int
1874xfs_alloc_min_freelist(
1875 struct xfs_mount *mp,
1876 struct xfs_perag *pag)
1877{
1878 unsigned int min_free;
1879
1880 /* space needed by-bno freespace btree */
1881 min_free = min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_BNOi] + 1,
1882 mp->m_ag_maxlevels);
1883 /* space needed by-size freespace btree */
1884 min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1,
1885 mp->m_ag_maxlevels);
1886
1887 return min_free;
1888}
1889
1890/*
1891 * Check if the operation we are fixing up the freelist for should go ahead or
1892 * not. If we are freeing blocks, we always allow it, otherwise the allocation
1893 * is dependent on whether the size and shape of free space available will
1894 * permit the requested allocation to take place.
1895 */
1896static bool
1897xfs_alloc_space_available(
1898 struct xfs_alloc_arg *args,
1899 xfs_extlen_t min_free,
1900 int flags)
1901{
1902 struct xfs_perag *pag = args->pag;
1903 xfs_extlen_t longest;
1904 int available;
1905
1906 if (flags & XFS_ALLOC_FLAG_FREEING)
1907 return true;
1908
1909 /* do we have enough contiguous free space for the allocation? */
1910 longest = xfs_alloc_longest_free_extent(args->mp, pag, min_free);
1911 if ((args->minlen + args->alignment + args->minalignslop - 1) > longest)
1912 return false;
1913
1914 /* do have enough free space remaining for the allocation? */
1915 available = (int)(pag->pagf_freeblks + pag->pagf_flcount -
1916 min_free - args->total);
1917 if (available < (int)args->minleft)
1918 return false;
1919
1920 return true;
1921}
1922
1841/* 1923/*
1842 * Decide whether to use this allocation group for this allocation. 1924 * Decide whether to use this allocation group for this allocation.
1843 * If so, fix up the btree freelist's size. 1925 * If so, fix up the btree freelist's size.
1844 */ 1926 */
1845STATIC int /* error */ 1927STATIC int /* error */
1846xfs_alloc_fix_freelist( 1928xfs_alloc_fix_freelist(
1847 xfs_alloc_arg_t *args, /* allocation argument structure */ 1929 struct xfs_alloc_arg *args, /* allocation argument structure */
1848 int flags) /* XFS_ALLOC_FLAG_... */ 1930 int flags) /* XFS_ALLOC_FLAG_... */
1849{ 1931{
1850 xfs_buf_t *agbp; /* agf buffer pointer */ 1932 struct xfs_mount *mp = args->mp;
1851 xfs_agf_t *agf; /* a.g. freespace structure pointer */ 1933 struct xfs_perag *pag = args->pag;
1852 xfs_buf_t *agflbp;/* agfl buffer pointer */ 1934 struct xfs_trans *tp = args->tp;
1853 xfs_agblock_t bno; /* freelist block */ 1935 struct xfs_buf *agbp = NULL;
1854 xfs_extlen_t delta; /* new blocks needed in freelist */ 1936 struct xfs_buf *agflbp = NULL;
1855 int error; /* error result code */ 1937 struct xfs_alloc_arg targs; /* local allocation arguments */
1856 xfs_extlen_t longest;/* longest extent in allocation group */ 1938 xfs_agblock_t bno; /* freelist block */
1857 xfs_mount_t *mp; /* file system mount point structure */ 1939 xfs_extlen_t need; /* total blocks needed in freelist */
1858 xfs_extlen_t need; /* total blocks needed in freelist */ 1940 int error;
1859 xfs_perag_t *pag; /* per-ag information structure */
1860 xfs_alloc_arg_t targs; /* local allocation arguments */
1861 xfs_trans_t *tp; /* transaction pointer */
1862
1863 mp = args->mp;
1864 1941
1865 pag = args->pag;
1866 tp = args->tp;
1867 if (!pag->pagf_init) { 1942 if (!pag->pagf_init) {
1868 if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags, 1943 error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
1869 &agbp))) 1944 if (error)
1870 return error; 1945 goto out_no_agbp;
1871 if (!pag->pagf_init) { 1946 if (!pag->pagf_init) {
1872 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK); 1947 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
1873 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); 1948 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
1874 args->agbp = NULL; 1949 goto out_agbp_relse;
1875 return 0;
1876 } 1950 }
1877 } else 1951 }
1878 agbp = NULL;
1879 1952
1880 /* 1953 /*
1881 * If this is a metadata preferred pag and we are user data 1954 * If this is a metadata preferred pag and we are user data then try
1882 * then try somewhere else if we are not being asked to 1955 * somewhere else if we are not being asked to try harder at this
1883 * try harder at this point 1956 * point
1884 */ 1957 */
1885 if (pag->pagf_metadata && args->userdata && 1958 if (pag->pagf_metadata && args->userdata &&
1886 (flags & XFS_ALLOC_FLAG_TRYLOCK)) { 1959 (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
1887 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); 1960 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
1888 args->agbp = NULL; 1961 goto out_agbp_relse;
1889 return 0;
1890 } 1962 }
1891 1963
1892 if (!(flags & XFS_ALLOC_FLAG_FREEING)) { 1964 need = xfs_alloc_min_freelist(mp, pag);
1893 /* 1965 if (!xfs_alloc_space_available(args, need, flags))
1894 * If it looks like there isn't a long enough extent, or enough 1966 goto out_agbp_relse;
1895 * total blocks, reject it.
1896 */
1897 need = XFS_MIN_FREELIST_PAG(pag, mp);
1898 longest = xfs_alloc_longest_free_extent(mp, pag);
1899 if ((args->minlen + args->alignment + args->minalignslop - 1) >
1900 longest ||
1901 ((int)(pag->pagf_freeblks + pag->pagf_flcount -
1902 need - args->total) < (int)args->minleft)) {
1903 if (agbp)
1904 xfs_trans_brelse(tp, agbp);
1905 args->agbp = NULL;
1906 return 0;
1907 }
1908 }
1909 1967
1910 /* 1968 /*
1911 * Get the a.g. freespace buffer. 1969 * Get the a.g. freespace buffer.
1912 * Can fail if we're not blocking on locks, and it's held. 1970 * Can fail if we're not blocking on locks, and it's held.
1913 */ 1971 */
1914 if (agbp == NULL) { 1972 if (!agbp) {
1915 if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags, 1973 error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
1916 &agbp))) 1974 if (error)
1917 return error; 1975 goto out_no_agbp;
1918 if (agbp == NULL) { 1976 if (!agbp) {
1919 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK); 1977 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
1920 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); 1978 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
1921 args->agbp = NULL; 1979 goto out_no_agbp;
1922 return 0;
1923 }
1924 }
1925 /*
1926 * Figure out how many blocks we should have in the freelist.
1927 */
1928 agf = XFS_BUF_TO_AGF(agbp);
1929 need = XFS_MIN_FREELIST(agf, mp);
1930 /*
1931 * If there isn't enough total or single-extent, reject it.
1932 */
1933 if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
1934 delta = need > be32_to_cpu(agf->agf_flcount) ?
1935 (need - be32_to_cpu(agf->agf_flcount)) : 0;
1936 longest = be32_to_cpu(agf->agf_longest);
1937 longest = (longest > delta) ? (longest - delta) :
1938 (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0);
1939 if ((args->minlen + args->alignment + args->minalignslop - 1) >
1940 longest ||
1941 ((int)(be32_to_cpu(agf->agf_freeblks) +
1942 be32_to_cpu(agf->agf_flcount) - need - args->total) <
1943 (int)args->minleft)) {
1944 xfs_trans_brelse(tp, agbp);
1945 args->agbp = NULL;
1946 return 0;
1947 } 1980 }
1948 } 1981 }
1982
1983 /* If there isn't enough total space or single-extent, reject it. */
1984 need = xfs_alloc_min_freelist(mp, pag);
1985 if (!xfs_alloc_space_available(args, need, flags))
1986 goto out_agbp_relse;
1987
1949 /* 1988 /*
1950 * Make the freelist shorter if it's too long. 1989 * Make the freelist shorter if it's too long.
1990 *
1991 * Note that from this point onwards, we will always release the agf and
1992 * agfl buffers on error. This handles the case where we error out and
1993 * the buffers are clean or may not have been joined to the transaction
1994 * and hence need to be released manually. If they have been joined to
1995 * the transaction, then xfs_trans_brelse() will handle them
1996 * appropriately based on the recursion count and dirty state of the
1997 * buffer.
1998 *
1999 * XXX (dgc): When we have lots of free space, does this buy us
2000 * anything other than extra overhead when we need to put more blocks
2001 * back on the free list? Maybe we should only do this when space is
2002 * getting low or the AGFL is more than half full?
1951 */ 2003 */
1952 while (be32_to_cpu(agf->agf_flcount) > need) { 2004 while (pag->pagf_flcount > need) {
1953 xfs_buf_t *bp; 2005 struct xfs_buf *bp;
1954 2006
1955 error = xfs_alloc_get_freelist(tp, agbp, &bno, 0); 2007 error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
1956 if (error) 2008 if (error)
1957 return error; 2009 goto out_agbp_relse;
1958 if ((error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1))) 2010 error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1);
1959 return error; 2011 if (error)
2012 goto out_agbp_relse;
1960 bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0); 2013 bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
1961 xfs_trans_binval(tp, bp); 2014 xfs_trans_binval(tp, bp);
1962 } 2015 }
1963 /* 2016
1964 * Initialize the args structure.
1965 */
1966 memset(&targs, 0, sizeof(targs)); 2017 memset(&targs, 0, sizeof(targs));
1967 targs.tp = tp; 2018 targs.tp = tp;
1968 targs.mp = mp; 2019 targs.mp = mp;
@@ -1971,21 +2022,20 @@ xfs_alloc_fix_freelist(
1971 targs.alignment = targs.minlen = targs.prod = targs.isfl = 1; 2022 targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
1972 targs.type = XFS_ALLOCTYPE_THIS_AG; 2023 targs.type = XFS_ALLOCTYPE_THIS_AG;
1973 targs.pag = pag; 2024 targs.pag = pag;
1974 if ((error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp))) 2025 error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp);
1975 return error; 2026 if (error)
1976 /* 2027 goto out_agbp_relse;
1977 * Make the freelist longer if it's too short. 2028
1978 */ 2029 /* Make the freelist longer if it's too short. */
1979 while (be32_to_cpu(agf->agf_flcount) < need) { 2030 while (pag->pagf_flcount < need) {
1980 targs.agbno = 0; 2031 targs.agbno = 0;
1981 targs.maxlen = need - be32_to_cpu(agf->agf_flcount); 2032 targs.maxlen = need - pag->pagf_flcount;
1982 /* 2033
1983 * Allocate as many blocks as possible at once. 2034 /* Allocate as many blocks as possible at once. */
1984 */ 2035 error = xfs_alloc_ag_vextent(&targs);
1985 if ((error = xfs_alloc_ag_vextent(&targs))) { 2036 if (error)
1986 xfs_trans_brelse(tp, agflbp); 2037 goto out_agflbp_relse;
1987 return error; 2038
1988 }
1989 /* 2039 /*
1990 * Stop if we run out. Won't happen if callers are obeying 2040 * Stop if we run out. Won't happen if callers are obeying
1991 * the restrictions correctly. Can happen for free calls 2041 * the restrictions correctly. Can happen for free calls
@@ -1994,9 +2044,7 @@ xfs_alloc_fix_freelist(
1994 if (targs.agbno == NULLAGBLOCK) { 2044 if (targs.agbno == NULLAGBLOCK) {
1995 if (flags & XFS_ALLOC_FLAG_FREEING) 2045 if (flags & XFS_ALLOC_FLAG_FREEING)
1996 break; 2046 break;
1997 xfs_trans_brelse(tp, agflbp); 2047 goto out_agflbp_relse;
1998 args->agbp = NULL;
1999 return 0;
2000 } 2048 }
2001 /* 2049 /*
2002 * Put each allocated block on the list. 2050 * Put each allocated block on the list.
@@ -2005,12 +2053,21 @@ xfs_alloc_fix_freelist(
2005 error = xfs_alloc_put_freelist(tp, agbp, 2053 error = xfs_alloc_put_freelist(tp, agbp,
2006 agflbp, bno, 0); 2054 agflbp, bno, 0);
2007 if (error) 2055 if (error)
2008 return error; 2056 goto out_agflbp_relse;
2009 } 2057 }
2010 } 2058 }
2011 xfs_trans_brelse(tp, agflbp); 2059 xfs_trans_brelse(tp, agflbp);
2012 args->agbp = agbp; 2060 args->agbp = agbp;
2013 return 0; 2061 return 0;
2062
2063out_agflbp_relse:
2064 xfs_trans_brelse(tp, agflbp);
2065out_agbp_relse:
2066 if (agbp)
2067 xfs_trans_brelse(tp, agbp);
2068out_no_agbp:
2069 args->agbp = NULL;
2070 return error;
2014} 2071}
2015 2072
2016/* 2073/*
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index d1b4b6a5c894..ca1c8168373a 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -112,6 +112,8 @@ typedef struct xfs_alloc_arg {
112 xfs_extlen_t total; /* total blocks needed in xaction */ 112 xfs_extlen_t total; /* total blocks needed in xaction */
113 xfs_extlen_t alignment; /* align answer to multiple of this */ 113 xfs_extlen_t alignment; /* align answer to multiple of this */
114 xfs_extlen_t minalignslop; /* slop for minlen+alignment calcs */ 114 xfs_extlen_t minalignslop; /* slop for minlen+alignment calcs */
115 xfs_agblock_t min_agbno; /* set an agbno range for NEAR allocs */
116 xfs_agblock_t max_agbno; /* ... */
115 xfs_extlen_t len; /* output: actual size of extent */ 117 xfs_extlen_t len; /* output: actual size of extent */
116 xfs_alloctype_t type; /* allocation type XFS_ALLOCTYPE_... */ 118 xfs_alloctype_t type; /* allocation type XFS_ALLOCTYPE_... */
117 xfs_alloctype_t otype; /* original allocation type */ 119 xfs_alloctype_t otype; /* original allocation type */
@@ -128,11 +130,9 @@ typedef struct xfs_alloc_arg {
128#define XFS_ALLOC_USERDATA 1 /* allocation is for user data*/ 130#define XFS_ALLOC_USERDATA 1 /* allocation is for user data*/
129#define XFS_ALLOC_INITIAL_USER_DATA 2 /* special case start of file */ 131#define XFS_ALLOC_INITIAL_USER_DATA 2 /* special case start of file */
130 132
131/* 133xfs_extlen_t xfs_alloc_longest_free_extent(struct xfs_mount *mp,
132 * Find the length of the longest extent in an AG. 134 struct xfs_perag *pag, xfs_extlen_t need);
133 */ 135unsigned int xfs_alloc_min_freelist(struct xfs_mount *mp,
134xfs_extlen_t
135xfs_alloc_longest_free_extent(struct xfs_mount *mp,
136 struct xfs_perag *pag); 136 struct xfs_perag *pag);
137 137
138/* 138/*
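The new min_agbno/max_agbno fields declared above let a NEAR allocation be confined to a window of the AG: the xfs_alloc.c hunks earlier clamp args->agbno into the range and skip candidate extents that fall outside it. The snippet below is a hypothetical caller, not from this merge (sparse inode chunk allocation is the intended in-tree user), and the field values are purely illustrative.

/*
 * Hypothetical sketch: constrain a NEAR allocation to a window of the AG
 * using the new min_agbno/max_agbno fields.  Values are illustrative.
 */
static int example_alloc_in_window(struct xfs_mount *mp, struct xfs_trans *tp,
				   xfs_agnumber_t agno, xfs_agblock_t target)
{
	struct xfs_alloc_arg	args = {
		.tp		= tp,
		.mp		= mp,
		.type		= XFS_ALLOCTYPE_NEAR_BNO,
		.fsbno		= XFS_AGB_TO_FSB(mp, agno, target),
		.minlen		= 1,
		.maxlen		= 1,
		.prod		= 1,
		.alignment	= 1,
		.min_agbno	= target,	/* don't allocate below this block */
		.max_agbno	= target + 63,	/* ...or above this one */
	};

	return xfs_alloc_vextent(&args);
}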
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 0a472fbe06d4..3349c9a1e845 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -266,7 +266,7 @@ xfs_attr_set(
266 tres.tr_logflags = XFS_TRANS_PERM_LOG_RES; 266 tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
267 error = xfs_trans_reserve(args.trans, &tres, args.total, 0); 267 error = xfs_trans_reserve(args.trans, &tres, args.total, 0);
268 if (error) { 268 if (error) {
269 xfs_trans_cancel(args.trans, 0); 269 xfs_trans_cancel(args.trans);
270 return error; 270 return error;
271 } 271 }
272 xfs_ilock(dp, XFS_ILOCK_EXCL); 272 xfs_ilock(dp, XFS_ILOCK_EXCL);
@@ -276,7 +276,7 @@ xfs_attr_set(
276 XFS_QMOPT_RES_REGBLKS); 276 XFS_QMOPT_RES_REGBLKS);
277 if (error) { 277 if (error) {
278 xfs_iunlock(dp, XFS_ILOCK_EXCL); 278 xfs_iunlock(dp, XFS_ILOCK_EXCL);
279 xfs_trans_cancel(args.trans, XFS_TRANS_RELEASE_LOG_RES); 279 xfs_trans_cancel(args.trans);
280 return error; 280 return error;
281 } 281 }
282 282
@@ -320,8 +320,7 @@ xfs_attr_set(
320 xfs_trans_ichgtime(args.trans, dp, 320 xfs_trans_ichgtime(args.trans, dp,
321 XFS_ICHGTIME_CHG); 321 XFS_ICHGTIME_CHG);
322 } 322 }
323 err2 = xfs_trans_commit(args.trans, 323 err2 = xfs_trans_commit(args.trans);
324 XFS_TRANS_RELEASE_LOG_RES);
325 xfs_iunlock(dp, XFS_ILOCK_EXCL); 324 xfs_iunlock(dp, XFS_ILOCK_EXCL);
326 325
327 return error ? error : err2; 326 return error ? error : err2;
@@ -383,16 +382,14 @@ xfs_attr_set(
383 * Commit the last in the sequence of transactions. 382 * Commit the last in the sequence of transactions.
384 */ 383 */
385 xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE); 384 xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
386 error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES); 385 error = xfs_trans_commit(args.trans);
387 xfs_iunlock(dp, XFS_ILOCK_EXCL); 386 xfs_iunlock(dp, XFS_ILOCK_EXCL);
388 387
389 return error; 388 return error;
390 389
391out: 390out:
392 if (args.trans) { 391 if (args.trans)
393 xfs_trans_cancel(args.trans, 392 xfs_trans_cancel(args.trans);
394 XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
395 }
396 xfs_iunlock(dp, XFS_ILOCK_EXCL); 393 xfs_iunlock(dp, XFS_ILOCK_EXCL);
397 return error; 394 return error;
398} 395}
@@ -462,7 +459,7 @@ xfs_attr_remove(
462 error = xfs_trans_reserve(args.trans, &M_RES(mp)->tr_attrrm, 459 error = xfs_trans_reserve(args.trans, &M_RES(mp)->tr_attrrm,
463 XFS_ATTRRM_SPACE_RES(mp), 0); 460 XFS_ATTRRM_SPACE_RES(mp), 0);
464 if (error) { 461 if (error) {
465 xfs_trans_cancel(args.trans, 0); 462 xfs_trans_cancel(args.trans);
466 return error; 463 return error;
467 } 464 }
468 465
@@ -501,16 +498,14 @@ xfs_attr_remove(
501 * Commit the last in the sequence of transactions. 498 * Commit the last in the sequence of transactions.
502 */ 499 */
503 xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE); 500 xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
504 error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES); 501 error = xfs_trans_commit(args.trans);
505 xfs_iunlock(dp, XFS_ILOCK_EXCL); 502 xfs_iunlock(dp, XFS_ILOCK_EXCL);
506 503
507 return error; 504 return error;
508 505
509out: 506out:
510 if (args.trans) { 507 if (args.trans)
511 xfs_trans_cancel(args.trans, 508 xfs_trans_cancel(args.trans);
512 XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
513 }
514 xfs_iunlock(dp, XFS_ILOCK_EXCL); 509 xfs_iunlock(dp, XFS_ILOCK_EXCL);
515 return error; 510 return error;
516} 511}
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index f1026e86dabc..63e05b663380 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -1112,7 +1112,6 @@ xfs_bmap_add_attrfork(
1112 int committed; /* xaction was committed */ 1112 int committed; /* xaction was committed */
1113 int logflags; /* logging flags */ 1113 int logflags; /* logging flags */
1114 int error; /* error return value */ 1114 int error; /* error return value */
1115 int cancel_flags = 0;
1116 1115
1117 ASSERT(XFS_IFORK_Q(ip) == 0); 1116 ASSERT(XFS_IFORK_Q(ip) == 0);
1118 1117
@@ -1124,17 +1123,15 @@ xfs_bmap_add_attrfork(
1124 tp->t_flags |= XFS_TRANS_RESERVE; 1123 tp->t_flags |= XFS_TRANS_RESERVE;
1125 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_addafork, blks, 0); 1124 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_addafork, blks, 0);
1126 if (error) { 1125 if (error) {
1127 xfs_trans_cancel(tp, 0); 1126 xfs_trans_cancel(tp);
1128 return error; 1127 return error;
1129 } 1128 }
1130 cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
1131 xfs_ilock(ip, XFS_ILOCK_EXCL); 1129 xfs_ilock(ip, XFS_ILOCK_EXCL);
1132 error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ? 1130 error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
1133 XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : 1131 XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
1134 XFS_QMOPT_RES_REGBLKS); 1132 XFS_QMOPT_RES_REGBLKS);
1135 if (error) 1133 if (error)
1136 goto trans_cancel; 1134 goto trans_cancel;
1137 cancel_flags |= XFS_TRANS_ABORT;
1138 if (XFS_IFORK_Q(ip)) 1135 if (XFS_IFORK_Q(ip))
1139 goto trans_cancel; 1136 goto trans_cancel;
1140 if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) { 1137 if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) {
@@ -1218,14 +1215,14 @@ xfs_bmap_add_attrfork(
1218 error = xfs_bmap_finish(&tp, &flist, &committed); 1215 error = xfs_bmap_finish(&tp, &flist, &committed);
1219 if (error) 1216 if (error)
1220 goto bmap_cancel; 1217 goto bmap_cancel;
1221 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1218 error = xfs_trans_commit(tp);
1222 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1219 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1223 return error; 1220 return error;
1224 1221
1225bmap_cancel: 1222bmap_cancel:
1226 xfs_bmap_cancel(&flist); 1223 xfs_bmap_cancel(&flist);
1227trans_cancel: 1224trans_cancel:
1228 xfs_trans_cancel(tp, cancel_flags); 1225 xfs_trans_cancel(tp);
1229 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1226 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1230 return error; 1227 return error;
1231} 1228}
@@ -3521,7 +3518,8 @@ xfs_bmap_longest_free_extent(
3521 } 3518 }
3522 } 3519 }
3523 3520
3524 longest = xfs_alloc_longest_free_extent(mp, pag); 3521 longest = xfs_alloc_longest_free_extent(mp, pag,
3522 xfs_alloc_min_freelist(mp, pag));
3525 if (*blen < longest) 3523 if (*blen < longest)
3526 *blen = longest; 3524 *blen = longest;
3527 3525
@@ -4424,7 +4422,15 @@ xfs_bmapi_convert_unwritten(
4424 error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx, 4422 error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx,
4425 &bma->cur, mval, bma->firstblock, bma->flist, 4423 &bma->cur, mval, bma->firstblock, bma->flist,
4426 &tmp_logflags); 4424 &tmp_logflags);
4427 bma->logflags |= tmp_logflags; 4425 /*
4426 * Log the inode core unconditionally in the unwritten extent conversion
4427 * path because the conversion might not have done so (e.g., if the
4428 * extent count hasn't changed). We need to make sure the inode is dirty
4429 * in the transaction for the sake of fsync(), even if nothing has
4430 * changed, because fsync() will not force the log for this transaction
4431 * unless it sees the inode pinned.
4432 */
4433 bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
4428 if (error) 4434 if (error)
4429 return error; 4435 return error;
4430 4436
@@ -5918,7 +5924,7 @@ xfs_bmap_split_extent(
5918 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, 5924 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
5919 XFS_DIOSTRAT_SPACE_RES(mp, 0), 0); 5925 XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
5920 if (error) { 5926 if (error) {
5921 xfs_trans_cancel(tp, 0); 5927 xfs_trans_cancel(tp);
5922 return error; 5928 return error;
5923 } 5929 }
5924 5930
@@ -5936,10 +5942,9 @@ xfs_bmap_split_extent(
5936 if (error) 5942 if (error)
5937 goto out; 5943 goto out;
5938 5944
5939 return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 5945 return xfs_trans_commit(tp);
5940
5941 5946
5942out: 5947out:
5943 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 5948 xfs_trans_cancel(tp);
5944 return error; 5949 return error;
5945} 5950}
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 4daaa662337b..a0ae572051de 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -170,7 +170,7 @@ typedef struct xfs_sb {
170 __uint32_t sb_features_log_incompat; 170 __uint32_t sb_features_log_incompat;
171 171
172 __uint32_t sb_crc; /* superblock crc */ 172 __uint32_t sb_crc; /* superblock crc */
173 __uint32_t sb_pad; 173 xfs_extlen_t sb_spino_align; /* sparse inode chunk alignment */
174 174
175 xfs_ino_t sb_pquotino; /* project quota inode */ 175 xfs_ino_t sb_pquotino; /* project quota inode */
176 xfs_lsn_t sb_lsn; /* last write sequence */ 176 xfs_lsn_t sb_lsn; /* last write sequence */
@@ -256,7 +256,7 @@ typedef struct xfs_dsb {
256 __be32 sb_features_log_incompat; 256 __be32 sb_features_log_incompat;
257 257
258 __le32 sb_crc; /* superblock crc */ 258 __le32 sb_crc; /* superblock crc */
259 __be32 sb_pad; 259 __be32 sb_spino_align; /* sparse inode chunk alignment */
260 260
261 __be64 sb_pquotino; /* project quota inode */ 261 __be64 sb_pquotino; /* project quota inode */
262 __be64 sb_lsn; /* last write sequence */ 262 __be64 sb_lsn; /* last write sequence */
@@ -457,8 +457,10 @@ xfs_sb_has_ro_compat_feature(
457} 457}
458 458
459#define XFS_SB_FEAT_INCOMPAT_FTYPE (1 << 0) /* filetype in dirent */ 459#define XFS_SB_FEAT_INCOMPAT_FTYPE (1 << 0) /* filetype in dirent */
460#define XFS_SB_FEAT_INCOMPAT_SPINODES (1 << 1) /* sparse inode chunks */
460#define XFS_SB_FEAT_INCOMPAT_ALL \ 461#define XFS_SB_FEAT_INCOMPAT_ALL \
461 (XFS_SB_FEAT_INCOMPAT_FTYPE) 462 (XFS_SB_FEAT_INCOMPAT_FTYPE| \
463 XFS_SB_FEAT_INCOMPAT_SPINODES)
462 464
463#define XFS_SB_FEAT_INCOMPAT_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_ALL 465#define XFS_SB_FEAT_INCOMPAT_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_ALL
464static inline bool 466static inline bool
@@ -506,6 +508,12 @@ static inline int xfs_sb_version_hasfinobt(xfs_sb_t *sbp)
506 (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT); 508 (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT);
507} 509}
508 510
511static inline bool xfs_sb_version_hassparseinodes(struct xfs_sb *sbp)
512{
513 return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
514 xfs_sb_has_incompat_feature(sbp, XFS_SB_FEAT_INCOMPAT_SPINODES);
515}
516
509/* 517/*
510 * end of superblock version macros 518 * end of superblock version macros
511 */ 519 */
@@ -758,19 +766,6 @@ typedef struct xfs_agfl {
758 766
759#define XFS_AGFL_CRC_OFF offsetof(struct xfs_agfl, agfl_crc) 767#define XFS_AGFL_CRC_OFF offsetof(struct xfs_agfl, agfl_crc)
760 768
761
762#define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels)
763#define XFS_MIN_FREELIST_RAW(bl,cl,mp) \
764 (MIN(bl + 1, XFS_AG_MAXLEVELS(mp)) + MIN(cl + 1, XFS_AG_MAXLEVELS(mp)))
765#define XFS_MIN_FREELIST(a,mp) \
766 (XFS_MIN_FREELIST_RAW( \
767 be32_to_cpu((a)->agf_levels[XFS_BTNUM_BNOi]), \
768 be32_to_cpu((a)->agf_levels[XFS_BTNUM_CNTi]), mp))
769#define XFS_MIN_FREELIST_PAG(pag,mp) \
770 (XFS_MIN_FREELIST_RAW( \
771 (unsigned int)(pag)->pagf_levels[XFS_BTNUM_BNOi], \
772 (unsigned int)(pag)->pagf_levels[XFS_BTNUM_CNTi], mp))
773
774#define XFS_AGB_TO_FSB(mp,agno,agbno) \ 769#define XFS_AGB_TO_FSB(mp,agno,agbno) \
775 (((xfs_fsblock_t)(agno) << (mp)->m_sb.sb_agblklog) | (agbno)) 770 (((xfs_fsblock_t)(agno) << (mp)->m_sb.sb_agblklog) | (agbno))
776#define XFS_FSB_TO_AGNO(mp,fsbno) \ 771#define XFS_FSB_TO_AGNO(mp,fsbno) \
@@ -1216,26 +1211,54 @@ typedef __uint64_t xfs_inofree_t;
1216#define XFS_INOBT_ALL_FREE ((xfs_inofree_t)-1) 1211#define XFS_INOBT_ALL_FREE ((xfs_inofree_t)-1)
1217#define XFS_INOBT_MASK(i) ((xfs_inofree_t)1 << (i)) 1212#define XFS_INOBT_MASK(i) ((xfs_inofree_t)1 << (i))
1218 1213
1214#define XFS_INOBT_HOLEMASK_FULL 0 /* holemask for full chunk */
1215#define XFS_INOBT_HOLEMASK_BITS (NBBY * sizeof(__uint16_t))
1216#define XFS_INODES_PER_HOLEMASK_BIT \
1217 (XFS_INODES_PER_CHUNK / (NBBY * sizeof(__uint16_t)))
1218
1219static inline xfs_inofree_t xfs_inobt_maskn(int i, int n) 1219static inline xfs_inofree_t xfs_inobt_maskn(int i, int n)
1220{ 1220{
1221 return ((n >= XFS_INODES_PER_CHUNK ? 0 : XFS_INOBT_MASK(n)) - 1) << i; 1221 return ((n >= XFS_INODES_PER_CHUNK ? 0 : XFS_INOBT_MASK(n)) - 1) << i;
1222} 1222}
1223 1223
1224/* 1224/*
1225 * Data record structure 1225 * The on-disk inode record structure has two formats. The original "full"
1226 * format uses a 4-byte freecount. The "sparse" format uses a 1-byte freecount
1227 * and replaces the 3 high-order freecount bytes wth the holemask and inode
1228 * count.
1229 *
1230 * The holemask of the sparse record format allows an inode chunk to have holes
1231 * that refer to blocks not owned by the inode record. This facilitates inode
1232 * allocation in the event of severe free space fragmentation.
1226 */ 1233 */
1227typedef struct xfs_inobt_rec { 1234typedef struct xfs_inobt_rec {
1228 __be32 ir_startino; /* starting inode number */ 1235 __be32 ir_startino; /* starting inode number */
1229 __be32 ir_freecount; /* count of free inodes (set bits) */ 1236 union {
1237 struct {
1238 __be32 ir_freecount; /* count of free inodes */
1239 } f;
1240 struct {
1241 __be16 ir_holemask;/* hole mask for sparse chunks */
1242 __u8 ir_count; /* total inode count */
1243 __u8 ir_freecount; /* count of free inodes */
1244 } sp;
1245 } ir_u;
1230 __be64 ir_free; /* free inode mask */ 1246 __be64 ir_free; /* free inode mask */
1231} xfs_inobt_rec_t; 1247} xfs_inobt_rec_t;
1232 1248
1233typedef struct xfs_inobt_rec_incore { 1249typedef struct xfs_inobt_rec_incore {
1234 xfs_agino_t ir_startino; /* starting inode number */ 1250 xfs_agino_t ir_startino; /* starting inode number */
1235 __int32_t ir_freecount; /* count of free inodes (set bits) */ 1251 __uint16_t ir_holemask; /* hole mask for sparse chunks */
1252 __uint8_t ir_count; /* total inode count */
1253 __uint8_t ir_freecount; /* count of free inodes (set bits) */
1236 xfs_inofree_t ir_free; /* free inode mask */ 1254 xfs_inofree_t ir_free; /* free inode mask */
1237} xfs_inobt_rec_incore_t; 1255} xfs_inobt_rec_incore_t;
1238 1256
1257static inline bool xfs_inobt_issparse(uint16_t holemask)
1258{
1259 /* non-zero holemask represents a sparse rec. */
1260 return holemask;
1261}
1239 1262
1240/* 1263/*
1241 * Key structure 1264 * Key structure
@@ -1453,8 +1476,8 @@ struct xfs_acl {
1453 sizeof(struct xfs_acl_entry) * XFS_ACL_MAX_ENTRIES((mp))) 1476 sizeof(struct xfs_acl_entry) * XFS_ACL_MAX_ENTRIES((mp)))
1454 1477
1455/* On-disk XFS extended attribute names */ 1478/* On-disk XFS extended attribute names */
1456#define SGI_ACL_FILE (unsigned char *)"SGI_ACL_FILE" 1479#define SGI_ACL_FILE "SGI_ACL_FILE"
1457#define SGI_ACL_DEFAULT (unsigned char *)"SGI_ACL_DEFAULT" 1480#define SGI_ACL_DEFAULT "SGI_ACL_DEFAULT"
1458#define SGI_ACL_FILE_SIZE (sizeof(SGI_ACL_FILE)-1) 1481#define SGI_ACL_FILE_SIZE (sizeof(SGI_ACL_FILE)-1)
1459#define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1) 1482#define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1)
1460 1483
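To make the sparse record format above concrete: with XFS_INODES_PER_CHUNK = 64 and a 16-bit holemask, each holemask bit covers XFS_INODES_PER_HOLEMASK_BIT (4) inodes, and a set bit marks a hole (inodes with no backing blocks), while 0 bits refer to allocated regions. The helper below is a conceptual reconstruction of what a routine like xfs_inobt_irec_to_allocmask(), used by the merge code later in this diff, would compute; the in-tree implementation may differ in detail.

/*
 * Illustrative sketch of the holemask semantics: expand the 16-bit
 * holemask into a 64-bit "allocated inode" bitmap.
 */
static uint64_t example_irec_allocmask(const struct xfs_inobt_rec_incore *irec)
{
	uint64_t	bitmap = 0;
	int		nbits = XFS_INODES_PER_HOLEMASK_BIT;
	int		i;

	for (i = 0; i < XFS_INOBT_HOLEMASK_BITS; i++) {
		if (irec->ir_holemask & (1 << i))
			continue;	/* hole: these inodes have no blocks */
		/* holemask bit clear: the covered inodes are allocated */
		bitmap |= ((1ULL << nbits) - 1) << (i * nbits);
	}
	return bitmap;
}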
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 18dc721ca19f..89689c6a43e2 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -239,6 +239,7 @@ typedef struct xfs_fsop_resblks {
239#define XFS_FSOP_GEOM_FLAGS_V5SB 0x8000 /* version 5 superblock */ 239#define XFS_FSOP_GEOM_FLAGS_V5SB 0x8000 /* version 5 superblock */
240#define XFS_FSOP_GEOM_FLAGS_FTYPE 0x10000 /* inode directory types */ 240#define XFS_FSOP_GEOM_FLAGS_FTYPE 0x10000 /* inode directory types */
241#define XFS_FSOP_GEOM_FLAGS_FINOBT 0x20000 /* free inode btree */ 241#define XFS_FSOP_GEOM_FLAGS_FINOBT 0x20000 /* free inode btree */
242#define XFS_FSOP_GEOM_FLAGS_SPINODES 0x40000 /* sparse inode chunks */
242 243
243/* 244/*
244 * Minimum and maximum sizes need for growth checks. 245 * Minimum and maximum sizes need for growth checks.
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 1c9e75521250..66efc702452a 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -65,6 +65,8 @@ xfs_inobt_lookup(
65 int *stat) /* success/failure */ 65 int *stat) /* success/failure */
66{ 66{
67 cur->bc_rec.i.ir_startino = ino; 67 cur->bc_rec.i.ir_startino = ino;
68 cur->bc_rec.i.ir_holemask = 0;
69 cur->bc_rec.i.ir_count = 0;
68 cur->bc_rec.i.ir_freecount = 0; 70 cur->bc_rec.i.ir_freecount = 0;
69 cur->bc_rec.i.ir_free = 0; 71 cur->bc_rec.i.ir_free = 0;
70 return xfs_btree_lookup(cur, dir, stat); 72 return xfs_btree_lookup(cur, dir, stat);
@@ -82,7 +84,14 @@ xfs_inobt_update(
82 union xfs_btree_rec rec; 84 union xfs_btree_rec rec;
83 85
84 rec.inobt.ir_startino = cpu_to_be32(irec->ir_startino); 86 rec.inobt.ir_startino = cpu_to_be32(irec->ir_startino);
85 rec.inobt.ir_freecount = cpu_to_be32(irec->ir_freecount); 87 if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) {
88 rec.inobt.ir_u.sp.ir_holemask = cpu_to_be16(irec->ir_holemask);
89 rec.inobt.ir_u.sp.ir_count = irec->ir_count;
90 rec.inobt.ir_u.sp.ir_freecount = irec->ir_freecount;
91 } else {
92 /* ir_holemask/ir_count not supported on-disk */
93 rec.inobt.ir_u.f.ir_freecount = cpu_to_be32(irec->ir_freecount);
94 }
86 rec.inobt.ir_free = cpu_to_be64(irec->ir_free); 95 rec.inobt.ir_free = cpu_to_be64(irec->ir_free);
87 return xfs_btree_update(cur, &rec); 96 return xfs_btree_update(cur, &rec);
88} 97}
@@ -100,12 +109,27 @@ xfs_inobt_get_rec(
100 int error; 109 int error;
101 110
102 error = xfs_btree_get_rec(cur, &rec, stat); 111 error = xfs_btree_get_rec(cur, &rec, stat);
103 if (!error && *stat == 1) { 112 if (error || *stat == 0)
104 irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino); 113 return error;
105 irec->ir_freecount = be32_to_cpu(rec->inobt.ir_freecount); 114
106 irec->ir_free = be64_to_cpu(rec->inobt.ir_free); 115 irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino);
116 if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) {
117 irec->ir_holemask = be16_to_cpu(rec->inobt.ir_u.sp.ir_holemask);
118 irec->ir_count = rec->inobt.ir_u.sp.ir_count;
119 irec->ir_freecount = rec->inobt.ir_u.sp.ir_freecount;
120 } else {
121 /*
122 * ir_holemask/ir_count not supported on-disk. Fill in hardcoded
123 * values for full inode chunks.
124 */
125 irec->ir_holemask = XFS_INOBT_HOLEMASK_FULL;
126 irec->ir_count = XFS_INODES_PER_CHUNK;
127 irec->ir_freecount =
128 be32_to_cpu(rec->inobt.ir_u.f.ir_freecount);
107 } 129 }
108 return error; 130 irec->ir_free = be64_to_cpu(rec->inobt.ir_free);
131
132 return 0;
109} 133}
110 134
111/* 135/*
@@ -114,10 +138,14 @@ xfs_inobt_get_rec(
114STATIC int 138STATIC int
115xfs_inobt_insert_rec( 139xfs_inobt_insert_rec(
116 struct xfs_btree_cur *cur, 140 struct xfs_btree_cur *cur,
141 __uint16_t holemask,
142 __uint8_t count,
117 __int32_t freecount, 143 __int32_t freecount,
118 xfs_inofree_t free, 144 xfs_inofree_t free,
119 int *stat) 145 int *stat)
120{ 146{
147 cur->bc_rec.i.ir_holemask = holemask;
148 cur->bc_rec.i.ir_count = count;
121 cur->bc_rec.i.ir_freecount = freecount; 149 cur->bc_rec.i.ir_freecount = freecount;
122 cur->bc_rec.i.ir_free = free; 150 cur->bc_rec.i.ir_free = free;
123 return xfs_btree_insert(cur, stat); 151 return xfs_btree_insert(cur, stat);
@@ -154,7 +182,9 @@ xfs_inobt_insert(
154 } 182 }
155 ASSERT(i == 0); 183 ASSERT(i == 0);
156 184
157 error = xfs_inobt_insert_rec(cur, XFS_INODES_PER_CHUNK, 185 error = xfs_inobt_insert_rec(cur, XFS_INOBT_HOLEMASK_FULL,
186 XFS_INODES_PER_CHUNK,
187 XFS_INODES_PER_CHUNK,
158 XFS_INOBT_ALL_FREE, &i); 188 XFS_INOBT_ALL_FREE, &i);
159 if (error) { 189 if (error) {
160 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 190 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
@@ -220,6 +250,7 @@ xfs_ialloc_inode_init(
220 struct xfs_mount *mp, 250 struct xfs_mount *mp,
221 struct xfs_trans *tp, 251 struct xfs_trans *tp,
222 struct list_head *buffer_list, 252 struct list_head *buffer_list,
253 int icount,
223 xfs_agnumber_t agno, 254 xfs_agnumber_t agno,
224 xfs_agblock_t agbno, 255 xfs_agblock_t agbno,
225 xfs_agblock_t length, 256 xfs_agblock_t length,
@@ -275,7 +306,7 @@ xfs_ialloc_inode_init(
275 * they track in the AIL as if they were physically logged. 306 * they track in the AIL as if they were physically logged.
276 */ 307 */
277 if (tp) 308 if (tp)
278 xfs_icreate_log(tp, agno, agbno, mp->m_ialloc_inos, 309 xfs_icreate_log(tp, agno, agbno, icount,
279 mp->m_sb.sb_inodesize, length, gen); 310 mp->m_sb.sb_inodesize, length, gen);
280 } else 311 } else
281 version = 2; 312 version = 2;
@@ -347,6 +378,214 @@ xfs_ialloc_inode_init(
347} 378}
348 379
349/* 380/*
381 * Align startino and allocmask for a recently allocated sparse chunk such that
382 * they are fit for insertion (or merge) into the on-disk inode btrees.
383 *
384 * Background:
385 *
386 * When enabled, sparse inode support increases the inode alignment from cluster
387 * size to inode chunk size. This means that the minimum range between two
388 * non-adjacent inode records in the inobt is large enough for a full inode
389 * record. This allows for cluster sized, cluster aligned block allocation
390 * without need to worry about whether the resulting inode record overlaps with
391 * another record in the tree. Without this basic rule, we would have to deal
392 * with the consequences of overlap by potentially undoing recent allocations in
393 * the inode allocation codepath.
394 *
395 * Because of this alignment rule (which is enforced on mount), there are two
396 * inobt possibilities for newly allocated sparse chunks. One is that the
397 * aligned inode record for the chunk covers a range of inodes not already
398 * covered in the inobt (i.e., it is safe to insert a new sparse record). The
399 * other is that a record already exists at the aligned startino that considers
400 * the newly allocated range as sparse. In the latter case, record content is
401 * merged in hope that sparse inode chunks fill to full chunks over time.
402 */
403STATIC void
404xfs_align_sparse_ino(
405 struct xfs_mount *mp,
406 xfs_agino_t *startino,
407 uint16_t *allocmask)
408{
409 xfs_agblock_t agbno;
410 xfs_agblock_t mod;
411 int offset;
412
413 agbno = XFS_AGINO_TO_AGBNO(mp, *startino);
414 mod = agbno % mp->m_sb.sb_inoalignmt;
415 if (!mod)
416 return;
417
418 /* calculate the inode offset and align startino */
419 offset = mod << mp->m_sb.sb_inopblog;
420 *startino -= offset;
421
422 /*
423 * Since startino has been aligned down, left shift allocmask such that
424 * it continues to represent the same physical inodes relative to the
425 * new startino.
426 */
427 *allocmask <<= offset / XFS_INODES_PER_HOLEMASK_BIT;
428}
429
430/*
431 * Determine whether the source inode record can merge into the target. Both
432 * records must be sparse, the inode ranges must match and there must be no
433 * allocation overlap between the records.
434 */
435STATIC bool
436__xfs_inobt_can_merge(
437 struct xfs_inobt_rec_incore *trec, /* tgt record */
438 struct xfs_inobt_rec_incore *srec) /* src record */
439{
440 uint64_t talloc;
441 uint64_t salloc;
442
443 /* records must cover the same inode range */
444 if (trec->ir_startino != srec->ir_startino)
445 return false;
446
447 /* both records must be sparse */
448 if (!xfs_inobt_issparse(trec->ir_holemask) ||
449 !xfs_inobt_issparse(srec->ir_holemask))
450 return false;
451
452 /* both records must track some inodes */
453 if (!trec->ir_count || !srec->ir_count)
454 return false;
455
456 /* can't exceed capacity of a full record */
457 if (trec->ir_count + srec->ir_count > XFS_INODES_PER_CHUNK)
458 return false;
459
460 /* verify there is no allocation overlap */
461 talloc = xfs_inobt_irec_to_allocmask(trec);
462 salloc = xfs_inobt_irec_to_allocmask(srec);
463 if (talloc & salloc)
464 return false;
465
466 return true;
467}
468
469/*
470 * Merge the source inode record into the target. The caller must call
471 * __xfs_inobt_can_merge() to ensure the merge is valid.
472 */
473STATIC void
474__xfs_inobt_rec_merge(
475 struct xfs_inobt_rec_incore *trec, /* target */
476 struct xfs_inobt_rec_incore *srec) /* src */
477{
478 ASSERT(trec->ir_startino == srec->ir_startino);
479
480 /* combine the counts */
481 trec->ir_count += srec->ir_count;
482 trec->ir_freecount += srec->ir_freecount;
483
484 /*
485 * Merge the holemask and free mask. For both fields, 0 bits refer to
486 * allocated inodes. We combine the allocated ranges with bitwise AND.
487 */
488 trec->ir_holemask &= srec->ir_holemask;
489 trec->ir_free &= srec->ir_free;
490}
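
Two things to note about the merge above: the counts simply add, and because a 0 bit in both the holemask and the free mask denotes an allocated inode, a bitwise AND accumulates the allocated ranges. A freshly inserted sparse record carries an all-ones free mask (see the XFS_INOBT_ALL_FREE assignment later in this patch), so the free-mask AND only becomes interesting once some inodes are in use, as in this hedged sketch (cut-down structures, not the kernel ones):

#include <stdint.h>
#include <stdio.h>

struct irec {				/* cut-down xfs_inobt_rec_incore */
	uint16_t	holemask;	/* 0 bit = 4 inodes physically allocated */
	uint8_t		count;		/* inodes physically allocated */
	uint8_t		freecount;	/* of those, how many are unused */
	uint64_t	free;		/* 1 bit = inode is free */
};

/* mirrors __xfs_inobt_rec_merge(): target absorbs source */
static void
rec_merge(
	struct irec		*t,
	const struct irec	*s)
{
	/*
	 * The caller (__xfs_inobt_can_merge) has already checked: same
	 * startino, both records sparse, no common 0 bits in the holemasks,
	 * and the combined count fits in one chunk.
	 */
	t->count += s->count;
	t->freecount += s->freecount;
	t->holemask &= s->holemask;	/* 0 bits (allocated) accumulate */
	t->free &= s->free;		/* same convention for the free mask */
}

int
main(void)
{
	/* lower 32 inodes allocated, two of them already in use */
	struct irec	t = { .holemask = 0xff00, .count = 32, .freecount = 30,
			      .free = ~0x3ULL };
	/* upper 32 inodes allocated, all still free */
	struct irec	s = { .holemask = 0x00ff, .count = 32, .freecount = 32,
			      .free = ~0ULL };

	rec_merge(&t, &s);
	printf("holemask 0x%04x count %u freecount %u free 0x%016llx\n",
	       (unsigned)t.holemask, t.count, t.freecount,
	       (unsigned long long)t.free);
	/* holemask 0x0000 count 64 freecount 62 free 0xfffffffffffffffc */
	return 0;
}
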
491
492/*
493 * Insert a new sparse inode chunk into the associated inode btree. The inode
494 * record for the sparse chunk is pre-aligned to a startino that should match
495 * any pre-existing sparse inode record in the tree. This allows sparse chunks
496 * to fill over time.
497 *
498 * This function supports two modes of handling preexisting records depending on
499 * the merge flag. If merge is true, the provided record is merged with the
500 * existing record and updated in place. The merged record is returned in nrec.
501 * If merge is false, an existing record is replaced with the provided record.
502 * If no preexisting record exists, the provided record is always inserted.
503 *
504 * It is considered corruption if a merge is requested and not possible. Given
505 * the sparse inode alignment constraints, this should never happen.
506 */
507STATIC int
508xfs_inobt_insert_sprec(
509 struct xfs_mount *mp,
510 struct xfs_trans *tp,
511 struct xfs_buf *agbp,
512 int btnum,
513 struct xfs_inobt_rec_incore *nrec, /* in/out: new/merged rec. */
514 bool merge) /* merge or replace */
515{
516 struct xfs_btree_cur *cur;
517 struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
518 xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno);
519 int error;
520 int i;
521 struct xfs_inobt_rec_incore rec;
522
523 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, btnum);
524
525 /* the new record is pre-aligned so we know where to look */
526 error = xfs_inobt_lookup(cur, nrec->ir_startino, XFS_LOOKUP_EQ, &i);
527 if (error)
528 goto error;
529 /* if nothing there, insert a new record and return */
530 if (i == 0) {
531 error = xfs_inobt_insert_rec(cur, nrec->ir_holemask,
532 nrec->ir_count, nrec->ir_freecount,
533 nrec->ir_free, &i);
534 if (error)
535 goto error;
536 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error);
537
538 goto out;
539 }
540
541 /*
542 * A record exists at this startino. Merge or replace the record
543 * depending on what we've been asked to do.
544 */
545 if (merge) {
546 error = xfs_inobt_get_rec(cur, &rec, &i);
547 if (error)
548 goto error;
549 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error);
550 XFS_WANT_CORRUPTED_GOTO(mp,
551 rec.ir_startino == nrec->ir_startino,
552 error);
553
554 /*
555 * This should never fail. If we have coexisting records that
556 * cannot merge, something is seriously wrong.
557 */
558 XFS_WANT_CORRUPTED_GOTO(mp, __xfs_inobt_can_merge(nrec, &rec),
559 error);
560
561 trace_xfs_irec_merge_pre(mp, agno, rec.ir_startino,
562 rec.ir_holemask, nrec->ir_startino,
563 nrec->ir_holemask);
564
 565 /* merge into nrec to output the updated record */
566 __xfs_inobt_rec_merge(nrec, &rec);
567
568 trace_xfs_irec_merge_post(mp, agno, nrec->ir_startino,
569 nrec->ir_holemask);
570
571 error = xfs_inobt_rec_check_count(mp, nrec);
572 if (error)
573 goto error;
574 }
575
576 error = xfs_inobt_update(cur, nrec);
577 if (error)
578 goto error;
579
580out:
581 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
582 return 0;
583error:
584 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
585 return error;
586}
587
588/*
350 * Allocate new inodes in the allocation group specified by agbp. 589 * Allocate new inodes in the allocation group specified by agbp.
351 * Return 0 for success, else error code. 590 * Return 0 for success, else error code.
352 */ 591 */
@@ -364,11 +603,22 @@ xfs_ialloc_ag_alloc(
364 xfs_agino_t newlen; /* new number of inodes */ 603 xfs_agino_t newlen; /* new number of inodes */
365 int isaligned = 0; /* inode allocation at stripe unit */ 604 int isaligned = 0; /* inode allocation at stripe unit */
366 /* boundary */ 605 /* boundary */
606 uint16_t allocmask = (uint16_t) -1; /* init. to full chunk */
607 struct xfs_inobt_rec_incore rec;
367 struct xfs_perag *pag; 608 struct xfs_perag *pag;
609 int do_sparse = 0;
368 610
369 memset(&args, 0, sizeof(args)); 611 memset(&args, 0, sizeof(args));
370 args.tp = tp; 612 args.tp = tp;
371 args.mp = tp->t_mountp; 613 args.mp = tp->t_mountp;
614 args.fsbno = NULLFSBLOCK;
615
616#ifdef DEBUG
617 /* randomly do sparse inode allocations */
618 if (xfs_sb_version_hassparseinodes(&tp->t_mountp->m_sb) &&
619 args.mp->m_ialloc_min_blks < args.mp->m_ialloc_blks)
620 do_sparse = prandom_u32() & 1;
621#endif
372 622
373 /* 623 /*
374 * Locking will ensure that we don't have two callers in here 624 * Locking will ensure that we don't have two callers in here
@@ -390,6 +640,8 @@ xfs_ialloc_ag_alloc(
390 agno = be32_to_cpu(agi->agi_seqno); 640 agno = be32_to_cpu(agi->agi_seqno);
391 args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) + 641 args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
392 args.mp->m_ialloc_blks; 642 args.mp->m_ialloc_blks;
643 if (do_sparse)
644 goto sparse_alloc;
393 if (likely(newino != NULLAGINO && 645 if (likely(newino != NULLAGINO &&
394 (args.agbno < be32_to_cpu(agi->agi_length)))) { 646 (args.agbno < be32_to_cpu(agi->agi_length)))) {
395 args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); 647 args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
@@ -428,8 +680,7 @@ xfs_ialloc_ag_alloc(
428 * subsequent requests. 680 * subsequent requests.
429 */ 681 */
430 args.minalignslop = 0; 682 args.minalignslop = 0;
431 } else 683 }
432 args.fsbno = NULLFSBLOCK;
433 684
434 if (unlikely(args.fsbno == NULLFSBLOCK)) { 685 if (unlikely(args.fsbno == NULLFSBLOCK)) {
435 /* 686 /*
@@ -480,6 +731,47 @@ xfs_ialloc_ag_alloc(
480 return error; 731 return error;
481 } 732 }
482 733
734 /*
735 * Finally, try a sparse allocation if the filesystem supports it and
736 * the sparse allocation length is smaller than a full chunk.
737 */
738 if (xfs_sb_version_hassparseinodes(&args.mp->m_sb) &&
739 args.mp->m_ialloc_min_blks < args.mp->m_ialloc_blks &&
740 args.fsbno == NULLFSBLOCK) {
741sparse_alloc:
742 args.type = XFS_ALLOCTYPE_NEAR_BNO;
743 args.agbno = be32_to_cpu(agi->agi_root);
744 args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
745 args.alignment = args.mp->m_sb.sb_spino_align;
746 args.prod = 1;
747
748 args.minlen = args.mp->m_ialloc_min_blks;
749 args.maxlen = args.minlen;
750
751 /*
752 * The inode record will be aligned to full chunk size. We must
 753 * prevent sparse allocation near AG boundaries that would result in
754 * invalid inode records, such as records that start at agbno 0
755 * or extend beyond the AG.
756 *
757 * Set min agbno to the first aligned, non-zero agbno and max to
758 * the last aligned agbno that is at least one full chunk from
759 * the end of the AG.
760 */
761 args.min_agbno = args.mp->m_sb.sb_inoalignmt;
762 args.max_agbno = round_down(args.mp->m_sb.sb_agblocks,
763 args.mp->m_sb.sb_inoalignmt) -
764 args.mp->m_ialloc_blks;
765
766 error = xfs_alloc_vextent(&args);
767 if (error)
768 return error;
769
770 newlen = args.len << args.mp->m_sb.sb_inopblog;
771 ASSERT(newlen <= XFS_INODES_PER_CHUNK);
772 allocmask = (1 << (newlen / XFS_INODES_PER_HOLEMASK_BIT)) - 1;
773 }
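
Using the defaults assumed elsewhere in this series (64-inode chunks, 8 inodes per 4k block, so a full chunk is 8 blocks), the bounds and the allocation mask computed in the sparse_alloc branch work out as in this hedged sketch; the AG size and the 4-block allocation result are illustrative only.

#include <stdint.h>
#include <stdio.h>

#define INODES_PER_HOLEMASK_BIT	4

static uint32_t
round_down_u32(uint32_t x, uint32_t align)
{
	return x - (x % align);
}

int
main(void)
{
	uint32_t	agblocks = 245763;	/* illustrative AG size */
	uint32_t	inoalignmt = 8;		/* full chunk size in blocks */
	uint32_t	ialloc_blks = 8;	/* blocks per full inode chunk */
	uint32_t	inopblog = 3;		/* 8 inodes per block */

	/* the same bounds set up for xfs_alloc_vextent() above */
	uint32_t	min_agbno = inoalignmt;
	uint32_t	max_agbno = round_down_u32(agblocks, inoalignmt) -
					ialloc_blks;

	/* suppose the allocator handed back a 4-block (32-inode) extent */
	uint32_t	newlen = 4 << inopblog;
	uint16_t	allocmask = (1u << (newlen / INODES_PER_HOLEMASK_BIT)) - 1;

	printf("min_agbno %u max_agbno %u\n",
	       (unsigned)min_agbno, (unsigned)max_agbno);
	printf("newlen %u allocmask 0x%04x holemask 0x%04x\n",
	       (unsigned)newlen, (unsigned)allocmask,
	       (unsigned)(uint16_t)~allocmask);
	/* min_agbno 8 max_agbno 245752, newlen 32, allocmask 0x00ff */
	return 0;
}
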
774
483 if (args.fsbno == NULLFSBLOCK) { 775 if (args.fsbno == NULLFSBLOCK) {
484 *alloc = 0; 776 *alloc = 0;
485 return 0; 777 return 0;
@@ -495,8 +787,8 @@ xfs_ialloc_ag_alloc(
495 * rather than a linear progression to prevent the next generation 787 * rather than a linear progression to prevent the next generation
496 * number from being easily guessable. 788 * number from being easily guessable.
497 */ 789 */
498 error = xfs_ialloc_inode_init(args.mp, tp, NULL, agno, args.agbno, 790 error = xfs_ialloc_inode_init(args.mp, tp, NULL, newlen, agno,
499 args.len, prandom_u32()); 791 args.agbno, args.len, prandom_u32());
500 792
501 if (error) 793 if (error)
502 return error; 794 return error;
@@ -504,6 +796,73 @@ xfs_ialloc_ag_alloc(
504 * Convert the results. 796 * Convert the results.
505 */ 797 */
506 newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0); 798 newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0);
799
800 if (xfs_inobt_issparse(~allocmask)) {
801 /*
802 * We've allocated a sparse chunk. Align the startino and mask.
803 */
804 xfs_align_sparse_ino(args.mp, &newino, &allocmask);
805
806 rec.ir_startino = newino;
807 rec.ir_holemask = ~allocmask;
808 rec.ir_count = newlen;
809 rec.ir_freecount = newlen;
810 rec.ir_free = XFS_INOBT_ALL_FREE;
811
812 /*
813 * Insert the sparse record into the inobt and allow for a merge
814 * if necessary. If a merge does occur, rec is updated to the
815 * merged record.
816 */
817 error = xfs_inobt_insert_sprec(args.mp, tp, agbp, XFS_BTNUM_INO,
818 &rec, true);
819 if (error == -EFSCORRUPTED) {
820 xfs_alert(args.mp,
821 "invalid sparse inode record: ino 0x%llx holemask 0x%x count %u",
822 XFS_AGINO_TO_INO(args.mp, agno,
823 rec.ir_startino),
824 rec.ir_holemask, rec.ir_count);
825 xfs_force_shutdown(args.mp, SHUTDOWN_CORRUPT_INCORE);
826 }
827 if (error)
828 return error;
829
830 /*
 831 * We can't merge the part we've just allocated into the finobt the way
 832 * we did for the inobt, due to finobt semantics. The original record
 833 * may or may not exist independently of whether physical inodes exist in this
834 * sparse chunk.
835 *
836 * We must update the finobt record based on the inobt record.
837 * rec contains the fully merged and up to date inobt record
838 * from the previous call. Set merge false to replace any
839 * existing record with this one.
840 */
841 if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) {
842 error = xfs_inobt_insert_sprec(args.mp, tp, agbp,
843 XFS_BTNUM_FINO, &rec,
844 false);
845 if (error)
846 return error;
847 }
848 } else {
849 /* full chunk - insert new records to both btrees */
850 error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen,
851 XFS_BTNUM_INO);
852 if (error)
853 return error;
854
855 if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) {
856 error = xfs_inobt_insert(args.mp, tp, agbp, newino,
857 newlen, XFS_BTNUM_FINO);
858 if (error)
859 return error;
860 }
861 }
862
863 /*
864 * Update AGI counts and newino.
865 */
507 be32_add_cpu(&agi->agi_count, newlen); 866 be32_add_cpu(&agi->agi_count, newlen);
508 be32_add_cpu(&agi->agi_freecount, newlen); 867 be32_add_cpu(&agi->agi_freecount, newlen);
509 pag = xfs_perag_get(args.mp, agno); 868 pag = xfs_perag_get(args.mp, agno);
@@ -512,20 +871,6 @@ xfs_ialloc_ag_alloc(
512 agi->agi_newino = cpu_to_be32(newino); 871 agi->agi_newino = cpu_to_be32(newino);
513 872
514 /* 873 /*
515 * Insert records describing the new inode chunk into the btrees.
516 */
517 error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen,
518 XFS_BTNUM_INO);
519 if (error)
520 return error;
521
522 if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) {
523 error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen,
524 XFS_BTNUM_FINO);
525 if (error)
526 return error;
527 }
528 /*
529 * Log allocation group header fields 874 * Log allocation group header fields
530 */ 875 */
531 xfs_ialloc_log_agi(tp, agbp, 876 xfs_ialloc_log_agi(tp, agbp,
@@ -645,7 +990,7 @@ xfs_ialloc_ag_select(
645 * if we fail allocation due to alignment issues then it is most 990 * if we fail allocation due to alignment issues then it is most
646 * likely a real ENOSPC condition. 991 * likely a real ENOSPC condition.
647 */ 992 */
648 ineed = mp->m_ialloc_blks; 993 ineed = mp->m_ialloc_min_blks;
649 if (flags && ineed > 1) 994 if (flags && ineed > 1)
650 ineed += xfs_ialloc_cluster_alignment(mp); 995 ineed += xfs_ialloc_cluster_alignment(mp);
651 longest = pag->pagf_longest; 996 longest = pag->pagf_longest;
@@ -732,6 +1077,27 @@ xfs_ialloc_get_rec(
732} 1077}
733 1078
734/* 1079/*
1080 * Return the offset of the first free inode in the record. If the inode chunk
1081 * is sparsely allocated, we convert the record holemask to inode granularity
1082 * and mask off the unallocated regions from the inode free mask.
1083 */
1084STATIC int
1085xfs_inobt_first_free_inode(
1086 struct xfs_inobt_rec_incore *rec)
1087{
1088 xfs_inofree_t realfree;
1089
1090 /* if there are no holes, return the first available offset */
1091 if (!xfs_inobt_issparse(rec->ir_holemask))
1092 return xfs_lowbit64(rec->ir_free);
1093
1094 realfree = xfs_inobt_irec_to_allocmask(rec);
1095 realfree &= rec->ir_free;
1096
1097 return xfs_lowbit64(realfree);
1098}
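
The masking above is what keeps a "free" bit that falls inside a hole from ever being handed out. A hedged standalone sketch, with __builtin_ctzll standing in for xfs_lowbit64() (and therefore assuming the masked result is non-zero, i.e. the record really has a free, physically allocated inode):

#include <stdint.h>
#include <stdio.h>

#define HOLEMASK_BITS		16
#define INODES_PER_HOLEMASK_BIT	4

/* expand a 16-bit holemask into a per-inode allocation bitmap */
static uint64_t
holemask_to_allocmask(uint16_t holemask)
{
	uint64_t	bitmap = 0;
	uint64_t	span = (1ULL << INODES_PER_HOLEMASK_BIT) - 1;
	int		i;

	for (i = 0; i < HOLEMASK_BITS; i++)
		if (!(holemask & (1u << i)))	/* 0 bit == allocated */
			bitmap |= span << (i * INODES_PER_HOLEMASK_BIT);
	return bitmap;
}

/* mirrors xfs_inobt_first_free_inode() */
static int
first_free_inode(uint16_t holemask, uint64_t free)
{
	uint64_t	realfree = free;

	if (holemask)				/* sparse record */
		realfree &= holemask_to_allocmask(holemask);
	return __builtin_ctzll(realfree);	/* xfs_lowbit64() stand-in */
}

int
main(void)
{
	/* only the upper 32 inodes physically exist; free mask is all ones */
	printf("first free offset: %d\n", first_free_inode(0x00ff, ~0ULL));
	/* prints 32, not 0: offsets 0-31 are a hole and are never returned */
	return 0;
}
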
1099
1100/*
735 * Allocate an inode using the inobt-only algorithm. 1101 * Allocate an inode using the inobt-only algorithm.
736 */ 1102 */
737STATIC int 1103STATIC int
@@ -961,7 +1327,7 @@ newino:
961 } 1327 }
962 1328
963alloc_inode: 1329alloc_inode:
964 offset = xfs_lowbit64(rec.ir_free); 1330 offset = xfs_inobt_first_free_inode(&rec);
965 ASSERT(offset >= 0); 1331 ASSERT(offset >= 0);
966 ASSERT(offset < XFS_INODES_PER_CHUNK); 1332 ASSERT(offset < XFS_INODES_PER_CHUNK);
967 ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) % 1333 ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
@@ -1210,7 +1576,7 @@ xfs_dialloc_ag(
1210 if (error) 1576 if (error)
1211 goto error_cur; 1577 goto error_cur;
1212 1578
1213 offset = xfs_lowbit64(rec.ir_free); 1579 offset = xfs_inobt_first_free_inode(&rec);
1214 ASSERT(offset >= 0); 1580 ASSERT(offset >= 0);
1215 ASSERT(offset < XFS_INODES_PER_CHUNK); 1581 ASSERT(offset < XFS_INODES_PER_CHUNK);
1216 ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) % 1582 ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
@@ -1439,6 +1805,83 @@ out_error:
1439 return error; 1805 return error;
1440} 1806}
1441 1807
1808/*
1809 * Free the blocks of an inode chunk. We must consider that the inode chunk
1810 * might be sparse and only free the regions that are allocated as part of the
1811 * chunk.
1812 */
1813STATIC void
1814xfs_difree_inode_chunk(
1815 struct xfs_mount *mp,
1816 xfs_agnumber_t agno,
1817 struct xfs_inobt_rec_incore *rec,
1818 struct xfs_bmap_free *flist)
1819{
1820 xfs_agblock_t sagbno = XFS_AGINO_TO_AGBNO(mp, rec->ir_startino);
1821 int startidx, endidx;
1822 int nextbit;
1823 xfs_agblock_t agbno;
1824 int contigblk;
1825 DECLARE_BITMAP(holemask, XFS_INOBT_HOLEMASK_BITS);
1826
1827 if (!xfs_inobt_issparse(rec->ir_holemask)) {
1828 /* not sparse, calculate extent info directly */
1829 xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, agno,
1830 XFS_AGINO_TO_AGBNO(mp, rec->ir_startino)),
1831 mp->m_ialloc_blks, flist, mp);
1832 return;
1833 }
1834
 1835 /* holemask is only 16 bits (fits in an unsigned long) */
1836 ASSERT(sizeof(rec->ir_holemask) <= sizeof(holemask[0]));
1837 holemask[0] = rec->ir_holemask;
1838
1839 /*
1840 * Find contiguous ranges of zeroes (i.e., allocated regions) in the
1841 * holemask and convert the start/end index of each range to an extent.
1842 * We start with the start and end index both pointing at the first 0 in
1843 * the mask.
1844 */
1845 startidx = endidx = find_first_zero_bit(holemask,
1846 XFS_INOBT_HOLEMASK_BITS);
1847 nextbit = startidx + 1;
1848 while (startidx < XFS_INOBT_HOLEMASK_BITS) {
1849 nextbit = find_next_zero_bit(holemask, XFS_INOBT_HOLEMASK_BITS,
1850 nextbit);
1851 /*
1852 * If the next zero bit is contiguous, update the end index of
1853 * the current range and continue.
1854 */
1855 if (nextbit != XFS_INOBT_HOLEMASK_BITS &&
1856 nextbit == endidx + 1) {
1857 endidx = nextbit;
1858 goto next;
1859 }
1860
1861 /*
1862 * nextbit is not contiguous with the current end index. Convert
1863 * the current start/end to an extent and add it to the free
1864 * list.
1865 */
1866 agbno = sagbno + (startidx * XFS_INODES_PER_HOLEMASK_BIT) /
1867 mp->m_sb.sb_inopblock;
1868 contigblk = ((endidx - startidx + 1) *
1869 XFS_INODES_PER_HOLEMASK_BIT) /
1870 mp->m_sb.sb_inopblock;
1871
1872 ASSERT(agbno % mp->m_sb.sb_spino_align == 0);
1873 ASSERT(contigblk % mp->m_sb.sb_spino_align == 0);
1874 xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, agno, agbno), contigblk,
1875 flist, mp);
1876
1877 /* reset range to current bit and carry on... */
1878 startidx = endidx = nextbit;
1879
1880next:
1881 nextbit++;
1882 }
1883}
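
The loop above walks runs of 0 bits (allocated regions) in the holemask and frees one block extent per run. A hedged sketch of the same scan using a plain loop instead of find_first_zero_bit()/find_next_zero_bit(); the start block and geometry are illustrative:

#include <stdint.h>
#include <stdio.h>

#define HOLEMASK_BITS		16
#define INODES_PER_HOLEMASK_BIT	4

/*
 * Report each contiguous run of allocated holemask regions as a block extent,
 * the way xfs_difree_inode_chunk() feeds them to xfs_bmap_add_free().
 */
static void
free_chunk_extents(uint32_t sagbno, uint16_t holemask, uint32_t inopblock)
{
	int	startidx = -1;
	int	idx;

	for (idx = 0; idx <= HOLEMASK_BITS; idx++) {
		int	allocated = idx < HOLEMASK_BITS &&
				    !(holemask & (1u << idx));

		if (allocated && startidx < 0) {
			startidx = idx;		/* a run starts */
		} else if (!allocated && startidx >= 0) {
			uint32_t agbno = sagbno + (startidx *
				INODES_PER_HOLEMASK_BIT) / inopblock;
			uint32_t blks = ((idx - startidx) *
				INODES_PER_HOLEMASK_BIT) / inopblock;

			printf("free extent: agbno %u, %u blocks\n",
			       (unsigned)agbno, (unsigned)blks);
			startidx = -1;		/* the run ends */
		}
	}
}

int
main(void)
{
	/* holes at holemask bits 4-7 and 12-15 -> two allocated runs */
	free_chunk_extents(800, 0xf0f0, 8);
	/* free extent: agbno 800, 2 blocks
	 * free extent: agbno 804, 2 blocks */
	return 0;
}
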
1884
1442STATIC int 1885STATIC int
1443xfs_difree_inobt( 1886xfs_difree_inobt(
1444 struct xfs_mount *mp, 1887 struct xfs_mount *mp,
@@ -1446,8 +1889,7 @@ xfs_difree_inobt(
1446 struct xfs_buf *agbp, 1889 struct xfs_buf *agbp,
1447 xfs_agino_t agino, 1890 xfs_agino_t agino,
1448 struct xfs_bmap_free *flist, 1891 struct xfs_bmap_free *flist,
1449 int *deleted, 1892 struct xfs_icluster *xic,
1450 xfs_ino_t *first_ino,
1451 struct xfs_inobt_rec_incore *orec) 1893 struct xfs_inobt_rec_incore *orec)
1452{ 1894{
1453 struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); 1895 struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
@@ -1501,20 +1943,23 @@ xfs_difree_inobt(
1501 rec.ir_freecount++; 1943 rec.ir_freecount++;
1502 1944
1503 /* 1945 /*
1504 * When an inode cluster is free, it becomes eligible for removal 1946 * When an inode chunk is free, it becomes eligible for removal. Don't
1947 * remove the chunk if the block size is large enough for multiple inode
1948 * chunks (that might not be free).
1505 */ 1949 */
1506 if (!(mp->m_flags & XFS_MOUNT_IKEEP) && 1950 if (!(mp->m_flags & XFS_MOUNT_IKEEP) &&
1507 (rec.ir_freecount == mp->m_ialloc_inos)) { 1951 rec.ir_free == XFS_INOBT_ALL_FREE &&
1508 1952 mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK) {
1509 *deleted = 1; 1953 xic->deleted = 1;
1510 *first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino); 1954 xic->first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino);
1955 xic->alloc = xfs_inobt_irec_to_allocmask(&rec);
1511 1956
1512 /* 1957 /*
1513 * Remove the inode cluster from the AGI B+Tree, adjust the 1958 * Remove the inode cluster from the AGI B+Tree, adjust the
1514 * AGI and Superblock inode counts, and mark the disk space 1959 * AGI and Superblock inode counts, and mark the disk space
1515 * to be freed when the transaction is committed. 1960 * to be freed when the transaction is committed.
1516 */ 1961 */
1517 ilen = mp->m_ialloc_inos; 1962 ilen = rec.ir_freecount;
1518 be32_add_cpu(&agi->agi_count, -ilen); 1963 be32_add_cpu(&agi->agi_count, -ilen);
1519 be32_add_cpu(&agi->agi_freecount, -(ilen - 1)); 1964 be32_add_cpu(&agi->agi_freecount, -(ilen - 1));
1520 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT); 1965 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT);
@@ -1530,11 +1975,9 @@ xfs_difree_inobt(
1530 goto error0; 1975 goto error0;
1531 } 1976 }
1532 1977
1533 xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, agno, 1978 xfs_difree_inode_chunk(mp, agno, &rec, flist);
1534 XFS_AGINO_TO_AGBNO(mp, rec.ir_startino)),
1535 mp->m_ialloc_blks, flist, mp);
1536 } else { 1979 } else {
1537 *deleted = 0; 1980 xic->deleted = 0;
1538 1981
1539 error = xfs_inobt_update(cur, &rec); 1982 error = xfs_inobt_update(cur, &rec);
1540 if (error) { 1983 if (error) {
@@ -1599,7 +2042,9 @@ xfs_difree_finobt(
1599 */ 2042 */
1600 XFS_WANT_CORRUPTED_GOTO(mp, ibtrec->ir_freecount == 1, error); 2043 XFS_WANT_CORRUPTED_GOTO(mp, ibtrec->ir_freecount == 1, error);
1601 2044
1602 error = xfs_inobt_insert_rec(cur, ibtrec->ir_freecount, 2045 error = xfs_inobt_insert_rec(cur, ibtrec->ir_holemask,
2046 ibtrec->ir_count,
2047 ibtrec->ir_freecount,
1603 ibtrec->ir_free, &i); 2048 ibtrec->ir_free, &i);
1604 if (error) 2049 if (error)
1605 goto error; 2050 goto error;
@@ -1634,8 +2079,13 @@ xfs_difree_finobt(
1634 * free inode. Hence, if all of the inodes are free and we aren't 2079 * free inode. Hence, if all of the inodes are free and we aren't
1635 * keeping inode chunks permanently on disk, remove the record. 2080 * keeping inode chunks permanently on disk, remove the record.
1636 * Otherwise, update the record with the new information. 2081 * Otherwise, update the record with the new information.
2082 *
2083 * Note that we currently can't free chunks when the block size is large
 2084 * enough for multiple chunks. In that case, leave the finobt record in
 2085 * place so it stays in sync with the inobt.
1637 */ 2086 */
1638 if (rec.ir_freecount == mp->m_ialloc_inos && 2087 if (rec.ir_free == XFS_INOBT_ALL_FREE &&
2088 mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK &&
1639 !(mp->m_flags & XFS_MOUNT_IKEEP)) { 2089 !(mp->m_flags & XFS_MOUNT_IKEEP)) {
1640 error = xfs_btree_delete(cur, &i); 2090 error = xfs_btree_delete(cur, &i);
1641 if (error) 2091 if (error)
@@ -1671,8 +2121,7 @@ xfs_difree(
1671 struct xfs_trans *tp, /* transaction pointer */ 2121 struct xfs_trans *tp, /* transaction pointer */
1672 xfs_ino_t inode, /* inode to be freed */ 2122 xfs_ino_t inode, /* inode to be freed */
1673 struct xfs_bmap_free *flist, /* extents to free */ 2123 struct xfs_bmap_free *flist, /* extents to free */
1674 int *deleted,/* set if inode cluster was deleted */ 2124 struct xfs_icluster *xic) /* cluster info if deleted */
1675 xfs_ino_t *first_ino)/* first inode in deleted cluster */
1676{ 2125{
1677 /* REFERENCED */ 2126 /* REFERENCED */
1678 xfs_agblock_t agbno; /* block number containing inode */ 2127 xfs_agblock_t agbno; /* block number containing inode */
@@ -1723,8 +2172,7 @@ xfs_difree(
1723 /* 2172 /*
1724 * Fix up the inode allocation btree. 2173 * Fix up the inode allocation btree.
1725 */ 2174 */
1726 error = xfs_difree_inobt(mp, tp, agbp, agino, flist, deleted, first_ino, 2175 error = xfs_difree_inobt(mp, tp, agbp, agino, flist, xic, &rec);
1727 &rec);
1728 if (error) 2176 if (error)
1729 goto error0; 2177 goto error0;
1730 2178
diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h
index 100007d56449..6e450df2979b 100644
--- a/fs/xfs/libxfs/xfs_ialloc.h
+++ b/fs/xfs/libxfs/xfs_ialloc.h
@@ -28,6 +28,13 @@ struct xfs_btree_cur;
28/* Move inodes in clusters of this size */ 28/* Move inodes in clusters of this size */
29#define XFS_INODE_BIG_CLUSTER_SIZE 8192 29#define XFS_INODE_BIG_CLUSTER_SIZE 8192
30 30
31struct xfs_icluster {
32 bool deleted; /* record is deleted */
33 xfs_ino_t first_ino; /* first inode number */
34 uint64_t alloc; /* inode phys. allocation bitmap for
35 * sparse chunks */
36};
37
31/* Calculate and return the number of filesystem blocks per inode cluster */ 38/* Calculate and return the number of filesystem blocks per inode cluster */
32static inline int 39static inline int
33xfs_icluster_size_fsb( 40xfs_icluster_size_fsb(
@@ -44,8 +51,7 @@ xfs_icluster_size_fsb(
44static inline struct xfs_dinode * 51static inline struct xfs_dinode *
45xfs_make_iptr(struct xfs_mount *mp, struct xfs_buf *b, int o) 52xfs_make_iptr(struct xfs_mount *mp, struct xfs_buf *b, int o)
46{ 53{
47 return (struct xfs_dinode *) 54 return xfs_buf_offset(b, o << (mp)->m_sb.sb_inodelog);
48 (xfs_buf_offset(b, o << (mp)->m_sb.sb_inodelog));
49} 55}
50 56
51/* 57/*
@@ -90,8 +96,7 @@ xfs_difree(
90 struct xfs_trans *tp, /* transaction pointer */ 96 struct xfs_trans *tp, /* transaction pointer */
91 xfs_ino_t inode, /* inode to be freed */ 97 xfs_ino_t inode, /* inode to be freed */
92 struct xfs_bmap_free *flist, /* extents to free */ 98 struct xfs_bmap_free *flist, /* extents to free */
93 int *deleted, /* set if inode cluster was deleted */ 99 struct xfs_icluster *ifree); /* cluster info if deleted */
94 xfs_ino_t *first_ino); /* first inode in deleted cluster */
95 100
96/* 101/*
97 * Return the location of the inode in imap, for mapping it into a buffer. 102 * Return the location of the inode in imap, for mapping it into a buffer.
@@ -156,7 +161,7 @@ int xfs_inobt_get_rec(struct xfs_btree_cur *cur,
156 * Inode chunk initialisation routine 161 * Inode chunk initialisation routine
157 */ 162 */
158int xfs_ialloc_inode_init(struct xfs_mount *mp, struct xfs_trans *tp, 163int xfs_ialloc_inode_init(struct xfs_mount *mp, struct xfs_trans *tp,
159 struct list_head *buffer_list, 164 struct list_head *buffer_list, int icount,
160 xfs_agnumber_t agno, xfs_agblock_t agbno, 165 xfs_agnumber_t agno, xfs_agblock_t agbno,
161 xfs_agblock_t length, unsigned int gen); 166 xfs_agblock_t length, unsigned int gen);
162 167
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 964c465ca69c..674ad8f760be 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -167,7 +167,16 @@ xfs_inobt_init_rec_from_cur(
167 union xfs_btree_rec *rec) 167 union xfs_btree_rec *rec)
168{ 168{
169 rec->inobt.ir_startino = cpu_to_be32(cur->bc_rec.i.ir_startino); 169 rec->inobt.ir_startino = cpu_to_be32(cur->bc_rec.i.ir_startino);
170 rec->inobt.ir_freecount = cpu_to_be32(cur->bc_rec.i.ir_freecount); 170 if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) {
171 rec->inobt.ir_u.sp.ir_holemask =
172 cpu_to_be16(cur->bc_rec.i.ir_holemask);
173 rec->inobt.ir_u.sp.ir_count = cur->bc_rec.i.ir_count;
174 rec->inobt.ir_u.sp.ir_freecount = cur->bc_rec.i.ir_freecount;
175 } else {
176 /* ir_holemask/ir_count not supported on-disk */
177 rec->inobt.ir_u.f.ir_freecount =
178 cpu_to_be32(cur->bc_rec.i.ir_freecount);
179 }
171 rec->inobt.ir_free = cpu_to_be64(cur->bc_rec.i.ir_free); 180 rec->inobt.ir_free = cpu_to_be64(cur->bc_rec.i.ir_free);
172} 181}
173 182
@@ -418,3 +427,85 @@ xfs_inobt_maxrecs(
418 return blocklen / sizeof(xfs_inobt_rec_t); 427 return blocklen / sizeof(xfs_inobt_rec_t);
419 return blocklen / (sizeof(xfs_inobt_key_t) + sizeof(xfs_inobt_ptr_t)); 428 return blocklen / (sizeof(xfs_inobt_key_t) + sizeof(xfs_inobt_ptr_t));
420} 429}
430
431/*
432 * Convert the inode record holemask to an inode allocation bitmap. The inode
433 * allocation bitmap is inode granularity and specifies whether an inode is
434 * physically allocated on disk (not whether the inode is considered allocated
435 * or free by the fs).
436 *
 437 * A bit value of 1 means the inode is allocated; a value of 0 means it is free.
438 */
439uint64_t
440xfs_inobt_irec_to_allocmask(
441 struct xfs_inobt_rec_incore *rec)
442{
443 uint64_t bitmap = 0;
444 uint64_t inodespbit;
445 int nextbit;
446 uint allocbitmap;
447
448 /*
 449 * The holemask has 16 bits for a 64-inode record. Therefore each
450 * holemask bit represents multiple inodes. Create a mask of bits to set
451 * in the allocmask for each holemask bit.
452 */
453 inodespbit = (1 << XFS_INODES_PER_HOLEMASK_BIT) - 1;
454
455 /*
456 * Allocated inodes are represented by 0 bits in holemask. Invert the 0
457 * bits to 1 and convert to a uint so we can use xfs_next_bit(). Mask
458 * anything beyond the 16 holemask bits since this casts to a larger
459 * type.
460 */
461 allocbitmap = ~rec->ir_holemask & ((1 << XFS_INOBT_HOLEMASK_BITS) - 1);
462
463 /*
464 * allocbitmap is the inverted holemask so every set bit represents
465 * allocated inodes. To expand from 16-bit holemask granularity to
466 * 64-bit (e.g., bit-per-inode), set inodespbit bits in the target
467 * bitmap for every holemask bit.
468 */
469 nextbit = xfs_next_bit(&allocbitmap, 1, 0);
470 while (nextbit != -1) {
471 ASSERT(nextbit < (sizeof(rec->ir_holemask) * NBBY));
472
473 bitmap |= (inodespbit <<
474 (nextbit * XFS_INODES_PER_HOLEMASK_BIT));
475
476 nextbit = xfs_next_bit(&allocbitmap, 1, nextbit + 1);
477 }
478
479 return bitmap;
480}
481
482#if defined(DEBUG) || defined(XFS_WARN)
483/*
484 * Verify that an in-core inode record has a valid inode count.
485 */
486int
487xfs_inobt_rec_check_count(
488 struct xfs_mount *mp,
489 struct xfs_inobt_rec_incore *rec)
490{
491 int inocount = 0;
492 int nextbit = 0;
493 uint64_t allocbmap;
494 int wordsz;
495
496 wordsz = sizeof(allocbmap) / sizeof(unsigned int);
497 allocbmap = xfs_inobt_irec_to_allocmask(rec);
498
499 nextbit = xfs_next_bit((uint *) &allocbmap, wordsz, nextbit);
500 while (nextbit != -1) {
501 inocount++;
502 nextbit = xfs_next_bit((uint *) &allocbmap, wordsz,
503 nextbit + 1);
504 }
505
506 if (inocount != rec->ir_count)
507 return -EFSCORRUPTED;
508
509 return 0;
510}
511#endif /* DEBUG */
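
A hedged standalone exercise of the conversion and the DEBUG count check above, using GCC/Clang builtins for the kernel bit helpers and a plain error return in place of -EFSCORRUPTED:

#include <stdint.h>
#include <stdio.h>

#define HOLEMASK_BITS		16
#define INODES_PER_HOLEMASK_BIT	4

/* expand a 16-bit holemask into a per-inode allocation bitmap */
static uint64_t
irec_to_allocmask(uint16_t holemask)
{
	uint64_t	bitmap = 0;
	uint64_t	span = (1ULL << INODES_PER_HOLEMASK_BIT) - 1;
	uint16_t	alloc = ~holemask;	/* 1 bit now means allocated */
	int		i;

	for (i = 0; i < HOLEMASK_BITS; i++)
		if (alloc & (1u << i))
			bitmap |= span << (i * INODES_PER_HOLEMASK_BIT);
	return bitmap;
}

/* analogue of xfs_inobt_rec_check_count(): ir_count must match the bitmap */
static int
rec_check_count(uint16_t holemask, int count)
{
	if (__builtin_popcountll(irec_to_allocmask(holemask)) != count)
		return -1;	/* stands in for -EFSCORRUPTED */
	return 0;
}

int
main(void)
{
	uint16_t	holemask = 0xff00;	/* upper half is a hole */

	printf("allocmask 0x%016llx\n",
	       (unsigned long long)irec_to_allocmask(holemask));
	printf("count 32 ok: %d\n", rec_check_count(holemask, 32) == 0);
	printf("count 64 ok: %d\n", rec_check_count(holemask, 64) == 0);
	/* allocmask 0x00000000ffffffff, count 32 ok: 1, count 64 ok: 0 */
	return 0;
}
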
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h
index d7ebea72c2d0..bd88453217ce 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.h
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.h
@@ -62,4 +62,14 @@ extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *,
62 xfs_btnum_t); 62 xfs_btnum_t);
63extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int); 63extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int);
64 64
65/* ir_holemask to inode allocation bitmap conversion */
66uint64_t xfs_inobt_irec_to_allocmask(struct xfs_inobt_rec_incore *);
67
68#if defined(DEBUG) || defined(XFS_WARN)
69int xfs_inobt_rec_check_count(struct xfs_mount *,
70 struct xfs_inobt_rec_incore *);
71#else
72#define xfs_inobt_rec_check_count(mp, rec) 0
73#endif /* DEBUG */
74
65#endif /* __XFS_IALLOC_BTREE_H__ */ 75#endif /* __XFS_IALLOC_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 002b6b3a1988..6526e7696184 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -46,8 +46,7 @@ xfs_inobp_check(
46 j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog; 46 j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
47 47
48 for (i = 0; i < j; i++) { 48 for (i = 0; i < j; i++) {
49 dip = (xfs_dinode_t *)xfs_buf_offset(bp, 49 dip = xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize);
50 i * mp->m_sb.sb_inodesize);
51 if (!dip->di_next_unlinked) { 50 if (!dip->di_next_unlinked) {
52 xfs_alert(mp, 51 xfs_alert(mp,
53 "Detected bogus zero next_unlinked field in inode %d buffer 0x%llx.", 52 "Detected bogus zero next_unlinked field in inode %d buffer 0x%llx.",
@@ -86,8 +85,7 @@ xfs_inode_buf_verify(
86 int di_ok; 85 int di_ok;
87 xfs_dinode_t *dip; 86 xfs_dinode_t *dip;
88 87
89 dip = (struct xfs_dinode *)xfs_buf_offset(bp, 88 dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog));
90 (i << mp->m_sb.sb_inodelog));
91 di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) && 89 di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
92 XFS_DINODE_GOOD_VERSION(dip->di_version); 90 XFS_DINODE_GOOD_VERSION(dip->di_version);
93 if (unlikely(XFS_TEST_ERROR(!di_ok, mp, 91 if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
@@ -186,7 +184,7 @@ xfs_imap_to_bp(
186 } 184 }
187 185
188 *bpp = bp; 186 *bpp = bp;
189 *dipp = (struct xfs_dinode *)xfs_buf_offset(bp, imap->im_boffset); 187 *dipp = xfs_buf_offset(bp, imap->im_boffset);
190 return 0; 188 return 0;
191} 189}
192 190
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index dc4bfc5d88fc..df9851c46b5c 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -174,6 +174,27 @@ xfs_mount_validate_sb(
174 return -EFSCORRUPTED; 174 return -EFSCORRUPTED;
175 } 175 }
176 176
177 /*
178 * Full inode chunks must be aligned to inode chunk size when
179 * sparse inodes are enabled to support the sparse chunk
180 * allocation algorithm and prevent overlapping inode records.
181 */
182 if (xfs_sb_version_hassparseinodes(sbp)) {
183 uint32_t align;
184
185 xfs_alert(mp,
186 "EXPERIMENTAL sparse inode feature enabled. Use at your own risk!");
187
188 align = XFS_INODES_PER_CHUNK * sbp->sb_inodesize
189 >> sbp->sb_blocklog;
190 if (sbp->sb_inoalignmt != align) {
191 xfs_warn(mp,
192"Inode block alignment (%u) must match chunk size (%u) for sparse inodes.",
193 sbp->sb_inoalignmt, align);
194 return -EINVAL;
195 }
196 }
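
The required value is simply the full chunk size expressed in filesystem blocks. For example, with 512-byte inodes on 4096-byte blocks, 64 * 512 >> 12 = 8, so mkfs must have set sb_inoalignmt = 8 for the feature bit to be accepted. A minimal sketch of the same check (values illustrative):

#include <stdint.h>
#include <stdio.h>

#define INODES_PER_CHUNK	64

/* mirrors the sparse-inode alignment check in xfs_mount_validate_sb() */
static int
sparse_align_ok(uint32_t inodesize, uint32_t blocklog, uint32_t inoalignmt)
{
	uint32_t	align = (INODES_PER_CHUNK * inodesize) >> blocklog;

	return inoalignmt == align;
}

int
main(void)
{
	printf("%d\n", sparse_align_ok(512, 12, 8));	/* 1: accepted */
	printf("%d\n", sparse_align_ok(512, 12, 4));	/* 0: rejected, -EINVAL */
	return 0;
}
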
197
177 if (unlikely( 198 if (unlikely(
178 sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) { 199 sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) {
179 xfs_warn(mp, 200 xfs_warn(mp,
@@ -374,7 +395,7 @@ __xfs_sb_from_disk(
374 be32_to_cpu(from->sb_features_log_incompat); 395 be32_to_cpu(from->sb_features_log_incompat);
375 /* crc is only used on disk, not in memory; just init to 0 here. */ 396 /* crc is only used on disk, not in memory; just init to 0 here. */
376 to->sb_crc = 0; 397 to->sb_crc = 0;
377 to->sb_pad = 0; 398 to->sb_spino_align = be32_to_cpu(from->sb_spino_align);
378 to->sb_pquotino = be64_to_cpu(from->sb_pquotino); 399 to->sb_pquotino = be64_to_cpu(from->sb_pquotino);
379 to->sb_lsn = be64_to_cpu(from->sb_lsn); 400 to->sb_lsn = be64_to_cpu(from->sb_lsn);
380 /* Convert on-disk flags to in-memory flags? */ 401 /* Convert on-disk flags to in-memory flags? */
@@ -516,7 +537,7 @@ xfs_sb_to_disk(
516 cpu_to_be32(from->sb_features_incompat); 537 cpu_to_be32(from->sb_features_incompat);
517 to->sb_features_log_incompat = 538 to->sb_features_log_incompat =
518 cpu_to_be32(from->sb_features_log_incompat); 539 cpu_to_be32(from->sb_features_log_incompat);
519 to->sb_pad = 0; 540 to->sb_spino_align = cpu_to_be32(from->sb_spino_align);
520 to->sb_lsn = cpu_to_be64(from->sb_lsn); 541 to->sb_lsn = cpu_to_be64(from->sb_lsn);
521 } 542 }
522} 543}
@@ -689,6 +710,11 @@ xfs_sb_mount_common(
689 mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK, 710 mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
690 sbp->sb_inopblock); 711 sbp->sb_inopblock);
691 mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog; 712 mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
713
714 if (sbp->sb_spino_align)
715 mp->m_ialloc_min_blks = sbp->sb_spino_align;
716 else
717 mp->m_ialloc_min_blks = mp->m_ialloc_blks;
692} 718}
693 719
694/* 720/*
@@ -792,12 +818,12 @@ xfs_sync_sb(
792 tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_CHANGE, KM_SLEEP); 818 tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_CHANGE, KM_SLEEP);
793 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_sb, 0, 0); 819 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_sb, 0, 0);
794 if (error) { 820 if (error) {
795 xfs_trans_cancel(tp, 0); 821 xfs_trans_cancel(tp);
796 return error; 822 return error;
797 } 823 }
798 824
799 xfs_log_sb(tp); 825 xfs_log_sb(tp);
800 if (wait) 826 if (wait)
801 xfs_trans_set_sync(tp); 827 xfs_trans_set_sync(tp);
802 return xfs_trans_commit(tp, 0); 828 return xfs_trans_commit(tp);
803} 829}
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index 8dda4b321343..5be529707903 100644
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -182,12 +182,6 @@ int xfs_log_calc_minimum_size(struct xfs_mount *);
182#define XFS_TRANS_FREEZE_PROT 0x40 /* Transaction has elevated writer 182#define XFS_TRANS_FREEZE_PROT 0x40 /* Transaction has elevated writer
183 count in superblock */ 183 count in superblock */
184/* 184/*
185 * Values for call flags parameter.
186 */
187#define XFS_TRANS_RELEASE_LOG_RES 0x4
188#define XFS_TRANS_ABORT 0x8
189
190/*
191 * Field values for xfs_trans_mod_sb. 185 * Field values for xfs_trans_mod_sb.
192 */ 186 */
193#define XFS_TRANS_SB_ICOUNT 0x00000001 187#define XFS_TRANS_SB_ICOUNT 0x00000001
diff --git a/fs/xfs/libxfs/xfs_trans_resv.h b/fs/xfs/libxfs/xfs_trans_resv.h
index 2d5bdfce6d8f..797815012c0e 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.h
+++ b/fs/xfs/libxfs/xfs_trans_resv.h
@@ -73,9 +73,9 @@ struct xfs_trans_resv {
73 * 2 trees * (2 blocks/level * max depth - 1) * block size 73 * 2 trees * (2 blocks/level * max depth - 1) * block size
74 */ 74 */
75#define XFS_ALLOCFREE_LOG_RES(mp,nx) \ 75#define XFS_ALLOCFREE_LOG_RES(mp,nx) \
76 ((nx) * (2 * XFS_FSB_TO_B((mp), 2 * XFS_AG_MAXLEVELS(mp) - 1))) 76 ((nx) * (2 * XFS_FSB_TO_B((mp), 2 * (mp)->m_ag_maxlevels - 1)))
77#define XFS_ALLOCFREE_LOG_COUNT(mp,nx) \ 77#define XFS_ALLOCFREE_LOG_COUNT(mp,nx) \
78 ((nx) * (2 * (2 * XFS_AG_MAXLEVELS(mp) - 1))) 78 ((nx) * (2 * (2 * (mp)->m_ag_maxlevels - 1)))
79 79
80/* 80/*
81 * Per-directory log reservation for any directory change. 81 * Per-directory log reservation for any directory change.
diff --git a/fs/xfs/libxfs/xfs_trans_space.h b/fs/xfs/libxfs/xfs_trans_space.h
index bf9c4579334d..41e0428d8175 100644
--- a/fs/xfs/libxfs/xfs_trans_space.h
+++ b/fs/xfs/libxfs/xfs_trans_space.h
@@ -67,7 +67,7 @@
67#define XFS_DIOSTRAT_SPACE_RES(mp, v) \ 67#define XFS_DIOSTRAT_SPACE_RES(mp, v) \
68 (XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK) + (v)) 68 (XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK) + (v))
69#define XFS_GROWFS_SPACE_RES(mp) \ 69#define XFS_GROWFS_SPACE_RES(mp) \
70 (2 * XFS_AG_MAXLEVELS(mp)) 70 (2 * (mp)->m_ag_maxlevels)
71#define XFS_GROWFSRT_SPACE_RES(mp,b) \ 71#define XFS_GROWFSRT_SPACE_RES(mp,b) \
72 ((b) + XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK)) 72 ((b) + XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK))
73#define XFS_LINK_SPACE_RES(mp,nl) \ 73#define XFS_LINK_SPACE_RES(mp,nl) \
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index e5099f268032..3859f5e27a4d 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -109,7 +109,7 @@ xfs_setfilesize_trans_alloc(
109 109
110 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0); 110 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
111 if (error) { 111 if (error) {
112 xfs_trans_cancel(tp, 0); 112 xfs_trans_cancel(tp);
113 return error; 113 return error;
114 } 114 }
115 115
@@ -145,7 +145,7 @@ xfs_setfilesize(
145 isize = xfs_new_eof(ip, offset + size); 145 isize = xfs_new_eof(ip, offset + size);
146 if (!isize) { 146 if (!isize) {
147 xfs_iunlock(ip, XFS_ILOCK_EXCL); 147 xfs_iunlock(ip, XFS_ILOCK_EXCL);
148 xfs_trans_cancel(tp, 0); 148 xfs_trans_cancel(tp);
149 return 0; 149 return 0;
150 } 150 }
151 151
@@ -155,7 +155,7 @@ xfs_setfilesize(
155 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 155 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
156 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 156 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
157 157
158 return xfs_trans_commit(tp, 0); 158 return xfs_trans_commit(tp);
159} 159}
160 160
161STATIC int 161STATIC int
@@ -1348,7 +1348,7 @@ __xfs_get_blocks(
1348 sector_t iblock, 1348 sector_t iblock,
1349 struct buffer_head *bh_result, 1349 struct buffer_head *bh_result,
1350 int create, 1350 int create,
1351 int direct) 1351 bool direct)
1352{ 1352{
1353 struct xfs_inode *ip = XFS_I(inode); 1353 struct xfs_inode *ip = XFS_I(inode);
1354 struct xfs_mount *mp = ip->i_mount; 1354 struct xfs_mount *mp = ip->i_mount;
@@ -1413,6 +1413,7 @@ __xfs_get_blocks(
1413 if (error) 1413 if (error)
1414 return error; 1414 return error;
1415 new = 1; 1415 new = 1;
1416
1416 } else { 1417 } else {
1417 /* 1418 /*
1418 * Delalloc reservations do not require a transaction, 1419 * Delalloc reservations do not require a transaction,
@@ -1507,49 +1508,29 @@ xfs_get_blocks(
1507 struct buffer_head *bh_result, 1508 struct buffer_head *bh_result,
1508 int create) 1509 int create)
1509{ 1510{
1510 return __xfs_get_blocks(inode, iblock, bh_result, create, 0); 1511 return __xfs_get_blocks(inode, iblock, bh_result, create, false);
1511} 1512}
1512 1513
1513STATIC int 1514int
1514xfs_get_blocks_direct( 1515xfs_get_blocks_direct(
1515 struct inode *inode, 1516 struct inode *inode,
1516 sector_t iblock, 1517 sector_t iblock,
1517 struct buffer_head *bh_result, 1518 struct buffer_head *bh_result,
1518 int create) 1519 int create)
1519{ 1520{
1520 return __xfs_get_blocks(inode, iblock, bh_result, create, 1); 1521 return __xfs_get_blocks(inode, iblock, bh_result, create, true);
1521} 1522}
1522 1523
1523/* 1524static void
1524 * Complete a direct I/O write request. 1525__xfs_end_io_direct_write(
1525 * 1526 struct inode *inode,
1526 * The ioend structure is passed from __xfs_get_blocks() to tell us what to do. 1527 struct xfs_ioend *ioend,
1527 * If no ioend exists (i.e. @private == NULL) then the write IO is an overwrite
1528 * wholly within the EOF and so there is nothing for us to do. Note that in this
1529 * case the completion can be called in interrupt context, whereas if we have an
1530 * ioend we will always be called in task context (i.e. from a workqueue).
1531 */
1532STATIC void
1533xfs_end_io_direct_write(
1534 struct kiocb *iocb,
1535 loff_t offset, 1528 loff_t offset,
1536 ssize_t size, 1529 ssize_t size)
1537 void *private)
1538{ 1530{
1539 struct inode *inode = file_inode(iocb->ki_filp); 1531 struct xfs_mount *mp = XFS_I(inode)->i_mount;
1540 struct xfs_inode *ip = XFS_I(inode);
1541 struct xfs_mount *mp = ip->i_mount;
1542 struct xfs_ioend *ioend = private;
1543
1544 trace_xfs_gbmap_direct_endio(ip, offset, size,
1545 ioend ? ioend->io_type : 0, NULL);
1546 1532
1547 if (!ioend) { 1533 if (XFS_FORCED_SHUTDOWN(mp) || ioend->io_error)
1548 ASSERT(offset + size <= i_size_read(inode));
1549 return;
1550 }
1551
1552 if (XFS_FORCED_SHUTDOWN(mp))
1553 goto out_end_io; 1534 goto out_end_io;
1554 1535
1555 /* 1536 /*
@@ -1586,10 +1567,10 @@ xfs_end_io_direct_write(
1586 * here can result in EOF moving backwards and Bad Things Happen when 1567 * here can result in EOF moving backwards and Bad Things Happen when
1587 * that occurs. 1568 * that occurs.
1588 */ 1569 */
1589 spin_lock(&ip->i_flags_lock); 1570 spin_lock(&XFS_I(inode)->i_flags_lock);
1590 if (offset + size > i_size_read(inode)) 1571 if (offset + size > i_size_read(inode))
1591 i_size_write(inode, offset + size); 1572 i_size_write(inode, offset + size);
1592 spin_unlock(&ip->i_flags_lock); 1573 spin_unlock(&XFS_I(inode)->i_flags_lock);
1593 1574
1594 /* 1575 /*
1595 * If we are doing an append IO that needs to update the EOF on disk, 1576 * If we are doing an append IO that needs to update the EOF on disk,
@@ -1606,6 +1587,98 @@ out_end_io:
1606 return; 1587 return;
1607} 1588}
1608 1589
1590/*
1591 * Complete a direct I/O write request.
1592 *
1593 * The ioend structure is passed from __xfs_get_blocks() to tell us what to do.
1594 * If no ioend exists (i.e. @private == NULL) then the write IO is an overwrite
1595 * wholly within the EOF and so there is nothing for us to do. Note that in this
1596 * case the completion can be called in interrupt context, whereas if we have an
1597 * ioend we will always be called in task context (i.e. from a workqueue).
1598 */
1599STATIC void
1600xfs_end_io_direct_write(
1601 struct kiocb *iocb,
1602 loff_t offset,
1603 ssize_t size,
1604 void *private)
1605{
1606 struct inode *inode = file_inode(iocb->ki_filp);
1607 struct xfs_ioend *ioend = private;
1608
1609 trace_xfs_gbmap_direct_endio(XFS_I(inode), offset, size,
1610 ioend ? ioend->io_type : 0, NULL);
1611
1612 if (!ioend) {
1613 ASSERT(offset + size <= i_size_read(inode));
1614 return;
1615 }
1616
1617 __xfs_end_io_direct_write(inode, ioend, offset, size);
1618}
1619
1620/*
1621 * For DAX we need a mapping buffer callback for unwritten extent conversion
1622 * when page faults allocate blocks and then zero them. Note that in this
1623 * case the mapping indicated by the ioend may extend beyond EOF. We most
1624 * definitely do not want to extend EOF here, so we trim back the ioend size to
1625 * EOF.
1626 */
1627#ifdef CONFIG_FS_DAX
1628void
1629xfs_end_io_dax_write(
1630 struct buffer_head *bh,
1631 int uptodate)
1632{
1633 struct xfs_ioend *ioend = bh->b_private;
1634 struct inode *inode = ioend->io_inode;
1635 ssize_t size = ioend->io_size;
1636
1637 ASSERT(IS_DAX(ioend->io_inode));
1638
1639 /* if there was an error zeroing, then don't convert it */
1640 if (!uptodate)
1641 ioend->io_error = -EIO;
1642
1643 /*
1644 * Trim update to EOF, so we don't extend EOF during unwritten extent
1645 * conversion of partial EOF blocks.
1646 */
1647 spin_lock(&XFS_I(inode)->i_flags_lock);
1648 if (ioend->io_offset + size > i_size_read(inode))
1649 size = i_size_read(inode) - ioend->io_offset;
1650 spin_unlock(&XFS_I(inode)->i_flags_lock);
1651
1652 __xfs_end_io_direct_write(inode, ioend, ioend->io_offset, size);
1653
1654}
1655#else
1656void xfs_end_io_dax_write(struct buffer_head *bh, int uptodate) { }
1657#endif
1658
1659static inline ssize_t
1660xfs_vm_do_dio(
1661 struct inode *inode,
1662 struct kiocb *iocb,
1663 struct iov_iter *iter,
1664 loff_t offset,
1665 void (*endio)(struct kiocb *iocb,
1666 loff_t offset,
1667 ssize_t size,
1668 void *private),
1669 int flags)
1670{
1671 struct block_device *bdev;
1672
1673 if (IS_DAX(inode))
1674 return dax_do_io(iocb, inode, iter, offset,
1675 xfs_get_blocks_direct, endio, 0);
1676
1677 bdev = xfs_find_bdev_for_inode(inode);
1678 return __blockdev_direct_IO(iocb, inode, bdev, iter, offset,
1679 xfs_get_blocks_direct, endio, NULL, flags);
1680}
1681
1609STATIC ssize_t 1682STATIC ssize_t
1610xfs_vm_direct_IO( 1683xfs_vm_direct_IO(
1611 struct kiocb *iocb, 1684 struct kiocb *iocb,
@@ -1613,16 +1686,11 @@ xfs_vm_direct_IO(
1613 loff_t offset) 1686 loff_t offset)
1614{ 1687{
1615 struct inode *inode = iocb->ki_filp->f_mapping->host; 1688 struct inode *inode = iocb->ki_filp->f_mapping->host;
1616 struct block_device *bdev = xfs_find_bdev_for_inode(inode);
1617 1689
1618 if (iov_iter_rw(iter) == WRITE) { 1690 if (iov_iter_rw(iter) == WRITE)
1619 return __blockdev_direct_IO(iocb, inode, bdev, iter, offset, 1691 return xfs_vm_do_dio(inode, iocb, iter, offset,
1620 xfs_get_blocks_direct, 1692 xfs_end_io_direct_write, DIO_ASYNC_EXTEND);
1621 xfs_end_io_direct_write, NULL, 1693 return xfs_vm_do_dio(inode, iocb, iter, offset, NULL, 0);
1622 DIO_ASYNC_EXTEND);
1623 }
1624 return __blockdev_direct_IO(iocb, inode, bdev, iter, offset,
1625 xfs_get_blocks_direct, NULL, NULL, 0);
1626} 1694}
1627 1695
1628/* 1696/*
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index ac644e0137a4..86afd1ac7895 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -53,7 +53,12 @@ typedef struct xfs_ioend {
53} xfs_ioend_t; 53} xfs_ioend_t;
54 54
55extern const struct address_space_operations xfs_address_space_operations; 55extern const struct address_space_operations xfs_address_space_operations;
56extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int); 56
57int xfs_get_blocks(struct inode *inode, sector_t offset,
58 struct buffer_head *map_bh, int create);
59int xfs_get_blocks_direct(struct inode *inode, sector_t offset,
60 struct buffer_head *map_bh, int create);
61void xfs_end_io_dax_write(struct buffer_head *bh, int uptodate);
57 62
58extern void xfs_count_page_state(struct page *, int *, int *); 63extern void xfs_count_page_state(struct page *, int *, int *);
59 64
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c
index 3fbf167cfb4c..2bb959ada45b 100644
--- a/fs/xfs/xfs_attr_inactive.c
+++ b/fs/xfs/xfs_attr_inactive.c
@@ -394,7 +394,6 @@ xfs_attr_inactive(
394{ 394{
395 struct xfs_trans *trans; 395 struct xfs_trans *trans;
396 struct xfs_mount *mp; 396 struct xfs_mount *mp;
397 int cancel_flags = 0;
398 int lock_mode = XFS_ILOCK_SHARED; 397 int lock_mode = XFS_ILOCK_SHARED;
399 int error = 0; 398 int error = 0;
400 399
@@ -423,7 +422,6 @@ xfs_attr_inactive(
423 goto out_cancel; 422 goto out_cancel;
424 423
425 lock_mode = XFS_ILOCK_EXCL; 424 lock_mode = XFS_ILOCK_EXCL;
426 cancel_flags = XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT;
427 xfs_ilock(dp, lock_mode); 425 xfs_ilock(dp, lock_mode);
428 426
429 if (!XFS_IFORK_Q(dp)) 427 if (!XFS_IFORK_Q(dp))
@@ -435,8 +433,14 @@ xfs_attr_inactive(
435 */ 433 */
436 xfs_trans_ijoin(trans, dp, 0); 434 xfs_trans_ijoin(trans, dp, 0);
437 435
438 /* invalidate and truncate the attribute fork extents */ 436 /*
439 if (dp->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) { 437 * Invalidate and truncate the attribute fork extents. Make sure the
438 * fork actually has attributes as otherwise the invalidation has no
439 * blocks to read and returns an error. In this case, just do the fork
440 * removal below.
441 */
442 if (xfs_inode_hasattr(dp) &&
443 dp->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) {
440 error = xfs_attr3_root_inactive(&trans, dp); 444 error = xfs_attr3_root_inactive(&trans, dp);
441 if (error) 445 if (error)
442 goto out_cancel; 446 goto out_cancel;
@@ -449,12 +453,12 @@ xfs_attr_inactive(
449 /* Reset the attribute fork - this also destroys the in-core fork */ 453 /* Reset the attribute fork - this also destroys the in-core fork */
450 xfs_attr_fork_remove(dp, trans); 454 xfs_attr_fork_remove(dp, trans);
451 455
452 error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES); 456 error = xfs_trans_commit(trans);
453 xfs_iunlock(dp, lock_mode); 457 xfs_iunlock(dp, lock_mode);
454 return error; 458 return error;
455 459
456out_cancel: 460out_cancel:
457 xfs_trans_cancel(trans, cancel_flags); 461 xfs_trans_cancel(trans);
458out_destroy_fork: 462out_destroy_fork:
459 /* kill the in-core attr fork before we drop the inode lock */ 463 /* kill the in-core attr fork before we drop the inode lock */
460 if (dp->i_afp) 464 if (dp->i_afp)
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index a52bbd3abc7d..0f34886cf726 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -75,28 +75,20 @@ xfs_bmap_finish(
75 xfs_efi_log_item_t *efi; /* extent free intention */ 75 xfs_efi_log_item_t *efi; /* extent free intention */
76 int error; /* error return value */ 76 int error; /* error return value */
77 xfs_bmap_free_item_t *free; /* free extent item */ 77 xfs_bmap_free_item_t *free; /* free extent item */
78 struct xfs_trans_res tres; /* new log reservation */
79 xfs_mount_t *mp; /* filesystem mount structure */ 78 xfs_mount_t *mp; /* filesystem mount structure */
80 xfs_bmap_free_item_t *next; /* next item on free list */ 79 xfs_bmap_free_item_t *next; /* next item on free list */
81 xfs_trans_t *ntp; /* new transaction pointer */
82 80
83 ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES); 81 ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
84 if (flist->xbf_count == 0) { 82 if (flist->xbf_count == 0) {
85 *committed = 0; 83 *committed = 0;
86 return 0; 84 return 0;
87 } 85 }
88 ntp = *tp; 86 efi = xfs_trans_get_efi(*tp, flist->xbf_count);
89 efi = xfs_trans_get_efi(ntp, flist->xbf_count);
90 for (free = flist->xbf_first; free; free = free->xbfi_next) 87 for (free = flist->xbf_first; free; free = free->xbfi_next)
91 xfs_trans_log_efi_extent(ntp, efi, free->xbfi_startblock, 88 xfs_trans_log_efi_extent(*tp, efi, free->xbfi_startblock,
92 free->xbfi_blockcount); 89 free->xbfi_blockcount);
93 90
94 tres.tr_logres = ntp->t_log_res; 91 error = xfs_trans_roll(tp, NULL);
95 tres.tr_logcount = ntp->t_log_count;
96 tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
97 ntp = xfs_trans_dup(*tp);
98 error = xfs_trans_commit(*tp, 0);
99 *tp = ntp;
100 *committed = 1; 92 *committed = 1;
101 /* 93 /*
102 * We have a new transaction, so we should return committed=1, 94 * We have a new transaction, so we should return committed=1,
@@ -105,19 +97,10 @@ xfs_bmap_finish(
105 if (error) 97 if (error)
106 return error; 98 return error;
107 99
108 /* 100 efd = xfs_trans_get_efd(*tp, efi, flist->xbf_count);
109 * transaction commit worked ok so we can drop the extra ticket
110 * reference that we gained in xfs_trans_dup()
111 */
112 xfs_log_ticket_put(ntp->t_ticket);
113
114 error = xfs_trans_reserve(ntp, &tres, 0, 0);
115 if (error)
116 return error;
117 efd = xfs_trans_get_efd(ntp, efi, flist->xbf_count);
118 for (free = flist->xbf_first; free != NULL; free = next) { 101 for (free = flist->xbf_first; free != NULL; free = next) {
119 next = free->xbfi_next; 102 next = free->xbfi_next;
120 if ((error = xfs_free_extent(ntp, free->xbfi_startblock, 103 if ((error = xfs_free_extent(*tp, free->xbfi_startblock,
121 free->xbfi_blockcount))) { 104 free->xbfi_blockcount))) {
122 /* 105 /*
123 * The bmap free list will be cleaned up at a 106 * The bmap free list will be cleaned up at a
@@ -127,7 +110,7 @@ xfs_bmap_finish(
127 * happens, since this transaction may not be 110 * happens, since this transaction may not be
128 * dirty yet. 111 * dirty yet.
129 */ 112 */
130 mp = ntp->t_mountp; 113 mp = (*tp)->t_mountp;
131 if (!XFS_FORCED_SHUTDOWN(mp)) 114 if (!XFS_FORCED_SHUTDOWN(mp))
132 xfs_force_shutdown(mp, 115 xfs_force_shutdown(mp,
133 (error == -EFSCORRUPTED) ? 116 (error == -EFSCORRUPTED) ?
@@ -135,7 +118,7 @@ xfs_bmap_finish(
135 SHUTDOWN_META_IO_ERROR); 118 SHUTDOWN_META_IO_ERROR);
136 return error; 119 return error;
137 } 120 }
138 xfs_trans_log_efd_extent(ntp, efd, free->xbfi_startblock, 121 xfs_trans_log_efd_extent(*tp, efd, free->xbfi_startblock,
139 free->xbfi_blockcount); 122 free->xbfi_blockcount);
140 xfs_bmap_del_free(flist, NULL, free); 123 xfs_bmap_del_free(flist, NULL, free);
141 } 124 }
@@ -878,7 +861,7 @@ xfs_free_eofblocks(
878 861
879 if (need_iolock) { 862 if (need_iolock) {
880 if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { 863 if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
881 xfs_trans_cancel(tp, 0); 864 xfs_trans_cancel(tp);
882 return -EAGAIN; 865 return -EAGAIN;
883 } 866 }
884 } 867 }
@@ -886,7 +869,7 @@ xfs_free_eofblocks(
886 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); 869 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
887 if (error) { 870 if (error) {
888 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 871 ASSERT(XFS_FORCED_SHUTDOWN(mp));
889 xfs_trans_cancel(tp, 0); 872 xfs_trans_cancel(tp);
890 if (need_iolock) 873 if (need_iolock)
891 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 874 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
892 return error; 875 return error;
@@ -908,12 +891,9 @@ xfs_free_eofblocks(
908 * If we get an error at this point we simply don't 891 * If we get an error at this point we simply don't
909 * bother truncating the file. 892 * bother truncating the file.
910 */ 893 */
911 xfs_trans_cancel(tp, 894 xfs_trans_cancel(tp);
912 (XFS_TRANS_RELEASE_LOG_RES |
913 XFS_TRANS_ABORT));
914 } else { 895 } else {
915 error = xfs_trans_commit(tp, 896 error = xfs_trans_commit(tp);
916 XFS_TRANS_RELEASE_LOG_RES);
917 if (!error) 897 if (!error)
918 xfs_inode_clear_eofblocks_tag(ip); 898 xfs_inode_clear_eofblocks_tag(ip);
919 } 899 }
@@ -1026,7 +1006,7 @@ xfs_alloc_file_space(
1026 * Free the transaction structure. 1006 * Free the transaction structure.
1027 */ 1007 */
1028 ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp)); 1008 ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
1029 xfs_trans_cancel(tp, 0); 1009 xfs_trans_cancel(tp);
1030 break; 1010 break;
1031 } 1011 }
1032 xfs_ilock(ip, XFS_ILOCK_EXCL); 1012 xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -1053,7 +1033,7 @@ xfs_alloc_file_space(
1053 goto error0; 1033 goto error0;
1054 } 1034 }
1055 1035
1056 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1036 error = xfs_trans_commit(tp);
1057 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1037 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1058 if (error) { 1038 if (error) {
1059 break; 1039 break;
@@ -1077,7 +1057,7 @@ error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
1077 xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag); 1057 xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag);
1078 1058
1079error1: /* Just cancel transaction */ 1059error1: /* Just cancel transaction */
1080 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 1060 xfs_trans_cancel(tp);
1081 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1061 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1082 return error; 1062 return error;
1083} 1063}
@@ -1133,14 +1113,29 @@ xfs_zero_remaining_bytes(
1133 break; 1113 break;
1134 ASSERT(imap.br_blockcount >= 1); 1114 ASSERT(imap.br_blockcount >= 1);
1135 ASSERT(imap.br_startoff == offset_fsb); 1115 ASSERT(imap.br_startoff == offset_fsb);
1116 ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
1117
1118 if (imap.br_startblock == HOLESTARTBLOCK ||
1119 imap.br_state == XFS_EXT_UNWRITTEN) {
1120 /* skip the entire extent */
1121 lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff +
1122 imap.br_blockcount) - 1;
1123 continue;
1124 }
1125
1136 lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + 1) - 1; 1126 lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + 1) - 1;
1137 if (lastoffset > endoff) 1127 if (lastoffset > endoff)
1138 lastoffset = endoff; 1128 lastoffset = endoff;
1139 if (imap.br_startblock == HOLESTARTBLOCK) 1129
1140 continue; 1130 /* DAX can just zero the backing device directly */
1141 ASSERT(imap.br_startblock != DELAYSTARTBLOCK); 1131 if (IS_DAX(VFS_I(ip))) {
1142 if (imap.br_state == XFS_EXT_UNWRITTEN) 1132 error = dax_zero_page_range(VFS_I(ip), offset,
1133 lastoffset - offset + 1,
1134 xfs_get_blocks_direct);
1135 if (error)
1136 return error;
1143 continue; 1137 continue;
1138 }
1144 1139
1145 error = xfs_buf_read_uncached(XFS_IS_REALTIME_INODE(ip) ? 1140 error = xfs_buf_read_uncached(XFS_IS_REALTIME_INODE(ip) ?
1146 mp->m_rtdev_targp : mp->m_ddev_targp, 1141 mp->m_rtdev_targp : mp->m_ddev_targp,
@@ -1289,7 +1284,7 @@ xfs_free_file_space(
1289 * Free the transaction structure. 1284 * Free the transaction structure.
1290 */ 1285 */
1291 ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp)); 1286 ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
1292 xfs_trans_cancel(tp, 0); 1287 xfs_trans_cancel(tp);
1293 break; 1288 break;
1294 } 1289 }
1295 xfs_ilock(ip, XFS_ILOCK_EXCL); 1290 xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -1320,7 +1315,7 @@ xfs_free_file_space(
1320 goto error0; 1315 goto error0;
1321 } 1316 }
1322 1317
1323 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1318 error = xfs_trans_commit(tp);
1324 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1319 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1325 } 1320 }
1326 1321
@@ -1330,7 +1325,7 @@ xfs_free_file_space(
1330 error0: 1325 error0:
1331 xfs_bmap_cancel(&free_list); 1326 xfs_bmap_cancel(&free_list);
1332 error1: 1327 error1:
1333 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 1328 xfs_trans_cancel(tp);
1334 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1329 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1335 goto out; 1330 goto out;
1336} 1331}
@@ -1462,7 +1457,7 @@ xfs_shift_file_space(
1462 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, 1457 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
1463 XFS_DIOSTRAT_SPACE_RES(mp, 0), 0); 1458 XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
1464 if (error) { 1459 if (error) {
1465 xfs_trans_cancel(tp, 0); 1460 xfs_trans_cancel(tp);
1466 break; 1461 break;
1467 } 1462 }
1468 1463
@@ -1492,13 +1487,13 @@ xfs_shift_file_space(
1492 if (error) 1487 if (error)
1493 goto out; 1488 goto out;
1494 1489
1495 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1490 error = xfs_trans_commit(tp);
1496 } 1491 }
1497 1492
1498 return error; 1493 return error;
1499 1494
1500out: 1495out:
1501 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 1496 xfs_trans_cancel(tp);
1502 return error; 1497 return error;
1503} 1498}
1504 1499
@@ -1718,7 +1713,7 @@ xfs_swap_extents(
1718 tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT); 1713 tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT);
1719 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); 1714 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
1720 if (error) { 1715 if (error) {
1721 xfs_trans_cancel(tp, 0); 1716 xfs_trans_cancel(tp);
1722 goto out_unlock; 1717 goto out_unlock;
1723 } 1718 }
1724 1719
@@ -1901,7 +1896,7 @@ xfs_swap_extents(
1901 if (mp->m_flags & XFS_MOUNT_WSYNC) 1896 if (mp->m_flags & XFS_MOUNT_WSYNC)
1902 xfs_trans_set_sync(tp); 1897 xfs_trans_set_sync(tp);
1903 1898
1904 error = xfs_trans_commit(tp, 0); 1899 error = xfs_trans_commit(tp);
1905 1900
1906 trace_xfs_swap_extent_after(ip, 0); 1901 trace_xfs_swap_extent_after(ip, 0);
1907 trace_xfs_swap_extent_after(tip, 1); 1902 trace_xfs_swap_extent_after(tip, 1);
@@ -1915,6 +1910,6 @@ out_unlock:
1915 goto out; 1910 goto out;
1916 1911
1917out_trans_cancel: 1912out_trans_cancel:
1918 xfs_trans_cancel(tp, 0); 1913 xfs_trans_cancel(tp);
1919 goto out; 1914 goto out;
1920} 1915}
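
The fs/xfs/xfs_bmap_util.c hunks above show the transaction interface cleanup in practice: xfs_trans_commit() and xfs_trans_cancel() no longer take flag arguments such as XFS_TRANS_RELEASE_LOG_RES or XFS_TRANS_ABORT, because the transaction itself now tracks whether it holds a permanent log reservation and whether it has been dirtied. As a minimal hedged sketch of the resulting lifecycle (the helper name xfs_example_ichange() is made up for illustration; the calls mirror the simple callers later in this diff, e.g. xfs_set_dmattrs()):

/*
 * Sketch only: allocate, reserve, modify, then commit or cancel.  No
 * "did we already reserve log space?" flags to carry around any more.
 */
STATIC int
xfs_example_ichange(
	struct xfs_mount	*mp,
	struct xfs_inode	*ip)
{
	struct xfs_trans	*tp;
	int			error;

	tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS);
	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
	if (error) {
		xfs_trans_cancel(tp);	/* one call covers every error case */
		return error;
	}

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	/* previously the caller had to pass 0 or XFS_TRANS_RELEASE_LOG_RES */
	return xfs_trans_commit(tp);
}
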
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 1790b00bea7a..a4b7d92e946c 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1419,9 +1419,9 @@ xfs_buf_submit_wait(
1419 return error; 1419 return error;
1420} 1420}
1421 1421
1422xfs_caddr_t 1422void *
1423xfs_buf_offset( 1423xfs_buf_offset(
1424 xfs_buf_t *bp, 1424 struct xfs_buf *bp,
1425 size_t offset) 1425 size_t offset)
1426{ 1426{
1427 struct page *page; 1427 struct page *page;
@@ -1431,7 +1431,7 @@ xfs_buf_offset(
1431 1431
1432 offset += bp->b_offset; 1432 offset += bp->b_offset;
1433 page = bp->b_pages[offset >> PAGE_SHIFT]; 1433 page = bp->b_pages[offset >> PAGE_SHIFT];
1434 return (xfs_caddr_t)page_address(page) + (offset & (PAGE_SIZE-1)); 1434 return page_address(page) + (offset & (PAGE_SIZE-1));
1435} 1435}
1436 1436
1437/* 1437/*
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 75ff5d5a7d2e..331c1ccf8264 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -299,7 +299,7 @@ extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
299 xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) 299 xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
300 300
301/* Buffer Utility Routines */ 301/* Buffer Utility Routines */
302extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t); 302extern void *xfs_buf_offset(struct xfs_buf *, size_t);
303 303
304/* Delayed Write Buffer Routines */ 304/* Delayed Write Buffer Routines */
305extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *); 305extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *);
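
With xfs_buf_offset() now declared to return void * instead of the removed xfs_caddr_t, callers can assign straight into a typed pointer without a cast; the xfs_iflush_int() hunk later in this diff does exactly that. A small sketch, with bp and ip assumed from the surrounding context:

	struct xfs_dinode	*dip;

	/* implicit conversion from void *, no (xfs_caddr_t) cast needed */
	dip = xfs_buf_offset(bp, ip->i_imap.im_boffset);
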
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 02c01bbbc789..4143dc75dca4 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -568,8 +568,6 @@ xfs_qm_dqread(
568 struct xfs_buf *bp; 568 struct xfs_buf *bp;
569 struct xfs_trans *tp = NULL; 569 struct xfs_trans *tp = NULL;
570 int error; 570 int error;
571 int cancelflags = 0;
572
573 571
574 dqp = kmem_zone_zalloc(xfs_qm_dqzone, KM_SLEEP); 572 dqp = kmem_zone_zalloc(xfs_qm_dqzone, KM_SLEEP);
575 573
@@ -617,7 +615,6 @@ xfs_qm_dqread(
617 XFS_QM_DQALLOC_SPACE_RES(mp), 0); 615 XFS_QM_DQALLOC_SPACE_RES(mp), 0);
618 if (error) 616 if (error)
619 goto error1; 617 goto error1;
620 cancelflags = XFS_TRANS_RELEASE_LOG_RES;
621 } 618 }
622 619
623 /* 620 /*
@@ -632,7 +629,6 @@ xfs_qm_dqread(
632 * allocate (ENOENT). 629 * allocate (ENOENT).
633 */ 630 */
634 trace_xfs_dqread_fail(dqp); 631 trace_xfs_dqread_fail(dqp);
635 cancelflags |= XFS_TRANS_ABORT;
636 goto error1; 632 goto error1;
637 } 633 }
638 634
@@ -670,7 +666,7 @@ xfs_qm_dqread(
670 xfs_trans_brelse(tp, bp); 666 xfs_trans_brelse(tp, bp);
671 667
672 if (tp) { 668 if (tp) {
673 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 669 error = xfs_trans_commit(tp);
674 if (error) 670 if (error)
675 goto error0; 671 goto error0;
676 } 672 }
@@ -680,7 +676,7 @@ xfs_qm_dqread(
680 676
681error1: 677error1:
682 if (tp) 678 if (tp)
683 xfs_trans_cancel(tp, cancelflags); 679 xfs_trans_cancel(tp);
684error0: 680error0:
685 xfs_qm_dqdestroy(dqp); 681 xfs_qm_dqdestroy(dqp);
686 *O_dqpp = NULL; 682 *O_dqpp = NULL;
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 338e50bbfd1e..74d0e5966ebc 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -127,7 +127,7 @@ xfs_error_report(
127 struct xfs_mount *mp, 127 struct xfs_mount *mp,
128 const char *filename, 128 const char *filename,
129 int linenum, 129 int linenum,
130 inst_t *ra) 130 void *ra)
131{ 131{
132 if (level <= xfs_error_level) { 132 if (level <= xfs_error_level) {
133 xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT, 133 xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT,
@@ -146,7 +146,7 @@ xfs_corruption_error(
146 void *p, 146 void *p,
147 const char *filename, 147 const char *filename,
148 int linenum, 148 int linenum,
149 inst_t *ra) 149 void *ra)
150{ 150{
151 if (level <= xfs_error_level) 151 if (level <= xfs_error_level)
152 xfs_hex_dump(p, 64); 152 xfs_hex_dump(p, 64);
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index c0394ed126fc..4ed3042a0f16 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -21,10 +21,10 @@
21struct xfs_mount; 21struct xfs_mount;
22 22
23extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp, 23extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp,
24 const char *filename, int linenum, inst_t *ra); 24 const char *filename, int linenum, void *ra);
25extern void xfs_corruption_error(const char *tag, int level, 25extern void xfs_corruption_error(const char *tag, int level,
26 struct xfs_mount *mp, void *p, const char *filename, 26 struct xfs_mount *mp, void *p, const char *filename,
27 int linenum, inst_t *ra); 27 int linenum, void *ra);
28extern void xfs_verifier_error(struct xfs_buf *bp); 28extern void xfs_verifier_error(struct xfs_buf *bp);
29 29
30#define XFS_ERROR_REPORT(e, lvl, mp) \ 30#define XFS_ERROR_REPORT(e, lvl, mp) \
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index cb7fe64cdbfa..adc8f8fdd145 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -239,7 +239,7 @@ xfs_efi_init(
239 239
240 xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops); 240 xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops);
241 efip->efi_format.efi_nextents = nextents; 241 efip->efi_format.efi_nextents = nextents;
242 efip->efi_format.efi_id = (__psint_t)(void*)efip; 242 efip->efi_format.efi_id = (uintptr_t)(void *)efip;
243 atomic_set(&efip->efi_next_extent, 0); 243 atomic_set(&efip->efi_next_extent, 0);
244 atomic_set(&efip->efi_refcount, 2); 244 atomic_set(&efip->efi_refcount, 2);
245 245
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 7c62fca53e2f..874507de3485 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -80,14 +80,15 @@ xfs_rw_ilock_demote(
80} 80}
81 81
82/* 82/*
83 * xfs_iozero 83 * xfs_iozero clears the specified range supplied via the page cache (except in
84 * the DAX case). Writes through the page cache will allocate blocks over holes,
85 * though the callers usually map the holes first and avoid them. If a block is
86 * not completely zeroed, then it will be read from disk before being partially
87 * zeroed.
84 * 88 *
85 * xfs_iozero clears the specified range of buffer supplied, 89 * In the DAX case, we can just directly write to the underlying pages. This
86 * and marks all the affected blocks as valid and modified. If 90 * will not allocate blocks, but will avoid holes and unwritten extents and so
87 * an affected block is not allocated, it will be allocated. If 91 * not do unnecessary work.
88 * an affected block is not completely overwritten, and is not
89 * valid before the operation, it will be read from disk before
90 * being partially zeroed.
91 */ 92 */
92int 93int
93xfs_iozero( 94xfs_iozero(
@@ -97,7 +98,8 @@ xfs_iozero(
97{ 98{
98 struct page *page; 99 struct page *page;
99 struct address_space *mapping; 100 struct address_space *mapping;
100 int status; 101 int status = 0;
102
101 103
102 mapping = VFS_I(ip)->i_mapping; 104 mapping = VFS_I(ip)->i_mapping;
103 do { 105 do {
@@ -109,20 +111,27 @@ xfs_iozero(
109 if (bytes > count) 111 if (bytes > count)
110 bytes = count; 112 bytes = count;
111 113
112 status = pagecache_write_begin(NULL, mapping, pos, bytes, 114 if (IS_DAX(VFS_I(ip))) {
113 AOP_FLAG_UNINTERRUPTIBLE, 115 status = dax_zero_page_range(VFS_I(ip), pos, bytes,
114 &page, &fsdata); 116 xfs_get_blocks_direct);
115 if (status) 117 if (status)
116 break; 118 break;
119 } else {
120 status = pagecache_write_begin(NULL, mapping, pos, bytes,
121 AOP_FLAG_UNINTERRUPTIBLE,
122 &page, &fsdata);
123 if (status)
124 break;
117 125
118 zero_user(page, offset, bytes); 126 zero_user(page, offset, bytes);
119 127
120 status = pagecache_write_end(NULL, mapping, pos, bytes, bytes, 128 status = pagecache_write_end(NULL, mapping, pos, bytes,
121 page, fsdata); 129 bytes, page, fsdata);
122 WARN_ON(status <= 0); /* can't return less than zero! */ 130 WARN_ON(status <= 0); /* can't return less than zero! */
131 status = 0;
132 }
123 pos += bytes; 133 pos += bytes;
124 count -= bytes; 134 count -= bytes;
125 status = 0;
126 } while (count); 135 } while (count);
127 136
128 return status; 137 return status;
@@ -139,7 +148,7 @@ xfs_update_prealloc_flags(
139 tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_WRITEID); 148 tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_WRITEID);
140 error = xfs_trans_reserve(tp, &M_RES(ip->i_mount)->tr_writeid, 0, 0); 149 error = xfs_trans_reserve(tp, &M_RES(ip->i_mount)->tr_writeid, 0, 0);
141 if (error) { 150 if (error) {
142 xfs_trans_cancel(tp, 0); 151 xfs_trans_cancel(tp);
143 return error; 152 return error;
144 } 153 }
145 154
@@ -161,7 +170,7 @@ xfs_update_prealloc_flags(
161 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 170 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
162 if (flags & XFS_PREALLOC_SYNC) 171 if (flags & XFS_PREALLOC_SYNC)
163 xfs_trans_set_sync(tp); 172 xfs_trans_set_sync(tp);
164 return xfs_trans_commit(tp, 0); 173 return xfs_trans_commit(tp);
165} 174}
166 175
167/* 176/*
@@ -285,7 +294,7 @@ xfs_file_read_iter(
285 if (file->f_mode & FMODE_NOCMTIME) 294 if (file->f_mode & FMODE_NOCMTIME)
286 ioflags |= XFS_IO_INVIS; 295 ioflags |= XFS_IO_INVIS;
287 296
288 if (unlikely(ioflags & XFS_IO_ISDIRECT)) { 297 if ((ioflags & XFS_IO_ISDIRECT) && !IS_DAX(inode)) {
289 xfs_buftarg_t *target = 298 xfs_buftarg_t *target =
290 XFS_IS_REALTIME_INODE(ip) ? 299 XFS_IS_REALTIME_INODE(ip) ?
291 mp->m_rtdev_targp : mp->m_ddev_targp; 300 mp->m_rtdev_targp : mp->m_ddev_targp;
@@ -379,7 +388,11 @@ xfs_file_splice_read(
379 388
380 trace_xfs_file_splice_read(ip, count, *ppos, ioflags); 389 trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
381 390
382 ret = generic_file_splice_read(infilp, ppos, pipe, count, flags); 391 /* for dax, we need to avoid the page cache */
392 if (IS_DAX(VFS_I(ip)))
393 ret = default_file_splice_read(infilp, ppos, pipe, count, flags);
394 else
395 ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
383 if (ret > 0) 396 if (ret > 0)
384 XFS_STATS_ADD(xs_read_bytes, ret); 397 XFS_STATS_ADD(xs_read_bytes, ret);
385 398
@@ -673,7 +686,7 @@ xfs_file_dio_aio_write(
673 mp->m_rtdev_targp : mp->m_ddev_targp; 686 mp->m_rtdev_targp : mp->m_ddev_targp;
674 687
675 /* DIO must be aligned to device logical sector size */ 688 /* DIO must be aligned to device logical sector size */
676 if ((pos | count) & target->bt_logical_sectormask) 689 if (!IS_DAX(inode) && ((pos | count) & target->bt_logical_sectormask))
677 return -EINVAL; 690 return -EINVAL;
678 691
679 /* "unaligned" here means not aligned to a filesystem block */ 692 /* "unaligned" here means not aligned to a filesystem block */
@@ -759,8 +772,11 @@ xfs_file_dio_aio_write(
759out: 772out:
760 xfs_rw_iunlock(ip, iolock); 773 xfs_rw_iunlock(ip, iolock);
761 774
762 /* No fallback to buffered IO on errors for XFS. */ 775 /*
763 ASSERT(ret < 0 || ret == count); 776 * No fallback to buffered IO on errors for XFS. DAX can result in
777 * partial writes, but direct IO will either complete fully or fail.
778 */
779 ASSERT(ret < 0 || ret == count || IS_DAX(VFS_I(ip)));
764 return ret; 780 return ret;
765} 781}
766 782
@@ -843,7 +859,7 @@ xfs_file_write_iter(
843 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 859 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
844 return -EIO; 860 return -EIO;
845 861
846 if (unlikely(iocb->ki_flags & IOCB_DIRECT)) 862 if ((iocb->ki_flags & IOCB_DIRECT) || IS_DAX(inode))
847 ret = xfs_file_dio_aio_write(iocb, from); 863 ret = xfs_file_dio_aio_write(iocb, from);
848 else 864 else
849 ret = xfs_file_buffered_aio_write(iocb, from); 865 ret = xfs_file_buffered_aio_write(iocb, from);
@@ -1064,17 +1080,6 @@ xfs_file_readdir(
1064 return xfs_readdir(ip, ctx, bufsize); 1080 return xfs_readdir(ip, ctx, bufsize);
1065} 1081}
1066 1082
1067STATIC int
1068xfs_file_mmap(
1069 struct file *filp,
1070 struct vm_area_struct *vma)
1071{
1072 vma->vm_ops = &xfs_file_vm_ops;
1073
1074 file_accessed(filp);
1075 return 0;
1076}
1077
1078/* 1083/*
1079 * This type is designed to indicate the type of offset we would like 1084 * This type is designed to indicate the type of offset we would like
1080 * to search from page cache for xfs_seek_hole_data(). 1085 * to search from page cache for xfs_seek_hole_data().
@@ -1455,48 +1460,83 @@ xfs_file_llseek(
1455 * ordering of: 1460 * ordering of:
1456 * 1461 *
1457 * mmap_sem (MM) 1462 * mmap_sem (MM)
1458 * i_mmap_lock (XFS - truncate serialisation) 1463 * sb_start_pagefault(vfs, freeze)
1459 * page_lock (MM) 1464 * i_mmap_lock (XFS - truncate serialisation)
1460 * i_lock (XFS - extent map serialisation) 1465 * page_lock (MM)
1466 * i_lock (XFS - extent map serialisation)
1467 */
1468
1469/*
1470 * mmap()d file has taken write protection fault and is being made writable. We
1471 * can set the page state up correctly for a writable page, which means we can
1472 * do correct delalloc accounting (ENOSPC checking!) and unwritten extent
1473 * mapping.
1461 */ 1474 */
1462STATIC int 1475STATIC int
1463xfs_filemap_fault( 1476xfs_filemap_page_mkwrite(
1464 struct vm_area_struct *vma, 1477 struct vm_area_struct *vma,
1465 struct vm_fault *vmf) 1478 struct vm_fault *vmf)
1466{ 1479{
1467 struct xfs_inode *ip = XFS_I(vma->vm_file->f_mapping->host); 1480 struct inode *inode = file_inode(vma->vm_file);
1468 int error; 1481 int ret;
1469 1482
1470 trace_xfs_filemap_fault(ip); 1483 trace_xfs_filemap_page_mkwrite(XFS_I(inode));
1471 1484
1472 xfs_ilock(ip, XFS_MMAPLOCK_SHARED); 1485 sb_start_pagefault(inode->i_sb);
1473 error = filemap_fault(vma, vmf); 1486 file_update_time(vma->vm_file);
1474 xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); 1487 xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
1475 1488
1476 return error; 1489 if (IS_DAX(inode)) {
1490 ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_direct,
1491 xfs_end_io_dax_write);
1492 } else {
1493 ret = __block_page_mkwrite(vma, vmf, xfs_get_blocks);
1494 ret = block_page_mkwrite_return(ret);
1495 }
1496
1497 xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
1498 sb_end_pagefault(inode->i_sb);
1499
1500 return ret;
1477} 1501}
1478 1502
1479/*
1480 * mmap()d file has taken write protection fault and is being made writable. We
1481 * can set the page state up correctly for a writable page, which means we can
1482 * do correct delalloc accounting (ENOSPC checking!) and unwritten extent
1483 * mapping.
1484 */
1485STATIC int 1503STATIC int
1486xfs_filemap_page_mkwrite( 1504xfs_filemap_fault(
1487 struct vm_area_struct *vma, 1505 struct vm_area_struct *vma,
1488 struct vm_fault *vmf) 1506 struct vm_fault *vmf)
1489{ 1507{
1490 struct xfs_inode *ip = XFS_I(vma->vm_file->f_mapping->host); 1508 struct xfs_inode *ip = XFS_I(file_inode(vma->vm_file));
1491 int error; 1509 int ret;
1510
1511 trace_xfs_filemap_fault(ip);
1492 1512
1493 trace_xfs_filemap_page_mkwrite(ip); 1513 /* DAX can shortcut the normal fault path on write faults! */
1514 if ((vmf->flags & FAULT_FLAG_WRITE) && IS_DAX(VFS_I(ip)))
1515 return xfs_filemap_page_mkwrite(vma, vmf);
1494 1516
1495 xfs_ilock(ip, XFS_MMAPLOCK_SHARED); 1517 xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
1496 error = block_page_mkwrite(vma, vmf, xfs_get_blocks); 1518 ret = filemap_fault(vma, vmf);
1497 xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); 1519 xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
1498 1520
1499 return error; 1521 return ret;
1522}
1523
1524static const struct vm_operations_struct xfs_file_vm_ops = {
1525 .fault = xfs_filemap_fault,
1526 .map_pages = filemap_map_pages,
1527 .page_mkwrite = xfs_filemap_page_mkwrite,
1528};
1529
1530STATIC int
1531xfs_file_mmap(
1532 struct file *filp,
1533 struct vm_area_struct *vma)
1534{
1535 file_accessed(filp);
1536 vma->vm_ops = &xfs_file_vm_ops;
1537 if (IS_DAX(file_inode(filp)))
1538 vma->vm_flags |= VM_MIXEDMAP;
1539 return 0;
1500} 1540}
1501 1541
1502const struct file_operations xfs_file_operations = { 1542const struct file_operations xfs_file_operations = {
@@ -1527,9 +1567,3 @@ const struct file_operations xfs_dir_file_operations = {
1527#endif 1567#endif
1528 .fsync = xfs_dir_fsync, 1568 .fsync = xfs_dir_fsync,
1529}; 1569};
1530
1531static const struct vm_operations_struct xfs_file_vm_ops = {
1532 .fault = xfs_filemap_fault,
1533 .map_pages = filemap_map_pages,
1534 .page_mkwrite = xfs_filemap_page_mkwrite,
1535};
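
Taken together, the fs/xfs/xfs_file.c hunks move the vm_operations_struct definition ahead of xfs_file_mmap() and let DAX write faults bypass the ordinary read fault path. A condensed, hedged summary in comment form, using only names from the hunks above:

/*
 *   .fault         -> xfs_filemap_fault()
 *                       - DAX inode and FAULT_FLAG_WRITE: hand straight to
 *                         xfs_filemap_page_mkwrite()
 *                       - otherwise: filemap_fault() under XFS_MMAPLOCK_SHARED
 *
 *   .page_mkwrite  -> xfs_filemap_page_mkwrite()
 *                       sb_start_pagefault()      (freeze protection first)
 *                       file_update_time()
 *                       xfs_ilock(XFS_MMAPLOCK_SHARED)
 *                       __dax_mkwrite() or __block_page_mkwrite()
 *
 * xfs_file_mmap() also sets VM_MIXEDMAP on DAX mappings so the fault
 * handlers may insert pfn-backed pages directly.
 */
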
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index da82f1cb4b9b..c4c130f9bfb6 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -196,7 +196,8 @@ xfs_filestream_pick_ag(
196 goto next_ag; 196 goto next_ag;
197 } 197 }
198 198
199 longest = xfs_alloc_longest_free_extent(mp, pag); 199 longest = xfs_alloc_longest_free_extent(mp, pag,
200 xfs_alloc_min_freelist(mp, pag));
200 if (((minlen && longest >= minlen) || 201 if (((minlen && longest >= minlen) ||
201 (!minlen && pag->pagf_freeblks >= minfree)) && 202 (!minlen && pag->pagf_freeblks >= minfree)) &&
202 (!pag->pagf_metadata || !(flags & XFS_PICK_USERDATA) || 203 (!pag->pagf_metadata || !(flags & XFS_PICK_USERDATA) ||
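
The extra argument above feeds the minimum freelist length into xfs_alloc_longest_free_extent(), so the "longest free extent" estimate used for filestream AG selection discounts blocks that will be consumed refilling the AGFL. The call as it now reads, repeated here only to make the pairing explicit:

	longest = xfs_alloc_longest_free_extent(mp, pag,
				xfs_alloc_min_freelist(mp, pag));
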
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index cb7e8a29dfb6..9b3438a7680f 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -101,7 +101,9 @@ xfs_fs_geometry(
101 (xfs_sb_version_hasftype(&mp->m_sb) ? 101 (xfs_sb_version_hasftype(&mp->m_sb) ?
102 XFS_FSOP_GEOM_FLAGS_FTYPE : 0) | 102 XFS_FSOP_GEOM_FLAGS_FTYPE : 0) |
103 (xfs_sb_version_hasfinobt(&mp->m_sb) ? 103 (xfs_sb_version_hasfinobt(&mp->m_sb) ?
104 XFS_FSOP_GEOM_FLAGS_FINOBT : 0); 104 XFS_FSOP_GEOM_FLAGS_FINOBT : 0) |
105 (xfs_sb_version_hassparseinodes(&mp->m_sb) ?
106 XFS_FSOP_GEOM_FLAGS_SPINODES : 0);
105 geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ? 107 geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ?
106 mp->m_sb.sb_logsectsize : BBSIZE; 108 mp->m_sb.sb_logsectsize : BBSIZE;
107 geo->rtsectsize = mp->m_sb.sb_blocksize; 109 geo->rtsectsize = mp->m_sb.sb_blocksize;
@@ -201,7 +203,7 @@ xfs_growfs_data_private(
201 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growdata, 203 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growdata,
202 XFS_GROWFS_SPACE_RES(mp), 0); 204 XFS_GROWFS_SPACE_RES(mp), 0);
203 if (error) { 205 if (error) {
204 xfs_trans_cancel(tp, 0); 206 xfs_trans_cancel(tp);
205 return error; 207 return error;
206 } 208 }
207 209
@@ -489,7 +491,7 @@ xfs_growfs_data_private(
489 if (dpct) 491 if (dpct)
490 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct); 492 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct);
491 xfs_trans_set_sync(tp); 493 xfs_trans_set_sync(tp);
492 error = xfs_trans_commit(tp, 0); 494 error = xfs_trans_commit(tp);
493 if (error) 495 if (error)
494 return error; 496 return error;
495 497
@@ -557,7 +559,7 @@ xfs_growfs_data_private(
557 return saved_error ? saved_error : error; 559 return saved_error ? saved_error : error;
558 560
559 error0: 561 error0:
560 xfs_trans_cancel(tp, XFS_TRANS_ABORT); 562 xfs_trans_cancel(tp);
561 return error; 563 return error;
562} 564}
563 565
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 539a85fddbc2..3da9f4da4f3d 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -905,7 +905,6 @@ xfs_dir_ialloc(
905 905
906{ 906{
907 xfs_trans_t *tp; 907 xfs_trans_t *tp;
908 xfs_trans_t *ntp;
909 xfs_inode_t *ip; 908 xfs_inode_t *ip;
910 xfs_buf_t *ialloc_context = NULL; 909 xfs_buf_t *ialloc_context = NULL;
911 int code; 910 int code;
@@ -954,8 +953,6 @@ xfs_dir_ialloc(
954 * to succeed the second time. 953 * to succeed the second time.
955 */ 954 */
956 if (ialloc_context) { 955 if (ialloc_context) {
957 struct xfs_trans_res tres;
958
959 /* 956 /*
960 * Normally, xfs_trans_commit releases all the locks. 957 * Normally, xfs_trans_commit releases all the locks.
961 * We call bhold to hang on to the ialloc_context across 958 * We call bhold to hang on to the ialloc_context across
@@ -964,12 +961,6 @@ xfs_dir_ialloc(
964 * allocation group. 961 * allocation group.
965 */ 962 */
966 xfs_trans_bhold(tp, ialloc_context); 963 xfs_trans_bhold(tp, ialloc_context);
967 /*
968 * Save the log reservation so we can use
969 * them in the next transaction.
970 */
971 tres.tr_logres = xfs_trans_get_log_res(tp);
972 tres.tr_logcount = xfs_trans_get_log_count(tp);
973 964
974 /* 965 /*
975 * We want the quota changes to be associated with the next 966 * We want the quota changes to be associated with the next
@@ -985,35 +976,9 @@ xfs_dir_ialloc(
985 tp->t_flags &= ~(XFS_TRANS_DQ_DIRTY); 976 tp->t_flags &= ~(XFS_TRANS_DQ_DIRTY);
986 } 977 }
987 978
988 ntp = xfs_trans_dup(tp); 979 code = xfs_trans_roll(&tp, 0);
989 code = xfs_trans_commit(tp, 0); 980 if (committed != NULL)
990 tp = ntp;
991 if (committed != NULL) {
992 *committed = 1; 981 *committed = 1;
993 }
994 /*
995 * If we get an error during the commit processing,
996 * release the buffer that is still held and return
997 * to the caller.
998 */
999 if (code) {
1000 xfs_buf_relse(ialloc_context);
1001 if (dqinfo) {
1002 tp->t_dqinfo = dqinfo;
1003 xfs_trans_free_dqinfo(tp);
1004 }
1005 *tpp = ntp;
1006 *ipp = NULL;
1007 return code;
1008 }
1009
1010 /*
1011 * transaction commit worked ok so we can drop the extra ticket
1012 * reference that we gained in xfs_trans_dup()
1013 */
1014 xfs_log_ticket_put(tp->t_ticket);
1015 tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
1016 code = xfs_trans_reserve(tp, &tres, 0, 0);
1017 982
1018 /* 983 /*
1019 * Re-attach the quota info that we detached from prev trx. 984 * Re-attach the quota info that we detached from prev trx.
@@ -1025,7 +990,7 @@ xfs_dir_ialloc(
1025 990
1026 if (code) { 991 if (code) {
1027 xfs_buf_relse(ialloc_context); 992 xfs_buf_relse(ialloc_context);
1028 *tpp = ntp; 993 *tpp = tp;
1029 *ipp = NULL; 994 *ipp = NULL;
1030 return code; 995 return code;
1031 } 996 }
@@ -1127,7 +1092,6 @@ xfs_create(
1127 xfs_bmap_free_t free_list; 1092 xfs_bmap_free_t free_list;
1128 xfs_fsblock_t first_block; 1093 xfs_fsblock_t first_block;
1129 bool unlock_dp_on_error = false; 1094 bool unlock_dp_on_error = false;
1130 uint cancel_flags;
1131 int committed; 1095 int committed;
1132 prid_t prid; 1096 prid_t prid;
1133 struct xfs_dquot *udqp = NULL; 1097 struct xfs_dquot *udqp = NULL;
@@ -1164,8 +1128,6 @@ xfs_create(
1164 tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE); 1128 tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE);
1165 } 1129 }
1166 1130
1167 cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
1168
1169 /* 1131 /*
1170 * Initially assume that the file does not exist and 1132 * Initially assume that the file does not exist and
1171 * reserve the resources for that case. If that is not 1133 * reserve the resources for that case. If that is not
@@ -1183,10 +1145,9 @@ xfs_create(
1183 resblks = 0; 1145 resblks = 0;
1184 error = xfs_trans_reserve(tp, tres, 0, 0); 1146 error = xfs_trans_reserve(tp, tres, 0, 0);
1185 } 1147 }
1186 if (error) { 1148 if (error)
1187 cancel_flags = 0;
1188 goto out_trans_cancel; 1149 goto out_trans_cancel;
1189 } 1150
1190 1151
1191 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); 1152 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
1192 unlock_dp_on_error = true; 1153 unlock_dp_on_error = true;
@@ -1217,7 +1178,7 @@ xfs_create(
1217 if (error) { 1178 if (error) {
1218 if (error == -ENOSPC) 1179 if (error == -ENOSPC)
1219 goto out_trans_cancel; 1180 goto out_trans_cancel;
1220 goto out_trans_abort; 1181 goto out_trans_cancel;
1221 } 1182 }
1222 1183
1223 /* 1184 /*
@@ -1235,7 +1196,7 @@ xfs_create(
1235 resblks - XFS_IALLOC_SPACE_RES(mp) : 0); 1196 resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
1236 if (error) { 1197 if (error) {
1237 ASSERT(error != -ENOSPC); 1198 ASSERT(error != -ENOSPC);
1238 goto out_trans_abort; 1199 goto out_trans_cancel;
1239 } 1200 }
1240 xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 1201 xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
1241 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 1202 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
@@ -1269,7 +1230,7 @@ xfs_create(
1269 if (error) 1230 if (error)
1270 goto out_bmap_cancel; 1231 goto out_bmap_cancel;
1271 1232
1272 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1233 error = xfs_trans_commit(tp);
1273 if (error) 1234 if (error)
1274 goto out_release_inode; 1235 goto out_release_inode;
1275 1236
@@ -1282,10 +1243,8 @@ xfs_create(
1282 1243
1283 out_bmap_cancel: 1244 out_bmap_cancel:
1284 xfs_bmap_cancel(&free_list); 1245 xfs_bmap_cancel(&free_list);
1285 out_trans_abort:
1286 cancel_flags |= XFS_TRANS_ABORT;
1287 out_trans_cancel: 1246 out_trans_cancel:
1288 xfs_trans_cancel(tp, cancel_flags); 1247 xfs_trans_cancel(tp);
1289 out_release_inode: 1248 out_release_inode:
1290 /* 1249 /*
1291 * Wait until after the current transaction is aborted to finish the 1250 * Wait until after the current transaction is aborted to finish the
@@ -1317,7 +1276,6 @@ xfs_create_tmpfile(
1317 struct xfs_inode *ip = NULL; 1276 struct xfs_inode *ip = NULL;
1318 struct xfs_trans *tp = NULL; 1277 struct xfs_trans *tp = NULL;
1319 int error; 1278 int error;
1320 uint cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
1321 prid_t prid; 1279 prid_t prid;
1322 struct xfs_dquot *udqp = NULL; 1280 struct xfs_dquot *udqp = NULL;
1323 struct xfs_dquot *gdqp = NULL; 1281 struct xfs_dquot *gdqp = NULL;
@@ -1350,10 +1308,8 @@ xfs_create_tmpfile(
1350 resblks = 0; 1308 resblks = 0;
1351 error = xfs_trans_reserve(tp, tres, 0, 0); 1309 error = xfs_trans_reserve(tp, tres, 0, 0);
1352 } 1310 }
1353 if (error) { 1311 if (error)
1354 cancel_flags = 0;
1355 goto out_trans_cancel; 1312 goto out_trans_cancel;
1356 }
1357 1313
1358 error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, 1314 error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp,
1359 pdqp, resblks, 1, 0); 1315 pdqp, resblks, 1, 0);
@@ -1365,7 +1321,7 @@ xfs_create_tmpfile(
1365 if (error) { 1321 if (error) {
1366 if (error == -ENOSPC) 1322 if (error == -ENOSPC)
1367 goto out_trans_cancel; 1323 goto out_trans_cancel;
1368 goto out_trans_abort; 1324 goto out_trans_cancel;
1369 } 1325 }
1370 1326
1371 if (mp->m_flags & XFS_MOUNT_WSYNC) 1327 if (mp->m_flags & XFS_MOUNT_WSYNC)
@@ -1381,9 +1337,9 @@ xfs_create_tmpfile(
1381 ip->i_d.di_nlink--; 1337 ip->i_d.di_nlink--;
1382 error = xfs_iunlink(tp, ip); 1338 error = xfs_iunlink(tp, ip);
1383 if (error) 1339 if (error)
1384 goto out_trans_abort; 1340 goto out_trans_cancel;
1385 1341
1386 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1342 error = xfs_trans_commit(tp);
1387 if (error) 1343 if (error)
1388 goto out_release_inode; 1344 goto out_release_inode;
1389 1345
@@ -1394,10 +1350,8 @@ xfs_create_tmpfile(
1394 *ipp = ip; 1350 *ipp = ip;
1395 return 0; 1351 return 0;
1396 1352
1397 out_trans_abort:
1398 cancel_flags |= XFS_TRANS_ABORT;
1399 out_trans_cancel: 1353 out_trans_cancel:
1400 xfs_trans_cancel(tp, cancel_flags); 1354 xfs_trans_cancel(tp);
1401 out_release_inode: 1355 out_release_inode:
1402 /* 1356 /*
1403 * Wait until after the current transaction is aborted to finish the 1357 * Wait until after the current transaction is aborted to finish the
@@ -1427,7 +1381,6 @@ xfs_link(
1427 int error; 1381 int error;
1428 xfs_bmap_free_t free_list; 1382 xfs_bmap_free_t free_list;
1429 xfs_fsblock_t first_block; 1383 xfs_fsblock_t first_block;
1430 int cancel_flags;
1431 int committed; 1384 int committed;
1432 int resblks; 1385 int resblks;
1433 1386
@@ -1447,17 +1400,14 @@ xfs_link(
1447 goto std_return; 1400 goto std_return;
1448 1401
1449 tp = xfs_trans_alloc(mp, XFS_TRANS_LINK); 1402 tp = xfs_trans_alloc(mp, XFS_TRANS_LINK);
1450 cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
1451 resblks = XFS_LINK_SPACE_RES(mp, target_name->len); 1403 resblks = XFS_LINK_SPACE_RES(mp, target_name->len);
1452 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, resblks, 0); 1404 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, resblks, 0);
1453 if (error == -ENOSPC) { 1405 if (error == -ENOSPC) {
1454 resblks = 0; 1406 resblks = 0;
1455 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, 0, 0); 1407 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, 0, 0);
1456 } 1408 }
1457 if (error) { 1409 if (error)
1458 cancel_flags = 0;
1459 goto error_return; 1410 goto error_return;
1460 }
1461 1411
1462 xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL); 1412 xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);
1463 1413
@@ -1486,19 +1436,19 @@ xfs_link(
1486 if (sip->i_d.di_nlink == 0) { 1436 if (sip->i_d.di_nlink == 0) {
1487 error = xfs_iunlink_remove(tp, sip); 1437 error = xfs_iunlink_remove(tp, sip);
1488 if (error) 1438 if (error)
1489 goto abort_return; 1439 goto error_return;
1490 } 1440 }
1491 1441
1492 error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino, 1442 error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino,
1493 &first_block, &free_list, resblks); 1443 &first_block, &free_list, resblks);
1494 if (error) 1444 if (error)
1495 goto abort_return; 1445 goto error_return;
1496 xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 1446 xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
1497 xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE); 1447 xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE);
1498 1448
1499 error = xfs_bumplink(tp, sip); 1449 error = xfs_bumplink(tp, sip);
1500 if (error) 1450 if (error)
1501 goto abort_return; 1451 goto error_return;
1502 1452
1503 /* 1453 /*
1504 * If this is a synchronous mount, make sure that the 1454 * If this is a synchronous mount, make sure that the
@@ -1512,15 +1462,13 @@ xfs_link(
1512 error = xfs_bmap_finish (&tp, &free_list, &committed); 1462 error = xfs_bmap_finish (&tp, &free_list, &committed);
1513 if (error) { 1463 if (error) {
1514 xfs_bmap_cancel(&free_list); 1464 xfs_bmap_cancel(&free_list);
1515 goto abort_return; 1465 goto error_return;
1516 } 1466 }
1517 1467
1518 return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1468 return xfs_trans_commit(tp);
1519 1469
1520 abort_return:
1521 cancel_flags |= XFS_TRANS_ABORT;
1522 error_return: 1470 error_return:
1523 xfs_trans_cancel(tp, cancel_flags); 1471 xfs_trans_cancel(tp);
1524 std_return: 1472 std_return:
1525 return error; 1473 return error;
1526} 1474}
@@ -1555,7 +1503,6 @@ xfs_itruncate_extents(
1555{ 1503{
1556 struct xfs_mount *mp = ip->i_mount; 1504 struct xfs_mount *mp = ip->i_mount;
1557 struct xfs_trans *tp = *tpp; 1505 struct xfs_trans *tp = *tpp;
1558 struct xfs_trans *ntp;
1559 xfs_bmap_free_t free_list; 1506 xfs_bmap_free_t free_list;
1560 xfs_fsblock_t first_block; 1507 xfs_fsblock_t first_block;
1561 xfs_fileoff_t first_unmap_block; 1508 xfs_fileoff_t first_unmap_block;
@@ -1613,29 +1560,7 @@ xfs_itruncate_extents(
1613 if (error) 1560 if (error)
1614 goto out_bmap_cancel; 1561 goto out_bmap_cancel;
1615 1562
1616 if (committed) { 1563 error = xfs_trans_roll(&tp, ip);
1617 /*
1618 * Mark the inode dirty so it will be logged and
1619 * moved forward in the log as part of every commit.
1620 */
1621 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1622 }
1623
1624 ntp = xfs_trans_dup(tp);
1625 error = xfs_trans_commit(tp, 0);
1626 tp = ntp;
1627
1628 xfs_trans_ijoin(tp, ip, 0);
1629
1630 if (error)
1631 goto out;
1632
1633 /*
1634 * Transaction commit worked ok so we can drop the extra ticket
1635 * reference that we gained in xfs_trans_dup()
1636 */
1637 xfs_log_ticket_put(tp->t_ticket);
1638 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
1639 if (error) 1564 if (error)
1640 goto out; 1565 goto out;
1641 } 1566 }
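
Both the xfs_dir_ialloc() and xfs_itruncate_extents() hunks above replace the open-coded xfs_trans_dup()/xfs_trans_commit()/xfs_trans_reserve() sequence with xfs_trans_roll(). A hedged sketch of the resulting caller pattern, with tp and ip as in those hunks:

	/*
	 * Commit the current transaction, obtain a follow-on transaction
	 * with an equivalent reservation and rejoin ip to it; tp is updated
	 * in place, so the caller simply keeps using the same pointer.
	 */
	error = xfs_trans_roll(&tp, ip);
	if (error)
		goto out;
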
@@ -1756,7 +1681,7 @@ xfs_inactive_truncate(
1756 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); 1681 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
1757 if (error) { 1682 if (error) {
1758 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 1683 ASSERT(XFS_FORCED_SHUTDOWN(mp));
1759 xfs_trans_cancel(tp, 0); 1684 xfs_trans_cancel(tp);
1760 return error; 1685 return error;
1761 } 1686 }
1762 1687
@@ -1777,7 +1702,7 @@ xfs_inactive_truncate(
1777 1702
1778 ASSERT(ip->i_d.di_nextents == 0); 1703 ASSERT(ip->i_d.di_nextents == 0);
1779 1704
1780 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1705 error = xfs_trans_commit(tp);
1781 if (error) 1706 if (error)
1782 goto error_unlock; 1707 goto error_unlock;
1783 1708
@@ -1785,7 +1710,7 @@ xfs_inactive_truncate(
1785 return 0; 1710 return 0;
1786 1711
1787error_trans_cancel: 1712error_trans_cancel:
1788 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 1713 xfs_trans_cancel(tp);
1789error_unlock: 1714error_unlock:
1790 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1715 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1791 return error; 1716 return error;
@@ -1835,7 +1760,7 @@ xfs_inactive_ifree(
1835 } else { 1760 } else {
1836 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 1761 ASSERT(XFS_FORCED_SHUTDOWN(mp));
1837 } 1762 }
1838 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES); 1763 xfs_trans_cancel(tp);
1839 return error; 1764 return error;
1840 } 1765 }
1841 1766
@@ -1855,7 +1780,7 @@ xfs_inactive_ifree(
1855 __func__, error); 1780 __func__, error);
1856 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); 1781 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
1857 } 1782 }
1858 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); 1783 xfs_trans_cancel(tp);
1859 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1784 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1860 return error; 1785 return error;
1861 } 1786 }
@@ -1874,7 +1799,7 @@ xfs_inactive_ifree(
1874 if (error) 1799 if (error)
1875 xfs_notice(mp, "%s: xfs_bmap_finish returned error %d", 1800 xfs_notice(mp, "%s: xfs_bmap_finish returned error %d",
1876 __func__, error); 1801 __func__, error);
1877 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1802 error = xfs_trans_commit(tp);
1878 if (error) 1803 if (error)
1879 xfs_notice(mp, "%s: xfs_trans_commit returned error %d", 1804 xfs_notice(mp, "%s: xfs_trans_commit returned error %d",
1880 __func__, error); 1805 __func__, error);
@@ -2235,28 +2160,42 @@ xfs_iunlink_remove(
2235 */ 2160 */
2236STATIC int 2161STATIC int
2237xfs_ifree_cluster( 2162xfs_ifree_cluster(
2238 xfs_inode_t *free_ip, 2163 xfs_inode_t *free_ip,
2239 xfs_trans_t *tp, 2164 xfs_trans_t *tp,
2240 xfs_ino_t inum) 2165 struct xfs_icluster *xic)
2241{ 2166{
2242 xfs_mount_t *mp = free_ip->i_mount; 2167 xfs_mount_t *mp = free_ip->i_mount;
2243 int blks_per_cluster; 2168 int blks_per_cluster;
2244 int inodes_per_cluster; 2169 int inodes_per_cluster;
2245 int nbufs; 2170 int nbufs;
2246 int i, j; 2171 int i, j;
2172 int ioffset;
2247 xfs_daddr_t blkno; 2173 xfs_daddr_t blkno;
2248 xfs_buf_t *bp; 2174 xfs_buf_t *bp;
2249 xfs_inode_t *ip; 2175 xfs_inode_t *ip;
2250 xfs_inode_log_item_t *iip; 2176 xfs_inode_log_item_t *iip;
2251 xfs_log_item_t *lip; 2177 xfs_log_item_t *lip;
2252 struct xfs_perag *pag; 2178 struct xfs_perag *pag;
2179 xfs_ino_t inum;
2253 2180
2181 inum = xic->first_ino;
2254 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum)); 2182 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum));
2255 blks_per_cluster = xfs_icluster_size_fsb(mp); 2183 blks_per_cluster = xfs_icluster_size_fsb(mp);
2256 inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog; 2184 inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog;
2257 nbufs = mp->m_ialloc_blks / blks_per_cluster; 2185 nbufs = mp->m_ialloc_blks / blks_per_cluster;
2258 2186
2259 for (j = 0; j < nbufs; j++, inum += inodes_per_cluster) { 2187 for (j = 0; j < nbufs; j++, inum += inodes_per_cluster) {
2188 /*
2189 * The allocation bitmap tells us which inodes of the chunk were
2190 * physically allocated. Skip the cluster if an inode falls into
2191 * a sparse region.
2192 */
2193 ioffset = inum - xic->first_ino;
2194 if ((xic->alloc & XFS_INOBT_MASK(ioffset)) == 0) {
2195 ASSERT(do_mod(ioffset, inodes_per_cluster) == 0);
2196 continue;
2197 }
2198
2260 blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum), 2199 blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
2261 XFS_INO_TO_AGBNO(mp, inum)); 2200 XFS_INO_TO_AGBNO(mp, inum));
2262 2201
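
The xfs_ifree_cluster() changes above are where sparse inode chunks meet the inode-free path: the allocation bitmap carried in struct xfs_icluster is consulted so that cluster buffers lying entirely in a sparse (never physically allocated) region are skipped rather than read and invalidated. As a hedged worked example with assumed geometry (4 KiB blocks, 512-byte inodes, the default 8 KiB inode cluster), a 64-inode chunk spans nbufs = 4 cluster buffers of inodes_per_cluster = 16 inodes each, so a record with xic->alloc == 0xffff0000ffff0000 makes the loop skip buffers 0 and 2 (their 16 bits are clear) and process only buffers 1 and 3.
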
@@ -2414,8 +2353,7 @@ xfs_ifree(
2414 xfs_bmap_free_t *flist) 2353 xfs_bmap_free_t *flist)
2415{ 2354{
2416 int error; 2355 int error;
2417 int delete; 2356 struct xfs_icluster xic = { 0 };
2418 xfs_ino_t first_ino;
2419 2357
2420 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 2358 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
2421 ASSERT(ip->i_d.di_nlink == 0); 2359 ASSERT(ip->i_d.di_nlink == 0);
@@ -2431,7 +2369,7 @@ xfs_ifree(
2431 if (error) 2369 if (error)
2432 return error; 2370 return error;
2433 2371
2434 error = xfs_difree(tp, ip->i_ino, flist, &delete, &first_ino); 2372 error = xfs_difree(tp, ip->i_ino, flist, &xic);
2435 if (error) 2373 if (error)
2436 return error; 2374 return error;
2437 2375
@@ -2448,8 +2386,8 @@ xfs_ifree(
2448 ip->i_d.di_gen++; 2386 ip->i_d.di_gen++;
2449 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 2387 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
2450 2388
2451 if (delete) 2389 if (xic.deleted)
2452 error = xfs_ifree_cluster(ip, tp, first_ino); 2390 error = xfs_ifree_cluster(ip, tp, &xic);
2453 2391
2454 return error; 2392 return error;
2455} 2393}
@@ -2536,7 +2474,6 @@ xfs_remove(
2536 int error = 0; 2474 int error = 0;
2537 xfs_bmap_free_t free_list; 2475 xfs_bmap_free_t free_list;
2538 xfs_fsblock_t first_block; 2476 xfs_fsblock_t first_block;
2539 int cancel_flags;
2540 int committed; 2477 int committed;
2541 uint resblks; 2478 uint resblks;
2542 2479
@@ -2557,7 +2494,6 @@ xfs_remove(
2557 tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR); 2494 tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR);
2558 else 2495 else
2559 tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE); 2496 tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE);
2560 cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
2561 2497
2562 /* 2498 /*
2563 * We try to get the real space reservation first, 2499 * We try to get the real space reservation first,
@@ -2576,7 +2512,6 @@ xfs_remove(
2576 } 2512 }
2577 if (error) { 2513 if (error) {
2578 ASSERT(error != -ENOSPC); 2514 ASSERT(error != -ENOSPC);
2579 cancel_flags = 0;
2580 goto out_trans_cancel; 2515 goto out_trans_cancel;
2581 } 2516 }
2582 2517
@@ -2588,7 +2523,6 @@ xfs_remove(
2588 /* 2523 /*
2589 * If we're removing a directory perform some additional validation. 2524 * If we're removing a directory perform some additional validation.
2590 */ 2525 */
2591 cancel_flags |= XFS_TRANS_ABORT;
2592 if (is_dir) { 2526 if (is_dir) {
2593 ASSERT(ip->i_d.di_nlink >= 2); 2527 ASSERT(ip->i_d.di_nlink >= 2);
2594 if (ip->i_d.di_nlink != 2) { 2528 if (ip->i_d.di_nlink != 2) {
@@ -2644,7 +2578,7 @@ xfs_remove(
2644 if (error) 2578 if (error)
2645 goto out_bmap_cancel; 2579 goto out_bmap_cancel;
2646 2580
2647 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 2581 error = xfs_trans_commit(tp);
2648 if (error) 2582 if (error)
2649 goto std_return; 2583 goto std_return;
2650 2584
@@ -2656,7 +2590,7 @@ xfs_remove(
2656 out_bmap_cancel: 2590 out_bmap_cancel:
2657 xfs_bmap_cancel(&free_list); 2591 xfs_bmap_cancel(&free_list);
2658 out_trans_cancel: 2592 out_trans_cancel:
2659 xfs_trans_cancel(tp, cancel_flags); 2593 xfs_trans_cancel(tp);
2660 std_return: 2594 std_return:
2661 return error; 2595 return error;
2662} 2596}
@@ -2730,11 +2664,11 @@ xfs_finish_rename(
2730 error = xfs_bmap_finish(&tp, free_list, &committed); 2664 error = xfs_bmap_finish(&tp, free_list, &committed);
2731 if (error) { 2665 if (error) {
2732 xfs_bmap_cancel(free_list); 2666 xfs_bmap_cancel(free_list);
2733 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); 2667 xfs_trans_cancel(tp);
2734 return error; 2668 return error;
2735 } 2669 }
2736 2670
2737 return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 2671 return xfs_trans_commit(tp);
2738} 2672}
2739 2673
2740/* 2674/*
@@ -2855,7 +2789,7 @@ xfs_cross_rename(
2855 2789
2856out_trans_abort: 2790out_trans_abort:
2857 xfs_bmap_cancel(free_list); 2791 xfs_bmap_cancel(free_list);
2858 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); 2792 xfs_trans_cancel(tp);
2859 return error; 2793 return error;
2860} 2794}
2861 2795
@@ -2915,7 +2849,6 @@ xfs_rename(
2915 int num_inodes = __XFS_SORT_INODES; 2849 int num_inodes = __XFS_SORT_INODES;
2916 bool new_parent = (src_dp != target_dp); 2850 bool new_parent = (src_dp != target_dp);
2917 bool src_is_directory = S_ISDIR(src_ip->i_d.di_mode); 2851 bool src_is_directory = S_ISDIR(src_ip->i_d.di_mode);
2918 int cancel_flags = 0;
2919 int spaceres; 2852 int spaceres;
2920 int error; 2853 int error;
2921 2854
@@ -2951,7 +2884,6 @@ xfs_rename(
2951 } 2884 }
2952 if (error) 2885 if (error)
2953 goto out_trans_cancel; 2886 goto out_trans_cancel;
2954 cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
2955 2887
2956 /* 2888 /*
2957 * Attach the dquots to the inodes 2889 * Attach the dquots to the inodes
@@ -3022,10 +2954,8 @@ xfs_rename(
3022 error = xfs_dir_createname(tp, target_dp, target_name, 2954 error = xfs_dir_createname(tp, target_dp, target_name,
3023 src_ip->i_ino, &first_block, 2955 src_ip->i_ino, &first_block,
3024 &free_list, spaceres); 2956 &free_list, spaceres);
3025 if (error == -ENOSPC)
3026 goto out_bmap_cancel;
3027 if (error) 2957 if (error)
3028 goto out_trans_abort; 2958 goto out_bmap_cancel;
3029 2959
3030 xfs_trans_ichgtime(tp, target_dp, 2960 xfs_trans_ichgtime(tp, target_dp,
3031 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2961 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -3033,7 +2963,7 @@ xfs_rename(
3033 if (new_parent && src_is_directory) { 2963 if (new_parent && src_is_directory) {
3034 error = xfs_bumplink(tp, target_dp); 2964 error = xfs_bumplink(tp, target_dp);
3035 if (error) 2965 if (error)
3036 goto out_trans_abort; 2966 goto out_bmap_cancel;
3037 } 2967 }
3038 } else { /* target_ip != NULL */ 2968 } else { /* target_ip != NULL */
3039 /* 2969 /*
@@ -3065,7 +2995,7 @@ xfs_rename(
3065 src_ip->i_ino, 2995 src_ip->i_ino,
3066 &first_block, &free_list, spaceres); 2996 &first_block, &free_list, spaceres);
3067 if (error) 2997 if (error)
3068 goto out_trans_abort; 2998 goto out_bmap_cancel;
3069 2999
3070 xfs_trans_ichgtime(tp, target_dp, 3000 xfs_trans_ichgtime(tp, target_dp,
3071 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 3001 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -3076,7 +3006,7 @@ xfs_rename(
3076 */ 3006 */
3077 error = xfs_droplink(tp, target_ip); 3007 error = xfs_droplink(tp, target_ip);
3078 if (error) 3008 if (error)
3079 goto out_trans_abort; 3009 goto out_bmap_cancel;
3080 3010
3081 if (src_is_directory) { 3011 if (src_is_directory) {
3082 /* 3012 /*
@@ -3084,7 +3014,7 @@ xfs_rename(
3084 */ 3014 */
3085 error = xfs_droplink(tp, target_ip); 3015 error = xfs_droplink(tp, target_ip);
3086 if (error) 3016 if (error)
3087 goto out_trans_abort; 3017 goto out_bmap_cancel;
3088 } 3018 }
3089 } /* target_ip != NULL */ 3019 } /* target_ip != NULL */
3090 3020
@@ -3101,7 +3031,7 @@ xfs_rename(
3101 &first_block, &free_list, spaceres); 3031 &first_block, &free_list, spaceres);
3102 ASSERT(error != -EEXIST); 3032 ASSERT(error != -EEXIST);
3103 if (error) 3033 if (error)
3104 goto out_trans_abort; 3034 goto out_bmap_cancel;
3105 } 3035 }
3106 3036
3107 /* 3037 /*
@@ -3127,7 +3057,7 @@ xfs_rename(
3127 */ 3057 */
3128 error = xfs_droplink(tp, src_dp); 3058 error = xfs_droplink(tp, src_dp);
3129 if (error) 3059 if (error)
3130 goto out_trans_abort; 3060 goto out_bmap_cancel;
3131 } 3061 }
3132 3062
3133 /* 3063 /*
@@ -3142,7 +3072,7 @@ xfs_rename(
3142 error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino, 3072 error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
3143 &first_block, &free_list, spaceres); 3073 &first_block, &free_list, spaceres);
3144 if (error) 3074 if (error)
3145 goto out_trans_abort; 3075 goto out_bmap_cancel;
3146 3076
3147 /* 3077 /*
3148 * For whiteouts, we need to bump the link count on the whiteout inode. 3078 * For whiteouts, we need to bump the link count on the whiteout inode.
@@ -3156,10 +3086,10 @@ xfs_rename(
3156 ASSERT(VFS_I(wip)->i_nlink == 0 && wip->i_d.di_nlink == 0); 3086 ASSERT(VFS_I(wip)->i_nlink == 0 && wip->i_d.di_nlink == 0);
3157 error = xfs_bumplink(tp, wip); 3087 error = xfs_bumplink(tp, wip);
3158 if (error) 3088 if (error)
3159 goto out_trans_abort; 3089 goto out_bmap_cancel;
3160 error = xfs_iunlink_remove(tp, wip); 3090 error = xfs_iunlink_remove(tp, wip);
3161 if (error) 3091 if (error)
3162 goto out_trans_abort; 3092 goto out_bmap_cancel;
3163 xfs_trans_log_inode(tp, wip, XFS_ILOG_CORE); 3093 xfs_trans_log_inode(tp, wip, XFS_ILOG_CORE);
3164 3094
3165 /* 3095 /*
@@ -3180,12 +3110,10 @@ xfs_rename(
3180 IRELE(wip); 3110 IRELE(wip);
3181 return error; 3111 return error;
3182 3112
3183out_trans_abort:
3184 cancel_flags |= XFS_TRANS_ABORT;
3185out_bmap_cancel: 3113out_bmap_cancel:
3186 xfs_bmap_cancel(&free_list); 3114 xfs_bmap_cancel(&free_list);
3187out_trans_cancel: 3115out_trans_cancel:
3188 xfs_trans_cancel(tp, cancel_flags); 3116 xfs_trans_cancel(tp);
3189 if (wip) 3117 if (wip)
3190 IRELE(wip); 3118 IRELE(wip);
3191 return error; 3119 return error;
@@ -3464,7 +3392,7 @@ xfs_iflush_int(
3464 ASSERT(ip->i_d.di_version > 1); 3392 ASSERT(ip->i_d.di_version > 1);
3465 3393
3466 /* set *dip = inode's place in the buffer */ 3394 /* set *dip = inode's place in the buffer */
3467 dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); 3395 dip = xfs_buf_offset(bp, ip->i_imap.im_boffset);
3468 3396
3469 if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC), 3397 if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
3470 mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) { 3398 mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 87f67c6b654c..ea7d85af5310 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -336,7 +336,7 @@ xfs_set_dmattrs(
336 tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS); 336 tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS);
337 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); 337 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
338 if (error) { 338 if (error) {
339 xfs_trans_cancel(tp, 0); 339 xfs_trans_cancel(tp);
340 return error; 340 return error;
341 } 341 }
342 xfs_ilock(ip, XFS_ILOCK_EXCL); 342 xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -346,7 +346,7 @@ xfs_set_dmattrs(
346 ip->i_d.di_dmstate = state; 346 ip->i_d.di_dmstate = state;
347 347
348 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 348 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
349 error = xfs_trans_commit(tp, 0); 349 error = xfs_trans_commit(tp);
350 350
351 return error; 351 return error;
352} 352}
@@ -1076,7 +1076,7 @@ xfs_ioctl_setattr_get_trans(
1076 return tp; 1076 return tp;
1077 1077
1078out_cancel: 1078out_cancel:
1079 xfs_trans_cancel(tp, 0); 1079 xfs_trans_cancel(tp);
1080 return ERR_PTR(error); 1080 return ERR_PTR(error);
1081} 1081}
1082 1082
@@ -1253,7 +1253,7 @@ xfs_ioctl_setattr(
1253 else 1253 else
1254 ip->i_d.di_extsize = 0; 1254 ip->i_d.di_extsize = 0;
1255 1255
1256 code = xfs_trans_commit(tp, 0); 1256 code = xfs_trans_commit(tp);
1257 1257
1258 /* 1258 /*
1259 * Release any dquot(s) the inode had kept before chown. 1259 * Release any dquot(s) the inode had kept before chown.
@@ -1265,7 +1265,7 @@ xfs_ioctl_setattr(
1265 return code; 1265 return code;
1266 1266
1267error_trans_cancel: 1267error_trans_cancel:
1268 xfs_trans_cancel(tp, 0); 1268 xfs_trans_cancel(tp);
1269error_free_dquots: 1269error_free_dquots:
1270 xfs_qm_dqrele(udqp); 1270 xfs_qm_dqrele(udqp);
1271 xfs_qm_dqrele(pdqp); 1271 xfs_qm_dqrele(pdqp);
@@ -1338,11 +1338,11 @@ xfs_ioc_setxflags(
1338 1338
1339 error = xfs_ioctl_setattr_xflags(tp, ip, &fa); 1339 error = xfs_ioctl_setattr_xflags(tp, ip, &fa);
1340 if (error) { 1340 if (error) {
1341 xfs_trans_cancel(tp, 0); 1341 xfs_trans_cancel(tp);
1342 goto out_drop_write; 1342 goto out_drop_write;
1343 } 1343 }
1344 1344
1345 error = xfs_trans_commit(tp, 0); 1345 error = xfs_trans_commit(tp);
1346out_drop_write: 1346out_drop_write:
1347 mnt_drop_write_file(filp); 1347 mnt_drop_write_file(filp);
1348 return error; 1348 return error;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 38e633bad8c2..1f86033171c8 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -183,7 +183,7 @@ xfs_iomap_write_direct(
183 * Check for running out of space, note: need lock to return 183 * Check for running out of space, note: need lock to return
184 */ 184 */
185 if (error) { 185 if (error) {
186 xfs_trans_cancel(tp, 0); 186 xfs_trans_cancel(tp);
187 return error; 187 return error;
188 } 188 }
189 189
@@ -213,7 +213,7 @@ xfs_iomap_write_direct(
213 error = xfs_bmap_finish(&tp, &free_list, &committed); 213 error = xfs_bmap_finish(&tp, &free_list, &committed);
214 if (error) 214 if (error)
215 goto out_bmap_cancel; 215 goto out_bmap_cancel;
216 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 216 error = xfs_trans_commit(tp);
217 if (error) 217 if (error)
218 goto out_unlock; 218 goto out_unlock;
219 219
@@ -236,7 +236,7 @@ out_bmap_cancel:
236 xfs_bmap_cancel(&free_list); 236 xfs_bmap_cancel(&free_list);
237 xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag); 237 xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag);
238out_trans_cancel: 238out_trans_cancel:
239 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 239 xfs_trans_cancel(tp);
240 goto out_unlock; 240 goto out_unlock;
241} 241}
242 242
@@ -690,7 +690,7 @@ xfs_iomap_write_allocate(
690 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, 690 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
691 nres, 0); 691 nres, 0);
692 if (error) { 692 if (error) {
693 xfs_trans_cancel(tp, 0); 693 xfs_trans_cancel(tp);
694 return error; 694 return error;
695 } 695 }
696 xfs_ilock(ip, XFS_ILOCK_EXCL); 696 xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -760,7 +760,7 @@ xfs_iomap_write_allocate(
760 if (error) 760 if (error)
761 goto trans_cancel; 761 goto trans_cancel;
762 762
763 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 763 error = xfs_trans_commit(tp);
764 if (error) 764 if (error)
765 goto error0; 765 goto error0;
766 766
@@ -791,7 +791,7 @@ xfs_iomap_write_allocate(
791 791
792trans_cancel: 792trans_cancel:
793 xfs_bmap_cancel(&free_list); 793 xfs_bmap_cancel(&free_list);
794 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 794 xfs_trans_cancel(tp);
795error0: 795error0:
796 xfs_iunlock(ip, XFS_ILOCK_EXCL); 796 xfs_iunlock(ip, XFS_ILOCK_EXCL);
797 return error; 797 return error;
@@ -853,7 +853,7 @@ xfs_iomap_write_unwritten(
853 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, 853 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
854 resblks, 0); 854 resblks, 0);
855 if (error) { 855 if (error) {
856 xfs_trans_cancel(tp, 0); 856 xfs_trans_cancel(tp);
857 return error; 857 return error;
858 } 858 }
859 859
@@ -890,7 +890,7 @@ xfs_iomap_write_unwritten(
890 if (error) 890 if (error)
891 goto error_on_bmapi_transaction; 891 goto error_on_bmapi_transaction;
892 892
893 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 893 error = xfs_trans_commit(tp);
894 xfs_iunlock(ip, XFS_ILOCK_EXCL); 894 xfs_iunlock(ip, XFS_ILOCK_EXCL);
895 if (error) 895 if (error)
896 return error; 896 return error;
@@ -914,7 +914,7 @@ xfs_iomap_write_unwritten(
914 914
915error_on_bmapi_transaction: 915error_on_bmapi_transaction:
916 xfs_bmap_cancel(&free_list); 916 xfs_bmap_cancel(&free_list);
917 xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT)); 917 xfs_trans_cancel(tp);
918 xfs_iunlock(ip, XFS_ILOCK_EXCL); 918 xfs_iunlock(ip, XFS_ILOCK_EXCL);
919 return error; 919 return error;
920} 920}
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 7f51f39f8acc..766b23f86ce9 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -699,7 +699,7 @@ xfs_setattr_nonsize(
699 699
700 if (mp->m_flags & XFS_MOUNT_WSYNC) 700 if (mp->m_flags & XFS_MOUNT_WSYNC)
701 xfs_trans_set_sync(tp); 701 xfs_trans_set_sync(tp);
702 error = xfs_trans_commit(tp, 0); 702 error = xfs_trans_commit(tp);
703 703
704 xfs_iunlock(ip, XFS_ILOCK_EXCL); 704 xfs_iunlock(ip, XFS_ILOCK_EXCL);
705 705
@@ -730,7 +730,7 @@ xfs_setattr_nonsize(
730 return 0; 730 return 0;
731 731
732out_trans_cancel: 732out_trans_cancel:
733 xfs_trans_cancel(tp, 0); 733 xfs_trans_cancel(tp);
734 xfs_iunlock(ip, XFS_ILOCK_EXCL); 734 xfs_iunlock(ip, XFS_ILOCK_EXCL);
735out_dqrele: 735out_dqrele:
736 xfs_qm_dqrele(udqp); 736 xfs_qm_dqrele(udqp);
@@ -752,7 +752,6 @@ xfs_setattr_size(
752 struct xfs_trans *tp; 752 struct xfs_trans *tp;
753 int error; 753 int error;
754 uint lock_flags = 0; 754 uint lock_flags = 0;
755 uint commit_flags = 0;
756 bool did_zeroing = false; 755 bool did_zeroing = false;
757 756
758 trace_xfs_setattr(ip); 757 trace_xfs_setattr(ip);
@@ -848,7 +847,11 @@ xfs_setattr_size(
848 * to hope that the caller sees ENOMEM and retries the truncate 847 * to hope that the caller sees ENOMEM and retries the truncate
849 * operation. 848 * operation.
850 */ 849 */
851 error = block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks); 850 if (IS_DAX(inode))
851 error = dax_truncate_page(inode, newsize, xfs_get_blocks_direct);
852 else
853 error = block_truncate_page(inode->i_mapping, newsize,
854 xfs_get_blocks);
852 if (error) 855 if (error)
853 return error; 856 return error;
854 truncate_setsize(inode, newsize); 857 truncate_setsize(inode, newsize);
@@ -858,7 +861,6 @@ xfs_setattr_size(
858 if (error) 861 if (error)
859 goto out_trans_cancel; 862 goto out_trans_cancel;
860 863
861 commit_flags = XFS_TRANS_RELEASE_LOG_RES;
862 lock_flags |= XFS_ILOCK_EXCL; 864 lock_flags |= XFS_ILOCK_EXCL;
863 xfs_ilock(ip, XFS_ILOCK_EXCL); 865 xfs_ilock(ip, XFS_ILOCK_EXCL);
864 xfs_trans_ijoin(tp, ip, 0); 866 xfs_trans_ijoin(tp, ip, 0);
@@ -898,7 +900,7 @@ xfs_setattr_size(
898 if (newsize <= oldsize) { 900 if (newsize <= oldsize) {
899 error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, newsize); 901 error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, newsize);
900 if (error) 902 if (error)
901 goto out_trans_abort; 903 goto out_trans_cancel;
902 904
903 /* 905 /*
904 * Truncated "down", so we're removing references to old data 906 * Truncated "down", so we're removing references to old data
@@ -925,16 +927,14 @@ xfs_setattr_size(
925 if (mp->m_flags & XFS_MOUNT_WSYNC) 927 if (mp->m_flags & XFS_MOUNT_WSYNC)
926 xfs_trans_set_sync(tp); 928 xfs_trans_set_sync(tp);
927 929
928 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 930 error = xfs_trans_commit(tp);
929out_unlock: 931out_unlock:
930 if (lock_flags) 932 if (lock_flags)
931 xfs_iunlock(ip, lock_flags); 933 xfs_iunlock(ip, lock_flags);
932 return error; 934 return error;
933 935
934out_trans_abort:
935 commit_flags |= XFS_TRANS_ABORT;
936out_trans_cancel: 936out_trans_cancel:
937 xfs_trans_cancel(tp, commit_flags); 937 xfs_trans_cancel(tp);
938 goto out_unlock; 938 goto out_unlock;
939} 939}
940 940
@@ -981,7 +981,7 @@ xfs_vn_update_time(
981 tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); 981 tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
982 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0); 982 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
983 if (error) { 983 if (error) {
984 xfs_trans_cancel(tp, 0); 984 xfs_trans_cancel(tp);
985 return error; 985 return error;
986 } 986 }
987 987
@@ -1003,7 +1003,7 @@ xfs_vn_update_time(
1003 } 1003 }
1004 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 1004 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
1005 xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP); 1005 xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
1006 return xfs_trans_commit(tp, 0); 1006 return xfs_trans_commit(tp);
1007} 1007}
1008 1008
1009#define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) 1009#define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
@@ -1188,22 +1188,22 @@ xfs_diflags_to_iflags(
1188 struct inode *inode, 1188 struct inode *inode,
1189 struct xfs_inode *ip) 1189 struct xfs_inode *ip)
1190{ 1190{
1191 if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE) 1191 uint16_t flags = ip->i_d.di_flags;
1192
1193 inode->i_flags &= ~(S_IMMUTABLE | S_APPEND | S_SYNC |
1194 S_NOATIME | S_DAX);
1195
1196 if (flags & XFS_DIFLAG_IMMUTABLE)
1192 inode->i_flags |= S_IMMUTABLE; 1197 inode->i_flags |= S_IMMUTABLE;
1193 else 1198 if (flags & XFS_DIFLAG_APPEND)
1194 inode->i_flags &= ~S_IMMUTABLE;
1195 if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
1196 inode->i_flags |= S_APPEND; 1199 inode->i_flags |= S_APPEND;
1197 else 1200 if (flags & XFS_DIFLAG_SYNC)
1198 inode->i_flags &= ~S_APPEND;
1199 if (ip->i_d.di_flags & XFS_DIFLAG_SYNC)
1200 inode->i_flags |= S_SYNC; 1201 inode->i_flags |= S_SYNC;
1201 else 1202 if (flags & XFS_DIFLAG_NOATIME)
1202 inode->i_flags &= ~S_SYNC;
1203 if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME)
1204 inode->i_flags |= S_NOATIME; 1203 inode->i_flags |= S_NOATIME;
1205 else 1204 /* XXX: Also needs an on-disk per inode flag! */
1206 inode->i_flags &= ~S_NOATIME; 1205 if (ip->i_mount->m_flags & XFS_MOUNT_DAX)
1206 inode->i_flags |= S_DAX;
1207} 1207}
1208 1208
1209/* 1209/*
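The xfs_diflags_to_iflags() rework above switches from per-flag if/else toggling to clearing every managed VFS flag once and then setting only the bits implied by the on-disk flags, with S_DAX still driven by the mount flag until a per-inode flag exists. A minimal user-space sketch of that clear-then-set pattern, using hypothetical flag constants in place of the kernel's S_*/XFS_DIFLAG_* values:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for the kernel's inode and on-disk flags. */
#define MY_S_IMMUTABLE	0x01
#define MY_S_APPEND	0x02
#define MY_S_SYNC	0x04
#define MY_S_NOATIME	0x08
#define MY_S_DAX	0x10

#define MY_DIFLAG_IMMUTABLE	0x0001
#define MY_DIFLAG_APPEND	0x0002
#define MY_DIFLAG_SYNC		0x0004
#define MY_DIFLAG_NOATIME	0x0008

/*
 * Clear every bit we manage, then set only the ones the on-disk flags
 * imply; no else branch can ever leave a stale bit behind.
 */
static unsigned int diflags_to_iflags(unsigned int i_flags, uint16_t di_flags,
				      int mount_dax)
{
	i_flags &= ~(MY_S_IMMUTABLE | MY_S_APPEND | MY_S_SYNC |
		     MY_S_NOATIME | MY_S_DAX);

	if (di_flags & MY_DIFLAG_IMMUTABLE)
		i_flags |= MY_S_IMMUTABLE;
	if (di_flags & MY_DIFLAG_APPEND)
		i_flags |= MY_S_APPEND;
	if (di_flags & MY_DIFLAG_SYNC)
		i_flags |= MY_S_SYNC;
	if (di_flags & MY_DIFLAG_NOATIME)
		i_flags |= MY_S_NOATIME;
	/* DAX comes from the mount flag in this series. */
	if (mount_dax)
		i_flags |= MY_S_DAX;

	return i_flags;
}

int main(void)
{
	/* A stale S_APPEND bit is cleared; S_SYNC and S_DAX are set. */
	unsigned int f = diflags_to_iflags(MY_S_APPEND, MY_DIFLAG_SYNC, 1);

	printf("iflags = 0x%x\n", f);
	return 0;
}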
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 80429891dc9b..f41b0c3fddab 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -252,7 +252,7 @@ xfs_bulkstat_grab_ichunk(
252 } 252 }
253 253
254 irec->ir_free |= xfs_inobt_maskn(0, idx); 254 irec->ir_free |= xfs_inobt_maskn(0, idx);
255 *icount = XFS_INODES_PER_CHUNK - irec->ir_freecount; 255 *icount = irec->ir_count - irec->ir_freecount;
256 } 256 }
257 257
258 return 0; 258 return 0;
@@ -415,6 +415,8 @@ xfs_bulkstat(
415 goto del_cursor; 415 goto del_cursor;
416 if (icount) { 416 if (icount) {
417 irbp->ir_startino = r.ir_startino; 417 irbp->ir_startino = r.ir_startino;
418 irbp->ir_holemask = r.ir_holemask;
419 irbp->ir_count = r.ir_count;
418 irbp->ir_freecount = r.ir_freecount; 420 irbp->ir_freecount = r.ir_freecount;
419 irbp->ir_free = r.ir_free; 421 irbp->ir_free = r.ir_free;
420 irbp++; 422 irbp++;
@@ -447,13 +449,15 @@ xfs_bulkstat(
447 * If this chunk has any allocated inodes, save it. 449 * If this chunk has any allocated inodes, save it.
448 * Also start read-ahead now for this chunk. 450 * Also start read-ahead now for this chunk.
449 */ 451 */
450 if (r.ir_freecount < XFS_INODES_PER_CHUNK) { 452 if (r.ir_freecount < r.ir_count) {
451 xfs_bulkstat_ichunk_ra(mp, agno, &r); 453 xfs_bulkstat_ichunk_ra(mp, agno, &r);
452 irbp->ir_startino = r.ir_startino; 454 irbp->ir_startino = r.ir_startino;
455 irbp->ir_holemask = r.ir_holemask;
456 irbp->ir_count = r.ir_count;
453 irbp->ir_freecount = r.ir_freecount; 457 irbp->ir_freecount = r.ir_freecount;
454 irbp->ir_free = r.ir_free; 458 irbp->ir_free = r.ir_free;
455 irbp++; 459 irbp++;
456 icount += XFS_INODES_PER_CHUNK - r.ir_freecount; 460 icount += r.ir_count - r.ir_freecount;
457 } 461 }
458 error = xfs_btree_increment(cur, 0, &stat); 462 error = xfs_btree_increment(cur, 0, &stat);
459 if (error || stat == 0) { 463 if (error || stat == 0) {
@@ -599,8 +603,7 @@ xfs_inumbers(
599 agino = r.ir_startino + XFS_INODES_PER_CHUNK - 1; 603 agino = r.ir_startino + XFS_INODES_PER_CHUNK - 1;
600 buffer[bufidx].xi_startino = 604 buffer[bufidx].xi_startino =
601 XFS_AGINO_TO_INO(mp, agno, r.ir_startino); 605 XFS_AGINO_TO_INO(mp, agno, r.ir_startino);
602 buffer[bufidx].xi_alloccount = 606 buffer[bufidx].xi_alloccount = r.ir_count - r.ir_freecount;
603 XFS_INODES_PER_CHUNK - r.ir_freecount;
604 buffer[bufidx].xi_allocmask = ~r.ir_free; 607 buffer[bufidx].xi_allocmask = ~r.ir_free;
605 if (++bufidx == bcount) { 608 if (++bufidx == bcount) {
606 long written; 609 long written;
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index 7c7842c85a08..85f883dd6207 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -32,26 +32,12 @@ typedef unsigned int __uint32_t;
32typedef signed long long int __int64_t; 32typedef signed long long int __int64_t;
33typedef unsigned long long int __uint64_t; 33typedef unsigned long long int __uint64_t;
34 34
35typedef __uint32_t inst_t; /* an instruction */
36
37typedef __s64 xfs_off_t; /* <file offset> type */ 35typedef __s64 xfs_off_t; /* <file offset> type */
38typedef unsigned long long xfs_ino_t; /* <inode> type */ 36typedef unsigned long long xfs_ino_t; /* <inode> type */
39typedef __s64 xfs_daddr_t; /* <disk address> type */ 37typedef __s64 xfs_daddr_t; /* <disk address> type */
40typedef char * xfs_caddr_t; /* <core address> type */
41typedef __u32 xfs_dev_t; 38typedef __u32 xfs_dev_t;
42typedef __u32 xfs_nlink_t; 39typedef __u32 xfs_nlink_t;
43 40
44/* __psint_t is the same size as a pointer */
45#if (BITS_PER_LONG == 32)
46typedef __int32_t __psint_t;
47typedef __uint32_t __psunsigned_t;
48#elif (BITS_PER_LONG == 64)
49typedef __int64_t __psint_t;
50typedef __uint64_t __psunsigned_t;
51#else
52#error BITS_PER_LONG must be 32 or 64
53#endif
54
55#include "xfs_types.h" 41#include "xfs_types.h"
56 42
57#include "kmem.h" 43#include "kmem.h"
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index bcc7cfabb787..08d4fe46f0fa 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -109,7 +109,7 @@ xlog_ungrant_log_space(
109STATIC void 109STATIC void
110xlog_verify_dest_ptr( 110xlog_verify_dest_ptr(
111 struct xlog *log, 111 struct xlog *log,
112 char *ptr); 112 void *ptr);
113STATIC void 113STATIC void
114xlog_verify_grant_tail( 114xlog_verify_grant_tail(
115 struct xlog *log); 115 struct xlog *log);
@@ -513,7 +513,7 @@ xfs_log_done(
513 struct xfs_mount *mp, 513 struct xfs_mount *mp,
514 struct xlog_ticket *ticket, 514 struct xlog_ticket *ticket,
515 struct xlog_in_core **iclog, 515 struct xlog_in_core **iclog,
516 uint flags) 516 bool regrant)
517{ 517{
518 struct xlog *log = mp->m_log; 518 struct xlog *log = mp->m_log;
519 xfs_lsn_t lsn = 0; 519 xfs_lsn_t lsn = 0;
@@ -526,14 +526,11 @@ xfs_log_done(
526 (((ticket->t_flags & XLOG_TIC_INITED) == 0) && 526 (((ticket->t_flags & XLOG_TIC_INITED) == 0) &&
527 (xlog_commit_record(log, ticket, iclog, &lsn)))) { 527 (xlog_commit_record(log, ticket, iclog, &lsn)))) {
528 lsn = (xfs_lsn_t) -1; 528 lsn = (xfs_lsn_t) -1;
529 if (ticket->t_flags & XLOG_TIC_PERM_RESERV) { 529 regrant = false;
530 flags |= XFS_LOG_REL_PERM_RESERV;
531 }
532 } 530 }
533 531
534 532
535 if ((ticket->t_flags & XLOG_TIC_PERM_RESERV) == 0 || 533 if (!regrant) {
536 (flags & XFS_LOG_REL_PERM_RESERV)) {
537 trace_xfs_log_done_nonperm(log, ticket); 534 trace_xfs_log_done_nonperm(log, ticket);
538 535
539 /* 536 /*
@@ -541,7 +538,6 @@ xfs_log_done(
541 * request has been made to release a permanent reservation. 538 * request has been made to release a permanent reservation.
542 */ 539 */
543 xlog_ungrant_log_space(log, ticket); 540 xlog_ungrant_log_space(log, ticket);
544 xfs_log_ticket_put(ticket);
545 } else { 541 } else {
546 trace_xfs_log_done_perm(log, ticket); 542 trace_xfs_log_done_perm(log, ticket);
547 543
@@ -553,6 +549,7 @@ xfs_log_done(
553 ticket->t_flags |= XLOG_TIC_INITED; 549 ticket->t_flags |= XLOG_TIC_INITED;
554 } 550 }
555 551
552 xfs_log_ticket_put(ticket);
556 return lsn; 553 return lsn;
557} 554}
558 555
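The xfs_log_done() rework above replaces the flags argument with a regrant boolean and moves xfs_log_ticket_put() out of the release-only branch, so the ticket reference is dropped on both paths and a failed commit never regrants. A toy user-space model of that control flow, with a counter standing in for the real ticket reference handling:

#include <stdbool.h>
#include <stdio.h>

/* Toy ticket: just counts how often the reference is dropped. */
struct ticket {
	int puts;
};

static void ticket_put(struct ticket *t)
{
	t->puts++;
}

/*
 * Model of the reworked tail of xfs_log_done(): regrant only selects the
 * bookkeeping path; the reference drop is unconditional.
 */
static void log_done(struct ticket *t, bool regrant, bool commit_failed)
{
	if (commit_failed)
		regrant = false;	/* a failed commit never regrants */

	if (!regrant)
		printf("ungrant log space (reservation released)\n");
	else
		printf("regrant log space for the next transaction\n");

	ticket_put(t);
}

int main(void)
{
	struct ticket t = { 0 };

	log_done(&t, true, false);
	log_done(&t, false, false);
	printf("ticket put %d times for 2 calls\n", t.puts);
	return 0;
}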
@@ -1447,7 +1444,7 @@ xlog_alloc_log(
1447 iclog->ic_bp = bp; 1444 iclog->ic_bp = bp;
1448 iclog->ic_data = bp->b_addr; 1445 iclog->ic_data = bp->b_addr;
1449#ifdef DEBUG 1446#ifdef DEBUG
1450 log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header); 1447 log->l_iclog_bak[i] = &iclog->ic_header;
1451#endif 1448#endif
1452 head = &iclog->ic_header; 1449 head = &iclog->ic_header;
1453 memset(head, 0, sizeof(xlog_rec_header_t)); 1450 memset(head, 0, sizeof(xlog_rec_header_t));
@@ -1602,7 +1599,7 @@ xlog_pack_data(
1602 int i, j, k; 1599 int i, j, k;
1603 int size = iclog->ic_offset + roundoff; 1600 int size = iclog->ic_offset + roundoff;
1604 __be32 cycle_lsn; 1601 __be32 cycle_lsn;
1605 xfs_caddr_t dp; 1602 char *dp;
1606 1603
1607 cycle_lsn = CYCLE_LSN_DISK(iclog->ic_header.h_lsn); 1604 cycle_lsn = CYCLE_LSN_DISK(iclog->ic_header.h_lsn);
1608 1605
@@ -3664,7 +3661,7 @@ xlog_ticket_alloc(
3664void 3661void
3665xlog_verify_dest_ptr( 3662xlog_verify_dest_ptr(
3666 struct xlog *log, 3663 struct xlog *log,
3667 char *ptr) 3664 void *ptr)
3668{ 3665{
3669 int i; 3666 int i;
3670 int good_ptr = 0; 3667 int good_ptr = 0;
@@ -3767,9 +3764,8 @@ xlog_verify_iclog(
3767 xlog_op_header_t *ophead; 3764 xlog_op_header_t *ophead;
3768 xlog_in_core_t *icptr; 3765 xlog_in_core_t *icptr;
3769 xlog_in_core_2_t *xhdr; 3766 xlog_in_core_2_t *xhdr;
3770 xfs_caddr_t ptr; 3767 void *base_ptr, *ptr, *p;
3771 xfs_caddr_t base_ptr; 3768 ptrdiff_t field_offset;
3772 __psint_t field_offset;
3773 __uint8_t clientid; 3769 __uint8_t clientid;
3774 int len, i, j, k, op_len; 3770 int len, i, j, k, op_len;
3775 int idx; 3771 int idx;
@@ -3788,9 +3784,9 @@ xlog_verify_iclog(
3788 if (iclog->ic_header.h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) 3784 if (iclog->ic_header.h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))
3789 xfs_emerg(log->l_mp, "%s: invalid magic num", __func__); 3785 xfs_emerg(log->l_mp, "%s: invalid magic num", __func__);
3790 3786
3791 ptr = (xfs_caddr_t) &iclog->ic_header; 3787 base_ptr = ptr = &iclog->ic_header;
3792 for (ptr += BBSIZE; ptr < ((xfs_caddr_t)&iclog->ic_header) + count; 3788 p = &iclog->ic_header;
3793 ptr += BBSIZE) { 3789 for (ptr += BBSIZE; ptr < base_ptr + count; ptr += BBSIZE) {
3794 if (*(__be32 *)ptr == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) 3790 if (*(__be32 *)ptr == cpu_to_be32(XLOG_HEADER_MAGIC_NUM))
3795 xfs_emerg(log->l_mp, "%s: unexpected magic num", 3791 xfs_emerg(log->l_mp, "%s: unexpected magic num",
3796 __func__); 3792 __func__);
@@ -3798,20 +3794,19 @@ xlog_verify_iclog(
3798 3794
3799 /* check fields */ 3795 /* check fields */
3800 len = be32_to_cpu(iclog->ic_header.h_num_logops); 3796 len = be32_to_cpu(iclog->ic_header.h_num_logops);
3801 ptr = iclog->ic_datap; 3797 base_ptr = ptr = iclog->ic_datap;
3802 base_ptr = ptr; 3798 ophead = ptr;
3803 ophead = (xlog_op_header_t *)ptr;
3804 xhdr = iclog->ic_data; 3799 xhdr = iclog->ic_data;
3805 for (i = 0; i < len; i++) { 3800 for (i = 0; i < len; i++) {
3806 ophead = (xlog_op_header_t *)ptr; 3801 ophead = ptr;
3807 3802
3808 /* clientid is only 1 byte */ 3803 /* clientid is only 1 byte */
3809 field_offset = (__psint_t) 3804 p = &ophead->oh_clientid;
3810 ((xfs_caddr_t)&(ophead->oh_clientid) - base_ptr); 3805 field_offset = p - base_ptr;
3811 if (!syncing || (field_offset & 0x1ff)) { 3806 if (!syncing || (field_offset & 0x1ff)) {
3812 clientid = ophead->oh_clientid; 3807 clientid = ophead->oh_clientid;
3813 } else { 3808 } else {
3814 idx = BTOBBT((xfs_caddr_t)&(ophead->oh_clientid) - iclog->ic_datap); 3809 idx = BTOBBT((char *)&ophead->oh_clientid - iclog->ic_datap);
3815 if (idx >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE)) { 3810 if (idx >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE)) {
3816 j = idx / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); 3811 j = idx / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
3817 k = idx % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); 3812 k = idx % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
@@ -3829,13 +3824,13 @@ xlog_verify_iclog(
3829 (unsigned long)field_offset); 3824 (unsigned long)field_offset);
3830 3825
3831 /* check length */ 3826 /* check length */
3832 field_offset = (__psint_t) 3827 p = &ophead->oh_len;
3833 ((xfs_caddr_t)&(ophead->oh_len) - base_ptr); 3828 field_offset = p - base_ptr;
3834 if (!syncing || (field_offset & 0x1ff)) { 3829 if (!syncing || (field_offset & 0x1ff)) {
3835 op_len = be32_to_cpu(ophead->oh_len); 3830 op_len = be32_to_cpu(ophead->oh_len);
3836 } else { 3831 } else {
3837 idx = BTOBBT((__psint_t)&ophead->oh_len - 3832 idx = BTOBBT((uintptr_t)&ophead->oh_len -
3838 (__psint_t)iclog->ic_datap); 3833 (uintptr_t)iclog->ic_datap);
3839 if (idx >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE)) { 3834 if (idx >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE)) {
3840 j = idx / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); 3835 j = idx / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
3841 k = idx % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); 3836 k = idx % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
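The xfs_log.c hunks above retire the XFS-private pointer typedefs (xfs_caddr_t, __psint_t) in favour of plain void */char * plus ptrdiff_t and uintptr_t for offset arithmetic. A small standalone sketch of the same offset math; it uses char * (standard C, unlike the kernel's GCC void-pointer arithmetic extension) and a hypothetical header struct standing in for xlog_op_header_t:

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

/* Hypothetical on-log operation header. */
struct op_header {
	uint32_t oh_len;
	uint8_t  oh_clientid;
};

int main(void)
{
	uint32_t buffer[128];			/* 4-byte aligned backing store */
	char *base_ptr = (char *)buffer;
	struct op_header *ophead = (struct op_header *)(base_ptr + 32);

	/* Field offset from the record start: plain pointer subtraction
	 * yields a ptrdiff_t, no pointer-sized integer typedef needed. */
	ptrdiff_t field_offset = (char *)&ophead->oh_clientid - base_ptr;

	/* Where an integer view of an address is unavoidable, uintptr_t
	 * replaces the old __psint_t casts. */
	uintptr_t pos = (uintptr_t)&ophead->oh_len - (uintptr_t)buffer;

	printf("clientid offset = %td, len offset = %zu\n",
	       field_offset, (size_t)pos);
	return 0;
}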
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 84e0deb95abd..fa27aaec72cb 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -111,15 +111,6 @@ static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
111#define XFS_LSN_CMP(x,y) _lsn_cmp(x,y) 111#define XFS_LSN_CMP(x,y) _lsn_cmp(x,y)
112 112
113/* 113/*
114 * Macros, structures, prototypes for interface to the log manager.
115 */
116
117/*
118 * Flags to xfs_log_done()
119 */
120#define XFS_LOG_REL_PERM_RESERV 0x1
121
122/*
123 * Flags to xfs_log_force() 114 * Flags to xfs_log_force()
124 * 115 *
125 * XFS_LOG_SYNC: Synchronous force in-core log to disk 116 * XFS_LOG_SYNC: Synchronous force in-core log to disk
@@ -138,7 +129,7 @@ struct xfs_log_callback;
138xfs_lsn_t xfs_log_done(struct xfs_mount *mp, 129xfs_lsn_t xfs_log_done(struct xfs_mount *mp,
139 struct xlog_ticket *ticket, 130 struct xlog_ticket *ticket,
140 struct xlog_in_core **iclog, 131 struct xlog_in_core **iclog,
141 uint flags); 132 bool regrant);
142int _xfs_log_force(struct xfs_mount *mp, 133int _xfs_log_force(struct xfs_mount *mp,
143 uint flags, 134 uint flags,
144 int *log_forced); 135 int *log_forced);
@@ -183,7 +174,7 @@ struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
183void xfs_log_ticket_put(struct xlog_ticket *ticket); 174void xfs_log_ticket_put(struct xlog_ticket *ticket);
184 175
185void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, 176void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
186 xfs_lsn_t *commit_lsn, int flags); 177 xfs_lsn_t *commit_lsn, bool regrant);
187bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); 178bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
188 179
189void xfs_log_work_queue(struct xfs_mount *mp); 180void xfs_log_work_queue(struct xfs_mount *mp);
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 45cc0ce18adf..abc2ccbff739 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -624,7 +624,7 @@ restart:
624 spin_unlock(&cil->xc_push_lock); 624 spin_unlock(&cil->xc_push_lock);
625 625
626 /* xfs_log_done always frees the ticket on error. */ 626 /* xfs_log_done always frees the ticket on error. */
627 commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, 0); 627 commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, false);
628 if (commit_lsn == -1) 628 if (commit_lsn == -1)
629 goto out_abort; 629 goto out_abort;
630 630
@@ -773,14 +773,10 @@ xfs_log_commit_cil(
773 struct xfs_mount *mp, 773 struct xfs_mount *mp,
774 struct xfs_trans *tp, 774 struct xfs_trans *tp,
775 xfs_lsn_t *commit_lsn, 775 xfs_lsn_t *commit_lsn,
776 int flags) 776 bool regrant)
777{ 777{
778 struct xlog *log = mp->m_log; 778 struct xlog *log = mp->m_log;
779 struct xfs_cil *cil = log->l_cilp; 779 struct xfs_cil *cil = log->l_cilp;
780 int log_flags = 0;
781
782 if (flags & XFS_TRANS_RELEASE_LOG_RES)
783 log_flags = XFS_LOG_REL_PERM_RESERV;
784 780
785 /* lock out background commit */ 781 /* lock out background commit */
786 down_read(&cil->xc_ctx_lock); 782 down_read(&cil->xc_ctx_lock);
@@ -795,7 +791,7 @@ xfs_log_commit_cil(
795 if (commit_lsn) 791 if (commit_lsn)
796 *commit_lsn = tp->t_commit_lsn; 792 *commit_lsn = tp->t_commit_lsn;
797 793
798 xfs_log_done(mp, tp->t_ticket, NULL, log_flags); 794 xfs_log_done(mp, tp->t_ticket, NULL, regrant);
799 xfs_trans_unreserve_and_mod_sb(tp); 795 xfs_trans_unreserve_and_mod_sb(tp);
800 796
801 /* 797 /*
@@ -809,7 +805,7 @@ xfs_log_commit_cil(
809 * the log items. This affects (at least) processing of stale buffers, 805 * the log items. This affects (at least) processing of stale buffers,
810 * inodes and EFIs. 806 * inodes and EFIs.
811 */ 807 */
812 xfs_trans_free_items(tp, tp->t_commit_lsn, 0); 808 xfs_trans_free_items(tp, tp->t_commit_lsn, false);
813 809
814 xlog_cil_push_background(log); 810 xlog_cil_push_background(log);
815 811
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index db7cbdeb2b42..1c87c8abfbed 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -409,7 +409,7 @@ struct xlog {
409 409
410 /* The following field are used for debugging; need to hold icloglock */ 410 /* The following field are used for debugging; need to hold icloglock */
411#ifdef DEBUG 411#ifdef DEBUG
412 char *l_iclog_bak[XLOG_MAX_ICLOGS]; 412 void *l_iclog_bak[XLOG_MAX_ICLOGS];
413#endif 413#endif
414 414
415}; 415};
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 4f5784f85a5b..01dd228ca05e 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -147,7 +147,7 @@ xlog_put_bp(
147 * Return the address of the start of the given block number's data 147 * Return the address of the start of the given block number's data
148 * in a log buffer. The buffer covers a log sector-aligned region. 148 * in a log buffer. The buffer covers a log sector-aligned region.
149 */ 149 */
150STATIC xfs_caddr_t 150STATIC char *
151xlog_align( 151xlog_align(
152 struct xlog *log, 152 struct xlog *log,
153 xfs_daddr_t blk_no, 153 xfs_daddr_t blk_no,
@@ -203,7 +203,7 @@ xlog_bread(
203 xfs_daddr_t blk_no, 203 xfs_daddr_t blk_no,
204 int nbblks, 204 int nbblks,
205 struct xfs_buf *bp, 205 struct xfs_buf *bp,
206 xfs_caddr_t *offset) 206 char **offset)
207{ 207{
208 int error; 208 int error;
209 209
@@ -225,9 +225,9 @@ xlog_bread_offset(
225 xfs_daddr_t blk_no, /* block to read from */ 225 xfs_daddr_t blk_no, /* block to read from */
226 int nbblks, /* blocks to read */ 226 int nbblks, /* blocks to read */
227 struct xfs_buf *bp, 227 struct xfs_buf *bp,
228 xfs_caddr_t offset) 228 char *offset)
229{ 229{
230 xfs_caddr_t orig_offset = bp->b_addr; 230 char *orig_offset = bp->b_addr;
231 int orig_len = BBTOB(bp->b_length); 231 int orig_len = BBTOB(bp->b_length);
232 int error, error2; 232 int error, error2;
233 233
@@ -396,7 +396,7 @@ xlog_find_cycle_start(
396 xfs_daddr_t *last_blk, 396 xfs_daddr_t *last_blk,
397 uint cycle) 397 uint cycle)
398{ 398{
399 xfs_caddr_t offset; 399 char *offset;
400 xfs_daddr_t mid_blk; 400 xfs_daddr_t mid_blk;
401 xfs_daddr_t end_blk; 401 xfs_daddr_t end_blk;
402 uint mid_cycle; 402 uint mid_cycle;
@@ -443,7 +443,7 @@ xlog_find_verify_cycle(
443 uint cycle; 443 uint cycle;
444 xfs_buf_t *bp; 444 xfs_buf_t *bp;
445 xfs_daddr_t bufblks; 445 xfs_daddr_t bufblks;
446 xfs_caddr_t buf = NULL; 446 char *buf = NULL;
447 int error = 0; 447 int error = 0;
448 448
449 /* 449 /*
@@ -509,7 +509,7 @@ xlog_find_verify_log_record(
509{ 509{
510 xfs_daddr_t i; 510 xfs_daddr_t i;
511 xfs_buf_t *bp; 511 xfs_buf_t *bp;
512 xfs_caddr_t offset = NULL; 512 char *offset = NULL;
513 xlog_rec_header_t *head = NULL; 513 xlog_rec_header_t *head = NULL;
514 int error = 0; 514 int error = 0;
515 int smallmem = 0; 515 int smallmem = 0;
@@ -616,7 +616,7 @@ xlog_find_head(
616 xfs_daddr_t *return_head_blk) 616 xfs_daddr_t *return_head_blk)
617{ 617{
618 xfs_buf_t *bp; 618 xfs_buf_t *bp;
619 xfs_caddr_t offset; 619 char *offset;
620 xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk; 620 xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk;
621 int num_scan_bblks; 621 int num_scan_bblks;
622 uint first_half_cycle, last_half_cycle; 622 uint first_half_cycle, last_half_cycle;
@@ -891,7 +891,7 @@ xlog_find_tail(
891{ 891{
892 xlog_rec_header_t *rhead; 892 xlog_rec_header_t *rhead;
893 xlog_op_header_t *op_head; 893 xlog_op_header_t *op_head;
894 xfs_caddr_t offset = NULL; 894 char *offset = NULL;
895 xfs_buf_t *bp; 895 xfs_buf_t *bp;
896 int error, i, found; 896 int error, i, found;
897 xfs_daddr_t umount_data_blk; 897 xfs_daddr_t umount_data_blk;
@@ -1099,7 +1099,7 @@ xlog_find_zeroed(
1099 xfs_daddr_t *blk_no) 1099 xfs_daddr_t *blk_no)
1100{ 1100{
1101 xfs_buf_t *bp; 1101 xfs_buf_t *bp;
1102 xfs_caddr_t offset; 1102 char *offset;
1103 uint first_cycle, last_cycle; 1103 uint first_cycle, last_cycle;
1104 xfs_daddr_t new_blk, last_blk, start_blk; 1104 xfs_daddr_t new_blk, last_blk, start_blk;
1105 xfs_daddr_t num_scan_bblks; 1105 xfs_daddr_t num_scan_bblks;
@@ -1199,7 +1199,7 @@ bp_err:
1199STATIC void 1199STATIC void
1200xlog_add_record( 1200xlog_add_record(
1201 struct xlog *log, 1201 struct xlog *log,
1202 xfs_caddr_t buf, 1202 char *buf,
1203 int cycle, 1203 int cycle,
1204 int block, 1204 int block,
1205 int tail_cycle, 1205 int tail_cycle,
@@ -1227,7 +1227,7 @@ xlog_write_log_records(
1227 int tail_cycle, 1227 int tail_cycle,
1228 int tail_block) 1228 int tail_block)
1229{ 1229{
1230 xfs_caddr_t offset; 1230 char *offset;
1231 xfs_buf_t *bp; 1231 xfs_buf_t *bp;
1232 int balign, ealign; 1232 int balign, ealign;
1233 int sectbb = log->l_sectBBsize; 1233 int sectbb = log->l_sectBBsize;
@@ -1789,8 +1789,7 @@ xlog_recover_do_inode_buffer(
1789 return -EFSCORRUPTED; 1789 return -EFSCORRUPTED;
1790 } 1790 }
1791 1791
1792 buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp, 1792 buffer_nextp = xfs_buf_offset(bp, next_unlinked_offset);
1793 next_unlinked_offset);
1794 *buffer_nextp = *logged_nextp; 1793 *buffer_nextp = *logged_nextp;
1795 1794
1796 /* 1795 /*
@@ -1798,7 +1797,7 @@ xlog_recover_do_inode_buffer(
1798 * have to leave the inode in a consistent state for whoever 1797 * have to leave the inode in a consistent state for whoever
1799 * reads it next.... 1798 * reads it next....
1800 */ 1799 */
1801 xfs_dinode_calc_crc(mp, (struct xfs_dinode *) 1800 xfs_dinode_calc_crc(mp,
1802 xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize)); 1801 xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize));
1803 1802
1804 } 1803 }
@@ -2503,8 +2502,8 @@ xlog_recover_inode_pass2(
2503 xfs_buf_t *bp; 2502 xfs_buf_t *bp;
2504 xfs_dinode_t *dip; 2503 xfs_dinode_t *dip;
2505 int len; 2504 int len;
2506 xfs_caddr_t src; 2505 char *src;
2507 xfs_caddr_t dest; 2506 char *dest;
2508 int error; 2507 int error;
2509 int attr_index; 2508 int attr_index;
2510 uint fields; 2509 uint fields;
@@ -2546,7 +2545,7 @@ xlog_recover_inode_pass2(
2546 goto out_release; 2545 goto out_release;
2547 } 2546 }
2548 ASSERT(in_f->ilf_fields & XFS_ILOG_CORE); 2547 ASSERT(in_f->ilf_fields & XFS_ILOG_CORE);
2549 dip = (xfs_dinode_t *)xfs_buf_offset(bp, in_f->ilf_boffset); 2548 dip = xfs_buf_offset(bp, in_f->ilf_boffset);
2550 2549
2551 /* 2550 /*
2552 * Make sure the place we're flushing out to really looks 2551 * Make sure the place we're flushing out to really looks
@@ -2885,7 +2884,7 @@ xlog_recover_dquot_pass2(
2885 return error; 2884 return error;
2886 2885
2887 ASSERT(bp); 2886 ASSERT(bp);
2888 ddq = (xfs_disk_dquot_t *)xfs_buf_offset(bp, dq_f->qlf_boffset); 2887 ddq = xfs_buf_offset(bp, dq_f->qlf_boffset);
2889 2888
2890 /* 2889 /*
2891 * If the dquot has an LSN in it, recover the dquot only if it's less 2890 * If the dquot has an LSN in it, recover the dquot only if it's less
@@ -3068,12 +3067,22 @@ xlog_recover_do_icreate_pass2(
3068 return -EINVAL; 3067 return -EINVAL;
3069 } 3068 }
3070 3069
3071 /* existing allocation is fixed value */ 3070 /*
3072 ASSERT(count == mp->m_ialloc_inos); 3071 * The inode chunk is either full or sparse and we only support
3073 ASSERT(length == mp->m_ialloc_blks); 3072 * m_ialloc_min_blks sized sparse allocations at this time.
3074 if (count != mp->m_ialloc_inos || 3073 */
3075 length != mp->m_ialloc_blks) { 3074 if (length != mp->m_ialloc_blks &&
3076 xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count 2"); 3075 length != mp->m_ialloc_min_blks) {
3076 xfs_warn(log->l_mp,
3077 "%s: unsupported chunk length", __FUNCTION__);
3078 return -EINVAL;
3079 }
3080
3081 /* verify inode count is consistent with extent length */
3082 if ((count >> mp->m_sb.sb_inopblog) != length) {
3083 xfs_warn(log->l_mp,
3084 "%s: inconsistent inode count and chunk length",
3085 __FUNCTION__);
3077 return -EINVAL; 3086 return -EINVAL;
3078 } 3087 }
3079 3088
@@ -3091,8 +3100,8 @@ xlog_recover_do_icreate_pass2(
3091 XFS_AGB_TO_DADDR(mp, agno, agbno), length, 0)) 3100 XFS_AGB_TO_DADDR(mp, agno, agbno), length, 0))
3092 return 0; 3101 return 0;
3093 3102
3094 xfs_ialloc_inode_init(mp, NULL, buffer_list, agno, agbno, length, 3103 xfs_ialloc_inode_init(mp, NULL, buffer_list, count, agno, agbno, length,
3095 be32_to_cpu(icl->icl_gen)); 3104 be32_to_cpu(icl->icl_gen));
3096 return 0; 3105 return 0;
3097} 3106}
3098 3107
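The icreate recovery change above drops the fixed full-chunk assertion and instead checks two things: the extent length must be either a full chunk (m_ialloc_blks) or the minimum sparse allocation (m_ialloc_min_blks), and the logged inode count must agree with that length given the inodes-per-block shift. A small sketch of that validation under assumed geometry (16 inodes per block, 4-block full chunks, 2-block sparse chunks):

#include <stdbool.h>
#include <stdio.h>

/* Assumed mount geometry for illustration only. */
#define INOPBLOG	4	/* log2(inodes per block) */
#define IALLOC_BLKS	4	/* blocks in a full inode chunk */
#define IALLOC_MIN_BLKS	2	/* blocks in a minimal sparse allocation */

/* Model of the icreate item checks: reject unsupported lengths and
 * counts that disagree with the length. */
static bool icreate_valid(unsigned int count, unsigned int length)
{
	if (length != IALLOC_BLKS && length != IALLOC_MIN_BLKS) {
		printf("unsupported chunk length %u\n", length);
		return false;
	}
	if ((count >> INOPBLOG) != length) {
		printf("count %u inconsistent with length %u\n", count, length);
		return false;
	}
	return true;
}

int main(void)
{
	icreate_valid(64, 4);	/* full chunk: 64 inodes over 4 blocks, ok */
	icreate_valid(32, 2);	/* sparse chunk: 32 inodes over 2 blocks, ok */
	icreate_valid(64, 2);	/* count does not match length: rejected */
	icreate_valid(64, 3);	/* unsupported length: rejected */
	return 0;
}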
@@ -3364,17 +3373,17 @@ STATIC int
3364xlog_recover_add_to_cont_trans( 3373xlog_recover_add_to_cont_trans(
3365 struct xlog *log, 3374 struct xlog *log,
3366 struct xlog_recover *trans, 3375 struct xlog_recover *trans,
3367 xfs_caddr_t dp, 3376 char *dp,
3368 int len) 3377 int len)
3369{ 3378{
3370 xlog_recover_item_t *item; 3379 xlog_recover_item_t *item;
3371 xfs_caddr_t ptr, old_ptr; 3380 char *ptr, *old_ptr;
3372 int old_len; 3381 int old_len;
3373 3382
3374 if (list_empty(&trans->r_itemq)) { 3383 if (list_empty(&trans->r_itemq)) {
3375 /* finish copying rest of trans header */ 3384 /* finish copying rest of trans header */
3376 xlog_recover_add_item(&trans->r_itemq); 3385 xlog_recover_add_item(&trans->r_itemq);
3377 ptr = (xfs_caddr_t) &trans->r_theader + 3386 ptr = (char *)&trans->r_theader +
3378 sizeof(xfs_trans_header_t) - len; 3387 sizeof(xfs_trans_header_t) - len;
3379 memcpy(ptr, dp, len); 3388 memcpy(ptr, dp, len);
3380 return 0; 3389 return 0;
@@ -3410,12 +3419,12 @@ STATIC int
3410xlog_recover_add_to_trans( 3419xlog_recover_add_to_trans(
3411 struct xlog *log, 3420 struct xlog *log,
3412 struct xlog_recover *trans, 3421 struct xlog_recover *trans,
3413 xfs_caddr_t dp, 3422 char *dp,
3414 int len) 3423 int len)
3415{ 3424{
3416 xfs_inode_log_format_t *in_f; /* any will do */ 3425 xfs_inode_log_format_t *in_f; /* any will do */
3417 xlog_recover_item_t *item; 3426 xlog_recover_item_t *item;
3418 xfs_caddr_t ptr; 3427 char *ptr;
3419 3428
3420 if (!len) 3429 if (!len)
3421 return 0; 3430 return 0;
@@ -3504,7 +3513,7 @@ STATIC int
3504xlog_recovery_process_trans( 3513xlog_recovery_process_trans(
3505 struct xlog *log, 3514 struct xlog *log,
3506 struct xlog_recover *trans, 3515 struct xlog_recover *trans,
3507 xfs_caddr_t dp, 3516 char *dp,
3508 unsigned int len, 3517 unsigned int len,
3509 unsigned int flags, 3518 unsigned int flags,
3510 int pass) 3519 int pass)
@@ -3611,8 +3620,8 @@ xlog_recover_process_ophdr(
3611 struct hlist_head rhash[], 3620 struct hlist_head rhash[],
3612 struct xlog_rec_header *rhead, 3621 struct xlog_rec_header *rhead,
3613 struct xlog_op_header *ohead, 3622 struct xlog_op_header *ohead,
3614 xfs_caddr_t dp, 3623 char *dp,
3615 xfs_caddr_t end, 3624 char *end,
3616 int pass) 3625 int pass)
3617{ 3626{
3618 struct xlog_recover *trans; 3627 struct xlog_recover *trans;
@@ -3661,11 +3670,11 @@ xlog_recover_process_data(
3661 struct xlog *log, 3670 struct xlog *log,
3662 struct hlist_head rhash[], 3671 struct hlist_head rhash[],
3663 struct xlog_rec_header *rhead, 3672 struct xlog_rec_header *rhead,
3664 xfs_caddr_t dp, 3673 char *dp,
3665 int pass) 3674 int pass)
3666{ 3675{
3667 struct xlog_op_header *ohead; 3676 struct xlog_op_header *ohead;
3668 xfs_caddr_t end; 3677 char *end;
3669 int num_logops; 3678 int num_logops;
3670 int error; 3679 int error;
3671 3680
@@ -3751,11 +3760,11 @@ xlog_recover_process_efi(
3751 } 3760 }
3752 3761
3753 set_bit(XFS_EFI_RECOVERED, &efip->efi_flags); 3762 set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
3754 error = xfs_trans_commit(tp, 0); 3763 error = xfs_trans_commit(tp);
3755 return error; 3764 return error;
3756 3765
3757abort_error: 3766abort_error:
3758 xfs_trans_cancel(tp, XFS_TRANS_ABORT); 3767 xfs_trans_cancel(tp);
3759 return error; 3768 return error;
3760} 3769}
3761 3770
@@ -3857,13 +3866,13 @@ xlog_recover_clear_agi_bucket(
3857 xfs_trans_log_buf(tp, agibp, offset, 3866 xfs_trans_log_buf(tp, agibp, offset,
3858 (offset + sizeof(xfs_agino_t) - 1)); 3867 (offset + sizeof(xfs_agino_t) - 1));
3859 3868
3860 error = xfs_trans_commit(tp, 0); 3869 error = xfs_trans_commit(tp);
3861 if (error) 3870 if (error)
3862 goto out_error; 3871 goto out_error;
3863 return; 3872 return;
3864 3873
3865out_abort: 3874out_abort:
3866 xfs_trans_cancel(tp, XFS_TRANS_ABORT); 3875 xfs_trans_cancel(tp);
3867out_error: 3876out_error:
3868 xfs_warn(mp, "%s: failed to clear agi %d. Continuing.", __func__, agno); 3877 xfs_warn(mp, "%s: failed to clear agi %d. Continuing.", __func__, agno);
3869 return; 3878 return;
@@ -4010,7 +4019,7 @@ xlog_recover_process_iunlinks(
4010STATIC int 4019STATIC int
4011xlog_unpack_data_crc( 4020xlog_unpack_data_crc(
4012 struct xlog_rec_header *rhead, 4021 struct xlog_rec_header *rhead,
4013 xfs_caddr_t dp, 4022 char *dp,
4014 struct xlog *log) 4023 struct xlog *log)
4015{ 4024{
4016 __le32 crc; 4025 __le32 crc;
@@ -4040,7 +4049,7 @@ xlog_unpack_data_crc(
4040STATIC int 4049STATIC int
4041xlog_unpack_data( 4050xlog_unpack_data(
4042 struct xlog_rec_header *rhead, 4051 struct xlog_rec_header *rhead,
4043 xfs_caddr_t dp, 4052 char *dp,
4044 struct xlog *log) 4053 struct xlog *log)
4045{ 4054{
4046 int i, j, k; 4055 int i, j, k;
@@ -4122,7 +4131,7 @@ xlog_do_recovery_pass(
4122{ 4131{
4123 xlog_rec_header_t *rhead; 4132 xlog_rec_header_t *rhead;
4124 xfs_daddr_t blk_no; 4133 xfs_daddr_t blk_no;
4125 xfs_caddr_t offset; 4134 char *offset;
4126 xfs_buf_t *hbp, *dbp; 4135 xfs_buf_t *hbp, *dbp;
4127 int error = 0, h_size; 4136 int error = 0, h_size;
4128 int bblks, split_bblks; 4137 int bblks, split_bblks;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 6f23fbdfb365..461e791efad7 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -725,6 +725,22 @@ xfs_mountfs(
725 } 725 }
726 726
727 /* 727 /*
728 * If enabled, sparse inode chunk alignment is expected to match the
729 * cluster size. Full inode chunk alignment must match the chunk size,
730 * but that is checked on sb read verification...
731 */
732 if (xfs_sb_version_hassparseinodes(&mp->m_sb) &&
733 mp->m_sb.sb_spino_align !=
734 XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) {
735 xfs_warn(mp,
736 "Sparse inode block alignment (%u) must match cluster size (%llu).",
737 mp->m_sb.sb_spino_align,
738 XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size));
739 error = -EINVAL;
740 goto out_remove_uuid;
741 }
742
743 /*
728 * Set inode alignment fields 744 * Set inode alignment fields
729 */ 745 */
730 xfs_set_inoalignment(mp); 746 xfs_set_inoalignment(mp);
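The xfs_mountfs() hunk above refuses to mount a sparse-inode filesystem whose sb_spino_align does not equal the inode cluster size converted to filesystem blocks (full chunk alignment is already verified when the superblock is read). A minimal sketch of that check, with assumed block and cluster sizes standing in for the superblock fields:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Convert a byte count to whole filesystem blocks (XFS_B_TO_FSBT). */
static uint64_t b_to_fsbt(uint64_t bytes, unsigned int blocklog)
{
	return bytes >> blocklog;
}

/* Model of the mount-time check from the hunk above: sparse inode chunk
 * alignment is expected to match the inode cluster size. */
static bool sparse_align_ok(bool has_sparse, uint32_t spino_align,
			    uint32_t inode_cluster_size, unsigned int blocklog)
{
	uint64_t cluster_fsb = b_to_fsbt(inode_cluster_size, blocklog);

	if (!has_sparse)
		return true;
	if (spino_align != cluster_fsb) {
		printf("sparse align %u != cluster size %llu fsb, refuse mount\n",
		       spino_align, (unsigned long long)cluster_fsb);
		return false;
	}
	return true;
}

int main(void)
{
	/* 4k blocks (blocklog 12), 8k inode cluster => expected alignment 2. */
	sparse_align_ok(true, 2, 8192, 12);	/* ok */
	sparse_align_ok(true, 4, 8192, 12);	/* mismatched: rejected */
	return 0;
}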
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 8c995a2ccb6f..7999e91cd49a 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -101,6 +101,8 @@ typedef struct xfs_mount {
101 __uint64_t m_flags; /* global mount flags */ 101 __uint64_t m_flags; /* global mount flags */
102 int m_ialloc_inos; /* inodes in inode allocation */ 102 int m_ialloc_inos; /* inodes in inode allocation */
103 int m_ialloc_blks; /* blocks in inode allocation */ 103 int m_ialloc_blks; /* blocks in inode allocation */
104 int m_ialloc_min_blks;/* min blocks in sparse inode
105 * allocation */
104 int m_inoalign_mask;/* mask sb_inoalignmt if used */ 106 int m_inoalign_mask;/* mask sb_inoalignmt if used */
105 uint m_qflags; /* quota status flags */ 107 uint m_qflags; /* quota status flags */
106 struct xfs_trans_resv m_resv; /* precomputed res values */ 108 struct xfs_trans_resv m_resv; /* precomputed res values */
@@ -179,6 +181,8 @@ typedef struct xfs_mount {
179 allocator */ 181 allocator */
180#define XFS_MOUNT_NOATTR2 (1ULL << 25) /* disable use of attr2 format */ 182#define XFS_MOUNT_NOATTR2 (1ULL << 25) /* disable use of attr2 format */
181 183
184#define XFS_MOUNT_DAX (1ULL << 62) /* TEST ONLY! */
185
182 186
183/* 187/*
184 * Default minimum read and write sizes. 188 * Default minimum read and write sizes.
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index 981a657eca39..ab4a6066f7ca 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -306,7 +306,7 @@ xfs_fs_commit_blocks(
306 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); 306 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
307 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); 307 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
308 if (error) { 308 if (error) {
309 xfs_trans_cancel(tp, 0); 309 xfs_trans_cancel(tp);
310 goto out_drop_iolock; 310 goto out_drop_iolock;
311 } 311 }
312 312
@@ -321,7 +321,7 @@ xfs_fs_commit_blocks(
321 } 321 }
322 322
323 xfs_trans_set_sync(tp); 323 xfs_trans_set_sync(tp);
324 error = xfs_trans_commit(tp, 0); 324 error = xfs_trans_commit(tp);
325 325
326out_drop_iolock: 326out_drop_iolock:
327 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 327 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 5538468c7f63..eac9549efd52 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -756,7 +756,7 @@ xfs_qm_qino_alloc(
756 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_create, 756 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_create,
757 XFS_QM_QINOCREATE_SPACE_RES(mp), 0); 757 XFS_QM_QINOCREATE_SPACE_RES(mp), 0);
758 if (error) { 758 if (error) {
759 xfs_trans_cancel(tp, 0); 759 xfs_trans_cancel(tp);
760 return error; 760 return error;
761 } 761 }
762 762
@@ -764,8 +764,7 @@ xfs_qm_qino_alloc(
764 error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, 764 error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip,
765 &committed); 765 &committed);
766 if (error) { 766 if (error) {
767 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | 767 xfs_trans_cancel(tp);
768 XFS_TRANS_ABORT);
769 return error; 768 return error;
770 } 769 }
771 } 770 }
@@ -796,7 +795,7 @@ xfs_qm_qino_alloc(
796 spin_unlock(&mp->m_sb_lock); 795 spin_unlock(&mp->m_sb_lock);
797 xfs_log_sb(tp); 796 xfs_log_sb(tp);
798 797
799 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 798 error = xfs_trans_commit(tp);
800 if (error) { 799 if (error) {
801 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 800 ASSERT(XFS_FORCED_SHUTDOWN(mp));
802 xfs_alert(mp, "%s failed (error %d)!", __func__, error); 801 xfs_alert(mp, "%s failed (error %d)!", __func__, error);
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 9a25c9275fb3..3640c6e896af 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -239,7 +239,7 @@ xfs_qm_scall_trunc_qfile(
239 tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE); 239 tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE);
240 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); 240 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
241 if (error) { 241 if (error) {
242 xfs_trans_cancel(tp, 0); 242 xfs_trans_cancel(tp);
243 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 243 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
244 goto out_put; 244 goto out_put;
245 } 245 }
@@ -252,15 +252,14 @@ xfs_qm_scall_trunc_qfile(
252 252
253 error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0); 253 error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0);
254 if (error) { 254 if (error) {
255 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | 255 xfs_trans_cancel(tp);
256 XFS_TRANS_ABORT);
257 goto out_unlock; 256 goto out_unlock;
258 } 257 }
259 258
260 ASSERT(ip->i_d.di_nextents == 0); 259 ASSERT(ip->i_d.di_nextents == 0);
261 260
262 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 261 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
263 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 262 error = xfs_trans_commit(tp);
264 263
265out_unlock: 264out_unlock:
266 xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 265 xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
@@ -437,7 +436,7 @@ xfs_qm_scall_setqlim(
437 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM); 436 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
438 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_setqlim, 0, 0); 437 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_setqlim, 0, 0);
439 if (error) { 438 if (error) {
440 xfs_trans_cancel(tp, 0); 439 xfs_trans_cancel(tp);
441 goto out_rele; 440 goto out_rele;
442 } 441 }
443 442
@@ -548,7 +547,7 @@ xfs_qm_scall_setqlim(
548 dqp->dq_flags |= XFS_DQ_DIRTY; 547 dqp->dq_flags |= XFS_DQ_DIRTY;
549 xfs_trans_log_dquot(tp, dqp); 548 xfs_trans_log_dquot(tp, dqp);
550 549
551 error = xfs_trans_commit(tp, 0); 550 error = xfs_trans_commit(tp);
552 551
553out_rele: 552out_rele:
554 xfs_qm_dqrele(dqp); 553 xfs_qm_dqrele(dqp);
@@ -571,7 +570,7 @@ xfs_qm_log_quotaoff_end(
571 570
572 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_equotaoff, 0, 0); 571 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_equotaoff, 0, 0);
573 if (error) { 572 if (error) {
574 xfs_trans_cancel(tp, 0); 573 xfs_trans_cancel(tp);
575 return error; 574 return error;
576 } 575 }
577 576
@@ -585,8 +584,7 @@ xfs_qm_log_quotaoff_end(
585 * We don't care about quotoff's performance. 584 * We don't care about quotoff's performance.
586 */ 585 */
587 xfs_trans_set_sync(tp); 586 xfs_trans_set_sync(tp);
588 error = xfs_trans_commit(tp, 0); 587 return xfs_trans_commit(tp);
589 return error;
590} 588}
591 589
592 590
@@ -605,7 +603,7 @@ xfs_qm_log_quotaoff(
605 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF); 603 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF);
606 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_quotaoff, 0, 0); 604 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_quotaoff, 0, 0);
607 if (error) { 605 if (error) {
608 xfs_trans_cancel(tp, 0); 606 xfs_trans_cancel(tp);
609 goto out; 607 goto out;
610 } 608 }
611 609
@@ -624,7 +622,7 @@ xfs_qm_log_quotaoff(
624 * We don't care about quotoff's performance. 622 * We don't care about quotoff's performance.
625 */ 623 */
626 xfs_trans_set_sync(tp); 624 xfs_trans_set_sync(tp);
627 error = xfs_trans_commit(tp, 0); 625 error = xfs_trans_commit(tp);
628 if (error) 626 if (error)
629 goto out; 627 goto out;
630 628
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index 5376dd406ba2..ce6506adab7b 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -55,7 +55,6 @@ struct xfs_trans;
55typedef struct xfs_dqtrx { 55typedef struct xfs_dqtrx {
56 struct xfs_dquot *qt_dquot; /* the dquot this refers to */ 56 struct xfs_dquot *qt_dquot; /* the dquot this refers to */
57 ulong qt_blk_res; /* blks reserved on a dquot */ 57 ulong qt_blk_res; /* blks reserved on a dquot */
58 ulong qt_blk_res_used; /* blks used from the reservation */
59 ulong qt_ino_res; /* inode reserved on a dquot */ 58 ulong qt_ino_res; /* inode reserved on a dquot */
60 ulong qt_ino_res_used; /* inodes used from the reservation */ 59 ulong qt_ino_res_used; /* inodes used from the reservation */
61 long qt_bcount_delta; /* dquot blk count changes */ 60 long qt_bcount_delta; /* dquot blk count changes */
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index f2079b6911cc..f4e8c06eee26 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -780,7 +780,6 @@ xfs_growfs_rt_alloc(
780 * Allocate space to the file, as necessary. 780 * Allocate space to the file, as necessary.
781 */ 781 */
782 while (oblocks < nblocks) { 782 while (oblocks < nblocks) {
783 int cancelflags = 0;
784 xfs_trans_t *tp; 783 xfs_trans_t *tp;
785 784
786 tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_ALLOC); 785 tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_ALLOC);
@@ -792,7 +791,6 @@ xfs_growfs_rt_alloc(
792 resblks, 0); 791 resblks, 0);
793 if (error) 792 if (error)
794 goto error_cancel; 793 goto error_cancel;
795 cancelflags = XFS_TRANS_RELEASE_LOG_RES;
796 /* 794 /*
797 * Lock the inode. 795 * Lock the inode.
798 */ 796 */
@@ -804,7 +802,6 @@ xfs_growfs_rt_alloc(
804 * Allocate blocks to the bitmap file. 802 * Allocate blocks to the bitmap file.
805 */ 803 */
806 nmap = 1; 804 nmap = 1;
807 cancelflags |= XFS_TRANS_ABORT;
808 error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks, 805 error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks,
809 XFS_BMAPI_METADATA, &firstblock, 806 XFS_BMAPI_METADATA, &firstblock,
810 resblks, &map, &nmap, &flist); 807 resblks, &map, &nmap, &flist);
@@ -818,14 +815,13 @@ xfs_growfs_rt_alloc(
818 error = xfs_bmap_finish(&tp, &flist, &committed); 815 error = xfs_bmap_finish(&tp, &flist, &committed);
819 if (error) 816 if (error)
820 goto error_cancel; 817 goto error_cancel;
821 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 818 error = xfs_trans_commit(tp);
822 if (error) 819 if (error)
823 goto error; 820 goto error;
824 /* 821 /*
825 * Now we need to clear the allocated blocks. 822 * Now we need to clear the allocated blocks.
826 * Do this one block per transaction, to keep it simple. 823 * Do this one block per transaction, to keep it simple.
827 */ 824 */
828 cancelflags = 0;
829 for (bno = map.br_startoff, fsbno = map.br_startblock; 825 for (bno = map.br_startoff, fsbno = map.br_startblock;
830 bno < map.br_startoff + map.br_blockcount; 826 bno < map.br_startoff + map.br_blockcount;
831 bno++, fsbno++) { 827 bno++, fsbno++) {
@@ -851,7 +847,7 @@ xfs_growfs_rt_alloc(
851 if (bp == NULL) { 847 if (bp == NULL) {
852 error = -EIO; 848 error = -EIO;
853error_cancel: 849error_cancel:
854 xfs_trans_cancel(tp, cancelflags); 850 xfs_trans_cancel(tp);
855 goto error; 851 goto error;
856 } 852 }
857 memset(bp->b_addr, 0, mp->m_sb.sb_blocksize); 853 memset(bp->b_addr, 0, mp->m_sb.sb_blocksize);
@@ -859,7 +855,7 @@ error_cancel:
859 /* 855 /*
860 * Commit the transaction. 856 * Commit the transaction.
861 */ 857 */
862 error = xfs_trans_commit(tp, 0); 858 error = xfs_trans_commit(tp);
863 if (error) 859 if (error)
864 goto error; 860 goto error;
865 } 861 }
@@ -973,7 +969,6 @@ xfs_growfs_rt(
973 bmbno < nrbmblocks; 969 bmbno < nrbmblocks;
974 bmbno++) { 970 bmbno++) {
975 xfs_trans_t *tp; 971 xfs_trans_t *tp;
976 int cancelflags = 0;
977 972
978 *nmp = *mp; 973 *nmp = *mp;
979 nsbp = &nmp->m_sb; 974 nsbp = &nmp->m_sb;
@@ -1015,7 +1010,6 @@ xfs_growfs_rt(
1015 mp->m_rbmip->i_d.di_size = 1010 mp->m_rbmip->i_d.di_size =
1016 nsbp->sb_rbmblocks * nsbp->sb_blocksize; 1011 nsbp->sb_rbmblocks * nsbp->sb_blocksize;
1017 xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE); 1012 xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);
1018 cancelflags |= XFS_TRANS_ABORT;
1019 /* 1013 /*
1020 * Get the summary inode into the transaction. 1014 * Get the summary inode into the transaction.
1021 */ 1015 */
@@ -1062,7 +1056,7 @@ xfs_growfs_rt(
1062 nsbp->sb_rextents - sbp->sb_rextents, &bp, &sumbno); 1056 nsbp->sb_rextents - sbp->sb_rextents, &bp, &sumbno);
1063 if (error) { 1057 if (error) {
1064error_cancel: 1058error_cancel:
1065 xfs_trans_cancel(tp, cancelflags); 1059 xfs_trans_cancel(tp);
1066 break; 1060 break;
1067 } 1061 }
1068 /* 1062 /*
@@ -1076,7 +1070,7 @@ error_cancel:
1076 mp->m_rsumlevels = nrsumlevels; 1070 mp->m_rsumlevels = nrsumlevels;
1077 mp->m_rsumsize = nrsumsize; 1071 mp->m_rsumsize = nrsumsize;
1078 1072
1079 error = xfs_trans_commit(tp, 0); 1073 error = xfs_trans_commit(tp);
1080 if (error) 1074 if (error)
1081 break; 1075 break;
1082 } 1076 }
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 858e1e62bbaa..1fb16562c159 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -112,6 +112,8 @@ static struct xfs_kobj xfs_dbg_kobj; /* global debug sysfs attrs */
112#define MNTOPT_DISCARD "discard" /* Discard unused blocks */ 112#define MNTOPT_DISCARD "discard" /* Discard unused blocks */
113#define MNTOPT_NODISCARD "nodiscard" /* Do not discard unused blocks */ 113#define MNTOPT_NODISCARD "nodiscard" /* Do not discard unused blocks */
114 114
115#define MNTOPT_DAX "dax" /* Enable direct access to bdev pages */
116
115/* 117/*
116 * Table driven mount option parser. 118 * Table driven mount option parser.
117 * 119 *
@@ -363,6 +365,10 @@ xfs_parseargs(
363 mp->m_flags |= XFS_MOUNT_DISCARD; 365 mp->m_flags |= XFS_MOUNT_DISCARD;
364 } else if (!strcmp(this_char, MNTOPT_NODISCARD)) { 366 } else if (!strcmp(this_char, MNTOPT_NODISCARD)) {
365 mp->m_flags &= ~XFS_MOUNT_DISCARD; 367 mp->m_flags &= ~XFS_MOUNT_DISCARD;
368#ifdef CONFIG_FS_DAX
369 } else if (!strcmp(this_char, MNTOPT_DAX)) {
370 mp->m_flags |= XFS_MOUNT_DAX;
371#endif
366 } else { 372 } else {
367 xfs_warn(mp, "unknown mount option [%s].", this_char); 373 xfs_warn(mp, "unknown mount option [%s].", this_char);
368 return -EINVAL; 374 return -EINVAL;
@@ -452,8 +458,8 @@ done:
452} 458}
453 459
454struct proc_xfs_info { 460struct proc_xfs_info {
455 int flag; 461 uint64_t flag;
456 char *str; 462 char *str;
457}; 463};
458 464
459STATIC int 465STATIC int
@@ -474,6 +480,7 @@ xfs_showargs(
474 { XFS_MOUNT_GRPID, "," MNTOPT_GRPID }, 480 { XFS_MOUNT_GRPID, "," MNTOPT_GRPID },
475 { XFS_MOUNT_DISCARD, "," MNTOPT_DISCARD }, 481 { XFS_MOUNT_DISCARD, "," MNTOPT_DISCARD },
476 { XFS_MOUNT_SMALL_INUMS, "," MNTOPT_32BITINODE }, 482 { XFS_MOUNT_SMALL_INUMS, "," MNTOPT_32BITINODE },
483 { XFS_MOUNT_DAX, "," MNTOPT_DAX },
477 { 0, NULL } 484 { 0, NULL }
478 }; 485 };
479 static struct proc_xfs_info xfs_info_unset[] = { 486 static struct proc_xfs_info xfs_info_unset[] = {
@@ -1507,6 +1514,20 @@ xfs_fs_fill_super(
1507 if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5) 1514 if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5)
1508 sb->s_flags |= MS_I_VERSION; 1515 sb->s_flags |= MS_I_VERSION;
1509 1516
1517 if (mp->m_flags & XFS_MOUNT_DAX) {
1518 xfs_warn(mp,
1519 "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
1520 if (sb->s_blocksize != PAGE_SIZE) {
1521 xfs_alert(mp,
1522 "Filesystem block size invalid for DAX Turning DAX off.");
1523 mp->m_flags &= ~XFS_MOUNT_DAX;
1524 } else if (!sb->s_bdev->bd_disk->fops->direct_access) {
1525 xfs_alert(mp,
1526 "Block device does not support DAX Turning DAX off.");
1527 mp->m_flags &= ~XFS_MOUNT_DAX;
1528 }
1529 }
1530
1510 error = xfs_mountfs(mp); 1531 error = xfs_mountfs(mp);
1511 if (error) 1532 if (error)
1512 goto out_filestream_unmount; 1533 goto out_filestream_unmount;
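The xfs_super.c changes above add the "dax" mount option (compiled in only with CONFIG_FS_DAX), widen proc_xfs_info.flag to 64 bits so the high XFS_MOUNT_DAX bit can be shown, and validate at fill_super time that the block size equals the page size and the block device implements direct_access, turning DAX back off otherwise. A hedged user-space sketch of that validation, with a boolean standing in for the ->direct_access capability probe:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MOUNT_DAX	(1ULL << 62)	/* mirrors the test-only mount flag */

/*
 * Model of the fill_super checks: DAX stays enabled only when the
 * filesystem block size matches the page size and the device can map
 * its storage directly.
 */
static uint64_t validate_dax(uint64_t mount_flags, unsigned int blocksize,
			     unsigned int page_size, bool has_direct_access)
{
	if (!(mount_flags & MOUNT_DAX))
		return mount_flags;

	printf("DAX enabled. Warning: EXPERIMENTAL, use at your own risk\n");
	if (blocksize != page_size) {
		printf("block size %u invalid for DAX, turning DAX off\n",
		       blocksize);
		mount_flags &= ~MOUNT_DAX;
	} else if (!has_direct_access) {
		printf("device lacks direct access, turning DAX off\n");
		mount_flags &= ~MOUNT_DAX;
	}
	return mount_flags;
}

int main(void)
{
	uint64_t flags = MOUNT_DAX;

	flags = validate_dax(flags, 4096, 4096, true);	/* stays on */
	flags = validate_dax(flags, 1024, 4096, true);	/* turned off */
	printf("DAX %s\n", (flags & MOUNT_DAX) ? "on" : "off");
	return 0;
}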
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 3df411eadb86..4be27b0210af 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -104,7 +104,7 @@ xfs_readlink_bmap(
104 cur_chunk += sizeof(struct xfs_dsymlink_hdr); 104 cur_chunk += sizeof(struct xfs_dsymlink_hdr);
105 } 105 }
106 106
107 memcpy(link + offset, bp->b_addr, byte_cnt); 107 memcpy(link + offset, cur_chunk, byte_cnt);
108 108
109 pathlen -= byte_cnt; 109 pathlen -= byte_cnt;
110 offset += byte_cnt; 110 offset += byte_cnt;
@@ -178,7 +178,6 @@ xfs_symlink(
178 struct xfs_bmap_free free_list; 178 struct xfs_bmap_free free_list;
179 xfs_fsblock_t first_block; 179 xfs_fsblock_t first_block;
180 bool unlock_dp_on_error = false; 180 bool unlock_dp_on_error = false;
181 uint cancel_flags;
182 int committed; 181 int committed;
183 xfs_fileoff_t first_fsb; 182 xfs_fileoff_t first_fsb;
184 xfs_filblks_t fs_blocks; 183 xfs_filblks_t fs_blocks;
@@ -224,7 +223,6 @@ xfs_symlink(
224 return error; 223 return error;
225 224
226 tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK); 225 tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK);
227 cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
228 /* 226 /*
229 * The symlink will fit into the inode data fork? 227 * The symlink will fit into the inode data fork?
230 * There can't be any attributes so we get the whole variable part. 228 * There can't be any attributes so we get the whole variable part.
@@ -239,10 +237,8 @@ xfs_symlink(
239 resblks = 0; 237 resblks = 0;
240 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_symlink, 0, 0); 238 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_symlink, 0, 0);
241 } 239 }
242 if (error) { 240 if (error)
243 cancel_flags = 0;
244 goto out_trans_cancel; 241 goto out_trans_cancel;
245 }
246 242
247 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); 243 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
248 unlock_dp_on_error = true; 244 unlock_dp_on_error = true;
@@ -394,7 +390,7 @@ xfs_symlink(
394 if (error) 390 if (error)
395 goto out_bmap_cancel; 391 goto out_bmap_cancel;
396 392
397 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 393 error = xfs_trans_commit(tp);
398 if (error) 394 if (error)
399 goto out_release_inode; 395 goto out_release_inode;
400 396
@@ -407,9 +403,8 @@ xfs_symlink(
407 403
408out_bmap_cancel: 404out_bmap_cancel:
409 xfs_bmap_cancel(&free_list); 405 xfs_bmap_cancel(&free_list);
410 cancel_flags |= XFS_TRANS_ABORT;
411out_trans_cancel: 406out_trans_cancel:
412 xfs_trans_cancel(tp, cancel_flags); 407 xfs_trans_cancel(tp);
413out_release_inode: 408out_release_inode:
414 /* 409 /*
415 * Wait until after the current transaction is aborted to finish the 410 * Wait until after the current transaction is aborted to finish the
@@ -464,7 +459,7 @@ xfs_inactive_symlink_rmt(
464 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); 459 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
465 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); 460 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
466 if (error) { 461 if (error) {
467 xfs_trans_cancel(tp, 0); 462 xfs_trans_cancel(tp);
468 return error; 463 return error;
469 } 464 }
470 465
@@ -533,7 +528,7 @@ xfs_inactive_symlink_rmt(
533 /* 528 /*
534 * Commit the transaction containing extent freeing and EFDs. 529 * Commit the transaction containing extent freeing and EFDs.
535 */ 530 */
536 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 531 error = xfs_trans_commit(tp);
537 if (error) { 532 if (error) {
538 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 533 ASSERT(XFS_FORCED_SHUTDOWN(mp));
539 goto error_unlock; 534 goto error_unlock;
@@ -552,7 +547,7 @@ xfs_inactive_symlink_rmt(
552error_bmap_cancel: 547error_bmap_cancel:
553 xfs_bmap_cancel(&free_list); 548 xfs_bmap_cancel(&free_list);
554error_trans_cancel: 549error_trans_cancel:
555 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 550 xfs_trans_cancel(tp);
556error_unlock: 551error_unlock:
557 xfs_iunlock(ip, XFS_ILOCK_EXCL); 552 xfs_iunlock(ip, XFS_ILOCK_EXCL);
558 return error; 553 return error;
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 615781bf4ee5..8d916d33d93d 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -738,6 +738,53 @@ TRACE_EVENT(xfs_iomap_prealloc_size,
738 __entry->blocks, __entry->shift, __entry->writeio_blocks) 738 __entry->blocks, __entry->shift, __entry->writeio_blocks)
739) 739)
740 740
741TRACE_EVENT(xfs_irec_merge_pre,
742 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agino_t agino,
743 uint16_t holemask, xfs_agino_t nagino, uint16_t nholemask),
744 TP_ARGS(mp, agno, agino, holemask, nagino, nholemask),
745 TP_STRUCT__entry(
746 __field(dev_t, dev)
747 __field(xfs_agnumber_t, agno)
748 __field(xfs_agino_t, agino)
749 __field(uint16_t, holemask)
750 __field(xfs_agino_t, nagino)
751 __field(uint16_t, nholemask)
752 ),
753 TP_fast_assign(
754 __entry->dev = mp->m_super->s_dev;
755 __entry->agno = agno;
756 __entry->agino = agino;
757 __entry->holemask = holemask;
758 __entry->nagino = nagino;
759 __entry->nholemask = holemask;
760 ),
761 TP_printk("dev %d:%d agno %d inobt (%u:0x%x) new (%u:0x%x)",
762 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno,
763 __entry->agino, __entry->holemask, __entry->nagino,
764 __entry->nholemask)
765)
766
767TRACE_EVENT(xfs_irec_merge_post,
768 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agino_t agino,
769 uint16_t holemask),
770 TP_ARGS(mp, agno, agino, holemask),
771 TP_STRUCT__entry(
772 __field(dev_t, dev)
773 __field(xfs_agnumber_t, agno)
774 __field(xfs_agino_t, agino)
775 __field(uint16_t, holemask)
776 ),
777 TP_fast_assign(
778 __entry->dev = mp->m_super->s_dev;
779 __entry->agno = agno;
780 __entry->agino = agino;
781 __entry->holemask = holemask;
782 ),
783 TP_printk("dev %d:%d agno %d inobt (%u:0x%x)", MAJOR(__entry->dev),
784 MINOR(__entry->dev), __entry->agno, __entry->agino,
785 __entry->holemask)
786)
787
741#define DEFINE_IREF_EVENT(name) \ 788#define DEFINE_IREF_EVENT(name) \
742DEFINE_EVENT(xfs_iref_class, name, \ 789DEFINE_EVENT(xfs_iref_class, name, \
743 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \ 790 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 220ef2c906b2..0582a27107d4 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -113,7 +113,7 @@ xfs_trans_free(
113 * blocks. Locks and log items, however, are no inherited. They must 113 * blocks. Locks and log items, however, are no inherited. They must
114 * be added to the new transaction explicitly. 114 * be added to the new transaction explicitly.
115 */ 115 */
116xfs_trans_t * 116STATIC xfs_trans_t *
117xfs_trans_dup( 117xfs_trans_dup(
118 xfs_trans_t *tp) 118 xfs_trans_t *tp)
119{ 119{
@@ -251,14 +251,7 @@ xfs_trans_reserve(
251 */ 251 */
252undo_log: 252undo_log:
253 if (resp->tr_logres > 0) { 253 if (resp->tr_logres > 0) {
254 int log_flags; 254 xfs_log_done(tp->t_mountp, tp->t_ticket, NULL, false);
255
256 if (resp->tr_logflags & XFS_TRANS_PERM_LOG_RES) {
257 log_flags = XFS_LOG_REL_PERM_RESERV;
258 } else {
259 log_flags = 0;
260 }
261 xfs_log_done(tp->t_mountp, tp->t_ticket, NULL, log_flags);
262 tp->t_ticket = NULL; 255 tp->t_ticket = NULL;
263 tp->t_log_res = 0; 256 tp->t_log_res = 0;
264 tp->t_flags &= ~XFS_TRANS_PERM_LOG_RES; 257 tp->t_flags &= ~XFS_TRANS_PERM_LOG_RES;
@@ -744,7 +737,7 @@ void
744xfs_trans_free_items( 737xfs_trans_free_items(
745 struct xfs_trans *tp, 738 struct xfs_trans *tp,
746 xfs_lsn_t commit_lsn, 739 xfs_lsn_t commit_lsn,
747 int flags) 740 bool abort)
748{ 741{
749 struct xfs_log_item_desc *lidp, *next; 742 struct xfs_log_item_desc *lidp, *next;
750 743
@@ -755,7 +748,7 @@ xfs_trans_free_items(
755 748
756 if (commit_lsn != NULLCOMMITLSN) 749 if (commit_lsn != NULLCOMMITLSN)
757 lip->li_ops->iop_committing(lip, commit_lsn); 750 lip->li_ops->iop_committing(lip, commit_lsn);
758 if (flags & XFS_TRANS_ABORT) 751 if (abort)
759 lip->li_flags |= XFS_LI_ABORTED; 752 lip->li_flags |= XFS_LI_ABORTED;
760 lip->li_ops->iop_unlock(lip); 753 lip->li_ops->iop_unlock(lip);
761 754
@@ -892,27 +885,17 @@ xfs_trans_committed_bulk(
892 * have already been unlocked as if the commit had succeeded. 885 * have already been unlocked as if the commit had succeeded.
893 * Do not reference the transaction structure after this call. 886 * Do not reference the transaction structure after this call.
894 */ 887 */
895int 888static int
896xfs_trans_commit( 889__xfs_trans_commit(
897 struct xfs_trans *tp, 890 struct xfs_trans *tp,
898 uint flags) 891 bool regrant)
899{ 892{
900 struct xfs_mount *mp = tp->t_mountp; 893 struct xfs_mount *mp = tp->t_mountp;
901 xfs_lsn_t commit_lsn = -1; 894 xfs_lsn_t commit_lsn = -1;
902 int error = 0; 895 int error = 0;
903 int log_flags = 0;
904 int sync = tp->t_flags & XFS_TRANS_SYNC; 896 int sync = tp->t_flags & XFS_TRANS_SYNC;
905 897
906 /* 898 /*
907 * Determine whether this commit is releasing a permanent
908 * log reservation or not.
909 */
910 if (flags & XFS_TRANS_RELEASE_LOG_RES) {
911 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
912 log_flags = XFS_LOG_REL_PERM_RESERV;
913 }
914
915 /*
916 * If there is nothing to be logged by the transaction, 899 * If there is nothing to be logged by the transaction,
917 * then unlock all of the items associated with the 900 * then unlock all of the items associated with the
918 * transaction and free the transaction structure. 901 * transaction and free the transaction structure.
@@ -936,7 +919,7 @@ xfs_trans_commit(
936 xfs_trans_apply_sb_deltas(tp); 919 xfs_trans_apply_sb_deltas(tp);
937 xfs_trans_apply_dquot_deltas(tp); 920 xfs_trans_apply_dquot_deltas(tp);
938 921
939 xfs_log_commit_cil(mp, tp, &commit_lsn, flags); 922 xfs_log_commit_cil(mp, tp, &commit_lsn, regrant);
940 923
941 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); 924 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
942 xfs_trans_free(tp); 925 xfs_trans_free(tp);
@@ -964,18 +947,25 @@ out_unreserve:
964 */ 947 */
965 xfs_trans_unreserve_and_mod_dquots(tp); 948 xfs_trans_unreserve_and_mod_dquots(tp);
966 if (tp->t_ticket) { 949 if (tp->t_ticket) {
967 commit_lsn = xfs_log_done(mp, tp->t_ticket, NULL, log_flags); 950 commit_lsn = xfs_log_done(mp, tp->t_ticket, NULL, regrant);
968 if (commit_lsn == -1 && !error) 951 if (commit_lsn == -1 && !error)
969 error = -EIO; 952 error = -EIO;
970 } 953 }
971 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); 954 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
972 xfs_trans_free_items(tp, NULLCOMMITLSN, error ? XFS_TRANS_ABORT : 0); 955 xfs_trans_free_items(tp, NULLCOMMITLSN, !!error);
973 xfs_trans_free(tp); 956 xfs_trans_free(tp);
974 957
975 XFS_STATS_INC(xs_trans_empty); 958 XFS_STATS_INC(xs_trans_empty);
976 return error; 959 return error;
977} 960}
978 961
962int
963xfs_trans_commit(
964 struct xfs_trans *tp)
965{
966 return __xfs_trans_commit(tp, false);
967}
968
979/* 969/*
980 * Unlock all of the transaction's items and free the transaction. 970 * Unlock all of the transaction's items and free the transaction.
981 * The transaction must not have modified any of its items, because 971 * The transaction must not have modified any of its items, because
@@ -986,29 +976,22 @@ out_unreserve:
986 */ 976 */
987void 977void
988xfs_trans_cancel( 978xfs_trans_cancel(
989 xfs_trans_t *tp, 979 struct xfs_trans *tp)
990 int flags)
991{ 980{
992 int log_flags; 981 struct xfs_mount *mp = tp->t_mountp;
993 xfs_mount_t *mp = tp->t_mountp; 982 bool dirty = (tp->t_flags & XFS_TRANS_DIRTY);
994 983
995 /* 984 /*
996 * See if the caller is being too lazy to figure out if
997 * the transaction really needs an abort.
998 */
999 if ((flags & XFS_TRANS_ABORT) && !(tp->t_flags & XFS_TRANS_DIRTY))
1000 flags &= ~XFS_TRANS_ABORT;
1001 /*
1002 * See if the caller is relying on us to shut down the 985 * See if the caller is relying on us to shut down the
1003 * filesystem. This happens in paths where we detect 986 * filesystem. This happens in paths where we detect
1004 * corruption and decide to give up. 987 * corruption and decide to give up.
1005 */ 988 */
1006 if ((tp->t_flags & XFS_TRANS_DIRTY) && !XFS_FORCED_SHUTDOWN(mp)) { 989 if (dirty && !XFS_FORCED_SHUTDOWN(mp)) {
1007 XFS_ERROR_REPORT("xfs_trans_cancel", XFS_ERRLEVEL_LOW, mp); 990 XFS_ERROR_REPORT("xfs_trans_cancel", XFS_ERRLEVEL_LOW, mp);
1008 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 991 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1009 } 992 }
1010#ifdef DEBUG 993#ifdef DEBUG
1011 if (!(flags & XFS_TRANS_ABORT) && !XFS_FORCED_SHUTDOWN(mp)) { 994 if (!dirty && !XFS_FORCED_SHUTDOWN(mp)) {
1012 struct xfs_log_item_desc *lidp; 995 struct xfs_log_item_desc *lidp;
1013 996
1014 list_for_each_entry(lidp, &tp->t_items, lid_trans) 997 list_for_each_entry(lidp, &tp->t_items, lid_trans)
@@ -1018,27 +1001,20 @@ xfs_trans_cancel(
1018 xfs_trans_unreserve_and_mod_sb(tp); 1001 xfs_trans_unreserve_and_mod_sb(tp);
1019 xfs_trans_unreserve_and_mod_dquots(tp); 1002 xfs_trans_unreserve_and_mod_dquots(tp);
1020 1003
1021 if (tp->t_ticket) { 1004 if (tp->t_ticket)
1022 if (flags & XFS_TRANS_RELEASE_LOG_RES) { 1005 xfs_log_done(mp, tp->t_ticket, NULL, false);
1023 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
1024 log_flags = XFS_LOG_REL_PERM_RESERV;
1025 } else {
1026 log_flags = 0;
1027 }
1028 xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
1029 }
1030 1006
1031 /* mark this thread as no longer being in a transaction */ 1007 /* mark this thread as no longer being in a transaction */
1032 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); 1008 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
1033 1009
1034 xfs_trans_free_items(tp, NULLCOMMITLSN, flags); 1010 xfs_trans_free_items(tp, NULLCOMMITLSN, dirty);
1035 xfs_trans_free(tp); 1011 xfs_trans_free(tp);
1036} 1012}
1037 1013
1038/* 1014/*
1039 * Roll from one trans in the sequence of PERMANENT transactions to 1015 * Roll from one trans in the sequence of PERMANENT transactions to
1040 * the next: permanent transactions are only flushed out when 1016 * the next: permanent transactions are only flushed out when
1041 * committed with XFS_TRANS_RELEASE_LOG_RES, but we still want as soon 1017 * committed with xfs_trans_commit(), but we still want as soon
1042 * as possible to let chunks of it go to the log. So we commit the 1018 * as possible to let chunks of it go to the log. So we commit the
1043 * chunk we've been working on and get a new transaction to continue. 1019 * chunk we've been working on and get a new transaction to continue.
1044 */ 1020 */
@@ -1055,7 +1031,8 @@ xfs_trans_roll(
1055 * Ensure that the inode is always logged. 1031 * Ensure that the inode is always logged.
1056 */ 1032 */
1057 trans = *tpp; 1033 trans = *tpp;
1058 xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE); 1034 if (dp)
1035 xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE);
1059 1036
1060 /* 1037 /*
1061 * Copy the critical parameters from one trans to the next. 1038 * Copy the critical parameters from one trans to the next.
@@ -1071,20 +1048,13 @@ xfs_trans_roll(
1071 * is in progress. The caller takes the responsibility to cancel 1048 * is in progress. The caller takes the responsibility to cancel
1072 * the duplicate transaction that gets returned. 1049 * the duplicate transaction that gets returned.
1073 */ 1050 */
1074 error = xfs_trans_commit(trans, 0); 1051 error = __xfs_trans_commit(trans, true);
1075 if (error) 1052 if (error)
1076 return error; 1053 return error;
1077 1054
1078 trans = *tpp; 1055 trans = *tpp;
1079 1056
1080 /* 1057 /*
1081 * transaction commit worked ok so we can drop the extra ticket
1082 * reference that we gained in xfs_trans_dup()
1083 */
1084 xfs_log_ticket_put(trans->t_ticket);
1085
1086
1087 /*
1088 * Reserve space in the log for the next transaction. 1058 * Reserve space in the log for the next transaction.
1089 * This also pushes items in the "AIL", the list of logged items, 1059 * This also pushes items in the "AIL", the list of logged items,
1090 * out to disk if they are taking up space at the tail of the log 1060 * out to disk if they are taking up space at the tail of the log
@@ -1100,6 +1070,7 @@ xfs_trans_roll(
1100 if (error) 1070 if (error)
1101 return error; 1071 return error;
1102 1072
1103 xfs_trans_ijoin(trans, dp, 0); 1073 if (dp)
1074 xfs_trans_ijoin(trans, dp, 0);
1104 return 0; 1075 return 0;
1105} 1076}
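
Editorial note: for orientation, a minimal sketch (not part of the commit) of how a caller adapts to the simplified interfaces above. xfs_trans_commit() loses its flags argument and xfs_trans_cancel() now decides the abort/shutdown behaviour itself from the transaction's dirty state. The xfs_trans_* helpers and reservation names are the real ones; the caller function is hypothetical.

static int
xfs_example_update(			/* hypothetical caller */
	struct xfs_mount	*mp,
	struct xfs_inode	*ip)
{
	struct xfs_trans	*tp;
	int			error;

	tp = xfs_trans_alloc(mp, XFS_TRANS_ICHANGE);
	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
	if (error) {
		xfs_trans_cancel(tp);	/* was: xfs_trans_cancel(tp, 0) */
		return error;
	}

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	/* was: xfs_trans_commit(tp, 0) */
	return xfs_trans_commit(tp);
}
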
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index b5bc1ab3c4da..3b21b4e5e467 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -133,8 +133,6 @@ typedef struct xfs_trans {
133 * XFS transaction mechanism exported interfaces that are 133 * XFS transaction mechanism exported interfaces that are
134 * actually macros. 134 * actually macros.
135 */ 135 */
136#define xfs_trans_get_log_res(tp) ((tp)->t_log_res)
137#define xfs_trans_get_log_count(tp) ((tp)->t_log_count)
138#define xfs_trans_get_block_res(tp) ((tp)->t_blk_res) 136#define xfs_trans_get_block_res(tp) ((tp)->t_blk_res)
139#define xfs_trans_set_sync(tp) ((tp)->t_flags |= XFS_TRANS_SYNC) 137#define xfs_trans_set_sync(tp) ((tp)->t_flags |= XFS_TRANS_SYNC)
140 138
@@ -153,7 +151,6 @@ typedef struct xfs_trans {
153 */ 151 */
154xfs_trans_t *xfs_trans_alloc(struct xfs_mount *, uint); 152xfs_trans_t *xfs_trans_alloc(struct xfs_mount *, uint);
155xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint, xfs_km_flags_t); 153xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint, xfs_km_flags_t);
156xfs_trans_t *xfs_trans_dup(xfs_trans_t *);
157int xfs_trans_reserve(struct xfs_trans *, struct xfs_trans_res *, 154int xfs_trans_reserve(struct xfs_trans *, struct xfs_trans_res *,
158 uint, uint); 155 uint, uint);
159void xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t); 156void xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t);
@@ -228,9 +225,9 @@ void xfs_trans_log_efd_extent(xfs_trans_t *,
228 struct xfs_efd_log_item *, 225 struct xfs_efd_log_item *,
229 xfs_fsblock_t, 226 xfs_fsblock_t,
230 xfs_extlen_t); 227 xfs_extlen_t);
231int xfs_trans_commit(xfs_trans_t *, uint flags); 228int xfs_trans_commit(struct xfs_trans *);
232int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *); 229int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *);
233void xfs_trans_cancel(xfs_trans_t *, int); 230void xfs_trans_cancel(xfs_trans_t *);
234int xfs_trans_ail_init(struct xfs_mount *); 231int xfs_trans_ail_init(struct xfs_mount *);
235void xfs_trans_ail_destroy(struct xfs_mount *); 232void xfs_trans_ail_destroy(struct xfs_mount *);
236 233
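
Editorial note: with xfs_trans_dup() no longer exported here, the dup/commit/reserve sequence that callers used to open-code is expected to go through xfs_trans_roll(), which (per the change above) now also tolerates a NULL inode. A hedged sketch of the pattern; the function and loop below are illustrative only, not code from the commit.

static int
xfs_example_roll_loop(			/* hypothetical helper */
	struct xfs_trans	**tpp,
	struct xfs_inode	*ip,	/* may be NULL */
	int			nchunks)
{
	int			i;
	int			error;

	for (i = 0; i < nchunks; i++) {
		/* ... log one chunk of changes against *tpp ... */
		error = xfs_trans_roll(tpp, ip);
		if (error)
			return error;
	}
	return 0;
}
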
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 573aefb5a573..1098cf490189 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -159,7 +159,7 @@ xfs_trans_ail_cursor_next(
159{ 159{
160 struct xfs_log_item *lip = cur->item; 160 struct xfs_log_item *lip = cur->item;
161 161
162 if ((__psint_t)lip & 1) 162 if ((uintptr_t)lip & 1)
163 lip = xfs_ail_min(ailp); 163 lip = xfs_ail_min(ailp);
164 if (lip) 164 if (lip)
165 cur->item = xfs_ail_next(ailp, lip); 165 cur->item = xfs_ail_next(ailp, lip);
@@ -196,7 +196,7 @@ xfs_trans_ail_cursor_clear(
196 list_for_each_entry(cur, &ailp->xa_cursors, list) { 196 list_for_each_entry(cur, &ailp->xa_cursors, list) {
197 if (cur->item == lip) 197 if (cur->item == lip)
198 cur->item = (struct xfs_log_item *) 198 cur->item = (struct xfs_log_item *)
199 ((__psint_t)cur->item | 1); 199 ((uintptr_t)cur->item | 1);
200 } 200 }
201} 201}
202 202
@@ -287,7 +287,7 @@ xfs_ail_splice(
287 * find the place in the AIL where the items belong. 287 * find the place in the AIL where the items belong.
288 */ 288 */
289 lip = cur ? cur->item : NULL; 289 lip = cur ? cur->item : NULL;
290 if (!lip || (__psint_t) lip & 1) 290 if (!lip || (uintptr_t)lip & 1)
291 lip = __xfs_trans_ail_cursor_last(ailp, lsn); 291 lip = __xfs_trans_ail_cursor_last(ailp, lsn);
292 292
293 /* 293 /*
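
Editorial note: the AIL cursor code tags an invalidated cursor entry by setting the low bit of the item pointer, which is why the casts go through an integer type; the XFS-private __psint_t is replaced by the standard uintptr_t, which is guaranteed to round-trip a pointer. A hedged sketch of that convention; the helper names below are illustrative, not from the commit.

static inline struct xfs_log_item *
example_cursor_mark(struct xfs_log_item *lip)	/* hypothetical helper */
{
	/* tag the pointer so later users know the item has gone away */
	return (struct xfs_log_item *)((uintptr_t)lip | 1);
}

static inline bool
example_cursor_is_marked(struct xfs_log_item *lip)
{
	return ((uintptr_t)lip & 1) != 0;
}
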
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index 76a16df55ef7..ce78534a047e 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -90,8 +90,9 @@ xfs_trans_dup_dqinfo(
90 xfs_trans_t *ntp) 90 xfs_trans_t *ntp)
91{ 91{
92 xfs_dqtrx_t *oq, *nq; 92 xfs_dqtrx_t *oq, *nq;
93 int i,j; 93 int i, j;
94 xfs_dqtrx_t *oqa, *nqa; 94 xfs_dqtrx_t *oqa, *nqa;
95 ulong blk_res_used;
95 96
96 if (!otp->t_dqinfo) 97 if (!otp->t_dqinfo)
97 return; 98 return;
@@ -102,18 +103,23 @@ xfs_trans_dup_dqinfo(
102 * Because the quota blk reservation is carried forward, 103 * Because the quota blk reservation is carried forward,
103 * it is also necessary to carry forward the DQ_DIRTY flag. 104 * it is also necessary to carry forward the DQ_DIRTY flag.
104 */ 105 */
105 if(otp->t_flags & XFS_TRANS_DQ_DIRTY) 106 if (otp->t_flags & XFS_TRANS_DQ_DIRTY)
106 ntp->t_flags |= XFS_TRANS_DQ_DIRTY; 107 ntp->t_flags |= XFS_TRANS_DQ_DIRTY;
107 108
108 for (j = 0; j < XFS_QM_TRANS_DQTYPES; j++) { 109 for (j = 0; j < XFS_QM_TRANS_DQTYPES; j++) {
109 oqa = otp->t_dqinfo->dqs[j]; 110 oqa = otp->t_dqinfo->dqs[j];
110 nqa = ntp->t_dqinfo->dqs[j]; 111 nqa = ntp->t_dqinfo->dqs[j];
111 for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { 112 for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
113 blk_res_used = 0;
114
112 if (oqa[i].qt_dquot == NULL) 115 if (oqa[i].qt_dquot == NULL)
113 break; 116 break;
114 oq = &oqa[i]; 117 oq = &oqa[i];
115 nq = &nqa[i]; 118 nq = &nqa[i];
116 119
120 if (oq->qt_blk_res && oq->qt_bcount_delta > 0)
121 blk_res_used = oq->qt_bcount_delta;
122
117 nq->qt_dquot = oq->qt_dquot; 123 nq->qt_dquot = oq->qt_dquot;
118 nq->qt_bcount_delta = nq->qt_icount_delta = 0; 124 nq->qt_bcount_delta = nq->qt_icount_delta = 0;
119 nq->qt_rtbcount_delta = 0; 125 nq->qt_rtbcount_delta = 0;
@@ -121,8 +127,8 @@ xfs_trans_dup_dqinfo(
121 /* 127 /*
122 * Transfer whatever is left of the reservations. 128 * Transfer whatever is left of the reservations.
123 */ 129 */
124 nq->qt_blk_res = oq->qt_blk_res - oq->qt_blk_res_used; 130 nq->qt_blk_res = oq->qt_blk_res - blk_res_used;
125 oq->qt_blk_res = oq->qt_blk_res_used; 131 oq->qt_blk_res = blk_res_used;
126 132
127 nq->qt_rtblk_res = oq->qt_rtblk_res - 133 nq->qt_rtblk_res = oq->qt_rtblk_res -
128 oq->qt_rtblk_res_used; 134 oq->qt_rtblk_res_used;
@@ -239,10 +245,6 @@ xfs_trans_mod_dquot(
239 * disk blocks used. 245 * disk blocks used.
240 */ 246 */
241 case XFS_TRANS_DQ_BCOUNT: 247 case XFS_TRANS_DQ_BCOUNT:
242 if (qtrx->qt_blk_res && delta > 0) {
243 qtrx->qt_blk_res_used += (ulong)delta;
244 ASSERT(qtrx->qt_blk_res >= qtrx->qt_blk_res_used);
245 }
246 qtrx->qt_bcount_delta += delta; 248 qtrx->qt_bcount_delta += delta;
247 break; 249 break;
248 250
@@ -423,15 +425,19 @@ xfs_trans_apply_dquot_deltas(
423 * reservation that a transaction structure knows of. 425 * reservation that a transaction structure knows of.
424 */ 426 */
425 if (qtrx->qt_blk_res != 0) { 427 if (qtrx->qt_blk_res != 0) {
426 if (qtrx->qt_blk_res != qtrx->qt_blk_res_used) { 428 ulong blk_res_used = 0;
427 if (qtrx->qt_blk_res > 429
428 qtrx->qt_blk_res_used) 430 if (qtrx->qt_bcount_delta > 0)
431 blk_res_used = qtrx->qt_bcount_delta;
432
433 if (qtrx->qt_blk_res != blk_res_used) {
434 if (qtrx->qt_blk_res > blk_res_used)
429 dqp->q_res_bcount -= (xfs_qcnt_t) 435 dqp->q_res_bcount -= (xfs_qcnt_t)
430 (qtrx->qt_blk_res - 436 (qtrx->qt_blk_res -
431 qtrx->qt_blk_res_used); 437 blk_res_used);
432 else 438 else
433 dqp->q_res_bcount -= (xfs_qcnt_t) 439 dqp->q_res_bcount -= (xfs_qcnt_t)
434 (qtrx->qt_blk_res_used - 440 (blk_res_used -
435 qtrx->qt_blk_res); 441 qtrx->qt_blk_res);
436 } 442 }
437 } else { 443 } else {
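
Editorial note: the dquot changes above drop the separately maintained qt_blk_res_used counter; the portion of the block reservation actually consumed is now derived from the positive part of qt_bcount_delta wherever it is needed. A hedged sketch of that derivation; the fields are the real xfs_dqtrx ones, the helper itself is hypothetical.

static ulong
example_dqtrx_blk_res_used(		/* hypothetical helper */
	struct xfs_dqtrx	*qtrx)
{
	if (qtrx->qt_blk_res && qtrx->qt_bcount_delta > 0)
		return (ulong)qtrx->qt_bcount_delta;
	return 0;
}
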
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index bd1281862ad7..1b736294558a 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -30,7 +30,7 @@ void xfs_trans_init(struct xfs_mount *);
30void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *); 30void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *);
31void xfs_trans_del_item(struct xfs_log_item *); 31void xfs_trans_del_item(struct xfs_log_item *);
32void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn, 32void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn,
33 int flags); 33 bool abort);
34void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp); 34void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp);
35 35
36void xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv, 36void xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e351da4a934f..3f1a84635da8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -70,6 +70,7 @@ typedef int (get_block_t)(struct inode *inode, sector_t iblock,
70 struct buffer_head *bh_result, int create); 70 struct buffer_head *bh_result, int create);
71typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, 71typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
72 ssize_t bytes, void *private); 72 ssize_t bytes, void *private);
73typedef void (dax_iodone_t)(struct buffer_head *bh_map, int uptodate);
73 74
74#define MAY_EXEC 0x00000001 75#define MAY_EXEC 0x00000001
75#define MAY_WRITE 0x00000002 76#define MAY_WRITE 0x00000002
@@ -2655,9 +2656,13 @@ ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, loff_t,
2655int dax_clear_blocks(struct inode *, sector_t block, long size); 2656int dax_clear_blocks(struct inode *, sector_t block, long size);
2656int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t); 2657int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
2657int dax_truncate_page(struct inode *, loff_t from, get_block_t); 2658int dax_truncate_page(struct inode *, loff_t from, get_block_t);
2658int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t); 2659int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
2660 dax_iodone_t);
2661int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
2662 dax_iodone_t);
2659int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *); 2663int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *);
2660#define dax_mkwrite(vma, vmf, gb) dax_fault(vma, vmf, gb) 2664#define dax_mkwrite(vma, vmf, gb, iod) dax_fault(vma, vmf, gb, iod)
2665#define __dax_mkwrite(vma, vmf, gb, iod) __dax_fault(vma, vmf, gb, iod)
2661 2666
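
Editorial note: a hedged sketch of how a filesystem might use the extended prototypes above. dax_fault() now takes a completion callback of type dax_iodone_t, typically used to convert unwritten extents once the fault has populated the block. Everything named example_* is a hypothetical placeholder; only dax_fault() and the typedefs come from this header.

/* hypothetical block-mapping callback with the get_block_t signature */
static int example_get_block(struct inode *inode, sector_t iblock,
			     struct buffer_head *bh_result, int create)
{
	/* a real filesystem would map iblock into bh_result here */
	return -EIO;
}

/* hypothetical dax_iodone_t: e.g. convert an unwritten extent to written */
static void example_dax_end_io(struct buffer_head *bh_map, int uptodate)
{
}

static int example_filemap_fault(struct vm_area_struct *vma,
				 struct vm_fault *vmf)
{
	/* was: dax_fault(vma, vmf, example_get_block) */
	return dax_fault(vma, vmf, example_get_block, example_dax_end_io);
}
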
2662#ifdef CONFIG_BLOCK 2667#ifdef CONFIG_BLOCK
2663typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode, 2668typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode,