aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-07-30 23:36:49 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2015-07-30 23:36:49 -0400
commit8400935737bf02d97da281bdcd139a421624b6ba (patch)
tree52f36500bcb717241bbe36ad0522e73bf0417632
parentdbe08116b87cdc2217f11a78b5b70e29068b7efd (diff)
parentdf150ed102baa0e78c06e08e975dfb47147dd677 (diff)
Merge tag 'xfs-for-linus-4.2-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs
Pull xfs fixes from Dave Chinner: "There are a couple of recently found, long standing remote attribute corruption fixes caused by log recovery getting confused after a crash, and the new DAX code in XFS (merged in 4.2-rc1) needs to actually use the DAX fault path on read faults. Summary: - remote attribute log recovery corruption fixes - DAX page faults need to use direct mappings, not a page cache mapping" * tag 'xfs-for-linus-4.2-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs: xfs: remote attributes need to be considered data xfs: remote attribute headers contain an invalid LSN xfs: call dax_fault on read page faults for DAX
-rw-r--r--fs/dax.c14
-rw-r--r--fs/xfs/libxfs/xfs_attr_remote.c44
-rw-r--r--fs/xfs/xfs_file.c21
-rw-r--r--fs/xfs/xfs_log_recover.c11
4 files changed, 69 insertions, 21 deletions
diff --git a/fs/dax.c b/fs/dax.c
index c3e21ccfc358..a7f77e1fa18c 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -319,6 +319,12 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
319 * @vma: The virtual memory area where the fault occurred 319 * @vma: The virtual memory area where the fault occurred
320 * @vmf: The description of the fault 320 * @vmf: The description of the fault
321 * @get_block: The filesystem method used to translate file offsets to blocks 321 * @get_block: The filesystem method used to translate file offsets to blocks
322 * @complete_unwritten: The filesystem method used to convert unwritten blocks
323 * to written so the data written to them is exposed. This is required for
324 * required by write faults for filesystems that will return unwritten
325 * extent mappings from @get_block, but it is optional for reads as
326 * dax_insert_mapping() will always zero unwritten blocks. If the fs does
327 * not support unwritten extents, the it should pass NULL.
322 * 328 *
323 * When a page fault occurs, filesystems may call this helper in their 329 * When a page fault occurs, filesystems may call this helper in their
324 * fault handler for DAX files. __dax_fault() assumes the caller has done all 330 * fault handler for DAX files. __dax_fault() assumes the caller has done all
@@ -437,8 +443,12 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
437 * as for normal BH based IO completions. 443 * as for normal BH based IO completions.
438 */ 444 */
439 error = dax_insert_mapping(inode, &bh, vma, vmf); 445 error = dax_insert_mapping(inode, &bh, vma, vmf);
440 if (buffer_unwritten(&bh)) 446 if (buffer_unwritten(&bh)) {
441 complete_unwritten(&bh, !error); 447 if (complete_unwritten)
448 complete_unwritten(&bh, !error);
449 else
450 WARN_ON_ONCE(!(vmf->flags & FAULT_FLAG_WRITE));
451 }
442 452
443 out: 453 out:
444 if (error == -ENOMEM) 454 if (error == -ENOMEM)
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index 20de88d1bf86..dd714037c322 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -159,11 +159,10 @@ xfs_attr3_rmt_write_verify(
159 struct xfs_buf *bp) 159 struct xfs_buf *bp)
160{ 160{
161 struct xfs_mount *mp = bp->b_target->bt_mount; 161 struct xfs_mount *mp = bp->b_target->bt_mount;
162 struct xfs_buf_log_item *bip = bp->b_fspriv; 162 int blksize = mp->m_attr_geo->blksize;
163 char *ptr; 163 char *ptr;
164 int len; 164 int len;
165 xfs_daddr_t bno; 165 xfs_daddr_t bno;
166 int blksize = mp->m_attr_geo->blksize;
167 166
168 /* no verification of non-crc buffers */ 167 /* no verification of non-crc buffers */
169 if (!xfs_sb_version_hascrc(&mp->m_sb)) 168 if (!xfs_sb_version_hascrc(&mp->m_sb))
@@ -175,16 +174,22 @@ xfs_attr3_rmt_write_verify(
175 ASSERT(len >= blksize); 174 ASSERT(len >= blksize);
176 175
177 while (len > 0) { 176 while (len > 0) {
177 struct xfs_attr3_rmt_hdr *rmt = (struct xfs_attr3_rmt_hdr *)ptr;
178
178 if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { 179 if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) {
179 xfs_buf_ioerror(bp, -EFSCORRUPTED); 180 xfs_buf_ioerror(bp, -EFSCORRUPTED);
180 xfs_verifier_error(bp); 181 xfs_verifier_error(bp);
181 return; 182 return;
182 } 183 }
183 if (bip) {
184 struct xfs_attr3_rmt_hdr *rmt;
185 184
186 rmt = (struct xfs_attr3_rmt_hdr *)ptr; 185 /*
187 rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn); 186 * Ensure we aren't writing bogus LSNs to disk. See
187 * xfs_attr3_rmt_hdr_set() for the explanation.
188 */
189 if (rmt->rm_lsn != cpu_to_be64(NULLCOMMITLSN)) {
190 xfs_buf_ioerror(bp, -EFSCORRUPTED);
191 xfs_verifier_error(bp);
192 return;
188 } 193 }
189 xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF); 194 xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF);
190 195
@@ -221,6 +226,18 @@ xfs_attr3_rmt_hdr_set(
221 rmt->rm_owner = cpu_to_be64(ino); 226 rmt->rm_owner = cpu_to_be64(ino);
222 rmt->rm_blkno = cpu_to_be64(bno); 227 rmt->rm_blkno = cpu_to_be64(bno);
223 228
229 /*
230 * Remote attribute blocks are written synchronously, so we don't
231 * have an LSN that we can stamp in them that makes any sense to log
232 * recovery. To ensure that log recovery handles overwrites of these
233 * blocks sanely (i.e. once they've been freed and reallocated as some
234 * other type of metadata) we need to ensure that the LSN has a value
235 * that tells log recovery to ignore the LSN and overwrite the buffer
236 * with whatever is in it's log. To do this, we use the magic
237 * NULLCOMMITLSN to indicate that the LSN is invalid.
238 */
239 rmt->rm_lsn = cpu_to_be64(NULLCOMMITLSN);
240
224 return sizeof(struct xfs_attr3_rmt_hdr); 241 return sizeof(struct xfs_attr3_rmt_hdr);
225} 242}
226 243
@@ -434,14 +451,21 @@ xfs_attr_rmtval_set(
434 451
435 /* 452 /*
436 * Allocate a single extent, up to the size of the value. 453 * Allocate a single extent, up to the size of the value.
454 *
455 * Note that we have to consider this a data allocation as we
456 * write the remote attribute without logging the contents.
457 * Hence we must ensure that we aren't using blocks that are on
458 * the busy list so that we don't overwrite blocks which have
459 * recently been freed but their transactions are not yet
460 * committed to disk. If we overwrite the contents of a busy
461 * extent and then crash then the block may not contain the
462 * correct metadata after log recovery occurs.
437 */ 463 */
438 xfs_bmap_init(args->flist, args->firstblock); 464 xfs_bmap_init(args->flist, args->firstblock);
439 nmap = 1; 465 nmap = 1;
440 error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno, 466 error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno,
441 blkcnt, 467 blkcnt, XFS_BMAPI_ATTRFORK, args->firstblock,
442 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, 468 args->total, &map, &nmap, args->flist);
443 args->firstblock, args->total, &map, &nmap,
444 args->flist);
445 if (!error) { 469 if (!error) {
446 error = xfs_bmap_finish(&args->trans, args->flist, 470 error = xfs_bmap_finish(&args->trans, args->flist,
447 &committed); 471 &committed);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index f0e8249722d4..db4acc1c3e73 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1514,18 +1514,27 @@ xfs_filemap_fault(
1514 struct vm_area_struct *vma, 1514 struct vm_area_struct *vma,
1515 struct vm_fault *vmf) 1515 struct vm_fault *vmf)
1516{ 1516{
1517 struct xfs_inode *ip = XFS_I(file_inode(vma->vm_file)); 1517 struct inode *inode = file_inode(vma->vm_file);
1518 int ret; 1518 int ret;
1519 1519
1520 trace_xfs_filemap_fault(ip); 1520 trace_xfs_filemap_fault(XFS_I(inode));
1521 1521
1522 /* DAX can shortcut the normal fault path on write faults! */ 1522 /* DAX can shortcut the normal fault path on write faults! */
1523 if ((vmf->flags & FAULT_FLAG_WRITE) && IS_DAX(VFS_I(ip))) 1523 if ((vmf->flags & FAULT_FLAG_WRITE) && IS_DAX(inode))
1524 return xfs_filemap_page_mkwrite(vma, vmf); 1524 return xfs_filemap_page_mkwrite(vma, vmf);
1525 1525
1526 xfs_ilock(ip, XFS_MMAPLOCK_SHARED); 1526 xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
1527 ret = filemap_fault(vma, vmf); 1527 if (IS_DAX(inode)) {
1528 xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); 1528 /*
1529 * we do not want to trigger unwritten extent conversion on read
1530 * faults - that is unnecessary overhead and would also require
1531 * changes to xfs_get_blocks_direct() to map unwritten extent
1532 * ioend for conversion on read-only mappings.
1533 */
1534 ret = __dax_fault(vma, vmf, xfs_get_blocks_direct, NULL);
1535 } else
1536 ret = filemap_fault(vma, vmf);
1537 xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
1529 1538
1530 return ret; 1539 return ret;
1531} 1540}
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 01dd228ca05e..480ebba8464f 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1886,9 +1886,14 @@ xlog_recover_get_buf_lsn(
1886 uuid = &((struct xfs_dir3_blk_hdr *)blk)->uuid; 1886 uuid = &((struct xfs_dir3_blk_hdr *)blk)->uuid;
1887 break; 1887 break;
1888 case XFS_ATTR3_RMT_MAGIC: 1888 case XFS_ATTR3_RMT_MAGIC:
1889 lsn = be64_to_cpu(((struct xfs_attr3_rmt_hdr *)blk)->rm_lsn); 1889 /*
1890 uuid = &((struct xfs_attr3_rmt_hdr *)blk)->rm_uuid; 1890 * Remote attr blocks are written synchronously, rather than
1891 break; 1891 * being logged. That means they do not contain a valid LSN
1892 * (i.e. transactionally ordered) in them, and hence any time we
1893 * see a buffer to replay over the top of a remote attribute
1894 * block we should simply do so.
1895 */
1896 goto recover_immediately;
1892 case XFS_SB_MAGIC: 1897 case XFS_SB_MAGIC:
1893 lsn = be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn); 1898 lsn = be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn);
1894 uuid = &((struct xfs_dsb *)blk)->sb_uuid; 1899 uuid = &((struct xfs_dsb *)blk)->sb_uuid;