diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-07-30 23:36:49 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-07-30 23:36:49 -0400 |
commit | 8400935737bf02d97da281bdcd139a421624b6ba (patch) | |
tree | 52f36500bcb717241bbe36ad0522e73bf0417632 | |
parent | dbe08116b87cdc2217f11a78b5b70e29068b7efd (diff) | |
parent | df150ed102baa0e78c06e08e975dfb47147dd677 (diff) |
Merge tag 'xfs-for-linus-4.2-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs
Pull xfs fixes from Dave Chinner:
"There are a couple of recently found, long standing remote attribute
corruption fixes caused by log recovery getting confused after a
crash, and the new DAX code in XFS (merged in 4.2-rc1) needs to
actually use the DAX fault path on read faults.
Summary:
- remote attribute log recovery corruption fixes
- DAX page faults need to use direct mappings, not a page cache
mapping"
* tag 'xfs-for-linus-4.2-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs:
xfs: remote attributes need to be considered data
xfs: remote attribute headers contain an invalid LSN
xfs: call dax_fault on read page faults for DAX
-rw-r--r-- | fs/dax.c | 14 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_attr_remote.c | 44 | ||||
-rw-r--r-- | fs/xfs/xfs_file.c | 21 | ||||
-rw-r--r-- | fs/xfs/xfs_log_recover.c | 11 |
4 files changed, 69 insertions, 21 deletions
@@ -319,6 +319,12 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh, | |||
319 | * @vma: The virtual memory area where the fault occurred | 319 | * @vma: The virtual memory area where the fault occurred |
320 | * @vmf: The description of the fault | 320 | * @vmf: The description of the fault |
321 | * @get_block: The filesystem method used to translate file offsets to blocks | 321 | * @get_block: The filesystem method used to translate file offsets to blocks |
322 | * @complete_unwritten: The filesystem method used to convert unwritten blocks | ||
323 | * to written so the data written to them is exposed. This is | ||
324 | * required by write faults for filesystems that will return unwritten | ||
325 | * extent mappings from @get_block, but it is optional for reads as | ||
326 | * dax_insert_mapping() will always zero unwritten blocks. If the fs does | ||
327 | * not support unwritten extents, then it should pass NULL. | ||
322 | * | 328 | * |
323 | * When a page fault occurs, filesystems may call this helper in their | 329 | * When a page fault occurs, filesystems may call this helper in their |
324 | * fault handler for DAX files. __dax_fault() assumes the caller has done all | 330 | * fault handler for DAX files. __dax_fault() assumes the caller has done all |
@@ -437,8 +443,12 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, | |||
437 | * as for normal BH based IO completions. | 443 | * as for normal BH based IO completions. |
438 | */ | 444 | */ |
439 | error = dax_insert_mapping(inode, &bh, vma, vmf); | 445 | error = dax_insert_mapping(inode, &bh, vma, vmf); |
440 | if (buffer_unwritten(&bh)) | 446 | if (buffer_unwritten(&bh)) { |
441 | complete_unwritten(&bh, !error); | 447 | if (complete_unwritten) |
448 | complete_unwritten(&bh, !error); | ||
449 | else | ||
450 | WARN_ON_ONCE(!(vmf->flags & FAULT_FLAG_WRITE)); | ||
451 | } | ||
442 | 452 | ||
443 | out: | 453 | out: |
444 | if (error == -ENOMEM) | 454 | if (error == -ENOMEM) |
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index 20de88d1bf86..dd714037c322 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c | |||
@@ -159,11 +159,10 @@ xfs_attr3_rmt_write_verify( | |||
159 | struct xfs_buf *bp) | 159 | struct xfs_buf *bp) |
160 | { | 160 | { |
161 | struct xfs_mount *mp = bp->b_target->bt_mount; | 161 | struct xfs_mount *mp = bp->b_target->bt_mount; |
162 | struct xfs_buf_log_item *bip = bp->b_fspriv; | 162 | int blksize = mp->m_attr_geo->blksize; |
163 | char *ptr; | 163 | char *ptr; |
164 | int len; | 164 | int len; |
165 | xfs_daddr_t bno; | 165 | xfs_daddr_t bno; |
166 | int blksize = mp->m_attr_geo->blksize; | ||
167 | 166 | ||
168 | /* no verification of non-crc buffers */ | 167 | /* no verification of non-crc buffers */ |
169 | if (!xfs_sb_version_hascrc(&mp->m_sb)) | 168 | if (!xfs_sb_version_hascrc(&mp->m_sb)) |
@@ -175,16 +174,22 @@ xfs_attr3_rmt_write_verify( | |||
175 | ASSERT(len >= blksize); | 174 | ASSERT(len >= blksize); |
176 | 175 | ||
177 | while (len > 0) { | 176 | while (len > 0) { |
177 | struct xfs_attr3_rmt_hdr *rmt = (struct xfs_attr3_rmt_hdr *)ptr; | ||
178 | |||
178 | if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { | 179 | if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { |
179 | xfs_buf_ioerror(bp, -EFSCORRUPTED); | 180 | xfs_buf_ioerror(bp, -EFSCORRUPTED); |
180 | xfs_verifier_error(bp); | 181 | xfs_verifier_error(bp); |
181 | return; | 182 | return; |
182 | } | 183 | } |
183 | if (bip) { | ||
184 | struct xfs_attr3_rmt_hdr *rmt; | ||
185 | 184 | ||
186 | rmt = (struct xfs_attr3_rmt_hdr *)ptr; | 185 | /* |
187 | rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn); | 186 | * Ensure we aren't writing bogus LSNs to disk. See |
187 | * xfs_attr3_rmt_hdr_set() for the explanation. | ||
188 | */ | ||
189 | if (rmt->rm_lsn != cpu_to_be64(NULLCOMMITLSN)) { | ||
190 | xfs_buf_ioerror(bp, -EFSCORRUPTED); | ||
191 | xfs_verifier_error(bp); | ||
192 | return; | ||
188 | } | 193 | } |
189 | xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF); | 194 | xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF); |
190 | 195 | ||
@@ -221,6 +226,18 @@ xfs_attr3_rmt_hdr_set( | |||
221 | rmt->rm_owner = cpu_to_be64(ino); | 226 | rmt->rm_owner = cpu_to_be64(ino); |
222 | rmt->rm_blkno = cpu_to_be64(bno); | 227 | rmt->rm_blkno = cpu_to_be64(bno); |
223 | 228 | ||
229 | /* | ||
230 | * Remote attribute blocks are written synchronously, so we don't | ||
231 | * have an LSN that we can stamp in them that makes any sense to log | ||
232 | * recovery. To ensure that log recovery handles overwrites of these | ||
233 | * blocks sanely (i.e. once they've been freed and reallocated as some | ||
234 | * other type of metadata) we need to ensure that the LSN has a value | ||
235 | * that tells log recovery to ignore the LSN and overwrite the buffer | ||
236 | * with whatever is in its log. To do this, we use the magic | ||
237 | * NULLCOMMITLSN to indicate that the LSN is invalid. | ||
238 | */ | ||
239 | rmt->rm_lsn = cpu_to_be64(NULLCOMMITLSN); | ||
240 | |||
224 | return sizeof(struct xfs_attr3_rmt_hdr); | 241 | return sizeof(struct xfs_attr3_rmt_hdr); |
225 | } | 242 | } |
226 | 243 | ||
@@ -434,14 +451,21 @@ xfs_attr_rmtval_set( | |||
434 | 451 | ||
435 | /* | 452 | /* |
436 | * Allocate a single extent, up to the size of the value. | 453 | * Allocate a single extent, up to the size of the value. |
454 | * | ||
455 | * Note that we have to consider this a data allocation as we | ||
456 | * write the remote attribute without logging the contents. | ||
457 | * Hence we must ensure that we aren't using blocks that are on | ||
458 | * the busy list so that we don't overwrite blocks which have | ||
459 | * recently been freed but their transactions are not yet | ||
460 | * committed to disk. If we overwrite the contents of a busy | ||
461 | * extent and then crash then the block may not contain the | ||
462 | * correct metadata after log recovery occurs. | ||
437 | */ | 463 | */ |
438 | xfs_bmap_init(args->flist, args->firstblock); | 464 | xfs_bmap_init(args->flist, args->firstblock); |
439 | nmap = 1; | 465 | nmap = 1; |
440 | error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno, | 466 | error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno, |
441 | blkcnt, | 467 | blkcnt, XFS_BMAPI_ATTRFORK, args->firstblock, |
442 | XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, | 468 | args->total, &map, &nmap, args->flist); |
443 | args->firstblock, args->total, &map, &nmap, | ||
444 | args->flist); | ||
445 | if (!error) { | 469 | if (!error) { |
446 | error = xfs_bmap_finish(&args->trans, args->flist, | 470 | error = xfs_bmap_finish(&args->trans, args->flist, |
447 | &committed); | 471 | &committed); |
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index f0e8249722d4..db4acc1c3e73 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c | |||
@@ -1514,18 +1514,27 @@ xfs_filemap_fault( | |||
1514 | struct vm_area_struct *vma, | 1514 | struct vm_area_struct *vma, |
1515 | struct vm_fault *vmf) | 1515 | struct vm_fault *vmf) |
1516 | { | 1516 | { |
1517 | struct xfs_inode *ip = XFS_I(file_inode(vma->vm_file)); | 1517 | struct inode *inode = file_inode(vma->vm_file); |
1518 | int ret; | 1518 | int ret; |
1519 | 1519 | ||
1520 | trace_xfs_filemap_fault(ip); | 1520 | trace_xfs_filemap_fault(XFS_I(inode)); |
1521 | 1521 | ||
1522 | /* DAX can shortcut the normal fault path on write faults! */ | 1522 | /* DAX can shortcut the normal fault path on write faults! */ |
1523 | if ((vmf->flags & FAULT_FLAG_WRITE) && IS_DAX(VFS_I(ip))) | 1523 | if ((vmf->flags & FAULT_FLAG_WRITE) && IS_DAX(inode)) |
1524 | return xfs_filemap_page_mkwrite(vma, vmf); | 1524 | return xfs_filemap_page_mkwrite(vma, vmf); |
1525 | 1525 | ||
1526 | xfs_ilock(ip, XFS_MMAPLOCK_SHARED); | 1526 | xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); |
1527 | ret = filemap_fault(vma, vmf); | 1527 | if (IS_DAX(inode)) { |
1528 | xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); | 1528 | /* |
1529 | * we do not want to trigger unwritten extent conversion on read | ||
1530 | * faults - that is unnecessary overhead and would also require | ||
1531 | * changes to xfs_get_blocks_direct() to map unwritten extent | ||
1532 | * ioend for conversion on read-only mappings. | ||
1533 | */ | ||
1534 | ret = __dax_fault(vma, vmf, xfs_get_blocks_direct, NULL); | ||
1535 | } else | ||
1536 | ret = filemap_fault(vma, vmf); | ||
1537 | xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); | ||
1529 | 1538 | ||
1530 | return ret; | 1539 | return ret; |
1531 | } | 1540 | } |
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 01dd228ca05e..480ebba8464f 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c | |||
@@ -1886,9 +1886,14 @@ xlog_recover_get_buf_lsn( | |||
1886 | uuid = &((struct xfs_dir3_blk_hdr *)blk)->uuid; | 1886 | uuid = &((struct xfs_dir3_blk_hdr *)blk)->uuid; |
1887 | break; | 1887 | break; |
1888 | case XFS_ATTR3_RMT_MAGIC: | 1888 | case XFS_ATTR3_RMT_MAGIC: |
1889 | lsn = be64_to_cpu(((struct xfs_attr3_rmt_hdr *)blk)->rm_lsn); | 1889 | /* |
1890 | uuid = &((struct xfs_attr3_rmt_hdr *)blk)->rm_uuid; | 1890 | * Remote attr blocks are written synchronously, rather than |
1891 | break; | 1891 | * being logged. That means they do not contain a valid LSN |
1892 | * (i.e. transactionally ordered) in them, and hence any time we | ||
1893 | * see a buffer to replay over the top of a remote attribute | ||
1894 | * block we should simply do so. | ||
1895 | */ | ||
1896 | goto recover_immediately; | ||
1892 | case XFS_SB_MAGIC: | 1897 | case XFS_SB_MAGIC: |
1893 | lsn = be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn); | 1898 | lsn = be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn); |
1894 | uuid = &((struct xfs_dsb *)blk)->sb_uuid; | 1899 | uuid = &((struct xfs_dsb *)blk)->sb_uuid; |