From ecd50729f772f0b982ddbb76c16ee4bc8f495e17 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Sep 2016 11:24:37 +1000 Subject: iomap: add IOMAP_F_NEW flag Signed-off-by: Christoph Hellwig Reviewed-by: Ross Zwisler Signed-off-by: Dave Chinner --- fs/xfs/xfs_iomap.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index f96c8ffce5f4..ac677d032e3a 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1000,6 +1000,7 @@ xfs_file_iomap_begin( if (error) return error; + iomap->flags = IOMAP_F_NEW; trace_xfs_iomap_alloc(ip, offset, length, 0, &imap); } else { ASSERT(nimaps); -- cgit v1.2.2 From 17879e8f865f4ed8b6f9041a2687ad40f13ae460 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Sep 2016 11:24:50 +1000 Subject: xfs: fix locking for DAX writes So far DAX writes inherited the locking from direct I/O writes, but the direct I/O model of using shared locks for writes is actually wrong for DAX. For direct I/O we're out of any standards and don't have to provide the Posix required exclusion between writers, but for DAX which gets transparently enable on applications without any knowledge of it we can't simply drop the requirement. Even worse this only happens for aligned writes and thus doesn't show up for many typical use cases. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_file.c | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index e612a0233710..62649ccdbb4d 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -714,24 +714,11 @@ xfs_file_dax_write( struct address_space *mapping = iocb->ki_filp->f_mapping; struct inode *inode = mapping->host; struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; ssize_t ret = 0; - int unaligned_io = 0; - int iolock; + int iolock = XFS_IOLOCK_EXCL; struct iov_iter data; - /* "unaligned" here means not aligned to a filesystem block */ - if ((iocb->ki_pos & mp->m_blockmask) || - ((iocb->ki_pos + iov_iter_count(from)) & mp->m_blockmask)) { - unaligned_io = 1; - iolock = XFS_IOLOCK_EXCL; - } else if (mapping->nrpages) { - iolock = XFS_IOLOCK_EXCL; - } else { - iolock = XFS_IOLOCK_SHARED; - } xfs_rw_ilock(ip, iolock); - ret = xfs_file_aio_write_checks(iocb, from, &iolock); if (ret) goto out; @@ -758,11 +745,6 @@ xfs_file_dax_write( WARN_ON_ONCE(ret); } - if (iolock == XFS_IOLOCK_EXCL && !unaligned_io) { - xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); - iolock = XFS_IOLOCK_SHARED; - } - trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos); data = *from; -- cgit v1.2.2 From 66642c5c1dea411dd2842159f9f297ce8e914994 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Sep 2016 11:26:39 +1000 Subject: xfs: take the ilock shared if possible in xfs_file_iomap_begin We always just read the extent first, and will later lock exlusively after first dropping the lock in case we actually allocate blocks. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_iomap.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index ac677d032e3a..fe4a26d752d0 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -954,6 +954,7 @@ xfs_file_iomap_begin( struct xfs_bmbt_irec imap; xfs_fileoff_t offset_fsb, end_fsb; int nimaps = 1, error = 0; + unsigned lockmode; if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; @@ -963,7 +964,7 @@ xfs_file_iomap_begin( iomap); } - xfs_ilock(ip, XFS_ILOCK_EXCL); + lockmode = xfs_ilock_data_map_shared(ip); ASSERT(offset <= mp->m_super->s_maxbytes); if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes) @@ -974,7 +975,7 @@ xfs_file_iomap_begin( error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, &nimaps, XFS_BMAPI_ENTIRE); if (error) { - xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_iunlock(ip, lockmode); return error; } @@ -994,7 +995,8 @@ xfs_file_iomap_begin( * xfs_iomap_write_direct() expects the shared lock. It * is unlocked on return. */ - xfs_ilock_demote(ip, XFS_ILOCK_EXCL); + if (lockmode == XFS_ILOCK_EXCL) + xfs_ilock_demote(ip, lockmode); error = xfs_iomap_write_direct(ip, offset, length, &imap, nimaps); if (error) @@ -1005,7 +1007,7 @@ xfs_file_iomap_begin( } else { ASSERT(nimaps); - xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_iunlock(ip, lockmode); trace_xfs_iomap_found(ip, offset, length, 0, &imap); } -- cgit v1.2.2 From e372843a407ddff1e4c4acc7cdf3df9987bf48cc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Sep 2016 11:26:41 +1000 Subject: xfs: refactor xfs_setfilesize Rename the current function to __xfs_setfilesize and add a non-static wrapper that also takes care of creating the transaction. This new helper will be used by the new iomap-based DAX path. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_aops.c | 31 +++++++++++++++++++++---------- fs/xfs/xfs_aops.h | 1 + 2 files changed, 22 insertions(+), 10 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 7575cfc3ad15..4a28fa91e3b1 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -200,7 +200,7 @@ xfs_setfilesize_trans_alloc( * Update on-disk file size now that data has been written to disk. */ STATIC int -xfs_setfilesize( +__xfs_setfilesize( struct xfs_inode *ip, struct xfs_trans *tp, xfs_off_t offset, @@ -225,6 +225,23 @@ xfs_setfilesize( return xfs_trans_commit(tp); } +int +xfs_setfilesize( + struct xfs_inode *ip, + xfs_off_t offset, + size_t size) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; + int error; + + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp); + if (error) + return error; + + return __xfs_setfilesize(ip, tp, offset, size); +} + STATIC int xfs_setfilesize_ioend( struct xfs_ioend *ioend, @@ -247,7 +264,7 @@ xfs_setfilesize_ioend( return error; } - return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size); + return __xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size); } /* @@ -1336,13 +1353,12 @@ xfs_end_io_direct_write( { struct inode *inode = file_inode(iocb->ki_filp); struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; uintptr_t flags = (uintptr_t)private; int error = 0; trace_xfs_end_io_direct_write(ip, offset, size); - if (XFS_FORCED_SHUTDOWN(mp)) + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return -EIO; if (size <= 0) @@ -1380,14 +1396,9 @@ xfs_end_io_direct_write( error = xfs_iomap_write_unwritten(ip, offset, size); } else if (flags & XFS_DIO_FLAG_APPEND) { - struct xfs_trans *tp; - trace_xfs_end_io_direct_write_append(ip, offset, size); - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, - &tp); - if (!error) - error = xfs_setfilesize(ip, tp, offset, size); + error = xfs_setfilesize(ip, offset, size); } return error; diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h index bf2d9a141a73..1950e3bca2ac 100644 --- a/fs/xfs/xfs_aops.h +++ b/fs/xfs/xfs_aops.h @@ -62,6 +62,7 @@ int xfs_get_blocks_dax_fault(struct inode *inode, sector_t offset, int xfs_end_io_direct_write(struct kiocb *iocb, loff_t offset, ssize_t size, void *private); +int xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size); extern void xfs_count_page_state(struct page *, int *, int *); extern struct block_device *xfs_find_bdev_for_inode(struct inode *); -- cgit v1.2.2 From 6c31f495d19975b7d2e824ee614934d5db113afe Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Sep 2016 11:28:38 +1000 Subject: xfs: use iomap to implement DAX Another users of buffer_heads bytes the dust. Signed-off-by: Christoph Hellwig Reviewed-by: Ross Zwisler Signed-off-by: Dave Chinner --- fs/xfs/xfs_file.c | 61 +++++++++++++++--------------------------------------- fs/xfs/xfs_iomap.c | 11 ++++++---- 2 files changed, 24 insertions(+), 48 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 62649ccdbb4d..f99d7fac5abf 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -332,10 +332,7 @@ xfs_file_dax_read( struct kiocb *iocb, struct iov_iter *to) { - struct address_space *mapping = iocb->ki_filp->f_mapping; - struct inode *inode = mapping->host; - struct xfs_inode *ip = XFS_I(inode); - struct iov_iter data = *to; + struct xfs_inode *ip = XFS_I(iocb->ki_filp->f_mapping->host); size_t count = iov_iter_count(to); ssize_t ret = 0; @@ -345,11 +342,7 @@ xfs_file_dax_read( return 0; /* skip atime */ xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); - ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, NULL, 0); - if (ret > 0) { - iocb->ki_pos += ret; - iov_iter_advance(to, ret); - } + ret = iomap_dax_rw(iocb, to, &xfs_iomap_ops); xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); file_accessed(iocb->ki_filp); @@ -711,52 +704,32 @@ xfs_file_dax_write( struct kiocb *iocb, struct iov_iter *from) { - struct address_space *mapping = iocb->ki_filp->f_mapping; - struct inode *inode = mapping->host; + struct inode *inode = iocb->ki_filp->f_mapping->host; struct xfs_inode *ip = XFS_I(inode); - ssize_t ret = 0; int iolock = XFS_IOLOCK_EXCL; - struct iov_iter data; + ssize_t ret, error = 0; + size_t count; + loff_t pos; xfs_rw_ilock(ip, iolock); ret = xfs_file_aio_write_checks(iocb, from, &iolock); if (ret) goto out; - /* - * Yes, even DAX files can have page cache attached to them: A zeroed - * page is inserted into the pagecache when we have to serve a write - * fault on a hole. It should never be dirtied and can simply be - * dropped from the pagecache once we get real data for the page. - * - * XXX: This is racy against mmap, and there's nothing we can do about - * it. dax_do_io() should really do this invalidation internally as - * it will know if we've allocated over a holei for this specific IO and - * if so it needs to update the mapping tree and invalidate existing - * PTEs over the newly allocated range. Remove this invalidation when - * dax_do_io() is fixed up. - */ - if (mapping->nrpages) { - loff_t end = iocb->ki_pos + iov_iter_count(from) - 1; - - ret = invalidate_inode_pages2_range(mapping, - iocb->ki_pos >> PAGE_SHIFT, - end >> PAGE_SHIFT); - WARN_ON_ONCE(ret); - } + pos = iocb->ki_pos; + count = iov_iter_count(from); - trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos); + trace_xfs_file_dax_write(ip, count, pos); - data = *from; - ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, - xfs_end_io_direct_write, 0); - if (ret > 0) { - iocb->ki_pos += ret; - iov_iter_advance(from, ret); + ret = iomap_dax_rw(iocb, from, &xfs_iomap_ops); + if (ret > 0 && iocb->ki_pos > i_size_read(inode)) { + i_size_write(inode, iocb->ki_pos); + error = xfs_setfilesize(ip, pos, ret); } + out: xfs_rw_iunlock(ip, iolock); - return ret; + return error ? error : ret; } STATIC ssize_t @@ -1495,7 +1468,7 @@ xfs_filemap_page_mkwrite( xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); if (IS_DAX(inode)) { - ret = dax_mkwrite(vma, vmf, xfs_get_blocks_dax_fault); + ret = iomap_dax_fault(vma, vmf, &xfs_iomap_ops); } else { ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops); ret = block_page_mkwrite_return(ret); @@ -1529,7 +1502,7 @@ xfs_filemap_fault( * changes to xfs_get_blocks_direct() to map unwritten extent * ioend for conversion on read-only mappings. */ - ret = dax_fault(vma, vmf, xfs_get_blocks_dax_fault); + ret = iomap_dax_fault(vma, vmf, &xfs_iomap_ops); } else ret = filemap_fault(vma, vmf); xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index fe4a26d752d0..c08253e11545 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -934,11 +934,13 @@ error_on_bmapi_transaction: return error; } -static inline bool imap_needs_alloc(struct xfs_bmbt_irec *imap, int nimaps) +static inline bool imap_needs_alloc(struct inode *inode, + struct xfs_bmbt_irec *imap, int nimaps) { return !nimaps || imap->br_startblock == HOLESTARTBLOCK || - imap->br_startblock == DELAYSTARTBLOCK; + imap->br_startblock == DELAYSTARTBLOCK || + (IS_DAX(inode) && ISUNWRITTEN(imap)); } static int @@ -959,7 +961,8 @@ xfs_file_iomap_begin( if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; - if ((flags & IOMAP_WRITE) && !xfs_get_extsz_hint(ip)) { + if ((flags & IOMAP_WRITE) && + !IS_DAX(inode) && !xfs_get_extsz_hint(ip)) { return xfs_file_iomap_begin_delay(inode, offset, length, flags, iomap); } @@ -979,7 +982,7 @@ xfs_file_iomap_begin( return error; } - if ((flags & IOMAP_WRITE) && imap_needs_alloc(&imap, nimaps)) { + if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) { /* * We cap the maximum length we map here to MAX_WRITEBACK_PAGES * pages to keep the chunks of work done where somewhat symmetric -- cgit v1.2.2