diff options
author | Christoph Hellwig <hch@lst.de> | 2017-02-02 02:56:06 -0500 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2017-02-04 03:47:12 -0500 |
commit | 214d55efa25557ca4d023c4001d798b7d360cd8b (patch) | |
tree | 90fd9f51407aeceea9163b458b139453aeba68f7 /fs | |
parent | 29f319275e7637b3d146aa67db7fad036339fcc1 (diff) |
xfs: fix COW writeback race
commit d2b3964a0780d2d2994eba57f950d6c9fe489ed8 upstream.
Due to the way how xfs_iomap_write_allocate tries to convert the whole
found extents from delalloc to real space we can run into a race
condition with multiple threads doing writes to this same extent.
For the non-COW case that is harmless as the only thing that can happen
is that we call xfs_bmapi_write on an extent that has already been
converted to a real allocation. For COW writes where we move the extent
from the COW to the data fork after I/O completion the race is, however,
not quite as harmless. In the worst case we are now calling
xfs_bmapi_write on a region that contains hole in the COW work, which
will trip up an assert in debug builds or lead to file system corruption
in non-debug builds. This seems to be reproducible with workloads of
small O_DSYNC write, although so far I've not managed to come up with
a with an isolated reproducer.
The fix for the issue is relatively simple: tell xfs_bmapi_write
that we are only asked to convert delayed allocations and skip holes
in that case.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/xfs/libxfs/xfs_bmap.c | 44 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_bmap.h | 6 | ||||
-rw-r--r-- | fs/xfs/xfs_iomap.c | 2 |
3 files changed, 38 insertions, 14 deletions
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 52dc5c175001..fbb60d30089c 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c | |||
@@ -4607,8 +4607,6 @@ xfs_bmapi_write( | |||
4607 | int n; /* current extent index */ | 4607 | int n; /* current extent index */ |
4608 | xfs_fileoff_t obno; /* old block number (offset) */ | 4608 | xfs_fileoff_t obno; /* old block number (offset) */ |
4609 | int whichfork; /* data or attr fork */ | 4609 | int whichfork; /* data or attr fork */ |
4610 | char inhole; /* current location is hole in file */ | ||
4611 | char wasdelay; /* old extent was delayed */ | ||
4612 | 4610 | ||
4613 | #ifdef DEBUG | 4611 | #ifdef DEBUG |
4614 | xfs_fileoff_t orig_bno; /* original block number value */ | 4612 | xfs_fileoff_t orig_bno; /* original block number value */ |
@@ -4694,22 +4692,44 @@ xfs_bmapi_write( | |||
4694 | bma.firstblock = firstblock; | 4692 | bma.firstblock = firstblock; |
4695 | 4693 | ||
4696 | while (bno < end && n < *nmap) { | 4694 | while (bno < end && n < *nmap) { |
4697 | inhole = eof || bma.got.br_startoff > bno; | 4695 | bool need_alloc = false, wasdelay = false; |
4698 | wasdelay = !inhole && isnullstartblock(bma.got.br_startblock); | ||
4699 | 4696 | ||
4700 | /* | 4697 | /* in hole or beyoned EOF? */ |
4701 | * Make sure we only reflink into a hole. | 4698 | if (eof || bma.got.br_startoff > bno) { |
4702 | */ | 4699 | if (flags & XFS_BMAPI_DELALLOC) { |
4703 | if (flags & XFS_BMAPI_REMAP) | 4700 | /* |
4704 | ASSERT(inhole); | 4701 | * For the COW fork we can reasonably get a |
4705 | if (flags & XFS_BMAPI_COWFORK) | 4702 | * request for converting an extent that races |
4706 | ASSERT(!inhole); | 4703 | * with other threads already having converted |
4704 | * part of it, as there converting COW to | ||
4705 | * regular blocks is not protected using the | ||
4706 | * IOLOCK. | ||
4707 | */ | ||
4708 | ASSERT(flags & XFS_BMAPI_COWFORK); | ||
4709 | if (!(flags & XFS_BMAPI_COWFORK)) { | ||
4710 | error = -EIO; | ||
4711 | goto error0; | ||
4712 | } | ||
4713 | |||
4714 | if (eof || bno >= end) | ||
4715 | break; | ||
4716 | } else { | ||
4717 | need_alloc = true; | ||
4718 | } | ||
4719 | } else { | ||
4720 | /* | ||
4721 | * Make sure we only reflink into a hole. | ||
4722 | */ | ||
4723 | ASSERT(!(flags & XFS_BMAPI_REMAP)); | ||
4724 | if (isnullstartblock(bma.got.br_startblock)) | ||
4725 | wasdelay = true; | ||
4726 | } | ||
4707 | 4727 | ||
4708 | /* | 4728 | /* |
4709 | * First, deal with the hole before the allocated space | 4729 | * First, deal with the hole before the allocated space |
4710 | * that we found, if any. | 4730 | * that we found, if any. |
4711 | */ | 4731 | */ |
4712 | if (inhole || wasdelay) { | 4732 | if (need_alloc || wasdelay) { |
4713 | bma.eof = eof; | 4733 | bma.eof = eof; |
4714 | bma.conv = !!(flags & XFS_BMAPI_CONVERT); | 4734 | bma.conv = !!(flags & XFS_BMAPI_CONVERT); |
4715 | bma.wasdel = wasdelay; | 4735 | bma.wasdel = wasdelay; |
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index d6d175a4fdec..e7d40b39f18f 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h | |||
@@ -110,6 +110,9 @@ struct xfs_extent_free_item | |||
110 | /* Map something in the CoW fork. */ | 110 | /* Map something in the CoW fork. */ |
111 | #define XFS_BMAPI_COWFORK 0x200 | 111 | #define XFS_BMAPI_COWFORK 0x200 |
112 | 112 | ||
113 | /* Only convert delalloc space, don't allocate entirely new extents */ | ||
114 | #define XFS_BMAPI_DELALLOC 0x400 | ||
115 | |||
113 | #define XFS_BMAPI_FLAGS \ | 116 | #define XFS_BMAPI_FLAGS \ |
114 | { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ | 117 | { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ |
115 | { XFS_BMAPI_METADATA, "METADATA" }, \ | 118 | { XFS_BMAPI_METADATA, "METADATA" }, \ |
@@ -120,7 +123,8 @@ struct xfs_extent_free_item | |||
120 | { XFS_BMAPI_CONVERT, "CONVERT" }, \ | 123 | { XFS_BMAPI_CONVERT, "CONVERT" }, \ |
121 | { XFS_BMAPI_ZERO, "ZERO" }, \ | 124 | { XFS_BMAPI_ZERO, "ZERO" }, \ |
122 | { XFS_BMAPI_REMAP, "REMAP" }, \ | 125 | { XFS_BMAPI_REMAP, "REMAP" }, \ |
123 | { XFS_BMAPI_COWFORK, "COWFORK" } | 126 | { XFS_BMAPI_COWFORK, "COWFORK" }, \ |
127 | { XFS_BMAPI_DELALLOC, "DELALLOC" } | ||
124 | 128 | ||
125 | 129 | ||
126 | static inline int xfs_bmapi_aflag(int w) | 130 | static inline int xfs_bmapi_aflag(int w) |
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 15a83813b708..cdc6bdd495be 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c | |||
@@ -681,7 +681,7 @@ xfs_iomap_write_allocate( | |||
681 | xfs_trans_t *tp; | 681 | xfs_trans_t *tp; |
682 | int nimaps; | 682 | int nimaps; |
683 | int error = 0; | 683 | int error = 0; |
684 | int flags = 0; | 684 | int flags = XFS_BMAPI_DELALLOC; |
685 | int nres; | 685 | int nres; |
686 | 686 | ||
687 | if (whichfork == XFS_COW_FORK) | 687 | if (whichfork == XFS_COW_FORK) |