diff options
author | Christoph Hellwig <hch@lst.de> | 2015-02-01 18:02:09 -0500 |
---|---|---|
committer | Dave Chinner <david@fromorbit.com> | 2015-02-01 18:02:09 -0500 |
commit | 2ba66237029d1ad6c1a5e2241b0ffbbfff55f750 (patch) | |
tree | 7c85d7aec5513c39b8037100bea2b023f1dc9e53 /fs/xfs | |
parent | f3d215526e6955028dfbbfd446db8716275fb0c7 (diff) |
xfs: don't allocate an ioend for direct I/O completions
Back in the days when the direct I/O ->end_io callback could be called
from interrupt context for AIO we needed a structure to hand off to the
workqueue, and reused the ioend structure for this purpose. These days
->end_io is always called from user or workqueue context, which allows us
to avoid this memory allocation and simplify the code significantly.
[dchinner: removed now unused xfs_finish_ioend_sync() function after
Brian Foster did an initial review.]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
Diffstat (limited to 'fs/xfs')
-rw-r--r-- | fs/xfs/xfs_aops.c | 149 | ||||
-rw-r--r-- | fs/xfs/xfs_aops.h | 3 |
2 files changed, 61 insertions, 91 deletions
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 18e2f3bbae5e..3a9b7a1b8704 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c | |||
@@ -135,30 +135,22 @@ xfs_setfilesize_trans_alloc( | |||
135 | */ | 135 | */ |
136 | STATIC int | 136 | STATIC int |
137 | xfs_setfilesize( | 137 | xfs_setfilesize( |
138 | struct xfs_ioend *ioend) | 138 | struct xfs_inode *ip, |
139 | struct xfs_trans *tp, | ||
140 | xfs_off_t offset, | ||
141 | size_t size) | ||
139 | { | 142 | { |
140 | struct xfs_inode *ip = XFS_I(ioend->io_inode); | ||
141 | struct xfs_trans *tp = ioend->io_append_trans; | ||
142 | xfs_fsize_t isize; | 143 | xfs_fsize_t isize; |
143 | 144 | ||
144 | /* | ||
145 | * The transaction may have been allocated in the I/O submission thread, | ||
146 | * thus we need to mark ourselves as beeing in a transaction manually. | ||
147 | * Similarly for freeze protection. | ||
148 | */ | ||
149 | current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); | ||
150 | rwsem_acquire_read(&VFS_I(ip)->i_sb->s_writers.lock_map[SB_FREEZE_FS-1], | ||
151 | 0, 1, _THIS_IP_); | ||
152 | |||
153 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 145 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
154 | isize = xfs_new_eof(ip, ioend->io_offset + ioend->io_size); | 146 | isize = xfs_new_eof(ip, offset + size); |
155 | if (!isize) { | 147 | if (!isize) { |
156 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 148 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
157 | xfs_trans_cancel(tp, 0); | 149 | xfs_trans_cancel(tp, 0); |
158 | return 0; | 150 | return 0; |
159 | } | 151 | } |
160 | 152 | ||
161 | trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size); | 153 | trace_xfs_setfilesize(ip, offset, size); |
162 | 154 | ||
163 | ip->i_d.di_size = isize; | 155 | ip->i_d.di_size = isize; |
164 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); | 156 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); |
@@ -167,6 +159,25 @@ xfs_setfilesize( | |||
167 | return xfs_trans_commit(tp, 0); | 159 | return xfs_trans_commit(tp, 0); |
168 | } | 160 | } |
169 | 161 | ||
162 | STATIC int | ||
163 | xfs_setfilesize_ioend( | ||
164 | struct xfs_ioend *ioend) | ||
165 | { | ||
166 | struct xfs_inode *ip = XFS_I(ioend->io_inode); | ||
167 | struct xfs_trans *tp = ioend->io_append_trans; | ||
168 | |||
169 | /* | ||
170 | * The transaction may have been allocated in the I/O submission thread, | ||
171 | * thus we need to mark ourselves as being in a transaction manually. | ||
172 | * Similarly for freeze protection. | ||
173 | */ | ||
174 | current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); | ||
175 | rwsem_acquire_read(&VFS_I(ip)->i_sb->s_writers.lock_map[SB_FREEZE_FS-1], | ||
176 | 0, 1, _THIS_IP_); | ||
177 | |||
178 | return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size); | ||
179 | } | ||
180 | |||
170 | /* | 181 | /* |
171 | * Schedule IO completion handling on the final put of an ioend. | 182 | * Schedule IO completion handling on the final put of an ioend. |
172 | * | 183 | * |
@@ -182,8 +193,7 @@ xfs_finish_ioend( | |||
182 | 193 | ||
183 | if (ioend->io_type == XFS_IO_UNWRITTEN) | 194 | if (ioend->io_type == XFS_IO_UNWRITTEN) |
184 | queue_work(mp->m_unwritten_workqueue, &ioend->io_work); | 195 | queue_work(mp->m_unwritten_workqueue, &ioend->io_work); |
185 | else if (ioend->io_append_trans || | 196 | else if (ioend->io_append_trans) |
186 | (ioend->io_isdirect && xfs_ioend_is_append(ioend))) | ||
187 | queue_work(mp->m_data_workqueue, &ioend->io_work); | 197 | queue_work(mp->m_data_workqueue, &ioend->io_work); |
188 | else | 198 | else |
189 | xfs_destroy_ioend(ioend); | 199 | xfs_destroy_ioend(ioend); |
@@ -215,22 +225,8 @@ xfs_end_io( | |||
215 | if (ioend->io_type == XFS_IO_UNWRITTEN) { | 225 | if (ioend->io_type == XFS_IO_UNWRITTEN) { |
216 | error = xfs_iomap_write_unwritten(ip, ioend->io_offset, | 226 | error = xfs_iomap_write_unwritten(ip, ioend->io_offset, |
217 | ioend->io_size); | 227 | ioend->io_size); |
218 | } else if (ioend->io_isdirect && xfs_ioend_is_append(ioend)) { | ||
219 | /* | ||
220 | * For direct I/O we do not know if we need to allocate blocks | ||
221 | * or not so we can't preallocate an append transaction as that | ||
222 | * results in nested reservations and log space deadlocks. Hence | ||
223 | * allocate the transaction here. While this is sub-optimal and | ||
224 | * can block IO completion for some time, we're stuck with doing | ||
225 | * it this way until we can pass the ioend to the direct IO | ||
226 | * allocation callbacks and avoid nesting that way. | ||
227 | */ | ||
228 | error = xfs_setfilesize_trans_alloc(ioend); | ||
229 | if (error) | ||
230 | goto done; | ||
231 | error = xfs_setfilesize(ioend); | ||
232 | } else if (ioend->io_append_trans) { | 228 | } else if (ioend->io_append_trans) { |
233 | error = xfs_setfilesize(ioend); | 229 | error = xfs_setfilesize_ioend(ioend); |
234 | } else { | 230 | } else { |
235 | ASSERT(!xfs_ioend_is_append(ioend)); | 231 | ASSERT(!xfs_ioend_is_append(ioend)); |
236 | } | 232 | } |
@@ -242,17 +238,6 @@ done: | |||
242 | } | 238 | } |
243 | 239 | ||
244 | /* | 240 | /* |
245 | * Call IO completion handling in caller context on the final put of an ioend. | ||
246 | */ | ||
247 | STATIC void | ||
248 | xfs_finish_ioend_sync( | ||
249 | struct xfs_ioend *ioend) | ||
250 | { | ||
251 | if (atomic_dec_and_test(&ioend->io_remaining)) | ||
252 | xfs_end_io(&ioend->io_work); | ||
253 | } | ||
254 | |||
255 | /* | ||
256 | * Allocate and initialise an IO completion structure. | 241 | * Allocate and initialise an IO completion structure. |
257 | * We need to track unwritten extent write completion here initially. | 242 | * We need to track unwritten extent write completion here initially. |
258 | * We'll need to extend this for updating the ondisk inode size later | 243 | * We'll need to extend this for updating the ondisk inode size later |
@@ -273,7 +258,6 @@ xfs_alloc_ioend( | |||
273 | * all the I/O from calling the completion routine too early. | 258 | * all the I/O from calling the completion routine too early. |
274 | */ | 259 | */ |
275 | atomic_set(&ioend->io_remaining, 1); | 260 | atomic_set(&ioend->io_remaining, 1); |
276 | ioend->io_isdirect = 0; | ||
277 | ioend->io_error = 0; | 261 | ioend->io_error = 0; |
278 | ioend->io_list = NULL; | 262 | ioend->io_list = NULL; |
279 | ioend->io_type = type; | 263 | ioend->io_type = type; |
@@ -1459,11 +1443,7 @@ xfs_get_blocks_direct( | |||
1459 | * | 1443 | * |
1460 | * If the private argument is non-NULL __xfs_get_blocks signals us that we | 1444 | * If the private argument is non-NULL __xfs_get_blocks signals us that we |
1461 | * need to issue a transaction to convert the range from unwritten to written | 1445 | * need to issue a transaction to convert the range from unwritten to written |
1462 | * extents. In case this is regular synchronous I/O we just call xfs_end_io | 1446 | * extents. |
1463 | * to do this and we are done. But in case this was a successful AIO | ||
1464 | * request this handler is called from interrupt context, from which we | ||
1465 | * can't start transactions. In that case offload the I/O completion to | ||
1466 | * the workqueues we also use for buffered I/O completion. | ||
1467 | */ | 1447 | */ |
1468 | STATIC void | 1448 | STATIC void |
1469 | xfs_end_io_direct_write( | 1449 | xfs_end_io_direct_write( |
@@ -1472,7 +1452,12 @@ xfs_end_io_direct_write( | |||
1472 | ssize_t size, | 1452 | ssize_t size, |
1473 | void *private) | 1453 | void *private) |
1474 | { | 1454 | { |
1475 | struct xfs_ioend *ioend = iocb->private; | 1455 | struct inode *inode = file_inode(iocb->ki_filp); |
1456 | struct xfs_inode *ip = XFS_I(inode); | ||
1457 | struct xfs_mount *mp = ip->i_mount; | ||
1458 | |||
1459 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
1460 | return; | ||
1476 | 1461 | ||
1477 | /* | 1462 | /* |
1478 | * While the generic direct I/O code updates the inode size, it does | 1463 | * While the generic direct I/O code updates the inode size, it does |
@@ -1480,22 +1465,33 @@ xfs_end_io_direct_write( | |||
1480 | * end_io handler thinks the on-disk size is outside the in-core | 1465 | * end_io handler thinks the on-disk size is outside the in-core |
1481 | * size. To prevent this just update it a little bit earlier here. | 1466 | * size. To prevent this just update it a little bit earlier here. |
1482 | */ | 1467 | */ |
1483 | if (offset + size > i_size_read(ioend->io_inode)) | 1468 | if (offset + size > i_size_read(inode)) |
1484 | i_size_write(ioend->io_inode, offset + size); | 1469 | i_size_write(inode, offset + size); |
1485 | 1470 | ||
1486 | /* | 1471 | /* |
1487 | * blockdev_direct_IO can return an error even after the I/O | 1472 | * For direct I/O we do not know if we need to allocate blocks or not, |
1488 | * completion handler was called. Thus we need to protect | 1473 | * so we can't preallocate an append transaction, as that results in |
1489 | * against double-freeing. | 1474 | * nested reservations and log space deadlocks. Hence allocate the |
1475 | * transaction here. While this is sub-optimal and can block IO | ||
1476 | * completion for some time, we're stuck with doing it this way until | ||
1477 | * we can pass the ioend to the direct IO allocation callbacks and | ||
1478 | * avoid nesting that way. | ||
1490 | */ | 1479 | */ |
1491 | iocb->private = NULL; | 1480 | if (private && size > 0) { |
1492 | 1481 | xfs_iomap_write_unwritten(ip, offset, size); | |
1493 | ioend->io_offset = offset; | 1482 | } else if (offset + size > ip->i_d.di_size) { |
1494 | ioend->io_size = size; | 1483 | struct xfs_trans *tp; |
1495 | if (private && size > 0) | 1484 | int error; |
1496 | ioend->io_type = XFS_IO_UNWRITTEN; | 1485 | |
1486 | tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); | ||
1487 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0); | ||
1488 | if (error) { | ||
1489 | xfs_trans_cancel(tp, 0); | ||
1490 | return; | ||
1491 | } | ||
1497 | 1492 | ||
1498 | xfs_finish_ioend_sync(ioend); | 1493 | xfs_setfilesize(ip, tp, offset, size); |
1494 | } | ||
1499 | } | 1495 | } |
1500 | 1496 | ||
1501 | STATIC ssize_t | 1497 | STATIC ssize_t |
@@ -1507,39 +1503,16 @@ xfs_vm_direct_IO( | |||
1507 | { | 1503 | { |
1508 | struct inode *inode = iocb->ki_filp->f_mapping->host; | 1504 | struct inode *inode = iocb->ki_filp->f_mapping->host; |
1509 | struct block_device *bdev = xfs_find_bdev_for_inode(inode); | 1505 | struct block_device *bdev = xfs_find_bdev_for_inode(inode); |
1510 | struct xfs_ioend *ioend = NULL; | ||
1511 | ssize_t ret; | ||
1512 | 1506 | ||
1513 | if (rw & WRITE) { | 1507 | if (rw & WRITE) { |
1514 | size_t size = iov_iter_count(iter); | 1508 | return __blockdev_direct_IO(rw, iocb, inode, bdev, iter, |
1515 | |||
1516 | /* | ||
1517 | * We cannot preallocate a size update transaction here as we | ||
1518 | * don't know whether allocation is necessary or not. Hence we | ||
1519 | * can only tell IO completion that one is necessary if we are | ||
1520 | * not doing unwritten extent conversion. | ||
1521 | */ | ||
1522 | iocb->private = ioend = xfs_alloc_ioend(inode, XFS_IO_DIRECT); | ||
1523 | if (offset + size > XFS_I(inode)->i_d.di_size) | ||
1524 | ioend->io_isdirect = 1; | ||
1525 | |||
1526 | ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter, | ||
1527 | offset, xfs_get_blocks_direct, | 1509 | offset, xfs_get_blocks_direct, |
1528 | xfs_end_io_direct_write, NULL, | 1510 | xfs_end_io_direct_write, NULL, |
1529 | DIO_ASYNC_EXTEND); | 1511 | DIO_ASYNC_EXTEND); |
1530 | if (ret != -EIOCBQUEUED && iocb->private) | ||
1531 | goto out_destroy_ioend; | ||
1532 | } else { | ||
1533 | ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter, | ||
1534 | offset, xfs_get_blocks_direct, | ||
1535 | NULL, NULL, 0); | ||
1536 | } | 1512 | } |
1537 | 1513 | return __blockdev_direct_IO(rw, iocb, inode, bdev, iter, | |
1538 | return ret; | 1514 | offset, xfs_get_blocks_direct, |
1539 | 1515 | NULL, NULL, 0); | |
1540 | out_destroy_ioend: | ||
1541 | xfs_destroy_ioend(ioend); | ||
1542 | return ret; | ||
1543 | } | 1516 | } |
1544 | 1517 | ||
1545 | /* | 1518 | /* |
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h index f94dd459dff9..ac644e0137a4 100644 --- a/fs/xfs/xfs_aops.h +++ b/fs/xfs/xfs_aops.h | |||
@@ -24,14 +24,12 @@ extern mempool_t *xfs_ioend_pool; | |||
24 | * Types of I/O for bmap clustering and I/O completion tracking. | 24 | * Types of I/O for bmap clustering and I/O completion tracking. |
25 | */ | 25 | */ |
26 | enum { | 26 | enum { |
27 | XFS_IO_DIRECT = 0, /* special case for direct I/O ioends */ | ||
28 | XFS_IO_DELALLOC, /* covers delalloc region */ | 27 | XFS_IO_DELALLOC, /* covers delalloc region */ |
29 | XFS_IO_UNWRITTEN, /* covers allocated but uninitialized data */ | 28 | XFS_IO_UNWRITTEN, /* covers allocated but uninitialized data */ |
30 | XFS_IO_OVERWRITE, /* covers already allocated extent */ | 29 | XFS_IO_OVERWRITE, /* covers already allocated extent */ |
31 | }; | 30 | }; |
32 | 31 | ||
33 | #define XFS_IO_TYPES \ | 32 | #define XFS_IO_TYPES \ |
34 | { 0, "" }, \ | ||
35 | { XFS_IO_DELALLOC, "delalloc" }, \ | 33 | { XFS_IO_DELALLOC, "delalloc" }, \ |
36 | { XFS_IO_UNWRITTEN, "unwritten" }, \ | 34 | { XFS_IO_UNWRITTEN, "unwritten" }, \ |
37 | { XFS_IO_OVERWRITE, "overwrite" } | 35 | { XFS_IO_OVERWRITE, "overwrite" } |
@@ -45,7 +43,6 @@ typedef struct xfs_ioend { | |||
45 | unsigned int io_type; /* delalloc / unwritten */ | 43 | unsigned int io_type; /* delalloc / unwritten */ |
46 | int io_error; /* I/O error code */ | 44 | int io_error; /* I/O error code */ |
47 | atomic_t io_remaining; /* hold count */ | 45 | atomic_t io_remaining; /* hold count */ |
48 | unsigned int io_isdirect : 1;/* direct I/O */ | ||
49 | struct inode *io_inode; /* file being written to */ | 46 | struct inode *io_inode; /* file being written to */ |
50 | struct buffer_head *io_buffer_head;/* buffer linked list head */ | 47 | struct buffer_head *io_buffer_head;/* buffer linked list head */ |
51 | struct buffer_head *io_buffer_tail;/* buffer linked list tail */ | 48 | struct buffer_head *io_buffer_tail;/* buffer linked list tail */ |