author    Christoph Hellwig <hch@lst.de>    2015-02-01 18:02:09 -0500
committer Dave Chinner <david@fromorbit.com>    2015-02-01 18:02:09 -0500
commit    2ba66237029d1ad6c1a5e2241b0ffbbfff55f750 (patch)
tree      7c85d7aec5513c39b8037100bea2b023f1dc9e53 /fs/xfs
parent    f3d215526e6955028dfbbfd446db8716275fb0c7 (diff)
xfs: don't allocate an ioend for direct I/O completions
Back in the days when the direct I/O ->end_io callback could be called
from interrupt context for AIO, we needed a structure to hand off to the
workqueue, and reused the ioend structure for this purpose. These days
->end_io is always called from user or workqueue context, which allows us
to avoid this memory allocation and simplify the code significantly.

[dchinner: removed the now-unused xfs_finish_ioend_sync() function after
Brian Foster did an initial review.]

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
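Before the diff itself, here is the direct I/O completion handler as it reads
after this patch, reconstructed from the hunks below; it is a sketch for
orientation, not the literal patch text. The iocb and offset parameters are
not visible in the diff context and are assumed from the contemporary dio
->end_io callback signature; every call in the body appears verbatim in the
hunks. The point to notice: no ioend is allocated or freed anywhere on this
path.

/* Sketch: post-patch xfs_end_io_direct_write(); the iocb/offset
 * parameters are assumed from the 3.19-era dio ->end_io signature. */
STATIC void
xfs_end_io_direct_write(
        struct kiocb            *iocb,
        loff_t                  offset,
        ssize_t                 size,
        void                    *private)
{
        struct inode            *inode = file_inode(iocb->ki_filp);
        struct xfs_inode        *ip = XFS_I(inode);
        struct xfs_mount        *mp = ip->i_mount;

        if (XFS_FORCED_SHUTDOWN(mp))
                return;

        /* update the in-core size early so the on-disk size never
         * appears to be beyond the in-core size */
        if (offset + size > i_size_read(inode))
                i_size_write(inode, offset + size);

        if (private && size > 0) {
                /* unwritten extent conversion, now done inline: we are
                 * in process context, so a transaction is fine here */
                xfs_iomap_write_unwritten(ip, offset, size);
        } else if (offset + size > ip->i_d.di_size) {
                /* size-extending write: allocate the setfilesize
                 * transaction here rather than at submission time */
                struct xfs_trans        *tp;
                int                     error;

                tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
                error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
                if (error) {
                        xfs_trans_cancel(tp, 0);
                        return;
                }

                xfs_setfilesize(ip, tp, offset, size);
        }
}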
Diffstat (limited to 'fs/xfs')
-rw-r--r--   fs/xfs/xfs_aops.c   149
-rw-r--r--   fs/xfs/xfs_aops.h     3
2 files changed, 61 insertions(+), 91 deletions(-)
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 18e2f3bbae5e..3a9b7a1b8704 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -135,30 +135,22 @@ xfs_setfilesize_trans_alloc(
  */
 STATIC int
 xfs_setfilesize(
-        struct xfs_ioend        *ioend)
+        struct xfs_inode        *ip,
+        struct xfs_trans        *tp,
+        xfs_off_t               offset,
+        size_t                  size)
 {
-        struct xfs_inode        *ip = XFS_I(ioend->io_inode);
-        struct xfs_trans        *tp = ioend->io_append_trans;
         xfs_fsize_t             isize;
 
-        /*
-         * The transaction may have been allocated in the I/O submission thread,
-         * thus we need to mark ourselves as beeing in a transaction manually.
-         * Similarly for freeze protection.
-         */
-        current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
-        rwsem_acquire_read(&VFS_I(ip)->i_sb->s_writers.lock_map[SB_FREEZE_FS-1],
-                           0, 1, _THIS_IP_);
-
         xfs_ilock(ip, XFS_ILOCK_EXCL);
-        isize = xfs_new_eof(ip, ioend->io_offset + ioend->io_size);
+        isize = xfs_new_eof(ip, offset + size);
         if (!isize) {
                 xfs_iunlock(ip, XFS_ILOCK_EXCL);
                 xfs_trans_cancel(tp, 0);
                 return 0;
         }
 
-        trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
+        trace_xfs_setfilesize(ip, offset, size);
 
         ip->i_d.di_size = isize;
         xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
@@ -167,6 +159,25 @@ xfs_setfilesize(
         return xfs_trans_commit(tp, 0);
 }
 
+STATIC int
+xfs_setfilesize_ioend(
+        struct xfs_ioend        *ioend)
+{
+        struct xfs_inode        *ip = XFS_I(ioend->io_inode);
+        struct xfs_trans        *tp = ioend->io_append_trans;
+
+        /*
+         * The transaction may have been allocated in the I/O submission thread,
+         * thus we need to mark ourselves as being in a transaction manually.
+         * Similarly for freeze protection.
+         */
+        current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
+        rwsem_acquire_read(&VFS_I(ip)->i_sb->s_writers.lock_map[SB_FREEZE_FS-1],
+                           0, 1, _THIS_IP_);
+
+        return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);
+}
+
 /*
  * Schedule IO completion handling on the final put of an ioend.
  *
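The new xfs_setfilesize_ioend() wrapper above keeps the deferred-transaction
bookkeeping for buffered writeback completion, while the reworked direct I/O
completion path (later in this diff) calls xfs_setfilesize() with a
transaction it allocates on the spot. A minimal sketch of the two call
shapes, condensed from the hunks in this patch:

        /* buffered writeback completion (xfs_end_io): the transaction
         * was preallocated at submission time and stashed in the ioend */
        error = xfs_setfilesize_ioend(ioend);

        /* direct I/O completion (xfs_end_io_direct_write): the transaction
         * is allocated here, since the need for one was unknown earlier */
        tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
        error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
        if (error)
                xfs_trans_cancel(tp, 0);        /* reservation failed */
        else
                xfs_setfilesize(ip, tp, offset, size);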
@@ -182,8 +193,7 @@ xfs_finish_ioend(
 
         if (ioend->io_type == XFS_IO_UNWRITTEN)
                 queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
-        else if (ioend->io_append_trans ||
-                 (ioend->io_isdirect && xfs_ioend_is_append(ioend)))
+        else if (ioend->io_append_trans)
                 queue_work(mp->m_data_workqueue, &ioend->io_work);
         else
                 xfs_destroy_ioend(ioend);
@@ -215,22 +225,8 @@ xfs_end_io(
         if (ioend->io_type == XFS_IO_UNWRITTEN) {
                 error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
                                                   ioend->io_size);
-        } else if (ioend->io_isdirect && xfs_ioend_is_append(ioend)) {
-                /*
-                 * For direct I/O we do not know if we need to allocate blocks
-                 * or not so we can't preallocate an append transaction as that
-                 * results in nested reservations and log space deadlocks. Hence
-                 * allocate the transaction here. While this is sub-optimal and
-                 * can block IO completion for some time, we're stuck with doing
-                 * it this way until we can pass the ioend to the direct IO
-                 * allocation callbacks and avoid nesting that way.
-                 */
-                error = xfs_setfilesize_trans_alloc(ioend);
-                if (error)
-                        goto done;
-                error = xfs_setfilesize(ioend);
         } else if (ioend->io_append_trans) {
-                error = xfs_setfilesize(ioend);
+                error = xfs_setfilesize_ioend(ioend);
         } else {
                 ASSERT(!xfs_ioend_is_append(ioend));
         }
@@ -242,17 +238,6 @@ done:
 }
 
 /*
- * Call IO completion handling in caller context on the final put of an ioend.
- */
-STATIC void
-xfs_finish_ioend_sync(
-        struct xfs_ioend        *ioend)
-{
-        if (atomic_dec_and_test(&ioend->io_remaining))
-                xfs_end_io(&ioend->io_work);
-}
-
-/*
  * Allocate and initialise an IO completion structure.
  * We need to track unwritten extent write completion here initially.
  * We'll need to extend this for updating the ondisk inode size later
@@ -273,7 +258,6 @@ xfs_alloc_ioend(
         * all the I/O from calling the completion routine too early.
         */
        atomic_set(&ioend->io_remaining, 1);
-       ioend->io_isdirect = 0;
        ioend->io_error = 0;
        ioend->io_list = NULL;
        ioend->io_type = type;
@@ -1459,11 +1443,7 @@ xfs_get_blocks_direct(
  *
  * If the private argument is non-NULL __xfs_get_blocks signals us that we
  * need to issue a transaction to convert the range from unwritten to written
- * extents.  In case this is regular synchronous I/O we just call xfs_end_io
- * to do this and we are done.  But in case this was a successful AIO
- * request this handler is called from interrupt context, from which we
- * can't start transactions.  In that case offload the I/O completion to
- * the workqueues we also use for buffered I/O completion.
+ * extents.
  */
 STATIC void
 xfs_end_io_direct_write(
@@ -1472,7 +1452,12 @@ xfs_end_io_direct_write(
         ssize_t                 size,
         void                    *private)
 {
-        struct xfs_ioend        *ioend = iocb->private;
+        struct inode            *inode = file_inode(iocb->ki_filp);
+        struct xfs_inode        *ip = XFS_I(inode);
+        struct xfs_mount        *mp = ip->i_mount;
+
+        if (XFS_FORCED_SHUTDOWN(mp))
+                return;
 
         /*
          * While the generic direct I/O code updates the inode size, it does
@@ -1480,22 +1465,33 @@ xfs_end_io_direct_write(
          * end_io handler thinks the on-disk size is outside the in-core
          * size.  To prevent this just update it a little bit earlier here.
          */
-        if (offset + size > i_size_read(ioend->io_inode))
-                i_size_write(ioend->io_inode, offset + size);
+        if (offset + size > i_size_read(inode))
+                i_size_write(inode, offset + size);
 
         /*
-         * blockdev_direct_IO can return an error even after the I/O
-         * completion handler was called.  Thus we need to protect
-         * against double-freeing.
+         * For direct I/O we do not know if we need to allocate blocks or not,
+         * so we can't preallocate an append transaction, as that results in
+         * nested reservations and log space deadlocks. Hence allocate the
+         * transaction here. While this is sub-optimal and can block IO
+         * completion for some time, we're stuck with doing it this way until
+         * we can pass the ioend to the direct IO allocation callbacks and
+         * avoid nesting that way.
          */
-        iocb->private = NULL;
-
-        ioend->io_offset = offset;
-        ioend->io_size = size;
-        if (private && size > 0)
-                ioend->io_type = XFS_IO_UNWRITTEN;
+        if (private && size > 0) {
+                xfs_iomap_write_unwritten(ip, offset, size);
+        } else if (offset + size > ip->i_d.di_size) {
+                struct xfs_trans        *tp;
+                int                     error;
+
+                tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
+                error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
+                if (error) {
+                        xfs_trans_cancel(tp, 0);
+                        return;
+                }
 
-        xfs_finish_ioend_sync(ioend);
+                xfs_setfilesize(ip, tp, offset, size);
+        }
 }
 
 STATIC ssize_t
@@ -1507,39 +1503,16 @@ xfs_vm_direct_IO(
 {
         struct inode            *inode = iocb->ki_filp->f_mapping->host;
         struct block_device     *bdev = xfs_find_bdev_for_inode(inode);
-        struct xfs_ioend        *ioend = NULL;
-        ssize_t                 ret;
 
         if (rw & WRITE) {
-                size_t size = iov_iter_count(iter);
-
-                /*
-                 * We cannot preallocate a size update transaction here as we
-                 * don't know whether allocation is necessary or not. Hence we
-                 * can only tell IO completion that one is necessary if we are
-                 * not doing unwritten extent conversion.
-                 */
-                iocb->private = ioend = xfs_alloc_ioend(inode, XFS_IO_DIRECT);
-                if (offset + size > XFS_I(inode)->i_d.di_size)
-                        ioend->io_isdirect = 1;
-
-                ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter,
+                return __blockdev_direct_IO(rw, iocb, inode, bdev, iter,
                                             offset, xfs_get_blocks_direct,
                                             xfs_end_io_direct_write, NULL,
                                             DIO_ASYNC_EXTEND);
-                if (ret != -EIOCBQUEUED && iocb->private)
-                        goto out_destroy_ioend;
-        } else {
-                ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter,
-                                           offset, xfs_get_blocks_direct,
-                                           NULL, NULL, 0);
         }
-
-        return ret;
-
-out_destroy_ioend:
-        xfs_destroy_ioend(ioend);
-        return ret;
+        return __blockdev_direct_IO(rw, iocb, inode, bdev, iter,
+                                    offset, xfs_get_blocks_direct,
+                                    NULL, NULL, 0);
 }
 
 /*
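For reference, the condensed post-patch form of xfs_vm_direct_IO(),
reconstructed from the hunk above; the rw/iocb/iter/offset parameter list is
only partially visible in the diff context and is assumed from the 3.19-era
->direct_IO prototype:

STATIC ssize_t
xfs_vm_direct_IO(
        int                     rw,
        struct kiocb            *iocb,
        struct iov_iter         *iter,
        loff_t                  offset)
{
        struct inode            *inode = iocb->ki_filp->f_mapping->host;
        struct block_device     *bdev = xfs_find_bdev_for_inode(inode);

        if (rw & WRITE) {
                /* writes may extend the file, so they get the
                 * size-updating completion handler */
                return __blockdev_direct_IO(rw, iocb, inode, bdev, iter,
                                            offset, xfs_get_blocks_direct,
                                            xfs_end_io_direct_write, NULL,
                                            DIO_ASYNC_EXTEND);
        }
        /* reads never change the file size: no end_io handler needed */
        return __blockdev_direct_IO(rw, iocb, inode, bdev, iter,
                                    offset, xfs_get_blocks_direct,
                                    NULL, NULL, 0);
}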
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index f94dd459dff9..ac644e0137a4 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -24,14 +24,12 @@ extern mempool_t *xfs_ioend_pool;
  * Types of I/O for bmap clustering and I/O completion tracking.
  */
 enum {
-        XFS_IO_DIRECT = 0,      /* special case for direct I/O ioends */
         XFS_IO_DELALLOC,        /* covers delalloc region */
         XFS_IO_UNWRITTEN,       /* covers allocated but uninitialized data */
         XFS_IO_OVERWRITE,       /* covers already allocated extent */
 };
 
 #define XFS_IO_TYPES \
-        { 0,                    "" }, \
         { XFS_IO_DELALLOC,      "delalloc" }, \
         { XFS_IO_UNWRITTEN,     "unwritten" }, \
         { XFS_IO_OVERWRITE,     "overwrite" }
@@ -45,7 +43,6 @@ typedef struct xfs_ioend {
         unsigned int            io_type;        /* delalloc / unwritten */
         int                     io_error;       /* I/O error code */
         atomic_t                io_remaining;   /* hold count */
-        unsigned int            io_isdirect : 1;/* direct I/O */
         struct inode            *io_inode;      /* file being written to */
         struct buffer_head      *io_buffer_head;/* buffer linked list head */
         struct buffer_head      *io_buffer_tail;/* buffer linked list tail */
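One side effect of the two header hunks is worth spelling out: with
XFS_IO_DIRECT = 0 removed, the first remaining enumerator takes the value 0
by ordinary C enumeration rules, which is also why the matching { 0, "" }
placeholder entry in XFS_IO_TYPES can be dropped. The post-patch values,
implied by C rather than spelled out in the patch:

enum {
        XFS_IO_DELALLOC,        /* now 0 (was 1 before this patch) */
        XFS_IO_UNWRITTEN,       /* now 1 (was 2) */
        XFS_IO_OVERWRITE,       /* now 2 (was 3) */
};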