aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_aops.c
diff options
context:
space:
mode:
author Christoph Hellwig <hch@infradead.org> 2012-03-13 04:41:05 -0400
committer Ben Myers <bpm@sgi.com> 2012-03-13 17:30:49 -0400
commit 281627df3eb55e1b729b9bb06fff5ff112929646 (patch)
tree ffb2d00056f661073f62fa42693970a4485b87b9 /fs/xfs/xfs_aops.c
parent 84803fb78237014cbbc86c0f012b273a199f4691 (diff)
xfs: log file size updates at I/O completion time
Do not use unlogged metadata updates and the VFS dirty bit for updating the file size after writeback. In addition to causing various problems with updates getting delayed for far too long, this also drags in the unscalable VFS dirty tracking, and is one of the few remaining unlogged metadata updates. Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Mark Tinguely <tinguely@sgi.com> Signed-off-by: Ben Myers <bpm@sgi.com>
Diffstat (limited to 'fs/xfs/xfs_aops.c')
-rw-r--r-- fs/xfs/xfs_aops.c 133
1 files changed, 109 insertions, 24 deletions
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 8e11b07bb281..0dbb9e70fe21 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -26,6 +26,7 @@
26#include "xfs_bmap_btree.h" 26#include "xfs_bmap_btree.h"
27#include "xfs_dinode.h" 27#include "xfs_dinode.h"
28#include "xfs_inode.h" 28#include "xfs_inode.h"
29#include "xfs_inode_item.h"
29#include "xfs_alloc.h" 30#include "xfs_alloc.h"
30#include "xfs_error.h" 31#include "xfs_error.h"
31#include "xfs_rw.h" 32#include "xfs_rw.h"
@@ -107,25 +108,65 @@ static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend)
107 XFS_I(ioend->io_inode)->i_d.di_size; 108 XFS_I(ioend->io_inode)->i_d.di_size;
108} 109}
109 110
111STATIC int
112xfs_setfilesize_trans_alloc(
113 struct xfs_ioend *ioend)
114{
115 struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount;
116 struct xfs_trans *tp;
117 int error;
118
119 tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
120
121 error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
122 if (error) {
123 xfs_trans_cancel(tp, 0);
124 return error;
125 }
126
127 ioend->io_append_trans = tp;
128
129 /*
130 * We hand off the transaction to the completion thread now, so
131 * clear the flag here.
132 */
133 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
134 return 0;
135}
136
110/* 137/*
111 * Update on-disk file size now that data has been written to disk. 138 * Update on-disk file size now that data has been written to disk.
112 */ 139 */
113STATIC void 140STATIC int
114xfs_setfilesize( 141xfs_setfilesize(
115 struct xfs_ioend *ioend) 142 struct xfs_ioend *ioend)
116{ 143{
117 struct xfs_inode *ip = XFS_I(ioend->io_inode); 144 struct xfs_inode *ip = XFS_I(ioend->io_inode);
145 struct xfs_trans *tp = ioend->io_append_trans;
118 xfs_fsize_t isize; 146 xfs_fsize_t isize;
119 147
148 /*
149 * The transaction was allocated in the I/O submission thread,
150 * thus we need to mark ourselves as being in a transaction
151 * manually.
152 */
153 current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
154
120 xfs_ilock(ip, XFS_ILOCK_EXCL); 155 xfs_ilock(ip, XFS_ILOCK_EXCL);
121 isize = xfs_new_eof(ip, ioend->io_offset + ioend->io_size); 156 isize = xfs_new_eof(ip, ioend->io_offset + ioend->io_size);
122 if (isize) { 157 if (!isize) {
123 trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size); 158 xfs_iunlock(ip, XFS_ILOCK_EXCL);
124 ip->i_d.di_size = isize; 159 xfs_trans_cancel(tp, 0);
125 xfs_mark_inode_dirty(ip); 160 return 0;
126 } 161 }
127 162
128 xfs_iunlock(ip, XFS_ILOCK_EXCL); 163 trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
164
165 ip->i_d.di_size = isize;
166 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
167 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
168
169 return xfs_trans_commit(tp, 0);
129} 170}
130 171
131/* 172/*
@@ -143,7 +184,7 @@ xfs_finish_ioend(
143 184
144 if (ioend->io_type == IO_UNWRITTEN) 185 if (ioend->io_type == IO_UNWRITTEN)
145 queue_work(mp->m_unwritten_workqueue, &ioend->io_work); 186 queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
146 else if (xfs_ioend_is_append(ioend)) 187 else if (ioend->io_append_trans)
147 queue_work(mp->m_data_workqueue, &ioend->io_work); 188 queue_work(mp->m_data_workqueue, &ioend->io_work);
148 else 189 else
149 xfs_destroy_ioend(ioend); 190 xfs_destroy_ioend(ioend);
@@ -173,18 +214,32 @@ xfs_end_io(
173 * range to normal written extents after the data I/O has finished. 214 * range to normal written extents after the data I/O has finished.
174 */ 215 */
175 if (ioend->io_type == IO_UNWRITTEN) { 216 if (ioend->io_type == IO_UNWRITTEN) {
217 /*
218 * For buffered I/O we never preallocate a transaction when
219 * doing the unwritten extent conversion, but for direct I/O
220 * we do not know if we are converting an unwritten extent
221 * or not at the point where we preallocate the transaction.
222 */
223 if (ioend->io_append_trans) {
224 ASSERT(ioend->io_isdirect);
225
226 current_set_flags_nested(
227 &ioend->io_append_trans->t_pflags, PF_FSTRANS);
228 xfs_trans_cancel(ioend->io_append_trans, 0);
229 }
230
176 error = xfs_iomap_write_unwritten(ip, ioend->io_offset, 231 error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
177 ioend->io_size); 232 ioend->io_size);
178 if (error) { 233 if (error) {
179 ioend->io_error = -error; 234 ioend->io_error = -error;
180 goto done; 235 goto done;
181 } 236 }
237 } else if (ioend->io_append_trans) {
238 error = xfs_setfilesize(ioend);
239 if (error)
240 ioend->io_error = -error;
182 } else { 241 } else {
183 /* 242 ASSERT(!xfs_ioend_is_append(ioend));
184 * We might have to update the on-disk file size after
185 * extending writes.
186 */
187 xfs_setfilesize(ioend);
188 } 243 }
189 244
190done: 245done:
@@ -224,6 +279,7 @@ xfs_alloc_ioend(
224 */ 279 */
225 atomic_set(&ioend->io_remaining, 1); 280 atomic_set(&ioend->io_remaining, 1);
226 ioend->io_isasync = 0; 281 ioend->io_isasync = 0;
282 ioend->io_isdirect = 0;
227 ioend->io_error = 0; 283 ioend->io_error = 0;
228 ioend->io_list = NULL; 284 ioend->io_list = NULL;
229 ioend->io_type = type; 285 ioend->io_type = type;
@@ -234,6 +290,7 @@ xfs_alloc_ioend(
234 ioend->io_size = 0; 290 ioend->io_size = 0;
235 ioend->io_iocb = NULL; 291 ioend->io_iocb = NULL;
236 ioend->io_result = 0; 292 ioend->io_result = 0;
293 ioend->io_append_trans = NULL;
237 294
238 INIT_WORK(&ioend->io_work, xfs_end_io); 295 INIT_WORK(&ioend->io_work, xfs_end_io);
239 return ioend; 296 return ioend;
@@ -341,18 +398,9 @@ xfs_submit_ioend_bio(
341 xfs_ioend_t *ioend, 398 xfs_ioend_t *ioend,
342 struct bio *bio) 399 struct bio *bio)
343{ 400{
344 struct xfs_inode *ip = XFS_I(ioend->io_inode);
345 atomic_inc(&ioend->io_remaining); 401 atomic_inc(&ioend->io_remaining);
346 bio->bi_private = ioend; 402 bio->bi_private = ioend;
347 bio->bi_end_io = xfs_end_bio; 403 bio->bi_end_io = xfs_end_bio;
348
349 /*
350 * If the I/O is beyond EOF we mark the inode dirty immediately
351 * but don't update the inode size until I/O completion.
352 */
353 if (xfs_new_eof(ip, ioend->io_offset + ioend->io_size))
354 xfs_mark_inode_dirty(ip);
355
356 submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio); 404 submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
357} 405}
358 406
@@ -999,8 +1047,20 @@ xfs_vm_writepage(
999 wbc, end_index); 1047 wbc, end_index);
1000 } 1048 }
1001 1049
1002 if (iohead) 1050 if (iohead) {
1051 /*
1052 * Reserve log space if we might write beyond the on-disk
1053 * inode size.
1054 */
1055 if (ioend->io_type != IO_UNWRITTEN &&
1056 xfs_ioend_is_append(ioend)) {
1057 err = xfs_setfilesize_trans_alloc(ioend);
1058 if (err)
1059 goto error;
1060 }
1061
1003 xfs_submit_ioend(wbc, iohead); 1062 xfs_submit_ioend(wbc, iohead);
1063 }
1004 1064
1005 return 0; 1065 return 0;
1006 1066
@@ -1280,17 +1340,32 @@ xfs_vm_direct_IO(
1280{ 1340{
1281 struct inode *inode = iocb->ki_filp->f_mapping->host; 1341 struct inode *inode = iocb->ki_filp->f_mapping->host;
1282 struct block_device *bdev = xfs_find_bdev_for_inode(inode); 1342 struct block_device *bdev = xfs_find_bdev_for_inode(inode);
1343 struct xfs_ioend *ioend = NULL;
1283 ssize_t ret; 1344 ssize_t ret;
1284 1345
1285 if (rw & WRITE) { 1346 if (rw & WRITE) {
1286 iocb->private = xfs_alloc_ioend(inode, IO_DIRECT); 1347 size_t size = iov_length(iov, nr_segs);
1348
1349 /*
1350 * We need to preallocate a transaction for a size update
1351 * here. In the case that this write both updates the size
1352 * and converts at least one unwritten extent we will cancel
1353 * the still clean transaction after the I/O has finished.
1354 */
1355 iocb->private = ioend = xfs_alloc_ioend(inode, IO_DIRECT);
1356 if (offset + size > XFS_I(inode)->i_d.di_size) {
1357 ret = xfs_setfilesize_trans_alloc(ioend);
1358 if (ret)
1359 goto out_destroy_ioend;
1360 ioend->io_isdirect = 1;
1361 }
1287 1362
1288 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, 1363 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
1289 offset, nr_segs, 1364 offset, nr_segs,
1290 xfs_get_blocks_direct, 1365 xfs_get_blocks_direct,
1291 xfs_end_io_direct_write, NULL, 0); 1366 xfs_end_io_direct_write, NULL, 0);
1292 if (ret != -EIOCBQUEUED && iocb->private) 1367 if (ret != -EIOCBQUEUED && iocb->private)
1293 xfs_destroy_ioend(iocb->private); 1368 goto out_trans_cancel;
1294 } else { 1369 } else {
1295 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, 1370 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
1296 offset, nr_segs, 1371 offset, nr_segs,
@@ -1299,6 +1374,16 @@ xfs_vm_direct_IO(
1299 } 1374 }
1300 1375
1301 return ret; 1376 return ret;
1377
1378out_trans_cancel:
1379 if (ioend->io_append_trans) {
1380 current_set_flags_nested(&ioend->io_append_trans->t_pflags,
1381 PF_FSTRANS);
1382 xfs_trans_cancel(ioend->io_append_trans, 0);
1383 }
1384out_destroy_ioend:
1385 xfs_destroy_ioend(ioend);
1386 return ret;
1302} 1387}
1303 1388
1304STATIC void 1389STATIC void