diff options
Diffstat (limited to 'fs/xfs/xfs_aops.c')
| -rw-r--r-- | fs/xfs/xfs_aops.c | 149 |
1 file changed, 61 insertions, 88 deletions
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 18e2f3bbae5e..3a9b7a1b8704 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
| @@ -135,30 +135,22 @@ xfs_setfilesize_trans_alloc( | |||
| 135 | */ | 135 | */ |
| 136 | STATIC int | 136 | STATIC int |
| 137 | xfs_setfilesize( | 137 | xfs_setfilesize( |
| 138 | struct xfs_ioend *ioend) | 138 | struct xfs_inode *ip, |
| 139 | struct xfs_trans *tp, | ||
| 140 | xfs_off_t offset, | ||
| 141 | size_t size) | ||
| 139 | { | 142 | { |
| 140 | struct xfs_inode *ip = XFS_I(ioend->io_inode); | ||
| 141 | struct xfs_trans *tp = ioend->io_append_trans; | ||
| 142 | xfs_fsize_t isize; | 143 | xfs_fsize_t isize; |
| 143 | 144 | ||
| 144 | /* | ||
| 145 | * The transaction may have been allocated in the I/O submission thread, | ||
| 146 | * thus we need to mark ourselves as beeing in a transaction manually. | ||
| 147 | * Similarly for freeze protection. | ||
| 148 | */ | ||
| 149 | current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); | ||
| 150 | rwsem_acquire_read(&VFS_I(ip)->i_sb->s_writers.lock_map[SB_FREEZE_FS-1], | ||
| 151 | 0, 1, _THIS_IP_); | ||
| 152 | |||
| 153 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 145 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
| 154 | isize = xfs_new_eof(ip, ioend->io_offset + ioend->io_size); | 146 | isize = xfs_new_eof(ip, offset + size); |
| 155 | if (!isize) { | 147 | if (!isize) { |
| 156 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 148 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
| 157 | xfs_trans_cancel(tp, 0); | 149 | xfs_trans_cancel(tp, 0); |
| 158 | return 0; | 150 | return 0; |
| 159 | } | 151 | } |
| 160 | 152 | ||
| 161 | trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size); | 153 | trace_xfs_setfilesize(ip, offset, size); |
| 162 | 154 | ||
| 163 | ip->i_d.di_size = isize; | 155 | ip->i_d.di_size = isize; |
| 164 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); | 156 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); |
| @@ -167,6 +159,25 @@ xfs_setfilesize( | |||
| 167 | return xfs_trans_commit(tp, 0); | 159 | return xfs_trans_commit(tp, 0); |
| 168 | } | 160 | } |
| 169 | 161 | ||
| 162 | STATIC int | ||
| 163 | xfs_setfilesize_ioend( | ||
| 164 | struct xfs_ioend *ioend) | ||
| 165 | { | ||
| 166 | struct xfs_inode *ip = XFS_I(ioend->io_inode); | ||
| 167 | struct xfs_trans *tp = ioend->io_append_trans; | ||
| 168 | |||
| 169 | /* | ||
| 170 | * The transaction may have been allocated in the I/O submission thread, | ||
| 171 | * thus we need to mark ourselves as being in a transaction manually. | ||
| 172 | * Similarly for freeze protection. | ||
| 173 | */ | ||
| 174 | current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); | ||
| 175 | rwsem_acquire_read(&VFS_I(ip)->i_sb->s_writers.lock_map[SB_FREEZE_FS-1], | ||
| 176 | 0, 1, _THIS_IP_); | ||
| 177 | |||
| 178 | return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size); | ||
| 179 | } | ||
| 180 | |||
| 170 | /* | 181 | /* |
| 171 | * Schedule IO completion handling on the final put of an ioend. | 182 | * Schedule IO completion handling on the final put of an ioend. |
| 172 | * | 183 | * |
| @@ -182,8 +193,7 @@ xfs_finish_ioend( | |||
| 182 | 193 | ||
| 183 | if (ioend->io_type == XFS_IO_UNWRITTEN) | 194 | if (ioend->io_type == XFS_IO_UNWRITTEN) |
| 184 | queue_work(mp->m_unwritten_workqueue, &ioend->io_work); | 195 | queue_work(mp->m_unwritten_workqueue, &ioend->io_work); |
| 185 | else if (ioend->io_append_trans || | 196 | else if (ioend->io_append_trans) |
| 186 | (ioend->io_isdirect && xfs_ioend_is_append(ioend))) | ||
| 187 | queue_work(mp->m_data_workqueue, &ioend->io_work); | 197 | queue_work(mp->m_data_workqueue, &ioend->io_work); |
| 188 | else | 198 | else |
| 189 | xfs_destroy_ioend(ioend); | 199 | xfs_destroy_ioend(ioend); |
| @@ -215,22 +225,8 @@ xfs_end_io( | |||
| 215 | if (ioend->io_type == XFS_IO_UNWRITTEN) { | 225 | if (ioend->io_type == XFS_IO_UNWRITTEN) { |
| 216 | error = xfs_iomap_write_unwritten(ip, ioend->io_offset, | 226 | error = xfs_iomap_write_unwritten(ip, ioend->io_offset, |
| 217 | ioend->io_size); | 227 | ioend->io_size); |
| 218 | } else if (ioend->io_isdirect && xfs_ioend_is_append(ioend)) { | ||
| 219 | /* | ||
| 220 | * For direct I/O we do not know if we need to allocate blocks | ||
| 221 | * or not so we can't preallocate an append transaction as that | ||
| 222 | * results in nested reservations and log space deadlocks. Hence | ||
| 223 | * allocate the transaction here. While this is sub-optimal and | ||
| 224 | * can block IO completion for some time, we're stuck with doing | ||
| 225 | * it this way until we can pass the ioend to the direct IO | ||
| 226 | * allocation callbacks and avoid nesting that way. | ||
| 227 | */ | ||
| 228 | error = xfs_setfilesize_trans_alloc(ioend); | ||
| 229 | if (error) | ||
| 230 | goto done; | ||
| 231 | error = xfs_setfilesize(ioend); | ||
| 232 | } else if (ioend->io_append_trans) { | 228 | } else if (ioend->io_append_trans) { |
| 233 | error = xfs_setfilesize(ioend); | 229 | error = xfs_setfilesize_ioend(ioend); |
| 234 | } else { | 230 | } else { |
| 235 | ASSERT(!xfs_ioend_is_append(ioend)); | 231 | ASSERT(!xfs_ioend_is_append(ioend)); |
| 236 | } | 232 | } |
| @@ -242,17 +238,6 @@ done: | |||
| 242 | } | 238 | } |
| 243 | 239 | ||
| 244 | /* | 240 | /* |
| 245 | * Call IO completion handling in caller context on the final put of an ioend. | ||
| 246 | */ | ||
| 247 | STATIC void | ||
| 248 | xfs_finish_ioend_sync( | ||
| 249 | struct xfs_ioend *ioend) | ||
| 250 | { | ||
| 251 | if (atomic_dec_and_test(&ioend->io_remaining)) | ||
| 252 | xfs_end_io(&ioend->io_work); | ||
| 253 | } | ||
| 254 | |||
| 255 | /* | ||
| 256 | * Allocate and initialise an IO completion structure. | 241 | * Allocate and initialise an IO completion structure. |
| 257 | * We need to track unwritten extent write completion here initially. | 242 | * We need to track unwritten extent write completion here initially. |
| 258 | * We'll need to extend this for updating the ondisk inode size later | 243 | * We'll need to extend this for updating the ondisk inode size later |
| @@ -273,7 +258,6 @@ xfs_alloc_ioend( | |||
| 273 | * all the I/O from calling the completion routine too early. | 258 | * all the I/O from calling the completion routine too early. |
| 274 | */ | 259 | */ |
| 275 | atomic_set(&ioend->io_remaining, 1); | 260 | atomic_set(&ioend->io_remaining, 1); |
| 276 | ioend->io_isdirect = 0; | ||
| 277 | ioend->io_error = 0; | 261 | ioend->io_error = 0; |
| 278 | ioend->io_list = NULL; | 262 | ioend->io_list = NULL; |
| 279 | ioend->io_type = type; | 263 | ioend->io_type = type; |
| @@ -1459,11 +1443,7 @@ xfs_get_blocks_direct( | |||
| 1459 | * | 1443 | * |
| 1460 | * If the private argument is non-NULL __xfs_get_blocks signals us that we | 1444 | * If the private argument is non-NULL __xfs_get_blocks signals us that we |
| 1461 | * need to issue a transaction to convert the range from unwritten to written | 1445 | * need to issue a transaction to convert the range from unwritten to written |
| 1462 | * extents. In case this is regular synchronous I/O we just call xfs_end_io | 1446 | * extents. |
| 1463 | * to do this and we are done. But in case this was a successful AIO | ||
| 1464 | * request this handler is called from interrupt context, from which we | ||
| 1465 | * can't start transactions. In that case offload the I/O completion to | ||
| 1466 | * the workqueues we also use for buffered I/O completion. | ||
| 1467 | */ | 1447 | */ |
| 1468 | STATIC void | 1448 | STATIC void |
| 1469 | xfs_end_io_direct_write( | 1449 | xfs_end_io_direct_write( |
| @@ -1472,7 +1452,12 @@ xfs_end_io_direct_write( | |||
| 1472 | ssize_t size, | 1452 | ssize_t size, |
| 1473 | void *private) | 1453 | void *private) |
| 1474 | { | 1454 | { |
| 1475 | struct xfs_ioend *ioend = iocb->private; | 1455 | struct inode *inode = file_inode(iocb->ki_filp); |
| 1456 | struct xfs_inode *ip = XFS_I(inode); | ||
| 1457 | struct xfs_mount *mp = ip->i_mount; | ||
| 1458 | |||
| 1459 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
| 1460 | return; | ||
| 1476 | 1461 | ||
| 1477 | /* | 1462 | /* |
| 1478 | * While the generic direct I/O code updates the inode size, it does | 1463 | * While the generic direct I/O code updates the inode size, it does |
| @@ -1480,22 +1465,33 @@ xfs_end_io_direct_write( | |||
| 1480 | * end_io handler thinks the on-disk size is outside the in-core | 1465 | * end_io handler thinks the on-disk size is outside the in-core |
| 1481 | * size. To prevent this just update it a little bit earlier here. | 1466 | * size. To prevent this just update it a little bit earlier here. |
| 1482 | */ | 1467 | */ |
| 1483 | if (offset + size > i_size_read(ioend->io_inode)) | 1468 | if (offset + size > i_size_read(inode)) |
| 1484 | i_size_write(ioend->io_inode, offset + size); | 1469 | i_size_write(inode, offset + size); |
| 1485 | 1470 | ||
| 1486 | /* | 1471 | /* |
| 1487 | * blockdev_direct_IO can return an error even after the I/O | 1472 | * For direct I/O we do not know if we need to allocate blocks or not, |
| 1488 | * completion handler was called. Thus we need to protect | 1473 | * so we can't preallocate an append transaction, as that results in |
| 1489 | * against double-freeing. | 1474 | * nested reservations and log space deadlocks. Hence allocate the |
| 1475 | * transaction here. While this is sub-optimal and can block IO | ||
| 1476 | * completion for some time, we're stuck with doing it this way until | ||
| 1477 | * we can pass the ioend to the direct IO allocation callbacks and | ||
| 1478 | * avoid nesting that way. | ||
| 1490 | */ | 1479 | */ |
| 1491 | iocb->private = NULL; | 1480 | if (private && size > 0) { |
| 1492 | 1481 | xfs_iomap_write_unwritten(ip, offset, size); | |
| 1493 | ioend->io_offset = offset; | 1482 | } else if (offset + size > ip->i_d.di_size) { |
| 1494 | ioend->io_size = size; | 1483 | struct xfs_trans *tp; |
| 1495 | if (private && size > 0) | 1484 | int error; |
| 1496 | ioend->io_type = XFS_IO_UNWRITTEN; | 1485 | |
| 1486 | tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); | ||
| 1487 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0); | ||
| 1488 | if (error) { | ||
| 1489 | xfs_trans_cancel(tp, 0); | ||
| 1490 | return; | ||
| 1491 | } | ||
| 1497 | 1492 | ||
| 1498 | xfs_finish_ioend_sync(ioend); | 1493 | xfs_setfilesize(ip, tp, offset, size); |
| 1494 | } | ||
| 1499 | } | 1495 | } |
| 1500 | 1496 | ||
| 1501 | STATIC ssize_t | 1497 | STATIC ssize_t |
| @@ -1507,39 +1503,16 @@ xfs_vm_direct_IO( | |||
| 1507 | { | 1503 | { |
| 1508 | struct inode *inode = iocb->ki_filp->f_mapping->host; | 1504 | struct inode *inode = iocb->ki_filp->f_mapping->host; |
| 1509 | struct block_device *bdev = xfs_find_bdev_for_inode(inode); | 1505 | struct block_device *bdev = xfs_find_bdev_for_inode(inode); |
| 1510 | struct xfs_ioend *ioend = NULL; | ||
| 1511 | ssize_t ret; | ||
| 1512 | 1506 | ||
| 1513 | if (rw & WRITE) { | 1507 | if (rw & WRITE) { |
| 1514 | size_t size = iov_iter_count(iter); | 1508 | return __blockdev_direct_IO(rw, iocb, inode, bdev, iter, |
| 1515 | |||
| 1516 | /* | ||
| 1517 | * We cannot preallocate a size update transaction here as we | ||
| 1518 | * don't know whether allocation is necessary or not. Hence we | ||
| 1519 | * can only tell IO completion that one is necessary if we are | ||
| 1520 | * not doing unwritten extent conversion. | ||
| 1521 | */ | ||
| 1522 | iocb->private = ioend = xfs_alloc_ioend(inode, XFS_IO_DIRECT); | ||
| 1523 | if (offset + size > XFS_I(inode)->i_d.di_size) | ||
| 1524 | ioend->io_isdirect = 1; | ||
| 1525 | |||
| 1526 | ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter, | ||
| 1527 | offset, xfs_get_blocks_direct, | 1509 | offset, xfs_get_blocks_direct, |
| 1528 | xfs_end_io_direct_write, NULL, | 1510 | xfs_end_io_direct_write, NULL, |
| 1529 | DIO_ASYNC_EXTEND); | 1511 | DIO_ASYNC_EXTEND); |
| 1530 | if (ret != -EIOCBQUEUED && iocb->private) | ||
| 1531 | goto out_destroy_ioend; | ||
| 1532 | } else { | ||
| 1533 | ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter, | ||
| 1534 | offset, xfs_get_blocks_direct, | ||
| 1535 | NULL, NULL, 0); | ||
| 1536 | } | 1512 | } |
| 1537 | 1513 | return __blockdev_direct_IO(rw, iocb, inode, bdev, iter, | |
| 1538 | return ret; | 1514 | offset, xfs_get_blocks_direct, |
| 1539 | 1515 | NULL, NULL, 0); | |
| 1540 | out_destroy_ioend: | ||
| 1541 | xfs_destroy_ioend(ioend); | ||
| 1542 | return ret; | ||
| 1543 | } | 1516 | } |
| 1544 | 1517 | ||
| 1545 | /* | 1518 | /* |
