| field | value | date |
|---|---|---|
| author | Dave Chinner <david@fromorbit.com> | 2015-04-16 08:13:18 -0400 |
| committer | Dave Chinner <david@fromorbit.com> | 2015-04-16 08:13:18 -0400 |
| commit | 542c311813d5cb2e6f0dfa9557f41c829b8fb6a0 (patch) | |
| tree | 573c5644eb966e44112016c9ae86e80251326223 | |
| parent | 6a63ef064b2444883ce8b68b0779d0c739d27204 (diff) | |
| parent | 0cefb29e6a63727bc7606c47fc538467594ef112 (diff) | |
Merge branch 'xfs-dio-extend-fix' into for-next
Conflicts:
fs/xfs/xfs_file.c
| mode | path | lines |
|---|---|---|
| -rw-r--r-- | fs/xfs/xfs_aops.c | 270 |
| -rw-r--r-- | fs/xfs/xfs_file.c | 46 |
| -rw-r--r-- | fs/xfs/xfs_trace.h | 5 |

3 files changed, 239 insertions, 82 deletions
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 3a9b7a1b8704..598b259fda04 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1233,6 +1233,117 @@ xfs_vm_releasepage(
| 1233 | return try_to_free_buffers(page); | 1233 | return try_to_free_buffers(page); |
| 1234 | } | 1234 | } |
| 1235 | 1235 | ||
| 1236 | /* | ||
| 1237 | * When we map a DIO buffer, we may need to attach an ioend that describes the | ||
| 1238 | * type of write IO we are doing. This passes to the completion function the | ||
| 1239 | * operations it needs to perform. If the mapping is for an overwrite wholly | ||
| 1240 | * within the EOF then we don't need an ioend and so we don't allocate one. | ||
| 1241 | * This avoids the unnecessary overhead of allocating and freeing ioends for | ||
| 1242 | * workloads that don't require transactions on IO completion. | ||
| 1243 | * | ||
| 1244 | * If we get multiple mappings in a single IO, we might be mapping different | ||
| 1245 | * types. But because the direct IO can only have a single private pointer, we | ||
| 1246 | * need to ensure that: | ||
| 1247 | * | ||
| 1248 | * a) i) the ioend spans the entire region of unwritten mappings; or | ||
| 1249 | * ii) the ioend spans all the mappings that cross or are beyond EOF; and | ||
| 1250 | * b) if it contains unwritten extents, it is *permanently* marked as such | ||
| 1251 | * | ||
| 1252 | * We could do this by chaining ioends like buffered IO does, but we only | ||
| 1253 | * actually get one IO completion callback from the direct IO, and that spans | ||
| 1254 | * the entire IO regardless of how many mappings and IOs are needed to complete | ||
| 1255 | * the DIO. There is only going to be one reference to the ioend and its life | ||
| 1256 | * cycle is constrained by the DIO completion code. hence we don't need | ||
| 1257 | * reference counting here. | ||
| 1258 | */ | ||
| 1259 | static void | ||
| 1260 | xfs_map_direct( | ||
| 1261 | struct inode *inode, | ||
| 1262 | struct buffer_head *bh_result, | ||
| 1263 | struct xfs_bmbt_irec *imap, | ||
| 1264 | xfs_off_t offset) | ||
| 1265 | { | ||
| 1266 | struct xfs_ioend *ioend; | ||
| 1267 | xfs_off_t size = bh_result->b_size; | ||
| 1268 | int type; | ||
| 1269 | |||
| 1270 | if (ISUNWRITTEN(imap)) | ||
| 1271 | type = XFS_IO_UNWRITTEN; | ||
| 1272 | else | ||
| 1273 | type = XFS_IO_OVERWRITE; | ||
| 1274 | |||
| 1275 | trace_xfs_gbmap_direct(XFS_I(inode), offset, size, type, imap); | ||
| 1276 | |||
| 1277 | if (bh_result->b_private) { | ||
| 1278 | ioend = bh_result->b_private; | ||
| 1279 | ASSERT(ioend->io_size > 0); | ||
| 1280 | ASSERT(offset >= ioend->io_offset); | ||
| 1281 | if (offset + size > ioend->io_offset + ioend->io_size) | ||
| 1282 | ioend->io_size = offset - ioend->io_offset + size; | ||
| 1283 | |||
| 1284 | if (type == XFS_IO_UNWRITTEN && type != ioend->io_type) | ||
| 1285 | ioend->io_type = XFS_IO_UNWRITTEN; | ||
| 1286 | |||
| 1287 | trace_xfs_gbmap_direct_update(XFS_I(inode), ioend->io_offset, | ||
| 1288 | ioend->io_size, ioend->io_type, | ||
| 1289 | imap); | ||
| 1290 | } else if (type == XFS_IO_UNWRITTEN || | ||
| 1291 | offset + size > i_size_read(inode)) { | ||
| 1292 | ioend = xfs_alloc_ioend(inode, type); | ||
| 1293 | ioend->io_offset = offset; | ||
| 1294 | ioend->io_size = size; | ||
| 1295 | |||
| 1296 | bh_result->b_private = ioend; | ||
| 1297 | set_buffer_defer_completion(bh_result); | ||
| 1298 | |||
| 1299 | trace_xfs_gbmap_direct_new(XFS_I(inode), offset, size, type, | ||
| 1300 | imap); | ||
| 1301 | } else { | ||
| 1302 | trace_xfs_gbmap_direct_none(XFS_I(inode), offset, size, type, | ||
| 1303 | imap); | ||
| 1304 | } | ||
| 1305 | } | ||
| 1306 | |||
| 1307 | /* | ||
| 1308 | * If this is O_DIRECT or the mpage code calling tell them how large the mapping | ||
| 1309 | * is, so that we can avoid repeated get_blocks calls. | ||
| 1310 | * | ||
| 1311 | * If the mapping spans EOF, then we have to break the mapping up as the mapping | ||
| 1312 | * for blocks beyond EOF must be marked new so that sub block regions can be | ||
| 1313 | * correctly zeroed. We can't do this for mappings within EOF unless the mapping | ||
| 1314 | * was just allocated or is unwritten, otherwise the callers would overwrite | ||
| 1315 | * existing data with zeros. Hence we have to split the mapping into a range up | ||
| 1316 | * to and including EOF, and a second mapping for beyond EOF. | ||
| 1317 | */ | ||
| 1318 | static void | ||
| 1319 | xfs_map_trim_size( | ||
| 1320 | struct inode *inode, | ||
| 1321 | sector_t iblock, | ||
| 1322 | struct buffer_head *bh_result, | ||
| 1323 | struct xfs_bmbt_irec *imap, | ||
| 1324 | xfs_off_t offset, | ||
| 1325 | ssize_t size) | ||
| 1326 | { | ||
| 1327 | xfs_off_t mapping_size; | ||
| 1328 | |||
| 1329 | mapping_size = imap->br_startoff + imap->br_blockcount - iblock; | ||
| 1330 | mapping_size <<= inode->i_blkbits; | ||
| 1331 | |||
| 1332 | ASSERT(mapping_size > 0); | ||
| 1333 | if (mapping_size > size) | ||
| 1334 | mapping_size = size; | ||
| 1335 | if (offset < i_size_read(inode) && | ||
| 1336 | offset + mapping_size >= i_size_read(inode)) { | ||
| 1337 | /* limit mapping to block that spans EOF */ | ||
| 1338 | mapping_size = roundup_64(i_size_read(inode) - offset, | ||
| 1339 | 1 << inode->i_blkbits); | ||
| 1340 | } | ||
| 1341 | if (mapping_size > LONG_MAX) | ||
| 1342 | mapping_size = LONG_MAX; | ||
| 1343 | |||
| 1344 | bh_result->b_size = mapping_size; | ||
| 1345 | } | ||
| 1346 | |||
| 1236 | STATIC int | 1347 | STATIC int |
| 1237 | __xfs_get_blocks( | 1348 | __xfs_get_blocks( |
| 1238 | struct inode *inode, | 1349 | struct inode *inode, |
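The clamping and EOF rounding that the new xfs_map_trim_size() performs is easier to follow with concrete numbers. Below is a minimal userspace sketch, not kernel code: demo_extent and trim_mapping are hypothetical stand-ins, roundup_64() is replaced by a plain round-up helper, and the final LONG_MAX clamp is omitted.

```c
#include <stdio.h>
#include <stdint.h>

/* Hypothetical stand-in for the fields of struct xfs_bmbt_irec used here. */
struct demo_extent {
	uint64_t br_startoff;	/* extent start, in filesystem blocks */
	uint64_t br_blockcount;	/* extent length, in filesystem blocks */
};

static uint64_t roundup_u64(uint64_t x, uint64_t y)
{
	return ((x + y - 1) / y) * y;
}

/*
 * Model of the trimming logic: clamp the mapping to the requested size,
 * and if the mapping crosses EOF, cut it at the block that spans EOF so
 * the region beyond EOF can be mapped (and zeroed) separately.
 */
static uint64_t trim_mapping(const struct demo_extent *imap, uint64_t iblock,
			     uint64_t offset, uint64_t size,
			     uint64_t isize, unsigned int blkbits)
{
	uint64_t mapping_size;

	mapping_size = (imap->br_startoff + imap->br_blockcount - iblock)
			<< blkbits;
	if (mapping_size > size)
		mapping_size = size;
	if (offset < isize && offset + mapping_size >= isize)
		mapping_size = roundup_u64(isize - offset, 1ULL << blkbits);
	return mapping_size;
}

int main(void)
{
	/* 4k blocks, extent of 8 blocks starting at block 0, EOF at 10000. */
	struct demo_extent imap = { .br_startoff = 0, .br_blockcount = 8 };

	printf("%llu\n", (unsigned long long)
	       trim_mapping(&imap, 0, 0, 32768, 10000, 12));	/* prints 12288 */
	return 0;
}
```

With 4k blocks and EOF at byte 10000, a 32k request starting at offset 0 is trimmed to 12288 bytes, the last block that spans EOF; blocks beyond that boundary would be mapped separately and marked new so sub-block regions can be zeroed.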
@@ -1321,31 +1432,37 @@ __xfs_get_blocks(
| 1321 | 1432 | ||
| 1322 | xfs_iunlock(ip, lockmode); | 1433 | xfs_iunlock(ip, lockmode); |
| 1323 | } | 1434 | } |
| 1324 | 1435 | trace_xfs_get_blocks_alloc(ip, offset, size, | |
| 1325 | trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap); | 1436 | ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN |
| 1437 | : XFS_IO_DELALLOC, &imap); | ||
| 1326 | } else if (nimaps) { | 1438 | } else if (nimaps) { |
| 1327 | trace_xfs_get_blocks_found(ip, offset, size, 0, &imap); | 1439 | trace_xfs_get_blocks_found(ip, offset, size, |
| 1440 | ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN | ||
| 1441 | : XFS_IO_OVERWRITE, &imap); | ||
| 1328 | xfs_iunlock(ip, lockmode); | 1442 | xfs_iunlock(ip, lockmode); |
| 1329 | } else { | 1443 | } else { |
| 1330 | trace_xfs_get_blocks_notfound(ip, offset, size); | 1444 | trace_xfs_get_blocks_notfound(ip, offset, size); |
| 1331 | goto out_unlock; | 1445 | goto out_unlock; |
| 1332 | } | 1446 | } |
| 1333 | 1447 | ||
| 1448 | /* trim mapping down to size requested */ | ||
| 1449 | if (direct || size > (1 << inode->i_blkbits)) | ||
| 1450 | xfs_map_trim_size(inode, iblock, bh_result, | ||
| 1451 | &imap, offset, size); | ||
| 1452 | |||
| 1453 | /* | ||
| 1454 | * For unwritten extents do not report a disk address in the buffered | ||
| 1455 | * read case (treat as if we're reading into a hole). | ||
| 1456 | */ | ||
| 1334 | if (imap.br_startblock != HOLESTARTBLOCK && | 1457 | if (imap.br_startblock != HOLESTARTBLOCK && |
| 1335 | imap.br_startblock != DELAYSTARTBLOCK) { | 1458 | imap.br_startblock != DELAYSTARTBLOCK && |
| 1336 | /* | 1459 | (create || !ISUNWRITTEN(&imap))) { |
| 1337 | * For unwritten extents do not report a disk address on | 1460 | xfs_map_buffer(inode, bh_result, &imap, offset); |
| 1338 | * the read case (treat as if we're reading into a hole). | 1461 | if (ISUNWRITTEN(&imap)) |
| 1339 | */ | ||
| 1340 | if (create || !ISUNWRITTEN(&imap)) | ||
| 1341 | xfs_map_buffer(inode, bh_result, &imap, offset); | ||
| 1342 | if (create && ISUNWRITTEN(&imap)) { | ||
| 1343 | if (direct) { | ||
| 1344 | bh_result->b_private = inode; | ||
| 1345 | set_buffer_defer_completion(bh_result); | ||
| 1346 | } | ||
| 1347 | set_buffer_unwritten(bh_result); | 1462 | set_buffer_unwritten(bh_result); |
| 1348 | } | 1463 | /* direct IO needs special help */ |
| 1464 | if (create && direct) | ||
| 1465 | xfs_map_direct(inode, bh_result, &imap, offset); | ||
| 1349 | } | 1466 | } |
| 1350 | 1467 | ||
| 1351 | /* | 1468 | /* |
@@ -1378,39 +1495,6 @@ __xfs_get_blocks(
| 1378 | } | 1495 | } |
| 1379 | } | 1496 | } |
| 1380 | 1497 | ||
| 1381 | /* | ||
| 1382 | * If this is O_DIRECT or the mpage code calling tell them how large | ||
| 1383 | * the mapping is, so that we can avoid repeated get_blocks calls. | ||
| 1384 | * | ||
| 1385 | * If the mapping spans EOF, then we have to break the mapping up as the | ||
| 1386 | * mapping for blocks beyond EOF must be marked new so that sub block | ||
| 1387 | * regions can be correctly zeroed. We can't do this for mappings within | ||
| 1388 | * EOF unless the mapping was just allocated or is unwritten, otherwise | ||
| 1389 | * the callers would overwrite existing data with zeros. Hence we have | ||
| 1390 | * to split the mapping into a range up to and including EOF, and a | ||
| 1391 | * second mapping for beyond EOF. | ||
| 1392 | */ | ||
| 1393 | if (direct || size > (1 << inode->i_blkbits)) { | ||
| 1394 | xfs_off_t mapping_size; | ||
| 1395 | |||
| 1396 | mapping_size = imap.br_startoff + imap.br_blockcount - iblock; | ||
| 1397 | mapping_size <<= inode->i_blkbits; | ||
| 1398 | |||
| 1399 | ASSERT(mapping_size > 0); | ||
| 1400 | if (mapping_size > size) | ||
| 1401 | mapping_size = size; | ||
| 1402 | if (offset < i_size_read(inode) && | ||
| 1403 | offset + mapping_size >= i_size_read(inode)) { | ||
| 1404 | /* limit mapping to block that spans EOF */ | ||
| 1405 | mapping_size = roundup_64(i_size_read(inode) - offset, | ||
| 1406 | 1 << inode->i_blkbits); | ||
| 1407 | } | ||
| 1408 | if (mapping_size > LONG_MAX) | ||
| 1409 | mapping_size = LONG_MAX; | ||
| 1410 | |||
| 1411 | bh_result->b_size = mapping_size; | ||
| 1412 | } | ||
| 1413 | |||
| 1414 | return 0; | 1498 | return 0; |
| 1415 | 1499 | ||
| 1416 | out_unlock: | 1500 | out_unlock: |
@@ -1441,9 +1525,11 @@ xfs_get_blocks_direct(
| 1441 | /* | 1525 | /* |
| 1442 | * Complete a direct I/O write request. | 1526 | * Complete a direct I/O write request. |
| 1443 | * | 1527 | * |
| 1444 | * If the private argument is non-NULL __xfs_get_blocks signals us that we | 1528 | * The ioend structure is passed from __xfs_get_blocks() to tell us what to do. |
| 1445 | * need to issue a transaction to convert the range from unwritten to written | 1529 | * If no ioend exists (i.e. @private == NULL) then the write IO is an overwrite |
| 1446 | * extents. | 1530 | * wholly within the EOF and so there is nothing for us to do. Note that in this |
| 1531 | * case the completion can be called in interrupt context, whereas if we have an | ||
| 1532 | * ioend we will always be called in task context (i.e. from a workqueue). | ||
| 1447 | */ | 1533 | */ |
| 1448 | STATIC void | 1534 | STATIC void |
| 1449 | xfs_end_io_direct_write( | 1535 | xfs_end_io_direct_write( |
@@ -1455,43 +1541,71 @@ xfs_end_io_direct_write(
| 1455 | struct inode *inode = file_inode(iocb->ki_filp); | 1541 | struct inode *inode = file_inode(iocb->ki_filp); |
| 1456 | struct xfs_inode *ip = XFS_I(inode); | 1542 | struct xfs_inode *ip = XFS_I(inode); |
| 1457 | struct xfs_mount *mp = ip->i_mount; | 1543 | struct xfs_mount *mp = ip->i_mount; |
| 1544 | struct xfs_ioend *ioend = private; | ||
| 1458 | 1545 | ||
| 1459 | if (XFS_FORCED_SHUTDOWN(mp)) | 1546 | trace_xfs_gbmap_direct_endio(ip, offset, size, |
| 1547 | ioend ? ioend->io_type : 0, NULL); | ||
| 1548 | |||
| 1549 | if (!ioend) { | ||
| 1550 | ASSERT(offset + size <= i_size_read(inode)); | ||
| 1460 | return; | 1551 | return; |
| 1552 | } | ||
| 1553 | |||
| 1554 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
| 1555 | goto out_end_io; | ||
| 1461 | 1556 | ||
| 1462 | /* | 1557 | /* |
| 1463 | * While the generic direct I/O code updates the inode size, it does | 1558 | * dio completion end_io functions are only called on writes if more |
| 1464 | * so only after the end_io handler is called, which means our | 1559 | * than 0 bytes was written. |
| 1465 | * end_io handler thinks the on-disk size is outside the in-core | ||
| 1466 | * size. To prevent this just update it a little bit earlier here. | ||
| 1467 | */ | 1560 | */ |
| 1561 | ASSERT(size > 0); | ||
| 1562 | |||
| 1563 | /* | ||
| 1564 | * The ioend only maps whole blocks, while the IO may be sector aligned. | ||
| 1565 | * Hence the ioend offset/size may not match the IO offset/size exactly. | ||
| 1566 | * Because we don't map overwrites within EOF into the ioend, the offset | ||
| 1567 | * may not match, but only if the endio spans EOF. Either way, write | ||
| 1568 | * the IO sizes into the ioend so that completion processing does the | ||
| 1569 | * right thing. | ||
| 1570 | */ | ||
| 1571 | ASSERT(offset + size <= ioend->io_offset + ioend->io_size); | ||
| 1572 | ioend->io_size = size; | ||
| 1573 | ioend->io_offset = offset; | ||
| 1574 | |||
| 1575 | /* | ||
| 1576 | * The ioend tells us whether we are doing unwritten extent conversion | ||
| 1577 | * or an append transaction that updates the on-disk file size. These | ||
| 1578 | * cases are the only cases where we should *potentially* be needing | ||
| 1579 | * to update the VFS inode size. | ||
| 1580 | * | ||
| 1581 | * We need to update the in-core inode size here so that we don't end up | ||
| 1582 | * with the on-disk inode size being outside the in-core inode size. We | ||
| 1583 | * have no other method of updating EOF for AIO, so always do it here | ||
| 1584 | * if necessary. | ||
| 1585 | * | ||
| 1586 | * We need to lock the test/set EOF update as we can be racing with | ||
| 1587 | * other IO completions here to update the EOF. Failing to serialise | ||
| 1588 | * here can result in EOF moving backwards and Bad Things Happen when | ||
| 1589 | * that occurs. | ||
| 1590 | */ | ||
| 1591 | spin_lock(&ip->i_flags_lock); | ||
| 1468 | if (offset + size > i_size_read(inode)) | 1592 | if (offset + size > i_size_read(inode)) |
| 1469 | i_size_write(inode, offset + size); | 1593 | i_size_write(inode, offset + size); |
| 1594 | spin_unlock(&ip->i_flags_lock); | ||
| 1470 | 1595 | ||
| 1471 | /* | 1596 | /* |
| 1472 | * For direct I/O we do not know if we need to allocate blocks or not, | 1597 | * If we are doing an append IO that needs to update the EOF on disk, |
| 1473 | * so we can't preallocate an append transaction, as that results in | 1598 | * do the transaction reserve now so we can use common end io |
| 1474 | * nested reservations and log space deadlocks. Hence allocate the | 1599 | * processing. Stashing the error (if there is one) in the ioend will |
| 1475 | * transaction here. While this is sub-optimal and can block IO | 1600 | * result in the ioend processing passing on the error if it is |
| 1476 | * completion for some time, we're stuck with doing it this way until | 1601 | * possible as we can't return it from here. |
| 1477 | * we can pass the ioend to the direct IO allocation callbacks and | ||
| 1478 | * avoid nesting that way. | ||
| 1479 | */ | 1602 | */ |
| 1480 | if (private && size > 0) { | 1603 | if (ioend->io_type == XFS_IO_OVERWRITE) |
| 1481 | xfs_iomap_write_unwritten(ip, offset, size); | 1604 | ioend->io_error = xfs_setfilesize_trans_alloc(ioend); |
| 1482 | } else if (offset + size > ip->i_d.di_size) { | ||
| 1483 | struct xfs_trans *tp; | ||
| 1484 | int error; | ||
| 1485 | |||
| 1486 | tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); | ||
| 1487 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0); | ||
| 1488 | if (error) { | ||
| 1489 | xfs_trans_cancel(tp, 0); | ||
| 1490 | return; | ||
| 1491 | } | ||
| 1492 | 1605 | ||
| 1493 | xfs_setfilesize(ip, tp, offset, size); | 1606 | out_end_io: |
| 1494 | } | 1607 | xfs_end_io(&ioend->io_work); |
| 1608 | return; | ||
| 1495 | } | 1609 | } |
| 1496 | 1610 | ||
| 1497 | STATIC ssize_t | 1611 | STATIC ssize_t |
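The i_flags_lock taken around the i_size update above ensures that racing AIO completions can only ever move the in-core EOF forward. A minimal userspace analogue of that check-then-set pattern, using a pthread mutex in place of the inode spinlock (illustrative only, not kernel API):

```c
#include <pthread.h>
#include <stdio.h>
#include <stdint.h>

static pthread_mutex_t size_lock = PTHREAD_MUTEX_INITIALIZER;
static uint64_t in_core_size;	/* stands in for i_size_read()/i_size_write() */

/* Called from each IO completion: extend EOF only if this IO goes past it. */
static void complete_write(uint64_t offset, uint64_t size)
{
	pthread_mutex_lock(&size_lock);
	if (offset + size > in_core_size)
		in_core_size = offset + size;
	pthread_mutex_unlock(&size_lock);
}

int main(void)
{
	/* Completions may arrive out of order; EOF still only moves forward. */
	complete_write(8192, 4096);	/* EOF -> 12288 */
	complete_write(0, 4096);	/* does not pull EOF back to 4096 */
	printf("EOF = %llu\n", (unsigned long long)in_core_size);
	return 0;
}
```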
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index c203839cd5be..3a5d305e60c9 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -569,20 +569,41 @@ restart:
| 569 | * write. If zeroing is needed and we are currently holding the | 569 | * write. If zeroing is needed and we are currently holding the |
| 570 | * iolock shared, we need to update it to exclusive which implies | 570 | * iolock shared, we need to update it to exclusive which implies |
| 571 | * having to redo all checks before. | 571 | * having to redo all checks before. |
| 572 | * | ||
| 573 | * We need to serialise against EOF updates that occur in IO | ||
| 574 | * completions here. We want to make sure that nobody is changing the | ||
| 575 | * size while we do this check until we have placed an IO barrier (i.e. | ||
| 576 | * hold the XFS_IOLOCK_EXCL) that prevents new IO from being dispatched. | ||
| 577 | * The spinlock effectively forms a memory barrier once we have the | ||
| 578 | * XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value | ||
| 579 | * and hence be able to correctly determine if we need to run zeroing. | ||
| 572 | */ | 580 | */ |
| 581 | spin_lock(&ip->i_flags_lock); | ||
| 573 | if (*pos > i_size_read(inode)) { | 582 | if (*pos > i_size_read(inode)) { |
| 574 | bool zero = false; | 583 | bool zero = false; |
| 575 | 584 | ||
| 585 | spin_unlock(&ip->i_flags_lock); | ||
| 576 | if (*iolock == XFS_IOLOCK_SHARED) { | 586 | if (*iolock == XFS_IOLOCK_SHARED) { |
| 577 | xfs_rw_iunlock(ip, *iolock); | 587 | xfs_rw_iunlock(ip, *iolock); |
| 578 | *iolock = XFS_IOLOCK_EXCL; | 588 | *iolock = XFS_IOLOCK_EXCL; |
| 579 | xfs_rw_ilock(ip, *iolock); | 589 | xfs_rw_ilock(ip, *iolock); |
| 590 | |||
| 591 | /* | ||
| 592 | * We now have an IO submission barrier in place, but | ||
| 593 | * AIO can do EOF updates during IO completion and hence | ||
| 594 | * we now need to wait for all of them to drain. Non-AIO | ||
| 595 | * DIO will have drained before we are given the | ||
| 596 | * XFS_IOLOCK_EXCL, and so for most cases this wait is a | ||
| 597 | * no-op. | ||
| 598 | */ | ||
| 599 | inode_dio_wait(inode); | ||
| 580 | goto restart; | 600 | goto restart; |
| 581 | } | 601 | } |
| 582 | error = xfs_zero_eof(ip, *pos, i_size_read(inode), &zero); | 602 | error = xfs_zero_eof(ip, *pos, i_size_read(inode), &zero); |
| 583 | if (error) | 603 | if (error) |
| 584 | return error; | 604 | return error; |
| 585 | } | 605 | } else |
| 606 | spin_unlock(&ip->i_flags_lock); | ||
| 586 | 607 | ||
| 587 | /* | 608 | /* |
| 588 | * Updating the timestamps will grab the ilock again from | 609 | * Updating the timestamps will grab the ilock again from |
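The hunk above is an instance of a common pattern: check a condition under a shared lock and, if work is needed, drop the shared lock, take it exclusive, and redo all the checks because state may have changed in the window. A rough userspace sketch of that shape with a pthread rwlock; it deliberately does not model the i_flags_lock or the inode_dio_wait() drain, and the names are illustrative.

```c
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_rwlock_t iolock = PTHREAD_RWLOCK_INITIALIZER;
static long file_size = 4096;

/* Stand-in for "does this write start beyond EOF and need zeroing?" */
static bool needs_zeroing(long pos)
{
	return pos > file_size;
}

static void write_checks(long pos)
{
	bool exclusive = false;

	pthread_rwlock_rdlock(&iolock);
restart:
	if (needs_zeroing(pos)) {
		if (!exclusive) {
			/* Upgrade: drop shared, take exclusive, recheck. */
			pthread_rwlock_unlock(&iolock);
			pthread_rwlock_wrlock(&iolock);
			exclusive = true;
			goto restart;
		}
		printf("zeroing %ld..%ld under the exclusive lock\n",
		       file_size, pos);
	}
	pthread_rwlock_unlock(&iolock);
}

int main(void)
{
	write_checks(8192);	/* starts past EOF: upgrades and zeroes */
	write_checks(0);	/* inside EOF: stays shared, no zeroing */
	return 0;
}
```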
@@ -644,6 +665,8 @@ xfs_file_dio_aio_write(
| 644 | int iolock; | 665 | int iolock; |
| 645 | size_t count = iov_iter_count(from); | 666 | size_t count = iov_iter_count(from); |
| 646 | loff_t pos = iocb->ki_pos; | 667 | loff_t pos = iocb->ki_pos; |
| 668 | loff_t end; | ||
| 669 | struct iov_iter data; | ||
| 647 | struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? | 670 | struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? |
| 648 | mp->m_rtdev_targp : mp->m_ddev_targp; | 671 | mp->m_rtdev_targp : mp->m_ddev_targp; |
| 649 | 672 | ||
@@ -683,10 +706,11 @@ xfs_file_dio_aio_write(
| 683 | if (ret) | 706 | if (ret) |
| 684 | goto out; | 707 | goto out; |
| 685 | iov_iter_truncate(from, count); | 708 | iov_iter_truncate(from, count); |
| 709 | end = pos + count - 1; | ||
| 686 | 710 | ||
| 687 | if (mapping->nrpages) { | 711 | if (mapping->nrpages) { |
| 688 | ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, | 712 | ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, |
| 689 | pos, pos + count - 1); | 713 | pos, end); |
| 690 | if (ret) | 714 | if (ret) |
| 691 | goto out; | 715 | goto out; |
| 692 | /* | 716 | /* |
@@ -696,7 +720,7 @@ xfs_file_dio_aio_write(
| 696 | */ | 720 | */ |
| 697 | ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, | 721 | ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, |
| 698 | pos >> PAGE_CACHE_SHIFT, | 722 | pos >> PAGE_CACHE_SHIFT, |
| 699 | (pos + count - 1) >> PAGE_CACHE_SHIFT); | 723 | end >> PAGE_CACHE_SHIFT); |
| 700 | WARN_ON_ONCE(ret); | 724 | WARN_ON_ONCE(ret); |
| 701 | ret = 0; | 725 | ret = 0; |
| 702 | } | 726 | } |
@@ -713,8 +737,22 @@ xfs_file_dio_aio_write(
| 713 | } | 737 | } |
| 714 | 738 | ||
| 715 | trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); | 739 | trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); |
| 716 | ret = generic_file_direct_write(iocb, from, pos); | ||
| 717 | 740 | ||
| 741 | data = *from; | ||
| 742 | ret = mapping->a_ops->direct_IO(WRITE, iocb, &data, pos); | ||
| 743 | |||
| 744 | /* see generic_file_direct_write() for why this is necessary */ | ||
| 745 | if (mapping->nrpages) { | ||
| 746 | invalidate_inode_pages2_range(mapping, | ||
| 747 | pos >> PAGE_CACHE_SHIFT, | ||
| 748 | end >> PAGE_CACHE_SHIFT); | ||
| 749 | } | ||
| 750 | |||
| 751 | if (ret > 0) { | ||
| 752 | pos += ret; | ||
| 753 | iov_iter_advance(from, ret); | ||
| 754 | iocb->ki_pos = pos; | ||
| 755 | } | ||
| 718 | out: | 756 | out: |
| 719 | xfs_rw_iunlock(ip, iolock); | 757 | xfs_rw_iunlock(ip, iolock); |
| 720 | 758 | ||
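The open-coded replacement for generic_file_direct_write() above runs ->direct_IO() on a copy of the iov_iter and advances the caller's iterator only by the bytes actually written. The same "work on a copy, commit real progress on the original" idea in a tiny userspace form; demo_iter and demo_direct_io are invented stand-ins for struct iov_iter and the address_space direct_IO method.

```c
#include <stdio.h>
#include <stddef.h>
#include <sys/types.h>

/* Hypothetical miniature of struct iov_iter: one buffer plus a cursor. */
struct demo_iter {
	const char *buf;
	size_t count;
};

static void demo_iter_advance(struct demo_iter *it, size_t bytes)
{
	it->buf += bytes;
	it->count -= bytes;
}

/* Stand-in for ->direct_IO(): pretend only part of the data was written. */
static ssize_t demo_direct_io(struct demo_iter *data)
{
	size_t written = data->count / 2;

	demo_iter_advance(data, written);	/* consumes the copy only */
	return (ssize_t)written;
}

int main(void)
{
	struct demo_iter from = { .buf = "direct IO payload", .count = 17 };
	struct demo_iter data = from;		/* data = *from; */
	long pos = 0;

	ssize_t ret = demo_direct_io(&data);
	if (ret > 0) {
		pos += ret;
		demo_iter_advance(&from, ret);	/* commit real progress */
	}
	printf("wrote %zd, pos %ld, %zu bytes left in caller's iter\n",
	       ret, pos, from.count);
	return 0;
}
```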
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index b2a45cc9eceb..615781bf4ee5 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1221,6 +1221,11 @@ DEFINE_IOMAP_EVENT(xfs_map_blocks_found);
| 1221 | DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc); | 1221 | DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc); |
| 1222 | DEFINE_IOMAP_EVENT(xfs_get_blocks_found); | 1222 | DEFINE_IOMAP_EVENT(xfs_get_blocks_found); |
| 1223 | DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc); | 1223 | DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc); |
| 1224 | DEFINE_IOMAP_EVENT(xfs_gbmap_direct); | ||
| 1225 | DEFINE_IOMAP_EVENT(xfs_gbmap_direct_new); | ||
| 1226 | DEFINE_IOMAP_EVENT(xfs_gbmap_direct_update); | ||
| 1227 | DEFINE_IOMAP_EVENT(xfs_gbmap_direct_none); | ||
| 1228 | DEFINE_IOMAP_EVENT(xfs_gbmap_direct_endio); | ||
| 1224 | 1229 | ||
| 1225 | DECLARE_EVENT_CLASS(xfs_simple_io_class, | 1230 | DECLARE_EVENT_CLASS(xfs_simple_io_class, |
| 1226 | TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), | 1231 | TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), |
