author		Dave Chinner <david@fromorbit.com>	2015-04-16 08:13:18 -0400
committer	Dave Chinner <david@fromorbit.com>	2015-04-16 08:13:18 -0400
commit		542c311813d5cb2e6f0dfa9557f41c829b8fb6a0 (patch)
tree		573c5644eb966e44112016c9ae86e80251326223 /fs
parent		6a63ef064b2444883ce8b68b0779d0c739d27204 (diff)
parent		0cefb29e6a63727bc7606c47fc538467594ef112 (diff)
Merge branch 'xfs-dio-extend-fix' into for-next
Conflicts:
fs/xfs/xfs_file.c
Diffstat (limited to 'fs')
-rw-r--r--	fs/xfs/xfs_aops.c	| 270
-rw-r--r--	fs/xfs/xfs_file.c	|  46
-rw-r--r--	fs/xfs/xfs_trace.h	|   5
3 files changed, 239 insertions, 82 deletions
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 3a9b7a1b8704..598b259fda04 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1233,6 +1233,117 @@ xfs_vm_releasepage(
 	return try_to_free_buffers(page);
 }
 
+/*
+ * When we map a DIO buffer, we may need to attach an ioend that describes the
+ * type of write IO we are doing. This passes to the completion function the
+ * operations it needs to perform. If the mapping is for an overwrite wholly
+ * within the EOF then we don't need an ioend and so we don't allocate one.
+ * This avoids the unnecessary overhead of allocating and freeing ioends for
+ * workloads that don't require transactions on IO completion.
+ *
+ * If we get multiple mappings in a single IO, we might be mapping different
+ * types. But because the direct IO can only have a single private pointer, we
+ * need to ensure that:
+ *
+ * a) i) the ioend spans the entire region of unwritten mappings; or
+ *    ii) the ioend spans all the mappings that cross or are beyond EOF; and
+ * b) if it contains unwritten extents, it is *permanently* marked as such
+ *
+ * We could do this by chaining ioends like buffered IO does, but we only
+ * actually get one IO completion callback from the direct IO, and that spans
+ * the entire IO regardless of how many mappings and IOs are needed to complete
+ * the DIO. There is only going to be one reference to the ioend and its life
+ * cycle is constrained by the DIO completion code. hence we don't need
+ * reference counting here.
+ */
+static void
+xfs_map_direct(
+	struct inode		*inode,
+	struct buffer_head	*bh_result,
+	struct xfs_bmbt_irec	*imap,
+	xfs_off_t		offset)
+{
+	struct xfs_ioend	*ioend;
+	xfs_off_t		size = bh_result->b_size;
+	int			type;
+
+	if (ISUNWRITTEN(imap))
+		type = XFS_IO_UNWRITTEN;
+	else
+		type = XFS_IO_OVERWRITE;
+
+	trace_xfs_gbmap_direct(XFS_I(inode), offset, size, type, imap);
+
+	if (bh_result->b_private) {
+		ioend = bh_result->b_private;
+		ASSERT(ioend->io_size > 0);
+		ASSERT(offset >= ioend->io_offset);
+		if (offset + size > ioend->io_offset + ioend->io_size)
+			ioend->io_size = offset - ioend->io_offset + size;
+
+		if (type == XFS_IO_UNWRITTEN && type != ioend->io_type)
+			ioend->io_type = XFS_IO_UNWRITTEN;
+
+		trace_xfs_gbmap_direct_update(XFS_I(inode), ioend->io_offset,
+					      ioend->io_size, ioend->io_type,
+					      imap);
+	} else if (type == XFS_IO_UNWRITTEN ||
+		   offset + size > i_size_read(inode)) {
+		ioend = xfs_alloc_ioend(inode, type);
+		ioend->io_offset = offset;
+		ioend->io_size = size;
+
+		bh_result->b_private = ioend;
+		set_buffer_defer_completion(bh_result);
+
+		trace_xfs_gbmap_direct_new(XFS_I(inode), offset, size, type,
+					   imap);
+	} else {
+		trace_xfs_gbmap_direct_none(XFS_I(inode), offset, size, type,
+					    imap);
+	}
+}
+
+/*
+ * If this is O_DIRECT or the mpage code calling tell them how large the mapping
+ * is, so that we can avoid repeated get_blocks calls.
+ *
+ * If the mapping spans EOF, then we have to break the mapping up as the mapping
+ * for blocks beyond EOF must be marked new so that sub block regions can be
+ * correctly zeroed. We can't do this for mappings within EOF unless the mapping
+ * was just allocated or is unwritten, otherwise the callers would overwrite
+ * existing data with zeros. Hence we have to split the mapping into a range up
+ * to and including EOF, and a second mapping for beyond EOF.
+ */
+static void
+xfs_map_trim_size(
+	struct inode		*inode,
+	sector_t		iblock,
+	struct buffer_head	*bh_result,
+	struct xfs_bmbt_irec	*imap,
+	xfs_off_t		offset,
+	ssize_t			size)
+{
+	xfs_off_t		mapping_size;
+
+	mapping_size = imap->br_startoff + imap->br_blockcount - iblock;
+	mapping_size <<= inode->i_blkbits;
+
+	ASSERT(mapping_size > 0);
+	if (mapping_size > size)
+		mapping_size = size;
+	if (offset < i_size_read(inode) &&
+	    offset + mapping_size >= i_size_read(inode)) {
+		/* limit mapping to block that spans EOF */
+		mapping_size = roundup_64(i_size_read(inode) - offset,
+					  1 << inode->i_blkbits);
+	}
+	if (mapping_size > LONG_MAX)
+		mapping_size = LONG_MAX;
+
+	bh_result->b_size = mapping_size;
+}
+
 STATIC int
 __xfs_get_blocks(
 	struct inode		*inode,
@@ -1321,31 +1432,37 @@ __xfs_get_blocks(
 
 			xfs_iunlock(ip, lockmode);
 		}
-
-		trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap);
+		trace_xfs_get_blocks_alloc(ip, offset, size,
+				ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN
+						   : XFS_IO_DELALLOC, &imap);
 	} else if (nimaps) {
-		trace_xfs_get_blocks_found(ip, offset, size, 0, &imap);
+		trace_xfs_get_blocks_found(ip, offset, size,
+				ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN
+						   : XFS_IO_OVERWRITE, &imap);
 		xfs_iunlock(ip, lockmode);
 	} else {
 		trace_xfs_get_blocks_notfound(ip, offset, size);
 		goto out_unlock;
 	}
 
+	/* trim mapping down to size requested */
+	if (direct || size > (1 << inode->i_blkbits))
+		xfs_map_trim_size(inode, iblock, bh_result,
+				  &imap, offset, size);
+
+	/*
+	 * For unwritten extents do not report a disk address in the buffered
+	 * read case (treat as if we're reading into a hole).
+	 */
 	if (imap.br_startblock != HOLESTARTBLOCK &&
-	    imap.br_startblock != DELAYSTARTBLOCK) {
-		/*
-		 * For unwritten extents do not report a disk address on
-		 * the read case (treat as if we're reading into a hole).
-		 */
-		if (create || !ISUNWRITTEN(&imap))
-			xfs_map_buffer(inode, bh_result, &imap, offset);
-		if (create && ISUNWRITTEN(&imap)) {
-			if (direct) {
-				bh_result->b_private = inode;
-				set_buffer_defer_completion(bh_result);
-			}
+	    imap.br_startblock != DELAYSTARTBLOCK &&
+	    (create || !ISUNWRITTEN(&imap))) {
+		xfs_map_buffer(inode, bh_result, &imap, offset);
+		if (ISUNWRITTEN(&imap))
 			set_buffer_unwritten(bh_result);
-		}
+		/* direct IO needs special help */
+		if (create && direct)
+			xfs_map_direct(inode, bh_result, &imap, offset);
 	}
 
 	/*
@@ -1378,39 +1495,6 @@ __xfs_get_blocks(
 		}
 	}
 
-	/*
-	 * If this is O_DIRECT or the mpage code calling tell them how large
-	 * the mapping is, so that we can avoid repeated get_blocks calls.
-	 *
-	 * If the mapping spans EOF, then we have to break the mapping up as the
-	 * mapping for blocks beyond EOF must be marked new so that sub block
-	 * regions can be correctly zeroed. We can't do this for mappings within
-	 * EOF unless the mapping was just allocated or is unwritten, otherwise
-	 * the callers would overwrite existing data with zeros. Hence we have
-	 * to split the mapping into a range up to and including EOF, and a
-	 * second mapping for beyond EOF.
-	 */
-	if (direct || size > (1 << inode->i_blkbits)) {
-		xfs_off_t		mapping_size;
-
-		mapping_size = imap.br_startoff + imap.br_blockcount - iblock;
-		mapping_size <<= inode->i_blkbits;
-
-		ASSERT(mapping_size > 0);
-		if (mapping_size > size)
-			mapping_size = size;
-		if (offset < i_size_read(inode) &&
-		    offset + mapping_size >= i_size_read(inode)) {
-			/* limit mapping to block that spans EOF */
-			mapping_size = roundup_64(i_size_read(inode) - offset,
-						  1 << inode->i_blkbits);
-		}
-		if (mapping_size > LONG_MAX)
-			mapping_size = LONG_MAX;
-
-		bh_result->b_size = mapping_size;
-	}
-
 	return 0;
 
 out_unlock:
@@ -1441,9 +1525,11 @@ xfs_get_blocks_direct(
 /*
  * Complete a direct I/O write request.
  *
- * If the private argument is non-NULL __xfs_get_blocks signals us that we
- * need to issue a transaction to convert the range from unwritten to written
- * extents.
+ * The ioend structure is passed from __xfs_get_blocks() to tell us what to do.
+ * If no ioend exists (i.e. @private == NULL) then the write IO is an overwrite
+ * wholly within the EOF and so there is nothing for us to do. Note that in this
+ * case the completion can be called in interrupt context, whereas if we have an
+ * ioend we will always be called in task context (i.e. from a workqueue).
  */
 STATIC void
 xfs_end_io_direct_write(
@@ -1455,43 +1541,71 @@ xfs_end_io_direct_write(
 	struct inode		*inode = file_inode(iocb->ki_filp);
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_ioend	*ioend = private;
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	trace_xfs_gbmap_direct_endio(ip, offset, size,
+				     ioend ? ioend->io_type : 0, NULL);
+
+	if (!ioend) {
+		ASSERT(offset + size <= i_size_read(inode));
 		return;
+	}
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		goto out_end_io;
 
 	/*
-	 * While the generic direct I/O code updates the inode size, it does
-	 * so only after the end_io handler is called, which means our
-	 * end_io handler thinks the on-disk size is outside the in-core
-	 * size. To prevent this just update it a little bit earlier here.
+	 * dio completion end_io functions are only called on writes if more
+	 * than 0 bytes was written.
 	 */
+	ASSERT(size > 0);
+
+	/*
+	 * The ioend only maps whole blocks, while the IO may be sector aligned.
+	 * Hence the ioend offset/size may not match the IO offset/size exactly.
+	 * Because we don't map overwrites within EOF into the ioend, the offset
+	 * may not match, but only if the endio spans EOF. Either way, write
+	 * the IO sizes into the ioend so that completion processing does the
+	 * right thing.
+	 */
+	ASSERT(offset + size <= ioend->io_offset + ioend->io_size);
+	ioend->io_size = size;
+	ioend->io_offset = offset;
+
+	/*
+	 * The ioend tells us whether we are doing unwritten extent conversion
+	 * or an append transaction that updates the on-disk file size. These
+	 * cases are the only cases where we should *potentially* be needing
+	 * to update the VFS inode size.
+	 *
+	 * We need to update the in-core inode size here so that we don't end up
+	 * with the on-disk inode size being outside the in-core inode size. We
+	 * have no other method of updating EOF for AIO, so always do it here
+	 * if necessary.
+	 *
+	 * We need to lock the test/set EOF update as we can be racing with
+	 * other IO completions here to update the EOF. Failing to serialise
+	 * here can result in EOF moving backwards and Bad Things Happen when
+	 * that occurs.
+	 */
+	spin_lock(&ip->i_flags_lock);
 	if (offset + size > i_size_read(inode))
 		i_size_write(inode, offset + size);
+	spin_unlock(&ip->i_flags_lock);
 
 	/*
-	 * For direct I/O we do not know if we need to allocate blocks or not,
-	 * so we can't preallocate an append transaction, as that results in
-	 * nested reservations and log space deadlocks. Hence allocate the
-	 * transaction here. While this is sub-optimal and can block IO
-	 * completion for some time, we're stuck with doing it this way until
-	 * we can pass the ioend to the direct IO allocation callbacks and
-	 * avoid nesting that way.
+	 * If we are doing an append IO that needs to update the EOF on disk,
+	 * do the transaction reserve now so we can use common end io
+	 * processing. Stashing the error (if there is one) in the ioend will
+	 * result in the ioend processing passing on the error if it is
+	 * possible as we can't return it from here.
 	 */
-	if (private && size > 0) {
-		xfs_iomap_write_unwritten(ip, offset, size);
-	} else if (offset + size > ip->i_d.di_size) {
-		struct xfs_trans *tp;
-		int error;
-
-		tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
-		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
-		if (error) {
-			xfs_trans_cancel(tp, 0);
-			return;
-		}
+	if (ioend->io_type == XFS_IO_OVERWRITE)
+		ioend->io_error = xfs_setfilesize_trans_alloc(ioend);
 
-		xfs_setfilesize(ip, tp, offset, size);
-	}
+out_end_io:
+	xfs_end_io(&ioend->io_work);
+	return;
 }
 
 STATIC ssize_t
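The xfs_map_direct() logic added above decides, per block mapping, what completion state to hang off the DIO buffer: extend an existing ioend, allocate a new one for unwritten or beyond-EOF mappings, or attach nothing for an overwrite wholly within EOF. The following is a minimal userspace model of that decision only; the types, names and main() driver are simplified stand-ins for illustration, not the real XFS structures or the committed code.

/* build: cc -o map_direct_model map_direct_model.c */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

typedef long long off64;

enum io_type { IO_OVERWRITE, IO_UNWRITTEN };

struct ioend {
	enum io_type	io_type;
	off64		io_offset;
	off64		io_size;
};

/*
 * Model of the per-mapping decision: returns the ioend that should sit in
 * the buffer's private pointer after this mapping, or NULL when completion
 * has no work to do.
 */
static struct ioend *
map_direct(struct ioend *cur, off64 offset, off64 size, bool unwritten,
	   off64 isize)
{
	enum io_type type = unwritten ? IO_UNWRITTEN : IO_OVERWRITE;

	if (cur) {
		/* grow the existing ioend to cover this mapping */
		if (offset + size > cur->io_offset + cur->io_size)
			cur->io_size = offset - cur->io_offset + size;
		/* once unwritten, permanently marked unwritten */
		if (type == IO_UNWRITTEN)
			cur->io_type = IO_UNWRITTEN;
		return cur;
	}

	if (type == IO_UNWRITTEN || offset + size > isize) {
		/* unwritten or beyond EOF: completion has work to do */
		struct ioend *new = calloc(1, sizeof(*new));

		if (!new)
			exit(1);
		new->io_type = type;
		new->io_offset = offset;
		new->io_size = size;
		return new;
	}

	/* overwrite wholly within EOF: no ioend, no completion overhead */
	return NULL;
}

int main(void)
{
	off64 isize = 8192;		/* pretend in-core EOF */
	struct ioend *io;

	/* overwrite within EOF: nothing is attached */
	io = map_direct(NULL, 0, 4096, false, isize);
	printf("within-EOF overwrite: %s\n", io ? "ioend" : "no ioend");

	/* an append beyond EOF allocates an ioend ... */
	io = map_direct(NULL, 8192, 4096, false, isize);
	/* ... and a second, unwritten mapping extends and re-types it */
	io = map_direct(io, 12288, 4096, true, isize);
	printf("append: type=%d offset=%lld size=%lld\n",
	       io->io_type, io->io_offset, io->io_size);
	free(io);
	return 0;
}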
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index c203839cd5be..3a5d305e60c9 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -569,20 +569,41 @@ restart:
 	 * write. If zeroing is needed and we are currently holding the
 	 * iolock shared, we need to update it to exclusive which implies
 	 * having to redo all checks before.
+	 *
+	 * We need to serialise against EOF updates that occur in IO
+	 * completions here. We want to make sure that nobody is changing the
+	 * size while we do this check until we have placed an IO barrier (i.e.
+	 * hold the XFS_IOLOCK_EXCL) that prevents new IO from being dispatched.
+	 * The spinlock effectively forms a memory barrier once we have the
+	 * XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value
+	 * and hence be able to correctly determine if we need to run zeroing.
 	 */
+	spin_lock(&ip->i_flags_lock);
 	if (*pos > i_size_read(inode)) {
 		bool	zero = false;
 
+		spin_unlock(&ip->i_flags_lock);
 		if (*iolock == XFS_IOLOCK_SHARED) {
 			xfs_rw_iunlock(ip, *iolock);
 			*iolock = XFS_IOLOCK_EXCL;
 			xfs_rw_ilock(ip, *iolock);
+
+			/*
+			 * We now have an IO submission barrier in place, but
+			 * AIO can do EOF updates during IO completion and hence
+			 * we now need to wait for all of them to drain. Non-AIO
+			 * DIO will have drained before we are given the
+			 * XFS_IOLOCK_EXCL, and so for most cases this wait is a
+			 * no-op.
+			 */
+			inode_dio_wait(inode);
 			goto restart;
 		}
 		error = xfs_zero_eof(ip, *pos, i_size_read(inode), &zero);
 		if (error)
 			return error;
-	}
+	} else
+		spin_unlock(&ip->i_flags_lock);
 
 	/*
 	 * Updating the timestamps will grab the ilock again from
@@ -644,6 +665,8 @@ xfs_file_dio_aio_write(
 	int			iolock;
 	size_t			count = iov_iter_count(from);
 	loff_t			pos = iocb->ki_pos;
+	loff_t			end;
+	struct iov_iter		data;
 	struct xfs_buftarg	*target = XFS_IS_REALTIME_INODE(ip) ?
 					mp->m_rtdev_targp : mp->m_ddev_targp;
 
@@ -683,10 +706,11 @@ xfs_file_dio_aio_write(
 	if (ret)
 		goto out;
 	iov_iter_truncate(from, count);
+	end = pos + count - 1;
 
 	if (mapping->nrpages) {
 		ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
-						   pos, pos + count - 1);
+						   pos, end);
 		if (ret)
 			goto out;
 		/*
@@ -696,7 +720,7 @@ xfs_file_dio_aio_write(
 		 */
 		ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
 					pos >> PAGE_CACHE_SHIFT,
-					(pos + count - 1) >> PAGE_CACHE_SHIFT);
+					end >> PAGE_CACHE_SHIFT);
 		WARN_ON_ONCE(ret);
 		ret = 0;
 	}
@@ -713,8 +737,22 @@ xfs_file_dio_aio_write(
 	}
 
 	trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
-	ret = generic_file_direct_write(iocb, from, pos);
 
+	data = *from;
+	ret = mapping->a_ops->direct_IO(WRITE, iocb, &data, pos);
+
+	/* see generic_file_direct_write() for why this is necessary */
+	if (mapping->nrpages) {
+		invalidate_inode_pages2_range(mapping,
+					      pos >> PAGE_CACHE_SHIFT,
+					      end >> PAGE_CACHE_SHIFT);
+	}
+
+	if (ret > 0) {
+		pos += ret;
+		iov_iter_advance(from, ret);
+		iocb->ki_pos = pos;
+	}
 out:
 	xfs_rw_iunlock(ip, iolock);
 
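The xfs_file_aio_write_checks() change above samples EOF under ip->i_flags_lock and, if zeroing may be needed while only the shared iolock is held, upgrades to the exclusive iolock, drains in-flight AIO, and restarts all the checks. Below is a rough userspace model of that control flow, using pthread primitives as stand-ins for the kernel's iolock and i_flags_lock; it sketches the pattern under those assumptions and is not the kernel code.

/* build: cc -o eof_check_model eof_check_model.c -lpthread */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* stand-ins for the XFS iolock and i_flags_lock */
static pthread_rwlock_t iolock = PTHREAD_RWLOCK_INITIALIZER;
static pthread_spinlock_t flags_lock;
static long long i_size = 4096;	/* in-core EOF, updated by IO completion */

/* mirrors the sample-EOF / upgrade / restart structure of the write checks */
static void write_checks(long long pos, bool *excl)
{
restart:
	/* ... other pre-write checks would be redone here on each pass ... */

	pthread_spin_lock(&flags_lock);
	if (pos > i_size) {
		pthread_spin_unlock(&flags_lock);
		if (!*excl) {
			/* upgrade shared -> exclusive, then redo all checks */
			pthread_rwlock_unlock(&iolock);
			pthread_rwlock_wrlock(&iolock);
			*excl = true;
			/*
			 * the kernel also waits for in-flight AIO here
			 * (inode_dio_wait()) before restarting
			 */
			goto restart;
		}
		/* safe to zero the range between old EOF and the write */
		printf("zero %lld..%lld before writing\n", i_size, pos);
		return;
	}
	pthread_spin_unlock(&flags_lock);
}

int main(void)
{
	bool excl = false;

	pthread_spin_init(&flags_lock, PTHREAD_PROCESS_PRIVATE);
	pthread_rwlock_rdlock(&iolock);	/* start with the shared iolock */
	write_checks(8192, &excl);	/* a write starting beyond EOF */
	pthread_rwlock_unlock(&iolock);
	return 0;
}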
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index b2a45cc9eceb..615781bf4ee5 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1221,6 +1221,11 @@ DEFINE_IOMAP_EVENT(xfs_map_blocks_found);
 DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc);
 DEFINE_IOMAP_EVENT(xfs_get_blocks_found);
 DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
+DEFINE_IOMAP_EVENT(xfs_gbmap_direct);
+DEFINE_IOMAP_EVENT(xfs_gbmap_direct_new);
+DEFINE_IOMAP_EVENT(xfs_gbmap_direct_update);
+DEFINE_IOMAP_EVENT(xfs_gbmap_direct_none);
+DEFINE_IOMAP_EVENT(xfs_gbmap_direct_endio);
 
 DECLARE_EVENT_CLASS(xfs_simple_io_class,
 	TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),