Diffstat (limited to 'fs/xfs/xfs_aops.c')
-rw-r--r--	fs/xfs/xfs_aops.c	283
1 files changed, 19 insertions, 264 deletions
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 4c463b99fe57..80714ebd54c0 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1143,6 +1143,8 @@ __xfs_get_blocks(
 	ssize_t			size;
 	int			new = 0;
 
+	BUG_ON(create && !direct);
+
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
 
@@ -1150,22 +1152,14 @@ __xfs_get_blocks(
 	ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
 	size = bh_result->b_size;
 
-	if (!create && direct && offset >= i_size_read(inode))
+	if (!create && offset >= i_size_read(inode))
 		return 0;
 
 	/*
 	 * Direct I/O is usually done on preallocated files, so try getting
-	 * a block mapping without an exclusive lock first. For buffered
-	 * writes we already have the exclusive iolock anyway, so avoiding
-	 * a lock roundtrip here by taking the ilock exclusive from the
-	 * beginning is a useful micro optimization.
+	 * a block mapping without an exclusive lock first.
 	 */
-	if (create && !direct) {
-		lockmode = XFS_ILOCK_EXCL;
-		xfs_ilock(ip, lockmode);
-	} else {
-		lockmode = xfs_ilock_data_map_shared(ip);
-	}
+	lockmode = xfs_ilock_data_map_shared(ip);
 
 	ASSERT(offset <= mp->m_super->s_maxbytes);
 	if (offset + size > mp->m_super->s_maxbytes)
@@ -1184,37 +1178,19 @@ __xfs_get_blocks(
 	    (imap.br_startblock == HOLESTARTBLOCK ||
 	     imap.br_startblock == DELAYSTARTBLOCK) ||
 	     (IS_DAX(inode) && ISUNWRITTEN(&imap)))) {
-		if (direct || xfs_get_extsz_hint(ip)) {
-			/*
-			 * xfs_iomap_write_direct() expects the shared lock. It
-			 * is unlocked on return.
-			 */
-			if (lockmode == XFS_ILOCK_EXCL)
-				xfs_ilock_demote(ip, lockmode);
-
-			error = xfs_iomap_write_direct(ip, offset, size,
-						       &imap, nimaps);
-			if (error)
-				return error;
-			new = 1;
+		/*
+		 * xfs_iomap_write_direct() expects the shared lock. It
+		 * is unlocked on return.
+		 */
+		if (lockmode == XFS_ILOCK_EXCL)
+			xfs_ilock_demote(ip, lockmode);
 
-		} else {
-			/*
-			 * Delalloc reservations do not require a transaction,
-			 * we can go on without dropping the lock here. If we
-			 * are allocating a new delalloc block, make sure that
-			 * we set the new flag so that we mark the buffer new so
-			 * that we know that it is newly allocated if the write
-			 * fails.
-			 */
-			if (nimaps && imap.br_startblock == HOLESTARTBLOCK)
-				new = 1;
-			error = xfs_iomap_write_delay(ip, offset, size, &imap);
-			if (error)
-				goto out_unlock;
+		error = xfs_iomap_write_direct(ip, offset, size,
+					       &imap, nimaps);
+		if (error)
+			return error;
+		new = 1;
 
-			xfs_iunlock(ip, lockmode);
-		}
 		trace_xfs_get_blocks_alloc(ip, offset, size,
 				ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN
 						   : XFS_IO_DELALLOC, &imap);
@@ -1235,9 +1211,7 @@ __xfs_get_blocks(
 	}
 
 	/* trim mapping down to size requested */
-	if (direct || size > (1 << inode->i_blkbits))
-		xfs_map_trim_size(inode, iblock, bh_result,
-				  &imap, offset, size);
+	xfs_map_trim_size(inode, iblock, bh_result, &imap, offset, size);
 
 	/*
 	 * For unwritten extents do not report a disk address in the buffered
@@ -1250,7 +1224,7 @@ __xfs_get_blocks(
 	if (ISUNWRITTEN(&imap))
 		set_buffer_unwritten(bh_result);
 	/* direct IO needs special help */
-	if (create && direct) {
+	if (create) {
 		if (dax_fault)
 			ASSERT(!ISUNWRITTEN(&imap));
 		else
@@ -1279,14 +1253,7 @@ __xfs_get_blocks(
 	     (new || ISUNWRITTEN(&imap))))
 		set_buffer_new(bh_result);
 
-	if (imap.br_startblock == DELAYSTARTBLOCK) {
-		BUG_ON(direct);
-		if (create) {
-			set_buffer_uptodate(bh_result);
-			set_buffer_mapped(bh_result);
-			set_buffer_delay(bh_result);
-		}
-	}
+	BUG_ON(direct && imap.br_startblock == DELAYSTARTBLOCK);
 
 	return 0;
 
@@ -1427,216 +1394,6 @@ xfs_vm_direct_IO(
 			xfs_get_blocks_direct, endio, NULL, flags);
 }
 
-/*
- * Punch out the delalloc blocks we have already allocated.
- *
- * Don't bother with xfs_setattr given that nothing can have made it to disk yet
- * as the page is still locked at this point.
- */
-STATIC void
-xfs_vm_kill_delalloc_range(
-	struct inode		*inode,
-	loff_t			start,
-	loff_t			end)
-{
-	struct xfs_inode	*ip = XFS_I(inode);
-	xfs_fileoff_t		start_fsb;
-	xfs_fileoff_t		end_fsb;
-	int			error;
-
-	start_fsb = XFS_B_TO_FSB(ip->i_mount, start);
-	end_fsb = XFS_B_TO_FSB(ip->i_mount, end);
-	if (end_fsb <= start_fsb)
-		return;
-
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
-						end_fsb - start_fsb);
-	if (error) {
-		/* something screwed, just bail */
-		if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-			xfs_alert(ip->i_mount,
-		"xfs_vm_write_failed: unable to clean up ino %lld",
-					ip->i_ino);
-		}
-	}
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-}
-
-STATIC void
-xfs_vm_write_failed(
-	struct inode		*inode,
-	struct page		*page,
-	loff_t			pos,
-	unsigned		len)
-{
-	loff_t			block_offset;
-	loff_t			block_start;
-	loff_t			block_end;
-	loff_t			from = pos & (PAGE_SIZE - 1);
-	loff_t			to = from + len;
-	struct buffer_head	*bh, *head;
-	struct xfs_mount	*mp = XFS_I(inode)->i_mount;
-
-	/*
-	 * The request pos offset might be 32 or 64 bit, this is all fine
-	 * on 64-bit platform. However, for 64-bit pos request on 32-bit
-	 * platform, the high 32-bit will be masked off if we evaluate the
-	 * block_offset via (pos & PAGE_MASK) because the PAGE_MASK is
-	 * 0xfffff000 as an unsigned long, hence the result is incorrect
-	 * which could cause the following ASSERT failed in most cases.
-	 * In order to avoid this, we can evaluate the block_offset of the
-	 * start of the page by using shifts rather than masks the mismatch
-	 * problem.
-	 */
-	block_offset = (pos >> PAGE_SHIFT) << PAGE_SHIFT;
-
-	ASSERT(block_offset + from == pos);
-
-	head = page_buffers(page);
-	block_start = 0;
-	for (bh = head; bh != head || !block_start;
-	     bh = bh->b_this_page, block_start = block_end,
-				   block_offset += bh->b_size) {
-		block_end = block_start + bh->b_size;
-
-		/* skip buffers before the write */
-		if (block_end <= from)
-			continue;
-
-		/* if the buffer is after the write, we're done */
-		if (block_start >= to)
-			break;
-
-		/*
-		 * Process delalloc and unwritten buffers beyond EOF. We can
-		 * encounter unwritten buffers in the event that a file has
-		 * post-EOF unwritten extents and an extending write happens to
-		 * fail (e.g., an unaligned write that also involves a delalloc
-		 * to the same page).
-		 */
-		if (!buffer_delay(bh) && !buffer_unwritten(bh))
-			continue;
-
-		if (!xfs_mp_fail_writes(mp) && !buffer_new(bh) &&
-		    block_offset < i_size_read(inode))
-			continue;
-
-		if (buffer_delay(bh))
-			xfs_vm_kill_delalloc_range(inode, block_offset,
-						   block_offset + bh->b_size);
-
-		/*
-		 * This buffer does not contain data anymore. make sure anyone
-		 * who finds it knows that for certain.
-		 */
-		clear_buffer_delay(bh);
-		clear_buffer_uptodate(bh);
-		clear_buffer_mapped(bh);
-		clear_buffer_new(bh);
-		clear_buffer_dirty(bh);
-		clear_buffer_unwritten(bh);
-	}
-
-}
-
-/*
- * This used to call block_write_begin(), but it unlocks and releases the page
- * on error, and we need that page to be able to punch stale delalloc blocks out
- * on failure. hence we copy-n-waste it here and call xfs_vm_write_failed() at
- * the appropriate point.
- */
-STATIC int
-xfs_vm_write_begin(
-	struct file		*file,
-	struct address_space	*mapping,
-	loff_t			pos,
-	unsigned		len,
-	unsigned		flags,
-	struct page		**pagep,
-	void			**fsdata)
-{
-	pgoff_t			index = pos >> PAGE_SHIFT;
-	struct page		*page;
-	int			status;
-	struct xfs_mount	*mp = XFS_I(mapping->host)->i_mount;
-
-	ASSERT(len <= PAGE_SIZE);
-
-	page = grab_cache_page_write_begin(mapping, index, flags);
-	if (!page)
-		return -ENOMEM;
-
-	status = __block_write_begin(page, pos, len, xfs_get_blocks);
-	if (xfs_mp_fail_writes(mp))
-		status = -EIO;
-	if (unlikely(status)) {
-		struct inode	*inode = mapping->host;
-		size_t		isize = i_size_read(inode);
-
-		xfs_vm_write_failed(inode, page, pos, len);
-		unlock_page(page);
-
-		/*
-		 * If the write is beyond EOF, we only want to kill blocks
-		 * allocated in this write, not blocks that were previously
-		 * written successfully.
-		 */
-		if (xfs_mp_fail_writes(mp))
-			isize = 0;
-		if (pos + len > isize) {
-			ssize_t start = max_t(ssize_t, pos, isize);
-
-			truncate_pagecache_range(inode, start, pos + len);
-		}
-
-		put_page(page);
-		page = NULL;
-	}
-
-	*pagep = page;
-	return status;
-}
-
-/*
- * On failure, we only need to kill delalloc blocks beyond EOF in the range of
- * this specific write because they will never be written. Previous writes
- * beyond EOF where block allocation succeeded do not need to be trashed, so
- * only new blocks from this write should be trashed. For blocks within
- * EOF, generic_write_end() zeros them so they are safe to leave alone and be
- * written with all the other valid data.
- */
-STATIC int
-xfs_vm_write_end(
-	struct file		*file,
-	struct address_space	*mapping,
-	loff_t			pos,
-	unsigned		len,
-	unsigned		copied,
-	struct page		*page,
-	void			*fsdata)
-{
-	int			ret;
-
-	ASSERT(len <= PAGE_SIZE);
-
-	ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
-	if (unlikely(ret < len)) {
-		struct inode	*inode = mapping->host;
-		size_t		isize = i_size_read(inode);
-		loff_t		to = pos + len;
-
-		if (to > isize) {
-			/* only kill blocks in this write beyond EOF */
-			if (pos > isize)
-				isize = pos;
-			xfs_vm_kill_delalloc_range(inode, isize, to);
-			truncate_pagecache_range(inode, isize, to);
-		}
-	}
-	return ret;
-}
-
 STATIC sector_t
 xfs_vm_bmap(
 	struct address_space	*mapping,
@@ -1747,8 +1504,6 @@ const struct address_space_operations xfs_address_space_operations = {
 	.set_page_dirty		= xfs_vm_set_page_dirty,
 	.releasepage		= xfs_vm_releasepage,
 	.invalidatepage		= xfs_vm_invalidatepage,
-	.write_begin		= xfs_vm_write_begin,
-	.write_end		= xfs_vm_write_end,
 	.bmap			= xfs_vm_bmap,
 	.direct_IO		= xfs_vm_direct_IO,
 	.migratepage		= buffer_migrate_page,
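
Note: the long comment deleted from xfs_vm_write_failed() above describes a real C integer-conversion pitfall. On a 32-bit kernel, PAGE_MASK is a 32-bit unsigned long (0xfffff000), so `pos & PAGE_MASK` truncates a 64-bit loff_t to its low 32 bits before masking, while `(pos >> PAGE_SHIFT) << PAGE_SHIFT` keeps the full 64-bit offset. A minimal user-space sketch of just that pitfall (PAGE_MASK_32 is a stand-in simulating the 32-bit kernel macro; this is not kernel code and runs on any host):

	#include <stdio.h>
	#include <stdint.h>

	#define PAGE_SHIFT	12
	/* Simulate a 32-bit kernel's PAGE_MASK, where unsigned long is 32 bits. */
	#define PAGE_MASK_32	((uint32_t)~((1u << PAGE_SHIFT) - 1))	/* 0xfffff000 */

	int main(void)
	{
		int64_t pos = 0x123456789abcLL;	/* a 64-bit file offset (loff_t) */

		/*
		 * The 32-bit mask widens to 0x00000000fffff000 under the usual
		 * arithmetic conversions, so it zeroes the high 32 bits of pos.
		 */
		int64_t masked  = pos & PAGE_MASK_32;

		/* Shifting down and back up preserves all 64 bits. */
		int64_t shifted = (pos >> PAGE_SHIFT) << PAGE_SHIFT;

		printf("pos & PAGE_MASK_32        = 0x%jx\n", (uintmax_t)masked);	/* 0x56789000 */
		printf("(pos >> sh) << sh         = 0x%jx\n", (uintmax_t)shifted);	/* 0x123456789000 */
		return 0;
	}

With the masked value, `block_offset + from == pos` no longer holds for offsets above 4 GiB, which is exactly the ASSERT failure the removed comment warns about.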
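Note: the "only kill blocks in this write beyond EOF" logic deleted from xfs_vm_write_end() boils down to a small range computation on a short copy: only the part of [pos, pos + len) that lies past the current EOF gets punched out and truncated from the page cache. A hypothetical stand-alone sketch of just that arithmetic (kill_range and the half-open intervals it prints are illustrative, not kernel APIs):

	#include <stdio.h>

	/*
	 * Given a short write over [pos, pos + len) and the current file size
	 * isize, report the range the removed xfs_vm_write_end() would hand to
	 * xfs_vm_kill_delalloc_range()/truncate_pagecache_range().
	 */
	static void kill_range(long long pos, unsigned len, long long isize)
	{
		long long to = pos + len;

		if (to > isize) {
			if (pos > isize)
				isize = pos;	/* write starts beyond EOF: kill all of it */
			printf("punch [%lld, %lld)\n", isize, to);
		} else {
			printf("nothing to punch (write entirely within EOF)\n");
		}
	}

	int main(void)
	{
		kill_range(4096, 4096, 6000);	/* extends EOF: punch [6000, 8192) */
		kill_range(8192, 4096, 6000);	/* wholly beyond EOF: punch [8192, 12288) */
		kill_range(0, 4096, 6000);	/* within EOF: generic_write_end() zeroed it */
		return 0;
	}

Blocks within EOF are left alone because generic_write_end() already zeroed them; only never-to-be-written post-EOF delalloc blocks need trashing.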
