Diffstat (limited to 'fs/xfs/xfs_aops.c')
-rw-r--r--	fs/xfs/xfs_aops.c	283
1 file changed, 19 insertions, 264 deletions
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 4c463b99fe57..80714ebd54c0 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1143,6 +1143,8 @@ __xfs_get_blocks(
 	ssize_t			size;
 	int			new = 0;
 
+	BUG_ON(create && !direct);
+
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
 
@@ -1150,22 +1152,14 @@ __xfs_get_blocks(
 	ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
 	size = bh_result->b_size;
 
-	if (!create && direct && offset >= i_size_read(inode))
+	if (!create && offset >= i_size_read(inode))
 		return 0;
 
 	/*
 	 * Direct I/O is usually done on preallocated files, so try getting
-	 * a block mapping without an exclusive lock first. For buffered
-	 * writes we already have the exclusive iolock anyway, so avoiding
-	 * a lock roundtrip here by taking the ilock exclusive from the
-	 * beginning is a useful micro optimization.
+	 * a block mapping without an exclusive lock first.
 	 */
-	if (create && !direct) {
-		lockmode = XFS_ILOCK_EXCL;
-		xfs_ilock(ip, lockmode);
-	} else {
-		lockmode = xfs_ilock_data_map_shared(ip);
-	}
 
+	lockmode = xfs_ilock_data_map_shared(ip);
 	ASSERT(offset <= mp->m_super->s_maxbytes);
 	if (offset + size > mp->m_super->s_maxbytes)
@@ -1184,37 +1178,19 @@ __xfs_get_blocks(
 	     (imap.br_startblock == HOLESTARTBLOCK ||
 	      imap.br_startblock == DELAYSTARTBLOCK) ||
 	     (IS_DAX(inode) && ISUNWRITTEN(&imap)))) {
-		if (direct || xfs_get_extsz_hint(ip)) {
-			/*
-			 * xfs_iomap_write_direct() expects the shared lock. It
-			 * is unlocked on return.
-			 */
-			if (lockmode == XFS_ILOCK_EXCL)
-				xfs_ilock_demote(ip, lockmode);
-
-			error = xfs_iomap_write_direct(ip, offset, size,
-						       &imap, nimaps);
-			if (error)
-				return error;
-			new = 1;
+		/*
+		 * xfs_iomap_write_direct() expects the shared lock. It
+		 * is unlocked on return.
+		 */
+		if (lockmode == XFS_ILOCK_EXCL)
+			xfs_ilock_demote(ip, lockmode);
 
-		} else {
-			/*
-			 * Delalloc reservations do not require a transaction,
-			 * we can go on without dropping the lock here. If we
-			 * are allocating a new delalloc block, make sure that
-			 * we set the new flag so that we mark the buffer new so
-			 * that we know that it is newly allocated if the write
-			 * fails.
-			 */
-			if (nimaps && imap.br_startblock == HOLESTARTBLOCK)
-				new = 1;
-			error = xfs_iomap_write_delay(ip, offset, size, &imap);
-			if (error)
-				goto out_unlock;
+		error = xfs_iomap_write_direct(ip, offset, size,
+					       &imap, nimaps);
+		if (error)
+			return error;
+		new = 1;
 
-			xfs_iunlock(ip, lockmode);
-		}
 		trace_xfs_get_blocks_alloc(ip, offset, size,
 				ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN
 						   : XFS_IO_DELALLOC, &imap);
@@ -1235,9 +1211,7 @@ __xfs_get_blocks(
 	}
 
 	/* trim mapping down to size requested */
-	if (direct || size > (1 << inode->i_blkbits))
-		xfs_map_trim_size(inode, iblock, bh_result,
-				  &imap, offset, size);
+	xfs_map_trim_size(inode, iblock, bh_result, &imap, offset, size);
 
 	/*
 	 * For unwritten extents do not report a disk address in the buffered
@@ -1250,7 +1224,7 @@ __xfs_get_blocks(
 	if (ISUNWRITTEN(&imap))
 		set_buffer_unwritten(bh_result);
 	/* direct IO needs special help */
-	if (create && direct) {
+	if (create) {
 		if (dax_fault)
 			ASSERT(!ISUNWRITTEN(&imap));
 		else
@@ -1279,14 +1253,7 @@ __xfs_get_blocks(
 	     (new || ISUNWRITTEN(&imap))))
 		set_buffer_new(bh_result);
 
-	if (imap.br_startblock == DELAYSTARTBLOCK) {
-		BUG_ON(direct);
-		if (create) {
-			set_buffer_uptodate(bh_result);
-			set_buffer_mapped(bh_result);
-			set_buffer_delay(bh_result);
-		}
-	}
+	BUG_ON(direct && imap.br_startblock == DELAYSTARTBLOCK);
 
 	return 0;
 
@@ -1427,216 +1394,6 @@ xfs_vm_direct_IO(
 			xfs_get_blocks_direct, endio, NULL, flags);
 }
 
-/*
- * Punch out the delalloc blocks we have already allocated.
- *
- * Don't bother with xfs_setattr given that nothing can have made it to disk yet
- * as the page is still locked at this point.
- */
-STATIC void
-xfs_vm_kill_delalloc_range(
-	struct inode		*inode,
-	loff_t			start,
-	loff_t			end)
-{
-	struct xfs_inode	*ip = XFS_I(inode);
-	xfs_fileoff_t		start_fsb;
-	xfs_fileoff_t		end_fsb;
-	int			error;
-
-	start_fsb = XFS_B_TO_FSB(ip->i_mount, start);
-	end_fsb = XFS_B_TO_FSB(ip->i_mount, end);
-	if (end_fsb <= start_fsb)
-		return;
-
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
-						end_fsb - start_fsb);
-	if (error) {
-		/* something screwed, just bail */
-		if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-			xfs_alert(ip->i_mount,
-		"xfs_vm_write_failed: unable to clean up ino %lld",
-					ip->i_ino);
-		}
-	}
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-}
-
-STATIC void
-xfs_vm_write_failed(
-	struct inode		*inode,
-	struct page		*page,
-	loff_t			pos,
-	unsigned		len)
-{
-	loff_t			block_offset;
-	loff_t			block_start;
-	loff_t			block_end;
-	loff_t			from = pos & (PAGE_SIZE - 1);
-	loff_t			to = from + len;
-	struct buffer_head	*bh, *head;
-	struct xfs_mount	*mp = XFS_I(inode)->i_mount;
-
-	/*
-	 * The request pos offset might be 32 or 64 bit, this is all fine
-	 * on 64-bit platform. However, for 64-bit pos request on 32-bit
-	 * platform, the high 32-bit will be masked off if we evaluate the
-	 * block_offset via (pos & PAGE_MASK) because the PAGE_MASK is
-	 * 0xfffff000 as an unsigned long, hence the result is incorrect
-	 * which could cause the following ASSERT failed in most cases.
-	 * In order to avoid this, we can evaluate the block_offset of the
-	 * start of the page by using shifts rather than masks the mismatch
-	 * problem.
-	 */
-	block_offset = (pos >> PAGE_SHIFT) << PAGE_SHIFT;
-
-	ASSERT(block_offset + from == pos);
-
-	head = page_buffers(page);
-	block_start = 0;
-	for (bh = head; bh != head || !block_start;
-	     bh = bh->b_this_page, block_start = block_end,
-				   block_offset += bh->b_size) {
-		block_end = block_start + bh->b_size;
-
-		/* skip buffers before the write */
-		if (block_end <= from)
-			continue;
-
-		/* if the buffer is after the write, we're done */
-		if (block_start >= to)
-			break;
-
-		/*
-		 * Process delalloc and unwritten buffers beyond EOF. We can
-		 * encounter unwritten buffers in the event that a file has
-		 * post-EOF unwritten extents and an extending write happens to
-		 * fail (e.g., an unaligned write that also involves a delalloc
-		 * to the same page).
-		 */
-		if (!buffer_delay(bh) && !buffer_unwritten(bh))
-			continue;
-
-		if (!xfs_mp_fail_writes(mp) && !buffer_new(bh) &&
-		    block_offset < i_size_read(inode))
-			continue;
-
-		if (buffer_delay(bh))
-			xfs_vm_kill_delalloc_range(inode, block_offset,
-						   block_offset + bh->b_size);
-
-		/*
-		 * This buffer does not contain data anymore. make sure anyone
-		 * who finds it knows that for certain.
-		 */
-		clear_buffer_delay(bh);
-		clear_buffer_uptodate(bh);
-		clear_buffer_mapped(bh);
-		clear_buffer_new(bh);
-		clear_buffer_dirty(bh);
-		clear_buffer_unwritten(bh);
-	}
-
-}
-
-/*
- * This used to call block_write_begin(), but it unlocks and releases the page
- * on error, and we need that page to be able to punch stale delalloc blocks out
- * on failure. hence we copy-n-waste it here and call xfs_vm_write_failed() at
- * the appropriate point.
- */
-STATIC int
-xfs_vm_write_begin(
-	struct file		*file,
-	struct address_space	*mapping,
-	loff_t			pos,
-	unsigned		len,
-	unsigned		flags,
-	struct page		**pagep,
-	void			**fsdata)
-{
-	pgoff_t			index = pos >> PAGE_SHIFT;
-	struct page		*page;
-	int			status;
-	struct xfs_mount	*mp = XFS_I(mapping->host)->i_mount;
-
-	ASSERT(len <= PAGE_SIZE);
-
-	page = grab_cache_page_write_begin(mapping, index, flags);
-	if (!page)
-		return -ENOMEM;
-
-	status = __block_write_begin(page, pos, len, xfs_get_blocks);
-	if (xfs_mp_fail_writes(mp))
-		status = -EIO;
-	if (unlikely(status)) {
-		struct inode	*inode = mapping->host;
-		size_t		isize = i_size_read(inode);
-
-		xfs_vm_write_failed(inode, page, pos, len);
-		unlock_page(page);
-
-		/*
-		 * If the write is beyond EOF, we only want to kill blocks
-		 * allocated in this write, not blocks that were previously
-		 * written successfully.
-		 */
-		if (xfs_mp_fail_writes(mp))
-			isize = 0;
-		if (pos + len > isize) {
-			ssize_t start = max_t(ssize_t, pos, isize);
-
-			truncate_pagecache_range(inode, start, pos + len);
-		}
-
-		put_page(page);
-		page = NULL;
-	}
-
-	*pagep = page;
-	return status;
-}
-
-/*
- * On failure, we only need to kill delalloc blocks beyond EOF in the range of
- * this specific write because they will never be written. Previous writes
- * beyond EOF where block allocation succeeded do not need to be trashed, so
- * only new blocks from this write should be trashed. For blocks within
- * EOF, generic_write_end() zeros them so they are safe to leave alone and be
- * written with all the other valid data.
- */
-STATIC int
-xfs_vm_write_end(
-	struct file		*file,
-	struct address_space	*mapping,
-	loff_t			pos,
-	unsigned		len,
-	unsigned		copied,
-	struct page		*page,
-	void			*fsdata)
-{
-	int			ret;
-
-	ASSERT(len <= PAGE_SIZE);
-
-	ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
-	if (unlikely(ret < len)) {
-		struct inode	*inode = mapping->host;
-		size_t		isize = i_size_read(inode);
-		loff_t		to = pos + len;
-
-		if (to > isize) {
-			/* only kill blocks in this write beyond EOF */
-			if (pos > isize)
-				isize = pos;
-			xfs_vm_kill_delalloc_range(inode, isize, to);
-			truncate_pagecache_range(inode, isize, to);
-		}
-	}
-	return ret;
-}
-
 STATIC sector_t
 xfs_vm_bmap(
 	struct address_space	*mapping,
@@ -1747,8 +1504,6 @@ const struct address_space_operations xfs_address_space_operations = {
 	.set_page_dirty		= xfs_vm_set_page_dirty,
 	.releasepage		= xfs_vm_releasepage,
 	.invalidatepage		= xfs_vm_invalidatepage,
-	.write_begin		= xfs_vm_write_begin,
-	.write_end		= xfs_vm_write_end,
 	.bmap			= xfs_vm_bmap,
 	.direct_IO		= xfs_vm_direct_IO,
 	.migratepage		= buffer_migrate_page,