author		Dave Chinner <david@fromorbit.com>	2016-06-20 20:10:38 -0400
committer	Dave Chinner <david@fromorbit.com>	2016-06-20 20:10:38 -0400
commit		9b7fad20760b8f47730f0353459dd39a89c415b9 (patch)
tree		a0465f6d4bb6dceaf3a6412e1c30b31096fdf712
parent		07931b7be70916055b882c6a379a3016f5772681 (diff)
parent		3c2bdc912a1cc050db7e858aabe564cb382c9c30 (diff)

Merge branch 'xfs-4.8-iomap-write' into for-next
-rw-r--r--	fs/xfs/Kconfig		  1
-rw-r--r--	fs/xfs/xfs_aops.c	283
-rw-r--r--	fs/xfs/xfs_bmap_util.c	343
-rw-r--r--	fs/xfs/xfs_file.c	193
-rw-r--r--	fs/xfs/xfs_inode.h	  3
-rw-r--r--	fs/xfs/xfs_iomap.c	171
-rw-r--r--	fs/xfs/xfs_iomap.h	  7
-rw-r--r--	fs/xfs/xfs_iops.c	113
-rw-r--r--	fs/xfs/xfs_pnfs.c	 26
-rw-r--r--	fs/xfs/xfs_trace.h	  3
10 files changed, 367 insertions(+), 776 deletions(-)
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 5d47b4df61ea..35faf128f36d 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -4,6 +4,7 @@ config XFS_FS
 	depends on (64BIT || LBDAF)
 	select EXPORTFS
 	select LIBCRC32C
+	select FS_IOMAP
 	help
 	  XFS is a high performance journaling filesystem which originated
 	  on the SGI IRIX platform. It is completely multi-threaded, can
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 4c463b99fe57..80714ebd54c0 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1143,6 +1143,8 @@ __xfs_get_blocks(
 	ssize_t			size;
 	int			new = 0;
 
+	BUG_ON(create && !direct);
+
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
 
@@ -1150,22 +1152,14 @@ __xfs_get_blocks(
 	ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
 	size = bh_result->b_size;
 
-	if (!create && direct && offset >= i_size_read(inode))
+	if (!create && offset >= i_size_read(inode))
 		return 0;
 
 	/*
 	 * Direct I/O is usually done on preallocated files, so try getting
-	 * a block mapping without an exclusive lock first. For buffered
-	 * writes we already have the exclusive iolock anyway, so avoiding
-	 * a lock roundtrip here by taking the ilock exclusive from the
-	 * beginning is a useful micro optimization.
+	 * a block mapping without an exclusive lock first.
 	 */
-	if (create && !direct) {
-		lockmode = XFS_ILOCK_EXCL;
-		xfs_ilock(ip, lockmode);
-	} else {
-		lockmode = xfs_ilock_data_map_shared(ip);
-	}
+	lockmode = xfs_ilock_data_map_shared(ip);
 
 	ASSERT(offset <= mp->m_super->s_maxbytes);
 	if (offset + size > mp->m_super->s_maxbytes)
@@ -1184,37 +1178,19 @@ __xfs_get_blocks(
 	     (imap.br_startblock == HOLESTARTBLOCK ||
 	      imap.br_startblock == DELAYSTARTBLOCK) ||
 	     (IS_DAX(inode) && ISUNWRITTEN(&imap)))) {
-		if (direct || xfs_get_extsz_hint(ip)) {
-			/*
-			 * xfs_iomap_write_direct() expects the shared lock. It
-			 * is unlocked on return.
-			 */
-			if (lockmode == XFS_ILOCK_EXCL)
-				xfs_ilock_demote(ip, lockmode);
-
-			error = xfs_iomap_write_direct(ip, offset, size,
-						       &imap, nimaps);
-			if (error)
-				return error;
-			new = 1;
+		/*
+		 * xfs_iomap_write_direct() expects the shared lock. It
+		 * is unlocked on return.
+		 */
+		if (lockmode == XFS_ILOCK_EXCL)
+			xfs_ilock_demote(ip, lockmode);
 
-		} else {
-			/*
-			 * Delalloc reservations do not require a transaction,
-			 * we can go on without dropping the lock here. If we
-			 * are allocating a new delalloc block, make sure that
-			 * we set the new flag so that we mark the buffer new so
-			 * that we know that it is newly allocated if the write
-			 * fails.
-			 */
-			if (nimaps && imap.br_startblock == HOLESTARTBLOCK)
-				new = 1;
-			error = xfs_iomap_write_delay(ip, offset, size, &imap);
-			if (error)
-				goto out_unlock;
+		error = xfs_iomap_write_direct(ip, offset, size,
+					       &imap, nimaps);
+		if (error)
+			return error;
+		new = 1;
 
-			xfs_iunlock(ip, lockmode);
-		}
 		trace_xfs_get_blocks_alloc(ip, offset, size,
 				ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN
 				: XFS_IO_DELALLOC, &imap);
@@ -1235,9 +1211,7 @@ __xfs_get_blocks(
 	}
 
 	/* trim mapping down to size requested */
-	if (direct || size > (1 << inode->i_blkbits))
-		xfs_map_trim_size(inode, iblock, bh_result,
-				  &imap, offset, size);
+	xfs_map_trim_size(inode, iblock, bh_result, &imap, offset, size);
 
 	/*
 	 * For unwritten extents do not report a disk address in the buffered
@@ -1250,7 +1224,7 @@ __xfs_get_blocks(
 	if (ISUNWRITTEN(&imap))
 		set_buffer_unwritten(bh_result);
 	/* direct IO needs special help */
-	if (create && direct) {
+	if (create) {
 		if (dax_fault)
 			ASSERT(!ISUNWRITTEN(&imap));
 		else
@@ -1279,14 +1253,7 @@ __xfs_get_blocks(
 	     (new || ISUNWRITTEN(&imap))))
 		set_buffer_new(bh_result);
 
-	if (imap.br_startblock == DELAYSTARTBLOCK) {
-		BUG_ON(direct);
-		if (create) {
-			set_buffer_uptodate(bh_result);
-			set_buffer_mapped(bh_result);
-			set_buffer_delay(bh_result);
-		}
-	}
+	BUG_ON(direct && imap.br_startblock == DELAYSTARTBLOCK);
 
 	return 0;
 
@@ -1427,216 +1394,6 @@ xfs_vm_direct_IO(
 			xfs_get_blocks_direct, endio, NULL, flags);
 }
 
-/*
- * Punch out the delalloc blocks we have already allocated.
- *
- * Don't bother with xfs_setattr given that nothing can have made it to disk yet
- * as the page is still locked at this point.
- */
-STATIC void
-xfs_vm_kill_delalloc_range(
-	struct inode		*inode,
-	loff_t			start,
-	loff_t			end)
-{
-	struct xfs_inode	*ip = XFS_I(inode);
-	xfs_fileoff_t		start_fsb;
-	xfs_fileoff_t		end_fsb;
-	int			error;
-
-	start_fsb = XFS_B_TO_FSB(ip->i_mount, start);
-	end_fsb = XFS_B_TO_FSB(ip->i_mount, end);
-	if (end_fsb <= start_fsb)
-		return;
-
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
-						end_fsb - start_fsb);
-	if (error) {
-		/* something screwed, just bail */
-		if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-			xfs_alert(ip->i_mount,
-		"xfs_vm_write_failed: unable to clean up ino %lld",
-					ip->i_ino);
-		}
-	}
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-}
-
-STATIC void
-xfs_vm_write_failed(
-	struct inode		*inode,
-	struct page		*page,
-	loff_t			pos,
-	unsigned		len)
-{
-	loff_t			block_offset;
-	loff_t			block_start;
-	loff_t			block_end;
-	loff_t			from = pos & (PAGE_SIZE - 1);
-	loff_t			to = from + len;
-	struct buffer_head	*bh, *head;
-	struct xfs_mount	*mp = XFS_I(inode)->i_mount;
-
-	/*
-	 * The request pos offset might be 32 or 64 bit, this is all fine
-	 * on 64-bit platform.  However, for 64-bit pos request on 32-bit
-	 * platform, the high 32-bit will be masked off if we evaluate the
-	 * block_offset via (pos & PAGE_MASK) because the PAGE_MASK is
-	 * 0xfffff000 as an unsigned long, hence the result is incorrect
-	 * which could cause the following ASSERT failed in most cases.
-	 * In order to avoid this, we can evaluate the block_offset of the
-	 * start of the page by using shifts rather than masks the mismatch
-	 * problem.
-	 */
-	block_offset = (pos >> PAGE_SHIFT) << PAGE_SHIFT;
-
-	ASSERT(block_offset + from == pos);
-
-	head = page_buffers(page);
-	block_start = 0;
-	for (bh = head; bh != head || !block_start;
-	     bh = bh->b_this_page, block_start = block_end,
-				   block_offset += bh->b_size) {
-		block_end = block_start + bh->b_size;
-
-		/* skip buffers before the write */
-		if (block_end <= from)
-			continue;
-
-		/* if the buffer is after the write, we're done */
-		if (block_start >= to)
-			break;
-
-		/*
-		 * Process delalloc and unwritten buffers beyond EOF. We can
-		 * encounter unwritten buffers in the event that a file has
-		 * post-EOF unwritten extents and an extending write happens to
-		 * fail (e.g., an unaligned write that also involves a delalloc
-		 * to the same page).
-		 */
-		if (!buffer_delay(bh) && !buffer_unwritten(bh))
-			continue;
-
-		if (!xfs_mp_fail_writes(mp) && !buffer_new(bh) &&
-		    block_offset < i_size_read(inode))
-			continue;
-
-		if (buffer_delay(bh))
-			xfs_vm_kill_delalloc_range(inode, block_offset,
-						   block_offset + bh->b_size);
-
-		/*
-		 * This buffer does not contain data anymore. make sure anyone
-		 * who finds it knows that for certain.
-		 */
-		clear_buffer_delay(bh);
-		clear_buffer_uptodate(bh);
-		clear_buffer_mapped(bh);
-		clear_buffer_new(bh);
-		clear_buffer_dirty(bh);
-		clear_buffer_unwritten(bh);
-	}
-
-}
-
-/*
- * This used to call block_write_begin(), but it unlocks and releases the page
- * on error, and we need that page to be able to punch stale delalloc blocks out
- * on failure. hence we copy-n-waste it here and call xfs_vm_write_failed() at
- * the appropriate point.
- */
-STATIC int
-xfs_vm_write_begin(
-	struct file		*file,
-	struct address_space	*mapping,
-	loff_t			pos,
-	unsigned		len,
-	unsigned		flags,
-	struct page		**pagep,
-	void			**fsdata)
-{
-	pgoff_t			index = pos >> PAGE_SHIFT;
-	struct page		*page;
-	int			status;
-	struct xfs_mount	*mp = XFS_I(mapping->host)->i_mount;
-
-	ASSERT(len <= PAGE_SIZE);
-
-	page = grab_cache_page_write_begin(mapping, index, flags);
-	if (!page)
-		return -ENOMEM;
-
-	status = __block_write_begin(page, pos, len, xfs_get_blocks);
-	if (xfs_mp_fail_writes(mp))
-		status = -EIO;
-	if (unlikely(status)) {
-		struct inode	*inode = mapping->host;
-		size_t		isize = i_size_read(inode);
-
-		xfs_vm_write_failed(inode, page, pos, len);
-		unlock_page(page);
-
-		/*
-		 * If the write is beyond EOF, we only want to kill blocks
-		 * allocated in this write, not blocks that were previously
-		 * written successfully.
-		 */
-		if (xfs_mp_fail_writes(mp))
-			isize = 0;
-		if (pos + len > isize) {
-			ssize_t start = max_t(ssize_t, pos, isize);
-
-			truncate_pagecache_range(inode, start, pos + len);
-		}
-
-		put_page(page);
-		page = NULL;
-	}
-
-	*pagep = page;
-	return status;
-}
-
-/*
- * On failure, we only need to kill delalloc blocks beyond EOF in the range of
- * this specific write because they will never be written. Previous writes
- * beyond EOF where block allocation succeeded do not need to be trashed, so
- * only new blocks from this write should be trashed. For blocks within
- * EOF, generic_write_end() zeros them so they are safe to leave alone and be
- * written with all the other valid data.
- */
-STATIC int
-xfs_vm_write_end(
-	struct file		*file,
-	struct address_space	*mapping,
-	loff_t			pos,
-	unsigned		len,
-	unsigned		copied,
-	struct page		*page,
-	void			*fsdata)
-{
-	int			ret;
-
-	ASSERT(len <= PAGE_SIZE);
-
-	ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
-	if (unlikely(ret < len)) {
-		struct inode	*inode = mapping->host;
-		size_t		isize = i_size_read(inode);
-		loff_t		to = pos + len;
-
-		if (to > isize) {
-			/* only kill blocks in this write beyond EOF */
-			if (pos > isize)
-				isize = pos;
-			xfs_vm_kill_delalloc_range(inode, isize, to);
-			truncate_pagecache_range(inode, isize, to);
-		}
-	}
-	return ret;
-}
-
 STATIC sector_t
 xfs_vm_bmap(
 	struct address_space	*mapping,
@@ -1747,8 +1504,6 @@ const struct address_space_operations xfs_address_space_operations = {
 	.set_page_dirty		= xfs_vm_set_page_dirty,
 	.releasepage		= xfs_vm_releasepage,
 	.invalidatepage		= xfs_vm_invalidatepage,
-	.write_begin		= xfs_vm_write_begin,
-	.write_end		= xfs_vm_write_end,
 	.bmap			= xfs_vm_bmap,
 	.direct_IO		= xfs_vm_direct_IO,
 	.migratepage		= buffer_migrate_page,
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 28c42fb0c12a..91bee2db3207 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1087,99 +1087,120 @@ error1: /* Just cancel transaction */
 	return error;
 }
 
-/*
- * Zero file bytes between startoff and endoff inclusive.
- * The iolock is held exclusive and no blocks are buffered.
- *
- * This function is used by xfs_free_file_space() to zero
- * partial blocks when the range to free is not block aligned.
- * When unreserving space with boundaries that are not block
- * aligned we round up the start and round down the end
- * boundaries and then use this function to zero the parts of
- * the blocks that got dropped during the rounding.
- */
-STATIC int
-xfs_zero_remaining_bytes(
-	xfs_inode_t		*ip,
-	xfs_off_t		startoff,
-	xfs_off_t		endoff)
+static int
+xfs_unmap_extent(
+	struct xfs_inode	*ip,
+	xfs_fileoff_t		startoffset_fsb,
+	xfs_filblks_t		len_fsb,
+	int			*done)
 {
-	xfs_bmbt_irec_t		imap;
-	xfs_fileoff_t		offset_fsb;
-	xfs_off_t		lastoffset;
-	xfs_off_t		offset;
-	xfs_buf_t		*bp;
-	xfs_mount_t		*mp = ip->i_mount;
-	int			nimap;
-	int			error = 0;
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_trans	*tp;
+	struct xfs_bmap_free	free_list;
+	xfs_fsblock_t		firstfsb;
+	uint			resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+	int			error;
 
-	/*
-	 * Avoid doing I/O beyond eof - it's not necessary
-	 * since nothing can read beyond eof.  The space will
-	 * be zeroed when the file is extended anyway.
-	 */
-	if (startoff >= XFS_ISIZE(ip))
-		return 0;
+	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
+	if (error) {
+		ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
+		return error;
+	}
 
-	if (endoff > XFS_ISIZE(ip))
-		endoff = XFS_ISIZE(ip);
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot, ip->i_gdquot,
+			ip->i_pdquot, resblks, 0, XFS_QMOPT_RES_REGBLKS);
+	if (error)
+		goto out_trans_cancel;
 
-	for (offset = startoff; offset <= endoff; offset = lastoffset + 1) {
-		uint lock_mode;
+	xfs_trans_ijoin(tp, ip, 0);
 
-		offset_fsb = XFS_B_TO_FSBT(mp, offset);
-		nimap = 1;
+	xfs_bmap_init(&free_list, &firstfsb);
+	error = xfs_bunmapi(tp, ip, startoffset_fsb, len_fsb, 0, 2, &firstfsb,
+			&free_list, done);
+	if (error)
+		goto out_bmap_cancel;
 
-		lock_mode = xfs_ilock_data_map_shared(ip);
-		error = xfs_bmapi_read(ip, offset_fsb, 1, &imap, &nimap, 0);
-		xfs_iunlock(ip, lock_mode);
+	error = xfs_bmap_finish(&tp, &free_list, NULL);
+	if (error)
+		goto out_bmap_cancel;
 
-		if (error || nimap < 1)
-			break;
-		ASSERT(imap.br_blockcount >= 1);
-		ASSERT(imap.br_startoff == offset_fsb);
-		ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
+	error = xfs_trans_commit(tp);
+out_unlock:
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	return error;
 
-		if (imap.br_startblock == HOLESTARTBLOCK ||
-		    imap.br_state == XFS_EXT_UNWRITTEN) {
-			/* skip the entire extent */
-			lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff +
-						  imap.br_blockcount) - 1;
-			continue;
-		}
+out_bmap_cancel:
+	xfs_bmap_cancel(&free_list);
+out_trans_cancel:
+	xfs_trans_cancel(tp);
+	goto out_unlock;
+}
 
-		lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + 1) - 1;
-		if (lastoffset > endoff)
-			lastoffset = endoff;
+static int
+xfs_adjust_extent_unmap_boundaries(
+	struct xfs_inode	*ip,
+	xfs_fileoff_t		*startoffset_fsb,
+	xfs_fileoff_t		*endoffset_fsb)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_bmbt_irec	imap;
+	int			nimap, error;
+	xfs_extlen_t		mod = 0;
 
-		/* DAX can just zero the backing device directly */
-		if (IS_DAX(VFS_I(ip))) {
-			error = dax_zero_page_range(VFS_I(ip), offset,
-						    lastoffset - offset + 1,
-						    xfs_get_blocks_direct);
-			if (error)
-				return error;
-			continue;
-		}
+	nimap = 1;
+	error = xfs_bmapi_read(ip, *startoffset_fsb, 1, &imap, &nimap, 0);
+	if (error)
+		return error;
 
-		error = xfs_buf_read_uncached(XFS_IS_REALTIME_INODE(ip) ?
-				mp->m_rtdev_targp : mp->m_ddev_targp,
-				xfs_fsb_to_db(ip, imap.br_startblock),
-				BTOBB(mp->m_sb.sb_blocksize),
-				0, &bp, NULL);
-		if (error)
-			return error;
+	if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
+		xfs_daddr_t	block;
 
-		memset(bp->b_addr +
-				(offset - XFS_FSB_TO_B(mp, imap.br_startoff)),
-				0, lastoffset - offset + 1);
+		ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
+		block = imap.br_startblock;
+		mod = do_div(block, mp->m_sb.sb_rextsize);
+		if (mod)
+			*startoffset_fsb += mp->m_sb.sb_rextsize - mod;
+	}
 
-		error = xfs_bwrite(bp);
-		xfs_buf_relse(bp);
-		if (error)
-			return error;
+	nimap = 1;
+	error = xfs_bmapi_read(ip, *endoffset_fsb - 1, 1, &imap, &nimap, 0);
+	if (error)
+		return error;
+
+	if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
+		ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
+		mod++;
+		if (mod && mod != mp->m_sb.sb_rextsize)
+			*endoffset_fsb -= mod;
 	}
-	return error;
+
+	return 0;
+}
+
+static int
+xfs_flush_unmap_range(
+	struct xfs_inode	*ip,
+	xfs_off_t		offset,
+	xfs_off_t		len)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	struct inode		*inode = VFS_I(ip);
+	xfs_off_t		rounding, start, end;
+	int			error;
+
+	/* wait for the completion of any pending DIOs */
+	inode_dio_wait(inode);
+
+	rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_SIZE);
+	start = round_down(offset, rounding);
+	end = round_up(offset + len, rounding) - 1;
+
+	error = filemap_write_and_wait_range(inode->i_mapping, start, end);
+	if (error)
+		return error;
+	truncate_pagecache_range(inode, start, end);
+	return 0;
 }
 
 int
@@ -1188,24 +1209,10 @@ xfs_free_file_space(
 	xfs_off_t		offset,
 	xfs_off_t		len)
 {
-	int			done;
-	xfs_fileoff_t		endoffset_fsb;
-	int			error;
-	xfs_fsblock_t		firstfsb;
-	xfs_bmap_free_t		free_list;
-	xfs_bmbt_irec_t		imap;
-	xfs_off_t		ioffset;
-	xfs_off_t		iendoffset;
-	xfs_extlen_t		mod=0;
-	xfs_mount_t		*mp;
-	int			nimap;
-	uint			resblks;
-	xfs_off_t		rounding;
-	int			rt;
+	struct xfs_mount	*mp = ip->i_mount;
 	xfs_fileoff_t		startoffset_fsb;
-	xfs_trans_t		*tp;
-
-	mp = ip->i_mount;
+	xfs_fileoff_t		endoffset_fsb;
+	int			done = 0, error;
 
 	trace_xfs_free_file_space(ip);
 
@@ -1213,135 +1220,45 @@ xfs_free_file_space(
 	if (error)
 		return error;
 
-	error = 0;
 	if (len <= 0)	/* if nothing being freed */
-		return error;
-	rt = XFS_IS_REALTIME_INODE(ip);
-	startoffset_fsb = XFS_B_TO_FSB(mp, offset);
-	endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);
-
-	/* wait for the completion of any pending DIOs */
-	inode_dio_wait(VFS_I(ip));
+		return 0;
 
-	rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_SIZE);
-	ioffset = round_down(offset, rounding);
-	iendoffset = round_up(offset + len, rounding) - 1;
-	error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, ioffset,
-					     iendoffset);
+	error = xfs_flush_unmap_range(ip, offset, len);
 	if (error)
-		goto out;
-	truncate_pagecache_range(VFS_I(ip), ioffset, iendoffset);
+		return error;
+
+	startoffset_fsb = XFS_B_TO_FSB(mp, offset);
+	endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);
 
 	/*
-	 * Need to zero the stuff we're not freeing, on disk.
-	 * If it's a realtime file & can't use unwritten extents then we
-	 * actually need to zero the extent edges.  Otherwise xfs_bunmapi
-	 * will take care of it for us.
+	 * Need to zero the stuff we're not freeing, on disk.  If it's a RT file
+	 * and we can't use unwritten extents then we actually need to ensure
+	 * to zero the whole extent, otherwise we just need to take of block
+	 * boundaries, and xfs_bunmapi will handle the rest.
 	 */
-	if (rt && !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
-		nimap = 1;
-		error = xfs_bmapi_read(ip, startoffset_fsb, 1,
-					&imap, &nimap, 0);
+	if (XFS_IS_REALTIME_INODE(ip) &&
+	    !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
+		error = xfs_adjust_extent_unmap_boundaries(ip, &startoffset_fsb,
+				&endoffset_fsb);
 		if (error)
-			goto out;
-		ASSERT(nimap == 0 || nimap == 1);
-		if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
-			xfs_daddr_t	block;
-
-			ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
-			block = imap.br_startblock;
-			mod = do_div(block, mp->m_sb.sb_rextsize);
-			if (mod)
-				startoffset_fsb += mp->m_sb.sb_rextsize - mod;
-		}
-		nimap = 1;
-		error = xfs_bmapi_read(ip, endoffset_fsb - 1, 1,
-					&imap, &nimap, 0);
-		if (error)
-			goto out;
-		ASSERT(nimap == 0 || nimap == 1);
-		if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
-			ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
-			mod++;
-			if (mod && (mod != mp->m_sb.sb_rextsize))
-				endoffset_fsb -= mod;
-		}
-	}
-	if ((done = (endoffset_fsb <= startoffset_fsb)))
-		/*
-		 * One contiguous piece to clear
-		 */
-		error = xfs_zero_remaining_bytes(ip, offset, offset + len - 1);
-	else {
-		/*
-		 * Some full blocks, possibly two pieces to clear
-		 */
-		if (offset < XFS_FSB_TO_B(mp, startoffset_fsb))
-			error = xfs_zero_remaining_bytes(ip, offset,
-				XFS_FSB_TO_B(mp, startoffset_fsb) - 1);
-		if (!error &&
-		    XFS_FSB_TO_B(mp, endoffset_fsb) < offset + len)
-			error = xfs_zero_remaining_bytes(ip,
-				XFS_FSB_TO_B(mp, endoffset_fsb),
-				offset + len - 1);
+			return error;
 	}
 
-	/*
-	 * free file space until done or until there is an error
-	 */
-	resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
-	while (!error && !done) {
-
-		/*
-		 * allocate and setup the transaction. Allow this
-		 * transaction to dip into the reserve blocks to ensure
-		 * the freeing of the space succeeds at ENOSPC.
-		 */
-		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0,
-				&tp);
-		if (error) {
-			ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
-			break;
+	if (endoffset_fsb > startoffset_fsb) {
+		while (!done) {
+			error = xfs_unmap_extent(ip, startoffset_fsb,
+					endoffset_fsb - startoffset_fsb, &done);
+			if (error)
+				return error;
 		}
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		error = xfs_trans_reserve_quota(tp, mp,
-				ip->i_udquot, ip->i_gdquot, ip->i_pdquot,
-				resblks, 0, XFS_QMOPT_RES_REGBLKS);
-		if (error)
-			goto error1;
-
-		xfs_trans_ijoin(tp, ip, 0);
-
-		/*
-		 * issue the bunmapi() call to free the blocks
-		 */
-		xfs_bmap_init(&free_list, &firstfsb);
-		error = xfs_bunmapi(tp, ip, startoffset_fsb,
-				  endoffset_fsb - startoffset_fsb,
-				  0, 2, &firstfsb, &free_list, &done);
-		if (error)
-			goto error0;
-
-		/*
-		 * complete the transaction
-		 */
-		error = xfs_bmap_finish(&tp, &free_list, NULL);
-		if (error)
-			goto error0;
-
-		error = xfs_trans_commit(tp);
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	}
 
- out:
-	return error;
-
- error0:
-	xfs_bmap_cancel(&free_list);
- error1:
-	xfs_trans_cancel(tp);
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	goto out;
+	/*
+	 * Now that we've unmap all full blocks we'll have to zero out any
+	 * partial block at the beginning and/or end.  xfs_zero_range is
+	 * smart enough to skip any holes, including those we just created.
+	 */
+	return xfs_zero_range(ip, offset, len, NULL);
 }
 
 /*
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 47fc63295422..713991c22781 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -37,6 +37,7 @@
 #include "xfs_log.h"
 #include "xfs_icache.h"
 #include "xfs_pnfs.h"
+#include "xfs_iomap.h"
 
 #include <linux/dcache.h>
 #include <linux/falloc.h>
@@ -80,61 +81,17 @@ xfs_rw_ilock_demote(
 }
 
 /*
- * xfs_iozero clears the specified range supplied via the page cache (except in
- * the DAX case). Writes through the page cache will allocate blocks over holes,
- * though the callers usually map the holes first and avoid them. If a block is
- * not completely zeroed, then it will be read from disk before being partially
- * zeroed.
- *
- * In the DAX case, we can just directly write to the underlying pages. This
- * will not allocate blocks, but will avoid holes and unwritten extents and so
- * not do unnecessary work.
+ * Clear the specified ranges to zero through either the pagecache or DAX.
+ * Holes and unwritten extents will be left as-is as they already are zeroed.
  */
 int
-xfs_iozero(
-	struct xfs_inode	*ip,	/* inode			*/
-	loff_t			pos,	/* offset in file		*/
-	size_t			count)	/* size of data to zero		*/
+xfs_zero_range(
+	struct xfs_inode	*ip,
+	xfs_off_t		pos,
+	xfs_off_t		count,
+	bool			*did_zero)
 {
-	struct page		*page;
-	struct address_space	*mapping;
-	int			status = 0;
-
-
-	mapping = VFS_I(ip)->i_mapping;
-	do {
-		unsigned offset, bytes;
-		void *fsdata;
-
-		offset = (pos & (PAGE_SIZE -1)); /* Within page */
-		bytes = PAGE_SIZE - offset;
-		if (bytes > count)
-			bytes = count;
-
-		if (IS_DAX(VFS_I(ip))) {
-			status = dax_zero_page_range(VFS_I(ip), pos, bytes,
-						     xfs_get_blocks_direct);
-			if (status)
-				break;
-		} else {
-			status = pagecache_write_begin(NULL, mapping, pos, bytes,
-						AOP_FLAG_UNINTERRUPTIBLE,
-						&page, &fsdata);
-			if (status)
-				break;
-
-			zero_user(page, offset, bytes);
-
-			status = pagecache_write_end(NULL, mapping, pos, bytes,
-						bytes, page, fsdata);
-			WARN_ON(status <= 0); /* can't return less than zero! */
-			status = 0;
-		}
-		pos += bytes;
-		count -= bytes;
-	} while (count);
-
-	return status;
+	return iomap_zero_range(VFS_I(ip), pos, count, NULL, &xfs_iomap_ops);
 }
 
 int
@@ -424,49 +381,6 @@ out:
 }
 
 /*
- * This routine is called to handle zeroing any space in the last block of the
- * file that is beyond the EOF.  We do this since the size is being increased
- * without writing anything to that block and we don't want to read the
- * garbage on the disk.
- */
-STATIC int				/* error (positive) */
-xfs_zero_last_block(
-	struct xfs_inode	*ip,
-	xfs_fsize_t		offset,
-	xfs_fsize_t		isize,
-	bool			*did_zeroing)
-{
-	struct xfs_mount	*mp = ip->i_mount;
-	xfs_fileoff_t		last_fsb = XFS_B_TO_FSBT(mp, isize);
-	int			zero_offset = XFS_B_FSB_OFFSET(mp, isize);
-	int			zero_len;
-	int			nimaps = 1;
-	int			error = 0;
-	struct xfs_bmbt_irec	imap;
-
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	error = xfs_bmapi_read(ip, last_fsb, 1, &imap, &nimaps, 0);
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	if (error)
-		return error;
-
-	ASSERT(nimaps > 0);
-
-	/*
-	 * If the block underlying isize is just a hole, then there
-	 * is nothing to zero.
-	 */
-	if (imap.br_startblock == HOLESTARTBLOCK)
-		return 0;
-
-	zero_len = mp->m_sb.sb_blocksize - zero_offset;
-	if (isize + zero_len > offset)
-		zero_len = offset - isize;
-	*did_zeroing = true;
-	return xfs_iozero(ip, isize, zero_len);
-}
-
-/*
  * Zero any on disk space between the current EOF and the new, larger EOF.
  *
  * This handles the normal case of zeroing the remainder of the last block in
@@ -484,94 +398,11 @@ xfs_zero_eof(
 	xfs_fsize_t		isize,		/* current inode size */
 	bool			*did_zeroing)
 {
-	struct xfs_mount	*mp = ip->i_mount;
-	xfs_fileoff_t		start_zero_fsb;
-	xfs_fileoff_t		end_zero_fsb;
-	xfs_fileoff_t		zero_count_fsb;
-	xfs_fileoff_t		last_fsb;
-	xfs_fileoff_t		zero_off;
-	xfs_fsize_t		zero_len;
-	int			nimaps;
-	int			error = 0;
-	struct xfs_bmbt_irec	imap;
-
 	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
 	ASSERT(offset > isize);
 
 	trace_xfs_zero_eof(ip, isize, offset - isize);
-
-	/*
-	 * First handle zeroing the block on which isize resides.
-	 *
-	 * We only zero a part of that block so it is handled specially.
-	 */
-	if (XFS_B_FSB_OFFSET(mp, isize) != 0) {
-		error = xfs_zero_last_block(ip, offset, isize, did_zeroing);
-		if (error)
-			return error;
-	}
-
-	/*
-	 * Calculate the range between the new size and the old where blocks
-	 * needing to be zeroed may exist.
-	 *
-	 * To get the block where the last byte in the file currently resides,
-	 * we need to subtract one from the size and truncate back to a block
-	 * boundary.  We subtract 1 in case the size is exactly on a block
-	 * boundary.
-	 */
-	last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
-	start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
-	end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1);
-	ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb);
-	if (last_fsb == end_zero_fsb) {
-		/*
-		 * The size was only incremented on its last block.
-		 * We took care of that above, so just return.
-		 */
-		return 0;
-	}
-
-	ASSERT(start_zero_fsb <= end_zero_fsb);
-	while (start_zero_fsb <= end_zero_fsb) {
-		nimaps = 1;
-		zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
-
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		error = xfs_bmapi_read(ip, start_zero_fsb, zero_count_fsb,
-					  &imap, &nimaps, 0);
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
-		if (error)
-			return error;
-
-		ASSERT(nimaps > 0);
-
-		if (imap.br_state == XFS_EXT_UNWRITTEN ||
-		    imap.br_startblock == HOLESTARTBLOCK) {
-			start_zero_fsb = imap.br_startoff + imap.br_blockcount;
-			ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
-			continue;
-		}
-
-		/*
-		 * There are blocks we need to zero.
-		 */
-		zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
-		zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
-
-		if ((zero_off + zero_len) > offset)
-			zero_len = offset - zero_off;
-
-		error = xfs_iozero(ip, zero_off, zero_len);
-		if (error)
-			return error;
-
-		*did_zeroing = true;
-		start_zero_fsb = imap.br_startoff + imap.br_blockcount;
-		ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
-	}
-
-	return 0;
+	return xfs_zero_range(ip, isize, offset - isize, did_zeroing);
 }
 
 /*
@@ -841,7 +672,7 @@ xfs_file_buffered_aio_write(
 write_retry:
 	trace_xfs_file_buffered_write(ip, iov_iter_count(from),
 				      iocb->ki_pos, 0);
-	ret = generic_perform_write(file, from, iocb->ki_pos);
+	ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops);
 	if (likely(ret >= 0))
 		iocb->ki_pos += ret;
 
@@ -1553,7 +1384,7 @@ xfs_filemap_page_mkwrite(
 	if (IS_DAX(inode)) {
 		ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_dax_fault);
 	} else {
-		ret = block_page_mkwrite(vma, vmf, xfs_get_blocks);
+		ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops);
 		ret = block_page_mkwrite_return(ret);
 	}
 
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 99d75223ff2e..0c19d3d05a91 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -427,7 +427,8 @@ int xfs_update_prealloc_flags(struct xfs_inode *ip,
 			enum xfs_prealloc_flags flags);
 int	xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset,
 		     xfs_fsize_t isize, bool *did_zeroing);
-int	xfs_iozero(struct xfs_inode *ip, loff_t pos, size_t count);
+int	xfs_zero_range(struct xfs_inode *ip, xfs_off_t pos, xfs_off_t count,
+		bool *did_zero);
 loff_t	__xfs_seek_hole_data(struct inode *inode, loff_t start,
 			     loff_t eof, int whence);
 
433 434
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 58391355a44d..620fc9120444 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -15,6 +15,7 @@
  * along with this program; if not, write the Free Software Foundation,
  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
+#include <linux/iomap.h>
 #include "xfs.h"
 #include "xfs_fs.h"
 #include "xfs_shared.h"
@@ -940,3 +941,173 @@ error_on_bmapi_transaction:
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	return error;
 }
+
+void
+xfs_bmbt_to_iomap(
+	struct xfs_inode	*ip,
+	struct iomap		*iomap,
+	struct xfs_bmbt_irec	*imap)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+
+	if (imap->br_startblock == HOLESTARTBLOCK) {
+		iomap->blkno = IOMAP_NULL_BLOCK;
+		iomap->type = IOMAP_HOLE;
+	} else if (imap->br_startblock == DELAYSTARTBLOCK) {
+		iomap->blkno = IOMAP_NULL_BLOCK;
+		iomap->type = IOMAP_DELALLOC;
+	} else {
+		iomap->blkno = xfs_fsb_to_db(ip, imap->br_startblock);
+		if (imap->br_state == XFS_EXT_UNWRITTEN)
+			iomap->type = IOMAP_UNWRITTEN;
+		else
+			iomap->type = IOMAP_MAPPED;
+	}
+	iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff);
+	iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount);
+	iomap->bdev = xfs_find_bdev_for_inode(VFS_I(ip));
+}
+
+static inline bool imap_needs_alloc(struct xfs_bmbt_irec *imap, int nimaps)
+{
+	return !nimaps ||
+		imap->br_startblock == HOLESTARTBLOCK ||
+		imap->br_startblock == DELAYSTARTBLOCK;
+}
+
+static int
+xfs_file_iomap_begin(
+	struct inode		*inode,
+	loff_t			offset,
+	loff_t			length,
+	unsigned		flags,
+	struct iomap		*iomap)
+{
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_bmbt_irec	imap;
+	xfs_fileoff_t		offset_fsb, end_fsb;
+	int			nimaps = 1, error = 0;
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return -EIO;
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+	ASSERT(offset <= mp->m_super->s_maxbytes);
+	if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
+		length = mp->m_super->s_maxbytes - offset;
+	offset_fsb = XFS_B_TO_FSBT(mp, offset);
+	end_fsb = XFS_B_TO_FSB(mp, offset + length);
+
+	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
+			       &nimaps, XFS_BMAPI_ENTIRE);
+	if (error) {
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		return error;
+	}
+
+	if ((flags & IOMAP_WRITE) && imap_needs_alloc(&imap, nimaps)) {
+		/*
+		 * We cap the maximum length we map here to MAX_WRITEBACK_PAGES
+		 * pages to keep the chunks of work done where somewhat symmetric
+		 * with the work writeback does. This is a completely arbitrary
+		 * number pulled out of thin air as a best guess for initial
+		 * testing.
+		 *
+		 * Note that the values needs to be less than 32-bits wide until
+		 * the lower level functions are updated.
+		 */
+		length = min_t(loff_t, length, 1024 * PAGE_SIZE);
+		if (xfs_get_extsz_hint(ip)) {
+			/*
+			 * xfs_iomap_write_direct() expects the shared lock. It
+			 * is unlocked on return.
+			 */
+			xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
+			error = xfs_iomap_write_direct(ip, offset, length, &imap,
+						       nimaps);
+		} else {
+			error = xfs_iomap_write_delay(ip, offset, length, &imap);
+			xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		}
+
+		if (error)
+			return error;
+
+		trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
+		xfs_bmbt_to_iomap(ip, iomap, &imap);
+	} else if (nimaps) {
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		trace_xfs_iomap_found(ip, offset, length, 0, &imap);
+		xfs_bmbt_to_iomap(ip, iomap, &imap);
+	} else {
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		trace_xfs_iomap_not_found(ip, offset, length, 0, &imap);
+		iomap->blkno = IOMAP_NULL_BLOCK;
+		iomap->type = IOMAP_HOLE;
+		iomap->offset = offset;
+		iomap->length = length;
+	}
+
+	return 0;
+}
+
+static int
+xfs_file_iomap_end_delalloc(
+	struct xfs_inode	*ip,
+	loff_t			offset,
+	loff_t			length,
+	ssize_t			written)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	xfs_fileoff_t		start_fsb;
+	xfs_fileoff_t		end_fsb;
+	int			error = 0;
+
+	start_fsb = XFS_B_TO_FSB(mp, offset + written);
+	end_fsb = XFS_B_TO_FSB(mp, offset + length);
+
+	/*
+	 * Trim back delalloc blocks if we didn't manage to write the whole
+	 * range reserved.
+	 *
+	 * We don't need to care about racing delalloc as we hold i_mutex
+	 * across the reserve/allocate/unreserve calls. If there are delalloc
+	 * blocks in the range, they are ours.
+	 */
+	if (start_fsb < end_fsb) {
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
+		error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
+					       end_fsb - start_fsb);
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+		if (error && !XFS_FORCED_SHUTDOWN(mp)) {
+			xfs_alert(mp, "%s: unable to clean up ino %lld",
+				__func__, ip->i_ino);
+			return error;
+		}
+	}
+
+	return 0;
+}
+
+static int
+xfs_file_iomap_end(
+	struct inode		*inode,
+	loff_t			offset,
+	loff_t			length,
+	ssize_t			written,
+	unsigned		flags,
+	struct iomap		*iomap)
+{
+	if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC)
+		return xfs_file_iomap_end_delalloc(XFS_I(inode), offset,
+				length, written);
+	return 0;
+}
+
+struct iomap_ops xfs_iomap_ops = {
+	.iomap_begin		= xfs_file_iomap_begin,
+	.iomap_end		= xfs_file_iomap_end,
+};
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 8688e663d744..e066d045e2ff 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -18,6 +18,8 @@
 #ifndef __XFS_IOMAP_H__
 #define __XFS_IOMAP_H__
 
+#include <linux/iomap.h>
+
 struct xfs_inode;
 struct xfs_bmbt_irec;
 
@@ -29,4 +31,9 @@ int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t,
 			struct xfs_bmbt_irec *);
 int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t);
 
+void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
+		struct xfs_bmbt_irec *);
+
+extern struct iomap_ops xfs_iomap_ops;
+
 #endif	/* __XFS_IOMAP_H__*/
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index c5d4eba6972e..ab820f84ed50 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -38,12 +38,13 @@
 #include "xfs_dir2.h"
 #include "xfs_trans_space.h"
 #include "xfs_pnfs.h"
+#include "xfs_iomap.h"
 
 #include <linux/capability.h>
 #include <linux/xattr.h>
 #include <linux/posix_acl.h>
 #include <linux/security.h>
-#include <linux/fiemap.h>
+#include <linux/iomap.h>
 #include <linux/slab.h>
 
 /*
@@ -801,20 +802,30 @@ xfs_setattr_size(
 		return error;
 
 	/*
+	 * Wait for all direct I/O to complete.
+	 */
+	inode_dio_wait(inode);
+
+	/*
 	 * File data changes must be complete before we start the transaction to
 	 * modify the inode.  This needs to be done before joining the inode to
 	 * the transaction because the inode cannot be unlocked once it is a
 	 * part of the transaction.
 	 *
-	 * Start with zeroing any data block beyond EOF that we may expose on
-	 * file extension.
+	 * Start with zeroing any data beyond EOF that we may expose on file
+	 * extension, or zeroing out the rest of the block on a downward
+	 * truncate.
 	 */
 	if (newsize > oldsize) {
 		error = xfs_zero_eof(ip, newsize, oldsize, &did_zeroing);
-		if (error)
-			return error;
+	} else {
+		error = iomap_truncate_page(inode, newsize, &did_zeroing,
+				&xfs_iomap_ops);
 	}
 
+	if (error)
+		return error;
+
 	/*
 	 * We are going to log the inode size change in this transaction so
 	 * any previous writes that are beyond the on disk EOF and the new
@@ -823,17 +834,14 @@ xfs_setattr_size(
 	 * problem. Note that this includes any block zeroing we did above;
 	 * otherwise those blocks may not be zeroed after a crash.
 	 */
-	if (newsize > ip->i_d.di_size &&
-	    (oldsize != ip->i_d.di_size || did_zeroing)) {
+	if (did_zeroing ||
+	    (newsize > ip->i_d.di_size && oldsize != ip->i_d.di_size)) {
 		error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
 						      ip->i_d.di_size, newsize);
 		if (error)
 			return error;
 	}
 
-	/* Now wait for all direct I/O to complete. */
-	inode_dio_wait(inode);
-
 	/*
 	 * We've already locked out new page faults, so now we can safely remove
 	 * pages from the page cache knowing they won't get refaulted until we
@@ -851,13 +859,6 @@ xfs_setattr_size(
 	 * to hope that the caller sees ENOMEM and retries the truncate
 	 * operation.
 	 */
-	if (IS_DAX(inode))
-		error = dax_truncate_page(inode, newsize, xfs_get_blocks_direct);
-	else
-		error = block_truncate_page(inode->i_mapping, newsize,
-					    xfs_get_blocks);
-	if (error)
-		return error;
 	truncate_setsize(inode, newsize);
 
 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
@@ -998,51 +999,6 @@ xfs_vn_update_time(
 	return xfs_trans_commit(tp);
 }
 
-#define XFS_FIEMAP_FLAGS	(FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
-
-/*
- * Call fiemap helper to fill in user data.
- * Returns positive errors to xfs_getbmap.
- */
-STATIC int
-xfs_fiemap_format(
-	void			**arg,
-	struct getbmapx		*bmv,
-	int			*full)
-{
-	int			error;
-	struct fiemap_extent_info *fieinfo = *arg;
-	u32			fiemap_flags = 0;
-	u64			logical, physical, length;
-
-	/* Do nothing for a hole */
-	if (bmv->bmv_block == -1LL)
-		return 0;
-
-	logical = BBTOB(bmv->bmv_offset);
-	physical = BBTOB(bmv->bmv_block);
-	length = BBTOB(bmv->bmv_length);
-
-	if (bmv->bmv_oflags & BMV_OF_PREALLOC)
-		fiemap_flags |= FIEMAP_EXTENT_UNWRITTEN;
-	else if (bmv->bmv_oflags & BMV_OF_DELALLOC) {
-		fiemap_flags |= (FIEMAP_EXTENT_DELALLOC |
-				 FIEMAP_EXTENT_UNKNOWN);
-		physical = 0;	/* no block yet */
-	}
-	if (bmv->bmv_oflags & BMV_OF_LAST)
-		fiemap_flags |= FIEMAP_EXTENT_LAST;
-
-	error = fiemap_fill_next_extent(fieinfo, logical, physical,
-					length, fiemap_flags);
-	if (error > 0) {
-		error = 0;
-		*full = 1;	/* user array now full */
-	}
-
-	return error;
-}
-
 STATIC int
 xfs_vn_fiemap(
 	struct inode		*inode,
@@ -1050,38 +1006,13 @@ xfs_vn_fiemap(
 	u64			start,
 	u64			length)
 {
-	xfs_inode_t		*ip = XFS_I(inode);
-	struct getbmapx		bm;
 	int			error;
 
-	error = fiemap_check_flags(fieinfo, XFS_FIEMAP_FLAGS);
-	if (error)
-		return error;
-
-	/* Set up bmap header for xfs internal routine */
-	bm.bmv_offset = BTOBBT(start);
-	/* Special case for whole file */
-	if (length == FIEMAP_MAX_OFFSET)
-		bm.bmv_length = -1LL;
-	else
-		bm.bmv_length = BTOBB(start + length) - bm.bmv_offset;
-
-	/* We add one because in getbmap world count includes the header */
-	bm.bmv_count = !fieinfo->fi_extents_max ? MAXEXTNUM :
-					fieinfo->fi_extents_max + 1;
-	bm.bmv_count = min_t(__s32, bm.bmv_count,
-			     (PAGE_SIZE * 16 / sizeof(struct getbmapx)));
-	bm.bmv_iflags = BMV_IF_PREALLOC | BMV_IF_NO_HOLES;
-	if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR)
-		bm.bmv_iflags |= BMV_IF_ATTRFORK;
-	if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC))
-		bm.bmv_iflags |= BMV_IF_DELALLOC;
-
-	error = xfs_getbmap(ip, &bm, xfs_fiemap_format, fieinfo);
-	if (error)
-		return error;
+	xfs_ilock(XFS_I(inode), XFS_IOLOCK_SHARED);
+	error = iomap_fiemap(inode, fieinfo, start, length, &xfs_iomap_ops);
+	xfs_iunlock(XFS_I(inode), XFS_IOLOCK_SHARED);
 
-	return 0;
+	return error;
 }
 
 STATIC int
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index db3c7df52e30..0f14b2e4bf6c 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -80,32 +80,6 @@ xfs_fs_get_uuid(
 	return 0;
 }
 
-static void
-xfs_bmbt_to_iomap(
-	struct xfs_inode	*ip,
-	struct iomap		*iomap,
-	struct xfs_bmbt_irec	*imap)
-{
-	struct xfs_mount	*mp = ip->i_mount;
-
-	if (imap->br_startblock == HOLESTARTBLOCK) {
-		iomap->blkno = IOMAP_NULL_BLOCK;
-		iomap->type = IOMAP_HOLE;
-	} else if (imap->br_startblock == DELAYSTARTBLOCK) {
-		iomap->blkno = IOMAP_NULL_BLOCK;
-		iomap->type = IOMAP_DELALLOC;
-	} else {
-		iomap->blkno =
-			XFS_FSB_TO_DADDR(ip->i_mount, imap->br_startblock);
-		if (imap->br_state == XFS_EXT_UNWRITTEN)
-			iomap->type = IOMAP_UNWRITTEN;
-		else
-			iomap->type = IOMAP_MAPPED;
-	}
-	iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff);
-	iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount);
-}
-
 /*
  * Get a layout for the pNFS client.
  */
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index ea94ee0fe5ea..bb24ce7b0280 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1295,6 +1295,9 @@ DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc);
 DEFINE_IOMAP_EVENT(xfs_get_blocks_found);
 DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
 DEFINE_IOMAP_EVENT(xfs_get_blocks_map_direct);
+DEFINE_IOMAP_EVENT(xfs_iomap_alloc);
+DEFINE_IOMAP_EVENT(xfs_iomap_found);
+DEFINE_IOMAP_EVENT(xfs_iomap_not_found);
 
 DECLARE_EVENT_CLASS(xfs_simple_io_class,
 	TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),