about | summary | refs | log | tree | commit | diff | stats
path: root/fs/btrfs/file.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/file.c')
-rw-r--r-- fs/btrfs/file.c 158
1 files changed, 90 insertions, 68 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 098bb8f690c9..15a09cb156ce 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -41,6 +41,7 @@
41#include "locking.h" 41#include "locking.h"
42#include "volumes.h" 42#include "volumes.h"
43#include "qgroup.h" 43#include "qgroup.h"
44#include "compression.h"
44 45
45static struct kmem_cache *btrfs_inode_defrag_cachep; 46static struct kmem_cache *btrfs_inode_defrag_cachep;
46/* 47/*
@@ -498,7 +499,7 @@ int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
498 loff_t isize = i_size_read(inode); 499 loff_t isize = i_size_read(inode);
499 500
500 start_pos = pos & ~((u64)root->sectorsize - 1); 501 start_pos = pos & ~((u64)root->sectorsize - 1);
501 num_bytes = ALIGN(write_bytes + pos - start_pos, root->sectorsize); 502 num_bytes = round_up(write_bytes + pos - start_pos, root->sectorsize);
502 503
503 end_of_last_block = start_pos + num_bytes - 1; 504 end_of_last_block = start_pos + num_bytes - 1;
504 err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, 505 err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
@@ -1379,16 +1380,19 @@ fail:
1379static noinline int 1380static noinline int
1380lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages, 1381lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages,
1381 size_t num_pages, loff_t pos, 1382 size_t num_pages, loff_t pos,
1383 size_t write_bytes,
1382 u64 *lockstart, u64 *lockend, 1384 u64 *lockstart, u64 *lockend,
1383 struct extent_state **cached_state) 1385 struct extent_state **cached_state)
1384{ 1386{
1387 struct btrfs_root *root = BTRFS_I(inode)->root;
1385 u64 start_pos; 1388 u64 start_pos;
1386 u64 last_pos; 1389 u64 last_pos;
1387 int i; 1390 int i;
1388 int ret = 0; 1391 int ret = 0;
1389 1392
1390 start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1); 1393 start_pos = round_down(pos, root->sectorsize);
1391 last_pos = start_pos + ((u64)num_pages << PAGE_CACHE_SHIFT) - 1; 1394 last_pos = start_pos
1395 + round_up(pos + write_bytes - start_pos, root->sectorsize) - 1;
1392 1396
1393 if (start_pos < inode->i_size) { 1397 if (start_pos < inode->i_size) {
1394 struct btrfs_ordered_extent *ordered; 1398 struct btrfs_ordered_extent *ordered;
@@ -1503,6 +1507,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1503 1507
1504 while (iov_iter_count(i) > 0) { 1508 while (iov_iter_count(i) > 0) {
1505 size_t offset = pos & (PAGE_CACHE_SIZE - 1); 1509 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
1510 size_t sector_offset;
1506 size_t write_bytes = min(iov_iter_count(i), 1511 size_t write_bytes = min(iov_iter_count(i),
1507 nrptrs * (size_t)PAGE_CACHE_SIZE - 1512 nrptrs * (size_t)PAGE_CACHE_SIZE -
1508 offset); 1513 offset);
@@ -1511,6 +1516,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1511 size_t reserve_bytes; 1516 size_t reserve_bytes;
1512 size_t dirty_pages; 1517 size_t dirty_pages;
1513 size_t copied; 1518 size_t copied;
1519 size_t dirty_sectors;
1520 size_t num_sectors;
1514 1521
1515 WARN_ON(num_pages > nrptrs); 1522 WARN_ON(num_pages > nrptrs);
1516 1523
@@ -1523,29 +1530,29 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1523 break; 1530 break;
1524 } 1531 }
1525 1532
1526 reserve_bytes = num_pages << PAGE_CACHE_SHIFT; 1533 sector_offset = pos & (root->sectorsize - 1);
1534 reserve_bytes = round_up(write_bytes + sector_offset,
1535 root->sectorsize);
1527 1536
1528 if (BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | 1537 if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
1529 BTRFS_INODE_PREALLOC)) { 1538 BTRFS_INODE_PREALLOC)) &&
1530 ret = check_can_nocow(inode, pos, &write_bytes); 1539 check_can_nocow(inode, pos, &write_bytes) > 0) {
1531 if (ret < 0) 1540 /*
1532 break; 1541 * For nodata cow case, no need to reserve
1533 if (ret > 0) { 1542 * data space.
1534 /* 1543 */
1535 * For nodata cow case, no need to reserve 1544 only_release_metadata = true;
1536 * data space. 1545 /*
1537 */ 1546 * our prealloc extent may be smaller than
1538 only_release_metadata = true; 1547 * write_bytes, so scale down.
1539 /* 1548 */
1540 * our prealloc extent may be smaller than 1549 num_pages = DIV_ROUND_UP(write_bytes + offset,
1541 * write_bytes, so scale down. 1550 PAGE_CACHE_SIZE);
1542 */ 1551 reserve_bytes = round_up(write_bytes + sector_offset,
1543 num_pages = DIV_ROUND_UP(write_bytes + offset, 1552 root->sectorsize);
1544 PAGE_CACHE_SIZE); 1553 goto reserve_metadata;
1545 reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
1546 goto reserve_metadata;
1547 }
1548 } 1554 }
1555
1549 ret = btrfs_check_data_free_space(inode, pos, write_bytes); 1556 ret = btrfs_check_data_free_space(inode, pos, write_bytes);
1550 if (ret < 0) 1557 if (ret < 0)
1551 break; 1558 break;
@@ -1576,8 +1583,8 @@ again:
1576 break; 1583 break;
1577 1584
1578 ret = lock_and_cleanup_extent_if_need(inode, pages, num_pages, 1585 ret = lock_and_cleanup_extent_if_need(inode, pages, num_pages,
1579 pos, &lockstart, &lockend, 1586 pos, write_bytes, &lockstart,
1580 &cached_state); 1587 &lockend, &cached_state);
1581 if (ret < 0) { 1588 if (ret < 0) {
1582 if (ret == -EAGAIN) 1589 if (ret == -EAGAIN)
1583 goto again; 1590 goto again;
@@ -1612,9 +1619,16 @@ again:
1612 * we still have an outstanding extent for the chunk we actually 1619 * we still have an outstanding extent for the chunk we actually
1613 * managed to copy. 1620 * managed to copy.
1614 */ 1621 */
1615 if (num_pages > dirty_pages) { 1622 num_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info,
1616 release_bytes = (num_pages - dirty_pages) << 1623 reserve_bytes);
1617 PAGE_CACHE_SHIFT; 1624 dirty_sectors = round_up(copied + sector_offset,
1625 root->sectorsize);
1626 dirty_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info,
1627 dirty_sectors);
1628
1629 if (num_sectors > dirty_sectors) {
1630 release_bytes = (write_bytes - copied)
1631 & ~((u64)root->sectorsize - 1);
1618 if (copied > 0) { 1632 if (copied > 0) {
1619 spin_lock(&BTRFS_I(inode)->lock); 1633 spin_lock(&BTRFS_I(inode)->lock);
1620 BTRFS_I(inode)->outstanding_extents++; 1634 BTRFS_I(inode)->outstanding_extents++;
@@ -1633,7 +1647,8 @@ again:
1633 } 1647 }
1634 } 1648 }
1635 1649
1636 release_bytes = dirty_pages << PAGE_CACHE_SHIFT; 1650 release_bytes = round_up(copied + sector_offset,
1651 root->sectorsize);
1637 1652
1638 if (copied > 0) 1653 if (copied > 0)
1639 ret = btrfs_dirty_pages(root, inode, pages, 1654 ret = btrfs_dirty_pages(root, inode, pages,
@@ -1654,8 +1669,7 @@ again:
1654 1669
1655 if (only_release_metadata && copied > 0) { 1670 if (only_release_metadata && copied > 0) {
1656 lockstart = round_down(pos, root->sectorsize); 1671 lockstart = round_down(pos, root->sectorsize);
1657 lockend = lockstart + 1672 lockend = round_up(pos + copied, root->sectorsize) - 1;
1658 (dirty_pages << PAGE_CACHE_SHIFT) - 1;
1659 1673
1660 set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, 1674 set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
1661 lockend, EXTENT_NORESERVE, NULL, 1675 lockend, EXTENT_NORESERVE, NULL,
@@ -1761,6 +1775,8 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
1761 ssize_t err; 1775 ssize_t err;
1762 loff_t pos; 1776 loff_t pos;
1763 size_t count; 1777 size_t count;
1778 loff_t oldsize;
1779 int clean_page = 0;
1764 1780
1765 inode_lock(inode); 1781 inode_lock(inode);
1766 err = generic_write_checks(iocb, from); 1782 err = generic_write_checks(iocb, from);
@@ -1799,14 +1815,17 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
1799 pos = iocb->ki_pos; 1815 pos = iocb->ki_pos;
1800 count = iov_iter_count(from); 1816 count = iov_iter_count(from);
1801 start_pos = round_down(pos, root->sectorsize); 1817 start_pos = round_down(pos, root->sectorsize);
1802 if (start_pos > i_size_read(inode)) { 1818 oldsize = i_size_read(inode);
1819 if (start_pos > oldsize) {
1803 /* Expand hole size to cover write data, preventing empty gap */ 1820 /* Expand hole size to cover write data, preventing empty gap */
1804 end_pos = round_up(pos + count, root->sectorsize); 1821 end_pos = round_up(pos + count, root->sectorsize);
1805 err = btrfs_cont_expand(inode, i_size_read(inode), end_pos); 1822 err = btrfs_cont_expand(inode, oldsize, end_pos);
1806 if (err) { 1823 if (err) {
1807 inode_unlock(inode); 1824 inode_unlock(inode);
1808 goto out; 1825 goto out;
1809 } 1826 }
1827 if (start_pos > round_up(oldsize, root->sectorsize))
1828 clean_page = 1;
1810 } 1829 }
1811 1830
1812 if (sync) 1831 if (sync)
@@ -1818,6 +1837,9 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
1818 num_written = __btrfs_buffered_write(file, from, pos); 1837 num_written = __btrfs_buffered_write(file, from, pos);
1819 if (num_written > 0) 1838 if (num_written > 0)
1820 iocb->ki_pos = pos + num_written; 1839 iocb->ki_pos = pos + num_written;
1840 if (clean_page)
1841 pagecache_isize_extended(inode, oldsize,
1842 i_size_read(inode));
1821 } 1843 }
1822 1844
1823 inode_unlock(inode); 1845 inode_unlock(inode);
@@ -1825,7 +1847,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
1825 /* 1847 /*
1826 * We also have to set last_sub_trans to the current log transid, 1848 * We also have to set last_sub_trans to the current log transid,
1827 * otherwise subsequent syncs to a file that's been synced in this 1849 * otherwise subsequent syncs to a file that's been synced in this
1828 * transaction will appear to have already occured. 1850 * transaction will appear to have already occurred.
1829 */ 1851 */
1830 spin_lock(&BTRFS_I(inode)->lock); 1852 spin_lock(&BTRFS_I(inode)->lock);
1831 BTRFS_I(inode)->last_sub_trans = root->log_transid; 1853 BTRFS_I(inode)->last_sub_trans = root->log_transid;
@@ -1996,10 +2018,11 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
1996 */ 2018 */
1997 smp_mb(); 2019 smp_mb();
1998 if (btrfs_inode_in_log(inode, root->fs_info->generation) || 2020 if (btrfs_inode_in_log(inode, root->fs_info->generation) ||
1999 (BTRFS_I(inode)->last_trans <= 2021 (full_sync && BTRFS_I(inode)->last_trans <=
2000 root->fs_info->last_trans_committed && 2022 root->fs_info->last_trans_committed) ||
2001 (full_sync || 2023 (!btrfs_have_ordered_extents_in_range(inode, start, len) &&
2002 !btrfs_have_ordered_extents_in_range(inode, start, len)))) { 2024 BTRFS_I(inode)->last_trans
2025 <= root->fs_info->last_trans_committed)) {
2003 /* 2026 /*
2004 * We've had everything committed since the last time we were 2027 * We've had everything committed since the last time we were
2005 * modified so clear this flag in case it was set for whatever 2028 * modified so clear this flag in case it was set for whatever
@@ -2293,10 +2316,10 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2293 int ret = 0; 2316 int ret = 0;
2294 int err = 0; 2317 int err = 0;
2295 unsigned int rsv_count; 2318 unsigned int rsv_count;
2296 bool same_page; 2319 bool same_block;
2297 bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES); 2320 bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
2298 u64 ino_size; 2321 u64 ino_size;
2299 bool truncated_page = false; 2322 bool truncated_block = false;
2300 bool updated_inode = false; 2323 bool updated_inode = false;
2301 2324
2302 ret = btrfs_wait_ordered_range(inode, offset, len); 2325 ret = btrfs_wait_ordered_range(inode, offset, len);
@@ -2304,7 +2327,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2304 return ret; 2327 return ret;
2305 2328
2306 inode_lock(inode); 2329 inode_lock(inode);
2307 ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE); 2330 ino_size = round_up(inode->i_size, root->sectorsize);
2308 ret = find_first_non_hole(inode, &offset, &len); 2331 ret = find_first_non_hole(inode, &offset, &len);
2309 if (ret < 0) 2332 if (ret < 0)
2310 goto out_only_mutex; 2333 goto out_only_mutex;
@@ -2317,31 +2340,30 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2317 lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize); 2340 lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize);
2318 lockend = round_down(offset + len, 2341 lockend = round_down(offset + len,
2319 BTRFS_I(inode)->root->sectorsize) - 1; 2342 BTRFS_I(inode)->root->sectorsize) - 1;
2320 same_page = ((offset >> PAGE_CACHE_SHIFT) == 2343 same_block = (BTRFS_BYTES_TO_BLKS(root->fs_info, offset))
2321 ((offset + len - 1) >> PAGE_CACHE_SHIFT)); 2344 == (BTRFS_BYTES_TO_BLKS(root->fs_info, offset + len - 1));
2322
2323 /* 2345 /*
2324 * We needn't truncate any page which is beyond the end of the file 2346 * We needn't truncate any block which is beyond the end of the file
2325 * because we are sure there is no data there. 2347 * because we are sure there is no data there.
2326 */ 2348 */
2327 /* 2349 /*
2328 * Only do this if we are in the same page and we aren't doing the 2350 * Only do this if we are in the same block and we aren't doing the
2329 * entire page. 2351 * entire block.
2330 */ 2352 */
2331 if (same_page && len < PAGE_CACHE_SIZE) { 2353 if (same_block && len < root->sectorsize) {
2332 if (offset < ino_size) { 2354 if (offset < ino_size) {
2333 truncated_page = true; 2355 truncated_block = true;
2334 ret = btrfs_truncate_page(inode, offset, len, 0); 2356 ret = btrfs_truncate_block(inode, offset, len, 0);
2335 } else { 2357 } else {
2336 ret = 0; 2358 ret = 0;
2337 } 2359 }
2338 goto out_only_mutex; 2360 goto out_only_mutex;
2339 } 2361 }
2340 2362
2341 /* zero back part of the first page */ 2363 /* zero back part of the first block */
2342 if (offset < ino_size) { 2364 if (offset < ino_size) {
2343 truncated_page = true; 2365 truncated_block = true;
2344 ret = btrfs_truncate_page(inode, offset, 0, 0); 2366 ret = btrfs_truncate_block(inode, offset, 0, 0);
2345 if (ret) { 2367 if (ret) {
2346 inode_unlock(inode); 2368 inode_unlock(inode);
2347 return ret; 2369 return ret;
@@ -2376,9 +2398,10 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2376 if (!ret) { 2398 if (!ret) {
2377 /* zero the front end of the last page */ 2399 /* zero the front end of the last page */
2378 if (tail_start + tail_len < ino_size) { 2400 if (tail_start + tail_len < ino_size) {
2379 truncated_page = true; 2401 truncated_block = true;
2380 ret = btrfs_truncate_page(inode, 2402 ret = btrfs_truncate_block(inode,
2381 tail_start + tail_len, 0, 1); 2403 tail_start + tail_len,
2404 0, 1);
2382 if (ret) 2405 if (ret)
2383 goto out_only_mutex; 2406 goto out_only_mutex;
2384 } 2407 }
@@ -2544,7 +2567,7 @@ out_trans:
2544 goto out_free; 2567 goto out_free;
2545 2568
2546 inode_inc_iversion(inode); 2569 inode_inc_iversion(inode);
2547 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 2570 inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb);
2548 2571
2549 trans->block_rsv = &root->fs_info->trans_block_rsv; 2572 trans->block_rsv = &root->fs_info->trans_block_rsv;
2550 ret = btrfs_update_inode(trans, root, inode); 2573 ret = btrfs_update_inode(trans, root, inode);
@@ -2558,7 +2581,7 @@ out:
2558 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, 2581 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
2559 &cached_state, GFP_NOFS); 2582 &cached_state, GFP_NOFS);
2560out_only_mutex: 2583out_only_mutex:
2561 if (!updated_inode && truncated_page && !ret && !err) { 2584 if (!updated_inode && truncated_block && !ret && !err) {
2562 /* 2585 /*
2563 * If we only end up zeroing part of a page, we still need to 2586 * If we only end up zeroing part of a page, we still need to
2564 * update the inode item, so that all the time fields are 2587 * update the inode item, so that all the time fields are
@@ -2611,7 +2634,7 @@ static int add_falloc_range(struct list_head *head, u64 start, u64 len)
2611 return 0; 2634 return 0;
2612 } 2635 }
2613insert: 2636insert:
2614 range = kmalloc(sizeof(*range), GFP_NOFS); 2637 range = kmalloc(sizeof(*range), GFP_KERNEL);
2615 if (!range) 2638 if (!range)
2616 return -ENOMEM; 2639 return -ENOMEM;
2617 range->start = start; 2640 range->start = start;
@@ -2678,10 +2701,10 @@ static long btrfs_fallocate(struct file *file, int mode,
2678 } else if (offset + len > inode->i_size) { 2701 } else if (offset + len > inode->i_size) {
2679 /* 2702 /*
2680 * If we are fallocating from the end of the file onward we 2703 * If we are fallocating from the end of the file onward we
2681 * need to zero out the end of the page if i_size lands in the 2704 * need to zero out the end of the block if i_size lands in the
2682 * middle of a page. 2705 * middle of a block.
2683 */ 2706 */
2684 ret = btrfs_truncate_page(inode, inode->i_size, 0, 0); 2707 ret = btrfs_truncate_block(inode, inode->i_size, 0, 0);
2685 if (ret) 2708 if (ret)
2686 goto out; 2709 goto out;
2687 } 2710 }
@@ -2712,7 +2735,7 @@ static long btrfs_fallocate(struct file *file, int mode,
2712 btrfs_put_ordered_extent(ordered); 2735 btrfs_put_ordered_extent(ordered);
2713 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 2736 unlock_extent_cached(&BTRFS_I(inode)->io_tree,
2714 alloc_start, locked_end, 2737 alloc_start, locked_end,
2715 &cached_state, GFP_NOFS); 2738 &cached_state, GFP_KERNEL);
2716 /* 2739 /*
2717 * we can't wait on the range with the transaction 2740 * we can't wait on the range with the transaction
2718 * running or with the extent lock held 2741 * running or with the extent lock held
@@ -2794,7 +2817,7 @@ static long btrfs_fallocate(struct file *file, int mode,
2794 if (IS_ERR(trans)) { 2817 if (IS_ERR(trans)) {
2795 ret = PTR_ERR(trans); 2818 ret = PTR_ERR(trans);
2796 } else { 2819 } else {
2797 inode->i_ctime = CURRENT_TIME; 2820 inode->i_ctime = current_fs_time(inode->i_sb);
2798 i_size_write(inode, actual_end); 2821 i_size_write(inode, actual_end);
2799 btrfs_ordered_update_i_size(inode, actual_end, NULL); 2822 btrfs_ordered_update_i_size(inode, actual_end, NULL);
2800 ret = btrfs_update_inode(trans, root, inode); 2823 ret = btrfs_update_inode(trans, root, inode);
@@ -2806,7 +2829,7 @@ static long btrfs_fallocate(struct file *file, int mode,
2806 } 2829 }
2807out_unlock: 2830out_unlock:
2808 unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, 2831 unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
2809 &cached_state, GFP_NOFS); 2832 &cached_state, GFP_KERNEL);
2810out: 2833out:
2811 /* 2834 /*
2812 * As we waited the extent range, the data_rsv_map must be empty 2835 * As we waited the extent range, the data_rsv_map must be empty
@@ -2939,8 +2962,7 @@ const struct file_operations btrfs_file_operations = {
2939 2962
2940void btrfs_auto_defrag_exit(void) 2963void btrfs_auto_defrag_exit(void)
2941{ 2964{
2942 if (btrfs_inode_defrag_cachep) 2965 kmem_cache_destroy(btrfs_inode_defrag_cachep);
2943 kmem_cache_destroy(btrfs_inode_defrag_cachep);
2944} 2966}
2945 2967
2946int btrfs_auto_defrag_init(void) 2968int btrfs_auto_defrag_init(void)