diff options
Diffstat (limited to 'fs/btrfs/file.c')
-rw-r--r-- | fs/btrfs/file.c | 100 |
1 files changed, 78 insertions, 22 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 77061bf43edb..af1d0605a5c1 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -30,11 +30,11 @@ | |||
30 | #include <linux/statfs.h> | 30 | #include <linux/statfs.h> |
31 | #include <linux/compat.h> | 31 | #include <linux/compat.h> |
32 | #include <linux/slab.h> | 32 | #include <linux/slab.h> |
33 | #include <linux/btrfs.h> | ||
33 | #include "ctree.h" | 34 | #include "ctree.h" |
34 | #include "disk-io.h" | 35 | #include "disk-io.h" |
35 | #include "transaction.h" | 36 | #include "transaction.h" |
36 | #include "btrfs_inode.h" | 37 | #include "btrfs_inode.h" |
37 | #include "ioctl.h" | ||
38 | #include "print-tree.h" | 38 | #include "print-tree.h" |
39 | #include "tree-log.h" | 39 | #include "tree-log.h" |
40 | #include "locking.h" | 40 | #include "locking.h" |
@@ -293,15 +293,24 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info, | |||
293 | struct btrfs_key key; | 293 | struct btrfs_key key; |
294 | struct btrfs_ioctl_defrag_range_args range; | 294 | struct btrfs_ioctl_defrag_range_args range; |
295 | int num_defrag; | 295 | int num_defrag; |
296 | int index; | ||
297 | int ret; | ||
296 | 298 | ||
297 | /* get the inode */ | 299 | /* get the inode */ |
298 | key.objectid = defrag->root; | 300 | key.objectid = defrag->root; |
299 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | 301 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); |
300 | key.offset = (u64)-1; | 302 | key.offset = (u64)-1; |
303 | |||
304 | index = srcu_read_lock(&fs_info->subvol_srcu); | ||
305 | |||
301 | inode_root = btrfs_read_fs_root_no_name(fs_info, &key); | 306 | inode_root = btrfs_read_fs_root_no_name(fs_info, &key); |
302 | if (IS_ERR(inode_root)) { | 307 | if (IS_ERR(inode_root)) { |
303 | kmem_cache_free(btrfs_inode_defrag_cachep, defrag); | 308 | ret = PTR_ERR(inode_root); |
304 | return PTR_ERR(inode_root); | 309 | goto cleanup; |
310 | } | ||
311 | if (btrfs_root_refs(&inode_root->root_item) == 0) { | ||
312 | ret = -ENOENT; | ||
313 | goto cleanup; | ||
305 | } | 314 | } |
306 | 315 | ||
307 | key.objectid = defrag->ino; | 316 | key.objectid = defrag->ino; |
@@ -309,9 +318,10 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info, | |||
309 | key.offset = 0; | 318 | key.offset = 0; |
310 | inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL); | 319 | inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL); |
311 | if (IS_ERR(inode)) { | 320 | if (IS_ERR(inode)) { |
312 | kmem_cache_free(btrfs_inode_defrag_cachep, defrag); | 321 | ret = PTR_ERR(inode); |
313 | return PTR_ERR(inode); | 322 | goto cleanup; |
314 | } | 323 | } |
324 | srcu_read_unlock(&fs_info->subvol_srcu, index); | ||
315 | 325 | ||
316 | /* do a chunk of defrag */ | 326 | /* do a chunk of defrag */ |
317 | clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags); | 327 | clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags); |
@@ -346,6 +356,10 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info, | |||
346 | 356 | ||
347 | iput(inode); | 357 | iput(inode); |
348 | return 0; | 358 | return 0; |
359 | cleanup: | ||
360 | srcu_read_unlock(&fs_info->subvol_srcu, index); | ||
361 | kmem_cache_free(btrfs_inode_defrag_cachep, defrag); | ||
362 | return ret; | ||
349 | } | 363 | } |
350 | 364 | ||
351 | /* | 365 | /* |
@@ -360,6 +374,11 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) | |||
360 | 374 | ||
361 | atomic_inc(&fs_info->defrag_running); | 375 | atomic_inc(&fs_info->defrag_running); |
362 | while(1) { | 376 | while(1) { |
377 | /* Pause the auto defragger. */ | ||
378 | if (test_bit(BTRFS_FS_STATE_REMOUNTING, | ||
379 | &fs_info->fs_state)) | ||
380 | break; | ||
381 | |||
363 | if (!__need_auto_defrag(fs_info->tree_root)) | 382 | if (!__need_auto_defrag(fs_info->tree_root)) |
364 | break; | 383 | break; |
365 | 384 | ||
@@ -491,8 +510,7 @@ int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode, | |||
491 | loff_t isize = i_size_read(inode); | 510 | loff_t isize = i_size_read(inode); |
492 | 511 | ||
493 | start_pos = pos & ~((u64)root->sectorsize - 1); | 512 | start_pos = pos & ~((u64)root->sectorsize - 1); |
494 | num_bytes = (write_bytes + pos - start_pos + | 513 | num_bytes = ALIGN(write_bytes + pos - start_pos, root->sectorsize); |
495 | root->sectorsize - 1) & ~((u64)root->sectorsize - 1); | ||
496 | 514 | ||
497 | end_of_last_block = start_pos + num_bytes - 1; | 515 | end_of_last_block = start_pos + num_bytes - 1; |
498 | err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, | 516 | err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, |
@@ -1211,7 +1229,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file, | |||
1211 | struct extent_state *cached_state = NULL; | 1229 | struct extent_state *cached_state = NULL; |
1212 | int i; | 1230 | int i; |
1213 | unsigned long index = pos >> PAGE_CACHE_SHIFT; | 1231 | unsigned long index = pos >> PAGE_CACHE_SHIFT; |
1214 | struct inode *inode = fdentry(file)->d_inode; | 1232 | struct inode *inode = file_inode(file); |
1215 | gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); | 1233 | gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); |
1216 | int err = 0; | 1234 | int err = 0; |
1217 | int faili = 0; | 1235 | int faili = 0; |
@@ -1298,7 +1316,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1298 | struct iov_iter *i, | 1316 | struct iov_iter *i, |
1299 | loff_t pos) | 1317 | loff_t pos) |
1300 | { | 1318 | { |
1301 | struct inode *inode = fdentry(file)->d_inode; | 1319 | struct inode *inode = file_inode(file); |
1302 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1320 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1303 | struct page **pages = NULL; | 1321 | struct page **pages = NULL; |
1304 | unsigned long first_index; | 1322 | unsigned long first_index; |
@@ -1486,7 +1504,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
1486 | unsigned long nr_segs, loff_t pos) | 1504 | unsigned long nr_segs, loff_t pos) |
1487 | { | 1505 | { |
1488 | struct file *file = iocb->ki_filp; | 1506 | struct file *file = iocb->ki_filp; |
1489 | struct inode *inode = fdentry(file)->d_inode; | 1507 | struct inode *inode = file_inode(file); |
1490 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1508 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1491 | loff_t *ppos = &iocb->ki_pos; | 1509 | loff_t *ppos = &iocb->ki_pos; |
1492 | u64 start_pos; | 1510 | u64 start_pos; |
@@ -1530,7 +1548,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
1530 | * although we have opened a file as writable, we have | 1548 | * although we have opened a file as writable, we have |
1531 | * to stop this write operation to ensure FS consistency. | 1549 | * to stop this write operation to ensure FS consistency. |
1532 | */ | 1550 | */ |
1533 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | 1551 | if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) { |
1534 | mutex_unlock(&inode->i_mutex); | 1552 | mutex_unlock(&inode->i_mutex); |
1535 | err = -EROFS; | 1553 | err = -EROFS; |
1536 | goto out; | 1554 | goto out; |
@@ -1594,9 +1612,10 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
1594 | if (err < 0 && num_written > 0) | 1612 | if (err < 0 && num_written > 0) |
1595 | num_written = err; | 1613 | num_written = err; |
1596 | } | 1614 | } |
1597 | out: | 1615 | |
1598 | if (sync) | 1616 | if (sync) |
1599 | atomic_dec(&BTRFS_I(inode)->sync_writers); | 1617 | atomic_dec(&BTRFS_I(inode)->sync_writers); |
1618 | out: | ||
1600 | sb_end_write(inode->i_sb); | 1619 | sb_end_write(inode->i_sb); |
1601 | current->backing_dev_info = NULL; | 1620 | current->backing_dev_info = NULL; |
1602 | return num_written ? num_written : err; | 1621 | return num_written ? num_written : err; |
@@ -1612,7 +1631,20 @@ int btrfs_release_file(struct inode *inode, struct file *filp) | |||
1612 | */ | 1631 | */ |
1613 | if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, | 1632 | if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, |
1614 | &BTRFS_I(inode)->runtime_flags)) { | 1633 | &BTRFS_I(inode)->runtime_flags)) { |
1615 | btrfs_add_ordered_operation(NULL, BTRFS_I(inode)->root, inode); | 1634 | struct btrfs_trans_handle *trans; |
1635 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
1636 | |||
1637 | /* | ||
1638 | * We need to block on a committing transaction to keep us from | ||
1639 | * throwing a ordered operation on to the list and causing | ||
1640 | * something like sync to deadlock trying to flush out this | ||
1641 | * inode. | ||
1642 | */ | ||
1643 | trans = btrfs_start_transaction(root, 0); | ||
1644 | if (IS_ERR(trans)) | ||
1645 | return PTR_ERR(trans); | ||
1646 | btrfs_add_ordered_operation(trans, BTRFS_I(inode)->root, inode); | ||
1647 | btrfs_end_transaction(trans, root); | ||
1616 | if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) | 1648 | if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) |
1617 | filemap_flush(inode->i_mapping); | 1649 | filemap_flush(inode->i_mapping); |
1618 | } | 1650 | } |
@@ -1639,16 +1671,21 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
1639 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1671 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1640 | int ret = 0; | 1672 | int ret = 0; |
1641 | struct btrfs_trans_handle *trans; | 1673 | struct btrfs_trans_handle *trans; |
1674 | bool full_sync = 0; | ||
1642 | 1675 | ||
1643 | trace_btrfs_sync_file(file, datasync); | 1676 | trace_btrfs_sync_file(file, datasync); |
1644 | 1677 | ||
1645 | /* | 1678 | /* |
1646 | * We write the dirty pages in the range and wait until they complete | 1679 | * We write the dirty pages in the range and wait until they complete |
1647 | * out of the ->i_mutex. If so, we can flush the dirty pages by | 1680 | * out of the ->i_mutex. If so, we can flush the dirty pages by |
1648 | * multi-task, and make the performance up. | 1681 | * multi-task, and make the performance up. See |
1682 | * btrfs_wait_ordered_range for an explanation of the ASYNC check. | ||
1649 | */ | 1683 | */ |
1650 | atomic_inc(&BTRFS_I(inode)->sync_writers); | 1684 | atomic_inc(&BTRFS_I(inode)->sync_writers); |
1651 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); | 1685 | ret = filemap_fdatawrite_range(inode->i_mapping, start, end); |
1686 | if (!ret && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, | ||
1687 | &BTRFS_I(inode)->runtime_flags)) | ||
1688 | ret = filemap_fdatawrite_range(inode->i_mapping, start, end); | ||
1652 | atomic_dec(&BTRFS_I(inode)->sync_writers); | 1689 | atomic_dec(&BTRFS_I(inode)->sync_writers); |
1653 | if (ret) | 1690 | if (ret) |
1654 | return ret; | 1691 | return ret; |
@@ -1660,7 +1697,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
1660 | * range being left. | 1697 | * range being left. |
1661 | */ | 1698 | */ |
1662 | atomic_inc(&root->log_batch); | 1699 | atomic_inc(&root->log_batch); |
1663 | btrfs_wait_ordered_range(inode, start, end - start + 1); | 1700 | full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, |
1701 | &BTRFS_I(inode)->runtime_flags); | ||
1702 | if (full_sync) | ||
1703 | btrfs_wait_ordered_range(inode, start, end - start + 1); | ||
1664 | atomic_inc(&root->log_batch); | 1704 | atomic_inc(&root->log_batch); |
1665 | 1705 | ||
1666 | /* | 1706 | /* |
@@ -1727,13 +1767,25 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
1727 | 1767 | ||
1728 | if (ret != BTRFS_NO_LOG_SYNC) { | 1768 | if (ret != BTRFS_NO_LOG_SYNC) { |
1729 | if (ret > 0) { | 1769 | if (ret > 0) { |
1770 | /* | ||
1771 | * If we didn't already wait for ordered extents we need | ||
1772 | * to do that now. | ||
1773 | */ | ||
1774 | if (!full_sync) | ||
1775 | btrfs_wait_ordered_range(inode, start, | ||
1776 | end - start + 1); | ||
1730 | ret = btrfs_commit_transaction(trans, root); | 1777 | ret = btrfs_commit_transaction(trans, root); |
1731 | } else { | 1778 | } else { |
1732 | ret = btrfs_sync_log(trans, root); | 1779 | ret = btrfs_sync_log(trans, root); |
1733 | if (ret == 0) | 1780 | if (ret == 0) { |
1734 | ret = btrfs_end_transaction(trans, root); | 1781 | ret = btrfs_end_transaction(trans, root); |
1735 | else | 1782 | } else { |
1783 | if (!full_sync) | ||
1784 | btrfs_wait_ordered_range(inode, start, | ||
1785 | end - | ||
1786 | start + 1); | ||
1736 | ret = btrfs_commit_transaction(trans, root); | 1787 | ret = btrfs_commit_transaction(trans, root); |
1788 | } | ||
1737 | } | 1789 | } |
1738 | } else { | 1790 | } else { |
1739 | ret = btrfs_end_transaction(trans, root); | 1791 | ret = btrfs_end_transaction(trans, root); |
@@ -2087,7 +2139,7 @@ out: | |||
2087 | static long btrfs_fallocate(struct file *file, int mode, | 2139 | static long btrfs_fallocate(struct file *file, int mode, |
2088 | loff_t offset, loff_t len) | 2140 | loff_t offset, loff_t len) |
2089 | { | 2141 | { |
2090 | struct inode *inode = file->f_path.dentry->d_inode; | 2142 | struct inode *inode = file_inode(file); |
2091 | struct extent_state *cached_state = NULL; | 2143 | struct extent_state *cached_state = NULL; |
2092 | u64 cur_offset; | 2144 | u64 cur_offset; |
2093 | u64 last_byte; | 2145 | u64 last_byte; |
@@ -2241,6 +2293,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence) | |||
2241 | if (lockend <= lockstart) | 2293 | if (lockend <= lockstart) |
2242 | lockend = lockstart + root->sectorsize; | 2294 | lockend = lockstart + root->sectorsize; |
2243 | 2295 | ||
2296 | lockend--; | ||
2244 | len = lockend - lockstart + 1; | 2297 | len = lockend - lockstart + 1; |
2245 | 2298 | ||
2246 | len = max_t(u64, len, root->sectorsize); | 2299 | len = max_t(u64, len, root->sectorsize); |
@@ -2307,9 +2360,12 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence) | |||
2307 | } | 2360 | } |
2308 | } | 2361 | } |
2309 | 2362 | ||
2310 | *offset = start; | 2363 | if (!test_bit(EXTENT_FLAG_PREALLOC, |
2311 | free_extent_map(em); | 2364 | &em->flags)) { |
2312 | break; | 2365 | *offset = start; |
2366 | free_extent_map(em); | ||
2367 | break; | ||
2368 | } | ||
2313 | } | 2369 | } |
2314 | } | 2370 | } |
2315 | 2371 | ||