diff options
Diffstat (limited to 'fs')
88 files changed, 7080 insertions, 3741 deletions
diff --git a/fs/affs/namei.c b/fs/affs/namei.c index d70bbbac6b7b..914d1c0bc07a 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c | |||
@@ -224,7 +224,7 @@ affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) | |||
224 | affs_brelse(bh); | 224 | affs_brelse(bh); |
225 | inode = affs_iget(sb, ino); | 225 | inode = affs_iget(sb, ino); |
226 | if (IS_ERR(inode)) | 226 | if (IS_ERR(inode)) |
227 | return ERR_PTR(PTR_ERR(inode)); | 227 | return ERR_CAST(inode); |
228 | } | 228 | } |
229 | dentry->d_op = AFFS_SB(sb)->s_flags & SF_INTL ? &affs_intl_dentry_operations : &affs_dentry_operations; | 229 | dentry->d_op = AFFS_SB(sb)->s_flags & SF_INTL ? &affs_intl_dentry_operations : &affs_dentry_operations; |
230 | d_add(dentry, inode); | 230 | d_add(dentry, inode); |
diff --git a/fs/aio.c b/fs/aio.c --- a/fs/aio.c +++ b/fs/aio.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/blkdev.h> | 36 | #include <linux/blkdev.h> |
37 | #include <linux/mempool.h> | 37 | #include <linux/mempool.h> |
38 | #include <linux/hash.h> | 38 | #include <linux/hash.h> |
39 | #include <linux/compat.h> | ||
39 | 40 | ||
40 | #include <asm/kmap_types.h> | 41 | #include <asm/kmap_types.h> |
41 | #include <asm/uaccess.h> | 42 | #include <asm/uaccess.h> |
@@ -1384,13 +1385,22 @@ static ssize_t aio_fsync(struct kiocb *iocb) | |||
1384 | return ret; | 1385 | return ret; |
1385 | } | 1386 | } |
1386 | 1387 | ||
1387 | static ssize_t aio_setup_vectored_rw(int type, struct kiocb *kiocb) | 1388 | static ssize_t aio_setup_vectored_rw(int type, struct kiocb *kiocb, bool compat) |
1388 | { | 1389 | { |
1389 | ssize_t ret; | 1390 | ssize_t ret; |
1390 | 1391 | ||
1391 | ret = rw_copy_check_uvector(type, (struct iovec __user *)kiocb->ki_buf, | 1392 | #ifdef CONFIG_COMPAT |
1392 | kiocb->ki_nbytes, 1, | 1393 | if (compat) |
1393 | &kiocb->ki_inline_vec, &kiocb->ki_iovec); | 1394 | ret = compat_rw_copy_check_uvector(type, |
1395 | (struct compat_iovec __user *)kiocb->ki_buf, | ||
1396 | kiocb->ki_nbytes, 1, &kiocb->ki_inline_vec, | ||
1397 | &kiocb->ki_iovec); | ||
1398 | else | ||
1399 | #endif | ||
1400 | ret = rw_copy_check_uvector(type, | ||
1401 | (struct iovec __user *)kiocb->ki_buf, | ||
1402 | kiocb->ki_nbytes, 1, &kiocb->ki_inline_vec, | ||
1403 | &kiocb->ki_iovec); | ||
1394 | if (ret < 0) | 1404 | if (ret < 0) |
1395 | goto out; | 1405 | goto out; |
1396 | 1406 | ||
@@ -1420,7 +1430,7 @@ static ssize_t aio_setup_single_vector(struct kiocb *kiocb) | |||
1420 | * Performs the initial checks and aio retry method | 1430 | * Performs the initial checks and aio retry method |
1421 | * setup for the kiocb at the time of io submission. | 1431 | * setup for the kiocb at the time of io submission. |
1422 | */ | 1432 | */ |
1423 | static ssize_t aio_setup_iocb(struct kiocb *kiocb) | 1433 | static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat) |
1424 | { | 1434 | { |
1425 | struct file *file = kiocb->ki_filp; | 1435 | struct file *file = kiocb->ki_filp; |
1426 | ssize_t ret = 0; | 1436 | ssize_t ret = 0; |
@@ -1469,7 +1479,7 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb) | |||
1469 | ret = security_file_permission(file, MAY_READ); | 1479 | ret = security_file_permission(file, MAY_READ); |
1470 | if (unlikely(ret)) | 1480 | if (unlikely(ret)) |
1471 | break; | 1481 | break; |
1472 | ret = aio_setup_vectored_rw(READ, kiocb); | 1482 | ret = aio_setup_vectored_rw(READ, kiocb, compat); |
1473 | if (ret) | 1483 | if (ret) |
1474 | break; | 1484 | break; |
1475 | ret = -EINVAL; | 1485 | ret = -EINVAL; |
@@ -1483,7 +1493,7 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb) | |||
1483 | ret = security_file_permission(file, MAY_WRITE); | 1493 | ret = security_file_permission(file, MAY_WRITE); |
1484 | if (unlikely(ret)) | 1494 | if (unlikely(ret)) |
1485 | break; | 1495 | break; |
1486 | ret = aio_setup_vectored_rw(WRITE, kiocb); | 1496 | ret = aio_setup_vectored_rw(WRITE, kiocb, compat); |
1487 | if (ret) | 1497 | if (ret) |
1488 | break; | 1498 | break; |
1489 | ret = -EINVAL; | 1499 | ret = -EINVAL; |
@@ -1548,7 +1558,8 @@ static void aio_batch_free(struct hlist_head *batch_hash) | |||
1548 | } | 1558 | } |
1549 | 1559 | ||
1550 | static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, | 1560 | static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, |
1551 | struct iocb *iocb, struct hlist_head *batch_hash) | 1561 | struct iocb *iocb, struct hlist_head *batch_hash, |
1562 | bool compat) | ||
1552 | { | 1563 | { |
1553 | struct kiocb *req; | 1564 | struct kiocb *req; |
1554 | struct file *file; | 1565 | struct file *file; |
@@ -1609,7 +1620,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, | |||
1609 | req->ki_left = req->ki_nbytes = iocb->aio_nbytes; | 1620 | req->ki_left = req->ki_nbytes = iocb->aio_nbytes; |
1610 | req->ki_opcode = iocb->aio_lio_opcode; | 1621 | req->ki_opcode = iocb->aio_lio_opcode; |
1611 | 1622 | ||
1612 | ret = aio_setup_iocb(req); | 1623 | ret = aio_setup_iocb(req, compat); |
1613 | 1624 | ||
1614 | if (ret) | 1625 | if (ret) |
1615 | goto out_put_req; | 1626 | goto out_put_req; |
@@ -1637,20 +1648,8 @@ out_put_req: | |||
1637 | return ret; | 1648 | return ret; |
1638 | } | 1649 | } |
1639 | 1650 | ||
1640 | /* sys_io_submit: | 1651 | long do_io_submit(aio_context_t ctx_id, long nr, |
1641 | * Queue the nr iocbs pointed to by iocbpp for processing. Returns | 1652 | struct iocb __user *__user *iocbpp, bool compat) |
1642 | * the number of iocbs queued. May return -EINVAL if the aio_context | ||
1643 | * specified by ctx_id is invalid, if nr is < 0, if the iocb at | ||
1644 | * *iocbpp[0] is not properly initialized, if the operation specified | ||
1645 | * is invalid for the file descriptor in the iocb. May fail with | ||
1646 | * -EFAULT if any of the data structures point to invalid data. May | ||
1647 | * fail with -EBADF if the file descriptor specified in the first | ||
1648 | * iocb is invalid. May fail with -EAGAIN if insufficient resources | ||
1649 | * are available to queue any iocbs. Will return 0 if nr is 0. Will | ||
1650 | * fail with -ENOSYS if not implemented. | ||
1651 | */ | ||
1652 | SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr, | ||
1653 | struct iocb __user * __user *, iocbpp) | ||
1654 | { | 1653 | { |
1655 | struct kioctx *ctx; | 1654 | struct kioctx *ctx; |
1656 | long ret = 0; | 1655 | long ret = 0; |
@@ -1687,7 +1686,7 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr, | |||
1687 | break; | 1686 | break; |
1688 | } | 1687 | } |
1689 | 1688 | ||
1690 | ret = io_submit_one(ctx, user_iocb, &tmp, batch_hash); | 1689 | ret = io_submit_one(ctx, user_iocb, &tmp, batch_hash, compat); |
1691 | if (ret) | 1690 | if (ret) |
1692 | break; | 1691 | break; |
1693 | } | 1692 | } |
@@ -1697,6 +1696,24 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr, | |||
1697 | return i ? i : ret; | 1696 | return i ? i : ret; |
1698 | } | 1697 | } |
1699 | 1698 | ||
1699 | /* sys_io_submit: | ||
1700 | * Queue the nr iocbs pointed to by iocbpp for processing. Returns | ||
1701 | * the number of iocbs queued. May return -EINVAL if the aio_context | ||
1702 | * specified by ctx_id is invalid, if nr is < 0, if the iocb at | ||
1703 | * *iocbpp[0] is not properly initialized, if the operation specified | ||
1704 | * is invalid for the file descriptor in the iocb. May fail with | ||
1705 | * -EFAULT if any of the data structures point to invalid data. May | ||
1706 | * fail with -EBADF if the file descriptor specified in the first | ||
1707 | * iocb is invalid. May fail with -EAGAIN if insufficient resources | ||
1708 | * are available to queue any iocbs. Will return 0 if nr is 0. Will | ||
1709 | * fail with -ENOSYS if not implemented. | ||
1710 | */ | ||
1711 | SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr, | ||
1712 | struct iocb __user * __user *, iocbpp) | ||
1713 | { | ||
1714 | return do_io_submit(ctx_id, nr, iocbpp, 0); | ||
1715 | } | ||
1716 | |||
1700 | /* lookup_kiocb | 1717 | /* lookup_kiocb |
1701 | * Finds a given iocb for cancellation. | 1718 | * Finds a given iocb for cancellation. |
1702 | */ | 1719 | */ |
diff --git a/fs/autofs/root.c b/fs/autofs/root.c index 8713c7cfbc79..9a0520b50663 100644 --- a/fs/autofs/root.c +++ b/fs/autofs/root.c | |||
@@ -28,6 +28,7 @@ static int autofs_root_mkdir(struct inode *,struct dentry *,int); | |||
28 | static int autofs_root_ioctl(struct inode *, struct file *,unsigned int,unsigned long); | 28 | static int autofs_root_ioctl(struct inode *, struct file *,unsigned int,unsigned long); |
29 | 29 | ||
30 | const struct file_operations autofs_root_operations = { | 30 | const struct file_operations autofs_root_operations = { |
31 | .llseek = generic_file_llseek, | ||
31 | .read = generic_read_dir, | 32 | .read = generic_read_dir, |
32 | .readdir = autofs_root_readdir, | 33 | .readdir = autofs_root_readdir, |
33 | .ioctl = autofs_root_ioctl, | 34 | .ioctl = autofs_root_ioctl, |
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index d29b7f6df862..ba4a38b9c22f 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c | |||
@@ -95,7 +95,7 @@ static int check_dev_ioctl_version(int cmd, struct autofs_dev_ioctl *param) | |||
95 | */ | 95 | */ |
96 | static struct autofs_dev_ioctl *copy_dev_ioctl(struct autofs_dev_ioctl __user *in) | 96 | static struct autofs_dev_ioctl *copy_dev_ioctl(struct autofs_dev_ioctl __user *in) |
97 | { | 97 | { |
98 | struct autofs_dev_ioctl tmp, *ads; | 98 | struct autofs_dev_ioctl tmp; |
99 | 99 | ||
100 | if (copy_from_user(&tmp, in, sizeof(tmp))) | 100 | if (copy_from_user(&tmp, in, sizeof(tmp))) |
101 | return ERR_PTR(-EFAULT); | 101 | return ERR_PTR(-EFAULT); |
@@ -103,16 +103,7 @@ static struct autofs_dev_ioctl *copy_dev_ioctl(struct autofs_dev_ioctl __user *i | |||
103 | if (tmp.size < sizeof(tmp)) | 103 | if (tmp.size < sizeof(tmp)) |
104 | return ERR_PTR(-EINVAL); | 104 | return ERR_PTR(-EINVAL); |
105 | 105 | ||
106 | ads = kmalloc(tmp.size, GFP_KERNEL); | 106 | return memdup_user(in, tmp.size); |
107 | if (!ads) | ||
108 | return ERR_PTR(-ENOMEM); | ||
109 | |||
110 | if (copy_from_user(ads, in, tmp.size)) { | ||
111 | kfree(ads); | ||
112 | return ERR_PTR(-EFAULT); | ||
113 | } | ||
114 | |||
115 | return ads; | ||
116 | } | 107 | } |
117 | 108 | ||
118 | static inline void free_dev_ioctl(struct autofs_dev_ioctl *param) | 109 | static inline void free_dev_ioctl(struct autofs_dev_ioctl *param) |
@@ -736,11 +727,14 @@ static const struct file_operations _dev_ioctl_fops = { | |||
736 | }; | 727 | }; |
737 | 728 | ||
738 | static struct miscdevice _autofs_dev_ioctl_misc = { | 729 | static struct miscdevice _autofs_dev_ioctl_misc = { |
739 | .minor = MISC_DYNAMIC_MINOR, | 730 | .minor = AUTOFS_MINOR, |
740 | .name = AUTOFS_DEVICE_NAME, | 731 | .name = AUTOFS_DEVICE_NAME, |
741 | .fops = &_dev_ioctl_fops | 732 | .fops = &_dev_ioctl_fops |
742 | }; | 733 | }; |
743 | 734 | ||
735 | MODULE_ALIAS_MISCDEV(AUTOFS_MINOR); | ||
736 | MODULE_ALIAS("devname:autofs"); | ||
737 | |||
744 | /* Register/deregister misc character device */ | 738 | /* Register/deregister misc character device */ |
745 | int autofs_dev_ioctl_init(void) | 739 | int autofs_dev_ioctl_init(void) |
746 | { | 740 | { |
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 462859a30141..7ec14097fef1 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c | |||
@@ -377,6 +377,7 @@ again: | |||
377 | if (!list_empty(&worker->pending) || | 377 | if (!list_empty(&worker->pending) || |
378 | !list_empty(&worker->prio_pending)) { | 378 | !list_empty(&worker->prio_pending)) { |
379 | spin_unlock_irq(&worker->lock); | 379 | spin_unlock_irq(&worker->lock); |
380 | set_current_state(TASK_RUNNING); | ||
380 | goto again; | 381 | goto again; |
381 | } | 382 | } |
382 | 383 | ||
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 7a4dee199832..6ad63f17eca0 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
@@ -137,8 +137,8 @@ struct btrfs_inode { | |||
137 | * of extent items we've reserved metadata for. | 137 | * of extent items we've reserved metadata for. |
138 | */ | 138 | */ |
139 | spinlock_t accounting_lock; | 139 | spinlock_t accounting_lock; |
140 | atomic_t outstanding_extents; | ||
140 | int reserved_extents; | 141 | int reserved_extents; |
141 | int outstanding_extents; | ||
142 | 142 | ||
143 | /* | 143 | /* |
144 | * ordered_data_close is set by truncate when a file that used | 144 | * ordered_data_close is set by truncate when a file that used |
@@ -151,6 +151,7 @@ struct btrfs_inode { | |||
151 | * of these. | 151 | * of these. |
152 | */ | 152 | */ |
153 | unsigned ordered_data_close:1; | 153 | unsigned ordered_data_close:1; |
154 | unsigned orphan_meta_reserved:1; | ||
154 | unsigned dummy_inode:1; | 155 | unsigned dummy_inode:1; |
155 | 156 | ||
156 | /* | 157 | /* |
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 6795a713b205..0d1d966b0fe4 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -280,7 +280,8 @@ int btrfs_block_can_be_shared(struct btrfs_root *root, | |||
280 | static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, | 280 | static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, |
281 | struct btrfs_root *root, | 281 | struct btrfs_root *root, |
282 | struct extent_buffer *buf, | 282 | struct extent_buffer *buf, |
283 | struct extent_buffer *cow) | 283 | struct extent_buffer *cow, |
284 | int *last_ref) | ||
284 | { | 285 | { |
285 | u64 refs; | 286 | u64 refs; |
286 | u64 owner; | 287 | u64 owner; |
@@ -366,6 +367,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, | |||
366 | BUG_ON(ret); | 367 | BUG_ON(ret); |
367 | } | 368 | } |
368 | clean_tree_block(trans, root, buf); | 369 | clean_tree_block(trans, root, buf); |
370 | *last_ref = 1; | ||
369 | } | 371 | } |
370 | return 0; | 372 | return 0; |
371 | } | 373 | } |
@@ -392,6 +394,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
392 | struct btrfs_disk_key disk_key; | 394 | struct btrfs_disk_key disk_key; |
393 | struct extent_buffer *cow; | 395 | struct extent_buffer *cow; |
394 | int level; | 396 | int level; |
397 | int last_ref = 0; | ||
395 | int unlock_orig = 0; | 398 | int unlock_orig = 0; |
396 | u64 parent_start; | 399 | u64 parent_start; |
397 | 400 | ||
@@ -442,7 +445,10 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
442 | (unsigned long)btrfs_header_fsid(cow), | 445 | (unsigned long)btrfs_header_fsid(cow), |
443 | BTRFS_FSID_SIZE); | 446 | BTRFS_FSID_SIZE); |
444 | 447 | ||
445 | update_ref_for_cow(trans, root, buf, cow); | 448 | update_ref_for_cow(trans, root, buf, cow, &last_ref); |
449 | |||
450 | if (root->ref_cows) | ||
451 | btrfs_reloc_cow_block(trans, root, buf, cow); | ||
446 | 452 | ||
447 | if (buf == root->node) { | 453 | if (buf == root->node) { |
448 | WARN_ON(parent && parent != buf); | 454 | WARN_ON(parent && parent != buf); |
@@ -457,8 +463,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
457 | extent_buffer_get(cow); | 463 | extent_buffer_get(cow); |
458 | spin_unlock(&root->node_lock); | 464 | spin_unlock(&root->node_lock); |
459 | 465 | ||
460 | btrfs_free_tree_block(trans, root, buf->start, buf->len, | 466 | btrfs_free_tree_block(trans, root, buf, parent_start, |
461 | parent_start, root->root_key.objectid, level); | 467 | last_ref); |
462 | free_extent_buffer(buf); | 468 | free_extent_buffer(buf); |
463 | add_root_to_dirty_list(root); | 469 | add_root_to_dirty_list(root); |
464 | } else { | 470 | } else { |
@@ -473,8 +479,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
473 | btrfs_set_node_ptr_generation(parent, parent_slot, | 479 | btrfs_set_node_ptr_generation(parent, parent_slot, |
474 | trans->transid); | 480 | trans->transid); |
475 | btrfs_mark_buffer_dirty(parent); | 481 | btrfs_mark_buffer_dirty(parent); |
476 | btrfs_free_tree_block(trans, root, buf->start, buf->len, | 482 | btrfs_free_tree_block(trans, root, buf, parent_start, |
477 | parent_start, root->root_key.objectid, level); | 483 | last_ref); |
478 | } | 484 | } |
479 | if (unlock_orig) | 485 | if (unlock_orig) |
480 | btrfs_tree_unlock(buf); | 486 | btrfs_tree_unlock(buf); |
@@ -949,6 +955,22 @@ int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, | |||
949 | return bin_search(eb, key, level, slot); | 955 | return bin_search(eb, key, level, slot); |
950 | } | 956 | } |
951 | 957 | ||
958 | static void root_add_used(struct btrfs_root *root, u32 size) | ||
959 | { | ||
960 | spin_lock(&root->accounting_lock); | ||
961 | btrfs_set_root_used(&root->root_item, | ||
962 | btrfs_root_used(&root->root_item) + size); | ||
963 | spin_unlock(&root->accounting_lock); | ||
964 | } | ||
965 | |||
966 | static void root_sub_used(struct btrfs_root *root, u32 size) | ||
967 | { | ||
968 | spin_lock(&root->accounting_lock); | ||
969 | btrfs_set_root_used(&root->root_item, | ||
970 | btrfs_root_used(&root->root_item) - size); | ||
971 | spin_unlock(&root->accounting_lock); | ||
972 | } | ||
973 | |||
952 | /* given a node and slot number, this reads the blocks it points to. The | 974 | /* given a node and slot number, this reads the blocks it points to. The |
953 | * extent buffer is returned with a reference taken (but unlocked). | 975 | * extent buffer is returned with a reference taken (but unlocked). |
954 | * NULL is returned on error. | 976 | * NULL is returned on error. |
@@ -1019,7 +1041,11 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
1019 | btrfs_tree_lock(child); | 1041 | btrfs_tree_lock(child); |
1020 | btrfs_set_lock_blocking(child); | 1042 | btrfs_set_lock_blocking(child); |
1021 | ret = btrfs_cow_block(trans, root, child, mid, 0, &child); | 1043 | ret = btrfs_cow_block(trans, root, child, mid, 0, &child); |
1022 | BUG_ON(ret); | 1044 | if (ret) { |
1045 | btrfs_tree_unlock(child); | ||
1046 | free_extent_buffer(child); | ||
1047 | goto enospc; | ||
1048 | } | ||
1023 | 1049 | ||
1024 | spin_lock(&root->node_lock); | 1050 | spin_lock(&root->node_lock); |
1025 | root->node = child; | 1051 | root->node = child; |
@@ -1034,11 +1060,12 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
1034 | btrfs_tree_unlock(mid); | 1060 | btrfs_tree_unlock(mid); |
1035 | /* once for the path */ | 1061 | /* once for the path */ |
1036 | free_extent_buffer(mid); | 1062 | free_extent_buffer(mid); |
1037 | ret = btrfs_free_tree_block(trans, root, mid->start, mid->len, | 1063 | |
1038 | 0, root->root_key.objectid, level); | 1064 | root_sub_used(root, mid->len); |
1065 | btrfs_free_tree_block(trans, root, mid, 0, 1); | ||
1039 | /* once for the root ptr */ | 1066 | /* once for the root ptr */ |
1040 | free_extent_buffer(mid); | 1067 | free_extent_buffer(mid); |
1041 | return ret; | 1068 | return 0; |
1042 | } | 1069 | } |
1043 | if (btrfs_header_nritems(mid) > | 1070 | if (btrfs_header_nritems(mid) > |
1044 | BTRFS_NODEPTRS_PER_BLOCK(root) / 4) | 1071 | BTRFS_NODEPTRS_PER_BLOCK(root) / 4) |
@@ -1088,23 +1115,16 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
1088 | if (wret < 0 && wret != -ENOSPC) | 1115 | if (wret < 0 && wret != -ENOSPC) |
1089 | ret = wret; | 1116 | ret = wret; |
1090 | if (btrfs_header_nritems(right) == 0) { | 1117 | if (btrfs_header_nritems(right) == 0) { |
1091 | u64 bytenr = right->start; | ||
1092 | u32 blocksize = right->len; | ||
1093 | |||
1094 | clean_tree_block(trans, root, right); | 1118 | clean_tree_block(trans, root, right); |
1095 | btrfs_tree_unlock(right); | 1119 | btrfs_tree_unlock(right); |
1096 | free_extent_buffer(right); | ||
1097 | right = NULL; | ||
1098 | wret = del_ptr(trans, root, path, level + 1, pslot + | 1120 | wret = del_ptr(trans, root, path, level + 1, pslot + |
1099 | 1); | 1121 | 1); |
1100 | if (wret) | 1122 | if (wret) |
1101 | ret = wret; | 1123 | ret = wret; |
1102 | wret = btrfs_free_tree_block(trans, root, | 1124 | root_sub_used(root, right->len); |
1103 | bytenr, blocksize, 0, | 1125 | btrfs_free_tree_block(trans, root, right, 0, 1); |
1104 | root->root_key.objectid, | 1126 | free_extent_buffer(right); |
1105 | level); | 1127 | right = NULL; |
1106 | if (wret) | ||
1107 | ret = wret; | ||
1108 | } else { | 1128 | } else { |
1109 | struct btrfs_disk_key right_key; | 1129 | struct btrfs_disk_key right_key; |
1110 | btrfs_node_key(right, &right_key, 0); | 1130 | btrfs_node_key(right, &right_key, 0); |
@@ -1136,21 +1156,15 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
1136 | BUG_ON(wret == 1); | 1156 | BUG_ON(wret == 1); |
1137 | } | 1157 | } |
1138 | if (btrfs_header_nritems(mid) == 0) { | 1158 | if (btrfs_header_nritems(mid) == 0) { |
1139 | /* we've managed to empty the middle node, drop it */ | ||
1140 | u64 bytenr = mid->start; | ||
1141 | u32 blocksize = mid->len; | ||
1142 | |||
1143 | clean_tree_block(trans, root, mid); | 1159 | clean_tree_block(trans, root, mid); |
1144 | btrfs_tree_unlock(mid); | 1160 | btrfs_tree_unlock(mid); |
1145 | free_extent_buffer(mid); | ||
1146 | mid = NULL; | ||
1147 | wret = del_ptr(trans, root, path, level + 1, pslot); | 1161 | wret = del_ptr(trans, root, path, level + 1, pslot); |
1148 | if (wret) | 1162 | if (wret) |
1149 | ret = wret; | 1163 | ret = wret; |
1150 | wret = btrfs_free_tree_block(trans, root, bytenr, blocksize, | 1164 | root_sub_used(root, mid->len); |
1151 | 0, root->root_key.objectid, level); | 1165 | btrfs_free_tree_block(trans, root, mid, 0, 1); |
1152 | if (wret) | 1166 | free_extent_buffer(mid); |
1153 | ret = wret; | 1167 | mid = NULL; |
1154 | } else { | 1168 | } else { |
1155 | /* update the parent key to reflect our changes */ | 1169 | /* update the parent key to reflect our changes */ |
1156 | struct btrfs_disk_key mid_key; | 1170 | struct btrfs_disk_key mid_key; |
@@ -1590,7 +1604,7 @@ read_block_for_search(struct btrfs_trans_handle *trans, | |||
1590 | btrfs_release_path(NULL, p); | 1604 | btrfs_release_path(NULL, p); |
1591 | 1605 | ||
1592 | ret = -EAGAIN; | 1606 | ret = -EAGAIN; |
1593 | tmp = read_tree_block(root, blocknr, blocksize, gen); | 1607 | tmp = read_tree_block(root, blocknr, blocksize, 0); |
1594 | if (tmp) { | 1608 | if (tmp) { |
1595 | /* | 1609 | /* |
1596 | * If the read above didn't mark this buffer up to date, | 1610 | * If the read above didn't mark this buffer up to date, |
@@ -1740,7 +1754,6 @@ again: | |||
1740 | p->nodes[level + 1], | 1754 | p->nodes[level + 1], |
1741 | p->slots[level + 1], &b); | 1755 | p->slots[level + 1], &b); |
1742 | if (err) { | 1756 | if (err) { |
1743 | free_extent_buffer(b); | ||
1744 | ret = err; | 1757 | ret = err; |
1745 | goto done; | 1758 | goto done; |
1746 | } | 1759 | } |
@@ -2076,6 +2089,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, | |||
2076 | if (IS_ERR(c)) | 2089 | if (IS_ERR(c)) |
2077 | return PTR_ERR(c); | 2090 | return PTR_ERR(c); |
2078 | 2091 | ||
2092 | root_add_used(root, root->nodesize); | ||
2093 | |||
2079 | memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header)); | 2094 | memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header)); |
2080 | btrfs_set_header_nritems(c, 1); | 2095 | btrfs_set_header_nritems(c, 1); |
2081 | btrfs_set_header_level(c, level); | 2096 | btrfs_set_header_level(c, level); |
@@ -2134,6 +2149,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root | |||
2134 | int nritems; | 2149 | int nritems; |
2135 | 2150 | ||
2136 | BUG_ON(!path->nodes[level]); | 2151 | BUG_ON(!path->nodes[level]); |
2152 | btrfs_assert_tree_locked(path->nodes[level]); | ||
2137 | lower = path->nodes[level]; | 2153 | lower = path->nodes[level]; |
2138 | nritems = btrfs_header_nritems(lower); | 2154 | nritems = btrfs_header_nritems(lower); |
2139 | BUG_ON(slot > nritems); | 2155 | BUG_ON(slot > nritems); |
@@ -2202,6 +2218,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans, | |||
2202 | if (IS_ERR(split)) | 2218 | if (IS_ERR(split)) |
2203 | return PTR_ERR(split); | 2219 | return PTR_ERR(split); |
2204 | 2220 | ||
2221 | root_add_used(root, root->nodesize); | ||
2222 | |||
2205 | memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header)); | 2223 | memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header)); |
2206 | btrfs_set_header_level(split, btrfs_header_level(c)); | 2224 | btrfs_set_header_level(split, btrfs_header_level(c)); |
2207 | btrfs_set_header_bytenr(split, split->start); | 2225 | btrfs_set_header_bytenr(split, split->start); |
@@ -2415,6 +2433,9 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, | |||
2415 | 2433 | ||
2416 | if (left_nritems) | 2434 | if (left_nritems) |
2417 | btrfs_mark_buffer_dirty(left); | 2435 | btrfs_mark_buffer_dirty(left); |
2436 | else | ||
2437 | clean_tree_block(trans, root, left); | ||
2438 | |||
2418 | btrfs_mark_buffer_dirty(right); | 2439 | btrfs_mark_buffer_dirty(right); |
2419 | 2440 | ||
2420 | btrfs_item_key(right, &disk_key, 0); | 2441 | btrfs_item_key(right, &disk_key, 0); |
@@ -2660,6 +2681,8 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, | |||
2660 | btrfs_mark_buffer_dirty(left); | 2681 | btrfs_mark_buffer_dirty(left); |
2661 | if (right_nritems) | 2682 | if (right_nritems) |
2662 | btrfs_mark_buffer_dirty(right); | 2683 | btrfs_mark_buffer_dirty(right); |
2684 | else | ||
2685 | clean_tree_block(trans, root, right); | ||
2663 | 2686 | ||
2664 | btrfs_item_key(right, &disk_key, 0); | 2687 | btrfs_item_key(right, &disk_key, 0); |
2665 | wret = fixup_low_keys(trans, root, path, &disk_key, 1); | 2688 | wret = fixup_low_keys(trans, root, path, &disk_key, 1); |
@@ -2669,8 +2692,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, | |||
2669 | /* then fixup the leaf pointer in the path */ | 2692 | /* then fixup the leaf pointer in the path */ |
2670 | if (path->slots[0] < push_items) { | 2693 | if (path->slots[0] < push_items) { |
2671 | path->slots[0] += old_left_nritems; | 2694 | path->slots[0] += old_left_nritems; |
2672 | if (btrfs_header_nritems(path->nodes[0]) == 0) | ||
2673 | clean_tree_block(trans, root, path->nodes[0]); | ||
2674 | btrfs_tree_unlock(path->nodes[0]); | 2695 | btrfs_tree_unlock(path->nodes[0]); |
2675 | free_extent_buffer(path->nodes[0]); | 2696 | free_extent_buffer(path->nodes[0]); |
2676 | path->nodes[0] = left; | 2697 | path->nodes[0] = left; |
@@ -2932,10 +2953,10 @@ again: | |||
2932 | right = btrfs_alloc_free_block(trans, root, root->leafsize, 0, | 2953 | right = btrfs_alloc_free_block(trans, root, root->leafsize, 0, |
2933 | root->root_key.objectid, | 2954 | root->root_key.objectid, |
2934 | &disk_key, 0, l->start, 0); | 2955 | &disk_key, 0, l->start, 0); |
2935 | if (IS_ERR(right)) { | 2956 | if (IS_ERR(right)) |
2936 | BUG_ON(1); | ||
2937 | return PTR_ERR(right); | 2957 | return PTR_ERR(right); |
2938 | } | 2958 | |
2959 | root_add_used(root, root->leafsize); | ||
2939 | 2960 | ||
2940 | memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); | 2961 | memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); |
2941 | btrfs_set_header_bytenr(right, right->start); | 2962 | btrfs_set_header_bytenr(right, right->start); |
@@ -3054,7 +3075,8 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans, | |||
3054 | 3075 | ||
3055 | btrfs_set_path_blocking(path); | 3076 | btrfs_set_path_blocking(path); |
3056 | ret = split_leaf(trans, root, &key, path, ins_len, 1); | 3077 | ret = split_leaf(trans, root, &key, path, ins_len, 1); |
3057 | BUG_ON(ret); | 3078 | if (ret) |
3079 | goto err; | ||
3058 | 3080 | ||
3059 | path->keep_locks = 0; | 3081 | path->keep_locks = 0; |
3060 | btrfs_unlock_up_safe(path, 1); | 3082 | btrfs_unlock_up_safe(path, 1); |
@@ -3796,9 +3818,10 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans, | |||
3796 | */ | 3818 | */ |
3797 | btrfs_unlock_up_safe(path, 0); | 3819 | btrfs_unlock_up_safe(path, 0); |
3798 | 3820 | ||
3799 | ret = btrfs_free_tree_block(trans, root, leaf->start, leaf->len, | 3821 | root_sub_used(root, leaf->len); |
3800 | 0, root->root_key.objectid, 0); | 3822 | |
3801 | return ret; | 3823 | btrfs_free_tree_block(trans, root, leaf, 0, 1); |
3824 | return 0; | ||
3802 | } | 3825 | } |
3803 | /* | 3826 | /* |
3804 | * delete the item at the leaf level in path. If that empties | 3827 | * delete the item at the leaf level in path. If that empties |
@@ -3865,6 +3888,8 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
3865 | if (leaf == root->node) { | 3888 | if (leaf == root->node) { |
3866 | btrfs_set_header_level(leaf, 0); | 3889 | btrfs_set_header_level(leaf, 0); |
3867 | } else { | 3890 | } else { |
3891 | btrfs_set_path_blocking(path); | ||
3892 | clean_tree_block(trans, root, leaf); | ||
3868 | ret = btrfs_del_leaf(trans, root, path, leaf); | 3893 | ret = btrfs_del_leaf(trans, root, path, leaf); |
3869 | BUG_ON(ret); | 3894 | BUG_ON(ret); |
3870 | } | 3895 | } |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 746a7248678e..e9bf86415e86 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -34,6 +34,7 @@ | |||
34 | 34 | ||
35 | struct btrfs_trans_handle; | 35 | struct btrfs_trans_handle; |
36 | struct btrfs_transaction; | 36 | struct btrfs_transaction; |
37 | struct btrfs_pending_snapshot; | ||
37 | extern struct kmem_cache *btrfs_trans_handle_cachep; | 38 | extern struct kmem_cache *btrfs_trans_handle_cachep; |
38 | extern struct kmem_cache *btrfs_transaction_cachep; | 39 | extern struct kmem_cache *btrfs_transaction_cachep; |
39 | extern struct kmem_cache *btrfs_bit_radix_cachep; | 40 | extern struct kmem_cache *btrfs_bit_radix_cachep; |
@@ -663,6 +664,7 @@ struct btrfs_csum_item { | |||
663 | #define BTRFS_BLOCK_GROUP_RAID1 (1 << 4) | 664 | #define BTRFS_BLOCK_GROUP_RAID1 (1 << 4) |
664 | #define BTRFS_BLOCK_GROUP_DUP (1 << 5) | 665 | #define BTRFS_BLOCK_GROUP_DUP (1 << 5) |
665 | #define BTRFS_BLOCK_GROUP_RAID10 (1 << 6) | 666 | #define BTRFS_BLOCK_GROUP_RAID10 (1 << 6) |
667 | #define BTRFS_NR_RAID_TYPES 5 | ||
666 | 668 | ||
667 | struct btrfs_block_group_item { | 669 | struct btrfs_block_group_item { |
668 | __le64 used; | 670 | __le64 used; |
@@ -674,42 +676,46 @@ struct btrfs_space_info { | |||
674 | u64 flags; | 676 | u64 flags; |
675 | 677 | ||
676 | u64 total_bytes; /* total bytes in the space */ | 678 | u64 total_bytes; /* total bytes in the space */ |
677 | u64 bytes_used; /* total bytes used on disk */ | 679 | u64 bytes_used; /* total bytes used, |
680 | this doesn't take mirrors into account */ | ||
678 | u64 bytes_pinned; /* total bytes pinned, will be freed when the | 681 | u64 bytes_pinned; /* total bytes pinned, will be freed when the |
679 | transaction finishes */ | 682 | transaction finishes */ |
680 | u64 bytes_reserved; /* total bytes the allocator has reserved for | 683 | u64 bytes_reserved; /* total bytes the allocator has reserved for |
681 | current allocations */ | 684 | current allocations */ |
682 | u64 bytes_readonly; /* total bytes that are read only */ | 685 | u64 bytes_readonly; /* total bytes that are read only */ |
683 | u64 bytes_super; /* total bytes reserved for the super blocks */ | 686 | |
684 | u64 bytes_root; /* the number of bytes needed to commit a | ||
685 | transaction */ | ||
686 | u64 bytes_may_use; /* number of bytes that may be used for | 687 | u64 bytes_may_use; /* number of bytes that may be used for |
687 | delalloc/allocations */ | 688 | delalloc/allocations */ |
688 | u64 bytes_delalloc; /* number of bytes currently reserved for | 689 | u64 disk_used; /* total bytes used on disk */ |
689 | delayed allocation */ | ||
690 | 690 | ||
691 | int full; /* indicates that we cannot allocate any more | 691 | int full; /* indicates that we cannot allocate any more |
692 | chunks for this space */ | 692 | chunks for this space */ |
693 | int force_alloc; /* set if we need to force a chunk alloc for | 693 | int force_alloc; /* set if we need to force a chunk alloc for |
694 | this space */ | 694 | this space */ |
695 | int force_delalloc; /* make people start doing filemap_flush until | ||
696 | we're under a threshold */ | ||
697 | 695 | ||
698 | struct list_head list; | 696 | struct list_head list; |
699 | 697 | ||
700 | /* for controlling how we free up space for allocations */ | ||
701 | wait_queue_head_t allocate_wait; | ||
702 | wait_queue_head_t flush_wait; | ||
703 | int allocating_chunk; | ||
704 | int flushing; | ||
705 | |||
706 | /* for block groups in our same type */ | 698 | /* for block groups in our same type */ |
707 | struct list_head block_groups; | 699 | struct list_head block_groups[BTRFS_NR_RAID_TYPES]; |
708 | spinlock_t lock; | 700 | spinlock_t lock; |
709 | struct rw_semaphore groups_sem; | 701 | struct rw_semaphore groups_sem; |
710 | atomic_t caching_threads; | 702 | atomic_t caching_threads; |
711 | }; | 703 | }; |
712 | 704 | ||
705 | struct btrfs_block_rsv { | ||
706 | u64 size; | ||
707 | u64 reserved; | ||
708 | u64 freed[2]; | ||
709 | struct btrfs_space_info *space_info; | ||
710 | struct list_head list; | ||
711 | spinlock_t lock; | ||
712 | atomic_t usage; | ||
713 | unsigned int priority:8; | ||
714 | unsigned int durable:1; | ||
715 | unsigned int refill_used:1; | ||
716 | unsigned int full:1; | ||
717 | }; | ||
718 | |||
713 | /* | 719 | /* |
714 | * free clusters are used to claim free space in relatively large chunks, | 720 | * free clusters are used to claim free space in relatively large chunks, |
715 | * allowing us to do less seeky writes. They are used for all metadata | 721 | * allowing us to do less seeky writes. They are used for all metadata |
@@ -760,6 +766,7 @@ struct btrfs_block_group_cache { | |||
760 | spinlock_t lock; | 766 | spinlock_t lock; |
761 | u64 pinned; | 767 | u64 pinned; |
762 | u64 reserved; | 768 | u64 reserved; |
769 | u64 reserved_pinned; | ||
763 | u64 bytes_super; | 770 | u64 bytes_super; |
764 | u64 flags; | 771 | u64 flags; |
765 | u64 sectorsize; | 772 | u64 sectorsize; |
@@ -825,6 +832,22 @@ struct btrfs_fs_info { | |||
825 | /* logical->physical extent mapping */ | 832 | /* logical->physical extent mapping */ |
826 | struct btrfs_mapping_tree mapping_tree; | 833 | struct btrfs_mapping_tree mapping_tree; |
827 | 834 | ||
835 | /* block reservation for extent, checksum and root tree */ | ||
836 | struct btrfs_block_rsv global_block_rsv; | ||
837 | /* block reservation for delayed allocation */ | ||
838 | struct btrfs_block_rsv delalloc_block_rsv; | ||
839 | /* block reservation for metadata operations */ | ||
840 | struct btrfs_block_rsv trans_block_rsv; | ||
841 | /* block reservation for chunk tree */ | ||
842 | struct btrfs_block_rsv chunk_block_rsv; | ||
843 | |||
844 | struct btrfs_block_rsv empty_block_rsv; | ||
845 | |||
846 | /* list of block reservations that cross multiple transactions */ | ||
847 | struct list_head durable_block_rsv_list; | ||
848 | |||
849 | struct mutex durable_block_rsv_mutex; | ||
850 | |||
828 | u64 generation; | 851 | u64 generation; |
829 | u64 last_trans_committed; | 852 | u64 last_trans_committed; |
830 | 853 | ||
@@ -927,7 +950,6 @@ struct btrfs_fs_info { | |||
927 | struct btrfs_workers endio_meta_write_workers; | 950 | struct btrfs_workers endio_meta_write_workers; |
928 | struct btrfs_workers endio_write_workers; | 951 | struct btrfs_workers endio_write_workers; |
929 | struct btrfs_workers submit_workers; | 952 | struct btrfs_workers submit_workers; |
930 | struct btrfs_workers enospc_workers; | ||
931 | /* | 953 | /* |
932 | * fixup workers take dirty pages that didn't properly go through | 954 | * fixup workers take dirty pages that didn't properly go through |
933 | * the cow mechanism and make them safe to write. It happens | 955 | * the cow mechanism and make them safe to write. It happens |
@@ -943,6 +965,7 @@ struct btrfs_fs_info { | |||
943 | int do_barriers; | 965 | int do_barriers; |
944 | int closing; | 966 | int closing; |
945 | int log_root_recovering; | 967 | int log_root_recovering; |
968 | int enospc_unlink; | ||
946 | 969 | ||
947 | u64 total_pinned; | 970 | u64 total_pinned; |
948 | 971 | ||
@@ -1012,6 +1035,9 @@ struct btrfs_root { | |||
1012 | struct completion kobj_unregister; | 1035 | struct completion kobj_unregister; |
1013 | struct mutex objectid_mutex; | 1036 | struct mutex objectid_mutex; |
1014 | 1037 | ||
1038 | spinlock_t accounting_lock; | ||
1039 | struct btrfs_block_rsv *block_rsv; | ||
1040 | |||
1015 | struct mutex log_mutex; | 1041 | struct mutex log_mutex; |
1016 | wait_queue_head_t log_writer_wait; | 1042 | wait_queue_head_t log_writer_wait; |
1017 | wait_queue_head_t log_commit_wait[2]; | 1043 | wait_queue_head_t log_commit_wait[2]; |
@@ -1043,7 +1069,6 @@ struct btrfs_root { | |||
1043 | int ref_cows; | 1069 | int ref_cows; |
1044 | int track_dirty; | 1070 | int track_dirty; |
1045 | int in_radix; | 1071 | int in_radix; |
1046 | int clean_orphans; | ||
1047 | 1072 | ||
1048 | u64 defrag_trans_start; | 1073 | u64 defrag_trans_start; |
1049 | struct btrfs_key defrag_progress; | 1074 | struct btrfs_key defrag_progress; |
@@ -1057,8 +1082,11 @@ struct btrfs_root { | |||
1057 | 1082 | ||
1058 | struct list_head root_list; | 1083 | struct list_head root_list; |
1059 | 1084 | ||
1060 | spinlock_t list_lock; | 1085 | spinlock_t orphan_lock; |
1061 | struct list_head orphan_list; | 1086 | struct list_head orphan_list; |
1087 | struct btrfs_block_rsv *orphan_block_rsv; | ||
1088 | int orphan_item_inserted; | ||
1089 | int orphan_cleanup_state; | ||
1062 | 1090 | ||
1063 | spinlock_t inode_lock; | 1091 | spinlock_t inode_lock; |
1064 | /* red-black tree that keeps track of in-memory inodes */ | 1092 | /* red-black tree that keeps track of in-memory inodes */ |
@@ -1965,6 +1993,9 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache); | |||
1965 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | 1993 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, |
1966 | struct btrfs_root *root, unsigned long count); | 1994 | struct btrfs_root *root, unsigned long count); |
1967 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); | 1995 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); |
1996 | int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, | ||
1997 | struct btrfs_root *root, u64 bytenr, | ||
1998 | u64 num_bytes, u64 *refs, u64 *flags); | ||
1968 | int btrfs_pin_extent(struct btrfs_root *root, | 1999 | int btrfs_pin_extent(struct btrfs_root *root, |
1969 | u64 bytenr, u64 num, int reserved); | 2000 | u64 bytenr, u64 num, int reserved); |
1970 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, | 2001 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, |
@@ -1984,10 +2015,10 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
1984 | u64 parent, u64 root_objectid, | 2015 | u64 parent, u64 root_objectid, |
1985 | struct btrfs_disk_key *key, int level, | 2016 | struct btrfs_disk_key *key, int level, |
1986 | u64 hint, u64 empty_size); | 2017 | u64 hint, u64 empty_size); |
1987 | int btrfs_free_tree_block(struct btrfs_trans_handle *trans, | 2018 | void btrfs_free_tree_block(struct btrfs_trans_handle *trans, |
1988 | struct btrfs_root *root, | 2019 | struct btrfs_root *root, |
1989 | u64 bytenr, u32 blocksize, | 2020 | struct extent_buffer *buf, |
1990 | u64 parent, u64 root_objectid, int level); | 2021 | u64 parent, int last_ref); |
1991 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | 2022 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, |
1992 | struct btrfs_root *root, | 2023 | struct btrfs_root *root, |
1993 | u64 bytenr, u32 blocksize, | 2024 | u64 bytenr, u32 blocksize, |
@@ -2041,27 +2072,49 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
2041 | u64 size); | 2072 | u64 size); |
2042 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | 2073 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, |
2043 | struct btrfs_root *root, u64 group_start); | 2074 | struct btrfs_root *root, u64 group_start); |
2044 | int btrfs_prepare_block_group_relocation(struct btrfs_root *root, | ||
2045 | struct btrfs_block_group_cache *group); | ||
2046 | |||
2047 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); | 2075 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); |
2048 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); | 2076 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); |
2049 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); | 2077 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); |
2050 | 2078 | int btrfs_check_data_free_space(struct inode *inode, u64 bytes); | |
2051 | int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items); | 2079 | void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes); |
2052 | int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items); | 2080 | int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, |
2053 | int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root, | 2081 | struct btrfs_root *root, |
2054 | struct inode *inode, int num_items); | 2082 | int num_items, int *retries); |
2055 | int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root, | 2083 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, |
2056 | struct inode *inode, int num_items); | 2084 | struct btrfs_root *root); |
2057 | int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, | 2085 | int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, |
2058 | u64 bytes); | 2086 | struct inode *inode); |
2059 | void btrfs_free_reserved_data_space(struct btrfs_root *root, | 2087 | void btrfs_orphan_release_metadata(struct inode *inode); |
2060 | struct inode *inode, u64 bytes); | 2088 | int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans, |
2061 | void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, | 2089 | struct btrfs_pending_snapshot *pending); |
2062 | u64 bytes); | 2090 | int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes); |
2063 | void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, | 2091 | void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes); |
2064 | u64 bytes); | 2092 | int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes); |
2093 | void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes); | ||
2094 | void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv); | ||
2095 | struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root); | ||
2096 | void btrfs_free_block_rsv(struct btrfs_root *root, | ||
2097 | struct btrfs_block_rsv *rsv); | ||
2098 | void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info, | ||
2099 | struct btrfs_block_rsv *rsv); | ||
2100 | int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, | ||
2101 | struct btrfs_root *root, | ||
2102 | struct btrfs_block_rsv *block_rsv, | ||
2103 | u64 num_bytes, int *retries); | ||
2104 | int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, | ||
2105 | struct btrfs_root *root, | ||
2106 | struct btrfs_block_rsv *block_rsv, | ||
2107 | u64 min_reserved, int min_factor); | ||
2108 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, | ||
2109 | struct btrfs_block_rsv *dst_rsv, | ||
2110 | u64 num_bytes); | ||
2111 | void btrfs_block_rsv_release(struct btrfs_root *root, | ||
2112 | struct btrfs_block_rsv *block_rsv, | ||
2113 | u64 num_bytes); | ||
2114 | int btrfs_set_block_group_ro(struct btrfs_root *root, | ||
2115 | struct btrfs_block_group_cache *cache); | ||
2116 | int btrfs_set_block_group_rw(struct btrfs_root *root, | ||
2117 | struct btrfs_block_group_cache *cache); | ||
2065 | /* ctree.c */ | 2118 | /* ctree.c */ |
2066 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, | 2119 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, |
2067 | int level, int *slot); | 2120 | int level, int *slot); |
@@ -2152,7 +2205,8 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, | |||
2152 | int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); | 2205 | int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); |
2153 | int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); | 2206 | int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); |
2154 | int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); | 2207 | int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); |
2155 | int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref); | 2208 | int btrfs_drop_snapshot(struct btrfs_root *root, |
2209 | struct btrfs_block_rsv *block_rsv, int update_ref); | ||
2156 | int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | 2210 | int btrfs_drop_subtree(struct btrfs_trans_handle *trans, |
2157 | struct btrfs_root *root, | 2211 | struct btrfs_root *root, |
2158 | struct extent_buffer *node, | 2212 | struct extent_buffer *node, |
@@ -2245,6 +2299,12 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, | |||
2245 | struct btrfs_root *root, | 2299 | struct btrfs_root *root, |
2246 | const char *name, int name_len, | 2300 | const char *name, int name_len, |
2247 | u64 inode_objectid, u64 ref_objectid, u64 *index); | 2301 | u64 inode_objectid, u64 ref_objectid, u64 *index); |
2302 | struct btrfs_inode_ref * | ||
2303 | btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans, | ||
2304 | struct btrfs_root *root, | ||
2305 | struct btrfs_path *path, | ||
2306 | const char *name, int name_len, | ||
2307 | u64 inode_objectid, u64 ref_objectid, int mod); | ||
2248 | int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, | 2308 | int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, |
2249 | struct btrfs_root *root, | 2309 | struct btrfs_root *root, |
2250 | struct btrfs_path *path, u64 objectid); | 2310 | struct btrfs_path *path, u64 objectid); |
@@ -2257,6 +2317,8 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, | |||
2257 | struct btrfs_root *root, u64 bytenr, u64 len); | 2317 | struct btrfs_root *root, u64 bytenr, u64 len); |
2258 | int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, | 2318 | int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, |
2259 | struct bio *bio, u32 *dst); | 2319 | struct bio *bio, u32 *dst); |
2320 | int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, | ||
2321 | struct bio *bio, u64 logical_offset, u32 *dst); | ||
2260 | int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, | 2322 | int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, |
2261 | struct btrfs_root *root, | 2323 | struct btrfs_root *root, |
2262 | u64 objectid, u64 pos, | 2324 | u64 objectid, u64 pos, |
@@ -2311,6 +2373,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | |||
2311 | u32 min_type); | 2373 | u32 min_type); |
2312 | 2374 | ||
2313 | int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); | 2375 | int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); |
2376 | int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput); | ||
2314 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, | 2377 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, |
2315 | struct extent_state **cached_state); | 2378 | struct extent_state **cached_state); |
2316 | int btrfs_writepages(struct address_space *mapping, | 2379 | int btrfs_writepages(struct address_space *mapping, |
@@ -2349,10 +2412,20 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans, | |||
2349 | int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); | 2412 | int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); |
2350 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); | 2413 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); |
2351 | void btrfs_orphan_cleanup(struct btrfs_root *root); | 2414 | void btrfs_orphan_cleanup(struct btrfs_root *root); |
2415 | void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans, | ||
2416 | struct btrfs_pending_snapshot *pending, | ||
2417 | u64 *bytes_to_reserve); | ||
2418 | void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans, | ||
2419 | struct btrfs_pending_snapshot *pending); | ||
2420 | void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, | ||
2421 | struct btrfs_root *root); | ||
2352 | int btrfs_cont_expand(struct inode *inode, loff_t size); | 2422 | int btrfs_cont_expand(struct inode *inode, loff_t size); |
2353 | int btrfs_invalidate_inodes(struct btrfs_root *root); | 2423 | int btrfs_invalidate_inodes(struct btrfs_root *root); |
2354 | void btrfs_add_delayed_iput(struct inode *inode); | 2424 | void btrfs_add_delayed_iput(struct inode *inode); |
2355 | void btrfs_run_delayed_iputs(struct btrfs_root *root); | 2425 | void btrfs_run_delayed_iputs(struct btrfs_root *root); |
2426 | int btrfs_prealloc_file_range(struct inode *inode, int mode, | ||
2427 | u64 start, u64 num_bytes, u64 min_size, | ||
2428 | loff_t actual_len, u64 *alloc_hint); | ||
2356 | extern const struct dentry_operations btrfs_dentry_operations; | 2429 | extern const struct dentry_operations btrfs_dentry_operations; |
2357 | 2430 | ||
2358 | /* ioctl.c */ | 2431 | /* ioctl.c */ |
@@ -2409,4 +2482,12 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, | |||
2409 | struct btrfs_root *root); | 2482 | struct btrfs_root *root); |
2410 | int btrfs_recover_relocation(struct btrfs_root *root); | 2483 | int btrfs_recover_relocation(struct btrfs_root *root); |
2411 | int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len); | 2484 | int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len); |
2485 | void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, | ||
2486 | struct btrfs_root *root, struct extent_buffer *buf, | ||
2487 | struct extent_buffer *cow); | ||
2488 | void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans, | ||
2489 | struct btrfs_pending_snapshot *pending, | ||
2490 | u64 *bytes_to_reserve); | ||
2491 | void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, | ||
2492 | struct btrfs_pending_snapshot *pending); | ||
2412 | #endif | 2493 | #endif |
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 902ce507c4e3..e807b143b857 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c | |||
@@ -319,107 +319,6 @@ out: | |||
319 | } | 319 | } |
320 | 320 | ||
321 | /* | 321 | /* |
322 | * helper function to lookup reference count and flags of extent. | ||
323 | * | ||
324 | * the head node for delayed ref is used to store the sum of all the | ||
325 | * reference count modifications queued up in the rbtree. the head | ||
326 | * node may also store the extent flags to set. This way you can check | ||
327 | * to see what the reference count and extent flags would be if all of | ||
328 | * the delayed refs are not processed. | ||
329 | */ | ||
330 | int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, | ||
331 | struct btrfs_root *root, u64 bytenr, | ||
332 | u64 num_bytes, u64 *refs, u64 *flags) | ||
333 | { | ||
334 | struct btrfs_delayed_ref_node *ref; | ||
335 | struct btrfs_delayed_ref_head *head; | ||
336 | struct btrfs_delayed_ref_root *delayed_refs; | ||
337 | struct btrfs_path *path; | ||
338 | struct btrfs_extent_item *ei; | ||
339 | struct extent_buffer *leaf; | ||
340 | struct btrfs_key key; | ||
341 | u32 item_size; | ||
342 | u64 num_refs; | ||
343 | u64 extent_flags; | ||
344 | int ret; | ||
345 | |||
346 | path = btrfs_alloc_path(); | ||
347 | if (!path) | ||
348 | return -ENOMEM; | ||
349 | |||
350 | key.objectid = bytenr; | ||
351 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
352 | key.offset = num_bytes; | ||
353 | delayed_refs = &trans->transaction->delayed_refs; | ||
354 | again: | ||
355 | ret = btrfs_search_slot(trans, root->fs_info->extent_root, | ||
356 | &key, path, 0, 0); | ||
357 | if (ret < 0) | ||
358 | goto out; | ||
359 | |||
360 | if (ret == 0) { | ||
361 | leaf = path->nodes[0]; | ||
362 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); | ||
363 | if (item_size >= sizeof(*ei)) { | ||
364 | ei = btrfs_item_ptr(leaf, path->slots[0], | ||
365 | struct btrfs_extent_item); | ||
366 | num_refs = btrfs_extent_refs(leaf, ei); | ||
367 | extent_flags = btrfs_extent_flags(leaf, ei); | ||
368 | } else { | ||
369 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | ||
370 | struct btrfs_extent_item_v0 *ei0; | ||
371 | BUG_ON(item_size != sizeof(*ei0)); | ||
372 | ei0 = btrfs_item_ptr(leaf, path->slots[0], | ||
373 | struct btrfs_extent_item_v0); | ||
374 | num_refs = btrfs_extent_refs_v0(leaf, ei0); | ||
375 | /* FIXME: this isn't correct for data */ | ||
376 | extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF; | ||
377 | #else | ||
378 | BUG(); | ||
379 | #endif | ||
380 | } | ||
381 | BUG_ON(num_refs == 0); | ||
382 | } else { | ||
383 | num_refs = 0; | ||
384 | extent_flags = 0; | ||
385 | ret = 0; | ||
386 | } | ||
387 | |||
388 | spin_lock(&delayed_refs->lock); | ||
389 | ref = find_ref_head(&delayed_refs->root, bytenr, NULL); | ||
390 | if (ref) { | ||
391 | head = btrfs_delayed_node_to_head(ref); | ||
392 | if (!mutex_trylock(&head->mutex)) { | ||
393 | atomic_inc(&ref->refs); | ||
394 | spin_unlock(&delayed_refs->lock); | ||
395 | |||
396 | btrfs_release_path(root->fs_info->extent_root, path); | ||
397 | |||
398 | mutex_lock(&head->mutex); | ||
399 | mutex_unlock(&head->mutex); | ||
400 | btrfs_put_delayed_ref(ref); | ||
401 | goto again; | ||
402 | } | ||
403 | if (head->extent_op && head->extent_op->update_flags) | ||
404 | extent_flags |= head->extent_op->flags_to_set; | ||
405 | else | ||
406 | BUG_ON(num_refs == 0); | ||
407 | |||
408 | num_refs += ref->ref_mod; | ||
409 | mutex_unlock(&head->mutex); | ||
410 | } | ||
411 | WARN_ON(num_refs == 0); | ||
412 | if (refs) | ||
413 | *refs = num_refs; | ||
414 | if (flags) | ||
415 | *flags = extent_flags; | ||
416 | out: | ||
417 | spin_unlock(&delayed_refs->lock); | ||
418 | btrfs_free_path(path); | ||
419 | return ret; | ||
420 | } | ||
421 | |||
422 | /* | ||
423 | * helper function to update an extent delayed ref in the | 322 | * helper function to update an extent delayed ref in the |
424 | * rbtree. existing and update must both have the same | 323 | * rbtree. existing and update must both have the same |
425 | * bytenr and parent | 324 | * bytenr and parent |
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index f6fc67ddad36..50e3cf92fbda 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h | |||
@@ -167,9 +167,6 @@ int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans, | |||
167 | struct btrfs_delayed_ref_head * | 167 | struct btrfs_delayed_ref_head * |
168 | btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr); | 168 | btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr); |
169 | int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr); | 169 | int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr); |
170 | int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, | ||
171 | struct btrfs_root *root, u64 bytenr, | ||
172 | u64 num_bytes, u64 *refs, u64 *flags); | ||
173 | int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans, | 170 | int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans, |
174 | u64 bytenr, u64 num_bytes, u64 orig_parent, | 171 | u64 bytenr, u64 num_bytes, u64 orig_parent, |
175 | u64 parent, u64 orig_ref_root, u64 ref_root, | 172 | u64 parent, u64 orig_ref_root, u64 ref_root, |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index feca04197d02..f3b287c22caf 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -74,6 +74,11 @@ struct async_submit_bio { | |||
74 | int rw; | 74 | int rw; |
75 | int mirror_num; | 75 | int mirror_num; |
76 | unsigned long bio_flags; | 76 | unsigned long bio_flags; |
77 | /* | ||
78 | * bio_offset is optional, can be used if the pages in the bio | ||
79 | * can't tell us where in the file the bio should go | ||
80 | */ | ||
81 | u64 bio_offset; | ||
77 | struct btrfs_work work; | 82 | struct btrfs_work work; |
78 | }; | 83 | }; |
79 | 84 | ||
@@ -534,7 +539,8 @@ static void run_one_async_start(struct btrfs_work *work) | |||
534 | async = container_of(work, struct async_submit_bio, work); | 539 | async = container_of(work, struct async_submit_bio, work); |
535 | fs_info = BTRFS_I(async->inode)->root->fs_info; | 540 | fs_info = BTRFS_I(async->inode)->root->fs_info; |
536 | async->submit_bio_start(async->inode, async->rw, async->bio, | 541 | async->submit_bio_start(async->inode, async->rw, async->bio, |
537 | async->mirror_num, async->bio_flags); | 542 | async->mirror_num, async->bio_flags, |
543 | async->bio_offset); | ||
538 | } | 544 | } |
539 | 545 | ||
540 | static void run_one_async_done(struct btrfs_work *work) | 546 | static void run_one_async_done(struct btrfs_work *work) |
@@ -556,7 +562,8 @@ static void run_one_async_done(struct btrfs_work *work) | |||
556 | wake_up(&fs_info->async_submit_wait); | 562 | wake_up(&fs_info->async_submit_wait); |
557 | 563 | ||
558 | async->submit_bio_done(async->inode, async->rw, async->bio, | 564 | async->submit_bio_done(async->inode, async->rw, async->bio, |
559 | async->mirror_num, async->bio_flags); | 565 | async->mirror_num, async->bio_flags, |
566 | async->bio_offset); | ||
560 | } | 567 | } |
561 | 568 | ||
562 | static void run_one_async_free(struct btrfs_work *work) | 569 | static void run_one_async_free(struct btrfs_work *work) |
@@ -570,6 +577,7 @@ static void run_one_async_free(struct btrfs_work *work) | |||
570 | int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | 577 | int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, |
571 | int rw, struct bio *bio, int mirror_num, | 578 | int rw, struct bio *bio, int mirror_num, |
572 | unsigned long bio_flags, | 579 | unsigned long bio_flags, |
580 | u64 bio_offset, | ||
573 | extent_submit_bio_hook_t *submit_bio_start, | 581 | extent_submit_bio_hook_t *submit_bio_start, |
574 | extent_submit_bio_hook_t *submit_bio_done) | 582 | extent_submit_bio_hook_t *submit_bio_done) |
575 | { | 583 | { |
@@ -592,6 +600,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | |||
592 | 600 | ||
593 | async->work.flags = 0; | 601 | async->work.flags = 0; |
594 | async->bio_flags = bio_flags; | 602 | async->bio_flags = bio_flags; |
603 | async->bio_offset = bio_offset; | ||
595 | 604 | ||
596 | atomic_inc(&fs_info->nr_async_submits); | 605 | atomic_inc(&fs_info->nr_async_submits); |
597 | 606 | ||
@@ -627,7 +636,8 @@ static int btree_csum_one_bio(struct bio *bio) | |||
627 | 636 | ||
628 | static int __btree_submit_bio_start(struct inode *inode, int rw, | 637 | static int __btree_submit_bio_start(struct inode *inode, int rw, |
629 | struct bio *bio, int mirror_num, | 638 | struct bio *bio, int mirror_num, |
630 | unsigned long bio_flags) | 639 | unsigned long bio_flags, |
640 | u64 bio_offset) | ||
631 | { | 641 | { |
632 | /* | 642 | /* |
633 | * when we're called for a write, we're already in the async | 643 | * when we're called for a write, we're already in the async |
@@ -638,7 +648,8 @@ static int __btree_submit_bio_start(struct inode *inode, int rw, | |||
638 | } | 648 | } |
639 | 649 | ||
640 | static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio, | 650 | static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio, |
641 | int mirror_num, unsigned long bio_flags) | 651 | int mirror_num, unsigned long bio_flags, |
652 | u64 bio_offset) | ||
642 | { | 653 | { |
643 | /* | 654 | /* |
644 | * when we're called for a write, we're already in the async | 655 | * when we're called for a write, we're already in the async |
@@ -648,7 +659,8 @@ static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio, | |||
648 | } | 659 | } |
649 | 660 | ||
650 | static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | 661 | static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, |
651 | int mirror_num, unsigned long bio_flags) | 662 | int mirror_num, unsigned long bio_flags, |
663 | u64 bio_offset) | ||
652 | { | 664 | { |
653 | int ret; | 665 | int ret; |
654 | 666 | ||
@@ -671,6 +683,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
671 | */ | 683 | */ |
672 | return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, | 684 | return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, |
673 | inode, rw, bio, mirror_num, 0, | 685 | inode, rw, bio, mirror_num, 0, |
686 | bio_offset, | ||
674 | __btree_submit_bio_start, | 687 | __btree_submit_bio_start, |
675 | __btree_submit_bio_done); | 688 | __btree_submit_bio_done); |
676 | } | 689 | } |
@@ -894,7 +907,8 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
894 | root->ref_cows = 0; | 907 | root->ref_cows = 0; |
895 | root->track_dirty = 0; | 908 | root->track_dirty = 0; |
896 | root->in_radix = 0; | 909 | root->in_radix = 0; |
897 | root->clean_orphans = 0; | 910 | root->orphan_item_inserted = 0; |
911 | root->orphan_cleanup_state = 0; | ||
898 | 912 | ||
899 | root->fs_info = fs_info; | 913 | root->fs_info = fs_info; |
900 | root->objectid = objectid; | 914 | root->objectid = objectid; |
@@ -903,13 +917,16 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
903 | root->name = NULL; | 917 | root->name = NULL; |
904 | root->in_sysfs = 0; | 918 | root->in_sysfs = 0; |
905 | root->inode_tree = RB_ROOT; | 919 | root->inode_tree = RB_ROOT; |
920 | root->block_rsv = NULL; | ||
921 | root->orphan_block_rsv = NULL; | ||
906 | 922 | ||
907 | INIT_LIST_HEAD(&root->dirty_list); | 923 | INIT_LIST_HEAD(&root->dirty_list); |
908 | INIT_LIST_HEAD(&root->orphan_list); | 924 | INIT_LIST_HEAD(&root->orphan_list); |
909 | INIT_LIST_HEAD(&root->root_list); | 925 | INIT_LIST_HEAD(&root->root_list); |
910 | spin_lock_init(&root->node_lock); | 926 | spin_lock_init(&root->node_lock); |
911 | spin_lock_init(&root->list_lock); | 927 | spin_lock_init(&root->orphan_lock); |
912 | spin_lock_init(&root->inode_lock); | 928 | spin_lock_init(&root->inode_lock); |
929 | spin_lock_init(&root->accounting_lock); | ||
913 | mutex_init(&root->objectid_mutex); | 930 | mutex_init(&root->objectid_mutex); |
914 | mutex_init(&root->log_mutex); | 931 | mutex_init(&root->log_mutex); |
915 | init_waitqueue_head(&root->log_writer_wait); | 932 | init_waitqueue_head(&root->log_writer_wait); |
@@ -968,42 +985,6 @@ static int find_and_setup_root(struct btrfs_root *tree_root, | |||
968 | return 0; | 985 | return 0; |
969 | } | 986 | } |
970 | 987 | ||
971 | int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, | ||
972 | struct btrfs_fs_info *fs_info) | ||
973 | { | ||
974 | struct extent_buffer *eb; | ||
975 | struct btrfs_root *log_root_tree = fs_info->log_root_tree; | ||
976 | u64 start = 0; | ||
977 | u64 end = 0; | ||
978 | int ret; | ||
979 | |||
980 | if (!log_root_tree) | ||
981 | return 0; | ||
982 | |||
983 | while (1) { | ||
984 | ret = find_first_extent_bit(&log_root_tree->dirty_log_pages, | ||
985 | 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW); | ||
986 | if (ret) | ||
987 | break; | ||
988 | |||
989 | clear_extent_bits(&log_root_tree->dirty_log_pages, start, end, | ||
990 | EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS); | ||
991 | } | ||
992 | eb = fs_info->log_root_tree->node; | ||
993 | |||
994 | WARN_ON(btrfs_header_level(eb) != 0); | ||
995 | WARN_ON(btrfs_header_nritems(eb) != 0); | ||
996 | |||
997 | ret = btrfs_free_reserved_extent(fs_info->tree_root, | ||
998 | eb->start, eb->len); | ||
999 | BUG_ON(ret); | ||
1000 | |||
1001 | free_extent_buffer(eb); | ||
1002 | kfree(fs_info->log_root_tree); | ||
1003 | fs_info->log_root_tree = NULL; | ||
1004 | return 0; | ||
1005 | } | ||
1006 | |||
1007 | static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, | 988 | static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, |
1008 | struct btrfs_fs_info *fs_info) | 989 | struct btrfs_fs_info *fs_info) |
1009 | { | 990 | { |
@@ -1191,19 +1172,23 @@ again: | |||
1191 | if (root) | 1172 | if (root) |
1192 | return root; | 1173 | return root; |
1193 | 1174 | ||
1194 | ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid); | ||
1195 | if (ret == 0) | ||
1196 | ret = -ENOENT; | ||
1197 | if (ret < 0) | ||
1198 | return ERR_PTR(ret); | ||
1199 | |||
1200 | root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location); | 1175 | root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location); |
1201 | if (IS_ERR(root)) | 1176 | if (IS_ERR(root)) |
1202 | return root; | 1177 | return root; |
1203 | 1178 | ||
1204 | WARN_ON(btrfs_root_refs(&root->root_item) == 0); | ||
1205 | set_anon_super(&root->anon_super, NULL); | 1179 | set_anon_super(&root->anon_super, NULL); |
1206 | 1180 | ||
1181 | if (btrfs_root_refs(&root->root_item) == 0) { | ||
1182 | ret = -ENOENT; | ||
1183 | goto fail; | ||
1184 | } | ||
1185 | |||
1186 | ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid); | ||
1187 | if (ret < 0) | ||
1188 | goto fail; | ||
1189 | if (ret == 0) | ||
1190 | root->orphan_item_inserted = 1; | ||
1191 | |||
1207 | ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); | 1192 | ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); |
1208 | if (ret) | 1193 | if (ret) |
1209 | goto fail; | 1194 | goto fail; |
@@ -1212,10 +1197,9 @@ again: | |||
1212 | ret = radix_tree_insert(&fs_info->fs_roots_radix, | 1197 | ret = radix_tree_insert(&fs_info->fs_roots_radix, |
1213 | (unsigned long)root->root_key.objectid, | 1198 | (unsigned long)root->root_key.objectid, |
1214 | root); | 1199 | root); |
1215 | if (ret == 0) { | 1200 | if (ret == 0) |
1216 | root->in_radix = 1; | 1201 | root->in_radix = 1; |
1217 | root->clean_orphans = 1; | 1202 | |
1218 | } | ||
1219 | spin_unlock(&fs_info->fs_roots_radix_lock); | 1203 | spin_unlock(&fs_info->fs_roots_radix_lock); |
1220 | radix_tree_preload_end(); | 1204 | radix_tree_preload_end(); |
1221 | if (ret) { | 1205 | if (ret) { |
@@ -1461,10 +1445,6 @@ static int cleaner_kthread(void *arg) | |||
1461 | struct btrfs_root *root = arg; | 1445 | struct btrfs_root *root = arg; |
1462 | 1446 | ||
1463 | do { | 1447 | do { |
1464 | smp_mb(); | ||
1465 | if (root->fs_info->closing) | ||
1466 | break; | ||
1467 | |||
1468 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); | 1448 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); |
1469 | 1449 | ||
1470 | if (!(root->fs_info->sb->s_flags & MS_RDONLY) && | 1450 | if (!(root->fs_info->sb->s_flags & MS_RDONLY) && |
@@ -1477,11 +1457,9 @@ static int cleaner_kthread(void *arg) | |||
1477 | if (freezing(current)) { | 1457 | if (freezing(current)) { |
1478 | refrigerator(); | 1458 | refrigerator(); |
1479 | } else { | 1459 | } else { |
1480 | smp_mb(); | ||
1481 | if (root->fs_info->closing) | ||
1482 | break; | ||
1483 | set_current_state(TASK_INTERRUPTIBLE); | 1460 | set_current_state(TASK_INTERRUPTIBLE); |
1484 | schedule(); | 1461 | if (!kthread_should_stop()) |
1462 | schedule(); | ||
1485 | __set_current_state(TASK_RUNNING); | 1463 | __set_current_state(TASK_RUNNING); |
1486 | } | 1464 | } |
1487 | } while (!kthread_should_stop()); | 1465 | } while (!kthread_should_stop()); |
@@ -1493,36 +1471,40 @@ static int transaction_kthread(void *arg) | |||
1493 | struct btrfs_root *root = arg; | 1471 | struct btrfs_root *root = arg; |
1494 | struct btrfs_trans_handle *trans; | 1472 | struct btrfs_trans_handle *trans; |
1495 | struct btrfs_transaction *cur; | 1473 | struct btrfs_transaction *cur; |
1474 | u64 transid; | ||
1496 | unsigned long now; | 1475 | unsigned long now; |
1497 | unsigned long delay; | 1476 | unsigned long delay; |
1498 | int ret; | 1477 | int ret; |
1499 | 1478 | ||
1500 | do { | 1479 | do { |
1501 | smp_mb(); | ||
1502 | if (root->fs_info->closing) | ||
1503 | break; | ||
1504 | |||
1505 | delay = HZ * 30; | 1480 | delay = HZ * 30; |
1506 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); | 1481 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); |
1507 | mutex_lock(&root->fs_info->transaction_kthread_mutex); | 1482 | mutex_lock(&root->fs_info->transaction_kthread_mutex); |
1508 | 1483 | ||
1509 | mutex_lock(&root->fs_info->trans_mutex); | 1484 | spin_lock(&root->fs_info->new_trans_lock); |
1510 | cur = root->fs_info->running_transaction; | 1485 | cur = root->fs_info->running_transaction; |
1511 | if (!cur) { | 1486 | if (!cur) { |
1512 | mutex_unlock(&root->fs_info->trans_mutex); | 1487 | spin_unlock(&root->fs_info->new_trans_lock); |
1513 | goto sleep; | 1488 | goto sleep; |
1514 | } | 1489 | } |
1515 | 1490 | ||
1516 | now = get_seconds(); | 1491 | now = get_seconds(); |
1517 | if (now < cur->start_time || now - cur->start_time < 30) { | 1492 | if (!cur->blocked && |
1518 | mutex_unlock(&root->fs_info->trans_mutex); | 1493 | (now < cur->start_time || now - cur->start_time < 30)) { |
1494 | spin_unlock(&root->fs_info->new_trans_lock); | ||
1519 | delay = HZ * 5; | 1495 | delay = HZ * 5; |
1520 | goto sleep; | 1496 | goto sleep; |
1521 | } | 1497 | } |
1522 | mutex_unlock(&root->fs_info->trans_mutex); | 1498 | transid = cur->transid; |
1523 | trans = btrfs_start_transaction(root, 1); | 1499 | spin_unlock(&root->fs_info->new_trans_lock); |
1524 | ret = btrfs_commit_transaction(trans, root); | ||
1525 | 1500 | ||
1501 | trans = btrfs_join_transaction(root, 1); | ||
1502 | if (transid == trans->transid) { | ||
1503 | ret = btrfs_commit_transaction(trans, root); | ||
1504 | BUG_ON(ret); | ||
1505 | } else { | ||
1506 | btrfs_end_transaction(trans, root); | ||
1507 | } | ||
1526 | sleep: | 1508 | sleep: |
1527 | wake_up_process(root->fs_info->cleaner_kthread); | 1509 | wake_up_process(root->fs_info->cleaner_kthread); |
1528 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); | 1510 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); |
@@ -1530,10 +1512,10 @@ sleep: | |||
1530 | if (freezing(current)) { | 1512 | if (freezing(current)) { |
1531 | refrigerator(); | 1513 | refrigerator(); |
1532 | } else { | 1514 | } else { |
1533 | if (root->fs_info->closing) | ||
1534 | break; | ||
1535 | set_current_state(TASK_INTERRUPTIBLE); | 1515 | set_current_state(TASK_INTERRUPTIBLE); |
1536 | schedule_timeout(delay); | 1516 | if (!kthread_should_stop() && |
1517 | !btrfs_transaction_blocked(root->fs_info)) | ||
1518 | schedule_timeout(delay); | ||
1537 | __set_current_state(TASK_RUNNING); | 1519 | __set_current_state(TASK_RUNNING); |
1538 | } | 1520 | } |
1539 | } while (!kthread_should_stop()); | 1521 | } while (!kthread_should_stop()); |
@@ -1620,6 +1602,13 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1620 | INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); | 1602 | INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); |
1621 | INIT_LIST_HEAD(&fs_info->space_info); | 1603 | INIT_LIST_HEAD(&fs_info->space_info); |
1622 | btrfs_mapping_init(&fs_info->mapping_tree); | 1604 | btrfs_mapping_init(&fs_info->mapping_tree); |
1605 | btrfs_init_block_rsv(&fs_info->global_block_rsv); | ||
1606 | btrfs_init_block_rsv(&fs_info->delalloc_block_rsv); | ||
1607 | btrfs_init_block_rsv(&fs_info->trans_block_rsv); | ||
1608 | btrfs_init_block_rsv(&fs_info->chunk_block_rsv); | ||
1609 | btrfs_init_block_rsv(&fs_info->empty_block_rsv); | ||
1610 | INIT_LIST_HEAD(&fs_info->durable_block_rsv_list); | ||
1611 | mutex_init(&fs_info->durable_block_rsv_mutex); | ||
1623 | atomic_set(&fs_info->nr_async_submits, 0); | 1612 | atomic_set(&fs_info->nr_async_submits, 0); |
1624 | atomic_set(&fs_info->async_delalloc_pages, 0); | 1613 | atomic_set(&fs_info->async_delalloc_pages, 0); |
1625 | atomic_set(&fs_info->async_submit_draining, 0); | 1614 | atomic_set(&fs_info->async_submit_draining, 0); |
@@ -1759,9 +1748,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1759 | min_t(u64, fs_devices->num_devices, | 1748 | min_t(u64, fs_devices->num_devices, |
1760 | fs_info->thread_pool_size), | 1749 | fs_info->thread_pool_size), |
1761 | &fs_info->generic_worker); | 1750 | &fs_info->generic_worker); |
1762 | btrfs_init_workers(&fs_info->enospc_workers, "enospc", | ||
1763 | fs_info->thread_pool_size, | ||
1764 | &fs_info->generic_worker); | ||
1765 | 1751 | ||
1766 | /* a higher idle thresh on the submit workers makes it much more | 1752 | /* a higher idle thresh on the submit workers makes it much more |
1767 | * likely that bios will be send down in a sane order to the | 1753 | * likely that bios will be send down in a sane order to the |
@@ -1809,7 +1795,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1809 | btrfs_start_workers(&fs_info->endio_meta_workers, 1); | 1795 | btrfs_start_workers(&fs_info->endio_meta_workers, 1); |
1810 | btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); | 1796 | btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); |
1811 | btrfs_start_workers(&fs_info->endio_write_workers, 1); | 1797 | btrfs_start_workers(&fs_info->endio_write_workers, 1); |
1812 | btrfs_start_workers(&fs_info->enospc_workers, 1); | ||
1813 | 1798 | ||
1814 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); | 1799 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); |
1815 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, | 1800 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, |
@@ -1912,17 +1897,18 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1912 | 1897 | ||
1913 | csum_root->track_dirty = 1; | 1898 | csum_root->track_dirty = 1; |
1914 | 1899 | ||
1900 | fs_info->generation = generation; | ||
1901 | fs_info->last_trans_committed = generation; | ||
1902 | fs_info->data_alloc_profile = (u64)-1; | ||
1903 | fs_info->metadata_alloc_profile = (u64)-1; | ||
1904 | fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; | ||
1905 | |||
1915 | ret = btrfs_read_block_groups(extent_root); | 1906 | ret = btrfs_read_block_groups(extent_root); |
1916 | if (ret) { | 1907 | if (ret) { |
1917 | printk(KERN_ERR "Failed to read block groups: %d\n", ret); | 1908 | printk(KERN_ERR "Failed to read block groups: %d\n", ret); |
1918 | goto fail_block_groups; | 1909 | goto fail_block_groups; |
1919 | } | 1910 | } |
1920 | 1911 | ||
1921 | fs_info->generation = generation; | ||
1922 | fs_info->last_trans_committed = generation; | ||
1923 | fs_info->data_alloc_profile = (u64)-1; | ||
1924 | fs_info->metadata_alloc_profile = (u64)-1; | ||
1925 | fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; | ||
1926 | fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, | 1912 | fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, |
1927 | "btrfs-cleaner"); | 1913 | "btrfs-cleaner"); |
1928 | if (IS_ERR(fs_info->cleaner_kthread)) | 1914 | if (IS_ERR(fs_info->cleaner_kthread)) |
@@ -1977,6 +1963,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1977 | BUG_ON(ret); | 1963 | BUG_ON(ret); |
1978 | 1964 | ||
1979 | if (!(sb->s_flags & MS_RDONLY)) { | 1965 | if (!(sb->s_flags & MS_RDONLY)) { |
1966 | ret = btrfs_cleanup_fs_roots(fs_info); | ||
1967 | BUG_ON(ret); | ||
1968 | |||
1980 | ret = btrfs_recover_relocation(tree_root); | 1969 | ret = btrfs_recover_relocation(tree_root); |
1981 | if (ret < 0) { | 1970 | if (ret < 0) { |
1982 | printk(KERN_WARNING | 1971 | printk(KERN_WARNING |
@@ -2040,7 +2029,6 @@ fail_sb_buffer: | |||
2040 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); | 2029 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); |
2041 | btrfs_stop_workers(&fs_info->endio_write_workers); | 2030 | btrfs_stop_workers(&fs_info->endio_write_workers); |
2042 | btrfs_stop_workers(&fs_info->submit_workers); | 2031 | btrfs_stop_workers(&fs_info->submit_workers); |
2043 | btrfs_stop_workers(&fs_info->enospc_workers); | ||
2044 | fail_iput: | 2032 | fail_iput: |
2045 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); | 2033 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); |
2046 | iput(fs_info->btree_inode); | 2034 | iput(fs_info->btree_inode); |
@@ -2405,11 +2393,11 @@ int btrfs_commit_super(struct btrfs_root *root) | |||
2405 | down_write(&root->fs_info->cleanup_work_sem); | 2393 | down_write(&root->fs_info->cleanup_work_sem); |
2406 | up_write(&root->fs_info->cleanup_work_sem); | 2394 | up_write(&root->fs_info->cleanup_work_sem); |
2407 | 2395 | ||
2408 | trans = btrfs_start_transaction(root, 1); | 2396 | trans = btrfs_join_transaction(root, 1); |
2409 | ret = btrfs_commit_transaction(trans, root); | 2397 | ret = btrfs_commit_transaction(trans, root); |
2410 | BUG_ON(ret); | 2398 | BUG_ON(ret); |
2411 | /* run commit again to drop the original snapshot */ | 2399 | /* run commit again to drop the original snapshot */ |
2412 | trans = btrfs_start_transaction(root, 1); | 2400 | trans = btrfs_join_transaction(root, 1); |
2413 | btrfs_commit_transaction(trans, root); | 2401 | btrfs_commit_transaction(trans, root); |
2414 | ret = btrfs_write_and_wait_transaction(NULL, root); | 2402 | ret = btrfs_write_and_wait_transaction(NULL, root); |
2415 | BUG_ON(ret); | 2403 | BUG_ON(ret); |
@@ -2426,15 +2414,15 @@ int close_ctree(struct btrfs_root *root) | |||
2426 | fs_info->closing = 1; | 2414 | fs_info->closing = 1; |
2427 | smp_mb(); | 2415 | smp_mb(); |
2428 | 2416 | ||
2429 | kthread_stop(root->fs_info->transaction_kthread); | ||
2430 | kthread_stop(root->fs_info->cleaner_kthread); | ||
2431 | |||
2432 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { | 2417 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { |
2433 | ret = btrfs_commit_super(root); | 2418 | ret = btrfs_commit_super(root); |
2434 | if (ret) | 2419 | if (ret) |
2435 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); | 2420 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); |
2436 | } | 2421 | } |
2437 | 2422 | ||
2423 | kthread_stop(root->fs_info->transaction_kthread); | ||
2424 | kthread_stop(root->fs_info->cleaner_kthread); | ||
2425 | |||
2438 | fs_info->closing = 2; | 2426 | fs_info->closing = 2; |
2439 | smp_mb(); | 2427 | smp_mb(); |
2440 | 2428 | ||
@@ -2473,7 +2461,6 @@ int close_ctree(struct btrfs_root *root) | |||
2473 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); | 2461 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); |
2474 | btrfs_stop_workers(&fs_info->endio_write_workers); | 2462 | btrfs_stop_workers(&fs_info->endio_write_workers); |
2475 | btrfs_stop_workers(&fs_info->submit_workers); | 2463 | btrfs_stop_workers(&fs_info->submit_workers); |
2476 | btrfs_stop_workers(&fs_info->enospc_workers); | ||
2477 | 2464 | ||
2478 | btrfs_close_devices(fs_info->fs_devices); | 2465 | btrfs_close_devices(fs_info->fs_devices); |
2479 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 2466 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index c958ecbc1916..88e825a0bf21 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
@@ -87,7 +87,7 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, | |||
87 | int metadata); | 87 | int metadata); |
88 | int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | 88 | int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, |
89 | int rw, struct bio *bio, int mirror_num, | 89 | int rw, struct bio *bio, int mirror_num, |
90 | unsigned long bio_flags, | 90 | unsigned long bio_flags, u64 bio_offset, |
91 | extent_submit_bio_hook_t *submit_bio_start, | 91 | extent_submit_bio_hook_t *submit_bio_start, |
92 | extent_submit_bio_hook_t *submit_bio_done); | 92 | extent_submit_bio_hook_t *submit_bio_done); |
93 | 93 | ||
@@ -95,8 +95,6 @@ int btrfs_congested_async(struct btrfs_fs_info *info, int iodone); | |||
95 | unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info); | 95 | unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info); |
96 | int btrfs_write_tree_block(struct extent_buffer *buf); | 96 | int btrfs_write_tree_block(struct extent_buffer *buf); |
97 | int btrfs_wait_tree_block_writeback(struct extent_buffer *buf); | 97 | int btrfs_wait_tree_block_writeback(struct extent_buffer *buf); |
98 | int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, | ||
99 | struct btrfs_fs_info *fs_info); | ||
100 | int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, | 98 | int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, |
101 | struct btrfs_fs_info *fs_info); | 99 | struct btrfs_fs_info *fs_info); |
102 | int btrfs_add_log_tree(struct btrfs_trans_handle *trans, | 100 | int btrfs_add_log_tree(struct btrfs_trans_handle *trans, |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c6a4f459ad76..b9080d71991a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -35,10 +35,9 @@ | |||
35 | 35 | ||
36 | static int update_block_group(struct btrfs_trans_handle *trans, | 36 | static int update_block_group(struct btrfs_trans_handle *trans, |
37 | struct btrfs_root *root, | 37 | struct btrfs_root *root, |
38 | u64 bytenr, u64 num_bytes, int alloc, | 38 | u64 bytenr, u64 num_bytes, int alloc); |
39 | int mark_free); | 39 | static int update_reserved_bytes(struct btrfs_block_group_cache *cache, |
40 | static int update_reserved_extents(struct btrfs_block_group_cache *cache, | 40 | u64 num_bytes, int reserve, int sinfo); |
41 | u64 num_bytes, int reserve); | ||
42 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | 41 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, |
43 | struct btrfs_root *root, | 42 | struct btrfs_root *root, |
44 | u64 bytenr, u64 num_bytes, u64 parent, | 43 | u64 bytenr, u64 num_bytes, u64 parent, |
@@ -61,12 +60,6 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
61 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, | 60 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, |
62 | struct btrfs_root *extent_root, u64 alloc_bytes, | 61 | struct btrfs_root *extent_root, u64 alloc_bytes, |
63 | u64 flags, int force); | 62 | u64 flags, int force); |
64 | static int pin_down_bytes(struct btrfs_trans_handle *trans, | ||
65 | struct btrfs_root *root, | ||
66 | struct btrfs_path *path, | ||
67 | u64 bytenr, u64 num_bytes, | ||
68 | int is_data, int reserved, | ||
69 | struct extent_buffer **must_clean); | ||
70 | static int find_next_key(struct btrfs_path *path, int level, | 63 | static int find_next_key(struct btrfs_path *path, int level, |
71 | struct btrfs_key *key); | 64 | struct btrfs_key *key); |
72 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | 65 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, |
@@ -91,8 +84,12 @@ void btrfs_get_block_group(struct btrfs_block_group_cache *cache) | |||
91 | 84 | ||
92 | void btrfs_put_block_group(struct btrfs_block_group_cache *cache) | 85 | void btrfs_put_block_group(struct btrfs_block_group_cache *cache) |
93 | { | 86 | { |
94 | if (atomic_dec_and_test(&cache->count)) | 87 | if (atomic_dec_and_test(&cache->count)) { |
88 | WARN_ON(cache->pinned > 0); | ||
89 | WARN_ON(cache->reserved > 0); | ||
90 | WARN_ON(cache->reserved_pinned > 0); | ||
95 | kfree(cache); | 91 | kfree(cache); |
92 | } | ||
96 | } | 93 | } |
97 | 94 | ||
98 | /* | 95 | /* |
@@ -319,7 +316,7 @@ static int caching_kthread(void *data) | |||
319 | 316 | ||
320 | exclude_super_stripes(extent_root, block_group); | 317 | exclude_super_stripes(extent_root, block_group); |
321 | spin_lock(&block_group->space_info->lock); | 318 | spin_lock(&block_group->space_info->lock); |
322 | block_group->space_info->bytes_super += block_group->bytes_super; | 319 | block_group->space_info->bytes_readonly += block_group->bytes_super; |
323 | spin_unlock(&block_group->space_info->lock); | 320 | spin_unlock(&block_group->space_info->lock); |
324 | 321 | ||
325 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); | 322 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); |
@@ -507,6 +504,9 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, | |||
507 | struct list_head *head = &info->space_info; | 504 | struct list_head *head = &info->space_info; |
508 | struct btrfs_space_info *found; | 505 | struct btrfs_space_info *found; |
509 | 506 | ||
507 | flags &= BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_SYSTEM | | ||
508 | BTRFS_BLOCK_GROUP_METADATA; | ||
509 | |||
510 | rcu_read_lock(); | 510 | rcu_read_lock(); |
511 | list_for_each_entry_rcu(found, head, list) { | 511 | list_for_each_entry_rcu(found, head, list) { |
512 | if (found->flags == flags) { | 512 | if (found->flags == flags) { |
@@ -610,6 +610,113 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) | |||
610 | } | 610 | } |
611 | 611 | ||
612 | /* | 612 | /* |
613 | * helper function to lookup reference count and flags of extent. | ||
614 | * | ||
615 | * the head node for delayed ref is used to store the sum of all the | ||
616 | * reference count modifications queued up in the rbtree. the head | ||
617 | * node may also store the extent flags to set. This way you can check | ||
618 | * to see what the reference count and extent flags would be if all of | ||
619 | * the delayed refs are not processed. | ||
620 | */ | ||
621 | int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, | ||
622 | struct btrfs_root *root, u64 bytenr, | ||
623 | u64 num_bytes, u64 *refs, u64 *flags) | ||
624 | { | ||
625 | struct btrfs_delayed_ref_head *head; | ||
626 | struct btrfs_delayed_ref_root *delayed_refs; | ||
627 | struct btrfs_path *path; | ||
628 | struct btrfs_extent_item *ei; | ||
629 | struct extent_buffer *leaf; | ||
630 | struct btrfs_key key; | ||
631 | u32 item_size; | ||
632 | u64 num_refs; | ||
633 | u64 extent_flags; | ||
634 | int ret; | ||
635 | |||
636 | path = btrfs_alloc_path(); | ||
637 | if (!path) | ||
638 | return -ENOMEM; | ||
639 | |||
640 | key.objectid = bytenr; | ||
641 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
642 | key.offset = num_bytes; | ||
643 | if (!trans) { | ||
644 | path->skip_locking = 1; | ||
645 | path->search_commit_root = 1; | ||
646 | } | ||
647 | again: | ||
648 | ret = btrfs_search_slot(trans, root->fs_info->extent_root, | ||
649 | &key, path, 0, 0); | ||
650 | if (ret < 0) | ||
651 | goto out_free; | ||
652 | |||
653 | if (ret == 0) { | ||
654 | leaf = path->nodes[0]; | ||
655 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); | ||
656 | if (item_size >= sizeof(*ei)) { | ||
657 | ei = btrfs_item_ptr(leaf, path->slots[0], | ||
658 | struct btrfs_extent_item); | ||
659 | num_refs = btrfs_extent_refs(leaf, ei); | ||
660 | extent_flags = btrfs_extent_flags(leaf, ei); | ||
661 | } else { | ||
662 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | ||
663 | struct btrfs_extent_item_v0 *ei0; | ||
664 | BUG_ON(item_size != sizeof(*ei0)); | ||
665 | ei0 = btrfs_item_ptr(leaf, path->slots[0], | ||
666 | struct btrfs_extent_item_v0); | ||
667 | num_refs = btrfs_extent_refs_v0(leaf, ei0); | ||
668 | /* FIXME: this isn't correct for data */ | ||
669 | extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF; | ||
670 | #else | ||
671 | BUG(); | ||
672 | #endif | ||
673 | } | ||
674 | BUG_ON(num_refs == 0); | ||
675 | } else { | ||
676 | num_refs = 0; | ||
677 | extent_flags = 0; | ||
678 | ret = 0; | ||
679 | } | ||
680 | |||
681 | if (!trans) | ||
682 | goto out; | ||
683 | |||
684 | delayed_refs = &trans->transaction->delayed_refs; | ||
685 | spin_lock(&delayed_refs->lock); | ||
686 | head = btrfs_find_delayed_ref_head(trans, bytenr); | ||
687 | if (head) { | ||
688 | if (!mutex_trylock(&head->mutex)) { | ||
689 | atomic_inc(&head->node.refs); | ||
690 | spin_unlock(&delayed_refs->lock); | ||
691 | |||
692 | btrfs_release_path(root->fs_info->extent_root, path); | ||
693 | |||
694 | mutex_lock(&head->mutex); | ||
695 | mutex_unlock(&head->mutex); | ||
696 | btrfs_put_delayed_ref(&head->node); | ||
697 | goto again; | ||
698 | } | ||
699 | if (head->extent_op && head->extent_op->update_flags) | ||
700 | extent_flags |= head->extent_op->flags_to_set; | ||
701 | else | ||
702 | BUG_ON(num_refs == 0); | ||
703 | |||
704 | num_refs += head->node.ref_mod; | ||
705 | mutex_unlock(&head->mutex); | ||
706 | } | ||
707 | spin_unlock(&delayed_refs->lock); | ||
708 | out: | ||
709 | WARN_ON(num_refs == 0); | ||
710 | if (refs) | ||
711 | *refs = num_refs; | ||
712 | if (flags) | ||
713 | *flags = extent_flags; | ||
714 | out_free: | ||
715 | btrfs_free_path(path); | ||
716 | return ret; | ||
717 | } | ||
718 | |||
719 | /* | ||
613 | * Back reference rules. Back refs have three main goals: | 720 | * Back reference rules. Back refs have three main goals: |
614 | * | 721 | * |
615 | * 1) differentiate between all holders of references to an extent so that | 722 | * 1) differentiate between all holders of references to an extent so that |
@@ -1871,7 +1978,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, | |||
1871 | return ret; | 1978 | return ret; |
1872 | } | 1979 | } |
1873 | 1980 | ||
1874 | |||
1875 | /* helper function to actually process a single delayed ref entry */ | 1981 | /* helper function to actually process a single delayed ref entry */ |
1876 | static int run_one_delayed_ref(struct btrfs_trans_handle *trans, | 1982 | static int run_one_delayed_ref(struct btrfs_trans_handle *trans, |
1877 | struct btrfs_root *root, | 1983 | struct btrfs_root *root, |
@@ -1891,32 +1997,14 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, | |||
1891 | BUG_ON(extent_op); | 1997 | BUG_ON(extent_op); |
1892 | head = btrfs_delayed_node_to_head(node); | 1998 | head = btrfs_delayed_node_to_head(node); |
1893 | if (insert_reserved) { | 1999 | if (insert_reserved) { |
1894 | int mark_free = 0; | 2000 | btrfs_pin_extent(root, node->bytenr, |
1895 | struct extent_buffer *must_clean = NULL; | 2001 | node->num_bytes, 1); |
1896 | |||
1897 | ret = pin_down_bytes(trans, root, NULL, | ||
1898 | node->bytenr, node->num_bytes, | ||
1899 | head->is_data, 1, &must_clean); | ||
1900 | if (ret > 0) | ||
1901 | mark_free = 1; | ||
1902 | |||
1903 | if (must_clean) { | ||
1904 | clean_tree_block(NULL, root, must_clean); | ||
1905 | btrfs_tree_unlock(must_clean); | ||
1906 | free_extent_buffer(must_clean); | ||
1907 | } | ||
1908 | if (head->is_data) { | 2002 | if (head->is_data) { |
1909 | ret = btrfs_del_csums(trans, root, | 2003 | ret = btrfs_del_csums(trans, root, |
1910 | node->bytenr, | 2004 | node->bytenr, |
1911 | node->num_bytes); | 2005 | node->num_bytes); |
1912 | BUG_ON(ret); | 2006 | BUG_ON(ret); |
1913 | } | 2007 | } |
1914 | if (mark_free) { | ||
1915 | ret = btrfs_free_reserved_extent(root, | ||
1916 | node->bytenr, | ||
1917 | node->num_bytes); | ||
1918 | BUG_ON(ret); | ||
1919 | } | ||
1920 | } | 2008 | } |
1921 | mutex_unlock(&head->mutex); | 2009 | mutex_unlock(&head->mutex); |
1922 | return 0; | 2010 | return 0; |
@@ -2347,6 +2435,8 @@ int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, | |||
2347 | ret = 0; | 2435 | ret = 0; |
2348 | out: | 2436 | out: |
2349 | btrfs_free_path(path); | 2437 | btrfs_free_path(path); |
2438 | if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) | ||
2439 | WARN_ON(ret > 0); | ||
2350 | return ret; | 2440 | return ret; |
2351 | } | 2441 | } |
2352 | 2442 | ||
@@ -2660,12 +2750,21 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
2660 | struct btrfs_space_info **space_info) | 2750 | struct btrfs_space_info **space_info) |
2661 | { | 2751 | { |
2662 | struct btrfs_space_info *found; | 2752 | struct btrfs_space_info *found; |
2753 | int i; | ||
2754 | int factor; | ||
2755 | |||
2756 | if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | | ||
2757 | BTRFS_BLOCK_GROUP_RAID10)) | ||
2758 | factor = 2; | ||
2759 | else | ||
2760 | factor = 1; | ||
2663 | 2761 | ||
2664 | found = __find_space_info(info, flags); | 2762 | found = __find_space_info(info, flags); |
2665 | if (found) { | 2763 | if (found) { |
2666 | spin_lock(&found->lock); | 2764 | spin_lock(&found->lock); |
2667 | found->total_bytes += total_bytes; | 2765 | found->total_bytes += total_bytes; |
2668 | found->bytes_used += bytes_used; | 2766 | found->bytes_used += bytes_used; |
2767 | found->disk_used += bytes_used * factor; | ||
2669 | found->full = 0; | 2768 | found->full = 0; |
2670 | spin_unlock(&found->lock); | 2769 | spin_unlock(&found->lock); |
2671 | *space_info = found; | 2770 | *space_info = found; |
@@ -2675,18 +2774,20 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
2675 | if (!found) | 2774 | if (!found) |
2676 | return -ENOMEM; | 2775 | return -ENOMEM; |
2677 | 2776 | ||
2678 | INIT_LIST_HEAD(&found->block_groups); | 2777 | for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) |
2778 | INIT_LIST_HEAD(&found->block_groups[i]); | ||
2679 | init_rwsem(&found->groups_sem); | 2779 | init_rwsem(&found->groups_sem); |
2680 | init_waitqueue_head(&found->flush_wait); | ||
2681 | init_waitqueue_head(&found->allocate_wait); | ||
2682 | spin_lock_init(&found->lock); | 2780 | spin_lock_init(&found->lock); |
2683 | found->flags = flags; | 2781 | found->flags = flags & (BTRFS_BLOCK_GROUP_DATA | |
2782 | BTRFS_BLOCK_GROUP_SYSTEM | | ||
2783 | BTRFS_BLOCK_GROUP_METADATA); | ||
2684 | found->total_bytes = total_bytes; | 2784 | found->total_bytes = total_bytes; |
2685 | found->bytes_used = bytes_used; | 2785 | found->bytes_used = bytes_used; |
2786 | found->disk_used = bytes_used * factor; | ||
2686 | found->bytes_pinned = 0; | 2787 | found->bytes_pinned = 0; |
2687 | found->bytes_reserved = 0; | 2788 | found->bytes_reserved = 0; |
2688 | found->bytes_readonly = 0; | 2789 | found->bytes_readonly = 0; |
2689 | found->bytes_delalloc = 0; | 2790 | found->bytes_may_use = 0; |
2690 | found->full = 0; | 2791 | found->full = 0; |
2691 | found->force_alloc = 0; | 2792 | found->force_alloc = 0; |
2692 | *space_info = found; | 2793 | *space_info = found; |
@@ -2711,19 +2812,6 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) | |||
2711 | } | 2812 | } |
2712 | } | 2813 | } |
2713 | 2814 | ||
2714 | static void set_block_group_readonly(struct btrfs_block_group_cache *cache) | ||
2715 | { | ||
2716 | spin_lock(&cache->space_info->lock); | ||
2717 | spin_lock(&cache->lock); | ||
2718 | if (!cache->ro) { | ||
2719 | cache->space_info->bytes_readonly += cache->key.offset - | ||
2720 | btrfs_block_group_used(&cache->item); | ||
2721 | cache->ro = 1; | ||
2722 | } | ||
2723 | spin_unlock(&cache->lock); | ||
2724 | spin_unlock(&cache->space_info->lock); | ||
2725 | } | ||
2726 | |||
2727 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) | 2815 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) |
2728 | { | 2816 | { |
2729 | u64 num_devices = root->fs_info->fs_devices->rw_devices; | 2817 | u64 num_devices = root->fs_info->fs_devices->rw_devices; |
@@ -2752,491 +2840,50 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) | |||
2752 | return flags; | 2840 | return flags; |
2753 | } | 2841 | } |
2754 | 2842 | ||
2755 | static u64 btrfs_get_alloc_profile(struct btrfs_root *root, u64 data) | 2843 | static u64 get_alloc_profile(struct btrfs_root *root, u64 flags) |
2756 | { | ||
2757 | struct btrfs_fs_info *info = root->fs_info; | ||
2758 | u64 alloc_profile; | ||
2759 | |||
2760 | if (data) { | ||
2761 | alloc_profile = info->avail_data_alloc_bits & | ||
2762 | info->data_alloc_profile; | ||
2763 | data = BTRFS_BLOCK_GROUP_DATA | alloc_profile; | ||
2764 | } else if (root == root->fs_info->chunk_root) { | ||
2765 | alloc_profile = info->avail_system_alloc_bits & | ||
2766 | info->system_alloc_profile; | ||
2767 | data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; | ||
2768 | } else { | ||
2769 | alloc_profile = info->avail_metadata_alloc_bits & | ||
2770 | info->metadata_alloc_profile; | ||
2771 | data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; | ||
2772 | } | ||
2773 | |||
2774 | return btrfs_reduce_alloc_profile(root, data); | ||
2775 | } | ||
2776 | |||
2777 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) | ||
2778 | { | ||
2779 | u64 alloc_target; | ||
2780 | |||
2781 | alloc_target = btrfs_get_alloc_profile(root, 1); | ||
2782 | BTRFS_I(inode)->space_info = __find_space_info(root->fs_info, | ||
2783 | alloc_target); | ||
2784 | } | ||
2785 | |||
2786 | static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items) | ||
2787 | { | ||
2788 | u64 num_bytes; | ||
2789 | int level; | ||
2790 | |||
2791 | level = BTRFS_MAX_LEVEL - 2; | ||
2792 | /* | ||
2793 | * NOTE: these calculations are absolutely the worst possible case. | ||
2794 | * This assumes that _every_ item we insert will require a new leaf, and | ||
2795 | * that the tree has grown to its maximum level size. | ||
2796 | */ | ||
2797 | |||
2798 | /* | ||
2799 | * for every item we insert we could insert both an extent item and a | ||
2800 | * extent ref item. Then for ever item we insert, we will need to cow | ||
2801 | * both the original leaf, plus the leaf to the left and right of it. | ||
2802 | * | ||
2803 | * Unless we are talking about the extent root, then we just want the | ||
2804 | * number of items * 2, since we just need the extent item plus its ref. | ||
2805 | */ | ||
2806 | if (root == root->fs_info->extent_root) | ||
2807 | num_bytes = num_items * 2; | ||
2808 | else | ||
2809 | num_bytes = (num_items + (2 * num_items)) * 3; | ||
2810 | |||
2811 | /* | ||
2812 | * num_bytes is total number of leaves we could need times the leaf | ||
2813 | * size, and then for every leaf we could end up cow'ing 2 nodes per | ||
2814 | * level, down to the leaf level. | ||
2815 | */ | ||
2816 | num_bytes = (num_bytes * root->leafsize) + | ||
2817 | (num_bytes * (level * 2)) * root->nodesize; | ||
2818 | |||
2819 | return num_bytes; | ||
2820 | } | ||
2821 | |||
2822 | /* | ||
2823 | * Unreserve metadata space for delalloc. If we have less reserved credits than | ||
2824 | * we have extents, this function does nothing. | ||
2825 | */ | ||
2826 | int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root, | ||
2827 | struct inode *inode, int num_items) | ||
2828 | { | ||
2829 | struct btrfs_fs_info *info = root->fs_info; | ||
2830 | struct btrfs_space_info *meta_sinfo; | ||
2831 | u64 num_bytes; | ||
2832 | u64 alloc_target; | ||
2833 | bool bug = false; | ||
2834 | |||
2835 | /* get the space info for where the metadata will live */ | ||
2836 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
2837 | meta_sinfo = __find_space_info(info, alloc_target); | ||
2838 | |||
2839 | num_bytes = calculate_bytes_needed(root->fs_info->extent_root, | ||
2840 | num_items); | ||
2841 | |||
2842 | spin_lock(&meta_sinfo->lock); | ||
2843 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
2844 | if (BTRFS_I(inode)->reserved_extents <= | ||
2845 | BTRFS_I(inode)->outstanding_extents) { | ||
2846 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
2847 | spin_unlock(&meta_sinfo->lock); | ||
2848 | return 0; | ||
2849 | } | ||
2850 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
2851 | |||
2852 | BTRFS_I(inode)->reserved_extents -= num_items; | ||
2853 | BUG_ON(BTRFS_I(inode)->reserved_extents < 0); | ||
2854 | |||
2855 | if (meta_sinfo->bytes_delalloc < num_bytes) { | ||
2856 | bug = true; | ||
2857 | meta_sinfo->bytes_delalloc = 0; | ||
2858 | } else { | ||
2859 | meta_sinfo->bytes_delalloc -= num_bytes; | ||
2860 | } | ||
2861 | spin_unlock(&meta_sinfo->lock); | ||
2862 | |||
2863 | BUG_ON(bug); | ||
2864 | |||
2865 | return 0; | ||
2866 | } | ||
2867 | |||
2868 | static void check_force_delalloc(struct btrfs_space_info *meta_sinfo) | ||
2869 | { | 2844 | { |
2870 | u64 thresh; | 2845 | if (flags & BTRFS_BLOCK_GROUP_DATA) |
2871 | 2846 | flags |= root->fs_info->avail_data_alloc_bits & | |
2872 | thresh = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | 2847 | root->fs_info->data_alloc_profile; |
2873 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | 2848 | else if (flags & BTRFS_BLOCK_GROUP_SYSTEM) |
2874 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | 2849 | flags |= root->fs_info->avail_system_alloc_bits & |
2875 | meta_sinfo->bytes_may_use; | 2850 | root->fs_info->system_alloc_profile; |
2876 | 2851 | else if (flags & BTRFS_BLOCK_GROUP_METADATA) | |
2877 | thresh = meta_sinfo->total_bytes - thresh; | 2852 | flags |= root->fs_info->avail_metadata_alloc_bits & |
2878 | thresh *= 80; | 2853 | root->fs_info->metadata_alloc_profile; |
2879 | do_div(thresh, 100); | 2854 | return btrfs_reduce_alloc_profile(root, flags); |
2880 | if (thresh <= meta_sinfo->bytes_delalloc) | ||
2881 | meta_sinfo->force_delalloc = 1; | ||
2882 | else | ||
2883 | meta_sinfo->force_delalloc = 0; | ||
2884 | } | 2855 | } |
2885 | 2856 | ||
2886 | struct async_flush { | 2857 | static u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) |
2887 | struct btrfs_root *root; | ||
2888 | struct btrfs_space_info *info; | ||
2889 | struct btrfs_work work; | ||
2890 | }; | ||
2891 | |||
2892 | static noinline void flush_delalloc_async(struct btrfs_work *work) | ||
2893 | { | 2858 | { |
2894 | struct async_flush *async; | 2859 | u64 flags; |
2895 | struct btrfs_root *root; | ||
2896 | struct btrfs_space_info *info; | ||
2897 | |||
2898 | async = container_of(work, struct async_flush, work); | ||
2899 | root = async->root; | ||
2900 | info = async->info; | ||
2901 | |||
2902 | btrfs_start_delalloc_inodes(root, 0); | ||
2903 | wake_up(&info->flush_wait); | ||
2904 | btrfs_wait_ordered_extents(root, 0, 0); | ||
2905 | |||
2906 | spin_lock(&info->lock); | ||
2907 | info->flushing = 0; | ||
2908 | spin_unlock(&info->lock); | ||
2909 | wake_up(&info->flush_wait); | ||
2910 | |||
2911 | kfree(async); | ||
2912 | } | ||
2913 | |||
2914 | static void wait_on_flush(struct btrfs_space_info *info) | ||
2915 | { | ||
2916 | DEFINE_WAIT(wait); | ||
2917 | u64 used; | ||
2918 | |||
2919 | while (1) { | ||
2920 | prepare_to_wait(&info->flush_wait, &wait, | ||
2921 | TASK_UNINTERRUPTIBLE); | ||
2922 | spin_lock(&info->lock); | ||
2923 | if (!info->flushing) { | ||
2924 | spin_unlock(&info->lock); | ||
2925 | break; | ||
2926 | } | ||
2927 | |||
2928 | used = info->bytes_used + info->bytes_reserved + | ||
2929 | info->bytes_pinned + info->bytes_readonly + | ||
2930 | info->bytes_super + info->bytes_root + | ||
2931 | info->bytes_may_use + info->bytes_delalloc; | ||
2932 | if (used < info->total_bytes) { | ||
2933 | spin_unlock(&info->lock); | ||
2934 | break; | ||
2935 | } | ||
2936 | spin_unlock(&info->lock); | ||
2937 | schedule(); | ||
2938 | } | ||
2939 | finish_wait(&info->flush_wait, &wait); | ||
2940 | } | ||
2941 | |||
2942 | static void flush_delalloc(struct btrfs_root *root, | ||
2943 | struct btrfs_space_info *info) | ||
2944 | { | ||
2945 | struct async_flush *async; | ||
2946 | bool wait = false; | ||
2947 | |||
2948 | spin_lock(&info->lock); | ||
2949 | 2860 | ||
2950 | if (!info->flushing) | 2861 | if (data) |
2951 | info->flushing = 1; | 2862 | flags = BTRFS_BLOCK_GROUP_DATA; |
2863 | else if (root == root->fs_info->chunk_root) | ||
2864 | flags = BTRFS_BLOCK_GROUP_SYSTEM; | ||
2952 | else | 2865 | else |
2953 | wait = true; | 2866 | flags = BTRFS_BLOCK_GROUP_METADATA; |
2954 | |||
2955 | spin_unlock(&info->lock); | ||
2956 | |||
2957 | if (wait) { | ||
2958 | wait_on_flush(info); | ||
2959 | return; | ||
2960 | } | ||
2961 | |||
2962 | async = kzalloc(sizeof(*async), GFP_NOFS); | ||
2963 | if (!async) | ||
2964 | goto flush; | ||
2965 | |||
2966 | async->root = root; | ||
2967 | async->info = info; | ||
2968 | async->work.func = flush_delalloc_async; | ||
2969 | 2867 | ||
2970 | btrfs_queue_worker(&root->fs_info->enospc_workers, | 2868 | return get_alloc_profile(root, flags); |
2971 | &async->work); | ||
2972 | wait_on_flush(info); | ||
2973 | return; | ||
2974 | |||
2975 | flush: | ||
2976 | btrfs_start_delalloc_inodes(root, 0); | ||
2977 | btrfs_wait_ordered_extents(root, 0, 0); | ||
2978 | |||
2979 | spin_lock(&info->lock); | ||
2980 | info->flushing = 0; | ||
2981 | spin_unlock(&info->lock); | ||
2982 | wake_up(&info->flush_wait); | ||
2983 | } | 2869 | } |
2984 | 2870 | ||
2985 | static int maybe_allocate_chunk(struct btrfs_root *root, | 2871 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) |
2986 | struct btrfs_space_info *info) | ||
2987 | { | ||
2988 | struct btrfs_super_block *disk_super = &root->fs_info->super_copy; | ||
2989 | struct btrfs_trans_handle *trans; | ||
2990 | bool wait = false; | ||
2991 | int ret = 0; | ||
2992 | u64 min_metadata; | ||
2993 | u64 free_space; | ||
2994 | |||
2995 | free_space = btrfs_super_total_bytes(disk_super); | ||
2996 | /* | ||
2997 | * we allow the metadata to grow to a max of either 10gb or 5% of the | ||
2998 | * space in the volume. | ||
2999 | */ | ||
3000 | min_metadata = min((u64)10 * 1024 * 1024 * 1024, | ||
3001 | div64_u64(free_space * 5, 100)); | ||
3002 | if (info->total_bytes >= min_metadata) { | ||
3003 | spin_unlock(&info->lock); | ||
3004 | return 0; | ||
3005 | } | ||
3006 | |||
3007 | if (info->full) { | ||
3008 | spin_unlock(&info->lock); | ||
3009 | return 0; | ||
3010 | } | ||
3011 | |||
3012 | if (!info->allocating_chunk) { | ||
3013 | info->force_alloc = 1; | ||
3014 | info->allocating_chunk = 1; | ||
3015 | } else { | ||
3016 | wait = true; | ||
3017 | } | ||
3018 | |||
3019 | spin_unlock(&info->lock); | ||
3020 | |||
3021 | if (wait) { | ||
3022 | wait_event(info->allocate_wait, | ||
3023 | !info->allocating_chunk); | ||
3024 | return 1; | ||
3025 | } | ||
3026 | |||
3027 | trans = btrfs_start_transaction(root, 1); | ||
3028 | if (!trans) { | ||
3029 | ret = -ENOMEM; | ||
3030 | goto out; | ||
3031 | } | ||
3032 | |||
3033 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | ||
3034 | 4096 + 2 * 1024 * 1024, | ||
3035 | info->flags, 0); | ||
3036 | btrfs_end_transaction(trans, root); | ||
3037 | if (ret) | ||
3038 | goto out; | ||
3039 | out: | ||
3040 | spin_lock(&info->lock); | ||
3041 | info->allocating_chunk = 0; | ||
3042 | spin_unlock(&info->lock); | ||
3043 | wake_up(&info->allocate_wait); | ||
3044 | |||
3045 | if (ret) | ||
3046 | return 0; | ||
3047 | return 1; | ||
3048 | } | ||
3049 | |||
3050 | /* | ||
3051 | * Reserve metadata space for delalloc. | ||
3052 | */ | ||
3053 | int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root, | ||
3054 | struct inode *inode, int num_items) | ||
3055 | { | ||
3056 | struct btrfs_fs_info *info = root->fs_info; | ||
3057 | struct btrfs_space_info *meta_sinfo; | ||
3058 | u64 num_bytes; | ||
3059 | u64 used; | ||
3060 | u64 alloc_target; | ||
3061 | int flushed = 0; | ||
3062 | int force_delalloc; | ||
3063 | |||
3064 | /* get the space info for where the metadata will live */ | ||
3065 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
3066 | meta_sinfo = __find_space_info(info, alloc_target); | ||
3067 | |||
3068 | num_bytes = calculate_bytes_needed(root->fs_info->extent_root, | ||
3069 | num_items); | ||
3070 | again: | ||
3071 | spin_lock(&meta_sinfo->lock); | ||
3072 | |||
3073 | force_delalloc = meta_sinfo->force_delalloc; | ||
3074 | |||
3075 | if (unlikely(!meta_sinfo->bytes_root)) | ||
3076 | meta_sinfo->bytes_root = calculate_bytes_needed(root, 6); | ||
3077 | |||
3078 | if (!flushed) | ||
3079 | meta_sinfo->bytes_delalloc += num_bytes; | ||
3080 | |||
3081 | used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | ||
3082 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | ||
3083 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | ||
3084 | meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc; | ||
3085 | |||
3086 | if (used > meta_sinfo->total_bytes) { | ||
3087 | flushed++; | ||
3088 | |||
3089 | if (flushed == 1) { | ||
3090 | if (maybe_allocate_chunk(root, meta_sinfo)) | ||
3091 | goto again; | ||
3092 | flushed++; | ||
3093 | } else { | ||
3094 | spin_unlock(&meta_sinfo->lock); | ||
3095 | } | ||
3096 | |||
3097 | if (flushed == 2) { | ||
3098 | filemap_flush(inode->i_mapping); | ||
3099 | goto again; | ||
3100 | } else if (flushed == 3) { | ||
3101 | flush_delalloc(root, meta_sinfo); | ||
3102 | goto again; | ||
3103 | } | ||
3104 | spin_lock(&meta_sinfo->lock); | ||
3105 | meta_sinfo->bytes_delalloc -= num_bytes; | ||
3106 | spin_unlock(&meta_sinfo->lock); | ||
3107 | printk(KERN_ERR "enospc, has %d, reserved %d\n", | ||
3108 | BTRFS_I(inode)->outstanding_extents, | ||
3109 | BTRFS_I(inode)->reserved_extents); | ||
3110 | dump_space_info(meta_sinfo, 0, 0); | ||
3111 | return -ENOSPC; | ||
3112 | } | ||
3113 | |||
3114 | BTRFS_I(inode)->reserved_extents += num_items; | ||
3115 | check_force_delalloc(meta_sinfo); | ||
3116 | spin_unlock(&meta_sinfo->lock); | ||
3117 | |||
3118 | if (!flushed && force_delalloc) | ||
3119 | filemap_flush(inode->i_mapping); | ||
3120 | |||
3121 | return 0; | ||
3122 | } | ||
3123 | |||
3124 | /* | ||
3125 | * unreserve num_items number of items worth of metadata space. This needs to | ||
3126 | * be paired with btrfs_reserve_metadata_space. | ||
3127 | * | ||
3128 | * NOTE: if you have the option, run this _AFTER_ you do a | ||
3129 | * btrfs_end_transaction, since btrfs_end_transaction will run delayed ref | ||
3130 | * oprations which will result in more used metadata, so we want to make sure we | ||
3131 | * can do that without issue. | ||
3132 | */ | ||
3133 | int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items) | ||
3134 | { | ||
3135 | struct btrfs_fs_info *info = root->fs_info; | ||
3136 | struct btrfs_space_info *meta_sinfo; | ||
3137 | u64 num_bytes; | ||
3138 | u64 alloc_target; | ||
3139 | bool bug = false; | ||
3140 | |||
3141 | /* get the space info for where the metadata will live */ | ||
3142 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
3143 | meta_sinfo = __find_space_info(info, alloc_target); | ||
3144 | |||
3145 | num_bytes = calculate_bytes_needed(root, num_items); | ||
3146 | |||
3147 | spin_lock(&meta_sinfo->lock); | ||
3148 | if (meta_sinfo->bytes_may_use < num_bytes) { | ||
3149 | bug = true; | ||
3150 | meta_sinfo->bytes_may_use = 0; | ||
3151 | } else { | ||
3152 | meta_sinfo->bytes_may_use -= num_bytes; | ||
3153 | } | ||
3154 | spin_unlock(&meta_sinfo->lock); | ||
3155 | |||
3156 | BUG_ON(bug); | ||
3157 | |||
3158 | return 0; | ||
3159 | } | ||
3160 | |||
3161 | /* | ||
3162 | * Reserve some metadata space for use. We'll calculate the worste case number | ||
3163 | * of bytes that would be needed to modify num_items number of items. If we | ||
3164 | * have space, fantastic, if not, you get -ENOSPC. Please call | ||
3165 | * btrfs_unreserve_metadata_space when you are done for the _SAME_ number of | ||
3166 | * items you reserved, since whatever metadata you needed should have already | ||
3167 | * been allocated. | ||
3168 | * | ||
3169 | * This will commit the transaction to make more space if we don't have enough | ||
3170 | * metadata space. THe only time we don't do this is if we're reserving space | ||
3171 | * inside of a transaction, then we will just return -ENOSPC and it is the | ||
3172 | * callers responsibility to handle it properly. | ||
3173 | */ | ||
3174 | int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items) | ||
3175 | { | 2872 | { |
3176 | struct btrfs_fs_info *info = root->fs_info; | 2873 | BTRFS_I(inode)->space_info = __find_space_info(root->fs_info, |
3177 | struct btrfs_space_info *meta_sinfo; | 2874 | BTRFS_BLOCK_GROUP_DATA); |
3178 | u64 num_bytes; | ||
3179 | u64 used; | ||
3180 | u64 alloc_target; | ||
3181 | int retries = 0; | ||
3182 | |||
3183 | /* get the space info for where the metadata will live */ | ||
3184 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
3185 | meta_sinfo = __find_space_info(info, alloc_target); | ||
3186 | |||
3187 | num_bytes = calculate_bytes_needed(root, num_items); | ||
3188 | again: | ||
3189 | spin_lock(&meta_sinfo->lock); | ||
3190 | |||
3191 | if (unlikely(!meta_sinfo->bytes_root)) | ||
3192 | meta_sinfo->bytes_root = calculate_bytes_needed(root, 6); | ||
3193 | |||
3194 | if (!retries) | ||
3195 | meta_sinfo->bytes_may_use += num_bytes; | ||
3196 | |||
3197 | used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | ||
3198 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | ||
3199 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | ||
3200 | meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc; | ||
3201 | |||
3202 | if (used > meta_sinfo->total_bytes) { | ||
3203 | retries++; | ||
3204 | if (retries == 1) { | ||
3205 | if (maybe_allocate_chunk(root, meta_sinfo)) | ||
3206 | goto again; | ||
3207 | retries++; | ||
3208 | } else { | ||
3209 | spin_unlock(&meta_sinfo->lock); | ||
3210 | } | ||
3211 | |||
3212 | if (retries == 2) { | ||
3213 | flush_delalloc(root, meta_sinfo); | ||
3214 | goto again; | ||
3215 | } | ||
3216 | spin_lock(&meta_sinfo->lock); | ||
3217 | meta_sinfo->bytes_may_use -= num_bytes; | ||
3218 | spin_unlock(&meta_sinfo->lock); | ||
3219 | |||
3220 | dump_space_info(meta_sinfo, 0, 0); | ||
3221 | return -ENOSPC; | ||
3222 | } | ||
3223 | |||
3224 | check_force_delalloc(meta_sinfo); | ||
3225 | spin_unlock(&meta_sinfo->lock); | ||
3226 | |||
3227 | return 0; | ||
3228 | } | 2875 | } |
3229 | 2876 | ||
3230 | /* | 2877 | /* |
3231 | * This will check the space that the inode allocates from to make sure we have | 2878 | * This will check the space that the inode allocates from to make sure we have |
3232 | * enough space for bytes. | 2879 | * enough space for bytes. |
3233 | */ | 2880 | */ |
3234 | int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, | 2881 | int btrfs_check_data_free_space(struct inode *inode, u64 bytes) |
3235 | u64 bytes) | ||
3236 | { | 2882 | { |
3237 | struct btrfs_space_info *data_sinfo; | 2883 | struct btrfs_space_info *data_sinfo; |
2884 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
3238 | u64 used; | 2885 | u64 used; |
3239 | int ret = 0, committed = 0, flushed = 0; | 2886 | int ret = 0, committed = 0; |
3240 | 2887 | ||
3241 | /* make sure bytes are sectorsize aligned */ | 2888 | /* make sure bytes are sectorsize aligned */ |
3242 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); | 2889 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); |
@@ -3248,21 +2895,13 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, | |||
3248 | again: | 2895 | again: |
3249 | /* make sure we have enough space to handle the data first */ | 2896 | /* make sure we have enough space to handle the data first */ |
3250 | spin_lock(&data_sinfo->lock); | 2897 | spin_lock(&data_sinfo->lock); |
3251 | used = data_sinfo->bytes_used + data_sinfo->bytes_delalloc + | 2898 | used = data_sinfo->bytes_used + data_sinfo->bytes_reserved + |
3252 | data_sinfo->bytes_reserved + data_sinfo->bytes_pinned + | 2899 | data_sinfo->bytes_pinned + data_sinfo->bytes_readonly + |
3253 | data_sinfo->bytes_readonly + data_sinfo->bytes_may_use + | 2900 | data_sinfo->bytes_may_use; |
3254 | data_sinfo->bytes_super; | ||
3255 | 2901 | ||
3256 | if (used + bytes > data_sinfo->total_bytes) { | 2902 | if (used + bytes > data_sinfo->total_bytes) { |
3257 | struct btrfs_trans_handle *trans; | 2903 | struct btrfs_trans_handle *trans; |
3258 | 2904 | ||
3259 | if (!flushed) { | ||
3260 | spin_unlock(&data_sinfo->lock); | ||
3261 | flush_delalloc(root, data_sinfo); | ||
3262 | flushed = 1; | ||
3263 | goto again; | ||
3264 | } | ||
3265 | |||
3266 | /* | 2905 | /* |
3267 | * if we don't have enough free bytes in this space then we need | 2906 | * if we don't have enough free bytes in this space then we need |
3268 | * to alloc a new chunk. | 2907 | * to alloc a new chunk. |
@@ -3274,15 +2913,15 @@ again: | |||
3274 | spin_unlock(&data_sinfo->lock); | 2913 | spin_unlock(&data_sinfo->lock); |
3275 | alloc: | 2914 | alloc: |
3276 | alloc_target = btrfs_get_alloc_profile(root, 1); | 2915 | alloc_target = btrfs_get_alloc_profile(root, 1); |
3277 | trans = btrfs_start_transaction(root, 1); | 2916 | trans = btrfs_join_transaction(root, 1); |
3278 | if (!trans) | 2917 | if (IS_ERR(trans)) |
3279 | return -ENOMEM; | 2918 | return PTR_ERR(trans); |
3280 | 2919 | ||
3281 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | 2920 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, |
3282 | bytes + 2 * 1024 * 1024, | 2921 | bytes + 2 * 1024 * 1024, |
3283 | alloc_target, 0); | 2922 | alloc_target, 0); |
3284 | btrfs_end_transaction(trans, root); | 2923 | btrfs_end_transaction(trans, root); |
3285 | if (ret) | 2924 | if (ret < 0) |
3286 | return ret; | 2925 | return ret; |
3287 | 2926 | ||
3288 | if (!data_sinfo) { | 2927 | if (!data_sinfo) { |
@@ -3297,25 +2936,26 @@ alloc: | |||
3297 | if (!committed && !root->fs_info->open_ioctl_trans) { | 2936 | if (!committed && !root->fs_info->open_ioctl_trans) { |
3298 | committed = 1; | 2937 | committed = 1; |
3299 | trans = btrfs_join_transaction(root, 1); | 2938 | trans = btrfs_join_transaction(root, 1); |
3300 | if (!trans) | 2939 | if (IS_ERR(trans)) |
3301 | return -ENOMEM; | 2940 | return PTR_ERR(trans); |
3302 | ret = btrfs_commit_transaction(trans, root); | 2941 | ret = btrfs_commit_transaction(trans, root); |
3303 | if (ret) | 2942 | if (ret) |
3304 | return ret; | 2943 | return ret; |
3305 | goto again; | 2944 | goto again; |
3306 | } | 2945 | } |
3307 | 2946 | ||
3308 | printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes" | 2947 | #if 0 /* I hope we never need this code again, just in case */ |
3309 | ", %llu bytes_used, %llu bytes_reserved, " | 2948 | printk(KERN_ERR "no space left, need %llu, %llu bytes_used, " |
3310 | "%llu bytes_pinned, %llu bytes_readonly, %llu may use " | 2949 | "%llu bytes_reserved, " "%llu bytes_pinned, " |
3311 | "%llu total\n", (unsigned long long)bytes, | 2950 | "%llu bytes_readonly, %llu may use %llu total\n", |
3312 | (unsigned long long)data_sinfo->bytes_delalloc, | 2951 | (unsigned long long)bytes, |
3313 | (unsigned long long)data_sinfo->bytes_used, | 2952 | (unsigned long long)data_sinfo->bytes_used, |
3314 | (unsigned long long)data_sinfo->bytes_reserved, | 2953 | (unsigned long long)data_sinfo->bytes_reserved, |
3315 | (unsigned long long)data_sinfo->bytes_pinned, | 2954 | (unsigned long long)data_sinfo->bytes_pinned, |
3316 | (unsigned long long)data_sinfo->bytes_readonly, | 2955 | (unsigned long long)data_sinfo->bytes_readonly, |
3317 | (unsigned long long)data_sinfo->bytes_may_use, | 2956 | (unsigned long long)data_sinfo->bytes_may_use, |
3318 | (unsigned long long)data_sinfo->total_bytes); | 2957 | (unsigned long long)data_sinfo->total_bytes); |
2958 | #endif | ||
3319 | return -ENOSPC; | 2959 | return -ENOSPC; |
3320 | } | 2960 | } |
3321 | data_sinfo->bytes_may_use += bytes; | 2961 | data_sinfo->bytes_may_use += bytes; |
@@ -3326,12 +2966,13 @@ alloc: | |||
3326 | } | 2966 | } |
3327 | 2967 | ||
3328 | /* | 2968 | /* |
3329 | * if there was an error for whatever reason after calling | 2969 | * called when we are clearing an delalloc extent from the |
3330 | * btrfs_check_data_free_space, call this so we can cleanup the counters. | 2970 | * inode's io_tree or there was an error for whatever reason |
2971 | * after calling btrfs_check_data_free_space | ||
3331 | */ | 2972 | */ |
3332 | void btrfs_free_reserved_data_space(struct btrfs_root *root, | 2973 | void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) |
3333 | struct inode *inode, u64 bytes) | ||
3334 | { | 2974 | { |
2975 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
3335 | struct btrfs_space_info *data_sinfo; | 2976 | struct btrfs_space_info *data_sinfo; |
3336 | 2977 | ||
3337 | /* make sure bytes are sectorsize aligned */ | 2978 | /* make sure bytes are sectorsize aligned */ |
@@ -3344,48 +2985,6 @@ void btrfs_free_reserved_data_space(struct btrfs_root *root, | |||
3344 | spin_unlock(&data_sinfo->lock); | 2985 | spin_unlock(&data_sinfo->lock); |
3345 | } | 2986 | } |
3346 | 2987 | ||
3347 | /* called when we are adding a delalloc extent to the inode's io_tree */ | ||
3348 | void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, | ||
3349 | u64 bytes) | ||
3350 | { | ||
3351 | struct btrfs_space_info *data_sinfo; | ||
3352 | |||
3353 | /* get the space info for where this inode will be storing its data */ | ||
3354 | data_sinfo = BTRFS_I(inode)->space_info; | ||
3355 | |||
3356 | /* make sure we have enough space to handle the data first */ | ||
3357 | spin_lock(&data_sinfo->lock); | ||
3358 | data_sinfo->bytes_delalloc += bytes; | ||
3359 | |||
3360 | /* | ||
3361 | * we are adding a delalloc extent without calling | ||
3362 | * btrfs_check_data_free_space first. This happens on a weird | ||
3363 | * writepage condition, but shouldn't hurt our accounting | ||
3364 | */ | ||
3365 | if (unlikely(bytes > BTRFS_I(inode)->reserved_bytes)) { | ||
3366 | data_sinfo->bytes_may_use -= BTRFS_I(inode)->reserved_bytes; | ||
3367 | BTRFS_I(inode)->reserved_bytes = 0; | ||
3368 | } else { | ||
3369 | data_sinfo->bytes_may_use -= bytes; | ||
3370 | BTRFS_I(inode)->reserved_bytes -= bytes; | ||
3371 | } | ||
3372 | |||
3373 | spin_unlock(&data_sinfo->lock); | ||
3374 | } | ||
3375 | |||
3376 | /* called when we are clearing an delalloc extent from the inode's io_tree */ | ||
3377 | void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, | ||
3378 | u64 bytes) | ||
3379 | { | ||
3380 | struct btrfs_space_info *info; | ||
3381 | |||
3382 | info = BTRFS_I(inode)->space_info; | ||
3383 | |||
3384 | spin_lock(&info->lock); | ||
3385 | info->bytes_delalloc -= bytes; | ||
3386 | spin_unlock(&info->lock); | ||
3387 | } | ||
3388 | |||
3389 | static void force_metadata_allocation(struct btrfs_fs_info *info) | 2988 | static void force_metadata_allocation(struct btrfs_fs_info *info) |
3390 | { | 2989 | { |
3391 | struct list_head *head = &info->space_info; | 2990 | struct list_head *head = &info->space_info; |
@@ -3399,13 +2998,28 @@ static void force_metadata_allocation(struct btrfs_fs_info *info) | |||
3399 | rcu_read_unlock(); | 2998 | rcu_read_unlock(); |
3400 | } | 2999 | } |
3401 | 3000 | ||
3001 | static int should_alloc_chunk(struct btrfs_space_info *sinfo, | ||
3002 | u64 alloc_bytes) | ||
3003 | { | ||
3004 | u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; | ||
3005 | |||
3006 | if (sinfo->bytes_used + sinfo->bytes_reserved + | ||
3007 | alloc_bytes + 256 * 1024 * 1024 < num_bytes) | ||
3008 | return 0; | ||
3009 | |||
3010 | if (sinfo->bytes_used + sinfo->bytes_reserved + | ||
3011 | alloc_bytes < div_factor(num_bytes, 8)) | ||
3012 | return 0; | ||
3013 | |||
3014 | return 1; | ||
3015 | } | ||
3016 | |||
3402 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, | 3017 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, |
3403 | struct btrfs_root *extent_root, u64 alloc_bytes, | 3018 | struct btrfs_root *extent_root, u64 alloc_bytes, |
3404 | u64 flags, int force) | 3019 | u64 flags, int force) |
3405 | { | 3020 | { |
3406 | struct btrfs_space_info *space_info; | 3021 | struct btrfs_space_info *space_info; |
3407 | struct btrfs_fs_info *fs_info = extent_root->fs_info; | 3022 | struct btrfs_fs_info *fs_info = extent_root->fs_info; |
3408 | u64 thresh; | ||
3409 | int ret = 0; | 3023 | int ret = 0; |
3410 | 3024 | ||
3411 | mutex_lock(&fs_info->chunk_mutex); | 3025 | mutex_lock(&fs_info->chunk_mutex); |
@@ -3428,11 +3042,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
3428 | goto out; | 3042 | goto out; |
3429 | } | 3043 | } |
3430 | 3044 | ||
3431 | thresh = space_info->total_bytes - space_info->bytes_readonly; | 3045 | if (!force && !should_alloc_chunk(space_info, alloc_bytes)) { |
3432 | thresh = div_factor(thresh, 8); | ||
3433 | if (!force && | ||
3434 | (space_info->bytes_used + space_info->bytes_pinned + | ||
3435 | space_info->bytes_reserved + alloc_bytes) < thresh) { | ||
3436 | spin_unlock(&space_info->lock); | 3046 | spin_unlock(&space_info->lock); |
3437 | goto out; | 3047 | goto out; |
3438 | } | 3048 | } |
@@ -3454,6 +3064,8 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
3454 | spin_lock(&space_info->lock); | 3064 | spin_lock(&space_info->lock); |
3455 | if (ret) | 3065 | if (ret) |
3456 | space_info->full = 1; | 3066 | space_info->full = 1; |
3067 | else | ||
3068 | ret = 1; | ||
3457 | space_info->force_alloc = 0; | 3069 | space_info->force_alloc = 0; |
3458 | spin_unlock(&space_info->lock); | 3070 | spin_unlock(&space_info->lock); |
3459 | out: | 3071 | out: |
@@ -3461,13 +3073,713 @@ out: | |||
3461 | return ret; | 3073 | return ret; |
3462 | } | 3074 | } |
3463 | 3075 | ||
3076 | static int maybe_allocate_chunk(struct btrfs_trans_handle *trans, | ||
3077 | struct btrfs_root *root, | ||
3078 | struct btrfs_space_info *sinfo, u64 num_bytes) | ||
3079 | { | ||
3080 | int ret; | ||
3081 | int end_trans = 0; | ||
3082 | |||
3083 | if (sinfo->full) | ||
3084 | return 0; | ||
3085 | |||
3086 | spin_lock(&sinfo->lock); | ||
3087 | ret = should_alloc_chunk(sinfo, num_bytes + 2 * 1024 * 1024); | ||
3088 | spin_unlock(&sinfo->lock); | ||
3089 | if (!ret) | ||
3090 | return 0; | ||
3091 | |||
3092 | if (!trans) { | ||
3093 | trans = btrfs_join_transaction(root, 1); | ||
3094 | BUG_ON(IS_ERR(trans)); | ||
3095 | end_trans = 1; | ||
3096 | } | ||
3097 | |||
3098 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | ||
3099 | num_bytes + 2 * 1024 * 1024, | ||
3100 | get_alloc_profile(root, sinfo->flags), 0); | ||
3101 | |||
3102 | if (end_trans) | ||
3103 | btrfs_end_transaction(trans, root); | ||
3104 | |||
3105 | return ret == 1 ? 1 : 0; | ||
3106 | } | ||
3107 | |||
3108 | /* | ||
3109 | * shrink metadata reservation for delalloc | ||
3110 | */ | ||
3111 | static int shrink_delalloc(struct btrfs_trans_handle *trans, | ||
3112 | struct btrfs_root *root, u64 to_reclaim) | ||
3113 | { | ||
3114 | struct btrfs_block_rsv *block_rsv; | ||
3115 | u64 reserved; | ||
3116 | u64 max_reclaim; | ||
3117 | u64 reclaimed = 0; | ||
3118 | int pause = 1; | ||
3119 | int ret; | ||
3120 | |||
3121 | block_rsv = &root->fs_info->delalloc_block_rsv; | ||
3122 | spin_lock(&block_rsv->lock); | ||
3123 | reserved = block_rsv->reserved; | ||
3124 | spin_unlock(&block_rsv->lock); | ||
3125 | |||
3126 | if (reserved == 0) | ||
3127 | return 0; | ||
3128 | |||
3129 | max_reclaim = min(reserved, to_reclaim); | ||
3130 | |||
3131 | while (1) { | ||
3132 | ret = btrfs_start_one_delalloc_inode(root, trans ? 1 : 0); | ||
3133 | if (!ret) { | ||
3134 | __set_current_state(TASK_INTERRUPTIBLE); | ||
3135 | schedule_timeout(pause); | ||
3136 | pause <<= 1; | ||
3137 | if (pause > HZ / 10) | ||
3138 | pause = HZ / 10; | ||
3139 | } else { | ||
3140 | pause = 1; | ||
3141 | } | ||
3142 | |||
3143 | spin_lock(&block_rsv->lock); | ||
3144 | if (reserved > block_rsv->reserved) | ||
3145 | reclaimed = reserved - block_rsv->reserved; | ||
3146 | reserved = block_rsv->reserved; | ||
3147 | spin_unlock(&block_rsv->lock); | ||
3148 | |||
3149 | if (reserved == 0 || reclaimed >= max_reclaim) | ||
3150 | break; | ||
3151 | |||
3152 | if (trans && trans->transaction->blocked) | ||
3153 | return -EAGAIN; | ||
3154 | } | ||
3155 | return reclaimed >= to_reclaim; | ||
3156 | } | ||
3157 | |||
3158 | static int should_retry_reserve(struct btrfs_trans_handle *trans, | ||
3159 | struct btrfs_root *root, | ||
3160 | struct btrfs_block_rsv *block_rsv, | ||
3161 | u64 num_bytes, int *retries) | ||
3162 | { | ||
3163 | struct btrfs_space_info *space_info = block_rsv->space_info; | ||
3164 | int ret; | ||
3165 | |||
3166 | if ((*retries) > 2) | ||
3167 | return -ENOSPC; | ||
3168 | |||
3169 | ret = maybe_allocate_chunk(trans, root, space_info, num_bytes); | ||
3170 | if (ret) | ||
3171 | return 1; | ||
3172 | |||
3173 | if (trans && trans->transaction->in_commit) | ||
3174 | return -ENOSPC; | ||
3175 | |||
3176 | ret = shrink_delalloc(trans, root, num_bytes); | ||
3177 | if (ret) | ||
3178 | return ret; | ||
3179 | |||
3180 | spin_lock(&space_info->lock); | ||
3181 | if (space_info->bytes_pinned < num_bytes) | ||
3182 | ret = 1; | ||
3183 | spin_unlock(&space_info->lock); | ||
3184 | if (ret) | ||
3185 | return -ENOSPC; | ||
3186 | |||
3187 | (*retries)++; | ||
3188 | |||
3189 | if (trans) | ||
3190 | return -EAGAIN; | ||
3191 | |||
3192 | trans = btrfs_join_transaction(root, 1); | ||
3193 | BUG_ON(IS_ERR(trans)); | ||
3194 | ret = btrfs_commit_transaction(trans, root); | ||
3195 | BUG_ON(ret); | ||
3196 | |||
3197 | return 1; | ||
3198 | } | ||
3199 | |||
3200 | static int reserve_metadata_bytes(struct btrfs_block_rsv *block_rsv, | ||
3201 | u64 num_bytes) | ||
3202 | { | ||
3203 | struct btrfs_space_info *space_info = block_rsv->space_info; | ||
3204 | u64 unused; | ||
3205 | int ret = -ENOSPC; | ||
3206 | |||
3207 | spin_lock(&space_info->lock); | ||
3208 | unused = space_info->bytes_used + space_info->bytes_reserved + | ||
3209 | space_info->bytes_pinned + space_info->bytes_readonly; | ||
3210 | |||
3211 | if (unused < space_info->total_bytes) | ||
3212 | unused = space_info->total_bytes - unused; | ||
3213 | else | ||
3214 | unused = 0; | ||
3215 | |||
3216 | if (unused >= num_bytes) { | ||
3217 | if (block_rsv->priority >= 10) { | ||
3218 | space_info->bytes_reserved += num_bytes; | ||
3219 | ret = 0; | ||
3220 | } else { | ||
3221 | if ((unused + block_rsv->reserved) * | ||
3222 | block_rsv->priority >= | ||
3223 | (num_bytes + block_rsv->reserved) * 10) { | ||
3224 | space_info->bytes_reserved += num_bytes; | ||
3225 | ret = 0; | ||
3226 | } | ||
3227 | } | ||
3228 | } | ||
3229 | spin_unlock(&space_info->lock); | ||
3230 | |||
3231 | return ret; | ||
3232 | } | ||
3233 | |||
3234 | static struct btrfs_block_rsv *get_block_rsv(struct btrfs_trans_handle *trans, | ||
3235 | struct btrfs_root *root) | ||
3236 | { | ||
3237 | struct btrfs_block_rsv *block_rsv; | ||
3238 | if (root->ref_cows) | ||
3239 | block_rsv = trans->block_rsv; | ||
3240 | else | ||
3241 | block_rsv = root->block_rsv; | ||
3242 | |||
3243 | if (!block_rsv) | ||
3244 | block_rsv = &root->fs_info->empty_block_rsv; | ||
3245 | |||
3246 | return block_rsv; | ||
3247 | } | ||
3248 | |||
3249 | static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, | ||
3250 | u64 num_bytes) | ||
3251 | { | ||
3252 | int ret = -ENOSPC; | ||
3253 | spin_lock(&block_rsv->lock); | ||
3254 | if (block_rsv->reserved >= num_bytes) { | ||
3255 | block_rsv->reserved -= num_bytes; | ||
3256 | if (block_rsv->reserved < block_rsv->size) | ||
3257 | block_rsv->full = 0; | ||
3258 | ret = 0; | ||
3259 | } | ||
3260 | spin_unlock(&block_rsv->lock); | ||
3261 | return ret; | ||
3262 | } | ||
3263 | |||
3264 | static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv, | ||
3265 | u64 num_bytes, int update_size) | ||
3266 | { | ||
3267 | spin_lock(&block_rsv->lock); | ||
3268 | block_rsv->reserved += num_bytes; | ||
3269 | if (update_size) | ||
3270 | block_rsv->size += num_bytes; | ||
3271 | else if (block_rsv->reserved >= block_rsv->size) | ||
3272 | block_rsv->full = 1; | ||
3273 | spin_unlock(&block_rsv->lock); | ||
3274 | } | ||
3275 | |||
3276 | void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv, | ||
3277 | struct btrfs_block_rsv *dest, u64 num_bytes) | ||
3278 | { | ||
3279 | struct btrfs_space_info *space_info = block_rsv->space_info; | ||
3280 | |||
3281 | spin_lock(&block_rsv->lock); | ||
3282 | if (num_bytes == (u64)-1) | ||
3283 | num_bytes = block_rsv->size; | ||
3284 | block_rsv->size -= num_bytes; | ||
3285 | if (block_rsv->reserved >= block_rsv->size) { | ||
3286 | num_bytes = block_rsv->reserved - block_rsv->size; | ||
3287 | block_rsv->reserved = block_rsv->size; | ||
3288 | block_rsv->full = 1; | ||
3289 | } else { | ||
3290 | num_bytes = 0; | ||
3291 | } | ||
3292 | spin_unlock(&block_rsv->lock); | ||
3293 | |||
3294 | if (num_bytes > 0) { | ||
3295 | if (dest) { | ||
3296 | block_rsv_add_bytes(dest, num_bytes, 0); | ||
3297 | } else { | ||
3298 | spin_lock(&space_info->lock); | ||
3299 | space_info->bytes_reserved -= num_bytes; | ||
3300 | spin_unlock(&space_info->lock); | ||
3301 | } | ||
3302 | } | ||
3303 | } | ||
3304 | |||
3305 | static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src, | ||
3306 | struct btrfs_block_rsv *dst, u64 num_bytes) | ||
3307 | { | ||
3308 | int ret; | ||
3309 | |||
3310 | ret = block_rsv_use_bytes(src, num_bytes); | ||
3311 | if (ret) | ||
3312 | return ret; | ||
3313 | |||
3314 | block_rsv_add_bytes(dst, num_bytes, 1); | ||
3315 | return 0; | ||
3316 | } | ||
3317 | |||
3318 | void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv) | ||
3319 | { | ||
3320 | memset(rsv, 0, sizeof(*rsv)); | ||
3321 | spin_lock_init(&rsv->lock); | ||
3322 | atomic_set(&rsv->usage, 1); | ||
3323 | rsv->priority = 6; | ||
3324 | INIT_LIST_HEAD(&rsv->list); | ||
3325 | } | ||
3326 | |||
3327 | struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root) | ||
3328 | { | ||
3329 | struct btrfs_block_rsv *block_rsv; | ||
3330 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
3331 | u64 alloc_target; | ||
3332 | |||
3333 | block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS); | ||
3334 | if (!block_rsv) | ||
3335 | return NULL; | ||
3336 | |||
3337 | btrfs_init_block_rsv(block_rsv); | ||
3338 | |||
3339 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
3340 | block_rsv->space_info = __find_space_info(fs_info, | ||
3341 | BTRFS_BLOCK_GROUP_METADATA); | ||
3342 | |||
3343 | return block_rsv; | ||
3344 | } | ||
3345 | |||
3346 | void btrfs_free_block_rsv(struct btrfs_root *root, | ||
3347 | struct btrfs_block_rsv *rsv) | ||
3348 | { | ||
3349 | if (rsv && atomic_dec_and_test(&rsv->usage)) { | ||
3350 | btrfs_block_rsv_release(root, rsv, (u64)-1); | ||
3351 | if (!rsv->durable) | ||
3352 | kfree(rsv); | ||
3353 | } | ||
3354 | } | ||
3355 | |||
3356 | /* | ||
3357 | * make the block_rsv struct be able to capture freed space. | ||
3358 | * the captured space will re-add to the the block_rsv struct | ||
3359 | * after transaction commit | ||
3360 | */ | ||
3361 | void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info, | ||
3362 | struct btrfs_block_rsv *block_rsv) | ||
3363 | { | ||
3364 | block_rsv->durable = 1; | ||
3365 | mutex_lock(&fs_info->durable_block_rsv_mutex); | ||
3366 | list_add_tail(&block_rsv->list, &fs_info->durable_block_rsv_list); | ||
3367 | mutex_unlock(&fs_info->durable_block_rsv_mutex); | ||
3368 | } | ||
3369 | |||
3370 | int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, | ||
3371 | struct btrfs_root *root, | ||
3372 | struct btrfs_block_rsv *block_rsv, | ||
3373 | u64 num_bytes, int *retries) | ||
3374 | { | ||
3375 | int ret; | ||
3376 | |||
3377 | if (num_bytes == 0) | ||
3378 | return 0; | ||
3379 | again: | ||
3380 | ret = reserve_metadata_bytes(block_rsv, num_bytes); | ||
3381 | if (!ret) { | ||
3382 | block_rsv_add_bytes(block_rsv, num_bytes, 1); | ||
3383 | return 0; | ||
3384 | } | ||
3385 | |||
3386 | ret = should_retry_reserve(trans, root, block_rsv, num_bytes, retries); | ||
3387 | if (ret > 0) | ||
3388 | goto again; | ||
3389 | |||
3390 | return ret; | ||
3391 | } | ||
3392 | |||
3393 | int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, | ||
3394 | struct btrfs_root *root, | ||
3395 | struct btrfs_block_rsv *block_rsv, | ||
3396 | u64 min_reserved, int min_factor) | ||
3397 | { | ||
3398 | u64 num_bytes = 0; | ||
3399 | int commit_trans = 0; | ||
3400 | int ret = -ENOSPC; | ||
3401 | |||
3402 | if (!block_rsv) | ||
3403 | return 0; | ||
3404 | |||
3405 | spin_lock(&block_rsv->lock); | ||
3406 | if (min_factor > 0) | ||
3407 | num_bytes = div_factor(block_rsv->size, min_factor); | ||
3408 | if (min_reserved > num_bytes) | ||
3409 | num_bytes = min_reserved; | ||
3410 | |||
3411 | if (block_rsv->reserved >= num_bytes) { | ||
3412 | ret = 0; | ||
3413 | } else { | ||
3414 | num_bytes -= block_rsv->reserved; | ||
3415 | if (block_rsv->durable && | ||
3416 | block_rsv->freed[0] + block_rsv->freed[1] >= num_bytes) | ||
3417 | commit_trans = 1; | ||
3418 | } | ||
3419 | spin_unlock(&block_rsv->lock); | ||
3420 | if (!ret) | ||
3421 | return 0; | ||
3422 | |||
3423 | if (block_rsv->refill_used) { | ||
3424 | ret = reserve_metadata_bytes(block_rsv, num_bytes); | ||
3425 | if (!ret) { | ||
3426 | block_rsv_add_bytes(block_rsv, num_bytes, 0); | ||
3427 | return 0; | ||
3428 | } | ||
3429 | } | ||
3430 | |||
3431 | if (commit_trans) { | ||
3432 | if (trans) | ||
3433 | return -EAGAIN; | ||
3434 | |||
3435 | trans = btrfs_join_transaction(root, 1); | ||
3436 | BUG_ON(IS_ERR(trans)); | ||
3437 | ret = btrfs_commit_transaction(trans, root); | ||
3438 | return 0; | ||
3439 | } | ||
3440 | |||
3441 | WARN_ON(1); | ||
3442 | printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n", | ||
3443 | block_rsv->size, block_rsv->reserved, | ||
3444 | block_rsv->freed[0], block_rsv->freed[1]); | ||
3445 | |||
3446 | return -ENOSPC; | ||
3447 | } | ||
3448 | |||
3449 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, | ||
3450 | struct btrfs_block_rsv *dst_rsv, | ||
3451 | u64 num_bytes) | ||
3452 | { | ||
3453 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); | ||
3454 | } | ||
3455 | |||
3456 | void btrfs_block_rsv_release(struct btrfs_root *root, | ||
3457 | struct btrfs_block_rsv *block_rsv, | ||
3458 | u64 num_bytes) | ||
3459 | { | ||
3460 | struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; | ||
3461 | if (global_rsv->full || global_rsv == block_rsv || | ||
3462 | block_rsv->space_info != global_rsv->space_info) | ||
3463 | global_rsv = NULL; | ||
3464 | block_rsv_release_bytes(block_rsv, global_rsv, num_bytes); | ||
3465 | } | ||
3466 | |||
3467 | /* | ||
3468 | * helper to calculate size of global block reservation. | ||
3469 | * the desired value is sum of space used by extent tree, | ||
3470 | * checksum tree and root tree | ||
3471 | */ | ||
3472 | static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info) | ||
3473 | { | ||
3474 | struct btrfs_space_info *sinfo; | ||
3475 | u64 num_bytes; | ||
3476 | u64 meta_used; | ||
3477 | u64 data_used; | ||
3478 | int csum_size = btrfs_super_csum_size(&fs_info->super_copy); | ||
3479 | #if 0 | ||
3480 | /* | ||
3481 | * per tree used space accounting can be inaccuracy, so we | ||
3482 | * can't rely on it. | ||
3483 | */ | ||
3484 | spin_lock(&fs_info->extent_root->accounting_lock); | ||
3485 | num_bytes = btrfs_root_used(&fs_info->extent_root->root_item); | ||
3486 | spin_unlock(&fs_info->extent_root->accounting_lock); | ||
3487 | |||
3488 | spin_lock(&fs_info->csum_root->accounting_lock); | ||
3489 | num_bytes += btrfs_root_used(&fs_info->csum_root->root_item); | ||
3490 | spin_unlock(&fs_info->csum_root->accounting_lock); | ||
3491 | |||
3492 | spin_lock(&fs_info->tree_root->accounting_lock); | ||
3493 | num_bytes += btrfs_root_used(&fs_info->tree_root->root_item); | ||
3494 | spin_unlock(&fs_info->tree_root->accounting_lock); | ||
3495 | #endif | ||
3496 | sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA); | ||
3497 | spin_lock(&sinfo->lock); | ||
3498 | data_used = sinfo->bytes_used; | ||
3499 | spin_unlock(&sinfo->lock); | ||
3500 | |||
3501 | sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); | ||
3502 | spin_lock(&sinfo->lock); | ||
3503 | meta_used = sinfo->bytes_used; | ||
3504 | spin_unlock(&sinfo->lock); | ||
3505 | |||
3506 | num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) * | ||
3507 | csum_size * 2; | ||
3508 | num_bytes += div64_u64(data_used + meta_used, 50); | ||
3509 | |||
3510 | if (num_bytes * 3 > meta_used) | ||
3511 | num_bytes = div64_u64(meta_used, 3); | ||
3512 | |||
3513 | return ALIGN(num_bytes, fs_info->extent_root->leafsize << 10); | ||
3514 | } | ||
3515 | |||
3516 | static void update_global_block_rsv(struct btrfs_fs_info *fs_info) | ||
3517 | { | ||
3518 | struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; | ||
3519 | struct btrfs_space_info *sinfo = block_rsv->space_info; | ||
3520 | u64 num_bytes; | ||
3521 | |||
3522 | num_bytes = calc_global_metadata_size(fs_info); | ||
3523 | |||
3524 | spin_lock(&block_rsv->lock); | ||
3525 | spin_lock(&sinfo->lock); | ||
3526 | |||
3527 | block_rsv->size = num_bytes; | ||
3528 | |||
3529 | num_bytes = sinfo->bytes_used + sinfo->bytes_pinned + | ||
3530 | sinfo->bytes_reserved + sinfo->bytes_readonly; | ||
3531 | |||
3532 | if (sinfo->total_bytes > num_bytes) { | ||
3533 | num_bytes = sinfo->total_bytes - num_bytes; | ||
3534 | block_rsv->reserved += num_bytes; | ||
3535 | sinfo->bytes_reserved += num_bytes; | ||
3536 | } | ||
3537 | |||
3538 | if (block_rsv->reserved >= block_rsv->size) { | ||
3539 | num_bytes = block_rsv->reserved - block_rsv->size; | ||
3540 | sinfo->bytes_reserved -= num_bytes; | ||
3541 | block_rsv->reserved = block_rsv->size; | ||
3542 | block_rsv->full = 1; | ||
3543 | } | ||
3544 | #if 0 | ||
3545 | printk(KERN_INFO"global block rsv size %llu reserved %llu\n", | ||
3546 | block_rsv->size, block_rsv->reserved); | ||
3547 | #endif | ||
3548 | spin_unlock(&sinfo->lock); | ||
3549 | spin_unlock(&block_rsv->lock); | ||
3550 | } | ||
3551 | |||
3552 | static void init_global_block_rsv(struct btrfs_fs_info *fs_info) | ||
3553 | { | ||
3554 | struct btrfs_space_info *space_info; | ||
3555 | |||
3556 | space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); | ||
3557 | fs_info->chunk_block_rsv.space_info = space_info; | ||
3558 | fs_info->chunk_block_rsv.priority = 10; | ||
3559 | |||
3560 | space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); | ||
3561 | fs_info->global_block_rsv.space_info = space_info; | ||
3562 | fs_info->global_block_rsv.priority = 10; | ||
3563 | fs_info->global_block_rsv.refill_used = 1; | ||
3564 | fs_info->delalloc_block_rsv.space_info = space_info; | ||
3565 | fs_info->trans_block_rsv.space_info = space_info; | ||
3566 | fs_info->empty_block_rsv.space_info = space_info; | ||
3567 | fs_info->empty_block_rsv.priority = 10; | ||
3568 | |||
3569 | fs_info->extent_root->block_rsv = &fs_info->global_block_rsv; | ||
3570 | fs_info->csum_root->block_rsv = &fs_info->global_block_rsv; | ||
3571 | fs_info->dev_root->block_rsv = &fs_info->global_block_rsv; | ||
3572 | fs_info->tree_root->block_rsv = &fs_info->global_block_rsv; | ||
3573 | fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv; | ||
3574 | |||
3575 | btrfs_add_durable_block_rsv(fs_info, &fs_info->global_block_rsv); | ||
3576 | |||
3577 | btrfs_add_durable_block_rsv(fs_info, &fs_info->delalloc_block_rsv); | ||
3578 | |||
3579 | update_global_block_rsv(fs_info); | ||
3580 | } | ||
3581 | |||
3582 | static void release_global_block_rsv(struct btrfs_fs_info *fs_info) | ||
3583 | { | ||
3584 | block_rsv_release_bytes(&fs_info->global_block_rsv, NULL, (u64)-1); | ||
3585 | WARN_ON(fs_info->delalloc_block_rsv.size > 0); | ||
3586 | WARN_ON(fs_info->delalloc_block_rsv.reserved > 0); | ||
3587 | WARN_ON(fs_info->trans_block_rsv.size > 0); | ||
3588 | WARN_ON(fs_info->trans_block_rsv.reserved > 0); | ||
3589 | WARN_ON(fs_info->chunk_block_rsv.size > 0); | ||
3590 | WARN_ON(fs_info->chunk_block_rsv.reserved > 0); | ||
3591 | } | ||
3592 | |||
3593 | static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items) | ||
3594 | { | ||
3595 | return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * | ||
3596 | 3 * num_items; | ||
3597 | } | ||
3598 | |||
3599 | int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, | ||
3600 | struct btrfs_root *root, | ||
3601 | int num_items, int *retries) | ||
3602 | { | ||
3603 | u64 num_bytes; | ||
3604 | int ret; | ||
3605 | |||
3606 | if (num_items == 0 || root->fs_info->chunk_root == root) | ||
3607 | return 0; | ||
3608 | |||
3609 | num_bytes = calc_trans_metadata_size(root, num_items); | ||
3610 | ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv, | ||
3611 | num_bytes, retries); | ||
3612 | if (!ret) { | ||
3613 | trans->bytes_reserved += num_bytes; | ||
3614 | trans->block_rsv = &root->fs_info->trans_block_rsv; | ||
3615 | } | ||
3616 | return ret; | ||
3617 | } | ||
3618 | |||
3619 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, | ||
3620 | struct btrfs_root *root) | ||
3621 | { | ||
3622 | if (!trans->bytes_reserved) | ||
3623 | return; | ||
3624 | |||
3625 | BUG_ON(trans->block_rsv != &root->fs_info->trans_block_rsv); | ||
3626 | btrfs_block_rsv_release(root, trans->block_rsv, | ||
3627 | trans->bytes_reserved); | ||
3628 | trans->bytes_reserved = 0; | ||
3629 | } | ||
3630 | |||
3631 | int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, | ||
3632 | struct inode *inode) | ||
3633 | { | ||
3634 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
3635 | struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root); | ||
3636 | struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv; | ||
3637 | |||
3638 | /* | ||
3639 | * one for deleting orphan item, one for updating inode and | ||
3640 | * two for calling btrfs_truncate_inode_items. | ||
3641 | * | ||
3642 | * btrfs_truncate_inode_items is a delete operation, it frees | ||
3643 | * more space than it uses in most cases. So two units of | ||
3644 | * metadata space should be enough for calling it many times. | ||
3645 | * If all of the metadata space is used, we can commit | ||
3646 | * transaction and use space it freed. | ||
3647 | */ | ||
3648 | u64 num_bytes = calc_trans_metadata_size(root, 4); | ||
3649 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); | ||
3650 | } | ||
3651 | |||
3652 | void btrfs_orphan_release_metadata(struct inode *inode) | ||
3653 | { | ||
3654 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
3655 | u64 num_bytes = calc_trans_metadata_size(root, 4); | ||
3656 | btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes); | ||
3657 | } | ||
3658 | |||
3659 | int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans, | ||
3660 | struct btrfs_pending_snapshot *pending) | ||
3661 | { | ||
3662 | struct btrfs_root *root = pending->root; | ||
3663 | struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root); | ||
3664 | struct btrfs_block_rsv *dst_rsv = &pending->block_rsv; | ||
3665 | /* | ||
3666 | * two for root back/forward refs, two for directory entries | ||
3667 | * and one for root of the snapshot. | ||
3668 | */ | ||
3669 | u64 num_bytes = calc_trans_metadata_size(root, 5); | ||
3670 | dst_rsv->space_info = src_rsv->space_info; | ||
3671 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); | ||
3672 | } | ||
3673 | |||
3674 | static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes) | ||
3675 | { | ||
3676 | return num_bytes >>= 3; | ||
3677 | } | ||
3678 | |||
3679 | int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | ||
3680 | { | ||
3681 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
3682 | struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; | ||
3683 | u64 to_reserve; | ||
3684 | int nr_extents; | ||
3685 | int retries = 0; | ||
3686 | int ret; | ||
3687 | |||
3688 | if (btrfs_transaction_in_commit(root->fs_info)) | ||
3689 | schedule_timeout(1); | ||
3690 | |||
3691 | num_bytes = ALIGN(num_bytes, root->sectorsize); | ||
3692 | again: | ||
3693 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
3694 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1; | ||
3695 | if (nr_extents > BTRFS_I(inode)->reserved_extents) { | ||
3696 | nr_extents -= BTRFS_I(inode)->reserved_extents; | ||
3697 | to_reserve = calc_trans_metadata_size(root, nr_extents); | ||
3698 | } else { | ||
3699 | nr_extents = 0; | ||
3700 | to_reserve = 0; | ||
3701 | } | ||
3702 | |||
3703 | to_reserve += calc_csum_metadata_size(inode, num_bytes); | ||
3704 | ret = reserve_metadata_bytes(block_rsv, to_reserve); | ||
3705 | if (ret) { | ||
3706 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
3707 | ret = should_retry_reserve(NULL, root, block_rsv, to_reserve, | ||
3708 | &retries); | ||
3709 | if (ret > 0) | ||
3710 | goto again; | ||
3711 | return ret; | ||
3712 | } | ||
3713 | |||
3714 | BTRFS_I(inode)->reserved_extents += nr_extents; | ||
3715 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); | ||
3716 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
3717 | |||
3718 | block_rsv_add_bytes(block_rsv, to_reserve, 1); | ||
3719 | |||
3720 | if (block_rsv->size > 512 * 1024 * 1024) | ||
3721 | shrink_delalloc(NULL, root, to_reserve); | ||
3722 | |||
3723 | return 0; | ||
3724 | } | ||
3725 | |||
3726 | void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) | ||
3727 | { | ||
3728 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
3729 | u64 to_free; | ||
3730 | int nr_extents; | ||
3731 | |||
3732 | num_bytes = ALIGN(num_bytes, root->sectorsize); | ||
3733 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); | ||
3734 | |||
3735 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
3736 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents); | ||
3737 | if (nr_extents < BTRFS_I(inode)->reserved_extents) { | ||
3738 | nr_extents = BTRFS_I(inode)->reserved_extents - nr_extents; | ||
3739 | BTRFS_I(inode)->reserved_extents -= nr_extents; | ||
3740 | } else { | ||
3741 | nr_extents = 0; | ||
3742 | } | ||
3743 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
3744 | |||
3745 | to_free = calc_csum_metadata_size(inode, num_bytes); | ||
3746 | if (nr_extents > 0) | ||
3747 | to_free += calc_trans_metadata_size(root, nr_extents); | ||
3748 | |||
3749 | btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, | ||
3750 | to_free); | ||
3751 | } | ||
3752 | |||
3753 | int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes) | ||
3754 | { | ||
3755 | int ret; | ||
3756 | |||
3757 | ret = btrfs_check_data_free_space(inode, num_bytes); | ||
3758 | if (ret) | ||
3759 | return ret; | ||
3760 | |||
3761 | ret = btrfs_delalloc_reserve_metadata(inode, num_bytes); | ||
3762 | if (ret) { | ||
3763 | btrfs_free_reserved_data_space(inode, num_bytes); | ||
3764 | return ret; | ||
3765 | } | ||
3766 | |||
3767 | return 0; | ||
3768 | } | ||
3769 | |||
3770 | void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes) | ||
3771 | { | ||
3772 | btrfs_delalloc_release_metadata(inode, num_bytes); | ||
3773 | btrfs_free_reserved_data_space(inode, num_bytes); | ||
3774 | } | ||
3775 | |||
3464 | static int update_block_group(struct btrfs_trans_handle *trans, | 3776 | static int update_block_group(struct btrfs_trans_handle *trans, |
3465 | struct btrfs_root *root, | 3777 | struct btrfs_root *root, |
3466 | u64 bytenr, u64 num_bytes, int alloc, | 3778 | u64 bytenr, u64 num_bytes, int alloc) |
3467 | int mark_free) | ||
3468 | { | 3779 | { |
3469 | struct btrfs_block_group_cache *cache; | 3780 | struct btrfs_block_group_cache *cache; |
3470 | struct btrfs_fs_info *info = root->fs_info; | 3781 | struct btrfs_fs_info *info = root->fs_info; |
3782 | int factor; | ||
3471 | u64 total = num_bytes; | 3783 | u64 total = num_bytes; |
3472 | u64 old_val; | 3784 | u64 old_val; |
3473 | u64 byte_in_group; | 3785 | u64 byte_in_group; |
@@ -3486,6 +3798,12 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
3486 | cache = btrfs_lookup_block_group(info, bytenr); | 3798 | cache = btrfs_lookup_block_group(info, bytenr); |
3487 | if (!cache) | 3799 | if (!cache) |
3488 | return -1; | 3800 | return -1; |
3801 | if (cache->flags & (BTRFS_BLOCK_GROUP_DUP | | ||
3802 | BTRFS_BLOCK_GROUP_RAID1 | | ||
3803 | BTRFS_BLOCK_GROUP_RAID10)) | ||
3804 | factor = 2; | ||
3805 | else | ||
3806 | factor = 1; | ||
3489 | byte_in_group = bytenr - cache->key.objectid; | 3807 | byte_in_group = bytenr - cache->key.objectid; |
3490 | WARN_ON(byte_in_group > cache->key.offset); | 3808 | WARN_ON(byte_in_group > cache->key.offset); |
3491 | 3809 | ||
@@ -3498,31 +3816,24 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
3498 | old_val += num_bytes; | 3816 | old_val += num_bytes; |
3499 | btrfs_set_block_group_used(&cache->item, old_val); | 3817 | btrfs_set_block_group_used(&cache->item, old_val); |
3500 | cache->reserved -= num_bytes; | 3818 | cache->reserved -= num_bytes; |
3501 | cache->space_info->bytes_used += num_bytes; | ||
3502 | cache->space_info->bytes_reserved -= num_bytes; | 3819 | cache->space_info->bytes_reserved -= num_bytes; |
3503 | if (cache->ro) | 3820 | cache->space_info->bytes_used += num_bytes; |
3504 | cache->space_info->bytes_readonly -= num_bytes; | 3821 | cache->space_info->disk_used += num_bytes * factor; |
3505 | spin_unlock(&cache->lock); | 3822 | spin_unlock(&cache->lock); |
3506 | spin_unlock(&cache->space_info->lock); | 3823 | spin_unlock(&cache->space_info->lock); |
3507 | } else { | 3824 | } else { |
3508 | old_val -= num_bytes; | 3825 | old_val -= num_bytes; |
3509 | cache->space_info->bytes_used -= num_bytes; | ||
3510 | if (cache->ro) | ||
3511 | cache->space_info->bytes_readonly += num_bytes; | ||
3512 | btrfs_set_block_group_used(&cache->item, old_val); | 3826 | btrfs_set_block_group_used(&cache->item, old_val); |
3827 | cache->pinned += num_bytes; | ||
3828 | cache->space_info->bytes_pinned += num_bytes; | ||
3829 | cache->space_info->bytes_used -= num_bytes; | ||
3830 | cache->space_info->disk_used -= num_bytes * factor; | ||
3513 | spin_unlock(&cache->lock); | 3831 | spin_unlock(&cache->lock); |
3514 | spin_unlock(&cache->space_info->lock); | 3832 | spin_unlock(&cache->space_info->lock); |
3515 | if (mark_free) { | ||
3516 | int ret; | ||
3517 | 3833 | ||
3518 | ret = btrfs_discard_extent(root, bytenr, | 3834 | set_extent_dirty(info->pinned_extents, |
3519 | num_bytes); | 3835 | bytenr, bytenr + num_bytes - 1, |
3520 | WARN_ON(ret); | 3836 | GFP_NOFS | __GFP_NOFAIL); |
3521 | |||
3522 | ret = btrfs_add_free_space(cache, bytenr, | ||
3523 | num_bytes); | ||
3524 | WARN_ON(ret); | ||
3525 | } | ||
3526 | } | 3837 | } |
3527 | btrfs_put_block_group(cache); | 3838 | btrfs_put_block_group(cache); |
3528 | total -= num_bytes; | 3839 | total -= num_bytes; |
@@ -3546,18 +3857,10 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) | |||
3546 | return bytenr; | 3857 | return bytenr; |
3547 | } | 3858 | } |
3548 | 3859 | ||
3549 | /* | 3860 | static int pin_down_extent(struct btrfs_root *root, |
3550 | * this function must be called within transaction | 3861 | struct btrfs_block_group_cache *cache, |
3551 | */ | 3862 | u64 bytenr, u64 num_bytes, int reserved) |
3552 | int btrfs_pin_extent(struct btrfs_root *root, | ||
3553 | u64 bytenr, u64 num_bytes, int reserved) | ||
3554 | { | 3863 | { |
3555 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
3556 | struct btrfs_block_group_cache *cache; | ||
3557 | |||
3558 | cache = btrfs_lookup_block_group(fs_info, bytenr); | ||
3559 | BUG_ON(!cache); | ||
3560 | |||
3561 | spin_lock(&cache->space_info->lock); | 3864 | spin_lock(&cache->space_info->lock); |
3562 | spin_lock(&cache->lock); | 3865 | spin_lock(&cache->lock); |
3563 | cache->pinned += num_bytes; | 3866 | cache->pinned += num_bytes; |
@@ -3569,28 +3872,68 @@ int btrfs_pin_extent(struct btrfs_root *root, | |||
3569 | spin_unlock(&cache->lock); | 3872 | spin_unlock(&cache->lock); |
3570 | spin_unlock(&cache->space_info->lock); | 3873 | spin_unlock(&cache->space_info->lock); |
3571 | 3874 | ||
3572 | btrfs_put_block_group(cache); | 3875 | set_extent_dirty(root->fs_info->pinned_extents, bytenr, |
3876 | bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); | ||
3877 | return 0; | ||
3878 | } | ||
3879 | |||
3880 | /* | ||
3881 | * this function must be called within transaction | ||
3882 | */ | ||
3883 | int btrfs_pin_extent(struct btrfs_root *root, | ||
3884 | u64 bytenr, u64 num_bytes, int reserved) | ||
3885 | { | ||
3886 | struct btrfs_block_group_cache *cache; | ||
3887 | |||
3888 | cache = btrfs_lookup_block_group(root->fs_info, bytenr); | ||
3889 | BUG_ON(!cache); | ||
3890 | |||
3891 | pin_down_extent(root, cache, bytenr, num_bytes, reserved); | ||
3573 | 3892 | ||
3574 | set_extent_dirty(fs_info->pinned_extents, | 3893 | btrfs_put_block_group(cache); |
3575 | bytenr, bytenr + num_bytes - 1, GFP_NOFS); | ||
3576 | return 0; | 3894 | return 0; |
3577 | } | 3895 | } |
3578 | 3896 | ||
3579 | static int update_reserved_extents(struct btrfs_block_group_cache *cache, | 3897 | /* |
3580 | u64 num_bytes, int reserve) | 3898 | * update size of reserved extents. this function may return -EAGAIN |
3899 | * if 'reserve' is true or 'sinfo' is false. | ||
3900 | */ | ||
3901 | static int update_reserved_bytes(struct btrfs_block_group_cache *cache, | ||
3902 | u64 num_bytes, int reserve, int sinfo) | ||
3581 | { | 3903 | { |
3582 | spin_lock(&cache->space_info->lock); | 3904 | int ret = 0; |
3583 | spin_lock(&cache->lock); | 3905 | if (sinfo) { |
3584 | if (reserve) { | 3906 | struct btrfs_space_info *space_info = cache->space_info; |
3585 | cache->reserved += num_bytes; | 3907 | spin_lock(&space_info->lock); |
3586 | cache->space_info->bytes_reserved += num_bytes; | 3908 | spin_lock(&cache->lock); |
3909 | if (reserve) { | ||
3910 | if (cache->ro) { | ||
3911 | ret = -EAGAIN; | ||
3912 | } else { | ||
3913 | cache->reserved += num_bytes; | ||
3914 | space_info->bytes_reserved += num_bytes; | ||
3915 | } | ||
3916 | } else { | ||
3917 | if (cache->ro) | ||
3918 | space_info->bytes_readonly += num_bytes; | ||
3919 | cache->reserved -= num_bytes; | ||
3920 | space_info->bytes_reserved -= num_bytes; | ||
3921 | } | ||
3922 | spin_unlock(&cache->lock); | ||
3923 | spin_unlock(&space_info->lock); | ||
3587 | } else { | 3924 | } else { |
3588 | cache->reserved -= num_bytes; | 3925 | spin_lock(&cache->lock); |
3589 | cache->space_info->bytes_reserved -= num_bytes; | 3926 | if (cache->ro) { |
3927 | ret = -EAGAIN; | ||
3928 | } else { | ||
3929 | if (reserve) | ||
3930 | cache->reserved += num_bytes; | ||
3931 | else | ||
3932 | cache->reserved -= num_bytes; | ||
3933 | } | ||
3934 | spin_unlock(&cache->lock); | ||
3590 | } | 3935 | } |
3591 | spin_unlock(&cache->lock); | 3936 | return ret; |
3592 | spin_unlock(&cache->space_info->lock); | ||
3593 | return 0; | ||
3594 | } | 3937 | } |
3595 | 3938 | ||
3596 | int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, | 3939 | int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, |
@@ -3621,6 +3964,8 @@ int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, | |||
3621 | fs_info->pinned_extents = &fs_info->freed_extents[0]; | 3964 | fs_info->pinned_extents = &fs_info->freed_extents[0]; |
3622 | 3965 | ||
3623 | up_write(&fs_info->extent_commit_sem); | 3966 | up_write(&fs_info->extent_commit_sem); |
3967 | |||
3968 | update_global_block_rsv(fs_info); | ||
3624 | return 0; | 3969 | return 0; |
3625 | } | 3970 | } |
3626 | 3971 | ||
@@ -3647,14 +3992,21 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) | |||
3647 | btrfs_add_free_space(cache, start, len); | 3992 | btrfs_add_free_space(cache, start, len); |
3648 | } | 3993 | } |
3649 | 3994 | ||
3995 | start += len; | ||
3996 | |||
3650 | spin_lock(&cache->space_info->lock); | 3997 | spin_lock(&cache->space_info->lock); |
3651 | spin_lock(&cache->lock); | 3998 | spin_lock(&cache->lock); |
3652 | cache->pinned -= len; | 3999 | cache->pinned -= len; |
3653 | cache->space_info->bytes_pinned -= len; | 4000 | cache->space_info->bytes_pinned -= len; |
4001 | if (cache->ro) { | ||
4002 | cache->space_info->bytes_readonly += len; | ||
4003 | } else if (cache->reserved_pinned > 0) { | ||
4004 | len = min(len, cache->reserved_pinned); | ||
4005 | cache->reserved_pinned -= len; | ||
4006 | cache->space_info->bytes_reserved += len; | ||
4007 | } | ||
3654 | spin_unlock(&cache->lock); | 4008 | spin_unlock(&cache->lock); |
3655 | spin_unlock(&cache->space_info->lock); | 4009 | spin_unlock(&cache->space_info->lock); |
3656 | |||
3657 | start += len; | ||
3658 | } | 4010 | } |
3659 | 4011 | ||
3660 | if (cache) | 4012 | if (cache) |
@@ -3667,8 +4019,11 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
3667 | { | 4019 | { |
3668 | struct btrfs_fs_info *fs_info = root->fs_info; | 4020 | struct btrfs_fs_info *fs_info = root->fs_info; |
3669 | struct extent_io_tree *unpin; | 4021 | struct extent_io_tree *unpin; |
4022 | struct btrfs_block_rsv *block_rsv; | ||
4023 | struct btrfs_block_rsv *next_rsv; | ||
3670 | u64 start; | 4024 | u64 start; |
3671 | u64 end; | 4025 | u64 end; |
4026 | int idx; | ||
3672 | int ret; | 4027 | int ret; |
3673 | 4028 | ||
3674 | if (fs_info->pinned_extents == &fs_info->freed_extents[0]) | 4029 | if (fs_info->pinned_extents == &fs_info->freed_extents[0]) |
@@ -3689,59 +4044,30 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
3689 | cond_resched(); | 4044 | cond_resched(); |
3690 | } | 4045 | } |
3691 | 4046 | ||
3692 | return ret; | 4047 | mutex_lock(&fs_info->durable_block_rsv_mutex); |
3693 | } | 4048 | list_for_each_entry_safe(block_rsv, next_rsv, |
4049 | &fs_info->durable_block_rsv_list, list) { | ||
3694 | 4050 | ||
3695 | static int pin_down_bytes(struct btrfs_trans_handle *trans, | 4051 | idx = trans->transid & 0x1; |
3696 | struct btrfs_root *root, | 4052 | if (block_rsv->freed[idx] > 0) { |
3697 | struct btrfs_path *path, | 4053 | block_rsv_add_bytes(block_rsv, |
3698 | u64 bytenr, u64 num_bytes, | 4054 | block_rsv->freed[idx], 0); |
3699 | int is_data, int reserved, | 4055 | block_rsv->freed[idx] = 0; |
3700 | struct extent_buffer **must_clean) | 4056 | } |
3701 | { | 4057 | if (atomic_read(&block_rsv->usage) == 0) { |
3702 | int err = 0; | 4058 | btrfs_block_rsv_release(root, block_rsv, (u64)-1); |
3703 | struct extent_buffer *buf; | ||
3704 | |||
3705 | if (is_data) | ||
3706 | goto pinit; | ||
3707 | |||
3708 | /* | ||
3709 | * discard is sloooow, and so triggering discards on | ||
3710 | * individual btree blocks isn't a good plan. Just | ||
3711 | * pin everything in discard mode. | ||
3712 | */ | ||
3713 | if (btrfs_test_opt(root, DISCARD)) | ||
3714 | goto pinit; | ||
3715 | |||
3716 | buf = btrfs_find_tree_block(root, bytenr, num_bytes); | ||
3717 | if (!buf) | ||
3718 | goto pinit; | ||
3719 | 4059 | ||
3720 | /* we can reuse a block if it hasn't been written | 4060 | if (block_rsv->freed[0] == 0 && |
3721 | * and it is from this transaction. We can't | 4061 | block_rsv->freed[1] == 0) { |
3722 | * reuse anything from the tree log root because | 4062 | list_del_init(&block_rsv->list); |
3723 | * it has tiny sub-transactions. | 4063 | kfree(block_rsv); |
3724 | */ | 4064 | } |
3725 | if (btrfs_buffer_uptodate(buf, 0) && | 4065 | } else { |
3726 | btrfs_try_tree_lock(buf)) { | 4066 | btrfs_block_rsv_release(root, block_rsv, 0); |
3727 | u64 header_owner = btrfs_header_owner(buf); | ||
3728 | u64 header_transid = btrfs_header_generation(buf); | ||
3729 | if (header_owner != BTRFS_TREE_LOG_OBJECTID && | ||
3730 | header_transid == trans->transid && | ||
3731 | !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { | ||
3732 | *must_clean = buf; | ||
3733 | return 1; | ||
3734 | } | 4067 | } |
3735 | btrfs_tree_unlock(buf); | ||
3736 | } | 4068 | } |
3737 | free_extent_buffer(buf); | 4069 | mutex_unlock(&fs_info->durable_block_rsv_mutex); |
3738 | pinit: | ||
3739 | if (path) | ||
3740 | btrfs_set_path_blocking(path); | ||
3741 | /* unlocks the pinned mutex */ | ||
3742 | btrfs_pin_extent(root, bytenr, num_bytes, reserved); | ||
3743 | 4070 | ||
3744 | BUG_ON(err < 0); | ||
3745 | return 0; | 4071 | return 0; |
3746 | } | 4072 | } |
3747 | 4073 | ||
@@ -3902,9 +4228,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
3902 | BUG_ON(ret); | 4228 | BUG_ON(ret); |
3903 | } | 4229 | } |
3904 | } else { | 4230 | } else { |
3905 | int mark_free = 0; | ||
3906 | struct extent_buffer *must_clean = NULL; | ||
3907 | |||
3908 | if (found_extent) { | 4231 | if (found_extent) { |
3909 | BUG_ON(is_data && refs_to_drop != | 4232 | BUG_ON(is_data && refs_to_drop != |
3910 | extent_data_ref_count(root, path, iref)); | 4233 | extent_data_ref_count(root, path, iref)); |
@@ -3917,31 +4240,11 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
3917 | } | 4240 | } |
3918 | } | 4241 | } |
3919 | 4242 | ||
3920 | ret = pin_down_bytes(trans, root, path, bytenr, | ||
3921 | num_bytes, is_data, 0, &must_clean); | ||
3922 | if (ret > 0) | ||
3923 | mark_free = 1; | ||
3924 | BUG_ON(ret < 0); | ||
3925 | /* | ||
3926 | * it is going to be very rare for someone to be waiting | ||
3927 | * on the block we're freeing. del_items might need to | ||
3928 | * schedule, so rather than get fancy, just force it | ||
3929 | * to blocking here | ||
3930 | */ | ||
3931 | if (must_clean) | ||
3932 | btrfs_set_lock_blocking(must_clean); | ||
3933 | |||
3934 | ret = btrfs_del_items(trans, extent_root, path, path->slots[0], | 4243 | ret = btrfs_del_items(trans, extent_root, path, path->slots[0], |
3935 | num_to_del); | 4244 | num_to_del); |
3936 | BUG_ON(ret); | 4245 | BUG_ON(ret); |
3937 | btrfs_release_path(extent_root, path); | 4246 | btrfs_release_path(extent_root, path); |
3938 | 4247 | ||
3939 | if (must_clean) { | ||
3940 | clean_tree_block(NULL, root, must_clean); | ||
3941 | btrfs_tree_unlock(must_clean); | ||
3942 | free_extent_buffer(must_clean); | ||
3943 | } | ||
3944 | |||
3945 | if (is_data) { | 4248 | if (is_data) { |
3946 | ret = btrfs_del_csums(trans, root, bytenr, num_bytes); | 4249 | ret = btrfs_del_csums(trans, root, bytenr, num_bytes); |
3947 | BUG_ON(ret); | 4250 | BUG_ON(ret); |
@@ -3951,8 +4254,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
3951 | (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT); | 4254 | (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT); |
3952 | } | 4255 | } |
3953 | 4256 | ||
3954 | ret = update_block_group(trans, root, bytenr, num_bytes, 0, | 4257 | ret = update_block_group(trans, root, bytenr, num_bytes, 0); |
3955 | mark_free); | ||
3956 | BUG_ON(ret); | 4258 | BUG_ON(ret); |
3957 | } | 4259 | } |
3958 | btrfs_free_path(path); | 4260 | btrfs_free_path(path); |
@@ -3960,7 +4262,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
3960 | } | 4262 | } |
3961 | 4263 | ||
3962 | /* | 4264 | /* |
3963 | * when we free an extent, it is possible (and likely) that we free the last | 4265 | * when we free an block, it is possible (and likely) that we free the last |
3964 | * delayed ref for that extent as well. This searches the delayed ref tree for | 4266 | * delayed ref for that extent as well. This searches the delayed ref tree for |
3965 | * a given extent, and if there are no other delayed refs to be processed, it | 4267 | * a given extent, and if there are no other delayed refs to be processed, it |
3966 | * removes it from the tree. | 4268 | * removes it from the tree. |
@@ -3972,7 +4274,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, | |||
3972 | struct btrfs_delayed_ref_root *delayed_refs; | 4274 | struct btrfs_delayed_ref_root *delayed_refs; |
3973 | struct btrfs_delayed_ref_node *ref; | 4275 | struct btrfs_delayed_ref_node *ref; |
3974 | struct rb_node *node; | 4276 | struct rb_node *node; |
3975 | int ret; | 4277 | int ret = 0; |
3976 | 4278 | ||
3977 | delayed_refs = &trans->transaction->delayed_refs; | 4279 | delayed_refs = &trans->transaction->delayed_refs; |
3978 | spin_lock(&delayed_refs->lock); | 4280 | spin_lock(&delayed_refs->lock); |
@@ -4024,17 +4326,99 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, | |||
4024 | list_del_init(&head->cluster); | 4326 | list_del_init(&head->cluster); |
4025 | spin_unlock(&delayed_refs->lock); | 4327 | spin_unlock(&delayed_refs->lock); |
4026 | 4328 | ||
4027 | ret = run_one_delayed_ref(trans, root->fs_info->tree_root, | 4329 | BUG_ON(head->extent_op); |
4028 | &head->node, head->extent_op, | 4330 | if (head->must_insert_reserved) |
4029 | head->must_insert_reserved); | 4331 | ret = 1; |
4030 | BUG_ON(ret); | 4332 | |
4333 | mutex_unlock(&head->mutex); | ||
4031 | btrfs_put_delayed_ref(&head->node); | 4334 | btrfs_put_delayed_ref(&head->node); |
4032 | return 0; | 4335 | return ret; |
4033 | out: | 4336 | out: |
4034 | spin_unlock(&delayed_refs->lock); | 4337 | spin_unlock(&delayed_refs->lock); |
4035 | return 0; | 4338 | return 0; |
4036 | } | 4339 | } |
4037 | 4340 | ||
4341 | void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | ||
4342 | struct btrfs_root *root, | ||
4343 | struct extent_buffer *buf, | ||
4344 | u64 parent, int last_ref) | ||
4345 | { | ||
4346 | struct btrfs_block_rsv *block_rsv; | ||
4347 | struct btrfs_block_group_cache *cache = NULL; | ||
4348 | int ret; | ||
4349 | |||
4350 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { | ||
4351 | ret = btrfs_add_delayed_tree_ref(trans, buf->start, buf->len, | ||
4352 | parent, root->root_key.objectid, | ||
4353 | btrfs_header_level(buf), | ||
4354 | BTRFS_DROP_DELAYED_REF, NULL); | ||
4355 | BUG_ON(ret); | ||
4356 | } | ||
4357 | |||
4358 | if (!last_ref) | ||
4359 | return; | ||
4360 | |||
4361 | block_rsv = get_block_rsv(trans, root); | ||
4362 | cache = btrfs_lookup_block_group(root->fs_info, buf->start); | ||
4363 | BUG_ON(block_rsv->space_info != cache->space_info); | ||
4364 | |||
4365 | if (btrfs_header_generation(buf) == trans->transid) { | ||
4366 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { | ||
4367 | ret = check_ref_cleanup(trans, root, buf->start); | ||
4368 | if (!ret) | ||
4369 | goto pin; | ||
4370 | } | ||
4371 | |||
4372 | if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { | ||
4373 | pin_down_extent(root, cache, buf->start, buf->len, 1); | ||
4374 | goto pin; | ||
4375 | } | ||
4376 | |||
4377 | WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); | ||
4378 | |||
4379 | btrfs_add_free_space(cache, buf->start, buf->len); | ||
4380 | ret = update_reserved_bytes(cache, buf->len, 0, 0); | ||
4381 | if (ret == -EAGAIN) { | ||
4382 | /* block group became read-only */ | ||
4383 | update_reserved_bytes(cache, buf->len, 0, 1); | ||
4384 | goto out; | ||
4385 | } | ||
4386 | |||
4387 | ret = 1; | ||
4388 | spin_lock(&block_rsv->lock); | ||
4389 | if (block_rsv->reserved < block_rsv->size) { | ||
4390 | block_rsv->reserved += buf->len; | ||
4391 | ret = 0; | ||
4392 | } | ||
4393 | spin_unlock(&block_rsv->lock); | ||
4394 | |||
4395 | if (ret) { | ||
4396 | spin_lock(&cache->space_info->lock); | ||
4397 | cache->space_info->bytes_reserved -= buf->len; | ||
4398 | spin_unlock(&cache->space_info->lock); | ||
4399 | } | ||
4400 | goto out; | ||
4401 | } | ||
4402 | pin: | ||
4403 | if (block_rsv->durable && !cache->ro) { | ||
4404 | ret = 0; | ||
4405 | spin_lock(&cache->lock); | ||
4406 | if (!cache->ro) { | ||
4407 | cache->reserved_pinned += buf->len; | ||
4408 | ret = 1; | ||
4409 | } | ||
4410 | spin_unlock(&cache->lock); | ||
4411 | |||
4412 | if (ret) { | ||
4413 | spin_lock(&block_rsv->lock); | ||
4414 | block_rsv->freed[trans->transid & 0x1] += buf->len; | ||
4415 | spin_unlock(&block_rsv->lock); | ||
4416 | } | ||
4417 | } | ||
4418 | out: | ||
4419 | btrfs_put_block_group(cache); | ||
4420 | } | ||
4421 | |||
4038 | int btrfs_free_extent(struct btrfs_trans_handle *trans, | 4422 | int btrfs_free_extent(struct btrfs_trans_handle *trans, |
4039 | struct btrfs_root *root, | 4423 | struct btrfs_root *root, |
4040 | u64 bytenr, u64 num_bytes, u64 parent, | 4424 | u64 bytenr, u64 num_bytes, u64 parent, |
@@ -4056,8 +4440,6 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
4056 | parent, root_objectid, (int)owner, | 4440 | parent, root_objectid, (int)owner, |
4057 | BTRFS_DROP_DELAYED_REF, NULL); | 4441 | BTRFS_DROP_DELAYED_REF, NULL); |
4058 | BUG_ON(ret); | 4442 | BUG_ON(ret); |
4059 | ret = check_ref_cleanup(trans, root, bytenr); | ||
4060 | BUG_ON(ret); | ||
4061 | } else { | 4443 | } else { |
4062 | ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, | 4444 | ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, |
4063 | parent, root_objectid, owner, | 4445 | parent, root_objectid, owner, |
@@ -4067,21 +4449,6 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
4067 | return ret; | 4449 | return ret; |
4068 | } | 4450 | } |
4069 | 4451 | ||
4070 | int btrfs_free_tree_block(struct btrfs_trans_handle *trans, | ||
4071 | struct btrfs_root *root, | ||
4072 | u64 bytenr, u32 blocksize, | ||
4073 | u64 parent, u64 root_objectid, int level) | ||
4074 | { | ||
4075 | u64 used; | ||
4076 | spin_lock(&root->node_lock); | ||
4077 | used = btrfs_root_used(&root->root_item) - blocksize; | ||
4078 | btrfs_set_root_used(&root->root_item, used); | ||
4079 | spin_unlock(&root->node_lock); | ||
4080 | |||
4081 | return btrfs_free_extent(trans, root, bytenr, blocksize, | ||
4082 | parent, root_objectid, level, 0); | ||
4083 | } | ||
4084 | |||
4085 | static u64 stripe_align(struct btrfs_root *root, u64 val) | 4452 | static u64 stripe_align(struct btrfs_root *root, u64 val) |
4086 | { | 4453 | { |
4087 | u64 mask = ((u64)root->stripesize - 1); | 4454 | u64 mask = ((u64)root->stripesize - 1); |
@@ -4134,6 +4501,22 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache) | |||
4134 | return 0; | 4501 | return 0; |
4135 | } | 4502 | } |
4136 | 4503 | ||
4504 | static int get_block_group_index(struct btrfs_block_group_cache *cache) | ||
4505 | { | ||
4506 | int index; | ||
4507 | if (cache->flags & BTRFS_BLOCK_GROUP_RAID10) | ||
4508 | index = 0; | ||
4509 | else if (cache->flags & BTRFS_BLOCK_GROUP_RAID1) | ||
4510 | index = 1; | ||
4511 | else if (cache->flags & BTRFS_BLOCK_GROUP_DUP) | ||
4512 | index = 2; | ||
4513 | else if (cache->flags & BTRFS_BLOCK_GROUP_RAID0) | ||
4514 | index = 3; | ||
4515 | else | ||
4516 | index = 4; | ||
4517 | return index; | ||
4518 | } | ||
4519 | |||
4137 | enum btrfs_loop_type { | 4520 | enum btrfs_loop_type { |
4138 | LOOP_FIND_IDEAL = 0, | 4521 | LOOP_FIND_IDEAL = 0, |
4139 | LOOP_CACHING_NOWAIT = 1, | 4522 | LOOP_CACHING_NOWAIT = 1, |
@@ -4155,7 +4538,6 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
4155 | u64 num_bytes, u64 empty_size, | 4538 | u64 num_bytes, u64 empty_size, |
4156 | u64 search_start, u64 search_end, | 4539 | u64 search_start, u64 search_end, |
4157 | u64 hint_byte, struct btrfs_key *ins, | 4540 | u64 hint_byte, struct btrfs_key *ins, |
4158 | u64 exclude_start, u64 exclude_nr, | ||
4159 | int data) | 4541 | int data) |
4160 | { | 4542 | { |
4161 | int ret = 0; | 4543 | int ret = 0; |
@@ -4168,6 +4550,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
4168 | struct btrfs_space_info *space_info; | 4550 | struct btrfs_space_info *space_info; |
4169 | int last_ptr_loop = 0; | 4551 | int last_ptr_loop = 0; |
4170 | int loop = 0; | 4552 | int loop = 0; |
4553 | int index = 0; | ||
4171 | bool found_uncached_bg = false; | 4554 | bool found_uncached_bg = false; |
4172 | bool failed_cluster_refill = false; | 4555 | bool failed_cluster_refill = false; |
4173 | bool failed_alloc = false; | 4556 | bool failed_alloc = false; |
@@ -4237,6 +4620,7 @@ ideal_cache: | |||
4237 | btrfs_put_block_group(block_group); | 4620 | btrfs_put_block_group(block_group); |
4238 | up_read(&space_info->groups_sem); | 4621 | up_read(&space_info->groups_sem); |
4239 | } else { | 4622 | } else { |
4623 | index = get_block_group_index(block_group); | ||
4240 | goto have_block_group; | 4624 | goto have_block_group; |
4241 | } | 4625 | } |
4242 | } else if (block_group) { | 4626 | } else if (block_group) { |
@@ -4245,7 +4629,8 @@ ideal_cache: | |||
4245 | } | 4629 | } |
4246 | search: | 4630 | search: |
4247 | down_read(&space_info->groups_sem); | 4631 | down_read(&space_info->groups_sem); |
4248 | list_for_each_entry(block_group, &space_info->block_groups, list) { | 4632 | list_for_each_entry(block_group, &space_info->block_groups[index], |
4633 | list) { | ||
4249 | u64 offset; | 4634 | u64 offset; |
4250 | int cached; | 4635 | int cached; |
4251 | 4636 | ||
@@ -4436,23 +4821,22 @@ checks: | |||
4436 | goto loop; | 4821 | goto loop; |
4437 | } | 4822 | } |
4438 | 4823 | ||
4439 | if (exclude_nr > 0 && | 4824 | ins->objectid = search_start; |
4440 | (search_start + num_bytes > exclude_start && | 4825 | ins->offset = num_bytes; |
4441 | search_start < exclude_start + exclude_nr)) { | 4826 | |
4442 | search_start = exclude_start + exclude_nr; | 4827 | if (offset < search_start) |
4828 | btrfs_add_free_space(block_group, offset, | ||
4829 | search_start - offset); | ||
4830 | BUG_ON(offset > search_start); | ||
4443 | 4831 | ||
4832 | ret = update_reserved_bytes(block_group, num_bytes, 1, | ||
4833 | (data & BTRFS_BLOCK_GROUP_DATA)); | ||
4834 | if (ret == -EAGAIN) { | ||
4444 | btrfs_add_free_space(block_group, offset, num_bytes); | 4835 | btrfs_add_free_space(block_group, offset, num_bytes); |
4445 | /* | ||
4446 | * if search_start is still in this block group | ||
4447 | * then we just re-search this block group | ||
4448 | */ | ||
4449 | if (search_start >= block_group->key.objectid && | ||
4450 | search_start < (block_group->key.objectid + | ||
4451 | block_group->key.offset)) | ||
4452 | goto have_block_group; | ||
4453 | goto loop; | 4836 | goto loop; |
4454 | } | 4837 | } |
4455 | 4838 | ||
4839 | /* we are all good, lets return */ | ||
4456 | ins->objectid = search_start; | 4840 | ins->objectid = search_start; |
4457 | ins->offset = num_bytes; | 4841 | ins->offset = num_bytes; |
4458 | 4842 | ||
@@ -4460,18 +4844,18 @@ checks: | |||
4460 | btrfs_add_free_space(block_group, offset, | 4844 | btrfs_add_free_space(block_group, offset, |
4461 | search_start - offset); | 4845 | search_start - offset); |
4462 | BUG_ON(offset > search_start); | 4846 | BUG_ON(offset > search_start); |
4463 | |||
4464 | update_reserved_extents(block_group, num_bytes, 1); | ||
4465 | |||
4466 | /* we are all good, lets return */ | ||
4467 | break; | 4847 | break; |
4468 | loop: | 4848 | loop: |
4469 | failed_cluster_refill = false; | 4849 | failed_cluster_refill = false; |
4470 | failed_alloc = false; | 4850 | failed_alloc = false; |
4851 | BUG_ON(index != get_block_group_index(block_group)); | ||
4471 | btrfs_put_block_group(block_group); | 4852 | btrfs_put_block_group(block_group); |
4472 | } | 4853 | } |
4473 | up_read(&space_info->groups_sem); | 4854 | up_read(&space_info->groups_sem); |
4474 | 4855 | ||
4856 | if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES) | ||
4857 | goto search; | ||
4858 | |||
4475 | /* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait for | 4859 | /* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait for |
4476 | * for them to make caching progress. Also | 4860 | * for them to make caching progress. Also |
4477 | * determine the best possible bg to cache | 4861 | * determine the best possible bg to cache |
@@ -4485,6 +4869,7 @@ loop: | |||
4485 | if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE && | 4869 | if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE && |
4486 | (found_uncached_bg || empty_size || empty_cluster || | 4870 | (found_uncached_bg || empty_size || empty_cluster || |
4487 | allowed_chunk_alloc)) { | 4871 | allowed_chunk_alloc)) { |
4872 | index = 0; | ||
4488 | if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { | 4873 | if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { |
4489 | found_uncached_bg = false; | 4874 | found_uncached_bg = false; |
4490 | loop++; | 4875 | loop++; |
@@ -4567,31 +4952,30 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | |||
4567 | int dump_block_groups) | 4952 | int dump_block_groups) |
4568 | { | 4953 | { |
4569 | struct btrfs_block_group_cache *cache; | 4954 | struct btrfs_block_group_cache *cache; |
4955 | int index = 0; | ||
4570 | 4956 | ||
4571 | spin_lock(&info->lock); | 4957 | spin_lock(&info->lock); |
4572 | printk(KERN_INFO "space_info has %llu free, is %sfull\n", | 4958 | printk(KERN_INFO "space_info has %llu free, is %sfull\n", |
4573 | (unsigned long long)(info->total_bytes - info->bytes_used - | 4959 | (unsigned long long)(info->total_bytes - info->bytes_used - |
4574 | info->bytes_pinned - info->bytes_reserved - | 4960 | info->bytes_pinned - info->bytes_reserved - |
4575 | info->bytes_super), | 4961 | info->bytes_readonly), |
4576 | (info->full) ? "" : "not "); | 4962 | (info->full) ? "" : "not "); |
4577 | printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," | 4963 | printk(KERN_INFO "space_info total=%llu, used=%llu, pinned=%llu, " |
4578 | " may_use=%llu, used=%llu, root=%llu, super=%llu, reserved=%llu" | 4964 | "reserved=%llu, may_use=%llu, readonly=%llu\n", |
4579 | "\n", | ||
4580 | (unsigned long long)info->total_bytes, | 4965 | (unsigned long long)info->total_bytes, |
4966 | (unsigned long long)info->bytes_used, | ||
4581 | (unsigned long long)info->bytes_pinned, | 4967 | (unsigned long long)info->bytes_pinned, |
4582 | (unsigned long long)info->bytes_delalloc, | 4968 | (unsigned long long)info->bytes_reserved, |
4583 | (unsigned long long)info->bytes_may_use, | 4969 | (unsigned long long)info->bytes_may_use, |
4584 | (unsigned long long)info->bytes_used, | 4970 | (unsigned long long)info->bytes_readonly); |
4585 | (unsigned long long)info->bytes_root, | ||
4586 | (unsigned long long)info->bytes_super, | ||
4587 | (unsigned long long)info->bytes_reserved); | ||
4588 | spin_unlock(&info->lock); | 4971 | spin_unlock(&info->lock); |
4589 | 4972 | ||
4590 | if (!dump_block_groups) | 4973 | if (!dump_block_groups) |
4591 | return; | 4974 | return; |
4592 | 4975 | ||
4593 | down_read(&info->groups_sem); | 4976 | down_read(&info->groups_sem); |
4594 | list_for_each_entry(cache, &info->block_groups, list) { | 4977 | again: |
4978 | list_for_each_entry(cache, &info->block_groups[index], list) { | ||
4595 | spin_lock(&cache->lock); | 4979 | spin_lock(&cache->lock); |
4596 | printk(KERN_INFO "block group %llu has %llu bytes, %llu used " | 4980 | printk(KERN_INFO "block group %llu has %llu bytes, %llu used " |
4597 | "%llu pinned %llu reserved\n", | 4981 | "%llu pinned %llu reserved\n", |
@@ -4603,6 +4987,8 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | |||
4603 | btrfs_dump_free_space(cache, bytes); | 4987 | btrfs_dump_free_space(cache, bytes); |
4604 | spin_unlock(&cache->lock); | 4988 | spin_unlock(&cache->lock); |
4605 | } | 4989 | } |
4990 | if (++index < BTRFS_NR_RAID_TYPES) | ||
4991 | goto again; | ||
4606 | up_read(&info->groups_sem); | 4992 | up_read(&info->groups_sem); |
4607 | } | 4993 | } |
4608 | 4994 | ||
@@ -4628,9 +5014,8 @@ again: | |||
4628 | 5014 | ||
4629 | WARN_ON(num_bytes < root->sectorsize); | 5015 | WARN_ON(num_bytes < root->sectorsize); |
4630 | ret = find_free_extent(trans, root, num_bytes, empty_size, | 5016 | ret = find_free_extent(trans, root, num_bytes, empty_size, |
4631 | search_start, search_end, hint_byte, ins, | 5017 | search_start, search_end, hint_byte, |
4632 | trans->alloc_exclude_start, | 5018 | ins, data); |
4633 | trans->alloc_exclude_nr, data); | ||
4634 | 5019 | ||
4635 | if (ret == -ENOSPC && num_bytes > min_alloc_size) { | 5020 | if (ret == -ENOSPC && num_bytes > min_alloc_size) { |
4636 | num_bytes = num_bytes >> 1; | 5021 | num_bytes = num_bytes >> 1; |
@@ -4668,7 +5053,7 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) | |||
4668 | ret = btrfs_discard_extent(root, start, len); | 5053 | ret = btrfs_discard_extent(root, start, len); |
4669 | 5054 | ||
4670 | btrfs_add_free_space(cache, start, len); | 5055 | btrfs_add_free_space(cache, start, len); |
4671 | update_reserved_extents(cache, len, 0); | 5056 | update_reserved_bytes(cache, len, 0, 1); |
4672 | btrfs_put_block_group(cache); | 5057 | btrfs_put_block_group(cache); |
4673 | 5058 | ||
4674 | return ret; | 5059 | return ret; |
@@ -4731,8 +5116,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
4731 | btrfs_mark_buffer_dirty(path->nodes[0]); | 5116 | btrfs_mark_buffer_dirty(path->nodes[0]); |
4732 | btrfs_free_path(path); | 5117 | btrfs_free_path(path); |
4733 | 5118 | ||
4734 | ret = update_block_group(trans, root, ins->objectid, ins->offset, | 5119 | ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); |
4735 | 1, 0); | ||
4736 | if (ret) { | 5120 | if (ret) { |
4737 | printk(KERN_ERR "btrfs update block group failed for %llu " | 5121 | printk(KERN_ERR "btrfs update block group failed for %llu " |
4738 | "%llu\n", (unsigned long long)ins->objectid, | 5122 | "%llu\n", (unsigned long long)ins->objectid, |
@@ -4792,8 +5176,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
4792 | btrfs_mark_buffer_dirty(leaf); | 5176 | btrfs_mark_buffer_dirty(leaf); |
4793 | btrfs_free_path(path); | 5177 | btrfs_free_path(path); |
4794 | 5178 | ||
4795 | ret = update_block_group(trans, root, ins->objectid, ins->offset, | 5179 | ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); |
4796 | 1, 0); | ||
4797 | if (ret) { | 5180 | if (ret) { |
4798 | printk(KERN_ERR "btrfs update block group failed for %llu " | 5181 | printk(KERN_ERR "btrfs update block group failed for %llu " |
4799 | "%llu\n", (unsigned long long)ins->objectid, | 5182 | "%llu\n", (unsigned long long)ins->objectid, |
@@ -4869,73 +5252,14 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
4869 | put_caching_control(caching_ctl); | 5252 | put_caching_control(caching_ctl); |
4870 | } | 5253 | } |
4871 | 5254 | ||
4872 | update_reserved_extents(block_group, ins->offset, 1); | 5255 | ret = update_reserved_bytes(block_group, ins->offset, 1, 1); |
5256 | BUG_ON(ret); | ||
4873 | btrfs_put_block_group(block_group); | 5257 | btrfs_put_block_group(block_group); |
4874 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, | 5258 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, |
4875 | 0, owner, offset, ins, 1); | 5259 | 0, owner, offset, ins, 1); |
4876 | return ret; | 5260 | return ret; |
4877 | } | 5261 | } |
4878 | 5262 | ||
4879 | /* | ||
4880 | * finds a free extent and does all the dirty work required for allocation | ||
4881 | * returns the key for the extent through ins, and a tree buffer for | ||
4882 | * the first block of the extent through buf. | ||
4883 | * | ||
4884 | * returns 0 if everything worked, non-zero otherwise. | ||
4885 | */ | ||
4886 | static int alloc_tree_block(struct btrfs_trans_handle *trans, | ||
4887 | struct btrfs_root *root, | ||
4888 | u64 num_bytes, u64 parent, u64 root_objectid, | ||
4889 | struct btrfs_disk_key *key, int level, | ||
4890 | u64 empty_size, u64 hint_byte, u64 search_end, | ||
4891 | struct btrfs_key *ins) | ||
4892 | { | ||
4893 | int ret; | ||
4894 | u64 flags = 0; | ||
4895 | |||
4896 | ret = btrfs_reserve_extent(trans, root, num_bytes, num_bytes, | ||
4897 | empty_size, hint_byte, search_end, | ||
4898 | ins, 0); | ||
4899 | if (ret) | ||
4900 | return ret; | ||
4901 | |||
4902 | if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { | ||
4903 | if (parent == 0) | ||
4904 | parent = ins->objectid; | ||
4905 | flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; | ||
4906 | } else | ||
4907 | BUG_ON(parent > 0); | ||
4908 | |||
4909 | if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { | ||
4910 | struct btrfs_delayed_extent_op *extent_op; | ||
4911 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); | ||
4912 | BUG_ON(!extent_op); | ||
4913 | if (key) | ||
4914 | memcpy(&extent_op->key, key, sizeof(extent_op->key)); | ||
4915 | else | ||
4916 | memset(&extent_op->key, 0, sizeof(extent_op->key)); | ||
4917 | extent_op->flags_to_set = flags; | ||
4918 | extent_op->update_key = 1; | ||
4919 | extent_op->update_flags = 1; | ||
4920 | extent_op->is_data = 0; | ||
4921 | |||
4922 | ret = btrfs_add_delayed_tree_ref(trans, ins->objectid, | ||
4923 | ins->offset, parent, root_objectid, | ||
4924 | level, BTRFS_ADD_DELAYED_EXTENT, | ||
4925 | extent_op); | ||
4926 | BUG_ON(ret); | ||
4927 | } | ||
4928 | |||
4929 | if (root_objectid == root->root_key.objectid) { | ||
4930 | u64 used; | ||
4931 | spin_lock(&root->node_lock); | ||
4932 | used = btrfs_root_used(&root->root_item) + num_bytes; | ||
4933 | btrfs_set_root_used(&root->root_item, used); | ||
4934 | spin_unlock(&root->node_lock); | ||
4935 | } | ||
4936 | return ret; | ||
4937 | } | ||
4938 | |||
4939 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | 5263 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, |
4940 | struct btrfs_root *root, | 5264 | struct btrfs_root *root, |
4941 | u64 bytenr, u32 blocksize, | 5265 | u64 bytenr, u32 blocksize, |
@@ -4974,8 +5298,45 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | |||
4974 | return buf; | 5298 | return buf; |
4975 | } | 5299 | } |
4976 | 5300 | ||
5301 | static struct btrfs_block_rsv * | ||
5302 | use_block_rsv(struct btrfs_trans_handle *trans, | ||
5303 | struct btrfs_root *root, u32 blocksize) | ||
5304 | { | ||
5305 | struct btrfs_block_rsv *block_rsv; | ||
5306 | int ret; | ||
5307 | |||
5308 | block_rsv = get_block_rsv(trans, root); | ||
5309 | |||
5310 | if (block_rsv->size == 0) { | ||
5311 | ret = reserve_metadata_bytes(block_rsv, blocksize); | ||
5312 | if (ret) | ||
5313 | return ERR_PTR(ret); | ||
5314 | return block_rsv; | ||
5315 | } | ||
5316 | |||
5317 | ret = block_rsv_use_bytes(block_rsv, blocksize); | ||
5318 | if (!ret) | ||
5319 | return block_rsv; | ||
5320 | |||
5321 | WARN_ON(1); | ||
5322 | printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n", | ||
5323 | block_rsv->size, block_rsv->reserved, | ||
5324 | block_rsv->freed[0], block_rsv->freed[1]); | ||
5325 | |||
5326 | return ERR_PTR(-ENOSPC); | ||
5327 | } | ||
5328 | |||
5329 | static void unuse_block_rsv(struct btrfs_block_rsv *block_rsv, u32 blocksize) | ||
5330 | { | ||
5331 | block_rsv_add_bytes(block_rsv, blocksize, 0); | ||
5332 | block_rsv_release_bytes(block_rsv, NULL, 0); | ||
5333 | } | ||
5334 | |||
4977 | /* | 5335 | /* |
4978 | * helper function to allocate a block for a given tree | 5336 | * finds a free extent and does all the dirty work required for allocation |
5337 | * returns the key for the extent through ins, and a tree buffer for | ||
5338 | * the first block of the extent through buf. | ||
5339 | * | ||
4979 | * returns the tree buffer or NULL. | 5340 | * returns the tree buffer or NULL. |
4980 | */ | 5341 | */ |
4981 | struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | 5342 | struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, |
@@ -4985,18 +5346,53 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
4985 | u64 hint, u64 empty_size) | 5346 | u64 hint, u64 empty_size) |
4986 | { | 5347 | { |
4987 | struct btrfs_key ins; | 5348 | struct btrfs_key ins; |
4988 | int ret; | 5349 | struct btrfs_block_rsv *block_rsv; |
4989 | struct extent_buffer *buf; | 5350 | struct extent_buffer *buf; |
5351 | u64 flags = 0; | ||
5352 | int ret; | ||
5353 | |||
4990 | 5354 | ||
4991 | ret = alloc_tree_block(trans, root, blocksize, parent, root_objectid, | 5355 | block_rsv = use_block_rsv(trans, root, blocksize); |
4992 | key, level, empty_size, hint, (u64)-1, &ins); | 5356 | if (IS_ERR(block_rsv)) |
5357 | return ERR_CAST(block_rsv); | ||
5358 | |||
5359 | ret = btrfs_reserve_extent(trans, root, blocksize, blocksize, | ||
5360 | empty_size, hint, (u64)-1, &ins, 0); | ||
4993 | if (ret) { | 5361 | if (ret) { |
4994 | BUG_ON(ret > 0); | 5362 | unuse_block_rsv(block_rsv, blocksize); |
4995 | return ERR_PTR(ret); | 5363 | return ERR_PTR(ret); |
4996 | } | 5364 | } |
4997 | 5365 | ||
4998 | buf = btrfs_init_new_buffer(trans, root, ins.objectid, | 5366 | buf = btrfs_init_new_buffer(trans, root, ins.objectid, |
4999 | blocksize, level); | 5367 | blocksize, level); |
5368 | BUG_ON(IS_ERR(buf)); | ||
5369 | |||
5370 | if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { | ||
5371 | if (parent == 0) | ||
5372 | parent = ins.objectid; | ||
5373 | flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; | ||
5374 | } else | ||
5375 | BUG_ON(parent > 0); | ||
5376 | |||
5377 | if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { | ||
5378 | struct btrfs_delayed_extent_op *extent_op; | ||
5379 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); | ||
5380 | BUG_ON(!extent_op); | ||
5381 | if (key) | ||
5382 | memcpy(&extent_op->key, key, sizeof(extent_op->key)); | ||
5383 | else | ||
5384 | memset(&extent_op->key, 0, sizeof(extent_op->key)); | ||
5385 | extent_op->flags_to_set = flags; | ||
5386 | extent_op->update_key = 1; | ||
5387 | extent_op->update_flags = 1; | ||
5388 | extent_op->is_data = 0; | ||
5389 | |||
5390 | ret = btrfs_add_delayed_tree_ref(trans, ins.objectid, | ||
5391 | ins.offset, parent, root_objectid, | ||
5392 | level, BTRFS_ADD_DELAYED_EXTENT, | ||
5393 | extent_op); | ||
5394 | BUG_ON(ret); | ||
5395 | } | ||
5000 | return buf; | 5396 | return buf; |
5001 | } | 5397 | } |
5002 | 5398 | ||
@@ -5321,7 +5717,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
5321 | struct btrfs_path *path, | 5717 | struct btrfs_path *path, |
5322 | struct walk_control *wc) | 5718 | struct walk_control *wc) |
5323 | { | 5719 | { |
5324 | int ret = 0; | 5720 | int ret; |
5325 | int level = wc->level; | 5721 | int level = wc->level; |
5326 | struct extent_buffer *eb = path->nodes[level]; | 5722 | struct extent_buffer *eb = path->nodes[level]; |
5327 | u64 parent = 0; | 5723 | u64 parent = 0; |
@@ -5399,13 +5795,11 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
5399 | btrfs_header_owner(path->nodes[level + 1])); | 5795 | btrfs_header_owner(path->nodes[level + 1])); |
5400 | } | 5796 | } |
5401 | 5797 | ||
5402 | ret = btrfs_free_extent(trans, root, eb->start, eb->len, parent, | 5798 | btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1); |
5403 | root->root_key.objectid, level, 0); | ||
5404 | BUG_ON(ret); | ||
5405 | out: | 5799 | out: |
5406 | wc->refs[level] = 0; | 5800 | wc->refs[level] = 0; |
5407 | wc->flags[level] = 0; | 5801 | wc->flags[level] = 0; |
5408 | return ret; | 5802 | return 0; |
5409 | } | 5803 | } |
5410 | 5804 | ||
5411 | static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | 5805 | static noinline int walk_down_tree(struct btrfs_trans_handle *trans, |
@@ -5483,7 +5877,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, | |||
5483 | * also make sure backrefs for the shared block and all lower level | 5877 | * also make sure backrefs for the shared block and all lower level |
5484 | * blocks are properly updated. | 5878 | * blocks are properly updated. |
5485 | */ | 5879 | */ |
5486 | int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | 5880 | int btrfs_drop_snapshot(struct btrfs_root *root, |
5881 | struct btrfs_block_rsv *block_rsv, int update_ref) | ||
5487 | { | 5882 | { |
5488 | struct btrfs_path *path; | 5883 | struct btrfs_path *path; |
5489 | struct btrfs_trans_handle *trans; | 5884 | struct btrfs_trans_handle *trans; |
@@ -5501,7 +5896,9 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | |||
5501 | wc = kzalloc(sizeof(*wc), GFP_NOFS); | 5896 | wc = kzalloc(sizeof(*wc), GFP_NOFS); |
5502 | BUG_ON(!wc); | 5897 | BUG_ON(!wc); |
5503 | 5898 | ||
5504 | trans = btrfs_start_transaction(tree_root, 1); | 5899 | trans = btrfs_start_transaction(tree_root, 0); |
5900 | if (block_rsv) | ||
5901 | trans->block_rsv = block_rsv; | ||
5505 | 5902 | ||
5506 | if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { | 5903 | if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { |
5507 | level = btrfs_header_level(root->node); | 5904 | level = btrfs_header_level(root->node); |
@@ -5589,22 +5986,16 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | |||
5589 | } | 5986 | } |
5590 | 5987 | ||
5591 | BUG_ON(wc->level == 0); | 5988 | BUG_ON(wc->level == 0); |
5592 | if (trans->transaction->in_commit || | 5989 | if (btrfs_should_end_transaction(trans, tree_root)) { |
5593 | trans->transaction->delayed_refs.flushing) { | ||
5594 | ret = btrfs_update_root(trans, tree_root, | 5990 | ret = btrfs_update_root(trans, tree_root, |
5595 | &root->root_key, | 5991 | &root->root_key, |
5596 | root_item); | 5992 | root_item); |
5597 | BUG_ON(ret); | 5993 | BUG_ON(ret); |
5598 | 5994 | ||
5599 | btrfs_end_transaction(trans, tree_root); | 5995 | btrfs_end_transaction_throttle(trans, tree_root); |
5600 | trans = btrfs_start_transaction(tree_root, 1); | 5996 | trans = btrfs_start_transaction(tree_root, 0); |
5601 | } else { | 5997 | if (block_rsv) |
5602 | unsigned long update; | 5998 | trans->block_rsv = block_rsv; |
5603 | update = trans->delayed_ref_updates; | ||
5604 | trans->delayed_ref_updates = 0; | ||
5605 | if (update) | ||
5606 | btrfs_run_delayed_refs(trans, tree_root, | ||
5607 | update); | ||
5608 | } | 5999 | } |
5609 | } | 6000 | } |
5610 | btrfs_release_path(root, path); | 6001 | btrfs_release_path(root, path); |
@@ -5632,7 +6023,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | |||
5632 | kfree(root); | 6023 | kfree(root); |
5633 | } | 6024 | } |
5634 | out: | 6025 | out: |
5635 | btrfs_end_transaction(trans, tree_root); | 6026 | btrfs_end_transaction_throttle(trans, tree_root); |
5636 | kfree(wc); | 6027 | kfree(wc); |
5637 | btrfs_free_path(path); | 6028 | btrfs_free_path(path); |
5638 | return err; | 6029 | return err; |
@@ -7228,48 +7619,80 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) | |||
7228 | return flags; | 7619 | return flags; |
7229 | } | 7620 | } |
7230 | 7621 | ||
7231 | static int __alloc_chunk_for_shrink(struct btrfs_root *root, | 7622 | static int set_block_group_ro(struct btrfs_block_group_cache *cache) |
7232 | struct btrfs_block_group_cache *shrink_block_group, | ||
7233 | int force) | ||
7234 | { | 7623 | { |
7235 | struct btrfs_trans_handle *trans; | 7624 | struct btrfs_space_info *sinfo = cache->space_info; |
7236 | u64 new_alloc_flags; | 7625 | u64 num_bytes; |
7237 | u64 calc; | 7626 | int ret = -ENOSPC; |
7238 | 7627 | ||
7239 | spin_lock(&shrink_block_group->lock); | 7628 | if (cache->ro) |
7240 | if (btrfs_block_group_used(&shrink_block_group->item) + | 7629 | return 0; |
7241 | shrink_block_group->reserved > 0) { | ||
7242 | spin_unlock(&shrink_block_group->lock); | ||
7243 | 7630 | ||
7244 | trans = btrfs_start_transaction(root, 1); | 7631 | spin_lock(&sinfo->lock); |
7245 | spin_lock(&shrink_block_group->lock); | 7632 | spin_lock(&cache->lock); |
7633 | num_bytes = cache->key.offset - cache->reserved - cache->pinned - | ||
7634 | cache->bytes_super - btrfs_block_group_used(&cache->item); | ||
7635 | |||
7636 | if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + | ||
7637 | sinfo->bytes_may_use + sinfo->bytes_readonly + | ||
7638 | cache->reserved_pinned + num_bytes < sinfo->total_bytes) { | ||
7639 | sinfo->bytes_readonly += num_bytes; | ||
7640 | sinfo->bytes_reserved += cache->reserved_pinned; | ||
7641 | cache->reserved_pinned = 0; | ||
7642 | cache->ro = 1; | ||
7643 | ret = 0; | ||
7644 | } | ||
7645 | spin_unlock(&cache->lock); | ||
7646 | spin_unlock(&sinfo->lock); | ||
7647 | return ret; | ||
7648 | } | ||
7246 | 7649 | ||
7247 | new_alloc_flags = update_block_group_flags(root, | 7650 | int btrfs_set_block_group_ro(struct btrfs_root *root, |
7248 | shrink_block_group->flags); | 7651 | struct btrfs_block_group_cache *cache) |
7249 | if (new_alloc_flags != shrink_block_group->flags) { | ||
7250 | calc = | ||
7251 | btrfs_block_group_used(&shrink_block_group->item); | ||
7252 | } else { | ||
7253 | calc = shrink_block_group->key.offset; | ||
7254 | } | ||
7255 | spin_unlock(&shrink_block_group->lock); | ||
7256 | 7652 | ||
7257 | do_chunk_alloc(trans, root->fs_info->extent_root, | 7653 | { |
7258 | calc + 2 * 1024 * 1024, new_alloc_flags, force); | 7654 | struct btrfs_trans_handle *trans; |
7655 | u64 alloc_flags; | ||
7656 | int ret; | ||
7259 | 7657 | ||
7260 | btrfs_end_transaction(trans, root); | 7658 | BUG_ON(cache->ro); |
7261 | } else | 7659 | |
7262 | spin_unlock(&shrink_block_group->lock); | 7660 | trans = btrfs_join_transaction(root, 1); |
7263 | return 0; | 7661 | BUG_ON(IS_ERR(trans)); |
7264 | } | ||
7265 | 7662 | ||
7663 | alloc_flags = update_block_group_flags(root, cache->flags); | ||
7664 | if (alloc_flags != cache->flags) | ||
7665 | do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); | ||
7266 | 7666 | ||
7267 | int btrfs_prepare_block_group_relocation(struct btrfs_root *root, | 7667 | ret = set_block_group_ro(cache); |
7268 | struct btrfs_block_group_cache *group) | 7668 | if (!ret) |
7669 | goto out; | ||
7670 | alloc_flags = get_alloc_profile(root, cache->space_info->flags); | ||
7671 | ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); | ||
7672 | if (ret < 0) | ||
7673 | goto out; | ||
7674 | ret = set_block_group_ro(cache); | ||
7675 | out: | ||
7676 | btrfs_end_transaction(trans, root); | ||
7677 | return ret; | ||
7678 | } | ||
7269 | 7679 | ||
7680 | int btrfs_set_block_group_rw(struct btrfs_root *root, | ||
7681 | struct btrfs_block_group_cache *cache) | ||
7270 | { | 7682 | { |
7271 | __alloc_chunk_for_shrink(root, group, 1); | 7683 | struct btrfs_space_info *sinfo = cache->space_info; |
7272 | set_block_group_readonly(group); | 7684 | u64 num_bytes; |
7685 | |||
7686 | BUG_ON(!cache->ro); | ||
7687 | |||
7688 | spin_lock(&sinfo->lock); | ||
7689 | spin_lock(&cache->lock); | ||
7690 | num_bytes = cache->key.offset - cache->reserved - cache->pinned - | ||
7691 | cache->bytes_super - btrfs_block_group_used(&cache->item); | ||
7692 | sinfo->bytes_readonly -= num_bytes; | ||
7693 | cache->ro = 0; | ||
7694 | spin_unlock(&cache->lock); | ||
7695 | spin_unlock(&sinfo->lock); | ||
7273 | return 0; | 7696 | return 0; |
7274 | } | 7697 | } |
7275 | 7698 | ||
@@ -7436,17 +7859,33 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
7436 | */ | 7859 | */ |
7437 | synchronize_rcu(); | 7860 | synchronize_rcu(); |
7438 | 7861 | ||
7862 | release_global_block_rsv(info); | ||
7863 | |||
7439 | while(!list_empty(&info->space_info)) { | 7864 | while(!list_empty(&info->space_info)) { |
7440 | space_info = list_entry(info->space_info.next, | 7865 | space_info = list_entry(info->space_info.next, |
7441 | struct btrfs_space_info, | 7866 | struct btrfs_space_info, |
7442 | list); | 7867 | list); |
7443 | 7868 | if (space_info->bytes_pinned > 0 || | |
7869 | space_info->bytes_reserved > 0) { | ||
7870 | WARN_ON(1); | ||
7871 | dump_space_info(space_info, 0, 0); | ||
7872 | } | ||
7444 | list_del(&space_info->list); | 7873 | list_del(&space_info->list); |
7445 | kfree(space_info); | 7874 | kfree(space_info); |
7446 | } | 7875 | } |
7447 | return 0; | 7876 | return 0; |
7448 | } | 7877 | } |
7449 | 7878 | ||
7879 | static void __link_block_group(struct btrfs_space_info *space_info, | ||
7880 | struct btrfs_block_group_cache *cache) | ||
7881 | { | ||
7882 | int index = get_block_group_index(cache); | ||
7883 | |||
7884 | down_write(&space_info->groups_sem); | ||
7885 | list_add_tail(&cache->list, &space_info->block_groups[index]); | ||
7886 | up_write(&space_info->groups_sem); | ||
7887 | } | ||
7888 | |||
7450 | int btrfs_read_block_groups(struct btrfs_root *root) | 7889 | int btrfs_read_block_groups(struct btrfs_root *root) |
7451 | { | 7890 | { |
7452 | struct btrfs_path *path; | 7891 | struct btrfs_path *path; |
@@ -7468,10 +7907,8 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7468 | 7907 | ||
7469 | while (1) { | 7908 | while (1) { |
7470 | ret = find_first_block_group(root, path, &key); | 7909 | ret = find_first_block_group(root, path, &key); |
7471 | if (ret > 0) { | 7910 | if (ret > 0) |
7472 | ret = 0; | 7911 | break; |
7473 | goto error; | ||
7474 | } | ||
7475 | if (ret != 0) | 7912 | if (ret != 0) |
7476 | goto error; | 7913 | goto error; |
7477 | 7914 | ||
@@ -7480,7 +7917,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7480 | cache = kzalloc(sizeof(*cache), GFP_NOFS); | 7917 | cache = kzalloc(sizeof(*cache), GFP_NOFS); |
7481 | if (!cache) { | 7918 | if (!cache) { |
7482 | ret = -ENOMEM; | 7919 | ret = -ENOMEM; |
7483 | break; | 7920 | goto error; |
7484 | } | 7921 | } |
7485 | 7922 | ||
7486 | atomic_set(&cache->count, 1); | 7923 | atomic_set(&cache->count, 1); |
@@ -7537,20 +7974,36 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7537 | BUG_ON(ret); | 7974 | BUG_ON(ret); |
7538 | cache->space_info = space_info; | 7975 | cache->space_info = space_info; |
7539 | spin_lock(&cache->space_info->lock); | 7976 | spin_lock(&cache->space_info->lock); |
7540 | cache->space_info->bytes_super += cache->bytes_super; | 7977 | cache->space_info->bytes_readonly += cache->bytes_super; |
7541 | spin_unlock(&cache->space_info->lock); | 7978 | spin_unlock(&cache->space_info->lock); |
7542 | 7979 | ||
7543 | down_write(&space_info->groups_sem); | 7980 | __link_block_group(space_info, cache); |
7544 | list_add_tail(&cache->list, &space_info->block_groups); | ||
7545 | up_write(&space_info->groups_sem); | ||
7546 | 7981 | ||
7547 | ret = btrfs_add_block_group_cache(root->fs_info, cache); | 7982 | ret = btrfs_add_block_group_cache(root->fs_info, cache); |
7548 | BUG_ON(ret); | 7983 | BUG_ON(ret); |
7549 | 7984 | ||
7550 | set_avail_alloc_bits(root->fs_info, cache->flags); | 7985 | set_avail_alloc_bits(root->fs_info, cache->flags); |
7551 | if (btrfs_chunk_readonly(root, cache->key.objectid)) | 7986 | if (btrfs_chunk_readonly(root, cache->key.objectid)) |
7552 | set_block_group_readonly(cache); | 7987 | set_block_group_ro(cache); |
7553 | } | 7988 | } |
7989 | |||
7990 | list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) { | ||
7991 | if (!(get_alloc_profile(root, space_info->flags) & | ||
7992 | (BTRFS_BLOCK_GROUP_RAID10 | | ||
7993 | BTRFS_BLOCK_GROUP_RAID1 | | ||
7994 | BTRFS_BLOCK_GROUP_DUP))) | ||
7995 | continue; | ||
7996 | /* | ||
7997 | * avoid allocating from un-mirrored block group if there are | ||
7998 | * mirrored block groups. | ||
7999 | */ | ||
8000 | list_for_each_entry(cache, &space_info->block_groups[3], list) | ||
8001 | set_block_group_ro(cache); | ||
8002 | list_for_each_entry(cache, &space_info->block_groups[4], list) | ||
8003 | set_block_group_ro(cache); | ||
8004 | } | ||
8005 | |||
8006 | init_global_block_rsv(info); | ||
7554 | ret = 0; | 8007 | ret = 0; |
7555 | error: | 8008 | error: |
7556 | btrfs_free_path(path); | 8009 | btrfs_free_path(path); |
@@ -7611,12 +8064,10 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
7611 | BUG_ON(ret); | 8064 | BUG_ON(ret); |
7612 | 8065 | ||
7613 | spin_lock(&cache->space_info->lock); | 8066 | spin_lock(&cache->space_info->lock); |
7614 | cache->space_info->bytes_super += cache->bytes_super; | 8067 | cache->space_info->bytes_readonly += cache->bytes_super; |
7615 | spin_unlock(&cache->space_info->lock); | 8068 | spin_unlock(&cache->space_info->lock); |
7616 | 8069 | ||
7617 | down_write(&cache->space_info->groups_sem); | 8070 | __link_block_group(cache->space_info, cache); |
7618 | list_add_tail(&cache->list, &cache->space_info->block_groups); | ||
7619 | up_write(&cache->space_info->groups_sem); | ||
7620 | 8071 | ||
7621 | ret = btrfs_add_block_group_cache(root->fs_info, cache); | 8072 | ret = btrfs_add_block_group_cache(root->fs_info, cache); |
7622 | BUG_ON(ret); | 8073 | BUG_ON(ret); |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index d2d03684fab2..a4080c21ec55 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -135,7 +135,7 @@ static struct extent_state *alloc_extent_state(gfp_t mask) | |||
135 | return state; | 135 | return state; |
136 | } | 136 | } |
137 | 137 | ||
138 | static void free_extent_state(struct extent_state *state) | 138 | void free_extent_state(struct extent_state *state) |
139 | { | 139 | { |
140 | if (!state) | 140 | if (!state) |
141 | return; | 141 | return; |
@@ -335,21 +335,18 @@ static int merge_state(struct extent_io_tree *tree, | |||
335 | } | 335 | } |
336 | 336 | ||
337 | static int set_state_cb(struct extent_io_tree *tree, | 337 | static int set_state_cb(struct extent_io_tree *tree, |
338 | struct extent_state *state, | 338 | struct extent_state *state, int *bits) |
339 | unsigned long bits) | ||
340 | { | 339 | { |
341 | if (tree->ops && tree->ops->set_bit_hook) { | 340 | if (tree->ops && tree->ops->set_bit_hook) { |
342 | return tree->ops->set_bit_hook(tree->mapping->host, | 341 | return tree->ops->set_bit_hook(tree->mapping->host, |
343 | state->start, state->end, | 342 | state, bits); |
344 | state->state, bits); | ||
345 | } | 343 | } |
346 | 344 | ||
347 | return 0; | 345 | return 0; |
348 | } | 346 | } |
349 | 347 | ||
350 | static void clear_state_cb(struct extent_io_tree *tree, | 348 | static void clear_state_cb(struct extent_io_tree *tree, |
351 | struct extent_state *state, | 349 | struct extent_state *state, int *bits) |
352 | unsigned long bits) | ||
353 | { | 350 | { |
354 | if (tree->ops && tree->ops->clear_bit_hook) | 351 | if (tree->ops && tree->ops->clear_bit_hook) |
355 | tree->ops->clear_bit_hook(tree->mapping->host, state, bits); | 352 | tree->ops->clear_bit_hook(tree->mapping->host, state, bits); |
@@ -367,9 +364,10 @@ static void clear_state_cb(struct extent_io_tree *tree, | |||
367 | */ | 364 | */ |
368 | static int insert_state(struct extent_io_tree *tree, | 365 | static int insert_state(struct extent_io_tree *tree, |
369 | struct extent_state *state, u64 start, u64 end, | 366 | struct extent_state *state, u64 start, u64 end, |
370 | int bits) | 367 | int *bits) |
371 | { | 368 | { |
372 | struct rb_node *node; | 369 | struct rb_node *node; |
370 | int bits_to_set = *bits & ~EXTENT_CTLBITS; | ||
373 | int ret; | 371 | int ret; |
374 | 372 | ||
375 | if (end < start) { | 373 | if (end < start) { |
@@ -384,9 +382,9 @@ static int insert_state(struct extent_io_tree *tree, | |||
384 | if (ret) | 382 | if (ret) |
385 | return ret; | 383 | return ret; |
386 | 384 | ||
387 | if (bits & EXTENT_DIRTY) | 385 | if (bits_to_set & EXTENT_DIRTY) |
388 | tree->dirty_bytes += end - start + 1; | 386 | tree->dirty_bytes += end - start + 1; |
389 | state->state |= bits; | 387 | state->state |= bits_to_set; |
390 | node = tree_insert(&tree->state, end, &state->rb_node); | 388 | node = tree_insert(&tree->state, end, &state->rb_node); |
391 | if (node) { | 389 | if (node) { |
392 | struct extent_state *found; | 390 | struct extent_state *found; |
@@ -456,13 +454,13 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig, | |||
456 | * struct is freed and removed from the tree | 454 | * struct is freed and removed from the tree |
457 | */ | 455 | */ |
458 | static int clear_state_bit(struct extent_io_tree *tree, | 456 | static int clear_state_bit(struct extent_io_tree *tree, |
459 | struct extent_state *state, int bits, int wake, | 457 | struct extent_state *state, |
460 | int delete) | 458 | int *bits, int wake) |
461 | { | 459 | { |
462 | int bits_to_clear = bits & ~EXTENT_DO_ACCOUNTING; | 460 | int bits_to_clear = *bits & ~EXTENT_CTLBITS; |
463 | int ret = state->state & bits_to_clear; | 461 | int ret = state->state & bits_to_clear; |
464 | 462 | ||
465 | if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { | 463 | if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { |
466 | u64 range = state->end - state->start + 1; | 464 | u64 range = state->end - state->start + 1; |
467 | WARN_ON(range > tree->dirty_bytes); | 465 | WARN_ON(range > tree->dirty_bytes); |
468 | tree->dirty_bytes -= range; | 466 | tree->dirty_bytes -= range; |
@@ -471,9 +469,8 @@ static int clear_state_bit(struct extent_io_tree *tree, | |||
471 | state->state &= ~bits_to_clear; | 469 | state->state &= ~bits_to_clear; |
472 | if (wake) | 470 | if (wake) |
473 | wake_up(&state->wq); | 471 | wake_up(&state->wq); |
474 | if (delete || state->state == 0) { | 472 | if (state->state == 0) { |
475 | if (state->tree) { | 473 | if (state->tree) { |
476 | clear_state_cb(tree, state, state->state); | ||
477 | rb_erase(&state->rb_node, &tree->state); | 474 | rb_erase(&state->rb_node, &tree->state); |
478 | state->tree = NULL; | 475 | state->tree = NULL; |
479 | free_extent_state(state); | 476 | free_extent_state(state); |
@@ -514,6 +511,10 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
514 | int set = 0; | 511 | int set = 0; |
515 | int clear = 0; | 512 | int clear = 0; |
516 | 513 | ||
514 | if (delete) | ||
515 | bits |= ~EXTENT_CTLBITS; | ||
516 | bits |= EXTENT_FIRST_DELALLOC; | ||
517 | |||
517 | if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY)) | 518 | if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY)) |
518 | clear = 1; | 519 | clear = 1; |
519 | again: | 520 | again: |
@@ -580,8 +581,7 @@ hit_next: | |||
580 | if (err) | 581 | if (err) |
581 | goto out; | 582 | goto out; |
582 | if (state->end <= end) { | 583 | if (state->end <= end) { |
583 | set |= clear_state_bit(tree, state, bits, wake, | 584 | set |= clear_state_bit(tree, state, &bits, wake); |
584 | delete); | ||
585 | if (last_end == (u64)-1) | 585 | if (last_end == (u64)-1) |
586 | goto out; | 586 | goto out; |
587 | start = last_end + 1; | 587 | start = last_end + 1; |
@@ -602,7 +602,7 @@ hit_next: | |||
602 | if (wake) | 602 | if (wake) |
603 | wake_up(&state->wq); | 603 | wake_up(&state->wq); |
604 | 604 | ||
605 | set |= clear_state_bit(tree, prealloc, bits, wake, delete); | 605 | set |= clear_state_bit(tree, prealloc, &bits, wake); |
606 | 606 | ||
607 | prealloc = NULL; | 607 | prealloc = NULL; |
608 | goto out; | 608 | goto out; |
@@ -613,7 +613,7 @@ hit_next: | |||
613 | else | 613 | else |
614 | next_node = NULL; | 614 | next_node = NULL; |
615 | 615 | ||
616 | set |= clear_state_bit(tree, state, bits, wake, delete); | 616 | set |= clear_state_bit(tree, state, &bits, wake); |
617 | if (last_end == (u64)-1) | 617 | if (last_end == (u64)-1) |
618 | goto out; | 618 | goto out; |
619 | start = last_end + 1; | 619 | start = last_end + 1; |
@@ -706,19 +706,19 @@ out: | |||
706 | 706 | ||
707 | static int set_state_bits(struct extent_io_tree *tree, | 707 | static int set_state_bits(struct extent_io_tree *tree, |
708 | struct extent_state *state, | 708 | struct extent_state *state, |
709 | int bits) | 709 | int *bits) |
710 | { | 710 | { |
711 | int ret; | 711 | int ret; |
712 | int bits_to_set = *bits & ~EXTENT_CTLBITS; | ||
712 | 713 | ||
713 | ret = set_state_cb(tree, state, bits); | 714 | ret = set_state_cb(tree, state, bits); |
714 | if (ret) | 715 | if (ret) |
715 | return ret; | 716 | return ret; |
716 | 717 | if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { | |
717 | if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { | ||
718 | u64 range = state->end - state->start + 1; | 718 | u64 range = state->end - state->start + 1; |
719 | tree->dirty_bytes += range; | 719 | tree->dirty_bytes += range; |
720 | } | 720 | } |
721 | state->state |= bits; | 721 | state->state |= bits_to_set; |
722 | 722 | ||
723 | return 0; | 723 | return 0; |
724 | } | 724 | } |
@@ -745,10 +745,9 @@ static void cache_state(struct extent_state *state, | |||
745 | * [start, end] is inclusive This takes the tree lock. | 745 | * [start, end] is inclusive This takes the tree lock. |
746 | */ | 746 | */ |
747 | 747 | ||
748 | static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | 748 | int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
749 | int bits, int exclusive_bits, u64 *failed_start, | 749 | int bits, int exclusive_bits, u64 *failed_start, |
750 | struct extent_state **cached_state, | 750 | struct extent_state **cached_state, gfp_t mask) |
751 | gfp_t mask) | ||
752 | { | 751 | { |
753 | struct extent_state *state; | 752 | struct extent_state *state; |
754 | struct extent_state *prealloc = NULL; | 753 | struct extent_state *prealloc = NULL; |
@@ -757,6 +756,7 @@ static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
757 | u64 last_start; | 756 | u64 last_start; |
758 | u64 last_end; | 757 | u64 last_end; |
759 | 758 | ||
759 | bits |= EXTENT_FIRST_DELALLOC; | ||
760 | again: | 760 | again: |
761 | if (!prealloc && (mask & __GFP_WAIT)) { | 761 | if (!prealloc && (mask & __GFP_WAIT)) { |
762 | prealloc = alloc_extent_state(mask); | 762 | prealloc = alloc_extent_state(mask); |
@@ -778,7 +778,7 @@ again: | |||
778 | */ | 778 | */ |
779 | node = tree_search(tree, start); | 779 | node = tree_search(tree, start); |
780 | if (!node) { | 780 | if (!node) { |
781 | err = insert_state(tree, prealloc, start, end, bits); | 781 | err = insert_state(tree, prealloc, start, end, &bits); |
782 | prealloc = NULL; | 782 | prealloc = NULL; |
783 | BUG_ON(err == -EEXIST); | 783 | BUG_ON(err == -EEXIST); |
784 | goto out; | 784 | goto out; |
@@ -802,7 +802,7 @@ hit_next: | |||
802 | goto out; | 802 | goto out; |
803 | } | 803 | } |
804 | 804 | ||
805 | err = set_state_bits(tree, state, bits); | 805 | err = set_state_bits(tree, state, &bits); |
806 | if (err) | 806 | if (err) |
807 | goto out; | 807 | goto out; |
808 | 808 | ||
@@ -852,7 +852,7 @@ hit_next: | |||
852 | if (err) | 852 | if (err) |
853 | goto out; | 853 | goto out; |
854 | if (state->end <= end) { | 854 | if (state->end <= end) { |
855 | err = set_state_bits(tree, state, bits); | 855 | err = set_state_bits(tree, state, &bits); |
856 | if (err) | 856 | if (err) |
857 | goto out; | 857 | goto out; |
858 | cache_state(state, cached_state); | 858 | cache_state(state, cached_state); |
@@ -877,7 +877,7 @@ hit_next: | |||
877 | else | 877 | else |
878 | this_end = last_start - 1; | 878 | this_end = last_start - 1; |
879 | err = insert_state(tree, prealloc, start, this_end, | 879 | err = insert_state(tree, prealloc, start, this_end, |
880 | bits); | 880 | &bits); |
881 | BUG_ON(err == -EEXIST); | 881 | BUG_ON(err == -EEXIST); |
882 | if (err) { | 882 | if (err) { |
883 | prealloc = NULL; | 883 | prealloc = NULL; |
@@ -903,7 +903,7 @@ hit_next: | |||
903 | err = split_state(tree, state, prealloc, end + 1); | 903 | err = split_state(tree, state, prealloc, end + 1); |
904 | BUG_ON(err == -EEXIST); | 904 | BUG_ON(err == -EEXIST); |
905 | 905 | ||
906 | err = set_state_bits(tree, prealloc, bits); | 906 | err = set_state_bits(tree, prealloc, &bits); |
907 | if (err) { | 907 | if (err) { |
908 | prealloc = NULL; | 908 | prealloc = NULL; |
909 | goto out; | 909 | goto out; |
@@ -966,8 +966,7 @@ int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | |||
966 | { | 966 | { |
967 | return clear_extent_bit(tree, start, end, | 967 | return clear_extent_bit(tree, start, end, |
968 | EXTENT_DIRTY | EXTENT_DELALLOC | | 968 | EXTENT_DIRTY | EXTENT_DELALLOC | |
969 | EXTENT_DO_ACCOUNTING, 0, 0, | 969 | EXTENT_DO_ACCOUNTING, 0, 0, NULL, mask); |
970 | NULL, mask); | ||
971 | } | 970 | } |
972 | 971 | ||
973 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, | 972 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, |
@@ -1435,9 +1434,6 @@ int extent_clear_unlock_delalloc(struct inode *inode, | |||
1435 | if (op & EXTENT_CLEAR_DELALLOC) | 1434 | if (op & EXTENT_CLEAR_DELALLOC) |
1436 | clear_bits |= EXTENT_DELALLOC; | 1435 | clear_bits |= EXTENT_DELALLOC; |
1437 | 1436 | ||
1438 | if (op & EXTENT_CLEAR_ACCOUNTING) | ||
1439 | clear_bits |= EXTENT_DO_ACCOUNTING; | ||
1440 | |||
1441 | clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS); | 1437 | clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS); |
1442 | if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | | 1438 | if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | |
1443 | EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK | | 1439 | EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK | |
@@ -1916,7 +1912,7 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num, | |||
1916 | 1912 | ||
1917 | if (tree->ops && tree->ops->submit_bio_hook) | 1913 | if (tree->ops && tree->ops->submit_bio_hook) |
1918 | tree->ops->submit_bio_hook(page->mapping->host, rw, bio, | 1914 | tree->ops->submit_bio_hook(page->mapping->host, rw, bio, |
1919 | mirror_num, bio_flags); | 1915 | mirror_num, bio_flags, start); |
1920 | else | 1916 | else |
1921 | submit_bio(rw, bio); | 1917 | submit_bio(rw, bio); |
1922 | if (bio_flagged(bio, BIO_EOPNOTSUPP)) | 1918 | if (bio_flagged(bio, BIO_EOPNOTSUPP)) |
@@ -2020,6 +2016,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
2020 | sector_t sector; | 2016 | sector_t sector; |
2021 | struct extent_map *em; | 2017 | struct extent_map *em; |
2022 | struct block_device *bdev; | 2018 | struct block_device *bdev; |
2019 | struct btrfs_ordered_extent *ordered; | ||
2023 | int ret; | 2020 | int ret; |
2024 | int nr = 0; | 2021 | int nr = 0; |
2025 | size_t page_offset = 0; | 2022 | size_t page_offset = 0; |
@@ -2031,7 +2028,15 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
2031 | set_page_extent_mapped(page); | 2028 | set_page_extent_mapped(page); |
2032 | 2029 | ||
2033 | end = page_end; | 2030 | end = page_end; |
2034 | lock_extent(tree, start, end, GFP_NOFS); | 2031 | while (1) { |
2032 | lock_extent(tree, start, end, GFP_NOFS); | ||
2033 | ordered = btrfs_lookup_ordered_extent(inode, start); | ||
2034 | if (!ordered) | ||
2035 | break; | ||
2036 | unlock_extent(tree, start, end, GFP_NOFS); | ||
2037 | btrfs_start_ordered_extent(inode, ordered, 1); | ||
2038 | btrfs_put_ordered_extent(ordered); | ||
2039 | } | ||
2035 | 2040 | ||
2036 | if (page->index == last_byte >> PAGE_CACHE_SHIFT) { | 2041 | if (page->index == last_byte >> PAGE_CACHE_SHIFT) { |
2037 | char *userpage; | 2042 | char *userpage; |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index bbab4813646f..5691c7b590da 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -16,7 +16,9 @@ | |||
16 | #define EXTENT_BOUNDARY (1 << 9) | 16 | #define EXTENT_BOUNDARY (1 << 9) |
17 | #define EXTENT_NODATASUM (1 << 10) | 17 | #define EXTENT_NODATASUM (1 << 10) |
18 | #define EXTENT_DO_ACCOUNTING (1 << 11) | 18 | #define EXTENT_DO_ACCOUNTING (1 << 11) |
19 | #define EXTENT_FIRST_DELALLOC (1 << 12) | ||
19 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) | 20 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) |
21 | #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) | ||
20 | 22 | ||
21 | /* flags for bio submission */ | 23 | /* flags for bio submission */ |
22 | #define EXTENT_BIO_COMPRESSED 1 | 24 | #define EXTENT_BIO_COMPRESSED 1 |
@@ -47,7 +49,7 @@ struct extent_state; | |||
47 | 49 | ||
48 | typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw, | 50 | typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw, |
49 | struct bio *bio, int mirror_num, | 51 | struct bio *bio, int mirror_num, |
50 | unsigned long bio_flags); | 52 | unsigned long bio_flags, u64 bio_offset); |
51 | struct extent_io_ops { | 53 | struct extent_io_ops { |
52 | int (*fill_delalloc)(struct inode *inode, struct page *locked_page, | 54 | int (*fill_delalloc)(struct inode *inode, struct page *locked_page, |
53 | u64 start, u64 end, int *page_started, | 55 | u64 start, u64 end, int *page_started, |
@@ -69,10 +71,10 @@ struct extent_io_ops { | |||
69 | struct extent_state *state); | 71 | struct extent_state *state); |
70 | int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, | 72 | int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, |
71 | struct extent_state *state, int uptodate); | 73 | struct extent_state *state, int uptodate); |
72 | int (*set_bit_hook)(struct inode *inode, u64 start, u64 end, | 74 | int (*set_bit_hook)(struct inode *inode, struct extent_state *state, |
73 | unsigned long old, unsigned long bits); | 75 | int *bits); |
74 | int (*clear_bit_hook)(struct inode *inode, struct extent_state *state, | 76 | int (*clear_bit_hook)(struct inode *inode, struct extent_state *state, |
75 | unsigned long bits); | 77 | int *bits); |
76 | int (*merge_extent_hook)(struct inode *inode, | 78 | int (*merge_extent_hook)(struct inode *inode, |
77 | struct extent_state *new, | 79 | struct extent_state *new, |
78 | struct extent_state *other); | 80 | struct extent_state *other); |
@@ -176,6 +178,7 @@ u64 count_range_bits(struct extent_io_tree *tree, | |||
176 | u64 *start, u64 search_end, | 178 | u64 *start, u64 search_end, |
177 | u64 max_bytes, unsigned long bits); | 179 | u64 max_bytes, unsigned long bits); |
178 | 180 | ||
181 | void free_extent_state(struct extent_state *state); | ||
179 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, | 182 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, |
180 | int bits, int filled, struct extent_state *cached_state); | 183 | int bits, int filled, struct extent_state *cached_state); |
181 | int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | 184 | int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
@@ -185,6 +188,9 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
185 | gfp_t mask); | 188 | gfp_t mask); |
186 | int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | 189 | int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
187 | int bits, gfp_t mask); | 190 | int bits, gfp_t mask); |
191 | int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | ||
192 | int bits, int exclusive_bits, u64 *failed_start, | ||
193 | struct extent_state **cached_state, gfp_t mask); | ||
188 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, | 194 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, |
189 | gfp_t mask); | 195 | gfp_t mask); |
190 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, | 196 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, |
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 54a255065aa3..a562a250ae77 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
@@ -149,13 +149,14 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, | |||
149 | } | 149 | } |
150 | 150 | ||
151 | 151 | ||
152 | int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, | 152 | static int __btrfs_lookup_bio_sums(struct btrfs_root *root, |
153 | struct bio *bio, u32 *dst) | 153 | struct inode *inode, struct bio *bio, |
154 | u64 logical_offset, u32 *dst, int dio) | ||
154 | { | 155 | { |
155 | u32 sum; | 156 | u32 sum; |
156 | struct bio_vec *bvec = bio->bi_io_vec; | 157 | struct bio_vec *bvec = bio->bi_io_vec; |
157 | int bio_index = 0; | 158 | int bio_index = 0; |
158 | u64 offset; | 159 | u64 offset = 0; |
159 | u64 item_start_offset = 0; | 160 | u64 item_start_offset = 0; |
160 | u64 item_last_offset = 0; | 161 | u64 item_last_offset = 0; |
161 | u64 disk_bytenr; | 162 | u64 disk_bytenr; |
@@ -174,8 +175,11 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, | |||
174 | WARN_ON(bio->bi_vcnt <= 0); | 175 | WARN_ON(bio->bi_vcnt <= 0); |
175 | 176 | ||
176 | disk_bytenr = (u64)bio->bi_sector << 9; | 177 | disk_bytenr = (u64)bio->bi_sector << 9; |
178 | if (dio) | ||
179 | offset = logical_offset; | ||
177 | while (bio_index < bio->bi_vcnt) { | 180 | while (bio_index < bio->bi_vcnt) { |
178 | offset = page_offset(bvec->bv_page) + bvec->bv_offset; | 181 | if (!dio) |
182 | offset = page_offset(bvec->bv_page) + bvec->bv_offset; | ||
179 | ret = btrfs_find_ordered_sum(inode, offset, disk_bytenr, &sum); | 183 | ret = btrfs_find_ordered_sum(inode, offset, disk_bytenr, &sum); |
180 | if (ret == 0) | 184 | if (ret == 0) |
181 | goto found; | 185 | goto found; |
@@ -238,6 +242,7 @@ found: | |||
238 | else | 242 | else |
239 | set_state_private(io_tree, offset, sum); | 243 | set_state_private(io_tree, offset, sum); |
240 | disk_bytenr += bvec->bv_len; | 244 | disk_bytenr += bvec->bv_len; |
245 | offset += bvec->bv_len; | ||
241 | bio_index++; | 246 | bio_index++; |
242 | bvec++; | 247 | bvec++; |
243 | } | 248 | } |
@@ -245,6 +250,18 @@ found: | |||
245 | return 0; | 250 | return 0; |
246 | } | 251 | } |
247 | 252 | ||
253 | int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, | ||
254 | struct bio *bio, u32 *dst) | ||
255 | { | ||
256 | return __btrfs_lookup_bio_sums(root, inode, bio, 0, dst, 0); | ||
257 | } | ||
258 | |||
259 | int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, | ||
260 | struct bio *bio, u64 offset, u32 *dst) | ||
261 | { | ||
262 | return __btrfs_lookup_bio_sums(root, inode, bio, offset, dst, 1); | ||
263 | } | ||
264 | |||
248 | int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, | 265 | int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, |
249 | struct list_head *list) | 266 | struct list_head *list) |
250 | { | 267 | { |
@@ -657,6 +674,9 @@ again: | |||
657 | goto found; | 674 | goto found; |
658 | } | 675 | } |
659 | ret = PTR_ERR(item); | 676 | ret = PTR_ERR(item); |
677 | if (ret != -EFBIG && ret != -ENOENT) | ||
678 | goto fail_unlock; | ||
679 | |||
660 | if (ret == -EFBIG) { | 680 | if (ret == -EFBIG) { |
661 | u32 item_size; | 681 | u32 item_size; |
662 | /* we found one, but it isn't big enough yet */ | 682 | /* we found one, but it isn't big enough yet */ |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 29ff749ff4ca..79437c5eeb1e 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -46,32 +46,42 @@ | |||
46 | static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, | 46 | static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, |
47 | int write_bytes, | 47 | int write_bytes, |
48 | struct page **prepared_pages, | 48 | struct page **prepared_pages, |
49 | const char __user *buf) | 49 | struct iov_iter *i) |
50 | { | 50 | { |
51 | long page_fault = 0; | 51 | size_t copied; |
52 | int i; | 52 | int pg = 0; |
53 | int offset = pos & (PAGE_CACHE_SIZE - 1); | 53 | int offset = pos & (PAGE_CACHE_SIZE - 1); |
54 | 54 | ||
55 | for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) { | 55 | while (write_bytes > 0) { |
56 | size_t count = min_t(size_t, | 56 | size_t count = min_t(size_t, |
57 | PAGE_CACHE_SIZE - offset, write_bytes); | 57 | PAGE_CACHE_SIZE - offset, write_bytes); |
58 | struct page *page = prepared_pages[i]; | 58 | struct page *page = prepared_pages[pg]; |
59 | fault_in_pages_readable(buf, count); | 59 | again: |
60 | if (unlikely(iov_iter_fault_in_readable(i, count))) | ||
61 | return -EFAULT; | ||
60 | 62 | ||
61 | /* Copy data from userspace to the current page */ | 63 | /* Copy data from userspace to the current page */ |
62 | kmap(page); | 64 | copied = iov_iter_copy_from_user(page, i, offset, count); |
63 | page_fault = __copy_from_user(page_address(page) + offset, | 65 | |
64 | buf, count); | ||
65 | /* Flush processor's dcache for this page */ | 66 | /* Flush processor's dcache for this page */ |
66 | flush_dcache_page(page); | 67 | flush_dcache_page(page); |
67 | kunmap(page); | 68 | iov_iter_advance(i, copied); |
68 | buf += count; | 69 | write_bytes -= copied; |
69 | write_bytes -= count; | ||
70 | 70 | ||
71 | if (page_fault) | 71 | if (unlikely(copied == 0)) { |
72 | break; | 72 | count = min_t(size_t, PAGE_CACHE_SIZE - offset, |
73 | iov_iter_single_seg_count(i)); | ||
74 | goto again; | ||
75 | } | ||
76 | |||
77 | if (unlikely(copied < PAGE_CACHE_SIZE - offset)) { | ||
78 | offset += copied; | ||
79 | } else { | ||
80 | pg++; | ||
81 | offset = 0; | ||
82 | } | ||
73 | } | 83 | } |
74 | return page_fault ? -EFAULT : 0; | 84 | return 0; |
75 | } | 85 | } |
76 | 86 | ||
77 | /* | 87 | /* |
@@ -126,8 +136,7 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
126 | end_of_last_block = start_pos + num_bytes - 1; | 136 | end_of_last_block = start_pos + num_bytes - 1; |
127 | err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, | 137 | err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, |
128 | NULL); | 138 | NULL); |
129 | if (err) | 139 | BUG_ON(err); |
130 | return err; | ||
131 | 140 | ||
132 | for (i = 0; i < num_pages; i++) { | 141 | for (i = 0; i < num_pages; i++) { |
133 | struct page *p = pages[i]; | 142 | struct page *p = pages[i]; |
@@ -142,7 +151,7 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
142 | * at this time. | 151 | * at this time. |
143 | */ | 152 | */ |
144 | } | 153 | } |
145 | return err; | 154 | return 0; |
146 | } | 155 | } |
147 | 156 | ||
148 | /* | 157 | /* |
@@ -823,45 +832,46 @@ again: | |||
823 | return 0; | 832 | return 0; |
824 | } | 833 | } |
825 | 834 | ||
826 | static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | 835 | static ssize_t btrfs_file_aio_write(struct kiocb *iocb, |
827 | size_t count, loff_t *ppos) | 836 | const struct iovec *iov, |
837 | unsigned long nr_segs, loff_t pos) | ||
828 | { | 838 | { |
829 | loff_t pos; | 839 | struct file *file = iocb->ki_filp; |
840 | struct inode *inode = fdentry(file)->d_inode; | ||
841 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
842 | struct page *pinned[2]; | ||
843 | struct page **pages = NULL; | ||
844 | struct iov_iter i; | ||
845 | loff_t *ppos = &iocb->ki_pos; | ||
830 | loff_t start_pos; | 846 | loff_t start_pos; |
831 | ssize_t num_written = 0; | 847 | ssize_t num_written = 0; |
832 | ssize_t err = 0; | 848 | ssize_t err = 0; |
849 | size_t count; | ||
850 | size_t ocount; | ||
833 | int ret = 0; | 851 | int ret = 0; |
834 | struct inode *inode = fdentry(file)->d_inode; | ||
835 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
836 | struct page **pages = NULL; | ||
837 | int nrptrs; | 852 | int nrptrs; |
838 | struct page *pinned[2]; | ||
839 | unsigned long first_index; | 853 | unsigned long first_index; |
840 | unsigned long last_index; | 854 | unsigned long last_index; |
841 | int will_write; | 855 | int will_write; |
856 | int buffered = 0; | ||
842 | 857 | ||
843 | will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || | 858 | will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || |
844 | (file->f_flags & O_DIRECT)); | 859 | (file->f_flags & O_DIRECT)); |
845 | 860 | ||
846 | nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE, | ||
847 | PAGE_CACHE_SIZE / (sizeof(struct page *))); | ||
848 | pinned[0] = NULL; | 861 | pinned[0] = NULL; |
849 | pinned[1] = NULL; | 862 | pinned[1] = NULL; |
850 | 863 | ||
851 | pos = *ppos; | ||
852 | start_pos = pos; | 864 | start_pos = pos; |
853 | 865 | ||
854 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); | 866 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); |
855 | 867 | ||
856 | /* do the reserve before the mutex lock in case we have to do some | ||
857 | * flushing. We wouldn't deadlock, but this is more polite. | ||
858 | */ | ||
859 | err = btrfs_reserve_metadata_for_delalloc(root, inode, 1); | ||
860 | if (err) | ||
861 | goto out_nolock; | ||
862 | |||
863 | mutex_lock(&inode->i_mutex); | 868 | mutex_lock(&inode->i_mutex); |
864 | 869 | ||
870 | err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); | ||
871 | if (err) | ||
872 | goto out; | ||
873 | count = ocount; | ||
874 | |||
865 | current->backing_dev_info = inode->i_mapping->backing_dev_info; | 875 | current->backing_dev_info = inode->i_mapping->backing_dev_info; |
866 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); | 876 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); |
867 | if (err) | 877 | if (err) |
@@ -875,15 +885,53 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
875 | goto out; | 885 | goto out; |
876 | 886 | ||
877 | file_update_time(file); | 887 | file_update_time(file); |
888 | BTRFS_I(inode)->sequence++; | ||
889 | |||
890 | if (unlikely(file->f_flags & O_DIRECT)) { | ||
891 | num_written = generic_file_direct_write(iocb, iov, &nr_segs, | ||
892 | pos, ppos, count, | ||
893 | ocount); | ||
894 | /* | ||
895 | * the generic O_DIRECT will update in-memory i_size after the | ||
896 | * DIOs are done. But our endio handlers that update the on | ||
897 | * disk i_size never update past the in memory i_size. So we | ||
898 | * need one more update here to catch any additions to the | ||
899 | * file | ||
900 | */ | ||
901 | if (inode->i_size != BTRFS_I(inode)->disk_i_size) { | ||
902 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); | ||
903 | mark_inode_dirty(inode); | ||
904 | } | ||
878 | 905 | ||
906 | if (num_written < 0) { | ||
907 | ret = num_written; | ||
908 | num_written = 0; | ||
909 | goto out; | ||
910 | } else if (num_written == count) { | ||
911 | /* pick up pos changes done by the generic code */ | ||
912 | pos = *ppos; | ||
913 | goto out; | ||
914 | } | ||
915 | /* | ||
916 | * We are going to do buffered for the rest of the range, so we | ||
917 | * need to make sure to invalidate the buffered pages when we're | ||
918 | * done. | ||
919 | */ | ||
920 | buffered = 1; | ||
921 | pos += num_written; | ||
922 | } | ||
923 | |||
924 | iov_iter_init(&i, iov, nr_segs, count, num_written); | ||
925 | nrptrs = min((iov_iter_count(&i) + PAGE_CACHE_SIZE - 1) / | ||
926 | PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / | ||
927 | (sizeof(struct page *))); | ||
879 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); | 928 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); |
880 | 929 | ||
881 | /* generic_write_checks can change our pos */ | 930 | /* generic_write_checks can change our pos */ |
882 | start_pos = pos; | 931 | start_pos = pos; |
883 | 932 | ||
884 | BTRFS_I(inode)->sequence++; | ||
885 | first_index = pos >> PAGE_CACHE_SHIFT; | 933 | first_index = pos >> PAGE_CACHE_SHIFT; |
886 | last_index = (pos + count) >> PAGE_CACHE_SHIFT; | 934 | last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT; |
887 | 935 | ||
888 | /* | 936 | /* |
889 | * there are lots of better ways to do this, but this code | 937 | * there are lots of better ways to do this, but this code |
@@ -900,7 +948,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
900 | unlock_page(pinned[0]); | 948 | unlock_page(pinned[0]); |
901 | } | 949 | } |
902 | } | 950 | } |
903 | if ((pos + count) & (PAGE_CACHE_SIZE - 1)) { | 951 | if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) { |
904 | pinned[1] = grab_cache_page(inode->i_mapping, last_index); | 952 | pinned[1] = grab_cache_page(inode->i_mapping, last_index); |
905 | if (!PageUptodate(pinned[1])) { | 953 | if (!PageUptodate(pinned[1])) { |
906 | ret = btrfs_readpage(NULL, pinned[1]); | 954 | ret = btrfs_readpage(NULL, pinned[1]); |
@@ -911,10 +959,10 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
911 | } | 959 | } |
912 | } | 960 | } |
913 | 961 | ||
914 | while (count > 0) { | 962 | while (iov_iter_count(&i) > 0) { |
915 | size_t offset = pos & (PAGE_CACHE_SIZE - 1); | 963 | size_t offset = pos & (PAGE_CACHE_SIZE - 1); |
916 | size_t write_bytes = min(count, nrptrs * | 964 | size_t write_bytes = min(iov_iter_count(&i), |
917 | (size_t)PAGE_CACHE_SIZE - | 965 | nrptrs * (size_t)PAGE_CACHE_SIZE - |
918 | offset); | 966 | offset); |
919 | size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> | 967 | size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> |
920 | PAGE_CACHE_SHIFT; | 968 | PAGE_CACHE_SHIFT; |
@@ -922,7 +970,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
922 | WARN_ON(num_pages > nrptrs); | 970 | WARN_ON(num_pages > nrptrs); |
923 | memset(pages, 0, sizeof(struct page *) * nrptrs); | 971 | memset(pages, 0, sizeof(struct page *) * nrptrs); |
924 | 972 | ||
925 | ret = btrfs_check_data_free_space(root, inode, write_bytes); | 973 | ret = btrfs_delalloc_reserve_space(inode, write_bytes); |
926 | if (ret) | 974 | if (ret) |
927 | goto out; | 975 | goto out; |
928 | 976 | ||
@@ -930,26 +978,20 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
930 | pos, first_index, last_index, | 978 | pos, first_index, last_index, |
931 | write_bytes); | 979 | write_bytes); |
932 | if (ret) { | 980 | if (ret) { |
933 | btrfs_free_reserved_data_space(root, inode, | 981 | btrfs_delalloc_release_space(inode, write_bytes); |
934 | write_bytes); | ||
935 | goto out; | 982 | goto out; |
936 | } | 983 | } |
937 | 984 | ||
938 | ret = btrfs_copy_from_user(pos, num_pages, | 985 | ret = btrfs_copy_from_user(pos, num_pages, |
939 | write_bytes, pages, buf); | 986 | write_bytes, pages, &i); |
940 | if (ret) { | 987 | if (ret == 0) { |
941 | btrfs_free_reserved_data_space(root, inode, | 988 | dirty_and_release_pages(NULL, root, file, pages, |
942 | write_bytes); | 989 | num_pages, pos, write_bytes); |
943 | btrfs_drop_pages(pages, num_pages); | ||
944 | goto out; | ||
945 | } | 990 | } |
946 | 991 | ||
947 | ret = dirty_and_release_pages(NULL, root, file, pages, | ||
948 | num_pages, pos, write_bytes); | ||
949 | btrfs_drop_pages(pages, num_pages); | 992 | btrfs_drop_pages(pages, num_pages); |
950 | if (ret) { | 993 | if (ret) { |
951 | btrfs_free_reserved_data_space(root, inode, | 994 | btrfs_delalloc_release_space(inode, write_bytes); |
952 | write_bytes); | ||
953 | goto out; | 995 | goto out; |
954 | } | 996 | } |
955 | 997 | ||
@@ -965,8 +1007,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
965 | btrfs_throttle(root); | 1007 | btrfs_throttle(root); |
966 | } | 1008 | } |
967 | 1009 | ||
968 | buf += write_bytes; | ||
969 | count -= write_bytes; | ||
970 | pos += write_bytes; | 1010 | pos += write_bytes; |
971 | num_written += write_bytes; | 1011 | num_written += write_bytes; |
972 | 1012 | ||
@@ -976,9 +1016,7 @@ out: | |||
976 | mutex_unlock(&inode->i_mutex); | 1016 | mutex_unlock(&inode->i_mutex); |
977 | if (ret) | 1017 | if (ret) |
978 | err = ret; | 1018 | err = ret; |
979 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
980 | 1019 | ||
981 | out_nolock: | ||
982 | kfree(pages); | 1020 | kfree(pages); |
983 | if (pinned[0]) | 1021 | if (pinned[0]) |
984 | page_cache_release(pinned[0]); | 1022 | page_cache_release(pinned[0]); |
@@ -1008,7 +1046,7 @@ out_nolock: | |||
1008 | num_written = err; | 1046 | num_written = err; |
1009 | 1047 | ||
1010 | if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { | 1048 | if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { |
1011 | trans = btrfs_start_transaction(root, 1); | 1049 | trans = btrfs_start_transaction(root, 0); |
1012 | ret = btrfs_log_dentry_safe(trans, root, | 1050 | ret = btrfs_log_dentry_safe(trans, root, |
1013 | file->f_dentry); | 1051 | file->f_dentry); |
1014 | if (ret == 0) { | 1052 | if (ret == 0) { |
@@ -1023,7 +1061,7 @@ out_nolock: | |||
1023 | btrfs_end_transaction(trans, root); | 1061 | btrfs_end_transaction(trans, root); |
1024 | } | 1062 | } |
1025 | } | 1063 | } |
1026 | if (file->f_flags & O_DIRECT) { | 1064 | if (file->f_flags & O_DIRECT && buffered) { |
1027 | invalidate_mapping_pages(inode->i_mapping, | 1065 | invalidate_mapping_pages(inode->i_mapping, |
1028 | start_pos >> PAGE_CACHE_SHIFT, | 1066 | start_pos >> PAGE_CACHE_SHIFT, |
1029 | (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); | 1067 | (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); |
@@ -1104,9 +1142,9 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
1104 | if (file && file->private_data) | 1142 | if (file && file->private_data) |
1105 | btrfs_ioctl_trans_end(file); | 1143 | btrfs_ioctl_trans_end(file); |
1106 | 1144 | ||
1107 | trans = btrfs_start_transaction(root, 1); | 1145 | trans = btrfs_start_transaction(root, 0); |
1108 | if (!trans) { | 1146 | if (IS_ERR(trans)) { |
1109 | ret = -ENOMEM; | 1147 | ret = PTR_ERR(trans); |
1110 | goto out; | 1148 | goto out; |
1111 | } | 1149 | } |
1112 | 1150 | ||
@@ -1161,7 +1199,7 @@ const struct file_operations btrfs_file_operations = { | |||
1161 | .read = do_sync_read, | 1199 | .read = do_sync_read, |
1162 | .aio_read = generic_file_aio_read, | 1200 | .aio_read = generic_file_aio_read, |
1163 | .splice_read = generic_file_splice_read, | 1201 | .splice_read = generic_file_splice_read, |
1164 | .write = btrfs_file_write, | 1202 | .aio_write = btrfs_file_aio_write, |
1165 | .mmap = btrfs_file_mmap, | 1203 | .mmap = btrfs_file_mmap, |
1166 | .open = generic_file_open, | 1204 | .open = generic_file_open, |
1167 | .release = btrfs_release_file, | 1205 | .release = btrfs_release_file, |
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 72ce3c173d6a..64f1150bb48d 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c | |||
@@ -49,6 +49,33 @@ static int find_name_in_backref(struct btrfs_path *path, const char *name, | |||
49 | return 0; | 49 | return 0; |
50 | } | 50 | } |
51 | 51 | ||
52 | struct btrfs_inode_ref * | ||
53 | btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans, | ||
54 | struct btrfs_root *root, | ||
55 | struct btrfs_path *path, | ||
56 | const char *name, int name_len, | ||
57 | u64 inode_objectid, u64 ref_objectid, int mod) | ||
58 | { | ||
59 | struct btrfs_key key; | ||
60 | struct btrfs_inode_ref *ref; | ||
61 | int ins_len = mod < 0 ? -1 : 0; | ||
62 | int cow = mod != 0; | ||
63 | int ret; | ||
64 | |||
65 | key.objectid = inode_objectid; | ||
66 | key.type = BTRFS_INODE_REF_KEY; | ||
67 | key.offset = ref_objectid; | ||
68 | |||
69 | ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); | ||
70 | if (ret < 0) | ||
71 | return ERR_PTR(ret); | ||
72 | if (ret > 0) | ||
73 | return NULL; | ||
74 | if (!find_name_in_backref(path, name, name_len, &ref)) | ||
75 | return NULL; | ||
76 | return ref; | ||
77 | } | ||
78 | |||
52 | int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, | 79 | int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, |
53 | struct btrfs_root *root, | 80 | struct btrfs_root *root, |
54 | const char *name, int name_len, | 81 | const char *name, int name_len, |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d601629b85d1..fa6ccc1bfe2a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -252,6 +252,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, | |||
252 | inline_len, compressed_size, | 252 | inline_len, compressed_size, |
253 | compressed_pages); | 253 | compressed_pages); |
254 | BUG_ON(ret); | 254 | BUG_ON(ret); |
255 | btrfs_delalloc_release_metadata(inode, end + 1 - start); | ||
255 | btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); | 256 | btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); |
256 | return 0; | 257 | return 0; |
257 | } | 258 | } |
@@ -414,6 +415,7 @@ again: | |||
414 | trans = btrfs_join_transaction(root, 1); | 415 | trans = btrfs_join_transaction(root, 1); |
415 | BUG_ON(!trans); | 416 | BUG_ON(!trans); |
416 | btrfs_set_trans_block_group(trans, inode); | 417 | btrfs_set_trans_block_group(trans, inode); |
418 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
417 | 419 | ||
418 | /* lets try to make an inline extent */ | 420 | /* lets try to make an inline extent */ |
419 | if (ret || total_in < (actual_end - start)) { | 421 | if (ret || total_in < (actual_end - start)) { |
@@ -439,7 +441,6 @@ again: | |||
439 | start, end, NULL, | 441 | start, end, NULL, |
440 | EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | | 442 | EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | |
441 | EXTENT_CLEAR_DELALLOC | | 443 | EXTENT_CLEAR_DELALLOC | |
442 | EXTENT_CLEAR_ACCOUNTING | | ||
443 | EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); | 444 | EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); |
444 | 445 | ||
445 | btrfs_end_transaction(trans, root); | 446 | btrfs_end_transaction(trans, root); |
@@ -697,6 +698,38 @@ retry: | |||
697 | return 0; | 698 | return 0; |
698 | } | 699 | } |
699 | 700 | ||
701 | static u64 get_extent_allocation_hint(struct inode *inode, u64 start, | ||
702 | u64 num_bytes) | ||
703 | { | ||
704 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
705 | struct extent_map *em; | ||
706 | u64 alloc_hint = 0; | ||
707 | |||
708 | read_lock(&em_tree->lock); | ||
709 | em = search_extent_mapping(em_tree, start, num_bytes); | ||
710 | if (em) { | ||
711 | /* | ||
712 | * if block start isn't an actual block number then find the | ||
713 | * first block in this inode and use that as a hint. If that | ||
714 | * block is also bogus then just don't worry about it. | ||
715 | */ | ||
716 | if (em->block_start >= EXTENT_MAP_LAST_BYTE) { | ||
717 | free_extent_map(em); | ||
718 | em = search_extent_mapping(em_tree, 0, 0); | ||
719 | if (em && em->block_start < EXTENT_MAP_LAST_BYTE) | ||
720 | alloc_hint = em->block_start; | ||
721 | if (em) | ||
722 | free_extent_map(em); | ||
723 | } else { | ||
724 | alloc_hint = em->block_start; | ||
725 | free_extent_map(em); | ||
726 | } | ||
727 | } | ||
728 | read_unlock(&em_tree->lock); | ||
729 | |||
730 | return alloc_hint; | ||
731 | } | ||
732 | |||
700 | /* | 733 | /* |
701 | * when extent_io.c finds a delayed allocation range in the file, | 734 | * when extent_io.c finds a delayed allocation range in the file, |
702 | * the call backs end up in this code. The basic idea is to | 735 | * the call backs end up in this code. The basic idea is to |
@@ -734,6 +767,7 @@ static noinline int cow_file_range(struct inode *inode, | |||
734 | trans = btrfs_join_transaction(root, 1); | 767 | trans = btrfs_join_transaction(root, 1); |
735 | BUG_ON(!trans); | 768 | BUG_ON(!trans); |
736 | btrfs_set_trans_block_group(trans, inode); | 769 | btrfs_set_trans_block_group(trans, inode); |
770 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
737 | 771 | ||
738 | actual_end = min_t(u64, isize, end + 1); | 772 | actual_end = min_t(u64, isize, end + 1); |
739 | 773 | ||
@@ -753,7 +787,6 @@ static noinline int cow_file_range(struct inode *inode, | |||
753 | EXTENT_CLEAR_UNLOCK_PAGE | | 787 | EXTENT_CLEAR_UNLOCK_PAGE | |
754 | EXTENT_CLEAR_UNLOCK | | 788 | EXTENT_CLEAR_UNLOCK | |
755 | EXTENT_CLEAR_DELALLOC | | 789 | EXTENT_CLEAR_DELALLOC | |
756 | EXTENT_CLEAR_ACCOUNTING | | ||
757 | EXTENT_CLEAR_DIRTY | | 790 | EXTENT_CLEAR_DIRTY | |
758 | EXTENT_SET_WRITEBACK | | 791 | EXTENT_SET_WRITEBACK | |
759 | EXTENT_END_WRITEBACK); | 792 | EXTENT_END_WRITEBACK); |
@@ -769,29 +802,7 @@ static noinline int cow_file_range(struct inode *inode, | |||
769 | BUG_ON(disk_num_bytes > | 802 | BUG_ON(disk_num_bytes > |
770 | btrfs_super_total_bytes(&root->fs_info->super_copy)); | 803 | btrfs_super_total_bytes(&root->fs_info->super_copy)); |
771 | 804 | ||
772 | 805 | alloc_hint = get_extent_allocation_hint(inode, start, num_bytes); | |
773 | read_lock(&BTRFS_I(inode)->extent_tree.lock); | ||
774 | em = search_extent_mapping(&BTRFS_I(inode)->extent_tree, | ||
775 | start, num_bytes); | ||
776 | if (em) { | ||
777 | /* | ||
778 | * if block start isn't an actual block number then find the | ||
779 | * first block in this inode and use that as a hint. If that | ||
780 | * block is also bogus then just don't worry about it. | ||
781 | */ | ||
782 | if (em->block_start >= EXTENT_MAP_LAST_BYTE) { | ||
783 | free_extent_map(em); | ||
784 | em = search_extent_mapping(em_tree, 0, 0); | ||
785 | if (em && em->block_start < EXTENT_MAP_LAST_BYTE) | ||
786 | alloc_hint = em->block_start; | ||
787 | if (em) | ||
788 | free_extent_map(em); | ||
789 | } else { | ||
790 | alloc_hint = em->block_start; | ||
791 | free_extent_map(em); | ||
792 | } | ||
793 | } | ||
794 | read_unlock(&BTRFS_I(inode)->extent_tree.lock); | ||
795 | btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); | 806 | btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); |
796 | 807 | ||
797 | while (disk_num_bytes > 0) { | 808 | while (disk_num_bytes > 0) { |
@@ -1174,6 +1185,13 @@ out_check: | |||
1174 | num_bytes, num_bytes, type); | 1185 | num_bytes, num_bytes, type); |
1175 | BUG_ON(ret); | 1186 | BUG_ON(ret); |
1176 | 1187 | ||
1188 | if (root->root_key.objectid == | ||
1189 | BTRFS_DATA_RELOC_TREE_OBJECTID) { | ||
1190 | ret = btrfs_reloc_clone_csums(inode, cur_offset, | ||
1191 | num_bytes); | ||
1192 | BUG_ON(ret); | ||
1193 | } | ||
1194 | |||
1177 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, | 1195 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, |
1178 | cur_offset, cur_offset + num_bytes - 1, | 1196 | cur_offset, cur_offset + num_bytes - 1, |
1179 | locked_page, EXTENT_CLEAR_UNLOCK_PAGE | | 1197 | locked_page, EXTENT_CLEAR_UNLOCK_PAGE | |
@@ -1226,15 +1244,13 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, | |||
1226 | } | 1244 | } |
1227 | 1245 | ||
1228 | static int btrfs_split_extent_hook(struct inode *inode, | 1246 | static int btrfs_split_extent_hook(struct inode *inode, |
1229 | struct extent_state *orig, u64 split) | 1247 | struct extent_state *orig, u64 split) |
1230 | { | 1248 | { |
1249 | /* not delalloc, ignore it */ | ||
1231 | if (!(orig->state & EXTENT_DELALLOC)) | 1250 | if (!(orig->state & EXTENT_DELALLOC)) |
1232 | return 0; | 1251 | return 0; |
1233 | 1252 | ||
1234 | spin_lock(&BTRFS_I(inode)->accounting_lock); | 1253 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); |
1235 | BTRFS_I(inode)->outstanding_extents++; | ||
1236 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
1237 | |||
1238 | return 0; | 1254 | return 0; |
1239 | } | 1255 | } |
1240 | 1256 | ||
@@ -1252,10 +1268,7 @@ static int btrfs_merge_extent_hook(struct inode *inode, | |||
1252 | if (!(other->state & EXTENT_DELALLOC)) | 1268 | if (!(other->state & EXTENT_DELALLOC)) |
1253 | return 0; | 1269 | return 0; |
1254 | 1270 | ||
1255 | spin_lock(&BTRFS_I(inode)->accounting_lock); | 1271 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); |
1256 | BTRFS_I(inode)->outstanding_extents--; | ||
1257 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
1258 | |||
1259 | return 0; | 1272 | return 0; |
1260 | } | 1273 | } |
1261 | 1274 | ||
@@ -1264,8 +1277,8 @@ static int btrfs_merge_extent_hook(struct inode *inode, | |||
1264 | * bytes in this file, and to maintain the list of inodes that | 1277 | * bytes in this file, and to maintain the list of inodes that |
1265 | * have pending delalloc work to be done. | 1278 | * have pending delalloc work to be done. |
1266 | */ | 1279 | */ |
1267 | static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, | 1280 | static int btrfs_set_bit_hook(struct inode *inode, |
1268 | unsigned long old, unsigned long bits) | 1281 | struct extent_state *state, int *bits) |
1269 | { | 1282 | { |
1270 | 1283 | ||
1271 | /* | 1284 | /* |
@@ -1273,17 +1286,18 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, | |||
1273 | * but in this case, we are only testeing for the DELALLOC | 1286 | * but in this case, we are only testeing for the DELALLOC |
1274 | * bit, which is only set or cleared with irqs on | 1287 | * bit, which is only set or cleared with irqs on |
1275 | */ | 1288 | */ |
1276 | if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { | 1289 | if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { |
1277 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1290 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1291 | u64 len = state->end + 1 - state->start; | ||
1278 | 1292 | ||
1279 | spin_lock(&BTRFS_I(inode)->accounting_lock); | 1293 | if (*bits & EXTENT_FIRST_DELALLOC) |
1280 | BTRFS_I(inode)->outstanding_extents++; | 1294 | *bits &= ~EXTENT_FIRST_DELALLOC; |
1281 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | 1295 | else |
1282 | btrfs_delalloc_reserve_space(root, inode, end - start + 1); | 1296 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); |
1283 | 1297 | ||
1284 | spin_lock(&root->fs_info->delalloc_lock); | 1298 | spin_lock(&root->fs_info->delalloc_lock); |
1285 | BTRFS_I(inode)->delalloc_bytes += end - start + 1; | 1299 | BTRFS_I(inode)->delalloc_bytes += len; |
1286 | root->fs_info->delalloc_bytes += end - start + 1; | 1300 | root->fs_info->delalloc_bytes += len; |
1287 | if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) { | 1301 | if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) { |
1288 | list_add_tail(&BTRFS_I(inode)->delalloc_inodes, | 1302 | list_add_tail(&BTRFS_I(inode)->delalloc_inodes, |
1289 | &root->fs_info->delalloc_inodes); | 1303 | &root->fs_info->delalloc_inodes); |
@@ -1297,45 +1311,32 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, | |||
1297 | * extent_io.c clear_bit_hook, see set_bit_hook for why | 1311 | * extent_io.c clear_bit_hook, see set_bit_hook for why |
1298 | */ | 1312 | */ |
1299 | static int btrfs_clear_bit_hook(struct inode *inode, | 1313 | static int btrfs_clear_bit_hook(struct inode *inode, |
1300 | struct extent_state *state, unsigned long bits) | 1314 | struct extent_state *state, int *bits) |
1301 | { | 1315 | { |
1302 | /* | 1316 | /* |
1303 | * set_bit and clear bit hooks normally require _irqsave/restore | 1317 | * set_bit and clear bit hooks normally require _irqsave/restore |
1304 | * but in this case, we are only testeing for the DELALLOC | 1318 | * but in this case, we are only testeing for the DELALLOC |
1305 | * bit, which is only set or cleared with irqs on | 1319 | * bit, which is only set or cleared with irqs on |
1306 | */ | 1320 | */ |
1307 | if ((state->state & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { | 1321 | if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { |
1308 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1322 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1323 | u64 len = state->end + 1 - state->start; | ||
1309 | 1324 | ||
1310 | if (bits & EXTENT_DO_ACCOUNTING) { | 1325 | if (*bits & EXTENT_FIRST_DELALLOC) |
1311 | spin_lock(&BTRFS_I(inode)->accounting_lock); | 1326 | *bits &= ~EXTENT_FIRST_DELALLOC; |
1312 | WARN_ON(!BTRFS_I(inode)->outstanding_extents); | 1327 | else if (!(*bits & EXTENT_DO_ACCOUNTING)) |
1313 | BTRFS_I(inode)->outstanding_extents--; | 1328 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); |
1314 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | 1329 | |
1315 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | 1330 | if (*bits & EXTENT_DO_ACCOUNTING) |
1316 | } | 1331 | btrfs_delalloc_release_metadata(inode, len); |
1332 | |||
1333 | if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) | ||
1334 | btrfs_free_reserved_data_space(inode, len); | ||
1317 | 1335 | ||
1318 | spin_lock(&root->fs_info->delalloc_lock); | 1336 | spin_lock(&root->fs_info->delalloc_lock); |
1319 | if (state->end - state->start + 1 > | 1337 | root->fs_info->delalloc_bytes -= len; |
1320 | root->fs_info->delalloc_bytes) { | 1338 | BTRFS_I(inode)->delalloc_bytes -= len; |
1321 | printk(KERN_INFO "btrfs warning: delalloc account " | 1339 | |
1322 | "%llu %llu\n", | ||
1323 | (unsigned long long) | ||
1324 | state->end - state->start + 1, | ||
1325 | (unsigned long long) | ||
1326 | root->fs_info->delalloc_bytes); | ||
1327 | btrfs_delalloc_free_space(root, inode, (u64)-1); | ||
1328 | root->fs_info->delalloc_bytes = 0; | ||
1329 | BTRFS_I(inode)->delalloc_bytes = 0; | ||
1330 | } else { | ||
1331 | btrfs_delalloc_free_space(root, inode, | ||
1332 | state->end - | ||
1333 | state->start + 1); | ||
1334 | root->fs_info->delalloc_bytes -= state->end - | ||
1335 | state->start + 1; | ||
1336 | BTRFS_I(inode)->delalloc_bytes -= state->end - | ||
1337 | state->start + 1; | ||
1338 | } | ||
1339 | if (BTRFS_I(inode)->delalloc_bytes == 0 && | 1340 | if (BTRFS_I(inode)->delalloc_bytes == 0 && |
1340 | !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { | 1341 | !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { |
1341 | list_del_init(&BTRFS_I(inode)->delalloc_inodes); | 1342 | list_del_init(&BTRFS_I(inode)->delalloc_inodes); |
@@ -1384,7 +1385,8 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, | |||
1384 | */ | 1385 | */ |
1385 | static int __btrfs_submit_bio_start(struct inode *inode, int rw, | 1386 | static int __btrfs_submit_bio_start(struct inode *inode, int rw, |
1386 | struct bio *bio, int mirror_num, | 1387 | struct bio *bio, int mirror_num, |
1387 | unsigned long bio_flags) | 1388 | unsigned long bio_flags, |
1389 | u64 bio_offset) | ||
1388 | { | 1390 | { |
1389 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1391 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1390 | int ret = 0; | 1392 | int ret = 0; |
@@ -1403,7 +1405,8 @@ static int __btrfs_submit_bio_start(struct inode *inode, int rw, | |||
1403 | * are inserted into the btree | 1405 | * are inserted into the btree |
1404 | */ | 1406 | */ |
1405 | static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio, | 1407 | static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio, |
1406 | int mirror_num, unsigned long bio_flags) | 1408 | int mirror_num, unsigned long bio_flags, |
1409 | u64 bio_offset) | ||
1407 | { | 1410 | { |
1408 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1411 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1409 | return btrfs_map_bio(root, rw, bio, mirror_num, 1); | 1412 | return btrfs_map_bio(root, rw, bio, mirror_num, 1); |
@@ -1414,7 +1417,8 @@ static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio, | |||
1414 | * on write, or reading the csums from the tree before a read | 1417 | * on write, or reading the csums from the tree before a read |
1415 | */ | 1418 | */ |
1416 | static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | 1419 | static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, |
1417 | int mirror_num, unsigned long bio_flags) | 1420 | int mirror_num, unsigned long bio_flags, |
1421 | u64 bio_offset) | ||
1418 | { | 1422 | { |
1419 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1423 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1420 | int ret = 0; | 1424 | int ret = 0; |
@@ -1439,7 +1443,8 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
1439 | /* we're doing a write, do the async checksumming */ | 1443 | /* we're doing a write, do the async checksumming */ |
1440 | return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, | 1444 | return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, |
1441 | inode, rw, bio, mirror_num, | 1445 | inode, rw, bio, mirror_num, |
1442 | bio_flags, __btrfs_submit_bio_start, | 1446 | bio_flags, bio_offset, |
1447 | __btrfs_submit_bio_start, | ||
1443 | __btrfs_submit_bio_done); | 1448 | __btrfs_submit_bio_done); |
1444 | } | 1449 | } |
1445 | 1450 | ||
@@ -1520,6 +1525,7 @@ again: | |||
1520 | goto again; | 1525 | goto again; |
1521 | } | 1526 | } |
1522 | 1527 | ||
1528 | BUG(); | ||
1523 | btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state); | 1529 | btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state); |
1524 | ClearPageChecked(page); | 1530 | ClearPageChecked(page); |
1525 | out: | 1531 | out: |
@@ -1650,7 +1656,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
1650 | static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | 1656 | static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) |
1651 | { | 1657 | { |
1652 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1658 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1653 | struct btrfs_trans_handle *trans; | 1659 | struct btrfs_trans_handle *trans = NULL; |
1654 | struct btrfs_ordered_extent *ordered_extent = NULL; | 1660 | struct btrfs_ordered_extent *ordered_extent = NULL; |
1655 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 1661 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
1656 | struct extent_state *cached_state = NULL; | 1662 | struct extent_state *cached_state = NULL; |
@@ -1668,9 +1674,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1668 | ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); | 1674 | ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); |
1669 | if (!ret) { | 1675 | if (!ret) { |
1670 | trans = btrfs_join_transaction(root, 1); | 1676 | trans = btrfs_join_transaction(root, 1); |
1677 | btrfs_set_trans_block_group(trans, inode); | ||
1678 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
1671 | ret = btrfs_update_inode(trans, root, inode); | 1679 | ret = btrfs_update_inode(trans, root, inode); |
1672 | BUG_ON(ret); | 1680 | BUG_ON(ret); |
1673 | btrfs_end_transaction(trans, root); | ||
1674 | } | 1681 | } |
1675 | goto out; | 1682 | goto out; |
1676 | } | 1683 | } |
@@ -1680,6 +1687,8 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1680 | 0, &cached_state, GFP_NOFS); | 1687 | 0, &cached_state, GFP_NOFS); |
1681 | 1688 | ||
1682 | trans = btrfs_join_transaction(root, 1); | 1689 | trans = btrfs_join_transaction(root, 1); |
1690 | btrfs_set_trans_block_group(trans, inode); | ||
1691 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
1683 | 1692 | ||
1684 | if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) | 1693 | if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) |
1685 | compressed = 1; | 1694 | compressed = 1; |
@@ -1711,12 +1720,13 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1711 | add_pending_csums(trans, inode, ordered_extent->file_offset, | 1720 | add_pending_csums(trans, inode, ordered_extent->file_offset, |
1712 | &ordered_extent->list); | 1721 | &ordered_extent->list); |
1713 | 1722 | ||
1714 | /* this also removes the ordered extent from the tree */ | ||
1715 | btrfs_ordered_update_i_size(inode, 0, ordered_extent); | 1723 | btrfs_ordered_update_i_size(inode, 0, ordered_extent); |
1716 | ret = btrfs_update_inode(trans, root, inode); | 1724 | ret = btrfs_update_inode(trans, root, inode); |
1717 | BUG_ON(ret); | 1725 | BUG_ON(ret); |
1718 | btrfs_end_transaction(trans, root); | ||
1719 | out: | 1726 | out: |
1727 | btrfs_delalloc_release_metadata(inode, ordered_extent->len); | ||
1728 | if (trans) | ||
1729 | btrfs_end_transaction(trans, root); | ||
1720 | /* once for us */ | 1730 | /* once for us */ |
1721 | btrfs_put_ordered_extent(ordered_extent); | 1731 | btrfs_put_ordered_extent(ordered_extent); |
1722 | /* once for the tree */ | 1732 | /* once for the tree */ |
@@ -1838,7 +1848,7 @@ static int btrfs_io_failed_hook(struct bio *failed_bio, | |||
1838 | 1848 | ||
1839 | BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, | 1849 | BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, |
1840 | failrec->last_mirror, | 1850 | failrec->last_mirror, |
1841 | failrec->bio_flags); | 1851 | failrec->bio_flags, 0); |
1842 | return 0; | 1852 | return 0; |
1843 | } | 1853 | } |
1844 | 1854 | ||
@@ -1993,32 +2003,196 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root) | |||
1993 | } | 2003 | } |
1994 | 2004 | ||
1995 | /* | 2005 | /* |
2006 | * calculate extra metadata reservation when snapshotting a subvolume | ||
2007 | * contains orphan files. | ||
2008 | */ | ||
2009 | void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans, | ||
2010 | struct btrfs_pending_snapshot *pending, | ||
2011 | u64 *bytes_to_reserve) | ||
2012 | { | ||
2013 | struct btrfs_root *root; | ||
2014 | struct btrfs_block_rsv *block_rsv; | ||
2015 | u64 num_bytes; | ||
2016 | int index; | ||
2017 | |||
2018 | root = pending->root; | ||
2019 | if (!root->orphan_block_rsv || list_empty(&root->orphan_list)) | ||
2020 | return; | ||
2021 | |||
2022 | block_rsv = root->orphan_block_rsv; | ||
2023 | |||
2024 | /* orphan block reservation for the snapshot */ | ||
2025 | num_bytes = block_rsv->size; | ||
2026 | |||
2027 | /* | ||
2028 | * after the snapshot is created, COWing tree blocks may use more | ||
2029 | * space than it frees. So we should make sure there is enough | ||
2030 | * reserved space. | ||
2031 | */ | ||
2032 | index = trans->transid & 0x1; | ||
2033 | if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) { | ||
2034 | num_bytes += block_rsv->size - | ||
2035 | (block_rsv->reserved + block_rsv->freed[index]); | ||
2036 | } | ||
2037 | |||
2038 | *bytes_to_reserve += num_bytes; | ||
2039 | } | ||
2040 | |||
2041 | void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans, | ||
2042 | struct btrfs_pending_snapshot *pending) | ||
2043 | { | ||
2044 | struct btrfs_root *root = pending->root; | ||
2045 | struct btrfs_root *snap = pending->snap; | ||
2046 | struct btrfs_block_rsv *block_rsv; | ||
2047 | u64 num_bytes; | ||
2048 | int index; | ||
2049 | int ret; | ||
2050 | |||
2051 | if (!root->orphan_block_rsv || list_empty(&root->orphan_list)) | ||
2052 | return; | ||
2053 | |||
2054 | /* refill source subvolume's orphan block reservation */ | ||
2055 | block_rsv = root->orphan_block_rsv; | ||
2056 | index = trans->transid & 0x1; | ||
2057 | if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) { | ||
2058 | num_bytes = block_rsv->size - | ||
2059 | (block_rsv->reserved + block_rsv->freed[index]); | ||
2060 | ret = btrfs_block_rsv_migrate(&pending->block_rsv, | ||
2061 | root->orphan_block_rsv, | ||
2062 | num_bytes); | ||
2063 | BUG_ON(ret); | ||
2064 | } | ||
2065 | |||
2066 | /* setup orphan block reservation for the snapshot */ | ||
2067 | block_rsv = btrfs_alloc_block_rsv(snap); | ||
2068 | BUG_ON(!block_rsv); | ||
2069 | |||
2070 | btrfs_add_durable_block_rsv(root->fs_info, block_rsv); | ||
2071 | snap->orphan_block_rsv = block_rsv; | ||
2072 | |||
2073 | num_bytes = root->orphan_block_rsv->size; | ||
2074 | ret = btrfs_block_rsv_migrate(&pending->block_rsv, | ||
2075 | block_rsv, num_bytes); | ||
2076 | BUG_ON(ret); | ||
2077 | |||
2078 | #if 0 | ||
2079 | /* insert orphan item for the snapshot */ | ||
2080 | WARN_ON(!root->orphan_item_inserted); | ||
2081 | ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root, | ||
2082 | snap->root_key.objectid); | ||
2083 | BUG_ON(ret); | ||
2084 | snap->orphan_item_inserted = 1; | ||
2085 | #endif | ||
2086 | } | ||
2087 | |||
2088 | enum btrfs_orphan_cleanup_state { | ||
2089 | ORPHAN_CLEANUP_STARTED = 1, | ||
2090 | ORPHAN_CLEANUP_DONE = 2, | ||
2091 | }; | ||
2092 | |||
2093 | /* | ||
2094 | * This is called in transaction commmit time. If there are no orphan | ||
2095 | * files in the subvolume, it removes orphan item and frees block_rsv | ||
2096 | * structure. | ||
2097 | */ | ||
2098 | void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, | ||
2099 | struct btrfs_root *root) | ||
2100 | { | ||
2101 | int ret; | ||
2102 | |||
2103 | if (!list_empty(&root->orphan_list) || | ||
2104 | root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE) | ||
2105 | return; | ||
2106 | |||
2107 | if (root->orphan_item_inserted && | ||
2108 | btrfs_root_refs(&root->root_item) > 0) { | ||
2109 | ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root, | ||
2110 | root->root_key.objectid); | ||
2111 | BUG_ON(ret); | ||
2112 | root->orphan_item_inserted = 0; | ||
2113 | } | ||
2114 | |||
2115 | if (root->orphan_block_rsv) { | ||
2116 | WARN_ON(root->orphan_block_rsv->size > 0); | ||
2117 | btrfs_free_block_rsv(root, root->orphan_block_rsv); | ||
2118 | root->orphan_block_rsv = NULL; | ||
2119 | } | ||
2120 | } | ||
2121 | |||
2122 | /* | ||
1996 | * This creates an orphan entry for the given inode in case something goes | 2123 | * This creates an orphan entry for the given inode in case something goes |
1997 | * wrong in the middle of an unlink/truncate. | 2124 | * wrong in the middle of an unlink/truncate. |
2125 | * | ||
2126 | * NOTE: caller of this function should reserve 5 units of metadata for | ||
2127 | * this function. | ||
1998 | */ | 2128 | */ |
1999 | int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) | 2129 | int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) |
2000 | { | 2130 | { |
2001 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2131 | struct btrfs_root *root = BTRFS_I(inode)->root; |
2002 | int ret = 0; | 2132 | struct btrfs_block_rsv *block_rsv = NULL; |
2133 | int reserve = 0; | ||
2134 | int insert = 0; | ||
2135 | int ret; | ||
2003 | 2136 | ||
2004 | spin_lock(&root->list_lock); | 2137 | if (!root->orphan_block_rsv) { |
2138 | block_rsv = btrfs_alloc_block_rsv(root); | ||
2139 | BUG_ON(!block_rsv); | ||
2140 | } | ||
2005 | 2141 | ||
2006 | /* already on the orphan list, we're good */ | 2142 | spin_lock(&root->orphan_lock); |
2007 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { | 2143 | if (!root->orphan_block_rsv) { |
2008 | spin_unlock(&root->list_lock); | 2144 | root->orphan_block_rsv = block_rsv; |
2009 | return 0; | 2145 | } else if (block_rsv) { |
2146 | btrfs_free_block_rsv(root, block_rsv); | ||
2147 | block_rsv = NULL; | ||
2148 | } | ||
2149 | |||
2150 | if (list_empty(&BTRFS_I(inode)->i_orphan)) { | ||
2151 | list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); | ||
2152 | #if 0 | ||
2153 | /* | ||
2154 | * For proper ENOSPC handling, we should do orphan | ||
2155 | * cleanup when mounting. But this introduces backward | ||
2156 | * compatibility issue. | ||
2157 | */ | ||
2158 | if (!xchg(&root->orphan_item_inserted, 1)) | ||
2159 | insert = 2; | ||
2160 | else | ||
2161 | insert = 1; | ||
2162 | #endif | ||
2163 | insert = 1; | ||
2164 | } else { | ||
2165 | WARN_ON(!BTRFS_I(inode)->orphan_meta_reserved); | ||
2010 | } | 2166 | } |
2011 | 2167 | ||
2012 | list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); | 2168 | if (!BTRFS_I(inode)->orphan_meta_reserved) { |
2169 | BTRFS_I(inode)->orphan_meta_reserved = 1; | ||
2170 | reserve = 1; | ||
2171 | } | ||
2172 | spin_unlock(&root->orphan_lock); | ||
2013 | 2173 | ||
2014 | spin_unlock(&root->list_lock); | 2174 | if (block_rsv) |
2175 | btrfs_add_durable_block_rsv(root->fs_info, block_rsv); | ||
2015 | 2176 | ||
2016 | /* | 2177 | /* grab metadata reservation from transaction handle */ |
2017 | * insert an orphan item to track this unlinked/truncated file | 2178 | if (reserve) { |
2018 | */ | 2179 | ret = btrfs_orphan_reserve_metadata(trans, inode); |
2019 | ret = btrfs_insert_orphan_item(trans, root, inode->i_ino); | 2180 | BUG_ON(ret); |
2181 | } | ||
2020 | 2182 | ||
2021 | return ret; | 2183 | /* insert an orphan item to track this unlinked/truncated file */ |
2184 | if (insert >= 1) { | ||
2185 | ret = btrfs_insert_orphan_item(trans, root, inode->i_ino); | ||
2186 | BUG_ON(ret); | ||
2187 | } | ||
2188 | |||
2189 | /* insert an orphan item to track subvolume contains orphan files */ | ||
2190 | if (insert >= 2) { | ||
2191 | ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root, | ||
2192 | root->root_key.objectid); | ||
2193 | BUG_ON(ret); | ||
2194 | } | ||
2195 | return 0; | ||
2022 | } | 2196 | } |
2023 | 2197 | ||
2024 | /* | 2198 | /* |
@@ -2028,26 +2202,31 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) | |||
2028 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) | 2202 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) |
2029 | { | 2203 | { |
2030 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2204 | struct btrfs_root *root = BTRFS_I(inode)->root; |
2205 | int delete_item = 0; | ||
2206 | int release_rsv = 0; | ||
2031 | int ret = 0; | 2207 | int ret = 0; |
2032 | 2208 | ||
2033 | spin_lock(&root->list_lock); | 2209 | spin_lock(&root->orphan_lock); |
2034 | 2210 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { | |
2035 | if (list_empty(&BTRFS_I(inode)->i_orphan)) { | 2211 | list_del_init(&BTRFS_I(inode)->i_orphan); |
2036 | spin_unlock(&root->list_lock); | 2212 | delete_item = 1; |
2037 | return 0; | ||
2038 | } | 2213 | } |
2039 | 2214 | ||
2040 | list_del_init(&BTRFS_I(inode)->i_orphan); | 2215 | if (BTRFS_I(inode)->orphan_meta_reserved) { |
2041 | if (!trans) { | 2216 | BTRFS_I(inode)->orphan_meta_reserved = 0; |
2042 | spin_unlock(&root->list_lock); | 2217 | release_rsv = 1; |
2043 | return 0; | ||
2044 | } | 2218 | } |
2219 | spin_unlock(&root->orphan_lock); | ||
2045 | 2220 | ||
2046 | spin_unlock(&root->list_lock); | 2221 | if (trans && delete_item) { |
2222 | ret = btrfs_del_orphan_item(trans, root, inode->i_ino); | ||
2223 | BUG_ON(ret); | ||
2224 | } | ||
2047 | 2225 | ||
2048 | ret = btrfs_del_orphan_item(trans, root, inode->i_ino); | 2226 | if (release_rsv) |
2227 | btrfs_orphan_release_metadata(inode); | ||
2049 | 2228 | ||
2050 | return ret; | 2229 | return 0; |
2051 | } | 2230 | } |
2052 | 2231 | ||
2053 | /* | 2232 | /* |
@@ -2064,7 +2243,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2064 | struct inode *inode; | 2243 | struct inode *inode; |
2065 | int ret = 0, nr_unlink = 0, nr_truncate = 0; | 2244 | int ret = 0, nr_unlink = 0, nr_truncate = 0; |
2066 | 2245 | ||
2067 | if (!xchg(&root->clean_orphans, 0)) | 2246 | if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED)) |
2068 | return; | 2247 | return; |
2069 | 2248 | ||
2070 | path = btrfs_alloc_path(); | 2249 | path = btrfs_alloc_path(); |
@@ -2117,16 +2296,15 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2117 | found_key.type = BTRFS_INODE_ITEM_KEY; | 2296 | found_key.type = BTRFS_INODE_ITEM_KEY; |
2118 | found_key.offset = 0; | 2297 | found_key.offset = 0; |
2119 | inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); | 2298 | inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); |
2120 | if (IS_ERR(inode)) | 2299 | BUG_ON(IS_ERR(inode)); |
2121 | break; | ||
2122 | 2300 | ||
2123 | /* | 2301 | /* |
2124 | * add this inode to the orphan list so btrfs_orphan_del does | 2302 | * add this inode to the orphan list so btrfs_orphan_del does |
2125 | * the proper thing when we hit it | 2303 | * the proper thing when we hit it |
2126 | */ | 2304 | */ |
2127 | spin_lock(&root->list_lock); | 2305 | spin_lock(&root->orphan_lock); |
2128 | list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); | 2306 | list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); |
2129 | spin_unlock(&root->list_lock); | 2307 | spin_unlock(&root->orphan_lock); |
2130 | 2308 | ||
2131 | /* | 2309 | /* |
2132 | * if this is a bad inode, means we actually succeeded in | 2310 | * if this is a bad inode, means we actually succeeded in |
@@ -2135,7 +2313,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2135 | * do a destroy_inode | 2313 | * do a destroy_inode |
2136 | */ | 2314 | */ |
2137 | if (is_bad_inode(inode)) { | 2315 | if (is_bad_inode(inode)) { |
2138 | trans = btrfs_start_transaction(root, 1); | 2316 | trans = btrfs_start_transaction(root, 0); |
2139 | btrfs_orphan_del(trans, inode); | 2317 | btrfs_orphan_del(trans, inode); |
2140 | btrfs_end_transaction(trans, root); | 2318 | btrfs_end_transaction(trans, root); |
2141 | iput(inode); | 2319 | iput(inode); |
@@ -2153,13 +2331,23 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2153 | /* this will do delete_inode and everything for us */ | 2331 | /* this will do delete_inode and everything for us */ |
2154 | iput(inode); | 2332 | iput(inode); |
2155 | } | 2333 | } |
2334 | btrfs_free_path(path); | ||
2335 | |||
2336 | root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE; | ||
2337 | |||
2338 | if (root->orphan_block_rsv) | ||
2339 | btrfs_block_rsv_release(root, root->orphan_block_rsv, | ||
2340 | (u64)-1); | ||
2341 | |||
2342 | if (root->orphan_block_rsv || root->orphan_item_inserted) { | ||
2343 | trans = btrfs_join_transaction(root, 1); | ||
2344 | btrfs_end_transaction(trans, root); | ||
2345 | } | ||
2156 | 2346 | ||
2157 | if (nr_unlink) | 2347 | if (nr_unlink) |
2158 | printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink); | 2348 | printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink); |
2159 | if (nr_truncate) | 2349 | if (nr_truncate) |
2160 | printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate); | 2350 | printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate); |
2161 | |||
2162 | btrfs_free_path(path); | ||
2163 | } | 2351 | } |
2164 | 2352 | ||
2165 | /* | 2353 | /* |
@@ -2478,29 +2666,201 @@ out: | |||
2478 | return ret; | 2666 | return ret; |
2479 | } | 2667 | } |
2480 | 2668 | ||
2481 | static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | 2669 | /* helper to check if there is any shared block in the path */ |
2670 | static int check_path_shared(struct btrfs_root *root, | ||
2671 | struct btrfs_path *path) | ||
2672 | { | ||
2673 | struct extent_buffer *eb; | ||
2674 | int level; | ||
2675 | int ret; | ||
2676 | u64 refs; | ||
2677 | |||
2678 | for (level = 0; level < BTRFS_MAX_LEVEL; level++) { | ||
2679 | if (!path->nodes[level]) | ||
2680 | break; | ||
2681 | eb = path->nodes[level]; | ||
2682 | if (!btrfs_block_can_be_shared(root, eb)) | ||
2683 | continue; | ||
2684 | ret = btrfs_lookup_extent_info(NULL, root, eb->start, eb->len, | ||
2685 | &refs, NULL); | ||
2686 | if (refs > 1) | ||
2687 | return 1; | ||
2688 | } | ||
2689 | return 0; | ||
2690 | } | ||
2691 | |||
2692 | /* | ||
2693 | * helper to start transaction for unlink and rmdir. | ||
2694 | * | ||
2695 | * unlink and rmdir are special in btrfs, they do not always free space. | ||
2696 | * so in enospc case, we should make sure they will free space before | ||
2697 | * allowing them to use the global metadata reservation. | ||
2698 | */ | ||
2699 | static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, | ||
2700 | struct dentry *dentry) | ||
2482 | { | 2701 | { |
2483 | struct btrfs_root *root; | ||
2484 | struct btrfs_trans_handle *trans; | 2702 | struct btrfs_trans_handle *trans; |
2703 | struct btrfs_root *root = BTRFS_I(dir)->root; | ||
2704 | struct btrfs_path *path; | ||
2705 | struct btrfs_inode_ref *ref; | ||
2706 | struct btrfs_dir_item *di; | ||
2485 | struct inode *inode = dentry->d_inode; | 2707 | struct inode *inode = dentry->d_inode; |
2708 | u64 index; | ||
2709 | int check_link = 1; | ||
2710 | int err = -ENOSPC; | ||
2486 | int ret; | 2711 | int ret; |
2487 | unsigned long nr = 0; | ||
2488 | 2712 | ||
2489 | root = BTRFS_I(dir)->root; | 2713 | trans = btrfs_start_transaction(root, 10); |
2714 | if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) | ||
2715 | return trans; | ||
2490 | 2716 | ||
2491 | /* | 2717 | if (inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) |
2492 | * 5 items for unlink inode | 2718 | return ERR_PTR(-ENOSPC); |
2493 | * 1 for orphan | 2719 | |
2494 | */ | 2720 | /* check if there is someone else holds reference */ |
2495 | ret = btrfs_reserve_metadata_space(root, 6); | 2721 | if (S_ISDIR(inode->i_mode) && atomic_read(&inode->i_count) > 1) |
2496 | if (ret) | 2722 | return ERR_PTR(-ENOSPC); |
2497 | return ret; | 2723 | |
2724 | if (atomic_read(&inode->i_count) > 2) | ||
2725 | return ERR_PTR(-ENOSPC); | ||
2726 | |||
2727 | if (xchg(&root->fs_info->enospc_unlink, 1)) | ||
2728 | return ERR_PTR(-ENOSPC); | ||
2498 | 2729 | ||
2499 | trans = btrfs_start_transaction(root, 1); | 2730 | path = btrfs_alloc_path(); |
2731 | if (!path) { | ||
2732 | root->fs_info->enospc_unlink = 0; | ||
2733 | return ERR_PTR(-ENOMEM); | ||
2734 | } | ||
2735 | |||
2736 | trans = btrfs_start_transaction(root, 0); | ||
2500 | if (IS_ERR(trans)) { | 2737 | if (IS_ERR(trans)) { |
2501 | btrfs_unreserve_metadata_space(root, 6); | 2738 | btrfs_free_path(path); |
2502 | return PTR_ERR(trans); | 2739 | root->fs_info->enospc_unlink = 0; |
2740 | return trans; | ||
2741 | } | ||
2742 | |||
2743 | path->skip_locking = 1; | ||
2744 | path->search_commit_root = 1; | ||
2745 | |||
2746 | ret = btrfs_lookup_inode(trans, root, path, | ||
2747 | &BTRFS_I(dir)->location, 0); | ||
2748 | if (ret < 0) { | ||
2749 | err = ret; | ||
2750 | goto out; | ||
2751 | } | ||
2752 | if (ret == 0) { | ||
2753 | if (check_path_shared(root, path)) | ||
2754 | goto out; | ||
2755 | } else { | ||
2756 | check_link = 0; | ||
2757 | } | ||
2758 | btrfs_release_path(root, path); | ||
2759 | |||
2760 | ret = btrfs_lookup_inode(trans, root, path, | ||
2761 | &BTRFS_I(inode)->location, 0); | ||
2762 | if (ret < 0) { | ||
2763 | err = ret; | ||
2764 | goto out; | ||
2765 | } | ||
2766 | if (ret == 0) { | ||
2767 | if (check_path_shared(root, path)) | ||
2768 | goto out; | ||
2769 | } else { | ||
2770 | check_link = 0; | ||
2771 | } | ||
2772 | btrfs_release_path(root, path); | ||
2773 | |||
2774 | if (ret == 0 && S_ISREG(inode->i_mode)) { | ||
2775 | ret = btrfs_lookup_file_extent(trans, root, path, | ||
2776 | inode->i_ino, (u64)-1, 0); | ||
2777 | if (ret < 0) { | ||
2778 | err = ret; | ||
2779 | goto out; | ||
2780 | } | ||
2781 | BUG_ON(ret == 0); | ||
2782 | if (check_path_shared(root, path)) | ||
2783 | goto out; | ||
2784 | btrfs_release_path(root, path); | ||
2785 | } | ||
2786 | |||
2787 | if (!check_link) { | ||
2788 | err = 0; | ||
2789 | goto out; | ||
2790 | } | ||
2791 | |||
2792 | di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, | ||
2793 | dentry->d_name.name, dentry->d_name.len, 0); | ||
2794 | if (IS_ERR(di)) { | ||
2795 | err = PTR_ERR(di); | ||
2796 | goto out; | ||
2797 | } | ||
2798 | if (di) { | ||
2799 | if (check_path_shared(root, path)) | ||
2800 | goto out; | ||
2801 | } else { | ||
2802 | err = 0; | ||
2803 | goto out; | ||
2503 | } | 2804 | } |
2805 | btrfs_release_path(root, path); | ||
2806 | |||
2807 | ref = btrfs_lookup_inode_ref(trans, root, path, | ||
2808 | dentry->d_name.name, dentry->d_name.len, | ||
2809 | inode->i_ino, dir->i_ino, 0); | ||
2810 | if (IS_ERR(ref)) { | ||
2811 | err = PTR_ERR(ref); | ||
2812 | goto out; | ||
2813 | } | ||
2814 | BUG_ON(!ref); | ||
2815 | if (check_path_shared(root, path)) | ||
2816 | goto out; | ||
2817 | index = btrfs_inode_ref_index(path->nodes[0], ref); | ||
2818 | btrfs_release_path(root, path); | ||
2819 | |||
2820 | di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, index, | ||
2821 | dentry->d_name.name, dentry->d_name.len, 0); | ||
2822 | if (IS_ERR(di)) { | ||
2823 | err = PTR_ERR(di); | ||
2824 | goto out; | ||
2825 | } | ||
2826 | BUG_ON(ret == -ENOENT); | ||
2827 | if (check_path_shared(root, path)) | ||
2828 | goto out; | ||
2829 | |||
2830 | err = 0; | ||
2831 | out: | ||
2832 | btrfs_free_path(path); | ||
2833 | if (err) { | ||
2834 | btrfs_end_transaction(trans, root); | ||
2835 | root->fs_info->enospc_unlink = 0; | ||
2836 | return ERR_PTR(err); | ||
2837 | } | ||
2838 | |||
2839 | trans->block_rsv = &root->fs_info->global_block_rsv; | ||
2840 | return trans; | ||
2841 | } | ||
2842 | |||
2843 | static void __unlink_end_trans(struct btrfs_trans_handle *trans, | ||
2844 | struct btrfs_root *root) | ||
2845 | { | ||
2846 | if (trans->block_rsv == &root->fs_info->global_block_rsv) { | ||
2847 | BUG_ON(!root->fs_info->enospc_unlink); | ||
2848 | root->fs_info->enospc_unlink = 0; | ||
2849 | } | ||
2850 | btrfs_end_transaction_throttle(trans, root); | ||
2851 | } | ||
2852 | |||
2853 | static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | ||
2854 | { | ||
2855 | struct btrfs_root *root = BTRFS_I(dir)->root; | ||
2856 | struct btrfs_trans_handle *trans; | ||
2857 | struct inode *inode = dentry->d_inode; | ||
2858 | int ret; | ||
2859 | unsigned long nr = 0; | ||
2860 | |||
2861 | trans = __unlink_start_trans(dir, dentry); | ||
2862 | if (IS_ERR(trans)) | ||
2863 | return PTR_ERR(trans); | ||
2504 | 2864 | ||
2505 | btrfs_set_trans_block_group(trans, dir); | 2865 | btrfs_set_trans_block_group(trans, dir); |
2506 | 2866 | ||
@@ -2508,14 +2868,15 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | |||
2508 | 2868 | ||
2509 | ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, | 2869 | ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, |
2510 | dentry->d_name.name, dentry->d_name.len); | 2870 | dentry->d_name.name, dentry->d_name.len); |
2871 | BUG_ON(ret); | ||
2511 | 2872 | ||
2512 | if (inode->i_nlink == 0) | 2873 | if (inode->i_nlink == 0) { |
2513 | ret = btrfs_orphan_add(trans, inode); | 2874 | ret = btrfs_orphan_add(trans, inode); |
2875 | BUG_ON(ret); | ||
2876 | } | ||
2514 | 2877 | ||
2515 | nr = trans->blocks_used; | 2878 | nr = trans->blocks_used; |
2516 | 2879 | __unlink_end_trans(trans, root); | |
2517 | btrfs_end_transaction_throttle(trans, root); | ||
2518 | btrfs_unreserve_metadata_space(root, 6); | ||
2519 | btrfs_btree_balance_dirty(root, nr); | 2880 | btrfs_btree_balance_dirty(root, nr); |
2520 | return ret; | 2881 | return ret; |
2521 | } | 2882 | } |
@@ -2587,7 +2948,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
2587 | { | 2948 | { |
2588 | struct inode *inode = dentry->d_inode; | 2949 | struct inode *inode = dentry->d_inode; |
2589 | int err = 0; | 2950 | int err = 0; |
2590 | int ret; | ||
2591 | struct btrfs_root *root = BTRFS_I(dir)->root; | 2951 | struct btrfs_root *root = BTRFS_I(dir)->root; |
2592 | struct btrfs_trans_handle *trans; | 2952 | struct btrfs_trans_handle *trans; |
2593 | unsigned long nr = 0; | 2953 | unsigned long nr = 0; |
@@ -2596,15 +2956,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
2596 | inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) | 2956 | inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) |
2597 | return -ENOTEMPTY; | 2957 | return -ENOTEMPTY; |
2598 | 2958 | ||
2599 | ret = btrfs_reserve_metadata_space(root, 5); | 2959 | trans = __unlink_start_trans(dir, dentry); |
2600 | if (ret) | 2960 | if (IS_ERR(trans)) |
2601 | return ret; | ||
2602 | |||
2603 | trans = btrfs_start_transaction(root, 1); | ||
2604 | if (IS_ERR(trans)) { | ||
2605 | btrfs_unreserve_metadata_space(root, 5); | ||
2606 | return PTR_ERR(trans); | 2961 | return PTR_ERR(trans); |
2607 | } | ||
2608 | 2962 | ||
2609 | btrfs_set_trans_block_group(trans, dir); | 2963 | btrfs_set_trans_block_group(trans, dir); |
2610 | 2964 | ||
@@ -2627,12 +2981,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
2627 | btrfs_i_size_write(inode, 0); | 2981 | btrfs_i_size_write(inode, 0); |
2628 | out: | 2982 | out: |
2629 | nr = trans->blocks_used; | 2983 | nr = trans->blocks_used; |
2630 | ret = btrfs_end_transaction_throttle(trans, root); | 2984 | __unlink_end_trans(trans, root); |
2631 | btrfs_unreserve_metadata_space(root, 5); | ||
2632 | btrfs_btree_balance_dirty(root, nr); | 2985 | btrfs_btree_balance_dirty(root, nr); |
2633 | 2986 | ||
2634 | if (ret && !err) | ||
2635 | err = ret; | ||
2636 | return err; | 2987 | return err; |
2637 | } | 2988 | } |
2638 | 2989 | ||
@@ -3029,6 +3380,7 @@ out: | |||
3029 | if (pending_del_nr) { | 3380 | if (pending_del_nr) { |
3030 | ret = btrfs_del_items(trans, root, path, pending_del_slot, | 3381 | ret = btrfs_del_items(trans, root, path, pending_del_slot, |
3031 | pending_del_nr); | 3382 | pending_del_nr); |
3383 | BUG_ON(ret); | ||
3032 | } | 3384 | } |
3033 | btrfs_free_path(path); | 3385 | btrfs_free_path(path); |
3034 | return err; | 3386 | return err; |
@@ -3056,11 +3408,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) | |||
3056 | 3408 | ||
3057 | if ((offset & (blocksize - 1)) == 0) | 3409 | if ((offset & (blocksize - 1)) == 0) |
3058 | goto out; | 3410 | goto out; |
3059 | ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); | 3411 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); |
3060 | if (ret) | ||
3061 | goto out; | ||
3062 | |||
3063 | ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1); | ||
3064 | if (ret) | 3412 | if (ret) |
3065 | goto out; | 3413 | goto out; |
3066 | 3414 | ||
@@ -3068,8 +3416,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) | |||
3068 | again: | 3416 | again: |
3069 | page = grab_cache_page(mapping, index); | 3417 | page = grab_cache_page(mapping, index); |
3070 | if (!page) { | 3418 | if (!page) { |
3071 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | 3419 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); |
3072 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
3073 | goto out; | 3420 | goto out; |
3074 | } | 3421 | } |
3075 | 3422 | ||
@@ -3132,8 +3479,7 @@ again: | |||
3132 | 3479 | ||
3133 | out_unlock: | 3480 | out_unlock: |
3134 | if (ret) | 3481 | if (ret) |
3135 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | 3482 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); |
3136 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
3137 | unlock_page(page); | 3483 | unlock_page(page); |
3138 | page_cache_release(page); | 3484 | page_cache_release(page); |
3139 | out: | 3485 | out: |
@@ -3145,7 +3491,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
3145 | struct btrfs_trans_handle *trans; | 3491 | struct btrfs_trans_handle *trans; |
3146 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3492 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3147 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 3493 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
3148 | struct extent_map *em; | 3494 | struct extent_map *em = NULL; |
3149 | struct extent_state *cached_state = NULL; | 3495 | struct extent_state *cached_state = NULL; |
3150 | u64 mask = root->sectorsize - 1; | 3496 | u64 mask = root->sectorsize - 1; |
3151 | u64 hole_start = (inode->i_size + mask) & ~mask; | 3497 | u64 hole_start = (inode->i_size + mask) & ~mask; |
@@ -3183,11 +3529,11 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
3183 | u64 hint_byte = 0; | 3529 | u64 hint_byte = 0; |
3184 | hole_size = last_byte - cur_offset; | 3530 | hole_size = last_byte - cur_offset; |
3185 | 3531 | ||
3186 | err = btrfs_reserve_metadata_space(root, 2); | 3532 | trans = btrfs_start_transaction(root, 2); |
3187 | if (err) | 3533 | if (IS_ERR(trans)) { |
3534 | err = PTR_ERR(trans); | ||
3188 | break; | 3535 | break; |
3189 | 3536 | } | |
3190 | trans = btrfs_start_transaction(root, 1); | ||
3191 | btrfs_set_trans_block_group(trans, inode); | 3537 | btrfs_set_trans_block_group(trans, inode); |
3192 | 3538 | ||
3193 | err = btrfs_drop_extents(trans, inode, cur_offset, | 3539 | err = btrfs_drop_extents(trans, inode, cur_offset, |
@@ -3205,14 +3551,15 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
3205 | last_byte - 1, 0); | 3551 | last_byte - 1, 0); |
3206 | 3552 | ||
3207 | btrfs_end_transaction(trans, root); | 3553 | btrfs_end_transaction(trans, root); |
3208 | btrfs_unreserve_metadata_space(root, 2); | ||
3209 | } | 3554 | } |
3210 | free_extent_map(em); | 3555 | free_extent_map(em); |
3556 | em = NULL; | ||
3211 | cur_offset = last_byte; | 3557 | cur_offset = last_byte; |
3212 | if (cur_offset >= block_end) | 3558 | if (cur_offset >= block_end) |
3213 | break; | 3559 | break; |
3214 | } | 3560 | } |
3215 | 3561 | ||
3562 | free_extent_map(em); | ||
3216 | unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state, | 3563 | unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state, |
3217 | GFP_NOFS); | 3564 | GFP_NOFS); |
3218 | return err; | 3565 | return err; |
@@ -3239,11 +3586,10 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) | |||
3239 | } | 3586 | } |
3240 | } | 3587 | } |
3241 | 3588 | ||
3242 | ret = btrfs_reserve_metadata_space(root, 1); | 3589 | trans = btrfs_start_transaction(root, 5); |
3243 | if (ret) | 3590 | if (IS_ERR(trans)) |
3244 | return ret; | 3591 | return PTR_ERR(trans); |
3245 | 3592 | ||
3246 | trans = btrfs_start_transaction(root, 1); | ||
3247 | btrfs_set_trans_block_group(trans, inode); | 3593 | btrfs_set_trans_block_group(trans, inode); |
3248 | 3594 | ||
3249 | ret = btrfs_orphan_add(trans, inode); | 3595 | ret = btrfs_orphan_add(trans, inode); |
@@ -3251,7 +3597,6 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) | |||
3251 | 3597 | ||
3252 | nr = trans->blocks_used; | 3598 | nr = trans->blocks_used; |
3253 | btrfs_end_transaction(trans, root); | 3599 | btrfs_end_transaction(trans, root); |
3254 | btrfs_unreserve_metadata_space(root, 1); | ||
3255 | btrfs_btree_balance_dirty(root, nr); | 3600 | btrfs_btree_balance_dirty(root, nr); |
3256 | 3601 | ||
3257 | if (attr->ia_size > inode->i_size) { | 3602 | if (attr->ia_size > inode->i_size) { |
@@ -3264,8 +3609,11 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) | |||
3264 | i_size_write(inode, attr->ia_size); | 3609 | i_size_write(inode, attr->ia_size); |
3265 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); | 3610 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); |
3266 | 3611 | ||
3267 | trans = btrfs_start_transaction(root, 1); | 3612 | trans = btrfs_start_transaction(root, 0); |
3613 | BUG_ON(IS_ERR(trans)); | ||
3268 | btrfs_set_trans_block_group(trans, inode); | 3614 | btrfs_set_trans_block_group(trans, inode); |
3615 | trans->block_rsv = root->orphan_block_rsv; | ||
3616 | BUG_ON(!trans->block_rsv); | ||
3269 | 3617 | ||
3270 | ret = btrfs_update_inode(trans, root, inode); | 3618 | ret = btrfs_update_inode(trans, root, inode); |
3271 | BUG_ON(ret); | 3619 | BUG_ON(ret); |
@@ -3345,10 +3693,21 @@ void btrfs_delete_inode(struct inode *inode) | |||
3345 | btrfs_i_size_write(inode, 0); | 3693 | btrfs_i_size_write(inode, 0); |
3346 | 3694 | ||
3347 | while (1) { | 3695 | while (1) { |
3348 | trans = btrfs_start_transaction(root, 1); | 3696 | trans = btrfs_start_transaction(root, 0); |
3697 | BUG_ON(IS_ERR(trans)); | ||
3349 | btrfs_set_trans_block_group(trans, inode); | 3698 | btrfs_set_trans_block_group(trans, inode); |
3350 | ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); | 3699 | trans->block_rsv = root->orphan_block_rsv; |
3700 | |||
3701 | ret = btrfs_block_rsv_check(trans, root, | ||
3702 | root->orphan_block_rsv, 0, 5); | ||
3703 | if (ret) { | ||
3704 | BUG_ON(ret != -EAGAIN); | ||
3705 | ret = btrfs_commit_transaction(trans, root); | ||
3706 | BUG_ON(ret); | ||
3707 | continue; | ||
3708 | } | ||
3351 | 3709 | ||
3710 | ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); | ||
3352 | if (ret != -EAGAIN) | 3711 | if (ret != -EAGAIN) |
3353 | break; | 3712 | break; |
3354 | 3713 | ||
@@ -3356,6 +3715,7 @@ void btrfs_delete_inode(struct inode *inode) | |||
3356 | btrfs_end_transaction(trans, root); | 3715 | btrfs_end_transaction(trans, root); |
3357 | trans = NULL; | 3716 | trans = NULL; |
3358 | btrfs_btree_balance_dirty(root, nr); | 3717 | btrfs_btree_balance_dirty(root, nr); |
3718 | |||
3359 | } | 3719 | } |
3360 | 3720 | ||
3361 | if (ret == 0) { | 3721 | if (ret == 0) { |
@@ -3596,40 +3956,10 @@ again: | |||
3596 | return 0; | 3956 | return 0; |
3597 | } | 3957 | } |
3598 | 3958 | ||
3599 | static noinline void init_btrfs_i(struct inode *inode) | ||
3600 | { | ||
3601 | struct btrfs_inode *bi = BTRFS_I(inode); | ||
3602 | |||
3603 | bi->generation = 0; | ||
3604 | bi->sequence = 0; | ||
3605 | bi->last_trans = 0; | ||
3606 | bi->last_sub_trans = 0; | ||
3607 | bi->logged_trans = 0; | ||
3608 | bi->delalloc_bytes = 0; | ||
3609 | bi->reserved_bytes = 0; | ||
3610 | bi->disk_i_size = 0; | ||
3611 | bi->flags = 0; | ||
3612 | bi->index_cnt = (u64)-1; | ||
3613 | bi->last_unlink_trans = 0; | ||
3614 | bi->ordered_data_close = 0; | ||
3615 | bi->force_compress = 0; | ||
3616 | extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); | ||
3617 | extent_io_tree_init(&BTRFS_I(inode)->io_tree, | ||
3618 | inode->i_mapping, GFP_NOFS); | ||
3619 | extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, | ||
3620 | inode->i_mapping, GFP_NOFS); | ||
3621 | INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes); | ||
3622 | INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations); | ||
3623 | RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); | ||
3624 | btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); | ||
3625 | mutex_init(&BTRFS_I(inode)->log_mutex); | ||
3626 | } | ||
3627 | |||
3628 | static int btrfs_init_locked_inode(struct inode *inode, void *p) | 3959 | static int btrfs_init_locked_inode(struct inode *inode, void *p) |
3629 | { | 3960 | { |
3630 | struct btrfs_iget_args *args = p; | 3961 | struct btrfs_iget_args *args = p; |
3631 | inode->i_ino = args->ino; | 3962 | inode->i_ino = args->ino; |
3632 | init_btrfs_i(inode); | ||
3633 | BTRFS_I(inode)->root = args->root; | 3963 | BTRFS_I(inode)->root = args->root; |
3634 | btrfs_set_inode_space_info(args->root, inode); | 3964 | btrfs_set_inode_space_info(args->root, inode); |
3635 | return 0; | 3965 | return 0; |
@@ -3692,8 +4022,6 @@ static struct inode *new_simple_dir(struct super_block *s, | |||
3692 | if (!inode) | 4022 | if (!inode) |
3693 | return ERR_PTR(-ENOMEM); | 4023 | return ERR_PTR(-ENOMEM); |
3694 | 4024 | ||
3695 | init_btrfs_i(inode); | ||
3696 | |||
3697 | BTRFS_I(inode)->root = root; | 4025 | BTRFS_I(inode)->root = root; |
3698 | memcpy(&BTRFS_I(inode)->location, key, sizeof(*key)); | 4026 | memcpy(&BTRFS_I(inode)->location, key, sizeof(*key)); |
3699 | BTRFS_I(inode)->dummy_inode = 1; | 4027 | BTRFS_I(inode)->dummy_inode = 1; |
@@ -3950,7 +4278,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
3950 | struct btrfs_trans_handle *trans; | 4278 | struct btrfs_trans_handle *trans; |
3951 | int ret = 0; | 4279 | int ret = 0; |
3952 | 4280 | ||
3953 | if (root->fs_info->btree_inode == inode) | 4281 | if (BTRFS_I(inode)->dummy_inode) |
3954 | return 0; | 4282 | return 0; |
3955 | 4283 | ||
3956 | if (wbc->sync_mode == WB_SYNC_ALL) { | 4284 | if (wbc->sync_mode == WB_SYNC_ALL) { |
@@ -3971,10 +4299,38 @@ void btrfs_dirty_inode(struct inode *inode) | |||
3971 | { | 4299 | { |
3972 | struct btrfs_root *root = BTRFS_I(inode)->root; | 4300 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3973 | struct btrfs_trans_handle *trans; | 4301 | struct btrfs_trans_handle *trans; |
4302 | int ret; | ||
4303 | |||
4304 | if (BTRFS_I(inode)->dummy_inode) | ||
4305 | return; | ||
3974 | 4306 | ||
3975 | trans = btrfs_join_transaction(root, 1); | 4307 | trans = btrfs_join_transaction(root, 1); |
3976 | btrfs_set_trans_block_group(trans, inode); | 4308 | btrfs_set_trans_block_group(trans, inode); |
3977 | btrfs_update_inode(trans, root, inode); | 4309 | |
4310 | ret = btrfs_update_inode(trans, root, inode); | ||
4311 | if (ret && ret == -ENOSPC) { | ||
4312 | /* whoops, lets try again with the full transaction */ | ||
4313 | btrfs_end_transaction(trans, root); | ||
4314 | trans = btrfs_start_transaction(root, 1); | ||
4315 | if (IS_ERR(trans)) { | ||
4316 | if (printk_ratelimit()) { | ||
4317 | printk(KERN_ERR "btrfs: fail to " | ||
4318 | "dirty inode %lu error %ld\n", | ||
4319 | inode->i_ino, PTR_ERR(trans)); | ||
4320 | } | ||
4321 | return; | ||
4322 | } | ||
4323 | btrfs_set_trans_block_group(trans, inode); | ||
4324 | |||
4325 | ret = btrfs_update_inode(trans, root, inode); | ||
4326 | if (ret) { | ||
4327 | if (printk_ratelimit()) { | ||
4328 | printk(KERN_ERR "btrfs: fail to " | ||
4329 | "dirty inode %lu error %d\n", | ||
4330 | inode->i_ino, ret); | ||
4331 | } | ||
4332 | } | ||
4333 | } | ||
3978 | btrfs_end_transaction(trans, root); | 4334 | btrfs_end_transaction(trans, root); |
3979 | } | 4335 | } |
3980 | 4336 | ||
@@ -4092,7 +4448,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
4092 | * btrfs_get_inode_index_count has an explanation for the magic | 4448 | * btrfs_get_inode_index_count has an explanation for the magic |
4093 | * number | 4449 | * number |
4094 | */ | 4450 | */ |
4095 | init_btrfs_i(inode); | ||
4096 | BTRFS_I(inode)->index_cnt = 2; | 4451 | BTRFS_I(inode)->index_cnt = 2; |
4097 | BTRFS_I(inode)->root = root; | 4452 | BTRFS_I(inode)->root = root; |
4098 | BTRFS_I(inode)->generation = trans->transid; | 4453 | BTRFS_I(inode)->generation = trans->transid; |
@@ -4247,26 +4602,21 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
4247 | if (!new_valid_dev(rdev)) | 4602 | if (!new_valid_dev(rdev)) |
4248 | return -EINVAL; | 4603 | return -EINVAL; |
4249 | 4604 | ||
4605 | err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid); | ||
4606 | if (err) | ||
4607 | return err; | ||
4608 | |||
4250 | /* | 4609 | /* |
4251 | * 2 for inode item and ref | 4610 | * 2 for inode item and ref |
4252 | * 2 for dir items | 4611 | * 2 for dir items |
4253 | * 1 for xattr if selinux is on | 4612 | * 1 for xattr if selinux is on |
4254 | */ | 4613 | */ |
4255 | err = btrfs_reserve_metadata_space(root, 5); | 4614 | trans = btrfs_start_transaction(root, 5); |
4256 | if (err) | 4615 | if (IS_ERR(trans)) |
4257 | return err; | 4616 | return PTR_ERR(trans); |
4258 | 4617 | ||
4259 | trans = btrfs_start_transaction(root, 1); | ||
4260 | if (!trans) | ||
4261 | goto fail; | ||
4262 | btrfs_set_trans_block_group(trans, dir); | 4618 | btrfs_set_trans_block_group(trans, dir); |
4263 | 4619 | ||
4264 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | ||
4265 | if (err) { | ||
4266 | err = -ENOSPC; | ||
4267 | goto out_unlock; | ||
4268 | } | ||
4269 | |||
4270 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 4620 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
4271 | dentry->d_name.len, | 4621 | dentry->d_name.len, |
4272 | dentry->d_parent->d_inode->i_ino, objectid, | 4622 | dentry->d_parent->d_inode->i_ino, objectid, |
@@ -4295,13 +4645,11 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
4295 | out_unlock: | 4645 | out_unlock: |
4296 | nr = trans->blocks_used; | 4646 | nr = trans->blocks_used; |
4297 | btrfs_end_transaction_throttle(trans, root); | 4647 | btrfs_end_transaction_throttle(trans, root); |
4298 | fail: | 4648 | btrfs_btree_balance_dirty(root, nr); |
4299 | btrfs_unreserve_metadata_space(root, 5); | ||
4300 | if (drop_inode) { | 4649 | if (drop_inode) { |
4301 | inode_dec_link_count(inode); | 4650 | inode_dec_link_count(inode); |
4302 | iput(inode); | 4651 | iput(inode); |
4303 | } | 4652 | } |
4304 | btrfs_btree_balance_dirty(root, nr); | ||
4305 | return err; | 4653 | return err; |
4306 | } | 4654 | } |
4307 | 4655 | ||
@@ -4311,32 +4659,26 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
4311 | struct btrfs_trans_handle *trans; | 4659 | struct btrfs_trans_handle *trans; |
4312 | struct btrfs_root *root = BTRFS_I(dir)->root; | 4660 | struct btrfs_root *root = BTRFS_I(dir)->root; |
4313 | struct inode *inode = NULL; | 4661 | struct inode *inode = NULL; |
4314 | int err; | ||
4315 | int drop_inode = 0; | 4662 | int drop_inode = 0; |
4663 | int err; | ||
4316 | unsigned long nr = 0; | 4664 | unsigned long nr = 0; |
4317 | u64 objectid; | 4665 | u64 objectid; |
4318 | u64 index = 0; | 4666 | u64 index = 0; |
4319 | 4667 | ||
4668 | err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid); | ||
4669 | if (err) | ||
4670 | return err; | ||
4320 | /* | 4671 | /* |
4321 | * 2 for inode item and ref | 4672 | * 2 for inode item and ref |
4322 | * 2 for dir items | 4673 | * 2 for dir items |
4323 | * 1 for xattr if selinux is on | 4674 | * 1 for xattr if selinux is on |
4324 | */ | 4675 | */ |
4325 | err = btrfs_reserve_metadata_space(root, 5); | 4676 | trans = btrfs_start_transaction(root, 5); |
4326 | if (err) | 4677 | if (IS_ERR(trans)) |
4327 | return err; | 4678 | return PTR_ERR(trans); |
4328 | 4679 | ||
4329 | trans = btrfs_start_transaction(root, 1); | ||
4330 | if (!trans) | ||
4331 | goto fail; | ||
4332 | btrfs_set_trans_block_group(trans, dir); | 4680 | btrfs_set_trans_block_group(trans, dir); |
4333 | 4681 | ||
4334 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | ||
4335 | if (err) { | ||
4336 | err = -ENOSPC; | ||
4337 | goto out_unlock; | ||
4338 | } | ||
4339 | |||
4340 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 4682 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
4341 | dentry->d_name.len, | 4683 | dentry->d_name.len, |
4342 | dentry->d_parent->d_inode->i_ino, | 4684 | dentry->d_parent->d_inode->i_ino, |
@@ -4368,8 +4710,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
4368 | out_unlock: | 4710 | out_unlock: |
4369 | nr = trans->blocks_used; | 4711 | nr = trans->blocks_used; |
4370 | btrfs_end_transaction_throttle(trans, root); | 4712 | btrfs_end_transaction_throttle(trans, root); |
4371 | fail: | ||
4372 | btrfs_unreserve_metadata_space(root, 5); | ||
4373 | if (drop_inode) { | 4713 | if (drop_inode) { |
4374 | inode_dec_link_count(inode); | 4714 | inode_dec_link_count(inode); |
4375 | iput(inode); | 4715 | iput(inode); |
@@ -4396,21 +4736,21 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
4396 | if (root->objectid != BTRFS_I(inode)->root->objectid) | 4736 | if (root->objectid != BTRFS_I(inode)->root->objectid) |
4397 | return -EPERM; | 4737 | return -EPERM; |
4398 | 4738 | ||
4399 | /* | ||
4400 | * 1 item for inode ref | ||
4401 | * 2 items for dir items | ||
4402 | */ | ||
4403 | err = btrfs_reserve_metadata_space(root, 3); | ||
4404 | if (err) | ||
4405 | return err; | ||
4406 | |||
4407 | btrfs_inc_nlink(inode); | 4739 | btrfs_inc_nlink(inode); |
4408 | 4740 | ||
4409 | err = btrfs_set_inode_index(dir, &index); | 4741 | err = btrfs_set_inode_index(dir, &index); |
4410 | if (err) | 4742 | if (err) |
4411 | goto fail; | 4743 | goto fail; |
4412 | 4744 | ||
4413 | trans = btrfs_start_transaction(root, 1); | 4745 | /* |
4746 | * 1 item for inode ref | ||
4747 | * 2 items for dir items | ||
4748 | */ | ||
4749 | trans = btrfs_start_transaction(root, 3); | ||
4750 | if (IS_ERR(trans)) { | ||
4751 | err = PTR_ERR(trans); | ||
4752 | goto fail; | ||
4753 | } | ||
4414 | 4754 | ||
4415 | btrfs_set_trans_block_group(trans, dir); | 4755 | btrfs_set_trans_block_group(trans, dir); |
4416 | atomic_inc(&inode->i_count); | 4756 | atomic_inc(&inode->i_count); |
@@ -4429,7 +4769,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
4429 | nr = trans->blocks_used; | 4769 | nr = trans->blocks_used; |
4430 | btrfs_end_transaction_throttle(trans, root); | 4770 | btrfs_end_transaction_throttle(trans, root); |
4431 | fail: | 4771 | fail: |
4432 | btrfs_unreserve_metadata_space(root, 3); | ||
4433 | if (drop_inode) { | 4772 | if (drop_inode) { |
4434 | inode_dec_link_count(inode); | 4773 | inode_dec_link_count(inode); |
4435 | iput(inode); | 4774 | iput(inode); |
@@ -4449,28 +4788,20 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
4449 | u64 index = 0; | 4788 | u64 index = 0; |
4450 | unsigned long nr = 1; | 4789 | unsigned long nr = 1; |
4451 | 4790 | ||
4791 | err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid); | ||
4792 | if (err) | ||
4793 | return err; | ||
4794 | |||
4452 | /* | 4795 | /* |
4453 | * 2 items for inode and ref | 4796 | * 2 items for inode and ref |
4454 | * 2 items for dir items | 4797 | * 2 items for dir items |
4455 | * 1 for xattr if selinux is on | 4798 | * 1 for xattr if selinux is on |
4456 | */ | 4799 | */ |
4457 | err = btrfs_reserve_metadata_space(root, 5); | 4800 | trans = btrfs_start_transaction(root, 5); |
4458 | if (err) | 4801 | if (IS_ERR(trans)) |
4459 | return err; | 4802 | return PTR_ERR(trans); |
4460 | |||
4461 | trans = btrfs_start_transaction(root, 1); | ||
4462 | if (!trans) { | ||
4463 | err = -ENOMEM; | ||
4464 | goto out_unlock; | ||
4465 | } | ||
4466 | btrfs_set_trans_block_group(trans, dir); | 4803 | btrfs_set_trans_block_group(trans, dir); |
4467 | 4804 | ||
4468 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | ||
4469 | if (err) { | ||
4470 | err = -ENOSPC; | ||
4471 | goto out_fail; | ||
4472 | } | ||
4473 | |||
4474 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 4805 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
4475 | dentry->d_name.len, | 4806 | dentry->d_name.len, |
4476 | dentry->d_parent->d_inode->i_ino, objectid, | 4807 | dentry->d_parent->d_inode->i_ino, objectid, |
@@ -4510,9 +4841,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
4510 | out_fail: | 4841 | out_fail: |
4511 | nr = trans->blocks_used; | 4842 | nr = trans->blocks_used; |
4512 | btrfs_end_transaction_throttle(trans, root); | 4843 | btrfs_end_transaction_throttle(trans, root); |
4513 | |||
4514 | out_unlock: | ||
4515 | btrfs_unreserve_metadata_space(root, 5); | ||
4516 | if (drop_on_err) | 4844 | if (drop_on_err) |
4517 | iput(inode); | 4845 | iput(inode); |
4518 | btrfs_btree_balance_dirty(root, nr); | 4846 | btrfs_btree_balance_dirty(root, nr); |
@@ -4770,6 +5098,7 @@ again: | |||
4770 | } | 5098 | } |
4771 | flush_dcache_page(page); | 5099 | flush_dcache_page(page); |
4772 | } else if (create && PageUptodate(page)) { | 5100 | } else if (create && PageUptodate(page)) { |
5101 | WARN_ON(1); | ||
4773 | if (!trans) { | 5102 | if (!trans) { |
4774 | kunmap(page); | 5103 | kunmap(page); |
4775 | free_extent_map(em); | 5104 | free_extent_map(em); |
@@ -4866,11 +5195,651 @@ out: | |||
4866 | return em; | 5195 | return em; |
4867 | } | 5196 | } |
4868 | 5197 | ||
5198 | static struct extent_map *btrfs_new_extent_direct(struct inode *inode, | ||
5199 | u64 start, u64 len) | ||
5200 | { | ||
5201 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5202 | struct btrfs_trans_handle *trans; | ||
5203 | struct extent_map *em; | ||
5204 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
5205 | struct btrfs_key ins; | ||
5206 | u64 alloc_hint; | ||
5207 | int ret; | ||
5208 | |||
5209 | btrfs_drop_extent_cache(inode, start, start + len - 1, 0); | ||
5210 | |||
5211 | trans = btrfs_join_transaction(root, 0); | ||
5212 | if (!trans) | ||
5213 | return ERR_PTR(-ENOMEM); | ||
5214 | |||
5215 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
5216 | |||
5217 | alloc_hint = get_extent_allocation_hint(inode, start, len); | ||
5218 | ret = btrfs_reserve_extent(trans, root, len, root->sectorsize, 0, | ||
5219 | alloc_hint, (u64)-1, &ins, 1); | ||
5220 | if (ret) { | ||
5221 | em = ERR_PTR(ret); | ||
5222 | goto out; | ||
5223 | } | ||
5224 | |||
5225 | em = alloc_extent_map(GFP_NOFS); | ||
5226 | if (!em) { | ||
5227 | em = ERR_PTR(-ENOMEM); | ||
5228 | goto out; | ||
5229 | } | ||
5230 | |||
5231 | em->start = start; | ||
5232 | em->orig_start = em->start; | ||
5233 | em->len = ins.offset; | ||
5234 | |||
5235 | em->block_start = ins.objectid; | ||
5236 | em->block_len = ins.offset; | ||
5237 | em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
5238 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | ||
5239 | |||
5240 | while (1) { | ||
5241 | write_lock(&em_tree->lock); | ||
5242 | ret = add_extent_mapping(em_tree, em); | ||
5243 | write_unlock(&em_tree->lock); | ||
5244 | if (ret != -EEXIST) | ||
5245 | break; | ||
5246 | btrfs_drop_extent_cache(inode, start, start + em->len - 1, 0); | ||
5247 | } | ||
5248 | |||
5249 | ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid, | ||
5250 | ins.offset, ins.offset, 0); | ||
5251 | if (ret) { | ||
5252 | btrfs_free_reserved_extent(root, ins.objectid, ins.offset); | ||
5253 | em = ERR_PTR(ret); | ||
5254 | } | ||
5255 | out: | ||
5256 | btrfs_end_transaction(trans, root); | ||
5257 | return em; | ||
5258 | } | ||
5259 | |||
5260 | /* | ||
5261 | * returns 1 when the nocow is safe, < 0 on error, 0 if the | ||
5262 | * block must be cow'd | ||
5263 | */ | ||
5264 | static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans, | ||
5265 | struct inode *inode, u64 offset, u64 len) | ||
5266 | { | ||
5267 | struct btrfs_path *path; | ||
5268 | int ret; | ||
5269 | struct extent_buffer *leaf; | ||
5270 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5271 | struct btrfs_file_extent_item *fi; | ||
5272 | struct btrfs_key key; | ||
5273 | u64 disk_bytenr; | ||
5274 | u64 backref_offset; | ||
5275 | u64 extent_end; | ||
5276 | u64 num_bytes; | ||
5277 | int slot; | ||
5278 | int found_type; | ||
5279 | |||
5280 | path = btrfs_alloc_path(); | ||
5281 | if (!path) | ||
5282 | return -ENOMEM; | ||
5283 | |||
5284 | ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, | ||
5285 | offset, 0); | ||
5286 | if (ret < 0) | ||
5287 | goto out; | ||
5288 | |||
5289 | slot = path->slots[0]; | ||
5290 | if (ret == 1) { | ||
5291 | if (slot == 0) { | ||
5292 | /* can't find the item, must cow */ | ||
5293 | ret = 0; | ||
5294 | goto out; | ||
5295 | } | ||
5296 | slot--; | ||
5297 | } | ||
5298 | ret = 0; | ||
5299 | leaf = path->nodes[0]; | ||
5300 | btrfs_item_key_to_cpu(leaf, &key, slot); | ||
5301 | if (key.objectid != inode->i_ino || | ||
5302 | key.type != BTRFS_EXTENT_DATA_KEY) { | ||
5303 | /* not our file or wrong item type, must cow */ | ||
5304 | goto out; | ||
5305 | } | ||
5306 | |||
5307 | if (key.offset > offset) { | ||
5308 | /* Wrong offset, must cow */ | ||
5309 | goto out; | ||
5310 | } | ||
5311 | |||
5312 | fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); | ||
5313 | found_type = btrfs_file_extent_type(leaf, fi); | ||
5314 | if (found_type != BTRFS_FILE_EXTENT_REG && | ||
5315 | found_type != BTRFS_FILE_EXTENT_PREALLOC) { | ||
5316 | /* not a regular extent, must cow */ | ||
5317 | goto out; | ||
5318 | } | ||
5319 | disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); | ||
5320 | backref_offset = btrfs_file_extent_offset(leaf, fi); | ||
5321 | |||
5322 | extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); | ||
5323 | if (extent_end < offset + len) { | ||
5324 | /* extent doesn't include our full range, must cow */ | ||
5325 | goto out; | ||
5326 | } | ||
5327 | |||
5328 | if (btrfs_extent_readonly(root, disk_bytenr)) | ||
5329 | goto out; | ||
5330 | |||
5331 | /* | ||
5332 | * look for other files referencing this extent, if we | ||
5333 | * find any we must cow | ||
5334 | */ | ||
5335 | if (btrfs_cross_ref_exist(trans, root, inode->i_ino, | ||
5336 | key.offset - backref_offset, disk_bytenr)) | ||
5337 | goto out; | ||
5338 | |||
5339 | /* | ||
5340 | * adjust disk_bytenr and num_bytes to cover just the bytes | ||
5341 | * in this extent we are about to write. If there | ||
5342 | * are any csums in that range we have to cow in order | ||
5343 | * to keep the csums correct | ||
5344 | */ | ||
5345 | disk_bytenr += backref_offset; | ||
5346 | disk_bytenr += offset - key.offset; | ||
5347 | num_bytes = min(offset + len, extent_end) - offset; | ||
5348 | if (csum_exist_in_range(root, disk_bytenr, num_bytes)) | ||
5349 | goto out; | ||
5350 | /* | ||
5351 | * all of the above have passed, it is safe to overwrite this extent | ||
5352 | * without cow | ||
5353 | */ | ||
5354 | ret = 1; | ||
5355 | out: | ||
5356 | btrfs_free_path(path); | ||
5357 | return ret; | ||
5358 | } | ||
5359 | |||
5360 | static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, | ||
5361 | struct buffer_head *bh_result, int create) | ||
5362 | { | ||
5363 | struct extent_map *em; | ||
5364 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5365 | u64 start = iblock << inode->i_blkbits; | ||
5366 | u64 len = bh_result->b_size; | ||
5367 | struct btrfs_trans_handle *trans; | ||
5368 | |||
5369 | em = btrfs_get_extent(inode, NULL, 0, start, len, 0); | ||
5370 | if (IS_ERR(em)) | ||
5371 | return PTR_ERR(em); | ||
5372 | |||
5373 | /* | ||
5374 | * Ok for INLINE and COMPRESSED extents we need to fallback on buffered | ||
5375 | * io. INLINE is special, and we could probably kludge it in here, but | ||
5376 | * it's still buffered so for safety let's just fall back to the generic | ||
5377 | * buffered path. | ||
5378 | * | ||
5379 | * For COMPRESSED we _have_ to read the entire extent in so we can | ||
5380 | * decompress it, so there will be buffering required no matter what we | ||
5381 | * do, so go ahead and fallback to buffered. | ||
5382 | * | ||
5383 | * We return -ENOTBLK because that's what makes DIO go ahead and go back | ||
5384 | * to buffered IO. Don't blame me, this is the price we pay for using | ||
5385 | * the generic code. | ||
5386 | */ | ||
5387 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) || | ||
5388 | em->block_start == EXTENT_MAP_INLINE) { | ||
5389 | free_extent_map(em); | ||
5390 | return -ENOTBLK; | ||
5391 | } | ||
5392 | |||
5393 | /* Just a good old fashioned hole, return */ | ||
5394 | if (!create && (em->block_start == EXTENT_MAP_HOLE || | ||
5395 | test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { | ||
5396 | free_extent_map(em); | ||
5397 | /* DIO will do one hole at a time, so just unlock a sector */ | ||
5398 | unlock_extent(&BTRFS_I(inode)->io_tree, start, | ||
5399 | start + root->sectorsize - 1, GFP_NOFS); | ||
5400 | return 0; | ||
5401 | } | ||
5402 | |||
5403 | /* | ||
5404 | * We don't allocate a new extent in the following cases | ||
5405 | * | ||
5406 | * 1) The inode is marked as NODATACOW. In this case we'll just use the | ||
5407 | * existing extent. | ||
5408 | * 2) The extent is marked as PREALLOC. We're good to go here and can | ||
5409 | * just use the extent. | ||
5410 | * | ||
5411 | */ | ||
5412 | if (!create) { | ||
5413 | len = em->len - (start - em->start); | ||
5414 | goto map; | ||
5415 | } | ||
5416 | |||
5417 | if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) || | ||
5418 | ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) && | ||
5419 | em->block_start != EXTENT_MAP_HOLE)) { | ||
5420 | int type; | ||
5421 | int ret; | ||
5422 | u64 block_start; | ||
5423 | |||
5424 | if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) | ||
5425 | type = BTRFS_ORDERED_PREALLOC; | ||
5426 | else | ||
5427 | type = BTRFS_ORDERED_NOCOW; | ||
5428 | len = min(len, em->len - (start - em->start)); | ||
5429 | block_start = em->block_start + (start - em->start); | ||
5430 | |||
5431 | /* | ||
5432 | * we're not going to log anything, but we do need | ||
5433 | * to make sure the current transaction stays open | ||
5434 | * while we look for nocow cross refs | ||
5435 | */ | ||
5436 | trans = btrfs_join_transaction(root, 0); | ||
5437 | if (!trans) | ||
5438 | goto must_cow; | ||
5439 | |||
5440 | if (can_nocow_odirect(trans, inode, start, len) == 1) { | ||
5441 | ret = btrfs_add_ordered_extent_dio(inode, start, | ||
5442 | block_start, len, len, type); | ||
5443 | btrfs_end_transaction(trans, root); | ||
5444 | if (ret) { | ||
5445 | free_extent_map(em); | ||
5446 | return ret; | ||
5447 | } | ||
5448 | goto unlock; | ||
5449 | } | ||
5450 | btrfs_end_transaction(trans, root); | ||
5451 | } | ||
5452 | must_cow: | ||
5453 | /* | ||
5454 | * this will cow the extent, reset the len in case we changed | ||
5455 | * it above | ||
5456 | */ | ||
5457 | len = bh_result->b_size; | ||
5458 | free_extent_map(em); | ||
5459 | em = btrfs_new_extent_direct(inode, start, len); | ||
5460 | if (IS_ERR(em)) | ||
5461 | return PTR_ERR(em); | ||
5462 | len = min(len, em->len - (start - em->start)); | ||
5463 | unlock: | ||
5464 | clear_extent_bit(&BTRFS_I(inode)->io_tree, start, start + len - 1, | ||
5465 | EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DIRTY, 1, | ||
5466 | 0, NULL, GFP_NOFS); | ||
5467 | map: | ||
5468 | bh_result->b_blocknr = (em->block_start + (start - em->start)) >> | ||
5469 | inode->i_blkbits; | ||
5470 | bh_result->b_size = len; | ||
5471 | bh_result->b_bdev = em->bdev; | ||
5472 | set_buffer_mapped(bh_result); | ||
5473 | if (create && !test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) | ||
5474 | set_buffer_new(bh_result); | ||
5475 | |||
5476 | free_extent_map(em); | ||
5477 | |||
5478 | return 0; | ||
5479 | } | ||
5480 | |||
5481 | struct btrfs_dio_private { | ||
5482 | struct inode *inode; | ||
5483 | u64 logical_offset; | ||
5484 | u64 disk_bytenr; | ||
5485 | u64 bytes; | ||
5486 | u32 *csums; | ||
5487 | void *private; | ||
5488 | }; | ||
5489 | |||
5490 | static void btrfs_endio_direct_read(struct bio *bio, int err) | ||
5491 | { | ||
5492 | struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; | ||
5493 | struct bio_vec *bvec = bio->bi_io_vec; | ||
5494 | struct btrfs_dio_private *dip = bio->bi_private; | ||
5495 | struct inode *inode = dip->inode; | ||
5496 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5497 | u64 start; | ||
5498 | u32 *private = dip->csums; | ||
5499 | |||
5500 | start = dip->logical_offset; | ||
5501 | do { | ||
5502 | if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) { | ||
5503 | struct page *page = bvec->bv_page; | ||
5504 | char *kaddr; | ||
5505 | u32 csum = ~(u32)0; | ||
5506 | unsigned long flags; | ||
5507 | |||
5508 | local_irq_save(flags); | ||
5509 | kaddr = kmap_atomic(page, KM_IRQ0); | ||
5510 | csum = btrfs_csum_data(root, kaddr + bvec->bv_offset, | ||
5511 | csum, bvec->bv_len); | ||
5512 | btrfs_csum_final(csum, (char *)&csum); | ||
5513 | kunmap_atomic(kaddr, KM_IRQ0); | ||
5514 | local_irq_restore(flags); | ||
5515 | |||
5516 | flush_dcache_page(bvec->bv_page); | ||
5517 | if (csum != *private) { | ||
5518 | printk(KERN_ERR "btrfs csum failed ino %lu off" | ||
5519 | " %llu csum %u private %u\n", | ||
5520 | inode->i_ino, (unsigned long long)start, | ||
5521 | csum, *private); | ||
5522 | err = -EIO; | ||
5523 | } | ||
5524 | } | ||
5525 | |||
5526 | start += bvec->bv_len; | ||
5527 | private++; | ||
5528 | bvec++; | ||
5529 | } while (bvec <= bvec_end); | ||
5530 | |||
5531 | unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset, | ||
5532 | dip->logical_offset + dip->bytes - 1, GFP_NOFS); | ||
5533 | bio->bi_private = dip->private; | ||
5534 | |||
5535 | kfree(dip->csums); | ||
5536 | kfree(dip); | ||
5537 | dio_end_io(bio, err); | ||
5538 | } | ||
5539 | |||
5540 | static void btrfs_endio_direct_write(struct bio *bio, int err) | ||
5541 | { | ||
5542 | struct btrfs_dio_private *dip = bio->bi_private; | ||
5543 | struct inode *inode = dip->inode; | ||
5544 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5545 | struct btrfs_trans_handle *trans; | ||
5546 | struct btrfs_ordered_extent *ordered = NULL; | ||
5547 | struct extent_state *cached_state = NULL; | ||
5548 | int ret; | ||
5549 | |||
5550 | if (err) | ||
5551 | goto out_done; | ||
5552 | |||
5553 | ret = btrfs_dec_test_ordered_pending(inode, &ordered, | ||
5554 | dip->logical_offset, dip->bytes); | ||
5555 | if (!ret) | ||
5556 | goto out_done; | ||
5557 | |||
5558 | BUG_ON(!ordered); | ||
5559 | |||
5560 | trans = btrfs_join_transaction(root, 1); | ||
5561 | if (!trans) { | ||
5562 | err = -ENOMEM; | ||
5563 | goto out; | ||
5564 | } | ||
5565 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
5566 | |||
5567 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) { | ||
5568 | ret = btrfs_ordered_update_i_size(inode, 0, ordered); | ||
5569 | if (!ret) | ||
5570 | ret = btrfs_update_inode(trans, root, inode); | ||
5571 | err = ret; | ||
5572 | goto out; | ||
5573 | } | ||
5574 | |||
5575 | lock_extent_bits(&BTRFS_I(inode)->io_tree, ordered->file_offset, | ||
5576 | ordered->file_offset + ordered->len - 1, 0, | ||
5577 | &cached_state, GFP_NOFS); | ||
5578 | |||
5579 | if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) { | ||
5580 | ret = btrfs_mark_extent_written(trans, inode, | ||
5581 | ordered->file_offset, | ||
5582 | ordered->file_offset + | ||
5583 | ordered->len); | ||
5584 | if (ret) { | ||
5585 | err = ret; | ||
5586 | goto out_unlock; | ||
5587 | } | ||
5588 | } else { | ||
5589 | ret = insert_reserved_file_extent(trans, inode, | ||
5590 | ordered->file_offset, | ||
5591 | ordered->start, | ||
5592 | ordered->disk_len, | ||
5593 | ordered->len, | ||
5594 | ordered->len, | ||
5595 | 0, 0, 0, | ||
5596 | BTRFS_FILE_EXTENT_REG); | ||
5597 | unpin_extent_cache(&BTRFS_I(inode)->extent_tree, | ||
5598 | ordered->file_offset, ordered->len); | ||
5599 | if (ret) { | ||
5600 | err = ret; | ||
5601 | WARN_ON(1); | ||
5602 | goto out_unlock; | ||
5603 | } | ||
5604 | } | ||
5605 | |||
5606 | add_pending_csums(trans, inode, ordered->file_offset, &ordered->list); | ||
5607 | btrfs_ordered_update_i_size(inode, 0, ordered); | ||
5608 | btrfs_update_inode(trans, root, inode); | ||
5609 | out_unlock: | ||
5610 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset, | ||
5611 | ordered->file_offset + ordered->len - 1, | ||
5612 | &cached_state, GFP_NOFS); | ||
5613 | out: | ||
5614 | btrfs_delalloc_release_metadata(inode, ordered->len); | ||
5615 | btrfs_end_transaction(trans, root); | ||
5616 | btrfs_put_ordered_extent(ordered); | ||
5617 | btrfs_put_ordered_extent(ordered); | ||
5618 | out_done: | ||
5619 | bio->bi_private = dip->private; | ||
5620 | |||
5621 | kfree(dip->csums); | ||
5622 | kfree(dip); | ||
5623 | dio_end_io(bio, err); | ||
5624 | } | ||
5625 | |||
5626 | static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw, | ||
5627 | struct bio *bio, int mirror_num, | ||
5628 | unsigned long bio_flags, u64 offset) | ||
5629 | { | ||
5630 | int ret; | ||
5631 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5632 | ret = btrfs_csum_one_bio(root, inode, bio, offset, 1); | ||
5633 | BUG_ON(ret); | ||
5634 | return 0; | ||
5635 | } | ||
5636 | |||
5637 | static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, | ||
5638 | loff_t file_offset) | ||
5639 | { | ||
5640 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5641 | struct btrfs_dio_private *dip; | ||
5642 | struct bio_vec *bvec = bio->bi_io_vec; | ||
5643 | u64 start; | ||
5644 | int skip_sum; | ||
5645 | int write = rw & (1 << BIO_RW); | ||
5646 | int ret = 0; | ||
5647 | |||
5648 | skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | ||
5649 | |||
5650 | dip = kmalloc(sizeof(*dip), GFP_NOFS); | ||
5651 | if (!dip) { | ||
5652 | ret = -ENOMEM; | ||
5653 | goto free_ordered; | ||
5654 | } | ||
5655 | dip->csums = NULL; | ||
5656 | |||
5657 | if (!skip_sum) { | ||
5658 | dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS); | ||
5659 | if (!dip->csums) { | ||
5660 | ret = -ENOMEM; | ||
5661 | goto free_ordered; | ||
5662 | } | ||
5663 | } | ||
5664 | |||
5665 | dip->private = bio->bi_private; | ||
5666 | dip->inode = inode; | ||
5667 | dip->logical_offset = file_offset; | ||
5668 | |||
5669 | start = dip->logical_offset; | ||
5670 | dip->bytes = 0; | ||
5671 | do { | ||
5672 | dip->bytes += bvec->bv_len; | ||
5673 | bvec++; | ||
5674 | } while (bvec <= (bio->bi_io_vec + bio->bi_vcnt - 1)); | ||
5675 | |||
5676 | dip->disk_bytenr = (u64)bio->bi_sector << 9; | ||
5677 | bio->bi_private = dip; | ||
5678 | |||
5679 | if (write) | ||
5680 | bio->bi_end_io = btrfs_endio_direct_write; | ||
5681 | else | ||
5682 | bio->bi_end_io = btrfs_endio_direct_read; | ||
5683 | |||
5684 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); | ||
5685 | if (ret) | ||
5686 | goto out_err; | ||
5687 | |||
5688 | if (write && !skip_sum) { | ||
5689 | ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, | ||
5690 | inode, rw, bio, 0, 0, | ||
5691 | dip->logical_offset, | ||
5692 | __btrfs_submit_bio_start_direct_io, | ||
5693 | __btrfs_submit_bio_done); | ||
5694 | if (ret) | ||
5695 | goto out_err; | ||
5696 | return; | ||
5697 | } else if (!skip_sum) | ||
5698 | btrfs_lookup_bio_sums_dio(root, inode, bio, | ||
5699 | dip->logical_offset, dip->csums); | ||
5700 | |||
5701 | ret = btrfs_map_bio(root, rw, bio, 0, 1); | ||
5702 | if (ret) | ||
5703 | goto out_err; | ||
5704 | return; | ||
5705 | out_err: | ||
5706 | kfree(dip->csums); | ||
5707 | kfree(dip); | ||
5708 | free_ordered: | ||
5709 | /* | ||
5710 | * If this is a write, we need to clean up the reserved space and kill | ||
5711 | * the ordered extent. | ||
5712 | */ | ||
5713 | if (write) { | ||
5714 | struct btrfs_ordered_extent *ordered; | ||
5715 | ordered = btrfs_lookup_ordered_extent(inode, | ||
5716 | dip->logical_offset); | ||
5717 | if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) && | ||
5718 | !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) | ||
5719 | btrfs_free_reserved_extent(root, ordered->start, | ||
5720 | ordered->disk_len); | ||
5721 | btrfs_put_ordered_extent(ordered); | ||
5722 | btrfs_put_ordered_extent(ordered); | ||
5723 | } | ||
5724 | bio_endio(bio, ret); | ||
5725 | } | ||
5726 | |||
5727 | static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb, | ||
5728 | const struct iovec *iov, loff_t offset, | ||
5729 | unsigned long nr_segs) | ||
5730 | { | ||
5731 | int seg; | ||
5732 | size_t size; | ||
5733 | unsigned long addr; | ||
5734 | unsigned blocksize_mask = root->sectorsize - 1; | ||
5735 | ssize_t retval = -EINVAL; | ||
5736 | loff_t end = offset; | ||
5737 | |||
5738 | if (offset & blocksize_mask) | ||
5739 | goto out; | ||
5740 | |||
5741 | /* Check the memory alignment. Blocks cannot straddle pages */ | ||
5742 | for (seg = 0; seg < nr_segs; seg++) { | ||
5743 | addr = (unsigned long)iov[seg].iov_base; | ||
5744 | size = iov[seg].iov_len; | ||
5745 | end += size; | ||
5746 | if ((addr & blocksize_mask) || (size & blocksize_mask)) | ||
5747 | goto out; | ||
5748 | } | ||
5749 | retval = 0; | ||
5750 | out: | ||
5751 | return retval; | ||
5752 | } | ||
4869 | static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, | 5753 | static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, |
4870 | const struct iovec *iov, loff_t offset, | 5754 | const struct iovec *iov, loff_t offset, |
4871 | unsigned long nr_segs) | 5755 | unsigned long nr_segs) |
4872 | { | 5756 | { |
4873 | return -EINVAL; | 5757 | struct file *file = iocb->ki_filp; |
5758 | struct inode *inode = file->f_mapping->host; | ||
5759 | struct btrfs_ordered_extent *ordered; | ||
5760 | struct extent_state *cached_state = NULL; | ||
5761 | u64 lockstart, lockend; | ||
5762 | ssize_t ret; | ||
5763 | int writing = rw & WRITE; | ||
5764 | int write_bits = 0; | ||
5765 | size_t count = iov_length(iov, nr_segs); | ||
5766 | |||
5767 | if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov, | ||
5768 | offset, nr_segs)) { | ||
5769 | return 0; | ||
5770 | } | ||
5771 | |||
5772 | lockstart = offset; | ||
5773 | lockend = offset + count - 1; | ||
5774 | |||
5775 | if (writing) { | ||
5776 | ret = btrfs_delalloc_reserve_space(inode, count); | ||
5777 | if (ret) | ||
5778 | goto out; | ||
5779 | } | ||
5780 | |||
5781 | while (1) { | ||
5782 | lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, | ||
5783 | 0, &cached_state, GFP_NOFS); | ||
5784 | /* | ||
5785 | * We're concerned with the entire range that we're going to be | ||
5786 | * doing DIO to, so we need to make sure there are no ordered | ||
5787 | * extents in this range. | ||
5788 | */ | ||
5789 | ordered = btrfs_lookup_ordered_range(inode, lockstart, | ||
5790 | lockend - lockstart + 1); | ||
5791 | if (!ordered) | ||
5792 | break; | ||
5793 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, | ||
5794 | &cached_state, GFP_NOFS); | ||
5795 | btrfs_start_ordered_extent(inode, ordered, 1); | ||
5796 | btrfs_put_ordered_extent(ordered); | ||
5797 | cond_resched(); | ||
5798 | } | ||
5799 | |||
5800 | /* | ||
5801 | * we don't use btrfs_set_extent_delalloc because we don't want | ||
5802 | * the dirty or uptodate bits | ||
5803 | */ | ||
5804 | if (writing) { | ||
5805 | write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING; | ||
5806 | ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, | ||
5807 | EXTENT_DELALLOC, 0, NULL, &cached_state, | ||
5808 | GFP_NOFS); | ||
5809 | if (ret) { | ||
5810 | clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, | ||
5811 | lockend, EXTENT_LOCKED | write_bits, | ||
5812 | 1, 0, &cached_state, GFP_NOFS); | ||
5813 | goto out; | ||
5814 | } | ||
5815 | } | ||
5816 | |||
5817 | free_extent_state(cached_state); | ||
5818 | cached_state = NULL; | ||
5819 | |||
5820 | ret = __blockdev_direct_IO(rw, iocb, inode, | ||
5821 | BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, | ||
5822 | iov, offset, nr_segs, btrfs_get_blocks_direct, NULL, | ||
5823 | btrfs_submit_direct, 0); | ||
5824 | |||
5825 | if (ret < 0 && ret != -EIOCBQUEUED) { | ||
5826 | clear_extent_bit(&BTRFS_I(inode)->io_tree, offset, | ||
5827 | offset + iov_length(iov, nr_segs) - 1, | ||
5828 | EXTENT_LOCKED | write_bits, 1, 0, | ||
5829 | &cached_state, GFP_NOFS); | ||
5830 | } else if (ret >= 0 && ret < iov_length(iov, nr_segs)) { | ||
5831 | /* | ||
5832 | * We're falling back to buffered, unlock the section we didn't | ||
5833 | * do IO on. | ||
5834 | */ | ||
5835 | clear_extent_bit(&BTRFS_I(inode)->io_tree, offset + ret, | ||
5836 | offset + iov_length(iov, nr_segs) - 1, | ||
5837 | EXTENT_LOCKED | write_bits, 1, 0, | ||
5838 | &cached_state, GFP_NOFS); | ||
5839 | } | ||
5840 | out: | ||
5841 | free_extent_state(cached_state); | ||
5842 | return ret; | ||
4874 | } | 5843 | } |
4875 | 5844 | ||
4876 | static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 5845 | static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
@@ -5034,7 +6003,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
5034 | u64 page_start; | 6003 | u64 page_start; |
5035 | u64 page_end; | 6004 | u64 page_end; |
5036 | 6005 | ||
5037 | ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); | 6006 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); |
5038 | if (ret) { | 6007 | if (ret) { |
5039 | if (ret == -ENOMEM) | 6008 | if (ret == -ENOMEM) |
5040 | ret = VM_FAULT_OOM; | 6009 | ret = VM_FAULT_OOM; |
@@ -5043,13 +6012,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
5043 | goto out; | 6012 | goto out; |
5044 | } | 6013 | } |
5045 | 6014 | ||
5046 | ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1); | ||
5047 | if (ret) { | ||
5048 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | ||
5049 | ret = VM_FAULT_SIGBUS; | ||
5050 | goto out; | ||
5051 | } | ||
5052 | |||
5053 | ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ | 6015 | ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ |
5054 | again: | 6016 | again: |
5055 | lock_page(page); | 6017 | lock_page(page); |
@@ -5059,7 +6021,6 @@ again: | |||
5059 | 6021 | ||
5060 | if ((page->mapping != inode->i_mapping) || | 6022 | if ((page->mapping != inode->i_mapping) || |
5061 | (page_start >= size)) { | 6023 | (page_start >= size)) { |
5062 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | ||
5063 | /* page got truncated out from underneath us */ | 6024 | /* page got truncated out from underneath us */ |
5064 | goto out_unlock; | 6025 | goto out_unlock; |
5065 | } | 6026 | } |
@@ -5100,7 +6061,6 @@ again: | |||
5100 | unlock_extent_cached(io_tree, page_start, page_end, | 6061 | unlock_extent_cached(io_tree, page_start, page_end, |
5101 | &cached_state, GFP_NOFS); | 6062 | &cached_state, GFP_NOFS); |
5102 | ret = VM_FAULT_SIGBUS; | 6063 | ret = VM_FAULT_SIGBUS; |
5103 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | ||
5104 | goto out_unlock; | 6064 | goto out_unlock; |
5105 | } | 6065 | } |
5106 | ret = 0; | 6066 | ret = 0; |
@@ -5127,10 +6087,10 @@ again: | |||
5127 | unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS); | 6087 | unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS); |
5128 | 6088 | ||
5129 | out_unlock: | 6089 | out_unlock: |
5130 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
5131 | if (!ret) | 6090 | if (!ret) |
5132 | return VM_FAULT_LOCKED; | 6091 | return VM_FAULT_LOCKED; |
5133 | unlock_page(page); | 6092 | unlock_page(page); |
6093 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); | ||
5134 | out: | 6094 | out: |
5135 | return ret; | 6095 | return ret; |
5136 | } | 6096 | } |
@@ -5155,8 +6115,10 @@ static void btrfs_truncate(struct inode *inode) | |||
5155 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); | 6115 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); |
5156 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); | 6116 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); |
5157 | 6117 | ||
5158 | trans = btrfs_start_transaction(root, 1); | 6118 | trans = btrfs_start_transaction(root, 0); |
6119 | BUG_ON(IS_ERR(trans)); | ||
5159 | btrfs_set_trans_block_group(trans, inode); | 6120 | btrfs_set_trans_block_group(trans, inode); |
6121 | trans->block_rsv = root->orphan_block_rsv; | ||
5160 | 6122 | ||
5161 | /* | 6123 | /* |
5162 | * setattr is responsible for setting the ordered_data_close flag, | 6124 | * setattr is responsible for setting the ordered_data_close flag, |
@@ -5179,6 +6141,23 @@ static void btrfs_truncate(struct inode *inode) | |||
5179 | btrfs_add_ordered_operation(trans, root, inode); | 6141 | btrfs_add_ordered_operation(trans, root, inode); |
5180 | 6142 | ||
5181 | while (1) { | 6143 | while (1) { |
6144 | if (!trans) { | ||
6145 | trans = btrfs_start_transaction(root, 0); | ||
6146 | BUG_ON(IS_ERR(trans)); | ||
6147 | btrfs_set_trans_block_group(trans, inode); | ||
6148 | trans->block_rsv = root->orphan_block_rsv; | ||
6149 | } | ||
6150 | |||
6151 | ret = btrfs_block_rsv_check(trans, root, | ||
6152 | root->orphan_block_rsv, 0, 5); | ||
6153 | if (ret) { | ||
6154 | BUG_ON(ret != -EAGAIN); | ||
6155 | ret = btrfs_commit_transaction(trans, root); | ||
6156 | BUG_ON(ret); | ||
6157 | trans = NULL; | ||
6158 | continue; | ||
6159 | } | ||
6160 | |||
5182 | ret = btrfs_truncate_inode_items(trans, root, inode, | 6161 | ret = btrfs_truncate_inode_items(trans, root, inode, |
5183 | inode->i_size, | 6162 | inode->i_size, |
5184 | BTRFS_EXTENT_DATA_KEY); | 6163 | BTRFS_EXTENT_DATA_KEY); |
@@ -5190,10 +6169,8 @@ static void btrfs_truncate(struct inode *inode) | |||
5190 | 6169 | ||
5191 | nr = trans->blocks_used; | 6170 | nr = trans->blocks_used; |
5192 | btrfs_end_transaction(trans, root); | 6171 | btrfs_end_transaction(trans, root); |
6172 | trans = NULL; | ||
5193 | btrfs_btree_balance_dirty(root, nr); | 6173 | btrfs_btree_balance_dirty(root, nr); |
5194 | |||
5195 | trans = btrfs_start_transaction(root, 1); | ||
5196 | btrfs_set_trans_block_group(trans, inode); | ||
5197 | } | 6174 | } |
5198 | 6175 | ||
5199 | if (ret == 0 && inode->i_nlink > 0) { | 6176 | if (ret == 0 && inode->i_nlink > 0) { |
@@ -5254,21 +6231,47 @@ unsigned long btrfs_force_ra(struct address_space *mapping, | |||
5254 | struct inode *btrfs_alloc_inode(struct super_block *sb) | 6231 | struct inode *btrfs_alloc_inode(struct super_block *sb) |
5255 | { | 6232 | { |
5256 | struct btrfs_inode *ei; | 6233 | struct btrfs_inode *ei; |
6234 | struct inode *inode; | ||
5257 | 6235 | ||
5258 | ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS); | 6236 | ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS); |
5259 | if (!ei) | 6237 | if (!ei) |
5260 | return NULL; | 6238 | return NULL; |
6239 | |||
6240 | ei->root = NULL; | ||
6241 | ei->space_info = NULL; | ||
6242 | ei->generation = 0; | ||
6243 | ei->sequence = 0; | ||
5261 | ei->last_trans = 0; | 6244 | ei->last_trans = 0; |
5262 | ei->last_sub_trans = 0; | 6245 | ei->last_sub_trans = 0; |
5263 | ei->logged_trans = 0; | 6246 | ei->logged_trans = 0; |
5264 | ei->outstanding_extents = 0; | 6247 | ei->delalloc_bytes = 0; |
5265 | ei->reserved_extents = 0; | 6248 | ei->reserved_bytes = 0; |
5266 | ei->root = NULL; | 6249 | ei->disk_i_size = 0; |
6250 | ei->flags = 0; | ||
6251 | ei->index_cnt = (u64)-1; | ||
6252 | ei->last_unlink_trans = 0; | ||
6253 | |||
5267 | spin_lock_init(&ei->accounting_lock); | 6254 | spin_lock_init(&ei->accounting_lock); |
6255 | atomic_set(&ei->outstanding_extents, 0); | ||
6256 | ei->reserved_extents = 0; | ||
6257 | |||
6258 | ei->ordered_data_close = 0; | ||
6259 | ei->orphan_meta_reserved = 0; | ||
6260 | ei->dummy_inode = 0; | ||
6261 | ei->force_compress = 0; | ||
6262 | |||
6263 | inode = &ei->vfs_inode; | ||
6264 | extent_map_tree_init(&ei->extent_tree, GFP_NOFS); | ||
6265 | extent_io_tree_init(&ei->io_tree, &inode->i_data, GFP_NOFS); | ||
6266 | extent_io_tree_init(&ei->io_failure_tree, &inode->i_data, GFP_NOFS); | ||
6267 | mutex_init(&ei->log_mutex); | ||
5268 | btrfs_ordered_inode_tree_init(&ei->ordered_tree); | 6268 | btrfs_ordered_inode_tree_init(&ei->ordered_tree); |
5269 | INIT_LIST_HEAD(&ei->i_orphan); | 6269 | INIT_LIST_HEAD(&ei->i_orphan); |
6270 | INIT_LIST_HEAD(&ei->delalloc_inodes); | ||
5270 | INIT_LIST_HEAD(&ei->ordered_operations); | 6271 | INIT_LIST_HEAD(&ei->ordered_operations); |
5271 | return &ei->vfs_inode; | 6272 | RB_CLEAR_NODE(&ei->rb_node); |
6273 | |||
6274 | return inode; | ||
5272 | } | 6275 | } |
5273 | 6276 | ||
5274 | void btrfs_destroy_inode(struct inode *inode) | 6277 | void btrfs_destroy_inode(struct inode *inode) |
@@ -5278,6 +6281,8 @@ void btrfs_destroy_inode(struct inode *inode) | |||
5278 | 6281 | ||
5279 | WARN_ON(!list_empty(&inode->i_dentry)); | 6282 | WARN_ON(!list_empty(&inode->i_dentry)); |
5280 | WARN_ON(inode->i_data.nrpages); | 6283 | WARN_ON(inode->i_data.nrpages); |
6284 | WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents)); | ||
6285 | WARN_ON(BTRFS_I(inode)->reserved_extents); | ||
5281 | 6286 | ||
5282 | /* | 6287 | /* |
5283 | * This can happen where we create an inode, but somebody else also | 6288 | * This can happen where we create an inode, but somebody else also |
@@ -5298,13 +6303,13 @@ void btrfs_destroy_inode(struct inode *inode) | |||
5298 | spin_unlock(&root->fs_info->ordered_extent_lock); | 6303 | spin_unlock(&root->fs_info->ordered_extent_lock); |
5299 | } | 6304 | } |
5300 | 6305 | ||
5301 | spin_lock(&root->list_lock); | 6306 | spin_lock(&root->orphan_lock); |
5302 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { | 6307 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { |
5303 | printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n", | 6308 | printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n", |
5304 | inode->i_ino); | 6309 | inode->i_ino); |
5305 | list_del_init(&BTRFS_I(inode)->i_orphan); | 6310 | list_del_init(&BTRFS_I(inode)->i_orphan); |
5306 | } | 6311 | } |
5307 | spin_unlock(&root->list_lock); | 6312 | spin_unlock(&root->orphan_lock); |
5308 | 6313 | ||
5309 | while (1) { | 6314 | while (1) { |
5310 | ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); | 6315 | ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); |
@@ -5425,19 +6430,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
5425 | if (S_ISDIR(old_inode->i_mode) && new_inode && | 6430 | if (S_ISDIR(old_inode->i_mode) && new_inode && |
5426 | new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) | 6431 | new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) |
5427 | return -ENOTEMPTY; | 6432 | return -ENOTEMPTY; |
5428 | |||
5429 | /* | ||
5430 | * We want to reserve the absolute worst case amount of items. So if | ||
5431 | * both inodes are subvols and we need to unlink them then that would | ||
5432 | * require 4 item modifications, but if they are both normal inodes it | ||
5433 | * would require 5 item modifications, so we'll assume their normal | ||
5434 | * inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items | ||
5435 | * should cover the worst case number of items we'll modify. | ||
5436 | */ | ||
5437 | ret = btrfs_reserve_metadata_space(root, 11); | ||
5438 | if (ret) | ||
5439 | return ret; | ||
5440 | |||
5441 | /* | 6433 | /* |
5442 | * we're using rename to replace one file with another. | 6434 | * we're using rename to replace one file with another. |
5443 | * and the replacement file is large. Start IO on it now so | 6435 | * and the replacement file is large. Start IO on it now so |
@@ -5450,8 +6442,18 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
5450 | /* close the racy window with snapshot create/destroy ioctl */ | 6442 | /* close the racy window with snapshot create/destroy ioctl */ |
5451 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) | 6443 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) |
5452 | down_read(&root->fs_info->subvol_sem); | 6444 | down_read(&root->fs_info->subvol_sem); |
6445 | /* | ||
6446 | * We want to reserve the absolute worst case amount of items. So if | ||
6447 | * both inodes are subvols and we need to unlink them then that would | ||
6448 | * require 4 item modifications, but if they are both normal inodes it | ||
6449 | * would require 5 item modifications, so we'll assume their normal | ||
6450 | * inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items | ||
6451 | * should cover the worst case number of items we'll modify. | ||
6452 | */ | ||
6453 | trans = btrfs_start_transaction(root, 20); | ||
6454 | if (IS_ERR(trans)) | ||
6455 | return PTR_ERR(trans); | ||
5453 | 6456 | ||
5454 | trans = btrfs_start_transaction(root, 1); | ||
5455 | btrfs_set_trans_block_group(trans, new_dir); | 6457 | btrfs_set_trans_block_group(trans, new_dir); |
5456 | 6458 | ||
5457 | if (dest != root) | 6459 | if (dest != root) |
@@ -5550,7 +6552,6 @@ out_fail: | |||
5550 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) | 6552 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) |
5551 | up_read(&root->fs_info->subvol_sem); | 6553 | up_read(&root->fs_info->subvol_sem); |
5552 | 6554 | ||
5553 | btrfs_unreserve_metadata_space(root, 11); | ||
5554 | return ret; | 6555 | return ret; |
5555 | } | 6556 | } |
5556 | 6557 | ||
@@ -5602,6 +6603,38 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
5602 | return 0; | 6603 | return 0; |
5603 | } | 6604 | } |
5604 | 6605 | ||
6606 | int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput) | ||
6607 | { | ||
6608 | struct btrfs_inode *binode; | ||
6609 | struct inode *inode = NULL; | ||
6610 | |||
6611 | spin_lock(&root->fs_info->delalloc_lock); | ||
6612 | while (!list_empty(&root->fs_info->delalloc_inodes)) { | ||
6613 | binode = list_entry(root->fs_info->delalloc_inodes.next, | ||
6614 | struct btrfs_inode, delalloc_inodes); | ||
6615 | inode = igrab(&binode->vfs_inode); | ||
6616 | if (inode) { | ||
6617 | list_move_tail(&binode->delalloc_inodes, | ||
6618 | &root->fs_info->delalloc_inodes); | ||
6619 | break; | ||
6620 | } | ||
6621 | |||
6622 | list_del_init(&binode->delalloc_inodes); | ||
6623 | cond_resched_lock(&root->fs_info->delalloc_lock); | ||
6624 | } | ||
6625 | spin_unlock(&root->fs_info->delalloc_lock); | ||
6626 | |||
6627 | if (inode) { | ||
6628 | write_inode_now(inode, 0); | ||
6629 | if (delay_iput) | ||
6630 | btrfs_add_delayed_iput(inode); | ||
6631 | else | ||
6632 | iput(inode); | ||
6633 | return 1; | ||
6634 | } | ||
6635 | return 0; | ||
6636 | } | ||
6637 | |||
5605 | static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | 6638 | static int btrfs_symlink(struct inode *dir, struct dentry *dentry, |
5606 | const char *symname) | 6639 | const char *symname) |
5607 | { | 6640 | { |
@@ -5625,26 +6658,20 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
5625 | if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) | 6658 | if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) |
5626 | return -ENAMETOOLONG; | 6659 | return -ENAMETOOLONG; |
5627 | 6660 | ||
6661 | err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid); | ||
6662 | if (err) | ||
6663 | return err; | ||
5628 | /* | 6664 | /* |
5629 | * 2 items for inode item and ref | 6665 | * 2 items for inode item and ref |
5630 | * 2 items for dir items | 6666 | * 2 items for dir items |
5631 | * 1 item for xattr if selinux is on | 6667 | * 1 item for xattr if selinux is on |
5632 | */ | 6668 | */ |
5633 | err = btrfs_reserve_metadata_space(root, 5); | 6669 | trans = btrfs_start_transaction(root, 5); |
5634 | if (err) | 6670 | if (IS_ERR(trans)) |
5635 | return err; | 6671 | return PTR_ERR(trans); |
5636 | 6672 | ||
5637 | trans = btrfs_start_transaction(root, 1); | ||
5638 | if (!trans) | ||
5639 | goto out_fail; | ||
5640 | btrfs_set_trans_block_group(trans, dir); | 6673 | btrfs_set_trans_block_group(trans, dir); |
5641 | 6674 | ||
5642 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | ||
5643 | if (err) { | ||
5644 | err = -ENOSPC; | ||
5645 | goto out_unlock; | ||
5646 | } | ||
5647 | |||
5648 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 6675 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
5649 | dentry->d_name.len, | 6676 | dentry->d_name.len, |
5650 | dentry->d_parent->d_inode->i_ino, objectid, | 6677 | dentry->d_parent->d_inode->i_ino, objectid, |
@@ -5716,8 +6743,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
5716 | out_unlock: | 6743 | out_unlock: |
5717 | nr = trans->blocks_used; | 6744 | nr = trans->blocks_used; |
5718 | btrfs_end_transaction_throttle(trans, root); | 6745 | btrfs_end_transaction_throttle(trans, root); |
5719 | out_fail: | ||
5720 | btrfs_unreserve_metadata_space(root, 5); | ||
5721 | if (drop_inode) { | 6746 | if (drop_inode) { |
5722 | inode_dec_link_count(inode); | 6747 | inode_dec_link_count(inode); |
5723 | iput(inode); | 6748 | iput(inode); |
@@ -5726,33 +6751,28 @@ out_fail: | |||
5726 | return err; | 6751 | return err; |
5727 | } | 6752 | } |
5728 | 6753 | ||
5729 | static int prealloc_file_range(struct inode *inode, u64 start, u64 end, | 6754 | int btrfs_prealloc_file_range(struct inode *inode, int mode, |
5730 | u64 alloc_hint, int mode, loff_t actual_len) | 6755 | u64 start, u64 num_bytes, u64 min_size, |
6756 | loff_t actual_len, u64 *alloc_hint) | ||
5731 | { | 6757 | { |
5732 | struct btrfs_trans_handle *trans; | 6758 | struct btrfs_trans_handle *trans; |
5733 | struct btrfs_root *root = BTRFS_I(inode)->root; | 6759 | struct btrfs_root *root = BTRFS_I(inode)->root; |
5734 | struct btrfs_key ins; | 6760 | struct btrfs_key ins; |
5735 | u64 cur_offset = start; | 6761 | u64 cur_offset = start; |
5736 | u64 num_bytes = end - start; | ||
5737 | int ret = 0; | 6762 | int ret = 0; |
5738 | u64 i_size; | ||
5739 | 6763 | ||
5740 | while (num_bytes > 0) { | 6764 | while (num_bytes > 0) { |
5741 | trans = btrfs_start_transaction(root, 1); | 6765 | trans = btrfs_start_transaction(root, 3); |
5742 | 6766 | if (IS_ERR(trans)) { | |
5743 | ret = btrfs_reserve_extent(trans, root, num_bytes, | 6767 | ret = PTR_ERR(trans); |
5744 | root->sectorsize, 0, alloc_hint, | 6768 | break; |
5745 | (u64)-1, &ins, 1); | ||
5746 | if (ret) { | ||
5747 | WARN_ON(1); | ||
5748 | goto stop_trans; | ||
5749 | } | 6769 | } |
5750 | 6770 | ||
5751 | ret = btrfs_reserve_metadata_space(root, 3); | 6771 | ret = btrfs_reserve_extent(trans, root, num_bytes, min_size, |
6772 | 0, *alloc_hint, (u64)-1, &ins, 1); | ||
5752 | if (ret) { | 6773 | if (ret) { |
5753 | btrfs_free_reserved_extent(root, ins.objectid, | 6774 | btrfs_end_transaction(trans, root); |
5754 | ins.offset); | 6775 | break; |
5755 | goto stop_trans; | ||
5756 | } | 6776 | } |
5757 | 6777 | ||
5758 | ret = insert_reserved_file_extent(trans, inode, | 6778 | ret = insert_reserved_file_extent(trans, inode, |
@@ -5766,34 +6786,27 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end, | |||
5766 | 6786 | ||
5767 | num_bytes -= ins.offset; | 6787 | num_bytes -= ins.offset; |
5768 | cur_offset += ins.offset; | 6788 | cur_offset += ins.offset; |
5769 | alloc_hint = ins.objectid + ins.offset; | 6789 | *alloc_hint = ins.objectid + ins.offset; |
5770 | 6790 | ||
5771 | inode->i_ctime = CURRENT_TIME; | 6791 | inode->i_ctime = CURRENT_TIME; |
5772 | BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; | 6792 | BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; |
5773 | if (!(mode & FALLOC_FL_KEEP_SIZE) && | 6793 | if (!(mode & FALLOC_FL_KEEP_SIZE) && |
5774 | (actual_len > inode->i_size) && | 6794 | (actual_len > inode->i_size) && |
5775 | (cur_offset > inode->i_size)) { | 6795 | (cur_offset > inode->i_size)) { |
5776 | |||
5777 | if (cur_offset > actual_len) | 6796 | if (cur_offset > actual_len) |
5778 | i_size = actual_len; | 6797 | i_size_write(inode, actual_len); |
5779 | else | 6798 | else |
5780 | i_size = cur_offset; | 6799 | i_size_write(inode, cur_offset); |
5781 | i_size_write(inode, i_size); | 6800 | i_size_write(inode, cur_offset); |
5782 | btrfs_ordered_update_i_size(inode, i_size, NULL); | 6801 | btrfs_ordered_update_i_size(inode, cur_offset, NULL); |
5783 | } | 6802 | } |
5784 | 6803 | ||
5785 | ret = btrfs_update_inode(trans, root, inode); | 6804 | ret = btrfs_update_inode(trans, root, inode); |
5786 | BUG_ON(ret); | 6805 | BUG_ON(ret); |
5787 | 6806 | ||
5788 | btrfs_end_transaction(trans, root); | 6807 | btrfs_end_transaction(trans, root); |
5789 | btrfs_unreserve_metadata_space(root, 3); | ||
5790 | } | 6808 | } |
5791 | return ret; | 6809 | return ret; |
5792 | |||
5793 | stop_trans: | ||
5794 | btrfs_end_transaction(trans, root); | ||
5795 | return ret; | ||
5796 | |||
5797 | } | 6810 | } |
5798 | 6811 | ||
5799 | static long btrfs_fallocate(struct inode *inode, int mode, | 6812 | static long btrfs_fallocate(struct inode *inode, int mode, |
@@ -5826,8 +6839,7 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
5826 | goto out; | 6839 | goto out; |
5827 | } | 6840 | } |
5828 | 6841 | ||
5829 | ret = btrfs_check_data_free_space(BTRFS_I(inode)->root, inode, | 6842 | ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start); |
5830 | alloc_end - alloc_start); | ||
5831 | if (ret) | 6843 | if (ret) |
5832 | goto out; | 6844 | goto out; |
5833 | 6845 | ||
@@ -5872,16 +6884,16 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
5872 | if (em->block_start == EXTENT_MAP_HOLE || | 6884 | if (em->block_start == EXTENT_MAP_HOLE || |
5873 | (cur_offset >= inode->i_size && | 6885 | (cur_offset >= inode->i_size && |
5874 | !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { | 6886 | !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { |
5875 | ret = prealloc_file_range(inode, | 6887 | ret = btrfs_prealloc_file_range(inode, 0, cur_offset, |
5876 | cur_offset, last_byte, | 6888 | last_byte - cur_offset, |
5877 | alloc_hint, mode, offset+len); | 6889 | 1 << inode->i_blkbits, |
6890 | offset + len, | ||
6891 | &alloc_hint); | ||
5878 | if (ret < 0) { | 6892 | if (ret < 0) { |
5879 | free_extent_map(em); | 6893 | free_extent_map(em); |
5880 | break; | 6894 | break; |
5881 | } | 6895 | } |
5882 | } | 6896 | } |
5883 | if (em->block_start <= EXTENT_MAP_LAST_BYTE) | ||
5884 | alloc_hint = em->block_start; | ||
5885 | free_extent_map(em); | 6897 | free_extent_map(em); |
5886 | 6898 | ||
5887 | cur_offset = last_byte; | 6899 | cur_offset = last_byte; |
@@ -5893,8 +6905,7 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
5893 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, | 6905 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, |
5894 | &cached_state, GFP_NOFS); | 6906 | &cached_state, GFP_NOFS); |
5895 | 6907 | ||
5896 | btrfs_free_reserved_data_space(BTRFS_I(inode)->root, inode, | 6908 | btrfs_free_reserved_data_space(inode, alloc_end - alloc_start); |
5897 | alloc_end - alloc_start); | ||
5898 | out: | 6909 | out: |
5899 | mutex_unlock(&inode->i_mutex); | 6910 | mutex_unlock(&inode->i_mutex); |
5900 | return ret; | 6911 | return ret; |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 97a97839a867..4cdb98cf26de 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -239,23 +239,19 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
239 | u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; | 239 | u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; |
240 | u64 index = 0; | 240 | u64 index = 0; |
241 | 241 | ||
242 | ret = btrfs_find_free_objectid(NULL, root->fs_info->tree_root, | ||
243 | 0, &objectid); | ||
244 | if (ret) | ||
245 | return ret; | ||
242 | /* | 246 | /* |
243 | * 1 - inode item | 247 | * 1 - inode item |
244 | * 2 - refs | 248 | * 2 - refs |
245 | * 1 - root item | 249 | * 1 - root item |
246 | * 2 - dir items | 250 | * 2 - dir items |
247 | */ | 251 | */ |
248 | ret = btrfs_reserve_metadata_space(root, 6); | 252 | trans = btrfs_start_transaction(root, 6); |
249 | if (ret) | 253 | if (IS_ERR(trans)) |
250 | return ret; | 254 | return PTR_ERR(trans); |
251 | |||
252 | trans = btrfs_start_transaction(root, 1); | ||
253 | BUG_ON(!trans); | ||
254 | |||
255 | ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, | ||
256 | 0, &objectid); | ||
257 | if (ret) | ||
258 | goto fail; | ||
259 | 255 | ||
260 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, | 256 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, |
261 | 0, objectid, NULL, 0, 0, 0); | 257 | 0, objectid, NULL, 0, 0, 0); |
@@ -345,13 +341,10 @@ fail: | |||
345 | err = btrfs_commit_transaction(trans, root); | 341 | err = btrfs_commit_transaction(trans, root); |
346 | if (err && !ret) | 342 | if (err && !ret) |
347 | ret = err; | 343 | ret = err; |
348 | |||
349 | btrfs_unreserve_metadata_space(root, 6); | ||
350 | return ret; | 344 | return ret; |
351 | } | 345 | } |
352 | 346 | ||
353 | static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | 347 | static int create_snapshot(struct btrfs_root *root, struct dentry *dentry) |
354 | char *name, int namelen) | ||
355 | { | 348 | { |
356 | struct inode *inode; | 349 | struct inode *inode; |
357 | struct btrfs_pending_snapshot *pending_snapshot; | 350 | struct btrfs_pending_snapshot *pending_snapshot; |
@@ -361,40 +354,33 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
361 | if (!root->ref_cows) | 354 | if (!root->ref_cows) |
362 | return -EINVAL; | 355 | return -EINVAL; |
363 | 356 | ||
364 | /* | ||
365 | * 1 - inode item | ||
366 | * 2 - refs | ||
367 | * 1 - root item | ||
368 | * 2 - dir items | ||
369 | */ | ||
370 | ret = btrfs_reserve_metadata_space(root, 6); | ||
371 | if (ret) | ||
372 | goto fail; | ||
373 | |||
374 | pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); | 357 | pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); |
375 | if (!pending_snapshot) { | 358 | if (!pending_snapshot) |
376 | ret = -ENOMEM; | 359 | return -ENOMEM; |
377 | btrfs_unreserve_metadata_space(root, 6); | 360 | |
378 | goto fail; | 361 | btrfs_init_block_rsv(&pending_snapshot->block_rsv); |
379 | } | ||
380 | pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS); | ||
381 | if (!pending_snapshot->name) { | ||
382 | ret = -ENOMEM; | ||
383 | kfree(pending_snapshot); | ||
384 | btrfs_unreserve_metadata_space(root, 6); | ||
385 | goto fail; | ||
386 | } | ||
387 | memcpy(pending_snapshot->name, name, namelen); | ||
388 | pending_snapshot->name[namelen] = '\0'; | ||
389 | pending_snapshot->dentry = dentry; | 362 | pending_snapshot->dentry = dentry; |
390 | trans = btrfs_start_transaction(root, 1); | ||
391 | BUG_ON(!trans); | ||
392 | pending_snapshot->root = root; | 363 | pending_snapshot->root = root; |
364 | |||
365 | trans = btrfs_start_transaction(root->fs_info->extent_root, 5); | ||
366 | if (IS_ERR(trans)) { | ||
367 | ret = PTR_ERR(trans); | ||
368 | goto fail; | ||
369 | } | ||
370 | |||
371 | ret = btrfs_snap_reserve_metadata(trans, pending_snapshot); | ||
372 | BUG_ON(ret); | ||
373 | |||
393 | list_add(&pending_snapshot->list, | 374 | list_add(&pending_snapshot->list, |
394 | &trans->transaction->pending_snapshots); | 375 | &trans->transaction->pending_snapshots); |
395 | ret = btrfs_commit_transaction(trans, root); | 376 | ret = btrfs_commit_transaction(trans, root->fs_info->extent_root); |
396 | BUG_ON(ret); | 377 | BUG_ON(ret); |
397 | btrfs_unreserve_metadata_space(root, 6); | 378 | |
379 | ret = pending_snapshot->error; | ||
380 | if (ret) | ||
381 | goto fail; | ||
382 | |||
383 | btrfs_orphan_cleanup(pending_snapshot->snap); | ||
398 | 384 | ||
399 | inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); | 385 | inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); |
400 | if (IS_ERR(inode)) { | 386 | if (IS_ERR(inode)) { |
@@ -405,6 +391,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
405 | d_instantiate(dentry, inode); | 391 | d_instantiate(dentry, inode); |
406 | ret = 0; | 392 | ret = 0; |
407 | fail: | 393 | fail: |
394 | kfree(pending_snapshot); | ||
408 | return ret; | 395 | return ret; |
409 | } | 396 | } |
410 | 397 | ||
@@ -456,8 +443,7 @@ static noinline int btrfs_mksubvol(struct path *parent, | |||
456 | goto out_up_read; | 443 | goto out_up_read; |
457 | 444 | ||
458 | if (snap_src) { | 445 | if (snap_src) { |
459 | error = create_snapshot(snap_src, dentry, | 446 | error = create_snapshot(snap_src, dentry); |
460 | name, namelen); | ||
461 | } else { | 447 | } else { |
462 | error = create_subvol(BTRFS_I(dir)->root, dentry, | 448 | error = create_subvol(BTRFS_I(dir)->root, dentry, |
463 | name, namelen); | 449 | name, namelen); |
@@ -601,19 +587,9 @@ static int btrfs_defrag_file(struct file *file, | |||
601 | if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) | 587 | if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) |
602 | BTRFS_I(inode)->force_compress = 1; | 588 | BTRFS_I(inode)->force_compress = 1; |
603 | 589 | ||
604 | ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); | 590 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); |
605 | if (ret) { | 591 | if (ret) |
606 | ret = -ENOSPC; | 592 | goto err_unlock; |
607 | break; | ||
608 | } | ||
609 | |||
610 | ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1); | ||
611 | if (ret) { | ||
612 | btrfs_free_reserved_data_space(root, inode, | ||
613 | PAGE_CACHE_SIZE); | ||
614 | ret = -ENOSPC; | ||
615 | break; | ||
616 | } | ||
617 | again: | 593 | again: |
618 | if (inode->i_size == 0 || | 594 | if (inode->i_size == 0 || |
619 | i > ((inode->i_size - 1) >> PAGE_CACHE_SHIFT)) { | 595 | i > ((inode->i_size - 1) >> PAGE_CACHE_SHIFT)) { |
@@ -622,8 +598,10 @@ again: | |||
622 | } | 598 | } |
623 | 599 | ||
624 | page = grab_cache_page(inode->i_mapping, i); | 600 | page = grab_cache_page(inode->i_mapping, i); |
625 | if (!page) | 601 | if (!page) { |
602 | ret = -ENOMEM; | ||
626 | goto err_reservations; | 603 | goto err_reservations; |
604 | } | ||
627 | 605 | ||
628 | if (!PageUptodate(page)) { | 606 | if (!PageUptodate(page)) { |
629 | btrfs_readpage(NULL, page); | 607 | btrfs_readpage(NULL, page); |
@@ -631,6 +609,7 @@ again: | |||
631 | if (!PageUptodate(page)) { | 609 | if (!PageUptodate(page)) { |
632 | unlock_page(page); | 610 | unlock_page(page); |
633 | page_cache_release(page); | 611 | page_cache_release(page); |
612 | ret = -EIO; | ||
634 | goto err_reservations; | 613 | goto err_reservations; |
635 | } | 614 | } |
636 | } | 615 | } |
@@ -644,8 +623,7 @@ again: | |||
644 | wait_on_page_writeback(page); | 623 | wait_on_page_writeback(page); |
645 | 624 | ||
646 | if (PageDirty(page)) { | 625 | if (PageDirty(page)) { |
647 | btrfs_free_reserved_data_space(root, inode, | 626 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); |
648 | PAGE_CACHE_SIZE); | ||
649 | goto loop_unlock; | 627 | goto loop_unlock; |
650 | } | 628 | } |
651 | 629 | ||
@@ -683,7 +661,6 @@ loop_unlock: | |||
683 | page_cache_release(page); | 661 | page_cache_release(page); |
684 | mutex_unlock(&inode->i_mutex); | 662 | mutex_unlock(&inode->i_mutex); |
685 | 663 | ||
686 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
687 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); | 664 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); |
688 | i++; | 665 | i++; |
689 | } | 666 | } |
@@ -713,9 +690,9 @@ loop_unlock: | |||
713 | return 0; | 690 | return 0; |
714 | 691 | ||
715 | err_reservations: | 692 | err_reservations: |
693 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); | ||
694 | err_unlock: | ||
716 | mutex_unlock(&inode->i_mutex); | 695 | mutex_unlock(&inode->i_mutex); |
717 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | ||
718 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
719 | return ret; | 696 | return ret; |
720 | } | 697 | } |
721 | 698 | ||
@@ -811,7 +788,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, | |||
811 | device->name, (unsigned long long)new_size); | 788 | device->name, (unsigned long long)new_size); |
812 | 789 | ||
813 | if (new_size > old_size) { | 790 | if (new_size > old_size) { |
814 | trans = btrfs_start_transaction(root, 1); | 791 | trans = btrfs_start_transaction(root, 0); |
815 | ret = btrfs_grow_device(trans, device, new_size); | 792 | ret = btrfs_grow_device(trans, device, new_size); |
816 | btrfs_commit_transaction(trans, root); | 793 | btrfs_commit_transaction(trans, root); |
817 | } else { | 794 | } else { |
@@ -1300,7 +1277,13 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, | |||
1300 | if (err) | 1277 | if (err) |
1301 | goto out_up_write; | 1278 | goto out_up_write; |
1302 | 1279 | ||
1303 | trans = btrfs_start_transaction(root, 1); | 1280 | trans = btrfs_start_transaction(root, 0); |
1281 | if (IS_ERR(trans)) { | ||
1282 | err = PTR_ERR(trans); | ||
1283 | goto out; | ||
1284 | } | ||
1285 | trans->block_rsv = &root->fs_info->global_block_rsv; | ||
1286 | |||
1304 | ret = btrfs_unlink_subvol(trans, root, dir, | 1287 | ret = btrfs_unlink_subvol(trans, root, dir, |
1305 | dest->root_key.objectid, | 1288 | dest->root_key.objectid, |
1306 | dentry->d_name.name, | 1289 | dentry->d_name.name, |
@@ -1314,10 +1297,12 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, | |||
1314 | dest->root_item.drop_level = 0; | 1297 | dest->root_item.drop_level = 0; |
1315 | btrfs_set_root_refs(&dest->root_item, 0); | 1298 | btrfs_set_root_refs(&dest->root_item, 0); |
1316 | 1299 | ||
1317 | ret = btrfs_insert_orphan_item(trans, | 1300 | if (!xchg(&dest->orphan_item_inserted, 1)) { |
1318 | root->fs_info->tree_root, | 1301 | ret = btrfs_insert_orphan_item(trans, |
1319 | dest->root_key.objectid); | 1302 | root->fs_info->tree_root, |
1320 | BUG_ON(ret); | 1303 | dest->root_key.objectid); |
1304 | BUG_ON(ret); | ||
1305 | } | ||
1321 | 1306 | ||
1322 | ret = btrfs_commit_transaction(trans, root); | 1307 | ret = btrfs_commit_transaction(trans, root); |
1323 | BUG_ON(ret); | 1308 | BUG_ON(ret); |
@@ -1358,8 +1343,10 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) | |||
1358 | ret = -EPERM; | 1343 | ret = -EPERM; |
1359 | goto out; | 1344 | goto out; |
1360 | } | 1345 | } |
1361 | btrfs_defrag_root(root, 0); | 1346 | ret = btrfs_defrag_root(root, 0); |
1362 | btrfs_defrag_root(root->fs_info->extent_root, 0); | 1347 | if (ret) |
1348 | goto out; | ||
1349 | ret = btrfs_defrag_root(root->fs_info->extent_root, 0); | ||
1363 | break; | 1350 | break; |
1364 | case S_IFREG: | 1351 | case S_IFREG: |
1365 | if (!(file->f_mode & FMODE_WRITE)) { | 1352 | if (!(file->f_mode & FMODE_WRITE)) { |
@@ -1389,9 +1376,11 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) | |||
1389 | /* the rest are all set to zero by kzalloc */ | 1376 | /* the rest are all set to zero by kzalloc */ |
1390 | range->len = (u64)-1; | 1377 | range->len = (u64)-1; |
1391 | } | 1378 | } |
1392 | btrfs_defrag_file(file, range); | 1379 | ret = btrfs_defrag_file(file, range); |
1393 | kfree(range); | 1380 | kfree(range); |
1394 | break; | 1381 | break; |
1382 | default: | ||
1383 | ret = -EINVAL; | ||
1395 | } | 1384 | } |
1396 | out: | 1385 | out: |
1397 | mnt_drop_write(file->f_path.mnt); | 1386 | mnt_drop_write(file->f_path.mnt); |
@@ -1550,12 +1539,6 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1550 | btrfs_wait_ordered_range(src, off, off+len); | 1539 | btrfs_wait_ordered_range(src, off, off+len); |
1551 | } | 1540 | } |
1552 | 1541 | ||
1553 | trans = btrfs_start_transaction(root, 1); | ||
1554 | BUG_ON(!trans); | ||
1555 | |||
1556 | /* punch hole in destination first */ | ||
1557 | btrfs_drop_extents(trans, inode, off, off + len, &hint_byte, 1); | ||
1558 | |||
1559 | /* clone data */ | 1542 | /* clone data */ |
1560 | key.objectid = src->i_ino; | 1543 | key.objectid = src->i_ino; |
1561 | key.type = BTRFS_EXTENT_DATA_KEY; | 1544 | key.type = BTRFS_EXTENT_DATA_KEY; |
@@ -1566,7 +1549,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1566 | * note the key will change type as we walk through the | 1549 | * note the key will change type as we walk through the |
1567 | * tree. | 1550 | * tree. |
1568 | */ | 1551 | */ |
1569 | ret = btrfs_search_slot(trans, root, &key, path, 0, 0); | 1552 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
1570 | if (ret < 0) | 1553 | if (ret < 0) |
1571 | goto out; | 1554 | goto out; |
1572 | 1555 | ||
@@ -1629,12 +1612,31 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1629 | new_key.objectid = inode->i_ino; | 1612 | new_key.objectid = inode->i_ino; |
1630 | new_key.offset = key.offset + destoff - off; | 1613 | new_key.offset = key.offset + destoff - off; |
1631 | 1614 | ||
1615 | trans = btrfs_start_transaction(root, 1); | ||
1616 | if (IS_ERR(trans)) { | ||
1617 | ret = PTR_ERR(trans); | ||
1618 | goto out; | ||
1619 | } | ||
1620 | |||
1632 | if (type == BTRFS_FILE_EXTENT_REG || | 1621 | if (type == BTRFS_FILE_EXTENT_REG || |
1633 | type == BTRFS_FILE_EXTENT_PREALLOC) { | 1622 | type == BTRFS_FILE_EXTENT_PREALLOC) { |
1623 | if (off > key.offset) { | ||
1624 | datao += off - key.offset; | ||
1625 | datal -= off - key.offset; | ||
1626 | } | ||
1627 | |||
1628 | if (key.offset + datal > off + len) | ||
1629 | datal = off + len - key.offset; | ||
1630 | |||
1631 | ret = btrfs_drop_extents(trans, inode, | ||
1632 | new_key.offset, | ||
1633 | new_key.offset + datal, | ||
1634 | &hint_byte, 1); | ||
1635 | BUG_ON(ret); | ||
1636 | |||
1634 | ret = btrfs_insert_empty_item(trans, root, path, | 1637 | ret = btrfs_insert_empty_item(trans, root, path, |
1635 | &new_key, size); | 1638 | &new_key, size); |
1636 | if (ret) | 1639 | BUG_ON(ret); |
1637 | goto out; | ||
1638 | 1640 | ||
1639 | leaf = path->nodes[0]; | 1641 | leaf = path->nodes[0]; |
1640 | slot = path->slots[0]; | 1642 | slot = path->slots[0]; |
@@ -1645,14 +1647,6 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1645 | extent = btrfs_item_ptr(leaf, slot, | 1647 | extent = btrfs_item_ptr(leaf, slot, |
1646 | struct btrfs_file_extent_item); | 1648 | struct btrfs_file_extent_item); |
1647 | 1649 | ||
1648 | if (off > key.offset) { | ||
1649 | datao += off - key.offset; | ||
1650 | datal -= off - key.offset; | ||
1651 | } | ||
1652 | |||
1653 | if (key.offset + datal > off + len) | ||
1654 | datal = off + len - key.offset; | ||
1655 | |||
1656 | /* disko == 0 means it's a hole */ | 1650 | /* disko == 0 means it's a hole */ |
1657 | if (!disko) | 1651 | if (!disko) |
1658 | datao = 0; | 1652 | datao = 0; |
@@ -1683,14 +1677,21 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1683 | 1677 | ||
1684 | if (comp && (skip || trim)) { | 1678 | if (comp && (skip || trim)) { |
1685 | ret = -EINVAL; | 1679 | ret = -EINVAL; |
1680 | btrfs_end_transaction(trans, root); | ||
1686 | goto out; | 1681 | goto out; |
1687 | } | 1682 | } |
1688 | size -= skip + trim; | 1683 | size -= skip + trim; |
1689 | datal -= skip + trim; | 1684 | datal -= skip + trim; |
1685 | |||
1686 | ret = btrfs_drop_extents(trans, inode, | ||
1687 | new_key.offset, | ||
1688 | new_key.offset + datal, | ||
1689 | &hint_byte, 1); | ||
1690 | BUG_ON(ret); | ||
1691 | |||
1690 | ret = btrfs_insert_empty_item(trans, root, path, | 1692 | ret = btrfs_insert_empty_item(trans, root, path, |
1691 | &new_key, size); | 1693 | &new_key, size); |
1692 | if (ret) | 1694 | BUG_ON(ret); |
1693 | goto out; | ||
1694 | 1695 | ||
1695 | if (skip) { | 1696 | if (skip) { |
1696 | u32 start = | 1697 | u32 start = |
@@ -1708,8 +1709,17 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1708 | } | 1709 | } |
1709 | 1710 | ||
1710 | btrfs_mark_buffer_dirty(leaf); | 1711 | btrfs_mark_buffer_dirty(leaf); |
1711 | } | 1712 | btrfs_release_path(root, path); |
1712 | 1713 | ||
1714 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
1715 | if (new_key.offset + datal > inode->i_size) | ||
1716 | btrfs_i_size_write(inode, | ||
1717 | new_key.offset + datal); | ||
1718 | BTRFS_I(inode)->flags = BTRFS_I(src)->flags; | ||
1719 | ret = btrfs_update_inode(trans, root, inode); | ||
1720 | BUG_ON(ret); | ||
1721 | btrfs_end_transaction(trans, root); | ||
1722 | } | ||
1713 | next: | 1723 | next: |
1714 | btrfs_release_path(root, path); | 1724 | btrfs_release_path(root, path); |
1715 | key.offset++; | 1725 | key.offset++; |
@@ -1717,17 +1727,7 @@ next: | |||
1717 | ret = 0; | 1727 | ret = 0; |
1718 | out: | 1728 | out: |
1719 | btrfs_release_path(root, path); | 1729 | btrfs_release_path(root, path); |
1720 | if (ret == 0) { | ||
1721 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
1722 | if (destoff + olen > inode->i_size) | ||
1723 | btrfs_i_size_write(inode, destoff + olen); | ||
1724 | BTRFS_I(inode)->flags = BTRFS_I(src)->flags; | ||
1725 | ret = btrfs_update_inode(trans, root, inode); | ||
1726 | } | ||
1727 | btrfs_end_transaction(trans, root); | ||
1728 | unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); | 1730 | unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); |
1729 | if (ret) | ||
1730 | vmtruncate(inode, 0); | ||
1731 | out_unlock: | 1731 | out_unlock: |
1732 | mutex_unlock(&src->i_mutex); | 1732 | mutex_unlock(&src->i_mutex); |
1733 | mutex_unlock(&inode->i_mutex); | 1733 | mutex_unlock(&inode->i_mutex); |
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index a127c0ebb2dc..e56c72bc5add 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -124,6 +124,15 @@ static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset) | |||
124 | return 1; | 124 | return 1; |
125 | } | 125 | } |
126 | 126 | ||
127 | static int range_overlaps(struct btrfs_ordered_extent *entry, u64 file_offset, | ||
128 | u64 len) | ||
129 | { | ||
130 | if (file_offset + len <= entry->file_offset || | ||
131 | entry->file_offset + entry->len <= file_offset) | ||
132 | return 0; | ||
133 | return 1; | ||
134 | } | ||
135 | |||
127 | /* | 136 | /* |
128 | * look find the first ordered struct that has this offset, otherwise | 137 | * look find the first ordered struct that has this offset, otherwise |
129 | * the first one less than this offset | 138 | * the first one less than this offset |
@@ -161,8 +170,9 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, | |||
161 | * The tree is given a single reference on the ordered extent that was | 170 | * The tree is given a single reference on the ordered extent that was |
162 | * inserted. | 171 | * inserted. |
163 | */ | 172 | */ |
164 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | 173 | static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, |
165 | u64 start, u64 len, u64 disk_len, int type) | 174 | u64 start, u64 len, u64 disk_len, |
175 | int type, int dio) | ||
166 | { | 176 | { |
167 | struct btrfs_ordered_inode_tree *tree; | 177 | struct btrfs_ordered_inode_tree *tree; |
168 | struct rb_node *node; | 178 | struct rb_node *node; |
@@ -182,6 +192,9 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
182 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) | 192 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) |
183 | set_bit(type, &entry->flags); | 193 | set_bit(type, &entry->flags); |
184 | 194 | ||
195 | if (dio) | ||
196 | set_bit(BTRFS_ORDERED_DIRECT, &entry->flags); | ||
197 | |||
185 | /* one ref for the tree */ | 198 | /* one ref for the tree */ |
186 | atomic_set(&entry->refs, 1); | 199 | atomic_set(&entry->refs, 1); |
187 | init_waitqueue_head(&entry->wait); | 200 | init_waitqueue_head(&entry->wait); |
@@ -203,6 +216,20 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
203 | return 0; | 216 | return 0; |
204 | } | 217 | } |
205 | 218 | ||
219 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | ||
220 | u64 start, u64 len, u64 disk_len, int type) | ||
221 | { | ||
222 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, | ||
223 | disk_len, type, 0); | ||
224 | } | ||
225 | |||
226 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, | ||
227 | u64 start, u64 len, u64 disk_len, int type) | ||
228 | { | ||
229 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, | ||
230 | disk_len, type, 1); | ||
231 | } | ||
232 | |||
206 | /* | 233 | /* |
207 | * Add a struct btrfs_ordered_sum into the list of checksums to be inserted | 234 | * Add a struct btrfs_ordered_sum into the list of checksums to be inserted |
208 | * when an ordered extent is finished. If the list covers more than one | 235 | * when an ordered extent is finished. If the list covers more than one |
@@ -311,13 +338,6 @@ static int __btrfs_remove_ordered_extent(struct inode *inode, | |||
311 | tree->last = NULL; | 338 | tree->last = NULL; |
312 | set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); | 339 | set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); |
313 | 340 | ||
314 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
315 | WARN_ON(!BTRFS_I(inode)->outstanding_extents); | ||
316 | BTRFS_I(inode)->outstanding_extents--; | ||
317 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
318 | btrfs_unreserve_metadata_for_delalloc(BTRFS_I(inode)->root, | ||
319 | inode, 1); | ||
320 | |||
321 | spin_lock(&root->fs_info->ordered_extent_lock); | 341 | spin_lock(&root->fs_info->ordered_extent_lock); |
322 | list_del_init(&entry->root_extent_list); | 342 | list_del_init(&entry->root_extent_list); |
323 | 343 | ||
@@ -491,7 +511,8 @@ void btrfs_start_ordered_extent(struct inode *inode, | |||
491 | * start IO on any dirty ones so the wait doesn't stall waiting | 511 | * start IO on any dirty ones so the wait doesn't stall waiting |
492 | * for pdflush to find them | 512 | * for pdflush to find them |
493 | */ | 513 | */ |
494 | filemap_fdatawrite_range(inode->i_mapping, start, end); | 514 | if (!test_bit(BTRFS_ORDERED_DIRECT, &entry->flags)) |
515 | filemap_fdatawrite_range(inode->i_mapping, start, end); | ||
495 | if (wait) { | 516 | if (wait) { |
496 | wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, | 517 | wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, |
497 | &entry->flags)); | 518 | &entry->flags)); |
@@ -588,6 +609,47 @@ out: | |||
588 | return entry; | 609 | return entry; |
589 | } | 610 | } |
590 | 611 | ||
612 | /* Since the DIO code tries to lock a wide area we need to look for any ordered | ||
613 | * extents that exist in the range, rather than just the start of the range. | ||
614 | */ | ||
615 | struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode, | ||
616 | u64 file_offset, | ||
617 | u64 len) | ||
618 | { | ||
619 | struct btrfs_ordered_inode_tree *tree; | ||
620 | struct rb_node *node; | ||
621 | struct btrfs_ordered_extent *entry = NULL; | ||
622 | |||
623 | tree = &BTRFS_I(inode)->ordered_tree; | ||
624 | spin_lock(&tree->lock); | ||
625 | node = tree_search(tree, file_offset); | ||
626 | if (!node) { | ||
627 | node = tree_search(tree, file_offset + len); | ||
628 | if (!node) | ||
629 | goto out; | ||
630 | } | ||
631 | |||
632 | while (1) { | ||
633 | entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); | ||
634 | if (range_overlaps(entry, file_offset, len)) | ||
635 | break; | ||
636 | |||
637 | if (entry->file_offset >= file_offset + len) { | ||
638 | entry = NULL; | ||
639 | break; | ||
640 | } | ||
641 | entry = NULL; | ||
642 | node = rb_next(node); | ||
643 | if (!node) | ||
644 | break; | ||
645 | } | ||
646 | out: | ||
647 | if (entry) | ||
648 | atomic_inc(&entry->refs); | ||
649 | spin_unlock(&tree->lock); | ||
650 | return entry; | ||
651 | } | ||
652 | |||
591 | /* | 653 | /* |
592 | * lookup and return any extent before 'file_offset'. NULL is returned | 654 | * lookup and return any extent before 'file_offset'. NULL is returned |
593 | * if none is found | 655 | * if none is found |
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index c82f76a9f040..8ac365492a3f 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
@@ -72,6 +72,8 @@ struct btrfs_ordered_sum { | |||
72 | 72 | ||
73 | #define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */ | 73 | #define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */ |
74 | 74 | ||
75 | #define BTRFS_ORDERED_DIRECT 5 /* set when we're doing DIO with this extent */ | ||
76 | |||
75 | struct btrfs_ordered_extent { | 77 | struct btrfs_ordered_extent { |
76 | /* logical offset in the file */ | 78 | /* logical offset in the file */ |
77 | u64 file_offset; | 79 | u64 file_offset; |
@@ -140,7 +142,9 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, | |||
140 | struct btrfs_ordered_extent **cached, | 142 | struct btrfs_ordered_extent **cached, |
141 | u64 file_offset, u64 io_size); | 143 | u64 file_offset, u64 io_size); |
142 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | 144 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, |
143 | u64 start, u64 len, u64 disk_len, int tyep); | 145 | u64 start, u64 len, u64 disk_len, int type); |
146 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, | ||
147 | u64 start, u64 len, u64 disk_len, int type); | ||
144 | int btrfs_add_ordered_sum(struct inode *inode, | 148 | int btrfs_add_ordered_sum(struct inode *inode, |
145 | struct btrfs_ordered_extent *entry, | 149 | struct btrfs_ordered_extent *entry, |
146 | struct btrfs_ordered_sum *sum); | 150 | struct btrfs_ordered_sum *sum); |
@@ -151,6 +155,9 @@ void btrfs_start_ordered_extent(struct inode *inode, | |||
151 | int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); | 155 | int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); |
152 | struct btrfs_ordered_extent * | 156 | struct btrfs_ordered_extent * |
153 | btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); | 157 | btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); |
158 | struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode, | ||
159 | u64 file_offset, | ||
160 | u64 len); | ||
154 | int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, | 161 | int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, |
155 | struct btrfs_ordered_extent *ordered); | 162 | struct btrfs_ordered_extent *ordered); |
156 | int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); | 163 | int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); |
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index e558dd941ded..05d41e569236 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -44,8 +44,12 @@ struct tree_entry { | |||
44 | struct backref_node { | 44 | struct backref_node { |
45 | struct rb_node rb_node; | 45 | struct rb_node rb_node; |
46 | u64 bytenr; | 46 | u64 bytenr; |
47 | /* objectid tree block owner */ | 47 | |
48 | u64 new_bytenr; | ||
49 | /* objectid of tree block owner, can be not uptodate */ | ||
48 | u64 owner; | 50 | u64 owner; |
51 | /* link to pending, changed or detached list */ | ||
52 | struct list_head list; | ||
49 | /* list of upper level blocks reference this block */ | 53 | /* list of upper level blocks reference this block */ |
50 | struct list_head upper; | 54 | struct list_head upper; |
51 | /* list of child blocks in the cache */ | 55 | /* list of child blocks in the cache */ |
@@ -56,9 +60,9 @@ struct backref_node { | |||
56 | struct extent_buffer *eb; | 60 | struct extent_buffer *eb; |
57 | /* level of tree block */ | 61 | /* level of tree block */ |
58 | unsigned int level:8; | 62 | unsigned int level:8; |
59 | /* 1 if the block is root of old snapshot */ | 63 | /* is the block in non-reference counted tree */ |
60 | unsigned int old_root:1; | 64 | unsigned int cowonly:1; |
61 | /* 1 if no child blocks in the cache */ | 65 | /* 1 if no child node in the cache */ |
62 | unsigned int lowest:1; | 66 | unsigned int lowest:1; |
63 | /* is the extent buffer locked */ | 67 | /* is the extent buffer locked */ |
64 | unsigned int locked:1; | 68 | unsigned int locked:1; |
@@ -66,6 +70,16 @@ struct backref_node { | |||
66 | unsigned int processed:1; | 70 | unsigned int processed:1; |
67 | /* have backrefs of this block been checked */ | 71 | /* have backrefs of this block been checked */ |
68 | unsigned int checked:1; | 72 | unsigned int checked:1; |
73 | /* | ||
74 | * 1 if corresponding block has been cowed but some upper | ||
75 | * level block pointers may not point to the new location | ||
76 | */ | ||
77 | unsigned int pending:1; | ||
78 | /* | ||
79 | * 1 if the backref node isn't connected to any other | ||
80 | * backref node. | ||
81 | */ | ||
82 | unsigned int detached:1; | ||
69 | }; | 83 | }; |
70 | 84 | ||
71 | /* | 85 | /* |
@@ -74,7 +88,6 @@ struct backref_node { | |||
74 | struct backref_edge { | 88 | struct backref_edge { |
75 | struct list_head list[2]; | 89 | struct list_head list[2]; |
76 | struct backref_node *node[2]; | 90 | struct backref_node *node[2]; |
77 | u64 blockptr; | ||
78 | }; | 91 | }; |
79 | 92 | ||
80 | #define LOWER 0 | 93 | #define LOWER 0 |
@@ -83,9 +96,25 @@ struct backref_edge { | |||
83 | struct backref_cache { | 96 | struct backref_cache { |
84 | /* red black tree of all backref nodes in the cache */ | 97 | /* red black tree of all backref nodes in the cache */ |
85 | struct rb_root rb_root; | 98 | struct rb_root rb_root; |
86 | /* list of backref nodes with no child block in the cache */ | 99 | /* for passing backref nodes to btrfs_reloc_cow_block */ |
100 | struct backref_node *path[BTRFS_MAX_LEVEL]; | ||
101 | /* | ||
102 | * list of blocks that have been cowed but some block | ||
103 | * pointers in upper level blocks may not reflect the | ||
104 | * new location | ||
105 | */ | ||
87 | struct list_head pending[BTRFS_MAX_LEVEL]; | 106 | struct list_head pending[BTRFS_MAX_LEVEL]; |
88 | spinlock_t lock; | 107 | /* list of backref nodes with no child node */ |
108 | struct list_head leaves; | ||
109 | /* list of blocks that have been cowed in current transaction */ | ||
110 | struct list_head changed; | ||
111 | /* list of detached backref node. */ | ||
112 | struct list_head detached; | ||
113 | |||
114 | u64 last_trans; | ||
115 | |||
116 | int nr_nodes; | ||
117 | int nr_edges; | ||
89 | }; | 118 | }; |
90 | 119 | ||
91 | /* | 120 | /* |
@@ -113,15 +142,6 @@ struct tree_block { | |||
113 | unsigned int key_ready:1; | 142 | unsigned int key_ready:1; |
114 | }; | 143 | }; |
115 | 144 | ||
116 | /* inode vector */ | ||
117 | #define INODEVEC_SIZE 16 | ||
118 | |||
119 | struct inodevec { | ||
120 | struct list_head list; | ||
121 | struct inode *inode[INODEVEC_SIZE]; | ||
122 | int nr; | ||
123 | }; | ||
124 | |||
125 | #define MAX_EXTENTS 128 | 145 | #define MAX_EXTENTS 128 |
126 | 146 | ||
127 | struct file_extent_cluster { | 147 | struct file_extent_cluster { |
@@ -138,36 +158,43 @@ struct reloc_control { | |||
138 | struct btrfs_root *extent_root; | 158 | struct btrfs_root *extent_root; |
139 | /* inode for moving data */ | 159 | /* inode for moving data */ |
140 | struct inode *data_inode; | 160 | struct inode *data_inode; |
141 | struct btrfs_workers workers; | 161 | |
162 | struct btrfs_block_rsv *block_rsv; | ||
163 | |||
164 | struct backref_cache backref_cache; | ||
165 | |||
166 | struct file_extent_cluster cluster; | ||
142 | /* tree blocks have been processed */ | 167 | /* tree blocks have been processed */ |
143 | struct extent_io_tree processed_blocks; | 168 | struct extent_io_tree processed_blocks; |
144 | /* map start of tree root to corresponding reloc tree */ | 169 | /* map start of tree root to corresponding reloc tree */ |
145 | struct mapping_tree reloc_root_tree; | 170 | struct mapping_tree reloc_root_tree; |
146 | /* list of reloc trees */ | 171 | /* list of reloc trees */ |
147 | struct list_head reloc_roots; | 172 | struct list_head reloc_roots; |
173 | /* size of metadata reservation for merging reloc trees */ | ||
174 | u64 merging_rsv_size; | ||
175 | /* size of relocated tree nodes */ | ||
176 | u64 nodes_relocated; | ||
177 | |||
148 | u64 search_start; | 178 | u64 search_start; |
149 | u64 extents_found; | 179 | u64 extents_found; |
150 | u64 extents_skipped; | 180 | |
151 | int stage; | 181 | int block_rsv_retries; |
152 | int create_reloc_root; | 182 | |
183 | unsigned int stage:8; | ||
184 | unsigned int create_reloc_tree:1; | ||
185 | unsigned int merge_reloc_tree:1; | ||
153 | unsigned int found_file_extent:1; | 186 | unsigned int found_file_extent:1; |
154 | unsigned int found_old_snapshot:1; | 187 | unsigned int commit_transaction:1; |
155 | }; | 188 | }; |
156 | 189 | ||
157 | /* stages of data relocation */ | 190 | /* stages of data relocation */ |
158 | #define MOVE_DATA_EXTENTS 0 | 191 | #define MOVE_DATA_EXTENTS 0 |
159 | #define UPDATE_DATA_PTRS 1 | 192 | #define UPDATE_DATA_PTRS 1 |
160 | 193 | ||
161 | /* | 194 | static void remove_backref_node(struct backref_cache *cache, |
162 | * merge reloc tree to corresponding fs tree in worker threads | 195 | struct backref_node *node); |
163 | */ | 196 | static void __mark_block_processed(struct reloc_control *rc, |
164 | struct async_merge { | 197 | struct backref_node *node); |
165 | struct btrfs_work work; | ||
166 | struct reloc_control *rc; | ||
167 | struct btrfs_root *root; | ||
168 | struct completion *done; | ||
169 | atomic_t *num_pending; | ||
170 | }; | ||
171 | 198 | ||
172 | static void mapping_tree_init(struct mapping_tree *tree) | 199 | static void mapping_tree_init(struct mapping_tree *tree) |
173 | { | 200 | { |
@@ -181,15 +208,80 @@ static void backref_cache_init(struct backref_cache *cache) | |||
181 | cache->rb_root = RB_ROOT; | 208 | cache->rb_root = RB_ROOT; |
182 | for (i = 0; i < BTRFS_MAX_LEVEL; i++) | 209 | for (i = 0; i < BTRFS_MAX_LEVEL; i++) |
183 | INIT_LIST_HEAD(&cache->pending[i]); | 210 | INIT_LIST_HEAD(&cache->pending[i]); |
184 | spin_lock_init(&cache->lock); | 211 | INIT_LIST_HEAD(&cache->changed); |
212 | INIT_LIST_HEAD(&cache->detached); | ||
213 | INIT_LIST_HEAD(&cache->leaves); | ||
214 | } | ||
215 | |||
216 | static void backref_cache_cleanup(struct backref_cache *cache) | ||
217 | { | ||
218 | struct backref_node *node; | ||
219 | int i; | ||
220 | |||
221 | while (!list_empty(&cache->detached)) { | ||
222 | node = list_entry(cache->detached.next, | ||
223 | struct backref_node, list); | ||
224 | remove_backref_node(cache, node); | ||
225 | } | ||
226 | |||
227 | while (!list_empty(&cache->leaves)) { | ||
228 | node = list_entry(cache->leaves.next, | ||
229 | struct backref_node, lower); | ||
230 | remove_backref_node(cache, node); | ||
231 | } | ||
232 | |||
233 | cache->last_trans = 0; | ||
234 | |||
235 | for (i = 0; i < BTRFS_MAX_LEVEL; i++) | ||
236 | BUG_ON(!list_empty(&cache->pending[i])); | ||
237 | BUG_ON(!list_empty(&cache->changed)); | ||
238 | BUG_ON(!list_empty(&cache->detached)); | ||
239 | BUG_ON(!RB_EMPTY_ROOT(&cache->rb_root)); | ||
240 | BUG_ON(cache->nr_nodes); | ||
241 | BUG_ON(cache->nr_edges); | ||
242 | } | ||
243 | |||
244 | static struct backref_node *alloc_backref_node(struct backref_cache *cache) | ||
245 | { | ||
246 | struct backref_node *node; | ||
247 | |||
248 | node = kzalloc(sizeof(*node), GFP_NOFS); | ||
249 | if (node) { | ||
250 | INIT_LIST_HEAD(&node->list); | ||
251 | INIT_LIST_HEAD(&node->upper); | ||
252 | INIT_LIST_HEAD(&node->lower); | ||
253 | RB_CLEAR_NODE(&node->rb_node); | ||
254 | cache->nr_nodes++; | ||
255 | } | ||
256 | return node; | ||
257 | } | ||
258 | |||
259 | static void free_backref_node(struct backref_cache *cache, | ||
260 | struct backref_node *node) | ||
261 | { | ||
262 | if (node) { | ||
263 | cache->nr_nodes--; | ||
264 | kfree(node); | ||
265 | } | ||
266 | } | ||
267 | |||
268 | static struct backref_edge *alloc_backref_edge(struct backref_cache *cache) | ||
269 | { | ||
270 | struct backref_edge *edge; | ||
271 | |||
272 | edge = kzalloc(sizeof(*edge), GFP_NOFS); | ||
273 | if (edge) | ||
274 | cache->nr_edges++; | ||
275 | return edge; | ||
185 | } | 276 | } |
186 | 277 | ||
187 | static void backref_node_init(struct backref_node *node) | 278 | static void free_backref_edge(struct backref_cache *cache, |
279 | struct backref_edge *edge) | ||
188 | { | 280 | { |
189 | memset(node, 0, sizeof(*node)); | 281 | if (edge) { |
190 | INIT_LIST_HEAD(&node->upper); | 282 | cache->nr_edges--; |
191 | INIT_LIST_HEAD(&node->lower); | 283 | kfree(edge); |
192 | RB_CLEAR_NODE(&node->rb_node); | 284 | } |
193 | } | 285 | } |
194 | 286 | ||
195 | static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, | 287 | static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, |
@@ -250,6 +342,7 @@ static struct backref_node *walk_up_backref(struct backref_node *node, | |||
250 | edges[idx++] = edge; | 342 | edges[idx++] = edge; |
251 | node = edge->node[UPPER]; | 343 | node = edge->node[UPPER]; |
252 | } | 344 | } |
345 | BUG_ON(node->detached); | ||
253 | *index = idx; | 346 | *index = idx; |
254 | return node; | 347 | return node; |
255 | } | 348 | } |
@@ -281,13 +374,18 @@ static struct backref_node *walk_down_backref(struct backref_edge *edges[], | |||
281 | return NULL; | 374 | return NULL; |
282 | } | 375 | } |
283 | 376 | ||
377 | static void unlock_node_buffer(struct backref_node *node) | ||
378 | { | ||
379 | if (node->locked) { | ||
380 | btrfs_tree_unlock(node->eb); | ||
381 | node->locked = 0; | ||
382 | } | ||
383 | } | ||
384 | |||
284 | static void drop_node_buffer(struct backref_node *node) | 385 | static void drop_node_buffer(struct backref_node *node) |
285 | { | 386 | { |
286 | if (node->eb) { | 387 | if (node->eb) { |
287 | if (node->locked) { | 388 | unlock_node_buffer(node); |
288 | btrfs_tree_unlock(node->eb); | ||
289 | node->locked = 0; | ||
290 | } | ||
291 | free_extent_buffer(node->eb); | 389 | free_extent_buffer(node->eb); |
292 | node->eb = NULL; | 390 | node->eb = NULL; |
293 | } | 391 | } |
@@ -296,14 +394,14 @@ static void drop_node_buffer(struct backref_node *node) | |||
296 | static void drop_backref_node(struct backref_cache *tree, | 394 | static void drop_backref_node(struct backref_cache *tree, |
297 | struct backref_node *node) | 395 | struct backref_node *node) |
298 | { | 396 | { |
299 | BUG_ON(!node->lowest); | ||
300 | BUG_ON(!list_empty(&node->upper)); | 397 | BUG_ON(!list_empty(&node->upper)); |
301 | 398 | ||
302 | drop_node_buffer(node); | 399 | drop_node_buffer(node); |
400 | list_del(&node->list); | ||
303 | list_del(&node->lower); | 401 | list_del(&node->lower); |
304 | 402 | if (!RB_EMPTY_NODE(&node->rb_node)) | |
305 | rb_erase(&node->rb_node, &tree->rb_root); | 403 | rb_erase(&node->rb_node, &tree->rb_root); |
306 | kfree(node); | 404 | free_backref_node(tree, node); |
307 | } | 405 | } |
308 | 406 | ||
309 | /* | 407 | /* |
@@ -318,27 +416,121 @@ static void remove_backref_node(struct backref_cache *cache, | |||
318 | if (!node) | 416 | if (!node) |
319 | return; | 417 | return; |
320 | 418 | ||
321 | BUG_ON(!node->lowest); | 419 | BUG_ON(!node->lowest && !node->detached); |
322 | while (!list_empty(&node->upper)) { | 420 | while (!list_empty(&node->upper)) { |
323 | edge = list_entry(node->upper.next, struct backref_edge, | 421 | edge = list_entry(node->upper.next, struct backref_edge, |
324 | list[LOWER]); | 422 | list[LOWER]); |
325 | upper = edge->node[UPPER]; | 423 | upper = edge->node[UPPER]; |
326 | list_del(&edge->list[LOWER]); | 424 | list_del(&edge->list[LOWER]); |
327 | list_del(&edge->list[UPPER]); | 425 | list_del(&edge->list[UPPER]); |
328 | kfree(edge); | 426 | free_backref_edge(cache, edge); |
427 | |||
428 | if (RB_EMPTY_NODE(&upper->rb_node)) { | ||
429 | BUG_ON(!list_empty(&node->upper)); | ||
430 | drop_backref_node(cache, node); | ||
431 | node = upper; | ||
432 | node->lowest = 1; | ||
433 | continue; | ||
434 | } | ||
329 | /* | 435 | /* |
330 | * add the node to pending list if no other | 436 | * add the node to leaf node list if no other |
331 | * child block cached. | 437 | * child block cached. |
332 | */ | 438 | */ |
333 | if (list_empty(&upper->lower)) { | 439 | if (list_empty(&upper->lower)) { |
334 | list_add_tail(&upper->lower, | 440 | list_add_tail(&upper->lower, &cache->leaves); |
335 | &cache->pending[upper->level]); | ||
336 | upper->lowest = 1; | 441 | upper->lowest = 1; |
337 | } | 442 | } |
338 | } | 443 | } |
444 | |||
339 | drop_backref_node(cache, node); | 445 | drop_backref_node(cache, node); |
340 | } | 446 | } |
341 | 447 | ||
448 | static void update_backref_node(struct backref_cache *cache, | ||
449 | struct backref_node *node, u64 bytenr) | ||
450 | { | ||
451 | struct rb_node *rb_node; | ||
452 | rb_erase(&node->rb_node, &cache->rb_root); | ||
453 | node->bytenr = bytenr; | ||
454 | rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node); | ||
455 | BUG_ON(rb_node); | ||
456 | } | ||
457 | |||
458 | /* | ||
459 | * update backref cache after a transaction commit | ||
460 | */ | ||
461 | static int update_backref_cache(struct btrfs_trans_handle *trans, | ||
462 | struct backref_cache *cache) | ||
463 | { | ||
464 | struct backref_node *node; | ||
465 | int level = 0; | ||
466 | |||
467 | if (cache->last_trans == 0) { | ||
468 | cache->last_trans = trans->transid; | ||
469 | return 0; | ||
470 | } | ||
471 | |||
472 | if (cache->last_trans == trans->transid) | ||
473 | return 0; | ||
474 | |||
475 | /* | ||
476 | * detached nodes are used to avoid unnecessary backref | ||
477 | * lookup. transaction commit changes the extent tree. | ||
478 | * so the detached nodes are no longer useful. | ||
479 | */ | ||
480 | while (!list_empty(&cache->detached)) { | ||
481 | node = list_entry(cache->detached.next, | ||
482 | struct backref_node, list); | ||
483 | remove_backref_node(cache, node); | ||
484 | } | ||
485 | |||
486 | while (!list_empty(&cache->changed)) { | ||
487 | node = list_entry(cache->changed.next, | ||
488 | struct backref_node, list); | ||
489 | list_del_init(&node->list); | ||
490 | BUG_ON(node->pending); | ||
491 | update_backref_node(cache, node, node->new_bytenr); | ||
492 | } | ||
493 | |||
494 | /* | ||
495 | * some nodes can be left in the pending list if there were | ||
496 | * errors during processing the pending nodes. | ||
497 | */ | ||
498 | for (level = 0; level < BTRFS_MAX_LEVEL; level++) { | ||
499 | list_for_each_entry(node, &cache->pending[level], list) { | ||
500 | BUG_ON(!node->pending); | ||
501 | if (node->bytenr == node->new_bytenr) | ||
502 | continue; | ||
503 | update_backref_node(cache, node, node->new_bytenr); | ||
504 | } | ||
505 | } | ||
506 | |||
507 | cache->last_trans = 0; | ||
508 | return 1; | ||
509 | } | ||
510 | |||
511 | static int should_ignore_root(struct btrfs_root *root) | ||
512 | { | ||
513 | struct btrfs_root *reloc_root; | ||
514 | |||
515 | if (!root->ref_cows) | ||
516 | return 0; | ||
517 | |||
518 | reloc_root = root->reloc_root; | ||
519 | if (!reloc_root) | ||
520 | return 0; | ||
521 | |||
522 | if (btrfs_root_last_snapshot(&reloc_root->root_item) == | ||
523 | root->fs_info->running_transaction->transid - 1) | ||
524 | return 0; | ||
525 | /* | ||
526 | * if there is reloc tree and it was created in previous | ||
527 | * transaction backref lookup can find the reloc tree, | ||
528 | * so backref node for the fs tree root is useless for | ||
529 | * relocation. | ||
530 | */ | ||
531 | return 1; | ||
532 | } | ||
533 | |||
342 | /* | 534 | /* |
343 | * find reloc tree by address of tree root | 535 | * find reloc tree by address of tree root |
344 | */ | 536 | */ |
@@ -453,11 +645,12 @@ int find_inline_backref(struct extent_buffer *leaf, int slot, | |||
453 | * for all upper level blocks that directly/indirectly reference the | 645 | * for all upper level blocks that directly/indirectly reference the |
454 | * block are also cached. | 646 | * block are also cached. |
455 | */ | 647 | */ |
456 | static struct backref_node *build_backref_tree(struct reloc_control *rc, | 648 | static noinline_for_stack |
457 | struct backref_cache *cache, | 649 | struct backref_node *build_backref_tree(struct reloc_control *rc, |
458 | struct btrfs_key *node_key, | 650 | struct btrfs_key *node_key, |
459 | int level, u64 bytenr) | 651 | int level, u64 bytenr) |
460 | { | 652 | { |
653 | struct backref_cache *cache = &rc->backref_cache; | ||
461 | struct btrfs_path *path1; | 654 | struct btrfs_path *path1; |
462 | struct btrfs_path *path2; | 655 | struct btrfs_path *path2; |
463 | struct extent_buffer *eb; | 656 | struct extent_buffer *eb; |
@@ -473,6 +666,8 @@ static struct backref_node *build_backref_tree(struct reloc_control *rc, | |||
473 | unsigned long end; | 666 | unsigned long end; |
474 | unsigned long ptr; | 667 | unsigned long ptr; |
475 | LIST_HEAD(list); | 668 | LIST_HEAD(list); |
669 | LIST_HEAD(useless); | ||
670 | int cowonly; | ||
476 | int ret; | 671 | int ret; |
477 | int err = 0; | 672 | int err = 0; |
478 | 673 | ||
@@ -483,15 +678,13 @@ static struct backref_node *build_backref_tree(struct reloc_control *rc, | |||
483 | goto out; | 678 | goto out; |
484 | } | 679 | } |
485 | 680 | ||
486 | node = kmalloc(sizeof(*node), GFP_NOFS); | 681 | node = alloc_backref_node(cache); |
487 | if (!node) { | 682 | if (!node) { |
488 | err = -ENOMEM; | 683 | err = -ENOMEM; |
489 | goto out; | 684 | goto out; |
490 | } | 685 | } |
491 | 686 | ||
492 | backref_node_init(node); | ||
493 | node->bytenr = bytenr; | 687 | node->bytenr = bytenr; |
494 | node->owner = 0; | ||
495 | node->level = level; | 688 | node->level = level; |
496 | node->lowest = 1; | 689 | node->lowest = 1; |
497 | cur = node; | 690 | cur = node; |
@@ -587,17 +780,20 @@ again: | |||
587 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | 780 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 |
588 | if (key.type == BTRFS_SHARED_BLOCK_REF_KEY || | 781 | if (key.type == BTRFS_SHARED_BLOCK_REF_KEY || |
589 | key.type == BTRFS_EXTENT_REF_V0_KEY) { | 782 | key.type == BTRFS_EXTENT_REF_V0_KEY) { |
590 | if (key.objectid == key.offset && | 783 | if (key.type == BTRFS_EXTENT_REF_V0_KEY) { |
591 | key.type == BTRFS_EXTENT_REF_V0_KEY) { | ||
592 | struct btrfs_extent_ref_v0 *ref0; | 784 | struct btrfs_extent_ref_v0 *ref0; |
593 | ref0 = btrfs_item_ptr(eb, path1->slots[0], | 785 | ref0 = btrfs_item_ptr(eb, path1->slots[0], |
594 | struct btrfs_extent_ref_v0); | 786 | struct btrfs_extent_ref_v0); |
595 | root = find_tree_root(rc, eb, ref0); | 787 | root = find_tree_root(rc, eb, ref0); |
596 | if (root) | 788 | if (!root->ref_cows) |
597 | cur->root = root; | 789 | cur->cowonly = 1; |
598 | else | 790 | if (key.objectid == key.offset) { |
599 | cur->old_root = 1; | 791 | if (root && !should_ignore_root(root)) |
600 | break; | 792 | cur->root = root; |
793 | else | ||
794 | list_add(&cur->list, &useless); | ||
795 | break; | ||
796 | } | ||
601 | } | 797 | } |
602 | #else | 798 | #else |
603 | BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY); | 799 | BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY); |
@@ -614,22 +810,20 @@ again: | |||
614 | break; | 810 | break; |
615 | } | 811 | } |
616 | 812 | ||
617 | edge = kzalloc(sizeof(*edge), GFP_NOFS); | 813 | edge = alloc_backref_edge(cache); |
618 | if (!edge) { | 814 | if (!edge) { |
619 | err = -ENOMEM; | 815 | err = -ENOMEM; |
620 | goto out; | 816 | goto out; |
621 | } | 817 | } |
622 | rb_node = tree_search(&cache->rb_root, key.offset); | 818 | rb_node = tree_search(&cache->rb_root, key.offset); |
623 | if (!rb_node) { | 819 | if (!rb_node) { |
624 | upper = kmalloc(sizeof(*upper), GFP_NOFS); | 820 | upper = alloc_backref_node(cache); |
625 | if (!upper) { | 821 | if (!upper) { |
626 | kfree(edge); | 822 | free_backref_edge(cache, edge); |
627 | err = -ENOMEM; | 823 | err = -ENOMEM; |
628 | goto out; | 824 | goto out; |
629 | } | 825 | } |
630 | backref_node_init(upper); | ||
631 | upper->bytenr = key.offset; | 826 | upper->bytenr = key.offset; |
632 | upper->owner = 0; | ||
633 | upper->level = cur->level + 1; | 827 | upper->level = cur->level + 1; |
634 | /* | 828 | /* |
635 | * backrefs for the upper level block isn't | 829 | * backrefs for the upper level block isn't |
@@ -639,11 +833,12 @@ again: | |||
639 | } else { | 833 | } else { |
640 | upper = rb_entry(rb_node, struct backref_node, | 834 | upper = rb_entry(rb_node, struct backref_node, |
641 | rb_node); | 835 | rb_node); |
836 | BUG_ON(!upper->checked); | ||
642 | INIT_LIST_HEAD(&edge->list[UPPER]); | 837 | INIT_LIST_HEAD(&edge->list[UPPER]); |
643 | } | 838 | } |
644 | list_add(&edge->list[LOWER], &cur->upper); | 839 | list_add_tail(&edge->list[LOWER], &cur->upper); |
645 | edge->node[UPPER] = upper; | ||
646 | edge->node[LOWER] = cur; | 840 | edge->node[LOWER] = cur; |
841 | edge->node[UPPER] = upper; | ||
647 | 842 | ||
648 | goto next; | 843 | goto next; |
649 | } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) { | 844 | } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) { |
@@ -657,11 +852,17 @@ again: | |||
657 | goto out; | 852 | goto out; |
658 | } | 853 | } |
659 | 854 | ||
855 | if (!root->ref_cows) | ||
856 | cur->cowonly = 1; | ||
857 | |||
660 | if (btrfs_root_level(&root->root_item) == cur->level) { | 858 | if (btrfs_root_level(&root->root_item) == cur->level) { |
661 | /* tree root */ | 859 | /* tree root */ |
662 | BUG_ON(btrfs_root_bytenr(&root->root_item) != | 860 | BUG_ON(btrfs_root_bytenr(&root->root_item) != |
663 | cur->bytenr); | 861 | cur->bytenr); |
664 | cur->root = root; | 862 | if (should_ignore_root(root)) |
863 | list_add(&cur->list, &useless); | ||
864 | else | ||
865 | cur->root = root; | ||
665 | break; | 866 | break; |
666 | } | 867 | } |
667 | 868 | ||
@@ -692,11 +893,14 @@ again: | |||
692 | if (!path2->nodes[level]) { | 893 | if (!path2->nodes[level]) { |
693 | BUG_ON(btrfs_root_bytenr(&root->root_item) != | 894 | BUG_ON(btrfs_root_bytenr(&root->root_item) != |
694 | lower->bytenr); | 895 | lower->bytenr); |
695 | lower->root = root; | 896 | if (should_ignore_root(root)) |
897 | list_add(&lower->list, &useless); | ||
898 | else | ||
899 | lower->root = root; | ||
696 | break; | 900 | break; |
697 | } | 901 | } |
698 | 902 | ||
699 | edge = kzalloc(sizeof(*edge), GFP_NOFS); | 903 | edge = alloc_backref_edge(cache); |
700 | if (!edge) { | 904 | if (!edge) { |
701 | err = -ENOMEM; | 905 | err = -ENOMEM; |
702 | goto out; | 906 | goto out; |
@@ -705,16 +909,17 @@ again: | |||
705 | eb = path2->nodes[level]; | 909 | eb = path2->nodes[level]; |
706 | rb_node = tree_search(&cache->rb_root, eb->start); | 910 | rb_node = tree_search(&cache->rb_root, eb->start); |
707 | if (!rb_node) { | 911 | if (!rb_node) { |
708 | upper = kmalloc(sizeof(*upper), GFP_NOFS); | 912 | upper = alloc_backref_node(cache); |
709 | if (!upper) { | 913 | if (!upper) { |
710 | kfree(edge); | 914 | free_backref_edge(cache, edge); |
711 | err = -ENOMEM; | 915 | err = -ENOMEM; |
712 | goto out; | 916 | goto out; |
713 | } | 917 | } |
714 | backref_node_init(upper); | ||
715 | upper->bytenr = eb->start; | 918 | upper->bytenr = eb->start; |
716 | upper->owner = btrfs_header_owner(eb); | 919 | upper->owner = btrfs_header_owner(eb); |
717 | upper->level = lower->level + 1; | 920 | upper->level = lower->level + 1; |
921 | if (!root->ref_cows) | ||
922 | upper->cowonly = 1; | ||
718 | 923 | ||
719 | /* | 924 | /* |
720 | * if we know the block isn't shared | 925 | * if we know the block isn't shared |
@@ -744,10 +949,12 @@ again: | |||
744 | rb_node); | 949 | rb_node); |
745 | BUG_ON(!upper->checked); | 950 | BUG_ON(!upper->checked); |
746 | INIT_LIST_HEAD(&edge->list[UPPER]); | 951 | INIT_LIST_HEAD(&edge->list[UPPER]); |
952 | if (!upper->owner) | ||
953 | upper->owner = btrfs_header_owner(eb); | ||
747 | } | 954 | } |
748 | list_add_tail(&edge->list[LOWER], &lower->upper); | 955 | list_add_tail(&edge->list[LOWER], &lower->upper); |
749 | edge->node[UPPER] = upper; | ||
750 | edge->node[LOWER] = lower; | 956 | edge->node[LOWER] = lower; |
957 | edge->node[UPPER] = upper; | ||
751 | 958 | ||
752 | if (rb_node) | 959 | if (rb_node) |
753 | break; | 960 | break; |
@@ -785,8 +992,13 @@ next: | |||
785 | * into the cache. | 992 | * into the cache. |
786 | */ | 993 | */ |
787 | BUG_ON(!node->checked); | 994 | BUG_ON(!node->checked); |
788 | rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node); | 995 | cowonly = node->cowonly; |
789 | BUG_ON(rb_node); | 996 | if (!cowonly) { |
997 | rb_node = tree_insert(&cache->rb_root, node->bytenr, | ||
998 | &node->rb_node); | ||
999 | BUG_ON(rb_node); | ||
1000 | list_add_tail(&node->lower, &cache->leaves); | ||
1001 | } | ||
790 | 1002 | ||
791 | list_for_each_entry(edge, &node->upper, list[LOWER]) | 1003 | list_for_each_entry(edge, &node->upper, list[LOWER]) |
792 | list_add_tail(&edge->list[UPPER], &list); | 1004 | list_add_tail(&edge->list[UPPER], &list); |
@@ -795,6 +1007,14 @@ next: | |||
795 | edge = list_entry(list.next, struct backref_edge, list[UPPER]); | 1007 | edge = list_entry(list.next, struct backref_edge, list[UPPER]); |
796 | list_del_init(&edge->list[UPPER]); | 1008 | list_del_init(&edge->list[UPPER]); |
797 | upper = edge->node[UPPER]; | 1009 | upper = edge->node[UPPER]; |
1010 | if (upper->detached) { | ||
1011 | list_del(&edge->list[LOWER]); | ||
1012 | lower = edge->node[LOWER]; | ||
1013 | free_backref_edge(cache, edge); | ||
1014 | if (list_empty(&lower->upper)) | ||
1015 | list_add(&lower->list, &useless); | ||
1016 | continue; | ||
1017 | } | ||
798 | 1018 | ||
799 | if (!RB_EMPTY_NODE(&upper->rb_node)) { | 1019 | if (!RB_EMPTY_NODE(&upper->rb_node)) { |
800 | if (upper->lowest) { | 1020 | if (upper->lowest) { |
@@ -807,25 +1027,69 @@ next: | |||
807 | } | 1027 | } |
808 | 1028 | ||
809 | BUG_ON(!upper->checked); | 1029 | BUG_ON(!upper->checked); |
810 | rb_node = tree_insert(&cache->rb_root, upper->bytenr, | 1030 | BUG_ON(cowonly != upper->cowonly); |
811 | &upper->rb_node); | 1031 | if (!cowonly) { |
812 | BUG_ON(rb_node); | 1032 | rb_node = tree_insert(&cache->rb_root, upper->bytenr, |
1033 | &upper->rb_node); | ||
1034 | BUG_ON(rb_node); | ||
1035 | } | ||
813 | 1036 | ||
814 | list_add_tail(&edge->list[UPPER], &upper->lower); | 1037 | list_add_tail(&edge->list[UPPER], &upper->lower); |
815 | 1038 | ||
816 | list_for_each_entry(edge, &upper->upper, list[LOWER]) | 1039 | list_for_each_entry(edge, &upper->upper, list[LOWER]) |
817 | list_add_tail(&edge->list[UPPER], &list); | 1040 | list_add_tail(&edge->list[UPPER], &list); |
818 | } | 1041 | } |
1042 | /* | ||
1043 | * process useless backref nodes. backref nodes for tree leaves | ||
1044 | * are deleted from the cache. backref nodes for upper level | ||
1045 | * tree blocks are left in the cache to avoid unnecessary backref | ||
1046 | * lookup. | ||
1047 | */ | ||
1048 | while (!list_empty(&useless)) { | ||
1049 | upper = list_entry(useless.next, struct backref_node, list); | ||
1050 | list_del_init(&upper->list); | ||
1051 | BUG_ON(!list_empty(&upper->upper)); | ||
1052 | if (upper == node) | ||
1053 | node = NULL; | ||
1054 | if (upper->lowest) { | ||
1055 | list_del_init(&upper->lower); | ||
1056 | upper->lowest = 0; | ||
1057 | } | ||
1058 | while (!list_empty(&upper->lower)) { | ||
1059 | edge = list_entry(upper->lower.next, | ||
1060 | struct backref_edge, list[UPPER]); | ||
1061 | list_del(&edge->list[UPPER]); | ||
1062 | list_del(&edge->list[LOWER]); | ||
1063 | lower = edge->node[LOWER]; | ||
1064 | free_backref_edge(cache, edge); | ||
1065 | |||
1066 | if (list_empty(&lower->upper)) | ||
1067 | list_add(&lower->list, &useless); | ||
1068 | } | ||
1069 | __mark_block_processed(rc, upper); | ||
1070 | if (upper->level > 0) { | ||
1071 | list_add(&upper->list, &cache->detached); | ||
1072 | upper->detached = 1; | ||
1073 | } else { | ||
1074 | rb_erase(&upper->rb_node, &cache->rb_root); | ||
1075 | free_backref_node(cache, upper); | ||
1076 | } | ||
1077 | } | ||
819 | out: | 1078 | out: |
820 | btrfs_free_path(path1); | 1079 | btrfs_free_path(path1); |
821 | btrfs_free_path(path2); | 1080 | btrfs_free_path(path2); |
822 | if (err) { | 1081 | if (err) { |
823 | INIT_LIST_HEAD(&list); | 1082 | while (!list_empty(&useless)) { |
1083 | lower = list_entry(useless.next, | ||
1084 | struct backref_node, upper); | ||
1085 | list_del_init(&lower->upper); | ||
1086 | } | ||
824 | upper = node; | 1087 | upper = node; |
1088 | INIT_LIST_HEAD(&list); | ||
825 | while (upper) { | 1089 | while (upper) { |
826 | if (RB_EMPTY_NODE(&upper->rb_node)) { | 1090 | if (RB_EMPTY_NODE(&upper->rb_node)) { |
827 | list_splice_tail(&upper->upper, &list); | 1091 | list_splice_tail(&upper->upper, &list); |
828 | kfree(upper); | 1092 | free_backref_node(cache, upper); |
829 | } | 1093 | } |
830 | 1094 | ||
831 | if (list_empty(&list)) | 1095 | if (list_empty(&list)) |
@@ -833,15 +1097,104 @@ out: | |||
833 | 1097 | ||
834 | edge = list_entry(list.next, struct backref_edge, | 1098 | edge = list_entry(list.next, struct backref_edge, |
835 | list[LOWER]); | 1099 | list[LOWER]); |
1100 | list_del(&edge->list[LOWER]); | ||
836 | upper = edge->node[UPPER]; | 1101 | upper = edge->node[UPPER]; |
837 | kfree(edge); | 1102 | free_backref_edge(cache, edge); |
838 | } | 1103 | } |
839 | return ERR_PTR(err); | 1104 | return ERR_PTR(err); |
840 | } | 1105 | } |
1106 | BUG_ON(node && node->detached); | ||
841 | return node; | 1107 | return node; |
842 | } | 1108 | } |
843 | 1109 | ||
844 | /* | 1110 | /* |
1111 | * helper to add backref node for the newly created snapshot. | ||
1112 | * the backref node is created by cloning backref node that | ||
1113 | * corresponds to root of source tree | ||
1114 | */ | ||
1115 | static int clone_backref_node(struct btrfs_trans_handle *trans, | ||
1116 | struct reloc_control *rc, | ||
1117 | struct btrfs_root *src, | ||
1118 | struct btrfs_root *dest) | ||
1119 | { | ||
1120 | struct btrfs_root *reloc_root = src->reloc_root; | ||
1121 | struct backref_cache *cache = &rc->backref_cache; | ||
1122 | struct backref_node *node = NULL; | ||
1123 | struct backref_node *new_node; | ||
1124 | struct backref_edge *edge; | ||
1125 | struct backref_edge *new_edge; | ||
1126 | struct rb_node *rb_node; | ||
1127 | |||
1128 | if (cache->last_trans > 0) | ||
1129 | update_backref_cache(trans, cache); | ||
1130 | |||
1131 | rb_node = tree_search(&cache->rb_root, src->commit_root->start); | ||
1132 | if (rb_node) { | ||
1133 | node = rb_entry(rb_node, struct backref_node, rb_node); | ||
1134 | if (node->detached) | ||
1135 | node = NULL; | ||
1136 | else | ||
1137 | BUG_ON(node->new_bytenr != reloc_root->node->start); | ||
1138 | } | ||
1139 | |||
1140 | if (!node) { | ||
1141 | rb_node = tree_search(&cache->rb_root, | ||
1142 | reloc_root->commit_root->start); | ||
1143 | if (rb_node) { | ||
1144 | node = rb_entry(rb_node, struct backref_node, | ||
1145 | rb_node); | ||
1146 | BUG_ON(node->detached); | ||
1147 | } | ||
1148 | } | ||
1149 | |||
1150 | if (!node) | ||
1151 | return 0; | ||
1152 | |||
1153 | new_node = alloc_backref_node(cache); | ||
1154 | if (!new_node) | ||
1155 | return -ENOMEM; | ||
1156 | |||
1157 | new_node->bytenr = dest->node->start; | ||
1158 | new_node->level = node->level; | ||
1159 | new_node->lowest = node->lowest; | ||
1160 | new_node->root = dest; | ||
1161 | |||
1162 | if (!node->lowest) { | ||
1163 | list_for_each_entry(edge, &node->lower, list[UPPER]) { | ||
1164 | new_edge = alloc_backref_edge(cache); | ||
1165 | if (!new_edge) | ||
1166 | goto fail; | ||
1167 | |||
1168 | new_edge->node[UPPER] = new_node; | ||
1169 | new_edge->node[LOWER] = edge->node[LOWER]; | ||
1170 | list_add_tail(&new_edge->list[UPPER], | ||
1171 | &new_node->lower); | ||
1172 | } | ||
1173 | } | ||
1174 | |||
1175 | rb_node = tree_insert(&cache->rb_root, new_node->bytenr, | ||
1176 | &new_node->rb_node); | ||
1177 | BUG_ON(rb_node); | ||
1178 | |||
1179 | if (!new_node->lowest) { | ||
1180 | list_for_each_entry(new_edge, &new_node->lower, list[UPPER]) { | ||
1181 | list_add_tail(&new_edge->list[LOWER], | ||
1182 | &new_edge->node[LOWER]->upper); | ||
1183 | } | ||
1184 | } | ||
1185 | return 0; | ||
1186 | fail: | ||
1187 | while (!list_empty(&new_node->lower)) { | ||
1188 | new_edge = list_entry(new_node->lower.next, | ||
1189 | struct backref_edge, list[UPPER]); | ||
1190 | list_del(&new_edge->list[UPPER]); | ||
1191 | free_backref_edge(cache, new_edge); | ||
1192 | } | ||
1193 | free_backref_node(cache, new_node); | ||
1194 | return -ENOMEM; | ||
1195 | } | ||
1196 | |||
1197 | /* | ||
845 | * helper to add 'address of tree root -> reloc tree' mapping | 1198 | * helper to add 'address of tree root -> reloc tree' mapping |
846 | */ | 1199 | */ |
847 | static int __add_reloc_root(struct btrfs_root *root) | 1200 | static int __add_reloc_root(struct btrfs_root *root) |
@@ -901,12 +1254,8 @@ static int __update_reloc_root(struct btrfs_root *root, int del) | |||
901 | return 0; | 1254 | return 0; |
902 | } | 1255 | } |
903 | 1256 | ||
904 | /* | 1257 | static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans, |
905 | * create reloc tree for a given fs tree. reloc tree is just a | 1258 | struct btrfs_root *root, u64 objectid) |
906 | * snapshot of the fs tree with special root objectid. | ||
907 | */ | ||
908 | int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, | ||
909 | struct btrfs_root *root) | ||
910 | { | 1259 | { |
911 | struct btrfs_root *reloc_root; | 1260 | struct btrfs_root *reloc_root; |
912 | struct extent_buffer *eb; | 1261 | struct extent_buffer *eb; |
@@ -914,36 +1263,45 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, | |||
914 | struct btrfs_key root_key; | 1263 | struct btrfs_key root_key; |
915 | int ret; | 1264 | int ret; |
916 | 1265 | ||
917 | if (root->reloc_root) { | ||
918 | reloc_root = root->reloc_root; | ||
919 | reloc_root->last_trans = trans->transid; | ||
920 | return 0; | ||
921 | } | ||
922 | |||
923 | if (!root->fs_info->reloc_ctl || | ||
924 | !root->fs_info->reloc_ctl->create_reloc_root || | ||
925 | root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) | ||
926 | return 0; | ||
927 | |||
928 | root_item = kmalloc(sizeof(*root_item), GFP_NOFS); | 1266 | root_item = kmalloc(sizeof(*root_item), GFP_NOFS); |
929 | BUG_ON(!root_item); | 1267 | BUG_ON(!root_item); |
930 | 1268 | ||
931 | root_key.objectid = BTRFS_TREE_RELOC_OBJECTID; | 1269 | root_key.objectid = BTRFS_TREE_RELOC_OBJECTID; |
932 | root_key.type = BTRFS_ROOT_ITEM_KEY; | 1270 | root_key.type = BTRFS_ROOT_ITEM_KEY; |
933 | root_key.offset = root->root_key.objectid; | 1271 | root_key.offset = objectid; |
934 | 1272 | ||
935 | ret = btrfs_copy_root(trans, root, root->commit_root, &eb, | 1273 | if (root->root_key.objectid == objectid) { |
936 | BTRFS_TREE_RELOC_OBJECTID); | 1274 | /* called by btrfs_init_reloc_root */ |
937 | BUG_ON(ret); | 1275 | ret = btrfs_copy_root(trans, root, root->commit_root, &eb, |
1276 | BTRFS_TREE_RELOC_OBJECTID); | ||
1277 | BUG_ON(ret); | ||
1278 | |||
1279 | btrfs_set_root_last_snapshot(&root->root_item, | ||
1280 | trans->transid - 1); | ||
1281 | } else { | ||
1282 | /* | ||
1283 | * called by btrfs_reloc_post_snapshot_hook. | ||
1284 | * the source tree is a reloc tree, all tree blocks | ||
1285 | * modified after it was created have RELOC flag | ||
1286 | * set in their headers. so it's OK to not update | ||
1287 | * the 'last_snapshot'. | ||
1288 | */ | ||
1289 | ret = btrfs_copy_root(trans, root, root->node, &eb, | ||
1290 | BTRFS_TREE_RELOC_OBJECTID); | ||
1291 | BUG_ON(ret); | ||
1292 | } | ||
938 | 1293 | ||
939 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid - 1); | ||
940 | memcpy(root_item, &root->root_item, sizeof(*root_item)); | 1294 | memcpy(root_item, &root->root_item, sizeof(*root_item)); |
941 | btrfs_set_root_refs(root_item, 1); | ||
942 | btrfs_set_root_bytenr(root_item, eb->start); | 1295 | btrfs_set_root_bytenr(root_item, eb->start); |
943 | btrfs_set_root_level(root_item, btrfs_header_level(eb)); | 1296 | btrfs_set_root_level(root_item, btrfs_header_level(eb)); |
944 | btrfs_set_root_generation(root_item, trans->transid); | 1297 | btrfs_set_root_generation(root_item, trans->transid); |
945 | memset(&root_item->drop_progress, 0, sizeof(struct btrfs_disk_key)); | 1298 | |
946 | root_item->drop_level = 0; | 1299 | if (root->root_key.objectid == objectid) { |
1300 | btrfs_set_root_refs(root_item, 0); | ||
1301 | memset(&root_item->drop_progress, 0, | ||
1302 | sizeof(struct btrfs_disk_key)); | ||
1303 | root_item->drop_level = 0; | ||
1304 | } | ||
947 | 1305 | ||
948 | btrfs_tree_unlock(eb); | 1306 | btrfs_tree_unlock(eb); |
949 | free_extent_buffer(eb); | 1307 | free_extent_buffer(eb); |
@@ -957,6 +1315,37 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, | |||
957 | &root_key); | 1315 | &root_key); |
958 | BUG_ON(IS_ERR(reloc_root)); | 1316 | BUG_ON(IS_ERR(reloc_root)); |
959 | reloc_root->last_trans = trans->transid; | 1317 | reloc_root->last_trans = trans->transid; |
1318 | return reloc_root; | ||
1319 | } | ||
1320 | |||
1321 | /* | ||
1322 | * create reloc tree for a given fs tree. reloc tree is just a | ||
1323 | * snapshot of the fs tree with special root objectid. | ||
1324 | */ | ||
1325 | int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, | ||
1326 | struct btrfs_root *root) | ||
1327 | { | ||
1328 | struct btrfs_root *reloc_root; | ||
1329 | struct reloc_control *rc = root->fs_info->reloc_ctl; | ||
1330 | int clear_rsv = 0; | ||
1331 | |||
1332 | if (root->reloc_root) { | ||
1333 | reloc_root = root->reloc_root; | ||
1334 | reloc_root->last_trans = trans->transid; | ||
1335 | return 0; | ||
1336 | } | ||
1337 | |||
1338 | if (!rc || !rc->create_reloc_tree || | ||
1339 | root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) | ||
1340 | return 0; | ||
1341 | |||
1342 | if (!trans->block_rsv) { | ||
1343 | trans->block_rsv = rc->block_rsv; | ||
1344 | clear_rsv = 1; | ||
1345 | } | ||
1346 | reloc_root = create_reloc_root(trans, root, root->root_key.objectid); | ||
1347 | if (clear_rsv) | ||
1348 | trans->block_rsv = NULL; | ||
960 | 1349 | ||
961 | __add_reloc_root(reloc_root); | 1350 | __add_reloc_root(reloc_root); |
962 | root->reloc_root = reloc_root; | 1351 | root->reloc_root = reloc_root; |
@@ -980,7 +1369,8 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, | |||
980 | reloc_root = root->reloc_root; | 1369 | reloc_root = root->reloc_root; |
981 | root_item = &reloc_root->root_item; | 1370 | root_item = &reloc_root->root_item; |
982 | 1371 | ||
983 | if (btrfs_root_refs(root_item) == 0) { | 1372 | if (root->fs_info->reloc_ctl->merge_reloc_tree && |
1373 | btrfs_root_refs(root_item) == 0) { | ||
984 | root->reloc_root = NULL; | 1374 | root->reloc_root = NULL; |
985 | del = 1; | 1375 | del = 1; |
986 | } | 1376 | } |
@@ -1102,8 +1492,7 @@ static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr, | |||
1102 | goto out; | 1492 | goto out; |
1103 | } | 1493 | } |
1104 | 1494 | ||
1105 | if (new_bytenr) | 1495 | *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); |
1106 | *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); | ||
1107 | ret = 0; | 1496 | ret = 0; |
1108 | out: | 1497 | out: |
1109 | btrfs_free_path(path); | 1498 | btrfs_free_path(path); |
@@ -1114,19 +1503,18 @@ out: | |||
1114 | * update file extent items in the tree leaf to point to | 1503 | * update file extent items in the tree leaf to point to |
1115 | * the new locations. | 1504 | * the new locations. |
1116 | */ | 1505 | */ |
1117 | static int replace_file_extents(struct btrfs_trans_handle *trans, | 1506 | static noinline_for_stack |
1118 | struct reloc_control *rc, | 1507 | int replace_file_extents(struct btrfs_trans_handle *trans, |
1119 | struct btrfs_root *root, | 1508 | struct reloc_control *rc, |
1120 | struct extent_buffer *leaf, | 1509 | struct btrfs_root *root, |
1121 | struct list_head *inode_list) | 1510 | struct extent_buffer *leaf) |
1122 | { | 1511 | { |
1123 | struct btrfs_key key; | 1512 | struct btrfs_key key; |
1124 | struct btrfs_file_extent_item *fi; | 1513 | struct btrfs_file_extent_item *fi; |
1125 | struct inode *inode = NULL; | 1514 | struct inode *inode = NULL; |
1126 | struct inodevec *ivec = NULL; | ||
1127 | u64 parent; | 1515 | u64 parent; |
1128 | u64 bytenr; | 1516 | u64 bytenr; |
1129 | u64 new_bytenr; | 1517 | u64 new_bytenr = 0; |
1130 | u64 num_bytes; | 1518 | u64 num_bytes; |
1131 | u64 end; | 1519 | u64 end; |
1132 | u32 nritems; | 1520 | u32 nritems; |
@@ -1166,21 +1554,12 @@ static int replace_file_extents(struct btrfs_trans_handle *trans, | |||
1166 | * to complete and drop the extent cache | 1554 | * to complete and drop the extent cache |
1167 | */ | 1555 | */ |
1168 | if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { | 1556 | if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { |
1169 | if (!ivec || ivec->nr == INODEVEC_SIZE) { | ||
1170 | ivec = kmalloc(sizeof(*ivec), GFP_NOFS); | ||
1171 | BUG_ON(!ivec); | ||
1172 | ivec->nr = 0; | ||
1173 | list_add_tail(&ivec->list, inode_list); | ||
1174 | } | ||
1175 | if (first) { | 1557 | if (first) { |
1176 | inode = find_next_inode(root, key.objectid); | 1558 | inode = find_next_inode(root, key.objectid); |
1177 | if (inode) | ||
1178 | ivec->inode[ivec->nr++] = inode; | ||
1179 | first = 0; | 1559 | first = 0; |
1180 | } else if (inode && inode->i_ino < key.objectid) { | 1560 | } else if (inode && inode->i_ino < key.objectid) { |
1561 | btrfs_add_delayed_iput(inode); | ||
1181 | inode = find_next_inode(root, key.objectid); | 1562 | inode = find_next_inode(root, key.objectid); |
1182 | if (inode) | ||
1183 | ivec->inode[ivec->nr++] = inode; | ||
1184 | } | 1563 | } |
1185 | if (inode && inode->i_ino == key.objectid) { | 1564 | if (inode && inode->i_ino == key.objectid) { |
1186 | end = key.offset + | 1565 | end = key.offset + |
@@ -1204,8 +1583,10 @@ static int replace_file_extents(struct btrfs_trans_handle *trans, | |||
1204 | 1583 | ||
1205 | ret = get_new_location(rc->data_inode, &new_bytenr, | 1584 | ret = get_new_location(rc->data_inode, &new_bytenr, |
1206 | bytenr, num_bytes); | 1585 | bytenr, num_bytes); |
1207 | if (ret > 0) | 1586 | if (ret > 0) { |
1587 | WARN_ON(1); | ||
1208 | continue; | 1588 | continue; |
1589 | } | ||
1209 | BUG_ON(ret < 0); | 1590 | BUG_ON(ret < 0); |
1210 | 1591 | ||
1211 | btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr); | 1592 | btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr); |
@@ -1225,6 +1606,8 @@ static int replace_file_extents(struct btrfs_trans_handle *trans, | |||
1225 | } | 1606 | } |
1226 | if (dirty) | 1607 | if (dirty) |
1227 | btrfs_mark_buffer_dirty(leaf); | 1608 | btrfs_mark_buffer_dirty(leaf); |
1609 | if (inode) | ||
1610 | btrfs_add_delayed_iput(inode); | ||
1228 | return 0; | 1611 | return 0; |
1229 | } | 1612 | } |
1230 | 1613 | ||
@@ -1248,11 +1631,11 @@ int memcmp_node_keys(struct extent_buffer *eb, int slot, | |||
1248 | * if no block got replaced, 0 is returned. if there are other | 1631 | * if no block got replaced, 0 is returned. if there are other |
1249 | * errors, a negative error number is returned. | 1632 | * errors, a negative error number is returned. |
1250 | */ | 1633 | */ |
1251 | static int replace_path(struct btrfs_trans_handle *trans, | 1634 | static noinline_for_stack |
1252 | struct btrfs_root *dest, struct btrfs_root *src, | 1635 | int replace_path(struct btrfs_trans_handle *trans, |
1253 | struct btrfs_path *path, struct btrfs_key *next_key, | 1636 | struct btrfs_root *dest, struct btrfs_root *src, |
1254 | struct extent_buffer **leaf, | 1637 | struct btrfs_path *path, struct btrfs_key *next_key, |
1255 | int lowest_level, int max_level) | 1638 | int lowest_level, int max_level) |
1256 | { | 1639 | { |
1257 | struct extent_buffer *eb; | 1640 | struct extent_buffer *eb; |
1258 | struct extent_buffer *parent; | 1641 | struct extent_buffer *parent; |
@@ -1263,16 +1646,16 @@ static int replace_path(struct btrfs_trans_handle *trans, | |||
1263 | u64 new_ptr_gen; | 1646 | u64 new_ptr_gen; |
1264 | u64 last_snapshot; | 1647 | u64 last_snapshot; |
1265 | u32 blocksize; | 1648 | u32 blocksize; |
1649 | int cow = 0; | ||
1266 | int level; | 1650 | int level; |
1267 | int ret; | 1651 | int ret; |
1268 | int slot; | 1652 | int slot; |
1269 | 1653 | ||
1270 | BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); | 1654 | BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); |
1271 | BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID); | 1655 | BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID); |
1272 | BUG_ON(lowest_level > 1 && leaf); | ||
1273 | 1656 | ||
1274 | last_snapshot = btrfs_root_last_snapshot(&src->root_item); | 1657 | last_snapshot = btrfs_root_last_snapshot(&src->root_item); |
1275 | 1658 | again: | |
1276 | slot = path->slots[lowest_level]; | 1659 | slot = path->slots[lowest_level]; |
1277 | btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot); | 1660 | btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot); |
1278 | 1661 | ||
@@ -1286,8 +1669,10 @@ static int replace_path(struct btrfs_trans_handle *trans, | |||
1286 | return 0; | 1669 | return 0; |
1287 | } | 1670 | } |
1288 | 1671 | ||
1289 | ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb); | 1672 | if (cow) { |
1290 | BUG_ON(ret); | 1673 | ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb); |
1674 | BUG_ON(ret); | ||
1675 | } | ||
1291 | btrfs_set_lock_blocking(eb); | 1676 | btrfs_set_lock_blocking(eb); |
1292 | 1677 | ||
1293 | if (next_key) { | 1678 | if (next_key) { |
@@ -1331,7 +1716,7 @@ static int replace_path(struct btrfs_trans_handle *trans, | |||
1331 | 1716 | ||
1332 | if (new_bytenr == 0 || old_ptr_gen > last_snapshot || | 1717 | if (new_bytenr == 0 || old_ptr_gen > last_snapshot || |
1333 | memcmp_node_keys(parent, slot, path, level)) { | 1718 | memcmp_node_keys(parent, slot, path, level)) { |
1334 | if (level <= lowest_level && !leaf) { | 1719 | if (level <= lowest_level) { |
1335 | ret = 0; | 1720 | ret = 0; |
1336 | break; | 1721 | break; |
1337 | } | 1722 | } |
@@ -1339,16 +1724,12 @@ static int replace_path(struct btrfs_trans_handle *trans, | |||
1339 | eb = read_tree_block(dest, old_bytenr, blocksize, | 1724 | eb = read_tree_block(dest, old_bytenr, blocksize, |
1340 | old_ptr_gen); | 1725 | old_ptr_gen); |
1341 | btrfs_tree_lock(eb); | 1726 | btrfs_tree_lock(eb); |
1342 | ret = btrfs_cow_block(trans, dest, eb, parent, | 1727 | if (cow) { |
1343 | slot, &eb); | 1728 | ret = btrfs_cow_block(trans, dest, eb, parent, |
1344 | BUG_ON(ret); | 1729 | slot, &eb); |
1345 | btrfs_set_lock_blocking(eb); | 1730 | BUG_ON(ret); |
1346 | |||
1347 | if (level <= lowest_level) { | ||
1348 | *leaf = eb; | ||
1349 | ret = 0; | ||
1350 | break; | ||
1351 | } | 1731 | } |
1732 | btrfs_set_lock_blocking(eb); | ||
1352 | 1733 | ||
1353 | btrfs_tree_unlock(parent); | 1734 | btrfs_tree_unlock(parent); |
1354 | free_extent_buffer(parent); | 1735 | free_extent_buffer(parent); |
@@ -1357,6 +1738,13 @@ static int replace_path(struct btrfs_trans_handle *trans, | |||
1357 | continue; | 1738 | continue; |
1358 | } | 1739 | } |
1359 | 1740 | ||
1741 | if (!cow) { | ||
1742 | btrfs_tree_unlock(parent); | ||
1743 | free_extent_buffer(parent); | ||
1744 | cow = 1; | ||
1745 | goto again; | ||
1746 | } | ||
1747 | |||
1360 | btrfs_node_key_to_cpu(path->nodes[level], &key, | 1748 | btrfs_node_key_to_cpu(path->nodes[level], &key, |
1361 | path->slots[level]); | 1749 | path->slots[level]); |
1362 | btrfs_release_path(src, path); | 1750 | btrfs_release_path(src, path); |
@@ -1562,20 +1950,6 @@ static int invalidate_extent_cache(struct btrfs_root *root, | |||
1562 | return 0; | 1950 | return 0; |
1563 | } | 1951 | } |
1564 | 1952 | ||
1565 | static void put_inodes(struct list_head *list) | ||
1566 | { | ||
1567 | struct inodevec *ivec; | ||
1568 | while (!list_empty(list)) { | ||
1569 | ivec = list_entry(list->next, struct inodevec, list); | ||
1570 | list_del(&ivec->list); | ||
1571 | while (ivec->nr > 0) { | ||
1572 | ivec->nr--; | ||
1573 | iput(ivec->inode[ivec->nr]); | ||
1574 | } | ||
1575 | kfree(ivec); | ||
1576 | } | ||
1577 | } | ||
1578 | |||
1579 | static int find_next_key(struct btrfs_path *path, int level, | 1953 | static int find_next_key(struct btrfs_path *path, int level, |
1580 | struct btrfs_key *key) | 1954 | struct btrfs_key *key) |
1581 | 1955 | ||
@@ -1608,13 +1982,14 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
1608 | struct btrfs_root *reloc_root; | 1982 | struct btrfs_root *reloc_root; |
1609 | struct btrfs_root_item *root_item; | 1983 | struct btrfs_root_item *root_item; |
1610 | struct btrfs_path *path; | 1984 | struct btrfs_path *path; |
1611 | struct extent_buffer *leaf = NULL; | 1985 | struct extent_buffer *leaf; |
1612 | unsigned long nr; | 1986 | unsigned long nr; |
1613 | int level; | 1987 | int level; |
1614 | int max_level; | 1988 | int max_level; |
1615 | int replaced = 0; | 1989 | int replaced = 0; |
1616 | int ret; | 1990 | int ret; |
1617 | int err = 0; | 1991 | int err = 0; |
1992 | u32 min_reserved; | ||
1618 | 1993 | ||
1619 | path = btrfs_alloc_path(); | 1994 | path = btrfs_alloc_path(); |
1620 | if (!path) | 1995 | if (!path) |
@@ -1648,34 +2023,23 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
1648 | btrfs_unlock_up_safe(path, 0); | 2023 | btrfs_unlock_up_safe(path, 0); |
1649 | } | 2024 | } |
1650 | 2025 | ||
1651 | if (level == 0 && rc->stage == UPDATE_DATA_PTRS) { | 2026 | min_reserved = root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; |
1652 | trans = btrfs_start_transaction(root, 1); | 2027 | memset(&next_key, 0, sizeof(next_key)); |
1653 | 2028 | ||
1654 | leaf = path->nodes[0]; | 2029 | while (1) { |
1655 | btrfs_item_key_to_cpu(leaf, &key, 0); | 2030 | trans = btrfs_start_transaction(root, 0); |
1656 | btrfs_release_path(reloc_root, path); | 2031 | trans->block_rsv = rc->block_rsv; |
1657 | 2032 | ||
1658 | ret = btrfs_search_slot(trans, root, &key, path, 0, 1); | 2033 | ret = btrfs_block_rsv_check(trans, root, rc->block_rsv, |
1659 | if (ret < 0) { | 2034 | min_reserved, 0); |
1660 | err = ret; | 2035 | if (ret) { |
1661 | goto out; | 2036 | BUG_ON(ret != -EAGAIN); |
2037 | ret = btrfs_commit_transaction(trans, root); | ||
2038 | BUG_ON(ret); | ||
2039 | continue; | ||
1662 | } | 2040 | } |
1663 | 2041 | ||
1664 | leaf = path->nodes[0]; | ||
1665 | btrfs_unlock_up_safe(path, 1); | ||
1666 | ret = replace_file_extents(trans, rc, root, leaf, | ||
1667 | &inode_list); | ||
1668 | if (ret < 0) | ||
1669 | err = ret; | ||
1670 | goto out; | ||
1671 | } | ||
1672 | |||
1673 | memset(&next_key, 0, sizeof(next_key)); | ||
1674 | |||
1675 | while (1) { | ||
1676 | leaf = NULL; | ||
1677 | replaced = 0; | 2042 | replaced = 0; |
1678 | trans = btrfs_start_transaction(root, 1); | ||
1679 | max_level = level; | 2043 | max_level = level; |
1680 | 2044 | ||
1681 | ret = walk_down_reloc_tree(reloc_root, path, &level); | 2045 | ret = walk_down_reloc_tree(reloc_root, path, &level); |
@@ -1689,14 +2053,9 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
1689 | if (!find_next_key(path, level, &key) && | 2053 | if (!find_next_key(path, level, &key) && |
1690 | btrfs_comp_cpu_keys(&next_key, &key) >= 0) { | 2054 | btrfs_comp_cpu_keys(&next_key, &key) >= 0) { |
1691 | ret = 0; | 2055 | ret = 0; |
1692 | } else if (level == 1 && rc->stage == UPDATE_DATA_PTRS) { | ||
1693 | ret = replace_path(trans, root, reloc_root, | ||
1694 | path, &next_key, &leaf, | ||
1695 | level, max_level); | ||
1696 | } else { | 2056 | } else { |
1697 | ret = replace_path(trans, root, reloc_root, | 2057 | ret = replace_path(trans, root, reloc_root, path, |
1698 | path, &next_key, NULL, | 2058 | &next_key, level, max_level); |
1699 | level, max_level); | ||
1700 | } | 2059 | } |
1701 | if (ret < 0) { | 2060 | if (ret < 0) { |
1702 | err = ret; | 2061 | err = ret; |
@@ -1708,16 +2067,6 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
1708 | btrfs_node_key_to_cpu(path->nodes[level], &key, | 2067 | btrfs_node_key_to_cpu(path->nodes[level], &key, |
1709 | path->slots[level]); | 2068 | path->slots[level]); |
1710 | replaced = 1; | 2069 | replaced = 1; |
1711 | } else if (leaf) { | ||
1712 | /* | ||
1713 | * no block got replaced, try replacing file extents | ||
1714 | */ | ||
1715 | btrfs_item_key_to_cpu(leaf, &key, 0); | ||
1716 | ret = replace_file_extents(trans, rc, root, leaf, | ||
1717 | &inode_list); | ||
1718 | btrfs_tree_unlock(leaf); | ||
1719 | free_extent_buffer(leaf); | ||
1720 | BUG_ON(ret < 0); | ||
1721 | } | 2070 | } |
1722 | 2071 | ||
1723 | ret = walk_up_reloc_tree(reloc_root, path, &level); | 2072 | ret = walk_up_reloc_tree(reloc_root, path, &level); |
@@ -1734,15 +2083,10 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
1734 | root_item->drop_level = level; | 2083 | root_item->drop_level = level; |
1735 | 2084 | ||
1736 | nr = trans->blocks_used; | 2085 | nr = trans->blocks_used; |
1737 | btrfs_end_transaction(trans, root); | 2086 | btrfs_end_transaction_throttle(trans, root); |
1738 | 2087 | ||
1739 | btrfs_btree_balance_dirty(root, nr); | 2088 | btrfs_btree_balance_dirty(root, nr); |
1740 | 2089 | ||
1741 | /* | ||
1742 | * put inodes outside transaction, otherwise we may deadlock. | ||
1743 | */ | ||
1744 | put_inodes(&inode_list); | ||
1745 | |||
1746 | if (replaced && rc->stage == UPDATE_DATA_PTRS) | 2090 | if (replaced && rc->stage == UPDATE_DATA_PTRS) |
1747 | invalidate_extent_cache(root, &key, &next_key); | 2091 | invalidate_extent_cache(root, &key, &next_key); |
1748 | } | 2092 | } |
@@ -1765,87 +2109,125 @@ out: | |||
1765 | sizeof(root_item->drop_progress)); | 2109 | sizeof(root_item->drop_progress)); |
1766 | root_item->drop_level = 0; | 2110 | root_item->drop_level = 0; |
1767 | btrfs_set_root_refs(root_item, 0); | 2111 | btrfs_set_root_refs(root_item, 0); |
2112 | btrfs_update_reloc_root(trans, root); | ||
1768 | } | 2113 | } |
1769 | 2114 | ||
1770 | nr = trans->blocks_used; | 2115 | nr = trans->blocks_used; |
1771 | btrfs_end_transaction(trans, root); | 2116 | btrfs_end_transaction_throttle(trans, root); |
1772 | 2117 | ||
1773 | btrfs_btree_balance_dirty(root, nr); | 2118 | btrfs_btree_balance_dirty(root, nr); |
1774 | 2119 | ||
1775 | put_inodes(&inode_list); | ||
1776 | |||
1777 | if (replaced && rc->stage == UPDATE_DATA_PTRS) | 2120 | if (replaced && rc->stage == UPDATE_DATA_PTRS) |
1778 | invalidate_extent_cache(root, &key, &next_key); | 2121 | invalidate_extent_cache(root, &key, &next_key); |
1779 | 2122 | ||
1780 | return err; | 2123 | return err; |
1781 | } | 2124 | } |
1782 | 2125 | ||
1783 | /* | 2126 | static noinline_for_stack |
1784 | * callback for the work threads. | 2127 | int prepare_to_merge(struct reloc_control *rc, int err) |
1785 | * this function merges reloc tree with corresponding fs tree, | ||
1786 | * and then drops the reloc tree. | ||
1787 | */ | ||
1788 | static void merge_func(struct btrfs_work *work) | ||
1789 | { | 2128 | { |
1790 | struct btrfs_trans_handle *trans; | 2129 | struct btrfs_root *root = rc->extent_root; |
1791 | struct btrfs_root *root; | ||
1792 | struct btrfs_root *reloc_root; | 2130 | struct btrfs_root *reloc_root; |
1793 | struct async_merge *async; | 2131 | struct btrfs_trans_handle *trans; |
2132 | LIST_HEAD(reloc_roots); | ||
2133 | u64 num_bytes = 0; | ||
2134 | int ret; | ||
2135 | int retries = 0; | ||
2136 | |||
2137 | mutex_lock(&root->fs_info->trans_mutex); | ||
2138 | rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; | ||
2139 | rc->merging_rsv_size += rc->nodes_relocated * 2; | ||
2140 | mutex_unlock(&root->fs_info->trans_mutex); | ||
2141 | again: | ||
2142 | if (!err) { | ||
2143 | num_bytes = rc->merging_rsv_size; | ||
2144 | ret = btrfs_block_rsv_add(NULL, root, rc->block_rsv, | ||
2145 | num_bytes, &retries); | ||
2146 | if (ret) | ||
2147 | err = ret; | ||
2148 | } | ||
2149 | |||
2150 | trans = btrfs_join_transaction(rc->extent_root, 1); | ||
2151 | |||
2152 | if (!err) { | ||
2153 | if (num_bytes != rc->merging_rsv_size) { | ||
2154 | btrfs_end_transaction(trans, rc->extent_root); | ||
2155 | btrfs_block_rsv_release(rc->extent_root, | ||
2156 | rc->block_rsv, num_bytes); | ||
2157 | retries = 0; | ||
2158 | goto again; | ||
2159 | } | ||
2160 | } | ||
1794 | 2161 | ||
1795 | async = container_of(work, struct async_merge, work); | 2162 | rc->merge_reloc_tree = 1; |
1796 | reloc_root = async->root; | 2163 | |
2164 | while (!list_empty(&rc->reloc_roots)) { | ||
2165 | reloc_root = list_entry(rc->reloc_roots.next, | ||
2166 | struct btrfs_root, root_list); | ||
2167 | list_del_init(&reloc_root->root_list); | ||
1797 | 2168 | ||
1798 | if (btrfs_root_refs(&reloc_root->root_item) > 0) { | ||
1799 | root = read_fs_root(reloc_root->fs_info, | 2169 | root = read_fs_root(reloc_root->fs_info, |
1800 | reloc_root->root_key.offset); | 2170 | reloc_root->root_key.offset); |
1801 | BUG_ON(IS_ERR(root)); | 2171 | BUG_ON(IS_ERR(root)); |
1802 | BUG_ON(root->reloc_root != reloc_root); | 2172 | BUG_ON(root->reloc_root != reloc_root); |
1803 | 2173 | ||
1804 | merge_reloc_root(async->rc, root); | 2174 | /* |
1805 | 2175 | * set reference count to 1, so btrfs_recover_relocation | |
1806 | trans = btrfs_start_transaction(root, 1); | 2176 | * knows it should resumes merging |
2177 | */ | ||
2178 | if (!err) | ||
2179 | btrfs_set_root_refs(&reloc_root->root_item, 1); | ||
1807 | btrfs_update_reloc_root(trans, root); | 2180 | btrfs_update_reloc_root(trans, root); |
1808 | btrfs_end_transaction(trans, root); | ||
1809 | } | ||
1810 | 2181 | ||
1811 | btrfs_drop_snapshot(reloc_root, 0); | 2182 | list_add(&reloc_root->root_list, &reloc_roots); |
2183 | } | ||
1812 | 2184 | ||
1813 | if (atomic_dec_and_test(async->num_pending)) | 2185 | list_splice(&reloc_roots, &rc->reloc_roots); |
1814 | complete(async->done); | ||
1815 | 2186 | ||
1816 | kfree(async); | 2187 | if (!err) |
2188 | btrfs_commit_transaction(trans, rc->extent_root); | ||
2189 | else | ||
2190 | btrfs_end_transaction(trans, rc->extent_root); | ||
2191 | return err; | ||
1817 | } | 2192 | } |
1818 | 2193 | ||
1819 | static int merge_reloc_roots(struct reloc_control *rc) | 2194 | static noinline_for_stack |
2195 | int merge_reloc_roots(struct reloc_control *rc) | ||
1820 | { | 2196 | { |
1821 | struct async_merge *async; | ||
1822 | struct btrfs_root *root; | 2197 | struct btrfs_root *root; |
1823 | struct completion done; | 2198 | struct btrfs_root *reloc_root; |
1824 | atomic_t num_pending; | 2199 | LIST_HEAD(reloc_roots); |
2200 | int found = 0; | ||
2201 | int ret; | ||
2202 | again: | ||
2203 | root = rc->extent_root; | ||
2204 | mutex_lock(&root->fs_info->trans_mutex); | ||
2205 | list_splice_init(&rc->reloc_roots, &reloc_roots); | ||
2206 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1825 | 2207 | ||
1826 | init_completion(&done); | 2208 | while (!list_empty(&reloc_roots)) { |
1827 | atomic_set(&num_pending, 1); | 2209 | found = 1; |
2210 | reloc_root = list_entry(reloc_roots.next, | ||
2211 | struct btrfs_root, root_list); | ||
1828 | 2212 | ||
1829 | while (!list_empty(&rc->reloc_roots)) { | 2213 | if (btrfs_root_refs(&reloc_root->root_item) > 0) { |
1830 | root = list_entry(rc->reloc_roots.next, | 2214 | root = read_fs_root(reloc_root->fs_info, |
1831 | struct btrfs_root, root_list); | 2215 | reloc_root->root_key.offset); |
1832 | list_del_init(&root->root_list); | 2216 | BUG_ON(IS_ERR(root)); |
2217 | BUG_ON(root->reloc_root != reloc_root); | ||
1833 | 2218 | ||
1834 | async = kmalloc(sizeof(*async), GFP_NOFS); | 2219 | ret = merge_reloc_root(rc, root); |
1835 | BUG_ON(!async); | 2220 | BUG_ON(ret); |
1836 | async->work.func = merge_func; | 2221 | } else { |
1837 | async->work.flags = 0; | 2222 | list_del_init(&reloc_root->root_list); |
1838 | async->rc = rc; | 2223 | } |
1839 | async->root = root; | 2224 | btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0); |
1840 | async->done = &done; | ||
1841 | async->num_pending = &num_pending; | ||
1842 | atomic_inc(&num_pending); | ||
1843 | btrfs_queue_worker(&rc->workers, &async->work); | ||
1844 | } | 2225 | } |
1845 | 2226 | ||
1846 | if (!atomic_dec_and_test(&num_pending)) | 2227 | if (found) { |
1847 | wait_for_completion(&done); | 2228 | found = 0; |
1848 | 2229 | goto again; | |
2230 | } | ||
1849 | BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root)); | 2231 | BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root)); |
1850 | return 0; | 2232 | return 0; |
1851 | } | 2233 | } |
@@ -1876,119 +2258,169 @@ static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans, | |||
1876 | return btrfs_record_root_in_trans(trans, root); | 2258 | return btrfs_record_root_in_trans(trans, root); |
1877 | } | 2259 | } |
1878 | 2260 | ||
1879 | /* | 2261 | static noinline_for_stack |
1880 | * select one tree from trees that references the block. | 2262 | struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans, |
1881 | * for blocks in refernce counted trees, we preper reloc tree. | 2263 | struct reloc_control *rc, |
1882 | * if no reloc tree found and reloc_only is true, NULL is returned. | 2264 | struct backref_node *node, |
1883 | */ | 2265 | struct backref_edge *edges[], int *nr) |
1884 | static struct btrfs_root *__select_one_root(struct btrfs_trans_handle *trans, | ||
1885 | struct backref_node *node, | ||
1886 | struct backref_edge *edges[], | ||
1887 | int *nr, int reloc_only) | ||
1888 | { | 2266 | { |
1889 | struct backref_node *next; | 2267 | struct backref_node *next; |
1890 | struct btrfs_root *root; | 2268 | struct btrfs_root *root; |
1891 | int index; | 2269 | int index = 0; |
1892 | int loop = 0; | 2270 | |
1893 | again: | ||
1894 | index = 0; | ||
1895 | next = node; | 2271 | next = node; |
1896 | while (1) { | 2272 | while (1) { |
1897 | cond_resched(); | 2273 | cond_resched(); |
1898 | next = walk_up_backref(next, edges, &index); | 2274 | next = walk_up_backref(next, edges, &index); |
1899 | root = next->root; | 2275 | root = next->root; |
1900 | if (!root) { | 2276 | BUG_ON(!root); |
1901 | BUG_ON(!node->old_root); | 2277 | BUG_ON(!root->ref_cows); |
1902 | goto skip; | ||
1903 | } | ||
1904 | |||
1905 | /* no other choice for non-refernce counted tree */ | ||
1906 | if (!root->ref_cows) { | ||
1907 | BUG_ON(reloc_only); | ||
1908 | break; | ||
1909 | } | ||
1910 | 2278 | ||
1911 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { | 2279 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { |
1912 | record_reloc_root_in_trans(trans, root); | 2280 | record_reloc_root_in_trans(trans, root); |
1913 | break; | 2281 | break; |
1914 | } | 2282 | } |
1915 | 2283 | ||
1916 | if (loop) { | 2284 | btrfs_record_root_in_trans(trans, root); |
1917 | btrfs_record_root_in_trans(trans, root); | 2285 | root = root->reloc_root; |
2286 | |||
2287 | if (next->new_bytenr != root->node->start) { | ||
2288 | BUG_ON(next->new_bytenr); | ||
2289 | BUG_ON(!list_empty(&next->list)); | ||
2290 | next->new_bytenr = root->node->start; | ||
2291 | next->root = root; | ||
2292 | list_add_tail(&next->list, | ||
2293 | &rc->backref_cache.changed); | ||
2294 | __mark_block_processed(rc, next); | ||
1918 | break; | 2295 | break; |
1919 | } | 2296 | } |
1920 | 2297 | ||
1921 | if (reloc_only || next != node) { | 2298 | WARN_ON(1); |
1922 | if (!root->reloc_root) | ||
1923 | btrfs_record_root_in_trans(trans, root); | ||
1924 | root = root->reloc_root; | ||
1925 | /* | ||
1926 | * if the reloc tree was created in current | ||
1927 | * transation, there is no node in backref tree | ||
1928 | * corresponds to the root of the reloc tree. | ||
1929 | */ | ||
1930 | if (btrfs_root_last_snapshot(&root->root_item) == | ||
1931 | trans->transid - 1) | ||
1932 | break; | ||
1933 | } | ||
1934 | skip: | ||
1935 | root = NULL; | 2299 | root = NULL; |
1936 | next = walk_down_backref(edges, &index); | 2300 | next = walk_down_backref(edges, &index); |
1937 | if (!next || next->level <= node->level) | 2301 | if (!next || next->level <= node->level) |
1938 | break; | 2302 | break; |
1939 | } | 2303 | } |
2304 | if (!root) | ||
2305 | return NULL; | ||
1940 | 2306 | ||
1941 | if (!root && !loop && !reloc_only) { | 2307 | *nr = index; |
1942 | loop = 1; | 2308 | next = node; |
1943 | goto again; | 2309 | /* setup backref node path for btrfs_reloc_cow_block */ |
2310 | while (1) { | ||
2311 | rc->backref_cache.path[next->level] = next; | ||
2312 | if (--index < 0) | ||
2313 | break; | ||
2314 | next = edges[index]->node[UPPER]; | ||
1944 | } | 2315 | } |
1945 | |||
1946 | if (root) | ||
1947 | *nr = index; | ||
1948 | else | ||
1949 | *nr = 0; | ||
1950 | |||
1951 | return root; | 2316 | return root; |
1952 | } | 2317 | } |
1953 | 2318 | ||
2319 | /* | ||
2320 | * select a tree root for relocation. return NULL if the block | ||
2321 | * is reference counted. we should use do_relocation() in this | ||
2322 | * case. return a tree root pointer if the block isn't reference | ||
2323 | * counted. return -ENOENT if the block is root of reloc tree. | ||
2324 | */ | ||
1954 | static noinline_for_stack | 2325 | static noinline_for_stack |
1955 | struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans, | 2326 | struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans, |
1956 | struct backref_node *node) | 2327 | struct backref_node *node) |
1957 | { | 2328 | { |
2329 | struct backref_node *next; | ||
2330 | struct btrfs_root *root; | ||
2331 | struct btrfs_root *fs_root = NULL; | ||
1958 | struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; | 2332 | struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; |
1959 | int nr; | 2333 | int index = 0; |
1960 | return __select_one_root(trans, node, edges, &nr, 0); | 2334 | |
2335 | next = node; | ||
2336 | while (1) { | ||
2337 | cond_resched(); | ||
2338 | next = walk_up_backref(next, edges, &index); | ||
2339 | root = next->root; | ||
2340 | BUG_ON(!root); | ||
2341 | |||
2342 | /* no other choice for non-refernce counted tree */ | ||
2343 | if (!root->ref_cows) | ||
2344 | return root; | ||
2345 | |||
2346 | if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) | ||
2347 | fs_root = root; | ||
2348 | |||
2349 | if (next != node) | ||
2350 | return NULL; | ||
2351 | |||
2352 | next = walk_down_backref(edges, &index); | ||
2353 | if (!next || next->level <= node->level) | ||
2354 | break; | ||
2355 | } | ||
2356 | |||
2357 | if (!fs_root) | ||
2358 | return ERR_PTR(-ENOENT); | ||
2359 | return fs_root; | ||
1961 | } | 2360 | } |
1962 | 2361 | ||
1963 | static noinline_for_stack | 2362 | static noinline_for_stack |
1964 | struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans, | 2363 | u64 calcu_metadata_size(struct reloc_control *rc, |
1965 | struct backref_node *node, | 2364 | struct backref_node *node, int reserve) |
1966 | struct backref_edge *edges[], int *nr) | ||
1967 | { | 2365 | { |
1968 | return __select_one_root(trans, node, edges, nr, 1); | 2366 | struct backref_node *next = node; |
2367 | struct backref_edge *edge; | ||
2368 | struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; | ||
2369 | u64 num_bytes = 0; | ||
2370 | int index = 0; | ||
2371 | |||
2372 | BUG_ON(reserve && node->processed); | ||
2373 | |||
2374 | while (next) { | ||
2375 | cond_resched(); | ||
2376 | while (1) { | ||
2377 | if (next->processed && (reserve || next != node)) | ||
2378 | break; | ||
2379 | |||
2380 | num_bytes += btrfs_level_size(rc->extent_root, | ||
2381 | next->level); | ||
2382 | |||
2383 | if (list_empty(&next->upper)) | ||
2384 | break; | ||
2385 | |||
2386 | edge = list_entry(next->upper.next, | ||
2387 | struct backref_edge, list[LOWER]); | ||
2388 | edges[index++] = edge; | ||
2389 | next = edge->node[UPPER]; | ||
2390 | } | ||
2391 | next = walk_down_backref(edges, &index); | ||
2392 | } | ||
2393 | return num_bytes; | ||
1969 | } | 2394 | } |
1970 | 2395 | ||
1971 | static void grab_path_buffers(struct btrfs_path *path, | 2396 | static int reserve_metadata_space(struct btrfs_trans_handle *trans, |
1972 | struct backref_node *node, | 2397 | struct reloc_control *rc, |
1973 | struct backref_edge *edges[], int nr) | 2398 | struct backref_node *node) |
1974 | { | 2399 | { |
1975 | int i = 0; | 2400 | struct btrfs_root *root = rc->extent_root; |
1976 | while (1) { | 2401 | u64 num_bytes; |
1977 | drop_node_buffer(node); | 2402 | int ret; |
1978 | node->eb = path->nodes[node->level]; | 2403 | |
1979 | BUG_ON(!node->eb); | 2404 | num_bytes = calcu_metadata_size(rc, node, 1) * 2; |
1980 | if (path->locks[node->level]) | ||
1981 | node->locked = 1; | ||
1982 | path->nodes[node->level] = NULL; | ||
1983 | path->locks[node->level] = 0; | ||
1984 | |||
1985 | if (i >= nr) | ||
1986 | break; | ||
1987 | 2405 | ||
1988 | edges[i]->blockptr = node->eb->start; | 2406 | trans->block_rsv = rc->block_rsv; |
1989 | node = edges[i]->node[UPPER]; | 2407 | ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes, |
1990 | i++; | 2408 | &rc->block_rsv_retries); |
2409 | if (ret) { | ||
2410 | if (ret == -EAGAIN) | ||
2411 | rc->commit_transaction = 1; | ||
2412 | return ret; | ||
1991 | } | 2413 | } |
2414 | |||
2415 | rc->block_rsv_retries = 0; | ||
2416 | return 0; | ||
2417 | } | ||
2418 | |||
2419 | static void release_metadata_space(struct reloc_control *rc, | ||
2420 | struct backref_node *node) | ||
2421 | { | ||
2422 | u64 num_bytes = calcu_metadata_size(rc, node, 0) * 2; | ||
2423 | btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, num_bytes); | ||
1992 | } | 2424 | } |
1993 | 2425 | ||
1994 | /* | 2426 | /* |
@@ -1999,6 +2431,7 @@ static void grab_path_buffers(struct btrfs_path *path, | |||
1999 | * in that case this function just updates pointers. | 2431 | * in that case this function just updates pointers. |
2000 | */ | 2432 | */ |
2001 | static int do_relocation(struct btrfs_trans_handle *trans, | 2433 | static int do_relocation(struct btrfs_trans_handle *trans, |
2434 | struct reloc_control *rc, | ||
2002 | struct backref_node *node, | 2435 | struct backref_node *node, |
2003 | struct btrfs_key *key, | 2436 | struct btrfs_key *key, |
2004 | struct btrfs_path *path, int lowest) | 2437 | struct btrfs_path *path, int lowest) |
@@ -2019,18 +2452,25 @@ static int do_relocation(struct btrfs_trans_handle *trans, | |||
2019 | BUG_ON(lowest && node->eb); | 2452 | BUG_ON(lowest && node->eb); |
2020 | 2453 | ||
2021 | path->lowest_level = node->level + 1; | 2454 | path->lowest_level = node->level + 1; |
2455 | rc->backref_cache.path[node->level] = node; | ||
2022 | list_for_each_entry(edge, &node->upper, list[LOWER]) { | 2456 | list_for_each_entry(edge, &node->upper, list[LOWER]) { |
2023 | cond_resched(); | 2457 | cond_resched(); |
2024 | if (node->eb && node->eb->start == edge->blockptr) | ||
2025 | continue; | ||
2026 | 2458 | ||
2027 | upper = edge->node[UPPER]; | 2459 | upper = edge->node[UPPER]; |
2028 | root = select_reloc_root(trans, upper, edges, &nr); | 2460 | root = select_reloc_root(trans, rc, upper, edges, &nr); |
2029 | if (!root) | 2461 | BUG_ON(!root); |
2030 | continue; | 2462 | |
2031 | 2463 | if (upper->eb && !upper->locked) { | |
2032 | if (upper->eb && !upper->locked) | 2464 | if (!lowest) { |
2465 | ret = btrfs_bin_search(upper->eb, key, | ||
2466 | upper->level, &slot); | ||
2467 | BUG_ON(ret); | ||
2468 | bytenr = btrfs_node_blockptr(upper->eb, slot); | ||
2469 | if (node->eb->start == bytenr) | ||
2470 | goto next; | ||
2471 | } | ||
2033 | drop_node_buffer(upper); | 2472 | drop_node_buffer(upper); |
2473 | } | ||
2034 | 2474 | ||
2035 | if (!upper->eb) { | 2475 | if (!upper->eb) { |
2036 | ret = btrfs_search_slot(trans, root, key, path, 0, 1); | 2476 | ret = btrfs_search_slot(trans, root, key, path, 0, 1); |
@@ -2040,11 +2480,17 @@ static int do_relocation(struct btrfs_trans_handle *trans, | |||
2040 | } | 2480 | } |
2041 | BUG_ON(ret > 0); | 2481 | BUG_ON(ret > 0); |
2042 | 2482 | ||
2043 | slot = path->slots[upper->level]; | 2483 | if (!upper->eb) { |
2484 | upper->eb = path->nodes[upper->level]; | ||
2485 | path->nodes[upper->level] = NULL; | ||
2486 | } else { | ||
2487 | BUG_ON(upper->eb != path->nodes[upper->level]); | ||
2488 | } | ||
2044 | 2489 | ||
2045 | btrfs_unlock_up_safe(path, upper->level + 1); | 2490 | upper->locked = 1; |
2046 | grab_path_buffers(path, upper, edges, nr); | 2491 | path->locks[upper->level] = 0; |
2047 | 2492 | ||
2493 | slot = path->slots[upper->level]; | ||
2048 | btrfs_release_path(NULL, path); | 2494 | btrfs_release_path(NULL, path); |
2049 | } else { | 2495 | } else { |
2050 | ret = btrfs_bin_search(upper->eb, key, upper->level, | 2496 | ret = btrfs_bin_search(upper->eb, key, upper->level, |
@@ -2053,14 +2499,11 @@ static int do_relocation(struct btrfs_trans_handle *trans, | |||
2053 | } | 2499 | } |
2054 | 2500 | ||
2055 | bytenr = btrfs_node_blockptr(upper->eb, slot); | 2501 | bytenr = btrfs_node_blockptr(upper->eb, slot); |
2056 | if (!lowest) { | 2502 | if (lowest) { |
2057 | if (node->eb->start == bytenr) { | 2503 | BUG_ON(bytenr != node->bytenr); |
2058 | btrfs_tree_unlock(upper->eb); | ||
2059 | upper->locked = 0; | ||
2060 | continue; | ||
2061 | } | ||
2062 | } else { | 2504 | } else { |
2063 | BUG_ON(node->bytenr != bytenr); | 2505 | if (node->eb->start == bytenr) |
2506 | goto next; | ||
2064 | } | 2507 | } |
2065 | 2508 | ||
2066 | blocksize = btrfs_level_size(root, node->level); | 2509 | blocksize = btrfs_level_size(root, node->level); |
@@ -2072,13 +2515,13 @@ static int do_relocation(struct btrfs_trans_handle *trans, | |||
2072 | if (!node->eb) { | 2515 | if (!node->eb) { |
2073 | ret = btrfs_cow_block(trans, root, eb, upper->eb, | 2516 | ret = btrfs_cow_block(trans, root, eb, upper->eb, |
2074 | slot, &eb); | 2517 | slot, &eb); |
2518 | btrfs_tree_unlock(eb); | ||
2519 | free_extent_buffer(eb); | ||
2075 | if (ret < 0) { | 2520 | if (ret < 0) { |
2076 | err = ret; | 2521 | err = ret; |
2077 | break; | 2522 | goto next; |
2078 | } | 2523 | } |
2079 | btrfs_set_lock_blocking(eb); | 2524 | BUG_ON(node->eb != eb); |
2080 | node->eb = eb; | ||
2081 | node->locked = 1; | ||
2082 | } else { | 2525 | } else { |
2083 | btrfs_set_node_blockptr(upper->eb, slot, | 2526 | btrfs_set_node_blockptr(upper->eb, slot, |
2084 | node->eb->start); | 2527 | node->eb->start); |
@@ -2096,67 +2539,80 @@ static int do_relocation(struct btrfs_trans_handle *trans, | |||
2096 | ret = btrfs_drop_subtree(trans, root, eb, upper->eb); | 2539 | ret = btrfs_drop_subtree(trans, root, eb, upper->eb); |
2097 | BUG_ON(ret); | 2540 | BUG_ON(ret); |
2098 | } | 2541 | } |
2099 | if (!lowest) { | 2542 | next: |
2100 | btrfs_tree_unlock(upper->eb); | 2543 | if (!upper->pending) |
2101 | upper->locked = 0; | 2544 | drop_node_buffer(upper); |
2102 | } | 2545 | else |
2546 | unlock_node_buffer(upper); | ||
2547 | if (err) | ||
2548 | break; | ||
2103 | } | 2549 | } |
2550 | |||
2551 | if (!err && node->pending) { | ||
2552 | drop_node_buffer(node); | ||
2553 | list_move_tail(&node->list, &rc->backref_cache.changed); | ||
2554 | node->pending = 0; | ||
2555 | } | ||
2556 | |||
2104 | path->lowest_level = 0; | 2557 | path->lowest_level = 0; |
2558 | BUG_ON(err == -ENOSPC); | ||
2105 | return err; | 2559 | return err; |
2106 | } | 2560 | } |
2107 | 2561 | ||
2108 | static int link_to_upper(struct btrfs_trans_handle *trans, | 2562 | static int link_to_upper(struct btrfs_trans_handle *trans, |
2563 | struct reloc_control *rc, | ||
2109 | struct backref_node *node, | 2564 | struct backref_node *node, |
2110 | struct btrfs_path *path) | 2565 | struct btrfs_path *path) |
2111 | { | 2566 | { |
2112 | struct btrfs_key key; | 2567 | struct btrfs_key key; |
2113 | if (!node->eb || list_empty(&node->upper)) | ||
2114 | return 0; | ||
2115 | 2568 | ||
2116 | btrfs_node_key_to_cpu(node->eb, &key, 0); | 2569 | btrfs_node_key_to_cpu(node->eb, &key, 0); |
2117 | return do_relocation(trans, node, &key, path, 0); | 2570 | return do_relocation(trans, rc, node, &key, path, 0); |
2118 | } | 2571 | } |
2119 | 2572 | ||
2120 | static int finish_pending_nodes(struct btrfs_trans_handle *trans, | 2573 | static int finish_pending_nodes(struct btrfs_trans_handle *trans, |
2121 | struct backref_cache *cache, | 2574 | struct reloc_control *rc, |
2122 | struct btrfs_path *path) | 2575 | struct btrfs_path *path, int err) |
2123 | { | 2576 | { |
2577 | LIST_HEAD(list); | ||
2578 | struct backref_cache *cache = &rc->backref_cache; | ||
2124 | struct backref_node *node; | 2579 | struct backref_node *node; |
2125 | int level; | 2580 | int level; |
2126 | int ret; | 2581 | int ret; |
2127 | int err = 0; | ||
2128 | 2582 | ||
2129 | for (level = 0; level < BTRFS_MAX_LEVEL; level++) { | 2583 | for (level = 0; level < BTRFS_MAX_LEVEL; level++) { |
2130 | while (!list_empty(&cache->pending[level])) { | 2584 | while (!list_empty(&cache->pending[level])) { |
2131 | node = list_entry(cache->pending[level].next, | 2585 | node = list_entry(cache->pending[level].next, |
2132 | struct backref_node, lower); | 2586 | struct backref_node, list); |
2133 | BUG_ON(node->level != level); | 2587 | list_move_tail(&node->list, &list); |
2588 | BUG_ON(!node->pending); | ||
2134 | 2589 | ||
2135 | ret = link_to_upper(trans, node, path); | 2590 | if (!err) { |
2136 | if (ret < 0) | 2591 | ret = link_to_upper(trans, rc, node, path); |
2137 | err = ret; | 2592 | if (ret < 0) |
2138 | /* | 2593 | err = ret; |
2139 | * this remove the node from the pending list and | 2594 | } |
2140 | * may add some other nodes to the level + 1 | ||
2141 | * pending list | ||
2142 | */ | ||
2143 | remove_backref_node(cache, node); | ||
2144 | } | 2595 | } |
2596 | list_splice_init(&list, &cache->pending[level]); | ||
2145 | } | 2597 | } |
2146 | BUG_ON(!RB_EMPTY_ROOT(&cache->rb_root)); | ||
2147 | return err; | 2598 | return err; |
2148 | } | 2599 | } |
2149 | 2600 | ||
2150 | static void mark_block_processed(struct reloc_control *rc, | 2601 | static void mark_block_processed(struct reloc_control *rc, |
2151 | struct backref_node *node) | 2602 | u64 bytenr, u32 blocksize) |
2603 | { | ||
2604 | set_extent_bits(&rc->processed_blocks, bytenr, bytenr + blocksize - 1, | ||
2605 | EXTENT_DIRTY, GFP_NOFS); | ||
2606 | } | ||
2607 | |||
2608 | static void __mark_block_processed(struct reloc_control *rc, | ||
2609 | struct backref_node *node) | ||
2152 | { | 2610 | { |
2153 | u32 blocksize; | 2611 | u32 blocksize; |
2154 | if (node->level == 0 || | 2612 | if (node->level == 0 || |
2155 | in_block_group(node->bytenr, rc->block_group)) { | 2613 | in_block_group(node->bytenr, rc->block_group)) { |
2156 | blocksize = btrfs_level_size(rc->extent_root, node->level); | 2614 | blocksize = btrfs_level_size(rc->extent_root, node->level); |
2157 | set_extent_bits(&rc->processed_blocks, node->bytenr, | 2615 | mark_block_processed(rc, node->bytenr, blocksize); |
2158 | node->bytenr + blocksize - 1, EXTENT_DIRTY, | ||
2159 | GFP_NOFS); | ||
2160 | } | 2616 | } |
2161 | node->processed = 1; | 2617 | node->processed = 1; |
2162 | } | 2618 | } |
@@ -2179,7 +2635,7 @@ static void update_processed_blocks(struct reloc_control *rc, | |||
2179 | if (next->processed) | 2635 | if (next->processed) |
2180 | break; | 2636 | break; |
2181 | 2637 | ||
2182 | mark_block_processed(rc, next); | 2638 | __mark_block_processed(rc, next); |
2183 | 2639 | ||
2184 | if (list_empty(&next->upper)) | 2640 | if (list_empty(&next->upper)) |
2185 | break; | 2641 | break; |
@@ -2202,138 +2658,6 @@ static int tree_block_processed(u64 bytenr, u32 blocksize, | |||
2202 | return 0; | 2658 | return 0; |
2203 | } | 2659 | } |
2204 | 2660 | ||
2205 | /* | ||
2206 | * check if there are any file extent pointers in the leaf point to | ||
2207 | * data require processing | ||
2208 | */ | ||
2209 | static int check_file_extents(struct reloc_control *rc, | ||
2210 | u64 bytenr, u32 blocksize, u64 ptr_gen) | ||
2211 | { | ||
2212 | struct btrfs_key found_key; | ||
2213 | struct btrfs_file_extent_item *fi; | ||
2214 | struct extent_buffer *leaf; | ||
2215 | u32 nritems; | ||
2216 | int i; | ||
2217 | int ret = 0; | ||
2218 | |||
2219 | leaf = read_tree_block(rc->extent_root, bytenr, blocksize, ptr_gen); | ||
2220 | |||
2221 | nritems = btrfs_header_nritems(leaf); | ||
2222 | for (i = 0; i < nritems; i++) { | ||
2223 | cond_resched(); | ||
2224 | btrfs_item_key_to_cpu(leaf, &found_key, i); | ||
2225 | if (found_key.type != BTRFS_EXTENT_DATA_KEY) | ||
2226 | continue; | ||
2227 | fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); | ||
2228 | if (btrfs_file_extent_type(leaf, fi) == | ||
2229 | BTRFS_FILE_EXTENT_INLINE) | ||
2230 | continue; | ||
2231 | bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); | ||
2232 | if (bytenr == 0) | ||
2233 | continue; | ||
2234 | if (in_block_group(bytenr, rc->block_group)) { | ||
2235 | ret = 1; | ||
2236 | break; | ||
2237 | } | ||
2238 | } | ||
2239 | free_extent_buffer(leaf); | ||
2240 | return ret; | ||
2241 | } | ||
2242 | |||
2243 | /* | ||
2244 | * scan child blocks of a given block to find blocks require processing | ||
2245 | */ | ||
2246 | static int add_child_blocks(struct btrfs_trans_handle *trans, | ||
2247 | struct reloc_control *rc, | ||
2248 | struct backref_node *node, | ||
2249 | struct rb_root *blocks) | ||
2250 | { | ||
2251 | struct tree_block *block; | ||
2252 | struct rb_node *rb_node; | ||
2253 | u64 bytenr; | ||
2254 | u64 ptr_gen; | ||
2255 | u32 blocksize; | ||
2256 | u32 nritems; | ||
2257 | int i; | ||
2258 | int err = 0; | ||
2259 | |||
2260 | nritems = btrfs_header_nritems(node->eb); | ||
2261 | blocksize = btrfs_level_size(rc->extent_root, node->level - 1); | ||
2262 | for (i = 0; i < nritems; i++) { | ||
2263 | cond_resched(); | ||
2264 | bytenr = btrfs_node_blockptr(node->eb, i); | ||
2265 | ptr_gen = btrfs_node_ptr_generation(node->eb, i); | ||
2266 | if (ptr_gen == trans->transid) | ||
2267 | continue; | ||
2268 | if (!in_block_group(bytenr, rc->block_group) && | ||
2269 | (node->level > 1 || rc->stage == MOVE_DATA_EXTENTS)) | ||
2270 | continue; | ||
2271 | if (tree_block_processed(bytenr, blocksize, rc)) | ||
2272 | continue; | ||
2273 | |||
2274 | readahead_tree_block(rc->extent_root, | ||
2275 | bytenr, blocksize, ptr_gen); | ||
2276 | } | ||
2277 | |||
2278 | for (i = 0; i < nritems; i++) { | ||
2279 | cond_resched(); | ||
2280 | bytenr = btrfs_node_blockptr(node->eb, i); | ||
2281 | ptr_gen = btrfs_node_ptr_generation(node->eb, i); | ||
2282 | if (ptr_gen == trans->transid) | ||
2283 | continue; | ||
2284 | if (!in_block_group(bytenr, rc->block_group) && | ||
2285 | (node->level > 1 || rc->stage == MOVE_DATA_EXTENTS)) | ||
2286 | continue; | ||
2287 | if (tree_block_processed(bytenr, blocksize, rc)) | ||
2288 | continue; | ||
2289 | if (!in_block_group(bytenr, rc->block_group) && | ||
2290 | !check_file_extents(rc, bytenr, blocksize, ptr_gen)) | ||
2291 | continue; | ||
2292 | |||
2293 | block = kmalloc(sizeof(*block), GFP_NOFS); | ||
2294 | if (!block) { | ||
2295 | err = -ENOMEM; | ||
2296 | break; | ||
2297 | } | ||
2298 | block->bytenr = bytenr; | ||
2299 | btrfs_node_key_to_cpu(node->eb, &block->key, i); | ||
2300 | block->level = node->level - 1; | ||
2301 | block->key_ready = 1; | ||
2302 | rb_node = tree_insert(blocks, block->bytenr, &block->rb_node); | ||
2303 | BUG_ON(rb_node); | ||
2304 | } | ||
2305 | if (err) | ||
2306 | free_block_list(blocks); | ||
2307 | return err; | ||
2308 | } | ||
2309 | |||
2310 | /* | ||
2311 | * find adjacent blocks require processing | ||
2312 | */ | ||
2313 | static noinline_for_stack | ||
2314 | int add_adjacent_blocks(struct btrfs_trans_handle *trans, | ||
2315 | struct reloc_control *rc, | ||
2316 | struct backref_cache *cache, | ||
2317 | struct rb_root *blocks, int level, | ||
2318 | struct backref_node **upper) | ||
2319 | { | ||
2320 | struct backref_node *node; | ||
2321 | int ret = 0; | ||
2322 | |||
2323 | WARN_ON(!list_empty(&cache->pending[level])); | ||
2324 | |||
2325 | if (list_empty(&cache->pending[level + 1])) | ||
2326 | return 1; | ||
2327 | |||
2328 | node = list_entry(cache->pending[level + 1].next, | ||
2329 | struct backref_node, lower); | ||
2330 | if (node->eb) | ||
2331 | ret = add_child_blocks(trans, rc, node, blocks); | ||
2332 | |||
2333 | *upper = node; | ||
2334 | return ret; | ||
2335 | } | ||
2336 | |||
2337 | static int get_tree_block_key(struct reloc_control *rc, | 2661 | static int get_tree_block_key(struct reloc_control *rc, |
2338 | struct tree_block *block) | 2662 | struct tree_block *block) |
2339 | { | 2663 | { |
@@ -2371,40 +2695,53 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans, | |||
2371 | struct btrfs_path *path) | 2695 | struct btrfs_path *path) |
2372 | { | 2696 | { |
2373 | struct btrfs_root *root; | 2697 | struct btrfs_root *root; |
2374 | int ret; | 2698 | int release = 0; |
2699 | int ret = 0; | ||
2375 | 2700 | ||
2701 | if (!node) | ||
2702 | return 0; | ||
2703 | |||
2704 | BUG_ON(node->processed); | ||
2376 | root = select_one_root(trans, node); | 2705 | root = select_one_root(trans, node); |
2377 | if (unlikely(!root)) { | 2706 | if (root == ERR_PTR(-ENOENT)) { |
2378 | rc->found_old_snapshot = 1; | ||
2379 | update_processed_blocks(rc, node); | 2707 | update_processed_blocks(rc, node); |
2380 | return 0; | 2708 | goto out; |
2381 | } | 2709 | } |
2382 | 2710 | ||
2383 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { | 2711 | if (!root || root->ref_cows) { |
2384 | ret = do_relocation(trans, node, key, path, 1); | 2712 | ret = reserve_metadata_space(trans, rc, node); |
2385 | if (ret < 0) | 2713 | if (ret) |
2386 | goto out; | ||
2387 | if (node->level == 0 && rc->stage == UPDATE_DATA_PTRS) { | ||
2388 | ret = replace_file_extents(trans, rc, root, | ||
2389 | node->eb, NULL); | ||
2390 | if (ret < 0) | ||
2391 | goto out; | ||
2392 | } | ||
2393 | drop_node_buffer(node); | ||
2394 | } else if (!root->ref_cows) { | ||
2395 | path->lowest_level = node->level; | ||
2396 | ret = btrfs_search_slot(trans, root, key, path, 0, 1); | ||
2397 | btrfs_release_path(root, path); | ||
2398 | if (ret < 0) | ||
2399 | goto out; | 2714 | goto out; |
2400 | } else if (root != node->root) { | 2715 | release = 1; |
2401 | WARN_ON(node->level > 0 || rc->stage != UPDATE_DATA_PTRS); | ||
2402 | } | 2716 | } |
2403 | 2717 | ||
2404 | update_processed_blocks(rc, node); | 2718 | if (root) { |
2405 | ret = 0; | 2719 | if (root->ref_cows) { |
2720 | BUG_ON(node->new_bytenr); | ||
2721 | BUG_ON(!list_empty(&node->list)); | ||
2722 | btrfs_record_root_in_trans(trans, root); | ||
2723 | root = root->reloc_root; | ||
2724 | node->new_bytenr = root->node->start; | ||
2725 | node->root = root; | ||
2726 | list_add_tail(&node->list, &rc->backref_cache.changed); | ||
2727 | } else { | ||
2728 | path->lowest_level = node->level; | ||
2729 | ret = btrfs_search_slot(trans, root, key, path, 0, 1); | ||
2730 | btrfs_release_path(root, path); | ||
2731 | if (ret > 0) | ||
2732 | ret = 0; | ||
2733 | } | ||
2734 | if (!ret) | ||
2735 | update_processed_blocks(rc, node); | ||
2736 | } else { | ||
2737 | ret = do_relocation(trans, rc, node, key, path, 1); | ||
2738 | } | ||
2406 | out: | 2739 | out: |
2407 | drop_node_buffer(node); | 2740 | if (ret || node->level == 0 || node->cowonly) { |
2741 | if (release) | ||
2742 | release_metadata_space(rc, node); | ||
2743 | remove_backref_node(&rc->backref_cache, node); | ||
2744 | } | ||
2408 | return ret; | 2745 | return ret; |
2409 | } | 2746 | } |
2410 | 2747 | ||
@@ -2415,12 +2752,10 @@ static noinline_for_stack | |||
2415 | int relocate_tree_blocks(struct btrfs_trans_handle *trans, | 2752 | int relocate_tree_blocks(struct btrfs_trans_handle *trans, |
2416 | struct reloc_control *rc, struct rb_root *blocks) | 2753 | struct reloc_control *rc, struct rb_root *blocks) |
2417 | { | 2754 | { |
2418 | struct backref_cache *cache; | ||
2419 | struct backref_node *node; | 2755 | struct backref_node *node; |
2420 | struct btrfs_path *path; | 2756 | struct btrfs_path *path; |
2421 | struct tree_block *block; | 2757 | struct tree_block *block; |
2422 | struct rb_node *rb_node; | 2758 | struct rb_node *rb_node; |
2423 | int level = -1; | ||
2424 | int ret; | 2759 | int ret; |
2425 | int err = 0; | 2760 | int err = 0; |
2426 | 2761 | ||
@@ -2428,21 +2763,9 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans, | |||
2428 | if (!path) | 2763 | if (!path) |
2429 | return -ENOMEM; | 2764 | return -ENOMEM; |
2430 | 2765 | ||
2431 | cache = kmalloc(sizeof(*cache), GFP_NOFS); | ||
2432 | if (!cache) { | ||
2433 | btrfs_free_path(path); | ||
2434 | return -ENOMEM; | ||
2435 | } | ||
2436 | |||
2437 | backref_cache_init(cache); | ||
2438 | |||
2439 | rb_node = rb_first(blocks); | 2766 | rb_node = rb_first(blocks); |
2440 | while (rb_node) { | 2767 | while (rb_node) { |
2441 | block = rb_entry(rb_node, struct tree_block, rb_node); | 2768 | block = rb_entry(rb_node, struct tree_block, rb_node); |
2442 | if (level == -1) | ||
2443 | level = block->level; | ||
2444 | else | ||
2445 | BUG_ON(level != block->level); | ||
2446 | if (!block->key_ready) | 2769 | if (!block->key_ready) |
2447 | reada_tree_block(rc, block); | 2770 | reada_tree_block(rc, block); |
2448 | rb_node = rb_next(rb_node); | 2771 | rb_node = rb_next(rb_node); |
@@ -2460,7 +2783,7 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans, | |||
2460 | while (rb_node) { | 2783 | while (rb_node) { |
2461 | block = rb_entry(rb_node, struct tree_block, rb_node); | 2784 | block = rb_entry(rb_node, struct tree_block, rb_node); |
2462 | 2785 | ||
2463 | node = build_backref_tree(rc, cache, &block->key, | 2786 | node = build_backref_tree(rc, &block->key, |
2464 | block->level, block->bytenr); | 2787 | block->level, block->bytenr); |
2465 | if (IS_ERR(node)) { | 2788 | if (IS_ERR(node)) { |
2466 | err = PTR_ERR(node); | 2789 | err = PTR_ERR(node); |
@@ -2470,79 +2793,62 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans, | |||
2470 | ret = relocate_tree_block(trans, rc, node, &block->key, | 2793 | ret = relocate_tree_block(trans, rc, node, &block->key, |
2471 | path); | 2794 | path); |
2472 | if (ret < 0) { | 2795 | if (ret < 0) { |
2473 | err = ret; | 2796 | if (ret != -EAGAIN || rb_node == rb_first(blocks)) |
2797 | err = ret; | ||
2474 | goto out; | 2798 | goto out; |
2475 | } | 2799 | } |
2476 | remove_backref_node(cache, node); | ||
2477 | rb_node = rb_next(rb_node); | 2800 | rb_node = rb_next(rb_node); |
2478 | } | 2801 | } |
2479 | 2802 | out: | |
2480 | if (level > 0) | ||
2481 | goto out; | ||
2482 | |||
2483 | free_block_list(blocks); | 2803 | free_block_list(blocks); |
2804 | err = finish_pending_nodes(trans, rc, path, err); | ||
2484 | 2805 | ||
2485 | /* | 2806 | btrfs_free_path(path); |
2486 | * now backrefs of some upper level tree blocks have been cached, | 2807 | return err; |
2487 | * try relocating blocks referenced by these upper level blocks. | 2808 | } |
2488 | */ | ||
2489 | while (1) { | ||
2490 | struct backref_node *upper = NULL; | ||
2491 | if (trans->transaction->in_commit || | ||
2492 | trans->transaction->delayed_refs.flushing) | ||
2493 | break; | ||
2494 | 2809 | ||
2495 | ret = add_adjacent_blocks(trans, rc, cache, blocks, level, | 2810 | static noinline_for_stack |
2496 | &upper); | 2811 | int prealloc_file_extent_cluster(struct inode *inode, |
2497 | if (ret < 0) | 2812 | struct file_extent_cluster *cluster) |
2498 | err = ret; | 2813 | { |
2499 | if (ret != 0) | 2814 | u64 alloc_hint = 0; |
2500 | break; | 2815 | u64 start; |
2816 | u64 end; | ||
2817 | u64 offset = BTRFS_I(inode)->index_cnt; | ||
2818 | u64 num_bytes; | ||
2819 | int nr = 0; | ||
2820 | int ret = 0; | ||
2501 | 2821 | ||
2502 | rb_node = rb_first(blocks); | 2822 | BUG_ON(cluster->start != cluster->boundary[0]); |
2503 | while (rb_node) { | 2823 | mutex_lock(&inode->i_mutex); |
2504 | block = rb_entry(rb_node, struct tree_block, rb_node); | ||
2505 | if (trans->transaction->in_commit || | ||
2506 | trans->transaction->delayed_refs.flushing) | ||
2507 | goto out; | ||
2508 | BUG_ON(!block->key_ready); | ||
2509 | node = build_backref_tree(rc, cache, &block->key, | ||
2510 | level, block->bytenr); | ||
2511 | if (IS_ERR(node)) { | ||
2512 | err = PTR_ERR(node); | ||
2513 | goto out; | ||
2514 | } | ||
2515 | 2824 | ||
2516 | ret = relocate_tree_block(trans, rc, node, | 2825 | ret = btrfs_check_data_free_space(inode, cluster->end + |
2517 | &block->key, path); | 2826 | 1 - cluster->start); |
2518 | if (ret < 0) { | 2827 | if (ret) |
2519 | err = ret; | 2828 | goto out; |
2520 | goto out; | ||
2521 | } | ||
2522 | remove_backref_node(cache, node); | ||
2523 | rb_node = rb_next(rb_node); | ||
2524 | } | ||
2525 | free_block_list(blocks); | ||
2526 | 2829 | ||
2527 | if (upper) { | 2830 | while (nr < cluster->nr) { |
2528 | ret = link_to_upper(trans, upper, path); | 2831 | start = cluster->boundary[nr] - offset; |
2529 | if (ret < 0) { | 2832 | if (nr + 1 < cluster->nr) |
2530 | err = ret; | 2833 | end = cluster->boundary[nr + 1] - 1 - offset; |
2531 | break; | 2834 | else |
2532 | } | 2835 | end = cluster->end - offset; |
2533 | remove_backref_node(cache, upper); | 2836 | |
2534 | } | 2837 | lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); |
2838 | num_bytes = end + 1 - start; | ||
2839 | ret = btrfs_prealloc_file_range(inode, 0, start, | ||
2840 | num_bytes, num_bytes, | ||
2841 | end + 1, &alloc_hint); | ||
2842 | unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); | ||
2843 | if (ret) | ||
2844 | break; | ||
2845 | nr++; | ||
2535 | } | 2846 | } |
2847 | btrfs_free_reserved_data_space(inode, cluster->end + | ||
2848 | 1 - cluster->start); | ||
2536 | out: | 2849 | out: |
2537 | free_block_list(blocks); | 2850 | mutex_unlock(&inode->i_mutex); |
2538 | 2851 | return ret; | |
2539 | ret = finish_pending_nodes(trans, cache, path); | ||
2540 | if (ret < 0) | ||
2541 | err = ret; | ||
2542 | |||
2543 | kfree(cache); | ||
2544 | btrfs_free_path(path); | ||
2545 | return err; | ||
2546 | } | 2852 | } |
2547 | 2853 | ||
2548 | static noinline_for_stack | 2854 | static noinline_for_stack |
@@ -2588,7 +2894,6 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
2588 | u64 offset = BTRFS_I(inode)->index_cnt; | 2894 | u64 offset = BTRFS_I(inode)->index_cnt; |
2589 | unsigned long index; | 2895 | unsigned long index; |
2590 | unsigned long last_index; | 2896 | unsigned long last_index; |
2591 | unsigned int dirty_page = 0; | ||
2592 | struct page *page; | 2897 | struct page *page; |
2593 | struct file_ra_state *ra; | 2898 | struct file_ra_state *ra; |
2594 | int nr = 0; | 2899 | int nr = 0; |
@@ -2601,21 +2906,24 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
2601 | if (!ra) | 2906 | if (!ra) |
2602 | return -ENOMEM; | 2907 | return -ENOMEM; |
2603 | 2908 | ||
2604 | index = (cluster->start - offset) >> PAGE_CACHE_SHIFT; | 2909 | ret = prealloc_file_extent_cluster(inode, cluster); |
2605 | last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT; | 2910 | if (ret) |
2911 | goto out; | ||
2606 | 2912 | ||
2607 | mutex_lock(&inode->i_mutex); | 2913 | file_ra_state_init(ra, inode->i_mapping); |
2608 | 2914 | ||
2609 | i_size_write(inode, cluster->end + 1 - offset); | ||
2610 | ret = setup_extent_mapping(inode, cluster->start - offset, | 2915 | ret = setup_extent_mapping(inode, cluster->start - offset, |
2611 | cluster->end - offset, cluster->start); | 2916 | cluster->end - offset, cluster->start); |
2612 | if (ret) | 2917 | if (ret) |
2613 | goto out_unlock; | 2918 | goto out; |
2614 | |||
2615 | file_ra_state_init(ra, inode->i_mapping); | ||
2616 | 2919 | ||
2617 | WARN_ON(cluster->start != cluster->boundary[0]); | 2920 | index = (cluster->start - offset) >> PAGE_CACHE_SHIFT; |
2921 | last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT; | ||
2618 | while (index <= last_index) { | 2922 | while (index <= last_index) { |
2923 | ret = btrfs_delalloc_reserve_metadata(inode, PAGE_CACHE_SIZE); | ||
2924 | if (ret) | ||
2925 | goto out; | ||
2926 | |||
2619 | page = find_lock_page(inode->i_mapping, index); | 2927 | page = find_lock_page(inode->i_mapping, index); |
2620 | if (!page) { | 2928 | if (!page) { |
2621 | page_cache_sync_readahead(inode->i_mapping, | 2929 | page_cache_sync_readahead(inode->i_mapping, |
@@ -2623,8 +2931,10 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
2623 | last_index + 1 - index); | 2931 | last_index + 1 - index); |
2624 | page = grab_cache_page(inode->i_mapping, index); | 2932 | page = grab_cache_page(inode->i_mapping, index); |
2625 | if (!page) { | 2933 | if (!page) { |
2934 | btrfs_delalloc_release_metadata(inode, | ||
2935 | PAGE_CACHE_SIZE); | ||
2626 | ret = -ENOMEM; | 2936 | ret = -ENOMEM; |
2627 | goto out_unlock; | 2937 | goto out; |
2628 | } | 2938 | } |
2629 | } | 2939 | } |
2630 | 2940 | ||
@@ -2640,8 +2950,10 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
2640 | if (!PageUptodate(page)) { | 2950 | if (!PageUptodate(page)) { |
2641 | unlock_page(page); | 2951 | unlock_page(page); |
2642 | page_cache_release(page); | 2952 | page_cache_release(page); |
2953 | btrfs_delalloc_release_metadata(inode, | ||
2954 | PAGE_CACHE_SIZE); | ||
2643 | ret = -EIO; | 2955 | ret = -EIO; |
2644 | goto out_unlock; | 2956 | goto out; |
2645 | } | 2957 | } |
2646 | } | 2958 | } |
2647 | 2959 | ||
@@ -2660,10 +2972,9 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
2660 | EXTENT_BOUNDARY, GFP_NOFS); | 2972 | EXTENT_BOUNDARY, GFP_NOFS); |
2661 | nr++; | 2973 | nr++; |
2662 | } | 2974 | } |
2663 | btrfs_set_extent_delalloc(inode, page_start, page_end, NULL); | ||
2664 | 2975 | ||
2976 | btrfs_set_extent_delalloc(inode, page_start, page_end, NULL); | ||
2665 | set_page_dirty(page); | 2977 | set_page_dirty(page); |
2666 | dirty_page++; | ||
2667 | 2978 | ||
2668 | unlock_extent(&BTRFS_I(inode)->io_tree, | 2979 | unlock_extent(&BTRFS_I(inode)->io_tree, |
2669 | page_start, page_end, GFP_NOFS); | 2980 | page_start, page_end, GFP_NOFS); |
@@ -2671,20 +2982,11 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
2671 | page_cache_release(page); | 2982 | page_cache_release(page); |
2672 | 2983 | ||
2673 | index++; | 2984 | index++; |
2674 | if (nr < cluster->nr && | 2985 | balance_dirty_pages_ratelimited(inode->i_mapping); |
2675 | page_end + 1 + offset == cluster->boundary[nr]) { | 2986 | btrfs_throttle(BTRFS_I(inode)->root); |
2676 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, | ||
2677 | dirty_page); | ||
2678 | dirty_page = 0; | ||
2679 | } | ||
2680 | } | ||
2681 | if (dirty_page) { | ||
2682 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, | ||
2683 | dirty_page); | ||
2684 | } | 2987 | } |
2685 | WARN_ON(nr != cluster->nr); | 2988 | WARN_ON(nr != cluster->nr); |
2686 | out_unlock: | 2989 | out: |
2687 | mutex_unlock(&inode->i_mutex); | ||
2688 | kfree(ra); | 2990 | kfree(ra); |
2689 | return ret; | 2991 | return ret; |
2690 | } | 2992 | } |
@@ -2870,9 +3172,6 @@ out: | |||
2870 | static int block_use_full_backref(struct reloc_control *rc, | 3172 | static int block_use_full_backref(struct reloc_control *rc, |
2871 | struct extent_buffer *eb) | 3173 | struct extent_buffer *eb) |
2872 | { | 3174 | { |
2873 | struct btrfs_path *path; | ||
2874 | struct btrfs_extent_item *ei; | ||
2875 | struct btrfs_key key; | ||
2876 | u64 flags; | 3175 | u64 flags; |
2877 | int ret; | 3176 | int ret; |
2878 | 3177 | ||
@@ -2880,28 +3179,14 @@ static int block_use_full_backref(struct reloc_control *rc, | |||
2880 | btrfs_header_backref_rev(eb) < BTRFS_MIXED_BACKREF_REV) | 3179 | btrfs_header_backref_rev(eb) < BTRFS_MIXED_BACKREF_REV) |
2881 | return 1; | 3180 | return 1; |
2882 | 3181 | ||
2883 | path = btrfs_alloc_path(); | 3182 | ret = btrfs_lookup_extent_info(NULL, rc->extent_root, |
2884 | BUG_ON(!path); | 3183 | eb->start, eb->len, NULL, &flags); |
2885 | |||
2886 | key.objectid = eb->start; | ||
2887 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
2888 | key.offset = eb->len; | ||
2889 | |||
2890 | path->search_commit_root = 1; | ||
2891 | path->skip_locking = 1; | ||
2892 | ret = btrfs_search_slot(NULL, rc->extent_root, | ||
2893 | &key, path, 0, 0); | ||
2894 | BUG_ON(ret); | 3184 | BUG_ON(ret); |
2895 | 3185 | ||
2896 | ei = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
2897 | struct btrfs_extent_item); | ||
2898 | flags = btrfs_extent_flags(path->nodes[0], ei); | ||
2899 | BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)); | ||
2900 | if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) | 3186 | if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) |
2901 | ret = 1; | 3187 | ret = 1; |
2902 | else | 3188 | else |
2903 | ret = 0; | 3189 | ret = 0; |
2904 | btrfs_free_path(path); | ||
2905 | return ret; | 3190 | return ret; |
2906 | } | 3191 | } |
2907 | 3192 | ||
@@ -3074,22 +3359,10 @@ int add_data_references(struct reloc_control *rc, | |||
3074 | struct btrfs_extent_inline_ref *iref; | 3359 | struct btrfs_extent_inline_ref *iref; |
3075 | unsigned long ptr; | 3360 | unsigned long ptr; |
3076 | unsigned long end; | 3361 | unsigned long end; |
3077 | u32 blocksize; | 3362 | u32 blocksize = btrfs_level_size(rc->extent_root, 0); |
3078 | int ret; | 3363 | int ret; |
3079 | int err = 0; | 3364 | int err = 0; |
3080 | 3365 | ||
3081 | ret = get_new_location(rc->data_inode, NULL, extent_key->objectid, | ||
3082 | extent_key->offset); | ||
3083 | BUG_ON(ret < 0); | ||
3084 | if (ret > 0) { | ||
3085 | /* the relocated data is fragmented */ | ||
3086 | rc->extents_skipped++; | ||
3087 | btrfs_release_path(rc->extent_root, path); | ||
3088 | return 0; | ||
3089 | } | ||
3090 | |||
3091 | blocksize = btrfs_level_size(rc->extent_root, 0); | ||
3092 | |||
3093 | eb = path->nodes[0]; | 3366 | eb = path->nodes[0]; |
3094 | ptr = btrfs_item_ptr_offset(eb, path->slots[0]); | 3367 | ptr = btrfs_item_ptr_offset(eb, path->slots[0]); |
3095 | end = ptr + btrfs_item_size_nr(eb, path->slots[0]); | 3368 | end = ptr + btrfs_item_size_nr(eb, path->slots[0]); |
@@ -3170,7 +3443,8 @@ int add_data_references(struct reloc_control *rc, | |||
3170 | */ | 3443 | */ |
3171 | static noinline_for_stack | 3444 | static noinline_for_stack |
3172 | int find_next_extent(struct btrfs_trans_handle *trans, | 3445 | int find_next_extent(struct btrfs_trans_handle *trans, |
3173 | struct reloc_control *rc, struct btrfs_path *path) | 3446 | struct reloc_control *rc, struct btrfs_path *path, |
3447 | struct btrfs_key *extent_key) | ||
3174 | { | 3448 | { |
3175 | struct btrfs_key key; | 3449 | struct btrfs_key key; |
3176 | struct extent_buffer *leaf; | 3450 | struct extent_buffer *leaf; |
@@ -3225,6 +3499,7 @@ next: | |||
3225 | rc->search_start = end + 1; | 3499 | rc->search_start = end + 1; |
3226 | } else { | 3500 | } else { |
3227 | rc->search_start = key.objectid + key.offset; | 3501 | rc->search_start = key.objectid + key.offset; |
3502 | memcpy(extent_key, &key, sizeof(key)); | ||
3228 | return 0; | 3503 | return 0; |
3229 | } | 3504 | } |
3230 | } | 3505 | } |
@@ -3262,12 +3537,49 @@ static int check_extent_flags(u64 flags) | |||
3262 | return 0; | 3537 | return 0; |
3263 | } | 3538 | } |
3264 | 3539 | ||
3540 | static noinline_for_stack | ||
3541 | int prepare_to_relocate(struct reloc_control *rc) | ||
3542 | { | ||
3543 | struct btrfs_trans_handle *trans; | ||
3544 | int ret; | ||
3545 | |||
3546 | rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root); | ||
3547 | if (!rc->block_rsv) | ||
3548 | return -ENOMEM; | ||
3549 | |||
3550 | /* | ||
3551 | * reserve some space for creating reloc trees. | ||
3552 | * btrfs_init_reloc_root will use them when there | ||
3553 | * is no reservation in transaction handle. | ||
3554 | */ | ||
3555 | ret = btrfs_block_rsv_add(NULL, rc->extent_root, rc->block_rsv, | ||
3556 | rc->extent_root->nodesize * 256, | ||
3557 | &rc->block_rsv_retries); | ||
3558 | if (ret) | ||
3559 | return ret; | ||
3560 | |||
3561 | rc->block_rsv->refill_used = 1; | ||
3562 | btrfs_add_durable_block_rsv(rc->extent_root->fs_info, rc->block_rsv); | ||
3563 | |||
3564 | memset(&rc->cluster, 0, sizeof(rc->cluster)); | ||
3565 | rc->search_start = rc->block_group->key.objectid; | ||
3566 | rc->extents_found = 0; | ||
3567 | rc->nodes_relocated = 0; | ||
3568 | rc->merging_rsv_size = 0; | ||
3569 | rc->block_rsv_retries = 0; | ||
3570 | |||
3571 | rc->create_reloc_tree = 1; | ||
3572 | set_reloc_control(rc); | ||
3573 | |||
3574 | trans = btrfs_join_transaction(rc->extent_root, 1); | ||
3575 | btrfs_commit_transaction(trans, rc->extent_root); | ||
3576 | return 0; | ||
3577 | } | ||
3265 | 3578 | ||
3266 | static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | 3579 | static noinline_for_stack int relocate_block_group(struct reloc_control *rc) |
3267 | { | 3580 | { |
3268 | struct rb_root blocks = RB_ROOT; | 3581 | struct rb_root blocks = RB_ROOT; |
3269 | struct btrfs_key key; | 3582 | struct btrfs_key key; |
3270 | struct file_extent_cluster *cluster; | ||
3271 | struct btrfs_trans_handle *trans = NULL; | 3583 | struct btrfs_trans_handle *trans = NULL; |
3272 | struct btrfs_path *path; | 3584 | struct btrfs_path *path; |
3273 | struct btrfs_extent_item *ei; | 3585 | struct btrfs_extent_item *ei; |
@@ -3277,33 +3589,25 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
3277 | int ret; | 3589 | int ret; |
3278 | int err = 0; | 3590 | int err = 0; |
3279 | 3591 | ||
3280 | cluster = kzalloc(sizeof(*cluster), GFP_NOFS); | ||
3281 | if (!cluster) | ||
3282 | return -ENOMEM; | ||
3283 | |||
3284 | path = btrfs_alloc_path(); | 3592 | path = btrfs_alloc_path(); |
3285 | if (!path) { | 3593 | if (!path) |
3286 | kfree(cluster); | ||
3287 | return -ENOMEM; | 3594 | return -ENOMEM; |
3288 | } | ||
3289 | |||
3290 | rc->extents_found = 0; | ||
3291 | rc->extents_skipped = 0; | ||
3292 | |||
3293 | rc->search_start = rc->block_group->key.objectid; | ||
3294 | clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, | ||
3295 | GFP_NOFS); | ||
3296 | |||
3297 | rc->create_reloc_root = 1; | ||
3298 | set_reloc_control(rc); | ||
3299 | 3595 | ||
3300 | trans = btrfs_start_transaction(rc->extent_root, 1); | 3596 | ret = prepare_to_relocate(rc); |
3301 | btrfs_commit_transaction(trans, rc->extent_root); | 3597 | if (ret) { |
3598 | err = ret; | ||
3599 | goto out_free; | ||
3600 | } | ||
3302 | 3601 | ||
3303 | while (1) { | 3602 | while (1) { |
3304 | trans = btrfs_start_transaction(rc->extent_root, 1); | 3603 | trans = btrfs_start_transaction(rc->extent_root, 0); |
3604 | |||
3605 | if (update_backref_cache(trans, &rc->backref_cache)) { | ||
3606 | btrfs_end_transaction(trans, rc->extent_root); | ||
3607 | continue; | ||
3608 | } | ||
3305 | 3609 | ||
3306 | ret = find_next_extent(trans, rc, path); | 3610 | ret = find_next_extent(trans, rc, path, &key); |
3307 | if (ret < 0) | 3611 | if (ret < 0) |
3308 | err = ret; | 3612 | err = ret; |
3309 | if (ret != 0) | 3613 | if (ret != 0) |
@@ -3313,9 +3617,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
3313 | 3617 | ||
3314 | ei = btrfs_item_ptr(path->nodes[0], path->slots[0], | 3618 | ei = btrfs_item_ptr(path->nodes[0], path->slots[0], |
3315 | struct btrfs_extent_item); | 3619 | struct btrfs_extent_item); |
3316 | btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); | 3620 | item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); |
3317 | item_size = btrfs_item_size_nr(path->nodes[0], | ||
3318 | path->slots[0]); | ||
3319 | if (item_size >= sizeof(*ei)) { | 3621 | if (item_size >= sizeof(*ei)) { |
3320 | flags = btrfs_extent_flags(path->nodes[0], ei); | 3622 | flags = btrfs_extent_flags(path->nodes[0], ei); |
3321 | ret = check_extent_flags(flags); | 3623 | ret = check_extent_flags(flags); |
@@ -3356,73 +3658,100 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
3356 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | 3658 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { |
3357 | ret = add_tree_block(rc, &key, path, &blocks); | 3659 | ret = add_tree_block(rc, &key, path, &blocks); |
3358 | } else if (rc->stage == UPDATE_DATA_PTRS && | 3660 | } else if (rc->stage == UPDATE_DATA_PTRS && |
3359 | (flags & BTRFS_EXTENT_FLAG_DATA)) { | 3661 | (flags & BTRFS_EXTENT_FLAG_DATA)) { |
3360 | ret = add_data_references(rc, &key, path, &blocks); | 3662 | ret = add_data_references(rc, &key, path, &blocks); |
3361 | } else { | 3663 | } else { |
3362 | btrfs_release_path(rc->extent_root, path); | 3664 | btrfs_release_path(rc->extent_root, path); |
3363 | ret = 0; | 3665 | ret = 0; |
3364 | } | 3666 | } |
3365 | if (ret < 0) { | 3667 | if (ret < 0) { |
3366 | err = 0; | 3668 | err = ret; |
3367 | break; | 3669 | break; |
3368 | } | 3670 | } |
3369 | 3671 | ||
3370 | if (!RB_EMPTY_ROOT(&blocks)) { | 3672 | if (!RB_EMPTY_ROOT(&blocks)) { |
3371 | ret = relocate_tree_blocks(trans, rc, &blocks); | 3673 | ret = relocate_tree_blocks(trans, rc, &blocks); |
3372 | if (ret < 0) { | 3674 | if (ret < 0) { |
3675 | if (ret != -EAGAIN) { | ||
3676 | err = ret; | ||
3677 | break; | ||
3678 | } | ||
3679 | rc->extents_found--; | ||
3680 | rc->search_start = key.objectid; | ||
3681 | } | ||
3682 | } | ||
3683 | |||
3684 | ret = btrfs_block_rsv_check(trans, rc->extent_root, | ||
3685 | rc->block_rsv, 0, 5); | ||
3686 | if (ret < 0) { | ||
3687 | if (ret != -EAGAIN) { | ||
3373 | err = ret; | 3688 | err = ret; |
3689 | WARN_ON(1); | ||
3374 | break; | 3690 | break; |
3375 | } | 3691 | } |
3692 | rc->commit_transaction = 1; | ||
3376 | } | 3693 | } |
3377 | 3694 | ||
3378 | nr = trans->blocks_used; | 3695 | if (rc->commit_transaction) { |
3379 | btrfs_end_transaction(trans, rc->extent_root); | 3696 | rc->commit_transaction = 0; |
3697 | ret = btrfs_commit_transaction(trans, rc->extent_root); | ||
3698 | BUG_ON(ret); | ||
3699 | } else { | ||
3700 | nr = trans->blocks_used; | ||
3701 | btrfs_end_transaction_throttle(trans, rc->extent_root); | ||
3702 | btrfs_btree_balance_dirty(rc->extent_root, nr); | ||
3703 | } | ||
3380 | trans = NULL; | 3704 | trans = NULL; |
3381 | btrfs_btree_balance_dirty(rc->extent_root, nr); | ||
3382 | 3705 | ||
3383 | if (rc->stage == MOVE_DATA_EXTENTS && | 3706 | if (rc->stage == MOVE_DATA_EXTENTS && |
3384 | (flags & BTRFS_EXTENT_FLAG_DATA)) { | 3707 | (flags & BTRFS_EXTENT_FLAG_DATA)) { |
3385 | rc->found_file_extent = 1; | 3708 | rc->found_file_extent = 1; |
3386 | ret = relocate_data_extent(rc->data_inode, | 3709 | ret = relocate_data_extent(rc->data_inode, |
3387 | &key, cluster); | 3710 | &key, &rc->cluster); |
3388 | if (ret < 0) { | 3711 | if (ret < 0) { |
3389 | err = ret; | 3712 | err = ret; |
3390 | break; | 3713 | break; |
3391 | } | 3714 | } |
3392 | } | 3715 | } |
3393 | } | 3716 | } |
3394 | btrfs_free_path(path); | 3717 | |
3718 | btrfs_release_path(rc->extent_root, path); | ||
3719 | clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, | ||
3720 | GFP_NOFS); | ||
3395 | 3721 | ||
3396 | if (trans) { | 3722 | if (trans) { |
3397 | nr = trans->blocks_used; | 3723 | nr = trans->blocks_used; |
3398 | btrfs_end_transaction(trans, rc->extent_root); | 3724 | btrfs_end_transaction_throttle(trans, rc->extent_root); |
3399 | btrfs_btree_balance_dirty(rc->extent_root, nr); | 3725 | btrfs_btree_balance_dirty(rc->extent_root, nr); |
3400 | } | 3726 | } |
3401 | 3727 | ||
3402 | if (!err) { | 3728 | if (!err) { |
3403 | ret = relocate_file_extent_cluster(rc->data_inode, cluster); | 3729 | ret = relocate_file_extent_cluster(rc->data_inode, |
3730 | &rc->cluster); | ||
3404 | if (ret < 0) | 3731 | if (ret < 0) |
3405 | err = ret; | 3732 | err = ret; |
3406 | } | 3733 | } |
3407 | 3734 | ||
3408 | kfree(cluster); | 3735 | rc->create_reloc_tree = 0; |
3736 | set_reloc_control(rc); | ||
3409 | 3737 | ||
3410 | rc->create_reloc_root = 0; | 3738 | backref_cache_cleanup(&rc->backref_cache); |
3411 | smp_mb(); | 3739 | btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1); |
3412 | 3740 | ||
3413 | if (rc->extents_found > 0) { | 3741 | err = prepare_to_merge(rc, err); |
3414 | trans = btrfs_start_transaction(rc->extent_root, 1); | ||
3415 | btrfs_commit_transaction(trans, rc->extent_root); | ||
3416 | } | ||
3417 | 3742 | ||
3418 | merge_reloc_roots(rc); | 3743 | merge_reloc_roots(rc); |
3419 | 3744 | ||
3745 | rc->merge_reloc_tree = 0; | ||
3420 | unset_reloc_control(rc); | 3746 | unset_reloc_control(rc); |
3747 | btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1); | ||
3421 | 3748 | ||
3422 | /* get rid of pinned extents */ | 3749 | /* get rid of pinned extents */ |
3423 | trans = btrfs_start_transaction(rc->extent_root, 1); | 3750 | trans = btrfs_join_transaction(rc->extent_root, 1); |
3424 | btrfs_commit_transaction(trans, rc->extent_root); | 3751 | btrfs_commit_transaction(trans, rc->extent_root); |
3425 | 3752 | out_free: | |
3753 | btrfs_free_block_rsv(rc->extent_root, rc->block_rsv); | ||
3754 | btrfs_free_path(path); | ||
3426 | return err; | 3755 | return err; |
3427 | } | 3756 | } |
3428 | 3757 | ||
@@ -3448,7 +3777,8 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans, | |||
3448 | btrfs_set_inode_generation(leaf, item, 1); | 3777 | btrfs_set_inode_generation(leaf, item, 1); |
3449 | btrfs_set_inode_size(leaf, item, 0); | 3778 | btrfs_set_inode_size(leaf, item, 0); |
3450 | btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); | 3779 | btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); |
3451 | btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS); | 3780 | btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS | |
3781 | BTRFS_INODE_PREALLOC); | ||
3452 | btrfs_mark_buffer_dirty(leaf); | 3782 | btrfs_mark_buffer_dirty(leaf); |
3453 | btrfs_release_path(root, path); | 3783 | btrfs_release_path(root, path); |
3454 | out: | 3784 | out: |
@@ -3460,8 +3790,9 @@ out: | |||
3460 | * helper to create inode for data relocation. | 3790 | * helper to create inode for data relocation. |
3461 | * the inode is in data relocation tree and its link count is 0 | 3791 | * the inode is in data relocation tree and its link count is 0 |
3462 | */ | 3792 | */ |
3463 | static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, | 3793 | static noinline_for_stack |
3464 | struct btrfs_block_group_cache *group) | 3794 | struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, |
3795 | struct btrfs_block_group_cache *group) | ||
3465 | { | 3796 | { |
3466 | struct inode *inode = NULL; | 3797 | struct inode *inode = NULL; |
3467 | struct btrfs_trans_handle *trans; | 3798 | struct btrfs_trans_handle *trans; |
@@ -3475,8 +3806,9 @@ static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, | |||
3475 | if (IS_ERR(root)) | 3806 | if (IS_ERR(root)) |
3476 | return ERR_CAST(root); | 3807 | return ERR_CAST(root); |
3477 | 3808 | ||
3478 | trans = btrfs_start_transaction(root, 1); | 3809 | trans = btrfs_start_transaction(root, 6); |
3479 | BUG_ON(!trans); | 3810 | if (IS_ERR(trans)) |
3811 | return ERR_CAST(trans); | ||
3480 | 3812 | ||
3481 | err = btrfs_find_free_objectid(trans, root, objectid, &objectid); | 3813 | err = btrfs_find_free_objectid(trans, root, objectid, &objectid); |
3482 | if (err) | 3814 | if (err) |
@@ -3496,7 +3828,6 @@ static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, | |||
3496 | out: | 3828 | out: |
3497 | nr = trans->blocks_used; | 3829 | nr = trans->blocks_used; |
3498 | btrfs_end_transaction(trans, root); | 3830 | btrfs_end_transaction(trans, root); |
3499 | |||
3500 | btrfs_btree_balance_dirty(root, nr); | 3831 | btrfs_btree_balance_dirty(root, nr); |
3501 | if (err) { | 3832 | if (err) { |
3502 | if (inode) | 3833 | if (inode) |
@@ -3506,6 +3837,21 @@ out: | |||
3506 | return inode; | 3837 | return inode; |
3507 | } | 3838 | } |
3508 | 3839 | ||
3840 | static struct reloc_control *alloc_reloc_control(void) | ||
3841 | { | ||
3842 | struct reloc_control *rc; | ||
3843 | |||
3844 | rc = kzalloc(sizeof(*rc), GFP_NOFS); | ||
3845 | if (!rc) | ||
3846 | return NULL; | ||
3847 | |||
3848 | INIT_LIST_HEAD(&rc->reloc_roots); | ||
3849 | backref_cache_init(&rc->backref_cache); | ||
3850 | mapping_tree_init(&rc->reloc_root_tree); | ||
3851 | extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS); | ||
3852 | return rc; | ||
3853 | } | ||
3854 | |||
3509 | /* | 3855 | /* |
3510 | * function to relocate all extents in a block group. | 3856 | * function to relocate all extents in a block group. |
3511 | */ | 3857 | */ |
@@ -3514,24 +3860,26 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
3514 | struct btrfs_fs_info *fs_info = extent_root->fs_info; | 3860 | struct btrfs_fs_info *fs_info = extent_root->fs_info; |
3515 | struct reloc_control *rc; | 3861 | struct reloc_control *rc; |
3516 | int ret; | 3862 | int ret; |
3863 | int rw = 0; | ||
3517 | int err = 0; | 3864 | int err = 0; |
3518 | 3865 | ||
3519 | rc = kzalloc(sizeof(*rc), GFP_NOFS); | 3866 | rc = alloc_reloc_control(); |
3520 | if (!rc) | 3867 | if (!rc) |
3521 | return -ENOMEM; | 3868 | return -ENOMEM; |
3522 | 3869 | ||
3523 | mapping_tree_init(&rc->reloc_root_tree); | 3870 | rc->extent_root = extent_root; |
3524 | extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS); | ||
3525 | INIT_LIST_HEAD(&rc->reloc_roots); | ||
3526 | 3871 | ||
3527 | rc->block_group = btrfs_lookup_block_group(fs_info, group_start); | 3872 | rc->block_group = btrfs_lookup_block_group(fs_info, group_start); |
3528 | BUG_ON(!rc->block_group); | 3873 | BUG_ON(!rc->block_group); |
3529 | 3874 | ||
3530 | btrfs_init_workers(&rc->workers, "relocate", | 3875 | if (!rc->block_group->ro) { |
3531 | fs_info->thread_pool_size, NULL); | 3876 | ret = btrfs_set_block_group_ro(extent_root, rc->block_group); |
3532 | 3877 | if (ret) { | |
3533 | rc->extent_root = extent_root; | 3878 | err = ret; |
3534 | btrfs_prepare_block_group_relocation(extent_root, rc->block_group); | 3879 | goto out; |
3880 | } | ||
3881 | rw = 1; | ||
3882 | } | ||
3535 | 3883 | ||
3536 | rc->data_inode = create_reloc_inode(fs_info, rc->block_group); | 3884 | rc->data_inode = create_reloc_inode(fs_info, rc->block_group); |
3537 | if (IS_ERR(rc->data_inode)) { | 3885 | if (IS_ERR(rc->data_inode)) { |
@@ -3548,9 +3896,6 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
3548 | btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0); | 3896 | btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0); |
3549 | 3897 | ||
3550 | while (1) { | 3898 | while (1) { |
3551 | rc->extents_found = 0; | ||
3552 | rc->extents_skipped = 0; | ||
3553 | |||
3554 | mutex_lock(&fs_info->cleaner_mutex); | 3899 | mutex_lock(&fs_info->cleaner_mutex); |
3555 | 3900 | ||
3556 | btrfs_clean_old_snapshots(fs_info->tree_root); | 3901 | btrfs_clean_old_snapshots(fs_info->tree_root); |
@@ -3559,7 +3904,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
3559 | mutex_unlock(&fs_info->cleaner_mutex); | 3904 | mutex_unlock(&fs_info->cleaner_mutex); |
3560 | if (ret < 0) { | 3905 | if (ret < 0) { |
3561 | err = ret; | 3906 | err = ret; |
3562 | break; | 3907 | goto out; |
3563 | } | 3908 | } |
3564 | 3909 | ||
3565 | if (rc->extents_found == 0) | 3910 | if (rc->extents_found == 0) |
@@ -3573,18 +3918,6 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
3573 | invalidate_mapping_pages(rc->data_inode->i_mapping, | 3918 | invalidate_mapping_pages(rc->data_inode->i_mapping, |
3574 | 0, -1); | 3919 | 0, -1); |
3575 | rc->stage = UPDATE_DATA_PTRS; | 3920 | rc->stage = UPDATE_DATA_PTRS; |
3576 | } else if (rc->stage == UPDATE_DATA_PTRS && | ||
3577 | rc->extents_skipped >= rc->extents_found) { | ||
3578 | iput(rc->data_inode); | ||
3579 | rc->data_inode = create_reloc_inode(fs_info, | ||
3580 | rc->block_group); | ||
3581 | if (IS_ERR(rc->data_inode)) { | ||
3582 | err = PTR_ERR(rc->data_inode); | ||
3583 | rc->data_inode = NULL; | ||
3584 | break; | ||
3585 | } | ||
3586 | rc->stage = MOVE_DATA_EXTENTS; | ||
3587 | rc->found_file_extent = 0; | ||
3588 | } | 3921 | } |
3589 | } | 3922 | } |
3590 | 3923 | ||
@@ -3597,8 +3930,9 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
3597 | WARN_ON(rc->block_group->reserved > 0); | 3930 | WARN_ON(rc->block_group->reserved > 0); |
3598 | WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0); | 3931 | WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0); |
3599 | out: | 3932 | out: |
3933 | if (err && rw) | ||
3934 | btrfs_set_block_group_rw(extent_root, rc->block_group); | ||
3600 | iput(rc->data_inode); | 3935 | iput(rc->data_inode); |
3601 | btrfs_stop_workers(&rc->workers); | ||
3602 | btrfs_put_block_group(rc->block_group); | 3936 | btrfs_put_block_group(rc->block_group); |
3603 | kfree(rc); | 3937 | kfree(rc); |
3604 | return err; | 3938 | return err; |
@@ -3609,7 +3943,7 @@ static noinline_for_stack int mark_garbage_root(struct btrfs_root *root) | |||
3609 | struct btrfs_trans_handle *trans; | 3943 | struct btrfs_trans_handle *trans; |
3610 | int ret; | 3944 | int ret; |
3611 | 3945 | ||
3612 | trans = btrfs_start_transaction(root->fs_info->tree_root, 1); | 3946 | trans = btrfs_start_transaction(root->fs_info->tree_root, 0); |
3613 | 3947 | ||
3614 | memset(&root->root_item.drop_progress, 0, | 3948 | memset(&root->root_item.drop_progress, 0, |
3615 | sizeof(root->root_item.drop_progress)); | 3949 | sizeof(root->root_item.drop_progress)); |
@@ -3702,20 +4036,20 @@ int btrfs_recover_relocation(struct btrfs_root *root) | |||
3702 | if (list_empty(&reloc_roots)) | 4036 | if (list_empty(&reloc_roots)) |
3703 | goto out; | 4037 | goto out; |
3704 | 4038 | ||
3705 | rc = kzalloc(sizeof(*rc), GFP_NOFS); | 4039 | rc = alloc_reloc_control(); |
3706 | if (!rc) { | 4040 | if (!rc) { |
3707 | err = -ENOMEM; | 4041 | err = -ENOMEM; |
3708 | goto out; | 4042 | goto out; |
3709 | } | 4043 | } |
3710 | 4044 | ||
3711 | mapping_tree_init(&rc->reloc_root_tree); | ||
3712 | INIT_LIST_HEAD(&rc->reloc_roots); | ||
3713 | btrfs_init_workers(&rc->workers, "relocate", | ||
3714 | root->fs_info->thread_pool_size, NULL); | ||
3715 | rc->extent_root = root->fs_info->extent_root; | 4045 | rc->extent_root = root->fs_info->extent_root; |
3716 | 4046 | ||
3717 | set_reloc_control(rc); | 4047 | set_reloc_control(rc); |
3718 | 4048 | ||
4049 | trans = btrfs_join_transaction(rc->extent_root, 1); | ||
4050 | |||
4051 | rc->merge_reloc_tree = 1; | ||
4052 | |||
3719 | while (!list_empty(&reloc_roots)) { | 4053 | while (!list_empty(&reloc_roots)) { |
3720 | reloc_root = list_entry(reloc_roots.next, | 4054 | reloc_root = list_entry(reloc_roots.next, |
3721 | struct btrfs_root, root_list); | 4055 | struct btrfs_root, root_list); |
@@ -3735,20 +4069,16 @@ int btrfs_recover_relocation(struct btrfs_root *root) | |||
3735 | fs_root->reloc_root = reloc_root; | 4069 | fs_root->reloc_root = reloc_root; |
3736 | } | 4070 | } |
3737 | 4071 | ||
3738 | trans = btrfs_start_transaction(rc->extent_root, 1); | ||
3739 | btrfs_commit_transaction(trans, rc->extent_root); | 4072 | btrfs_commit_transaction(trans, rc->extent_root); |
3740 | 4073 | ||
3741 | merge_reloc_roots(rc); | 4074 | merge_reloc_roots(rc); |
3742 | 4075 | ||
3743 | unset_reloc_control(rc); | 4076 | unset_reloc_control(rc); |
3744 | 4077 | ||
3745 | trans = btrfs_start_transaction(rc->extent_root, 1); | 4078 | trans = btrfs_join_transaction(rc->extent_root, 1); |
3746 | btrfs_commit_transaction(trans, rc->extent_root); | 4079 | btrfs_commit_transaction(trans, rc->extent_root); |
3747 | out: | 4080 | out: |
3748 | if (rc) { | 4081 | kfree(rc); |
3749 | btrfs_stop_workers(&rc->workers); | ||
3750 | kfree(rc); | ||
3751 | } | ||
3752 | while (!list_empty(&reloc_roots)) { | 4082 | while (!list_empty(&reloc_roots)) { |
3753 | reloc_root = list_entry(reloc_roots.next, | 4083 | reloc_root = list_entry(reloc_roots.next, |
3754 | struct btrfs_root, root_list); | 4084 | struct btrfs_root, root_list); |
@@ -3814,3 +4144,130 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) | |||
3814 | btrfs_put_ordered_extent(ordered); | 4144 | btrfs_put_ordered_extent(ordered); |
3815 | return 0; | 4145 | return 0; |
3816 | } | 4146 | } |
4147 | |||
4148 | void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, | ||
4149 | struct btrfs_root *root, struct extent_buffer *buf, | ||
4150 | struct extent_buffer *cow) | ||
4151 | { | ||
4152 | struct reloc_control *rc; | ||
4153 | struct backref_node *node; | ||
4154 | int first_cow = 0; | ||
4155 | int level; | ||
4156 | int ret; | ||
4157 | |||
4158 | rc = root->fs_info->reloc_ctl; | ||
4159 | if (!rc) | ||
4160 | return; | ||
4161 | |||
4162 | BUG_ON(rc->stage == UPDATE_DATA_PTRS && | ||
4163 | root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID); | ||
4164 | |||
4165 | level = btrfs_header_level(buf); | ||
4166 | if (btrfs_header_generation(buf) <= | ||
4167 | btrfs_root_last_snapshot(&root->root_item)) | ||
4168 | first_cow = 1; | ||
4169 | |||
4170 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID && | ||
4171 | rc->create_reloc_tree) { | ||
4172 | WARN_ON(!first_cow && level == 0); | ||
4173 | |||
4174 | node = rc->backref_cache.path[level]; | ||
4175 | BUG_ON(node->bytenr != buf->start && | ||
4176 | node->new_bytenr != buf->start); | ||
4177 | |||
4178 | drop_node_buffer(node); | ||
4179 | extent_buffer_get(cow); | ||
4180 | node->eb = cow; | ||
4181 | node->new_bytenr = cow->start; | ||
4182 | |||
4183 | if (!node->pending) { | ||
4184 | list_move_tail(&node->list, | ||
4185 | &rc->backref_cache.pending[level]); | ||
4186 | node->pending = 1; | ||
4187 | } | ||
4188 | |||
4189 | if (first_cow) | ||
4190 | __mark_block_processed(rc, node); | ||
4191 | |||
4192 | if (first_cow && level > 0) | ||
4193 | rc->nodes_relocated += buf->len; | ||
4194 | } | ||
4195 | |||
4196 | if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS) { | ||
4197 | ret = replace_file_extents(trans, rc, root, cow); | ||
4198 | BUG_ON(ret); | ||
4199 | } | ||
4200 | } | ||
4201 | |||
4202 | /* | ||
4203 | * called before creating snapshot. it calculates metadata reservation | ||
4204 | * required for relocating tree blocks in the snapshot | ||
4205 | */ | ||
4206 | void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans, | ||
4207 | struct btrfs_pending_snapshot *pending, | ||
4208 | u64 *bytes_to_reserve) | ||
4209 | { | ||
4210 | struct btrfs_root *root; | ||
4211 | struct reloc_control *rc; | ||
4212 | |||
4213 | root = pending->root; | ||
4214 | if (!root->reloc_root) | ||
4215 | return; | ||
4216 | |||
4217 | rc = root->fs_info->reloc_ctl; | ||
4218 | if (!rc->merge_reloc_tree) | ||
4219 | return; | ||
4220 | |||
4221 | root = root->reloc_root; | ||
4222 | BUG_ON(btrfs_root_refs(&root->root_item) == 0); | ||
4223 | /* | ||
4224 | * relocation is in the stage of merging trees. the space | ||
4225 | * used by merging a reloc tree is twice the size of | ||
4226 | * relocated tree nodes in the worst case. half for cowing | ||
4227 | * the reloc tree, half for cowing the fs tree. the space | ||
4228 | * used by cowing the reloc tree will be freed after the | ||
4229 | * tree is dropped. if we create snapshot, cowing the fs | ||
4230 | * tree may use more space than it frees. so we need to | ||
4231 | * reserve extra space. | ||
4232 | */ | ||
4233 | *bytes_to_reserve += rc->nodes_relocated; | ||
4234 | } | ||
4235 | |||
4236 | /* | ||
4237 | * called after snapshot is created. migrate block reservation | ||
4238 | * and create reloc root for the newly created snapshot | ||
4239 | */ | ||
4240 | void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, | ||
4241 | struct btrfs_pending_snapshot *pending) | ||
4242 | { | ||
4243 | struct btrfs_root *root = pending->root; | ||
4244 | struct btrfs_root *reloc_root; | ||
4245 | struct btrfs_root *new_root; | ||
4246 | struct reloc_control *rc; | ||
4247 | int ret; | ||
4248 | |||
4249 | if (!root->reloc_root) | ||
4250 | return; | ||
4251 | |||
4252 | rc = root->fs_info->reloc_ctl; | ||
4253 | rc->merging_rsv_size += rc->nodes_relocated; | ||
4254 | |||
4255 | if (rc->merge_reloc_tree) { | ||
4256 | ret = btrfs_block_rsv_migrate(&pending->block_rsv, | ||
4257 | rc->block_rsv, | ||
4258 | rc->nodes_relocated); | ||
4259 | BUG_ON(ret); | ||
4260 | } | ||
4261 | |||
4262 | new_root = pending->snap; | ||
4263 | reloc_root = create_reloc_root(trans, root->reloc_root, | ||
4264 | new_root->root_key.objectid); | ||
4265 | |||
4266 | __add_reloc_root(reloc_root); | ||
4267 | new_root->reloc_root = reloc_root; | ||
4268 | |||
4269 | if (rc->create_reloc_tree) { | ||
4270 | ret = clone_backref_node(trans, rc, root, reloc_root); | ||
4271 | BUG_ON(ret); | ||
4272 | } | ||
4273 | } | ||
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 67fa2d29d663..b91ccd972644 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c | |||
@@ -259,6 +259,8 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | |||
259 | struct extent_buffer *leaf; | 259 | struct extent_buffer *leaf; |
260 | struct btrfs_path *path; | 260 | struct btrfs_path *path; |
261 | struct btrfs_key key; | 261 | struct btrfs_key key; |
262 | struct btrfs_key root_key; | ||
263 | struct btrfs_root *root; | ||
262 | int err = 0; | 264 | int err = 0; |
263 | int ret; | 265 | int ret; |
264 | 266 | ||
@@ -270,6 +272,9 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | |||
270 | key.type = BTRFS_ORPHAN_ITEM_KEY; | 272 | key.type = BTRFS_ORPHAN_ITEM_KEY; |
271 | key.offset = 0; | 273 | key.offset = 0; |
272 | 274 | ||
275 | root_key.type = BTRFS_ROOT_ITEM_KEY; | ||
276 | root_key.offset = (u64)-1; | ||
277 | |||
273 | while (1) { | 278 | while (1) { |
274 | ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0); | 279 | ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0); |
275 | if (ret < 0) { | 280 | if (ret < 0) { |
@@ -294,13 +299,25 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | |||
294 | key.type != BTRFS_ORPHAN_ITEM_KEY) | 299 | key.type != BTRFS_ORPHAN_ITEM_KEY) |
295 | break; | 300 | break; |
296 | 301 | ||
297 | ret = btrfs_find_dead_roots(tree_root, key.offset); | 302 | root_key.objectid = key.offset; |
298 | if (ret) { | 303 | key.offset++; |
304 | |||
305 | root = btrfs_read_fs_root_no_name(tree_root->fs_info, | ||
306 | &root_key); | ||
307 | if (!IS_ERR(root)) | ||
308 | continue; | ||
309 | |||
310 | ret = PTR_ERR(root); | ||
311 | if (ret != -ENOENT) { | ||
299 | err = ret; | 312 | err = ret; |
300 | break; | 313 | break; |
301 | } | 314 | } |
302 | 315 | ||
303 | key.offset++; | 316 | ret = btrfs_find_dead_roots(tree_root, root_key.objectid); |
317 | if (ret) { | ||
318 | err = ret; | ||
319 | break; | ||
320 | } | ||
304 | } | 321 | } |
305 | 322 | ||
306 | btrfs_free_path(path); | 323 | btrfs_free_path(path); |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 1866dff0538e..d34b2dfc9628 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -498,7 +498,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait) | |||
498 | btrfs_start_delalloc_inodes(root, 0); | 498 | btrfs_start_delalloc_inodes(root, 0); |
499 | btrfs_wait_ordered_extents(root, 0, 0); | 499 | btrfs_wait_ordered_extents(root, 0, 0); |
500 | 500 | ||
501 | trans = btrfs_start_transaction(root, 1); | 501 | trans = btrfs_start_transaction(root, 0); |
502 | ret = btrfs_commit_transaction(trans, root); | 502 | ret = btrfs_commit_transaction(trans, root); |
503 | return ret; | 503 | return ret; |
504 | } | 504 | } |
@@ -694,11 +694,11 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
694 | if (btrfs_super_log_root(&root->fs_info->super_copy) != 0) | 694 | if (btrfs_super_log_root(&root->fs_info->super_copy) != 0) |
695 | return -EINVAL; | 695 | return -EINVAL; |
696 | 696 | ||
697 | /* recover relocation */ | 697 | ret = btrfs_cleanup_fs_roots(root->fs_info); |
698 | ret = btrfs_recover_relocation(root); | ||
699 | WARN_ON(ret); | 698 | WARN_ON(ret); |
700 | 699 | ||
701 | ret = btrfs_cleanup_fs_roots(root->fs_info); | 700 | /* recover relocation */ |
701 | ret = btrfs_recover_relocation(root); | ||
702 | WARN_ON(ret); | 702 | WARN_ON(ret); |
703 | 703 | ||
704 | sb->s_flags &= ~MS_RDONLY; | 704 | sb->s_flags &= ~MS_RDONLY; |
@@ -714,34 +714,18 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
714 | struct list_head *head = &root->fs_info->space_info; | 714 | struct list_head *head = &root->fs_info->space_info; |
715 | struct btrfs_space_info *found; | 715 | struct btrfs_space_info *found; |
716 | u64 total_used = 0; | 716 | u64 total_used = 0; |
717 | u64 data_used = 0; | ||
718 | int bits = dentry->d_sb->s_blocksize_bits; | 717 | int bits = dentry->d_sb->s_blocksize_bits; |
719 | __be32 *fsid = (__be32 *)root->fs_info->fsid; | 718 | __be32 *fsid = (__be32 *)root->fs_info->fsid; |
720 | 719 | ||
721 | rcu_read_lock(); | 720 | rcu_read_lock(); |
722 | list_for_each_entry_rcu(found, head, list) { | 721 | list_for_each_entry_rcu(found, head, list) |
723 | if (found->flags & (BTRFS_BLOCK_GROUP_DUP| | 722 | total_used += found->disk_used; |
724 | BTRFS_BLOCK_GROUP_RAID10| | ||
725 | BTRFS_BLOCK_GROUP_RAID1)) { | ||
726 | total_used += found->bytes_used; | ||
727 | if (found->flags & BTRFS_BLOCK_GROUP_DATA) | ||
728 | data_used += found->bytes_used; | ||
729 | else | ||
730 | data_used += found->total_bytes; | ||
731 | } | ||
732 | |||
733 | total_used += found->bytes_used; | ||
734 | if (found->flags & BTRFS_BLOCK_GROUP_DATA) | ||
735 | data_used += found->bytes_used; | ||
736 | else | ||
737 | data_used += found->total_bytes; | ||
738 | } | ||
739 | rcu_read_unlock(); | 723 | rcu_read_unlock(); |
740 | 724 | ||
741 | buf->f_namelen = BTRFS_NAME_LEN; | 725 | buf->f_namelen = BTRFS_NAME_LEN; |
742 | buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; | 726 | buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; |
743 | buf->f_bfree = buf->f_blocks - (total_used >> bits); | 727 | buf->f_bfree = buf->f_blocks - (total_used >> bits); |
744 | buf->f_bavail = buf->f_blocks - (data_used >> bits); | 728 | buf->f_bavail = buf->f_bfree; |
745 | buf->f_bsize = dentry->d_sb->s_blocksize; | 729 | buf->f_bsize = dentry->d_sb->s_blocksize; |
746 | buf->f_type = BTRFS_SUPER_MAGIC; | 730 | buf->f_type = BTRFS_SUPER_MAGIC; |
747 | 731 | ||
@@ -832,11 +816,14 @@ static const struct file_operations btrfs_ctl_fops = { | |||
832 | }; | 816 | }; |
833 | 817 | ||
834 | static struct miscdevice btrfs_misc = { | 818 | static struct miscdevice btrfs_misc = { |
835 | .minor = MISC_DYNAMIC_MINOR, | 819 | .minor = BTRFS_MINOR, |
836 | .name = "btrfs-control", | 820 | .name = "btrfs-control", |
837 | .fops = &btrfs_ctl_fops | 821 | .fops = &btrfs_ctl_fops |
838 | }; | 822 | }; |
839 | 823 | ||
824 | MODULE_ALIAS_MISCDEV(BTRFS_MINOR); | ||
825 | MODULE_ALIAS("devname:btrfs-control"); | ||
826 | |||
840 | static int btrfs_interface_init(void) | 827 | static int btrfs_interface_init(void) |
841 | { | 828 | { |
842 | return misc_register(&btrfs_misc); | 829 | return misc_register(&btrfs_misc); |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 2cb116099b90..66e4c66cc63b 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -165,54 +165,89 @@ enum btrfs_trans_type { | |||
165 | TRANS_USERSPACE, | 165 | TRANS_USERSPACE, |
166 | }; | 166 | }; |
167 | 167 | ||
168 | static int may_wait_transaction(struct btrfs_root *root, int type) | ||
169 | { | ||
170 | if (!root->fs_info->log_root_recovering && | ||
171 | ((type == TRANS_START && !root->fs_info->open_ioctl_trans) || | ||
172 | type == TRANS_USERSPACE)) | ||
173 | return 1; | ||
174 | return 0; | ||
175 | } | ||
176 | |||
168 | static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | 177 | static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, |
169 | int num_blocks, int type) | 178 | u64 num_items, int type) |
170 | { | 179 | { |
171 | struct btrfs_trans_handle *h = | 180 | struct btrfs_trans_handle *h; |
172 | kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); | 181 | struct btrfs_transaction *cur_trans; |
182 | int retries = 0; | ||
173 | int ret; | 183 | int ret; |
184 | again: | ||
185 | h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); | ||
186 | if (!h) | ||
187 | return ERR_PTR(-ENOMEM); | ||
174 | 188 | ||
175 | mutex_lock(&root->fs_info->trans_mutex); | 189 | mutex_lock(&root->fs_info->trans_mutex); |
176 | if (!root->fs_info->log_root_recovering && | 190 | if (may_wait_transaction(root, type)) |
177 | ((type == TRANS_START && !root->fs_info->open_ioctl_trans) || | ||
178 | type == TRANS_USERSPACE)) | ||
179 | wait_current_trans(root); | 191 | wait_current_trans(root); |
192 | |||
180 | ret = join_transaction(root); | 193 | ret = join_transaction(root); |
181 | BUG_ON(ret); | 194 | BUG_ON(ret); |
182 | 195 | ||
183 | h->transid = root->fs_info->running_transaction->transid; | 196 | cur_trans = root->fs_info->running_transaction; |
184 | h->transaction = root->fs_info->running_transaction; | 197 | cur_trans->use_count++; |
185 | h->blocks_reserved = num_blocks; | 198 | mutex_unlock(&root->fs_info->trans_mutex); |
199 | |||
200 | h->transid = cur_trans->transid; | ||
201 | h->transaction = cur_trans; | ||
186 | h->blocks_used = 0; | 202 | h->blocks_used = 0; |
187 | h->block_group = 0; | 203 | h->block_group = 0; |
188 | h->alloc_exclude_nr = 0; | 204 | h->bytes_reserved = 0; |
189 | h->alloc_exclude_start = 0; | ||
190 | h->delayed_ref_updates = 0; | 205 | h->delayed_ref_updates = 0; |
206 | h->block_rsv = NULL; | ||
191 | 207 | ||
192 | if (!current->journal_info && type != TRANS_USERSPACE) | 208 | smp_mb(); |
193 | current->journal_info = h; | 209 | if (cur_trans->blocked && may_wait_transaction(root, type)) { |
210 | btrfs_commit_transaction(h, root); | ||
211 | goto again; | ||
212 | } | ||
213 | |||
214 | if (num_items > 0) { | ||
215 | ret = btrfs_trans_reserve_metadata(h, root, num_items, | ||
216 | &retries); | ||
217 | if (ret == -EAGAIN) { | ||
218 | btrfs_commit_transaction(h, root); | ||
219 | goto again; | ||
220 | } | ||
221 | if (ret < 0) { | ||
222 | btrfs_end_transaction(h, root); | ||
223 | return ERR_PTR(ret); | ||
224 | } | ||
225 | } | ||
194 | 226 | ||
195 | root->fs_info->running_transaction->use_count++; | 227 | mutex_lock(&root->fs_info->trans_mutex); |
196 | record_root_in_trans(h, root); | 228 | record_root_in_trans(h, root); |
197 | mutex_unlock(&root->fs_info->trans_mutex); | 229 | mutex_unlock(&root->fs_info->trans_mutex); |
230 | |||
231 | if (!current->journal_info && type != TRANS_USERSPACE) | ||
232 | current->journal_info = h; | ||
198 | return h; | 233 | return h; |
199 | } | 234 | } |
200 | 235 | ||
201 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, | 236 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, |
202 | int num_blocks) | 237 | int num_items) |
203 | { | 238 | { |
204 | return start_transaction(root, num_blocks, TRANS_START); | 239 | return start_transaction(root, num_items, TRANS_START); |
205 | } | 240 | } |
206 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, | 241 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, |
207 | int num_blocks) | 242 | int num_blocks) |
208 | { | 243 | { |
209 | return start_transaction(root, num_blocks, TRANS_JOIN); | 244 | return start_transaction(root, 0, TRANS_JOIN); |
210 | } | 245 | } |
211 | 246 | ||
212 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, | 247 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, |
213 | int num_blocks) | 248 | int num_blocks) |
214 | { | 249 | { |
215 | return start_transaction(r, num_blocks, TRANS_USERSPACE); | 250 | return start_transaction(r, 0, TRANS_USERSPACE); |
216 | } | 251 | } |
217 | 252 | ||
218 | /* wait for a transaction commit to be fully complete */ | 253 | /* wait for a transaction commit to be fully complete */ |
@@ -286,10 +321,36 @@ void btrfs_throttle(struct btrfs_root *root) | |||
286 | mutex_unlock(&root->fs_info->trans_mutex); | 321 | mutex_unlock(&root->fs_info->trans_mutex); |
287 | } | 322 | } |
288 | 323 | ||
324 | static int should_end_transaction(struct btrfs_trans_handle *trans, | ||
325 | struct btrfs_root *root) | ||
326 | { | ||
327 | int ret; | ||
328 | ret = btrfs_block_rsv_check(trans, root, | ||
329 | &root->fs_info->global_block_rsv, 0, 5); | ||
330 | return ret ? 1 : 0; | ||
331 | } | ||
332 | |||
333 | int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, | ||
334 | struct btrfs_root *root) | ||
335 | { | ||
336 | struct btrfs_transaction *cur_trans = trans->transaction; | ||
337 | int updates; | ||
338 | |||
339 | if (cur_trans->blocked || cur_trans->delayed_refs.flushing) | ||
340 | return 1; | ||
341 | |||
342 | updates = trans->delayed_ref_updates; | ||
343 | trans->delayed_ref_updates = 0; | ||
344 | if (updates) | ||
345 | btrfs_run_delayed_refs(trans, root, updates); | ||
346 | |||
347 | return should_end_transaction(trans, root); | ||
348 | } | ||
349 | |||
289 | static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | 350 | static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, |
290 | struct btrfs_root *root, int throttle) | 351 | struct btrfs_root *root, int throttle) |
291 | { | 352 | { |
292 | struct btrfs_transaction *cur_trans; | 353 | struct btrfs_transaction *cur_trans = trans->transaction; |
293 | struct btrfs_fs_info *info = root->fs_info; | 354 | struct btrfs_fs_info *info = root->fs_info; |
294 | int count = 0; | 355 | int count = 0; |
295 | 356 | ||
@@ -313,9 +374,21 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
313 | count++; | 374 | count++; |
314 | } | 375 | } |
315 | 376 | ||
377 | btrfs_trans_release_metadata(trans, root); | ||
378 | |||
379 | if (!root->fs_info->open_ioctl_trans && | ||
380 | should_end_transaction(trans, root)) | ||
381 | trans->transaction->blocked = 1; | ||
382 | |||
383 | if (cur_trans->blocked && !cur_trans->in_commit) { | ||
384 | if (throttle) | ||
385 | return btrfs_commit_transaction(trans, root); | ||
386 | else | ||
387 | wake_up_process(info->transaction_kthread); | ||
388 | } | ||
389 | |||
316 | mutex_lock(&info->trans_mutex); | 390 | mutex_lock(&info->trans_mutex); |
317 | cur_trans = info->running_transaction; | 391 | WARN_ON(cur_trans != info->running_transaction); |
318 | WARN_ON(cur_trans != trans->transaction); | ||
319 | WARN_ON(cur_trans->num_writers < 1); | 392 | WARN_ON(cur_trans->num_writers < 1); |
320 | cur_trans->num_writers--; | 393 | cur_trans->num_writers--; |
321 | 394 | ||
@@ -603,6 +676,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, | |||
603 | 676 | ||
604 | btrfs_free_log(trans, root); | 677 | btrfs_free_log(trans, root); |
605 | btrfs_update_reloc_root(trans, root); | 678 | btrfs_update_reloc_root(trans, root); |
679 | btrfs_orphan_commit_root(trans, root); | ||
606 | 680 | ||
607 | if (root->commit_root != root->node) { | 681 | if (root->commit_root != root->node) { |
608 | switch_commit_root(root); | 682 | switch_commit_root(root); |
@@ -627,30 +701,30 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, | |||
627 | int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) | 701 | int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) |
628 | { | 702 | { |
629 | struct btrfs_fs_info *info = root->fs_info; | 703 | struct btrfs_fs_info *info = root->fs_info; |
630 | int ret; | ||
631 | struct btrfs_trans_handle *trans; | 704 | struct btrfs_trans_handle *trans; |
705 | int ret; | ||
632 | unsigned long nr; | 706 | unsigned long nr; |
633 | 707 | ||
634 | smp_mb(); | 708 | if (xchg(&root->defrag_running, 1)) |
635 | if (root->defrag_running) | ||
636 | return 0; | 709 | return 0; |
637 | trans = btrfs_start_transaction(root, 1); | 710 | |
638 | while (1) { | 711 | while (1) { |
639 | root->defrag_running = 1; | 712 | trans = btrfs_start_transaction(root, 0); |
713 | if (IS_ERR(trans)) | ||
714 | return PTR_ERR(trans); | ||
715 | |||
640 | ret = btrfs_defrag_leaves(trans, root, cacheonly); | 716 | ret = btrfs_defrag_leaves(trans, root, cacheonly); |
717 | |||
641 | nr = trans->blocks_used; | 718 | nr = trans->blocks_used; |
642 | btrfs_end_transaction(trans, root); | 719 | btrfs_end_transaction(trans, root); |
643 | btrfs_btree_balance_dirty(info->tree_root, nr); | 720 | btrfs_btree_balance_dirty(info->tree_root, nr); |
644 | cond_resched(); | 721 | cond_resched(); |
645 | 722 | ||
646 | trans = btrfs_start_transaction(root, 1); | ||
647 | if (root->fs_info->closing || ret != -EAGAIN) | 723 | if (root->fs_info->closing || ret != -EAGAIN) |
648 | break; | 724 | break; |
649 | } | 725 | } |
650 | root->defrag_running = 0; | 726 | root->defrag_running = 0; |
651 | smp_mb(); | 727 | return ret; |
652 | btrfs_end_transaction(trans, root); | ||
653 | return 0; | ||
654 | } | 728 | } |
655 | 729 | ||
656 | #if 0 | 730 | #if 0 |
@@ -758,47 +832,63 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
758 | struct btrfs_root *root = pending->root; | 832 | struct btrfs_root *root = pending->root; |
759 | struct btrfs_root *parent_root; | 833 | struct btrfs_root *parent_root; |
760 | struct inode *parent_inode; | 834 | struct inode *parent_inode; |
835 | struct dentry *dentry; | ||
761 | struct extent_buffer *tmp; | 836 | struct extent_buffer *tmp; |
762 | struct extent_buffer *old; | 837 | struct extent_buffer *old; |
763 | int ret; | 838 | int ret; |
764 | u64 objectid; | 839 | int retries = 0; |
765 | int namelen; | 840 | u64 to_reserve = 0; |
766 | u64 index = 0; | 841 | u64 index = 0; |
767 | 842 | u64 objectid; | |
768 | parent_inode = pending->dentry->d_parent->d_inode; | ||
769 | parent_root = BTRFS_I(parent_inode)->root; | ||
770 | 843 | ||
771 | new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); | 844 | new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); |
772 | if (!new_root_item) { | 845 | if (!new_root_item) { |
773 | ret = -ENOMEM; | 846 | pending->error = -ENOMEM; |
774 | goto fail; | 847 | goto fail; |
775 | } | 848 | } |
849 | |||
776 | ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid); | 850 | ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid); |
777 | if (ret) | 851 | if (ret) { |
852 | pending->error = ret; | ||
778 | goto fail; | 853 | goto fail; |
854 | } | ||
855 | |||
856 | btrfs_reloc_pre_snapshot(trans, pending, &to_reserve); | ||
857 | btrfs_orphan_pre_snapshot(trans, pending, &to_reserve); | ||
858 | |||
859 | if (to_reserve > 0) { | ||
860 | ret = btrfs_block_rsv_add(trans, root, &pending->block_rsv, | ||
861 | to_reserve, &retries); | ||
862 | if (ret) { | ||
863 | pending->error = ret; | ||
864 | goto fail; | ||
865 | } | ||
866 | } | ||
779 | 867 | ||
780 | key.objectid = objectid; | 868 | key.objectid = objectid; |
781 | /* record when the snapshot was created in key.offset */ | 869 | key.offset = (u64)-1; |
782 | key.offset = trans->transid; | 870 | key.type = BTRFS_ROOT_ITEM_KEY; |
783 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | ||
784 | 871 | ||
785 | memcpy(&pending->root_key, &key, sizeof(key)); | 872 | trans->block_rsv = &pending->block_rsv; |
786 | pending->root_key.offset = (u64)-1; | ||
787 | 873 | ||
874 | dentry = pending->dentry; | ||
875 | parent_inode = dentry->d_parent->d_inode; | ||
876 | parent_root = BTRFS_I(parent_inode)->root; | ||
788 | record_root_in_trans(trans, parent_root); | 877 | record_root_in_trans(trans, parent_root); |
878 | |||
789 | /* | 879 | /* |
790 | * insert the directory item | 880 | * insert the directory item |
791 | */ | 881 | */ |
792 | namelen = strlen(pending->name); | ||
793 | ret = btrfs_set_inode_index(parent_inode, &index); | 882 | ret = btrfs_set_inode_index(parent_inode, &index); |
794 | BUG_ON(ret); | 883 | BUG_ON(ret); |
795 | ret = btrfs_insert_dir_item(trans, parent_root, | 884 | ret = btrfs_insert_dir_item(trans, parent_root, |
796 | pending->name, namelen, | 885 | dentry->d_name.name, dentry->d_name.len, |
797 | parent_inode->i_ino, | 886 | parent_inode->i_ino, &key, |
798 | &pending->root_key, BTRFS_FT_DIR, index); | 887 | BTRFS_FT_DIR, index); |
799 | BUG_ON(ret); | 888 | BUG_ON(ret); |
800 | 889 | ||
801 | btrfs_i_size_write(parent_inode, parent_inode->i_size + namelen * 2); | 890 | btrfs_i_size_write(parent_inode, parent_inode->i_size + |
891 | dentry->d_name.len * 2); | ||
802 | ret = btrfs_update_inode(trans, parent_root, parent_inode); | 892 | ret = btrfs_update_inode(trans, parent_root, parent_inode); |
803 | BUG_ON(ret); | 893 | BUG_ON(ret); |
804 | 894 | ||
@@ -815,22 +905,32 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
815 | free_extent_buffer(old); | 905 | free_extent_buffer(old); |
816 | 906 | ||
817 | btrfs_set_root_node(new_root_item, tmp); | 907 | btrfs_set_root_node(new_root_item, tmp); |
818 | ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, | 908 | /* record when the snapshot was created in key.offset */ |
819 | new_root_item); | 909 | key.offset = trans->transid; |
820 | BUG_ON(ret); | 910 | ret = btrfs_insert_root(trans, tree_root, &key, new_root_item); |
821 | btrfs_tree_unlock(tmp); | 911 | btrfs_tree_unlock(tmp); |
822 | free_extent_buffer(tmp); | 912 | free_extent_buffer(tmp); |
913 | BUG_ON(ret); | ||
823 | 914 | ||
824 | ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root, | 915 | /* |
825 | pending->root_key.objectid, | 916 | * insert root back/forward references |
917 | */ | ||
918 | ret = btrfs_add_root_ref(trans, tree_root, objectid, | ||
826 | parent_root->root_key.objectid, | 919 | parent_root->root_key.objectid, |
827 | parent_inode->i_ino, index, pending->name, | 920 | parent_inode->i_ino, index, |
828 | namelen); | 921 | dentry->d_name.name, dentry->d_name.len); |
829 | BUG_ON(ret); | 922 | BUG_ON(ret); |
830 | 923 | ||
924 | key.offset = (u64)-1; | ||
925 | pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key); | ||
926 | BUG_ON(IS_ERR(pending->snap)); | ||
927 | |||
928 | btrfs_reloc_post_snapshot(trans, pending); | ||
929 | btrfs_orphan_post_snapshot(trans, pending); | ||
831 | fail: | 930 | fail: |
832 | kfree(new_root_item); | 931 | kfree(new_root_item); |
833 | return ret; | 932 | btrfs_block_rsv_release(root, &pending->block_rsv, (u64)-1); |
933 | return 0; | ||
834 | } | 934 | } |
835 | 935 | ||
836 | /* | 936 | /* |
@@ -878,6 +978,16 @@ int btrfs_transaction_in_commit(struct btrfs_fs_info *info) | |||
878 | return ret; | 978 | return ret; |
879 | } | 979 | } |
880 | 980 | ||
981 | int btrfs_transaction_blocked(struct btrfs_fs_info *info) | ||
982 | { | ||
983 | int ret = 0; | ||
984 | spin_lock(&info->new_trans_lock); | ||
985 | if (info->running_transaction) | ||
986 | ret = info->running_transaction->blocked; | ||
987 | spin_unlock(&info->new_trans_lock); | ||
988 | return ret; | ||
989 | } | ||
990 | |||
881 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | 991 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, |
882 | struct btrfs_root *root) | 992 | struct btrfs_root *root) |
883 | { | 993 | { |
@@ -899,6 +1009,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
899 | ret = btrfs_run_delayed_refs(trans, root, 0); | 1009 | ret = btrfs_run_delayed_refs(trans, root, 0); |
900 | BUG_ON(ret); | 1010 | BUG_ON(ret); |
901 | 1011 | ||
1012 | btrfs_trans_release_metadata(trans, root); | ||
1013 | |||
902 | cur_trans = trans->transaction; | 1014 | cur_trans = trans->transaction; |
903 | /* | 1015 | /* |
904 | * set the flushing flag so procs in this transaction have to | 1016 | * set the flushing flag so procs in this transaction have to |
@@ -951,9 +1063,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
951 | snap_pending = 1; | 1063 | snap_pending = 1; |
952 | 1064 | ||
953 | WARN_ON(cur_trans != trans->transaction); | 1065 | WARN_ON(cur_trans != trans->transaction); |
954 | prepare_to_wait(&cur_trans->writer_wait, &wait, | ||
955 | TASK_UNINTERRUPTIBLE); | ||
956 | |||
957 | if (cur_trans->num_writers > 1) | 1066 | if (cur_trans->num_writers > 1) |
958 | timeout = MAX_SCHEDULE_TIMEOUT; | 1067 | timeout = MAX_SCHEDULE_TIMEOUT; |
959 | else if (should_grow) | 1068 | else if (should_grow) |
@@ -976,6 +1085,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
976 | */ | 1085 | */ |
977 | btrfs_run_ordered_operations(root, 1); | 1086 | btrfs_run_ordered_operations(root, 1); |
978 | 1087 | ||
1088 | prepare_to_wait(&cur_trans->writer_wait, &wait, | ||
1089 | TASK_UNINTERRUPTIBLE); | ||
1090 | |||
979 | smp_mb(); | 1091 | smp_mb(); |
980 | if (cur_trans->num_writers > 1 || should_grow) | 1092 | if (cur_trans->num_writers > 1 || should_grow) |
981 | schedule_timeout(timeout); | 1093 | schedule_timeout(timeout); |
@@ -1103,9 +1215,9 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root) | |||
1103 | 1215 | ||
1104 | if (btrfs_header_backref_rev(root->node) < | 1216 | if (btrfs_header_backref_rev(root->node) < |
1105 | BTRFS_MIXED_BACKREF_REV) | 1217 | BTRFS_MIXED_BACKREF_REV) |
1106 | btrfs_drop_snapshot(root, 0); | 1218 | btrfs_drop_snapshot(root, NULL, 0); |
1107 | else | 1219 | else |
1108 | btrfs_drop_snapshot(root, 1); | 1220 | btrfs_drop_snapshot(root, NULL, 1); |
1109 | } | 1221 | } |
1110 | return 0; | 1222 | return 0; |
1111 | } | 1223 | } |
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 93c7ccb33118..e104986d0bfd 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
@@ -45,20 +45,23 @@ struct btrfs_transaction { | |||
45 | 45 | ||
46 | struct btrfs_trans_handle { | 46 | struct btrfs_trans_handle { |
47 | u64 transid; | 47 | u64 transid; |
48 | u64 block_group; | ||
49 | u64 bytes_reserved; | ||
48 | unsigned long blocks_reserved; | 50 | unsigned long blocks_reserved; |
49 | unsigned long blocks_used; | 51 | unsigned long blocks_used; |
50 | struct btrfs_transaction *transaction; | ||
51 | u64 block_group; | ||
52 | u64 alloc_exclude_start; | ||
53 | u64 alloc_exclude_nr; | ||
54 | unsigned long delayed_ref_updates; | 52 | unsigned long delayed_ref_updates; |
53 | struct btrfs_transaction *transaction; | ||
54 | struct btrfs_block_rsv *block_rsv; | ||
55 | }; | 55 | }; |
56 | 56 | ||
57 | struct btrfs_pending_snapshot { | 57 | struct btrfs_pending_snapshot { |
58 | struct dentry *dentry; | 58 | struct dentry *dentry; |
59 | struct btrfs_root *root; | 59 | struct btrfs_root *root; |
60 | char *name; | 60 | struct btrfs_root *snap; |
61 | struct btrfs_key root_key; | 61 | /* block reservation for the operation */ |
62 | struct btrfs_block_rsv block_rsv; | ||
63 | /* extra metadata reseration for relocation */ | ||
64 | int error; | ||
62 | struct list_head list; | 65 | struct list_head list; |
63 | }; | 66 | }; |
64 | 67 | ||
@@ -85,11 +88,11 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, | |||
85 | int btrfs_end_transaction(struct btrfs_trans_handle *trans, | 88 | int btrfs_end_transaction(struct btrfs_trans_handle *trans, |
86 | struct btrfs_root *root); | 89 | struct btrfs_root *root); |
87 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, | 90 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, |
88 | int num_blocks); | 91 | int num_items); |
89 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, | 92 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, |
90 | int num_blocks); | 93 | int num_blocks); |
91 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, | 94 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, |
92 | int num_blocks); | 95 | int num_blocks); |
93 | int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, | 96 | int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, |
94 | struct btrfs_root *root); | 97 | struct btrfs_root *root); |
95 | int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, | 98 | int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, |
@@ -103,6 +106,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
103 | struct btrfs_root *root); | 106 | struct btrfs_root *root); |
104 | int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, | 107 | int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, |
105 | struct btrfs_root *root); | 108 | struct btrfs_root *root); |
109 | int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, | ||
110 | struct btrfs_root *root); | ||
106 | void btrfs_throttle(struct btrfs_root *root); | 111 | void btrfs_throttle(struct btrfs_root *root); |
107 | int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, | 112 | int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, |
108 | struct btrfs_root *root); | 113 | struct btrfs_root *root); |
@@ -112,5 +117,6 @@ int btrfs_write_marked_extents(struct btrfs_root *root, | |||
112 | struct extent_io_tree *dirty_pages, int mark); | 117 | struct extent_io_tree *dirty_pages, int mark); |
113 | int btrfs_wait_marked_extents(struct btrfs_root *root, | 118 | int btrfs_wait_marked_extents(struct btrfs_root *root, |
114 | struct extent_io_tree *dirty_pages, int mark); | 119 | struct extent_io_tree *dirty_pages, int mark); |
120 | int btrfs_transaction_blocked(struct btrfs_fs_info *info); | ||
115 | int btrfs_transaction_in_commit(struct btrfs_fs_info *info); | 121 | int btrfs_transaction_in_commit(struct btrfs_fs_info *info); |
116 | #endif | 122 | #endif |
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index b10eacdb1620..f7ac8e013ed7 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c | |||
@@ -117,13 +117,14 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, | |||
117 | path->nodes[1], 0, | 117 | path->nodes[1], 0, |
118 | cache_only, &last_ret, | 118 | cache_only, &last_ret, |
119 | &root->defrag_progress); | 119 | &root->defrag_progress); |
120 | WARN_ON(ret && ret != -EAGAIN); | 120 | if (ret) { |
121 | WARN_ON(ret == -EAGAIN); | ||
122 | goto out; | ||
123 | } | ||
121 | if (next_key_ret == 0) { | 124 | if (next_key_ret == 0) { |
122 | memcpy(&root->defrag_progress, &key, sizeof(key)); | 125 | memcpy(&root->defrag_progress, &key, sizeof(key)); |
123 | ret = -EAGAIN; | 126 | ret = -EAGAIN; |
124 | } | 127 | } |
125 | |||
126 | btrfs_release_path(root, path); | ||
127 | out: | 128 | out: |
128 | if (path) | 129 | if (path) |
129 | btrfs_free_path(path); | 130 | btrfs_free_path(path); |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index af57dd2b43d4..fb102a9aee9c 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -135,6 +135,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans, | |||
135 | struct btrfs_root *root) | 135 | struct btrfs_root *root) |
136 | { | 136 | { |
137 | int ret; | 137 | int ret; |
138 | int err = 0; | ||
138 | 139 | ||
139 | mutex_lock(&root->log_mutex); | 140 | mutex_lock(&root->log_mutex); |
140 | if (root->log_root) { | 141 | if (root->log_root) { |
@@ -155,17 +156,19 @@ static int start_log_trans(struct btrfs_trans_handle *trans, | |||
155 | mutex_lock(&root->fs_info->tree_log_mutex); | 156 | mutex_lock(&root->fs_info->tree_log_mutex); |
156 | if (!root->fs_info->log_root_tree) { | 157 | if (!root->fs_info->log_root_tree) { |
157 | ret = btrfs_init_log_root_tree(trans, root->fs_info); | 158 | ret = btrfs_init_log_root_tree(trans, root->fs_info); |
158 | BUG_ON(ret); | 159 | if (ret) |
160 | err = ret; | ||
159 | } | 161 | } |
160 | if (!root->log_root) { | 162 | if (err == 0 && !root->log_root) { |
161 | ret = btrfs_add_log_tree(trans, root); | 163 | ret = btrfs_add_log_tree(trans, root); |
162 | BUG_ON(ret); | 164 | if (ret) |
165 | err = ret; | ||
163 | } | 166 | } |
164 | mutex_unlock(&root->fs_info->tree_log_mutex); | 167 | mutex_unlock(&root->fs_info->tree_log_mutex); |
165 | root->log_batch++; | 168 | root->log_batch++; |
166 | atomic_inc(&root->log_writers); | 169 | atomic_inc(&root->log_writers); |
167 | mutex_unlock(&root->log_mutex); | 170 | mutex_unlock(&root->log_mutex); |
168 | return 0; | 171 | return err; |
169 | } | 172 | } |
170 | 173 | ||
171 | /* | 174 | /* |
@@ -376,7 +379,7 @@ insert: | |||
376 | BUG_ON(ret); | 379 | BUG_ON(ret); |
377 | } | 380 | } |
378 | } else if (ret) { | 381 | } else if (ret) { |
379 | BUG(); | 382 | return ret; |
380 | } | 383 | } |
381 | dst_ptr = btrfs_item_ptr_offset(path->nodes[0], | 384 | dst_ptr = btrfs_item_ptr_offset(path->nodes[0], |
382 | path->slots[0]); | 385 | path->slots[0]); |
@@ -1699,9 +1702,9 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, | |||
1699 | 1702 | ||
1700 | next = btrfs_find_create_tree_block(root, bytenr, blocksize); | 1703 | next = btrfs_find_create_tree_block(root, bytenr, blocksize); |
1701 | 1704 | ||
1702 | wc->process_func(root, next, wc, ptr_gen); | ||
1703 | |||
1704 | if (*level == 1) { | 1705 | if (*level == 1) { |
1706 | wc->process_func(root, next, wc, ptr_gen); | ||
1707 | |||
1705 | path->slots[*level]++; | 1708 | path->slots[*level]++; |
1706 | if (wc->free) { | 1709 | if (wc->free) { |
1707 | btrfs_read_buffer(next, ptr_gen); | 1710 | btrfs_read_buffer(next, ptr_gen); |
@@ -1734,35 +1737,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, | |||
1734 | WARN_ON(*level < 0); | 1737 | WARN_ON(*level < 0); |
1735 | WARN_ON(*level >= BTRFS_MAX_LEVEL); | 1738 | WARN_ON(*level >= BTRFS_MAX_LEVEL); |
1736 | 1739 | ||
1737 | if (path->nodes[*level] == root->node) | 1740 | path->slots[*level] = btrfs_header_nritems(path->nodes[*level]); |
1738 | parent = path->nodes[*level]; | ||
1739 | else | ||
1740 | parent = path->nodes[*level + 1]; | ||
1741 | |||
1742 | bytenr = path->nodes[*level]->start; | ||
1743 | |||
1744 | blocksize = btrfs_level_size(root, *level); | ||
1745 | root_owner = btrfs_header_owner(parent); | ||
1746 | root_gen = btrfs_header_generation(parent); | ||
1747 | |||
1748 | wc->process_func(root, path->nodes[*level], wc, | ||
1749 | btrfs_header_generation(path->nodes[*level])); | ||
1750 | |||
1751 | if (wc->free) { | ||
1752 | next = path->nodes[*level]; | ||
1753 | btrfs_tree_lock(next); | ||
1754 | clean_tree_block(trans, root, next); | ||
1755 | btrfs_set_lock_blocking(next); | ||
1756 | btrfs_wait_tree_block_writeback(next); | ||
1757 | btrfs_tree_unlock(next); | ||
1758 | |||
1759 | WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); | ||
1760 | ret = btrfs_free_reserved_extent(root, bytenr, blocksize); | ||
1761 | BUG_ON(ret); | ||
1762 | } | ||
1763 | free_extent_buffer(path->nodes[*level]); | ||
1764 | path->nodes[*level] = NULL; | ||
1765 | *level += 1; | ||
1766 | 1741 | ||
1767 | cond_resched(); | 1742 | cond_resched(); |
1768 | return 0; | 1743 | return 0; |
@@ -1781,7 +1756,7 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, | |||
1781 | 1756 | ||
1782 | for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { | 1757 | for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { |
1783 | slot = path->slots[i]; | 1758 | slot = path->slots[i]; |
1784 | if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { | 1759 | if (slot + 1 < btrfs_header_nritems(path->nodes[i])) { |
1785 | struct extent_buffer *node; | 1760 | struct extent_buffer *node; |
1786 | node = path->nodes[i]; | 1761 | node = path->nodes[i]; |
1787 | path->slots[i]++; | 1762 | path->slots[i]++; |
@@ -2047,7 +2022,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2047 | mutex_unlock(&log_root_tree->log_mutex); | 2022 | mutex_unlock(&log_root_tree->log_mutex); |
2048 | 2023 | ||
2049 | ret = update_log_root(trans, log); | 2024 | ret = update_log_root(trans, log); |
2050 | BUG_ON(ret); | ||
2051 | 2025 | ||
2052 | mutex_lock(&log_root_tree->log_mutex); | 2026 | mutex_lock(&log_root_tree->log_mutex); |
2053 | if (atomic_dec_and_test(&log_root_tree->log_writers)) { | 2027 | if (atomic_dec_and_test(&log_root_tree->log_writers)) { |
@@ -2056,6 +2030,15 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2056 | wake_up(&log_root_tree->log_writer_wait); | 2030 | wake_up(&log_root_tree->log_writer_wait); |
2057 | } | 2031 | } |
2058 | 2032 | ||
2033 | if (ret) { | ||
2034 | BUG_ON(ret != -ENOSPC); | ||
2035 | root->fs_info->last_trans_log_full_commit = trans->transid; | ||
2036 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | ||
2037 | mutex_unlock(&log_root_tree->log_mutex); | ||
2038 | ret = -EAGAIN; | ||
2039 | goto out; | ||
2040 | } | ||
2041 | |||
2059 | index2 = log_root_tree->log_transid % 2; | 2042 | index2 = log_root_tree->log_transid % 2; |
2060 | if (atomic_read(&log_root_tree->log_commit[index2])) { | 2043 | if (atomic_read(&log_root_tree->log_commit[index2])) { |
2061 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | 2044 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
@@ -2129,15 +2112,10 @@ out: | |||
2129 | return 0; | 2112 | return 0; |
2130 | } | 2113 | } |
2131 | 2114 | ||
2132 | /* | 2115 | static void free_log_tree(struct btrfs_trans_handle *trans, |
2133 | * free all the extents used by the tree log. This should be called | 2116 | struct btrfs_root *log) |
2134 | * at commit time of the full transaction | ||
2135 | */ | ||
2136 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) | ||
2137 | { | 2117 | { |
2138 | int ret; | 2118 | int ret; |
2139 | struct btrfs_root *log; | ||
2140 | struct key; | ||
2141 | u64 start; | 2119 | u64 start; |
2142 | u64 end; | 2120 | u64 end; |
2143 | struct walk_control wc = { | 2121 | struct walk_control wc = { |
@@ -2145,10 +2123,6 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) | |||
2145 | .process_func = process_one_buffer | 2123 | .process_func = process_one_buffer |
2146 | }; | 2124 | }; |
2147 | 2125 | ||
2148 | if (!root->log_root || root->fs_info->log_root_recovering) | ||
2149 | return 0; | ||
2150 | |||
2151 | log = root->log_root; | ||
2152 | ret = walk_log_tree(trans, log, &wc); | 2126 | ret = walk_log_tree(trans, log, &wc); |
2153 | BUG_ON(ret); | 2127 | BUG_ON(ret); |
2154 | 2128 | ||
@@ -2162,14 +2136,30 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) | |||
2162 | EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS); | 2136 | EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS); |
2163 | } | 2137 | } |
2164 | 2138 | ||
2165 | if (log->log_transid > 0) { | ||
2166 | ret = btrfs_del_root(trans, root->fs_info->log_root_tree, | ||
2167 | &log->root_key); | ||
2168 | BUG_ON(ret); | ||
2169 | } | ||
2170 | root->log_root = NULL; | ||
2171 | free_extent_buffer(log->node); | 2139 | free_extent_buffer(log->node); |
2172 | kfree(log); | 2140 | kfree(log); |
2141 | } | ||
2142 | |||
2143 | /* | ||
2144 | * free all the extents used by the tree log. This should be called | ||
2145 | * at commit time of the full transaction | ||
2146 | */ | ||
2147 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) | ||
2148 | { | ||
2149 | if (root->log_root) { | ||
2150 | free_log_tree(trans, root->log_root); | ||
2151 | root->log_root = NULL; | ||
2152 | } | ||
2153 | return 0; | ||
2154 | } | ||
2155 | |||
2156 | int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, | ||
2157 | struct btrfs_fs_info *fs_info) | ||
2158 | { | ||
2159 | if (fs_info->log_root_tree) { | ||
2160 | free_log_tree(trans, fs_info->log_root_tree); | ||
2161 | fs_info->log_root_tree = NULL; | ||
2162 | } | ||
2173 | return 0; | 2163 | return 0; |
2174 | } | 2164 | } |
2175 | 2165 | ||
@@ -2203,6 +2193,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | |||
2203 | struct btrfs_dir_item *di; | 2193 | struct btrfs_dir_item *di; |
2204 | struct btrfs_path *path; | 2194 | struct btrfs_path *path; |
2205 | int ret; | 2195 | int ret; |
2196 | int err = 0; | ||
2206 | int bytes_del = 0; | 2197 | int bytes_del = 0; |
2207 | 2198 | ||
2208 | if (BTRFS_I(dir)->logged_trans < trans->transid) | 2199 | if (BTRFS_I(dir)->logged_trans < trans->transid) |
@@ -2218,7 +2209,11 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | |||
2218 | path = btrfs_alloc_path(); | 2209 | path = btrfs_alloc_path(); |
2219 | di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino, | 2210 | di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino, |
2220 | name, name_len, -1); | 2211 | name, name_len, -1); |
2221 | if (di && !IS_ERR(di)) { | 2212 | if (IS_ERR(di)) { |
2213 | err = PTR_ERR(di); | ||
2214 | goto fail; | ||
2215 | } | ||
2216 | if (di) { | ||
2222 | ret = btrfs_delete_one_dir_name(trans, log, path, di); | 2217 | ret = btrfs_delete_one_dir_name(trans, log, path, di); |
2223 | bytes_del += name_len; | 2218 | bytes_del += name_len; |
2224 | BUG_ON(ret); | 2219 | BUG_ON(ret); |
@@ -2226,7 +2221,11 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | |||
2226 | btrfs_release_path(log, path); | 2221 | btrfs_release_path(log, path); |
2227 | di = btrfs_lookup_dir_index_item(trans, log, path, dir->i_ino, | 2222 | di = btrfs_lookup_dir_index_item(trans, log, path, dir->i_ino, |
2228 | index, name, name_len, -1); | 2223 | index, name, name_len, -1); |
2229 | if (di && !IS_ERR(di)) { | 2224 | if (IS_ERR(di)) { |
2225 | err = PTR_ERR(di); | ||
2226 | goto fail; | ||
2227 | } | ||
2228 | if (di) { | ||
2230 | ret = btrfs_delete_one_dir_name(trans, log, path, di); | 2229 | ret = btrfs_delete_one_dir_name(trans, log, path, di); |
2231 | bytes_del += name_len; | 2230 | bytes_del += name_len; |
2232 | BUG_ON(ret); | 2231 | BUG_ON(ret); |
@@ -2244,6 +2243,10 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | |||
2244 | btrfs_release_path(log, path); | 2243 | btrfs_release_path(log, path); |
2245 | 2244 | ||
2246 | ret = btrfs_search_slot(trans, log, &key, path, 0, 1); | 2245 | ret = btrfs_search_slot(trans, log, &key, path, 0, 1); |
2246 | if (ret < 0) { | ||
2247 | err = ret; | ||
2248 | goto fail; | ||
2249 | } | ||
2247 | if (ret == 0) { | 2250 | if (ret == 0) { |
2248 | struct btrfs_inode_item *item; | 2251 | struct btrfs_inode_item *item; |
2249 | u64 i_size; | 2252 | u64 i_size; |
@@ -2261,9 +2264,13 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | |||
2261 | ret = 0; | 2264 | ret = 0; |
2262 | btrfs_release_path(log, path); | 2265 | btrfs_release_path(log, path); |
2263 | } | 2266 | } |
2264 | 2267 | fail: | |
2265 | btrfs_free_path(path); | 2268 | btrfs_free_path(path); |
2266 | mutex_unlock(&BTRFS_I(dir)->log_mutex); | 2269 | mutex_unlock(&BTRFS_I(dir)->log_mutex); |
2270 | if (ret == -ENOSPC) { | ||
2271 | root->fs_info->last_trans_log_full_commit = trans->transid; | ||
2272 | ret = 0; | ||
2273 | } | ||
2267 | btrfs_end_log_trans(root); | 2274 | btrfs_end_log_trans(root); |
2268 | 2275 | ||
2269 | return 0; | 2276 | return 0; |
@@ -2291,6 +2298,10 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, | |||
2291 | ret = btrfs_del_inode_ref(trans, log, name, name_len, inode->i_ino, | 2298 | ret = btrfs_del_inode_ref(trans, log, name, name_len, inode->i_ino, |
2292 | dirid, &index); | 2299 | dirid, &index); |
2293 | mutex_unlock(&BTRFS_I(inode)->log_mutex); | 2300 | mutex_unlock(&BTRFS_I(inode)->log_mutex); |
2301 | if (ret == -ENOSPC) { | ||
2302 | root->fs_info->last_trans_log_full_commit = trans->transid; | ||
2303 | ret = 0; | ||
2304 | } | ||
2294 | btrfs_end_log_trans(root); | 2305 | btrfs_end_log_trans(root); |
2295 | 2306 | ||
2296 | return ret; | 2307 | return ret; |
@@ -2318,7 +2329,8 @@ static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans, | |||
2318 | else | 2329 | else |
2319 | key.type = BTRFS_DIR_LOG_INDEX_KEY; | 2330 | key.type = BTRFS_DIR_LOG_INDEX_KEY; |
2320 | ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*item)); | 2331 | ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*item)); |
2321 | BUG_ON(ret); | 2332 | if (ret) |
2333 | return ret; | ||
2322 | 2334 | ||
2323 | item = btrfs_item_ptr(path->nodes[0], path->slots[0], | 2335 | item = btrfs_item_ptr(path->nodes[0], path->slots[0], |
2324 | struct btrfs_dir_log_item); | 2336 | struct btrfs_dir_log_item); |
@@ -2343,6 +2355,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
2343 | struct btrfs_key max_key; | 2355 | struct btrfs_key max_key; |
2344 | struct btrfs_root *log = root->log_root; | 2356 | struct btrfs_root *log = root->log_root; |
2345 | struct extent_buffer *src; | 2357 | struct extent_buffer *src; |
2358 | int err = 0; | ||
2346 | int ret; | 2359 | int ret; |
2347 | int i; | 2360 | int i; |
2348 | int nritems; | 2361 | int nritems; |
@@ -2405,6 +2418,10 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
2405 | ret = overwrite_item(trans, log, dst_path, | 2418 | ret = overwrite_item(trans, log, dst_path, |
2406 | path->nodes[0], path->slots[0], | 2419 | path->nodes[0], path->slots[0], |
2407 | &tmp); | 2420 | &tmp); |
2421 | if (ret) { | ||
2422 | err = ret; | ||
2423 | goto done; | ||
2424 | } | ||
2408 | } | 2425 | } |
2409 | } | 2426 | } |
2410 | btrfs_release_path(root, path); | 2427 | btrfs_release_path(root, path); |
@@ -2432,7 +2449,10 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
2432 | goto done; | 2449 | goto done; |
2433 | ret = overwrite_item(trans, log, dst_path, src, i, | 2450 | ret = overwrite_item(trans, log, dst_path, src, i, |
2434 | &min_key); | 2451 | &min_key); |
2435 | BUG_ON(ret); | 2452 | if (ret) { |
2453 | err = ret; | ||
2454 | goto done; | ||
2455 | } | ||
2436 | } | 2456 | } |
2437 | path->slots[0] = nritems; | 2457 | path->slots[0] = nritems; |
2438 | 2458 | ||
@@ -2454,22 +2474,30 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
2454 | ret = overwrite_item(trans, log, dst_path, | 2474 | ret = overwrite_item(trans, log, dst_path, |
2455 | path->nodes[0], path->slots[0], | 2475 | path->nodes[0], path->slots[0], |
2456 | &tmp); | 2476 | &tmp); |
2457 | 2477 | if (ret) | |
2458 | BUG_ON(ret); | 2478 | err = ret; |
2459 | last_offset = tmp.offset; | 2479 | else |
2480 | last_offset = tmp.offset; | ||
2460 | goto done; | 2481 | goto done; |
2461 | } | 2482 | } |
2462 | } | 2483 | } |
2463 | done: | 2484 | done: |
2464 | *last_offset_ret = last_offset; | ||
2465 | btrfs_release_path(root, path); | 2485 | btrfs_release_path(root, path); |
2466 | btrfs_release_path(log, dst_path); | 2486 | btrfs_release_path(log, dst_path); |
2467 | 2487 | ||
2468 | /* insert the log range keys to indicate where the log is valid */ | 2488 | if (err == 0) { |
2469 | ret = insert_dir_log_key(trans, log, path, key_type, inode->i_ino, | 2489 | *last_offset_ret = last_offset; |
2470 | first_offset, last_offset); | 2490 | /* |
2471 | BUG_ON(ret); | 2491 | * insert the log range keys to indicate where the log |
2472 | return 0; | 2492 | * is valid |
2493 | */ | ||
2494 | ret = insert_dir_log_key(trans, log, path, key_type, | ||
2495 | inode->i_ino, first_offset, | ||
2496 | last_offset); | ||
2497 | if (ret) | ||
2498 | err = ret; | ||
2499 | } | ||
2500 | return err; | ||
2473 | } | 2501 | } |
2474 | 2502 | ||
2475 | /* | 2503 | /* |
@@ -2501,7 +2529,8 @@ again: | |||
2501 | ret = log_dir_items(trans, root, inode, path, | 2529 | ret = log_dir_items(trans, root, inode, path, |
2502 | dst_path, key_type, min_key, | 2530 | dst_path, key_type, min_key, |
2503 | &max_key); | 2531 | &max_key); |
2504 | BUG_ON(ret); | 2532 | if (ret) |
2533 | return ret; | ||
2505 | if (max_key == (u64)-1) | 2534 | if (max_key == (u64)-1) |
2506 | break; | 2535 | break; |
2507 | min_key = max_key + 1; | 2536 | min_key = max_key + 1; |
@@ -2535,8 +2564,8 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, | |||
2535 | 2564 | ||
2536 | while (1) { | 2565 | while (1) { |
2537 | ret = btrfs_search_slot(trans, log, &key, path, -1, 1); | 2566 | ret = btrfs_search_slot(trans, log, &key, path, -1, 1); |
2538 | 2567 | BUG_ON(ret == 0); | |
2539 | if (ret != 1) | 2568 | if (ret < 0) |
2540 | break; | 2569 | break; |
2541 | 2570 | ||
2542 | if (path->slots[0] == 0) | 2571 | if (path->slots[0] == 0) |
@@ -2554,7 +2583,7 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, | |||
2554 | btrfs_release_path(log, path); | 2583 | btrfs_release_path(log, path); |
2555 | } | 2584 | } |
2556 | btrfs_release_path(log, path); | 2585 | btrfs_release_path(log, path); |
2557 | return 0; | 2586 | return ret; |
2558 | } | 2587 | } |
2559 | 2588 | ||
2560 | static noinline int copy_items(struct btrfs_trans_handle *trans, | 2589 | static noinline int copy_items(struct btrfs_trans_handle *trans, |
@@ -2587,7 +2616,10 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
2587 | } | 2616 | } |
2588 | ret = btrfs_insert_empty_items(trans, log, dst_path, | 2617 | ret = btrfs_insert_empty_items(trans, log, dst_path, |
2589 | ins_keys, ins_sizes, nr); | 2618 | ins_keys, ins_sizes, nr); |
2590 | BUG_ON(ret); | 2619 | if (ret) { |
2620 | kfree(ins_data); | ||
2621 | return ret; | ||
2622 | } | ||
2591 | 2623 | ||
2592 | for (i = 0; i < nr; i++, dst_path->slots[0]++) { | 2624 | for (i = 0; i < nr; i++, dst_path->slots[0]++) { |
2593 | dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0], | 2625 | dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0], |
@@ -2660,16 +2692,17 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
2660 | * we have to do this after the loop above to avoid changing the | 2692 | * we have to do this after the loop above to avoid changing the |
2661 | * log tree while trying to change the log tree. | 2693 | * log tree while trying to change the log tree. |
2662 | */ | 2694 | */ |
2695 | ret = 0; | ||
2663 | while (!list_empty(&ordered_sums)) { | 2696 | while (!list_empty(&ordered_sums)) { |
2664 | struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next, | 2697 | struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next, |
2665 | struct btrfs_ordered_sum, | 2698 | struct btrfs_ordered_sum, |
2666 | list); | 2699 | list); |
2667 | ret = btrfs_csum_file_blocks(trans, log, sums); | 2700 | if (!ret) |
2668 | BUG_ON(ret); | 2701 | ret = btrfs_csum_file_blocks(trans, log, sums); |
2669 | list_del(&sums->list); | 2702 | list_del(&sums->list); |
2670 | kfree(sums); | 2703 | kfree(sums); |
2671 | } | 2704 | } |
2672 | return 0; | 2705 | return ret; |
2673 | } | 2706 | } |
2674 | 2707 | ||
2675 | /* log a single inode in the tree log. | 2708 | /* log a single inode in the tree log. |
@@ -2697,6 +2730,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
2697 | struct btrfs_root *log = root->log_root; | 2730 | struct btrfs_root *log = root->log_root; |
2698 | struct extent_buffer *src = NULL; | 2731 | struct extent_buffer *src = NULL; |
2699 | u32 size; | 2732 | u32 size; |
2733 | int err = 0; | ||
2700 | int ret; | 2734 | int ret; |
2701 | int nritems; | 2735 | int nritems; |
2702 | int ins_start_slot = 0; | 2736 | int ins_start_slot = 0; |
@@ -2739,7 +2773,10 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
2739 | } else { | 2773 | } else { |
2740 | ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0); | 2774 | ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0); |
2741 | } | 2775 | } |
2742 | BUG_ON(ret); | 2776 | if (ret) { |
2777 | err = ret; | ||
2778 | goto out_unlock; | ||
2779 | } | ||
2743 | path->keep_locks = 1; | 2780 | path->keep_locks = 1; |
2744 | 2781 | ||
2745 | while (1) { | 2782 | while (1) { |
@@ -2768,7 +2805,10 @@ again: | |||
2768 | 2805 | ||
2769 | ret = copy_items(trans, log, dst_path, src, ins_start_slot, | 2806 | ret = copy_items(trans, log, dst_path, src, ins_start_slot, |
2770 | ins_nr, inode_only); | 2807 | ins_nr, inode_only); |
2771 | BUG_ON(ret); | 2808 | if (ret) { |
2809 | err = ret; | ||
2810 | goto out_unlock; | ||
2811 | } | ||
2772 | ins_nr = 1; | 2812 | ins_nr = 1; |
2773 | ins_start_slot = path->slots[0]; | 2813 | ins_start_slot = path->slots[0]; |
2774 | next_slot: | 2814 | next_slot: |
@@ -2784,7 +2824,10 @@ next_slot: | |||
2784 | ret = copy_items(trans, log, dst_path, src, | 2824 | ret = copy_items(trans, log, dst_path, src, |
2785 | ins_start_slot, | 2825 | ins_start_slot, |
2786 | ins_nr, inode_only); | 2826 | ins_nr, inode_only); |
2787 | BUG_ON(ret); | 2827 | if (ret) { |
2828 | err = ret; | ||
2829 | goto out_unlock; | ||
2830 | } | ||
2788 | ins_nr = 0; | 2831 | ins_nr = 0; |
2789 | } | 2832 | } |
2790 | btrfs_release_path(root, path); | 2833 | btrfs_release_path(root, path); |
@@ -2802,7 +2845,10 @@ next_slot: | |||
2802 | ret = copy_items(trans, log, dst_path, src, | 2845 | ret = copy_items(trans, log, dst_path, src, |
2803 | ins_start_slot, | 2846 | ins_start_slot, |
2804 | ins_nr, inode_only); | 2847 | ins_nr, inode_only); |
2805 | BUG_ON(ret); | 2848 | if (ret) { |
2849 | err = ret; | ||
2850 | goto out_unlock; | ||
2851 | } | ||
2806 | ins_nr = 0; | 2852 | ins_nr = 0; |
2807 | } | 2853 | } |
2808 | WARN_ON(ins_nr); | 2854 | WARN_ON(ins_nr); |
@@ -2810,14 +2856,18 @@ next_slot: | |||
2810 | btrfs_release_path(root, path); | 2856 | btrfs_release_path(root, path); |
2811 | btrfs_release_path(log, dst_path); | 2857 | btrfs_release_path(log, dst_path); |
2812 | ret = log_directory_changes(trans, root, inode, path, dst_path); | 2858 | ret = log_directory_changes(trans, root, inode, path, dst_path); |
2813 | BUG_ON(ret); | 2859 | if (ret) { |
2860 | err = ret; | ||
2861 | goto out_unlock; | ||
2862 | } | ||
2814 | } | 2863 | } |
2815 | BTRFS_I(inode)->logged_trans = trans->transid; | 2864 | BTRFS_I(inode)->logged_trans = trans->transid; |
2865 | out_unlock: | ||
2816 | mutex_unlock(&BTRFS_I(inode)->log_mutex); | 2866 | mutex_unlock(&BTRFS_I(inode)->log_mutex); |
2817 | 2867 | ||
2818 | btrfs_free_path(path); | 2868 | btrfs_free_path(path); |
2819 | btrfs_free_path(dst_path); | 2869 | btrfs_free_path(dst_path); |
2820 | return 0; | 2870 | return err; |
2821 | } | 2871 | } |
2822 | 2872 | ||
2823 | /* | 2873 | /* |
@@ -2942,10 +2992,13 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
2942 | goto end_no_trans; | 2992 | goto end_no_trans; |
2943 | } | 2993 | } |
2944 | 2994 | ||
2945 | start_log_trans(trans, root); | 2995 | ret = start_log_trans(trans, root); |
2996 | if (ret) | ||
2997 | goto end_trans; | ||
2946 | 2998 | ||
2947 | ret = btrfs_log_inode(trans, root, inode, inode_only); | 2999 | ret = btrfs_log_inode(trans, root, inode, inode_only); |
2948 | BUG_ON(ret); | 3000 | if (ret) |
3001 | goto end_trans; | ||
2949 | 3002 | ||
2950 | /* | 3003 | /* |
2951 | * for regular files, if its inode is already on disk, we don't | 3004 | * for regular files, if its inode is already on disk, we don't |
@@ -2955,8 +3008,10 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
2955 | */ | 3008 | */ |
2956 | if (S_ISREG(inode->i_mode) && | 3009 | if (S_ISREG(inode->i_mode) && |
2957 | BTRFS_I(inode)->generation <= last_committed && | 3010 | BTRFS_I(inode)->generation <= last_committed && |
2958 | BTRFS_I(inode)->last_unlink_trans <= last_committed) | 3011 | BTRFS_I(inode)->last_unlink_trans <= last_committed) { |
2959 | goto no_parent; | 3012 | ret = 0; |
3013 | goto end_trans; | ||
3014 | } | ||
2960 | 3015 | ||
2961 | inode_only = LOG_INODE_EXISTS; | 3016 | inode_only = LOG_INODE_EXISTS; |
2962 | while (1) { | 3017 | while (1) { |
@@ -2970,15 +3025,21 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
2970 | if (BTRFS_I(inode)->generation > | 3025 | if (BTRFS_I(inode)->generation > |
2971 | root->fs_info->last_trans_committed) { | 3026 | root->fs_info->last_trans_committed) { |
2972 | ret = btrfs_log_inode(trans, root, inode, inode_only); | 3027 | ret = btrfs_log_inode(trans, root, inode, inode_only); |
2973 | BUG_ON(ret); | 3028 | if (ret) |
3029 | goto end_trans; | ||
2974 | } | 3030 | } |
2975 | if (IS_ROOT(parent)) | 3031 | if (IS_ROOT(parent)) |
2976 | break; | 3032 | break; |
2977 | 3033 | ||
2978 | parent = parent->d_parent; | 3034 | parent = parent->d_parent; |
2979 | } | 3035 | } |
2980 | no_parent: | ||
2981 | ret = 0; | 3036 | ret = 0; |
3037 | end_trans: | ||
3038 | if (ret < 0) { | ||
3039 | BUG_ON(ret != -ENOSPC); | ||
3040 | root->fs_info->last_trans_log_full_commit = trans->transid; | ||
3041 | ret = 1; | ||
3042 | } | ||
2982 | btrfs_end_log_trans(root); | 3043 | btrfs_end_log_trans(root); |
2983 | end_no_trans: | 3044 | end_no_trans: |
2984 | return ret; | 3045 | return ret; |
@@ -3020,7 +3081,7 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) | |||
3020 | path = btrfs_alloc_path(); | 3081 | path = btrfs_alloc_path(); |
3021 | BUG_ON(!path); | 3082 | BUG_ON(!path); |
3022 | 3083 | ||
3023 | trans = btrfs_start_transaction(fs_info->tree_root, 1); | 3084 | trans = btrfs_start_transaction(fs_info->tree_root, 0); |
3024 | 3085 | ||
3025 | wc.trans = trans; | 3086 | wc.trans = trans; |
3026 | wc.pin = 1; | 3087 | wc.pin = 1; |
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index 0776eacb5083..3dfae84c8cc8 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h | |||
@@ -25,6 +25,8 @@ | |||
25 | int btrfs_sync_log(struct btrfs_trans_handle *trans, | 25 | int btrfs_sync_log(struct btrfs_trans_handle *trans, |
26 | struct btrfs_root *root); | 26 | struct btrfs_root *root); |
27 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); | 27 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); |
28 | int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, | ||
29 | struct btrfs_fs_info *fs_info); | ||
28 | int btrfs_recover_log_trees(struct btrfs_root *tree_root); | 30 | int btrfs_recover_log_trees(struct btrfs_root *tree_root); |
29 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, | 31 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, |
30 | struct btrfs_root *root, struct dentry *dentry); | 32 | struct btrfs_root *root, struct dentry *dentry); |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 8db7b14bbae8..d6e3af8be95b 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -1097,7 +1097,7 @@ static int btrfs_rm_dev_item(struct btrfs_root *root, | |||
1097 | if (!path) | 1097 | if (!path) |
1098 | return -ENOMEM; | 1098 | return -ENOMEM; |
1099 | 1099 | ||
1100 | trans = btrfs_start_transaction(root, 1); | 1100 | trans = btrfs_start_transaction(root, 0); |
1101 | key.objectid = BTRFS_DEV_ITEMS_OBJECTID; | 1101 | key.objectid = BTRFS_DEV_ITEMS_OBJECTID; |
1102 | key.type = BTRFS_DEV_ITEM_KEY; | 1102 | key.type = BTRFS_DEV_ITEM_KEY; |
1103 | key.offset = device->devid; | 1103 | key.offset = device->devid; |
@@ -1486,7 +1486,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1486 | goto error; | 1486 | goto error; |
1487 | } | 1487 | } |
1488 | 1488 | ||
1489 | trans = btrfs_start_transaction(root, 1); | 1489 | trans = btrfs_start_transaction(root, 0); |
1490 | lock_chunks(root); | 1490 | lock_chunks(root); |
1491 | 1491 | ||
1492 | device->barriers = 1; | 1492 | device->barriers = 1; |
@@ -1751,9 +1751,10 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, | |||
1751 | 1751 | ||
1752 | /* step one, relocate all the extents inside this chunk */ | 1752 | /* step one, relocate all the extents inside this chunk */ |
1753 | ret = btrfs_relocate_block_group(extent_root, chunk_offset); | 1753 | ret = btrfs_relocate_block_group(extent_root, chunk_offset); |
1754 | BUG_ON(ret); | 1754 | if (ret) |
1755 | return ret; | ||
1755 | 1756 | ||
1756 | trans = btrfs_start_transaction(root, 1); | 1757 | trans = btrfs_start_transaction(root, 0); |
1757 | BUG_ON(!trans); | 1758 | BUG_ON(!trans); |
1758 | 1759 | ||
1759 | lock_chunks(root); | 1760 | lock_chunks(root); |
@@ -1925,7 +1926,7 @@ int btrfs_balance(struct btrfs_root *dev_root) | |||
1925 | break; | 1926 | break; |
1926 | BUG_ON(ret); | 1927 | BUG_ON(ret); |
1927 | 1928 | ||
1928 | trans = btrfs_start_transaction(dev_root, 1); | 1929 | trans = btrfs_start_transaction(dev_root, 0); |
1929 | BUG_ON(!trans); | 1930 | BUG_ON(!trans); |
1930 | 1931 | ||
1931 | ret = btrfs_grow_device(trans, device, old_size); | 1932 | ret = btrfs_grow_device(trans, device, old_size); |
@@ -2094,11 +2095,7 @@ again: | |||
2094 | } | 2095 | } |
2095 | 2096 | ||
2096 | /* Shrinking succeeded, else we would be at "done". */ | 2097 | /* Shrinking succeeded, else we would be at "done". */ |
2097 | trans = btrfs_start_transaction(root, 1); | 2098 | trans = btrfs_start_transaction(root, 0); |
2098 | if (!trans) { | ||
2099 | ret = -ENOMEM; | ||
2100 | goto done; | ||
2101 | } | ||
2102 | lock_chunks(root); | 2099 | lock_chunks(root); |
2103 | 2100 | ||
2104 | device->disk_total_bytes = new_size; | 2101 | device->disk_total_bytes = new_size; |
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 59acd3eb288a..88ecbb215878 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c | |||
@@ -154,15 +154,10 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans, | |||
154 | if (trans) | 154 | if (trans) |
155 | return do_setxattr(trans, inode, name, value, size, flags); | 155 | return do_setxattr(trans, inode, name, value, size, flags); |
156 | 156 | ||
157 | ret = btrfs_reserve_metadata_space(root, 2); | 157 | trans = btrfs_start_transaction(root, 2); |
158 | if (ret) | 158 | if (IS_ERR(trans)) |
159 | return ret; | 159 | return PTR_ERR(trans); |
160 | 160 | ||
161 | trans = btrfs_start_transaction(root, 1); | ||
162 | if (!trans) { | ||
163 | ret = -ENOMEM; | ||
164 | goto out; | ||
165 | } | ||
166 | btrfs_set_trans_block_group(trans, inode); | 161 | btrfs_set_trans_block_group(trans, inode); |
167 | 162 | ||
168 | ret = do_setxattr(trans, inode, name, value, size, flags); | 163 | ret = do_setxattr(trans, inode, name, value, size, flags); |
@@ -174,7 +169,6 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans, | |||
174 | BUG_ON(ret); | 169 | BUG_ON(ret); |
175 | out: | 170 | out: |
176 | btrfs_end_transaction_throttle(trans, root); | 171 | btrfs_end_transaction_throttle(trans, root); |
177 | btrfs_unreserve_metadata_space(root, 2); | ||
178 | return ret; | 172 | return ret; |
179 | } | 173 | } |
180 | 174 | ||
diff --git a/fs/compat.c b/fs/compat.c index 05448730f840..f0b391c50552 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
@@ -568,6 +568,79 @@ out: | |||
568 | return ret; | 568 | return ret; |
569 | } | 569 | } |
570 | 570 | ||
571 | /* A write operation does a read from user space and vice versa */ | ||
572 | #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ) | ||
573 | |||
574 | ssize_t compat_rw_copy_check_uvector(int type, | ||
575 | const struct compat_iovec __user *uvector, unsigned long nr_segs, | ||
576 | unsigned long fast_segs, struct iovec *fast_pointer, | ||
577 | struct iovec **ret_pointer) | ||
578 | { | ||
579 | compat_ssize_t tot_len; | ||
580 | struct iovec *iov = *ret_pointer = fast_pointer; | ||
581 | ssize_t ret = 0; | ||
582 | int seg; | ||
583 | |||
584 | /* | ||
585 | * SuS says "The readv() function *may* fail if the iovcnt argument | ||
586 | * was less than or equal to 0, or greater than {IOV_MAX}." Linux has | ||
587 | * traditionally returned zero for zero segments, so... | ||
588 | */ | ||
589 | if (nr_segs == 0) | ||
590 | goto out; | ||
591 | |||
592 | ret = -EINVAL; | ||
593 | if (nr_segs > UIO_MAXIOV || nr_segs < 0) | ||
594 | goto out; | ||
595 | if (nr_segs > fast_segs) { | ||
596 | ret = -ENOMEM; | ||
597 | iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); | ||
598 | if (iov == NULL) { | ||
599 | *ret_pointer = fast_pointer; | ||
600 | goto out; | ||
601 | } | ||
602 | } | ||
603 | *ret_pointer = iov; | ||
604 | |||
605 | /* | ||
606 | * Single unix specification: | ||
607 | * We should -EINVAL if an element length is not >= 0 and fitting an | ||
608 | * ssize_t. The total length is fitting an ssize_t | ||
609 | * | ||
610 | * Be careful here because iov_len is a size_t not an ssize_t | ||
611 | */ | ||
612 | tot_len = 0; | ||
613 | ret = -EINVAL; | ||
614 | for (seg = 0; seg < nr_segs; seg++) { | ||
615 | compat_ssize_t tmp = tot_len; | ||
616 | compat_uptr_t buf; | ||
617 | compat_ssize_t len; | ||
618 | |||
619 | if (__get_user(len, &uvector->iov_len) || | ||
620 | __get_user(buf, &uvector->iov_base)) { | ||
621 | ret = -EFAULT; | ||
622 | goto out; | ||
623 | } | ||
624 | if (len < 0) /* size_t not fitting in compat_ssize_t .. */ | ||
625 | goto out; | ||
626 | tot_len += len; | ||
627 | if (tot_len < tmp) /* maths overflow on the compat_ssize_t */ | ||
628 | goto out; | ||
629 | if (!access_ok(vrfy_dir(type), buf, len)) { | ||
630 | ret = -EFAULT; | ||
631 | goto out; | ||
632 | } | ||
633 | iov->iov_base = compat_ptr(buf); | ||
634 | iov->iov_len = (compat_size_t) len; | ||
635 | uvector++; | ||
636 | iov++; | ||
637 | } | ||
638 | ret = tot_len; | ||
639 | |||
640 | out: | ||
641 | return ret; | ||
642 | } | ||
643 | |||
571 | static inline long | 644 | static inline long |
572 | copy_iocb(long nr, u32 __user *ptr32, struct iocb __user * __user *ptr64) | 645 | copy_iocb(long nr, u32 __user *ptr32, struct iocb __user * __user *ptr64) |
573 | { | 646 | { |
@@ -600,7 +673,7 @@ compat_sys_io_submit(aio_context_t ctx_id, int nr, u32 __user *iocb) | |||
600 | iocb64 = compat_alloc_user_space(nr * sizeof(*iocb64)); | 673 | iocb64 = compat_alloc_user_space(nr * sizeof(*iocb64)); |
601 | ret = copy_iocb(nr, iocb, iocb64); | 674 | ret = copy_iocb(nr, iocb, iocb64); |
602 | if (!ret) | 675 | if (!ret) |
603 | ret = sys_io_submit(ctx_id, nr, iocb64); | 676 | ret = do_io_submit(ctx_id, nr, iocb64, 1); |
604 | return ret; | 677 | return ret; |
605 | } | 678 | } |
606 | 679 | ||
@@ -1077,70 +1150,21 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, | |||
1077 | { | 1150 | { |
1078 | compat_ssize_t tot_len; | 1151 | compat_ssize_t tot_len; |
1079 | struct iovec iovstack[UIO_FASTIOV]; | 1152 | struct iovec iovstack[UIO_FASTIOV]; |
1080 | struct iovec *iov=iovstack, *vector; | 1153 | struct iovec *iov; |
1081 | ssize_t ret; | 1154 | ssize_t ret; |
1082 | int seg; | ||
1083 | io_fn_t fn; | 1155 | io_fn_t fn; |
1084 | iov_fn_t fnv; | 1156 | iov_fn_t fnv; |
1085 | 1157 | ||
1086 | /* | ||
1087 | * SuS says "The readv() function *may* fail if the iovcnt argument | ||
1088 | * was less than or equal to 0, or greater than {IOV_MAX}. Linux has | ||
1089 | * traditionally returned zero for zero segments, so... | ||
1090 | */ | ||
1091 | ret = 0; | ||
1092 | if (nr_segs == 0) | ||
1093 | goto out; | ||
1094 | |||
1095 | /* | ||
1096 | * First get the "struct iovec" from user memory and | ||
1097 | * verify all the pointers | ||
1098 | */ | ||
1099 | ret = -EINVAL; | 1158 | ret = -EINVAL; |
1100 | if ((nr_segs > UIO_MAXIOV) || (nr_segs <= 0)) | ||
1101 | goto out; | ||
1102 | if (!file->f_op) | 1159 | if (!file->f_op) |
1103 | goto out; | 1160 | goto out; |
1104 | if (nr_segs > UIO_FASTIOV) { | 1161 | |
1105 | ret = -ENOMEM; | ||
1106 | iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); | ||
1107 | if (!iov) | ||
1108 | goto out; | ||
1109 | } | ||
1110 | ret = -EFAULT; | 1162 | ret = -EFAULT; |
1111 | if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector))) | 1163 | if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector))) |
1112 | goto out; | 1164 | goto out; |
1113 | 1165 | ||
1114 | /* | 1166 | tot_len = compat_rw_copy_check_uvector(type, uvector, nr_segs, |
1115 | * Single unix specification: | 1167 | UIO_FASTIOV, iovstack, &iov); |
1116 | * We should -EINVAL if an element length is not >= 0 and fitting an | ||
1117 | * ssize_t. The total length is fitting an ssize_t | ||
1118 | * | ||
1119 | * Be careful here because iov_len is a size_t not an ssize_t | ||
1120 | */ | ||
1121 | tot_len = 0; | ||
1122 | vector = iov; | ||
1123 | ret = -EINVAL; | ||
1124 | for (seg = 0 ; seg < nr_segs; seg++) { | ||
1125 | compat_ssize_t tmp = tot_len; | ||
1126 | compat_ssize_t len; | ||
1127 | compat_uptr_t buf; | ||
1128 | |||
1129 | if (__get_user(len, &uvector->iov_len) || | ||
1130 | __get_user(buf, &uvector->iov_base)) { | ||
1131 | ret = -EFAULT; | ||
1132 | goto out; | ||
1133 | } | ||
1134 | if (len < 0) /* size_t not fitting an compat_ssize_t .. */ | ||
1135 | goto out; | ||
1136 | tot_len += len; | ||
1137 | if (tot_len < tmp) /* maths overflow on the compat_ssize_t */ | ||
1138 | goto out; | ||
1139 | vector->iov_base = compat_ptr(buf); | ||
1140 | vector->iov_len = (compat_size_t) len; | ||
1141 | uvector++; | ||
1142 | vector++; | ||
1143 | } | ||
1144 | if (tot_len == 0) { | 1168 | if (tot_len == 0) { |
1145 | ret = 0; | 1169 | ret = 0; |
1146 | goto out; | 1170 | goto out; |
diff --git a/fs/direct-io.c b/fs/direct-io.c index e82adc2debb7..da111aacb46e 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
@@ -82,6 +82,8 @@ struct dio { | |||
82 | int reap_counter; /* rate limit reaping */ | 82 | int reap_counter; /* rate limit reaping */ |
83 | get_block_t *get_block; /* block mapping function */ | 83 | get_block_t *get_block; /* block mapping function */ |
84 | dio_iodone_t *end_io; /* IO completion function */ | 84 | dio_iodone_t *end_io; /* IO completion function */ |
85 | dio_submit_t *submit_io; /* IO submission function */ | ||
86 | loff_t logical_offset_in_bio; /* current first logical block in bio */ | ||
85 | sector_t final_block_in_bio; /* current final block in bio + 1 */ | 87 | sector_t final_block_in_bio; /* current final block in bio + 1 */ |
86 | sector_t next_block_for_io; /* next block to be put under IO, | 88 | sector_t next_block_for_io; /* next block to be put under IO, |
87 | in dio_blocks units */ | 89 | in dio_blocks units */ |
@@ -96,6 +98,7 @@ struct dio { | |||
96 | unsigned cur_page_offset; /* Offset into it, in bytes */ | 98 | unsigned cur_page_offset; /* Offset into it, in bytes */ |
97 | unsigned cur_page_len; /* Nr of bytes at cur_page_offset */ | 99 | unsigned cur_page_len; /* Nr of bytes at cur_page_offset */ |
98 | sector_t cur_page_block; /* Where it starts */ | 100 | sector_t cur_page_block; /* Where it starts */ |
101 | loff_t cur_page_fs_offset; /* Offset in file */ | ||
99 | 102 | ||
100 | /* BIO completion state */ | 103 | /* BIO completion state */ |
101 | spinlock_t bio_lock; /* protects BIO fields below */ | 104 | spinlock_t bio_lock; /* protects BIO fields below */ |
@@ -300,6 +303,26 @@ static void dio_bio_end_io(struct bio *bio, int error) | |||
300 | spin_unlock_irqrestore(&dio->bio_lock, flags); | 303 | spin_unlock_irqrestore(&dio->bio_lock, flags); |
301 | } | 304 | } |
302 | 305 | ||
306 | /** | ||
307 | * dio_end_io - handle the end io action for the given bio | ||
308 | * @bio: The direct io bio that's being completed | ||
309 | * @error: Error if there was one | ||
310 | * | ||
311 | * This is meant to be called by any filesystem that uses its own dio_submit_t | ||
312 | * so that the DIO specific endio actions are dealt with after the filesystem | ||
313 | * has done its completion work. | ||
314 | */ | ||
315 | void dio_end_io(struct bio *bio, int error) | ||
316 | { | ||
317 | struct dio *dio = bio->bi_private; | ||
318 | |||
319 | if (dio->is_async) | ||
320 | dio_bio_end_aio(bio, error); | ||
321 | else | ||
322 | dio_bio_end_io(bio, error); | ||
323 | } | ||
324 | EXPORT_SYMBOL_GPL(dio_end_io); | ||
325 | |||
303 | static int | 326 | static int |
304 | dio_bio_alloc(struct dio *dio, struct block_device *bdev, | 327 | dio_bio_alloc(struct dio *dio, struct block_device *bdev, |
305 | sector_t first_sector, int nr_vecs) | 328 | sector_t first_sector, int nr_vecs) |
@@ -316,6 +339,7 @@ dio_bio_alloc(struct dio *dio, struct block_device *bdev, | |||
316 | bio->bi_end_io = dio_bio_end_io; | 339 | bio->bi_end_io = dio_bio_end_io; |
317 | 340 | ||
318 | dio->bio = bio; | 341 | dio->bio = bio; |
342 | dio->logical_offset_in_bio = dio->cur_page_fs_offset; | ||
319 | return 0; | 343 | return 0; |
320 | } | 344 | } |
321 | 345 | ||
@@ -340,10 +364,15 @@ static void dio_bio_submit(struct dio *dio) | |||
340 | if (dio->is_async && dio->rw == READ) | 364 | if (dio->is_async && dio->rw == READ) |
341 | bio_set_pages_dirty(bio); | 365 | bio_set_pages_dirty(bio); |
342 | 366 | ||
343 | submit_bio(dio->rw, bio); | 367 | if (dio->submit_io) |
368 | dio->submit_io(dio->rw, bio, dio->inode, | ||
369 | dio->logical_offset_in_bio); | ||
370 | else | ||
371 | submit_bio(dio->rw, bio); | ||
344 | 372 | ||
345 | dio->bio = NULL; | 373 | dio->bio = NULL; |
346 | dio->boundary = 0; | 374 | dio->boundary = 0; |
375 | dio->logical_offset_in_bio = 0; | ||
347 | } | 376 | } |
348 | 377 | ||
349 | /* | 378 | /* |
@@ -603,10 +632,26 @@ static int dio_send_cur_page(struct dio *dio) | |||
603 | int ret = 0; | 632 | int ret = 0; |
604 | 633 | ||
605 | if (dio->bio) { | 634 | if (dio->bio) { |
635 | loff_t cur_offset = dio->block_in_file << dio->blkbits; | ||
636 | loff_t bio_next_offset = dio->logical_offset_in_bio + | ||
637 | dio->bio->bi_size; | ||
638 | |||
606 | /* | 639 | /* |
607 | * See whether this new request is contiguous with the old | 640 | * See whether this new request is contiguous with the old. |
641 | * | ||
642 | * Btrfs cannot handle having logically non-contiguous requests | ||
643 | * submitted. For example, if you have | ||
644 | * | ||
645 | * Logical: [0-4095][HOLE][8192-12287] | ||
646 | * Physical: [0-4095] [4096-8191] | ||
647 | * | ||
648 | * We cannot submit those pages together as one BIO. So if our | ||
649 | * current logical offset in the file does not equal what would | ||
650 | * be the next logical offset in the bio, submit the bio we | ||
651 | * have. | ||
608 | */ | 652 | */ |
609 | if (dio->final_block_in_bio != dio->cur_page_block) | 653 | if (dio->final_block_in_bio != dio->cur_page_block || |
654 | cur_offset != bio_next_offset) | ||
610 | dio_bio_submit(dio); | 655 | dio_bio_submit(dio); |
611 | /* | 656 | /* |
612 | * Submit now if the underlying fs is about to perform a | 657 | * Submit now if the underlying fs is about to perform a |
@@ -701,6 +746,7 @@ submit_page_section(struct dio *dio, struct page *page, | |||
701 | dio->cur_page_offset = offset; | 746 | dio->cur_page_offset = offset; |
702 | dio->cur_page_len = len; | 747 | dio->cur_page_len = len; |
703 | dio->cur_page_block = blocknr; | 748 | dio->cur_page_block = blocknr; |
749 | dio->cur_page_fs_offset = dio->block_in_file << dio->blkbits; | ||
704 | out: | 750 | out: |
705 | return ret; | 751 | return ret; |
706 | } | 752 | } |
@@ -935,7 +981,7 @@ static ssize_t | |||
935 | direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | 981 | direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, |
936 | const struct iovec *iov, loff_t offset, unsigned long nr_segs, | 982 | const struct iovec *iov, loff_t offset, unsigned long nr_segs, |
937 | unsigned blkbits, get_block_t get_block, dio_iodone_t end_io, | 983 | unsigned blkbits, get_block_t get_block, dio_iodone_t end_io, |
938 | struct dio *dio) | 984 | dio_submit_t submit_io, struct dio *dio) |
939 | { | 985 | { |
940 | unsigned long user_addr; | 986 | unsigned long user_addr; |
941 | unsigned long flags; | 987 | unsigned long flags; |
@@ -952,6 +998,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | |||
952 | 998 | ||
953 | dio->get_block = get_block; | 999 | dio->get_block = get_block; |
954 | dio->end_io = end_io; | 1000 | dio->end_io = end_io; |
1001 | dio->submit_io = submit_io; | ||
955 | dio->final_block_in_bio = -1; | 1002 | dio->final_block_in_bio = -1; |
956 | dio->next_block_for_io = -1; | 1003 | dio->next_block_for_io = -1; |
957 | 1004 | ||
@@ -1008,7 +1055,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | |||
1008 | } | 1055 | } |
1009 | } /* end iovec loop */ | 1056 | } /* end iovec loop */ |
1010 | 1057 | ||
1011 | if (ret == -ENOTBLK && (rw & WRITE)) { | 1058 | if (ret == -ENOTBLK) { |
1012 | /* | 1059 | /* |
1013 | * The remaining part of the request will be | 1060 | * The remaining part of the request will be |
1014 | * be handled by buffered I/O when we return | 1061 | * be handled by buffered I/O when we return |
@@ -1110,7 +1157,7 @@ ssize_t | |||
1110 | __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | 1157 | __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, |
1111 | struct block_device *bdev, const struct iovec *iov, loff_t offset, | 1158 | struct block_device *bdev, const struct iovec *iov, loff_t offset, |
1112 | unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, | 1159 | unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, |
1113 | int flags) | 1160 | dio_submit_t submit_io, int flags) |
1114 | { | 1161 | { |
1115 | int seg; | 1162 | int seg; |
1116 | size_t size; | 1163 | size_t size; |
@@ -1197,7 +1244,8 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | |||
1197 | (end > i_size_read(inode))); | 1244 | (end > i_size_read(inode))); |
1198 | 1245 | ||
1199 | retval = direct_io_worker(rw, iocb, inode, iov, offset, | 1246 | retval = direct_io_worker(rw, iocb, inode, iov, offset, |
1200 | nr_segs, blkbits, get_block, end_io, dio); | 1247 | nr_segs, blkbits, get_block, end_io, |
1248 | submit_io, dio); | ||
1201 | 1249 | ||
1202 | /* | 1250 | /* |
1203 | * In case of error extending write may have instantiated a few | 1251 | * In case of error extending write may have instantiated a few |
@@ -768,7 +768,6 @@ static int de_thread(struct task_struct *tsk) | |||
768 | struct signal_struct *sig = tsk->signal; | 768 | struct signal_struct *sig = tsk->signal; |
769 | struct sighand_struct *oldsighand = tsk->sighand; | 769 | struct sighand_struct *oldsighand = tsk->sighand; |
770 | spinlock_t *lock = &oldsighand->siglock; | 770 | spinlock_t *lock = &oldsighand->siglock; |
771 | int count; | ||
772 | 771 | ||
773 | if (thread_group_empty(tsk)) | 772 | if (thread_group_empty(tsk)) |
774 | goto no_thread_group; | 773 | goto no_thread_group; |
@@ -785,13 +784,13 @@ static int de_thread(struct task_struct *tsk) | |||
785 | spin_unlock_irq(lock); | 784 | spin_unlock_irq(lock); |
786 | return -EAGAIN; | 785 | return -EAGAIN; |
787 | } | 786 | } |
787 | |||
788 | sig->group_exit_task = tsk; | 788 | sig->group_exit_task = tsk; |
789 | zap_other_threads(tsk); | 789 | sig->notify_count = zap_other_threads(tsk); |
790 | if (!thread_group_leader(tsk)) | ||
791 | sig->notify_count--; | ||
790 | 792 | ||
791 | /* Account for the thread group leader hanging around: */ | 793 | while (sig->notify_count) { |
792 | count = thread_group_leader(tsk) ? 1 : 2; | ||
793 | sig->notify_count = count; | ||
794 | while (atomic_read(&sig->count) > count) { | ||
795 | __set_current_state(TASK_UNINTERRUPTIBLE); | 794 | __set_current_state(TASK_UNINTERRUPTIBLE); |
796 | spin_unlock_irq(lock); | 795 | spin_unlock_irq(lock); |
797 | schedule(); | 796 | schedule(); |
@@ -1662,12 +1661,15 @@ static int coredump_wait(int exit_code, struct core_state *core_state) | |||
1662 | struct task_struct *tsk = current; | 1661 | struct task_struct *tsk = current; |
1663 | struct mm_struct *mm = tsk->mm; | 1662 | struct mm_struct *mm = tsk->mm; |
1664 | struct completion *vfork_done; | 1663 | struct completion *vfork_done; |
1665 | int core_waiters; | 1664 | int core_waiters = -EBUSY; |
1666 | 1665 | ||
1667 | init_completion(&core_state->startup); | 1666 | init_completion(&core_state->startup); |
1668 | core_state->dumper.task = tsk; | 1667 | core_state->dumper.task = tsk; |
1669 | core_state->dumper.next = NULL; | 1668 | core_state->dumper.next = NULL; |
1670 | core_waiters = zap_threads(tsk, mm, core_state, exit_code); | 1669 | |
1670 | down_write(&mm->mmap_sem); | ||
1671 | if (!mm->core_state) | ||
1672 | core_waiters = zap_threads(tsk, mm, core_state, exit_code); | ||
1671 | up_write(&mm->mmap_sem); | 1673 | up_write(&mm->mmap_sem); |
1672 | 1674 | ||
1673 | if (unlikely(core_waiters < 0)) | 1675 | if (unlikely(core_waiters < 0)) |
@@ -1787,21 +1789,61 @@ static void wait_for_dump_helpers(struct file *file) | |||
1787 | } | 1789 | } |
1788 | 1790 | ||
1789 | 1791 | ||
1792 | /* | ||
1793 | * umh_pipe_setup | ||
1794 | * helper function to customize the process used | ||
1795 | * to collect the core in userspace. Specifically | ||
1796 | * it sets up a pipe and installs it as fd 0 (stdin) | ||
1797 | * for the process. Returns 0 on success, or the | ||
1798 | * PTR_ERR() of the failed pipe end on failure. | ||
1799 | * Note that it also sets the core limit to 1. This | ||
1800 | * is a special value that we use to trap recursive | ||
1801 | * core dumps | ||
1802 | */ | ||
1803 | static int umh_pipe_setup(struct subprocess_info *info) | ||
1804 | { | ||
1805 | struct file *rp, *wp; | ||
1806 | struct fdtable *fdt; | ||
1807 | struct coredump_params *cp = (struct coredump_params *)info->data; | ||
1808 | struct files_struct *cf = current->files; | ||
1809 | |||
1810 | wp = create_write_pipe(0); | ||
1811 | if (IS_ERR(wp)) | ||
1812 | return PTR_ERR(wp); | ||
1813 | |||
1814 | rp = create_read_pipe(wp, 0); | ||
1815 | if (IS_ERR(rp)) { | ||
1816 | free_write_pipe(wp); | ||
1817 | return PTR_ERR(rp); | ||
1818 | } | ||
1819 | |||
1820 | cp->file = wp; | ||
1821 | |||
1822 | sys_close(0); | ||
1823 | fd_install(0, rp); | ||
1824 | spin_lock(&cf->file_lock); | ||
1825 | fdt = files_fdtable(cf); | ||
1826 | FD_SET(0, fdt->open_fds); | ||
1827 | FD_CLR(0, fdt->close_on_exec); | ||
1828 | spin_unlock(&cf->file_lock); | ||
1829 | |||
1830 | /* and disallow core files too */ | ||
1831 | current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1}; | ||
1832 | |||
1833 | return 0; | ||
1834 | } | ||
1835 | |||
1790 | void do_coredump(long signr, int exit_code, struct pt_regs *regs) | 1836 | void do_coredump(long signr, int exit_code, struct pt_regs *regs) |
1791 | { | 1837 | { |
1792 | struct core_state core_state; | 1838 | struct core_state core_state; |
1793 | char corename[CORENAME_MAX_SIZE + 1]; | 1839 | char corename[CORENAME_MAX_SIZE + 1]; |
1794 | struct mm_struct *mm = current->mm; | 1840 | struct mm_struct *mm = current->mm; |
1795 | struct linux_binfmt * binfmt; | 1841 | struct linux_binfmt * binfmt; |
1796 | struct inode * inode; | ||
1797 | const struct cred *old_cred; | 1842 | const struct cred *old_cred; |
1798 | struct cred *cred; | 1843 | struct cred *cred; |
1799 | int retval = 0; | 1844 | int retval = 0; |
1800 | int flag = 0; | 1845 | int flag = 0; |
1801 | int ispipe = 0; | 1846 | int ispipe; |
1802 | char **helper_argv = NULL; | ||
1803 | int helper_argc = 0; | ||
1804 | int dump_count = 0; | ||
1805 | static atomic_t core_dump_count = ATOMIC_INIT(0); | 1847 | static atomic_t core_dump_count = ATOMIC_INIT(0); |
1806 | struct coredump_params cprm = { | 1848 | struct coredump_params cprm = { |
1807 | .signr = signr, | 1849 | .signr = signr, |
@@ -1820,23 +1862,12 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) | |||
1820 | binfmt = mm->binfmt; | 1862 | binfmt = mm->binfmt; |
1821 | if (!binfmt || !binfmt->core_dump) | 1863 | if (!binfmt || !binfmt->core_dump) |
1822 | goto fail; | 1864 | goto fail; |
1823 | 1865 | if (!__get_dumpable(cprm.mm_flags)) | |
1824 | cred = prepare_creds(); | ||
1825 | if (!cred) { | ||
1826 | retval = -ENOMEM; | ||
1827 | goto fail; | 1866 | goto fail; |
1828 | } | ||
1829 | 1867 | ||
1830 | down_write(&mm->mmap_sem); | 1868 | cred = prepare_creds(); |
1831 | /* | 1869 | if (!cred) |
1832 | * If another thread got here first, or we are not dumpable, bail out. | ||
1833 | */ | ||
1834 | if (mm->core_state || !__get_dumpable(cprm.mm_flags)) { | ||
1835 | up_write(&mm->mmap_sem); | ||
1836 | put_cred(cred); | ||
1837 | goto fail; | 1870 | goto fail; |
1838 | } | ||
1839 | |||
1840 | /* | 1871 | /* |
1841 | * We cannot trust fsuid as being the "true" uid of the | 1872 | * We cannot trust fsuid as being the "true" uid of the |
1842 | * process nor do we know its entire history. We only know it | 1873 | * process nor do we know its entire history. We only know it |
@@ -1849,10 +1880,8 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) | |||
1849 | } | 1880 | } |
1850 | 1881 | ||
1851 | retval = coredump_wait(exit_code, &core_state); | 1882 | retval = coredump_wait(exit_code, &core_state); |
1852 | if (retval < 0) { | 1883 | if (retval < 0) |
1853 | put_cred(cred); | 1884 | goto fail_creds; |
1854 | goto fail; | ||
1855 | } | ||
1856 | 1885 | ||
1857 | old_cred = override_creds(cred); | 1886 | old_cred = override_creds(cred); |
1858 | 1887 | ||
@@ -1870,19 +1899,19 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) | |||
1870 | ispipe = format_corename(corename, signr); | 1899 | ispipe = format_corename(corename, signr); |
1871 | unlock_kernel(); | 1900 | unlock_kernel(); |
1872 | 1901 | ||
1873 | if ((!ispipe) && (cprm.limit < binfmt->min_coredump)) | ||
1874 | goto fail_unlock; | ||
1875 | |||
1876 | if (ispipe) { | 1902 | if (ispipe) { |
1877 | if (cprm.limit == 0) { | 1903 | int dump_count; |
1904 | char **helper_argv; | ||
1905 | |||
1906 | if (cprm.limit == 1) { | ||
1878 | /* | 1907 | /* |
1879 | * Normally core limits are irrelevant to pipes, since | 1908 | * Normally core limits are irrelevant to pipes, since |
1880 | * we're not writing to the file system, but we use | 1909 | * we're not writing to the file system, but we use |
1881 | * cprm.limit of 0 here as a special value. Any | 1910 | * cprm.limit of 1 here as a special value. Any |
1882 | * non-zero limit gets set to RLIM_INFINITY below, but | 1911 | * non-1 limit gets set to RLIM_INFINITY below, but |
1883 | * a limit of 0 skips the dump. This is a consistent | 1912 | * a limit of 1 skips the dump. This is a consistent |
1884 | * way to catch recursive crashes. We can still crash | 1913 | * way to catch recursive crashes. We can still crash |
1885 | * if the core_pattern binary sets RLIM_CORE = !0 | 1914 | * if the core_pattern binary sets RLIM_CORE = !1 |
1886 | * but it runs as root, and can do lots of stupid things | 1915 | * but it runs as root, and can do lots of stupid things |
1887 | * Note that we use task_tgid_vnr here to grab the pid | 1916 | * Note that we use task_tgid_vnr here to grab the pid |
1888 | * of the process group leader. That way we get the | 1917 | * of the process group leader. That way we get the |
@@ -1890,11 +1919,12 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) | |||
1890 | * core_pattern process dies. | 1919 | * core_pattern process dies. |
1891 | */ | 1920 | */ |
1892 | printk(KERN_WARNING | 1921 | printk(KERN_WARNING |
1893 | "Process %d(%s) has RLIMIT_CORE set to 0\n", | 1922 | "Process %d(%s) has RLIMIT_CORE set to 1\n", |
1894 | task_tgid_vnr(current), current->comm); | 1923 | task_tgid_vnr(current), current->comm); |
1895 | printk(KERN_WARNING "Aborting core\n"); | 1924 | printk(KERN_WARNING "Aborting core\n"); |
1896 | goto fail_unlock; | 1925 | goto fail_unlock; |
1897 | } | 1926 | } |
1927 | cprm.limit = RLIM_INFINITY; | ||
1898 | 1928 | ||
1899 | dump_count = atomic_inc_return(&core_dump_count); | 1929 | dump_count = atomic_inc_return(&core_dump_count); |
1900 | if (core_pipe_limit && (core_pipe_limit < dump_count)) { | 1930 | if (core_pipe_limit && (core_pipe_limit < dump_count)) { |
@@ -1904,71 +1934,74 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) | |||
1904 | goto fail_dropcount; | 1934 | goto fail_dropcount; |
1905 | } | 1935 | } |
1906 | 1936 | ||
1907 | helper_argv = argv_split(GFP_KERNEL, corename+1, &helper_argc); | 1937 | helper_argv = argv_split(GFP_KERNEL, corename+1, NULL); |
1908 | if (!helper_argv) { | 1938 | if (!helper_argv) { |
1909 | printk(KERN_WARNING "%s failed to allocate memory\n", | 1939 | printk(KERN_WARNING "%s failed to allocate memory\n", |
1910 | __func__); | 1940 | __func__); |
1911 | goto fail_dropcount; | 1941 | goto fail_dropcount; |
1912 | } | 1942 | } |
1913 | 1943 | ||
1914 | cprm.limit = RLIM_INFINITY; | 1944 | retval = call_usermodehelper_fns(helper_argv[0], helper_argv, |
1915 | 1945 | NULL, UMH_WAIT_EXEC, umh_pipe_setup, | |
1916 | /* SIGPIPE can happen, but it's just never processed */ | 1946 | NULL, &cprm); |
1917 | if (call_usermodehelper_pipe(helper_argv[0], helper_argv, NULL, | 1947 | argv_free(helper_argv); |
1918 | &cprm.file)) { | 1948 | if (retval) { |
1919 | printk(KERN_INFO "Core dump to %s pipe failed\n", | 1949 | printk(KERN_INFO "Core dump to %s pipe failed\n", |
1920 | corename); | 1950 | corename); |
1921 | goto fail_dropcount; | 1951 | goto close_fail; |
1922 | } | 1952 | } |
1923 | } else | 1953 | } else { |
1954 | struct inode *inode; | ||
1955 | |||
1956 | if (cprm.limit < binfmt->min_coredump) | ||
1957 | goto fail_unlock; | ||
1958 | |||
1924 | cprm.file = filp_open(corename, | 1959 | cprm.file = filp_open(corename, |
1925 | O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, | 1960 | O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, |
1926 | 0600); | 1961 | 0600); |
1927 | if (IS_ERR(cprm.file)) | 1962 | if (IS_ERR(cprm.file)) |
1928 | goto fail_dropcount; | 1963 | goto fail_unlock; |
1929 | inode = cprm.file->f_path.dentry->d_inode; | ||
1930 | if (inode->i_nlink > 1) | ||
1931 | goto close_fail; /* multiple links - don't dump */ | ||
1932 | if (!ispipe && d_unhashed(cprm.file->f_path.dentry)) | ||
1933 | goto close_fail; | ||
1934 | |||
1935 | /* AK: actually i see no reason to not allow this for named pipes etc., | ||
1936 | but keep the previous behaviour for now. */ | ||
1937 | if (!ispipe && !S_ISREG(inode->i_mode)) | ||
1938 | goto close_fail; | ||
1939 | /* | ||
1940 | * Dont allow local users get cute and trick others to coredump | ||
1941 | * into their pre-created files: | ||
1942 | * Note, this is not relevant for pipes | ||
1943 | */ | ||
1944 | if (!ispipe && (inode->i_uid != current_fsuid())) | ||
1945 | goto close_fail; | ||
1946 | if (!cprm.file->f_op) | ||
1947 | goto close_fail; | ||
1948 | if (!cprm.file->f_op->write) | ||
1949 | goto close_fail; | ||
1950 | if (!ispipe && | ||
1951 | do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file) != 0) | ||
1952 | goto close_fail; | ||
1953 | 1964 | ||
1954 | retval = binfmt->core_dump(&cprm); | 1965 | inode = cprm.file->f_path.dentry->d_inode; |
1966 | if (inode->i_nlink > 1) | ||
1967 | goto close_fail; | ||
1968 | if (d_unhashed(cprm.file->f_path.dentry)) | ||
1969 | goto close_fail; | ||
1970 | /* | ||
1971 | * AK: actually i see no reason to not allow this for named | ||
1972 | * pipes etc, but keep the previous behaviour for now. | ||
1973 | */ | ||
1974 | if (!S_ISREG(inode->i_mode)) | ||
1975 | goto close_fail; | ||
1976 | /* | ||
1977 | * Dont allow local users get cute and trick others to coredump | ||
1978 | * into their pre-created files. | ||
1979 | */ | ||
1980 | if (inode->i_uid != current_fsuid()) | ||
1981 | goto close_fail; | ||
1982 | if (!cprm.file->f_op || !cprm.file->f_op->write) | ||
1983 | goto close_fail; | ||
1984 | if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file)) | ||
1985 | goto close_fail; | ||
1986 | } | ||
1955 | 1987 | ||
1988 | retval = binfmt->core_dump(&cprm); | ||
1956 | if (retval) | 1989 | if (retval) |
1957 | current->signal->group_exit_code |= 0x80; | 1990 | current->signal->group_exit_code |= 0x80; |
1958 | close_fail: | 1991 | |
1959 | if (ispipe && core_pipe_limit) | 1992 | if (ispipe && core_pipe_limit) |
1960 | wait_for_dump_helpers(cprm.file); | 1993 | wait_for_dump_helpers(cprm.file); |
1961 | filp_close(cprm.file, NULL); | 1994 | close_fail: |
1995 | if (cprm.file) | ||
1996 | filp_close(cprm.file, NULL); | ||
1962 | fail_dropcount: | 1997 | fail_dropcount: |
1963 | if (dump_count) | 1998 | if (ispipe) |
1964 | atomic_dec(&core_dump_count); | 1999 | atomic_dec(&core_dump_count); |
1965 | fail_unlock: | 2000 | fail_unlock: |
1966 | if (helper_argv) | 2001 | coredump_finish(mm); |
1967 | argv_free(helper_argv); | ||
1968 | |||
1969 | revert_creds(old_cred); | 2002 | revert_creds(old_cred); |
2003 | fail_creds: | ||
1970 | put_cred(cred); | 2004 | put_cred(cred); |
1971 | coredump_finish(mm); | ||
1972 | fail: | 2005 | fail: |
1973 | return; | 2006 | return; |
1974 | } | 2007 | } |
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index d2f37a5516c7..95b7594c76f9 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
@@ -591,14 +591,15 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, | |||
591 | ret = ext4_mb_new_blocks(handle, &ar, errp); | 591 | ret = ext4_mb_new_blocks(handle, &ar, errp); |
592 | if (count) | 592 | if (count) |
593 | *count = ar.len; | 593 | *count = ar.len; |
594 | |||
595 | /* | 594 | /* |
596 | * Account for the allocated meta blocks | 595 | * Account for the allocated meta blocks. We will never |
596 | * fail EDQUOT for metadata, but we do account for it. | ||
597 | */ | 597 | */ |
598 | if (!(*errp) && EXT4_I(inode)->i_delalloc_reserved_flag) { | 598 | if (!(*errp) && EXT4_I(inode)->i_delalloc_reserved_flag) { |
599 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | 599 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); |
600 | EXT4_I(inode)->i_allocated_meta_blocks += ar.len; | 600 | EXT4_I(inode)->i_allocated_meta_blocks += ar.len; |
601 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 601 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
602 | dquot_alloc_block_nofail(inode, ar.len); | ||
602 | } | 603 | } |
603 | return ret; | 604 | return ret; |
604 | } | 605 | } |
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 538c48655084..5b6973fbf1bd 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c | |||
@@ -72,9 +72,9 @@ static int add_system_zone(struct ext4_sb_info *sbi, | |||
72 | else if (start_blk >= (entry->start_blk + entry->count)) | 72 | else if (start_blk >= (entry->start_blk + entry->count)) |
73 | n = &(*n)->rb_right; | 73 | n = &(*n)->rb_right; |
74 | else { | 74 | else { |
75 | if (start_blk + count > (entry->start_blk + | 75 | if (start_blk + count > (entry->start_blk + |
76 | entry->count)) | 76 | entry->count)) |
77 | entry->count = (start_blk + count - | 77 | entry->count = (start_blk + count - |
78 | entry->start_blk); | 78 | entry->start_blk); |
79 | new_node = *n; | 79 | new_node = *n; |
80 | new_entry = rb_entry(new_node, struct ext4_system_zone, | 80 | new_entry = rb_entry(new_node, struct ext4_system_zone, |
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 86cb6d86a048..ea5e6cb7e2a5 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c | |||
@@ -83,11 +83,10 @@ int ext4_check_dir_entry(const char *function, struct inode *dir, | |||
83 | error_msg = "inode out of bounds"; | 83 | error_msg = "inode out of bounds"; |
84 | 84 | ||
85 | if (error_msg != NULL) | 85 | if (error_msg != NULL) |
86 | __ext4_error(dir->i_sb, function, | 86 | ext4_error_inode(function, dir, |
87 | "bad entry in directory #%lu: %s - block=%llu" | 87 | "bad entry in directory: %s - block=%llu" |
88 | "offset=%u(%u), inode=%u, rec_len=%d, name_len=%d", | 88 | "offset=%u(%u), inode=%u, rec_len=%d, name_len=%d", |
89 | dir->i_ino, error_msg, | 89 | error_msg, (unsigned long long) bh->b_blocknr, |
90 | (unsigned long long) bh->b_blocknr, | ||
91 | (unsigned) (offset%bh->b_size), offset, | 90 | (unsigned) (offset%bh->b_size), offset, |
92 | le32_to_cpu(de->inode), | 91 | le32_to_cpu(de->inode), |
93 | rlen, de->name_len); | 92 | rlen, de->name_len); |
@@ -111,7 +110,7 @@ static int ext4_readdir(struct file *filp, | |||
111 | 110 | ||
112 | if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb, | 111 | if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb, |
113 | EXT4_FEATURE_COMPAT_DIR_INDEX) && | 112 | EXT4_FEATURE_COMPAT_DIR_INDEX) && |
114 | ((EXT4_I(inode)->i_flags & EXT4_INDEX_FL) || | 113 | ((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) || |
115 | ((inode->i_size >> sb->s_blocksize_bits) == 1))) { | 114 | ((inode->i_size >> sb->s_blocksize_bits) == 1))) { |
116 | err = ext4_dx_readdir(filp, dirent, filldir); | 115 | err = ext4_dx_readdir(filp, dirent, filldir); |
117 | if (err != ERR_BAD_DX_DIR) { | 116 | if (err != ERR_BAD_DX_DIR) { |
@@ -122,20 +121,20 @@ static int ext4_readdir(struct file *filp, | |||
122 | * We don't set the inode dirty flag since it's not | 121 | * We don't set the inode dirty flag since it's not |
123 | * critical that it get flushed back to the disk. | 122 | * critical that it get flushed back to the disk. |
124 | */ | 123 | */ |
125 | EXT4_I(filp->f_path.dentry->d_inode)->i_flags &= ~EXT4_INDEX_FL; | 124 | ext4_clear_inode_flag(filp->f_path.dentry->d_inode, EXT4_INODE_INDEX); |
126 | } | 125 | } |
127 | stored = 0; | 126 | stored = 0; |
128 | offset = filp->f_pos & (sb->s_blocksize - 1); | 127 | offset = filp->f_pos & (sb->s_blocksize - 1); |
129 | 128 | ||
130 | while (!error && !stored && filp->f_pos < inode->i_size) { | 129 | while (!error && !stored && filp->f_pos < inode->i_size) { |
131 | ext4_lblk_t blk = filp->f_pos >> EXT4_BLOCK_SIZE_BITS(sb); | 130 | struct ext4_map_blocks map; |
132 | struct buffer_head map_bh; | ||
133 | struct buffer_head *bh = NULL; | 131 | struct buffer_head *bh = NULL; |
134 | 132 | ||
135 | map_bh.b_state = 0; | 133 | map.m_lblk = filp->f_pos >> EXT4_BLOCK_SIZE_BITS(sb); |
136 | err = ext4_get_blocks(NULL, inode, blk, 1, &map_bh, 0); | 134 | map.m_len = 1; |
135 | err = ext4_map_blocks(NULL, inode, &map, 0); | ||
137 | if (err > 0) { | 136 | if (err > 0) { |
138 | pgoff_t index = map_bh.b_blocknr >> | 137 | pgoff_t index = map.m_pblk >> |
139 | (PAGE_CACHE_SHIFT - inode->i_blkbits); | 138 | (PAGE_CACHE_SHIFT - inode->i_blkbits); |
140 | if (!ra_has_index(&filp->f_ra, index)) | 139 | if (!ra_has_index(&filp->f_ra, index)) |
141 | page_cache_sync_readahead( | 140 | page_cache_sync_readahead( |
@@ -143,7 +142,7 @@ static int ext4_readdir(struct file *filp, | |||
143 | &filp->f_ra, filp, | 142 | &filp->f_ra, filp, |
144 | index, 1); | 143 | index, 1); |
145 | filp->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; | 144 | filp->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; |
146 | bh = ext4_bread(NULL, inode, blk, 0, &err); | 145 | bh = ext4_bread(NULL, inode, map.m_lblk, 0, &err); |
147 | } | 146 | } |
148 | 147 | ||
149 | /* | 148 | /* |
@@ -152,9 +151,8 @@ static int ext4_readdir(struct file *filp, | |||
152 | */ | 151 | */ |
153 | if (!bh) { | 152 | if (!bh) { |
154 | if (!dir_has_error) { | 153 | if (!dir_has_error) { |
155 | ext4_error(sb, "directory #%lu " | 154 | EXT4_ERROR_INODE(inode, "directory " |
156 | "contains a hole at offset %Lu", | 155 | "contains a hole at offset %Lu", |
157 | inode->i_ino, | ||
158 | (unsigned long long) filp->f_pos); | 156 | (unsigned long long) filp->f_pos); |
159 | dir_has_error = 1; | 157 | dir_has_error = 1; |
160 | } | 158 | } |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index bf938cf7c5f0..60bd31026e7c 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -29,6 +29,9 @@ | |||
29 | #include <linux/wait.h> | 29 | #include <linux/wait.h> |
30 | #include <linux/blockgroup_lock.h> | 30 | #include <linux/blockgroup_lock.h> |
31 | #include <linux/percpu_counter.h> | 31 | #include <linux/percpu_counter.h> |
32 | #ifdef __KERNEL__ | ||
33 | #include <linux/compat.h> | ||
34 | #endif | ||
32 | 35 | ||
33 | /* | 36 | /* |
34 | * The fourth extended filesystem constants/structures | 37 | * The fourth extended filesystem constants/structures |
@@ -54,10 +57,10 @@ | |||
54 | #endif | 57 | #endif |
55 | 58 | ||
56 | #define EXT4_ERROR_INODE(inode, fmt, a...) \ | 59 | #define EXT4_ERROR_INODE(inode, fmt, a...) \ |
57 | ext4_error_inode(__func__, (inode), (fmt), ## a); | 60 | ext4_error_inode(__func__, (inode), (fmt), ## a) |
58 | 61 | ||
59 | #define EXT4_ERROR_FILE(file, fmt, a...) \ | 62 | #define EXT4_ERROR_FILE(file, fmt, a...) \ |
60 | ext4_error_file(__func__, (file), (fmt), ## a); | 63 | ext4_error_file(__func__, (file), (fmt), ## a) |
61 | 64 | ||
62 | /* data type for block offset of block group */ | 65 | /* data type for block offset of block group */ |
63 | typedef int ext4_grpblk_t; | 66 | typedef int ext4_grpblk_t; |
@@ -72,7 +75,7 @@ typedef __u32 ext4_lblk_t; | |||
72 | typedef unsigned int ext4_group_t; | 75 | typedef unsigned int ext4_group_t; |
73 | 76 | ||
74 | /* | 77 | /* |
75 | * Flags used in mballoc's allocation_context flags field. | 78 | * Flags used in mballoc's allocation_context flags field. |
76 | * | 79 | * |
77 | * Also used to show what's going on for debugging purposes when the | 80 | * Also used to show what's going on for debugging purposes when the |
78 | * flag field is exported via the traceport interface | 81 | * flag field is exported via the traceport interface |
@@ -126,6 +129,29 @@ struct ext4_allocation_request { | |||
126 | }; | 129 | }; |
127 | 130 | ||
128 | /* | 131 | /* |
132 | * Logical to physical block mapping, used by ext4_map_blocks() | ||
133 | * | ||
134 | * This structure is used to pass requests into ext4_map_blocks() as | ||
135 | * well as to store the information returned by ext4_map_blocks(). It | ||
136 | * takes less room on the stack than a struct buffer_head. | ||
137 | */ | ||
138 | #define EXT4_MAP_NEW (1 << BH_New) | ||
139 | #define EXT4_MAP_MAPPED (1 << BH_Mapped) | ||
140 | #define EXT4_MAP_UNWRITTEN (1 << BH_Unwritten) | ||
141 | #define EXT4_MAP_BOUNDARY (1 << BH_Boundary) | ||
142 | #define EXT4_MAP_UNINIT (1 << BH_Uninit) | ||
143 | #define EXT4_MAP_FLAGS (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\ | ||
144 | EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\ | ||
145 | EXT4_MAP_UNINIT) | ||
146 | |||
147 | struct ext4_map_blocks { | ||
148 | ext4_fsblk_t m_pblk; | ||
149 | ext4_lblk_t m_lblk; | ||
150 | unsigned int m_len; | ||
151 | unsigned int m_flags; | ||
152 | }; | ||
153 | |||
154 | /* | ||
129 | * For delayed allocation tracking | 155 | * For delayed allocation tracking |
130 | */ | 156 | */ |
131 | struct mpage_da_data { | 157 | struct mpage_da_data { |
@@ -321,6 +347,83 @@ static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags) | |||
321 | return flags & EXT4_OTHER_FLMASK; | 347 | return flags & EXT4_OTHER_FLMASK; |
322 | } | 348 | } |
323 | 349 | ||
350 | /* | ||
351 | * Inode flags used for atomic set/get | ||
352 | */ | ||
353 | enum { | ||
354 | EXT4_INODE_SECRM = 0, /* Secure deletion */ | ||
355 | EXT4_INODE_UNRM = 1, /* Undelete */ | ||
356 | EXT4_INODE_COMPR = 2, /* Compress file */ | ||
357 | EXT4_INODE_SYNC = 3, /* Synchronous updates */ | ||
358 | EXT4_INODE_IMMUTABLE = 4, /* Immutable file */ | ||
359 | EXT4_INODE_APPEND = 5, /* writes to file may only append */ | ||
360 | EXT4_INODE_NODUMP = 6, /* do not dump file */ | ||
361 | EXT4_INODE_NOATIME = 7, /* do not update atime */ | ||
362 | /* Reserved for compression usage... */ | ||
363 | EXT4_INODE_DIRTY = 8, | ||
364 | EXT4_INODE_COMPRBLK = 9, /* One or more compressed clusters */ | ||
365 | EXT4_INODE_NOCOMPR = 10, /* Don't compress */ | ||
366 | EXT4_INODE_ECOMPR = 11, /* Compression error */ | ||
367 | /* End compression flags --- maybe not all used */ | ||
368 | EXT4_INODE_INDEX = 12, /* hash-indexed directory */ | ||
369 | EXT4_INODE_IMAGIC = 13, /* AFS directory */ | ||
370 | EXT4_INODE_JOURNAL_DATA = 14, /* file data should be journaled */ | ||
371 | EXT4_INODE_NOTAIL = 15, /* file tail should not be merged */ | ||
372 | EXT4_INODE_DIRSYNC = 16, /* dirsync behaviour (directories only) */ | ||
373 | EXT4_INODE_TOPDIR = 17, /* Top of directory hierarchies*/ | ||
374 | EXT4_INODE_HUGE_FILE = 18, /* Set to each huge file */ | ||
375 | EXT4_INODE_EXTENTS = 19, /* Inode uses extents */ | ||
376 | EXT4_INODE_EA_INODE = 21, /* Inode used for large EA */ | ||
377 | EXT4_INODE_EOFBLOCKS = 22, /* Blocks allocated beyond EOF */ | ||
378 | EXT4_INODE_RESERVED = 31, /* reserved for ext4 lib */ | ||
379 | }; | ||
380 | |||
381 | #define TEST_FLAG_VALUE(FLAG) (EXT4_##FLAG##_FL == (1 << EXT4_INODE_##FLAG)) | ||
382 | #define CHECK_FLAG_VALUE(FLAG) if (!TEST_FLAG_VALUE(FLAG)) { \ | ||
383 | printk(KERN_EMERG "EXT4 flag fail: " #FLAG ": %d %d\n", \ | ||
384 | EXT4_##FLAG##_FL, EXT4_INODE_##FLAG); BUG_ON(1); } | ||
385 | |||
386 | /* | ||
387 | * Since it's pretty easy to mix up bit numbers and hex values, and we | ||
388 | * can't do a compile-time test for ENUM values, we use a run-time | ||
389 | * test to make sure that EXT4_XXX_FL is consistent with respect to | ||
390 | * EXT4_INODE_XXX. If all is well the printk and BUG_ON will all drop | ||
391 | * out so it won't cost any extra space in the compiled kernel image. | ||
392 | * But it's important that these values are the same, since we are | ||
393 | * using EXT4_INODE_XXX to test for the flag values, but EXT4_XX_FL | ||
394 | * must be consistent with the values of FS_XXX_FL defined in | ||
395 | * include/linux/fs.h and the on-disk values found in ext2, ext3, and | ||
396 | * ext4 filesystems, and of course the values defined in e2fsprogs. | ||
397 | * | ||
398 | * It's not paranoia if the Murphy's Law really *is* out to get you. :-) | ||
399 | */ | ||
400 | static inline void ext4_check_flag_values(void) | ||
401 | { | ||
402 | CHECK_FLAG_VALUE(SECRM); | ||
403 | CHECK_FLAG_VALUE(UNRM); | ||
404 | CHECK_FLAG_VALUE(COMPR); | ||
405 | CHECK_FLAG_VALUE(SYNC); | ||
406 | CHECK_FLAG_VALUE(IMMUTABLE); | ||
407 | CHECK_FLAG_VALUE(APPEND); | ||
408 | CHECK_FLAG_VALUE(NODUMP); | ||
409 | CHECK_FLAG_VALUE(NOATIME); | ||
410 | CHECK_FLAG_VALUE(DIRTY); | ||
411 | CHECK_FLAG_VALUE(COMPRBLK); | ||
412 | CHECK_FLAG_VALUE(NOCOMPR); | ||
413 | CHECK_FLAG_VALUE(ECOMPR); | ||
414 | CHECK_FLAG_VALUE(INDEX); | ||
415 | CHECK_FLAG_VALUE(IMAGIC); | ||
416 | CHECK_FLAG_VALUE(JOURNAL_DATA); | ||
417 | CHECK_FLAG_VALUE(NOTAIL); | ||
418 | CHECK_FLAG_VALUE(DIRSYNC); | ||
419 | CHECK_FLAG_VALUE(TOPDIR); | ||
420 | CHECK_FLAG_VALUE(HUGE_FILE); | ||
421 | CHECK_FLAG_VALUE(EXTENTS); | ||
422 | CHECK_FLAG_VALUE(EA_INODE); | ||
423 | CHECK_FLAG_VALUE(EOFBLOCKS); | ||
424 | CHECK_FLAG_VALUE(RESERVED); | ||
425 | } | ||
426 | |||
324 | /* Used to pass group descriptor data when online resize is done */ | 427 | /* Used to pass group descriptor data when online resize is done */ |
325 | struct ext4_new_group_input { | 428 | struct ext4_new_group_input { |
326 | __u32 group; /* Group number for this data */ | 429 | __u32 group; /* Group number for this data */ |
@@ -332,6 +435,18 @@ struct ext4_new_group_input { | |||
332 | __u16 unused; | 435 | __u16 unused; |
333 | }; | 436 | }; |
334 | 437 | ||
438 | #if defined(__KERNEL__) && defined(CONFIG_COMPAT) | ||
439 | struct compat_ext4_new_group_input { | ||
440 | u32 group; | ||
441 | compat_u64 block_bitmap; | ||
442 | compat_u64 inode_bitmap; | ||
443 | compat_u64 inode_table; | ||
444 | u32 blocks_count; | ||
445 | u16 reserved_blocks; | ||
446 | u16 unused; | ||
447 | }; | ||
448 | #endif | ||
449 | |||
335 | /* The struct ext4_new_group_input in kernel space, with free_blocks_count */ | 450 | /* The struct ext4_new_group_input in kernel space, with free_blocks_count */ |
336 | struct ext4_new_group_data { | 451 | struct ext4_new_group_data { |
337 | __u32 group; | 452 | __u32 group; |
@@ -355,7 +470,7 @@ struct ext4_new_group_data { | |||
355 | #define EXT4_GET_BLOCKS_CREATE_UNINIT_EXT (EXT4_GET_BLOCKS_UNINIT_EXT|\ | 470 | #define EXT4_GET_BLOCKS_CREATE_UNINIT_EXT (EXT4_GET_BLOCKS_UNINIT_EXT|\ |
356 | EXT4_GET_BLOCKS_CREATE) | 471 | EXT4_GET_BLOCKS_CREATE) |
357 | /* Caller is from the delayed allocation writeout path, | 472 | /* Caller is from the delayed allocation writeout path, |
358 | so set the magic i_delalloc_reserve_flag after taking the | 473 | so set the magic i_delalloc_reserve_flag after taking the |
359 | inode allocation semaphore for */ | 474 | inode allocation semaphore for */ |
360 | #define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004 | 475 | #define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004 |
361 | /* caller is from the direct IO path, request to creation of an | 476 | /* caller is from the direct IO path, request to creation of an |
@@ -398,6 +513,7 @@ struct ext4_new_group_data { | |||
398 | #define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12) | 513 | #define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12) |
399 | #define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent) | 514 | #define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent) |
400 | 515 | ||
516 | #if defined(__KERNEL__) && defined(CONFIG_COMPAT) | ||
401 | /* | 517 | /* |
402 | * ioctl commands in 32 bit emulation | 518 | * ioctl commands in 32 bit emulation |
403 | */ | 519 | */ |
@@ -408,11 +524,13 @@ struct ext4_new_group_data { | |||
408 | #define EXT4_IOC32_GETRSVSZ _IOR('f', 5, int) | 524 | #define EXT4_IOC32_GETRSVSZ _IOR('f', 5, int) |
409 | #define EXT4_IOC32_SETRSVSZ _IOW('f', 6, int) | 525 | #define EXT4_IOC32_SETRSVSZ _IOW('f', 6, int) |
410 | #define EXT4_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int) | 526 | #define EXT4_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int) |
527 | #define EXT4_IOC32_GROUP_ADD _IOW('f', 8, struct compat_ext4_new_group_input) | ||
411 | #ifdef CONFIG_JBD2_DEBUG | 528 | #ifdef CONFIG_JBD2_DEBUG |
412 | #define EXT4_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int) | 529 | #define EXT4_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int) |
413 | #endif | 530 | #endif |
414 | #define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION | 531 | #define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION |
415 | #define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION | 532 | #define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION |
533 | #endif | ||
416 | 534 | ||
417 | 535 | ||
418 | /* | 536 | /* |
@@ -616,9 +734,8 @@ struct ext4_ext_cache { | |||
616 | */ | 734 | */ |
617 | struct ext4_inode_info { | 735 | struct ext4_inode_info { |
618 | __le32 i_data[15]; /* unconverted */ | 736 | __le32 i_data[15]; /* unconverted */ |
619 | __u32 i_flags; | ||
620 | ext4_fsblk_t i_file_acl; | ||
621 | __u32 i_dtime; | 737 | __u32 i_dtime; |
738 | ext4_fsblk_t i_file_acl; | ||
622 | 739 | ||
623 | /* | 740 | /* |
624 | * i_block_group is the number of the block group which contains | 741 | * i_block_group is the number of the block group which contains |
@@ -629,6 +746,7 @@ struct ext4_inode_info { | |||
629 | */ | 746 | */ |
630 | ext4_group_t i_block_group; | 747 | ext4_group_t i_block_group; |
631 | unsigned long i_state_flags; /* Dynamic state flags */ | 748 | unsigned long i_state_flags; /* Dynamic state flags */ |
749 | unsigned long i_flags; | ||
632 | 750 | ||
633 | ext4_lblk_t i_dir_start_lookup; | 751 | ext4_lblk_t i_dir_start_lookup; |
634 | #ifdef CONFIG_EXT4_FS_XATTR | 752 | #ifdef CONFIG_EXT4_FS_XATTR |
@@ -1062,22 +1180,25 @@ enum { | |||
1062 | EXT4_STATE_DA_ALLOC_CLOSE, /* Alloc DA blks on close */ | 1180 | EXT4_STATE_DA_ALLOC_CLOSE, /* Alloc DA blks on close */ |
1063 | EXT4_STATE_EXT_MIGRATE, /* Inode is migrating */ | 1181 | EXT4_STATE_EXT_MIGRATE, /* Inode is migrating */ |
1064 | EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/ | 1182 | EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/ |
1183 | EXT4_STATE_NEWENTRY, /* File just added to dir */ | ||
1065 | }; | 1184 | }; |
1066 | 1185 | ||
1067 | static inline int ext4_test_inode_state(struct inode *inode, int bit) | 1186 | #define EXT4_INODE_BIT_FNS(name, field) \ |
1068 | { | 1187 | static inline int ext4_test_inode_##name(struct inode *inode, int bit) \ |
1069 | return test_bit(bit, &EXT4_I(inode)->i_state_flags); | 1188 | { \ |
1070 | } | 1189 | return test_bit(bit, &EXT4_I(inode)->i_##field); \ |
1071 | 1190 | } \ | |
1072 | static inline void ext4_set_inode_state(struct inode *inode, int bit) | 1191 | static inline void ext4_set_inode_##name(struct inode *inode, int bit) \ |
1073 | { | 1192 | { \ |
1074 | set_bit(bit, &EXT4_I(inode)->i_state_flags); | 1193 | set_bit(bit, &EXT4_I(inode)->i_##field); \ |
1194 | } \ | ||
1195 | static inline void ext4_clear_inode_##name(struct inode *inode, int bit) \ | ||
1196 | { \ | ||
1197 | clear_bit(bit, &EXT4_I(inode)->i_##field); \ | ||
1075 | } | 1198 | } |
1076 | 1199 | ||
1077 | static inline void ext4_clear_inode_state(struct inode *inode, int bit) | 1200 | EXT4_INODE_BIT_FNS(flag, flags) |
1078 | { | 1201 | EXT4_INODE_BIT_FNS(state, state_flags) |
1079 | clear_bit(bit, &EXT4_I(inode)->i_state_flags); | ||
1080 | } | ||
1081 | #else | 1202 | #else |
1082 | /* Assume that user mode programs are passing in an ext4fs superblock, not | 1203 | /* Assume that user mode programs are passing in an ext4fs superblock, not |
1083 | * a kernel struct super_block. This will allow us to call the feature-test | 1204 | * a kernel struct super_block. This will allow us to call the feature-test |
@@ -1264,7 +1385,7 @@ struct ext4_dir_entry_2 { | |||
1264 | 1385 | ||
1265 | #define is_dx(dir) (EXT4_HAS_COMPAT_FEATURE(dir->i_sb, \ | 1386 | #define is_dx(dir) (EXT4_HAS_COMPAT_FEATURE(dir->i_sb, \ |
1266 | EXT4_FEATURE_COMPAT_DIR_INDEX) && \ | 1387 | EXT4_FEATURE_COMPAT_DIR_INDEX) && \ |
1267 | (EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) | 1388 | ext4_test_inode_flag((dir), EXT4_INODE_INDEX)) |
1268 | #define EXT4_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT4_LINK_MAX) | 1389 | #define EXT4_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT4_LINK_MAX) |
1269 | #define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1) | 1390 | #define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1) |
1270 | 1391 | ||
@@ -1678,6 +1799,7 @@ struct ext4_group_info { | |||
1678 | ext4_grpblk_t bb_first_free; /* first free block */ | 1799 | ext4_grpblk_t bb_first_free; /* first free block */ |
1679 | ext4_grpblk_t bb_free; /* total free blocks */ | 1800 | ext4_grpblk_t bb_free; /* total free blocks */ |
1680 | ext4_grpblk_t bb_fragments; /* nr of freespace fragments */ | 1801 | ext4_grpblk_t bb_fragments; /* nr of freespace fragments */ |
1802 | ext4_grpblk_t bb_largest_free_order;/* order of largest frag in BG */ | ||
1681 | struct list_head bb_prealloc_list; | 1803 | struct list_head bb_prealloc_list; |
1682 | #ifdef DOUBLE_CHECK | 1804 | #ifdef DOUBLE_CHECK |
1683 | void *bb_bitmap; | 1805 | void *bb_bitmap; |
@@ -1772,9 +1894,8 @@ extern int ext4_ext_tree_init(handle_t *handle, struct inode *); | |||
1772 | extern int ext4_ext_writepage_trans_blocks(struct inode *, int); | 1894 | extern int ext4_ext_writepage_trans_blocks(struct inode *, int); |
1773 | extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, | 1895 | extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, |
1774 | int chunk); | 1896 | int chunk); |
1775 | extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | 1897 | extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, |
1776 | ext4_lblk_t iblock, unsigned int max_blocks, | 1898 | struct ext4_map_blocks *map, int flags); |
1777 | struct buffer_head *bh_result, int flags); | ||
1778 | extern void ext4_ext_truncate(struct inode *); | 1899 | extern void ext4_ext_truncate(struct inode *); |
1779 | extern void ext4_ext_init(struct super_block *); | 1900 | extern void ext4_ext_init(struct super_block *); |
1780 | extern void ext4_ext_release(struct super_block *); | 1901 | extern void ext4_ext_release(struct super_block *); |
@@ -1782,6 +1903,8 @@ extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, | |||
1782 | loff_t len); | 1903 | loff_t len); |
1783 | extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, | 1904 | extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, |
1784 | ssize_t len); | 1905 | ssize_t len); |
1906 | extern int ext4_map_blocks(handle_t *handle, struct inode *inode, | ||
1907 | struct ext4_map_blocks *map, int flags); | ||
1785 | extern int ext4_get_blocks(handle_t *handle, struct inode *inode, | 1908 | extern int ext4_get_blocks(handle_t *handle, struct inode *inode, |
1786 | sector_t block, unsigned int max_blocks, | 1909 | sector_t block, unsigned int max_blocks, |
1787 | struct buffer_head *bh, int flags); | 1910 | struct buffer_head *bh, int flags); |
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index b79ad5126468..dade0c024797 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h | |||
@@ -273,7 +273,7 @@ static inline int ext4_should_journal_data(struct inode *inode) | |||
273 | return 1; | 273 | return 1; |
274 | if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) | 274 | if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) |
275 | return 1; | 275 | return 1; |
276 | if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) | 276 | if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) |
277 | return 1; | 277 | return 1; |
278 | return 0; | 278 | return 0; |
279 | } | 279 | } |
@@ -284,7 +284,7 @@ static inline int ext4_should_order_data(struct inode *inode) | |||
284 | return 0; | 284 | return 0; |
285 | if (!S_ISREG(inode->i_mode)) | 285 | if (!S_ISREG(inode->i_mode)) |
286 | return 0; | 286 | return 0; |
287 | if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) | 287 | if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) |
288 | return 0; | 288 | return 0; |
289 | if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) | 289 | if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) |
290 | return 1; | 290 | return 1; |
@@ -297,7 +297,7 @@ static inline int ext4_should_writeback_data(struct inode *inode) | |||
297 | return 0; | 297 | return 0; |
298 | if (EXT4_JOURNAL(inode) == NULL) | 298 | if (EXT4_JOURNAL(inode) == NULL) |
299 | return 1; | 299 | return 1; |
300 | if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) | 300 | if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) |
301 | return 0; | 301 | return 0; |
302 | if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) | 302 | if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) |
303 | return 1; | 303 | return 1; |
@@ -321,7 +321,7 @@ static inline int ext4_should_dioread_nolock(struct inode *inode) | |||
321 | return 0; | 321 | return 0; |
322 | if (!S_ISREG(inode->i_mode)) | 322 | if (!S_ISREG(inode->i_mode)) |
323 | return 0; | 323 | return 0; |
324 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) | 324 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
325 | return 0; | 325 | return 0; |
326 | if (ext4_should_journal_data(inode)) | 326 | if (ext4_should_journal_data(inode)) |
327 | return 0; | 327 | return 0; |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 236b834b4ca8..377309c1af65 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -107,11 +107,8 @@ static int ext4_ext_truncate_extend_restart(handle_t *handle, | |||
107 | if (err <= 0) | 107 | if (err <= 0) |
108 | return err; | 108 | return err; |
109 | err = ext4_truncate_restart_trans(handle, inode, needed); | 109 | err = ext4_truncate_restart_trans(handle, inode, needed); |
110 | /* | 110 | if (err == 0) |
111 | * We have dropped i_data_sem so someone might have cached again | 111 | err = -EAGAIN; |
112 | * an extent we are going to truncate. | ||
113 | */ | ||
114 | ext4_ext_invalidate_cache(inode); | ||
115 | 112 | ||
116 | return err; | 113 | return err; |
117 | } | 114 | } |
@@ -185,10 +182,10 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, | |||
185 | if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) { | 182 | if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) { |
186 | /* | 183 | /* |
187 | * If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME | 184 | * If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME |
188 | * block groups per flexgroup, reserve the first block | 185 | * block groups per flexgroup, reserve the first block |
189 | * group for directories and special files. Regular | 186 | * group for directories and special files. Regular |
190 | * files will start at the second block group. This | 187 | * files will start at the second block group. This |
191 | * tends to speed up directory access and improves | 188 | * tends to speed up directory access and improves |
192 | * fsck times. | 189 | * fsck times. |
193 | */ | 190 | */ |
194 | block_group &= ~(flex_size-1); | 191 | block_group &= ~(flex_size-1); |
@@ -439,10 +436,10 @@ static int __ext4_ext_check(const char *function, struct inode *inode, | |||
439 | return 0; | 436 | return 0; |
440 | 437 | ||
441 | corrupted: | 438 | corrupted: |
442 | __ext4_error(inode->i_sb, function, | 439 | ext4_error_inode(function, inode, |
443 | "bad header/extent in inode #%lu: %s - magic %x, " | 440 | "bad header/extent: %s - magic %x, " |
444 | "entries %u, max %u(%u), depth %u(%u)", | 441 | "entries %u, max %u(%u), depth %u(%u)", |
445 | inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic), | 442 | error_msg, le16_to_cpu(eh->eh_magic), |
446 | le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max), | 443 | le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max), |
447 | max, le16_to_cpu(eh->eh_depth), depth); | 444 | max, le16_to_cpu(eh->eh_depth), depth); |
448 | 445 | ||
@@ -1622,9 +1619,7 @@ int ext4_ext_try_to_merge(struct inode *inode, | |||
1622 | merge_done = 1; | 1619 | merge_done = 1; |
1623 | WARN_ON(eh->eh_entries == 0); | 1620 | WARN_ON(eh->eh_entries == 0); |
1624 | if (!eh->eh_entries) | 1621 | if (!eh->eh_entries) |
1625 | ext4_error(inode->i_sb, | 1622 | EXT4_ERROR_INODE(inode, "eh->eh_entries = 0!"); |
1626 | "inode#%lu, eh->eh_entries = 0!", | ||
1627 | inode->i_ino); | ||
1628 | } | 1623 | } |
1629 | 1624 | ||
1630 | return merge_done; | 1625 | return merge_done; |
@@ -2039,7 +2034,7 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, | |||
2039 | struct ext4_ext_cache *cex; | 2034 | struct ext4_ext_cache *cex; |
2040 | int ret = EXT4_EXT_CACHE_NO; | 2035 | int ret = EXT4_EXT_CACHE_NO; |
2041 | 2036 | ||
2042 | /* | 2037 | /* |
2043 | * We borrow i_block_reservation_lock to protect i_cached_extent | 2038 | * We borrow i_block_reservation_lock to protect i_cached_extent |
2044 | */ | 2039 | */ |
2045 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | 2040 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); |
@@ -2361,7 +2356,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) | |||
2361 | int depth = ext_depth(inode); | 2356 | int depth = ext_depth(inode); |
2362 | struct ext4_ext_path *path; | 2357 | struct ext4_ext_path *path; |
2363 | handle_t *handle; | 2358 | handle_t *handle; |
2364 | int i = 0, err = 0; | 2359 | int i, err; |
2365 | 2360 | ||
2366 | ext_debug("truncate since %u\n", start); | 2361 | ext_debug("truncate since %u\n", start); |
2367 | 2362 | ||
@@ -2370,23 +2365,26 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) | |||
2370 | if (IS_ERR(handle)) | 2365 | if (IS_ERR(handle)) |
2371 | return PTR_ERR(handle); | 2366 | return PTR_ERR(handle); |
2372 | 2367 | ||
2368 | again: | ||
2373 | ext4_ext_invalidate_cache(inode); | 2369 | ext4_ext_invalidate_cache(inode); |
2374 | 2370 | ||
2375 | /* | 2371 | /* |
2376 | * We start scanning from right side, freeing all the blocks | 2372 | * We start scanning from right side, freeing all the blocks |
2377 | * after i_size and walking into the tree depth-wise. | 2373 | * after i_size and walking into the tree depth-wise. |
2378 | */ | 2374 | */ |
2375 | depth = ext_depth(inode); | ||
2379 | path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_NOFS); | 2376 | path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_NOFS); |
2380 | if (path == NULL) { | 2377 | if (path == NULL) { |
2381 | ext4_journal_stop(handle); | 2378 | ext4_journal_stop(handle); |
2382 | return -ENOMEM; | 2379 | return -ENOMEM; |
2383 | } | 2380 | } |
2381 | path[0].p_depth = depth; | ||
2384 | path[0].p_hdr = ext_inode_hdr(inode); | 2382 | path[0].p_hdr = ext_inode_hdr(inode); |
2385 | if (ext4_ext_check(inode, path[0].p_hdr, depth)) { | 2383 | if (ext4_ext_check(inode, path[0].p_hdr, depth)) { |
2386 | err = -EIO; | 2384 | err = -EIO; |
2387 | goto out; | 2385 | goto out; |
2388 | } | 2386 | } |
2389 | path[0].p_depth = depth; | 2387 | i = err = 0; |
2390 | 2388 | ||
2391 | while (i >= 0 && err == 0) { | 2389 | while (i >= 0 && err == 0) { |
2392 | if (i == depth) { | 2390 | if (i == depth) { |
@@ -2480,6 +2478,8 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) | |||
2480 | out: | 2478 | out: |
2481 | ext4_ext_drop_refs(path); | 2479 | ext4_ext_drop_refs(path); |
2482 | kfree(path); | 2480 | kfree(path); |
2481 | if (err == -EAGAIN) | ||
2482 | goto again; | ||
2483 | ext4_journal_stop(handle); | 2483 | ext4_journal_stop(handle); |
2484 | 2484 | ||
2485 | return err; | 2485 | return err; |
@@ -2544,7 +2544,7 @@ static void bi_complete(struct bio *bio, int error) | |||
2544 | /* FIXME!! we need to try to merge to left or right after zero-out */ | 2544 | /* FIXME!! we need to try to merge to left or right after zero-out */ |
2545 | static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) | 2545 | static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) |
2546 | { | 2546 | { |
2547 | int ret = -EIO; | 2547 | int ret; |
2548 | struct bio *bio; | 2548 | struct bio *bio; |
2549 | int blkbits, blocksize; | 2549 | int blkbits, blocksize; |
2550 | sector_t ee_pblock; | 2550 | sector_t ee_pblock; |
@@ -2568,6 +2568,9 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) | |||
2568 | len = ee_len; | 2568 | len = ee_len; |
2569 | 2569 | ||
2570 | bio = bio_alloc(GFP_NOIO, len); | 2570 | bio = bio_alloc(GFP_NOIO, len); |
2571 | if (!bio) | ||
2572 | return -ENOMEM; | ||
2573 | |||
2571 | bio->bi_sector = ee_pblock; | 2574 | bio->bi_sector = ee_pblock; |
2572 | bio->bi_bdev = inode->i_sb->s_bdev; | 2575 | bio->bi_bdev = inode->i_sb->s_bdev; |
2573 | 2576 | ||
@@ -2595,22 +2598,20 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) | |||
2595 | submit_bio(WRITE, bio); | 2598 | submit_bio(WRITE, bio); |
2596 | wait_for_completion(&event); | 2599 | wait_for_completion(&event); |
2597 | 2600 | ||
2598 | if (test_bit(BIO_UPTODATE, &bio->bi_flags)) | 2601 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { |
2599 | ret = 0; | 2602 | bio_put(bio); |
2600 | else { | 2603 | return -EIO; |
2601 | ret = -EIO; | ||
2602 | break; | ||
2603 | } | 2604 | } |
2604 | bio_put(bio); | 2605 | bio_put(bio); |
2605 | ee_len -= done; | 2606 | ee_len -= done; |
2606 | ee_pblock += done << (blkbits - 9); | 2607 | ee_pblock += done << (blkbits - 9); |
2607 | } | 2608 | } |
2608 | return ret; | 2609 | return 0; |
2609 | } | 2610 | } |
2610 | 2611 | ||
2611 | #define EXT4_EXT_ZERO_LEN 7 | 2612 | #define EXT4_EXT_ZERO_LEN 7 |
2612 | /* | 2613 | /* |
2613 | * This function is called by ext4_ext_get_blocks() if someone tries to write | 2614 | * This function is called by ext4_ext_map_blocks() if someone tries to write |
2614 | * to an uninitialized extent. It may result in splitting the uninitialized | 2615 | * to an uninitialized extent. It may result in splitting the uninitialized |
2615 | * extent into multiple extents (upto three - one initialized and two | 2616 | * extent into multiple extents (upto three - one initialized and two |
2616 | * uninitialized). | 2617 | * uninitialized). |
@@ -2620,39 +2621,55 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) | |||
2620 | * c> Splits in three extents: Somone is writing in middle of the extent | 2621 | * c> Splits in three extents: Somone is writing in middle of the extent |
2621 | */ | 2622 | */ |
2622 | static int ext4_ext_convert_to_initialized(handle_t *handle, | 2623 | static int ext4_ext_convert_to_initialized(handle_t *handle, |
2623 | struct inode *inode, | 2624 | struct inode *inode, |
2624 | struct ext4_ext_path *path, | 2625 | struct ext4_map_blocks *map, |
2625 | ext4_lblk_t iblock, | 2626 | struct ext4_ext_path *path) |
2626 | unsigned int max_blocks) | ||
2627 | { | 2627 | { |
2628 | struct ext4_extent *ex, newex, orig_ex; | 2628 | struct ext4_extent *ex, newex, orig_ex; |
2629 | struct ext4_extent *ex1 = NULL; | 2629 | struct ext4_extent *ex1 = NULL; |
2630 | struct ext4_extent *ex2 = NULL; | 2630 | struct ext4_extent *ex2 = NULL; |
2631 | struct ext4_extent *ex3 = NULL; | 2631 | struct ext4_extent *ex3 = NULL; |
2632 | struct ext4_extent_header *eh; | 2632 | struct ext4_extent_header *eh; |
2633 | ext4_lblk_t ee_block; | 2633 | ext4_lblk_t ee_block, eof_block; |
2634 | unsigned int allocated, ee_len, depth; | 2634 | unsigned int allocated, ee_len, depth; |
2635 | ext4_fsblk_t newblock; | 2635 | ext4_fsblk_t newblock; |
2636 | int err = 0; | 2636 | int err = 0; |
2637 | int ret = 0; | 2637 | int ret = 0; |
2638 | int may_zeroout; | ||
2639 | |||
2640 | ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical" | ||
2641 | "block %llu, max_blocks %u\n", inode->i_ino, | ||
2642 | (unsigned long long)map->m_lblk, map->m_len); | ||
2643 | |||
2644 | eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> | ||
2645 | inode->i_sb->s_blocksize_bits; | ||
2646 | if (eof_block < map->m_lblk + map->m_len) | ||
2647 | eof_block = map->m_lblk + map->m_len; | ||
2638 | 2648 | ||
2639 | depth = ext_depth(inode); | 2649 | depth = ext_depth(inode); |
2640 | eh = path[depth].p_hdr; | 2650 | eh = path[depth].p_hdr; |
2641 | ex = path[depth].p_ext; | 2651 | ex = path[depth].p_ext; |
2642 | ee_block = le32_to_cpu(ex->ee_block); | 2652 | ee_block = le32_to_cpu(ex->ee_block); |
2643 | ee_len = ext4_ext_get_actual_len(ex); | 2653 | ee_len = ext4_ext_get_actual_len(ex); |
2644 | allocated = ee_len - (iblock - ee_block); | 2654 | allocated = ee_len - (map->m_lblk - ee_block); |
2645 | newblock = iblock - ee_block + ext_pblock(ex); | 2655 | newblock = map->m_lblk - ee_block + ext_pblock(ex); |
2656 | |||
2646 | ex2 = ex; | 2657 | ex2 = ex; |
2647 | orig_ex.ee_block = ex->ee_block; | 2658 | orig_ex.ee_block = ex->ee_block; |
2648 | orig_ex.ee_len = cpu_to_le16(ee_len); | 2659 | orig_ex.ee_len = cpu_to_le16(ee_len); |
2649 | ext4_ext_store_pblock(&orig_ex, ext_pblock(ex)); | 2660 | ext4_ext_store_pblock(&orig_ex, ext_pblock(ex)); |
2650 | 2661 | ||
2662 | /* | ||
2663 | * It is safe to convert extent to initialized via explicit | ||
2664 | * zeroout only if extent is fully insde i_size or new_size. | ||
2665 | */ | ||
2666 | may_zeroout = ee_block + ee_len <= eof_block; | ||
2667 | |||
2651 | err = ext4_ext_get_access(handle, inode, path + depth); | 2668 | err = ext4_ext_get_access(handle, inode, path + depth); |
2652 | if (err) | 2669 | if (err) |
2653 | goto out; | 2670 | goto out; |
2654 | /* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */ | 2671 | /* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */ |
2655 | if (ee_len <= 2*EXT4_EXT_ZERO_LEN) { | 2672 | if (ee_len <= 2*EXT4_EXT_ZERO_LEN && may_zeroout) { |
2656 | err = ext4_ext_zeroout(inode, &orig_ex); | 2673 | err = ext4_ext_zeroout(inode, &orig_ex); |
2657 | if (err) | 2674 | if (err) |
2658 | goto fix_extent_len; | 2675 | goto fix_extent_len; |
@@ -2665,10 +2682,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2665 | return allocated; | 2682 | return allocated; |
2666 | } | 2683 | } |
2667 | 2684 | ||
2668 | /* ex1: ee_block to iblock - 1 : uninitialized */ | 2685 | /* ex1: ee_block to map->m_lblk - 1 : uninitialized */ |
2669 | if (iblock > ee_block) { | 2686 | if (map->m_lblk > ee_block) { |
2670 | ex1 = ex; | 2687 | ex1 = ex; |
2671 | ex1->ee_len = cpu_to_le16(iblock - ee_block); | 2688 | ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); |
2672 | ext4_ext_mark_uninitialized(ex1); | 2689 | ext4_ext_mark_uninitialized(ex1); |
2673 | ex2 = &newex; | 2690 | ex2 = &newex; |
2674 | } | 2691 | } |
@@ -2677,15 +2694,15 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2677 | * we insert ex3, if ex1 is NULL. This is to avoid temporary | 2694 | * we insert ex3, if ex1 is NULL. This is to avoid temporary |
2678 | * overlap of blocks. | 2695 | * overlap of blocks. |
2679 | */ | 2696 | */ |
2680 | if (!ex1 && allocated > max_blocks) | 2697 | if (!ex1 && allocated > map->m_len) |
2681 | ex2->ee_len = cpu_to_le16(max_blocks); | 2698 | ex2->ee_len = cpu_to_le16(map->m_len); |
2682 | /* ex3: to ee_block + ee_len : uninitialised */ | 2699 | /* ex3: to ee_block + ee_len : uninitialised */ |
2683 | if (allocated > max_blocks) { | 2700 | if (allocated > map->m_len) { |
2684 | unsigned int newdepth; | 2701 | unsigned int newdepth; |
2685 | /* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */ | 2702 | /* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */ |
2686 | if (allocated <= EXT4_EXT_ZERO_LEN) { | 2703 | if (allocated <= EXT4_EXT_ZERO_LEN && may_zeroout) { |
2687 | /* | 2704 | /* |
2688 | * iblock == ee_block is handled by the zerouout | 2705 | * map->m_lblk == ee_block is handled by the zerouout |
2689 | * at the beginning. | 2706 | * at the beginning. |
2690 | * Mark first half uninitialized. | 2707 | * Mark first half uninitialized. |
2691 | * Mark second half initialized and zero out the | 2708 | * Mark second half initialized and zero out the |
@@ -2698,7 +2715,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2698 | ext4_ext_dirty(handle, inode, path + depth); | 2715 | ext4_ext_dirty(handle, inode, path + depth); |
2699 | 2716 | ||
2700 | ex3 = &newex; | 2717 | ex3 = &newex; |
2701 | ex3->ee_block = cpu_to_le32(iblock); | 2718 | ex3->ee_block = cpu_to_le32(map->m_lblk); |
2702 | ext4_ext_store_pblock(ex3, newblock); | 2719 | ext4_ext_store_pblock(ex3, newblock); |
2703 | ex3->ee_len = cpu_to_le16(allocated); | 2720 | ex3->ee_len = cpu_to_le16(allocated); |
2704 | err = ext4_ext_insert_extent(handle, inode, path, | 2721 | err = ext4_ext_insert_extent(handle, inode, path, |
@@ -2711,7 +2728,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2711 | ex->ee_len = orig_ex.ee_len; | 2728 | ex->ee_len = orig_ex.ee_len; |
2712 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | 2729 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); |
2713 | ext4_ext_dirty(handle, inode, path + depth); | 2730 | ext4_ext_dirty(handle, inode, path + depth); |
2714 | /* blocks available from iblock */ | 2731 | /* blocks available from map->m_lblk */ |
2715 | return allocated; | 2732 | return allocated; |
2716 | 2733 | ||
2717 | } else if (err) | 2734 | } else if (err) |
@@ -2733,8 +2750,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2733 | */ | 2750 | */ |
2734 | depth = ext_depth(inode); | 2751 | depth = ext_depth(inode); |
2735 | ext4_ext_drop_refs(path); | 2752 | ext4_ext_drop_refs(path); |
2736 | path = ext4_ext_find_extent(inode, | 2753 | path = ext4_ext_find_extent(inode, map->m_lblk, |
2737 | iblock, path); | 2754 | path); |
2738 | if (IS_ERR(path)) { | 2755 | if (IS_ERR(path)) { |
2739 | err = PTR_ERR(path); | 2756 | err = PTR_ERR(path); |
2740 | return err; | 2757 | return err; |
@@ -2754,12 +2771,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2754 | return allocated; | 2771 | return allocated; |
2755 | } | 2772 | } |
2756 | ex3 = &newex; | 2773 | ex3 = &newex; |
2757 | ex3->ee_block = cpu_to_le32(iblock + max_blocks); | 2774 | ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len); |
2758 | ext4_ext_store_pblock(ex3, newblock + max_blocks); | 2775 | ext4_ext_store_pblock(ex3, newblock + map->m_len); |
2759 | ex3->ee_len = cpu_to_le16(allocated - max_blocks); | 2776 | ex3->ee_len = cpu_to_le16(allocated - map->m_len); |
2760 | ext4_ext_mark_uninitialized(ex3); | 2777 | ext4_ext_mark_uninitialized(ex3); |
2761 | err = ext4_ext_insert_extent(handle, inode, path, ex3, 0); | 2778 | err = ext4_ext_insert_extent(handle, inode, path, ex3, 0); |
2762 | if (err == -ENOSPC) { | 2779 | if (err == -ENOSPC && may_zeroout) { |
2763 | err = ext4_ext_zeroout(inode, &orig_ex); | 2780 | err = ext4_ext_zeroout(inode, &orig_ex); |
2764 | if (err) | 2781 | if (err) |
2765 | goto fix_extent_len; | 2782 | goto fix_extent_len; |
@@ -2769,7 +2786,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2769 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | 2786 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); |
2770 | ext4_ext_dirty(handle, inode, path + depth); | 2787 | ext4_ext_dirty(handle, inode, path + depth); |
2771 | /* zeroed the full extent */ | 2788 | /* zeroed the full extent */ |
2772 | /* blocks available from iblock */ | 2789 | /* blocks available from map->m_lblk */ |
2773 | return allocated; | 2790 | return allocated; |
2774 | 2791 | ||
2775 | } else if (err) | 2792 | } else if (err) |
@@ -2783,11 +2800,13 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2783 | * update the extent length after successful insert of the | 2800 | * update the extent length after successful insert of the |
2784 | * split extent | 2801 | * split extent |
2785 | */ | 2802 | */ |
2786 | orig_ex.ee_len = cpu_to_le16(ee_len - | 2803 | ee_len -= ext4_ext_get_actual_len(ex3); |
2787 | ext4_ext_get_actual_len(ex3)); | 2804 | orig_ex.ee_len = cpu_to_le16(ee_len); |
2805 | may_zeroout = ee_block + ee_len <= eof_block; | ||
2806 | |||
2788 | depth = newdepth; | 2807 | depth = newdepth; |
2789 | ext4_ext_drop_refs(path); | 2808 | ext4_ext_drop_refs(path); |
2790 | path = ext4_ext_find_extent(inode, iblock, path); | 2809 | path = ext4_ext_find_extent(inode, map->m_lblk, path); |
2791 | if (IS_ERR(path)) { | 2810 | if (IS_ERR(path)) { |
2792 | err = PTR_ERR(path); | 2811 | err = PTR_ERR(path); |
2793 | goto out; | 2812 | goto out; |
@@ -2801,14 +2820,14 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2801 | if (err) | 2820 | if (err) |
2802 | goto out; | 2821 | goto out; |
2803 | 2822 | ||
2804 | allocated = max_blocks; | 2823 | allocated = map->m_len; |
2805 | 2824 | ||
2806 | /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying | 2825 | /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying |
2807 | * to insert a extent in the middle zerout directly | 2826 | * to insert a extent in the middle zerout directly |
2808 | * otherwise give the extent a chance to merge to left | 2827 | * otherwise give the extent a chance to merge to left |
2809 | */ | 2828 | */ |
2810 | if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN && | 2829 | if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN && |
2811 | iblock != ee_block) { | 2830 | map->m_lblk != ee_block && may_zeroout) { |
2812 | err = ext4_ext_zeroout(inode, &orig_ex); | 2831 | err = ext4_ext_zeroout(inode, &orig_ex); |
2813 | if (err) | 2832 | if (err) |
2814 | goto fix_extent_len; | 2833 | goto fix_extent_len; |
@@ -2818,7 +2837,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2818 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | 2837 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); |
2819 | ext4_ext_dirty(handle, inode, path + depth); | 2838 | ext4_ext_dirty(handle, inode, path + depth); |
2820 | /* zero out the first half */ | 2839 | /* zero out the first half */ |
2821 | /* blocks available from iblock */ | 2840 | /* blocks available from map->m_lblk */ |
2822 | return allocated; | 2841 | return allocated; |
2823 | } | 2842 | } |
2824 | } | 2843 | } |
@@ -2829,12 +2848,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2829 | */ | 2848 | */ |
2830 | if (ex1 && ex1 != ex) { | 2849 | if (ex1 && ex1 != ex) { |
2831 | ex1 = ex; | 2850 | ex1 = ex; |
2832 | ex1->ee_len = cpu_to_le16(iblock - ee_block); | 2851 | ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); |
2833 | ext4_ext_mark_uninitialized(ex1); | 2852 | ext4_ext_mark_uninitialized(ex1); |
2834 | ex2 = &newex; | 2853 | ex2 = &newex; |
2835 | } | 2854 | } |
2836 | /* ex2: iblock to iblock + maxblocks-1 : initialised */ | 2855 | /* ex2: map->m_lblk to map->m_lblk + maxblocks-1 : initialised */ |
2837 | ex2->ee_block = cpu_to_le32(iblock); | 2856 | ex2->ee_block = cpu_to_le32(map->m_lblk); |
2838 | ext4_ext_store_pblock(ex2, newblock); | 2857 | ext4_ext_store_pblock(ex2, newblock); |
2839 | ex2->ee_len = cpu_to_le16(allocated); | 2858 | ex2->ee_len = cpu_to_le16(allocated); |
2840 | if (ex2 != ex) | 2859 | if (ex2 != ex) |
@@ -2877,7 +2896,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, | |||
2877 | goto out; | 2896 | goto out; |
2878 | insert: | 2897 | insert: |
2879 | err = ext4_ext_insert_extent(handle, inode, path, &newex, 0); | 2898 | err = ext4_ext_insert_extent(handle, inode, path, &newex, 0); |
2880 | if (err == -ENOSPC) { | 2899 | if (err == -ENOSPC && may_zeroout) { |
2881 | err = ext4_ext_zeroout(inode, &orig_ex); | 2900 | err = ext4_ext_zeroout(inode, &orig_ex); |
2882 | if (err) | 2901 | if (err) |
2883 | goto fix_extent_len; | 2902 | goto fix_extent_len; |
@@ -2904,7 +2923,7 @@ fix_extent_len: | |||
2904 | } | 2923 | } |
2905 | 2924 | ||
2906 | /* | 2925 | /* |
2907 | * This function is called by ext4_ext_get_blocks() from | 2926 | * This function is called by ext4_ext_map_blocks() from |
2908 | * ext4_get_blocks_dio_write() when DIO to write | 2927 | * ext4_get_blocks_dio_write() when DIO to write |
2909 | * to an uninitialized extent. | 2928 | * to an uninitialized extent. |
2910 | * | 2929 | * |
@@ -2927,9 +2946,8 @@ fix_extent_len: | |||
2927 | */ | 2946 | */ |
2928 | static int ext4_split_unwritten_extents(handle_t *handle, | 2947 | static int ext4_split_unwritten_extents(handle_t *handle, |
2929 | struct inode *inode, | 2948 | struct inode *inode, |
2949 | struct ext4_map_blocks *map, | ||
2930 | struct ext4_ext_path *path, | 2950 | struct ext4_ext_path *path, |
2931 | ext4_lblk_t iblock, | ||
2932 | unsigned int max_blocks, | ||
2933 | int flags) | 2951 | int flags) |
2934 | { | 2952 | { |
2935 | struct ext4_extent *ex, newex, orig_ex; | 2953 | struct ext4_extent *ex, newex, orig_ex; |
@@ -2937,41 +2955,55 @@ static int ext4_split_unwritten_extents(handle_t *handle, | |||
2937 | struct ext4_extent *ex2 = NULL; | 2955 | struct ext4_extent *ex2 = NULL; |
2938 | struct ext4_extent *ex3 = NULL; | 2956 | struct ext4_extent *ex3 = NULL; |
2939 | struct ext4_extent_header *eh; | 2957 | struct ext4_extent_header *eh; |
2940 | ext4_lblk_t ee_block; | 2958 | ext4_lblk_t ee_block, eof_block; |
2941 | unsigned int allocated, ee_len, depth; | 2959 | unsigned int allocated, ee_len, depth; |
2942 | ext4_fsblk_t newblock; | 2960 | ext4_fsblk_t newblock; |
2943 | int err = 0; | 2961 | int err = 0; |
2962 | int may_zeroout; | ||
2963 | |||
2964 | ext_debug("ext4_split_unwritten_extents: inode %lu, logical" | ||
2965 | "block %llu, max_blocks %u\n", inode->i_ino, | ||
2966 | (unsigned long long)map->m_lblk, map->m_len); | ||
2967 | |||
2968 | eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> | ||
2969 | inode->i_sb->s_blocksize_bits; | ||
2970 | if (eof_block < map->m_lblk + map->m_len) | ||
2971 | eof_block = map->m_lblk + map->m_len; | ||
2944 | 2972 | ||
2945 | ext_debug("ext4_split_unwritten_extents: inode %lu," | ||
2946 | "iblock %llu, max_blocks %u\n", inode->i_ino, | ||
2947 | (unsigned long long)iblock, max_blocks); | ||
2948 | depth = ext_depth(inode); | 2973 | depth = ext_depth(inode); |
2949 | eh = path[depth].p_hdr; | 2974 | eh = path[depth].p_hdr; |
2950 | ex = path[depth].p_ext; | 2975 | ex = path[depth].p_ext; |
2951 | ee_block = le32_to_cpu(ex->ee_block); | 2976 | ee_block = le32_to_cpu(ex->ee_block); |
2952 | ee_len = ext4_ext_get_actual_len(ex); | 2977 | ee_len = ext4_ext_get_actual_len(ex); |
2953 | allocated = ee_len - (iblock - ee_block); | 2978 | allocated = ee_len - (map->m_lblk - ee_block); |
2954 | newblock = iblock - ee_block + ext_pblock(ex); | 2979 | newblock = map->m_lblk - ee_block + ext_pblock(ex); |
2980 | |||
2955 | ex2 = ex; | 2981 | ex2 = ex; |
2956 | orig_ex.ee_block = ex->ee_block; | 2982 | orig_ex.ee_block = ex->ee_block; |
2957 | orig_ex.ee_len = cpu_to_le16(ee_len); | 2983 | orig_ex.ee_len = cpu_to_le16(ee_len); |
2958 | ext4_ext_store_pblock(&orig_ex, ext_pblock(ex)); | 2984 | ext4_ext_store_pblock(&orig_ex, ext_pblock(ex)); |
2959 | 2985 | ||
2960 | /* | 2986 | /* |
2987 | * It is safe to convert extent to initialized via explicit | ||
2988 | * zeroout only if extent is fully insde i_size or new_size. | ||
2989 | */ | ||
2990 | may_zeroout = ee_block + ee_len <= eof_block; | ||
2991 | |||
2992 | /* | ||
2961 | * If the uninitialized extent begins at the same logical | 2993 | * If the uninitialized extent begins at the same logical |
2962 | * block where the write begins, and the write completely | 2994 | * block where the write begins, and the write completely |
2963 | * covers the extent, then we don't need to split it. | 2995 | * covers the extent, then we don't need to split it. |
2964 | */ | 2996 | */ |
2965 | if ((iblock == ee_block) && (allocated <= max_blocks)) | 2997 | if ((map->m_lblk == ee_block) && (allocated <= map->m_len)) |
2966 | return allocated; | 2998 | return allocated; |
2967 | 2999 | ||
2968 | err = ext4_ext_get_access(handle, inode, path + depth); | 3000 | err = ext4_ext_get_access(handle, inode, path + depth); |
2969 | if (err) | 3001 | if (err) |
2970 | goto out; | 3002 | goto out; |
2971 | /* ex1: ee_block to iblock - 1 : uninitialized */ | 3003 | /* ex1: ee_block to map->m_lblk - 1 : uninitialized */ |
2972 | if (iblock > ee_block) { | 3004 | if (map->m_lblk > ee_block) { |
2973 | ex1 = ex; | 3005 | ex1 = ex; |
2974 | ex1->ee_len = cpu_to_le16(iblock - ee_block); | 3006 | ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); |
2975 | ext4_ext_mark_uninitialized(ex1); | 3007 | ext4_ext_mark_uninitialized(ex1); |
2976 | ex2 = &newex; | 3008 | ex2 = &newex; |
2977 | } | 3009 | } |
@@ -2980,18 +3012,18 @@ static int ext4_split_unwritten_extents(handle_t *handle, | |||
2980 | * we insert ex3, if ex1 is NULL. This is to avoid temporary | 3012 | * we insert ex3, if ex1 is NULL. This is to avoid temporary |
2981 | * overlap of blocks. | 3013 | * overlap of blocks. |
2982 | */ | 3014 | */ |
2983 | if (!ex1 && allocated > max_blocks) | 3015 | if (!ex1 && allocated > map->m_len) |
2984 | ex2->ee_len = cpu_to_le16(max_blocks); | 3016 | ex2->ee_len = cpu_to_le16(map->m_len); |
2985 | /* ex3: to ee_block + ee_len : uninitialised */ | 3017 | /* ex3: to ee_block + ee_len : uninitialised */ |
2986 | if (allocated > max_blocks) { | 3018 | if (allocated > map->m_len) { |
2987 | unsigned int newdepth; | 3019 | unsigned int newdepth; |
2988 | ex3 = &newex; | 3020 | ex3 = &newex; |
2989 | ex3->ee_block = cpu_to_le32(iblock + max_blocks); | 3021 | ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len); |
2990 | ext4_ext_store_pblock(ex3, newblock + max_blocks); | 3022 | ext4_ext_store_pblock(ex3, newblock + map->m_len); |
2991 | ex3->ee_len = cpu_to_le16(allocated - max_blocks); | 3023 | ex3->ee_len = cpu_to_le16(allocated - map->m_len); |
2992 | ext4_ext_mark_uninitialized(ex3); | 3024 | ext4_ext_mark_uninitialized(ex3); |
2993 | err = ext4_ext_insert_extent(handle, inode, path, ex3, flags); | 3025 | err = ext4_ext_insert_extent(handle, inode, path, ex3, flags); |
2994 | if (err == -ENOSPC) { | 3026 | if (err == -ENOSPC && may_zeroout) { |
2995 | err = ext4_ext_zeroout(inode, &orig_ex); | 3027 | err = ext4_ext_zeroout(inode, &orig_ex); |
2996 | if (err) | 3028 | if (err) |
2997 | goto fix_extent_len; | 3029 | goto fix_extent_len; |
@@ -3001,7 +3033,7 @@ static int ext4_split_unwritten_extents(handle_t *handle, | |||
3001 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); | 3033 | ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); |
3002 | ext4_ext_dirty(handle, inode, path + depth); | 3034 | ext4_ext_dirty(handle, inode, path + depth); |
3003 | /* zeroed the full extent */ | 3035 | /* zeroed the full extent */ |
3004 | /* blocks available from iblock */ | 3036 | /* blocks available from map->m_lblk */ |
3005 | return allocated; | 3037 | return allocated; |
3006 | 3038 | ||
3007 | } else if (err) | 3039 | } else if (err) |
@@ -3015,11 +3047,13 @@ static int ext4_split_unwritten_extents(handle_t *handle, | |||
3015 | * update the extent length after successful insert of the | 3047 | * update the extent length after successful insert of the |
3016 | * split extent | 3048 | * split extent |
3017 | */ | 3049 | */ |
3018 | orig_ex.ee_len = cpu_to_le16(ee_len - | 3050 | ee_len -= ext4_ext_get_actual_len(ex3); |
3019 | ext4_ext_get_actual_len(ex3)); | 3051 | orig_ex.ee_len = cpu_to_le16(ee_len); |
3052 | may_zeroout = ee_block + ee_len <= eof_block; | ||
3053 | |||
3020 | depth = newdepth; | 3054 | depth = newdepth; |
3021 | ext4_ext_drop_refs(path); | 3055 | ext4_ext_drop_refs(path); |
3022 | path = ext4_ext_find_extent(inode, iblock, path); | 3056 | path = ext4_ext_find_extent(inode, map->m_lblk, path); |
3023 | if (IS_ERR(path)) { | 3057 | if (IS_ERR(path)) { |
3024 | err = PTR_ERR(path); | 3058 | err = PTR_ERR(path); |
3025 | goto out; | 3059 | goto out; |
@@ -3033,7 +3067,7 @@ static int ext4_split_unwritten_extents(handle_t *handle, | |||
3033 | if (err) | 3067 | if (err) |
3034 | goto out; | 3068 | goto out; |
3035 | 3069 | ||
3036 | allocated = max_blocks; | 3070 | allocated = map->m_len; |
3037 | } | 3071 | } |
3038 | /* | 3072 | /* |
3039 | * If there was a change of depth as part of the | 3073 | * If there was a change of depth as part of the |
@@ -3042,15 +3076,15 @@ static int ext4_split_unwritten_extents(handle_t *handle, | |||
3042 | */ | 3076 | */ |
3043 | if (ex1 && ex1 != ex) { | 3077 | if (ex1 && ex1 != ex) { |
3044 | ex1 = ex; | 3078 | ex1 = ex; |
3045 | ex1->ee_len = cpu_to_le16(iblock - ee_block); | 3079 | ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); |
3046 | ext4_ext_mark_uninitialized(ex1); | 3080 | ext4_ext_mark_uninitialized(ex1); |
3047 | ex2 = &newex; | 3081 | ex2 = &newex; |
3048 | } | 3082 | } |
3049 | /* | 3083 | /* |
3050 | * ex2: iblock to iblock + maxblocks-1 : to be direct IO written, | 3084 | * ex2: map->m_lblk to map->m_lblk + map->m_len-1 : to be written |
3051 | * uninitialised still. | 3085 | * using direct I/O, uninitialised still. |
3052 | */ | 3086 | */ |
3053 | ex2->ee_block = cpu_to_le32(iblock); | 3087 | ex2->ee_block = cpu_to_le32(map->m_lblk); |
3054 | ext4_ext_store_pblock(ex2, newblock); | 3088 | ext4_ext_store_pblock(ex2, newblock); |
3055 | ex2->ee_len = cpu_to_le16(allocated); | 3089 | ex2->ee_len = cpu_to_le16(allocated); |
3056 | ext4_ext_mark_uninitialized(ex2); | 3090 | ext4_ext_mark_uninitialized(ex2); |
@@ -3062,7 +3096,7 @@ static int ext4_split_unwritten_extents(handle_t *handle, | |||
3062 | goto out; | 3096 | goto out; |
3063 | insert: | 3097 | insert: |
3064 | err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); | 3098 | err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); |
3065 | if (err == -ENOSPC) { | 3099 | if (err == -ENOSPC && may_zeroout) { |
3066 | err = ext4_ext_zeroout(inode, &orig_ex); | 3100 | err = ext4_ext_zeroout(inode, &orig_ex); |
3067 | if (err) | 3101 | if (err) |
3068 | goto fix_extent_len; | 3102 | goto fix_extent_len; |
@@ -3152,10 +3186,9 @@ static void unmap_underlying_metadata_blocks(struct block_device *bdev, | |||
3152 | 3186 | ||
3153 | static int | 3187 | static int |
3154 | ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | 3188 | ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, |
3155 | ext4_lblk_t iblock, unsigned int max_blocks, | 3189 | struct ext4_map_blocks *map, |
3156 | struct ext4_ext_path *path, int flags, | 3190 | struct ext4_ext_path *path, int flags, |
3157 | unsigned int allocated, struct buffer_head *bh_result, | 3191 | unsigned int allocated, ext4_fsblk_t newblock) |
3158 | ext4_fsblk_t newblock) | ||
3159 | { | 3192 | { |
3160 | int ret = 0; | 3193 | int ret = 0; |
3161 | int err = 0; | 3194 | int err = 0; |
@@ -3163,15 +3196,14 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
3163 | 3196 | ||
3164 | ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical" | 3197 | ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical" |
3165 | "block %llu, max_blocks %u, flags %d, allocated %u", | 3198 | "block %llu, max_blocks %u, flags %d, allocated %u", |
3166 | inode->i_ino, (unsigned long long)iblock, max_blocks, | 3199 | inode->i_ino, (unsigned long long)map->m_lblk, map->m_len, |
3167 | flags, allocated); | 3200 | flags, allocated); |
3168 | ext4_ext_show_leaf(inode, path); | 3201 | ext4_ext_show_leaf(inode, path); |
3169 | 3202 | ||
3170 | /* get_block() called before submitting the IO: split the extent */ | 3203 | /* get_block() called before submitting the IO: split the extent */ |
3171 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { | 3204 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { |
3172 | ret = ext4_split_unwritten_extents(handle, | 3205 | ret = ext4_split_unwritten_extents(handle, inode, map, |
3173 | inode, path, iblock, | 3206 | path, flags); |
3174 | max_blocks, flags); | ||
3175 | /* | 3207 | /* |
3176 | * Flag the inode(non aio case) or end_io struct (aio case) | 3208 | * Flag the inode(non aio case) or end_io struct (aio case) |
3177 | * that this IO needs to be converted to written when IO is | 3209 | * that this IO needs to be converted to written when IO is |
@@ -3182,7 +3214,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
3182 | else | 3214 | else |
3183 | ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); | 3215 | ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); |
3184 | if (ext4_should_dioread_nolock(inode)) | 3216 | if (ext4_should_dioread_nolock(inode)) |
3185 | set_buffer_uninit(bh_result); | 3217 | map->m_flags |= EXT4_MAP_UNINIT; |
3186 | goto out; | 3218 | goto out; |
3187 | } | 3219 | } |
3188 | /* IO end_io complete, convert the filled extent to written */ | 3220 | /* IO end_io complete, convert the filled extent to written */ |
@@ -3210,14 +3242,12 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
3210 | * the buffer head will be unmapped so that | 3242 | * the buffer head will be unmapped so that |
3211 | * a read from the block returns 0s. | 3243 | * a read from the block returns 0s. |
3212 | */ | 3244 | */ |
3213 | set_buffer_unwritten(bh_result); | 3245 | map->m_flags |= EXT4_MAP_UNWRITTEN; |
3214 | goto out1; | 3246 | goto out1; |
3215 | } | 3247 | } |
3216 | 3248 | ||
3217 | /* buffered write, writepage time: convert to initialized */ | 3249 | /* buffered write, writepage time: convert to initialized */ |
3218 | ret = ext4_ext_convert_to_initialized(handle, inode, | 3250 | ret = ext4_ext_convert_to_initialized(handle, inode, map, path); |
3219 | path, iblock, | ||
3220 | max_blocks); | ||
3221 | if (ret >= 0) | 3251 | if (ret >= 0) |
3222 | ext4_update_inode_fsync_trans(handle, inode, 1); | 3252 | ext4_update_inode_fsync_trans(handle, inode, 1); |
3223 | out: | 3253 | out: |
@@ -3226,7 +3256,7 @@ out: | |||
3226 | goto out2; | 3256 | goto out2; |
3227 | } else | 3257 | } else |
3228 | allocated = ret; | 3258 | allocated = ret; |
3229 | set_buffer_new(bh_result); | 3259 | map->m_flags |= EXT4_MAP_NEW; |
3230 | /* | 3260 | /* |
3231 | * if we allocated more blocks than requested | 3261 | * if we allocated more blocks than requested |
3232 | * we need to make sure we unmap the extra block | 3262 | * we need to make sure we unmap the extra block |
@@ -3234,11 +3264,11 @@ out: | |||
3234 | * unmapped later when we find the buffer_head marked | 3264 | * unmapped later when we find the buffer_head marked |
3235 | * new. | 3265 | * new. |
3236 | */ | 3266 | */ |
3237 | if (allocated > max_blocks) { | 3267 | if (allocated > map->m_len) { |
3238 | unmap_underlying_metadata_blocks(inode->i_sb->s_bdev, | 3268 | unmap_underlying_metadata_blocks(inode->i_sb->s_bdev, |
3239 | newblock + max_blocks, | 3269 | newblock + map->m_len, |
3240 | allocated - max_blocks); | 3270 | allocated - map->m_len); |
3241 | allocated = max_blocks; | 3271 | allocated = map->m_len; |
3242 | } | 3272 | } |
3243 | 3273 | ||
3244 | /* | 3274 | /* |
@@ -3252,13 +3282,13 @@ out: | |||
3252 | ext4_da_update_reserve_space(inode, allocated, 0); | 3282 | ext4_da_update_reserve_space(inode, allocated, 0); |
3253 | 3283 | ||
3254 | map_out: | 3284 | map_out: |
3255 | set_buffer_mapped(bh_result); | 3285 | map->m_flags |= EXT4_MAP_MAPPED; |
3256 | out1: | 3286 | out1: |
3257 | if (allocated > max_blocks) | 3287 | if (allocated > map->m_len) |
3258 | allocated = max_blocks; | 3288 | allocated = map->m_len; |
3259 | ext4_ext_show_leaf(inode, path); | 3289 | ext4_ext_show_leaf(inode, path); |
3260 | bh_result->b_bdev = inode->i_sb->s_bdev; | 3290 | map->m_pblk = newblock; |
3261 | bh_result->b_blocknr = newblock; | 3291 | map->m_len = allocated; |
3262 | out2: | 3292 | out2: |
3263 | if (path) { | 3293 | if (path) { |
3264 | ext4_ext_drop_refs(path); | 3294 | ext4_ext_drop_refs(path); |
@@ -3284,26 +3314,23 @@ out2: | |||
3284 | * | 3314 | * |
3285 | * return < 0, error case. | 3315 | * return < 0, error case. |
3286 | */ | 3316 | */ |
3287 | int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | 3317 | int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, |
3288 | ext4_lblk_t iblock, | 3318 | struct ext4_map_blocks *map, int flags) |
3289 | unsigned int max_blocks, struct buffer_head *bh_result, | ||
3290 | int flags) | ||
3291 | { | 3319 | { |
3292 | struct ext4_ext_path *path = NULL; | 3320 | struct ext4_ext_path *path = NULL; |
3293 | struct ext4_extent_header *eh; | 3321 | struct ext4_extent_header *eh; |
3294 | struct ext4_extent newex, *ex, *last_ex; | 3322 | struct ext4_extent newex, *ex, *last_ex; |
3295 | ext4_fsblk_t newblock; | 3323 | ext4_fsblk_t newblock; |
3296 | int err = 0, depth, ret, cache_type; | 3324 | int i, err = 0, depth, ret, cache_type; |
3297 | unsigned int allocated = 0; | 3325 | unsigned int allocated = 0; |
3298 | struct ext4_allocation_request ar; | 3326 | struct ext4_allocation_request ar; |
3299 | ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; | 3327 | ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; |
3300 | 3328 | ||
3301 | __clear_bit(BH_New, &bh_result->b_state); | ||
3302 | ext_debug("blocks %u/%u requested for inode %lu\n", | 3329 | ext_debug("blocks %u/%u requested for inode %lu\n", |
3303 | iblock, max_blocks, inode->i_ino); | 3330 | map->m_lblk, map->m_len, inode->i_ino); |
3304 | 3331 | ||
3305 | /* check in cache */ | 3332 | /* check in cache */ |
3306 | cache_type = ext4_ext_in_cache(inode, iblock, &newex); | 3333 | cache_type = ext4_ext_in_cache(inode, map->m_lblk, &newex); |
3307 | if (cache_type) { | 3334 | if (cache_type) { |
3308 | if (cache_type == EXT4_EXT_CACHE_GAP) { | 3335 | if (cache_type == EXT4_EXT_CACHE_GAP) { |
3309 | if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { | 3336 | if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { |
@@ -3316,12 +3343,12 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
3316 | /* we should allocate requested block */ | 3343 | /* we should allocate requested block */ |
3317 | } else if (cache_type == EXT4_EXT_CACHE_EXTENT) { | 3344 | } else if (cache_type == EXT4_EXT_CACHE_EXTENT) { |
3318 | /* block is already allocated */ | 3345 | /* block is already allocated */ |
3319 | newblock = iblock | 3346 | newblock = map->m_lblk |
3320 | - le32_to_cpu(newex.ee_block) | 3347 | - le32_to_cpu(newex.ee_block) |
3321 | + ext_pblock(&newex); | 3348 | + ext_pblock(&newex); |
3322 | /* number of remaining blocks in the extent */ | 3349 | /* number of remaining blocks in the extent */ |
3323 | allocated = ext4_ext_get_actual_len(&newex) - | 3350 | allocated = ext4_ext_get_actual_len(&newex) - |
3324 | (iblock - le32_to_cpu(newex.ee_block)); | 3351 | (map->m_lblk - le32_to_cpu(newex.ee_block)); |
3325 | goto out; | 3352 | goto out; |
3326 | } else { | 3353 | } else { |
3327 | BUG(); | 3354 | BUG(); |
@@ -3329,7 +3356,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
3329 | } | 3356 | } |
3330 | 3357 | ||
3331 | /* find extent for this block */ | 3358 | /* find extent for this block */ |
3332 | path = ext4_ext_find_extent(inode, iblock, NULL); | 3359 | path = ext4_ext_find_extent(inode, map->m_lblk, NULL); |
3333 | if (IS_ERR(path)) { | 3360 | if (IS_ERR(path)) { |
3334 | err = PTR_ERR(path); | 3361 | err = PTR_ERR(path); |
3335 | path = NULL; | 3362 | path = NULL; |
@@ -3345,8 +3372,9 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
3345 | */ | 3372 | */ |
3346 | if (unlikely(path[depth].p_ext == NULL && depth != 0)) { | 3373 | if (unlikely(path[depth].p_ext == NULL && depth != 0)) { |
3347 | EXT4_ERROR_INODE(inode, "bad extent address " | 3374 | EXT4_ERROR_INODE(inode, "bad extent address " |
3348 | "iblock: %d, depth: %d pblock %lld", | 3375 | "lblock: %lu, depth: %d pblock %lld", |
3349 | iblock, depth, path[depth].p_block); | 3376 | (unsigned long) map->m_lblk, depth, |
3377 | path[depth].p_block); | ||
3350 | err = -EIO; | 3378 | err = -EIO; |
3351 | goto out2; | 3379 | goto out2; |
3352 | } | 3380 | } |
@@ -3364,12 +3392,12 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
3364 | */ | 3392 | */ |
3365 | ee_len = ext4_ext_get_actual_len(ex); | 3393 | ee_len = ext4_ext_get_actual_len(ex); |
3366 | /* if found extent covers block, simply return it */ | 3394 | /* if found extent covers block, simply return it */ |
3367 | if (in_range(iblock, ee_block, ee_len)) { | 3395 | if (in_range(map->m_lblk, ee_block, ee_len)) { |
3368 | newblock = iblock - ee_block + ee_start; | 3396 | newblock = map->m_lblk - ee_block + ee_start; |
3369 | /* number of remaining blocks in the extent */ | 3397 | /* number of remaining blocks in the extent */ |
3370 | allocated = ee_len - (iblock - ee_block); | 3398 | allocated = ee_len - (map->m_lblk - ee_block); |
3371 | ext_debug("%u fit into %u:%d -> %llu\n", iblock, | 3399 | ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk, |
3372 | ee_block, ee_len, newblock); | 3400 | ee_block, ee_len, newblock); |
3373 | 3401 | ||
3374 | /* Do not put uninitialized extent in the cache */ | 3402 | /* Do not put uninitialized extent in the cache */ |
3375 | if (!ext4_ext_is_uninitialized(ex)) { | 3403 | if (!ext4_ext_is_uninitialized(ex)) { |
@@ -3379,8 +3407,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
3379 | goto out; | 3407 | goto out; |
3380 | } | 3408 | } |
3381 | ret = ext4_ext_handle_uninitialized_extents(handle, | 3409 | ret = ext4_ext_handle_uninitialized_extents(handle, |
3382 | inode, iblock, max_blocks, path, | 3410 | inode, map, path, flags, allocated, |
3383 | flags, allocated, bh_result, newblock); | 3411 | newblock); |
3384 | return ret; | 3412 | return ret; |
3385 | } | 3413 | } |
3386 | } | 3414 | } |
@@ -3394,7 +3422,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
3394 | * put just found gap into cache to speed up | 3422 | * put just found gap into cache to speed up |
3395 | * subsequent requests | 3423 | * subsequent requests |
3396 | */ | 3424 | */ |
3397 | ext4_ext_put_gap_in_cache(inode, path, iblock); | 3425 | ext4_ext_put_gap_in_cache(inode, path, map->m_lblk); |
3398 | goto out2; | 3426 | goto out2; |
3399 | } | 3427 | } |
3400 | /* | 3428 | /* |
@@ -3402,11 +3430,11 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
3402 | */ | 3430 | */ |
3403 | 3431 | ||
3404 | /* find neighbour allocated blocks */ | 3432 | /* find neighbour allocated blocks */ |
3405 | ar.lleft = iblock; | 3433 | ar.lleft = map->m_lblk; |
3406 | err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft); | 3434 | err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft); |
3407 | if (err) | 3435 | if (err) |
3408 | goto out2; | 3436 | goto out2; |
3409 | ar.lright = iblock; | 3437 | ar.lright = map->m_lblk; |
3410 | err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright); | 3438 | err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright); |
3411 | if (err) | 3439 | if (err) |
3412 | goto out2; | 3440 | goto out2; |
@@ -3417,26 +3445,26 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
3417 | * EXT_INIT_MAX_LEN and for an uninitialized extent this limit is | 3445 | * EXT_INIT_MAX_LEN and for an uninitialized extent this limit is |
3418 | * EXT_UNINIT_MAX_LEN. | 3446 | * EXT_UNINIT_MAX_LEN. |
3419 | */ | 3447 | */ |
3420 | if (max_blocks > EXT_INIT_MAX_LEN && | 3448 | if (map->m_len > EXT_INIT_MAX_LEN && |
3421 | !(flags & EXT4_GET_BLOCKS_UNINIT_EXT)) | 3449 | !(flags & EXT4_GET_BLOCKS_UNINIT_EXT)) |
3422 | max_blocks = EXT_INIT_MAX_LEN; | 3450 | map->m_len = EXT_INIT_MAX_LEN; |
3423 | else if (max_blocks > EXT_UNINIT_MAX_LEN && | 3451 | else if (map->m_len > EXT_UNINIT_MAX_LEN && |
3424 | (flags & EXT4_GET_BLOCKS_UNINIT_EXT)) | 3452 | (flags & EXT4_GET_BLOCKS_UNINIT_EXT)) |
3425 | max_blocks = EXT_UNINIT_MAX_LEN; | 3453 | map->m_len = EXT_UNINIT_MAX_LEN; |
3426 | 3454 | ||
3427 | /* Check if we can really insert (iblock)::(iblock+max_blocks) extent */ | 3455 | /* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */ |
3428 | newex.ee_block = cpu_to_le32(iblock); | 3456 | newex.ee_block = cpu_to_le32(map->m_lblk); |
3429 | newex.ee_len = cpu_to_le16(max_blocks); | 3457 | newex.ee_len = cpu_to_le16(map->m_len); |
3430 | err = ext4_ext_check_overlap(inode, &newex, path); | 3458 | err = ext4_ext_check_overlap(inode, &newex, path); |
3431 | if (err) | 3459 | if (err) |
3432 | allocated = ext4_ext_get_actual_len(&newex); | 3460 | allocated = ext4_ext_get_actual_len(&newex); |
3433 | else | 3461 | else |
3434 | allocated = max_blocks; | 3462 | allocated = map->m_len; |
3435 | 3463 | ||
3436 | /* allocate new block */ | 3464 | /* allocate new block */ |
3437 | ar.inode = inode; | 3465 | ar.inode = inode; |
3438 | ar.goal = ext4_ext_find_goal(inode, path, iblock); | 3466 | ar.goal = ext4_ext_find_goal(inode, path, map->m_lblk); |
3439 | ar.logical = iblock; | 3467 | ar.logical = map->m_lblk; |
3440 | ar.len = allocated; | 3468 | ar.len = allocated; |
3441 | if (S_ISREG(inode->i_mode)) | 3469 | if (S_ISREG(inode->i_mode)) |
3442 | ar.flags = EXT4_MB_HINT_DATA; | 3470 | ar.flags = EXT4_MB_HINT_DATA; |
@@ -3470,21 +3498,33 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
3470 | EXT4_STATE_DIO_UNWRITTEN); | 3498 | EXT4_STATE_DIO_UNWRITTEN); |
3471 | } | 3499 | } |
3472 | if (ext4_should_dioread_nolock(inode)) | 3500 | if (ext4_should_dioread_nolock(inode)) |
3473 | set_buffer_uninit(bh_result); | 3501 | map->m_flags |= EXT4_MAP_UNINIT; |
3474 | } | 3502 | } |
3475 | 3503 | ||
3476 | if (unlikely(EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL)) { | 3504 | if (unlikely(ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))) { |
3477 | if (unlikely(!eh->eh_entries)) { | 3505 | if (unlikely(!eh->eh_entries)) { |
3478 | EXT4_ERROR_INODE(inode, | 3506 | EXT4_ERROR_INODE(inode, |
3479 | "eh->eh_entries == 0 ee_block %d", | 3507 | "eh->eh_entries == 0 and " |
3480 | ex->ee_block); | 3508 | "EOFBLOCKS_FL set"); |
3481 | err = -EIO; | 3509 | err = -EIO; |
3482 | goto out2; | 3510 | goto out2; |
3483 | } | 3511 | } |
3484 | last_ex = EXT_LAST_EXTENT(eh); | 3512 | last_ex = EXT_LAST_EXTENT(eh); |
3485 | if (iblock + ar.len > le32_to_cpu(last_ex->ee_block) | 3513 | /* |
3486 | + ext4_ext_get_actual_len(last_ex)) | 3514 | * If the current leaf block was reached by looking at |
3487 | EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL; | 3515 | * the last index block all the way down the tree, and |
3516 | * we are extending the inode beyond the last extent | ||
3517 | * in the current leaf block, then clear the | ||
3518 | * EOFBLOCKS_FL flag. | ||
3519 | */ | ||
3520 | for (i = depth-1; i >= 0; i--) { | ||
3521 | if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr)) | ||
3522 | break; | ||
3523 | } | ||
3524 | if ((i < 0) && | ||
3525 | (map->m_lblk + ar.len > le32_to_cpu(last_ex->ee_block) + | ||
3526 | ext4_ext_get_actual_len(last_ex))) | ||
3527 | ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); | ||
3488 | } | 3528 | } |
3489 | err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); | 3529 | err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); |
3490 | if (err) { | 3530 | if (err) { |
@@ -3500,9 +3540,9 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
3500 | /* previous routine could use block we allocated */ | 3540 | /* previous routine could use block we allocated */ |
3501 | newblock = ext_pblock(&newex); | 3541 | newblock = ext_pblock(&newex); |
3502 | allocated = ext4_ext_get_actual_len(&newex); | 3542 | allocated = ext4_ext_get_actual_len(&newex); |
3503 | if (allocated > max_blocks) | 3543 | if (allocated > map->m_len) |
3504 | allocated = max_blocks; | 3544 | allocated = map->m_len; |
3505 | set_buffer_new(bh_result); | 3545 | map->m_flags |= EXT4_MAP_NEW; |
3506 | 3546 | ||
3507 | /* | 3547 | /* |
3508 | * Update reserved blocks/metadata blocks after successful | 3548 | * Update reserved blocks/metadata blocks after successful |
@@ -3516,18 +3556,18 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
3516 | * when it is _not_ an uninitialized extent. | 3556 | * when it is _not_ an uninitialized extent. |
3517 | */ | 3557 | */ |
3518 | if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) { | 3558 | if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) { |
3519 | ext4_ext_put_in_cache(inode, iblock, allocated, newblock, | 3559 | ext4_ext_put_in_cache(inode, map->m_lblk, allocated, newblock, |
3520 | EXT4_EXT_CACHE_EXTENT); | 3560 | EXT4_EXT_CACHE_EXTENT); |
3521 | ext4_update_inode_fsync_trans(handle, inode, 1); | 3561 | ext4_update_inode_fsync_trans(handle, inode, 1); |
3522 | } else | 3562 | } else |
3523 | ext4_update_inode_fsync_trans(handle, inode, 0); | 3563 | ext4_update_inode_fsync_trans(handle, inode, 0); |
3524 | out: | 3564 | out: |
3525 | if (allocated > max_blocks) | 3565 | if (allocated > map->m_len) |
3526 | allocated = max_blocks; | 3566 | allocated = map->m_len; |
3527 | ext4_ext_show_leaf(inode, path); | 3567 | ext4_ext_show_leaf(inode, path); |
3528 | set_buffer_mapped(bh_result); | 3568 | map->m_flags |= EXT4_MAP_MAPPED; |
3529 | bh_result->b_bdev = inode->i_sb->s_bdev; | 3569 | map->m_pblk = newblock; |
3530 | bh_result->b_blocknr = newblock; | 3570 | map->m_len = allocated; |
3531 | out2: | 3571 | out2: |
3532 | if (path) { | 3572 | if (path) { |
3533 | ext4_ext_drop_refs(path); | 3573 | ext4_ext_drop_refs(path); |
@@ -3625,7 +3665,7 @@ static void ext4_falloc_update_inode(struct inode *inode, | |||
3625 | * can proceed even if the new size is the same as i_size. | 3665 | * can proceed even if the new size is the same as i_size. |
3626 | */ | 3666 | */ |
3627 | if (new_size > i_size_read(inode)) | 3667 | if (new_size > i_size_read(inode)) |
3628 | EXT4_I(inode)->i_flags |= EXT4_EOFBLOCKS_FL; | 3668 | ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS); |
3629 | } | 3669 | } |
3630 | 3670 | ||
3631 | } | 3671 | } |
@@ -3640,55 +3680,57 @@ static void ext4_falloc_update_inode(struct inode *inode, | |||
3640 | long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) | 3680 | long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) |
3641 | { | 3681 | { |
3642 | handle_t *handle; | 3682 | handle_t *handle; |
3643 | ext4_lblk_t block; | ||
3644 | loff_t new_size; | 3683 | loff_t new_size; |
3645 | unsigned int max_blocks; | 3684 | unsigned int max_blocks; |
3646 | int ret = 0; | 3685 | int ret = 0; |
3647 | int ret2 = 0; | 3686 | int ret2 = 0; |
3648 | int retries = 0; | 3687 | int retries = 0; |
3649 | struct buffer_head map_bh; | 3688 | struct ext4_map_blocks map; |
3650 | unsigned int credits, blkbits = inode->i_blkbits; | 3689 | unsigned int credits, blkbits = inode->i_blkbits; |
3651 | 3690 | ||
3652 | /* | 3691 | /* |
3653 | * currently supporting (pre)allocate mode for extent-based | 3692 | * currently supporting (pre)allocate mode for extent-based |
3654 | * files _only_ | 3693 | * files _only_ |
3655 | */ | 3694 | */ |
3656 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) | 3695 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
3657 | return -EOPNOTSUPP; | 3696 | return -EOPNOTSUPP; |
3658 | 3697 | ||
3659 | /* preallocation to directories is currently not supported */ | 3698 | /* preallocation to directories is currently not supported */ |
3660 | if (S_ISDIR(inode->i_mode)) | 3699 | if (S_ISDIR(inode->i_mode)) |
3661 | return -ENODEV; | 3700 | return -ENODEV; |
3662 | 3701 | ||
3663 | block = offset >> blkbits; | 3702 | map.m_lblk = offset >> blkbits; |
3664 | /* | 3703 | /* |
3665 | * We can't just convert len to max_blocks: e.g. with | 3704 | * We can't just convert len to max_blocks: e.g. with |
3666 | * blocksize = 4096, offset = 3072 and len = 2048, two blocks are spanned | 3705 | * blocksize = 4096, offset = 3072 and len = 2048, two blocks are spanned |
3667 | */ | 3706 | */ |
3668 | max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) | 3707 | max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) |
3669 | - block; | 3708 | - map.m_lblk; |
3670 | /* | 3709 | /* |
3671 | * credits to insert 1 extent into extent tree | 3710 | * credits to insert 1 extent into extent tree |
3672 | */ | 3711 | */ |
3673 | credits = ext4_chunk_trans_blocks(inode, max_blocks); | 3712 | credits = ext4_chunk_trans_blocks(inode, max_blocks); |
3674 | mutex_lock(&inode->i_mutex); | 3713 | mutex_lock(&inode->i_mutex); |
3714 | ret = inode_newsize_ok(inode, (len + offset)); | ||
3715 | if (ret) { | ||
3716 | mutex_unlock(&inode->i_mutex); | ||
3717 | return ret; | ||
3718 | } | ||
3675 | retry: | 3719 | retry: |
3676 | while (ret >= 0 && ret < max_blocks) { | 3720 | while (ret >= 0 && ret < max_blocks) { |
3677 | block = block + ret; | 3721 | map.m_lblk = map.m_lblk + ret; |
3678 | max_blocks = max_blocks - ret; | 3722 | map.m_len = max_blocks = max_blocks - ret; |
3679 | handle = ext4_journal_start(inode, credits); | 3723 | handle = ext4_journal_start(inode, credits); |
3680 | if (IS_ERR(handle)) { | 3724 | if (IS_ERR(handle)) { |
3681 | ret = PTR_ERR(handle); | 3725 | ret = PTR_ERR(handle); |
3682 | break; | 3726 | break; |
3683 | } | 3727 | } |
3684 | map_bh.b_state = 0; | 3728 | ret = ext4_map_blocks(handle, inode, &map, |
3685 | ret = ext4_get_blocks(handle, inode, block, | ||
3686 | max_blocks, &map_bh, | ||
3687 | EXT4_GET_BLOCKS_CREATE_UNINIT_EXT); | 3729 | EXT4_GET_BLOCKS_CREATE_UNINIT_EXT); |
3688 | if (ret <= 0) { | 3730 | if (ret <= 0) { |
3689 | #ifdef EXT4FS_DEBUG | 3731 | #ifdef EXT4FS_DEBUG |
3690 | WARN_ON(ret <= 0); | 3732 | WARN_ON(ret <= 0); |
3691 | printk(KERN_ERR "%s: ext4_ext_get_blocks " | 3733 | printk(KERN_ERR "%s: ext4_ext_map_blocks " |
3692 | "returned error inode#%lu, block=%u, " | 3734 | "returned error inode#%lu, block=%u, " |
3693 | "max_blocks=%u", __func__, | 3735 | "max_blocks=%u", __func__, |
3694 | inode->i_ino, block, max_blocks); | 3736 | inode->i_ino, block, max_blocks); |
@@ -3697,14 +3739,14 @@ retry: | |||
3697 | ret2 = ext4_journal_stop(handle); | 3739 | ret2 = ext4_journal_stop(handle); |
3698 | break; | 3740 | break; |
3699 | } | 3741 | } |
3700 | if ((block + ret) >= (EXT4_BLOCK_ALIGN(offset + len, | 3742 | if ((map.m_lblk + ret) >= (EXT4_BLOCK_ALIGN(offset + len, |
3701 | blkbits) >> blkbits)) | 3743 | blkbits) >> blkbits)) |
3702 | new_size = offset + len; | 3744 | new_size = offset + len; |
3703 | else | 3745 | else |
3704 | new_size = (block + ret) << blkbits; | 3746 | new_size = (map.m_lblk + ret) << blkbits; |
3705 | 3747 | ||
3706 | ext4_falloc_update_inode(inode, mode, new_size, | 3748 | ext4_falloc_update_inode(inode, mode, new_size, |
3707 | buffer_new(&map_bh)); | 3749 | (map.m_flags & EXT4_MAP_NEW)); |
3708 | ext4_mark_inode_dirty(handle, inode); | 3750 | ext4_mark_inode_dirty(handle, inode); |
3709 | ret2 = ext4_journal_stop(handle); | 3751 | ret2 = ext4_journal_stop(handle); |
3710 | if (ret2) | 3752 | if (ret2) |
@@ -3733,42 +3775,39 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, | |||
3733 | ssize_t len) | 3775 | ssize_t len) |
3734 | { | 3776 | { |
3735 | handle_t *handle; | 3777 | handle_t *handle; |
3736 | ext4_lblk_t block; | ||
3737 | unsigned int max_blocks; | 3778 | unsigned int max_blocks; |
3738 | int ret = 0; | 3779 | int ret = 0; |
3739 | int ret2 = 0; | 3780 | int ret2 = 0; |
3740 | struct buffer_head map_bh; | 3781 | struct ext4_map_blocks map; |
3741 | unsigned int credits, blkbits = inode->i_blkbits; | 3782 | unsigned int credits, blkbits = inode->i_blkbits; |
3742 | 3783 | ||
3743 | block = offset >> blkbits; | 3784 | map.m_lblk = offset >> blkbits; |
3744 | /* | 3785 | /* |
3745 | * We can't just convert len to max_blocks: e.g. with | 3787 | * We can't just convert len to max_blocks: e.g. with |
3746 | * blocksize = 4096, offset = 3072 and len = 2048, two blocks are spanned | 3788 | * blocksize = 4096, offset = 3072 and len = 2048, two blocks are spanned |
3747 | */ | 3788 | */ |
3748 | max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) | 3789 | max_blocks = ((EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) - |
3749 | - block; | 3790 | map.m_lblk); |
3750 | /* | 3791 | /* |
3751 | * credits to insert 1 extent into extent tree | 3792 | * credits to insert 1 extent into extent tree |
3752 | */ | 3793 | */ |
3753 | credits = ext4_chunk_trans_blocks(inode, max_blocks); | 3794 | credits = ext4_chunk_trans_blocks(inode, max_blocks); |
3754 | while (ret >= 0 && ret < max_blocks) { | 3795 | while (ret >= 0 && ret < max_blocks) { |
3755 | block = block + ret; | 3796 | map.m_lblk += ret; |
3756 | max_blocks = max_blocks - ret; | 3797 | map.m_len = (max_blocks -= ret); |
3757 | handle = ext4_journal_start(inode, credits); | 3798 | handle = ext4_journal_start(inode, credits); |
3758 | if (IS_ERR(handle)) { | 3799 | if (IS_ERR(handle)) { |
3759 | ret = PTR_ERR(handle); | 3800 | ret = PTR_ERR(handle); |
3760 | break; | 3801 | break; |
3761 | } | 3802 | } |
3762 | map_bh.b_state = 0; | 3803 | ret = ext4_map_blocks(handle, inode, &map, |
3763 | ret = ext4_get_blocks(handle, inode, block, | ||
3764 | max_blocks, &map_bh, | ||
3765 | EXT4_GET_BLOCKS_IO_CONVERT_EXT); | 3804 | EXT4_GET_BLOCKS_IO_CONVERT_EXT); |
3766 | if (ret <= 0) { | 3805 | if (ret <= 0) { |
3767 | WARN_ON(ret <= 0); | 3806 | WARN_ON(ret <= 0); |
3768 | printk(KERN_ERR "%s: ext4_ext_get_blocks " | 3807 | printk(KERN_ERR "%s: ext4_ext_map_blocks " |
3769 | "returned error inode#%lu, block=%u, " | 3808 | "returned error inode#%lu, block=%u, " |
3770 | "max_blocks=%u", __func__, | 3809 | "max_blocks=%u", __func__, |
3771 | inode->i_ino, block, max_blocks); | 3810 | inode->i_ino, map.m_lblk, map.m_len); |
3772 | } | 3811 | } |
3773 | ext4_mark_inode_dirty(handle, inode); | 3812 | ext4_mark_inode_dirty(handle, inode); |
3774 | ret2 = ext4_journal_stop(handle); | 3813 | ret2 = ext4_journal_stop(handle); |
@@ -3898,7 +3937,7 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
3898 | int error = 0; | 3937 | int error = 0; |
3899 | 3938 | ||
3900 | /* fallback to generic here if not in extents fmt */ | 3939 | /* fallback to generic here if not in extents fmt */ |
3901 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) | 3940 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
3902 | return generic_block_fiemap(inode, fieinfo, start, len, | 3941 | return generic_block_fiemap(inode, fieinfo, start, len, |
3903 | ext4_get_block); | 3942 | ext4_get_block); |
3904 | 3943 | ||
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index d0776e410f34..5313ae4cda2d 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -66,7 +66,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, | |||
66 | * is smaller than s_maxbytes, which is for extent-mapped files. | 66 | * is smaller than s_maxbytes, which is for extent-mapped files. |
67 | */ | 67 | */ |
68 | 68 | ||
69 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) { | 69 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { |
70 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 70 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
71 | size_t length = iov_length(iov, nr_segs); | 71 | size_t length = iov_length(iov, nr_segs); |
72 | 72 | ||
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index ef3d980e67cb..b6a74f991bf4 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c | |||
@@ -35,6 +35,29 @@ | |||
35 | #include <trace/events/ext4.h> | 35 | #include <trace/events/ext4.h> |
36 | 36 | ||
37 | /* | 37 | /* |
38 | * If we're not journaling and this is a just-created file, we have to | ||
39 | * sync our parent directory (if it was freshly created) since | ||
40 | * otherwise it will only be written by writeback, leaving a huge | ||
41 | * window during which a crash may lose the file. This may apply for | ||
42 | * the parent directory's parent as well, and so on recursively, if | ||
43 | * they are also freshly created. | ||
44 | */ | ||
45 | static void ext4_sync_parent(struct inode *inode) | ||
46 | { | ||
47 | struct dentry *dentry = NULL; | ||
48 | |||
49 | while (inode && ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) { | ||
50 | ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY); | ||
51 | dentry = list_entry(inode->i_dentry.next, | ||
52 | struct dentry, d_alias); | ||
53 | if (!dentry || !dentry->d_parent || !dentry->d_parent->d_inode) | ||
54 | break; | ||
55 | inode = dentry->d_parent->d_inode; | ||
56 | sync_mapping_buffers(inode->i_mapping); | ||
57 | } | ||
58 | } | ||
59 | |||
60 | /* | ||
38 | * akpm: A new design for ext4_sync_file(). | 61 | * akpm: A new design for ext4_sync_file(). |
39 | * | 62 | * |
40 | * This is only called from sys_fsync(), sys_fdatasync() and sys_msync(). | 63 | * This is only called from sys_fsync(), sys_fdatasync() and sys_msync(). |
@@ -66,9 +89,13 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
66 | ret = flush_completed_IO(inode); | 89 | ret = flush_completed_IO(inode); |
67 | if (ret < 0) | 90 | if (ret < 0) |
68 | return ret; | 91 | return ret; |
69 | 92 | ||
70 | if (!journal) | 93 | if (!journal) { |
71 | return simple_fsync(file, dentry, datasync); | 94 | ret = simple_fsync(file, dentry, datasync); |
95 | if (!ret && !list_empty(&inode->i_dentry)) | ||
96 | ext4_sync_parent(inode); | ||
97 | return ret; | ||
98 | } | ||
72 | 99 | ||
73 | /* | 100 | /* |
74 | * data=writeback,ordered: | 101 | * data=writeback,ordered: |
@@ -102,7 +129,7 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
102 | (journal->j_flags & JBD2_BARRIER)) | 129 | (journal->j_flags & JBD2_BARRIER)) |
103 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, | 130 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, |
104 | NULL, BLKDEV_IFL_WAIT); | 131 | NULL, BLKDEV_IFL_WAIT); |
105 | jbd2_log_wait_commit(journal, commit_tid); | 132 | ret = jbd2_log_wait_commit(journal, commit_tid); |
106 | } else if (journal->j_flags & JBD2_BARRIER) | 133 | } else if (journal->j_flags & JBD2_BARRIER) |
107 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL, | 134 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL, |
108 | BLKDEV_IFL_WAIT); | 135 | BLKDEV_IFL_WAIT); |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 1a0e183a2f04..25c4b3173fd9 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -240,56 +240,49 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) | |||
240 | if (fatal) | 240 | if (fatal) |
241 | goto error_return; | 241 | goto error_return; |
242 | 242 | ||
243 | /* Ok, now we can actually update the inode bitmaps.. */ | 243 | fatal = -ESRCH; |
244 | cleared = ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group), | 244 | gdp = ext4_get_group_desc(sb, block_group, &bh2); |
245 | bit, bitmap_bh->b_data); | 245 | if (gdp) { |
246 | if (!cleared) | ||
247 | ext4_error(sb, "bit already cleared for inode %lu", ino); | ||
248 | else { | ||
249 | gdp = ext4_get_group_desc(sb, block_group, &bh2); | ||
250 | |||
251 | BUFFER_TRACE(bh2, "get_write_access"); | 246 | BUFFER_TRACE(bh2, "get_write_access"); |
252 | fatal = ext4_journal_get_write_access(handle, bh2); | 247 | fatal = ext4_journal_get_write_access(handle, bh2); |
253 | if (fatal) goto error_return; | 248 | } |
254 | 249 | ext4_lock_group(sb, block_group); | |
255 | if (gdp) { | 250 | cleared = ext4_clear_bit(bit, bitmap_bh->b_data); |
256 | ext4_lock_group(sb, block_group); | 251 | if (fatal || !cleared) { |
257 | count = ext4_free_inodes_count(sb, gdp) + 1; | 252 | ext4_unlock_group(sb, block_group); |
258 | ext4_free_inodes_set(sb, gdp, count); | 253 | goto out; |
259 | if (is_directory) { | 254 | } |
260 | count = ext4_used_dirs_count(sb, gdp) - 1; | ||
261 | ext4_used_dirs_set(sb, gdp, count); | ||
262 | if (sbi->s_log_groups_per_flex) { | ||
263 | ext4_group_t f; | ||
264 | |||
265 | f = ext4_flex_group(sbi, block_group); | ||
266 | atomic_dec(&sbi->s_flex_groups[f].used_dirs); | ||
267 | } | ||
268 | 255 | ||
269 | } | 256 | count = ext4_free_inodes_count(sb, gdp) + 1; |
270 | gdp->bg_checksum = ext4_group_desc_csum(sbi, | 257 | ext4_free_inodes_set(sb, gdp, count); |
271 | block_group, gdp); | 258 | if (is_directory) { |
272 | ext4_unlock_group(sb, block_group); | 259 | count = ext4_used_dirs_count(sb, gdp) - 1; |
273 | percpu_counter_inc(&sbi->s_freeinodes_counter); | 260 | ext4_used_dirs_set(sb, gdp, count); |
274 | if (is_directory) | 261 | percpu_counter_dec(&sbi->s_dirs_counter); |
275 | percpu_counter_dec(&sbi->s_dirs_counter); | ||
276 | |||
277 | if (sbi->s_log_groups_per_flex) { | ||
278 | ext4_group_t f; | ||
279 | |||
280 | f = ext4_flex_group(sbi, block_group); | ||
281 | atomic_inc(&sbi->s_flex_groups[f].free_inodes); | ||
282 | } | ||
283 | } | ||
284 | BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata"); | ||
285 | err = ext4_handle_dirty_metadata(handle, NULL, bh2); | ||
286 | if (!fatal) fatal = err; | ||
287 | } | 262 | } |
288 | BUFFER_TRACE(bitmap_bh, "call ext4_handle_dirty_metadata"); | 263 | gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); |
289 | err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); | 264 | ext4_unlock_group(sb, block_group); |
290 | if (!fatal) | 265 | |
291 | fatal = err; | 266 | percpu_counter_inc(&sbi->s_freeinodes_counter); |
292 | sb->s_dirt = 1; | 267 | if (sbi->s_log_groups_per_flex) { |
268 | ext4_group_t f = ext4_flex_group(sbi, block_group); | ||
269 | |||
270 | atomic_inc(&sbi->s_flex_groups[f].free_inodes); | ||
271 | if (is_directory) | ||
272 | atomic_dec(&sbi->s_flex_groups[f].used_dirs); | ||
273 | } | ||
274 | BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata"); | ||
275 | fatal = ext4_handle_dirty_metadata(handle, NULL, bh2); | ||
276 | out: | ||
277 | if (cleared) { | ||
278 | BUFFER_TRACE(bitmap_bh, "call ext4_handle_dirty_metadata"); | ||
279 | err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); | ||
280 | if (!fatal) | ||
281 | fatal = err; | ||
282 | sb->s_dirt = 1; | ||
283 | } else | ||
284 | ext4_error(sb, "bit already cleared for inode %lu", ino); | ||
285 | |||
293 | error_return: | 286 | error_return: |
294 | brelse(bitmap_bh); | 287 | brelse(bitmap_bh); |
295 | ext4_std_error(sb, fatal); | 288 | ext4_std_error(sb, fatal); |
@@ -499,7 +492,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, | |||
499 | 492 | ||
500 | if (S_ISDIR(mode) && | 493 | if (S_ISDIR(mode) && |
501 | ((parent == sb->s_root->d_inode) || | 494 | ((parent == sb->s_root->d_inode) || |
502 | (EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL))) { | 495 | (ext4_test_inode_flag(parent, EXT4_INODE_TOPDIR)))) { |
503 | int best_ndir = inodes_per_group; | 496 | int best_ndir = inodes_per_group; |
504 | int ret = -1; | 497 | int ret = -1; |
505 | 498 | ||
@@ -1041,7 +1034,7 @@ got: | |||
1041 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { | 1034 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { |
1042 | /* set extent flag only for directory, file and normal symlink*/ | 1035 | /* set extent flag only for directory, file and normal symlink*/ |
1043 | if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) { | 1036 | if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) { |
1044 | EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL; | 1037 | ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS); |
1045 | ext4_ext_tree_init(handle, inode); | 1038 | ext4_ext_tree_init(handle, inode); |
1046 | } | 1039 | } |
1047 | } | 1040 | } |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 3e0f6af9d08d..19df61c321fd 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -149,7 +149,7 @@ int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode, | |||
149 | int ret; | 149 | int ret; |
150 | 150 | ||
151 | /* | 151 | /* |
152 | * Drop i_data_sem to avoid deadlock with ext4_get_blocks At this | 152 | * Drop i_data_sem to avoid deadlock with ext4_map_blocks. At this |
153 | * moment, get_block can be called only for blocks inside i_size since | 153 | * moment, get_block can be called only for blocks inside i_size since |
154 | * page cache has been already dropped and writes are blocked by | 154 | * page cache has been already dropped and writes are blocked by |
155 | * i_mutex. So we can safely drop the i_data_sem here. | 155 | * i_mutex. So we can safely drop the i_data_sem here. |
@@ -348,9 +348,8 @@ static int __ext4_check_blockref(const char *function, struct inode *inode, | |||
348 | if (blk && | 348 | if (blk && |
349 | unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), | 349 | unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), |
350 | blk, 1))) { | 350 | blk, 1))) { |
351 | __ext4_error(inode->i_sb, function, | 351 | ext4_error_inode(function, inode, |
352 | "invalid block reference %u " | 352 | "invalid block reference %u", blk); |
353 | "in inode #%lu", blk, inode->i_ino); | ||
354 | return -EIO; | 353 | return -EIO; |
355 | } | 354 | } |
356 | } | 355 | } |
@@ -785,7 +784,7 @@ failed: | |||
785 | /* Allocation failed, free what we already allocated */ | 784 | /* Allocation failed, free what we already allocated */ |
786 | ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0); | 785 | ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0); |
787 | for (i = 1; i <= n ; i++) { | 786 | for (i = 1; i <= n ; i++) { |
788 | /* | 787 | /* |
789 | * branch[i].bh is newly allocated, so there is no | 788 | * branch[i].bh is newly allocated, so there is no |
790 | * need to revoke the block, which is why we don't | 789 | * need to revoke the block, which is why we don't |
791 | * need to set EXT4_FREE_BLOCKS_METADATA. | 790 | * need to set EXT4_FREE_BLOCKS_METADATA. |
@@ -875,7 +874,7 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, | |||
875 | 874 | ||
876 | err_out: | 875 | err_out: |
877 | for (i = 1; i <= num; i++) { | 876 | for (i = 1; i <= num; i++) { |
878 | /* | 877 | /* |
879 | * branch[i].bh is newly allocated, so there is no | 878 | * branch[i].bh is newly allocated, so there is no |
880 | * need to revoke the block, which is why we don't | 879 | * need to revoke the block, which is why we don't |
881 | * need to set EXT4_FREE_BLOCKS_METADATA. | 880 | * need to set EXT4_FREE_BLOCKS_METADATA. |
@@ -890,9 +889,9 @@ err_out: | |||
890 | } | 889 | } |
891 | 890 | ||
892 | /* | 891 | /* |
893 | * The ext4_ind_get_blocks() function handles non-extents inodes | 892 | * The ext4_ind_map_blocks() function handles non-extents inodes |
894 | * (i.e., using the traditional indirect/double-indirect i_blocks | 893 | * (i.e., using the traditional indirect/double-indirect i_blocks |
895 | * scheme) for ext4_get_blocks(). | 894 | * scheme) for ext4_map_blocks(). |
896 | * | 895 | * |
897 | * Allocation strategy is simple: if we have to allocate something, we will | 896 | * Allocation strategy is simple: if we have to allocate something, we will |
898 | * have to go the whole way to leaf. So let's do it before attaching anything | 897 | * have to go the whole way to leaf. So let's do it before attaching anything |
@@ -917,9 +916,8 @@ err_out: | |||
917 | * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system | 916 | * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system |
918 | * blocks. | 917 | * blocks. |
919 | */ | 918 | */ |
920 | static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, | 919 | static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, |
921 | ext4_lblk_t iblock, unsigned int maxblocks, | 920 | struct ext4_map_blocks *map, |
922 | struct buffer_head *bh_result, | ||
923 | int flags) | 921 | int flags) |
924 | { | 922 | { |
925 | int err = -EIO; | 923 | int err = -EIO; |
@@ -933,9 +931,9 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, | |||
933 | int count = 0; | 931 | int count = 0; |
934 | ext4_fsblk_t first_block = 0; | 932 | ext4_fsblk_t first_block = 0; |
935 | 933 | ||
936 | J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)); | 934 | J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))); |
937 | J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); | 935 | J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); |
938 | depth = ext4_block_to_path(inode, iblock, offsets, | 936 | depth = ext4_block_to_path(inode, map->m_lblk, offsets, |
939 | &blocks_to_boundary); | 937 | &blocks_to_boundary); |
940 | 938 | ||
941 | if (depth == 0) | 939 | if (depth == 0) |
@@ -946,10 +944,9 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, | |||
946 | /* Simplest case - block found, no allocation needed */ | 944 | /* Simplest case - block found, no allocation needed */ |
947 | if (!partial) { | 945 | if (!partial) { |
948 | first_block = le32_to_cpu(chain[depth - 1].key); | 946 | first_block = le32_to_cpu(chain[depth - 1].key); |
949 | clear_buffer_new(bh_result); | ||
950 | count++; | 947 | count++; |
951 | /*map more blocks*/ | 948 | /*map more blocks*/ |
952 | while (count < maxblocks && count <= blocks_to_boundary) { | 949 | while (count < map->m_len && count <= blocks_to_boundary) { |
953 | ext4_fsblk_t blk; | 950 | ext4_fsblk_t blk; |
954 | 951 | ||
955 | blk = le32_to_cpu(*(chain[depth-1].p + count)); | 952 | blk = le32_to_cpu(*(chain[depth-1].p + count)); |
@@ -969,7 +966,7 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, | |||
969 | /* | 966 | /* |
970 | * Okay, we need to do block allocation. | 967 | * Okay, we need to do block allocation. |
971 | */ | 968 | */ |
972 | goal = ext4_find_goal(inode, iblock, partial); | 969 | goal = ext4_find_goal(inode, map->m_lblk, partial); |
973 | 970 | ||
974 | /* the number of blocks need to allocate for [d,t]indirect blocks */ | 971 | /* the number of blocks need to allocate for [d,t]indirect blocks */ |
975 | indirect_blks = (chain + depth) - partial - 1; | 972 | indirect_blks = (chain + depth) - partial - 1; |
@@ -979,11 +976,11 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, | |||
979 | * direct blocks to allocate for this branch. | 976 | * direct blocks to allocate for this branch. |
980 | */ | 977 | */ |
981 | count = ext4_blks_to_allocate(partial, indirect_blks, | 978 | count = ext4_blks_to_allocate(partial, indirect_blks, |
982 | maxblocks, blocks_to_boundary); | 979 | map->m_len, blocks_to_boundary); |
983 | /* | 980 | /* |
984 | * Block out ext4_truncate while we alter the tree | 981 | * Block out ext4_truncate while we alter the tree |
985 | */ | 982 | */ |
986 | err = ext4_alloc_branch(handle, inode, iblock, indirect_blks, | 983 | err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks, |
987 | &count, goal, | 984 | &count, goal, |
988 | offsets + (partial - chain), partial); | 985 | offsets + (partial - chain), partial); |
989 | 986 | ||
@@ -995,18 +992,20 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, | |||
995 | * may need to return -EAGAIN upwards in the worst case. --sct | 992 | * may need to return -EAGAIN upwards in the worst case. --sct |
996 | */ | 993 | */ |
997 | if (!err) | 994 | if (!err) |
998 | err = ext4_splice_branch(handle, inode, iblock, | 995 | err = ext4_splice_branch(handle, inode, map->m_lblk, |
999 | partial, indirect_blks, count); | 996 | partial, indirect_blks, count); |
1000 | if (err) | 997 | if (err) |
1001 | goto cleanup; | 998 | goto cleanup; |
1002 | 999 | ||
1003 | set_buffer_new(bh_result); | 1000 | map->m_flags |= EXT4_MAP_NEW; |
1004 | 1001 | ||
1005 | ext4_update_inode_fsync_trans(handle, inode, 1); | 1002 | ext4_update_inode_fsync_trans(handle, inode, 1); |
1006 | got_it: | 1003 | got_it: |
1007 | map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); | 1004 | map->m_flags |= EXT4_MAP_MAPPED; |
1005 | map->m_pblk = le32_to_cpu(chain[depth-1].key); | ||
1006 | map->m_len = count; | ||
1008 | if (count > blocks_to_boundary) | 1007 | if (count > blocks_to_boundary) |
1009 | set_buffer_boundary(bh_result); | 1008 | map->m_flags |= EXT4_MAP_BOUNDARY; |
1010 | err = count; | 1009 | err = count; |
1011 | /* Clean up and exit */ | 1010 | /* Clean up and exit */ |
1012 | partial = chain + depth - 1; /* the whole chain */ | 1011 | partial = chain + depth - 1; /* the whole chain */ |
@@ -1016,7 +1015,6 @@ cleanup: | |||
1016 | brelse(partial->bh); | 1015 | brelse(partial->bh); |
1017 | partial--; | 1016 | partial--; |
1018 | } | 1017 | } |
1019 | BUFFER_TRACE(bh_result, "returned"); | ||
1020 | out: | 1018 | out: |
1021 | return err; | 1019 | return err; |
1022 | } | 1020 | } |
@@ -1061,7 +1059,7 @@ static int ext4_indirect_calc_metadata_amount(struct inode *inode, | |||
1061 | */ | 1059 | */ |
1062 | static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock) | 1060 | static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock) |
1063 | { | 1061 | { |
1064 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) | 1062 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
1065 | return ext4_ext_calc_metadata_amount(inode, lblock); | 1063 | return ext4_ext_calc_metadata_amount(inode, lblock); |
1066 | 1064 | ||
1067 | return ext4_indirect_calc_metadata_amount(inode, lblock); | 1065 | return ext4_indirect_calc_metadata_amount(inode, lblock); |
@@ -1076,7 +1074,6 @@ void ext4_da_update_reserve_space(struct inode *inode, | |||
1076 | { | 1074 | { |
1077 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1075 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1078 | struct ext4_inode_info *ei = EXT4_I(inode); | 1076 | struct ext4_inode_info *ei = EXT4_I(inode); |
1079 | int mdb_free = 0, allocated_meta_blocks = 0; | ||
1080 | 1077 | ||
1081 | spin_lock(&ei->i_block_reservation_lock); | 1078 | spin_lock(&ei->i_block_reservation_lock); |
1082 | trace_ext4_da_update_reserve_space(inode, used); | 1079 | trace_ext4_da_update_reserve_space(inode, used); |
@@ -1091,11 +1088,10 @@ void ext4_da_update_reserve_space(struct inode *inode, | |||
1091 | 1088 | ||
1092 | /* Update per-inode reservations */ | 1089 | /* Update per-inode reservations */ |
1093 | ei->i_reserved_data_blocks -= used; | 1090 | ei->i_reserved_data_blocks -= used; |
1094 | used += ei->i_allocated_meta_blocks; | ||
1095 | ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; | 1091 | ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; |
1096 | allocated_meta_blocks = ei->i_allocated_meta_blocks; | 1092 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, |
1093 | used + ei->i_allocated_meta_blocks); | ||
1097 | ei->i_allocated_meta_blocks = 0; | 1094 | ei->i_allocated_meta_blocks = 0; |
1098 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, used); | ||
1099 | 1095 | ||
1100 | if (ei->i_reserved_data_blocks == 0) { | 1096 | if (ei->i_reserved_data_blocks == 0) { |
1101 | /* | 1097 | /* |
@@ -1103,30 +1099,23 @@ void ext4_da_update_reserve_space(struct inode *inode, | |||
1103 | * only when we have written all of the delayed | 1099 | * only when we have written all of the delayed |
1104 | * allocation blocks. | 1100 | * allocation blocks. |
1105 | */ | 1101 | */ |
1106 | mdb_free = ei->i_reserved_meta_blocks; | 1102 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, |
1103 | ei->i_reserved_meta_blocks); | ||
1107 | ei->i_reserved_meta_blocks = 0; | 1104 | ei->i_reserved_meta_blocks = 0; |
1108 | ei->i_da_metadata_calc_len = 0; | 1105 | ei->i_da_metadata_calc_len = 0; |
1109 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free); | ||
1110 | } | 1106 | } |
1111 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1107 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
1112 | 1108 | ||
1113 | /* Update quota subsystem */ | 1109 | /* Update quota subsystem for data blocks */ |
1114 | if (quota_claim) { | 1110 | if (quota_claim) |
1115 | dquot_claim_block(inode, used); | 1111 | dquot_claim_block(inode, used); |
1116 | if (mdb_free) | 1112 | else { |
1117 | dquot_release_reservation_block(inode, mdb_free); | ||
1118 | } else { | ||
1119 | /* | 1113 | /* |
1120 | * We did fallocate with an offset that is already delayed | 1114 | * We did fallocate with an offset that is already delayed |
1121 | * allocated. So on delayed allocated writeback we should | 1115 | * allocated. So on delayed allocated writeback we should |
1122 | * not update the quota for allocated blocks. But then | 1116 | * not re-claim the quota for fallocated blocks. |
1123 | * converting an fallocate region to initialized region would | ||
1124 | * have caused a metadata allocation. So claim quota for | ||
1125 | * that | ||
1126 | */ | 1117 | */ |
1127 | if (allocated_meta_blocks) | 1118 | dquot_release_reservation_block(inode, used); |
1128 | dquot_claim_block(inode, allocated_meta_blocks); | ||
1129 | dquot_release_reservation_block(inode, mdb_free + used); | ||
1130 | } | 1119 | } |
1131 | 1120 | ||
1132 | /* | 1121 | /* |
@@ -1139,15 +1128,15 @@ void ext4_da_update_reserve_space(struct inode *inode, | |||
1139 | ext4_discard_preallocations(inode); | 1128 | ext4_discard_preallocations(inode); |
1140 | } | 1129 | } |
1141 | 1130 | ||
1142 | static int check_block_validity(struct inode *inode, const char *msg, | 1131 | static int check_block_validity(struct inode *inode, const char *func, |
1143 | sector_t logical, sector_t phys, int len) | 1132 | struct ext4_map_blocks *map) |
1144 | { | 1133 | { |
1145 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) { | 1134 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk, |
1146 | __ext4_error(inode->i_sb, msg, | 1135 | map->m_len)) { |
1147 | "inode #%lu logical block %llu mapped to %llu " | 1136 | ext4_error_inode(func, inode, |
1148 | "(size %d)", inode->i_ino, | 1137 | "lblock %lu mapped to illegal pblock %llu " |
1149 | (unsigned long long) logical, | 1138 | "(length %d)", (unsigned long) map->m_lblk, |
1150 | (unsigned long long) phys, len); | 1139 | map->m_pblk, map->m_len); |
1151 | return -EIO; | 1140 | return -EIO; |
1152 | } | 1141 | } |
1153 | return 0; | 1142 | return 0; |
@@ -1212,15 +1201,15 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx, | |||
1212 | } | 1201 | } |
1213 | 1202 | ||
1214 | /* | 1203 | /* |
1215 | * The ext4_get_blocks() function tries to look up the requested blocks, | 1204 | * The ext4_map_blocks() function tries to look up the requested blocks, |
1216 | * and returns if the blocks are already mapped. | 1205 | * and returns if the blocks are already mapped. |
1217 | * | 1206 | * |
1218 | * Otherwise it takes the write lock of the i_data_sem and allocate blocks | 1207 | * Otherwise it takes the write lock of the i_data_sem and allocate blocks |
1219 | * and store the allocated blocks in the result buffer head and mark it | 1208 | * and store the allocated blocks in the result buffer head and mark it |
1220 | * mapped. | 1209 | * mapped. |
1221 | * | 1210 | * |
1222 | * If file type is extents based, it will call ext4_ext_get_blocks(), | 1211 | * If file type is extents based, it will call ext4_ext_map_blocks(), |
1223 | * Otherwise, call with ext4_ind_get_blocks() to handle indirect mapping | 1212 | * Otherwise, call with ext4_ind_map_blocks() to handle indirect mapping |
1224 | * based files | 1213 | * based files |
1225 | * | 1214 | * |
1226 | * On success, it returns the number of blocks being mapped or allocate. | 1215 | * On success, it returns the number of blocks being mapped or allocate. |
@@ -1233,35 +1222,29 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx, | |||
1233 | * | 1222 | * |
1234 | * It returns the error in case of allocation failure. | 1223 | * It returns the error in case of allocation failure. |
1235 | */ | 1224 | */ |
1236 | int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, | 1225 | int ext4_map_blocks(handle_t *handle, struct inode *inode, |
1237 | unsigned int max_blocks, struct buffer_head *bh, | 1226 | struct ext4_map_blocks *map, int flags) |
1238 | int flags) | ||
1239 | { | 1227 | { |
1240 | int retval; | 1228 | int retval; |
1241 | 1229 | ||
1242 | clear_buffer_mapped(bh); | 1230 | map->m_flags = 0; |
1243 | clear_buffer_unwritten(bh); | 1231 | ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u," |
1244 | 1232 | "logical block %lu\n", inode->i_ino, flags, map->m_len, | |
1245 | ext_debug("ext4_get_blocks(): inode %lu, flag %d, max_blocks %u," | 1233 | (unsigned long) map->m_lblk); |
1246 | "logical block %lu\n", inode->i_ino, flags, max_blocks, | ||
1247 | (unsigned long)block); | ||
1248 | /* | 1234 | /* |
1249 | * Try to see if we can get the block without requesting a new | 1235 | * Try to see if we can get the block without requesting a new |
1250 | * file system block. | 1236 | * file system block. |
1251 | */ | 1237 | */ |
1252 | down_read((&EXT4_I(inode)->i_data_sem)); | 1238 | down_read((&EXT4_I(inode)->i_data_sem)); |
1253 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { | 1239 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { |
1254 | retval = ext4_ext_get_blocks(handle, inode, block, max_blocks, | 1240 | retval = ext4_ext_map_blocks(handle, inode, map, 0); |
1255 | bh, 0); | ||
1256 | } else { | 1241 | } else { |
1257 | retval = ext4_ind_get_blocks(handle, inode, block, max_blocks, | 1242 | retval = ext4_ind_map_blocks(handle, inode, map, 0); |
1258 | bh, 0); | ||
1259 | } | 1243 | } |
1260 | up_read((&EXT4_I(inode)->i_data_sem)); | 1244 | up_read((&EXT4_I(inode)->i_data_sem)); |
1261 | 1245 | ||
1262 | if (retval > 0 && buffer_mapped(bh)) { | 1246 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { |
1263 | int ret = check_block_validity(inode, "file system corruption", | 1247 | int ret = check_block_validity(inode, __func__, map); |
1264 | block, bh->b_blocknr, retval); | ||
1265 | if (ret != 0) | 1248 | if (ret != 0) |
1266 | return ret; | 1249 | return ret; |
1267 | } | 1250 | } |
@@ -1277,7 +1260,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, | |||
1277 | * ext4_ext_get_block() returns th create = 0 | 1260 | * ext4_ext_get_block() returns th create = 0 |
1278 | * with buffer head unmapped. | 1261 | * with buffer head unmapped. |
1279 | */ | 1262 | */ |
1280 | if (retval > 0 && buffer_mapped(bh)) | 1263 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) |
1281 | return retval; | 1264 | return retval; |
1282 | 1265 | ||
1283 | /* | 1266 | /* |
@@ -1290,7 +1273,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, | |||
1290 | * of BH_Unwritten and BH_Mapped flags being simultaneously | 1273 | * of BH_Unwritten and BH_Mapped flags being simultaneously |
1291 | * set on the buffer_head. | 1274 | * set on the buffer_head. |
1292 | */ | 1275 | */ |
1293 | clear_buffer_unwritten(bh); | 1276 | map->m_flags &= ~EXT4_MAP_UNWRITTEN; |
1294 | 1277 | ||
1295 | /* | 1278 | /* |
1296 | * New blocks allocate and/or writing to uninitialized extent | 1279 | * New blocks allocate and/or writing to uninitialized extent |
@@ -1312,14 +1295,12 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, | |||
1312 | * We need to check for EXT4 here because migrate | 1295 | * We need to check for EXT4 here because migrate |
1313 | * could have changed the inode type in between | 1296 | * could have changed the inode type in between |
1314 | */ | 1297 | */ |
1315 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { | 1298 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { |
1316 | retval = ext4_ext_get_blocks(handle, inode, block, max_blocks, | 1299 | retval = ext4_ext_map_blocks(handle, inode, map, flags); |
1317 | bh, flags); | ||
1318 | } else { | 1300 | } else { |
1319 | retval = ext4_ind_get_blocks(handle, inode, block, | 1301 | retval = ext4_ind_map_blocks(handle, inode, map, flags); |
1320 | max_blocks, bh, flags); | ||
1321 | 1302 | ||
1322 | if (retval > 0 && buffer_new(bh)) { | 1303 | if (retval > 0 && map->m_flags & EXT4_MAP_NEW) { |
1323 | /* | 1304 | /* |
1324 | * We allocated new blocks which will result in | 1305 | * We allocated new blocks which will result in |
1325 | * i_data's format changing. Force the migrate | 1306 | * i_data's format changing. Force the migrate |
@@ -1342,10 +1323,10 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, | |||
1342 | EXT4_I(inode)->i_delalloc_reserved_flag = 0; | 1323 | EXT4_I(inode)->i_delalloc_reserved_flag = 0; |
1343 | 1324 | ||
1344 | up_write((&EXT4_I(inode)->i_data_sem)); | 1325 | up_write((&EXT4_I(inode)->i_data_sem)); |
1345 | if (retval > 0 && buffer_mapped(bh)) { | 1326 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { |
1346 | int ret = check_block_validity(inode, "file system " | 1327 | int ret = check_block_validity(inode, |
1347 | "corruption after allocation", | 1328 | "ext4_map_blocks_after_alloc", |
1348 | block, bh->b_blocknr, retval); | 1329 | map); |
1349 | if (ret != 0) | 1330 | if (ret != 0) |
1350 | return ret; | 1331 | return ret; |
1351 | } | 1332 | } |
@@ -1355,109 +1336,109 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, | |||
1355 | /* Maximum number of blocks we map for direct IO at once. */ | 1336 | /* Maximum number of blocks we map for direct IO at once. */ |
1356 | #define DIO_MAX_BLOCKS 4096 | 1337 | #define DIO_MAX_BLOCKS 4096 |
1357 | 1338 | ||
1358 | int ext4_get_block(struct inode *inode, sector_t iblock, | 1339 | static int _ext4_get_block(struct inode *inode, sector_t iblock, |
1359 | struct buffer_head *bh_result, int create) | 1340 | struct buffer_head *bh, int flags) |
1360 | { | 1341 | { |
1361 | handle_t *handle = ext4_journal_current_handle(); | 1342 | handle_t *handle = ext4_journal_current_handle(); |
1343 | struct ext4_map_blocks map; | ||
1362 | int ret = 0, started = 0; | 1344 | int ret = 0, started = 0; |
1363 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; | ||
1364 | int dio_credits; | 1345 | int dio_credits; |
1365 | 1346 | ||
1366 | if (create && !handle) { | 1347 | map.m_lblk = iblock; |
1348 | map.m_len = bh->b_size >> inode->i_blkbits; | ||
1349 | |||
1350 | if (flags && !handle) { | ||
1367 | /* Direct IO write... */ | 1351 | /* Direct IO write... */ |
1368 | if (max_blocks > DIO_MAX_BLOCKS) | 1352 | if (map.m_len > DIO_MAX_BLOCKS) |
1369 | max_blocks = DIO_MAX_BLOCKS; | 1353 | map.m_len = DIO_MAX_BLOCKS; |
1370 | dio_credits = ext4_chunk_trans_blocks(inode, max_blocks); | 1354 | dio_credits = ext4_chunk_trans_blocks(inode, map.m_len); |
1371 | handle = ext4_journal_start(inode, dio_credits); | 1355 | handle = ext4_journal_start(inode, dio_credits); |
1372 | if (IS_ERR(handle)) { | 1356 | if (IS_ERR(handle)) { |
1373 | ret = PTR_ERR(handle); | 1357 | ret = PTR_ERR(handle); |
1374 | goto out; | 1358 | return ret; |
1375 | } | 1359 | } |
1376 | started = 1; | 1360 | started = 1; |
1377 | } | 1361 | } |
1378 | 1362 | ||
1379 | ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result, | 1363 | ret = ext4_map_blocks(handle, inode, &map, flags); |
1380 | create ? EXT4_GET_BLOCKS_CREATE : 0); | ||
1381 | if (ret > 0) { | 1364 | if (ret > 0) { |
1382 | bh_result->b_size = (ret << inode->i_blkbits); | 1365 | map_bh(bh, inode->i_sb, map.m_pblk); |
1366 | bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; | ||
1367 | bh->b_size = inode->i_sb->s_blocksize * map.m_len; | ||
1383 | ret = 0; | 1368 | ret = 0; |
1384 | } | 1369 | } |
1385 | if (started) | 1370 | if (started) |
1386 | ext4_journal_stop(handle); | 1371 | ext4_journal_stop(handle); |
1387 | out: | ||
1388 | return ret; | 1372 | return ret; |
1389 | } | 1373 | } |
1390 | 1374 | ||
1375 | int ext4_get_block(struct inode *inode, sector_t iblock, | ||
1376 | struct buffer_head *bh, int create) | ||
1377 | { | ||
1378 | return _ext4_get_block(inode, iblock, bh, | ||
1379 | create ? EXT4_GET_BLOCKS_CREATE : 0); | ||
1380 | } | ||
1381 | |||
1391 | /* | 1382 | /* |
1392 | * `handle' can be NULL if create is zero | 1383 | * `handle' can be NULL if create is zero |
1393 | */ | 1384 | */ |
1394 | struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, | 1385 | struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, |
1395 | ext4_lblk_t block, int create, int *errp) | 1386 | ext4_lblk_t block, int create, int *errp) |
1396 | { | 1387 | { |
1397 | struct buffer_head dummy; | 1388 | struct ext4_map_blocks map; |
1389 | struct buffer_head *bh; | ||
1398 | int fatal = 0, err; | 1390 | int fatal = 0, err; |
1399 | int flags = 0; | ||
1400 | 1391 | ||
1401 | J_ASSERT(handle != NULL || create == 0); | 1392 | J_ASSERT(handle != NULL || create == 0); |
1402 | 1393 | ||
1403 | dummy.b_state = 0; | 1394 | map.m_lblk = block; |
1404 | dummy.b_blocknr = -1000; | 1395 | map.m_len = 1; |
1405 | buffer_trace_init(&dummy.b_history); | 1396 | err = ext4_map_blocks(handle, inode, &map, |
1406 | if (create) | 1397 | create ? EXT4_GET_BLOCKS_CREATE : 0); |
1407 | flags |= EXT4_GET_BLOCKS_CREATE; | 1398 | |
1408 | err = ext4_get_blocks(handle, inode, block, 1, &dummy, flags); | 1399 | if (err < 0) |
1409 | /* | 1400 | *errp = err; |
1410 | * ext4_get_blocks() returns number of blocks mapped. 0 in | 1401 | if (err <= 0) |
1411 | * case of a HOLE. | 1402 | return NULL; |
1412 | */ | 1403 | *errp = 0; |
1413 | if (err > 0) { | 1404 | |
1414 | if (err > 1) | 1405 | bh = sb_getblk(inode->i_sb, map.m_pblk); |
1415 | WARN_ON(1); | 1406 | if (!bh) { |
1416 | err = 0; | 1407 | *errp = -EIO; |
1408 | return NULL; | ||
1417 | } | 1409 | } |
1418 | *errp = err; | 1410 | if (map.m_flags & EXT4_MAP_NEW) { |
1419 | if (!err && buffer_mapped(&dummy)) { | 1411 | J_ASSERT(create != 0); |
1420 | struct buffer_head *bh; | 1412 | J_ASSERT(handle != NULL); |
1421 | bh = sb_getblk(inode->i_sb, dummy.b_blocknr); | ||
1422 | if (!bh) { | ||
1423 | *errp = -EIO; | ||
1424 | goto err; | ||
1425 | } | ||
1426 | if (buffer_new(&dummy)) { | ||
1427 | J_ASSERT(create != 0); | ||
1428 | J_ASSERT(handle != NULL); | ||
1429 | 1413 | ||
1430 | /* | 1414 | /* |
1431 | * Now that we do not always journal data, we should | 1415 | * Now that we do not always journal data, we should |
1432 | * keep in mind whether this should always journal the | 1416 | * keep in mind whether this should always journal the |
1433 | * new buffer as metadata. For now, regular file | 1417 | * new buffer as metadata. For now, regular file |
1434 | * writes use ext4_get_block instead, so it's not a | 1418 | * writes use ext4_get_block instead, so it's not a |
1435 | * problem. | 1419 | * problem. |
1436 | */ | 1420 | */ |
1437 | lock_buffer(bh); | 1421 | lock_buffer(bh); |
1438 | BUFFER_TRACE(bh, "call get_create_access"); | 1422 | BUFFER_TRACE(bh, "call get_create_access"); |
1439 | fatal = ext4_journal_get_create_access(handle, bh); | 1423 | fatal = ext4_journal_get_create_access(handle, bh); |
1440 | if (!fatal && !buffer_uptodate(bh)) { | 1424 | if (!fatal && !buffer_uptodate(bh)) { |
1441 | memset(bh->b_data, 0, inode->i_sb->s_blocksize); | 1425 | memset(bh->b_data, 0, inode->i_sb->s_blocksize); |
1442 | set_buffer_uptodate(bh); | 1426 | set_buffer_uptodate(bh); |
1443 | } | ||
1444 | unlock_buffer(bh); | ||
1445 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | ||
1446 | err = ext4_handle_dirty_metadata(handle, inode, bh); | ||
1447 | if (!fatal) | ||
1448 | fatal = err; | ||
1449 | } else { | ||
1450 | BUFFER_TRACE(bh, "not a new buffer"); | ||
1451 | } | ||
1452 | if (fatal) { | ||
1453 | *errp = fatal; | ||
1454 | brelse(bh); | ||
1455 | bh = NULL; | ||
1456 | } | 1427 | } |
1457 | return bh; | 1428 | unlock_buffer(bh); |
1429 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | ||
1430 | err = ext4_handle_dirty_metadata(handle, inode, bh); | ||
1431 | if (!fatal) | ||
1432 | fatal = err; | ||
1433 | } else { | ||
1434 | BUFFER_TRACE(bh, "not a new buffer"); | ||
1458 | } | 1435 | } |
1459 | err: | 1436 | if (fatal) { |
1460 | return NULL; | 1437 | *errp = fatal; |
1438 | brelse(bh); | ||
1439 | bh = NULL; | ||
1440 | } | ||
1441 | return bh; | ||
1461 | } | 1442 | } |
1462 | 1443 | ||
1463 | struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, | 1444 | struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, |
@@ -1860,7 +1841,7 @@ static int ext4_da_reserve_space(struct inode *inode, sector_t lblock) | |||
1860 | int retries = 0; | 1841 | int retries = 0; |
1861 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1842 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1862 | struct ext4_inode_info *ei = EXT4_I(inode); | 1843 | struct ext4_inode_info *ei = EXT4_I(inode); |
1863 | unsigned long md_needed, md_reserved; | 1844 | unsigned long md_needed; |
1864 | int ret; | 1845 | int ret; |
1865 | 1846 | ||
1866 | /* | 1847 | /* |
@@ -1870,22 +1851,24 @@ static int ext4_da_reserve_space(struct inode *inode, sector_t lblock) | |||
1870 | */ | 1851 | */ |
1871 | repeat: | 1852 | repeat: |
1872 | spin_lock(&ei->i_block_reservation_lock); | 1853 | spin_lock(&ei->i_block_reservation_lock); |
1873 | md_reserved = ei->i_reserved_meta_blocks; | ||
1874 | md_needed = ext4_calc_metadata_amount(inode, lblock); | 1854 | md_needed = ext4_calc_metadata_amount(inode, lblock); |
1875 | trace_ext4_da_reserve_space(inode, md_needed); | 1855 | trace_ext4_da_reserve_space(inode, md_needed); |
1876 | spin_unlock(&ei->i_block_reservation_lock); | 1856 | spin_unlock(&ei->i_block_reservation_lock); |
1877 | 1857 | ||
1878 | /* | 1858 | /* |
1879 | * Make quota reservation here to prevent quota overflow | 1859 | * We will charge metadata quota at writeout time; this saves |
1880 | * later. Real quota accounting is done at pages writeout | 1860 | * us from metadata over-estimation, though we may go over by |
1881 | * time. | 1861 | * a small amount in the end. Here we just reserve for data. |
1882 | */ | 1862 | */ |
1883 | ret = dquot_reserve_block(inode, md_needed + 1); | 1863 | ret = dquot_reserve_block(inode, 1); |
1884 | if (ret) | 1864 | if (ret) |
1885 | return ret; | 1865 | return ret; |
1886 | 1866 | /* | |
1867 | * We do still charge estimated metadata to the sb though; | ||
1868 | * we cannot afford to run out of free blocks. | ||
1869 | */ | ||
1887 | if (ext4_claim_free_blocks(sbi, md_needed + 1)) { | 1870 | if (ext4_claim_free_blocks(sbi, md_needed + 1)) { |
1888 | dquot_release_reservation_block(inode, md_needed + 1); | 1871 | dquot_release_reservation_block(inode, 1); |
1889 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { | 1872 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { |
1890 | yield(); | 1873 | yield(); |
1891 | goto repeat; | 1874 | goto repeat; |
@@ -1910,6 +1893,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free) | |||
1910 | 1893 | ||
1911 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | 1894 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); |
1912 | 1895 | ||
1896 | trace_ext4_da_release_space(inode, to_free); | ||
1913 | if (unlikely(to_free > ei->i_reserved_data_blocks)) { | 1897 | if (unlikely(to_free > ei->i_reserved_data_blocks)) { |
1914 | /* | 1898 | /* |
1915 | * if there aren't enough reserved blocks, then the | 1899 | * if there aren't enough reserved blocks, then the |
@@ -1932,12 +1916,13 @@ static void ext4_da_release_space(struct inode *inode, int to_free) | |||
1932 | * only when we have written all of the delayed | 1916 | * only when we have written all of the delayed |
1933 | * allocation blocks. | 1917 | * allocation blocks. |
1934 | */ | 1918 | */ |
1935 | to_free += ei->i_reserved_meta_blocks; | 1919 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, |
1920 | ei->i_reserved_meta_blocks); | ||
1936 | ei->i_reserved_meta_blocks = 0; | 1921 | ei->i_reserved_meta_blocks = 0; |
1937 | ei->i_da_metadata_calc_len = 0; | 1922 | ei->i_da_metadata_calc_len = 0; |
1938 | } | 1923 | } |
1939 | 1924 | ||
1940 | /* update fs dirty blocks counter */ | 1925 | /* update fs dirty data blocks counter */ |
1941 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free); | 1926 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free); |
1942 | 1927 | ||
1943 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1928 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
@@ -2042,28 +2027,23 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) | |||
2042 | /* | 2027 | /* |
2043 | * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers | 2028 | * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers |
2044 | * | 2029 | * |
2045 | * @mpd->inode - inode to walk through | ||
2046 | * @exbh->b_blocknr - first block on a disk | ||
2047 | * @exbh->b_size - amount of space in bytes | ||
2048 | * @logical - first logical block to start assignment with | ||
2049 | * | ||
2050 | * the function goes through all passed space and put actual disk | 2030 | * the function goes through all passed space and put actual disk |
2051 | * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten | 2031 | * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten |
2052 | */ | 2032 | */ |
2053 | static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, | 2033 | static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, |
2054 | struct buffer_head *exbh) | 2034 | struct ext4_map_blocks *map) |
2055 | { | 2035 | { |
2056 | struct inode *inode = mpd->inode; | 2036 | struct inode *inode = mpd->inode; |
2057 | struct address_space *mapping = inode->i_mapping; | 2037 | struct address_space *mapping = inode->i_mapping; |
2058 | int blocks = exbh->b_size >> inode->i_blkbits; | 2038 | int blocks = map->m_len; |
2059 | sector_t pblock = exbh->b_blocknr, cur_logical; | 2039 | sector_t pblock = map->m_pblk, cur_logical; |
2060 | struct buffer_head *head, *bh; | 2040 | struct buffer_head *head, *bh; |
2061 | pgoff_t index, end; | 2041 | pgoff_t index, end; |
2062 | struct pagevec pvec; | 2042 | struct pagevec pvec; |
2063 | int nr_pages, i; | 2043 | int nr_pages, i; |
2064 | 2044 | ||
2065 | index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | 2045 | index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits); |
2066 | end = (logical + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | 2046 | end = (map->m_lblk + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits); |
2067 | cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits); | 2047 | cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits); |
2068 | 2048 | ||
2069 | pagevec_init(&pvec, 0); | 2049 | pagevec_init(&pvec, 0); |
@@ -2090,17 +2070,16 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, | |||
2090 | 2070 | ||
2091 | /* skip blocks out of the range */ | 2071 | /* skip blocks out of the range */ |
2092 | do { | 2072 | do { |
2093 | if (cur_logical >= logical) | 2073 | if (cur_logical >= map->m_lblk) |
2094 | break; | 2074 | break; |
2095 | cur_logical++; | 2075 | cur_logical++; |
2096 | } while ((bh = bh->b_this_page) != head); | 2076 | } while ((bh = bh->b_this_page) != head); |
2097 | 2077 | ||
2098 | do { | 2078 | do { |
2099 | if (cur_logical >= logical + blocks) | 2079 | if (cur_logical >= map->m_lblk + blocks) |
2100 | break; | 2080 | break; |
2101 | 2081 | ||
2102 | if (buffer_delay(bh) || | 2082 | if (buffer_delay(bh) || buffer_unwritten(bh)) { |
2103 | buffer_unwritten(bh)) { | ||
2104 | 2083 | ||
2105 | BUG_ON(bh->b_bdev != inode->i_sb->s_bdev); | 2084 | BUG_ON(bh->b_bdev != inode->i_sb->s_bdev); |
2106 | 2085 | ||
@@ -2119,7 +2098,7 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, | |||
2119 | } else if (buffer_mapped(bh)) | 2098 | } else if (buffer_mapped(bh)) |
2120 | BUG_ON(bh->b_blocknr != pblock); | 2099 | BUG_ON(bh->b_blocknr != pblock); |
2121 | 2100 | ||
2122 | if (buffer_uninit(exbh)) | 2101 | if (map->m_flags & EXT4_MAP_UNINIT) |
2123 | set_buffer_uninit(bh); | 2102 | set_buffer_uninit(bh); |
2124 | cur_logical++; | 2103 | cur_logical++; |
2125 | pblock++; | 2104 | pblock++; |
@@ -2130,21 +2109,6 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, | |||
2130 | } | 2109 | } |
2131 | 2110 | ||
2132 | 2111 | ||
2133 | /* | ||
2134 | * __unmap_underlying_blocks - just a helper function to unmap | ||
2135 | * set of blocks described by @bh | ||
2136 | */ | ||
2137 | static inline void __unmap_underlying_blocks(struct inode *inode, | ||
2138 | struct buffer_head *bh) | ||
2139 | { | ||
2140 | struct block_device *bdev = inode->i_sb->s_bdev; | ||
2141 | int blocks, i; | ||
2142 | |||
2143 | blocks = bh->b_size >> inode->i_blkbits; | ||
2144 | for (i = 0; i < blocks; i++) | ||
2145 | unmap_underlying_metadata(bdev, bh->b_blocknr + i); | ||
2146 | } | ||
2147 | |||
2148 | static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, | 2112 | static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, |
2149 | sector_t logical, long blk_cnt) | 2113 | sector_t logical, long blk_cnt) |
2150 | { | 2114 | { |
@@ -2206,7 +2170,7 @@ static void ext4_print_free_blocks(struct inode *inode) | |||
2206 | static int mpage_da_map_blocks(struct mpage_da_data *mpd) | 2170 | static int mpage_da_map_blocks(struct mpage_da_data *mpd) |
2207 | { | 2171 | { |
2208 | int err, blks, get_blocks_flags; | 2172 | int err, blks, get_blocks_flags; |
2209 | struct buffer_head new; | 2173 | struct ext4_map_blocks map; |
2210 | sector_t next = mpd->b_blocknr; | 2174 | sector_t next = mpd->b_blocknr; |
2211 | unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits; | 2175 | unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits; |
2212 | loff_t disksize = EXT4_I(mpd->inode)->i_disksize; | 2176 | loff_t disksize = EXT4_I(mpd->inode)->i_disksize; |
@@ -2247,15 +2211,15 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2247 | * EXT4_GET_BLOCKS_DELALLOC_RESERVE so the delalloc accounting | 2211 | * EXT4_GET_BLOCKS_DELALLOC_RESERVE so the delalloc accounting |
2248 | * variables are updated after the blocks have been allocated. | 2212 | * variables are updated after the blocks have been allocated. |
2249 | */ | 2213 | */ |
2250 | new.b_state = 0; | 2214 | map.m_lblk = next; |
2215 | map.m_len = max_blocks; | ||
2251 | get_blocks_flags = EXT4_GET_BLOCKS_CREATE; | 2216 | get_blocks_flags = EXT4_GET_BLOCKS_CREATE; |
2252 | if (ext4_should_dioread_nolock(mpd->inode)) | 2217 | if (ext4_should_dioread_nolock(mpd->inode)) |
2253 | get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT; | 2218 | get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT; |
2254 | if (mpd->b_state & (1 << BH_Delay)) | 2219 | if (mpd->b_state & (1 << BH_Delay)) |
2255 | get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; | 2220 | get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; |
2256 | 2221 | ||
2257 | blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks, | 2222 | blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags); |
2258 | &new, get_blocks_flags); | ||
2259 | if (blks < 0) { | 2223 | if (blks < 0) { |
2260 | err = blks; | 2224 | err = blks; |
2261 | /* | 2225 | /* |
@@ -2282,7 +2246,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2282 | ext4_msg(mpd->inode->i_sb, KERN_CRIT, | 2246 | ext4_msg(mpd->inode->i_sb, KERN_CRIT, |
2283 | "delayed block allocation failed for inode %lu at " | 2247 | "delayed block allocation failed for inode %lu at " |
2284 | "logical offset %llu with max blocks %zd with " | 2248 | "logical offset %llu with max blocks %zd with " |
2285 | "error %d\n", mpd->inode->i_ino, | 2249 | "error %d", mpd->inode->i_ino, |
2286 | (unsigned long long) next, | 2250 | (unsigned long long) next, |
2287 | mpd->b_size >> mpd->inode->i_blkbits, err); | 2251 | mpd->b_size >> mpd->inode->i_blkbits, err); |
2288 | printk(KERN_CRIT "This should not happen!! " | 2252 | printk(KERN_CRIT "This should not happen!! " |
@@ -2297,10 +2261,13 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2297 | } | 2261 | } |
2298 | BUG_ON(blks == 0); | 2262 | BUG_ON(blks == 0); |
2299 | 2263 | ||
2300 | new.b_size = (blks << mpd->inode->i_blkbits); | 2264 | if (map.m_flags & EXT4_MAP_NEW) { |
2265 | struct block_device *bdev = mpd->inode->i_sb->s_bdev; | ||
2266 | int i; | ||
2301 | 2267 | ||
2302 | if (buffer_new(&new)) | 2268 | for (i = 0; i < map.m_len; i++) |
2303 | __unmap_underlying_blocks(mpd->inode, &new); | 2269 | unmap_underlying_metadata(bdev, map.m_pblk + i); |
2270 | } | ||
2304 | 2271 | ||
2305 | /* | 2272 | /* |
2306 | * If blocks are delayed marked, we need to | 2273 | * If blocks are delayed marked, we need to |
@@ -2308,7 +2275,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2308 | */ | 2275 | */ |
2309 | if ((mpd->b_state & (1 << BH_Delay)) || | 2276 | if ((mpd->b_state & (1 << BH_Delay)) || |
2310 | (mpd->b_state & (1 << BH_Unwritten))) | 2277 | (mpd->b_state & (1 << BH_Unwritten))) |
2311 | mpage_put_bnr_to_bhs(mpd, next, &new); | 2278 | mpage_put_bnr_to_bhs(mpd, &map); |
2312 | 2279 | ||
2313 | if (ext4_should_order_data(mpd->inode)) { | 2280 | if (ext4_should_order_data(mpd->inode)) { |
2314 | err = ext4_jbd2_file_inode(handle, mpd->inode); | 2281 | err = ext4_jbd2_file_inode(handle, mpd->inode); |
@@ -2349,8 +2316,17 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, | |||
2349 | sector_t next; | 2316 | sector_t next; |
2350 | int nrblocks = mpd->b_size >> mpd->inode->i_blkbits; | 2317 | int nrblocks = mpd->b_size >> mpd->inode->i_blkbits; |
2351 | 2318 | ||
2319 | /* | ||
2320 | * XXX Don't go larger than mballoc is willing to allocate | ||
2321 | * This is a stopgap solution. We eventually need to fold | ||
2322 | * mpage_da_submit_io() into this function and then call | ||
2323 | * ext4_get_blocks() multiple times in a loop | ||
2324 | */ | ||
2325 | if (nrblocks >= 8*1024*1024/mpd->inode->i_sb->s_blocksize) | ||
2326 | goto flush_it; | ||
2327 | |||
2352 | /* check if thereserved journal credits might overflow */ | 2328 | /* check if thereserved journal credits might overflow */ |
2353 | if (!(EXT4_I(mpd->inode)->i_flags & EXT4_EXTENTS_FL)) { | 2329 | if (!(ext4_test_inode_flag(mpd->inode, EXT4_INODE_EXTENTS))) { |
2354 | if (nrblocks >= EXT4_MAX_TRANS_DATA) { | 2330 | if (nrblocks >= EXT4_MAX_TRANS_DATA) { |
2355 | /* | 2331 | /* |
2356 | * With non-extent format we are limited by the journal | 2332 | * With non-extent format we are limited by the journal |
@@ -2423,17 +2399,6 @@ static int __mpage_da_writepage(struct page *page, | |||
2423 | struct buffer_head *bh, *head; | 2399 | struct buffer_head *bh, *head; |
2424 | sector_t logical; | 2400 | sector_t logical; |
2425 | 2401 | ||
2426 | if (mpd->io_done) { | ||
2427 | /* | ||
2428 | * Rest of the page in the page_vec | ||
2429 | * redirty then and skip then. We will | ||
2430 | * try to write them again after | ||
2431 | * starting a new transaction | ||
2432 | */ | ||
2433 | redirty_page_for_writepage(wbc, page); | ||
2434 | unlock_page(page); | ||
2435 | return MPAGE_DA_EXTENT_TAIL; | ||
2436 | } | ||
2437 | /* | 2402 | /* |
2438 | * Can we merge this page to current extent? | 2403 | * Can we merge this page to current extent? |
2439 | */ | 2404 | */ |
@@ -2528,8 +2493,9 @@ static int __mpage_da_writepage(struct page *page, | |||
2528 | * initialized properly. | 2493 | * initialized properly. |
2529 | */ | 2494 | */ |
2530 | static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | 2495 | static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, |
2531 | struct buffer_head *bh_result, int create) | 2496 | struct buffer_head *bh, int create) |
2532 | { | 2497 | { |
2498 | struct ext4_map_blocks map; | ||
2533 | int ret = 0; | 2499 | int ret = 0; |
2534 | sector_t invalid_block = ~((sector_t) 0xffff); | 2500 | sector_t invalid_block = ~((sector_t) 0xffff); |
2535 | 2501 | ||
@@ -2537,16 +2503,22 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
2537 | invalid_block = ~0; | 2503 | invalid_block = ~0; |
2538 | 2504 | ||
2539 | BUG_ON(create == 0); | 2505 | BUG_ON(create == 0); |
2540 | BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize); | 2506 | BUG_ON(bh->b_size != inode->i_sb->s_blocksize); |
2507 | |||
2508 | map.m_lblk = iblock; | ||
2509 | map.m_len = 1; | ||
2541 | 2510 | ||
2542 | /* | 2511 | /* |
2543 | * first, we need to know whether the block is allocated already | 2512 | * first, we need to know whether the block is allocated already |
2544 | * preallocated blocks are unmapped but should treated | 2513 | * preallocated blocks are unmapped but should treated |
2545 | * the same as allocated blocks. | 2514 | * the same as allocated blocks. |
2546 | */ | 2515 | */ |
2547 | ret = ext4_get_blocks(NULL, inode, iblock, 1, bh_result, 0); | 2516 | ret = ext4_map_blocks(NULL, inode, &map, 0); |
2548 | if ((ret == 0) && !buffer_delay(bh_result)) { | 2517 | if (ret < 0) |
2549 | /* the block isn't (pre)allocated yet, let's reserve space */ | 2518 | return ret; |
2519 | if (ret == 0) { | ||
2520 | if (buffer_delay(bh)) | ||
2521 | return 0; /* Not sure this could or should happen */ | ||
2550 | /* | 2522 | /* |
2551 | * XXX: __block_prepare_write() unmaps passed block, | 2523 | * XXX: __block_prepare_write() unmaps passed block, |
2552 | * is it OK? | 2524 | * is it OK? |
@@ -2556,26 +2528,26 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
2556 | /* not enough space to reserve */ | 2528 | /* not enough space to reserve */ |
2557 | return ret; | 2529 | return ret; |
2558 | 2530 | ||
2559 | map_bh(bh_result, inode->i_sb, invalid_block); | 2531 | map_bh(bh, inode->i_sb, invalid_block); |
2560 | set_buffer_new(bh_result); | 2532 | set_buffer_new(bh); |
2561 | set_buffer_delay(bh_result); | 2533 | set_buffer_delay(bh); |
2562 | } else if (ret > 0) { | 2534 | return 0; |
2563 | bh_result->b_size = (ret << inode->i_blkbits); | ||
2564 | if (buffer_unwritten(bh_result)) { | ||
2565 | /* A delayed write to unwritten bh should | ||
2566 | * be marked new and mapped. Mapped ensures | ||
2567 | * that we don't do get_block multiple times | ||
2568 | * when we write to the same offset and new | ||
2569 | * ensures that we do proper zero out for | ||
2570 | * partial write. | ||
2571 | */ | ||
2572 | set_buffer_new(bh_result); | ||
2573 | set_buffer_mapped(bh_result); | ||
2574 | } | ||
2575 | ret = 0; | ||
2576 | } | 2535 | } |
2577 | 2536 | ||
2578 | return ret; | 2537 | map_bh(bh, inode->i_sb, map.m_pblk); |
2538 | bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; | ||
2539 | |||
2540 | if (buffer_unwritten(bh)) { | ||
2541 | /* A delayed write to unwritten bh should be marked | ||
2542 | * new and mapped. Mapped ensures that we don't do | ||
2543 | * get_block multiple times when we write to the same | ||
2544 | * offset and new ensures that we do proper zero out | ||
2545 | * for partial write. | ||
2546 | */ | ||
2547 | set_buffer_new(bh); | ||
2548 | set_buffer_mapped(bh); | ||
2549 | } | ||
2550 | return 0; | ||
2579 | } | 2551 | } |
2580 | 2552 | ||
2581 | /* | 2553 | /* |
@@ -2597,21 +2569,8 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
2597 | static int noalloc_get_block_write(struct inode *inode, sector_t iblock, | 2569 | static int noalloc_get_block_write(struct inode *inode, sector_t iblock, |
2598 | struct buffer_head *bh_result, int create) | 2570 | struct buffer_head *bh_result, int create) |
2599 | { | 2571 | { |
2600 | int ret = 0; | ||
2601 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; | ||
2602 | |||
2603 | BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize); | 2572 | BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize); |
2604 | 2573 | return _ext4_get_block(inode, iblock, bh_result, 0); | |
2605 | /* | ||
2606 | * we don't want to do block allocation in writepage | ||
2607 | * so call get_block_wrap with create = 0 | ||
2608 | */ | ||
2609 | ret = ext4_get_blocks(NULL, inode, iblock, max_blocks, bh_result, 0); | ||
2610 | if (ret > 0) { | ||
2611 | bh_result->b_size = (ret << inode->i_blkbits); | ||
2612 | ret = 0; | ||
2613 | } | ||
2614 | return ret; | ||
2615 | } | 2574 | } |
2616 | 2575 | ||
2617 | static int bget_one(handle_t *handle, struct buffer_head *bh) | 2576 | static int bget_one(handle_t *handle, struct buffer_head *bh) |
@@ -2821,13 +2780,131 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode) | |||
2821 | * number of contiguous block. So we will limit | 2780 | * number of contiguous block. So we will limit |
2822 | * number of contiguous block to a sane value | 2781 | * number of contiguous block to a sane value |
2823 | */ | 2782 | */ |
2824 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) && | 2783 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) && |
2825 | (max_blocks > EXT4_MAX_TRANS_DATA)) | 2784 | (max_blocks > EXT4_MAX_TRANS_DATA)) |
2826 | max_blocks = EXT4_MAX_TRANS_DATA; | 2785 | max_blocks = EXT4_MAX_TRANS_DATA; |
2827 | 2786 | ||
2828 | return ext4_chunk_trans_blocks(inode, max_blocks); | 2787 | return ext4_chunk_trans_blocks(inode, max_blocks); |
2829 | } | 2788 | } |
2830 | 2789 | ||
2790 | /* | ||
2791 | * write_cache_pages_da - walk the list of dirty pages of the given | ||
2792 | * address space and call the callback function (which usually writes | ||
2793 | * the pages). | ||
2794 | * | ||
2795 | * This is a forked version of write_cache_pages(). Differences: | ||
2796 | * Range cyclic is ignored. | ||
2797 | * no_nrwrite_index_update is always presumed true | ||
2798 | */ | ||
2799 | static int write_cache_pages_da(struct address_space *mapping, | ||
2800 | struct writeback_control *wbc, | ||
2801 | struct mpage_da_data *mpd) | ||
2802 | { | ||
2803 | int ret = 0; | ||
2804 | int done = 0; | ||
2805 | struct pagevec pvec; | ||
2806 | int nr_pages; | ||
2807 | pgoff_t index; | ||
2808 | pgoff_t end; /* Inclusive */ | ||
2809 | long nr_to_write = wbc->nr_to_write; | ||
2810 | |||
2811 | pagevec_init(&pvec, 0); | ||
2812 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | ||
2813 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | ||
2814 | |||
2815 | while (!done && (index <= end)) { | ||
2816 | int i; | ||
2817 | |||
2818 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | ||
2819 | PAGECACHE_TAG_DIRTY, | ||
2820 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); | ||
2821 | if (nr_pages == 0) | ||
2822 | break; | ||
2823 | |||
2824 | for (i = 0; i < nr_pages; i++) { | ||
2825 | struct page *page = pvec.pages[i]; | ||
2826 | |||
2827 | /* | ||
2828 | * At this point, the page may be truncated or | ||
2829 | * invalidated (changing page->mapping to NULL), or | ||
2830 | * even swizzled back from swapper_space to tmpfs file | ||
2831 | * mapping. However, page->index will not change | ||
2832 | * because we have a reference on the page. | ||
2833 | */ | ||
2834 | if (page->index > end) { | ||
2835 | done = 1; | ||
2836 | break; | ||
2837 | } | ||
2838 | |||
2839 | lock_page(page); | ||
2840 | |||
2841 | /* | ||
2842 | * Page truncated or invalidated. We can freely skip it | ||
2843 | * then, even for data integrity operations: the page | ||
2844 | * has disappeared concurrently, so there could be no | ||
2845 | * real expectation of this data interity operation | ||
2846 | * even if there is now a new, dirty page at the same | ||
2847 | * pagecache address. | ||
2848 | */ | ||
2849 | if (unlikely(page->mapping != mapping)) { | ||
2850 | continue_unlock: | ||
2851 | unlock_page(page); | ||
2852 | continue; | ||
2853 | } | ||
2854 | |||
2855 | if (!PageDirty(page)) { | ||
2856 | /* someone wrote it for us */ | ||
2857 | goto continue_unlock; | ||
2858 | } | ||
2859 | |||
2860 | if (PageWriteback(page)) { | ||
2861 | if (wbc->sync_mode != WB_SYNC_NONE) | ||
2862 | wait_on_page_writeback(page); | ||
2863 | else | ||
2864 | goto continue_unlock; | ||
2865 | } | ||
2866 | |||
2867 | BUG_ON(PageWriteback(page)); | ||
2868 | if (!clear_page_dirty_for_io(page)) | ||
2869 | goto continue_unlock; | ||
2870 | |||
2871 | ret = __mpage_da_writepage(page, wbc, mpd); | ||
2872 | if (unlikely(ret)) { | ||
2873 | if (ret == AOP_WRITEPAGE_ACTIVATE) { | ||
2874 | unlock_page(page); | ||
2875 | ret = 0; | ||
2876 | } else { | ||
2877 | done = 1; | ||
2878 | break; | ||
2879 | } | ||
2880 | } | ||
2881 | |||
2882 | if (nr_to_write > 0) { | ||
2883 | nr_to_write--; | ||
2884 | if (nr_to_write == 0 && | ||
2885 | wbc->sync_mode == WB_SYNC_NONE) { | ||
2886 | /* | ||
2887 | * We stop writing back only if we are | ||
2888 | * not doing integrity sync. In case of | ||
2889 | * integrity sync we have to keep going | ||
2890 | * because someone may be concurrently | ||
2891 | * dirtying pages, and we might have | ||
2892 | * synced a lot of newly appeared dirty | ||
2893 | * pages, but have not synced all of the | ||
2894 | * old dirty pages. | ||
2895 | */ | ||
2896 | done = 1; | ||
2897 | break; | ||
2898 | } | ||
2899 | } | ||
2900 | } | ||
2901 | pagevec_release(&pvec); | ||
2902 | cond_resched(); | ||
2903 | } | ||
2904 | return ret; | ||
2905 | } | ||
2906 | |||
2907 | |||
2831 | static int ext4_da_writepages(struct address_space *mapping, | 2908 | static int ext4_da_writepages(struct address_space *mapping, |
2832 | struct writeback_control *wbc) | 2909 | struct writeback_control *wbc) |
2833 | { | 2910 | { |
@@ -2836,7 +2913,6 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2836 | handle_t *handle = NULL; | 2913 | handle_t *handle = NULL; |
2837 | struct mpage_da_data mpd; | 2914 | struct mpage_da_data mpd; |
2838 | struct inode *inode = mapping->host; | 2915 | struct inode *inode = mapping->host; |
2839 | int no_nrwrite_index_update; | ||
2840 | int pages_written = 0; | 2916 | int pages_written = 0; |
2841 | long pages_skipped; | 2917 | long pages_skipped; |
2842 | unsigned int max_pages; | 2918 | unsigned int max_pages; |
@@ -2916,12 +2992,6 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2916 | mpd.wbc = wbc; | 2992 | mpd.wbc = wbc; |
2917 | mpd.inode = mapping->host; | 2993 | mpd.inode = mapping->host; |
2918 | 2994 | ||
2919 | /* | ||
2920 | * we don't want write_cache_pages to update | ||
2921 | * nr_to_write and writeback_index | ||
2922 | */ | ||
2923 | no_nrwrite_index_update = wbc->no_nrwrite_index_update; | ||
2924 | wbc->no_nrwrite_index_update = 1; | ||
2925 | pages_skipped = wbc->pages_skipped; | 2995 | pages_skipped = wbc->pages_skipped; |
2926 | 2996 | ||
2927 | retry: | 2997 | retry: |
@@ -2941,7 +3011,7 @@ retry: | |||
2941 | if (IS_ERR(handle)) { | 3011 | if (IS_ERR(handle)) { |
2942 | ret = PTR_ERR(handle); | 3012 | ret = PTR_ERR(handle); |
2943 | ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: " | 3013 | ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: " |
2944 | "%ld pages, ino %lu; err %d\n", __func__, | 3014 | "%ld pages, ino %lu; err %d", __func__, |
2945 | wbc->nr_to_write, inode->i_ino, ret); | 3015 | wbc->nr_to_write, inode->i_ino, ret); |
2946 | goto out_writepages; | 3016 | goto out_writepages; |
2947 | } | 3017 | } |
@@ -2963,8 +3033,7 @@ retry: | |||
2963 | mpd.io_done = 0; | 3033 | mpd.io_done = 0; |
2964 | mpd.pages_written = 0; | 3034 | mpd.pages_written = 0; |
2965 | mpd.retval = 0; | 3035 | mpd.retval = 0; |
2966 | ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, | 3036 | ret = write_cache_pages_da(mapping, wbc, &mpd); |
2967 | &mpd); | ||
2968 | /* | 3037 | /* |
2969 | * If we have a contiguous extent of pages and we | 3038 | * If we have a contiguous extent of pages and we |
2970 | * haven't done the I/O yet, map the blocks and submit | 3039 | * haven't done the I/O yet, map the blocks and submit |
@@ -3016,7 +3085,7 @@ retry: | |||
3016 | if (pages_skipped != wbc->pages_skipped) | 3085 | if (pages_skipped != wbc->pages_skipped) |
3017 | ext4_msg(inode->i_sb, KERN_CRIT, | 3086 | ext4_msg(inode->i_sb, KERN_CRIT, |
3018 | "This should not happen leaving %s " | 3087 | "This should not happen leaving %s " |
3019 | "with nr_to_write = %ld ret = %d\n", | 3088 | "with nr_to_write = %ld ret = %d", |
3020 | __func__, wbc->nr_to_write, ret); | 3089 | __func__, wbc->nr_to_write, ret); |
3021 | 3090 | ||
3022 | /* Update index */ | 3091 | /* Update index */ |
@@ -3030,8 +3099,6 @@ retry: | |||
3030 | mapping->writeback_index = index; | 3099 | mapping->writeback_index = index; |
3031 | 3100 | ||
3032 | out_writepages: | 3101 | out_writepages: |
3033 | if (!no_nrwrite_index_update) | ||
3034 | wbc->no_nrwrite_index_update = 0; | ||
3035 | wbc->nr_to_write -= nr_to_writebump; | 3102 | wbc->nr_to_write -= nr_to_writebump; |
3036 | wbc->range_start = range_start; | 3103 | wbc->range_start = range_start; |
3037 | trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); | 3104 | trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); |
@@ -3076,7 +3143,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, | |||
3076 | loff_t pos, unsigned len, unsigned flags, | 3143 | loff_t pos, unsigned len, unsigned flags, |
3077 | struct page **pagep, void **fsdata) | 3144 | struct page **pagep, void **fsdata) |
3078 | { | 3145 | { |
3079 | int ret, retries = 0, quota_retries = 0; | 3146 | int ret, retries = 0; |
3080 | struct page *page; | 3147 | struct page *page; |
3081 | pgoff_t index; | 3148 | pgoff_t index; |
3082 | unsigned from, to; | 3149 | unsigned from, to; |
@@ -3135,22 +3202,6 @@ retry: | |||
3135 | 3202 | ||
3136 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 3203 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) |
3137 | goto retry; | 3204 | goto retry; |
3138 | |||
3139 | if ((ret == -EDQUOT) && | ||
3140 | EXT4_I(inode)->i_reserved_meta_blocks && | ||
3141 | (quota_retries++ < 3)) { | ||
3142 | /* | ||
3143 | * Since we often over-estimate the number of meta | ||
3144 | * data blocks required, we may sometimes get a | ||
3145 | * spurious out of quota error even though there would | ||
3146 | * be enough space once we write the data blocks and | ||
3147 | * find out how many meta data blocks were _really_ | ||
3148 | * required. So try forcing the inode write to see if | ||
3149 | * that helps. | ||
3150 | */ | ||
3151 | write_inode_now(inode, (quota_retries == 3)); | ||
3152 | goto retry; | ||
3153 | } | ||
3154 | out: | 3205 | out: |
3155 | return ret; | 3206 | return ret; |
3156 | } | 3207 | } |
@@ -3546,46 +3597,18 @@ out: | |||
3546 | return ret; | 3597 | return ret; |
3547 | } | 3598 | } |
3548 | 3599 | ||
3600 | /* | ||
3601 | * ext4_get_block used when preparing for a DIO write or buffer write. | ||
3602 | * We allocate an uninitialized extent if blocks haven't been allocated. | ||
3603 | * The extent will be converted to initialized after the IO is complete. | ||
3604 | */ | ||
3549 | static int ext4_get_block_write(struct inode *inode, sector_t iblock, | 3605 | static int ext4_get_block_write(struct inode *inode, sector_t iblock, |
3550 | struct buffer_head *bh_result, int create) | 3606 | struct buffer_head *bh_result, int create) |
3551 | { | 3607 | { |
3552 | handle_t *handle = ext4_journal_current_handle(); | ||
3553 | int ret = 0; | ||
3554 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; | ||
3555 | int dio_credits; | ||
3556 | int started = 0; | ||
3557 | |||
3558 | ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n", | 3608 | ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n", |
3559 | inode->i_ino, create); | 3609 | inode->i_ino, create); |
3560 | /* | 3610 | return _ext4_get_block(inode, iblock, bh_result, |
3561 | * ext4_get_block in prepare for a DIO write or buffer write. | 3611 | EXT4_GET_BLOCKS_IO_CREATE_EXT); |
3562 | * We allocate an uninitialized extent if blocks haven't been allocated. | ||
3563 | * The extent will be converted to initialized after IO complete. | ||
3564 | */ | ||
3565 | create = EXT4_GET_BLOCKS_IO_CREATE_EXT; | ||
3566 | |||
3567 | if (!handle) { | ||
3568 | if (max_blocks > DIO_MAX_BLOCKS) | ||
3569 | max_blocks = DIO_MAX_BLOCKS; | ||
3570 | dio_credits = ext4_chunk_trans_blocks(inode, max_blocks); | ||
3571 | handle = ext4_journal_start(inode, dio_credits); | ||
3572 | if (IS_ERR(handle)) { | ||
3573 | ret = PTR_ERR(handle); | ||
3574 | goto out; | ||
3575 | } | ||
3576 | started = 1; | ||
3577 | } | ||
3578 | |||
3579 | ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result, | ||
3580 | create); | ||
3581 | if (ret > 0) { | ||
3582 | bh_result->b_size = (ret << inode->i_blkbits); | ||
3583 | ret = 0; | ||
3584 | } | ||
3585 | if (started) | ||
3586 | ext4_journal_stop(handle); | ||
3587 | out: | ||
3588 | return ret; | ||
3589 | } | 3612 | } |
3590 | 3613 | ||
3591 | static void dump_completed_IO(struct inode * inode) | 3614 | static void dump_completed_IO(struct inode * inode) |
@@ -3973,7 +3996,7 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, | |||
3973 | struct file *file = iocb->ki_filp; | 3996 | struct file *file = iocb->ki_filp; |
3974 | struct inode *inode = file->f_mapping->host; | 3997 | struct inode *inode = file->f_mapping->host; |
3975 | 3998 | ||
3976 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) | 3999 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
3977 | return ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); | 4000 | return ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); |
3978 | 4001 | ||
3979 | return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs); | 4002 | return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs); |
@@ -4302,10 +4325,9 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode, | |||
4302 | 4325 | ||
4303 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free, | 4326 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free, |
4304 | count)) { | 4327 | count)) { |
4305 | ext4_error(inode->i_sb, "inode #%lu: " | 4328 | EXT4_ERROR_INODE(inode, "attempt to clear invalid " |
4306 | "attempt to clear blocks %llu len %lu, invalid", | 4329 | "blocks %llu len %lu", |
4307 | inode->i_ino, (unsigned long long) block_to_free, | 4330 | (unsigned long long) block_to_free, count); |
4308 | count); | ||
4309 | return 1; | 4331 | return 1; |
4310 | } | 4332 | } |
4311 | 4333 | ||
@@ -4410,11 +4432,10 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, | |||
4410 | if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh)) | 4432 | if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh)) |
4411 | ext4_handle_dirty_metadata(handle, inode, this_bh); | 4433 | ext4_handle_dirty_metadata(handle, inode, this_bh); |
4412 | else | 4434 | else |
4413 | ext4_error(inode->i_sb, | 4435 | EXT4_ERROR_INODE(inode, |
4414 | "circular indirect block detected, " | 4436 | "circular indirect block detected at " |
4415 | "inode=%lu, block=%llu", | 4437 | "block %llu", |
4416 | inode->i_ino, | 4438 | (unsigned long long) this_bh->b_blocknr); |
4417 | (unsigned long long) this_bh->b_blocknr); | ||
4418 | } | 4439 | } |
4419 | } | 4440 | } |
4420 | 4441 | ||
@@ -4452,11 +4473,10 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
4452 | 4473 | ||
4453 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), | 4474 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), |
4454 | nr, 1)) { | 4475 | nr, 1)) { |
4455 | ext4_error(inode->i_sb, | 4476 | EXT4_ERROR_INODE(inode, |
4456 | "indirect mapped block in inode " | 4477 | "invalid indirect mapped " |
4457 | "#%lu invalid (level %d, blk #%lu)", | 4478 | "block %lu (level %d)", |
4458 | inode->i_ino, depth, | 4479 | (unsigned long) nr, depth); |
4459 | (unsigned long) nr); | ||
4460 | break; | 4480 | break; |
4461 | } | 4481 | } |
4462 | 4482 | ||
@@ -4468,9 +4488,9 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
4468 | * (should be rare). | 4488 | * (should be rare). |
4469 | */ | 4489 | */ |
4470 | if (!bh) { | 4490 | if (!bh) { |
4471 | ext4_error(inode->i_sb, | 4491 | EXT4_ERROR_INODE(inode, |
4472 | "Read failure, inode=%lu, block=%llu", | 4492 | "Read failure block=%llu", |
4473 | inode->i_ino, nr); | 4493 | (unsigned long long) nr); |
4474 | continue; | 4494 | continue; |
4475 | } | 4495 | } |
4476 | 4496 | ||
@@ -4612,12 +4632,12 @@ void ext4_truncate(struct inode *inode) | |||
4612 | if (!ext4_can_truncate(inode)) | 4632 | if (!ext4_can_truncate(inode)) |
4613 | return; | 4633 | return; |
4614 | 4634 | ||
4615 | EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL; | 4635 | ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); |
4616 | 4636 | ||
4617 | if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) | 4637 | if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) |
4618 | ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); | 4638 | ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); |
4619 | 4639 | ||
4620 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { | 4640 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { |
4621 | ext4_ext_truncate(inode); | 4641 | ext4_ext_truncate(inode); |
4622 | return; | 4642 | return; |
4623 | } | 4643 | } |
@@ -4785,8 +4805,8 @@ static int __ext4_get_inode_loc(struct inode *inode, | |||
4785 | 4805 | ||
4786 | bh = sb_getblk(sb, block); | 4806 | bh = sb_getblk(sb, block); |
4787 | if (!bh) { | 4807 | if (!bh) { |
4788 | ext4_error(sb, "unable to read inode block - " | 4808 | EXT4_ERROR_INODE(inode, "unable to read inode block - " |
4789 | "inode=%lu, block=%llu", inode->i_ino, block); | 4809 | "block %llu", block); |
4790 | return -EIO; | 4810 | return -EIO; |
4791 | } | 4811 | } |
4792 | if (!buffer_uptodate(bh)) { | 4812 | if (!buffer_uptodate(bh)) { |
@@ -4884,8 +4904,8 @@ make_io: | |||
4884 | submit_bh(READ_META, bh); | 4904 | submit_bh(READ_META, bh); |
4885 | wait_on_buffer(bh); | 4905 | wait_on_buffer(bh); |
4886 | if (!buffer_uptodate(bh)) { | 4906 | if (!buffer_uptodate(bh)) { |
4887 | ext4_error(sb, "unable to read inode block - inode=%lu," | 4907 | EXT4_ERROR_INODE(inode, "unable to read inode " |
4888 | " block=%llu", inode->i_ino, block); | 4908 | "block %llu", block); |
4889 | brelse(bh); | 4909 | brelse(bh); |
4890 | return -EIO; | 4910 | return -EIO; |
4891 | } | 4911 | } |
@@ -5096,8 +5116,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
5096 | ret = 0; | 5116 | ret = 0; |
5097 | if (ei->i_file_acl && | 5117 | if (ei->i_file_acl && |
5098 | !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) { | 5118 | !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) { |
5099 | ext4_error(sb, "bad extended attribute block %llu inode #%lu", | 5119 | EXT4_ERROR_INODE(inode, "bad extended attribute block %llu", |
5100 | ei->i_file_acl, inode->i_ino); | 5120 | ei->i_file_acl); |
5101 | ret = -EIO; | 5121 | ret = -EIO; |
5102 | goto bad_inode; | 5122 | goto bad_inode; |
5103 | } else if (ei->i_flags & EXT4_EXTENTS_FL) { | 5123 | } else if (ei->i_flags & EXT4_EXTENTS_FL) { |
@@ -5142,8 +5162,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
5142 | new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); | 5162 | new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); |
5143 | } else { | 5163 | } else { |
5144 | ret = -EIO; | 5164 | ret = -EIO; |
5145 | ext4_error(inode->i_sb, "bogus i_mode (%o) for inode=%lu", | 5165 | EXT4_ERROR_INODE(inode, "bogus i_mode (%o)", inode->i_mode); |
5146 | inode->i_mode, inode->i_ino); | ||
5147 | goto bad_inode; | 5166 | goto bad_inode; |
5148 | } | 5167 | } |
5149 | brelse(iloc.bh); | 5168 | brelse(iloc.bh); |
@@ -5381,9 +5400,9 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
5381 | if (wbc->sync_mode == WB_SYNC_ALL) | 5400 | if (wbc->sync_mode == WB_SYNC_ALL) |
5382 | sync_dirty_buffer(iloc.bh); | 5401 | sync_dirty_buffer(iloc.bh); |
5383 | if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { | 5402 | if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { |
5384 | ext4_error(inode->i_sb, "IO error syncing inode, " | 5403 | EXT4_ERROR_INODE(inode, |
5385 | "inode=%lu, block=%llu", inode->i_ino, | 5404 | "IO error syncing inode (block=%llu)", |
5386 | (unsigned long long)iloc.bh->b_blocknr); | 5405 | (unsigned long long) iloc.bh->b_blocknr); |
5387 | err = -EIO; | 5406 | err = -EIO; |
5388 | } | 5407 | } |
5389 | brelse(iloc.bh); | 5408 | brelse(iloc.bh); |
@@ -5455,7 +5474,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5455 | } | 5474 | } |
5456 | 5475 | ||
5457 | if (attr->ia_valid & ATTR_SIZE) { | 5476 | if (attr->ia_valid & ATTR_SIZE) { |
5458 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) { | 5477 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { |
5459 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 5478 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
5460 | 5479 | ||
5461 | if (attr->ia_size > sbi->s_bitmap_maxbytes) { | 5480 | if (attr->ia_size > sbi->s_bitmap_maxbytes) { |
@@ -5468,7 +5487,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5468 | if (S_ISREG(inode->i_mode) && | 5487 | if (S_ISREG(inode->i_mode) && |
5469 | attr->ia_valid & ATTR_SIZE && | 5488 | attr->ia_valid & ATTR_SIZE && |
5470 | (attr->ia_size < inode->i_size || | 5489 | (attr->ia_size < inode->i_size || |
5471 | (EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL))) { | 5490 | (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)))) { |
5472 | handle_t *handle; | 5491 | handle_t *handle; |
5473 | 5492 | ||
5474 | handle = ext4_journal_start(inode, 3); | 5493 | handle = ext4_journal_start(inode, 3); |
@@ -5500,7 +5519,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
5500 | } | 5519 | } |
5501 | } | 5520 | } |
5502 | /* ext4_truncate will clear the flag */ | 5521 | /* ext4_truncate will clear the flag */ |
5503 | if ((EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL)) | 5522 | if ((ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))) |
5504 | ext4_truncate(inode); | 5523 | ext4_truncate(inode); |
5505 | } | 5524 | } |
5506 | 5525 | ||
@@ -5576,7 +5595,7 @@ static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks, | |||
5576 | 5595 | ||
5577 | static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | 5596 | static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) |
5578 | { | 5597 | { |
5579 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) | 5598 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
5580 | return ext4_indirect_trans_blocks(inode, nrblocks, chunk); | 5599 | return ext4_indirect_trans_blocks(inode, nrblocks, chunk); |
5581 | return ext4_ext_index_trans_blocks(inode, nrblocks, chunk); | 5600 | return ext4_ext_index_trans_blocks(inode, nrblocks, chunk); |
5582 | } | 5601 | } |
@@ -5911,9 +5930,9 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) | |||
5911 | */ | 5930 | */ |
5912 | 5931 | ||
5913 | if (val) | 5932 | if (val) |
5914 | EXT4_I(inode)->i_flags |= EXT4_JOURNAL_DATA_FL; | 5933 | ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); |
5915 | else | 5934 | else |
5916 | EXT4_I(inode)->i_flags &= ~EXT4_JOURNAL_DATA_FL; | 5935 | ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); |
5917 | ext4_set_aops(inode); | 5936 | ext4_set_aops(inode); |
5918 | 5937 | ||
5919 | jbd2_journal_unlock_updates(journal); | 5938 | jbd2_journal_unlock_updates(journal); |
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 016d0249294f..bf5ae883b1bd 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -258,7 +258,7 @@ setversion_out: | |||
258 | if (me.moved_len > 0) | 258 | if (me.moved_len > 0) |
259 | file_remove_suid(donor_filp); | 259 | file_remove_suid(donor_filp); |
260 | 260 | ||
261 | if (copy_to_user((struct move_extent __user *)arg, | 261 | if (copy_to_user((struct move_extent __user *)arg, |
262 | &me, sizeof(me))) | 262 | &me, sizeof(me))) |
263 | err = -EFAULT; | 263 | err = -EFAULT; |
264 | mext_out: | 264 | mext_out: |
@@ -373,7 +373,30 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
373 | case EXT4_IOC32_SETRSVSZ: | 373 | case EXT4_IOC32_SETRSVSZ: |
374 | cmd = EXT4_IOC_SETRSVSZ; | 374 | cmd = EXT4_IOC_SETRSVSZ; |
375 | break; | 375 | break; |
376 | case EXT4_IOC_GROUP_ADD: | 376 | case EXT4_IOC32_GROUP_ADD: { |
377 | struct compat_ext4_new_group_input __user *uinput; | ||
378 | struct ext4_new_group_input input; | ||
379 | mm_segment_t old_fs; | ||
380 | int err; | ||
381 | |||
382 | uinput = compat_ptr(arg); | ||
383 | err = get_user(input.group, &uinput->group); | ||
384 | err |= get_user(input.block_bitmap, &uinput->block_bitmap); | ||
385 | err |= get_user(input.inode_bitmap, &uinput->inode_bitmap); | ||
386 | err |= get_user(input.inode_table, &uinput->inode_table); | ||
387 | err |= get_user(input.blocks_count, &uinput->blocks_count); | ||
388 | err |= get_user(input.reserved_blocks, | ||
389 | &uinput->reserved_blocks); | ||
390 | if (err) | ||
391 | return -EFAULT; | ||
392 | old_fs = get_fs(); | ||
393 | set_fs(KERNEL_DS); | ||
394 | err = ext4_ioctl(file, EXT4_IOC_GROUP_ADD, | ||
395 | (unsigned long) &input); | ||
396 | set_fs(old_fs); | ||
397 | return err; | ||
398 | } | ||
399 | case EXT4_IOC_MOVE_EXT: | ||
377 | break; | 400 | break; |
378 | default: | 401 | default: |
379 | return -ENOIOCTLCMD; | 402 | return -ENOIOCTLCMD; |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index b423a364dca3..12b3bc026a68 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -658,6 +658,27 @@ static void ext4_mb_mark_free_simple(struct super_block *sb, | |||
658 | } | 658 | } |
659 | } | 659 | } |
660 | 660 | ||
661 | /* | ||
662 | * Cache the order of the largest free extent we have available in this block | ||
663 | * group. | ||
664 | */ | ||
665 | static void | ||
666 | mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp) | ||
667 | { | ||
668 | int i; | ||
669 | int bits; | ||
670 | |||
671 | grp->bb_largest_free_order = -1; /* uninit */ | ||
672 | |||
673 | bits = sb->s_blocksize_bits + 1; | ||
674 | for (i = bits; i >= 0; i--) { | ||
675 | if (grp->bb_counters[i] > 0) { | ||
676 | grp->bb_largest_free_order = i; | ||
677 | break; | ||
678 | } | ||
679 | } | ||
680 | } | ||
681 | |||
661 | static noinline_for_stack | 682 | static noinline_for_stack |
662 | void ext4_mb_generate_buddy(struct super_block *sb, | 683 | void ext4_mb_generate_buddy(struct super_block *sb, |
663 | void *buddy, void *bitmap, ext4_group_t group) | 684 | void *buddy, void *bitmap, ext4_group_t group) |
@@ -700,6 +721,7 @@ void ext4_mb_generate_buddy(struct super_block *sb, | |||
700 | */ | 721 | */ |
701 | grp->bb_free = free; | 722 | grp->bb_free = free; |
702 | } | 723 | } |
724 | mb_set_largest_free_order(sb, grp); | ||
703 | 725 | ||
704 | clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state)); | 726 | clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state)); |
705 | 727 | ||
@@ -725,6 +747,9 @@ void ext4_mb_generate_buddy(struct super_block *sb, | |||
725 | * contain blocks_per_page (PAGE_CACHE_SIZE / blocksize) blocks. | 747 | * contain blocks_per_page (PAGE_CACHE_SIZE / blocksize) blocks. |
726 | * So it can have information regarding groups_per_page which | 748 | * So it can have information regarding groups_per_page which |
727 | * is blocks_per_page/2 | 749 | * is blocks_per_page/2 |
750 | * | ||
751 | * Locking note: This routine takes the block group lock of all groups | ||
752 | * for this page; do not hold this lock when calling this routine! | ||
728 | */ | 753 | */ |
729 | 754 | ||
730 | static int ext4_mb_init_cache(struct page *page, char *incore) | 755 | static int ext4_mb_init_cache(struct page *page, char *incore) |
@@ -865,6 +890,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
865 | BUG_ON(incore == NULL); | 890 | BUG_ON(incore == NULL); |
866 | mb_debug(1, "put buddy for group %u in page %lu/%x\n", | 891 | mb_debug(1, "put buddy for group %u in page %lu/%x\n", |
867 | group, page->index, i * blocksize); | 892 | group, page->index, i * blocksize); |
893 | trace_ext4_mb_buddy_bitmap_load(sb, group); | ||
868 | grinfo = ext4_get_group_info(sb, group); | 894 | grinfo = ext4_get_group_info(sb, group); |
869 | grinfo->bb_fragments = 0; | 895 | grinfo->bb_fragments = 0; |
870 | memset(grinfo->bb_counters, 0, | 896 | memset(grinfo->bb_counters, 0, |
@@ -882,6 +908,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
882 | BUG_ON(incore != NULL); | 908 | BUG_ON(incore != NULL); |
883 | mb_debug(1, "put bitmap for group %u in page %lu/%x\n", | 909 | mb_debug(1, "put bitmap for group %u in page %lu/%x\n", |
884 | group, page->index, i * blocksize); | 910 | group, page->index, i * blocksize); |
911 | trace_ext4_mb_bitmap_load(sb, group); | ||
885 | 912 | ||
886 | /* see comments in ext4_mb_put_pa() */ | 913 | /* see comments in ext4_mb_put_pa() */ |
887 | ext4_lock_group(sb, group); | 914 | ext4_lock_group(sb, group); |
@@ -910,6 +937,11 @@ out: | |||
910 | return err; | 937 | return err; |
911 | } | 938 | } |
912 | 939 | ||
940 | /* | ||
941 | * Locking note: This routine calls ext4_mb_init_cache(), which takes the | ||
942 | * block group lock of all groups for this page; do not hold the BG lock when | ||
943 | * calling this routine! | ||
944 | */ | ||
913 | static noinline_for_stack | 945 | static noinline_for_stack |
914 | int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) | 946 | int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) |
915 | { | 947 | { |
@@ -1004,6 +1036,11 @@ err: | |||
1004 | return ret; | 1036 | return ret; |
1005 | } | 1037 | } |
1006 | 1038 | ||
1039 | /* | ||
1040 | * Locking note: This routine calls ext4_mb_init_cache(), which takes the | ||
1041 | * block group lock of all groups for this page; do not hold the BG lock when | ||
1042 | * calling this routine! | ||
1043 | */ | ||
1007 | static noinline_for_stack int | 1044 | static noinline_for_stack int |
1008 | ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, | 1045 | ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, |
1009 | struct ext4_buddy *e4b) | 1046 | struct ext4_buddy *e4b) |
@@ -1150,7 +1187,7 @@ err: | |||
1150 | return ret; | 1187 | return ret; |
1151 | } | 1188 | } |
1152 | 1189 | ||
1153 | static void ext4_mb_release_desc(struct ext4_buddy *e4b) | 1190 | static void ext4_mb_unload_buddy(struct ext4_buddy *e4b) |
1154 | { | 1191 | { |
1155 | if (e4b->bd_bitmap_page) | 1192 | if (e4b->bd_bitmap_page) |
1156 | page_cache_release(e4b->bd_bitmap_page); | 1193 | page_cache_release(e4b->bd_bitmap_page); |
@@ -1299,6 +1336,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, | |||
1299 | buddy = buddy2; | 1336 | buddy = buddy2; |
1300 | } while (1); | 1337 | } while (1); |
1301 | } | 1338 | } |
1339 | mb_set_largest_free_order(sb, e4b->bd_info); | ||
1302 | mb_check_buddy(e4b); | 1340 | mb_check_buddy(e4b); |
1303 | } | 1341 | } |
1304 | 1342 | ||
@@ -1427,6 +1465,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex) | |||
1427 | e4b->bd_info->bb_counters[ord]++; | 1465 | e4b->bd_info->bb_counters[ord]++; |
1428 | e4b->bd_info->bb_counters[ord]++; | 1466 | e4b->bd_info->bb_counters[ord]++; |
1429 | } | 1467 | } |
1468 | mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info); | ||
1430 | 1469 | ||
1431 | mb_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0); | 1470 | mb_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0); |
1432 | mb_check_buddy(e4b); | 1471 | mb_check_buddy(e4b); |
@@ -1617,7 +1656,7 @@ int ext4_mb_try_best_found(struct ext4_allocation_context *ac, | |||
1617 | } | 1656 | } |
1618 | 1657 | ||
1619 | ext4_unlock_group(ac->ac_sb, group); | 1658 | ext4_unlock_group(ac->ac_sb, group); |
1620 | ext4_mb_release_desc(e4b); | 1659 | ext4_mb_unload_buddy(e4b); |
1621 | 1660 | ||
1622 | return 0; | 1661 | return 0; |
1623 | } | 1662 | } |
@@ -1672,7 +1711,7 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, | |||
1672 | ext4_mb_use_best_found(ac, e4b); | 1711 | ext4_mb_use_best_found(ac, e4b); |
1673 | } | 1712 | } |
1674 | ext4_unlock_group(ac->ac_sb, group); | 1713 | ext4_unlock_group(ac->ac_sb, group); |
1675 | ext4_mb_release_desc(e4b); | 1714 | ext4_mb_unload_buddy(e4b); |
1676 | 1715 | ||
1677 | return 0; | 1716 | return 0; |
1678 | } | 1717 | } |
@@ -1821,16 +1860,22 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac, | |||
1821 | } | 1860 | } |
1822 | } | 1861 | } |
1823 | 1862 | ||
1863 | /* This is now called BEFORE we load the buddy bitmap. */ | ||
1824 | static int ext4_mb_good_group(struct ext4_allocation_context *ac, | 1864 | static int ext4_mb_good_group(struct ext4_allocation_context *ac, |
1825 | ext4_group_t group, int cr) | 1865 | ext4_group_t group, int cr) |
1826 | { | 1866 | { |
1827 | unsigned free, fragments; | 1867 | unsigned free, fragments; |
1828 | unsigned i, bits; | ||
1829 | int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb)); | 1868 | int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb)); |
1830 | struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); | 1869 | struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); |
1831 | 1870 | ||
1832 | BUG_ON(cr < 0 || cr >= 4); | 1871 | BUG_ON(cr < 0 || cr >= 4); |
1833 | BUG_ON(EXT4_MB_GRP_NEED_INIT(grp)); | 1872 | |
1873 | /* We only do this if the grp has never been initialized */ | ||
1874 | if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { | ||
1875 | int ret = ext4_mb_init_group(ac->ac_sb, group); | ||
1876 | if (ret) | ||
1877 | return 0; | ||
1878 | } | ||
1834 | 1879 | ||
1835 | free = grp->bb_free; | 1880 | free = grp->bb_free; |
1836 | fragments = grp->bb_fragments; | 1881 | fragments = grp->bb_fragments; |
@@ -1843,17 +1888,16 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, | |||
1843 | case 0: | 1888 | case 0: |
1844 | BUG_ON(ac->ac_2order == 0); | 1889 | BUG_ON(ac->ac_2order == 0); |
1845 | 1890 | ||
1891 | if (grp->bb_largest_free_order < ac->ac_2order) | ||
1892 | return 0; | ||
1893 | |||
1846 | /* Avoid using the first bg of a flexgroup for data files */ | 1894 | /* Avoid using the first bg of a flexgroup for data files */ |
1847 | if ((ac->ac_flags & EXT4_MB_HINT_DATA) && | 1895 | if ((ac->ac_flags & EXT4_MB_HINT_DATA) && |
1848 | (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) && | 1896 | (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) && |
1849 | ((group % flex_size) == 0)) | 1897 | ((group % flex_size) == 0)) |
1850 | return 0; | 1898 | return 0; |
1851 | 1899 | ||
1852 | bits = ac->ac_sb->s_blocksize_bits + 1; | 1900 | return 1; |
1853 | for (i = ac->ac_2order; i <= bits; i++) | ||
1854 | if (grp->bb_counters[i] > 0) | ||
1855 | return 1; | ||
1856 | break; | ||
1857 | case 1: | 1901 | case 1: |
1858 | if ((free / fragments) >= ac->ac_g_ex.fe_len) | 1902 | if ((free / fragments) >= ac->ac_g_ex.fe_len) |
1859 | return 1; | 1903 | return 1; |
@@ -1964,7 +2008,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) | |||
1964 | sbi = EXT4_SB(sb); | 2008 | sbi = EXT4_SB(sb); |
1965 | ngroups = ext4_get_groups_count(sb); | 2009 | ngroups = ext4_get_groups_count(sb); |
1966 | /* non-extent files are limited to low blocks/groups */ | 2010 | /* non-extent files are limited to low blocks/groups */ |
1967 | if (!(EXT4_I(ac->ac_inode)->i_flags & EXT4_EXTENTS_FL)) | 2011 | if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS))) |
1968 | ngroups = sbi->s_blockfile_groups; | 2012 | ngroups = sbi->s_blockfile_groups; |
1969 | 2013 | ||
1970 | BUG_ON(ac->ac_status == AC_STATUS_FOUND); | 2014 | BUG_ON(ac->ac_status == AC_STATUS_FOUND); |
@@ -2024,15 +2068,11 @@ repeat: | |||
2024 | group = ac->ac_g_ex.fe_group; | 2068 | group = ac->ac_g_ex.fe_group; |
2025 | 2069 | ||
2026 | for (i = 0; i < ngroups; group++, i++) { | 2070 | for (i = 0; i < ngroups; group++, i++) { |
2027 | struct ext4_group_info *grp; | ||
2028 | struct ext4_group_desc *desc; | ||
2029 | |||
2030 | if (group == ngroups) | 2071 | if (group == ngroups) |
2031 | group = 0; | 2072 | group = 0; |
2032 | 2073 | ||
2033 | /* quick check to skip empty groups */ | 2074 | /* This now checks without needing the buddy page */ |
2034 | grp = ext4_get_group_info(sb, group); | 2075 | if (!ext4_mb_good_group(ac, group, cr)) |
2035 | if (grp->bb_free == 0) | ||
2036 | continue; | 2076 | continue; |
2037 | 2077 | ||
2038 | err = ext4_mb_load_buddy(sb, group, &e4b); | 2078 | err = ext4_mb_load_buddy(sb, group, &e4b); |
@@ -2040,15 +2080,18 @@ repeat: | |||
2040 | goto out; | 2080 | goto out; |
2041 | 2081 | ||
2042 | ext4_lock_group(sb, group); | 2082 | ext4_lock_group(sb, group); |
2083 | |||
2084 | /* | ||
2085 | * We need to check again after locking the | ||
2086 | * block group | ||
2087 | */ | ||
2043 | if (!ext4_mb_good_group(ac, group, cr)) { | 2088 | if (!ext4_mb_good_group(ac, group, cr)) { |
2044 | /* someone did allocation from this group */ | ||
2045 | ext4_unlock_group(sb, group); | 2089 | ext4_unlock_group(sb, group); |
2046 | ext4_mb_release_desc(&e4b); | 2090 | ext4_mb_unload_buddy(&e4b); |
2047 | continue; | 2091 | continue; |
2048 | } | 2092 | } |
2049 | 2093 | ||
2050 | ac->ac_groups_scanned++; | 2094 | ac->ac_groups_scanned++; |
2051 | desc = ext4_get_group_desc(sb, group, NULL); | ||
2052 | if (cr == 0) | 2095 | if (cr == 0) |
2053 | ext4_mb_simple_scan_group(ac, &e4b); | 2096 | ext4_mb_simple_scan_group(ac, &e4b); |
2054 | else if (cr == 1 && | 2097 | else if (cr == 1 && |
@@ -2058,7 +2101,7 @@ repeat: | |||
2058 | ext4_mb_complex_scan_group(ac, &e4b); | 2101 | ext4_mb_complex_scan_group(ac, &e4b); |
2059 | 2102 | ||
2060 | ext4_unlock_group(sb, group); | 2103 | ext4_unlock_group(sb, group); |
2061 | ext4_mb_release_desc(&e4b); | 2104 | ext4_mb_unload_buddy(&e4b); |
2062 | 2105 | ||
2063 | if (ac->ac_status != AC_STATUS_CONTINUE) | 2106 | if (ac->ac_status != AC_STATUS_CONTINUE) |
2064 | break; | 2107 | break; |
@@ -2148,7 +2191,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) | |||
2148 | ext4_lock_group(sb, group); | 2191 | ext4_lock_group(sb, group); |
2149 | memcpy(&sg, ext4_get_group_info(sb, group), i); | 2192 | memcpy(&sg, ext4_get_group_info(sb, group), i); |
2150 | ext4_unlock_group(sb, group); | 2193 | ext4_unlock_group(sb, group); |
2151 | ext4_mb_release_desc(&e4b); | 2194 | ext4_mb_unload_buddy(&e4b); |
2152 | 2195 | ||
2153 | seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free, | 2196 | seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free, |
2154 | sg.info.bb_fragments, sg.info.bb_first_free); | 2197 | sg.info.bb_fragments, sg.info.bb_first_free); |
@@ -2255,6 +2298,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | |||
2255 | INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); | 2298 | INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); |
2256 | init_rwsem(&meta_group_info[i]->alloc_sem); | 2299 | init_rwsem(&meta_group_info[i]->alloc_sem); |
2257 | meta_group_info[i]->bb_free_root = RB_ROOT; | 2300 | meta_group_info[i]->bb_free_root = RB_ROOT; |
2301 | meta_group_info[i]->bb_largest_free_order = -1; /* uninit */ | ||
2258 | 2302 | ||
2259 | #ifdef DOUBLE_CHECK | 2303 | #ifdef DOUBLE_CHECK |
2260 | { | 2304 | { |
@@ -2536,6 +2580,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | |||
2536 | entry->count, entry->group, entry); | 2580 | entry->count, entry->group, entry); |
2537 | 2581 | ||
2538 | if (test_opt(sb, DISCARD)) { | 2582 | if (test_opt(sb, DISCARD)) { |
2583 | int ret; | ||
2539 | ext4_fsblk_t discard_block; | 2584 | ext4_fsblk_t discard_block; |
2540 | 2585 | ||
2541 | discard_block = entry->start_blk + | 2586 | discard_block = entry->start_blk + |
@@ -2543,7 +2588,12 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | |||
2543 | trace_ext4_discard_blocks(sb, | 2588 | trace_ext4_discard_blocks(sb, |
2544 | (unsigned long long)discard_block, | 2589 | (unsigned long long)discard_block, |
2545 | entry->count); | 2590 | entry->count); |
2546 | sb_issue_discard(sb, discard_block, entry->count); | 2591 | ret = sb_issue_discard(sb, discard_block, entry->count); |
2592 | if (ret == EOPNOTSUPP) { | ||
2593 | ext4_warning(sb, | ||
2594 | "discard not supported, disabling"); | ||
2595 | clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD); | ||
2596 | } | ||
2547 | } | 2597 | } |
2548 | 2598 | ||
2549 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); | 2599 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); |
@@ -2568,7 +2618,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | |||
2568 | } | 2618 | } |
2569 | ext4_unlock_group(sb, entry->group); | 2619 | ext4_unlock_group(sb, entry->group); |
2570 | kmem_cache_free(ext4_free_ext_cachep, entry); | 2620 | kmem_cache_free(ext4_free_ext_cachep, entry); |
2571 | ext4_mb_release_desc(&e4b); | 2621 | ext4_mb_unload_buddy(&e4b); |
2572 | } | 2622 | } |
2573 | 2623 | ||
2574 | mb_debug(1, "freed %u blocks in %u structures\n", count, count2); | 2624 | mb_debug(1, "freed %u blocks in %u structures\n", count, count2); |
@@ -2641,7 +2691,7 @@ int __init init_ext4_mballoc(void) | |||
2641 | 2691 | ||
2642 | void exit_ext4_mballoc(void) | 2692 | void exit_ext4_mballoc(void) |
2643 | { | 2693 | { |
2644 | /* | 2694 | /* |
2645 | * Wait for completion of call_rcu()'s on ext4_pspace_cachep | 2695 | * Wait for completion of call_rcu()'s on ext4_pspace_cachep |
2646 | * before destroying the slab cache. | 2696 | * before destroying the slab cache. |
2647 | */ | 2697 | */ |
@@ -2981,7 +3031,7 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac) | |||
2981 | if (sbi->s_mb_stats && ac->ac_g_ex.fe_len > 1) { | 3031 | if (sbi->s_mb_stats && ac->ac_g_ex.fe_len > 1) { |
2982 | atomic_inc(&sbi->s_bal_reqs); | 3032 | atomic_inc(&sbi->s_bal_reqs); |
2983 | atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated); | 3033 | atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated); |
2984 | if (ac->ac_o_ex.fe_len >= ac->ac_g_ex.fe_len) | 3034 | if (ac->ac_b_ex.fe_len >= ac->ac_o_ex.fe_len) |
2985 | atomic_inc(&sbi->s_bal_success); | 3035 | atomic_inc(&sbi->s_bal_success); |
2986 | atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned); | 3036 | atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned); |
2987 | if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start && | 3037 | if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start && |
@@ -3123,7 +3173,7 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) | |||
3123 | continue; | 3173 | continue; |
3124 | 3174 | ||
3125 | /* non-extent files can't have physical blocks past 2^32 */ | 3175 | /* non-extent files can't have physical blocks past 2^32 */ |
3126 | if (!(EXT4_I(ac->ac_inode)->i_flags & EXT4_EXTENTS_FL) && | 3176 | if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) && |
3127 | pa->pa_pstart + pa->pa_len > EXT4_MAX_BLOCK_FILE_PHYS) | 3177 | pa->pa_pstart + pa->pa_len > EXT4_MAX_BLOCK_FILE_PHYS) |
3128 | continue; | 3178 | continue; |
3129 | 3179 | ||
@@ -3280,7 +3330,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac, | |||
3280 | spin_unlock(&pa->pa_lock); | 3330 | spin_unlock(&pa->pa_lock); |
3281 | 3331 | ||
3282 | grp_blk = pa->pa_pstart; | 3332 | grp_blk = pa->pa_pstart; |
3283 | /* | 3333 | /* |
3284 | * If doing group-based preallocation, pa_pstart may be in the | 3334 | * If doing group-based preallocation, pa_pstart may be in the |
3285 | * next group when pa is used up | 3335 | * next group when pa is used up |
3286 | */ | 3336 | */ |
@@ -3697,7 +3747,7 @@ out: | |||
3697 | ext4_unlock_group(sb, group); | 3747 | ext4_unlock_group(sb, group); |
3698 | if (ac) | 3748 | if (ac) |
3699 | kmem_cache_free(ext4_ac_cachep, ac); | 3749 | kmem_cache_free(ext4_ac_cachep, ac); |
3700 | ext4_mb_release_desc(&e4b); | 3750 | ext4_mb_unload_buddy(&e4b); |
3701 | put_bh(bitmap_bh); | 3751 | put_bh(bitmap_bh); |
3702 | return free; | 3752 | return free; |
3703 | } | 3753 | } |
@@ -3801,7 +3851,7 @@ repeat: | |||
3801 | if (bitmap_bh == NULL) { | 3851 | if (bitmap_bh == NULL) { |
3802 | ext4_error(sb, "Error reading block bitmap for %u", | 3852 | ext4_error(sb, "Error reading block bitmap for %u", |
3803 | group); | 3853 | group); |
3804 | ext4_mb_release_desc(&e4b); | 3854 | ext4_mb_unload_buddy(&e4b); |
3805 | continue; | 3855 | continue; |
3806 | } | 3856 | } |
3807 | 3857 | ||
@@ -3810,7 +3860,7 @@ repeat: | |||
3810 | ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac); | 3860 | ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac); |
3811 | ext4_unlock_group(sb, group); | 3861 | ext4_unlock_group(sb, group); |
3812 | 3862 | ||
3813 | ext4_mb_release_desc(&e4b); | 3863 | ext4_mb_unload_buddy(&e4b); |
3814 | put_bh(bitmap_bh); | 3864 | put_bh(bitmap_bh); |
3815 | 3865 | ||
3816 | list_del(&pa->u.pa_tmp_list); | 3866 | list_del(&pa->u.pa_tmp_list); |
@@ -4074,7 +4124,7 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, | |||
4074 | ext4_mb_release_group_pa(&e4b, pa, ac); | 4124 | ext4_mb_release_group_pa(&e4b, pa, ac); |
4075 | ext4_unlock_group(sb, group); | 4125 | ext4_unlock_group(sb, group); |
4076 | 4126 | ||
4077 | ext4_mb_release_desc(&e4b); | 4127 | ext4_mb_unload_buddy(&e4b); |
4078 | list_del(&pa->u.pa_tmp_list); | 4128 | list_del(&pa->u.pa_tmp_list); |
4079 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); | 4129 | call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); |
4080 | } | 4130 | } |
@@ -4484,12 +4534,12 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
4484 | if (!bh) | 4534 | if (!bh) |
4485 | tbh = sb_find_get_block(inode->i_sb, | 4535 | tbh = sb_find_get_block(inode->i_sb, |
4486 | block + i); | 4536 | block + i); |
4487 | ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, | 4537 | ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, |
4488 | inode, tbh, block + i); | 4538 | inode, tbh, block + i); |
4489 | } | 4539 | } |
4490 | } | 4540 | } |
4491 | 4541 | ||
4492 | /* | 4542 | /* |
4493 | * We need to make sure we don't reuse the freed block until | 4543 | * We need to make sure we don't reuse the freed block until |
4494 | * after the transaction is committed, which we can do by | 4544 | * after the transaction is committed, which we can do by |
4495 | * treating the block as metadata, below. We make an | 4545 | * treating the block as metadata, below. We make an |
@@ -4610,7 +4660,7 @@ do_more: | |||
4610 | atomic_add(count, &sbi->s_flex_groups[flex_group].free_blocks); | 4660 | atomic_add(count, &sbi->s_flex_groups[flex_group].free_blocks); |
4611 | } | 4661 | } |
4612 | 4662 | ||
4613 | ext4_mb_release_desc(&e4b); | 4663 | ext4_mb_unload_buddy(&e4b); |
4614 | 4664 | ||
4615 | freed += count; | 4665 | freed += count; |
4616 | 4666 | ||
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 34dcfc52ef44..6f3a27ec30bf 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c | |||
@@ -475,7 +475,7 @@ int ext4_ext_migrate(struct inode *inode) | |||
475 | */ | 475 | */ |
476 | if (!EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb, | 476 | if (!EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb, |
477 | EXT4_FEATURE_INCOMPAT_EXTENTS) || | 477 | EXT4_FEATURE_INCOMPAT_EXTENTS) || |
478 | (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) | 478 | (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
479 | return -EINVAL; | 479 | return -EINVAL; |
480 | 480 | ||
481 | if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0) | 481 | if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0) |
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index d1fc662cc311..3a6c92ac131c 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c | |||
@@ -482,6 +482,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, | |||
482 | int depth = ext_depth(orig_inode); | 482 | int depth = ext_depth(orig_inode); |
483 | int ret; | 483 | int ret; |
484 | 484 | ||
485 | start_ext.ee_block = end_ext.ee_block = 0; | ||
485 | o_start = o_end = oext = orig_path[depth].p_ext; | 486 | o_start = o_end = oext = orig_path[depth].p_ext; |
486 | oext_alen = ext4_ext_get_actual_len(oext); | 487 | oext_alen = ext4_ext_get_actual_len(oext); |
487 | start_ext.ee_len = end_ext.ee_len = 0; | 488 | start_ext.ee_len = end_ext.ee_len = 0; |
@@ -529,7 +530,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, | |||
529 | * new_ext |-------| | 530 | * new_ext |-------| |
530 | */ | 531 | */ |
531 | if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) { | 532 | if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) { |
532 | ext4_error(orig_inode->i_sb, | 533 | EXT4_ERROR_INODE(orig_inode, |
533 | "new_ext_end(%u) should be less than or equal to " | 534 | "new_ext_end(%u) should be less than or equal to " |
534 | "oext->ee_block(%u) + oext_alen(%d) - 1", | 535 | "oext->ee_block(%u) + oext_alen(%d) - 1", |
535 | new_ext_end, le32_to_cpu(oext->ee_block), | 536 | new_ext_end, le32_to_cpu(oext->ee_block), |
@@ -692,12 +693,12 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, | |||
692 | while (1) { | 693 | while (1) { |
693 | /* The extent for donor must be found. */ | 694 | /* The extent for donor must be found. */ |
694 | if (!dext) { | 695 | if (!dext) { |
695 | ext4_error(donor_inode->i_sb, | 696 | EXT4_ERROR_INODE(donor_inode, |
696 | "The extent for donor must be found"); | 697 | "The extent for donor must be found"); |
697 | *err = -EIO; | 698 | *err = -EIO; |
698 | goto out; | 699 | goto out; |
699 | } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) { | 700 | } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) { |
700 | ext4_error(donor_inode->i_sb, | 701 | EXT4_ERROR_INODE(donor_inode, |
701 | "Donor offset(%u) and the first block of donor " | 702 | "Donor offset(%u) and the first block of donor " |
702 | "extent(%u) should be equal", | 703 | "extent(%u) should be equal", |
703 | donor_off, | 704 | donor_off, |
@@ -976,11 +977,11 @@ mext_check_arguments(struct inode *orig_inode, | |||
976 | } | 977 | } |
977 | 978 | ||
978 | /* Ext4 move extent supports only extent based file */ | 979 | /* Ext4 move extent supports only extent based file */ |
979 | if (!(EXT4_I(orig_inode)->i_flags & EXT4_EXTENTS_FL)) { | 980 | if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) { |
980 | ext4_debug("ext4 move extent: orig file is not extents " | 981 | ext4_debug("ext4 move extent: orig file is not extents " |
981 | "based file [ino:orig %lu]\n", orig_inode->i_ino); | 982 | "based file [ino:orig %lu]\n", orig_inode->i_ino); |
982 | return -EOPNOTSUPP; | 983 | return -EOPNOTSUPP; |
983 | } else if (!(EXT4_I(donor_inode)->i_flags & EXT4_EXTENTS_FL)) { | 984 | } else if (!(ext4_test_inode_flag(donor_inode, EXT4_INODE_EXTENTS))) { |
984 | ext4_debug("ext4 move extent: donor file is not extents " | 985 | ext4_debug("ext4 move extent: donor file is not extents " |
985 | "based file [ino:donor %lu]\n", donor_inode->i_ino); | 986 | "based file [ino:donor %lu]\n", donor_inode->i_ino); |
986 | return -EOPNOTSUPP; | 987 | return -EOPNOTSUPP; |
@@ -1354,7 +1355,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1354 | if (ret1 < 0) | 1355 | if (ret1 < 0) |
1355 | break; | 1356 | break; |
1356 | if (*moved_len > len) { | 1357 | if (*moved_len > len) { |
1357 | ext4_error(orig_inode->i_sb, | 1358 | EXT4_ERROR_INODE(orig_inode, |
1358 | "We replaced blocks too much! " | 1359 | "We replaced blocks too much! " |
1359 | "sum of replaced: %llu requested: %llu", | 1360 | "sum of replaced: %llu requested: %llu", |
1360 | *moved_len, len); | 1361 | *moved_len, len); |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 0c070fabd108..a43e6617b351 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -187,7 +187,7 @@ unsigned int ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize) | |||
187 | return blocksize; | 187 | return blocksize; |
188 | return (len & 65532) | ((len & 3) << 16); | 188 | return (len & 65532) | ((len & 3) << 16); |
189 | } | 189 | } |
190 | 190 | ||
191 | __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize) | 191 | __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize) |
192 | { | 192 | { |
193 | if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3)) | 193 | if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3)) |
@@ -197,7 +197,7 @@ __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize) | |||
197 | if (len == blocksize) { | 197 | if (len == blocksize) { |
198 | if (blocksize == 65536) | 198 | if (blocksize == 65536) |
199 | return cpu_to_le16(EXT4_MAX_REC_LEN); | 199 | return cpu_to_le16(EXT4_MAX_REC_LEN); |
200 | else | 200 | else |
201 | return cpu_to_le16(0); | 201 | return cpu_to_le16(0); |
202 | } | 202 | } |
203 | return cpu_to_le16((len & 65532) | ((len >> 16) & 3)); | 203 | return cpu_to_le16((len & 65532) | ((len >> 16) & 3)); |
@@ -349,7 +349,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, | |||
349 | brelse(bh); | 349 | brelse(bh); |
350 | } | 350 | } |
351 | if (bcount) | 351 | if (bcount) |
352 | printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n", | 352 | printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n", |
353 | levels ? "" : " ", names, space/bcount, | 353 | levels ? "" : " ", names, space/bcount, |
354 | (space/bcount)*100/blocksize); | 354 | (space/bcount)*100/blocksize); |
355 | return (struct stats) { names, space, bcount}; | 355 | return (struct stats) { names, space, bcount}; |
@@ -653,10 +653,10 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, | |||
653 | int ret, err; | 653 | int ret, err; |
654 | __u32 hashval; | 654 | __u32 hashval; |
655 | 655 | ||
656 | dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n", | 656 | dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n", |
657 | start_hash, start_minor_hash)); | 657 | start_hash, start_minor_hash)); |
658 | dir = dir_file->f_path.dentry->d_inode; | 658 | dir = dir_file->f_path.dentry->d_inode; |
659 | if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) { | 659 | if (!(ext4_test_inode_flag(dir, EXT4_INODE_INDEX))) { |
660 | hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; | 660 | hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; |
661 | if (hinfo.hash_version <= DX_HASH_TEA) | 661 | if (hinfo.hash_version <= DX_HASH_TEA) |
662 | hinfo.hash_version += | 662 | hinfo.hash_version += |
@@ -801,7 +801,7 @@ static void ext4_update_dx_flag(struct inode *inode) | |||
801 | { | 801 | { |
802 | if (!EXT4_HAS_COMPAT_FEATURE(inode->i_sb, | 802 | if (!EXT4_HAS_COMPAT_FEATURE(inode->i_sb, |
803 | EXT4_FEATURE_COMPAT_DIR_INDEX)) | 803 | EXT4_FEATURE_COMPAT_DIR_INDEX)) |
804 | EXT4_I(inode)->i_flags &= ~EXT4_INDEX_FL; | 804 | ext4_clear_inode_flag(inode, EXT4_INODE_INDEX); |
805 | } | 805 | } |
806 | 806 | ||
807 | /* | 807 | /* |
@@ -943,8 +943,8 @@ restart: | |||
943 | wait_on_buffer(bh); | 943 | wait_on_buffer(bh); |
944 | if (!buffer_uptodate(bh)) { | 944 | if (!buffer_uptodate(bh)) { |
945 | /* read error, skip block & hope for the best */ | 945 | /* read error, skip block & hope for the best */ |
946 | ext4_error(sb, "reading directory #%lu offset %lu", | 946 | EXT4_ERROR_INODE(dir, "reading directory lblock %lu", |
947 | dir->i_ino, (unsigned long)block); | 947 | (unsigned long) block); |
948 | brelse(bh); | 948 | brelse(bh); |
949 | goto next; | 949 | goto next; |
950 | } | 950 | } |
@@ -1066,15 +1066,15 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru | |||
1066 | __u32 ino = le32_to_cpu(de->inode); | 1066 | __u32 ino = le32_to_cpu(de->inode); |
1067 | brelse(bh); | 1067 | brelse(bh); |
1068 | if (!ext4_valid_inum(dir->i_sb, ino)) { | 1068 | if (!ext4_valid_inum(dir->i_sb, ino)) { |
1069 | ext4_error(dir->i_sb, "bad inode number: %u", ino); | 1069 | EXT4_ERROR_INODE(dir, "bad inode number: %u", ino); |
1070 | return ERR_PTR(-EIO); | 1070 | return ERR_PTR(-EIO); |
1071 | } | 1071 | } |
1072 | inode = ext4_iget(dir->i_sb, ino); | 1072 | inode = ext4_iget(dir->i_sb, ino); |
1073 | if (unlikely(IS_ERR(inode))) { | 1073 | if (unlikely(IS_ERR(inode))) { |
1074 | if (PTR_ERR(inode) == -ESTALE) { | 1074 | if (PTR_ERR(inode) == -ESTALE) { |
1075 | ext4_error(dir->i_sb, | 1075 | EXT4_ERROR_INODE(dir, |
1076 | "deleted inode referenced: %u", | 1076 | "deleted inode referenced: %u", |
1077 | ino); | 1077 | ino); |
1078 | return ERR_PTR(-EIO); | 1078 | return ERR_PTR(-EIO); |
1079 | } else { | 1079 | } else { |
1080 | return ERR_CAST(inode); | 1080 | return ERR_CAST(inode); |
@@ -1104,8 +1104,8 @@ struct dentry *ext4_get_parent(struct dentry *child) | |||
1104 | brelse(bh); | 1104 | brelse(bh); |
1105 | 1105 | ||
1106 | if (!ext4_valid_inum(child->d_inode->i_sb, ino)) { | 1106 | if (!ext4_valid_inum(child->d_inode->i_sb, ino)) { |
1107 | ext4_error(child->d_inode->i_sb, | 1107 | EXT4_ERROR_INODE(child->d_inode, |
1108 | "bad inode number: %u", ino); | 1108 | "bad parent inode number: %u", ino); |
1109 | return ERR_PTR(-EIO); | 1109 | return ERR_PTR(-EIO); |
1110 | } | 1110 | } |
1111 | 1111 | ||
@@ -1141,7 +1141,7 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count, | |||
1141 | unsigned rec_len = 0; | 1141 | unsigned rec_len = 0; |
1142 | 1142 | ||
1143 | while (count--) { | 1143 | while (count--) { |
1144 | struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) | 1144 | struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) |
1145 | (from + (map->offs<<2)); | 1145 | (from + (map->offs<<2)); |
1146 | rec_len = EXT4_DIR_REC_LEN(de->name_len); | 1146 | rec_len = EXT4_DIR_REC_LEN(de->name_len); |
1147 | memcpy (to, de, rec_len); | 1147 | memcpy (to, de, rec_len); |
@@ -1404,9 +1404,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
1404 | de = (struct ext4_dir_entry_2 *)((char *)fde + | 1404 | de = (struct ext4_dir_entry_2 *)((char *)fde + |
1405 | ext4_rec_len_from_disk(fde->rec_len, blocksize)); | 1405 | ext4_rec_len_from_disk(fde->rec_len, blocksize)); |
1406 | if ((char *) de >= (((char *) root) + blocksize)) { | 1406 | if ((char *) de >= (((char *) root) + blocksize)) { |
1407 | ext4_error(dir->i_sb, | 1407 | EXT4_ERROR_INODE(dir, "invalid rec_len for '..'"); |
1408 | "invalid rec_len for '..' in inode %lu", | ||
1409 | dir->i_ino); | ||
1410 | brelse(bh); | 1408 | brelse(bh); |
1411 | return -EIO; | 1409 | return -EIO; |
1412 | } | 1410 | } |
@@ -1418,7 +1416,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
1418 | brelse(bh); | 1416 | brelse(bh); |
1419 | return retval; | 1417 | return retval; |
1420 | } | 1418 | } |
1421 | EXT4_I(dir)->i_flags |= EXT4_INDEX_FL; | 1419 | ext4_set_inode_flag(dir, EXT4_INODE_INDEX); |
1422 | data1 = bh2->b_data; | 1420 | data1 = bh2->b_data; |
1423 | 1421 | ||
1424 | memcpy (data1, de, len); | 1422 | memcpy (data1, de, len); |
@@ -1491,7 +1489,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, | |||
1491 | retval = ext4_dx_add_entry(handle, dentry, inode); | 1489 | retval = ext4_dx_add_entry(handle, dentry, inode); |
1492 | if (!retval || (retval != ERR_BAD_DX_DIR)) | 1490 | if (!retval || (retval != ERR_BAD_DX_DIR)) |
1493 | return retval; | 1491 | return retval; |
1494 | EXT4_I(dir)->i_flags &= ~EXT4_INDEX_FL; | 1492 | ext4_clear_inode_flag(dir, EXT4_INODE_INDEX); |
1495 | dx_fallback++; | 1493 | dx_fallback++; |
1496 | ext4_mark_inode_dirty(handle, dir); | 1494 | ext4_mark_inode_dirty(handle, dir); |
1497 | } | 1495 | } |
@@ -1519,6 +1517,8 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, | |||
1519 | de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize); | 1517 | de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize); |
1520 | retval = add_dirent_to_buf(handle, dentry, inode, de, bh); | 1518 | retval = add_dirent_to_buf(handle, dentry, inode, de, bh); |
1521 | brelse(bh); | 1519 | brelse(bh); |
1520 | if (retval == 0) | ||
1521 | ext4_set_inode_state(inode, EXT4_STATE_NEWENTRY); | ||
1522 | return retval; | 1522 | return retval; |
1523 | } | 1523 | } |
1524 | 1524 | ||
@@ -1915,9 +1915,8 @@ static int empty_dir(struct inode *inode) | |||
1915 | if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || | 1915 | if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || |
1916 | !(bh = ext4_bread(NULL, inode, 0, 0, &err))) { | 1916 | !(bh = ext4_bread(NULL, inode, 0, 0, &err))) { |
1917 | if (err) | 1917 | if (err) |
1918 | ext4_error(inode->i_sb, | 1918 | EXT4_ERROR_INODE(inode, |
1919 | "error %d reading directory #%lu offset 0", | 1919 | "error %d reading directory lblock 0", err); |
1920 | err, inode->i_ino); | ||
1921 | else | 1920 | else |
1922 | ext4_warning(inode->i_sb, | 1921 | ext4_warning(inode->i_sb, |
1923 | "bad directory (dir #%lu) - no data block", | 1922 | "bad directory (dir #%lu) - no data block", |
@@ -1941,17 +1940,17 @@ static int empty_dir(struct inode *inode) | |||
1941 | de = ext4_next_entry(de1, sb->s_blocksize); | 1940 | de = ext4_next_entry(de1, sb->s_blocksize); |
1942 | while (offset < inode->i_size) { | 1941 | while (offset < inode->i_size) { |
1943 | if (!bh || | 1942 | if (!bh || |
1944 | (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { | 1943 | (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { |
1944 | unsigned int lblock; | ||
1945 | err = 0; | 1945 | err = 0; |
1946 | brelse(bh); | 1946 | brelse(bh); |
1947 | bh = ext4_bread(NULL, inode, | 1947 | lblock = offset >> EXT4_BLOCK_SIZE_BITS(sb); |
1948 | offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err); | 1948 | bh = ext4_bread(NULL, inode, lblock, 0, &err); |
1949 | if (!bh) { | 1949 | if (!bh) { |
1950 | if (err) | 1950 | if (err) |
1951 | ext4_error(sb, | 1951 | EXT4_ERROR_INODE(inode, |
1952 | "error %d reading directory" | 1952 | "error %d reading directory " |
1953 | " #%lu offset %u", | 1953 | "lblock %u", err, lblock); |
1954 | err, inode->i_ino, offset); | ||
1955 | offset += sb->s_blocksize; | 1954 | offset += sb->s_blocksize; |
1956 | continue; | 1955 | continue; |
1957 | } | 1956 | } |
@@ -2297,7 +2296,7 @@ retry: | |||
2297 | } | 2296 | } |
2298 | } else { | 2297 | } else { |
2299 | /* clear the extent format for fast symlink */ | 2298 | /* clear the extent format for fast symlink */ |
2300 | EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL; | 2299 | ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS); |
2301 | inode->i_op = &ext4_fast_symlink_inode_operations; | 2300 | inode->i_op = &ext4_fast_symlink_inode_operations; |
2302 | memcpy((char *)&EXT4_I(inode)->i_data, symname, l); | 2301 | memcpy((char *)&EXT4_I(inode)->i_data, symname, l); |
2303 | inode->i_size = l-1; | 2302 | inode->i_size = l-1; |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 5692c48754a0..6df797eb9aeb 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -911,7 +911,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
911 | percpu_counter_add(&sbi->s_freeinodes_counter, | 911 | percpu_counter_add(&sbi->s_freeinodes_counter, |
912 | EXT4_INODES_PER_GROUP(sb)); | 912 | EXT4_INODES_PER_GROUP(sb)); |
913 | 913 | ||
914 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { | 914 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) && |
915 | sbi->s_log_groups_per_flex) { | ||
915 | ext4_group_t flex_group; | 916 | ext4_group_t flex_group; |
916 | flex_group = ext4_flex_group(sbi, input->group); | 917 | flex_group = ext4_flex_group(sbi, input->group); |
917 | atomic_add(input->free_blocks_count, | 918 | atomic_add(input->free_blocks_count, |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index e14d22c170d5..49d88c0597c4 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -241,6 +241,7 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) | |||
241 | if (sb->s_flags & MS_RDONLY) | 241 | if (sb->s_flags & MS_RDONLY) |
242 | return ERR_PTR(-EROFS); | 242 | return ERR_PTR(-EROFS); |
243 | 243 | ||
244 | vfs_check_frozen(sb, SB_FREEZE_WRITE); | ||
244 | /* Special case here: if the journal has aborted behind our | 245 | /* Special case here: if the journal has aborted behind our |
245 | * backs (eg. EIO in the commit thread), then we still need to | 246 | * backs (eg. EIO in the commit thread), then we still need to |
246 | * take the FS itself readonly cleanly. */ | 247 | * take the FS itself readonly cleanly. */ |
@@ -941,6 +942,8 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
941 | seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0"); | 942 | seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0"); |
942 | if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) | 943 | if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) |
943 | seq_puts(seq, ",journal_async_commit"); | 944 | seq_puts(seq, ",journal_async_commit"); |
945 | else if (test_opt(sb, JOURNAL_CHECKSUM)) | ||
946 | seq_puts(seq, ",journal_checksum"); | ||
944 | if (test_opt(sb, NOBH)) | 947 | if (test_opt(sb, NOBH)) |
945 | seq_puts(seq, ",nobh"); | 948 | seq_puts(seq, ",nobh"); |
946 | if (test_opt(sb, I_VERSION)) | 949 | if (test_opt(sb, I_VERSION)) |
@@ -2213,7 +2216,7 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi) | |||
2213 | struct ext4_attr { | 2216 | struct ext4_attr { |
2214 | struct attribute attr; | 2217 | struct attribute attr; |
2215 | ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *); | 2218 | ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *); |
2216 | ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *, | 2219 | ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *, |
2217 | const char *, size_t); | 2220 | const char *, size_t); |
2218 | int offset; | 2221 | int offset; |
2219 | }; | 2222 | }; |
@@ -2430,6 +2433,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2430 | __releases(kernel_lock) | 2433 | __releases(kernel_lock) |
2431 | __acquires(kernel_lock) | 2434 | __acquires(kernel_lock) |
2432 | { | 2435 | { |
2436 | char *orig_data = kstrdup(data, GFP_KERNEL); | ||
2433 | struct buffer_head *bh; | 2437 | struct buffer_head *bh; |
2434 | struct ext4_super_block *es = NULL; | 2438 | struct ext4_super_block *es = NULL; |
2435 | struct ext4_sb_info *sbi; | 2439 | struct ext4_sb_info *sbi; |
@@ -2793,24 +2797,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2793 | get_random_bytes(&sbi->s_next_generation, sizeof(u32)); | 2797 | get_random_bytes(&sbi->s_next_generation, sizeof(u32)); |
2794 | spin_lock_init(&sbi->s_next_gen_lock); | 2798 | spin_lock_init(&sbi->s_next_gen_lock); |
2795 | 2799 | ||
2796 | err = percpu_counter_init(&sbi->s_freeblocks_counter, | ||
2797 | ext4_count_free_blocks(sb)); | ||
2798 | if (!err) { | ||
2799 | err = percpu_counter_init(&sbi->s_freeinodes_counter, | ||
2800 | ext4_count_free_inodes(sb)); | ||
2801 | } | ||
2802 | if (!err) { | ||
2803 | err = percpu_counter_init(&sbi->s_dirs_counter, | ||
2804 | ext4_count_dirs(sb)); | ||
2805 | } | ||
2806 | if (!err) { | ||
2807 | err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); | ||
2808 | } | ||
2809 | if (err) { | ||
2810 | ext4_msg(sb, KERN_ERR, "insufficient memory"); | ||
2811 | goto failed_mount3; | ||
2812 | } | ||
2813 | |||
2814 | sbi->s_stripe = ext4_get_stripe_size(sbi); | 2800 | sbi->s_stripe = ext4_get_stripe_size(sbi); |
2815 | sbi->s_max_writeback_mb_bump = 128; | 2801 | sbi->s_max_writeback_mb_bump = 128; |
2816 | 2802 | ||
@@ -2910,6 +2896,20 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2910 | set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); | 2896 | set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); |
2911 | 2897 | ||
2912 | no_journal: | 2898 | no_journal: |
2899 | err = percpu_counter_init(&sbi->s_freeblocks_counter, | ||
2900 | ext4_count_free_blocks(sb)); | ||
2901 | if (!err) | ||
2902 | err = percpu_counter_init(&sbi->s_freeinodes_counter, | ||
2903 | ext4_count_free_inodes(sb)); | ||
2904 | if (!err) | ||
2905 | err = percpu_counter_init(&sbi->s_dirs_counter, | ||
2906 | ext4_count_dirs(sb)); | ||
2907 | if (!err) | ||
2908 | err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); | ||
2909 | if (err) { | ||
2910 | ext4_msg(sb, KERN_ERR, "insufficient memory"); | ||
2911 | goto failed_mount_wq; | ||
2912 | } | ||
2913 | if (test_opt(sb, NOBH)) { | 2913 | if (test_opt(sb, NOBH)) { |
2914 | if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) { | 2914 | if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) { |
2915 | ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - " | 2915 | ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - " |
@@ -3001,7 +3001,7 @@ no_journal: | |||
3001 | err = ext4_setup_system_zone(sb); | 3001 | err = ext4_setup_system_zone(sb); |
3002 | if (err) { | 3002 | if (err) { |
3003 | ext4_msg(sb, KERN_ERR, "failed to initialize system " | 3003 | ext4_msg(sb, KERN_ERR, "failed to initialize system " |
3004 | "zone (%d)\n", err); | 3004 | "zone (%d)", err); |
3005 | goto failed_mount4; | 3005 | goto failed_mount4; |
3006 | } | 3006 | } |
3007 | 3007 | ||
@@ -3040,9 +3040,11 @@ no_journal: | |||
3040 | } else | 3040 | } else |
3041 | descr = "out journal"; | 3041 | descr = "out journal"; |
3042 | 3042 | ||
3043 | ext4_msg(sb, KERN_INFO, "mounted filesystem with%s", descr); | 3043 | ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. " |
3044 | "Opts: %s", descr, orig_data); | ||
3044 | 3045 | ||
3045 | lock_kernel(); | 3046 | lock_kernel(); |
3047 | kfree(orig_data); | ||
3046 | return 0; | 3048 | return 0; |
3047 | 3049 | ||
3048 | cantfind_ext4: | 3050 | cantfind_ext4: |
@@ -3059,6 +3061,10 @@ failed_mount_wq: | |||
3059 | jbd2_journal_destroy(sbi->s_journal); | 3061 | jbd2_journal_destroy(sbi->s_journal); |
3060 | sbi->s_journal = NULL; | 3062 | sbi->s_journal = NULL; |
3061 | } | 3063 | } |
3064 | percpu_counter_destroy(&sbi->s_freeblocks_counter); | ||
3065 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | ||
3066 | percpu_counter_destroy(&sbi->s_dirs_counter); | ||
3067 | percpu_counter_destroy(&sbi->s_dirtyblocks_counter); | ||
3062 | failed_mount3: | 3068 | failed_mount3: |
3063 | if (sbi->s_flex_groups) { | 3069 | if (sbi->s_flex_groups) { |
3064 | if (is_vmalloc_addr(sbi->s_flex_groups)) | 3070 | if (is_vmalloc_addr(sbi->s_flex_groups)) |
@@ -3066,10 +3072,6 @@ failed_mount3: | |||
3066 | else | 3072 | else |
3067 | kfree(sbi->s_flex_groups); | 3073 | kfree(sbi->s_flex_groups); |
3068 | } | 3074 | } |
3069 | percpu_counter_destroy(&sbi->s_freeblocks_counter); | ||
3070 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | ||
3071 | percpu_counter_destroy(&sbi->s_dirs_counter); | ||
3072 | percpu_counter_destroy(&sbi->s_dirtyblocks_counter); | ||
3073 | failed_mount2: | 3075 | failed_mount2: |
3074 | for (i = 0; i < db_count; i++) | 3076 | for (i = 0; i < db_count; i++) |
3075 | brelse(sbi->s_group_desc[i]); | 3077 | brelse(sbi->s_group_desc[i]); |
@@ -3089,6 +3091,7 @@ out_fail: | |||
3089 | kfree(sbi->s_blockgroup_lock); | 3091 | kfree(sbi->s_blockgroup_lock); |
3090 | kfree(sbi); | 3092 | kfree(sbi); |
3091 | lock_kernel(); | 3093 | lock_kernel(); |
3094 | kfree(orig_data); | ||
3092 | return ret; | 3095 | return ret; |
3093 | } | 3096 | } |
3094 | 3097 | ||
@@ -3380,7 +3383,7 @@ static int ext4_commit_super(struct super_block *sb, int sync) | |||
3380 | if (!(sb->s_flags & MS_RDONLY)) | 3383 | if (!(sb->s_flags & MS_RDONLY)) |
3381 | es->s_wtime = cpu_to_le32(get_seconds()); | 3384 | es->s_wtime = cpu_to_le32(get_seconds()); |
3382 | es->s_kbytes_written = | 3385 | es->s_kbytes_written = |
3383 | cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + | 3386 | cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + |
3384 | ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - | 3387 | ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - |
3385 | EXT4_SB(sb)->s_sectors_written_start) >> 1)); | 3388 | EXT4_SB(sb)->s_sectors_written_start) >> 1)); |
3386 | ext4_free_blocks_count_set(es, percpu_counter_sum_positive( | 3389 | ext4_free_blocks_count_set(es, percpu_counter_sum_positive( |
@@ -3485,8 +3488,10 @@ int ext4_force_commit(struct super_block *sb) | |||
3485 | return 0; | 3488 | return 0; |
3486 | 3489 | ||
3487 | journal = EXT4_SB(sb)->s_journal; | 3490 | journal = EXT4_SB(sb)->s_journal; |
3488 | if (journal) | 3491 | if (journal) { |
3492 | vfs_check_frozen(sb, SB_FREEZE_WRITE); | ||
3489 | ret = ext4_journal_force_commit(journal); | 3493 | ret = ext4_journal_force_commit(journal); |
3494 | } | ||
3490 | 3495 | ||
3491 | return ret; | 3496 | return ret; |
3492 | } | 3497 | } |
@@ -3535,18 +3540,16 @@ static int ext4_freeze(struct super_block *sb) | |||
3535 | * the journal. | 3540 | * the journal. |
3536 | */ | 3541 | */ |
3537 | error = jbd2_journal_flush(journal); | 3542 | error = jbd2_journal_flush(journal); |
3538 | if (error < 0) { | 3543 | if (error < 0) |
3539 | out: | 3544 | goto out; |
3540 | jbd2_journal_unlock_updates(journal); | ||
3541 | return error; | ||
3542 | } | ||
3543 | 3545 | ||
3544 | /* Journal blocked and flushed, clear needs_recovery flag. */ | 3546 | /* Journal blocked and flushed, clear needs_recovery flag. */ |
3545 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 3547 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); |
3546 | error = ext4_commit_super(sb, 1); | 3548 | error = ext4_commit_super(sb, 1); |
3547 | if (error) | 3549 | out: |
3548 | goto out; | 3550 | /* we rely on s_frozen to stop further updates */ |
3549 | return 0; | 3551 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); |
3552 | return error; | ||
3550 | } | 3553 | } |
3551 | 3554 | ||
3552 | /* | 3555 | /* |
@@ -3563,7 +3566,6 @@ static int ext4_unfreeze(struct super_block *sb) | |||
3563 | EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 3566 | EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); |
3564 | ext4_commit_super(sb, 1); | 3567 | ext4_commit_super(sb, 1); |
3565 | unlock_super(sb); | 3568 | unlock_super(sb); |
3566 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); | ||
3567 | return 0; | 3569 | return 0; |
3568 | } | 3570 | } |
3569 | 3571 | ||
@@ -3580,6 +3582,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
3580 | #ifdef CONFIG_QUOTA | 3582 | #ifdef CONFIG_QUOTA |
3581 | int i; | 3583 | int i; |
3582 | #endif | 3584 | #endif |
3585 | char *orig_data = kstrdup(data, GFP_KERNEL); | ||
3583 | 3586 | ||
3584 | lock_kernel(); | 3587 | lock_kernel(); |
3585 | 3588 | ||
@@ -3713,6 +3716,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
3713 | #endif | 3716 | #endif |
3714 | unlock_super(sb); | 3717 | unlock_super(sb); |
3715 | unlock_kernel(); | 3718 | unlock_kernel(); |
3719 | |||
3720 | ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data); | ||
3721 | kfree(orig_data); | ||
3716 | return 0; | 3722 | return 0; |
3717 | 3723 | ||
3718 | restore_opts: | 3724 | restore_opts: |
@@ -3734,6 +3740,7 @@ restore_opts: | |||
3734 | #endif | 3740 | #endif |
3735 | unlock_super(sb); | 3741 | unlock_super(sb); |
3736 | unlock_kernel(); | 3742 | unlock_kernel(); |
3743 | kfree(orig_data); | ||
3737 | return err; | 3744 | return err; |
3738 | } | 3745 | } |
3739 | 3746 | ||
@@ -4141,6 +4148,7 @@ static int __init init_ext4_fs(void) | |||
4141 | { | 4148 | { |
4142 | int err; | 4149 | int err; |
4143 | 4150 | ||
4151 | ext4_check_flag_values(); | ||
4144 | err = init_ext4_system_zone(); | 4152 | err = init_ext4_system_zone(); |
4145 | if (err) | 4153 | if (err) |
4146 | return err; | 4154 | return err; |
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c index 00740cb32be3..ed9354aff279 100644 --- a/fs/ext4/symlink.c +++ b/fs/ext4/symlink.c | |||
@@ -34,6 +34,7 @@ const struct inode_operations ext4_symlink_inode_operations = { | |||
34 | .readlink = generic_readlink, | 34 | .readlink = generic_readlink, |
35 | .follow_link = page_follow_link_light, | 35 | .follow_link = page_follow_link_light, |
36 | .put_link = page_put_link, | 36 | .put_link = page_put_link, |
37 | .setattr = ext4_setattr, | ||
37 | #ifdef CONFIG_EXT4_FS_XATTR | 38 | #ifdef CONFIG_EXT4_FS_XATTR |
38 | .setxattr = generic_setxattr, | 39 | .setxattr = generic_setxattr, |
39 | .getxattr = generic_getxattr, | 40 | .getxattr = generic_getxattr, |
@@ -45,6 +46,7 @@ const struct inode_operations ext4_symlink_inode_operations = { | |||
45 | const struct inode_operations ext4_fast_symlink_inode_operations = { | 46 | const struct inode_operations ext4_fast_symlink_inode_operations = { |
46 | .readlink = generic_readlink, | 47 | .readlink = generic_readlink, |
47 | .follow_link = ext4_follow_link, | 48 | .follow_link = ext4_follow_link, |
49 | .setattr = ext4_setattr, | ||
48 | #ifdef CONFIG_EXT4_FS_XATTR | 50 | #ifdef CONFIG_EXT4_FS_XATTR |
49 | .setxattr = generic_setxattr, | 51 | .setxattr = generic_setxattr, |
50 | .getxattr = generic_getxattr, | 52 | .getxattr = generic_getxattr, |
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 2de0e9515089..04338009793a 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c | |||
@@ -228,9 +228,8 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name, | |||
228 | atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); | 228 | atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); |
229 | if (ext4_xattr_check_block(bh)) { | 229 | if (ext4_xattr_check_block(bh)) { |
230 | bad_block: | 230 | bad_block: |
231 | ext4_error(inode->i_sb, | 231 | EXT4_ERROR_INODE(inode, "bad block %llu", |
232 | "inode %lu: bad block %llu", inode->i_ino, | 232 | EXT4_I(inode)->i_file_acl); |
233 | EXT4_I(inode)->i_file_acl); | ||
234 | error = -EIO; | 233 | error = -EIO; |
235 | goto cleanup; | 234 | goto cleanup; |
236 | } | 235 | } |
@@ -372,9 +371,8 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size) | |||
372 | ea_bdebug(bh, "b_count=%d, refcount=%d", | 371 | ea_bdebug(bh, "b_count=%d, refcount=%d", |
373 | atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); | 372 | atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); |
374 | if (ext4_xattr_check_block(bh)) { | 373 | if (ext4_xattr_check_block(bh)) { |
375 | ext4_error(inode->i_sb, | 374 | EXT4_ERROR_INODE(inode, "bad block %llu", |
376 | "inode %lu: bad block %llu", inode->i_ino, | 375 | EXT4_I(inode)->i_file_acl); |
377 | EXT4_I(inode)->i_file_acl); | ||
378 | error = -EIO; | 376 | error = -EIO; |
379 | goto cleanup; | 377 | goto cleanup; |
380 | } | 378 | } |
@@ -666,8 +664,8 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i, | |||
666 | atomic_read(&(bs->bh->b_count)), | 664 | atomic_read(&(bs->bh->b_count)), |
667 | le32_to_cpu(BHDR(bs->bh)->h_refcount)); | 665 | le32_to_cpu(BHDR(bs->bh)->h_refcount)); |
668 | if (ext4_xattr_check_block(bs->bh)) { | 666 | if (ext4_xattr_check_block(bs->bh)) { |
669 | ext4_error(sb, "inode %lu: bad block %llu", | 667 | EXT4_ERROR_INODE(inode, "bad block %llu", |
670 | inode->i_ino, EXT4_I(inode)->i_file_acl); | 668 | EXT4_I(inode)->i_file_acl); |
671 | error = -EIO; | 669 | error = -EIO; |
672 | goto cleanup; | 670 | goto cleanup; |
673 | } | 671 | } |
@@ -820,7 +818,7 @@ inserted: | |||
820 | EXT4_I(inode)->i_block_group); | 818 | EXT4_I(inode)->i_block_group); |
821 | 819 | ||
822 | /* non-extent files can't have physical blocks past 2^32 */ | 820 | /* non-extent files can't have physical blocks past 2^32 */ |
823 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) | 821 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
824 | goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; | 822 | goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; |
825 | 823 | ||
826 | block = ext4_new_meta_blocks(handle, inode, | 824 | block = ext4_new_meta_blocks(handle, inode, |
@@ -828,7 +826,7 @@ inserted: | |||
828 | if (error) | 826 | if (error) |
829 | goto cleanup; | 827 | goto cleanup; |
830 | 828 | ||
831 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) | 829 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
832 | BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS); | 830 | BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS); |
833 | 831 | ||
834 | ea_idebug(inode, "creating block %d", block); | 832 | ea_idebug(inode, "creating block %d", block); |
@@ -880,8 +878,8 @@ cleanup_dquot: | |||
880 | goto cleanup; | 878 | goto cleanup; |
881 | 879 | ||
882 | bad_block: | 880 | bad_block: |
883 | ext4_error(inode->i_sb, "inode %lu: bad block %llu", | 881 | EXT4_ERROR_INODE(inode, "bad block %llu", |
884 | inode->i_ino, EXT4_I(inode)->i_file_acl); | 882 | EXT4_I(inode)->i_file_acl); |
885 | goto cleanup; | 883 | goto cleanup; |
886 | 884 | ||
887 | #undef header | 885 | #undef header |
@@ -1194,8 +1192,8 @@ retry: | |||
1194 | if (!bh) | 1192 | if (!bh) |
1195 | goto cleanup; | 1193 | goto cleanup; |
1196 | if (ext4_xattr_check_block(bh)) { | 1194 | if (ext4_xattr_check_block(bh)) { |
1197 | ext4_error(inode->i_sb, "inode %lu: bad block %llu", | 1195 | EXT4_ERROR_INODE(inode, "bad block %llu", |
1198 | inode->i_ino, EXT4_I(inode)->i_file_acl); | 1196 | EXT4_I(inode)->i_file_acl); |
1199 | error = -EIO; | 1197 | error = -EIO; |
1200 | goto cleanup; | 1198 | goto cleanup; |
1201 | } | 1199 | } |
@@ -1372,14 +1370,14 @@ ext4_xattr_delete_inode(handle_t *handle, struct inode *inode) | |||
1372 | goto cleanup; | 1370 | goto cleanup; |
1373 | bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); | 1371 | bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); |
1374 | if (!bh) { | 1372 | if (!bh) { |
1375 | ext4_error(inode->i_sb, "inode %lu: block %llu read error", | 1373 | EXT4_ERROR_INODE(inode, "block %llu read error", |
1376 | inode->i_ino, EXT4_I(inode)->i_file_acl); | 1374 | EXT4_I(inode)->i_file_acl); |
1377 | goto cleanup; | 1375 | goto cleanup; |
1378 | } | 1376 | } |
1379 | if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || | 1377 | if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || |
1380 | BHDR(bh)->h_blocks != cpu_to_le32(1)) { | 1378 | BHDR(bh)->h_blocks != cpu_to_le32(1)) { |
1381 | ext4_error(inode->i_sb, "inode %lu: bad block %llu", | 1379 | EXT4_ERROR_INODE(inode, "bad block %llu", |
1382 | inode->i_ino, EXT4_I(inode)->i_file_acl); | 1380 | EXT4_I(inode)->i_file_acl); |
1383 | goto cleanup; | 1381 | goto cleanup; |
1384 | } | 1382 | } |
1385 | ext4_xattr_release_block(handle, inode, bh); | 1383 | ext4_xattr_release_block(handle, inode, bh); |
@@ -1504,9 +1502,8 @@ again: | |||
1504 | } | 1502 | } |
1505 | bh = sb_bread(inode->i_sb, ce->e_block); | 1503 | bh = sb_bread(inode->i_sb, ce->e_block); |
1506 | if (!bh) { | 1504 | if (!bh) { |
1507 | ext4_error(inode->i_sb, | 1505 | EXT4_ERROR_INODE(inode, "block %lu read error", |
1508 | "inode %lu: block %lu read error", | 1506 | (unsigned long) ce->e_block); |
1509 | inode->i_ino, (unsigned long) ce->e_block); | ||
1510 | } else if (le32_to_cpu(BHDR(bh)->h_refcount) >= | 1507 | } else if (le32_to_cpu(BHDR(bh)->h_refcount) >= |
1511 | EXT4_XATTR_REFCOUNT_MAX) { | 1508 | EXT4_XATTR_REFCOUNT_MAX) { |
1512 | ea_idebug(inode, "block %lu refcount %d>=%d", | 1509 | ea_idebug(inode, "block %lu refcount %d>=%d", |
diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c index aee049cb9f84..0ec7bb2c95c6 100644 --- a/fs/freevxfs/vxfs_lookup.c +++ b/fs/freevxfs/vxfs_lookup.c | |||
@@ -57,6 +57,8 @@ const struct inode_operations vxfs_dir_inode_ops = { | |||
57 | }; | 57 | }; |
58 | 58 | ||
59 | const struct file_operations vxfs_dir_operations = { | 59 | const struct file_operations vxfs_dir_operations = { |
60 | .llseek = generic_file_llseek, | ||
61 | .read = generic_read_dir, | ||
60 | .readdir = vxfs_readdir, | 62 | .readdir = vxfs_readdir, |
61 | }; | 63 | }; |
62 | 64 | ||
diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c index 1e1f286dd70e..4a8eb31c5338 100644 --- a/fs/fscache/object-list.c +++ b/fs/fscache/object-list.c | |||
@@ -103,7 +103,7 @@ static struct fscache_object *fscache_objlist_lookup(loff_t *_pos) | |||
103 | /* banners (can't represent line 0 by pos 0 as that would involve | 103 | /* banners (can't represent line 0 by pos 0 as that would involve |
104 | * returning a NULL pointer) */ | 104 | * returning a NULL pointer) */ |
105 | if (pos == 0) | 105 | if (pos == 0) |
106 | return (struct fscache_object *) ++(*_pos); | 106 | return (struct fscache_object *)(long)++(*_pos); |
107 | if (pos < 3) | 107 | if (pos < 3) |
108 | return (struct fscache_object *)pos; | 108 | return (struct fscache_object *)pos; |
109 | 109 | ||
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index eb7e9423691f..e53df5ebb2b8 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/slab.h> | 18 | #include <linux/slab.h> |
19 | 19 | ||
20 | MODULE_ALIAS_MISCDEV(FUSE_MINOR); | 20 | MODULE_ALIAS_MISCDEV(FUSE_MINOR); |
21 | MODULE_ALIAS("devname:fuse"); | ||
21 | 22 | ||
22 | static struct kmem_cache *fuse_req_cachep; | 23 | static struct kmem_cache *fuse_req_cachep; |
23 | 24 | ||
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c index b9ab69b3a482..e0aca9a0ac68 100644 --- a/fs/isofs/dir.c +++ b/fs/isofs/dir.c | |||
@@ -272,6 +272,7 @@ static int isofs_readdir(struct file *filp, | |||
272 | 272 | ||
273 | const struct file_operations isofs_dir_operations = | 273 | const struct file_operations isofs_dir_operations = |
274 | { | 274 | { |
275 | .llseek = generic_file_llseek, | ||
275 | .read = generic_read_dir, | 276 | .read = generic_read_dir, |
276 | .readdir = isofs_readdir, | 277 | .readdir = isofs_readdir, |
277 | }; | 278 | }; |
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index bfc70f57900f..e214d68620ac 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -1311,7 +1311,6 @@ int jbd2_journal_stop(handle_t *handle) | |||
1311 | if (handle->h_sync) | 1311 | if (handle->h_sync) |
1312 | transaction->t_synchronous_commit = 1; | 1312 | transaction->t_synchronous_commit = 1; |
1313 | current->journal_info = NULL; | 1313 | current->journal_info = NULL; |
1314 | spin_lock(&journal->j_state_lock); | ||
1315 | spin_lock(&transaction->t_handle_lock); | 1314 | spin_lock(&transaction->t_handle_lock); |
1316 | transaction->t_outstanding_credits -= handle->h_buffer_credits; | 1315 | transaction->t_outstanding_credits -= handle->h_buffer_credits; |
1317 | transaction->t_updates--; | 1316 | transaction->t_updates--; |
@@ -1340,8 +1339,7 @@ int jbd2_journal_stop(handle_t *handle) | |||
1340 | jbd_debug(2, "transaction too old, requesting commit for " | 1339 | jbd_debug(2, "transaction too old, requesting commit for " |
1341 | "handle %p\n", handle); | 1340 | "handle %p\n", handle); |
1342 | /* This is non-blocking */ | 1341 | /* This is non-blocking */ |
1343 | __jbd2_log_start_commit(journal, transaction->t_tid); | 1342 | jbd2_log_start_commit(journal, transaction->t_tid); |
1344 | spin_unlock(&journal->j_state_lock); | ||
1345 | 1343 | ||
1346 | /* | 1344 | /* |
1347 | * Special case: JBD2_SYNC synchronous updates require us | 1345 | * Special case: JBD2_SYNC synchronous updates require us |
@@ -1351,7 +1349,6 @@ int jbd2_journal_stop(handle_t *handle) | |||
1351 | err = jbd2_log_wait_commit(journal, tid); | 1349 | err = jbd2_log_wait_commit(journal, tid); |
1352 | } else { | 1350 | } else { |
1353 | spin_unlock(&transaction->t_handle_lock); | 1351 | spin_unlock(&transaction->t_handle_lock); |
1354 | spin_unlock(&journal->j_state_lock); | ||
1355 | } | 1352 | } |
1356 | 1353 | ||
1357 | lock_map_release(&handle->h_lockdep_map); | 1354 | lock_map_release(&handle->h_lockdep_map); |
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 92dde6f8d893..9578cbe0cd58 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c | |||
@@ -49,6 +49,7 @@ extern int ncp_symlink(struct inode *, struct dentry *, const char *); | |||
49 | 49 | ||
50 | const struct file_operations ncp_dir_operations = | 50 | const struct file_operations ncp_dir_operations = |
51 | { | 51 | { |
52 | .llseek = generic_file_llseek, | ||
52 | .read = generic_read_dir, | 53 | .read = generic_read_dir, |
53 | .readdir = ncp_readdir, | 54 | .readdir = ncp_readdir, |
54 | .unlocked_ioctl = ncp_ioctl, | 55 | .unlocked_ioctl = ncp_ioctl, |
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index ee9a179ebdf3..db64854b7b09 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
@@ -1741,6 +1741,7 @@ remove_lru_entry: | |||
1741 | clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags); | 1741 | clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags); |
1742 | smp_mb__after_clear_bit(); | 1742 | smp_mb__after_clear_bit(); |
1743 | } | 1743 | } |
1744 | spin_unlock(&inode->i_lock); | ||
1744 | } | 1745 | } |
1745 | spin_unlock(&nfs_access_lru_lock); | 1746 | spin_unlock(&nfs_access_lru_lock); |
1746 | nfs_access_free_list(&head); | 1747 | nfs_access_free_list(&head); |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 3aea3ca98ab7..91679e2631ee 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -1386,7 +1386,7 @@ static int nfs_commit_inode(struct inode *inode, int how) | |||
1386 | int res = 0; | 1386 | int res = 0; |
1387 | 1387 | ||
1388 | if (!nfs_commit_set_lock(NFS_I(inode), may_wait)) | 1388 | if (!nfs_commit_set_lock(NFS_I(inode), may_wait)) |
1389 | goto out; | 1389 | goto out_mark_dirty; |
1390 | spin_lock(&inode->i_lock); | 1390 | spin_lock(&inode->i_lock); |
1391 | res = nfs_scan_commit(inode, &head, 0, 0); | 1391 | res = nfs_scan_commit(inode, &head, 0, 0); |
1392 | spin_unlock(&inode->i_lock); | 1392 | spin_unlock(&inode->i_lock); |
@@ -1398,9 +1398,18 @@ static int nfs_commit_inode(struct inode *inode, int how) | |||
1398 | wait_on_bit(&NFS_I(inode)->flags, NFS_INO_COMMIT, | 1398 | wait_on_bit(&NFS_I(inode)->flags, NFS_INO_COMMIT, |
1399 | nfs_wait_bit_killable, | 1399 | nfs_wait_bit_killable, |
1400 | TASK_KILLABLE); | 1400 | TASK_KILLABLE); |
1401 | else | ||
1402 | goto out_mark_dirty; | ||
1401 | } else | 1403 | } else |
1402 | nfs_commit_clear_lock(NFS_I(inode)); | 1404 | nfs_commit_clear_lock(NFS_I(inode)); |
1403 | out: | 1405 | return res; |
1406 | /* Note: If we exit without ensuring that the commit is complete, | ||
1407 | * we must mark the inode as dirty. Otherwise, future calls to | ||
1408 | * sync_inode() with the WB_SYNC_ALL flag set will fail to ensure | ||
1409 | * that the data is on the disk. | ||
1410 | */ | ||
1411 | out_mark_dirty: | ||
1412 | __mark_inode_dirty(inode, I_DIRTY_DATASYNC); | ||
1404 | return res; | 1413 | return res; |
1405 | } | 1414 | } |
1406 | 1415 | ||
@@ -1509,14 +1518,17 @@ int nfs_wb_page(struct inode *inode, struct page *page) | |||
1509 | }; | 1518 | }; |
1510 | int ret; | 1519 | int ret; |
1511 | 1520 | ||
1512 | while(PagePrivate(page)) { | 1521 | for (;;) { |
1513 | wait_on_page_writeback(page); | 1522 | wait_on_page_writeback(page); |
1514 | if (clear_page_dirty_for_io(page)) { | 1523 | if (clear_page_dirty_for_io(page)) { |
1515 | ret = nfs_writepage_locked(page, &wbc); | 1524 | ret = nfs_writepage_locked(page, &wbc); |
1516 | if (ret < 0) | 1525 | if (ret < 0) |
1517 | goto out_error; | 1526 | goto out_error; |
1527 | continue; | ||
1518 | } | 1528 | } |
1519 | ret = sync_inode(inode, &wbc); | 1529 | if (!PagePrivate(page)) |
1530 | break; | ||
1531 | ret = nfs_commit_inode(inode, FLUSH_SYNC); | ||
1520 | if (ret < 0) | 1532 | if (ret < 0) |
1521 | goto out_error; | 1533 | goto out_error; |
1522 | } | 1534 | } |
diff --git a/fs/proc/array.c b/fs/proc/array.c index 885ab5513ac5..9b58d38bc911 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c | |||
@@ -267,7 +267,7 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p) | |||
267 | shpending = p->signal->shared_pending.signal; | 267 | shpending = p->signal->shared_pending.signal; |
268 | blocked = p->blocked; | 268 | blocked = p->blocked; |
269 | collect_sigign_sigcatch(p, &ignored, &caught); | 269 | collect_sigign_sigcatch(p, &ignored, &caught); |
270 | num_threads = atomic_read(&p->signal->count); | 270 | num_threads = get_nr_threads(p); |
271 | rcu_read_lock(); /* FIXME: is this correct? */ | 271 | rcu_read_lock(); /* FIXME: is this correct? */ |
272 | qsize = atomic_read(&__task_cred(p)->user->sigpending); | 272 | qsize = atomic_read(&__task_cred(p)->user->sigpending); |
273 | rcu_read_unlock(); | 273 | rcu_read_unlock(); |
@@ -410,7 +410,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, | |||
410 | tty_nr = new_encode_dev(tty_devnum(sig->tty)); | 410 | tty_nr = new_encode_dev(tty_devnum(sig->tty)); |
411 | } | 411 | } |
412 | 412 | ||
413 | num_threads = atomic_read(&sig->count); | 413 | num_threads = get_nr_threads(task); |
414 | collect_sigign_sigcatch(task, &sigign, &sigcatch); | 414 | collect_sigign_sigcatch(task, &sigign, &sigcatch); |
415 | 415 | ||
416 | cmin_flt = sig->cmin_flt; | 416 | cmin_flt = sig->cmin_flt; |
diff --git a/fs/proc/base.c b/fs/proc/base.c index c7f9f23449dc..acb7ef80ea4f 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -166,18 +166,6 @@ static int get_fs_path(struct task_struct *task, struct path *path, bool root) | |||
166 | return result; | 166 | return result; |
167 | } | 167 | } |
168 | 168 | ||
169 | static int get_nr_threads(struct task_struct *tsk) | ||
170 | { | ||
171 | unsigned long flags; | ||
172 | int count = 0; | ||
173 | |||
174 | if (lock_task_sighand(tsk, &flags)) { | ||
175 | count = atomic_read(&tsk->signal->count); | ||
176 | unlock_task_sighand(tsk, &flags); | ||
177 | } | ||
178 | return count; | ||
179 | } | ||
180 | |||
181 | static int proc_cwd_link(struct inode *inode, struct path *path) | 169 | static int proc_cwd_link(struct inode *inode, struct path *path) |
182 | { | 170 | { |
183 | struct task_struct *task = get_proc_task(inode); | 171 | struct task_struct *task = get_proc_task(inode); |
@@ -2444,7 +2432,7 @@ static struct dentry *proc_base_instantiate(struct inode *dir, | |||
2444 | const struct pid_entry *p = ptr; | 2432 | const struct pid_entry *p = ptr; |
2445 | struct inode *inode; | 2433 | struct inode *inode; |
2446 | struct proc_inode *ei; | 2434 | struct proc_inode *ei; |
2447 | struct dentry *error = ERR_PTR(-EINVAL); | 2435 | struct dentry *error; |
2448 | 2436 | ||
2449 | /* Allocate the inode */ | 2437 | /* Allocate the inode */ |
2450 | error = ERR_PTR(-ENOMEM); | 2438 | error = ERR_PTR(-ENOMEM); |
@@ -2794,7 +2782,7 @@ out: | |||
2794 | 2782 | ||
2795 | struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) | 2783 | struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) |
2796 | { | 2784 | { |
2797 | struct dentry *result = ERR_PTR(-ENOENT); | 2785 | struct dentry *result; |
2798 | struct task_struct *task; | 2786 | struct task_struct *task; |
2799 | unsigned tgid; | 2787 | unsigned tgid; |
2800 | struct pid_namespace *ns; | 2788 | struct pid_namespace *ns; |
diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 43c127490606..2791907744ed 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c | |||
@@ -343,21 +343,6 @@ static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */ | |||
343 | /* | 343 | /* |
344 | * Return an inode number between PROC_DYNAMIC_FIRST and | 344 | * Return an inode number between PROC_DYNAMIC_FIRST and |
345 | * 0xffffffff, or zero on failure. | 345 | * 0xffffffff, or zero on failure. |
346 | * | ||
347 | * Current inode allocations in the proc-fs (hex-numbers): | ||
348 | * | ||
349 | * 00000000 reserved | ||
350 | * 00000001-00000fff static entries (goners) | ||
351 | * 001 root-ino | ||
352 | * | ||
353 | * 00001000-00001fff unused | ||
354 | * 0001xxxx-7fffxxxx pid-dir entries for pid 1-7fff | ||
355 | * 80000000-efffffff unused | ||
356 | * f0000000-ffffffff dynamic entries | ||
357 | * | ||
358 | * Goal: | ||
359 | * Once we split the thing into several virtual filesystems, | ||
360 | * we will get rid of magical ranges (and this comment, BTW). | ||
361 | */ | 346 | */ |
362 | static unsigned int get_inode_number(void) | 347 | static unsigned int get_inode_number(void) |
363 | { | 348 | { |
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index c837a77351be..6f37c391468d 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c | |||
@@ -588,7 +588,7 @@ static struct kcore_list kcore_text; | |||
588 | */ | 588 | */ |
589 | static void __init proc_kcore_text_init(void) | 589 | static void __init proc_kcore_text_init(void) |
590 | { | 590 | { |
591 | kclist_add(&kcore_text, _stext, _end - _stext, KCORE_TEXT); | 591 | kclist_add(&kcore_text, _text, _end - _text, KCORE_TEXT); |
592 | } | 592 | } |
593 | #else | 593 | #else |
594 | static void __init proc_kcore_text_init(void) | 594 | static void __init proc_kcore_text_init(void) |
diff --git a/fs/proc/root.c b/fs/proc/root.c index 757c069f2a65..4258384ed22d 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c | |||
@@ -110,7 +110,6 @@ void __init proc_root_init(void) | |||
110 | if (err) | 110 | if (err) |
111 | return; | 111 | return; |
112 | proc_mnt = kern_mount_data(&proc_fs_type, &init_pid_ns); | 112 | proc_mnt = kern_mount_data(&proc_fs_type, &init_pid_ns); |
113 | err = PTR_ERR(proc_mnt); | ||
114 | if (IS_ERR(proc_mnt)) { | 113 | if (IS_ERR(proc_mnt)) { |
115 | unregister_filesystem(&proc_fs_type); | 114 | unregister_filesystem(&proc_fs_type); |
116 | return; | 115 | return; |
diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c index 6f30c3d5bcbf..3d3fd4692133 100644 --- a/fs/qnx4/dir.c +++ b/fs/qnx4/dir.c | |||
@@ -77,6 +77,7 @@ out: | |||
77 | 77 | ||
78 | const struct file_operations qnx4_dir_operations = | 78 | const struct file_operations qnx4_dir_operations = |
79 | { | 79 | { |
80 | .llseek = generic_file_llseek, | ||
80 | .read = generic_read_dir, | 81 | .read = generic_read_dir, |
81 | .readdir = qnx4_readdir, | 82 | .readdir = qnx4_readdir, |
82 | .fsync = simple_fsync, | 83 | .fsync = simple_fsync, |
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 655a4c52b8c3..1ad8bf076cfc 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c | |||
@@ -1514,11 +1514,13 @@ static void inode_decr_space(struct inode *inode, qsize_t number, int reserve) | |||
1514 | /* | 1514 | /* |
1515 | * This operation can block, but only after everything is updated | 1515 | * This operation can block, but only after everything is updated |
1516 | */ | 1516 | */ |
1517 | int __dquot_alloc_space(struct inode *inode, qsize_t number, | 1517 | int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) |
1518 | int warn, int reserve) | ||
1519 | { | 1518 | { |
1520 | int cnt, ret = 0; | 1519 | int cnt, ret = 0; |
1521 | char warntype[MAXQUOTAS]; | 1520 | char warntype[MAXQUOTAS]; |
1521 | int warn = flags & DQUOT_SPACE_WARN; | ||
1522 | int reserve = flags & DQUOT_SPACE_RESERVE; | ||
1523 | int nofail = flags & DQUOT_SPACE_NOFAIL; | ||
1522 | 1524 | ||
1523 | /* | 1525 | /* |
1524 | * First test before acquiring mutex - solves deadlocks when we | 1526 | * First test before acquiring mutex - solves deadlocks when we |
@@ -1539,7 +1541,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, | |||
1539 | continue; | 1541 | continue; |
1540 | ret = check_bdq(inode->i_dquot[cnt], number, !warn, | 1542 | ret = check_bdq(inode->i_dquot[cnt], number, !warn, |
1541 | warntype+cnt); | 1543 | warntype+cnt); |
1542 | if (ret) { | 1544 | if (ret && !nofail) { |
1543 | spin_unlock(&dq_data_lock); | 1545 | spin_unlock(&dq_data_lock); |
1544 | goto out_flush_warn; | 1546 | goto out_flush_warn; |
1545 | } | 1547 | } |
@@ -1638,10 +1640,11 @@ EXPORT_SYMBOL(dquot_claim_space_nodirty); | |||
1638 | /* | 1640 | /* |
1639 | * This operation can block, but only after everything is updated | 1641 | * This operation can block, but only after everything is updated |
1640 | */ | 1642 | */ |
1641 | void __dquot_free_space(struct inode *inode, qsize_t number, int reserve) | 1643 | void __dquot_free_space(struct inode *inode, qsize_t number, int flags) |
1642 | { | 1644 | { |
1643 | unsigned int cnt; | 1645 | unsigned int cnt; |
1644 | char warntype[MAXQUOTAS]; | 1646 | char warntype[MAXQUOTAS]; |
1647 | int reserve = flags & DQUOT_SPACE_RESERVE; | ||
1645 | 1648 | ||
1646 | /* First test before acquiring mutex - solves deadlocks when we | 1649 | /* First test before acquiring mutex - solves deadlocks when we |
1647 | * re-enter the quota code and are already holding the mutex */ | 1650 | * re-enter the quota code and are already holding the mutex */ |
diff --git a/fs/read_write.c b/fs/read_write.c index 113386d6fd2d..9c0485236e68 100644 --- a/fs/read_write.c +++ b/fs/read_write.c | |||
@@ -97,6 +97,23 @@ loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) | |||
97 | } | 97 | } |
98 | EXPORT_SYMBOL(generic_file_llseek); | 98 | EXPORT_SYMBOL(generic_file_llseek); |
99 | 99 | ||
100 | /** | ||
101 | * noop_llseek - No Operation Performed llseek implementation | ||
102 | * @file: file structure to seek on | ||
103 | * @offset: file offset to seek to | ||
104 | * @origin: type of seek | ||
105 | * | ||
106 | * This is an implementation of ->llseek useable for the rare special case when | ||
107 | * userspace expects the seek to succeed but the (device) file is actually not | ||
108 | * able to perform the seek. In this case you use noop_llseek() instead of | ||
109 | * falling back to the default implementation of ->llseek. | ||
110 | */ | ||
111 | loff_t noop_llseek(struct file *file, loff_t offset, int origin) | ||
112 | { | ||
113 | return file->f_pos; | ||
114 | } | ||
115 | EXPORT_SYMBOL(noop_llseek); | ||
116 | |||
100 | loff_t no_llseek(struct file *file, loff_t offset, int origin) | 117 | loff_t no_llseek(struct file *file, loff_t offset, int origin) |
101 | { | 118 | { |
102 | return -ESPIPE; | 119 | return -ESPIPE; |
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c index 07930449a958..4455fbe269a3 100644 --- a/fs/reiserfs/dir.c +++ b/fs/reiserfs/dir.c | |||
@@ -18,6 +18,7 @@ static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry, | |||
18 | int datasync); | 18 | int datasync); |
19 | 19 | ||
20 | const struct file_operations reiserfs_dir_operations = { | 20 | const struct file_operations reiserfs_dir_operations = { |
21 | .llseek = generic_file_llseek, | ||
21 | .read = generic_read_dir, | 22 | .read = generic_read_dir, |
22 | .readdir = reiserfs_readdir, | 23 | .readdir = reiserfs_readdir, |
23 | .fsync = reiserfs_dir_fsync, | 24 | .fsync = reiserfs_dir_fsync, |
diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c index 6c978428892d..00a70cab1f36 100644 --- a/fs/smbfs/dir.c +++ b/fs/smbfs/dir.c | |||
@@ -37,6 +37,7 @@ static int smb_link(struct dentry *, struct inode *, struct dentry *); | |||
37 | 37 | ||
38 | const struct file_operations smb_dir_operations = | 38 | const struct file_operations smb_dir_operations = |
39 | { | 39 | { |
40 | .llseek = generic_file_llseek, | ||
40 | .read = generic_read_dir, | 41 | .read = generic_read_dir, |
41 | .readdir = smb_readdir, | 42 | .readdir = smb_readdir, |
42 | .unlocked_ioctl = smb_ioctl, | 43 | .unlocked_ioctl = smb_ioctl, |
diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig index 25a00d19d686..cc6ce8a84c21 100644 --- a/fs/squashfs/Kconfig +++ b/fs/squashfs/Kconfig | |||
@@ -26,6 +26,17 @@ config SQUASHFS | |||
26 | 26 | ||
27 | If unsure, say N. | 27 | If unsure, say N. |
28 | 28 | ||
29 | config SQUASHFS_XATTRS | ||
30 | bool "Squashfs XATTR support" | ||
31 | depends on SQUASHFS | ||
32 | default n | ||
33 | help | ||
34 | Saying Y here includes support for extended attributes (xattrs). | ||
35 | Xattrs are name:value pairs associated with inodes by | ||
36 | the kernel or by users (see the attr(5) manual page). | ||
37 | |||
38 | If unsure, say N. | ||
39 | |||
29 | config SQUASHFS_EMBEDDED | 40 | config SQUASHFS_EMBEDDED |
30 | 41 | ||
31 | bool "Additional option for memory-constrained systems" | 42 | bool "Additional option for memory-constrained systems" |
diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile index df8a19ef870d..2cee3e9fa452 100644 --- a/fs/squashfs/Makefile +++ b/fs/squashfs/Makefile | |||
@@ -5,3 +5,5 @@ | |||
5 | obj-$(CONFIG_SQUASHFS) += squashfs.o | 5 | obj-$(CONFIG_SQUASHFS) += squashfs.o |
6 | squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o | 6 | squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o |
7 | squashfs-y += namei.o super.o symlink.o zlib_wrapper.o decompressor.o | 7 | squashfs-y += namei.o super.o symlink.o zlib_wrapper.o decompressor.o |
8 | squashfs-$(CONFIG_SQUASHFS_XATTRS) += xattr.o xattr_id.o | ||
9 | |||
diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c index 49daaf669e41..62e63ad25075 100644 --- a/fs/squashfs/inode.c +++ b/fs/squashfs/inode.c | |||
@@ -40,11 +40,13 @@ | |||
40 | 40 | ||
41 | #include <linux/fs.h> | 41 | #include <linux/fs.h> |
42 | #include <linux/vfs.h> | 42 | #include <linux/vfs.h> |
43 | #include <linux/xattr.h> | ||
43 | 44 | ||
44 | #include "squashfs_fs.h" | 45 | #include "squashfs_fs.h" |
45 | #include "squashfs_fs_sb.h" | 46 | #include "squashfs_fs_sb.h" |
46 | #include "squashfs_fs_i.h" | 47 | #include "squashfs_fs_i.h" |
47 | #include "squashfs.h" | 48 | #include "squashfs.h" |
49 | #include "xattr.h" | ||
48 | 50 | ||
49 | /* | 51 | /* |
50 | * Initialise VFS inode with the base inode information common to all | 52 | * Initialise VFS inode with the base inode information common to all |
@@ -111,6 +113,7 @@ int squashfs_read_inode(struct inode *inode, long long ino) | |||
111 | int err, type, offset = SQUASHFS_INODE_OFFSET(ino); | 113 | int err, type, offset = SQUASHFS_INODE_OFFSET(ino); |
112 | union squashfs_inode squashfs_ino; | 114 | union squashfs_inode squashfs_ino; |
113 | struct squashfs_base_inode *sqshb_ino = &squashfs_ino.base; | 115 | struct squashfs_base_inode *sqshb_ino = &squashfs_ino.base; |
116 | int xattr_id = SQUASHFS_INVALID_XATTR; | ||
114 | 117 | ||
115 | TRACE("Entered squashfs_read_inode\n"); | 118 | TRACE("Entered squashfs_read_inode\n"); |
116 | 119 | ||
@@ -199,8 +202,10 @@ int squashfs_read_inode(struct inode *inode, long long ino) | |||
199 | frag_offset = 0; | 202 | frag_offset = 0; |
200 | } | 203 | } |
201 | 204 | ||
205 | xattr_id = le32_to_cpu(sqsh_ino->xattr); | ||
202 | inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); | 206 | inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); |
203 | inode->i_size = le64_to_cpu(sqsh_ino->file_size); | 207 | inode->i_size = le64_to_cpu(sqsh_ino->file_size); |
208 | inode->i_op = &squashfs_inode_ops; | ||
204 | inode->i_fop = &generic_ro_fops; | 209 | inode->i_fop = &generic_ro_fops; |
205 | inode->i_mode |= S_IFREG; | 210 | inode->i_mode |= S_IFREG; |
206 | inode->i_blocks = ((inode->i_size - | 211 | inode->i_blocks = ((inode->i_size - |
@@ -251,6 +256,7 @@ int squashfs_read_inode(struct inode *inode, long long ino) | |||
251 | if (err < 0) | 256 | if (err < 0) |
252 | goto failed_read; | 257 | goto failed_read; |
253 | 258 | ||
259 | xattr_id = le32_to_cpu(sqsh_ino->xattr); | ||
254 | inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); | 260 | inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); |
255 | inode->i_size = le32_to_cpu(sqsh_ino->file_size); | 261 | inode->i_size = le32_to_cpu(sqsh_ino->file_size); |
256 | inode->i_op = &squashfs_dir_inode_ops; | 262 | inode->i_op = &squashfs_dir_inode_ops; |
@@ -280,21 +286,33 @@ int squashfs_read_inode(struct inode *inode, long long ino) | |||
280 | 286 | ||
281 | inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); | 287 | inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); |
282 | inode->i_size = le32_to_cpu(sqsh_ino->symlink_size); | 288 | inode->i_size = le32_to_cpu(sqsh_ino->symlink_size); |
283 | inode->i_op = &page_symlink_inode_operations; | 289 | inode->i_op = &squashfs_symlink_inode_ops; |
284 | inode->i_data.a_ops = &squashfs_symlink_aops; | 290 | inode->i_data.a_ops = &squashfs_symlink_aops; |
285 | inode->i_mode |= S_IFLNK; | 291 | inode->i_mode |= S_IFLNK; |
286 | squashfs_i(inode)->start = block; | 292 | squashfs_i(inode)->start = block; |
287 | squashfs_i(inode)->offset = offset; | 293 | squashfs_i(inode)->offset = offset; |
288 | 294 | ||
295 | if (type == SQUASHFS_LSYMLINK_TYPE) { | ||
296 | __le32 xattr; | ||
297 | |||
298 | err = squashfs_read_metadata(sb, NULL, &block, | ||
299 | &offset, inode->i_size); | ||
300 | if (err < 0) | ||
301 | goto failed_read; | ||
302 | err = squashfs_read_metadata(sb, &xattr, &block, | ||
303 | &offset, sizeof(xattr)); | ||
304 | if (err < 0) | ||
305 | goto failed_read; | ||
306 | xattr_id = le32_to_cpu(xattr); | ||
307 | } | ||
308 | |||
289 | TRACE("Symbolic link inode %x:%x, start_block %llx, offset " | 309 | TRACE("Symbolic link inode %x:%x, start_block %llx, offset " |
290 | "%x\n", SQUASHFS_INODE_BLK(ino), offset, | 310 | "%x\n", SQUASHFS_INODE_BLK(ino), offset, |
291 | block, offset); | 311 | block, offset); |
292 | break; | 312 | break; |
293 | } | 313 | } |
294 | case SQUASHFS_BLKDEV_TYPE: | 314 | case SQUASHFS_BLKDEV_TYPE: |
295 | case SQUASHFS_CHRDEV_TYPE: | 315 | case SQUASHFS_CHRDEV_TYPE: { |
296 | case SQUASHFS_LBLKDEV_TYPE: | ||
297 | case SQUASHFS_LCHRDEV_TYPE: { | ||
298 | struct squashfs_dev_inode *sqsh_ino = &squashfs_ino.dev; | 316 | struct squashfs_dev_inode *sqsh_ino = &squashfs_ino.dev; |
299 | unsigned int rdev; | 317 | unsigned int rdev; |
300 | 318 | ||
@@ -315,10 +333,32 @@ int squashfs_read_inode(struct inode *inode, long long ino) | |||
315 | SQUASHFS_INODE_BLK(ino), offset, rdev); | 333 | SQUASHFS_INODE_BLK(ino), offset, rdev); |
316 | break; | 334 | break; |
317 | } | 335 | } |
336 | case SQUASHFS_LBLKDEV_TYPE: | ||
337 | case SQUASHFS_LCHRDEV_TYPE: { | ||
338 | struct squashfs_ldev_inode *sqsh_ino = &squashfs_ino.ldev; | ||
339 | unsigned int rdev; | ||
340 | |||
341 | err = squashfs_read_metadata(sb, sqsh_ino, &block, &offset, | ||
342 | sizeof(*sqsh_ino)); | ||
343 | if (err < 0) | ||
344 | goto failed_read; | ||
345 | |||
346 | if (type == SQUASHFS_LCHRDEV_TYPE) | ||
347 | inode->i_mode |= S_IFCHR; | ||
348 | else | ||
349 | inode->i_mode |= S_IFBLK; | ||
350 | xattr_id = le32_to_cpu(sqsh_ino->xattr); | ||
351 | inode->i_op = &squashfs_inode_ops; | ||
352 | inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); | ||
353 | rdev = le32_to_cpu(sqsh_ino->rdev); | ||
354 | init_special_inode(inode, inode->i_mode, new_decode_dev(rdev)); | ||
355 | |||
356 | TRACE("Device inode %x:%x, rdev %x\n", | ||
357 | SQUASHFS_INODE_BLK(ino), offset, rdev); | ||
358 | break; | ||
359 | } | ||
318 | case SQUASHFS_FIFO_TYPE: | 360 | case SQUASHFS_FIFO_TYPE: |
319 | case SQUASHFS_SOCKET_TYPE: | 361 | case SQUASHFS_SOCKET_TYPE: { |
320 | case SQUASHFS_LFIFO_TYPE: | ||
321 | case SQUASHFS_LSOCKET_TYPE: { | ||
322 | struct squashfs_ipc_inode *sqsh_ino = &squashfs_ino.ipc; | 362 | struct squashfs_ipc_inode *sqsh_ino = &squashfs_ino.ipc; |
323 | 363 | ||
324 | err = squashfs_read_metadata(sb, sqsh_ino, &block, &offset, | 364 | err = squashfs_read_metadata(sb, sqsh_ino, &block, &offset, |
@@ -334,14 +374,52 @@ int squashfs_read_inode(struct inode *inode, long long ino) | |||
334 | init_special_inode(inode, inode->i_mode, 0); | 374 | init_special_inode(inode, inode->i_mode, 0); |
335 | break; | 375 | break; |
336 | } | 376 | } |
377 | case SQUASHFS_LFIFO_TYPE: | ||
378 | case SQUASHFS_LSOCKET_TYPE: { | ||
379 | struct squashfs_lipc_inode *sqsh_ino = &squashfs_ino.lipc; | ||
380 | |||
381 | err = squashfs_read_metadata(sb, sqsh_ino, &block, &offset, | ||
382 | sizeof(*sqsh_ino)); | ||
383 | if (err < 0) | ||
384 | goto failed_read; | ||
385 | |||
386 | if (type == SQUASHFS_LFIFO_TYPE) | ||
387 | inode->i_mode |= S_IFIFO; | ||
388 | else | ||
389 | inode->i_mode |= S_IFSOCK; | ||
390 | xattr_id = le32_to_cpu(sqsh_ino->xattr); | ||
391 | inode->i_op = &squashfs_inode_ops; | ||
392 | inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); | ||
393 | init_special_inode(inode, inode->i_mode, 0); | ||
394 | break; | ||
395 | } | ||
337 | default: | 396 | default: |
338 | ERROR("Unknown inode type %d in squashfs_iget!\n", type); | 397 | ERROR("Unknown inode type %d in squashfs_iget!\n", type); |
339 | return -EINVAL; | 398 | return -EINVAL; |
340 | } | 399 | } |
341 | 400 | ||
401 | if (xattr_id != SQUASHFS_INVALID_XATTR && msblk->xattr_id_table) { | ||
402 | err = squashfs_xattr_lookup(sb, xattr_id, | ||
403 | &squashfs_i(inode)->xattr_count, | ||
404 | &squashfs_i(inode)->xattr_size, | ||
405 | &squashfs_i(inode)->xattr); | ||
406 | if (err < 0) | ||
407 | goto failed_read; | ||
408 | inode->i_blocks += ((squashfs_i(inode)->xattr_size - 1) >> 9) | ||
409 | + 1; | ||
410 | } else | ||
411 | squashfs_i(inode)->xattr_count = 0; | ||
412 | |||
342 | return 0; | 413 | return 0; |
343 | 414 | ||
344 | failed_read: | 415 | failed_read: |
345 | ERROR("Unable to read inode 0x%llx\n", ino); | 416 | ERROR("Unable to read inode 0x%llx\n", ino); |
346 | return err; | 417 | return err; |
347 | } | 418 | } |
419 | |||
420 | |||
421 | const struct inode_operations squashfs_inode_ops = { | ||
422 | .getxattr = generic_getxattr, | ||
423 | .listxattr = squashfs_listxattr | ||
424 | }; | ||
425 | |||
diff --git a/fs/squashfs/namei.c b/fs/squashfs/namei.c index 5266bd8ad932..7a9464d08cf6 100644 --- a/fs/squashfs/namei.c +++ b/fs/squashfs/namei.c | |||
@@ -57,11 +57,13 @@ | |||
57 | #include <linux/slab.h> | 57 | #include <linux/slab.h> |
58 | #include <linux/string.h> | 58 | #include <linux/string.h> |
59 | #include <linux/dcache.h> | 59 | #include <linux/dcache.h> |
60 | #include <linux/xattr.h> | ||
60 | 61 | ||
61 | #include "squashfs_fs.h" | 62 | #include "squashfs_fs.h" |
62 | #include "squashfs_fs_sb.h" | 63 | #include "squashfs_fs_sb.h" |
63 | #include "squashfs_fs_i.h" | 64 | #include "squashfs_fs_i.h" |
64 | #include "squashfs.h" | 65 | #include "squashfs.h" |
66 | #include "xattr.h" | ||
65 | 67 | ||
66 | /* | 68 | /* |
67 | * Lookup name in the directory index, returning the location of the metadata | 69 | * Lookup name in the directory index, returning the location of the metadata |
@@ -237,5 +239,7 @@ failed: | |||
237 | 239 | ||
238 | 240 | ||
239 | const struct inode_operations squashfs_dir_inode_ops = { | 241 | const struct inode_operations squashfs_dir_inode_ops = { |
240 | .lookup = squashfs_lookup | 242 | .lookup = squashfs_lookup, |
243 | .getxattr = generic_getxattr, | ||
244 | .listxattr = squashfs_listxattr | ||
241 | }; | 245 | }; |
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index fe2587af5512..733a17c42945 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h | |||
@@ -73,8 +73,11 @@ extern struct inode *squashfs_iget(struct super_block *, long long, | |||
73 | unsigned int); | 73 | unsigned int); |
74 | extern int squashfs_read_inode(struct inode *, long long); | 74 | extern int squashfs_read_inode(struct inode *, long long); |
75 | 75 | ||
76 | /* xattr.c */ | ||
77 | extern ssize_t squashfs_listxattr(struct dentry *, char *, size_t); | ||
78 | |||
76 | /* | 79 | /* |
77 | * Inodes, files and decompressor operations | 80 | * Inodes, files, decompressor and xattr operations |
78 | */ | 81 | */ |
79 | 82 | ||
80 | /* dir.c */ | 83 | /* dir.c */ |
@@ -86,11 +89,18 @@ extern const struct export_operations squashfs_export_ops; | |||
86 | /* file.c */ | 89 | /* file.c */ |
87 | extern const struct address_space_operations squashfs_aops; | 90 | extern const struct address_space_operations squashfs_aops; |
88 | 91 | ||
92 | /* inode.c */ | ||
93 | extern const struct inode_operations squashfs_inode_ops; | ||
94 | |||
89 | /* namei.c */ | 95 | /* namei.c */ |
90 | extern const struct inode_operations squashfs_dir_inode_ops; | 96 | extern const struct inode_operations squashfs_dir_inode_ops; |
91 | 97 | ||
92 | /* symlink.c */ | 98 | /* symlink.c */ |
93 | extern const struct address_space_operations squashfs_symlink_aops; | 99 | extern const struct address_space_operations squashfs_symlink_aops; |
100 | extern const struct inode_operations squashfs_symlink_inode_ops; | ||
101 | |||
102 | /* xattr.c */ | ||
103 | extern const struct xattr_handler *squashfs_xattr_handlers[]; | ||
94 | 104 | ||
95 | /* zlib_wrapper.c */ | 105 | /* zlib_wrapper.c */ |
96 | extern const struct squashfs_decompressor squashfs_zlib_comp_ops; | 106 | extern const struct squashfs_decompressor squashfs_zlib_comp_ops; |
diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h index 79024245ea00..8eabb808b78d 100644 --- a/fs/squashfs/squashfs_fs.h +++ b/fs/squashfs/squashfs_fs.h | |||
@@ -46,6 +46,7 @@ | |||
46 | #define SQUASHFS_NAME_LEN 256 | 46 | #define SQUASHFS_NAME_LEN 256 |
47 | 47 | ||
48 | #define SQUASHFS_INVALID_FRAG (0xffffffffU) | 48 | #define SQUASHFS_INVALID_FRAG (0xffffffffU) |
49 | #define SQUASHFS_INVALID_XATTR (0xffffffffU) | ||
49 | #define SQUASHFS_INVALID_BLK (-1LL) | 50 | #define SQUASHFS_INVALID_BLK (-1LL) |
50 | 51 | ||
51 | /* Filesystem flags */ | 52 | /* Filesystem flags */ |
@@ -96,6 +97,13 @@ | |||
96 | #define SQUASHFS_LFIFO_TYPE 13 | 97 | #define SQUASHFS_LFIFO_TYPE 13 |
97 | #define SQUASHFS_LSOCKET_TYPE 14 | 98 | #define SQUASHFS_LSOCKET_TYPE 14 |
98 | 99 | ||
100 | /* Xattr types */ | ||
101 | #define SQUASHFS_XATTR_USER 0 | ||
102 | #define SQUASHFS_XATTR_TRUSTED 1 | ||
103 | #define SQUASHFS_XATTR_SECURITY 2 | ||
104 | #define SQUASHFS_XATTR_VALUE_OOL 256 | ||
105 | #define SQUASHFS_XATTR_PREFIX_MASK 0xff | ||
106 | |||
99 | /* Flag whether block is compressed or uncompressed, bit is set if block is | 107 | /* Flag whether block is compressed or uncompressed, bit is set if block is |
100 | * uncompressed */ | 108 | * uncompressed */ |
101 | #define SQUASHFS_COMPRESSED_BIT (1 << 15) | 109 | #define SQUASHFS_COMPRESSED_BIT (1 << 15) |
@@ -174,6 +182,24 @@ | |||
174 | 182 | ||
175 | #define SQUASHFS_ID_BLOCK_BYTES(A) (SQUASHFS_ID_BLOCKS(A) *\ | 183 | #define SQUASHFS_ID_BLOCK_BYTES(A) (SQUASHFS_ID_BLOCKS(A) *\ |
176 | sizeof(u64)) | 184 | sizeof(u64)) |
185 | /* xattr id lookup table defines */ | ||
186 | #define SQUASHFS_XATTR_BYTES(A) ((A) * sizeof(struct squashfs_xattr_id)) | ||
187 | |||
188 | #define SQUASHFS_XATTR_BLOCK(A) (SQUASHFS_XATTR_BYTES(A) / \ | ||
189 | SQUASHFS_METADATA_SIZE) | ||
190 | |||
191 | #define SQUASHFS_XATTR_BLOCK_OFFSET(A) (SQUASHFS_XATTR_BYTES(A) % \ | ||
192 | SQUASHFS_METADATA_SIZE) | ||
193 | |||
194 | #define SQUASHFS_XATTR_BLOCKS(A) ((SQUASHFS_XATTR_BYTES(A) + \ | ||
195 | SQUASHFS_METADATA_SIZE - 1) / \ | ||
196 | SQUASHFS_METADATA_SIZE) | ||
197 | |||
198 | #define SQUASHFS_XATTR_BLOCK_BYTES(A) (SQUASHFS_XATTR_BLOCKS(A) *\ | ||
199 | sizeof(u64)) | ||
200 | #define SQUASHFS_XATTR_BLK(A) ((unsigned int) ((A) >> 16)) | ||
201 | |||
202 | #define SQUASHFS_XATTR_OFFSET(A) ((unsigned int) ((A) & 0xffff)) | ||
177 | 203 | ||
178 | /* cached data constants for filesystem */ | 204 | /* cached data constants for filesystem */ |
179 | #define SQUASHFS_CACHED_BLKS 8 | 205 | #define SQUASHFS_CACHED_BLKS 8 |
@@ -228,7 +254,7 @@ struct squashfs_super_block { | |||
228 | __le64 root_inode; | 254 | __le64 root_inode; |
229 | __le64 bytes_used; | 255 | __le64 bytes_used; |
230 | __le64 id_table_start; | 256 | __le64 id_table_start; |
231 | __le64 xattr_table_start; | 257 | __le64 xattr_id_table_start; |
232 | __le64 inode_table_start; | 258 | __le64 inode_table_start; |
233 | __le64 directory_table_start; | 259 | __le64 directory_table_start; |
234 | __le64 fragment_table_start; | 260 | __le64 fragment_table_start; |
@@ -261,6 +287,17 @@ struct squashfs_ipc_inode { | |||
261 | __le32 nlink; | 287 | __le32 nlink; |
262 | }; | 288 | }; |
263 | 289 | ||
290 | struct squashfs_lipc_inode { | ||
291 | __le16 inode_type; | ||
292 | __le16 mode; | ||
293 | __le16 uid; | ||
294 | __le16 guid; | ||
295 | __le32 mtime; | ||
296 | __le32 inode_number; | ||
297 | __le32 nlink; | ||
298 | __le32 xattr; | ||
299 | }; | ||
300 | |||
264 | struct squashfs_dev_inode { | 301 | struct squashfs_dev_inode { |
265 | __le16 inode_type; | 302 | __le16 inode_type; |
266 | __le16 mode; | 303 | __le16 mode; |
@@ -272,6 +309,18 @@ struct squashfs_dev_inode { | |||
272 | __le32 rdev; | 309 | __le32 rdev; |
273 | }; | 310 | }; |
274 | 311 | ||
312 | struct squashfs_ldev_inode { | ||
313 | __le16 inode_type; | ||
314 | __le16 mode; | ||
315 | __le16 uid; | ||
316 | __le16 guid; | ||
317 | __le32 mtime; | ||
318 | __le32 inode_number; | ||
319 | __le32 nlink; | ||
320 | __le32 rdev; | ||
321 | __le32 xattr; | ||
322 | }; | ||
323 | |||
275 | struct squashfs_symlink_inode { | 324 | struct squashfs_symlink_inode { |
276 | __le16 inode_type; | 325 | __le16 inode_type; |
277 | __le16 mode; | 326 | __le16 mode; |
@@ -349,12 +398,14 @@ struct squashfs_ldir_inode { | |||
349 | union squashfs_inode { | 398 | union squashfs_inode { |
350 | struct squashfs_base_inode base; | 399 | struct squashfs_base_inode base; |
351 | struct squashfs_dev_inode dev; | 400 | struct squashfs_dev_inode dev; |
401 | struct squashfs_ldev_inode ldev; | ||
352 | struct squashfs_symlink_inode symlink; | 402 | struct squashfs_symlink_inode symlink; |
353 | struct squashfs_reg_inode reg; | 403 | struct squashfs_reg_inode reg; |
354 | struct squashfs_lreg_inode lreg; | 404 | struct squashfs_lreg_inode lreg; |
355 | struct squashfs_dir_inode dir; | 405 | struct squashfs_dir_inode dir; |
356 | struct squashfs_ldir_inode ldir; | 406 | struct squashfs_ldir_inode ldir; |
357 | struct squashfs_ipc_inode ipc; | 407 | struct squashfs_ipc_inode ipc; |
408 | struct squashfs_lipc_inode lipc; | ||
358 | }; | 409 | }; |
359 | 410 | ||
360 | struct squashfs_dir_entry { | 411 | struct squashfs_dir_entry { |
@@ -377,4 +428,27 @@ struct squashfs_fragment_entry { | |||
377 | unsigned int unused; | 428 | unsigned int unused; |
378 | }; | 429 | }; |
379 | 430 | ||
431 | struct squashfs_xattr_entry { | ||
432 | __le16 type; | ||
433 | __le16 size; | ||
434 | char data[0]; | ||
435 | }; | ||
436 | |||
437 | struct squashfs_xattr_val { | ||
438 | __le32 vsize; | ||
439 | char value[0]; | ||
440 | }; | ||
441 | |||
442 | struct squashfs_xattr_id { | ||
443 | __le64 xattr; | ||
444 | __le32 count; | ||
445 | __le32 size; | ||
446 | }; | ||
447 | |||
448 | struct squashfs_xattr_id_table { | ||
449 | __le64 xattr_table_start; | ||
450 | __le32 xattr_ids; | ||
451 | __le32 unused; | ||
452 | }; | ||
453 | |||
380 | #endif | 454 | #endif |
diff --git a/fs/squashfs/squashfs_fs_i.h b/fs/squashfs/squashfs_fs_i.h index fbfca30c0c68..d3e3a37f28a1 100644 --- a/fs/squashfs/squashfs_fs_i.h +++ b/fs/squashfs/squashfs_fs_i.h | |||
@@ -26,6 +26,9 @@ | |||
26 | struct squashfs_inode_info { | 26 | struct squashfs_inode_info { |
27 | u64 start; | 27 | u64 start; |
28 | int offset; | 28 | int offset; |
29 | u64 xattr; | ||
30 | unsigned int xattr_size; | ||
31 | int xattr_count; | ||
29 | union { | 32 | union { |
30 | struct { | 33 | struct { |
31 | u64 fragment_block; | 34 | u64 fragment_block; |
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h index 2e77dc547e25..d9037a5215f0 100644 --- a/fs/squashfs/squashfs_fs_sb.h +++ b/fs/squashfs/squashfs_fs_sb.h | |||
@@ -61,6 +61,7 @@ struct squashfs_sb_info { | |||
61 | int next_meta_index; | 61 | int next_meta_index; |
62 | __le64 *id_table; | 62 | __le64 *id_table; |
63 | __le64 *fragment_index; | 63 | __le64 *fragment_index; |
64 | __le64 *xattr_id_table; | ||
64 | struct mutex read_data_mutex; | 65 | struct mutex read_data_mutex; |
65 | struct mutex meta_index_mutex; | 66 | struct mutex meta_index_mutex; |
66 | struct meta_index *meta_index; | 67 | struct meta_index *meta_index; |
@@ -68,9 +69,11 @@ struct squashfs_sb_info { | |||
68 | __le64 *inode_lookup_table; | 69 | __le64 *inode_lookup_table; |
69 | u64 inode_table; | 70 | u64 inode_table; |
70 | u64 directory_table; | 71 | u64 directory_table; |
72 | u64 xattr_table; | ||
71 | unsigned int block_size; | 73 | unsigned int block_size; |
72 | unsigned short block_log; | 74 | unsigned short block_log; |
73 | long long bytes_used; | 75 | long long bytes_used; |
74 | unsigned int inodes; | 76 | unsigned int inodes; |
77 | int xattr_ids; | ||
75 | }; | 78 | }; |
76 | #endif | 79 | #endif |
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 48b6f4a385a6..88b4f8606652 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c | |||
@@ -36,12 +36,14 @@ | |||
36 | #include <linux/init.h> | 36 | #include <linux/init.h> |
37 | #include <linux/module.h> | 37 | #include <linux/module.h> |
38 | #include <linux/magic.h> | 38 | #include <linux/magic.h> |
39 | #include <linux/xattr.h> | ||
39 | 40 | ||
40 | #include "squashfs_fs.h" | 41 | #include "squashfs_fs.h" |
41 | #include "squashfs_fs_sb.h" | 42 | #include "squashfs_fs_sb.h" |
42 | #include "squashfs_fs_i.h" | 43 | #include "squashfs_fs_i.h" |
43 | #include "squashfs.h" | 44 | #include "squashfs.h" |
44 | #include "decompressor.h" | 45 | #include "decompressor.h" |
46 | #include "xattr.h" | ||
45 | 47 | ||
46 | static struct file_system_type squashfs_fs_type; | 48 | static struct file_system_type squashfs_fs_type; |
47 | static const struct super_operations squashfs_super_ops; | 49 | static const struct super_operations squashfs_super_ops; |
@@ -82,7 +84,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) | |||
82 | long long root_inode; | 84 | long long root_inode; |
83 | unsigned short flags; | 85 | unsigned short flags; |
84 | unsigned int fragments; | 86 | unsigned int fragments; |
85 | u64 lookup_table_start; | 87 | u64 lookup_table_start, xattr_id_table_start; |
86 | int err; | 88 | int err; |
87 | 89 | ||
88 | TRACE("Entered squashfs_fill_superblock\n"); | 90 | TRACE("Entered squashfs_fill_superblock\n"); |
@@ -139,13 +141,6 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) | |||
139 | if (msblk->decompressor == NULL) | 141 | if (msblk->decompressor == NULL) |
140 | goto failed_mount; | 142 | goto failed_mount; |
141 | 143 | ||
142 | /* | ||
143 | * Check if there's xattrs in the filesystem. These are not | ||
144 | * supported in this version, so warn that they will be ignored. | ||
145 | */ | ||
146 | if (le64_to_cpu(sblk->xattr_table_start) != SQUASHFS_INVALID_BLK) | ||
147 | ERROR("Xattrs in filesystem, these will be ignored\n"); | ||
148 | |||
149 | /* Check the filesystem does not extend beyond the end of the | 144 | /* Check the filesystem does not extend beyond the end of the |
150 | block device */ | 145 | block device */ |
151 | msblk->bytes_used = le64_to_cpu(sblk->bytes_used); | 146 | msblk->bytes_used = le64_to_cpu(sblk->bytes_used); |
@@ -253,7 +248,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) | |||
253 | allocate_lookup_table: | 248 | allocate_lookup_table: |
254 | lookup_table_start = le64_to_cpu(sblk->lookup_table_start); | 249 | lookup_table_start = le64_to_cpu(sblk->lookup_table_start); |
255 | if (lookup_table_start == SQUASHFS_INVALID_BLK) | 250 | if (lookup_table_start == SQUASHFS_INVALID_BLK) |
256 | goto allocate_root; | 251 | goto allocate_xattr_table; |
257 | 252 | ||
258 | /* Allocate and read inode lookup table */ | 253 | /* Allocate and read inode lookup table */ |
259 | msblk->inode_lookup_table = squashfs_read_inode_lookup_table(sb, | 254 | msblk->inode_lookup_table = squashfs_read_inode_lookup_table(sb, |
@@ -266,6 +261,21 @@ allocate_lookup_table: | |||
266 | 261 | ||
267 | sb->s_export_op = &squashfs_export_ops; | 262 | sb->s_export_op = &squashfs_export_ops; |
268 | 263 | ||
264 | allocate_xattr_table: | ||
265 | sb->s_xattr = squashfs_xattr_handlers; | ||
266 | xattr_id_table_start = le64_to_cpu(sblk->xattr_id_table_start); | ||
267 | if (xattr_id_table_start == SQUASHFS_INVALID_BLK) | ||
268 | goto allocate_root; | ||
269 | |||
270 | /* Allocate and read xattr id lookup table */ | ||
271 | msblk->xattr_id_table = squashfs_read_xattr_id_table(sb, | ||
272 | xattr_id_table_start, &msblk->xattr_table, &msblk->xattr_ids); | ||
273 | if (IS_ERR(msblk->xattr_id_table)) { | ||
274 | err = PTR_ERR(msblk->xattr_id_table); | ||
275 | msblk->xattr_id_table = NULL; | ||
276 | if (err != -ENOTSUPP) | ||
277 | goto failed_mount; | ||
278 | } | ||
269 | allocate_root: | 279 | allocate_root: |
270 | root = new_inode(sb); | 280 | root = new_inode(sb); |
271 | if (!root) { | 281 | if (!root) { |
@@ -301,6 +311,7 @@ failed_mount: | |||
301 | kfree(msblk->inode_lookup_table); | 311 | kfree(msblk->inode_lookup_table); |
302 | kfree(msblk->fragment_index); | 312 | kfree(msblk->fragment_index); |
303 | kfree(msblk->id_table); | 313 | kfree(msblk->id_table); |
314 | kfree(msblk->xattr_id_table); | ||
304 | kfree(sb->s_fs_info); | 315 | kfree(sb->s_fs_info); |
305 | sb->s_fs_info = NULL; | 316 | sb->s_fs_info = NULL; |
306 | kfree(sblk); | 317 | kfree(sblk); |
@@ -355,6 +366,7 @@ static void squashfs_put_super(struct super_block *sb) | |||
355 | kfree(sbi->fragment_index); | 366 | kfree(sbi->fragment_index); |
356 | kfree(sbi->meta_index); | 367 | kfree(sbi->meta_index); |
357 | kfree(sbi->inode_lookup_table); | 368 | kfree(sbi->inode_lookup_table); |
369 | kfree(sbi->xattr_id_table); | ||
358 | kfree(sb->s_fs_info); | 370 | kfree(sb->s_fs_info); |
359 | sb->s_fs_info = NULL; | 371 | sb->s_fs_info = NULL; |
360 | } | 372 | } |
diff --git a/fs/squashfs/symlink.c b/fs/squashfs/symlink.c index 32b911f4ee39..ec86434921e1 100644 --- a/fs/squashfs/symlink.c +++ b/fs/squashfs/symlink.c | |||
@@ -35,11 +35,13 @@ | |||
35 | #include <linux/kernel.h> | 35 | #include <linux/kernel.h> |
36 | #include <linux/string.h> | 36 | #include <linux/string.h> |
37 | #include <linux/pagemap.h> | 37 | #include <linux/pagemap.h> |
38 | #include <linux/xattr.h> | ||
38 | 39 | ||
39 | #include "squashfs_fs.h" | 40 | #include "squashfs_fs.h" |
40 | #include "squashfs_fs_sb.h" | 41 | #include "squashfs_fs_sb.h" |
41 | #include "squashfs_fs_i.h" | 42 | #include "squashfs_fs_i.h" |
42 | #include "squashfs.h" | 43 | #include "squashfs.h" |
44 | #include "xattr.h" | ||
43 | 45 | ||
44 | static int squashfs_symlink_readpage(struct file *file, struct page *page) | 46 | static int squashfs_symlink_readpage(struct file *file, struct page *page) |
45 | { | 47 | { |
@@ -114,3 +116,12 @@ error_out: | |||
114 | const struct address_space_operations squashfs_symlink_aops = { | 116 | const struct address_space_operations squashfs_symlink_aops = { |
115 | .readpage = squashfs_symlink_readpage | 117 | .readpage = squashfs_symlink_readpage |
116 | }; | 118 | }; |
119 | |||
120 | const struct inode_operations squashfs_symlink_inode_ops = { | ||
121 | .readlink = generic_readlink, | ||
122 | .follow_link = page_follow_link_light, | ||
123 | .put_link = page_put_link, | ||
124 | .getxattr = generic_getxattr, | ||
125 | .listxattr = squashfs_listxattr | ||
126 | }; | ||
127 | |||
diff --git a/fs/squashfs/xattr.c b/fs/squashfs/xattr.c new file mode 100644 index 000000000000..c7655e8b31cd --- /dev/null +++ b/fs/squashfs/xattr.c | |||
@@ -0,0 +1,323 @@ | |||
1 | /* | ||
2 | * Squashfs - a compressed read only filesystem for Linux | ||
3 | * | ||
4 | * Copyright (c) 2010 | ||
5 | * Phillip Lougher <phillip@lougher.demon.co.uk> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; either version 2, | ||
10 | * or (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
20 | * | ||
21 | * xattr.c | ||
22 | */ | ||
23 | |||
24 | #include <linux/init.h> | ||
25 | #include <linux/module.h> | ||
26 | #include <linux/string.h> | ||
27 | #include <linux/fs.h> | ||
28 | #include <linux/vfs.h> | ||
29 | #include <linux/xattr.h> | ||
30 | #include <linux/slab.h> | ||
31 | |||
32 | #include "squashfs_fs.h" | ||
33 | #include "squashfs_fs_sb.h" | ||
34 | #include "squashfs_fs_i.h" | ||
35 | #include "squashfs.h" | ||
36 | |||
37 | static const struct xattr_handler *squashfs_xattr_handler(int); | ||
38 | |||
39 | ssize_t squashfs_listxattr(struct dentry *d, char *buffer, | ||
40 | size_t buffer_size) | ||
41 | { | ||
42 | struct inode *inode = d->d_inode; | ||
43 | struct super_block *sb = inode->i_sb; | ||
44 | struct squashfs_sb_info *msblk = sb->s_fs_info; | ||
45 | u64 start = SQUASHFS_XATTR_BLK(squashfs_i(inode)->xattr) | ||
46 | + msblk->xattr_table; | ||
47 | int offset = SQUASHFS_XATTR_OFFSET(squashfs_i(inode)->xattr); | ||
48 | int count = squashfs_i(inode)->xattr_count; | ||
49 | size_t rest = buffer_size; | ||
50 | int err; | ||
51 | |||
52 | /* check that the file system has xattrs */ | ||
53 | if (msblk->xattr_id_table == NULL) | ||
54 | return -EOPNOTSUPP; | ||
55 | |||
56 | /* loop reading each xattr name */ | ||
57 | while (count--) { | ||
58 | struct squashfs_xattr_entry entry; | ||
59 | struct squashfs_xattr_val val; | ||
60 | const struct xattr_handler *handler; | ||
61 | int name_size, prefix_size = 0; | ||
62 | |||
63 | err = squashfs_read_metadata(sb, &entry, &start, &offset, | ||
64 | sizeof(entry)); | ||
65 | if (err < 0) | ||
66 | goto failed; | ||
67 | |||
68 | name_size = le16_to_cpu(entry.size); | ||
69 | handler = squashfs_xattr_handler(le16_to_cpu(entry.type)); | ||
70 | if (handler) | ||
71 | prefix_size = handler->list(d, buffer, rest, NULL, | ||
72 | name_size, handler->flags); | ||
73 | if (prefix_size) { | ||
74 | if (buffer) { | ||
75 | if (prefix_size + name_size + 1 > rest) { | ||
76 | err = -ERANGE; | ||
77 | goto failed; | ||
78 | } | ||
79 | buffer += prefix_size; | ||
80 | } | ||
81 | err = squashfs_read_metadata(sb, buffer, &start, | ||
82 | &offset, name_size); | ||
83 | if (err < 0) | ||
84 | goto failed; | ||
85 | if (buffer) { | ||
86 | buffer[name_size] = '\0'; | ||
87 | buffer += name_size + 1; | ||
88 | } | ||
89 | rest -= prefix_size + name_size + 1; | ||
90 | } else { | ||
91 | /* no handler or insuffficient privileges, so skip */ | ||
92 | err = squashfs_read_metadata(sb, NULL, &start, | ||
93 | &offset, name_size); | ||
94 | if (err < 0) | ||
95 | goto failed; | ||
96 | } | ||
97 | |||
98 | |||
99 | /* skip remaining xattr entry */ | ||
100 | err = squashfs_read_metadata(sb, &val, &start, &offset, | ||
101 | sizeof(val)); | ||
102 | if (err < 0) | ||
103 | goto failed; | ||
104 | |||
105 | err = squashfs_read_metadata(sb, NULL, &start, &offset, | ||
106 | le32_to_cpu(val.vsize)); | ||
107 | if (err < 0) | ||
108 | goto failed; | ||
109 | } | ||
110 | err = buffer_size - rest; | ||
111 | |||
112 | failed: | ||
113 | return err; | ||
114 | } | ||
115 | |||
116 | |||
117 | static int squashfs_xattr_get(struct inode *inode, int name_index, | ||
118 | const char *name, void *buffer, size_t buffer_size) | ||
119 | { | ||
120 | struct super_block *sb = inode->i_sb; | ||
121 | struct squashfs_sb_info *msblk = sb->s_fs_info; | ||
122 | u64 start = SQUASHFS_XATTR_BLK(squashfs_i(inode)->xattr) | ||
123 | + msblk->xattr_table; | ||
124 | int offset = SQUASHFS_XATTR_OFFSET(squashfs_i(inode)->xattr); | ||
125 | int count = squashfs_i(inode)->xattr_count; | ||
126 | int name_len = strlen(name); | ||
127 | int err, vsize; | ||
128 | char *target = kmalloc(name_len, GFP_KERNEL); | ||
129 | |||
130 | if (target == NULL) | ||
131 | return -ENOMEM; | ||
132 | |||
133 | /* loop reading each xattr name */ | ||
134 | for (; count; count--) { | ||
135 | struct squashfs_xattr_entry entry; | ||
136 | struct squashfs_xattr_val val; | ||
137 | int type, prefix, name_size; | ||
138 | |||
139 | err = squashfs_read_metadata(sb, &entry, &start, &offset, | ||
140 | sizeof(entry)); | ||
141 | if (err < 0) | ||
142 | goto failed; | ||
143 | |||
144 | name_size = le16_to_cpu(entry.size); | ||
145 | type = le16_to_cpu(entry.type); | ||
146 | prefix = type & SQUASHFS_XATTR_PREFIX_MASK; | ||
147 | |||
148 | if (prefix == name_index && name_size == name_len) | ||
149 | err = squashfs_read_metadata(sb, target, &start, | ||
150 | &offset, name_size); | ||
151 | else | ||
152 | err = squashfs_read_metadata(sb, NULL, &start, | ||
153 | &offset, name_size); | ||
154 | if (err < 0) | ||
155 | goto failed; | ||
156 | |||
157 | if (prefix == name_index && name_size == name_len && | ||
158 | strncmp(target, name, name_size) == 0) { | ||
159 | /* found xattr */ | ||
160 | if (type & SQUASHFS_XATTR_VALUE_OOL) { | ||
161 | __le64 xattr; | ||
162 | /* val is a reference to the real location */ | ||
163 | err = squashfs_read_metadata(sb, &val, &start, | ||
164 | &offset, sizeof(val)); | ||
165 | if (err < 0) | ||
166 | goto failed; | ||
167 | err = squashfs_read_metadata(sb, &xattr, &start, | ||
168 | &offset, sizeof(xattr)); | ||
169 | if (err < 0) | ||
170 | goto failed; | ||
171 | xattr = le64_to_cpu(xattr); | ||
172 | start = SQUASHFS_XATTR_BLK(xattr) + | ||
173 | msblk->xattr_table; | ||
174 | offset = SQUASHFS_XATTR_OFFSET(xattr); | ||
175 | } | ||
176 | /* read xattr value */ | ||
177 | err = squashfs_read_metadata(sb, &val, &start, &offset, | ||
178 | sizeof(val)); | ||
179 | if (err < 0) | ||
180 | goto failed; | ||
181 | |||
182 | vsize = le32_to_cpu(val.vsize); | ||
183 | if (buffer) { | ||
184 | if (vsize > buffer_size) { | ||
185 | err = -ERANGE; | ||
186 | goto failed; | ||
187 | } | ||
188 | err = squashfs_read_metadata(sb, buffer, &start, | ||
189 | &offset, vsize); | ||
190 | if (err < 0) | ||
191 | goto failed; | ||
192 | } | ||
193 | break; | ||
194 | } | ||
195 | |||
196 | /* no match, skip remaining xattr entry */ | ||
197 | err = squashfs_read_metadata(sb, &val, &start, &offset, | ||
198 | sizeof(val)); | ||
199 | if (err < 0) | ||
200 | goto failed; | ||
201 | err = squashfs_read_metadata(sb, NULL, &start, &offset, | ||
202 | le32_to_cpu(val.vsize)); | ||
203 | if (err < 0) | ||
204 | goto failed; | ||
205 | } | ||
206 | err = count ? vsize : -ENODATA; | ||
207 | |||
208 | failed: | ||
209 | kfree(target); | ||
210 | return err; | ||
211 | } | ||
212 | |||
213 | |||
214 | /* | ||
215 | * User namespace support | ||
216 | */ | ||
217 | static size_t squashfs_user_list(struct dentry *d, char *list, size_t list_size, | ||
218 | const char *name, size_t name_len, int type) | ||
219 | { | ||
220 | if (list && XATTR_USER_PREFIX_LEN <= list_size) | ||
221 | memcpy(list, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); | ||
222 | return XATTR_USER_PREFIX_LEN; | ||
223 | } | ||
224 | |||
225 | static int squashfs_user_get(struct dentry *d, const char *name, void *buffer, | ||
226 | size_t size, int type) | ||
227 | { | ||
228 | if (name[0] == '\0') | ||
229 | return -EINVAL; | ||
230 | |||
231 | return squashfs_xattr_get(d->d_inode, SQUASHFS_XATTR_USER, name, | ||
232 | buffer, size); | ||
233 | } | ||
234 | |||
235 | static const struct xattr_handler squashfs_xattr_user_handler = { | ||
236 | .prefix = XATTR_USER_PREFIX, | ||
237 | .list = squashfs_user_list, | ||
238 | .get = squashfs_user_get | ||
239 | }; | ||
240 | |||
241 | /* | ||
242 | * Trusted namespace support | ||
243 | */ | ||
244 | static size_t squashfs_trusted_list(struct dentry *d, char *list, | ||
245 | size_t list_size, const char *name, size_t name_len, int type) | ||
246 | { | ||
247 | if (!capable(CAP_SYS_ADMIN)) | ||
248 | return 0; | ||
249 | |||
250 | if (list && XATTR_TRUSTED_PREFIX_LEN <= list_size) | ||
251 | memcpy(list, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN); | ||
252 | return XATTR_TRUSTED_PREFIX_LEN; | ||
253 | } | ||
254 | |||
255 | static int squashfs_trusted_get(struct dentry *d, const char *name, | ||
256 | void *buffer, size_t size, int type) | ||
257 | { | ||
258 | if (name[0] == '\0') | ||
259 | return -EINVAL; | ||
260 | |||
261 | return squashfs_xattr_get(d->d_inode, SQUASHFS_XATTR_TRUSTED, name, | ||
262 | buffer, size); | ||
263 | } | ||
264 | |||
265 | static const struct xattr_handler squashfs_xattr_trusted_handler = { | ||
266 | .prefix = XATTR_TRUSTED_PREFIX, | ||
267 | .list = squashfs_trusted_list, | ||
268 | .get = squashfs_trusted_get | ||
269 | }; | ||
270 | |||
271 | /* | ||
272 | * Security namespace support | ||
273 | */ | ||
274 | static size_t squashfs_security_list(struct dentry *d, char *list, | ||
275 | size_t list_size, const char *name, size_t name_len, int type) | ||
276 | { | ||
277 | if (list && XATTR_SECURITY_PREFIX_LEN <= list_size) | ||
278 | memcpy(list, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN); | ||
279 | return XATTR_SECURITY_PREFIX_LEN; | ||
280 | } | ||
281 | |||
282 | static int squashfs_security_get(struct dentry *d, const char *name, | ||
283 | void *buffer, size_t size, int type) | ||
284 | { | ||
285 | if (name[0] == '\0') | ||
286 | return -EINVAL; | ||
287 | |||
288 | return squashfs_xattr_get(d->d_inode, SQUASHFS_XATTR_SECURITY, name, | ||
289 | buffer, size); | ||
290 | } | ||
291 | |||
292 | static const struct xattr_handler squashfs_xattr_security_handler = { | ||
293 | .prefix = XATTR_SECURITY_PREFIX, | ||
294 | .list = squashfs_security_list, | ||
295 | .get = squashfs_security_get | ||
296 | }; | ||
297 | |||
298 | static inline const struct xattr_handler *squashfs_xattr_handler(int type) | ||
299 | { | ||
300 | if (type & ~(SQUASHFS_XATTR_PREFIX_MASK | SQUASHFS_XATTR_VALUE_OOL)) | ||
301 | /* ignore unrecognised type */ | ||
302 | return NULL; | ||
303 | |||
304 | switch (type & SQUASHFS_XATTR_PREFIX_MASK) { | ||
305 | case SQUASHFS_XATTR_USER: | ||
306 | return &squashfs_xattr_user_handler; | ||
307 | case SQUASHFS_XATTR_TRUSTED: | ||
308 | return &squashfs_xattr_trusted_handler; | ||
309 | case SQUASHFS_XATTR_SECURITY: | ||
310 | return &squashfs_xattr_security_handler; | ||
311 | default: | ||
312 | /* ignore unrecognised type */ | ||
313 | return NULL; | ||
314 | } | ||
315 | } | ||
316 | |||
317 | const struct xattr_handler *squashfs_xattr_handlers[] = { | ||
318 | &squashfs_xattr_user_handler, | ||
319 | &squashfs_xattr_trusted_handler, | ||
320 | &squashfs_xattr_security_handler, | ||
321 | NULL | ||
322 | }; | ||
323 | |||
diff --git a/fs/squashfs/xattr.h b/fs/squashfs/xattr.h new file mode 100644 index 000000000000..9da071ae181c --- /dev/null +++ b/fs/squashfs/xattr.h | |||
@@ -0,0 +1,46 @@ | |||
1 | /* | ||
2 | * Squashfs - a compressed read only filesystem for Linux | ||
3 | * | ||
4 | * Copyright (c) 2010 | ||
5 | * Phillip Lougher <phillip@lougher.demon.co.uk> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; either version 2, | ||
10 | * or (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
20 | * | ||
21 | * xattr.h | ||
22 | */ | ||
23 | |||
24 | #ifdef CONFIG_SQUASHFS_XATTRS | ||
25 | extern __le64 *squashfs_read_xattr_id_table(struct super_block *, u64, | ||
26 | u64 *, int *); | ||
27 | extern int squashfs_xattr_lookup(struct super_block *, unsigned int, int *, | ||
28 | int *, unsigned long long *); | ||
29 | #else | ||
30 | static inline __le64 *squashfs_read_xattr_id_table(struct super_block *sb, | ||
31 | u64 start, u64 *xattr_table_start, int *xattr_ids) | ||
32 | { | ||
33 | ERROR("Xattrs in filesystem, these will be ignored\n"); | ||
34 | return ERR_PTR(-ENOTSUPP); | ||
35 | } | ||
36 | |||
37 | static inline int squashfs_xattr_lookup(struct super_block *sb, | ||
38 | unsigned int index, int *count, int *size, | ||
39 | unsigned long long *xattr) | ||
40 | { | ||
41 | return 0; | ||
42 | } | ||
43 | #define squashfs_listxattr NULL | ||
44 | #define generic_getxattr NULL | ||
45 | #define squashfs_xattr_handlers NULL | ||
46 | #endif | ||
diff --git a/fs/squashfs/xattr_id.c b/fs/squashfs/xattr_id.c new file mode 100644 index 000000000000..cfb41106098f --- /dev/null +++ b/fs/squashfs/xattr_id.c | |||
@@ -0,0 +1,100 @@ | |||
1 | /* | ||
2 | * Squashfs - a compressed read only filesystem for Linux | ||
3 | * | ||
4 | * Copyright (c) 2010 | ||
5 | * Phillip Lougher <phillip@lougher.demon.co.uk> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; either version 2, | ||
10 | * or (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | ||
20 | * | ||
21 | * xattr_id.c | ||
22 | */ | ||
23 | |||
24 | /* | ||
25 | * This file implements code to map the 32-bit xattr id stored in the inode | ||
26 | * into the on disk location of the xattr data. | ||
27 | */ | ||
28 | |||
29 | #include <linux/fs.h> | ||
30 | #include <linux/vfs.h> | ||
31 | #include <linux/slab.h> | ||
32 | |||
33 | #include "squashfs_fs.h" | ||
34 | #include "squashfs_fs_sb.h" | ||
35 | #include "squashfs_fs_i.h" | ||
36 | #include "squashfs.h" | ||
37 | |||
38 | /* | ||
39 | * Map xattr id using the xattr id look up table | ||
40 | */ | ||
41 | int squashfs_xattr_lookup(struct super_block *sb, unsigned int index, | ||
42 | int *count, unsigned int *size, unsigned long long *xattr) | ||
43 | { | ||
44 | struct squashfs_sb_info *msblk = sb->s_fs_info; | ||
45 | int block = SQUASHFS_XATTR_BLOCK(index); | ||
46 | int offset = SQUASHFS_XATTR_BLOCK_OFFSET(index); | ||
47 | u64 start_block = le64_to_cpu(msblk->xattr_id_table[block]); | ||
48 | struct squashfs_xattr_id id; | ||
49 | int err; | ||
50 | |||
51 | err = squashfs_read_metadata(sb, &id, &start_block, &offset, | ||
52 | sizeof(id)); | ||
53 | if (err < 0) | ||
54 | return err; | ||
55 | |||
56 | *xattr = le64_to_cpu(id.xattr); | ||
57 | *size = le32_to_cpu(id.size); | ||
58 | *count = le32_to_cpu(id.count); | ||
59 | return 0; | ||
60 | } | ||
61 | |||
62 | |||
63 | /* | ||
64 | * Read uncompressed xattr id lookup table indexes from disk into memory | ||
65 | */ | ||
66 | __le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 start, | ||
67 | u64 *xattr_table_start, int *xattr_ids) | ||
68 | { | ||
69 | unsigned int len; | ||
70 | __le64 *xid_table; | ||
71 | struct squashfs_xattr_id_table id_table; | ||
72 | int err; | ||
73 | |||
74 | err = squashfs_read_table(sb, &id_table, start, sizeof(id_table)); | ||
75 | if (err < 0) { | ||
76 | ERROR("unable to read xattr id table\n"); | ||
77 | return ERR_PTR(err); | ||
78 | } | ||
79 | *xattr_table_start = le64_to_cpu(id_table.xattr_table_start); | ||
80 | *xattr_ids = le32_to_cpu(id_table.xattr_ids); | ||
81 | len = SQUASHFS_XATTR_BLOCK_BYTES(*xattr_ids); | ||
82 | |||
83 | TRACE("In read_xattr_index_table, length %d\n", len); | ||
84 | |||
85 | /* Allocate xattr id lookup table indexes */ | ||
86 | xid_table = kmalloc(len, GFP_KERNEL); | ||
87 | if (xid_table == NULL) { | ||
88 | ERROR("Failed to allocate xattr id index table\n"); | ||
89 | return ERR_PTR(-ENOMEM); | ||
90 | } | ||
91 | |||
92 | err = squashfs_read_table(sb, xid_table, start + sizeof(id_table), len); | ||
93 | if (err < 0) { | ||
94 | ERROR("unable to read xattr id index table\n"); | ||
95 | kfree(xid_table); | ||
96 | return ERR_PTR(err); | ||
97 | } | ||
98 | |||
99 | return xid_table; | ||
100 | } | ||
diff --git a/fs/udf/dir.c b/fs/udf/dir.c index 3a84455c2a77..1660c81ffa3d 100644 --- a/fs/udf/dir.c +++ b/fs/udf/dir.c | |||
@@ -207,6 +207,7 @@ static int udf_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
207 | 207 | ||
208 | /* readdir and lookup functions */ | 208 | /* readdir and lookup functions */ |
209 | const struct file_operations udf_dir_operations = { | 209 | const struct file_operations udf_dir_operations = { |
210 | .llseek = generic_file_llseek, | ||
210 | .read = generic_read_dir, | 211 | .read = generic_read_dir, |
211 | .readdir = udf_readdir, | 212 | .readdir = udf_readdir, |
212 | .unlocked_ioctl = udf_ioctl, | 213 | .unlocked_ioctl = udf_ioctl, |
diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 14743d935a93..ad9bc1ebd3a6 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c | |||
@@ -918,6 +918,7 @@ again: | |||
918 | sbi->s_bytesex = BYTESEX_LE; | 918 | sbi->s_bytesex = BYTESEX_LE; |
919 | switch ((uspi->fs_magic = fs32_to_cpu(sb, usb3->fs_magic))) { | 919 | switch ((uspi->fs_magic = fs32_to_cpu(sb, usb3->fs_magic))) { |
920 | case UFS_MAGIC: | 920 | case UFS_MAGIC: |
921 | case UFS_MAGIC_BW: | ||
921 | case UFS2_MAGIC: | 922 | case UFS2_MAGIC: |
922 | case UFS_MAGIC_LFN: | 923 | case UFS_MAGIC_LFN: |
923 | case UFS_MAGIC_FEA: | 924 | case UFS_MAGIC_FEA: |
@@ -927,6 +928,7 @@ again: | |||
927 | sbi->s_bytesex = BYTESEX_BE; | 928 | sbi->s_bytesex = BYTESEX_BE; |
928 | switch ((uspi->fs_magic = fs32_to_cpu(sb, usb3->fs_magic))) { | 929 | switch ((uspi->fs_magic = fs32_to_cpu(sb, usb3->fs_magic))) { |
929 | case UFS_MAGIC: | 930 | case UFS_MAGIC: |
931 | case UFS_MAGIC_BW: | ||
930 | case UFS2_MAGIC: | 932 | case UFS2_MAGIC: |
931 | case UFS_MAGIC_LFN: | 933 | case UFS_MAGIC_LFN: |
932 | case UFS_MAGIC_FEA: | 934 | case UFS_MAGIC_FEA: |
diff --git a/fs/ufs/ufs_fs.h b/fs/ufs/ufs_fs.h index 6943ec677c0b..8aba544f9fad 100644 --- a/fs/ufs/ufs_fs.h +++ b/fs/ufs/ufs_fs.h | |||
@@ -48,6 +48,7 @@ typedef __u16 __bitwise __fs16; | |||
48 | #define UFS_SECTOR_SIZE 512 | 48 | #define UFS_SECTOR_SIZE 512 |
49 | #define UFS_SECTOR_BITS 9 | 49 | #define UFS_SECTOR_BITS 9 |
50 | #define UFS_MAGIC 0x00011954 | 50 | #define UFS_MAGIC 0x00011954 |
51 | #define UFS_MAGIC_BW 0x0f242697 | ||
51 | #define UFS2_MAGIC 0x19540119 | 52 | #define UFS2_MAGIC 0x19540119 |
52 | #define UFS_CIGAM 0x54190100 /* byteswapped MAGIC */ | 53 | #define UFS_CIGAM 0x54190100 /* byteswapped MAGIC */ |
53 | 54 | ||