diff options
author | Omar Sandoval <osandov@fb.com> | 2016-05-20 16:50:33 -0400 |
---|---|---|
committer | Chris Mason <clm@fb.com> | 2016-06-25 09:20:10 -0400 |
commit | 02dbfc99b424dde3cf0a492ed3bec4f222441754 (patch) | |
tree | d08f267bac15ea5939185424bbe73306b7f11cb6 | |
parent | 33688abb2802ff3a230bd2441f765477b94cc89e (diff) |
Btrfs: fix ->iterate_shared() by upgrading i_rwsem for delayed nodes
Commit fe742fd4f90f ("Revert "btrfs: switch to ->iterate_shared()"")
backed out the conversion to ->iterate_shared() for Btrfs because the
delayed inode handling in btrfs_real_readdir() is racy. However, we can
still do readdir in parallel if there are no delayed nodes.
This is a temporary fix which upgrades the shared inode lock to an
exclusive lock only when we have delayed items until we come up with a
more complete solution. While we're here, rename the
btrfs_{get,put}_delayed_items functions to make it very clear that
they're just for readdir.
Tested with xfstests and by doing a parallel kernel build:
while make tinyconfig && make -j4 && git clean -dqfx; do
:
done
along with a bunch of parallel finds in another shell:
while true; do
for ((i=0; i<4; i++)); do
find . >/dev/null &
done
wait
done
Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>
-rw-r--r-- | fs/btrfs/delayed-inode.c | 27 | ||||
-rw-r--r-- | fs/btrfs/delayed-inode.h | 10 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 10 |
3 files changed, 34 insertions, 13 deletions
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 61561c2a3f96..d3aaabbfada0 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c | |||
@@ -1606,15 +1606,23 @@ int btrfs_inode_delayed_dir_index_count(struct inode *inode) | |||
1606 | return 0; | 1606 | return 0; |
1607 | } | 1607 | } |
1608 | 1608 | ||
1609 | void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list, | 1609 | bool btrfs_readdir_get_delayed_items(struct inode *inode, |
1610 | struct list_head *del_list) | 1610 | struct list_head *ins_list, |
1611 | struct list_head *del_list) | ||
1611 | { | 1612 | { |
1612 | struct btrfs_delayed_node *delayed_node; | 1613 | struct btrfs_delayed_node *delayed_node; |
1613 | struct btrfs_delayed_item *item; | 1614 | struct btrfs_delayed_item *item; |
1614 | 1615 | ||
1615 | delayed_node = btrfs_get_delayed_node(inode); | 1616 | delayed_node = btrfs_get_delayed_node(inode); |
1616 | if (!delayed_node) | 1617 | if (!delayed_node) |
1617 | return; | 1618 | return false; |
1619 | |||
1620 | /* | ||
1621 | * We can only do one readdir with delayed items at a time because of | ||
1622 | * item->readdir_list. | ||
1623 | */ | ||
1624 | inode_unlock_shared(inode); | ||
1625 | inode_lock(inode); | ||
1618 | 1626 | ||
1619 | mutex_lock(&delayed_node->mutex); | 1627 | mutex_lock(&delayed_node->mutex); |
1620 | item = __btrfs_first_delayed_insertion_item(delayed_node); | 1628 | item = __btrfs_first_delayed_insertion_item(delayed_node); |
@@ -1641,10 +1649,13 @@ void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list, | |||
1641 | * requeue or dequeue this delayed node. | 1649 | * requeue or dequeue this delayed node. |
1642 | */ | 1650 | */ |
1643 | atomic_dec(&delayed_node->refs); | 1651 | atomic_dec(&delayed_node->refs); |
1652 | |||
1653 | return true; | ||
1644 | } | 1654 | } |
1645 | 1655 | ||
1646 | void btrfs_put_delayed_items(struct list_head *ins_list, | 1656 | void btrfs_readdir_put_delayed_items(struct inode *inode, |
1647 | struct list_head *del_list) | 1657 | struct list_head *ins_list, |
1658 | struct list_head *del_list) | ||
1648 | { | 1659 | { |
1649 | struct btrfs_delayed_item *curr, *next; | 1660 | struct btrfs_delayed_item *curr, *next; |
1650 | 1661 | ||
@@ -1659,6 +1670,12 @@ void btrfs_put_delayed_items(struct list_head *ins_list, | |||
1659 | if (atomic_dec_and_test(&curr->refs)) | 1670 | if (atomic_dec_and_test(&curr->refs)) |
1660 | kfree(curr); | 1671 | kfree(curr); |
1661 | } | 1672 | } |
1673 | |||
1674 | /* | ||
1675 | * The VFS is going to do up_read(), so we need to downgrade back to a | ||
1676 | * read lock. | ||
1677 | */ | ||
1678 | downgrade_write(&inode->i_rwsem); | ||
1662 | } | 1679 | } |
1663 | 1680 | ||
1664 | int btrfs_should_delete_dir_index(struct list_head *del_list, | 1681 | int btrfs_should_delete_dir_index(struct list_head *del_list, |
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index 0167853c84ae..2495b3d4075f 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h | |||
@@ -137,10 +137,12 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root); | |||
137 | void btrfs_destroy_delayed_inodes(struct btrfs_root *root); | 137 | void btrfs_destroy_delayed_inodes(struct btrfs_root *root); |
138 | 138 | ||
139 | /* Used for readdir() */ | 139 | /* Used for readdir() */ |
140 | void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list, | 140 | bool btrfs_readdir_get_delayed_items(struct inode *inode, |
141 | struct list_head *del_list); | 141 | struct list_head *ins_list, |
142 | void btrfs_put_delayed_items(struct list_head *ins_list, | 142 | struct list_head *del_list); |
143 | struct list_head *del_list); | 143 | void btrfs_readdir_put_delayed_items(struct inode *inode, |
144 | struct list_head *ins_list, | ||
145 | struct list_head *del_list); | ||
144 | int btrfs_should_delete_dir_index(struct list_head *del_list, | 146 | int btrfs_should_delete_dir_index(struct list_head *del_list, |
145 | u64 index); | 147 | u64 index); |
146 | int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, | 148 | int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d2be95cfb6d1..969a25c5abcb 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -5757,6 +5757,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) | |||
5757 | int name_len; | 5757 | int name_len; |
5758 | int is_curr = 0; /* ctx->pos points to the current index? */ | 5758 | int is_curr = 0; /* ctx->pos points to the current index? */ |
5759 | bool emitted; | 5759 | bool emitted; |
5760 | bool put = false; | ||
5760 | 5761 | ||
5761 | /* FIXME, use a real flag for deciding about the key type */ | 5762 | /* FIXME, use a real flag for deciding about the key type */ |
5762 | if (root->fs_info->tree_root == root) | 5763 | if (root->fs_info->tree_root == root) |
@@ -5774,7 +5775,8 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) | |||
5774 | if (key_type == BTRFS_DIR_INDEX_KEY) { | 5775 | if (key_type == BTRFS_DIR_INDEX_KEY) { |
5775 | INIT_LIST_HEAD(&ins_list); | 5776 | INIT_LIST_HEAD(&ins_list); |
5776 | INIT_LIST_HEAD(&del_list); | 5777 | INIT_LIST_HEAD(&del_list); |
5777 | btrfs_get_delayed_items(inode, &ins_list, &del_list); | 5778 | put = btrfs_readdir_get_delayed_items(inode, &ins_list, |
5779 | &del_list); | ||
5778 | } | 5780 | } |
5779 | 5781 | ||
5780 | key.type = key_type; | 5782 | key.type = key_type; |
@@ -5921,8 +5923,8 @@ next: | |||
5921 | nopos: | 5923 | nopos: |
5922 | ret = 0; | 5924 | ret = 0; |
5923 | err: | 5925 | err: |
5924 | if (key_type == BTRFS_DIR_INDEX_KEY) | 5926 | if (put) |
5925 | btrfs_put_delayed_items(&ins_list, &del_list); | 5927 | btrfs_readdir_put_delayed_items(inode, &ins_list, &del_list); |
5926 | btrfs_free_path(path); | 5928 | btrfs_free_path(path); |
5927 | return ret; | 5929 | return ret; |
5928 | } | 5930 | } |
@@ -10534,7 +10536,7 @@ static const struct inode_operations btrfs_dir_ro_inode_operations = { | |||
10534 | static const struct file_operations btrfs_dir_file_operations = { | 10536 | static const struct file_operations btrfs_dir_file_operations = { |
10535 | .llseek = generic_file_llseek, | 10537 | .llseek = generic_file_llseek, |
10536 | .read = generic_read_dir, | 10538 | .read = generic_read_dir, |
10537 | .iterate = btrfs_real_readdir, | 10539 | .iterate_shared = btrfs_real_readdir, |
10538 | .unlocked_ioctl = btrfs_ioctl, | 10540 | .unlocked_ioctl = btrfs_ioctl, |
10539 | #ifdef CONFIG_COMPAT | 10541 | #ifdef CONFIG_COMPAT |
10540 | .compat_ioctl = btrfs_compat_ioctl, | 10542 | .compat_ioctl = btrfs_compat_ioctl, |