about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Omar Sandoval <osandov@fb.com> 2016-05-20 16:50:33 -0400
committer Chris Mason <clm@fb.com> 2016-06-25 09:20:10 -0400
commit 02dbfc99b424dde3cf0a492ed3bec4f222441754 (patch)
tree d08f267bac15ea5939185424bbe73306b7f11cb6
parent 33688abb2802ff3a230bd2441f765477b94cc89e (diff)
Btrfs: fix ->iterate_shared() by upgrading i_rwsem for delayed nodes
Commit fe742fd4f90f ("Revert "btrfs: switch to ->iterate_shared()"") backed out the conversion to ->iterate_shared() for Btrfs because the delayed inode handling in btrfs_real_readdir() is racy. However, we can still do readdir in parallel if there are no delayed nodes.

This is a temporary fix which upgrades the shared inode lock to an exclusive lock only when we have delayed items until we come up with a more complete solution. While we're here, rename the btrfs_{get,put}_delayed_items functions to make it very clear that they're just for readdir.

Tested with xfstests and by doing a parallel kernel build:

	while make tinyconfig && make -j4 && git clean dqfx; do
		:
	done

along with a bunch of parallel finds in another shell:

	while true; do
		for ((i=0; i<4; i++)); do
			find . >/dev/null &
		done
		wait
	done

Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>
-rw-r--r-- fs/btrfs/delayed-inode.c 27
-rw-r--r-- fs/btrfs/delayed-inode.h 10
-rw-r--r-- fs/btrfs/inode.c 10
3 files changed, 34 insertions, 13 deletions
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 61561c2a3f96..d3aaabbfada0 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1606,15 +1606,23 @@ int btrfs_inode_delayed_dir_index_count(struct inode *inode)
1606 return 0; 1606 return 0;
1607} 1607}
1608 1608
1609void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list, 1609bool btrfs_readdir_get_delayed_items(struct inode *inode,
1610 struct list_head *del_list) 1610 struct list_head *ins_list,
1611 struct list_head *del_list)
1611{ 1612{
1612 struct btrfs_delayed_node *delayed_node; 1613 struct btrfs_delayed_node *delayed_node;
1613 struct btrfs_delayed_item *item; 1614 struct btrfs_delayed_item *item;
1614 1615
1615 delayed_node = btrfs_get_delayed_node(inode); 1616 delayed_node = btrfs_get_delayed_node(inode);
1616 if (!delayed_node) 1617 if (!delayed_node)
1617 return; 1618 return false;
1619
1620 /*
1621 * We can only do one readdir with delayed items at a time because of
1622 * item->readdir_list.
1623 */
1624 inode_unlock_shared(inode);
1625 inode_lock(inode);
1618 1626
1619 mutex_lock(&delayed_node->mutex); 1627 mutex_lock(&delayed_node->mutex);
1620 item = __btrfs_first_delayed_insertion_item(delayed_node); 1628 item = __btrfs_first_delayed_insertion_item(delayed_node);
@@ -1641,10 +1649,13 @@ void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
1641 * requeue or dequeue this delayed node. 1649 * requeue or dequeue this delayed node.
1642 */ 1650 */
1643 atomic_dec(&delayed_node->refs); 1651 atomic_dec(&delayed_node->refs);
1652
1653 return true;
1644} 1654}
1645 1655
1646void btrfs_put_delayed_items(struct list_head *ins_list, 1656void btrfs_readdir_put_delayed_items(struct inode *inode,
1647 struct list_head *del_list) 1657 struct list_head *ins_list,
1658 struct list_head *del_list)
1648{ 1659{
1649 struct btrfs_delayed_item *curr, *next; 1660 struct btrfs_delayed_item *curr, *next;
1650 1661
@@ -1659,6 +1670,12 @@ void btrfs_put_delayed_items(struct list_head *ins_list,
1659 if (atomic_dec_and_test(&curr->refs)) 1670 if (atomic_dec_and_test(&curr->refs))
1660 kfree(curr); 1671 kfree(curr);
1661 } 1672 }
1673
1674 /*
1675 * The VFS is going to do up_read(), so we need to downgrade back to a
1676 * read lock.
1677 */
1678 downgrade_write(&inode->i_rwsem);
1662} 1679}
1663 1680
1664int btrfs_should_delete_dir_index(struct list_head *del_list, 1681int btrfs_should_delete_dir_index(struct list_head *del_list,
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index 0167853c84ae..2495b3d4075f 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -137,10 +137,12 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root);
137void btrfs_destroy_delayed_inodes(struct btrfs_root *root); 137void btrfs_destroy_delayed_inodes(struct btrfs_root *root);
138 138
139/* Used for readdir() */ 139/* Used for readdir() */
140void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list, 140bool btrfs_readdir_get_delayed_items(struct inode *inode,
141 struct list_head *del_list); 141 struct list_head *ins_list,
142void btrfs_put_delayed_items(struct list_head *ins_list, 142 struct list_head *del_list);
143 struct list_head *del_list); 143void btrfs_readdir_put_delayed_items(struct inode *inode,
144 struct list_head *ins_list,
145 struct list_head *del_list);
144int btrfs_should_delete_dir_index(struct list_head *del_list, 146int btrfs_should_delete_dir_index(struct list_head *del_list,
145 u64 index); 147 u64 index);
146int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, 148int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d2be95cfb6d1..969a25c5abcb 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5757,6 +5757,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
5757 int name_len; 5757 int name_len;
5758 int is_curr = 0; /* ctx->pos points to the current index? */ 5758 int is_curr = 0; /* ctx->pos points to the current index? */
5759 bool emitted; 5759 bool emitted;
5760 bool put = false;
5760 5761
5761 /* FIXME, use a real flag for deciding about the key type */ 5762 /* FIXME, use a real flag for deciding about the key type */
5762 if (root->fs_info->tree_root == root) 5763 if (root->fs_info->tree_root == root)
@@ -5774,7 +5775,8 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
5774 if (key_type == BTRFS_DIR_INDEX_KEY) { 5775 if (key_type == BTRFS_DIR_INDEX_KEY) {
5775 INIT_LIST_HEAD(&ins_list); 5776 INIT_LIST_HEAD(&ins_list);
5776 INIT_LIST_HEAD(&del_list); 5777 INIT_LIST_HEAD(&del_list);
5777 btrfs_get_delayed_items(inode, &ins_list, &del_list); 5778 put = btrfs_readdir_get_delayed_items(inode, &ins_list,
5779 &del_list);
5778 } 5780 }
5779 5781
5780 key.type = key_type; 5782 key.type = key_type;
@@ -5921,8 +5923,8 @@ next:
5921nopos: 5923nopos:
5922 ret = 0; 5924 ret = 0;
5923err: 5925err:
5924 if (key_type == BTRFS_DIR_INDEX_KEY) 5926 if (put)
5925 btrfs_put_delayed_items(&ins_list, &del_list); 5927 btrfs_readdir_put_delayed_items(inode, &ins_list, &del_list);
5926 btrfs_free_path(path); 5928 btrfs_free_path(path);
5927 return ret; 5929 return ret;
5928} 5930}
@@ -10534,7 +10536,7 @@ static const struct inode_operations btrfs_dir_ro_inode_operations = {
10534static const struct file_operations btrfs_dir_file_operations = { 10536static const struct file_operations btrfs_dir_file_operations = {
10535 .llseek = generic_file_llseek, 10537 .llseek = generic_file_llseek,
10536 .read = generic_read_dir, 10538 .read = generic_read_dir,
10537 .iterate = btrfs_real_readdir, 10539 .iterate_shared = btrfs_real_readdir,
10538 .unlocked_ioctl = btrfs_ioctl, 10540 .unlocked_ioctl = btrfs_ioctl,
10539#ifdef CONFIG_COMPAT 10541#ifdef CONFIG_COMPAT
10540 .compat_ioctl = btrfs_compat_ioctl, 10542 .compat_ioctl = btrfs_compat_ioctl,