diff options
author | Yan Zheng <zheng.yan@oracle.com> | 2009-07-22 09:59:00 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2009-07-22 09:59:00 -0400 |
commit | 33c66f430bfa3a033e70470e4c93f967156b696d (patch) | |
tree | 5af7edc4564aa3f32033b364495828eb32b690a7 /fs/btrfs/ctree.c | |
parent | e457afec60fdbd86b963d36f4a8a9285088c6043 (diff) |
Btrfs: fix locking issue in btrfs_find_next_key
When walking up the tree, btrfs_find_next_key assumes the upper level tree
block is properly locked. This isn't always true even path->keep_locks is 1.
This is because btrfs_find_next_key may advance path->slots[] several times
instead of only once.
When 'path->slots[level] >= btrfs_header_nritems(path->nodes[level])' is found,
we can't guarantee the original value of 'path->slots[level]' is
'btrfs_header_nritems(path->nodes[level]) - 1'. If it's not, the tree block at
'level + 1' isn't locked.
This patch fixes the issue by explicitly checking the locking state,
re-searching the tree if it's not locked.
Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/ctree.c')
-rw-r--r-- | fs/btrfs/ctree.c | 95 |
1 files changed, 62 insertions, 33 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 7bb66c65ddfd..fdd423a550d6 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -1701,6 +1701,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root | |||
1701 | struct extent_buffer *b; | 1701 | struct extent_buffer *b; |
1702 | int slot; | 1702 | int slot; |
1703 | int ret; | 1703 | int ret; |
1704 | int err; | ||
1704 | int level; | 1705 | int level; |
1705 | int lowest_unlock = 1; | 1706 | int lowest_unlock = 1; |
1706 | u8 lowest_level = 0; | 1707 | u8 lowest_level = 0; |
@@ -1737,8 +1738,6 @@ again: | |||
1737 | p->locks[level] = 1; | 1738 | p->locks[level] = 1; |
1738 | 1739 | ||
1739 | if (cow) { | 1740 | if (cow) { |
1740 | int wret; | ||
1741 | |||
1742 | /* | 1741 | /* |
1743 | * if we don't really need to cow this block | 1742 | * if we don't really need to cow this block |
1744 | * then we don't want to set the path blocking, | 1743 | * then we don't want to set the path blocking, |
@@ -1749,12 +1748,12 @@ again: | |||
1749 | 1748 | ||
1750 | btrfs_set_path_blocking(p); | 1749 | btrfs_set_path_blocking(p); |
1751 | 1750 | ||
1752 | wret = btrfs_cow_block(trans, root, b, | 1751 | err = btrfs_cow_block(trans, root, b, |
1753 | p->nodes[level + 1], | 1752 | p->nodes[level + 1], |
1754 | p->slots[level + 1], &b); | 1753 | p->slots[level + 1], &b); |
1755 | if (wret) { | 1754 | if (err) { |
1756 | free_extent_buffer(b); | 1755 | free_extent_buffer(b); |
1757 | ret = wret; | 1756 | ret = err; |
1758 | goto done; | 1757 | goto done; |
1759 | } | 1758 | } |
1760 | } | 1759 | } |
@@ -1793,41 +1792,45 @@ cow_done: | |||
1793 | ret = bin_search(b, key, level, &slot); | 1792 | ret = bin_search(b, key, level, &slot); |
1794 | 1793 | ||
1795 | if (level != 0) { | 1794 | if (level != 0) { |
1796 | if (ret && slot > 0) | 1795 | int dec = 0; |
1796 | if (ret && slot > 0) { | ||
1797 | dec = 1; | ||
1797 | slot -= 1; | 1798 | slot -= 1; |
1799 | } | ||
1798 | p->slots[level] = slot; | 1800 | p->slots[level] = slot; |
1799 | ret = setup_nodes_for_search(trans, root, p, b, level, | 1801 | err = setup_nodes_for_search(trans, root, p, b, level, |
1800 | ins_len); | 1802 | ins_len); |
1801 | if (ret == -EAGAIN) | 1803 | if (err == -EAGAIN) |
1802 | goto again; | 1804 | goto again; |
1803 | else if (ret) | 1805 | if (err) { |
1806 | ret = err; | ||
1804 | goto done; | 1807 | goto done; |
1808 | } | ||
1805 | b = p->nodes[level]; | 1809 | b = p->nodes[level]; |
1806 | slot = p->slots[level]; | 1810 | slot = p->slots[level]; |
1807 | 1811 | ||
1808 | unlock_up(p, level, lowest_unlock); | 1812 | unlock_up(p, level, lowest_unlock); |
1809 | 1813 | ||
1810 | /* this is only true while dropping a snapshot */ | ||
1811 | if (level == lowest_level) { | 1814 | if (level == lowest_level) { |
1812 | ret = 0; | 1815 | if (dec) |
1816 | p->slots[level]++; | ||
1813 | goto done; | 1817 | goto done; |
1814 | } | 1818 | } |
1815 | 1819 | ||
1816 | ret = read_block_for_search(trans, root, p, | 1820 | err = read_block_for_search(trans, root, p, |
1817 | &b, level, slot, key); | 1821 | &b, level, slot, key); |
1818 | if (ret == -EAGAIN) | 1822 | if (err == -EAGAIN) |
1819 | goto again; | 1823 | goto again; |
1820 | 1824 | if (err) { | |
1821 | if (ret == -EIO) | 1825 | ret = err; |
1822 | goto done; | 1826 | goto done; |
1827 | } | ||
1823 | 1828 | ||
1824 | if (!p->skip_locking) { | 1829 | if (!p->skip_locking) { |
1825 | int lret; | ||
1826 | |||
1827 | btrfs_clear_path_blocking(p, NULL); | 1830 | btrfs_clear_path_blocking(p, NULL); |
1828 | lret = btrfs_try_spin_lock(b); | 1831 | err = btrfs_try_spin_lock(b); |
1829 | 1832 | ||
1830 | if (!lret) { | 1833 | if (!err) { |
1831 | btrfs_set_path_blocking(p); | 1834 | btrfs_set_path_blocking(p); |
1832 | btrfs_tree_lock(b); | 1835 | btrfs_tree_lock(b); |
1833 | btrfs_clear_path_blocking(p, b); | 1836 | btrfs_clear_path_blocking(p, b); |
@@ -1837,16 +1840,14 @@ cow_done: | |||
1837 | p->slots[level] = slot; | 1840 | p->slots[level] = slot; |
1838 | if (ins_len > 0 && | 1841 | if (ins_len > 0 && |
1839 | btrfs_leaf_free_space(root, b) < ins_len) { | 1842 | btrfs_leaf_free_space(root, b) < ins_len) { |
1840 | int sret; | ||
1841 | |||
1842 | btrfs_set_path_blocking(p); | 1843 | btrfs_set_path_blocking(p); |
1843 | sret = split_leaf(trans, root, key, | 1844 | err = split_leaf(trans, root, key, |
1844 | p, ins_len, ret == 0); | 1845 | p, ins_len, ret == 0); |
1845 | btrfs_clear_path_blocking(p, NULL); | 1846 | btrfs_clear_path_blocking(p, NULL); |
1846 | 1847 | ||
1847 | BUG_ON(sret > 0); | 1848 | BUG_ON(err > 0); |
1848 | if (sret) { | 1849 | if (err) { |
1849 | ret = sret; | 1850 | ret = err; |
1850 | goto done; | 1851 | goto done; |
1851 | } | 1852 | } |
1852 | } | 1853 | } |
@@ -4042,10 +4043,9 @@ out: | |||
4042 | * calling this function. | 4043 | * calling this function. |
4043 | */ | 4044 | */ |
4044 | int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, | 4045 | int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, |
4045 | struct btrfs_key *key, int lowest_level, | 4046 | struct btrfs_key *key, int level, |
4046 | int cache_only, u64 min_trans) | 4047 | int cache_only, u64 min_trans) |
4047 | { | 4048 | { |
4048 | int level = lowest_level; | ||
4049 | int slot; | 4049 | int slot; |
4050 | struct extent_buffer *c; | 4050 | struct extent_buffer *c; |
4051 | 4051 | ||
@@ -4058,11 +4058,40 @@ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, | |||
4058 | c = path->nodes[level]; | 4058 | c = path->nodes[level]; |
4059 | next: | 4059 | next: |
4060 | if (slot >= btrfs_header_nritems(c)) { | 4060 | if (slot >= btrfs_header_nritems(c)) { |
4061 | level++; | 4061 | int ret; |
4062 | if (level == BTRFS_MAX_LEVEL) | 4062 | int orig_lowest; |
4063 | struct btrfs_key cur_key; | ||
4064 | if (level + 1 >= BTRFS_MAX_LEVEL || | ||
4065 | !path->nodes[level + 1]) | ||
4063 | return 1; | 4066 | return 1; |
4064 | continue; | 4067 | |
4068 | if (path->locks[level + 1]) { | ||
4069 | level++; | ||
4070 | continue; | ||
4071 | } | ||
4072 | |||
4073 | slot = btrfs_header_nritems(c) - 1; | ||
4074 | if (level == 0) | ||
4075 | btrfs_item_key_to_cpu(c, &cur_key, slot); | ||
4076 | else | ||
4077 | btrfs_node_key_to_cpu(c, &cur_key, slot); | ||
4078 | |||
4079 | orig_lowest = path->lowest_level; | ||
4080 | btrfs_release_path(root, path); | ||
4081 | path->lowest_level = level; | ||
4082 | ret = btrfs_search_slot(NULL, root, &cur_key, path, | ||
4083 | 0, 0); | ||
4084 | path->lowest_level = orig_lowest; | ||
4085 | if (ret < 0) | ||
4086 | return ret; | ||
4087 | |||
4088 | c = path->nodes[level]; | ||
4089 | slot = path->slots[level]; | ||
4090 | if (ret == 0) | ||
4091 | slot++; | ||
4092 | goto next; | ||
4065 | } | 4093 | } |
4094 | |||
4066 | if (level == 0) | 4095 | if (level == 0) |
4067 | btrfs_item_key_to_cpu(c, key, slot); | 4096 | btrfs_item_key_to_cpu(c, key, slot); |
4068 | else { | 4097 | else { |