diff options
author | Miao Xie <miaox@cn.fujitsu.com> | 2010-10-26 20:57:29 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2010-10-29 11:25:45 -0400 |
commit | 19fe0a8b787d7c7f9318975b5a8c6e7e5e54e925 (patch) | |
tree | 78aea56ede5735ae3183f2c4b846773b8fd8155b /fs/btrfs | |
parent | 897ca6e9b4fef86d5dfb6b31fd9f592ce6a47a42 (diff) |
Btrfs: Switch the extent buffer rbtree into a radix tree
This patch reduces the CPU time spent in the extent buffer search by using the
radix tree instead of the rbtree and using the rcu lock instead of the spin
lock.
I did a quick test with the benchmark tool[1] and found that the patch improves the
file creation/deletion performance problem that I reported earlier[2].
Before applying this patch:
Create files:
Total files: 50000
Total time: 0.971531
Average time: 0.000019
Delete files:
Total files: 50000
Total time: 1.366761
Average time: 0.000027
After applying this patch:
Create files:
Total files: 50000
Total time: 0.927455
Average time: 0.000019
Delete files:
Total files: 50000
Total time: 1.292280
Average time: 0.000026
[1] http://marc.info/?l=linux-btrfs&m=128212635122920&q=p3
[2] http://marc.info/?l=linux-btrfs&m=128212635122920&w=2
Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- | fs/btrfs/extent_io.c | 114 | ||||
-rw-r--r-- | fs/btrfs/extent_io.h | 4 |
2 files changed, 49 insertions, 69 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 6e3b326346a7..4c639e156296 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -104,7 +104,7 @@ void extent_io_tree_init(struct extent_io_tree *tree, | |||
104 | struct address_space *mapping, gfp_t mask) | 104 | struct address_space *mapping, gfp_t mask) |
105 | { | 105 | { |
106 | tree->state = RB_ROOT; | 106 | tree->state = RB_ROOT; |
107 | tree->buffer = RB_ROOT; | 107 | INIT_RADIX_TREE(&tree->buffer, GFP_ATOMIC); |
108 | tree->ops = NULL; | 108 | tree->ops = NULL; |
109 | tree->dirty_bytes = 0; | 109 | tree->dirty_bytes = 0; |
110 | spin_lock_init(&tree->lock); | 110 | spin_lock_init(&tree->lock); |
@@ -235,50 +235,6 @@ static inline struct rb_node *tree_search(struct extent_io_tree *tree, | |||
235 | return ret; | 235 | return ret; |
236 | } | 236 | } |
237 | 237 | ||
238 | static struct extent_buffer *buffer_tree_insert(struct extent_io_tree *tree, | ||
239 | u64 offset, struct rb_node *node) | ||
240 | { | ||
241 | struct rb_root *root = &tree->buffer; | ||
242 | struct rb_node **p = &root->rb_node; | ||
243 | struct rb_node *parent = NULL; | ||
244 | struct extent_buffer *eb; | ||
245 | |||
246 | while (*p) { | ||
247 | parent = *p; | ||
248 | eb = rb_entry(parent, struct extent_buffer, rb_node); | ||
249 | |||
250 | if (offset < eb->start) | ||
251 | p = &(*p)->rb_left; | ||
252 | else if (offset > eb->start) | ||
253 | p = &(*p)->rb_right; | ||
254 | else | ||
255 | return eb; | ||
256 | } | ||
257 | |||
258 | rb_link_node(node, parent, p); | ||
259 | rb_insert_color(node, root); | ||
260 | return NULL; | ||
261 | } | ||
262 | |||
263 | static struct extent_buffer *buffer_search(struct extent_io_tree *tree, | ||
264 | u64 offset) | ||
265 | { | ||
266 | struct rb_root *root = &tree->buffer; | ||
267 | struct rb_node *n = root->rb_node; | ||
268 | struct extent_buffer *eb; | ||
269 | |||
270 | while (n) { | ||
271 | eb = rb_entry(n, struct extent_buffer, rb_node); | ||
272 | if (offset < eb->start) | ||
273 | n = n->rb_left; | ||
274 | else if (offset > eb->start) | ||
275 | n = n->rb_right; | ||
276 | else | ||
277 | return eb; | ||
278 | } | ||
279 | return NULL; | ||
280 | } | ||
281 | |||
282 | static void merge_cb(struct extent_io_tree *tree, struct extent_state *new, | 238 | static void merge_cb(struct extent_io_tree *tree, struct extent_state *new, |
283 | struct extent_state *other) | 239 | struct extent_state *other) |
284 | { | 240 | { |
@@ -3082,6 +3038,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, | |||
3082 | eb->len = len; | 3038 | eb->len = len; |
3083 | spin_lock_init(&eb->lock); | 3039 | spin_lock_init(&eb->lock); |
3084 | init_waitqueue_head(&eb->lock_wq); | 3040 | init_waitqueue_head(&eb->lock_wq); |
3041 | INIT_RCU_HEAD(&eb->rcu_head); | ||
3085 | 3042 | ||
3086 | #if LEAK_DEBUG | 3043 | #if LEAK_DEBUG |
3087 | spin_lock_irqsave(&leak_lock, flags); | 3044 | spin_lock_irqsave(&leak_lock, flags); |
@@ -3150,16 +3107,16 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, | |||
3150 | struct page *p; | 3107 | struct page *p; |
3151 | struct address_space *mapping = tree->mapping; | 3108 | struct address_space *mapping = tree->mapping; |
3152 | int uptodate = 1; | 3109 | int uptodate = 1; |
3110 | int ret; | ||
3153 | 3111 | ||
3154 | spin_lock(&tree->buffer_lock); | 3112 | rcu_read_lock(); |
3155 | eb = buffer_search(tree, start); | 3113 | eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); |
3156 | if (eb) { | 3114 | if (eb && atomic_inc_not_zero(&eb->refs)) { |
3157 | atomic_inc(&eb->refs); | 3115 | rcu_read_unlock(); |
3158 | spin_unlock(&tree->buffer_lock); | ||
3159 | mark_page_accessed(eb->first_page); | 3116 | mark_page_accessed(eb->first_page); |
3160 | return eb; | 3117 | return eb; |
3161 | } | 3118 | } |
3162 | spin_unlock(&tree->buffer_lock); | 3119 | rcu_read_unlock(); |
3163 | 3120 | ||
3164 | eb = __alloc_extent_buffer(tree, start, len, mask); | 3121 | eb = __alloc_extent_buffer(tree, start, len, mask); |
3165 | if (!eb) | 3122 | if (!eb) |
@@ -3198,17 +3155,25 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, | |||
3198 | if (uptodate) | 3155 | if (uptodate) |
3199 | set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); | 3156 | set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); |
3200 | 3157 | ||
3158 | ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); | ||
3159 | if (ret) | ||
3160 | goto free_eb; | ||
3161 | |||
3201 | spin_lock(&tree->buffer_lock); | 3162 | spin_lock(&tree->buffer_lock); |
3202 | exists = buffer_tree_insert(tree, start, &eb->rb_node); | 3163 | ret = radix_tree_insert(&tree->buffer, start >> PAGE_CACHE_SHIFT, eb); |
3203 | if (exists) { | 3164 | if (ret == -EEXIST) { |
3165 | exists = radix_tree_lookup(&tree->buffer, | ||
3166 | start >> PAGE_CACHE_SHIFT); | ||
3204 | /* add one reference for the caller */ | 3167 | /* add one reference for the caller */ |
3205 | atomic_inc(&exists->refs); | 3168 | atomic_inc(&exists->refs); |
3206 | spin_unlock(&tree->buffer_lock); | 3169 | spin_unlock(&tree->buffer_lock); |
3170 | radix_tree_preload_end(); | ||
3207 | goto free_eb; | 3171 | goto free_eb; |
3208 | } | 3172 | } |
3209 | /* add one reference for the tree */ | 3173 | /* add one reference for the tree */ |
3210 | atomic_inc(&eb->refs); | 3174 | atomic_inc(&eb->refs); |
3211 | spin_unlock(&tree->buffer_lock); | 3175 | spin_unlock(&tree->buffer_lock); |
3176 | radix_tree_preload_end(); | ||
3212 | return eb; | 3177 | return eb; |
3213 | 3178 | ||
3214 | free_eb: | 3179 | free_eb: |
@@ -3224,16 +3189,16 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, | |||
3224 | { | 3189 | { |
3225 | struct extent_buffer *eb; | 3190 | struct extent_buffer *eb; |
3226 | 3191 | ||
3227 | spin_lock(&tree->buffer_lock); | 3192 | rcu_read_lock(); |
3228 | eb = buffer_search(tree, start); | 3193 | eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); |
3229 | if (eb) | 3194 | if (eb && atomic_inc_not_zero(&eb->refs)) { |
3230 | atomic_inc(&eb->refs); | 3195 | rcu_read_unlock(); |
3231 | spin_unlock(&tree->buffer_lock); | ||
3232 | |||
3233 | if (eb) | ||
3234 | mark_page_accessed(eb->first_page); | 3196 | mark_page_accessed(eb->first_page); |
3197 | return eb; | ||
3198 | } | ||
3199 | rcu_read_unlock(); | ||
3235 | 3200 | ||
3236 | return eb; | 3201 | return NULL; |
3237 | } | 3202 | } |
3238 | 3203 | ||
3239 | void free_extent_buffer(struct extent_buffer *eb) | 3204 | void free_extent_buffer(struct extent_buffer *eb) |
@@ -3863,6 +3828,14 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, | |||
3863 | } | 3828 | } |
3864 | } | 3829 | } |
3865 | 3830 | ||
3831 | static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head) | ||
3832 | { | ||
3833 | struct extent_buffer *eb = | ||
3834 | container_of(head, struct extent_buffer, rcu_head); | ||
3835 | |||
3836 | btrfs_release_extent_buffer(eb); | ||
3837 | } | ||
3838 | |||
3866 | int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page) | 3839 | int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page) |
3867 | { | 3840 | { |
3868 | u64 start = page_offset(page); | 3841 | u64 start = page_offset(page); |
@@ -3870,23 +3843,30 @@ int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page) | |||
3870 | int ret = 1; | 3843 | int ret = 1; |
3871 | 3844 | ||
3872 | spin_lock(&tree->buffer_lock); | 3845 | spin_lock(&tree->buffer_lock); |
3873 | eb = buffer_search(tree, start); | 3846 | eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); |
3874 | if (!eb) | 3847 | if (!eb) |
3875 | goto out; | 3848 | goto out; |
3876 | 3849 | ||
3877 | if (atomic_read(&eb->refs) > 1) { | 3850 | if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { |
3878 | ret = 0; | 3851 | ret = 0; |
3879 | goto out; | 3852 | goto out; |
3880 | } | 3853 | } |
3881 | if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { | 3854 | |
3855 | /* | ||
3856 | * set @eb->refs to 0 if it is already 1, and then release the @eb. | ||
3857 | * Or go back. | ||
3858 | */ | ||
3859 | if (atomic_cmpxchg(&eb->refs, 1, 0) != 1) { | ||
3882 | ret = 0; | 3860 | ret = 0; |
3883 | goto out; | 3861 | goto out; |
3884 | } | 3862 | } |
3885 | 3863 | ||
3886 | rb_erase(&eb->rb_node, &tree->buffer); | 3864 | radix_tree_delete(&tree->buffer, start >> PAGE_CACHE_SHIFT); |
3887 | /* at this point we can safely release the extent buffer */ | ||
3888 | btrfs_release_extent_buffer(eb); | ||
3889 | out: | 3865 | out: |
3890 | spin_unlock(&tree->buffer_lock); | 3866 | spin_unlock(&tree->buffer_lock); |
3867 | |||
3868 | /* at this point we can safely release the extent buffer */ | ||
3869 | if (atomic_read(&eb->refs) == 0) | ||
3870 | call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu); | ||
3891 | return ret; | 3871 | return ret; |
3892 | } | 3872 | } |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 5691c7b590da..1c6d4f342ef7 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -85,7 +85,7 @@ struct extent_io_ops { | |||
85 | 85 | ||
86 | struct extent_io_tree { | 86 | struct extent_io_tree { |
87 | struct rb_root state; | 87 | struct rb_root state; |
88 | struct rb_root buffer; | 88 | struct radix_tree_root buffer; |
89 | struct address_space *mapping; | 89 | struct address_space *mapping; |
90 | u64 dirty_bytes; | 90 | u64 dirty_bytes; |
91 | spinlock_t lock; | 91 | spinlock_t lock; |
@@ -123,7 +123,7 @@ struct extent_buffer { | |||
123 | unsigned long bflags; | 123 | unsigned long bflags; |
124 | atomic_t refs; | 124 | atomic_t refs; |
125 | struct list_head leak_list; | 125 | struct list_head leak_list; |
126 | struct rb_node rb_node; | 126 | struct rcu_head rcu_head; |
127 | 127 | ||
128 | /* the spinlock is used to protect most operations */ | 128 | /* the spinlock is used to protect most operations */ |
129 | spinlock_t lock; | 129 | spinlock_t lock; |