author    Miao Xie <miaox@cn.fujitsu.com>    2010-10-26 20:57:29 -0400
committer Chris Mason <chris.mason@oracle.com>    2010-10-29 11:25:45 -0400
commit    19fe0a8b787d7c7f9318975b5a8c6e7e5e54e925 (patch)
tree      78aea56ede5735ae3183f2c4b846773b8fd8155b /fs
parent    897ca6e9b4fef86d5dfb6b31fd9f592ce6a47a42 (diff)
Btrfs: Switch the extent buffer rbtree into a radix tree
This patch reduces the CPU time spent in the extent buffer search by
using a radix tree instead of the rbtree, and by using RCU instead of
a spin lock for lookups. I did a quick test with the benchmark tool[1]
and found that the patch improves the file creation/deletion
performance problem that I have reported[2].

Before applying this patch:
Create files:
        Total files: 50000
        Total time: 0.971531
        Average time: 0.000019
Delete files:
        Total files: 50000
        Total time: 1.366761
        Average time: 0.000027

After applying this patch:
Create files:
        Total files: 50000
        Total time: 0.927455
        Average time: 0.000019
Delete files:
        Total files: 50000
        Total time: 1.292280
        Average time: 0.000026

[1] http://marc.info/?l=linux-btrfs&m=128212635122920&q=p3
[2] http://marc.info/?l=linux-btrfs&m=128212635122920&w=2

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
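The heart of the change is the lookup path: a spinlock-guarded rbtree walk becomes an RCU-protected radix tree lookup keyed by page index, with atomic_inc_not_zero() guarding against buffers that are concurrently being freed. A minimal sketch of that idiom follows; lookup_buffer is a hypothetical name, and this is illustrative of the pattern, not the literal patch code (the real functions appear in the diff below):

	struct extent_buffer *lookup_buffer(struct extent_io_tree *tree, u64 start)
	{
		struct extent_buffer *eb;

		rcu_read_lock();
		eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
		/*
		 * The buffer may already be on its way to being freed;
		 * take a reference only if the count has not hit zero.
		 */
		if (eb && atomic_inc_not_zero(&eb->refs)) {
			rcu_read_unlock();
			return eb;
		}
		rcu_read_unlock();
		return NULL;
	}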
Diffstat (limited to 'fs')
-rw-r--r--    fs/btrfs/extent_io.c    114
-rw-r--r--    fs/btrfs/extent_io.h      4
2 files changed, 49 insertions(+), 69 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 6e3b326346a7..4c639e156296 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -104,7 +104,7 @@ void extent_io_tree_init(struct extent_io_tree *tree,
 			  struct address_space *mapping, gfp_t mask)
 {
 	tree->state = RB_ROOT;
-	tree->buffer = RB_ROOT;
+	INIT_RADIX_TREE(&tree->buffer, GFP_ATOMIC);
 	tree->ops = NULL;
 	tree->dirty_bytes = 0;
 	spin_lock_init(&tree->lock);
@@ -235,50 +235,6 @@ static inline struct rb_node *tree_search(struct extent_io_tree *tree,
 	return ret;
 }
 
-static struct extent_buffer *buffer_tree_insert(struct extent_io_tree *tree,
-						u64 offset, struct rb_node *node)
-{
-	struct rb_root *root = &tree->buffer;
-	struct rb_node **p = &root->rb_node;
-	struct rb_node *parent = NULL;
-	struct extent_buffer *eb;
-
-	while (*p) {
-		parent = *p;
-		eb = rb_entry(parent, struct extent_buffer, rb_node);
-
-		if (offset < eb->start)
-			p = &(*p)->rb_left;
-		else if (offset > eb->start)
-			p = &(*p)->rb_right;
-		else
-			return eb;
-	}
-
-	rb_link_node(node, parent, p);
-	rb_insert_color(node, root);
-	return NULL;
-}
-
-static struct extent_buffer *buffer_search(struct extent_io_tree *tree,
-					   u64 offset)
-{
-	struct rb_root *root = &tree->buffer;
-	struct rb_node *n = root->rb_node;
-	struct extent_buffer *eb;
-
-	while (n) {
-		eb = rb_entry(n, struct extent_buffer, rb_node);
-		if (offset < eb->start)
-			n = n->rb_left;
-		else if (offset > eb->start)
-			n = n->rb_right;
-		else
-			return eb;
-	}
-	return NULL;
-}
-
 static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
 		     struct extent_state *other)
 {
@@ -3082,6 +3038,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
 	eb->len = len;
 	spin_lock_init(&eb->lock);
 	init_waitqueue_head(&eb->lock_wq);
+	INIT_RCU_HEAD(&eb->rcu_head);
 
 #if LEAK_DEBUG
 	spin_lock_irqsave(&leak_lock, flags);
@@ -3150,16 +3107,16 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
 	struct page *p;
 	struct address_space *mapping = tree->mapping;
 	int uptodate = 1;
+	int ret;
 
-	spin_lock(&tree->buffer_lock);
-	eb = buffer_search(tree, start);
-	if (eb) {
-		atomic_inc(&eb->refs);
-		spin_unlock(&tree->buffer_lock);
+	rcu_read_lock();
+	eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
+	if (eb && atomic_inc_not_zero(&eb->refs)) {
+		rcu_read_unlock();
 		mark_page_accessed(eb->first_page);
 		return eb;
 	}
-	spin_unlock(&tree->buffer_lock);
+	rcu_read_unlock();
 
 	eb = __alloc_extent_buffer(tree, start, len, mask);
 	if (!eb)
@@ -3198,17 +3155,25 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
 	if (uptodate)
 		set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
 
+	ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
+	if (ret)
+		goto free_eb;
+
 	spin_lock(&tree->buffer_lock);
-	exists = buffer_tree_insert(tree, start, &eb->rb_node);
-	if (exists) {
+	ret = radix_tree_insert(&tree->buffer, start >> PAGE_CACHE_SHIFT, eb);
+	if (ret == -EEXIST) {
+		exists = radix_tree_lookup(&tree->buffer,
+				start >> PAGE_CACHE_SHIFT);
 		/* add one reference for the caller */
 		atomic_inc(&exists->refs);
 		spin_unlock(&tree->buffer_lock);
+		radix_tree_preload_end();
 		goto free_eb;
 	}
 	/* add one reference for the tree */
 	atomic_inc(&eb->refs);
 	spin_unlock(&tree->buffer_lock);
+	radix_tree_preload_end();
 	return eb;
 
 free_eb:
@@ -3224,16 +3189,16 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
 {
 	struct extent_buffer *eb;
 
-	spin_lock(&tree->buffer_lock);
-	eb = buffer_search(tree, start);
-	if (eb)
-		atomic_inc(&eb->refs);
-	spin_unlock(&tree->buffer_lock);
-
-	if (eb)
-		mark_page_accessed(eb->first_page);
+	rcu_read_lock();
+	eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
+	if (eb && atomic_inc_not_zero(&eb->refs)) {
+		rcu_read_unlock();
+		mark_page_accessed(eb->first_page);
+		return eb;
+	}
+	rcu_read_unlock();
 
-	return eb;
+	return NULL;
 }
 
 void free_extent_buffer(struct extent_buffer *eb)
@@ -3863,6 +3828,14 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
 	}
 }
 
+static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
+{
+	struct extent_buffer *eb =
+			container_of(head, struct extent_buffer, rcu_head);
+
+	btrfs_release_extent_buffer(eb);
+}
+
 int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
 {
 	u64 start = page_offset(page);
@@ -3870,23 +3843,30 @@ int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
 	int ret = 1;
 
 	spin_lock(&tree->buffer_lock);
-	eb = buffer_search(tree, start);
+	eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
 	if (!eb)
 		goto out;
 
-	if (atomic_read(&eb->refs) > 1) {
+	if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
 		ret = 0;
 		goto out;
 	}
-	if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
+
+	/*
+	 * set @eb->refs to 0 if it is already 1, and then release the @eb.
+	 * Or go back.
+	 */
+	if (atomic_cmpxchg(&eb->refs, 1, 0) != 1) {
 		ret = 0;
 		goto out;
 	}
 
-	rb_erase(&eb->rb_node, &tree->buffer);
-	/* at this point we can safely release the extent buffer */
-	btrfs_release_extent_buffer(eb);
+	radix_tree_delete(&tree->buffer, start >> PAGE_CACHE_SHIFT);
 out:
 	spin_unlock(&tree->buffer_lock);
+
+	/* at this point we can safely release the extent buffer */
+	if (atomic_read(&eb->refs) == 0)
+		call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
 	return ret;
 }
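Two idioms in the extent_io.c changes above are worth noting. Insertion preallocates radix tree nodes with radix_tree_preload() in a sleepable context before taking the spinlock (__GFP_HIGHMEM is masked off since radix tree nodes are kmalloc'd and cannot come from highmem), and release uses atomic_cmpxchg() so only the caller that drops the last reference tears the buffer down. A condensed sketch of the preload shape, with "index" standing in for start >> PAGE_CACHE_SHIFT and error handling abbreviated (illustrative, not the literal patch code):

	/* Preallocate while we may still sleep; preload also
	 * disables preemption until radix_tree_preload_end(). */
	ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
	if (ret)
		goto free_eb;			/* could not preallocate */

	spin_lock(&tree->buffer_lock);		/* atomic from here on */
	ret = radix_tree_insert(&tree->buffer, index, eb);
	if (ret == -EEXIST)
		/* someone raced us in; take a ref on their buffer */
		exists = radix_tree_lookup(&tree->buffer, index);
	spin_unlock(&tree->buffer_lock);
	radix_tree_preload_end();		/* re-enables preemption */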
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 5691c7b590da..1c6d4f342ef7 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -85,7 +85,7 @@ struct extent_io_ops {
 
 struct extent_io_tree {
 	struct rb_root state;
-	struct rb_root buffer;
+	struct radix_tree_root buffer;
 	struct address_space *mapping;
 	u64 dirty_bytes;
 	spinlock_t lock;
@@ -123,7 +123,7 @@ struct extent_buffer {
 	unsigned long bflags;
 	atomic_t refs;
 	struct list_head leak_list;
-	struct rb_node rb_node;
+	struct rcu_head rcu_head;
 
 	/* the spinlock is used to protect most operations */
 	spinlock_t lock;
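Replacing rb_node with rcu_head in struct extent_buffer is what makes the lockless lookup safe: after the entry is deleted from the radix tree, the buffer must outlive any reader still inside an rcu_read_lock() section, so the actual free is deferred past a grace period with call_rcu(). A generic sketch of that deferred-free pattern, with "index" again hypothetical (not the literal btrfs code):

	/* Runs after a grace period, once no RCU reader can still
	 * hold a pointer obtained from the radix tree. */
	static void release_rcu(struct rcu_head *head)
	{
		struct extent_buffer *eb =
			container_of(head, struct extent_buffer, rcu_head);
		btrfs_release_extent_buffer(eb);
	}

	/* Writer side: unpublish first, then schedule the free. */
	radix_tree_delete(&tree->buffer, index);
	call_rcu(&eb->rcu_head, release_rcu);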