diff options
author | Arne Jansen <sensille@gmx.net> | 2012-02-25 03:09:30 -0500 |
---|---|---|
committer | David Sterba <dsterba@suse.cz> | 2012-04-18 13:12:44 -0400 |
commit | 8c9c2bf7a3c4f7e9d158c0be9c49f372fb943ad2 (patch) | |
tree | 6c17b4db9b39cb8eb950c21377f453df18f467c3 /fs | |
parent | 848cce0d4102b5b4b26b0987b43e1919d462afe2 (diff) |
btrfs: fix race in reada
When an insertion into the radix tree returns -EEXIST, look up the existing
entry without giving up the spinlock in between.
There was a race for both the zone trees and the extent tree.
Signed-off-by: Arne Jansen <sensille@gmx.net>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/btrfs/inode.c | 8 | ||||
-rw-r--r-- | fs/btrfs/reada.c | 35 |
2 files changed, 23 insertions, 20 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a682c267576d..98ee5a51aa29 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -4332,7 +4332,13 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, | |||
4332 | } | 4332 | } |
4333 | no_dentry: | 4333 | no_dentry: |
4334 | /* is this a reference to our own snapshot? If so | 4334 | /* is this a reference to our own snapshot? If so |
4335 | * skip it | 4335 | * skip it. |
4336 | * | ||
4337 | * In contrast to old kernels, we insert the snapshot's | ||
4338 | * dir item and dir index after it has been created, so | ||
4339 | * we won't find a reference to our own snapshot. We | ||
4340 | * still keep the following code for backward | ||
4341 | * compatibility. | ||
4336 | */ | 4342 | */ |
4337 | if (location.type == BTRFS_ROOT_ITEM_KEY && | 4343 | if (location.type == BTRFS_ROOT_ITEM_KEY && |
4338 | location.objectid == root->root_key.objectid) { | 4344 | location.objectid == root->root_key.objectid) { |
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index dc5d33146fdb..8dec650099c8 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c | |||
@@ -250,14 +250,12 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info, | |||
250 | struct btrfs_bio *bbio) | 250 | struct btrfs_bio *bbio) |
251 | { | 251 | { |
252 | int ret; | 252 | int ret; |
253 | int looped = 0; | ||
254 | struct reada_zone *zone; | 253 | struct reada_zone *zone; |
255 | struct btrfs_block_group_cache *cache = NULL; | 254 | struct btrfs_block_group_cache *cache = NULL; |
256 | u64 start; | 255 | u64 start; |
257 | u64 end; | 256 | u64 end; |
258 | int i; | 257 | int i; |
259 | 258 | ||
260 | again: | ||
261 | zone = NULL; | 259 | zone = NULL; |
262 | spin_lock(&fs_info->reada_lock); | 260 | spin_lock(&fs_info->reada_lock); |
263 | ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, | 261 | ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, |
@@ -274,9 +272,6 @@ again: | |||
274 | spin_unlock(&fs_info->reada_lock); | 272 | spin_unlock(&fs_info->reada_lock); |
275 | } | 273 | } |
276 | 274 | ||
277 | if (looped) | ||
278 | return NULL; | ||
279 | |||
280 | cache = btrfs_lookup_block_group(fs_info, logical); | 275 | cache = btrfs_lookup_block_group(fs_info, logical); |
281 | if (!cache) | 276 | if (!cache) |
282 | return NULL; | 277 | return NULL; |
@@ -307,13 +302,15 @@ again: | |||
307 | ret = radix_tree_insert(&dev->reada_zones, | 302 | ret = radix_tree_insert(&dev->reada_zones, |
308 | (unsigned long)(zone->end >> PAGE_CACHE_SHIFT), | 303 | (unsigned long)(zone->end >> PAGE_CACHE_SHIFT), |
309 | zone); | 304 | zone); |
310 | spin_unlock(&fs_info->reada_lock); | ||
311 | 305 | ||
312 | if (ret) { | 306 | if (ret == -EEXIST) { |
313 | kfree(zone); | 307 | kfree(zone); |
314 | looped = 1; | 308 | ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, |
315 | goto again; | 309 | logical >> PAGE_CACHE_SHIFT, 1); |
310 | if (ret == 1) | ||
311 | kref_get(&zone->refcnt); | ||
316 | } | 312 | } |
313 | spin_unlock(&fs_info->reada_lock); | ||
317 | 314 | ||
318 | return zone; | 315 | return zone; |
319 | } | 316 | } |
@@ -323,8 +320,8 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
323 | struct btrfs_key *top, int level) | 320 | struct btrfs_key *top, int level) |
324 | { | 321 | { |
325 | int ret; | 322 | int ret; |
326 | int looped = 0; | ||
327 | struct reada_extent *re = NULL; | 323 | struct reada_extent *re = NULL; |
324 | struct reada_extent *re_exist = NULL; | ||
328 | struct btrfs_fs_info *fs_info = root->fs_info; | 325 | struct btrfs_fs_info *fs_info = root->fs_info; |
329 | struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; | 326 | struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; |
330 | struct btrfs_bio *bbio = NULL; | 327 | struct btrfs_bio *bbio = NULL; |
@@ -335,14 +332,13 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
335 | int i; | 332 | int i; |
336 | unsigned long index = logical >> PAGE_CACHE_SHIFT; | 333 | unsigned long index = logical >> PAGE_CACHE_SHIFT; |
337 | 334 | ||
338 | again: | ||
339 | spin_lock(&fs_info->reada_lock); | 335 | spin_lock(&fs_info->reada_lock); |
340 | re = radix_tree_lookup(&fs_info->reada_tree, index); | 336 | re = radix_tree_lookup(&fs_info->reada_tree, index); |
341 | if (re) | 337 | if (re) |
342 | kref_get(&re->refcnt); | 338 | kref_get(&re->refcnt); |
343 | spin_unlock(&fs_info->reada_lock); | 339 | spin_unlock(&fs_info->reada_lock); |
344 | 340 | ||
345 | if (re || looped) | 341 | if (re) |
346 | return re; | 342 | return re; |
347 | 343 | ||
348 | re = kzalloc(sizeof(*re), GFP_NOFS); | 344 | re = kzalloc(sizeof(*re), GFP_NOFS); |
@@ -398,12 +394,15 @@ again: | |||
398 | /* insert extent in reada_tree + all per-device trees, all or nothing */ | 394 | /* insert extent in reada_tree + all per-device trees, all or nothing */ |
399 | spin_lock(&fs_info->reada_lock); | 395 | spin_lock(&fs_info->reada_lock); |
400 | ret = radix_tree_insert(&fs_info->reada_tree, index, re); | 396 | ret = radix_tree_insert(&fs_info->reada_tree, index, re); |
397 | if (ret == -EEXIST) { | ||
398 | re_exist = radix_tree_lookup(&fs_info->reada_tree, index); | ||
399 | BUG_ON(!re_exist); | ||
400 | kref_get(&re_exist->refcnt); | ||
401 | spin_unlock(&fs_info->reada_lock); | ||
402 | goto error; | ||
403 | } | ||
401 | if (ret) { | 404 | if (ret) { |
402 | spin_unlock(&fs_info->reada_lock); | 405 | spin_unlock(&fs_info->reada_lock); |
403 | if (ret != -ENOMEM) { | ||
404 | /* someone inserted the extent in the meantime */ | ||
405 | looped = 1; | ||
406 | } | ||
407 | goto error; | 406 | goto error; |
408 | } | 407 | } |
409 | for (i = 0; i < nzones; ++i) { | 408 | for (i = 0; i < nzones; ++i) { |
@@ -450,9 +449,7 @@ error: | |||
450 | } | 449 | } |
451 | kfree(bbio); | 450 | kfree(bbio); |
452 | kfree(re); | 451 | kfree(re); |
453 | if (looped) | 452 | return re_exist; |
454 | goto again; | ||
455 | return NULL; | ||
456 | } | 453 | } |
457 | 454 | ||
458 | static void reada_kref_dummy(struct kref *kr) | 455 | static void reada_kref_dummy(struct kref *kr) |