author    Linus Torvalds <torvalds@linux-foundation.org>    2012-03-30 15:44:29 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2012-03-30 15:44:29 -0400
commit    9613bebb223dea3179c265dc31e1bb41ae39f321 (patch)
tree      39bf883573d23775a53be3172323c0237fef5630 /fs/btrfs/ordered-data.c
parent    40380f1c7841a5dcbf0b20f0b6da11969211ef77 (diff)
parent    bc3f116fec194f1d7329b160c266fe16b9266a1e (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs fixes and features from Chris Mason:
 "We've merged in the error handling patches from SuSE.  These are already shipping in the sles kernel, and they give btrfs the ability to abort transactions and go readonly on errors.  It involves a lot of churn as they clarify BUG_ONs, and remove the ones we now properly deal with.

  Josef reworked the way our metadata interacts with the page cache.  page->private now points to the btrfs extent_buffer object, which makes everything faster.  He changed it so we write a whole extent buffer at a time instead of allowing individual pages to go down, which will be important for the raid5/6 code (for the 3.5 merge window ;)

  Josef also made us more aggressive about dropping pages for metadata blocks that were freed due to COW.  Overall, our metadata caching is much faster now.

  We've integrated my patch for metadata bigger than the page size.  This allows metadata blocks up to 64KB in size.  In practice 16K and 32K seem to work best.  For workloads with lots of metadata, this cuts down the size of the extent allocation tree dramatically and fragments much less.

  Scrub was updated to support the larger block sizes, which ended up being a fairly large change (thanks Stefan Behrens).  We also have an assortment of fixes and updates, especially to the balancing code (Ilya Dryomov), the back ref walker (Jan Schmidt) and the defragging code (Liu Bo)."

Fixed up trivial conflicts in fs/btrfs/scrub.c that were just due to removal of the second argument to k[un]map_atomic() in commit 7ac687d9e047.

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (75 commits)
  Btrfs: update the checks for mixed block groups with big metadata blocks
  Btrfs: update to the right index of defragment
  Btrfs: do not bother to defrag an extent if it is a big real extent
  Btrfs: add a check to decide if we should defrag the range
  Btrfs: fix recursive defragment with autodefrag option
  Btrfs: fix the mismatch of page->mapping
  Btrfs: fix race between direct io and autodefrag
  Btrfs: fix deadlock during allocating chunks
  Btrfs: show useful info in space reservation tracepoint
  Btrfs: don't use crc items bigger than 4KB
  Btrfs: flush out and clean up any block device pages during mount
  btrfs: disallow unequal data/metadata blocksize for mixed block groups
  Btrfs: enhance superblock sanity checks
  Btrfs: change scrub to support big blocks
  Btrfs: minor cleanup in scrub
  Btrfs: introduce common define for max number of mirrors
  Btrfs: fix infinite loop in btrfs_shrink_device()
  Btrfs: fix memory leak in resolver code
  Btrfs: allow dup for data chunks in mixed mode
  Btrfs: validate target profiles only if we are going to use them
  ...
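The error-handling rework shows up concretely in the diff below: a bare BUG_ON() on a duplicate tree insert becomes a call to a panic helper that reports the offending offset and lets btrfs escalate according to its error policy. As a rough illustration of that pattern, here is a minimal, self-contained userspace sketch; ordered_tree_panic() and the toy tree_insert() are stand-ins, not the kernel API:

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

/* Stand-in for the kernel's tree_insert(): returns a non-NULL "colliding
 * node" when the same offset is inserted twice.  Illustrative only. */
static void *tree_insert(uint64_t offset)
{
        static uint64_t last = UINT64_MAX;
        void *dup = (offset == last) ? (void *)&last : NULL;
        last = offset;
        return dup;
}

/* One reporting point instead of a bare BUG_ON() at every call site; the
 * real ordered_data_tree_panic() forwards to btrfs_panic(), which
 * escalates according to the filesystem's error-handling policy. */
static void ordered_tree_panic(uint64_t offset)
{
        fprintf(stderr, "Inconsistency in ordered tree at offset %llu\n",
                (unsigned long long)offset);
        abort();
}

int main(void)
{
        uint64_t offsets[] = { 0, 4096, 4096 };     /* deliberate duplicate */

        for (int i = 0; i < 3; i++) {
                void *node = tree_insert(offsets[i]);
                if (node)                           /* was: BUG_ON(node) */
                        ordered_tree_panic(offsets[i]);
        }
        return 0;
}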
Diffstat (limited to 'fs/btrfs/ordered-data.c')
-rw-r--r--    fs/btrfs/ordered-data.c    60
1 file changed, 27 insertions(+), 33 deletions(-)
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index a1c940425307..bbf6d0d9aebe 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -59,6 +59,14 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset,
         return NULL;
 }
 
+static void ordered_data_tree_panic(struct inode *inode, int errno,
+                                    u64 offset)
+{
+        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+        btrfs_panic(fs_info, errno, "Inconsistency in ordered tree at offset "
+                    "%llu\n", (unsigned long long)offset);
+}
+
 /*
  * look for a given offset in the tree, and if it can't be found return the
  * first lesser offset
@@ -207,7 +215,8 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
         spin_lock(&tree->lock);
         node = tree_insert(&tree->tree, file_offset,
                            &entry->rb_node);
-        BUG_ON(node);
+        if (node)
+                ordered_data_tree_panic(inode, -EEXIST, file_offset);
         spin_unlock(&tree->lock);
 
         spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
@@ -215,7 +224,6 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
                       &BTRFS_I(inode)->root->fs_info->ordered_extents);
         spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
 
-        BUG_ON(node);
         return 0;
 }
 
@@ -249,9 +257,9 @@ int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
  * when an ordered extent is finished.  If the list covers more than one
  * ordered extent, it is split across multiples.
  */
-int btrfs_add_ordered_sum(struct inode *inode,
-                          struct btrfs_ordered_extent *entry,
-                          struct btrfs_ordered_sum *sum)
+void btrfs_add_ordered_sum(struct inode *inode,
+                           struct btrfs_ordered_extent *entry,
+                           struct btrfs_ordered_sum *sum)
 {
         struct btrfs_ordered_inode_tree *tree;
 
@@ -259,7 +267,6 @@ int btrfs_add_ordered_sum(struct inode *inode,
         spin_lock(&tree->lock);
         list_add_tail(&sum->list, &entry->list);
         spin_unlock(&tree->lock);
-        return 0;
 }
 
 /*
@@ -384,7 +391,7 @@ out:
  * used to drop a reference on an ordered extent.  This will free
  * the extent if the last reference is dropped
  */
-int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
+void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
 {
         struct list_head *cur;
         struct btrfs_ordered_sum *sum;
@@ -400,7 +407,6 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
                 }
                 kfree(entry);
         }
-        return 0;
 }
 
 /*
@@ -408,8 +414,8 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
  * and you must wake_up entry->wait.  You must hold the tree lock
  * while you call this function.
  */
-static int __btrfs_remove_ordered_extent(struct inode *inode,
-                                struct btrfs_ordered_extent *entry)
+static void __btrfs_remove_ordered_extent(struct inode *inode,
+                                struct btrfs_ordered_extent *entry)
 {
         struct btrfs_ordered_inode_tree *tree;
         struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -436,35 +442,30 @@ static int __btrfs_remove_ordered_extent(struct inode *inode,
                 list_del_init(&BTRFS_I(inode)->ordered_operations);
         }
         spin_unlock(&root->fs_info->ordered_extent_lock);
-
-        return 0;
 }
 
 /*
  * remove an ordered extent from the tree.  No references are dropped
  * but any waiters are woken.
  */
-int btrfs_remove_ordered_extent(struct inode *inode,
-                                struct btrfs_ordered_extent *entry)
+void btrfs_remove_ordered_extent(struct inode *inode,
+                                 struct btrfs_ordered_extent *entry)
 {
         struct btrfs_ordered_inode_tree *tree;
-        int ret;
 
         tree = &BTRFS_I(inode)->ordered_tree;
         spin_lock(&tree->lock);
-        ret = __btrfs_remove_ordered_extent(inode, entry);
+        __btrfs_remove_ordered_extent(inode, entry);
         spin_unlock(&tree->lock);
         wake_up(&entry->wait);
-
-        return ret;
 }
 
 /*
  * wait for all the ordered extents in a root.  This is done when balancing
  * space between drives.
  */
-int btrfs_wait_ordered_extents(struct btrfs_root *root,
-                               int nocow_only, int delay_iput)
+void btrfs_wait_ordered_extents(struct btrfs_root *root,
+                                int nocow_only, int delay_iput)
 {
         struct list_head splice;
         struct list_head *cur;
@@ -512,7 +513,6 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root,
                 spin_lock(&root->fs_info->ordered_extent_lock);
         }
         spin_unlock(&root->fs_info->ordered_extent_lock);
-        return 0;
 }
 
 /*
@@ -525,7 +525,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root,
  * extra check to make sure the ordered operation list really is empty
  * before we return
  */
-int btrfs_run_ordered_operations(struct btrfs_root *root, int wait)
+void btrfs_run_ordered_operations(struct btrfs_root *root, int wait)
 {
         struct btrfs_inode *btrfs_inode;
         struct inode *inode;
@@ -573,8 +573,6 @@ again:
 
         spin_unlock(&root->fs_info->ordered_extent_lock);
         mutex_unlock(&root->fs_info->ordered_operations_mutex);
-
-        return 0;
 }
 
 /*
@@ -609,7 +607,7 @@ void btrfs_start_ordered_extent(struct inode *inode,
 /*
  * Used to wait on ordered extents across a large range of bytes.
  */
-int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
+void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
 {
         u64 end;
         u64 orig_end;
@@ -664,7 +662,6 @@ again:
                 schedule_timeout(1);
                 goto again;
         }
-        return 0;
 }
 
 /*
@@ -948,9 +945,8 @@ out:
  * If trans is not null, we'll do a friendly check for a transaction that
  * is already flushing things and force the IO down ourselves.
  */
-int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
-                                struct btrfs_root *root,
-                                struct inode *inode)
+void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
+                                 struct btrfs_root *root, struct inode *inode)
 {
         u64 last_mod;
 
@@ -961,7 +957,7 @@ int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
          * commit, we can safely return without doing anything
          */
         if (last_mod < root->fs_info->last_trans_committed)
-                return 0;
+                return;
 
         /*
          * the transaction is already committing.  Just start the IO and
@@ -969,7 +965,7 @@ int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
          */
         if (trans && root->fs_info->running_transaction->blocked) {
                 btrfs_wait_ordered_range(inode, 0, (u64)-1);
-                return 0;
+                return;
         }
 
         spin_lock(&root->fs_info->ordered_extent_lock);
@@ -978,6 +974,4 @@ int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
                               &root->fs_info->ordered_operations);
         }
         spin_unlock(&root->fs_info->ordered_extent_lock);
-
-        return 0;
 }