aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChris Mason <chris.mason@oracle.com>2011-04-11 20:46:03 -0400
committerChris Mason <chris.mason@oracle.com>2011-04-11 20:46:03 -0400
commit874d0d2633e0f3fe955607c6b04d5fc5325781c4 (patch)
treea96165f1f13501419af1c88dae146bc1061f8664
parent507903b81840a70cc6a179d4eb03584ad50e8c5b (diff)
parent13c5a93e7005d7dae0b6d070d25203593e692d13 (diff)
Merge branch 'for-chris' of git://git.kernel.org/pub/scm/linux/kernel/git/josef/btrfs-work into for-linus
-rw-r--r--fs/btrfs/ctree.h5
-rw-r--r--fs/btrfs/disk-io.c2
-rw-r--r--fs/btrfs/file.c21
-rw-r--r--fs/btrfs/free-space-cache.c117
-rw-r--r--fs/btrfs/inode.c107
-rw-r--r--fs/btrfs/transaction.c48
-rw-r--r--fs/btrfs/transaction.h4
7 files changed, 180 insertions, 124 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 3458b5725540..0d00a07b5b29 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2576,6 +2576,11 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
2576int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, 2576int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
2577 struct inode *inode, u64 start, u64 end); 2577 struct inode *inode, u64 start, u64 end);
2578int btrfs_release_file(struct inode *inode, struct file *file); 2578int btrfs_release_file(struct inode *inode, struct file *file);
2579void btrfs_drop_pages(struct page **pages, size_t num_pages);
2580int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
2581 struct page **pages, size_t num_pages,
2582 loff_t pos, size_t write_bytes,
2583 struct extent_state **cached);
2579 2584
2580/* tree-defrag.c */ 2585/* tree-defrag.c */
2581int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, 2586int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index a272bfd74ea0..ef6865c17cd6 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3136,7 +3136,7 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
3136 btrfs_destroy_pinned_extent(root, 3136 btrfs_destroy_pinned_extent(root,
3137 root->fs_info->pinned_extents); 3137 root->fs_info->pinned_extents);
3138 3138
3139 t->use_count = 0; 3139 atomic_set(&t->use_count, 0);
3140 list_del_init(&t->list); 3140 list_del_init(&t->list);
3141 memset(t, 0, sizeof(*t)); 3141 memset(t, 0, sizeof(*t));
3142 kmem_cache_free(btrfs_transaction_cachep, t); 3142 kmem_cache_free(btrfs_transaction_cachep, t);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index e621ea54a3fd..75899a01dded 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -104,7 +104,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
104/* 104/*
105 * unlocks pages after btrfs_file_write is done with them 105 * unlocks pages after btrfs_file_write is done with them
106 */ 106 */
107static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages) 107void btrfs_drop_pages(struct page **pages, size_t num_pages)
108{ 108{
109 size_t i; 109 size_t i;
110 for (i = 0; i < num_pages; i++) { 110 for (i = 0; i < num_pages; i++) {
@@ -127,16 +127,13 @@ static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages)
127 * this also makes the decision about creating an inline extent vs 127 * this also makes the decision about creating an inline extent vs
128 * doing real data extents, marking pages dirty and delalloc as required. 128 * doing real data extents, marking pages dirty and delalloc as required.
129 */ 129 */
130static noinline int dirty_and_release_pages(struct btrfs_root *root, 130int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
131 struct file *file, 131 struct page **pages, size_t num_pages,
132 struct page **pages, 132 loff_t pos, size_t write_bytes,
133 size_t num_pages, 133 struct extent_state **cached)
134 loff_t pos,
135 size_t write_bytes)
136{ 134{
137 int err = 0; 135 int err = 0;
138 int i; 136 int i;
139 struct inode *inode = fdentry(file)->d_inode;
140 u64 num_bytes; 137 u64 num_bytes;
141 u64 start_pos; 138 u64 start_pos;
142 u64 end_of_last_block; 139 u64 end_of_last_block;
@@ -149,7 +146,7 @@ static noinline int dirty_and_release_pages(struct btrfs_root *root,
149 146
150 end_of_last_block = start_pos + num_bytes - 1; 147 end_of_last_block = start_pos + num_bytes - 1;
151 err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, 148 err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
152 NULL); 149 cached);
153 if (err) 150 if (err)
154 return err; 151 return err;
155 152
@@ -992,9 +989,9 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
992 } 989 }
993 990
994 if (copied > 0) { 991 if (copied > 0) {
995 ret = dirty_and_release_pages(root, file, pages, 992 ret = btrfs_dirty_pages(root, inode, pages,
996 dirty_pages, pos, 993 dirty_pages, pos, copied,
997 copied); 994 NULL);
998 if (ret) { 995 if (ret) {
999 btrfs_delalloc_release_space(inode, 996 btrfs_delalloc_release_space(inode,
1000 dirty_pages << PAGE_CACHE_SHIFT); 997 dirty_pages << PAGE_CACHE_SHIFT);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index f561c953205b..a3f420def0e9 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -508,6 +508,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
508 struct inode *inode; 508 struct inode *inode;
509 struct rb_node *node; 509 struct rb_node *node;
510 struct list_head *pos, *n; 510 struct list_head *pos, *n;
511 struct page **pages;
511 struct page *page; 512 struct page *page;
512 struct extent_state *cached_state = NULL; 513 struct extent_state *cached_state = NULL;
513 struct btrfs_free_cluster *cluster = NULL; 514 struct btrfs_free_cluster *cluster = NULL;
@@ -517,13 +518,13 @@ int btrfs_write_out_cache(struct btrfs_root *root,
517 u64 start, end, len; 518 u64 start, end, len;
518 u64 bytes = 0; 519 u64 bytes = 0;
519 u32 *crc, *checksums; 520 u32 *crc, *checksums;
520 pgoff_t index = 0, last_index = 0;
521 unsigned long first_page_offset; 521 unsigned long first_page_offset;
522 int num_checksums; 522 int index = 0, num_pages = 0;
523 int entries = 0; 523 int entries = 0;
524 int bitmaps = 0; 524 int bitmaps = 0;
525 int ret = 0; 525 int ret = 0;
526 bool next_page = false; 526 bool next_page = false;
527 bool out_of_space = false;
527 528
528 root = root->fs_info->tree_root; 529 root = root->fs_info->tree_root;
529 530
@@ -551,24 +552,31 @@ int btrfs_write_out_cache(struct btrfs_root *root,
551 return 0; 552 return 0;
552 } 553 }
553 554
554 last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; 555 num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
556 PAGE_CACHE_SHIFT;
555 filemap_write_and_wait(inode->i_mapping); 557 filemap_write_and_wait(inode->i_mapping);
556 btrfs_wait_ordered_range(inode, inode->i_size & 558 btrfs_wait_ordered_range(inode, inode->i_size &
557 ~(root->sectorsize - 1), (u64)-1); 559 ~(root->sectorsize - 1), (u64)-1);
558 560
559 /* We need a checksum per page. */ 561 /* We need a checksum per page. */
560 num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE; 562 crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS);
561 crc = checksums = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
562 if (!crc) { 563 if (!crc) {
563 iput(inode); 564 iput(inode);
564 return 0; 565 return 0;
565 } 566 }
566 567
568 pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS);
569 if (!pages) {
570 kfree(crc);
571 iput(inode);
572 return 0;
573 }
574
567 /* Since the first page has all of our checksums and our generation we 575 /* Since the first page has all of our checksums and our generation we
568 * need to calculate the offset into the page that we can start writing 576 * need to calculate the offset into the page that we can start writing
569 * our entries. 577 * our entries.
570 */ 578 */
571 first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64); 579 first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64);
572 580
573 /* Get the cluster for this block_group if it exists */ 581 /* Get the cluster for this block_group if it exists */
574 if (!list_empty(&block_group->cluster_list)) 582 if (!list_empty(&block_group->cluster_list))
@@ -590,20 +598,18 @@ int btrfs_write_out_cache(struct btrfs_root *root,
590 * after find_get_page at this point. Just putting this here so people 598 * after find_get_page at this point. Just putting this here so people
591 * know and don't freak out. 599 * know and don't freak out.
592 */ 600 */
593 while (index <= last_index) { 601 while (index < num_pages) {
594 page = grab_cache_page(inode->i_mapping, index); 602 page = grab_cache_page(inode->i_mapping, index);
595 if (!page) { 603 if (!page) {
596 pgoff_t i = 0; 604 int i;
597 605
598 while (i < index) { 606 for (i = 0; i < num_pages; i++) {
599 page = find_get_page(inode->i_mapping, i); 607 unlock_page(pages[i]);
600 unlock_page(page); 608 page_cache_release(pages[i]);
601 page_cache_release(page);
602 page_cache_release(page);
603 i++;
604 } 609 }
605 goto out_free; 610 goto out_free;
606 } 611 }
612 pages[index] = page;
607 index++; 613 index++;
608 } 614 }
609 615
@@ -631,7 +637,12 @@ int btrfs_write_out_cache(struct btrfs_root *root,
631 offset = start_offset; 637 offset = start_offset;
632 } 638 }
633 639
634 page = find_get_page(inode->i_mapping, index); 640 if (index >= num_pages) {
641 out_of_space = true;
642 break;
643 }
644
645 page = pages[index];
635 646
636 addr = kmap(page); 647 addr = kmap(page);
637 entry = addr + start_offset; 648 entry = addr + start_offset;
@@ -708,23 +719,6 @@ int btrfs_write_out_cache(struct btrfs_root *root,
708 719
709 bytes += PAGE_CACHE_SIZE; 720 bytes += PAGE_CACHE_SIZE;
710 721
711 ClearPageChecked(page);
712 set_page_extent_mapped(page);
713 SetPageUptodate(page);
714 set_page_dirty(page);
715
716 /*
717 * We need to release our reference we got for grab_cache_page,
718 * except for the first page which will hold our checksums, we
719 * do that below.
720 */
721 if (index != 0) {
722 unlock_page(page);
723 page_cache_release(page);
724 }
725
726 page_cache_release(page);
727
728 index++; 722 index++;
729 } while (node || next_page); 723 } while (node || next_page);
730 724
@@ -734,6 +728,10 @@ int btrfs_write_out_cache(struct btrfs_root *root,
734 struct btrfs_free_space *entry = 728 struct btrfs_free_space *entry =
735 list_entry(pos, struct btrfs_free_space, list); 729 list_entry(pos, struct btrfs_free_space, list);
736 730
731 if (index >= num_pages) {
732 out_of_space = true;
733 break;
734 }
737 page = find_get_page(inode->i_mapping, index); 735 page = find_get_page(inode->i_mapping, index);
738 736
739 addr = kmap(page); 737 addr = kmap(page);
@@ -745,64 +743,58 @@ int btrfs_write_out_cache(struct btrfs_root *root,
745 crc++; 743 crc++;
746 bytes += PAGE_CACHE_SIZE; 744 bytes += PAGE_CACHE_SIZE;
747 745
748 ClearPageChecked(page);
749 set_page_extent_mapped(page);
750 SetPageUptodate(page);
751 set_page_dirty(page);
752 unlock_page(page);
753 page_cache_release(page);
754 page_cache_release(page);
755 list_del_init(&entry->list); 746 list_del_init(&entry->list);
756 index++; 747 index++;
757 } 748 }
758 749
750 if (out_of_space) {
751 btrfs_drop_pages(pages, num_pages);
752 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
753 i_size_read(inode) - 1, &cached_state,
754 GFP_NOFS);
755 ret = 0;
756 goto out_free;
757 }
758
759 /* Zero out the rest of the pages just to make sure */ 759 /* Zero out the rest of the pages just to make sure */
760 while (index <= last_index) { 760 while (index < num_pages) {
761 void *addr; 761 void *addr;
762 762
763 page = find_get_page(inode->i_mapping, index); 763 page = pages[index];
764
765 addr = kmap(page); 764 addr = kmap(page);
766 memset(addr, 0, PAGE_CACHE_SIZE); 765 memset(addr, 0, PAGE_CACHE_SIZE);
767 kunmap(page); 766 kunmap(page);
768 ClearPageChecked(page);
769 set_page_extent_mapped(page);
770 SetPageUptodate(page);
771 set_page_dirty(page);
772 unlock_page(page);
773 page_cache_release(page);
774 page_cache_release(page);
775 bytes += PAGE_CACHE_SIZE; 767 bytes += PAGE_CACHE_SIZE;
776 index++; 768 index++;
777 } 769 }
778 770
779 btrfs_set_extent_delalloc(inode, 0, bytes - 1, &cached_state);
780
781 /* Write the checksums and trans id to the first page */ 771 /* Write the checksums and trans id to the first page */
782 { 772 {
783 void *addr; 773 void *addr;
784 u64 *gen; 774 u64 *gen;
785 775
786 page = find_get_page(inode->i_mapping, 0); 776 page = pages[0];
787 777
788 addr = kmap(page); 778 addr = kmap(page);
789 memcpy(addr, checksums, sizeof(u32) * num_checksums); 779 memcpy(addr, checksums, sizeof(u32) * num_pages);
790 gen = addr + (sizeof(u32) * num_checksums); 780 gen = addr + (sizeof(u32) * num_pages);
791 *gen = trans->transid; 781 *gen = trans->transid;
792 kunmap(page); 782 kunmap(page);
793 ClearPageChecked(page);
794 set_page_extent_mapped(page);
795 SetPageUptodate(page);
796 set_page_dirty(page);
797 unlock_page(page);
798 page_cache_release(page);
799 page_cache_release(page);
800 } 783 }
801 BTRFS_I(inode)->generation = trans->transid;
802 784
785 ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0,
786 bytes, &cached_state);
787 btrfs_drop_pages(pages, num_pages);
803 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 788 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
804 i_size_read(inode) - 1, &cached_state, GFP_NOFS); 789 i_size_read(inode) - 1, &cached_state, GFP_NOFS);
805 790
791 if (ret) {
792 ret = 0;
793 goto out_free;
794 }
795
796 BTRFS_I(inode)->generation = trans->transid;
797
806 filemap_write_and_wait(inode->i_mapping); 798 filemap_write_and_wait(inode->i_mapping);
807 799
808 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 800 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
@@ -853,6 +845,7 @@ out_free:
853 BTRFS_I(inode)->generation = 0; 845 BTRFS_I(inode)->generation = 0;
854 } 846 }
855 kfree(checksums); 847 kfree(checksums);
848 kfree(pages);
856 btrfs_update_inode(trans, root, inode); 849 btrfs_update_inode(trans, root, inode);
857 iput(inode); 850 iput(inode);
858 return ret; 851 return ret;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5a993e0ec865..55a6a0b416d7 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1769,9 +1769,12 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1769 add_pending_csums(trans, inode, ordered_extent->file_offset, 1769 add_pending_csums(trans, inode, ordered_extent->file_offset,
1770 &ordered_extent->list); 1770 &ordered_extent->list);
1771 1771
1772 btrfs_ordered_update_i_size(inode, 0, ordered_extent); 1772 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1773 ret = btrfs_update_inode(trans, root, inode); 1773 if (!ret) {
1774 BUG_ON(ret); 1774 ret = btrfs_update_inode(trans, root, inode);
1775 BUG_ON(ret);
1776 }
1777 ret = 0;
1775out: 1778out:
1776 if (nolock) { 1779 if (nolock) {
1777 if (trans) 1780 if (trans)
@@ -2589,6 +2592,13 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
2589 struct btrfs_inode_item *item, 2592 struct btrfs_inode_item *item,
2590 struct inode *inode) 2593 struct inode *inode)
2591{ 2594{
2595 if (!leaf->map_token)
2596 map_private_extent_buffer(leaf, (unsigned long)item,
2597 sizeof(struct btrfs_inode_item),
2598 &leaf->map_token, &leaf->kaddr,
2599 &leaf->map_start, &leaf->map_len,
2600 KM_USER1);
2601
2592 btrfs_set_inode_uid(leaf, item, inode->i_uid); 2602 btrfs_set_inode_uid(leaf, item, inode->i_uid);
2593 btrfs_set_inode_gid(leaf, item, inode->i_gid); 2603 btrfs_set_inode_gid(leaf, item, inode->i_gid);
2594 btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); 2604 btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size);
@@ -2617,6 +2627,11 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
2617 btrfs_set_inode_rdev(leaf, item, inode->i_rdev); 2627 btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
2618 btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); 2628 btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
2619 btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group); 2629 btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group);
2630
2631 if (leaf->map_token) {
2632 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2633 leaf->map_token = NULL;
2634 }
2620} 2635}
2621 2636
2622/* 2637/*
@@ -5433,17 +5448,30 @@ out:
5433} 5448}
5434 5449
5435static struct extent_map *btrfs_new_extent_direct(struct inode *inode, 5450static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5451 struct extent_map *em,
5436 u64 start, u64 len) 5452 u64 start, u64 len)
5437{ 5453{
5438 struct btrfs_root *root = BTRFS_I(inode)->root; 5454 struct btrfs_root *root = BTRFS_I(inode)->root;
5439 struct btrfs_trans_handle *trans; 5455 struct btrfs_trans_handle *trans;
5440 struct extent_map *em;
5441 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 5456 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
5442 struct btrfs_key ins; 5457 struct btrfs_key ins;
5443 u64 alloc_hint; 5458 u64 alloc_hint;
5444 int ret; 5459 int ret;
5460 bool insert = false;
5445 5461
5446 btrfs_drop_extent_cache(inode, start, start + len - 1, 0); 5462 /*
5463 * Ok if the extent map we looked up is a hole and is for the exact
5464 * range we want, there is no reason to allocate a new one, however if
5465 * it is not right then we need to free this one and drop the cache for
5466 * our range.
5467 */
5468 if (em->block_start != EXTENT_MAP_HOLE || em->start != start ||
5469 em->len != len) {
5470 free_extent_map(em);
5471 em = NULL;
5472 insert = true;
5473 btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
5474 }
5447 5475
5448 trans = btrfs_join_transaction(root, 0); 5476 trans = btrfs_join_transaction(root, 0);
5449 if (IS_ERR(trans)) 5477 if (IS_ERR(trans))
@@ -5459,10 +5487,12 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5459 goto out; 5487 goto out;
5460 } 5488 }
5461 5489
5462 em = alloc_extent_map(GFP_NOFS);
5463 if (!em) { 5490 if (!em) {
5464 em = ERR_PTR(-ENOMEM); 5491 em = alloc_extent_map(GFP_NOFS);
5465 goto out; 5492 if (!em) {
5493 em = ERR_PTR(-ENOMEM);
5494 goto out;
5495 }
5466 } 5496 }
5467 5497
5468 em->start = start; 5498 em->start = start;
@@ -5472,9 +5502,15 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5472 em->block_start = ins.objectid; 5502 em->block_start = ins.objectid;
5473 em->block_len = ins.offset; 5503 em->block_len = ins.offset;
5474 em->bdev = root->fs_info->fs_devices->latest_bdev; 5504 em->bdev = root->fs_info->fs_devices->latest_bdev;
5505
5506 /*
5507 * We need to do this because if we're using the original em we searched
5508 * for, we could have EXTENT_FLAG_VACANCY set, and we don't want that.
5509 */
5510 em->flags = 0;
5475 set_bit(EXTENT_FLAG_PINNED, &em->flags); 5511 set_bit(EXTENT_FLAG_PINNED, &em->flags);
5476 5512
5477 while (1) { 5513 while (insert) {
5478 write_lock(&em_tree->lock); 5514 write_lock(&em_tree->lock);
5479 ret = add_extent_mapping(em_tree, em); 5515 ret = add_extent_mapping(em_tree, em);
5480 write_unlock(&em_tree->lock); 5516 write_unlock(&em_tree->lock);
@@ -5692,8 +5728,7 @@ must_cow:
5692 * it above 5728 * it above
5693 */ 5729 */
5694 len = bh_result->b_size; 5730 len = bh_result->b_size;
5695 free_extent_map(em); 5731 em = btrfs_new_extent_direct(inode, em, start, len);
5696 em = btrfs_new_extent_direct(inode, start, len);
5697 if (IS_ERR(em)) 5732 if (IS_ERR(em))
5698 return PTR_ERR(em); 5733 return PTR_ERR(em);
5699 len = min(len, em->len - (start - em->start)); 5734 len = min(len, em->len - (start - em->start));
@@ -5856,8 +5891,10 @@ again:
5856 } 5891 }
5857 5892
5858 add_pending_csums(trans, inode, ordered->file_offset, &ordered->list); 5893 add_pending_csums(trans, inode, ordered->file_offset, &ordered->list);
5859 btrfs_ordered_update_i_size(inode, 0, ordered); 5894 ret = btrfs_ordered_update_i_size(inode, 0, ordered);
5860 btrfs_update_inode(trans, root, inode); 5895 if (!ret)
5896 btrfs_update_inode(trans, root, inode);
5897 ret = 0;
5861out_unlock: 5898out_unlock:
5862 unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset, 5899 unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset,
5863 ordered->file_offset + ordered->len - 1, 5900 ordered->file_offset + ordered->len - 1,
@@ -5943,7 +5980,7 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
5943 5980
5944static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, 5981static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
5945 int rw, u64 file_offset, int skip_sum, 5982 int rw, u64 file_offset, int skip_sum,
5946 u32 *csums) 5983 u32 *csums, int async_submit)
5947{ 5984{
5948 int write = rw & REQ_WRITE; 5985 int write = rw & REQ_WRITE;
5949 struct btrfs_root *root = BTRFS_I(inode)->root; 5986 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -5954,13 +5991,24 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
5954 if (ret) 5991 if (ret)
5955 goto err; 5992 goto err;
5956 5993
5957 if (write && !skip_sum) { 5994 if (skip_sum)
5995 goto map;
5996
5997 if (write && async_submit) {
5958 ret = btrfs_wq_submit_bio(root->fs_info, 5998 ret = btrfs_wq_submit_bio(root->fs_info,
5959 inode, rw, bio, 0, 0, 5999 inode, rw, bio, 0, 0,
5960 file_offset, 6000 file_offset,
5961 __btrfs_submit_bio_start_direct_io, 6001 __btrfs_submit_bio_start_direct_io,
5962 __btrfs_submit_bio_done); 6002 __btrfs_submit_bio_done);
5963 goto err; 6003 goto err;
6004 } else if (write) {
6005 /*
6006 * If we aren't doing async submit, calculate the csum of the
6007 * bio now.
6008 */
6009 ret = btrfs_csum_one_bio(root, inode, bio, file_offset, 1);
6010 if (ret)
6011 goto err;
5964 } else if (!skip_sum) { 6012 } else if (!skip_sum) {
5965 ret = btrfs_lookup_bio_sums_dio(root, inode, bio, 6013 ret = btrfs_lookup_bio_sums_dio(root, inode, bio,
5966 file_offset, csums); 6014 file_offset, csums);
@@ -5968,7 +6016,8 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
5968 goto err; 6016 goto err;
5969 } 6017 }
5970 6018
5971 ret = btrfs_map_bio(root, rw, bio, 0, 1); 6019map:
6020 ret = btrfs_map_bio(root, rw, bio, 0, async_submit);
5972err: 6021err:
5973 bio_put(bio); 6022 bio_put(bio);
5974 return ret; 6023 return ret;
@@ -5990,15 +6039,9 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
5990 int nr_pages = 0; 6039 int nr_pages = 0;
5991 u32 *csums = dip->csums; 6040 u32 *csums = dip->csums;
5992 int ret = 0; 6041 int ret = 0;
6042 int async_submit = 0;
5993 int write = rw & REQ_WRITE; 6043 int write = rw & REQ_WRITE;
5994 6044
5995 bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
5996 if (!bio)
5997 return -ENOMEM;
5998 bio->bi_private = dip;
5999 bio->bi_end_io = btrfs_end_dio_bio;
6000 atomic_inc(&dip->pending_bios);
6001
6002 map_length = orig_bio->bi_size; 6045 map_length = orig_bio->bi_size;
6003 ret = btrfs_map_block(map_tree, READ, start_sector << 9, 6046 ret = btrfs_map_block(map_tree, READ, start_sector << 9,
6004 &map_length, NULL, 0); 6047 &map_length, NULL, 0);
@@ -6007,6 +6050,19 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
6007 return -EIO; 6050 return -EIO;
6008 } 6051 }
6009 6052
6053 if (map_length >= orig_bio->bi_size) {
6054 bio = orig_bio;
6055 goto submit;
6056 }
6057
6058 async_submit = 1;
6059 bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
6060 if (!bio)
6061 return -ENOMEM;
6062 bio->bi_private = dip;
6063 bio->bi_end_io = btrfs_end_dio_bio;
6064 atomic_inc(&dip->pending_bios);
6065
6010 while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) { 6066 while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) {
6011 if (unlikely(map_length < submit_len + bvec->bv_len || 6067 if (unlikely(map_length < submit_len + bvec->bv_len ||
6012 bio_add_page(bio, bvec->bv_page, bvec->bv_len, 6068 bio_add_page(bio, bvec->bv_page, bvec->bv_len,
@@ -6020,7 +6076,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
6020 atomic_inc(&dip->pending_bios); 6076 atomic_inc(&dip->pending_bios);
6021 ret = __btrfs_submit_dio_bio(bio, inode, rw, 6077 ret = __btrfs_submit_dio_bio(bio, inode, rw,
6022 file_offset, skip_sum, 6078 file_offset, skip_sum,
6023 csums); 6079 csums, async_submit);
6024 if (ret) { 6080 if (ret) {
6025 bio_put(bio); 6081 bio_put(bio);
6026 atomic_dec(&dip->pending_bios); 6082 atomic_dec(&dip->pending_bios);
@@ -6057,8 +6113,9 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
6057 } 6113 }
6058 } 6114 }
6059 6115
6116submit:
6060 ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum, 6117 ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
6061 csums); 6118 csums, async_submit);
6062 if (!ret) 6119 if (!ret)
6063 return 0; 6120 return 0;
6064 6121
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 5b158da7e0bb..c571734d5e5a 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -32,10 +32,8 @@
32 32
33static noinline void put_transaction(struct btrfs_transaction *transaction) 33static noinline void put_transaction(struct btrfs_transaction *transaction)
34{ 34{
35 WARN_ON(transaction->use_count == 0); 35 WARN_ON(atomic_read(&transaction->use_count) == 0);
36 transaction->use_count--; 36 if (atomic_dec_and_test(&transaction->use_count)) {
37 if (transaction->use_count == 0) {
38 list_del_init(&transaction->list);
39 memset(transaction, 0, sizeof(*transaction)); 37 memset(transaction, 0, sizeof(*transaction));
40 kmem_cache_free(btrfs_transaction_cachep, transaction); 38 kmem_cache_free(btrfs_transaction_cachep, transaction);
41 } 39 }
@@ -60,14 +58,14 @@ static noinline int join_transaction(struct btrfs_root *root)
60 if (!cur_trans) 58 if (!cur_trans)
61 return -ENOMEM; 59 return -ENOMEM;
62 root->fs_info->generation++; 60 root->fs_info->generation++;
63 cur_trans->num_writers = 1; 61 atomic_set(&cur_trans->num_writers, 1);
64 cur_trans->num_joined = 0; 62 cur_trans->num_joined = 0;
65 cur_trans->transid = root->fs_info->generation; 63 cur_trans->transid = root->fs_info->generation;
66 init_waitqueue_head(&cur_trans->writer_wait); 64 init_waitqueue_head(&cur_trans->writer_wait);
67 init_waitqueue_head(&cur_trans->commit_wait); 65 init_waitqueue_head(&cur_trans->commit_wait);
68 cur_trans->in_commit = 0; 66 cur_trans->in_commit = 0;
69 cur_trans->blocked = 0; 67 cur_trans->blocked = 0;
70 cur_trans->use_count = 1; 68 atomic_set(&cur_trans->use_count, 1);
71 cur_trans->commit_done = 0; 69 cur_trans->commit_done = 0;
72 cur_trans->start_time = get_seconds(); 70 cur_trans->start_time = get_seconds();
73 71
@@ -88,7 +86,7 @@ static noinline int join_transaction(struct btrfs_root *root)
88 root->fs_info->running_transaction = cur_trans; 86 root->fs_info->running_transaction = cur_trans;
89 spin_unlock(&root->fs_info->new_trans_lock); 87 spin_unlock(&root->fs_info->new_trans_lock);
90 } else { 88 } else {
91 cur_trans->num_writers++; 89 atomic_inc(&cur_trans->num_writers);
92 cur_trans->num_joined++; 90 cur_trans->num_joined++;
93 } 91 }
94 92
@@ -145,7 +143,7 @@ static void wait_current_trans(struct btrfs_root *root)
145 cur_trans = root->fs_info->running_transaction; 143 cur_trans = root->fs_info->running_transaction;
146 if (cur_trans && cur_trans->blocked) { 144 if (cur_trans && cur_trans->blocked) {
147 DEFINE_WAIT(wait); 145 DEFINE_WAIT(wait);
148 cur_trans->use_count++; 146 atomic_inc(&cur_trans->use_count);
149 while (1) { 147 while (1) {
150 prepare_to_wait(&root->fs_info->transaction_wait, &wait, 148 prepare_to_wait(&root->fs_info->transaction_wait, &wait,
151 TASK_UNINTERRUPTIBLE); 149 TASK_UNINTERRUPTIBLE);
@@ -181,6 +179,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
181{ 179{
182 struct btrfs_trans_handle *h; 180 struct btrfs_trans_handle *h;
183 struct btrfs_transaction *cur_trans; 181 struct btrfs_transaction *cur_trans;
182 int retries = 0;
184 int ret; 183 int ret;
185 184
186 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) 185 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
@@ -204,7 +203,7 @@ again:
204 } 203 }
205 204
206 cur_trans = root->fs_info->running_transaction; 205 cur_trans = root->fs_info->running_transaction;
207 cur_trans->use_count++; 206 atomic_inc(&cur_trans->use_count);
208 if (type != TRANS_JOIN_NOLOCK) 207 if (type != TRANS_JOIN_NOLOCK)
209 mutex_unlock(&root->fs_info->trans_mutex); 208 mutex_unlock(&root->fs_info->trans_mutex);
210 209
@@ -224,10 +223,18 @@ again:
224 223
225 if (num_items > 0) { 224 if (num_items > 0) {
226 ret = btrfs_trans_reserve_metadata(h, root, num_items); 225 ret = btrfs_trans_reserve_metadata(h, root, num_items);
227 if (ret == -EAGAIN) { 226 if (ret == -EAGAIN && !retries) {
227 retries++;
228 btrfs_commit_transaction(h, root); 228 btrfs_commit_transaction(h, root);
229 goto again; 229 goto again;
230 } else if (ret == -EAGAIN) {
231 /*
232 * We have already retried and got EAGAIN, so really we
233 * don't have space, so set ret to -ENOSPC.
234 */
235 ret = -ENOSPC;
230 } 236 }
237
231 if (ret < 0) { 238 if (ret < 0) {
232 btrfs_end_transaction(h, root); 239 btrfs_end_transaction(h, root);
233 return ERR_PTR(ret); 240 return ERR_PTR(ret);
@@ -327,7 +334,7 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
327 goto out_unlock; /* nothing committing|committed */ 334 goto out_unlock; /* nothing committing|committed */
328 } 335 }
329 336
330 cur_trans->use_count++; 337 atomic_inc(&cur_trans->use_count);
331 mutex_unlock(&root->fs_info->trans_mutex); 338 mutex_unlock(&root->fs_info->trans_mutex);
332 339
333 wait_for_commit(root, cur_trans); 340 wait_for_commit(root, cur_trans);
@@ -457,18 +464,14 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
457 wake_up_process(info->transaction_kthread); 464 wake_up_process(info->transaction_kthread);
458 } 465 }
459 466
460 if (lock)
461 mutex_lock(&info->trans_mutex);
462 WARN_ON(cur_trans != info->running_transaction); 467 WARN_ON(cur_trans != info->running_transaction);
463 WARN_ON(cur_trans->num_writers < 1); 468 WARN_ON(atomic_read(&cur_trans->num_writers) < 1);
464 cur_trans->num_writers--; 469 atomic_dec(&cur_trans->num_writers);
465 470
466 smp_mb(); 471 smp_mb();
467 if (waitqueue_active(&cur_trans->writer_wait)) 472 if (waitqueue_active(&cur_trans->writer_wait))
468 wake_up(&cur_trans->writer_wait); 473 wake_up(&cur_trans->writer_wait);
469 put_transaction(cur_trans); 474 put_transaction(cur_trans);
470 if (lock)
471 mutex_unlock(&info->trans_mutex);
472 475
473 if (current->journal_info == trans) 476 if (current->journal_info == trans)
474 current->journal_info = NULL; 477 current->journal_info = NULL;
@@ -1178,7 +1181,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1178 /* take transaction reference */ 1181 /* take transaction reference */
1179 mutex_lock(&root->fs_info->trans_mutex); 1182 mutex_lock(&root->fs_info->trans_mutex);
1180 cur_trans = trans->transaction; 1183 cur_trans = trans->transaction;
1181 cur_trans->use_count++; 1184 atomic_inc(&cur_trans->use_count);
1182 mutex_unlock(&root->fs_info->trans_mutex); 1185 mutex_unlock(&root->fs_info->trans_mutex);
1183 1186
1184 btrfs_end_transaction(trans, root); 1187 btrfs_end_transaction(trans, root);
@@ -1237,7 +1240,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1237 1240
1238 mutex_lock(&root->fs_info->trans_mutex); 1241 mutex_lock(&root->fs_info->trans_mutex);
1239 if (cur_trans->in_commit) { 1242 if (cur_trans->in_commit) {
1240 cur_trans->use_count++; 1243 atomic_inc(&cur_trans->use_count);
1241 mutex_unlock(&root->fs_info->trans_mutex); 1244 mutex_unlock(&root->fs_info->trans_mutex);
1242 btrfs_end_transaction(trans, root); 1245 btrfs_end_transaction(trans, root);
1243 1246
@@ -1259,7 +1262,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1259 prev_trans = list_entry(cur_trans->list.prev, 1262 prev_trans = list_entry(cur_trans->list.prev,
1260 struct btrfs_transaction, list); 1263 struct btrfs_transaction, list);
1261 if (!prev_trans->commit_done) { 1264 if (!prev_trans->commit_done) {
1262 prev_trans->use_count++; 1265 atomic_inc(&prev_trans->use_count);
1263 mutex_unlock(&root->fs_info->trans_mutex); 1266 mutex_unlock(&root->fs_info->trans_mutex);
1264 1267
1265 wait_for_commit(root, prev_trans); 1268 wait_for_commit(root, prev_trans);
@@ -1300,14 +1303,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1300 TASK_UNINTERRUPTIBLE); 1303 TASK_UNINTERRUPTIBLE);
1301 1304
1302 smp_mb(); 1305 smp_mb();
1303 if (cur_trans->num_writers > 1) 1306 if (atomic_read(&cur_trans->num_writers) > 1)
1304 schedule_timeout(MAX_SCHEDULE_TIMEOUT); 1307 schedule_timeout(MAX_SCHEDULE_TIMEOUT);
1305 else if (should_grow) 1308 else if (should_grow)
1306 schedule_timeout(1); 1309 schedule_timeout(1);
1307 1310
1308 mutex_lock(&root->fs_info->trans_mutex); 1311 mutex_lock(&root->fs_info->trans_mutex);
1309 finish_wait(&cur_trans->writer_wait, &wait); 1312 finish_wait(&cur_trans->writer_wait, &wait);
1310 } while (cur_trans->num_writers > 1 || 1313 } while (atomic_read(&cur_trans->num_writers) > 1 ||
1311 (should_grow && cur_trans->num_joined != joined)); 1314 (should_grow && cur_trans->num_joined != joined));
1312 1315
1313 ret = create_pending_snapshots(trans, root->fs_info); 1316 ret = create_pending_snapshots(trans, root->fs_info);
@@ -1394,6 +1397,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1394 1397
1395 wake_up(&cur_trans->commit_wait); 1398 wake_up(&cur_trans->commit_wait);
1396 1399
1400 list_del_init(&cur_trans->list);
1397 put_transaction(cur_trans); 1401 put_transaction(cur_trans);
1398 put_transaction(cur_trans); 1402 put_transaction(cur_trans);
1399 1403
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 229a594cacd5..e441acc6c584 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -27,11 +27,11 @@ struct btrfs_transaction {
27 * total writers in this transaction, it must be zero before the 27 * total writers in this transaction, it must be zero before the
28 * transaction can end 28 * transaction can end
29 */ 29 */
30 unsigned long num_writers; 30 atomic_t num_writers;
31 31
32 unsigned long num_joined; 32 unsigned long num_joined;
33 int in_commit; 33 int in_commit;
34 int use_count; 34 atomic_t use_count;
35 int commit_done; 35 int commit_done;
36 int blocked; 36 int blocked;
37 struct list_head list; 37 struct list_head list;